qemu/target-arm/translate-a64.c
<<
>>
Prefs
   1/*
   2 *  AArch64 translation
   3 *
   4 *  Copyright (c) 2013 Alexander Graf <agraf@suse.de>
   5 *
   6 * This library is free software; you can redistribute it and/or
   7 * modify it under the terms of the GNU Lesser General Public
   8 * License as published by the Free Software Foundation; either
   9 * version 2 of the License, or (at your option) any later version.
  10 *
  11 * This library is distributed in the hope that it will be useful,
  12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 * Lesser General Public License for more details.
  15 *
  16 * You should have received a copy of the GNU Lesser General Public
  17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18 */
  19#include "qemu/osdep.h"
  20
  21#include "cpu.h"
  22#include "tcg-op.h"
  23#include "qemu/log.h"
  24#include "arm_ldst.h"
  25#include "translate.h"
  26#include "internals.h"
  27#include "qemu/host-utils.h"
  28
  29#include "exec/semihost.h"
  30#include "exec/gen-icount.h"
  31
  32#include "exec/helper-proto.h"
  33#include "exec/helper-gen.h"
  34#include "exec/log.h"
  35
  36#include "trace-tcg.h"
  37
/* TCG globals backing the AArch64 general purpose registers (slot 31
 * holds SP) and the PC; created in a64_translate_init().
 */
static TCGv_i64 cpu_X[32];
static TCGv_i64 cpu_pc;

/* Load/store exclusive handling */
static TCGv_i64 cpu_exclusive_high;

/* Names used when registering cpu_X[] with TCG; slot 30 is the link
 * register and slot 31 the stack pointer.
 */
static const char *regnames[] = {
    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
    "x24", "x25", "x26", "x27", "x28", "x29", "lr", "sp"
};

/* The four A64 shift types, numbered as in the instruction encoding */
enum a64_shift_type {
    A64_SHIFT_TYPE_LSL = 0,
    A64_SHIFT_TYPE_LSR = 1,
    A64_SHIFT_TYPE_ASR = 2,
    A64_SHIFT_TYPE_ROR = 3
};

/* Table based decoder typedefs - used when the relevant bits for decode
 * are too awkwardly scattered across the instruction (eg SIMD).
 */
typedef void AArch64DecodeFn(DisasContext *s, uint32_t insn);

/* A table entry matches an insn when (insn & mask) == pattern */
typedef struct AArch64DecodeTable {
    uint32_t pattern;
    uint32_t mask;
    AArch64DecodeFn *disas_fn;
} AArch64DecodeTable;

/* Function prototype for gen_ functions for calling Neon helpers */
typedef void NeonGenOneOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32);
typedef void NeonGenTwoOpFn(TCGv_i32, TCGv_i32, TCGv_i32);
typedef void NeonGenTwoOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32, TCGv_i32);
typedef void NeonGenTwo64OpFn(TCGv_i64, TCGv_i64, TCGv_i64);
typedef void NeonGenTwo64OpEnvFn(TCGv_i64, TCGv_ptr, TCGv_i64, TCGv_i64);
typedef void NeonGenNarrowFn(TCGv_i32, TCGv_i64);
typedef void NeonGenNarrowEnvFn(TCGv_i32, TCGv_ptr, TCGv_i64);
typedef void NeonGenWidenFn(TCGv_i64, TCGv_i32);
typedef void NeonGenTwoSingleOPFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
typedef void NeonGenTwoDoubleOPFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr);
typedef void NeonGenOneOpFn(TCGv_i64, TCGv_i64);
typedef void CryptoTwoOpEnvFn(TCGv_ptr, TCGv_i32, TCGv_i32);
typedef void CryptoThreeOpEnvFn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
  83
  84/* initialize TCG globals.  */
  85void a64_translate_init(void)
  86{
  87    int i;
  88
  89    cpu_pc = tcg_global_mem_new_i64(cpu_env,
  90                                    offsetof(CPUARMState, pc),
  91                                    "pc");
  92    for (i = 0; i < 32; i++) {
  93        cpu_X[i] = tcg_global_mem_new_i64(cpu_env,
  94                                          offsetof(CPUARMState, xregs[i]),
  95                                          regnames[i]);
  96    }
  97
  98    cpu_exclusive_high = tcg_global_mem_new_i64(cpu_env,
  99        offsetof(CPUARMState, exclusive_high), "exclusive_high");
 100}
 101
 102static inline ARMMMUIdx get_a64_user_mem_index(DisasContext *s)
 103{
 104    /* Return the mmu_idx to use for A64 "unprivileged load/store" insns:
 105     *  if EL1, access as if EL0; otherwise access at current EL
 106     */
 107    switch (s->mmu_idx) {
 108    case ARMMMUIdx_S12NSE1:
 109        return ARMMMUIdx_S12NSE0;
 110    case ARMMMUIdx_S1SE1:
 111        return ARMMMUIdx_S1SE0;
 112    case ARMMMUIdx_S2NS:
 113        g_assert_not_reached();
 114    default:
 115        return s->mmu_idx;
 116    }
 117}
 118
/* Dump the AArch64 CPU state (PC, SP, X registers, PSTATE and, when
 * CPU_DUMP_FPU is in @flags, the FP/vector registers) to @f.
 */
void aarch64_cpu_dump_state(CPUState *cs, FILE *f,
                            fprintf_function cpu_fprintf, int flags)
{
    ARMCPU *cpu = ARM_CPU(cs);
    CPUARMState *env = &cpu->env;
    uint32_t psr = pstate_read(env);
    int i;
    int el = arm_current_el(env);
    const char *ns_status;

    cpu_fprintf(f, "PC=%016"PRIx64"  SP=%016"PRIx64"\n",
            env->pc, env->xregs[31]);
    /* x0..x30, four registers per output line */
    for (i = 0; i < 31; i++) {
        cpu_fprintf(f, "X%02d=%016"PRIx64, i, env->xregs[i]);
        if ((i % 4) == 3) {
            cpu_fprintf(f, "\n");
        } else {
            cpu_fprintf(f, " ");
        }
    }

    /* Only show the security state when EL3 exists and we are below it */
    if (arm_feature(env, ARM_FEATURE_EL3) && el != 3) {
        ns_status = env->cp15.scr_el3 & SCR_NS ? "NS " : "S ";
    } else {
        ns_status = "";
    }

    cpu_fprintf(f, "\nPSTATE=%08x %c%c%c%c %sEL%d%c\n",
                psr,
                psr & PSTATE_N ? 'N' : '-',
                psr & PSTATE_Z ? 'Z' : '-',
                psr & PSTATE_C ? 'C' : '-',
                psr & PSTATE_V ? 'V' : '-',
                ns_status,
                el,
                psr & PSTATE_SP ? 'h' : 't');

    if (flags & CPU_DUMP_FPU) {
        int numvfpregs = 32;
        /* Each Q register occupies two consecutive 64-bit vfp.regs
         * entries (low half first); print two Q registers per loop
         * iteration, each as high:low.
         */
        for (i = 0; i < numvfpregs; i += 2) {
            uint64_t vlo = float64_val(env->vfp.regs[i * 2]);
            uint64_t vhi = float64_val(env->vfp.regs[(i * 2) + 1]);
            cpu_fprintf(f, "q%02d=%016" PRIx64 ":%016" PRIx64 " ",
                        i, vhi, vlo);
            vlo = float64_val(env->vfp.regs[(i + 1) * 2]);
            vhi = float64_val(env->vfp.regs[((i + 1) * 2) + 1]);
            cpu_fprintf(f, "q%02d=%016" PRIx64 ":%016" PRIx64 "\n",
                        i + 1, vhi, vlo);
        }
        cpu_fprintf(f, "FPCR: %08x  FPSR: %08x\n",
                    vfp_get_fpcr(env), vfp_get_fpsr(env));
    }
}
 172
/* Emit TCG to set the PC global to the immediate value @val.  */
void gen_a64_set_pc_im(uint64_t val)
{
    tcg_gen_movi_i64(cpu_pc, val);
}
 177
/* 64-bit analogue of DisasCompare: a TCG condition together with the
 * 64-bit value it is to be applied to.
 */
typedef struct DisasCompare64 {
    TCGCond cond;
    TCGv_i64 value;
} DisasCompare64;
 182
 183static void a64_test_cc(DisasCompare64 *c64, int cc)
 184{
 185    DisasCompare c32;
 186
 187    arm_test_cc(&c32, cc);
 188
 189    /* Sign-extend the 32-bit value so that the GE/LT comparisons work
 190       * properly.  The NE/EQ comparisons are also fine with this choice.  */
 191    c64->cond = c32.cond;
 192    c64->value = tcg_temp_new_i64();
 193    tcg_gen_ext_i32_i64(c64->value, c32.value);
 194
 195    arm_free_cc(&c32);
 196}
 197
/* Free the temporary allocated by a64_test_cc().  */
static void a64_free_cc(DisasCompare64 *c64)
{
    tcg_temp_free_i64(c64->value);
}
 202
 203static void gen_exception_internal(int excp)
 204{
 205    TCGv_i32 tcg_excp = tcg_const_i32(excp);
 206
 207    assert(excp_is_internal(excp));
 208    gen_helper_exception_internal(cpu_env, tcg_excp);
 209    tcg_temp_free_i32(tcg_excp);
 210}
 211
 212static void gen_exception(int excp, uint32_t syndrome, uint32_t target_el)
 213{
 214    TCGv_i32 tcg_excp = tcg_const_i32(excp);
 215    TCGv_i32 tcg_syn = tcg_const_i32(syndrome);
 216    TCGv_i32 tcg_el = tcg_const_i32(target_el);
 217
 218    gen_helper_exception_with_syndrome(cpu_env, tcg_excp,
 219                                       tcg_syn, tcg_el);
 220    tcg_temp_free_i32(tcg_el);
 221    tcg_temp_free_i32(tcg_syn);
 222    tcg_temp_free_i32(tcg_excp);
 223}
 224
/* Raise internal exception @excp for the insn at s->pc - @offset:
 * rewind the PC to that insn, emit the exception, end the TB.
 */
static void gen_exception_internal_insn(DisasContext *s, int offset, int excp)
{
    gen_a64_set_pc_im(s->pc - offset);
    gen_exception_internal(excp);
    s->is_jmp = DISAS_EXC;
}
 231
/* Raise guest exception @excp (with @syndrome, to @target_el) for the
 * insn at s->pc - @offset: rewind the PC, emit the exception, end the TB.
 */
static void gen_exception_insn(DisasContext *s, int offset, int excp,
                               uint32_t syndrome, uint32_t target_el)
{
    gen_a64_set_pc_im(s->pc - offset);
    gen_exception(excp, syndrome, target_el);
    s->is_jmp = DISAS_EXC;
}
 239
/* Advance the architectural single-step state machine.  */
static void gen_ss_advance(DisasContext *s)
{
    /* If the singlestep state is Active-not-pending, advance to
     * Active-pending.
     */
    if (s->ss_active) {
        s->pstate_ss = 0;
        gen_helper_clear_pstate_ss(cpu_env);
    }
}
 250
/* Raise the software-step exception after an insn completes.  */
static void gen_step_complete_exception(DisasContext *s)
{
    /* We just completed step of an insn. Move from Active-not-pending
     * to Active-pending, and then also take the swstep exception.
     * This corresponds to making the (IMPDEF) choice to prioritize
     * swstep exceptions over asynchronous exceptions taken to an exception
     * level where debug is disabled. This choice has the advantage that
     * we do not need to maintain internal state corresponding to the
     * ISV/EX syndrome bits between completion of the step and generation
     * of the exception, and our syndrome information is always correct.
     */
    gen_ss_advance(s);
    gen_exception(EXCP_UDEF, syn_swstep(s->ss_same_el, 1, s->is_ldex),
                  default_exception_el(s));
    s->is_jmp = DISAS_EXC;
}
 267
 268static inline bool use_goto_tb(DisasContext *s, int n, uint64_t dest)
 269{
 270    /* No direct tb linking with singlestep (either QEMU's or the ARM
 271     * debug architecture kind) or deterministic io
 272     */
 273    if (s->singlestep_enabled || s->ss_active || (s->tb->cflags & CF_LAST_IO)) {
 274        return false;
 275    }
 276
 277    /* Only link tbs from inside the same guest page */
 278    if ((s->tb->pc & TARGET_PAGE_MASK) != (dest & TARGET_PAGE_MASK)) {
 279        return false;
 280    }
 281
 282    return true;
 283}
 284
/* End the TB with a branch to @dest: direct TB chaining via slot @n
 * when use_goto_tb() allows it, otherwise a plain exit (raising the
 * appropriate debug exception when single-stepping).
 */
static inline void gen_goto_tb(DisasContext *s, int n, uint64_t dest)
{
    TranslationBlock *tb;

    tb = s->tb;
    if (use_goto_tb(s, n, dest)) {
        tcg_gen_goto_tb(n);
        gen_a64_set_pc_im(dest);
        /* Encode both this TB and the jump slot for later patching */
        tcg_gen_exit_tb((intptr_t)tb + n);
        s->is_jmp = DISAS_TB_JUMP;
    } else {
        gen_a64_set_pc_im(dest);
        if (s->ss_active) {
            gen_step_complete_exception(s);
        } else if (s->singlestep_enabled) {
            gen_exception_internal(EXCP_DEBUG);
        } else {
            tcg_gen_exit_tb(0);
            s->is_jmp = DISAS_TB_JUMP;
        }
    }
}
 307
/* Store syndrome @syn into parameter 2 of the current insn's
 * insn_start TCG op.  Must be called at most once per insn.
 */
static void disas_set_insn_syndrome(DisasContext *s, uint32_t syn)
{
    /* We don't need to save all of the syndrome so we mask and shift
     * out unneeded bits to help the sleb128 encoder do a better job.
     */
    syn &= ARM_INSN_START_WORD2_MASK;
    syn >>= ARM_INSN_START_WORD2_SHIFT;

    /* We check and clear insn_start_idx to catch multiple updates.  */
    assert(s->insn_start_idx != 0);
    tcg_set_insn_param(s->insn_start_idx, 2, syn);
    s->insn_start_idx = 0;
}
 321
/* Raise an UNDEF exception (uncategorized syndrome) for an
 * unallocated or reserved encoding, rewinding the PC to the insn.
 */
static void unallocated_encoding(DisasContext *s)
{
    /* Unallocated and reserved encodings are uncategorized */
    gen_exception_insn(s, 4, EXCP_UDEF, syn_uncategorized(),
                       default_exception_el(s));
}
 328
 329#define unsupported_encoding(s, insn)                                    \
 330    do {                                                                 \
 331        qemu_log_mask(LOG_UNIMP,                                         \
 332                      "%s:%d: unsupported instruction encoding 0x%08x "  \
 333                      "at pc=%016" PRIx64 "\n",                          \
 334                      __FILE__, __LINE__, insn, s->pc - 4);              \
 335        unallocated_encoding(s);                                         \
 336    } while (0);
 337
/* Reset the tracked-temporary state; under CONFIG_DEBUG_TCG also mark
 * every slot unused so stale references are caught.
 */
static void init_tmp_a64_array(DisasContext *s)
{
#ifdef CONFIG_DEBUG_TCG
    int i;
    for (i = 0; i < ARRAY_SIZE(s->tmp_a64); i++) {
        TCGV_UNUSED_I64(s->tmp_a64[i]);
    }
#endif
    s->tmp_a64_count = 0;
}
 348
/* Free every temporary allocated via new_tmp_a64() and reset the
 * tracking state.
 */
static void free_tmp_a64(DisasContext *s)
{
    int i;
    for (i = 0; i < s->tmp_a64_count; i++) {
        tcg_temp_free_i64(s->tmp_a64[i]);
    }
    init_tmp_a64_array(s);
}
 357
 358static TCGv_i64 new_tmp_a64(DisasContext *s)
 359{
 360    assert(s->tmp_a64_count < TMP_A64_MAX);
 361    return s->tmp_a64[s->tmp_a64_count++] = tcg_temp_new_i64();
 362}
 363
 364static TCGv_i64 new_tmp_a64_zero(DisasContext *s)
 365{
 366    TCGv_i64 t = new_tmp_a64(s);
 367    tcg_gen_movi_i64(t, 0);
 368    return t;
 369}
 370
 371/*
 372 * Register access functions
 373 *
 374 * These functions are used for directly accessing a register in where
 375 * changes to the final register value are likely to be made. If you
 376 * need to use a register for temporary calculation (e.g. index type
 377 * operations) use the read_* form.
 378 *
 379 * B1.2.1 Register mappings
 380 *
 381 * In instruction register encoding 31 can refer to ZR (zero register) or
 382 * the SP (stack pointer) depending on context. In QEMU's case we map SP
 383 * to cpu_X[31] and ZR accesses to a temporary which can be discarded.
 384 * This is the point of the _sp forms.
 385 */
 386static TCGv_i64 cpu_reg(DisasContext *s, int reg)
 387{
 388    if (reg == 31) {
 389        return new_tmp_a64_zero(s);
 390    } else {
 391        return cpu_X[reg];
 392    }
 393}
 394
/* register access for when 31 == SP: cpu_X[31] holds the stack
 * pointer, so no special-casing is needed.
 */
static TCGv_i64 cpu_reg_sp(DisasContext *s, int reg)
{
    return cpu_X[reg];
}
 400
 401/* read a cpu register in 32bit/64bit mode. Returns a TCGv_i64
 402 * representing the register contents. This TCGv is an auto-freed
 403 * temporary so it need not be explicitly freed, and may be modified.
 404 */
 405static TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf)
 406{
 407    TCGv_i64 v = new_tmp_a64(s);
 408    if (reg != 31) {
 409        if (sf) {
 410            tcg_gen_mov_i64(v, cpu_X[reg]);
 411        } else {
 412            tcg_gen_ext32u_i64(v, cpu_X[reg]);
 413        }
 414    } else {
 415        tcg_gen_movi_i64(v, 0);
 416    }
 417    return v;
 418}
 419
 420static TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf)
 421{
 422    TCGv_i64 v = new_tmp_a64(s);
 423    if (sf) {
 424        tcg_gen_mov_i64(v, cpu_X[reg]);
 425    } else {
 426        tcg_gen_ext32u_i64(v, cpu_X[reg]);
 427    }
 428    return v;
 429}
 430
/* We should have at some point before trying to access an FP register
 * done the necessary access check, so assert that
 * (a) we did the check and
 * (b) we didn't then just plough ahead anyway if it failed.
 * Print the instruction pattern in the abort message so we can figure
 * out what we need to fix if a user encounters this problem in the wild.
 */
static inline void assert_fp_access_checked(DisasContext *s)
{
#ifdef CONFIG_DEBUG_TCG
    /* fp_excp_el != 0 means the check was done but failed */
    if (unlikely(!s->fp_access_checked || s->fp_excp_el)) {
        fprintf(stderr, "target-arm: FP access check missing for "
                "instruction 0x%08x\n", s->insn);
        abort();
    }
#endif
}
 448
/* Return the offset into CPUARMState of an element of specified
 * size, 'element' places in from the least significant end of
 * the FP/vector register Qn.  @size is log2 of the element size
 * in bytes (so an element occupies 1 << size bytes).
 */
static inline int vec_reg_offset(DisasContext *s, int regno,
                                 int element, TCGMemOp size)
{
    int offs = offsetof(CPUARMState, vfp.regs[regno * 2]);
#ifdef HOST_WORDS_BIGENDIAN
    /* This is complicated slightly because vfp.regs[2n] is
     * still the low half and  vfp.regs[2n+1] the high half
     * of the 128 bit vector, even on big endian systems.
     * Calculate the offset assuming a fully bigendian 128 bits,
     * then XOR to account for the order of the two 64 bit halves.
     */
    offs += (16 - ((element + 1) * (1 << size)));
    offs ^= 8;
#else
    offs += element * (1 << size);
#endif
    assert_fp_access_checked(s);
    return offs;
}
 472
/* Return the offset into CPUARMState of a slice (from
 * the least significant end) of FP register Qn (ie
 * Dn, Sn, Hn or Bn).
 * (Note that this is not the same mapping as for A32; see cpu.h)
 */
static inline int fp_reg_offset(DisasContext *s, int regno, TCGMemOp size)
{
    int offs = offsetof(CPUARMState, vfp.regs[regno * 2]);
#ifdef HOST_WORDS_BIGENDIAN
    /* On big-endian hosts the least significant slice sits at the
     * high end of the 64-bit slot.
     */
    offs += (8 - (1 << size));
#endif
    assert_fp_access_checked(s);
    return offs;
}
 487
/* Offset of the high half of the 128 bit vector Qn
 * (the second of the two 64-bit vfp.regs entries for this register).
 */
static inline int fp_reg_hi_offset(DisasContext *s, int regno)
{
    assert_fp_access_checked(s);
    return offsetof(CPUARMState, vfp.regs[regno * 2 + 1]);
}
 494
 495/* Convenience accessors for reading and writing single and double
 496 * FP registers. Writing clears the upper parts of the associated
 497 * 128 bit vector register, as required by the architecture.
 498 * Note that unlike the GP register accessors, the values returned
 499 * by the read functions must be manually freed.
 500 */
 501static TCGv_i64 read_fp_dreg(DisasContext *s, int reg)
 502{
 503    TCGv_i64 v = tcg_temp_new_i64();
 504
 505    tcg_gen_ld_i64(v, cpu_env, fp_reg_offset(s, reg, MO_64));
 506    return v;
 507}
 508
 509static TCGv_i32 read_fp_sreg(DisasContext *s, int reg)
 510{
 511    TCGv_i32 v = tcg_temp_new_i32();
 512
 513    tcg_gen_ld_i32(v, cpu_env, fp_reg_offset(s, reg, MO_32));
 514    return v;
 515}
 516
 517static void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v)
 518{
 519    TCGv_i64 tcg_zero = tcg_const_i64(0);
 520
 521    tcg_gen_st_i64(v, cpu_env, fp_reg_offset(s, reg, MO_64));
 522    tcg_gen_st_i64(tcg_zero, cpu_env, fp_reg_hi_offset(s, reg));
 523    tcg_temp_free_i64(tcg_zero);
 524}
 525
 526static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v)
 527{
 528    TCGv_i64 tmp = tcg_temp_new_i64();
 529
 530    tcg_gen_extu_i32_i64(tmp, v);
 531    write_fp_dreg(s, reg, tmp);
 532    tcg_temp_free_i64(tmp);
 533}
 534
 535static TCGv_ptr get_fpstatus_ptr(void)
 536{
 537    TCGv_ptr statusptr = tcg_temp_new_ptr();
 538    int offset;
 539
 540    /* In A64 all instructions (both FP and Neon) use the FPCR;
 541     * there is no equivalent of the A32 Neon "standard FPSCR value"
 542     * and all operations use vfp.fp_status.
 543     */
 544    offset = offsetof(CPUARMState, vfp.fp_status);
 545    tcg_gen_addi_ptr(statusptr, cpu_env, offset);
 546    return statusptr;
 547}
 548
/* Set ZF and NF based on a 64 bit result. This is alas fiddlier
 * than the 32 bit equivalent.  NF gets the high half (the sign bits);
 * ZF gets the OR of both halves, i.e. zero iff the result is zero.
 */
static inline void gen_set_NZ64(TCGv_i64 result)
{
    tcg_gen_extr_i64_i32(cpu_ZF, cpu_NF, result);
    tcg_gen_or_i32(cpu_ZF, cpu_ZF, cpu_NF);
}
 557
 558/* Set NZCV as for a logical operation: NZ as per result, CV cleared. */
 559static inline void gen_logic_CC(int sf, TCGv_i64 result)
 560{
 561    if (sf) {
 562        gen_set_NZ64(result);
 563    } else {
 564        tcg_gen_extrl_i64_i32(cpu_ZF, result);
 565        tcg_gen_mov_i32(cpu_NF, cpu_ZF);
 566    }
 567    tcg_gen_movi_i32(cpu_CF, 0);
 568    tcg_gen_movi_i32(cpu_VF, 0);
 569}
 570
/* dest = T0 + T1; compute C, N, V and Z flags */
static void gen_add_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        /* 64 bit arithmetic */
        TCGv_i64 result, flag, tmp;
        result = tcg_temp_new_i64();
        flag = tcg_temp_new_i64();
        tmp = tcg_temp_new_i64();

        /* Double-width add: the carry-out lands in 'flag' */
        tcg_gen_movi_i64(tmp, 0);
        tcg_gen_add2_i64(result, flag, t0, tmp, t1, tmp);

        tcg_gen_extrl_i64_i32(cpu_CF, flag);

        gen_set_NZ64(result);

        /* V = (result ^ t0) & ~(t0 ^ t1): overflow iff the operands
         * have the same sign but the result's sign differs.
         */
        tcg_gen_xor_i64(flag, result, t0);
        tcg_gen_xor_i64(tmp, t0, t1);
        tcg_gen_andc_i64(flag, flag, tmp);
        tcg_temp_free_i64(tmp);
        tcg_gen_extrh_i64_i32(cpu_VF, flag);

        tcg_gen_mov_i64(dest, result);
        tcg_temp_free_i64(result);
        tcg_temp_free_i64(flag);
    } else {
        /* 32 bit arithmetic */
        TCGv_i32 t0_32 = tcg_temp_new_i32();
        TCGv_i32 t1_32 = tcg_temp_new_i32();
        TCGv_i32 tmp = tcg_temp_new_i32();

        tcg_gen_movi_i32(tmp, 0);
        tcg_gen_extrl_i64_i32(t0_32, t0);
        tcg_gen_extrl_i64_i32(t1_32, t1);
        /* NF gets the sum, CF the carry-out */
        tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, t1_32, tmp);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
        tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
        tcg_gen_xor_i32(tmp, t0_32, t1_32);
        tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
        /* The 32-bit result is zero-extended into dest */
        tcg_gen_extu_i32_i64(dest, cpu_NF);

        tcg_temp_free_i32(tmp);
        tcg_temp_free_i32(t0_32);
        tcg_temp_free_i32(t1_32);
    }
}
 617
/* dest = T0 - T1; compute C, N, V and Z flags */
static void gen_sub_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        /* 64 bit arithmetic */
        TCGv_i64 result, flag, tmp;

        result = tcg_temp_new_i64();
        flag = tcg_temp_new_i64();
        tcg_gen_sub_i64(result, t0, t1);

        gen_set_NZ64(result);

        /* C is NOT borrow: set iff t0 >= t1 unsigned */
        tcg_gen_setcond_i64(TCG_COND_GEU, flag, t0, t1);
        tcg_gen_extrl_i64_i32(cpu_CF, flag);

        /* V = (result ^ t0) & (t0 ^ t1): overflow iff the operands
         * have different signs and the result's sign differs from t0.
         */
        tcg_gen_xor_i64(flag, result, t0);
        tmp = tcg_temp_new_i64();
        tcg_gen_xor_i64(tmp, t0, t1);
        tcg_gen_and_i64(flag, flag, tmp);
        tcg_temp_free_i64(tmp);
        tcg_gen_extrh_i64_i32(cpu_VF, flag);
        tcg_gen_mov_i64(dest, result);
        tcg_temp_free_i64(flag);
        tcg_temp_free_i64(result);
    } else {
        /* 32 bit arithmetic */
        TCGv_i32 t0_32 = tcg_temp_new_i32();
        TCGv_i32 t1_32 = tcg_temp_new_i32();
        TCGv_i32 tmp;

        tcg_gen_extrl_i64_i32(t0_32, t0);
        tcg_gen_extrl_i64_i32(t1_32, t1);
        tcg_gen_sub_i32(cpu_NF, t0_32, t1_32);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
        /* C is NOT borrow, as for the 64-bit case */
        tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0_32, t1_32);
        tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
        tmp = tcg_temp_new_i32();
        tcg_gen_xor_i32(tmp, t0_32, t1_32);
        tcg_temp_free_i32(t0_32);
        tcg_temp_free_i32(t1_32);
        tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
        tcg_temp_free_i32(tmp);
        /* The 32-bit result is zero-extended into dest */
        tcg_gen_extu_i32_i64(dest, cpu_NF);
    }
}
 664
 665/* dest = T0 + T1 + CF; do not compute flags. */
 666static void gen_adc(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
 667{
 668    TCGv_i64 flag = tcg_temp_new_i64();
 669    tcg_gen_extu_i32_i64(flag, cpu_CF);
 670    tcg_gen_add_i64(dest, t0, t1);
 671    tcg_gen_add_i64(dest, dest, flag);
 672    tcg_temp_free_i64(flag);
 673
 674    if (!sf) {
 675        tcg_gen_ext32u_i64(dest, dest);
 676    }
 677}
 678
/* dest = T0 + T1 + CF; compute C, N, V and Z flags. */
static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        TCGv_i64 result, cf_64, vf_64, tmp;
        result = tcg_temp_new_i64();
        cf_64 = tcg_temp_new_i64();
        vf_64 = tcg_temp_new_i64();
        tmp = tcg_const_i64(0);

        /* Two chained double-width adds: t0 + CF, then + t1,
         * accumulating the carry-out in cf_64 each time.
         */
        tcg_gen_extu_i32_i64(cf_64, cpu_CF);
        tcg_gen_add2_i64(result, cf_64, t0, tmp, cf_64, tmp);
        tcg_gen_add2_i64(result, cf_64, result, cf_64, t1, tmp);
        tcg_gen_extrl_i64_i32(cpu_CF, cf_64);
        gen_set_NZ64(result);

        /* V = (result ^ t0) & ~(t0 ^ t1), as for gen_add_CC */
        tcg_gen_xor_i64(vf_64, result, t0);
        tcg_gen_xor_i64(tmp, t0, t1);
        tcg_gen_andc_i64(vf_64, vf_64, tmp);
        tcg_gen_extrh_i64_i32(cpu_VF, vf_64);

        tcg_gen_mov_i64(dest, result);

        tcg_temp_free_i64(tmp);
        tcg_temp_free_i64(vf_64);
        tcg_temp_free_i64(cf_64);
        tcg_temp_free_i64(result);
    } else {
        TCGv_i32 t0_32, t1_32, tmp;
        t0_32 = tcg_temp_new_i32();
        t1_32 = tcg_temp_new_i32();
        tmp = tcg_const_i32(0);

        tcg_gen_extrl_i64_i32(t0_32, t0);
        tcg_gen_extrl_i64_i32(t1_32, t1);
        /* Same chained-add scheme in 32 bits, with NF as the sum */
        tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, cpu_CF, tmp);
        tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1_32, tmp);

        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
        tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
        tcg_gen_xor_i32(tmp, t0_32, t1_32);
        tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
        /* The 32-bit result is zero-extended into dest */
        tcg_gen_extu_i32_i64(dest, cpu_NF);

        tcg_temp_free_i32(tmp);
        tcg_temp_free_i32(t1_32);
        tcg_temp_free_i32(t0_32);
    }
}
 728
 729/*
 730 * Load/Store generators
 731 */
 732
 733/*
 734 * Store from GPR register to memory.
 735 */
 736static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source,
 737                             TCGv_i64 tcg_addr, int size, int memidx,
 738                             bool iss_valid,
 739                             unsigned int iss_srt,
 740                             bool iss_sf, bool iss_ar)
 741{
 742    g_assert(size <= 3);
 743    tcg_gen_qemu_st_i64(source, tcg_addr, memidx, s->be_data + size);
 744
 745    if (iss_valid) {
 746        uint32_t syn;
 747
 748        syn = syn_data_abort_with_iss(0,
 749                                      size,
 750                                      false,
 751                                      iss_srt,
 752                                      iss_sf,
 753                                      iss_ar,
 754                                      0, 0, 0, 0, 0, false);
 755        disas_set_insn_syndrome(s, syn);
 756    }
 757}
 758
/* As do_gpr_st_memidx(), using the current translation mmu index.  */
static void do_gpr_st(DisasContext *s, TCGv_i64 source,
                      TCGv_i64 tcg_addr, int size,
                      bool iss_valid,
                      unsigned int iss_srt,
                      bool iss_sf, bool iss_ar)
{
    do_gpr_st_memidx(s, source, tcg_addr, size, get_mem_index(s),
                     iss_valid, iss_srt, iss_sf, iss_ar);
}
 768
/*
 * Load from memory to GPR register
 *
 * Load from @tcg_addr into @dest using mmu index @memidx; @size is
 * log2 of the access size.  If @is_signed, sign-extend the loaded
 * value; if @extend also holds (32-bit destination forms), the
 * sign-extended 32-bit value is then zero-extended to 64 bits.
 * When @iss_valid, record an ISS syndrome for the access.
 */
static void do_gpr_ld_memidx(DisasContext *s,
                             TCGv_i64 dest, TCGv_i64 tcg_addr,
                             int size, bool is_signed,
                             bool extend, int memidx,
                             bool iss_valid, unsigned int iss_srt,
                             bool iss_sf, bool iss_ar)
{
    TCGMemOp memop = s->be_data + size;

    g_assert(size <= 3);

    if (is_signed) {
        memop += MO_SIGN;
    }

    tcg_gen_qemu_ld_i64(dest, tcg_addr, memidx, memop);

    if (extend && is_signed) {
        /* Only sub-word sizes can need the extra zero-extension */
        g_assert(size < 3);
        tcg_gen_ext32u_i64(dest, dest);
    }

    if (iss_valid) {
        uint32_t syn;

        syn = syn_data_abort_with_iss(0,
                                      size,
                                      is_signed,
                                      iss_srt,
                                      iss_sf,
                                      iss_ar,
                                      0, 0, 0, 0, 0, false);
        disas_set_insn_syndrome(s, syn);
    }
}
 807
/* As do_gpr_ld_memidx(), using the current translation mmu index.  */
static void do_gpr_ld(DisasContext *s,
                      TCGv_i64 dest, TCGv_i64 tcg_addr,
                      int size, bool is_signed, bool extend,
                      bool iss_valid, unsigned int iss_srt,
                      bool iss_sf, bool iss_ar)
{
    do_gpr_ld_memidx(s, dest, tcg_addr, size, is_signed, extend,
                     get_mem_index(s),
                     iss_valid, iss_srt, iss_sf, iss_ar);
}
 818
/*
 * Store from FP register to memory
 *
 * @size is log2 of the access size; size == 4 is a full 128-bit store.
 */
static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, int size)
{
    /* This writes the bottom N bits of a 128 bit wide vector to memory */
    TCGv_i64 tmp = tcg_temp_new_i64();
    tcg_gen_ld_i64(tmp, cpu_env, fp_reg_offset(s, srcidx, MO_64));
    if (size < 4) {
        tcg_gen_qemu_st_i64(tmp, tcg_addr, get_mem_index(s),
                            s->be_data + size);
    } else {
        /* 128-bit store as two 64-bit accesses; for a big-endian
         * guest the high half goes at the lower address.
         */
        bool be = s->be_data == MO_BE;
        TCGv_i64 tcg_hiaddr = tcg_temp_new_i64();

        tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
        tcg_gen_qemu_st_i64(tmp, be ? tcg_hiaddr : tcg_addr, get_mem_index(s),
                            s->be_data | MO_Q);
        tcg_gen_ld_i64(tmp, cpu_env, fp_reg_hi_offset(s, srcidx));
        tcg_gen_qemu_st_i64(tmp, be ? tcg_addr : tcg_hiaddr, get_mem_index(s),
                            s->be_data | MO_Q);
        tcg_temp_free_i64(tcg_hiaddr);
    }

    tcg_temp_free_i64(tmp);
}
 845
/*
 * Load from memory to FP register
 *
 * @size is log2 of the access size; size == 4 is a full 128-bit load.
 */
static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, int size)
{
    /* This always zero-extends and writes to a full 128 bit wide vector */
    TCGv_i64 tmplo = tcg_temp_new_i64();
    TCGv_i64 tmphi;

    if (size < 4) {
        TCGMemOp memop = s->be_data + size;
        tmphi = tcg_const_i64(0);
        tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), memop);
    } else {
        /* 128-bit load as two 64-bit accesses; for a big-endian guest
         * the high half comes from the lower address.
         */
        bool be = s->be_data == MO_BE;
        TCGv_i64 tcg_hiaddr;

        tmphi = tcg_temp_new_i64();
        tcg_hiaddr = tcg_temp_new_i64();

        tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
        tcg_gen_qemu_ld_i64(tmplo, be ? tcg_hiaddr : tcg_addr, get_mem_index(s),
                            s->be_data | MO_Q);
        tcg_gen_qemu_ld_i64(tmphi, be ? tcg_addr : tcg_hiaddr, get_mem_index(s),
                            s->be_data | MO_Q);
        tcg_temp_free_i64(tcg_hiaddr);
    }

    tcg_gen_st_i64(tmplo, cpu_env, fp_reg_offset(s, destidx, MO_64));
    tcg_gen_st_i64(tmphi, cpu_env, fp_reg_hi_offset(s, destidx));

    tcg_temp_free_i64(tmplo);
    tcg_temp_free_i64(tmphi);
}
 880
 881/*
 882 * Vector load/store helpers.
 883 *
 884 * The principal difference between this and a FP load is that we don't
 885 * zero extend as we are filling a partial chunk of the vector register.
 886 * These functions don't support 128 bit loads/stores, which would be
 887 * normal load/store operations.
 888 *
 889 * The _i32 versions are useful when operating on 32 bit quantities
 890 * (eg for floating point single or using Neon helper functions).
 891 */
 892
/* Get value of an element within a vector register: load element
 * @element of size (memop & MO_SIZE) from vector register @srcidx
 * into the 64-bit @tcg_dest, sign- or zero-extending per MO_SIGN.
 */
static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx,
                             int element, TCGMemOp memop)
{
    int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_ld8u_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_16:
        tcg_gen_ld16u_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_32:
        tcg_gen_ld32u_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_8|MO_SIGN:
        tcg_gen_ld8s_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_16|MO_SIGN:
        tcg_gen_ld16s_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_32|MO_SIGN:
        tcg_gen_ld32s_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_64:
    case MO_64|MO_SIGN:
        /* Full width: no extension needed */
        tcg_gen_ld_i64(tcg_dest, cpu_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}
 925
 926static void read_vec_element_i32(DisasContext *s, TCGv_i32 tcg_dest, int srcidx,
 927                                 int element, TCGMemOp memop)
 928{
 929    int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
 930    switch (memop) {
 931    case MO_8:
 932        tcg_gen_ld8u_i32(tcg_dest, cpu_env, vect_off);
 933        break;
 934    case MO_16:
 935        tcg_gen_ld16u_i32(tcg_dest, cpu_env, vect_off);
 936        break;
 937    case MO_8|MO_SIGN:
 938        tcg_gen_ld8s_i32(tcg_dest, cpu_env, vect_off);
 939        break;
 940    case MO_16|MO_SIGN:
 941        tcg_gen_ld16s_i32(tcg_dest, cpu_env, vect_off);
 942        break;
 943    case MO_32:
 944    case MO_32|MO_SIGN:
 945        tcg_gen_ld_i32(tcg_dest, cpu_env, vect_off);
 946        break;
 947    default:
 948        g_assert_not_reached();
 949    }
 950}
 951
 952/* Set value of an element within a vector register */
 953static void write_vec_element(DisasContext *s, TCGv_i64 tcg_src, int destidx,
 954                              int element, TCGMemOp memop)
 955{
 956    int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
 957    switch (memop) {
 958    case MO_8:
 959        tcg_gen_st8_i64(tcg_src, cpu_env, vect_off);
 960        break;
 961    case MO_16:
 962        tcg_gen_st16_i64(tcg_src, cpu_env, vect_off);
 963        break;
 964    case MO_32:
 965        tcg_gen_st32_i64(tcg_src, cpu_env, vect_off);
 966        break;
 967    case MO_64:
 968        tcg_gen_st_i64(tcg_src, cpu_env, vect_off);
 969        break;
 970    default:
 971        g_assert_not_reached();
 972    }
 973}
 974
 975static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src,
 976                                  int destidx, int element, TCGMemOp memop)
 977{
 978    int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
 979    switch (memop) {
 980    case MO_8:
 981        tcg_gen_st8_i32(tcg_src, cpu_env, vect_off);
 982        break;
 983    case MO_16:
 984        tcg_gen_st16_i32(tcg_src, cpu_env, vect_off);
 985        break;
 986    case MO_32:
 987        tcg_gen_st_i32(tcg_src, cpu_env, vect_off);
 988        break;
 989    default:
 990        g_assert_not_reached();
 991    }
 992}
 993
 994/* Clear the high 64 bits of a 128 bit vector (in general non-quad
 995 * vector ops all need to do this).
 996 */
 997static void clear_vec_high(DisasContext *s, int rd)
 998{
 999    TCGv_i64 tcg_zero = tcg_const_i64(0);
1000
1001    write_vec_element(s, tcg_zero, rd, 1, MO_64);
1002    tcg_temp_free_i64(tcg_zero);
1003}
1004
1005/* Store from vector register to memory */
1006static void do_vec_st(DisasContext *s, int srcidx, int element,
1007                      TCGv_i64 tcg_addr, int size)
1008{
1009    TCGMemOp memop = s->be_data + size;
1010    TCGv_i64 tcg_tmp = tcg_temp_new_i64();
1011
1012    read_vec_element(s, tcg_tmp, srcidx, element, size);
1013    tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), memop);
1014
1015    tcg_temp_free_i64(tcg_tmp);
1016}
1017
1018/* Load from memory to vector register */
1019static void do_vec_ld(DisasContext *s, int destidx, int element,
1020                      TCGv_i64 tcg_addr, int size)
1021{
1022    TCGMemOp memop = s->be_data + size;
1023    TCGv_i64 tcg_tmp = tcg_temp_new_i64();
1024
1025    tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), memop);
1026    write_vec_element(s, tcg_tmp, destidx, element, size);
1027
1028    tcg_temp_free_i64(tcg_tmp);
1029}
1030
1031/* Check that FP/Neon access is enabled. If it is, return
1032 * true. If not, emit code to generate an appropriate exception,
1033 * and return false; the caller should not emit any code for
1034 * the instruction. Note that this check must happen after all
1035 * unallocated-encoding checks (otherwise the syndrome information
1036 * for the resulting exception will be incorrect).
1037 */
1038static inline bool fp_access_check(DisasContext *s)
1039{
1040    assert(!s->fp_access_checked);
1041    s->fp_access_checked = true;
1042
1043    if (!s->fp_excp_el) {
1044        return true;
1045    }
1046
1047    gen_exception_insn(s, 4, EXCP_UDEF, syn_fp_access_trap(1, 0xe, false),
1048                       s->fp_excp_el);
1049    return false;
1050}
1051
1052/*
1053 * This utility function is for doing register extension with an
1054 * optional shift. You will likely want to pass a temporary for the
1055 * destination register. See DecodeRegExtend() in the ARM ARM.
1056 */
1057static void ext_and_shift_reg(TCGv_i64 tcg_out, TCGv_i64 tcg_in,
1058                              int option, unsigned int shift)
1059{
1060    int extsize = extract32(option, 0, 2);
1061    bool is_signed = extract32(option, 2, 1);
1062
1063    if (is_signed) {
1064        switch (extsize) {
1065        case 0:
1066            tcg_gen_ext8s_i64(tcg_out, tcg_in);
1067            break;
1068        case 1:
1069            tcg_gen_ext16s_i64(tcg_out, tcg_in);
1070            break;
1071        case 2:
1072            tcg_gen_ext32s_i64(tcg_out, tcg_in);
1073            break;
1074        case 3:
1075            tcg_gen_mov_i64(tcg_out, tcg_in);
1076            break;
1077        }
1078    } else {
1079        switch (extsize) {
1080        case 0:
1081            tcg_gen_ext8u_i64(tcg_out, tcg_in);
1082            break;
1083        case 1:
1084            tcg_gen_ext16u_i64(tcg_out, tcg_in);
1085            break;
1086        case 2:
1087            tcg_gen_ext32u_i64(tcg_out, tcg_in);
1088            break;
1089        case 3:
1090            tcg_gen_mov_i64(tcg_out, tcg_in);
1091            break;
1092        }
1093    }
1094
1095    if (shift) {
1096        tcg_gen_shli_i64(tcg_out, tcg_out, shift);
1097    }
1098}
1099
static inline void gen_check_sp_alignment(DisasContext *s)
{
    /* Deliberately generates no code.
     *
     * The AArch64 architecture mandates that (if enabled via PSTATE
     * or SCTLR bits) there is a check that SP is 16-aligned on every
     * SP-relative load or store (with an exception generated if it is not).
     * In line with general QEMU practice regarding misaligned accesses,
     * we omit these checks for the sake of guest program performance.
     * This function is provided as a hook so we can more easily add these
     * checks in future (possibly as a "favour catching guest program bugs
     * over speed" user selectable option).
     */
}
1112
1113/*
1114 * This provides a simple table based table lookup decoder. It is
1115 * intended to be used when the relevant bits for decode are too
1116 * awkwardly placed and switch/if based logic would be confusing and
1117 * deeply nested. Since it's a linear search through the table, tables
1118 * should be kept small.
1119 *
1120 * It returns the first handler where insn & mask == pattern, or
1121 * NULL if there is no match.
1122 * The table is terminated by an empty mask (i.e. 0)
1123 */
1124static inline AArch64DecodeFn *lookup_disas_fn(const AArch64DecodeTable *table,
1125                                               uint32_t insn)
1126{
1127    const AArch64DecodeTable *tptr = table;
1128
1129    while (tptr->mask) {
1130        if ((insn & tptr->mask) == tptr->pattern) {
1131            return tptr->disas_fn;
1132        }
1133        tptr++;
1134    }
1135    return NULL;
1136}
1137
1138/*
1139 * the instruction disassembly implemented here matches
1140 * the instruction encoding classifications in chapter 3 (C3)
1141 * of the ARM Architecture Reference Manual (DDI0487A_a)
1142 */
1143
1144/* C3.2.7 Unconditional branch (immediate)
1145 *   31  30       26 25                                  0
1146 * +----+-----------+-------------------------------------+
1147 * | op | 0 0 1 0 1 |                 imm26               |
1148 * +----+-----------+-------------------------------------+
1149 */
1150static void disas_uncond_b_imm(DisasContext *s, uint32_t insn)
1151{
1152    uint64_t addr = s->pc + sextract32(insn, 0, 26) * 4 - 4;
1153
1154    if (insn & (1U << 31)) {
1155        /* C5.6.26 BL Branch with link */
1156        tcg_gen_movi_i64(cpu_reg(s, 30), s->pc);
1157    }
1158
1159    /* C5.6.20 B Branch / C5.6.26 BL Branch with link */
1160    gen_goto_tb(s, 0, addr);
1161}
1162
1163/* C3.2.1 Compare & branch (immediate)
1164 *   31  30         25  24  23                  5 4      0
1165 * +----+-------------+----+---------------------+--------+
1166 * | sf | 0 1 1 0 1 0 | op |         imm19       |   Rt   |
1167 * +----+-------------+----+---------------------+--------+
1168 */
1169static void disas_comp_b_imm(DisasContext *s, uint32_t insn)
1170{
1171    unsigned int sf, op, rt;
1172    uint64_t addr;
1173    TCGLabel *label_match;
1174    TCGv_i64 tcg_cmp;
1175
1176    sf = extract32(insn, 31, 1);
1177    op = extract32(insn, 24, 1); /* 0: CBZ; 1: CBNZ */
1178    rt = extract32(insn, 0, 5);
1179    addr = s->pc + sextract32(insn, 5, 19) * 4 - 4;
1180
1181    tcg_cmp = read_cpu_reg(s, rt, sf);
1182    label_match = gen_new_label();
1183
1184    tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
1185                        tcg_cmp, 0, label_match);
1186
1187    gen_goto_tb(s, 0, s->pc);
1188    gen_set_label(label_match);
1189    gen_goto_tb(s, 1, addr);
1190}
1191
1192/* C3.2.5 Test & branch (immediate)
1193 *   31  30         25  24  23   19 18          5 4    0
1194 * +----+-------------+----+-------+-------------+------+
1195 * | b5 | 0 1 1 0 1 1 | op |  b40  |    imm14    |  Rt  |
1196 * +----+-------------+----+-------+-------------+------+
1197 */
1198static void disas_test_b_imm(DisasContext *s, uint32_t insn)
1199{
1200    unsigned int bit_pos, op, rt;
1201    uint64_t addr;
1202    TCGLabel *label_match;
1203    TCGv_i64 tcg_cmp;
1204
1205    bit_pos = (extract32(insn, 31, 1) << 5) | extract32(insn, 19, 5);
1206    op = extract32(insn, 24, 1); /* 0: TBZ; 1: TBNZ */
1207    addr = s->pc + sextract32(insn, 5, 14) * 4 - 4;
1208    rt = extract32(insn, 0, 5);
1209
1210    tcg_cmp = tcg_temp_new_i64();
1211    tcg_gen_andi_i64(tcg_cmp, cpu_reg(s, rt), (1ULL << bit_pos));
1212    label_match = gen_new_label();
1213    tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
1214                        tcg_cmp, 0, label_match);
1215    tcg_temp_free_i64(tcg_cmp);
1216    gen_goto_tb(s, 0, s->pc);
1217    gen_set_label(label_match);
1218    gen_goto_tb(s, 1, addr);
1219}
1220
1221/* C3.2.2 / C5.6.19 Conditional branch (immediate)
1222 *  31           25  24  23                  5   4  3    0
1223 * +---------------+----+---------------------+----+------+
1224 * | 0 1 0 1 0 1 0 | o1 |         imm19       | o0 | cond |
1225 * +---------------+----+---------------------+----+------+
1226 */
1227static void disas_cond_b_imm(DisasContext *s, uint32_t insn)
1228{
1229    unsigned int cond;
1230    uint64_t addr;
1231
1232    if ((insn & (1 << 4)) || (insn & (1 << 24))) {
1233        unallocated_encoding(s);
1234        return;
1235    }
1236    addr = s->pc + sextract32(insn, 5, 19) * 4 - 4;
1237    cond = extract32(insn, 0, 4);
1238
1239    if (cond < 0x0e) {
1240        /* genuinely conditional branches */
1241        TCGLabel *label_match = gen_new_label();
1242        arm_gen_test_cc(cond, label_match);
1243        gen_goto_tb(s, 0, s->pc);
1244        gen_set_label(label_match);
1245        gen_goto_tb(s, 1, addr);
1246    } else {
1247        /* 0xe and 0xf are both "always" conditions */
1248        gen_goto_tb(s, 0, addr);
1249    }
1250}
1251
1252/* C5.6.68 HINT */
1253static void handle_hint(DisasContext *s, uint32_t insn,
1254                        unsigned int op1, unsigned int op2, unsigned int crm)
1255{
1256    unsigned int selector = crm << 3 | op2;
1257
1258    if (op1 != 3) {
1259        unallocated_encoding(s);
1260        return;
1261    }
1262
1263    switch (selector) {
1264    case 0: /* NOP */
1265        return;
1266    case 3: /* WFI */
1267        s->is_jmp = DISAS_WFI;
1268        return;
1269    case 1: /* YIELD */
1270        s->is_jmp = DISAS_YIELD;
1271        return;
1272    case 2: /* WFE */
1273        s->is_jmp = DISAS_WFE;
1274        return;
1275    case 4: /* SEV */
1276        gen_helper_sev(cpu_env);
1277        return;
1278    case 5: /* SEVL */
1279        gen_helper_sevl(cpu_env);
1280        return;
1281    default:
1282        /* default specified as NOP equivalent */
1283        return;
1284    }
1285}
1286
/* CLREX: clear the local exclusive monitor by invalidating the
 * recorded exclusive address.
 */
static void gen_clrex(DisasContext *s, uint32_t insn)
{
    tcg_gen_movi_i64(cpu_exclusive_addr, -1);
    /* NOTE(review): this sev helper call also appears after the
     * store-exclusive sequence below; presumably it wakes CPUs waiting
     * in WFE when the monitor is cleared — confirm against the helper's
     * implementation (upstream QEMU's gen_clrex does not make this call).
     */
    gen_helper_sev(cpu_env);
}
1292
1293/* CLREX, DSB, DMB, ISB */
1294static void handle_sync(DisasContext *s, uint32_t insn,
1295                        unsigned int op1, unsigned int op2, unsigned int crm)
1296{
1297    if (op1 != 3) {
1298        unallocated_encoding(s);
1299        return;
1300    }
1301
1302    switch (op2) {
1303    case 2: /* CLREX */
1304        gen_clrex(s, insn);
1305        return;
1306    case 4: /* DSB */
1307    case 5: /* DMB */
1308        /* We don't emulate caches so barriers are no-ops */
1309        return;
1310    case 6: /* ISB */
1311        /* We need to break the TB after this insn to execute
1312         * a self-modified code correctly and also to take
1313         * any pending interrupts immediately.
1314         */
1315        s->is_jmp = DISAS_UPDATE;
1316        return;
1317    default:
1318        unallocated_encoding(s);
1319        return;
1320    }
1321}
1322
/* C5.6.130 MSR (immediate) - move immediate to processor state field */
static void handle_msr_i(DisasContext *s, uint32_t insn,
                         unsigned int op1, unsigned int op2, unsigned int crm)
{
    int op = op1 << 3 | op2;
    switch (op) {
    case 0x05: /* SPSel */
        if (s->current_el == 0) {
            /* SPSel is not accessible from EL0 */
            unallocated_encoding(s);
            return;
        }
        /* fall through */
    case 0x1e: /* DAIFSet */
    case 0x1f: /* DAIFClear */
    {
        /* The helper gets the field encoding (op) and the CRm immediate;
         * the TB is ended afterwards since PSTATE has changed.
         */
        TCGv_i32 tcg_imm = tcg_const_i32(crm);
        TCGv_i32 tcg_op = tcg_const_i32(op);
        gen_a64_set_pc_im(s->pc - 4);
        gen_helper_msr_i_pstate(cpu_env, tcg_op, tcg_imm);
        tcg_temp_free_i32(tcg_imm);
        tcg_temp_free_i32(tcg_op);
        s->is_jmp = DISAS_UPDATE;
        break;
    }
    default:
        unallocated_encoding(s);
        return;
    }
}
1352
/* Compose the architectural NZCV value (flags in bits 31..28, zero
 * elsewhere) from the split QEMU flag variables into tcg_rt.
 */
static void gen_get_nzcv(TCGv_i64 tcg_rt)
{
    TCGv_i32 tmp = tcg_temp_new_i32();
    TCGv_i32 nzcv = tcg_temp_new_i32();

    /* build bit 31, N */
    tcg_gen_andi_i32(nzcv, cpu_NF, (1U << 31));
    /* build bit 30, Z */
    tcg_gen_setcondi_i32(TCG_COND_EQ, tmp, cpu_ZF, 0);
    tcg_gen_deposit_i32(nzcv, nzcv, tmp, 30, 1);
    /* build bit 29, C */
    tcg_gen_deposit_i32(nzcv, nzcv, cpu_CF, 29, 1);
    /* build bit 28, V */
    tcg_gen_shri_i32(tmp, cpu_VF, 31);
    tcg_gen_deposit_i32(nzcv, nzcv, tmp, 28, 1);
    /* generate result */
    tcg_gen_extu_i32_i64(tcg_rt, nzcv);

    tcg_temp_free_i32(nzcv);
    tcg_temp_free_i32(tmp);
}
1374
/* Scatter an architectural NZCV value from tcg_rt (flags in bits
 * 31..28) back into the split QEMU flag variables.
 */
static void gen_set_nzcv(TCGv_i64 tcg_rt)

{
    TCGv_i32 nzcv = tcg_temp_new_i32();

    /* take NZCV from R[t] */
    tcg_gen_extrl_i64_i32(nzcv, tcg_rt);

    /* bit 31, N */
    tcg_gen_andi_i32(cpu_NF, nzcv, (1U << 31));
    /* bit 30, Z: cpu_ZF is zero iff Z is set, so invert the bit */
    tcg_gen_andi_i32(cpu_ZF, nzcv, (1 << 30));
    tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_ZF, cpu_ZF, 0);
    /* bit 29, C */
    tcg_gen_andi_i32(cpu_CF, nzcv, (1 << 29));
    tcg_gen_shri_i32(cpu_CF, cpu_CF, 29);
    /* bit 28, V: cpu_VF keeps V in its sign bit, so shift up to bit 31 */
    tcg_gen_andi_i32(cpu_VF, nzcv, (1 << 28));
    tcg_gen_shli_i32(cpu_VF, cpu_VF, 3);
    tcg_temp_free_i32(nzcv);
}
1396
1397/* C5.6.129 MRS - move from system register
1398 * C5.6.131 MSR (register) - move to system register
1399 * C5.6.204 SYS
1400 * C5.6.205 SYSL
1401 * These are all essentially the same insn in 'read' and 'write'
1402 * versions, with varying op0 fields.
1403 */
static void handle_sys(DisasContext *s, uint32_t insn, bool isread,
                       unsigned int op0, unsigned int op1, unsigned int op2,
                       unsigned int crn, unsigned int crm, unsigned int rt)
{
    const ARMCPRegInfo *ri;
    TCGv_i64 tcg_rt;

    /* Look the register up in the per-CPU hashtable of reginfo structs */
    ri = get_arm_cp_reginfo(s->cp_regs,
                            ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP,
                                               crn, crm, op0, op1, op2));

    if (!ri) {
        /* Unknown register; this might be a guest error or a QEMU
         * unimplemented feature.
         */
        qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch64 "
                      "system register op0:%d op1:%d crn:%d crm:%d op2:%d\n",
                      isread ? "read" : "write", op0, op1, crn, crm, op2);
        unallocated_encoding(s);
        return;
    }

    /* Check access permissions (static, known at translate time) */
    if (!cp_access_ok(s->current_el, ri, isread)) {
        unallocated_encoding(s);
        return;
    }

    if (ri->accessfn) {
        /* Emit code to perform further access permissions checks at
         * runtime; this may result in an exception.
         */
        TCGv_ptr tmpptr;
        TCGv_i32 tcg_syn, tcg_isread;
        uint32_t syndrome;

        /* PC must be correct in case the helper raises an exception */
        gen_a64_set_pc_im(s->pc - 4);
        tmpptr = tcg_const_ptr(ri);
        syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
        tcg_syn = tcg_const_i32(syndrome);
        tcg_isread = tcg_const_i32(isread);
        gen_helper_access_check_cp_reg(cpu_env, tmpptr, tcg_syn, tcg_isread);
        tcg_temp_free_ptr(tmpptr);
        tcg_temp_free_i32(tcg_syn);
        tcg_temp_free_i32(tcg_isread);
    }

    /* Handle special cases first */
    switch (ri->type & ~(ARM_CP_FLAG_MASK & ~ARM_CP_SPECIAL)) {
    case ARM_CP_NOP:
        return;
    case ARM_CP_NZCV:
        /* NZCV maps onto QEMU's split flag variables */
        tcg_rt = cpu_reg(s, rt);
        if (isread) {
            gen_get_nzcv(tcg_rt);
        } else {
            gen_set_nzcv(tcg_rt);
        }
        return;
    case ARM_CP_CURRENTEL:
        /* Reads as current EL value from pstate, which is
         * guaranteed to be constant by the tb flags.
         */
        tcg_rt = cpu_reg(s, rt);
        tcg_gen_movi_i64(tcg_rt, s->current_el << 2);
        return;
    case ARM_CP_DC_ZVA:
        /* Writes clear the aligned block of memory which rt points into. */
        tcg_rt = cpu_reg(s, rt);
        gen_helper_dc_zva(cpu_env, tcg_rt);
        return;
    default:
        break;
    }

    /* I/O register accesses under icount are bracketed by
     * gen_io_start/gen_io_end.
     */
    if ((s->tb->cflags & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
        gen_io_start();
    }

    tcg_rt = cpu_reg(s, rt);

    if (isread) {
        /* Constant value, read helper, or a direct load from env */
        if (ri->type & ARM_CP_CONST) {
            tcg_gen_movi_i64(tcg_rt, ri->resetvalue);
        } else if (ri->readfn) {
            TCGv_ptr tmpptr;
            tmpptr = tcg_const_ptr(ri);
            gen_helper_get_cp_reg64(tcg_rt, cpu_env, tmpptr);
            tcg_temp_free_ptr(tmpptr);
        } else {
            tcg_gen_ld_i64(tcg_rt, cpu_env, ri->fieldoffset);
        }
    } else {
        if (ri->type & ARM_CP_CONST) {
            /* If not forbidden by access permissions, treat as WI */
            return;
        } else if (ri->writefn) {
            TCGv_ptr tmpptr;
            tmpptr = tcg_const_ptr(ri);
            gen_helper_set_cp_reg64(cpu_env, tmpptr, tcg_rt);
            tcg_temp_free_ptr(tmpptr);
        } else {
            tcg_gen_st_i64(tcg_rt, cpu_env, ri->fieldoffset);
        }
    }

    if ((s->tb->cflags & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
        /* I/O operations must end the TB here (whether read or write) */
        gen_io_end();
        s->is_jmp = DISAS_UPDATE;
    } else if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
        /* We default to ending the TB on a coprocessor register write,
         * but allow this to be suppressed by the register definition
         * (usually only necessary to work around guest bugs).
         */
        s->is_jmp = DISAS_UPDATE;
    }
}
1522
1523/* C3.2.4 System
1524 *  31                 22 21  20 19 18 16 15   12 11    8 7   5 4    0
1525 * +---------------------+---+-----+-----+-------+-------+-----+------+
1526 * | 1 1 0 1 0 1 0 1 0 0 | L | op0 | op1 |  CRn  |  CRm  | op2 |  Rt  |
1527 * +---------------------+---+-----+-----+-------+-------+-----+------+
1528 */
1529static void disas_system(DisasContext *s, uint32_t insn)
1530{
1531    unsigned int l, op0, op1, crn, crm, op2, rt;
1532    l = extract32(insn, 21, 1);
1533    op0 = extract32(insn, 19, 2);
1534    op1 = extract32(insn, 16, 3);
1535    crn = extract32(insn, 12, 4);
1536    crm = extract32(insn, 8, 4);
1537    op2 = extract32(insn, 5, 3);
1538    rt = extract32(insn, 0, 5);
1539
1540    if (op0 == 0) {
1541        if (l || rt != 31) {
1542            unallocated_encoding(s);
1543            return;
1544        }
1545        switch (crn) {
1546        case 2: /* C5.6.68 HINT */
1547            handle_hint(s, insn, op1, op2, crm);
1548            break;
1549        case 3: /* CLREX, DSB, DMB, ISB */
1550            handle_sync(s, insn, op1, op2, crm);
1551            break;
1552        case 4: /* C5.6.130 MSR (immediate) */
1553            handle_msr_i(s, insn, op1, op2, crm);
1554            break;
1555        default:
1556            unallocated_encoding(s);
1557            break;
1558        }
1559        return;
1560    }
1561    handle_sys(s, insn, l, op0, op1, op2, crn, crm, rt);
1562}
1563
1564/* C3.2.3 Exception generation
1565 *
1566 *  31             24 23 21 20                     5 4   2 1  0
1567 * +-----------------+-----+------------------------+-----+----+
1568 * | 1 1 0 1 0 1 0 0 | opc |          imm16         | op2 | LL |
1569 * +-----------------------+------------------------+----------+
1570 */
static void disas_exc(DisasContext *s, uint32_t insn)
{
    int opc = extract32(insn, 21, 3);
    int op2_ll = extract32(insn, 0, 5);
    int imm16 = extract32(insn, 5, 16);
    TCGv_i32 tmp;

    switch (opc) {
    case 0:
        /* For SVC, HVC and SMC we advance the single-step state
         * machine before taking the exception. This is architecturally
         * mandated, to ensure that single-stepping a system call
         * instruction works properly.
         */
        switch (op2_ll) {
        case 1: /* SVC */
            gen_ss_advance(s);
            gen_exception_insn(s, 0, EXCP_SWI, syn_aa64_svc(imm16),
                               default_exception_el(s));
            break;
        case 2: /* HVC */
            if (s->current_el == 0) {
                unallocated_encoding(s);
                break;
            }
            /* The pre HVC helper handles cases when HVC gets trapped
             * as an undefined insn by runtime configuration.
             */
            gen_a64_set_pc_im(s->pc - 4);
            gen_helper_pre_hvc(cpu_env);
            gen_ss_advance(s);
            gen_exception_insn(s, 0, EXCP_HVC, syn_aa64_hvc(imm16), 2);
            break;
        case 3: /* SMC */
            if (s->current_el == 0) {
                unallocated_encoding(s);
                break;
            }
            gen_a64_set_pc_im(s->pc - 4);
            tmp = tcg_const_i32(syn_aa64_smc(imm16));
            gen_helper_pre_smc(cpu_env, tmp);
            tcg_temp_free_i32(tmp);
            gen_ss_advance(s);
            gen_exception_insn(s, 0, EXCP_SMC, syn_aa64_smc(imm16), 3);
            break;
        default:
            unallocated_encoding(s);
            break;
        }
        break;
    case 1:
        if (op2_ll != 0) {
            unallocated_encoding(s);
            break;
        }
        /* BRK */
        gen_exception_insn(s, 4, EXCP_BKPT, syn_aa64_bkpt(imm16),
                           default_exception_el(s));
        break;
    case 2:
        if (op2_ll != 0) {
            unallocated_encoding(s);
            break;
        }
        /* HLT. This has two purposes.
         * Architecturally it is an external halting debug instruction;
         * since QEMU does not implement external debug we treat it as
         * the "halting debug disabled" case requires: it UNDEFs.
         * Secondly, "HLT 0xf000" is the A64 semihosting syscall instruction.
         */
        if (semihosting_enabled() && imm16 == 0xf000) {
#ifndef CONFIG_USER_ONLY
            /* In system mode, don't allow userspace access to semihosting,
             * to provide some semblance of security (and for consistency
             * with our 32-bit semihosting).
             */
            if (s->current_el == 0) {
                unsupported_encoding(s, insn);
                break;
            }
#endif
            gen_exception_internal_insn(s, 0, EXCP_SEMIHOST);
        } else {
            unsupported_encoding(s, insn);
        }
        break;
    case 5:
        if (op2_ll < 1 || op2_ll > 3) {
            unallocated_encoding(s);
            break;
        }
        /* DCPS1, DCPS2, DCPS3 */
        unsupported_encoding(s, insn);
        break;
    default:
        unallocated_encoding(s);
        break;
    }
}
1670
1671/* C3.2.7 Unconditional branch (register)
1672 *  31           25 24   21 20   16 15   10 9    5 4     0
1673 * +---------------+-------+-------+-------+------+-------+
1674 * | 1 1 0 1 0 1 1 |  opc  |  op2  |  op3  |  Rn  |  op4  |
1675 * +---------------+-------+-------+-------+------+-------+
1676 */
1677static void disas_uncond_b_reg(DisasContext *s, uint32_t insn)
1678{
1679    unsigned int opc, op2, op3, rn, op4;
1680
1681    opc = extract32(insn, 21, 4);
1682    op2 = extract32(insn, 16, 5);
1683    op3 = extract32(insn, 10, 6);
1684    rn = extract32(insn, 5, 5);
1685    op4 = extract32(insn, 0, 5);
1686
1687    if (op4 != 0x0 || op3 != 0x0 || op2 != 0x1f) {
1688        unallocated_encoding(s);
1689        return;
1690    }
1691
1692    switch (opc) {
1693    case 0: /* BR */
1694    case 2: /* RET */
1695        tcg_gen_mov_i64(cpu_pc, cpu_reg(s, rn));
1696        break;
1697    case 1: /* BLR */
1698        tcg_gen_mov_i64(cpu_pc, cpu_reg(s, rn));
1699        tcg_gen_movi_i64(cpu_reg(s, 30), s->pc);
1700        break;
1701    case 4: /* ERET */
1702        if (s->current_el == 0) {
1703            unallocated_encoding(s);
1704            return;
1705        }
1706        gen_helper_exception_return(cpu_env);
1707        s->is_jmp = DISAS_JUMP;
1708        return;
1709    case 5: /* DRPS */
1710        if (rn != 0x1f) {
1711            unallocated_encoding(s);
1712        } else {
1713            unsupported_encoding(s, insn);
1714        }
1715        return;
1716    default:
1717        unallocated_encoding(s);
1718        return;
1719    }
1720
1721    s->is_jmp = DISAS_JUMP;
1722}
1723
1724/* C3.2 Branches, exception generating and system instructions */
1725static void disas_b_exc_sys(DisasContext *s, uint32_t insn)
1726{
1727    switch (extract32(insn, 25, 7)) {
1728    case 0x0a: case 0x0b:
1729    case 0x4a: case 0x4b: /* Unconditional branch (immediate) */
1730        disas_uncond_b_imm(s, insn);
1731        break;
1732    case 0x1a: case 0x5a: /* Compare & branch (immediate) */
1733        disas_comp_b_imm(s, insn);
1734        break;
1735    case 0x1b: case 0x5b: /* Test & branch (immediate) */
1736        disas_test_b_imm(s, insn);
1737        break;
1738    case 0x2a: /* Conditional branch (immediate) */
1739        disas_cond_b_imm(s, insn);
1740        break;
1741    case 0x6a: /* Exception generation / System */
1742        if (insn & (1 << 24)) {
1743            disas_system(s, insn);
1744        } else {
1745            disas_exc(s, insn);
1746        }
1747        break;
1748    case 0x6b: /* Unconditional branch (register) */
1749        disas_uncond_b_reg(s, insn);
1750        break;
1751    default:
1752        unallocated_encoding(s);
1753        break;
1754    }
1755}
1756
1757/*
1758 * Load/Store exclusive instructions are implemented by remembering
1759 * the value/address loaded, and seeing if these are the same
1760 * when the store is performed. This is not actually the architecturally
1761 * mandated semantics, but it works for typical guest code sequences
1762 * and avoids having to monitor regular stores.
1763 *
1764 * In system emulation mode only one CPU will be running at once, so
1765 * this sequence is effectively atomic.  In user emulation mode we
1766 * throw an exception and handle the atomic operation elsewhere.
1767 */
static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
                               TCGv_i64 addr, int size, bool is_pair)
{
    TCGv_i64 tmp = tcg_temp_new_i64();
    TCGMemOp memop = s->be_data + size;

    g_assert(size <= 3);
    tcg_gen_qemu_ld_i64(tmp, addr, get_mem_index(s), memop);

    if (is_pair) {
        TCGv_i64 addr2 = tcg_temp_new_i64();
        TCGv_i64 hitmp = tcg_temp_new_i64();

        /* Pair loads are only 32- or 64-bit per register */
        g_assert(size >= 2);
        tcg_gen_addi_i64(addr2, addr, 1 << size);
        tcg_gen_qemu_ld_i64(hitmp, addr2, get_mem_index(s), memop);
        tcg_temp_free_i64(addr2);
        /* Remember the second value for the later store-exclusive check */
        tcg_gen_mov_i64(cpu_exclusive_high, hitmp);
        tcg_gen_mov_i64(cpu_reg(s, rt2), hitmp);
        tcg_temp_free_i64(hitmp);
    }

    /* Record value and address so gen_store_exclusive can compare them */
    tcg_gen_mov_i64(cpu_exclusive_val, tmp);
    tcg_gen_mov_i64(cpu_reg(s, rt), tmp);

    tcg_temp_free_i64(tmp);
    tcg_gen_mov_i64(cpu_exclusive_addr, addr);
}
1796
1797#ifdef CONFIG_USER_ONLY
static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
                                TCGv_i64 addr, int size, int is_pair)
{
    /* In user emulation the atomic operation is handled outside the
     * translator (see the comment above gen_load_exclusive): record the
     * address and pack size, pair flag and the three register numbers
     * into exclusive_info, then raise EXCP_STREX.
     */
    tcg_gen_mov_i64(cpu_exclusive_test, addr);
    tcg_gen_movi_i32(cpu_exclusive_info,
                     size | is_pair << 2 | (rd << 4) | (rt << 9) | (rt2 << 14));
    gen_exception_internal_insn(s, 4, EXCP_STREX);
}
1806#else
static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
                                TCGv_i64 inaddr, int size, int is_pair)
{
    /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]
     *     && (!is_pair || env->exclusive_high == [addr + datasize])) {
     *     [addr] = {Rt};
     *     if (is_pair) {
     *         [addr + datasize] = {Rt2};
     *     }
     *     {Rd} = 0;
     * } else {
     *     {Rd} = 1;
     * }
     * env->exclusive_addr = -1;
     */
    TCGLabel *fail_label = gen_new_label();
    TCGLabel *done_label = gen_new_label();
    TCGv_i64 addr = tcg_temp_local_new_i64();
    TCGv_i64 tmp;

    /* Copy input into a local temp so it is not trashed when the
     * basic block ends at the branch insn.
     */
    tcg_gen_mov_i64(addr, inaddr);
    tcg_gen_brcond_i64(TCG_COND_NE, addr, cpu_exclusive_addr, fail_label);

    /* Fail unless memory still holds the value the load-exclusive saw */
    tmp = tcg_temp_new_i64();
    tcg_gen_qemu_ld_i64(tmp, addr, get_mem_index(s), s->be_data + size);
    tcg_gen_brcond_i64(TCG_COND_NE, tmp, cpu_exclusive_val, fail_label);
    tcg_temp_free_i64(tmp);

    if (is_pair) {
        /* For pairs, the second half must also be unchanged */
        TCGv_i64 addrhi = tcg_temp_new_i64();
        TCGv_i64 tmphi = tcg_temp_new_i64();

        tcg_gen_addi_i64(addrhi, addr, 1 << size);
        tcg_gen_qemu_ld_i64(tmphi, addrhi, get_mem_index(s),
                            s->be_data + size);
        tcg_gen_brcond_i64(TCG_COND_NE, tmphi, cpu_exclusive_high, fail_label);

        tcg_temp_free_i64(tmphi);
        tcg_temp_free_i64(addrhi);
    }

    /* We seem to still have the exclusive monitor, so do the store */
    tcg_gen_qemu_st_i64(cpu_reg(s, rt), addr, get_mem_index(s),
                        s->be_data + size);
    if (is_pair) {
        TCGv_i64 addrhi = tcg_temp_new_i64();

        tcg_gen_addi_i64(addrhi, addr, 1 << size);
        tcg_gen_qemu_st_i64(cpu_reg(s, rt2), addrhi,
                            get_mem_index(s), s->be_data + size);
        tcg_temp_free_i64(addrhi);
    }

    tcg_temp_free_i64(addr);

    /* Rd reports status: 0 on success, 1 on failure */
    tcg_gen_movi_i64(cpu_reg(s, rd), 0);
    tcg_gen_br(done_label);
    gen_set_label(fail_label);
    tcg_gen_movi_i64(cpu_reg(s, rd), 1);
    gen_set_label(done_label);
    /* Either way the exclusive monitor is now cleared */
    tcg_gen_movi_i64(cpu_exclusive_addr, -1);

    /* NOTE(review): upstream QEMU does not call the sev helper here;
     * presumably this local addition wakes CPUs waiting in WFE — confirm.
     */
    gen_helper_sev(cpu_env);
}
1874#endif
1875
/* Update the Sixty-Four bit (SF) registersize. This logic is derived
 * from the ARMv8 specs for LDR (Shared decode for all encodings).
 */
static bool disas_ldst_compute_iss_sf(int size, bool is_signed, int opc)
{
    int regsize;

    if (is_signed) {
        /* Sign-extending loads: opc<0> selects the 32-bit destination.  */
        regsize = (opc & 1) ? 32 : 64;
    } else {
        /* Zero-extending loads target Xt only for doubleword accesses.  */
        regsize = (size == 3) ? 64 : 32;
    }
    return regsize == 64;
}
1891
/* C3.3.6 Load/store exclusive
 *
 *  31 30 29         24  23  22   21  20  16  15  14   10 9    5 4    0
 * +-----+-------------+----+---+----+------+----+-------+------+------+
 * | sz  | 0 0 1 0 0 0 | o2 | L | o1 |  Rs  | o0 |  Rt2  |  Rn  | Rt   |
 * +-----+-------------+----+---+----+------+----+-------+------+------+
 *
 *  sz: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64 bit
 *   L: 0 -> store, 1 -> load
 *  o2: 0 -> exclusive, 1 -> not
 *  o1: 0 -> single register, 1 -> register pair
 *  o0: 1 -> load-acquire/store-release, 0 -> not
 */
static void disas_ldst_excl(DisasContext *s, uint32_t insn)
{
    int rt = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int rt2 = extract32(insn, 10, 5);
    int is_lasr = extract32(insn, 15, 1);
    int rs = extract32(insn, 16, 5);
    int is_pair = extract32(insn, 21, 1);
    int is_store = !extract32(insn, 22, 1);
    int is_excl = !extract32(insn, 23, 1);
    int size = extract32(insn, 30, 2);
    TCGv_i64 tcg_addr;

    /* Reject unallocated combinations: non-exclusive forms must be
     * LDAR/STLR (o0 set, single register), and pair forms need a
     * 32 or 64 bit element size.
     */
    if ((!is_excl && !is_pair && !is_lasr) ||
        (!is_excl && is_pair) ||
        (is_pair && size < 2)) {
        unallocated_encoding(s);
        return;
    }

    if (rn == 31) {
        gen_check_sp_alignment(s);
    }
    tcg_addr = read_cpu_reg_sp(s, rn, 1);

    /* Note that since TCG is single threaded load-acquire/store-release
     * semantics require no extra if (is_lasr) { ... } handling.
     */

    if (is_excl) {
        if (!is_store) {
            /* Flag this insn as a load-exclusive for ISS purposes.  */
            s->is_ldex = true;
            gen_load_exclusive(s, rt, rt2, tcg_addr, size, is_pair);
        } else {
            gen_store_exclusive(s, rs, rt, rt2, tcg_addr, size, is_pair);
        }
    } else {
        TCGv_i64 tcg_rt = cpu_reg(s, rt);
        bool iss_sf = disas_ldst_compute_iss_sf(size, false, 0);

        /* Generate ISS for non-exclusive accesses including LASR.  */
        if (is_store) {
            do_gpr_st(s, tcg_rt, tcg_addr, size,
                      true, rt, iss_sf, is_lasr);
        } else {
            do_gpr_ld(s, tcg_rt, tcg_addr, size, false, false,
                      true, rt, iss_sf, is_lasr);
        }
    }
}
1955
1956/*
1957 * C3.3.5 Load register (literal)
1958 *
1959 *  31 30 29   27  26 25 24 23                5 4     0
1960 * +-----+-------+---+-----+-------------------+-------+
1961 * | opc | 0 1 1 | V | 0 0 |     imm19         |  Rt   |
1962 * +-----+-------+---+-----+-------------------+-------+
1963 *
1964 * V: 1 -> vector (simd/fp)
1965 * opc (non-vector): 00 -> 32 bit, 01 -> 64 bit,
1966 *                   10-> 32 bit signed, 11 -> prefetch
1967 * opc (vector): 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit (11 unallocated)
1968 */
1969static void disas_ld_lit(DisasContext *s, uint32_t insn)
1970{
1971    int rt = extract32(insn, 0, 5);
1972    int64_t imm = sextract32(insn, 5, 19) << 2;
1973    bool is_vector = extract32(insn, 26, 1);
1974    int opc = extract32(insn, 30, 2);
1975    bool is_signed = false;
1976    int size = 2;
1977    TCGv_i64 tcg_rt, tcg_addr;
1978
1979    if (is_vector) {
1980        if (opc == 3) {
1981            unallocated_encoding(s);
1982            return;
1983        }
1984        size = 2 + opc;
1985        if (!fp_access_check(s)) {
1986            return;
1987        }
1988    } else {
1989        if (opc == 3) {
1990            /* PRFM (literal) : prefetch */
1991            return;
1992        }
1993        size = 2 + extract32(opc, 0, 1);
1994        is_signed = extract32(opc, 1, 1);
1995    }
1996
1997    tcg_rt = cpu_reg(s, rt);
1998
1999    tcg_addr = tcg_const_i64((s->pc - 4) + imm);
2000    if (is_vector) {
2001        do_fp_ld(s, rt, tcg_addr, size);
2002    } else {
2003        /* Only unsigned 32bit loads target 32bit registers.  */
2004        bool iss_sf = opc == 0 ? 32 : 64;
2005
2006        do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, false,
2007                  true, rt, iss_sf, false);
2008    }
2009    tcg_temp_free_i64(tcg_addr);
2010}
2011
2012/*
2013 * C5.6.80 LDNP (Load Pair - non-temporal hint)
2014 * C5.6.81 LDP (Load Pair - non vector)
2015 * C5.6.82 LDPSW (Load Pair Signed Word - non vector)
2016 * C5.6.176 STNP (Store Pair - non-temporal hint)
2017 * C5.6.177 STP (Store Pair - non vector)
2018 * C6.3.165 LDNP (Load Pair of SIMD&FP - non-temporal hint)
2019 * C6.3.165 LDP (Load Pair of SIMD&FP)
2020 * C6.3.284 STNP (Store Pair of SIMD&FP - non-temporal hint)
2021 * C6.3.284 STP (Store Pair of SIMD&FP)
2022 *
2023 *  31 30 29   27  26  25 24   23  22 21   15 14   10 9    5 4    0
2024 * +-----+-------+---+---+-------+---+-----------------------------+
2025 * | opc | 1 0 1 | V | 0 | index | L |  imm7 |  Rt2  |  Rn  | Rt   |
2026 * +-----+-------+---+---+-------+---+-------+-------+------+------+
2027 *
2028 * opc: LDP/STP/LDNP/STNP        00 -> 32 bit, 10 -> 64 bit
2029 *      LDPSW                    01
2030 *      LDP/STP/LDNP/STNP (SIMD) 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit
2031 *   V: 0 -> GPR, 1 -> Vector
2032 * idx: 00 -> signed offset with non-temporal hint, 01 -> post-index,
2033 *      10 -> signed offset, 11 -> pre-index
2034 *   L: 0 -> Store 1 -> Load
2035 *
2036 * Rt, Rt2 = GPR or SIMD registers to be stored
2037 * Rn = general purpose register containing address
2038 * imm7 = signed offset (multiple of 4 or 8 depending on size)
2039 */
2040static void disas_ldst_pair(DisasContext *s, uint32_t insn)
2041{
2042    int rt = extract32(insn, 0, 5);
2043    int rn = extract32(insn, 5, 5);
2044    int rt2 = extract32(insn, 10, 5);
2045    uint64_t offset = sextract64(insn, 15, 7);
2046    int index = extract32(insn, 23, 2);
2047    bool is_vector = extract32(insn, 26, 1);
2048    bool is_load = extract32(insn, 22, 1);
2049    int opc = extract32(insn, 30, 2);
2050
2051    bool is_signed = false;
2052    bool postindex = false;
2053    bool wback = false;
2054
2055    TCGv_i64 tcg_addr; /* calculated address */
2056    int size;
2057
2058    if (opc == 3) {
2059        unallocated_encoding(s);
2060        return;
2061    }
2062
2063    if (is_vector) {
2064        size = 2 + opc;
2065    } else {
2066        size = 2 + extract32(opc, 1, 1);
2067        is_signed = extract32(opc, 0, 1);
2068        if (!is_load && is_signed) {
2069            unallocated_encoding(s);
2070            return;
2071        }
2072    }
2073
2074    switch (index) {
2075    case 1: /* post-index */
2076        postindex = true;
2077        wback = true;
2078        break;
2079    case 0:
2080        /* signed offset with "non-temporal" hint. Since we don't emulate
2081         * caches we don't care about hints to the cache system about
2082         * data access patterns, and handle this identically to plain
2083         * signed offset.
2084         */
2085        if (is_signed) {
2086            /* There is no non-temporal-hint version of LDPSW */
2087            unallocated_encoding(s);
2088            return;
2089        }
2090        postindex = false;
2091        break;
2092    case 2: /* signed offset, rn not updated */
2093        postindex = false;
2094        break;
2095    case 3: /* pre-index */
2096        postindex = false;
2097        wback = true;
2098        break;
2099    }
2100
2101    if (is_vector && !fp_access_check(s)) {
2102        return;
2103    }
2104
2105    offset <<= size;
2106
2107    if (rn == 31) {
2108        gen_check_sp_alignment(s);
2109    }
2110
2111    tcg_addr = read_cpu_reg_sp(s, rn, 1);
2112
2113    if (!postindex) {
2114        tcg_gen_addi_i64(tcg_addr, tcg_addr, offset);
2115    }
2116
2117    if (is_vector) {
2118        if (is_load) {
2119            do_fp_ld(s, rt, tcg_addr, size);
2120        } else {
2121            do_fp_st(s, rt, tcg_addr, size);
2122        }
2123    } else {
2124        TCGv_i64 tcg_rt = cpu_reg(s, rt);
2125        if (is_load) {
2126            do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, false,
2127                      false, 0, false, false);
2128        } else {
2129            do_gpr_st(s, tcg_rt, tcg_addr, size,
2130                      false, 0, false, false);
2131        }
2132    }
2133    tcg_gen_addi_i64(tcg_addr, tcg_addr, 1 << size);
2134    if (is_vector) {
2135        if (is_load) {
2136            do_fp_ld(s, rt2, tcg_addr, size);
2137        } else {
2138            do_fp_st(s, rt2, tcg_addr, size);
2139        }
2140    } else {
2141        TCGv_i64 tcg_rt2 = cpu_reg(s, rt2);
2142        if (is_load) {
2143            do_gpr_ld(s, tcg_rt2, tcg_addr, size, is_signed, false,
2144                      false, 0, false, false);
2145        } else {
2146            do_gpr_st(s, tcg_rt2, tcg_addr, size,
2147                      false, 0, false, false);
2148        }
2149    }
2150
2151    if (wback) {
2152        if (postindex) {
2153            tcg_gen_addi_i64(tcg_addr, tcg_addr, offset - (1 << size));
2154        } else {
2155            tcg_gen_subi_i64(tcg_addr, tcg_addr, 1 << size);
2156        }
2157        tcg_gen_mov_i64(cpu_reg_sp(s, rn), tcg_addr);
2158    }
2159}
2160
2161/*
2162 * C3.3.8 Load/store (immediate post-indexed)
2163 * C3.3.9 Load/store (immediate pre-indexed)
2164 * C3.3.12 Load/store (unscaled immediate)
2165 *
2166 * 31 30 29   27  26 25 24 23 22 21  20    12 11 10 9    5 4    0
2167 * +----+-------+---+-----+-----+---+--------+-----+------+------+
2168 * |size| 1 1 1 | V | 0 0 | opc | 0 |  imm9  | idx |  Rn  |  Rt  |
2169 * +----+-------+---+-----+-----+---+--------+-----+------+------+
2170 *
2171 * idx = 01 -> post-indexed, 11 pre-indexed, 00 unscaled imm. (no writeback)
2172         10 -> unprivileged
2173 * V = 0 -> non-vector
2174 * size: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64bit
2175 * opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
2176 */
2177static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn)
2178{
2179    int rt = extract32(insn, 0, 5);
2180    int rn = extract32(insn, 5, 5);
2181    int imm9 = sextract32(insn, 12, 9);
2182    int opc = extract32(insn, 22, 2);
2183    int size = extract32(insn, 30, 2);
2184    int idx = extract32(insn, 10, 2);
2185    bool is_signed = false;
2186    bool is_store = false;
2187    bool is_extended = false;
2188    bool is_unpriv = (idx == 2);
2189    bool is_vector = extract32(insn, 26, 1);
2190    bool iss_valid = !is_vector;
2191    bool post_index;
2192    bool writeback;
2193
2194    TCGv_i64 tcg_addr;
2195
2196    if (is_vector) {
2197        size |= (opc & 2) << 1;
2198        if (size > 4 || is_unpriv) {
2199            unallocated_encoding(s);
2200            return;
2201        }
2202        is_store = ((opc & 1) == 0);
2203        if (!fp_access_check(s)) {
2204            return;
2205        }
2206    } else {
2207        if (size == 3 && opc == 2) {
2208            /* PRFM - prefetch */
2209            if (is_unpriv) {
2210                unallocated_encoding(s);
2211                return;
2212            }
2213            return;
2214        }
2215        if (opc == 3 && size > 1) {
2216            unallocated_encoding(s);
2217            return;
2218        }
2219        is_store = (opc == 0);
2220        is_signed = opc & (1<<1);
2221        is_extended = (size < 3) && (opc & 1);
2222    }
2223
2224    switch (idx) {
2225    case 0:
2226    case 2:
2227        post_index = false;
2228        writeback = false;
2229        break;
2230    case 1:
2231        post_index = true;
2232        writeback = true;
2233        break;
2234    case 3:
2235        post_index = false;
2236        writeback = true;
2237        break;
2238    }
2239
2240    if (rn == 31) {
2241        gen_check_sp_alignment(s);
2242    }
2243    tcg_addr = read_cpu_reg_sp(s, rn, 1);
2244
2245    if (!post_index) {
2246        tcg_gen_addi_i64(tcg_addr, tcg_addr, imm9);
2247    }
2248
2249    if (is_vector) {
2250        if (is_store) {
2251            do_fp_st(s, rt, tcg_addr, size);
2252        } else {
2253            do_fp_ld(s, rt, tcg_addr, size);
2254        }
2255    } else {
2256        TCGv_i64 tcg_rt = cpu_reg(s, rt);
2257        int memidx = is_unpriv ? get_a64_user_mem_index(s) : get_mem_index(s);
2258        bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
2259
2260        if (is_store) {
2261            do_gpr_st_memidx(s, tcg_rt, tcg_addr, size, memidx,
2262                             iss_valid, rt, iss_sf, false);
2263        } else {
2264            do_gpr_ld_memidx(s, tcg_rt, tcg_addr, size,
2265                             is_signed, is_extended, memidx,
2266                             iss_valid, rt, iss_sf, false);
2267        }
2268    }
2269
2270    if (writeback) {
2271        TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
2272        if (post_index) {
2273            tcg_gen_addi_i64(tcg_addr, tcg_addr, imm9);
2274        }
2275        tcg_gen_mov_i64(tcg_rn, tcg_addr);
2276    }
2277}
2278
2279/*
2280 * C3.3.10 Load/store (register offset)
2281 *
2282 * 31 30 29   27  26 25 24 23 22 21  20  16 15 13 12 11 10 9  5 4  0
2283 * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
2284 * |size| 1 1 1 | V | 0 0 | opc | 1 |  Rm  | opt | S| 1 0 | Rn | Rt |
2285 * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
2286 *
2287 * For non-vector:
2288 *   size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit
2289 *   opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
2290 * For vector:
2291 *   size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
2292 *   opc<0>: 0 -> store, 1 -> load
2293 * V: 1 -> vector/simd
2294 * opt: extend encoding (see DecodeRegExtend)
2295 * S: if S=1 then scale (essentially index by sizeof(size))
2296 * Rt: register to transfer into/out of
2297 * Rn: address register or SP for base
2298 * Rm: offset register or ZR for offset
2299 */
2300static void disas_ldst_reg_roffset(DisasContext *s, uint32_t insn)
2301{
2302    int rt = extract32(insn, 0, 5);
2303    int rn = extract32(insn, 5, 5);
2304    int shift = extract32(insn, 12, 1);
2305    int rm = extract32(insn, 16, 5);
2306    int opc = extract32(insn, 22, 2);
2307    int opt = extract32(insn, 13, 3);
2308    int size = extract32(insn, 30, 2);
2309    bool is_signed = false;
2310    bool is_store = false;
2311    bool is_extended = false;
2312    bool is_vector = extract32(insn, 26, 1);
2313
2314    TCGv_i64 tcg_rm;
2315    TCGv_i64 tcg_addr;
2316
2317    if (extract32(opt, 1, 1) == 0) {
2318        unallocated_encoding(s);
2319        return;
2320    }
2321
2322    if (is_vector) {
2323        size |= (opc & 2) << 1;
2324        if (size > 4) {
2325            unallocated_encoding(s);
2326            return;
2327        }
2328        is_store = !extract32(opc, 0, 1);
2329        if (!fp_access_check(s)) {
2330            return;
2331        }
2332    } else {
2333        if (size == 3 && opc == 2) {
2334            /* PRFM - prefetch */
2335            return;
2336        }
2337        if (opc == 3 && size > 1) {
2338            unallocated_encoding(s);
2339            return;
2340        }
2341        is_store = (opc == 0);
2342        is_signed = extract32(opc, 1, 1);
2343        is_extended = (size < 3) && extract32(opc, 0, 1);
2344    }
2345
2346    if (rn == 31) {
2347        gen_check_sp_alignment(s);
2348    }
2349    tcg_addr = read_cpu_reg_sp(s, rn, 1);
2350
2351    tcg_rm = read_cpu_reg(s, rm, 1);
2352    ext_and_shift_reg(tcg_rm, tcg_rm, opt, shift ? size : 0);
2353
2354    tcg_gen_add_i64(tcg_addr, tcg_addr, tcg_rm);
2355
2356    if (is_vector) {
2357        if (is_store) {
2358            do_fp_st(s, rt, tcg_addr, size);
2359        } else {
2360            do_fp_ld(s, rt, tcg_addr, size);
2361        }
2362    } else {
2363        TCGv_i64 tcg_rt = cpu_reg(s, rt);
2364        bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
2365        if (is_store) {
2366            do_gpr_st(s, tcg_rt, tcg_addr, size,
2367                      true, rt, iss_sf, false);
2368        } else {
2369            do_gpr_ld(s, tcg_rt, tcg_addr, size,
2370                      is_signed, is_extended,
2371                      true, rt, iss_sf, false);
2372        }
2373    }
2374}
2375
2376/*
2377 * C3.3.13 Load/store (unsigned immediate)
2378 *
2379 * 31 30 29   27  26 25 24 23 22 21        10 9     5
2380 * +----+-------+---+-----+-----+------------+-------+------+
2381 * |size| 1 1 1 | V | 0 1 | opc |   imm12    |  Rn   |  Rt  |
2382 * +----+-------+---+-----+-----+------------+-------+------+
2383 *
2384 * For non-vector:
2385 *   size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit
2386 *   opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
2387 * For vector:
2388 *   size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
2389 *   opc<0>: 0 -> store, 1 -> load
2390 * Rn: base address register (inc SP)
2391 * Rt: target register
2392 */
2393static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn)
2394{
2395    int rt = extract32(insn, 0, 5);
2396    int rn = extract32(insn, 5, 5);
2397    unsigned int imm12 = extract32(insn, 10, 12);
2398    bool is_vector = extract32(insn, 26, 1);
2399    int size = extract32(insn, 30, 2);
2400    int opc = extract32(insn, 22, 2);
2401    unsigned int offset;
2402
2403    TCGv_i64 tcg_addr;
2404
2405    bool is_store;
2406    bool is_signed = false;
2407    bool is_extended = false;
2408
2409    if (is_vector) {
2410        size |= (opc & 2) << 1;
2411        if (size > 4) {
2412            unallocated_encoding(s);
2413            return;
2414        }
2415        is_store = !extract32(opc, 0, 1);
2416        if (!fp_access_check(s)) {
2417            return;
2418        }
2419    } else {
2420        if (size == 3 && opc == 2) {
2421            /* PRFM - prefetch */
2422            return;
2423        }
2424        if (opc == 3 && size > 1) {
2425            unallocated_encoding(s);
2426            return;
2427        }
2428        is_store = (opc == 0);
2429        is_signed = extract32(opc, 1, 1);
2430        is_extended = (size < 3) && extract32(opc, 0, 1);
2431    }
2432
2433    if (rn == 31) {
2434        gen_check_sp_alignment(s);
2435    }
2436    tcg_addr = read_cpu_reg_sp(s, rn, 1);
2437    offset = imm12 << size;
2438    tcg_gen_addi_i64(tcg_addr, tcg_addr, offset);
2439
2440    if (is_vector) {
2441        if (is_store) {
2442            do_fp_st(s, rt, tcg_addr, size);
2443        } else {
2444            do_fp_ld(s, rt, tcg_addr, size);
2445        }
2446    } else {
2447        TCGv_i64 tcg_rt = cpu_reg(s, rt);
2448        bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
2449        if (is_store) {
2450            do_gpr_st(s, tcg_rt, tcg_addr, size,
2451                      true, rt, iss_sf, false);
2452        } else {
2453            do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, is_extended,
2454                      true, rt, iss_sf, false);
2455        }
2456    }
2457}
2458
2459/* Load/store register (all forms) */
2460static void disas_ldst_reg(DisasContext *s, uint32_t insn)
2461{
2462    switch (extract32(insn, 24, 2)) {
2463    case 0:
2464        if (extract32(insn, 21, 1) == 1 && extract32(insn, 10, 2) == 2) {
2465            disas_ldst_reg_roffset(s, insn);
2466        } else {
2467            /* Load/store register (unscaled immediate)
2468             * Load/store immediate pre/post-indexed
2469             * Load/store register unprivileged
2470             */
2471            disas_ldst_reg_imm9(s, insn);
2472        }
2473        break;
2474    case 1:
2475        disas_ldst_reg_unsigned_imm(s, insn);
2476        break;
2477    default:
2478        unallocated_encoding(s);
2479        break;
2480    }
2481}
2482
/* C3.3.1 AdvSIMD load/store multiple structures
 *
 *  31  30  29           23 22  21         16 15    12 11  10 9    5 4    0
 * +---+---+---------------+---+-------------+--------+------+------+------+
 * | 0 | Q | 0 0 1 1 0 0 0 | L | 0 0 0 0 0 0 | opcode | size |  Rn  |  Rt  |
 * +---+---+---------------+---+-------------+--------+------+------+------+
 *
 * C3.3.2 AdvSIMD load/store multiple structures (post-indexed)
 *
 *  31  30  29           23 22  21  20     16 15    12 11  10 9    5 4    0
 * +---+---+---------------+---+---+---------+--------+------+------+------+
 * | 0 | Q | 0 0 1 1 0 0 1 | L | 0 |   Rm    | opcode | size |  Rn  |  Rt  |
 * +---+---+---------------+---+---+---------+--------+------+------+------+
 *
 * Rt: first (or only) SIMD&FP register to be transferred
 * Rn: base address or SP
 * Rm (post-index only): post-index register (when !31) or size dependent #imm
 */
static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn)
{
    int rt = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int size = extract32(insn, 10, 2);
    int opcode = extract32(insn, 12, 4);
    bool is_store = !extract32(insn, 22, 1);
    bool is_postidx = extract32(insn, 23, 1);
    bool is_q = extract32(insn, 30, 1);
    TCGv_i64 tcg_addr, tcg_rn;

    int ebytes = 1 << size;     /* bytes per element */
    int elements = (is_q ? 128 : 64) / (8 << size);     /* per register */
    int rpt;    /* num iterations */
    int selem;  /* structure elements */
    int r;

    if (extract32(insn, 31, 1) || extract32(insn, 21, 1)) {
        unallocated_encoding(s);
        return;
    }

    /* From the shared decode logic */
    switch (opcode) {
    case 0x0: /* LD4/ST4: one pass, 4 elements per structure */
        rpt = 1;
        selem = 4;
        break;
    case 0x2: /* LD1/ST1, four registers */
        rpt = 4;
        selem = 1;
        break;
    case 0x4: /* LD3/ST3 */
        rpt = 1;
        selem = 3;
        break;
    case 0x6: /* LD1/ST1, three registers */
        rpt = 3;
        selem = 1;
        break;
    case 0x7: /* LD1/ST1, one register */
        rpt = 1;
        selem = 1;
        break;
    case 0x8: /* LD2/ST2 */
        rpt = 1;
        selem = 2;
        break;
    case 0xa: /* LD1/ST1, two registers */
        rpt = 2;
        selem = 1;
        break;
    default:
        unallocated_encoding(s);
        return;
    }

    if (size == 3 && !is_q && selem != 1) {
        /* reserved */
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    if (rn == 31) {
        gen_check_sp_alignment(s);
    }

    /* Work on a copy of the address; Rn is only updated by the
     * explicit post-index writeback below.
     */
    tcg_rn = cpu_reg_sp(s, rn);
    tcg_addr = tcg_temp_new_i64();
    tcg_gen_mov_i64(tcg_addr, tcg_rn);

    for (r = 0; r < rpt; r++) {
        int e;
        for (e = 0; e < elements; e++) {
            int tt = (rt + r) % 32;
            int xs;
            for (xs = 0; xs < selem; xs++) {
                if (is_store) {
                    do_vec_st(s, tt, e, tcg_addr, size);
                } else {
                    do_vec_ld(s, tt, e, tcg_addr, size);

                    /* For non-quad operations, setting a slice of the low
                     * 64 bits of the register clears the high 64 bits (in
                     * the ARM ARM pseudocode this is implicit in the fact
                     * that 'rval' is a 64 bit wide variable). We optimize
                     * by noticing that we only need to do this the first
                     * time we touch a register.
                     */
                    if (!is_q && e == 0 && (r == 0 || xs == selem - 1)) {
                        clear_vec_high(s, tt);
                    }
                }
                tcg_gen_addi_i64(tcg_addr, tcg_addr, ebytes);
                tt = (tt + 1) % 32;
            }
        }
    }

    if (is_postidx) {
        int rm = extract32(insn, 16, 5);
        if (rm == 31) {
            /* Rm == 31: writeback the fixed transfer size.  */
            tcg_gen_mov_i64(tcg_rn, tcg_addr);
        } else {
            tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
        }
    }
    tcg_temp_free_i64(tcg_addr);
}
2614
/* C3.3.3 AdvSIMD load/store single structure
 *
 *  31  30  29           23 22 21 20       16 15 13 12  11  10 9    5 4    0
 * +---+---+---------------+-----+-----------+-----+---+------+------+------+
 * | 0 | Q | 0 0 1 1 0 1 0 | L R | 0 0 0 0 0 | opc | S | size |  Rn  |  Rt  |
 * +---+---+---------------+-----+-----------+-----+---+------+------+------+
 *
 * C3.3.4 AdvSIMD load/store single structure (post-indexed)
 *
 *  31  30  29           23 22 21 20       16 15 13 12  11  10 9    5 4    0
 * +---+---+---------------+-----+-----------+-----+---+------+------+------+
 * | 0 | Q | 0 0 1 1 0 1 1 | L R |     Rm    | opc | S | size |  Rn  |  Rt  |
 * +---+---+---------------+-----+-----------+-----+---+------+------+------+
 *
 * Rt: first (or only) SIMD&FP register to be transferred
 * Rn: base address or SP
 * Rm (post-index only): post-index register (when !31) or size dependent #imm
 * index = encoded in Q:S:size dependent on size
 *
 * lane_size = encoded in R, opc
 * transfer width = encoded in opc, S, size
 */
static void disas_ldst_single_struct(DisasContext *s, uint32_t insn)
{
    int rt = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int size = extract32(insn, 10, 2);
    int S = extract32(insn, 12, 1);
    int opc = extract32(insn, 13, 3);
    int R = extract32(insn, 21, 1);
    int is_load = extract32(insn, 22, 1);
    int is_postidx = extract32(insn, 23, 1);
    int is_q = extract32(insn, 30, 1);

    int scale = extract32(opc, 1, 2);       /* log2 of the lane size */
    int selem = (extract32(opc, 0, 1) << 1 | R) + 1;    /* structure elems */
    bool replicate = false;
    int index = is_q << 3 | S << 2 | size;  /* lane number, before scaling */
    int ebytes, xs;
    TCGv_i64 tcg_addr, tcg_rn;

    switch (scale) {
    case 3:
        /* Load and replicate forms (LD1R..LD4R); stores and S != 0
         * are unallocated here, and the lane size comes from 'size'.
         */
        if (!is_load || S) {
            unallocated_encoding(s);
            return;
        }
        scale = size;
        replicate = true;
        break;
    case 0:
        /* 8 bit lanes: index is Q:S:size as-is.  */
        break;
    case 1:
        /* 16 bit lanes: size<0> must be zero; drop the unused
         * low index bit.
         */
        if (extract32(size, 0, 1)) {
            unallocated_encoding(s);
            return;
        }
        index >>= 1;
        break;
    case 2:
        /* 32 bit lanes, or 64 bit lanes when size<0> is set.  */
        if (extract32(size, 1, 1)) {
            unallocated_encoding(s);
            return;
        }
        if (!extract32(size, 0, 1)) {
            index >>= 2;
        } else {
            if (S) {
                unallocated_encoding(s);
                return;
            }
            index >>= 3;
            scale = 3;
        }
        break;
    default:
        g_assert_not_reached();
    }

    if (!fp_access_check(s)) {
        return;
    }

    ebytes = 1 << scale;

    if (rn == 31) {
        gen_check_sp_alignment(s);
    }

    /* Work on a copy of the address; Rn is only updated by the
     * explicit post-index writeback below.
     */
    tcg_rn = cpu_reg_sp(s, rn);
    tcg_addr = tcg_temp_new_i64();
    tcg_gen_mov_i64(tcg_addr, tcg_rn);

    for (xs = 0; xs < selem; xs++) {
        if (replicate) {
            /* Load and replicate to all elements */
            uint64_t mulconst;
            TCGv_i64 tcg_tmp = tcg_temp_new_i64();

            tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr,
                                get_mem_index(s), s->be_data + scale);
            /* Multiplying by these constants smears the loaded element
             * across every lane of a 64 bit value.
             */
            switch (scale) {
            case 0:
                mulconst = 0x0101010101010101ULL;
                break;
            case 1:
                mulconst = 0x0001000100010001ULL;
                break;
            case 2:
                mulconst = 0x0000000100000001ULL;
                break;
            case 3:
                mulconst = 0;   /* a 64 bit element already fills it */
                break;
            default:
                g_assert_not_reached();
            }
            if (mulconst) {
                tcg_gen_muli_i64(tcg_tmp, tcg_tmp, mulconst);
            }
            write_vec_element(s, tcg_tmp, rt, 0, MO_64);
            if (is_q) {
                write_vec_element(s, tcg_tmp, rt, 1, MO_64);
            } else {
                clear_vec_high(s, rt);
            }
            tcg_temp_free_i64(tcg_tmp);
        } else {
            /* Load/store one element per register */
            if (is_load) {
                do_vec_ld(s, rt, index, tcg_addr, s->be_data + scale);
            } else {
                do_vec_st(s, rt, index, tcg_addr, s->be_data + scale);
            }
        }
        tcg_gen_addi_i64(tcg_addr, tcg_addr, ebytes);
        rt = (rt + 1) % 32;
    }

    if (is_postidx) {
        int rm = extract32(insn, 16, 5);
        if (rm == 31) {
            /* Rm == 31: writeback the fixed transfer size.  */
            tcg_gen_mov_i64(tcg_rn, tcg_addr);
        } else {
            tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
        }
    }
    tcg_temp_free_i64(tcg_addr);
}
2764
2765/* C3.3 Loads and stores */
2766static void disas_ldst(DisasContext *s, uint32_t insn)
2767{
2768    switch (extract32(insn, 24, 6)) {
2769    case 0x08: /* Load/store exclusive */
2770        disas_ldst_excl(s, insn);
2771        break;
2772    case 0x18: case 0x1c: /* Load register (literal) */
2773        disas_ld_lit(s, insn);
2774        break;
2775    case 0x28: case 0x29:
2776    case 0x2c: case 0x2d: /* Load/store pair (all forms) */
2777        disas_ldst_pair(s, insn);
2778        break;
2779    case 0x38: case 0x39:
2780    case 0x3c: case 0x3d: /* Load/store register (all forms) */
2781        disas_ldst_reg(s, insn);
2782        break;
2783    case 0x0c: /* AdvSIMD load/store multiple structures */
2784        disas_ldst_multiple_struct(s, insn);
2785        break;
2786    case 0x0d: /* AdvSIMD load/store single structure */
2787        disas_ldst_single_struct(s, insn);
2788        break;
2789    default:
2790        unallocated_encoding(s);
2791        break;
2792    }
2793}
2794
2795/* C3.4.6 PC-rel. addressing
2796 *   31  30   29 28       24 23                5 4    0
2797 * +----+-------+-----------+-------------------+------+
2798 * | op | immlo | 1 0 0 0 0 |       immhi       |  Rd  |
2799 * +----+-------+-----------+-------------------+------+
2800 */
2801static void disas_pc_rel_adr(DisasContext *s, uint32_t insn)
2802{
2803    unsigned int page, rd;
2804    uint64_t base;
2805    uint64_t offset;
2806
2807    page = extract32(insn, 31, 1);
2808    /* SignExtend(immhi:immlo) -> offset */
2809    offset = sextract64(insn, 5, 19);
2810    offset = offset << 2 | extract32(insn, 29, 2);
2811    rd = extract32(insn, 0, 5);
2812    base = s->pc - 4;
2813
2814    if (page) {
2815        /* ADRP (page based) */
2816        base &= ~0xfff;
2817        offset <<= 12;
2818    }
2819
2820    tcg_gen_movi_i64(cpu_reg(s, rd), base + offset);
2821}
2822
2823/*
2824 * C3.4.1 Add/subtract (immediate)
2825 *
2826 *  31 30 29 28       24 23 22 21         10 9   5 4   0
2827 * +--+--+--+-----------+-----+-------------+-----+-----+
2828 * |sf|op| S| 1 0 0 0 1 |shift|    imm12    |  Rn | Rd  |
2829 * +--+--+--+-----------+-----+-------------+-----+-----+
2830 *
2831 *    sf: 0 -> 32bit, 1 -> 64bit
2832 *    op: 0 -> add  , 1 -> sub
2833 *     S: 1 -> set flags
2834 * shift: 00 -> LSL imm by 0, 01 -> LSL imm by 12
2835 */
2836static void disas_add_sub_imm(DisasContext *s, uint32_t insn)
2837{
2838    int rd = extract32(insn, 0, 5);
2839    int rn = extract32(insn, 5, 5);
2840    uint64_t imm = extract32(insn, 10, 12);
2841    int shift = extract32(insn, 22, 2);
2842    bool setflags = extract32(insn, 29, 1);
2843    bool sub_op = extract32(insn, 30, 1);
2844    bool is_64bit = extract32(insn, 31, 1);
2845
2846    TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
2847    TCGv_i64 tcg_rd = setflags ? cpu_reg(s, rd) : cpu_reg_sp(s, rd);
2848    TCGv_i64 tcg_result;
2849
2850    switch (shift) {
2851    case 0x0:
2852        break;
2853    case 0x1:
2854        imm <<= 12;
2855        break;
2856    default:
2857        unallocated_encoding(s);
2858        return;
2859    }
2860
2861    tcg_result = tcg_temp_new_i64();
2862    if (!setflags) {
2863        if (sub_op) {
2864            tcg_gen_subi_i64(tcg_result, tcg_rn, imm);
2865        } else {
2866            tcg_gen_addi_i64(tcg_result, tcg_rn, imm);
2867        }
2868    } else {
2869        TCGv_i64 tcg_imm = tcg_const_i64(imm);
2870        if (sub_op) {
2871            gen_sub_CC(is_64bit, tcg_result, tcg_rn, tcg_imm);
2872        } else {
2873            gen_add_CC(is_64bit, tcg_result, tcg_rn, tcg_imm);
2874        }
2875        tcg_temp_free_i64(tcg_imm);
2876    }
2877
2878    if (is_64bit) {
2879        tcg_gen_mov_i64(tcg_rd, tcg_result);
2880    } else {
2881        tcg_gen_ext32u_i64(tcg_rd, tcg_result);
2882    }
2883
2884    tcg_temp_free_i64(tcg_result);
2885}
2886
/* Replicate a value occupying the low e bits (all higher bits must
 * already be zero) across every e-bit element of a 64 bit word.
 */
static uint64_t bitfield_replicate(uint64_t mask, unsigned int e)
{
    unsigned int width;

    assert(e != 0);
    /* Double the populated width each step until the full word is filled */
    for (width = e; width < 64; width *= 2) {
        mask |= mask << width;
    }
    return mask;
}
2900
/* Return a value with the bottom 'length' bits set (0 < length <= 64) */
static inline uint64_t bitmask64(unsigned int length)
{
    assert(length > 0 && length <= 64);
    /* Avoid the undefined 1ULL << 64 for the full-width case */
    return length == 64 ? ~0ULL : (1ULL << length) - 1;
}
2907
/* Simplified variant of pseudocode DecodeBitMasks() for the case where we
 * only require the wmask. Returns false if the imms/immr/immn are a reserved
 * value (ie should cause a guest UNDEF exception), and true if they are
 * valid, in which case the decoded bit pattern is written to result.
 *
 * result: output; receives the decoded 64 bit mask on success
 * immn:   the N field of the instruction (0 or 1)
 * imms:   the imms field (encodes element size and run length, see below)
 * immr:   the immr field (rotation amount, taken modulo the element size)
 */
static bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn,
                                   unsigned int imms, unsigned int immr)
{
    uint64_t mask;
    unsigned e, levels, s, r;
    int len;

    assert(immn < 2 && imms < 64 && immr < 64);

    /* The bit patterns we create here are 64 bit patterns which
     * are vectors of identical elements of size e = 2, 4, 8, 16, 32 or
     * 64 bits each. Each element contains the same value: a run
     * of between 1 and e-1 non-zero bits, rotated within the
     * element by between 0 and e-1 bits.
     *
     * The element size and run length are encoded into immn (1 bit)
     * and imms (6 bits) as follows:
     * 64 bit elements: immn = 1, imms = <length of run - 1>
     * 32 bit elements: immn = 0, imms = 0 : <length of run - 1>
     * 16 bit elements: immn = 0, imms = 10 : <length of run - 1>
     *  8 bit elements: immn = 0, imms = 110 : <length of run - 1>
     *  4 bit elements: immn = 0, imms = 1110 : <length of run - 1>
     *  2 bit elements: immn = 0, imms = 11110 : <length of run - 1>
     * Notice that immn = 0, imms = 11111x is the only combination
     * not covered by one of the above options; this is reserved.
     * Further, <length of run - 1> all-ones is a reserved pattern.
     *
     * In all cases the rotation is by immr % e (and immr is 6 bits).
     */

    /* First determine the element size */
    len = 31 - clz32((immn << 6) | (~imms & 0x3f));
    if (len < 1) {
        /* This is the immn == 0, imms == 0x11111x case */
        return false;
    }
    e = 1 << len; /* element size in bits: 2, 4, 8, 16, 32 or 64 */

    levels = e - 1; /* mask of the imms/immr bits relevant at this size */
    s = imms & levels;
    r = immr & levels;

    if (s == levels) {
        /* <length of run - 1> mustn't be all-ones. */
        return false;
    }

    /* Create the value of one element: s+1 set bits rotated
     * by r within the element (which is e bits wide)...
     */
    mask = bitmask64(s + 1);
    if (r) {
        mask = (mask >> r) | (mask << (e - r));
        mask &= bitmask64(e);
    }
    /* ...then replicate the element over the whole 64 bit value */
    mask = bitfield_replicate(mask, e);
    *result = mask;
    return true;
}
2973
2974/* C3.4.4 Logical (immediate)
2975 *   31  30 29 28         23 22  21  16 15  10 9    5 4    0
2976 * +----+-----+-------------+---+------+------+------+------+
2977 * | sf | opc | 1 0 0 1 0 0 | N | immr | imms |  Rn  |  Rd  |
2978 * +----+-----+-------------+---+------+------+------+------+
2979 */
2980static void disas_logic_imm(DisasContext *s, uint32_t insn)
2981{
2982    unsigned int sf, opc, is_n, immr, imms, rn, rd;
2983    TCGv_i64 tcg_rd, tcg_rn;
2984    uint64_t wmask;
2985    bool is_and = false;
2986
2987    sf = extract32(insn, 31, 1);
2988    opc = extract32(insn, 29, 2);
2989    is_n = extract32(insn, 22, 1);
2990    immr = extract32(insn, 16, 6);
2991    imms = extract32(insn, 10, 6);
2992    rn = extract32(insn, 5, 5);
2993    rd = extract32(insn, 0, 5);
2994
2995    if (!sf && is_n) {
2996        unallocated_encoding(s);
2997        return;
2998    }
2999
3000    if (opc == 0x3) { /* ANDS */
3001        tcg_rd = cpu_reg(s, rd);
3002    } else {
3003        tcg_rd = cpu_reg_sp(s, rd);
3004    }
3005    tcg_rn = cpu_reg(s, rn);
3006
3007    if (!logic_imm_decode_wmask(&wmask, is_n, imms, immr)) {
3008        /* some immediate field values are reserved */
3009        unallocated_encoding(s);
3010        return;
3011    }
3012
3013    if (!sf) {
3014        wmask &= 0xffffffff;
3015    }
3016
3017    switch (opc) {
3018    case 0x3: /* ANDS */
3019    case 0x0: /* AND */
3020        tcg_gen_andi_i64(tcg_rd, tcg_rn, wmask);
3021        is_and = true;
3022        break;
3023    case 0x1: /* ORR */
3024        tcg_gen_ori_i64(tcg_rd, tcg_rn, wmask);
3025        break;
3026    case 0x2: /* EOR */
3027        tcg_gen_xori_i64(tcg_rd, tcg_rn, wmask);
3028        break;
3029    default:
3030        assert(FALSE); /* must handle all above */
3031        break;
3032    }
3033
3034    if (!sf && !is_and) {
3035        /* zero extend final result; we know we can skip this for AND
3036         * since the immediate had the high 32 bits clear.
3037         */
3038        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3039    }
3040
3041    if (opc == 3) { /* ANDS */
3042        gen_logic_CC(sf, tcg_rd);
3043    }
3044}
3045
3046/*
3047 * C3.4.5 Move wide (immediate)
3048 *
3049 *  31 30 29 28         23 22 21 20             5 4    0
3050 * +--+-----+-------------+-----+----------------+------+
3051 * |sf| opc | 1 0 0 1 0 1 |  hw |  imm16         |  Rd  |
3052 * +--+-----+-------------+-----+----------------+------+
3053 *
3054 * sf: 0 -> 32 bit, 1 -> 64 bit
3055 * opc: 00 -> N, 10 -> Z, 11 -> K
3056 * hw: shift/16 (0,16, and sf only 32, 48)
3057 */
3058static void disas_movw_imm(DisasContext *s, uint32_t insn)
3059{
3060    int rd = extract32(insn, 0, 5);
3061    uint64_t imm = extract32(insn, 5, 16);
3062    int sf = extract32(insn, 31, 1);
3063    int opc = extract32(insn, 29, 2);
3064    int pos = extract32(insn, 21, 2) << 4;
3065    TCGv_i64 tcg_rd = cpu_reg(s, rd);
3066    TCGv_i64 tcg_imm;
3067
3068    if (!sf && (pos >= 32)) {
3069        unallocated_encoding(s);
3070        return;
3071    }
3072
3073    switch (opc) {
3074    case 0: /* MOVN */
3075    case 2: /* MOVZ */
3076        imm <<= pos;
3077        if (opc == 0) {
3078            imm = ~imm;
3079        }
3080        if (!sf) {
3081            imm &= 0xffffffffu;
3082        }
3083        tcg_gen_movi_i64(tcg_rd, imm);
3084        break;
3085    case 3: /* MOVK */
3086        tcg_imm = tcg_const_i64(imm);
3087        tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_imm, pos, 16);
3088        tcg_temp_free_i64(tcg_imm);
3089        if (!sf) {
3090            tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3091        }
3092        break;
3093    default:
3094        unallocated_encoding(s);
3095        break;
3096    }
3097}
3098
/* C3.4.2 Bitfield
 *   31  30 29 28         23 22  21  16 15  10 9    5 4    0
 * +----+-----+-------------+---+------+------+------+------+
 * | sf | opc | 1 0 0 1 1 0 | N | immr | imms |  Rn  |  Rd  |
 * +----+-----+-------------+---+------+------+------+------+
 *
 * opc: 00 -> SBFM, 01 -> BFM, 10 -> UBFM (11 is reserved).
 * Common aliases (SXTB/SXTH/SXTW, UXTB/UXTH, ASR, LSR, LSL) are
 * special-cased below so they emit simpler TCG ops.
 */
static void disas_bitfield(DisasContext *s, uint32_t insn)
{
    unsigned int sf, n, opc, ri, si, rn, rd, bitsize, pos, len;
    TCGv_i64 tcg_rd, tcg_tmp;

    sf = extract32(insn, 31, 1);
    opc = extract32(insn, 29, 2);
    n = extract32(insn, 22, 1);
    ri = extract32(insn, 16, 6);  /* immr */
    si = extract32(insn, 10, 6);  /* imms */
    rn = extract32(insn, 5, 5);
    rd = extract32(insn, 0, 5);
    bitsize = sf ? 64 : 32;

    /* N must equal sf, immr/imms must fit the operand width */
    if (sf != n || ri >= bitsize || si >= bitsize || opc > 2) {
        unallocated_encoding(s);
        return;
    }

    tcg_rd = cpu_reg(s, rd);

    /* Suppress the zero-extend for !sf.  Since RI and SI are constrained
       to be smaller than bitsize, we'll never reference data outside the
       low 32-bits anyway.  */
    tcg_tmp = read_cpu_reg(s, rn, 1);

    /* Recognize the common aliases.  */
    if (opc == 0) { /* SBFM */
        if (ri == 0) {
            if (si == 7) { /* SXTB */
                tcg_gen_ext8s_i64(tcg_rd, tcg_tmp);
                goto done;
            } else if (si == 15) { /* SXTH */
                tcg_gen_ext16s_i64(tcg_rd, tcg_tmp);
                goto done;
            } else if (si == 31) { /* SXTW */
                tcg_gen_ext32s_i64(tcg_rd, tcg_tmp);
                goto done;
            }
        }
        if (si == 63 || (si == 31 && ri <= si)) { /* ASR */
            if (si == 31) {
                /* 32-bit ASR: sign-extend first so the 64-bit shift
                 * sees the right sign bit */
                tcg_gen_ext32s_i64(tcg_tmp, tcg_tmp);
            }
            tcg_gen_sari_i64(tcg_rd, tcg_tmp, ri);
            goto done;
        }
    } else if (opc == 2) { /* UBFM */
        if (ri == 0) { /* UXTB, UXTH, plus non-canonical AND */
            tcg_gen_andi_i64(tcg_rd, tcg_tmp, bitmask64(si + 1));
            return;
        }
        if (si == 63 || (si == 31 && ri <= si)) { /* LSR */
            if (si == 31) {
                tcg_gen_ext32u_i64(tcg_tmp, tcg_tmp);
            }
            tcg_gen_shri_i64(tcg_rd, tcg_tmp, ri);
            return;
        }
        if (si + 1 == ri && si != bitsize - 1) { /* LSL */
            int shift = bitsize - 1 - si;
            tcg_gen_shli_i64(tcg_rd, tcg_tmp, shift);
            goto done;
        }
    }

    /* General case: BFM keeps the untouched Rd bits; SBFM/UBFM start
     * from a zeroed destination. */
    if (opc != 1) { /* SBFM or UBFM */
        tcg_gen_movi_i64(tcg_rd, 0);
    }

    /* do the bit move operation */
    if (si >= ri) {
        /* Wd<s-r:0> = Wn<s:r> */
        tcg_gen_shri_i64(tcg_tmp, tcg_tmp, ri);
        pos = 0;
        len = (si - ri) + 1;
    } else {
        /* Wd<32+s-r,32-r> = Wn<s:0> */
        pos = bitsize - ri;
        len = si + 1;
    }

    tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, pos, len);

    if (opc == 0) { /* SBFM - sign extend the destination field */
        tcg_gen_shli_i64(tcg_rd, tcg_rd, 64 - (pos + len));
        tcg_gen_sari_i64(tcg_rd, tcg_rd, 64 - (pos + len));
    }

 done:
    if (!sf) { /* zero extend final result */
        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
    }
}
3199
3200/* C3.4.3 Extract
3201 *   31  30  29 28         23 22   21  20  16 15    10 9    5 4    0
3202 * +----+------+-------------+---+----+------+--------+------+------+
3203 * | sf | op21 | 1 0 0 1 1 1 | N | o0 |  Rm  |  imms  |  Rn  |  Rd  |
3204 * +----+------+-------------+---+----+------+--------+------+------+
3205 */
3206static void disas_extract(DisasContext *s, uint32_t insn)
3207{
3208    unsigned int sf, n, rm, imm, rn, rd, bitsize, op21, op0;
3209
3210    sf = extract32(insn, 31, 1);
3211    n = extract32(insn, 22, 1);
3212    rm = extract32(insn, 16, 5);
3213    imm = extract32(insn, 10, 6);
3214    rn = extract32(insn, 5, 5);
3215    rd = extract32(insn, 0, 5);
3216    op21 = extract32(insn, 29, 2);
3217    op0 = extract32(insn, 21, 1);
3218    bitsize = sf ? 64 : 32;
3219
3220    if (sf != n || op21 || op0 || imm >= bitsize) {
3221        unallocated_encoding(s);
3222    } else {
3223        TCGv_i64 tcg_rd, tcg_rm, tcg_rn;
3224
3225        tcg_rd = cpu_reg(s, rd);
3226
3227        if (unlikely(imm == 0)) {
3228            /* tcg shl_i32/shl_i64 is undefined for 32/64 bit shifts,
3229             * so an extract from bit 0 is a special case.
3230             */
3231            if (sf) {
3232                tcg_gen_mov_i64(tcg_rd, cpu_reg(s, rm));
3233            } else {
3234                tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, rm));
3235            }
3236        } else if (rm == rn) { /* ROR */
3237            tcg_rm = cpu_reg(s, rm);
3238            if (sf) {
3239                tcg_gen_rotri_i64(tcg_rd, tcg_rm, imm);
3240            } else {
3241                TCGv_i32 tmp = tcg_temp_new_i32();
3242                tcg_gen_extrl_i64_i32(tmp, tcg_rm);
3243                tcg_gen_rotri_i32(tmp, tmp, imm);
3244                tcg_gen_extu_i32_i64(tcg_rd, tmp);
3245                tcg_temp_free_i32(tmp);
3246            }
3247        } else {
3248            tcg_rm = read_cpu_reg(s, rm, sf);
3249            tcg_rn = read_cpu_reg(s, rn, sf);
3250            tcg_gen_shri_i64(tcg_rm, tcg_rm, imm);
3251            tcg_gen_shli_i64(tcg_rn, tcg_rn, bitsize - imm);
3252            tcg_gen_or_i64(tcg_rd, tcg_rm, tcg_rn);
3253            if (!sf) {
3254                tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3255            }
3256        }
3257    }
3258}
3259
3260/* C3.4 Data processing - immediate */
3261static void disas_data_proc_imm(DisasContext *s, uint32_t insn)
3262{
3263    switch (extract32(insn, 23, 6)) {
3264    case 0x20: case 0x21: /* PC-rel. addressing */
3265        disas_pc_rel_adr(s, insn);
3266        break;
3267    case 0x22: case 0x23: /* Add/subtract (immediate) */
3268        disas_add_sub_imm(s, insn);
3269        break;
3270    case 0x24: /* Logical (immediate) */
3271        disas_logic_imm(s, insn);
3272        break;
3273    case 0x25: /* Move wide (immediate) */
3274        disas_movw_imm(s, insn);
3275        break;
3276    case 0x26: /* Bitfield */
3277        disas_bitfield(s, insn);
3278        break;
3279    case 0x27: /* Extract */
3280        disas_extract(s, insn);
3281        break;
3282    default:
3283        unallocated_encoding(s);
3284        break;
3285    }
3286}
3287
3288/* Shift a TCGv src by TCGv shift_amount, put result in dst.
3289 * Note that it is the caller's responsibility to ensure that the
3290 * shift amount is in range (ie 0..31 or 0..63) and provide the ARM
3291 * mandated semantics for out of range shifts.
3292 */
3293static void shift_reg(TCGv_i64 dst, TCGv_i64 src, int sf,
3294                      enum a64_shift_type shift_type, TCGv_i64 shift_amount)
3295{
3296    switch (shift_type) {
3297    case A64_SHIFT_TYPE_LSL:
3298        tcg_gen_shl_i64(dst, src, shift_amount);
3299        break;
3300    case A64_SHIFT_TYPE_LSR:
3301        tcg_gen_shr_i64(dst, src, shift_amount);
3302        break;
3303    case A64_SHIFT_TYPE_ASR:
3304        if (!sf) {
3305            tcg_gen_ext32s_i64(dst, src);
3306        }
3307        tcg_gen_sar_i64(dst, sf ? src : dst, shift_amount);
3308        break;
3309    case A64_SHIFT_TYPE_ROR:
3310        if (sf) {
3311            tcg_gen_rotr_i64(dst, src, shift_amount);
3312        } else {
3313            TCGv_i32 t0, t1;
3314            t0 = tcg_temp_new_i32();
3315            t1 = tcg_temp_new_i32();
3316            tcg_gen_extrl_i64_i32(t0, src);
3317            tcg_gen_extrl_i64_i32(t1, shift_amount);
3318            tcg_gen_rotr_i32(t0, t0, t1);
3319            tcg_gen_extu_i32_i64(dst, t0);
3320            tcg_temp_free_i32(t0);
3321            tcg_temp_free_i32(t1);
3322        }
3323        break;
3324    default:
3325        assert(FALSE); /* all shift types should be handled */
3326        break;
3327    }
3328
3329    if (!sf) { /* zero extend final result */
3330        tcg_gen_ext32u_i64(dst, dst);
3331    }
3332}
3333
3334/* Shift a TCGv src by immediate, put result in dst.
3335 * The shift amount must be in range (this should always be true as the
3336 * relevant instructions will UNDEF on bad shift immediates).
3337 */
3338static void shift_reg_imm(TCGv_i64 dst, TCGv_i64 src, int sf,
3339                          enum a64_shift_type shift_type, unsigned int shift_i)
3340{
3341    assert(shift_i < (sf ? 64 : 32));
3342
3343    if (shift_i == 0) {
3344        tcg_gen_mov_i64(dst, src);
3345    } else {
3346        TCGv_i64 shift_const;
3347
3348        shift_const = tcg_const_i64(shift_i);
3349        shift_reg(dst, src, sf, shift_type, shift_const);
3350        tcg_temp_free_i64(shift_const);
3351    }
3352}
3353
3354/* C3.5.10 Logical (shifted register)
3355 *   31  30 29 28       24 23   22 21  20  16 15    10 9    5 4    0
3356 * +----+-----+-----------+-------+---+------+--------+------+------+
3357 * | sf | opc | 0 1 0 1 0 | shift | N |  Rm  |  imm6  |  Rn  |  Rd  |
3358 * +----+-----+-----------+-------+---+------+--------+------+------+
3359 */
3360static void disas_logic_reg(DisasContext *s, uint32_t insn)
3361{
3362    TCGv_i64 tcg_rd, tcg_rn, tcg_rm;
3363    unsigned int sf, opc, shift_type, invert, rm, shift_amount, rn, rd;
3364
3365    sf = extract32(insn, 31, 1);
3366    opc = extract32(insn, 29, 2);
3367    shift_type = extract32(insn, 22, 2);
3368    invert = extract32(insn, 21, 1);
3369    rm = extract32(insn, 16, 5);
3370    shift_amount = extract32(insn, 10, 6);
3371    rn = extract32(insn, 5, 5);
3372    rd = extract32(insn, 0, 5);
3373
3374    if (!sf && (shift_amount & (1 << 5))) {
3375        unallocated_encoding(s);
3376        return;
3377    }
3378
3379    tcg_rd = cpu_reg(s, rd);
3380
3381    if (opc == 1 && shift_amount == 0 && shift_type == 0 && rn == 31) {
3382        /* Unshifted ORR and ORN with WZR/XZR is the standard encoding for
3383         * register-register MOV and MVN, so it is worth special casing.
3384         */
3385        tcg_rm = cpu_reg(s, rm);
3386        if (invert) {
3387            tcg_gen_not_i64(tcg_rd, tcg_rm);
3388            if (!sf) {
3389                tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3390            }
3391        } else {
3392            if (sf) {
3393                tcg_gen_mov_i64(tcg_rd, tcg_rm);
3394            } else {
3395                tcg_gen_ext32u_i64(tcg_rd, tcg_rm);
3396            }
3397        }
3398        return;
3399    }
3400
3401    tcg_rm = read_cpu_reg(s, rm, sf);
3402
3403    if (shift_amount) {
3404        shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, shift_amount);
3405    }
3406
3407    tcg_rn = cpu_reg(s, rn);
3408
3409    switch (opc | (invert << 2)) {
3410    case 0: /* AND */
3411    case 3: /* ANDS */
3412        tcg_gen_and_i64(tcg_rd, tcg_rn, tcg_rm);
3413        break;
3414    case 1: /* ORR */
3415        tcg_gen_or_i64(tcg_rd, tcg_rn, tcg_rm);
3416        break;
3417    case 2: /* EOR */
3418        tcg_gen_xor_i64(tcg_rd, tcg_rn, tcg_rm);
3419        break;
3420    case 4: /* BIC */
3421    case 7: /* BICS */
3422        tcg_gen_andc_i64(tcg_rd, tcg_rn, tcg_rm);
3423        break;
3424    case 5: /* ORN */
3425        tcg_gen_orc_i64(tcg_rd, tcg_rn, tcg_rm);
3426        break;
3427    case 6: /* EON */
3428        tcg_gen_eqv_i64(tcg_rd, tcg_rn, tcg_rm);
3429        break;
3430    default:
3431        assert(FALSE);
3432        break;
3433    }
3434
3435    if (!sf) {
3436        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3437    }
3438
3439    if (opc == 3) {
3440        gen_logic_CC(sf, tcg_rd);
3441    }
3442}
3443
3444/*
3445 * C3.5.1 Add/subtract (extended register)
3446 *
3447 *  31|30|29|28       24|23 22|21|20   16|15  13|12  10|9  5|4  0|
3448 * +--+--+--+-----------+-----+--+-------+------+------+----+----+
3449 * |sf|op| S| 0 1 0 1 1 | opt | 1|  Rm   |option| imm3 | Rn | Rd |
3450 * +--+--+--+-----------+-----+--+-------+------+------+----+----+
3451 *
3452 *  sf: 0 -> 32bit, 1 -> 64bit
3453 *  op: 0 -> add  , 1 -> sub
3454 *   S: 1 -> set flags
3455 * opt: 00
3456 * option: extension type (see DecodeRegExtend)
3457 * imm3: optional shift to Rm
3458 *
3459 * Rd = Rn + LSL(extend(Rm), amount)
3460 */
3461static void disas_add_sub_ext_reg(DisasContext *s, uint32_t insn)
3462{
3463    int rd = extract32(insn, 0, 5);
3464    int rn = extract32(insn, 5, 5);
3465    int imm3 = extract32(insn, 10, 3);
3466    int option = extract32(insn, 13, 3);
3467    int rm = extract32(insn, 16, 5);
3468    bool setflags = extract32(insn, 29, 1);
3469    bool sub_op = extract32(insn, 30, 1);
3470    bool sf = extract32(insn, 31, 1);
3471
3472    TCGv_i64 tcg_rm, tcg_rn; /* temps */
3473    TCGv_i64 tcg_rd;
3474    TCGv_i64 tcg_result;
3475
3476    if (imm3 > 4) {
3477        unallocated_encoding(s);
3478        return;
3479    }
3480
3481    /* non-flag setting ops may use SP */
3482    if (!setflags) {
3483        tcg_rd = cpu_reg_sp(s, rd);
3484    } else {
3485        tcg_rd = cpu_reg(s, rd);
3486    }
3487    tcg_rn = read_cpu_reg_sp(s, rn, sf);
3488
3489    tcg_rm = read_cpu_reg(s, rm, sf);
3490    ext_and_shift_reg(tcg_rm, tcg_rm, option, imm3);
3491
3492    tcg_result = tcg_temp_new_i64();
3493
3494    if (!setflags) {
3495        if (sub_op) {
3496            tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
3497        } else {
3498            tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
3499        }
3500    } else {
3501        if (sub_op) {
3502            gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
3503        } else {
3504            gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
3505        }
3506    }
3507
3508    if (sf) {
3509        tcg_gen_mov_i64(tcg_rd, tcg_result);
3510    } else {
3511        tcg_gen_ext32u_i64(tcg_rd, tcg_result);
3512    }
3513
3514    tcg_temp_free_i64(tcg_result);
3515}
3516
3517/*
3518 * C3.5.2 Add/subtract (shifted register)
3519 *
3520 *  31 30 29 28       24 23 22 21 20   16 15     10 9    5 4    0
3521 * +--+--+--+-----------+-----+--+-------+---------+------+------+
3522 * |sf|op| S| 0 1 0 1 1 |shift| 0|  Rm   |  imm6   |  Rn  |  Rd  |
3523 * +--+--+--+-----------+-----+--+-------+---------+------+------+
3524 *
3525 *    sf: 0 -> 32bit, 1 -> 64bit
3526 *    op: 0 -> add  , 1 -> sub
3527 *     S: 1 -> set flags
3528 * shift: 00 -> LSL, 01 -> LSR, 10 -> ASR, 11 -> RESERVED
3529 *  imm6: Shift amount to apply to Rm before the add/sub
3530 */
3531static void disas_add_sub_reg(DisasContext *s, uint32_t insn)
3532{
3533    int rd = extract32(insn, 0, 5);
3534    int rn = extract32(insn, 5, 5);
3535    int imm6 = extract32(insn, 10, 6);
3536    int rm = extract32(insn, 16, 5);
3537    int shift_type = extract32(insn, 22, 2);
3538    bool setflags = extract32(insn, 29, 1);
3539    bool sub_op = extract32(insn, 30, 1);
3540    bool sf = extract32(insn, 31, 1);
3541
3542    TCGv_i64 tcg_rd = cpu_reg(s, rd);
3543    TCGv_i64 tcg_rn, tcg_rm;
3544    TCGv_i64 tcg_result;
3545
3546    if ((shift_type == 3) || (!sf && (imm6 > 31))) {
3547        unallocated_encoding(s);
3548        return;
3549    }
3550
3551    tcg_rn = read_cpu_reg(s, rn, sf);
3552    tcg_rm = read_cpu_reg(s, rm, sf);
3553
3554    shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, imm6);
3555
3556    tcg_result = tcg_temp_new_i64();
3557
3558    if (!setflags) {
3559        if (sub_op) {
3560            tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
3561        } else {
3562            tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
3563        }
3564    } else {
3565        if (sub_op) {
3566            gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
3567        } else {
3568            gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
3569        }
3570    }
3571
3572    if (sf) {
3573        tcg_gen_mov_i64(tcg_rd, tcg_result);
3574    } else {
3575        tcg_gen_ext32u_i64(tcg_rd, tcg_result);
3576    }
3577
3578    tcg_temp_free_i64(tcg_result);
3579}
3580
/* C3.5.9 Data-processing (3 source)

   31 30  29 28       24 23 21  20  16  15  14  10 9    5 4    0
  +--+------+-----------+------+------+----+------+------+------+
  |sf| op54 | 1 1 0 1 1 | op31 |  Rm  | o0 |  Ra  |  Rn  |  Rd  |
  +--+------+-----------+------+------+----+------+------+------+

   Covers MADD, MSUB, SMADDL, SMSUBL, SMULH, UMADDL, UMSUBL, UMULH.
 */
static void disas_data_proc_3src(DisasContext *s, uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int ra = extract32(insn, 10, 5);
    int rm = extract32(insn, 16, 5);
    int op_id = (extract32(insn, 29, 3) << 4) |
        (extract32(insn, 21, 3) << 1) |
        extract32(insn, 15, 1);
    bool sf = extract32(insn, 31, 1);
    bool is_sub = extract32(op_id, 0, 1);   /* o0: 1 -> subtract from Ra */
    bool is_high = extract32(op_id, 2, 1);  /* set only for SMULH/UMULH below */
    bool is_signed = false;
    TCGv_i64 tcg_op1;
    TCGv_i64 tcg_op2;
    TCGv_i64 tcg_tmp;

    /* Note that op_id is sf:op54:op31:o0 so it includes the 32/64 size flag */
    switch (op_id) {
    case 0x42: /* SMADDL */
    case 0x43: /* SMSUBL */
    case 0x44: /* SMULH */
        is_signed = true;
        break;
    case 0x0: /* MADD (32bit) */
    case 0x1: /* MSUB (32bit) */
    case 0x40: /* MADD (64bit) */
    case 0x41: /* MSUB (64bit) */
    case 0x4a: /* UMADDL */
    case 0x4b: /* UMSUBL */
    case 0x4c: /* UMULH */
        break;
    default:
        unallocated_encoding(s);
        return;
    }

    /* SMULH/UMULH: Rd gets the high 64 bits of the 128 bit product */
    if (is_high) {
        TCGv_i64 low_bits = tcg_temp_new_i64(); /* low bits discarded */
        TCGv_i64 tcg_rd = cpu_reg(s, rd);
        TCGv_i64 tcg_rn = cpu_reg(s, rn);
        TCGv_i64 tcg_rm = cpu_reg(s, rm);

        if (is_signed) {
            tcg_gen_muls2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
        } else {
            tcg_gen_mulu2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
        }

        tcg_temp_free_i64(low_bits);
        return;
    }

    tcg_op1 = tcg_temp_new_i64();
    tcg_op2 = tcg_temp_new_i64();
    tcg_tmp = tcg_temp_new_i64();

    /* MADD/MSUB use the registers as-is; the *ADDL/*SUBL forms multiply
     * the low 32 bits of Rn/Rm, sign- or zero-extended to 64 bits.
     */
    if (op_id < 0x42) {
        tcg_gen_mov_i64(tcg_op1, cpu_reg(s, rn));
        tcg_gen_mov_i64(tcg_op2, cpu_reg(s, rm));
    } else {
        if (is_signed) {
            tcg_gen_ext32s_i64(tcg_op1, cpu_reg(s, rn));
            tcg_gen_ext32s_i64(tcg_op2, cpu_reg(s, rm));
        } else {
            tcg_gen_ext32u_i64(tcg_op1, cpu_reg(s, rn));
            tcg_gen_ext32u_i64(tcg_op2, cpu_reg(s, rm));
        }
    }

    if (ra == 31 && !is_sub) {
        /* Special-case MADD with rA == XZR; it is the standard MUL alias */
        tcg_gen_mul_i64(cpu_reg(s, rd), tcg_op1, tcg_op2);
    } else {
        tcg_gen_mul_i64(tcg_tmp, tcg_op1, tcg_op2);
        if (is_sub) {
            tcg_gen_sub_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
        } else {
            tcg_gen_add_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
        }
    }

    if (!sf) {
        tcg_gen_ext32u_i64(cpu_reg(s, rd), cpu_reg(s, rd));
    }

    tcg_temp_free_i64(tcg_op1);
    tcg_temp_free_i64(tcg_op2);
    tcg_temp_free_i64(tcg_tmp);
}
3679
3680/* C3.5.3 - Add/subtract (with carry)
3681 *  31 30 29 28 27 26 25 24 23 22 21  20  16  15   10  9    5 4   0
3682 * +--+--+--+------------------------+------+---------+------+-----+
3683 * |sf|op| S| 1  1  0  1  0  0  0  0 |  rm  | opcode2 |  Rn  |  Rd |
3684 * +--+--+--+------------------------+------+---------+------+-----+
3685 *                                            [000000]
3686 */
3687
3688static void disas_adc_sbc(DisasContext *s, uint32_t insn)
3689{
3690    unsigned int sf, op, setflags, rm, rn, rd;
3691    TCGv_i64 tcg_y, tcg_rn, tcg_rd;
3692
3693    if (extract32(insn, 10, 6) != 0) {
3694        unallocated_encoding(s);
3695        return;
3696    }
3697
3698    sf = extract32(insn, 31, 1);
3699    op = extract32(insn, 30, 1);
3700    setflags = extract32(insn, 29, 1);
3701    rm = extract32(insn, 16, 5);
3702    rn = extract32(insn, 5, 5);
3703    rd = extract32(insn, 0, 5);
3704
3705    tcg_rd = cpu_reg(s, rd);
3706    tcg_rn = cpu_reg(s, rn);
3707
3708    if (op) {
3709        tcg_y = new_tmp_a64(s);
3710        tcg_gen_not_i64(tcg_y, cpu_reg(s, rm));
3711    } else {
3712        tcg_y = cpu_reg(s, rm);
3713    }
3714
3715    if (setflags) {
3716        gen_adc_CC(sf, tcg_rd, tcg_rn, tcg_y);
3717    } else {
3718        gen_adc(sf, tcg_rd, tcg_rn, tcg_y);
3719    }
3720}
3721
3722/* C3.5.4 - C3.5.5 Conditional compare (immediate / register)
3723 *  31 30 29 28 27 26 25 24 23 22 21  20    16 15  12  11  10  9   5  4 3   0
3724 * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
3725 * |sf|op| S| 1  1  0  1  0  0  1  0 |imm5/rm | cond |i/r |o2|  Rn  |o3|nzcv |
3726 * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
3727 *        [1]                             y                [0]       [0]
3728 */
static void disas_cc(DisasContext *s, uint32_t insn)
{
    unsigned int sf, op, y, cond, rn, nzcv, is_imm;
    TCGv_i32 tcg_t0, tcg_t1, tcg_t2;
    TCGv_i64 tcg_tmp, tcg_y, tcg_rn;
    DisasCompare c;

    if (!extract32(insn, 29, 1)) {
        /* S == 0 is unallocated: CCMN/CCMP always write the flags */
        unallocated_encoding(s);
        return;
    }
    if (insn & (1 << 10 | 1 << 4)) {
        /* o2 == 1 or o3 == 1 is unallocated */
        unallocated_encoding(s);
        return;
    }
    sf = extract32(insn, 31, 1);
    op = extract32(insn, 30, 1); /* 0: CCMN (add), 1: CCMP (subtract) */
    is_imm = extract32(insn, 11, 1);
    y = extract32(insn, 16, 5); /* y = rm (reg) or imm5 (imm) */
    cond = extract32(insn, 12, 4);
    rn = extract32(insn, 5, 5);
    nzcv = extract32(insn, 0, 4);

    /* Set T0 = !COND.  */
    tcg_t0 = tcg_temp_new_i32();
    arm_test_cc(&c, cond);
    tcg_gen_setcondi_i32(tcg_invert_cond(c.cond), tcg_t0, c.value, 0);
    arm_free_cc(&c);

    /* Load the arguments for the new comparison.  */
    if (is_imm) {
        tcg_y = new_tmp_a64(s);
        tcg_gen_movi_i64(tcg_y, y);
    } else {
        tcg_y = cpu_reg(s, y);
    }
    tcg_rn = cpu_reg(s, rn);

    /* Set the flags for the new comparison.  */
    tcg_tmp = tcg_temp_new_i64();
    if (op) {
        gen_sub_CC(sf, tcg_tmp, tcg_rn, tcg_y);
    } else {
        gen_add_CC(sf, tcg_tmp, tcg_rn, tcg_y);
    }
    tcg_temp_free_i64(tcg_tmp);

    /* If COND was false, force the flags to #nzcv.  Compute two masks
     * to help with this: T1 = (COND ? 0 : -1), T2 = (COND ? -1 : 0).
     * For tcg hosts that support ANDC, we can make do with just T1.
     * In either case, allow the tcg optimizer to delete any unused mask.
     */
    tcg_t1 = tcg_temp_new_i32();
    tcg_t2 = tcg_temp_new_i32();
    tcg_gen_neg_i32(tcg_t1, tcg_t0);
    tcg_gen_subi_i32(tcg_t2, tcg_t0, 1);

    /* The masking below relies on how QEMU stores the ARM flags:
     * NF and VF carry their flag in bit 31, CF is 0/1 in bit 0, and
     * ZF is "inverted" (Z is considered set when ZF == 0).  Setting
     * a flag ORs the appropriate mask in; clearing it masks the bits
     * out (ANDC with T1, or plain AND with T2 on hosts without ANDC).
     */
    if (nzcv & 8) { /* N */
        tcg_gen_or_i32(cpu_NF, cpu_NF, tcg_t1);
    } else {
        if (TCG_TARGET_HAS_andc_i32) {
            tcg_gen_andc_i32(cpu_NF, cpu_NF, tcg_t1);
        } else {
            tcg_gen_and_i32(cpu_NF, cpu_NF, tcg_t2);
        }
    }
    if (nzcv & 4) { /* Z */
        if (TCG_TARGET_HAS_andc_i32) {
            tcg_gen_andc_i32(cpu_ZF, cpu_ZF, tcg_t1);
        } else {
            tcg_gen_and_i32(cpu_ZF, cpu_ZF, tcg_t2);
        }
    } else {
        tcg_gen_or_i32(cpu_ZF, cpu_ZF, tcg_t0);
    }
    if (nzcv & 2) { /* C */
        tcg_gen_or_i32(cpu_CF, cpu_CF, tcg_t0);
    } else {
        if (TCG_TARGET_HAS_andc_i32) {
            tcg_gen_andc_i32(cpu_CF, cpu_CF, tcg_t1);
        } else {
            tcg_gen_and_i32(cpu_CF, cpu_CF, tcg_t2);
        }
    }
    if (nzcv & 1) { /* V */
        tcg_gen_or_i32(cpu_VF, cpu_VF, tcg_t1);
    } else {
        if (TCG_TARGET_HAS_andc_i32) {
            tcg_gen_andc_i32(cpu_VF, cpu_VF, tcg_t1);
        } else {
            tcg_gen_and_i32(cpu_VF, cpu_VF, tcg_t2);
        }
    }
    tcg_temp_free_i32(tcg_t0);
    tcg_temp_free_i32(tcg_t1);
    tcg_temp_free_i32(tcg_t2);
}
3826
3827/* C3.5.6 Conditional select
3828 *   31   30  29  28             21 20  16 15  12 11 10 9    5 4    0
3829 * +----+----+---+-----------------+------+------+-----+------+------+
3830 * | sf | op | S | 1 1 0 1 0 1 0 0 |  Rm  | cond | op2 |  Rn  |  Rd  |
3831 * +----+----+---+-----------------+------+------+-----+------+------+
3832 */
static void disas_cond_select(DisasContext *s, uint32_t insn)
{
    unsigned int sf, else_inv, rm, cond, else_inc, rn, rd;
    TCGv_i64 tcg_rd, zero;
    DisasCompare64 c;

    if (extract32(insn, 29, 1) || extract32(insn, 11, 1)) {
        /* S == 1 or op2<1> == 1 */
        unallocated_encoding(s);
        return;
    }
    sf = extract32(insn, 31, 1);
    else_inv = extract32(insn, 30, 1);  /* op bit: invert the "else" value */
    rm = extract32(insn, 16, 5);
    cond = extract32(insn, 12, 4);
    else_inc = extract32(insn, 10, 1);  /* op2<0>: increment the "else" value */
    rn = extract32(insn, 5, 5);
    rd = extract32(insn, 0, 5);

    tcg_rd = cpu_reg(s, rd);

    a64_test_cc(&c, cond);
    zero = tcg_const_i64(0);

    if (rn == 31 && rm == 31 && (else_inc ^ else_inv)) {
        /* CSET & CSETM: both operands are XZR, so materialise 0/1
         * directly from the condition (negated to 0/-1 for CSETM).
         */
        tcg_gen_setcond_i64(tcg_invert_cond(c.cond), tcg_rd, c.value, zero);
        if (else_inv) {
            tcg_gen_neg_i64(tcg_rd, tcg_rd);
        }
    } else {
        TCGv_i64 t_true = cpu_reg(s, rn);
        /* read_cpu_reg hands back a scratch copy, so t_false can be
         * negated/inverted/incremented in place without touching Xm.
         */
        TCGv_i64 t_false = read_cpu_reg(s, rm, 1);
        if (else_inv && else_inc) {
            tcg_gen_neg_i64(t_false, t_false);      /* CSNEG */
        } else if (else_inv) {
            tcg_gen_not_i64(t_false, t_false);      /* CSINV */
        } else if (else_inc) {
            tcg_gen_addi_i64(t_false, t_false, 1);  /* CSINC */
        }
        tcg_gen_movcond_i64(c.cond, tcg_rd, c.value, zero, t_true, t_false);
    }

    tcg_temp_free_i64(zero);
    a64_free_cc(&c);

    if (!sf) {
        /* 32-bit form: zero-extend the result into Xd */
        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
    }
}
3883
3884static void handle_clz(DisasContext *s, unsigned int sf,
3885                       unsigned int rn, unsigned int rd)
3886{
3887    TCGv_i64 tcg_rd, tcg_rn;
3888    tcg_rd = cpu_reg(s, rd);
3889    tcg_rn = cpu_reg(s, rn);
3890
3891    if (sf) {
3892        gen_helper_clz64(tcg_rd, tcg_rn);
3893    } else {
3894        TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
3895        tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
3896        gen_helper_clz(tcg_tmp32, tcg_tmp32);
3897        tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
3898        tcg_temp_free_i32(tcg_tmp32);
3899    }
3900}
3901
3902static void handle_cls(DisasContext *s, unsigned int sf,
3903                       unsigned int rn, unsigned int rd)
3904{
3905    TCGv_i64 tcg_rd, tcg_rn;
3906    tcg_rd = cpu_reg(s, rd);
3907    tcg_rn = cpu_reg(s, rn);
3908
3909    if (sf) {
3910        gen_helper_cls64(tcg_rd, tcg_rn);
3911    } else {
3912        TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
3913        tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
3914        gen_helper_cls32(tcg_tmp32, tcg_tmp32);
3915        tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
3916        tcg_temp_free_i32(tcg_tmp32);
3917    }
3918}
3919
3920static void handle_rbit(DisasContext *s, unsigned int sf,
3921                        unsigned int rn, unsigned int rd)
3922{
3923    TCGv_i64 tcg_rd, tcg_rn;
3924    tcg_rd = cpu_reg(s, rd);
3925    tcg_rn = cpu_reg(s, rn);
3926
3927    if (sf) {
3928        gen_helper_rbit64(tcg_rd, tcg_rn);
3929    } else {
3930        TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
3931        tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
3932        gen_helper_rbit(tcg_tmp32, tcg_tmp32);
3933        tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
3934        tcg_temp_free_i32(tcg_tmp32);
3935    }
3936}
3937
/* C5.6.149 REV with sf==1, opcode==3 ("REV64"): byte-reverse all eight
 * bytes of Xn into Xd.  The sf==0 form of this opcode is unallocated.
 */
static void handle_rev64(DisasContext *s, unsigned int sf,
                         unsigned int rn, unsigned int rd)
{
    if (!sf) {
        unallocated_encoding(s);
        return;
    }
    tcg_gen_bswap64_i64(cpu_reg(s, rd), cpu_reg(s, rn));
}
3948
3949/* C5.6.149 REV with sf==0, opcode==2
3950 * C5.6.151 REV32 (sf==1, opcode==2)
3951 */
3952static void handle_rev32(DisasContext *s, unsigned int sf,
3953                         unsigned int rn, unsigned int rd)
3954{
3955    TCGv_i64 tcg_rd = cpu_reg(s, rd);
3956
3957    if (sf) {
3958        TCGv_i64 tcg_tmp = tcg_temp_new_i64();
3959        TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
3960
3961        /* bswap32_i64 requires zero high word */
3962        tcg_gen_ext32u_i64(tcg_tmp, tcg_rn);
3963        tcg_gen_bswap32_i64(tcg_rd, tcg_tmp);
3964        tcg_gen_shri_i64(tcg_tmp, tcg_rn, 32);
3965        tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp);
3966        tcg_gen_concat32_i64(tcg_rd, tcg_rd, tcg_tmp);
3967
3968        tcg_temp_free_i64(tcg_tmp);
3969    } else {
3970        tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, rn));
3971        tcg_gen_bswap32_i64(tcg_rd, tcg_rd);
3972    }
3973}
3974
/* C5.6.150 REV16 (opcode==1): byte-reverse each 16-bit halfword of Rn
 * independently, assembling the result halfword-by-halfword in Rd.
 */
static void handle_rev16(DisasContext *s, unsigned int sf,
                         unsigned int rn, unsigned int rd)
{
    TCGv_i64 tcg_rd = cpu_reg(s, rd);
    TCGv_i64 tcg_tmp = tcg_temp_new_i64();
    TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);

    /* Halfword 0: mask (bswap16_i64 needs the upper bits clear),
     * swap, and leave the result in the low 16 bits of Rd.
     */
    tcg_gen_andi_i64(tcg_tmp, tcg_rn, 0xffff);
    tcg_gen_bswap16_i64(tcg_rd, tcg_tmp);

    /* Halfword 1: shift down, mask, swap, deposit into bits [31:16] */
    tcg_gen_shri_i64(tcg_tmp, tcg_rn, 16);
    tcg_gen_andi_i64(tcg_tmp, tcg_tmp, 0xffff);
    tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
    tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, 16, 16);

    if (sf) {
        /* 64-bit form: halfwords 2 and 3 as well */
        tcg_gen_shri_i64(tcg_tmp, tcg_rn, 32);
        tcg_gen_andi_i64(tcg_tmp, tcg_tmp, 0xffff);
        tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
        tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, 32, 16);

        /* Halfword 3: the shift by 48 already leaves only 16 bits */
        tcg_gen_shri_i64(tcg_tmp, tcg_rn, 48);
        tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
        tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, 48, 16);
    }

    tcg_temp_free_i64(tcg_tmp);
}
4004
4005/* C3.5.7 Data-processing (1 source)
4006 *   31  30  29  28             21 20     16 15    10 9    5 4    0
4007 * +----+---+---+-----------------+---------+--------+------+------+
4008 * | sf | 1 | S | 1 1 0 1 0 1 1 0 | opcode2 | opcode |  Rn  |  Rd  |
4009 * +----+---+---+-----------------+---------+--------+------+------+
4010 */
4011static void disas_data_proc_1src(DisasContext *s, uint32_t insn)
4012{
4013    unsigned int sf, opcode, rn, rd;
4014
4015    if (extract32(insn, 29, 1) || extract32(insn, 16, 5)) {
4016        unallocated_encoding(s);
4017        return;
4018    }
4019
4020    sf = extract32(insn, 31, 1);
4021    opcode = extract32(insn, 10, 6);
4022    rn = extract32(insn, 5, 5);
4023    rd = extract32(insn, 0, 5);
4024
4025    switch (opcode) {
4026    case 0: /* RBIT */
4027        handle_rbit(s, sf, rn, rd);
4028        break;
4029    case 1: /* REV16 */
4030        handle_rev16(s, sf, rn, rd);
4031        break;
4032    case 2: /* REV32 */
4033        handle_rev32(s, sf, rn, rd);
4034        break;
4035    case 3: /* REV64 */
4036        handle_rev64(s, sf, rn, rd);
4037        break;
4038    case 4: /* CLZ */
4039        handle_clz(s, sf, rn, rd);
4040        break;
4041    case 5: /* CLS */
4042        handle_cls(s, sf, rn, rd);
4043        break;
4044    }
4045}
4046
/* UDIV/SDIV: Rd = Rn / Rm via the 64-bit divide helpers.  */
static void handle_div(DisasContext *s, bool is_signed, unsigned int sf,
                       unsigned int rm, unsigned int rn, unsigned int rd)
{
    TCGv_i64 tcg_n, tcg_m, tcg_rd;
    tcg_rd = cpu_reg(s, rd);

    if (!sf && is_signed) {
        /* 32-bit SDIV: sign-extend both operands into fresh temps so
         * the 64-bit signed-divide helper sees the correct values.
         */
        tcg_n = new_tmp_a64(s);
        tcg_m = new_tmp_a64(s);
        tcg_gen_ext32s_i64(tcg_n, cpu_reg(s, rn));
        tcg_gen_ext32s_i64(tcg_m, cpu_reg(s, rm));
    } else {
        /* 64-bit, or 32-bit unsigned (zero-extension suffices) */
        tcg_n = read_cpu_reg(s, rn, sf);
        tcg_m = read_cpu_reg(s, rm, sf);
    }

    if (is_signed) {
        gen_helper_sdiv64(tcg_rd, tcg_n, tcg_m);
    } else {
        gen_helper_udiv64(tcg_rd, tcg_n, tcg_m);
    }

    if (!sf) { /* zero extend final result */
        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
    }
}
4073
4074/* C5.6.115 LSLV, C5.6.118 LSRV, C5.6.17 ASRV, C5.6.154 RORV */
4075static void handle_shift_reg(DisasContext *s,
4076                             enum a64_shift_type shift_type, unsigned int sf,
4077                             unsigned int rm, unsigned int rn, unsigned int rd)
4078{
4079    TCGv_i64 tcg_shift = tcg_temp_new_i64();
4080    TCGv_i64 tcg_rd = cpu_reg(s, rd);
4081    TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
4082
4083    tcg_gen_andi_i64(tcg_shift, cpu_reg(s, rm), sf ? 63 : 31);
4084    shift_reg(tcg_rd, tcg_rn, sf, shift_type, tcg_shift);
4085    tcg_temp_free_i64(tcg_shift);
4086}
4087
4088/* CRC32[BHWX], CRC32C[BHWX] */
4089static void handle_crc32(DisasContext *s,
4090                         unsigned int sf, unsigned int sz, bool crc32c,
4091                         unsigned int rm, unsigned int rn, unsigned int rd)
4092{
4093    TCGv_i64 tcg_acc, tcg_val;
4094    TCGv_i32 tcg_bytes;
4095
4096    if (!arm_dc_feature(s, ARM_FEATURE_CRC)
4097        || (sf == 1 && sz != 3)
4098        || (sf == 0 && sz == 3)) {
4099        unallocated_encoding(s);
4100        return;
4101    }
4102
4103    if (sz == 3) {
4104        tcg_val = cpu_reg(s, rm);
4105    } else {
4106        uint64_t mask;
4107        switch (sz) {
4108        case 0:
4109            mask = 0xFF;
4110            break;
4111        case 1:
4112            mask = 0xFFFF;
4113            break;
4114        case 2:
4115            mask = 0xFFFFFFFF;
4116            break;
4117        default:
4118            g_assert_not_reached();
4119        }
4120        tcg_val = new_tmp_a64(s);
4121        tcg_gen_andi_i64(tcg_val, cpu_reg(s, rm), mask);
4122    }
4123
4124    tcg_acc = cpu_reg(s, rn);
4125    tcg_bytes = tcg_const_i32(1 << sz);
4126
4127    if (crc32c) {
4128        gen_helper_crc32c_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
4129    } else {
4130        gen_helper_crc32_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
4131    }
4132
4133    tcg_temp_free_i32(tcg_bytes);
4134}
4135
4136/* C3.5.8 Data-processing (2 source)
4137 *   31   30  29 28             21 20  16 15    10 9    5 4    0
4138 * +----+---+---+-----------------+------+--------+------+------+
4139 * | sf | 0 | S | 1 1 0 1 0 1 1 0 |  Rm  | opcode |  Rn  |  Rd  |
4140 * +----+---+---+-----------------+------+--------+------+------+
4141 */
static void disas_data_proc_2src(DisasContext *s, uint32_t insn)
{
    unsigned int sf, rm, opcode, rn, rd;
    sf = extract32(insn, 31, 1);
    rm = extract32(insn, 16, 5);
    opcode = extract32(insn, 10, 6);
    rn = extract32(insn, 5, 5);
    rd = extract32(insn, 0, 5);

    if (extract32(insn, 29, 1)) {
        /* S == 1 is unallocated for this group */
        unallocated_encoding(s);
        return;
    }

    switch (opcode) {
    case 2: /* UDIV */
        handle_div(s, false, sf, rm, rn, rd);
        break;
    case 3: /* SDIV */
        handle_div(s, true, sf, rm, rn, rd);
        break;
    case 8: /* LSLV */
        handle_shift_reg(s, A64_SHIFT_TYPE_LSL, sf, rm, rn, rd);
        break;
    case 9: /* LSRV */
        handle_shift_reg(s, A64_SHIFT_TYPE_LSR, sf, rm, rn, rd);
        break;
    case 10: /* ASRV */
        handle_shift_reg(s, A64_SHIFT_TYPE_ASR, sf, rm, rn, rd);
        break;
    case 11: /* RORV */
        handle_shift_reg(s, A64_SHIFT_TYPE_ROR, sf, rm, rn, rd);
        break;
    case 16:
    case 17:
    case 18:
    case 19:
    case 20:
    case 21:
    case 22:
    case 23: /* CRC32 */
    {
        /* opcode<1:0> is the size, opcode<2> selects the CRC32C variant */
        int sz = extract32(opcode, 0, 2);
        bool crc32c = extract32(opcode, 2, 1);
        handle_crc32(s, sf, sz, crc32c, rm, rn, rd);
        break;
    }
    default:
        unallocated_encoding(s);
        break;
    }
}
4194
4195/* C3.5 Data processing - register */
/* C3.5 Data processing - register: top-level dispatcher keyed on
 * insn bits [28:24], with bits [23:21] sub-dispatching the 0x1a group.
 */
static void disas_data_proc_reg(DisasContext *s, uint32_t insn)
{
    switch (extract32(insn, 24, 5)) {
    case 0x0a: /* Logical (shifted register) */
        disas_logic_reg(s, insn);
        break;
    case 0x0b: /* Add/subtract */
        if (insn & (1 << 21)) { /* (extended register) */
            disas_add_sub_ext_reg(s, insn);
        } else {
            disas_add_sub_reg(s, insn);
        }
        break;
    case 0x1b: /* Data-processing (3 source) */
        disas_data_proc_3src(s, insn);
        break;
    case 0x1a:
        switch (extract32(insn, 21, 3)) {
        case 0x0: /* Add/subtract (with carry) */
            disas_adc_sbc(s, insn);
            break;
        case 0x2: /* Conditional compare */
            disas_cc(s, insn); /* both imm and reg forms */
            break;
        case 0x4: /* Conditional select */
            disas_cond_select(s, insn);
            break;
        case 0x6: /* Data-processing */
            if (insn & (1 << 30)) { /* (1 source) */
                disas_data_proc_1src(s, insn);
            } else {            /* (2 source) */
                disas_data_proc_2src(s, insn);
            }
            break;
        default:
            unallocated_encoding(s);
            break;
        }
        break;
    default:
        unallocated_encoding(s);
        break;
    }
}
4240
/* Emit code for a scalar FP compare, committing the result to the
 * NZCV flags.  cmp_with_zero compares Rn against +0.0 instead of Rm;
 * signal_all_nans selects the signalling compare helpers (the FCMPE
 * forms).
 */
static void handle_fp_compare(DisasContext *s, bool is_double,
                              unsigned int rn, unsigned int rm,
                              bool cmp_with_zero, bool signal_all_nans)
{
    TCGv_i64 tcg_flags = tcg_temp_new_i64();
    TCGv_ptr fpst = get_fpstatus_ptr();

    if (is_double) {
        TCGv_i64 tcg_vn, tcg_vm;

        tcg_vn = read_fp_dreg(s, rn);
        if (cmp_with_zero) {
            tcg_vm = tcg_const_i64(0);
        } else {
            tcg_vm = read_fp_dreg(s, rm);
        }
        if (signal_all_nans) {
            gen_helper_vfp_cmped_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
        } else {
            gen_helper_vfp_cmpd_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
        }
        tcg_temp_free_i64(tcg_vn);
        tcg_temp_free_i64(tcg_vm);
    } else {
        TCGv_i32 tcg_vn, tcg_vm;

        tcg_vn = read_fp_sreg(s, rn);
        if (cmp_with_zero) {
            tcg_vm = tcg_const_i32(0);
        } else {
            tcg_vm = read_fp_sreg(s, rm);
        }
        if (signal_all_nans) {
            gen_helper_vfp_cmpes_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
        } else {
            gen_helper_vfp_cmps_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
        }
        tcg_temp_free_i32(tcg_vn);
        tcg_temp_free_i32(tcg_vm);
    }

    tcg_temp_free_ptr(fpst);

    /* The compare helpers return the new flag value; commit it */
    gen_set_nzcv(tcg_flags);

    tcg_temp_free_i64(tcg_flags);
}
4288
4289/* C3.6.22 Floating point compare
4290 *   31  30  29 28       24 23  22  21 20  16 15 14 13  10    9    5 4     0
4291 * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
4292 * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | op  | 1 0 0 0 |  Rn  |  op2  |
4293 * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
4294 */
4295static void disas_fp_compare(DisasContext *s, uint32_t insn)
4296{
4297    unsigned int mos, type, rm, op, rn, opc, op2r;
4298
4299    mos = extract32(insn, 29, 3);
4300    type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
4301    rm = extract32(insn, 16, 5);
4302    op = extract32(insn, 14, 2);
4303    rn = extract32(insn, 5, 5);
4304    opc = extract32(insn, 3, 2);
4305    op2r = extract32(insn, 0, 3);
4306
4307    if (mos || op || op2r || type > 1) {
4308        unallocated_encoding(s);
4309        return;
4310    }
4311
4312    if (!fp_access_check(s)) {
4313        return;
4314    }
4315
4316    handle_fp_compare(s, type, rn, rm, opc & 1, opc & 2);
4317}
4318
4319/* C3.6.23 Floating point conditional compare
4320 *   31  30  29 28       24 23  22  21 20  16 15  12 11 10 9    5  4   3    0
4321 * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
4322 * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | cond | 0 1 |  Rn  | op | nzcv |
4323 * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
4324 */
static void disas_fp_ccomp(DisasContext *s, uint32_t insn)
{
    unsigned int mos, type, rm, cond, rn, op, nzcv;
    TCGv_i64 tcg_flags;
    TCGLabel *label_continue = NULL;

    mos = extract32(insn, 29, 3);
    type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
    rm = extract32(insn, 16, 5);
    cond = extract32(insn, 12, 4);
    rn = extract32(insn, 5, 5);
    op = extract32(insn, 4, 1);  /* 0: FCCMP, 1: FCCMPE (signalling) */
    nzcv = extract32(insn, 0, 4);

    if (mos || type > 1) {
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    if (cond < 0x0e) { /* not always */
        /* Branch to the compare when COND holds; the fall-through
         * path instead forces the flags to #nzcv (placed in bits
         * [31:28] for gen_set_nzcv) and jumps past the compare.
         */
        TCGLabel *label_match = gen_new_label();
        label_continue = gen_new_label();
        arm_gen_test_cc(cond, label_match);
        /* nomatch: */
        tcg_flags = tcg_const_i64(nzcv << 28);
        gen_set_nzcv(tcg_flags);
        tcg_temp_free_i64(tcg_flags);
        tcg_gen_br(label_continue);
        gen_set_label(label_match);
    }

    handle_fp_compare(s, type, rn, rm, false, op);

    if (cond < 0x0e) {
        gen_set_label(label_continue);
    }
}
4366
4367/* C3.6.24 Floating point conditional select
4368 *   31  30  29 28       24 23  22  21 20  16 15  12 11 10 9    5 4    0
4369 * +---+---+---+-----------+------+---+------+------+-----+------+------+
4370 * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | cond | 1 1 |  Rn  |  Rd  |
4371 * +---+---+---+-----------+------+---+------+------+-----+------+------+
4372 */
static void disas_fp_csel(DisasContext *s, uint32_t insn)
{
    unsigned int mos, type, rm, cond, rn, rd;
    TCGv_i64 t_true, t_false, t_zero;
    DisasCompare64 c;

    mos = extract32(insn, 29, 3);
    type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
    rm = extract32(insn, 16, 5);
    cond = extract32(insn, 12, 4);
    rn = extract32(insn, 5, 5);
    rd = extract32(insn, 0, 5);

    if (mos || type > 1) {
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    /* Zero extend sreg inputs to 64 bits now.  */
    t_true = tcg_temp_new_i64();
    t_false = tcg_temp_new_i64();
    read_vec_element(s, t_true, rn, 0, type ? MO_64 : MO_32);
    read_vec_element(s, t_false, rm, 0, type ? MO_64 : MO_32);

    /* t_true = COND ? t_true : t_false */
    a64_test_cc(&c, cond);
    t_zero = tcg_const_i64(0);
    tcg_gen_movcond_i64(c.cond, t_true, c.value, t_zero, t_true, t_false);
    tcg_temp_free_i64(t_zero);
    tcg_temp_free_i64(t_false);
    a64_free_cc(&c);

    /* Note that sregs write back zeros to the high bits,
       and we've already done the zero-extension.  */
    write_fp_dreg(s, rd, t_true);
    tcg_temp_free_i64(t_true);
}
4413
4414/* C3.6.25 Floating-point data-processing (1 source) - single precision */
static void handle_fp_1src_single(DisasContext *s, int opcode, int rd, int rn)
{
    TCGv_ptr fpst;
    TCGv_i32 tcg_op;
    TCGv_i32 tcg_res;

    fpst = get_fpstatus_ptr();
    tcg_op = read_fp_sreg(s, rn);
    tcg_res = tcg_temp_new_i32();

    switch (opcode) {
    case 0x0: /* FMOV */
        tcg_gen_mov_i32(tcg_res, tcg_op);
        break;
    case 0x1: /* FABS */
        gen_helper_vfp_abss(tcg_res, tcg_op);
        break;
    case 0x2: /* FNEG */
        gen_helper_vfp_negs(tcg_res, tcg_op);
        break;
    case 0x3: /* FSQRT */
        gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env);
        break;
    case 0x8: /* FRINTN */
    case 0x9: /* FRINTP */
    case 0xa: /* FRINTM */
    case 0xb: /* FRINTZ */
    case 0xc: /* FRINTA */
    {
        /* Round to integral with an explicit rounding mode taken
         * from opcode<2:0>.  set_rmode leaves the previous mode in
         * tcg_rmode, so the identical second call restores the
         * original dynamic rounding mode afterwards.
         */
        TCGv_i32 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(opcode & 7));

        gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
        gen_helper_rints(tcg_res, tcg_op, fpst);

        gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
        tcg_temp_free_i32(tcg_rmode);
        break;
    }
    case 0xe: /* FRINTX */
        gen_helper_rints_exact(tcg_res, tcg_op, fpst);
        break;
    case 0xf: /* FRINTI */
        /* Round to integral using the current rounding mode */
        gen_helper_rints(tcg_res, tcg_op, fpst);
        break;
    default:
        abort();
    }

    write_fp_sreg(s, rd, tcg_res);

    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tcg_op);
    tcg_temp_free_i32(tcg_res);
}
4469
4470/* C3.6.25 Floating-point data-processing (1 source) - double precision */
static void handle_fp_1src_double(DisasContext *s, int opcode, int rd, int rn)
{
    TCGv_ptr fpst;
    TCGv_i64 tcg_op;
    TCGv_i64 tcg_res;

    fpst = get_fpstatus_ptr();
    tcg_op = read_fp_dreg(s, rn);
    tcg_res = tcg_temp_new_i64();

    switch (opcode) {
    case 0x0: /* FMOV */
        tcg_gen_mov_i64(tcg_res, tcg_op);
        break;
    case 0x1: /* FABS */
        gen_helper_vfp_absd(tcg_res, tcg_op);
        break;
    case 0x2: /* FNEG */
        gen_helper_vfp_negd(tcg_res, tcg_op);
        break;
    case 0x3: /* FSQRT */
        gen_helper_vfp_sqrtd(tcg_res, tcg_op, cpu_env);
        break;
    case 0x8: /* FRINTN */
    case 0x9: /* FRINTP */
    case 0xa: /* FRINTM */
    case 0xb: /* FRINTZ */
    case 0xc: /* FRINTA */
    {
        /* Round to integral with an explicit rounding mode taken
         * from opcode<2:0>.  set_rmode leaves the previous mode in
         * tcg_rmode, so the identical second call restores the
         * original dynamic rounding mode afterwards.
         */
        TCGv_i32 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(opcode & 7));

        gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
        gen_helper_rintd(tcg_res, tcg_op, fpst);

        gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
        tcg_temp_free_i32(tcg_rmode);
        break;
    }
    case 0xe: /* FRINTX */
        gen_helper_rintd_exact(tcg_res, tcg_op, fpst);
        break;
    case 0xf: /* FRINTI */
        /* Round to integral using the current rounding mode */
        gen_helper_rintd(tcg_res, tcg_op, fpst);
        break;
    default:
        abort();
    }

    write_fp_dreg(s, rd, tcg_res);

    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i64(tcg_op);
    tcg_temp_free_i64(tcg_res);
}
4525
/* FCVT between precisions.  ntype is the source type and dtype the
 * destination type, encoded as 0 = single, 1 = double, 3 = half;
 * the caller guarantees dtype != ntype and that both are valid.
 */
static void handle_fp_fcvt(DisasContext *s, int opcode,
                           int rd, int rn, int dtype, int ntype)
{
    switch (ntype) {
    case 0x0:
    {
        /* Source is single precision */
        TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
        if (dtype == 1) {
            /* Single to double */
            TCGv_i64 tcg_rd = tcg_temp_new_i64();
            gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, cpu_env);
            write_fp_dreg(s, rd, tcg_rd);
            tcg_temp_free_i64(tcg_rd);
        } else {
            /* Single to half */
            TCGv_i32 tcg_rd = tcg_temp_new_i32();
            gen_helper_vfp_fcvt_f32_to_f16(tcg_rd, tcg_rn, cpu_env);
            /* write_fp_sreg is OK here because top half of tcg_rd is zero */
            write_fp_sreg(s, rd, tcg_rd);
            tcg_temp_free_i32(tcg_rd);
        }
        tcg_temp_free_i32(tcg_rn);
        break;
    }
    case 0x1:
    {
        /* Source is double precision */
        TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
        TCGv_i32 tcg_rd = tcg_temp_new_i32();
        if (dtype == 0) {
            /* Double to single */
            gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, cpu_env);
        } else {
            /* Double to half */
            gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, cpu_env);
            /* write_fp_sreg is OK here because top half of tcg_rd is zero */
        }
        write_fp_sreg(s, rd, tcg_rd);
        tcg_temp_free_i32(tcg_rd);
        tcg_temp_free_i64(tcg_rn);
        break;
    }
    case 0x3:
    {
        /* Source is half precision: only the low 16 bits are valid */
        TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
        tcg_gen_ext16u_i32(tcg_rn, tcg_rn);
        if (dtype == 0) {
            /* Half to single */
            TCGv_i32 tcg_rd = tcg_temp_new_i32();
            gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, cpu_env);
            write_fp_sreg(s, rd, tcg_rd);
            tcg_temp_free_i32(tcg_rd);
        } else {
            /* Half to double */
            TCGv_i64 tcg_rd = tcg_temp_new_i64();
            gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, cpu_env);
            write_fp_dreg(s, rd, tcg_rd);
            tcg_temp_free_i64(tcg_rd);
        }
        tcg_temp_free_i32(tcg_rn);
        break;
    }
    default:
        abort();
    }
}
4591
4592/* C3.6.25 Floating point data-processing (1 source)
4593 *   31  30  29 28       24 23  22  21 20    15 14       10 9    5 4    0
4594 * +---+---+---+-----------+------+---+--------+-----------+------+------+
4595 * | M | 0 | S | 1 1 1 1 0 | type | 1 | opcode | 1 0 0 0 0 |  Rn  |  Rd  |
4596 * +---+---+---+-----------+------+---+--------+-----------+------+------+
4597 */
4598static void disas_fp_1src(DisasContext *s, uint32_t insn)
4599{
4600    int type = extract32(insn, 22, 2);
4601    int opcode = extract32(insn, 15, 6);
4602    int rn = extract32(insn, 5, 5);
4603    int rd = extract32(insn, 0, 5);
4604
4605    switch (opcode) {
4606    case 0x4: case 0x5: case 0x7:
4607    {
4608        /* FCVT between half, single and double precision */
4609        int dtype = extract32(opcode, 0, 2);
4610        if (type == 2 || dtype == type) {
4611            unallocated_encoding(s);
4612            return;
4613        }
4614        if (!fp_access_check(s)) {
4615            return;
4616        }
4617
4618        handle_fp_fcvt(s, opcode, rd, rn, dtype, type);
4619        break;
4620    }
4621    case 0x0 ... 0x3:
4622    case 0x8 ... 0xc:
4623    case 0xe ... 0xf:
4624        /* 32-to-32 and 64-to-64 ops */
4625        switch (type) {
4626        case 0:
4627            if (!fp_access_check(s)) {
4628                return;
4629            }
4630
4631            handle_fp_1src_single(s, opcode, rd, rn);
4632            break;
4633        case 1:
4634            if (!fp_access_check(s)) {
4635                return;
4636            }
4637
4638            handle_fp_1src_double(s, opcode, rd, rn);
4639            break;
4640        default:
4641            unallocated_encoding(s);
4642        }
4643        break;
4644    default:
4645        unallocated_encoding(s);
4646        break;
4647    }
4648}
4649
4650/* C3.6.26 Floating-point data-processing (2 source) - single precision */
static void handle_fp_2src_single(DisasContext *s, int opcode,
                                  int rd, int rn, int rm)
{
    TCGv_i32 tcg_op1;
    TCGv_i32 tcg_op2;
    TCGv_i32 tcg_res;
    TCGv_ptr fpst;

    tcg_res = tcg_temp_new_i32();
    fpst = get_fpstatus_ptr();
    tcg_op1 = read_fp_sreg(s, rn);
    tcg_op2 = read_fp_sreg(s, rm);

    /* NOTE(review): no default case -- opcode is presumably validated
     * by the caller; an out-of-range value would write an
     * uninitialized tcg_res to Sd.  Confirm against disas_fp_2src.
     */
    switch (opcode) {
    case 0x0: /* FMUL */
        gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x1: /* FDIV */
        gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x2: /* FADD */
        gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x3: /* FSUB */
        gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x4: /* FMAX */
        gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x5: /* FMIN */
        gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x6: /* FMAXNM */
        gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x7: /* FMINNM */
        gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x8: /* FNMUL */
        /* Multiply, then negate the product */
        gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
        gen_helper_vfp_negs(tcg_res, tcg_res);
        break;
    }

    write_fp_sreg(s, rd, tcg_res);

    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tcg_op1);
    tcg_temp_free_i32(tcg_op2);
    tcg_temp_free_i32(tcg_res);
}
4702
4703/* C3.6.26 Floating-point data-processing (2 source) - double precision */
4704static void handle_fp_2src_double(DisasContext *s, int opcode,
4705                                  int rd, int rn, int rm)
4706{
4707    TCGv_i64 tcg_op1;
4708    TCGv_i64 tcg_op2;
4709    TCGv_i64 tcg_res;
4710    TCGv_ptr fpst;
4711
4712    tcg_res = tcg_temp_new_i64();
4713    fpst = get_fpstatus_ptr();
4714    tcg_op1 = read_fp_dreg(s, rn);
4715    tcg_op2 = read_fp_dreg(s, rm);
4716
4717    switch (opcode) {
4718    case 0x0: /* FMUL */
4719        gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
4720        break;
4721    case 0x1: /* FDIV */
4722        gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
4723        break;
4724    case 0x2: /* FADD */
4725        gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
4726        break;
4727    case 0x3: /* FSUB */
4728        gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
4729        break;
4730    case 0x4: /* FMAX */
4731        gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
4732        break;
4733    case 0x5: /* FMIN */
4734        gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
4735        break;
4736    case 0x6: /* FMAXNM */
4737        gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
4738        break;
4739    case 0x7: /* FMINNM */
4740        gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
4741        break;
4742    case 0x8: /* FNMUL */
4743        gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
4744        gen_helper_vfp_negd(tcg_res, tcg_res);
4745        break;
4746    }
4747
4748    write_fp_dreg(s, rd, tcg_res);
4749
4750    tcg_temp_free_ptr(fpst);
4751    tcg_temp_free_i64(tcg_op1);
4752    tcg_temp_free_i64(tcg_op2);
4753    tcg_temp_free_i64(tcg_res);
4754}
4755
4756/* C3.6.26 Floating point data-processing (2 source)
4757 *   31  30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
4758 * +---+---+---+-----------+------+---+------+--------+-----+------+------+
4759 * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | opcode | 1 0 |  Rn  |  Rd  |
4760 * +---+---+---+-----------+------+---+------+--------+-----+------+------+
4761 */
4762static void disas_fp_2src(DisasContext *s, uint32_t insn)
4763{
4764    int type = extract32(insn, 22, 2);
4765    int rd = extract32(insn, 0, 5);
4766    int rn = extract32(insn, 5, 5);
4767    int rm = extract32(insn, 16, 5);
4768    int opcode = extract32(insn, 12, 4);
4769
4770    if (opcode > 8) {
4771        unallocated_encoding(s);
4772        return;
4773    }
4774
4775    switch (type) {
4776    case 0:
4777        if (!fp_access_check(s)) {
4778            return;
4779        }
4780        handle_fp_2src_single(s, opcode, rd, rn, rm);
4781        break;
4782    case 1:
4783        if (!fp_access_check(s)) {
4784            return;
4785        }
4786        handle_fp_2src_double(s, opcode, rd, rn, rm);
4787        break;
4788    default:
4789        unallocated_encoding(s);
4790    }
4791}
4792
4793/* C3.6.27 Floating-point data-processing (3 source) - single precision */
4794static void handle_fp_3src_single(DisasContext *s, bool o0, bool o1,
4795                                  int rd, int rn, int rm, int ra)
4796{
4797    TCGv_i32 tcg_op1, tcg_op2, tcg_op3;
4798    TCGv_i32 tcg_res = tcg_temp_new_i32();
4799    TCGv_ptr fpst = get_fpstatus_ptr();
4800
4801    tcg_op1 = read_fp_sreg(s, rn);
4802    tcg_op2 = read_fp_sreg(s, rm);
4803    tcg_op3 = read_fp_sreg(s, ra);
4804
4805    /* These are fused multiply-add, and must be done as one
4806     * floating point operation with no rounding between the
4807     * multiplication and addition steps.
4808     * NB that doing the negations here as separate steps is
4809     * correct : an input NaN should come out with its sign bit
4810     * flipped if it is a negated-input.
4811     */
4812    if (o1 == true) {
4813        gen_helper_vfp_negs(tcg_op3, tcg_op3);
4814    }
4815
4816    if (o0 != o1) {
4817        gen_helper_vfp_negs(tcg_op1, tcg_op1);
4818    }
4819
4820    gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
4821
4822    write_fp_sreg(s, rd, tcg_res);
4823
4824    tcg_temp_free_ptr(fpst);
4825    tcg_temp_free_i32(tcg_op1);
4826    tcg_temp_free_i32(tcg_op2);
4827    tcg_temp_free_i32(tcg_op3);
4828    tcg_temp_free_i32(tcg_res);
4829}
4830
4831/* C3.6.27 Floating-point data-processing (3 source) - double precision */
4832static void handle_fp_3src_double(DisasContext *s, bool o0, bool o1,
4833                                  int rd, int rn, int rm, int ra)
4834{
4835    TCGv_i64 tcg_op1, tcg_op2, tcg_op3;
4836    TCGv_i64 tcg_res = tcg_temp_new_i64();
4837    TCGv_ptr fpst = get_fpstatus_ptr();
4838
4839    tcg_op1 = read_fp_dreg(s, rn);
4840    tcg_op2 = read_fp_dreg(s, rm);
4841    tcg_op3 = read_fp_dreg(s, ra);
4842
4843    /* These are fused multiply-add, and must be done as one
4844     * floating point operation with no rounding between the
4845     * multiplication and addition steps.
4846     * NB that doing the negations here as separate steps is
4847     * correct : an input NaN should come out with its sign bit
4848     * flipped if it is a negated-input.
4849     */
4850    if (o1 == true) {
4851        gen_helper_vfp_negd(tcg_op3, tcg_op3);
4852    }
4853
4854    if (o0 != o1) {
4855        gen_helper_vfp_negd(tcg_op1, tcg_op1);
4856    }
4857
4858    gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
4859
4860    write_fp_dreg(s, rd, tcg_res);
4861
4862    tcg_temp_free_ptr(fpst);
4863    tcg_temp_free_i64(tcg_op1);
4864    tcg_temp_free_i64(tcg_op2);
4865    tcg_temp_free_i64(tcg_op3);
4866    tcg_temp_free_i64(tcg_res);
4867}
4868
4869/* C3.6.27 Floating point data-processing (3 source)
4870 *   31  30  29 28       24 23  22  21  20  16  15  14  10 9    5 4    0
4871 * +---+---+---+-----------+------+----+------+----+------+------+------+
4872 * | M | 0 | S | 1 1 1 1 1 | type | o1 |  Rm  | o0 |  Ra  |  Rn  |  Rd  |
4873 * +---+---+---+-----------+------+----+------+----+------+------+------+
4874 */
4875static void disas_fp_3src(DisasContext *s, uint32_t insn)
4876{
4877    int type = extract32(insn, 22, 2);
4878    int rd = extract32(insn, 0, 5);
4879    int rn = extract32(insn, 5, 5);
4880    int ra = extract32(insn, 10, 5);
4881    int rm = extract32(insn, 16, 5);
4882    bool o0 = extract32(insn, 15, 1);
4883    bool o1 = extract32(insn, 21, 1);
4884
4885    switch (type) {
4886    case 0:
4887        if (!fp_access_check(s)) {
4888            return;
4889        }
4890        handle_fp_3src_single(s, o0, o1, rd, rn, rm, ra);
4891        break;
4892    case 1:
4893        if (!fp_access_check(s)) {
4894            return;
4895        }
4896        handle_fp_3src_double(s, o0, o1, rd, rn, rm, ra);
4897        break;
4898    default:
4899        unallocated_encoding(s);
4900    }
4901}
4902
4903/* C3.6.28 Floating point immediate
4904 *   31  30  29 28       24 23  22  21 20        13 12   10 9    5 4    0
4905 * +---+---+---+-----------+------+---+------------+-------+------+------+
4906 * | M | 0 | S | 1 1 1 1 0 | type | 1 |    imm8    | 1 0 0 | imm5 |  Rd  |
4907 * +---+---+---+-----------+------+---+------------+-------+------+------+
4908 */
4909static void disas_fp_imm(DisasContext *s, uint32_t insn)
4910{
4911    int rd = extract32(insn, 0, 5);
4912    int imm8 = extract32(insn, 13, 8);
4913    int is_double = extract32(insn, 22, 2);
4914    uint64_t imm;
4915    TCGv_i64 tcg_res;
4916
4917    if (is_double > 1) {
4918        unallocated_encoding(s);
4919        return;
4920    }
4921
4922    if (!fp_access_check(s)) {
4923        return;
4924    }
4925
4926    /* The imm8 encodes the sign bit, enough bits to represent
4927     * an exponent in the range 01....1xx to 10....0xx,
4928     * and the most significant 4 bits of the mantissa; see
4929     * VFPExpandImm() in the v8 ARM ARM.
4930     */
4931    if (is_double) {
4932        imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
4933            (extract32(imm8, 6, 1) ? 0x3fc0 : 0x4000) |
4934            extract32(imm8, 0, 6);
4935        imm <<= 48;
4936    } else {
4937        imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
4938            (extract32(imm8, 6, 1) ? 0x3e00 : 0x4000) |
4939            (extract32(imm8, 0, 6) << 3);
4940        imm <<= 16;
4941    }
4942
4943    tcg_res = tcg_const_i64(imm);
4944    write_fp_dreg(s, rd, tcg_res);
4945    tcg_temp_free_i64(tcg_res);
4946}
4947
4948/* Handle floating point <=> fixed point conversions. Note that we can
4949 * also deal with fp <=> integer conversions as a special case (scale == 64)
4950 * OPTME: consider handling that special case specially or at least skipping
4951 * the call to scalbn in the helpers for zero shifts.
4952 */
static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode,
                           bool itof, int rmode, int scale, int sf, int type)
{
    /* itof: true for int->fp (SCVTF/UCVTF), false for fp->int.
     * rmode: FPROUNDING_* constant used for the fp->int direction.
     * scale: the helpers take (64 - scale) fractional bits, so
     *        scale == 64 gives the plain integer conversions.
     * sf: 1 for 64 bit GP register, 0 for 32 bit.
     * type: 0 for single precision, 1 for double (callers reject others).
     */
    /* Bit 0 of opcode distinguishes signed (clear) from unsigned (set)
     * variants, e.g. SCVTF (0x2) vs UCVTF (0x3).
     */
    bool is_signed = !(opcode & 1);
    bool is_double = type;
    TCGv_ptr tcg_fpstatus;
    TCGv_i32 tcg_shift;

    tcg_fpstatus = get_fpstatus_ptr();

    /* Number of fixed-point fractional bits passed to the helpers */
    tcg_shift = tcg_const_i32(64 - scale);

    if (itof) {
        /* Integer to float: source is a general purpose register */
        TCGv_i64 tcg_int = cpu_reg(s, rn);
        if (!sf) {
            /* 32 bit source: widen to 64 bits first, respecting sign */
            TCGv_i64 tcg_extend = new_tmp_a64(s);

            if (is_signed) {
                tcg_gen_ext32s_i64(tcg_extend, tcg_int);
            } else {
                tcg_gen_ext32u_i64(tcg_extend, tcg_int);
            }

            tcg_int = tcg_extend;
        }

        if (is_double) {
            TCGv_i64 tcg_double = tcg_temp_new_i64();
            if (is_signed) {
                gen_helper_vfp_sqtod(tcg_double, tcg_int,
                                     tcg_shift, tcg_fpstatus);
            } else {
                gen_helper_vfp_uqtod(tcg_double, tcg_int,
                                     tcg_shift, tcg_fpstatus);
            }
            write_fp_dreg(s, rd, tcg_double);
            tcg_temp_free_i64(tcg_double);
        } else {
            TCGv_i32 tcg_single = tcg_temp_new_i32();
            if (is_signed) {
                gen_helper_vfp_sqtos(tcg_single, tcg_int,
                                     tcg_shift, tcg_fpstatus);
            } else {
                gen_helper_vfp_uqtos(tcg_single, tcg_int,
                                     tcg_shift, tcg_fpstatus);
            }
            write_fp_sreg(s, rd, tcg_single);
            tcg_temp_free_i32(tcg_single);
        }
    } else {
        /* Float to integer: result goes to a general purpose register */
        TCGv_i64 tcg_int = cpu_reg(s, rd);
        TCGv_i32 tcg_rmode;

        if (extract32(opcode, 2, 1)) {
            /* There are too many rounding modes to all fit into rmode,
             * so FCVTA[US] is a special case.
             */
            rmode = FPROUNDING_TIEAWAY;
        }

        tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));

        /* Install the requested rounding mode; the helper writes the
         * previous mode back into tcg_rmode so the second call below
         * can restore it.
         */
        gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);

        if (is_double) {
            TCGv_i64 tcg_double = read_fp_dreg(s, rn);
            if (is_signed) {
                if (!sf) {
                    gen_helper_vfp_tosld(tcg_int, tcg_double,
                                         tcg_shift, tcg_fpstatus);
                } else {
                    gen_helper_vfp_tosqd(tcg_int, tcg_double,
                                         tcg_shift, tcg_fpstatus);
                }
            } else {
                if (!sf) {
                    gen_helper_vfp_tould(tcg_int, tcg_double,
                                         tcg_shift, tcg_fpstatus);
                } else {
                    gen_helper_vfp_touqd(tcg_int, tcg_double,
                                         tcg_shift, tcg_fpstatus);
                }
            }
            tcg_temp_free_i64(tcg_double);
        } else {
            TCGv_i32 tcg_single = read_fp_sreg(s, rn);
            if (sf) {
                if (is_signed) {
                    gen_helper_vfp_tosqs(tcg_int, tcg_single,
                                         tcg_shift, tcg_fpstatus);
                } else {
                    gen_helper_vfp_touqs(tcg_int, tcg_single,
                                         tcg_shift, tcg_fpstatus);
                }
            } else {
                /* 32 bit result: convert into an i32 temp, then widen */
                TCGv_i32 tcg_dest = tcg_temp_new_i32();
                if (is_signed) {
                    gen_helper_vfp_tosls(tcg_dest, tcg_single,
                                         tcg_shift, tcg_fpstatus);
                } else {
                    gen_helper_vfp_touls(tcg_dest, tcg_single,
                                         tcg_shift, tcg_fpstatus);
                }
                tcg_gen_extu_i32_i64(tcg_int, tcg_dest);
                tcg_temp_free_i32(tcg_dest);
            }
            tcg_temp_free_i32(tcg_single);
        }

        /* Restore the previous rounding mode */
        gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
        tcg_temp_free_i32(tcg_rmode);

        if (!sf) {
            /* 32 bit destination: clear the high half of the X register */
            tcg_gen_ext32u_i64(tcg_int, tcg_int);
        }
    }

    tcg_temp_free_ptr(tcg_fpstatus);
    tcg_temp_free_i32(tcg_shift);
}
5073
5074/* C3.6.29 Floating point <-> fixed point conversions
5075 *   31   30  29 28       24 23  22  21 20   19 18    16 15   10 9    5 4    0
5076 * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
5077 * | sf | 0 | S | 1 1 1 1 0 | type | 0 | rmode | opcode | scale |  Rn  |  Rd  |
5078 * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
5079 */
5080static void disas_fp_fixed_conv(DisasContext *s, uint32_t insn)
5081{
5082    int rd = extract32(insn, 0, 5);
5083    int rn = extract32(insn, 5, 5);
5084    int scale = extract32(insn, 10, 6);
5085    int opcode = extract32(insn, 16, 3);
5086    int rmode = extract32(insn, 19, 2);
5087    int type = extract32(insn, 22, 2);
5088    bool sbit = extract32(insn, 29, 1);
5089    bool sf = extract32(insn, 31, 1);
5090    bool itof;
5091
5092    if (sbit || (type > 1)
5093        || (!sf && scale < 32)) {
5094        unallocated_encoding(s);
5095        return;
5096    }
5097
5098    switch ((rmode << 3) | opcode) {
5099    case 0x2: /* SCVTF */
5100    case 0x3: /* UCVTF */
5101        itof = true;
5102        break;
5103    case 0x18: /* FCVTZS */
5104    case 0x19: /* FCVTZU */
5105        itof = false;
5106        break;
5107    default:
5108        unallocated_encoding(s);
5109        return;
5110    }
5111
5112    if (!fp_access_check(s)) {
5113        return;
5114    }
5115
5116    handle_fpfpcvt(s, rd, rn, opcode, itof, FPROUNDING_ZERO, scale, sf, type);
5117}
5118
5119static void handle_fmov(DisasContext *s, int rd, int rn, int type, bool itof)
5120{
5121    /* FMOV: gpr to or from float, double, or top half of quad fp reg,
5122     * without conversion.
5123     */
5124
5125    if (itof) {
5126        TCGv_i64 tcg_rn = cpu_reg(s, rn);
5127
5128        switch (type) {
5129        case 0:
5130        {
5131            /* 32 bit */
5132            TCGv_i64 tmp = tcg_temp_new_i64();
5133            tcg_gen_ext32u_i64(tmp, tcg_rn);
5134            tcg_gen_st_i64(tmp, cpu_env, fp_reg_offset(s, rd, MO_64));
5135            tcg_gen_movi_i64(tmp, 0);
5136            tcg_gen_st_i64(tmp, cpu_env, fp_reg_hi_offset(s, rd));
5137            tcg_temp_free_i64(tmp);
5138            break;
5139        }
5140        case 1:
5141        {
5142            /* 64 bit */
5143            TCGv_i64 tmp = tcg_const_i64(0);
5144            tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_offset(s, rd, MO_64));
5145            tcg_gen_st_i64(tmp, cpu_env, fp_reg_hi_offset(s, rd));
5146            tcg_temp_free_i64(tmp);
5147            break;
5148        }
5149        case 2:
5150            /* 64 bit to top half. */
5151            tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_hi_offset(s, rd));
5152            break;
5153        }
5154    } else {
5155        TCGv_i64 tcg_rd = cpu_reg(s, rd);
5156
5157        switch (type) {
5158        case 0:
5159            /* 32 bit */
5160            tcg_gen_ld32u_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_32));
5161            break;
5162        case 1:
5163            /* 64 bit */
5164            tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_64));
5165            break;
5166        case 2:
5167            /* 64 bits from top half */
5168            tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_hi_offset(s, rn));
5169            break;
5170        }
5171    }
5172}
5173
5174/* C3.6.30 Floating point <-> integer conversions
5175 *   31   30  29 28       24 23  22  21 20   19 18 16 15         10 9  5 4  0
5176 * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
5177 * | sf | 0 | S | 1 1 1 1 0 | type | 1 | rmode | opc | 0 0 0 0 0 0 | Rn | Rd |
5178 * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
5179 */
5180static void disas_fp_int_conv(DisasContext *s, uint32_t insn)
5181{
5182    int rd = extract32(insn, 0, 5);
5183    int rn = extract32(insn, 5, 5);
5184    int opcode = extract32(insn, 16, 3);
5185    int rmode = extract32(insn, 19, 2);
5186    int type = extract32(insn, 22, 2);
5187    bool sbit = extract32(insn, 29, 1);
5188    bool sf = extract32(insn, 31, 1);
5189
5190    if (sbit) {
5191        unallocated_encoding(s);
5192        return;
5193    }
5194
5195    if (opcode > 5) {
5196        /* FMOV */
5197        bool itof = opcode & 1;
5198
5199        if (rmode >= 2) {
5200            unallocated_encoding(s);
5201            return;
5202        }
5203
5204        switch (sf << 3 | type << 1 | rmode) {
5205        case 0x0: /* 32 bit */
5206        case 0xa: /* 64 bit */
5207        case 0xd: /* 64 bit to top half of quad */
5208            break;
5209        default:
5210            /* all other sf/type/rmode combinations are invalid */
5211            unallocated_encoding(s);
5212            break;
5213        }
5214
5215        if (!fp_access_check(s)) {
5216            return;
5217        }
5218        handle_fmov(s, rd, rn, type, itof);
5219    } else {
5220        /* actual FP conversions */
5221        bool itof = extract32(opcode, 1, 1);
5222
5223        if (type > 1 || (rmode != 0 && opcode > 1)) {
5224            unallocated_encoding(s);
5225            return;
5226        }
5227
5228        if (!fp_access_check(s)) {
5229            return;
5230        }
5231        handle_fpfpcvt(s, rd, rn, opcode, itof, rmode, 64, sf, type);
5232    }
5233}
5234
5235/* FP-specific subcases of table C3-6 (SIMD and FP data processing)
5236 *   31  30  29 28     25 24                          0
5237 * +---+---+---+---------+-----------------------------+
5238 * |   | 0 |   | 1 1 1 1 |                             |
5239 * +---+---+---+---------+-----------------------------+
5240 */
5241static void disas_data_proc_fp(DisasContext *s, uint32_t insn)
5242{
5243    if (extract32(insn, 24, 1)) {
5244        /* Floating point data-processing (3 source) */
5245        disas_fp_3src(s, insn);
5246    } else if (extract32(insn, 21, 1) == 0) {
5247        /* Floating point to fixed point conversions */
5248        disas_fp_fixed_conv(s, insn);
5249    } else {
5250        switch (extract32(insn, 10, 2)) {
5251        case 1:
5252            /* Floating point conditional compare */
5253            disas_fp_ccomp(s, insn);
5254            break;
5255        case 2:
5256            /* Floating point data-processing (2 source) */
5257            disas_fp_2src(s, insn);
5258            break;
5259        case 3:
5260            /* Floating point conditional select */
5261            disas_fp_csel(s, insn);
5262            break;
5263        case 0:
5264            switch (ctz32(extract32(insn, 12, 4))) {
5265            case 0: /* [15:12] == xxx1 */
5266                /* Floating point immediate */
5267                disas_fp_imm(s, insn);
5268                break;
5269            case 1: /* [15:12] == xx10 */
5270                /* Floating point compare */
5271                disas_fp_compare(s, insn);
5272                break;
5273            case 2: /* [15:12] == x100 */
5274                /* Floating point data-processing (1 source) */
5275                disas_fp_1src(s, insn);
5276                break;
5277            case 3: /* [15:12] == 1000 */
5278                unallocated_encoding(s);
5279                break;
5280            default: /* [15:12] == 0000 */
5281                /* Floating point <-> integer conversions */
5282                disas_fp_int_conv(s, insn);
5283                break;
5284            }
5285            break;
5286        }
5287    }
5288}
5289
5290static void do_ext64(DisasContext *s, TCGv_i64 tcg_left, TCGv_i64 tcg_right,
5291                     int pos)
5292{
5293    /* Extract 64 bits from the middle of two concatenated 64 bit
5294     * vector register slices left:right. The extracted bits start
5295     * at 'pos' bits into the right (least significant) side.
5296     * We return the result in tcg_right, and guarantee not to
5297     * trash tcg_left.
5298     */
5299    TCGv_i64 tcg_tmp = tcg_temp_new_i64();
5300    assert(pos > 0 && pos < 64);
5301
5302    tcg_gen_shri_i64(tcg_right, tcg_right, pos);
5303    tcg_gen_shli_i64(tcg_tmp, tcg_left, 64 - pos);
5304    tcg_gen_or_i64(tcg_right, tcg_right, tcg_tmp);
5305
5306    tcg_temp_free_i64(tcg_tmp);
5307}
5308
5309/* C3.6.1 EXT
5310 *   31  30 29         24 23 22  21 20  16 15  14  11 10  9    5 4    0
5311 * +---+---+-------------+-----+---+------+---+------+---+------+------+
5312 * | 0 | Q | 1 0 1 1 1 0 | op2 | 0 |  Rm  | 0 | imm4 | 0 |  Rn  |  Rd  |
5313 * +---+---+-------------+-----+---+------+---+------+---+------+------+
5314 */
static void disas_simd_ext(DisasContext *s, uint32_t insn)
{
    int is_q = extract32(insn, 30, 1);
    int op2 = extract32(insn, 22, 2);
    int imm4 = extract32(insn, 11, 4);
    int rm = extract32(insn, 16, 5);
    int rn = extract32(insn, 5, 5);
    int rd = extract32(insn, 0, 5);
    int pos = imm4 << 3; /* extraction start position, in bits */
    TCGv_i64 tcg_resl, tcg_resh;

    /* op2 must be zero; for the 64 bit variant the start position must
     * lie within the low 64 bits (imm4<3> clear).
     */
    if (op2 != 0 || (!is_q && extract32(imm4, 3, 1))) {
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    tcg_resh = tcg_temp_new_i64();
    tcg_resl = tcg_temp_new_i64();

    /* Vd gets bits starting at pos bits into Vm:Vn. This is
     * either extracting 128 bits from a 128:128 concatenation, or
     * extracting 64 bits from a 64:64 concatenation.
     */
    if (!is_q) {
        read_vec_element(s, tcg_resl, rn, 0, MO_64);
        if (pos != 0) {
            read_vec_element(s, tcg_resh, rm, 0, MO_64);
            do_ext64(s, tcg_resh, tcg_resl, pos);
        }
        /* High half of the result is always zero for the 64 bit variant */
        tcg_gen_movi_i64(tcg_resh, 0);
    } else {
        TCGv_i64 tcg_hh;
        /* Walk the four 64 bit elements of the Vm:Vn concatenation */
        typedef struct {
            int reg;
            int elt;
        } EltPosns;
        EltPosns eltposns[] = { {rn, 0}, {rn, 1}, {rm, 0}, {rm, 1} };
        EltPosns *elt = eltposns;

        /* If the start position is in the upper 64 bits of Vn, move the
         * window along one element and reduce pos accordingly.
         */
        if (pos >= 64) {
            elt++;
            pos -= 64;
        }

        read_vec_element(s, tcg_resl, elt->reg, elt->elt, MO_64);
        elt++;
        read_vec_element(s, tcg_resh, elt->reg, elt->elt, MO_64);
        elt++;
        if (pos != 0) {
            /* Shift each half right by pos, pulling in bits from the
             * next element of the concatenation (do_ext64 leaves its
             * first operand intact and updates the second in place).
             */
            do_ext64(s, tcg_resh, tcg_resl, pos);
            tcg_hh = tcg_temp_new_i64();
            read_vec_element(s, tcg_hh, elt->reg, elt->elt, MO_64);
            do_ext64(s, tcg_hh, tcg_resh, pos);
            tcg_temp_free_i64(tcg_hh);
        }
    }

    write_vec_element(s, tcg_resl, rd, 0, MO_64);
    tcg_temp_free_i64(tcg_resl);
    write_vec_element(s, tcg_resh, rd, 1, MO_64);
    tcg_temp_free_i64(tcg_resh);
}
5381
5382/* C3.6.2 TBL/TBX
5383 *   31  30 29         24 23 22  21 20  16 15  14 13  12  11 10 9    5 4    0
5384 * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
5385 * | 0 | Q | 0 0 1 1 1 0 | op2 | 0 |  Rm  | 0 | len | op | 0 0 |  Rn  |  Rd  |
5386 * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
5387 */
5388static void disas_simd_tb(DisasContext *s, uint32_t insn)
5389{
5390    int op2 = extract32(insn, 22, 2);
5391    int is_q = extract32(insn, 30, 1);
5392    int rm = extract32(insn, 16, 5);
5393    int rn = extract32(insn, 5, 5);
5394    int rd = extract32(insn, 0, 5);
5395    int is_tblx = extract32(insn, 12, 1);
5396    int len = extract32(insn, 13, 2);
5397    TCGv_i64 tcg_resl, tcg_resh, tcg_idx;
5398    TCGv_i32 tcg_regno, tcg_numregs;
5399
5400    if (op2 != 0) {
5401        unallocated_encoding(s);
5402        return;
5403    }
5404
5405    if (!fp_access_check(s)) {
5406        return;
5407    }
5408
5409    /* This does a table lookup: for every byte element in the input
5410     * we index into a table formed from up to four vector registers,
5411     * and then the output is the result of the lookups. Our helper
5412     * function does the lookup operation for a single 64 bit part of
5413     * the input.
5414     */
5415    tcg_resl = tcg_temp_new_i64();
5416    tcg_resh = tcg_temp_new_i64();
5417
5418    if (is_tblx) {
5419        read_vec_element(s, tcg_resl, rd, 0, MO_64);
5420    } else {
5421        tcg_gen_movi_i64(tcg_resl, 0);
5422    }
5423    if (is_tblx && is_q) {
5424        read_vec_element(s, tcg_resh, rd, 1, MO_64);
5425    } else {
5426        tcg_gen_movi_i64(tcg_resh, 0);
5427    }
5428
5429    tcg_idx = tcg_temp_new_i64();
5430    tcg_regno = tcg_const_i32(rn);
5431    tcg_numregs = tcg_const_i32(len + 1);
5432    read_vec_element(s, tcg_idx, rm, 0, MO_64);
5433    gen_helper_simd_tbl(tcg_resl, cpu_env, tcg_resl, tcg_idx,
5434                        tcg_regno, tcg_numregs);
5435    if (is_q) {
5436        read_vec_element(s, tcg_idx, rm, 1, MO_64);
5437        gen_helper_simd_tbl(tcg_resh, cpu_env, tcg_resh, tcg_idx,
5438                            tcg_regno, tcg_numregs);
5439    }
5440    tcg_temp_free_i64(tcg_idx);
5441    tcg_temp_free_i32(tcg_regno);
5442    tcg_temp_free_i32(tcg_numregs);
5443
5444    write_vec_element(s, tcg_resl, rd, 0, MO_64);
5445    tcg_temp_free_i64(tcg_resl);
5446    write_vec_element(s, tcg_resh, rd, 1, MO_64);
5447    tcg_temp_free_i64(tcg_resh);
5448}
5449
5450/* C3.6.3 ZIP/UZP/TRN
5451 *   31  30 29         24 23  22  21 20   16 15 14 12 11 10 9    5 4    0
5452 * +---+---+-------------+------+---+------+---+------------------+------+
5453 * | 0 | Q | 0 0 1 1 1 0 | size | 0 |  Rm  | 0 | opc | 1 0 |  Rn  |  Rd  |
5454 * +---+---+-------------+------+---+------+---+------------------+------+
5455 */
static void disas_simd_zip_trn(DisasContext *s, uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int rm = extract32(insn, 16, 5);
    int size = extract32(insn, 22, 2);
    /* opc field bits [1:0] indicate ZIP/UZP/TRN;
     * bit 2 indicates 1 vs 2 variant of the insn.
     */
    int opcode = extract32(insn, 12, 2);
    bool part = extract32(insn, 14, 1);
    bool is_q = extract32(insn, 30, 1);
    int esize = 8 << size; /* element size in bits */
    int i, ofs;
    int datasize = is_q ? 128 : 64;
    int elements = datasize / esize;
    TCGv_i64 tcg_res, tcg_resl, tcg_resh;

    /* opcode 0 is unallocated; 64 bit elements require the Q form */
    if (opcode == 0 || (size == 3 && !is_q)) {
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    /* Build the result in two 64 bit halves, ORing each selected
     * source element into position.
     */
    tcg_resl = tcg_const_i64(0);
    tcg_resh = tcg_const_i64(0);
    tcg_res = tcg_temp_new_i64();

    for (i = 0; i < elements; i++) {
        /* Pick the source register and element index for result elt i */
        switch (opcode) {
        case 1: /* UZP1/2 */
        {
            /* Even (part == 0) or odd (part == 1) elements of Rn, then Rm */
            int midpoint = elements / 2;
            if (i < midpoint) {
                read_vec_element(s, tcg_res, rn, 2 * i + part, size);
            } else {
                read_vec_element(s, tcg_res, rm,
                                 2 * (i - midpoint) + part, size);
            }
            break;
        }
        case 2: /* TRN1/2 */
            /* Alternate between Rn and Rm within each element pair */
            if (i & 1) {
                read_vec_element(s, tcg_res, rm, (i & ~1) + part, size);
            } else {
                read_vec_element(s, tcg_res, rn, (i & ~1) + part, size);
            }
            break;
        case 3: /* ZIP1/2 */
        {
            /* Interleave the low (part == 0) or high (part == 1)
             * halves of Rn and Rm.
             */
            int base = part * elements / 2;
            if (i & 1) {
                read_vec_element(s, tcg_res, rm, base + (i >> 1), size);
            } else {
                read_vec_element(s, tcg_res, rn, base + (i >> 1), size);
            }
            break;
        }
        default:
            g_assert_not_reached();
        }

        /* OR the element into the right half of the result */
        ofs = i * esize;
        if (ofs < 64) {
            tcg_gen_shli_i64(tcg_res, tcg_res, ofs);
            tcg_gen_or_i64(tcg_resl, tcg_resl, tcg_res);
        } else {
            tcg_gen_shli_i64(tcg_res, tcg_res, ofs - 64);
            tcg_gen_or_i64(tcg_resh, tcg_resh, tcg_res);
        }
    }

    tcg_temp_free_i64(tcg_res);

    write_vec_element(s, tcg_resl, rd, 0, MO_64);
    tcg_temp_free_i64(tcg_resl);
    write_vec_element(s, tcg_resh, rd, 1, MO_64);
    tcg_temp_free_i64(tcg_resh);
}
5538
5539static void do_minmaxop(DisasContext *s, TCGv_i32 tcg_elt1, TCGv_i32 tcg_elt2,
5540                        int opc, bool is_min, TCGv_ptr fpst)
5541{
5542    /* Helper function for disas_simd_across_lanes: do a single precision
5543     * min/max operation on the specified two inputs,
5544     * and return the result in tcg_elt1.
5545     */
5546    if (opc == 0xc) {
5547        if (is_min) {
5548            gen_helper_vfp_minnums(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
5549        } else {
5550            gen_helper_vfp_maxnums(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
5551        }
5552    } else {
5553        assert(opc == 0xf);
5554        if (is_min) {
5555            gen_helper_vfp_mins(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
5556        } else {
5557            gen_helper_vfp_maxs(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
5558        }
5559    }
5560}
5561
5562/* C3.6.4 AdvSIMD across lanes
5563 *   31  30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
5564 * +---+---+---+-----------+------+-----------+--------+-----+------+------+
5565 * | 0 | Q | U | 0 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
5566 * +---+---+---+-----------+------+-----------+--------+-----+------+------+
5567 */
5568static void disas_simd_across_lanes(DisasContext *s, uint32_t insn)
5569{
5570    int rd = extract32(insn, 0, 5);
5571    int rn = extract32(insn, 5, 5);
5572    int size = extract32(insn, 22, 2);
5573    int opcode = extract32(insn, 12, 5);
5574    bool is_q = extract32(insn, 30, 1);
5575    bool is_u = extract32(insn, 29, 1);
5576    bool is_fp = false;
5577    bool is_min = false;
5578    int esize;
5579    int elements;
5580    int i;
5581    TCGv_i64 tcg_res, tcg_elt;
5582
5583    switch (opcode) {
5584    case 0x1b: /* ADDV */
5585        if (is_u) {
5586            unallocated_encoding(s);
5587            return;
5588        }
5589        /* fall through */
5590    case 0x3: /* SADDLV, UADDLV */
5591    case 0xa: /* SMAXV, UMAXV */
5592    case 0x1a: /* SMINV, UMINV */
5593        if (size == 3 || (size == 2 && !is_q)) {
5594            unallocated_encoding(s);
5595            return;
5596        }
5597        break;
5598    case 0xc: /* FMAXNMV, FMINNMV */
5599    case 0xf: /* FMAXV, FMINV */
5600        if (!is_u || !is_q || extract32(size, 0, 1)) {
5601            unallocated_encoding(s);
5602            return;
5603        }
5604        /* Bit 1 of size field encodes min vs max, and actual size is always
5605         * 32 bits: adjust the size variable so following code can rely on it
5606         */
5607        is_min = extract32(size, 1, 1);
5608        is_fp = true;
5609        size = 2;
5610        break;
5611    default:
5612        unallocated_encoding(s);
5613        return;
5614    }
5615
5616    if (!fp_access_check(s)) {
5617        return;
5618    }
5619
5620    esize = 8 << size;
5621    elements = (is_q ? 128 : 64) / esize;
5622
5623    tcg_res = tcg_temp_new_i64();
5624    tcg_elt = tcg_temp_new_i64();
5625
5626    /* These instructions operate across all lanes of a vector
5627     * to produce a single result. We can guarantee that a 64
5628     * bit intermediate is sufficient:
5629     *  + for [US]ADDLV the maximum element size is 32 bits, and
5630     *    the result type is 64 bits
5631     *  + for FMAX*V, FMIN*V, ADDV the intermediate type is the
5632     *    same as the element size, which is 32 bits at most
5633     * For the integer operations we can choose to work at 64
5634     * or 32 bits and truncate at the end; for simplicity
5635     * we use 64 bits always. The floating point
5636     * ops do require 32 bit intermediates, though.
5637     */
5638    if (!is_fp) {
5639        read_vec_element(s, tcg_res, rn, 0, size | (is_u ? 0 : MO_SIGN));
5640
5641        for (i = 1; i < elements; i++) {
5642            read_vec_element(s, tcg_elt, rn, i, size | (is_u ? 0 : MO_SIGN));
5643
5644            switch (opcode) {
5645            case 0x03: /* SADDLV / UADDLV */
5646            case 0x1b: /* ADDV */
5647                tcg_gen_add_i64(tcg_res, tcg_res, tcg_elt);
5648                break;
5649            case 0x0a: /* SMAXV / UMAXV */
5650                tcg_gen_movcond_i64(is_u ? TCG_COND_GEU : TCG_COND_GE,
5651                                    tcg_res,
5652                                    tcg_res, tcg_elt, tcg_res, tcg_elt);
5653                break;
5654            case 0x1a: /* SMINV / UMINV */
5655                tcg_gen_movcond_i64(is_u ? TCG_COND_LEU : TCG_COND_LE,
5656                                    tcg_res,
5657                                    tcg_res, tcg_elt, tcg_res, tcg_elt);
5658                break;
5659                break;
5660            default:
5661                g_assert_not_reached();
5662            }
5663
5664        }
5665    } else {
5666        /* Floating point ops which work on 32 bit (single) intermediates.
5667         * Note that correct NaN propagation requires that we do these
5668         * operations in exactly the order specified by the pseudocode.
5669         */
5670        TCGv_i32 tcg_elt1 = tcg_temp_new_i32();
5671        TCGv_i32 tcg_elt2 = tcg_temp_new_i32();
5672        TCGv_i32 tcg_elt3 = tcg_temp_new_i32();
5673        TCGv_ptr fpst = get_fpstatus_ptr();
5674
5675        assert(esize == 32);
5676        assert(elements == 4);
5677
5678        read_vec_element(s, tcg_elt, rn, 0, MO_32);
5679        tcg_gen_extrl_i64_i32(tcg_elt1, tcg_elt);
5680        read_vec_element(s, tcg_elt, rn, 1, MO_32);
5681        tcg_gen_extrl_i64_i32(tcg_elt2, tcg_elt);
5682
5683        do_minmaxop(s, tcg_elt1, tcg_elt2, opcode, is_min, fpst);
5684
5685        read_vec_element(s, tcg_elt, rn, 2, MO_32);
5686        tcg_gen_extrl_i64_i32(tcg_elt2, tcg_elt);
5687        read_vec_element(s, tcg_elt, rn, 3, MO_32);
5688        tcg_gen_extrl_i64_i32(tcg_elt3, tcg_elt);
5689
5690        do_minmaxop(s, tcg_elt2, tcg_elt3, opcode, is_min, fpst);
5691
5692        do_minmaxop(s, tcg_elt1, tcg_elt2, opcode, is_min, fpst);
5693
5694        tcg_gen_extu_i32_i64(tcg_res, tcg_elt1);
5695        tcg_temp_free_i32(tcg_elt1);
5696        tcg_temp_free_i32(tcg_elt2);
5697        tcg_temp_free_i32(tcg_elt3);
5698        tcg_temp_free_ptr(fpst);
5699    }
5700
5701    tcg_temp_free_i64(tcg_elt);
5702
5703    /* Now truncate the result to the width required for the final output */
5704    if (opcode == 0x03) {
5705        /* SADDLV, UADDLV: result is 2*esize */
5706        size++;
5707    }
5708
5709    switch (size) {
5710    case 0:
5711        tcg_gen_ext8u_i64(tcg_res, tcg_res);
5712        break;
5713    case 1:
5714        tcg_gen_ext16u_i64(tcg_res, tcg_res);
5715        break;
5716    case 2:
5717        tcg_gen_ext32u_i64(tcg_res, tcg_res);
5718        break;
5719    case 3:
5720        break;
5721    default:
5722        g_assert_not_reached();
5723    }
5724
5725    write_fp_dreg(s, rd, tcg_res);
5726    tcg_temp_free_i64(tcg_res);
5727}
5728
5729/* C6.3.31 DUP (Element, Vector)
5730 *
5731 *  31  30   29              21 20    16 15        10  9    5 4    0
5732 * +---+---+-------------------+--------+-------------+------+------+
5733 * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 0 1 |  Rn  |  Rd  |
5734 * +---+---+-------------------+--------+-------------+------+------+
5735 *
5736 * size: encoded in imm5 (see ARM ARM LowestSetBit())
5737 */
5738static void handle_simd_dupe(DisasContext *s, int is_q, int rd, int rn,
5739                             int imm5)
5740{
5741    int size = ctz32(imm5);
5742    int esize = 8 << size;
5743    int elements = (is_q ? 128 : 64) / esize;
5744    int index, i;
5745    TCGv_i64 tmp;
5746
5747    if (size > 3 || (size == 3 && !is_q)) {
5748        unallocated_encoding(s);
5749        return;
5750    }
5751
5752    if (!fp_access_check(s)) {
5753        return;
5754    }
5755
5756    index = imm5 >> (size + 1);
5757
5758    tmp = tcg_temp_new_i64();
5759    read_vec_element(s, tmp, rn, index, size);
5760
5761    for (i = 0; i < elements; i++) {
5762        write_vec_element(s, tmp, rd, i, size);
5763    }
5764
5765    if (!is_q) {
5766        clear_vec_high(s, rd);
5767    }
5768
5769    tcg_temp_free_i64(tmp);
5770}
5771
5772/* C6.3.31 DUP (element, scalar)
5773 *  31                   21 20    16 15        10  9    5 4    0
5774 * +-----------------------+--------+-------------+------+------+
5775 * | 0 1 0 1 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 0 1 |  Rn  |  Rd  |
5776 * +-----------------------+--------+-------------+------+------+
5777 */
5778static void handle_simd_dupes(DisasContext *s, int rd, int rn,
5779                              int imm5)
5780{
5781    int size = ctz32(imm5);
5782    int index;
5783    TCGv_i64 tmp;
5784
5785    if (size > 3) {
5786        unallocated_encoding(s);
5787        return;
5788    }
5789
5790    if (!fp_access_check(s)) {
5791        return;
5792    }
5793
5794    index = imm5 >> (size + 1);
5795
5796    /* This instruction just extracts the specified element and
5797     * zero-extends it into the bottom of the destination register.
5798     */
5799    tmp = tcg_temp_new_i64();
5800    read_vec_element(s, tmp, rn, index, size);
5801    write_fp_dreg(s, rd, tmp);
5802    tcg_temp_free_i64(tmp);
5803}
5804
5805/* C6.3.32 DUP (General)
5806 *
5807 *  31  30   29              21 20    16 15        10  9    5 4    0
5808 * +---+---+-------------------+--------+-------------+------+------+
5809 * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 1 1 |  Rn  |  Rd  |
5810 * +---+---+-------------------+--------+-------------+------+------+
5811 *
5812 * size: encoded in imm5 (see ARM ARM LowestSetBit())
5813 */
5814static void handle_simd_dupg(DisasContext *s, int is_q, int rd, int rn,
5815                             int imm5)
5816{
5817    int size = ctz32(imm5);
5818    int esize = 8 << size;
5819    int elements = (is_q ? 128 : 64)/esize;
5820    int i = 0;
5821
5822    if (size > 3 || ((size == 3) && !is_q)) {
5823        unallocated_encoding(s);
5824        return;
5825    }
5826
5827    if (!fp_access_check(s)) {
5828        return;
5829    }
5830
5831    for (i = 0; i < elements; i++) {
5832        write_vec_element(s, cpu_reg(s, rn), rd, i, size);
5833    }
5834    if (!is_q) {
5835        clear_vec_high(s, rd);
5836    }
5837}
5838
5839/* C6.3.150 INS (Element)
5840 *
5841 *  31                   21 20    16 15  14    11  10 9    5 4    0
5842 * +-----------------------+--------+------------+---+------+------+
5843 * | 0 1 1 0 1 1 1 0 0 0 0 |  imm5  | 0 |  imm4  | 1 |  Rn  |  Rd  |
5844 * +-----------------------+--------+------------+---+------+------+
5845 *
5846 * size: encoded in imm5 (see ARM ARM LowestSetBit())
5847 * index: encoded in imm5<4:size+1>
5848 */
5849static void handle_simd_inse(DisasContext *s, int rd, int rn,
5850                             int imm4, int imm5)
5851{
5852    int size = ctz32(imm5);
5853    int src_index, dst_index;
5854    TCGv_i64 tmp;
5855
5856    if (size > 3) {
5857        unallocated_encoding(s);
5858        return;
5859    }
5860
5861    if (!fp_access_check(s)) {
5862        return;
5863    }
5864
5865    dst_index = extract32(imm5, 1+size, 5);
5866    src_index = extract32(imm4, size, 4);
5867
5868    tmp = tcg_temp_new_i64();
5869
5870    read_vec_element(s, tmp, rn, src_index, size);
5871    write_vec_element(s, tmp, rd, dst_index, size);
5872
5873    tcg_temp_free_i64(tmp);
5874}
5875
5876
5877/* C6.3.151 INS (General)
5878 *
5879 *  31                   21 20    16 15        10  9    5 4    0
5880 * +-----------------------+--------+-------------+------+------+
5881 * | 0 1 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 1 1 1 |  Rn  |  Rd  |
5882 * +-----------------------+--------+-------------+------+------+
5883 *
5884 * size: encoded in imm5 (see ARM ARM LowestSetBit())
5885 * index: encoded in imm5<4:size+1>
5886 */
5887static void handle_simd_insg(DisasContext *s, int rd, int rn, int imm5)
5888{
5889    int size = ctz32(imm5);
5890    int idx;
5891
5892    if (size > 3) {
5893        unallocated_encoding(s);
5894        return;
5895    }
5896
5897    if (!fp_access_check(s)) {
5898        return;
5899    }
5900
5901    idx = extract32(imm5, 1 + size, 4 - size);
5902    write_vec_element(s, cpu_reg(s, rn), rd, idx, size);
5903}
5904
5905/*
5906 * C6.3.321 UMOV (General)
5907 * C6.3.237 SMOV (General)
5908 *
5909 *  31  30   29              21 20    16 15    12   10 9    5 4    0
5910 * +---+---+-------------------+--------+-------------+------+------+
5911 * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 1 U 1 1 |  Rn  |  Rd  |
5912 * +---+---+-------------------+--------+-------------+------+------+
5913 *
5914 * U: unsigned when set
5915 * size: encoded in imm5 (see ARM ARM LowestSetBit())
5916 */
5917static void handle_simd_umov_smov(DisasContext *s, int is_q, int is_signed,
5918                                  int rn, int rd, int imm5)
5919{
5920    int size = ctz32(imm5);
5921    int element;
5922    TCGv_i64 tcg_rd;
5923
5924    /* Check for UnallocatedEncodings */
5925    if (is_signed) {
5926        if (size > 2 || (size == 2 && !is_q)) {
5927            unallocated_encoding(s);
5928            return;
5929        }
5930    } else {
5931        if (size > 3
5932            || (size < 3 && is_q)
5933            || (size == 3 && !is_q)) {
5934            unallocated_encoding(s);
5935            return;
5936        }
5937    }
5938
5939    if (!fp_access_check(s)) {
5940        return;
5941    }
5942
5943    element = extract32(imm5, 1+size, 4);
5944
5945    tcg_rd = cpu_reg(s, rd);
5946    read_vec_element(s, tcg_rd, rn, element, size | (is_signed ? MO_SIGN : 0));
5947    if (is_signed && !is_q) {
5948        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
5949    }
5950}
5951
5952/* C3.6.5 AdvSIMD copy
5953 *   31  30  29  28             21 20  16 15  14  11 10  9    5 4    0
5954 * +---+---+----+-----------------+------+---+------+---+------+------+
5955 * | 0 | Q | op | 0 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 |  Rn  |  Rd  |
5956 * +---+---+----+-----------------+------+---+------+---+------+------+
5957 */
5958static void disas_simd_copy(DisasContext *s, uint32_t insn)
5959{
5960    int rd = extract32(insn, 0, 5);
5961    int rn = extract32(insn, 5, 5);
5962    int imm4 = extract32(insn, 11, 4);
5963    int op = extract32(insn, 29, 1);
5964    int is_q = extract32(insn, 30, 1);
5965    int imm5 = extract32(insn, 16, 5);
5966
5967    if (op) {
5968        if (is_q) {
5969            /* INS (element) */
5970            handle_simd_inse(s, rd, rn, imm4, imm5);
5971        } else {
5972            unallocated_encoding(s);
5973        }
5974    } else {
5975        switch (imm4) {
5976        case 0:
5977            /* DUP (element - vector) */
5978            handle_simd_dupe(s, is_q, rd, rn, imm5);
5979            break;
5980        case 1:
5981            /* DUP (general) */
5982            handle_simd_dupg(s, is_q, rd, rn, imm5);
5983            break;
5984        case 3:
5985            if (is_q) {
5986                /* INS (general) */
5987                handle_simd_insg(s, rd, rn, imm5);
5988            } else {
5989                unallocated_encoding(s);
5990            }
5991            break;
5992        case 5:
5993        case 7:
5994            /* UMOV/SMOV (is_q indicates 32/64; imm4 indicates signedness) */
5995            handle_simd_umov_smov(s, is_q, (imm4 == 5), rn, rd, imm5);
5996            break;
5997        default:
5998            unallocated_encoding(s);
5999            break;
6000        }
6001    }
6002}
6003
/* C3.6.6 AdvSIMD modified immediate
 *  31  30   29  28                 19 18 16 15   12  11  10  9     5 4    0
 * +---+---+----+---------------------+-----+-------+----+---+-------+------+
 * | 0 | Q | op | 0 1 1 1 1 0 0 0 0 0 | abc | cmode | o2 | 1 | defgh |  Rd  |
 * +---+---+----+---------------------+-----+-------+----+---+-------+------+
 *
 * There are a number of operations that can be carried out here:
 *   MOVI - move (shifted) imm into register
 *   MVNI - move inverted (shifted) imm into register
 *   ORR  - bitwise OR of (shifted) imm with register
 *   BIC  - bitwise clear of (shifted) imm with register
 *
 * The immediate itself is built by expanding the 8-bit abcdefgh field
 * according to cmode, following the ARM ARM AdvSIMDExpandImm() pseudocode.
 */
static void disas_simd_mod_imm(DisasContext *s, uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int cmode = extract32(insn, 12, 4);
    int cmode_3_1 = extract32(cmode, 1, 3);
    int cmode_0 = extract32(cmode, 0, 1);
    int o2 = extract32(insn, 11, 1);
    /* The 8-bit immediate is split across the abc and defgh fields */
    uint64_t abcdefgh = extract32(insn, 5, 5) | (extract32(insn, 16, 3) << 5);
    bool is_neg = extract32(insn, 29, 1);
    bool is_q = extract32(insn, 30, 1);
    uint64_t imm = 0;
    TCGv_i64 tcg_rd, tcg_imm;
    int i;

    if (o2 != 0 || ((cmode == 0xf) && is_neg && !is_q)) {
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    /* See AdvSIMDExpandImm() in ARM ARM */
    switch (cmode_3_1) {
    case 0: /* Replicate(Zeros(24):imm8, 2) */
    case 1: /* Replicate(Zeros(16):imm8:Zeros(8), 2) */
    case 2: /* Replicate(Zeros(8):imm8:Zeros(16), 2) */
    case 3: /* Replicate(imm8:Zeros(24), 2) */
    {
        /* 32-bit pattern: imm8 shifted by 0/8/16/24, replicated */
        int shift = cmode_3_1 * 8;
        imm = bitfield_replicate(abcdefgh << shift, 32);
        break;
    }
    case 4: /* Replicate(Zeros(8):imm8, 4) */
    case 5: /* Replicate(imm8:Zeros(8), 4) */
    {
        /* 16-bit pattern: imm8 shifted by 0/8, replicated */
        int shift = (cmode_3_1 & 0x1) * 8;
        imm = bitfield_replicate(abcdefgh << shift, 16);
        break;
    }
    case 6:
        /* "Shifting ones" forms: low bits filled with ones */
        if (cmode_0) {
            /* Replicate(Zeros(8):imm8:Ones(16), 2) */
            imm = (abcdefgh << 16) | 0xffff;
        } else {
            /* Replicate(Zeros(16):imm8:Ones(8), 2) */
            imm = (abcdefgh << 8) | 0xff;
        }
        imm = bitfield_replicate(imm, 32);
        break;
    case 7:
        if (!cmode_0 && !is_neg) {
            /* Per-byte replication of imm8 (MOVI 8-bit) */
            imm = bitfield_replicate(abcdefgh, 8);
        } else if (!cmode_0 && is_neg) {
            /* 64-bit byte mask: each imm8 bit expands to a whole byte
             * of ones or zeroes
             */
            int i;
            imm = 0;
            for (i = 0; i < 8; i++) {
                if ((abcdefgh) & (1 << i)) {
                    imm |= 0xffULL << (i * 8);
                }
            }
        } else if (cmode_0) {
            /* FP-style immediate expansion (sign:exponent:fraction) */
            if (is_neg) {
                /* 64-bit (double-precision pattern) */
                imm = (abcdefgh & 0x3f) << 48;
                if (abcdefgh & 0x80) {
                    imm |= 0x8000000000000000ULL;
                }
                if (abcdefgh & 0x40) {
                    imm |= 0x3fc0000000000000ULL;
                } else {
                    imm |= 0x4000000000000000ULL;
                }
            } else {
                /* 32-bit (single-precision pattern), replicated to 64 */
                imm = (abcdefgh & 0x3f) << 19;
                if (abcdefgh & 0x80) {
                    imm |= 0x80000000;
                }
                if (abcdefgh & 0x40) {
                    imm |= 0x3e000000;
                } else {
                    imm |= 0x40000000;
                }
                imm |= (imm << 32);
            }
        }
        break;
    }

    /* MVNI/BIC use the bitwise inverse of the expanded immediate
     * (except in the cmode_3_1 == 7 group, which encodes its own forms)
     */
    if (cmode_3_1 != 7 && is_neg) {
        imm = ~imm;
    }

    tcg_imm = tcg_const_i64(imm);
    tcg_rd = new_tmp_a64(s);

    /* Apply the operation to the low and high 64-bit halves in turn */
    for (i = 0; i < 2; i++) {
        int foffs = i ? fp_reg_hi_offset(s, rd) : fp_reg_offset(s, rd, MO_64);

        if (i == 1 && !is_q) {
            /* non-quad ops clear high half of vector */
            tcg_gen_movi_i64(tcg_rd, 0);
        } else if ((cmode & 0x9) == 0x1 || (cmode & 0xd) == 0x9) {
            /* ORR/BIC cmode patterns: read-modify-write the register */
            tcg_gen_ld_i64(tcg_rd, cpu_env, foffs);
            if (is_neg) {
                /* AND (BIC) */
                tcg_gen_and_i64(tcg_rd, tcg_rd, tcg_imm);
            } else {
                /* ORR */
                tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_imm);
            }
        } else {
            /* MOVI */
            tcg_gen_mov_i64(tcg_rd, tcg_imm);
        }
        tcg_gen_st_i64(tcg_rd, cpu_env, foffs);
    }

    tcg_temp_free_i64(tcg_imm);
}
6136
6137/* C3.6.7 AdvSIMD scalar copy
6138 *  31 30  29  28             21 20  16 15  14  11 10  9    5 4    0
6139 * +-----+----+-----------------+------+---+------+---+------+------+
6140 * | 0 1 | op | 1 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 |  Rn  |  Rd  |
6141 * +-----+----+-----------------+------+---+------+---+------+------+
6142 */
6143static void disas_simd_scalar_copy(DisasContext *s, uint32_t insn)
6144{
6145    int rd = extract32(insn, 0, 5);
6146    int rn = extract32(insn, 5, 5);
6147    int imm4 = extract32(insn, 11, 4);
6148    int imm5 = extract32(insn, 16, 5);
6149    int op = extract32(insn, 29, 1);
6150
6151    if (op != 0 || imm4 != 0) {
6152        unallocated_encoding(s);
6153        return;
6154    }
6155
6156    /* DUP (element, scalar) */
6157    handle_simd_dupes(s, rd, rn, imm5);
6158}
6159
/* C3.6.8 AdvSIMD scalar pairwise
 *  31 30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
 * +-----+---+-----------+------+-----------+--------+-----+------+------+
 * | 0 1 | U | 1 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
 * +-----+---+-----------+------+-----------+--------+-----+------+------+
 *
 * Combines the two elements of the Rn vector with the selected op
 * (ADDP, FADDP, FMAXP, FMINP, FMAXNMP, FMINNMP) and writes the
 * scalar result to Rd.
 */
static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn)
{
    int u = extract32(insn, 29, 1);
    int size = extract32(insn, 22, 2);
    int opcode = extract32(insn, 12, 5);
    int rn = extract32(insn, 5, 5);
    int rd = extract32(insn, 0, 5);
    TCGv_ptr fpst;

    /* For some ops (the FP ones), size[1] is part of the encoding.
     * For ADDP strictly it is not but size[1] is always 1 for valid
     * encodings.
     */
    opcode |= (extract32(size, 1, 1) << 5);

    switch (opcode) {
    case 0x3b: /* ADDP */
        if (u || size != 3) {
            unallocated_encoding(s);
            return;
        }
        if (!fp_access_check(s)) {
            return;
        }

        /* Integer op: no FP status needed; mark fpst unused so the
         * cleanup at the end knows not to free it.
         */
        TCGV_UNUSED_PTR(fpst);
        break;
    case 0xc: /* FMAXNMP */
    case 0xd: /* FADDP */
    case 0xf: /* FMAXP */
    case 0x2c: /* FMINNMP */
    case 0x2f: /* FMINP */
        /* FP op, size[0] is 32 or 64 bit */
        if (!u) {
            unallocated_encoding(s);
            return;
        }
        if (!fp_access_check(s)) {
            return;
        }

        /* Normalize size to the 2/3 convention used below */
        size = extract32(size, 0, 1) ? 3 : 2;
        fpst = get_fpstatus_ptr();
        break;
    default:
        unallocated_encoding(s);
        return;
    }

    if (size == 3) {
        /* 64-bit elements: double-precision FP ops, plus integer ADDP */
        TCGv_i64 tcg_op1 = tcg_temp_new_i64();
        TCGv_i64 tcg_op2 = tcg_temp_new_i64();
        TCGv_i64 tcg_res = tcg_temp_new_i64();

        read_vec_element(s, tcg_op1, rn, 0, MO_64);
        read_vec_element(s, tcg_op2, rn, 1, MO_64);

        switch (opcode) {
        case 0x3b: /* ADDP */
            tcg_gen_add_i64(tcg_res, tcg_op1, tcg_op2);
            break;
        case 0xc: /* FMAXNMP */
            gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
            break;
        case 0xd: /* FADDP */
            gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
            break;
        case 0xf: /* FMAXP */
            gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
            break;
        case 0x2c: /* FMINNMP */
            gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
            break;
        case 0x2f: /* FMINP */
            gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
            break;
        default:
            g_assert_not_reached();
        }

        write_fp_dreg(s, rd, tcg_res);

        tcg_temp_free_i64(tcg_op1);
        tcg_temp_free_i64(tcg_op2);
        tcg_temp_free_i64(tcg_res);
    } else {
        /* 32-bit elements: single-precision FP ops only */
        TCGv_i32 tcg_op1 = tcg_temp_new_i32();
        TCGv_i32 tcg_op2 = tcg_temp_new_i32();
        TCGv_i32 tcg_res = tcg_temp_new_i32();

        read_vec_element_i32(s, tcg_op1, rn, 0, MO_32);
        read_vec_element_i32(s, tcg_op2, rn, 1, MO_32);

        switch (opcode) {
        case 0xc: /* FMAXNMP */
            gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
            break;
        case 0xd: /* FADDP */
            gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
            break;
        case 0xf: /* FMAXP */
            gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
            break;
        case 0x2c: /* FMINNMP */
            gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
            break;
        case 0x2f: /* FMINP */
            gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
            break;
        default:
            g_assert_not_reached();
        }

        write_fp_sreg(s, rd, tcg_res);

        tcg_temp_free_i32(tcg_op1);
        tcg_temp_free_i32(tcg_op2);
        tcg_temp_free_i32(tcg_res);
    }

    /* fpst is only allocated on the FP paths above */
    if (!TCGV_IS_UNUSED_PTR(fpst)) {
        tcg_temp_free_ptr(fpst);
    }
}
6290
6291/*
6292 * Common SSHR[RA]/USHR[RA] - Shift right (optional rounding/accumulate)
6293 *
6294 * This code is handles the common shifting code and is used by both
6295 * the vector and scalar code.
6296 */
6297static void handle_shri_with_rndacc(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
6298                                    TCGv_i64 tcg_rnd, bool accumulate,
6299                                    bool is_u, int size, int shift)
6300{
6301    bool extended_result = false;
6302    bool round = !TCGV_IS_UNUSED_I64(tcg_rnd);
6303    int ext_lshift = 0;
6304    TCGv_i64 tcg_src_hi;
6305
6306    if (round && size == 3) {
6307        extended_result = true;
6308        ext_lshift = 64 - shift;
6309        tcg_src_hi = tcg_temp_new_i64();
6310    } else if (shift == 64) {
6311        if (!accumulate && is_u) {
6312            /* result is zero */
6313            tcg_gen_movi_i64(tcg_res, 0);
6314            return;
6315        }
6316    }
6317
6318    /* Deal with the rounding step */
6319    if (round) {
6320        if (extended_result) {
6321            TCGv_i64 tcg_zero = tcg_const_i64(0);
6322            if (!is_u) {
6323                /* take care of sign extending tcg_res */
6324                tcg_gen_sari_i64(tcg_src_hi, tcg_src, 63);
6325                tcg_gen_add2_i64(tcg_src, tcg_src_hi,
6326                                 tcg_src, tcg_src_hi,
6327                                 tcg_rnd, tcg_zero);
6328            } else {
6329                tcg_gen_add2_i64(tcg_src, tcg_src_hi,
6330                                 tcg_src, tcg_zero,
6331                                 tcg_rnd, tcg_zero);
6332            }
6333            tcg_temp_free_i64(tcg_zero);
6334        } else {
6335            tcg_gen_add_i64(tcg_src, tcg_src, tcg_rnd);
6336        }
6337    }
6338
6339    /* Now do the shift right */
6340    if (round && extended_result) {
6341        /* extended case, >64 bit precision required */
6342        if (ext_lshift == 0) {
6343            /* special case, only high bits matter */
6344            tcg_gen_mov_i64(tcg_src, tcg_src_hi);
6345        } else {
6346            tcg_gen_shri_i64(tcg_src, tcg_src, shift);
6347            tcg_gen_shli_i64(tcg_src_hi, tcg_src_hi, ext_lshift);
6348            tcg_gen_or_i64(tcg_src, tcg_src, tcg_src_hi);
6349        }
6350    } else {
6351        if (is_u) {
6352            if (shift == 64) {
6353                /* essentially shifting in 64 zeros */
6354                tcg_gen_movi_i64(tcg_src, 0);
6355            } else {
6356                tcg_gen_shri_i64(tcg_src, tcg_src, shift);
6357            }
6358        } else {
6359            if (shift == 64) {
6360                /* effectively extending the sign-bit */
6361                tcg_gen_sari_i64(tcg_src, tcg_src, 63);
6362            } else {
6363                tcg_gen_sari_i64(tcg_src, tcg_src, shift);
6364            }
6365        }
6366    }
6367
6368    if (accumulate) {
6369        tcg_gen_add_i64(tcg_res, tcg_res, tcg_src);
6370    } else {
6371        tcg_gen_mov_i64(tcg_res, tcg_src);
6372    }
6373
6374    if (extended_result) {
6375        tcg_temp_free_i64(tcg_src_hi);
6376    }
6377}
6378
6379/* Common SHL/SLI - Shift left with an optional insert */
6380static void handle_shli_with_ins(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
6381                                 bool insert, int shift)
6382{
6383    if (insert) { /* SLI */
6384        tcg_gen_deposit_i64(tcg_res, tcg_res, tcg_src, shift, 64 - shift);
6385    } else { /* SHL */
6386        tcg_gen_shli_i64(tcg_res, tcg_src, shift);
6387    }
6388}
6389
6390/* SRI: shift right with insert */
6391static void handle_shri_with_ins(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
6392                                 int size, int shift)
6393{
6394    int esize = 8 << size;
6395
6396    /* shift count same as element size is valid but does nothing;
6397     * special case to avoid potential shift by 64.
6398     */
6399    if (shift != esize) {
6400        tcg_gen_shri_i64(tcg_src, tcg_src, shift);
6401        tcg_gen_deposit_i64(tcg_res, tcg_res, tcg_src, 0, esize - shift);
6402    }
6403}
6404
6405/* SSHR[RA]/USHR[RA] - Scalar shift right (optional rounding/accumulate) */
6406static void handle_scalar_simd_shri(DisasContext *s,
6407                                    bool is_u, int immh, int immb,
6408                                    int opcode, int rn, int rd)
6409{
6410    const int size = 3;
6411    int immhb = immh << 3 | immb;
6412    int shift = 2 * (8 << size) - immhb;
6413    bool accumulate = false;
6414    bool round = false;
6415    bool insert = false;
6416    TCGv_i64 tcg_rn;
6417    TCGv_i64 tcg_rd;
6418    TCGv_i64 tcg_round;
6419
6420    if (!extract32(immh, 3, 1)) {
6421        unallocated_encoding(s);
6422        return;
6423    }
6424
6425    if (!fp_access_check(s)) {
6426        return;
6427    }
6428
6429    switch (opcode) {
6430    case 0x02: /* SSRA / USRA (accumulate) */
6431        accumulate = true;
6432        break;
6433    case 0x04: /* SRSHR / URSHR (rounding) */
6434        round = true;
6435        break;
6436    case 0x06: /* SRSRA / URSRA (accum + rounding) */
6437        accumulate = round = true;
6438        break;
6439    case 0x08: /* SRI */
6440        insert = true;
6441        break;
6442    }
6443
6444    if (round) {
6445        uint64_t round_const = 1ULL << (shift - 1);
6446        tcg_round = tcg_const_i64(round_const);
6447    } else {
6448        TCGV_UNUSED_I64(tcg_round);
6449    }
6450
6451    tcg_rn = read_fp_dreg(s, rn);
6452    tcg_rd = (accumulate || insert) ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
6453
6454    if (insert) {
6455        handle_shri_with_ins(tcg_rd, tcg_rn, size, shift);
6456    } else {
6457        handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
6458                                accumulate, is_u, size, shift);
6459    }
6460
6461    write_fp_dreg(s, rd, tcg_rd);
6462
6463    tcg_temp_free_i64(tcg_rn);
6464    tcg_temp_free_i64(tcg_rd);
6465    if (round) {
6466        tcg_temp_free_i64(tcg_round);
6467    }
6468}
6469
6470/* SHL/SLI - Scalar shift left */
6471static void handle_scalar_simd_shli(DisasContext *s, bool insert,
6472                                    int immh, int immb, int opcode,
6473                                    int rn, int rd)
6474{
6475    int size = 32 - clz32(immh) - 1;
6476    int immhb = immh << 3 | immb;
6477    int shift = immhb - (8 << size);
6478    TCGv_i64 tcg_rn = new_tmp_a64(s);
6479    TCGv_i64 tcg_rd = new_tmp_a64(s);
6480
6481    if (!extract32(immh, 3, 1)) {
6482        unallocated_encoding(s);
6483        return;
6484    }
6485
6486    if (!fp_access_check(s)) {
6487        return;
6488    }
6489
6490    tcg_rn = read_fp_dreg(s, rn);
6491    tcg_rd = insert ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
6492
6493    handle_shli_with_ins(tcg_rd, tcg_rn, insert, shift);
6494
6495    write_fp_dreg(s, rd, tcg_rd);
6496
6497    tcg_temp_free_i64(tcg_rn);
6498    tcg_temp_free_i64(tcg_rd);
6499}
6500
/* SQSHRN/SQSHRUN - Saturating (signed/unsigned) shift right with
 * (signed/unsigned) narrowing.
 * Covers the scalar and vector forms; for the vector forms is_q
 * selects the "2" variants which write the high 64 bits of Vd.
 */
static void handle_vec_simd_sqshrn(DisasContext *s, bool is_scalar, bool is_q,
                                   bool is_u_shift, bool is_u_narrow,
                                   int immh, int immb, int opcode,
                                   int rn, int rd)
{
    int immhb = immh << 3 | immb;
    /* size is log2 of the *narrowed* element width in bytes; source
     * elements are read at twice that width (see ldop, size + 1).
     */
    int size = 32 - clz32(immh) - 1;
    int esize = 8 << size;
    /* Encoded shift amount counts down from 2*esize as immhb grows */
    int shift = (2 * esize) - immhb;
    /* Scalar narrows one element; vector always produces 64 result bits */
    int elements = is_scalar ? 1 : (64 / esize);
    /* opcode bit 0 distinguishes the rounding (SQRSHRN/SQRSHRUN) forms */
    bool round = extract32(opcode, 0, 1);
    TCGMemOp ldop = (size + 1) | (is_u_shift ? 0 : MO_SIGN);
    TCGv_i64 tcg_rn, tcg_rd, tcg_round;
    TCGv_i32 tcg_rd_narrowed;
    TCGv_i64 tcg_final;

    /* Narrowing helpers indexed by [size][unsigned-narrow?]; the
     * "unarrow" entries saturate a signed source to an unsigned
     * destination range (SQSHRUN). size 3 rows are reserved.
     */
    static NeonGenNarrowEnvFn * const signed_narrow_fns[4][2] = {
        { gen_helper_neon_narrow_sat_s8,
          gen_helper_neon_unarrow_sat8 },
        { gen_helper_neon_narrow_sat_s16,
          gen_helper_neon_unarrow_sat16 },
        { gen_helper_neon_narrow_sat_s32,
          gen_helper_neon_unarrow_sat32 },
        { NULL, NULL },
    };
    static NeonGenNarrowEnvFn * const unsigned_narrow_fns[4] = {
        gen_helper_neon_narrow_sat_u8,
        gen_helper_neon_narrow_sat_u16,
        gen_helper_neon_narrow_sat_u32,
        NULL
    };
    NeonGenNarrowEnvFn *narrowfn;

    int i;

    assert(size < 4);

    /* immh<3> set would imply 64-bit narrowed elements: reserved */
    if (extract32(immh, 3, 1)) {
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    if (is_u_shift) {
        narrowfn = unsigned_narrow_fns[size];
    } else {
        narrowfn = signed_narrow_fns[size][is_u_narrow ? 1 : 0];
    }

    tcg_rn = tcg_temp_new_i64();
    tcg_rd = tcg_temp_new_i64();
    tcg_rd_narrowed = tcg_temp_new_i32();
    /* Accumulates the narrowed elements deposited side by side */
    tcg_final = tcg_const_i64(0);

    if (round) {
        uint64_t round_const = 1ULL << (shift - 1);
        tcg_round = tcg_const_i64(round_const);
    } else {
        TCGV_UNUSED_I64(tcg_round);
    }

    for (i = 0; i < elements; i++) {
        read_vec_element(s, tcg_rn, rn, i, ldop);
        /* Shift (with optional rounding) at the wide element size... */
        handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
                                false, is_u_shift, size+1, shift);
        /* ...then saturate down to the narrow element size */
        narrowfn(tcg_rd_narrowed, cpu_env, tcg_rd);
        tcg_gen_extu_i32_i64(tcg_rd, tcg_rd_narrowed);
        tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
    }

    /* Non-Q forms write the low half and clear the high half; the
     * Q ("2") forms write the high half and preserve the low half.
     */
    if (!is_q) {
        clear_vec_high(s, rd);
        write_vec_element(s, tcg_final, rd, 0, MO_64);
    } else {
        write_vec_element(s, tcg_final, rd, 1, MO_64);
    }

    if (round) {
        tcg_temp_free_i64(tcg_round);
    }
    tcg_temp_free_i64(tcg_rn);
    tcg_temp_free_i64(tcg_rd);
    tcg_temp_free_i32(tcg_rd_narrowed);
    tcg_temp_free_i64(tcg_final);
    return;
}
6592
/* SQSHLU, UQSHL, SQSHL: saturating left shifts.
 * src_unsigned/dst_unsigned select the source interpretation and the
 * saturation range: SQSHL is signed->signed, UQSHL unsigned->unsigned,
 * SQSHLU signed source saturated to an unsigned range.
 */
static void handle_simd_qshl(DisasContext *s, bool scalar, bool is_q,
                             bool src_unsigned, bool dst_unsigned,
                             int immh, int immb, int rn, int rd)
{
    int immhb = immh << 3 | immb;
    /* log2 of the element size in bytes, from the leading bit of immh */
    int size = 32 - clz32(immh) - 1;
    int shift = immhb - (8 << size);
    int pass;

    /* Caller (the decoder) has already rejected immh == 0 and the
     * scalar-with-Q combination.
     */
    assert(immh != 0);
    assert(!(scalar && is_q));

    if (!scalar) {
        /* 64-bit elements without Q would address a nonexistent high half */
        if (!is_q && extract32(immh, 3, 1)) {
            unallocated_encoding(s);
            return;
        }

        /* Since we use the variable-shift helpers we must
         * replicate the shift count into each element of
         * the tcg_shift value.
         */
        switch (size) {
        case 0:
            shift |= shift << 8;
            /* fall through */
        case 1:
            shift |= shift << 16;
            break;
        case 2:
        case 3:
            break;
        default:
            g_assert_not_reached();
        }
    }

    if (!fp_access_check(s)) {
        return;
    }

    if (size == 3) {
        /* 64-bit elements: one or two passes over the 64-bit lanes */
        TCGv_i64 tcg_shift = tcg_const_i64(shift);
        /* Helper table indexed [src_unsigned][dst_unsigned]; the
         * unsigned-source/signed-dest combination does not exist.
         */
        static NeonGenTwo64OpEnvFn * const fns[2][2] = {
            { gen_helper_neon_qshl_s64, gen_helper_neon_qshlu_s64 },
            { NULL, gen_helper_neon_qshl_u64 },
        };
        NeonGenTwo64OpEnvFn *genfn = fns[src_unsigned][dst_unsigned];
        int maxpass = is_q ? 2 : 1;

        for (pass = 0; pass < maxpass; pass++) {
            TCGv_i64 tcg_op = tcg_temp_new_i64();

            read_vec_element(s, tcg_op, rn, pass, MO_64);
            genfn(tcg_op, cpu_env, tcg_op, tcg_shift);
            write_vec_element(s, tcg_op, rd, pass, MO_64);

            tcg_temp_free_i64(tcg_op);
        }
        tcg_temp_free_i64(tcg_shift);

        if (!is_q) {
            clear_vec_high(s, rd);
        }
    } else {
        /* 8/16/32-bit elements, processed 32 bits at a time (the
         * helpers operate on all sub-elements of a 32-bit value).
         */
        TCGv_i32 tcg_shift = tcg_const_i32(shift);
        /* Indexed [src_unsigned][dst_unsigned][size] as above */
        static NeonGenTwoOpEnvFn * const fns[2][2][3] = {
            {
                { gen_helper_neon_qshl_s8,
                  gen_helper_neon_qshl_s16,
                  gen_helper_neon_qshl_s32 },
                { gen_helper_neon_qshlu_s8,
                  gen_helper_neon_qshlu_s16,
                  gen_helper_neon_qshlu_s32 }
            }, {
                { NULL, NULL, NULL },
                { gen_helper_neon_qshl_u8,
                  gen_helper_neon_qshl_u16,
                  gen_helper_neon_qshl_u32 }
            }
        };
        NeonGenTwoOpEnvFn *genfn = fns[src_unsigned][dst_unsigned][size];
        TCGMemOp memop = scalar ? size : MO_32;
        int maxpass = scalar ? 1 : is_q ? 4 : 2;

        for (pass = 0; pass < maxpass; pass++) {
            TCGv_i32 tcg_op = tcg_temp_new_i32();

            read_vec_element_i32(s, tcg_op, rn, pass, memop);
            genfn(tcg_op, cpu_env, tcg_op, tcg_shift);
            if (scalar) {
                /* Scalar result is zero-extended to 32 bits before the
                 * write (which itself clears the rest of the register).
                 */
                switch (size) {
                case 0:
                    tcg_gen_ext8u_i32(tcg_op, tcg_op);
                    break;
                case 1:
                    tcg_gen_ext16u_i32(tcg_op, tcg_op);
                    break;
                case 2:
                    break;
                default:
                    g_assert_not_reached();
                }
                write_fp_sreg(s, rd, tcg_op);
            } else {
                write_vec_element_i32(s, tcg_op, rd, pass, MO_32);
            }

            tcg_temp_free_i32(tcg_op);
        }
        tcg_temp_free_i32(tcg_shift);

        if (!is_q && !scalar) {
            clear_vec_high(s, rd);
        }
    }
}
6711
/* Common vector code for handling integer to FP conversion.
 * Converts 'elements' integer elements (signed if is_signed) of size
 * 'size' from rn into single/double results in rd, dividing by
 * 2^fracbits (fracbits == 0 for the plain SCVTF/UCVTF forms).
 */
static void handle_simd_intfp_conv(DisasContext *s, int rd, int rn,
                                   int elements, int is_signed,
                                   int fracbits, int size)
{
    bool is_double = size == 3 ? true : false;
    TCGv_ptr tcg_fpst = get_fpstatus_ptr();
    TCGv_i32 tcg_shift = tcg_const_i32(fracbits);
    TCGv_i64 tcg_int = tcg_temp_new_i64();
    /* Sign-extend the source read if converting signed values */
    TCGMemOp mop = size | (is_signed ? MO_SIGN : 0);
    int pass;

    for (pass = 0; pass < elements; pass++) {
        read_vec_element(s, tcg_int, rn, pass, mop);

        if (is_double) {
            TCGv_i64 tcg_double = tcg_temp_new_i64();
            if (is_signed) {
                gen_helper_vfp_sqtod(tcg_double, tcg_int,
                                     tcg_shift, tcg_fpst);
            } else {
                gen_helper_vfp_uqtod(tcg_double, tcg_int,
                                     tcg_shift, tcg_fpst);
            }
            /* The scalar (single-element) case must clear the rest of
             * the destination register, which write_fp_dreg does.
             */
            if (elements == 1) {
                write_fp_dreg(s, rd, tcg_double);
            } else {
                write_vec_element(s, tcg_double, rd, pass, MO_64);
            }
            tcg_temp_free_i64(tcg_double);
        } else {
            TCGv_i32 tcg_single = tcg_temp_new_i32();
            if (is_signed) {
                gen_helper_vfp_sqtos(tcg_single, tcg_int,
                                     tcg_shift, tcg_fpst);
            } else {
                gen_helper_vfp_uqtos(tcg_single, tcg_int,
                                     tcg_shift, tcg_fpst);
            }
            if (elements == 1) {
                write_fp_sreg(s, rd, tcg_single);
            } else {
                write_vec_element_i32(s, tcg_single, rd, pass, MO_32);
            }
            tcg_temp_free_i32(tcg_single);
        }
    }

    /* 2 x single == a 64-bit (non-Q) vector op: clear the high half */
    if (!is_double && elements == 2) {
        clear_vec_high(s, rd);
    }

    tcg_temp_free_i64(tcg_int);
    tcg_temp_free_ptr(tcg_fpst);
    tcg_temp_free_i32(tcg_shift);
}
6768
6769/* UCVTF/SCVTF - Integer to FP conversion */
6770static void handle_simd_shift_intfp_conv(DisasContext *s, bool is_scalar,
6771                                         bool is_q, bool is_u,
6772                                         int immh, int immb, int opcode,
6773                                         int rn, int rd)
6774{
6775    bool is_double = extract32(immh, 3, 1);
6776    int size = is_double ? MO_64 : MO_32;
6777    int elements;
6778    int immhb = immh << 3 | immb;
6779    int fracbits = (is_double ? 128 : 64) - immhb;
6780
6781    if (!extract32(immh, 2, 2)) {
6782        unallocated_encoding(s);
6783        return;
6784    }
6785
6786    if (is_scalar) {
6787        elements = 1;
6788    } else {
6789        elements = is_double ? 2 : is_q ? 4 : 2;
6790        if (is_double && !is_q) {
6791            unallocated_encoding(s);
6792            return;
6793        }
6794    }
6795
6796    if (!fp_access_check(s)) {
6797        return;
6798    }
6799
6800    /* immh == 0 would be a failure of the decode logic */
6801    g_assert(immh);
6802
6803    handle_simd_intfp_conv(s, rd, rn, elements, !is_u, fracbits, size);
6804}
6805
/* FCVTZS, FVCVTZU - FP to fixedpoint conversion.
 * Converts single/double elements to fixed-point integers by
 * multiplying by 2^fracbits and rounding toward zero; the rounding
 * mode is forced to RZ for the helpers and restored afterwards.
 */
static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar,
                                         bool is_q, bool is_u,
                                         int immh, int immb, int rn, int rd)
{
    bool is_double = extract32(immh, 3, 1);
    int immhb = immh << 3 | immb;
    /* The encoded shift counts down from 64 (single) / 128 (double) */
    int fracbits = (is_double ? 128 : 64) - immhb;
    int pass;
    TCGv_ptr tcg_fpstatus;
    TCGv_i32 tcg_rmode, tcg_shift;

    /* immh<3:2> == 00 encodings are reserved in this group */
    if (!extract32(immh, 2, 2)) {
        unallocated_encoding(s);
        return;
    }

    /* 64-bit vector elements require the Q form */
    if (!is_scalar && !is_q && is_double) {
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    assert(!(is_scalar && is_q));

    /* Swap to round-to-zero; tcg_rmode gets the previous mode back */
    tcg_rmode = tcg_const_i32(arm_rmode_to_sf(FPROUNDING_ZERO));
    gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
    tcg_fpstatus = get_fpstatus_ptr();
    tcg_shift = tcg_const_i32(fracbits);

    if (is_double) {
        int maxpass = is_scalar ? 1 : 2;

        for (pass = 0; pass < maxpass; pass++) {
            TCGv_i64 tcg_op = tcg_temp_new_i64();

            read_vec_element(s, tcg_op, rn, pass, MO_64);
            if (is_u) {
                gen_helper_vfp_touqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
            } else {
                gen_helper_vfp_tosqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
            }
            write_vec_element(s, tcg_op, rd, pass, MO_64);
            tcg_temp_free_i64(tcg_op);
        }
        /* Scalar double writes the whole 64-bit element, so this only
         * matters for the (impossible here) non-Q vector case; the
         * scalar path still clears the high half as required.
         */
        if (!is_q) {
            clear_vec_high(s, rd);
        }
    } else {
        int maxpass = is_scalar ? 1 : is_q ? 4 : 2;
        for (pass = 0; pass < maxpass; pass++) {
            TCGv_i32 tcg_op = tcg_temp_new_i32();

            read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
            if (is_u) {
                gen_helper_vfp_touls(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
            } else {
                gen_helper_vfp_tosls(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
            }
            if (is_scalar) {
                /* write_fp_sreg clears the rest of the register */
                write_fp_sreg(s, rd, tcg_op);
            } else {
                write_vec_element_i32(s, tcg_op, rd, pass, MO_32);
            }
            tcg_temp_free_i32(tcg_op);
        }
        if (!is_q && !is_scalar) {
            clear_vec_high(s, rd);
        }
    }

    tcg_temp_free_ptr(tcg_fpstatus);
    tcg_temp_free_i32(tcg_shift);
    /* Restore the rounding mode saved above */
    gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
    tcg_temp_free_i32(tcg_rmode);
}
6885
6886/* C3.6.9 AdvSIMD scalar shift by immediate
6887 *  31 30  29 28         23 22  19 18  16 15    11  10 9    5 4    0
6888 * +-----+---+-------------+------+------+--------+---+------+------+
6889 * | 0 1 | U | 1 1 1 1 1 0 | immh | immb | opcode | 1 |  Rn  |  Rd  |
6890 * +-----+---+-------------+------+------+--------+---+------+------+
6891 *
6892 * This is the scalar version so it works on a fixed sized registers
6893 */
6894static void disas_simd_scalar_shift_imm(DisasContext *s, uint32_t insn)
6895{
6896    int rd = extract32(insn, 0, 5);
6897    int rn = extract32(insn, 5, 5);
6898    int opcode = extract32(insn, 11, 5);
6899    int immb = extract32(insn, 16, 3);
6900    int immh = extract32(insn, 19, 4);
6901    bool is_u = extract32(insn, 29, 1);
6902
6903    if (immh == 0) {
6904        unallocated_encoding(s);
6905        return;
6906    }
6907
6908    switch (opcode) {
6909    case 0x08: /* SRI */
6910        if (!is_u) {
6911            unallocated_encoding(s);
6912            return;
6913        }
6914        /* fall through */
6915    case 0x00: /* SSHR / USHR */
6916    case 0x02: /* SSRA / USRA */
6917    case 0x04: /* SRSHR / URSHR */
6918    case 0x06: /* SRSRA / URSRA */
6919        handle_scalar_simd_shri(s, is_u, immh, immb, opcode, rn, rd);
6920        break;
6921    case 0x0a: /* SHL / SLI */
6922        handle_scalar_simd_shli(s, is_u, immh, immb, opcode, rn, rd);
6923        break;
6924    case 0x1c: /* SCVTF, UCVTF */
6925        handle_simd_shift_intfp_conv(s, true, false, is_u, immh, immb,
6926                                     opcode, rn, rd);
6927        break;
6928    case 0x10: /* SQSHRUN, SQSHRUN2 */
6929    case 0x11: /* SQRSHRUN, SQRSHRUN2 */
6930        if (!is_u) {
6931            unallocated_encoding(s);
6932            return;
6933        }
6934        handle_vec_simd_sqshrn(s, true, false, false, true,
6935                               immh, immb, opcode, rn, rd);
6936        break;
6937    case 0x12: /* SQSHRN, SQSHRN2, UQSHRN */
6938    case 0x13: /* SQRSHRN, SQRSHRN2, UQRSHRN, UQRSHRN2 */
6939        handle_vec_simd_sqshrn(s, true, false, is_u, is_u,
6940                               immh, immb, opcode, rn, rd);
6941        break;
6942    case 0xc: /* SQSHLU */
6943        if (!is_u) {
6944            unallocated_encoding(s);
6945            return;
6946        }
6947        handle_simd_qshl(s, true, false, false, true, immh, immb, rn, rd);
6948        break;
6949    case 0xe: /* SQSHL, UQSHL */
6950        handle_simd_qshl(s, true, false, is_u, is_u, immh, immb, rn, rd);
6951        break;
6952    case 0x1f: /* FCVTZS, FCVTZU */
6953        handle_simd_shift_fpint_conv(s, true, false, is_u, immh, immb, rn, rd);
6954        break;
6955    default:
6956        unallocated_encoding(s);
6957        break;
6958    }
6959}
6960
/* C3.6.10 AdvSIMD scalar three different
 *  31 30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
 * +-----+---+-----------+------+---+------+--------+-----+------+------+
 * | 0 1 | U | 1 1 1 1 0 | size | 1 |  Rm  | opcode | 0 0 |  Rn  |  Rd  |
 * +-----+---+-----------+------+---+------+--------+-----+------+------+
 *
 * Only the signed saturating doubling multiply (accumulate) ops exist
 * in this group: SQDMLAL, SQDMLSL, SQDMULL, at 16->32 and 32->64 bit.
 */
static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn)
{
    bool is_u = extract32(insn, 29, 1);
    int size = extract32(insn, 22, 2);
    int opcode = extract32(insn, 12, 4);
    int rm = extract32(insn, 16, 5);
    int rn = extract32(insn, 5, 5);
    int rd = extract32(insn, 0, 5);

    /* No U-form encodings exist in this group */
    if (is_u) {
        unallocated_encoding(s);
        return;
    }

    switch (opcode) {
    case 0x9: /* SQDMLAL, SQDMLAL2 */
    case 0xb: /* SQDMLSL, SQDMLSL2 */
    case 0xd: /* SQDMULL, SQDMULL2 */
        /* Only 16- and 32-bit source element sizes are allocated */
        if (size == 0 || size == 3) {
            unallocated_encoding(s);
            return;
        }
        break;
    default:
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    if (size == 2) {
        /* 32 -> 64 bit */
        TCGv_i64 tcg_op1 = tcg_temp_new_i64();
        TCGv_i64 tcg_op2 = tcg_temp_new_i64();
        TCGv_i64 tcg_res = tcg_temp_new_i64();

        read_vec_element(s, tcg_op1, rn, 0, MO_32 | MO_SIGN);
        read_vec_element(s, tcg_op2, rm, 0, MO_32 | MO_SIGN);

        /* "Doubling" is implemented as a saturating add of the
         * product to itself, which also sets QC on overflow.
         */
        tcg_gen_mul_i64(tcg_res, tcg_op1, tcg_op2);
        gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env, tcg_res, tcg_res);

        switch (opcode) {
        case 0xd: /* SQDMULL, SQDMULL2 */
            break;
        case 0xb: /* SQDMLSL, SQDMLSL2 */
            /* Subtract == negate then use the accumulate path */
            tcg_gen_neg_i64(tcg_res, tcg_res);
            /* fall through */
        case 0x9: /* SQDMLAL, SQDMLAL2 */
            read_vec_element(s, tcg_op1, rd, 0, MO_64);
            gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env,
                                              tcg_res, tcg_op1);
            break;
        default:
            g_assert_not_reached();
        }

        write_fp_dreg(s, rd, tcg_res);

        tcg_temp_free_i64(tcg_op1);
        tcg_temp_free_i64(tcg_op2);
        tcg_temp_free_i64(tcg_res);
    } else {
        /* 16 -> 32 bit */
        TCGv_i32 tcg_op1 = tcg_temp_new_i32();
        TCGv_i32 tcg_op2 = tcg_temp_new_i32();
        TCGv_i64 tcg_res = tcg_temp_new_i64();

        read_vec_element_i32(s, tcg_op1, rn, 0, MO_16);
        read_vec_element_i32(s, tcg_op2, rm, 0, MO_16);

        gen_helper_neon_mull_s16(tcg_res, tcg_op1, tcg_op2);
        gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env, tcg_res, tcg_res);

        switch (opcode) {
        case 0xd: /* SQDMULL, SQDMULL2 */
            break;
        case 0xb: /* SQDMLSL, SQDMLSL2 */
            gen_helper_neon_negl_u32(tcg_res, tcg_res);
            /* fall through */
        case 0x9: /* SQDMLAL, SQDMLAL2 */
        {
            TCGv_i64 tcg_op3 = tcg_temp_new_i64();
            read_vec_element(s, tcg_op3, rd, 0, MO_32);
            gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env,
                                              tcg_res, tcg_op3);
            tcg_temp_free_i64(tcg_op3);
            break;
        }
        default:
            g_assert_not_reached();
        }

        /* Scalar write: zero-extend the 32-bit result into the
         * 64-bit element (write_fp_dreg clears the rest of Vd).
         */
        tcg_gen_ext32u_i64(tcg_res, tcg_res);
        write_fp_dreg(s, rd, tcg_res);

        tcg_temp_free_i32(tcg_op1);
        tcg_temp_free_i32(tcg_op2);
        tcg_temp_free_i64(tcg_res);
    }
}
7068
/* Emit code for one 64x64->64 three-same operation on tcg_rn/tcg_rm,
 * writing the result to tcg_rd. u is the U (unsigned) bit of the insn.
 */
static void handle_3same_64(DisasContext *s, int opcode, bool u,
                            TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, TCGv_i64 tcg_rm)
{
    /* Handle 64x64->64 opcodes which are shared between the scalar
     * and vector 3-same groups. We cover every opcode where size == 3
     * is valid in either the three-reg-same (integer, not pairwise)
     * or scalar-three-reg-same groups. (Some opcodes are not yet
     * implemented.)
     */
    TCGCond cond;

    switch (opcode) {
    case 0x1: /* SQADD */
        if (u) {
            gen_helper_neon_qadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
        } else {
            gen_helper_neon_qadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
        }
        break;
    case 0x5: /* SQSUB */
        if (u) {
            gen_helper_neon_qsub_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
        } else {
            gen_helper_neon_qsub_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
        }
        break;
    case 0x6: /* CMGT, CMHI */
        /* 64 bit integer comparison, result = test ? (2^64 - 1) : 0.
         * We implement this using setcond (test) and then negating.
         */
        cond = u ? TCG_COND_GTU : TCG_COND_GT;
    do_cmop:
        /* Shared tail for all the compare ops: setcond produces 0/1,
         * negation turns that into all-zeroes/all-ones.
         */
        tcg_gen_setcond_i64(cond, tcg_rd, tcg_rn, tcg_rm);
        tcg_gen_neg_i64(tcg_rd, tcg_rd);
        break;
    case 0x7: /* CMGE, CMHS */
        cond = u ? TCG_COND_GEU : TCG_COND_GE;
        goto do_cmop;
    case 0x11: /* CMTST, CMEQ */
        if (u) {
            cond = TCG_COND_EQ;
            goto do_cmop;
        }
        /* CMTST : test is "if (X & Y != 0)". */
        tcg_gen_and_i64(tcg_rd, tcg_rn, tcg_rm);
        tcg_gen_setcondi_i64(TCG_COND_NE, tcg_rd, tcg_rd, 0);
        tcg_gen_neg_i64(tcg_rd, tcg_rd);
        break;
    case 0x8: /* SSHL, USHL */
        if (u) {
            gen_helper_neon_shl_u64(tcg_rd, tcg_rn, tcg_rm);
        } else {
            gen_helper_neon_shl_s64(tcg_rd, tcg_rn, tcg_rm);
        }
        break;
    case 0x9: /* SQSHL, UQSHL */
        if (u) {
            gen_helper_neon_qshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
        } else {
            gen_helper_neon_qshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
        }
        break;
    case 0xa: /* SRSHL, URSHL */
        if (u) {
            gen_helper_neon_rshl_u64(tcg_rd, tcg_rn, tcg_rm);
        } else {
            gen_helper_neon_rshl_s64(tcg_rd, tcg_rn, tcg_rm);
        }
        break;
    case 0xb: /* SQRSHL, UQRSHL */
        if (u) {
            gen_helper_neon_qrshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
        } else {
            gen_helper_neon_qrshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
        }
        break;
    case 0x10: /* ADD, SUB */
        /* U selects SUB here, not an unsigned variant */
        if (u) {
            tcg_gen_sub_i64(tcg_rd, tcg_rn, tcg_rm);
        } else {
            tcg_gen_add_i64(tcg_rd, tcg_rn, tcg_rm);
        }
        break;
    default:
        g_assert_not_reached();
    }
}
7156
/* Handle the 3-same-operands float operations; shared by the scalar
 * and vector encodings. The caller must filter out any encodings
 * not allocated for the encoding it is dealing with.
 * size selects double (non-zero) vs single precision; fpopcode is the
 * combined opcode | size<1> << 5 | U << 6 value formed by the callers.
 */
static void handle_3same_float(DisasContext *s, int size, int elements,
                               int fpopcode, int rd, int rn, int rm)
{
    int pass;
    TCGv_ptr fpst = get_fpstatus_ptr();

    for (pass = 0; pass < elements; pass++) {
        if (size) {
            /* Double */
            TCGv_i64 tcg_op1 = tcg_temp_new_i64();
            TCGv_i64 tcg_op2 = tcg_temp_new_i64();
            TCGv_i64 tcg_res = tcg_temp_new_i64();

            read_vec_element(s, tcg_op1, rn, pass, MO_64);
            read_vec_element(s, tcg_op2, rm, pass, MO_64);

            switch (fpopcode) {
            case 0x39: /* FMLS */
                /* As usual for ARM, separate negation for fused multiply-add */
                gen_helper_vfp_negd(tcg_op1, tcg_op1);
                /* fall through */
            case 0x19: /* FMLA */
                /* tcg_res doubles as the accumulator input here */
                read_vec_element(s, tcg_res, rd, pass, MO_64);
                gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2,
                                       tcg_res, fpst);
                break;
            case 0x18: /* FMAXNM */
                gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x1a: /* FADD */
                gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x1b: /* FMULX */
                gen_helper_vfp_mulxd(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x1c: /* FCMEQ */
                gen_helper_neon_ceq_f64(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x1e: /* FMAX */
                gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x1f: /* FRECPS */
                gen_helper_recpsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x38: /* FMINNM */
                gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x3a: /* FSUB */
                gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x3e: /* FMIN */
                gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x3f: /* FRSQRTS */
                gen_helper_rsqrtsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x5b: /* FMUL */
                gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x5c: /* FCMGE */
                gen_helper_neon_cge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x5d: /* FACGE */
                gen_helper_neon_acge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x5f: /* FDIV */
                gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x7a: /* FABD */
                /* abs(a - b): subtract then clear the sign bit */
                gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
                gen_helper_vfp_absd(tcg_res, tcg_res);
                break;
            case 0x7c: /* FCMGT */
                gen_helper_neon_cgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x7d: /* FACGT */
                gen_helper_neon_acgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            default:
                g_assert_not_reached();
            }

            write_vec_element(s, tcg_res, rd, pass, MO_64);

            tcg_temp_free_i64(tcg_res);
            tcg_temp_free_i64(tcg_op1);
            tcg_temp_free_i64(tcg_op2);
        } else {
            /* Single */
            TCGv_i32 tcg_op1 = tcg_temp_new_i32();
            TCGv_i32 tcg_op2 = tcg_temp_new_i32();
            TCGv_i32 tcg_res = tcg_temp_new_i32();

            read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
            read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);

            switch (fpopcode) {
            case 0x39: /* FMLS */
                /* As usual for ARM, separate negation for fused multiply-add */
                gen_helper_vfp_negs(tcg_op1, tcg_op1);
                /* fall through */
            case 0x19: /* FMLA */
                read_vec_element_i32(s, tcg_res, rd, pass, MO_32);
                gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2,
                                       tcg_res, fpst);
                break;
            case 0x1a: /* FADD */
                gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x1b: /* FMULX */
                gen_helper_vfp_mulxs(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x1c: /* FCMEQ */
                gen_helper_neon_ceq_f32(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x1e: /* FMAX */
                gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x1f: /* FRECPS */
                gen_helper_recpsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x18: /* FMAXNM */
                gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x38: /* FMINNM */
                gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x3a: /* FSUB */
                gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x3e: /* FMIN */
                gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x3f: /* FRSQRTS */
                gen_helper_rsqrtsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x5b: /* FMUL */
                gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x5c: /* FCMGE */
                gen_helper_neon_cge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x5d: /* FACGE */
                gen_helper_neon_acge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x5f: /* FDIV */
                gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x7a: /* FABD */
                /* abs(a - b): subtract then clear the sign bit */
                gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
                gen_helper_vfp_abss(tcg_res, tcg_res);
                break;
            case 0x7c: /* FCMGT */
                gen_helper_neon_cgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x7d: /* FACGT */
                gen_helper_neon_acgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            default:
                g_assert_not_reached();
            }

            if (elements == 1) {
                /* scalar single so clear high part */
                TCGv_i64 tcg_tmp = tcg_temp_new_i64();

                tcg_gen_extu_i32_i64(tcg_tmp, tcg_res);
                write_vec_element(s, tcg_tmp, rd, pass, MO_64);
                tcg_temp_free_i64(tcg_tmp);
            } else {
                write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
            }

            tcg_temp_free_i32(tcg_res);
            tcg_temp_free_i32(tcg_op1);
            tcg_temp_free_i32(tcg_op2);
        }
    }

    tcg_temp_free_ptr(fpst);

    if ((elements << size) < 4) {
        /* scalar, or non-quad vector op */
        clear_vec_high(s, rd);
    }
}
7347
/* C3.6.11 AdvSIMD scalar three same
 *  31 30  29 28       24 23  22  21 20  16 15    11  10 9    5 4    0
 * +-----+---+-----------+------+---+------+--------+---+------+------+
 * | 0 1 | U | 1 1 1 1 0 | size | 1 |  Rm  | opcode | 1 |  Rn  |  Rd  |
 * +-----+---+-----------+------+---+------+--------+---+------+------+
 */
static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int opcode = extract32(insn, 11, 5);
    int rm = extract32(insn, 16, 5);
    int size = extract32(insn, 22, 2);
    bool u = extract32(insn, 29, 1);
    TCGv_i64 tcg_rd;

    if (opcode >= 0x18) {
        /* Floating point: U, size[1] and opcode indicate operation */
        int fpopcode = opcode | (extract32(size, 1, 1) << 5) | (u << 6);
        switch (fpopcode) {
        case 0x1b: /* FMULX */
        case 0x1f: /* FRECPS */
        case 0x3f: /* FRSQRTS */
        case 0x5d: /* FACGE */
        case 0x7d: /* FACGT */
        case 0x1c: /* FCMEQ */
        case 0x5c: /* FCMGE */
        case 0x7c: /* FCMGT */
        case 0x7a: /* FABD */
            break;
        default:
            unallocated_encoding(s);
            return;
        }

        if (!fp_access_check(s)) {
            return;
        }

        /* size[0] selects single (0) vs double (1) precision */
        handle_3same_float(s, extract32(size, 0, 1), 1, fpopcode, rd, rn, rm);
        return;
    }

    /* Integer ops: validate the size constraints for each opcode group */
    switch (opcode) {
    case 0x1: /* SQADD, UQADD */
    case 0x5: /* SQSUB, UQSUB */
    case 0x9: /* SQSHL, UQSHL */
    case 0xb: /* SQRSHL, UQRSHL */
        break;
    case 0x8: /* SSHL, USHL */
    case 0xa: /* SRSHL, URSHL */
    case 0x6: /* CMGT, CMHI */
    case 0x7: /* CMGE, CMHS */
    case 0x11: /* CMTST, CMEQ */
    case 0x10: /* ADD, SUB (vector) */
        /* These scalar forms only exist for 64-bit elements */
        if (size != 3) {
            unallocated_encoding(s);
            return;
        }
        break;
    case 0x16: /* SQDMULH, SQRDMULH (vector) */
        /* Only 16-bit and 32-bit element forms exist */
        if (size != 1 && size != 2) {
            unallocated_encoding(s);
            return;
        }
        break;
    default:
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    /* The result is always written back as a full 64-bit D register value */
    tcg_rd = tcg_temp_new_i64();

    if (size == 3) {
        TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
        TCGv_i64 tcg_rm = read_fp_dreg(s, rm);

        handle_3same_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rm);
        tcg_temp_free_i64(tcg_rn);
        tcg_temp_free_i64(tcg_rm);
    } else {
        /* Do a single operation on the lowest element in the vector.
         * We use the standard Neon helpers and rely on 0 OP 0 == 0 with
         * no side effects for all these operations.
         * OPTME: special-purpose helpers would avoid doing some
         * unnecessary work in the helper for the 8 and 16 bit cases.
         */
        NeonGenTwoOpEnvFn *genenvfn;
        TCGv_i32 tcg_rn = tcg_temp_new_i32();
        TCGv_i32 tcg_rm = tcg_temp_new_i32();
        TCGv_i32 tcg_rd32 = tcg_temp_new_i32();

        read_vec_element_i32(s, tcg_rn, rn, 0, size);
        read_vec_element_i32(s, tcg_rm, rm, 0, size);

        /* Select the Neon env helper indexed by [element size][unsigned] */
        switch (opcode) {
        case 0x1: /* SQADD, UQADD */
        {
            static NeonGenTwoOpEnvFn * const fns[3][2] = {
                { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 },
                { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 },
                { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 },
            };
            genenvfn = fns[size][u];
            break;
        }
        case 0x5: /* SQSUB, UQSUB */
        {
            static NeonGenTwoOpEnvFn * const fns[3][2] = {
                { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 },
                { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 },
                { gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 },
            };
            genenvfn = fns[size][u];
            break;
        }
        case 0x9: /* SQSHL, UQSHL */
        {
            static NeonGenTwoOpEnvFn * const fns[3][2] = {
                { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
                { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
                { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
            };
            genenvfn = fns[size][u];
            break;
        }
        case 0xb: /* SQRSHL, UQRSHL */
        {
            static NeonGenTwoOpEnvFn * const fns[3][2] = {
                { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
                { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
                { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
            };
            genenvfn = fns[size][u];
            break;
        }
        case 0x16: /* SQDMULH, SQRDMULH */
        {
            /* No 8-bit form, so index with size - 1; u selects rounding */
            static NeonGenTwoOpEnvFn * const fns[2][2] = {
                { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 },
                { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 },
            };
            assert(size == 1 || size == 2);
            genenvfn = fns[size - 1][u];
            break;
        }
        default:
            g_assert_not_reached();
        }

        genenvfn(tcg_rd32, cpu_env, tcg_rn, tcg_rm);
        /* Zero-extend the 32-bit result into the 64-bit destination */
        tcg_gen_extu_i32_i64(tcg_rd, tcg_rd32);
        tcg_temp_free_i32(tcg_rd32);
        tcg_temp_free_i32(tcg_rn);
        tcg_temp_free_i32(tcg_rm);
    }

    write_fp_dreg(s, rd, tcg_rd);

    tcg_temp_free_i64(tcg_rd);
}
7513
static void handle_2misc_64(DisasContext *s, int opcode, bool u,
                            TCGv_i64 tcg_rd, TCGv_i64 tcg_rn,
                            TCGv_i32 tcg_rmode, TCGv_ptr tcg_fpstatus)
{
    /* Handle 64->64 opcodes which are shared between the scalar and
     * vector 2-reg-misc groups. We cover every integer opcode where size == 3
     * is valid in either group and also the double-precision fp ops.
     * The caller only need provide tcg_rmode and tcg_fpstatus if the op
     * requires them.
     */
    TCGCond cond;

    switch (opcode) {
    case 0x4: /* CLS, CLZ */
        /* Count leading zero (u) or leading sign (!u) bits */
        if (u) {
            gen_helper_clz64(tcg_rd, tcg_rn);
        } else {
            gen_helper_cls64(tcg_rd, tcg_rn);
        }
        break;
    case 0x5: /* NOT */
        /* This opcode is shared with CNT and RBIT but we have earlier
         * enforced that size == 3 if and only if this is the NOT insn.
         */
        tcg_gen_not_i64(tcg_rd, tcg_rn);
        break;
    case 0x7: /* SQABS, SQNEG */
        if (u) {
            gen_helper_neon_qneg_s64(tcg_rd, cpu_env, tcg_rn);
        } else {
            gen_helper_neon_qabs_s64(tcg_rd, cpu_env, tcg_rn);
        }
        break;
    case 0xa: /* CMLT */
        /* 64 bit integer comparison against zero, result is
         * test ? (2^64 - 1) : 0. We implement via setcond(!test) and
         * subtracting 1.
         */
        cond = TCG_COND_LT;
    do_cmop:
        /* setcond gives 0/1; negating gives the 0/all-ones result */
        tcg_gen_setcondi_i64(cond, tcg_rd, tcg_rn, 0);
        tcg_gen_neg_i64(tcg_rd, tcg_rd);
        break;
    case 0x8: /* CMGT, CMGE */
        cond = u ? TCG_COND_GE : TCG_COND_GT;
        goto do_cmop;
    case 0x9: /* CMEQ, CMLE */
        cond = u ? TCG_COND_LE : TCG_COND_EQ;
        goto do_cmop;
    case 0xb: /* ABS, NEG */
        if (u) {
            tcg_gen_neg_i64(tcg_rd, tcg_rn);
        } else {
            /* ABS: rd = (rn > 0) ? rn : -rn, via neg then movcond */
            TCGv_i64 tcg_zero = tcg_const_i64(0);
            tcg_gen_neg_i64(tcg_rd, tcg_rn);
            tcg_gen_movcond_i64(TCG_COND_GT, tcg_rd, tcg_rn, tcg_zero,
                                tcg_rn, tcg_rd);
            tcg_temp_free_i64(tcg_zero);
        }
        break;
    case 0x2f: /* FABS */
        gen_helper_vfp_absd(tcg_rd, tcg_rn);
        break;
    case 0x6f: /* FNEG */
        gen_helper_vfp_negd(tcg_rd, tcg_rn);
        break;
    case 0x7f: /* FSQRT */
        gen_helper_vfp_sqrtd(tcg_rd, tcg_rn, cpu_env);
        break;
    case 0x1a: /* FCVTNS */
    case 0x1b: /* FCVTMS */
    case 0x1c: /* FCVTAS */
    case 0x3a: /* FCVTPS */
    case 0x3b: /* FCVTZS */
    {
        /* Shift of 0: plain double-to-signed-int64 conversion; the
         * rounding mode distinguishing these ops was installed by the
         * caller via tcg_rmode.
         */
        TCGv_i32 tcg_shift = tcg_const_i32(0);
        gen_helper_vfp_tosqd(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
        tcg_temp_free_i32(tcg_shift);
        break;
    }
    case 0x5a: /* FCVTNU */
    case 0x5b: /* FCVTMU */
    case 0x5c: /* FCVTAU */
    case 0x7a: /* FCVTPU */
    case 0x7b: /* FCVTZU */
    {
        /* As above but converting to unsigned int64 */
        TCGv_i32 tcg_shift = tcg_const_i32(0);
        gen_helper_vfp_touqd(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
        tcg_temp_free_i32(tcg_shift);
        break;
    }
    case 0x18: /* FRINTN */
    case 0x19: /* FRINTM */
    case 0x38: /* FRINTP */
    case 0x39: /* FRINTZ */
    case 0x58: /* FRINTA */
    case 0x79: /* FRINTI */
        gen_helper_rintd(tcg_rd, tcg_rn, tcg_fpstatus);
        break;
    case 0x59: /* FRINTX */
        gen_helper_rintd_exact(tcg_rd, tcg_rn, tcg_fpstatus);
        break;
    default:
        g_assert_not_reached();
    }
}
7620
static void handle_2misc_fcmp_zero(DisasContext *s, int opcode,
                                   bool is_scalar, bool is_u, bool is_q,
                                   int size, int rn, int rd)
{
    /* Floating point compare against zero (FCMGT/FCMEQ/FCMLT/FCMGE/FCMLE),
     * shared between the scalar and vector 2-reg-misc groups. The
     * "less than"/"less or equal" forms are implemented by swapping
     * the operands of the corresponding "greater" helper.
     */
    bool is_double = (size == 3);
    TCGv_ptr fpst;

    if (!fp_access_check(s)) {
        return;
    }

    fpst = get_fpstatus_ptr();

    if (is_double) {
        TCGv_i64 tcg_op = tcg_temp_new_i64();
        TCGv_i64 tcg_zero = tcg_const_i64(0);
        TCGv_i64 tcg_res = tcg_temp_new_i64();
        NeonGenTwoDoubleOPFn *genfn;
        bool swap = false;
        int pass;

        switch (opcode) {
        case 0x2e: /* FCMLT (zero) */
            swap = true;
            /* fallthrough */
        case 0x2c: /* FCMGT (zero) */
            genfn = gen_helper_neon_cgt_f64;
            break;
        case 0x2d: /* FCMEQ (zero) */
            genfn = gen_helper_neon_ceq_f64;
            break;
        case 0x6d: /* FCMLE (zero) */
            swap = true;
            /* fall through */
        case 0x6c: /* FCMGE (zero) */
            genfn = gen_helper_neon_cge_f64;
            break;
        default:
            g_assert_not_reached();
        }

        /* One 64-bit pass for scalar, two for a full vector */
        for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
            read_vec_element(s, tcg_op, rn, pass, MO_64);
            if (swap) {
                genfn(tcg_res, tcg_zero, tcg_op, fpst);
            } else {
                genfn(tcg_res, tcg_op, tcg_zero, fpst);
            }
            write_vec_element(s, tcg_res, rd, pass, MO_64);
        }
        if (is_scalar) {
            clear_vec_high(s, rd);
        }

        tcg_temp_free_i64(tcg_res);
        tcg_temp_free_i64(tcg_zero);
        tcg_temp_free_i64(tcg_op);
    } else {
        TCGv_i32 tcg_op = tcg_temp_new_i32();
        TCGv_i32 tcg_zero = tcg_const_i32(0);
        TCGv_i32 tcg_res = tcg_temp_new_i32();
        NeonGenTwoSingleOPFn *genfn;
        bool swap = false;
        int pass, maxpasses;

        switch (opcode) {
        case 0x2e: /* FCMLT (zero) */
            swap = true;
            /* fall through */
        case 0x2c: /* FCMGT (zero) */
            genfn = gen_helper_neon_cgt_f32;
            break;
        case 0x2d: /* FCMEQ (zero) */
            genfn = gen_helper_neon_ceq_f32;
            break;
        case 0x6d: /* FCMLE (zero) */
            swap = true;
            /* fall through */
        case 0x6c: /* FCMGE (zero) */
            genfn = gen_helper_neon_cge_f32;
            break;
        default:
            g_assert_not_reached();
        }

        if (is_scalar) {
            maxpasses = 1;
        } else {
            maxpasses = is_q ? 4 : 2;
        }

        for (pass = 0; pass < maxpasses; pass++) {
            read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
            if (swap) {
                genfn(tcg_res, tcg_zero, tcg_op, fpst);
            } else {
                genfn(tcg_res, tcg_op, tcg_zero, fpst);
            }
            if (is_scalar) {
                /* write_fp_sreg zeroes the high parts of the register */
                write_fp_sreg(s, rd, tcg_res);
            } else {
                write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
            }
        }
        tcg_temp_free_i32(tcg_res);
        tcg_temp_free_i32(tcg_zero);
        tcg_temp_free_i32(tcg_op);
        if (!is_q && !is_scalar) {
            clear_vec_high(s, rd);
        }
    }

    tcg_temp_free_ptr(fpst);
}
7735
7736static void handle_2misc_reciprocal(DisasContext *s, int opcode,
7737                                    bool is_scalar, bool is_u, bool is_q,
7738                                    int size, int rn, int rd)
7739{
7740    bool is_double = (size == 3);
7741    TCGv_ptr fpst = get_fpstatus_ptr();
7742
7743    if (is_double) {
7744        TCGv_i64 tcg_op = tcg_temp_new_i64();
7745        TCGv_i64 tcg_res = tcg_temp_new_i64();
7746        int pass;
7747
7748        for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
7749            read_vec_element(s, tcg_op, rn, pass, MO_64);
7750            switch (opcode) {
7751            case 0x3d: /* FRECPE */
7752                gen_helper_recpe_f64(tcg_res, tcg_op, fpst);
7753                break;
7754            case 0x3f: /* FRECPX */
7755                gen_helper_frecpx_f64(tcg_res, tcg_op, fpst);
7756                break;
7757            case 0x7d: /* FRSQRTE */
7758                gen_helper_rsqrte_f64(tcg_res, tcg_op, fpst);
7759                break;
7760            default:
7761                g_assert_not_reached();
7762            }
7763            write_vec_element(s, tcg_res, rd, pass, MO_64);
7764        }
7765        if (is_scalar) {
7766            clear_vec_high(s, rd);
7767        }
7768
7769        tcg_temp_free_i64(tcg_res);
7770        tcg_temp_free_i64(tcg_op);
7771    } else {
7772        TCGv_i32 tcg_op = tcg_temp_new_i32();
7773        TCGv_i32 tcg_res = tcg_temp_new_i32();
7774        int pass, maxpasses;
7775
7776        if (is_scalar) {
7777            maxpasses = 1;
7778        } else {
7779            maxpasses = is_q ? 4 : 2;
7780        }
7781
7782        for (pass = 0; pass < maxpasses; pass++) {
7783            read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
7784
7785            switch (opcode) {
7786            case 0x3c: /* URECPE */
7787                gen_helper_recpe_u32(tcg_res, tcg_op, fpst);
7788                break;
7789            case 0x3d: /* FRECPE */
7790                gen_helper_recpe_f32(tcg_res, tcg_op, fpst);
7791                break;
7792            case 0x3f: /* FRECPX */
7793                gen_helper_frecpx_f32(tcg_res, tcg_op, fpst);
7794                break;
7795            case 0x7d: /* FRSQRTE */
7796                gen_helper_rsqrte_f32(tcg_res, tcg_op, fpst);
7797                break;
7798            default:
7799                g_assert_not_reached();
7800            }
7801
7802            if (is_scalar) {
7803                write_fp_sreg(s, rd, tcg_res);
7804            } else {
7805                write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
7806            }
7807        }
7808        tcg_temp_free_i32(tcg_res);
7809        tcg_temp_free_i32(tcg_op);
7810        if (!is_q && !is_scalar) {
7811            clear_vec_high(s, rd);
7812        }
7813    }
7814    tcg_temp_free_ptr(fpst);
7815}
7816
static void handle_2misc_narrow(DisasContext *s, bool scalar,
                                int opcode, bool u, bool is_q,
                                int size, int rn, int rd)
{
    /* Handle 2-reg-misc ops which are narrowing (so each 2*size element
     * in the source becomes a size element in the destination).
     */
    int pass;
    TCGv_i32 tcg_res[2];
    /* is_q selects the "2" forms, which write the upper half of Vd */
    int destelt = is_q ? 2 : 0;
    int passes = scalar ? 1 : 2;

    if (scalar) {
        /* Scalar: only one real pass; the other result half is zero */
        tcg_res[1] = tcg_const_i32(0);
    }

    for (pass = 0; pass < passes; pass++) {
        TCGv_i64 tcg_op = tcg_temp_new_i64();
        NeonGenNarrowFn *genfn = NULL;
        NeonGenNarrowEnvFn *genenvfn = NULL;

        if (scalar) {
            read_vec_element(s, tcg_op, rn, pass, size + 1);
        } else {
            read_vec_element(s, tcg_op, rn, pass, MO_64);
        }
        tcg_res[pass] = tcg_temp_new_i32();

        /* Exactly one of genfn (plain) or genenvfn (needs cpu_env for
         * saturation flags) is set by this switch.
         */
        switch (opcode) {
        case 0x12: /* XTN, SQXTUN */
        {
            static NeonGenNarrowFn * const xtnfns[3] = {
                gen_helper_neon_narrow_u8,
                gen_helper_neon_narrow_u16,
                tcg_gen_extrl_i64_i32,
            };
            static NeonGenNarrowEnvFn * const sqxtunfns[3] = {
                gen_helper_neon_unarrow_sat8,
                gen_helper_neon_unarrow_sat16,
                gen_helper_neon_unarrow_sat32,
            };
            if (u) {
                genenvfn = sqxtunfns[size];
            } else {
                genfn = xtnfns[size];
            }
            break;
        }
        case 0x14: /* SQXTN, UQXTN */
        {
            static NeonGenNarrowEnvFn * const fns[3][2] = {
                { gen_helper_neon_narrow_sat_s8,
                  gen_helper_neon_narrow_sat_u8 },
                { gen_helper_neon_narrow_sat_s16,
                  gen_helper_neon_narrow_sat_u16 },
                { gen_helper_neon_narrow_sat_s32,
                  gen_helper_neon_narrow_sat_u32 },
            };
            genenvfn = fns[size][u];
            break;
        }
        case 0x16: /* FCVTN, FCVTN2 */
            /* 32 bit to 16 bit or 64 bit to 32 bit float conversion */
            if (size == 2) {
                gen_helper_vfp_fcvtsd(tcg_res[pass], tcg_op, cpu_env);
            } else {
                /* Convert each 32-bit half to f16 and pack the pair */
                TCGv_i32 tcg_lo = tcg_temp_new_i32();
                TCGv_i32 tcg_hi = tcg_temp_new_i32();
                tcg_gen_extr_i64_i32(tcg_lo, tcg_hi, tcg_op);
                gen_helper_vfp_fcvt_f32_to_f16(tcg_lo, tcg_lo, cpu_env);
                gen_helper_vfp_fcvt_f32_to_f16(tcg_hi, tcg_hi, cpu_env);
                tcg_gen_deposit_i32(tcg_res[pass], tcg_lo, tcg_hi, 16, 16);
                tcg_temp_free_i32(tcg_lo);
                tcg_temp_free_i32(tcg_hi);
            }
            break;
        case 0x56:  /* FCVTXN, FCVTXN2 */
            /* 64 bit to 32 bit float conversion
             * with von Neumann rounding (round to odd)
             */
            assert(size == 2);
            gen_helper_fcvtx_f64_to_f32(tcg_res[pass], tcg_op, cpu_env);
            break;
        default:
            g_assert_not_reached();
        }

        if (genfn) {
            genfn(tcg_res[pass], tcg_op);
        } else if (genenvfn) {
            genenvfn(tcg_res[pass], cpu_env, tcg_op);
        }

        tcg_temp_free_i64(tcg_op);
    }

    for (pass = 0; pass < 2; pass++) {
        write_vec_element_i32(s, tcg_res[pass], rd, destelt + pass, MO_32);
        tcg_temp_free_i32(tcg_res[pass]);
    }
    if (!is_q) {
        clear_vec_high(s, rd);
    }
}
7921
/* Remaining saturating accumulating ops */
static void handle_2misc_satacc(DisasContext *s, bool is_scalar, bool is_u,
                                bool is_q, int size, int rn, int rd)
{
    /* USQADD (is_u): rd = sat(rd + rn) with unsigned rd, signed rn.
     * SUQADD (!is_u): rd = sat(rd + rn) with signed rd, unsigned rn.
     * Note rd is both source accumulator and destination.
     */
    bool is_double = (size == 3);

    if (is_double) {
        TCGv_i64 tcg_rn = tcg_temp_new_i64();
        TCGv_i64 tcg_rd = tcg_temp_new_i64();
        int pass;

        for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
            read_vec_element(s, tcg_rn, rn, pass, MO_64);
            read_vec_element(s, tcg_rd, rd, pass, MO_64);

            if (is_u) { /* USQADD */
                gen_helper_neon_uqadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rd);
            } else { /* SUQADD */
                gen_helper_neon_sqadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rd);
            }
            write_vec_element(s, tcg_rd, rd, pass, MO_64);
        }
        if (is_scalar) {
            clear_vec_high(s, rd);
        }

        tcg_temp_free_i64(tcg_rd);
        tcg_temp_free_i64(tcg_rn);
    } else {
        TCGv_i32 tcg_rn = tcg_temp_new_i32();
        TCGv_i32 tcg_rd = tcg_temp_new_i32();
        int pass, maxpasses;

        if (is_scalar) {
            maxpasses = 1;
        } else {
            maxpasses = is_q ? 4 : 2;
        }

        for (pass = 0; pass < maxpasses; pass++) {
            if (is_scalar) {
                /* Scalar: read only 'size' bits of the low element */
                read_vec_element_i32(s, tcg_rn, rn, pass, size);
                read_vec_element_i32(s, tcg_rd, rd, pass, size);
            } else {
                read_vec_element_i32(s, tcg_rn, rn, pass, MO_32);
                read_vec_element_i32(s, tcg_rd, rd, pass, MO_32);
            }

            if (is_u) { /* USQADD */
                switch (size) {
                case 0:
                    gen_helper_neon_uqadd_s8(tcg_rd, cpu_env, tcg_rn, tcg_rd);
                    break;
                case 1:
                    gen_helper_neon_uqadd_s16(tcg_rd, cpu_env, tcg_rn, tcg_rd);
                    break;
                case 2:
                    gen_helper_neon_uqadd_s32(tcg_rd, cpu_env, tcg_rn, tcg_rd);
                    break;
                default:
                    g_assert_not_reached();
                }
            } else { /* SUQADD */
                switch (size) {
                case 0:
                    gen_helper_neon_sqadd_u8(tcg_rd, cpu_env, tcg_rn, tcg_rd);
                    break;
                case 1:
                    gen_helper_neon_sqadd_u16(tcg_rd, cpu_env, tcg_rn, tcg_rd);
                    break;
                case 2:
                    gen_helper_neon_sqadd_u32(tcg_rd, cpu_env, tcg_rn, tcg_rd);
                    break;
                default:
                    g_assert_not_reached();
                }
            }

            if (is_scalar) {
                /* Zero the whole 64-bit element before depositing the
                 * 32-bit result, so the high bits end up clear.
                 */
                TCGv_i64 tcg_zero = tcg_const_i64(0);
                write_vec_element(s, tcg_zero, rd, 0, MO_64);
                tcg_temp_free_i64(tcg_zero);
            }
            write_vec_element_i32(s, tcg_rd, rd, pass, MO_32);
        }

        if (!is_q) {
            clear_vec_high(s, rd);
        }

        tcg_temp_free_i32(tcg_rd);
        tcg_temp_free_i32(tcg_rn);
    }
}
8016
/* C3.6.12 AdvSIMD scalar two reg misc
 *  31 30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
 * +-----+---+-----------+------+-----------+--------+-----+------+------+
 * | 0 1 | U | 1 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
 * +-----+---+-----------+------+-----------+--------+-----+------+------+
 */
static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int opcode = extract32(insn, 12, 5);
    int size = extract32(insn, 22, 2);
    bool u = extract32(insn, 29, 1);
    bool is_fcvt = false;
    int rmode;
    TCGv_i32 tcg_rmode;
    TCGv_ptr tcg_fpstatus;

    switch (opcode) {
    case 0x3: /* USQADD / SUQADD*/
        if (!fp_access_check(s)) {
            return;
        }
        handle_2misc_satacc(s, true, u, false, size, rn, rd);
        return;
    case 0x7: /* SQABS / SQNEG */
        break;
    case 0xa: /* CMLT */
        if (u) {
            unallocated_encoding(s);
            return;
        }
        /* fall through */
    case 0x8: /* CMGT, CMGE */
    case 0x9: /* CMEQ, CMLE */
    case 0xb: /* ABS, NEG */
        /* These scalar forms only exist for 64-bit elements */
        if (size != 3) {
            unallocated_encoding(s);
            return;
        }
        break;
    case 0x12: /* SQXTUN */
        if (!u) {
            unallocated_encoding(s);
            return;
        }
        /* fall through */
    case 0x14: /* SQXTN, UQXTN */
        /* Narrowing: destination element is half the source size */
        if (size == 3) {
            unallocated_encoding(s);
            return;
        }
        if (!fp_access_check(s)) {
            return;
        }
        handle_2misc_narrow(s, true, opcode, u, false, size, rn, rd);
        return;
    case 0xc ... 0xf:
    case 0x16 ... 0x1d:
    case 0x1f:
        /* Floating point: U, size[1] and opcode indicate operation;
         * size[0] indicates single or double precision.
         */
        opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
        size = extract32(size, 0, 1) ? 3 : 2;
        switch (opcode) {
        case 0x2c: /* FCMGT (zero) */
        case 0x2d: /* FCMEQ (zero) */
        case 0x2e: /* FCMLT (zero) */
        case 0x6c: /* FCMGE (zero) */
        case 0x6d: /* FCMLE (zero) */
            handle_2misc_fcmp_zero(s, opcode, true, u, true, size, rn, rd);
            return;
        case 0x1d: /* SCVTF */
        case 0x5d: /* UCVTF */
        {
            bool is_signed = (opcode == 0x1d);
            if (!fp_access_check(s)) {
                return;
            }
            handle_simd_intfp_conv(s, rd, rn, 1, is_signed, 0, size);
            return;
        }
        case 0x3d: /* FRECPE */
        case 0x3f: /* FRECPX */
        case 0x7d: /* FRSQRTE */
            if (!fp_access_check(s)) {
                return;
            }
            handle_2misc_reciprocal(s, opcode, true, u, true, size, rn, rd);
            return;
        case 0x1a: /* FCVTNS */
        case 0x1b: /* FCVTMS */
        case 0x3a: /* FCVTPS */
        case 0x3b: /* FCVTZS */
        case 0x5a: /* FCVTNU */
        case 0x5b: /* FCVTMU */
        case 0x7a: /* FCVTPU */
        case 0x7b: /* FCVTZU */
            is_fcvt = true;
            /* Rounding mode is encoded in the low bits of the expanded
             * opcode; conversion is generated below.
             */
            rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
            break;
        case 0x1c: /* FCVTAS */
        case 0x5c: /* FCVTAU */
            /* TIEAWAY doesn't fit in the usual rounding mode encoding */
            is_fcvt = true;
            rmode = FPROUNDING_TIEAWAY;
            break;
        case 0x56: /* FCVTXN, FCVTXN2 */
            /* Round-to-odd narrowing: only the double->single form exists */
            if (size == 2) {
                unallocated_encoding(s);
                return;
            }
            if (!fp_access_check(s)) {
                return;
            }
            handle_2misc_narrow(s, true, opcode, u, false, size - 1, rn, rd);
            return;
        default:
            unallocated_encoding(s);
            return;
        }
        break;
    default:
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    if (is_fcvt) {
        /* Install the required rounding mode; set_rmode returns the old
         * mode in tcg_rmode so it can be restored below.
         */
        tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
        gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
        tcg_fpstatus = get_fpstatus_ptr();
    } else {
        TCGV_UNUSED_I32(tcg_rmode);
        TCGV_UNUSED_PTR(tcg_fpstatus);
    }

    if (size == 3) {
        TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
        TCGv_i64 tcg_rd = tcg_temp_new_i64();

        handle_2misc_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rmode, tcg_fpstatus);
        write_fp_dreg(s, rd, tcg_rd);
        tcg_temp_free_i64(tcg_rd);
        tcg_temp_free_i64(tcg_rn);
    } else {
        TCGv_i32 tcg_rn = tcg_temp_new_i32();
        TCGv_i32 tcg_rd = tcg_temp_new_i32();

        read_vec_element_i32(s, tcg_rn, rn, 0, size);

        switch (opcode) {
        case 0x7: /* SQABS, SQNEG */
        {
            NeonGenOneOpEnvFn *genfn;
            static NeonGenOneOpEnvFn * const fns[3][2] = {
                { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
                { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
                { gen_helper_neon_qabs_s32, gen_helper_neon_qneg_s32 },
            };
            genfn = fns[size][u];
            genfn(tcg_rd, cpu_env, tcg_rn);
            break;
        }
        case 0x1a: /* FCVTNS */
        case 0x1b: /* FCVTMS */
        case 0x1c: /* FCVTAS */
        case 0x3a: /* FCVTPS */
        case 0x3b: /* FCVTZS */
        {
            /* Shift of 0: plain single-to-int32 conversion */
            TCGv_i32 tcg_shift = tcg_const_i32(0);
            gen_helper_vfp_tosls(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
            tcg_temp_free_i32(tcg_shift);
            break;
        }
        case 0x5a: /* FCVTNU */
        case 0x5b: /* FCVTMU */
        case 0x5c: /* FCVTAU */
        case 0x7a: /* FCVTPU */
        case 0x7b: /* FCVTZU */
        {
            TCGv_i32 tcg_shift = tcg_const_i32(0);
            gen_helper_vfp_touls(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
            tcg_temp_free_i32(tcg_shift);
            break;
        }
        default:
            g_assert_not_reached();
        }

        write_fp_sreg(s, rd, tcg_rd);
        tcg_temp_free_i32(tcg_rd);
        tcg_temp_free_i32(tcg_rn);
    }

    if (is_fcvt) {
        /* Swap back the rounding mode that was saved above */
        gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
        tcg_temp_free_i32(tcg_rmode);
        tcg_temp_free_ptr(tcg_fpstatus);
    }
}
8222
/* SSHR[RA]/USHR[RA] - Vector shift right (optional rounding/accumulate)
 *
 * Covers the vector forms of SSHR/USHR (plain), SSRA/USRA (accumulate),
 * SRSHR/URSHR (rounding), SRSRA/URSRA (rounding + accumulate) and SRI
 * (insert), all shifting each element right by an immediate taken from
 * immh:immb.
 *  is_q: true for the 128-bit form; false for 64-bit (high half cleared)
 *  is_u: true for the unsigned variants
 *  opcode: selects which of the above forms (see switch below)
 *  rn, rd: source and destination vector register numbers
 */
static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u,
                                 int immh, int immb, int opcode, int rn, int rd)
{
    /* The element size is encoded by the position of immh's top set bit */
    int size = 32 - clz32(immh) - 1;
    int immhb = immh << 3 | immb;
    /* Right shifts are encoded as (2 * esize) - immh:immb, i.e. 1..esize */
    int shift = 2 * (8 << size) - immhb;
    bool accumulate = false;
    bool round = false;
    bool insert = false;
    int dsize = is_q ? 128 : 64;
    int esize = 8 << size;
    int elements = dsize/esize;
    TCGMemOp memop = size | (is_u ? 0 : MO_SIGN);
    TCGv_i64 tcg_rn = new_tmp_a64(s);
    TCGv_i64 tcg_rd = new_tmp_a64(s);
    TCGv_i64 tcg_round;
    int i;

    /* immh<3> set means 64-bit elements, only valid with Q=1 */
    if (extract32(immh, 3, 1) && !is_q) {
        unallocated_encoding(s);
        return;
    }

    if (size > 3 && !is_q) {
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    switch (opcode) {
    case 0x02: /* SSRA / USRA (accumulate) */
        accumulate = true;
        break;
    case 0x04: /* SRSHR / URSHR (rounding) */
        round = true;
        break;
    case 0x06: /* SRSRA / URSRA (accum + rounding) */
        accumulate = round = true;
        break;
    case 0x08: /* SRI */
        insert = true;
        break;
    }

    if (round) {
        /* Rounding adds half of the shifted-out weight before the shift */
        uint64_t round_const = 1ULL << (shift - 1);
        tcg_round = tcg_const_i64(round_const);
    } else {
        TCGV_UNUSED_I64(tcg_round);
    }

    for (i = 0; i < elements; i++) {
        read_vec_element(s, tcg_rn, rn, i, memop);
        if (accumulate || insert) {
            /* These forms also consume the old destination element */
            read_vec_element(s, tcg_rd, rd, i, memop);
        }

        if (insert) {
            handle_shri_with_ins(tcg_rd, tcg_rn, size, shift);
        } else {
            handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
                                    accumulate, is_u, size, shift);
        }

        write_vec_element(s, tcg_rd, rd, i, size);
    }

    if (!is_q) {
        clear_vec_high(s, rd);
    }

    if (round) {
        tcg_temp_free_i64(tcg_round);
    }
}
8302
8303/* SHL/SLI - Vector shift left */
8304static void handle_vec_simd_shli(DisasContext *s, bool is_q, bool insert,
8305                                int immh, int immb, int opcode, int rn, int rd)
8306{
8307    int size = 32 - clz32(immh) - 1;
8308    int immhb = immh << 3 | immb;
8309    int shift = immhb - (8 << size);
8310    int dsize = is_q ? 128 : 64;
8311    int esize = 8 << size;
8312    int elements = dsize/esize;
8313    TCGv_i64 tcg_rn = new_tmp_a64(s);
8314    TCGv_i64 tcg_rd = new_tmp_a64(s);
8315    int i;
8316
8317    if (extract32(immh, 3, 1) && !is_q) {
8318        unallocated_encoding(s);
8319        return;
8320    }
8321
8322    if (size > 3 && !is_q) {
8323        unallocated_encoding(s);
8324        return;
8325    }
8326
8327    if (!fp_access_check(s)) {
8328        return;
8329    }
8330
8331    for (i = 0; i < elements; i++) {
8332        read_vec_element(s, tcg_rn, rn, i, size);
8333        if (insert) {
8334            read_vec_element(s, tcg_rd, rd, i, size);
8335        }
8336
8337        handle_shli_with_ins(tcg_rd, tcg_rn, insert, shift);
8338
8339        write_vec_element(s, tcg_rd, rd, i, size);
8340    }
8341
8342    if (!is_q) {
8343        clear_vec_high(s, rd);
8344    }
8345}
8346
8347/* USHLL/SHLL - Vector shift left with widening */
8348static void handle_vec_simd_wshli(DisasContext *s, bool is_q, bool is_u,
8349                                 int immh, int immb, int opcode, int rn, int rd)
8350{
8351    int size = 32 - clz32(immh) - 1;
8352    int immhb = immh << 3 | immb;
8353    int shift = immhb - (8 << size);
8354    int dsize = 64;
8355    int esize = 8 << size;
8356    int elements = dsize/esize;
8357    TCGv_i64 tcg_rn = new_tmp_a64(s);
8358    TCGv_i64 tcg_rd = new_tmp_a64(s);
8359    int i;
8360
8361    if (size >= 3) {
8362        unallocated_encoding(s);
8363        return;
8364    }
8365
8366    if (!fp_access_check(s)) {
8367        return;
8368    }
8369
8370    /* For the LL variants the store is larger than the load,
8371     * so if rd == rn we would overwrite parts of our input.
8372     * So load everything right now and use shifts in the main loop.
8373     */
8374    read_vec_element(s, tcg_rn, rn, is_q ? 1 : 0, MO_64);
8375
8376    for (i = 0; i < elements; i++) {
8377        tcg_gen_shri_i64(tcg_rd, tcg_rn, i * esize);
8378        ext_and_shift_reg(tcg_rd, tcg_rd, size | (!is_u << 2), 0);
8379        tcg_gen_shli_i64(tcg_rd, tcg_rd, shift);
8380        write_vec_element(s, tcg_rd, rd, i, size + 1);
8381    }
8382}
8383
8384/* SHRN/RSHRN - Shift right with narrowing (and potential rounding) */
8385static void handle_vec_simd_shrn(DisasContext *s, bool is_q,
8386                                 int immh, int immb, int opcode, int rn, int rd)
8387{
8388    int immhb = immh << 3 | immb;
8389    int size = 32 - clz32(immh) - 1;
8390    int dsize = 64;
8391    int esize = 8 << size;
8392    int elements = dsize/esize;
8393    int shift = (2 * esize) - immhb;
8394    bool round = extract32(opcode, 0, 1);
8395    TCGv_i64 tcg_rn, tcg_rd, tcg_final;
8396    TCGv_i64 tcg_round;
8397    int i;
8398
8399    if (extract32(immh, 3, 1)) {
8400        unallocated_encoding(s);
8401        return;
8402    }
8403
8404    if (!fp_access_check(s)) {
8405        return;
8406    }
8407
8408    tcg_rn = tcg_temp_new_i64();
8409    tcg_rd = tcg_temp_new_i64();
8410    tcg_final = tcg_temp_new_i64();
8411    read_vec_element(s, tcg_final, rd, is_q ? 1 : 0, MO_64);
8412
8413    if (round) {
8414        uint64_t round_const = 1ULL << (shift - 1);
8415        tcg_round = tcg_const_i64(round_const);
8416    } else {
8417        TCGV_UNUSED_I64(tcg_round);
8418    }
8419
8420    for (i = 0; i < elements; i++) {
8421        read_vec_element(s, tcg_rn, rn, i, size+1);
8422        handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
8423                                false, true, size+1, shift);
8424
8425        tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
8426    }
8427
8428    if (!is_q) {
8429        clear_vec_high(s, rd);
8430        write_vec_element(s, tcg_final, rd, 0, MO_64);
8431    } else {
8432        write_vec_element(s, tcg_final, rd, 1, MO_64);
8433    }
8434
8435    if (round) {
8436        tcg_temp_free_i64(tcg_round);
8437    }
8438    tcg_temp_free_i64(tcg_rn);
8439    tcg_temp_free_i64(tcg_rd);
8440    tcg_temp_free_i64(tcg_final);
8441    return;
8442}
8443
8444
/* C3.6.14 AdvSIMD shift by immediate
 *  31  30   29 28         23 22  19 18  16 15    11  10 9    5 4    0
 * +---+---+---+-------------+------+------+--------+---+------+------+
 * | 0 | Q | U | 0 1 1 1 1 0 | immh | immb | opcode | 1 |  Rn  |  Rd  |
 * +---+---+---+-------------+------+------+--------+---+------+------+
 *
 * Top-level decode for the vector shift-by-immediate group: extract
 * the common fields and dispatch on opcode to the per-class handlers,
 * rejecting unallocated encodings along the way.
 */
static void disas_simd_shift_imm(DisasContext *s, uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int opcode = extract32(insn, 11, 5);
    int immb = extract32(insn, 16, 3);
    int immh = extract32(insn, 19, 4);
    bool is_u = extract32(insn, 29, 1);
    bool is_q = extract32(insn, 30, 1);

    switch (opcode) {
    case 0x08: /* SRI */
        /* SRI exists only in the U=1 form */
        if (!is_u) {
            unallocated_encoding(s);
            return;
        }
        /* fall through */
    case 0x00: /* SSHR / USHR */
    case 0x02: /* SSRA / USRA (accumulate) */
    case 0x04: /* SRSHR / URSHR (rounding) */
    case 0x06: /* SRSRA / URSRA (accum + rounding) */
        handle_vec_simd_shri(s, is_q, is_u, immh, immb, opcode, rn, rd);
        break;
    case 0x0a: /* SHL / SLI */
        handle_vec_simd_shli(s, is_q, is_u, immh, immb, opcode, rn, rd);
        break;
    case 0x10: /* SHRN */
    case 0x11: /* RSHRN / SQRSHRUN */
        /* U=1 selects the saturating SQSHRUN/SQRSHRUN forms */
        if (is_u) {
            handle_vec_simd_sqshrn(s, false, is_q, false, true, immh, immb,
                                   opcode, rn, rd);
        } else {
            handle_vec_simd_shrn(s, is_q, immh, immb, opcode, rn, rd);
        }
        break;
    case 0x12: /* SQSHRN / UQSHRN */
    case 0x13: /* SQRSHRN / UQRSHRN */
        handle_vec_simd_sqshrn(s, false, is_q, is_u, is_u, immh, immb,
                               opcode, rn, rd);
        break;
    case 0x14: /* SSHLL / USHLL */
        handle_vec_simd_wshli(s, is_q, is_u, immh, immb, opcode, rn, rd);
        break;
    case 0x1c: /* SCVTF / UCVTF */
        handle_simd_shift_intfp_conv(s, false, is_q, is_u, immh, immb,
                                     opcode, rn, rd);
        break;
    case 0xc: /* SQSHLU */
        /* SQSHLU exists only in the U=1 form */
        if (!is_u) {
            unallocated_encoding(s);
            return;
        }
        handle_simd_qshl(s, false, is_q, false, true, immh, immb, rn, rd);
        break;
    case 0xe: /* SQSHL, UQSHL */
        handle_simd_qshl(s, false, is_q, is_u, is_u, immh, immb, rn, rd);
        break;
    case 0x1f: /* FCVTZS/ FCVTZU */
        handle_simd_shift_fpint_conv(s, false, is_q, is_u, immh, immb, rn, rd);
        return;
    default:
        unallocated_encoding(s);
        return;
    }
}
8516
8517/* Generate code to do a "long" addition or subtraction, ie one done in
8518 * TCGv_i64 on vector lanes twice the width specified by size.
8519 */
8520static void gen_neon_addl(int size, bool is_sub, TCGv_i64 tcg_res,
8521                          TCGv_i64 tcg_op1, TCGv_i64 tcg_op2)
8522{
8523    static NeonGenTwo64OpFn * const fns[3][2] = {
8524        { gen_helper_neon_addl_u16, gen_helper_neon_subl_u16 },
8525        { gen_helper_neon_addl_u32, gen_helper_neon_subl_u32 },
8526        { tcg_gen_add_i64, tcg_gen_sub_i64 },
8527    };
8528    NeonGenTwo64OpFn *genfn;
8529    assert(size < 3);
8530
8531    genfn = fns[size][is_sub];
8532    genfn(tcg_res, tcg_op1, tcg_op2);
8533}
8534
/* Handle the three-reg-different widening class: 64 x 64 -> 128.
 * Covers the long add/sub/abs-diff/multiply family ({S,U}ADDL,
 * {S,U}SUBL, {S,U}ABAL, {S,U}ABDL, {S,U}MLAL, {S,U}MLSL, {S,U}MULL,
 * SQDMLAL, SQDMLSL, SQDMULL and 8-bit PMULL); is_q selects the "2"
 * forms, which take their inputs from the high halves of Rn/Rm.
 */
static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size,
                                int opcode, int rd, int rn, int rm)
{
    /* 3-reg-different widening insns: 64 x 64 -> 128 */
    TCGv_i64 tcg_res[2];
    int pass, accop;

    tcg_res[0] = tcg_temp_new_i64();
    tcg_res[1] = tcg_temp_new_i64();

    /* Does this op do an adding accumulate, a subtracting accumulate,
     * or no accumulate at all?
     */
    switch (opcode) {
    case 5:
    case 8:
    case 9:
        accop = 1;
        break;
    case 10:
    case 11:
        accop = -1;
        break;
    default:
        accop = 0;
        break;
    }

    if (accop != 0) {
        /* Accumulating forms start from the old destination value */
        read_vec_element(s, tcg_res[0], rd, 0, MO_64);
        read_vec_element(s, tcg_res[1], rd, 1, MO_64);
    }

    /* size == 2 means two 32x32->64 operations; this is worth special
     * casing because we can generally handle it inline.
     */
    if (size == 2) {
        for (pass = 0; pass < 2; pass++) {
            TCGv_i64 tcg_op1 = tcg_temp_new_i64();
            TCGv_i64 tcg_op2 = tcg_temp_new_i64();
            TCGv_i64 tcg_passres;
            TCGMemOp memop = MO_32 | (is_u ? 0 : MO_SIGN);

            /* The "2" forms (is_q) read the high halves of the inputs */
            int elt = pass + is_q * 2;

            read_vec_element(s, tcg_op1, rn, elt, memop);
            read_vec_element(s, tcg_op2, rm, elt, memop);

            /* Without accumulation, compute straight into the result;
             * otherwise into a scratch that is folded in afterwards.
             */
            if (accop == 0) {
                tcg_passres = tcg_res[pass];
            } else {
                tcg_passres = tcg_temp_new_i64();
            }

            switch (opcode) {
            case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
                tcg_gen_add_i64(tcg_passres, tcg_op1, tcg_op2);
                break;
            case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
                tcg_gen_sub_i64(tcg_passres, tcg_op1, tcg_op2);
                break;
            case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
            case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
            {
                TCGv_i64 tcg_tmp1 = tcg_temp_new_i64();
                TCGv_i64 tcg_tmp2 = tcg_temp_new_i64();

                /* Absolute difference: compute both (op1 - op2) and
                 * (op2 - op1), then select the non-negative one.
                 */
                tcg_gen_sub_i64(tcg_tmp1, tcg_op1, tcg_op2);
                tcg_gen_sub_i64(tcg_tmp2, tcg_op2, tcg_op1);
                tcg_gen_movcond_i64(is_u ? TCG_COND_GEU : TCG_COND_GE,
                                    tcg_passres,
                                    tcg_op1, tcg_op2, tcg_tmp1, tcg_tmp2);
                tcg_temp_free_i64(tcg_tmp1);
                tcg_temp_free_i64(tcg_tmp2);
                break;
            }
            case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
            case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
            case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
                tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
                break;
            case 9: /* SQDMLAL, SQDMLAL2 */
            case 11: /* SQDMLSL, SQDMLSL2 */
            case 13: /* SQDMULL, SQDMULL2 */
                /* Saturating doubling: double the product via a
                 * saturating add of the product to itself.
                 */
                tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
                gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env,
                                                  tcg_passres, tcg_passres);
                break;
            default:
                g_assert_not_reached();
            }

            if (opcode == 9 || opcode == 11) {
                /* saturating accumulate ops */
                if (accop < 0) {
                    tcg_gen_neg_i64(tcg_passres, tcg_passres);
                }
                gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env,
                                                  tcg_res[pass], tcg_passres);
            } else if (accop > 0) {
                tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
            } else if (accop < 0) {
                tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
            }

            if (accop != 0) {
                tcg_temp_free_i64(tcg_passres);
            }

            tcg_temp_free_i64(tcg_op1);
            tcg_temp_free_i64(tcg_op2);
        }
    } else {
        /* size 0 or 1, generally helper functions */
        for (pass = 0; pass < 2; pass++) {
            TCGv_i32 tcg_op1 = tcg_temp_new_i32();
            TCGv_i32 tcg_op2 = tcg_temp_new_i32();
            TCGv_i64 tcg_passres;
            int elt = pass + is_q * 2;

            read_vec_element_i32(s, tcg_op1, rn, elt, MO_32);
            read_vec_element_i32(s, tcg_op2, rm, elt, MO_32);

            if (accop == 0) {
                tcg_passres = tcg_res[pass];
            } else {
                tcg_passres = tcg_temp_new_i64();
            }

            switch (opcode) {
            case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
            case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
            {
                TCGv_i64 tcg_op2_64 = tcg_temp_new_i64();
                /* Widening function table, indexed [size][is_u] */
                static NeonGenWidenFn * const widenfns[2][2] = {
                    { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
                    { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
                };
                NeonGenWidenFn *widenfn = widenfns[size][is_u];

                /* Widen both inputs, then do the long add/sub */
                widenfn(tcg_op2_64, tcg_op2);
                widenfn(tcg_passres, tcg_op1);
                gen_neon_addl(size, (opcode == 2), tcg_passres,
                              tcg_passres, tcg_op2_64);
                tcg_temp_free_i64(tcg_op2_64);
                break;
            }
            case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
            case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
                if (size == 0) {
                    if (is_u) {
                        gen_helper_neon_abdl_u16(tcg_passres, tcg_op1, tcg_op2);
                    } else {
                        gen_helper_neon_abdl_s16(tcg_passres, tcg_op1, tcg_op2);
                    }
                } else {
                    if (is_u) {
                        gen_helper_neon_abdl_u32(tcg_passres, tcg_op1, tcg_op2);
                    } else {
                        gen_helper_neon_abdl_s32(tcg_passres, tcg_op1, tcg_op2);
                    }
                }
                break;
            case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
            case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
            case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
                if (size == 0) {
                    if (is_u) {
                        gen_helper_neon_mull_u8(tcg_passres, tcg_op1, tcg_op2);
                    } else {
                        gen_helper_neon_mull_s8(tcg_passres, tcg_op1, tcg_op2);
                    }
                } else {
                    if (is_u) {
                        gen_helper_neon_mull_u16(tcg_passres, tcg_op1, tcg_op2);
                    } else {
                        gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
                    }
                }
                break;
            case 9: /* SQDMLAL, SQDMLAL2 */
            case 11: /* SQDMLSL, SQDMLSL2 */
            case 13: /* SQDMULL, SQDMULL2 */
                /* Saturating doubling forms only exist for size 1 here */
                assert(size == 1);
                gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
                gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
                                                  tcg_passres, tcg_passres);
                break;
            case 14: /* PMULL */
                assert(size == 0);
                gen_helper_neon_mull_p8(tcg_passres, tcg_op1, tcg_op2);
                break;
            default:
                g_assert_not_reached();
            }
            tcg_temp_free_i32(tcg_op1);
            tcg_temp_free_i32(tcg_op2);

            if (accop != 0) {
                if (opcode == 9 || opcode == 11) {
                    /* saturating accumulate ops */
                    if (accop < 0) {
                        gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
                    }
                    gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env,
                                                      tcg_res[pass],
                                                      tcg_passres);
                } else {
                    gen_neon_addl(size, (accop < 0), tcg_res[pass],
                                  tcg_res[pass], tcg_passres);
                }
                tcg_temp_free_i64(tcg_passres);
            }
        }
    }

    /* Write back only after both passes, since rd may alias rn/rm */
    write_vec_element(s, tcg_res[0], rd, 0, MO_64);
    write_vec_element(s, tcg_res[1], rd, 1, MO_64);
    tcg_temp_free_i64(tcg_res[0]);
    tcg_temp_free_i64(tcg_res[1]);
}
8756
8757static void handle_3rd_wide(DisasContext *s, int is_q, int is_u, int size,
8758                            int opcode, int rd, int rn, int rm)
8759{
8760    TCGv_i64 tcg_res[2];
8761    int part = is_q ? 2 : 0;
8762    int pass;
8763
8764    for (pass = 0; pass < 2; pass++) {
8765        TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8766        TCGv_i32 tcg_op2 = tcg_temp_new_i32();
8767        TCGv_i64 tcg_op2_wide = tcg_temp_new_i64();
8768        static NeonGenWidenFn * const widenfns[3][2] = {
8769            { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
8770            { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
8771            { tcg_gen_ext_i32_i64, tcg_gen_extu_i32_i64 },
8772        };
8773        NeonGenWidenFn *widenfn = widenfns[size][is_u];
8774
8775        read_vec_element(s, tcg_op1, rn, pass, MO_64);
8776        read_vec_element_i32(s, tcg_op2, rm, part + pass, MO_32);
8777        widenfn(tcg_op2_wide, tcg_op2);
8778        tcg_temp_free_i32(tcg_op2);
8779        tcg_res[pass] = tcg_temp_new_i64();
8780        gen_neon_addl(size, (opcode == 3),
8781                      tcg_res[pass], tcg_op1, tcg_op2_wide);
8782        tcg_temp_free_i64(tcg_op1);
8783        tcg_temp_free_i64(tcg_op2_wide);
8784    }
8785
8786    for (pass = 0; pass < 2; pass++) {
8787        write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
8788        tcg_temp_free_i64(tcg_res[pass]);
8789    }
8790}
8791
8792static void do_narrow_round_high_u32(TCGv_i32 res, TCGv_i64 in)
8793{
8794    tcg_gen_addi_i64(in, in, 1U << 31);
8795    tcg_gen_extrh_i64_i32(res, in);
8796}
8797
/* Three-reg-different narrowing insns: 128 x 128 -> 64 (ADDHN, RADDHN,
 * SUBHN, RSUBHN and their "2" forms).  Each result element is the high
 * half of a double-width add (else branch of opcode == 6) or subtract,
 * with is_u selecting the rounding (R) variants.
 */
static void handle_3rd_narrowing(DisasContext *s, int is_q, int is_u, int size,
                                 int opcode, int rd, int rn, int rm)
{
    TCGv_i32 tcg_res[2];
    /* The "2" forms write the high half of rd, the others the low half */
    int part = is_q ? 2 : 0;
    int pass;

    for (pass = 0; pass < 2; pass++) {
        TCGv_i64 tcg_op1 = tcg_temp_new_i64();
        TCGv_i64 tcg_op2 = tcg_temp_new_i64();
        TCGv_i64 tcg_wideres = tcg_temp_new_i64();
        /* Narrowing function table: [size][is_u], plain vs rounding */
        static NeonGenNarrowFn * const narrowfns[3][2] = {
            { gen_helper_neon_narrow_high_u8,
              gen_helper_neon_narrow_round_high_u8 },
            { gen_helper_neon_narrow_high_u16,
              gen_helper_neon_narrow_round_high_u16 },
            { tcg_gen_extrh_i64_i32, do_narrow_round_high_u32 },
        };
        NeonGenNarrowFn *gennarrow = narrowfns[size][is_u];

        read_vec_element(s, tcg_op1, rn, pass, MO_64);
        read_vec_element(s, tcg_op2, rm, pass, MO_64);

        /* Long add/sub of the double-width lanes in this 64-bit chunk */
        gen_neon_addl(size, (opcode == 6), tcg_wideres, tcg_op1, tcg_op2);

        tcg_temp_free_i64(tcg_op1);
        tcg_temp_free_i64(tcg_op2);

        tcg_res[pass] = tcg_temp_new_i32();
        gennarrow(tcg_res[pass], tcg_wideres);
        tcg_temp_free_i64(tcg_wideres);
    }

    /* Results are buffered above, so rd may alias rn/rm */
    for (pass = 0; pass < 2; pass++) {
        write_vec_element_i32(s, tcg_res[pass], rd, pass + part, MO_32);
        tcg_temp_free_i32(tcg_res[pass]);
    }
    if (!is_q) {
        clear_vec_high(s, rd);
    }
}
8839
8840static void handle_pmull_64(DisasContext *s, int is_q, int rd, int rn, int rm)
8841{
8842    /* PMULL of 64 x 64 -> 128 is an odd special case because it
8843     * is the only three-reg-diff instruction which produces a
8844     * 128-bit wide result from a single operation. However since
8845     * it's possible to calculate the two halves more or less
8846     * separately we just use two helper calls.
8847     */
8848    TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8849    TCGv_i64 tcg_op2 = tcg_temp_new_i64();
8850    TCGv_i64 tcg_res = tcg_temp_new_i64();
8851
8852    read_vec_element(s, tcg_op1, rn, is_q, MO_64);
8853    read_vec_element(s, tcg_op2, rm, is_q, MO_64);
8854    gen_helper_neon_pmull_64_lo(tcg_res, tcg_op1, tcg_op2);
8855    write_vec_element(s, tcg_res, rd, 0, MO_64);
8856    gen_helper_neon_pmull_64_hi(tcg_res, tcg_op1, tcg_op2);
8857    write_vec_element(s, tcg_res, rd, 1, MO_64);
8858
8859    tcg_temp_free_i64(tcg_op1);
8860    tcg_temp_free_i64(tcg_op2);
8861    tcg_temp_free_i64(tcg_res);
8862}
8863
/* C3.6.15 AdvSIMD three different
 *   31  30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
 * +---+---+---+-----------+------+---+------+--------+-----+------+------+
 * | 0 | Q | U | 0 1 1 1 0 | size | 1 |  Rm  | opcode | 0 0 |  Rn  |  Rd  |
 * +---+---+---+-----------+------+---+------+--------+-----+------+------+
 */
static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn)
{
    /* Instructions in this group fall into three basic classes
     * (in each case with the operation working on each element in
     * the input vectors):
     * (1) widening 64 x 64 -> 128 (with possibly Vd as an extra
     *     128 bit input)
     * (2) wide 64 x 128 -> 128
     * (3) narrowing 128 x 128 -> 64
     * Here we do initial decode, catch unallocated cases and
     * dispatch to separate functions for each class.
     */
    int is_q = extract32(insn, 30, 1);
    int is_u = extract32(insn, 29, 1);
    int size = extract32(insn, 22, 2);
    int opcode = extract32(insn, 12, 4);
    int rm = extract32(insn, 16, 5);
    int rn = extract32(insn, 5, 5);
    int rd = extract32(insn, 0, 5);

    switch (opcode) {
    case 1: /* SADDW, SADDW2, UADDW, UADDW2 */
    case 3: /* SSUBW, SSUBW2, USUBW, USUBW2 */
        /* 64 x 128 -> 128 */
        if (size == 3) {
            unallocated_encoding(s);
            return;
        }
        if (!fp_access_check(s)) {
            return;
        }
        handle_3rd_wide(s, is_q, is_u, size, opcode, rd, rn, rm);
        break;
    case 4: /* ADDHN, ADDHN2, RADDHN, RADDHN2 */
    case 6: /* SUBHN, SUBHN2, RSUBHN, RSUBHN2 */
        /* 128 x 128 -> 64 */
        if (size == 3) {
            unallocated_encoding(s);
            return;
        }
        if (!fp_access_check(s)) {
            return;
        }
        handle_3rd_narrowing(s, is_q, is_u, size, opcode, rd, rn, rm);
        break;
    case 14: /* PMULL, PMULL2 */
        /* Only sizes 0 (8-bit) and 3 (64-bit) are allocated */
        if (is_u || size == 1 || size == 2) {
            unallocated_encoding(s);
            return;
        }
        if (size == 3) {
            /* The 64x64->128 form is an optional v8 feature */
            if (!arm_dc_feature(s, ARM_FEATURE_V8_PMULL)) {
                unallocated_encoding(s);
                return;
            }
            if (!fp_access_check(s)) {
                return;
            }
            handle_pmull_64(s, is_q, rd, rn, rm);
            return;
        }
        /* 8-bit PMULL shares the widening path below */
        goto is_widening;
    case 9: /* SQDMLAL, SQDMLAL2 */
    case 11: /* SQDMLSL, SQDMLSL2 */
    case 13: /* SQDMULL, SQDMULL2 */
        /* No unsigned or byte-sized saturating doubling forms */
        if (is_u || size == 0) {
            unallocated_encoding(s);
            return;
        }
        /* fall through */
    case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
    case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
    case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
    case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
    case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
    case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
    case 12: /* SMULL, SMULL2, UMULL, UMULL2 */
        /* 64 x 64 -> 128 */
        if (size == 3) {
            unallocated_encoding(s);
            return;
        }
    is_widening:
        if (!fp_access_check(s)) {
            return;
        }

        handle_3rd_widening(s, is_q, is_u, size, opcode, rd, rn, rm);
        break;
    default:
        /* opcode 15 not allocated */
        unallocated_encoding(s);
        break;
    }
}
8965
/* Logic op (opcode == 3) subgroup of C3.6.16.
 *
 * Bitwise three-reg-same ops: with U=0 the size field selects
 * AND/BIC/ORR/ORN; with U=1 it selects EOR/BSL/BIT/BIF, the latter
 * three of which also read the old destination value.
 */
static void disas_simd_3same_logic(DisasContext *s, uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int rm = extract32(insn, 16, 5);
    int size = extract32(insn, 22, 2);
    bool is_u = extract32(insn, 29, 1);
    bool is_q = extract32(insn, 30, 1);
    TCGv_i64 tcg_op1, tcg_op2, tcg_res[2];
    int pass;

    if (!fp_access_check(s)) {
        return;
    }

    tcg_op1 = tcg_temp_new_i64();
    tcg_op2 = tcg_temp_new_i64();
    tcg_res[0] = tcg_temp_new_i64();
    tcg_res[1] = tcg_temp_new_i64();

    /* Operate on 64-bit chunks: one pass for the 64-bit form, two for
     * the 128-bit form.
     */
    for (pass = 0; pass < (is_q ? 2 : 1); pass++) {
        read_vec_element(s, tcg_op1, rn, pass, MO_64);
        read_vec_element(s, tcg_op2, rm, pass, MO_64);

        if (!is_u) {
            switch (size) {
            case 0: /* AND */
                tcg_gen_and_i64(tcg_res[pass], tcg_op1, tcg_op2);
                break;
            case 1: /* BIC */
                tcg_gen_andc_i64(tcg_res[pass], tcg_op1, tcg_op2);
                break;
            case 2: /* ORR */
                tcg_gen_or_i64(tcg_res[pass], tcg_op1, tcg_op2);
                break;
            case 3: /* ORN */
                tcg_gen_orc_i64(tcg_res[pass], tcg_op1, tcg_op2);
                break;
            }
        } else {
            if (size != 0) {
                /* B* ops need res loaded to operate on */
                read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
            }

            switch (size) {
            case 0: /* EOR */
                tcg_gen_xor_i64(tcg_res[pass], tcg_op1, tcg_op2);
                break;
            case 1: /* BSL bitwise select */
                /* rd = ((rn ^ rm) & rd) ^ rm: each rd bit selects
                 * between rn (bit set) and rm (bit clear).
                 */
                tcg_gen_xor_i64(tcg_op1, tcg_op1, tcg_op2);
                tcg_gen_and_i64(tcg_op1, tcg_op1, tcg_res[pass]);
                tcg_gen_xor_i64(tcg_res[pass], tcg_op2, tcg_op1);
                break;
            case 2: /* BIT, bitwise insert if true */
                /* rd = ((rn ^ rd) & rm) ^ rd: copy rn bits into rd
                 * wherever rm has a bit set.
                 */
                tcg_gen_xor_i64(tcg_op1, tcg_op1, tcg_res[pass]);
                tcg_gen_and_i64(tcg_op1, tcg_op1, tcg_op2);
                tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
                break;
            case 3: /* BIF, bitwise insert if false */
                /* rd = ((rn ^ rd) & ~rm) ^ rd: copy rn bits into rd
                 * wherever rm has a bit clear.
                 */
                tcg_gen_xor_i64(tcg_op1, tcg_op1, tcg_res[pass]);
                tcg_gen_andc_i64(tcg_op1, tcg_op1, tcg_op2);
                tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
                break;
            }
        }
    }

    write_vec_element(s, tcg_res[0], rd, 0, MO_64);
    if (!is_q) {
        /* 64-bit form: zero the high half of the destination */
        tcg_gen_movi_i64(tcg_res[1], 0);
    }
    write_vec_element(s, tcg_res[1], rd, 1, MO_64);

    tcg_temp_free_i64(tcg_op1);
    tcg_temp_free_i64(tcg_op2);
    tcg_temp_free_i64(tcg_res[0]);
    tcg_temp_free_i64(tcg_res[1]);
}
9046
/* Helper functions for 32 bit comparisons */
static void gen_max_s32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
{
    /* res = max(op1, op2), operands treated as signed */
    tcg_gen_movcond_i32(TCG_COND_GE, res, op1, op2, op1, op2);
}
9052
static void gen_max_u32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
{
    /* res = max(op1, op2), operands treated as unsigned */
    tcg_gen_movcond_i32(TCG_COND_GEU, res, op1, op2, op1, op2);
}
9057
static void gen_min_s32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
{
    /* res = min(op1, op2), operands treated as signed */
    tcg_gen_movcond_i32(TCG_COND_LE, res, op1, op2, op1, op2);
}
9062
static void gen_min_u32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
{
    /* res = min(op1, op2), operands treated as unsigned */
    tcg_gen_movcond_i32(TCG_COND_LEU, res, op1, op2, op1, op2);
}
9067
9068/* Pairwise op subgroup of C3.6.16.
9069 *
9070 * This is called directly or via the handle_3same_float for float pairwise
9071 * operations where the opcode and size are calculated differently.
9072 */
9073static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode,
9074                                   int size, int rn, int rm, int rd)
9075{
9076    TCGv_ptr fpst;
9077    int pass;
9078
9079    /* Floating point operations need fpst */
9080    if (opcode >= 0x58) {
9081        fpst = get_fpstatus_ptr();
9082    } else {
9083        TCGV_UNUSED_PTR(fpst);
9084    }
9085
9086    if (!fp_access_check(s)) {
9087        return;
9088    }
9089
9090    /* These operations work on the concatenated rm:rn, with each pair of
9091     * adjacent elements being operated on to produce an element in the result.
9092     */
9093    if (size == 3) {
9094        TCGv_i64 tcg_res[2];
9095
9096        for (pass = 0; pass < 2; pass++) {
9097            TCGv_i64 tcg_op1 = tcg_temp_new_i64();
9098            TCGv_i64 tcg_op2 = tcg_temp_new_i64();
9099            int passreg = (pass == 0) ? rn : rm;
9100
9101            read_vec_element(s, tcg_op1, passreg, 0, MO_64);
9102            read_vec_element(s, tcg_op2, passreg, 1, MO_64);
9103            tcg_res[pass] = tcg_temp_new_i64();
9104
9105            switch (opcode) {
9106            case 0x17: /* ADDP */
9107                tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
9108                break;
9109            case 0x58: /* FMAXNMP */
9110                gen_helper_vfp_maxnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9111                break;
9112            case 0x5a: /* FADDP */
9113                gen_helper_vfp_addd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9114                break;
9115            case 0x5e: /* FMAXP */
9116                gen_helper_vfp_maxd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9117                break;
9118            case 0x78: /* FMINNMP */
9119                gen_helper_vfp_minnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9120                break;
9121            case 0x7e: /* FMINP */
9122                gen_helper_vfp_mind(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9123                break;
9124            default:
9125                g_assert_not_reached();
9126            }
9127
9128            tcg_temp_free_i64(tcg_op1);
9129            tcg_temp_free_i64(tcg_op2);
9130        }
9131
9132        for (pass = 0; pass < 2; pass++) {
9133            write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
9134            tcg_temp_free_i64(tcg_res[pass]);
9135        }
9136    } else {
9137        int maxpass = is_q ? 4 : 2;
9138        TCGv_i32 tcg_res[4];
9139
9140        for (pass = 0; pass < maxpass; pass++) {
9141            TCGv_i32 tcg_op1 = tcg_temp_new_i32();
9142            TCGv_i32 tcg_op2 = tcg_temp_new_i32();
9143            NeonGenTwoOpFn *genfn = NULL;
9144            int passreg = pass < (maxpass / 2) ? rn : rm;
9145            int passelt = (is_q && (pass & 1)) ? 2 : 0;
9146
9147            read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_32);
9148            read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_32);
9149            tcg_res[pass] = tcg_temp_new_i32();
9150
9151            switch (opcode) {
9152            case 0x17: /* ADDP */
9153            {
9154                static NeonGenTwoOpFn * const fns[3] = {
9155                    gen_helper_neon_padd_u8,
9156                    gen_helper_neon_padd_u16,
9157                    tcg_gen_add_i32,
9158                };
9159                genfn = fns[size];
9160                break;
9161            }
9162            case 0x14: /* SMAXP, UMAXP */
9163            {
9164                static NeonGenTwoOpFn * const fns[3][2] = {
9165                    { gen_helper_neon_pmax_s8, gen_helper_neon_pmax_u8 },
9166                    { gen_helper_neon_pmax_s16, gen_helper_neon_pmax_u16 },
9167                    { gen_max_s32, gen_max_u32 },
9168                };
9169                genfn = fns[size][u];
9170                break;
9171            }
9172            case 0x15: /* SMINP, UMINP */
9173            {
9174                static NeonGenTwoOpFn * const fns[3][2] = {
9175                    { gen_helper_neon_pmin_s8, gen_helper_neon_pmin_u8 },
9176                    { gen_helper_neon_pmin_s16, gen_helper_neon_pmin_u16 },
9177                    { gen_min_s32, gen_min_u32 },
9178                };
9179                genfn = fns[size][u];
9180                break;
9181            }
9182            /* The FP operations are all on single floats (32 bit) */
9183            case 0x58: /* FMAXNMP */
9184                gen_helper_vfp_maxnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9185                break;
9186            case 0x5a: /* FADDP */
9187                gen_helper_vfp_adds(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9188                break;
9189            case 0x5e: /* FMAXP */
9190                gen_helper_vfp_maxs(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9191                break;
9192            case 0x78: /* FMINNMP */
9193                gen_helper_vfp_minnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9194                break;
9195            case 0x7e: /* FMINP */
9196                gen_helper_vfp_mins(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9197                break;
9198            default:
9199                g_assert_not_reached();
9200            }
9201
9202            /* FP ops called directly, otherwise call now */
9203            if (genfn) {
9204                genfn(tcg_res[pass], tcg_op1, tcg_op2);
9205            }
9206
9207            tcg_temp_free_i32(tcg_op1);
9208            tcg_temp_free_i32(tcg_op2);
9209        }
9210
9211        for (pass = 0; pass < maxpass; pass++) {
9212            write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
9213            tcg_temp_free_i32(tcg_res[pass]);
9214        }
9215        if (!is_q) {
9216            clear_vec_high(s, rd);
9217        }
9218    }
9219
9220    if (!TCGV_IS_UNUSED_PTR(fpst)) {
9221        tcg_temp_free_ptr(fpst);
9222    }
9223}
9224
/* Floating point op subgroup of C3.6.16 (AdvSIMD three same).
 * Decodes and dispatches the FP operations; validates size/Q
 * combinations and performs the FP access check for the
 * non-pairwise cases (pairwise ops check inside their handler).
 */
static void disas_simd_3same_float(DisasContext *s, uint32_t insn)
{
    /* For floating point ops, the U, size[1] and opcode bits
     * together indicate the operation. size[0] indicates single
     * or double.
     */
    int fpopcode = extract32(insn, 11, 5)
        | (extract32(insn, 23, 1) << 5)
        | (extract32(insn, 29, 1) << 6);
    int is_q = extract32(insn, 30, 1);
    int size = extract32(insn, 22, 1); /* size[0] only: 0 = single, 1 = double */
    int rm = extract32(insn, 16, 5);
    int rn = extract32(insn, 5, 5);
    int rd = extract32(insn, 0, 5);

    int datasize = is_q ? 128 : 64;
    int esize = 32 << size;
    int elements = datasize / esize;

    /* Double-precision elements only exist in the 128-bit (Q) form */
    if (size == 1 && !is_q) {
        unallocated_encoding(s);
        return;
    }

    switch (fpopcode) {
    case 0x58: /* FMAXNMP */
    case 0x5a: /* FADDP */
    case 0x5e: /* FMAXP */
    case 0x78: /* FMINNMP */
    case 0x7e: /* FMINP */
        if (size && !is_q) {
            unallocated_encoding(s);
            return;
        }
        /* Pairwise ops share the integer pairwise handler; it does its
         * own fp_access_check before emitting any vector accesses.
         */
        handle_simd_3same_pair(s, is_q, 0, fpopcode, size ? MO_64 : MO_32,
                               rn, rm, rd);
        return;
    case 0x1b: /* FMULX */
    case 0x1f: /* FRECPS */
    case 0x3f: /* FRSQRTS */
    case 0x5d: /* FACGE */
    case 0x7d: /* FACGT */
    case 0x19: /* FMLA */
    case 0x39: /* FMLS */
    case 0x18: /* FMAXNM */
    case 0x1a: /* FADD */
    case 0x1c: /* FCMEQ */
    case 0x1e: /* FMAX */
    case 0x38: /* FMINNM */
    case 0x3a: /* FSUB */
    case 0x3e: /* FMIN */
    case 0x5b: /* FMUL */
    case 0x5c: /* FCMGE */
    case 0x5f: /* FDIV */
    case 0x7a: /* FABD */
    case 0x7c: /* FCMGT */
        if (!fp_access_check(s)) {
            return;
        }

        /* handle_3same_float wants the element size as MO_32/MO_64 */
        handle_3same_float(s, size, elements, fpopcode, rd, rn, rm);
        return;
    default:
        unallocated_encoding(s);
        return;
    }
}
9293
/* Integer op subgroup of C3.6.16 (AdvSIMD three same).
 * Handles all the integer three-same ops: validates the size/U
 * combinations, then emits either 64-bit-element code (via
 * handle_3same_64) or a per-32-bit-pass loop driven by tables of
 * Neon helper functions.
 */
static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
{
    int is_q = extract32(insn, 30, 1);
    int u = extract32(insn, 29, 1);
    int size = extract32(insn, 22, 2);
    int opcode = extract32(insn, 11, 5);
    int rm = extract32(insn, 16, 5);
    int rn = extract32(insn, 5, 5);
    int rd = extract32(insn, 0, 5);
    int pass;

    /* Reject the unallocated size/U combinations before emitting anything */
    switch (opcode) {
    case 0x13: /* MUL, PMUL */
        if (u && size != 0) {
            /* PMUL (polynomial multiply) only exists for byte elements */
            unallocated_encoding(s);
            return;
        }
        /* fall through */
    case 0x0: /* SHADD, UHADD */
    case 0x2: /* SRHADD, URHADD */
    case 0x4: /* SHSUB, UHSUB */
    case 0xc: /* SMAX, UMAX */
    case 0xd: /* SMIN, UMIN */
    case 0xe: /* SABD, UABD */
    case 0xf: /* SABA, UABA */
    case 0x12: /* MLA, MLS */
        /* These ops have no 64-bit element form at all */
        if (size == 3) {
            unallocated_encoding(s);
            return;
        }
        break;
    case 0x16: /* SQDMULH, SQRDMULH */
        /* Only 16-bit and 32-bit element forms exist */
        if (size == 0 || size == 3) {
            unallocated_encoding(s);
            return;
        }
        break;
    default:
        /* Remaining ops allow 64-bit elements, but only in the Q form */
        if (size == 3 && !is_q) {
            unallocated_encoding(s);
            return;
        }
        break;
    }

    if (!fp_access_check(s)) {
        return;
    }

    if (size == 3) {
        /* 64-bit elements: the decode above guarantees is_q here */
        assert(is_q);
        for (pass = 0; pass < 2; pass++) {
            TCGv_i64 tcg_op1 = tcg_temp_new_i64();
            TCGv_i64 tcg_op2 = tcg_temp_new_i64();
            TCGv_i64 tcg_res = tcg_temp_new_i64();

            read_vec_element(s, tcg_op1, rn, pass, MO_64);
            read_vec_element(s, tcg_op2, rm, pass, MO_64);

            handle_3same_64(s, opcode, u, tcg_res, tcg_op1, tcg_op2);

            write_vec_element(s, tcg_res, rd, pass, MO_64);

            tcg_temp_free_i64(tcg_res);
            tcg_temp_free_i64(tcg_op1);
            tcg_temp_free_i64(tcg_op2);
        }
    } else {
        /* 8/16/32-bit elements: operate on one 32-bit lane per pass;
         * the sub-word helpers process packed sub-elements themselves.
         */
        for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
            TCGv_i32 tcg_op1 = tcg_temp_new_i32();
            TCGv_i32 tcg_op2 = tcg_temp_new_i32();
            TCGv_i32 tcg_res = tcg_temp_new_i32();
            NeonGenTwoOpFn *genfn = NULL;
            /* genenvfn is used for helpers that need cpu_env (the
             * saturating ops); exactly one of genfn/genenvfn gets set.
             */
            NeonGenTwoOpEnvFn *genenvfn = NULL;

            read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
            read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);

            /* Tables are indexed [size][u] unless noted otherwise */
            switch (opcode) {
            case 0x0: /* SHADD, UHADD */
            {
                static NeonGenTwoOpFn * const fns[3][2] = {
                    { gen_helper_neon_hadd_s8, gen_helper_neon_hadd_u8 },
                    { gen_helper_neon_hadd_s16, gen_helper_neon_hadd_u16 },
                    { gen_helper_neon_hadd_s32, gen_helper_neon_hadd_u32 },
                };
                genfn = fns[size][u];
                break;
            }
            case 0x1: /* SQADD, UQADD */
            {
                static NeonGenTwoOpEnvFn * const fns[3][2] = {
                    { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 },
                    { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 },
                    { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 },
                };
                genenvfn = fns[size][u];
                break;
            }
            case 0x2: /* SRHADD, URHADD */
            {
                static NeonGenTwoOpFn * const fns[3][2] = {
                    { gen_helper_neon_rhadd_s8, gen_helper_neon_rhadd_u8 },
                    { gen_helper_neon_rhadd_s16, gen_helper_neon_rhadd_u16 },
                    { gen_helper_neon_rhadd_s32, gen_helper_neon_rhadd_u32 },
                };
                genfn = fns[size][u];
                break;
            }
            case 0x4: /* SHSUB, UHSUB */
            {
                static NeonGenTwoOpFn * const fns[3][2] = {
                    { gen_helper_neon_hsub_s8, gen_helper_neon_hsub_u8 },
                    { gen_helper_neon_hsub_s16, gen_helper_neon_hsub_u16 },
                    { gen_helper_neon_hsub_s32, gen_helper_neon_hsub_u32 },
                };
                genfn = fns[size][u];
                break;
            }
            case 0x5: /* SQSUB, UQSUB */
            {
                static NeonGenTwoOpEnvFn * const fns[3][2] = {
                    { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 },
                    { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 },
                    { gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 },
                };
                genenvfn = fns[size][u];
                break;
            }
            case 0x6: /* CMGT, CMHI */
            {
                static NeonGenTwoOpFn * const fns[3][2] = {
                    { gen_helper_neon_cgt_s8, gen_helper_neon_cgt_u8 },
                    { gen_helper_neon_cgt_s16, gen_helper_neon_cgt_u16 },
                    { gen_helper_neon_cgt_s32, gen_helper_neon_cgt_u32 },
                };
                genfn = fns[size][u];
                break;
            }
            case 0x7: /* CMGE, CMHS */
            {
                static NeonGenTwoOpFn * const fns[3][2] = {
                    { gen_helper_neon_cge_s8, gen_helper_neon_cge_u8 },
                    { gen_helper_neon_cge_s16, gen_helper_neon_cge_u16 },
                    { gen_helper_neon_cge_s32, gen_helper_neon_cge_u32 },
                };
                genfn = fns[size][u];
                break;
            }
            case 0x8: /* SSHL, USHL */
            {
                static NeonGenTwoOpFn * const fns[3][2] = {
                    { gen_helper_neon_shl_s8, gen_helper_neon_shl_u8 },
                    { gen_helper_neon_shl_s16, gen_helper_neon_shl_u16 },
                    { gen_helper_neon_shl_s32, gen_helper_neon_shl_u32 },
                };
                genfn = fns[size][u];
                break;
            }
            case 0x9: /* SQSHL, UQSHL */
            {
                static NeonGenTwoOpEnvFn * const fns[3][2] = {
                    { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
                    { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
                    { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
                };
                genenvfn = fns[size][u];
                break;
            }
            case 0xa: /* SRSHL, URSHL */
            {
                static NeonGenTwoOpFn * const fns[3][2] = {
                    { gen_helper_neon_rshl_s8, gen_helper_neon_rshl_u8 },
                    { gen_helper_neon_rshl_s16, gen_helper_neon_rshl_u16 },
                    { gen_helper_neon_rshl_s32, gen_helper_neon_rshl_u32 },
                };
                genfn = fns[size][u];
                break;
            }
            case 0xb: /* SQRSHL, UQRSHL */
            {
                static NeonGenTwoOpEnvFn * const fns[3][2] = {
                    { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
                    { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
                    { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
                };
                genenvfn = fns[size][u];
                break;
            }
            case 0xc: /* SMAX, UMAX */
            {
                static NeonGenTwoOpFn * const fns[3][2] = {
                    { gen_helper_neon_max_s8, gen_helper_neon_max_u8 },
                    { gen_helper_neon_max_s16, gen_helper_neon_max_u16 },
                    { gen_max_s32, gen_max_u32 },
                };
                genfn = fns[size][u];
                break;
            }

            case 0xd: /* SMIN, UMIN */
            {
                static NeonGenTwoOpFn * const fns[3][2] = {
                    { gen_helper_neon_min_s8, gen_helper_neon_min_u8 },
                    { gen_helper_neon_min_s16, gen_helper_neon_min_u16 },
                    { gen_min_s32, gen_min_u32 },
                };
                genfn = fns[size][u];
                break;
            }
            case 0xe: /* SABD, UABD */
            case 0xf: /* SABA, UABA */
            {
                static NeonGenTwoOpFn * const fns[3][2] = {
                    { gen_helper_neon_abd_s8, gen_helper_neon_abd_u8 },
                    { gen_helper_neon_abd_s16, gen_helper_neon_abd_u16 },
                    { gen_helper_neon_abd_s32, gen_helper_neon_abd_u32 },
                };
                genfn = fns[size][u];
                break;
            }
            case 0x10: /* ADD, SUB */
            {
                /* Here the second table index is the U bit: add vs sub */
                static NeonGenTwoOpFn * const fns[3][2] = {
                    { gen_helper_neon_add_u8, gen_helper_neon_sub_u8 },
                    { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
                    { tcg_gen_add_i32, tcg_gen_sub_i32 },
                };
                genfn = fns[size][u];
                break;
            }
            case 0x11: /* CMTST, CMEQ */
            {
                static NeonGenTwoOpFn * const fns[3][2] = {
                    { gen_helper_neon_tst_u8, gen_helper_neon_ceq_u8 },
                    { gen_helper_neon_tst_u16, gen_helper_neon_ceq_u16 },
                    { gen_helper_neon_tst_u32, gen_helper_neon_ceq_u32 },
                };
                genfn = fns[size][u];
                break;
            }
            case 0x13: /* MUL, PMUL */
                if (u) {
                    /* PMUL */
                    assert(size == 0);
                    genfn = gen_helper_neon_mul_p8;
                    break;
                }
                /* fall through : MUL */
            case 0x12: /* MLA, MLS */
            {
                /* MLA/MLS do the multiply here; the accumulate/subtract
                 * from Rd happens after the switch, below.
                 */
                static NeonGenTwoOpFn * const fns[3] = {
                    gen_helper_neon_mul_u8,
                    gen_helper_neon_mul_u16,
                    tcg_gen_mul_i32,
                };
                genfn = fns[size];
                break;
            }
            case 0x16: /* SQDMULH, SQRDMULH */
            {
                static NeonGenTwoOpEnvFn * const fns[2][2] = {
                    { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 },
                    { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 },
                };
                /* size 0 and 3 were rejected in decode above */
                assert(size == 1 || size == 2);
                genenvfn = fns[size - 1][u];
                break;
            }
            default:
                g_assert_not_reached();
            }

            if (genenvfn) {
                genenvfn(tcg_res, cpu_env, tcg_op1, tcg_op2);
            } else {
                genfn(tcg_res, tcg_op1, tcg_op2);
            }

            if (opcode == 0xf || opcode == 0x12) {
                /* SABA, UABA, MLA, MLS: accumulating ops */
                static NeonGenTwoOpFn * const fns[3][2] = {
                    { gen_helper_neon_add_u8, gen_helper_neon_sub_u8 },
                    { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
                    { tcg_gen_add_i32, tcg_gen_sub_i32 },
                };
                bool is_sub = (opcode == 0x12 && u); /* MLS */

                genfn = fns[size][is_sub];
                read_vec_element_i32(s, tcg_op1, rd, pass, MO_32);
                genfn(tcg_res, tcg_op1, tcg_res);
            }

            write_vec_element_i32(s, tcg_res, rd, pass, MO_32);

            tcg_temp_free_i32(tcg_res);
            tcg_temp_free_i32(tcg_op1);
            tcg_temp_free_i32(tcg_op2);
        }
    }

    /* For 64-bit (!Q) ops, zero the unused high half of Rd */
    if (!is_q) {
        clear_vec_high(s, rd);
    }
}
9600
9601/* C3.6.16 AdvSIMD three same
9602 *  31  30  29  28       24 23  22  21 20  16 15    11  10 9    5 4    0
9603 * +---+---+---+-----------+------+---+------+--------+---+------+------+
9604 * | 0 | Q | U | 0 1 1 1 0 | size | 1 |  Rm  | opcode | 1 |  Rn  |  Rd  |
9605 * +---+---+---+-----------+------+---+------+--------+---+------+------+
9606 */
9607static void disas_simd_three_reg_same(DisasContext *s, uint32_t insn)
9608{
9609    int opcode = extract32(insn, 11, 5);
9610
9611    switch (opcode) {
9612    case 0x3: /* logic ops */
9613        disas_simd_3same_logic(s, insn);
9614        break;
9615    case 0x17: /* ADDP */
9616    case 0x14: /* SMAXP, UMAXP */
9617    case 0x15: /* SMINP, UMINP */
9618    {
9619        /* Pairwise operations */
9620        int is_q = extract32(insn, 30, 1);
9621        int u = extract32(insn, 29, 1);
9622        int size = extract32(insn, 22, 2);
9623        int rm = extract32(insn, 16, 5);
9624        int rn = extract32(insn, 5, 5);
9625        int rd = extract32(insn, 0, 5);
9626        if (opcode == 0x17) {
9627            if (u || (size == 3 && !is_q)) {
9628                unallocated_encoding(s);
9629                return;
9630            }
9631        } else {
9632            if (size == 3) {
9633                unallocated_encoding(s);
9634                return;
9635            }
9636        }
9637        handle_simd_3same_pair(s, is_q, u, opcode, size, rn, rm, rd);
9638        break;
9639    }
9640    case 0x18 ... 0x31:
9641        /* floating point ops, sz[1] and U are part of opcode */
9642        disas_simd_3same_float(s, insn);
9643        break;
9644    default:
9645        disas_simd_3same_int(s, insn);
9646        break;
9647    }
9648}
9649
9650static void handle_2misc_widening(DisasContext *s, int opcode, bool is_q,
9651                                  int size, int rn, int rd)
9652{
9653    /* Handle 2-reg-misc ops which are widening (so each size element
9654     * in the source becomes a 2*size element in the destination.
9655     * The only instruction like this is FCVTL.
9656     */
9657    int pass;
9658
9659    if (size == 3) {
9660        /* 32 -> 64 bit fp conversion */
9661        TCGv_i64 tcg_res[2];
9662        int srcelt = is_q ? 2 : 0;
9663
9664        for (pass = 0; pass < 2; pass++) {
9665            TCGv_i32 tcg_op = tcg_temp_new_i32();
9666            tcg_res[pass] = tcg_temp_new_i64();
9667
9668            read_vec_element_i32(s, tcg_op, rn, srcelt + pass, MO_32);
9669            gen_helper_vfp_fcvtds(tcg_res[pass], tcg_op, cpu_env);
9670            tcg_temp_free_i32(tcg_op);
9671        }
9672        for (pass = 0; pass < 2; pass++) {
9673            write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
9674            tcg_temp_free_i64(tcg_res[pass]);
9675        }
9676    } else {
9677        /* 16 -> 32 bit fp conversion */
9678        int srcelt = is_q ? 4 : 0;
9679        TCGv_i32 tcg_res[4];
9680
9681        for (pass = 0; pass < 4; pass++) {
9682            tcg_res[pass] = tcg_temp_new_i32();
9683
9684            read_vec_element_i32(s, tcg_res[pass], rn, srcelt + pass, MO_16);
9685            gen_helper_vfp_fcvt_f16_to_f32(tcg_res[pass], tcg_res[pass],
9686                                           cpu_env);
9687        }
9688        for (pass = 0; pass < 4; pass++) {
9689            write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
9690            tcg_temp_free_i32(tcg_res[pass]);
9691        }
9692    }
9693}
9694
/* Handle the 2-reg-misc element-reversal ops: REV64, REV32, REV16.
 * op/size together determine the group size being reversed;
 * grp_size is the log2 of the group size in bytes.
 */
static void handle_rev(DisasContext *s, int opcode, bool u,
                       bool is_q, int size, int rn, int rd)
{
    int op = (opcode << 1) | u;
    int opsz = op + size;
    /* grp_size: log2 of reversal-group byte size (REV64 byte -> 3, etc.) */
    int grp_size = 3 - opsz;
    int dsize = is_q ? 128 : 64;
    int i;

    /* Element size must be strictly smaller than the reversal group */
    if (opsz >= 3) {
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    if (size == 0) {
        /* Special case bytes, use bswap op on each group of elements */
        int groups = dsize / (8 << grp_size);

        for (i = 0; i < groups; i++) {
            TCGv_i64 tcg_tmp = tcg_temp_new_i64();

            /* Reading at the group size lets a single bswap reverse
             * all the bytes within the group at once.
             */
            read_vec_element(s, tcg_tmp, rn, i, grp_size);
            switch (grp_size) {
            case MO_16:
                tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
                break;
            case MO_32:
                tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp);
                break;
            case MO_64:
                tcg_gen_bswap64_i64(tcg_tmp, tcg_tmp);
                break;
            default:
                g_assert_not_reached();
            }
            write_vec_element(s, tcg_tmp, rd, i, grp_size);
            tcg_temp_free_i64(tcg_tmp);
        }
        if (!is_q) {
            clear_vec_high(s, rd);
        }
    } else {
        /* 16/32-bit elements: build the result in a 64-bit pair by
         * depositing each source element at its reversed position.
         */
        int revmask = (1 << grp_size) - 1; /* XOR mask reversing index within a group */
        int esize = 8 << size;
        int elements = dsize / esize;
        TCGv_i64 tcg_rn = tcg_temp_new_i64();
        TCGv_i64 tcg_rd = tcg_const_i64(0);
        TCGv_i64 tcg_rd_hi = tcg_const_i64(0);

        for (i = 0; i < elements; i++) {
            int e_rev = (i & 0xf) ^ revmask;
            int off = e_rev * esize; /* destination bit offset of element i */
            read_vec_element(s, tcg_rn, rn, i, size);
            if (off >= 64) {
                tcg_gen_deposit_i64(tcg_rd_hi, tcg_rd_hi,
                                    tcg_rn, off - 64, esize);
            } else {
                tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, off, esize);
            }
        }
        /* Writing the full 128 bits also zeroes the high half for !Q */
        write_vec_element(s, tcg_rd, rd, 0, MO_64);
        write_vec_element(s, tcg_rd_hi, rd, 1, MO_64);

        tcg_temp_free_i64(tcg_rd_hi);
        tcg_temp_free_i64(tcg_rd);
        tcg_temp_free_i64(tcg_rn);
    }
}
9767
static void handle_2misc_pairwise(DisasContext *s, int opcode, bool u,
                                  bool is_q, int size, int rn, int rd)
{
    /* Implement the pairwise operations from 2-misc:
     * SADDLP, UADDLP, SADALP, UADALP.
     * These all add pairs of elements in the input to produce a
     * double-width result element in the output (possibly accumulating).
     */
    bool accum = (opcode == 0x6); /* SADALP/UADALP accumulate into Rd */
    /* Each pass produces one 64-bit result element */
    int maxpass = is_q ? 2 : 1;
    int pass;
    TCGv_i64 tcg_res[2];

    if (size == 2) {
        /* 32 + 32 -> 64 op */
        TCGMemOp memop = size + (u ? 0 : MO_SIGN);

        for (pass = 0; pass < maxpass; pass++) {
            TCGv_i64 tcg_op1 = tcg_temp_new_i64();
            TCGv_i64 tcg_op2 = tcg_temp_new_i64();

            tcg_res[pass] = tcg_temp_new_i64();

            /* Sign/zero extension happens in the vector element read */
            read_vec_element(s, tcg_op1, rn, pass * 2, memop);
            read_vec_element(s, tcg_op2, rn, pass * 2 + 1, memop);
            tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
            if (accum) {
                read_vec_element(s, tcg_op1, rd, pass, MO_64);
                tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
            }

            tcg_temp_free_i64(tcg_op1);
            tcg_temp_free_i64(tcg_op2);
        }
    } else {
        /* 8- or 16-bit elements: the addlp helpers do the pairwise
         * widening add across a whole 64-bit lane at once.
         */
        for (pass = 0; pass < maxpass; pass++) {
            TCGv_i64 tcg_op = tcg_temp_new_i64();
            NeonGenOneOpFn *genfn;
            static NeonGenOneOpFn * const fns[2][2] = {
                { gen_helper_neon_addlp_s8,  gen_helper_neon_addlp_u8 },
                { gen_helper_neon_addlp_s16,  gen_helper_neon_addlp_u16 },
            };

            genfn = fns[size][u];

            tcg_res[pass] = tcg_temp_new_i64();

            read_vec_element(s, tcg_op, rn, pass, MO_64);
            genfn(tcg_res[pass], tcg_op);

            if (accum) {
                read_vec_element(s, tcg_op, rd, pass, MO_64);
                /* Accumulate lane-wise at the widened element size */
                if (size == 0) {
                    gen_helper_neon_addl_u16(tcg_res[pass],
                                             tcg_res[pass], tcg_op);
                } else {
                    gen_helper_neon_addl_u32(tcg_res[pass],
                                             tcg_res[pass], tcg_op);
                }
            }
            tcg_temp_free_i64(tcg_op);
        }
    }
    if (!is_q) {
        /* Zero the unused high half of Rd for the 64-bit form */
        tcg_res[1] = tcg_const_i64(0);
    }
    for (pass = 0; pass < 2; pass++) {
        write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
        tcg_temp_free_i64(tcg_res[pass]);
    }
}
9839
9840static void handle_shll(DisasContext *s, bool is_q, int size, int rn, int rd)
9841{
9842    /* Implement SHLL and SHLL2 */
9843    int pass;
9844    int part = is_q ? 2 : 0;
9845    TCGv_i64 tcg_res[2];
9846
9847    for (pass = 0; pass < 2; pass++) {
9848        static NeonGenWidenFn * const widenfns[3] = {
9849            gen_helper_neon_widen_u8,
9850            gen_helper_neon_widen_u16,
9851            tcg_gen_extu_i32_i64,
9852        };
9853        NeonGenWidenFn *widenfn = widenfns[size];
9854        TCGv_i32 tcg_op = tcg_temp_new_i32();
9855
9856        read_vec_element_i32(s, tcg_op, rn, part + pass, MO_32);
9857        tcg_res[pass] = tcg_temp_new_i64();
9858        widenfn(tcg_res[pass], tcg_op);
9859        tcg_gen_shli_i64(tcg_res[pass], tcg_res[pass], 8 << size);
9860
9861        tcg_temp_free_i32(tcg_op);
9862    }
9863
9864    for (pass = 0; pass < 2; pass++) {
9865        write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
9866        tcg_temp_free_i64(tcg_res[pass]);
9867    }
9868}
9869
9870/* C3.6.17 AdvSIMD two reg misc
9871 *   31  30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
9872 * +---+---+---+-----------+------+-----------+--------+-----+------+------+
9873 * | 0 | Q | U | 0 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
9874 * +---+---+---+-----------+------+-----------+--------+-----+------+------+
9875 */
9876static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
9877{
9878    int size = extract32(insn, 22, 2);
9879    int opcode = extract32(insn, 12, 5);
9880    bool u = extract32(insn, 29, 1);
9881    bool is_q = extract32(insn, 30, 1);
9882    int rn = extract32(insn, 5, 5);
9883    int rd = extract32(insn, 0, 5);
9884    bool need_fpstatus = false;
9885    bool need_rmode = false;
9886    int rmode = -1;
9887    TCGv_i32 tcg_rmode;
9888    TCGv_ptr tcg_fpstatus;
9889
9890    switch (opcode) {
9891    case 0x0: /* REV64, REV32 */
9892    case 0x1: /* REV16 */
9893        handle_rev(s, opcode, u, is_q, size, rn, rd);
9894        return;
9895    case 0x5: /* CNT, NOT, RBIT */
9896        if (u && size == 0) {
9897            /* NOT: adjust size so we can use the 64-bits-at-a-time loop. */
9898            size = 3;
9899            break;
9900        } else if (u && size == 1) {
9901            /* RBIT */
9902            break;
9903        } else if (!u && size == 0) {
9904            /* CNT */
9905            break;
9906        }
9907        unallocated_encoding(s);
9908        return;
9909    case 0x12: /* XTN, XTN2, SQXTUN, SQXTUN2 */
9910    case 0x14: /* SQXTN, SQXTN2, UQXTN, UQXTN2 */
9911        if (size == 3) {
9912            unallocated_encoding(s);
9913            return;
9914        }
9915        if (!fp_access_check(s)) {
9916            return;
9917        }
9918
9919        handle_2misc_narrow(s, false, opcode, u, is_q, size, rn, rd);
9920        return;
9921    case 0x4: /* CLS, CLZ */
9922        if (size == 3) {
9923            unallocated_encoding(s);
9924            return;
9925        }
9926        break;
9927    case 0x2: /* SADDLP, UADDLP */
9928    case 0x6: /* SADALP, UADALP */
9929        if (size == 3) {
9930            unallocated_encoding(s);
9931            return;
9932        }
9933        if (!fp_access_check(s)) {
9934            return;
9935        }
9936        handle_2misc_pairwise(s, opcode, u, is_q, size, rn, rd);
9937        return;
9938    case 0x13: /* SHLL, SHLL2 */
9939        if (u == 0 || size == 3) {
9940            unallocated_encoding(s);
9941            return;
9942        }
9943        if (!fp_access_check(s)) {
9944            return;
9945        }
9946        handle_shll(s, is_q, size, rn, rd);
9947        return;
9948    case 0xa: /* CMLT */
9949        if (u == 1) {
9950            unallocated_encoding(s);
9951            return;
9952        }
9953        /* fall through */
9954    case 0x8: /* CMGT, CMGE */
9955    case 0x9: /* CMEQ, CMLE */
9956    case 0xb: /* ABS, NEG */
9957        if (size == 3 && !is_q) {
9958            unallocated_encoding(s);
9959            return;
9960        }
9961        break;
9962    case 0x3: /* SUQADD, USQADD */
9963        if (size == 3 && !is_q) {
9964            unallocated_encoding(s);
9965            return;
9966        }
9967        if (!fp_access_check(s)) {
9968            return;
9969        }
9970        handle_2misc_satacc(s, false, u, is_q, size, rn, rd);
9971        return;
9972    case 0x7: /* SQABS, SQNEG */
9973        if (size == 3 && !is_q) {
9974            unallocated_encoding(s);
9975            return;
9976        }
9977        break;
9978    case 0xc ... 0xf:
9979    case 0x16 ... 0x1d:
9980    case 0x1f:
9981    {
9982        /* Floating point: U, size[1] and opcode indicate operation;
9983         * size[0] indicates single or double precision.
9984         */
9985        int is_double = extract32(size, 0, 1);
9986        opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
9987        size = is_double ? 3 : 2;
9988        switch (opcode) {
9989        case 0x2f: /* FABS */
9990        case 0x6f: /* FNEG */
9991            if (size == 3 && !is_q) {
9992                unallocated_encoding(s);
9993                return;
9994            }
9995            break;
9996        case 0x1d: /* SCVTF */
9997        case 0x5d: /* UCVTF */
9998        {
9999            bool is_signed = (opcode == 0x1d) ? true : false;
10000            int elements = is_double ? 2 : is_q ? 4 : 2;
10001            if (is_double && !is_q) {
10002                unallocated_encoding(s);
10003                return;
10004            }
10005            if (!fp_access_check(s)) {
10006                return;
10007            }
10008            handle_simd_intfp_conv(s, rd, rn, elements, is_signed, 0, size);
10009            return;
10010        }
10011        case 0x2c: /* FCMGT (zero) */
10012        case 0x2d: /* FCMEQ (zero) */
10013        case 0x2e: /* FCMLT (zero) */
10014        case 0x6c: /* FCMGE (zero) */
10015        case 0x6d: /* FCMLE (zero) */
10016            if (size == 3 && !is_q) {
10017                unallocated_encoding(s);
10018                return;
10019            }
10020            handle_2misc_fcmp_zero(s, opcode, false, u, is_q, size, rn, rd);
10021            return;
10022        case 0x7f: /* FSQRT */
10023            if (size == 3 && !is_q) {
10024                unallocated_encoding(s);
10025                return;
10026            }
10027            break;
10028        case 0x1a: /* FCVTNS */
10029        case 0x1b: /* FCVTMS */
10030        case 0x3a: /* FCVTPS */
10031        case 0x3b: /* FCVTZS */
10032        case 0x5a: /* FCVTNU */
10033        case 0x5b: /* FCVTMU */
10034        case 0x7a: /* FCVTPU */
10035        case 0x7b: /* FCVTZU */
10036            need_fpstatus = true;
10037            need_rmode = true;
10038            rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
10039            if (size == 3 && !is_q) {
10040                unallocated_encoding(s);
10041                return;
10042            }
10043            break;
10044        case 0x5c: /* FCVTAU */
10045        case 0x1c: /* FCVTAS */
10046            need_fpstatus = true;
10047            need_rmode = true;
10048            rmode = FPROUNDING_TIEAWAY;
10049            if (size == 3 && !is_q) {
10050                unallocated_encoding(s);
10051                return;
10052            }
10053            break;
10054        case 0x3c: /* URECPE */
10055            if (size == 3) {
10056                unallocated_encoding(s);
10057                return;
10058            }
10059            /* fall through */
10060        case 0x3d: /* FRECPE */
10061        case 0x7d: /* FRSQRTE */
10062            if (size == 3 && !is_q) {
10063                unallocated_encoding(s);
10064                return;
10065            }
10066            if (!fp_access_check(s)) {
10067                return;
10068            }
10069            handle_2misc_reciprocal(s, opcode, false, u, is_q, size, rn, rd);
10070            return;
10071        case 0x56: /* FCVTXN, FCVTXN2 */
10072            if (size == 2) {
10073                unallocated_encoding(s);
10074                return;
10075            }
10076            /* fall through */
10077        case 0x16: /* FCVTN, FCVTN2 */
10078            /* handle_2misc_narrow does a 2*size -> size operation, but these
10079             * instructions encode the source size rather than dest size.
10080             */
10081            if (!fp_access_check(s)) {
10082                return;
10083            }
10084            handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd);
10085            return;
10086        case 0x17: /* FCVTL, FCVTL2 */
10087            if (!fp_access_check(s)) {
10088                return;
10089            }
10090            handle_2misc_widening(s, opcode, is_q, size, rn, rd);
10091            return;
10092        case 0x18: /* FRINTN */
10093        case 0x19: /* FRINTM */
10094        case 0x38: /* FRINTP */
10095        case 0x39: /* FRINTZ */
10096            need_rmode = true;
10097            rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
10098            /* fall through */
10099        case 0x59: /* FRINTX */
10100        case 0x79: /* FRINTI */
10101            need_fpstatus = true;
10102            if (size == 3 && !is_q) {
10103                unallocated_encoding(s);
10104                return;
10105            }
10106            break;
10107        case 0x58: /* FRINTA */
10108            need_rmode = true;
10109            rmode = FPROUNDING_TIEAWAY;
10110            need_fpstatus = true;
10111            if (size == 3 && !is_q) {
10112                unallocated_encoding(s);
10113                return;
10114            }
10115            break;
10116        case 0x7c: /* URSQRTE */
10117            if (size == 3) {
10118                unallocated_encoding(s);
10119                return;
10120            }
10121            need_fpstatus = true;
10122            break;
10123        default:
10124            unallocated_encoding(s);
10125            return;
10126        }
10127        break;
10128    }
10129    default:
10130        unallocated_encoding(s);
10131        return;
10132    }
10133
10134    if (!fp_access_check(s)) {
10135        return;
10136    }
10137
10138    if (need_fpstatus) {
10139        tcg_fpstatus = get_fpstatus_ptr();
10140    } else {
10141        TCGV_UNUSED_PTR(tcg_fpstatus);
10142    }
10143    if (need_rmode) {
10144        tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
10145        gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
10146    } else {
10147        TCGV_UNUSED_I32(tcg_rmode);
10148    }
10149
10150    if (size == 3) {
10151        /* All 64-bit element operations can be shared with scalar 2misc */
10152        int pass;
10153
10154        for (pass = 0; pass < (is_q ? 2 : 1); pass++) {
10155            TCGv_i64 tcg_op = tcg_temp_new_i64();
10156            TCGv_i64 tcg_res = tcg_temp_new_i64();
10157
10158            read_vec_element(s, tcg_op, rn, pass, MO_64);
10159
10160            handle_2misc_64(s, opcode, u, tcg_res, tcg_op,
10161                            tcg_rmode, tcg_fpstatus);
10162
10163            write_vec_element(s, tcg_res, rd, pass, MO_64);
10164
10165            tcg_temp_free_i64(tcg_res);
10166            tcg_temp_free_i64(tcg_op);
10167        }
10168    } else {
10169        int pass;
10170
10171        for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
10172            TCGv_i32 tcg_op = tcg_temp_new_i32();
10173            TCGv_i32 tcg_res = tcg_temp_new_i32();
10174            TCGCond cond;
10175
10176            read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
10177
10178            if (size == 2) {
10179                /* Special cases for 32 bit elements */
10180                switch (opcode) {
10181                case 0xa: /* CMLT */
10182                    /* 32 bit integer comparison against zero, result is
10183                     * test ? (2^32 - 1) : 0. We implement via setcond(test)
10184                     * and inverting.
10185                     */
10186                    cond = TCG_COND_LT;
10187                do_cmop:
10188                    tcg_gen_setcondi_i32(cond, tcg_res, tcg_op, 0);
10189                    tcg_gen_neg_i32(tcg_res, tcg_res);
10190                    break;
10191                case 0x8: /* CMGT, CMGE */
10192                    cond = u ? TCG_COND_GE : TCG_COND_GT;
10193                    goto do_cmop;
10194                case 0x9: /* CMEQ, CMLE */
10195                    cond = u ? TCG_COND_LE : TCG_COND_EQ;
10196                    goto do_cmop;
10197                case 0x4: /* CLS */
10198                    if (u) {
10199                        gen_helper_clz32(tcg_res, tcg_op);
10200                    } else {
10201                        gen_helper_cls32(tcg_res, tcg_op);
10202                    }
10203                    break;
10204                case 0x7: /* SQABS, SQNEG */
10205                    if (u) {
10206                        gen_helper_neon_qneg_s32(tcg_res, cpu_env, tcg_op);
10207                    } else {
10208                        gen_helper_neon_qabs_s32(tcg_res, cpu_env, tcg_op);
10209                    }
10210                    break;
10211                case 0xb: /* ABS, NEG */
10212                    if (u) {
10213                        tcg_gen_neg_i32(tcg_res, tcg_op);
10214                    } else {
10215                        TCGv_i32 tcg_zero = tcg_const_i32(0);
10216                        tcg_gen_neg_i32(tcg_res, tcg_op);
10217                        tcg_gen_movcond_i32(TCG_COND_GT, tcg_res, tcg_op,
10218                                            tcg_zero, tcg_op, tcg_res);
10219                        tcg_temp_free_i32(tcg_zero);
10220                    }
10221                    break;
10222                case 0x2f: /* FABS */
10223                    gen_helper_vfp_abss(tcg_res, tcg_op);
10224                    break;
10225                case 0x6f: /* FNEG */
10226                    gen_helper_vfp_negs(tcg_res, tcg_op);
10227                    break;
10228                case 0x7f: /* FSQRT */
10229                    gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env);
10230                    break;
10231                case 0x1a: /* FCVTNS */
10232                case 0x1b: /* FCVTMS */
10233                case 0x1c: /* FCVTAS */
10234                case 0x3a: /* FCVTPS */
10235                case 0x3b: /* FCVTZS */
10236                {
10237                    TCGv_i32 tcg_shift = tcg_const_i32(0);
10238                    gen_helper_vfp_tosls(tcg_res, tcg_op,
10239                                         tcg_shift, tcg_fpstatus);
10240                    tcg_temp_free_i32(tcg_shift);
10241                    break;
10242                }
10243                case 0x5a: /* FCVTNU */
10244                case 0x5b: /* FCVTMU */
10245                case 0x5c: /* FCVTAU */
10246                case 0x7a: /* FCVTPU */
10247                case 0x7b: /* FCVTZU */
10248                {
10249                    TCGv_i32 tcg_shift = tcg_const_i32(0);
10250                    gen_helper_vfp_touls(tcg_res, tcg_op,
10251                                         tcg_shift, tcg_fpstatus);
10252                    tcg_temp_free_i32(tcg_shift);
10253                    break;
10254                }
10255                case 0x18: /* FRINTN */
10256                case 0x19: /* FRINTM */
10257                case 0x38: /* FRINTP */
10258                case 0x39: /* FRINTZ */
10259                case 0x58: /* FRINTA */
10260                case 0x79: /* FRINTI */
10261                    gen_helper_rints(tcg_res, tcg_op, tcg_fpstatus);
10262                    break;
10263                case 0x59: /* FRINTX */
10264                    gen_helper_rints_exact(tcg_res, tcg_op, tcg_fpstatus);
10265                    break;
10266                case 0x7c: /* URSQRTE */
10267                    gen_helper_rsqrte_u32(tcg_res, tcg_op, tcg_fpstatus);
10268                    break;
10269                default:
10270                    g_assert_not_reached();
10271                }
10272            } else {
10273                /* Use helpers for 8 and 16 bit elements */
10274                switch (opcode) {
10275                case 0x5: /* CNT, RBIT */
10276                    /* For these two insns size is part of the opcode specifier
10277                     * (handled earlier); they always operate on byte elements.
10278                     */
10279                    if (u) {
10280                        gen_helper_neon_rbit_u8(tcg_res, tcg_op);
10281                    } else {
10282                        gen_helper_neon_cnt_u8(tcg_res, tcg_op);
10283                    }
10284                    break;
10285                case 0x7: /* SQABS, SQNEG */
10286                {
10287                    NeonGenOneOpEnvFn *genfn;
10288                    static NeonGenOneOpEnvFn * const fns[2][2] = {
10289                        { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
10290                        { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
10291                    };
10292                    genfn = fns[size][u];
10293                    genfn(tcg_res, cpu_env, tcg_op);
10294                    break;
10295                }
10296                case 0x8: /* CMGT, CMGE */
10297                case 0x9: /* CMEQ, CMLE */
10298                case 0xa: /* CMLT */
10299                {
10300                    static NeonGenTwoOpFn * const fns[3][2] = {
10301                        { gen_helper_neon_cgt_s8, gen_helper_neon_cgt_s16 },
10302                        { gen_helper_neon_cge_s8, gen_helper_neon_cge_s16 },
10303                        { gen_helper_neon_ceq_u8, gen_helper_neon_ceq_u16 },
10304                    };
10305                    NeonGenTwoOpFn *genfn;
10306                    int comp;
10307                    bool reverse;
10308                    TCGv_i32 tcg_zero = tcg_const_i32(0);
10309
10310                    /* comp = index into [CMGT, CMGE, CMEQ, CMLE, CMLT] */
10311                    comp = (opcode - 0x8) * 2 + u;
10312                    /* ...but LE, LT are implemented as reverse GE, GT */
10313                    reverse = (comp > 2);
10314                    if (reverse) {
10315                        comp = 4 - comp;
10316                    }
10317                    genfn = fns[comp][size];
10318                    if (reverse) {
10319                        genfn(tcg_res, tcg_zero, tcg_op);
10320                    } else {
10321                        genfn(tcg_res, tcg_op, tcg_zero);
10322                    }
10323                    tcg_temp_free_i32(tcg_zero);
10324                    break;
10325                }
10326                case 0xb: /* ABS, NEG */
10327                    if (u) {
10328                        TCGv_i32 tcg_zero = tcg_const_i32(0);
10329                        if (size) {
10330                            gen_helper_neon_sub_u16(tcg_res, tcg_zero, tcg_op);
10331                        } else {
10332                            gen_helper_neon_sub_u8(tcg_res, tcg_zero, tcg_op);
10333                        }
10334                        tcg_temp_free_i32(tcg_zero);
10335                    } else {
10336                        if (size) {
10337                            gen_helper_neon_abs_s16(tcg_res, tcg_op);
10338                        } else {
10339                            gen_helper_neon_abs_s8(tcg_res, tcg_op);
10340                        }
10341                    }
10342                    break;
10343                case 0x4: /* CLS, CLZ */
10344                    if (u) {
10345                        if (size == 0) {
10346                            gen_helper_neon_clz_u8(tcg_res, tcg_op);
10347                        } else {
10348                            gen_helper_neon_clz_u16(tcg_res, tcg_op);
10349                        }
10350                    } else {
10351                        if (size == 0) {
10352                            gen_helper_neon_cls_s8(tcg_res, tcg_op);
10353                        } else {
10354                            gen_helper_neon_cls_s16(tcg_res, tcg_op);
10355                        }
10356                    }
10357                    break;
10358                default:
10359                    g_assert_not_reached();
10360                }
10361            }
10362
10363            write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
10364
10365            tcg_temp_free_i32(tcg_res);
10366            tcg_temp_free_i32(tcg_op);
10367        }
10368    }
10369    if (!is_q) {
10370        clear_vec_high(s, rd);
10371    }
10372
10373    if (need_rmode) {
10374        gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
10375        tcg_temp_free_i32(tcg_rmode);
10376    }
10377    if (need_fpstatus) {
10378        tcg_temp_free_ptr(tcg_fpstatus);
10379    }
10380}
10381
10382/* C3.6.13 AdvSIMD scalar x indexed element
10383 *  31 30  29 28       24 23  22 21  20  19  16 15 12  11  10 9    5 4    0
10384 * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
10385 * | 0 1 | U | 1 1 1 1 1 | size | L | M |  Rm  | opc | H | 0 |  Rn  |  Rd  |
10386 * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
10387 * C3.6.18 AdvSIMD vector x indexed element
10388 *   31  30  29 28       24 23  22 21  20  19  16 15 12  11  10 9    5 4    0
10389 * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
10390 * | 0 | Q | U | 0 1 1 1 1 | size | L | M |  Rm  | opc | H | 0 |  Rn  |  Rd  |
10391 * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
10392 */
static void disas_simd_indexed(DisasContext *s, uint32_t insn)
{
    /* This encoding has two kinds of instruction:
     *  normal, where we perform elt x idxelt => elt for each
     *     element in the vector
     *  long, where we perform elt x idxelt and generate a result of
     *     double the width of the input element
     * The long ops have a 'part' specifier (ie come in INSN, INSN2 pairs).
     */
    bool is_scalar = extract32(insn, 28, 1);
    bool is_q = extract32(insn, 30, 1);
    bool u = extract32(insn, 29, 1);
    int size = extract32(insn, 22, 2);
    int l = extract32(insn, 21, 1);
    int m = extract32(insn, 20, 1);
    /* Note that the Rm field here is only 4 bits, not 5 as it usually is */
    int rm = extract32(insn, 16, 4);
    int opcode = extract32(insn, 12, 4);
    int h = extract32(insn, 11, 1);
    int rn = extract32(insn, 5, 5);
    int rd = extract32(insn, 0, 5);
    bool is_long = false;
    bool is_fp = false;
    int index;
    TCGv_ptr fpst;

    /* Decode phase: classify the opcode (integer vs FP, normal vs long)
     * and reject the encodings the architecture leaves unallocated for
     * this opcode/U/scalar combination.
     */
    switch (opcode) {
    case 0x0: /* MLA */
    case 0x4: /* MLS */
        if (!u || is_scalar) {
            unallocated_encoding(s);
            return;
        }
        break;
    case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
    case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
    case 0xa: /* SMULL, SMULL2, UMULL, UMULL2 */
        if (is_scalar) {
            unallocated_encoding(s);
            return;
        }
        is_long = true;
        break;
    case 0x3: /* SQDMLAL, SQDMLAL2 */
    case 0x7: /* SQDMLSL, SQDMLSL2 */
    case 0xb: /* SQDMULL, SQDMULL2 */
        is_long = true;
        /* fall through */
    case 0xc: /* SQDMULH */
    case 0xd: /* SQRDMULH */
        if (u) {
            unallocated_encoding(s);
            return;
        }
        break;
    case 0x8: /* MUL */
        if (u || is_scalar) {
            unallocated_encoding(s);
            return;
        }
        break;
    case 0x1: /* FMLA */
    case 0x5: /* FMLS */
        if (u) {
            unallocated_encoding(s);
            return;
        }
        /* fall through */
    case 0x9: /* FMUL, FMULX */
        if (!extract32(size, 1, 1)) {
            unallocated_encoding(s);
            return;
        }
        is_fp = true;
        break;
    default:
        unallocated_encoding(s);
        return;
    }

    /* Assemble the element index from the H:L:M bits; how many of the
     * three bits contribute depends on the element size, and for the
     * wider element sizes the leftover M bit extends Rm to 5 bits.
     */
    if (is_fp) {
        /* low bit of size indicates single/double */
        size = extract32(size, 0, 1) ? 3 : 2;
        if (size == 2) {
            index = h << 1 | l;
        } else {
            /* double-precision: L must be 0 and only Q variants exist */
            if (l || !is_q) {
                unallocated_encoding(s);
                return;
            }
            index = h;
        }
        rm |= (m << 4);
    } else {
        switch (size) {
        case 1:
            index = h << 2 | l << 1 | m;
            break;
        case 2:
            index = h << 1 | l;
            rm |= (m << 4);
            break;
        default:
            /* integer ops only exist for 16- and 32-bit elements */
            unallocated_encoding(s);
            return;
        }
    }

    /* All decode checks passed; now it is safe to raise the FP-disabled
     * trap if FP/SIMD access is not currently permitted.
     */
    if (!fp_access_check(s)) {
        return;
    }

    if (is_fp) {
        fpst = get_fpstatus_ptr();
    } else {
        TCGV_UNUSED_PTR(fpst);
    }

    if (size == 3) {
        /* 64-bit elements: only the double-precision FP ops
         * (FMLA/FMLS/FMUL/FMULX, always Q) can reach here.
         */
        TCGv_i64 tcg_idx = tcg_temp_new_i64();
        int pass;

        assert(is_fp && is_q && !is_long);

        /* The indexed element is read once and reused for every pass */
        read_vec_element(s, tcg_idx, rm, index, MO_64);

        for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
            TCGv_i64 tcg_op = tcg_temp_new_i64();
            TCGv_i64 tcg_res = tcg_temp_new_i64();

            read_vec_element(s, tcg_op, rn, pass, MO_64);

            switch (opcode) {
            case 0x5: /* FMLS */
                /* As usual for ARM, separate negation for fused multiply-add */
                gen_helper_vfp_negd(tcg_op, tcg_op);
                /* fall through */
            case 0x1: /* FMLA */
                /* accumulate onto the existing destination element */
                read_vec_element(s, tcg_res, rd, pass, MO_64);
                gen_helper_vfp_muladdd(tcg_res, tcg_op, tcg_idx, tcg_res, fpst);
                break;
            case 0x9: /* FMUL, FMULX */
                if (u) {
                    gen_helper_vfp_mulxd(tcg_res, tcg_op, tcg_idx, fpst);
                } else {
                    gen_helper_vfp_muld(tcg_res, tcg_op, tcg_idx, fpst);
                }
                break;
            default:
                g_assert_not_reached();
            }

            write_vec_element(s, tcg_res, rd, pass, MO_64);
            tcg_temp_free_i64(tcg_op);
            tcg_temp_free_i64(tcg_res);
        }

        /* scalar writes only element 0; zero the upper half of Vd */
        if (is_scalar) {
            clear_vec_high(s, rd);
        }

        tcg_temp_free_i64(tcg_idx);
    } else if (!is_long) {
        /* 32 bit floating point, or 16 or 32 bit integer.
         * For the 16 bit scalar case we use the usual Neon helpers and
         * rely on the fact that 0 op 0 == 0 with no side effects.
         */
        TCGv_i32 tcg_idx = tcg_temp_new_i32();
        int pass, maxpasses;

        if (is_scalar) {
            maxpasses = 1;
        } else {
            maxpasses = is_q ? 4 : 2;
        }

        read_vec_element_i32(s, tcg_idx, rm, index, size);

        if (size == 1 && !is_scalar) {
            /* The simplest way to handle the 16x16 indexed ops is to duplicate
             * the index into both halves of the 32 bit tcg_idx and then use
             * the usual Neon helpers.
             */
            tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
        }

        for (pass = 0; pass < maxpasses; pass++) {
            TCGv_i32 tcg_op = tcg_temp_new_i32();
            TCGv_i32 tcg_res = tcg_temp_new_i32();

            read_vec_element_i32(s, tcg_op, rn, pass, is_scalar ? size : MO_32);

            switch (opcode) {
            case 0x0: /* MLA */
            case 0x4: /* MLS */
            case 0x8: /* MUL */
            {
                /* accumulate fns indexed by [size - 1][is_sub] */
                static NeonGenTwoOpFn * const fns[2][2] = {
                    { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
                    { tcg_gen_add_i32, tcg_gen_sub_i32 },
                };
                NeonGenTwoOpFn *genfn;
                bool is_sub = opcode == 0x4;

                if (size == 1) {
                    gen_helper_neon_mul_u16(tcg_res, tcg_op, tcg_idx);
                } else {
                    tcg_gen_mul_i32(tcg_res, tcg_op, tcg_idx);
                }
                if (opcode == 0x8) {
                    /* plain MUL: no accumulate step */
                    break;
                }
                read_vec_element_i32(s, tcg_op, rd, pass, MO_32);
                genfn = fns[size - 1][is_sub];
                genfn(tcg_res, tcg_op, tcg_res);
                break;
            }
            case 0x5: /* FMLS */
                /* As usual for ARM, separate negation for fused multiply-add */
                gen_helper_vfp_negs(tcg_op, tcg_op);
                /* fall through */
            case 0x1: /* FMLA */
                read_vec_element_i32(s, tcg_res, rd, pass, MO_32);
                gen_helper_vfp_muladds(tcg_res, tcg_op, tcg_idx, tcg_res, fpst);
                break;
            case 0x9: /* FMUL, FMULX */
                if (u) {
                    gen_helper_vfp_mulxs(tcg_res, tcg_op, tcg_idx, fpst);
                } else {
                    gen_helper_vfp_muls(tcg_res, tcg_op, tcg_idx, fpst);
                }
                break;
            case 0xc: /* SQDMULH */
                if (size == 1) {
                    gen_helper_neon_qdmulh_s16(tcg_res, cpu_env,
                                               tcg_op, tcg_idx);
                } else {
                    gen_helper_neon_qdmulh_s32(tcg_res, cpu_env,
                                               tcg_op, tcg_idx);
                }
                break;
            case 0xd: /* SQRDMULH */
                if (size == 1) {
                    gen_helper_neon_qrdmulh_s16(tcg_res, cpu_env,
                                                tcg_op, tcg_idx);
                } else {
                    gen_helper_neon_qrdmulh_s32(tcg_res, cpu_env,
                                                tcg_op, tcg_idx);
                }
                break;
            default:
                g_assert_not_reached();
            }

            if (is_scalar) {
                /* scalar result goes to the low S reg, zero-extending */
                write_fp_sreg(s, rd, tcg_res);
            } else {
                write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
            }

            tcg_temp_free_i32(tcg_op);
            tcg_temp_free_i32(tcg_res);
        }

        tcg_temp_free_i32(tcg_idx);

        if (!is_q) {
            clear_vec_high(s, rd);
        }
    } else {
        /* long ops: 16x16->32 or 32x32->64 */
        TCGv_i64 tcg_res[2];
        int pass;
        bool satop = extract32(opcode, 0, 1);
        TCGMemOp memop = MO_32;

        /* saturating ops and all non-U long ops read signed sources */
        if (satop || !u) {
            memop |= MO_SIGN;
        }

        if (size == 2) {
            /* 32x32->64: widen via a 64-bit multiply of sign/zero-extended
             * source elements.
             */
            TCGv_i64 tcg_idx = tcg_temp_new_i64();

            read_vec_element(s, tcg_idx, rm, index, memop);

            for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
                TCGv_i64 tcg_op = tcg_temp_new_i64();
                TCGv_i64 tcg_passres;
                int passelt;

                /* INSN2 (Q) variants take sources from the upper half */
                if (is_scalar) {
                    passelt = 0;
                } else {
                    passelt = pass + (is_q * 2);
                }

                read_vec_element(s, tcg_op, rn, passelt, memop);

                tcg_res[pass] = tcg_temp_new_i64();

                if (opcode == 0xa || opcode == 0xb) {
                    /* Non-accumulating ops */
                    tcg_passres = tcg_res[pass];
                } else {
                    tcg_passres = tcg_temp_new_i64();
                }

                tcg_gen_mul_i64(tcg_passres, tcg_op, tcg_idx);
                tcg_temp_free_i64(tcg_op);

                if (satop) {
                    /* saturating, doubling */
                    gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env,
                                                      tcg_passres, tcg_passres);
                }

                if (opcode == 0xa || opcode == 0xb) {
                    continue;
                }

                /* Accumulating op: handle accumulate step */
                read_vec_element(s, tcg_res[pass], rd, pass, MO_64);

                switch (opcode) {
                case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
                    tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
                    break;
                case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
                    tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
                    break;
                case 0x7: /* SQDMLSL, SQDMLSL2 */
                    tcg_gen_neg_i64(tcg_passres, tcg_passres);
                    /* fall through */
                case 0x3: /* SQDMLAL, SQDMLAL2 */
                    gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env,
                                                      tcg_res[pass],
                                                      tcg_passres);
                    break;
                default:
                    g_assert_not_reached();
                }
                tcg_temp_free_i64(tcg_passres);
            }
            tcg_temp_free_i64(tcg_idx);

            if (is_scalar) {
                clear_vec_high(s, rd);
            }
        } else {
            /* 16x16->32: use the Neon widening-multiply helpers on
             * 32-bit values with the index duplicated into both halves.
             */
            TCGv_i32 tcg_idx = tcg_temp_new_i32();

            assert(size == 1);
            read_vec_element_i32(s, tcg_idx, rm, index, size);

            if (!is_scalar) {
                /* The simplest way to handle the 16x16 indexed ops is to
                 * duplicate the index into both halves of the 32 bit tcg_idx
                 * and then use the usual Neon helpers.
                 */
                tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
            }

            for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
                TCGv_i32 tcg_op = tcg_temp_new_i32();
                TCGv_i64 tcg_passres;

                if (is_scalar) {
                    read_vec_element_i32(s, tcg_op, rn, pass, size);
                } else {
                    read_vec_element_i32(s, tcg_op, rn,
                                         pass + (is_q * 2), MO_32);
                }

                tcg_res[pass] = tcg_temp_new_i64();

                if (opcode == 0xa || opcode == 0xb) {
                    /* Non-accumulating ops */
                    tcg_passres = tcg_res[pass];
                } else {
                    tcg_passres = tcg_temp_new_i64();
                }

                if (memop & MO_SIGN) {
                    gen_helper_neon_mull_s16(tcg_passres, tcg_op, tcg_idx);
                } else {
                    gen_helper_neon_mull_u16(tcg_passres, tcg_op, tcg_idx);
                }
                if (satop) {
                    gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
                                                      tcg_passres, tcg_passres);
                }
                tcg_temp_free_i32(tcg_op);

                if (opcode == 0xa || opcode == 0xb) {
                    continue;
                }

                /* Accumulating op: handle accumulate step */
                read_vec_element(s, tcg_res[pass], rd, pass, MO_64);

                switch (opcode) {
                case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
                    gen_helper_neon_addl_u32(tcg_res[pass], tcg_res[pass],
                                             tcg_passres);
                    break;
                case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
                    gen_helper_neon_subl_u32(tcg_res[pass], tcg_res[pass],
                                             tcg_passres);
                    break;
                case 0x7: /* SQDMLSL, SQDMLSL2 */
                    gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
                    /* fall through */
                case 0x3: /* SQDMLAL, SQDMLAL2 */
                    gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env,
                                                      tcg_res[pass],
                                                      tcg_passres);
                    break;
                default:
                    g_assert_not_reached();
                }
                tcg_temp_free_i64(tcg_passres);
            }
            tcg_temp_free_i32(tcg_idx);

            if (is_scalar) {
                /* scalar 16x16->32 result occupies only the low 32 bits */
                tcg_gen_ext32u_i64(tcg_res[0], tcg_res[0]);
            }
        }

        /* scalar long ops write zeroes to the high 64 bits of Vd */
        if (is_scalar) {
            tcg_res[1] = tcg_const_i64(0);
        }

        for (pass = 0; pass < 2; pass++) {
            write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
            tcg_temp_free_i64(tcg_res[pass]);
        }
    }

    if (!TCGV_IS_UNUSED_PTR(fpst)) {
        tcg_temp_free_ptr(fpst);
    }
}
10836
10837/* C3.6.19 Crypto AES
10838 *  31             24 23  22 21       17 16    12 11 10 9    5 4    0
10839 * +-----------------+------+-----------+--------+-----+------+------+
10840 * | 0 1 0 0 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
10841 * +-----------------+------+-----------+--------+-----+------+------+
10842 */
10843static void disas_crypto_aes(DisasContext *s, uint32_t insn)
10844{
10845    int size = extract32(insn, 22, 2);
10846    int opcode = extract32(insn, 12, 5);
10847    int rn = extract32(insn, 5, 5);
10848    int rd = extract32(insn, 0, 5);
10849    int decrypt;
10850    TCGv_i32 tcg_rd_regno, tcg_rn_regno, tcg_decrypt;
10851    CryptoThreeOpEnvFn *genfn;
10852
10853    if (!arm_dc_feature(s, ARM_FEATURE_V8_AES)
10854        || size != 0) {
10855        unallocated_encoding(s);
10856        return;
10857    }
10858
10859    switch (opcode) {
10860    case 0x4: /* AESE */
10861        decrypt = 0;
10862        genfn = gen_helper_crypto_aese;
10863        break;
10864    case 0x6: /* AESMC */
10865        decrypt = 0;
10866        genfn = gen_helper_crypto_aesmc;
10867        break;
10868    case 0x5: /* AESD */
10869        decrypt = 1;
10870        genfn = gen_helper_crypto_aese;
10871        break;
10872    case 0x7: /* AESIMC */
10873        decrypt = 1;
10874        genfn = gen_helper_crypto_aesmc;
10875        break;
10876    default:
10877        unallocated_encoding(s);
10878        return;
10879    }
10880
10881    /* Note that we convert the Vx register indexes into the
10882     * index within the vfp.regs[] array, so we can share the
10883     * helper with the AArch32 instructions.
10884     */
10885    tcg_rd_regno = tcg_const_i32(rd << 1);
10886    tcg_rn_regno = tcg_const_i32(rn << 1);
10887    tcg_decrypt = tcg_const_i32(decrypt);
10888
10889    genfn(cpu_env, tcg_rd_regno, tcg_rn_regno, tcg_decrypt);
10890
10891    tcg_temp_free_i32(tcg_rd_regno);
10892    tcg_temp_free_i32(tcg_rn_regno);
10893    tcg_temp_free_i32(tcg_decrypt);
10894}
10895
10896/* C3.6.20 Crypto three-reg SHA
10897 *  31             24 23  22  21 20  16  15 14    12 11 10 9    5 4    0
10898 * +-----------------+------+---+------+---+--------+-----+------+------+
10899 * | 0 1 0 1 1 1 1 0 | size | 0 |  Rm  | 0 | opcode | 0 0 |  Rn  |  Rd  |
10900 * +-----------------+------+---+------+---+--------+-----+------+------+
10901 */
10902static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn)
10903{
10904    int size = extract32(insn, 22, 2);
10905    int opcode = extract32(insn, 12, 3);
10906    int rm = extract32(insn, 16, 5);
10907    int rn = extract32(insn, 5, 5);
10908    int rd = extract32(insn, 0, 5);
10909    CryptoThreeOpEnvFn *genfn;
10910    TCGv_i32 tcg_rd_regno, tcg_rn_regno, tcg_rm_regno;
10911    int feature = ARM_FEATURE_V8_SHA256;
10912
10913    if (size != 0) {
10914        unallocated_encoding(s);
10915        return;
10916    }
10917
10918    switch (opcode) {
10919    case 0: /* SHA1C */
10920    case 1: /* SHA1P */
10921    case 2: /* SHA1M */
10922    case 3: /* SHA1SU0 */
10923        genfn = NULL;
10924        feature = ARM_FEATURE_V8_SHA1;
10925        break;
10926    case 4: /* SHA256H */
10927        genfn = gen_helper_crypto_sha256h;
10928        break;
10929    case 5: /* SHA256H2 */
10930        genfn = gen_helper_crypto_sha256h2;
10931        break;
10932    case 6: /* SHA256SU1 */
10933        genfn = gen_helper_crypto_sha256su1;
10934        break;
10935    default:
10936        unallocated_encoding(s);
10937        return;
10938    }
10939
10940    if (!arm_dc_feature(s, feature)) {
10941        unallocated_encoding(s);
10942        return;
10943    }
10944
10945    tcg_rd_regno = tcg_const_i32(rd << 1);
10946    tcg_rn_regno = tcg_const_i32(rn << 1);
10947    tcg_rm_regno = tcg_const_i32(rm << 1);
10948
10949    if (genfn) {
10950        genfn(cpu_env, tcg_rd_regno, tcg_rn_regno, tcg_rm_regno);
10951    } else {
10952        TCGv_i32 tcg_opcode = tcg_const_i32(opcode);
10953
10954        gen_helper_crypto_sha1_3reg(cpu_env, tcg_rd_regno,
10955                                    tcg_rn_regno, tcg_rm_regno, tcg_opcode);
10956        tcg_temp_free_i32(tcg_opcode);
10957    }
10958
10959    tcg_temp_free_i32(tcg_rd_regno);
10960    tcg_temp_free_i32(tcg_rn_regno);
10961    tcg_temp_free_i32(tcg_rm_regno);
10962}
10963
10964/* C3.6.21 Crypto two-reg SHA
10965 *  31             24 23  22 21       17 16    12 11 10 9    5 4    0
10966 * +-----------------+------+-----------+--------+-----+------+------+
10967 * | 0 1 0 1 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
10968 * +-----------------+------+-----------+--------+-----+------+------+
10969 */
10970static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn)
10971{
10972    int size = extract32(insn, 22, 2);
10973    int opcode = extract32(insn, 12, 5);
10974    int rn = extract32(insn, 5, 5);
10975    int rd = extract32(insn, 0, 5);
10976    CryptoTwoOpEnvFn *genfn;
10977    int feature;
10978    TCGv_i32 tcg_rd_regno, tcg_rn_regno;
10979
10980    if (size != 0) {
10981        unallocated_encoding(s);
10982        return;
10983    }
10984
10985    switch (opcode) {
10986    case 0: /* SHA1H */
10987        feature = ARM_FEATURE_V8_SHA1;
10988        genfn = gen_helper_crypto_sha1h;
10989        break;
10990    case 1: /* SHA1SU1 */
10991        feature = ARM_FEATURE_V8_SHA1;
10992        genfn = gen_helper_crypto_sha1su1;
10993        break;
10994    case 2: /* SHA256SU0 */
10995        feature = ARM_FEATURE_V8_SHA256;
10996        genfn = gen_helper_crypto_sha256su0;
10997        break;
10998    default:
10999        unallocated_encoding(s);
11000        return;
11001    }
11002
11003    if (!arm_dc_feature(s, feature)) {
11004        unallocated_encoding(s);
11005        return;
11006    }
11007
11008    tcg_rd_regno = tcg_const_i32(rd << 1);
11009    tcg_rn_regno = tcg_const_i32(rn << 1);
11010
11011    genfn(cpu_env, tcg_rd_regno, tcg_rn_regno);
11012
11013    tcg_temp_free_i32(tcg_rd_regno);
11014    tcg_temp_free_i32(tcg_rn_regno);
11015}
11016
/* C3.6 Data processing - SIMD, inc Crypto
 *
 * As the decode gets a little complex we are using a table based
 * approach for this part of the decode.
 *
 * Entries are tried in order: an instruction matches an entry when
 * (insn & mask) == pattern, so more specific patterns must come before
 * overlapping general ones. The table ends with an all-zeroes
 * terminator entry (fn == NULL).
 */
static const AArch64DecodeTable data_proc_simd[] = {
    /* pattern  ,  mask     ,  fn                        */
    { 0x0e200400, 0x9f200400, disas_simd_three_reg_same },
    { 0x0e200000, 0x9f200c00, disas_simd_three_reg_diff },
    { 0x0e200800, 0x9f3e0c00, disas_simd_two_reg_misc },
    { 0x0e300800, 0x9f3e0c00, disas_simd_across_lanes },
    { 0x0e000400, 0x9fe08400, disas_simd_copy },
    { 0x0f000000, 0x9f000400, disas_simd_indexed }, /* vector indexed */
    /* simd_mod_imm decode is a subset of simd_shift_imm, so must precede it */
    { 0x0f000400, 0x9ff80400, disas_simd_mod_imm },
    { 0x0f000400, 0x9f800400, disas_simd_shift_imm },
    { 0x0e000000, 0xbf208c00, disas_simd_tb },
    { 0x0e000800, 0xbf208c00, disas_simd_zip_trn },
    { 0x2e000000, 0xbf208400, disas_simd_ext },
    { 0x5e200400, 0xdf200400, disas_simd_scalar_three_reg_same },
    { 0x5e200000, 0xdf200c00, disas_simd_scalar_three_reg_diff },
    { 0x5e200800, 0xdf3e0c00, disas_simd_scalar_two_reg_misc },
    { 0x5e300800, 0xdf3e0c00, disas_simd_scalar_pairwise },
    { 0x5e000400, 0xdfe08400, disas_simd_scalar_copy },
    { 0x5f000000, 0xdf000400, disas_simd_indexed }, /* scalar indexed */
    { 0x5f000400, 0xdf800400, disas_simd_scalar_shift_imm },
    { 0x4e280800, 0xff3e0c00, disas_crypto_aes },
    { 0x5e000000, 0xff208c00, disas_crypto_three_reg_sha },
    { 0x5e280800, 0xff3e0c00, disas_crypto_two_reg_sha },
    { 0x00000000, 0x00000000, NULL }
};
11048
11049static void disas_data_proc_simd(DisasContext *s, uint32_t insn)
11050{
11051    /* Note that this is called with all non-FP cases from
11052     * table C3-6 so it must UNDEF for entries not specifically
11053     * allocated to instructions in that table.
11054     */
11055    AArch64DecodeFn *fn = lookup_disas_fn(&data_proc_simd[0], insn);
11056    if (fn) {
11057        fn(s, insn);
11058    } else {
11059        unallocated_encoding(s);
11060    }
11061}
11062
11063/* C3.6 Data processing - SIMD and floating point */
11064static void disas_data_proc_simd_fp(DisasContext *s, uint32_t insn)
11065{
11066    if (extract32(insn, 28, 1) == 1 && extract32(insn, 30, 1) == 0) {
11067        disas_data_proc_fp(s, insn);
11068    } else {
11069        /* SIMD, including crypto */
11070        disas_data_proc_simd(s, insn);
11071    }
11072}
11073
11074/* C3.1 A64 instruction index by encoding */
11075static void disas_a64_insn(CPUARMState *env, DisasContext *s)
11076{
11077    uint32_t insn;
11078
11079    insn = arm_ldl_code(env, s->pc, s->sctlr_b);
11080    s->insn = insn;
11081    s->pc += 4;
11082
11083    s->fp_access_checked = false;
11084
11085    switch (extract32(insn, 25, 4)) {
11086    case 0x0: case 0x1: case 0x2: case 0x3: /* UNALLOCATED */
11087        unallocated_encoding(s);
11088        break;
11089    case 0x8: case 0x9: /* Data processing - immediate */
11090        disas_data_proc_imm(s, insn);
11091        break;
11092    case 0xa: case 0xb: /* Branch, exception generation and system insns */
11093        disas_b_exc_sys(s, insn);
11094        break;
11095    case 0x4:
11096    case 0x6:
11097    case 0xc:
11098    case 0xe:      /* Loads and stores */
11099        disas_ldst(s, insn);
11100        break;
11101    case 0x5:
11102    case 0xd:      /* Data processing - register */
11103        disas_data_proc_reg(s, insn);
11104        break;
11105    case 0x7:
11106    case 0xf:      /* Data processing - SIMD and floating point */
11107        disas_data_proc_simd_fp(s, insn);
11108        break;
11109    default:
11110        assert(FALSE); /* all 15 cases should be handled above */
11111        break;
11112    }
11113
11114    /* if we allocated any temporaries, free them here */
11115    free_tmp_a64(s);
11116}
11117
/* Translate a block of A64 guest code starting at tb->pc into TCG ops,
 * filling in tb->size and tb->icount on completion. Translation stops
 * on a control-flow change, a page boundary, the insn-count limit, or
 * debug/single-step conditions.
 */
void gen_intermediate_code_a64(ARMCPU *cpu, TranslationBlock *tb)
{
    CPUState *cs = CPU(cpu);
    CPUARMState *env = &cpu->env;
    DisasContext dc1, *dc = &dc1;
    target_ulong pc_start;
    target_ulong next_page_start;
    int num_insns;
    int max_insns;

    pc_start = tb->pc;

    /* Snapshot the translation-relevant CPU state from tb->flags into
     * the DisasContext; the decoder must only consult dc, never env,
     * so that the generated code is valid for any CPU state matching
     * these flags.
     */
    dc->tb = tb;

    dc->is_jmp = DISAS_NEXT;
    dc->pc = pc_start;
    dc->singlestep_enabled = cs->singlestep_enabled;
    dc->condjmp = 0;

    dc->aarch64 = 1;
    /* If we are coming from secure EL0 in a system with a 32-bit EL3, then
     * there is no secure EL1, so we route exceptions to EL3.
     */
    dc->secure_routed_to_el3 = arm_feature(env, ARM_FEATURE_EL3) &&
                               !arm_el_is_aa64(env, 3);
    dc->thumb = 0;
    dc->sctlr_b = 0;
    dc->be_data = ARM_TBFLAG_BE_DATA(tb->flags) ? MO_BE : MO_LE;
    dc->condexec_mask = 0;
    dc->condexec_cond = 0;
    dc->mmu_idx = ARM_TBFLAG_MMUIDX(tb->flags);
    dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
#if !defined(CONFIG_USER_ONLY)
    dc->user = (dc->current_el == 0);
    dc->ns = ARM_TBFLAG_NS(tb->flags);
#endif
    dc->fp_excp_el = ARM_TBFLAG_FPEXC_EL(tb->flags);
    dc->vec_len = 0;
    dc->vec_stride = 0;
    dc->cp_regs = cpu->cp_regs;
    dc->features = env->features;

    /* Single step state. The code-generation logic here is:
     *  SS_ACTIVE == 0:
     *   generate code with no special handling for single-stepping (except
     *   that anything that can make us go to SS_ACTIVE == 1 must end the TB;
     *   this happens anyway because those changes are all system register or
     *   PSTATE writes).
     *  SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
     *   emit code for one insn
     *   emit code to clear PSTATE.SS
     *   emit code to generate software step exception for completed step
     *   end TB (as usual for having generated an exception)
     *  SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
     *   emit code to generate a software step exception
     *   end the TB
     */
    dc->ss_active = ARM_TBFLAG_SS_ACTIVE(tb->flags);
    dc->pstate_ss = ARM_TBFLAG_PSTATE_SS(tb->flags);
    dc->is_ldex = false;
    dc->ss_same_el = (arm_debug_target_el(env) == dc->current_el);

    init_tmp_a64_array(dc);

    /* Establish the per-TB translation limits: stay within the current
     * guest page and respect the icount / TCG buffer caps.
     */
    next_page_start = (pc_start & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
    num_insns = 0;
    max_insns = tb->cflags & CF_COUNT_MASK;
    if (max_insns == 0) {
        max_insns = CF_COUNT_MASK;
    }
    if (max_insns > TCG_MAX_INSNS) {
        max_insns = TCG_MAX_INSNS;
    }

    gen_tb_start(tb);

    tcg_clear_temp_count();

    /* Main translation loop: one guest insn per iteration */
    do {
        dc->insn_start_idx = tcg_op_buf_count();
        tcg_gen_insn_start(dc->pc, 0, 0);
        num_insns++;

        if (unlikely(!QTAILQ_EMPTY(&cs->breakpoints))) {
            CPUBreakpoint *bp;
            QTAILQ_FOREACH(bp, &cs->breakpoints, entry) {
                if (bp->pc == dc->pc) {
                    if (bp->flags & BP_CPU) {
                        /* CPU-internal (architectural) breakpoint: let the
                         * helper decide at run time whether it really fires.
                         */
                        gen_a64_set_pc_im(dc->pc);
                        gen_helper_check_breakpoints(cpu_env);
                        /* End the TB early; it likely won't be executed */
                        dc->is_jmp = DISAS_UPDATE;
                    } else {
                        gen_exception_internal_insn(dc, 0, EXCP_DEBUG);
                        /* The address covered by the breakpoint must be
                           included in [tb->pc, tb->pc + tb->size) in order
                           to for it to be properly cleared -- thus we
                           increment the PC here so that the logic setting
                           tb->size below does the right thing.  */
                        dc->pc += 4;
                        goto done_generating;
                    }
                    break;
                }
            }
        }

        if (num_insns == max_insns && (tb->cflags & CF_LAST_IO)) {
            gen_io_start();
        }

        if (dc->ss_active && !dc->pstate_ss) {
            /* Singlestep state is Active-pending.
             * If we're in this state at the start of a TB then either
             *  a) we just took an exception to an EL which is being debugged
             *     and this is the first insn in the exception handler
             *  b) debug exceptions were masked and we just unmasked them
             *     without changing EL (eg by clearing PSTATE.D)
             * In either case we're going to take a swstep exception in the
             * "did not step an insn" case, and so the syndrome ISV and EX
             * bits should be zero.
             */
            assert(num_insns == 1);
            gen_exception(EXCP_UDEF, syn_swstep(dc->ss_same_el, 0, 0),
                          default_exception_el(dc));
            dc->is_jmp = DISAS_EXC;
            break;
        }

        disas_a64_insn(env, dc);

        /* Diagnose TCG temporaries leaked by the insn just translated */
        if (tcg_check_temp_count()) {
            fprintf(stderr, "TCG temporary leak before "TARGET_FMT_lx"\n",
                    dc->pc);
        }

        /* Translation stops when a conditional branch is encountered.
         * Otherwise the subsequent code could get translated several times.
         * Also stop translation when a page boundary is reached.  This
         * ensures prefetch aborts occur at the right place.
         */
    } while (!dc->is_jmp && !tcg_op_buf_full() &&
             !cs->singlestep_enabled &&
             !singlestep &&
             !dc->ss_active &&
             dc->pc < next_page_start &&
             num_insns < max_insns);

    if (tb->cflags & CF_LAST_IO) {
        gen_io_end();
    }

    /* Emit the TB epilogue: either the single-step/debug exception, or
     * the exit appropriate to how translation ended (dc->is_jmp).
     */
    if (unlikely(cs->singlestep_enabled || dc->ss_active)
        && dc->is_jmp != DISAS_EXC) {
        /* Note that this means single stepping WFI doesn't halt the CPU.
         * For conditional branch insns this is harmless unreachable code as
         * gen_goto_tb() has already handled emitting the debug exception
         * (and thus a tb-jump is not possible when singlestepping).
         */
        assert(dc->is_jmp != DISAS_TB_JUMP);
        if (dc->is_jmp != DISAS_JUMP) {
            gen_a64_set_pc_im(dc->pc);
        }
        if (cs->singlestep_enabled) {
            gen_exception_internal(EXCP_DEBUG);
        } else {
            gen_step_complete_exception(dc);
        }
    } else {
        switch (dc->is_jmp) {
        case DISAS_NEXT:
            gen_goto_tb(dc, 1, dc->pc);
            break;
        default:
        case DISAS_UPDATE:
            gen_a64_set_pc_im(dc->pc);
            /* fall through */
        case DISAS_JUMP:
            /* indicate that the hash table must be used to find the next TB */
            tcg_gen_exit_tb(0);
            break;
        case DISAS_TB_JUMP:
        case DISAS_EXC:
        case DISAS_SWI:
            /* Exit code already emitted while translating the insn */
            break;
        case DISAS_WFE:
            gen_a64_set_pc_im(dc->pc);
            gen_helper_wfe(cpu_env);
            tcg_gen_exit_tb(0);
            break;
        case DISAS_YIELD:
            gen_a64_set_pc_im(dc->pc);
            gen_helper_yield(cpu_env);
            break;
        case DISAS_WFI:
            /* This is a special case because we don't want to just halt the CPU
             * if trying to debug across a WFI.
             */
            gen_a64_set_pc_im(dc->pc);
            gen_helper_wfi(cpu_env);
            /* The helper doesn't necessarily throw an exception, but we
             * must go back to the main loop to check for interrupts anyway.
             */
            tcg_gen_exit_tb(0);
            break;
        }
    }

done_generating:
    gen_tb_end(tb, num_insns);

#ifdef DEBUG_DISAS
    if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM) &&
        qemu_log_in_addr_range(pc_start)) {
        qemu_log("----------------\n");
        qemu_log("IN: %s\n", lookup_symbol(pc_start));
        log_target_disas(cs, pc_start, dc->pc - pc_start,
                         4 | (bswap_code(dc->sctlr_b) ? 2 : 0));
        qemu_log("\n");
    }
#endif
    /* Record how much guest code this TB covers and how many insns it has */
    tb->size = dc->pc - pc_start;
    tb->icount = num_insns;
}
11342