qemu/target-arm/translate-a64.c
/*
 *  AArch64 translation
 *
 *  Copyright (c) 2013 Alexander Graf <agraf@suse.de>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"

#include "cpu.h"
#include "exec/exec-all.h"
#include "tcg-op.h"
#include "qemu/log.h"
#include "arm_ldst.h"
#include "translate.h"
#include "internals.h"
#include "qemu/host-utils.h"

#include "exec/semihost.h"
#include "exec/gen-icount.h"

#include "exec/helper-proto.h"
#include "exec/helper-gen.h"
#include "exec/log.h"

#include "trace-tcg.h"

static TCGv_i64 cpu_X[32];
static TCGv_i64 cpu_pc;

/* Load/store exclusive handling */
static TCGv_i64 cpu_exclusive_high;
static TCGv_i64 cpu_reg(DisasContext *s, int reg);

static const char *regnames[] = {
    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
    "x24", "x25", "x26", "x27", "x28", "x29", "lr", "sp"
};

enum a64_shift_type {
    A64_SHIFT_TYPE_LSL = 0,
    A64_SHIFT_TYPE_LSR = 1,
    A64_SHIFT_TYPE_ASR = 2,
    A64_SHIFT_TYPE_ROR = 3
};

/* Table based decoder typedefs - used when the relevant bits for decode
 * are too awkwardly scattered across the instruction (e.g. SIMD).
 */
typedef void AArch64DecodeFn(DisasContext *s, uint32_t insn);

typedef struct AArch64DecodeTable {
    uint32_t pattern;
    uint32_t mask;
    AArch64DecodeFn *disas_fn;
} AArch64DecodeTable;

/* Function prototype for gen_ functions for calling Neon helpers */
typedef void NeonGenOneOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32);
typedef void NeonGenTwoOpFn(TCGv_i32, TCGv_i32, TCGv_i32);
typedef void NeonGenTwoOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32, TCGv_i32);
typedef void NeonGenTwo64OpFn(TCGv_i64, TCGv_i64, TCGv_i64);
typedef void NeonGenTwo64OpEnvFn(TCGv_i64, TCGv_ptr, TCGv_i64, TCGv_i64);
typedef void NeonGenNarrowFn(TCGv_i32, TCGv_i64);
typedef void NeonGenNarrowEnvFn(TCGv_i32, TCGv_ptr, TCGv_i64);
typedef void NeonGenWidenFn(TCGv_i64, TCGv_i32);
typedef void NeonGenTwoSingleOPFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
typedef void NeonGenTwoDoubleOPFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr);
typedef void NeonGenOneOpFn(TCGv_i64, TCGv_i64);
typedef void CryptoTwoOpEnvFn(TCGv_ptr, TCGv_i32, TCGv_i32);
typedef void CryptoThreeOpEnvFn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);

/* initialize TCG globals.  */
void a64_translate_init(void)
{
    int i;

    cpu_pc = tcg_global_mem_new_i64(cpu_env,
                                    offsetof(CPUARMState, pc),
                                    "pc");
    for (i = 0; i < 32; i++) {
        cpu_X[i] = tcg_global_mem_new_i64(cpu_env,
                                          offsetof(CPUARMState, xregs[i]),
                                          regnames[i]);
    }

    cpu_exclusive_high = tcg_global_mem_new_i64(cpu_env,
        offsetof(CPUARMState, exclusive_high), "exclusive_high");
}

static inline ARMMMUIdx get_a64_user_mem_index(DisasContext *s)
{
    /* Return the mmu_idx to use for A64 "unprivileged load/store" insns:
     *  if EL1, access as if EL0; otherwise access at current EL
     */
    switch (s->mmu_idx) {
    case ARMMMUIdx_S12NSE1:
        return ARMMMUIdx_S12NSE0;
    case ARMMMUIdx_S1SE1:
        return ARMMMUIdx_S1SE0;
    case ARMMMUIdx_S2NS:
        g_assert_not_reached();
    default:
        return s->mmu_idx;
    }
}

void aarch64_cpu_dump_state(CPUState *cs, FILE *f,
                            fprintf_function cpu_fprintf, int flags)
{
    ARMCPU *cpu = ARM_CPU(cs);
    CPUARMState *env = &cpu->env;
    uint32_t psr = pstate_read(env);
    int i;
    int el = arm_current_el(env);
    const char *ns_status;

    cpu_fprintf(f, "PC=%016"PRIx64"  SP=%016"PRIx64"\n",
            env->pc, env->xregs[31]);
    for (i = 0; i < 31; i++) {
        cpu_fprintf(f, "X%02d=%016"PRIx64, i, env->xregs[i]);
        if ((i % 4) == 3) {
            cpu_fprintf(f, "\n");
        } else {
            cpu_fprintf(f, " ");
        }
    }

    if (arm_feature(env, ARM_FEATURE_EL3) && el != 3) {
        ns_status = env->cp15.scr_el3 & SCR_NS ? "NS " : "S ";
    } else {
        ns_status = "";
    }

    cpu_fprintf(f, "\nPSTATE=%08x %c%c%c%c %sEL%d%c\n",
                psr,
                psr & PSTATE_N ? 'N' : '-',
                psr & PSTATE_Z ? 'Z' : '-',
                psr & PSTATE_C ? 'C' : '-',
                psr & PSTATE_V ? 'V' : '-',
                ns_status,
                el,
                psr & PSTATE_SP ? 'h' : 't');

    if (flags & CPU_DUMP_FPU) {
        int numvfpregs = 32;
        for (i = 0; i < numvfpregs; i += 2) {
            uint64_t vlo = float64_val(env->vfp.regs[i * 2]);
            uint64_t vhi = float64_val(env->vfp.regs[(i * 2) + 1]);
            cpu_fprintf(f, "q%02d=%016" PRIx64 ":%016" PRIx64 " ",
                        i, vhi, vlo);
            vlo = float64_val(env->vfp.regs[(i + 1) * 2]);
            vhi = float64_val(env->vfp.regs[((i + 1) * 2) + 1]);
            cpu_fprintf(f, "q%02d=%016" PRIx64 ":%016" PRIx64 "\n",
                        i + 1, vhi, vlo);
        }
        cpu_fprintf(f, "FPCR: %08x  FPSR: %08x\n",
                    vfp_get_fpcr(env), vfp_get_fpsr(env));
    }
}

void gen_a64_set_pc_im(uint64_t val)
{
    tcg_gen_movi_i64(cpu_pc, val);
}

/* Load the PC from a generic TCG variable.
 *
 * If address tagging is enabled via the TCR TBI bits, then loading
 * an address into the PC will clear out any tag from it:
 *  + for EL2 and EL3 there is only one TBI bit, and if it is set
 *    then the address is zero-extended, clearing bits [63:56]
 *  + for EL0 and EL1, TBI0 controls addresses with bit 55 == 0
 *    and TBI1 controls addresses with bit 55 == 1.
 *    If the appropriate TBI bit is set for the address then
 *    the address is sign-extended from bit 55 into bits [63:56]
 *
 * We can avoid doing this for relative branches, because the
 * PC + offset can never overflow into the tag bits (assuming
 * that virtual addresses are less than 56 bits wide, as they
 * are currently), but we must handle it for branch-to-register.
 */
static void gen_a64_set_pc(DisasContext *s, TCGv_i64 src)
{
    if (s->current_el <= 1) {
        /* Test if NEITHER or BOTH TBI values are set.  If so, no need to
         * examine bit 55 of address, can just generate code.
         * If mixed, then test via generated code
         */
        if (s->tbi0 && s->tbi1) {
            TCGv_i64 tmp_reg = tcg_temp_new_i64();
            /* Both bits set, sign extension from bit 55 into [63:56] will
             * cover both cases
             */
            tcg_gen_shli_i64(tmp_reg, src, 8);
            tcg_gen_sari_i64(cpu_pc, tmp_reg, 8);
            tcg_temp_free_i64(tmp_reg);
        } else if (!s->tbi0 && !s->tbi1) {
            /* Neither bit set, just load it as-is */
            tcg_gen_mov_i64(cpu_pc, src);
        } else {
            TCGv_i64 tcg_tmpval = tcg_temp_new_i64();
            TCGv_i64 tcg_bit55  = tcg_temp_new_i64();
            TCGv_i64 tcg_zero   = tcg_const_i64(0);

            tcg_gen_andi_i64(tcg_bit55, src, (1ull << 55));

            if (s->tbi0) {
                /* tbi0==1, tbi1==0, so 0-fill upper byte if bit 55 = 0 */
                tcg_gen_andi_i64(tcg_tmpval, src,
                                 0x00FFFFFFFFFFFFFFull);
                tcg_gen_movcond_i64(TCG_COND_EQ, cpu_pc, tcg_bit55, tcg_zero,
                                    tcg_tmpval, src);
            } else {
                /* tbi0==0, tbi1==1, so 1-fill upper byte if bit 55 = 1 */
                tcg_gen_ori_i64(tcg_tmpval, src,
                                0xFF00000000000000ull);
                tcg_gen_movcond_i64(TCG_COND_NE, cpu_pc, tcg_bit55, tcg_zero,
                                    tcg_tmpval, src);
            }
            tcg_temp_free_i64(tcg_zero);
            tcg_temp_free_i64(tcg_bit55);
            tcg_temp_free_i64(tcg_tmpval);
        }
    } else {  /* EL > 1 */
        if (s->tbi0) {
            /* Force tag byte to all zero */
            tcg_gen_andi_i64(cpu_pc, src, 0x00FFFFFFFFFFFFFFull);
        } else {
            /* Load unmodified address */
            tcg_gen_mov_i64(cpu_pc, src);
        }
    }
}
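
/* Worked example for the sign-extension path above (editor's
 * illustration, not from the original source): with TBI0 == TBI1 == 1,
 * shifting left then arithmetic-right by 8 replicates bit 55 into
 * bits [63:56].  An EL0 address 0xAB00123456789ABC (bit 55 clear)
 * becomes 0x0000123456789ABC, while 0xAB80123456789ABC (bit 55 set)
 * becomes 0xFF80123456789ABC; in both cases the tag byte [63:56] is
 * replaced by copies of bit 55, as the comment before this function
 * describes.
 */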

typedef struct DisasCompare64 {
    TCGCond cond;
    TCGv_i64 value;
} DisasCompare64;

static void a64_test_cc(DisasCompare64 *c64, int cc)
{
    DisasCompare c32;

    arm_test_cc(&c32, cc);

    /* Sign-extend the 32-bit value so that the GE/LT comparisons work
     * properly.  The NE/EQ comparisons are also fine with this choice.
     */
    c64->cond = c32.cond;
    c64->value = tcg_temp_new_i64();
    tcg_gen_ext_i32_i64(c64->value, c32.value);

    arm_free_cc(&c32);
}

static void a64_free_cc(DisasCompare64 *c64)
{
    tcg_temp_free_i64(c64->value);
}

static void gen_exception_internal(int excp)
{
    TCGv_i32 tcg_excp = tcg_const_i32(excp);

    assert(excp_is_internal(excp));
    gen_helper_exception_internal(cpu_env, tcg_excp);
    tcg_temp_free_i32(tcg_excp);
}

static void gen_exception(int excp, uint32_t syndrome, uint32_t target_el)
{
    TCGv_i32 tcg_excp = tcg_const_i32(excp);
    TCGv_i32 tcg_syn = tcg_const_i32(syndrome);
    TCGv_i32 tcg_el = tcg_const_i32(target_el);

    gen_helper_exception_with_syndrome(cpu_env, tcg_excp,
                                       tcg_syn, tcg_el);
    tcg_temp_free_i32(tcg_el);
    tcg_temp_free_i32(tcg_syn);
    tcg_temp_free_i32(tcg_excp);
}

static void gen_exception_internal_insn(DisasContext *s, int offset, int excp)
{
    gen_a64_set_pc_im(s->pc - offset);
    gen_exception_internal(excp);
    s->is_jmp = DISAS_EXC;
}

static void gen_exception_insn(DisasContext *s, int offset, int excp,
                               uint32_t syndrome, uint32_t target_el)
{
    gen_a64_set_pc_im(s->pc - offset);
    gen_exception(excp, syndrome, target_el);
    s->is_jmp = DISAS_EXC;
}

static void gen_ss_advance(DisasContext *s)
{
    /* If the singlestep state is Active-not-pending, advance to
     * Active-pending.
     */
    if (s->ss_active) {
        s->pstate_ss = 0;
        gen_helper_clear_pstate_ss(cpu_env);
    }
}

static void gen_step_complete_exception(DisasContext *s)
{
    /* We have just completed a step of an insn. Move from Active-not-pending
     * to Active-pending, and then also take the swstep exception.
     * This corresponds to making the (IMPDEF) choice to prioritize
     * swstep exceptions over asynchronous exceptions taken to an exception
     * level where debug is disabled. This choice has the advantage that
     * we do not need to maintain internal state corresponding to the
     * ISV/EX syndrome bits between completion of the step and generation
     * of the exception, and our syndrome information is always correct.
     */
    gen_ss_advance(s);
    gen_exception(EXCP_UDEF, syn_swstep(s->ss_same_el, 1, s->is_ldex),
                  default_exception_el(s));
    s->is_jmp = DISAS_EXC;
}

static inline bool use_goto_tb(DisasContext *s, int n, uint64_t dest)
{
    /* No direct tb linking with singlestep (either QEMU's or the ARM
     * debug architecture kind) or deterministic io
     */
    if (s->singlestep_enabled || s->ss_active || (s->tb->cflags & CF_LAST_IO)) {
        return false;
    }

#ifndef CONFIG_USER_ONLY
    /* Only link tbs from inside the same guest page */
    if ((s->tb->pc & TARGET_PAGE_MASK) != (dest & TARGET_PAGE_MASK)) {
        return false;
    }
#endif

    return true;
}

static inline void gen_goto_tb(DisasContext *s, int n, uint64_t dest)
{
    TranslationBlock *tb;

    tb = s->tb;
    if (use_goto_tb(s, n, dest)) {
        tcg_gen_goto_tb(n);
        gen_a64_set_pc_im(dest);
        tcg_gen_exit_tb((intptr_t)tb + n);
        s->is_jmp = DISAS_TB_JUMP;
    } else {
        gen_a64_set_pc_im(dest);
        if (s->ss_active) {
            gen_step_complete_exception(s);
        } else if (s->singlestep_enabled) {
            gen_exception_internal(EXCP_DEBUG);
        } else {
            tcg_gen_exit_tb(0);
            s->is_jmp = DISAS_TB_JUMP;
        }
    }
}
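
/* Editor's note (a gloss on TCG internals, not from the original file):
 * tcg_gen_exit_tb((intptr_t)tb + n) returns the TB pointer with the
 * jump-slot index n encoded in its low bits, which tells the execution
 * loop which goto_tb slot of this TB to patch when chaining it to the
 * next TB, while tcg_gen_exit_tb(0) requests no chaining at all.
 */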

static void disas_set_insn_syndrome(DisasContext *s, uint32_t syn)
{
    /* We don't need to save all of the syndrome so we mask and shift
     * out unneeded bits to help the sleb128 encoder do a better job.
     */
    syn &= ARM_INSN_START_WORD2_MASK;
    syn >>= ARM_INSN_START_WORD2_SHIFT;

    /* We check and clear insn_start_idx to catch multiple updates.  */
    assert(s->insn_start_idx != 0);
    tcg_set_insn_param(s->insn_start_idx, 2, syn);
    s->insn_start_idx = 0;
}

static void unallocated_encoding(DisasContext *s)
{
    /* Unallocated and reserved encodings are uncategorized */
    gen_exception_insn(s, 4, EXCP_UDEF, syn_uncategorized(),
                       default_exception_el(s));
}

#define unsupported_encoding(s, insn)                                    \
    do {                                                                 \
        qemu_log_mask(LOG_UNIMP,                                         \
                      "%s:%d: unsupported instruction encoding 0x%08x "  \
                      "at pc=%016" PRIx64 "\n",                          \
                      __FILE__, __LINE__, insn, s->pc - 4);              \
        unallocated_encoding(s);                                         \
    } while (0)

static void init_tmp_a64_array(DisasContext *s)
{
#ifdef CONFIG_DEBUG_TCG
    int i;
    for (i = 0; i < ARRAY_SIZE(s->tmp_a64); i++) {
        TCGV_UNUSED_I64(s->tmp_a64[i]);
    }
#endif
    s->tmp_a64_count = 0;
}

static void free_tmp_a64(DisasContext *s)
{
    int i;
    for (i = 0; i < s->tmp_a64_count; i++) {
        tcg_temp_free_i64(s->tmp_a64[i]);
    }
    init_tmp_a64_array(s);
}

static TCGv_i64 new_tmp_a64(DisasContext *s)
{
    assert(s->tmp_a64_count < TMP_A64_MAX);
    return s->tmp_a64[s->tmp_a64_count++] = tcg_temp_new_i64();
}

static TCGv_i64 new_tmp_a64_zero(DisasContext *s)
{
    TCGv_i64 t = new_tmp_a64(s);
    tcg_gen_movi_i64(t, 0);
    return t;
}

/*
 * Register access functions
 *
 * These functions are used for directly accessing a register where
 * changes to the final register value are likely to be made. If you
 * need to use a register for temporary calculation (e.g. index type
 * operations) use the read_* form.
 *
 * B1.2.1 Register mappings
 *
 * In instruction register encoding 31 can refer to ZR (zero register) or
 * the SP (stack pointer) depending on context. In QEMU's case we map SP
 * to cpu_X[31] and ZR accesses to a temporary which can be discarded.
 * This is the point of the _sp forms.
 */
static TCGv_i64 cpu_reg(DisasContext *s, int reg)
{
    if (reg == 31) {
        return new_tmp_a64_zero(s);
    } else {
        return cpu_X[reg];
    }
}

/* register access for when 31 == SP */
static TCGv_i64 cpu_reg_sp(DisasContext *s, int reg)
{
    return cpu_X[reg];
}

/* read a cpu register in 32bit/64bit mode. Returns a TCGv_i64
 * representing the register contents. This TCGv is an auto-freed
 * temporary so it need not be explicitly freed, and may be modified.
 */
static TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf)
{
    TCGv_i64 v = new_tmp_a64(s);
    if (reg != 31) {
        if (sf) {
            tcg_gen_mov_i64(v, cpu_X[reg]);
        } else {
            tcg_gen_ext32u_i64(v, cpu_X[reg]);
        }
    } else {
        tcg_gen_movi_i64(v, 0);
    }
    return v;
}

static TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf)
{
    TCGv_i64 v = new_tmp_a64(s);
    if (sf) {
        tcg_gen_mov_i64(v, cpu_X[reg]);
    } else {
        tcg_gen_ext32u_i64(v, cpu_X[reg]);
    }
    return v;
}

/* By the time we try to access an FP register we should already have
 * done the necessary access check, so assert that
 * (a) we did the check and
 * (b) we didn't then just plough ahead anyway if it failed.
 * Print the instruction pattern in the abort message so we can figure
 * out what we need to fix if a user encounters this problem in the wild.
 */
static inline void assert_fp_access_checked(DisasContext *s)
{
#ifdef CONFIG_DEBUG_TCG
    if (unlikely(!s->fp_access_checked || s->fp_excp_el)) {
        fprintf(stderr, "target-arm: FP access check missing for "
                "instruction 0x%08x\n", s->insn);
        abort();
    }
#endif
}

/* Return the offset into CPUARMState of an element of specified
 * size, 'element' places in from the least significant end of
 * the FP/vector register Qn.
 */
static inline int vec_reg_offset(DisasContext *s, int regno,
                                 int element, TCGMemOp size)
{
    int offs = 0;
#ifdef HOST_WORDS_BIGENDIAN
    /* This is complicated slightly because vfp.regs[2n] is
     * still the low half and vfp.regs[2n+1] the high half
     * of the 128 bit vector, even on big endian systems.
     * Calculate the offset assuming a fully bigendian 128 bits,
     * then XOR to account for the order of the two 64 bit halves.
     */
    offs += (16 - ((element + 1) * (1 << size)));
    offs ^= 8;
#else
    offs += element * (1 << size);
#endif
    offs += offsetof(CPUARMState, vfp.regs[regno * 2]);
    assert_fp_access_checked(s);
    return offs;
}
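
/* Worked example of the big-endian fixup above (editor's illustration):
 * for element 0 with size MO_8, offs = 16 - 1 = 15, and 15 ^ 8 = 7,
 * i.e. the last byte of vfp.regs[2n] -- which on a big-endian host is
 * the least significant byte of the low 64 bit half, matching the
 * little-endian element numbering the architecture uses.
 */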

/* Return the offset into CPUARMState of a slice (from
 * the least significant end) of FP register Qn (ie
 * Dn, Sn, Hn or Bn).
 * (Note that this is not the same mapping as for A32; see cpu.h)
 */
static inline int fp_reg_offset(DisasContext *s, int regno, TCGMemOp size)
{
    int offs = offsetof(CPUARMState, vfp.regs[regno * 2]);
#ifdef HOST_WORDS_BIGENDIAN
    offs += (8 - (1 << size));
#endif
    assert_fp_access_checked(s);
    return offs;
}

/* Offset of the high half of the 128 bit vector Qn */
static inline int fp_reg_hi_offset(DisasContext *s, int regno)
{
    assert_fp_access_checked(s);
    return offsetof(CPUARMState, vfp.regs[regno * 2 + 1]);
}

/* Convenience accessors for reading and writing single and double
 * FP registers. Writing clears the upper parts of the associated
 * 128 bit vector register, as required by the architecture.
 * Note that unlike the GP register accessors, the values returned
 * by the read functions must be manually freed.
 */
static TCGv_i64 read_fp_dreg(DisasContext *s, int reg)
{
    TCGv_i64 v = tcg_temp_new_i64();

    tcg_gen_ld_i64(v, cpu_env, fp_reg_offset(s, reg, MO_64));
    return v;
}

static TCGv_i32 read_fp_sreg(DisasContext *s, int reg)
{
    TCGv_i32 v = tcg_temp_new_i32();

    tcg_gen_ld_i32(v, cpu_env, fp_reg_offset(s, reg, MO_32));
    return v;
}

static void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v)
{
    TCGv_i64 tcg_zero = tcg_const_i64(0);

    tcg_gen_st_i64(v, cpu_env, fp_reg_offset(s, reg, MO_64));
    tcg_gen_st_i64(tcg_zero, cpu_env, fp_reg_hi_offset(s, reg));
    tcg_temp_free_i64(tcg_zero);
}

static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v)
{
    TCGv_i64 tmp = tcg_temp_new_i64();

    tcg_gen_extu_i32_i64(tmp, v);
    write_fp_dreg(s, reg, tmp);
    tcg_temp_free_i64(tmp);
}

static TCGv_ptr get_fpstatus_ptr(void)
{
    TCGv_ptr statusptr = tcg_temp_new_ptr();
    int offset;

    /* In A64 all instructions (both FP and Neon) use the FPCR;
     * there is no equivalent of the A32 Neon "standard FPSCR value"
     * and all operations use vfp.fp_status.
     */
    offset = offsetof(CPUARMState, vfp.fp_status);
    tcg_gen_addi_ptr(statusptr, cpu_env, offset);
    return statusptr;
}

/* Set ZF and NF based on a 64 bit result. This is alas fiddlier
 * than the 32 bit equivalent.
 */
static inline void gen_set_NZ64(TCGv_i64 result)
{
    tcg_gen_extr_i64_i32(cpu_ZF, cpu_NF, result);
    tcg_gen_or_i32(cpu_ZF, cpu_ZF, cpu_NF);
}
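
/* Editor's note on the trick above: QEMU keeps Z inverted (the Z flag
 * is "set" when cpu_ZF == 0) and N as bit 31 of cpu_NF.  Splitting the
 * 64 bit result into 32 bit halves and ORing them leaves cpu_ZF zero
 * exactly when the whole result is zero, while the high half stored in
 * cpu_NF carries the 64 bit sign bit in its bit 31.
 */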

/* Set NZCV as for a logical operation: NZ as per result, CV cleared. */
static inline void gen_logic_CC(int sf, TCGv_i64 result)
{
    if (sf) {
        gen_set_NZ64(result);
    } else {
        tcg_gen_extrl_i64_i32(cpu_ZF, result);
        tcg_gen_mov_i32(cpu_NF, cpu_ZF);
    }
    tcg_gen_movi_i32(cpu_CF, 0);
    tcg_gen_movi_i32(cpu_VF, 0);
}

/* dest = T0 + T1; compute C, N, V and Z flags */
static void gen_add_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        TCGv_i64 result, flag, tmp;
        result = tcg_temp_new_i64();
        flag = tcg_temp_new_i64();
        tmp = tcg_temp_new_i64();

        tcg_gen_movi_i64(tmp, 0);
        tcg_gen_add2_i64(result, flag, t0, tmp, t1, tmp);

        tcg_gen_extrl_i64_i32(cpu_CF, flag);

        gen_set_NZ64(result);

        tcg_gen_xor_i64(flag, result, t0);
        tcg_gen_xor_i64(tmp, t0, t1);
        tcg_gen_andc_i64(flag, flag, tmp);
        tcg_temp_free_i64(tmp);
        tcg_gen_extrh_i64_i32(cpu_VF, flag);

        tcg_gen_mov_i64(dest, result);
        tcg_temp_free_i64(result);
        tcg_temp_free_i64(flag);
    } else {
        /* 32 bit arithmetic */
        TCGv_i32 t0_32 = tcg_temp_new_i32();
        TCGv_i32 t1_32 = tcg_temp_new_i32();
        TCGv_i32 tmp = tcg_temp_new_i32();

        tcg_gen_movi_i32(tmp, 0);
        tcg_gen_extrl_i64_i32(t0_32, t0);
        tcg_gen_extrl_i64_i32(t1_32, t1);
        tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, t1_32, tmp);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
        tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
        tcg_gen_xor_i32(tmp, t0_32, t1_32);
        tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
        tcg_gen_extu_i32_i64(dest, cpu_NF);

        tcg_temp_free_i32(tmp);
        tcg_temp_free_i32(t0_32);
        tcg_temp_free_i32(t1_32);
    }
}
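
/* The V calculation above uses the usual identity for addition:
 *   V = (result ^ t0) & ~(t0 ^ t1)
 * i.e. signed overflow happens only when both operands have the same
 * sign and the result's sign differs from them.  For example (editor's
 * illustration), 0x7fffffffffffffff + 1 produces a negative result
 * from two positive inputs, so bit 63 of the expression -- and hence
 * V -- is set.
 */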

/* dest = T0 - T1; compute C, N, V and Z flags */
static void gen_sub_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        /* 64 bit arithmetic */
        TCGv_i64 result, flag, tmp;

        result = tcg_temp_new_i64();
        flag = tcg_temp_new_i64();
        tcg_gen_sub_i64(result, t0, t1);

        gen_set_NZ64(result);

        tcg_gen_setcond_i64(TCG_COND_GEU, flag, t0, t1);
        tcg_gen_extrl_i64_i32(cpu_CF, flag);

        tcg_gen_xor_i64(flag, result, t0);
        tmp = tcg_temp_new_i64();
        tcg_gen_xor_i64(tmp, t0, t1);
        tcg_gen_and_i64(flag, flag, tmp);
        tcg_temp_free_i64(tmp);
        tcg_gen_extrh_i64_i32(cpu_VF, flag);
        tcg_gen_mov_i64(dest, result);
        tcg_temp_free_i64(flag);
        tcg_temp_free_i64(result);
    } else {
        /* 32 bit arithmetic */
        TCGv_i32 t0_32 = tcg_temp_new_i32();
        TCGv_i32 t1_32 = tcg_temp_new_i32();
        TCGv_i32 tmp;

        tcg_gen_extrl_i64_i32(t0_32, t0);
        tcg_gen_extrl_i64_i32(t1_32, t1);
        tcg_gen_sub_i32(cpu_NF, t0_32, t1_32);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
        tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0_32, t1_32);
        tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
        tmp = tcg_temp_new_i32();
        tcg_gen_xor_i32(tmp, t0_32, t1_32);
        tcg_temp_free_i32(t0_32);
        tcg_temp_free_i32(t1_32);
        tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
        tcg_temp_free_i32(tmp);
        tcg_gen_extu_i32_i64(dest, cpu_NF);
    }
}
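
/* Editor's note: for subtraction the A64 carry flag means "no borrow",
 * hence C = (t0 >= t1) unsigned, computed with TCG_COND_GEU above, and
 * the overflow identity flips to
 *   V = (result ^ t0) & (t0 ^ t1)
 * since signed overflow on subtract is only possible when the operand
 * signs differ.
 */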

/* dest = T0 + T1 + CF; do not compute flags. */
static void gen_adc(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    TCGv_i64 flag = tcg_temp_new_i64();
    tcg_gen_extu_i32_i64(flag, cpu_CF);
    tcg_gen_add_i64(dest, t0, t1);
    tcg_gen_add_i64(dest, dest, flag);
    tcg_temp_free_i64(flag);

    if (!sf) {
        tcg_gen_ext32u_i64(dest, dest);
    }
}

/* dest = T0 + T1 + CF; compute C, N, V and Z flags. */
static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        TCGv_i64 result, cf_64, vf_64, tmp;
        result = tcg_temp_new_i64();
        cf_64 = tcg_temp_new_i64();
        vf_64 = tcg_temp_new_i64();
        tmp = tcg_const_i64(0);

        tcg_gen_extu_i32_i64(cf_64, cpu_CF);
        tcg_gen_add2_i64(result, cf_64, t0, tmp, cf_64, tmp);
        tcg_gen_add2_i64(result, cf_64, result, cf_64, t1, tmp);
        tcg_gen_extrl_i64_i32(cpu_CF, cf_64);
        gen_set_NZ64(result);

        tcg_gen_xor_i64(vf_64, result, t0);
        tcg_gen_xor_i64(tmp, t0, t1);
        tcg_gen_andc_i64(vf_64, vf_64, tmp);
        tcg_gen_extrh_i64_i32(cpu_VF, vf_64);

        tcg_gen_mov_i64(dest, result);

        tcg_temp_free_i64(tmp);
        tcg_temp_free_i64(vf_64);
        tcg_temp_free_i64(cf_64);
        tcg_temp_free_i64(result);
    } else {
        TCGv_i32 t0_32, t1_32, tmp;
        t0_32 = tcg_temp_new_i32();
        t1_32 = tcg_temp_new_i32();
        tmp = tcg_const_i32(0);

        tcg_gen_extrl_i64_i32(t0_32, t0);
        tcg_gen_extrl_i64_i32(t1_32, t1);
        tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, cpu_CF, tmp);
        tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1_32, tmp);

        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
        tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
        tcg_gen_xor_i32(tmp, t0_32, t1_32);
        tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
        tcg_gen_extu_i32_i64(dest, cpu_NF);

        tcg_temp_free_i32(tmp);
        tcg_temp_free_i32(t1_32);
        tcg_temp_free_i32(t0_32);
    }
}
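
/* Editor's note on the add2 chaining above: tcg_gen_add2 performs a
 * double-word add, leaving the low word in its first output and the
 * carry-out in the second.  The first add2 computes t0 + CF with zero
 * high words, capturing the carry in cf_64 (or cpu_CF in the 32 bit
 * path), and the second folds in t1, so after both calls the carry
 * variable holds the final C flag for the three-input addition.
 */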

/*
 * Load/Store generators
 */

/*
 * Store from GPR register to memory.
 */
static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source,
                             TCGv_i64 tcg_addr, int size, int memidx,
                             bool iss_valid,
                             unsigned int iss_srt,
                             bool iss_sf, bool iss_ar)
{
    g_assert(size <= 3);
    tcg_gen_qemu_st_i64(source, tcg_addr, memidx, s->be_data + size);

    if (iss_valid) {
        uint32_t syn;

        syn = syn_data_abort_with_iss(0,
                                      size,
                                      false,
                                      iss_srt,
                                      iss_sf,
                                      iss_ar,
                                      0, 0, 0, 0, 0, false);
        disas_set_insn_syndrome(s, syn);
    }
}

static void do_gpr_st(DisasContext *s, TCGv_i64 source,
                      TCGv_i64 tcg_addr, int size,
                      bool iss_valid,
                      unsigned int iss_srt,
                      bool iss_sf, bool iss_ar)
{
    do_gpr_st_memidx(s, source, tcg_addr, size, get_mem_index(s),
                     iss_valid, iss_srt, iss_sf, iss_ar);
}

/*
 * Load from memory to GPR register
 */
static void do_gpr_ld_memidx(DisasContext *s,
                             TCGv_i64 dest, TCGv_i64 tcg_addr,
                             int size, bool is_signed,
                             bool extend, int memidx,
                             bool iss_valid, unsigned int iss_srt,
                             bool iss_sf, bool iss_ar)
{
    TCGMemOp memop = s->be_data + size;

    g_assert(size <= 3);

    if (is_signed) {
        memop += MO_SIGN;
    }

    tcg_gen_qemu_ld_i64(dest, tcg_addr, memidx, memop);

    if (extend && is_signed) {
        g_assert(size < 3);
        tcg_gen_ext32u_i64(dest, dest);
    }

    if (iss_valid) {
        uint32_t syn;

        syn = syn_data_abort_with_iss(0,
                                      size,
                                      is_signed,
                                      iss_srt,
                                      iss_sf,
                                      iss_ar,
                                      0, 0, 0, 0, 0, false);
        disas_set_insn_syndrome(s, syn);
    }
}

static void do_gpr_ld(DisasContext *s,
                      TCGv_i64 dest, TCGv_i64 tcg_addr,
                      int size, bool is_signed, bool extend,
                      bool iss_valid, unsigned int iss_srt,
                      bool iss_sf, bool iss_ar)
{
    do_gpr_ld_memidx(s, dest, tcg_addr, size, is_signed, extend,
                     get_mem_index(s),
                     iss_valid, iss_srt, iss_sf, iss_ar);
}

/*
 * Store from FP register to memory
 */
static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, int size)
{
    /* This writes the bottom N bits of a 128 bit wide vector to memory */
    TCGv_i64 tmp = tcg_temp_new_i64();
    tcg_gen_ld_i64(tmp, cpu_env, fp_reg_offset(s, srcidx, MO_64));
    if (size < 4) {
        tcg_gen_qemu_st_i64(tmp, tcg_addr, get_mem_index(s),
                            s->be_data + size);
    } else {
        bool be = s->be_data == MO_BE;
        TCGv_i64 tcg_hiaddr = tcg_temp_new_i64();

        tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
        tcg_gen_qemu_st_i64(tmp, be ? tcg_hiaddr : tcg_addr, get_mem_index(s),
                            s->be_data | MO_Q);
        tcg_gen_ld_i64(tmp, cpu_env, fp_reg_hi_offset(s, srcidx));
        tcg_gen_qemu_st_i64(tmp, be ? tcg_addr : tcg_hiaddr, get_mem_index(s),
                            s->be_data | MO_Q);
        tcg_temp_free_i64(tcg_hiaddr);
    }

    tcg_temp_free_i64(tmp);
}

/*
 * Load from memory to FP register
 */
static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, int size)
{
    /* This always zero-extends and writes to a full 128 bit wide vector */
    TCGv_i64 tmplo = tcg_temp_new_i64();
    TCGv_i64 tmphi;

    if (size < 4) {
        TCGMemOp memop = s->be_data + size;
        tmphi = tcg_const_i64(0);
        tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), memop);
    } else {
        bool be = s->be_data == MO_BE;
        TCGv_i64 tcg_hiaddr;

        tmphi = tcg_temp_new_i64();
        tcg_hiaddr = tcg_temp_new_i64();

        tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
        tcg_gen_qemu_ld_i64(tmplo, be ? tcg_hiaddr : tcg_addr, get_mem_index(s),
                            s->be_data | MO_Q);
        tcg_gen_qemu_ld_i64(tmphi, be ? tcg_addr : tcg_hiaddr, get_mem_index(s),
                            s->be_data | MO_Q);
        tcg_temp_free_i64(tcg_hiaddr);
    }

    tcg_gen_st_i64(tmplo, cpu_env, fp_reg_offset(s, destidx, MO_64));
    tcg_gen_st_i64(tmphi, cpu_env, fp_reg_hi_offset(s, destidx));

    tcg_temp_free_i64(tmplo);
    tcg_temp_free_i64(tmphi);
}

/*
 * Vector load/store helpers.
 *
 * The principal difference between this and an FP load is that we don't
 * zero extend as we are filling a partial chunk of the vector register.
 * These functions don't support 128 bit loads/stores, which would be
 * normal load/store operations.
 *
 * The _i32 versions are useful when operating on 32 bit quantities
 * (e.g. for floating point single or using Neon helper functions).
 */

/* Get value of an element within a vector register */
static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx,
                             int element, TCGMemOp memop)
{
    int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_ld8u_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_16:
        tcg_gen_ld16u_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_32:
        tcg_gen_ld32u_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_8|MO_SIGN:
        tcg_gen_ld8s_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_16|MO_SIGN:
        tcg_gen_ld16s_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_32|MO_SIGN:
        tcg_gen_ld32s_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_64:
    case MO_64|MO_SIGN:
        tcg_gen_ld_i64(tcg_dest, cpu_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

static void read_vec_element_i32(DisasContext *s, TCGv_i32 tcg_dest, int srcidx,
                                 int element, TCGMemOp memop)
{
    int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_ld8u_i32(tcg_dest, cpu_env, vect_off);
        break;
    case MO_16:
        tcg_gen_ld16u_i32(tcg_dest, cpu_env, vect_off);
        break;
    case MO_8|MO_SIGN:
        tcg_gen_ld8s_i32(tcg_dest, cpu_env, vect_off);
        break;
    case MO_16|MO_SIGN:
        tcg_gen_ld16s_i32(tcg_dest, cpu_env, vect_off);
        break;
    case MO_32:
    case MO_32|MO_SIGN:
        tcg_gen_ld_i32(tcg_dest, cpu_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

/* Set value of an element within a vector register */
static void write_vec_element(DisasContext *s, TCGv_i64 tcg_src, int destidx,
                              int element, TCGMemOp memop)
{
    int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_st8_i64(tcg_src, cpu_env, vect_off);
        break;
    case MO_16:
        tcg_gen_st16_i64(tcg_src, cpu_env, vect_off);
        break;
    case MO_32:
        tcg_gen_st32_i64(tcg_src, cpu_env, vect_off);
        break;
    case MO_64:
        tcg_gen_st_i64(tcg_src, cpu_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src,
                                  int destidx, int element, TCGMemOp memop)
{
    int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_st8_i32(tcg_src, cpu_env, vect_off);
        break;
    case MO_16:
        tcg_gen_st16_i32(tcg_src, cpu_env, vect_off);
        break;
    case MO_32:
        tcg_gen_st_i32(tcg_src, cpu_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

/* Clear the high 64 bits of a 128 bit vector (in general non-quad
 * vector ops all need to do this).
 */
static void clear_vec_high(DisasContext *s, int rd)
{
    TCGv_i64 tcg_zero = tcg_const_i64(0);

    write_vec_element(s, tcg_zero, rd, 1, MO_64);
    tcg_temp_free_i64(tcg_zero);
}

/* Store from vector register to memory */
static void do_vec_st(DisasContext *s, int srcidx, int element,
                      TCGv_i64 tcg_addr, int size)
{
    TCGMemOp memop = s->be_data + size;
    TCGv_i64 tcg_tmp = tcg_temp_new_i64();

    read_vec_element(s, tcg_tmp, srcidx, element, size);
    tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), memop);

    tcg_temp_free_i64(tcg_tmp);
}

/* Load from memory to vector register */
static void do_vec_ld(DisasContext *s, int destidx, int element,
                      TCGv_i64 tcg_addr, int size)
{
    TCGMemOp memop = s->be_data + size;
    TCGv_i64 tcg_tmp = tcg_temp_new_i64();

    tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), memop);
    write_vec_element(s, tcg_tmp, destidx, element, size);

    tcg_temp_free_i64(tcg_tmp);
}

/* Check that FP/Neon access is enabled. If it is, return
 * true. If not, emit code to generate an appropriate exception,
 * and return false; the caller should not emit any code for
 * the instruction. Note that this check must happen after all
 * unallocated-encoding checks (otherwise the syndrome information
 * for the resulting exception will be incorrect).
 */
static inline bool fp_access_check(DisasContext *s)
{
    assert(!s->fp_access_checked);
    s->fp_access_checked = true;

    if (!s->fp_excp_el) {
        return true;
    }

    gen_exception_insn(s, 4, EXCP_UDEF, syn_fp_access_trap(1, 0xe, false),
                       s->fp_excp_el);
    return false;
}

/*
 * This utility function is for doing register extension with an
 * optional shift. You will likely want to pass a temporary for the
 * destination register. See DecodeRegExtend() in the ARM ARM.
 */
static void ext_and_shift_reg(TCGv_i64 tcg_out, TCGv_i64 tcg_in,
                              int option, unsigned int shift)
{
    int extsize = extract32(option, 0, 2);
    bool is_signed = extract32(option, 2, 1);

    if (is_signed) {
        switch (extsize) {
        case 0:
            tcg_gen_ext8s_i64(tcg_out, tcg_in);
            break;
        case 1:
            tcg_gen_ext16s_i64(tcg_out, tcg_in);
            break;
        case 2:
            tcg_gen_ext32s_i64(tcg_out, tcg_in);
            break;
        case 3:
            tcg_gen_mov_i64(tcg_out, tcg_in);
            break;
        }
    } else {
        switch (extsize) {
        case 0:
            tcg_gen_ext8u_i64(tcg_out, tcg_in);
            break;
        case 1:
            tcg_gen_ext16u_i64(tcg_out, tcg_in);
            break;
        case 2:
            tcg_gen_ext32u_i64(tcg_out, tcg_in);
            break;
        case 3:
            tcg_gen_mov_i64(tcg_out, tcg_in);
            break;
        }
    }

    if (shift) {
        tcg_gen_shli_i64(tcg_out, tcg_out, shift);
    }
}
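
/* Usage sketch (editor's illustration): for a register offset of the
 * form "UXTW #2", option is 0b010 and shift is 2, so the code above
 * zero-extends the low 32 bits of tcg_in and then shifts left by 2 --
 * the DecodeRegExtend() behaviour referenced in the comment.
 */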

static inline void gen_check_sp_alignment(DisasContext *s)
{
    /* The AArch64 architecture mandates that (if enabled via PSTATE
     * or SCTLR bits) there is a check that SP is 16-aligned on every
     * SP-relative load or store (with an exception generated if it is not).
     * In line with general QEMU practice regarding misaligned accesses,
     * we omit these checks for the sake of guest program performance.
     * This function is provided as a hook so we can more easily add these
     * checks in future (possibly as a "favour catching guest program bugs
     * over speed" user-selectable option).
     */
}

/*
 * This provides a simple table-based lookup decoder. It is intended
 * to be used when the relevant bits for decode are too awkwardly
 * placed and switch/if based logic would be confusing and deeply
 * nested. Since it's a linear search through the table, tables
 * should be kept small.
 *
 * It returns the first handler where insn & mask == pattern, or
 * NULL if there is no match.
 * The table is terminated by an empty mask (i.e. 0)
 */
static inline AArch64DecodeFn *lookup_disas_fn(const AArch64DecodeTable *table,
                                               uint32_t insn)
{
    const AArch64DecodeTable *tptr = table;

    while (tptr->mask) {
        if ((insn & tptr->mask) == tptr->pattern) {
            return tptr->disas_fn;
        }
        tptr++;
    }
    return NULL;
}
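
/* Illustrative use of the table decoder (editor's sketch; the
 * pattern/mask values and handler name here are hypothetical):
 *
 *   static const AArch64DecodeTable example_table[] = {
 *       { 0x0e200400, 0x9f200400, disas_some_simd_group },
 *       { 0x00000000, 0x00000000, NULL }
 *   };
 *   AArch64DecodeFn *fn = lookup_disas_fn(example_table, insn);
 *   if (fn) {
 *       fn(s, insn);
 *   } else {
 *       unallocated_encoding(s);
 *   }
 */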

/*
 * the instruction disassembly implemented here matches
 * the instruction encoding classifications in chapter 3 (C3)
 * of the ARM Architecture Reference Manual (DDI0487A_a)
 */

/* C3.2.7 Unconditional branch (immediate)
 *   31  30       26 25                                  0
 * +----+-----------+-------------------------------------+
 * | op | 0 0 1 0 1 |                 imm26               |
 * +----+-----------+-------------------------------------+
 */
static void disas_uncond_b_imm(DisasContext *s, uint32_t insn)
{
    uint64_t addr = s->pc + sextract32(insn, 0, 26) * 4 - 4;

    if (insn & (1U << 31)) {
        /* C5.6.26 BL Branch with link */
        tcg_gen_movi_i64(cpu_reg(s, 30), s->pc);
    }

    /* C5.6.20 B Branch / C5.6.26 BL Branch with link */
    gen_goto_tb(s, 0, addr);
}
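
/* Editor's note: by the time a branch insn is decoded, s->pc has
 * already been advanced past it, so the "- 4" above makes the imm26
 * offset relative to the address of the branch itself, while the
 * plain s->pc written to X30 for BL is the return address of the
 * following instruction.
 */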

/* C3.2.1 Compare & branch (immediate)
 *   31  30         25  24  23                  5 4      0
 * +----+-------------+----+---------------------+--------+
 * | sf | 0 1 1 0 1 0 | op |         imm19       |   Rt   |
 * +----+-------------+----+---------------------+--------+
 */
static void disas_comp_b_imm(DisasContext *s, uint32_t insn)
{
    unsigned int sf, op, rt;
    uint64_t addr;
    TCGLabel *label_match;
    TCGv_i64 tcg_cmp;

    sf = extract32(insn, 31, 1);
    op = extract32(insn, 24, 1); /* 0: CBZ; 1: CBNZ */
    rt = extract32(insn, 0, 5);
    addr = s->pc + sextract32(insn, 5, 19) * 4 - 4;

    tcg_cmp = read_cpu_reg(s, rt, sf);
    label_match = gen_new_label();

    tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
                        tcg_cmp, 0, label_match);

    gen_goto_tb(s, 0, s->pc);
    gen_set_label(label_match);
    gen_goto_tb(s, 1, addr);
}

/* C3.2.5 Test & branch (immediate)
 *   31  30         25  24  23   19 18          5 4    0
 * +----+-------------+----+-------+-------------+------+
 * | b5 | 0 1 1 0 1 1 | op |  b40  |    imm14    |  Rt  |
 * +----+-------------+----+-------+-------------+------+
 */
static void disas_test_b_imm(DisasContext *s, uint32_t insn)
{
    unsigned int bit_pos, op, rt;
    uint64_t addr;
    TCGLabel *label_match;
    TCGv_i64 tcg_cmp;

    bit_pos = (extract32(insn, 31, 1) << 5) | extract32(insn, 19, 5);
    op = extract32(insn, 24, 1); /* 0: TBZ; 1: TBNZ */
    addr = s->pc + sextract32(insn, 5, 14) * 4 - 4;
    rt = extract32(insn, 0, 5);

    tcg_cmp = tcg_temp_new_i64();
    tcg_gen_andi_i64(tcg_cmp, cpu_reg(s, rt), (1ULL << bit_pos));
    label_match = gen_new_label();
    tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
                        tcg_cmp, 0, label_match);
    tcg_temp_free_i64(tcg_cmp);
    gen_goto_tb(s, 0, s->pc);
    gen_set_label(label_match);
    gen_goto_tb(s, 1, addr);
}
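
/* Worked example (editor's illustration): TBZ X3, #40, <label> encodes
 * b5 = 1 and b40 = 8, so bit_pos = (1 << 5) | 8 = 40, and the AND mask
 * above tests exactly bit 40 of X3.
 */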

/* C3.2.2 / C5.6.19 Conditional branch (immediate)
 *  31           25  24  23                  5   4  3    0
 * +---------------+----+---------------------+----+------+
 * | 0 1 0 1 0 1 0 | o1 |         imm19       | o0 | cond |
 * +---------------+----+---------------------+----+------+
 */
static void disas_cond_b_imm(DisasContext *s, uint32_t insn)
{
    unsigned int cond;
    uint64_t addr;

    if ((insn & (1 << 4)) || (insn & (1 << 24))) {
        unallocated_encoding(s);
        return;
    }
    addr = s->pc + sextract32(insn, 5, 19) * 4 - 4;
    cond = extract32(insn, 0, 4);

    if (cond < 0x0e) {
        /* genuinely conditional branches */
        TCGLabel *label_match = gen_new_label();
        arm_gen_test_cc(cond, label_match);
        gen_goto_tb(s, 0, s->pc);
        gen_set_label(label_match);
        gen_goto_tb(s, 1, addr);
    } else {
        /* 0xe and 0xf are both "always" conditions */
        gen_goto_tb(s, 0, addr);
    }
}

/* C5.6.68 HINT */
static void handle_hint(DisasContext *s, uint32_t insn,
                        unsigned int op1, unsigned int op2, unsigned int crm)
{
    unsigned int selector = crm << 3 | op2;

    if (op1 != 3) {
        unallocated_encoding(s);
        return;
    }

    switch (selector) {
    case 0: /* NOP */
        return;
    case 3: /* WFI */
        s->is_jmp = DISAS_WFI;
        return;
    case 1: /* YIELD */
        s->is_jmp = DISAS_YIELD;
        return;
    case 2: /* WFE */
        s->is_jmp = DISAS_WFE;
        return;
    case 4: /* SEV */
    case 5: /* SEVL */
        /* we treat all as NOP at least for now */
        return;
    default:
        /* default specified as NOP equivalent */
        return;
    }
}

static void gen_clrex(DisasContext *s, uint32_t insn)
{
    tcg_gen_movi_i64(cpu_exclusive_addr, -1);
}

/* CLREX, DSB, DMB, ISB */
static void handle_sync(DisasContext *s, uint32_t insn,
                        unsigned int op1, unsigned int op2, unsigned int crm)
{
    TCGBar bar;

    if (op1 != 3) {
        unallocated_encoding(s);
        return;
    }

    switch (op2) {
    case 2: /* CLREX */
        gen_clrex(s, insn);
        return;
    case 4: /* DSB */
    case 5: /* DMB */
        switch (crm & 3) {
        case 1: /* MBReqTypes_Reads */
            bar = TCG_BAR_SC | TCG_MO_LD_LD | TCG_MO_LD_ST;
            break;
        case 2: /* MBReqTypes_Writes */
            bar = TCG_BAR_SC | TCG_MO_ST_ST;
            break;
        default: /* MBReqTypes_All */
            bar = TCG_BAR_SC | TCG_MO_ALL;
            break;
        }
        tcg_gen_mb(bar);
        return;
    case 6: /* ISB */
        /* We need to break the TB after this insn to execute
         * self-modifying code correctly and also to take
         * any pending interrupts immediately.
         */
        s->is_jmp = DISAS_UPDATE;
        return;
    default:
        unallocated_encoding(s);
        return;
    }
}

/* C5.6.130 MSR (immediate) - move immediate to processor state field */
static void handle_msr_i(DisasContext *s, uint32_t insn,
                         unsigned int op1, unsigned int op2, unsigned int crm)
{
    int op = op1 << 3 | op2;
    switch (op) {
    case 0x05: /* SPSel */
        if (s->current_el == 0) {
            unallocated_encoding(s);
            return;
        }
        /* fall through */
    case 0x1e: /* DAIFSet */
    case 0x1f: /* DAIFClear */
    {
        TCGv_i32 tcg_imm = tcg_const_i32(crm);
        TCGv_i32 tcg_op = tcg_const_i32(op);
        gen_a64_set_pc_im(s->pc - 4);
        gen_helper_msr_i_pstate(cpu_env, tcg_op, tcg_imm);
        tcg_temp_free_i32(tcg_imm);
        tcg_temp_free_i32(tcg_op);
        s->is_jmp = DISAS_UPDATE;
        break;
    }
    default:
        unallocated_encoding(s);
        return;
    }
}

static void gen_get_nzcv(TCGv_i64 tcg_rt)
{
    TCGv_i32 tmp = tcg_temp_new_i32();
    TCGv_i32 nzcv = tcg_temp_new_i32();

    /* build bit 31, N */
    tcg_gen_andi_i32(nzcv, cpu_NF, (1U << 31));
    /* build bit 30, Z */
    tcg_gen_setcondi_i32(TCG_COND_EQ, tmp, cpu_ZF, 0);
    tcg_gen_deposit_i32(nzcv, nzcv, tmp, 30, 1);
    /* build bit 29, C */
    tcg_gen_deposit_i32(nzcv, nzcv, cpu_CF, 29, 1);
    /* build bit 28, V */
    tcg_gen_shri_i32(tmp, cpu_VF, 31);
    tcg_gen_deposit_i32(nzcv, nzcv, tmp, 28, 1);
    /* generate result */
    tcg_gen_extu_i32_i64(tcg_rt, nzcv);

    tcg_temp_free_i32(nzcv);
    tcg_temp_free_i32(tmp);
}
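
/* Worked example (editor's illustration): after a compare of equal
 * values (Z and C set, N and V clear), the code above assembles
 * N:Z:C:V = 0110 into bits [31:28], so a read of the NZCV register
 * yields 0x60000000.
 */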

static void gen_set_nzcv(TCGv_i64 tcg_rt)
{
    TCGv_i32 nzcv = tcg_temp_new_i32();

    /* take NZCV from R[t] */
    tcg_gen_extrl_i64_i32(nzcv, tcg_rt);

    /* bit 31, N */
    tcg_gen_andi_i32(cpu_NF, nzcv, (1U << 31));
    /* bit 30, Z */
    tcg_gen_andi_i32(cpu_ZF, nzcv, (1 << 30));
    tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_ZF, cpu_ZF, 0);
    /* bit 29, C */
    tcg_gen_andi_i32(cpu_CF, nzcv, (1 << 29));
    tcg_gen_shri_i32(cpu_CF, cpu_CF, 29);
    /* bit 28, V */
    tcg_gen_andi_i32(cpu_VF, nzcv, (1 << 28));
    tcg_gen_shli_i32(cpu_VF, cpu_VF, 3);
    tcg_temp_free_i32(nzcv);
}

/* C5.6.129 MRS - move from system register
 * C5.6.131 MSR (register) - move to system register
 * C5.6.204 SYS
 * C5.6.205 SYSL
 * These are all essentially the same insn in 'read' and 'write'
 * versions, with varying op0 fields.
 */
static void handle_sys(DisasContext *s, uint32_t insn, bool isread,
                       unsigned int op0, unsigned int op1, unsigned int op2,
                       unsigned int crn, unsigned int crm, unsigned int rt)
{
    const ARMCPRegInfo *ri;
    TCGv_i64 tcg_rt;

    ri = get_arm_cp_reginfo(s->cp_regs,
                            ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP,
                                               crn, crm, op0, op1, op2));

    if (!ri) {
        /* Unknown register; this might be a guest error or a QEMU
         * unimplemented feature.
         */
        qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch64 "
                      "system register op0:%d op1:%d crn:%d crm:%d op2:%d\n",
                      isread ? "read" : "write", op0, op1, crn, crm, op2);
        unallocated_encoding(s);
        return;
    }

    /* Check access permissions */
    if (!cp_access_ok(s->current_el, ri, isread)) {
        unallocated_encoding(s);
        return;
    }

    if (ri->accessfn) {
        /* Emit code to perform further access permissions checks at
         * runtime; this may result in an exception.
         */
        TCGv_ptr tmpptr;
        TCGv_i32 tcg_syn, tcg_isread;
        uint32_t syndrome;

        gen_a64_set_pc_im(s->pc - 4);
        tmpptr = tcg_const_ptr(ri);
        syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
        tcg_syn = tcg_const_i32(syndrome);
        tcg_isread = tcg_const_i32(isread);
        gen_helper_access_check_cp_reg(cpu_env, tmpptr, tcg_syn, tcg_isread);
        tcg_temp_free_ptr(tmpptr);
        tcg_temp_free_i32(tcg_syn);
        tcg_temp_free_i32(tcg_isread);
    }

    /* Handle special cases first */
    switch (ri->type & ~(ARM_CP_FLAG_MASK & ~ARM_CP_SPECIAL)) {
    case ARM_CP_NOP:
        return;
    case ARM_CP_NZCV:
        tcg_rt = cpu_reg(s, rt);
        if (isread) {
            gen_get_nzcv(tcg_rt);
        } else {
            gen_set_nzcv(tcg_rt);
        }
        return;
    case ARM_CP_CURRENTEL:
        /* Reads as current EL value from pstate, which is
         * guaranteed to be constant by the tb flags.
         */
        tcg_rt = cpu_reg(s, rt);
        tcg_gen_movi_i64(tcg_rt, s->current_el << 2);
        return;
    case ARM_CP_DC_ZVA:
        /* Writes clear the aligned block of memory which rt points into. */
        tcg_rt = cpu_reg(s, rt);
        gen_helper_dc_zva(cpu_env, tcg_rt);
        return;
    default:
        break;
    }

    if ((s->tb->cflags & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
        gen_io_start();
    }

    tcg_rt = cpu_reg(s, rt);

    if (isread) {
        if (ri->type & ARM_CP_CONST) {
            tcg_gen_movi_i64(tcg_rt, ri->resetvalue);
        } else if (ri->readfn) {
            TCGv_ptr tmpptr;
            tmpptr = tcg_const_ptr(ri);
            gen_helper_get_cp_reg64(tcg_rt, cpu_env, tmpptr);
            tcg_temp_free_ptr(tmpptr);
        } else {
            tcg_gen_ld_i64(tcg_rt, cpu_env, ri->fieldoffset);
        }
    } else {
        if (ri->type & ARM_CP_CONST) {
            /* If not forbidden by access permissions, treat as WI */
            return;
        } else if (ri->writefn) {
            TCGv_ptr tmpptr;
            tmpptr = tcg_const_ptr(ri);
            gen_helper_set_cp_reg64(cpu_env, tmpptr, tcg_rt);
            tcg_temp_free_ptr(tmpptr);
        } else {
            tcg_gen_st_i64(tcg_rt, cpu_env, ri->fieldoffset);
        }
    }

    if ((s->tb->cflags & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
        /* I/O operations must end the TB here (whether read or write) */
        gen_io_end();
        s->is_jmp = DISAS_UPDATE;
    } else if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
        /* We default to ending the TB on a coprocessor register write,
         * but allow this to be suppressed by the register definition
         * (usually only necessary to work around guest bugs).
         */
        s->is_jmp = DISAS_UPDATE;
    }
}

/* C3.2.4 System
 *  31                 22 21  20 19 18 16 15   12 11    8 7   5 4    0
 * +---------------------+---+-----+-----+-------+-------+-----+------+
 * | 1 1 0 1 0 1 0 1 0 0 | L | op0 | op1 |  CRn  |  CRm  | op2 |  Rt  |
 * +---------------------+---+-----+-----+-------+-------+-----+------+
 */
static void disas_system(DisasContext *s, uint32_t insn)
{
    unsigned int l, op0, op1, crn, crm, op2, rt;
    l = extract32(insn, 21, 1);
    op0 = extract32(insn, 19, 2);
    op1 = extract32(insn, 16, 3);
    crn = extract32(insn, 12, 4);
    crm = extract32(insn, 8, 4);
    op2 = extract32(insn, 5, 3);
    rt = extract32(insn, 0, 5);

    if (op0 == 0) {
        if (l || rt != 31) {
            unallocated_encoding(s);
            return;
        }
        switch (crn) {
        case 2: /* C5.6.68 HINT */
            handle_hint(s, insn, op1, op2, crm);
            break;
        case 3: /* CLREX, DSB, DMB, ISB */
            handle_sync(s, insn, op1, op2, crm);
            break;
        case 4: /* C5.6.130 MSR (immediate) */
            handle_msr_i(s, insn, op1, op2, crm);
            break;
        default:
            unallocated_encoding(s);
            break;
        }
        return;
    }
    handle_sys(s, insn, l, op0, op1, op2, crn, crm, rt);
}

1649/* C3.2.3 Exception generation
1650 *
1651 *  31             24 23 21 20                     5 4   2 1  0
1652 * +-----------------+-----+------------------------+-----+----+
1653 * | 1 1 0 1 0 1 0 0 | opc |          imm16         | op2 | LL |
1654 * +-----------------+-----+------------------------+-----+----+
1655 */
1656static void disas_exc(DisasContext *s, uint32_t insn)
1657{
1658    int opc = extract32(insn, 21, 3);
1659    int op2_ll = extract32(insn, 0, 5);
1660    int imm16 = extract32(insn, 5, 16);
1661    TCGv_i32 tmp;
1662
1663    switch (opc) {
1664    case 0:
1665        /* For SVC, HVC and SMC we advance the single-step state
1666         * machine before taking the exception. This is architecturally
1667         * mandated, to ensure that single-stepping a system call
1668         * instruction works properly.
1669         */
1670        switch (op2_ll) {
1671        case 1:                                                     /* SVC */
1672            gen_ss_advance(s);
1673            gen_exception_insn(s, 0, EXCP_SWI, syn_aa64_svc(imm16),
1674                               default_exception_el(s));
1675            break;
1676        case 2:                                                     /* HVC */
1677            if (s->current_el == 0) {
1678                unallocated_encoding(s);
1679                break;
1680            }
1681            /* The pre HVC helper handles cases when HVC gets trapped
1682             * as an undefined insn by runtime configuration.
1683             */
1684            gen_a64_set_pc_im(s->pc - 4);
1685            gen_helper_pre_hvc(cpu_env);
1686            gen_ss_advance(s);
1687            gen_exception_insn(s, 0, EXCP_HVC, syn_aa64_hvc(imm16), 2);
1688            break;
1689        case 3:                                                     /* SMC */
1690            if (s->current_el == 0) {
1691                unallocated_encoding(s);
1692                break;
1693            }
1694            gen_a64_set_pc_im(s->pc - 4);
1695            tmp = tcg_const_i32(syn_aa64_smc(imm16));
1696            gen_helper_pre_smc(cpu_env, tmp);
1697            tcg_temp_free_i32(tmp);
1698            gen_ss_advance(s);
1699            gen_exception_insn(s, 0, EXCP_SMC, syn_aa64_smc(imm16), 3);
1700            break;
1701        default:
1702            unallocated_encoding(s);
1703            break;
1704        }
1705        break;
1706    case 1:
1707        if (op2_ll != 0) {
1708            unallocated_encoding(s);
1709            break;
1710        }
1711        /* BRK */
1712        gen_exception_insn(s, 4, EXCP_BKPT, syn_aa64_bkpt(imm16),
1713                           default_exception_el(s));
1714        break;
1715    case 2:
1716        if (op2_ll != 0) {
1717            unallocated_encoding(s);
1718            break;
1719        }
1720        /* HLT. This has two purposes.
1721         * Architecturally, it is an external halting debug instruction.
1722         * Since QEMU doesn't implement external debug, we treat it as
1723         * required when halting debug is disabled: it will UNDEF.
1724         * Secondly, "HLT 0xf000" is the A64 semihosting syscall instruction.
1725         */
1726        if (semihosting_enabled() && imm16 == 0xf000) {
1727#ifndef CONFIG_USER_ONLY
1728            /* In system mode, don't allow userspace access to semihosting,
1729             * to provide some semblance of security (and for consistency
1730             * with our 32-bit semihosting).
1731             */
1732            if (s->current_el == 0) {
1733                unsupported_encoding(s, insn);
1734                break;
1735            }
1736#endif
1737            gen_exception_internal_insn(s, 0, EXCP_SEMIHOST);
1738        } else {
1739            unsupported_encoding(s, insn);
1740        }
1741        break;
1742    case 5:
1743        if (op2_ll < 1 || op2_ll > 3) {
1744            unallocated_encoding(s);
1745            break;
1746        }
1747        /* DCPS1, DCPS2, DCPS3 */
1748        unsupported_encoding(s, insn);
1749        break;
1750    default:
1751        unallocated_encoding(s);
1752        break;
1753    }
1754}
1755
1756/* C3.2.7 Unconditional branch (register)
1757 *  31           25 24   21 20   16 15   10 9    5 4     0
1758 * +---------------+-------+-------+-------+------+-------+
1759 * | 1 1 0 1 0 1 1 |  opc  |  op2  |  op3  |  Rn  |  op4  |
1760 * +---------------+-------+-------+-------+------+-------+
1761 */
1762static void disas_uncond_b_reg(DisasContext *s, uint32_t insn)
1763{
1764    unsigned int opc, op2, op3, rn, op4;
1765
1766    opc = extract32(insn, 21, 4);
1767    op2 = extract32(insn, 16, 5);
1768    op3 = extract32(insn, 10, 6);
1769    rn = extract32(insn, 5, 5);
1770    op4 = extract32(insn, 0, 5);
1771
1772    if (op4 != 0x0 || op3 != 0x0 || op2 != 0x1f) {
1773        unallocated_encoding(s);
1774        return;
1775    }
1776
1777    switch (opc) {
1778    case 0: /* BR */
1779    case 1: /* BLR */
1780    case 2: /* RET */
1781        gen_a64_set_pc(s, cpu_reg(s, rn));
1782        /* BLR also needs to load return address */
1783        if (opc == 1) {
1784            tcg_gen_movi_i64(cpu_reg(s, 30), s->pc);
1785        }
1786        break;
1787    case 4: /* ERET */
1788        if (s->current_el == 0) {
1789            unallocated_encoding(s);
1790            return;
1791        }
1792        gen_helper_exception_return(cpu_env);
1793        s->is_jmp = DISAS_JUMP;
1794        return;
1795    case 5: /* DRPS */
1796        if (rn != 0x1f) {
1797            unallocated_encoding(s);
1798        } else {
1799            unsupported_encoding(s, insn);
1800        }
1801        return;
1802    default:
1803        unallocated_encoding(s);
1804        return;
1805    }
1806
1807    s->is_jmp = DISAS_JUMP;
1808}
1809
1810/* C3.2 Branches, exception generating and system instructions */
1811static void disas_b_exc_sys(DisasContext *s, uint32_t insn)
1812{
1813    switch (extract32(insn, 25, 7)) {
1814    case 0x0a: case 0x0b:
1815    case 0x4a: case 0x4b: /* Unconditional branch (immediate) */
1816        disas_uncond_b_imm(s, insn);
1817        break;
1818    case 0x1a: case 0x5a: /* Compare & branch (immediate) */
1819        disas_comp_b_imm(s, insn);
1820        break;
1821    case 0x1b: case 0x5b: /* Test & branch (immediate) */
1822        disas_test_b_imm(s, insn);
1823        break;
1824    case 0x2a: /* Conditional branch (immediate) */
1825        disas_cond_b_imm(s, insn);
1826        break;
1827    case 0x6a: /* Exception generation / System */
1828        if (insn & (1 << 24)) {
1829            disas_system(s, insn);
1830        } else {
1831            disas_exc(s, insn);
1832        }
1833        break;
1834    case 0x6b: /* Unconditional branch (register) */
1835        disas_uncond_b_reg(s, insn);
1836        break;
1837    default:
1838        unallocated_encoding(s);
1839        break;
1840    }
1841}
1842
1843/*
1844 * Load/Store exclusive instructions are implemented by remembering
1845 * the value/address loaded, and seeing if these are the same
1846 * when the store is performed. This is not actually the architecturally
1847 * mandated semantics, but it works for typical guest code sequences
1848 * and avoids having to monitor regular stores.
1849 *
1850 * The store exclusive uses the atomic cmpxchg primitives to avoid
1851 * races in multi-threaded linux-user and when MTTCG softmmu is
1852 * enabled.
1853 */
1854static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
1855                               TCGv_i64 addr, int size, bool is_pair)
1856{
1857    TCGv_i64 tmp = tcg_temp_new_i64();
1858    TCGMemOp memop = s->be_data + size;
1859
1860    g_assert(size <= 3);
1861    tcg_gen_qemu_ld_i64(tmp, addr, get_mem_index(s), memop);
1862
1863    if (is_pair) {
1864        TCGv_i64 addr2 = tcg_temp_new_i64();
1865        TCGv_i64 hitmp = tcg_temp_new_i64();
1866
1867        g_assert(size >= 2);
1868        tcg_gen_addi_i64(addr2, addr, 1 << size);
1869        tcg_gen_qemu_ld_i64(hitmp, addr2, get_mem_index(s), memop);
1870        tcg_temp_free_i64(addr2);
1871        tcg_gen_mov_i64(cpu_exclusive_high, hitmp);
1872        tcg_gen_mov_i64(cpu_reg(s, rt2), hitmp);
1873        tcg_temp_free_i64(hitmp);
1874    }
1875
1876    tcg_gen_mov_i64(cpu_exclusive_val, tmp);
1877    tcg_gen_mov_i64(cpu_reg(s, rt), tmp);
1878
1879    tcg_temp_free_i64(tmp);
1880    tcg_gen_mov_i64(cpu_exclusive_addr, addr);
1881}
1882
1883static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
1884                                TCGv_i64 inaddr, int size, int is_pair)
1885{
1886    /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]
1887     *     && (!is_pair || env->exclusive_high == [addr + datasize])) {
1888     *     [addr] = {Rt};
1889     *     if (is_pair) {
1890     *         [addr + datasize] = {Rt2};
1891     *     }
1892     *     {Rd} = 0;
1893     * } else {
1894     *     {Rd} = 1;
1895     * }
1896     * env->exclusive_addr = -1;
1897     */
1898    TCGLabel *fail_label = gen_new_label();
1899    TCGLabel *done_label = gen_new_label();
1900    TCGv_i64 addr = tcg_temp_local_new_i64();
1901    TCGv_i64 tmp;
1902
1903    /* Copy input into a local temp so it is not trashed when the
1904     * basic block ends at the branch insn.
1905     */
1906    tcg_gen_mov_i64(addr, inaddr);
1907    tcg_gen_brcond_i64(TCG_COND_NE, addr, cpu_exclusive_addr, fail_label);
1908
1909    tmp = tcg_temp_new_i64();
1910    if (is_pair) {
1911        if (size == 2) {
1912            TCGv_i64 val = tcg_temp_new_i64();
1913            tcg_gen_concat32_i64(tmp, cpu_reg(s, rt), cpu_reg(s, rt2));
1914            tcg_gen_concat32_i64(val, cpu_exclusive_val, cpu_exclusive_high);
1915            tcg_gen_atomic_cmpxchg_i64(tmp, addr, val, tmp,
1916                                       get_mem_index(s),
1917                                       size | MO_ALIGN | s->be_data);
1918            tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, val);
1919            tcg_temp_free_i64(val);
1920        } else if (s->be_data == MO_LE) {
1921            gen_helper_paired_cmpxchg64_le(tmp, cpu_env, addr, cpu_reg(s, rt),
1922                                           cpu_reg(s, rt2));
1923        } else {
1924            gen_helper_paired_cmpxchg64_be(tmp, cpu_env, addr, cpu_reg(s, rt),
1925                                           cpu_reg(s, rt2));
1926        }
1927    } else {
1928        TCGv_i64 val = cpu_reg(s, rt);
1929        tcg_gen_atomic_cmpxchg_i64(tmp, addr, cpu_exclusive_val, val,
1930                                   get_mem_index(s),
1931                                   size | MO_ALIGN | s->be_data);
1932        tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
1933    }
1934
1935    tcg_temp_free_i64(addr);
1936
1937    tcg_gen_mov_i64(cpu_reg(s, rd), tmp);
1938    tcg_temp_free_i64(tmp);
1939    tcg_gen_br(done_label);
1940
1941    gen_set_label(fail_label);
1942    tcg_gen_movi_i64(cpu_reg(s, rd), 1);
1943    gen_set_label(done_label);
1944    tcg_gen_movi_i64(cpu_exclusive_addr, -1);
1945}
1946
1947/* Compute the Sixty-Four bit (SF) register size. This logic is derived
1948 * from the ARMv8 specs for LDR (Shared decode for all encodings).
1949 */
1950static bool disas_ldst_compute_iss_sf(int size, bool is_signed, int opc)
1951{
1952    int opc0 = extract32(opc, 0, 1);
1953    int regsize;
1954
1955    if (is_signed) {
1956        regsize = opc0 ? 32 : 64;
1957    } else {
1958        regsize = size == 3 ? 64 : 32;
1959    }
1960    return regsize == 64;
1961}
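/* Illustrative results of the logic above (examples, not quoted from
 * the spec):
 *   LDR   Xt, [..] : size=3, unsigned  -> regsize 64 -> SF=1
 *   LDR   Wt, [..] : size=2, unsigned  -> regsize 32 -> SF=0
 *   LDRSW Xt, [..] : signed, opc<0>=0  -> regsize 64 -> SF=1
 *   LDRSH Wt, [..] : signed, opc<0>=1  -> regsize 32 -> SF=0
 */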
1962
1963/* C3.3.6 Load/store exclusive
1964 *
1965 *  31 30 29         24  23  22   21  20  16  15  14   10 9    5 4    0
1966 * +-----+-------------+----+---+----+------+----+-------+------+------+
1967 * | sz  | 0 0 1 0 0 0 | o2 | L | o1 |  Rs  | o0 |  Rt2  |  Rn  | Rt   |
1968 * +-----+-------------+----+---+----+------+----+-------+------+------+
1969 *
1970 *  sz: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64 bit
1971 *   L: 0 -> store, 1 -> load
1972 *  o2: 0 -> exclusive, 1 -> not
1973 *  o1: 0 -> single register, 1 -> register pair
1974 *  o0: 1 -> load-acquire/store-release, 0 -> not
1975 */
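/* Decode example (illustrative): STLXR w1, x2, [x0] has sz=11, o2=0,
 * L=0, o1=0, o0=1, giving size=3, is_excl, is_store and is_lasr below:
 * we emit the release barrier, then the store-exclusive sequence, with
 * the status result written to w1 (rs=1).
 */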
1976static void disas_ldst_excl(DisasContext *s, uint32_t insn)
1977{
1978    int rt = extract32(insn, 0, 5);
1979    int rn = extract32(insn, 5, 5);
1980    int rt2 = extract32(insn, 10, 5);
1981    int is_lasr = extract32(insn, 15, 1);
1982    int rs = extract32(insn, 16, 5);
1983    int is_pair = extract32(insn, 21, 1);
1984    int is_store = !extract32(insn, 22, 1);
1985    int is_excl = !extract32(insn, 23, 1);
1986    int size = extract32(insn, 30, 2);
1987    TCGv_i64 tcg_addr;
1988
1989    if ((!is_excl && !is_pair && !is_lasr) ||
1990        (!is_excl && is_pair) ||
1991        (is_pair && size < 2)) {
1992        unallocated_encoding(s);
1993        return;
1994    }
1995
1996    if (rn == 31) {
1997        gen_check_sp_alignment(s);
1998    }
1999    tcg_addr = read_cpu_reg_sp(s, rn, 1);
2000
2001    /* Load-acquire/store-release ordering is provided by the explicit
2002     * tcg_gen_mb() barriers emitted below when is_lasr is set.
2003     */
2004
2005    if (is_excl) {
2006        if (!is_store) {
2007            s->is_ldex = true;
2008            gen_load_exclusive(s, rt, rt2, tcg_addr, size, is_pair);
2009            if (is_lasr) {
2010                tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
2011            }
2012        } else {
2013            if (is_lasr) {
2014                tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
2015            }
2016            gen_store_exclusive(s, rs, rt, rt2, tcg_addr, size, is_pair);
2017        }
2018    } else {
2019        TCGv_i64 tcg_rt = cpu_reg(s, rt);
2020        bool iss_sf = disas_ldst_compute_iss_sf(size, false, 0);
2021
2022        /* Generate ISS for non-exclusive accesses including LASR.  */
2023        if (is_store) {
2024            if (is_lasr) {
2025                tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
2026            }
2027            do_gpr_st(s, tcg_rt, tcg_addr, size,
2028                      true, rt, iss_sf, is_lasr);
2029        } else {
2030            do_gpr_ld(s, tcg_rt, tcg_addr, size, false, false,
2031                      true, rt, iss_sf, is_lasr);
2032            if (is_lasr) {
2033                tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
2034            }
2035        }
2036    }
2037}
2038
2039/*
2040 * C3.3.5 Load register (literal)
2041 *
2042 *  31 30 29   27  26 25 24 23                5 4     0
2043 * +-----+-------+---+-----+-------------------+-------+
2044 * | opc | 0 1 1 | V | 0 0 |     imm19         |  Rt   |
2045 * +-----+-------+---+-----+-------------------+-------+
2046 *
2047 * V: 1 -> vector (simd/fp)
2048 * opc (non-vector): 00 -> 32 bit, 01 -> 64 bit,
2049 *                   10 -> 32 bit signed, 11 -> prefetch
2050 * opc (vector): 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit (11 unallocated)
2051 */
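/* Worked example (illustrative): LDRSW x3, <label> has opc=10, V=0,
 * so size=2 and is_signed below; with imm19=4 the load address is the
 * address of this insn + 16 (s->pc - 4, since s->pc has already
 * advanced past the instruction).
 */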
2052static void disas_ld_lit(DisasContext *s, uint32_t insn)
2053{
2054    int rt = extract32(insn, 0, 5);
2055    int64_t imm = sextract32(insn, 5, 19) << 2;
2056    bool is_vector = extract32(insn, 26, 1);
2057    int opc = extract32(insn, 30, 2);
2058    bool is_signed = false;
2059    int size = 2;
2060    TCGv_i64 tcg_rt, tcg_addr;
2061
2062    if (is_vector) {
2063        if (opc == 3) {
2064            unallocated_encoding(s);
2065            return;
2066        }
2067        size = 2 + opc;
2068        if (!fp_access_check(s)) {
2069            return;
2070        }
2071    } else {
2072        if (opc == 3) {
2073            /* PRFM (literal) : prefetch */
2074            return;
2075        }
2076        size = 2 + extract32(opc, 0, 1);
2077        is_signed = extract32(opc, 1, 1);
2078    }
2079
2080    tcg_rt = cpu_reg(s, rt);
2081
2082    tcg_addr = tcg_const_i64((s->pc - 4) + imm);
2083    if (is_vector) {
2084        do_fp_ld(s, rt, tcg_addr, size);
2085    } else {
2086        /* Only unsigned 32bit loads target 32bit registers.  */
2087        bool iss_sf = opc != 0;
2088
2089        do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, false,
2090                  true, rt, iss_sf, false);
2091    }
2092    tcg_temp_free_i64(tcg_addr);
2093}
2094
2095/*
2096 * C5.6.80 LDNP (Load Pair - non-temporal hint)
2097 * C5.6.81 LDP (Load Pair - non vector)
2098 * C5.6.82 LDPSW (Load Pair Signed Word - non vector)
2099 * C5.6.176 STNP (Store Pair - non-temporal hint)
2100 * C5.6.177 STP (Store Pair - non vector)
2101 * C6.3.165 LDNP (Load Pair of SIMD&FP - non-temporal hint)
2102 * C6.3.165 LDP (Load Pair of SIMD&FP)
2103 * C6.3.284 STNP (Store Pair of SIMD&FP - non-temporal hint)
2104 * C6.3.284 STP (Store Pair of SIMD&FP)
2105 *
2106 *  31 30 29   27  26  25 24   23  22 21   15 14   10 9    5 4    0
2107 * +-----+-------+---+---+-------+---+-------+-------+------+------+
2108 * | opc | 1 0 1 | V | 0 | index | L |  imm7 |  Rt2  |  Rn  | Rt   |
2109 * +-----+-------+---+---+-------+---+-------+-------+------+------+
2110 *
2111 * opc: LDP/STP/LDNP/STNP        00 -> 32 bit, 10 -> 64 bit
2112 *      LDPSW                    01
2113 *      LDP/STP/LDNP/STNP (SIMD) 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit
2114 *   V: 0 -> GPR, 1 -> Vector
2115 * idx: 00 -> signed offset with non-temporal hint, 01 -> post-index,
2116 *      10 -> signed offset, 11 -> pre-index
2117 *   L: 0 -> Store 1 -> Load
2118 *
2119 * Rt, Rt2 = GPR or SIMD registers to be stored
2120 * Rn = general purpose register containing address
2121 * imm7 = signed offset (multiple of 4 or 8 depending on size)
2122 */
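/* Worked example (illustrative): LDP x0, x1, [sp, #16] has opc=10,
 * V=0, L=1, index=10 (signed offset), so size=3 and imm7=2 scales to
 * offset 16: x0 loads from sp+16, x1 from sp+24, with no writeback.
 */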
2123static void disas_ldst_pair(DisasContext *s, uint32_t insn)
2124{
2125    int rt = extract32(insn, 0, 5);
2126    int rn = extract32(insn, 5, 5);
2127    int rt2 = extract32(insn, 10, 5);
2128    uint64_t offset = sextract64(insn, 15, 7);
2129    int index = extract32(insn, 23, 2);
2130    bool is_vector = extract32(insn, 26, 1);
2131    bool is_load = extract32(insn, 22, 1);
2132    int opc = extract32(insn, 30, 2);
2133
2134    bool is_signed = false;
2135    bool postindex = false;
2136    bool wback = false;
2137
2138    TCGv_i64 tcg_addr; /* calculated address */
2139    int size;
2140
2141    if (opc == 3) {
2142        unallocated_encoding(s);
2143        return;
2144    }
2145
2146    if (is_vector) {
2147        size = 2 + opc;
2148    } else {
2149        size = 2 + extract32(opc, 1, 1);
2150        is_signed = extract32(opc, 0, 1);
2151        if (!is_load && is_signed) {
2152            unallocated_encoding(s);
2153            return;
2154        }
2155    }
2156
2157    switch (index) {
2158    case 1: /* post-index */
2159        postindex = true;
2160        wback = true;
2161        break;
2162    case 0:
2163        /* signed offset with "non-temporal" hint. Since we don't emulate
2164         * caches we don't care about hints to the cache system about
2165         * data access patterns, and handle this identically to plain
2166         * signed offset.
2167         */
2168        if (is_signed) {
2169            /* There is no non-temporal-hint version of LDPSW */
2170            unallocated_encoding(s);
2171            return;
2172        }
2173        postindex = false;
2174        break;
2175    case 2: /* signed offset, rn not updated */
2176        postindex = false;
2177        break;
2178    case 3: /* pre-index */
2179        postindex = false;
2180        wback = true;
2181        break;
2182    }
2183
2184    if (is_vector && !fp_access_check(s)) {
2185        return;
2186    }
2187
2188    offset <<= size;
2189
2190    if (rn == 31) {
2191        gen_check_sp_alignment(s);
2192    }
2193
2194    tcg_addr = read_cpu_reg_sp(s, rn, 1);
2195
2196    if (!postindex) {
2197        tcg_gen_addi_i64(tcg_addr, tcg_addr, offset);
2198    }
2199
2200    if (is_vector) {
2201        if (is_load) {
2202            do_fp_ld(s, rt, tcg_addr, size);
2203        } else {
2204            do_fp_st(s, rt, tcg_addr, size);
2205        }
2206    } else {
2207        TCGv_i64 tcg_rt = cpu_reg(s, rt);
2208        if (is_load) {
2209            do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, false,
2210                      false, 0, false, false);
2211        } else {
2212            do_gpr_st(s, tcg_rt, tcg_addr, size,
2213                      false, 0, false, false);
2214        }
2215    }
2216    tcg_gen_addi_i64(tcg_addr, tcg_addr, 1 << size);
2217    if (is_vector) {
2218        if (is_load) {
2219            do_fp_ld(s, rt2, tcg_addr, size);
2220        } else {
2221            do_fp_st(s, rt2, tcg_addr, size);
2222        }
2223    } else {
2224        TCGv_i64 tcg_rt2 = cpu_reg(s, rt2);
2225        if (is_load) {
2226            do_gpr_ld(s, tcg_rt2, tcg_addr, size, is_signed, false,
2227                      false, 0, false, false);
2228        } else {
2229            do_gpr_st(s, tcg_rt2, tcg_addr, size,
2230                      false, 0, false, false);
2231        }
2232    }
2233
2234    if (wback) {
2235        if (postindex) {
2236            tcg_gen_addi_i64(tcg_addr, tcg_addr, offset - (1 << size));
2237        } else {
2238            tcg_gen_subi_i64(tcg_addr, tcg_addr, 1 << size);
2239        }
2240        tcg_gen_mov_i64(cpu_reg_sp(s, rn), tcg_addr);
2241    }
2242}
2243
2244/*
2245 * C3.3.8 Load/store (immediate post-indexed)
2246 * C3.3.9 Load/store (immediate pre-indexed)
2247 * C3.3.12 Load/store (unscaled immediate)
2248 *
2249 * 31 30 29   27  26 25 24 23 22 21  20    12 11 10 9    5 4    0
2250 * +----+-------+---+-----+-----+---+--------+-----+------+------+
2251 * |size| 1 1 1 | V | 0 0 | opc | 0 |  imm9  | idx |  Rn  |  Rt  |
2252 * +----+-------+---+-----+-----+---+--------+-----+------+------+
2253 *
2254 * idx = 01 -> post-indexed, 11 pre-indexed, 00 unscaled imm. (no writeback)
2255 *       10 -> unprivileged
2256 * V = 0 -> non-vector
2257 * size: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64bit
2258 * opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
2259 */
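/* Examples (illustrative): LDR x0, [x1, #-8]! is idx=11 (pre-index,
 * writeback) with imm9=-8, while LDTR x0, [x1] is idx=10 and is
 * translated using the unprivileged (EL0) memory index obtained from
 * get_a64_user_mem_index() below.
 */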
2260static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn,
2261                                int opc,
2262                                int size,
2263                                int rt,
2264                                bool is_vector)
2265{
2266    int rn = extract32(insn, 5, 5);
2267    int imm9 = sextract32(insn, 12, 9);
2268    int idx = extract32(insn, 10, 2);
2269    bool is_signed = false;
2270    bool is_store = false;
2271    bool is_extended = false;
2272    bool is_unpriv = (idx == 2);
2273    bool iss_valid = !is_vector;
2274    bool post_index;
2275    bool writeback;
2276
2277    TCGv_i64 tcg_addr;
2278
2279    if (is_vector) {
2280        size |= (opc & 2) << 1;
2281        if (size > 4 || is_unpriv) {
2282            unallocated_encoding(s);
2283            return;
2284        }
2285        is_store = ((opc & 1) == 0);
2286        if (!fp_access_check(s)) {
2287            return;
2288        }
2289    } else {
2290        if (size == 3 && opc == 2) {
2291            /* PRFM - prefetch */
2292            if (is_unpriv) {
2293                unallocated_encoding(s);
2294                return;
2295            }
2296            return;
2297        }
2298        if (opc == 3 && size > 1) {
2299            unallocated_encoding(s);
2300            return;
2301        }
2302        is_store = (opc == 0);
2303        is_signed = extract32(opc, 1, 1);
2304        is_extended = (size < 3) && extract32(opc, 0, 1);
2305    }
2306
2307    switch (idx) {
2308    case 0:
2309    case 2:
2310        post_index = false;
2311        writeback = false;
2312        break;
2313    case 1:
2314        post_index = true;
2315        writeback = true;
2316        break;
2317    case 3:
2318        post_index = false;
2319        writeback = true;
2320        break;
2321    }
2322
2323    if (rn == 31) {
2324        gen_check_sp_alignment(s);
2325    }
2326    tcg_addr = read_cpu_reg_sp(s, rn, 1);
2327
2328    if (!post_index) {
2329        tcg_gen_addi_i64(tcg_addr, tcg_addr, imm9);
2330    }
2331
2332    if (is_vector) {
2333        if (is_store) {
2334            do_fp_st(s, rt, tcg_addr, size);
2335        } else {
2336            do_fp_ld(s, rt, tcg_addr, size);
2337        }
2338    } else {
2339        TCGv_i64 tcg_rt = cpu_reg(s, rt);
2340        int memidx = is_unpriv ? get_a64_user_mem_index(s) : get_mem_index(s);
2341        bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
2342
2343        if (is_store) {
2344            do_gpr_st_memidx(s, tcg_rt, tcg_addr, size, memidx,
2345                             iss_valid, rt, iss_sf, false);
2346        } else {
2347            do_gpr_ld_memidx(s, tcg_rt, tcg_addr, size,
2348                             is_signed, is_extended, memidx,
2349                             iss_valid, rt, iss_sf, false);
2350        }
2351    }
2352
2353    if (writeback) {
2354        TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
2355        if (post_index) {
2356            tcg_gen_addi_i64(tcg_addr, tcg_addr, imm9);
2357        }
2358        tcg_gen_mov_i64(tcg_rn, tcg_addr);
2359    }
2360}
2361
2362/*
2363 * C3.3.10 Load/store (register offset)
2364 *
2365 * 31 30 29   27  26 25 24 23 22 21  20  16 15 13 12 11 10 9  5 4  0
2366 * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
2367 * |size| 1 1 1 | V | 0 0 | opc | 1 |  Rm  | opt | S| 1 0 | Rn | Rt |
2368 * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
2369 *
2370 * For non-vector:
2371 *   size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit
2372 *   opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
2373 * For vector:
2374 *   size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
2375 *   opc<0>: 0 -> store, 1 -> load
2376 * V: 1 -> vector/simd
2377 * opt: extend encoding (see DecodeRegExtend)
2378 * S: if S=1 then scale the offset register by the transfer size (<< size)
2379 * Rt: register to transfer into/out of
2380 * Rn: address register or SP for base
2381 * Rm: offset register or ZR for offset
2382 */
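/* Worked example (illustrative): LDR x0, [x1, w2, SXTW #3] has
 * opt=110 (SXTW) and S=1, so tcg_rm below becomes w2 sign-extended
 * and shifted left by size=3 before being added to the base.
 */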
2383static void disas_ldst_reg_roffset(DisasContext *s, uint32_t insn,
2384                                   int opc,
2385                                   int size,
2386                                   int rt,
2387                                   bool is_vector)
2388{
2389    int rn = extract32(insn, 5, 5);
2390    int shift = extract32(insn, 12, 1);
2391    int rm = extract32(insn, 16, 5);
2392    int opt = extract32(insn, 13, 3);
2393    bool is_signed = false;
2394    bool is_store = false;
2395    bool is_extended = false;
2396
2397    TCGv_i64 tcg_rm;
2398    TCGv_i64 tcg_addr;
2399
2400    if (extract32(opt, 1, 1) == 0) {
2401        unallocated_encoding(s);
2402        return;
2403    }
2404
2405    if (is_vector) {
2406        size |= (opc & 2) << 1;
2407        if (size > 4) {
2408            unallocated_encoding(s);
2409            return;
2410        }
2411        is_store = !extract32(opc, 0, 1);
2412        if (!fp_access_check(s)) {
2413            return;
2414        }
2415    } else {
2416        if (size == 3 && opc == 2) {
2417            /* PRFM - prefetch */
2418            return;
2419        }
2420        if (opc == 3 && size > 1) {
2421            unallocated_encoding(s);
2422            return;
2423        }
2424        is_store = (opc == 0);
2425        is_signed = extract32(opc, 1, 1);
2426        is_extended = (size < 3) && extract32(opc, 0, 1);
2427    }
2428
2429    if (rn == 31) {
2430        gen_check_sp_alignment(s);
2431    }
2432    tcg_addr = read_cpu_reg_sp(s, rn, 1);
2433
2434    tcg_rm = read_cpu_reg(s, rm, 1);
2435    ext_and_shift_reg(tcg_rm, tcg_rm, opt, shift ? size : 0);
2436
2437    tcg_gen_add_i64(tcg_addr, tcg_addr, tcg_rm);
2438
2439    if (is_vector) {
2440        if (is_store) {
2441            do_fp_st(s, rt, tcg_addr, size);
2442        } else {
2443            do_fp_ld(s, rt, tcg_addr, size);
2444        }
2445    } else {
2446        TCGv_i64 tcg_rt = cpu_reg(s, rt);
2447        bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
2448        if (is_store) {
2449            do_gpr_st(s, tcg_rt, tcg_addr, size,
2450                      true, rt, iss_sf, false);
2451        } else {
2452            do_gpr_ld(s, tcg_rt, tcg_addr, size,
2453                      is_signed, is_extended,
2454                      true, rt, iss_sf, false);
2455        }
2456    }
2457}
2458
2459/*
2460 * C3.3.13 Load/store (unsigned immediate)
2461 *
2462 * 31 30 29   27  26 25 24 23 22 21        10 9     5 4    0
2463 * +----+-------+---+-----+-----+------------+-------+------+
2464 * |size| 1 1 1 | V | 0 1 | opc |   imm12    |  Rn   |  Rt  |
2465 * +----+-------+---+-----+-----+------------+-------+------+
2466 *
2467 * For non-vector:
2468 *   size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit
2469 *   opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
2470 * For vector:
2471 *   size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
2472 *   opc<0>: 0 -> store, 1 -> load
2473 * Rn: base address register (inc SP)
2474 * Rt: target register
2475 */
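/* Worked example (illustrative): LDR x0, [x1, #32] has size=11, so
 * the unsigned imm12 is scaled by 8 below: imm12=4 encodes offset 32.
 */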
2476static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn,
2477                                        int opc,
2478                                        int size,
2479                                        int rt,
2480                                        bool is_vector)
2481{
2482    int rn = extract32(insn, 5, 5);
2483    unsigned int imm12 = extract32(insn, 10, 12);
2484    unsigned int offset;
2485
2486    TCGv_i64 tcg_addr;
2487
2488    bool is_store;
2489    bool is_signed = false;
2490    bool is_extended = false;
2491
2492    if (is_vector) {
2493        size |= (opc & 2) << 1;
2494        if (size > 4) {
2495            unallocated_encoding(s);
2496            return;
2497        }
2498        is_store = !extract32(opc, 0, 1);
2499        if (!fp_access_check(s)) {
2500            return;
2501        }
2502    } else {
2503        if (size == 3 && opc == 2) {
2504            /* PRFM - prefetch */
2505            return;
2506        }
2507        if (opc == 3 && size > 1) {
2508            unallocated_encoding(s);
2509            return;
2510        }
2511        is_store = (opc == 0);
2512        is_signed = extract32(opc, 1, 1);
2513        is_extended = (size < 3) && extract32(opc, 0, 1);
2514    }
2515
2516    if (rn == 31) {
2517        gen_check_sp_alignment(s);
2518    }
2519    tcg_addr = read_cpu_reg_sp(s, rn, 1);
2520    offset = imm12 << size;
2521    tcg_gen_addi_i64(tcg_addr, tcg_addr, offset);
2522
2523    if (is_vector) {
2524        if (is_store) {
2525            do_fp_st(s, rt, tcg_addr, size);
2526        } else {
2527            do_fp_ld(s, rt, tcg_addr, size);
2528        }
2529    } else {
2530        TCGv_i64 tcg_rt = cpu_reg(s, rt);
2531        bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
2532        if (is_store) {
2533            do_gpr_st(s, tcg_rt, tcg_addr, size,
2534                      true, rt, iss_sf, false);
2535        } else {
2536            do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, is_extended,
2537                      true, rt, iss_sf, false);
2538        }
2539    }
2540}
2541
2542/* Load/store register (all forms) */
2543static void disas_ldst_reg(DisasContext *s, uint32_t insn)
2544{
2545    int rt = extract32(insn, 0, 5);
2546    int opc = extract32(insn, 22, 2);
2547    bool is_vector = extract32(insn, 26, 1);
2548    int size = extract32(insn, 30, 2);
2549
2550    switch (extract32(insn, 24, 2)) {
2551    case 0:
2552        if (extract32(insn, 21, 1) == 1 && extract32(insn, 10, 2) == 2) {
2553            disas_ldst_reg_roffset(s, insn, opc, size, rt, is_vector);
2554        } else {
2555            /* Load/store register (unscaled immediate)
2556             * Load/store immediate pre/post-indexed
2557             * Load/store register unprivileged
2558             */
2559            disas_ldst_reg_imm9(s, insn, opc, size, rt, is_vector);
2560        }
2561        break;
2562    case 1:
2563        disas_ldst_reg_unsigned_imm(s, insn, opc, size, rt, is_vector);
2564        break;
2565    default:
2566        unallocated_encoding(s);
2567        break;
2568    }
2569}
2570
2571/* C3.3.1 AdvSIMD load/store multiple structures
2572 *
2573 *  31  30  29           23 22  21         16 15    12 11  10 9    5 4    0
2574 * +---+---+---------------+---+-------------+--------+------+------+------+
2575 * | 0 | Q | 0 0 1 1 0 0 0 | L | 0 0 0 0 0 0 | opcode | size |  Rn  |  Rt  |
2576 * +---+---+---------------+---+-------------+--------+------+------+------+
2577 *
2578 * C3.3.2 AdvSIMD load/store multiple structures (post-indexed)
2579 *
2580 *  31  30  29           23 22  21  20     16 15    12 11  10 9    5 4    0
2581 * +---+---+---------------+---+---+---------+--------+------+------+------+
2582 * | 0 | Q | 0 0 1 1 0 0 1 | L | 0 |   Rm    | opcode | size |  Rn  |  Rt  |
2583 * +---+---+---------------+---+---+---------+--------+------+------+------+
2584 *
2585 * Rt: first (or only) SIMD&FP register to be transferred
2586 * Rn: base address or SP
2587 * Rm (post-index only): post-index register (when !31) or size dependent #imm
2588 */
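/* Worked example (illustrative): LD4 {v0.4s-v3.4s}, [x0] has Q=1,
 * opcode=0000, size=10, so rpt=1, selem=4, elements=4, ebytes=4 below:
 * 16 single-word loads, de-interleaved across v0..v3.
 */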
2589static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn)
2590{
2591    int rt = extract32(insn, 0, 5);
2592    int rn = extract32(insn, 5, 5);
2593    int size = extract32(insn, 10, 2);
2594    int opcode = extract32(insn, 12, 4);
2595    bool is_store = !extract32(insn, 22, 1);
2596    bool is_postidx = extract32(insn, 23, 1);
2597    bool is_q = extract32(insn, 30, 1);
2598    TCGv_i64 tcg_addr, tcg_rn;
2599
2600    int ebytes = 1 << size;
2601    int elements = (is_q ? 128 : 64) / (8 << size);
2602    int rpt;    /* num iterations */
2603    int selem;  /* structure elements */
2604    int r;
2605
2606    if (extract32(insn, 31, 1) || extract32(insn, 21, 1)) {
2607        unallocated_encoding(s);
2608        return;
2609    }
2610
2611    /* From the shared decode logic */
2612    switch (opcode) {
2613    case 0x0:
2614        rpt = 1;
2615        selem = 4;
2616        break;
2617    case 0x2:
2618        rpt = 4;
2619        selem = 1;
2620        break;
2621    case 0x4:
2622        rpt = 1;
2623        selem = 3;
2624        break;
2625    case 0x6:
2626        rpt = 3;
2627        selem = 1;
2628        break;
2629    case 0x7:
2630        rpt = 1;
2631        selem = 1;
2632        break;
2633    case 0x8:
2634        rpt = 1;
2635        selem = 2;
2636        break;
2637    case 0xa:
2638        rpt = 2;
2639        selem = 1;
2640        break;
2641    default:
2642        unallocated_encoding(s);
2643        return;
2644    }
2645
2646    if (size == 3 && !is_q && selem != 1) {
2647        /* reserved */
2648        unallocated_encoding(s);
2649        return;
2650    }
2651
2652    if (!fp_access_check(s)) {
2653        return;
2654    }
2655
2656    if (rn == 31) {
2657        gen_check_sp_alignment(s);
2658    }
2659
2660    tcg_rn = cpu_reg_sp(s, rn);
2661    tcg_addr = tcg_temp_new_i64();
2662    tcg_gen_mov_i64(tcg_addr, tcg_rn);
2663
2664    for (r = 0; r < rpt; r++) {
2665        int e;
2666        for (e = 0; e < elements; e++) {
2667            int tt = (rt + r) % 32;
2668            int xs;
2669            for (xs = 0; xs < selem; xs++) {
2670                if (is_store) {
2671                    do_vec_st(s, tt, e, tcg_addr, size);
2672                } else {
2673                    do_vec_ld(s, tt, e, tcg_addr, size);
2674
2675                    /* For non-quad operations, setting a slice of the low
2676                     * 64 bits of the register clears the high 64 bits (in
2677                     * the ARM ARM pseudocode this is implicit in the fact
2678                     * that 'rval' is a 64 bit wide variable). We optimize
2679                     * by noticing that we only need to do this the first
2680                     * time we touch a register.
2681                     */
2682                    if (!is_q && e == 0 && (r == 0 || xs == selem - 1)) {
2683                        clear_vec_high(s, tt);
2684                    }
2685                }
2686                tcg_gen_addi_i64(tcg_addr, tcg_addr, ebytes);
2687                tt = (tt + 1) % 32;
2688            }
2689        }
2690    }
2691
2692    if (is_postidx) {
2693        int rm = extract32(insn, 16, 5);
2694        if (rm == 31) {
2695            tcg_gen_mov_i64(tcg_rn, tcg_addr);
2696        } else {
2697            tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
2698        }
2699    }
2700    tcg_temp_free_i64(tcg_addr);
2701}
2702
2703/* C3.3.3 AdvSIMD load/store single structure
2704 *
2705 *  31  30  29           23 22 21 20       16 15 13 12  11  10 9    5 4    0
2706 * +---+---+---------------+-----+-----------+-----+---+------+------+------+
2707 * | 0 | Q | 0 0 1 1 0 1 0 | L R | 0 0 0 0 0 | opc | S | size |  Rn  |  Rt  |
2708 * +---+---+---------------+-----+-----------+-----+---+------+------+------+
2709 *
2710 * C3.3.4 AdvSIMD load/store single structure (post-indexed)
2711 *
2712 *  31  30  29           23 22 21 20       16 15 13 12  11  10 9    5 4    0
2713 * +---+---+---------------+-----+-----------+-----+---+------+------+------+
2714 * | 0 | Q | 0 0 1 1 0 1 1 | L R |     Rm    | opc | S | size |  Rn  |  Rt  |
2715 * +---+---+---------------+-----+-----------+-----+---+------+------+------+
2716 *
2717 * Rt: first (or only) SIMD&FP register to be transferred
2718 * Rn: base address or SP
2719 * Rm (post-index only): post-index register (when !31) or size dependent #imm
2720 * index = encoded in Q:S:size dependent on size
2721 *
2722 * lane_size = encoded in R, opc
2723 * transfer width = encoded in opc, S, size
2724 */
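/* Worked example (illustrative): LD1R {v0.8b}, [x0] has opc=110, R=0,
 * S=0, size=00: scale=3 selects the replicate path with selem=1, so a
 * single byte is loaded and multiplied by 0x0101010101010101 to
 * broadcast it into every lane.
 */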
2725static void disas_ldst_single_struct(DisasContext *s, uint32_t insn)
2726{
2727    int rt = extract32(insn, 0, 5);
2728    int rn = extract32(insn, 5, 5);
2729    int size = extract32(insn, 10, 2);
2730    int S = extract32(insn, 12, 1);
2731    int opc = extract32(insn, 13, 3);
2732    int R = extract32(insn, 21, 1);
2733    int is_load = extract32(insn, 22, 1);
2734    int is_postidx = extract32(insn, 23, 1);
2735    int is_q = extract32(insn, 30, 1);
2736
2737    int scale = extract32(opc, 1, 2);
2738    int selem = (extract32(opc, 0, 1) << 1 | R) + 1;
2739    bool replicate = false;
2740    int index = is_q << 3 | S << 2 | size;
2741    int ebytes, xs;
2742    TCGv_i64 tcg_addr, tcg_rn;
2743
2744    switch (scale) {
2745    case 3:
2746        if (!is_load || S) {
2747            unallocated_encoding(s);
2748            return;
2749        }
2750        scale = size;
2751        replicate = true;
2752        break;
2753    case 0:
2754        break;
2755    case 1:
2756        if (extract32(size, 0, 1)) {
2757            unallocated_encoding(s);
2758            return;
2759        }
2760        index >>= 1;
2761        break;
2762    case 2:
2763        if (extract32(size, 1, 1)) {
2764            unallocated_encoding(s);
2765            return;
2766        }
2767        if (!extract32(size, 0, 1)) {
2768            index >>= 2;
2769        } else {
2770            if (S) {
2771                unallocated_encoding(s);
2772                return;
2773            }
2774            index >>= 3;
2775            scale = 3;
2776        }
2777        break;
2778    default:
2779        g_assert_not_reached();
2780    }
2781
2782    if (!fp_access_check(s)) {
2783        return;
2784    }
2785
2786    ebytes = 1 << scale;
2787
2788    if (rn == 31) {
2789        gen_check_sp_alignment(s);
2790    }
2791
2792    tcg_rn = cpu_reg_sp(s, rn);
2793    tcg_addr = tcg_temp_new_i64();
2794    tcg_gen_mov_i64(tcg_addr, tcg_rn);
2795
2796    for (xs = 0; xs < selem; xs++) {
2797        if (replicate) {
2798            /* Load and replicate to all elements */
2799            uint64_t mulconst;
2800            TCGv_i64 tcg_tmp = tcg_temp_new_i64();
2801
2802            tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr,
2803                                get_mem_index(s), s->be_data + scale);
2804            switch (scale) {
2805            case 0:
2806                mulconst = 0x0101010101010101ULL;
2807                break;
2808            case 1:
2809                mulconst = 0x0001000100010001ULL;
2810                break;
2811            case 2:
2812                mulconst = 0x0000000100000001ULL;
2813                break;
2814            case 3:
2815                mulconst = 0;
2816                break;
2817            default:
2818                g_assert_not_reached();
2819            }
2820            if (mulconst) {
2821                tcg_gen_muli_i64(tcg_tmp, tcg_tmp, mulconst);
2822            }
2823            write_vec_element(s, tcg_tmp, rt, 0, MO_64);
2824            if (is_q) {
2825                write_vec_element(s, tcg_tmp, rt, 1, MO_64);
2826            } else {
2827                clear_vec_high(s, rt);
2828            }
2829            tcg_temp_free_i64(tcg_tmp);
2830        } else {
2831            /* Load/store one element per register */
2832            if (is_load) {
2833                do_vec_ld(s, rt, index, tcg_addr, scale);
2834            } else {
2835                do_vec_st(s, rt, index, tcg_addr, scale);
2836            }
2837        }
2838        tcg_gen_addi_i64(tcg_addr, tcg_addr, ebytes);
2839        rt = (rt + 1) % 32;
2840    }
2841
2842    if (is_postidx) {
2843        int rm = extract32(insn, 16, 5);
2844        if (rm == 31) {
2845            tcg_gen_mov_i64(tcg_rn, tcg_addr);
2846        } else {
2847            tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
2848        }
2849    }
2850    tcg_temp_free_i64(tcg_addr);
2851}
2852
2853/* C3.3 Loads and stores */
2854static void disas_ldst(DisasContext *s, uint32_t insn)
2855{
2856    switch (extract32(insn, 24, 6)) {
2857    case 0x08: /* Load/store exclusive */
2858        disas_ldst_excl(s, insn);
2859        break;
2860    case 0x18: case 0x1c: /* Load register (literal) */
2861        disas_ld_lit(s, insn);
2862        break;
2863    case 0x28: case 0x29:
2864    case 0x2c: case 0x2d: /* Load/store pair (all forms) */
2865        disas_ldst_pair(s, insn);
2866        break;
2867    case 0x38: case 0x39:
2868    case 0x3c: case 0x3d: /* Load/store register (all forms) */
2869        disas_ldst_reg(s, insn);
2870        break;
2871    case 0x0c: /* AdvSIMD load/store multiple structures */
2872        disas_ldst_multiple_struct(s, insn);
2873        break;
2874    case 0x0d: /* AdvSIMD load/store single structure */
2875        disas_ldst_single_struct(s, insn);
2876        break;
2877    default:
2878        unallocated_encoding(s);
2879        break;
2880    }
2881}
2882
2883/* C3.4.6 PC-rel. addressing
2884 *   31  30   29 28       24 23                5 4    0
2885 * +----+-------+-----------+-------------------+------+
2886 * | op | immlo | 1 0 0 0 0 |       immhi       |  Rd  |
2887 * +----+-------+-----------+-------------------+------+
2888 */
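/* Worked example (illustrative): an ADRP with immhi:immlo = 1,
 * executed at (hypothetical) address 0x40000120, computes
 * base = 0x40000000 after masking and offset = 1 << 12, so
 * Rd = 0x40001000.
 */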
2889static void disas_pc_rel_adr(DisasContext *s, uint32_t insn)
2890{
2891    unsigned int page, rd;
2892    uint64_t base;
2893    uint64_t offset;
2894
2895    page = extract32(insn, 31, 1);
2896    /* SignExtend(immhi:immlo) -> offset */
2897    offset = sextract64(insn, 5, 19);
2898    offset = offset << 2 | extract32(insn, 29, 2);
2899    rd = extract32(insn, 0, 5);
2900    base = s->pc - 4;
2901
2902    if (page) {
2903        /* ADRP (page based) */
2904        base &= ~0xfff;
2905        offset <<= 12;
2906    }
2907
2908    tcg_gen_movi_i64(cpu_reg(s, rd), base + offset);
2909}
2910
2911/*
2912 * C3.4.1 Add/subtract (immediate)
2913 *
2914 *  31 30 29 28       24 23 22 21         10 9   5 4   0
2915 * +--+--+--+-----------+-----+-------------+-----+-----+
2916 * |sf|op| S| 1 0 0 0 1 |shift|    imm12    |  Rn | Rd  |
2917 * +--+--+--+-----------+-----+-------------+-----+-----+
2918 *
2919 *    sf: 0 -> 32bit, 1 -> 64bit
2920 *    op: 0 -> add  , 1 -> sub
2921 *     S: 1 -> set flags
2922 * shift: 00 -> LSL imm by 0, 01 -> LSL imm by 12
2923 */
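/* Worked examples (illustrative): ADD x0, x1, #1, LSL #12 has
 * shift=01 so imm becomes 0x1000. Note that SUB sp, sp, #16
 * (setflags=0) really writes the SP, because the Rd lookup below uses
 * cpu_reg_sp() for the non-flag-setting forms.
 */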
2924static void disas_add_sub_imm(DisasContext *s, uint32_t insn)
2925{
2926    int rd = extract32(insn, 0, 5);
2927    int rn = extract32(insn, 5, 5);
2928    uint64_t imm = extract32(insn, 10, 12);
2929    int shift = extract32(insn, 22, 2);
2930    bool setflags = extract32(insn, 29, 1);
2931    bool sub_op = extract32(insn, 30, 1);
2932    bool is_64bit = extract32(insn, 31, 1);
2933
2934    TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
2935    TCGv_i64 tcg_rd = setflags ? cpu_reg(s, rd) : cpu_reg_sp(s, rd);
2936    TCGv_i64 tcg_result;
2937
2938    switch (shift) {
2939    case 0x0:
2940        break;
2941    case 0x1:
2942        imm <<= 12;
2943        break;
2944    default:
2945        unallocated_encoding(s);
2946        return;
2947    }
2948
2949    tcg_result = tcg_temp_new_i64();
2950    if (!setflags) {
2951        if (sub_op) {
2952            tcg_gen_subi_i64(tcg_result, tcg_rn, imm);
2953        } else {
2954            tcg_gen_addi_i64(tcg_result, tcg_rn, imm);
2955        }
2956    } else {
2957        TCGv_i64 tcg_imm = tcg_const_i64(imm);
2958        if (sub_op) {
2959            gen_sub_CC(is_64bit, tcg_result, tcg_rn, tcg_imm);
2960        } else {
2961            gen_add_CC(is_64bit, tcg_result, tcg_rn, tcg_imm);
2962        }
2963        tcg_temp_free_i64(tcg_imm);
2964    }
2965
2966    if (is_64bit) {
2967        tcg_gen_mov_i64(tcg_rd, tcg_result);
2968    } else {
2969        tcg_gen_ext32u_i64(tcg_rd, tcg_result);
2970    }
2971
2972    tcg_temp_free_i64(tcg_result);
2973}
2974
2975/* The input should be a value in the bottom e bits (with higher
2976 * bits zero); returns that value replicated into every element
2977 * of size e in a 64 bit integer.
2978 */
2979static uint64_t bitfield_replicate(uint64_t mask, unsigned int e)
2980{
2981    assert(e != 0);
2982    while (e < 64) {
2983        mask |= mask << e;
2984        e *= 2;
2985    }
2986    return mask;
2987}
2988
2989/* Return a value with the bottom len bits set (where 0 < len <= 64) */
2990static inline uint64_t bitmask64(unsigned int length)
2991{
2992    assert(length > 0 && length <= 64);
2993    return ~0ULL >> (64 - length);
2994}
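/* Illustrative values for the two helpers above:
 *   bitmask64(4)               = 0x000000000000000f
 *   bitfield_replicate(0x3, 8) = 0x0303030303030303
 */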
2995
2996/* Simplified variant of pseudocode DecodeBitMasks() for the case where we
2997 * only require the wmask. Returns false if the imms/immr/immn are a reserved
2998 * value (ie should cause a guest UNDEF exception), and true if they are
2999 * valid, in which case the decoded bit pattern is written to result.
3000 */
3001static bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn,
3002                                   unsigned int imms, unsigned int immr)
3003{
3004    uint64_t mask;
3005    unsigned e, levels, s, r;
3006    int len;
3007
3008    assert(immn < 2 && imms < 64 && immr < 64);
3009
3010    /* The bit patterns we create here are 64 bit patterns which
3011     * are vectors of identical elements of size e = 2, 4, 8, 16, 32 or
3012     * 64 bits each. Each element contains the same value: a run
3013     * of between 1 and e-1 non-zero bits, rotated within the
3014     * element by between 0 and e-1 bits.
3015     *
3016     * The element size and run length are encoded into immn (1 bit)
3017     * and imms (6 bits) as follows:
3018     * 64 bit elements: immn = 1, imms = <length of run - 1>
3019     * 32 bit elements: immn = 0, imms = 0 : <length of run - 1>
3020     * 16 bit elements: immn = 0, imms = 10 : <length of run - 1>
3021     *  8 bit elements: immn = 0, imms = 110 : <length of run - 1>
3022     *  4 bit elements: immn = 0, imms = 1110 : <length of run - 1>
3023     *  2 bit elements: immn = 0, imms = 11110 : <length of run - 1>
3024     * Notice that immn = 0, imms = 11111x is the only combination
3025     * not covered by one of the above options; this is reserved.
3026     * Further, <length of run - 1> all-ones is a reserved pattern.
3027     *
3028     * In all cases the rotation is by immr % e (and immr is 6 bits).
3029     */
3030
3031    /* First determine the element size */
3032    len = 31 - clz32((immn << 6) | (~imms & 0x3f));
3033    if (len < 1) {
3034        /* This is the immn == 0, imms == 11111x case */
3035        return false;
3036    }
3037    e = 1 << len;
3038
3039    levels = e - 1;
3040    s = imms & levels;
3041    r = immr & levels;
3042
3043    if (s == levels) {
3044        /* <length of run - 1> mustn't be all-ones. */
3045        return false;
3046    }
3047
3048    /* Create the value of one element: s+1 set bits rotated
3049     * by r within the element (which is e bits wide)...
3050     */
3051    mask = bitmask64(s + 1);
3052    if (r) {
3053        mask = (mask >> r) | (mask << (e - r));
3054        mask &= bitmask64(e);
3055    }
3056    /* ...then replicate the element over the whole 64 bit value */
3057    mask = bitfield_replicate(mask, e);
3058    *result = mask;
3059    return true;
3060}
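/* Worked example (illustrative): immn=0, imms=0b111100, immr=0
 * encodes 2 bit elements (len=1, e=2) with a run length of s+1=1 bit,
 * so mask = bitmask64(1) = 1 and the replicated result is
 * 0x5555555555555555 (every other bit set).
 */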
3061
3062/* C3.4.4 Logical (immediate)
3063 *   31  30 29 28         23 22  21  16 15  10 9    5 4    0
3064 * +----+-----+-------------+---+------+------+------+------+
3065 * | sf | opc | 1 0 0 1 0 0 | N | immr | imms |  Rn  |  Rd  |
3066 * +----+-----+-------------+---+------+------+------+------+
3067 */
3068static void disas_logic_imm(DisasContext *s, uint32_t insn)
3069{
3070    unsigned int sf, opc, is_n, immr, imms, rn, rd;
3071    TCGv_i64 tcg_rd, tcg_rn;
3072    uint64_t wmask;
3073    bool is_and = false;
3074
3075    sf = extract32(insn, 31, 1);
3076    opc = extract32(insn, 29, 2);
3077    is_n = extract32(insn, 22, 1);
3078    immr = extract32(insn, 16, 6);
3079    imms = extract32(insn, 10, 6);
3080    rn = extract32(insn, 5, 5);
3081    rd = extract32(insn, 0, 5);
3082
3083    if (!sf && is_n) {
3084        unallocated_encoding(s);
3085        return;
3086    }
3087
3088    if (opc == 0x3) { /* ANDS */
3089        tcg_rd = cpu_reg(s, rd);
3090    } else {
3091        tcg_rd = cpu_reg_sp(s, rd);
3092    }
3093    tcg_rn = cpu_reg(s, rn);
3094
3095    if (!logic_imm_decode_wmask(&wmask, is_n, imms, immr)) {
3096        /* some immediate field values are reserved */
3097        unallocated_encoding(s);
3098        return;
3099    }
3100
3101    if (!sf) {
3102        wmask &= 0xffffffff;
3103    }
3104
3105    switch (opc) {
3106    case 0x3: /* ANDS */
3107    case 0x0: /* AND */
3108        tcg_gen_andi_i64(tcg_rd, tcg_rn, wmask);
3109        is_and = true;
3110        break;
3111    case 0x1: /* ORR */
3112        tcg_gen_ori_i64(tcg_rd, tcg_rn, wmask);
3113        break;
3114    case 0x2: /* EOR */
3115        tcg_gen_xori_i64(tcg_rd, tcg_rn, wmask);
3116        break;
3117    default:
3118        g_assert_not_reached(); /* must handle all above */
3119        break;
3120    }
3121
3122    if (!sf && !is_and) {
3123        /* zero extend final result; we know we can skip this for AND
3124         * since the immediate had the high 32 bits clear.
3125         */
3126        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3127    }
3128
3129    if (opc == 3) { /* ANDS */
3130        gen_logic_CC(sf, tcg_rd);
3131    }
3132}
3133
3134/*
3135 * C3.4.5 Move wide (immediate)
3136 *
3137 *  31 30 29 28         23 22 21 20             5 4    0
3138 * +--+-----+-------------+-----+----------------+------+
3139 * |sf| opc | 1 0 0 1 0 1 |  hw |  imm16         |  Rd  |
3140 * +--+-----+-------------+-----+----------------+------+
3141 *
3142 * sf: 0 -> 32 bit, 1 -> 64 bit
3143 * opc: 00 -> MOVN, 10 -> MOVZ, 11 -> MOVK (01 is unallocated)
3144 * hw: shift/16 (0 or 16; 32 and 48 are valid only when sf=1)
3145 */
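/* Worked example (illustrative): MOVZ x0, #0x1234, LSL #16 has
 * opc=10 and hw=01, so pos=16 and x0 becomes 0x12340000; a following
 * MOVK x0, #0x5678 (hw=00) deposits into bits 15:0 and leaves the
 * rest of the register intact.
 */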
3146static void disas_movw_imm(DisasContext *s, uint32_t insn)
3147{
3148    int rd = extract32(insn, 0, 5);
3149    uint64_t imm = extract32(insn, 5, 16);
3150    int sf = extract32(insn, 31, 1);
3151    int opc = extract32(insn, 29, 2);
3152    int pos = extract32(insn, 21, 2) << 4;
3153    TCGv_i64 tcg_rd = cpu_reg(s, rd);
3154    TCGv_i64 tcg_imm;
3155
3156    if (!sf && (pos >= 32)) {
3157        unallocated_encoding(s);
3158        return;
3159    }
3160
3161    switch (opc) {
3162    case 0: /* MOVN */
3163    case 2: /* MOVZ */
3164        imm <<= pos;
3165        if (opc == 0) {
3166            imm = ~imm;
3167        }
3168        if (!sf) {
3169            imm &= 0xffffffffu;
3170        }
3171        tcg_gen_movi_i64(tcg_rd, imm);
3172        break;
3173    case 3: /* MOVK */
3174        tcg_imm = tcg_const_i64(imm);
3175        tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_imm, pos, 16);
3176        tcg_temp_free_i64(tcg_imm);
3177        if (!sf) {
3178            tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3179        }
3180        break;
3181    default:
3182        unallocated_encoding(s);
3183        break;
3184    }
3185}
3186
3187/* C3.4.2 Bitfield
3188 *   31  30 29 28         23 22  21  16 15  10 9    5 4    0
3189 * +----+-----+-------------+---+------+------+------+------+
3190 * | sf | opc | 1 0 0 1 1 0 | N | immr | imms |  Rn  |  Rd  |
3191 * +----+-----+-------------+---+------+------+------+------+
3192 */
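/* Alias examples (illustrative): UBFX x0, x1, #8, #4 is UBFM with
 * ri=8, si=11 (si >= ri: copy bits 11:8 down to 3:0), while
 * LSL x0, x1, #3 is UBFM with ri=61, si=60 and is caught by the
 * si + 1 == ri shortcut below.
 */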
3193static void disas_bitfield(DisasContext *s, uint32_t insn)
3194{
3195    unsigned int sf, n, opc, ri, si, rn, rd, bitsize, pos, len;
3196    TCGv_i64 tcg_rd, tcg_tmp;
3197
3198    sf = extract32(insn, 31, 1);
3199    opc = extract32(insn, 29, 2);
3200    n = extract32(insn, 22, 1);
3201    ri = extract32(insn, 16, 6);
3202    si = extract32(insn, 10, 6);
3203    rn = extract32(insn, 5, 5);
3204    rd = extract32(insn, 0, 5);
3205    bitsize = sf ? 64 : 32;
3206
3207    if (sf != n || ri >= bitsize || si >= bitsize || opc > 2) {
3208        unallocated_encoding(s);
3209        return;
3210    }
3211
3212    tcg_rd = cpu_reg(s, rd);
3213
3214    /* Suppress the zero-extend for !sf.  Since RI and SI are constrained
3215       to be smaller than bitsize, we'll never reference data outside the
3216       low 32-bits anyway.  */
3217    tcg_tmp = read_cpu_reg(s, rn, 1);
3218
3219    /* Recognize the common aliases.  */
3220    if (opc == 0) { /* SBFM */
3221        if (ri == 0) {
3222            if (si == 7) { /* SXTB */
3223                tcg_gen_ext8s_i64(tcg_rd, tcg_tmp);
3224                goto done;
3225            } else if (si == 15) { /* SXTH */
3226                tcg_gen_ext16s_i64(tcg_rd, tcg_tmp);
3227                goto done;
3228            } else if (si == 31) { /* SXTW */
3229                tcg_gen_ext32s_i64(tcg_rd, tcg_tmp);
3230                goto done;
3231            }
3232        }
3233        if (si == 63 || (si == 31 && ri <= si)) { /* ASR */
3234            if (si == 31) {
3235                tcg_gen_ext32s_i64(tcg_tmp, tcg_tmp);
3236            }
3237            tcg_gen_sari_i64(tcg_rd, tcg_tmp, ri);
3238            goto done;
3239        }
3240    } else if (opc == 2) { /* UBFM */
3241        if (ri == 0) { /* UXTB, UXTH, plus non-canonical AND */
3242            tcg_gen_andi_i64(tcg_rd, tcg_tmp, bitmask64(si + 1));
3243            return;
3244        }
3245        if (si == 63 || (si == 31 && ri <= si)) { /* LSR */
3246            if (si == 31) {
3247                tcg_gen_ext32u_i64(tcg_tmp, tcg_tmp);
3248            }
3249            tcg_gen_shri_i64(tcg_rd, tcg_tmp, ri);
3250            return;
3251        }
3252        if (si + 1 == ri && si != bitsize - 1) { /* LSL */
3253            int shift = bitsize - 1 - si;
3254            tcg_gen_shli_i64(tcg_rd, tcg_tmp, shift);
3255            goto done;
3256        }
3257    }
3258
3259    if (opc != 1) { /* SBFM or UBFM */
3260        tcg_gen_movi_i64(tcg_rd, 0);
3261    }
3262
3263    /* do the bit move operation */
3264    if (si >= ri) {
3265        /* Wd<s-r:0> = Wn<s:r> */
3266        tcg_gen_shri_i64(tcg_tmp, tcg_tmp, ri);
3267        pos = 0;
3268        len = (si - ri) + 1;
3269    } else {
3270        /* Wd<32+s-r:32-r> = Wn<s:0> */
3271        pos = bitsize - ri;
3272        len = si + 1;
3273    }
3274
3275    tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, pos, len);
3276
3277    if (opc == 0) { /* SBFM - sign extend the destination field */
3278        tcg_gen_shli_i64(tcg_rd, tcg_rd, 64 - (pos + len));
3279        tcg_gen_sari_i64(tcg_rd, tcg_rd, 64 - (pos + len));
3280    }
3281
3282 done:
3283    if (!sf) { /* zero extend final result */
3284        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3285    }
3286}
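
    /* Worked example (illustrative): UBFX Wd, Wn, #lsb, #width assembles
     * to UBFM with ri = lsb and si = lsb + width - 1, so si >= ri and the
     * code above shifts right by ri and deposits len = (si - ri) + 1 bits
     * at pos = 0.  SBFIZ Xd, Xn, #lsb, #width is SBFM with
     * ri = (64 - lsb) & 63 and si = width - 1, taking the si < ri path
     * with pos = lsb and len = width, after which the destination field
     * is sign-extended.
     */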
3287
3288/* C3.4.3 Extract
3289 *   31  30  29 28         23 22   21  20  16 15    10 9    5 4    0
3290 * +----+------+-------------+---+----+------+--------+------+------+
3291 * | sf | op21 | 1 0 0 1 1 1 | N | o0 |  Rm  |  imms  |  Rn  |  Rd  |
3292 * +----+------+-------------+---+----+------+--------+------+------+
3293 */
3294static void disas_extract(DisasContext *s, uint32_t insn)
3295{
3296    unsigned int sf, n, rm, imm, rn, rd, bitsize, op21, op0;
3297
3298    sf = extract32(insn, 31, 1);
3299    n = extract32(insn, 22, 1);
3300    rm = extract32(insn, 16, 5);
3301    imm = extract32(insn, 10, 6);
3302    rn = extract32(insn, 5, 5);
3303    rd = extract32(insn, 0, 5);
3304    op21 = extract32(insn, 29, 2);
3305    op0 = extract32(insn, 21, 1);
3306    bitsize = sf ? 64 : 32;
3307
3308    if (sf != n || op21 || op0 || imm >= bitsize) {
3309        unallocated_encoding(s);
3310    } else {
3311        TCGv_i64 tcg_rd, tcg_rm, tcg_rn;
3312
3313        tcg_rd = cpu_reg(s, rd);
3314
3315        if (unlikely(imm == 0)) {
3316            /* tcg shl_i32/shl_i64 is undefined for 32/64 bit shifts,
3317             * so an extract from bit 0 is a special case.
3318             */
3319            if (sf) {
3320                tcg_gen_mov_i64(tcg_rd, cpu_reg(s, rm));
3321            } else {
3322                tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, rm));
3323            }
3324        } else if (rm == rn) { /* ROR */
3325            tcg_rm = cpu_reg(s, rm);
3326            if (sf) {
3327                tcg_gen_rotri_i64(tcg_rd, tcg_rm, imm);
3328            } else {
3329                TCGv_i32 tmp = tcg_temp_new_i32();
3330                tcg_gen_extrl_i64_i32(tmp, tcg_rm);
3331                tcg_gen_rotri_i32(tmp, tmp, imm);
3332                tcg_gen_extu_i32_i64(tcg_rd, tmp);
3333                tcg_temp_free_i32(tmp);
3334            }
3335        } else {
3336            tcg_rm = read_cpu_reg(s, rm, sf);
3337            tcg_rn = read_cpu_reg(s, rn, sf);
3338            tcg_gen_shri_i64(tcg_rm, tcg_rm, imm);
3339            tcg_gen_shli_i64(tcg_rn, tcg_rn, bitsize - imm);
3340            tcg_gen_or_i64(tcg_rd, tcg_rm, tcg_rn);
3341            if (!sf) {
3342                tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3343            }
3344        }
3345    }
3346}
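
    /* Illustrative note: ROR Wd, Ws, #imm is the preferred alias of
     * EXTR Wd, Ws, Ws, #imm, which is why the Rn == Rm case above maps
     * directly onto tcg_gen_rotri.  The general case computes
     * (Rm >> imm) | (Rn << (bitsize - imm)), i.e. bit imm of Rm becomes
     * bit 0 of the result.
     */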
3347
3348/* C3.4 Data processing - immediate */
3349static void disas_data_proc_imm(DisasContext *s, uint32_t insn)
3350{
3351    switch (extract32(insn, 23, 6)) {
3352    case 0x20: case 0x21: /* PC-rel. addressing */
3353        disas_pc_rel_adr(s, insn);
3354        break;
3355    case 0x22: case 0x23: /* Add/subtract (immediate) */
3356        disas_add_sub_imm(s, insn);
3357        break;
3358    case 0x24: /* Logical (immediate) */
3359        disas_logic_imm(s, insn);
3360        break;
3361    case 0x25: /* Move wide (immediate) */
3362        disas_movw_imm(s, insn);
3363        break;
3364    case 0x26: /* Bitfield */
3365        disas_bitfield(s, insn);
3366        break;
3367    case 0x27: /* Extract */
3368        disas_extract(s, insn);
3369        break;
3370    default:
3371        unallocated_encoding(s);
3372        break;
3373    }
3374}
3375
3376/* Shift a TCGv src by TCGv shift_amount, put result in dst.
3377 * Note that it is the caller's responsibility to ensure that the
3378 * shift amount is in range (ie 0..31 or 0..63) and provide the ARM
3379 * mandated semantics for out of range shifts.
3380 */
3381static void shift_reg(TCGv_i64 dst, TCGv_i64 src, int sf,
3382                      enum a64_shift_type shift_type, TCGv_i64 shift_amount)
3383{
3384    switch (shift_type) {
3385    case A64_SHIFT_TYPE_LSL:
3386        tcg_gen_shl_i64(dst, src, shift_amount);
3387        break;
3388    case A64_SHIFT_TYPE_LSR:
3389        tcg_gen_shr_i64(dst, src, shift_amount);
3390        break;
3391    case A64_SHIFT_TYPE_ASR:
3392        if (!sf) {
3393            tcg_gen_ext32s_i64(dst, src);
3394        }
3395        tcg_gen_sar_i64(dst, sf ? src : dst, shift_amount);
3396        break;
3397    case A64_SHIFT_TYPE_ROR:
3398        if (sf) {
3399            tcg_gen_rotr_i64(dst, src, shift_amount);
3400        } else {
3401            TCGv_i32 t0, t1;
3402            t0 = tcg_temp_new_i32();
3403            t1 = tcg_temp_new_i32();
3404            tcg_gen_extrl_i64_i32(t0, src);
3405            tcg_gen_extrl_i64_i32(t1, shift_amount);
3406            tcg_gen_rotr_i32(t0, t0, t1);
3407            tcg_gen_extu_i32_i64(dst, t0);
3408            tcg_temp_free_i32(t0);
3409            tcg_temp_free_i32(t1);
3410        }
3411        break;
3412    default:
3413        g_assert_not_reached(); /* all shift types should be handled */
3414        break;
3415    }
3416
3417    if (!sf) { /* zero extend final result */
3418        tcg_gen_ext32u_i64(dst, dst);
3419    }
3420}
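
    /* For reference: the AArch64 shift-by-register instructions take the
     * shift amount modulo the register width, so callers such as
     * handle_shift_reg() below mask Rm with 31 or 63 before calling
     * shift_reg(); the TCG shift ops themselves are undefined for
     * out-of-range amounts.
     */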
3421
3422/* Shift a TCGv src by immediate, put result in dst.
3423 * The shift amount must be in range (this should always be true as the
3424 * relevant instructions will UNDEF on bad shift immediates).
3425 */
3426static void shift_reg_imm(TCGv_i64 dst, TCGv_i64 src, int sf,
3427                          enum a64_shift_type shift_type, unsigned int shift_i)
3428{
3429    assert(shift_i < (sf ? 64 : 32));
3430
3431    if (shift_i == 0) {
3432        tcg_gen_mov_i64(dst, src);
3433    } else {
3434        TCGv_i64 shift_const;
3435
3436        shift_const = tcg_const_i64(shift_i);
3437        shift_reg(dst, src, sf, shift_type, shift_const);
3438        tcg_temp_free_i64(shift_const);
3439    }
3440}
3441
3442/* C3.5.10 Logical (shifted register)
3443 *   31  30 29 28       24 23   22 21  20  16 15    10 9    5 4    0
3444 * +----+-----+-----------+-------+---+------+--------+------+------+
3445 * | sf | opc | 0 1 0 1 0 | shift | N |  Rm  |  imm6  |  Rn  |  Rd  |
3446 * +----+-----+-----------+-------+---+------+--------+------+------+
3447 */
3448static void disas_logic_reg(DisasContext *s, uint32_t insn)
3449{
3450    TCGv_i64 tcg_rd, tcg_rn, tcg_rm;
3451    unsigned int sf, opc, shift_type, invert, rm, shift_amount, rn, rd;
3452
3453    sf = extract32(insn, 31, 1);
3454    opc = extract32(insn, 29, 2);
3455    shift_type = extract32(insn, 22, 2);
3456    invert = extract32(insn, 21, 1);
3457    rm = extract32(insn, 16, 5);
3458    shift_amount = extract32(insn, 10, 6);
3459    rn = extract32(insn, 5, 5);
3460    rd = extract32(insn, 0, 5);
3461
3462    if (!sf && (shift_amount & (1 << 5))) {
3463        unallocated_encoding(s);
3464        return;
3465    }
3466
3467    tcg_rd = cpu_reg(s, rd);
3468
3469    if (opc == 1 && shift_amount == 0 && shift_type == 0 && rn == 31) {
3470        /* Unshifted ORR and ORN with WZR/XZR is the standard encoding for
3471         * register-register MOV and MVN, so it is worth special casing.
3472         */
3473        tcg_rm = cpu_reg(s, rm);
3474        if (invert) {
3475            tcg_gen_not_i64(tcg_rd, tcg_rm);
3476            if (!sf) {
3477                tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3478            }
3479        } else {
3480            if (sf) {
3481                tcg_gen_mov_i64(tcg_rd, tcg_rm);
3482            } else {
3483                tcg_gen_ext32u_i64(tcg_rd, tcg_rm);
3484            }
3485        }
3486        return;
3487    }
3488
3489    tcg_rm = read_cpu_reg(s, rm, sf);
3490
3491    if (shift_amount) {
3492        shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, shift_amount);
3493    }
3494
3495    tcg_rn = cpu_reg(s, rn);
3496
3497    switch (opc | (invert << 2)) {
3498    case 0: /* AND */
3499    case 3: /* ANDS */
3500        tcg_gen_and_i64(tcg_rd, tcg_rn, tcg_rm);
3501        break;
3502    case 1: /* ORR */
3503        tcg_gen_or_i64(tcg_rd, tcg_rn, tcg_rm);
3504        break;
3505    case 2: /* EOR */
3506        tcg_gen_xor_i64(tcg_rd, tcg_rn, tcg_rm);
3507        break;
3508    case 4: /* BIC */
3509    case 7: /* BICS */
3510        tcg_gen_andc_i64(tcg_rd, tcg_rn, tcg_rm);
3511        break;
3512    case 5: /* ORN */
3513        tcg_gen_orc_i64(tcg_rd, tcg_rn, tcg_rm);
3514        break;
3515    case 6: /* EON */
3516        tcg_gen_eqv_i64(tcg_rd, tcg_rn, tcg_rm);
3517        break;
3518    default:
3519        g_assert_not_reached();
3520        break;
3521    }
3522
3523    if (!sf) {
3524        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3525    }
3526
3527    if (opc == 3) {
3528        gen_logic_CC(sf, tcg_rd);
3529    }
3530}
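
    /* Illustrative alias: TST Wn, Wm is ANDS WZR, Wn, Wm and so reaches
     * the opc == 3 path above; the write to register 31 is discarded
     * while gen_logic_CC() still updates the condition flags.
     */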
3531
3532/*
3533 * C3.5.1 Add/subtract (extended register)
3534 *
3535 *  31|30|29|28       24|23 22|21|20   16|15  13|12  10|9  5|4  0|
3536 * +--+--+--+-----------+-----+--+-------+------+------+----+----+
3537 * |sf|op| S| 0 1 0 1 1 | opt | 1|  Rm   |option| imm3 | Rn | Rd |
3538 * +--+--+--+-----------+-----+--+-------+------+------+----+----+
3539 *
3540 *  sf: 0 -> 32bit, 1 -> 64bit
3541 *  op: 0 -> add  , 1 -> sub
3542 *   S: 1 -> set flags
3543 * opt: 00
3544 * option: extension type (see DecodeRegExtend)
3545 * imm3: optional left shift (0-4) applied to the extended Rm
3546 *
3547 * Rd = Rn + LSL(extend(Rm), amount)
3548 */
3549static void disas_add_sub_ext_reg(DisasContext *s, uint32_t insn)
3550{
3551    int rd = extract32(insn, 0, 5);
3552    int rn = extract32(insn, 5, 5);
3553    int imm3 = extract32(insn, 10, 3);
3554    int option = extract32(insn, 13, 3);
3555    int rm = extract32(insn, 16, 5);
3556    bool setflags = extract32(insn, 29, 1);
3557    bool sub_op = extract32(insn, 30, 1);
3558    bool sf = extract32(insn, 31, 1);
3559
3560    TCGv_i64 tcg_rm, tcg_rn; /* temps */
3561    TCGv_i64 tcg_rd;
3562    TCGv_i64 tcg_result;
3563
3564    if (imm3 > 4) {
3565        unallocated_encoding(s);
3566        return;
3567    }
3568
3569    /* non-flag setting ops may use SP */
3570    if (!setflags) {
3571        tcg_rd = cpu_reg_sp(s, rd);
3572    } else {
3573        tcg_rd = cpu_reg(s, rd);
3574    }
3575    tcg_rn = read_cpu_reg_sp(s, rn, sf);
3576
3577    tcg_rm = read_cpu_reg(s, rm, sf);
3578    ext_and_shift_reg(tcg_rm, tcg_rm, option, imm3);
3579
3580    tcg_result = tcg_temp_new_i64();
3581
3582    if (!setflags) {
3583        if (sub_op) {
3584            tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
3585        } else {
3586            tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
3587        }
3588    } else {
3589        if (sub_op) {
3590            gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
3591        } else {
3592            gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
3593        }
3594    }
3595
3596    if (sf) {
3597        tcg_gen_mov_i64(tcg_rd, tcg_result);
3598    } else {
3599        tcg_gen_ext32u_i64(tcg_rd, tcg_result);
3600    }
3601
3602    tcg_temp_free_i64(tcg_result);
3603}
3604
3605/*
3606 * C3.5.2 Add/subtract (shifted register)
3607 *
3608 *  31 30 29 28       24 23 22 21 20   16 15     10 9    5 4    0
3609 * +--+--+--+-----------+-----+--+-------+---------+------+------+
3610 * |sf|op| S| 0 1 0 1 1 |shift| 0|  Rm   |  imm6   |  Rn  |  Rd  |
3611 * +--+--+--+-----------+-----+--+-------+---------+------+------+
3612 *
3613 *    sf: 0 -> 32bit, 1 -> 64bit
3614 *    op: 0 -> add  , 1 -> sub
3615 *     S: 1 -> set flags
3616 * shift: 00 -> LSL, 01 -> LSR, 10 -> ASR, 11 -> RESERVED
3617 *  imm6: Shift amount to apply to Rm before the add/sub
3618 */
3619static void disas_add_sub_reg(DisasContext *s, uint32_t insn)
3620{
3621    int rd = extract32(insn, 0, 5);
3622    int rn = extract32(insn, 5, 5);
3623    int imm6 = extract32(insn, 10, 6);
3624    int rm = extract32(insn, 16, 5);
3625    int shift_type = extract32(insn, 22, 2);
3626    bool setflags = extract32(insn, 29, 1);
3627    bool sub_op = extract32(insn, 30, 1);
3628    bool sf = extract32(insn, 31, 1);
3629
3630    TCGv_i64 tcg_rd = cpu_reg(s, rd);
3631    TCGv_i64 tcg_rn, tcg_rm;
3632    TCGv_i64 tcg_result;
3633
3634    if ((shift_type == 3) || (!sf && (imm6 > 31))) {
3635        unallocated_encoding(s);
3636        return;
3637    }
3638
3639    tcg_rn = read_cpu_reg(s, rn, sf);
3640    tcg_rm = read_cpu_reg(s, rm, sf);
3641
3642    shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, imm6);
3643
3644    tcg_result = tcg_temp_new_i64();
3645
3646    if (!setflags) {
3647        if (sub_op) {
3648            tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
3649        } else {
3650            tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
3651        }
3652    } else {
3653        if (sub_op) {
3654            gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
3655        } else {
3656            gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
3657        }
3658    }
3659
3660    if (sf) {
3661        tcg_gen_mov_i64(tcg_rd, tcg_result);
3662    } else {
3663        tcg_gen_ext32u_i64(tcg_rd, tcg_result);
3664    }
3665
3666    tcg_temp_free_i64(tcg_result);
3667}
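
    /* Illustrative aliases: CMP Xn, Xm (SUBS XZR, Xn, Xm) and
     * NEG Xd, Xm (SUB Xd, XZR, Xm) both decode here; register 31 reads
     * as zero and writes to it are discarded.
     */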
3668
3669/* C3.5.9 Data-processing (3 source)
3670 *
3671 *   31 30  29 28       24 23 21  20  16  15  14  10 9    5 4    0
3672 *  +--+------+-----------+------+------+----+------+------+------+
3673 *  |sf| op54 | 1 1 0 1 1 | op31 |  Rm  | o0 |  Ra  |  Rn  |  Rd  |
3674 *  +--+------+-----------+------+------+----+------+------+------+
3675 *
3676 */
3677static void disas_data_proc_3src(DisasContext *s, uint32_t insn)
3678{
3679    int rd = extract32(insn, 0, 5);
3680    int rn = extract32(insn, 5, 5);
3681    int ra = extract32(insn, 10, 5);
3682    int rm = extract32(insn, 16, 5);
3683    int op_id = (extract32(insn, 29, 3) << 4) |
3684        (extract32(insn, 21, 3) << 1) |
3685        extract32(insn, 15, 1);
3686    bool sf = extract32(insn, 31, 1);
3687    bool is_sub = extract32(op_id, 0, 1);
3688    bool is_high = extract32(op_id, 2, 1);
3689    bool is_signed = false;
3690    TCGv_i64 tcg_op1;
3691    TCGv_i64 tcg_op2;
3692    TCGv_i64 tcg_tmp;
3693
3694    /* Note that op_id is sf:op54:op31:o0 so it includes the 32/64 size flag */
3695    switch (op_id) {
3696    case 0x42: /* SMADDL */
3697    case 0x43: /* SMSUBL */
3698    case 0x44: /* SMULH */
3699        is_signed = true;
3700        break;
3701    case 0x0: /* MADD (32bit) */
3702    case 0x1: /* MSUB (32bit) */
3703    case 0x40: /* MADD (64bit) */
3704    case 0x41: /* MSUB (64bit) */
3705    case 0x4a: /* UMADDL */
3706    case 0x4b: /* UMSUBL */
3707    case 0x4c: /* UMULH */
3708        break;
3709    default:
3710        unallocated_encoding(s);
3711        return;
3712    }
3713
3714    if (is_high) {
3715        TCGv_i64 low_bits = tcg_temp_new_i64(); /* low bits discarded */
3716        TCGv_i64 tcg_rd = cpu_reg(s, rd);
3717        TCGv_i64 tcg_rn = cpu_reg(s, rn);
3718        TCGv_i64 tcg_rm = cpu_reg(s, rm);
3719
3720        if (is_signed) {
3721            tcg_gen_muls2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
3722        } else {
3723            tcg_gen_mulu2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
3724        }
3725
3726        tcg_temp_free_i64(low_bits);
3727        return;
3728    }
3729
3730    tcg_op1 = tcg_temp_new_i64();
3731    tcg_op2 = tcg_temp_new_i64();
3732    tcg_tmp = tcg_temp_new_i64();
3733
3734    if (op_id < 0x42) {
3735        tcg_gen_mov_i64(tcg_op1, cpu_reg(s, rn));
3736        tcg_gen_mov_i64(tcg_op2, cpu_reg(s, rm));
3737    } else {
3738        if (is_signed) {
3739            tcg_gen_ext32s_i64(tcg_op1, cpu_reg(s, rn));
3740            tcg_gen_ext32s_i64(tcg_op2, cpu_reg(s, rm));
3741        } else {
3742            tcg_gen_ext32u_i64(tcg_op1, cpu_reg(s, rn));
3743            tcg_gen_ext32u_i64(tcg_op2, cpu_reg(s, rm));
3744        }
3745    }
3746
3747    if (ra == 31 && !is_sub) {
3748        /* Special-case MADD with rA == XZR; it is the standard MUL alias */
3749        tcg_gen_mul_i64(cpu_reg(s, rd), tcg_op1, tcg_op2);
3750    } else {
3751        tcg_gen_mul_i64(tcg_tmp, tcg_op1, tcg_op2);
3752        if (is_sub) {
3753            tcg_gen_sub_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
3754        } else {
3755            tcg_gen_add_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
3756        }
3757    }
3758
3759    if (!sf) {
3760        tcg_gen_ext32u_i64(cpu_reg(s, rd), cpu_reg(s, rd));
3761    }
3762
3763    tcg_temp_free_i64(tcg_op1);
3764    tcg_temp_free_i64(tcg_op2);
3765    tcg_temp_free_i64(tcg_tmp);
3766}
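
    /* Worked decode example (illustrative): SMULH x0, x1, x2 has sf = 1,
     * op54 = 00, op31 = 010 and o0 = 0, giving
     *     op_id = (0b100 << 4) | (0b010 << 1) | 0 = 0x44,
     * which selects the is_signed, is_high path above and discards the
     * low 64 bits of the 128-bit product.
     */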
3767
3768/* C3.5.3 - Add/subtract (with carry)
3769 *  31 30 29 28 27 26 25 24 23 22 21  20  16  15   10  9    5 4   0
3770 * +--+--+--+------------------------+------+---------+------+-----+
3771 * |sf|op| S| 1  1  0  1  0  0  0  0 |  Rm  | opcode2 |  Rn  |  Rd |
3772 * +--+--+--+------------------------+------+---------+------+-----+
3773 *                                            [000000]
3774 */
3775
3776static void disas_adc_sbc(DisasContext *s, uint32_t insn)
3777{
3778    unsigned int sf, op, setflags, rm, rn, rd;
3779    TCGv_i64 tcg_y, tcg_rn, tcg_rd;
3780
3781    if (extract32(insn, 10, 6) != 0) {
3782        unallocated_encoding(s);
3783        return;
3784    }
3785
3786    sf = extract32(insn, 31, 1);
3787    op = extract32(insn, 30, 1);
3788    setflags = extract32(insn, 29, 1);
3789    rm = extract32(insn, 16, 5);
3790    rn = extract32(insn, 5, 5);
3791    rd = extract32(insn, 0, 5);
3792
3793    tcg_rd = cpu_reg(s, rd);
3794    tcg_rn = cpu_reg(s, rn);
3795
3796    if (op) {
3797        tcg_y = new_tmp_a64(s);
3798        tcg_gen_not_i64(tcg_y, cpu_reg(s, rm));
3799    } else {
3800        tcg_y = cpu_reg(s, rm);
3801    }
3802
3803    if (setflags) {
3804        gen_adc_CC(sf, tcg_rd, tcg_rn, tcg_y);
3805    } else {
3806        gen_adc(sf, tcg_rd, tcg_rn, tcg_y);
3807    }
3808}
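
    /* Illustrative identity: SBC computes Rd = Rn - Rm - (1 - C), which
     * in two's complement equals Rn + NOT(Rm) + C; the code above
     * therefore implements SBC(S) by complementing Rm and reusing the
     * ADC(S) generators.
     */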
3809
3810/* C3.5.4 - C3.5.5 Conditional compare (immediate / register)
3811 *  31 30 29 28 27 26 25 24 23 22 21  20    16 15  12  11  10  9   5  4 3   0
3812 * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
3813 * |sf|op| S| 1  1  0  1  0  0  1  0 |imm5/rm | cond |i/r |o2|  Rn  |o3|nzcv |
3814 * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
3815 *        [1]                             y                [0]       [0]
3816 */
3817static void disas_cc(DisasContext *s, uint32_t insn)
3818{
3819    unsigned int sf, op, y, cond, rn, nzcv, is_imm;
3820    TCGv_i32 tcg_t0, tcg_t1, tcg_t2;
3821    TCGv_i64 tcg_tmp, tcg_y, tcg_rn;
3822    DisasCompare c;
3823
3824    if (!extract32(insn, 29, 1)) {
3825        unallocated_encoding(s);
3826        return;
3827    }
3828    if (insn & (1 << 10 | 1 << 4)) {
3829        unallocated_encoding(s);
3830        return;
3831    }
3832    sf = extract32(insn, 31, 1);
3833    op = extract32(insn, 30, 1);
3834    is_imm = extract32(insn, 11, 1);
3835    y = extract32(insn, 16, 5); /* y = rm (reg) or imm5 (imm) */
3836    cond = extract32(insn, 12, 4);
3837    rn = extract32(insn, 5, 5);
3838    nzcv = extract32(insn, 0, 4);
3839
3840    /* Set T0 = !COND.  */
3841    tcg_t0 = tcg_temp_new_i32();
3842    arm_test_cc(&c, cond);
3843    tcg_gen_setcondi_i32(tcg_invert_cond(c.cond), tcg_t0, c.value, 0);
3844    arm_free_cc(&c);
3845
3846    /* Load the arguments for the new comparison.  */
3847    if (is_imm) {
3848        tcg_y = new_tmp_a64(s);
3849        tcg_gen_movi_i64(tcg_y, y);
3850    } else {
3851        tcg_y = cpu_reg(s, y);
3852    }
3853    tcg_rn = cpu_reg(s, rn);
3854
3855    /* Set the flags for the new comparison.  */
3856    tcg_tmp = tcg_temp_new_i64();
3857    if (op) {
3858        gen_sub_CC(sf, tcg_tmp, tcg_rn, tcg_y);
3859    } else {
3860        gen_add_CC(sf, tcg_tmp, tcg_rn, tcg_y);
3861    }
3862    tcg_temp_free_i64(tcg_tmp);
3863
3864    /* If COND was false, force the flags to #nzcv.  Compute two masks
3865     * to help with this: T1 = (COND ? 0 : -1), T2 = (COND ? -1 : 0).
3866     * For tcg hosts that support ANDC, we can make do with just T1.
3867     * In either case, allow the tcg optimizer to delete any unused mask.
3868     */
3869    tcg_t1 = tcg_temp_new_i32();
3870    tcg_t2 = tcg_temp_new_i32();
3871    tcg_gen_neg_i32(tcg_t1, tcg_t0);
3872    tcg_gen_subi_i32(tcg_t2, tcg_t0, 1);
3873
3874    if (nzcv & 8) { /* N */
3875        tcg_gen_or_i32(cpu_NF, cpu_NF, tcg_t1);
3876    } else {
3877        if (TCG_TARGET_HAS_andc_i32) {
3878            tcg_gen_andc_i32(cpu_NF, cpu_NF, tcg_t1);
3879        } else {
3880            tcg_gen_and_i32(cpu_NF, cpu_NF, tcg_t2);
3881        }
3882    }
3883    if (nzcv & 4) { /* Z */
3884        if (TCG_TARGET_HAS_andc_i32) {
3885            tcg_gen_andc_i32(cpu_ZF, cpu_ZF, tcg_t1);
3886        } else {
3887            tcg_gen_and_i32(cpu_ZF, cpu_ZF, tcg_t2);
3888        }
3889    } else {
3890        tcg_gen_or_i32(cpu_ZF, cpu_ZF, tcg_t0);
3891    }
3892    if (nzcv & 2) { /* C */
3893        tcg_gen_or_i32(cpu_CF, cpu_CF, tcg_t0);
3894    } else {
3895        if (TCG_TARGET_HAS_andc_i32) {
3896            tcg_gen_andc_i32(cpu_CF, cpu_CF, tcg_t1);
3897        } else {
3898            tcg_gen_and_i32(cpu_CF, cpu_CF, tcg_t2);
3899        }
3900    }
3901    if (nzcv & 1) { /* V */
3902        tcg_gen_or_i32(cpu_VF, cpu_VF, tcg_t1);
3903    } else {
3904        if (TCG_TARGET_HAS_andc_i32) {
3905            tcg_gen_andc_i32(cpu_VF, cpu_VF, tcg_t1);
3906        } else {
3907            tcg_gen_and_i32(cpu_VF, cpu_VF, tcg_t2);
3908        }
3909    }
3910    tcg_temp_free_i32(tcg_t0);
3911    tcg_temp_free_i32(tcg_t1);
3912    tcg_temp_free_i32(tcg_t2);
3913}
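
    /* Worked example (illustrative): CCMP x0, x1, #4, eq behaves as
     * CMP x0, x1 while EQ holds; otherwise NZCV = 0100, forcing only Z.
     * With COND false we get t0 = 1, t1 = -1, t2 = 0, so the masks above
     * clear N, C and V and force cpu_ZF to zero (i.e. Z set).
     */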
3914
3915/* C3.5.6 Conditional select
3916 *   31   30  29  28             21 20  16 15  12 11 10 9    5 4    0
3917 * +----+----+---+-----------------+------+------+-----+------+------+
3918 * | sf | op | S | 1 1 0 1 0 1 0 0 |  Rm  | cond | op2 |  Rn  |  Rd  |
3919 * +----+----+---+-----------------+------+------+-----+------+------+
3920 */
3921static void disas_cond_select(DisasContext *s, uint32_t insn)
3922{
3923    unsigned int sf, else_inv, rm, cond, else_inc, rn, rd;
3924    TCGv_i64 tcg_rd, zero;
3925    DisasCompare64 c;
3926
3927    if (extract32(insn, 29, 1) || extract32(insn, 11, 1)) {
3928        /* S == 1 or op2<1> == 1 */
3929        unallocated_encoding(s);
3930        return;
3931    }
3932    sf = extract32(insn, 31, 1);
3933    else_inv = extract32(insn, 30, 1);
3934    rm = extract32(insn, 16, 5);
3935    cond = extract32(insn, 12, 4);
3936    else_inc = extract32(insn, 10, 1);
3937    rn = extract32(insn, 5, 5);
3938    rd = extract32(insn, 0, 5);
3939
3940    tcg_rd = cpu_reg(s, rd);
3941
3942    a64_test_cc(&c, cond);
3943    zero = tcg_const_i64(0);
3944
3945    if (rn == 31 && rm == 31 && (else_inc ^ else_inv)) {
3946        /* CSET & CSETM.  */
3947        tcg_gen_setcond_i64(tcg_invert_cond(c.cond), tcg_rd, c.value, zero);
3948        if (else_inv) {
3949            tcg_gen_neg_i64(tcg_rd, tcg_rd);
3950        }
3951    } else {
3952        TCGv_i64 t_true = cpu_reg(s, rn);
3953        TCGv_i64 t_false = read_cpu_reg(s, rm, 1);
3954        if (else_inv && else_inc) {
3955            tcg_gen_neg_i64(t_false, t_false);
3956        } else if (else_inv) {
3957            tcg_gen_not_i64(t_false, t_false);
3958        } else if (else_inc) {
3959            tcg_gen_addi_i64(t_false, t_false, 1);
3960        }
3961        tcg_gen_movcond_i64(c.cond, tcg_rd, c.value, zero, t_true, t_false);
3962    }
3963
3964    tcg_temp_free_i64(zero);
3965    a64_free_cc(&c);
3966
3967    if (!sf) {
3968        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3969    }
3970}
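
    /* Alias summary (illustrative): CSINC (else_inc), CSINV (else_inv)
     * and CSNEG (both) all funnel through the movcond above; for example
     * CINC Wd, Wn, cc is CSINC Wd, Wn, Wn, invert(cc).  With both source
     * registers WZR/XZR, CSINC and CSINV reduce to CSET and CSETM, the
     * setcond special case at the top.
     */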
3971
3972static void handle_clz(DisasContext *s, unsigned int sf,
3973                       unsigned int rn, unsigned int rd)
3974{
3975    TCGv_i64 tcg_rd, tcg_rn;
3976    tcg_rd = cpu_reg(s, rd);
3977    tcg_rn = cpu_reg(s, rn);
3978
3979    if (sf) {
3980        gen_helper_clz64(tcg_rd, tcg_rn);
3981    } else {
3982        TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
3983        tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
3984        gen_helper_clz(tcg_tmp32, tcg_tmp32);
3985        tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
3986        tcg_temp_free_i32(tcg_tmp32);
3987    }
3988}
3989
3990static void handle_cls(DisasContext *s, unsigned int sf,
3991                       unsigned int rn, unsigned int rd)
3992{
3993    TCGv_i64 tcg_rd, tcg_rn;
3994    tcg_rd = cpu_reg(s, rd);
3995    tcg_rn = cpu_reg(s, rn);
3996
3997    if (sf) {
3998        gen_helper_cls64(tcg_rd, tcg_rn);
3999    } else {
4000        TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
4001        tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
4002        gen_helper_cls32(tcg_tmp32, tcg_tmp32);
4003        tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
4004        tcg_temp_free_i32(tcg_tmp32);
4005    }
4006}
4007
4008static void handle_rbit(DisasContext *s, unsigned int sf,
4009                        unsigned int rn, unsigned int rd)
4010{
4011    TCGv_i64 tcg_rd, tcg_rn;
4012    tcg_rd = cpu_reg(s, rd);
4013    tcg_rn = cpu_reg(s, rn);
4014
4015    if (sf) {
4016        gen_helper_rbit64(tcg_rd, tcg_rn);
4017    } else {
4018        TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
4019        tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
4020        gen_helper_rbit(tcg_tmp32, tcg_tmp32);
4021        tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
4022        tcg_temp_free_i32(tcg_tmp32);
4023    }
4024}
4025
4026/* C5.6.149 REV with sf==1, opcode==3 ("REV64") */
4027static void handle_rev64(DisasContext *s, unsigned int sf,
4028                         unsigned int rn, unsigned int rd)
4029{
4030    if (!sf) {
4031        unallocated_encoding(s);
4032        return;
4033    }
4034    tcg_gen_bswap64_i64(cpu_reg(s, rd), cpu_reg(s, rn));
4035}
4036
4037/* C5.6.149 REV (sf==0, opcode==2)
4038 * C5.6.151 REV32 (sf==1, opcode==2)
4039 */
4040static void handle_rev32(DisasContext *s, unsigned int sf,
4041                         unsigned int rn, unsigned int rd)
4042{
4043    TCGv_i64 tcg_rd = cpu_reg(s, rd);
4044
4045    if (sf) {
4046        TCGv_i64 tcg_tmp = tcg_temp_new_i64();
4047        TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
4048
4049        /* bswap32_i64 requires zero high word */
4050        tcg_gen_ext32u_i64(tcg_tmp, tcg_rn);
4051        tcg_gen_bswap32_i64(tcg_rd, tcg_tmp);
4052        tcg_gen_shri_i64(tcg_tmp, tcg_rn, 32);
4053        tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp);
4054        tcg_gen_concat32_i64(tcg_rd, tcg_rd, tcg_tmp);
4055
4056        tcg_temp_free_i64(tcg_tmp);
4057    } else {
4058        tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, rn));
4059        tcg_gen_bswap32_i64(tcg_rd, tcg_rd);
4060    }
4061}
4062
4063/* C5.6.150 REV16 (opcode==1) */
4064static void handle_rev16(DisasContext *s, unsigned int sf,
4065                         unsigned int rn, unsigned int rd)
4066{
4067    TCGv_i64 tcg_rd = cpu_reg(s, rd);
4068    TCGv_i64 tcg_tmp = tcg_temp_new_i64();
4069    TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
4070
4071    tcg_gen_andi_i64(tcg_tmp, tcg_rn, 0xffff);
4072    tcg_gen_bswap16_i64(tcg_rd, tcg_tmp);
4073
4074    tcg_gen_shri_i64(tcg_tmp, tcg_rn, 16);
4075    tcg_gen_andi_i64(tcg_tmp, tcg_tmp, 0xffff);
4076    tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
4077    tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, 16, 16);
4078
4079    if (sf) {
4080        tcg_gen_shri_i64(tcg_tmp, tcg_rn, 32);
4081        tcg_gen_andi_i64(tcg_tmp, tcg_tmp, 0xffff);
4082        tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
4083        tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, 32, 16);
4084
4085        tcg_gen_shri_i64(tcg_tmp, tcg_rn, 48);
4086        tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
4087        tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, 48, 16);
4088    }
4089
4090    tcg_temp_free_i64(tcg_tmp);
4091}
4092
4093/* C3.5.7 Data-processing (1 source)
4094 *   31  30  29  28             21 20     16 15    10 9    5 4    0
4095 * +----+---+---+-----------------+---------+--------+------+------+
4096 * | sf | 1 | S | 1 1 0 1 0 1 1 0 | opcode2 | opcode |  Rn  |  Rd  |
4097 * +----+---+---+-----------------+---------+--------+------+------+
4098 */
4099static void disas_data_proc_1src(DisasContext *s, uint32_t insn)
4100{
4101    unsigned int sf, opcode, rn, rd;
4102
4103    if (extract32(insn, 29, 1) || extract32(insn, 16, 5)) {
4104        unallocated_encoding(s);
4105        return;
4106    }
4107
4108    sf = extract32(insn, 31, 1);
4109    opcode = extract32(insn, 10, 6);
4110    rn = extract32(insn, 5, 5);
4111    rd = extract32(insn, 0, 5);
4112
4113    switch (opcode) {
4114    case 0: /* RBIT */
4115        handle_rbit(s, sf, rn, rd);
4116        break;
4117    case 1: /* REV16 */
4118        handle_rev16(s, sf, rn, rd);
4119        break;
4120    case 2: /* REV32 */
4121        handle_rev32(s, sf, rn, rd);
4122        break;
4123    case 3: /* REV64 */
4124        handle_rev64(s, sf, rn, rd);
4125        break;
4126    case 4: /* CLZ */
4127        handle_clz(s, sf, rn, rd);
4128        break;
4129    case 5: /* CLS */
4130        handle_cls(s, sf, rn, rd);
4131        break;
        default:
            /* remaining opcodes (with opcode2 == 00000) are unallocated */
            unallocated_encoding(s);
            break;
4132    }
4133}
4134
4135static void handle_div(DisasContext *s, bool is_signed, unsigned int sf,
4136                       unsigned int rm, unsigned int rn, unsigned int rd)
4137{
4138    TCGv_i64 tcg_n, tcg_m, tcg_rd;
4139    tcg_rd = cpu_reg(s, rd);
4140
4141    if (!sf && is_signed) {
4142        tcg_n = new_tmp_a64(s);
4143        tcg_m = new_tmp_a64(s);
4144        tcg_gen_ext32s_i64(tcg_n, cpu_reg(s, rn));
4145        tcg_gen_ext32s_i64(tcg_m, cpu_reg(s, rm));
4146    } else {
4147        tcg_n = read_cpu_reg(s, rn, sf);
4148        tcg_m = read_cpu_reg(s, rm, sf);
4149    }
4150
4151    if (is_signed) {
4152        gen_helper_sdiv64(tcg_rd, tcg_n, tcg_m);
4153    } else {
4154        gen_helper_udiv64(tcg_rd, tcg_n, tcg_m);
4155    }
4156
4157    if (!sf) { /* zero extend final result */
4158        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4159    }
4160}
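
    /* For reference: the helpers implement the AArch64 rules that
     * division by zero yields 0 and that the INT64_MIN / -1 overflow
     * case of SDIV returns INT64_MIN, so no explicit checks are needed
     * at translate time.
     */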
4161
4162/* C5.6.115 LSLV, C5.6.118 LSRV, C5.6.17 ASRV, C5.6.154 RORV */
4163static void handle_shift_reg(DisasContext *s,
4164                             enum a64_shift_type shift_type, unsigned int sf,
4165                             unsigned int rm, unsigned int rn, unsigned int rd)
4166{
4167    TCGv_i64 tcg_shift = tcg_temp_new_i64();
4168    TCGv_i64 tcg_rd = cpu_reg(s, rd);
4169    TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
4170
4171    tcg_gen_andi_i64(tcg_shift, cpu_reg(s, rm), sf ? 63 : 31);
4172    shift_reg(tcg_rd, tcg_rn, sf, shift_type, tcg_shift);
4173    tcg_temp_free_i64(tcg_shift);
4174}
4175
4176/* CRC32[BHWX], CRC32C[BHWX] */
4177static void handle_crc32(DisasContext *s,
4178                         unsigned int sf, unsigned int sz, bool crc32c,
4179                         unsigned int rm, unsigned int rn, unsigned int rd)
4180{
4181    TCGv_i64 tcg_acc, tcg_val;
4182    TCGv_i32 tcg_bytes;
4183
4184    if (!arm_dc_feature(s, ARM_FEATURE_CRC)
4185        || (sf == 1 && sz != 3)
4186        || (sf == 0 && sz == 3)) {
4187        unallocated_encoding(s);
4188        return;
4189    }
4190
4191    if (sz == 3) {
4192        tcg_val = cpu_reg(s, rm);
4193    } else {
4194        uint64_t mask;
4195        switch (sz) {
4196        case 0:
4197            mask = 0xFF;
4198            break;
4199        case 1:
4200            mask = 0xFFFF;
4201            break;
4202        case 2:
4203            mask = 0xFFFFFFFF;
4204            break;
4205        default:
4206            g_assert_not_reached();
4207        }
4208        tcg_val = new_tmp_a64(s);
4209        tcg_gen_andi_i64(tcg_val, cpu_reg(s, rm), mask);
4210    }
4211
4212    tcg_acc = cpu_reg(s, rn);
4213    tcg_bytes = tcg_const_i32(1 << sz);
4214
4215    if (crc32c) {
4216        gen_helper_crc32c_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
4217    } else {
4218        gen_helper_crc32_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
4219    }
4220
4221    tcg_temp_free_i32(tcg_bytes);
4222}
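
    /* Worked decode example (illustrative): CRC32CH Wd, Wn, Wm has
     * opcode 0b010101, so sz = 0b01 (halfword) and crc32c = 1; the value
     * is masked to 16 bits and tcg_bytes = 1 << 1 = 2 is passed to the
     * crc32c helper.
     */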
4223
4224/* C3.5.8 Data-processing (2 source)
4225 *   31   30  29 28             21 20  16 15    10 9    5 4    0
4226 * +----+---+---+-----------------+------+--------+------+------+
4227 * | sf | 0 | S | 1 1 0 1 0 1 1 0 |  Rm  | opcode |  Rn  |  Rd  |
4228 * +----+---+---+-----------------+------+--------+------+------+
4229 */
4230static void disas_data_proc_2src(DisasContext *s, uint32_t insn)
4231{
4232    unsigned int sf, rm, opcode, rn, rd;
4233    sf = extract32(insn, 31, 1);
4234    rm = extract32(insn, 16, 5);
4235    opcode = extract32(insn, 10, 6);
4236    rn = extract32(insn, 5, 5);
4237    rd = extract32(insn, 0, 5);
4238
4239    if (extract32(insn, 29, 1)) {
4240        unallocated_encoding(s);
4241        return;
4242    }
4243
4244    switch (opcode) {
4245    case 2: /* UDIV */
4246        handle_div(s, false, sf, rm, rn, rd);
4247        break;
4248    case 3: /* SDIV */
4249        handle_div(s, true, sf, rm, rn, rd);
4250        break;
4251    case 8: /* LSLV */
4252        handle_shift_reg(s, A64_SHIFT_TYPE_LSL, sf, rm, rn, rd);
4253        break;
4254    case 9: /* LSRV */
4255        handle_shift_reg(s, A64_SHIFT_TYPE_LSR, sf, rm, rn, rd);
4256        break;
4257    case 10: /* ASRV */
4258        handle_shift_reg(s, A64_SHIFT_TYPE_ASR, sf, rm, rn, rd);
4259        break;
4260    case 11: /* RORV */
4261        handle_shift_reg(s, A64_SHIFT_TYPE_ROR, sf, rm, rn, rd);
4262        break;
4263    case 16:
4264    case 17:
4265    case 18:
4266    case 19:
4267    case 20:
4268    case 21:
4269    case 22:
4270    case 23: /* CRC32 */
4271    {
4272        int sz = extract32(opcode, 0, 2);
4273        bool crc32c = extract32(opcode, 2, 1);
4274        handle_crc32(s, sf, sz, crc32c, rm, rn, rd);
4275        break;
4276    }
4277    default:
4278        unallocated_encoding(s);
4279        break;
4280    }
4281}
4282
4283/* C3.5 Data processing - register */
4284static void disas_data_proc_reg(DisasContext *s, uint32_t insn)
4285{
4286    switch (extract32(insn, 24, 5)) {
4287    case 0x0a: /* Logical (shifted register) */
4288        disas_logic_reg(s, insn);
4289        break;
4290    case 0x0b: /* Add/subtract */
4291        if (insn & (1 << 21)) { /* (extended register) */
4292            disas_add_sub_ext_reg(s, insn);
4293        } else {
4294            disas_add_sub_reg(s, insn);
4295        }
4296        break;
4297    case 0x1b: /* Data-processing (3 source) */
4298        disas_data_proc_3src(s, insn);
4299        break;
4300    case 0x1a:
4301        switch (extract32(insn, 21, 3)) {
4302        case 0x0: /* Add/subtract (with carry) */
4303            disas_adc_sbc(s, insn);
4304            break;
4305        case 0x2: /* Conditional compare */
4306            disas_cc(s, insn); /* both imm and reg forms */
4307            break;
4308        case 0x4: /* Conditional select */
4309            disas_cond_select(s, insn);
4310            break;
4311        case 0x6: /* Data-processing */
4312            if (insn & (1 << 30)) { /* (1 source) */
4313                disas_data_proc_1src(s, insn);
4314            } else {            /* (2 source) */
4315                disas_data_proc_2src(s, insn);
4316            }
4317            break;
4318        default:
4319            unallocated_encoding(s);
4320            break;
4321        }
4322        break;
4323    default:
4324        unallocated_encoding(s);
4325        break;
4326    }
4327}
4328
4329static void handle_fp_compare(DisasContext *s, bool is_double,
4330                              unsigned int rn, unsigned int rm,
4331                              bool cmp_with_zero, bool signal_all_nans)
4332{
4333    TCGv_i64 tcg_flags = tcg_temp_new_i64();
4334    TCGv_ptr fpst = get_fpstatus_ptr();
4335
4336    if (is_double) {
4337        TCGv_i64 tcg_vn, tcg_vm;
4338
4339        tcg_vn = read_fp_dreg(s, rn);
4340        if (cmp_with_zero) {
4341            tcg_vm = tcg_const_i64(0);
4342        } else {
4343            tcg_vm = read_fp_dreg(s, rm);
4344        }
4345        if (signal_all_nans) {
4346            gen_helper_vfp_cmped_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
4347        } else {
4348            gen_helper_vfp_cmpd_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
4349        }
4350        tcg_temp_free_i64(tcg_vn);
4351        tcg_temp_free_i64(tcg_vm);
4352    } else {
4353        TCGv_i32 tcg_vn, tcg_vm;
4354
4355        tcg_vn = read_fp_sreg(s, rn);
4356        if (cmp_with_zero) {
4357            tcg_vm = tcg_const_i32(0);
4358        } else {
4359            tcg_vm = read_fp_sreg(s, rm);
4360        }
4361        if (signal_all_nans) {
4362            gen_helper_vfp_cmpes_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
4363        } else {
4364            gen_helper_vfp_cmps_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
4365        }
4366        tcg_temp_free_i32(tcg_vn);
4367        tcg_temp_free_i32(tcg_vm);
4368    }
4369
4370    tcg_temp_free_ptr(fpst);
4371
4372    gen_set_nzcv(tcg_flags);
4373
4374    tcg_temp_free_i64(tcg_flags);
4375}
4376
4377/* C3.6.22 Floating point compare
4378 *   31  30  29 28       24 23  22  21 20  16 15 14 13  10    9    5 4     0
4379 * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
4380 * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | op  | 1 0 0 0 |  Rn  |  op2  |
4381 * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
4382 */
4383static void disas_fp_compare(DisasContext *s, uint32_t insn)
4384{
4385    unsigned int mos, type, rm, op, rn, opc, op2r;
4386
4387    mos = extract32(insn, 29, 3);
4388    type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
4389    rm = extract32(insn, 16, 5);
4390    op = extract32(insn, 14, 2);
4391    rn = extract32(insn, 5, 5);
4392    opc = extract32(insn, 3, 2);
4393    op2r = extract32(insn, 0, 3);
4394
4395    if (mos || op || op2r || type > 1) {
4396        unallocated_encoding(s);
4397        return;
4398    }
4399
4400    if (!fp_access_check(s)) {
4401        return;
4402    }
4403
4404    handle_fp_compare(s, type, rn, rm, opc & 1, opc & 2);
4405}
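
    /* Note (illustrative): opc<0> selects comparison against +0.0 and
     * opc<1> the signalling form, so FCMP Sn, #0.0 has opc = 01 while
     * FCMPE Sn, Sm has opc = 10; the signalling variants raise Invalid
     * Operation on quiet NaN inputs as well.
     */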
4406
4407/* C3.6.23 Floating point conditional compare
4408 *   31  30  29 28       24 23  22  21 20  16 15  12 11 10 9    5  4   3    0
4409 * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
4410 * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | cond | 0 1 |  Rn  | op | nzcv |
4411 * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
4412 */
4413static void disas_fp_ccomp(DisasContext *s, uint32_t insn)
4414{
4415    unsigned int mos, type, rm, cond, rn, op, nzcv;
4416    TCGv_i64 tcg_flags;
4417    TCGLabel *label_continue = NULL;
4418
4419    mos = extract32(insn, 29, 3);
4420    type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
4421    rm = extract32(insn, 16, 5);
4422    cond = extract32(insn, 12, 4);
4423    rn = extract32(insn, 5, 5);
4424    op = extract32(insn, 4, 1);
4425    nzcv = extract32(insn, 0, 4);
4426
4427    if (mos || type > 1) {
4428        unallocated_encoding(s);
4429        return;
4430    }
4431
4432    if (!fp_access_check(s)) {
4433        return;
4434    }
4435
4436    if (cond < 0x0e) { /* not always */
4437        TCGLabel *label_match = gen_new_label();
4438        label_continue = gen_new_label();
4439        arm_gen_test_cc(cond, label_match);
4440        /* nomatch: */
4441        tcg_flags = tcg_const_i64(nzcv << 28);
4442        gen_set_nzcv(tcg_flags);
4443        tcg_temp_free_i64(tcg_flags);
4444        tcg_gen_br(label_continue);
4445        gen_set_label(label_match);
4446    }
4447
4448    handle_fp_compare(s, type, rn, rm, false, op);
4449
4450    if (cond < 0x0e) {
4451        gen_set_label(label_continue);
4452    }
4453}
4454
4455/* C3.6.24 Floating point conditional select
4456 *   31  30  29 28       24 23  22  21 20  16 15  12 11 10 9    5 4    0
4457 * +---+---+---+-----------+------+---+------+------+-----+------+------+
4458 * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | cond | 1 1 |  Rn  |  Rd  |
4459 * +---+---+---+-----------+------+---+------+------+-----+------+------+
4460 */
4461static void disas_fp_csel(DisasContext *s, uint32_t insn)
4462{
4463    unsigned int mos, type, rm, cond, rn, rd;
4464    TCGv_i64 t_true, t_false, t_zero;
4465    DisasCompare64 c;
4466
4467    mos = extract32(insn, 29, 3);
4468    type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
4469    rm = extract32(insn, 16, 5);
4470    cond = extract32(insn, 12, 4);
4471    rn = extract32(insn, 5, 5);
4472    rd = extract32(insn, 0, 5);
4473
4474    if (mos || type > 1) {
4475        unallocated_encoding(s);
4476        return;
4477    }
4478
4479    if (!fp_access_check(s)) {
4480        return;
4481    }
4482
4483    /* Zero extend sreg inputs to 64 bits now.  */
4484    t_true = tcg_temp_new_i64();
4485    t_false = tcg_temp_new_i64();
4486    read_vec_element(s, t_true, rn, 0, type ? MO_64 : MO_32);
4487    read_vec_element(s, t_false, rm, 0, type ? MO_64 : MO_32);
4488
4489    a64_test_cc(&c, cond);
4490    t_zero = tcg_const_i64(0);
4491    tcg_gen_movcond_i64(c.cond, t_true, c.value, t_zero, t_true, t_false);
4492    tcg_temp_free_i64(t_zero);
4493    tcg_temp_free_i64(t_false);
4494    a64_free_cc(&c);
4495
4496    /* Note that sregs write back zeros to the high bits,
4497       and we've already done the zero-extension.  */
4498    write_fp_dreg(s, rd, t_true);
4499    tcg_temp_free_i64(t_true);
4500}
4501
4502/* C3.6.25 Floating-point data-processing (1 source) - single precision */
4503static void handle_fp_1src_single(DisasContext *s, int opcode, int rd, int rn)
4504{
4505    TCGv_ptr fpst;
4506    TCGv_i32 tcg_op;
4507    TCGv_i32 tcg_res;
4508
4509    fpst = get_fpstatus_ptr();
4510    tcg_op = read_fp_sreg(s, rn);
4511    tcg_res = tcg_temp_new_i32();
4512
4513    switch (opcode) {
4514    case 0x0: /* FMOV */
4515        tcg_gen_mov_i32(tcg_res, tcg_op);
4516        break;
4517    case 0x1: /* FABS */
4518        gen_helper_vfp_abss(tcg_res, tcg_op);
4519        break;
4520    case 0x2: /* FNEG */
4521        gen_helper_vfp_negs(tcg_res, tcg_op);
4522        break;
4523    case 0x3: /* FSQRT */
4524        gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env);
4525        break;
4526    case 0x8: /* FRINTN */
4527    case 0x9: /* FRINTP */
4528    case 0xa: /* FRINTM */
4529    case 0xb: /* FRINTZ */
4530    case 0xc: /* FRINTA */
4531    {
4532        TCGv_i32 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(opcode & 7));
4533
4534        gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
4535        gen_helper_rints(tcg_res, tcg_op, fpst);
4536
4537        gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
4538        tcg_temp_free_i32(tcg_rmode);
4539        break;
4540    }
4541    case 0xe: /* FRINTX */
4542        gen_helper_rints_exact(tcg_res, tcg_op, fpst);
4543        break;
4544    case 0xf: /* FRINTI */
4545        gen_helper_rints(tcg_res, tcg_op, fpst);
4546        break;
4547    default:
4548        abort();
4549    }
4550
4551    write_fp_sreg(s, rd, tcg_res);
4552
4553    tcg_temp_free_ptr(fpst);
4554    tcg_temp_free_i32(tcg_op);
4555    tcg_temp_free_i32(tcg_res);
4556}
4557
4558/* C3.6.25 Floating-point data-processing (1 source) - double precision */
4559static void handle_fp_1src_double(DisasContext *s, int opcode, int rd, int rn)
4560{
4561    TCGv_ptr fpst;
4562    TCGv_i64 tcg_op;
4563    TCGv_i64 tcg_res;
4564
4565    fpst = get_fpstatus_ptr();
4566    tcg_op = read_fp_dreg(s, rn);
4567    tcg_res = tcg_temp_new_i64();
4568
4569    switch (opcode) {
4570    case 0x0: /* FMOV */
4571        tcg_gen_mov_i64(tcg_res, tcg_op);
4572        break;
4573    case 0x1: /* FABS */
4574        gen_helper_vfp_absd(tcg_res, tcg_op);
4575        break;
4576    case 0x2: /* FNEG */
4577        gen_helper_vfp_negd(tcg_res, tcg_op);
4578        break;
4579    case 0x3: /* FSQRT */
4580        gen_helper_vfp_sqrtd(tcg_res, tcg_op, cpu_env);
4581        break;
4582    case 0x8: /* FRINTN */
4583    case 0x9: /* FRINTP */
4584    case 0xa: /* FRINTM */
4585    case 0xb: /* FRINTZ */
4586    case 0xc: /* FRINTA */
4587    {
4588        TCGv_i32 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(opcode & 7));
4589
4590        gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
4591        gen_helper_rintd(tcg_res, tcg_op, fpst);
4592
4593        gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
4594        tcg_temp_free_i32(tcg_rmode);
4595        break;
4596    }
4597    case 0xe: /* FRINTX */
4598        gen_helper_rintd_exact(tcg_res, tcg_op, fpst);
4599        break;
4600    case 0xf: /* FRINTI */
4601        gen_helper_rintd(tcg_res, tcg_op, fpst);
4602        break;
4603    default:
4604        abort();
4605    }
4606
4607    write_fp_dreg(s, rd, tcg_res);
4608
4609    tcg_temp_free_ptr(fpst);
4610    tcg_temp_free_i64(tcg_op);
4611    tcg_temp_free_i64(tcg_res);
4612}
4613
4614static void handle_fp_fcvt(DisasContext *s, int opcode,
4615                           int rd, int rn, int dtype, int ntype)
4616{
4617    switch (ntype) {
4618    case 0x0:
4619    {
4620        TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
4621        if (dtype == 1) {
4622            /* Single to double */
4623            TCGv_i64 tcg_rd = tcg_temp_new_i64();
4624            gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, cpu_env);
4625            write_fp_dreg(s, rd, tcg_rd);
4626            tcg_temp_free_i64(tcg_rd);
4627        } else {
4628            /* Single to half */
4629            TCGv_i32 tcg_rd = tcg_temp_new_i32();
4630            gen_helper_vfp_fcvt_f32_to_f16(tcg_rd, tcg_rn, cpu_env);
4631            /* write_fp_sreg is OK here because top half of tcg_rd is zero */
4632            write_fp_sreg(s, rd, tcg_rd);
4633            tcg_temp_free_i32(tcg_rd);
4634        }
4635        tcg_temp_free_i32(tcg_rn);
4636        break;
4637    }
4638    case 0x1:
4639    {
4640        TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
4641        TCGv_i32 tcg_rd = tcg_temp_new_i32();
4642        if (dtype == 0) {
4643            /* Double to single */
4644            gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, cpu_env);
4645        } else {
4646            /* Double to half */
4647            gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, cpu_env);
4648            /* write_fp_sreg is OK here because top half of tcg_rd is zero */
4649        }
4650        write_fp_sreg(s, rd, tcg_rd);
4651        tcg_temp_free_i32(tcg_rd);
4652        tcg_temp_free_i64(tcg_rn);
4653        break;
4654    }
4655    case 0x3:
4656    {
4657        TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
4658        tcg_gen_ext16u_i32(tcg_rn, tcg_rn);
4659        if (dtype == 0) {
4660            /* Half to single */
4661            TCGv_i32 tcg_rd = tcg_temp_new_i32();
4662            gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, cpu_env);
4663            write_fp_sreg(s, rd, tcg_rd);
4664            tcg_temp_free_i32(tcg_rd);
4665        } else {
4666            /* Half to double */
4667            TCGv_i64 tcg_rd = tcg_temp_new_i64();
4668            gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, cpu_env);
4669            write_fp_dreg(s, rd, tcg_rd);
4670            tcg_temp_free_i64(tcg_rd);
4671        }
4672        tcg_temp_free_i32(tcg_rn);
4673        break;
4674    }
4675    default:
4676        abort();
4677    }
4678}
4679
4680/* C3.6.25 Floating point data-processing (1 source)
4681 *   31  30  29 28       24 23  22  21 20    15 14       10 9    5 4    0
4682 * +---+---+---+-----------+------+---+--------+-----------+------+------+
4683 * | M | 0 | S | 1 1 1 1 0 | type | 1 | opcode | 1 0 0 0 0 |  Rn  |  Rd  |
4684 * +---+---+---+-----------+------+---+--------+-----------+------+------+
4685 */
4686static void disas_fp_1src(DisasContext *s, uint32_t insn)
4687{
4688    int type = extract32(insn, 22, 2);
4689    int opcode = extract32(insn, 15, 6);
4690    int rn = extract32(insn, 5, 5);
4691    int rd = extract32(insn, 0, 5);
4692
4693    switch (opcode) {
4694    case 0x4: case 0x5: case 0x7:
4695    {
4696        /* FCVT between half, single and double precision */
4697        int dtype = extract32(opcode, 0, 2);
4698        if (type == 2 || dtype == type) {
4699            unallocated_encoding(s);
4700            return;
4701        }
4702        if (!fp_access_check(s)) {
4703            return;
4704        }
4705
4706        handle_fp_fcvt(s, opcode, rd, rn, dtype, type);
4707        break;
4708    }
4709    case 0x0 ... 0x3:
4710    case 0x8 ... 0xc:
4711    case 0xe ... 0xf:
4712        /* 32-to-32 and 64-to-64 ops */
4713        switch (type) {
4714        case 0:
4715            if (!fp_access_check(s)) {
4716                return;
4717            }
4718
4719            handle_fp_1src_single(s, opcode, rd, rn);
4720            break;
4721        case 1:
4722            if (!fp_access_check(s)) {
4723                return;
4724            }
4725
4726            handle_fp_1src_double(s, opcode, rd, rn);
4727            break;
4728        default:
4729            unallocated_encoding(s);
4730        }
4731        break;
4732    default:
4733        unallocated_encoding(s);
4734        break;
4735    }
4736}
4737
4738/* C3.6.26 Floating-point data-processing (2 source) - single precision */
4739static void handle_fp_2src_single(DisasContext *s, int opcode,
4740                                  int rd, int rn, int rm)
4741{
4742    TCGv_i32 tcg_op1;
4743    TCGv_i32 tcg_op2;
4744    TCGv_i32 tcg_res;
4745    TCGv_ptr fpst;
4746
4747    tcg_res = tcg_temp_new_i32();
4748    fpst = get_fpstatus_ptr();
4749    tcg_op1 = read_fp_sreg(s, rn);
4750    tcg_op2 = read_fp_sreg(s, rm);
4751
4752    switch (opcode) {
4753    case 0x0: /* FMUL */
4754        gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
4755        break;
4756    case 0x1: /* FDIV */
4757        gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
4758        break;
4759    case 0x2: /* FADD */
4760        gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
4761        break;
4762    case 0x3: /* FSUB */
4763        gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
4764        break;
4765    case 0x4: /* FMAX */
4766        gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
4767        break;
4768    case 0x5: /* FMIN */
4769        gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
4770        break;
4771    case 0x6: /* FMAXNM */
4772        gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
4773        break;
4774    case 0x7: /* FMINNM */
4775        gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
4776        break;
4777    case 0x8: /* FNMUL */
4778        gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
4779        gen_helper_vfp_negs(tcg_res, tcg_res);
4780        break;
4781    }
4782
4783    write_fp_sreg(s, rd, tcg_res);
4784
4785    tcg_temp_free_ptr(fpst);
4786    tcg_temp_free_i32(tcg_op1);
4787    tcg_temp_free_i32(tcg_op2);
4788    tcg_temp_free_i32(tcg_res);
4789}
4790
4791/* C3.6.26 Floating-point data-processing (2 source) - double precision */
4792static void handle_fp_2src_double(DisasContext *s, int opcode,
4793                                  int rd, int rn, int rm)
4794{
4795    TCGv_i64 tcg_op1;
4796    TCGv_i64 tcg_op2;
4797    TCGv_i64 tcg_res;
4798    TCGv_ptr fpst;
4799
4800    tcg_res = tcg_temp_new_i64();
4801    fpst = get_fpstatus_ptr();
4802    tcg_op1 = read_fp_dreg(s, rn);
4803    tcg_op2 = read_fp_dreg(s, rm);
4804
4805    switch (opcode) {
4806    case 0x0: /* FMUL */
4807        gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
4808        break;
4809    case 0x1: /* FDIV */
4810        gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
4811        break;
4812    case 0x2: /* FADD */
4813        gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
4814        break;
4815    case 0x3: /* FSUB */
4816        gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
4817        break;
4818    case 0x4: /* FMAX */
4819        gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
4820        break;
4821    case 0x5: /* FMIN */
4822        gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
4823        break;
4824    case 0x6: /* FMAXNM */
4825        gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
4826        break;
4827    case 0x7: /* FMINNM */
4828        gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
4829        break;
4830    case 0x8: /* FNMUL */
4831        gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
4832        gen_helper_vfp_negd(tcg_res, tcg_res);
4833        break;
4834    }
4835
4836    write_fp_dreg(s, rd, tcg_res);
4837
4838    tcg_temp_free_ptr(fpst);
4839    tcg_temp_free_i64(tcg_op1);
4840    tcg_temp_free_i64(tcg_op2);
4841    tcg_temp_free_i64(tcg_res);
4842}
4843
4844/* C3.6.26 Floating point data-processing (2 source)
4845 *   31  30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
4846 * +---+---+---+-----------+------+---+------+--------+-----+------+------+
4847 * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | opcode | 1 0 |  Rn  |  Rd  |
4848 * +---+---+---+-----------+------+---+------+--------+-----+------+------+
4849 */
4850static void disas_fp_2src(DisasContext *s, uint32_t insn)
4851{
4852    int type = extract32(insn, 22, 2);
4853    int rd = extract32(insn, 0, 5);
4854    int rn = extract32(insn, 5, 5);
4855    int rm = extract32(insn, 16, 5);
4856    int opcode = extract32(insn, 12, 4);
4857
4858    if (opcode > 8) {
4859        unallocated_encoding(s);
4860        return;
4861    }
4862
4863    switch (type) {
4864    case 0:
4865        if (!fp_access_check(s)) {
4866            return;
4867        }
4868        handle_fp_2src_single(s, opcode, rd, rn, rm);
4869        break;
4870    case 1:
4871        if (!fp_access_check(s)) {
4872            return;
4873        }
4874        handle_fp_2src_double(s, opcode, rd, rn, rm);
4875        break;
4876    default:
4877        unallocated_encoding(s);
4878    }
4879}
4880
4881/* C3.6.27 Floating-point data-processing (3 source) - single precision */
4882static void handle_fp_3src_single(DisasContext *s, bool o0, bool o1,
4883                                  int rd, int rn, int rm, int ra)
4884{
4885    TCGv_i32 tcg_op1, tcg_op2, tcg_op3;
4886    TCGv_i32 tcg_res = tcg_temp_new_i32();
4887    TCGv_ptr fpst = get_fpstatus_ptr();
4888
4889    tcg_op1 = read_fp_sreg(s, rn);
4890    tcg_op2 = read_fp_sreg(s, rm);
4891    tcg_op3 = read_fp_sreg(s, ra);
4892
4893    /* These are fused multiply-add, and must be done as one
4894     * floating point operation with no rounding between the
4895     * multiplication and addition steps.
4896     * NB that doing the negations here as separate steps is
4897     * correct: an input NaN should come out with its sign bit
4898     * flipped if it is a negated input.
4899     */
4900    if (o1) {
4901        gen_helper_vfp_negs(tcg_op3, tcg_op3);
4902    }
4903
4904    if (o0 != o1) {
4905        gen_helper_vfp_negs(tcg_op1, tcg_op1);
4906    }
4907
4908    gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
4909
4910    write_fp_sreg(s, rd, tcg_res);
4911
4912    tcg_temp_free_ptr(fpst);
4913    tcg_temp_free_i32(tcg_op1);
4914    tcg_temp_free_i32(tcg_op2);
4915    tcg_temp_free_i32(tcg_op3);
4916    tcg_temp_free_i32(tcg_res);
4917}
4918
4919/* C3.6.27 Floating-point data-processing (3 source) - double precision */
4920static void handle_fp_3src_double(DisasContext *s, bool o0, bool o1,
4921                                  int rd, int rn, int rm, int ra)
4922{
4923    TCGv_i64 tcg_op1, tcg_op2, tcg_op3;
4924    TCGv_i64 tcg_res = tcg_temp_new_i64();
4925    TCGv_ptr fpst = get_fpstatus_ptr();
4926
4927    tcg_op1 = read_fp_dreg(s, rn);
4928    tcg_op2 = read_fp_dreg(s, rm);
4929    tcg_op3 = read_fp_dreg(s, ra);
4930
4931    /* These are fused multiply-add, and must be done as one
4932     * floating point operation with no rounding between the
4933     * multiplication and addition steps.
4934     * NB that doing the negations here as separate steps is
4935     * correct: an input NaN should come out with its sign bit
4936     * flipped if it is a negated input.
4937     */
4938    if (o1) {
4939        gen_helper_vfp_negd(tcg_op3, tcg_op3);
4940    }
4941
4942    if (o0 != o1) {
4943        gen_helper_vfp_negd(tcg_op1, tcg_op1);
4944    }
4945
4946    gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
4947
4948    write_fp_dreg(s, rd, tcg_res);
4949
4950    tcg_temp_free_ptr(fpst);
4951    tcg_temp_free_i64(tcg_op1);
4952    tcg_temp_free_i64(tcg_op2);
4953    tcg_temp_free_i64(tcg_op3);
4954    tcg_temp_free_i64(tcg_res);
4955}
4956
4957/* C3.6.27 Floating point data-processing (3 source)
4958 *   31  30  29 28       24 23  22  21  20  16  15  14  10 9    5 4    0
4959 * +---+---+---+-----------+------+----+------+----+------+------+------+
4960 * | M | 0 | S | 1 1 1 1 1 | type | o1 |  Rm  | o0 |  Ra  |  Rn  |  Rd  |
4961 * +---+---+---+-----------+------+----+------+----+------+------+------+
4962 */
4963static void disas_fp_3src(DisasContext *s, uint32_t insn)
4964{
4965    int type = extract32(insn, 22, 2);
4966    int rd = extract32(insn, 0, 5);
4967    int rn = extract32(insn, 5, 5);
4968    int ra = extract32(insn, 10, 5);
4969    int rm = extract32(insn, 16, 5);
4970    bool o0 = extract32(insn, 15, 1);
4971    bool o1 = extract32(insn, 21, 1);
4972
4973    switch (type) {
4974    case 0:
4975        if (!fp_access_check(s)) {
4976            return;
4977        }
4978        handle_fp_3src_single(s, o0, o1, rd, rn, rm, ra);
4979        break;
4980    case 1:
4981        if (!fp_access_check(s)) {
4982            return;
4983        }
4984        handle_fp_3src_double(s, o0, o1, rd, rn, rm, ra);
4985        break;
4986    default:
4987        unallocated_encoding(s);
4988    }
4989}
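
/* For reference, a host-side sketch of how the o0/o1 bits select the
 * four fused variants handled above, with C99 fma() standing in for
 * the softfloat muladd helper (an illustrative model, not the
 * translator's code):
 *
 *     double fp3src_ref(double n, double m, double a, bool o0, bool o1)
 *     {
 *         if (o1) {
 *             a = -a;
 *         }
 *         if (o0 != o1) {
 *             n = -n;
 *         }
 *         return fma(n, m, a);
 *     }
 *
 * so (o1,o0) == (0,0) is FMADD, (0,1) FMSUB, (1,0) FNMADD and
 * (1,1) FNMSUB.
 */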
4990
4991/* C3.6.28 Floating point immediate
4992 *   31  30  29 28       24 23  22  21 20        13 12   10 9    5 4    0
4993 * +---+---+---+-----------+------+---+------------+-------+------+------+
4994 * | M | 0 | S | 1 1 1 1 0 | type | 1 |    imm8    | 1 0 0 | imm5 |  Rd  |
4995 * +---+---+---+-----------+------+---+------------+-------+------+------+
4996 */
4997static void disas_fp_imm(DisasContext *s, uint32_t insn)
4998{
4999    int rd = extract32(insn, 0, 5);
5000    int imm8 = extract32(insn, 13, 8);
5001    int is_double = extract32(insn, 22, 2);
5002    uint64_t imm;
5003    TCGv_i64 tcg_res;
5004
5005    if (is_double > 1) {
5006        unallocated_encoding(s);
5007        return;
5008    }
5009
5010    if (!fp_access_check(s)) {
5011        return;
5012    }
5013
5014    /* The imm8 encodes the sign bit, enough bits to represent
5015     * an exponent in the range 01....1xx to 10....0xx,
5016     * and the most significant 4 bits of the mantissa; see
5017     * VFPExpandImm() in the v8 ARM ARM.
5018     */
5019    if (is_double) {
5020        imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
5021            (extract32(imm8, 6, 1) ? 0x3fc0 : 0x4000) |
5022            extract32(imm8, 0, 6);
5023        imm <<= 48;
5024    } else {
5025        imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
5026            (extract32(imm8, 6, 1) ? 0x3e00 : 0x4000) |
5027            (extract32(imm8, 0, 6) << 3);
5028        imm <<= 16;
5029    }
5030
5031    tcg_res = tcg_const_i64(imm);
5032    write_fp_dreg(s, rd, tcg_res);
5033    tcg_temp_free_i64(tcg_res);
5034}
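
/* For reference, the single-precision expansion above as a standalone
 * sketch (a reference model of VFPExpandImm over a uint32_t bit
 * pattern; the double-precision case is analogous with an 11-bit
 * exponent):
 *
 *     uint32_t vfp_expand_imm_s(uint32_t imm8)
 *     {
 *         uint32_t bits = (imm8 & 0x80) ? 0x80000000u : 0;    // sign
 *         bits |= (imm8 & 0x40) ? 0x3e000000u : 0x40000000u;  // exponent
 *         bits |= (imm8 & 0x3f) << 19;             // top of mantissa
 *         return bits;
 *     }
 *
 * e.g. imm8 == 0x70 expands to 0x3f800000, i.e. 1.0f.
 */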
5035
5036/* Handle floating point <=> fixed point conversions. Note that we can
5037 * also deal with fp <=> integer conversions as a special case (scale == 64).
5038 * OPTME: consider handling that special case specially or at least skipping
5039 * the call to scalbn in the helpers for zero shifts.
5040 */
5041static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode,
5042                           bool itof, int rmode, int scale, int sf, int type)
5043{
5044    bool is_signed = !(opcode & 1);
5045    bool is_double = type;
5046    TCGv_ptr tcg_fpstatus;
5047    TCGv_i32 tcg_shift;
5048
5049    tcg_fpstatus = get_fpstatus_ptr();
5050
5051    tcg_shift = tcg_const_i32(64 - scale);
5052
5053    if (itof) {
5054        TCGv_i64 tcg_int = cpu_reg(s, rn);
5055        if (!sf) {
5056            TCGv_i64 tcg_extend = new_tmp_a64(s);
5057
5058            if (is_signed) {
5059                tcg_gen_ext32s_i64(tcg_extend, tcg_int);
5060            } else {
5061                tcg_gen_ext32u_i64(tcg_extend, tcg_int);
5062            }
5063
5064            tcg_int = tcg_extend;
5065        }
5066
5067        if (is_double) {
5068            TCGv_i64 tcg_double = tcg_temp_new_i64();
5069            if (is_signed) {
5070                gen_helper_vfp_sqtod(tcg_double, tcg_int,
5071                                     tcg_shift, tcg_fpstatus);
5072            } else {
5073                gen_helper_vfp_uqtod(tcg_double, tcg_int,
5074                                     tcg_shift, tcg_fpstatus);
5075            }
5076            write_fp_dreg(s, rd, tcg_double);
5077            tcg_temp_free_i64(tcg_double);
5078        } else {
5079            TCGv_i32 tcg_single = tcg_temp_new_i32();
5080            if (is_signed) {
5081                gen_helper_vfp_sqtos(tcg_single, tcg_int,
5082                                     tcg_shift, tcg_fpstatus);
5083            } else {
5084                gen_helper_vfp_uqtos(tcg_single, tcg_int,
5085                                     tcg_shift, tcg_fpstatus);
5086            }
5087            write_fp_sreg(s, rd, tcg_single);
5088            tcg_temp_free_i32(tcg_single);
5089        }
5090    } else {
5091        TCGv_i64 tcg_int = cpu_reg(s, rd);
5092        TCGv_i32 tcg_rmode;
5093
5094        if (extract32(opcode, 2, 1)) {
5095            /* There are too many rounding modes to all fit into rmode,
5096             * so FCVTA[US] is a special case.
5097             */
5098            rmode = FPROUNDING_TIEAWAY;
5099        }
5100
5101        tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
5102
5103        gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
5104
5105        if (is_double) {
5106            TCGv_i64 tcg_double = read_fp_dreg(s, rn);
5107            if (is_signed) {
5108                if (!sf) {
5109                    gen_helper_vfp_tosld(tcg_int, tcg_double,
5110                                         tcg_shift, tcg_fpstatus);
5111                } else {
5112                    gen_helper_vfp_tosqd(tcg_int, tcg_double,
5113                                         tcg_shift, tcg_fpstatus);
5114                }
5115            } else {
5116                if (!sf) {
5117                    gen_helper_vfp_tould(tcg_int, tcg_double,
5118                                         tcg_shift, tcg_fpstatus);
5119                } else {
5120                    gen_helper_vfp_touqd(tcg_int, tcg_double,
5121                                         tcg_shift, tcg_fpstatus);
5122                }
5123            }
5124            tcg_temp_free_i64(tcg_double);
5125        } else {
5126            TCGv_i32 tcg_single = read_fp_sreg(s, rn);
5127            if (sf) {
5128                if (is_signed) {
5129                    gen_helper_vfp_tosqs(tcg_int, tcg_single,
5130                                         tcg_shift, tcg_fpstatus);
5131                } else {
5132                    gen_helper_vfp_touqs(tcg_int, tcg_single,
5133                                         tcg_shift, tcg_fpstatus);
5134                }
5135            } else {
5136                TCGv_i32 tcg_dest = tcg_temp_new_i32();
5137                if (is_signed) {
5138                    gen_helper_vfp_tosls(tcg_dest, tcg_single,
5139                                         tcg_shift, tcg_fpstatus);
5140                } else {
5141                    gen_helper_vfp_touls(tcg_dest, tcg_single,
5142                                         tcg_shift, tcg_fpstatus);
5143                }
5144                tcg_gen_extu_i32_i64(tcg_int, tcg_dest);
5145                tcg_temp_free_i32(tcg_dest);
5146            }
5147            tcg_temp_free_i32(tcg_single);
5148        }
5149
5150        gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
5151        tcg_temp_free_i32(tcg_rmode);
5152
5153        if (!sf) {
5154            tcg_gen_ext32u_i64(tcg_int, tcg_int);
5155        }
5156    }
5157
5158    tcg_temp_free_ptr(tcg_fpstatus);
5159    tcg_temp_free_i32(tcg_shift);
5160}
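
/* For reference, the int-to-float direction above amounts to this
 * host-side sketch, where fbits == 64 - scale and ldexp() stands in
 * for the scalbn step done by the softfloat helpers:
 *
 *     double scvtf_fixed_ref(int64_t x, int fbits)
 *     {
 *         return ldexp((double)x, -fbits);
 *     }
 *
 * e.g. with 4 fraction bits the integer 24 converts to 1.5; the
 * scale == 64 case used for plain fp <-> int is simply fbits == 0.
 */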
5161
5162/* C3.6.29 Floating point <-> fixed point conversions
5163 *   31   30  29 28       24 23  22  21 20   19 18    16 15   10 9    5 4    0
5164 * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
5165 * | sf | 0 | S | 1 1 1 1 0 | type | 0 | rmode | opcode | scale |  Rn  |  Rd  |
5166 * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
5167 */
5168static void disas_fp_fixed_conv(DisasContext *s, uint32_t insn)
5169{
5170    int rd = extract32(insn, 0, 5);
5171    int rn = extract32(insn, 5, 5);
5172    int scale = extract32(insn, 10, 6);
5173    int opcode = extract32(insn, 16, 3);
5174    int rmode = extract32(insn, 19, 2);
5175    int type = extract32(insn, 22, 2);
5176    bool sbit = extract32(insn, 29, 1);
5177    bool sf = extract32(insn, 31, 1);
5178    bool itof;
5179
5180    if (sbit || (type > 1)
5181        || (!sf && scale < 32)) {
5182        unallocated_encoding(s);
5183        return;
5184    }
5185
5186    switch ((rmode << 3) | opcode) {
5187    case 0x2: /* SCVTF */
5188    case 0x3: /* UCVTF */
5189        itof = true;
5190        break;
5191    case 0x18: /* FCVTZS */
5192    case 0x19: /* FCVTZU */
5193        itof = false;
5194        break;
5195    default:
5196        unallocated_encoding(s);
5197        return;
5198    }
5199
5200    if (!fp_access_check(s)) {
5201        return;
5202    }
5203
5204    handle_fpfpcvt(s, rd, rn, opcode, itof, FPROUNDING_ZERO, scale, sf, type);
5205}
5206
5207static void handle_fmov(DisasContext *s, int rd, int rn, int type, bool itof)
5208{
5209    /* FMOV: gpr to or from float, double, or top half of quad fp reg,
5210     * without conversion.
5211     */
5212
5213    if (itof) {
5214        TCGv_i64 tcg_rn = cpu_reg(s, rn);
5215
5216        switch (type) {
5217        case 0:
5218        {
5219            /* 32 bit */
5220            TCGv_i64 tmp = tcg_temp_new_i64();
5221            tcg_gen_ext32u_i64(tmp, tcg_rn);
5222            tcg_gen_st_i64(tmp, cpu_env, fp_reg_offset(s, rd, MO_64));
5223            tcg_gen_movi_i64(tmp, 0);
5224            tcg_gen_st_i64(tmp, cpu_env, fp_reg_hi_offset(s, rd));
5225            tcg_temp_free_i64(tmp);
5226            break;
5227        }
5228        case 1:
5229        {
5230            /* 64 bit */
5231            TCGv_i64 tmp = tcg_const_i64(0);
5232            tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_offset(s, rd, MO_64));
5233            tcg_gen_st_i64(tmp, cpu_env, fp_reg_hi_offset(s, rd));
5234            tcg_temp_free_i64(tmp);
5235            break;
5236        }
5237        case 2:
5238            /* 64 bit to top half. */
5239            tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_hi_offset(s, rd));
5240            break;
5241        }
5242    } else {
5243        TCGv_i64 tcg_rd = cpu_reg(s, rd);
5244
5245        switch (type) {
5246        case 0:
5247            /* 32 bit */
5248            tcg_gen_ld32u_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_32));
5249            break;
5250        case 1:
5251            /* 64 bit */
5252            tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_64));
5253            break;
5254        case 2:
5255            /* 64 bits from top half */
5256            tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_hi_offset(s, rn));
5257            break;
5258        }
5259    }
5260}
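
/* For reference, the 32-bit itof case above is a raw bit copy with no
 * numeric conversion, equivalent to this host-side sketch:
 *
 *     float fmov_wtos_ref(uint32_t w)
 *     {
 *         float s;
 *         memcpy(&s, &w, sizeof(s));
 *         return s;
 *     }
 *
 * The upper 96 bits of the destination vector register are zeroed,
 * which is what the stores of the zero temporary above make explicit.
 */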
5261
5262/* C3.6.30 Floating point <-> integer conversions
5263 *   31   30  29 28       24 23  22  21 20   19 18 16 15         10 9  5 4  0
5264 * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
5265 * | sf | 0 | S | 1 1 1 1 0 | type | 1 | rmode | opc | 0 0 0 0 0 0 | Rn | Rd |
5266 * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
5267 */
5268static void disas_fp_int_conv(DisasContext *s, uint32_t insn)
5269{
5270    int rd = extract32(insn, 0, 5);
5271    int rn = extract32(insn, 5, 5);
5272    int opcode = extract32(insn, 16, 3);
5273    int rmode = extract32(insn, 19, 2);
5274    int type = extract32(insn, 22, 2);
5275    bool sbit = extract32(insn, 29, 1);
5276    bool sf = extract32(insn, 31, 1);
5277
5278    if (sbit) {
5279        unallocated_encoding(s);
5280        return;
5281    }
5282
5283    if (opcode > 5) {
5284        /* FMOV */
5285        bool itof = opcode & 1;
5286
5287        if (rmode >= 2) {
5288            unallocated_encoding(s);
5289            return;
5290        }
5291
5292        switch (sf << 3 | type << 1 | rmode) {
5293        case 0x0: /* 32 bit */
5294        case 0xa: /* 64 bit */
5295        case 0xd: /* 64 bit to top half of quad */
5296            break;
5297        default:
5298            /* all other sf/type/rmode combinations are invalid */
5299            unallocated_encoding(s);
5300            return;
5301        }
5302
5303        if (!fp_access_check(s)) {
5304            return;
5305        }
5306        handle_fmov(s, rd, rn, type, itof);
5307    } else {
5308        /* actual FP conversions */
5309        bool itof = extract32(opcode, 1, 1);
5310
5311        if (type > 1 || (rmode != 0 && opcode > 1)) {
5312            unallocated_encoding(s);
5313            return;
5314        }
5315
5316        if (!fp_access_check(s)) {
5317            return;
5318        }
5319        handle_fpfpcvt(s, rd, rn, opcode, itof, rmode, 64, sf, type);
5320    }
5321}
5322
5323/* FP-specific subcases of table C3-6 (SIMD and FP data processing)
5324 *   31  30  29 28     25 24                          0
5325 * +---+---+---+---------+-----------------------------+
5326 * |   | 0 |   | 1 1 1 1 |                             |
5327 * +---+---+---+---------+-----------------------------+
5328 */
5329static void disas_data_proc_fp(DisasContext *s, uint32_t insn)
5330{
5331    if (extract32(insn, 24, 1)) {
5332        /* Floating point data-processing (3 source) */
5333        disas_fp_3src(s, insn);
5334    } else if (extract32(insn, 21, 1) == 0) {
5335        /* Floating point to fixed point conversions */
5336        disas_fp_fixed_conv(s, insn);
5337    } else {
5338        switch (extract32(insn, 10, 2)) {
5339        case 1:
5340            /* Floating point conditional compare */
5341            disas_fp_ccomp(s, insn);
5342            break;
5343        case 2:
5344            /* Floating point data-processing (2 source) */
5345            disas_fp_2src(s, insn);
5346            break;
5347        case 3:
5348            /* Floating point conditional select */
5349            disas_fp_csel(s, insn);
5350            break;
5351        case 0:
5352            switch (ctz32(extract32(insn, 12, 4))) {
5353            case 0: /* [15:12] == xxx1 */
5354                /* Floating point immediate */
5355                disas_fp_imm(s, insn);
5356                break;
5357            case 1: /* [15:12] == xx10 */
5358                /* Floating point compare */
5359                disas_fp_compare(s, insn);
5360                break;
5361            case 2: /* [15:12] == x100 */
5362                /* Floating point data-processing (1 source) */
5363                disas_fp_1src(s, insn);
5364                break;
5365            case 3: /* [15:12] == 1000 */
5366                unallocated_encoding(s);
5367                break;
5368            default: /* [15:12] == 0000 */
5369                /* Floating point <-> integer conversions */
5370                disas_fp_int_conv(s, insn);
5371                break;
5372            }
5373            break;
5374        }
5375    }
5376}
5377
5378static void do_ext64(DisasContext *s, TCGv_i64 tcg_left, TCGv_i64 tcg_right,
5379                     int pos)
5380{
5381    /* Extract 64 bits from the middle of two concatenated 64 bit
5382     * vector register slices left:right. The extracted bits start
5383     * at 'pos' bits into the right (least significant) side.
5384     * We return the result in tcg_right, and guarantee not to
5385     * trash tcg_left.
5386     */
5387    TCGv_i64 tcg_tmp = tcg_temp_new_i64();
5388    assert(pos > 0 && pos < 64);
5389
5390    tcg_gen_shri_i64(tcg_right, tcg_right, pos);
5391    tcg_gen_shli_i64(tcg_tmp, tcg_left, 64 - pos);
5392    tcg_gen_or_i64(tcg_right, tcg_right, tcg_tmp);
5393
5394    tcg_temp_free_i64(tcg_tmp);
5395}
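
/* For example, with pos == 8, tcg_right == 0x1111111111111111 and
 * tcg_left == 0x2222222222222222, the value left in tcg_right is
 * 0x2211111111111111: the top 56 bits of right shifted down, with the
 * low 8 bits of left becoming the new top byte.
 */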
5396
5397/* C3.6.1 EXT
5398 *   31  30 29         24 23 22  21 20  16 15  14  11 10  9    5 4    0
5399 * +---+---+-------------+-----+---+------+---+------+---+------+------+
5400 * | 0 | Q | 1 0 1 1 1 0 | op2 | 0 |  Rm  | 0 | imm4 | 0 |  Rn  |  Rd  |
5401 * +---+---+-------------+-----+---+------+---+------+---+------+------+
5402 */
5403static void disas_simd_ext(DisasContext *s, uint32_t insn)
5404{
5405    int is_q = extract32(insn, 30, 1);
5406    int op2 = extract32(insn, 22, 2);
5407    int imm4 = extract32(insn, 11, 4);
5408    int rm = extract32(insn, 16, 5);
5409    int rn = extract32(insn, 5, 5);
5410    int rd = extract32(insn, 0, 5);
5411    int pos = imm4 << 3;
5412    TCGv_i64 tcg_resl, tcg_resh;
5413
5414    if (op2 != 0 || (!is_q && extract32(imm4, 3, 1))) {
5415        unallocated_encoding(s);
5416        return;
5417    }
5418
5419    if (!fp_access_check(s)) {
5420        return;
5421    }
5422
5423    tcg_resh = tcg_temp_new_i64();
5424    tcg_resl = tcg_temp_new_i64();
5425
5426    /* Vd gets bits starting at pos bits into Vm:Vn. This is
5427     * either extracting 128 bits from a 128:128 concatenation, or
5428     * extracting 64 bits from a 64:64 concatenation.
5429     */
5430    if (!is_q) {
5431        read_vec_element(s, tcg_resl, rn, 0, MO_64);
5432        if (pos != 0) {
5433            read_vec_element(s, tcg_resh, rm, 0, MO_64);
5434            do_ext64(s, tcg_resh, tcg_resl, pos);
5435        }
5436        tcg_gen_movi_i64(tcg_resh, 0);
5437    } else {
5438        TCGv_i64 tcg_hh;
5439        typedef struct {
5440            int reg;
5441            int elt;
5442        } EltPosns;
5443        EltPosns eltposns[] = { {rn, 0}, {rn, 1}, {rm, 0}, {rm, 1} };
5444        EltPosns *elt = eltposns;
5445
5446        if (pos >= 64) {
5447            elt++;
5448            pos -= 64;
5449        }
5450
5451        read_vec_element(s, tcg_resl, elt->reg, elt->elt, MO_64);
5452        elt++;
5453        read_vec_element(s, tcg_resh, elt->reg, elt->elt, MO_64);
5454        elt++;
5455        if (pos != 0) {
5456            do_ext64(s, tcg_resh, tcg_resl, pos);
5457            tcg_hh = tcg_temp_new_i64();
5458            read_vec_element(s, tcg_hh, elt->reg, elt->elt, MO_64);
5459            do_ext64(s, tcg_hh, tcg_resh, pos);
5460            tcg_temp_free_i64(tcg_hh);
5461        }
5462    }
5463
5464    write_vec_element(s, tcg_resl, rd, 0, MO_64);
5465    tcg_temp_free_i64(tcg_resl);
5466    write_vec_element(s, tcg_resh, rd, 1, MO_64);
5467    tcg_temp_free_i64(tcg_resh);
5468}
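
/* For reference, a byte-wise model of the EXT operation implemented
 * above (an illustrative sketch; nbytes is 16 for the Q form and 8
 * otherwise, with Vn supplying the low half of the concatenation):
 *
 *     void ext_ref(uint8_t *vd, const uint8_t *vn, const uint8_t *vm,
 *                  int imm4, int nbytes)
 *     {
 *         for (int i = 0; i < nbytes; i++) {
 *             int src = imm4 + i;
 *             vd[i] = src < nbytes ? vn[src] : vm[src - nbytes];
 *         }
 *     }
 */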
5469
5470/* C3.6.2 TBL/TBX
5471 *   31  30 29         24 23 22  21 20  16 15  14 13  12  11 10 9    5 4    0
5472 * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
5473 * | 0 | Q | 0 0 1 1 1 0 | op2 | 0 |  Rm  | 0 | len | op | 0 0 |  Rn  |  Rd  |
5474 * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
5475 */
5476static void disas_simd_tb(DisasContext *s, uint32_t insn)
5477{
5478    int op2 = extract32(insn, 22, 2);
5479    int is_q = extract32(insn, 30, 1);
5480    int rm = extract32(insn, 16, 5);
5481    int rn = extract32(insn, 5, 5);
5482    int rd = extract32(insn, 0, 5);
5483    int is_tblx = extract32(insn, 12, 1);
5484    int len = extract32(insn, 13, 2);
5485    TCGv_i64 tcg_resl, tcg_resh, tcg_idx;
5486    TCGv_i32 tcg_regno, tcg_numregs;
5487
5488    if (op2 != 0) {
5489        unallocated_encoding(s);
5490        return;
5491    }
5492
5493    if (!fp_access_check(s)) {
5494        return;
5495    }
5496
5497    /* This does a table lookup: for every byte element in the input
5498     * we index into a table formed from up to four vector registers,
5499     * and then the output is the result of the lookups. Our helper
5500     * function does the lookup operation for a single 64 bit part of
5501     * the input.
5502     */
5503    tcg_resl = tcg_temp_new_i64();
5504    tcg_resh = tcg_temp_new_i64();
5505
5506    if (is_tblx) {
5507        read_vec_element(s, tcg_resl, rd, 0, MO_64);
5508    } else {
5509        tcg_gen_movi_i64(tcg_resl, 0);
5510    }
5511    if (is_tblx && is_q) {
5512        read_vec_element(s, tcg_resh, rd, 1, MO_64);
5513    } else {
5514        tcg_gen_movi_i64(tcg_resh, 0);
5515    }
5516
5517    tcg_idx = tcg_temp_new_i64();
5518    tcg_regno = tcg_const_i32(rn);
5519    tcg_numregs = tcg_const_i32(len + 1);
5520    read_vec_element(s, tcg_idx, rm, 0, MO_64);
5521    gen_helper_simd_tbl(tcg_resl, cpu_env, tcg_resl, tcg_idx,
5522                        tcg_regno, tcg_numregs);
5523    if (is_q) {
5524        read_vec_element(s, tcg_idx, rm, 1, MO_64);
5525        gen_helper_simd_tbl(tcg_resh, cpu_env, tcg_resh, tcg_idx,
5526                            tcg_regno, tcg_numregs);
5527    }
5528    tcg_temp_free_i64(tcg_idx);
5529    tcg_temp_free_i32(tcg_regno);
5530    tcg_temp_free_i32(tcg_numregs);
5531
5532    write_vec_element(s, tcg_resl, rd, 0, MO_64);
5533    tcg_temp_free_i64(tcg_resl);
5534    write_vec_element(s, tcg_resh, rd, 1, MO_64);
5535    tcg_temp_free_i64(tcg_resh);
5536}
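
/* For reference, a byte-wise model of the lookup done by the helper
 * calls above (an illustrative sketch: 'table' is the concatenation
 * of the len + 1 consecutive registers starting at Rn, so table_len
 * is 16 * (len + 1)):
 *
 *     void tbl_ref(uint8_t *vd, const uint8_t *table, int table_len,
 *                  const uint8_t *idx, int nbytes, bool is_tblx)
 *     {
 *         for (int i = 0; i < nbytes; i++) {
 *             if (idx[i] < table_len) {
 *                 vd[i] = table[idx[i]];
 *             } else if (!is_tblx) {
 *                 vd[i] = 0;    // TBL: out-of-range index reads zero
 *             }                 // TBX: out-of-range leaves vd[i] alone
 *         }
 *     }
 */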
5537
5538/* C3.6.3 ZIP/UZP/TRN
5539 *   31  30 29         24 23  22  21 20   16 15 14 12 11 10 9    5 4    0
5540 * +---+---+-------------+------+---+------+---+------------------+------+
5541 * | 0 | Q | 0 0 1 1 1 0 | size | 0 |  Rm  | 0 | opc | 1 0 |  Rn  |  Rd  |
5542 * +---+---+-------------+------+---+------+---+------------------+------+
5543 */
5544static void disas_simd_zip_trn(DisasContext *s, uint32_t insn)
5545{
5546    int rd = extract32(insn, 0, 5);
5547    int rn = extract32(insn, 5, 5);
5548    int rm = extract32(insn, 16, 5);
5549    int size = extract32(insn, 22, 2);
5550    /* opc field bits [1:0] indicate ZIP/UZP/TRN;
5551     * bit 2 indicates 1 vs 2 variant of the insn.
5552     */
5553    int opcode = extract32(insn, 12, 2);
5554    bool part = extract32(insn, 14, 1);
5555    bool is_q = extract32(insn, 30, 1);
5556    int esize = 8 << size;
5557    int i, ofs;
5558    int datasize = is_q ? 128 : 64;
5559    int elements = datasize / esize;
5560    TCGv_i64 tcg_res, tcg_resl, tcg_resh;
5561
5562    if (opcode == 0 || (size == 3 && !is_q)) {
5563        unallocated_encoding(s);
5564        return;
5565    }
5566
5567    if (!fp_access_check(s)) {
5568        return;
5569    }
5570
5571    tcg_resl = tcg_const_i64(0);
5572    tcg_resh = tcg_const_i64(0);
5573    tcg_res = tcg_temp_new_i64();
5574
5575    for (i = 0; i < elements; i++) {
5576        switch (opcode) {
5577        case 1: /* UZP1/2 */
5578        {
5579            int midpoint = elements / 2;
5580            if (i < midpoint) {
5581                read_vec_element(s, tcg_res, rn, 2 * i + part, size);
5582            } else {
5583                read_vec_element(s, tcg_res, rm,
5584                                 2 * (i - midpoint) + part, size);
5585            }
5586            break;
5587        }
5588        case 2: /* TRN1/2 */
5589            if (i & 1) {
5590                read_vec_element(s, tcg_res, rm, (i & ~1) + part, size);
5591            } else {
5592                read_vec_element(s, tcg_res, rn, (i & ~1) + part, size);
5593            }
5594            break;
5595        case 3: /* ZIP1/2 */
5596        {
5597            int base = part * elements / 2;
5598            if (i & 1) {
5599                read_vec_element(s, tcg_res, rm, base + (i >> 1), size);
5600            } else {
5601                read_vec_element(s, tcg_res, rn, base + (i >> 1), size);
5602            }
5603            break;
5604        }
5605        default:
5606            g_assert_not_reached();
5607        }
5608
5609        ofs = i * esize;
5610        if (ofs < 64) {
5611            tcg_gen_shli_i64(tcg_res, tcg_res, ofs);
5612            tcg_gen_or_i64(tcg_resl, tcg_resl, tcg_res);
5613        } else {
5614            tcg_gen_shli_i64(tcg_res, tcg_res, ofs - 64);
5615            tcg_gen_or_i64(tcg_resh, tcg_resh, tcg_res);
5616        }
5617    }
5618
5619    tcg_temp_free_i64(tcg_res);
5620
5621    write_vec_element(s, tcg_resl, rd, 0, MO_64);
5622    tcg_temp_free_i64(tcg_resl);
5623    write_vec_element(s, tcg_resh, rd, 1, MO_64);
5624    tcg_temp_free_i64(tcg_resh);
5625}
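
/* As a concrete example of the element selection above, for 8-bit
 * elements in the 64-bit (non-Q) form, with n0..n7 and m0..m7 the
 * elements of Vn and Vm:
 *
 *   UZP1: n0 n2 n4 n6 m0 m2 m4 m6    UZP2: n1 n3 n5 n7 m1 m3 m5 m7
 *   TRN1: n0 m0 n2 m2 n4 m4 n6 m6    TRN2: n1 m1 n3 m3 n5 m5 n7 m7
 *   ZIP1: n0 m0 n1 m1 n2 m2 n3 m3    ZIP2: n4 m4 n5 m5 n6 m6 n7 m7
 */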
5626
5627static void do_minmaxop(DisasContext *s, TCGv_i32 tcg_elt1, TCGv_i32 tcg_elt2,
5628                        int opc, bool is_min, TCGv_ptr fpst)
5629{
5630    /* Helper function for disas_simd_across_lanes: do a single precision
5631     * min/max operation on the specified two inputs,
5632     * and return the result in tcg_elt1.
5633     */
5634    if (opc == 0xc) {
5635        if (is_min) {
5636            gen_helper_vfp_minnums(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
5637        } else {
5638            gen_helper_vfp_maxnums(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
5639        }
5640    } else {
5641        assert(opc == 0xf);
5642        if (is_min) {
5643            gen_helper_vfp_mins(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
5644        } else {
5645            gen_helper_vfp_maxs(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
5646        }
5647    }
5648}
5649
5650/* C3.6.4 AdvSIMD across lanes
5651 *   31  30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
5652 * +---+---+---+-----------+------+-----------+--------+-----+------+------+
5653 * | 0 | Q | U | 0 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
5654 * +---+---+---+-----------+------+-----------+--------+-----+------+------+
5655 */
5656static void disas_simd_across_lanes(DisasContext *s, uint32_t insn)
5657{
5658    int rd = extract32(insn, 0, 5);
5659    int rn = extract32(insn, 5, 5);
5660    int size = extract32(insn, 22, 2);
5661    int opcode = extract32(insn, 12, 5);
5662    bool is_q = extract32(insn, 30, 1);
5663    bool is_u = extract32(insn, 29, 1);
5664    bool is_fp = false;
5665    bool is_min = false;
5666    int esize;
5667    int elements;
5668    int i;
5669    TCGv_i64 tcg_res, tcg_elt;
5670
5671    switch (opcode) {
5672    case 0x1b: /* ADDV */
5673        if (is_u) {
5674            unallocated_encoding(s);
5675            return;
5676        }
5677        /* fall through */
5678    case 0x3: /* SADDLV, UADDLV */
5679    case 0xa: /* SMAXV, UMAXV */
5680    case 0x1a: /* SMINV, UMINV */
5681        if (size == 3 || (size == 2 && !is_q)) {
5682            unallocated_encoding(s);
5683            return;
5684        }
5685        break;
5686    case 0xc: /* FMAXNMV, FMINNMV */
5687    case 0xf: /* FMAXV, FMINV */
5688        if (!is_u || !is_q || extract32(size, 0, 1)) {
5689            unallocated_encoding(s);
5690            return;
5691        }
5692        /* Bit 1 of the size field encodes min vs max; the actual size is
5693         * always 32 bits, so adjust the size variable for the code below.
5694         */
5695        is_min = extract32(size, 1, 1);
5696        is_fp = true;
5697        size = 2;
5698        break;
5699    default:
5700        unallocated_encoding(s);
5701        return;
5702    }
5703
5704    if (!fp_access_check(s)) {
5705        return;
5706    }
5707
5708    esize = 8 << size;
5709    elements = (is_q ? 128 : 64) / esize;
5710
5711    tcg_res = tcg_temp_new_i64();
5712    tcg_elt = tcg_temp_new_i64();
5713
5714    /* These instructions operate across all lanes of a vector
5715     * to produce a single result. We can guarantee that a 64
5716     * bit intermediate is sufficient:
5717     *  + for [US]ADDLV the maximum element size is 32 bits, and
5718     *    the result type is 64 bits
5719     *  + for FMAX*V, FMIN*V, ADDV the intermediate type is the
5720     *    same as the element size, which is 32 bits at most
5721     * For the integer operations we can choose to work at 64
5722     * or 32 bits and truncate at the end; for simplicity
5723     * we use 64 bits always. The floating point
5724     * ops do require 32 bit intermediates, though.
5725     */
5726    if (!is_fp) {
5727        read_vec_element(s, tcg_res, rn, 0, size | (is_u ? 0 : MO_SIGN));
5728
5729        for (i = 1; i < elements; i++) {
5730            read_vec_element(s, tcg_elt, rn, i, size | (is_u ? 0 : MO_SIGN));
5731
5732            switch (opcode) {
5733            case 0x03: /* SADDLV / UADDLV */
5734            case 0x1b: /* ADDV */
5735                tcg_gen_add_i64(tcg_res, tcg_res, tcg_elt);
5736                break;
5737            case 0x0a: /* SMAXV / UMAXV */
5738                tcg_gen_movcond_i64(is_u ? TCG_COND_GEU : TCG_COND_GE,
5739                                    tcg_res,
5740                                    tcg_res, tcg_elt, tcg_res, tcg_elt);
5741                break;
5742            case 0x1a: /* SMINV / UMINV */
5743                tcg_gen_movcond_i64(is_u ? TCG_COND_LEU : TCG_COND_LE,
5744                                    tcg_res,
5745                                    tcg_res, tcg_elt, tcg_res, tcg_elt);
5746                break;
5748            default:
5749                g_assert_not_reached();
5750            }
5751
5752        }
5753    } else {
5754        /* Floating point ops which work on 32 bit (single) intermediates.
5755         * Note that correct NaN propagation requires that we do these
5756         * operations in exactly the order specified by the pseudocode.
5757         */
5758        TCGv_i32 tcg_elt1 = tcg_temp_new_i32();
5759        TCGv_i32 tcg_elt2 = tcg_temp_new_i32();
5760        TCGv_i32 tcg_elt3 = tcg_temp_new_i32();
5761        TCGv_ptr fpst = get_fpstatus_ptr();
5762
5763        assert(esize == 32);
5764        assert(elements == 4);
5765
5766        read_vec_element(s, tcg_elt, rn, 0, MO_32);
5767        tcg_gen_extrl_i64_i32(tcg_elt1, tcg_elt);
5768        read_vec_element(s, tcg_elt, rn, 1, MO_32);
5769        tcg_gen_extrl_i64_i32(tcg_elt2, tcg_elt);
5770
5771        do_minmaxop(s, tcg_elt1, tcg_elt2, opcode, is_min, fpst);
5772
5773        read_vec_element(s, tcg_elt, rn, 2, MO_32);
5774        tcg_gen_extrl_i64_i32(tcg_elt2, tcg_elt);
5775        read_vec_element(s, tcg_elt, rn, 3, MO_32);
5776        tcg_gen_extrl_i64_i32(tcg_elt3, tcg_elt);
5777
5778        do_minmaxop(s, tcg_elt2, tcg_elt3, opcode, is_min, fpst);
5779
5780        do_minmaxop(s, tcg_elt1, tcg_elt2, opcode, is_min, fpst);
5781
5782        tcg_gen_extu_i32_i64(tcg_res, tcg_elt1);
5783        tcg_temp_free_i32(tcg_elt1);
5784        tcg_temp_free_i32(tcg_elt2);
5785        tcg_temp_free_i32(tcg_elt3);
5786        tcg_temp_free_ptr(fpst);
5787    }
5788
5789    tcg_temp_free_i64(tcg_elt);
5790
5791    /* Now truncate the result to the width required for the final output */
5792    if (opcode == 0x03) {
5793        /* SADDLV, UADDLV: result is 2*esize */
5794        size++;
5795    }
5796
5797    switch (size) {
5798    case 0:
5799        tcg_gen_ext8u_i64(tcg_res, tcg_res);
5800        break;
5801    case 1:
5802        tcg_gen_ext16u_i64(tcg_res, tcg_res);
5803        break;
5804    case 2:
5805        tcg_gen_ext32u_i64(tcg_res, tcg_res);
5806        break;
5807    case 3:
5808        break;
5809    default:
5810        g_assert_not_reached();
5811    }
5812
5813    write_fp_dreg(s, rd, tcg_res);
5814    tcg_temp_free_i64(tcg_res);
5815}
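
/* For reference, ADDV over 8-bit elements behaves like this sketch,
 * accumulating in 64 bits and truncating to the element size at the
 * end, exactly as the code above does:
 *
 *     uint8_t addv_ref(const uint8_t *vn, int elements)
 *     {
 *         uint64_t acc = 0;
 *         for (int i = 0; i < elements; i++) {
 *             acc += vn[i];
 *         }
 *         return (uint8_t)acc;
 *     }
 */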
5816
5817/* C6.3.31 DUP (Element, Vector)
5818 *
5819 *  31  30   29              21 20    16 15        10  9    5 4    0
5820 * +---+---+-------------------+--------+-------------+------+------+
5821 * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 0 1 |  Rn  |  Rd  |
5822 * +---+---+-------------------+--------+-------------+------+------+
5823 *
5824 * size: encoded in imm5 (see ARM ARM LowestSetBit())
5825 */
5826static void handle_simd_dupe(DisasContext *s, int is_q, int rd, int rn,
5827                             int imm5)
5828{
5829    int size = ctz32(imm5);
5830    int esize = 8 << size;
5831    int elements = (is_q ? 128 : 64) / esize;
5832    int index, i;
5833    TCGv_i64 tmp;
5834
5835    if (size > 3 || (size == 3 && !is_q)) {
5836        unallocated_encoding(s);
5837        return;
5838    }
5839
5840    if (!fp_access_check(s)) {
5841        return;
5842    }
5843
5844    index = imm5 >> (size + 1);
5845
5846    tmp = tcg_temp_new_i64();
5847    read_vec_element(s, tmp, rn, index, size);
5848
5849    for (i = 0; i < elements; i++) {
5850        write_vec_element(s, tmp, rd, i, size);
5851    }
5852
5853    if (!is_q) {
5854        clear_vec_high(s, rd);
5855    }
5856
5857    tcg_temp_free_i64(tmp);
5858}
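
/* The imm5 decode above follows the ARM ARM LowestSetBit() pattern:
 * the position of the lowest set bit gives the element size, and the
 * bits above it give the element index. For example imm5 == 0b10110
 * gives size == 1 (16-bit elements) and index == imm5 >> 2 == 5.
 */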
5859
5860/* C6.3.31 DUP (element, scalar)
5861 *  31                   21 20    16 15        10  9    5 4    0
5862 * +-----------------------+--------+-------------+------+------+
5863 * | 0 1 0 1 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 0 1 |  Rn  |  Rd  |
5864 * +-----------------------+--------+-------------+------+------+
5865 */
5866static void handle_simd_dupes(DisasContext *s, int rd, int rn,
5867                              int imm5)
5868{
5869    int size = ctz32(imm5);
5870    int index;
5871    TCGv_i64 tmp;
5872
5873    if (size > 3) {
5874        unallocated_encoding(s);
5875        return;
5876    }
5877
5878    if (!fp_access_check(s)) {
5879        return;
5880    }
5881
5882    index = imm5 >> (size + 1);
5883
5884    /* This instruction just extracts the specified element and
5885     * zero-extends it into the bottom of the destination register.
5886     */
5887    tmp = tcg_temp_new_i64();
5888    read_vec_element(s, tmp, rn, index, size);
5889    write_fp_dreg(s, rd, tmp);
5890    tcg_temp_free_i64(tmp);
5891}
5892
5893/* C6.3.32 DUP (General)
5894 *
5895 *  31  30   29              21 20    16 15        10  9    5 4    0
5896 * +---+---+-------------------+--------+-------------+------+------+
5897 * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 1 1 |  Rn  |  Rd  |
5898 * +---+---+-------------------+--------+-------------+------+------+
5899 *
5900 * size: encoded in imm5 (see ARM ARM LowestSetBit())
5901 */
5902static void handle_simd_dupg(DisasContext *s, int is_q, int rd, int rn,
5903                             int imm5)
5904{
5905    int size = ctz32(imm5);
5906    int esize = 8 << size;
5907    int elements = (is_q ? 128 : 64)/esize;
5908    int i = 0;
5909
5910    if (size > 3 || ((size == 3) && !is_q)) {
5911        unallocated_encoding(s);
5912        return;
5913    }
5914
5915    if (!fp_access_check(s)) {
5916        return;
5917    }
5918
5919    for (i = 0; i < elements; i++) {
5920        write_vec_element(s, cpu_reg(s, rn), rd, i, size);
5921    }
5922    if (!is_q) {
5923        clear_vec_high(s, rd);
5924    }
5925}
5926
5927/* C6.3.150 INS (Element)
5928 *
5929 *  31                   21 20    16 15  14    11  10 9    5 4    0
5930 * +-----------------------+--------+------------+---+------+------+
5931 * | 0 1 1 0 1 1 1 0 0 0 0 |  imm5  | 0 |  imm4  | 1 |  Rn  |  Rd  |
5932 * +-----------------------+--------+------------+---+------+------+
5933 *
5934 * size: encoded in imm5 (see ARM ARM LowestSetBit())
5935 * index: encoded in imm5<4:size+1>
5936 */
5937static void handle_simd_inse(DisasContext *s, int rd, int rn,
5938                             int imm4, int imm5)
5939{
5940    int size = ctz32(imm5);
5941    int src_index, dst_index;
5942    TCGv_i64 tmp;
5943
5944    if (size > 3) {
5945        unallocated_encoding(s);
5946        return;
5947    }
5948
5949    if (!fp_access_check(s)) {
5950        return;
5951    }
5952
5953    dst_index = extract32(imm5, 1+size, 5);
5954    src_index = extract32(imm4, size, 4);
5955
5956    tmp = tcg_temp_new_i64();
5957
5958    read_vec_element(s, tmp, rn, src_index, size);
5959    write_vec_element(s, tmp, rd, dst_index, size);
5960
5961    tcg_temp_free_i64(tmp);
5962}
5963
5964
5965/* C6.3.151 INS (General)
5966 *
5967 *  31                   21 20    16 15        10  9    5 4    0
5968 * +-----------------------+--------+-------------+------+------+
5969 * | 0 1 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 1 1 1 |  Rn  |  Rd  |
5970 * +-----------------------+--------+-------------+------+------+
5971 *
5972 * size: encoded in imm5 (see ARM ARM LowestSetBit())
5973 * index: encoded in imm5<4:size+1>
5974 */
5975static void handle_simd_insg(DisasContext *s, int rd, int rn, int imm5)
5976{
5977    int size = ctz32(imm5);
5978    int idx;
5979
5980    if (size > 3) {
5981        unallocated_encoding(s);
5982        return;
5983    }
5984
5985    if (!fp_access_check(s)) {
5986        return;
5987    }
5988
5989    idx = extract32(imm5, 1 + size, 4 - size);
5990    write_vec_element(s, cpu_reg(s, rn), rd, idx, size);
5991}
5992
5993/*
5994 * C6.3.321 UMOV (General)
5995 * C6.3.237 SMOV (General)
5996 *
5997 *  31  30   29              21 20    16 15    12   10 9    5 4    0
5998 * +---+---+-------------------+--------+-------------+------+------+
5999 * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 1 U 1 1 |  Rn  |  Rd  |
6000 * +---+---+-------------------+--------+-------------+------+------+
6001 *
6002 * U: unsigned when set
6003 * size: encoded in imm5 (see ARM ARM LowestSetBit())
6004 */
6005static void handle_simd_umov_smov(DisasContext *s, int is_q, int is_signed,
6006                                  int rn, int rd, int imm5)
6007{
6008    int size = ctz32(imm5);
6009    int element;
6010    TCGv_i64 tcg_rd;
6011
6012    /* Check for UnallocatedEncodings */
6013    if (is_signed) {
6014        if (size > 2 || (size == 2 && !is_q)) {
6015            unallocated_encoding(s);
6016            return;
6017        }
6018    } else {
6019        if (size > 3
6020            || (size < 3 && is_q)
6021            || (size == 3 && !is_q)) {
6022            unallocated_encoding(s);
6023            return;
6024        }
6025    }
6026
6027    if (!fp_access_check(s)) {
6028        return;
6029    }
6030
6031    element = extract32(imm5, 1+size, 4);
6032
6033    tcg_rd = cpu_reg(s, rd);
6034    read_vec_element(s, tcg_rd, rn, element, size | (is_signed ? MO_SIGN : 0));
6035    if (is_signed && !is_q) {
6036        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
6037    }
6038}
6039
6040/* C3.6.5 AdvSIMD copy
6041 *   31  30  29  28             21 20  16 15  14  11 10  9    5 4    0
6042 * +---+---+----+-----------------+------+---+------+---+------+------+
6043 * | 0 | Q | op | 0 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 |  Rn  |  Rd  |
6044 * +---+---+----+-----------------+------+---+------+---+------+------+
6045 */
6046static void disas_simd_copy(DisasContext *s, uint32_t insn)
6047{
6048    int rd = extract32(insn, 0, 5);
6049    int rn = extract32(insn, 5, 5);
6050    int imm4 = extract32(insn, 11, 4);
6051    int op = extract32(insn, 29, 1);
6052    int is_q = extract32(insn, 30, 1);
6053    int imm5 = extract32(insn, 16, 5);
6054
6055    if (op) {
6056        if (is_q) {
6057            /* INS (element) */
6058            handle_simd_inse(s, rd, rn, imm4, imm5);
6059        } else {
6060            unallocated_encoding(s);
6061        }
6062    } else {
6063        switch (imm4) {
6064        case 0:
6065            /* DUP (element - vector) */
6066            handle_simd_dupe(s, is_q, rd, rn, imm5);
6067            break;
6068        case 1:
6069            /* DUP (general) */
6070            handle_simd_dupg(s, is_q, rd, rn, imm5);
6071            break;
6072        case 3:
6073            if (is_q) {
6074                /* INS (general) */
6075                handle_simd_insg(s, rd, rn, imm5);
6076            } else {
6077                unallocated_encoding(s);
6078            }
6079            break;
6080        case 5:
6081        case 7:
6082            /* UMOV/SMOV (is_q indicates 32/64; imm4 indicates signedness) */
6083            handle_simd_umov_smov(s, is_q, (imm4 == 5), rn, rd, imm5);
6084            break;
6085        default:
6086            unallocated_encoding(s);
6087            break;
6088        }
6089    }
6090}
6091
6092/* C3.6.6 AdvSIMD modified immediate
6093 *  31  30   29  28                 19 18 16 15   12  11  10  9     5 4    0
6094 * +---+---+----+---------------------+-----+-------+----+---+-------+------+
6095 * | 0 | Q | op | 0 1 1 1 1 0 0 0 0 0 | abc | cmode | o2 | 1 | defgh |  Rd  |
6096 * +---+---+----+---------------------+-----+-------+----+---+-------+------+
6097 *
6098 * There are a number of operations that can be carried out here:
6099 *   MOVI - move (shifted) imm into register
6100 *   MVNI - move inverted (shifted) imm into register
6101 *   ORR  - bitwise OR of (shifted) imm with register
6102 *   BIC  - bitwise clear of (shifted) imm with register
6103 */
6104static void disas_simd_mod_imm(DisasContext *s, uint32_t insn)
6105{
6106    int rd = extract32(insn, 0, 5);
6107    int cmode = extract32(insn, 12, 4);
6108    int cmode_3_1 = extract32(cmode, 1, 3);
6109    int cmode_0 = extract32(cmode, 0, 1);
6110    int o2 = extract32(insn, 11, 1);
6111    uint64_t abcdefgh = extract32(insn, 5, 5) | (extract32(insn, 16, 3) << 5);
6112    bool is_neg = extract32(insn, 29, 1);
6113    bool is_q = extract32(insn, 30, 1);
6114    uint64_t imm = 0;
6115    TCGv_i64 tcg_rd, tcg_imm;
6116    int i;
6117
6118    if (o2 != 0 || ((cmode == 0xf) && is_neg && !is_q)) {
6119        unallocated_encoding(s);
6120        return;
6121    }
6122
6123    if (!fp_access_check(s)) {
6124        return;
6125    }
6126
6127    /* See AdvSIMDExpandImm() in ARM ARM */
6128    switch (cmode_3_1) {
6129    case 0: /* Replicate(Zeros(24):imm8, 2) */
6130    case 1: /* Replicate(Zeros(16):imm8:Zeros(8), 2) */
6131    case 2: /* Replicate(Zeros(8):imm8:Zeros(16), 2) */
6132    case 3: /* Replicate(imm8:Zeros(24), 2) */
6133    {
6134        int shift = cmode_3_1 * 8;
6135        imm = bitfield_replicate(abcdefgh << shift, 32);
6136        break;
6137    }
6138    case 4: /* Replicate(Zeros(8):imm8, 4) */
6139    case 5: /* Replicate(imm8:Zeros(8), 4) */
6140    {
6141        int shift = (cmode_3_1 & 0x1) * 8;
6142        imm = bitfield_replicate(abcdefgh << shift, 16);
6143        break;
6144    }
6145    case 6:
6146        if (cmode_0) {
6147            /* Replicate(Zeros(8):imm8:Ones(16), 2) */
6148            imm = (abcdefgh << 16) | 0xffff;
6149        } else {
6150            /* Replicate(Zeros(16):imm8:Ones(8), 2) */
6151            imm = (abcdefgh << 8) | 0xff;
6152        }
6153        imm = bitfield_replicate(imm, 32);
6154        break;
6155    case 7:
6156        if (!cmode_0 && !is_neg) {
6157            imm = bitfield_replicate(abcdefgh, 8);
6158        } else if (!cmode_0 && is_neg) {
6159            int i;
6160            imm = 0;
6161            for (i = 0; i < 8; i++) {
6162                if ((abcdefgh) & (1 << i)) {
6163                    imm |= 0xffULL << (i * 8);
6164                }
6165            }
6166        } else if (cmode_0) {
6167            if (is_neg) {
6168                imm = (abcdefgh & 0x3f) << 48;
6169                if (abcdefgh & 0x80) {
6170                    imm |= 0x8000000000000000ULL;
6171                }
6172                if (abcdefgh & 0x40) {
6173                    imm |= 0x3fc0000000000000ULL;
6174                } else {
6175                    imm |= 0x4000000000000000ULL;
6176                }
6177            } else {
6178                imm = (abcdefgh & 0x3f) << 19;
6179                if (abcdefgh & 0x80) {
6180                    imm |= 0x80000000;
6181                }
6182                if (abcdefgh & 0x40) {
6183                    imm |= 0x3e000000;
6184                } else {
6185                    imm |= 0x40000000;
6186                }
6187                imm |= (imm << 32);
6188            }
6189        }
6190        break;
6191    }
6192
6193    if (cmode_3_1 != 7 && is_neg) {
6194        imm = ~imm;
6195    }
6196
6197    tcg_imm = tcg_const_i64(imm);
6198    tcg_rd = new_tmp_a64(s);
6199
6200    for (i = 0; i < 2; i++) {
6201        int foffs = i ? fp_reg_hi_offset(s, rd) : fp_reg_offset(s, rd, MO_64);
6202
6203        if (i == 1 && !is_q) {
6204            /* non-quad ops clear high half of vector */
6205            tcg_gen_movi_i64(tcg_rd, 0);
6206        } else if ((cmode & 0x9) == 0x1 || (cmode & 0xd) == 0x9) {
6207            tcg_gen_ld_i64(tcg_rd, cpu_env, foffs);
6208            if (is_neg) {
6209                /* AND (BIC) */
6210                tcg_gen_and_i64(tcg_rd, tcg_rd, tcg_imm);
6211            } else {
6212                /* ORR */
6213                tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_imm);
6214            }
6215        } else {
6216            /* MOVI */
6217            tcg_gen_mov_i64(tcg_rd, tcg_imm);
6218        }
6219        tcg_gen_st_i64(tcg_rd, cpu_env, foffs);
6220    }
6221
6222    tcg_temp_free_i64(tcg_imm);
6223}
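
/* For reference, the Replicate() step used throughout the expansion
 * above can be modelled by this sketch of what bitfield_replicate()
 * provides, assuming the pattern already fits in its low e bits and
 * e is a power of two:
 *
 *     uint64_t replicate_ref(uint64_t mask, unsigned int e)
 *     {
 *         while (e < 64) {
 *             mask |= mask << e;
 *             e *= 2;
 *         }
 *         return mask;
 *     }
 *
 * e.g. replicate_ref(0x0000ff00, 32) == 0x0000ff000000ff00.
 */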
6224
6225/* C3.6.7 AdvSIMD scalar copy
6226 *  31 30  29  28             21 20  16 15  14  11 10  9    5 4    0
6227 * +-----+----+-----------------+------+---+------+---+------+------+
6228 * | 0 1 | op | 1 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 |  Rn  |  Rd  |
6229 * +-----+----+-----------------+------+---+------+---+------+------+
6230 */
6231static void disas_simd_scalar_copy(DisasContext *s, uint32_t insn)
6232{
6233    int rd = extract32(insn, 0, 5);
6234    int rn = extract32(insn, 5, 5);
6235    int imm4 = extract32(insn, 11, 4);
6236    int imm5 = extract32(insn, 16, 5);
6237    int op = extract32(insn, 29, 1);
6238
6239    if (op != 0 || imm4 != 0) {
6240        unallocated_encoding(s);
6241        return;
6242    }
6243
6244    /* DUP (element, scalar) */
6245    handle_simd_dupes(s, rd, rn, imm5);
6246}
6247
6248/* C3.6.8 AdvSIMD scalar pairwise
6249 *  31 30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
6250 * +-----+---+-----------+------+-----------+--------+-----+------+------+
6251 * | 0 1 | U | 1 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
6252 * +-----+---+-----------+------+-----------+--------+-----+------+------+
6253 */
6254static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn)
6255{
6256    int u = extract32(insn, 29, 1);
6257    int size = extract32(insn, 22, 2);
6258    int opcode = extract32(insn, 12, 5);
6259    int rn = extract32(insn, 5, 5);
6260    int rd = extract32(insn, 0, 5);
6261    TCGv_ptr fpst;
6262
6263    /* For some ops (the FP ones), size[1] is part of the encoding.
6264     * For ADDP it strictly is not, but size[1] is always 1 for all
6265     * valid encodings.
6266     */
6267    opcode |= (extract32(size, 1, 1) << 5);
6268
6269    switch (opcode) {
6270    case 0x3b: /* ADDP */
6271        if (u || size != 3) {
6272            unallocated_encoding(s);
6273            return;
6274        }
6275        if (!fp_access_check(s)) {
6276            return;
6277        }
6278
6279        TCGV_UNUSED_PTR(fpst);
6280        break;
6281    case 0xc: /* FMAXNMP */
6282    case 0xd: /* FADDP */
6283    case 0xf: /* FMAXP */
6284    case 0x2c: /* FMINNMP */
6285    case 0x2f: /* FMINP */
6286        /* FP op, size[0] is 32 or 64 bit */
6287        if (!u) {
6288            unallocated_encoding(s);
6289            return;
6290        }
6291        if (!fp_access_check(s)) {
6292            return;
6293        }
6294
6295        size = extract32(size, 0, 1) ? 3 : 2;
6296        fpst = get_fpstatus_ptr();
6297        break;
6298    default:
6299        unallocated_encoding(s);
6300        return;
6301    }
6302
6303    if (size == 3) {
6304        TCGv_i64 tcg_op1 = tcg_temp_new_i64();
6305        TCGv_i64 tcg_op2 = tcg_temp_new_i64();
6306        TCGv_i64 tcg_res = tcg_temp_new_i64();
6307
6308        read_vec_element(s, tcg_op1, rn, 0, MO_64);
6309        read_vec_element(s, tcg_op2, rn, 1, MO_64);
6310
6311        switch (opcode) {
6312        case 0x3b: /* ADDP */
6313            tcg_gen_add_i64(tcg_res, tcg_op1, tcg_op2);
6314            break;
6315        case 0xc: /* FMAXNMP */
6316            gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
6317            break;
6318        case 0xd: /* FADDP */
6319            gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
6320            break;
6321        case 0xf: /* FMAXP */
6322            gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
6323            break;
6324        case 0x2c: /* FMINNMP */
6325            gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
6326            break;
6327        case 0x2f: /* FMINP */
6328            gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
6329            break;
6330        default:
6331            g_assert_not_reached();
6332        }
6333
6334        write_fp_dreg(s, rd, tcg_res);
6335
6336        tcg_temp_free_i64(tcg_op1);
6337        tcg_temp_free_i64(tcg_op2);
6338        tcg_temp_free_i64(tcg_res);
6339    } else {
6340        TCGv_i32 tcg_op1 = tcg_temp_new_i32();
6341        TCGv_i32 tcg_op2 = tcg_temp_new_i32();
6342        TCGv_i32 tcg_res = tcg_temp_new_i32();
6343
6344        read_vec_element_i32(s, tcg_op1, rn, 0, MO_32);
6345        read_vec_element_i32(s, tcg_op2, rn, 1, MO_32);
6346
6347        switch (opcode) {
6348        case 0xc: /* FMAXNMP */
6349            gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
6350            break;
6351        case 0xd: /* FADDP */
6352            gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
6353            break;
6354        case 0xf: /* FMAXP */
6355            gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
6356            break;
6357        case 0x2c: /* FMINNMP */
6358            gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
6359            break;
6360        case 0x2f: /* FMINP */
6361            gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
6362            break;
6363        default:
6364            g_assert_not_reached();
6365        }
6366
6367        write_fp_sreg(s, rd, tcg_res);
6368
6369        tcg_temp_free_i32(tcg_op1);
6370        tcg_temp_free_i32(tcg_op2);
6371        tcg_temp_free_i32(tcg_res);
6372    }
6373
6374    if (!TCGV_IS_UNUSED_PTR(fpst)) {
6375        tcg_temp_free_ptr(fpst);
6376    }
6377}
6378
6379/*
6380 * Common SSHR[RA]/USHR[RA] - Shift right (optional rounding/accumulate)
6381 *
6382 * This handles the common shift-right logic and is used by both
6383 * the vector and the scalar code.
6384 */
6385static void handle_shri_with_rndacc(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
6386                                    TCGv_i64 tcg_rnd, bool accumulate,
6387                                    bool is_u, int size, int shift)
6388{
6389    bool extended_result = false;
6390    bool round = !TCGV_IS_UNUSED_I64(tcg_rnd);
6391    int ext_lshift = 0;
6392    TCGv_i64 tcg_src_hi;
6393
6394    if (round && size == 3) {
6395        extended_result = true;
6396        ext_lshift = 64 - shift;
6397        tcg_src_hi = tcg_temp_new_i64();
6398    } else if (shift == 64) {
6399        if (!accumulate && is_u) {
6400            /* result is zero */
6401            tcg_gen_movi_i64(tcg_res, 0);
6402            return;
6403        }
6404    }
6405
6406    /* Deal with the rounding step */
6407    if (round) {
6408        if (extended_result) {
6409            TCGv_i64 tcg_zero = tcg_const_i64(0);
6410            if (!is_u) {
6411                /* take care of sign extending tcg_res */
6412                tcg_gen_sari_i64(tcg_src_hi, tcg_src, 63);
6413                tcg_gen_add2_i64(tcg_src, tcg_src_hi,
6414                                 tcg_src, tcg_src_hi,
6415                                 tcg_rnd, tcg_zero);
6416            } else {
6417                tcg_gen_add2_i64(tcg_src, tcg_src_hi,
6418                                 tcg_src, tcg_zero,
6419                                 tcg_rnd, tcg_zero);
6420            }
6421            tcg_temp_free_i64(tcg_zero);
6422        } else {
6423            tcg_gen_add_i64(tcg_src, tcg_src, tcg_rnd);
6424        }
6425    }
6426
6427    /* Now do the shift right */
6428    if (round && extended_result) {
6429        /* extended case, >64 bit precision required */
6430        if (ext_lshift == 0) {
6431            /* special case, only high bits matter */
6432            tcg_gen_mov_i64(tcg_src, tcg_src_hi);
6433        } else {
6434            tcg_gen_shri_i64(tcg_src, tcg_src, shift);
6435            tcg_gen_shli_i64(tcg_src_hi, tcg_src_hi, ext_lshift);
6436            tcg_gen_or_i64(tcg_src, tcg_src, tcg_src_hi);
6437        }
6438    } else {
6439        if (is_u) {
6440            if (shift == 64) {
6441                /* essentially shifting in 64 zeros */
6442                tcg_gen_movi_i64(tcg_src, 0);
6443            } else {
6444                tcg_gen_shri_i64(tcg_src, tcg_src, shift);
6445            }
6446        } else {
6447            if (shift == 64) {
6448                /* effectively extending the sign-bit */
6449                tcg_gen_sari_i64(tcg_src, tcg_src, 63);
6450            } else {
6451                tcg_gen_sari_i64(tcg_src, tcg_src, shift);
6452            }
6453        }
6454    }
6455
6456    if (accumulate) {
6457        tcg_gen_add_i64(tcg_res, tcg_res, tcg_src);
6458    } else {
6459        tcg_gen_mov_i64(tcg_res, tcg_src);
6460    }
6461
6462    if (extended_result) {
6463        tcg_temp_free_i64(tcg_src_hi);
6464    }
6465}
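
/* For element sizes below 64 bits the rounding step above reduces to
 * this host-side sketch; the 128-bit extended path exists only because
 * adding the rounding constant to a 64-bit value can carry out of
 * bit 63:
 *
 *     uint64_t urshr_ref(uint64_t x, int shift)
 *     {
 *         return (x + (1ULL << (shift - 1))) >> shift;
 *     }
 *
 * e.g. urshr_ref(7, 2) == 2 where a plain 7 >> 2 == 1.
 */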
6466
6467/* Common SHL/SLI - Shift left with an optional insert */
6468static void handle_shli_with_ins(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
6469                                 bool insert, int shift)
6470{
6471    if (insert) { /* SLI */
6472        tcg_gen_deposit_i64(tcg_res, tcg_res, tcg_src, shift, 64 - shift);
6473    } else { /* SHL */
6474        tcg_gen_shli_i64(tcg_res, tcg_src, shift);
6475    }
6476}
6477
6478/* SRI: shift right with insert */
6479static void handle_shri_with_ins(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
6480                                 int size, int shift)
6481{
6482    int esize = 8 << size;
6483
6484    /* shift count same as element size is valid but does nothing;
6485     * special case to avoid potential shift by 64.
6486     */
6487    if (shift != esize) {
6488        tcg_gen_shri_i64(tcg_src, tcg_src, shift);
6489        tcg_gen_deposit_i64(tcg_res, tcg_res, tcg_src, 0, esize - shift);
6490    }
6491}
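
/* For example, SRI with 8-bit elements and shift == 3 computes
 * vd[i] = (vd[i] & 0xe0) | (vn[i] >> 3): the top 'shift' bits of each
 * destination element are preserved and the shifted source is
 * inserted below them.
 */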
6492
6493/* SSHR[RA]/USHR[RA] - Scalar shift right (optional rounding/accumulate) */
6494static void handle_scalar_simd_shri(DisasContext *s,
6495                                    bool is_u, int immh, int immb,
6496                                    int opcode, int rn, int rd)
6497{
6498    const int size = 3;
6499    int immhb = immh << 3 | immb;
6500    int shift = 2 * (8 << size) - immhb;
6501    bool accumulate = false;
6502    bool round = false;
6503    bool insert = false;
6504    TCGv_i64 tcg_rn;
6505    TCGv_i64 tcg_rd;
6506    TCGv_i64 tcg_round;
6507
6508    if (!extract32(immh, 3, 1)) {
6509        unallocated_encoding(s);
6510        return;
6511    }
6512
6513    if (!fp_access_check(s)) {
6514        return;
6515    }
6516
6517    switch (opcode) {
6518    case 0x02: /* SSRA / USRA (accumulate) */
6519        accumulate = true;
6520        break;
6521    case 0x04: /* SRSHR / URSHR (rounding) */
6522        round = true;
6523        break;
6524    case 0x06: /* SRSRA / URSRA (accum + rounding) */
6525        accumulate = round = true;
6526        break;
6527    case 0x08: /* SRI */
6528        insert = true;
6529        break;
6530    }
6531
6532    if (round) {
6533        uint64_t round_const = 1ULL << (shift - 1);
6534        tcg_round = tcg_const_i64(round_const);
6535    } else {
6536        TCGV_UNUSED_I64(tcg_round);
6537    }
6538
6539    tcg_rn = read_fp_dreg(s, rn);
6540    tcg_rd = (accumulate || insert) ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
6541
6542    if (insert) {
6543        handle_shri_with_ins(tcg_rd, tcg_rn, size, shift);
6544    } else {
6545        handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
6546                                accumulate, is_u, size, shift);
6547    }
6548
6549    write_fp_dreg(s, rd, tcg_rd);
6550
6551    tcg_temp_free_i64(tcg_rn);
6552    tcg_temp_free_i64(tcg_rd);
6553    if (round) {
6554        tcg_temp_free_i64(tcg_round);
6555    }
6556}
6557
6558/* SHL/SLI - Scalar shift left */
6559static void handle_scalar_simd_shli(DisasContext *s, bool insert,
6560                                    int immh, int immb, int opcode,
6561                                    int rn, int rd)
6562{
6563    int size = 32 - clz32(immh) - 1;
6564    int immhb = immh << 3 | immb;
6565    int shift = immhb - (8 << size);
6566    TCGv_i64 tcg_rn;
6567    TCGv_i64 tcg_rd;
6568
6569    if (!extract32(immh, 3, 1)) {
6570        unallocated_encoding(s);
6571        return;
6572    }
6573
6574    if (!fp_access_check(s)) {
6575        return;
6576    }
6577
6578    tcg_rn = read_fp_dreg(s, rn);
6579    tcg_rd = insert ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
6580
6581    handle_shli_with_ins(tcg_rd, tcg_rn, insert, shift);
6582
6583    write_fp_dreg(s, rd, tcg_rd);
6584
6585    tcg_temp_free_i64(tcg_rn);
6586    tcg_temp_free_i64(tcg_rd);
6587}
6588
6589/* SQSHRN/SQSHRUN - Saturating (signed/unsigned) shift right with
6590 * (signed/unsigned) narrowing */
6591static void handle_vec_simd_sqshrn(DisasContext *s, bool is_scalar, bool is_q,
6592                                   bool is_u_shift, bool is_u_narrow,
6593                                   int immh, int immb, int opcode,
6594                                   int rn, int rd)
6595{
6596    int immhb = immh << 3 | immb;
6597    int size = 32 - clz32(immh) - 1;
6598    int esize = 8 << size;
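        /* The shift applies to the wide (2 * esize) source element and
         * ranges from 1 to esize: e.g. immh == 0b0001 narrows to bytes,
         * with immh:immb in [8, 15] giving a shift of (16 - immh:immb).
         */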
6599    int shift = (2 * esize) - immhb;
6600    int elements = is_scalar ? 1 : (64 / esize);
6601    bool round = extract32(opcode, 0, 1);
6602    TCGMemOp ldop = (size + 1) | (is_u_shift ? 0 : MO_SIGN);
6603    TCGv_i64 tcg_rn, tcg_rd, tcg_round;
6604    TCGv_i32 tcg_rd_narrowed;
6605    TCGv_i64 tcg_final;
6606
6607    static NeonGenNarrowEnvFn * const signed_narrow_fns[4][2] = {
6608        { gen_helper_neon_narrow_sat_s8,
6609          gen_helper_neon_unarrow_sat8 },
6610        { gen_helper_neon_narrow_sat_s16,
6611          gen_helper_neon_unarrow_sat16 },
6612        { gen_helper_neon_narrow_sat_s32,
6613          gen_helper_neon_unarrow_sat32 },
6614        { NULL, NULL },
6615    };
6616    static NeonGenNarrowEnvFn * const unsigned_narrow_fns[4] = {
6617        gen_helper_neon_narrow_sat_u8,
6618        gen_helper_neon_narrow_sat_u16,
6619        gen_helper_neon_narrow_sat_u32,
6620        NULL
6621    };
6622    NeonGenNarrowEnvFn *narrowfn;
6623
6624    int i;
6625
6626    assert(size < 4);
6627
6628    if (extract32(immh, 3, 1)) {
6629        unallocated_encoding(s);
6630        return;
6631    }
6632
6633    if (!fp_access_check(s)) {
6634        return;
6635    }
6636
6637    if (is_u_shift) {
6638        narrowfn = unsigned_narrow_fns[size];
6639    } else {
6640        narrowfn = signed_narrow_fns[size][is_u_narrow ? 1 : 0];
6641    }
6642
6643    tcg_rn = tcg_temp_new_i64();
6644    tcg_rd = tcg_temp_new_i64();
6645    tcg_rd_narrowed = tcg_temp_new_i32();
6646    tcg_final = tcg_const_i64(0);
6647
6648    if (round) {
6649        uint64_t round_const = 1ULL << (shift - 1);
6650        tcg_round = tcg_const_i64(round_const);
6651    } else {
6652        TCGV_UNUSED_I64(tcg_round);
6653    }
6654
6655    for (i = 0; i < elements; i++) {
6656        read_vec_element(s, tcg_rn, rn, i, ldop);
6657        handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
6658                                false, is_u_shift, size+1, shift);
6659        narrowfn(tcg_rd_narrowed, cpu_env, tcg_rd);
6660        tcg_gen_extu_i32_i64(tcg_rd, tcg_rd_narrowed);
6661        tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
6662    }
6663
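        /* The "2" variants (is_q) deposit the narrowed results in the
         * upper 64 bits of Vd and leave the lower half untouched.
         */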
6664    if (!is_q) {
6665        clear_vec_high(s, rd);
6666        write_vec_element(s, tcg_final, rd, 0, MO_64);
6667    } else {
6668        write_vec_element(s, tcg_final, rd, 1, MO_64);
6669    }
6670
6671    if (round) {
6672        tcg_temp_free_i64(tcg_round);
6673    }
6674    tcg_temp_free_i64(tcg_rn);
6675    tcg_temp_free_i64(tcg_rd);
6676    tcg_temp_free_i32(tcg_rd_narrowed);
6677    tcg_temp_free_i64(tcg_final);
6679}
6680
6681/* SQSHLU, UQSHL, SQSHL: saturating left shifts */
6682static void handle_simd_qshl(DisasContext *s, bool scalar, bool is_q,
6683                             bool src_unsigned, bool dst_unsigned,
6684                             int immh, int immb, int rn, int rd)
6685{
6686    int immhb = immh << 3 | immb;
6687    int size = 32 - clz32(immh) - 1;
6688    int shift = immhb - (8 << size);
6689    int pass;
6690
6691    assert(immh != 0);
6692    assert(!(scalar && is_q));
6693
6694    if (!scalar) {
6695        if (!is_q && extract32(immh, 3, 1)) {
6696            unallocated_encoding(s);
6697            return;
6698        }
6699
6700        /* Since we use the variable-shift helpers we must
6701         * replicate the shift count into each element of
6702         * the tcg_shift value.
6703         */
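            /* e.g. for size == 0 a shift count of 3 becomes 0x03030303 */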
6704        switch (size) {
6705        case 0:
6706            shift |= shift << 8;
6707            /* fall through */
6708        case 1:
6709            shift |= shift << 16;
6710            break;
6711        case 2:
6712        case 3:
6713            break;
6714        default:
6715            g_assert_not_reached();
6716        }
6717    }
6718
6719    if (!fp_access_check(s)) {
6720        return;
6721    }
6722
6723    if (size == 3) {
6724        TCGv_i64 tcg_shift = tcg_const_i64(shift);
6725        static NeonGenTwo64OpEnvFn * const fns[2][2] = {
6726            { gen_helper_neon_qshl_s64, gen_helper_neon_qshlu_s64 },
6727            { NULL, gen_helper_neon_qshl_u64 },
6728        };
6729        NeonGenTwo64OpEnvFn *genfn = fns[src_unsigned][dst_unsigned];
6730        int maxpass = is_q ? 2 : 1;
6731
6732        for (pass = 0; pass < maxpass; pass++) {
6733            TCGv_i64 tcg_op = tcg_temp_new_i64();
6734
6735            read_vec_element(s, tcg_op, rn, pass, MO_64);
6736            genfn(tcg_op, cpu_env, tcg_op, tcg_shift);
6737            write_vec_element(s, tcg_op, rd, pass, MO_64);
6738
6739            tcg_temp_free_i64(tcg_op);
6740        }
6741        tcg_temp_free_i64(tcg_shift);
6742
6743        if (!is_q) {
6744            clear_vec_high(s, rd);
6745        }
6746    } else {
6747        TCGv_i32 tcg_shift = tcg_const_i32(shift);
6748        static NeonGenTwoOpEnvFn * const fns[2][2][3] = {
6749            {
6750                { gen_helper_neon_qshl_s8,
6751                  gen_helper_neon_qshl_s16,
6752                  gen_helper_neon_qshl_s32 },
6753                { gen_helper_neon_qshlu_s8,
6754                  gen_helper_neon_qshlu_s16,
6755                  gen_helper_neon_qshlu_s32 }
6756            }, {
6757                { NULL, NULL, NULL },
6758                { gen_helper_neon_qshl_u8,
6759                  gen_helper_neon_qshl_u16,
6760                  gen_helper_neon_qshl_u32 }
6761            }
6762        };
6763        NeonGenTwoOpEnvFn *genfn = fns[src_unsigned][dst_unsigned][size];
6764        TCGMemOp memop = scalar ? size : MO_32;
6765        int maxpass = scalar ? 1 : is_q ? 4 : 2;
6766
6767        for (pass = 0; pass < maxpass; pass++) {
6768            TCGv_i32 tcg_op = tcg_temp_new_i32();
6769
6770            read_vec_element_i32(s, tcg_op, rn, pass, memop);
6771            genfn(tcg_op, cpu_env, tcg_op, tcg_shift);
6772            if (scalar) {
6773                switch (size) {
6774                case 0:
6775                    tcg_gen_ext8u_i32(tcg_op, tcg_op);
6776                    break;
6777                case 1:
6778                    tcg_gen_ext16u_i32(tcg_op, tcg_op);
6779                    break;
6780                case 2:
6781                    break;
6782                default:
6783                    g_assert_not_reached();
6784                }
6785                write_fp_sreg(s, rd, tcg_op);
6786            } else {
6787                write_vec_element_i32(s, tcg_op, rd, pass, MO_32);
6788            }
6789
6790            tcg_temp_free_i32(tcg_op);
6791        }
6792        tcg_temp_free_i32(tcg_shift);
6793
6794        if (!is_q && !scalar) {
6795            clear_vec_high(s, rd);
6796        }
6797    }
6798}
6799
6800/* Common vector code for handling integer to FP conversion */
6801static void handle_simd_intfp_conv(DisasContext *s, int rd, int rn,
6802                                   int elements, int is_signed,
6803                                   int fracbits, int size)
6804{
6805    bool is_double = (size == 3);
6806    TCGv_ptr tcg_fpst = get_fpstatus_ptr();
6807    TCGv_i32 tcg_shift = tcg_const_i32(fracbits);
6808    TCGv_i64 tcg_int = tcg_temp_new_i64();
6809    TCGMemOp mop = size | (is_signed ? MO_SIGN : 0);
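        /* The conversion helpers treat the input as a fixed-point value
         * with 'fracbits' fraction bits, i.e. result = value * 2^-fracbits;
         * fracbits == 0 is the plain integer case.
         */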
6810    int pass;
6811
6812    for (pass = 0; pass < elements; pass++) {
6813        read_vec_element(s, tcg_int, rn, pass, mop);
6814
6815        if (is_double) {
6816            TCGv_i64 tcg_double = tcg_temp_new_i64();
6817            if (is_signed) {
6818                gen_helper_vfp_sqtod(tcg_double, tcg_int,
6819                                     tcg_shift, tcg_fpst);
6820            } else {
6821                gen_helper_vfp_uqtod(tcg_double, tcg_int,
6822                                     tcg_shift, tcg_fpst);
6823            }
6824            if (elements == 1) {
6825                write_fp_dreg(s, rd, tcg_double);
6826            } else {
6827                write_vec_element(s, tcg_double, rd, pass, MO_64);
6828            }
6829            tcg_temp_free_i64(tcg_double);
6830        } else {
6831            TCGv_i32 tcg_single = tcg_temp_new_i32();
6832            if (is_signed) {
6833                gen_helper_vfp_sqtos(tcg_single, tcg_int,
6834                                     tcg_shift, tcg_fpst);
6835            } else {
6836                gen_helper_vfp_uqtos(tcg_single, tcg_int,
6837                                     tcg_shift, tcg_fpst);
6838            }
6839            if (elements == 1) {
6840                write_fp_sreg(s, rd, tcg_single);
6841            } else {
6842                write_vec_element_i32(s, tcg_single, rd, pass, MO_32);
6843            }
6844            tcg_temp_free_i32(tcg_single);
6845        }
6846    }
6847
6848    if (!is_double && elements == 2) {
6849        clear_vec_high(s, rd);
6850    }
6851
6852    tcg_temp_free_i64(tcg_int);
6853    tcg_temp_free_ptr(tcg_fpst);
6854    tcg_temp_free_i32(tcg_shift);
6855}
6856
6857/* UCVTF/SCVTF - Integer to FP conversion */
6858static void handle_simd_shift_intfp_conv(DisasContext *s, bool is_scalar,
6859                                         bool is_q, bool is_u,
6860                                         int immh, int immb, int opcode,
6861                                         int rn, int rd)
6862{
6863    bool is_double = extract32(immh, 3, 1);
6864    int size = is_double ? MO_64 : MO_32;
6865    int elements;
6866    int immhb = immh << 3 | immb;
6867    int fracbits = (is_double ? 128 : 64) - immhb;
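        /* immh:immb encodes fracbits as (2 * esize) - immh:immb, giving
         * 1..32 fraction bits for single and 1..64 for double precision.
         */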
6868
6869    if (!extract32(immh, 2, 2)) {
6870        unallocated_encoding(s);
6871        return;
6872    }
6873
6874    if (is_scalar) {
6875        elements = 1;
6876    } else {
6877        elements = is_double ? 2 : is_q ? 4 : 2;
6878        if (is_double && !is_q) {
6879            unallocated_encoding(s);
6880            return;
6881        }
6882    }
6883
6884    if (!fp_access_check(s)) {
6885        return;
6886    }
6887
6888    /* immh == 0 would be a failure of the decode logic */
6889    g_assert(immh);
6890
6891    handle_simd_intfp_conv(s, rd, rn, elements, !is_u, fracbits, size);
6892}
6893
6894/* FCVTZS, FCVTZU - FP to fixed-point conversion */
6895static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar,
6896                                         bool is_q, bool is_u,
6897                                         int immh, int immb, int rn, int rd)
6898{
6899    bool is_double = extract32(immh, 3, 1);
6900    int immhb = immh << 3 | immb;
6901    int fracbits = (is_double ? 128 : 64) - immhb;
6902    int pass;
6903    TCGv_ptr tcg_fpstatus;
6904    TCGv_i32 tcg_rmode, tcg_shift;
6905
6906    if (!extract32(immh, 2, 2)) {
6907        unallocated_encoding(s);
6908        return;
6909    }
6910
6911    if (!is_scalar && !is_q && is_double) {
6912        unallocated_encoding(s);
6913        return;
6914    }
6915
6916    if (!fp_access_check(s)) {
6917        return;
6918    }
6919
6920    assert(!(is_scalar && is_q));
6921
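        /* FCVTZ* always truncates, whatever FPCR.RMode says; set_rmode
         * hands the previous rounding mode back in tcg_rmode so the
         * second call at the end of the function can restore it.
         */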
6922    tcg_rmode = tcg_const_i32(arm_rmode_to_sf(FPROUNDING_ZERO));
6923    gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
6924    tcg_fpstatus = get_fpstatus_ptr();
6925    tcg_shift = tcg_const_i32(fracbits);
6926
6927    if (is_double) {
6928        int maxpass = is_scalar ? 1 : 2;
6929
6930        for (pass = 0; pass < maxpass; pass++) {
6931            TCGv_i64 tcg_op = tcg_temp_new_i64();
6932
6933            read_vec_element(s, tcg_op, rn, pass, MO_64);
6934            if (is_u) {
6935                gen_helper_vfp_touqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
6936            } else {
6937                gen_helper_vfp_tosqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
6938            }
6939            write_vec_element(s, tcg_op, rd, pass, MO_64);
6940            tcg_temp_free_i64(tcg_op);
6941        }
6942        if (!is_q) {
6943            clear_vec_high(s, rd);
6944        }
6945    } else {
6946        int maxpass = is_scalar ? 1 : is_q ? 4 : 2;
6947        for (pass = 0; pass < maxpass; pass++) {
6948            TCGv_i32 tcg_op = tcg_temp_new_i32();
6949
6950            read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
6951            if (is_u) {
6952                gen_helper_vfp_touls(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
6953            } else {
6954                gen_helper_vfp_tosls(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
6955            }
6956            if (is_scalar) {
6957                write_fp_sreg(s, rd, tcg_op);
6958            } else {
6959                write_vec_element_i32(s, tcg_op, rd, pass, MO_32);
6960            }
6961            tcg_temp_free_i32(tcg_op);
6962        }
6963        if (!is_q && !is_scalar) {
6964            clear_vec_high(s, rd);
6965        }
6966    }
6967
6968    tcg_temp_free_ptr(tcg_fpstatus);
6969    tcg_temp_free_i32(tcg_shift);
6970    gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
6971    tcg_temp_free_i32(tcg_rmode);
6972}
6973
6974/* C3.6.9 AdvSIMD scalar shift by immediate
6975 *  31 30  29 28         23 22  19 18  16 15    11  10 9    5 4    0
6976 * +-----+---+-------------+------+------+--------+---+------+------+
6977 * | 0 1 | U | 1 1 1 1 1 0 | immh | immb | opcode | 1 |  Rn  |  Rd  |
6978 * +-----+---+-------------+------+------+--------+---+------+------+
6979 *
6980 * This is the scalar version so it works on fixed-size registers
6981 */
6982static void disas_simd_scalar_shift_imm(DisasContext *s, uint32_t insn)
6983{
6984    int rd = extract32(insn, 0, 5);
6985    int rn = extract32(insn, 5, 5);
6986    int opcode = extract32(insn, 11, 5);
6987    int immb = extract32(insn, 16, 3);
6988    int immh = extract32(insn, 19, 4);
6989    bool is_u = extract32(insn, 29, 1);
6990
6991    if (immh == 0) {
6992        unallocated_encoding(s);
6993        return;
6994    }
6995
6996    switch (opcode) {
6997    case 0x08: /* SRI */
6998        if (!is_u) {
6999            unallocated_encoding(s);
7000            return;
7001        }
7002        /* fall through */
7003    case 0x00: /* SSHR / USHR */
7004    case 0x02: /* SSRA / USRA */
7005    case 0x04: /* SRSHR / URSHR */
7006    case 0x06: /* SRSRA / URSRA */
7007        handle_scalar_simd_shri(s, is_u, immh, immb, opcode, rn, rd);
7008        break;
7009    case 0x0a: /* SHL / SLI */
7010        handle_scalar_simd_shli(s, is_u, immh, immb, opcode, rn, rd);
7011        break;
7012    case 0x1c: /* SCVTF, UCVTF */
7013        handle_simd_shift_intfp_conv(s, true, false, is_u, immh, immb,
7014                                     opcode, rn, rd);
7015        break;
7016    case 0x10: /* SQSHRUN */
7017    case 0x11: /* SQRSHRUN */
7018        if (!is_u) {
7019            unallocated_encoding(s);
7020            return;
7021        }
7022        handle_vec_simd_sqshrn(s, true, false, false, true,
7023                               immh, immb, opcode, rn, rd);
7024        break;
7025    case 0x12: /* SQSHRN, UQSHRN */
7026    case 0x13: /* SQRSHRN, UQRSHRN */
7027        handle_vec_simd_sqshrn(s, true, false, is_u, is_u,
7028                               immh, immb, opcode, rn, rd);
7029        break;
7030    case 0xc: /* SQSHLU */
7031        if (!is_u) {
7032            unallocated_encoding(s);
7033            return;
7034        }
7035        handle_simd_qshl(s, true, false, false, true, immh, immb, rn, rd);
7036        break;
7037    case 0xe: /* SQSHL, UQSHL */
7038        handle_simd_qshl(s, true, false, is_u, is_u, immh, immb, rn, rd);
7039        break;
7040    case 0x1f: /* FCVTZS, FCVTZU */
7041        handle_simd_shift_fpint_conv(s, true, false, is_u, immh, immb, rn, rd);
7042        break;
7043    default:
7044        unallocated_encoding(s);
7045        break;
7046    }
7047}
7048
7049/* C3.6.10 AdvSIMD scalar three different
7050 *  31 30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
7051 * +-----+---+-----------+------+---+------+--------+-----+------+------+
7052 * | 0 1 | U | 1 1 1 1 0 | size | 1 |  Rm  | opcode | 0 0 |  Rn  |  Rd  |
7053 * +-----+---+-----------+------+---+------+--------+-----+------+------+
7054 */
7055static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn)
7056{
7057    bool is_u = extract32(insn, 29, 1);
7058    int size = extract32(insn, 22, 2);
7059    int opcode = extract32(insn, 12, 4);
7060    int rm = extract32(insn, 16, 5);
7061    int rn = extract32(insn, 5, 5);
7062    int rd = extract32(insn, 0, 5);
7063
7064    if (is_u) {
7065        unallocated_encoding(s);
7066        return;
7067    }
7068
7069    switch (opcode) {
7070    case 0x9: /* SQDMLAL, SQDMLAL2 */
7071    case 0xb: /* SQDMLSL, SQDMLSL2 */
7072    case 0xd: /* SQDMULL, SQDMULL2 */
7073        if (size == 0 || size == 3) {
7074            unallocated_encoding(s);
7075            return;
7076        }
7077        break;
7078    default:
7079        unallocated_encoding(s);
7080        return;
7081    }
7082
7083    if (!fp_access_check(s)) {
7084        return;
7085    }
7086
7087    if (size == 2) {
7088        TCGv_i64 tcg_op1 = tcg_temp_new_i64();
7089        TCGv_i64 tcg_op2 = tcg_temp_new_i64();
7090        TCGv_i64 tcg_res = tcg_temp_new_i64();
7091
7092        read_vec_element(s, tcg_op1, rn, 0, MO_32 | MO_SIGN);
7093        read_vec_element(s, tcg_op2, rm, 0, MO_32 | MO_SIGN);
7094
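            /* The 32x32->64 product cannot overflow, but doubling it can:
             * SQDMULL's doubling is done as a saturating self-addition so
             * that INT32_MIN * INT32_MIN saturates to INT64_MAX.
             */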
7095        tcg_gen_mul_i64(tcg_res, tcg_op1, tcg_op2);
7096        gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env, tcg_res, tcg_res);
7097
7098        switch (opcode) {
7099        case 0xd: /* SQDMULL, SQDMULL2 */
7100            break;
7101        case 0xb: /* SQDMLSL, SQDMLSL2 */
7102            tcg_gen_neg_i64(tcg_res, tcg_res);
7103            /* fall through */
7104        case 0x9: /* SQDMLAL, SQDMLAL2 */
7105            read_vec_element(s, tcg_op1, rd, 0, MO_64);
7106            gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env,
7107                                              tcg_res, tcg_op1);
7108            break;
7109        default:
7110            g_assert_not_reached();
7111        }
7112
7113        write_fp_dreg(s, rd, tcg_res);
7114
7115        tcg_temp_free_i64(tcg_op1);
7116        tcg_temp_free_i64(tcg_op2);
7117        tcg_temp_free_i64(tcg_res);
7118    } else {
7119        TCGv_i32 tcg_op1 = tcg_temp_new_i32();
7120        TCGv_i32 tcg_op2 = tcg_temp_new_i32();
7121        TCGv_i64 tcg_res = tcg_temp_new_i64();
7122
7123        read_vec_element_i32(s, tcg_op1, rn, 0, MO_16);
7124        read_vec_element_i32(s, tcg_op2, rm, 0, MO_16);
7125
7126        gen_helper_neon_mull_s16(tcg_res, tcg_op1, tcg_op2);
7127        gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env, tcg_res, tcg_res);
7128
7129        switch (opcode) {
7130        case 0xd: /* SQDMULL, SQDMULL2 */
7131            break;
7132        case 0xb: /* SQDMLSL, SQDMLSL2 */
7133            gen_helper_neon_negl_u32(tcg_res, tcg_res);
7134            /* fall through */
7135        case 0x9: /* SQDMLAL, SQDMLAL2 */
7136        {
7137            TCGv_i64 tcg_op3 = tcg_temp_new_i64();
7138            read_vec_element(s, tcg_op3, rd, 0, MO_32);
7139            gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env,
7140                                              tcg_res, tcg_op3);
7141            tcg_temp_free_i64(tcg_op3);
7142            break;
7143        }
7144        default:
7145            g_assert_not_reached();
7146        }
7147
7148        tcg_gen_ext32u_i64(tcg_res, tcg_res);
7149        write_fp_dreg(s, rd, tcg_res);
7150
7151        tcg_temp_free_i32(tcg_op1);
7152        tcg_temp_free_i32(tcg_op2);
7153        tcg_temp_free_i64(tcg_res);
7154    }
7155}
7156
7157static void handle_3same_64(DisasContext *s, int opcode, bool u,
7158                            TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, TCGv_i64 tcg_rm)
7159{
7160    /* Handle 64x64->64 opcodes which are shared between the scalar
7161     * and vector 3-same groups. We cover every opcode where size == 3
7162     * is valid in either the three-reg-same (integer, not pairwise)
7163     * or scalar-three-reg-same groups. (Some opcodes are not yet
7164     * implemented.)
7165     */
7166    TCGCond cond;
7167
7168    switch (opcode) {
7169    case 0x1: /* SQADD */
7170        if (u) {
7171            gen_helper_neon_qadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7172        } else {
7173            gen_helper_neon_qadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7174        }
7175        break;
7176    case 0x5: /* SQSUB */
7177        if (u) {
7178            gen_helper_neon_qsub_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7179        } else {
7180            gen_helper_neon_qsub_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7181        }
7182        break;
7183    case 0x6: /* CMGT, CMHI */
7184        /* 64 bit integer comparison, result = test ? (2^64 - 1) : 0.
7185         * We implement this using setcond (test) and then negating.
7186         */
7187        cond = u ? TCG_COND_GTU : TCG_COND_GT;
7188    do_cmop:
7189        tcg_gen_setcond_i64(cond, tcg_rd, tcg_rn, tcg_rm);
7190        tcg_gen_neg_i64(tcg_rd, tcg_rd);
7191        break;
7192    case 0x7: /* CMGE, CMHS */
7193        cond = u ? TCG_COND_GEU : TCG_COND_GE;
7194        goto do_cmop;
7195    case 0x11: /* CMTST, CMEQ */
7196        if (u) {
7197            cond = TCG_COND_EQ;
7198            goto do_cmop;
7199        }
7200        /* CMTST : test is "if ((X & Y) != 0)". */
7201        tcg_gen_and_i64(tcg_rd, tcg_rn, tcg_rm);
7202        tcg_gen_setcondi_i64(TCG_COND_NE, tcg_rd, tcg_rd, 0);
7203        tcg_gen_neg_i64(tcg_rd, tcg_rd);
7204        break;
7205    case 0x8: /* SSHL, USHL */
7206        if (u) {
7207            gen_helper_neon_shl_u64(tcg_rd, tcg_rn, tcg_rm);
7208        } else {
7209            gen_helper_neon_shl_s64(tcg_rd, tcg_rn, tcg_rm);
7210        }
7211        break;
7212    case 0x9: /* SQSHL, UQSHL */
7213        if (u) {
7214            gen_helper_neon_qshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7215        } else {
7216            gen_helper_neon_qshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7217        }
7218        break;
7219    case 0xa: /* SRSHL, URSHL */
7220        if (u) {
7221            gen_helper_neon_rshl_u64(tcg_rd, tcg_rn, tcg_rm);
7222        } else {
7223            gen_helper_neon_rshl_s64(tcg_rd, tcg_rn, tcg_rm);
7224        }
7225        break;
7226    case 0xb: /* SQRSHL, UQRSHL */
7227        if (u) {
7228            gen_helper_neon_qrshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7229        } else {
7230            gen_helper_neon_qrshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7231        }
7232        break;
7233    case 0x10: /* ADD, SUB */
7234        if (u) {
7235            tcg_gen_sub_i64(tcg_rd, tcg_rn, tcg_rm);
7236        } else {
7237            tcg_gen_add_i64(tcg_rd, tcg_rn, tcg_rm);
7238        }
7239        break;
7240    default:
7241        g_assert_not_reached();
7242    }
7243}
7244
7245/* Handle the 3-same-operands float operations; shared by the scalar
7246 * and vector encodings. The caller must filter out any encodings
7247 * not allocated for the encoding it is dealing with.
7248 */
7249static void handle_3same_float(DisasContext *s, int size, int elements,
7250                               int fpopcode, int rd, int rn, int rm)
7251{
7252    int pass;
7253    TCGv_ptr fpst = get_fpstatus_ptr();
7254
7255    for (pass = 0; pass < elements; pass++) {
7256        if (size) {
7257            /* Double */
7258            TCGv_i64 tcg_op1 = tcg_temp_new_i64();
7259            TCGv_i64 tcg_op2 = tcg_temp_new_i64();
7260            TCGv_i64 tcg_res = tcg_temp_new_i64();
7261
7262            read_vec_element(s, tcg_op1, rn, pass, MO_64);
7263            read_vec_element(s, tcg_op2, rm, pass, MO_64);
7264
7265            switch (fpopcode) {
7266            case 0x39: /* FMLS */
7267                /* As usual for ARM, separate negation for fused multiply-add */
7268                gen_helper_vfp_negd(tcg_op1, tcg_op1);
7269                /* fall through */
7270            case 0x19: /* FMLA */
7271                read_vec_element(s, tcg_res, rd, pass, MO_64);
7272                gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2,
7273                                       tcg_res, fpst);
7274                break;
7275            case 0x18: /* FMAXNM */
7276                gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
7277                break;
7278            case 0x1a: /* FADD */
7279                gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
7280                break;
7281            case 0x1b: /* FMULX */
7282                gen_helper_vfp_mulxd(tcg_res, tcg_op1, tcg_op2, fpst);
7283                break;
7284            case 0x1c: /* FCMEQ */
7285                gen_helper_neon_ceq_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7286                break;
7287            case 0x1e: /* FMAX */
7288                gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
7289                break;
7290            case 0x1f: /* FRECPS */
7291                gen_helper_recpsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7292                break;
7293            case 0x38: /* FMINNM */
7294                gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
7295                break;
7296            case 0x3a: /* FSUB */
7297                gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
7298                break;
7299            case 0x3e: /* FMIN */
7300                gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
7301                break;
7302            case 0x3f: /* FRSQRTS */
7303                gen_helper_rsqrtsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7304                break;
7305            case 0x5b: /* FMUL */
7306                gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
7307                break;
7308            case 0x5c: /* FCMGE */
7309                gen_helper_neon_cge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7310                break;
7311            case 0x5d: /* FACGE */
7312                gen_helper_neon_acge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7313                break;
7314            case 0x5f: /* FDIV */
7315                gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
7316                break;
7317            case 0x7a: /* FABD */
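                /* FABD: computed as |op1 - op2| */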
7318                gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
7319                gen_helper_vfp_absd(tcg_res, tcg_res);
7320                break;
7321            case 0x7c: /* FCMGT */
7322                gen_helper_neon_cgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7323                break;
7324            case 0x7d: /* FACGT */
7325                gen_helper_neon_acgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7326                break;
7327            default:
7328                g_assert_not_reached();
7329            }
7330
7331            write_vec_element(s, tcg_res, rd, pass, MO_64);
7332
7333            tcg_temp_free_i64(tcg_res);
7334            tcg_temp_free_i64(tcg_op1);
7335            tcg_temp_free_i64(tcg_op2);
7336        } else {
7337            /* Single */
7338            TCGv_i32 tcg_op1 = tcg_temp_new_i32();
7339            TCGv_i32 tcg_op2 = tcg_temp_new_i32();
7340            TCGv_i32 tcg_res = tcg_temp_new_i32();
7341
7342            read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
7343            read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);
7344
7345            switch (fpopcode) {
7346            case 0x39: /* FMLS */
7347                /* As usual for ARM, separate negation for fused multiply-add */
7348                gen_helper_vfp_negs(tcg_op1, tcg_op1);
7349                /* fall through */
7350            case 0x19: /* FMLA */
7351                read_vec_element_i32(s, tcg_res, rd, pass, MO_32);
7352                gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2,
7353                                       tcg_res, fpst);
7354                break;
7355            case 0x1a: /* FADD */
7356                gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
7357                break;
7358            case 0x1b: /* FMULX */
7359                gen_helper_vfp_mulxs(tcg_res, tcg_op1, tcg_op2, fpst);
7360                break;
7361            case 0x1c: /* FCMEQ */
7362                gen_helper_neon_ceq_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7363                break;
7364            case 0x1e: /* FMAX */
7365                gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
7366                break;
7367            case 0x1f: /* FRECPS */
7368                gen_helper_recpsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7369                break;
7370            case 0x18: /* FMAXNM */
7371                gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
7372                break;
7373            case 0x38: /* FMINNM */
7374                gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
7375                break;
7376            case 0x3a: /* FSUB */
7377                gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
7378                break;
7379            case 0x3e: /* FMIN */
7380                gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
7381                break;
7382            case 0x3f: /* FRSQRTS */
7383                gen_helper_rsqrtsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7384                break;
7385            case 0x5b: /* FMUL */
7386                gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
7387                break;
7388            case 0x5c: /* FCMGE */
7389                gen_helper_neon_cge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7390                break;
7391            case 0x5d: /* FACGE */
7392                gen_helper_neon_acge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7393                break;
7394            case 0x5f: /* FDIV */
7395                gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
7396                break;
7397            case 0x7a: /* FABD */
7398                gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
7399                gen_helper_vfp_abss(tcg_res, tcg_res);
7400                break;
7401            case 0x7c: /* FCMGT */
7402                gen_helper_neon_cgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7403                break;
7404            case 0x7d: /* FACGT */
7405                gen_helper_neon_acgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7406                break;
7407            default:
7408                g_assert_not_reached();
7409            }
7410
7411            if (elements == 1) {
7412                /* scalar single so clear high part */
7413                TCGv_i64 tcg_tmp = tcg_temp_new_i64();
7414
7415                tcg_gen_extu_i32_i64(tcg_tmp, tcg_res);
7416                write_vec_element(s, tcg_tmp, rd, pass, MO_64);
7417                tcg_temp_free_i64(tcg_tmp);
7418            } else {
7419                write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
7420            }
7421
7422            tcg_temp_free_i32(tcg_res);
7423            tcg_temp_free_i32(tcg_op1);
7424            tcg_temp_free_i32(tcg_op2);
7425        }
7426    }
7427
7428    tcg_temp_free_ptr(fpst);
7429
7430    if ((elements << size) < 4) {
7431        /* scalar, or non-quad vector op */
7432        clear_vec_high(s, rd);
7433    }
7434}
7435
7436/* C3.6.11 AdvSIMD scalar three same
7437 *  31 30  29 28       24 23  22  21 20  16 15    11  10 9    5 4    0
7438 * +-----+---+-----------+------+---+------+--------+---+------+------+
7439 * | 0 1 | U | 1 1 1 1 0 | size | 1 |  Rm  | opcode | 1 |  Rn  |  Rd  |
7440 * +-----+---+-----------+------+---+------+--------+---+------+------+
7441 */
7442static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn)
7443{
7444    int rd = extract32(insn, 0, 5);
7445    int rn = extract32(insn, 5, 5);
7446    int opcode = extract32(insn, 11, 5);
7447    int rm = extract32(insn, 16, 5);
7448    int size = extract32(insn, 22, 2);
7449    bool u = extract32(insn, 29, 1);
7450    TCGv_i64 tcg_rd;
7451
7452    if (opcode >= 0x18) {
7453        /* Floating point: U, size[1] and opcode indicate operation */
7454        int fpopcode = opcode | (extract32(size, 1, 1) << 5) | (u << 6);
7455        switch (fpopcode) {
7456        case 0x1b: /* FMULX */
7457        case 0x1f: /* FRECPS */
7458        case 0x3f: /* FRSQRTS */
7459        case 0x5d: /* FACGE */
7460        case 0x7d: /* FACGT */
7461        case 0x1c: /* FCMEQ */
7462        case 0x5c: /* FCMGE */
7463        case 0x7c: /* FCMGT */
7464        case 0x7a: /* FABD */
7465            break;
7466        default:
7467            unallocated_encoding(s);
7468            return;
7469        }
7470
7471        if (!fp_access_check(s)) {
7472            return;
7473        }
7474
7475        handle_3same_float(s, extract32(size, 0, 1), 1, fpopcode, rd, rn, rm);
7476        return;
7477    }
7478
7479    switch (opcode) {
7480    case 0x1: /* SQADD, UQADD */
7481    case 0x5: /* SQSUB, UQSUB */
7482    case 0x9: /* SQSHL, UQSHL */
7483    case 0xb: /* SQRSHL, UQRSHL */
7484        break;
7485    case 0x8: /* SSHL, USHL */
7486    case 0xa: /* SRSHL, URSHL */
7487    case 0x6: /* CMGT, CMHI */
7488    case 0x7: /* CMGE, CMHS */
7489    case 0x11: /* CMTST, CMEQ */
7490    case 0x10: /* ADD, SUB (vector) */
7491        if (size != 3) {
7492            unallocated_encoding(s);
7493            return;
7494        }
7495        break;
7496    case 0x16: /* SQDMULH, SQRDMULH (vector) */
7497        if (size != 1 && size != 2) {
7498            unallocated_encoding(s);
7499            return;
7500        }
7501        break;
7502    default:
7503        unallocated_encoding(s);
7504        return;
7505    }
7506
7507    if (!fp_access_check(s)) {
7508        return;
7509    }
7510
7511    tcg_rd = tcg_temp_new_i64();
7512
7513    if (size == 3) {
7514        TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
7515        TCGv_i64 tcg_rm = read_fp_dreg(s, rm);
7516
7517        handle_3same_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rm);
7518        tcg_temp_free_i64(tcg_rn);
7519        tcg_temp_free_i64(tcg_rm);
7520    } else {
7521        /* Do a single operation on the lowest element in the vector.
7522         * We use the standard Neon helpers and rely on 0 OP 0 == 0 with
7523         * no side effects for all these operations.
7524         * OPTME: special-purpose helpers would avoid doing some
7525         * unnecessary work in the helper for the 8 and 16 bit cases.
7526         */
7527        NeonGenTwoOpEnvFn *genenvfn;
7528        TCGv_i32 tcg_rn = tcg_temp_new_i32();
7529        TCGv_i32 tcg_rm = tcg_temp_new_i32();
7530        TCGv_i32 tcg_rd32 = tcg_temp_new_i32();
7531
7532        read_vec_element_i32(s, tcg_rn, rn, 0, size);
7533        read_vec_element_i32(s, tcg_rm, rm, 0, size);
7534
7535        switch (opcode) {
7536        case 0x1: /* SQADD, UQADD */
7537        {
7538            static NeonGenTwoOpEnvFn * const fns[3][2] = {
7539                { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 },
7540                { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 },
7541                { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 },
7542            };
7543            genenvfn = fns[size][u];
7544            break;
7545        }
7546        case 0x5: /* SQSUB, UQSUB */
7547        {
7548            static NeonGenTwoOpEnvFn * const fns[3][2] = {
7549                { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 },
7550                { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 },
7551                { gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 },
7552            };
7553            genenvfn = fns[size][u];
7554            break;
7555        }
7556        case 0x9: /* SQSHL, UQSHL */
7557        {
7558            static NeonGenTwoOpEnvFn * const fns[3][2] = {
7559                { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
7560                { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
7561                { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
7562            };
7563            genenvfn = fns[size][u];
7564            break;
7565        }
7566        case 0xb: /* SQRSHL, UQRSHL */
7567        {
7568            static NeonGenTwoOpEnvFn * const fns[3][2] = {
7569                { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
7570                { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
7571                { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
7572            };
7573            genenvfn = fns[size][u];
7574            break;
7575        }
7576        case 0x16: /* SQDMULH, SQRDMULH */
7577        {
7578            static NeonGenTwoOpEnvFn * const fns[2][2] = {
7579                { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 },
7580                { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 },
7581            };
7582            assert(size == 1 || size == 2);
7583            genenvfn = fns[size - 1][u];
7584            break;
7585        }
7586        default:
7587            g_assert_not_reached();
7588        }
7589
7590        genenvfn(tcg_rd32, cpu_env, tcg_rn, tcg_rm);
7591        tcg_gen_extu_i32_i64(tcg_rd, tcg_rd32);
7592        tcg_temp_free_i32(tcg_rd32);
7593        tcg_temp_free_i32(tcg_rn);
7594        tcg_temp_free_i32(tcg_rm);
7595    }
7596
7597    write_fp_dreg(s, rd, tcg_rd);
7598
7599    tcg_temp_free_i64(tcg_rd);
7600}
7601
7602static void handle_2misc_64(DisasContext *s, int opcode, bool u,
7603                            TCGv_i64 tcg_rd, TCGv_i64 tcg_rn,
7604                            TCGv_i32 tcg_rmode, TCGv_ptr tcg_fpstatus)
7605{
7606    /* Handle 64->64 opcodes which are shared between the scalar and
7607     * vector 2-reg-misc groups. We cover every integer opcode where size == 3
7608     * is valid in either group and also the double-precision fp ops.
7609     * The caller only need provide tcg_rmode and tcg_fpstatus if the op
7610     * requires them.
7611     */
7612    TCGCond cond;
7613
7614    switch (opcode) {
7615    case 0x4: /* CLS, CLZ */
7616        if (u) {
7617            gen_helper_clz64(tcg_rd, tcg_rn);
7618        } else {
7619            gen_helper_cls64(tcg_rd, tcg_rn);
7620        }
7621        break;
7622    case 0x5: /* NOT */
7623        /* This opcode is shared with CNT and RBIT but we have earlier
7624         * enforced that size == 3 if and only if this is the NOT insn.
7625         */
7626        tcg_gen_not_i64(tcg_rd, tcg_rn);
7627        break;
7628    case 0x7: /* SQABS, SQNEG */
7629        if (u) {
7630            gen_helper_neon_qneg_s64(tcg_rd, cpu_env, tcg_rn);
7631        } else {
7632            gen_helper_neon_qabs_s64(tcg_rd, cpu_env, tcg_rn);
7633        }
7634        break;
7635    case 0xa: /* CMLT */
7636        /* 64 bit integer comparison against zero, result is
7637         * test ? (2^64 - 1) : 0. We implement this using setcond (test)
7638         * and then negating.
7639         */
7640        cond = TCG_COND_LT;
7641    do_cmop:
7642        tcg_gen_setcondi_i64(cond, tcg_rd, tcg_rn, 0);
7643        tcg_gen_neg_i64(tcg_rd, tcg_rd);
7644        break;
7645    case 0x8: /* CMGT, CMGE */
7646        cond = u ? TCG_COND_GE : TCG_COND_GT;
7647        goto do_cmop;
7648    case 0x9: /* CMEQ, CMLE */
7649        cond = u ? TCG_COND_LE : TCG_COND_EQ;
7650        goto do_cmop;
7651    case 0xb: /* ABS, NEG */
7652        if (u) {
7653            tcg_gen_neg_i64(tcg_rd, tcg_rn);
7654        } else {
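            /* ABS: rd = (rn > 0) ? rn : -rn; like the architected ABS,
             * INT64_MIN negates to itself.
             */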
7655            TCGv_i64 tcg_zero = tcg_const_i64(0);
7656            tcg_gen_neg_i64(tcg_rd, tcg_rn);
7657            tcg_gen_movcond_i64(TCG_COND_GT, tcg_rd, tcg_rn, tcg_zero,
7658                                tcg_rn, tcg_rd);
7659            tcg_temp_free_i64(tcg_zero);
7660        }
7661        break;
7662    case 0x2f: /* FABS */
7663        gen_helper_vfp_absd(tcg_rd, tcg_rn);
7664        break;
7665    case 0x6f: /* FNEG */
7666        gen_helper_vfp_negd(tcg_rd, tcg_rn);
7667        break;
7668    case 0x7f: /* FSQRT */
7669        gen_helper_vfp_sqrtd(tcg_rd, tcg_rn, cpu_env);
7670        break;
7671    case 0x1a: /* FCVTNS */
7672    case 0x1b: /* FCVTMS */
7673    case 0x1c: /* FCVTAS */
7674    case 0x3a: /* FCVTPS */
7675    case 0x3b: /* FCVTZS */
7676    {
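        /* The fixed-point helpers with a shift of 0 give the plain
         * FP->int conversion; the caller has already installed the
         * rounding mode these insns require.
         */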
7677        TCGv_i32 tcg_shift = tcg_const_i32(0);
7678        gen_helper_vfp_tosqd(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
7679        tcg_temp_free_i32(tcg_shift);
7680        break;
7681    }
7682    case 0x5a: /* FCVTNU */
7683    case 0x5b: /* FCVTMU */
7684    case 0x5c: /* FCVTAU */
7685    case 0x7a: /* FCVTPU */
7686    case 0x7b: /* FCVTZU */
7687    {
7688        TCGv_i32 tcg_shift = tcg_const_i32(0);
7689        gen_helper_vfp_touqd(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
7690        tcg_temp_free_i32(tcg_shift);
7691        break;
7692    }
7693    case 0x18: /* FRINTN */
7694    case 0x19: /* FRINTM */
7695    case 0x38: /* FRINTP */
7696    case 0x39: /* FRINTZ */
7697    case 0x58: /* FRINTA */
7698    case 0x79: /* FRINTI */
7699        gen_helper_rintd(tcg_rd, tcg_rn, tcg_fpstatus);
7700        break;
7701    case 0x59: /* FRINTX */
7702        gen_helper_rintd_exact(tcg_rd, tcg_rn, tcg_fpstatus);
7703        break;
7704    default:
7705        g_assert_not_reached();
7706    }
7707}
7708
7709static void handle_2misc_fcmp_zero(DisasContext *s, int opcode,
7710                                   bool is_scalar, bool is_u, bool is_q,
7711                                   int size, int rn, int rd)
7712{
7713    bool is_double = (size == 3);
7714    TCGv_ptr fpst;
7715
7716    if (!fp_access_check(s)) {
7717        return;
7718    }
7719
7720    fpst = get_fpstatus_ptr();
7721
7722    if (is_double) {
7723        TCGv_i64 tcg_op = tcg_temp_new_i64();
7724        TCGv_i64 tcg_zero = tcg_const_i64(0);
7725        TCGv_i64 tcg_res = tcg_temp_new_i64();
7726        NeonGenTwoDoubleOPFn *genfn;
7727        bool swap = false;
7728        int pass;
7729
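        /* There are no LT/LE helpers: FCMLT/FCMLE against zero swap the
         * operands of the GT/GE comparison, since 0 > x means x < 0.
         */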
7730        switch (opcode) {
7731        case 0x2e: /* FCMLT (zero) */
7732            swap = true;
7733            /* fallthrough */
7734        case 0x2c: /* FCMGT (zero) */
7735            genfn = gen_helper_neon_cgt_f64;
7736            break;
7737        case 0x2d: /* FCMEQ (zero) */
7738            genfn = gen_helper_neon_ceq_f64;
7739            break;
7740        case 0x6d: /* FCMLE (zero) */
7741            swap = true;
7742            /* fall through */
7743        case 0x6c: /* FCMGE (zero) */
7744            genfn = gen_helper_neon_cge_f64;
7745            break;
7746        default:
7747            g_assert_not_reached();
7748        }
7749
7750        for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
7751            read_vec_element(s, tcg_op, rn, pass, MO_64);
7752            if (swap) {
7753                genfn(tcg_res, tcg_zero, tcg_op, fpst);
7754            } else {
7755                genfn(tcg_res, tcg_op, tcg_zero, fpst);
7756            }
7757            write_vec_element(s, tcg_res, rd, pass, MO_64);
7758        }
7759        if (is_scalar) {
7760            clear_vec_high(s, rd);
7761        }
7762
7763        tcg_temp_free_i64(tcg_res);
7764        tcg_temp_free_i64(tcg_zero);
7765        tcg_temp_free_i64(tcg_op);
7766    } else {
7767        TCGv_i32 tcg_op = tcg_temp_new_i32();
7768        TCGv_i32 tcg_zero = tcg_const_i32(0);
7769        TCGv_i32 tcg_res = tcg_temp_new_i32();
7770        NeonGenTwoSingleOPFn *genfn;
7771        bool swap = false;
7772        int pass, maxpasses;
7773
7774        switch (opcode) {
7775        case 0x2e: /* FCMLT (zero) */
7776            swap = true;
7777            /* fall through */
7778        case 0x2c: /* FCMGT (zero) */
7779            genfn = gen_helper_neon_cgt_f32;
7780            break;
7781        case 0x2d: /* FCMEQ (zero) */
7782            genfn = gen_helper_neon_ceq_f32;
7783            break;
7784        case 0x6d: /* FCMLE (zero) */
7785            swap = true;
7786            /* fall through */
7787        case 0x6c: /* FCMGE (zero) */
7788            genfn = gen_helper_neon_cge_f32;
7789            break;
7790        default:
7791            g_assert_not_reached();
7792        }
7793
7794        if (is_scalar) {
7795            maxpasses = 1;
7796        } else {
7797            maxpasses = is_q ? 4 : 2;
7798        }
7799
7800        for (pass = 0; pass < maxpasses; pass++) {
7801            read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
7802            if (swap) {
7803                genfn(tcg_res, tcg_zero, tcg_op, fpst);
7804            } else {
7805                genfn(tcg_res, tcg_op, tcg_zero, fpst);
7806            }
7807            if (is_scalar) {
7808                write_fp_sreg(s, rd, tcg_res);
7809            } else {
7810                write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
7811            }
7812        }
7813        tcg_temp_free_i32(tcg_res);
7814        tcg_temp_free_i32(tcg_zero);
7815        tcg_temp_free_i32(tcg_op);
7816        if (!is_q && !is_scalar) {
7817            clear_vec_high(s, rd);
7818        }
7819    }
7820
7821    tcg_temp_free_ptr(fpst);
7822}
7823
7824static void handle_2misc_reciprocal(DisasContext *s, int opcode,
7825                                    bool is_scalar, bool is_u, bool is_q,
7826                                    int size, int rn, int rd)
7827{
7828    bool is_double = (size == 3);
7829    TCGv_ptr fpst = get_fpstatus_ptr();
7830
7831    if (is_double) {
7832        TCGv_i64 tcg_op = tcg_temp_new_i64();
7833        TCGv_i64 tcg_res = tcg_temp_new_i64();
7834        int pass;
7835
7836        for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
7837            read_vec_element(s, tcg_op, rn, pass, MO_64);
7838            switch (opcode) {
7839            case 0x3d: /* FRECPE */
7840                gen_helper_recpe_f64(tcg_res, tcg_op, fpst);
7841                break;
7842            case 0x3f: /* FRECPX */
7843                gen_helper_frecpx_f64(tcg_res, tcg_op, fpst);
7844                break;
7845            case 0x7d: /* FRSQRTE */
7846                gen_helper_rsqrte_f64(tcg_res, tcg_op, fpst);
7847                break;
7848            default:
7849                g_assert_not_reached();
7850            }
7851            write_vec_element(s, tcg_res, rd, pass, MO_64);
7852        }
7853        if (is_scalar) {
7854            clear_vec_high(s, rd);
7855        }
7856
7857        tcg_temp_free_i64(tcg_res);
7858        tcg_temp_free_i64(tcg_op);
7859    } else {
7860        TCGv_i32 tcg_op = tcg_temp_new_i32();
7861        TCGv_i32 tcg_res = tcg_temp_new_i32();
7862        int pass, maxpasses;
7863
7864        if (is_scalar) {
7865            maxpasses = 1;
7866        } else {
7867            maxpasses = is_q ? 4 : 2;
7868        }
7869
7870        for (pass = 0; pass < maxpasses; pass++) {
7871            read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
7872
7873            switch (opcode) {
7874            case 0x3c: /* URECPE */
7875                gen_helper_recpe_u32(tcg_res, tcg_op, fpst);
7876                break;
7877            case 0x3d: /* FRECPE */
7878                gen_helper_recpe_f32(tcg_res, tcg_op, fpst);
7879                break;
7880            case 0x3f: /* FRECPX */
7881                gen_helper_frecpx_f32(tcg_res, tcg_op, fpst);
7882                break;
7883            case 0x7d: /* FRSQRTE */
7884                gen_helper_rsqrte_f32(tcg_res, tcg_op, fpst);
7885                break;
7886            default:
7887                g_assert_not_reached();
7888            }
7889
7890            if (is_scalar) {
7891                write_fp_sreg(s, rd, tcg_res);
7892            } else {
7893                write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
7894            }
7895        }
7896        tcg_temp_free_i32(tcg_res);
7897        tcg_temp_free_i32(tcg_op);
7898        if (!is_q && !is_scalar) {
7899            clear_vec_high(s, rd);
7900        }
7901    }
7902    tcg_temp_free_ptr(fpst);
7903}
7904
7905static void handle_2misc_narrow(DisasContext *s, bool scalar,
7906                                int opcode, bool u, bool is_q,
7907                                int size, int rn, int rd)
7908{
7909    /* Handle 2-reg-misc ops which are narrowing (so each 2*size element
7910     * in the source becomes a size element in the destination).
7911     */
7912    int pass;
7913    TCGv_i32 tcg_res[2];
7914    int destelt = is_q ? 2 : 0;
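        /* The "2" variants (is_q) write the narrowed results to the
         * upper half of Vd and leave the lower half intact.
         */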
7915    int passes = scalar ? 1 : 2;
7916
7917    if (scalar) {
7918        tcg_res[1] = tcg_const_i32(0);
7919    }
7920
7921    for (pass = 0; pass < passes; pass++) {
7922        TCGv_i64 tcg_op = tcg_temp_new_i64();
7923        NeonGenNarrowFn *genfn = NULL;
7924        NeonGenNarrowEnvFn *genenvfn = NULL;
7925
7926        if (scalar) {
7927            read_vec_element(s, tcg_op, rn, pass, size + 1);
7928        } else {
7929            read_vec_element(s, tcg_op, rn, pass, MO_64);
7930        }
7931        tcg_res[pass] = tcg_temp_new_i32();
7932
7933        switch (opcode) {
7934        case 0x12: /* XTN, SQXTUN */
7935        {
7936            static NeonGenNarrowFn * const xtnfns[3] = {
7937                gen_helper_neon_narrow_u8,
7938                gen_helper_neon_narrow_u16,
7939                tcg_gen_extrl_i64_i32,
7940            };
7941            static NeonGenNarrowEnvFn * const sqxtunfns[3] = {
7942                gen_helper_neon_unarrow_sat8,
7943                gen_helper_neon_unarrow_sat16,
7944                gen_helper_neon_unarrow_sat32,
7945            };
7946            if (u) {
7947                genenvfn = sqxtunfns[size];
7948            } else {
7949                genfn = xtnfns[size];
7950            }
7951            break;
7952        }
7953        case 0x14: /* SQXTN, UQXTN */
7954        {
7955            static NeonGenNarrowEnvFn * const fns[3][2] = {
7956                { gen_helper_neon_narrow_sat_s8,
7957                  gen_helper_neon_narrow_sat_u8 },
7958                { gen_helper_neon_narrow_sat_s16,
7959                  gen_helper_neon_narrow_sat_u16 },
7960                { gen_helper_neon_narrow_sat_s32,
7961                  gen_helper_neon_narrow_sat_u32 },
7962            };
7963            genenvfn = fns[size][u];
7964            break;
7965        }
7966        case 0x16: /* FCVTN, FCVTN2 */
7967            /* 32 bit to 16 bit or 64 bit to 32 bit float conversion */
7968            if (size == 2) {
7969                gen_helper_vfp_fcvtsd(tcg_res[pass], tcg_op, cpu_env);
7970            } else {
7971                TCGv_i32 tcg_lo = tcg_temp_new_i32();
7972                TCGv_i32 tcg_hi = tcg_temp_new_i32();
7973                tcg_gen_extr_i64_i32(tcg_lo, tcg_hi, tcg_op);
7974                gen_helper_vfp_fcvt_f32_to_f16(tcg_lo, tcg_lo, cpu_env);
7975                gen_helper_vfp_fcvt_f32_to_f16(tcg_hi, tcg_hi, cpu_env);
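                    /* pack the two half-precision results: the low
                     * single in bits [15:0], the high in [31:16]
                     */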
7976                tcg_gen_deposit_i32(tcg_res[pass], tcg_lo, tcg_hi, 16, 16);
7977                tcg_temp_free_i32(tcg_lo);
7978                tcg_temp_free_i32(tcg_hi);
7979            }
7980            break;
7981        case 0x56: /* FCVTXN, FCVTXN2 */
7982            /* 64 bit to 32 bit float conversion
7983             * with von Neumann rounding (round to odd)
7984             */
7985            assert(size == 2);
7986            gen_helper_fcvtx_f64_to_f32(tcg_res[pass], tcg_op, cpu_env);
7987            break;
7988        default:
7989            g_assert_not_reached();
7990        }
7991
7992        if (genfn) {
7993            genfn(tcg_res[pass], tcg_op);
7994        } else if (genenvfn) {
7995            genenvfn(tcg_res[pass], cpu_env, tcg_op);
7996        }
7997
7998        tcg_temp_free_i64(tcg_op);
7999    }
8000
8001    for (pass = 0; pass < 2; pass++) {
8002        write_vec_element_i32(s, tcg_res[pass], rd, destelt + pass, MO_32);
8003        tcg_temp_free_i32(tcg_res[pass]);
8004    }
8005    if (!is_q) {
8006        clear_vec_high(s, rd);
8007    }
8008}
8009
8010/* Remaining saturating accumulating ops */
8011static void handle_2misc_satacc(DisasContext *s, bool is_scalar, bool is_u,
8012                                bool is_q, int size, int rn, int rd)
8013{
8014    bool is_double = (size == 3);
8015
8016    if (is_double) {
8017        TCGv_i64 tcg_rn = tcg_temp_new_i64();
8018        TCGv_i64 tcg_rd = tcg_temp_new_i64();
8019        int pass;
8020
8021        for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
8022            read_vec_element(s, tcg_rn, rn, pass, MO_64);
8023            read_vec_element(s, tcg_rd, rd, pass, MO_64);
8024
8025            if (is_u) { /* USQADD */
8026                gen_helper_neon_uqadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rd);
8027            } else { /* SUQADD */
8028                gen_helper_neon_sqadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rd);
8029            }
8030            write_vec_element(s, tcg_rd, rd, pass, MO_64);
8031        }
8032        if (is_scalar) {
8033            clear_vec_high(s, rd);
8034        }
8035
8036        tcg_temp_free_i64(tcg_rd);
8037        tcg_temp_free_i64(tcg_rn);
8038    } else {
8039        TCGv_i32 tcg_rn = tcg_temp_new_i32();
8040        TCGv_i32 tcg_rd = tcg_temp_new_i32();
8041        int pass, maxpasses;
8042
8043        if (is_scalar) {
8044            maxpasses = 1;
8045        } else {
8046            maxpasses = is_q ? 4 : 2;
8047        }
8048
8049        for (pass = 0; pass < maxpasses; pass++) {
8050            if (is_scalar) {
8051                read_vec_element_i32(s, tcg_rn, rn, pass, size);
8052                read_vec_element_i32(s, tcg_rd, rd, pass, size);
8053            } else {
8054                read_vec_element_i32(s, tcg_rn, rn, pass, MO_32);
8055                read_vec_element_i32(s, tcg_rd, rd, pass, MO_32);
8056            }
8057
8058            if (is_u) { /* USQADD */
8059                switch (size) {
8060                case 0:
8061                    gen_helper_neon_uqadd_s8(tcg_rd, cpu_env, tcg_rn, tcg_rd);
8062                    break;
8063                case 1:
8064                    gen_helper_neon_uqadd_s16(tcg_rd, cpu_env, tcg_rn, tcg_rd);
8065                    break;
8066                case 2:
8067                    gen_helper_neon_uqadd_s32(tcg_rd, cpu_env, tcg_rn, tcg_rd);
8068                    break;
8069                default:
8070                    g_assert_not_reached();
8071                }
8072            } else { /* SUQADD */
8073                switch (size) {
8074                case 0:
8075                    gen_helper_neon_sqadd_u8(tcg_rd, cpu_env, tcg_rn, tcg_rd);
8076                    break;
8077                case 1:
8078                    gen_helper_neon_sqadd_u16(tcg_rd, cpu_env, tcg_rn, tcg_rd);
8079                    break;
8080                case 2:
8081                    gen_helper_neon_sqadd_u32(tcg_rd, cpu_env, tcg_rn, tcg_rd);
8082                    break;
8083                default:
8084                    g_assert_not_reached();
8085                }
8086            }
8087
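            /* For the scalar form, zero the whole low 64 bits of Vd
             * first so that the 32-bit result write below leaves bits
             * [63:32] clear.
             */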
8088            if (is_scalar) {
8089                TCGv_i64 tcg_zero = tcg_const_i64(0);
8090                write_vec_element(s, tcg_zero, rd, 0, MO_64);
8091                tcg_temp_free_i64(tcg_zero);
8092            }
8093            write_vec_element_i32(s, tcg_rd, rd, pass, MO_32);
8094        }
8095
8096        if (!is_q) {
8097            clear_vec_high(s, rd);
8098        }
8099
8100        tcg_temp_free_i32(tcg_rd);
8101        tcg_temp_free_i32(tcg_rn);
8102    }
8103}
8104
8105/* C3.6.12 AdvSIMD scalar two reg misc
8106 *  31 30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
8107 * +-----+---+-----------+------+-----------+--------+-----+------+------+
8108 * | 0 1 | U | 1 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
8109 * +-----+---+-----------+------+-----------+--------+-----+------+------+
8110 */
8111static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn)
8112{
8113    int rd = extract32(insn, 0, 5);
8114    int rn = extract32(insn, 5, 5);
8115    int opcode = extract32(insn, 12, 5);
8116    int size = extract32(insn, 22, 2);
8117    bool u = extract32(insn, 29, 1);
8118    bool is_fcvt = false;
8119    int rmode;
8120    TCGv_i32 tcg_rmode;
8121    TCGv_ptr tcg_fpstatus;
8122
8123    switch (opcode) {
8124    case 0x3: /* USQADD / SUQADD */
8125        if (!fp_access_check(s)) {
8126            return;
8127        }
8128        handle_2misc_satacc(s, true, u, false, size, rn, rd);
8129        return;
8130    case 0x7: /* SQABS / SQNEG */
8131        break;
8132    case 0xa: /* CMLT */
8133        if (u) {
8134            unallocated_encoding(s);
8135            return;
8136        }
8137        /* fall through */
8138    case 0x8: /* CMGT, CMGE */
8139    case 0x9: /* CMEQ, CMLE */
8140    case 0xb: /* ABS, NEG */
8141        if (size != 3) {
8142            unallocated_encoding(s);
8143            return;
8144        }
8145        break;
8146    case 0x12: /* SQXTUN */
8147        if (!u) {
8148            unallocated_encoding(s);
8149            return;
8150        }
8151        /* fall through */
8152    case 0x14: /* SQXTN, UQXTN */
8153        if (size == 3) {
8154            unallocated_encoding(s);
8155            return;
8156        }
8157        if (!fp_access_check(s)) {
8158            return;
8159        }
8160        handle_2misc_narrow(s, true, opcode, u, false, size, rn, rd);
8161        return;
8162    case 0xc ... 0xf:
8163    case 0x16 ... 0x1d:
8164    case 0x1f:
8165        /* Floating point: U, size[1] and opcode indicate operation;
8166         * size[0] indicates single or double precision.
8167         */
8168        opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
8169        size = extract32(size, 0, 1) ? 3 : 2;
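
            /* Worked example: scalar FCVTZS on a double has U=0, size=0b11
             * and opcode=0b11011 (0x1b); the two lines above fold that to
             * opcode = 0x1b | (1 << 5) = 0x3b with size = 3, which matches
             * the FCVTZS case below.
             */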
8170        switch (opcode) {
8171        case 0x2c: /* FCMGT (zero) */
8172        case 0x2d: /* FCMEQ (zero) */
8173        case 0x2e: /* FCMLT (zero) */
8174        case 0x6c: /* FCMGE (zero) */
8175        case 0x6d: /* FCMLE (zero) */
8176            handle_2misc_fcmp_zero(s, opcode, true, u, true, size, rn, rd);
8177            return;
8178        case 0x1d: /* SCVTF */
8179        case 0x5d: /* UCVTF */
8180        {
8181            bool is_signed = (opcode == 0x1d);
8182            if (!fp_access_check(s)) {
8183                return;
8184            }
8185            handle_simd_intfp_conv(s, rd, rn, 1, is_signed, 0, size);
8186            return;
8187        }
8188        case 0x3d: /* FRECPE */
8189        case 0x3f: /* FRECPX */
8190        case 0x7d: /* FRSQRTE */
8191            if (!fp_access_check(s)) {
8192                return;
8193            }
8194            handle_2misc_reciprocal(s, opcode, true, u, true, size, rn, rd);
8195            return;
8196        case 0x1a: /* FCVTNS */
8197        case 0x1b: /* FCVTMS */
8198        case 0x3a: /* FCVTPS */
8199        case 0x3b: /* FCVTZS */
8200        case 0x5a: /* FCVTNU */
8201        case 0x5b: /* FCVTMU */
8202        case 0x7a: /* FCVTPU */
8203        case 0x7b: /* FCVTZU */
8204            is_fcvt = true;
8205            rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
8206            break;
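            /* The packing above maps the directed roundings to FPROUNDING_*
             * values: FCVTN* -> TIEEVEN (0), FCVTP* -> POSINF (1),
             * FCVTM* -> NEGINF (2), FCVTZ* -> ZERO (3).
             */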
8207        case 0x1c: /* FCVTAS */
8208        case 0x5c: /* FCVTAU */
8209            /* TIEAWAY doesn't fit in the usual rounding mode encoding */
8210            is_fcvt = true;
8211            rmode = FPROUNDING_TIEAWAY;
8212            break;
8213        case 0x56: /* FCVTXN, FCVTXN2 */
8214            if (size == 2) {
8215                unallocated_encoding(s);
8216                return;
8217            }
8218            if (!fp_access_check(s)) {
8219                return;
8220            }
8221            handle_2misc_narrow(s, true, opcode, u, false, size - 1, rn, rd);
8222            return;
8223        default:
8224            unallocated_encoding(s);
8225            return;
8226        }
8227        break;
8228    default:
8229        unallocated_encoding(s);
8230        return;
8231    }
8232
8233    if (!fp_access_check(s)) {
8234        return;
8235    }
8236
8237    if (is_fcvt) {
8238        tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
8239        gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
8240        tcg_fpstatus = get_fpstatus_ptr();
8241    } else {
8242        TCGV_UNUSED_I32(tcg_rmode);
8243        TCGV_UNUSED_PTR(tcg_fpstatus);
8244    }
8245
8246    if (size == 3) {
8247        TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
8248        TCGv_i64 tcg_rd = tcg_temp_new_i64();
8249
8250        handle_2misc_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rmode, tcg_fpstatus);
8251        write_fp_dreg(s, rd, tcg_rd);
8252        tcg_temp_free_i64(tcg_rd);
8253        tcg_temp_free_i64(tcg_rn);
8254    } else {
8255        TCGv_i32 tcg_rn = tcg_temp_new_i32();
8256        TCGv_i32 tcg_rd = tcg_temp_new_i32();
8257
8258        read_vec_element_i32(s, tcg_rn, rn, 0, size);
8259
8260        switch (opcode) {
8261        case 0x7: /* SQABS, SQNEG */
8262        {
8263            NeonGenOneOpEnvFn *genfn;
8264            static NeonGenOneOpEnvFn * const fns[3][2] = {
8265                { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
8266                { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
8267                { gen_helper_neon_qabs_s32, gen_helper_neon_qneg_s32 },
8268            };
8269            genfn = fns[size][u];
8270            genfn(tcg_rd, cpu_env, tcg_rn);
8271            break;
8272        }
8273        case 0x1a: /* FCVTNS */
8274        case 0x1b: /* FCVTMS */
8275        case 0x1c: /* FCVTAS */
8276        case 0x3a: /* FCVTPS */
8277        case 0x3b: /* FCVTZS */
8278        {
8279            TCGv_i32 tcg_shift = tcg_const_i32(0);
8280            gen_helper_vfp_tosls(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
8281            tcg_temp_free_i32(tcg_shift);
8282            break;
8283        }
8284        case 0x5a: /* FCVTNU */
8285        case 0x5b: /* FCVTMU */
8286        case 0x5c: /* FCVTAU */
8287        case 0x7a: /* FCVTPU */
8288        case 0x7b: /* FCVTZU */
8289        {
8290            TCGv_i32 tcg_shift = tcg_const_i32(0);
8291            gen_helper_vfp_touls(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
8292            tcg_temp_free_i32(tcg_shift);
8293            break;
8294        }
8295        default:
8296            g_assert_not_reached();
8297        }
8298
8299        write_fp_sreg(s, rd, tcg_rd);
8300        tcg_temp_free_i32(tcg_rd);
8301        tcg_temp_free_i32(tcg_rn);
8302    }
8303
8304    if (is_fcvt) {
8305        gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
8306        tcg_temp_free_i32(tcg_rmode);
8307        tcg_temp_free_ptr(tcg_fpstatus);
8308    }
8309}
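
    /*
     * A rough host-C analogue of the save/set/convert/restore rounding
     * bracket used for the FCVT* cases above; illustrative only, since
     * QEMU's softfloat keeps its own float_status and never touches the
     * host FPU. fcvt_with_rmode_ref is a hypothetical name, and the
     * <fenv.h>/<math.h> includes would belong at the top of the file.
     */
    static long long fcvt_with_rmode_ref(double x, int host_rmode)
    {
        int old = fegetround();    /* save the current rounding mode */
        long long r;

        fesetround(host_rmode);    /* e.g. FE_TOWARDZERO for FCVTZS */
        r = llrint(x);             /* convert under the chosen mode */
        fesetround(old);           /* restore, like the second
                                    * gen_helper_set_rmode() call above */
        return r;
    }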
8310
8311/* SSHR[RA]/USHR[RA] - Vector shift right (optional rounding/accumulate) */
8312static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u,
8313                                 int immh, int immb, int opcode, int rn, int rd)
8314{
8315    int size = 32 - clz32(immh) - 1;
8316    int immhb = immh << 3 | immb;
8317    int shift = 2 * (8 << size) - immhb;
8318    bool accumulate = false;
8319    bool round = false;
8320    bool insert = false;
8321    int dsize = is_q ? 128 : 64;
8322    int esize = 8 << size;
8323    int elements = dsize/esize;
8324    TCGMemOp memop = size | (is_u ? 0 : MO_SIGN);
8325    TCGv_i64 tcg_rn = new_tmp_a64(s);
8326    TCGv_i64 tcg_rd = new_tmp_a64(s);
8327    TCGv_i64 tcg_round;
8328    int i;
8329
8330    if (extract32(immh, 3, 1) && !is_q) {
8331        unallocated_encoding(s);
8332        return;
8333    }
8334
8335    if (size > 3 && !is_q) {
8336        unallocated_encoding(s);
8337        return;
8338    }
8339
8340    if (!fp_access_check(s)) {
8341        return;
8342    }
8343
8344    switch (opcode) {
8345    case 0x02: /* SSRA / USRA (accumulate) */
8346        accumulate = true;
8347        break;
8348    case 0x04: /* SRSHR / URSHR (rounding) */
8349        round = true;
8350        break;
8351    case 0x06: /* SRSRA / URSRA (accum + rounding) */
8352        accumulate = round = true;
8353        break;
8354    case 0x08: /* SRI */
8355        insert = true;
8356        break;
8357    }
8358
8359    if (round) {
8360        uint64_t round_const = 1ULL << (shift - 1);
8361        tcg_round = tcg_const_i64(round_const);
8362    } else {
8363        TCGV_UNUSED_I64(tcg_round);
8364    }
8365
8366    for (i = 0; i < elements; i++) {
8367        read_vec_element(s, tcg_rn, rn, i, memop);
8368        if (accumulate || insert) {
8369            read_vec_element(s, tcg_rd, rd, i, memop);
8370        }
8371
8372        if (insert) {
8373            handle_shri_with_ins(tcg_rd, tcg_rn, size, shift);
8374        } else {
8375            handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
8376                                    accumulate, is_u, size, shift);
8377        }
8378
8379        write_vec_element(s, tcg_rd, rd, i, size);
8380    }
8381
8382    if (!is_q) {
8383        clear_vec_high(s, rd);
8384    }
8385
8386    if (round) {
8387        tcg_temp_free_i64(tcg_round);
8388    }
8389}
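
    /*
     * Per element, the unsigned rounding/accumulating right shift emitted
     * above reduces to the following (standalone sketch, made-up name,
     * 32-bit lanes, 1 <= shift <= 32):
     */
    static uint32_t usra_round_ref(uint32_t acc, uint32_t x, int shift,
                                   bool round, bool accumulate)
    {
        uint64_t v = x;    /* widen so the rounding add cannot wrap */

        if (round) {
            v += 1ull << (shift - 1);
        }
        v >>= shift;
        return (accumulate ? acc : 0) + (uint32_t)v;
    }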
8390
8391/* SHL/SLI - Vector shift left */
8392static void handle_vec_simd_shli(DisasContext *s, bool is_q, bool insert,
8393                                int immh, int immb, int opcode, int rn, int rd)
8394{
8395    int size = 32 - clz32(immh) - 1;
8396    int immhb = immh << 3 | immb;
8397    int shift = immhb - (8 << size);
8398    int dsize = is_q ? 128 : 64;
8399    int esize = 8 << size;
8400    int elements = dsize/esize;
8401    TCGv_i64 tcg_rn = new_tmp_a64(s);
8402    TCGv_i64 tcg_rd = new_tmp_a64(s);
8403    int i;
8404
8405    if (extract32(immh, 3, 1) && !is_q) {
8406        unallocated_encoding(s);
8407        return;
8408    }
8409
8410    if (size > 3 && !is_q) {
8411        unallocated_encoding(s);
8412        return;
8413    }
8414
8415    if (!fp_access_check(s)) {
8416        return;
8417    }
8418
8419    for (i = 0; i < elements; i++) {
8420        read_vec_element(s, tcg_rn, rn, i, size);
8421        if (insert) {
8422            read_vec_element(s, tcg_rd, rd, i, size);
8423        }
8424
8425        handle_shli_with_ins(tcg_rd, tcg_rn, insert, shift);
8426
8427        write_vec_element(s, tcg_rd, rd, i, size);
8428    }
8429
8430    if (!is_q) {
8431        clear_vec_high(s, rd);
8432    }
8433}
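
    /*
     * For SLI the insert path above preserves the low 'shift' bits of
     * each destination element; a one-element sketch for 32-bit lanes
     * (made-up name, 0 <= shift < 32):
     */
    static uint32_t sli32_ref(uint32_t dst, uint32_t src, int shift)
    {
        uint32_t keep = (1u << shift) - 1;    /* destination bits kept */

        return (dst & keep) | (src << shift);
    }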
8434
8435/* USHLL/SHLL - Vector shift left with widening */
8436static void handle_vec_simd_wshli(DisasContext *s, bool is_q, bool is_u,
8437                                 int immh, int immb, int opcode, int rn, int rd)
8438{
8439    int size = 32 - clz32(immh) - 1;
8440    int immhb = immh << 3 | immb;
8441    int shift = immhb - (8 << size);
8442    int dsize = 64;
8443    int esize = 8 << size;
8444    int elements = dsize/esize;
8445    TCGv_i64 tcg_rn = new_tmp_a64(s);
8446    TCGv_i64 tcg_rd = new_tmp_a64(s);
8447    int i;
8448
8449    if (size >= 3) {
8450        unallocated_encoding(s);
8451        return;
8452    }
8453
8454    if (!fp_access_check(s)) {
8455        return;
8456    }
8457
8458    /* For the LL variants the store is larger than the load,
8459     * so if rd == rn we would overwrite parts of our input;
8460     * load everything up front and extract with shifts in the main loop.
8461     */
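    /* For example, SSHLL v0.8h, v0.8b, #0 with rd == rn == v0: writing
     * result lane 0 (bytes 0..1) would clobber source byte 1, which is
     * still needed to produce result lane 1.
     */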
8462    read_vec_element(s, tcg_rn, rn, is_q ? 1 : 0, MO_64);
8463
8464    for (i = 0; i < elements; i++) {
8465        tcg_gen_shri_i64(tcg_rd, tcg_rn, i * esize);
8466        ext_and_shift_reg(tcg_rd, tcg_rd, size | (!is_u << 2), 0);
8467        tcg_gen_shli_i64(tcg_rd, tcg_rd, shift);
8468        write_vec_element(s, tcg_rd, rd, i, size + 1);
8469    }
8470}
8471
8472/* SHRN/RSHRN - Shift right with narrowing (and potential rounding) */
8473static void handle_vec_simd_shrn(DisasContext *s, bool is_q,
8474                                 int immh, int immb, int opcode, int rn, int rd)
8475{
8476    int immhb = immh << 3 | immb;
8477    int size = 32 - clz32(immh) - 1;
8478    int dsize = 64;
8479    int esize = 8 << size;
8480    int elements = dsize/esize;
8481    int shift = (2 * esize) - immhb;
8482    bool round = extract32(opcode, 0, 1);
8483    TCGv_i64 tcg_rn, tcg_rd, tcg_final;
8484    TCGv_i64 tcg_round;
8485    int i;
8486
8487    if (extract32(immh, 3, 1)) {
8488        unallocated_encoding(s);
8489        return;
8490    }
8491
8492    if (!fp_access_check(s)) {
8493        return;
8494    }
8495
8496    tcg_rn = tcg_temp_new_i64();
8497    tcg_rd = tcg_temp_new_i64();
8498    tcg_final = tcg_temp_new_i64();
8499    read_vec_element(s, tcg_final, rd, is_q ? 1 : 0, MO_64);
8500
8501    if (round) {
8502        uint64_t round_const = 1ULL << (shift - 1);
8503        tcg_round = tcg_const_i64(round_const);
8504    } else {
8505        TCGV_UNUSED_I64(tcg_round);
8506    }
8507
8508    for (i = 0; i < elements; i++) {
8509        read_vec_element(s, tcg_rn, rn, i, size+1);
8510        handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
8511                                false, true, size+1, shift);
8512
8513        tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
8514    }
8515
8516    if (!is_q) {
8517        clear_vec_high(s, rd);
8518        write_vec_element(s, tcg_final, rd, 0, MO_64);
8519    } else {
8520        write_vec_element(s, tcg_final, rd, 1, MO_64);
8521    }
8522
8523    if (round) {
8524        tcg_temp_free_i64(tcg_round);
8525    }
8526    tcg_temp_free_i64(tcg_rn);
8527    tcg_temp_free_i64(tcg_rd);
8528    tcg_temp_free_i64(tcg_final);
8529    return;
8530}
8531
8532
8533/* C3.6.14 AdvSIMD shift by immediate
8534 *  31  30   29 28         23 22  19 18  16 15    11  10 9    5 4    0
8535 * +---+---+---+-------------+------+------+--------+---+------+------+
8536 * | 0 | Q | U | 0 1 1 1 1 0 | immh | immb | opcode | 1 |  Rn  |  Rd  |
8537 * +---+---+---+-------------+------+------+--------+---+------+------+
8538 */
8539static void disas_simd_shift_imm(DisasContext *s, uint32_t insn)
8540{
8541    int rd = extract32(insn, 0, 5);
8542    int rn = extract32(insn, 5, 5);
8543    int opcode = extract32(insn, 11, 5);
8544    int immb = extract32(insn, 16, 3);
8545    int immh = extract32(insn, 19, 4);
8546    bool is_u = extract32(insn, 29, 1);
8547    bool is_q = extract32(insn, 30, 1);
8548
8549    switch (opcode) {
8550    case 0x08: /* SRI */
8551        if (!is_u) {
8552            unallocated_encoding(s);
8553            return;
8554        }
8555        /* fall through */
8556    case 0x00: /* SSHR / USHR */
8557    case 0x02: /* SSRA / USRA (accumulate) */
8558    case 0x04: /* SRSHR / URSHR (rounding) */
8559    case 0x06: /* SRSRA / URSRA (accum + rounding) */
8560        handle_vec_simd_shri(s, is_q, is_u, immh, immb, opcode, rn, rd);
8561        break;
8562    case 0x0a: /* SHL / SLI */
8563        handle_vec_simd_shli(s, is_q, is_u, immh, immb, opcode, rn, rd);
8564        break;
8565    case 0x10: /* SHRN */
8566    case 0x11: /* RSHRN / SQRSHRUN */
8567        if (is_u) {
8568            handle_vec_simd_sqshrn(s, false, is_q, false, true, immh, immb,
8569                                   opcode, rn, rd);
8570        } else {
8571            handle_vec_simd_shrn(s, is_q, immh, immb, opcode, rn, rd);
8572        }
8573        break;
8574    case 0x12: /* SQSHRN / UQSHRN */
8575    case 0x13: /* SQRSHRN / UQRSHRN */
8576        handle_vec_simd_sqshrn(s, false, is_q, is_u, is_u, immh, immb,
8577                               opcode, rn, rd);
8578        break;
8579    case 0x14: /* SSHLL / USHLL */
8580        handle_vec_simd_wshli(s, is_q, is_u, immh, immb, opcode, rn, rd);
8581        break;
8582    case 0x1c: /* SCVTF / UCVTF */
8583        handle_simd_shift_intfp_conv(s, false, is_q, is_u, immh, immb,
8584                                     opcode, rn, rd);
8585        break;
8586    case 0xc: /* SQSHLU */
8587        if (!is_u) {
8588            unallocated_encoding(s);
8589            return;
8590        }
8591        handle_simd_qshl(s, false, is_q, false, true, immh, immb, rn, rd);
8592        break;
8593    case 0xe: /* SQSHL, UQSHL */
8594        handle_simd_qshl(s, false, is_q, is_u, is_u, immh, immb, rn, rd);
8595        break;
8596    case 0x1f: /* FCVTZS / FCVTZU */
8597        handle_simd_shift_fpint_conv(s, false, is_q, is_u, immh, immb, rn, rd);
8598        return;
8599    default:
8600        unallocated_encoding(s);
8601        return;
8602    }
8603}
8604
8605/* Generate code to do a "long" addition or subtraction, i.e. one done in
8606 * TCGv_i64 on vector lanes twice the width specified by size.
8607 */
8608static void gen_neon_addl(int size, bool is_sub, TCGv_i64 tcg_res,
8609                          TCGv_i64 tcg_op1, TCGv_i64 tcg_op2)
8610{
8611    static NeonGenTwo64OpFn * const fns[3][2] = {
8612        { gen_helper_neon_addl_u16, gen_helper_neon_subl_u16 },
8613        { gen_helper_neon_addl_u32, gen_helper_neon_subl_u32 },
8614        { tcg_gen_add_i64, tcg_gen_sub_i64 },
8615    };
8616    NeonGenTwo64OpFn *genfn;
8617    assert(size < 3);
8618
8619    genfn = fns[size][is_sub];
8620    genfn(tcg_res, tcg_op1, tcg_op2);
8621}
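
    /*
     * The _u16 helpers above add packed 16-bit lanes inside a single
     * uint64_t without letting carries cross lane boundaries. A sketch
     * of the classic SWAR formulation of that technique (not the QEMU
     * helper source itself):
     */
    static uint64_t lanewise_add16_ref(uint64_t a, uint64_t b)
    {
        const uint64_t msb = 0x8000800080008000ull; /* top bit per lane */

        /* add the low 15 bits of each lane, then fold the top bits back
         * in with xor, so a carry out of one lane never reaches the next
         */
        return ((a & ~msb) + (b & ~msb)) ^ ((a ^ b) & msb);
    }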
8622
8623static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size,
8624                                int opcode, int rd, int rn, int rm)
8625{
8626    /* 3-reg-different widening insns: 64 x 64 -> 128 */
8627    TCGv_i64 tcg_res[2];
8628    int pass, accop;
8629
8630    tcg_res[0] = tcg_temp_new_i64();
8631    tcg_res[1] = tcg_temp_new_i64();
8632
8633    /* Does this op do an adding accumulate, a subtracting accumulate,
8634     * or no accumulate at all?
8635     */
8636    switch (opcode) {
8637    case 5:
8638    case 8:
8639    case 9:
8640        accop = 1;
8641        break;
8642    case 10:
8643    case 11:
8644        accop = -1;
8645        break;
8646    default:
8647        accop = 0;
8648        break;
8649    }
8650
8651    if (accop != 0) {
8652        read_vec_element(s, tcg_res[0], rd, 0, MO_64);
8653        read_vec_element(s, tcg_res[1], rd, 1, MO_64);
8654    }
8655
8656    /* size == 2 means two 32x32->64 operations; this is worth special
8657     * casing because we can generally handle it inline.
8658     */
8659    if (size == 2) {
8660        for (pass = 0; pass < 2; pass++) {
8661            TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8662            TCGv_i64 tcg_op2 = tcg_temp_new_i64();
8663            TCGv_i64 tcg_passres;
8664            TCGMemOp memop = MO_32 | (is_u ? 0 : MO_SIGN);
8665
8666            int elt = pass + is_q * 2;
8667
8668            read_vec_element(s, tcg_op1, rn, elt, memop);
8669            read_vec_element(s, tcg_op2, rm, elt, memop);
8670
8671            if (accop == 0) {
8672                tcg_passres = tcg_res[pass];
8673            } else {
8674                tcg_passres = tcg_temp_new_i64();
8675            }
8676
8677            switch (opcode) {
8678            case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
8679                tcg_gen_add_i64(tcg_passres, tcg_op1, tcg_op2);
8680                break;
8681            case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
8682                tcg_gen_sub_i64(tcg_passres, tcg_op1, tcg_op2);
8683                break;
8684            case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
8685            case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
8686            {
8687                TCGv_i64 tcg_tmp1 = tcg_temp_new_i64();
8688                TCGv_i64 tcg_tmp2 = tcg_temp_new_i64();
8689
8690                tcg_gen_sub_i64(tcg_tmp1, tcg_op1, tcg_op2);
8691                tcg_gen_sub_i64(tcg_tmp2, tcg_op2, tcg_op1);
8692                tcg_gen_movcond_i64(is_u ? TCG_COND_GEU : TCG_COND_GE,
8693                                    tcg_passres,
8694                                    tcg_op1, tcg_op2, tcg_tmp1, tcg_tmp2);
8695                tcg_temp_free_i64(tcg_tmp1);
8696                tcg_temp_free_i64(tcg_tmp2);
8697                break;
8698            }
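                /* i.e. branch-free |op1 - op2|: both differences are
                 * formed and movcond selects the non-negative one
                 */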
8699            case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
8700            case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
8701            case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
8702                tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
8703                break;
8704            case 9: /* SQDMLAL, SQDMLAL2 */
8705            case 11: /* SQDMLSL, SQDMLSL2 */
8706            case 13: /* SQDMULL, SQDMULL2 */
8707                tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
8708                gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env,
8709                                                  tcg_passres, tcg_passres);
8710                break;
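                /* Note that the "doubling" above is done as a saturating
                 * self-add: e.g. for SQDMULL, INT32_MIN * INT32_MIN gives
                 * 0x4000000000000000, and doubling it saturates to
                 * INT64_MAX (setting QC) instead of wrapping negative.
                 */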
8711            default:
8712                g_assert_not_reached();
8713            }
8714
8715            if (opcode == 9 || opcode == 11) {
8716                /* saturating accumulate ops */
8717                if (accop < 0) {
8718                    tcg_gen_neg_i64(tcg_passres, tcg_passres);
8719                }
8720                gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env,
8721                                                  tcg_res[pass], tcg_passres);
8722            } else if (accop > 0) {
8723                tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
8724            } else if (accop < 0) {
8725                tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
8726            }
8727
8728            if (accop != 0) {
8729                tcg_temp_free_i64(tcg_passres);
8730            }
8731
8732            tcg_temp_free_i64(tcg_op1);
8733            tcg_temp_free_i64(tcg_op2);
8734        }
8735    } else {
8736        /* size 0 or 1, generally helper functions */
8737        for (pass = 0; pass < 2; pass++) {
8738            TCGv_i32 tcg_op1 = tcg_temp_new_i32();
8739            TCGv_i32 tcg_op2 = tcg_temp_new_i32();
8740            TCGv_i64 tcg_passres;
8741            int elt = pass + is_q * 2;
8742
8743            read_vec_element_i32(s, tcg_op1, rn, elt, MO_32);
8744            read_vec_element_i32(s, tcg_op2, rm, elt, MO_32);
8745
8746            if (accop == 0) {
8747                tcg_passres = tcg_res[pass];
8748            } else {
8749                tcg_passres = tcg_temp_new_i64();
8750            }
8751
8752            switch (opcode) {
8753            case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
8754            case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
8755            {
8756                TCGv_i64 tcg_op2_64 = tcg_temp_new_i64();
8757                static NeonGenWidenFn * const widenfns[2][2] = {
8758                    { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
8759                    { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
8760                };
8761                NeonGenWidenFn *widenfn = widenfns[size][is_u];
8762
8763                widenfn(tcg_op2_64, tcg_op2);
8764                widenfn(tcg_passres, tcg_op1);
8765                gen_neon_addl(size, (opcode == 2), tcg_passres,
8766                              tcg_passres, tcg_op2_64);
8767                tcg_temp_free_i64(tcg_op2_64);
8768                break;
8769            }
8770            case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
8771            case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
8772                if (size == 0) {
8773                    if (is_u) {
8774                        gen_helper_neon_abdl_u16(tcg_passres, tcg_op1, tcg_op2);
8775                    } else {
8776                        gen_helper_neon_abdl_s16(tcg_passres, tcg_op1, tcg_op2);
8777                    }
8778                } else {
8779                    if (is_u) {
8780                        gen_helper_neon_abdl_u32(tcg_passres, tcg_op1, tcg_op2);
8781                    } else {
8782                        gen_helper_neon_abdl_s32(tcg_passres, tcg_op1, tcg_op2);
8783                    }
8784                }
8785                break;
8786            case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
8787            case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
8788            case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
8789                if (size == 0) {
8790                    if (is_u) {
8791                        gen_helper_neon_mull_u8(tcg_passres, tcg_op1, tcg_op2);
8792                    } else {
8793                        gen_helper_neon_mull_s8(tcg_passres, tcg_op1, tcg_op2);
8794                    }
8795                } else {
8796                    if (is_u) {
8797                        gen_helper_neon_mull_u16(tcg_passres, tcg_op1, tcg_op2);
8798                    } else {
8799                        gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
8800                    }
8801                }
8802                break;
8803            case 9: /* SQDMLAL, SQDMLAL2 */
8804            case 11: /* SQDMLSL, SQDMLSL2 */
8805            case 13: /* SQDMULL, SQDMULL2 */
8806                assert(size == 1);
8807                gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
8808                gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
8809                                                  tcg_passres, tcg_passres);
8810                break;
8811            case 14: /* PMULL */
8812                assert(size == 0);
8813                gen_helper_neon_mull_p8(tcg_passres, tcg_op1, tcg_op2);
8814                break;
8815            default:
8816                g_assert_not_reached();
8817            }
8818            tcg_temp_free_i32(tcg_op1);
8819            tcg_temp_free_i32(tcg_op2);
8820
8821            if (accop != 0) {
8822                if (opcode == 9 || opcode == 11) {
8823                    /* saturating accumulate ops */
8824                    if (accop < 0) {
8825                        gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
8826                    }
8827                    gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env,
8828                                                      tcg_res[pass],
8829                                                      tcg_passres);
8830                } else {
8831                    gen_neon_addl(size, (accop < 0), tcg_res[pass],
8832                                  tcg_res[pass], tcg_passres);
8833                }
8834                tcg_temp_free_i64(tcg_passres);
8835            }
8836        }
8837    }
8838
8839    write_vec_element(s, tcg_res[0], rd, 0, MO_64);
8840    write_vec_element(s, tcg_res[1], rd, 1, MO_64);
8841    tcg_temp_free_i64(tcg_res[0]);
8842    tcg_temp_free_i64(tcg_res[1]);
8843}
8844
8845static void handle_3rd_wide(DisasContext *s, int is_q, int is_u, int size,
8846                            int opcode, int rd, int rn, int rm)
8847{
8848    TCGv_i64 tcg_res[2];
8849    int part = is_q ? 2 : 0;
8850    int pass;
8851
8852    for (pass = 0; pass < 2; pass++) {
8853        TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8854        TCGv_i32 tcg_op2 = tcg_temp_new_i32();
8855        TCGv_i64 tcg_op2_wide = tcg_temp_new_i64();
8856        static NeonGenWidenFn * const widenfns[3][2] = {
8857            { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
8858            { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
8859            { tcg_gen_ext_i32_i64, tcg_gen_extu_i32_i64 },
8860        };
8861        NeonGenWidenFn *widenfn = widenfns[size][is_u];
8862
8863        read_vec_element(s, tcg_op1, rn, pass, MO_64);
8864        read_vec_element_i32(s, tcg_op2, rm, part + pass, MO_32);
8865        widenfn(tcg_op2_wide, tcg_op2);
8866        tcg_temp_free_i32(tcg_op2);
8867        tcg_res[pass] = tcg_temp_new_i64();
8868        gen_neon_addl(size, (opcode == 3),
8869                      tcg_res[pass], tcg_op1, tcg_op2_wide);
8870        tcg_temp_free_i64(tcg_op1);
8871        tcg_temp_free_i64(tcg_op2_wide);
8872    }
8873
8874    for (pass = 0; pass < 2; pass++) {
8875        write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
8876        tcg_temp_free_i64(tcg_res[pass]);
8877    }
8878}
8879
8880static void do_narrow_round_high_u32(TCGv_i32 res, TCGv_i64 in)
8881{
8882    tcg_gen_addi_i64(in, in, 1U << 31);
8883    tcg_gen_extrh_i64_i32(res, in);
8884}
8885
8886static void handle_3rd_narrowing(DisasContext *s, int is_q, int is_u, int size,
8887                                 int opcode, int rd, int rn, int rm)
8888{
8889    TCGv_i32 tcg_res[2];
8890    int part = is_q ? 2 : 0;
8891    int pass;
8892
8893    for (pass = 0; pass < 2; pass++) {
8894        TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8895        TCGv_i64 tcg_op2 = tcg_temp_new_i64();
8896        TCGv_i64 tcg_wideres = tcg_temp_new_i64();
8897        static NeonGenNarrowFn * const narrowfns[3][2] = {
8898            { gen_helper_neon_narrow_high_u8,
8899              gen_helper_neon_narrow_round_high_u8 },
8900            { gen_helper_neon_narrow_high_u16,
8901              gen_helper_neon_narrow_round_high_u16 },
8902            { tcg_gen_extrh_i64_i32, do_narrow_round_high_u32 },
8903        };
8904        NeonGenNarrowFn *gennarrow = narrowfns[size][is_u];
8905
8906        read_vec_element(s, tcg_op1, rn, pass, MO_64);
8907        read_vec_element(s, tcg_op2, rm, pass, MO_64);
8908
8909        gen_neon_addl(size, (opcode == 6), tcg_wideres, tcg_op1, tcg_op2);
8910
8911        tcg_temp_free_i64(tcg_op1);
8912        tcg_temp_free_i64(tcg_op2);
8913
8914        tcg_res[pass] = tcg_temp_new_i32();
8915        gennarrow(tcg_res[pass], tcg_wideres);
8916        tcg_temp_free_i64(tcg_wideres);
8917    }
8918
8919    for (pass = 0; pass < 2; pass++) {
8920        write_vec_element_i32(s, tcg_res[pass], rd, pass + part, MO_32);
8921        tcg_temp_free_i32(tcg_res[pass]);
8922    }
8923    if (!is_q) {
8924        clear_vec_high(s, rd);
8925    }
8926}
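
    /*
     * Per pair of elements, ADDHN/RADDHN for the 32-bit-source case
     * reduce to the following (standalone sketch, made-up name; the
     * unsigned sum wraps modulo 2^32, as the generated code's does):
     */
    static uint16_t addhn32_ref(uint32_t a, uint32_t b, bool round)
    {
        uint32_t sum = a + b + (round ? 1u << 15 : 0); /* RADDHN rounds */

        return (uint16_t)(sum >> 16);    /* keep the high half only */
    }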
8927
8928static void handle_pmull_64(DisasContext *s, int is_q, int rd, int rn, int rm)
8929{
8930    /* PMULL of 64 x 64 -> 128 is an odd special case because it
8931     * is the only three-reg-diff instruction which produces a
8932     * 128-bit wide result from a single operation. However, since
8933     * it's possible to calculate the two halves more or less
8934     * separately, we just use two helper calls.
8935     */
8936    TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8937    TCGv_i64 tcg_op2 = tcg_temp_new_i64();
8938    TCGv_i64 tcg_res = tcg_temp_new_i64();
8939
8940    read_vec_element(s, tcg_op1, rn, is_q, MO_64);
8941    read_vec_element(s, tcg_op2, rm, is_q, MO_64);
8942    gen_helper_neon_pmull_64_lo(tcg_res, tcg_op1, tcg_op2);
8943    write_vec_element(s, tcg_res, rd, 0, MO_64);
8944    gen_helper_neon_pmull_64_hi(tcg_res, tcg_op1, tcg_op2);
8945    write_vec_element(s, tcg_res, rd, 1, MO_64);
8946
8947    tcg_temp_free_i64(tcg_op1);
8948    tcg_temp_free_i64(tcg_op2);
8949    tcg_temp_free_i64(tcg_res);
8950}
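
    /*
     * "Polynomial" multiplication here is carry-less: partial products
     * are combined with XOR rather than ADD. A bit-serial reference for
     * the low 64 bits of the 128-bit product (sketch only, independent
     * of how the pmull_64 helpers are implemented):
     */
    static uint64_t clmul_lo_ref(uint64_t a, uint64_t b)
    {
        uint64_t r = 0;
        int i;

        for (i = 0; i < 64; i++) {
            if (b & (1ull << i)) {
                r ^= a << i;    /* xor, not add: no carries in GF(2) */
            }
        }
        return r;
    }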
8951
8952/* C3.6.15 AdvSIMD three different
8953 *   31  30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
8954 * +---+---+---+-----------+------+---+------+--------+-----+------+------+
8955 * | 0 | Q | U | 0 1 1 1 0 | size | 1 |  Rm  | opcode | 0 0 |  Rn  |  Rd  |
8956 * +---+---+---+-----------+------+---+------+--------+-----+------+------+
8957 */
8958static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn)
8959{
8960    /* Instructions in this group fall into three basic classes
8961     * (in each case with the operation working on each element in
8962     * the input vectors):
8963     * (1) widening 64 x 64 -> 128 (with possibly Vd as an extra
8964     *     128 bit input)
8965     * (2) wide 64 x 128 -> 128
8966     * (3) narrowing 128 x 128 -> 64
8967     * Here we do initial decode, catch unallocated cases and
8968     * dispatch to separate functions for each class.
8969     */
8970    int is_q = extract32(insn, 30, 1);
8971    int is_u = extract32(insn, 29, 1);
8972    int size = extract32(insn, 22, 2);
8973    int opcode = extract32(insn, 12, 4);
8974    int rm = extract32(insn, 16, 5);
8975    int rn = extract32(insn, 5, 5);
8976    int rd = extract32(insn, 0, 5);
8977
8978    switch (opcode) {
8979    case 1: /* SADDW, SADDW2, UADDW, UADDW2 */
8980    case 3: /* SSUBW, SSUBW2, USUBW, USUBW2 */
8981        /* 64 x 128 -> 128 */
8982        if (size == 3) {
8983            unallocated_encoding(s);
8984            return;
8985        }
8986        if (!fp_access_check(s)) {
8987            return;
8988        }
8989        handle_3rd_wide(s, is_q, is_u, size, opcode, rd, rn, rm);
8990        break;
8991    case 4: /* ADDHN, ADDHN2, RADDHN, RADDHN2 */
8992    case 6: /* SUBHN, SUBHN2, RSUBHN, RSUBHN2 */
8993        /* 128 x 128 -> 64 */
8994        if (size == 3) {
8995            unallocated_encoding(s);
8996            return;
8997        }
8998        if (!fp_access_check(s)) {
8999            return;
9000        }
9001        handle_3rd_narrowing(s, is_q, is_u, size, opcode, rd, rn, rm);
9002        break;
9003    case 14: /* PMULL, PMULL2 */
9004        if (is_u || size == 1 || size == 2) {
9005            unallocated_encoding(s);
9006            return;
9007        }
9008        if (size == 3) {
9009            if (!arm_dc_feature(s, ARM_FEATURE_V8_PMULL)) {
9010                unallocated_encoding(s);
9011                return;
9012            }
9013            if (!fp_access_check(s)) {
9014                return;
9015            }
9016            handle_pmull_64(s, is_q, rd, rn, rm);
9017            return;
9018        }
9019        goto is_widening;
9020    case 9: /* SQDMLAL, SQDMLAL2 */
9021    case 11: /* SQDMLSL, SQDMLSL2 */
9022    case 13: /* SQDMULL, SQDMULL2 */
9023        if (is_u || size == 0) {
9024            unallocated_encoding(s);
9025            return;
9026        }
9027        /* fall through */
9028    case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
9029    case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
9030    case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
9031    case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
9032    case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
9033    case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
9034    case 12: /* SMULL, SMULL2, UMULL, UMULL2 */
9035        /* 64 x 64 -> 128 */
9036        if (size == 3) {
9037            unallocated_encoding(s);
9038            return;
9039        }
9040    is_widening:
9041        if (!fp_access_check(s)) {
9042            return;
9043        }
9044
9045        handle_3rd_widening(s, is_q, is_u, size, opcode, rd, rn, rm);
9046        break;
9047    default:
9048        /* opcode 15 not allocated */
9049        unallocated_encoding(s);
9050        break;
9051    }
9052}
9053
9054/* Logic op (opcode == 3) subgroup of C3.6.16. */
9055static void disas_simd_3same_logic(DisasContext *s, uint32_t insn)
9056{
9057    int rd = extract32(insn, 0, 5);
9058    int rn = extract32(insn, 5, 5);
9059    int rm = extract32(insn, 16, 5);
9060    int size = extract32(insn, 22, 2);
9061    bool is_u = extract32(insn, 29, 1);
9062    bool is_q = extract32(insn, 30, 1);
9063    TCGv_i64 tcg_op1, tcg_op2, tcg_res[2];
9064    int pass;
9065
9066    if (!fp_access_check(s)) {
9067        return;
9068    }
9069
9070    tcg_op1 = tcg_temp_new_i64();
9071    tcg_op2 = tcg_temp_new_i64();
9072    tcg_res[0] = tcg_temp_new_i64();
9073    tcg_res[1] = tcg_temp_new_i64();
9074
9075    for (pass = 0; pass < (is_q ? 2 : 1); pass++) {
9076        read_vec_element(s, tcg_op1, rn, pass, MO_64);
9077        read_vec_element(s, tcg_op2, rm, pass, MO_64);
9078
9079        if (!is_u) {
9080            switch (size) {
9081            case 0: /* AND */
9082                tcg_gen_and_i64(tcg_res[pass], tcg_op1, tcg_op2);
9083                break;
9084            case 1: /* BIC */
9085                tcg_gen_andc_i64(tcg_res[pass], tcg_op1, tcg_op2);
9086                break;
9087            case 2: /* ORR */
9088                tcg_gen_or_i64(tcg_res[pass], tcg_op1, tcg_op2);
9089                break;
9090            case 3: /* ORN */
9091                tcg_gen_orc_i64(tcg_res[pass], tcg_op1, tcg_op2);
9092                break;
9093            }
9094        } else {
9095            if (size != 0) {
9096                /* B* ops (BSL/BIT/BIF) use the existing result as a third operand */
9097                read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
9098            }
9099
9100            switch (size) {
9101            case 0: /* EOR */
9102                tcg_gen_xor_i64(tcg_res[pass], tcg_op1, tcg_op2);
9103                break;
9104            case 1: /* BSL bitwise select */
9105                tcg_gen_xor_i64(tcg_op1, tcg_op1, tcg_op2);
9106                tcg_gen_and_i64(tcg_op1, tcg_op1, tcg_res[pass]);
9107                tcg_gen_xor_i64(tcg_res[pass], tcg_op2, tcg_op1);
9108                break;
9109            case 2: /* BIT, bitwise insert if true */
9110                tcg_gen_xor_i64(tcg_op1, tcg_op1, tcg_res[pass]);
9111                tcg_gen_and_i64(tcg_op1, tcg_op1, tcg_op2);
9112                tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
9113                break;
9114            case 3: /* BIF, bitwise insert if false */
9115                tcg_gen_xor_i64(tcg_op1, tcg_op1, tcg_res[pass]);
9116                tcg_gen_andc_i64(tcg_op1, tcg_op1, tcg_op2);
9117                tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
9118                break;
9119            }
9120        }
9121    }
9122
9123    write_vec_element(s, tcg_res[0], rd, 0, MO_64);
9124    if (!is_q) {
9125        tcg_gen_movi_i64(tcg_res[1], 0);
9126    }
9127    write_vec_element(s, tcg_res[1], rd, 1, MO_64);
9128
9129    tcg_temp_free_i64(tcg_op1);
9130    tcg_temp_free_i64(tcg_op2);
9131    tcg_temp_free_i64(tcg_res[0]);
9132    tcg_temp_free_i64(tcg_res[1]);
9133}
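
    /*
     * BSL, BIT and BIF above are all instances of one xor-trick:
     * bitselect(mask, a, b) = b ^ ((a ^ b) & mask), which takes a-bits
     * where mask is 1 and b-bits where it is 0, branch-free in three
     * ops. A scalar sketch of the identity (made-up name):
     */
    static uint64_t bitselect_ref(uint64_t mask, uint64_t a, uint64_t b)
    {
        /* mask bit 1: b ^ (a ^ b) = a; mask bit 0: b unchanged */
        return b ^ ((a ^ b) & mask);
    }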
9134
9135/* Helper functions for 32 bit comparisons */
9136static void gen_max_s32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
9137{
9138    tcg_gen_movcond_i32(TCG_COND_GE, res, op1, op2, op1, op2);
9139}
9140
9141static void gen_max_u32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
9142{
9143    tcg_gen_movcond_i32(TCG_COND_GEU, res, op1, op2, op1, op2);
9144}
9145
9146static void gen_min_s32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
9147{
9148    tcg_gen_movcond_i32(TCG_COND_LE, res, op1, op2, op1, op2);
9149}
9150
9151static void gen_min_u32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
9152{
9153    tcg_gen_movcond_i32(TCG_COND_LEU, res, op1, op2, op1, op2);
9154}
9155
9156/* Pairwise op subgroup of C3.6.16.
9157 *
9158 * This is called directly, or from disas_simd_3same_float for the float
9159 * pairwise operations, where the opcode and size are derived differently.
9160 */
9161static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode,
9162                                   int size, int rn, int rm, int rd)
9163{
9164    TCGv_ptr fpst;
9165    int pass;
9166
9167    /* Floating point operations need fpst */
9168    if (opcode >= 0x58) {
9169        fpst = get_fpstatus_ptr();
9170    } else {
9171        TCGV_UNUSED_PTR(fpst);
9172    }
9173
9174    if (!fp_access_check(s)) {
9175        return;
9176    }
9177
9178    /* These operations work on the concatenated rm:rn, with each pair of
9179     * adjacent elements being operated on to produce an element in the result.
9180     */
9181    if (size == 3) {
9182        TCGv_i64 tcg_res[2];
9183
9184        for (pass = 0; pass < 2; pass++) {
9185            TCGv_i64 tcg_op1 = tcg_temp_new_i64();
9186            TCGv_i64 tcg_op2 = tcg_temp_new_i64();
9187            int passreg = (pass == 0) ? rn : rm;
9188
9189            read_vec_element(s, tcg_op1, passreg, 0, MO_64);
9190            read_vec_element(s, tcg_op2, passreg, 1, MO_64);
9191            tcg_res[pass] = tcg_temp_new_i64();
9192
9193            switch (opcode) {
9194            case 0x17: /* ADDP */
9195                tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
9196                break;
9197            case 0x58: /* FMAXNMP */
9198                gen_helper_vfp_maxnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9199                break;
9200            case 0x5a: /* FADDP */
9201                gen_helper_vfp_addd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9202                break;
9203            case 0x5e: /* FMAXP */
9204                gen_helper_vfp_maxd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9205                break;
9206            case 0x78: /* FMINNMP */
9207                gen_helper_vfp_minnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9208                break;
9209            case 0x7e: /* FMINP */
9210                gen_helper_vfp_mind(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9211                break;
9212            default:
9213                g_assert_not_reached();
9214            }
9215
9216            tcg_temp_free_i64(tcg_op1);
9217            tcg_temp_free_i64(tcg_op2);
9218        }
9219
9220        for (pass = 0; pass < 2; pass++) {
9221            write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
9222            tcg_temp_free_i64(tcg_res[pass]);
9223        }
9224    } else {
9225        int maxpass = is_q ? 4 : 2;
9226        TCGv_i32 tcg_res[4];
9227
9228        for (pass = 0; pass < maxpass; pass++) {
9229            TCGv_i32 tcg_op1 = tcg_temp_new_i32();
9230            TCGv_i32 tcg_op2 = tcg_temp_new_i32();
9231            NeonGenTwoOpFn *genfn = NULL;
9232            int passreg = pass < (maxpass / 2) ? rn : rm;
9233            int passelt = (is_q && (pass & 1)) ? 2 : 0;
9234
9235            read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_32);
9236            read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_32);
9237            tcg_res[pass] = tcg_temp_new_i32();
9238
9239            switch (opcode) {
9240            case 0x17: /* ADDP */
9241            {
9242                static NeonGenTwoOpFn * const fns[3] = {
9243                    gen_helper_neon_padd_u8,
9244                    gen_helper_neon_padd_u16,
9245                    tcg_gen_add_i32,
9246                };
9247                genfn = fns[size];
9248                break;
9249            }
9250            case 0x14: /* SMAXP, UMAXP */
9251            {
9252                static NeonGenTwoOpFn * const fns[3][2] = {
9253                    { gen_helper_neon_pmax_s8, gen_helper_neon_pmax_u8 },
9254                    { gen_helper_neon_pmax_s16, gen_helper_neon_pmax_u16 },
9255                    { gen_max_s32, gen_max_u32 },
9256                };
9257                genfn = fns[size][u];
9258                break;
9259            }
9260            case 0x15: /* SMINP, UMINP */
9261            {
9262                static NeonGenTwoOpFn * const fns[3][2] = {
9263                    { gen_helper_neon_pmin_s8, gen_helper_neon_pmin_u8 },
9264                    { gen_helper_neon_pmin_s16, gen_helper_neon_pmin_u16 },
9265                    { gen_min_s32, gen_min_u32 },
9266                };
9267                genfn = fns[size][u];
9268                break;
9269            }
9270            /* The FP operations are all on single floats (32 bit) */
9271            case 0x58: /* FMAXNMP */
9272                gen_helper_vfp_maxnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9273                break;
9274            case 0x5a: /* FADDP */
9275                gen_helper_vfp_adds(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9276                break;
9277            case 0x5e: /* FMAXP */
9278                gen_helper_vfp_maxs(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9279                break;
9280            case 0x78: /* FMINNMP */
9281                gen_helper_vfp_minnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9282                break;
9283            case 0x7e: /* FMINP */
9284                gen_helper_vfp_mins(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9285                break;
9286            default:
9287                g_assert_not_reached();
9288            }
9289
9290            /* FP ops were emitted directly above; call the integer genfn now */
9291            if (genfn) {
9292                genfn(tcg_res[pass], tcg_op1, tcg_op2);
9293            }
9294
9295            tcg_temp_free_i32(tcg_op1);
9296            tcg_temp_free_i32(tcg_op2);
9297        }
9298
9299        for (pass = 0; pass < maxpass; pass++) {
9300            write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
9301            tcg_temp_free_i32(tcg_res[pass]);
9302        }
9303        if (!is_q) {
9304            clear_vec_high(s, rd);
9305        }
9306    }
9307
9308    if (!TCGV_IS_UNUSED_PTR(fpst)) {
9309        tcg_temp_free_ptr(fpst);
9310    }
9311}
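
    /*
     * The pass/passreg/passelt walk above implements the pairwise model
     * "operate on adjacent element pairs of the concatenation Rm:Rn".
     * Flattened into plain C for integer ADDP (made-up names):
     */
    static void addp_ref(uint32_t *d, const uint32_t *n, const uint32_t *m,
                         int elts)
    {
        int i;

        for (i = 0; i < elts / 2; i++) {
            d[i] = n[2 * i] + n[2 * i + 1];            /* pairs from Rn */
            d[elts / 2 + i] = m[2 * i] + m[2 * i + 1]; /* pairs from Rm */
        }
    }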
9312
9313/* Floating point op subgroup of C3.6.16. */
9314static void disas_simd_3same_float(DisasContext *s, uint32_t insn)
9315{
9316    /* For floating point ops, the U, size[1] and opcode bits
9317     * together indicate the operation. size[0] indicates single
9318     * or double.
9319     */
9320    int fpopcode = extract32(insn, 11, 5)
9321        | (extract32(insn, 23, 1) << 5)
9322        | (extract32(insn, 29, 1) << 6);
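    /* Worked example: vector FADD is U=0, size[1]=0, opcode=0b11010, i.e.
     * fpopcode 0x1a; FSUB is the same opcode with size[1]=1, i.e. 0x3a -
     * see the case labels below.
     */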
9323    int is_q = extract32(insn, 30, 1);
9324    int size = extract32(insn, 22, 1);
9325    int rm = extract32(insn, 16, 5);
9326    int rn = extract32(insn, 5, 5);
9327    int rd = extract32(insn, 0, 5);
9328
9329    int datasize = is_q ? 128 : 64;
9330    int esize = 32 << size;
9331    int elements = datasize / esize;
9332
9333    if (size == 1 && !is_q) {
9334        unallocated_encoding(s);
9335        return;
9336    }
9337
9338    switch (fpopcode) {
9339    case 0x58: /* FMAXNMP */
9340    case 0x5a: /* FADDP */
9341    case 0x5e: /* FMAXP */
9342    case 0x78: /* FMINNMP */
9343    case 0x7e: /* FMINP */
9344        if (size && !is_q) {
9345            unallocated_encoding(s);
9346            return;
9347        }
9348        handle_simd_3same_pair(s, is_q, 0, fpopcode, size ? MO_64 : MO_32,
9349                               rn, rm, rd);
9350        return;
9351    case 0x1b: /* FMULX */
9352    case 0x1f: /* FRECPS */
9353    case 0x3f: /* FRSQRTS */
9354    case 0x5d: /* FACGE */
9355    case 0x7d: /* FACGT */
9356    case 0x19: /* FMLA */
9357    case 0x39: /* FMLS */
9358    case 0x18: /* FMAXNM */
9359    case 0x1a: /* FADD */
9360    case 0x1c: /* FCMEQ */
9361    case 0x1e: /* FMAX */
9362    case 0x38: /* FMINNM */
9363    case 0x3a: /* FSUB */
9364    case 0x3e: /* FMIN */
9365    case 0x5b: /* FMUL */
9366    case 0x5c: /* FCMGE */
9367    case 0x5f: /* FDIV */
9368    case 0x7a: /* FABD */
9369    case 0x7c: /* FCMGT */
9370        if (!fp_access_check(s)) {
9371            return;
9372        }
9373
9374        handle_3same_float(s, size, elements, fpopcode, rd, rn, rm);
9375        return;
9376    default:
9377        unallocated_encoding(s);
9378        return;
9379    }
9380}
9381
9382/* Integer op subgroup of C3.6.16. */
9383static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
9384{
9385    int is_q = extract32(insn, 30, 1);
9386    int u = extract32(insn, 29, 1);
9387    int size = extract32(insn, 22, 2);
9388    int opcode = extract32(insn, 11, 5);
9389    int rm = extract32(insn, 16, 5);
9390    int rn = extract32(insn, 5, 5);
9391    int rd = extract32(insn, 0, 5);
9392    int pass;
9393
9394    switch (opcode) {
9395    case 0x13: /* MUL, PMUL */
9396        if (u && size != 0) {
9397            unallocated_encoding(s);
9398            return;
9399        }
9400        /* fall through */
9401    case 0x0: /* SHADD, UHADD */
9402    case 0x2: /* SRHADD, URHADD */
9403    case 0x4: /* SHSUB, UHSUB */
9404    case 0xc: /* SMAX, UMAX */
9405    case 0xd: /* SMIN, UMIN */
9406    case 0xe: /* SABD, UABD */
9407    case 0xf: /* SABA, UABA */
9408    case 0x12: /* MLA, MLS */
9409        if (size == 3) {
9410            unallocated_encoding(s);
9411            return;
9412        }
9413        break;
9414    case 0x16: /* SQDMULH, SQRDMULH */
9415        if (size == 0 || size == 3) {
9416            unallocated_encoding(s);
9417            return;
9418        }
9419        break;
9420    default:
9421        if (size == 3 && !is_q) {
9422            unallocated_encoding(s);
9423            return;
9424        }
9425        break;
9426    }
9427
9428    if (!fp_access_check(s)) {
9429        return;
9430    }
9431
9432    if (size == 3) {
9433        assert(is_q);
9434        for (pass = 0; pass < 2; pass++) {
9435            TCGv_i64 tcg_op1 = tcg_temp_new_i64();
9436            TCGv_i64 tcg_op2 = tcg_temp_new_i64();
9437            TCGv_i64 tcg_res = tcg_temp_new_i64();
9438
9439            read_vec_element(s, tcg_op1, rn, pass, MO_64);
9440            read_vec_element(s, tcg_op2, rm, pass, MO_64);
9441
9442            handle_3same_64(s, opcode, u, tcg_res, tcg_op1, tcg_op2);
9443
9444            write_vec_element(s, tcg_res, rd, pass, MO_64);
9445
9446            tcg_temp_free_i64(tcg_res);
9447            tcg_temp_free_i64(tcg_op1);
9448            tcg_temp_free_i64(tcg_op2);
9449        }
9450    } else {
9451        for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
9452            TCGv_i32 tcg_op1 = tcg_temp_new_i32();
9453            TCGv_i32 tcg_op2 = tcg_temp_new_i32();
9454            TCGv_i32 tcg_res = tcg_temp_new_i32();
9455            NeonGenTwoOpFn *genfn = NULL;
9456            NeonGenTwoOpEnvFn *genenvfn = NULL;
9457
9458            read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
9459            read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);
9460
9461            switch (opcode) {
9462            case 0x0: /* SHADD, UHADD */
9463            {
9464                static NeonGenTwoOpFn * const fns[3][2] = {
9465                    { gen_helper_neon_hadd_s8, gen_helper_neon_hadd_u8 },
9466                    { gen_helper_neon_hadd_s16, gen_helper_neon_hadd_u16 },
9467                    { gen_helper_neon_hadd_s32, gen_helper_neon_hadd_u32 },
9468                };
9469                genfn = fns[size][u];
9470                break;
9471            }
9472            case 0x1: /* SQADD, UQADD */
9473            {
9474                static NeonGenTwoOpEnvFn * const fns[3][2] = {
9475                    { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 },
9476                    { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 },
9477                    { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 },
9478                };
9479                genenvfn = fns[size][u];
9480                break;
9481            }
9482            case 0x2: /* SRHADD, URHADD */
9483            {
9484                static NeonGenTwoOpFn * const fns[3][2] = {
9485                    { gen_helper_neon_rhadd_s8, gen_helper_neon_rhadd_u8 },
9486                    { gen_helper_neon_rhadd_s16, gen_helper_neon_rhadd_u16 },
9487                    { gen_helper_neon_rhadd_s32, gen_helper_neon_rhadd_u32 },
9488                };
9489                genfn = fns[size][u];
9490                break;
9491            }
9492            case 0x4: /* SHSUB, UHSUB */
9493            {
9494                static NeonGenTwoOpFn * const fns[3][2] = {
9495                    { gen_helper_neon_hsub_s8, gen_helper_neon_hsub_u8 },
9496                    { gen_helper_neon_hsub_s16, gen_helper_neon_hsub_u16 },
9497                    { gen_helper_neon_hsub_s32, gen_helper_neon_hsub_u32 },
9498                };
9499                genfn = fns[size][u];
9500                break;
9501            }
9502            case 0x5: /* SQSUB, UQSUB */
9503            {
9504                static NeonGenTwoOpEnvFn * const fns[3][2] = {
9505                    { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 },
9506                    { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 },
9507                    { gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 },
9508                };
9509                genenvfn = fns[size][u];
9510                break;
9511            }
9512            case 0x6: /* CMGT, CMHI */
9513            {
9514                static NeonGenTwoOpFn * const fns[3][2] = {
9515                    { gen_helper_neon_cgt_s8, gen_helper_neon_cgt_u8 },
9516                    { gen_helper_neon_cgt_s16, gen_helper_neon_cgt_u16 },
9517                    { gen_helper_neon_cgt_s32, gen_helper_neon_cgt_u32 },
9518                };
9519                genfn = fns[size][u];
9520                break;
9521            }
9522            case 0x7: /* CMGE, CMHS */
9523            {
9524                static NeonGenTwoOpFn * const fns[3][2] = {
9525                    { gen_helper_neon_cge_s8, gen_helper_neon_cge_u8 },
9526                    { gen_helper_neon_cge_s16, gen_helper_neon_cge_u16 },
9527                    { gen_helper_neon_cge_s32, gen_helper_neon_cge_u32 },
9528                };
9529                genfn = fns[size][u];
9530                break;
9531            }
9532            case 0x8: /* SSHL, USHL */
9533            {
9534                static NeonGenTwoOpFn * const fns[3][2] = {
9535                    { gen_helper_neon_shl_s8, gen_helper_neon_shl_u8 },
9536                    { gen_helper_neon_shl_s16, gen_helper_neon_shl_u16 },
9537                    { gen_helper_neon_shl_s32, gen_helper_neon_shl_u32 },
9538                };
9539                genfn = fns[size][u];
9540                break;
9541            }
9542            case 0x9: /* SQSHL, UQSHL */
9543            {
9544                static NeonGenTwoOpEnvFn * const fns[3][2] = {
9545                    { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
9546                    { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
9547                    { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
9548                };
9549                genenvfn = fns[size][u];
9550                break;
9551            }
9552            case 0xa: /* SRSHL, URSHL */
9553            {
9554                static NeonGenTwoOpFn * const fns[3][2] = {
9555                    { gen_helper_neon_rshl_s8, gen_helper_neon_rshl_u8 },
9556                    { gen_helper_neon_rshl_s16, gen_helper_neon_rshl_u16 },
9557                    { gen_helper_neon_rshl_s32, gen_helper_neon_rshl_u32 },
9558                };
9559                genfn = fns[size][u];
9560                break;
9561            }
9562            case 0xb: /* SQRSHL, UQRSHL */
9563            {
9564                static NeonGenTwoOpEnvFn * const fns[3][2] = {
9565                    { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
9566                    { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
9567                    { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
9568                };
9569                genenvfn = fns[size][u];
9570                break;
9571            }
9572            case 0xc: /* SMAX, UMAX */
9573            {
9574                static NeonGenTwoOpFn * const fns[3][2] = {
9575                    { gen_helper_neon_max_s8, gen_helper_neon_max_u8 },
9576                    { gen_helper_neon_max_s16, gen_helper_neon_max_u16 },
9577                    { gen_max_s32, gen_max_u32 },
9578                };
9579                genfn = fns[size][u];
9580                break;
9581            }
9582
9583            case 0xd: /* SMIN, UMIN */
9584            {
9585                static NeonGenTwoOpFn * const fns[3][2] = {
9586                    { gen_helper_neon_min_s8, gen_helper_neon_min_u8 },
9587                    { gen_helper_neon_min_s16, gen_helper_neon_min_u16 },
9588                    { gen_min_s32, gen_min_u32 },
9589                };
9590                genfn = fns[size][u];
9591                break;
9592            }
9593            case 0xe: /* SABD, UABD */
9594            case 0xf: /* SABA, UABA */
9595            {
9596                static NeonGenTwoOpFn * const fns[3][2] = {
9597                    { gen_helper_neon_abd_s8, gen_helper_neon_abd_u8 },
9598                    { gen_helper_neon_abd_s16, gen_helper_neon_abd_u16 },
9599                    { gen_helper_neon_abd_s32, gen_helper_neon_abd_u32 },
9600                };
9601                genfn = fns[size][u];
9602                break;
9603            }
9604            case 0x10: /* ADD, SUB */
9605            {
9606                static NeonGenTwoOpFn * const fns[3][2] = {
9607                    { gen_helper_neon_add_u8, gen_helper_neon_sub_u8 },
9608                    { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
9609                    { tcg_gen_add_i32, tcg_gen_sub_i32 },
9610                };
9611                genfn = fns[size][u];
9612                break;
9613            }
9614            case 0x11: /* CMTST, CMEQ */
9615            {
9616                static NeonGenTwoOpFn * const fns[3][2] = {
9617                    { gen_helper_neon_tst_u8, gen_helper_neon_ceq_u8 },
9618                    { gen_helper_neon_tst_u16, gen_helper_neon_ceq_u16 },
9619                    { gen_helper_neon_tst_u32, gen_helper_neon_ceq_u32 },
9620                };
9621                genfn = fns[size][u];
9622                break;
9623            }
9624            case 0x13: /* MUL, PMUL */
9625                if (u) {
9626                    /* PMUL */
9627                    assert(size == 0);
9628                    genfn = gen_helper_neon_mul_p8;
9629                    break;
9630                }
9631                /* fall through : MUL */
9632            case 0x12: /* MLA, MLS */
9633            {
9634                static NeonGenTwoOpFn * const fns[3] = {
9635                    gen_helper_neon_mul_u8,
9636                    gen_helper_neon_mul_u16,
9637                    tcg_gen_mul_i32,
9638                };
9639                genfn = fns[size];
9640                break;
9641            }
9642            case 0x16: /* SQDMULH, SQRDMULH */
9643            {
9644                static NeonGenTwoOpEnvFn * const fns[2][2] = {
9645                    { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 },
9646                    { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 },
9647                };
9648                assert(size == 1 || size == 2);
9649                genenvfn = fns[size - 1][u];
9650                break;
9651            }
9652            default:
9653                g_assert_not_reached();
9654            }
9655
9656            if (genenvfn) {
9657                genenvfn(tcg_res, cpu_env, tcg_op1, tcg_op2);
9658            } else {
9659                genfn(tcg_res, tcg_op1, tcg_op2);
9660            }
9661
9662            if (opcode == 0xf || opcode == 0x12) {
9663                /* SABA, UABA, MLA, MLS: accumulating ops */
9664                static NeonGenTwoOpFn * const fns[3][2] = {
9665                    { gen_helper_neon_add_u8, gen_helper_neon_sub_u8 },
9666                    { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
9667                    { tcg_gen_add_i32, tcg_gen_sub_i32 },
9668                };
9669                bool is_sub = (opcode == 0x12 && u); /* MLS */
9670
9671                genfn = fns[size][is_sub];
9672                read_vec_element_i32(s, tcg_op1, rd, pass, MO_32);
9673                genfn(tcg_res, tcg_op1, tcg_res);
9674            }
9675
9676            write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
9677
9678            tcg_temp_free_i32(tcg_res);
9679            tcg_temp_free_i32(tcg_op1);
9680            tcg_temp_free_i32(tcg_op2);
9681        }
9682    }
9683
9684    if (!is_q) {
9685        clear_vec_high(s, rd);
9686    }
9687}
9688
9689/* C3.6.16 AdvSIMD three same
9690 *  31  30  29  28       24 23  22  21 20  16 15    11  10 9    5 4    0
9691 * +---+---+---+-----------+------+---+------+--------+---+------+------+
9692 * | 0 | Q | U | 0 1 1 1 0 | size | 1 |  Rm  | opcode | 1 |  Rn  |  Rd  |
9693 * +---+---+---+-----------+------+---+------+--------+---+------+------+
9694 */
9695static void disas_simd_three_reg_same(DisasContext *s, uint32_t insn)
9696{
9697    int opcode = extract32(insn, 11, 5);
9698
9699    switch (opcode) {
9700    case 0x3: /* logic ops */
9701        disas_simd_3same_logic(s, insn);
9702        break;
9703    case 0x17: /* ADDP */
9704    case 0x14: /* SMAXP, UMAXP */
9705    case 0x15: /* SMINP, UMINP */
9706    {
9707        /* Pairwise operations */
9708        int is_q = extract32(insn, 30, 1);
9709        int u = extract32(insn, 29, 1);
9710        int size = extract32(insn, 22, 2);
9711        int rm = extract32(insn, 16, 5);
9712        int rn = extract32(insn, 5, 5);
9713        int rd = extract32(insn, 0, 5);
9714        if (opcode == 0x17) {
9715            if (u || (size == 3 && !is_q)) {
9716                unallocated_encoding(s);
9717                return;
9718            }
9719        } else {
9720            if (size == 3) {
9721                unallocated_encoding(s);
9722                return;
9723            }
9724        }
9725        handle_simd_3same_pair(s, is_q, u, opcode, size, rn, rm, rd);
9726        break;
9727    }
9728    case 0x18 ... 0x31:
9729        /* floating point ops, sz[1] and U are part of opcode */
9730        disas_simd_3same_float(s, insn);
9731        break;
9732    default:
9733        disas_simd_3same_int(s, insn);
9734        break;
9735    }
9736}
9737
9738static void handle_2misc_widening(DisasContext *s, int opcode, bool is_q,
9739                                  int size, int rn, int rd)
9740{
9741    /* Handle 2-reg-misc ops which are widening (so each size element
9742     * in the source becomes a 2*size element in the destination).
9743     * The only instruction like this is FCVTL.
9744     */
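    /* For example, FCVTL Vd.2D, Vn.2S reads the two 32-bit floats from the
     * low half of Vn and writes two 64-bit doubles; FCVTL2 (is_q set) reads
     * source elements 2 and 3 from the high half instead.
     */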
9745    int pass;
9746
9747    if (size == 3) {
9748        /* 32 -> 64 bit fp conversion */
9749        TCGv_i64 tcg_res[2];
9750        int srcelt = is_q ? 2 : 0;
9751
9752        for (pass = 0; pass < 2; pass++) {
9753            TCGv_i32 tcg_op = tcg_temp_new_i32();
9754            tcg_res[pass] = tcg_temp_new_i64();
9755
9756            read_vec_element_i32(s, tcg_op, rn, srcelt + pass, MO_32);
9757            gen_helper_vfp_fcvtds(tcg_res[pass], tcg_op, cpu_env);
9758            tcg_temp_free_i32(tcg_op);
9759        }
9760        for (pass = 0; pass < 2; pass++) {
9761            write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
9762            tcg_temp_free_i64(tcg_res[pass]);
9763        }
9764    } else {
9765        /* 16 -> 32 bit fp conversion */
9766        int srcelt = is_q ? 4 : 0;
9767        TCGv_i32 tcg_res[4];
9768
9769        for (pass = 0; pass < 4; pass++) {
9770            tcg_res[pass] = tcg_temp_new_i32();
9771
9772            read_vec_element_i32(s, tcg_res[pass], rn, srcelt + pass, MO_16);
9773            gen_helper_vfp_fcvt_f16_to_f32(tcg_res[pass], tcg_res[pass],
9774                                           cpu_env);
9775        }
9776        for (pass = 0; pass < 4; pass++) {
9777            write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
9778            tcg_temp_free_i32(tcg_res[pass]);
9779        }
9780    }
9781}
9782
9783static void handle_rev(DisasContext *s, int opcode, bool u,
9784                       bool is_q, int size, int rn, int rd)
9785{
9786    int op = (opcode << 1) | u;
9787    int opsz = op + size;
9788    int grp_size = 3 - opsz;
9789    int dsize = is_q ? 128 : 64;
9790    int i;
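    /* grp_size is log2 of the group size in bytes within which elements are
     * reversed: e.g. REV64 (op == 0) on byte elements (size == 0) gives
     * opsz == 0 and hence grp_size == 3, i.e. reversal within each 64-bit
     * group.
     */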
9791
9792    if (opsz >= 3) {
9793        unallocated_encoding(s);
9794        return;
9795    }
9796
9797    if (!fp_access_check(s)) {
9798        return;
9799    }
9800
9801    if (size == 0) {
9802        /* Special case bytes, use bswap op on each group of elements */
9803        int groups = dsize / (8 << grp_size);
9804
9805        for (i = 0; i < groups; i++) {
9806            TCGv_i64 tcg_tmp = tcg_temp_new_i64();
9807
9808            read_vec_element(s, tcg_tmp, rn, i, grp_size);
9809            switch (grp_size) {
9810            case MO_16:
9811                tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
9812                break;
9813            case MO_32:
9814                tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp);
9815                break;
9816            case MO_64:
9817                tcg_gen_bswap64_i64(tcg_tmp, tcg_tmp);
9818                break;
9819            default:
9820                g_assert_not_reached();
9821            }
9822            write_vec_element(s, tcg_tmp, rd, i, grp_size);
9823            tcg_temp_free_i64(tcg_tmp);
9824        }
9825        if (!is_q) {
9826            clear_vec_high(s, rd);
9827        }
9828    } else {
9829        int revmask = (1 << grp_size) - 1;
9830        int esize = 8 << size;
9831        int elements = dsize / esize;
9832        TCGv_i64 tcg_rn = tcg_temp_new_i64();
9833        TCGv_i64 tcg_rd = tcg_const_i64(0);
9834        TCGv_i64 tcg_rd_hi = tcg_const_i64(0);
9835
9836        for (i = 0; i < elements; i++) {
9837            int e_rev = (i & 0xf) ^ revmask;
9838            int off = e_rev * esize;
9839            read_vec_element(s, tcg_rn, rn, i, size);
9840            if (off >= 64) {
9841                tcg_gen_deposit_i64(tcg_rd_hi, tcg_rd_hi,
9842                                    tcg_rn, off - 64, esize);
9843            } else {
9844                tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, off, esize);
9845            }
9846        }
9847        write_vec_element(s, tcg_rd, rd, 0, MO_64);
9848        write_vec_element(s, tcg_rd_hi, rd, 1, MO_64);
9849
9850        tcg_temp_free_i64(tcg_rd_hi);
9851        tcg_temp_free_i64(tcg_rd);
9852        tcg_temp_free_i64(tcg_rn);
9853    }
9854}
9855
9856static void handle_2misc_pairwise(DisasContext *s, int opcode, bool u,
9857                                  bool is_q, int size, int rn, int rd)
9858{
9859    /* Implement the pairwise operations from 2-misc:
9860     * SADDLP, UADDLP, SADALP, UADALP.
9861     * These all add pairs of elements in the input to produce a
9862     * double-width result element in the output (possibly accumulating).
9863     */
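    /* For example, SADDLP Vd.2S, Vn.4H adds adjacent pairs of signed 16-bit
     * elements to produce 32-bit sums; the SADALP/UADALP forms additionally
     * accumulate those sums into the existing destination elements.
     */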
9864    bool accum = (opcode == 0x6);
9865    int maxpass = is_q ? 2 : 1;
9866    int pass;
9867    TCGv_i64 tcg_res[2];
9868
9869    if (size == 2) {
9870        /* 32 + 32 -> 64 op */
9871        TCGMemOp memop = size + (u ? 0 : MO_SIGN);
9872
9873        for (pass = 0; pass < maxpass; pass++) {
9874            TCGv_i64 tcg_op1 = tcg_temp_new_i64();
9875            TCGv_i64 tcg_op2 = tcg_temp_new_i64();
9876
9877            tcg_res[pass] = tcg_temp_new_i64();
9878
9879            read_vec_element(s, tcg_op1, rn, pass * 2, memop);
9880            read_vec_element(s, tcg_op2, rn, pass * 2 + 1, memop);
9881            tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
9882            if (accum) {
9883                read_vec_element(s, tcg_op1, rd, pass, MO_64);
9884                tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
9885            }
9886
9887            tcg_temp_free_i64(tcg_op1);
9888            tcg_temp_free_i64(tcg_op2);
9889        }
9890    } else {
9891        for (pass = 0; pass < maxpass; pass++) {
9892            TCGv_i64 tcg_op = tcg_temp_new_i64();
9893            NeonGenOneOpFn *genfn;
9894            static NeonGenOneOpFn * const fns[2][2] = {
9895                { gen_helper_neon_addlp_s8,  gen_helper_neon_addlp_u8 },
9896                { gen_helper_neon_addlp_s16, gen_helper_neon_addlp_u16 },
9897            };
9898
9899            genfn = fns[size][u];
9900
9901            tcg_res[pass] = tcg_temp_new_i64();
9902
9903            read_vec_element(s, tcg_op, rn, pass, MO_64);
9904            genfn(tcg_res[pass], tcg_op);
9905
9906            if (accum) {
9907                read_vec_element(s, tcg_op, rd, pass, MO_64);
9908                if (size == 0) {
9909                    gen_helper_neon_addl_u16(tcg_res[pass],
9910                                             tcg_res[pass], tcg_op);
9911                } else {
9912                    gen_helper_neon_addl_u32(tcg_res[pass],
9913                                             tcg_res[pass], tcg_op);
9914                }
9915            }
9916            tcg_temp_free_i64(tcg_op);
9917        }
9918    }
9919    if (!is_q) {
9920        tcg_res[1] = tcg_const_i64(0);
9921    }
9922    for (pass = 0; pass < 2; pass++) {
9923        write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
9924        tcg_temp_free_i64(tcg_res[pass]);
9925    }
9926}
9927
9928static void handle_shll(DisasContext *s, bool is_q, int size, int rn, int rd)
9929{
9930    /* Implement SHLL and SHLL2 */
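    /* For example, SHLL Vd.8H, Vn.8B, #8 widens each byte in the low half
     * of Vn to 16 bits and shifts it left by 8; the shift amount is always
     * the source element width, hence the fixed "8 << size" shift below.
     */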
9931    int pass;
9932    int part = is_q ? 2 : 0;
9933    TCGv_i64 tcg_res[2];
9934
9935    for (pass = 0; pass < 2; pass++) {
9936        static NeonGenWidenFn * const widenfns[3] = {
9937            gen_helper_neon_widen_u8,
9938            gen_helper_neon_widen_u16,
9939            tcg_gen_extu_i32_i64,
9940        };
9941        NeonGenWidenFn *widenfn = widenfns[size];
9942        TCGv_i32 tcg_op = tcg_temp_new_i32();
9943
9944        read_vec_element_i32(s, tcg_op, rn, part + pass, MO_32);
9945        tcg_res[pass] = tcg_temp_new_i64();
9946        widenfn(tcg_res[pass], tcg_op);
9947        tcg_gen_shli_i64(tcg_res[pass], tcg_res[pass], 8 << size);
9948
9949        tcg_temp_free_i32(tcg_op);
9950    }
9951
9952    for (pass = 0; pass < 2; pass++) {
9953        write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
9954        tcg_temp_free_i64(tcg_res[pass]);
9955    }
9956}
9957
9958/* C3.6.17 AdvSIMD two reg misc
9959 *   31  30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
9960 * +---+---+---+-----------+------+-----------+--------+-----+------+------+
9961 * | 0 | Q | U | 0 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
9962 * +---+---+---+-----------+------+-----------+--------+-----+------+------+
9963 */
9964static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
9965{
9966    int size = extract32(insn, 22, 2);
9967    int opcode = extract32(insn, 12, 5);
9968    bool u = extract32(insn, 29, 1);
9969    bool is_q = extract32(insn, 30, 1);
9970    int rn = extract32(insn, 5, 5);
9971    int rd = extract32(insn, 0, 5);
9972    bool need_fpstatus = false;
9973    bool need_rmode = false;
9974    int rmode = -1;
9975    TCGv_i32 tcg_rmode;
9976    TCGv_ptr tcg_fpstatus;
9977
9978    switch (opcode) {
9979    case 0x0: /* REV64, REV32 */
9980    case 0x1: /* REV16 */
9981        handle_rev(s, opcode, u, is_q, size, rn, rd);
9982        return;
9983    case 0x5: /* CNT, NOT, RBIT */
9984        if (u && size == 0) {
9985            /* NOT: adjust size so we can use the 64-bits-at-a-time loop. */
9986            size = 3;
9987            break;
9988        } else if (u && size == 1) {
9989            /* RBIT */
9990            break;
9991        } else if (!u && size == 0) {
9992            /* CNT */
9993            break;
9994        }
9995        unallocated_encoding(s);
9996        return;
9997    case 0x12: /* XTN, XTN2, SQXTUN, SQXTUN2 */
9998    case 0x14: /* SQXTN, SQXTN2, UQXTN, UQXTN2 */
9999        if (size == 3) {
10000            unallocated_encoding(s);
10001            return;
10002        }
10003        if (!fp_access_check(s)) {
10004            return;
10005        }
10006
10007        handle_2misc_narrow(s, false, opcode, u, is_q, size, rn, rd);
10008        return;
10009    case 0x4: /* CLS, CLZ */
10010        if (size == 3) {
10011            unallocated_encoding(s);
10012            return;
10013        }
10014        break;
10015    case 0x2: /* SADDLP, UADDLP */
10016    case 0x6: /* SADALP, UADALP */
10017        if (size == 3) {
10018            unallocated_encoding(s);
10019            return;
10020        }
10021        if (!fp_access_check(s)) {
10022            return;
10023        }
10024        handle_2misc_pairwise(s, opcode, u, is_q, size, rn, rd);
10025        return;
10026    case 0x13: /* SHLL, SHLL2 */
10027        if (u == 0 || size == 3) {
10028            unallocated_encoding(s);
10029            return;
10030        }
10031        if (!fp_access_check(s)) {
10032            return;
10033        }
10034        handle_shll(s, is_q, size, rn, rd);
10035        return;
10036    case 0xa: /* CMLT */
10037        if (u == 1) {
10038            unallocated_encoding(s);
10039            return;
10040        }
10041        /* fall through */
10042    case 0x8: /* CMGT, CMGE */
10043    case 0x9: /* CMEQ, CMLE */
10044    case 0xb: /* ABS, NEG */
10045        if (size == 3 && !is_q) {
10046            unallocated_encoding(s);
10047            return;
10048        }
10049        break;
10050    case 0x3: /* SUQADD, USQADD */
10051        if (size == 3 && !is_q) {
10052            unallocated_encoding(s);
10053            return;
10054        }
10055        if (!fp_access_check(s)) {
10056            return;
10057        }
10058        handle_2misc_satacc(s, false, u, is_q, size, rn, rd);
10059        return;
10060    case 0x7: /* SQABS, SQNEG */
10061        if (size == 3 && !is_q) {
10062            unallocated_encoding(s);
10063            return;
10064        }
10065        break;
10066    case 0xc ... 0xf:
10067    case 0x16 ... 0x1d:
10068    case 0x1f:
10069    {
10070        /* Floating point: U, size[1] and opcode indicate operation;
10071         * size[0] indicates single or double precision.
10072         */
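        /* The re-encoded opcode below is u:size[1]:opcode<4:0>: for example,
         * FNEG (base opcode 0xf with U and size[1] set) becomes case 0x6f.
         */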
10073        int is_double = extract32(size, 0, 1);
10074        opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
10075        size = is_double ? 3 : 2;
10076        switch (opcode) {
10077        case 0x2f: /* FABS */
10078        case 0x6f: /* FNEG */
10079            if (size == 3 && !is_q) {
10080                unallocated_encoding(s);
10081                return;
10082            }
10083            break;
10084        case 0x1d: /* SCVTF */
10085        case 0x5d: /* UCVTF */
10086        {
10087            bool is_signed = (opcode == 0x1d);
10088            int elements = is_double ? 2 : is_q ? 4 : 2;
10089            if (is_double && !is_q) {
10090                unallocated_encoding(s);
10091                return;
10092            }
10093            if (!fp_access_check(s)) {
10094                return;
10095            }
10096            handle_simd_intfp_conv(s, rd, rn, elements, is_signed, 0, size);
10097            return;
10098        }
10099        case 0x2c: /* FCMGT (zero) */
10100        case 0x2d: /* FCMEQ (zero) */
10101        case 0x2e: /* FCMLT (zero) */
10102        case 0x6c: /* FCMGE (zero) */
10103        case 0x6d: /* FCMLE (zero) */
10104            if (size == 3 && !is_q) {
10105                unallocated_encoding(s);
10106                return;
10107            }
10108            handle_2misc_fcmp_zero(s, opcode, false, u, is_q, size, rn, rd);
10109            return;
10110        case 0x7f: /* FSQRT */
10111            if (size == 3 && !is_q) {
10112                unallocated_encoding(s);
10113                return;
10114            }
10115            break;
10116        case 0x1a: /* FCVTNS */
10117        case 0x1b: /* FCVTMS */
10118        case 0x3a: /* FCVTPS */
10119        case 0x3b: /* FCVTZS */
10120        case 0x5a: /* FCVTNU */
10121        case 0x5b: /* FCVTMU */
10122        case 0x7a: /* FCVTPU */
10123        case 0x7b: /* FCVTZU */
10124            need_fpstatus = true;
10125            need_rmode = true;
10126            rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
10127            if (size == 3 && !is_q) {
10128                unallocated_encoding(s);
10129                return;
10130            }
10131            break;
10132        case 0x5c: /* FCVTAU */
10133        case 0x1c: /* FCVTAS */
10134            need_fpstatus = true;
10135            need_rmode = true;
10136            rmode = FPROUNDING_TIEAWAY;
10137            if (size == 3 && !is_q) {
10138                unallocated_encoding(s);
10139                return;
10140            }
10141            break;
10142        case 0x3c: /* URECPE */
10143            if (size == 3) {
10144                unallocated_encoding(s);
10145                return;
10146            }
10147            /* fall through */
10148        case 0x3d: /* FRECPE */
10149        case 0x7d: /* FRSQRTE */
10150            if (size == 3 && !is_q) {
10151                unallocated_encoding(s);
10152                return;
10153            }
10154            if (!fp_access_check(s)) {
10155                return;
10156            }
10157            handle_2misc_reciprocal(s, opcode, false, u, is_q, size, rn, rd);
10158            return;
10159        case 0x56: /* FCVTXN, FCVTXN2 */
10160            if (size == 2) {
10161                unallocated_encoding(s);
10162                return;
10163            }
10164            /* fall through */
10165        case 0x16: /* FCVTN, FCVTN2 */
10166            /* handle_2misc_narrow does a 2*size -> size operation, but these
10167             * instructions encode the source size rather than dest size.
10168             */
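            /* For example, FCVTN with 64-bit source elements reaches here
             * with size == 3 and passes size - 1 == 2, i.e. a 64-to-32-bit
             * narrowing of each element.
             */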
10169            if (!fp_access_check(s)) {
10170                return;
10171            }
10172            handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd);
10173            return;
10174        case 0x17: /* FCVTL, FCVTL2 */
10175            if (!fp_access_check(s)) {
10176                return;
10177            }
10178            handle_2misc_widening(s, opcode, is_q, size, rn, rd);
10179            return;
10180        case 0x18: /* FRINTN */
10181        case 0x19: /* FRINTM */
10182        case 0x38: /* FRINTP */
10183        case 0x39: /* FRINTZ */
10184            need_rmode = true;
10185            rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
10186            /* fall through */
10187        case 0x59: /* FRINTX */
10188        case 0x79: /* FRINTI */
10189            need_fpstatus = true;
10190            if (size == 3 && !is_q) {
10191                unallocated_encoding(s);
10192                return;
10193            }
10194            break;
10195        case 0x58: /* FRINTA */
10196            need_rmode = true;
10197            rmode = FPROUNDING_TIEAWAY;
10198            need_fpstatus = true;
10199            if (size == 3 && !is_q) {
10200                unallocated_encoding(s);
10201                return;
10202            }
10203            break;
10204        case 0x7c: /* URSQRTE */
10205            if (size == 3) {
10206                unallocated_encoding(s);
10207                return;
10208            }
10209            need_fpstatus = true;
10210            break;
10211        default:
10212            unallocated_encoding(s);
10213            return;
10214        }
10215        break;
10216    }
10217    default:
10218        unallocated_encoding(s);
10219        return;
10220    }
10221
10222    if (!fp_access_check(s)) {
10223        return;
10224    }
10225
10226    if (need_fpstatus) {
10227        tcg_fpstatus = get_fpstatus_ptr();
10228    } else {
10229        TCGV_UNUSED_PTR(tcg_fpstatus);
10230    }
10231    if (need_rmode) {
10232        tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
10233        gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
10234    } else {
10235        TCGV_UNUSED_I32(tcg_rmode);
10236    }
10237
10238    if (size == 3) {
10239        /* All 64-bit element operations can be shared with scalar 2misc */
10240        int pass;
10241
10242        for (pass = 0; pass < (is_q ? 2 : 1); pass++) {
10243            TCGv_i64 tcg_op = tcg_temp_new_i64();
10244            TCGv_i64 tcg_res = tcg_temp_new_i64();
10245
10246            read_vec_element(s, tcg_op, rn, pass, MO_64);
10247
10248            handle_2misc_64(s, opcode, u, tcg_res, tcg_op,
10249                            tcg_rmode, tcg_fpstatus);
10250
10251            write_vec_element(s, tcg_res, rd, pass, MO_64);
10252
10253            tcg_temp_free_i64(tcg_res);
10254            tcg_temp_free_i64(tcg_op);
10255        }
10256    } else {
10257        int pass;
10258
10259        for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
10260            TCGv_i32 tcg_op = tcg_temp_new_i32();
10261            TCGv_i32 tcg_res = tcg_temp_new_i32();
10262            TCGCond cond;
10263
10264            read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
10265
10266            if (size == 2) {
10267                /* Special cases for 32 bit elements */
10268                switch (opcode) {
10269                case 0xa: /* CMLT */
10270                    /* 32 bit integer comparison against zero: result is
10271                     * test ? (2^32 - 1) : 0. We implement via setcond(test),
10272                     * then negate the 0/1 result to give 0/all-ones.
10273                     */
10274                    cond = TCG_COND_LT;
10275                do_cmop:
10276                    tcg_gen_setcondi_i32(cond, tcg_res, tcg_op, 0);
10277                    tcg_gen_neg_i32(tcg_res, tcg_res);
10278                    break;
10279                case 0x8: /* CMGT, CMGE */
10280                    cond = u ? TCG_COND_GE : TCG_COND_GT;
10281                    goto do_cmop;
10282                case 0x9: /* CMEQ, CMLE */
10283                    cond = u ? TCG_COND_LE : TCG_COND_EQ;
10284                    goto do_cmop;
10285                case 0x4: /* CLS */
10286                    if (u) {
10287                        gen_helper_clz32(tcg_res, tcg_op);
10288                    } else {
10289                        gen_helper_cls32(tcg_res, tcg_op);
10290                    }
10291                    break;
10292                case 0x7: /* SQABS, SQNEG */
10293                    if (u) {
10294                        gen_helper_neon_qneg_s32(tcg_res, cpu_env, tcg_op);
10295                    } else {
10296                        gen_helper_neon_qabs_s32(tcg_res, cpu_env, tcg_op);
10297                    }
10298                    break;
10299                case 0xb: /* ABS, NEG */
10300                    if (u) {
10301                        tcg_gen_neg_i32(tcg_res, tcg_op);
10302                    } else {
10303                        TCGv_i32 tcg_zero = tcg_const_i32(0);
10304                        tcg_gen_neg_i32(tcg_res, tcg_op);
10305                        tcg_gen_movcond_i32(TCG_COND_GT, tcg_res, tcg_op,
10306                                            tcg_zero, tcg_op, tcg_res);
10307                        tcg_temp_free_i32(tcg_zero);
10308                    }
10309                    break;
10310                case 0x2f: /* FABS */
10311                    gen_helper_vfp_abss(tcg_res, tcg_op);
10312                    break;
10313                case 0x6f: /* FNEG */
10314                    gen_helper_vfp_negs(tcg_res, tcg_op);
10315                    break;
10316                case 0x7f: /* FSQRT */
10317                    gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env);
10318                    break;
10319                case 0x1a: /* FCVTNS */
10320                case 0x1b: /* FCVTMS */
10321                case 0x1c: /* FCVTAS */
10322                case 0x3a: /* FCVTPS */
10323                case 0x3b: /* FCVTZS */
10324                {
10325                    TCGv_i32 tcg_shift = tcg_const_i32(0);
10326                    gen_helper_vfp_tosls(tcg_res, tcg_op,
10327                                         tcg_shift, tcg_fpstatus);
10328                    tcg_temp_free_i32(tcg_shift);
10329                    break;
10330                }
10331                case 0x5a: /* FCVTNU */
10332                case 0x5b: /* FCVTMU */
10333                case 0x5c: /* FCVTAU */
10334                case 0x7a: /* FCVTPU */
10335                case 0x7b: /* FCVTZU */
10336                {
10337                    TCGv_i32 tcg_shift = tcg_const_i32(0);
10338                    gen_helper_vfp_touls(tcg_res, tcg_op,
10339                                         tcg_shift, tcg_fpstatus);
10340                    tcg_temp_free_i32(tcg_shift);
10341                    break;
10342                }
10343                case 0x18: /* FRINTN */
10344                case 0x19: /* FRINTM */
10345                case 0x38: /* FRINTP */
10346                case 0x39: /* FRINTZ */
10347                case 0x58: /* FRINTA */
10348                case 0x79: /* FRINTI */
10349                    gen_helper_rints(tcg_res, tcg_op, tcg_fpstatus);
10350                    break;
10351                case 0x59: /* FRINTX */
10352                    gen_helper_rints_exact(tcg_res, tcg_op, tcg_fpstatus);
10353                    break;
10354                case 0x7c: /* URSQRTE */
10355                    gen_helper_rsqrte_u32(tcg_res, tcg_op, tcg_fpstatus);
10356                    break;
10357                default:
10358                    g_assert_not_reached();
10359                }
10360            } else {
10361                /* Use helpers for 8 and 16 bit elements */
10362                switch (opcode) {
10363                case 0x5: /* CNT, RBIT */
10364                    /* For these two insns size is part of the opcode specifier
10365                     * (handled earlier); they always operate on byte elements.
10366                     */
10367                    if (u) {
10368                        gen_helper_neon_rbit_u8(tcg_res, tcg_op);
10369                    } else {
10370                        gen_helper_neon_cnt_u8(tcg_res, tcg_op);
10371                    }
10372                    break;
10373                case 0x7: /* SQABS, SQNEG */
10374                {
10375                    NeonGenOneOpEnvFn *genfn;
10376                    static NeonGenOneOpEnvFn * const fns[2][2] = {
10377                        { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
10378                        { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
10379                    };
10380                    genfn = fns[size][u];
10381                    genfn(tcg_res, cpu_env, tcg_op);
10382                    break;
10383                }
10384                case 0x8: /* CMGT, CMGE */
10385                case 0x9: /* CMEQ, CMLE */
10386                case 0xa: /* CMLT */
10387                {
10388                    static NeonGenTwoOpFn * const fns[3][2] = {
10389                        { gen_helper_neon_cgt_s8, gen_helper_neon_cgt_s16 },
10390                        { gen_helper_neon_cge_s8, gen_helper_neon_cge_s16 },
10391                        { gen_helper_neon_ceq_u8, gen_helper_neon_ceq_u16 },
10392                    };
10393                    NeonGenTwoOpFn *genfn;
10394                    int comp;
10395                    bool reverse;
10396                    TCGv_i32 tcg_zero = tcg_const_i32(0);
10397
10398                    /* comp = index into [CMGT, CMGE, CMEQ, CMLE, CMLT] */
10399                    comp = (opcode - 0x8) * 2 + u;
10400                    /* ...but LE, LT are implemented as reverse GE, GT */
10401                    reverse = (comp > 2);
10402                    if (reverse) {
10403                        comp = 4 - comp;
10404                    }
10405                    genfn = fns[comp][size];
10406                    if (reverse) {
10407                        genfn(tcg_res, tcg_zero, tcg_op);
10408                    } else {
10409                        genfn(tcg_res, tcg_op, tcg_zero);
10410                    }
10411                    tcg_temp_free_i32(tcg_zero);
10412                    break;
10413                }
10414                case 0xb: /* ABS, NEG */
10415                    if (u) {
10416                        TCGv_i32 tcg_zero = tcg_const_i32(0);
10417                        if (size) {
10418                            gen_helper_neon_sub_u16(tcg_res, tcg_zero, tcg_op);
10419                        } else {
10420                            gen_helper_neon_sub_u8(tcg_res, tcg_zero, tcg_op);
10421                        }
10422                        tcg_temp_free_i32(tcg_zero);
10423                    } else {
10424                        if (size) {
10425                            gen_helper_neon_abs_s16(tcg_res, tcg_op);
10426                        } else {
10427                            gen_helper_neon_abs_s8(tcg_res, tcg_op);
10428                        }
10429                    }
10430                    break;
10431                case 0x4: /* CLS, CLZ */
10432                    if (u) {
10433                        if (size == 0) {
10434                            gen_helper_neon_clz_u8(tcg_res, tcg_op);
10435                        } else {
10436                            gen_helper_neon_clz_u16(tcg_res, tcg_op);
10437                        }
10438                    } else {
10439                        if (size == 0) {
10440                            gen_helper_neon_cls_s8(tcg_res, tcg_op);
10441                        } else {
10442                            gen_helper_neon_cls_s16(tcg_res, tcg_op);
10443                        }
10444                    }
10445                    break;
10446                default:
10447                    g_assert_not_reached();
10448                }
10449            }
10450
10451            write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
10452
10453            tcg_temp_free_i32(tcg_res);
10454            tcg_temp_free_i32(tcg_op);
10455        }
10456    }
10457    if (!is_q) {
10458        clear_vec_high(s, rd);
10459    }
10460
10461    if (need_rmode) {
10462        gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
10463        tcg_temp_free_i32(tcg_rmode);
10464    }
10465    if (need_fpstatus) {
10466        tcg_temp_free_ptr(tcg_fpstatus);
10467    }
10468}
10469
10470/* C3.6.13 AdvSIMD scalar x indexed element
10471 *  31 30  29 28       24 23  22 21  20  19  16 15 12  11  10 9    5 4    0
10472 * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
10473 * | 0 1 | U | 1 1 1 1 1 | size | L | M |  Rm  | opc | H | 0 |  Rn  |  Rd  |
10474 * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
10475 * C3.6.18 AdvSIMD vector x indexed element
10476 *   31  30  29 28       24 23  22 21  20  19  16 15 12  11  10 9    5 4    0
10477 * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
10478 * | 0 | Q | U | 0 1 1 1 1 | size | L | M |  Rm  | opc | H | 0 |  Rn  |  Rd  |
10479 * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
10480 */
10481static void disas_simd_indexed(DisasContext *s, uint32_t insn)
10482{
10483    /* This encoding has two kinds of instruction:
10484     *  normal, where we perform elt x idxelt => elt for each
10485     *     element in the vector
10486     *  long, where we perform elt x idxelt and generate a result of
10487     *     double the width of the input element
10488     * The long ops have a 'part' specifier (ie come in INSN, INSN2 pairs).
10489     */
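    /* For example, MUL Vd.4S, Vn.4S, Vm.S[1] is a "normal" op (32 x 32 -> 32
     * per element), while SMULL Vd.2D, Vn.2S, Vm.S[1] is a "long" op
     * (32 x 32 -> 64); SMULL2 is its INSN2 pair, reading the high half of Vn.
     */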
10490    bool is_scalar = extract32(insn, 28, 1);
10491    bool is_q = extract32(insn, 30, 1);
10492    bool u = extract32(insn, 29, 1);
10493    int size = extract32(insn, 22, 2);
10494    int l = extract32(insn, 21, 1);
10495    int m = extract32(insn, 20, 1);
10496    /* Note that the Rm field here is only 4 bits, not 5 as it usually is */
10497    int rm = extract32(insn, 16, 4);
10498    int opcode = extract32(insn, 12, 4);
10499    int h = extract32(insn, 11, 1);
10500    int rn = extract32(insn, 5, 5);
10501    int rd = extract32(insn, 0, 5);
10502    bool is_long = false;
10503    bool is_fp = false;
10504    int index;
10505    TCGv_ptr fpst;
10506
10507    switch (opcode) {
10508    case 0x0: /* MLA */
10509    case 0x4: /* MLS */
10510        if (!u || is_scalar) {
10511            unallocated_encoding(s);
10512            return;
10513        }
10514        break;
10515    case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10516    case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10517    case 0xa: /* SMULL, SMULL2, UMULL, UMULL2 */
10518        if (is_scalar) {
10519            unallocated_encoding(s);
10520            return;
10521        }
10522        is_long = true;
10523        break;
10524    case 0x3: /* SQDMLAL, SQDMLAL2 */
10525    case 0x7: /* SQDMLSL, SQDMLSL2 */
10526    case 0xb: /* SQDMULL, SQDMULL2 */
10527        is_long = true;
10528        /* fall through */
10529    case 0xc: /* SQDMULH */
10530    case 0xd: /* SQRDMULH */
10531        if (u) {
10532            unallocated_encoding(s);
10533            return;
10534        }
10535        break;
10536    case 0x8: /* MUL */
10537        if (u || is_scalar) {
10538            unallocated_encoding(s);
10539            return;
10540        }
10541        break;
10542    case 0x1: /* FMLA */
10543    case 0x5: /* FMLS */
10544        if (u) {
10545            unallocated_encoding(s);
10546            return;
10547        }
10548        /* fall through */
10549    case 0x9: /* FMUL, FMULX */
10550        if (!extract32(size, 1, 1)) {
10551            unallocated_encoding(s);
10552            return;
10553        }
10554        is_fp = true;
10555        break;
10556    default:
10557        unallocated_encoding(s);
10558        return;
10559    }
10560
10561    if (is_fp) {
10562        /* low bit of size indicates single/double */
10563        size = extract32(size, 0, 1) ? 3 : 2;
10564        if (size == 2) {
10565            index = h << 1 | l;
10566        } else {
10567            if (l || !is_q) {
10568                unallocated_encoding(s);
10569                return;
10570            }
10571            index = h;
10572        }
10573        rm |= (m << 4);
10574    } else {
10575        switch (size) {
10576        case 1:
10577            index = h << 2 | l << 1 | m;
10578            break;
10579        case 2:
10580            index = h << 1 | l;
10581            rm |= (m << 4);
10582            break;
10583        default:
10584            unallocated_encoding(s);
10585            return;
10586        }
10587    }
10588
10589    if (!fp_access_check(s)) {
10590        return;
10591    }
10592
10593    if (is_fp) {
10594        fpst = get_fpstatus_ptr();
10595    } else {
10596        TCGV_UNUSED_PTR(fpst);
10597    }
10598
10599    if (size == 3) {
10600        TCGv_i64 tcg_idx = tcg_temp_new_i64();
10601        int pass;
10602
10603        assert(is_fp && is_q && !is_long);
10604
10605        read_vec_element(s, tcg_idx, rm, index, MO_64);
10606
10607        for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
10608            TCGv_i64 tcg_op = tcg_temp_new_i64();
10609            TCGv_i64 tcg_res = tcg_temp_new_i64();
10610
10611            read_vec_element(s, tcg_op, rn, pass, MO_64);
10612
10613            switch (opcode) {
10614            case 0x5: /* FMLS */
10615                /* As usual for ARM, separate negation for fused multiply-add */
10616                gen_helper_vfp_negd(tcg_op, tcg_op);
10617                /* fall through */
10618            case 0x1: /* FMLA */
10619                read_vec_element(s, tcg_res, rd, pass, MO_64);
10620                gen_helper_vfp_muladdd(tcg_res, tcg_op, tcg_idx, tcg_res, fpst);
10621                break;
10622            case 0x9: /* FMUL, FMULX */
10623                if (u) {
10624                    gen_helper_vfp_mulxd(tcg_res, tcg_op, tcg_idx, fpst);
10625                } else {
10626                    gen_helper_vfp_muld(tcg_res, tcg_op, tcg_idx, fpst);
10627                }
10628                break;
10629            default:
10630                g_assert_not_reached();
10631            }
10632
10633            write_vec_element(s, tcg_res, rd, pass, MO_64);
10634            tcg_temp_free_i64(tcg_op);
10635            tcg_temp_free_i64(tcg_res);
10636        }
10637
10638        if (is_scalar) {
10639            clear_vec_high(s, rd);
10640        }
10641
10642        tcg_temp_free_i64(tcg_idx);
10643    } else if (!is_long) {
10644        /* 32 bit floating point, or 16 or 32 bit integer.
10645         * For the 16 bit scalar case we use the usual Neon helpers and
10646         * rely on the fact that 0 op 0 == 0 with no side effects.
10647         */
10648        TCGv_i32 tcg_idx = tcg_temp_new_i32();
10649        int pass, maxpasses;
10650
10651        if (is_scalar) {
10652            maxpasses = 1;
10653        } else {
10654            maxpasses = is_q ? 4 : 2;
10655        }
10656
10657        read_vec_element_i32(s, tcg_idx, rm, index, size);
10658
10659        if (size == 1 && !is_scalar) {
10660            /* The simplest way to handle the 16x16 indexed ops is to duplicate
10661             * the index into both halves of the 32 bit tcg_idx and then use
10662             * the usual Neon helpers.
10663             */
10664            tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
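            /* e.g. an index value of 0x0000abcd becomes 0xabcdabcd */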
10665        }
10666
10667        for (pass = 0; pass < maxpasses; pass++) {
10668            TCGv_i32 tcg_op = tcg_temp_new_i32();
10669            TCGv_i32 tcg_res = tcg_temp_new_i32();
10670
10671            read_vec_element_i32(s, tcg_op, rn, pass, is_scalar ? size : MO_32);
10672
10673            switch (opcode) {
10674            case 0x0: /* MLA */
10675            case 0x4: /* MLS */
10676            case 0x8: /* MUL */
10677            {
10678                static NeonGenTwoOpFn * const fns[2][2] = {
10679                    { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
10680                    { tcg_gen_add_i32, tcg_gen_sub_i32 },
10681                };
10682                NeonGenTwoOpFn *genfn;
10683                bool is_sub = opcode == 0x4;
10684
10685                if (size == 1) {
10686                    gen_helper_neon_mul_u16(tcg_res, tcg_op, tcg_idx);
10687                } else {
10688                    tcg_gen_mul_i32(tcg_res, tcg_op, tcg_idx);
10689                }
10690                if (opcode == 0x8) {
10691                    break;
10692                }
10693                read_vec_element_i32(s, tcg_op, rd, pass, MO_32);
10694                genfn = fns[size - 1][is_sub];
10695                genfn(tcg_res, tcg_op, tcg_res);
10696                break;
10697            }
10698            case 0x5: /* FMLS */
10699                /* As usual for ARM, separate negation for fused multiply-add */
10700                gen_helper_vfp_negs(tcg_op, tcg_op);
10701                /* fall through */
10702            case 0x1: /* FMLA */
10703                read_vec_element_i32(s, tcg_res, rd, pass, MO_32);
10704                gen_helper_vfp_muladds(tcg_res, tcg_op, tcg_idx, tcg_res, fpst);
10705                break;
10706            case 0x9: /* FMUL, FMULX */
10707                if (u) {
10708                    gen_helper_vfp_mulxs(tcg_res, tcg_op, tcg_idx, fpst);
10709                } else {
10710                    gen_helper_vfp_muls(tcg_res, tcg_op, tcg_idx, fpst);
10711                }
10712                break;
10713            case 0xc: /* SQDMULH */
10714                if (size == 1) {
10715                    gen_helper_neon_qdmulh_s16(tcg_res, cpu_env,
10716                                               tcg_op, tcg_idx);
10717                } else {
10718                    gen_helper_neon_qdmulh_s32(tcg_res, cpu_env,
10719                                               tcg_op, tcg_idx);
10720                }
10721                break;
10722            case 0xd: /* SQRDMULH */
10723                if (size == 1) {
10724                    gen_helper_neon_qrdmulh_s16(tcg_res, cpu_env,
10725                                                tcg_op, tcg_idx);
10726                } else {
10727                    gen_helper_neon_qrdmulh_s32(tcg_res, cpu_env,
10728                                                tcg_op, tcg_idx);
10729                }
10730                break;
10731            default:
10732                g_assert_not_reached();
10733            }
10734
10735            if (is_scalar) {
10736                write_fp_sreg(s, rd, tcg_res);
10737            } else {
10738                write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
10739            }
10740
10741            tcg_temp_free_i32(tcg_op);
10742            tcg_temp_free_i32(tcg_res);
10743        }
10744
10745        tcg_temp_free_i32(tcg_idx);
10746
10747        if (!is_q) {
10748            clear_vec_high(s, rd);
10749        }
10750    } else {
10751        /* long ops: 16x16->32 or 32x32->64 */
10752        TCGv_i64 tcg_res[2];
10753        int pass;
10754        bool satop = extract32(opcode, 0, 1);
10755        TCGMemOp memop = MO_32;
10756
10757        if (satop || !u) {
10758            memop |= MO_SIGN;
10759        }
10760
10761        if (size == 2) {
10762            TCGv_i64 tcg_idx = tcg_temp_new_i64();
10763
10764            read_vec_element(s, tcg_idx, rm, index, memop);
10765
10766            for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
10767                TCGv_i64 tcg_op = tcg_temp_new_i64();
10768                TCGv_i64 tcg_passres;
10769                int passelt;
10770
10771                if (is_scalar) {
10772                    passelt = 0;
10773                } else {
10774                    passelt = pass + (is_q * 2);
10775                }
10776
10777                read_vec_element(s, tcg_op, rn, passelt, memop);
10778
10779                tcg_res[pass] = tcg_temp_new_i64();
10780
10781                if (opcode == 0xa || opcode == 0xb) {
10782                    /* Non-accumulating ops */
10783                    tcg_passres = tcg_res[pass];
10784                } else {
10785                    tcg_passres = tcg_temp_new_i64();
10786                }
10787
10788                tcg_gen_mul_i64(tcg_passres, tcg_op, tcg_idx);
10789                tcg_temp_free_i64(tcg_op);
10790
10791                if (satop) {
10792                    /* saturating, doubling */
10793                    gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env,
10794                                                      tcg_passres, tcg_passres);
10795                }
10796
10797                if (opcode == 0xa || opcode == 0xb) {
10798                    continue;
10799                }
10800
10801                /* Accumulating op: handle accumulate step */
10802                read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10803
10804                switch (opcode) {
10805                case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10806                    tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
10807                    break;
10808                case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10809                    tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
10810                    break;
10811                case 0x7: /* SQDMLSL, SQDMLSL2 */
10812                    tcg_gen_neg_i64(tcg_passres, tcg_passres);
10813                    /* fall through */
10814                case 0x3: /* SQDMLAL, SQDMLAL2 */
10815                    gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env,
10816                                                      tcg_res[pass],
10817                                                      tcg_passres);
10818                    break;
10819                default:
10820                    g_assert_not_reached();
10821                }
10822                tcg_temp_free_i64(tcg_passres);
10823            }
10824            tcg_temp_free_i64(tcg_idx);
10825
10826            if (is_scalar) {
10827                clear_vec_high(s, rd);
10828            }
10829        } else {
10830            TCGv_i32 tcg_idx = tcg_temp_new_i32();
10831
10832            assert(size == 1);
10833            read_vec_element_i32(s, tcg_idx, rm, index, size);
10834
10835            if (!is_scalar) {
10836                /* The simplest way to handle the 16x16 indexed ops is to
10837                 * duplicate the index into both halves of the 32 bit tcg_idx
10838                 * and then use the usual Neon helpers.
10839                 */
10840                tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
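                /* e.g. an index value of 0x0000abcd becomes 0xabcdabcd */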
10841            }
10842
10843            for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
10844                TCGv_i32 tcg_op = tcg_temp_new_i32();
10845                TCGv_i64 tcg_passres;
10846
10847                if (is_scalar) {
10848                    read_vec_element_i32(s, tcg_op, rn, pass, size);
10849                } else {
10850                    read_vec_element_i32(s, tcg_op, rn,
10851                                         pass + (is_q * 2), MO_32);
10852                }
10853
10854                tcg_res[pass] = tcg_temp_new_i64();
10855
10856                if (opcode == 0xa || opcode == 0xb) {
10857                    /* Non-accumulating ops */
10858                    tcg_passres = tcg_res[pass];
10859                } else {
10860                    tcg_passres = tcg_temp_new_i64();
10861                }
10862
10863                if (memop & MO_SIGN) {
10864                    gen_helper_neon_mull_s16(tcg_passres, tcg_op, tcg_idx);
10865                } else {
10866                    gen_helper_neon_mull_u16(tcg_passres, tcg_op, tcg_idx);
10867                }
10868                if (satop) {
10869                    gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
10870                                                      tcg_passres, tcg_passres);
10871                }
10872                tcg_temp_free_i32(tcg_op);
10873
10874                if (opcode == 0xa || opcode == 0xb) {
10875                    continue;
10876                }
10877
10878                /* Accumulating op: handle accumulate step */
10879                read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10880
10881                switch (opcode) {
10882                case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10883                    gen_helper_neon_addl_u32(tcg_res[pass], tcg_res[pass],
10884                                             tcg_passres);
10885                    break;
10886                case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10887                    gen_helper_neon_subl_u32(tcg_res[pass], tcg_res[pass],
10888                                             tcg_passres);
10889                    break;
10890                case 0x7: /* SQDMLSL, SQDMLSL2 */
10891                    gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
10892                    /* fall through */
10893                case 0x3: /* SQDMLAL, SQDMLAL2 */
10894                    gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env,
10895                                                      tcg_res[pass],
10896                                                      tcg_passres);
10897                    break;
10898                default:
10899                    g_assert_not_reached();
10900                }
10901                tcg_temp_free_i64(tcg_passres);
10902            }
10903            tcg_temp_free_i32(tcg_idx);
10904
10905            if (is_scalar) {
10906                tcg_gen_ext32u_i64(tcg_res[0], tcg_res[0]);
10907            }
10908        }
10909
10910        if (is_scalar) {
10911            tcg_res[1] = tcg_const_i64(0);
10912        }
10913
10914        for (pass = 0; pass < 2; pass++) {
10915            write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10916            tcg_temp_free_i64(tcg_res[pass]);
10917        }
10918    }
10919
10920    if (!TCGV_IS_UNUSED_PTR(fpst)) {
10921        tcg_temp_free_ptr(fpst);
10922    }
10923}
10924
10925/* C3.6.19 Crypto AES
10926 *  31             24 23  22 21       17 16    12 11 10 9    5 4    0
10927 * +-----------------+------+-----------+--------+-----+------+------+
10928 * | 0 1 0 0 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
10929 * +-----------------+------+-----------+--------+-----+------+------+
10930 */
10931static void disas_crypto_aes(DisasContext *s, uint32_t insn)
10932{
10933    int size = extract32(insn, 22, 2);
10934    int opcode = extract32(insn, 12, 5);
10935    int rn = extract32(insn, 5, 5);
10936    int rd = extract32(insn, 0, 5);
10937    int decrypt;
10938    TCGv_i32 tcg_rd_regno, tcg_rn_regno, tcg_decrypt;
10939    CryptoThreeOpEnvFn *genfn;
10940
10941    if (!arm_dc_feature(s, ARM_FEATURE_V8_AES)
10942        || size != 0) {
10943        unallocated_encoding(s);
10944        return;
10945    }
10946
10947    switch (opcode) {
10948    case 0x4: /* AESE */
10949        decrypt = 0;
10950        genfn = gen_helper_crypto_aese;
10951        break;
10952    case 0x6: /* AESMC */
10953        decrypt = 0;
10954        genfn = gen_helper_crypto_aesmc;
10955        break;
10956    case 0x5: /* AESD */
10957        decrypt = 1;
10958        genfn = gen_helper_crypto_aese;
10959        break;
10960    case 0x7: /* AESIMC */
10961        decrypt = 1;
10962        genfn = gen_helper_crypto_aesmc;
10963        break;
10964    default:
10965        unallocated_encoding(s);
10966        return;
10967    }
10968
10969    /* Note that we convert the Vx register indexes into the
10970     * index within the vfp.regs[] array, so we can share the
10971     * helper with the AArch32 instructions.
10972     */
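    /* Each 128-bit V register occupies two consecutive 64-bit slots in
     * vfp.regs[], hence the "<< 1" scaling of the register numbers here
     * (e.g. V3 starts at vfp.regs[6]).
     */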
10973    tcg_rd_regno = tcg_const_i32(rd << 1);
10974    tcg_rn_regno = tcg_const_i32(rn << 1);
10975    tcg_decrypt = tcg_const_i32(decrypt);
10976
10977    genfn(cpu_env, tcg_rd_regno, tcg_rn_regno, tcg_decrypt);
10978
10979    tcg_temp_free_i32(tcg_rd_regno);
10980    tcg_temp_free_i32(tcg_rn_regno);
10981    tcg_temp_free_i32(tcg_decrypt);
10982}
10983
10984/* C3.6.20 Crypto three-reg SHA
10985 *  31             24 23  22  21 20  16  15 14    12 11 10 9    5 4    0
10986 * +-----------------+------+---+------+---+--------+-----+------+------+
10987 * | 0 1 0 1 1 1 1 0 | size | 0 |  Rm  | 0 | opcode | 0 0 |  Rn  |  Rd  |
10988 * +-----------------+------+---+------+---+--------+-----+------+------+
10989 */
10990static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn)
10991{
10992    int size = extract32(insn, 22, 2);
10993    int opcode = extract32(insn, 12, 3);
10994    int rm = extract32(insn, 16, 5);
10995    int rn = extract32(insn, 5, 5);
10996    int rd = extract32(insn, 0, 5);
10997    CryptoThreeOpEnvFn *genfn;
10998    TCGv_i32 tcg_rd_regno, tcg_rn_regno, tcg_rm_regno;
10999    int feature = ARM_FEATURE_V8_SHA256;
11000
11001    if (size != 0) {
11002        unallocated_encoding(s);
11003        return;
11004    }
11005
11006    switch (opcode) {
11007    case 0: /* SHA1C */
11008    case 1: /* SHA1P */
11009    case 2: /* SHA1M */
11010    case 3: /* SHA1SU0 */
11011        genfn = NULL;
11012        feature = ARM_FEATURE_V8_SHA1;
11013        break;
11014    case 4: /* SHA256H */
11015        genfn = gen_helper_crypto_sha256h;
11016        break;
11017    case 5: /* SHA256H2 */
11018        genfn = gen_helper_crypto_sha256h2;
11019        break;
11020    case 6: /* SHA256SU1 */
11021        genfn = gen_helper_crypto_sha256su1;
11022        break;
11023    default:
11024        unallocated_encoding(s);
11025        return;
11026    }
11027
11028    if (!arm_dc_feature(s, feature)) {
11029        unallocated_encoding(s);
11030        return;
11031    }
11032
11033    tcg_rd_regno = tcg_const_i32(rd << 1);
11034    tcg_rn_regno = tcg_const_i32(rn << 1);
11035    tcg_rm_regno = tcg_const_i32(rm << 1);
11036
11037    if (genfn) {
11038        genfn(cpu_env, tcg_rd_regno, tcg_rn_regno, tcg_rm_regno);
11039    } else {
11040        TCGv_i32 tcg_opcode = tcg_const_i32(opcode);
11041
11042        gen_helper_crypto_sha1_3reg(cpu_env, tcg_rd_regno,
11043                                    tcg_rn_regno, tcg_rm_regno, tcg_opcode);
11044        tcg_temp_free_i32(tcg_opcode);
11045    }
11046
11047    tcg_temp_free_i32(tcg_rd_regno);
11048    tcg_temp_free_i32(tcg_rn_regno);
11049    tcg_temp_free_i32(tcg_rm_regno);
11050}
11051
11052/* C3.6.21 Crypto two-reg SHA
11053 *  31             24 23  22 21       17 16    12 11 10 9    5 4    0
11054 * +-----------------+------+-----------+--------+-----+------+------+
11055 * | 0 1 0 1 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
11056 * +-----------------+------+-----------+--------+-----+------+------+
11057 */
11058static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn)
11059{
11060    int size = extract32(insn, 22, 2);
11061    int opcode = extract32(insn, 12, 5);
11062    int rn = extract32(insn, 5, 5);
11063    int rd = extract32(insn, 0, 5);
11064    CryptoTwoOpEnvFn *genfn;
11065    int feature;
11066    TCGv_i32 tcg_rd_regno, tcg_rn_regno;
11067
11068    if (size != 0) {
11069        unallocated_encoding(s);
11070        return;
11071    }
11072
11073    switch (opcode) {
11074    case 0: /* SHA1H */
11075        feature = ARM_FEATURE_V8_SHA1;
11076        genfn = gen_helper_crypto_sha1h;
11077        break;
11078    case 1: /* SHA1SU1 */
11079        feature = ARM_FEATURE_V8_SHA1;
11080        genfn = gen_helper_crypto_sha1su1;
11081        break;
11082    case 2: /* SHA256SU0 */
11083        feature = ARM_FEATURE_V8_SHA256;
11084        genfn = gen_helper_crypto_sha256su0;
11085        break;
11086    default:
11087        unallocated_encoding(s);
11088        return;
11089    }
11090
11091    if (!arm_dc_feature(s, feature)) {
11092        unallocated_encoding(s);
11093        return;
11094    }
11095
11096    tcg_rd_regno = tcg_const_i32(rd << 1);
11097    tcg_rn_regno = tcg_const_i32(rn << 1);
11098
11099    genfn(cpu_env, tcg_rd_regno, tcg_rn_regno);
11100
11101    tcg_temp_free_i32(tcg_rd_regno);
11102    tcg_temp_free_i32(tcg_rn_regno);
11103}
11104
11105/* C3.6 Data processing - SIMD, inc Crypto
11106 *
11107 * As the decode gets a little complex we are using a table based
11108 * approach for this part of the decode.
11109 */
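/* An entry matches an instruction when (insn & mask) == pattern; the table
 * is scanned in order and the all-zeroes entry terminates it. A minimal
 * sketch of such a lookup (the real helper is lookup_disas_fn()):
 *
 *     while (tptr->mask) {
 *         if ((insn & tptr->mask) == tptr->pattern) {
 *             return tptr->disas_fn;
 *         }
 *         tptr++;
 *     }
 *     return NULL;
 */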
11110static const AArch64DecodeTable data_proc_simd[] = {
11111    /* pattern  ,  mask     ,  fn                        */
11112    { 0x0e200400, 0x9f200400, disas_simd_three_reg_same },
11113    { 0x0e200000, 0x9f200c00, disas_simd_three_reg_diff },
11114    { 0x0e200800, 0x9f3e0c00, disas_simd_two_reg_misc },
11115    { 0x0e300800, 0x9f3e0c00, disas_simd_across_lanes },
11116    { 0x0e000400, 0x9fe08400, disas_simd_copy },
11117    { 0x0f000000, 0x9f000400, disas_simd_indexed }, /* vector indexed */
11118    /* simd_mod_imm decode is a subset of simd_shift_imm, so must precede it */
11119    { 0x0f000400, 0x9ff80400, disas_simd_mod_imm },
11120    { 0x0f000400, 0x9f800400, disas_simd_shift_imm },
11121    { 0x0e000000, 0xbf208c00, disas_simd_tb },
11122    { 0x0e000800, 0xbf208c00, disas_simd_zip_trn },
11123    { 0x2e000000, 0xbf208400, disas_simd_ext },
11124    { 0x5e200400, 0xdf200400, disas_simd_scalar_three_reg_same },
11125    { 0x5e200000, 0xdf200c00, disas_simd_scalar_three_reg_diff },
11126    { 0x5e200800, 0xdf3e0c00, disas_simd_scalar_two_reg_misc },
11127    { 0x5e300800, 0xdf3e0c00, disas_simd_scalar_pairwise },
11128    { 0x5e000400, 0xdfe08400, disas_simd_scalar_copy },
11129    { 0x5f000000, 0xdf000400, disas_simd_indexed }, /* scalar indexed */
11130    { 0x5f000400, 0xdf800400, disas_simd_scalar_shift_imm },
11131    { 0x4e280800, 0xff3e0c00, disas_crypto_aes },
11132    { 0x5e000000, 0xff208c00, disas_crypto_three_reg_sha },
11133    { 0x5e280800, 0xff3e0c00, disas_crypto_two_reg_sha },
11134    { 0x00000000, 0x00000000, NULL }
11135};
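
/* lookup_disas_fn() scans this table in order and returns the first
 * entry whose (insn & mask) == pattern, which is why simd_mod_imm must
 * precede simd_shift_imm above.  As an illustration, 0x5e280800
 * (SHA1H S0, S0) ANDed with mask 0xff3e0c00 yields 0x5e280800,
 * selecting disas_crypto_two_reg_sha.
 */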

static void disas_data_proc_simd(DisasContext *s, uint32_t insn)
{
    /* Note that this is called with all non-FP cases from
     * table C3-6 so it must UNDEF for entries not specifically
     * allocated to instructions in that table.
     */
    AArch64DecodeFn *fn = lookup_disas_fn(&data_proc_simd[0], insn);
    if (fn) {
        fn(s, insn);
    } else {
        unallocated_encoding(s);
    }
}

/* C3.6 Data processing - SIMD and floating point */
static void disas_data_proc_simd_fp(DisasContext *s, uint32_t insn)
{
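    /* Scalar floating-point instructions have bit 28 set and bit 30
     * clear; everything else in this encoding space is SIMD (including
     * the crypto instructions).
     */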
    if (extract32(insn, 28, 1) == 1 && extract32(insn, 30, 1) == 0) {
        disas_data_proc_fp(s, insn);
    } else {
        /* SIMD, including crypto */
        disas_data_proc_simd(s, insn);
    }
}

/* C3.1 A64 instruction index by encoding */
static void disas_a64_insn(CPUARMState *env, DisasContext *s)
{
    uint32_t insn;

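    /* A64 instructions are always 32 bits wide and word aligned, so we
     * fetch exactly four bytes and advance the PC by four.
     */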
    insn = arm_ldl_code(env, s->pc, s->sctlr_b);
    s->insn = insn;
    s->pc += 4;

    s->fp_access_checked = false;

    switch (extract32(insn, 25, 4)) {
    case 0x0: case 0x1: case 0x2: case 0x3: /* UNALLOCATED */
        unallocated_encoding(s);
        break;
    case 0x8: case 0x9: /* Data processing - immediate */
        disas_data_proc_imm(s, insn);
        break;
    case 0xa: case 0xb: /* Branch, exception generation and system insns */
        disas_b_exc_sys(s, insn);
        break;
    case 0x4:
    case 0x6:
    case 0xc:
    case 0xe:      /* Loads and stores */
        disas_ldst(s, insn);
        break;
    case 0x5:
    case 0xd:      /* Data processing - register */
        disas_data_proc_reg(s, insn);
        break;
    case 0x7:
    case 0xf:      /* Data processing - SIMD and floating point */
        disas_data_proc_simd_fp(s, insn);
        break;
    default:
        assert(FALSE); /* all 16 cases should be handled above */
        break;
    }

    /* if we allocated any temporaries, free them here */
    free_tmp_a64(s);
}

void gen_intermediate_code_a64(ARMCPU *cpu, TranslationBlock *tb)
{
    CPUState *cs = CPU(cpu);
    CPUARMState *env = &cpu->env;
    DisasContext dc1, *dc = &dc1;
    target_ulong pc_start;
    target_ulong next_page_start;
    int num_insns;
    int max_insns;

    pc_start = tb->pc;

    dc->tb = tb;

    dc->is_jmp = DISAS_NEXT;
    dc->pc = pc_start;
    dc->singlestep_enabled = cs->singlestep_enabled;
    dc->condjmp = 0;

    dc->aarch64 = 1;
    /* If we are coming from secure EL0 in a system with a 32-bit EL3, then
     * there is no secure EL1, so we route exceptions to EL3.
     */
    dc->secure_routed_to_el3 = arm_feature(env, ARM_FEATURE_EL3) &&
                               !arm_el_is_aa64(env, 3);
    dc->thumb = 0;
    dc->sctlr_b = 0;
    dc->be_data = ARM_TBFLAG_BE_DATA(tb->flags) ? MO_BE : MO_LE;
    dc->condexec_mask = 0;
    dc->condexec_cond = 0;
    dc->mmu_idx = ARM_TBFLAG_MMUIDX(tb->flags);
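    /* TBI0/TBI1: whether the top byte of a virtual address is ignored
     * for the lower/upper address range (address tagging).
     */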
    dc->tbi0 = ARM_TBFLAG_TBI0(tb->flags);
    dc->tbi1 = ARM_TBFLAG_TBI1(tb->flags);
    dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
#if !defined(CONFIG_USER_ONLY)
    dc->user = (dc->current_el == 0);
#endif
    dc->fp_excp_el = ARM_TBFLAG_FPEXC_EL(tb->flags);
    dc->vec_len = 0;
    dc->vec_stride = 0;
    dc->cp_regs = cpu->cp_regs;
    dc->features = env->features;

    /* Single step state. The code-generation logic here is:
     *  SS_ACTIVE == 0:
     *   generate code with no special handling for single-stepping (except
     *   that anything that can make us go to SS_ACTIVE == 1 must end the TB;
     *   this happens anyway because those changes are all system register or
     *   PSTATE writes).
     *  SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
     *   emit code for one insn
     *   emit code to clear PSTATE.SS
     *   emit code to generate software step exception for completed step
     *   end TB (as usual for having generated an exception)
     *  SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
     *   emit code to generate a software step exception
     *   end the TB
     */
    dc->ss_active = ARM_TBFLAG_SS_ACTIVE(tb->flags);
    dc->pstate_ss = ARM_TBFLAG_PSTATE_SS(tb->flags);
    dc->is_ldex = false;
    dc->ss_same_el = (arm_debug_target_el(env) == dc->current_el);

    init_tmp_a64_array(dc);

    next_page_start = (pc_start & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
    num_insns = 0;
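    /* tb->cflags carries a requested per-TB instruction count when
     * icount is in use; otherwise use the maximum, and in all cases
     * respect TCG's per-TB limit.
     */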
    max_insns = tb->cflags & CF_COUNT_MASK;
    if (max_insns == 0) {
        max_insns = CF_COUNT_MASK;
    }
    if (max_insns > TCG_MAX_INSNS) {
        max_insns = TCG_MAX_INSNS;
    }

    gen_tb_start(tb);

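    /* Arm the TCG temporary leak check; tcg_check_temp_count() in the
     * loop below reports any temps still live after an insn has been
     * translated.
     */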
    tcg_clear_temp_count();

    do {
        dc->insn_start_idx = tcg_op_buf_count();
        tcg_gen_insn_start(dc->pc, 0, 0);
        num_insns++;

        if (unlikely(!QTAILQ_EMPTY(&cs->breakpoints))) {
            CPUBreakpoint *bp;
            QTAILQ_FOREACH(bp, &cs->breakpoints, entry) {
                if (bp->pc == dc->pc) {
                    if (bp->flags & BP_CPU) {
                        gen_a64_set_pc_im(dc->pc);
                        gen_helper_check_breakpoints(cpu_env);
                        /* End the TB early; it likely won't be executed */
                        dc->is_jmp = DISAS_UPDATE;
                    } else {
                        gen_exception_internal_insn(dc, 0, EXCP_DEBUG);
                        /* The address covered by the breakpoint must be
                           included in [tb->pc, tb->pc + tb->size) in order
                           for it to be properly cleared -- thus we
                           increment the PC here so that the logic setting
                           tb->size below does the right thing.  */
                        dc->pc += 4;
                        goto done_generating;
                    }
                    break;
                }
            }
        }

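        /* CF_LAST_IO means the final insn of this TB may perform I/O;
         * bracket it with gen_io_start()/gen_io_end() so icount stays
         * accurate.
         */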
        if (num_insns == max_insns && (tb->cflags & CF_LAST_IO)) {
            gen_io_start();
        }

        if (dc->ss_active && !dc->pstate_ss) {
            /* Singlestep state is Active-pending.
             * If we're in this state at the start of a TB then either
             *  a) we just took an exception to an EL which is being debugged
             *     and this is the first insn in the exception handler
             *  b) debug exceptions were masked and we just unmasked them
             *     without changing EL (eg by clearing PSTATE.D)
             * In either case we're going to take a swstep exception in the
             * "did not step an insn" case, and so the syndrome ISV and EX
             * bits should be zero.
             */
            assert(num_insns == 1);
            gen_exception(EXCP_UDEF, syn_swstep(dc->ss_same_el, 0, 0),
                          default_exception_el(dc));
            dc->is_jmp = DISAS_EXC;
            break;
        }

        disas_a64_insn(env, dc);

        if (tcg_check_temp_count()) {
            fprintf(stderr, "TCG temporary leak before "TARGET_FMT_lx"\n",
                    dc->pc);
        }

        /* Translation stops when a branch (or any other insn that sets
         * is_jmp) is encountered; otherwise the subsequent code could get
         * translated several times.  Also stop translation when a page
         * boundary is reached: this ensures prefetch aborts occur at the
         * right place.
         */
    } while (!dc->is_jmp && !tcg_op_buf_full() &&
             !cs->singlestep_enabled &&
             !singlestep &&
             !dc->ss_active &&
             dc->pc < next_page_start &&
             num_insns < max_insns);

    if (tb->cflags & CF_LAST_IO) {
        gen_io_end();
    }

    if (unlikely(cs->singlestep_enabled || dc->ss_active)
        && dc->is_jmp != DISAS_EXC) {
        /* Note that this means single stepping WFI doesn't halt the CPU.
         * For conditional branch insns this is harmless unreachable code as
         * gen_goto_tb() has already handled emitting the debug exception
         * (and thus a tb-jump is not possible when singlestepping).
         */
        assert(dc->is_jmp != DISAS_TB_JUMP);
        if (dc->is_jmp != DISAS_JUMP) {
            gen_a64_set_pc_im(dc->pc);
        }
        if (cs->singlestep_enabled) {
            gen_exception_internal(EXCP_DEBUG);
        } else {
            gen_step_complete_exception(dc);
        }
    } else {
        switch (dc->is_jmp) {
        case DISAS_NEXT:
            gen_goto_tb(dc, 1, dc->pc);
            break;
        default:
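            /* Anything we don't handle specifically below is treated
             * like DISAS_UPDATE: store the PC and exit to the main loop.
             */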
        case DISAS_UPDATE:
            gen_a64_set_pc_im(dc->pc);
            /* fall through */
        case DISAS_JUMP:
            /* indicate that the hash table must be used to find the next TB */
            tcg_gen_exit_tb(0);
            break;
        case DISAS_TB_JUMP:
        case DISAS_EXC:
        case DISAS_SWI:
            break;
        case DISAS_WFE:
            gen_a64_set_pc_im(dc->pc);
            gen_helper_wfe(cpu_env);
            break;
        case DISAS_YIELD:
            gen_a64_set_pc_im(dc->pc);
            gen_helper_yield(cpu_env);
            break;
        case DISAS_WFI:
            /* This is a special case because we don't want to just halt the CPU
             * if trying to debug across a WFI.
             */
            gen_a64_set_pc_im(dc->pc);
            gen_helper_wfi(cpu_env);
            /* The helper doesn't necessarily throw an exception, but we
             * must go back to the main loop to check for interrupts anyway.
             */
            tcg_gen_exit_tb(0);
            break;
        }
    }

done_generating:
    gen_tb_end(tb, num_insns);

#ifdef DEBUG_DISAS
    if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM) &&
        qemu_log_in_addr_range(pc_start)) {
        qemu_log_lock();
        qemu_log("----------------\n");
        qemu_log("IN: %s\n", lookup_symbol(pc_start));
        log_target_disas(cs, pc_start, dc->pc - pc_start,
                         4 | (bswap_code(dc->sctlr_b) ? 2 : 0));
        qemu_log("\n");
        qemu_log_unlock();
    }
#endif
    tb->size = dc->pc - pc_start;
    tb->icount = num_insns;
}