qemu/target-arm/translate-a64.c
/*
 *  AArch64 translation
 *
 *  Copyright (c) 2013 Alexander Graf <agraf@suse.de>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"

#include "cpu.h"
#include "exec/exec-all.h"
#include "tcg-op.h"
#include "qemu/log.h"
#include "arm_ldst.h"
#include "translate.h"
#include "internals.h"
#include "qemu/host-utils.h"

#include "exec/semihost.h"
#include "exec/gen-icount.h"

#include "exec/helper-proto.h"
#include "exec/helper-gen.h"
#include "exec/log.h"

#include "trace-tcg.h"

static TCGv_i64 cpu_X[32];
static TCGv_i64 cpu_pc;

/* Load/store exclusive handling */
static TCGv_i64 cpu_exclusive_high;
static TCGv_i64 cpu_reg(DisasContext *s, int reg);

static const char *regnames[] = {
    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
    "x24", "x25", "x26", "x27", "x28", "x29", "lr", "sp"
};

enum a64_shift_type {
    A64_SHIFT_TYPE_LSL = 0,
    A64_SHIFT_TYPE_LSR = 1,
    A64_SHIFT_TYPE_ASR = 2,
    A64_SHIFT_TYPE_ROR = 3
};

/* Table based decoder typedefs - used when the relevant bits for decode
 * are too awkwardly scattered across the instruction (eg SIMD).
 */
typedef void AArch64DecodeFn(DisasContext *s, uint32_t insn);

typedef struct AArch64DecodeTable {
    uint32_t pattern;
    uint32_t mask;
    AArch64DecodeFn *disas_fn;
} AArch64DecodeTable;

/* Function prototype for gen_ functions for calling Neon helpers */
typedef void NeonGenOneOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32);
typedef void NeonGenTwoOpFn(TCGv_i32, TCGv_i32, TCGv_i32);
typedef void NeonGenTwoOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32, TCGv_i32);
typedef void NeonGenTwo64OpFn(TCGv_i64, TCGv_i64, TCGv_i64);
typedef void NeonGenTwo64OpEnvFn(TCGv_i64, TCGv_ptr, TCGv_i64, TCGv_i64);
typedef void NeonGenNarrowFn(TCGv_i32, TCGv_i64);
typedef void NeonGenNarrowEnvFn(TCGv_i32, TCGv_ptr, TCGv_i64);
typedef void NeonGenWidenFn(TCGv_i64, TCGv_i32);
typedef void NeonGenTwoSingleOPFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
typedef void NeonGenTwoDoubleOPFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr);
typedef void NeonGenOneOpFn(TCGv_i64, TCGv_i64);
typedef void CryptoTwoOpEnvFn(TCGv_ptr, TCGv_i32, TCGv_i32);
typedef void CryptoThreeOpEnvFn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);

/* initialize TCG globals.  */
void a64_translate_init(void)
{
    int i;

    cpu_pc = tcg_global_mem_new_i64(cpu_env,
                                    offsetof(CPUARMState, pc),
                                    "pc");
    for (i = 0; i < 32; i++) {
        cpu_X[i] = tcg_global_mem_new_i64(cpu_env,
                                          offsetof(CPUARMState, xregs[i]),
                                          regnames[i]);
    }

    cpu_exclusive_high = tcg_global_mem_new_i64(cpu_env,
        offsetof(CPUARMState, exclusive_high), "exclusive_high");
}

static inline ARMMMUIdx get_a64_user_mem_index(DisasContext *s)
{
    /* Return the mmu_idx to use for A64 "unprivileged load/store" insns:
     *  if EL1, access as if EL0; otherwise access at current EL
     */
    switch (s->mmu_idx) {
    case ARMMMUIdx_S12NSE1:
        return ARMMMUIdx_S12NSE0;
    case ARMMMUIdx_S1SE1:
        return ARMMMUIdx_S1SE0;
    case ARMMMUIdx_S2NS:
        g_assert_not_reached();
    default:
        return s->mmu_idx;
    }
}
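
/* Example: an unprivileged load/store (LDTR/STTR) executed at EL1 in
 * non-secure state reaches get_a64_user_mem_index() with mmu_idx
 * ARMMMUIdx_S12NSE1 and is performed with the EL0 index ARMMMUIdx_S12NSE0,
 * so it sees EL0's translation and permission checks, as the architecture
 * requires.
 */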

void aarch64_cpu_dump_state(CPUState *cs, FILE *f,
                            fprintf_function cpu_fprintf, int flags)
{
    ARMCPU *cpu = ARM_CPU(cs);
    CPUARMState *env = &cpu->env;
    uint32_t psr = pstate_read(env);
    int i;
    int el = arm_current_el(env);
    const char *ns_status;

    cpu_fprintf(f, "PC=%016"PRIx64"  SP=%016"PRIx64"\n",
            env->pc, env->xregs[31]);
    for (i = 0; i < 31; i++) {
        cpu_fprintf(f, "X%02d=%016"PRIx64, i, env->xregs[i]);
        if ((i % 4) == 3) {
            cpu_fprintf(f, "\n");
        } else {
            cpu_fprintf(f, " ");
        }
    }

    if (arm_feature(env, ARM_FEATURE_EL3) && el != 3) {
        ns_status = env->cp15.scr_el3 & SCR_NS ? "NS " : "S ";
    } else {
        ns_status = "";
    }

    cpu_fprintf(f, "\nPSTATE=%08x %c%c%c%c %sEL%d%c\n",
                psr,
                psr & PSTATE_N ? 'N' : '-',
                psr & PSTATE_Z ? 'Z' : '-',
                psr & PSTATE_C ? 'C' : '-',
                psr & PSTATE_V ? 'V' : '-',
                ns_status,
                el,
                psr & PSTATE_SP ? 'h' : 't');

    if (flags & CPU_DUMP_FPU) {
        int numvfpregs = 32;
        for (i = 0; i < numvfpregs; i += 2) {
            uint64_t vlo = float64_val(env->vfp.regs[i * 2]);
            uint64_t vhi = float64_val(env->vfp.regs[(i * 2) + 1]);
            cpu_fprintf(f, "q%02d=%016" PRIx64 ":%016" PRIx64 " ",
                        i, vhi, vlo);
            vlo = float64_val(env->vfp.regs[(i + 1) * 2]);
            vhi = float64_val(env->vfp.regs[((i + 1) * 2) + 1]);
            cpu_fprintf(f, "q%02d=%016" PRIx64 ":%016" PRIx64 "\n",
                        i + 1, vhi, vlo);
        }
        cpu_fprintf(f, "FPCR: %08x  FPSR: %08x\n",
                    vfp_get_fpcr(env), vfp_get_fpsr(env));
    }
}

void gen_a64_set_pc_im(uint64_t val)
{
    tcg_gen_movi_i64(cpu_pc, val);
}

/* Load the PC from a generic TCG variable.
 *
 * If address tagging is enabled via the TCR TBI bits, then loading
 * an address into the PC will clear out any tag in it:
 *  + for EL2 and EL3 there is only one TBI bit, and if it is set
 *    then the address is zero-extended, clearing bits [63:56]
 *  + for EL0 and EL1, TBI0 controls addresses with bit 55 == 0
 *    and TBI1 controls addresses with bit 55 == 1.
 *    If the appropriate TBI bit is set for the address then
 *    the address is sign-extended from bit 55 into bits [63:56]
 *
 * We can avoid doing this for relative-branches, because the
 * PC + offset can never overflow into the tag bits (assuming
 * that virtual addresses are less than 56 bits wide, as they
 * are currently), but we must handle it for branch-to-register.
 */
static void gen_a64_set_pc(DisasContext *s, TCGv_i64 src)
{
    if (s->current_el <= 1) {
        /* Test if NEITHER or BOTH TBI values are set.  If so, no need to
         * examine bit 55 of address, can just generate code.
         * If mixed, then test via generated code
         */
        if (s->tbi0 && s->tbi1) {
            TCGv_i64 tmp_reg = tcg_temp_new_i64();
            /* Both bits set, sign extension from bit 55 into [63:56] will
             * cover both cases
             */
            tcg_gen_shli_i64(tmp_reg, src, 8);
            tcg_gen_sari_i64(cpu_pc, tmp_reg, 8);
            tcg_temp_free_i64(tmp_reg);
        } else if (!s->tbi0 && !s->tbi1) {
            /* Neither bit set, just load it as-is */
            tcg_gen_mov_i64(cpu_pc, src);
        } else {
            TCGv_i64 tcg_tmpval = tcg_temp_new_i64();
            TCGv_i64 tcg_bit55  = tcg_temp_new_i64();
            TCGv_i64 tcg_zero   = tcg_const_i64(0);

            tcg_gen_andi_i64(tcg_bit55, src, (1ull << 55));

            if (s->tbi0) {
                /* tbi0==1, tbi1==0, so 0-fill upper byte if bit 55 = 0 */
                tcg_gen_andi_i64(tcg_tmpval, src,
                                 0x00FFFFFFFFFFFFFFull);
                tcg_gen_movcond_i64(TCG_COND_EQ, cpu_pc, tcg_bit55, tcg_zero,
                                    tcg_tmpval, src);
            } else {
                /* tbi0==0, tbi1==1, so 1-fill upper byte if bit 55 = 1 */
                tcg_gen_ori_i64(tcg_tmpval, src,
                                0xFF00000000000000ull);
                tcg_gen_movcond_i64(TCG_COND_NE, cpu_pc, tcg_bit55, tcg_zero,
                                    tcg_tmpval, src);
            }
            tcg_temp_free_i64(tcg_zero);
            tcg_temp_free_i64(tcg_bit55);
            tcg_temp_free_i64(tcg_tmpval);
        }
    } else {  /* EL > 1 */
        if (s->tbi0) {
            /* Force tag byte to all zero */
            tcg_gen_andi_i64(cpu_pc, src, 0x00FFFFFFFFFFFFFFull);
        } else {
            /* Load unmodified address */
            tcg_gen_mov_i64(cpu_pc, src);
        }
    }
}
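
/* Worked example (illustrative addresses): with TBI0 == TBI1 == 1 at EL1,
 * a branch to 0x5600004000001000 (tag byte 0x56, bit 55 clear) loads
 * PC = 0x0000004000001000, while a branch to 0x5680004000001000
 * (bit 55 set) loads PC = 0xFF80004000001000. In both cases the
 * shift-left/arithmetic-shift-right pair above replicates bit 55 into
 * bits [63:56], discarding the tag byte.
 */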

typedef struct DisasCompare64 {
    TCGCond cond;
    TCGv_i64 value;
} DisasCompare64;

static void a64_test_cc(DisasCompare64 *c64, int cc)
{
    DisasCompare c32;

    arm_test_cc(&c32, cc);

    /* Sign-extend the 32-bit value so that the GE/LT comparisons work
     * properly.  The NE/EQ comparisons are also fine with this choice.  */
    c64->cond = c32.cond;
    c64->value = tcg_temp_new_i64();
    tcg_gen_ext_i32_i64(c64->value, c32.value);

    arm_free_cc(&c32);
}

static void a64_free_cc(DisasCompare64 *c64)
{
    tcg_temp_free_i64(c64->value);
}

static void gen_exception_internal(int excp)
{
    TCGv_i32 tcg_excp = tcg_const_i32(excp);

    assert(excp_is_internal(excp));
    gen_helper_exception_internal(cpu_env, tcg_excp);
    tcg_temp_free_i32(tcg_excp);
}

static void gen_exception(int excp, uint32_t syndrome, uint32_t target_el)
{
    TCGv_i32 tcg_excp = tcg_const_i32(excp);
    TCGv_i32 tcg_syn = tcg_const_i32(syndrome);
    TCGv_i32 tcg_el = tcg_const_i32(target_el);

    gen_helper_exception_with_syndrome(cpu_env, tcg_excp,
                                       tcg_syn, tcg_el);
    tcg_temp_free_i32(tcg_el);
    tcg_temp_free_i32(tcg_syn);
    tcg_temp_free_i32(tcg_excp);
}

static void gen_exception_internal_insn(DisasContext *s, int offset, int excp)
{
    gen_a64_set_pc_im(s->pc - offset);
    gen_exception_internal(excp);
    s->is_jmp = DISAS_EXC;
}

static void gen_exception_insn(DisasContext *s, int offset, int excp,
                               uint32_t syndrome, uint32_t target_el)
{
    gen_a64_set_pc_im(s->pc - offset);
    gen_exception(excp, syndrome, target_el);
    s->is_jmp = DISAS_EXC;
}

static void gen_ss_advance(DisasContext *s)
{
    /* If the singlestep state is Active-not-pending, advance to
     * Active-pending.
     */
    if (s->ss_active) {
        s->pstate_ss = 0;
        gen_helper_clear_pstate_ss(cpu_env);
    }
}

static void gen_step_complete_exception(DisasContext *s)
{
    /* We just completed a step of an insn. Move from Active-not-pending
     * to Active-pending, and then also take the swstep exception.
     * This corresponds to making the (IMPDEF) choice to prioritize
     * swstep exceptions over asynchronous exceptions taken to an exception
     * level where debug is disabled. This choice has the advantage that
     * we do not need to maintain internal state corresponding to the
     * ISV/EX syndrome bits between completion of the step and generation
     * of the exception, and our syndrome information is always correct.
     */
    gen_ss_advance(s);
    gen_exception(EXCP_UDEF, syn_swstep(s->ss_same_el, 1, s->is_ldex),
                  default_exception_el(s));
    s->is_jmp = DISAS_EXC;
}

static inline bool use_goto_tb(DisasContext *s, int n, uint64_t dest)
{
    /* No direct tb linking with singlestep (either QEMU's or the ARM
     * debug architecture kind) or deterministic io
     */
    if (s->singlestep_enabled || s->ss_active || (s->tb->cflags & CF_LAST_IO)) {
        return false;
    }

#ifndef CONFIG_USER_ONLY
    /* Only link tbs from inside the same guest page */
    if ((s->tb->pc & TARGET_PAGE_MASK) != (dest & TARGET_PAGE_MASK)) {
        return false;
    }
#endif

    return true;
}

static inline void gen_goto_tb(DisasContext *s, int n, uint64_t dest)
{
    TranslationBlock *tb;

    tb = s->tb;
    if (use_goto_tb(s, n, dest)) {
        tcg_gen_goto_tb(n);
        gen_a64_set_pc_im(dest);
        tcg_gen_exit_tb((intptr_t)tb + n);
        s->is_jmp = DISAS_TB_JUMP;
    } else {
        gen_a64_set_pc_im(dest);
        if (s->ss_active) {
            gen_step_complete_exception(s);
        } else if (s->singlestep_enabled) {
            gen_exception_internal(EXCP_DEBUG);
        } else {
            tcg_gen_exit_tb(0);
            s->is_jmp = DISAS_TB_JUMP;
        }
    }
}
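
/* The (intptr_t)tb + n passed to tcg_gen_exit_tb() is how a TB tells the
 * execution loop which of its two goto_tb jump slots (n == 0 or 1) it
 * left through, so the loop can patch that slot to chain directly to the
 * next TB; a plain tcg_gen_exit_tb(0) requests no chaining.
 */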

static void disas_set_insn_syndrome(DisasContext *s, uint32_t syn)
{
    /* We don't need to save all of the syndrome so we mask and shift
     * out unneeded bits to help the sleb128 encoder do a better job.
     */
    syn &= ARM_INSN_START_WORD2_MASK;
    syn >>= ARM_INSN_START_WORD2_SHIFT;

    /* We check and clear insn_start_idx to catch multiple updates.  */
    assert(s->insn_start_idx != 0);
    tcg_set_insn_param(s->insn_start_idx, 2, syn);
    s->insn_start_idx = 0;
}

static void unallocated_encoding(DisasContext *s)
{
    /* Unallocated and reserved encodings are uncategorized */
    gen_exception_insn(s, 4, EXCP_UDEF, syn_uncategorized(),
                       default_exception_el(s));
}

#define unsupported_encoding(s, insn)                                    \
    do {                                                                 \
        qemu_log_mask(LOG_UNIMP,                                         \
                      "%s:%d: unsupported instruction encoding 0x%08x "  \
                      "at pc=%016" PRIx64 "\n",                          \
                      __FILE__, __LINE__, insn, s->pc - 4);              \
        unallocated_encoding(s);                                         \
    } while (0)

static void init_tmp_a64_array(DisasContext *s)
{
#ifdef CONFIG_DEBUG_TCG
    int i;
    for (i = 0; i < ARRAY_SIZE(s->tmp_a64); i++) {
        TCGV_UNUSED_I64(s->tmp_a64[i]);
    }
#endif
    s->tmp_a64_count = 0;
}

static void free_tmp_a64(DisasContext *s)
{
    int i;
    for (i = 0; i < s->tmp_a64_count; i++) {
        tcg_temp_free_i64(s->tmp_a64[i]);
    }
    init_tmp_a64_array(s);
}

static TCGv_i64 new_tmp_a64(DisasContext *s)
{
    assert(s->tmp_a64_count < TMP_A64_MAX);
    return s->tmp_a64[s->tmp_a64_count++] = tcg_temp_new_i64();
}

static TCGv_i64 new_tmp_a64_zero(DisasContext *s)
{
    TCGv_i64 t = new_tmp_a64(s);
    tcg_gen_movi_i64(t, 0);
    return t;
}

/*
 * Register access functions
 *
 * These functions are used for directly accessing a register where
 * changes to the final register value are likely to be made. If you
 * need to use a register for temporary calculation (e.g. index type
 * operations) use the read_* form.
 *
 * B1.2.1 Register mappings
 *
 * In instruction register encoding 31 can refer to ZR (zero register) or
 * the SP (stack pointer) depending on context. In QEMU's case we map SP
 * to cpu_X[31] and ZR accesses to a temporary which can be discarded.
 * This is the point of the _sp forms.
 */
static TCGv_i64 cpu_reg(DisasContext *s, int reg)
{
    if (reg == 31) {
        return new_tmp_a64_zero(s);
    } else {
        return cpu_X[reg];
    }
}

/* register access for when 31 == SP */
static TCGv_i64 cpu_reg_sp(DisasContext *s, int reg)
{
    return cpu_X[reg];
}

/* read a cpu register in 32bit/64bit mode. Returns a TCGv_i64
 * representing the register contents. This TCGv is an auto-freed
 * temporary so it need not be explicitly freed, and may be modified.
 */
static TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf)
{
    TCGv_i64 v = new_tmp_a64(s);
    if (reg != 31) {
        if (sf) {
            tcg_gen_mov_i64(v, cpu_X[reg]);
        } else {
            tcg_gen_ext32u_i64(v, cpu_X[reg]);
        }
    } else {
        tcg_gen_movi_i64(v, 0);
    }
    return v;
}

static TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf)
{
    TCGv_i64 v = new_tmp_a64(s);
    if (sf) {
        tcg_gen_mov_i64(v, cpu_X[reg]);
    } else {
        tcg_gen_ext32u_i64(v, cpu_X[reg]);
    }
    return v;
}

/* We should have at some point before trying to access an FP register
 * done the necessary access check, so assert that
 * (a) we did the check and
 * (b) we didn't then just plough ahead anyway if it failed.
 * Print the instruction pattern in the abort message so we can figure
 * out what we need to fix if a user encounters this problem in the wild.
 */
static inline void assert_fp_access_checked(DisasContext *s)
{
#ifdef CONFIG_DEBUG_TCG
    if (unlikely(!s->fp_access_checked || s->fp_excp_el)) {
        fprintf(stderr, "target-arm: FP access check missing for "
                "instruction 0x%08x\n", s->insn);
        abort();
    }
#endif
}

/* Return the offset into CPUARMState of an element of specified
 * size, 'element' places in from the least significant end of
 * the FP/vector register Qn.
 */
static inline int vec_reg_offset(DisasContext *s, int regno,
                                 int element, TCGMemOp size)
{
    int offs = 0;
#ifdef HOST_WORDS_BIGENDIAN
    /* This is complicated slightly because vfp.regs[2n] is
     * still the low half and vfp.regs[2n+1] the high half
     * of the 128 bit vector, even on big endian systems.
     * Calculate the offset assuming a fully bigendian 128 bits,
     * then XOR to account for the order of the two 64 bit halves.
     */
    offs += (16 - ((element + 1) * (1 << size)));
    offs ^= 8;
#else
    offs += element * (1 << size);
#endif
    offs += offsetof(CPUARMState, vfp.regs[regno * 2]);
    assert_fp_access_checked(s);
    return offs;
}
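
/* For instance, byte element 0 of Qn (size MO_8) on a big-endian host:
 * the fully big-endian offset would be 16 - 1 = 15, and XORing with 8
 * gives 7, i.e. the last byte of vfp.regs[2n], which indeed holds the
 * least significant byte of the vector. On a little-endian host the
 * same element is simply at offset 0.
 */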

/* Return the offset into CPUARMState of a slice (from
 * the least significant end) of FP register Qn (ie
 * Dn, Sn, Hn or Bn).
 * (Note that this is not the same mapping as for A32; see cpu.h)
 */
static inline int fp_reg_offset(DisasContext *s, int regno, TCGMemOp size)
{
    int offs = offsetof(CPUARMState, vfp.regs[regno * 2]);
#ifdef HOST_WORDS_BIGENDIAN
    offs += (8 - (1 << size));
#endif
    assert_fp_access_checked(s);
    return offs;
}

/* Offset of the high half of the 128 bit vector Qn */
static inline int fp_reg_hi_offset(DisasContext *s, int regno)
{
    assert_fp_access_checked(s);
    return offsetof(CPUARMState, vfp.regs[regno * 2 + 1]);
}

/* Convenience accessors for reading and writing single and double
 * FP registers. Writing clears the upper parts of the associated
 * 128 bit vector register, as required by the architecture.
 * Note that unlike the GP register accessors, the values returned
 * by the read functions must be manually freed.
 */
static TCGv_i64 read_fp_dreg(DisasContext *s, int reg)
{
    TCGv_i64 v = tcg_temp_new_i64();

    tcg_gen_ld_i64(v, cpu_env, fp_reg_offset(s, reg, MO_64));
    return v;
}

static TCGv_i32 read_fp_sreg(DisasContext *s, int reg)
{
    TCGv_i32 v = tcg_temp_new_i32();

    tcg_gen_ld_i32(v, cpu_env, fp_reg_offset(s, reg, MO_32));
    return v;
}

static void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v)
{
    TCGv_i64 tcg_zero = tcg_const_i64(0);

    tcg_gen_st_i64(v, cpu_env, fp_reg_offset(s, reg, MO_64));
    tcg_gen_st_i64(tcg_zero, cpu_env, fp_reg_hi_offset(s, reg));
    tcg_temp_free_i64(tcg_zero);
}

static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v)
{
    TCGv_i64 tmp = tcg_temp_new_i64();

    tcg_gen_extu_i32_i64(tmp, v);
    write_fp_dreg(s, reg, tmp);
    tcg_temp_free_i64(tmp);
}

static TCGv_ptr get_fpstatus_ptr(void)
{
    TCGv_ptr statusptr = tcg_temp_new_ptr();
    int offset;

    /* In A64 all instructions (both FP and Neon) use the FPCR;
     * there is no equivalent of the A32 Neon "standard FPSCR value"
     * and all operations use vfp.fp_status.
     */
    offset = offsetof(CPUARMState, vfp.fp_status);
    tcg_gen_addi_ptr(statusptr, cpu_env, offset);
    return statusptr;
}

/* Set ZF and NF based on a 64 bit result. This is alas fiddlier
 * than the 32 bit equivalent.
 */
static inline void gen_set_NZ64(TCGv_i64 result)
{
    tcg_gen_extr_i64_i32(cpu_ZF, cpu_NF, result);
    tcg_gen_or_i32(cpu_ZF, cpu_ZF, cpu_NF);
}
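
/* This works because of QEMU's lazy flag representation: cpu_ZF holds a
 * value that is zero iff the Z flag is set, and cpu_NF a value whose bit
 * 31 is the N flag. Splitting the 64 bit result into its two halves and
 * ORing them into cpu_ZF makes cpu_ZF zero exactly when the whole result
 * is zero, while the high half lands in cpu_NF with the sign in bit 31.
 */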

/* Set NZCV as for a logical operation: NZ as per result, CV cleared. */
static inline void gen_logic_CC(int sf, TCGv_i64 result)
{
    if (sf) {
        gen_set_NZ64(result);
    } else {
        tcg_gen_extrl_i64_i32(cpu_ZF, result);
        tcg_gen_mov_i32(cpu_NF, cpu_ZF);
    }
    tcg_gen_movi_i32(cpu_CF, 0);
    tcg_gen_movi_i32(cpu_VF, 0);
}

/* dest = T0 + T1; compute C, N, V and Z flags */
static void gen_add_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        TCGv_i64 result, flag, tmp;
        result = tcg_temp_new_i64();
        flag = tcg_temp_new_i64();
        tmp = tcg_temp_new_i64();

        tcg_gen_movi_i64(tmp, 0);
        tcg_gen_add2_i64(result, flag, t0, tmp, t1, tmp);

        tcg_gen_extrl_i64_i32(cpu_CF, flag);

        gen_set_NZ64(result);

        tcg_gen_xor_i64(flag, result, t0);
        tcg_gen_xor_i64(tmp, t0, t1);
        tcg_gen_andc_i64(flag, flag, tmp);
        tcg_temp_free_i64(tmp);
        tcg_gen_extrh_i64_i32(cpu_VF, flag);

        tcg_gen_mov_i64(dest, result);
        tcg_temp_free_i64(result);
        tcg_temp_free_i64(flag);
    } else {
        /* 32 bit arithmetic */
        TCGv_i32 t0_32 = tcg_temp_new_i32();
        TCGv_i32 t1_32 = tcg_temp_new_i32();
        TCGv_i32 tmp = tcg_temp_new_i32();

        tcg_gen_movi_i32(tmp, 0);
        tcg_gen_extrl_i64_i32(t0_32, t0);
        tcg_gen_extrl_i64_i32(t1_32, t1);
        tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, t1_32, tmp);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
        tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
        tcg_gen_xor_i32(tmp, t0_32, t1_32);
        tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
        tcg_gen_extu_i32_i64(dest, cpu_NF);

        tcg_temp_free_i32(tmp);
        tcg_temp_free_i32(t0_32);
        tcg_temp_free_i32(t1_32);
    }
}
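
/* The overflow computation above is the standard identity
 *     V = (result ^ t0) & ~(t0 ^ t1)
 * taken in its top bit: signed overflow can only occur when both operands
 * have the same sign (t0 ^ t1 has the sign bit clear) and the result's
 * sign differs from theirs. E.g. 0x4000000000000000 + 0x4000000000000000
 * yields a negative result from two positive operands, so V = 1.
 */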

/* dest = T0 - T1; compute C, N, V and Z flags */
static void gen_sub_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        /* 64 bit arithmetic */
        TCGv_i64 result, flag, tmp;

        result = tcg_temp_new_i64();
        flag = tcg_temp_new_i64();
        tcg_gen_sub_i64(result, t0, t1);

        gen_set_NZ64(result);

        tcg_gen_setcond_i64(TCG_COND_GEU, flag, t0, t1);
        tcg_gen_extrl_i64_i32(cpu_CF, flag);

        tcg_gen_xor_i64(flag, result, t0);
        tmp = tcg_temp_new_i64();
        tcg_gen_xor_i64(tmp, t0, t1);
        tcg_gen_and_i64(flag, flag, tmp);
        tcg_temp_free_i64(tmp);
        tcg_gen_extrh_i64_i32(cpu_VF, flag);
        tcg_gen_mov_i64(dest, result);
        tcg_temp_free_i64(flag);
        tcg_temp_free_i64(result);
    } else {
        /* 32 bit arithmetic */
        TCGv_i32 t0_32 = tcg_temp_new_i32();
        TCGv_i32 t1_32 = tcg_temp_new_i32();
        TCGv_i32 tmp;

        tcg_gen_extrl_i64_i32(t0_32, t0);
        tcg_gen_extrl_i64_i32(t1_32, t1);
        tcg_gen_sub_i32(cpu_NF, t0_32, t1_32);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
        tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0_32, t1_32);
        tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
        tmp = tcg_temp_new_i32();
        tcg_gen_xor_i32(tmp, t0_32, t1_32);
        tcg_temp_free_i32(t0_32);
        tcg_temp_free_i32(t1_32);
        tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
        tcg_temp_free_i32(tmp);
        tcg_gen_extu_i32_i64(dest, cpu_NF);
    }
}
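
/* Note that AArch64 (like A32) sets C to NOT(borrow) on subtraction,
 * which is why cpu_CF is computed with TCG_COND_GEU above: C = 1
 * exactly when t0 >= t1 as unsigned values, i.e. when no borrow occurs.
 */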

/* dest = T0 + T1 + CF; do not compute flags. */
static void gen_adc(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    TCGv_i64 flag = tcg_temp_new_i64();
    tcg_gen_extu_i32_i64(flag, cpu_CF);
    tcg_gen_add_i64(dest, t0, t1);
    tcg_gen_add_i64(dest, dest, flag);
    tcg_temp_free_i64(flag);

    if (!sf) {
        tcg_gen_ext32u_i64(dest, dest);
    }
}

/* dest = T0 + T1 + CF; compute C, N, V and Z flags. */
static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        TCGv_i64 result, cf_64, vf_64, tmp;
        result = tcg_temp_new_i64();
        cf_64 = tcg_temp_new_i64();
        vf_64 = tcg_temp_new_i64();
        tmp = tcg_const_i64(0);

        tcg_gen_extu_i32_i64(cf_64, cpu_CF);
        tcg_gen_add2_i64(result, cf_64, t0, tmp, cf_64, tmp);
        tcg_gen_add2_i64(result, cf_64, result, cf_64, t1, tmp);
        tcg_gen_extrl_i64_i32(cpu_CF, cf_64);
        gen_set_NZ64(result);

        tcg_gen_xor_i64(vf_64, result, t0);
        tcg_gen_xor_i64(tmp, t0, t1);
        tcg_gen_andc_i64(vf_64, vf_64, tmp);
        tcg_gen_extrh_i64_i32(cpu_VF, vf_64);

        tcg_gen_mov_i64(dest, result);

        tcg_temp_free_i64(tmp);
        tcg_temp_free_i64(vf_64);
        tcg_temp_free_i64(cf_64);
        tcg_temp_free_i64(result);
    } else {
        TCGv_i32 t0_32, t1_32, tmp;
        t0_32 = tcg_temp_new_i32();
        t1_32 = tcg_temp_new_i32();
        tmp = tcg_const_i32(0);

        tcg_gen_extrl_i64_i32(t0_32, t0);
        tcg_gen_extrl_i64_i32(t1_32, t1);
        tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, cpu_CF, tmp);
        tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1_32, tmp);

        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
        tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
        tcg_gen_xor_i32(tmp, t0_32, t1_32);
        tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
        tcg_gen_extu_i32_i64(dest, cpu_NF);

        tcg_temp_free_i32(tmp);
        tcg_temp_free_i32(t1_32);
        tcg_temp_free_i32(t0_32);
    }
}
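
/* Note the carry chain above: the first add2 computes t0 + CF as a
 * (value, carry-out) pair, and the second add2 adds t1 to that pair, so
 * cf_64 accumulates the carry-out of the full t0 + t1 + CF sum. At most
 * one of the two steps can carry (t0 + CF only carries when the result
 * wraps to zero), so cf_64 ends up 0 or 1.
 */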

/*
 * Load/Store generators
 */

/*
 * Store from GPR register to memory.
 */
static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source,
                             TCGv_i64 tcg_addr, int size, int memidx,
                             bool iss_valid,
                             unsigned int iss_srt,
                             bool iss_sf, bool iss_ar)
{
    g_assert(size <= 3);
    tcg_gen_qemu_st_i64(source, tcg_addr, memidx, s->be_data + size);

    if (iss_valid) {
        uint32_t syn;

        syn = syn_data_abort_with_iss(0,
                                      size,
                                      false,
                                      iss_srt,
                                      iss_sf,
                                      iss_ar,
                                      0, 0, 0, 0, 0, false);
        disas_set_insn_syndrome(s, syn);
    }
}

static void do_gpr_st(DisasContext *s, TCGv_i64 source,
                      TCGv_i64 tcg_addr, int size,
                      bool iss_valid,
                      unsigned int iss_srt,
                      bool iss_sf, bool iss_ar)
{
    do_gpr_st_memidx(s, source, tcg_addr, size, get_mem_index(s),
                     iss_valid, iss_srt, iss_sf, iss_ar);
}

/*
 * Load from memory to GPR register
 */
static void do_gpr_ld_memidx(DisasContext *s,
                             TCGv_i64 dest, TCGv_i64 tcg_addr,
                             int size, bool is_signed,
                             bool extend, int memidx,
                             bool iss_valid, unsigned int iss_srt,
                             bool iss_sf, bool iss_ar)
{
    TCGMemOp memop = s->be_data + size;

    g_assert(size <= 3);

    if (is_signed) {
        memop += MO_SIGN;
    }

    tcg_gen_qemu_ld_i64(dest, tcg_addr, memidx, memop);

    if (extend && is_signed) {
        g_assert(size < 3);
        tcg_gen_ext32u_i64(dest, dest);
    }

    if (iss_valid) {
        uint32_t syn;

        syn = syn_data_abort_with_iss(0,
                                      size,
                                      is_signed,
                                      iss_srt,
                                      iss_sf,
                                      iss_ar,
                                      0, 0, 0, 0, 0, false);
        disas_set_insn_syndrome(s, syn);
    }
}

static void do_gpr_ld(DisasContext *s,
                      TCGv_i64 dest, TCGv_i64 tcg_addr,
                      int size, bool is_signed, bool extend,
                      bool iss_valid, unsigned int iss_srt,
                      bool iss_sf, bool iss_ar)
{
    do_gpr_ld_memidx(s, dest, tcg_addr, size, is_signed, extend,
                     get_mem_index(s),
                     iss_valid, iss_srt, iss_sf, iss_ar);
}

/*
 * Store from FP register to memory
 */
static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, int size)
{
    /* This writes the bottom N bits of a 128 bit wide vector to memory */
    TCGv_i64 tmp = tcg_temp_new_i64();
    tcg_gen_ld_i64(tmp, cpu_env, fp_reg_offset(s, srcidx, MO_64));
    if (size < 4) {
        tcg_gen_qemu_st_i64(tmp, tcg_addr, get_mem_index(s),
                            s->be_data + size);
    } else {
        bool be = s->be_data == MO_BE;
        TCGv_i64 tcg_hiaddr = tcg_temp_new_i64();

        tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
        tcg_gen_qemu_st_i64(tmp, be ? tcg_hiaddr : tcg_addr, get_mem_index(s),
                            s->be_data | MO_Q);
        tcg_gen_ld_i64(tmp, cpu_env, fp_reg_hi_offset(s, srcidx));
        tcg_gen_qemu_st_i64(tmp, be ? tcg_addr : tcg_hiaddr, get_mem_index(s),
                            s->be_data | MO_Q);
        tcg_temp_free_i64(tcg_hiaddr);
    }

    tcg_temp_free_i64(tmp);
}

/*
 * Load from memory to FP register
 */
static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, int size)
{
    /* This always zero-extends and writes to a full 128 bit wide vector */
    TCGv_i64 tmplo = tcg_temp_new_i64();
    TCGv_i64 tmphi;

    if (size < 4) {
        TCGMemOp memop = s->be_data + size;
        tmphi = tcg_const_i64(0);
        tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), memop);
    } else {
        bool be = s->be_data == MO_BE;
        TCGv_i64 tcg_hiaddr;

        tmphi = tcg_temp_new_i64();
        tcg_hiaddr = tcg_temp_new_i64();

        tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
        tcg_gen_qemu_ld_i64(tmplo, be ? tcg_hiaddr : tcg_addr, get_mem_index(s),
                            s->be_data | MO_Q);
        tcg_gen_qemu_ld_i64(tmphi, be ? tcg_addr : tcg_hiaddr, get_mem_index(s),
                            s->be_data | MO_Q);
        tcg_temp_free_i64(tcg_hiaddr);
    }

    tcg_gen_st_i64(tmplo, cpu_env, fp_reg_offset(s, destidx, MO_64));
    tcg_gen_st_i64(tmphi, cpu_env, fp_reg_hi_offset(s, destidx));

    tcg_temp_free_i64(tmplo);
    tcg_temp_free_i64(tmphi);
}
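
/* 128 bit FP loads and stores are split into two 64 bit accesses because
 * TCG memory ops handle at most 64 bits at a time. The be ? tcg_hiaddr :
 * tcg_addr selection above places the most significant doubleword at the
 * lower address for a big-endian guest and at the higher address
 * otherwise, matching the architectural memory layout of a Q register.
 */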

/*
 * Vector load/store helpers.
 *
 * The principal difference between this and a FP load is that we don't
 * zero extend as we are filling a partial chunk of the vector register.
 * These functions don't support 128 bit loads/stores, which would be
 * normal load/store operations.
 *
 * The _i32 versions are useful when operating on 32 bit quantities
 * (eg for floating point single or using Neon helper functions).
 */

/* Get value of an element within a vector register */
static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx,
                             int element, TCGMemOp memop)
{
    int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_ld8u_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_16:
        tcg_gen_ld16u_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_32:
        tcg_gen_ld32u_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_8|MO_SIGN:
        tcg_gen_ld8s_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_16|MO_SIGN:
        tcg_gen_ld16s_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_32|MO_SIGN:
        tcg_gen_ld32s_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_64:
    case MO_64|MO_SIGN:
        tcg_gen_ld_i64(tcg_dest, cpu_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

static void read_vec_element_i32(DisasContext *s, TCGv_i32 tcg_dest, int srcidx,
                                 int element, TCGMemOp memop)
{
    int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_ld8u_i32(tcg_dest, cpu_env, vect_off);
        break;
    case MO_16:
        tcg_gen_ld16u_i32(tcg_dest, cpu_env, vect_off);
        break;
    case MO_8|MO_SIGN:
        tcg_gen_ld8s_i32(tcg_dest, cpu_env, vect_off);
        break;
    case MO_16|MO_SIGN:
        tcg_gen_ld16s_i32(tcg_dest, cpu_env, vect_off);
        break;
    case MO_32:
    case MO_32|MO_SIGN:
        tcg_gen_ld_i32(tcg_dest, cpu_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

/* Set value of an element within a vector register */
static void write_vec_element(DisasContext *s, TCGv_i64 tcg_src, int destidx,
                              int element, TCGMemOp memop)
{
    int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_st8_i64(tcg_src, cpu_env, vect_off);
        break;
    case MO_16:
        tcg_gen_st16_i64(tcg_src, cpu_env, vect_off);
        break;
    case MO_32:
        tcg_gen_st32_i64(tcg_src, cpu_env, vect_off);
        break;
    case MO_64:
        tcg_gen_st_i64(tcg_src, cpu_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src,
                                  int destidx, int element, TCGMemOp memop)
{
    int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_st8_i32(tcg_src, cpu_env, vect_off);
        break;
    case MO_16:
        tcg_gen_st16_i32(tcg_src, cpu_env, vect_off);
        break;
    case MO_32:
        tcg_gen_st_i32(tcg_src, cpu_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

/* Clear the high 64 bits of a 128 bit vector (in general non-quad
 * vector ops all need to do this).
 */
static void clear_vec_high(DisasContext *s, int rd)
{
    TCGv_i64 tcg_zero = tcg_const_i64(0);

    write_vec_element(s, tcg_zero, rd, 1, MO_64);
    tcg_temp_free_i64(tcg_zero);
}

/* Store from vector register to memory */
static void do_vec_st(DisasContext *s, int srcidx, int element,
                      TCGv_i64 tcg_addr, int size)
{
    TCGMemOp memop = s->be_data + size;
    TCGv_i64 tcg_tmp = tcg_temp_new_i64();

    read_vec_element(s, tcg_tmp, srcidx, element, size);
    tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), memop);

    tcg_temp_free_i64(tcg_tmp);
}

/* Load from memory to vector register */
static void do_vec_ld(DisasContext *s, int destidx, int element,
                      TCGv_i64 tcg_addr, int size)
{
    TCGMemOp memop = s->be_data + size;
    TCGv_i64 tcg_tmp = tcg_temp_new_i64();

    tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), memop);
    write_vec_element(s, tcg_tmp, destidx, element, size);

    tcg_temp_free_i64(tcg_tmp);
}

/* Check that FP/Neon access is enabled. If it is, return
 * true. If not, emit code to generate an appropriate exception,
 * and return false; the caller should not emit any code for
 * the instruction. Note that this check must happen after all
 * unallocated-encoding checks (otherwise the syndrome information
 * for the resulting exception will be incorrect).
 */
static inline bool fp_access_check(DisasContext *s)
{
    assert(!s->fp_access_checked);
    s->fp_access_checked = true;

    if (!s->fp_excp_el) {
        return true;
    }

    gen_exception_insn(s, 4, EXCP_UDEF, syn_fp_access_trap(1, 0xe, false),
                       s->fp_excp_el);
    return false;
}
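
/* The usual pattern in the SIMD/FP decoders is therefore:
 *
 *     if (<encoding is unallocated>) {
 *         unallocated_encoding(s);
 *         return;
 *     }
 *     if (!fp_access_check(s)) {
 *         return;
 *     }
 *     <emit code for the instruction>
 */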

/*
 * This utility function is for doing register extension with an
 * optional shift. You will likely want to pass a temporary for the
 * destination register. See DecodeRegExtend() in the ARM ARM.
 */
static void ext_and_shift_reg(TCGv_i64 tcg_out, TCGv_i64 tcg_in,
                              int option, unsigned int shift)
{
    int extsize = extract32(option, 0, 2);
    bool is_signed = extract32(option, 2, 1);

    if (is_signed) {
        switch (extsize) {
        case 0:
            tcg_gen_ext8s_i64(tcg_out, tcg_in);
            break;
        case 1:
            tcg_gen_ext16s_i64(tcg_out, tcg_in);
            break;
        case 2:
            tcg_gen_ext32s_i64(tcg_out, tcg_in);
            break;
        case 3:
            tcg_gen_mov_i64(tcg_out, tcg_in);
            break;
        }
    } else {
        switch (extsize) {
        case 0:
            tcg_gen_ext8u_i64(tcg_out, tcg_in);
            break;
        case 1:
            tcg_gen_ext16u_i64(tcg_out, tcg_in);
            break;
        case 2:
            tcg_gen_ext32u_i64(tcg_out, tcg_in);
            break;
        case 3:
            tcg_gen_mov_i64(tcg_out, tcg_in);
            break;
        }
    }

    if (shift) {
        tcg_gen_shli_i64(tcg_out, tcg_out, shift);
    }
}
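
/* For example, "ADD X0, X1, W2, SXTW #2" arrives here with option == 6
 * (is_signed set, extsize 2) and shift == 2: W2 is sign-extended from 32
 * bits to 64 and then shifted left by 2 before the addition.
 */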

static inline void gen_check_sp_alignment(DisasContext *s)
{
    /* The AArch64 architecture mandates that (if enabled via PSTATE
     * or SCTLR bits) there is a check that SP is 16-aligned on every
     * SP-relative load or store (with an exception generated if it is not).
     * In line with general QEMU practice regarding misaligned accesses,
     * we omit these checks for the sake of guest program performance.
     * This function is provided as a hook so we can more easily add these
     * checks in future (possibly as a "favour catching guest program bugs
     * over speed" user selectable option).
     */
}

/*
 * This provides a simple table-based lookup decoder. It is
 * intended to be used when the relevant bits for decode are too
 * awkwardly placed and switch/if based logic would be confusing and
 * deeply nested. Since it's a linear search through the table, tables
 * should be kept small.
 *
 * It returns the first handler where insn & mask == pattern, or
 * NULL if there is no match.
 * The table is terminated by an empty mask (i.e. 0)
 */
static inline AArch64DecodeFn *lookup_disas_fn(const AArch64DecodeTable *table,
                                               uint32_t insn)
{
    const AArch64DecodeTable *tptr = table;

    while (tptr->mask) {
        if ((insn & tptr->mask) == tptr->pattern) {
            return tptr->disas_fn;
        }
        tptr++;
    }
    return NULL;
}
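
/* Illustrative use (hypothetical table; the patterns and handler names
 * shown here are made up for the example):
 *
 *     static const AArch64DecodeTable example_tbl[] = {
 *         { 0x0e200400, 0x9f200400, disas_simd_three_reg_same },
 *         { 0x0e200000, 0x9f200c00, disas_simd_three_reg_diff },
 *         { 0x00000000, 0x00000000, NULL }
 *     };
 *     AArch64DecodeFn *fn = lookup_disas_fn(&example_tbl[0], insn);
 *     if (fn) {
 *         fn(s, insn);
 *     } else {
 *         unallocated_encoding(s);
 *     }
 */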

/*
 * the instruction disassembly implemented here matches
 * the instruction encoding classifications in chapter 3 (C3)
 * of the ARM Architecture Reference Manual (DDI0487A_a)
 */

/* C3.2.7 Unconditional branch (immediate)
 *   31  30       26 25                                  0
 * +----+-----------+-------------------------------------+
 * | op | 0 0 1 0 1 |                 imm26               |
 * +----+-----------+-------------------------------------+
 */
static void disas_uncond_b_imm(DisasContext *s, uint32_t insn)
{
    uint64_t addr = s->pc + sextract32(insn, 0, 26) * 4 - 4;

    if (insn & (1U << 31)) {
        /* C5.6.26 BL Branch with link */
        tcg_gen_movi_i64(cpu_reg(s, 30), s->pc);
    }

    /* C5.6.20 B Branch / C5.6.26 BL Branch with link */
    gen_goto_tb(s, 0, addr);
}

/* C3.2.1 Compare & branch (immediate)
 *   31  30         25  24  23                  5 4      0
 * +----+-------------+----+---------------------+--------+
 * | sf | 0 1 1 0 1 0 | op |         imm19       |   Rt   |
 * +----+-------------+----+---------------------+--------+
 */
static void disas_comp_b_imm(DisasContext *s, uint32_t insn)
{
    unsigned int sf, op, rt;
    uint64_t addr;
    TCGLabel *label_match;
    TCGv_i64 tcg_cmp;

    sf = extract32(insn, 31, 1);
    op = extract32(insn, 24, 1); /* 0: CBZ; 1: CBNZ */
    rt = extract32(insn, 0, 5);
    addr = s->pc + sextract32(insn, 5, 19) * 4 - 4;

    tcg_cmp = read_cpu_reg(s, rt, sf);
    label_match = gen_new_label();

    tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
                        tcg_cmp, 0, label_match);

    gen_goto_tb(s, 0, s->pc);
    gen_set_label(label_match);
    gen_goto_tb(s, 1, addr);
}

/* C3.2.5 Test & branch (immediate)
 *   31  30         25  24  23   19 18          5 4    0
 * +----+-------------+----+-------+-------------+------+
 * | b5 | 0 1 1 0 1 1 | op |  b40  |    imm14    |  Rt  |
 * +----+-------------+----+-------+-------------+------+
 */
static void disas_test_b_imm(DisasContext *s, uint32_t insn)
{
    unsigned int bit_pos, op, rt;
    uint64_t addr;
    TCGLabel *label_match;
    TCGv_i64 tcg_cmp;

    bit_pos = (extract32(insn, 31, 1) << 5) | extract32(insn, 19, 5);
    op = extract32(insn, 24, 1); /* 0: TBZ; 1: TBNZ */
    addr = s->pc + sextract32(insn, 5, 14) * 4 - 4;
    rt = extract32(insn, 0, 5);

    tcg_cmp = tcg_temp_new_i64();
    tcg_gen_andi_i64(tcg_cmp, cpu_reg(s, rt), (1ULL << bit_pos));
    label_match = gen_new_label();
    tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
                        tcg_cmp, 0, label_match);
    tcg_temp_free_i64(tcg_cmp);
    gen_goto_tb(s, 0, s->pc);
    gen_set_label(label_match);
    gen_goto_tb(s, 1, addr);
}

/* C3.2.2 / C5.6.19 Conditional branch (immediate)
 *  31           25  24  23                  5   4  3    0
 * +---------------+----+---------------------+----+------+
 * | 0 1 0 1 0 1 0 | o1 |         imm19       | o0 | cond |
 * +---------------+----+---------------------+----+------+
 */
static void disas_cond_b_imm(DisasContext *s, uint32_t insn)
{
    unsigned int cond;
    uint64_t addr;

    if ((insn & (1 << 4)) || (insn & (1 << 24))) {
        unallocated_encoding(s);
        return;
    }
    addr = s->pc + sextract32(insn, 5, 19) * 4 - 4;
    cond = extract32(insn, 0, 4);

    if (cond < 0x0e) {
        /* genuinely conditional branches */
        TCGLabel *label_match = gen_new_label();
        arm_gen_test_cc(cond, label_match);
        gen_goto_tb(s, 0, s->pc);
        gen_set_label(label_match);
        gen_goto_tb(s, 1, addr);
    } else {
        /* 0xe and 0xf are both "always" conditions */
        gen_goto_tb(s, 0, addr);
    }
}

/* C5.6.68 HINT */
static void handle_hint(DisasContext *s, uint32_t insn,
                        unsigned int op1, unsigned int op2, unsigned int crm)
{
    unsigned int selector = crm << 3 | op2;

    if (op1 != 3) {
        unallocated_encoding(s);
        return;
    }

    switch (selector) {
    case 0: /* NOP */
        return;
    case 3: /* WFI */
        s->is_jmp = DISAS_WFI;
        return;
    case 1: /* YIELD */
        s->is_jmp = DISAS_YIELD;
        return;
    case 2: /* WFE */
        s->is_jmp = DISAS_WFE;
        return;
    case 4: /* SEV */
    case 5: /* SEVL */
        /* we treat all as NOP at least for now */
        return;
    default:
        /* default specified as NOP equivalent */
        return;
    }
}

static void gen_clrex(DisasContext *s, uint32_t insn)
{
    tcg_gen_movi_i64(cpu_exclusive_addr, -1);
}

/* CLREX, DSB, DMB, ISB */
static void handle_sync(DisasContext *s, uint32_t insn,
                        unsigned int op1, unsigned int op2, unsigned int crm)
{
    TCGBar bar;

    if (op1 != 3) {
        unallocated_encoding(s);
        return;
    }

    switch (op2) {
    case 2: /* CLREX */
        gen_clrex(s, insn);
        return;
    case 4: /* DSB */
    case 5: /* DMB */
        switch (crm & 3) {
        case 1: /* MBReqTypes_Reads */
            bar = TCG_BAR_SC | TCG_MO_LD_LD | TCG_MO_LD_ST;
            break;
        case 2: /* MBReqTypes_Writes */
            bar = TCG_BAR_SC | TCG_MO_ST_ST;
            break;
        default: /* MBReqTypes_All */
            bar = TCG_BAR_SC | TCG_MO_ALL;
            break;
        }
        tcg_gen_mb(bar);
        return;
    case 6: /* ISB */
        /* We need to break the TB after this insn to execute
         * self-modifying code correctly and also to take
         * any pending interrupts immediately.
         */
        s->is_jmp = DISAS_UPDATE;
        return;
    default:
        unallocated_encoding(s);
        return;
    }
}

/* C5.6.130 MSR (immediate) - move immediate to processor state field */
static void handle_msr_i(DisasContext *s, uint32_t insn,
                         unsigned int op1, unsigned int op2, unsigned int crm)
{
    int op = op1 << 3 | op2;
    switch (op) {
    case 0x05: /* SPSel */
        if (s->current_el == 0) {
            unallocated_encoding(s);
            return;
        }
        /* fall through */
    case 0x1e: /* DAIFSet */
    case 0x1f: /* DAIFClear */
    {
        TCGv_i32 tcg_imm = tcg_const_i32(crm);
        TCGv_i32 tcg_op = tcg_const_i32(op);
        gen_a64_set_pc_im(s->pc - 4);
        gen_helper_msr_i_pstate(cpu_env, tcg_op, tcg_imm);
        tcg_temp_free_i32(tcg_imm);
        tcg_temp_free_i32(tcg_op);
        s->is_jmp = DISAS_UPDATE;
        break;
    }
    default:
        unallocated_encoding(s);
        return;
    }
}

static void gen_get_nzcv(TCGv_i64 tcg_rt)
{
    TCGv_i32 tmp = tcg_temp_new_i32();
    TCGv_i32 nzcv = tcg_temp_new_i32();

    /* build bit 31, N */
    tcg_gen_andi_i32(nzcv, cpu_NF, (1U << 31));
    /* build bit 30, Z */
    tcg_gen_setcondi_i32(TCG_COND_EQ, tmp, cpu_ZF, 0);
    tcg_gen_deposit_i32(nzcv, nzcv, tmp, 30, 1);
    /* build bit 29, C */
    tcg_gen_deposit_i32(nzcv, nzcv, cpu_CF, 29, 1);
    /* build bit 28, V */
    tcg_gen_shri_i32(tmp, cpu_VF, 31);
    tcg_gen_deposit_i32(nzcv, nzcv, tmp, 28, 1);
    /* generate result */
    tcg_gen_extu_i32_i64(tcg_rt, nzcv);

    tcg_temp_free_i32(nzcv);
    tcg_temp_free_i32(tmp);
}
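
/* Sanity-check example: with N and C set and Z and V clear, the code
 * above yields nzcv == 0xa0000000 (bit 31 for N, bit 29 for C), which is
 * the PSTATE.NZCV layout that an MRS of NZCV is architected to return.
 */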

static void gen_set_nzcv(TCGv_i64 tcg_rt)
{
    TCGv_i32 nzcv = tcg_temp_new_i32();

    /* take NZCV from R[t] */
    tcg_gen_extrl_i64_i32(nzcv, tcg_rt);

    /* bit 31, N */
    tcg_gen_andi_i32(cpu_NF, nzcv, (1U << 31));
    /* bit 30, Z */
    tcg_gen_andi_i32(cpu_ZF, nzcv, (1 << 30));
    tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_ZF, cpu_ZF, 0);
    /* bit 29, C */
    tcg_gen_andi_i32(cpu_CF, nzcv, (1 << 29));
    tcg_gen_shri_i32(cpu_CF, cpu_CF, 29);
    /* bit 28, V */
    tcg_gen_andi_i32(cpu_VF, nzcv, (1 << 28));
    tcg_gen_shli_i32(cpu_VF, cpu_VF, 3);
    tcg_temp_free_i32(nzcv);
}

/* C5.6.129 MRS - move from system register
 * C5.6.131 MSR (register) - move to system register
 * C5.6.204 SYS
 * C5.6.205 SYSL
 * These are all essentially the same insn in 'read' and 'write'
 * versions, with varying op0 fields.
 */
static void handle_sys(DisasContext *s, uint32_t insn, bool isread,
                       unsigned int op0, unsigned int op1, unsigned int op2,
                       unsigned int crn, unsigned int crm, unsigned int rt)
{
    const ARMCPRegInfo *ri;
    TCGv_i64 tcg_rt;

    ri = get_arm_cp_reginfo(s->cp_regs,
                            ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP,
                                               crn, crm, op0, op1, op2));

    if (!ri) {
        /* Unknown register; this might be a guest error or a QEMU
         * unimplemented feature.
         */
        qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch64 "
                      "system register op0:%d op1:%d crn:%d crm:%d op2:%d\n",
                      isread ? "read" : "write", op0, op1, crn, crm, op2);
        unallocated_encoding(s);
        return;
    }

    /* Check access permissions */
    if (!cp_access_ok(s->current_el, ri, isread)) {
        unallocated_encoding(s);
        return;
    }

    if (ri->accessfn) {
        /* Emit code to perform further access permissions checks at
         * runtime; this may result in an exception.
         */
        TCGv_ptr tmpptr;
        TCGv_i32 tcg_syn, tcg_isread;
        uint32_t syndrome;

        gen_a64_set_pc_im(s->pc - 4);
        tmpptr = tcg_const_ptr(ri);
        syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
        tcg_syn = tcg_const_i32(syndrome);
        tcg_isread = tcg_const_i32(isread);
        gen_helper_access_check_cp_reg(cpu_env, tmpptr, tcg_syn, tcg_isread);
        tcg_temp_free_ptr(tmpptr);
        tcg_temp_free_i32(tcg_syn);
        tcg_temp_free_i32(tcg_isread);
    }

    /* Handle special cases first */
    switch (ri->type & ~(ARM_CP_FLAG_MASK & ~ARM_CP_SPECIAL)) {
    case ARM_CP_NOP:
        return;
    case ARM_CP_NZCV:
        tcg_rt = cpu_reg(s, rt);
        if (isread) {
            gen_get_nzcv(tcg_rt);
        } else {
            gen_set_nzcv(tcg_rt);
        }
        return;
    case ARM_CP_CURRENTEL:
        /* Reads as current EL value from pstate, which is
         * guaranteed to be constant by the tb flags.
         */
        tcg_rt = cpu_reg(s, rt);
        tcg_gen_movi_i64(tcg_rt, s->current_el << 2);
        return;
    case ARM_CP_DC_ZVA:
        /* Writes clear the aligned block of memory which rt points into. */
        tcg_rt = cpu_reg(s, rt);
        gen_helper_dc_zva(cpu_env, tcg_rt);
        return;
    default:
        break;
    }

    if ((s->tb->cflags & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
        gen_io_start();
    }

    tcg_rt = cpu_reg(s, rt);

    if (isread) {
        if (ri->type & ARM_CP_CONST) {
            tcg_gen_movi_i64(tcg_rt, ri->resetvalue);
        } else if (ri->readfn) {
            TCGv_ptr tmpptr;
            tmpptr = tcg_const_ptr(ri);
            gen_helper_get_cp_reg64(tcg_rt, cpu_env, tmpptr);
            tcg_temp_free_ptr(tmpptr);
        } else {
            tcg_gen_ld_i64(tcg_rt, cpu_env, ri->fieldoffset);
        }
    } else {
        if (ri->type & ARM_CP_CONST) {
            /* If not forbidden by access permissions, treat as WI */
            return;
        } else if (ri->writefn) {
            TCGv_ptr tmpptr;
            tmpptr = tcg_const_ptr(ri);
            gen_helper_set_cp_reg64(cpu_env, tmpptr, tcg_rt);
            tcg_temp_free_ptr(tmpptr);
        } else {
            tcg_gen_st_i64(tcg_rt, cpu_env, ri->fieldoffset);
        }
    }

    if ((s->tb->cflags & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
        /* I/O operations must end the TB here (whether read or write) */
        gen_io_end();
        s->is_jmp = DISAS_UPDATE;
    } else if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
        /* We default to ending the TB on a coprocessor register write,
         * but allow this to be suppressed by the register definition
         * (usually only necessary to work around guest bugs).
         */
        s->is_jmp = DISAS_UPDATE;
    }
}
1610
1611/* C3.2.4 System
1612 *  31                 22 21  20 19 18 16 15   12 11    8 7   5 4    0
1613 * +---------------------+---+-----+-----+-------+-------+-----+------+
1614 * | 1 1 0 1 0 1 0 1 0 0 | L | op0 | op1 |  CRn  |  CRm  | op2 |  Rt  |
1615 * +---------------------+---+-----+-----+-------+-------+-----+------+
1616 */
1617static void disas_system(DisasContext *s, uint32_t insn)
1618{
1619    unsigned int l, op0, op1, crn, crm, op2, rt;
1620    l = extract32(insn, 21, 1);
1621    op0 = extract32(insn, 19, 2);
1622    op1 = extract32(insn, 16, 3);
1623    crn = extract32(insn, 12, 4);
1624    crm = extract32(insn, 8, 4);
1625    op2 = extract32(insn, 5, 3);
1626    rt = extract32(insn, 0, 5);
1627
1628    if (op0 == 0) {
1629        if (l || rt != 31) {
1630            unallocated_encoding(s);
1631            return;
1632        }
1633        switch (crn) {
1634        case 2: /* C5.6.68 HINT */
1635            handle_hint(s, insn, op1, op2, crm);
1636            break;
1637        case 3: /* CLREX, DSB, DMB, ISB */
1638            handle_sync(s, insn, op1, op2, crm);
1639            break;
1640        case 4: /* C5.6.130 MSR (immediate) */
1641            handle_msr_i(s, insn, op1, op2, crm);
1642            break;
1643        default:
1644            unallocated_encoding(s);
1645            break;
1646        }
1647        return;
1648    }
1649    handle_sys(s, insn, l, op0, op1, op2, crn, crm, rt);
1650}
1651
1652/* C3.2.3 Exception generation
1653 *
1654 *  31             24 23 21 20                     5 4   2 1  0
1655 * +-----------------+-----+------------------------+-----+----+
1656 * | 1 1 0 1 0 1 0 0 | opc |          imm16         | op2 | LL |
1657 * +-----------------+-----+------------------------+-----+----+
1658 */
1659static void disas_exc(DisasContext *s, uint32_t insn)
1660{
1661    int opc = extract32(insn, 21, 3);
1662    int op2_ll = extract32(insn, 0, 5);
1663    int imm16 = extract32(insn, 5, 16);
1664    TCGv_i32 tmp;
1665
1666    switch (opc) {
1667    case 0:
1668        /* For SVC, HVC and SMC we advance the single-step state
1669         * machine before taking the exception. This is architecturally
1670         * mandated, to ensure that single-stepping a system call
1671         * instruction works properly.
1672         */
1673        switch (op2_ll) {
1674        case 1:                                                     /* SVC */
1675            gen_ss_advance(s);
1676            gen_exception_insn(s, 0, EXCP_SWI, syn_aa64_svc(imm16),
1677                               default_exception_el(s));
1678            break;
1679        case 2:                                                     /* HVC */
1680            if (s->current_el == 0) {
1681                unallocated_encoding(s);
1682                break;
1683            }
1684            /* The pre HVC helper handles cases when HVC gets trapped
1685             * as an undefined insn by runtime configuration.
1686             */
1687            gen_a64_set_pc_im(s->pc - 4);
1688            gen_helper_pre_hvc(cpu_env);
1689            gen_ss_advance(s);
1690            gen_exception_insn(s, 0, EXCP_HVC, syn_aa64_hvc(imm16), 2);
1691            break;
1692        case 3:                                                     /* SMC */
1693            if (s->current_el == 0) {
1694                unallocated_encoding(s);
1695                break;
1696            }
1697            gen_a64_set_pc_im(s->pc - 4);
1698            tmp = tcg_const_i32(syn_aa64_smc(imm16));
1699            gen_helper_pre_smc(cpu_env, tmp);
1700            tcg_temp_free_i32(tmp);
1701            gen_ss_advance(s);
1702            gen_exception_insn(s, 0, EXCP_SMC, syn_aa64_smc(imm16), 3);
1703            break;
1704        default:
1705            unallocated_encoding(s);
1706            break;
1707        }
1708        break;
1709    case 1:
1710        if (op2_ll != 0) {
1711            unallocated_encoding(s);
1712            break;
1713        }
1714        /* BRK */
1715        gen_exception_insn(s, 4, EXCP_BKPT, syn_aa64_bkpt(imm16),
1716                           default_exception_el(s));
1717        break;
1718    case 2:
1719        if (op2_ll != 0) {
1720            unallocated_encoding(s);
1721            break;
1722        }
1723        /* HLT. This has two purposes.
1724         * Architecturally, it is an external halting debug instruction.
1725         * Since QEMU doesn't implement external debug, we treat this as
1726         * the architecture requires when halting debug is disabled: it UNDEFs.
1727         * Secondly, "HLT 0xf000" is the A64 semihosting syscall instruction.
1728         */
1729        if (semihosting_enabled() && imm16 == 0xf000) {
1730#ifndef CONFIG_USER_ONLY
1731            /* In system mode, don't allow userspace access to semihosting,
1732             * to provide some semblance of security (and for consistency
1733             * with our 32-bit semihosting).
1734             */
1735            if (s->current_el == 0) {
1736                unsupported_encoding(s, insn);
1737                break;
1738            }
1739#endif
1740            gen_exception_internal_insn(s, 0, EXCP_SEMIHOST);
1741        } else {
1742            unsupported_encoding(s, insn);
1743        }
1744        break;
1745    case 5:
1746        if (op2_ll < 1 || op2_ll > 3) {
1747            unallocated_encoding(s);
1748            break;
1749        }
1750        /* DCPS1, DCPS2, DCPS3 */
1751        unsupported_encoding(s, insn);
1752        break;
1753    default:
1754        unallocated_encoding(s);
1755        break;
1756    }
1757}
1758
1759/* C3.2.7 Unconditional branch (register)
1760 *  31           25 24   21 20   16 15   10 9    5 4     0
1761 * +---------------+-------+-------+-------+------+-------+
1762 * | 1 1 0 1 0 1 1 |  opc  |  op2  |  op3  |  Rn  |  op4  |
1763 * +---------------+-------+-------+-------+------+-------+
1764 */
1765static void disas_uncond_b_reg(DisasContext *s, uint32_t insn)
1766{
1767    unsigned int opc, op2, op3, rn, op4;
1768
1769    opc = extract32(insn, 21, 4);
1770    op2 = extract32(insn, 16, 5);
1771    op3 = extract32(insn, 10, 6);
1772    rn = extract32(insn, 5, 5);
1773    op4 = extract32(insn, 0, 5);
1774
1775    if (op4 != 0x0 || op3 != 0x0 || op2 != 0x1f) {
1776        unallocated_encoding(s);
1777        return;
1778    }
1779
1780    switch (opc) {
1781    case 0: /* BR */
1782    case 1: /* BLR */
1783    case 2: /* RET */
1784        gen_a64_set_pc(s, cpu_reg(s, rn));
1785        /* BLR also needs to load return address */
1786        if (opc == 1) {
1787            tcg_gen_movi_i64(cpu_reg(s, 30), s->pc);
1788        }
1789        break;
1790    case 4: /* ERET */
1791        if (s->current_el == 0) {
1792            unallocated_encoding(s);
1793            return;
1794        }
1795        gen_helper_exception_return(cpu_env);
1796        s->is_jmp = DISAS_JUMP;
1797        return;
1798    case 5: /* DRPS */
1799        if (rn != 0x1f) {
1800            unallocated_encoding(s);
1801        } else {
1802            unsupported_encoding(s, insn);
1803        }
1804        return;
1805    default:
1806        unallocated_encoding(s);
1807        return;
1808    }
1809
1810    s->is_jmp = DISAS_JUMP;
1811}
1812
1813/* C3.2 Branches, exception generating and system instructions */
1814static void disas_b_exc_sys(DisasContext *s, uint32_t insn)
1815{
1816    switch (extract32(insn, 25, 7)) {
1817    case 0x0a: case 0x0b:
1818    case 0x4a: case 0x4b: /* Unconditional branch (immediate) */
1819        disas_uncond_b_imm(s, insn);
1820        break;
1821    case 0x1a: case 0x5a: /* Compare & branch (immediate) */
1822        disas_comp_b_imm(s, insn);
1823        break;
1824    case 0x1b: case 0x5b: /* Test & branch (immediate) */
1825        disas_test_b_imm(s, insn);
1826        break;
1827    case 0x2a: /* Conditional branch (immediate) */
1828        disas_cond_b_imm(s, insn);
1829        break;
1830    case 0x6a: /* Exception generation / System */
1831        if (insn & (1 << 24)) {
1832            disas_system(s, insn);
1833        } else {
1834            disas_exc(s, insn);
1835        }
1836        break;
1837    case 0x6b: /* Unconditional branch (register) */
1838        disas_uncond_b_reg(s, insn);
1839        break;
1840    default:
1841        unallocated_encoding(s);
1842        break;
1843    }
1844}
1845
1846/*
1847 * Load/Store exclusive instructions are implemented by remembering
1848 * the value/address loaded, and seeing if these are the same
1849 * when the store is performed. This is not actually the architecturally
1850 * mandated semantics, but it works for typical guest code sequences
1851 * and avoids having to monitor regular stores.
1852 *
1853 * The store exclusive uses the atomic cmpxchg primitives to avoid
1854 * races in multi-threaded linux-user and when MTTCG softmmu is
1855 * enabled.
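     *
     * For example, a typical guest retry loop of the kind this scheme
     * is intended to handle:
     *
     *     loop: ldxr  x0, [x1]
     *           add   x0, x0, #1
     *           stxr  w2, x0, [x1]      // w2 is 0 on success
     *           cbnz  w2, loop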
1856 */
1857static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
1858                               TCGv_i64 addr, int size, bool is_pair)
1859{
1860    TCGv_i64 tmp = tcg_temp_new_i64();
1861    TCGMemOp memop = s->be_data + size;
1862
1863    g_assert(size <= 3);
1864    tcg_gen_qemu_ld_i64(tmp, addr, get_mem_index(s), memop);
1865
1866    if (is_pair) {
1867        TCGv_i64 addr2 = tcg_temp_new_i64();
1868        TCGv_i64 hitmp = tcg_temp_new_i64();
1869
1870        g_assert(size >= 2);
1871        tcg_gen_addi_i64(addr2, addr, 1 << size);
1872        tcg_gen_qemu_ld_i64(hitmp, addr2, get_mem_index(s), memop);
1873        tcg_temp_free_i64(addr2);
1874        tcg_gen_mov_i64(cpu_exclusive_high, hitmp);
1875        tcg_gen_mov_i64(cpu_reg(s, rt2), hitmp);
1876        tcg_temp_free_i64(hitmp);
1877    }
1878
1879    tcg_gen_mov_i64(cpu_exclusive_val, tmp);
1880    tcg_gen_mov_i64(cpu_reg(s, rt), tmp);
1881
1882    tcg_temp_free_i64(tmp);
1883    tcg_gen_mov_i64(cpu_exclusive_addr, addr);
1884}
1885
1886static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
1887                                TCGv_i64 inaddr, int size, int is_pair)
1888{
1889    /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]
1890     *     && (!is_pair || env->exclusive_high == [addr + datasize])) {
1891     *     [addr] = {Rt};
1892     *     if (is_pair) {
1893     *         [addr + datasize] = {Rt2};
1894     *     }
1895     *     {Rd} = 0;
1896     * } else {
1897     *     {Rd} = 1;
1898     * }
1899     * env->exclusive_addr = -1;
1900     */
1901    TCGLabel *fail_label = gen_new_label();
1902    TCGLabel *done_label = gen_new_label();
1903    TCGv_i64 addr = tcg_temp_local_new_i64();
1904    TCGv_i64 tmp;
1905
1906    /* Copy input into a local temp so it is not trashed when the
1907     * basic block ends at the branch insn.
1908     */
1909    tcg_gen_mov_i64(addr, inaddr);
1910    tcg_gen_brcond_i64(TCG_COND_NE, addr, cpu_exclusive_addr, fail_label);
1911
1912    tmp = tcg_temp_new_i64();
1913    if (is_pair) {
1914        if (size == 2) {
1915            TCGv_i64 val = tcg_temp_new_i64();
1916            tcg_gen_concat32_i64(tmp, cpu_reg(s, rt), cpu_reg(s, rt2));
1917            tcg_gen_concat32_i64(val, cpu_exclusive_val, cpu_exclusive_high);
1918            tcg_gen_atomic_cmpxchg_i64(tmp, addr, val, tmp,
1919                                       get_mem_index(s),
1920                                       size | MO_ALIGN | s->be_data);
1921            tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, val);
1922            tcg_temp_free_i64(val);
1923        } else if (s->be_data == MO_LE) {
1924            gen_helper_paired_cmpxchg64_le(tmp, cpu_env, addr, cpu_reg(s, rt),
1925                                           cpu_reg(s, rt2));
1926        } else {
1927            gen_helper_paired_cmpxchg64_be(tmp, cpu_env, addr, cpu_reg(s, rt),
1928                                           cpu_reg(s, rt2));
1929        }
1930
1931        /* Xilinx: for our randomised testing software we ignore the
1932         * comparison result and store the pair's high half unconditionally.
1933         */
1934        TCGv_i64 addrhi = tcg_temp_new_i64();
1935
1936        tcg_gen_addi_i64(addrhi, addr, 1 << size);
1937        tcg_gen_qemu_st_i64(cpu_reg(s, rt2), addrhi,
1938                            get_mem_index(s), s->be_data + size);
1939        tcg_temp_free_i64(addrhi);
1940    } else {
1941        tcg_gen_qemu_st_i64(cpu_reg(s, rt), addr, get_mem_index(s),
1942                            s->be_data + size);
1943    }
1944
1945    tcg_temp_free_i64(addr);
1946
1947    tcg_gen_movi_i64(cpu_reg(s, rd), 0);
1948    tcg_temp_free_i64(tmp);
1949    tcg_gen_br(done_label);
1950
1951    gen_set_label(fail_label);
1952    tcg_gen_movi_i64(cpu_reg(s, rd), 1);
1953    gen_set_label(done_label);
1954    tcg_gen_movi_i64(cpu_exclusive_addr, -1);
1955
1956    gen_helper_sev(cpu_env);
1957}
1958
1959/* Update the Sixty-Four bit (SF) register size. This logic is derived
1960 * from the ARMv8 specs for LDR (Shared decode for all encodings).
1961 */
1962static bool disas_ldst_compute_iss_sf(int size, bool is_signed, int opc)
1963{
1964    int opc0 = extract32(opc, 0, 1);
1965    int regsize;
1966
1967    if (is_signed) {
1968        regsize = opc0 ? 32 : 64;
1969    } else {
1970        regsize = size == 3 ? 64 : 32;
1971    }
1972    return regsize == 64;
1973}
1974
1975/* C3.3.6 Load/store exclusive
1976 *
1977 *  31 30 29         24  23  22   21  20  16  15  14   10 9    5 4    0
1978 * +-----+-------------+----+---+----+------+----+-------+------+------+
1979 * | sz  | 0 0 1 0 0 0 | o2 | L | o1 |  Rs  | o0 |  Rt2  |  Rn  | Rt   |
1980 * +-----+-------------+----+---+----+------+----+-------+------+------+
1981 *
1982 *  sz: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64 bit
1983 *   L: 0 -> store, 1 -> load
1984 *  o2: 0 -> exclusive, 1 -> not
1985 *  o1: 0 -> single register, 1 -> register pair
1986 *  o0: 1 -> load-acquire/store-release, 0 -> not
1987 */
1988static void disas_ldst_excl(DisasContext *s, uint32_t insn)
1989{
1990    int rt = extract32(insn, 0, 5);
1991    int rn = extract32(insn, 5, 5);
1992    int rt2 = extract32(insn, 10, 5);
1993    int is_lasr = extract32(insn, 15, 1);
1994    int rs = extract32(insn, 16, 5);
1995    int is_pair = extract32(insn, 21, 1);
1996    int is_store = !extract32(insn, 22, 1);
1997    int is_excl = !extract32(insn, 23, 1);
1998    int size = extract32(insn, 30, 2);
1999    TCGv_i64 tcg_addr;
2000
2001    if ((!is_excl && !is_pair && !is_lasr) ||
2002        (!is_excl && is_pair) ||
2003        (is_pair && size < 2)) {
2004        unallocated_encoding(s);
2005        return;
2006    }
2007
2008    if (rn == 31) {
2009        gen_check_sp_alignment(s);
2010    }
2011    tcg_addr = read_cpu_reg_sp(s, rn, 1);
2012
2013    /* The required load-acquire/store-release semantics are provided
2014     * by the explicit barriers emitted below whenever is_lasr is set.
2015     */
2016
2017    if (is_excl) {
2018        if (!is_store) {
2019            s->is_ldex = true;
2020            gen_load_exclusive(s, rt, rt2, tcg_addr, size, is_pair);
2021            if (is_lasr) {
2022                tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
2023            }
2024        } else {
2025            if (is_lasr) {
2026                tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
2027            }
2028            gen_store_exclusive(s, rs, rt, rt2, tcg_addr, size, is_pair);
2029        }
2030    } else {
2031        TCGv_i64 tcg_rt = cpu_reg(s, rt);
2032        bool iss_sf = disas_ldst_compute_iss_sf(size, false, 0);
2033
2034        /* Generate ISS for non-exclusive accesses including LASR.  */
2035        if (is_store) {
2036            if (is_lasr) {
2037                tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
2038            }
2039            do_gpr_st(s, tcg_rt, tcg_addr, size,
2040                      true, rt, iss_sf, is_lasr);
2041        } else {
2042            do_gpr_ld(s, tcg_rt, tcg_addr, size, false, false,
2043                      true, rt, iss_sf, is_lasr);
2044            if (is_lasr) {
2045                tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
2046            }
2047        }
2048    }
2049}
2050
2051/*
2052 * C3.3.5 Load register (literal)
2053 *
2054 *  31 30 29   27  26 25 24 23                5 4     0
2055 * +-----+-------+---+-----+-------------------+-------+
2056 * | opc | 0 1 1 | V | 0 0 |     imm19         |  Rt   |
2057 * +-----+-------+---+-----+-------------------+-------+
2058 *
2059 * V: 1 -> vector (simd/fp)
2060 * opc (non-vector): 00 -> 32 bit, 01 -> 64 bit,
2061 *                   10 -> 32 bit signed, 11 -> prefetch
2062 * opc (vector): 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit (11 unallocated)
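     *
     * The address is always PC-relative: imm19 is scaled by 4 and added
     * to the address of this instruction.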
2063 */
2064static void disas_ld_lit(DisasContext *s, uint32_t insn)
2065{
2066    int rt = extract32(insn, 0, 5);
2067    int64_t imm = sextract32(insn, 5, 19) << 2;
2068    bool is_vector = extract32(insn, 26, 1);
2069    int opc = extract32(insn, 30, 2);
2070    bool is_signed = false;
2071    int size = 2;
2072    TCGv_i64 tcg_rt, tcg_addr;
2073
2074    if (is_vector) {
2075        if (opc == 3) {
2076            unallocated_encoding(s);
2077            return;
2078        }
2079        size = 2 + opc;
2080        if (!fp_access_check(s)) {
2081            return;
2082        }
2083    } else {
2084        if (opc == 3) {
2085            /* PRFM (literal) : prefetch */
2086            return;
2087        }
2088        size = 2 + extract32(opc, 0, 1);
2089        is_signed = extract32(opc, 1, 1);
2090    }
2091
2092    tcg_rt = cpu_reg(s, rt);
2093
2094    tcg_addr = tcg_const_i64((s->pc - 4) + imm);
2095    if (is_vector) {
2096        do_fp_ld(s, rt, tcg_addr, size);
2097    } else {
2098        /* Only unsigned 32bit loads target 32bit registers.  */
2099        bool iss_sf = opc != 0;
2100
2101        do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, false,
2102                  true, rt, iss_sf, false);
2103    }
2104    tcg_temp_free_i64(tcg_addr);
2105}
2106
2107/*
2108 * C5.6.80 LDNP (Load Pair - non-temporal hint)
2109 * C5.6.81 LDP (Load Pair - non vector)
2110 * C5.6.82 LDPSW (Load Pair Signed Word - non vector)
2111 * C5.6.176 STNP (Store Pair - non-temporal hint)
2112 * C5.6.177 STP (Store Pair - non vector)
2113 * C6.3.165 LDNP (Load Pair of SIMD&FP - non-temporal hint)
2114 * C6.3.165 LDP (Load Pair of SIMD&FP)
2115 * C6.3.284 STNP (Store Pair of SIMD&FP - non-temporal hint)
2116 * C6.3.284 STP (Store Pair of SIMD&FP)
2117 *
2118 *  31 30 29   27  26  25 24   23  22 21   15 14   10 9    5 4    0
2119 * +-----+-------+---+---+-------+---+-------+-------+------+------+
2120 * | opc | 1 0 1 | V | 0 | index | L |  imm7 |  Rt2  |  Rn  | Rt   |
2121 * +-----+-------+---+---+-------+---+-------+-------+------+------+
2122 *
2123 * opc: LDP/STP/LDNP/STNP        00 -> 32 bit, 10 -> 64 bit
2124 *      LDPSW                    01
2125 *      LDP/STP/LDNP/STNP (SIMD) 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit
2126 *   V: 0 -> GPR, 1 -> Vector
2127 * idx: 00 -> signed offset with non-temporal hint, 01 -> post-index,
2128 *      10 -> signed offset, 11 -> pre-index
2129 *   L: 0 -> Store 1 -> Load
2130 *
2131 * Rt, Rt2 = GPR or SIMD registers to be stored
2132 * Rn = general purpose register containing address
2133 * imm7 = signed offset (multiple of 4 or 8 depending on size)
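     *
     * e.g. for a 64-bit LDP (opc == 10) imm7 is scaled by 8, so an
     * encoded offset of 2 is a byte offset of 16.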
2134 */
2135static void disas_ldst_pair(DisasContext *s, uint32_t insn)
2136{
2137    int rt = extract32(insn, 0, 5);
2138    int rn = extract32(insn, 5, 5);
2139    int rt2 = extract32(insn, 10, 5);
2140    uint64_t offset = sextract64(insn, 15, 7);
2141    int index = extract32(insn, 23, 2);
2142    bool is_vector = extract32(insn, 26, 1);
2143    bool is_load = extract32(insn, 22, 1);
2144    int opc = extract32(insn, 30, 2);
2145
2146    bool is_signed = false;
2147    bool postindex = false;
2148    bool wback = false;
2149
2150    TCGv_i64 tcg_addr; /* calculated address */
2151    int size;
2152
2153    if (opc == 3) {
2154        unallocated_encoding(s);
2155        return;
2156    }
2157
2158    if (is_vector) {
2159        size = 2 + opc;
2160    } else {
2161        size = 2 + extract32(opc, 1, 1);
2162        is_signed = extract32(opc, 0, 1);
2163        if (!is_load && is_signed) {
2164            unallocated_encoding(s);
2165            return;
2166        }
2167    }
2168
2169    switch (index) {
2170    case 1: /* post-index */
2171        postindex = true;
2172        wback = true;
2173        break;
2174    case 0:
2175        /* signed offset with "non-temporal" hint. Since we don't emulate
2176         * caches we don't care about hints to the cache system about
2177         * data access patterns, and handle this identically to plain
2178         * signed offset.
2179         */
2180        if (is_signed) {
2181            /* There is no non-temporal-hint version of LDPSW */
2182            unallocated_encoding(s);
2183            return;
2184        }
2185        postindex = false;
2186        break;
2187    case 2: /* signed offset, rn not updated */
2188        postindex = false;
2189        break;
2190    case 3: /* pre-index */
2191        postindex = false;
2192        wback = true;
2193        break;
2194    }
2195
2196    if (is_vector && !fp_access_check(s)) {
2197        return;
2198    }
2199
2200    offset <<= size;
2201
2202    if (rn == 31) {
2203        gen_check_sp_alignment(s);
2204    }
2205
2206    tcg_addr = read_cpu_reg_sp(s, rn, 1);
2207
2208    if (!postindex) {
2209        tcg_gen_addi_i64(tcg_addr, tcg_addr, offset);
2210    }
2211
2212    if (is_vector) {
2213        if (is_load) {
2214            do_fp_ld(s, rt, tcg_addr, size);
2215        } else {
2216            do_fp_st(s, rt, tcg_addr, size);
2217        }
2218    } else {
2219        TCGv_i64 tcg_rt = cpu_reg(s, rt);
2220        if (is_load) {
2221            do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, false,
2222                      false, 0, false, false);
2223        } else {
2224            do_gpr_st(s, tcg_rt, tcg_addr, size,
2225                      false, 0, false, false);
2226        }
2227    }
2228    tcg_gen_addi_i64(tcg_addr, tcg_addr, 1 << size);
2229    if (is_vector) {
2230        if (is_load) {
2231            do_fp_ld(s, rt2, tcg_addr, size);
2232        } else {
2233            do_fp_st(s, rt2, tcg_addr, size);
2234        }
2235    } else {
2236        TCGv_i64 tcg_rt2 = cpu_reg(s, rt2);
2237        if (is_load) {
2238            do_gpr_ld(s, tcg_rt2, tcg_addr, size, is_signed, false,
2239                      false, 0, false, false);
2240        } else {
2241            do_gpr_st(s, tcg_rt2, tcg_addr, size,
2242                      false, 0, false, false);
2243        }
2244    }
2245
2246    if (wback) {
2247        if (postindex) {
2248            tcg_gen_addi_i64(tcg_addr, tcg_addr, offset - (1 << size));
2249        } else {
2250            tcg_gen_subi_i64(tcg_addr, tcg_addr, 1 << size);
2251        }
2252        tcg_gen_mov_i64(cpu_reg_sp(s, rn), tcg_addr);
2253    }
2254}
2255
2256/*
2257 * C3.3.8 Load/store (immediate post-indexed)
2258 * C3.3.9 Load/store (immediate pre-indexed)
2259 * C3.3.12 Load/store (unscaled immediate)
2260 *
2261 * 31 30 29   27  26 25 24 23 22 21  20    12 11 10 9    5 4    0
2262 * +----+-------+---+-----+-----+---+--------+-----+------+------+
2263 * |size| 1 1 1 | V | 0 0 | opc | 0 |  imm9  | idx |  Rn  |  Rt  |
2264 * +----+-------+---+-----+-----+---+--------+-----+------+------+
2265 *
2266 * idx = 01 -> post-indexed, 11 -> pre-indexed, 00 -> unscaled imm. (no writeback)
2267 *       10 -> unprivileged
2268 * V = 0 -> non-vector
2269 * size: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64bit
2270 * opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
2271 */
2272static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn,
2273                                int opc,
2274                                int size,
2275                                int rt,
2276                                bool is_vector)
2277{
2278    int rn = extract32(insn, 5, 5);
2279    int imm9 = sextract32(insn, 12, 9);
2280    int idx = extract32(insn, 10, 2);
2281    bool is_signed = false;
2282    bool is_store = false;
2283    bool is_extended = false;
2284    bool is_unpriv = (idx == 2);
2285    bool iss_valid = !is_vector;
2286    bool post_index;
2287    bool writeback;
2288
2289    TCGv_i64 tcg_addr;
2290
2291    if (is_vector) {
2292        size |= (opc & 2) << 1;
2293        if (size > 4 || is_unpriv) {
2294            unallocated_encoding(s);
2295            return;
2296        }
2297        is_store = ((opc & 1) == 0);
2298        if (!fp_access_check(s)) {
2299            return;
2300        }
2301    } else {
2302        if (size == 3 && opc == 2) {
2303            /* PRFM - prefetch */
2304            if (is_unpriv) {
2305                unallocated_encoding(s);
2306                return;
2307            }
2308            return;
2309        }
2310        if (opc == 3 && size > 1) {
2311            unallocated_encoding(s);
2312            return;
2313        }
2314        is_store = (opc == 0);
2315        is_signed = extract32(opc, 1, 1);
2316        is_extended = (size < 3) && extract32(opc, 0, 1);
2317    }
2318
2319    switch (idx) {
2320    case 0:
2321    case 2:
2322        post_index = false;
2323        writeback = false;
2324        break;
2325    case 1:
2326        post_index = true;
2327        writeback = true;
2328        break;
2329    case 3:
2330        post_index = false;
2331        writeback = true;
2332        break;
2333    }
2334
2335    if (rn == 31) {
2336        gen_check_sp_alignment(s);
2337    }
2338    tcg_addr = read_cpu_reg_sp(s, rn, 1);
2339
2340    if (!post_index) {
2341        tcg_gen_addi_i64(tcg_addr, tcg_addr, imm9);
2342    }
2343
2344    if (is_vector) {
2345        if (is_store) {
2346            do_fp_st(s, rt, tcg_addr, size);
2347        } else {
2348            do_fp_ld(s, rt, tcg_addr, size);
2349        }
2350    } else {
2351        TCGv_i64 tcg_rt = cpu_reg(s, rt);
2352        int memidx = is_unpriv ? get_a64_user_mem_index(s) : get_mem_index(s);
2353        bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
2354
2355        if (is_store) {
2356            do_gpr_st_memidx(s, tcg_rt, tcg_addr, size, memidx,
2357                             iss_valid, rt, iss_sf, false);
2358        } else {
2359            do_gpr_ld_memidx(s, tcg_rt, tcg_addr, size,
2360                             is_signed, is_extended, memidx,
2361                             iss_valid, rt, iss_sf, false);
2362        }
2363    }
2364
2365    if (writeback) {
2366        TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
2367        if (post_index) {
2368            tcg_gen_addi_i64(tcg_addr, tcg_addr, imm9);
2369        }
2370        tcg_gen_mov_i64(tcg_rn, tcg_addr);
2371    }
2372}
2373
2374/*
2375 * C3.3.10 Load/store (register offset)
2376 *
2377 * 31 30 29   27  26 25 24 23 22 21  20  16 15 13 12 11 10 9  5 4  0
2378 * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
2379 * |size| 1 1 1 | V | 0 0 | opc | 1 |  Rm  | opt | S| 1 0 | Rn | Rt |
2380 * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
2381 *
2382 * For non-vector:
2383 *   size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit
2384 *   opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
2385 * For vector:
2386 *   size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
2387 *   opc<0>: 0 -> store, 1 -> load
2388 * V: 1 -> vector/simd
2389 * opt: extend encoding (see DecodeRegExtend)
2390 * S: if S=1 then scale (essentially index by sizeof(size))
2391 * Rt: register to transfer into/out of
2392 * Rn: address register or SP for base
2393 * Rm: offset register or ZR for offset
2394 */
2395static void disas_ldst_reg_roffset(DisasContext *s, uint32_t insn,
2396                                   int opc,
2397                                   int size,
2398                                   int rt,
2399                                   bool is_vector)
2400{
2401    int rn = extract32(insn, 5, 5);
2402    int shift = extract32(insn, 12, 1);
2403    int rm = extract32(insn, 16, 5);
2404    int opt = extract32(insn, 13, 3);
2405    bool is_signed = false;
2406    bool is_store = false;
2407    bool is_extended = false;
2408
2409    TCGv_i64 tcg_rm;
2410    TCGv_i64 tcg_addr;
2411
2412    if (extract32(opt, 1, 1) == 0) {
2413        unallocated_encoding(s);
2414        return;
2415    }
2416
2417    if (is_vector) {
2418        size |= (opc & 2) << 1;
2419        if (size > 4) {
2420            unallocated_encoding(s);
2421            return;
2422        }
2423        is_store = !extract32(opc, 0, 1);
2424        if (!fp_access_check(s)) {
2425            return;
2426        }
2427    } else {
2428        if (size == 3 && opc == 2) {
2429            /* PRFM - prefetch */
2430            return;
2431        }
2432        if (opc == 3 && size > 1) {
2433            unallocated_encoding(s);
2434            return;
2435        }
2436        is_store = (opc == 0);
2437        is_signed = extract32(opc, 1, 1);
2438        is_extended = (size < 3) && extract32(opc, 0, 1);
2439    }
2440
2441    if (rn == 31) {
2442        gen_check_sp_alignment(s);
2443    }
2444    tcg_addr = read_cpu_reg_sp(s, rn, 1);
2445
2446    tcg_rm = read_cpu_reg(s, rm, 1);
2447    ext_and_shift_reg(tcg_rm, tcg_rm, opt, shift ? size : 0);
2448
2449    tcg_gen_add_i64(tcg_addr, tcg_addr, tcg_rm);
2450
2451    if (is_vector) {
2452        if (is_store) {
2453            do_fp_st(s, rt, tcg_addr, size);
2454        } else {
2455            do_fp_ld(s, rt, tcg_addr, size);
2456        }
2457    } else {
2458        TCGv_i64 tcg_rt = cpu_reg(s, rt);
2459        bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
2460        if (is_store) {
2461            do_gpr_st(s, tcg_rt, tcg_addr, size,
2462                      true, rt, iss_sf, false);
2463        } else {
2464            do_gpr_ld(s, tcg_rt, tcg_addr, size,
2465                      is_signed, is_extended,
2466                      true, rt, iss_sf, false);
2467        }
2468    }
2469}
2470
2471/*
2472 * C3.3.13 Load/store (unsigned immediate)
2473 *
2474 * 31 30 29   27  26 25 24 23 22 21        10 9     5
2475 * +----+-------+---+-----+-----+------------+-------+------+
2476 * |size| 1 1 1 | V | 0 1 | opc |   imm12    |  Rn   |  Rt  |
2477 * +----+-------+---+-----+-----+------------+-------+------+
2478 *
2479 * For non-vector:
2480 *   size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit
2481 *   opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
2482 * For vector:
2483 *   size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
2484 *   opc<0>: 0 -> store, 1 -> load
2485 * Rn: base address register (inc SP)
2486 * Rt: target register
2487 */
2488static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn,
2489                                        int opc,
2490                                        int size,
2491                                        int rt,
2492                                        bool is_vector)
2493{
2494    int rn = extract32(insn, 5, 5);
2495    unsigned int imm12 = extract32(insn, 10, 12);
2496    unsigned int offset;
2497
2498    TCGv_i64 tcg_addr;
2499
2500    bool is_store;
2501    bool is_signed = false;
2502    bool is_extended = false;
2503
2504    if (is_vector) {
2505        size |= (opc & 2) << 1;
2506        if (size > 4) {
2507            unallocated_encoding(s);
2508            return;
2509        }
2510        is_store = !extract32(opc, 0, 1);
2511        if (!fp_access_check(s)) {
2512            return;
2513        }
2514    } else {
2515        if (size == 3 && opc == 2) {
2516            /* PRFM - prefetch */
2517            return;
2518        }
2519        if (opc == 3 && size > 1) {
2520            unallocated_encoding(s);
2521            return;
2522        }
2523        is_store = (opc == 0);
2524        is_signed = extract32(opc, 1, 1);
2525        is_extended = (size < 3) && extract32(opc, 0, 1);
2526    }
2527
2528    if (rn == 31) {
2529        gen_check_sp_alignment(s);
2530    }
2531    tcg_addr = read_cpu_reg_sp(s, rn, 1);
2532    offset = imm12 << size;
2533    tcg_gen_addi_i64(tcg_addr, tcg_addr, offset);
2534
2535    if (is_vector) {
2536        if (is_store) {
2537            do_fp_st(s, rt, tcg_addr, size);
2538        } else {
2539            do_fp_ld(s, rt, tcg_addr, size);
2540        }
2541    } else {
2542        TCGv_i64 tcg_rt = cpu_reg(s, rt);
2543        bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
2544        if (is_store) {
2545            do_gpr_st(s, tcg_rt, tcg_addr, size,
2546                      true, rt, iss_sf, false);
2547        } else {
2548            do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, is_extended,
2549                      true, rt, iss_sf, false);
2550        }
2551    }
2552}
2553
2554/* Load/store register (all forms) */
2555static void disas_ldst_reg(DisasContext *s, uint32_t insn)
2556{
2557    int rt = extract32(insn, 0, 5);
2558    int opc = extract32(insn, 22, 2);
2559    bool is_vector = extract32(insn, 26, 1);
2560    int size = extract32(insn, 30, 2);
2561
2562    switch (extract32(insn, 24, 2)) {
2563    case 0:
2564        if (extract32(insn, 21, 1) == 1 && extract32(insn, 10, 2) == 2) {
2565            disas_ldst_reg_roffset(s, insn, opc, size, rt, is_vector);
2566        } else {
2567            /* Load/store register (unscaled immediate)
2568             * Load/store immediate pre/post-indexed
2569             * Load/store register unprivileged
2570             */
2571            disas_ldst_reg_imm9(s, insn, opc, size, rt, is_vector);
2572        }
2573        break;
2574    case 1:
2575        disas_ldst_reg_unsigned_imm(s, insn, opc, size, rt, is_vector);
2576        break;
2577    default:
2578        unallocated_encoding(s);
2579        break;
2580    }
2581}
2582
2583/* C3.3.1 AdvSIMD load/store multiple structures
2584 *
2585 *  31  30  29           23 22  21         16 15    12 11  10 9    5 4    0
2586 * +---+---+---------------+---+-------------+--------+------+------+------+
2587 * | 0 | Q | 0 0 1 1 0 0 0 | L | 0 0 0 0 0 0 | opcode | size |  Rn  |  Rt  |
2588 * +---+---+---------------+---+-------------+--------+------+------+------+
2589 *
2590 * C3.3.2 AdvSIMD load/store multiple structures (post-indexed)
2591 *
2592 *  31  30  29           23 22  21  20     16 15    12 11  10 9    5 4    0
2593 * +---+---+---------------+---+---+---------+--------+------+------+------+
2594 * | 0 | Q | 0 0 1 1 0 0 1 | L | 0 |   Rm    | opcode | size |  Rn  |  Rt  |
2595 * +---+---+---------------+---+---+---------+--------+------+------+------+
2596 *
2597 * Rt: first (or only) SIMD&FP register to be transferred
2598 * Rn: base address or SP
2599 * Rm (post-index only): post-index register (when !31) or size dependent #imm
2600 */
2601static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn)
2602{
2603    int rt = extract32(insn, 0, 5);
2604    int rn = extract32(insn, 5, 5);
2605    int size = extract32(insn, 10, 2);
2606    int opcode = extract32(insn, 12, 4);
2607    bool is_store = !extract32(insn, 22, 1);
2608    bool is_postidx = extract32(insn, 23, 1);
2609    bool is_q = extract32(insn, 30, 1);
2610    TCGv_i64 tcg_addr, tcg_rn;
2611
2612    int ebytes = 1 << size;
2613    int elements = (is_q ? 128 : 64) / (8 << size);
2614    int rpt;    /* num iterations */
2615    int selem;  /* structure elements */
2616    int r;
2617
2618    if (extract32(insn, 31, 1) || extract32(insn, 21, 1)) {
2619        unallocated_encoding(s);
2620        return;
2621    }
2622
2623    /* From the shared decode logic */
2624    switch (opcode) {
2625    case 0x0:
2626        rpt = 1;
2627        selem = 4;
2628        break;
2629    case 0x2:
2630        rpt = 4;
2631        selem = 1;
2632        break;
2633    case 0x4:
2634        rpt = 1;
2635        selem = 3;
2636        break;
2637    case 0x6:
2638        rpt = 3;
2639        selem = 1;
2640        break;
2641    case 0x7:
2642        rpt = 1;
2643        selem = 1;
2644        break;
2645    case 0x8:
2646        rpt = 1;
2647        selem = 2;
2648        break;
2649    case 0xa:
2650        rpt = 2;
2651        selem = 1;
2652        break;
2653    default:
2654        unallocated_encoding(s);
2655        return;
2656    }
2657
2658    if (size == 3 && !is_q && selem != 1) {
2659        /* reserved */
2660        unallocated_encoding(s);
2661        return;
2662    }
2663
2664    if (!fp_access_check(s)) {
2665        return;
2666    }
2667
2668    if (rn == 31) {
2669        gen_check_sp_alignment(s);
2670    }
2671
2672    tcg_rn = cpu_reg_sp(s, rn);
2673    tcg_addr = tcg_temp_new_i64();
2674    tcg_gen_mov_i64(tcg_addr, tcg_rn);
2675
2676    for (r = 0; r < rpt; r++) {
2677        int e;
2678        for (e = 0; e < elements; e++) {
2679            int tt = (rt + r) % 32;
2680            int xs;
2681            for (xs = 0; xs < selem; xs++) {
2682                if (is_store) {
2683                    do_vec_st(s, tt, e, tcg_addr, size);
2684                } else {
2685                    do_vec_ld(s, tt, e, tcg_addr, size);
2686
2687                    /* For non-quad operations, setting a slice of the low
2688                     * 64 bits of the register clears the high 64 bits (in
2689                     * the ARM ARM pseudocode this is implicit in the fact
2690                     * that 'rval' is a 64 bit wide variable). We optimize
2691                     * by noticing that we only need to do this the first
2692                     * time we touch a register.
2693                     */
2694                    if (!is_q && e == 0 && (r == 0 || xs == selem - 1)) {
2695                        clear_vec_high(s, tt);
2696                    }
2697                }
2698                tcg_gen_addi_i64(tcg_addr, tcg_addr, ebytes);
2699                tt = (tt + 1) % 32;
2700            }
2701        }
2702    }
2703
2704    if (is_postidx) {
2705        int rm = extract32(insn, 16, 5);
2706        if (rm == 31) {
2707            tcg_gen_mov_i64(tcg_rn, tcg_addr);
2708        } else {
2709            tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
2710        }
2711    }
2712    tcg_temp_free_i64(tcg_addr);
2713}
2714
2715/* C3.3.3 AdvSIMD load/store single structure
2716 *
2717 *  31  30  29           23 22 21 20       16 15 13 12  11  10 9    5 4    0
2718 * +---+---+---------------+-----+-----------+-----+---+------+------+------+
2719 * | 0 | Q | 0 0 1 1 0 1 0 | L R | 0 0 0 0 0 | opc | S | size |  Rn  |  Rt  |
2720 * +---+---+---------------+-----+-----------+-----+---+------+------+------+
2721 *
2722 * C3.3.4 AdvSIMD load/store single structure (post-indexed)
2723 *
2724 *  31  30  29           23 22 21 20       16 15 13 12  11  10 9    5 4    0
2725 * +---+---+---------------+-----+-----------+-----+---+------+------+------+
2726 * | 0 | Q | 0 0 1 1 0 1 1 | L R |     Rm    | opc | S | size |  Rn  |  Rt  |
2727 * +---+---+---------------+-----+-----------+-----+---+------+------+------+
2728 *
2729 * Rt: first (or only) SIMD&FP register to be transferred
2730 * Rn: base address or SP
2731 * Rm (post-index only): post-index register (when !31) or size dependent #imm
2732 * index = encoded in Q:S:size dependent on size
2733 *
2734 * lane_size = encoded in R, opc
2735 * transfer width = encoded in opc, S, size
2736 */
2737static void disas_ldst_single_struct(DisasContext *s, uint32_t insn)
2738{
2739    int rt = extract32(insn, 0, 5);
2740    int rn = extract32(insn, 5, 5);
2741    int size = extract32(insn, 10, 2);
2742    int S = extract32(insn, 12, 1);
2743    int opc = extract32(insn, 13, 3);
2744    int R = extract32(insn, 21, 1);
2745    int is_load = extract32(insn, 22, 1);
2746    int is_postidx = extract32(insn, 23, 1);
2747    int is_q = extract32(insn, 30, 1);
2748
2749    int scale = extract32(opc, 1, 2);
2750    int selem = (extract32(opc, 0, 1) << 1 | R) + 1;
2751    bool replicate = false;
2752    int index = is_q << 3 | S << 2 | size;
2753    int ebytes, xs;
2754    TCGv_i64 tcg_addr, tcg_rn;
2755
2756    switch (scale) {
2757    case 3:
2758        if (!is_load || S) {
2759            unallocated_encoding(s);
2760            return;
2761        }
2762        scale = size;
2763        replicate = true;
2764        break;
2765    case 0:
2766        break;
2767    case 1:
2768        if (extract32(size, 0, 1)) {
2769            unallocated_encoding(s);
2770            return;
2771        }
2772        index >>= 1;
2773        break;
2774    case 2:
2775        if (extract32(size, 1, 1)) {
2776            unallocated_encoding(s);
2777            return;
2778        }
2779        if (!extract32(size, 0, 1)) {
2780            index >>= 2;
2781        } else {
2782            if (S) {
2783                unallocated_encoding(s);
2784                return;
2785            }
2786            index >>= 3;
2787            scale = 3;
2788        }
2789        break;
2790    default:
2791        g_assert_not_reached();
2792    }
2793
2794    if (!fp_access_check(s)) {
2795        return;
2796    }
2797
2798    ebytes = 1 << scale;
2799
2800    if (rn == 31) {
2801        gen_check_sp_alignment(s);
2802    }
2803
2804    tcg_rn = cpu_reg_sp(s, rn);
2805    tcg_addr = tcg_temp_new_i64();
2806    tcg_gen_mov_i64(tcg_addr, tcg_rn);
2807
2808    for (xs = 0; xs < selem; xs++) {
2809        if (replicate) {
2810            /* Load and replicate to all elements */
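                /* The broadcast below multiplies the loaded element by a
                 * constant with a 1 in each element position, e.g. an
                 * 8-bit value times 0x0101010101010101 copies it into
                 * all eight bytes (a 64-bit load needs no multiply).
                 */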
2811            uint64_t mulconst;
2812            TCGv_i64 tcg_tmp = tcg_temp_new_i64();
2813
2814            tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr,
2815                                get_mem_index(s), s->be_data + scale);
2816            switch (scale) {
2817            case 0:
2818                mulconst = 0x0101010101010101ULL;
2819                break;
2820            case 1:
2821                mulconst = 0x0001000100010001ULL;
2822                break;
2823            case 2:
2824                mulconst = 0x0000000100000001ULL;
2825                break;
2826            case 3:
2827                mulconst = 0;
2828                break;
2829            default:
2830                g_assert_not_reached();
2831            }
2832            if (mulconst) {
2833                tcg_gen_muli_i64(tcg_tmp, tcg_tmp, mulconst);
2834            }
2835            write_vec_element(s, tcg_tmp, rt, 0, MO_64);
2836            if (is_q) {
2837                write_vec_element(s, tcg_tmp, rt, 1, MO_64);
2838            } else {
2839                clear_vec_high(s, rt);
2840            }
2841            tcg_temp_free_i64(tcg_tmp);
2842        } else {
2843            /* Load/store one element per register */
2844            if (is_load) {
2845                do_vec_ld(s, rt, index, tcg_addr, scale);
2846            } else {
2847                do_vec_st(s, rt, index, tcg_addr, scale);
2848            }
2849        }
2850        tcg_gen_addi_i64(tcg_addr, tcg_addr, ebytes);
2851        rt = (rt + 1) % 32;
2852    }
2853
2854    if (is_postidx) {
2855        int rm = extract32(insn, 16, 5);
2856        if (rm == 31) {
2857            tcg_gen_mov_i64(tcg_rn, tcg_addr);
2858        } else {
2859            tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
2860        }
2861    }
2862    tcg_temp_free_i64(tcg_addr);
2863}
2864
2865/* C3.3 Loads and stores */
2866static void disas_ldst(DisasContext *s, uint32_t insn)
2867{
2868    switch (extract32(insn, 24, 6)) {
2869    case 0x08: /* Load/store exclusive */
2870        disas_ldst_excl(s, insn);
2871        break;
2872    case 0x18: case 0x1c: /* Load register (literal) */
2873        disas_ld_lit(s, insn);
2874        break;
2875    case 0x28: case 0x29:
2876    case 0x2c: case 0x2d: /* Load/store pair (all forms) */
2877        disas_ldst_pair(s, insn);
2878        break;
2879    case 0x38: case 0x39:
2880    case 0x3c: case 0x3d: /* Load/store register (all forms) */
2881        disas_ldst_reg(s, insn);
2882        break;
2883    case 0x0c: /* AdvSIMD load/store multiple structures */
2884        disas_ldst_multiple_struct(s, insn);
2885        break;
2886    case 0x0d: /* AdvSIMD load/store single structure */
2887        disas_ldst_single_struct(s, insn);
2888        break;
2889    default:
2890        unallocated_encoding(s);
2891        break;
2892    }
2893}
2894
2895/* C3.4.6 PC-rel. addressing
2896 *   31  30   29 28       24 23                5 4    0
2897 * +----+-------+-----------+-------------------+------+
2898 * | op | immlo | 1 0 0 0 0 |       immhi       |  Rd  |
2899 * +----+-------+-----------+-------------------+------+
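     *
     * For ADR the result is this instruction's address plus the signed
     * immhi:immlo offset; for ADRP (op == 1) the low 12 bits of the PC
     * are cleared and the offset is shifted left by 12, yielding a
     * 4KB-page-aligned base plus a page offset.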
2900 */
2901static void disas_pc_rel_adr(DisasContext *s, uint32_t insn)
2902{
2903    unsigned int page, rd;
2904    uint64_t base;
2905    uint64_t offset;
2906
2907    page = extract32(insn, 31, 1);
2908    /* SignExtend(immhi:immlo) -> offset */
2909    offset = sextract64(insn, 5, 19);
2910    offset = offset << 2 | extract32(insn, 29, 2);
2911    rd = extract32(insn, 0, 5);
2912    base = s->pc - 4;
2913
2914    if (page) {
2915        /* ADRP (page based) */
2916        base &= ~0xfff;
2917        offset <<= 12;
2918    }
2919
2920    tcg_gen_movi_i64(cpu_reg(s, rd), base + offset);
2921}
2922
2923/*
2924 * C3.4.1 Add/subtract (immediate)
2925 *
2926 *  31 30 29 28       24 23 22 21         10 9   5 4   0
2927 * +--+--+--+-----------+-----+-------------+-----+-----+
2928 * |sf|op| S| 1 0 0 0 1 |shift|    imm12    |  Rn | Rd  |
2929 * +--+--+--+-----------+-----+-------------+-----+-----+
2930 *
2931 *    sf: 0 -> 32bit, 1 -> 64bit
2932 *    op: 0 -> add  , 1 -> sub
2933 *     S: 1 -> set flags
2934 * shift: 00 -> LSL imm by 0, 01 -> LSL imm by 12
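     *
     * e.g. with shift == 01, ADD X0, X1, #0x123, LSL #12 adds 0x123000.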
2935 */
2936static void disas_add_sub_imm(DisasContext *s, uint32_t insn)
2937{
2938    int rd = extract32(insn, 0, 5);
2939    int rn = extract32(insn, 5, 5);
2940    uint64_t imm = extract32(insn, 10, 12);
2941    int shift = extract32(insn, 22, 2);
2942    bool setflags = extract32(insn, 29, 1);
2943    bool sub_op = extract32(insn, 30, 1);
2944    bool is_64bit = extract32(insn, 31, 1);
2945
2946    TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
2947    TCGv_i64 tcg_rd = setflags ? cpu_reg(s, rd) : cpu_reg_sp(s, rd);
2948    TCGv_i64 tcg_result;
2949
2950    switch (shift) {
2951    case 0x0:
2952        break;
2953    case 0x1:
2954        imm <<= 12;
2955        break;
2956    default:
2957        unallocated_encoding(s);
2958        return;
2959    }
2960
2961    tcg_result = tcg_temp_new_i64();
2962    if (!setflags) {
2963        if (sub_op) {
2964            tcg_gen_subi_i64(tcg_result, tcg_rn, imm);
2965        } else {
2966            tcg_gen_addi_i64(tcg_result, tcg_rn, imm);
2967        }
2968    } else {
2969        TCGv_i64 tcg_imm = tcg_const_i64(imm);
2970        if (sub_op) {
2971            gen_sub_CC(is_64bit, tcg_result, tcg_rn, tcg_imm);
2972        } else {
2973            gen_add_CC(is_64bit, tcg_result, tcg_rn, tcg_imm);
2974        }
2975        tcg_temp_free_i64(tcg_imm);
2976    }
2977
2978    if (is_64bit) {
2979        tcg_gen_mov_i64(tcg_rd, tcg_result);
2980    } else {
2981        tcg_gen_ext32u_i64(tcg_rd, tcg_result);
2982    }
2983
2984    tcg_temp_free_i64(tcg_result);
2985}
2986
2987/* The input should be a value in the bottom e bits (with higher
2988 * bits zero); returns that value replicated into every element
2989 * of size e in a 64 bit integer.
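     * For example, bitfield_replicate(1, 2) returns 0x5555555555555555.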
2990 */
2991static uint64_t bitfield_replicate(uint64_t mask, unsigned int e)
2992{
2993    assert(e != 0);
2994    while (e < 64) {
2995        mask |= mask << e;
2996        e *= 2;
2997    }
2998    return mask;
2999}
3000
3001/* Return a value with the bottom len bits set (where 0 < len <= 64) */
3002static inline uint64_t bitmask64(unsigned int length)
3003{
3004    assert(length > 0 && length <= 64);
3005    return ~0ULL >> (64 - length);
3006}
3007
3008/* Simplified variant of pseudocode DecodeBitMasks() for the case where we
3009 * only require the wmask. Returns false if the imms/immr/immn are a reserved
3010 * value (ie should cause a guest UNDEF exception), and true if they are
3011 * valid, in which case the decoded bit pattern is written to result.
3012 */
3013static bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn,
3014                                   unsigned int imms, unsigned int immr)
3015{
3016    uint64_t mask;
3017    unsigned e, levels, s, r;
3018    int len;
3019
3020    assert(immn < 2 && imms < 64 && immr < 64);
3021
3022    /* The bit patterns we create here are 64 bit patterns which
3023     * are vectors of identical elements of size e = 2, 4, 8, 16, 32 or
3024     * 64 bits each. Each element contains the same value: a run
3025     * of between 1 and e-1 non-zero bits, rotated within the
3026     * element by between 0 and e-1 bits.
3027     *
3028     * The element size and run length are encoded into immn (1 bit)
3029     * and imms (6 bits) as follows:
3030     * 64 bit elements: immn = 1, imms = <length of run - 1>
3031     * 32 bit elements: immn = 0, imms = 0 : <length of run - 1>
3032     * 16 bit elements: immn = 0, imms = 10 : <length of run - 1>
3033     *  8 bit elements: immn = 0, imms = 110 : <length of run - 1>
3034     *  4 bit elements: immn = 0, imms = 1110 : <length of run - 1>
3035     *  2 bit elements: immn = 0, imms = 11110 : <length of run - 1>
3036     * Notice that immn = 0, imms = 11111x is the only combination
3037     * not covered by one of the above options; this is reserved.
3038     * Further, <length of run - 1> all-ones is a reserved pattern.
3039     *
3040     * In all cases the rotation is by immr % e (and immr is 6 bits).
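         *
         * Worked example: immn = 0, imms = 0b110001, immr = 0b000001
         * gives len = 3 and so e = 8 bit elements; s = 1 and r = 1, so
         * each element is s + 1 = 2 set bits rotated right by one, i.e.
         * 0x81, and the resulting mask is 0x8181818181818181.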
3041     */
3042
3043    /* First determine the element size */
3044    len = 31 - clz32((immn << 6) | (~imms & 0x3f));
3045    if (len < 1) {
3046        /* This is the reserved immn == 0, imms == 11111x case */
3047        return false;
3048    }
3049    e = 1 << len;
3050
3051    levels = e - 1;
3052    s = imms & levels;
3053    r = immr & levels;
3054
3055    if (s == levels) {
3056        /* <length of run - 1> mustn't be all-ones. */
3057        return false;
3058    }
3059
3060    /* Create the value of one element: s+1 set bits rotated
3061     * by r within the element (which is e bits wide)...
3062     */
3063    mask = bitmask64(s + 1);
3064    if (r) {
3065        mask = (mask >> r) | (mask << (e - r));
3066        mask &= bitmask64(e);
3067    }
3068    /* ...then replicate the element over the whole 64 bit value */
3069    mask = bitfield_replicate(mask, e);
3070    *result = mask;
3071    return true;
3072}
3073
3074/* C3.4.4 Logical (immediate)
3075 *   31  30 29 28         23 22  21  16 15  10 9    5 4    0
3076 * +----+-----+-------------+---+------+------+------+------+
3077 * | sf | opc | 1 0 0 1 0 0 | N | immr | imms |  Rn  |  Rd  |
3078 * +----+-----+-------------+---+------+------+------+------+
3079 */
3080static void disas_logic_imm(DisasContext *s, uint32_t insn)
3081{
3082    unsigned int sf, opc, is_n, immr, imms, rn, rd;
3083    TCGv_i64 tcg_rd, tcg_rn;
3084    uint64_t wmask;
3085    bool is_and = false;
3086
3087    sf = extract32(insn, 31, 1);
3088    opc = extract32(insn, 29, 2);
3089    is_n = extract32(insn, 22, 1);
3090    immr = extract32(insn, 16, 6);
3091    imms = extract32(insn, 10, 6);
3092    rn = extract32(insn, 5, 5);
3093    rd = extract32(insn, 0, 5);
3094
3095    if (!sf && is_n) {
3096        unallocated_encoding(s);
3097        return;
3098    }
3099
3100    if (opc == 0x3) { /* ANDS */
3101        tcg_rd = cpu_reg(s, rd);
3102    } else {
3103        tcg_rd = cpu_reg_sp(s, rd);
3104    }
3105    tcg_rn = cpu_reg(s, rn);
3106
3107    if (!logic_imm_decode_wmask(&wmask, is_n, imms, immr)) {
3108        /* some immediate field values are reserved */
3109        unallocated_encoding(s);
3110        return;
3111    }
3112
3113    if (!sf) {
3114        wmask &= 0xffffffff;
3115    }
3116
3117    switch (opc) {
3118    case 0x3: /* ANDS */
3119    case 0x0: /* AND */
3120        tcg_gen_andi_i64(tcg_rd, tcg_rn, wmask);
3121        is_and = true;
3122        break;
3123    case 0x1: /* ORR */
3124        tcg_gen_ori_i64(tcg_rd, tcg_rn, wmask);
3125        break;
3126    case 0x2: /* EOR */
3127        tcg_gen_xori_i64(tcg_rd, tcg_rn, wmask);
3128        break;
3129    default:
3130        g_assert_not_reached(); /* must handle all above */
3131        break;
3132    }
3133
3134    if (!sf && !is_and) {
3135        /* zero extend final result; we know we can skip this for AND
3136         * since the immediate had the high 32 bits clear.
3137         */
3138        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3139    }
3140
3141    if (opc == 3) { /* ANDS */
3142        gen_logic_CC(sf, tcg_rd);
3143    }
3144}
3145
3146/*
3147 * C3.4.5 Move wide (immediate)
3148 *
3149 *  31 30 29 28         23 22 21 20             5 4    0
3150 * +--+-----+-------------+-----+----------------+------+
3151 * |sf| opc | 1 0 0 1 0 1 |  hw |  imm16         |  Rd  |
3152 * +--+-----+-------------+-----+----------------+------+
3153 *
3154 * sf: 0 -> 32 bit, 1 -> 64 bit
3155 * opc: 00 -> N, 10 -> Z, 11 -> K
3156 * hw: shift/16 (0 or 16; 32 and 48 are valid only when sf is set)
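     *
     * e.g. MOVZ X0, #0x1234, LSL #16 sets X0 to 0x12340000; a following
     * MOVK X0, #0x5678 replaces only bits [15:0], giving 0x12345678.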
3157 */
3158static void disas_movw_imm(DisasContext *s, uint32_t insn)
3159{
3160    int rd = extract32(insn, 0, 5);
3161    uint64_t imm = extract32(insn, 5, 16);
3162    int sf = extract32(insn, 31, 1);
3163    int opc = extract32(insn, 29, 2);
3164    int pos = extract32(insn, 21, 2) << 4;
3165    TCGv_i64 tcg_rd = cpu_reg(s, rd);
3166    TCGv_i64 tcg_imm;
3167
3168    if (!sf && (pos >= 32)) {
3169        unallocated_encoding(s);
3170        return;
3171    }
3172
3173    switch (opc) {
3174    case 0: /* MOVN */
3175    case 2: /* MOVZ */
3176        imm <<= pos;
3177        if (opc == 0) {
3178            imm = ~imm;
3179        }
3180        if (!sf) {
3181            imm &= 0xffffffffu;
3182        }
3183        tcg_gen_movi_i64(tcg_rd, imm);
3184        break;
3185    case 3: /* MOVK */
3186        tcg_imm = tcg_const_i64(imm);
3187        tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_imm, pos, 16);
3188        tcg_temp_free_i64(tcg_imm);
3189        if (!sf) {
3190            tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3191        }
3192        break;
3193    default:
3194        unallocated_encoding(s);
3195        break;
3196    }
3197}
3198
3199/* C3.4.2 Bitfield
3200 *   31  30 29 28         23 22  21  16 15  10 9    5 4    0
3201 * +----+-----+-------------+---+------+------+------+------+
3202 * | sf | opc | 1 0 0 1 1 0 | N | immr | imms |  Rn  |  Rd  |
3203 * +----+-----+-------------+---+------+------+------+------+
3204 */
3205static void disas_bitfield(DisasContext *s, uint32_t insn)
3206{
3207    unsigned int sf, n, opc, ri, si, rn, rd, bitsize, pos, len;
3208    TCGv_i64 tcg_rd, tcg_tmp;
3209
3210    sf = extract32(insn, 31, 1);
3211    opc = extract32(insn, 29, 2);
3212    n = extract32(insn, 22, 1);
3213    ri = extract32(insn, 16, 6);
3214    si = extract32(insn, 10, 6);
3215    rn = extract32(insn, 5, 5);
3216    rd = extract32(insn, 0, 5);
3217    bitsize = sf ? 64 : 32;
3218
3219    if (sf != n || ri >= bitsize || si >= bitsize || opc > 2) {
3220        unallocated_encoding(s);
3221        return;
3222    }
3223
3224    tcg_rd = cpu_reg(s, rd);
3225
3226    /* Suppress the zero-extend for !sf.  Since RI and SI are constrained
3227       to be smaller than bitsize, we'll never reference data outside the
3228       low 32-bits anyway.  */
3229    tcg_tmp = read_cpu_reg(s, rn, 1);
3230
3231    /* Recognize the common aliases.  */
3232    if (opc == 0) { /* SBFM */
3233        if (ri == 0) {
3234            if (si == 7) { /* SXTB */
3235                tcg_gen_ext8s_i64(tcg_rd, tcg_tmp);
3236                goto done;
3237            } else if (si == 15) { /* SXTH */
3238                tcg_gen_ext16s_i64(tcg_rd, tcg_tmp);
3239                goto done;
3240            } else if (si == 31) { /* SXTW */
3241                tcg_gen_ext32s_i64(tcg_rd, tcg_tmp);
3242                goto done;
3243            }
3244        }
3245        if (si == 63 || (si == 31 && ri <= si)) { /* ASR */
3246            if (si == 31) {
3247                tcg_gen_ext32s_i64(tcg_tmp, tcg_tmp);
3248            }
3249            tcg_gen_sari_i64(tcg_rd, tcg_tmp, ri);
3250            goto done;
3251        }
3252    } else if (opc == 2) { /* UBFM */
3253        if (ri == 0) { /* UXTB, UXTH, plus non-canonical AND */
3254            tcg_gen_andi_i64(tcg_rd, tcg_tmp, bitmask64(si + 1));
3255            return;
3256        }
3257        if (si == 63 || (si == 31 && ri <= si)) { /* LSR */
3258            if (si == 31) {
3259                tcg_gen_ext32u_i64(tcg_tmp, tcg_tmp);
3260            }
3261            tcg_gen_shri_i64(tcg_rd, tcg_tmp, ri);
3262            return;
3263        }
3264        if (si + 1 == ri && si != bitsize - 1) { /* LSL */
3265            int shift = bitsize - 1 - si;
3266            tcg_gen_shli_i64(tcg_rd, tcg_tmp, shift);
3267            goto done;
3268        }
3269    }
3270
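    /* Generic case, e.g. UBFX W0, W1, #8, #8 (= UBFM with ri=8, si=15)
     * extracts bits 15:8 to the bottom of W0, while BFI W0, W1, #8, #8
     * (= BFM with ri=24, si=7) deposits bits 7:0 of W1 at position 8.
     */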
3271    if (opc != 1) { /* SBFM or UBFM */
3272        tcg_gen_movi_i64(tcg_rd, 0);
3273    }
3274
3275    /* do the bit move operation */
3276    if (si >= ri) {
3277        /* Wd<s-r:0> = Wn<s:r> */
3278        tcg_gen_shri_i64(tcg_tmp, tcg_tmp, ri);
3279        pos = 0;
3280        len = (si - ri) + 1;
3281    } else {
3282        /* Wd<bitsize+s-r:bitsize-r> = Wn<s:0> */
3283        pos = bitsize - ri;
3284        len = si + 1;
3285    }
3286
3287    tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, pos, len);
3288
3289    if (opc == 0) { /* SBFM - sign extend the destination field */
3290        tcg_gen_shli_i64(tcg_rd, tcg_rd, 64 - (pos + len));
3291        tcg_gen_sari_i64(tcg_rd, tcg_rd, 64 - (pos + len));
3292    }
3293
3294 done:
3295    if (!sf) { /* zero extend final result */
3296        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3297    }
3298}
3299
3300/* C3.4.3 Extract
3301 *   31  30  29 28         23 22   21  20  16 15    10 9    5 4    0
3302 * +----+------+-------------+---+----+------+--------+------+------+
3303 * | sf | op21 | 1 0 0 1 1 1 | N | o0 |  Rm  |  imms  |  Rn  |  Rd  |
3304 * +----+------+-------------+---+----+------+--------+------+------+
3305 */
3306static void disas_extract(DisasContext *s, uint32_t insn)
3307{
3308    unsigned int sf, n, rm, imm, rn, rd, bitsize, op21, op0;
3309
3310    sf = extract32(insn, 31, 1);
3311    n = extract32(insn, 22, 1);
3312    rm = extract32(insn, 16, 5);
3313    imm = extract32(insn, 10, 6);
3314    rn = extract32(insn, 5, 5);
3315    rd = extract32(insn, 0, 5);
3316    op21 = extract32(insn, 29, 2);
3317    op0 = extract32(insn, 21, 1);
3318    bitsize = sf ? 64 : 32;
3319
3320    if (sf != n || op21 || op0 || imm >= bitsize) {
3321        unallocated_encoding(s);
3322    } else {
3323        TCGv_i64 tcg_rd, tcg_rm, tcg_rn;
3324
3325        tcg_rd = cpu_reg(s, rd);
3326
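        /* The result is bitsize bits of the concatenation Rn:Rm starting
         * at bit imm, e.g. EXTR X0, X1, X2, #8 is (X2 >> 8) | (X1 << 56).
         */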
3327        if (unlikely(imm == 0)) {
3328            /* tcg shl_i32/shl_i64 is undefined for 32/64 bit shifts,
3329             * so an extract from bit 0 is a special case.
3330             */
3331            if (sf) {
3332                tcg_gen_mov_i64(tcg_rd, cpu_reg(s, rm));
3333            } else {
3334                tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, rm));
3335            }
3336        } else if (rm == rn) { /* ROR */
3337            tcg_rm = cpu_reg(s, rm);
3338            if (sf) {
3339                tcg_gen_rotri_i64(tcg_rd, tcg_rm, imm);
3340            } else {
3341                TCGv_i32 tmp = tcg_temp_new_i32();
3342                tcg_gen_extrl_i64_i32(tmp, tcg_rm);
3343                tcg_gen_rotri_i32(tmp, tmp, imm);
3344                tcg_gen_extu_i32_i64(tcg_rd, tmp);
3345                tcg_temp_free_i32(tmp);
3346            }
3347        } else {
3348            tcg_rm = read_cpu_reg(s, rm, sf);
3349            tcg_rn = read_cpu_reg(s, rn, sf);
3350            tcg_gen_shri_i64(tcg_rm, tcg_rm, imm);
3351            tcg_gen_shli_i64(tcg_rn, tcg_rn, bitsize - imm);
3352            tcg_gen_or_i64(tcg_rd, tcg_rm, tcg_rn);
3353            if (!sf) {
3354                tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3355            }
3356        }
3357    }
3358}
3359
3360/* C3.4 Data processing - immediate */
3361static void disas_data_proc_imm(DisasContext *s, uint32_t insn)
3362{
3363    switch (extract32(insn, 23, 6)) {
3364    case 0x20: case 0x21: /* PC-rel. addressing */
3365        disas_pc_rel_adr(s, insn);
3366        break;
3367    case 0x22: case 0x23: /* Add/subtract (immediate) */
3368        disas_add_sub_imm(s, insn);
3369        break;
3370    case 0x24: /* Logical (immediate) */
3371        disas_logic_imm(s, insn);
3372        break;
3373    case 0x25: /* Move wide (immediate) */
3374        disas_movw_imm(s, insn);
3375        break;
3376    case 0x26: /* Bitfield */
3377        disas_bitfield(s, insn);
3378        break;
3379    case 0x27: /* Extract */
3380        disas_extract(s, insn);
3381        break;
3382    default:
3383        unallocated_encoding(s);
3384        break;
3385    }
3386}
3387
3388/* Shift a TCGv src by TCGv shift_amount, put result in dst.
3389 * Note that it is the caller's responsibility to ensure that the
3390 * shift amount is in range (ie 0..31 or 0..63), applying the ARM
3391 * mandated truncation of out-of-range shift amounts beforehand.
3392 */
3393static void shift_reg(TCGv_i64 dst, TCGv_i64 src, int sf,
3394                      enum a64_shift_type shift_type, TCGv_i64 shift_amount)
3395{
3396    switch (shift_type) {
3397    case A64_SHIFT_TYPE_LSL:
3398        tcg_gen_shl_i64(dst, src, shift_amount);
3399        break;
3400    case A64_SHIFT_TYPE_LSR:
3401        tcg_gen_shr_i64(dst, src, shift_amount);
3402        break;
3403    case A64_SHIFT_TYPE_ASR:
3404        if (!sf) {
3405            tcg_gen_ext32s_i64(dst, src);
3406        }
3407        tcg_gen_sar_i64(dst, sf ? src : dst, shift_amount);
3408        break;
3409    case A64_SHIFT_TYPE_ROR:
3410        if (sf) {
3411            tcg_gen_rotr_i64(dst, src, shift_amount);
3412        } else {
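            /* A 32-bit ROR must wrap at bit 32, which rotr_i64 on the
             * zero-extended value would not do, so use 32-bit ops here.
             */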
3413            TCGv_i32 t0, t1;
3414            t0 = tcg_temp_new_i32();
3415            t1 = tcg_temp_new_i32();
3416            tcg_gen_extrl_i64_i32(t0, src);
3417            tcg_gen_extrl_i64_i32(t1, shift_amount);
3418            tcg_gen_rotr_i32(t0, t0, t1);
3419            tcg_gen_extu_i32_i64(dst, t0);
3420            tcg_temp_free_i32(t0);
3421            tcg_temp_free_i32(t1);
3422        }
3423        break;
3424    default:
3425        assert(FALSE); /* all shift types should be handled */
3426        break;
3427    }
3428
3429    if (!sf) { /* zero extend final result */
3430        tcg_gen_ext32u_i64(dst, dst);
3431    }
3432}
3433
3434/* Shift a TCGv src by immediate, put result in dst.
3435 * The shift amount must be in range (this should always be true as the
3436 * relevant instructions will UNDEF on bad shift immediates).
3437 */
3438static void shift_reg_imm(TCGv_i64 dst, TCGv_i64 src, int sf,
3439                          enum a64_shift_type shift_type, unsigned int shift_i)
3440{
3441    assert(shift_i < (sf ? 64 : 32));
3442
3443    if (shift_i == 0) {
3444        tcg_gen_mov_i64(dst, src);
3445    } else {
3446        TCGv_i64 shift_const;
3447
3448        shift_const = tcg_const_i64(shift_i);
3449        shift_reg(dst, src, sf, shift_type, shift_const);
3450        tcg_temp_free_i64(shift_const);
3451    }
3452}
3453
3454/* C3.5.10 Logical (shifted register)
3455 *   31  30 29 28       24 23   22 21  20  16 15    10 9    5 4    0
3456 * +----+-----+-----------+-------+---+------+--------+------+------+
3457 * | sf | opc | 0 1 0 1 0 | shift | N |  Rm  |  imm6  |  Rn  |  Rd  |
3458 * +----+-----+-----------+-------+---+------+--------+------+------+
3459 */
3460static void disas_logic_reg(DisasContext *s, uint32_t insn)
3461{
3462    TCGv_i64 tcg_rd, tcg_rn, tcg_rm;
3463    unsigned int sf, opc, shift_type, invert, rm, shift_amount, rn, rd;
3464
3465    sf = extract32(insn, 31, 1);
3466    opc = extract32(insn, 29, 2);
3467    shift_type = extract32(insn, 22, 2);
3468    invert = extract32(insn, 21, 1);
3469    rm = extract32(insn, 16, 5);
3470    shift_amount = extract32(insn, 10, 6);
3471    rn = extract32(insn, 5, 5);
3472    rd = extract32(insn, 0, 5);
3473
3474    if (!sf && (shift_amount & (1 << 5))) {
3475        unallocated_encoding(s);
3476        return;
3477    }
3478
3479    tcg_rd = cpu_reg(s, rd);
3480
3481    if (opc == 1 && shift_amount == 0 && shift_type == 0 && rn == 31) {
3482        /* Unshifted ORR and ORN with WZR/XZR is the standard encoding for
3483         * register-register MOV and MVN, so it is worth special casing.
3484         */
3485        tcg_rm = cpu_reg(s, rm);
3486        if (invert) {
3487            tcg_gen_not_i64(tcg_rd, tcg_rm);
3488            if (!sf) {
3489                tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3490            }
3491        } else {
3492            if (sf) {
3493                tcg_gen_mov_i64(tcg_rd, tcg_rm);
3494            } else {
3495                tcg_gen_ext32u_i64(tcg_rd, tcg_rm);
3496            }
3497        }
3498        return;
3499    }
3500
3501    tcg_rm = read_cpu_reg(s, rm, sf);
3502
3503    if (shift_amount) {
3504        shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, shift_amount);
3505    }
3506
3507    tcg_rn = cpu_reg(s, rn);
3508
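    /* opc selects AND/ORR/EOR/ANDS; folding the N (invert) bit in as
     * bit 2 gives the variants that complement Rm (BIC/ORN/EON/BICS).
     */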
3509    switch (opc | (invert << 2)) {
3510    case 0: /* AND */
3511    case 3: /* ANDS */
3512        tcg_gen_and_i64(tcg_rd, tcg_rn, tcg_rm);
3513        break;
3514    case 1: /* ORR */
3515        tcg_gen_or_i64(tcg_rd, tcg_rn, tcg_rm);
3516        break;
3517    case 2: /* EOR */
3518        tcg_gen_xor_i64(tcg_rd, tcg_rn, tcg_rm);
3519        break;
3520    case 4: /* BIC */
3521    case 7: /* BICS */
3522        tcg_gen_andc_i64(tcg_rd, tcg_rn, tcg_rm);
3523        break;
3524    case 5: /* ORN */
3525        tcg_gen_orc_i64(tcg_rd, tcg_rn, tcg_rm);
3526        break;
3527    case 6: /* EON */
3528        tcg_gen_eqv_i64(tcg_rd, tcg_rn, tcg_rm);
3529        break;
3530    default:
3531        assert(FALSE);
3532        break;
3533    }
3534
3535    if (!sf) {
3536        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3537    }
3538
3539    if (opc == 3) {
3540        gen_logic_CC(sf, tcg_rd);
3541    }
3542}
3543
3544/*
3545 * C3.5.1 Add/subtract (extended register)
3546 *
3547 *  31|30|29|28       24|23 22|21|20   16|15  13|12  10|9  5|4  0|
3548 * +--+--+--+-----------+-----+--+-------+------+------+----+----+
3549 * |sf|op| S| 0 1 0 1 1 | opt | 1|  Rm   |option| imm3 | Rn | Rd |
3550 * +--+--+--+-----------+-----+--+-------+------+------+----+----+
3551 *
3552 *  sf: 0 -> 32bit, 1 -> 64bit
3553 *  op: 0 -> add  , 1 -> sub
3554 *   S: 1 -> set flags
3555 * opt: 00
3556 * option: extension type (see DecodeRegExtend)
3557 * imm3: optional shift to Rm
3558 *
3559 * Rd = Rn + LSL(extend(Rm), amount)
3560 */
3561static void disas_add_sub_ext_reg(DisasContext *s, uint32_t insn)
3562{
3563    int rd = extract32(insn, 0, 5);
3564    int rn = extract32(insn, 5, 5);
3565    int imm3 = extract32(insn, 10, 3);
3566    int option = extract32(insn, 13, 3);
3567    int rm = extract32(insn, 16, 5);
3568    bool setflags = extract32(insn, 29, 1);
3569    bool sub_op = extract32(insn, 30, 1);
3570    bool sf = extract32(insn, 31, 1);
3571
3572    TCGv_i64 tcg_rm, tcg_rn; /* temps */
3573    TCGv_i64 tcg_rd;
3574    TCGv_i64 tcg_result;
3575
3576    if (imm3 > 4) {
3577        unallocated_encoding(s);
3578        return;
3579    }
3580
3581    /* non-flag setting ops may use SP */
3582    if (!setflags) {
3583        tcg_rd = cpu_reg_sp(s, rd);
3584    } else {
3585        tcg_rd = cpu_reg(s, rd);
3586    }
3587    tcg_rn = read_cpu_reg_sp(s, rn, sf);
3588
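    /* Extend then shift Rm, e.g. ADD X0, SP, W1, UXTW #2 has option=2
     * (UXTW) and imm3=2: zero-extend W1, then shift it left by 2.
     */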
3589    tcg_rm = read_cpu_reg(s, rm, sf);
3590    ext_and_shift_reg(tcg_rm, tcg_rm, option, imm3);
3591
3592    tcg_result = tcg_temp_new_i64();
3593
3594    if (!setflags) {
3595        if (sub_op) {
3596            tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
3597        } else {
3598            tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
3599        }
3600    } else {
3601        if (sub_op) {
3602            gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
3603        } else {
3604            gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
3605        }
3606    }
3607
3608    if (sf) {
3609        tcg_gen_mov_i64(tcg_rd, tcg_result);
3610    } else {
3611        tcg_gen_ext32u_i64(tcg_rd, tcg_result);
3612    }
3613
3614    tcg_temp_free_i64(tcg_result);
3615}
3616
3617/*
3618 * C3.5.2 Add/subtract (shifted register)
3619 *
3620 *  31 30 29 28       24 23 22 21 20   16 15     10 9    5 4    0
3621 * +--+--+--+-----------+-----+--+-------+---------+------+------+
3622 * |sf|op| S| 0 1 0 1 1 |shift| 0|  Rm   |  imm6   |  Rn  |  Rd  |
3623 * +--+--+--+-----------+-----+--+-------+---------+------+------+
3624 *
3625 *    sf: 0 -> 32bit, 1 -> 64bit
3626 *    op: 0 -> add  , 1 -> sub
3627 *     S: 1 -> set flags
3628 * shift: 00 -> LSL, 01 -> LSR, 10 -> ASR, 11 -> RESERVED
3629 *  imm6: Shift amount to apply to Rm before the add/sub
3630 */
3631static void disas_add_sub_reg(DisasContext *s, uint32_t insn)
3632{
3633    int rd = extract32(insn, 0, 5);
3634    int rn = extract32(insn, 5, 5);
3635    int imm6 = extract32(insn, 10, 6);
3636    int rm = extract32(insn, 16, 5);
3637    int shift_type = extract32(insn, 22, 2);
3638    bool setflags = extract32(insn, 29, 1);
3639    bool sub_op = extract32(insn, 30, 1);
3640    bool sf = extract32(insn, 31, 1);
3641
3642    TCGv_i64 tcg_rd = cpu_reg(s, rd);
3643    TCGv_i64 tcg_rn, tcg_rm;
3644    TCGv_i64 tcg_result;
3645
3646    if ((shift_type == 3) || (!sf && (imm6 > 31))) {
3647        unallocated_encoding(s);
3648        return;
3649    }
3650
3651    tcg_rn = read_cpu_reg(s, rn, sf);
3652    tcg_rm = read_cpu_reg(s, rm, sf);
3653
3654    shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, imm6);
3655
3656    tcg_result = tcg_temp_new_i64();
3657
3658    if (!setflags) {
3659        if (sub_op) {
3660            tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
3661        } else {
3662            tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
3663        }
3664    } else {
3665        if (sub_op) {
3666            gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
3667        } else {
3668            gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
3669        }
3670    }
3671
3672    if (sf) {
3673        tcg_gen_mov_i64(tcg_rd, tcg_result);
3674    } else {
3675        tcg_gen_ext32u_i64(tcg_rd, tcg_result);
3676    }
3677
3678    tcg_temp_free_i64(tcg_result);
3679}
3680
3681/* C3.5.9 Data-processing (3 source)
3682 *
3683 *   31 30  29 28       24 23 21  20  16  15  14  10 9    5 4    0
3684 *  +--+------+-----------+------+------+----+------+------+------+
3685 *  |sf| op54 | 1 1 0 1 1 | op31 |  Rm  | o0 |  Ra  |  Rn  |  Rd  |
3686 *  +--+------+-----------+------+------+----+------+------+------+
3687 *
3688 */
3689static void disas_data_proc_3src(DisasContext *s, uint32_t insn)
3690{
3691    int rd = extract32(insn, 0, 5);
3692    int rn = extract32(insn, 5, 5);
3693    int ra = extract32(insn, 10, 5);
3694    int rm = extract32(insn, 16, 5);
3695    int op_id = (extract32(insn, 29, 3) << 4) |
3696        (extract32(insn, 21, 3) << 1) |
3697        extract32(insn, 15, 1);
3698    bool sf = extract32(insn, 31, 1);
3699    bool is_sub = extract32(op_id, 0, 1);
3700    bool is_high = extract32(op_id, 2, 1);
3701    bool is_signed = false;
3702    TCGv_i64 tcg_op1;
3703    TCGv_i64 tcg_op2;
3704    TCGv_i64 tcg_tmp;
3705
3706    /* Note that op_id is sf:op54:op31:o0 so it includes the 32/64 size flag */
3707    switch (op_id) {
3708    case 0x42: /* SMADDL */
3709    case 0x43: /* SMSUBL */
3710    case 0x44: /* SMULH */
3711        is_signed = true;
3712        break;
3713    case 0x0: /* MADD (32bit) */
3714    case 0x1: /* MSUB (32bit) */
3715    case 0x40: /* MADD (64bit) */
3716    case 0x41: /* MSUB (64bit) */
3717    case 0x4a: /* UMADDL */
3718    case 0x4b: /* UMSUBL */
3719    case 0x4c: /* UMULH */
3720        break;
3721    default:
3722        unallocated_encoding(s);
3723        return;
3724    }
3725
3726    if (is_high) {
3727        TCGv_i64 low_bits = tcg_temp_new_i64(); /* low bits discarded */
3728        TCGv_i64 tcg_rd = cpu_reg(s, rd);
3729        TCGv_i64 tcg_rn = cpu_reg(s, rn);
3730        TCGv_i64 tcg_rm = cpu_reg(s, rm);
3731
3732        if (is_signed) {
3733            tcg_gen_muls2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
3734        } else {
3735            tcg_gen_mulu2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
3736        }
3737
3738        tcg_temp_free_i64(low_bits);
3739        return;
3740    }
3741
3742    tcg_op1 = tcg_temp_new_i64();
3743    tcg_op2 = tcg_temp_new_i64();
3744    tcg_tmp = tcg_temp_new_i64();
3745
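    /* The xMADDL/xMSUBL forms widen the low 32 bits of Rn and Rm,
     * e.g. SMADDL Xd, Wn, Wm, Xa computes Xa + sext(Wn) * sext(Wm).
     */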
3746    if (op_id < 0x42) {
3747        tcg_gen_mov_i64(tcg_op1, cpu_reg(s, rn));
3748        tcg_gen_mov_i64(tcg_op2, cpu_reg(s, rm));
3749    } else {
3750        if (is_signed) {
3751            tcg_gen_ext32s_i64(tcg_op1, cpu_reg(s, rn));
3752            tcg_gen_ext32s_i64(tcg_op2, cpu_reg(s, rm));
3753        } else {
3754            tcg_gen_ext32u_i64(tcg_op1, cpu_reg(s, rn));
3755            tcg_gen_ext32u_i64(tcg_op2, cpu_reg(s, rm));
3756        }
3757    }
3758
3759    if (ra == 31 && !is_sub) {
3760        /* Special-case MADD with rA == XZR; it is the standard MUL alias */
3761        tcg_gen_mul_i64(cpu_reg(s, rd), tcg_op1, tcg_op2);
3762    } else {
3763        tcg_gen_mul_i64(tcg_tmp, tcg_op1, tcg_op2);
3764        if (is_sub) {
3765            tcg_gen_sub_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
3766        } else {
3767            tcg_gen_add_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
3768        }
3769    }
3770
3771    if (!sf) {
3772        tcg_gen_ext32u_i64(cpu_reg(s, rd), cpu_reg(s, rd));
3773    }
3774
3775    tcg_temp_free_i64(tcg_op1);
3776    tcg_temp_free_i64(tcg_op2);
3777    tcg_temp_free_i64(tcg_tmp);
3778}
3779
3780/* C3.5.3 - Add/subtract (with carry)
3781 *  31 30 29 28 27 26 25 24 23 22 21  20  16  15   10  9    5 4   0
3782 * +--+--+--+------------------------+------+---------+------+-----+
3783 * |sf|op| S| 1  1  0  1  0  0  0  0 |  rm  | opcode2 |  Rn  |  Rd |
3784 * +--+--+--+------------------------+------+---------+------+-----+
3785 *                                            [000000]
3786 */
3787
3788static void disas_adc_sbc(DisasContext *s, uint32_t insn)
3789{
3790    unsigned int sf, op, setflags, rm, rn, rd;
3791    TCGv_i64 tcg_y, tcg_rn, tcg_rd;
3792
3793    if (extract32(insn, 10, 6) != 0) {
3794        unallocated_encoding(s);
3795        return;
3796    }
3797
3798    sf = extract32(insn, 31, 1);
3799    op = extract32(insn, 30, 1);
3800    setflags = extract32(insn, 29, 1);
3801    rm = extract32(insn, 16, 5);
3802    rn = extract32(insn, 5, 5);
3803    rd = extract32(insn, 0, 5);
3804
3805    tcg_rd = cpu_reg(s, rd);
3806    tcg_rn = cpu_reg(s, rn);
3807
3808    if (op) {
3809        tcg_y = new_tmp_a64(s);
3810        tcg_gen_not_i64(tcg_y, cpu_reg(s, rm));
3811    } else {
3812        tcg_y = cpu_reg(s, rm);
3813    }
3814
3815    if (setflags) {
3816        gen_adc_CC(sf, tcg_rd, tcg_rn, tcg_y);
3817    } else {
3818        gen_adc(sf, tcg_rd, tcg_rn, tcg_y);
3819    }
3820}
3821
3822/* C3.5.4 - C3.5.5 Conditional compare (immediate / register)
3823 *  31 30 29 28 27 26 25 24 23 22 21  20    16 15  12  11  10  9   5  4 3   0
3824 * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
3825 * |sf|op| S| 1  1  0  1  0  0  1  0 |imm5/rm | cond |i/r |o2|  Rn  |o3|nzcv |
3826 * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
3827 *        [1]                             y                [0]       [0]
3828 */
3829static void disas_cc(DisasContext *s, uint32_t insn)
3830{
3831    unsigned int sf, op, y, cond, rn, nzcv, is_imm;
3832    TCGv_i32 tcg_t0, tcg_t1, tcg_t2;
3833    TCGv_i64 tcg_tmp, tcg_y, tcg_rn;
3834    DisasCompare c;
3835
3836    if (!extract32(insn, 29, 1)) {
3837        unallocated_encoding(s);
3838        return;
3839    }
3840    if (insn & (1 << 10 | 1 << 4)) {
3841        unallocated_encoding(s);
3842        return;
3843    }
3844    sf = extract32(insn, 31, 1);
3845    op = extract32(insn, 30, 1);
3846    is_imm = extract32(insn, 11, 1);
3847    y = extract32(insn, 16, 5); /* y = rm (reg) or imm5 (imm) */
3848    cond = extract32(insn, 12, 4);
3849    rn = extract32(insn, 5, 5);
3850    nzcv = extract32(insn, 0, 4);
3851
3852    /* Set T0 = !COND.  */
3853    tcg_t0 = tcg_temp_new_i32();
3854    arm_test_cc(&c, cond);
3855    tcg_gen_setcondi_i32(tcg_invert_cond(c.cond), tcg_t0, c.value, 0);
3856    arm_free_cc(&c);
3857
3858    /* Load the arguments for the new comparison.  */
3859    if (is_imm) {
3860        tcg_y = new_tmp_a64(s);
3861        tcg_gen_movi_i64(tcg_y, y);
3862    } else {
3863        tcg_y = cpu_reg(s, y);
3864    }
3865    tcg_rn = cpu_reg(s, rn);
3866
3867    /* Set the flags for the new comparison.  */
3868    tcg_tmp = tcg_temp_new_i64();
3869    if (op) {
3870        gen_sub_CC(sf, tcg_tmp, tcg_rn, tcg_y);
3871    } else {
3872        gen_add_CC(sf, tcg_tmp, tcg_rn, tcg_y);
3873    }
3874    tcg_temp_free_i64(tcg_tmp);
3875
3876    /* If COND was false, force the flags to #nzcv.  Compute two masks
3877     * to help with this: T1 = (COND ? 0 : -1), T2 = (COND ? -1 : 0).
3878     * For tcg hosts that support ANDC, we can make do with just T1.
3879     * In either case, allow the tcg optimizer to delete any unused mask.
3880     */
3881    tcg_t1 = tcg_temp_new_i32();
3882    tcg_t2 = tcg_temp_new_i32();
3883    tcg_gen_neg_i32(tcg_t1, tcg_t0);
3884    tcg_gen_subi_i32(tcg_t2, tcg_t0, 1);
3885
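    /* This relies on the flag storage convention: NF and VF keep their
     * flag in bit 31, CF is stored as 0 or 1, and ZF holds a value that
     * is zero exactly when Z is set.  Hence forcing a flag is an OR
     * with one mask and clearing it is an AND(C) with the other.
     */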
3886    if (nzcv & 8) { /* N */
3887        tcg_gen_or_i32(cpu_NF, cpu_NF, tcg_t1);
3888    } else {
3889        if (TCG_TARGET_HAS_andc_i32) {
3890            tcg_gen_andc_i32(cpu_NF, cpu_NF, tcg_t1);
3891        } else {
3892            tcg_gen_and_i32(cpu_NF, cpu_NF, tcg_t2);
3893        }
3894    }
3895    if (nzcv & 4) { /* Z */
3896        if (TCG_TARGET_HAS_andc_i32) {
3897            tcg_gen_andc_i32(cpu_ZF, cpu_ZF, tcg_t1);
3898        } else {
3899            tcg_gen_and_i32(cpu_ZF, cpu_ZF, tcg_t2);
3900        }
3901    } else {
3902        tcg_gen_or_i32(cpu_ZF, cpu_ZF, tcg_t0);
3903    }
3904    if (nzcv & 2) { /* C */
3905        tcg_gen_or_i32(cpu_CF, cpu_CF, tcg_t0);
3906    } else {
3907        if (TCG_TARGET_HAS_andc_i32) {
3908            tcg_gen_andc_i32(cpu_CF, cpu_CF, tcg_t1);
3909        } else {
3910            tcg_gen_and_i32(cpu_CF, cpu_CF, tcg_t2);
3911        }
3912    }
3913    if (nzcv & 1) { /* V */
3914        tcg_gen_or_i32(cpu_VF, cpu_VF, tcg_t1);
3915    } else {
3916        if (TCG_TARGET_HAS_andc_i32) {
3917            tcg_gen_andc_i32(cpu_VF, cpu_VF, tcg_t1);
3918        } else {
3919            tcg_gen_and_i32(cpu_VF, cpu_VF, tcg_t2);
3920        }
3921    }
3922    tcg_temp_free_i32(tcg_t0);
3923    tcg_temp_free_i32(tcg_t1);
3924    tcg_temp_free_i32(tcg_t2);
3925}
3926
3927/* C3.5.6 Conditional select
3928 *   31   30  29  28             21 20  16 15  12 11 10 9    5 4    0
3929 * +----+----+---+-----------------+------+------+-----+------+------+
3930 * | sf | op | S | 1 1 0 1 0 1 0 0 |  Rm  | cond | op2 |  Rn  |  Rd  |
3931 * +----+----+---+-----------------+------+------+-----+------+------+
3932 */
3933static void disas_cond_select(DisasContext *s, uint32_t insn)
3934{
3935    unsigned int sf, else_inv, rm, cond, else_inc, rn, rd;
3936    TCGv_i64 tcg_rd, zero;
3937    DisasCompare64 c;
3938
3939    if (extract32(insn, 29, 1) || extract32(insn, 11, 1)) {
3940        /* S == 1 or op2<1> == 1 */
3941        unallocated_encoding(s);
3942        return;
3943    }
3944    sf = extract32(insn, 31, 1);
3945    else_inv = extract32(insn, 30, 1);
3946    rm = extract32(insn, 16, 5);
3947    cond = extract32(insn, 12, 4);
3948    else_inc = extract32(insn, 10, 1);
3949    rn = extract32(insn, 5, 5);
3950    rd = extract32(insn, 0, 5);
3951
3952    tcg_rd = cpu_reg(s, rd);
3953
3954    a64_test_cc(&c, cond);
3955    zero = tcg_const_i64(0);
3956
3957    if (rn == 31 && rm == 31 && (else_inc ^ else_inv)) {
3958        /* CSET & CSETM.  */
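            /* The setcond yields 0/1 from the inverted condition; for
             * CSETM the negation below turns the 1 into all-ones.
             */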
3959        tcg_gen_setcond_i64(tcg_invert_cond(c.cond), tcg_rd, c.value, zero);
3960        if (else_inv) {
3961            tcg_gen_neg_i64(tcg_rd, tcg_rd);
3962        }
3963    } else {
3964        TCGv_i64 t_true = cpu_reg(s, rn);
3965        TCGv_i64 t_false = read_cpu_reg(s, rm, 1);
3966        if (else_inv && else_inc) {
3967            tcg_gen_neg_i64(t_false, t_false);
3968        } else if (else_inv) {
3969            tcg_gen_not_i64(t_false, t_false);
3970        } else if (else_inc) {
3971            tcg_gen_addi_i64(t_false, t_false, 1);
3972        }
3973        tcg_gen_movcond_i64(c.cond, tcg_rd, c.value, zero, t_true, t_false);
3974    }
3975
3976    tcg_temp_free_i64(zero);
3977    a64_free_cc(&c);
3978
3979    if (!sf) {
3980        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3981    }
3982}
3983
3984static void handle_clz(DisasContext *s, unsigned int sf,
3985                       unsigned int rn, unsigned int rd)
3986{
3987    TCGv_i64 tcg_rd, tcg_rn;
3988    tcg_rd = cpu_reg(s, rd);
3989    tcg_rn = cpu_reg(s, rn);
3990
3991    if (sf) {
3992        gen_helper_clz64(tcg_rd, tcg_rn);
3993    } else {
3994        TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
3995        tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
3996        gen_helper_clz(tcg_tmp32, tcg_tmp32);
3997        tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
3998        tcg_temp_free_i32(tcg_tmp32);
3999    }
4000}
4001
4002static void handle_cls(DisasContext *s, unsigned int sf,
4003                       unsigned int rn, unsigned int rd)
4004{
4005    TCGv_i64 tcg_rd, tcg_rn;
4006    tcg_rd = cpu_reg(s, rd);
4007    tcg_rn = cpu_reg(s, rn);
4008
4009    if (sf) {
4010        gen_helper_cls64(tcg_rd, tcg_rn);
4011    } else {
4012        TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
4013        tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
4014        gen_helper_cls32(tcg_tmp32, tcg_tmp32);
4015        tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
4016        tcg_temp_free_i32(tcg_tmp32);
4017    }
4018}
4019
4020static void handle_rbit(DisasContext *s, unsigned int sf,
4021                        unsigned int rn, unsigned int rd)
4022{
4023    TCGv_i64 tcg_rd, tcg_rn;
4024    tcg_rd = cpu_reg(s, rd);
4025    tcg_rn = cpu_reg(s, rn);
4026
4027    if (sf) {
4028        gen_helper_rbit64(tcg_rd, tcg_rn);
4029    } else {
4030        TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
4031        tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
4032        gen_helper_rbit(tcg_tmp32, tcg_tmp32);
4033        tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
4034        tcg_temp_free_i32(tcg_tmp32);
4035    }
4036}
4037
4038/* C5.6.149 REV with sf==1, opcode==3 ("REV64") */
4039static void handle_rev64(DisasContext *s, unsigned int sf,
4040                         unsigned int rn, unsigned int rd)
4041{
4042    if (!sf) {
4043        unallocated_encoding(s);
4044        return;
4045    }
4046    tcg_gen_bswap64_i64(cpu_reg(s, rd), cpu_reg(s, rn));
4047}
4048
4049/* C5.6.149 REV with sf==0, opcode==2
4050 * C5.6.151 REV32 (sf==1, opcode==2)
4051 */
4052static void handle_rev32(DisasContext *s, unsigned int sf,
4053                         unsigned int rn, unsigned int rd)
4054{
4055    TCGv_i64 tcg_rd = cpu_reg(s, rd);
4056
4057    if (sf) {
4058        TCGv_i64 tcg_tmp = tcg_temp_new_i64();
4059        TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
4060
4061        /* bswap32_i64 requires zero high word */
4062        tcg_gen_ext32u_i64(tcg_tmp, tcg_rn);
4063        tcg_gen_bswap32_i64(tcg_rd, tcg_tmp);
4064        tcg_gen_shri_i64(tcg_tmp, tcg_rn, 32);
4065        tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp);
4066        tcg_gen_concat32_i64(tcg_rd, tcg_rd, tcg_tmp);
4067
4068        tcg_temp_free_i64(tcg_tmp);
4069    } else {
4070        tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, rn));
4071        tcg_gen_bswap32_i64(tcg_rd, tcg_rd);
4072    }
4073}
4074
4075/* C5.6.150 REV16 (opcode==1) */
4076static void handle_rev16(DisasContext *s, unsigned int sf,
4077                         unsigned int rn, unsigned int rd)
4078{
4079    TCGv_i64 tcg_rd = cpu_reg(s, rd);
4080    TCGv_i64 tcg_tmp = tcg_temp_new_i64();
4081    TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
4082
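    /* Byte-swap within each 16-bit lane, one deposit per halfword,
     * e.g. 0x0011223344556677 becomes 0x1100332255447766.
     */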
4083    tcg_gen_andi_i64(tcg_tmp, tcg_rn, 0xffff);
4084    tcg_gen_bswap16_i64(tcg_rd, tcg_tmp);
4085
4086    tcg_gen_shri_i64(tcg_tmp, tcg_rn, 16);
4087    tcg_gen_andi_i64(tcg_tmp, tcg_tmp, 0xffff);
4088    tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
4089    tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, 16, 16);
4090
4091    if (sf) {
4092        tcg_gen_shri_i64(tcg_tmp, tcg_rn, 32);
4093        tcg_gen_andi_i64(tcg_tmp, tcg_tmp, 0xffff);
4094        tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
4095        tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, 32, 16);
4096
4097        tcg_gen_shri_i64(tcg_tmp, tcg_rn, 48);
4098        tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
4099        tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, 48, 16);
4100    }
4101
4102    tcg_temp_free_i64(tcg_tmp);
4103}
4104
4105/* C3.5.7 Data-processing (1 source)
4106 *   31  30  29  28             21 20     16 15    10 9    5 4    0
4107 * +----+---+---+-----------------+---------+--------+------+------+
4108 * | sf | 1 | S | 1 1 0 1 0 1 1 0 | opcode2 | opcode |  Rn  |  Rd  |
4109 * +----+---+---+-----------------+---------+--------+------+------+
4110 */
4111static void disas_data_proc_1src(DisasContext *s, uint32_t insn)
4112{
4113    unsigned int sf, opcode, rn, rd;
4114
4115    if (extract32(insn, 29, 1) || extract32(insn, 16, 5)) {
4116        unallocated_encoding(s);
4117        return;
4118    }
4119
4120    sf = extract32(insn, 31, 1);
4121    opcode = extract32(insn, 10, 6);
4122    rn = extract32(insn, 5, 5);
4123    rd = extract32(insn, 0, 5);
4124
4125    switch (opcode) {
4126    case 0: /* RBIT */
4127        handle_rbit(s, sf, rn, rd);
4128        break;
4129    case 1: /* REV16 */
4130        handle_rev16(s, sf, rn, rd);
4131        break;
4132    case 2: /* REV32 */
4133        handle_rev32(s, sf, rn, rd);
4134        break;
4135    case 3: /* REV64 */
4136        handle_rev64(s, sf, rn, rd);
4137        break;
4138    case 4: /* CLZ */
4139        handle_clz(s, sf, rn, rd);
4140        break;
4141    case 5: /* CLS */
4142        handle_cls(s, sf, rn, rd);
4143        break;
    default: /* the remaining opcode values are unallocated */
        unallocated_encoding(s);
        break;
4144    }
4145}
4146
4147static void handle_div(DisasContext *s, bool is_signed, unsigned int sf,
4148                       unsigned int rm, unsigned int rn, unsigned int rd)
4149{
4150    TCGv_i64 tcg_n, tcg_m, tcg_rd;
4151    tcg_rd = cpu_reg(s, rd);
4152
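    /* The 64-bit helpers serve the W forms too; signed 32-bit inputs
     * must be sign-extended first so that e.g. -4 / 2 divides as
     * negative values and yields -2.
     */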
4153    if (!sf && is_signed) {
4154        tcg_n = new_tmp_a64(s);
4155        tcg_m = new_tmp_a64(s);
4156        tcg_gen_ext32s_i64(tcg_n, cpu_reg(s, rn));
4157        tcg_gen_ext32s_i64(tcg_m, cpu_reg(s, rm));
4158    } else {
4159        tcg_n = read_cpu_reg(s, rn, sf);
4160        tcg_m = read_cpu_reg(s, rm, sf);
4161    }
4162
4163    if (is_signed) {
4164        gen_helper_sdiv64(tcg_rd, tcg_n, tcg_m);
4165    } else {
4166        gen_helper_udiv64(tcg_rd, tcg_n, tcg_m);
4167    }
4168
4169    if (!sf) { /* zero extend final result */
4170        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4171    }
4172}
4173
4174/* C5.6.115 LSLV, C5.6.118 LSRV, C5.6.17 ASRV, C5.6.154 RORV */
4175static void handle_shift_reg(DisasContext *s,
4176                             enum a64_shift_type shift_type, unsigned int sf,
4177                             unsigned int rm, unsigned int rn, unsigned int rd)
4178{
4179    TCGv_i64 tcg_shift = tcg_temp_new_i64();
4180    TCGv_i64 tcg_rd = cpu_reg(s, rd);
4181    TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
4182
4183    tcg_gen_andi_i64(tcg_shift, cpu_reg(s, rm), sf ? 63 : 31);
4184    shift_reg(tcg_rd, tcg_rn, sf, shift_type, tcg_shift);
4185    tcg_temp_free_i64(tcg_shift);
4186}
4187
4188/* CRC32[BHWX], CRC32C[BHWX] */
4189static void handle_crc32(DisasContext *s,
4190                         unsigned int sf, unsigned int sz, bool crc32c,
4191                         unsigned int rm, unsigned int rn, unsigned int rd)
4192{
4193    TCGv_i64 tcg_acc, tcg_val;
4194    TCGv_i32 tcg_bytes;
4195
4196    if (!arm_dc_feature(s, ARM_FEATURE_CRC)
4197        || (sf == 1 && sz != 3)
4198        || (sf == 0 && sz == 3)) {
4199        unallocated_encoding(s);
4200        return;
4201    }
4202
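    /* sz is log2 of the operand byte count: CRC32B/H/W/X use sz = 0..3,
     * and bit 2 of the opcode selects the CRC32C variants.
     */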
4203    if (sz == 3) {
4204        tcg_val = cpu_reg(s, rm);
4205    } else {
4206        uint64_t mask;
4207        switch (sz) {
4208        case 0:
4209            mask = 0xFF;
4210            break;
4211        case 1:
4212            mask = 0xFFFF;
4213            break;
4214        case 2:
4215            mask = 0xFFFFFFFF;
4216            break;
4217        default:
4218            g_assert_not_reached();
4219        }
4220        tcg_val = new_tmp_a64(s);
4221        tcg_gen_andi_i64(tcg_val, cpu_reg(s, rm), mask);
4222    }
4223
4224    tcg_acc = cpu_reg(s, rn);
4225    tcg_bytes = tcg_const_i32(1 << sz);
4226
4227    if (crc32c) {
4228        gen_helper_crc32c_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
4229    } else {
4230        gen_helper_crc32_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
4231    }
4232
4233    tcg_temp_free_i32(tcg_bytes);
4234}
4235
4236/* C3.5.8 Data-processing (2 source)
4237 *   31   30  29 28             21 20  16 15    10 9    5 4    0
4238 * +----+---+---+-----------------+------+--------+------+------+
4239 * | sf | 0 | S | 1 1 0 1 0 1 1 0 |  Rm  | opcode |  Rn  |  Rd  |
4240 * +----+---+---+-----------------+------+--------+------+------+
4241 */
4242static void disas_data_proc_2src(DisasContext *s, uint32_t insn)
4243{
4244    unsigned int sf, rm, opcode, rn, rd;
4245    sf = extract32(insn, 31, 1);
4246    rm = extract32(insn, 16, 5);
4247    opcode = extract32(insn, 10, 6);
4248    rn = extract32(insn, 5, 5);
4249    rd = extract32(insn, 0, 5);
4250
4251    if (extract32(insn, 29, 1)) {
4252        unallocated_encoding(s);
4253        return;
4254    }
4255
4256    switch (opcode) {
4257    case 2: /* UDIV */
4258        handle_div(s, false, sf, rm, rn, rd);
4259        break;
4260    case 3: /* SDIV */
4261        handle_div(s, true, sf, rm, rn, rd);
4262        break;
4263    case 8: /* LSLV */
4264        handle_shift_reg(s, A64_SHIFT_TYPE_LSL, sf, rm, rn, rd);
4265        break;
4266    case 9: /* LSRV */
4267        handle_shift_reg(s, A64_SHIFT_TYPE_LSR, sf, rm, rn, rd);
4268        break;
4269    case 10: /* ASRV */
4270        handle_shift_reg(s, A64_SHIFT_TYPE_ASR, sf, rm, rn, rd);
4271        break;
4272    case 11: /* RORV */
4273        handle_shift_reg(s, A64_SHIFT_TYPE_ROR, sf, rm, rn, rd);
4274        break;
4275    case 16:
4276    case 17:
4277    case 18:
4278    case 19:
4279    case 20:
4280    case 21:
4281    case 22:
4282    case 23: /* CRC32 */
4283    {
4284        int sz = extract32(opcode, 0, 2);
4285        bool crc32c = extract32(opcode, 2, 1);
4286        handle_crc32(s, sf, sz, crc32c, rm, rn, rd);
4287        break;
4288    }
4289    default:
4290        unallocated_encoding(s);
4291        break;
4292    }
4293}
4294
4295/* C3.5 Data processing - register */
4296static void disas_data_proc_reg(DisasContext *s, uint32_t insn)
4297{
4298    switch (extract32(insn, 24, 5)) {
4299    case 0x0a: /* Logical (shifted register) */
4300        disas_logic_reg(s, insn);
4301        break;
4302    case 0x0b: /* Add/subtract */
4303        if (insn & (1 << 21)) { /* (extended register) */
4304            disas_add_sub_ext_reg(s, insn);
4305        } else {
4306            disas_add_sub_reg(s, insn);
4307        }
4308        break;
4309    case 0x1b: /* Data-processing (3 source) */
4310        disas_data_proc_3src(s, insn);
4311        break;
4312    case 0x1a:
4313        switch (extract32(insn, 21, 3)) {
4314        case 0x0: /* Add/subtract (with carry) */
4315            disas_adc_sbc(s, insn);
4316            break;
4317        case 0x2: /* Conditional compare */
4318            disas_cc(s, insn); /* both imm and reg forms */
4319            break;
4320        case 0x4: /* Conditional select */
4321            disas_cond_select(s, insn);
4322            break;
4323        case 0x6: /* Data-processing */
4324            if (insn & (1 << 30)) { /* (1 source) */
4325                disas_data_proc_1src(s, insn);
4326            } else {            /* (2 source) */
4327                disas_data_proc_2src(s, insn);
4328            }
4329            break;
4330        default:
4331            unallocated_encoding(s);
4332            break;
4333        }
4334        break;
4335    default:
4336        unallocated_encoding(s);
4337        break;
4338    }
4339}
4340
4341static void handle_fp_compare(DisasContext *s, bool is_double,
4342                              unsigned int rn, unsigned int rm,
4343                              bool cmp_with_zero, bool signal_all_nans)
4344{
4345    TCGv_i64 tcg_flags = tcg_temp_new_i64();
4346    TCGv_ptr fpst = get_fpstatus_ptr();
4347
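    /* The compare helpers return the NZCV result in the PSTATE bit
     * positions (31..28) of tcg_flags; gen_set_nzcv() below installs it.
     */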
4348    if (is_double) {
4349        TCGv_i64 tcg_vn, tcg_vm;
4350
4351        tcg_vn = read_fp_dreg(s, rn);
4352        if (cmp_with_zero) {
4353            tcg_vm = tcg_const_i64(0);
4354        } else {
4355            tcg_vm = read_fp_dreg(s, rm);
4356        }
4357        if (signal_all_nans) {
4358            gen_helper_vfp_cmped_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
4359        } else {
4360            gen_helper_vfp_cmpd_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
4361        }
4362        tcg_temp_free_i64(tcg_vn);
4363        tcg_temp_free_i64(tcg_vm);
4364    } else {
4365        TCGv_i32 tcg_vn, tcg_vm;
4366
4367        tcg_vn = read_fp_sreg(s, rn);
4368        if (cmp_with_zero) {
4369            tcg_vm = tcg_const_i32(0);
4370        } else {
4371            tcg_vm = read_fp_sreg(s, rm);
4372        }
4373        if (signal_all_nans) {
4374            gen_helper_vfp_cmpes_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
4375        } else {
4376            gen_helper_vfp_cmps_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
4377        }
4378        tcg_temp_free_i32(tcg_vn);
4379        tcg_temp_free_i32(tcg_vm);
4380    }
4381
4382    tcg_temp_free_ptr(fpst);
4383
4384    gen_set_nzcv(tcg_flags);
4385
4386    tcg_temp_free_i64(tcg_flags);
4387}
4388
4389/* C3.6.22 Floating point compare
4390 *   31  30  29 28       24 23  22  21 20  16 15 14 13  10    9    5 4     0
4391 * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
4392 * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | op  | 1 0 0 0 |  Rn  |  op2  |
4393 * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
4394 */
4395static void disas_fp_compare(DisasContext *s, uint32_t insn)
4396{
4397    unsigned int mos, type, rm, op, rn, opc, op2r;
4398
4399    mos = extract32(insn, 29, 3);
4400    type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
4401    rm = extract32(insn, 16, 5);
4402    op = extract32(insn, 14, 2);
4403    rn = extract32(insn, 5, 5);
4404    opc = extract32(insn, 3, 2);
4405    op2r = extract32(insn, 0, 3);
4406
4407    if (mos || op || op2r || type > 1) {
4408        unallocated_encoding(s);
4409        return;
4410    }
4411
4412    if (!fp_access_check(s)) {
4413        return;
4414    }
4415
4416    handle_fp_compare(s, type, rn, rm, opc & 1, opc & 2);
4417}
4418
4419/* C3.6.23 Floating point conditional compare
4420 *   31  30  29 28       24 23  22  21 20  16 15  12 11 10 9    5  4   3    0
4421 * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
4422 * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | cond | 0 1 |  Rn  | op | nzcv |
4423 * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
4424 */
4425static void disas_fp_ccomp(DisasContext *s, uint32_t insn)
4426{
4427    unsigned int mos, type, rm, cond, rn, op, nzcv;
4428    TCGv_i64 tcg_flags;
4429    TCGLabel *label_continue = NULL;
4430
4431    mos = extract32(insn, 29, 3);
4432    type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
4433    rm = extract32(insn, 16, 5);
4434    cond = extract32(insn, 12, 4);
4435    rn = extract32(insn, 5, 5);
4436    op = extract32(insn, 4, 1);
4437    nzcv = extract32(insn, 0, 4);
4438
4439    if (mos || type > 1) {
4440        unallocated_encoding(s);
4441        return;
4442    }
4443
4444    if (!fp_access_check(s)) {
4445        return;
4446    }
4447
4448    if (cond < 0x0e) { /* not always */
4449        TCGLabel *label_match = gen_new_label();
4450        label_continue = gen_new_label();
4451        arm_gen_test_cc(cond, label_match);
4452        /* nomatch: */
4453        tcg_flags = tcg_const_i64(nzcv << 28);
4454        gen_set_nzcv(tcg_flags);
4455        tcg_temp_free_i64(tcg_flags);
4456        tcg_gen_br(label_continue);
4457        gen_set_label(label_match);
4458    }
4459
4460    handle_fp_compare(s, type, rn, rm, false, op);
4461
4462    if (cond < 0x0e) {
4463        gen_set_label(label_continue);
4464    }
4465}
4466
4467/* C3.6.24 Floating point conditional select
4468 *   31  30  29 28       24 23  22  21 20  16 15  12 11 10 9    5 4    0
4469 * +---+---+---+-----------+------+---+------+------+-----+------+------+
4470 * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | cond | 1 1 |  Rn  |  Rd  |
4471 * +---+---+---+-----------+------+---+------+------+-----+------+------+
4472 */
4473static void disas_fp_csel(DisasContext *s, uint32_t insn)
4474{
4475    unsigned int mos, type, rm, cond, rn, rd;
4476    TCGv_i64 t_true, t_false, t_zero;
4477    DisasCompare64 c;
4478
4479    mos = extract32(insn, 29, 3);
4480    type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
4481    rm = extract32(insn, 16, 5);
4482    cond = extract32(insn, 12, 4);
4483    rn = extract32(insn, 5, 5);
4484    rd = extract32(insn, 0, 5);
4485
4486    if (mos || type > 1) {
4487        unallocated_encoding(s);
4488        return;
4489    }
4490
4491    if (!fp_access_check(s)) {
4492        return;
4493    }
4494
4495    /* Zero extend sreg inputs to 64 bits now.  */
4496    t_true = tcg_temp_new_i64();
4497    t_false = tcg_temp_new_i64();
4498    read_vec_element(s, t_true, rn, 0, type ? MO_64 : MO_32);
4499    read_vec_element(s, t_false, rm, 0, type ? MO_64 : MO_32);
4500
4501    a64_test_cc(&c, cond);
4502    t_zero = tcg_const_i64(0);
4503    tcg_gen_movcond_i64(c.cond, t_true, c.value, t_zero, t_true, t_false);
4504    tcg_temp_free_i64(t_zero);
4505    tcg_temp_free_i64(t_false);
4506    a64_free_cc(&c);
4507
4508    /* Note that sregs write back zeros to the high bits,
4509       and we've already done the zero-extension.  */
4510    write_fp_dreg(s, rd, t_true);
4511    tcg_temp_free_i64(t_true);
4512}
4513
4514/* C3.6.25 Floating-point data-processing (1 source) - single precision */
4515static void handle_fp_1src_single(DisasContext *s, int opcode, int rd, int rn)
4516{
4517    TCGv_ptr fpst;
4518    TCGv_i32 tcg_op;
4519    TCGv_i32 tcg_res;
4520
4521    fpst = get_fpstatus_ptr();
4522    tcg_op = read_fp_sreg(s, rn);
4523    tcg_res = tcg_temp_new_i32();
4524
4525    switch (opcode) {
4526    case 0x0: /* FMOV */
4527        tcg_gen_mov_i32(tcg_res, tcg_op);
4528        break;
4529    case 0x1: /* FABS */
4530        gen_helper_vfp_abss(tcg_res, tcg_op);
4531        break;
4532    case 0x2: /* FNEG */
4533        gen_helper_vfp_negs(tcg_res, tcg_op);
4534        break;
4535    case 0x3: /* FSQRT */
4536        gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env);
4537        break;
4538    case 0x8: /* FRINTN */
4539    case 0x9: /* FRINTP */
4540    case 0xa: /* FRINTM */
4541    case 0xb: /* FRINTZ */
4542    case 0xc: /* FRINTA */
4543    {
4544        TCGv_i32 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(opcode & 7));
4545
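        /* set_rmode installs the new rounding mode and hands back the
         * old one in tcg_rmode, so the second call restores it.
         */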
4546        gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
4547        gen_helper_rints(tcg_res, tcg_op, fpst);
4548
4549        gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
4550        tcg_temp_free_i32(tcg_rmode);
4551        break;
4552    }
4553    case 0xe: /* FRINTX */
4554        gen_helper_rints_exact(tcg_res, tcg_op, fpst);
4555        break;
4556    case 0xf: /* FRINTI */
4557        gen_helper_rints(tcg_res, tcg_op, fpst);
4558        break;
4559    default:
4560        abort();
4561    }
4562
4563    write_fp_sreg(s, rd, tcg_res);
4564
4565    tcg_temp_free_ptr(fpst);
4566    tcg_temp_free_i32(tcg_op);
4567    tcg_temp_free_i32(tcg_res);
4568}
4569
4570/* C3.6.25 Floating-point data-processing (1 source) - double precision */
4571static void handle_fp_1src_double(DisasContext *s, int opcode, int rd, int rn)
4572{
4573    TCGv_ptr fpst;
4574    TCGv_i64 tcg_op;
4575    TCGv_i64 tcg_res;
4576
4577    fpst = get_fpstatus_ptr();
4578    tcg_op = read_fp_dreg(s, rn);
4579    tcg_res = tcg_temp_new_i64();
4580
4581    switch (opcode) {
4582    case 0x0: /* FMOV */
4583        tcg_gen_mov_i64(tcg_res, tcg_op);
4584        break;
4585    case 0x1: /* FABS */
4586        gen_helper_vfp_absd(tcg_res, tcg_op);
4587        break;
4588    case 0x2: /* FNEG */
4589        gen_helper_vfp_negd(tcg_res, tcg_op);
4590        break;
4591    case 0x3: /* FSQRT */
4592        gen_helper_vfp_sqrtd(tcg_res, tcg_op, cpu_env);
4593        break;
4594    case 0x8: /* FRINTN */
4595    case 0x9: /* FRINTP */
4596    case 0xa: /* FRINTM */
4597    case 0xb: /* FRINTZ */
4598    case 0xc: /* FRINTA */
4599    {
4600        TCGv_i32 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(opcode & 7));
4601
4602        gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
4603        gen_helper_rintd(tcg_res, tcg_op, fpst);
4604
4605        gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
4606        tcg_temp_free_i32(tcg_rmode);
4607        break;
4608    }
4609    case 0xe: /* FRINTX */
4610        gen_helper_rintd_exact(tcg_res, tcg_op, fpst);
4611        break;
4612    case 0xf: /* FRINTI */
4613        gen_helper_rintd(tcg_res, tcg_op, fpst);
4614        break;
4615    default:
4616        abort();
4617    }
4618
4619    write_fp_dreg(s, rd, tcg_res);
4620
4621    tcg_temp_free_ptr(fpst);
4622    tcg_temp_free_i64(tcg_op);
4623    tcg_temp_free_i64(tcg_res);
4624}
4625
4626static void handle_fp_fcvt(DisasContext *s, int opcode,
4627                           int rd, int rn, int dtype, int ntype)
4628{
4629    switch (ntype) {
4630    case 0x0:
4631    {
4632        TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
4633        if (dtype == 1) {
4634            /* Single to double */
4635            TCGv_i64 tcg_rd = tcg_temp_new_i64();
4636            gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, cpu_env);
4637            write_fp_dreg(s, rd, tcg_rd);
4638            tcg_temp_free_i64(tcg_rd);
4639        } else {
4640            /* Single to half */
4641            TCGv_i32 tcg_rd = tcg_temp_new_i32();
4642            gen_helper_vfp_fcvt_f32_to_f16(tcg_rd, tcg_rn, cpu_env);
4643            /* write_fp_sreg is OK here because top half of tcg_rd is zero */
4644            write_fp_sreg(s, rd, tcg_rd);
4645            tcg_temp_free_i32(tcg_rd);
4646        }
4647        tcg_temp_free_i32(tcg_rn);
4648        break;
4649    }
4650    case 0x1:
4651    {
4652        TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
4653        TCGv_i32 tcg_rd = tcg_temp_new_i32();
4654        if (dtype == 0) {
4655            /* Double to single */
4656            gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, cpu_env);
4657        } else {
4658            /* Double to half */
4659            gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, cpu_env);
4660            /* write_fp_sreg is OK here because top half of tcg_rd is zero */
4661        }
4662        write_fp_sreg(s, rd, tcg_rd);
4663        tcg_temp_free_i32(tcg_rd);
4664        tcg_temp_free_i64(tcg_rn);
4665        break;
4666    }
4667    case 0x3:
4668    {
4669        TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
4670        tcg_gen_ext16u_i32(tcg_rn, tcg_rn);
4671        if (dtype == 0) {
4672            /* Half to single */
4673            TCGv_i32 tcg_rd = tcg_temp_new_i32();
4674            gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, cpu_env);
4675            write_fp_sreg(s, rd, tcg_rd);
4676            tcg_temp_free_i32(tcg_rd);
4677        } else {
4678            /* Half to double */
4679            TCGv_i64 tcg_rd = tcg_temp_new_i64();
4680            gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, cpu_env);
4681            write_fp_dreg(s, rd, tcg_rd);
4682            tcg_temp_free_i64(tcg_rd);
4683        }
4684        tcg_temp_free_i32(tcg_rn);
4685        break;
4686    }
4687    default:
4688        abort();
4689    }
4690}
4691
4692/* C3.6.25 Floating point data-processing (1 source)
4693 *   31  30  29 28       24 23  22  21 20    15 14       10 9    5 4    0
4694 * +---+---+---+-----------+------+---+--------+-----------+------+------+
4695 * | M | 0 | S | 1 1 1 1 0 | type | 1 | opcode | 1 0 0 0 0 |  Rn  |  Rd  |
4696 * +---+---+---+-----------+------+---+--------+-----------+------+------+
4697 */
4698static void disas_fp_1src(DisasContext *s, uint32_t insn)
4699{
4700    int type = extract32(insn, 22, 2);
4701    int opcode = extract32(insn, 15, 6);
4702    int rn = extract32(insn, 5, 5);
4703    int rd = extract32(insn, 0, 5);
4704
4705    switch (opcode) {
4706    case 0x4: case 0x5: case 0x7:
4707    {
4708        /* FCVT between half, single and double precision */
4709        int dtype = extract32(opcode, 0, 2);
4710        if (type == 2 || dtype == type) {
4711            unallocated_encoding(s);
4712            return;
4713        }
4714        if (!fp_access_check(s)) {
4715            return;
4716        }
4717
4718        handle_fp_fcvt(s, opcode, rd, rn, dtype, type);
4719        break;
4720    }
4721    case 0x0 ... 0x3:
4722    case 0x8 ... 0xc:
4723    case 0xe ... 0xf:
4724        /* 32-to-32 and 64-to-64 ops */
4725        switch (type) {
4726        case 0:
4727            if (!fp_access_check(s)) {
4728                return;
4729            }
4730
4731            handle_fp_1src_single(s, opcode, rd, rn);
4732            break;
4733        case 1:
4734            if (!fp_access_check(s)) {
4735                return;
4736            }
4737
4738            handle_fp_1src_double(s, opcode, rd, rn);
4739            break;
4740        default:
4741            unallocated_encoding(s);
4742        }
4743        break;
4744    default:
4745        unallocated_encoding(s);
4746        break;
4747    }
4748}
4749
4750/* C3.6.26 Floating-point data-processing (2 source) - single precision */
4751static void handle_fp_2src_single(DisasContext *s, int opcode,
4752                                  int rd, int rn, int rm)
4753{
4754    TCGv_i32 tcg_op1;
4755    TCGv_i32 tcg_op2;
4756    TCGv_i32 tcg_res;
4757    TCGv_ptr fpst;
4758
4759    tcg_res = tcg_temp_new_i32();
4760    fpst = get_fpstatus_ptr();
4761    tcg_op1 = read_fp_sreg(s, rn);
4762    tcg_op2 = read_fp_sreg(s, rm);
4763
4764    switch (opcode) {
4765    case 0x0: /* FMUL */
4766        gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
4767        break;
4768    case 0x1: /* FDIV */
4769        gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
4770        break;
4771    case 0x2: /* FADD */
4772        gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
4773        break;
4774    case 0x3: /* FSUB */
4775        gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
4776        break;
4777    case 0x4: /* FMAX */
4778        gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
4779        break;
4780    case 0x5: /* FMIN */
4781        gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
4782        break;
4783    case 0x6: /* FMAXNM */
4784        gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
4785        break;
4786    case 0x7: /* FMINNM */
4787        gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
4788        break;
4789    case 0x8: /* FNMUL */
4790        gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
4791        gen_helper_vfp_negs(tcg_res, tcg_res);
4792        break;
4793    }
4794
4795    write_fp_sreg(s, rd, tcg_res);
4796
4797    tcg_temp_free_ptr(fpst);
4798    tcg_temp_free_i32(tcg_op1);
4799    tcg_temp_free_i32(tcg_op2);
4800    tcg_temp_free_i32(tcg_res);
4801}
4802
4803/* C3.6.26 Floating-point data-processing (2 source) - double precision */
4804static void handle_fp_2src_double(DisasContext *s, int opcode,
4805                                  int rd, int rn, int rm)
4806{
4807    TCGv_i64 tcg_op1;
4808    TCGv_i64 tcg_op2;
4809    TCGv_i64 tcg_res;
4810    TCGv_ptr fpst;
4811
4812    tcg_res = tcg_temp_new_i64();
4813    fpst = get_fpstatus_ptr();
4814    tcg_op1 = read_fp_dreg(s, rn);
4815    tcg_op2 = read_fp_dreg(s, rm);
4816
4817    switch (opcode) {
4818    case 0x0: /* FMUL */
4819        gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
4820        break;
4821    case 0x1: /* FDIV */
4822        gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
4823        break;
4824    case 0x2: /* FADD */
4825        gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
4826        break;
4827    case 0x3: /* FSUB */
4828        gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
4829        break;
4830    case 0x4: /* FMAX */
4831        gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
4832        break;
4833    case 0x5: /* FMIN */
4834        gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
4835        break;
4836    case 0x6: /* FMAXNM */
4837        gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
4838        break;
4839    case 0x7: /* FMINNM */
4840        gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
4841        break;
4842    case 0x8: /* FNMUL */
4843        gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
4844        gen_helper_vfp_negd(tcg_res, tcg_res);
4845        break;
4846    }
4847
4848    write_fp_dreg(s, rd, tcg_res);
4849
4850    tcg_temp_free_ptr(fpst);
4851    tcg_temp_free_i64(tcg_op1);
4852    tcg_temp_free_i64(tcg_op2);
4853    tcg_temp_free_i64(tcg_res);
4854}
4855
4856/* C3.6.26 Floating point data-processing (2 source)
4857 *   31  30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
4858 * +---+---+---+-----------+------+---+------+--------+-----+------+------+
4859 * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | opcode | 1 0 |  Rn  |  Rd  |
4860 * +---+---+---+-----------+------+---+------+--------+-----+------+------+
4861 */
4862static void disas_fp_2src(DisasContext *s, uint32_t insn)
4863{
4864    int type = extract32(insn, 22, 2);
4865    int rd = extract32(insn, 0, 5);
4866    int rn = extract32(insn, 5, 5);
4867    int rm = extract32(insn, 16, 5);
4868    int opcode = extract32(insn, 12, 4);
4869
4870    if (opcode > 8) {
4871        unallocated_encoding(s);
4872        return;
4873    }
4874
4875    switch (type) {
4876    case 0:
4877        if (!fp_access_check(s)) {
4878            return;
4879        }
4880        handle_fp_2src_single(s, opcode, rd, rn, rm);
4881        break;
4882    case 1:
4883        if (!fp_access_check(s)) {
4884            return;
4885        }
4886        handle_fp_2src_double(s, opcode, rd, rn, rm);
4887        break;
4888    default:
4889        unallocated_encoding(s);
4890    }
4891}
4892
4893/* C3.6.27 Floating-point data-processing (3 source) - single precision */
4894static void handle_fp_3src_single(DisasContext *s, bool o0, bool o1,
4895                                  int rd, int rn, int rm, int ra)
4896{
4897    TCGv_i32 tcg_op1, tcg_op2, tcg_op3;
4898    TCGv_i32 tcg_res = tcg_temp_new_i32();
4899    TCGv_ptr fpst = get_fpstatus_ptr();
4900
4901    tcg_op1 = read_fp_sreg(s, rn);
4902    tcg_op2 = read_fp_sreg(s, rm);
4903    tcg_op3 = read_fp_sreg(s, ra);
4904
4905    /* These are fused multiply-add, and must be done as one
4906     * floating point operation with no rounding between the
4907     * multiplication and addition steps.
4908     * NB that doing the negations here as separate steps is
4909 * correct: an input NaN should come out with its sign bit
4910 * flipped if it is a negated input.
4911     */
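    /* o1:o0 select the variant: 0:0 FMADD (a + n*m), 0:1 FMSUB
     * (a - n*m), 1:0 FNMADD (-a - n*m), 1:1 FNMSUB (-a + n*m).
     */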
4912    if (o1 == true) {
4913        gen_helper_vfp_negs(tcg_op3, tcg_op3);
4914    }
4915
4916    if (o0 != o1) {
4917        gen_helper_vfp_negs(tcg_op1, tcg_op1);
4918    }
4919
4920    gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
4921
4922    write_fp_sreg(s, rd, tcg_res);
4923
4924    tcg_temp_free_ptr(fpst);
4925    tcg_temp_free_i32(tcg_op1);
4926    tcg_temp_free_i32(tcg_op2);
4927    tcg_temp_free_i32(tcg_op3);
4928    tcg_temp_free_i32(tcg_res);
4929}
4930
4931/* C3.6.27 Floating-point data-processing (3 source) - double precision */
4932static void handle_fp_3src_double(DisasContext *s, bool o0, bool o1,
4933                                  int rd, int rn, int rm, int ra)
4934{
4935    TCGv_i64 tcg_op1, tcg_op2, tcg_op3;
4936    TCGv_i64 tcg_res = tcg_temp_new_i64();
4937    TCGv_ptr fpst = get_fpstatus_ptr();
4938
4939    tcg_op1 = read_fp_dreg(s, rn);
4940    tcg_op2 = read_fp_dreg(s, rm);
4941    tcg_op3 = read_fp_dreg(s, ra);
4942
4943    /* These are fused multiply-add, and must be done as one
4944     * floating point operation with no rounding between the
4945     * multiplication and addition steps.
4946     * NB that doing the negations here as separate steps is
4947 * correct: an input NaN should come out with its sign bit
4948 * flipped if it is a negated input.
4949     */
4950    if (o1 == true) {
4951        gen_helper_vfp_negd(tcg_op3, tcg_op3);
4952    }
4953
4954    if (o0 != o1) {
4955        gen_helper_vfp_negd(tcg_op1, tcg_op1);
4956    }
4957
4958    gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
4959
4960    write_fp_dreg(s, rd, tcg_res);
4961
4962    tcg_temp_free_ptr(fpst);
4963    tcg_temp_free_i64(tcg_op1);
4964    tcg_temp_free_i64(tcg_op2);
4965    tcg_temp_free_i64(tcg_op3);
4966    tcg_temp_free_i64(tcg_res);
4967}
4968
4969/* C3.6.27 Floating point data-processing (3 source)
4970 *   31  30  29 28       24 23  22  21  20  16  15  14  10 9    5 4    0
4971 * +---+---+---+-----------+------+----+------+----+------+------+------+
4972 * | M | 0 | S | 1 1 1 1 1 | type | o1 |  Rm  | o0 |  Ra  |  Rn  |  Rd  |
4973 * +---+---+---+-----------+------+----+------+----+------+------+------+
4974 */
4975static void disas_fp_3src(DisasContext *s, uint32_t insn)
4976{
4977    int type = extract32(insn, 22, 2);
4978    int rd = extract32(insn, 0, 5);
4979    int rn = extract32(insn, 5, 5);
4980    int ra = extract32(insn, 10, 5);
4981    int rm = extract32(insn, 16, 5);
4982    bool o0 = extract32(insn, 15, 1);
4983    bool o1 = extract32(insn, 21, 1);
4984
4985    switch (type) {
4986    case 0:
4987        if (!fp_access_check(s)) {
4988            return;
4989        }
4990        handle_fp_3src_single(s, o0, o1, rd, rn, rm, ra);
4991        break;
4992    case 1:
4993        if (!fp_access_check(s)) {
4994            return;
4995        }
4996        handle_fp_3src_double(s, o0, o1, rd, rn, rm, ra);
4997        break;
4998    default:
4999        unallocated_encoding(s);
5000    }
5001}
5002
5003/* C3.6.28 Floating point immediate
5004 *   31  30  29 28       24 23  22  21 20        13 12   10 9    5 4    0
5005 * +---+---+---+-----------+------+---+------------+-------+------+------+
5006 * | M | 0 | S | 1 1 1 1 0 | type | 1 |    imm8    | 1 0 0 | imm5 |  Rd  |
5007 * +---+---+---+-----------+------+---+------------+-------+------+------+
5008 */
5009static void disas_fp_imm(DisasContext *s, uint32_t insn)
5010{
5011    int rd = extract32(insn, 0, 5);
5012    int imm8 = extract32(insn, 13, 8);
5013    int is_double = extract32(insn, 22, 2);
5014    uint64_t imm;
5015    TCGv_i64 tcg_res;
5016
5017    if (is_double > 1) {
5018        unallocated_encoding(s);
5019        return;
5020    }
5021
5022    if (!fp_access_check(s)) {
5023        return;
5024    }
5025
5026    /* The imm8 encodes the sign bit, enough bits to represent
5027     * an exponent in the range 01....1xx to 10....0xx,
5028     * and the most significant 4 bits of the mantissa; see
5029     * VFPExpandImm() in the v8 ARM ARM.
5030     */
5031    if (is_double) {
5032        imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
5033            (extract32(imm8, 6, 1) ? 0x3fc0 : 0x4000) |
5034            extract32(imm8, 0, 6);
5035        imm <<= 48;
5036    } else {
5037        imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
5038            (extract32(imm8, 6, 1) ? 0x3e00 : 0x4000) |
5039            (extract32(imm8, 0, 6) << 3);
5040        imm <<= 16;
5041    }
5042
5043    tcg_res = tcg_const_i64(imm);
5044    write_fp_dreg(s, rd, tcg_res);
5045    tcg_temp_free_i64(tcg_res);
5046}
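
/* Illustrative sketch (not used by the decoder): the single-precision
 * half of the VFPExpandImm() expansion performed above, written as a
 * plain C helper so the bit packing is easier to cross-check against
 * the ARM ARM pseudocode.
 */
static inline uint32_t example_vfp_expand_imm_single(int imm8)
{
    uint32_t imm16 = (extract32(imm8, 7, 1) ? 0x8000 : 0)
        | (extract32(imm8, 6, 1) ? 0x3e00 : 0x4000)
        | (extract32(imm8, 0, 6) << 3);
    /* e.g. imm8 == 0x70 expands to 0x3f800000, i.e. 1.0f */
    return imm16 << 16;
}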
5047
5048/* Handle floating point <=> fixed point conversions. Note that we can
5049 * also deal with fp <=> integer conversions as a special case (scale == 64).
5050 * OPTME: consider handling that special case specially or at least skipping
5051 * the call to scalbn in the helpers for zero shifts.
5052 */
5053static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode,
5054                           bool itof, int rmode, int scale, int sf, int type)
5055{
5056    bool is_signed = !(opcode & 1);
5057    bool is_double = type;
5058    TCGv_ptr tcg_fpstatus;
5059    TCGv_i32 tcg_shift;
5060
5061    tcg_fpstatus = get_fpstatus_ptr();
5062
5063    tcg_shift = tcg_const_i32(64 - scale);
5064
5065    if (itof) {
5066        TCGv_i64 tcg_int = cpu_reg(s, rn);
5067        if (!sf) {
5068            TCGv_i64 tcg_extend = new_tmp_a64(s);
5069
5070            if (is_signed) {
5071                tcg_gen_ext32s_i64(tcg_extend, tcg_int);
5072            } else {
5073                tcg_gen_ext32u_i64(tcg_extend, tcg_int);
5074            }
5075
5076            tcg_int = tcg_extend;
5077        }
5078
5079        if (is_double) {
5080            TCGv_i64 tcg_double = tcg_temp_new_i64();
5081            if (is_signed) {
5082                gen_helper_vfp_sqtod(tcg_double, tcg_int,
5083                                     tcg_shift, tcg_fpstatus);
5084            } else {
5085                gen_helper_vfp_uqtod(tcg_double, tcg_int,
5086                                     tcg_shift, tcg_fpstatus);
5087            }
5088            write_fp_dreg(s, rd, tcg_double);
5089            tcg_temp_free_i64(tcg_double);
5090        } else {
5091            TCGv_i32 tcg_single = tcg_temp_new_i32();
5092            if (is_signed) {
5093                gen_helper_vfp_sqtos(tcg_single, tcg_int,
5094                                     tcg_shift, tcg_fpstatus);
5095            } else {
5096                gen_helper_vfp_uqtos(tcg_single, tcg_int,
5097                                     tcg_shift, tcg_fpstatus);
5098            }
5099            write_fp_sreg(s, rd, tcg_single);
5100            tcg_temp_free_i32(tcg_single);
5101        }
5102    } else {
5103        TCGv_i64 tcg_int = cpu_reg(s, rd);
5104        TCGv_i32 tcg_rmode;
5105
5106        if (extract32(opcode, 2, 1)) {
5107            /* There are too many rounding modes to all fit into rmode,
5108             * so FCVTA[US] is a special case.
5109             */
5110            rmode = FPROUNDING_TIEAWAY;
5111        }
5112
5113        tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
5114
5115        gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
5116
5117        if (is_double) {
5118            TCGv_i64 tcg_double = read_fp_dreg(s, rn);
5119            if (is_signed) {
5120                if (!sf) {
5121                    gen_helper_vfp_tosld(tcg_int, tcg_double,
5122                                         tcg_shift, tcg_fpstatus);
5123                } else {
5124                    gen_helper_vfp_tosqd(tcg_int, tcg_double,
5125                                         tcg_shift, tcg_fpstatus);
5126                }
5127            } else {
5128                if (!sf) {
5129                    gen_helper_vfp_tould(tcg_int, tcg_double,
5130                                         tcg_shift, tcg_fpstatus);
5131                } else {
5132                    gen_helper_vfp_touqd(tcg_int, tcg_double,
5133                                         tcg_shift, tcg_fpstatus);
5134                }
5135            }
5136            tcg_temp_free_i64(tcg_double);
5137        } else {
5138            TCGv_i32 tcg_single = read_fp_sreg(s, rn);
5139            if (sf) {
5140                if (is_signed) {
5141                    gen_helper_vfp_tosqs(tcg_int, tcg_single,
5142                                         tcg_shift, tcg_fpstatus);
5143                } else {
5144                    gen_helper_vfp_touqs(tcg_int, tcg_single,
5145                                         tcg_shift, tcg_fpstatus);
5146                }
5147            } else {
5148                TCGv_i32 tcg_dest = tcg_temp_new_i32();
5149                if (is_signed) {
5150                    gen_helper_vfp_tosls(tcg_dest, tcg_single,
5151                                         tcg_shift, tcg_fpstatus);
5152                } else {
5153                    gen_helper_vfp_touls(tcg_dest, tcg_single,
5154                                         tcg_shift, tcg_fpstatus);
5155                }
5156                tcg_gen_extu_i32_i64(tcg_int, tcg_dest);
5157                tcg_temp_free_i32(tcg_dest);
5158            }
5159            tcg_temp_free_i32(tcg_single);
5160        }
5161
5162        gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
5163        tcg_temp_free_i32(tcg_rmode);
5164
5165        if (!sf) {
5166            tcg_gen_ext32u_i64(tcg_int, tcg_int);
5167        }
5168    }
5169
5170    tcg_temp_free_ptr(tcg_fpstatus);
5171    tcg_temp_free_i32(tcg_shift);
5172}
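
/* Exposition-only sketch of the fixed-point scaling done above, using
 * host doubles (assumes <math.h> ldexp; the real conversion goes through
 * the softfloat helpers, with tcg_shift carrying 64 - scale).
 */
static inline double example_scvtf_fixed(int64_t x, int scale)
{
    int fracbits = 64 - scale;           /* what tcg_shift carries above */
    return ldexp((double)x, -fracbits);  /* x / 2^fracbits */
}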
5173
5174/* C3.6.29 Floating point <-> fixed point conversions
5175 *   31   30  29 28       24 23  22  21 20   19 18    16 15   10 9    5 4    0
5176 * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
5177 * | sf | 0 | S | 1 1 1 1 0 | type | 0 | rmode | opcode | scale |  Rn  |  Rd  |
5178 * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
5179 */
5180static void disas_fp_fixed_conv(DisasContext *s, uint32_t insn)
5181{
5182    int rd = extract32(insn, 0, 5);
5183    int rn = extract32(insn, 5, 5);
5184    int scale = extract32(insn, 10, 6);
5185    int opcode = extract32(insn, 16, 3);
5186    int rmode = extract32(insn, 19, 2);
5187    int type = extract32(insn, 22, 2);
5188    bool sbit = extract32(insn, 29, 1);
5189    bool sf = extract32(insn, 31, 1);
5190    bool itof;
5191
5192    if (sbit || (type > 1)
5193        || (!sf && scale < 32)) {
5194        unallocated_encoding(s);
5195        return;
5196    }
5197
5198    switch ((rmode << 3) | opcode) {
5199    case 0x2: /* SCVTF */
5200    case 0x3: /* UCVTF */
5201        itof = true;
5202        break;
5203    case 0x18: /* FCVTZS */
5204    case 0x19: /* FCVTZU */
5205        itof = false;
5206        break;
5207    default:
5208        unallocated_encoding(s);
5209        return;
5210    }
5211
5212    if (!fp_access_check(s)) {
5213        return;
5214    }
5215
5216    handle_fpfpcvt(s, rd, rn, opcode, itof, FPROUNDING_ZERO, scale, sf, type);
5217}
5218
5219static void handle_fmov(DisasContext *s, int rd, int rn, int type, bool itof)
5220{
5221    /* FMOV: gpr to or from float, double, or top half of quad fp reg,
5222     * without conversion.
5223     */
5224
5225    if (itof) {
5226        TCGv_i64 tcg_rn = cpu_reg(s, rn);
5227
5228        switch (type) {
5229        case 0:
5230        {
5231            /* 32 bit */
5232            TCGv_i64 tmp = tcg_temp_new_i64();
5233            tcg_gen_ext32u_i64(tmp, tcg_rn);
5234            tcg_gen_st_i64(tmp, cpu_env, fp_reg_offset(s, rd, MO_64));
5235            tcg_gen_movi_i64(tmp, 0);
5236            tcg_gen_st_i64(tmp, cpu_env, fp_reg_hi_offset(s, rd));
5237            tcg_temp_free_i64(tmp);
5238            break;
5239        }
5240        case 1:
5241        {
5242            /* 64 bit */
5243            TCGv_i64 tmp = tcg_const_i64(0);
5244            tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_offset(s, rd, MO_64));
5245            tcg_gen_st_i64(tmp, cpu_env, fp_reg_hi_offset(s, rd));
5246            tcg_temp_free_i64(tmp);
5247            break;
5248        }
5249        case 2:
5250            /* 64 bit to top half. */
5251            tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_hi_offset(s, rd));
5252            break;
5253        }
5254    } else {
5255        TCGv_i64 tcg_rd = cpu_reg(s, rd);
5256
5257        switch (type) {
5258        case 0:
5259            /* 32 bit */
5260            tcg_gen_ld32u_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_32));
5261            break;
5262        case 1:
5263            /* 64 bit */
5264            tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_64));
5265            break;
5266        case 2:
5267            /* 64 bits from top half */
5268            tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_hi_offset(s, rn));
5269            break;
5270        }
5271    }
5272}
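
/* Host-side illustration of what FMOV does: a raw bit copy with no value
 * conversion; memcpy is the portable way to express the reinterpret.
 */
static inline float example_fmov_w_to_s(uint32_t bits)
{
    float f;
    memcpy(&f, &bits, sizeof(f));  /* e.g. 0x3f800000 reads back as 1.0f */
    return f;
}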
5273
5274/* C3.6.30 Floating point <-> integer conversions
5275 *   31   30  29 28       24 23  22  21 20   19 18 16 15         10 9  5 4  0
5276 * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
5277 * | sf | 0 | S | 1 1 1 1 0 | type | 1 | rmode | opc | 0 0 0 0 0 0 | Rn | Rd |
5278 * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
5279 */
5280static void disas_fp_int_conv(DisasContext *s, uint32_t insn)
5281{
5282    int rd = extract32(insn, 0, 5);
5283    int rn = extract32(insn, 5, 5);
5284    int opcode = extract32(insn, 16, 3);
5285    int rmode = extract32(insn, 19, 2);
5286    int type = extract32(insn, 22, 2);
5287    bool sbit = extract32(insn, 29, 1);
5288    bool sf = extract32(insn, 31, 1);
5289
5290    if (sbit) {
5291        unallocated_encoding(s);
5292        return;
5293    }
5294
5295    if (opcode > 5) {
5296        /* FMOV */
5297        bool itof = opcode & 1;
5298
5299        if (rmode >= 2) {
5300            unallocated_encoding(s);
5301            return;
5302        }
5303
5304        switch (sf << 3 | type << 1 | rmode) {
5305        case 0x0: /* 32 bit */
5306        case 0xa: /* 64 bit */
5307        case 0xd: /* 64 bit to top half of quad */
5308            break;
5309        default:
5310            /* all other sf/type/rmode combinations are invalid */
5311            unallocated_encoding(s);
5312            return;
5313        }
5314
5315        if (!fp_access_check(s)) {
5316            return;
5317        }
5318        handle_fmov(s, rd, rn, type, itof);
5319    } else {
5320        /* actual FP conversions */
5321        bool itof = extract32(opcode, 1, 1);
5322
5323        if (type > 1 || (rmode != 0 && opcode > 1)) {
5324            unallocated_encoding(s);
5325            return;
5326        }
5327
5328        if (!fp_access_check(s)) {
5329            return;
5330        }
5331        handle_fpfpcvt(s, rd, rn, opcode, itof, rmode, 64, sf, type);
5332    }
5333}
5334
5335/* FP-specific subcases of table C3-6 (SIMD and FP data processing)
5336 *   31  30  29 28     25 24                          0
5337 * +---+---+---+---------+-----------------------------+
5338 * |   | 0 |   | 1 1 1 1 |                             |
5339 * +---+---+---+---------+-----------------------------+
5340 */
5341static void disas_data_proc_fp(DisasContext *s, uint32_t insn)
5342{
5343    if (extract32(insn, 24, 1)) {
5344        /* Floating point data-processing (3 source) */
5345        disas_fp_3src(s, insn);
5346    } else if (extract32(insn, 21, 1) == 0) {
5347        /* Floating point to fixed point conversions */
5348        disas_fp_fixed_conv(s, insn);
5349    } else {
5350        switch (extract32(insn, 10, 2)) {
5351        case 1:
5352            /* Floating point conditional compare */
5353            disas_fp_ccomp(s, insn);
5354            break;
5355        case 2:
5356            /* Floating point data-processing (2 source) */
5357            disas_fp_2src(s, insn);
5358            break;
5359        case 3:
5360            /* Floating point conditional select */
5361            disas_fp_csel(s, insn);
5362            break;
5363        case 0:
5364            switch (ctz32(extract32(insn, 12, 4))) {
5365            case 0: /* [15:12] == xxx1 */
5366                /* Floating point immediate */
5367                disas_fp_imm(s, insn);
5368                break;
5369            case 1: /* [15:12] == xx10 */
5370                /* Floating point compare */
5371                disas_fp_compare(s, insn);
5372                break;
5373            case 2: /* [15:12] == x100 */
5374                /* Floating point data-processing (1 source) */
5375                disas_fp_1src(s, insn);
5376                break;
5377            case 3: /* [15:12] == 1000 */
5378                unallocated_encoding(s);
5379                break;
5380            default: /* [15:12] == 0000 */
5381                /* Floating point <-> integer conversions */
5382                disas_fp_int_conv(s, insn);
5383                break;
5384            }
5385            break;
5386        }
5387    }
5388}
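
/* Sketch of the ctz32() dispatch above: the number of trailing zeros of
 * insn[15:12] classifies the FP encoding group (ctz32(0) is 32 in QEMU's
 * host-utils, which lands in the default, int-conversion, case).
 */
static inline int example_fp_group(uint32_t insn)
{
    return ctz32(extract32(insn, 12, 4)); /* e.g. 0b0100 -> 2 -> 1-source */
}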
5389
5390static void do_ext64(DisasContext *s, TCGv_i64 tcg_left, TCGv_i64 tcg_right,
5391                     int pos)
5392{
5393    /* Extract 64 bits from the middle of two concatenated 64 bit
5394     * vector register slices left:right. The extracted bits start
5395     * at 'pos' bits into the right (least significant) side.
5396     * We return the result in tcg_right, and guarantee not to
5397     * trash tcg_left.
5398     */
5399    TCGv_i64 tcg_tmp = tcg_temp_new_i64();
5400    assert(pos > 0 && pos < 64);
5401
5402    tcg_gen_shri_i64(tcg_right, tcg_right, pos);
5403    tcg_gen_shli_i64(tcg_tmp, tcg_left, 64 - pos);
5404    tcg_gen_or_i64(tcg_right, tcg_right, tcg_tmp);
5405
5406    tcg_temp_free_i64(tcg_tmp);
5407}
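
/* The same extraction expressed on plain integers: a sketch of the three
 * TCG ops emitted by do_ext64() (assumes 0 < pos < 64, as asserted).
 */
static inline uint64_t example_ext64(uint64_t left, uint64_t right, int pos)
{
    return (right >> pos) | (left << (64 - pos));
}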
5408
5409/* C3.6.1 EXT
5410 *   31  30 29         24 23 22  21 20  16 15  14  11 10  9    5 4    0
5411 * +---+---+-------------+-----+---+------+---+------+---+------+------+
5412 * | 0 | Q | 1 0 1 1 1 0 | op2 | 0 |  Rm  | 0 | imm4 | 0 |  Rn  |  Rd  |
5413 * +---+---+-------------+-----+---+------+---+------+---+------+------+
5414 */
5415static void disas_simd_ext(DisasContext *s, uint32_t insn)
5416{
5417    int is_q = extract32(insn, 30, 1);
5418    int op2 = extract32(insn, 22, 2);
5419    int imm4 = extract32(insn, 11, 4);
5420    int rm = extract32(insn, 16, 5);
5421    int rn = extract32(insn, 5, 5);
5422    int rd = extract32(insn, 0, 5);
5423    int pos = imm4 << 3;
5424    TCGv_i64 tcg_resl, tcg_resh;
5425
5426    if (op2 != 0 || (!is_q && extract32(imm4, 3, 1))) {
5427        unallocated_encoding(s);
5428        return;
5429    }
5430
5431    if (!fp_access_check(s)) {
5432        return;
5433    }
5434
5435    tcg_resh = tcg_temp_new_i64();
5436    tcg_resl = tcg_temp_new_i64();
5437
5438    /* Vd gets bits starting at pos bits into Vm:Vn. This is
5439     * either extracting 128 bits from a 128:128 concatenation, or
5440     * extracting 64 bits from a 64:64 concatenation.
5441     */
5442    if (!is_q) {
5443        read_vec_element(s, tcg_resl, rn, 0, MO_64);
5444        if (pos != 0) {
5445            read_vec_element(s, tcg_resh, rm, 0, MO_64);
5446            do_ext64(s, tcg_resh, tcg_resl, pos);
5447        }
5448        tcg_gen_movi_i64(tcg_resh, 0);
5449    } else {
5450        TCGv_i64 tcg_hh;
5451        typedef struct {
5452            int reg;
5453            int elt;
5454        } EltPosns;
5455        EltPosns eltposns[] = { {rn, 0}, {rn, 1}, {rm, 0}, {rm, 1} };
5456        EltPosns *elt = eltposns;
5457
5458        if (pos >= 64) {
5459            elt++;
5460            pos -= 64;
5461        }
5462
5463        read_vec_element(s, tcg_resl, elt->reg, elt->elt, MO_64);
5464        elt++;
5465        read_vec_element(s, tcg_resh, elt->reg, elt->elt, MO_64);
5466        elt++;
5467        if (pos != 0) {
5468            do_ext64(s, tcg_resh, tcg_resl, pos);
5469            tcg_hh = tcg_temp_new_i64();
5470            read_vec_element(s, tcg_hh, elt->reg, elt->elt, MO_64);
5471            do_ext64(s, tcg_hh, tcg_resh, pos);
5472            tcg_temp_free_i64(tcg_hh);
5473        }
5474    }
5475
5476    write_vec_element(s, tcg_resl, rd, 0, MO_64);
5477    tcg_temp_free_i64(tcg_resl);
5478    write_vec_element(s, tcg_resh, rd, 1, MO_64);
5479    tcg_temp_free_i64(tcg_resh);
5480}
5481
5482/* C3.6.2 TBL/TBX
5483 *   31  30 29         24 23 22  21 20  16 15  14 13  12  11 10 9    5 4    0
5484 * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
5485 * | 0 | Q | 0 0 1 1 1 0 | op2 | 0 |  Rm  | 0 | len | op | 0 0 |  Rn  |  Rd  |
5486 * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
5487 */
5488static void disas_simd_tb(DisasContext *s, uint32_t insn)
5489{
5490    int op2 = extract32(insn, 22, 2);
5491    int is_q = extract32(insn, 30, 1);
5492    int rm = extract32(insn, 16, 5);
5493    int rn = extract32(insn, 5, 5);
5494    int rd = extract32(insn, 0, 5);
5495    int is_tblx = extract32(insn, 12, 1);
5496    int len = extract32(insn, 13, 2);
5497    TCGv_i64 tcg_resl, tcg_resh, tcg_idx;
5498    TCGv_i32 tcg_regno, tcg_numregs;
5499
5500    if (op2 != 0) {
5501        unallocated_encoding(s);
5502        return;
5503    }
5504
5505    if (!fp_access_check(s)) {
5506        return;
5507    }
5508
5509    /* This does a table lookup: for every byte element in the input
5510     * we index into a table formed from up to four vector registers,
5511     * and then the output is the result of the lookups. Our helper
5512     * function does the lookup operation for a single 64 bit part of
5513     * the input.
5514     */
5515    tcg_resl = tcg_temp_new_i64();
5516    tcg_resh = tcg_temp_new_i64();
5517
5518    if (is_tblx) {
5519        read_vec_element(s, tcg_resl, rd, 0, MO_64);
5520    } else {
5521        tcg_gen_movi_i64(tcg_resl, 0);
5522    }
5523    if (is_tblx && is_q) {
5524        read_vec_element(s, tcg_resh, rd, 1, MO_64);
5525    } else {
5526        tcg_gen_movi_i64(tcg_resh, 0);
5527    }
5528
5529    tcg_idx = tcg_temp_new_i64();
5530    tcg_regno = tcg_const_i32(rn);
5531    tcg_numregs = tcg_const_i32(len + 1);
5532    read_vec_element(s, tcg_idx, rm, 0, MO_64);
5533    gen_helper_simd_tbl(tcg_resl, cpu_env, tcg_resl, tcg_idx,
5534                        tcg_regno, tcg_numregs);
5535    if (is_q) {
5536        read_vec_element(s, tcg_idx, rm, 1, MO_64);
5537        gen_helper_simd_tbl(tcg_resh, cpu_env, tcg_resh, tcg_idx,
5538                            tcg_regno, tcg_numregs);
5539    }
5540    tcg_temp_free_i64(tcg_idx);
5541    tcg_temp_free_i32(tcg_regno);
5542    tcg_temp_free_i32(tcg_numregs);
5543
5544    write_vec_element(s, tcg_resl, rd, 0, MO_64);
5545    tcg_temp_free_i64(tcg_resl);
5546    write_vec_element(s, tcg_resh, rd, 1, MO_64);
5547    tcg_temp_free_i64(tcg_resh);
5548}
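
/* Per-byte semantics of the table lookup done by the helper calls above,
 * sketched for a flat byte table: out-of-range indices produce 0 for TBL,
 * while TBX instead leaves the destination byte unchanged.
 */
static inline uint8_t example_tbl_byte(const uint8_t *table, int table_len,
                                       uint8_t index, uint8_t old, bool is_tbx)
{
    if (index < table_len) {
        return table[index];
    }
    return is_tbx ? old : 0;
}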
5549
5550/* C3.6.3 ZIP/UZP/TRN
5551 *   31  30 29         24 23  22  21 20   16 15 14 12 11 10 9    5 4    0
5552 * +---+---+-------------+------+---+------+---+-----+-----+------+------+
5553 * | 0 | Q | 0 0 1 1 1 0 | size | 0 |  Rm  | 0 | opc | 1 0 |  Rn  |  Rd  |
5554 * +---+---+-------------+------+---+------+---+-----+-----+------+------+
5555 */
5556static void disas_simd_zip_trn(DisasContext *s, uint32_t insn)
5557{
5558    int rd = extract32(insn, 0, 5);
5559    int rn = extract32(insn, 5, 5);
5560    int rm = extract32(insn, 16, 5);
5561    int size = extract32(insn, 22, 2);
5562    /* opc field bits [1:0] indicate ZIP/UZP/TRN;
5563     * bit 2 indicates 1 vs 2 variant of the insn.
5564     */
5565    int opcode = extract32(insn, 12, 2);
5566    bool part = extract32(insn, 14, 1);
5567    bool is_q = extract32(insn, 30, 1);
5568    int esize = 8 << size;
5569    int i, ofs;
5570    int datasize = is_q ? 128 : 64;
5571    int elements = datasize / esize;
5572    TCGv_i64 tcg_res, tcg_resl, tcg_resh;
5573
5574    if (opcode == 0 || (size == 3 && !is_q)) {
5575        unallocated_encoding(s);
5576        return;
5577    }
5578
5579    if (!fp_access_check(s)) {
5580        return;
5581    }
5582
5583    tcg_resl = tcg_const_i64(0);
5584    tcg_resh = tcg_const_i64(0);
5585    tcg_res = tcg_temp_new_i64();
5586
5587    for (i = 0; i < elements; i++) {
5588        switch (opcode) {
5589        case 1: /* UZP1/2 */
5590        {
5591            int midpoint = elements / 2;
5592            if (i < midpoint) {
5593                read_vec_element(s, tcg_res, rn, 2 * i + part, size);
5594            } else {
5595                read_vec_element(s, tcg_res, rm,
5596                                 2 * (i - midpoint) + part, size);
5597            }
5598            break;
5599        }
5600        case 2: /* TRN1/2 */
5601            if (i & 1) {
5602                read_vec_element(s, tcg_res, rm, (i & ~1) + part, size);
5603            } else {
5604                read_vec_element(s, tcg_res, rn, (i & ~1) + part, size);
5605            }
5606            break;
5607        case 3: /* ZIP1/2 */
5608        {
5609            int base = part * elements / 2;
5610            if (i & 1) {
5611                read_vec_element(s, tcg_res, rm, base + (i >> 1), size);
5612            } else {
5613                read_vec_element(s, tcg_res, rn, base + (i >> 1), size);
5614            }
5615            break;
5616        }
5617        default:
5618            g_assert_not_reached();
5619        }
5620
5621        ofs = i * esize;
5622        if (ofs < 64) {
5623            tcg_gen_shli_i64(tcg_res, tcg_res, ofs);
5624            tcg_gen_or_i64(tcg_resl, tcg_resl, tcg_res);
5625        } else {
5626            tcg_gen_shli_i64(tcg_res, tcg_res, ofs - 64);
5627            tcg_gen_or_i64(tcg_resh, tcg_resh, tcg_res);
5628        }
5629    }
5630
5631    tcg_temp_free_i64(tcg_res);
5632
5633    write_vec_element(s, tcg_resl, rd, 0, MO_64);
5634    tcg_temp_free_i64(tcg_resl);
5635    write_vec_element(s, tcg_resh, rd, 1, MO_64);
5636    tcg_temp_free_i64(tcg_resh);
5637}
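
/* Source-element selection used by the loop above, written out as plain
 * index arithmetic for the ZIP case; e.g. ZIP1 on .4S vectors produces
 * { n[0], m[0], n[1], m[1] }.
 */
static inline int example_zip_src_index(int i, int part, int elements)
{
    int base = part * elements / 2; /* ZIP1 uses the low half, ZIP2 high */
    return base + (i >> 1);         /* even i reads Rn, odd i reads Rm */
}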
5638
5639static void do_minmaxop(DisasContext *s, TCGv_i32 tcg_elt1, TCGv_i32 tcg_elt2,
5640                        int opc, bool is_min, TCGv_ptr fpst)
5641{
5642    /* Helper function for disas_simd_across_lanes: do a single precision
5643     * min/max operation on the specified two inputs,
5644     * and return the result in tcg_elt1.
5645     */
5646    if (opc == 0xc) {
5647        if (is_min) {
5648            gen_helper_vfp_minnums(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
5649        } else {
5650            gen_helper_vfp_maxnums(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
5651        }
5652    } else {
5653        assert(opc == 0xf);
5654        if (is_min) {
5655            gen_helper_vfp_mins(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
5656        } else {
5657            gen_helper_vfp_maxs(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
5658        }
5659    }
5660}
5661
5662/* C3.6.4 AdvSIMD across lanes
5663 *   31  30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
5664 * +---+---+---+-----------+------+-----------+--------+-----+------+------+
5665 * | 0 | Q | U | 0 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
5666 * +---+---+---+-----------+------+-----------+--------+-----+------+------+
5667 */
5668static void disas_simd_across_lanes(DisasContext *s, uint32_t insn)
5669{
5670    int rd = extract32(insn, 0, 5);
5671    int rn = extract32(insn, 5, 5);
5672    int size = extract32(insn, 22, 2);
5673    int opcode = extract32(insn, 12, 5);
5674    bool is_q = extract32(insn, 30, 1);
5675    bool is_u = extract32(insn, 29, 1);
5676    bool is_fp = false;
5677    bool is_min = false;
5678    int esize;
5679    int elements;
5680    int i;
5681    TCGv_i64 tcg_res, tcg_elt;
5682
5683    switch (opcode) {
5684    case 0x1b: /* ADDV */
5685        if (is_u) {
5686            unallocated_encoding(s);
5687            return;
5688        }
5689        /* fall through */
5690    case 0x3: /* SADDLV, UADDLV */
5691    case 0xa: /* SMAXV, UMAXV */
5692    case 0x1a: /* SMINV, UMINV */
5693        if (size == 3 || (size == 2 && !is_q)) {
5694            unallocated_encoding(s);
5695            return;
5696        }
5697        break;
5698    case 0xc: /* FMAXNMV, FMINNMV */
5699    case 0xf: /* FMAXV, FMINV */
5700        if (!is_u || !is_q || extract32(size, 0, 1)) {
5701            unallocated_encoding(s);
5702            return;
5703        }
5704        /* Bit 1 of size field encodes min vs max, and actual size is always
5705         * 32 bits: adjust the size variable so the following code can rely on it.
5706         */
5707        is_min = extract32(size, 1, 1);
5708        is_fp = true;
5709        size = 2;
5710        break;
5711    default:
5712        unallocated_encoding(s);
5713        return;
5714    }
5715
5716    if (!fp_access_check(s)) {
5717        return;
5718    }
5719
5720    esize = 8 << size;
5721    elements = (is_q ? 128 : 64) / esize;
5722
5723    tcg_res = tcg_temp_new_i64();
5724    tcg_elt = tcg_temp_new_i64();
5725
5726    /* These instructions operate across all lanes of a vector
5727     * to produce a single result. We can guarantee that a 64
5728     * bit intermediate is sufficient:
5729     *  + for [US]ADDLV the maximum element size is 32 bits, and
5730     *    the result type is 64 bits
5731     *  + for FMAX*V, FMIN*V, ADDV the intermediate type is the
5732     *    same as the element size, which is 32 bits at most
5733     * For the integer operations we can choose to work at 64
5734     * or 32 bits and truncate at the end; for simplicity
5735     * we use 64 bits always. The floating point
5736     * ops do require 32 bit intermediates, though.
5737     */
5738    if (!is_fp) {
5739        read_vec_element(s, tcg_res, rn, 0, size | (is_u ? 0 : MO_SIGN));
5740
5741        for (i = 1; i < elements; i++) {
5742            read_vec_element(s, tcg_elt, rn, i, size | (is_u ? 0 : MO_SIGN));
5743
5744            switch (opcode) {
5745            case 0x03: /* SADDLV / UADDLV */
5746            case 0x1b: /* ADDV */
5747                tcg_gen_add_i64(tcg_res, tcg_res, tcg_elt);
5748                break;
5749            case 0x0a: /* SMAXV / UMAXV */
5750                tcg_gen_movcond_i64(is_u ? TCG_COND_GEU : TCG_COND_GE,
5751                                    tcg_res,
5752                                    tcg_res, tcg_elt, tcg_res, tcg_elt);
5753                break;
5754            case 0x1a: /* SMINV / UMINV */
5755                tcg_gen_movcond_i64(is_u ? TCG_COND_LEU : TCG_COND_LE,
5756                                    tcg_res,
5757                                    tcg_res, tcg_elt, tcg_res, tcg_elt);
5758                break;
5760            default:
5761                g_assert_not_reached();
5762            }
5763
5764        }
5765    } else {
5766        /* Floating point ops which work on 32 bit (single) intermediates.
5767         * Note that correct NaN propagation requires that we do these
5768         * operations in exactly the order specified by the pseudocode.
5769         */
5770        TCGv_i32 tcg_elt1 = tcg_temp_new_i32();
5771        TCGv_i32 tcg_elt2 = tcg_temp_new_i32();
5772        TCGv_i32 tcg_elt3 = tcg_temp_new_i32();
5773        TCGv_ptr fpst = get_fpstatus_ptr();
5774
5775        assert(esize == 32);
5776        assert(elements == 4);
5777
5778        read_vec_element(s, tcg_elt, rn, 0, MO_32);
5779        tcg_gen_extrl_i64_i32(tcg_elt1, tcg_elt);
5780        read_vec_element(s, tcg_elt, rn, 1, MO_32);
5781        tcg_gen_extrl_i64_i32(tcg_elt2, tcg_elt);
5782
5783        do_minmaxop(s, tcg_elt1, tcg_elt2, opcode, is_min, fpst);
5784
5785        read_vec_element(s, tcg_elt, rn, 2, MO_32);
5786        tcg_gen_extrl_i64_i32(tcg_elt2, tcg_elt);
5787        read_vec_element(s, tcg_elt, rn, 3, MO_32);
5788        tcg_gen_extrl_i64_i32(tcg_elt3, tcg_elt);
5789
5790        do_minmaxop(s, tcg_elt2, tcg_elt3, opcode, is_min, fpst);
5791
5792        do_minmaxop(s, tcg_elt1, tcg_elt2, opcode, is_min, fpst);
5793
5794        tcg_gen_extu_i32_i64(tcg_res, tcg_elt1);
5795        tcg_temp_free_i32(tcg_elt1);
5796        tcg_temp_free_i32(tcg_elt2);
5797        tcg_temp_free_i32(tcg_elt3);
5798        tcg_temp_free_ptr(fpst);
5799    }
5800
5801    tcg_temp_free_i64(tcg_elt);
5802
5803    /* Now truncate the result to the width required for the final output */
5804    if (opcode == 0x03) {
5805        /* SADDLV, UADDLV: result is 2*esize */
5806        size++;
5807    }
5808
5809    switch (size) {
5810    case 0:
5811        tcg_gen_ext8u_i64(tcg_res, tcg_res);
5812        break;
5813    case 1:
5814        tcg_gen_ext16u_i64(tcg_res, tcg_res);
5815        break;
5816    case 2:
5817        tcg_gen_ext32u_i64(tcg_res, tcg_res);
5818        break;
5819    case 3:
5820        break;
5821    default:
5822        g_assert_not_reached();
5823    }
5824
5825    write_fp_dreg(s, rd, tcg_res);
5826    tcg_temp_free_i64(tcg_res);
5827}
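
/* The FP reduction order used above, sketched with host floats and a
 * plain max (NaN propagation elided): the pseudocode requires
 * op(op(e0, e1), op(e2, e3)) rather than a left-to-right fold.
 */
static inline float example_fmaxv_order(float e0, float e1, float e2, float e3)
{
    float a = e0 > e1 ? e0 : e1;
    float b = e2 > e3 ? e2 : e3;
    return a > b ? a : b;
}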
5828
5829/* C6.3.31 DUP (Element, Vector)
5830 *
5831 *  31  30   29              21 20    16 15        10  9    5 4    0
5832 * +---+---+-------------------+--------+-------------+------+------+
5833 * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 0 1 |  Rn  |  Rd  |
5834 * +---+---+-------------------+--------+-------------+------+------+
5835 *
5836 * size: encoded in imm5 (see ARM ARM LowestSetBit())
5837 */
5838static void handle_simd_dupe(DisasContext *s, int is_q, int rd, int rn,
5839                             int imm5)
5840{
5841    int size = ctz32(imm5);
5842    int esize = 8 << size;
5843    int elements = (is_q ? 128 : 64) / esize;
5844    int index, i;
5845    TCGv_i64 tmp;
5846
5847    if (size > 3 || (size == 3 && !is_q)) {
5848        unallocated_encoding(s);
5849        return;
5850    }
5851
5852    if (!fp_access_check(s)) {
5853        return;
5854    }
5855
5856    index = imm5 >> (size + 1);
5857
5858    tmp = tcg_temp_new_i64();
5859    read_vec_element(s, tmp, rn, index, size);
5860
5861    for (i = 0; i < elements; i++) {
5862        write_vec_element(s, tmp, rd, i, size);
5863    }
5864
5865    if (!is_q) {
5866        clear_vec_high(s, rd);
5867    }
5868
5869    tcg_temp_free_i64(tmp);
5870}
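
/* Worked decode of the imm5 field shared by the DUP/INS/UMOV family:
 * the lowest set bit selects the element size, the bits above it the
 * lane index; e.g. imm5 == 0b01010 means a .H element at index 2.
 */
static inline void example_imm5_decode(int imm5, int *size, int *index)
{
    *size = ctz32(imm5);            /* LowestSetBit() in the ARM ARM */
    *index = imm5 >> (*size + 1);
}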
5871
5872/* C6.3.31 DUP (element, scalar)
5873 *  31                   21 20    16 15        10  9    5 4    0
5874 * +-----------------------+--------+-------------+------+------+
5875 * | 0 1 0 1 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 0 1 |  Rn  |  Rd  |
5876 * +-----------------------+--------+-------------+------+------+
5877 */
5878static void handle_simd_dupes(DisasContext *s, int rd, int rn,
5879                              int imm5)
5880{
5881    int size = ctz32(imm5);
5882    int index;
5883    TCGv_i64 tmp;
5884
5885    if (size > 3) {
5886        unallocated_encoding(s);
5887        return;
5888    }
5889
5890    if (!fp_access_check(s)) {
5891        return;
5892    }
5893
5894    index = imm5 >> (size + 1);
5895
5896    /* This instruction just extracts the specified element and
5897     * zero-extends it into the bottom of the destination register.
5898     */
5899    tmp = tcg_temp_new_i64();
5900    read_vec_element(s, tmp, rn, index, size);
5901    write_fp_dreg(s, rd, tmp);
5902    tcg_temp_free_i64(tmp);
5903}
5904
5905/* C6.3.32 DUP (General)
5906 *
5907 *  31  30   29              21 20    16 15        10  9    5 4    0
5908 * +---+---+-------------------+--------+-------------+------+------+
5909 * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 1 1 |  Rn  |  Rd  |
5910 * +---+---+-------------------+--------+-------------+------+------+
5911 *
5912 * size: encoded in imm5 (see ARM ARM LowestSetBit())
5913 */
5914static void handle_simd_dupg(DisasContext *s, int is_q, int rd, int rn,
5915                             int imm5)
5916{
5917    int size = ctz32(imm5);
5918    int esize = 8 << size;
5919    int elements = (is_q ? 128 : 64)/esize;
5920    int i = 0;
5921
5922    if (size > 3 || ((size == 3) && !is_q)) {
5923        unallocated_encoding(s);
5924        return;
5925    }
5926
5927    if (!fp_access_check(s)) {
5928        return;
5929    }
5930
5931    for (i = 0; i < elements; i++) {
5932        write_vec_element(s, cpu_reg(s, rn), rd, i, size);
5933    }
5934    if (!is_q) {
5935        clear_vec_high(s, rd);
5936    }
5937}
5938
5939/* C6.3.150 INS (Element)
5940 *
5941 *  31                   21 20    16 15  14    11  10 9    5 4    0
5942 * +-----------------------+--------+------------+---+------+------+
5943 * | 0 1 1 0 1 1 1 0 0 0 0 |  imm5  | 0 |  imm4  | 1 |  Rn  |  Rd  |
5944 * +-----------------------+--------+------------+---+------+------+
5945 *
5946 * size: encoded in imm5 (see ARM ARM LowestSetBit())
5947 * index: encoded in imm5<4:size+1>
5948 */
5949static void handle_simd_inse(DisasContext *s, int rd, int rn,
5950                             int imm4, int imm5)
5951{
5952    int size = ctz32(imm5);
5953    int src_index, dst_index;
5954    TCGv_i64 tmp;
5955
5956    if (size > 3) {
5957        unallocated_encoding(s);
5958        return;
5959    }
5960
5961    if (!fp_access_check(s)) {
5962        return;
5963    }
5964
5965    dst_index = extract32(imm5, 1+size, 5);
5966    src_index = extract32(imm4, size, 4);
5967
5968    tmp = tcg_temp_new_i64();
5969
5970    read_vec_element(s, tmp, rn, src_index, size);
5971    write_vec_element(s, tmp, rd, dst_index, size);
5972
5973    tcg_temp_free_i64(tmp);
5974}
5975
5976
5977/* C6.3.151 INS (General)
5978 *
5979 *  31                   21 20    16 15        10  9    5 4    0
5980 * +-----------------------+--------+-------------+------+------+
5981 * | 0 1 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 1 1 1 |  Rn  |  Rd  |
5982 * +-----------------------+--------+-------------+------+------+
5983 *
5984 * size: encoded in imm5 (see ARM ARM LowestSetBit())
5985 * index: encoded in imm5<4:size+1>
5986 */
5987static void handle_simd_insg(DisasContext *s, int rd, int rn, int imm5)
5988{
5989    int size = ctz32(imm5);
5990    int idx;
5991
5992    if (size > 3) {
5993        unallocated_encoding(s);
5994        return;
5995    }
5996
5997    if (!fp_access_check(s)) {
5998        return;
5999    }
6000
6001    idx = extract32(imm5, 1 + size, 4 - size);
6002    write_vec_element(s, cpu_reg(s, rn), rd, idx, size);
6003}
6004
6005/*
6006 * C6.3.321 UMOV (General)
6007 * C6.3.237 SMOV (General)
6008 *
6009 *  31  30   29              21 20    16 15    12   10 9    5 4    0
6010 * +---+---+-------------------+--------+-------------+------+------+
6011 * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 1 U 1 1 |  Rn  |  Rd  |
6012 * +---+---+-------------------+--------+-------------+------+------+
6013 *
6014 * U: unsigned when set
6015 * size: encoded in imm5 (see ARM ARM LowestSetBit())
6016 */
6017static void handle_simd_umov_smov(DisasContext *s, int is_q, int is_signed,
6018                                  int rn, int rd, int imm5)
6019{
6020    int size = ctz32(imm5);
6021    int element;
6022    TCGv_i64 tcg_rd;
6023
6024    /* Check for UnallocatedEncodings */
6025    if (is_signed) {
6026        if (size > 2 || (size == 2 && !is_q)) {
6027            unallocated_encoding(s);
6028            return;
6029        }
6030    } else {
6031        if (size > 3
6032            || (size < 3 && is_q)
6033            || (size == 3 && !is_q)) {
6034            unallocated_encoding(s);
6035            return;
6036        }
6037    }
6038
6039    if (!fp_access_check(s)) {
6040        return;
6041    }
6042
6043    element = extract32(imm5, 1+size, 4);
6044
6045    tcg_rd = cpu_reg(s, rd);
6046    read_vec_element(s, tcg_rd, rn, element, size | (is_signed ? MO_SIGN : 0));
6047    if (is_signed && !is_q) {
6048        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
6049    }
6050}
6051
6052/* C3.6.5 AdvSIMD copy
6053 *   31  30  29  28             21 20  16 15  14  11 10  9    5 4    0
6054 * +---+---+----+-----------------+------+---+------+---+------+------+
6055 * | 0 | Q | op | 0 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 |  Rn  |  Rd  |
6056 * +---+---+----+-----------------+------+---+------+---+------+------+
6057 */
6058static void disas_simd_copy(DisasContext *s, uint32_t insn)
6059{
6060    int rd = extract32(insn, 0, 5);
6061    int rn = extract32(insn, 5, 5);
6062    int imm4 = extract32(insn, 11, 4);
6063    int op = extract32(insn, 29, 1);
6064    int is_q = extract32(insn, 30, 1);
6065    int imm5 = extract32(insn, 16, 5);
6066
6067    if (op) {
6068        if (is_q) {
6069            /* INS (element) */
6070            handle_simd_inse(s, rd, rn, imm4, imm5);
6071        } else {
6072            unallocated_encoding(s);
6073        }
6074    } else {
6075        switch (imm4) {
6076        case 0:
6077            /* DUP (element - vector) */
6078            handle_simd_dupe(s, is_q, rd, rn, imm5);
6079            break;
6080        case 1:
6081            /* DUP (general) */
6082            handle_simd_dupg(s, is_q, rd, rn, imm5);
6083            break;
6084        case 3:
6085            if (is_q) {
6086                /* INS (general) */
6087                handle_simd_insg(s, rd, rn, imm5);
6088            } else {
6089                unallocated_encoding(s);
6090            }
6091            break;
6092        case 5:
6093        case 7:
6094            /* UMOV/SMOV (is_q indicates 32/64; imm4 indicates signedness) */
6095            handle_simd_umov_smov(s, is_q, (imm4 == 5), rn, rd, imm5);
6096            break;
6097        default:
6098            unallocated_encoding(s);
6099            break;
6100        }
6101    }
6102}
6103
6104/* C3.6.6 AdvSIMD modified immediate
6105 *  31  30   29  28                 19 18 16 15   12  11  10  9     5 4    0
6106 * +---+---+----+---------------------+-----+-------+----+---+-------+------+
6107 * | 0 | Q | op | 0 1 1 1 1 0 0 0 0 0 | abc | cmode | o2 | 1 | defgh |  Rd  |
6108 * +---+---+----+---------------------+-----+-------+----+---+-------+------+
6109 *
6110 * There are a number of operations that can be carried out here:
6111 *   MOVI - move (shifted) imm into register
6112 *   MVNI - move inverted (shifted) imm into register
6113 *   ORR  - bitwise OR of (shifted) imm with register
6114 *   BIC  - bitwise clear of (shifted) imm with register
6115 */
6116static void disas_simd_mod_imm(DisasContext *s, uint32_t insn)
6117{
6118    int rd = extract32(insn, 0, 5);
6119    int cmode = extract32(insn, 12, 4);
6120    int cmode_3_1 = extract32(cmode, 1, 3);
6121    int cmode_0 = extract32(cmode, 0, 1);
6122    int o2 = extract32(insn, 11, 1);
6123    uint64_t abcdefgh = extract32(insn, 5, 5) | (extract32(insn, 16, 3) << 5);
6124    bool is_neg = extract32(insn, 29, 1);
6125    bool is_q = extract32(insn, 30, 1);
6126    uint64_t imm = 0;
6127    TCGv_i64 tcg_rd, tcg_imm;
6128    int i;
6129
6130    if (o2 != 0 || ((cmode == 0xf) && is_neg && !is_q)) {
6131        unallocated_encoding(s);
6132        return;
6133    }
6134
6135    if (!fp_access_check(s)) {
6136        return;
6137    }
6138
6139    /* See AdvSIMDExpandImm() in ARM ARM */
6140    switch (cmode_3_1) {
6141    case 0: /* Replicate(Zeros(24):imm8, 2) */
6142    case 1: /* Replicate(Zeros(16):imm8:Zeros(8), 2) */
6143    case 2: /* Replicate(Zeros(8):imm8:Zeros(16), 2) */
6144    case 3: /* Replicate(imm8:Zeros(24), 2) */
6145    {
6146        int shift = cmode_3_1 * 8;
6147        imm = bitfield_replicate(abcdefgh << shift, 32);
6148        break;
6149    }
6150    case 4: /* Replicate(Zeros(8):imm8, 4) */
6151    case 5: /* Replicate(imm8:Zeros(8), 4) */
6152    {
6153        int shift = (cmode_3_1 & 0x1) * 8;
6154        imm = bitfield_replicate(abcdefgh << shift, 16);
6155        break;
6156    }
6157    case 6:
6158        if (cmode_0) {
6159            /* Replicate(Zeros(8):imm8:Ones(16), 2) */
6160            imm = (abcdefgh << 16) | 0xffff;
6161        } else {
6162            /* Replicate(Zeros(16):imm8:Ones(8), 2) */
6163            imm = (abcdefgh << 8) | 0xff;
6164        }
6165        imm = bitfield_replicate(imm, 32);
6166        break;
6167    case 7:
6168        if (!cmode_0 && !is_neg) {
6169            imm = bitfield_replicate(abcdefgh, 8);
6170        } else if (!cmode_0 && is_neg) {
6171            int i;
6172            imm = 0;
6173            for (i = 0; i < 8; i++) {
6174                if ((abcdefgh) & (1 << i)) {
6175                    imm |= 0xffULL << (i * 8);
6176                }
6177            }
6178        } else if (cmode_0) {
6179            if (is_neg) {
6180                imm = (abcdefgh & 0x3f) << 48;
6181                if (abcdefgh & 0x80) {
6182                    imm |= 0x8000000000000000ULL;
6183                }
6184                if (abcdefgh & 0x40) {
6185                    imm |= 0x3fc0000000000000ULL;
6186                } else {
6187                    imm |= 0x4000000000000000ULL;
6188                }
6189            } else {
6190                imm = (abcdefgh & 0x3f) << 19;
6191                if (abcdefgh & 0x80) {
6192                    imm |= 0x80000000;
6193                }
6194                if (abcdefgh & 0x40) {
6195                    imm |= 0x3e000000;
6196                } else {
6197                    imm |= 0x40000000;
6198                }
6199                imm |= (imm << 32);
6200            }
6201        }
6202        break;
6203    }
6204
6205    if (cmode_3_1 != 7 && is_neg) {
6206        imm = ~imm;
6207    }
6208
6209    tcg_imm = tcg_const_i64(imm);
6210    tcg_rd = new_tmp_a64(s);
6211
6212    for (i = 0; i < 2; i++) {
6213        int foffs = i ? fp_reg_hi_offset(s, rd) : fp_reg_offset(s, rd, MO_64);
6214
6215        if (i == 1 && !is_q) {
6216            /* non-quad ops clear high half of vector */
6217            tcg_gen_movi_i64(tcg_rd, 0);
6218        } else if ((cmode & 0x9) == 0x1 || (cmode & 0xd) == 0x9) {
6219            tcg_gen_ld_i64(tcg_rd, cpu_env, foffs);
6220            if (is_neg) {
6221                /* AND (BIC) */
6222                tcg_gen_and_i64(tcg_rd, tcg_rd, tcg_imm);
6223            } else {
6224                /* ORR */
6225                tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_imm);
6226            }
6227        } else {
6228            /* MOVI */
6229            tcg_gen_mov_i64(tcg_rd, tcg_imm);
6230        }
6231        tcg_gen_st_i64(tcg_rd, cpu_env, foffs);
6232    }
6233
6234    tcg_temp_free_i64(tcg_imm);
6235}
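
/* Standalone version of the cmode == 0xe (per-bit byte mask) expansion in
 * the is_neg branch of case 7 above; e.g. imm8 == 0x81 expands to
 * 0xff000000000000ff.
 */
static inline uint64_t example_expand_byte_mask(int imm8)
{
    uint64_t imm = 0;
    int i;

    for (i = 0; i < 8; i++) {
        if (imm8 & (1 << i)) {
            imm |= 0xffULL << (i * 8);
        }
    }
    return imm;
}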
6236
6237/* C3.6.7 AdvSIMD scalar copy
6238 *  31 30  29  28             21 20  16 15  14  11 10  9    5 4    0
6239 * +-----+----+-----------------+------+---+------+---+------+------+
6240 * | 0 1 | op | 1 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 |  Rn  |  Rd  |
6241 * +-----+----+-----------------+------+---+------+---+------+------+
6242 */
6243static void disas_simd_scalar_copy(DisasContext *s, uint32_t insn)
6244{
6245    int rd = extract32(insn, 0, 5);
6246    int rn = extract32(insn, 5, 5);
6247    int imm4 = extract32(insn, 11, 4);
6248    int imm5 = extract32(insn, 16, 5);
6249    int op = extract32(insn, 29, 1);
6250
6251    if (op != 0 || imm4 != 0) {
6252        unallocated_encoding(s);
6253        return;
6254    }
6255
6256    /* DUP (element, scalar) */
6257    handle_simd_dupes(s, rd, rn, imm5);
6258}
6259
6260/* C3.6.8 AdvSIMD scalar pairwise
6261 *  31 30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
6262 * +-----+---+-----------+------+-----------+--------+-----+------+------+
6263 * | 0 1 | U | 1 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
6264 * +-----+---+-----------+------+-----------+--------+-----+------+------+
6265 */
6266static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn)
6267{
6268    int u = extract32(insn, 29, 1);
6269    int size = extract32(insn, 22, 2);
6270    int opcode = extract32(insn, 12, 5);
6271    int rn = extract32(insn, 5, 5);
6272    int rd = extract32(insn, 0, 5);
6273    TCGv_ptr fpst;
6274
6275    /* For some ops (the FP ones), size[1] is part of the encoding.
6276     * For ADDP strictly it is not, but size[1] is always 1 for valid
6277     * encodings.
6278     */
6279    opcode |= (extract32(size, 1, 1) << 5);
6280
6281    switch (opcode) {
6282    case 0x3b: /* ADDP */
6283        if (u || size != 3) {
6284            unallocated_encoding(s);
6285            return;
6286        }
6287        if (!fp_access_check(s)) {
6288            return;
6289        }
6290
6291        TCGV_UNUSED_PTR(fpst);
6292        break;
6293    case 0xc: /* FMAXNMP */
6294    case 0xd: /* FADDP */
6295    case 0xf: /* FMAXP */
6296    case 0x2c: /* FMINNMP */
6297    case 0x2f: /* FMINP */
6298        /* FP op, size[0] is 32 or 64 bit */
6299        if (!u) {
6300            unallocated_encoding(s);
6301            return;
6302        }
6303        if (!fp_access_check(s)) {
6304            return;
6305        }
6306
6307        size = extract32(size, 0, 1) ? 3 : 2;
6308        fpst = get_fpstatus_ptr();
6309        break;
6310    default:
6311        unallocated_encoding(s);
6312        return;
6313    }
6314
6315    if (size == 3) {
6316        TCGv_i64 tcg_op1 = tcg_temp_new_i64();
6317        TCGv_i64 tcg_op2 = tcg_temp_new_i64();
6318        TCGv_i64 tcg_res = tcg_temp_new_i64();
6319
6320        read_vec_element(s, tcg_op1, rn, 0, MO_64);
6321        read_vec_element(s, tcg_op2, rn, 1, MO_64);
6322
6323        switch (opcode) {
6324        case 0x3b: /* ADDP */
6325            tcg_gen_add_i64(tcg_res, tcg_op1, tcg_op2);
6326            break;
6327        case 0xc: /* FMAXNMP */
6328            gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
6329            break;
6330        case 0xd: /* FADDP */
6331            gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
6332            break;
6333        case 0xf: /* FMAXP */
6334            gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
6335            break;
6336        case 0x2c: /* FMINNMP */
6337            gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
6338            break;
6339        case 0x2f: /* FMINP */
6340            gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
6341            break;
6342        default:
6343            g_assert_not_reached();
6344        }
6345
6346        write_fp_dreg(s, rd, tcg_res);
6347
6348        tcg_temp_free_i64(tcg_op1);
6349        tcg_temp_free_i64(tcg_op2);
6350        tcg_temp_free_i64(tcg_res);
6351    } else {
6352        TCGv_i32 tcg_op1 = tcg_temp_new_i32();
6353        TCGv_i32 tcg_op2 = tcg_temp_new_i32();
6354        TCGv_i32 tcg_res = tcg_temp_new_i32();
6355
6356        read_vec_element_i32(s, tcg_op1, rn, 0, MO_32);
6357        read_vec_element_i32(s, tcg_op2, rn, 1, MO_32);
6358
6359        switch (opcode) {
6360        case 0xc: /* FMAXNMP */
6361            gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
6362            break;
6363        case 0xd: /* FADDP */
6364            gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
6365            break;
6366        case 0xf: /* FMAXP */
6367            gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
6368            break;
6369        case 0x2c: /* FMINNMP */
6370            gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
6371            break;
6372        case 0x2f: /* FMINP */
6373            gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
6374            break;
6375        default:
6376            g_assert_not_reached();
6377        }
6378
6379        write_fp_sreg(s, rd, tcg_res);
6380
6381        tcg_temp_free_i32(tcg_op1);
6382        tcg_temp_free_i32(tcg_op2);
6383        tcg_temp_free_i32(tcg_res);
6384    }
6385
6386    if (!TCGV_IS_UNUSED_PTR(fpst)) {
6387        tcg_temp_free_ptr(fpst);
6388    }
6389}
6390
6391/*
6392 * Common SSHR[RA]/USHR[RA] - Shift right (optional rounding/accumulate)
6393 *
6394 * This code handles the common shifting logic and is used by both
6395 * the vector and scalar code.
6396 */
6397static void handle_shri_with_rndacc(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
6398                                    TCGv_i64 tcg_rnd, bool accumulate,
6399                                    bool is_u, int size, int shift)
6400{
6401    bool extended_result = false;
6402    bool round = !TCGV_IS_UNUSED_I64(tcg_rnd);
6403    int ext_lshift = 0;
6404    TCGv_i64 tcg_src_hi;
6405
6406    if (round && size == 3) {
6407        extended_result = true;
6408        ext_lshift = 64 - shift;
6409        tcg_src_hi = tcg_temp_new_i64();
6410    } else if (shift == 64) {
6411        if (!accumulate && is_u) {
6412            /* result is zero */
6413            tcg_gen_movi_i64(tcg_res, 0);
6414            return;
6415        }
6416    }
6417
6418    /* Deal with the rounding step */
6419    if (round) {
6420        if (extended_result) {
6421            TCGv_i64 tcg_zero = tcg_const_i64(0);
6422            if (!is_u) {
6423                /* take care of sign extending tcg_res */
6424                tcg_gen_sari_i64(tcg_src_hi, tcg_src, 63);
6425                tcg_gen_add2_i64(tcg_src, tcg_src_hi,
6426                                 tcg_src, tcg_src_hi,
6427                                 tcg_rnd, tcg_zero);
6428            } else {
6429                tcg_gen_add2_i64(tcg_src, tcg_src_hi,
6430                                 tcg_src, tcg_zero,
6431                                 tcg_rnd, tcg_zero);
6432            }
6433            tcg_temp_free_i64(tcg_zero);
6434        } else {
6435            tcg_gen_add_i64(tcg_src, tcg_src, tcg_rnd);
6436        }
6437    }
6438
6439    /* Now do the shift right */
6440    if (round && extended_result) {
6441        /* extended case, >64 bit precision required */
6442        if (ext_lshift == 0) {
6443            /* special case, only high bits matter */
6444            tcg_gen_mov_i64(tcg_src, tcg_src_hi);
6445        } else {
6446            tcg_gen_shri_i64(tcg_src, tcg_src, shift);
6447            tcg_gen_shli_i64(tcg_src_hi, tcg_src_hi, ext_lshift);
6448            tcg_gen_or_i64(tcg_src, tcg_src, tcg_src_hi);
6449        }
6450    } else {
6451        if (is_u) {
6452            if (shift == 64) {
6453                /* essentially shifting in 64 zeros */
6454                tcg_gen_movi_i64(tcg_src, 0);
6455            } else {
6456                tcg_gen_shri_i64(tcg_src, tcg_src, shift);
6457            }
6458        } else {
6459            if (shift == 64) {
6460                /* effectively extending the sign-bit */
6461                tcg_gen_sari_i64(tcg_src, tcg_src, 63);
6462            } else {
6463                tcg_gen_sari_i64(tcg_src, tcg_src, shift);
6464            }
6465        }
6466    }
6467
6468    if (accumulate) {
6469        tcg_gen_add_i64(tcg_res, tcg_res, tcg_src);
6470    } else {
6471        tcg_gen_mov_i64(tcg_res, tcg_src);
6472    }
6473
6474    if (extended_result) {
6475        tcg_temp_free_i64(tcg_src_hi);
6476    }
6477}
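
/* The non-extended rounding path above, on plain integers: add the
 * rounding constant, then shift (unsigned case; assumes 0 < shift < 64
 * so no 65-bit intermediate is needed).
 */
static inline uint64_t example_urshr(uint64_t x, int shift)
{
    uint64_t round_const = 1ULL << (shift - 1);
    return (x + round_const) >> shift;
}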
6478
6479/* Common SHL/SLI - Shift left with an optional insert */
6480static void handle_shli_with_ins(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
6481                                 bool insert, int shift)
6482{
6483    if (insert) { /* SLI */
6484        tcg_gen_deposit_i64(tcg_res, tcg_res, tcg_src, shift, 64 - shift);
6485    } else { /* SHL */
6486        tcg_gen_shli_i64(tcg_res, tcg_src, shift);
6487    }
6488}
6489
6490/* SRI: shift right with insert */
6491static void handle_shri_with_ins(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
6492                                 int size, int shift)
6493{
6494    int esize = 8 << size;
6495
6496    /* shift count same as element size is valid but does nothing;
6497     * special case to avoid potential shift by 64.
6498     */
6499    if (shift != esize) {
6500        tcg_gen_shri_i64(tcg_src, tcg_src, shift);
6501        tcg_gen_deposit_i64(tcg_res, tcg_res, tcg_src, 0, esize - shift);
6502    }
6503}
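
/* Integer sketch of the SRI deposit above: shift src right and insert it
 * into the low (64 - shift) bits of dst, leaving the top shift bits of
 * dst intact (64-bit element case, 1 <= shift <= 64).
 */
static inline uint64_t example_sri64(uint64_t dst, uint64_t src, int shift)
{
    uint64_t mask;

    if (shift == 64) {
        return dst;             /* the do-nothing special case above */
    }
    mask = ~0ULL >> shift;      /* low (64 - shift) bits */
    return (dst & ~mask) | (src >> shift);
}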
6504
6505/* SSHR[RA]/USHR[RA] - Scalar shift right (optional rounding/accumulate) */
6506static void handle_scalar_simd_shri(DisasContext *s,
6507                                    bool is_u, int immh, int immb,
6508                                    int opcode, int rn, int rd)
6509{
6510    const int size = 3;
6511    int immhb = immh << 3 | immb;
6512    int shift = 2 * (8 << size) - immhb;
6513    bool accumulate = false;
6514    bool round = false;
6515    bool insert = false;
6516    TCGv_i64 tcg_rn;
6517    TCGv_i64 tcg_rd;
6518    TCGv_i64 tcg_round;
6519
6520    if (!extract32(immh, 3, 1)) {
6521        unallocated_encoding(s);
6522        return;
6523    }
6524
6525    if (!fp_access_check(s)) {
6526        return;
6527    }
6528
6529    switch (opcode) {
6530    case 0x02: /* SSRA / USRA (accumulate) */
6531        accumulate = true;
6532        break;
6533    case 0x04: /* SRSHR / URSHR (rounding) */
6534        round = true;
6535        break;
6536    case 0x06: /* SRSRA / URSRA (accum + rounding) */
6537        accumulate = round = true;
6538        break;
6539    case 0x08: /* SRI */
6540        insert = true;
6541        break;
6542    }
6543
6544    if (round) {
6545        uint64_t round_const = 1ULL << (shift - 1);
6546        tcg_round = tcg_const_i64(round_const);
6547    } else {
6548        TCGV_UNUSED_I64(tcg_round);
6549    }
6550
6551    tcg_rn = read_fp_dreg(s, rn);
6552    tcg_rd = (accumulate || insert) ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
6553
6554    if (insert) {
6555        handle_shri_with_ins(tcg_rd, tcg_rn, size, shift);
6556    } else {
6557        handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
6558                                accumulate, is_u, size, shift);
6559    }
6560
6561    write_fp_dreg(s, rd, tcg_rd);
6562
6563    tcg_temp_free_i64(tcg_rn);
6564    tcg_temp_free_i64(tcg_rd);
6565    if (round) {
6566        tcg_temp_free_i64(tcg_round);
6567    }
6568}
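
/* Right-shift amount decode used above for the scalar (64-bit) case:
 * shift = 2 * esize - immh:immb, so 0b1111111 gives shift 1 and
 * 0b1000000 gives shift 64.
 */
static inline int example_shr_amount(int immh, int immb)
{
    int immhb = immh << 3 | immb;
    return 2 * (8 << 3) - immhb;    /* 128 - immh:immb */
}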
6569
6570/* SHL/SLI - Scalar shift left */
6571static void handle_scalar_simd_shli(DisasContext *s, bool insert,
6572                                    int immh, int immb, int opcode,
6573                                    int rn, int rd)
6574{
6575    int size = 32 - clz32(immh) - 1;
6576    int immhb = immh << 3 | immb;
6577    int shift = immhb - (8 << size);
6578    TCGv_i64 tcg_rn;
6579    TCGv_i64 tcg_rd;
6580
6581    if (!extract32(immh, 3, 1)) {
6582        unallocated_encoding(s);
6583        return;
6584    }
6585
6586    if (!fp_access_check(s)) {
6587        return;
6588    }
6589
6590    tcg_rn = read_fp_dreg(s, rn);
6591    tcg_rd = insert ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
6592
6593    handle_shli_with_ins(tcg_rd, tcg_rn, insert, shift);
6594
6595    write_fp_dreg(s, rd, tcg_rd);
6596
6597    tcg_temp_free_i64(tcg_rn);
6598    tcg_temp_free_i64(tcg_rd);
6599}
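
/*
 * Plain-C model of the SLI case handled by handle_shli_with_ins() above,
 * assuming 64-bit elements and a shift of 0..63: the low 'shift' bits of
 * the destination are preserved and the rest comes from the shifted
 * source. For SHL the whole destination is simply replaced.
 */
static uint64_t sli64_model(uint64_t dst, uint64_t src, int shift)
{
    uint64_t mask = ~0ULL << shift;   /* bits written by the source */
    return (dst & ~mask) | (src << shift);
}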
6600
6601/* SQSHRN/SQSHRUN - Saturating (signed/unsigned) shift right with
6602 * (signed/unsigned) narrowing */
6603static void handle_vec_simd_sqshrn(DisasContext *s, bool is_scalar, bool is_q,
6604                                   bool is_u_shift, bool is_u_narrow,
6605                                   int immh, int immb, int opcode,
6606                                   int rn, int rd)
6607{
6608    int immhb = immh << 3 | immb;
6609    int size = 32 - clz32(immh) - 1;
6610    int esize = 8 << size;
6611    int shift = (2 * esize) - immhb;
6612    int elements = is_scalar ? 1 : (64 / esize);
6613    bool round = extract32(opcode, 0, 1);
6614    TCGMemOp ldop = (size + 1) | (is_u_shift ? 0 : MO_SIGN);
6615    TCGv_i64 tcg_rn, tcg_rd, tcg_round;
6616    TCGv_i32 tcg_rd_narrowed;
6617    TCGv_i64 tcg_final;
6618
6619    static NeonGenNarrowEnvFn * const signed_narrow_fns[4][2] = {
6620        { gen_helper_neon_narrow_sat_s8,
6621          gen_helper_neon_unarrow_sat8 },
6622        { gen_helper_neon_narrow_sat_s16,
6623          gen_helper_neon_unarrow_sat16 },
6624        { gen_helper_neon_narrow_sat_s32,
6625          gen_helper_neon_unarrow_sat32 },
6626        { NULL, NULL },
6627    };
6628    static NeonGenNarrowEnvFn * const unsigned_narrow_fns[4] = {
6629        gen_helper_neon_narrow_sat_u8,
6630        gen_helper_neon_narrow_sat_u16,
6631        gen_helper_neon_narrow_sat_u32,
6632        NULL
6633    };
6634    NeonGenNarrowEnvFn *narrowfn;
6635
6636    int i;
6637
6638    assert(size < 4);
6639
6640    if (extract32(immh, 3, 1)) {
6641        unallocated_encoding(s);
6642        return;
6643    }
6644
6645    if (!fp_access_check(s)) {
6646        return;
6647    }
6648
6649    if (is_u_shift) {
6650        narrowfn = unsigned_narrow_fns[size];
6651    } else {
6652        narrowfn = signed_narrow_fns[size][is_u_narrow ? 1 : 0];
6653    }
6654
6655    tcg_rn = tcg_temp_new_i64();
6656    tcg_rd = tcg_temp_new_i64();
6657    tcg_rd_narrowed = tcg_temp_new_i32();
6658    tcg_final = tcg_const_i64(0);
6659
6660    if (round) {
6661        uint64_t round_const = 1ULL << (shift - 1);
6662        tcg_round = tcg_const_i64(round_const);
6663    } else {
6664        TCGV_UNUSED_I64(tcg_round);
6665    }
6666
6667    for (i = 0; i < elements; i++) {
6668        read_vec_element(s, tcg_rn, rn, i, ldop);
6669        handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
6670                                false, is_u_shift, size+1, shift);
6671        narrowfn(tcg_rd_narrowed, cpu_env, tcg_rd);
6672        tcg_gen_extu_i32_i64(tcg_rd, tcg_rd_narrowed);
6673        tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
6674    }
6675
6676    if (!is_q) {
6677        clear_vec_high(s, rd);
6678        write_vec_element(s, tcg_final, rd, 0, MO_64);
6679    } else {
6680        write_vec_element(s, tcg_final, rd, 1, MO_64);
6681    }
6682
6683    if (round) {
6684        tcg_temp_free_i64(tcg_round);
6685    }
6686    tcg_temp_free_i64(tcg_rn);
6687    tcg_temp_free_i64(tcg_rd);
6688    tcg_temp_free_i32(tcg_rd_narrowed);
6689    tcg_temp_free_i64(tcg_final);
6690    return;
6691}
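
/*
 * Scalar model of one rounded, saturating narrowing step above, for the
 * signed 32 -> 16 case (SQRSHRN, round constant applied). A sketch only:
 * assumes <stdint.h>, an arithmetic right shift on signed types, and a
 * shift in the valid range 1..16.
 */
#include <stdint.h>

static int16_t sqrshrn_s32_model(int32_t x, int shift)
{
    int64_t v = ((int64_t)x + (1LL << (shift - 1))) >> shift; /* round */
    if (v > INT16_MAX) {
        return INT16_MAX;   /* saturate high; the helper also sets QC */
    }
    if (v < INT16_MIN) {
        return INT16_MIN;   /* saturate low */
    }
    return (int16_t)v;
}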
6692
6693/* SQSHLU, UQSHL, SQSHL: saturating left shifts */
6694static void handle_simd_qshl(DisasContext *s, bool scalar, bool is_q,
6695                             bool src_unsigned, bool dst_unsigned,
6696                             int immh, int immb, int rn, int rd)
6697{
6698    int immhb = immh << 3 | immb;
6699    int size = 32 - clz32(immh) - 1;
6700    int shift = immhb - (8 << size);
6701    int pass;
6702
6703    assert(immh != 0);
6704    assert(!(scalar && is_q));
6705
6706    if (!scalar) {
6707        if (!is_q && extract32(immh, 3, 1)) {
6708            unallocated_encoding(s);
6709            return;
6710        }
6711
6712        /* Since we use the variable-shift helpers we must
6713         * replicate the shift count into each element of
6714         * the tcg_shift value.
6715         */
6716        switch (size) {
6717        case 0:
6718            shift |= shift << 8;
6719            /* fall through */
6720        case 1:
6721            shift |= shift << 16;
6722            break;
6723        case 2:
6724        case 3:
6725            break;
6726        default:
6727            g_assert_not_reached();
6728        }
6729    }
6730
6731    if (!fp_access_check(s)) {
6732        return;
6733    }
6734
6735    if (size == 3) {
6736        TCGv_i64 tcg_shift = tcg_const_i64(shift);
6737        static NeonGenTwo64OpEnvFn * const fns[2][2] = {
6738            { gen_helper_neon_qshl_s64, gen_helper_neon_qshlu_s64 },
6739            { NULL, gen_helper_neon_qshl_u64 },
6740        };
6741        NeonGenTwo64OpEnvFn *genfn = fns[src_unsigned][dst_unsigned];
6742        int maxpass = is_q ? 2 : 1;
6743
6744        for (pass = 0; pass < maxpass; pass++) {
6745            TCGv_i64 tcg_op = tcg_temp_new_i64();
6746
6747            read_vec_element(s, tcg_op, rn, pass, MO_64);
6748            genfn(tcg_op, cpu_env, tcg_op, tcg_shift);
6749            write_vec_element(s, tcg_op, rd, pass, MO_64);
6750
6751            tcg_temp_free_i64(tcg_op);
6752        }
6753        tcg_temp_free_i64(tcg_shift);
6754
6755        if (!is_q) {
6756            clear_vec_high(s, rd);
6757        }
6758    } else {
6759        TCGv_i32 tcg_shift = tcg_const_i32(shift);
6760        static NeonGenTwoOpEnvFn * const fns[2][2][3] = {
6761            {
6762                { gen_helper_neon_qshl_s8,
6763                  gen_helper_neon_qshl_s16,
6764                  gen_helper_neon_qshl_s32 },
6765                { gen_helper_neon_qshlu_s8,
6766                  gen_helper_neon_qshlu_s16,
6767                  gen_helper_neon_qshlu_s32 }
6768            }, {
6769                { NULL, NULL, NULL },
6770                { gen_helper_neon_qshl_u8,
6771                  gen_helper_neon_qshl_u16,
6772                  gen_helper_neon_qshl_u32 }
6773            }
6774        };
6775        NeonGenTwoOpEnvFn *genfn = fns[src_unsigned][dst_unsigned][size];
6776        TCGMemOp memop = scalar ? size : MO_32;
6777        int maxpass = scalar ? 1 : is_q ? 4 : 2;
6778
6779        for (pass = 0; pass < maxpass; pass++) {
6780            TCGv_i32 tcg_op = tcg_temp_new_i32();
6781
6782            read_vec_element_i32(s, tcg_op, rn, pass, memop);
6783            genfn(tcg_op, cpu_env, tcg_op, tcg_shift);
6784            if (scalar) {
6785                switch (size) {
6786                case 0:
6787                    tcg_gen_ext8u_i32(tcg_op, tcg_op);
6788                    break;
6789                case 1:
6790                    tcg_gen_ext16u_i32(tcg_op, tcg_op);
6791                    break;
6792                case 2:
6793                    break;
6794                default:
6795                    g_assert_not_reached();
6796                }
6797                write_fp_sreg(s, rd, tcg_op);
6798            } else {
6799                write_vec_element_i32(s, tcg_op, rd, pass, MO_32);
6800            }
6801
6802            tcg_temp_free_i32(tcg_op);
6803        }
6804        tcg_temp_free_i32(tcg_shift);
6805
6806        if (!is_q && !scalar) {
6807            clear_vec_high(s, rd);
6808        }
6809    }
6810}
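
/*
 * Model of the shift-count replication above: the 32-bit helpers operate
 * on a whole 32-bit lane, so for 8/16-bit elements every sub-lane must
 * see the same count. Assumes <stdint.h>; illustrative only.
 */
#include <stdint.h>

static uint32_t replicate_shift_model(uint32_t shift, int size)
{
    if (size == 0) {
        shift |= shift << 8;    /* 4 x 8-bit lanes ... */
    }
    if (size <= 1) {
        shift |= shift << 16;   /* ... or 2 x 16-bit lanes */
    }
    return shift;               /* size 2/3: one lane, unchanged */
}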
6811
6812/* Common vector code for handling integer to FP conversion */
6813static void handle_simd_intfp_conv(DisasContext *s, int rd, int rn,
6814                                   int elements, int is_signed,
6815                                   int fracbits, int size)
6816{
6817    bool is_double = (size == 3);
6818    TCGv_ptr tcg_fpst = get_fpstatus_ptr();
6819    TCGv_i32 tcg_shift = tcg_const_i32(fracbits);
6820    TCGv_i64 tcg_int = tcg_temp_new_i64();
6821    TCGMemOp mop = size | (is_signed ? MO_SIGN : 0);
6822    int pass;
6823
6824    for (pass = 0; pass < elements; pass++) {
6825        read_vec_element(s, tcg_int, rn, pass, mop);
6826
6827        if (is_double) {
6828            TCGv_i64 tcg_double = tcg_temp_new_i64();
6829            if (is_signed) {
6830                gen_helper_vfp_sqtod(tcg_double, tcg_int,
6831                                     tcg_shift, tcg_fpst);
6832            } else {
6833                gen_helper_vfp_uqtod(tcg_double, tcg_int,
6834                                     tcg_shift, tcg_fpst);
6835            }
6836            if (elements == 1) {
6837                write_fp_dreg(s, rd, tcg_double);
6838            } else {
6839                write_vec_element(s, tcg_double, rd, pass, MO_64);
6840            }
6841            tcg_temp_free_i64(tcg_double);
6842        } else {
6843            TCGv_i32 tcg_single = tcg_temp_new_i32();
6844            if (is_signed) {
6845                gen_helper_vfp_sqtos(tcg_single, tcg_int,
6846                                     tcg_shift, tcg_fpst);
6847            } else {
6848                gen_helper_vfp_uqtos(tcg_single, tcg_int,
6849                                     tcg_shift, tcg_fpst);
6850            }
6851            if (elements == 1) {
6852                write_fp_sreg(s, rd, tcg_single);
6853            } else {
6854                write_vec_element_i32(s, tcg_single, rd, pass, MO_32);
6855            }
6856            tcg_temp_free_i32(tcg_single);
6857        }
6858    }
6859
6860    if (!is_double && elements == 2) {
6861        clear_vec_high(s, rd);
6862    }
6863
6864    tcg_temp_free_i64(tcg_int);
6865    tcg_temp_free_ptr(tcg_fpst);
6866    tcg_temp_free_i32(tcg_shift);
6867}
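
/*
 * What the conversion above computes, expressed as host arithmetic: the
 * integer is interpreted as a fixed-point value with 'fracbits' fraction
 * bits, i.e. result = x / 2^fracbits (fracbits == 0 is a plain
 * int-to-float). A sketch assuming <math.h>; the real helpers use
 * softfloat, not the host FPU, so their rounding follows FPCR.
 */
#include <math.h>

static double scvtf_fixed_model(long long x, int fracbits)
{
    return ldexp((double)x, -fracbits);   /* x * 2^-fracbits */
}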
6868
6869/* UCVTF/SCVTF - Integer to FP conversion */
6870static void handle_simd_shift_intfp_conv(DisasContext *s, bool is_scalar,
6871                                         bool is_q, bool is_u,
6872                                         int immh, int immb, int opcode,
6873                                         int rn, int rd)
6874{
6875    bool is_double = extract32(immh, 3, 1);
6876    int size = is_double ? MO_64 : MO_32;
6877    int elements;
6878    int immhb = immh << 3 | immb;
6879    int fracbits = (is_double ? 128 : 64) - immhb;
6880
6881    if (!extract32(immh, 2, 2)) {
6882        unallocated_encoding(s);
6883        return;
6884    }
6885
6886    if (is_scalar) {
6887        elements = 1;
6888    } else {
6889        elements = is_double ? 2 : is_q ? 4 : 2;
6890        if (is_double && !is_q) {
6891            unallocated_encoding(s);
6892            return;
6893        }
6894    }
6895
6896    if (!fp_access_check(s)) {
6897        return;
6898    }
6899
6900    /* immh == 0 would be a failure of the decode logic */
6901    g_assert(immh);
6902
6903    handle_simd_intfp_conv(s, rd, rn, elements, !is_u, fracbits, size);
6904}
6905
6906/* FCVTZS, FCVTZU - FP to fixed-point conversion */
6907static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar,
6908                                         bool is_q, bool is_u,
6909                                         int immh, int immb, int rn, int rd)
6910{
6911    bool is_double = extract32(immh, 3, 1);
6912    int immhb = immh << 3 | immb;
6913    int fracbits = (is_double ? 128 : 64) - immhb;
6914    int pass;
6915    TCGv_ptr tcg_fpstatus;
6916    TCGv_i32 tcg_rmode, tcg_shift;
6917
6918    if (!extract32(immh, 2, 2)) {
6919        unallocated_encoding(s);
6920        return;
6921    }
6922
6923    if (!is_scalar && !is_q && is_double) {
6924        unallocated_encoding(s);
6925        return;
6926    }
6927
6928    if (!fp_access_check(s)) {
6929        return;
6930    }
6931
6932    assert(!(is_scalar && is_q));
6933
6934    tcg_rmode = tcg_const_i32(arm_rmode_to_sf(FPROUNDING_ZERO));
6935    gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
6936    tcg_fpstatus = get_fpstatus_ptr();
6937    tcg_shift = tcg_const_i32(fracbits);
6938
6939    if (is_double) {
6940        int maxpass = is_scalar ? 1 : 2;
6941
6942        for (pass = 0; pass < maxpass; pass++) {
6943            TCGv_i64 tcg_op = tcg_temp_new_i64();
6944
6945            read_vec_element(s, tcg_op, rn, pass, MO_64);
6946            if (is_u) {
6947                gen_helper_vfp_touqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
6948            } else {
6949                gen_helper_vfp_tosqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
6950            }
6951            write_vec_element(s, tcg_op, rd, pass, MO_64);
6952            tcg_temp_free_i64(tcg_op);
6953        }
6954        if (!is_q) {
6955            clear_vec_high(s, rd);
6956        }
6957    } else {
6958        int maxpass = is_scalar ? 1 : is_q ? 4 : 2;
6959        for (pass = 0; pass < maxpass; pass++) {
6960            TCGv_i32 tcg_op = tcg_temp_new_i32();
6961
6962            read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
6963            if (is_u) {
6964                gen_helper_vfp_touls(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
6965            } else {
6966                gen_helper_vfp_tosls(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
6967            }
6968            if (is_scalar) {
6969                write_fp_sreg(s, rd, tcg_op);
6970            } else {
6971                write_vec_element_i32(s, tcg_op, rd, pass, MO_32);
6972            }
6973            tcg_temp_free_i32(tcg_op);
6974        }
6975        if (!is_q && !is_scalar) {
6976            clear_vec_high(s, rd);
6977        }
6978    }
6979
6980    tcg_temp_free_ptr(tcg_fpstatus);
6981    tcg_temp_free_i32(tcg_shift);
6982    gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
6983    tcg_temp_free_i32(tcg_rmode);
6984}
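
/*
 * Companion model for the FP -> fixed-point direction above: scale by
 * 2^fracbits, then convert with round-towards-zero and saturation, for
 * the signed 32-bit case. Sketch only, assuming <stdint.h>/<math.h>;
 * the helpers use softfloat and set the cumulative exception flags.
 */
#include <stdint.h>
#include <math.h>

static int32_t fcvtzs_fixed_model(float x, int fracbits)
{
    double scaled = ldexp((double)x, fracbits);   /* x * 2^fracbits */
    if (isnan(scaled)) {
        return 0;                  /* NaN converts to 0 */
    }
    if (scaled >= 2147483648.0) {
        return INT32_MAX;          /* saturate high */
    }
    if (scaled < -2147483648.0) {
        return INT32_MIN;          /* saturate low */
    }
    return (int32_t)scaled;        /* C cast truncates toward zero */
}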
6985
6986/* C3.6.9 AdvSIMD scalar shift by immediate
6987 *  31 30  29 28         23 22  19 18  16 15    11  10 9    5 4    0
6988 * +-----+---+-------------+------+------+--------+---+------+------+
6989 * | 0 1 | U | 1 1 1 1 1 0 | immh | immb | opcode | 1 |  Rn  |  Rd  |
6990 * +-----+---+-------------+------+------+--------+---+------+------+
6991 *
6992 * This is the scalar version, so it works on fixed-size registers.
6993 */
6994static void disas_simd_scalar_shift_imm(DisasContext *s, uint32_t insn)
6995{
6996    int rd = extract32(insn, 0, 5);
6997    int rn = extract32(insn, 5, 5);
6998    int opcode = extract32(insn, 11, 5);
6999    int immb = extract32(insn, 16, 3);
7000    int immh = extract32(insn, 19, 4);
7001    bool is_u = extract32(insn, 29, 1);
7002
7003    if (immh == 0) {
7004        unallocated_encoding(s);
7005        return;
7006    }
7007
7008    switch (opcode) {
7009    case 0x08: /* SRI */
7010        if (!is_u) {
7011            unallocated_encoding(s);
7012            return;
7013        }
7014        /* fall through */
7015    case 0x00: /* SSHR / USHR */
7016    case 0x02: /* SSRA / USRA */
7017    case 0x04: /* SRSHR / URSHR */
7018    case 0x06: /* SRSRA / URSRA */
7019        handle_scalar_simd_shri(s, is_u, immh, immb, opcode, rn, rd);
7020        break;
7021    case 0x0a: /* SHL / SLI */
7022        handle_scalar_simd_shli(s, is_u, immh, immb, opcode, rn, rd);
7023        break;
7024    case 0x1c: /* SCVTF, UCVTF */
7025        handle_simd_shift_intfp_conv(s, true, false, is_u, immh, immb,
7026                                     opcode, rn, rd);
7027        break;
7028    case 0x10: /* SQSHRUN, SQSHRUN2 */
7029    case 0x11: /* SQRSHRUN, SQRSHRUN2 */
7030        if (!is_u) {
7031            unallocated_encoding(s);
7032            return;
7033        }
7034        handle_vec_simd_sqshrn(s, true, false, false, true,
7035                               immh, immb, opcode, rn, rd);
7036        break;
7037    case 0x12: /* SQSHRN, SQSHRN2, UQSHRN, UQSHRN2 */
7038    case 0x13: /* SQRSHRN, SQRSHRN2, UQRSHRN, UQRSHRN2 */
7039        handle_vec_simd_sqshrn(s, true, false, is_u, is_u,
7040                               immh, immb, opcode, rn, rd);
7041        break;
7042    case 0xc: /* SQSHLU */
7043        if (!is_u) {
7044            unallocated_encoding(s);
7045            return;
7046        }
7047        handle_simd_qshl(s, true, false, false, true, immh, immb, rn, rd);
7048        break;
7049    case 0xe: /* SQSHL, UQSHL */
7050        handle_simd_qshl(s, true, false, is_u, is_u, immh, immb, rn, rd);
7051        break;
7052    case 0x1f: /* FCVTZS, FCVTZU */
7053        handle_simd_shift_fpint_conv(s, true, false, is_u, immh, immb, rn, rd);
7054        break;
7055    default:
7056        unallocated_encoding(s);
7057        break;
7058    }
7059}
7060
7061/* C3.6.10 AdvSIMD scalar three different
7062 *  31 30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
7063 * +-----+---+-----------+------+---+------+--------+-----+------+------+
7064 * | 0 1 | U | 1 1 1 1 0 | size | 1 |  Rm  | opcode | 0 0 |  Rn  |  Rd  |
7065 * +-----+---+-----------+------+---+------+--------+-----+------+------+
7066 */
7067static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn)
7068{
7069    bool is_u = extract32(insn, 29, 1);
7070    int size = extract32(insn, 22, 2);
7071    int opcode = extract32(insn, 12, 4);
7072    int rm = extract32(insn, 16, 5);
7073    int rn = extract32(insn, 5, 5);
7074    int rd = extract32(insn, 0, 5);
7075
7076    if (is_u) {
7077        unallocated_encoding(s);
7078        return;
7079    }
7080
7081    switch (opcode) {
7082    case 0x9: /* SQDMLAL, SQDMLAL2 */
7083    case 0xb: /* SQDMLSL, SQDMLSL2 */
7084    case 0xd: /* SQDMULL, SQDMULL2 */
7085        if (size == 0 || size == 3) {
7086            unallocated_encoding(s);
7087            return;
7088        }
7089        break;
7090    default:
7091        unallocated_encoding(s);
7092        return;
7093    }
7094
7095    if (!fp_access_check(s)) {
7096        return;
7097    }
7098
7099    if (size == 2) {
7100        TCGv_i64 tcg_op1 = tcg_temp_new_i64();
7101        TCGv_i64 tcg_op2 = tcg_temp_new_i64();
7102        TCGv_i64 tcg_res = tcg_temp_new_i64();
7103
7104        read_vec_element(s, tcg_op1, rn, 0, MO_32 | MO_SIGN);
7105        read_vec_element(s, tcg_op2, rm, 0, MO_32 | MO_SIGN);
7106
7107        tcg_gen_mul_i64(tcg_res, tcg_op1, tcg_op2);
7108        gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env, tcg_res, tcg_res);
7109
7110        switch (opcode) {
7111        case 0xd: /* SQDMULL, SQDMULL2 */
7112            break;
7113        case 0xb: /* SQDMLSL, SQDMLSL2 */
7114            tcg_gen_neg_i64(tcg_res, tcg_res);
7115            /* fall through */
7116        case 0x9: /* SQDMLAL, SQDMLAL2 */
7117            read_vec_element(s, tcg_op1, rd, 0, MO_64);
7118            gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env,
7119                                              tcg_res, tcg_op1);
7120            break;
7121        default:
7122            g_assert_not_reached();
7123        }
7124
7125        write_fp_dreg(s, rd, tcg_res);
7126
7127        tcg_temp_free_i64(tcg_op1);
7128        tcg_temp_free_i64(tcg_op2);
7129        tcg_temp_free_i64(tcg_res);
7130    } else {
7131        TCGv_i32 tcg_op1 = tcg_temp_new_i32();
7132        TCGv_i32 tcg_op2 = tcg_temp_new_i32();
7133        TCGv_i64 tcg_res = tcg_temp_new_i64();
7134
7135        read_vec_element_i32(s, tcg_op1, rn, 0, MO_16);
7136        read_vec_element_i32(s, tcg_op2, rm, 0, MO_16);
7137
7138        gen_helper_neon_mull_s16(tcg_res, tcg_op1, tcg_op2);
7139        gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env, tcg_res, tcg_res);
7140
7141        switch (opcode) {
7142        case 0xd: /* SQDMULL, SQDMULL2 */
7143            break;
7144        case 0xb: /* SQDMLSL, SQDMLSL2 */
7145            gen_helper_neon_negl_u32(tcg_res, tcg_res);
7146            /* fall through */
7147        case 0x9: /* SQDMLAL, SQDMLAL2 */
7148        {
7149            TCGv_i64 tcg_op3 = tcg_temp_new_i64();
7150            read_vec_element(s, tcg_op3, rd, 0, MO_32);
7151            gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env,
7152                                              tcg_res, tcg_op3);
7153            tcg_temp_free_i64(tcg_op3);
7154            break;
7155        }
7156        default:
7157            g_assert_not_reached();
7158        }
7159
7160        tcg_gen_ext32u_i64(tcg_res, tcg_res);
7161        write_fp_dreg(s, rd, tcg_res);
7162
7163        tcg_temp_free_i32(tcg_op1);
7164        tcg_temp_free_i32(tcg_op2);
7165        tcg_temp_free_i64(tcg_res);
7166    }
7167}
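
/*
 * Integer model of the SQDMULL doubling above: the 32x32 product is
 * exact in 64 bits, and doubling is done as a saturating add of the
 * product to itself. The only input pair that can overflow is
 * INT32_MIN * INT32_MIN. Assumes <stdint.h>; illustrative only.
 */
#include <stdint.h>

static int64_t sqdmull_s32_model(int32_t n, int32_t m)
{
    int64_t p = (int64_t)n * m;      /* exact widening multiply */
    if (p > INT64_MAX / 2) {
        return INT64_MAX;            /* saturated doubling; sets QC */
    }
    if (p < INT64_MIN / 2) {
        return INT64_MIN;            /* unreachable for 32-bit inputs */
    }
    return 2 * p;
}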
7168
7169static void handle_3same_64(DisasContext *s, int opcode, bool u,
7170                            TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, TCGv_i64 tcg_rm)
7171{
7172    /* Handle 64x64->64 opcodes which are shared between the scalar
7173     * and vector 3-same groups. We cover every opcode where size == 3
7174     * is valid in either the three-reg-same (integer, not pairwise)
7175     * or scalar-three-reg-same groups. (Some opcodes are not yet
7176     * implemented.)
7177     */
7178    TCGCond cond;
7179
7180    switch (opcode) {
7181    case 0x1: /* SQADD */
7182        if (u) {
7183            gen_helper_neon_qadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7184        } else {
7185            gen_helper_neon_qadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7186        }
7187        break;
7188    case 0x5: /* SQSUB */
7189        if (u) {
7190            gen_helper_neon_qsub_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7191        } else {
7192            gen_helper_neon_qsub_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7193        }
7194        break;
7195    case 0x6: /* CMGT, CMHI */
7196        /* 64 bit integer comparison, result = test ? (2^64 - 1) : 0.
7197         * We implement this using setcond (test) and then negating.
7198         */
7199        cond = u ? TCG_COND_GTU : TCG_COND_GT;
7200    do_cmop:
7201        tcg_gen_setcond_i64(cond, tcg_rd, tcg_rn, tcg_rm);
7202        tcg_gen_neg_i64(tcg_rd, tcg_rd);
7203        break;
7204    case 0x7: /* CMGE, CMHS */
7205        cond = u ? TCG_COND_GEU : TCG_COND_GE;
7206        goto do_cmop;
7207    case 0x11: /* CMTST, CMEQ */
7208        if (u) {
7209            cond = TCG_COND_EQ;
7210            goto do_cmop;
7211        }
7212        /* CMTST : test is "if ((X & Y) != 0)". */
7213        tcg_gen_and_i64(tcg_rd, tcg_rn, tcg_rm);
7214        tcg_gen_setcondi_i64(TCG_COND_NE, tcg_rd, tcg_rd, 0);
7215        tcg_gen_neg_i64(tcg_rd, tcg_rd);
7216        break;
7217    case 0x8: /* SSHL, USHL */
7218        if (u) {
7219            gen_helper_neon_shl_u64(tcg_rd, tcg_rn, tcg_rm);
7220        } else {
7221            gen_helper_neon_shl_s64(tcg_rd, tcg_rn, tcg_rm);
7222        }
7223        break;
7224    case 0x9: /* SQSHL, UQSHL */
7225        if (u) {
7226            gen_helper_neon_qshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7227        } else {
7228            gen_helper_neon_qshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7229        }
7230        break;
7231    case 0xa: /* SRSHL, URSHL */
7232        if (u) {
7233            gen_helper_neon_rshl_u64(tcg_rd, tcg_rn, tcg_rm);
7234        } else {
7235            gen_helper_neon_rshl_s64(tcg_rd, tcg_rn, tcg_rm);
7236        }
7237        break;
7238    case 0xb: /* SQRSHL, UQRSHL */
7239        if (u) {
7240            gen_helper_neon_qrshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7241        } else {
7242            gen_helper_neon_qrshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7243        }
7244        break;
7245    case 0x10: /* ADD, SUB */
7246        if (u) {
7247            tcg_gen_sub_i64(tcg_rd, tcg_rn, tcg_rm);
7248        } else {
7249            tcg_gen_add_i64(tcg_rd, tcg_rn, tcg_rm);
7250        }
7251        break;
7252    default:
7253        g_assert_not_reached();
7254    }
7255}
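
/*
 * The setcond-and-negate trick used for the comparisons above, in plain
 * C and assuming <stdint.h>: setcond yields 0 or 1, and negating turns
 * that into the all-zeroes / all-ones element the CM* instructions
 * produce.
 */
#include <stdint.h>

static uint64_t cmgt64_model(int64_t n, int64_t m)
{
    uint64_t t = (n > m);   /* 0 or 1, as tcg_gen_setcond_i64 produces */
    return -t;              /* 0 -> 0, 1 -> 0xffffffffffffffff */
}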
7256
7257/* Handle the 3-same-operands float operations; shared by the scalar
7258 * and vector encodings. The caller must filter out any encodings
7259 * not allocated for the encoding it is dealing with.
7260 */
7261static void handle_3same_float(DisasContext *s, int size, int elements,
7262                               int fpopcode, int rd, int rn, int rm)
7263{
7264    int pass;
7265    TCGv_ptr fpst = get_fpstatus_ptr();
7266
7267    for (pass = 0; pass < elements; pass++) {
7268        if (size) {
7269            /* Double */
7270            TCGv_i64 tcg_op1 = tcg_temp_new_i64();
7271            TCGv_i64 tcg_op2 = tcg_temp_new_i64();
7272            TCGv_i64 tcg_res = tcg_temp_new_i64();
7273
7274            read_vec_element(s, tcg_op1, rn, pass, MO_64);
7275            read_vec_element(s, tcg_op2, rm, pass, MO_64);
7276
7277            switch (fpopcode) {
7278            case 0x39: /* FMLS */
7279                /* As usual for ARM, separate negation for fused multiply-add */
7280                gen_helper_vfp_negd(tcg_op1, tcg_op1);
7281                /* fall through */
7282            case 0x19: /* FMLA */
7283                read_vec_element(s, tcg_res, rd, pass, MO_64);
7284                gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2,
7285                                       tcg_res, fpst);
7286                break;
7287            case 0x18: /* FMAXNM */
7288                gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
7289                break;
7290            case 0x1a: /* FADD */
7291                gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
7292                break;
7293            case 0x1b: /* FMULX */
7294                gen_helper_vfp_mulxd(tcg_res, tcg_op1, tcg_op2, fpst);
7295                break;
7296            case 0x1c: /* FCMEQ */
7297                gen_helper_neon_ceq_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7298                break;
7299            case 0x1e: /* FMAX */
7300                gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
7301                break;
7302            case 0x1f: /* FRECPS */
7303                gen_helper_recpsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7304                break;
7305            case 0x38: /* FMINNM */
7306                gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
7307                break;
7308            case 0x3a: /* FSUB */
7309                gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
7310                break;
7311            case 0x3e: /* FMIN */
7312                gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
7313                break;
7314            case 0x3f: /* FRSQRTS */
7315                gen_helper_rsqrtsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7316                break;
7317            case 0x5b: /* FMUL */
7318                gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
7319                break;
7320            case 0x5c: /* FCMGE */
7321                gen_helper_neon_cge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7322                break;
7323            case 0x5d: /* FACGE */
7324                gen_helper_neon_acge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7325                break;
7326            case 0x5f: /* FDIV */
7327                gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
7328                break;
7329            case 0x7a: /* FABD */
7330                gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
7331                gen_helper_vfp_absd(tcg_res, tcg_res);
7332                break;
7333            case 0x7c: /* FCMGT */
7334                gen_helper_neon_cgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7335                break;
7336            case 0x7d: /* FACGT */
7337                gen_helper_neon_acgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7338                break;
7339            default:
7340                g_assert_not_reached();
7341            }
7342
7343            write_vec_element(s, tcg_res, rd, pass, MO_64);
7344
7345            tcg_temp_free_i64(tcg_res);
7346            tcg_temp_free_i64(tcg_op1);
7347            tcg_temp_free_i64(tcg_op2);
7348        } else {
7349            /* Single */
7350            TCGv_i32 tcg_op1 = tcg_temp_new_i32();
7351            TCGv_i32 tcg_op2 = tcg_temp_new_i32();
7352            TCGv_i32 tcg_res = tcg_temp_new_i32();
7353
7354            read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
7355            read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);
7356
7357            switch (fpopcode) {
7358            case 0x39: /* FMLS */
7359                /* As usual for ARM, separate negation for fused multiply-add */
7360                gen_helper_vfp_negs(tcg_op1, tcg_op1);
7361                /* fall through */
7362            case 0x19: /* FMLA */
7363                read_vec_element_i32(s, tcg_res, rd, pass, MO_32);
7364                gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2,
7365                                       tcg_res, fpst);
7366                break;
7367            case 0x1a: /* FADD */
7368                gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
7369                break;
7370            case 0x1b: /* FMULX */
7371                gen_helper_vfp_mulxs(tcg_res, tcg_op1, tcg_op2, fpst);
7372                break;
7373            case 0x1c: /* FCMEQ */
7374                gen_helper_neon_ceq_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7375                break;
7376            case 0x1e: /* FMAX */
7377                gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
7378                break;
7379            case 0x1f: /* FRECPS */
7380                gen_helper_recpsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7381                break;
7382            case 0x18: /* FMAXNM */
7383                gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
7384                break;
7385            case 0x38: /* FMINNM */
7386                gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
7387                break;
7388            case 0x3a: /* FSUB */
7389                gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
7390                break;
7391            case 0x3e: /* FMIN */
7392                gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
7393                break;
7394            case 0x3f: /* FRSQRTS */
7395                gen_helper_rsqrtsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7396                break;
7397            case 0x5b: /* FMUL */
7398                gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
7399                break;
7400            case 0x5c: /* FCMGE */
7401                gen_helper_neon_cge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7402                break;
7403            case 0x5d: /* FACGE */
7404                gen_helper_neon_acge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7405                break;
7406            case 0x5f: /* FDIV */
7407                gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
7408                break;
7409            case 0x7a: /* FABD */
7410                gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
7411                gen_helper_vfp_abss(tcg_res, tcg_res);
7412                break;
7413            case 0x7c: /* FCMGT */
7414                gen_helper_neon_cgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7415                break;
7416            case 0x7d: /* FACGT */
7417                gen_helper_neon_acgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7418                break;
7419            default:
7420                g_assert_not_reached();
7421            }
7422
7423            if (elements == 1) {
7424                /* scalar single so clear high part */
7425                TCGv_i64 tcg_tmp = tcg_temp_new_i64();
7426
7427                tcg_gen_extu_i32_i64(tcg_tmp, tcg_res);
7428                write_vec_element(s, tcg_tmp, rd, pass, MO_64);
7429                tcg_temp_free_i64(tcg_tmp);
7430            } else {
7431                write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
7432            }
7433
7434            tcg_temp_free_i32(tcg_res);
7435            tcg_temp_free_i32(tcg_op1);
7436            tcg_temp_free_i32(tcg_op2);
7437        }
7438    }
7439
7440    tcg_temp_free_ptr(fpst);
7441
7442    if ((elements << size) < 4) {
7443        /* scalar, or non-quad vector op */
7444        clear_vec_high(s, rd);
7445    }
7446}
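
/*
 * Model of the FMLS handling above, assuming C99 <math.h>: negating one
 * multiplicand before a fused multiply-add gives d - n*m with a single
 * rounding, which is not the same as a multiply-add followed by
 * negation.
 */
#include <math.h>

static double fmls_model(double d, double n, double m)
{
    return fma(-n, m, d);   /* one rounding, like the muladd helper */
}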
7447
7448/* C3.6.11 AdvSIMD scalar three same
7449 *  31 30  29 28       24 23  22  21 20  16 15    11  10 9    5 4    0
7450 * +-----+---+-----------+------+---+------+--------+---+------+------+
7451 * | 0 1 | U | 1 1 1 1 0 | size | 1 |  Rm  | opcode | 1 |  Rn  |  Rd  |
7452 * +-----+---+-----------+------+---+------+--------+---+------+------+
7453 */
7454static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn)
7455{
7456    int rd = extract32(insn, 0, 5);
7457    int rn = extract32(insn, 5, 5);
7458    int opcode = extract32(insn, 11, 5);
7459    int rm = extract32(insn, 16, 5);
7460    int size = extract32(insn, 22, 2);
7461    bool u = extract32(insn, 29, 1);
7462    TCGv_i64 tcg_rd;
7463
7464    if (opcode >= 0x18) {
7465        /* Floating point: U, size[1] and opcode indicate operation */
7466        int fpopcode = opcode | (extract32(size, 1, 1) << 5) | (u << 6);
7467        switch (fpopcode) {
7468        case 0x1b: /* FMULX */
7469        case 0x1f: /* FRECPS */
7470        case 0x3f: /* FRSQRTS */
7471        case 0x5d: /* FACGE */
7472        case 0x7d: /* FACGT */
7473        case 0x1c: /* FCMEQ */
7474        case 0x5c: /* FCMGE */
7475        case 0x7c: /* FCMGT */
7476        case 0x7a: /* FABD */
7477            break;
7478        default:
7479            unallocated_encoding(s);
7480            return;
7481        }
7482
7483        if (!fp_access_check(s)) {
7484            return;
7485        }
7486
7487        handle_3same_float(s, extract32(size, 0, 1), 1, fpopcode, rd, rn, rm);
7488        return;
7489    }
7490
7491    switch (opcode) {
7492    case 0x1: /* SQADD, UQADD */
7493    case 0x5: /* SQSUB, UQSUB */
7494    case 0x9: /* SQSHL, UQSHL */
7495    case 0xb: /* SQRSHL, UQRSHL */
7496        break;
7497    case 0x8: /* SSHL, USHL */
7498    case 0xa: /* SRSHL, URSHL */
7499    case 0x6: /* CMGT, CMHI */
7500    case 0x7: /* CMGE, CMHS */
7501    case 0x11: /* CMTST, CMEQ */
7502    case 0x10: /* ADD, SUB (vector) */
7503        if (size != 3) {
7504            unallocated_encoding(s);
7505            return;
7506        }
7507        break;
7508    case 0x16: /* SQDMULH, SQRDMULH (vector) */
7509        if (size != 1 && size != 2) {
7510            unallocated_encoding(s);
7511            return;
7512        }
7513        break;
7514    default:
7515        unallocated_encoding(s);
7516        return;
7517    }
7518
7519    if (!fp_access_check(s)) {
7520        return;
7521    }
7522
7523    tcg_rd = tcg_temp_new_i64();
7524
7525    if (size == 3) {
7526        TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
7527        TCGv_i64 tcg_rm = read_fp_dreg(s, rm);
7528
7529        handle_3same_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rm);
7530        tcg_temp_free_i64(tcg_rn);
7531        tcg_temp_free_i64(tcg_rm);
7532    } else {
7533        /* Do a single operation on the lowest element in the vector.
7534         * We use the standard Neon helpers and rely on 0 OP 0 == 0 with
7535         * no side effects for all these operations.
7536         * OPTME: special-purpose helpers would avoid doing some
7537         * unnecessary work in the helper for the 8 and 16 bit cases.
7538         */
7539        NeonGenTwoOpEnvFn *genenvfn;
7540        TCGv_i32 tcg_rn = tcg_temp_new_i32();
7541        TCGv_i32 tcg_rm = tcg_temp_new_i32();
7542        TCGv_i32 tcg_rd32 = tcg_temp_new_i32();
7543
7544        read_vec_element_i32(s, tcg_rn, rn, 0, size);
7545        read_vec_element_i32(s, tcg_rm, rm, 0, size);
7546
7547        switch (opcode) {
7548        case 0x1: /* SQADD, UQADD */
7549        {
7550            static NeonGenTwoOpEnvFn * const fns[3][2] = {
7551                { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 },
7552                { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 },
7553                { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 },
7554            };
7555            genenvfn = fns[size][u];
7556            break;
7557        }
7558        case 0x5: /* SQSUB, UQSUB */
7559        {
7560            static NeonGenTwoOpEnvFn * const fns[3][2] = {
7561                { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 },
7562                { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 },
7563                { gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 },
7564            };
7565            genenvfn = fns[size][u];
7566            break;
7567        }
7568        case 0x9: /* SQSHL, UQSHL */
7569        {
7570            static NeonGenTwoOpEnvFn * const fns[3][2] = {
7571                { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
7572                { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
7573                { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
7574            };
7575            genenvfn = fns[size][u];
7576            break;
7577        }
7578        case 0xb: /* SQRSHL, UQRSHL */
7579        {
7580            static NeonGenTwoOpEnvFn * const fns[3][2] = {
7581                { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
7582                { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
7583                { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
7584            };
7585            genenvfn = fns[size][u];
7586            break;
7587        }
7588        case 0x16: /* SQDMULH, SQRDMULH */
7589        {
7590            static NeonGenTwoOpEnvFn * const fns[2][2] = {
7591                { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 },
7592                { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 },
7593            };
7594            assert(size == 1 || size == 2);
7595            genenvfn = fns[size - 1][u];
7596            break;
7597        }
7598        default:
7599            g_assert_not_reached();
7600        }
7601
7602        genenvfn(tcg_rd32, cpu_env, tcg_rn, tcg_rm);
7603        tcg_gen_extu_i32_i64(tcg_rd, tcg_rd32);
7604        tcg_temp_free_i32(tcg_rd32);
7605        tcg_temp_free_i32(tcg_rn);
7606        tcg_temp_free_i32(tcg_rm);
7607    }
7608
7609    write_fp_dreg(s, rd, tcg_rd);
7610
7611    tcg_temp_free_i64(tcg_rd);
7612}
7613
7614static void handle_2misc_64(DisasContext *s, int opcode, bool u,
7615                            TCGv_i64 tcg_rd, TCGv_i64 tcg_rn,
7616                            TCGv_i32 tcg_rmode, TCGv_ptr tcg_fpstatus)
7617{
7618    /* Handle 64->64 opcodes which are shared between the scalar and
7619     * vector 2-reg-misc groups. We cover every integer opcode where size == 3
7620     * is valid in either group and also the double-precision fp ops.
7621     * The caller need only provide tcg_rmode and tcg_fpstatus if the op
7622     * requires them.
7623     */
7624    TCGCond cond;
7625
7626    switch (opcode) {
7627    case 0x4: /* CLS, CLZ */
7628        if (u) {
7629            gen_helper_clz64(tcg_rd, tcg_rn);
7630        } else {
7631            gen_helper_cls64(tcg_rd, tcg_rn);
7632        }
7633        break;
7634    case 0x5: /* NOT */
7635        /* This opcode is shared with CNT and RBIT but we have earlier
7636         * enforced that size == 3 if and only if this is the NOT insn.
7637         */
7638        tcg_gen_not_i64(tcg_rd, tcg_rn);
7639        break;
7640    case 0x7: /* SQABS, SQNEG */
7641        if (u) {
7642            gen_helper_neon_qneg_s64(tcg_rd, cpu_env, tcg_rn);
7643        } else {
7644            gen_helper_neon_qabs_s64(tcg_rd, cpu_env, tcg_rn);
7645        }
7646        break;
7647    case 0xa: /* CMLT */
7648        /* 64 bit integer comparison against zero, result is
7649         * test ? (2^64 - 1) : 0. We implement via setcond(test) and
7650         * then negating.
7651         */
7652        cond = TCG_COND_LT;
7653    do_cmop:
7654        tcg_gen_setcondi_i64(cond, tcg_rd, tcg_rn, 0);
7655        tcg_gen_neg_i64(tcg_rd, tcg_rd);
7656        break;
7657    case 0x8: /* CMGT, CMGE */
7658        cond = u ? TCG_COND_GE : TCG_COND_GT;
7659        goto do_cmop;
7660    case 0x9: /* CMEQ, CMLE */
7661        cond = u ? TCG_COND_LE : TCG_COND_EQ;
7662        goto do_cmop;
7663    case 0xb: /* ABS, NEG */
7664        if (u) {
7665            tcg_gen_neg_i64(tcg_rd, tcg_rn);
7666        } else {
7667            TCGv_i64 tcg_zero = tcg_const_i64(0);
7668            tcg_gen_neg_i64(tcg_rd, tcg_rn);
7669            tcg_gen_movcond_i64(TCG_COND_GT, tcg_rd, tcg_rn, tcg_zero,
7670                                tcg_rn, tcg_rd);
7671            tcg_temp_free_i64(tcg_zero);
7672        }
7673        break;
7674    case 0x2f: /* FABS */
7675        gen_helper_vfp_absd(tcg_rd, tcg_rn);
7676        break;
7677    case 0x6f: /* FNEG */
7678        gen_helper_vfp_negd(tcg_rd, tcg_rn);
7679        break;
7680    case 0x7f: /* FSQRT */
7681        gen_helper_vfp_sqrtd(tcg_rd, tcg_rn, cpu_env);
7682        break;
7683    case 0x1a: /* FCVTNS */
7684    case 0x1b: /* FCVTMS */
7685    case 0x1c: /* FCVTAS */
7686    case 0x3a: /* FCVTPS */
7687    case 0x3b: /* FCVTZS */
7688    {
7689        TCGv_i32 tcg_shift = tcg_const_i32(0);
7690        gen_helper_vfp_tosqd(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
7691        tcg_temp_free_i32(tcg_shift);
7692        break;
7693    }
7694    case 0x5a: /* FCVTNU */
7695    case 0x5b: /* FCVTMU */
7696    case 0x5c: /* FCVTAU */
7697    case 0x7a: /* FCVTPU */
7698    case 0x7b: /* FCVTZU */
7699    {
7700        TCGv_i32 tcg_shift = tcg_const_i32(0);
7701        gen_helper_vfp_touqd(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
7702        tcg_temp_free_i32(tcg_shift);
7703        break;
7704    }
7705    case 0x18: /* FRINTN */
7706    case 0x19: /* FRINTM */
7707    case 0x38: /* FRINTP */
7708    case 0x39: /* FRINTZ */
7709    case 0x58: /* FRINTA */
7710    case 0x79: /* FRINTI */
7711        gen_helper_rintd(tcg_rd, tcg_rn, tcg_fpstatus);
7712        break;
7713    case 0x59: /* FRINTX */
7714        gen_helper_rintd_exact(tcg_rd, tcg_rn, tcg_fpstatus);
7715        break;
7716    default:
7717        g_assert_not_reached();
7718    }
7719}
7720
7721static void handle_2misc_fcmp_zero(DisasContext *s, int opcode,
7722                                   bool is_scalar, bool is_u, bool is_q,
7723                                   int size, int rn, int rd)
7724{
7725    bool is_double = (size == 3);
7726    TCGv_ptr fpst;
7727
7728    if (!fp_access_check(s)) {
7729        return;
7730    }
7731
7732    fpst = get_fpstatus_ptr();
7733
7734    if (is_double) {
7735        TCGv_i64 tcg_op = tcg_temp_new_i64();
7736        TCGv_i64 tcg_zero = tcg_const_i64(0);
7737        TCGv_i64 tcg_res = tcg_temp_new_i64();
7738        NeonGenTwoDoubleOPFn *genfn;
7739        bool swap = false;
7740        int pass;
7741
7742        switch (opcode) {
7743        case 0x2e: /* FCMLT (zero) */
7744            swap = true;
7745            /* fall through */
7746        case 0x2c: /* FCMGT (zero) */
7747            genfn = gen_helper_neon_cgt_f64;
7748            break;
7749        case 0x2d: /* FCMEQ (zero) */
7750            genfn = gen_helper_neon_ceq_f64;
7751            break;
7752        case 0x6d: /* FCMLE (zero) */
7753            swap = true;
7754            /* fall through */
7755        case 0x6c: /* FCMGE (zero) */
7756            genfn = gen_helper_neon_cge_f64;
7757            break;
7758        default:
7759            g_assert_not_reached();
7760        }
7761
7762        for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
7763            read_vec_element(s, tcg_op, rn, pass, MO_64);
7764            if (swap) {
7765                genfn(tcg_res, tcg_zero, tcg_op, fpst);
7766            } else {
7767                genfn(tcg_res, tcg_op, tcg_zero, fpst);
7768            }
7769            write_vec_element(s, tcg_res, rd, pass, MO_64);
7770        }
7771        if (is_scalar) {
7772            clear_vec_high(s, rd);
7773        }
7774
7775        tcg_temp_free_i64(tcg_res);
7776        tcg_temp_free_i64(tcg_zero);
7777        tcg_temp_free_i64(tcg_op);
7778    } else {
7779        TCGv_i32 tcg_op = tcg_temp_new_i32();
7780        TCGv_i32 tcg_zero = tcg_const_i32(0);
7781        TCGv_i32 tcg_res = tcg_temp_new_i32();
7782        NeonGenTwoSingleOPFn *genfn;
7783        bool swap = false;
7784        int pass, maxpasses;
7785
7786        switch (opcode) {
7787        case 0x2e: /* FCMLT (zero) */
7788            swap = true;
7789            /* fall through */
7790        case 0x2c: /* FCMGT (zero) */
7791            genfn = gen_helper_neon_cgt_f32;
7792            break;
7793        case 0x2d: /* FCMEQ (zero) */
7794            genfn = gen_helper_neon_ceq_f32;
7795            break;
7796        case 0x6d: /* FCMLE (zero) */
7797            swap = true;
7798            /* fall through */
7799        case 0x6c: /* FCMGE (zero) */
7800            genfn = gen_helper_neon_cge_f32;
7801            break;
7802        default:
7803            g_assert_not_reached();
7804        }
7805
7806        if (is_scalar) {
7807            maxpasses = 1;
7808        } else {
7809            maxpasses = is_q ? 4 : 2;
7810        }
7811
7812        for (pass = 0; pass < maxpasses; pass++) {
7813            read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
7814            if (swap) {
7815                genfn(tcg_res, tcg_zero, tcg_op, fpst);
7816            } else {
7817                genfn(tcg_res, tcg_op, tcg_zero, fpst);
7818            }
7819            if (is_scalar) {
7820                write_fp_sreg(s, rd, tcg_res);
7821            } else {
7822                write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
7823            }
7824        }
7825        tcg_temp_free_i32(tcg_res);
7826        tcg_temp_free_i32(tcg_zero);
7827        tcg_temp_free_i32(tcg_op);
7828        if (!is_q && !is_scalar) {
7829            clear_vec_high(s, rd);
7830        }
7831    }
7832
7833    tcg_temp_free_ptr(fpst);
7834}
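
/*
 * The operand-swap trick above in plain C: there is no "less than"
 * helper, so FCMLT (zero) is computed as 0 > x with the GT helper. A
 * sketch using a quiet host comparison; the softfloat helpers also
 * raise the appropriate exception flags for NaN inputs.
 */
static unsigned fcmlt_zero_model(float x)
{
    return (0.0f > x) ? ~0u : 0u;   /* NaN compares false, giving 0 */
}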
7835
7836static void handle_2misc_reciprocal(DisasContext *s, int opcode,
7837                                    bool is_scalar, bool is_u, bool is_q,
7838                                    int size, int rn, int rd)
7839{
7840    bool is_double = (size == 3);
7841    TCGv_ptr fpst = get_fpstatus_ptr();
7842
7843    if (is_double) {
7844        TCGv_i64 tcg_op = tcg_temp_new_i64();
7845        TCGv_i64 tcg_res = tcg_temp_new_i64();
7846        int pass;
7847
7848        for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
7849            read_vec_element(s, tcg_op, rn, pass, MO_64);
7850            switch (opcode) {
7851            case 0x3d: /* FRECPE */
7852                gen_helper_recpe_f64(tcg_res, tcg_op, fpst);
7853                break;
7854            case 0x3f: /* FRECPX */
7855                gen_helper_frecpx_f64(tcg_res, tcg_op, fpst);
7856                break;
7857            case 0x7d: /* FRSQRTE */
7858                gen_helper_rsqrte_f64(tcg_res, tcg_op, fpst);
7859                break;
7860            default:
7861                g_assert_not_reached();
7862            }
7863            write_vec_element(s, tcg_res, rd, pass, MO_64);
7864        }
7865        if (is_scalar) {
7866            clear_vec_high(s, rd);
7867        }
7868
7869        tcg_temp_free_i64(tcg_res);
7870        tcg_temp_free_i64(tcg_op);
7871    } else {
7872        TCGv_i32 tcg_op = tcg_temp_new_i32();
7873        TCGv_i32 tcg_res = tcg_temp_new_i32();
7874        int pass, maxpasses;
7875
7876        if (is_scalar) {
7877            maxpasses = 1;
7878        } else {
7879            maxpasses = is_q ? 4 : 2;
7880        }
7881
7882        for (pass = 0; pass < maxpasses; pass++) {
7883            read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
7884
7885            switch (opcode) {
7886            case 0x3c: /* URECPE */
7887                gen_helper_recpe_u32(tcg_res, tcg_op, fpst);
7888                break;
7889            case 0x3d: /* FRECPE */
7890                gen_helper_recpe_f32(tcg_res, tcg_op, fpst);
7891                break;
7892            case 0x3f: /* FRECPX */
7893                gen_helper_frecpx_f32(tcg_res, tcg_op, fpst);
7894                break;
7895            case 0x7d: /* FRSQRTE */
7896                gen_helper_rsqrte_f32(tcg_res, tcg_op, fpst);
7897                break;
7898            default:
7899                g_assert_not_reached();
7900            }
7901
7902            if (is_scalar) {
7903                write_fp_sreg(s, rd, tcg_res);
7904            } else {
7905                write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
7906            }
7907        }
7908        tcg_temp_free_i32(tcg_res);
7909        tcg_temp_free_i32(tcg_op);
7910        if (!is_q && !is_scalar) {
7911            clear_vec_high(s, rd);
7912        }
7913    }
7914    tcg_temp_free_ptr(fpst);
7915}
7916
7917static void handle_2misc_narrow(DisasContext *s, bool scalar,
7918                                int opcode, bool u, bool is_q,
7919                                int size, int rn, int rd)
7920{
7921    /* Handle 2-reg-misc ops which are narrowing (so each 2*size element
7922     * in the source becomes a size element in the destination).
7923     */
7924    int pass;
7925    TCGv_i32 tcg_res[2];
7926    int destelt = is_q ? 2 : 0;
7927    int passes = scalar ? 1 : 2;
7928
7929    if (scalar) {
7930        tcg_res[1] = tcg_const_i32(0);
7931    }
7932
7933    for (pass = 0; pass < passes; pass++) {
7934        TCGv_i64 tcg_op = tcg_temp_new_i64();
7935        NeonGenNarrowFn *genfn = NULL;
7936        NeonGenNarrowEnvFn *genenvfn = NULL;
7937
7938        if (scalar) {
7939            read_vec_element(s, tcg_op, rn, pass, size + 1);
7940        } else {
7941            read_vec_element(s, tcg_op, rn, pass, MO_64);
7942        }
7943        tcg_res[pass] = tcg_temp_new_i32();
7944
7945        switch (opcode) {
7946        case 0x12: /* XTN, SQXTUN */
7947        {
7948            static NeonGenNarrowFn * const xtnfns[3] = {
7949                gen_helper_neon_narrow_u8,
7950                gen_helper_neon_narrow_u16,
7951                tcg_gen_extrl_i64_i32,
7952            };
7953            static NeonGenNarrowEnvFn * const sqxtunfns[3] = {
7954                gen_helper_neon_unarrow_sat8,
7955                gen_helper_neon_unarrow_sat16,
7956                gen_helper_neon_unarrow_sat32,
7957            };
7958            if (u) {
7959                genenvfn = sqxtunfns[size];
7960            } else {
7961                genfn = xtnfns[size];
7962            }
7963            break;
7964        }
7965        case 0x14: /* SQXTN, UQXTN */
7966        {
7967            static NeonGenNarrowEnvFn * const fns[3][2] = {
7968                { gen_helper_neon_narrow_sat_s8,
7969                  gen_helper_neon_narrow_sat_u8 },
7970                { gen_helper_neon_narrow_sat_s16,
7971                  gen_helper_neon_narrow_sat_u16 },
7972                { gen_helper_neon_narrow_sat_s32,
7973                  gen_helper_neon_narrow_sat_u32 },
7974            };
7975            genenvfn = fns[size][u];
7976            break;
7977        }
7978        case 0x16: /* FCVTN, FCVTN2 */
7979            /* 32 bit to 16 bit or 64 bit to 32 bit float conversion */
7980            if (size == 2) {
7981                gen_helper_vfp_fcvtsd(tcg_res[pass], tcg_op, cpu_env);
7982            } else {
7983                TCGv_i32 tcg_lo = tcg_temp_new_i32();
7984                TCGv_i32 tcg_hi = tcg_temp_new_i32();
7985                tcg_gen_extr_i64_i32(tcg_lo, tcg_hi, tcg_op);
7986                gen_helper_vfp_fcvt_f32_to_f16(tcg_lo, tcg_lo, cpu_env);
7987                gen_helper_vfp_fcvt_f32_to_f16(tcg_hi, tcg_hi, cpu_env);
7988                tcg_gen_deposit_i32(tcg_res[pass], tcg_lo, tcg_hi, 16, 16);
7989                tcg_temp_free_i32(tcg_lo);
7990                tcg_temp_free_i32(tcg_hi);
7991            }
7992            break;
7993        case 0x56:  /* FCVTXN, FCVTXN2 */
7994            /* 64 bit to 32 bit float conversion
7995             * with von Neumann rounding (round to odd)
7996             */
7997            assert(size == 2);
7998            gen_helper_fcvtx_f64_to_f32(tcg_res[pass], tcg_op, cpu_env);
7999            break;
8000        default:
8001            g_assert_not_reached();
8002        }
8003
8004        if (genfn) {
8005            genfn(tcg_res[pass], tcg_op);
8006        } else if (genenvfn) {
8007            genenvfn(tcg_res[pass], cpu_env, tcg_op);
8008        }
8009
8010        tcg_temp_free_i64(tcg_op);
8011    }
8012
8013    for (pass = 0; pass < 2; pass++) {
8014        write_vec_element_i32(s, tcg_res[pass], rd, destelt + pass, MO_32);
8015        tcg_temp_free_i32(tcg_res[pass]);
8016    }
8017    if (!is_q) {
8018        clear_vec_high(s, rd);
8019    }
8020}
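
/*
 * Scalar model of the SQXTUN ("unarrow") case above, for 16 -> 8 bits:
 * a signed input saturated to the unsigned destination range. Assumes
 * <stdint.h>; the helper additionally sets QC on saturation.
 */
#include <stdint.h>

static uint8_t sqxtun_s16_model(int16_t x)
{
    if (x < 0) {
        return 0;       /* negative values clamp to 0 */
    }
    if (x > 0xff) {
        return 0xff;    /* too large for the narrow type */
    }
    return (uint8_t)x;
}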
8021
8022/* Remaining saturating accumulating ops */
8023static void handle_2misc_satacc(DisasContext *s, bool is_scalar, bool is_u,
8024                                bool is_q, int size, int rn, int rd)
8025{
8026    bool is_double = (size == 3);
8027
8028    if (is_double) {
8029        TCGv_i64 tcg_rn = tcg_temp_new_i64();
8030        TCGv_i64 tcg_rd = tcg_temp_new_i64();
8031        int pass;
8032
8033        for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
8034            read_vec_element(s, tcg_rn, rn, pass, MO_64);
8035            read_vec_element(s, tcg_rd, rd, pass, MO_64);
8036
8037            if (is_u) { /* USQADD */
8038                gen_helper_neon_uqadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rd);
8039            } else { /* SUQADD */
8040                gen_helper_neon_sqadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rd);
8041            }
8042            write_vec_element(s, tcg_rd, rd, pass, MO_64);
8043        }
8044        if (is_scalar) {
8045            clear_vec_high(s, rd);
8046        }
8047
8048        tcg_temp_free_i64(tcg_rd);
8049        tcg_temp_free_i64(tcg_rn);
8050    } else {
8051        TCGv_i32 tcg_rn = tcg_temp_new_i32();
8052        TCGv_i32 tcg_rd = tcg_temp_new_i32();
8053        int pass, maxpasses;
8054
8055        if (is_scalar) {
8056            maxpasses = 1;
8057        } else {
8058            maxpasses = is_q ? 4 : 2;
8059        }
8060
8061        for (pass = 0; pass < maxpasses; pass++) {
8062            if (is_scalar) {
8063                read_vec_element_i32(s, tcg_rn, rn, pass, size);
8064                read_vec_element_i32(s, tcg_rd, rd, pass, size);
8065            } else {
8066                read_vec_element_i32(s, tcg_rn, rn, pass, MO_32);
8067                read_vec_element_i32(s, tcg_rd, rd, pass, MO_32);
8068            }
8069
8070            if (is_u) { /* USQADD */
8071                switch (size) {
8072                case 0:
8073                    gen_helper_neon_uqadd_s8(tcg_rd, cpu_env, tcg_rn, tcg_rd);
8074                    break;
8075                case 1:
8076                    gen_helper_neon_uqadd_s16(tcg_rd, cpu_env, tcg_rn, tcg_rd);
8077                    break;
8078                case 2:
8079                    gen_helper_neon_uqadd_s32(tcg_rd, cpu_env, tcg_rn, tcg_rd);
8080                    break;
8081                default:
8082                    g_assert_not_reached();
8083                }
8084            } else { /* SUQADD */
8085                switch (size) {
8086                case 0:
8087                    gen_helper_neon_sqadd_u8(tcg_rd, cpu_env, tcg_rn, tcg_rd);
8088                    break;
8089                case 1:
8090                    gen_helper_neon_sqadd_u16(tcg_rd, cpu_env, tcg_rn, tcg_rd);
8091                    break;
8092                case 2:
8093                    gen_helper_neon_sqadd_u32(tcg_rd, cpu_env, tcg_rn, tcg_rd);
8094                    break;
8095                default:
8096                    g_assert_not_reached();
8097                }
8098            }
8099
8100            if (is_scalar) {
8101                TCGv_i64 tcg_zero = tcg_const_i64(0);
8102                write_vec_element(s, tcg_zero, rd, 0, MO_64);
8103                tcg_temp_free_i64(tcg_zero);
8104            }
8105            write_vec_element_i32(s, tcg_rd, rd, pass, MO_32);
8106        }
8107
8108        if (!is_q) {
8109            clear_vec_high(s, rd);
8110        }
8111
8112        tcg_temp_free_i32(tcg_rd);
8113        tcg_temp_free_i32(tcg_rn);
8114    }
8115}
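
/*
 * Byte-sized model of USQADD above: an unsigned accumulator plus a
 * signed operand, saturated to the unsigned range (SUQADD is the mirror
 * image, a signed accumulator plus an unsigned operand). Assumes
 * <stdint.h>; illustrative only.
 */
#include <stdint.h>

static uint8_t usqadd_b_model(uint8_t acc, int8_t x)
{
    int sum = (int)acc + (int)x;
    if (sum < 0) {
        return 0;       /* saturate low; the helper also sets QC */
    }
    if (sum > 0xff) {
        return 0xff;    /* saturate high */
    }
    return (uint8_t)sum;
}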
8116
8117/* C3.6.12 AdvSIMD scalar two reg misc
8118 *  31 30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
8119 * +-----+---+-----------+------+-----------+--------+-----+------+------+
8120 * | 0 1 | U | 1 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
8121 * +-----+---+-----------+------+-----------+--------+-----+------+------+
8122 */
8123static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn)
8124{
8125    int rd = extract32(insn, 0, 5);
8126    int rn = extract32(insn, 5, 5);
8127    int opcode = extract32(insn, 12, 5);
8128    int size = extract32(insn, 22, 2);
8129    bool u = extract32(insn, 29, 1);
8130    bool is_fcvt = false;
8131    int rmode;
8132    TCGv_i32 tcg_rmode;
8133    TCGv_ptr tcg_fpstatus;
8134
8135    switch (opcode) {
8136    case 0x3: /* USQADD / SUQADD */
8137        if (!fp_access_check(s)) {
8138            return;
8139        }
8140        handle_2misc_satacc(s, true, u, false, size, rn, rd);
8141        return;
8142    case 0x7: /* SQABS / SQNEG */
8143        break;
8144    case 0xa: /* CMLT */
8145        if (u) {
8146            unallocated_encoding(s);
8147            return;
8148        }
8149        /* fall through */
8150    case 0x8: /* CMGT, CMGE */
8151    case 0x9: /* CMEQ, CMLE */
8152    case 0xb: /* ABS, NEG */
8153        if (size != 3) {
8154            unallocated_encoding(s);
8155            return;
8156        }
8157        break;
8158    case 0x12: /* SQXTUN */
8159        if (!u) {
8160            unallocated_encoding(s);
8161            return;
8162        }
8163        /* fall through */
8164    case 0x14: /* SQXTN, UQXTN */
8165        if (size == 3) {
8166            unallocated_encoding(s);
8167            return;
8168        }
8169        if (!fp_access_check(s)) {
8170            return;
8171        }
8172        handle_2misc_narrow(s, true, opcode, u, false, size, rn, rd);
8173        return;
8174    case 0xc ... 0xf:
8175    case 0x16 ... 0x1d:
8176    case 0x1f:
8177        /* Floating point: U, size[1] and opcode indicate operation;
8178         * size[0] indicates single or double precision.
8179         */
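            /* For example FCVTZS has raw opcode 0x1b with size[1] set,
             * giving a combined opcode of 0x3b (0x7b for FCVTZU, U set).
             */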
8180        opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
8181        size = extract32(size, 0, 1) ? 3 : 2;
8182        switch (opcode) {
8183        case 0x2c: /* FCMGT (zero) */
8184        case 0x2d: /* FCMEQ (zero) */
8185        case 0x2e: /* FCMLT (zero) */
8186        case 0x6c: /* FCMGE (zero) */
8187        case 0x6d: /* FCMLE (zero) */
8188            handle_2misc_fcmp_zero(s, opcode, true, u, true, size, rn, rd);
8189            return;
8190        case 0x1d: /* SCVTF */
8191        case 0x5d: /* UCVTF */
8192        {
8193            bool is_signed = (opcode == 0x1d);
8194            if (!fp_access_check(s)) {
8195                return;
8196            }
8197            handle_simd_intfp_conv(s, rd, rn, 1, is_signed, 0, size);
8198            return;
8199        }
8200        case 0x3d: /* FRECPE */
8201        case 0x3f: /* FRECPX */
8202        case 0x7d: /* FRSQRTE */
8203            if (!fp_access_check(s)) {
8204                return;
8205            }
8206            handle_2misc_reciprocal(s, opcode, true, u, true, size, rn, rd);
8207            return;
8208        case 0x1a: /* FCVTNS */
8209        case 0x1b: /* FCVTMS */
8210        case 0x3a: /* FCVTPS */
8211        case 0x3b: /* FCVTZS */
8212        case 0x5a: /* FCVTNU */
8213        case 0x5b: /* FCVTMU */
8214        case 0x7a: /* FCVTPU */
8215        case 0x7b: /* FCVTZU */
8216            is_fcvt = true;
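                /* This maps FCVTN->FPROUNDING_TIEEVEN (0), FCVTP->POSINF (1),
                 * FCVTM->NEGINF (2) and FCVTZ->ZERO (3).
                 */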
8217            rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
8218            break;
8219        case 0x1c: /* FCVTAS */
8220        case 0x5c: /* FCVTAU */
8221            /* TIEAWAY doesn't fit in the usual rounding mode encoding */
8222            is_fcvt = true;
8223            rmode = FPROUNDING_TIEAWAY;
8224            break;
8225        case 0x56: /* FCVTXN, FCVTXN2 */
8226            if (size == 2) {
8227                unallocated_encoding(s);
8228                return;
8229            }
8230            if (!fp_access_check(s)) {
8231                return;
8232            }
8233            handle_2misc_narrow(s, true, opcode, u, false, size - 1, rn, rd);
8234            return;
8235        default:
8236            unallocated_encoding(s);
8237            return;
8238        }
8239        break;
8240    default:
8241        unallocated_encoding(s);
8242        return;
8243    }
8244
8245    if (!fp_access_check(s)) {
8246        return;
8247    }
8248
8249    if (is_fcvt) {
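            /* set_rmode returns the previous rounding mode in tcg_rmode,
             * which the second call at the end of the function restores.
             */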
8250        tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
8251        gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
8252        tcg_fpstatus = get_fpstatus_ptr();
8253    } else {
8254        TCGV_UNUSED_I32(tcg_rmode);
8255        TCGV_UNUSED_PTR(tcg_fpstatus);
8256    }
8257
8258    if (size == 3) {
8259        TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
8260        TCGv_i64 tcg_rd = tcg_temp_new_i64();
8261
8262        handle_2misc_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rmode, tcg_fpstatus);
8263        write_fp_dreg(s, rd, tcg_rd);
8264        tcg_temp_free_i64(tcg_rd);
8265        tcg_temp_free_i64(tcg_rn);
8266    } else {
8267        TCGv_i32 tcg_rn = tcg_temp_new_i32();
8268        TCGv_i32 tcg_rd = tcg_temp_new_i32();
8269
8270        read_vec_element_i32(s, tcg_rn, rn, 0, size);
8271
8272        switch (opcode) {
8273        case 0x7: /* SQABS, SQNEG */
8274        {
8275            NeonGenOneOpEnvFn *genfn;
8276            static NeonGenOneOpEnvFn * const fns[3][2] = {
8277                { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
8278                { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
8279                { gen_helper_neon_qabs_s32, gen_helper_neon_qneg_s32 },
8280            };
8281            genfn = fns[size][u];
8282            genfn(tcg_rd, cpu_env, tcg_rn);
8283            break;
8284        }
8285        case 0x1a: /* FCVTNS */
8286        case 0x1b: /* FCVTMS */
8287        case 0x1c: /* FCVTAS */
8288        case 0x3a: /* FCVTPS */
8289        case 0x3b: /* FCVTZS */
8290        {
8291            TCGv_i32 tcg_shift = tcg_const_i32(0);
8292            gen_helper_vfp_tosls(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
8293            tcg_temp_free_i32(tcg_shift);
8294            break;
8295        }
8296        case 0x5a: /* FCVTNU */
8297        case 0x5b: /* FCVTMU */
8298        case 0x5c: /* FCVTAU */
8299        case 0x7a: /* FCVTPU */
8300        case 0x7b: /* FCVTZU */
8301        {
8302            TCGv_i32 tcg_shift = tcg_const_i32(0);
8303            gen_helper_vfp_touls(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
8304            tcg_temp_free_i32(tcg_shift);
8305            break;
8306        }
8307        default:
8308            g_assert_not_reached();
8309        }
8310
8311        write_fp_sreg(s, rd, tcg_rd);
8312        tcg_temp_free_i32(tcg_rd);
8313        tcg_temp_free_i32(tcg_rn);
8314    }
8315
8316    if (is_fcvt) {
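            /* restore the rounding mode that was in force before the FCVT */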
8317        gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
8318        tcg_temp_free_i32(tcg_rmode);
8319        tcg_temp_free_ptr(tcg_fpstatus);
8320    }
8321}
8322
8323/* SSHR[RA]/USHR[RA] - Vector shift right (optional rounding/accumulate) */
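    /* The right-shift amount decodes as (2 * esize) - (immh:immb): for byte
     * elements (immh == 0b0001) immh:immb runs from 8 to 15, giving shifts
     * of 8 down to 1.
     */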
8324static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u,
8325                                 int immh, int immb, int opcode, int rn, int rd)
8326{
8327    int size = 32 - clz32(immh) - 1;
8328    int immhb = immh << 3 | immb;
8329    int shift = 2 * (8 << size) - immhb;
8330    bool accumulate = false;
8331    bool round = false;
8332    bool insert = false;
8333    int dsize = is_q ? 128 : 64;
8334    int esize = 8 << size;
8335    int elements = dsize/esize;
8336    TCGMemOp memop = size | (is_u ? 0 : MO_SIGN);
8337    TCGv_i64 tcg_rn = new_tmp_a64(s);
8338    TCGv_i64 tcg_rd = new_tmp_a64(s);
8339    TCGv_i64 tcg_round;
8340    int i;
8341
8342    if (extract32(immh, 3, 1) && !is_q) {
8343        unallocated_encoding(s);
8344        return;
8345    }
8346
8347    if (size > 3 && !is_q) {
8348        unallocated_encoding(s);
8349        return;
8350    }
8351
8352    if (!fp_access_check(s)) {
8353        return;
8354    }
8355
8356    switch (opcode) {
8357    case 0x02: /* SSRA / USRA (accumulate) */
8358        accumulate = true;
8359        break;
8360    case 0x04: /* SRSHR / URSHR (rounding) */
8361        round = true;
8362        break;
8363    case 0x06: /* SRSRA / URSRA (accum + rounding) */
8364        accumulate = round = true;
8365        break;
8366    case 0x08: /* SRI */
8367        insert = true;
8368        break;
8369    }
8370
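        /* Rounding adds 1 << (shift - 1), i.e. half the weight of the bits
         * shifted out, before the shift: round to nearest, ties towards
         * plus infinity.
         */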
8371    if (round) {
8372        uint64_t round_const = 1ULL << (shift - 1);
8373        tcg_round = tcg_const_i64(round_const);
8374    } else {
8375        TCGV_UNUSED_I64(tcg_round);
8376    }
8377
8378    for (i = 0; i < elements; i++) {
8379        read_vec_element(s, tcg_rn, rn, i, memop);
8380        if (accumulate || insert) {
8381            read_vec_element(s, tcg_rd, rd, i, memop);
8382        }
8383
8384        if (insert) {
8385            handle_shri_with_ins(tcg_rd, tcg_rn, size, shift);
8386        } else {
8387            handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
8388                                    accumulate, is_u, size, shift);
8389        }
8390
8391        write_vec_element(s, tcg_rd, rd, i, size);
8392    }
8393
8394    if (!is_q) {
8395        clear_vec_high(s, rd);
8396    }
8397
8398    if (round) {
8399        tcg_temp_free_i64(tcg_round);
8400    }
8401}
8402
8403/* SHL/SLI - Vector shift left */
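    /* Left shifts decode as (immh:immb) - esize: for byte elements
     * (immh == 0b0001) immh:immb values 8..15 encode shifts of 0..7.
     */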
8404static void handle_vec_simd_shli(DisasContext *s, bool is_q, bool insert,
8405                                int immh, int immb, int opcode, int rn, int rd)
8406{
8407    int size = 32 - clz32(immh) - 1;
8408    int immhb = immh << 3 | immb;
8409    int shift = immhb - (8 << size);
8410    int dsize = is_q ? 128 : 64;
8411    int esize = 8 << size;
8412    int elements = dsize/esize;
8413    TCGv_i64 tcg_rn = new_tmp_a64(s);
8414    TCGv_i64 tcg_rd = new_tmp_a64(s);
8415    int i;
8416
8417    if (extract32(immh, 3, 1) && !is_q) {
8418        unallocated_encoding(s);
8419        return;
8420    }
8421
8422    if (size > 3 && !is_q) {
8423        unallocated_encoding(s);
8424        return;
8425    }
8426
8427    if (!fp_access_check(s)) {
8428        return;
8429    }
8430
8431    for (i = 0; i < elements; i++) {
8432        read_vec_element(s, tcg_rn, rn, i, size);
8433        if (insert) {
8434            read_vec_element(s, tcg_rd, rd, i, size);
8435        }
8436
8437        handle_shli_with_ins(tcg_rd, tcg_rn, insert, shift);
8438
8439        write_vec_element(s, tcg_rd, rd, i, size);
8440    }
8441
8442    if (!is_q) {
8443        clear_vec_high(s, rd);
8444    }
8445}
8446
8447/* USHLL/SHLL - Vector shift left with widening */
8448static void handle_vec_simd_wshli(DisasContext *s, bool is_q, bool is_u,
8449                                 int immh, int immb, int opcode, int rn, int rd)
8450{
8451    int size = 32 - clz32(immh) - 1;
8452    int immhb = immh << 3 | immb;
8453    int shift = immhb - (8 << size);
8454    int dsize = 64;
8455    int esize = 8 << size;
8456    int elements = dsize/esize;
8457    TCGv_i64 tcg_rn = new_tmp_a64(s);
8458    TCGv_i64 tcg_rd = new_tmp_a64(s);
8459    int i;
8460
8461    if (size >= 3) {
8462        unallocated_encoding(s);
8463        return;
8464    }
8465
8466    if (!fp_access_check(s)) {
8467        return;
8468    }
8469
8470    /* For the LL variants the store is larger than the load,
8471     * so if rd == rn we would overwrite parts of our input.
8472     * Load everything up front and extract with shifts in the main loop.
8473     */
8474    read_vec_element(s, tcg_rn, rn, is_q ? 1 : 0, MO_64);
8475
8476    for (i = 0; i < elements; i++) {
8477        tcg_gen_shri_i64(tcg_rd, tcg_rn, i * esize);
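            /* The extension type is the extended-register 'option' value:
             * bits [1:0] give the source element size and bit 2 selects a
             * signed (SXT*) rather than unsigned (UXT*) extension.
             */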
8478        ext_and_shift_reg(tcg_rd, tcg_rd, size | (!is_u << 2), 0);
8479        tcg_gen_shli_i64(tcg_rd, tcg_rd, shift);
8480        write_vec_element(s, tcg_rd, rd, i, size + 1);
8481    }
8482}
8483
8484/* SHRN/RSHRN - Shift right with narrowing (and potential rounding) */
8485static void handle_vec_simd_shrn(DisasContext *s, bool is_q,
8486                                 int immh, int immb, int opcode, int rn, int rd)
8487{
8488    int immhb = immh << 3 | immb;
8489    int size = 32 - clz32(immh) - 1;
8490    int dsize = 64;
8491    int esize = 8 << size;
8492    int elements = dsize/esize;
8493    int shift = (2 * esize) - immhb;
8494    bool round = extract32(opcode, 0, 1);
8495    TCGv_i64 tcg_rn, tcg_rd, tcg_final;
8496    TCGv_i64 tcg_round;
8497    int i;
8498
8499    if (extract32(immh, 3, 1)) {
8500        unallocated_encoding(s);
8501        return;
8502    }
8503
8504    if (!fp_access_check(s)) {
8505        return;
8506    }
8507
8508    tcg_rn = tcg_temp_new_i64();
8509    tcg_rd = tcg_temp_new_i64();
8510    tcg_final = tcg_temp_new_i64();
8511    read_vec_element(s, tcg_final, rd, is_q ? 1 : 0, MO_64);
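        /* Each narrowed element is deposited into tcg_final, which is then
         * written back as one 64-bit element: the low half of rd for SHRN,
         * the high half for the SHRN2 (is_q) form.
         */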
8512
8513    if (round) {
8514        uint64_t round_const = 1ULL << (shift - 1);
8515        tcg_round = tcg_const_i64(round_const);
8516    } else {
8517        TCGV_UNUSED_I64(tcg_round);
8518    }
8519
8520    for (i = 0; i < elements; i++) {
8521        read_vec_element(s, tcg_rn, rn, i, size+1);
8522        handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
8523                                false, true, size+1, shift);
8524
8525        tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
8526    }
8527
8528    if (!is_q) {
8529        clear_vec_high(s, rd);
8530        write_vec_element(s, tcg_final, rd, 0, MO_64);
8531    } else {
8532        write_vec_element(s, tcg_final, rd, 1, MO_64);
8533    }
8534
8535    if (round) {
8536        tcg_temp_free_i64(tcg_round);
8537    }
8538    tcg_temp_free_i64(tcg_rn);
8539    tcg_temp_free_i64(tcg_rd);
8540    tcg_temp_free_i64(tcg_final);
8542}
8543
8544
8545/* C3.6.14 AdvSIMD shift by immediate
8546 *  31  30   29 28         23 22  19 18  16 15    11  10 9    5 4    0
8547 * +---+---+---+-------------+------+------+--------+---+------+------+
8548 * | 0 | Q | U | 0 1 1 1 1 0 | immh | immb | opcode | 1 |  Rn  |  Rd  |
8549 * +---+---+---+-------------+------+------+--------+---+------+------+
8550 */
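    /* immh == 0 encodings never reach this function: they belong to the
     * AdvSIMD modified immediate group, which the decode table matches first.
     */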
8551static void disas_simd_shift_imm(DisasContext *s, uint32_t insn)
8552{
8553    int rd = extract32(insn, 0, 5);
8554    int rn = extract32(insn, 5, 5);
8555    int opcode = extract32(insn, 11, 5);
8556    int immb = extract32(insn, 16, 3);
8557    int immh = extract32(insn, 19, 4);
8558    bool is_u = extract32(insn, 29, 1);
8559    bool is_q = extract32(insn, 30, 1);
8560
8561    switch (opcode) {
8562    case 0x08: /* SRI */
8563        if (!is_u) {
8564            unallocated_encoding(s);
8565            return;
8566        }
8567        /* fall through */
8568    case 0x00: /* SSHR / USHR */
8569    case 0x02: /* SSRA / USRA (accumulate) */
8570    case 0x04: /* SRSHR / URSHR (rounding) */
8571    case 0x06: /* SRSRA / URSRA (accum + rounding) */
8572        handle_vec_simd_shri(s, is_q, is_u, immh, immb, opcode, rn, rd);
8573        break;
8574    case 0x0a: /* SHL / SLI */
8575        handle_vec_simd_shli(s, is_q, is_u, immh, immb, opcode, rn, rd);
8576        break;
8577    case 0x10: /* SHRN / SQSHRUN */
8578    case 0x11: /* RSHRN / SQRSHRUN */
8579        if (is_u) {
8580            handle_vec_simd_sqshrn(s, false, is_q, false, true, immh, immb,
8581                                   opcode, rn, rd);
8582        } else {
8583            handle_vec_simd_shrn(s, is_q, immh, immb, opcode, rn, rd);
8584        }
8585        break;
8586    case 0x12: /* SQSHRN / UQSHRN */
8587    case 0x13: /* SQRSHRN / UQRSHRN */
8588        handle_vec_simd_sqshrn(s, false, is_q, is_u, is_u, immh, immb,
8589                               opcode, rn, rd);
8590        break;
8591    case 0x14: /* SSHLL / USHLL */
8592        handle_vec_simd_wshli(s, is_q, is_u, immh, immb, opcode, rn, rd);
8593        break;
8594    case 0x1c: /* SCVTF / UCVTF */
8595        handle_simd_shift_intfp_conv(s, false, is_q, is_u, immh, immb,
8596                                     opcode, rn, rd);
8597        break;
8598    case 0xc: /* SQSHLU */
8599        if (!is_u) {
8600            unallocated_encoding(s);
8601            return;
8602        }
8603        handle_simd_qshl(s, false, is_q, false, true, immh, immb, rn, rd);
8604        break;
8605    case 0xe: /* SQSHL, UQSHL */
8606        handle_simd_qshl(s, false, is_q, is_u, is_u, immh, immb, rn, rd);
8607        break;
8608    case 0x1f: /* FCVTZS / FCVTZU */
8609        handle_simd_shift_fpint_conv(s, false, is_q, is_u, immh, immb, rn, rd);
8610        return;
8611    default:
8612        unallocated_encoding(s);
8613        return;
8614    }
8615}
8616
8617/* Generate code to do a "long" addition or subtraction, ie one done in
8618 * TCGv_i64 on vector lanes twice the width specified by size.
8619 */
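    /* With size 0 or 1 each TCGv_i64 holds packed 16-bit or 32-bit lanes,
     * added or subtracted lanewise by the neon_addl/neon_subl helpers;
     * size 2 is a plain 64-bit add or subtract.
     */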
8620static void gen_neon_addl(int size, bool is_sub, TCGv_i64 tcg_res,
8621                          TCGv_i64 tcg_op1, TCGv_i64 tcg_op2)
8622{
8623    static NeonGenTwo64OpFn * const fns[3][2] = {
8624        { gen_helper_neon_addl_u16, gen_helper_neon_subl_u16 },
8625        { gen_helper_neon_addl_u32, gen_helper_neon_subl_u32 },
8626        { tcg_gen_add_i64, tcg_gen_sub_i64 },
8627    };
8628    NeonGenTwo64OpFn *genfn;
8629    assert(size < 3);
8630
8631    genfn = fns[size][is_sub];
8632    genfn(tcg_res, tcg_op1, tcg_op2);
8633}
8634
8635static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size,
8636                                int opcode, int rd, int rn, int rm)
8637{
8638    /* 3-reg-different widening insns: 64 x 64 -> 128 */
8639    TCGv_i64 tcg_res[2];
8640    int pass, accop;
8641
8642    tcg_res[0] = tcg_temp_new_i64();
8643    tcg_res[1] = tcg_temp_new_i64();
8644
8645    /* Does this op do an adding accumulate, a subtracting accumulate,
8646     * or no accumulate at all?
8647     */
8648    switch (opcode) {
8649    case 5:
8650    case 8:
8651    case 9:
8652        accop = 1;
8653        break;
8654    case 10:
8655    case 11:
8656        accop = -1;
8657        break;
8658    default:
8659        accop = 0;
8660        break;
8661    }
8662
8663    if (accop != 0) {
8664        read_vec_element(s, tcg_res[0], rd, 0, MO_64);
8665        read_vec_element(s, tcg_res[1], rd, 1, MO_64);
8666    }
8667
8668    /* size == 2 means two 32x32->64 operations; this is worth special
8669     * casing because we can generally handle it inline.
8670     */
8671    if (size == 2) {
8672        for (pass = 0; pass < 2; pass++) {
8673            TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8674            TCGv_i64 tcg_op2 = tcg_temp_new_i64();
8675            TCGv_i64 tcg_passres;
8676            TCGMemOp memop = MO_32 | (is_u ? 0 : MO_SIGN);
8677
8678            int elt = pass + is_q * 2;
8679
8680            read_vec_element(s, tcg_op1, rn, elt, memop);
8681            read_vec_element(s, tcg_op2, rm, elt, memop);
8682
8683            if (accop == 0) {
8684                tcg_passres = tcg_res[pass];
8685            } else {
8686                tcg_passres = tcg_temp_new_i64();
8687            }
8688
8689            switch (opcode) {
8690            case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
8691                tcg_gen_add_i64(tcg_passres, tcg_op1, tcg_op2);
8692                break;
8693            case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
8694                tcg_gen_sub_i64(tcg_passres, tcg_op1, tcg_op2);
8695                break;
8696            case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
8697            case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
8698            {
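                    /* Absolute difference: compute both op1 - op2 and
                     * op2 - op1, then a movcond on op1 >= op2 picks the
                     * non-negative one.
                     */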
8699                TCGv_i64 tcg_tmp1 = tcg_temp_new_i64();
8700                TCGv_i64 tcg_tmp2 = tcg_temp_new_i64();
8701
8702                tcg_gen_sub_i64(tcg_tmp1, tcg_op1, tcg_op2);
8703                tcg_gen_sub_i64(tcg_tmp2, tcg_op2, tcg_op1);
8704                tcg_gen_movcond_i64(is_u ? TCG_COND_GEU : TCG_COND_GE,
8705                                    tcg_passres,
8706                                    tcg_op1, tcg_op2, tcg_tmp1, tcg_tmp2);
8707                tcg_temp_free_i64(tcg_tmp1);
8708                tcg_temp_free_i64(tcg_tmp2);
8709                break;
8710            }
8711            case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
8712            case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
8713            case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
8714                tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
8715                break;
8716            case 9: /* SQDMLAL, SQDMLAL2 */
8717            case 11: /* SQDMLSL, SQDMLSL2 */
8718            case 13: /* SQDMULL, SQDMULL2 */
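                    /* The doubling in SQDMULL is done by saturating-adding
                     * the product to itself, which also sets QC on overflow.
                     */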
8719                tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
8720                gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env,
8721                                                  tcg_passres, tcg_passres);
8722                break;
8723            default:
8724                g_assert_not_reached();
8725            }
8726
8727            if (opcode == 9 || opcode == 11) {
8728                /* saturating accumulate ops */
8729                if (accop < 0) {
8730                    tcg_gen_neg_i64(tcg_passres, tcg_passres);
8731                }
8732                gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env,
8733                                                  tcg_res[pass], tcg_passres);
8734            } else if (accop > 0) {
8735                tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
8736            } else if (accop < 0) {
8737                tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
8738            }
8739
8740            if (accop != 0) {
8741                tcg_temp_free_i64(tcg_passres);
8742            }
8743
8744            tcg_temp_free_i64(tcg_op1);
8745            tcg_temp_free_i64(tcg_op2);
8746        }
8747    } else {
8748        /* size 0 or 1, generally helper functions */
8749        for (pass = 0; pass < 2; pass++) {
8750            TCGv_i32 tcg_op1 = tcg_temp_new_i32();
8751            TCGv_i32 tcg_op2 = tcg_temp_new_i32();
8752            TCGv_i64 tcg_passres;
8753            int elt = pass + is_q * 2;
8754
8755            read_vec_element_i32(s, tcg_op1, rn, elt, MO_32);
8756            read_vec_element_i32(s, tcg_op2, rm, elt, MO_32);
8757
8758            if (accop == 0) {
8759                tcg_passres = tcg_res[pass];
8760            } else {
8761                tcg_passres = tcg_temp_new_i64();
8762            }
8763
8764            switch (opcode) {
8765            case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
8766            case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
8767            {
8768                TCGv_i64 tcg_op2_64 = tcg_temp_new_i64();
8769                static NeonGenWidenFn * const widenfns[2][2] = {
8770                    { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
8771                    { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
8772                };
8773                NeonGenWidenFn *widenfn = widenfns[size][is_u];
8774
8775                widenfn(tcg_op2_64, tcg_op2);
8776                widenfn(tcg_passres, tcg_op1);
8777                gen_neon_addl(size, (opcode == 2), tcg_passres,
8778                              tcg_passres, tcg_op2_64);
8779                tcg_temp_free_i64(tcg_op2_64);
8780                break;
8781            }
8782            case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
8783            case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
8784                if (size == 0) {
8785                    if (is_u) {
8786                        gen_helper_neon_abdl_u16(tcg_passres, tcg_op1, tcg_op2);
8787                    } else {
8788                        gen_helper_neon_abdl_s16(tcg_passres, tcg_op1, tcg_op2);
8789                    }
8790                } else {
8791                    if (is_u) {
8792                        gen_helper_neon_abdl_u32(tcg_passres, tcg_op1, tcg_op2);
8793                    } else {
8794                        gen_helper_neon_abdl_s32(tcg_passres, tcg_op1, tcg_op2);
8795                    }
8796                }
8797                break;
8798            case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
8799            case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
8800            case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
8801                if (size == 0) {
8802                    if (is_u) {
8803                        gen_helper_neon_mull_u8(tcg_passres, tcg_op1, tcg_op2);
8804                    } else {
8805                        gen_helper_neon_mull_s8(tcg_passres, tcg_op1, tcg_op2);
8806                    }
8807                } else {
8808                    if (is_u) {
8809                        gen_helper_neon_mull_u16(tcg_passres, tcg_op1, tcg_op2);
8810                    } else {
8811                        gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
8812                    }
8813                }
8814                break;
8815            case 9: /* SQDMLAL, SQDMLAL2 */
8816            case 11: /* SQDMLSL, SQDMLSL2 */
8817            case 13: /* SQDMULL, SQDMULL2 */
8818                assert(size == 1);
8819                gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
8820                gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
8821                                                  tcg_passres, tcg_passres);
8822                break;
8823            case 14: /* PMULL */
8824                assert(size == 0);
8825                gen_helper_neon_mull_p8(tcg_passres, tcg_op1, tcg_op2);
8826                break;
8827            default:
8828                g_assert_not_reached();
8829            }
8830            tcg_temp_free_i32(tcg_op1);
8831            tcg_temp_free_i32(tcg_op2);
8832
8833            if (accop != 0) {
8834                if (opcode == 9 || opcode == 11) {
8835                    /* saturating accumulate ops */
8836                    if (accop < 0) {
8837                        gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
8838                    }
8839                    gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env,
8840                                                      tcg_res[pass],
8841                                                      tcg_passres);
8842                } else {
8843                    gen_neon_addl(size, (accop < 0), tcg_res[pass],
8844                                  tcg_res[pass], tcg_passres);
8845                }
8846                tcg_temp_free_i64(tcg_passres);
8847            }
8848        }
8849    }
8850
8851    write_vec_element(s, tcg_res[0], rd, 0, MO_64);
8852    write_vec_element(s, tcg_res[1], rd, 1, MO_64);
8853    tcg_temp_free_i64(tcg_res[0]);
8854    tcg_temp_free_i64(tcg_res[1]);
8855}
8856
8857static void handle_3rd_wide(DisasContext *s, int is_q, int is_u, int size,
8858                            int opcode, int rd, int rn, int rm)
8859{
8860    TCGv_i64 tcg_res[2];
8861    int part = is_q ? 2 : 0;
8862    int pass;
8863
8864    for (pass = 0; pass < 2; pass++) {
8865        TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8866        TCGv_i32 tcg_op2 = tcg_temp_new_i32();
8867        TCGv_i64 tcg_op2_wide = tcg_temp_new_i64();
8868        static NeonGenWidenFn * const widenfns[3][2] = {
8869            { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
8870            { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
8871            { tcg_gen_ext_i32_i64, tcg_gen_extu_i32_i64 },
8872        };
8873        NeonGenWidenFn *widenfn = widenfns[size][is_u];
8874
8875        read_vec_element(s, tcg_op1, rn, pass, MO_64);
8876        read_vec_element_i32(s, tcg_op2, rm, part + pass, MO_32);
8877        widenfn(tcg_op2_wide, tcg_op2);
8878        tcg_temp_free_i32(tcg_op2);
8879        tcg_res[pass] = tcg_temp_new_i64();
8880        gen_neon_addl(size, (opcode == 3),
8881                      tcg_res[pass], tcg_op1, tcg_op2_wide);
8882        tcg_temp_free_i64(tcg_op1);
8883        tcg_temp_free_i64(tcg_op2_wide);
8884    }
8885
8886    for (pass = 0; pass < 2; pass++) {
8887        write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
8888        tcg_temp_free_i64(tcg_res[pass]);
8889    }
8890}
8891
8892static void do_narrow_round_high_u32(TCGv_i32 res, TCGv_i64 in)
8893{
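        /* Round to nearest before narrowing: add half the weight of the
         * discarded low 32 bits (1 << 31), then take bits [63:32].
         */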
8894    tcg_gen_addi_i64(in, in, 1U << 31);
8895    tcg_gen_extrh_i64_i32(res, in);
8896}
8897
8898static void handle_3rd_narrowing(DisasContext *s, int is_q, int is_u, int size,
8899                                 int opcode, int rd, int rn, int rm)
8900{
8901    TCGv_i32 tcg_res[2];
8902    int part = is_q ? 2 : 0;
8903    int pass;
8904
8905    for (pass = 0; pass < 2; pass++) {
8906        TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8907        TCGv_i64 tcg_op2 = tcg_temp_new_i64();
8908        TCGv_i64 tcg_wideres = tcg_temp_new_i64();
8909        static NeonGenNarrowFn * const narrowfns[3][2] = {
8910            { gen_helper_neon_narrow_high_u8,
8911              gen_helper_neon_narrow_round_high_u8 },
8912            { gen_helper_neon_narrow_high_u16,
8913              gen_helper_neon_narrow_round_high_u16 },
8914            { tcg_gen_extrh_i64_i32, do_narrow_round_high_u32 },
8915        };
8916        NeonGenNarrowFn *gennarrow = narrowfns[size][is_u];
8917
8918        read_vec_element(s, tcg_op1, rn, pass, MO_64);
8919        read_vec_element(s, tcg_op2, rm, pass, MO_64);
8920
8921        gen_neon_addl(size, (opcode == 6), tcg_wideres, tcg_op1, tcg_op2);
8922
8923        tcg_temp_free_i64(tcg_op1);
8924        tcg_temp_free_i64(tcg_op2);
8925
8926        tcg_res[pass] = tcg_temp_new_i32();
8927        gennarrow(tcg_res[pass], tcg_wideres);
8928        tcg_temp_free_i64(tcg_wideres);
8929    }
8930
8931    for (pass = 0; pass < 2; pass++) {
8932        write_vec_element_i32(s, tcg_res[pass], rd, pass + part, MO_32);
8933        tcg_temp_free_i32(tcg_res[pass]);
8934    }
8935    if (!is_q) {
8936        clear_vec_high(s, rd);
8937    }
8938}
8939
8940static void handle_pmull_64(DisasContext *s, int is_q, int rd, int rn, int rm)
8941{
8942    /* PMULL of 64 x 64 -> 128 is an odd special case because it
8943     * is the only three-reg-diff instruction which produces a
8944     * 128-bit wide result from a single operation. However, since
8945     * it's possible to calculate the two halves more or less
8946     * separately, we just use two helper calls.
8947     */
8948    TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8949    TCGv_i64 tcg_op2 = tcg_temp_new_i64();
8950    TCGv_i64 tcg_res = tcg_temp_new_i64();
8951
8952    read_vec_element(s, tcg_op1, rn, is_q, MO_64);
8953    read_vec_element(s, tcg_op2, rm, is_q, MO_64);
8954    gen_helper_neon_pmull_64_lo(tcg_res, tcg_op1, tcg_op2);
8955    write_vec_element(s, tcg_res, rd, 0, MO_64);
8956    gen_helper_neon_pmull_64_hi(tcg_res, tcg_op1, tcg_op2);
8957    write_vec_element(s, tcg_res, rd, 1, MO_64);
8958
8959    tcg_temp_free_i64(tcg_op1);
8960    tcg_temp_free_i64(tcg_op2);
8961    tcg_temp_free_i64(tcg_res);
8962}
8963
8964/* C3.6.15 AdvSIMD three different
8965 *   31  30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
8966 * +---+---+---+-----------+------+---+------+--------+-----+------+------+
8967 * | 0 | Q | U | 0 1 1 1 0 | size | 1 |  Rm  | opcode | 0 0 |  Rn  |  Rd  |
8968 * +---+---+---+-----------+------+---+------+--------+-----+------+------+
8969 */
8970static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn)
8971{
8972    /* Instructions in this group fall into three basic classes
8973     * (in each case with the operation working on each element in
8974     * the input vectors):
8975     * (1) widening 64 x 64 -> 128 (with possibly Vd as an extra
8976     *     128 bit input)
8977     * (2) wide 64 x 128 -> 128
8978     * (3) narrowing 128 x 128 -> 64
8979     * Here we do initial decode, catch unallocated cases and
8980     * dispatch to separate functions for each class.
8981     */
8982    int is_q = extract32(insn, 30, 1);
8983    int is_u = extract32(insn, 29, 1);
8984    int size = extract32(insn, 22, 2);
8985    int opcode = extract32(insn, 12, 4);
8986    int rm = extract32(insn, 16, 5);
8987    int rn = extract32(insn, 5, 5);
8988    int rd = extract32(insn, 0, 5);
8989
8990    switch (opcode) {
8991    case 1: /* SADDW, SADDW2, UADDW, UADDW2 */
8992    case 3: /* SSUBW, SSUBW2, USUBW, USUBW2 */
8993        /* 64 x 128 -> 128 */
8994        if (size == 3) {
8995            unallocated_encoding(s);
8996            return;
8997        }
8998        if (!fp_access_check(s)) {
8999            return;
9000        }
9001        handle_3rd_wide(s, is_q, is_u, size, opcode, rd, rn, rm);
9002        break;
9003    case 4: /* ADDHN, ADDHN2, RADDHN, RADDHN2 */
9004    case 6: /* SUBHN, SUBHN2, RSUBHN, RSUBHN2 */
9005        /* 128 x 128 -> 64 */
9006        if (size == 3) {
9007            unallocated_encoding(s);
9008            return;
9009        }
9010        if (!fp_access_check(s)) {
9011            return;
9012        }
9013        handle_3rd_narrowing(s, is_q, is_u, size, opcode, rd, rn, rm);
9014        break;
9015    case 14: /* PMULL, PMULL2 */
9016        if (is_u || size == 1 || size == 2) {
9017            unallocated_encoding(s);
9018            return;
9019        }
9020        if (size == 3) {
9021            if (!arm_dc_feature(s, ARM_FEATURE_V8_PMULL)) {
9022                unallocated_encoding(s);
9023                return;
9024            }
9025            if (!fp_access_check(s)) {
9026                return;
9027            }
9028            handle_pmull_64(s, is_q, rd, rn, rm);
9029            return;
9030        }
9031        goto is_widening;
9032    case 9: /* SQDMLAL, SQDMLAL2 */
9033    case 11: /* SQDMLSL, SQDMLSL2 */
9034    case 13: /* SQDMULL, SQDMULL2 */
9035        if (is_u || size == 0) {
9036            unallocated_encoding(s);
9037            return;
9038        }
9039        /* fall through */
9040    case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
9041    case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
9042    case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
9043    case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
9044    case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
9045    case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
9046    case 12: /* SMULL, SMULL2, UMULL, UMULL2 */
9047        /* 64 x 64 -> 128 */
9048        if (size == 3) {
9049            unallocated_encoding(s);
9050            return;
9051        }
9052    is_widening:
9053        if (!fp_access_check(s)) {
9054            return;
9055        }
9056
9057        handle_3rd_widening(s, is_q, is_u, size, opcode, rd, rn, rm);
9058        break;
9059    default:
9060        /* opcode 15 not allocated */
9061        unallocated_encoding(s);
9062        break;
9063    }
9064}
9065
9066/* Logic op (opcode == 3) subgroup of C3.6.16. */
9067static void disas_simd_3same_logic(DisasContext *s, uint32_t insn)
9068{
9069    int rd = extract32(insn, 0, 5);
9070    int rn = extract32(insn, 5, 5);
9071    int rm = extract32(insn, 16, 5);
9072    int size = extract32(insn, 22, 2);
9073    bool is_u = extract32(insn, 29, 1);
9074    bool is_q = extract32(insn, 30, 1);
9075    TCGv_i64 tcg_op1, tcg_op2, tcg_res[2];
9076    int pass;
9077
9078    if (!fp_access_check(s)) {
9079        return;
9080    }
9081
9082    tcg_op1 = tcg_temp_new_i64();
9083    tcg_op2 = tcg_temp_new_i64();
9084    tcg_res[0] = tcg_temp_new_i64();
9085    tcg_res[1] = tcg_temp_new_i64();
9086
9087    for (pass = 0; pass < (is_q ? 2 : 1); pass++) {
9088        read_vec_element(s, tcg_op1, rn, pass, MO_64);
9089        read_vec_element(s, tcg_op2, rm, pass, MO_64);
9090
9091        if (!is_u) {
9092            switch (size) {
9093            case 0: /* AND */
9094                tcg_gen_and_i64(tcg_res[pass], tcg_op1, tcg_op2);
9095                break;
9096            case 1: /* BIC */
9097                tcg_gen_andc_i64(tcg_res[pass], tcg_op1, tcg_op2);
9098                break;
9099            case 2: /* ORR */
9100                tcg_gen_or_i64(tcg_res[pass], tcg_op1, tcg_op2);
9101                break;
9102            case 3: /* ORN */
9103                tcg_gen_orc_i64(tcg_res[pass], tcg_op1, tcg_op2);
9104                break;
9105            }
9106        } else {
9107            if (size != 0) {
9108                /* B* ops need res loaded to operate on */
9109                read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
9110            }
9111
9112            switch (size) {
9113            case 0: /* EOR */
9114                tcg_gen_xor_i64(tcg_res[pass], tcg_op1, tcg_op2);
9115                break;
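                /* BSL, BIT and BIF all select bits from a or b under a
                 * mask m via the identity
                 * (a & m) | (b & ~m) == ((a ^ b) & m) ^ b,
                 * which needs three TCG ops and no extra temporary.
                 */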
9116            case 1: /* BSL bitwise select */
9117                tcg_gen_xor_i64(tcg_op1, tcg_op1, tcg_op2);
9118                tcg_gen_and_i64(tcg_op1, tcg_op1, tcg_res[pass]);
9119                tcg_gen_xor_i64(tcg_res[pass], tcg_op2, tcg_op1);
9120                break;
9121            case 2: /* BIT, bitwise insert if true */
9122                tcg_gen_xor_i64(tcg_op1, tcg_op1, tcg_res[pass]);
9123                tcg_gen_and_i64(tcg_op1, tcg_op1, tcg_op2);
9124                tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
9125                break;
9126            case 3: /* BIF, bitwise insert if false */
9127                tcg_gen_xor_i64(tcg_op1, tcg_op1, tcg_res[pass]);
9128                tcg_gen_andc_i64(tcg_op1, tcg_op1, tcg_op2);
9129                tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
9130                break;
9131            }
9132        }
9133    }
9134
9135    write_vec_element(s, tcg_res[0], rd, 0, MO_64);
9136    if (!is_q) {
9137        tcg_gen_movi_i64(tcg_res[1], 0);
9138    }
9139    write_vec_element(s, tcg_res[1], rd, 1, MO_64);
9140
9141    tcg_temp_free_i64(tcg_op1);
9142    tcg_temp_free_i64(tcg_op2);
9143    tcg_temp_free_i64(tcg_res[0]);
9144    tcg_temp_free_i64(tcg_res[1]);
9145}
9146
9147/* Helper functions for 32 bit comparisons */
9148static void gen_max_s32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
9149{
9150    tcg_gen_movcond_i32(TCG_COND_GE, res, op1, op2, op1, op2);
9151}
9152
9153static void gen_max_u32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
9154{
9155    tcg_gen_movcond_i32(TCG_COND_GEU, res, op1, op2, op1, op2);
9156}
9157
9158static void gen_min_s32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
9159{
9160    tcg_gen_movcond_i32(TCG_COND_LE, res, op1, op2, op1, op2);
9161}
9162
9163static void gen_min_u32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
9164{
9165    tcg_gen_movcond_i32(TCG_COND_LEU, res, op1, op2, op1, op2);
9166}
9167
9168/* Pairwise op subgroup of C3.6.16.
9169 *
9170 * This is called directly or via disas_simd_3same_float for float pairwise
9171 * operations, where the opcode and size are calculated differently.
9172 */
9173static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode,
9174                                   int size, int rn, int rm, int rd)
9175{
9176    TCGv_ptr fpst;
9177    int pass;
9178
9179    if (!fp_access_check(s)) {
9180        return;
9181    }
9182
9183    /* FP ops need fpst; allocating it before the access check would leak it */
9184    if (opcode >= 0x58) {
9185        fpst = get_fpstatus_ptr();
9186    } else {
9187        TCGV_UNUSED_PTR(fpst);
9188    }
9189
9190    /* These operations work on the concatenated rm:rn, with each pair of
9191     * adjacent elements being operated on to produce an element in the result.
9192     */
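        /* For example ADDP Vd.4S, Vn.4S, Vm.4S computes
         *   Vd = { Vn[0]+Vn[1], Vn[2]+Vn[3], Vm[0]+Vm[1], Vm[2]+Vm[3] }.
         */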
9193    if (size == 3) {
9194        TCGv_i64 tcg_res[2];
9195
9196        for (pass = 0; pass < 2; pass++) {
9197            TCGv_i64 tcg_op1 = tcg_temp_new_i64();
9198            TCGv_i64 tcg_op2 = tcg_temp_new_i64();
9199            int passreg = (pass == 0) ? rn : rm;
9200
9201            read_vec_element(s, tcg_op1, passreg, 0, MO_64);
9202            read_vec_element(s, tcg_op2, passreg, 1, MO_64);
9203            tcg_res[pass] = tcg_temp_new_i64();
9204
9205            switch (opcode) {
9206            case 0x17: /* ADDP */
9207                tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
9208                break;
9209            case 0x58: /* FMAXNMP */
9210                gen_helper_vfp_maxnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9211                break;
9212            case 0x5a: /* FADDP */
9213                gen_helper_vfp_addd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9214                break;
9215            case 0x5e: /* FMAXP */
9216                gen_helper_vfp_maxd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9217                break;
9218            case 0x78: /* FMINNMP */
9219                gen_helper_vfp_minnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9220                break;
9221            case 0x7e: /* FMINP */
9222                gen_helper_vfp_mind(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9223                break;
9224            default:
9225                g_assert_not_reached();
9226            }
9227
9228            tcg_temp_free_i64(tcg_op1);
9229            tcg_temp_free_i64(tcg_op2);
9230        }
9231
9232        for (pass = 0; pass < 2; pass++) {
9233            write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
9234            tcg_temp_free_i64(tcg_res[pass]);
9235        }
9236    } else {
9237        int maxpass = is_q ? 4 : 2;
9238        TCGv_i32 tcg_res[4];
9239
9240        for (pass = 0; pass < maxpass; pass++) {
9241            TCGv_i32 tcg_op1 = tcg_temp_new_i32();
9242            TCGv_i32 tcg_op2 = tcg_temp_new_i32();
9243            NeonGenTwoOpFn *genfn = NULL;
9244            int passreg = pass < (maxpass / 2) ? rn : rm;
9245            int passelt = (is_q && (pass & 1)) ? 2 : 0;
9246
9247            read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_32);
9248            read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_32);
9249            tcg_res[pass] = tcg_temp_new_i32();
9250
9251            switch (opcode) {
9252            case 0x17: /* ADDP */
9253            {
9254                static NeonGenTwoOpFn * const fns[3] = {
9255                    gen_helper_neon_padd_u8,
9256                    gen_helper_neon_padd_u16,
9257                    tcg_gen_add_i32,
9258                };
9259                genfn = fns[size];
9260                break;
9261            }
9262            case 0x14: /* SMAXP, UMAXP */
9263            {
9264                static NeonGenTwoOpFn * const fns[3][2] = {
9265                    { gen_helper_neon_pmax_s8, gen_helper_neon_pmax_u8 },
9266                    { gen_helper_neon_pmax_s16, gen_helper_neon_pmax_u16 },
9267                    { gen_max_s32, gen_max_u32 },
9268                };
9269                genfn = fns[size][u];
9270                break;
9271            }
9272            case 0x15: /* SMINP, UMINP */
9273            {
9274                static NeonGenTwoOpFn * const fns[3][2] = {
9275                    { gen_helper_neon_pmin_s8, gen_helper_neon_pmin_u8 },
9276                    { gen_helper_neon_pmin_s16, gen_helper_neon_pmin_u16 },
9277                    { gen_min_s32, gen_min_u32 },
9278                };
9279                genfn = fns[size][u];
9280                break;
9281            }
9282            /* The FP operations are all on single floats (32 bit) */
9283            case 0x58: /* FMAXNMP */
9284                gen_helper_vfp_maxnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9285                break;
9286            case 0x5a: /* FADDP */
9287                gen_helper_vfp_adds(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9288                break;
9289            case 0x5e: /* FMAXP */
9290                gen_helper_vfp_maxs(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9291                break;
9292            case 0x78: /* FMINNMP */
9293                gen_helper_vfp_minnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9294                break;
9295            case 0x7e: /* FMINP */
9296                gen_helper_vfp_mins(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9297                break;
9298            default:
9299                g_assert_not_reached();
9300            }
9301
9302            /* FP ops were emitted above; call genfn for the integer ops */
9303            if (genfn) {
9304                genfn(tcg_res[pass], tcg_op1, tcg_op2);
9305            }
9306
9307            tcg_temp_free_i32(tcg_op1);
9308            tcg_temp_free_i32(tcg_op2);
9309        }
9310
9311        for (pass = 0; pass < maxpass; pass++) {
9312            write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
9313            tcg_temp_free_i32(tcg_res[pass]);
9314        }
9315        if (!is_q) {
9316            clear_vec_high(s, rd);
9317        }
9318    }
9319
9320    if (!TCGV_IS_UNUSED_PTR(fpst)) {
9321        tcg_temp_free_ptr(fpst);
9322    }
9323}
9324
9325/* Floating point op subgroup of C3.6.16. */
9326static void disas_simd_3same_float(DisasContext *s, uint32_t insn)
9327{
9328    /* For floating point ops, the U, size[1] and opcode bits
9329     * together indicate the operation. size[0] indicates single
9330     * or double.
9331     */
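        /* For example FADD (U=0, size[1]=0, opcode 0x1a) yields fpopcode
         * 0x1a, while FABD (U=1, size[1]=1, opcode 0x1a) yields 0x7a.
         */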
9332    int fpopcode = extract32(insn, 11, 5)
9333        | (extract32(insn, 23, 1) << 5)
9334        | (extract32(insn, 29, 1) << 6);
9335    int is_q = extract32(insn, 30, 1);
9336    int size = extract32(insn, 22, 1);
9337    int rm = extract32(insn, 16, 5);
9338    int rn = extract32(insn, 5, 5);
9339    int rd = extract32(insn, 0, 5);
9340
9341    int datasize = is_q ? 128 : 64;
9342    int esize = 32 << size;
9343    int elements = datasize / esize;
9344
9345    if (size == 1 && !is_q) {
9346        unallocated_encoding(s);
9347        return;
9348    }
9349
9350    switch (fpopcode) {
9351    case 0x58: /* FMAXNMP */
9352    case 0x5a: /* FADDP */
9353    case 0x5e: /* FMAXP */
9354    case 0x78: /* FMINNMP */
9355    case 0x7e: /* FMINP */
9356        if (size && !is_q) {
9357            unallocated_encoding(s);
9358            return;
9359        }
9360        handle_simd_3same_pair(s, is_q, 0, fpopcode, size ? MO_64 : MO_32,
9361                               rn, rm, rd);
9362        return;
9363    case 0x1b: /* FMULX */
9364    case 0x1f: /* FRECPS */
9365    case 0x3f: /* FRSQRTS */
9366    case 0x5d: /* FACGE */
9367    case 0x7d: /* FACGT */
9368    case 0x19: /* FMLA */
9369    case 0x39: /* FMLS */
9370    case 0x18: /* FMAXNM */
9371    case 0x1a: /* FADD */
9372    case 0x1c: /* FCMEQ */
9373    case 0x1e: /* FMAX */
9374    case 0x38: /* FMINNM */
9375    case 0x3a: /* FSUB */
9376    case 0x3e: /* FMIN */
9377    case 0x5b: /* FMUL */
9378    case 0x5c: /* FCMGE */
9379    case 0x5f: /* FDIV */
9380    case 0x7a: /* FABD */
9381    case 0x7c: /* FCMGT */
9382        if (!fp_access_check(s)) {
9383            return;
9384        }
9385
9386        handle_3same_float(s, size, elements, fpopcode, rd, rn, rm);
9387        return;
9388    default:
9389        unallocated_encoding(s);
9390        return;
9391    }
9392}
9393
9394/* Integer op subgroup of C3.6.16. */
9395static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
9396{
9397    int is_q = extract32(insn, 30, 1);
9398    int u = extract32(insn, 29, 1);
9399    int size = extract32(insn, 22, 2);
9400    int opcode = extract32(insn, 11, 5);
9401    int rm = extract32(insn, 16, 5);
9402    int rn = extract32(insn, 5, 5);
9403    int rd = extract32(insn, 0, 5);
9404    int pass;
9405
9406    switch (opcode) {
9407    case 0x13: /* MUL, PMUL */
9408        if (u && size != 0) {
9409            unallocated_encoding(s);
9410            return;
9411        }
9412        /* fall through */
9413    case 0x0: /* SHADD, UHADD */
9414    case 0x2: /* SRHADD, URHADD */
9415    case 0x4: /* SHSUB, UHSUB */
9416    case 0xc: /* SMAX, UMAX */
9417    case 0xd: /* SMIN, UMIN */
9418    case 0xe: /* SABD, UABD */
9419    case 0xf: /* SABA, UABA */
9420    case 0x12: /* MLA, MLS */
9421        if (size == 3) {
9422            unallocated_encoding(s);
9423            return;
9424        }
9425        break;
9426    case 0x16: /* SQDMULH, SQRDMULH */
9427        if (size == 0 || size == 3) {
9428            unallocated_encoding(s);
9429            return;
9430        }
9431        break;
9432    default:
9433        if (size == 3 && !is_q) {
9434            unallocated_encoding(s);
9435            return;
9436        }
9437        break;
9438    }
9439
9440    if (!fp_access_check(s)) {
9441        return;
9442    }
9443
9444    if (size == 3) {
9445        assert(is_q);
9446        for (pass = 0; pass < 2; pass++) {
9447            TCGv_i64 tcg_op1 = tcg_temp_new_i64();
9448            TCGv_i64 tcg_op2 = tcg_temp_new_i64();
9449            TCGv_i64 tcg_res = tcg_temp_new_i64();
9450
9451            read_vec_element(s, tcg_op1, rn, pass, MO_64);
9452            read_vec_element(s, tcg_op2, rm, pass, MO_64);
9453
9454            handle_3same_64(s, opcode, u, tcg_res, tcg_op1, tcg_op2);
9455
9456            write_vec_element(s, tcg_res, rd, pass, MO_64);
9457
9458            tcg_temp_free_i64(tcg_res);
9459            tcg_temp_free_i64(tcg_op1);
9460            tcg_temp_free_i64(tcg_op2);
9461        }
9462    } else {
9463        for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
9464            TCGv_i32 tcg_op1 = tcg_temp_new_i32();
9465            TCGv_i32 tcg_op2 = tcg_temp_new_i32();
9466            TCGv_i32 tcg_res = tcg_temp_new_i32();
9467            NeonGenTwoOpFn *genfn = NULL;
9468            NeonGenTwoOpEnvFn *genenvfn = NULL;
9469
9470            read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
9471            read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);
9472
9473            switch (opcode) {
9474            case 0x0: /* SHADD, UHADD */
9475            {
9476                static NeonGenTwoOpFn * const fns[3][2] = {
9477                    { gen_helper_neon_hadd_s8, gen_helper_neon_hadd_u8 },
9478                    { gen_helper_neon_hadd_s16, gen_helper_neon_hadd_u16 },
9479                    { gen_helper_neon_hadd_s32, gen_helper_neon_hadd_u32 },
9480                };
9481                genfn = fns[size][u];
9482                break;
9483            }
9484            case 0x1: /* SQADD, UQADD */
9485            {
9486                static NeonGenTwoOpEnvFn * const fns[3][2] = {
9487                    { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 },
9488                    { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 },
9489                    { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 },
9490                };
9491                genenvfn = fns[size][u];
9492                break;
9493            }
9494            case 0x2: /* SRHADD, URHADD */
9495            {
9496                static NeonGenTwoOpFn * const fns[3][2] = {
9497                    { gen_helper_neon_rhadd_s8, gen_helper_neon_rhadd_u8 },
9498                    { gen_helper_neon_rhadd_s16, gen_helper_neon_rhadd_u16 },
9499                    { gen_helper_neon_rhadd_s32, gen_helper_neon_rhadd_u32 },
9500                };
9501                genfn = fns[size][u];
9502                break;
9503            }
9504            case 0x4: /* SHSUB, UHSUB */
9505            {
9506                static NeonGenTwoOpFn * const fns[3][2] = {
9507                    { gen_helper_neon_hsub_s8, gen_helper_neon_hsub_u8 },
9508                    { gen_helper_neon_hsub_s16, gen_helper_neon_hsub_u16 },
9509                    { gen_helper_neon_hsub_s32, gen_helper_neon_hsub_u32 },
9510                };
9511                genfn = fns[size][u];
9512                break;
9513            }
9514            case 0x5: /* SQSUB, UQSUB */
9515            {
9516                static NeonGenTwoOpEnvFn * const fns[3][2] = {
9517                    { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 },
9518                    { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 },
9519                    { gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 },
9520                };
9521                genenvfn = fns[size][u];
9522                break;
9523            }
9524            case 0x6: /* CMGT, CMHI */
9525            {
9526                static NeonGenTwoOpFn * const fns[3][2] = {
9527                    { gen_helper_neon_cgt_s8, gen_helper_neon_cgt_u8 },
9528                    { gen_helper_neon_cgt_s16, gen_helper_neon_cgt_u16 },
9529                    { gen_helper_neon_cgt_s32, gen_helper_neon_cgt_u32 },
9530                };
9531                genfn = fns[size][u];
9532                break;
9533            }
9534            case 0x7: /* CMGE, CMHS */
9535            {
9536                static NeonGenTwoOpFn * const fns[3][2] = {
9537                    { gen_helper_neon_cge_s8, gen_helper_neon_cge_u8 },
9538                    { gen_helper_neon_cge_s16, gen_helper_neon_cge_u16 },
9539                    { gen_helper_neon_cge_s32, gen_helper_neon_cge_u32 },
9540                };
9541                genfn = fns[size][u];
9542                break;
9543            }
9544            case 0x8: /* SSHL, USHL */
9545            {
9546                static NeonGenTwoOpFn * const fns[3][2] = {
9547                    { gen_helper_neon_shl_s8, gen_helper_neon_shl_u8 },
9548                    { gen_helper_neon_shl_s16, gen_helper_neon_shl_u16 },
9549                    { gen_helper_neon_shl_s32, gen_helper_neon_shl_u32 },
9550                };
9551                genfn = fns[size][u];
9552                break;
9553            }
9554            case 0x9: /* SQSHL, UQSHL */
9555            {
9556                static NeonGenTwoOpEnvFn * const fns[3][2] = {
9557                    { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
9558                    { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
9559                    { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
9560                };
9561                genenvfn = fns[size][u];
9562                break;
9563            }
9564            case 0xa: /* SRSHL, URSHL */
9565            {
9566                static NeonGenTwoOpFn * const fns[3][2] = {
9567                    { gen_helper_neon_rshl_s8, gen_helper_neon_rshl_u8 },
9568                    { gen_helper_neon_rshl_s16, gen_helper_neon_rshl_u16 },
9569                    { gen_helper_neon_rshl_s32, gen_helper_neon_rshl_u32 },
9570                };
9571                genfn = fns[size][u];
9572                break;
9573            }
9574            case 0xb: /* SQRSHL, UQRSHL */
9575            {
9576                static NeonGenTwoOpEnvFn * const fns[3][2] = {
9577                    { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
9578                    { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
9579                    { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
9580                };
9581                genenvfn = fns[size][u];
9582                break;
9583            }
9584            case 0xc: /* SMAX, UMAX */
9585            {
9586                static NeonGenTwoOpFn * const fns[3][2] = {
9587                    { gen_helper_neon_max_s8, gen_helper_neon_max_u8 },
9588                    { gen_helper_neon_max_s16, gen_helper_neon_max_u16 },
9589                    { gen_max_s32, gen_max_u32 },
9590                };
9591                genfn = fns[size][u];
9592                break;
9593            }
9594
9595            case 0xd: /* SMIN, UMIN */
9596            {
9597                static NeonGenTwoOpFn * const fns[3][2] = {
9598                    { gen_helper_neon_min_s8, gen_helper_neon_min_u8 },
9599                    { gen_helper_neon_min_s16, gen_helper_neon_min_u16 },
9600                    { gen_min_s32, gen_min_u32 },
9601                };
9602                genfn = fns[size][u];
9603                break;
9604            }
9605            case 0xe: /* SABD, UABD */
9606            case 0xf: /* SABA, UABA */
9607            {
9608                static NeonGenTwoOpFn * const fns[3][2] = {
9609                    { gen_helper_neon_abd_s8, gen_helper_neon_abd_u8 },
9610                    { gen_helper_neon_abd_s16, gen_helper_neon_abd_u16 },
9611                    { gen_helper_neon_abd_s32, gen_helper_neon_abd_u32 },
9612                };
9613                genfn = fns[size][u];
9614                break;
9615            }
9616            case 0x10: /* ADD, SUB */
9617            {
9618                static NeonGenTwoOpFn * const fns[3][2] = {
9619                    { gen_helper_neon_add_u8, gen_helper_neon_sub_u8 },
9620                    { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
9621                    { tcg_gen_add_i32, tcg_gen_sub_i32 },
9622                };
9623                genfn = fns[size][u];
9624                break;
9625            }
9626            case 0x11: /* CMTST, CMEQ */
9627            {
9628                static NeonGenTwoOpFn * const fns[3][2] = {
9629                    { gen_helper_neon_tst_u8, gen_helper_neon_ceq_u8 },
9630                    { gen_helper_neon_tst_u16, gen_helper_neon_ceq_u16 },
9631                    { gen_helper_neon_tst_u32, gen_helper_neon_ceq_u32 },
9632                };
9633                genfn = fns[size][u];
9634                break;
9635            }
9636            case 0x13: /* MUL, PMUL */
9637                if (u) {
9638                    /* PMUL */
9639                    assert(size == 0);
9640                    genfn = gen_helper_neon_mul_p8;
9641                    break;
9642                }
9643                /* fall through: MUL */
9644            case 0x12: /* MLA, MLS */
9645            {
9646                static NeonGenTwoOpFn * const fns[3] = {
9647                    gen_helper_neon_mul_u8,
9648                    gen_helper_neon_mul_u16,
9649                    tcg_gen_mul_i32,
9650                };
9651                genfn = fns[size];
9652                break;
9653            }
9654            case 0x16: /* SQDMULH, SQRDMULH */
9655            {
9656                static NeonGenTwoOpEnvFn * const fns[2][2] = {
9657                    { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 },
9658                    { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 },
9659                };
9660                assert(size == 1 || size == 2);
9661                genenvfn = fns[size - 1][u];
9662                break;
9663            }
9664            default:
9665                g_assert_not_reached();
9666            }
9667
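            /* The env-taking helpers are the saturating ops, which need
             * cpu_env so they can set the cumulative saturation (QC) flag.
             */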
9668            if (genenvfn) {
9669                genenvfn(tcg_res, cpu_env, tcg_op1, tcg_op2);
9670            } else {
9671                genfn(tcg_res, tcg_op1, tcg_op2);
9672            }
9673
9674            if (opcode == 0xf || opcode == 0x12) {
9675                /* SABA, UABA, MLA, MLS: accumulating ops */
9676                static NeonGenTwoOpFn * const fns[3][2] = {
9677                    { gen_helper_neon_add_u8, gen_helper_neon_sub_u8 },
9678                    { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
9679                    { tcg_gen_add_i32, tcg_gen_sub_i32 },
9680                };
9681                bool is_sub = (opcode == 0x12 && u); /* MLS */
9682
9683                genfn = fns[size][is_sub];
9684                read_vec_element_i32(s, tcg_op1, rd, pass, MO_32);
9685                genfn(tcg_res, tcg_op1, tcg_res);
9686            }
9687
9688            write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
9689
9690            tcg_temp_free_i32(tcg_res);
9691            tcg_temp_free_i32(tcg_op1);
9692            tcg_temp_free_i32(tcg_op2);
9693        }
9694    }
9695
9696    if (!is_q) {
9697        clear_vec_high(s, rd);
9698    }
9699}
9700
9701/* C3.6.16 AdvSIMD three same
9702 *  31  30  29  28       24 23  22  21 20  16 15    11  10 9    5 4    0
9703 * +---+---+---+-----------+------+---+------+--------+---+------+------+
9704 * | 0 | Q | U | 0 1 1 1 0 | size | 1 |  Rm  | opcode | 1 |  Rn  |  Rd  |
9705 * +---+---+---+-----------+------+---+------+--------+---+------+------+
9706 */
9707static void disas_simd_three_reg_same(DisasContext *s, uint32_t insn)
9708{
9709    int opcode = extract32(insn, 11, 5);
9710
9711    switch (opcode) {
9712    case 0x3: /* logic ops */
9713        disas_simd_3same_logic(s, insn);
9714        break;
9715    case 0x17: /* ADDP */
9716    case 0x14: /* SMAXP, UMAXP */
9717    case 0x15: /* SMINP, UMINP */
9718    {
9719        /* Pairwise operations */
9720        int is_q = extract32(insn, 30, 1);
9721        int u = extract32(insn, 29, 1);
9722        int size = extract32(insn, 22, 2);
9723        int rm = extract32(insn, 16, 5);
9724        int rn = extract32(insn, 5, 5);
9725        int rd = extract32(insn, 0, 5);
9726        if (opcode == 0x17) {
9727            if (u || (size == 3 && !is_q)) {
9728                unallocated_encoding(s);
9729                return;
9730            }
9731        } else {
9732            if (size == 3) {
9733                unallocated_encoding(s);
9734                return;
9735            }
9736        }
9737        handle_simd_3same_pair(s, is_q, u, opcode, size, rn, rm, rd);
9738        break;
9739    }
9740    case 0x18 ... 0x31:
9741        /* floating point ops, sz[1] and U are part of opcode */
9742        disas_simd_3same_float(s, insn);
9743        break;
9744    default:
9745        disas_simd_3same_int(s, insn);
9746        break;
9747    }
9748}
9749
9750static void handle_2misc_widening(DisasContext *s, int opcode, bool is_q,
9751                                  int size, int rn, int rd)
9752{
9753    /* Handle 2-reg-misc ops which are widening (so each size element
9754     * in the source becomes a 2*size element in the destination).
9755     * The only instruction like this is FCVTL.
9756     */
9757    int pass;
9758
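    /* FCVTL converts the low half of the source register; FCVTL2 (Q == 1)
     * converts the high half, which is what the srcelt offsets below select.
     */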
9759    if (size == 3) {
9760        /* 32 -> 64 bit fp conversion */
9761        TCGv_i64 tcg_res[2];
9762        int srcelt = is_q ? 2 : 0;
9763
9764        for (pass = 0; pass < 2; pass++) {
9765            TCGv_i32 tcg_op = tcg_temp_new_i32();
9766            tcg_res[pass] = tcg_temp_new_i64();
9767
9768            read_vec_element_i32(s, tcg_op, rn, srcelt + pass, MO_32);
9769            gen_helper_vfp_fcvtds(tcg_res[pass], tcg_op, cpu_env);
9770            tcg_temp_free_i32(tcg_op);
9771        }
9772        for (pass = 0; pass < 2; pass++) {
9773            write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
9774            tcg_temp_free_i64(tcg_res[pass]);
9775        }
9776    } else {
9777        /* 16 -> 32 bit fp conversion */
9778        int srcelt = is_q ? 4 : 0;
9779        TCGv_i32 tcg_res[4];
9780
9781        for (pass = 0; pass < 4; pass++) {
9782            tcg_res[pass] = tcg_temp_new_i32();
9783
9784            read_vec_element_i32(s, tcg_res[pass], rn, srcelt + pass, MO_16);
9785            gen_helper_vfp_fcvt_f16_to_f32(tcg_res[pass], tcg_res[pass],
9786                                           cpu_env);
9787        }
9788        for (pass = 0; pass < 4; pass++) {
9789            write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
9790            tcg_temp_free_i32(tcg_res[pass]);
9791        }
9792    }
9793}
9794
9795static void handle_rev(DisasContext *s, int opcode, bool u,
9796                       bool is_q, int size, int rn, int rd)
9797{
9798    int op = (opcode << 1) | u;
9799    int opsz = op + size;
9800    int grp_size = 3 - opsz;
9801    int dsize = is_q ? 128 : 64;
9802    int i;
9803
9804    if (opsz >= 3) {
9805        unallocated_encoding(s);
9806        return;
9807    }
9808
9809    if (!fp_access_check(s)) {
9810        return;
9811    }
9812
9813    if (size == 0) {
9814        /* Special case bytes, use bswap op on each group of elements */
9815        int groups = dsize / (8 << grp_size);
9816
9817        for (i = 0; i < groups; i++) {
9818            TCGv_i64 tcg_tmp = tcg_temp_new_i64();
9819
9820            read_vec_element(s, tcg_tmp, rn, i, grp_size);
9821            switch (grp_size) {
9822            case MO_16:
9823                tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
9824                break;
9825            case MO_32:
9826                tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp);
9827                break;
9828            case MO_64:
9829                tcg_gen_bswap64_i64(tcg_tmp, tcg_tmp);
9830                break;
9831            default:
9832                g_assert_not_reached();
9833            }
9834            write_vec_element(s, tcg_tmp, rd, i, grp_size);
9835            tcg_temp_free_i64(tcg_tmp);
9836        }
9837        if (!is_q) {
9838            clear_vec_high(s, rd);
9839        }
9840    } else {
9841        int revmask = (1 << grp_size) - 1;
9842        int esize = 8 << size;
9843        int elements = dsize / esize;
9844        TCGv_i64 tcg_rn = tcg_temp_new_i64();
9845        TCGv_i64 tcg_rd = tcg_const_i64(0);
9846        TCGv_i64 tcg_rd_hi = tcg_const_i64(0);
9847
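        /* An element's destination index is its source index XORed with
         * revmask, reversing order within each group: e.g. REV64 of 16-bit
         * elements has grp_size 2, so element i moves to i ^ 3.
         */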
9848        for (i = 0; i < elements; i++) {
9849            int e_rev = (i & 0xf) ^ revmask;
9850            int off = e_rev * esize;
9851            read_vec_element(s, tcg_rn, rn, i, size);
9852            if (off >= 64) {
9853                tcg_gen_deposit_i64(tcg_rd_hi, tcg_rd_hi,
9854                                    tcg_rn, off - 64, esize);
9855            } else {
9856                tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, off, esize);
9857            }
9858        }
9859        write_vec_element(s, tcg_rd, rd, 0, MO_64);
9860        write_vec_element(s, tcg_rd_hi, rd, 1, MO_64);
9861
9862        tcg_temp_free_i64(tcg_rd_hi);
9863        tcg_temp_free_i64(tcg_rd);
9864        tcg_temp_free_i64(tcg_rn);
9865    }
9866}
9867
9868static void handle_2misc_pairwise(DisasContext *s, int opcode, bool u,
9869                                  bool is_q, int size, int rn, int rd)
9870{
9871    /* Implement the pairwise operations from 2-misc:
9872     * SADDLP, UADDLP, SADALP, UADALP.
9873     * These all add pairs of elements in the input to produce a
9874     * double-width result element in the output (possibly accumulating).
9875     */
9876    bool accum = (opcode == 0x6);
9877    int maxpass = is_q ? 2 : 1;
9878    int pass;
9879    TCGv_i64 tcg_res[2];
9880
9881    if (size == 2) {
9882        /* 32 + 32 -> 64 op */
9883        TCGMemOp memop = size + (u ? 0 : MO_SIGN);
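        /* For the signed forms (u == 0) MO_SIGN makes read_vec_element
         * sign-extend each 32-bit input, so a plain 64-bit add below
         * produces the correct widened result.
         */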
9884
9885        for (pass = 0; pass < maxpass; pass++) {
9886            TCGv_i64 tcg_op1 = tcg_temp_new_i64();
9887            TCGv_i64 tcg_op2 = tcg_temp_new_i64();
9888
9889            tcg_res[pass] = tcg_temp_new_i64();
9890
9891            read_vec_element(s, tcg_op1, rn, pass * 2, memop);
9892            read_vec_element(s, tcg_op2, rn, pass * 2 + 1, memop);
9893            tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
9894            if (accum) {
9895                read_vec_element(s, tcg_op1, rd, pass, MO_64);
9896                tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
9897            }
9898
9899            tcg_temp_free_i64(tcg_op1);
9900            tcg_temp_free_i64(tcg_op2);
9901        }
9902    } else {
9903        for (pass = 0; pass < maxpass; pass++) {
9904            TCGv_i64 tcg_op = tcg_temp_new_i64();
9905            NeonGenOneOpFn *genfn;
9906            static NeonGenOneOpFn * const fns[2][2] = {
9907                { gen_helper_neon_addlp_s8,  gen_helper_neon_addlp_u8 },
9908                { gen_helper_neon_addlp_s16, gen_helper_neon_addlp_u16 },
9909            };
9910
9911            genfn = fns[size][u];
9912
9913            tcg_res[pass] = tcg_temp_new_i64();
9914
9915            read_vec_element(s, tcg_op, rn, pass, MO_64);
9916            genfn(tcg_res[pass], tcg_op);
9917
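            /* The addlp helpers return their pairwise sums packed into a
             * 64-bit value, so the accumulate step adds lane-wise with the
             * packed add helper for the widened element size.
             */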
9918            if (accum) {
9919                read_vec_element(s, tcg_op, rd, pass, MO_64);
9920                if (size == 0) {
9921                    gen_helper_neon_addl_u16(tcg_res[pass],
9922                                             tcg_res[pass], tcg_op);
9923                } else {
9924                    gen_helper_neon_addl_u32(tcg_res[pass],
9925                                             tcg_res[pass], tcg_op);
9926                }
9927            }
9928            tcg_temp_free_i64(tcg_op);
9929        }
9930    }
9931    if (!is_q) {
9932        tcg_res[1] = tcg_const_i64(0);
9933    }
9934    for (pass = 0; pass < 2; pass++) {
9935        write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
9936        tcg_temp_free_i64(tcg_res[pass]);
9937    }
9938}
9939
9940static void handle_shll(DisasContext *s, bool is_q, int size, int rn, int rd)
9941{
9942    /* Implement SHLL and SHLL2 */
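    /* Each element is widened and then shifted left by the original element
     * size (8 << size), e.g. SHLL Vd.8H, Vn.8B, #8 shifts each widened byte
     * left by 8; SHLL2 (is_q) takes its inputs from the high half of Vn.
     */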
9943    int pass;
9944    int part = is_q ? 2 : 0;
9945    TCGv_i64 tcg_res[2];
9946
9947    for (pass = 0; pass < 2; pass++) {
9948        static NeonGenWidenFn * const widenfns[3] = {
9949            gen_helper_neon_widen_u8,
9950            gen_helper_neon_widen_u16,
9951            tcg_gen_extu_i32_i64,
9952        };
9953        NeonGenWidenFn *widenfn = widenfns[size];
9954        TCGv_i32 tcg_op = tcg_temp_new_i32();
9955
9956        read_vec_element_i32(s, tcg_op, rn, part + pass, MO_32);
9957        tcg_res[pass] = tcg_temp_new_i64();
9958        widenfn(tcg_res[pass], tcg_op);
9959        tcg_gen_shli_i64(tcg_res[pass], tcg_res[pass], 8 << size);
9960
9961        tcg_temp_free_i32(tcg_op);
9962    }
9963
9964    for (pass = 0; pass < 2; pass++) {
9965        write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
9966        tcg_temp_free_i64(tcg_res[pass]);
9967    }
9968}
9969
9970/* C3.6.17 AdvSIMD two reg misc
9971 *   31  30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
9972 * +---+---+---+-----------+------+-----------+--------+-----+------+------+
9973 * | 0 | Q | U | 0 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
9974 * +---+---+---+-----------+------+-----------+--------+-----+------+------+
9975 */
9976static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
9977{
9978    int size = extract32(insn, 22, 2);
9979    int opcode = extract32(insn, 12, 5);
9980    bool u = extract32(insn, 29, 1);
9981    bool is_q = extract32(insn, 30, 1);
9982    int rn = extract32(insn, 5, 5);
9983    int rd = extract32(insn, 0, 5);
9984    bool need_fpstatus = false;
9985    bool need_rmode = false;
9986    int rmode = -1;
9987    TCGv_i32 tcg_rmode;
9988    TCGv_ptr tcg_fpstatus;
9989
9990    switch (opcode) {
9991    case 0x0: /* REV64, REV32 */
9992    case 0x1: /* REV16 */
9993        handle_rev(s, opcode, u, is_q, size, rn, rd);
9994        return;
9995    case 0x5: /* CNT, NOT, RBIT */
9996        if (u && size == 0) {
9997            /* NOT: adjust size so we can use the 64-bits-at-a-time loop. */
9998            size = 3;
9999            break;
10000        } else if (u && size == 1) {
10001            /* RBIT */
10002            break;
10003        } else if (!u && size == 0) {
10004            /* CNT */
10005            break;
10006        }
10007        unallocated_encoding(s);
10008        return;
10009    case 0x12: /* XTN, XTN2, SQXTUN, SQXTUN2 */
10010    case 0x14: /* SQXTN, SQXTN2, UQXTN, UQXTN2 */
10011        if (size == 3) {
10012            unallocated_encoding(s);
10013            return;
10014        }
10015        if (!fp_access_check(s)) {
10016            return;
10017        }
10018
10019        handle_2misc_narrow(s, false, opcode, u, is_q, size, rn, rd);
10020        return;
10021    case 0x4: /* CLS, CLZ */
10022        if (size == 3) {
10023            unallocated_encoding(s);
10024            return;
10025        }
10026        break;
10027    case 0x2: /* SADDLP, UADDLP */
10028    case 0x6: /* SADALP, UADALP */
10029        if (size == 3) {
10030            unallocated_encoding(s);
10031            return;
10032        }
10033        if (!fp_access_check(s)) {
10034            return;
10035        }
10036        handle_2misc_pairwise(s, opcode, u, is_q, size, rn, rd);
10037        return;
10038    case 0x13: /* SHLL, SHLL2 */
10039        if (u == 0 || size == 3) {
10040            unallocated_encoding(s);
10041            return;
10042        }
10043        if (!fp_access_check(s)) {
10044            return;
10045        }
10046        handle_shll(s, is_q, size, rn, rd);
10047        return;
10048    case 0xa: /* CMLT */
10049        if (u == 1) {
10050            unallocated_encoding(s);
10051            return;
10052        }
10053        /* fall through */
10054    case 0x8: /* CMGT, CMGE */
10055    case 0x9: /* CMEQ, CMLE */
10056    case 0xb: /* ABS, NEG */
10057        if (size == 3 && !is_q) {
10058            unallocated_encoding(s);
10059            return;
10060        }
10061        break;
10062    case 0x3: /* SUQADD, USQADD */
10063        if (size == 3 && !is_q) {
10064            unallocated_encoding(s);
10065            return;
10066        }
10067        if (!fp_access_check(s)) {
10068            return;
10069        }
10070        handle_2misc_satacc(s, false, u, is_q, size, rn, rd);
10071        return;
10072    case 0x7: /* SQABS, SQNEG */
10073        if (size == 3 && !is_q) {
10074            unallocated_encoding(s);
10075            return;
10076        }
10077        break;
10078    case 0xc ... 0xf:
10079    case 0x16 ... 0x1d:
10080    case 0x1f:
10081    {
10082        /* Floating point: U, size[1] and opcode indicate operation;
10083         * size[0] indicates single or double precision.
10084         */
10085        int is_double = extract32(size, 0, 1);
10086        opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
10087        size = is_double ? 3 : 2;
10088        switch (opcode) {
10089        case 0x2f: /* FABS */
10090        case 0x6f: /* FNEG */
10091            if (size == 3 && !is_q) {
10092                unallocated_encoding(s);
10093                return;
10094            }
10095            break;
10096        case 0x1d: /* SCVTF */
10097        case 0x5d: /* UCVTF */
10098        {
10099            bool is_signed = (opcode == 0x1d);
10100            int elements = is_double ? 2 : is_q ? 4 : 2;
10101            if (is_double && !is_q) {
10102                unallocated_encoding(s);
10103                return;
10104            }
10105            if (!fp_access_check(s)) {
10106                return;
10107            }
10108            handle_simd_intfp_conv(s, rd, rn, elements, is_signed, 0, size);
10109            return;
10110        }
10111        case 0x2c: /* FCMGT (zero) */
10112        case 0x2d: /* FCMEQ (zero) */
10113        case 0x2e: /* FCMLT (zero) */
10114        case 0x6c: /* FCMGE (zero) */
10115        case 0x6d: /* FCMLE (zero) */
10116            if (size == 3 && !is_q) {
10117                unallocated_encoding(s);
10118                return;
10119            }
10120            handle_2misc_fcmp_zero(s, opcode, false, u, is_q, size, rn, rd);
10121            return;
10122        case 0x7f: /* FSQRT */
10123            if (size == 3 && !is_q) {
10124                unallocated_encoding(s);
10125                return;
10126            }
10127            break;
10128        case 0x1a: /* FCVTNS */
10129        case 0x1b: /* FCVTMS */
10130        case 0x3a: /* FCVTPS */
10131        case 0x3b: /* FCVTZS */
10132        case 0x5a: /* FCVTNU */
10133        case 0x5b: /* FCVTMU */
10134        case 0x7a: /* FCVTPU */
10135        case 0x7b: /* FCVTZU */
10136            need_fpstatus = true;
10137            need_rmode = true;
10138            rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
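            /* Bits 5 and 0 of the extended opcode select the rounding mode:
             * this maps N, P, M, Z onto FPROUNDING_TIEEVEN, _POSINF,
             * _NEGINF and _ZERO respectively.
             */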
10139            if (size == 3 && !is_q) {
10140                unallocated_encoding(s);
10141                return;
10142            }
10143            break;
10144        case 0x5c: /* FCVTAU */
10145        case 0x1c: /* FCVTAS */
10146            need_fpstatus = true;
10147            need_rmode = true;
10148            rmode = FPROUNDING_TIEAWAY;
10149            if (size == 3 && !is_q) {
10150                unallocated_encoding(s);
10151                return;
10152            }
10153            break;
10154        case 0x3c: /* URECPE */
10155            if (size == 3) {
10156                unallocated_encoding(s);
10157                return;
10158            }
10159            /* fall through */
10160        case 0x3d: /* FRECPE */
10161        case 0x7d: /* FRSQRTE */
10162            if (size == 3 && !is_q) {
10163                unallocated_encoding(s);
10164                return;
10165            }
10166            if (!fp_access_check(s)) {
10167                return;
10168            }
10169            handle_2misc_reciprocal(s, opcode, false, u, is_q, size, rn, rd);
10170            return;
10171        case 0x56: /* FCVTXN, FCVTXN2 */
10172            if (size == 2) {
10173                unallocated_encoding(s);
10174                return;
10175            }
10176            /* fall through */
10177        case 0x16: /* FCVTN, FCVTN2 */
10178            /* handle_2misc_narrow does a 2*size -> size operation, but these
10179             * instructions encode the source size rather than dest size.
10180             */
10181            if (!fp_access_check(s)) {
10182                return;
10183            }
10184            handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd);
10185            return;
10186        case 0x17: /* FCVTL, FCVTL2 */
10187            if (!fp_access_check(s)) {
10188                return;
10189            }
10190            handle_2misc_widening(s, opcode, is_q, size, rn, rd);
10191            return;
10192        case 0x18: /* FRINTN */
10193        case 0x19: /* FRINTM */
10194        case 0x38: /* FRINTP */
10195        case 0x39: /* FRINTZ */
10196            need_rmode = true;
10197            rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
10198            /* fall through */
10199        case 0x59: /* FRINTX */
10200        case 0x79: /* FRINTI */
10201            need_fpstatus = true;
10202            if (size == 3 && !is_q) {
10203                unallocated_encoding(s);
10204                return;
10205            }
10206            break;
10207        case 0x58: /* FRINTA */
10208            need_rmode = true;
10209            rmode = FPROUNDING_TIEAWAY;
10210            need_fpstatus = true;
10211            if (size == 3 && !is_q) {
10212                unallocated_encoding(s);
10213                return;
10214            }
10215            break;
10216        case 0x7c: /* URSQRTE */
10217            if (size == 3) {
10218                unallocated_encoding(s);
10219                return;
10220            }
10221            need_fpstatus = true;
10222            break;
10223        default:
10224            unallocated_encoding(s);
10225            return;
10226        }
10227        break;
10228    }
10229    default:
10230        unallocated_encoding(s);
10231        return;
10232    }
10233
10234    if (!fp_access_check(s)) {
10235        return;
10236    }
10237
10238    if (need_fpstatus) {
10239        tcg_fpstatus = get_fpstatus_ptr();
10240    } else {
10241        TCGV_UNUSED_PTR(tcg_fpstatus);
10242    }
10243    if (need_rmode) {
10244        tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
10245        gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
10246    } else {
10247        TCGV_UNUSED_I32(tcg_rmode);
10248    }
10249
10250    if (size == 3) {
10251        /* All 64-bit element operations can be shared with scalar 2misc */
10252        int pass;
10253
10254        for (pass = 0; pass < (is_q ? 2 : 1); pass++) {
10255            TCGv_i64 tcg_op = tcg_temp_new_i64();
10256            TCGv_i64 tcg_res = tcg_temp_new_i64();
10257
10258            read_vec_element(s, tcg_op, rn, pass, MO_64);
10259
10260            handle_2misc_64(s, opcode, u, tcg_res, tcg_op,
10261                            tcg_rmode, tcg_fpstatus);
10262
10263            write_vec_element(s, tcg_res, rd, pass, MO_64);
10264
10265            tcg_temp_free_i64(tcg_res);
10266            tcg_temp_free_i64(tcg_op);
10267        }
10268    } else {
10269        int pass;
10270
10271        for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
10272            TCGv_i32 tcg_op = tcg_temp_new_i32();
10273            TCGv_i32 tcg_res = tcg_temp_new_i32();
10274            TCGCond cond;
10275
10276            read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
10277
10278            if (size == 2) {
10279                /* Special cases for 32 bit elements */
10280                switch (opcode) {
10281                case 0xa: /* CMLT */
10282                    /* 32 bit integer comparison against zero, result is
10283                     * test ? (2^32 - 1) : 0. We implement via setcond(test),
10284                     * which yields 0 or 1, then negate so 1 becomes all-ones.
10285                     */
10286                    cond = TCG_COND_LT;
10287                do_cmop:
10288                    tcg_gen_setcondi_i32(cond, tcg_res, tcg_op, 0);
10289                    tcg_gen_neg_i32(tcg_res, tcg_res);
10290                    break;
10291                case 0x8: /* CMGT, CMGE */
10292                    cond = u ? TCG_COND_GE : TCG_COND_GT;
10293                    goto do_cmop;
10294                case 0x9: /* CMEQ, CMLE */
10295                    cond = u ? TCG_COND_LE : TCG_COND_EQ;
10296                    goto do_cmop;
10297                case 0x4: /* CLS, CLZ */
10298                    if (u) {
10299                        gen_helper_clz32(tcg_res, tcg_op);
10300                    } else {
10301                        gen_helper_cls32(tcg_res, tcg_op);
10302                    }
10303                    break;
10304                case 0x7: /* SQABS, SQNEG */
10305                    if (u) {
10306                        gen_helper_neon_qneg_s32(tcg_res, cpu_env, tcg_op);
10307                    } else {
10308                        gen_helper_neon_qabs_s32(tcg_res, cpu_env, tcg_op);
10309                    }
10310                    break;
10311                case 0xb: /* ABS, NEG */
10312                    if (u) {
10313                        tcg_gen_neg_i32(tcg_res, tcg_op);
10314                    } else {
10315                        TCGv_i32 tcg_zero = tcg_const_i32(0);
10316                        tcg_gen_neg_i32(tcg_res, tcg_op);
10317                        tcg_gen_movcond_i32(TCG_COND_GT, tcg_res, tcg_op,
10318                                            tcg_zero, tcg_op, tcg_res);
10319                        tcg_temp_free_i32(tcg_zero);
10320                    }
10321                    break;
10322                case 0x2f: /* FABS */
10323                    gen_helper_vfp_abss(tcg_res, tcg_op);
10324                    break;
10325                case 0x6f: /* FNEG */
10326                    gen_helper_vfp_negs(tcg_res, tcg_op);
10327                    break;
10328                case 0x7f: /* FSQRT */
10329                    gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env);
10330                    break;
10331                case 0x1a: /* FCVTNS */
10332                case 0x1b: /* FCVTMS */
10333                case 0x1c: /* FCVTAS */
10334                case 0x3a: /* FCVTPS */
10335                case 0x3b: /* FCVTZS */
10336                {
10337                    TCGv_i32 tcg_shift = tcg_const_i32(0);
10338                    gen_helper_vfp_tosls(tcg_res, tcg_op,
10339                                         tcg_shift, tcg_fpstatus);
10340                    tcg_temp_free_i32(tcg_shift);
10341                    break;
10342                }
10343                case 0x5a: /* FCVTNU */
10344                case 0x5b: /* FCVTMU */
10345                case 0x5c: /* FCVTAU */
10346                case 0x7a: /* FCVTPU */
10347                case 0x7b: /* FCVTZU */
10348                {
10349                    TCGv_i32 tcg_shift = tcg_const_i32(0);
10350                    gen_helper_vfp_touls(tcg_res, tcg_op,
10351                                         tcg_shift, tcg_fpstatus);
10352                    tcg_temp_free_i32(tcg_shift);
10353                    break;
10354                }
10355                case 0x18: /* FRINTN */
10356                case 0x19: /* FRINTM */
10357                case 0x38: /* FRINTP */
10358                case 0x39: /* FRINTZ */
10359                case 0x58: /* FRINTA */
10360                case 0x79: /* FRINTI */
10361                    gen_helper_rints(tcg_res, tcg_op, tcg_fpstatus);
10362                    break;
10363                case 0x59: /* FRINTX */
10364                    gen_helper_rints_exact(tcg_res, tcg_op, tcg_fpstatus);
10365                    break;
10366                case 0x7c: /* URSQRTE */
10367                    gen_helper_rsqrte_u32(tcg_res, tcg_op, tcg_fpstatus);
10368                    break;
10369                default:
10370                    g_assert_not_reached();
10371                }
10372            } else {
10373                /* Use helpers for 8 and 16 bit elements */
10374                switch (opcode) {
10375                case 0x5: /* CNT, RBIT */
10376                    /* For these two insns size is part of the opcode specifier
10377                     * (handled earlier); they always operate on byte elements.
10378                     */
10379                    if (u) {
10380                        gen_helper_neon_rbit_u8(tcg_res, tcg_op);
10381                    } else {
10382                        gen_helper_neon_cnt_u8(tcg_res, tcg_op);
10383                    }
10384                    break;
10385                case 0x7: /* SQABS, SQNEG */
10386                {
10387                    NeonGenOneOpEnvFn *genfn;
10388                    static NeonGenOneOpEnvFn * const fns[2][2] = {
10389                        { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
10390                        { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
10391                    };
10392                    genfn = fns[size][u];
10393                    genfn(tcg_res, cpu_env, tcg_op);
10394                    break;
10395                }
10396                case 0x8: /* CMGT, CMGE */
10397                case 0x9: /* CMEQ, CMLE */
10398                case 0xa: /* CMLT */
10399                {
10400                    static NeonGenTwoOpFn * const fns[3][2] = {
10401                        { gen_helper_neon_cgt_s8, gen_helper_neon_cgt_s16 },
10402                        { gen_helper_neon_cge_s8, gen_helper_neon_cge_s16 },
10403                        { gen_helper_neon_ceq_u8, gen_helper_neon_ceq_u16 },
10404                    };
10405                    NeonGenTwoOpFn *genfn;
10406                    int comp;
10407                    bool reverse;
10408                    TCGv_i32 tcg_zero = tcg_const_i32(0);
10409
10410                    /* comp = index into [CMGT, CMGE, CMEQ, CMLE, CMLT] */
10411                    comp = (opcode - 0x8) * 2 + u;
10412                    /* ...but LE, LT are implemented as reverse GE, GT */
10413                    reverse = (comp > 2);
10414                    if (reverse) {
10415                        comp = 4 - comp;
10416                    }
10417                    genfn = fns[comp][size];
10418                    if (reverse) {
10419                        genfn(tcg_res, tcg_zero, tcg_op);
10420                    } else {
10421                        genfn(tcg_res, tcg_op, tcg_zero);
10422                    }
10423                    tcg_temp_free_i32(tcg_zero);
10424                    break;
10425                }
10426                case 0xb: /* ABS, NEG */
10427                    if (u) {
10428                        TCGv_i32 tcg_zero = tcg_const_i32(0);
10429                        if (size) {
10430                            gen_helper_neon_sub_u16(tcg_res, tcg_zero, tcg_op);
10431                        } else {
10432                            gen_helper_neon_sub_u8(tcg_res, tcg_zero, tcg_op);
10433                        }
10434                        tcg_temp_free_i32(tcg_zero);
10435                    } else {
10436                        if (size) {
10437                            gen_helper_neon_abs_s16(tcg_res, tcg_op);
10438                        } else {
10439                            gen_helper_neon_abs_s8(tcg_res, tcg_op);
10440                        }
10441                    }
10442                    break;
10443                case 0x4: /* CLS, CLZ */
10444                    if (u) {
10445                        if (size == 0) {
10446                            gen_helper_neon_clz_u8(tcg_res, tcg_op);
10447                        } else {
10448                            gen_helper_neon_clz_u16(tcg_res, tcg_op);
10449                        }
10450                    } else {
10451                        if (size == 0) {
10452                            gen_helper_neon_cls_s8(tcg_res, tcg_op);
10453                        } else {
10454                            gen_helper_neon_cls_s16(tcg_res, tcg_op);
10455                        }
10456                    }
10457                    break;
10458                default:
10459                    g_assert_not_reached();
10460                }
10461            }
10462
10463            write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
10464
10465            tcg_temp_free_i32(tcg_res);
10466            tcg_temp_free_i32(tcg_op);
10467        }
10468    }
10469    if (!is_q) {
10470        clear_vec_high(s, rd);
10471    }
10472
10473    if (need_rmode) {
10474        gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
10475        tcg_temp_free_i32(tcg_rmode);
10476    }
10477    if (need_fpstatus) {
10478        tcg_temp_free_ptr(tcg_fpstatus);
10479    }
10480}
10481
10482/* C3.6.13 AdvSIMD scalar x indexed element
10483 *  31 30  29 28       24 23  22 21  20  19  16 15 12  11  10 9    5 4    0
10484 * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
10485 * | 0 1 | U | 1 1 1 1 1 | size | L | M |  Rm  | opc | H | 0 |  Rn  |  Rd  |
10486 * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
10487 * C3.6.18 AdvSIMD vector x indexed element
10488 *   31  30  29 28       24 23  22 21  20  19  16 15 12  11  10 9    5 4    0
10489 * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
10490 * | 0 | Q | U | 0 1 1 1 1 | size | L | M |  Rm  | opc | H | 0 |  Rn  |  Rd  |
10491 * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
10492 */
10493static void disas_simd_indexed(DisasContext *s, uint32_t insn)
10494{
10495    /* This encoding has two kinds of instruction:
10496     *  normal, where we perform elt x idxelt => elt for each
10497     *     element in the vector
10498     *  long, where we perform elt x idxelt and generate a result of
10499     *     double the width of the input element
10500     * The long ops have a 'part' specifier (ie come in INSN, INSN2 pairs).
10501     */
10502    bool is_scalar = extract32(insn, 28, 1);
10503    bool is_q = extract32(insn, 30, 1);
10504    bool u = extract32(insn, 29, 1);
10505    int size = extract32(insn, 22, 2);
10506    int l = extract32(insn, 21, 1);
10507    int m = extract32(insn, 20, 1);
10508    /* Note that the Rm field here is only 4 bits, not 5 as it usually is */
10509    int rm = extract32(insn, 16, 4);
10510    int opcode = extract32(insn, 12, 4);
10511    int h = extract32(insn, 11, 1);
10512    int rn = extract32(insn, 5, 5);
10513    int rd = extract32(insn, 0, 5);
10514    bool is_long = false;
10515    bool is_fp = false;
10516    int index;
10517    TCGv_ptr fpst;
10518
10519    switch (opcode) {
10520    case 0x0: /* MLA */
10521    case 0x4: /* MLS */
10522        if (!u || is_scalar) {
10523            unallocated_encoding(s);
10524            return;
10525        }
10526        break;
10527    case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10528    case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10529    case 0xa: /* SMULL, SMULL2, UMULL, UMULL2 */
10530        if (is_scalar) {
10531            unallocated_encoding(s);
10532            return;
10533        }
10534        is_long = true;
10535        break;
10536    case 0x3: /* SQDMLAL, SQDMLAL2 */
10537    case 0x7: /* SQDMLSL, SQDMLSL2 */
10538    case 0xb: /* SQDMULL, SQDMULL2 */
10539        is_long = true;
10540        /* fall through */
10541    case 0xc: /* SQDMULH */
10542    case 0xd: /* SQRDMULH */
10543        if (u) {
10544            unallocated_encoding(s);
10545            return;
10546        }
10547        break;
10548    case 0x8: /* MUL */
10549        if (u || is_scalar) {
10550            unallocated_encoding(s);
10551            return;
10552        }
10553        break;
10554    case 0x1: /* FMLA */
10555    case 0x5: /* FMLS */
10556        if (u) {
10557            unallocated_encoding(s);
10558            return;
10559        }
10560        /* fall through */
10561    case 0x9: /* FMUL, FMULX */
10562        if (!extract32(size, 1, 1)) {
10563            unallocated_encoding(s);
10564            return;
10565        }
10566        is_fp = true;
10567        break;
10568    default:
10569        unallocated_encoding(s);
10570        return;
10571    }
10572
10573    if (is_fp) {
10574        /* low bit of size indicates single/double */
10575        size = extract32(size, 0, 1) ? 3 : 2;
10576        if (size == 2) {
10577            index = h << 1 | l;
10578        } else {
10579            if (l || !is_q) {
10580                unallocated_encoding(s);
10581                return;
10582            }
10583            index = h;
10584        }
10585        rm |= (m << 4);
10586    } else {
10587        switch (size) {
10588        case 1:
10589            index = h << 2 | l << 1 | m;
10590            break;
10591        case 2:
10592            index = h << 1 | l;
10593            rm |= (m << 4);
10594            break;
10595        default:
10596            unallocated_encoding(s);
10597            return;
10598        }
10599    }
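    /* index is now the element number within Rm: H:L:M for 16-bit elements,
     * H:L for 32-bit, H alone for 64-bit; for the larger sizes M instead
     * contributes a fifth register-number bit to Rm.
     */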
10600
10601    if (!fp_access_check(s)) {
10602        return;
10603    }
10604
10605    if (is_fp) {
10606        fpst = get_fpstatus_ptr();
10607    } else {
10608        TCGV_UNUSED_PTR(fpst);
10609    }
10610
10611    if (size == 3) {
10612        TCGv_i64 tcg_idx = tcg_temp_new_i64();
10613        int pass;
10614
10615        assert(is_fp && is_q && !is_long);
10616
10617        read_vec_element(s, tcg_idx, rm, index, MO_64);
10618
10619        for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
10620            TCGv_i64 tcg_op = tcg_temp_new_i64();
10621            TCGv_i64 tcg_res = tcg_temp_new_i64();
10622
10623            read_vec_element(s, tcg_op, rn, pass, MO_64);
10624
10625            switch (opcode) {
10626            case 0x5: /* FMLS */
10627                /* As usual for ARM, separate negation for fused multiply-add */
10628                gen_helper_vfp_negd(tcg_op, tcg_op);
10629                /* fall through */
10630            case 0x1: /* FMLA */
10631                read_vec_element(s, tcg_res, rd, pass, MO_64);
10632                gen_helper_vfp_muladdd(tcg_res, tcg_op, tcg_idx, tcg_res, fpst);
10633                break;
10634            case 0x9: /* FMUL, FMULX */
10635                if (u) {
10636                    gen_helper_vfp_mulxd(tcg_res, tcg_op, tcg_idx, fpst);
10637                } else {
10638                    gen_helper_vfp_muld(tcg_res, tcg_op, tcg_idx, fpst);
10639                }
10640                break;
10641            default:
10642                g_assert_not_reached();
10643            }
10644
10645            write_vec_element(s, tcg_res, rd, pass, MO_64);
10646            tcg_temp_free_i64(tcg_op);
10647            tcg_temp_free_i64(tcg_res);
10648        }
10649
10650        if (is_scalar) {
10651            clear_vec_high(s, rd);
10652        }
10653
10654        tcg_temp_free_i64(tcg_idx);
10655    } else if (!is_long) {
10656        /* 32 bit floating point, or 16 or 32 bit integer.
10657         * For the 16 bit scalar case we use the usual Neon helpers and
10658         * rely on the fact that 0 op 0 == 0 with no side effects.
10659         */
10660        TCGv_i32 tcg_idx = tcg_temp_new_i32();
10661        int pass, maxpasses;
10662
10663        if (is_scalar) {
10664            maxpasses = 1;
10665        } else {
10666            maxpasses = is_q ? 4 : 2;
10667        }
10668
10669        read_vec_element_i32(s, tcg_idx, rm, index, size);
10670
10671        if (size == 1 && !is_scalar) {
10672            /* The simplest way to handle the 16x16 indexed ops is to duplicate
10673             * the index into both halves of the 32 bit tcg_idx and then use
10674             * the usual Neon helpers.
10675             */
10676            tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
10677        }
10678
10679        for (pass = 0; pass < maxpasses; pass++) {
10680            TCGv_i32 tcg_op = tcg_temp_new_i32();
10681            TCGv_i32 tcg_res = tcg_temp_new_i32();
10682
10683            read_vec_element_i32(s, tcg_op, rn, pass, is_scalar ? size : MO_32);
10684
10685            switch (opcode) {
10686            case 0x0: /* MLA */
10687            case 0x4: /* MLS */
10688            case 0x8: /* MUL */
10689            {
10690                static NeonGenTwoOpFn * const fns[2][2] = {
10691                    { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
10692                    { tcg_gen_add_i32, tcg_gen_sub_i32 },
10693                };
10694                NeonGenTwoOpFn *genfn;
10695                bool is_sub = opcode == 0x4;
10696
10697                if (size == 1) {
10698                    gen_helper_neon_mul_u16(tcg_res, tcg_op, tcg_idx);
10699                } else {
10700                    tcg_gen_mul_i32(tcg_res, tcg_op, tcg_idx);
10701                }
10702                if (opcode == 0x8) {
10703                    break;
10704                }
10705                read_vec_element_i32(s, tcg_op, rd, pass, MO_32);
10706                genfn = fns[size - 1][is_sub];
10707                genfn(tcg_res, tcg_op, tcg_res);
10708                break;
10709            }
10710            case 0x5: /* FMLS */
10711                /* As usual for ARM, separate negation for fused multiply-add */
10712                gen_helper_vfp_negs(tcg_op, tcg_op);
10713                /* fall through */
10714            case 0x1: /* FMLA */
10715                read_vec_element_i32(s, tcg_res, rd, pass, MO_32);
10716                gen_helper_vfp_muladds(tcg_res, tcg_op, tcg_idx, tcg_res, fpst);
10717                break;
10718            case 0x9: /* FMUL, FMULX */
10719                if (u) {
10720                    gen_helper_vfp_mulxs(tcg_res, tcg_op, tcg_idx, fpst);
10721                } else {
10722                    gen_helper_vfp_muls(tcg_res, tcg_op, tcg_idx, fpst);
10723                }
10724                break;
10725            case 0xc: /* SQDMULH */
10726                if (size == 1) {
10727                    gen_helper_neon_qdmulh_s16(tcg_res, cpu_env,
10728                                               tcg_op, tcg_idx);
10729                } else {
10730                    gen_helper_neon_qdmulh_s32(tcg_res, cpu_env,
10731                                               tcg_op, tcg_idx);
10732                }
10733                break;
10734            case 0xd: /* SQRDMULH */
10735                if (size == 1) {
10736                    gen_helper_neon_qrdmulh_s16(tcg_res, cpu_env,
10737                                                tcg_op, tcg_idx);
10738                } else {
10739                    gen_helper_neon_qrdmulh_s32(tcg_res, cpu_env,
10740                                                tcg_op, tcg_idx);
10741                }
10742                break;
10743            default:
10744                g_assert_not_reached();
10745            }
10746
10747            if (is_scalar) {
10748                write_fp_sreg(s, rd, tcg_res);
10749            } else {
10750                write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
10751            }
10752
10753            tcg_temp_free_i32(tcg_op);
10754            tcg_temp_free_i32(tcg_res);
10755        }
10756
10757        tcg_temp_free_i32(tcg_idx);
10758
10759        if (!is_q) {
10760            clear_vec_high(s, rd);
10761        }
10762    } else {
10763        /* long ops: 16x16->32 or 32x32->64 */
10764        TCGv_i64 tcg_res[2];
10765        int pass;
10766        bool satop = extract32(opcode, 0, 1);
10767        TCGMemOp memop = MO_32;
10768
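        /* The saturating doubling ops (SQDMLAL etc) are always signed;
         * otherwise U selects an unsigned (U=1) or signed (U=0) widening
         * multiply.
         */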
10769        if (satop || !u) {
10770            memop |= MO_SIGN;
10771        }
10772
10773        if (size == 2) {
10774            TCGv_i64 tcg_idx = tcg_temp_new_i64();
10775
10776            read_vec_element(s, tcg_idx, rm, index, memop);
10777
10778            for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
10779                TCGv_i64 tcg_op = tcg_temp_new_i64();
10780                TCGv_i64 tcg_passres;
10781                int passelt;
10782
10783                if (is_scalar) {
10784                    passelt = 0;
10785                } else {
10786                    passelt = pass + (is_q * 2);
10787                }
10788
10789                read_vec_element(s, tcg_op, rn, passelt, memop);
10790
10791                tcg_res[pass] = tcg_temp_new_i64();
10792
10793                if (opcode == 0xa || opcode == 0xb) {
10794                    /* Non-accumulating ops */
10795                    tcg_passres = tcg_res[pass];
10796                } else {
10797                    tcg_passres = tcg_temp_new_i64();
10798                }
10799
10800                tcg_gen_mul_i64(tcg_passres, tcg_op, tcg_idx);
10801                tcg_temp_free_i64(tcg_op);
10802
10803                if (satop) {
10804                    /* saturating, doubling */
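                    /* The doubling is done by saturating-adding the product
                     * to itself, which also raises QC on overflow.
                     */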
10805                    gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env,
10806                                                      tcg_passres, tcg_passres);
10807                }
10808
10809                if (opcode == 0xa || opcode == 0xb) {
10810                    continue;
10811                }
10812
10813                /* Accumulating op: handle accumulate step */
10814                read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10815
10816                switch (opcode) {
10817                case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10818                    tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
10819                    break;
10820                case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10821                    tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
10822                    break;
10823                case 0x7: /* SQDMLSL, SQDMLSL2 */
10824                    tcg_gen_neg_i64(tcg_passres, tcg_passres);
10825                    /* fall through */
10826                case 0x3: /* SQDMLAL, SQDMLAL2 */
10827                    gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env,
10828                                                      tcg_res[pass],
10829                                                      tcg_passres);
10830                    break;
10831                default:
10832                    g_assert_not_reached();
10833                }
10834                tcg_temp_free_i64(tcg_passres);
10835            }
10836            tcg_temp_free_i64(tcg_idx);
10837
10838            if (is_scalar) {
10839                clear_vec_high(s, rd);
10840            }
10841        } else {
10842            TCGv_i32 tcg_idx = tcg_temp_new_i32();
10843
10844            assert(size == 1);
10845            read_vec_element_i32(s, tcg_idx, rm, index, size);
10846
10847            if (!is_scalar) {
10848                /* The simplest way to handle the 16x16 indexed ops is to
10849                 * duplicate the index into both halves of the 32 bit tcg_idx
10850                 * and then use the usual Neon helpers.
10851                 */
10852                tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
10853            }
10854
10855            for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
10856                TCGv_i32 tcg_op = tcg_temp_new_i32();
10857                TCGv_i64 tcg_passres;
10858
10859                if (is_scalar) {
10860                    read_vec_element_i32(s, tcg_op, rn, pass, size);
10861                } else {
10862                    read_vec_element_i32(s, tcg_op, rn,
10863                                         pass + (is_q * 2), MO_32);
10864                }
10865
10866                tcg_res[pass] = tcg_temp_new_i64();
10867
10868                if (opcode == 0xa || opcode == 0xb) {
10869                    /* Non-accumulating ops */
10870                    tcg_passres = tcg_res[pass];
10871                } else {
10872                    tcg_passres = tcg_temp_new_i64();
10873                }
10874
10875                if (memop & MO_SIGN) {
10876                    gen_helper_neon_mull_s16(tcg_passres, tcg_op, tcg_idx);
10877                } else {
10878                    gen_helper_neon_mull_u16(tcg_passres, tcg_op, tcg_idx);
10879                }
10880                if (satop) {
10881                    gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
10882                                                      tcg_passres, tcg_passres);
10883                }
10884                tcg_temp_free_i32(tcg_op);
10885
10886                if (opcode == 0xa || opcode == 0xb) {
10887                    continue;
10888                }
10889
10890                /* Accumulating op: handle accumulate step */
10891                read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10892
10893                switch (opcode) {
10894                case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10895                    gen_helper_neon_addl_u32(tcg_res[pass], tcg_res[pass],
10896                                             tcg_passres);
10897                    break;
10898                case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10899                    gen_helper_neon_subl_u32(tcg_res[pass], tcg_res[pass],
10900                                             tcg_passres);
10901                    break;
10902                case 0x7: /* SQDMLSL, SQDMLSL2 */
10903                    gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
10904                    /* fall through */
10905                case 0x3: /* SQDMLAL, SQDMLAL2 */
10906                    gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env,
10907                                                      tcg_res[pass],
10908                                                      tcg_passres);
10909                    break;
10910                default:
10911                    g_assert_not_reached();
10912                }
10913                tcg_temp_free_i64(tcg_passres);
10914            }
10915            tcg_temp_free_i32(tcg_idx);
10916
10917            if (is_scalar) {
10918                tcg_gen_ext32u_i64(tcg_res[0], tcg_res[0]);
10919            }
10920        }
10921
10922        if (is_scalar) {
10923            tcg_res[1] = tcg_const_i64(0);
10924        }
10925
10926        for (pass = 0; pass < 2; pass++) {
10927            write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10928            tcg_temp_free_i64(tcg_res[pass]);
10929        }
10930    }
10931
10932    if (!TCGV_IS_UNUSED_PTR(fpst)) {
10933        tcg_temp_free_ptr(fpst);
10934    }
10935}
10936
10937/* C3.6.19 Crypto AES
10938 *  31             24 23  22 21       17 16    12 11 10 9    5 4    0
10939 * +-----------------+------+-----------+--------+-----+------+------+
10940 * | 0 1 0 0 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
10941 * +-----------------+------+-----------+--------+-----+------+------+
10942 */
10943static void disas_crypto_aes(DisasContext *s, uint32_t insn)
10944{
10945    int size = extract32(insn, 22, 2);
10946    int opcode = extract32(insn, 12, 5);
10947    int rn = extract32(insn, 5, 5);
10948    int rd = extract32(insn, 0, 5);
10949    int decrypt;
10950    TCGv_i32 tcg_rd_regno, tcg_rn_regno, tcg_decrypt;
10951    CryptoThreeOpEnvFn *genfn;
10952
10953    if (!arm_dc_feature(s, ARM_FEATURE_V8_AES)
10954        || size != 0) {
10955        unallocated_encoding(s);
10956        return;
10957    }
10958
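    /* AESE/AESD share one helper and AESMC/AESIMC another; the decrypt
     * flag picks the encrypt or decrypt transform at runtime.
     */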
10959    switch (opcode) {
10960    case 0x4: /* AESE */
10961        decrypt = 0;
10962        genfn = gen_helper_crypto_aese;
10963        break;
10964    case 0x6: /* AESMC */
10965        decrypt = 0;
10966        genfn = gen_helper_crypto_aesmc;
10967        break;
10968    case 0x5: /* AESD */
10969        decrypt = 1;
10970        genfn = gen_helper_crypto_aese;
10971        break;
10972    case 0x7: /* AESIMC */
10973        decrypt = 1;
10974        genfn = gen_helper_crypto_aesmc;
10975        break;
10976    default:
10977        unallocated_encoding(s);
10978        return;
10979    }
10980
10981    /* Note that we convert the Vx register indexes into the
10982     * index within the vfp.regs[] array, so we can share the
10983     * helper with the AArch32 instructions.
10984     */
10985    tcg_rd_regno = tcg_const_i32(rd << 1);
10986    tcg_rn_regno = tcg_const_i32(rn << 1);
10987    tcg_decrypt = tcg_const_i32(decrypt);
10988
10989    genfn(cpu_env, tcg_rd_regno, tcg_rn_regno, tcg_decrypt);
10990
10991    tcg_temp_free_i32(tcg_rd_regno);
10992    tcg_temp_free_i32(tcg_rn_regno);
10993    tcg_temp_free_i32(tcg_decrypt);
10994}
10995
10996/* C3.6.20 Crypto three-reg SHA
10997 *  31             24 23  22  21 20  16  15 14    12 11 10 9    5 4    0
10998 * +-----------------+------+---+------+---+--------+-----+------+------+
10999 * | 0 1 0 1 1 1 1 0 | size | 0 |  Rm  | 0 | opcode | 0 0 |  Rn  |  Rd  |
11000 * +-----------------+------+---+------+---+--------+-----+------+------+
11001 */
11002static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn)
11003{
11004    int size = extract32(insn, 22, 2);
11005    int opcode = extract32(insn, 12, 3);
11006    int rm = extract32(insn, 16, 5);
11007    int rn = extract32(insn, 5, 5);
11008    int rd = extract32(insn, 0, 5);
11009    CryptoThreeOpEnvFn *genfn;
11010    TCGv_i32 tcg_rd_regno, tcg_rn_regno, tcg_rm_regno;
11011    int feature = ARM_FEATURE_V8_SHA256;
11012
11013    if (size != 0) {
11014        unallocated_encoding(s);
11015        return;
11016    }
11017
11018    switch (opcode) {
11019    case 0: /* SHA1C */
11020    case 1: /* SHA1P */
11021    case 2: /* SHA1M */
11022    case 3: /* SHA1SU0 */
11023        genfn = NULL;
11024        feature = ARM_FEATURE_V8_SHA1;
11025        break;
11026    case 4: /* SHA256H */
11027        genfn = gen_helper_crypto_sha256h;
11028        break;
11029    case 5: /* SHA256H2 */
11030        genfn = gen_helper_crypto_sha256h2;
11031        break;
11032    case 6: /* SHA256SU1 */
11033        genfn = gen_helper_crypto_sha256su1;
11034        break;
11035    default:
11036        unallocated_encoding(s);
11037        return;
11038    }
11039
11040    if (!arm_dc_feature(s, feature)) {
11041        unallocated_encoding(s);
11042        return;
11043    }
11044
11045    tcg_rd_regno = tcg_const_i32(rd << 1);
11046    tcg_rn_regno = tcg_const_i32(rn << 1);
11047    tcg_rm_regno = tcg_const_i32(rm << 1);
11048
11049    if (genfn) {
11050        genfn(cpu_env, tcg_rd_regno, tcg_rn_regno, tcg_rm_regno);
11051    } else {
11052        TCGv_i32 tcg_opcode = tcg_const_i32(opcode);
11053
11054        gen_helper_crypto_sha1_3reg(cpu_env, tcg_rd_regno,
11055                                    tcg_rn_regno, tcg_rm_regno, tcg_opcode);
11056        tcg_temp_free_i32(tcg_opcode);
11057    }
11058
11059    tcg_temp_free_i32(tcg_rd_regno);
11060    tcg_temp_free_i32(tcg_rn_regno);
11061    tcg_temp_free_i32(tcg_rm_regno);
11062}
11063
11064/* C3.6.21 Crypto two-reg SHA
11065 *  31             24 23  22 21       17 16    12 11 10 9    5 4    0
11066 * +-----------------+------+-----------+--------+-----+------+------+
11067 * | 0 1 0 1 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
11068 * +-----------------+------+-----------+--------+-----+------+------+
11069 */
11070static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn)
11071{
11072    int size = extract32(insn, 22, 2);
11073    int opcode = extract32(insn, 12, 5);
11074    int rn = extract32(insn, 5, 5);
11075    int rd = extract32(insn, 0, 5);
11076    CryptoTwoOpEnvFn *genfn;
11077    int feature;
11078    TCGv_i32 tcg_rd_regno, tcg_rn_regno;
11079
11080    if (size != 0) {
11081        unallocated_encoding(s);
11082        return;
11083    }
11084
11085    switch (opcode) {
11086    case 0: /* SHA1H */
11087        feature = ARM_FEATURE_V8_SHA1;
11088        genfn = gen_helper_crypto_sha1h;
11089        break;
11090    case 1: /* SHA1SU1 */
11091        feature = ARM_FEATURE_V8_SHA1;
11092        genfn = gen_helper_crypto_sha1su1;
11093        break;
11094    case 2: /* SHA256SU0 */
11095        feature = ARM_FEATURE_V8_SHA256;
11096        genfn = gen_helper_crypto_sha256su0;
11097        break;
11098    default:
11099        unallocated_encoding(s);
11100        return;
11101    }
11102
11103    if (!arm_dc_feature(s, feature)) {
11104        unallocated_encoding(s);
11105        return;
11106    }
11107
11108    tcg_rd_regno = tcg_const_i32(rd << 1);
11109    tcg_rn_regno = tcg_const_i32(rn << 1);
11110
11111    genfn(cpu_env, tcg_rd_regno, tcg_rn_regno);
11112
11113    tcg_temp_free_i32(tcg_rd_regno);
11114    tcg_temp_free_i32(tcg_rn_regno);
11115}
11116
11117/* C3.6 Data processing - SIMD, inc Crypto
11118 *
11119 * As the decode gets a little complex we are using a table based
11120 * approach for this part of the decode.
11121 */
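/* An instruction matches a table entry when (insn & mask) == pattern;
 * entries are matched in table order, so more specific patterns must be
 * listed before broader ones that overlap them.
 */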
11122static const AArch64DecodeTable data_proc_simd[] = {
11123    /* pattern  ,  mask     ,  fn                        */
11124    { 0x0e200400, 0x9f200400, disas_simd_three_reg_same },
11125    { 0x0e200000, 0x9f200c00, disas_simd_three_reg_diff },
11126    { 0x0e200800, 0x9f3e0c00, disas_simd_two_reg_misc },
11127    { 0x0e300800, 0x9f3e0c00, disas_simd_across_lanes },
11128    { 0x0e000400, 0x9fe08400, disas_simd_copy },
11129    { 0x0f000000, 0x9f000400, disas_simd_indexed }, /* vector indexed */
11130    /* simd_mod_imm decode is a subset of simd_shift_imm, so must precede it */
11131    { 0x0f000400, 0x9ff80400, disas_simd_mod_imm },
11132    { 0x0f000400, 0x9f800400, disas_simd_shift_imm },
11133    { 0x0e000000, 0xbf208c00, disas_simd_tb },
11134    { 0x0e000800, 0xbf208c00, disas_simd_zip_trn },
11135    { 0x2e000000, 0xbf208400, disas_simd_ext },
11136    { 0x5e200400, 0xdf200400, disas_simd_scalar_three_reg_same },
11137    { 0x5e200000, 0xdf200c00, disas_simd_scalar_three_reg_diff },
11138    { 0x5e200800, 0xdf3e0c00, disas_simd_scalar_two_reg_misc },
11139    { 0x5e300800, 0xdf3e0c00, disas_simd_scalar_pairwise },
11140    { 0x5e000400, 0xdfe08400, disas_simd_scalar_copy },
11141    { 0x5f000000, 0xdf000400, disas_simd_indexed }, /* scalar indexed */
11142    { 0x5f000400, 0xdf800400, disas_simd_scalar_shift_imm },
11143    { 0x4e280800, 0xff3e0c00, disas_crypto_aes },
11144    { 0x5e000000, 0xff208c00, disas_crypto_three_reg_sha },
11145    { 0x5e280800, 0xff3e0c00, disas_crypto_two_reg_sha },
11146    { 0x00000000, 0x00000000, NULL }
11147};
11148
11149static void disas_data_proc_simd(DisasContext *s, uint32_t insn)
11150{
11151    /* Note that this is called with all non-FP cases from
11152     * table C3-6 so it must UNDEF for entries not specifically
11153     * allocated to instructions in that table.
11154     */
11155    AArch64DecodeFn *fn = lookup_disas_fn(&data_proc_simd[0], insn);
11156    if (fn) {
11157        fn(s, insn);
11158    } else {
11159        unallocated_encoding(s);
11160    }
11161}
11162
11163/* C3.6 Data processing - SIMD and floating point */
11164static void disas_data_proc_simd_fp(DisasContext *s, uint32_t insn)
11165{
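    /* The floating-point half of this space has insn[28] == 1 and
     * insn[30] == 0; everything else (vector and scalar SIMD, plus
     * crypto) goes through the table-based SIMD decoder above.
     */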
11166    if (extract32(insn, 28, 1) == 1 && extract32(insn, 30, 1) == 0) {
11167        disas_data_proc_fp(s, insn);
11168    } else {
11169        /* SIMD, including crypto */
11170        disas_data_proc_simd(s, insn);
11171    }
11172}
11173
11174/* C3.1 A64 instruction index by encoding */
11175static void disas_a64_insn(CPUARMState *env, DisasContext *s)
11176{
11177    uint32_t insn;
11178
11179    insn = arm_ldl_code(env, s->pc, s->sctlr_b);
11180    s->insn = insn;
11181    s->pc += 4;
11182
11183    s->fp_access_checked = false;
11184
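    /* Major opcode groups, selected by insn bits [28:25]:
     *   0000-0011  unallocated
     *   1000,1001  data processing - immediate
     *   1010,1011  branches, exceptions, system
     *   x1x0       loads and stores
     *   x101       data processing - register
     *   x111       data processing - SIMD and FP
     */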
11185    switch (extract32(insn, 25, 4)) {
11186    case 0x0: case 0x1: case 0x2: case 0x3: /* UNALLOCATED */
11187        unallocated_encoding(s);
11188        break;
11189    case 0x8: case 0x9: /* Data processing - immediate */
11190        disas_data_proc_imm(s, insn);
11191        break;
11192    case 0xa: case 0xb: /* Branch, exception generation and system insns */
11193        disas_b_exc_sys(s, insn);
11194        break;
11195    case 0x4:
11196    case 0x6:
11197    case 0xc:
11198    case 0xe:      /* Loads and stores */
11199        disas_ldst(s, insn);
11200        break;
11201    case 0x5:
11202    case 0xd:      /* Data processing - register */
11203        disas_data_proc_reg(s, insn);
11204        break;
11205    case 0x7:
11206    case 0xf:      /* Data processing - SIMD and floating point */
11207        disas_data_proc_simd_fp(s, insn);
11208        break;
11209    default:
11210        assert(FALSE); /* all 16 values of insn[28:25] are handled above */
11211        break;
11212    }
11213
11214    /* if we allocated any temporaries, free them here */
11215    free_tmp_a64(s);
11216}
11217
11218void gen_intermediate_code_a64(ARMCPU *cpu, TranslationBlock *tb)
11219{
11220    CPUState *cs = CPU(cpu);
11221    CPUARMState *env = &cpu->env;
11222    DisasContext dc1, *dc = &dc1;
11223    target_ulong pc_start;
11224    target_ulong next_page_start;
11225    int num_insns;
11226    int max_insns;
11227
11228    pc_start = tb->pc;
11229
11230    dc->tb = tb;
11231
11232    dc->is_jmp = DISAS_NEXT;
11233    dc->pc = pc_start;
11234    dc->singlestep_enabled = cs->singlestep_enabled;
11235    dc->condjmp = 0;
11236
11237    dc->aarch64 = 1;
11238    /* If we are coming from secure EL0 in a system with a 32-bit EL3, then
11239     * there is no secure EL1, so we route exceptions to EL3.
11240     */
11241    dc->secure_routed_to_el3 = arm_feature(env, ARM_FEATURE_EL3) &&
11242                               !arm_el_is_aa64(env, 3);
11243    dc->thumb = 0;
11244    dc->sctlr_b = 0;
11245    dc->be_data = ARM_TBFLAG_BE_DATA(tb->flags) ? MO_BE : MO_LE;
11246    dc->condexec_mask = 0;
11247    dc->condexec_cond = 0;
11248    dc->mmu_idx = ARM_TBFLAG_MMUIDX(tb->flags);
11249    dc->tbi0 = ARM_TBFLAG_TBI0(tb->flags);
11250    dc->tbi1 = ARM_TBFLAG_TBI1(tb->flags);
11251    dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
11252#if !defined(CONFIG_USER_ONLY)
11253    dc->user = (dc->current_el == 0);
11254    dc->ns = ARM_TBFLAG_NS(tb->flags);
11255#endif
11256    dc->fp_excp_el = ARM_TBFLAG_FPEXC_EL(tb->flags);
11257    dc->vec_len = 0;
11258    dc->vec_stride = 0;
11259    dc->cp_regs = cpu->cp_regs;
11260    dc->features = env->features;
11261
11262    /* Single step state. The code-generation logic here is:
11263     *  SS_ACTIVE == 0:
11264     *   generate code with no special handling for single-stepping (except
11265     *   that anything that can make us go to SS_ACTIVE == 1 must end the TB;
11266     *   this happens anyway because those changes are all system register or
11267     *   PSTATE writes).
11268     *  SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
11269     *   emit code for one insn
11270     *   emit code to clear PSTATE.SS
11271     *   emit code to generate software step exception for completed step
11272     *   end TB (as usual for having generated an exception)
11273     *  SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
11274     *   emit code to generate a software step exception
11275     *   end the TB
11276     */
11277    dc->ss_active = ARM_TBFLAG_SS_ACTIVE(tb->flags);
11278    dc->pstate_ss = ARM_TBFLAG_PSTATE_SS(tb->flags);
11279    dc->is_ldex = false;
11280    dc->ss_same_el = (arm_debug_target_el(env) == dc->current_el);
11281
11282    init_tmp_a64_array(dc);
11283
11284    next_page_start = (pc_start & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
11285    num_insns = 0;
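    /* tb->cflags carries the requested instruction count; zero means
     * "no limit requested". Either way, clamp to TCG_MAX_INSNS, the
     * most the TCG backend can track within a single TB.
     */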
11286    max_insns = tb->cflags & CF_COUNT_MASK;
11287    if (max_insns == 0) {
11288        max_insns = CF_COUNT_MASK;
11289    }
11290    if (max_insns > TCG_MAX_INSNS) {
11291        max_insns = TCG_MAX_INSNS;
11292    }
11293
11294    gen_tb_start(tb);
11295
11296    tcg_clear_temp_count();
11297
11298    do {
11299        dc->insn_start_idx = tcg_op_buf_count();
11300        tcg_gen_insn_start(dc->pc, 0, 0);
11301        num_insns++;
11302
11303        if (unlikely(!QTAILQ_EMPTY(&cs->breakpoints))) {
11304            CPUBreakpoint *bp;
11305            QTAILQ_FOREACH(bp, &cs->breakpoints, entry) {
11306                if (bp->pc == dc->pc) {
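                    /* BP_CPU marks architectural breakpoints (from the
                     * guest debug registers); whether one fires depends
                     * on runtime state, so defer to the helper. Other
                     * breakpoints were planted by the host debugger and
                     * always trap.
                     */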
11307                    if (bp->flags & BP_CPU) {
11308                        gen_a64_set_pc_im(dc->pc);
11309                        gen_helper_check_breakpoints(cpu_env);
11310                        /* End the TB early; it likely won't be executed */
11311                        dc->is_jmp = DISAS_UPDATE;
11312                    } else {
11313                        gen_exception_internal_insn(dc, 0, EXCP_DEBUG);
11314                        /* The address covered by the breakpoint must be
11315                           included in [tb->pc, tb->pc + tb->size) in order
11316                           for it to be properly cleared -- thus we
11317                           increment the PC here so that the logic setting
11318                           tb->size below does the right thing.  */
11319                        dc->pc += 4;
11320                        goto done_generating;
11321                    }
11322                    break;
11323                }
11324            }
11325        }
11326
11327        if (num_insns == max_insns && (tb->cflags & CF_LAST_IO)) {
11328            gen_io_start();
11329        }
11330
11331        if (dc->ss_active && !dc->pstate_ss) {
11332            /* Singlestep state is Active-pending.
11333             * If we're in this state at the start of a TB then either
11334             *  a) we just took an exception to an EL which is being debugged
11335             *     and this is the first insn in the exception handler
11336             *  b) debug exceptions were masked and we just unmasked them
11337             *     without changing EL (eg by clearing PSTATE.D)
11338             * In either case we're going to take a swstep exception in the
11339             * "did not step an insn" case, and so the syndrome ISV and EX
11340             * bits should be zero.
11341             */
11342            assert(num_insns == 1);
11343            gen_exception(EXCP_UDEF, syn_swstep(dc->ss_same_el, 0, 0),
11344                          default_exception_el(dc));
11345            dc->is_jmp = DISAS_EXC;
11346            break;
11347        }
11348
11349        disas_a64_insn(env, dc);
11350
11351        if (tcg_check_temp_count()) {
11352            fprintf(stderr, "TCG temporary leak before "TARGET_FMT_lx"\n",
11353                    dc->pc);
11354        }
11355
11356         /* Translation stops when a branch is encountered; otherwise the
11357          * code that follows could get translated several times. We also
11358          * stop at page boundaries (so prefetch aborts occur at the right
11359          * place), under single-stepping, or when the insn/op budget is spent.
11360          */
11361    } while (!dc->is_jmp && !tcg_op_buf_full() &&
11362             !cs->singlestep_enabled &&
11363             !singlestep &&
11364             !dc->ss_active &&
11365             dc->pc < next_page_start &&
11366             num_insns < max_insns);
11367
11368    if (tb->cflags & CF_LAST_IO) {
11369        gen_io_end();
11370    }
11371
11372    if (unlikely(cs->singlestep_enabled || dc->ss_active)
11373        && dc->is_jmp != DISAS_EXC) {
11374        /* Note that this means single stepping WFI doesn't halt the CPU.
11375         * For conditional branch insns this is harmless unreachable code as
11376         * gen_goto_tb() has already handled emitting the debug exception
11377         * (and thus a tb-jump is not possible when singlestepping).
11378         */
11379        assert(dc->is_jmp != DISAS_TB_JUMP);
11380        if (dc->is_jmp != DISAS_JUMP) {
11381            gen_a64_set_pc_im(dc->pc);
11382        }
11383        if (cs->singlestep_enabled) {
11384            gen_exception_internal(EXCP_DEBUG);
11385        } else {
11386            gen_step_complete_exception(dc);
11387        }
11388    } else {
11389        switch (dc->is_jmp) {
11390        case DISAS_NEXT:
11391            gen_goto_tb(dc, 1, dc->pc);
11392            break;
11393        default:
11394        case DISAS_UPDATE:
11395            gen_a64_set_pc_im(dc->pc);
11396            /* fall through */
11397        case DISAS_JUMP:
11398            /* indicate that the hash table must be used to find the next TB */
11399            tcg_gen_exit_tb(0);
11400            break;
11401        case DISAS_TB_JUMP:
11402        case DISAS_EXC:
11403        case DISAS_SWI:
11404            break;
11405        case DISAS_WFE:
11406            gen_a64_set_pc_im(dc->pc);
11407            gen_helper_wfe(cpu_env);
11408            tcg_gen_exit_tb(0);
11409            break;
11410        case DISAS_YIELD:
11411            gen_a64_set_pc_im(dc->pc);
11412            gen_helper_yield(cpu_env);
11413            break;
11414        case DISAS_WFI:
11415            /* This is a special case because we don't want to just halt the CPU
11416             * if trying to debug across a WFI.
11417             */
11418            gen_a64_set_pc_im(dc->pc);
11419            gen_helper_wfi(cpu_env);
11420            /* The helper doesn't necessarily throw an exception, but we
11421             * must go back to the main loop to check for interrupts anyway.
11422             */
11423            tcg_gen_exit_tb(0);
11424            break;
11425        }
11426    }
11427
11428done_generating:
11429    gen_tb_end(tb, num_insns);
11430
11431#ifdef DEBUG_DISAS
11432    if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM) &&
11433        qemu_log_in_addr_range(pc_start)) {
11434        qemu_log_lock();
11435        qemu_log("----------------\n");
11436        qemu_log("IN: %s\n", lookup_symbol(pc_start));
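        /* Disassembler flag bits, as interpreted by the generic disas
         * code: 4 selects the A64 disassembler, 2 requests byte-swapped
         * instruction words (legacy SCTLR.B big-endian code).
         */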
11437        log_target_disas(cs, pc_start, dc->pc - pc_start,
11438                         4 | (bswap_code(dc->sctlr_b) ? 2 : 0));
11439        qemu_log("\n");
11440        qemu_log_unlock();
11441    }
11442#endif
11443    tb->size = dc->pc - pc_start;
11444    tb->icount = num_insns;
11445}
11446