qemu/target/arm/translate-a64.c
/*
 *  AArch64 translation
 *
 *  Copyright (c) 2013 Alexander Graf <agraf@suse.de>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"

#include "cpu.h"
#include "exec/exec-all.h"
#include "tcg-op.h"
#include "qemu/log.h"
#include "arm_ldst.h"
#include "translate.h"
#include "internals.h"
#include "qemu/host-utils.h"

#include "exec/semihost.h"
#include "exec/gen-icount.h"

#include "exec/helper-proto.h"
#include "exec/helper-gen.h"
#include "exec/log.h"

#include "trace-tcg.h"

static TCGv_i64 cpu_X[32];
static TCGv_i64 cpu_pc;

/* Load/store exclusive handling */
static TCGv_i64 cpu_exclusive_high;
static TCGv_i64 cpu_reg(DisasContext *s, int reg);

static const char *regnames[] = {
    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
    "x24", "x25", "x26", "x27", "x28", "x29", "lr", "sp"
};

enum a64_shift_type {
    A64_SHIFT_TYPE_LSL = 0,
    A64_SHIFT_TYPE_LSR = 1,
    A64_SHIFT_TYPE_ASR = 2,
    A64_SHIFT_TYPE_ROR = 3
};

/* Table based decoder typedefs - used when the relevant bits for decode
 * are too awkwardly scattered across the instruction (eg SIMD).
 */
typedef void AArch64DecodeFn(DisasContext *s, uint32_t insn);

typedef struct AArch64DecodeTable {
    uint32_t pattern;
    uint32_t mask;
    AArch64DecodeFn *disas_fn;
} AArch64DecodeTable;
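
/* A decode table is scanned linearly by lookup_disas_fn() further
 * below: the first entry whose mask/pattern matches the insn wins,
 * and a zero mask terminates the table. Sketch of a hypothetical
 * table (the function names here are illustrative, not decoders
 * defined in this file):
 *
 *   static const AArch64DecodeTable example_table[] = {
 *       { 0x0e200400, 0x9f200400, disas_example_three_same },
 *       { 0x0e200000, 0x9f200c00, disas_example_three_diff },
 *       { 0x00000000, 0x00000000, NULL }
 *   };
 */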

/* Function prototype for gen_ functions for calling Neon helpers */
typedef void NeonGenOneOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32);
typedef void NeonGenTwoOpFn(TCGv_i32, TCGv_i32, TCGv_i32);
typedef void NeonGenTwoOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32, TCGv_i32);
typedef void NeonGenTwo64OpFn(TCGv_i64, TCGv_i64, TCGv_i64);
typedef void NeonGenTwo64OpEnvFn(TCGv_i64, TCGv_ptr, TCGv_i64, TCGv_i64);
typedef void NeonGenNarrowFn(TCGv_i32, TCGv_i64);
typedef void NeonGenNarrowEnvFn(TCGv_i32, TCGv_ptr, TCGv_i64);
typedef void NeonGenWidenFn(TCGv_i64, TCGv_i32);
typedef void NeonGenTwoSingleOPFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
typedef void NeonGenTwoDoubleOPFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr);
typedef void NeonGenOneOpFn(TCGv_i64, TCGv_i64);
typedef void CryptoTwoOpEnvFn(TCGv_ptr, TCGv_i32, TCGv_i32);
typedef void CryptoThreeOpEnvFn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);

/* initialize TCG globals.  */
void a64_translate_init(void)
{
    int i;

    cpu_pc = tcg_global_mem_new_i64(cpu_env,
                                    offsetof(CPUARMState, pc),
                                    "pc");
    for (i = 0; i < 32; i++) {
        cpu_X[i] = tcg_global_mem_new_i64(cpu_env,
                                          offsetof(CPUARMState, xregs[i]),
                                          regnames[i]);
    }

    cpu_exclusive_high = tcg_global_mem_new_i64(cpu_env,
        offsetof(CPUARMState, exclusive_high), "exclusive_high");
}

static inline int get_a64_user_mem_index(DisasContext *s)
{
    /* Return the core mmu_idx to use for A64 "unprivileged load/store" insns:
     *  if EL1, access as if EL0; otherwise access at current EL
     */
    ARMMMUIdx useridx;

    switch (s->mmu_idx) {
    case ARMMMUIdx_S12NSE1:
        useridx = ARMMMUIdx_S12NSE0;
        break;
    case ARMMMUIdx_S1SE1:
        useridx = ARMMMUIdx_S1SE0;
        break;
    case ARMMMUIdx_S2NS:
        g_assert_not_reached();
    default:
        useridx = s->mmu_idx;
        break;
    }
    return arm_to_core_mmu_idx(useridx);
}

void aarch64_cpu_dump_state(CPUState *cs, FILE *f,
                            fprintf_function cpu_fprintf, int flags)
{
    ARMCPU *cpu = ARM_CPU(cs);
    CPUARMState *env = &cpu->env;
    uint32_t psr = pstate_read(env);
    int i;
    int el = arm_current_el(env);
    const char *ns_status;

    cpu_fprintf(f, "PC=%016"PRIx64"  SP=%016"PRIx64"\n",
            env->pc, env->xregs[31]);
    for (i = 0; i < 31; i++) {
        cpu_fprintf(f, "X%02d=%016"PRIx64, i, env->xregs[i]);
        if ((i % 4) == 3) {
            cpu_fprintf(f, "\n");
        } else {
            cpu_fprintf(f, " ");
        }
    }

    if (arm_feature(env, ARM_FEATURE_EL3) && el != 3) {
        ns_status = env->cp15.scr_el3 & SCR_NS ? "NS " : "S ";
    } else {
        ns_status = "";
    }

    cpu_fprintf(f, "\nPSTATE=%08x %c%c%c%c %sEL%d%c\n",
                psr,
                psr & PSTATE_N ? 'N' : '-',
                psr & PSTATE_Z ? 'Z' : '-',
                psr & PSTATE_C ? 'C' : '-',
                psr & PSTATE_V ? 'V' : '-',
                ns_status,
                el,
                psr & PSTATE_SP ? 'h' : 't');

    if (flags & CPU_DUMP_FPU) {
        int numvfpregs = 32;
        for (i = 0; i < numvfpregs; i += 2) {
            uint64_t vlo = float64_val(env->vfp.regs[i * 2]);
            uint64_t vhi = float64_val(env->vfp.regs[(i * 2) + 1]);
            cpu_fprintf(f, "q%02d=%016" PRIx64 ":%016" PRIx64 " ",
                        i, vhi, vlo);
            vlo = float64_val(env->vfp.regs[(i + 1) * 2]);
            vhi = float64_val(env->vfp.regs[((i + 1) * 2) + 1]);
            cpu_fprintf(f, "q%02d=%016" PRIx64 ":%016" PRIx64 "\n",
                        i + 1, vhi, vlo);
        }
        cpu_fprintf(f, "FPCR: %08x  FPSR: %08x\n",
                    vfp_get_fpcr(env), vfp_get_fpsr(env));
    }
}

void gen_a64_set_pc_im(uint64_t val)
{
    tcg_gen_movi_i64(cpu_pc, val);
}

/* Load the PC from a generic TCG variable.
 *
 * If address tagging is enabled via the TCR TBI bits, then loading
 * an address into the PC will clear out any tag in it:
 *  + for EL2 and EL3 there is only one TBI bit, and if it is set
 *    then the address is zero-extended, clearing bits [63:56]
 *  + for EL0 and EL1, TBI0 controls addresses with bit 55 == 0
 *    and TBI1 controls addresses with bit 55 == 1.
 *    If the appropriate TBI bit is set for the address then
 *    the address is sign-extended from bit 55 into bits [63:56]
 *
 * We can avoid doing this for relative branches, because the
 * PC + offset can never overflow into the tag bits (assuming
 * that virtual addresses are less than 56 bits wide, as they
 * are currently), but we must handle it for branch-to-register.
 */
static void gen_a64_set_pc(DisasContext *s, TCGv_i64 src)
{
    if (s->current_el <= 1) {
        /* Test if NEITHER or BOTH TBI values are set.  If so, there is
         * no need to examine bit 55 of the address and we can generate
         * straight-line code. If they are mixed, we must test bit 55
         * via generated code.
         */
        if (s->tbi0 && s->tbi1) {
            TCGv_i64 tmp_reg = tcg_temp_new_i64();
            /* Both bits set, sign extension from bit 55 into [63:56] will
             * cover both cases
             */
            tcg_gen_shli_i64(tmp_reg, src, 8);
            tcg_gen_sari_i64(cpu_pc, tmp_reg, 8);
            tcg_temp_free_i64(tmp_reg);
        } else if (!s->tbi0 && !s->tbi1) {
            /* Neither bit set, just load it as-is */
            tcg_gen_mov_i64(cpu_pc, src);
        } else {
            TCGv_i64 tcg_tmpval = tcg_temp_new_i64();
            TCGv_i64 tcg_bit55  = tcg_temp_new_i64();
            TCGv_i64 tcg_zero   = tcg_const_i64(0);

            tcg_gen_andi_i64(tcg_bit55, src, (1ull << 55));

            if (s->tbi0) {
                /* tbi0==1, tbi1==0, so 0-fill upper byte if bit 55 = 0 */
                tcg_gen_andi_i64(tcg_tmpval, src,
                                 0x00FFFFFFFFFFFFFFull);
                tcg_gen_movcond_i64(TCG_COND_EQ, cpu_pc, tcg_bit55, tcg_zero,
                                    tcg_tmpval, src);
            } else {
                /* tbi0==0, tbi1==1, so 1-fill upper byte if bit 55 = 1 */
                tcg_gen_ori_i64(tcg_tmpval, src,
                                0xFF00000000000000ull);
                tcg_gen_movcond_i64(TCG_COND_NE, cpu_pc, tcg_bit55, tcg_zero,
                                    tcg_tmpval, src);
            }
            tcg_temp_free_i64(tcg_zero);
            tcg_temp_free_i64(tcg_bit55);
            tcg_temp_free_i64(tcg_tmpval);
        }
    } else {  /* EL > 1 */
        if (s->tbi0) {
            /* Force tag byte to all zero */
            tcg_gen_andi_i64(cpu_pc, src, 0x00FFFFFFFFFFFFFFull);
        } else {
            /* Load unmodified address */
            tcg_gen_mov_i64(cpu_pc, src);
        }
    }
}
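
/* Concrete example of the sign-extension trick above (illustrative
 * values): with TBI0 and TBI1 both set, src = 0xAB80123456789ABC has
 * bit 55 set, so (src << 8) >> 8 (arithmetic) gives 0xFF80123456789ABC,
 * while src = 0xAB00123456789ABC has bit 55 clear and yields
 * 0x0000123456789ABC; either way the tag byte [63:56] is replaced by
 * copies of bit 55.
 */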

typedef struct DisasCompare64 {
    TCGCond cond;
    TCGv_i64 value;
} DisasCompare64;

static void a64_test_cc(DisasCompare64 *c64, int cc)
{
    DisasCompare c32;

    arm_test_cc(&c32, cc);

    /* Sign-extend the 32-bit value so that the GE/LT comparisons work
     * properly.  The NE/EQ comparisons are also fine with this choice.  */
    c64->cond = c32.cond;
    c64->value = tcg_temp_new_i64();
    tcg_gen_ext_i32_i64(c64->value, c32.value);

    arm_free_cc(&c32);
}

static void a64_free_cc(DisasCompare64 *c64)
{
    tcg_temp_free_i64(c64->value);
}

static void gen_exception_internal(int excp)
{
    TCGv_i32 tcg_excp = tcg_const_i32(excp);

    assert(excp_is_internal(excp));
    gen_helper_exception_internal(cpu_env, tcg_excp);
    tcg_temp_free_i32(tcg_excp);
}

static void gen_exception(int excp, uint32_t syndrome, uint32_t target_el)
{
    TCGv_i32 tcg_excp = tcg_const_i32(excp);
    TCGv_i32 tcg_syn = tcg_const_i32(syndrome);
    TCGv_i32 tcg_el = tcg_const_i32(target_el);

    gen_helper_exception_with_syndrome(cpu_env, tcg_excp,
                                       tcg_syn, tcg_el);
    tcg_temp_free_i32(tcg_el);
    tcg_temp_free_i32(tcg_syn);
    tcg_temp_free_i32(tcg_excp);
}

static void gen_exception_internal_insn(DisasContext *s, int offset, int excp)
{
    gen_a64_set_pc_im(s->pc - offset);
    gen_exception_internal(excp);
    s->base.is_jmp = DISAS_NORETURN;
}

static void gen_exception_insn(DisasContext *s, int offset, int excp,
                               uint32_t syndrome, uint32_t target_el)
{
    gen_a64_set_pc_im(s->pc - offset);
    gen_exception(excp, syndrome, target_el);
    s->base.is_jmp = DISAS_NORETURN;
}

static void gen_ss_advance(DisasContext *s)
{
    /* If the singlestep state is Active-not-pending, advance to
     * Active-pending.
     */
    if (s->ss_active) {
        s->pstate_ss = 0;
        gen_helper_clear_pstate_ss(cpu_env);
    }
}

static void gen_step_complete_exception(DisasContext *s)
{
    /* We just completed a step of an insn. Move from Active-not-pending
     * to Active-pending, and then also take the swstep exception.
     * This corresponds to making the (IMPDEF) choice to prioritize
     * swstep exceptions over asynchronous exceptions taken to an exception
     * level where debug is disabled. This choice has the advantage that
     * we do not need to maintain internal state corresponding to the
     * ISV/EX syndrome bits between completion of the step and generation
     * of the exception, and our syndrome information is always correct.
     */
    gen_ss_advance(s);
    gen_exception(EXCP_UDEF, syn_swstep(s->ss_same_el, 1, s->is_ldex),
                  default_exception_el(s));
    s->base.is_jmp = DISAS_NORETURN;
}

static inline bool use_goto_tb(DisasContext *s, int n, uint64_t dest)
{
    /* No direct tb linking with singlestep (either QEMU's or the ARM
     * debug architecture kind) or deterministic io
     */
    if (s->base.singlestep_enabled || s->ss_active ||
        (tb_cflags(s->base.tb) & CF_LAST_IO)) {
        return false;
    }

#ifndef CONFIG_USER_ONLY
    /* Only link tbs from inside the same guest page */
    if ((s->base.tb->pc & TARGET_PAGE_MASK) != (dest & TARGET_PAGE_MASK)) {
        return false;
    }
#endif

    return true;
}

static inline void gen_goto_tb(DisasContext *s, int n, uint64_t dest)
{
    TranslationBlock *tb;

    tb = s->base.tb;
    if (use_goto_tb(s, n, dest)) {
        tcg_gen_goto_tb(n);
        gen_a64_set_pc_im(dest);
        tcg_gen_exit_tb((intptr_t)tb + n);
        s->base.is_jmp = DISAS_NORETURN;
    } else {
        gen_a64_set_pc_im(dest);
        if (s->ss_active) {
            gen_step_complete_exception(s);
        } else if (s->base.singlestep_enabled) {
            gen_exception_internal(EXCP_DEBUG);
        } else {
            tcg_gen_lookup_and_goto_ptr();
            s->base.is_jmp = DISAS_NORETURN;
        }
    }
}

static void unallocated_encoding(DisasContext *s)
{
    /* Unallocated and reserved encodings are uncategorized */
    gen_exception_insn(s, 4, EXCP_UDEF, syn_uncategorized(),
                       default_exception_el(s));
}

#define unsupported_encoding(s, insn)                                    \
    do {                                                                 \
        qemu_log_mask(LOG_UNIMP,                                         \
                      "%s:%d: unsupported instruction encoding 0x%08x "  \
                      "at pc=%016" PRIx64 "\n",                          \
                      __FILE__, __LINE__, insn, s->pc - 4);              \
        unallocated_encoding(s);                                         \
    } while (0)

static void init_tmp_a64_array(DisasContext *s)
{
#ifdef CONFIG_DEBUG_TCG
    int i;
    for (i = 0; i < ARRAY_SIZE(s->tmp_a64); i++) {
        TCGV_UNUSED_I64(s->tmp_a64[i]);
    }
#endif
    s->tmp_a64_count = 0;
}

static void free_tmp_a64(DisasContext *s)
{
    int i;
    for (i = 0; i < s->tmp_a64_count; i++) {
        tcg_temp_free_i64(s->tmp_a64[i]);
    }
    init_tmp_a64_array(s);
}

static TCGv_i64 new_tmp_a64(DisasContext *s)
{
    assert(s->tmp_a64_count < TMP_A64_MAX);
    return s->tmp_a64[s->tmp_a64_count++] = tcg_temp_new_i64();
}

static TCGv_i64 new_tmp_a64_zero(DisasContext *s)
{
    TCGv_i64 t = new_tmp_a64(s);
    tcg_gen_movi_i64(t, 0);
    return t;
}

/*
 * Register access functions
 *
 * These functions are used for directly accessing a register in cases
 * where changes to the final register value are likely to be made. If
 * you need to use a register for temporary calculation (e.g. index type
 * operations) use the read_* form.
 *
 * B1.2.1 Register mappings
 *
 * In instruction register encoding 31 can refer to ZR (zero register) or
 * the SP (stack pointer) depending on context. In QEMU's case we map SP
 * to cpu_X[31] and ZR accesses to a temporary which can be discarded.
 * This is the point of the _sp forms.
 */
static TCGv_i64 cpu_reg(DisasContext *s, int reg)
{
    if (reg == 31) {
        return new_tmp_a64_zero(s);
    } else {
        return cpu_X[reg];
    }
}
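
/* For example, "ADD X0, X1, XZR" resolves its third operand through
 * cpu_reg(s, 31) and gets a freshly zeroed temporary, while an
 * SP-relative insn such as "ADD X0, SP, #16" goes through
 * cpu_reg_sp(s, 31) below and gets cpu_X[31], the real stack pointer.
 */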

/* register access for when 31 == SP */
static TCGv_i64 cpu_reg_sp(DisasContext *s, int reg)
{
    return cpu_X[reg];
}

/* read a cpu register in 32bit/64bit mode. Returns a TCGv_i64
 * representing the register contents. This TCGv is an auto-freed
 * temporary so it need not be explicitly freed, and may be modified.
 */
static TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf)
{
    TCGv_i64 v = new_tmp_a64(s);
    if (reg != 31) {
        if (sf) {
            tcg_gen_mov_i64(v, cpu_X[reg]);
        } else {
            tcg_gen_ext32u_i64(v, cpu_X[reg]);
        }
    } else {
        tcg_gen_movi_i64(v, 0);
    }
    return v;
}

static TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf)
{
    TCGv_i64 v = new_tmp_a64(s);
    if (sf) {
        tcg_gen_mov_i64(v, cpu_X[reg]);
    } else {
        tcg_gen_ext32u_i64(v, cpu_X[reg]);
    }
    return v;
}

/* We should have at some point before trying to access an FP register
 * done the necessary access check, so assert that
 * (a) we did the check and
 * (b) we didn't then just plough ahead anyway if it failed.
 * Print the instruction pattern in the abort message so we can figure
 * out what we need to fix if a user encounters this problem in the wild.
 */
static inline void assert_fp_access_checked(DisasContext *s)
{
#ifdef CONFIG_DEBUG_TCG
    if (unlikely(!s->fp_access_checked || s->fp_excp_el)) {
        fprintf(stderr, "target-arm: FP access check missing for "
                "instruction 0x%08x\n", s->insn);
        abort();
    }
#endif
}

/* Return the offset into CPUARMState of an element of specified
 * size, 'element' places in from the least significant end of
 * the FP/vector register Qn.
 */
static inline int vec_reg_offset(DisasContext *s, int regno,
                                 int element, TCGMemOp size)
{
    int offs = 0;
#ifdef HOST_WORDS_BIGENDIAN
    /* This is complicated slightly because vfp.regs[2n] is
     * still the low half and vfp.regs[2n+1] the high half
     * of the 128 bit vector, even on big endian systems.
     * Calculate the offset assuming a fully bigendian 128 bits,
     * then XOR to account for the order of the two 64 bit halves.
     */
    offs += (16 - ((element + 1) * (1 << size)));
    offs ^= 8;
#else
    offs += element * (1 << size);
#endif
    offs += offsetof(CPUARMState, vfp.regs[regno * 2]);
    assert_fp_access_checked(s);
    return offs;
}
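
/* Worked example: for element 1 of size MO_32, a little-endian host
 * gets offs = 1 * 4 = 4 into the register, i.e. bits [63:32] of Qn.
 * A big-endian host gets (16 - 2 * 4) ^ 8 = 0: byte offset 0 of
 * vfp.regs[2n] holds the most significant byte of the low 64-bit
 * half there, so this is again bits [63:32].
 */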

/* Return the offset into CPUARMState of a slice (from
 * the least significant end) of FP register Qn (ie
 * Dn, Sn, Hn or Bn).
 * (Note that this is not the same mapping as for A32; see cpu.h)
 */
static inline int fp_reg_offset(DisasContext *s, int regno, TCGMemOp size)
{
    int offs = offsetof(CPUARMState, vfp.regs[regno * 2]);
#ifdef HOST_WORDS_BIGENDIAN
    offs += (8 - (1 << size));
#endif
    assert_fp_access_checked(s);
    return offs;
}

/* Offset of the high half of the 128 bit vector Qn */
static inline int fp_reg_hi_offset(DisasContext *s, int regno)
{
    assert_fp_access_checked(s);
    return offsetof(CPUARMState, vfp.regs[regno * 2 + 1]);
}

/* Convenience accessors for reading and writing single and double
 * FP registers. Writing clears the upper parts of the associated
 * 128 bit vector register, as required by the architecture.
 * Note that unlike the GP register accessors, the values returned
 * by the read functions must be manually freed.
 */
static TCGv_i64 read_fp_dreg(DisasContext *s, int reg)
{
    TCGv_i64 v = tcg_temp_new_i64();

    tcg_gen_ld_i64(v, cpu_env, fp_reg_offset(s, reg, MO_64));
    return v;
}

static TCGv_i32 read_fp_sreg(DisasContext *s, int reg)
{
    TCGv_i32 v = tcg_temp_new_i32();

    tcg_gen_ld_i32(v, cpu_env, fp_reg_offset(s, reg, MO_32));
    return v;
}

static void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v)
{
    TCGv_i64 tcg_zero = tcg_const_i64(0);

    tcg_gen_st_i64(v, cpu_env, fp_reg_offset(s, reg, MO_64));
    tcg_gen_st_i64(tcg_zero, cpu_env, fp_reg_hi_offset(s, reg));
    tcg_temp_free_i64(tcg_zero);
}

static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v)
{
    TCGv_i64 tmp = tcg_temp_new_i64();

    tcg_gen_extu_i32_i64(tmp, v);
    write_fp_dreg(s, reg, tmp);
    tcg_temp_free_i64(tmp);
}

static TCGv_ptr get_fpstatus_ptr(void)
{
    TCGv_ptr statusptr = tcg_temp_new_ptr();
    int offset;

    /* In A64 all instructions (both FP and Neon) use the FPCR;
     * there is no equivalent of the A32 Neon "standard FPSCR value"
     * and all operations use vfp.fp_status.
     */
    offset = offsetof(CPUARMState, vfp.fp_status);
    tcg_gen_addi_ptr(statusptr, cpu_env, offset);
    return statusptr;
}

/* Set ZF and NF based on a 64 bit result. This is alas fiddlier
 * than the 32 bit equivalent.
 */
static inline void gen_set_NZ64(TCGv_i64 result)
{
    tcg_gen_extr_i64_i32(cpu_ZF, cpu_NF, result);
    tcg_gen_or_i32(cpu_ZF, cpu_ZF, cpu_NF);
}
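
/* A note on the trick above: tcg_gen_extr_i64_i32() splits the 64-bit
 * result into its low half (cpu_ZF) and high half (cpu_NF). Bit 31 of
 * cpu_NF is then the result's bit 63, i.e. N. ORing the two halves
 * into cpu_ZF leaves it zero exactly when the full 64-bit result is
 * zero, matching the convention that Z is set when cpu_ZF == 0.
 */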

/* Set NZCV as for a logical operation: NZ as per result, CV cleared. */
static inline void gen_logic_CC(int sf, TCGv_i64 result)
{
    if (sf) {
        gen_set_NZ64(result);
    } else {
        tcg_gen_extrl_i64_i32(cpu_ZF, result);
        tcg_gen_mov_i32(cpu_NF, cpu_ZF);
    }
    tcg_gen_movi_i32(cpu_CF, 0);
    tcg_gen_movi_i32(cpu_VF, 0);
}

/* dest = T0 + T1; compute C, N, V and Z flags */
static void gen_add_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        TCGv_i64 result, flag, tmp;
        result = tcg_temp_new_i64();
        flag = tcg_temp_new_i64();
        tmp = tcg_temp_new_i64();

        tcg_gen_movi_i64(tmp, 0);
        tcg_gen_add2_i64(result, flag, t0, tmp, t1, tmp);

        tcg_gen_extrl_i64_i32(cpu_CF, flag);

        gen_set_NZ64(result);

        tcg_gen_xor_i64(flag, result, t0);
        tcg_gen_xor_i64(tmp, t0, t1);
        tcg_gen_andc_i64(flag, flag, tmp);
        tcg_temp_free_i64(tmp);
        tcg_gen_extrh_i64_i32(cpu_VF, flag);

        tcg_gen_mov_i64(dest, result);
        tcg_temp_free_i64(result);
        tcg_temp_free_i64(flag);
    } else {
        /* 32 bit arithmetic */
        TCGv_i32 t0_32 = tcg_temp_new_i32();
        TCGv_i32 t1_32 = tcg_temp_new_i32();
        TCGv_i32 tmp = tcg_temp_new_i32();

        tcg_gen_movi_i32(tmp, 0);
        tcg_gen_extrl_i64_i32(t0_32, t0);
        tcg_gen_extrl_i64_i32(t1_32, t1);
        tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, t1_32, tmp);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
        tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
        tcg_gen_xor_i32(tmp, t0_32, t1_32);
        tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
        tcg_gen_extu_i32_i64(dest, cpu_NF);

        tcg_temp_free_i32(tmp);
        tcg_temp_free_i32(t0_32);
        tcg_temp_free_i32(t1_32);
    }
}
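
/* Flag derivation above, in brief: add2 computes the full (t0 + t1)
 * with zero high words, so the high result word is the carry-out C.
 * Overflow is (result ^ t0) & ~(t0 ^ t1): set when the operands had
 * equal signs but the result's sign differs; extrh then moves the top
 * 32 bits into cpu_VF, whose bit 31 is where V is kept.
 */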

/* dest = T0 - T1; compute C, N, V and Z flags */
static void gen_sub_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        /* 64 bit arithmetic */
        TCGv_i64 result, flag, tmp;

        result = tcg_temp_new_i64();
        flag = tcg_temp_new_i64();
        tcg_gen_sub_i64(result, t0, t1);

        gen_set_NZ64(result);

        tcg_gen_setcond_i64(TCG_COND_GEU, flag, t0, t1);
        tcg_gen_extrl_i64_i32(cpu_CF, flag);

        tcg_gen_xor_i64(flag, result, t0);
        tmp = tcg_temp_new_i64();
        tcg_gen_xor_i64(tmp, t0, t1);
        tcg_gen_and_i64(flag, flag, tmp);
        tcg_temp_free_i64(tmp);
        tcg_gen_extrh_i64_i32(cpu_VF, flag);
        tcg_gen_mov_i64(dest, result);
        tcg_temp_free_i64(flag);
        tcg_temp_free_i64(result);
    } else {
        /* 32 bit arithmetic */
        TCGv_i32 t0_32 = tcg_temp_new_i32();
        TCGv_i32 t1_32 = tcg_temp_new_i32();
        TCGv_i32 tmp;

        tcg_gen_extrl_i64_i32(t0_32, t0);
        tcg_gen_extrl_i64_i32(t1_32, t1);
        tcg_gen_sub_i32(cpu_NF, t0_32, t1_32);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
        tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0_32, t1_32);
        tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
        tmp = tcg_temp_new_i32();
        tcg_gen_xor_i32(tmp, t0_32, t1_32);
        tcg_temp_free_i32(t0_32);
        tcg_temp_free_i32(t1_32);
        tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
        tcg_temp_free_i32(tmp);
        tcg_gen_extu_i32_i64(dest, cpu_NF);
    }
}

/* dest = T0 + T1 + CF; do not compute flags. */
static void gen_adc(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    TCGv_i64 flag = tcg_temp_new_i64();
    tcg_gen_extu_i32_i64(flag, cpu_CF);
    tcg_gen_add_i64(dest, t0, t1);
    tcg_gen_add_i64(dest, dest, flag);
    tcg_temp_free_i64(flag);

    if (!sf) {
        tcg_gen_ext32u_i64(dest, dest);
    }
}

/* dest = T0 + T1 + CF; compute C, N, V and Z flags. */
static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        TCGv_i64 result, cf_64, vf_64, tmp;
        result = tcg_temp_new_i64();
        cf_64 = tcg_temp_new_i64();
        vf_64 = tcg_temp_new_i64();
        tmp = tcg_const_i64(0);

        tcg_gen_extu_i32_i64(cf_64, cpu_CF);
        tcg_gen_add2_i64(result, cf_64, t0, tmp, cf_64, tmp);
        tcg_gen_add2_i64(result, cf_64, result, cf_64, t1, tmp);
        tcg_gen_extrl_i64_i32(cpu_CF, cf_64);
        gen_set_NZ64(result);

        tcg_gen_xor_i64(vf_64, result, t0);
        tcg_gen_xor_i64(tmp, t0, t1);
        tcg_gen_andc_i64(vf_64, vf_64, tmp);
        tcg_gen_extrh_i64_i32(cpu_VF, vf_64);

        tcg_gen_mov_i64(dest, result);

        tcg_temp_free_i64(tmp);
        tcg_temp_free_i64(vf_64);
        tcg_temp_free_i64(cf_64);
        tcg_temp_free_i64(result);
    } else {
        TCGv_i32 t0_32, t1_32, tmp;
        t0_32 = tcg_temp_new_i32();
        t1_32 = tcg_temp_new_i32();
        tmp = tcg_const_i32(0);

        tcg_gen_extrl_i64_i32(t0_32, t0);
        tcg_gen_extrl_i64_i32(t1_32, t1);
        tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, cpu_CF, tmp);
        tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1_32, tmp);

        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
        tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
        tcg_gen_xor_i32(tmp, t0_32, t1_32);
        tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
        tcg_gen_extu_i32_i64(dest, cpu_NF);

        tcg_temp_free_i32(tmp);
        tcg_temp_free_i32(t1_32);
        tcg_temp_free_i32(t0_32);
    }
}

/*
 * Load/Store generators
 */

/*
 * Store from GPR register to memory.
 */
static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source,
                             TCGv_i64 tcg_addr, int size, int memidx,
                             bool iss_valid,
                             unsigned int iss_srt,
                             bool iss_sf, bool iss_ar)
{
    g_assert(size <= 3);
    tcg_gen_qemu_st_i64(source, tcg_addr, memidx, s->be_data + size);

    if (iss_valid) {
        uint32_t syn;

        syn = syn_data_abort_with_iss(0,
                                      size,
                                      false,
                                      iss_srt,
                                      iss_sf,
                                      iss_ar,
                                      0, 0, 0, 0, 0, false);
        disas_set_insn_syndrome(s, syn);
    }
}

static void do_gpr_st(DisasContext *s, TCGv_i64 source,
                      TCGv_i64 tcg_addr, int size,
                      bool iss_valid,
                      unsigned int iss_srt,
                      bool iss_sf, bool iss_ar)
{
    do_gpr_st_memidx(s, source, tcg_addr, size, get_mem_index(s),
                     iss_valid, iss_srt, iss_sf, iss_ar);
}

/*
 * Load from memory to GPR register
 */
static void do_gpr_ld_memidx(DisasContext *s,
                             TCGv_i64 dest, TCGv_i64 tcg_addr,
                             int size, bool is_signed,
                             bool extend, int memidx,
                             bool iss_valid, unsigned int iss_srt,
                             bool iss_sf, bool iss_ar)
{
    TCGMemOp memop = s->be_data + size;

    g_assert(size <= 3);

    if (is_signed) {
        memop += MO_SIGN;
    }

    tcg_gen_qemu_ld_i64(dest, tcg_addr, memidx, memop);

    if (extend && is_signed) {
        g_assert(size < 3);
        tcg_gen_ext32u_i64(dest, dest);
    }

    if (iss_valid) {
        uint32_t syn;

        syn = syn_data_abort_with_iss(0,
                                      size,
                                      is_signed,
                                      iss_srt,
                                      iss_sf,
                                      iss_ar,
                                      0, 0, 0, 0, 0, false);
        disas_set_insn_syndrome(s, syn);
    }
}

static void do_gpr_ld(DisasContext *s,
                      TCGv_i64 dest, TCGv_i64 tcg_addr,
                      int size, bool is_signed, bool extend,
                      bool iss_valid, unsigned int iss_srt,
                      bool iss_sf, bool iss_ar)
{
    do_gpr_ld_memidx(s, dest, tcg_addr, size, is_signed, extend,
                     get_mem_index(s),
                     iss_valid, iss_srt, iss_sf, iss_ar);
}

/*
 * Store from FP register to memory
 */
static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, int size)
{
    /* This writes the bottom N bits of a 128 bit wide vector to memory */
    TCGv_i64 tmp = tcg_temp_new_i64();
    tcg_gen_ld_i64(tmp, cpu_env, fp_reg_offset(s, srcidx, MO_64));
    if (size < 4) {
        tcg_gen_qemu_st_i64(tmp, tcg_addr, get_mem_index(s),
                            s->be_data + size);
    } else {
        bool be = s->be_data == MO_BE;
        TCGv_i64 tcg_hiaddr = tcg_temp_new_i64();

        tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
        tcg_gen_qemu_st_i64(tmp, be ? tcg_hiaddr : tcg_addr, get_mem_index(s),
                            s->be_data | MO_Q);
        tcg_gen_ld_i64(tmp, cpu_env, fp_reg_hi_offset(s, srcidx));
        tcg_gen_qemu_st_i64(tmp, be ? tcg_addr : tcg_hiaddr, get_mem_index(s),
                            s->be_data | MO_Q);
        tcg_temp_free_i64(tcg_hiaddr);
    }

    tcg_temp_free_i64(tmp);
}

/*
 * Load from memory to FP register
 */
static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, int size)
{
    /* This always zero-extends and writes to a full 128 bit wide vector */
    TCGv_i64 tmplo = tcg_temp_new_i64();
    TCGv_i64 tmphi;

    if (size < 4) {
        TCGMemOp memop = s->be_data + size;
        tmphi = tcg_const_i64(0);
        tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), memop);
    } else {
        bool be = s->be_data == MO_BE;
        TCGv_i64 tcg_hiaddr;

        tmphi = tcg_temp_new_i64();
        tcg_hiaddr = tcg_temp_new_i64();

        tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
        tcg_gen_qemu_ld_i64(tmplo, be ? tcg_hiaddr : tcg_addr, get_mem_index(s),
                            s->be_data | MO_Q);
        tcg_gen_qemu_ld_i64(tmphi, be ? tcg_addr : tcg_hiaddr, get_mem_index(s),
                            s->be_data | MO_Q);
        tcg_temp_free_i64(tcg_hiaddr);
    }

    tcg_gen_st_i64(tmplo, cpu_env, fp_reg_offset(s, destidx, MO_64));
    tcg_gen_st_i64(tmphi, cpu_env, fp_reg_hi_offset(s, destidx));

    tcg_temp_free_i64(tmplo);
    tcg_temp_free_i64(tmphi);
}
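
/* Note on the size == 4 (128 bit) paths above: TCG guest memory ops
 * are at most 64 bits wide, so a Q-register access is split into two
 * MO_Q accesses at tcg_addr and tcg_addr + 8. In a big-endian guest
 * view the most significant half lives at the lower address, hence
 * the "be ? ... : ..." selection of which half goes where.
 */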

/*
 * Vector load/store helpers.
 *
 * The principal difference between this and a FP load is that we don't
 * zero extend as we are filling a partial chunk of the vector register.
 * These functions don't support 128 bit loads/stores, which would be
 * normal load/store operations.
 *
 * The _i32 versions are useful when operating on 32 bit quantities
 * (eg for floating point single or using Neon helper functions).
 */

/* Get value of an element within a vector register */
static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx,
                             int element, TCGMemOp memop)
{
    int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_ld8u_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_16:
        tcg_gen_ld16u_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_32:
        tcg_gen_ld32u_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_8|MO_SIGN:
        tcg_gen_ld8s_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_16|MO_SIGN:
        tcg_gen_ld16s_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_32|MO_SIGN:
        tcg_gen_ld32s_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_64:
    case MO_64|MO_SIGN:
        tcg_gen_ld_i64(tcg_dest, cpu_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

static void read_vec_element_i32(DisasContext *s, TCGv_i32 tcg_dest, int srcidx,
                                 int element, TCGMemOp memop)
{
    int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_ld8u_i32(tcg_dest, cpu_env, vect_off);
        break;
    case MO_16:
        tcg_gen_ld16u_i32(tcg_dest, cpu_env, vect_off);
        break;
    case MO_8|MO_SIGN:
        tcg_gen_ld8s_i32(tcg_dest, cpu_env, vect_off);
        break;
    case MO_16|MO_SIGN:
        tcg_gen_ld16s_i32(tcg_dest, cpu_env, vect_off);
        break;
    case MO_32:
    case MO_32|MO_SIGN:
        tcg_gen_ld_i32(tcg_dest, cpu_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

/* Set value of an element within a vector register */
static void write_vec_element(DisasContext *s, TCGv_i64 tcg_src, int destidx,
                              int element, TCGMemOp memop)
{
    int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_st8_i64(tcg_src, cpu_env, vect_off);
        break;
    case MO_16:
        tcg_gen_st16_i64(tcg_src, cpu_env, vect_off);
        break;
    case MO_32:
        tcg_gen_st32_i64(tcg_src, cpu_env, vect_off);
        break;
    case MO_64:
        tcg_gen_st_i64(tcg_src, cpu_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src,
                                  int destidx, int element, TCGMemOp memop)
{
    int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_st8_i32(tcg_src, cpu_env, vect_off);
        break;
    case MO_16:
        tcg_gen_st16_i32(tcg_src, cpu_env, vect_off);
        break;
    case MO_32:
        tcg_gen_st_i32(tcg_src, cpu_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

/* Clear the high 64 bits of a 128 bit vector (in general non-quad
 * vector ops all need to do this).
 */
static void clear_vec_high(DisasContext *s, int rd)
{
    TCGv_i64 tcg_zero = tcg_const_i64(0);

    write_vec_element(s, tcg_zero, rd, 1, MO_64);
    tcg_temp_free_i64(tcg_zero);
}

/* Store from vector register to memory */
static void do_vec_st(DisasContext *s, int srcidx, int element,
                      TCGv_i64 tcg_addr, int size)
{
    TCGMemOp memop = s->be_data + size;
    TCGv_i64 tcg_tmp = tcg_temp_new_i64();

    read_vec_element(s, tcg_tmp, srcidx, element, size);
    tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), memop);

    tcg_temp_free_i64(tcg_tmp);
}

/* Load from memory to vector register */
static void do_vec_ld(DisasContext *s, int destidx, int element,
                      TCGv_i64 tcg_addr, int size)
{
    TCGMemOp memop = s->be_data + size;
    TCGv_i64 tcg_tmp = tcg_temp_new_i64();

    tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), memop);
    write_vec_element(s, tcg_tmp, destidx, element, size);

    tcg_temp_free_i64(tcg_tmp);
}

/* Check that FP/Neon access is enabled. If it is, return
 * true. If not, emit code to generate an appropriate exception,
 * and return false; the caller should not emit any code for
 * the instruction. Note that this check must happen after all
 * unallocated-encoding checks (otherwise the syndrome information
 * for the resulting exception will be incorrect).
 */
static inline bool fp_access_check(DisasContext *s)
{
    assert(!s->fp_access_checked);
    s->fp_access_checked = true;

    if (!s->fp_excp_el) {
        return true;
    }

    gen_exception_insn(s, 4, EXCP_UDEF, syn_fp_access_trap(1, 0xe, false),
                       s->fp_excp_el);
    return false;
}
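
/* Typical calling pattern in the decode functions (a sketch of usage,
 * not a function from this excerpt):
 *
 *     if (!fp_access_check(s)) {
 *         return;
 *     }
 *     ... only now emit code that touches FP/vector registers ...
 */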

/*
 * This utility function is for doing register extension with an
 * optional shift. You will likely want to pass a temporary for the
 * destination register. See DecodeRegExtend() in the ARM ARM.
 */
static void ext_and_shift_reg(TCGv_i64 tcg_out, TCGv_i64 tcg_in,
                              int option, unsigned int shift)
{
    int extsize = extract32(option, 0, 2);
    bool is_signed = extract32(option, 2, 1);

    if (is_signed) {
        switch (extsize) {
        case 0:
            tcg_gen_ext8s_i64(tcg_out, tcg_in);
            break;
        case 1:
            tcg_gen_ext16s_i64(tcg_out, tcg_in);
            break;
        case 2:
            tcg_gen_ext32s_i64(tcg_out, tcg_in);
            break;
        case 3:
            tcg_gen_mov_i64(tcg_out, tcg_in);
            break;
        }
    } else {
        switch (extsize) {
        case 0:
            tcg_gen_ext8u_i64(tcg_out, tcg_in);
            break;
        case 1:
            tcg_gen_ext16u_i64(tcg_out, tcg_in);
            break;
        case 2:
            tcg_gen_ext32u_i64(tcg_out, tcg_in);
            break;
        case 3:
            tcg_gen_mov_i64(tcg_out, tcg_in);
            break;
        }
    }

    if (shift) {
        tcg_gen_shli_i64(tcg_out, tcg_out, shift);
    }
}
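
/* Example: "ADD X0, X1, W2, UXTW #2" is decoded with option = 0b010
 * (UXTW) and shift = 2, so this function zero-extends the low 32 bits
 * of the operand and then shifts the result left by two.
 */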

static inline void gen_check_sp_alignment(DisasContext *s)
{
    /* The AArch64 architecture mandates that (if enabled via PSTATE
     * or SCTLR bits) there is a check that SP is 16-aligned on every
     * SP-relative load or store (with an exception generated if it is not).
     * In line with general QEMU practice regarding misaligned accesses,
     * we omit these checks for the sake of guest program performance.
     * This function is provided as a hook so we can more easily add these
     * checks in future (possibly as a "favour catching guest program bugs
     * over speed" user selectable option).
     */
}

/*
 * This provides a simple table-based lookup decoder. It is intended
 * to be used when the relevant bits for decode are too awkwardly
 * placed and switch/if based logic would be confusing and deeply
 * nested. Since it's a linear search through the table, tables
 * should be kept small.
 *
 * It returns the first handler where insn & mask == pattern, or
 * NULL if there is no match.
 * The table is terminated by an empty mask (i.e. 0)
 */
static inline AArch64DecodeFn *lookup_disas_fn(const AArch64DecodeTable *table,
                                               uint32_t insn)
{
    const AArch64DecodeTable *tptr = table;

    while (tptr->mask) {
        if ((insn & tptr->mask) == tptr->pattern) {
            return tptr->disas_fn;
        }
        tptr++;
    }
    return NULL;
}

/*
 * The instruction disassembly implemented here matches
 * the instruction encoding classifications in chapter C4
 * of the ARM Architecture Reference Manual (DDI0487B_a);
 * classification names and decode diagrams here should generally
 * match up with those in the manual.
 */

/* Unconditional branch (immediate)
 *   31  30       26 25                                  0
 * +----+-----------+-------------------------------------+
 * | op | 0 0 1 0 1 |                 imm26               |
 * +----+-----------+-------------------------------------+
 */
static void disas_uncond_b_imm(DisasContext *s, uint32_t insn)
{
    uint64_t addr = s->pc + sextract32(insn, 0, 26) * 4 - 4;

    if (insn & (1U << 31)) {
        /* BL Branch with link */
        tcg_gen_movi_i64(cpu_reg(s, 30), s->pc);
    }

    /* B Branch / BL Branch with link */
    gen_goto_tb(s, 0, addr);
}
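
/* A note on the address arithmetic: by the time a decoder runs, s->pc
 * has already been advanced past the 4-byte insn, hence the "- 4" to
 * get back to the insn's own address. E.g. "B #+8" encodes imm26 = 2,
 * giving addr = insn address + 2 * 4 = insn address + 8; for BL, X30
 * receives s->pc, the address of the sequentially next insn.
 */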

/* Compare and branch (immediate)
 *   31  30         25  24  23                  5 4      0
 * +----+-------------+----+---------------------+--------+
 * | sf | 0 1 1 0 1 0 | op |         imm19       |   Rt   |
 * +----+-------------+----+---------------------+--------+
 */
static void disas_comp_b_imm(DisasContext *s, uint32_t insn)
{
    unsigned int sf, op, rt;
    uint64_t addr;
    TCGLabel *label_match;
    TCGv_i64 tcg_cmp;

    sf = extract32(insn, 31, 1);
    op = extract32(insn, 24, 1); /* 0: CBZ; 1: CBNZ */
    rt = extract32(insn, 0, 5);
    addr = s->pc + sextract32(insn, 5, 19) * 4 - 4;

    tcg_cmp = read_cpu_reg(s, rt, sf);
    label_match = gen_new_label();

    tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
                        tcg_cmp, 0, label_match);

    gen_goto_tb(s, 0, s->pc);
    gen_set_label(label_match);
    gen_goto_tb(s, 1, addr);
}

/* Test and branch (immediate)
 *   31  30         25  24  23   19 18          5 4    0
 * +----+-------------+----+-------+-------------+------+
 * | b5 | 0 1 1 0 1 1 | op |  b40  |    imm14    |  Rt  |
 * +----+-------------+----+-------+-------------+------+
 */
static void disas_test_b_imm(DisasContext *s, uint32_t insn)
{
    unsigned int bit_pos, op, rt;
    uint64_t addr;
    TCGLabel *label_match;
    TCGv_i64 tcg_cmp;

    bit_pos = (extract32(insn, 31, 1) << 5) | extract32(insn, 19, 5);
    op = extract32(insn, 24, 1); /* 0: TBZ; 1: TBNZ */
    addr = s->pc + sextract32(insn, 5, 14) * 4 - 4;
    rt = extract32(insn, 0, 5);

    tcg_cmp = tcg_temp_new_i64();
    tcg_gen_andi_i64(tcg_cmp, cpu_reg(s, rt), (1ULL << bit_pos));
    label_match = gen_new_label();
    tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
                        tcg_cmp, 0, label_match);
    tcg_temp_free_i64(tcg_cmp);
    gen_goto_tb(s, 0, s->pc);
    gen_set_label(label_match);
    gen_goto_tb(s, 1, addr);
}

/* Conditional branch (immediate)
 *  31           25  24  23                  5   4  3    0
 * +---------------+----+---------------------+----+------+
 * | 0 1 0 1 0 1 0 | o1 |         imm19       | o0 | cond |
 * +---------------+----+---------------------+----+------+
 */
static void disas_cond_b_imm(DisasContext *s, uint32_t insn)
{
    unsigned int cond;
    uint64_t addr;

    if ((insn & (1 << 4)) || (insn & (1 << 24))) {
        unallocated_encoding(s);
        return;
    }
    addr = s->pc + sextract32(insn, 5, 19) * 4 - 4;
    cond = extract32(insn, 0, 4);

    if (cond < 0x0e) {
        /* genuinely conditional branches */
        TCGLabel *label_match = gen_new_label();
        arm_gen_test_cc(cond, label_match);
        gen_goto_tb(s, 0, s->pc);
        gen_set_label(label_match);
        gen_goto_tb(s, 1, addr);
    } else {
        /* 0xe and 0xf are both "always" conditions */
        gen_goto_tb(s, 0, addr);
    }
}

/* HINT instruction group, including various allocated HINTs */
static void handle_hint(DisasContext *s, uint32_t insn,
                        unsigned int op1, unsigned int op2, unsigned int crm)
{
    unsigned int selector = crm << 3 | op2;

    if (op1 != 3) {
        unallocated_encoding(s);
        return;
    }

    switch (selector) {
    case 0: /* NOP */
        return;
    case 3: /* WFI */
        s->base.is_jmp = DISAS_WFI;
        return;
        /* When running in MTTCG we don't generate jumps to the yield and
         * WFE helpers as it won't affect the scheduling of other vCPUs.
         * If we wanted to more completely model WFE/SEV so we don't busy
         * spin unnecessarily we would need to do something more involved.
         */
    case 1: /* YIELD */
        if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
            s->base.is_jmp = DISAS_YIELD;
        }
        return;
    case 2: /* WFE */
        if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
            s->base.is_jmp = DISAS_WFE;
        }
        return;
    case 4: /* SEV */
    case 5: /* SEVL */
        /* we treat all as NOP at least for now */
        return;
    default:
        /* default specified as NOP equivalent */
        return;
    }
}

static void gen_clrex(DisasContext *s, uint32_t insn)
{
    tcg_gen_movi_i64(cpu_exclusive_addr, -1);
}

/* CLREX, DSB, DMB, ISB */
static void handle_sync(DisasContext *s, uint32_t insn,
                        unsigned int op1, unsigned int op2, unsigned int crm)
{
    TCGBar bar;

    if (op1 != 3) {
        unallocated_encoding(s);
        return;
    }

    switch (op2) {
    case 2: /* CLREX */
        gen_clrex(s, insn);
        return;
    case 4: /* DSB */
    case 5: /* DMB */
        switch (crm & 3) {
        case 1: /* MBReqTypes_Reads */
            bar = TCG_BAR_SC | TCG_MO_LD_LD | TCG_MO_LD_ST;
            break;
        case 2: /* MBReqTypes_Writes */
            bar = TCG_BAR_SC | TCG_MO_ST_ST;
            break;
        default: /* MBReqTypes_All */
            bar = TCG_BAR_SC | TCG_MO_ALL;
            break;
        }
        tcg_gen_mb(bar);
        return;
    case 6: /* ISB */
        /* We need to break the TB after this insn to execute
         * self-modifying code correctly and also to take
         * any pending interrupts immediately.
         */
        gen_goto_tb(s, 0, s->pc);
        return;
    default:
        unallocated_encoding(s);
        return;
    }
}
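
/* For example, "DMB ISHLD" has CRm = 0b1001, so (crm & 3) == 1
 * (MBReqTypes_Reads) and we emit the load-load/load-store barrier,
 * while "DMB ISHST" (CRm = 0b1010) yields the store-store barrier.
 */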

/* MSR (immediate) - move immediate to processor state field */
static void handle_msr_i(DisasContext *s, uint32_t insn,
                         unsigned int op1, unsigned int op2, unsigned int crm)
{
    int op = op1 << 3 | op2;
    switch (op) {
    case 0x05: /* SPSel */
        if (s->current_el == 0) {
            unallocated_encoding(s);
            return;
        }
        /* fall through */
    case 0x1e: /* DAIFSet */
    case 0x1f: /* DAIFClear */
    {
        TCGv_i32 tcg_imm = tcg_const_i32(crm);
        TCGv_i32 tcg_op = tcg_const_i32(op);
        gen_a64_set_pc_im(s->pc - 4);
        gen_helper_msr_i_pstate(cpu_env, tcg_op, tcg_imm);
        tcg_temp_free_i32(tcg_imm);
        tcg_temp_free_i32(tcg_op);
        /* For DAIFClear, exit the cpu loop to re-evaluate pending IRQs.  */
        gen_a64_set_pc_im(s->pc);
        s->base.is_jmp = (op == 0x1f ? DISAS_EXIT : DISAS_JUMP);
        break;
    }
    default:
        unallocated_encoding(s);
        return;
    }
}

static void gen_get_nzcv(TCGv_i64 tcg_rt)
{
    TCGv_i32 tmp = tcg_temp_new_i32();
    TCGv_i32 nzcv = tcg_temp_new_i32();

    /* build bit 31, N */
    tcg_gen_andi_i32(nzcv, cpu_NF, (1U << 31));
    /* build bit 30, Z */
    tcg_gen_setcondi_i32(TCG_COND_EQ, tmp, cpu_ZF, 0);
    tcg_gen_deposit_i32(nzcv, nzcv, tmp, 30, 1);
    /* build bit 29, C */
    tcg_gen_deposit_i32(nzcv, nzcv, cpu_CF, 29, 1);
    /* build bit 28, V */
    tcg_gen_shri_i32(tmp, cpu_VF, 31);
    tcg_gen_deposit_i32(nzcv, nzcv, tmp, 28, 1);
    /* generate result */
    tcg_gen_extu_i32_i64(tcg_rt, nzcv);

    tcg_temp_free_i32(nzcv);
    tcg_temp_free_i32(tmp);
}
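
/* Example: after "CMP X0, X0" (which sets Z and C, clears N and V),
 * an "MRS Xt, NZCV" built via this function returns
 * 0x0000000060000000: bit 30 (Z) and bit 29 (C) set.
 */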
1465
1466static void gen_set_nzcv(TCGv_i64 tcg_rt)
1467
1468{
1469    TCGv_i32 nzcv = tcg_temp_new_i32();
1470
1471    /* take NZCV from R[t] */
1472    tcg_gen_extrl_i64_i32(nzcv, tcg_rt);
1473
1474    /* bit 31, N */
1475    tcg_gen_andi_i32(cpu_NF, nzcv, (1U << 31));
1476    /* bit 30, Z */
1477    tcg_gen_andi_i32(cpu_ZF, nzcv, (1 << 30));
1478    tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_ZF, cpu_ZF, 0);
1479    /* bit 29, C */
1480    tcg_gen_andi_i32(cpu_CF, nzcv, (1 << 29));
1481    tcg_gen_shri_i32(cpu_CF, cpu_CF, 29);
1482    /* bit 28, V */
1483    tcg_gen_andi_i32(cpu_VF, nzcv, (1 << 28));
1484    tcg_gen_shli_i32(cpu_VF, cpu_VF, 3);
1485    tcg_temp_free_i32(nzcv);
1486}
1487
1488/* MRS - move from system register
1489 * MSR (register) - move to system register
1490 * SYS
1491 * SYSL
1492 * These are all essentially the same insn in 'read' and 'write'
1493 * versions, with varying op0 fields.
1494 */
1495static void handle_sys(DisasContext *s, uint32_t insn, bool isread,
1496                       unsigned int op0, unsigned int op1, unsigned int op2,
1497                       unsigned int crn, unsigned int crm, unsigned int rt)
1498{
1499    const ARMCPRegInfo *ri;
1500    TCGv_i64 tcg_rt;
1501
1502    ri = get_arm_cp_reginfo(s->cp_regs,
1503                            ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP,
1504                                               crn, crm, op0, op1, op2));
1505
1506    if (!ri) {
1507        /* Unknown register; this might be a guest error or a QEMU
1508         * unimplemented feature.
1509         */
1510        qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch64 "
1511                      "system register op0:%d op1:%d crn:%d crm:%d op2:%d\n",
1512                      isread ? "read" : "write", op0, op1, crn, crm, op2);
1513        unallocated_encoding(s);
1514        return;
1515    }
1516
1517    /* Check access permissions */
1518    if (!cp_access_ok(s->current_el, ri, isread)) {
1519        unallocated_encoding(s);
1520        return;
1521    }
1522
1523    if (ri->accessfn) {
1524        /* Emit code to perform further access permissions checks at
1525         * runtime; this may result in an exception.
1526         */
1527        TCGv_ptr tmpptr;
1528        TCGv_i32 tcg_syn, tcg_isread;
1529        uint32_t syndrome;
1530
1531        gen_a64_set_pc_im(s->pc - 4);
1532        tmpptr = tcg_const_ptr(ri);
1533        syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
1534        tcg_syn = tcg_const_i32(syndrome);
1535        tcg_isread = tcg_const_i32(isread);
1536        gen_helper_access_check_cp_reg(cpu_env, tmpptr, tcg_syn, tcg_isread);
1537        tcg_temp_free_ptr(tmpptr);
1538        tcg_temp_free_i32(tcg_syn);
1539        tcg_temp_free_i32(tcg_isread);
1540    }
1541
1542    /* Handle special cases first */
1543    switch (ri->type & ~(ARM_CP_FLAG_MASK & ~ARM_CP_SPECIAL)) {
1544    case ARM_CP_NOP:
1545        return;
1546    case ARM_CP_NZCV:
1547        tcg_rt = cpu_reg(s, rt);
1548        if (isread) {
1549            gen_get_nzcv(tcg_rt);
1550        } else {
1551            gen_set_nzcv(tcg_rt);
1552        }
1553        return;
1554    case ARM_CP_CURRENTEL:
1555        /* Reads as current EL value from pstate, which is
1556         * guaranteed to be constant by the tb flags.
1557         */
1558        tcg_rt = cpu_reg(s, rt);
1559        tcg_gen_movi_i64(tcg_rt, s->current_el << 2);
1560        return;
1561    case ARM_CP_DC_ZVA:
1562        /* Writes clear the aligned block of memory which rt points into. */
1563        tcg_rt = cpu_reg(s, rt);
1564        gen_helper_dc_zva(cpu_env, tcg_rt);
1565        return;
1566    default:
1567        break;
1568    }
1569
1570    if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
1571        gen_io_start();
1572    }
1573
1574    tcg_rt = cpu_reg(s, rt);
1575
1576    if (isread) {
1577        if (ri->type & ARM_CP_CONST) {
1578            tcg_gen_movi_i64(tcg_rt, ri->resetvalue);
1579        } else if (ri->readfn) {
1580            TCGv_ptr tmpptr;
1581            tmpptr = tcg_const_ptr(ri);
1582            gen_helper_get_cp_reg64(tcg_rt, cpu_env, tmpptr);
1583            tcg_temp_free_ptr(tmpptr);
1584        } else {
1585            tcg_gen_ld_i64(tcg_rt, cpu_env, ri->fieldoffset);
1586        }
1587    } else {
1588        if (ri->type & ARM_CP_CONST) {
1589            /* If not forbidden by access permissions, treat as WI */
1590            return;
1591        } else if (ri->writefn) {
1592            TCGv_ptr tmpptr;
1593            tmpptr = tcg_const_ptr(ri);
1594            gen_helper_set_cp_reg64(cpu_env, tmpptr, tcg_rt);
1595            tcg_temp_free_ptr(tmpptr);
1596        } else {
1597            tcg_gen_st_i64(tcg_rt, cpu_env, ri->fieldoffset);
1598        }
1599    }
1600
1601    if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
1602        /* I/O operations must end the TB here (whether read or write) */
1603        gen_io_end();
1604        s->base.is_jmp = DISAS_UPDATE;
1605    } else if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
1606        /* We default to ending the TB on a coprocessor register write,
1607         * but allow this to be suppressed by the register definition
1608         * (usually only necessary to work around guest bugs).
1609         */
1610        s->base.is_jmp = DISAS_UPDATE;
1611    }
1612}
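
/* As a concrete path through the code above: MRS X0, NZCV resolves to a
 * reginfo whose type includes ARM_CP_NZCV, so the special-case switch
 * calls gen_get_nzcv rather than reading any CPUARMState field.
 */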
1613
1614/* System
1615 *  31                 22 21  20 19 18 16 15   12 11    8 7   5 4    0
1616 * +---------------------+---+-----+-----+-------+-------+-----+------+
1617 * | 1 1 0 1 0 1 0 1 0 0 | L | op0 | op1 |  CRn  |  CRm  | op2 |  Rt  |
1618 * +---------------------+---+-----+-----+-------+-------+-----+------+
1619 */
1620static void disas_system(DisasContext *s, uint32_t insn)
1621{
1622    unsigned int l, op0, op1, crn, crm, op2, rt;
1623    l = extract32(insn, 21, 1);
1624    op0 = extract32(insn, 19, 2);
1625    op1 = extract32(insn, 16, 3);
1626    crn = extract32(insn, 12, 4);
1627    crm = extract32(insn, 8, 4);
1628    op2 = extract32(insn, 5, 3);
1629    rt = extract32(insn, 0, 5);
1630
1631    if (op0 == 0) {
1632        if (l || rt != 31) {
1633            unallocated_encoding(s);
1634            return;
1635        }
1636        switch (crn) {
1637        case 2: /* HINT (including allocated hints like NOP, YIELD, etc) */
1638            handle_hint(s, insn, op1, op2, crm);
1639            break;
1640        case 3: /* CLREX, DSB, DMB, ISB */
1641            handle_sync(s, insn, op1, op2, crm);
1642            break;
1643        case 4: /* MSR (immediate) */
1644            handle_msr_i(s, insn, op1, op2, crm);
1645            break;
1646        default:
1647            unallocated_encoding(s);
1648            break;
1649        }
1650        return;
1651    }
1652    handle_sys(s, insn, l, op0, op1, op2, crn, crm, rt);
1653}
1654
1655/* Exception generation
1656 *
1657 *  31             24 23 21 20                     5 4   2 1  0
1658 * +-----------------+-----+------------------------+-----+----+
1659 * | 1 1 0 1 0 1 0 0 | opc |          imm16         | op2 | LL |
1660 * +-----------------+-----+------------------------+-----+----+
1661 */
1662static void disas_exc(DisasContext *s, uint32_t insn)
1663{
1664    int opc = extract32(insn, 21, 3);
1665    int op2_ll = extract32(insn, 0, 5);
1666    int imm16 = extract32(insn, 5, 16);
1667    TCGv_i32 tmp;
1668
1669    switch (opc) {
1670    case 0:
1671        /* For SVC, HVC and SMC we advance the single-step state
1672         * machine before taking the exception. This is architecturally
1673         * mandated, to ensure that single-stepping a system call
1674         * instruction works properly.
1675         */
1676        switch (op2_ll) {
1677        case 1:                                                     /* SVC */
1678            gen_ss_advance(s);
1679            gen_exception_insn(s, 0, EXCP_SWI, syn_aa64_svc(imm16),
1680                               default_exception_el(s));
1681            break;
1682        case 2:                                                     /* HVC */
1683            if (s->current_el == 0) {
1684                unallocated_encoding(s);
1685                break;
1686            }
1687            /* The pre HVC helper handles cases when HVC gets trapped
1688             * as an undefined insn by runtime configuration.
1689             */
1690            gen_a64_set_pc_im(s->pc - 4);
1691            gen_helper_pre_hvc(cpu_env);
1692            gen_ss_advance(s);
1693            gen_exception_insn(s, 0, EXCP_HVC, syn_aa64_hvc(imm16), 2);
1694            break;
1695        case 3:                                                     /* SMC */
1696            if (s->current_el == 0) {
1697                unallocated_encoding(s);
1698                break;
1699            }
1700            gen_a64_set_pc_im(s->pc - 4);
1701            tmp = tcg_const_i32(syn_aa64_smc(imm16));
1702            gen_helper_pre_smc(cpu_env, tmp);
1703            tcg_temp_free_i32(tmp);
1704            gen_ss_advance(s);
1705            gen_exception_insn(s, 0, EXCP_SMC, syn_aa64_smc(imm16), 3);
1706            break;
1707        default:
1708            unallocated_encoding(s);
1709            break;
1710        }
1711        break;
1712    case 1:
1713        if (op2_ll != 0) {
1714            unallocated_encoding(s);
1715            break;
1716        }
1717        /* BRK */
1718        gen_exception_insn(s, 4, EXCP_BKPT, syn_aa64_bkpt(imm16),
1719                           default_exception_el(s));
1720        break;
1721    case 2:
1722        if (op2_ll != 0) {
1723            unallocated_encoding(s);
1724            break;
1725        }
1726        /* HLT. This has two purposes.
1727         * Architecturally, it is an external halting debug instruction.
1728         * Since QEMU doesn't implement external debug, we treat it as
1729         * the architecture requires when halting debug is disabled: it UNDEFs.
1730         * Secondly, "HLT 0xf000" is the A64 semihosting syscall instruction.
1731         */
1732        if (semihosting_enabled() && imm16 == 0xf000) {
1733#ifndef CONFIG_USER_ONLY
1734            /* In system mode, don't allow userspace access to semihosting,
1735             * to provide some semblance of security (and for consistency
1736             * with our 32-bit semihosting).
1737             */
1738            if (s->current_el == 0) {
1739                unsupported_encoding(s, insn);
1740                break;
1741            }
1742#endif
1743            gen_exception_internal_insn(s, 0, EXCP_SEMIHOST);
1744        } else {
1745            unsupported_encoding(s, insn);
1746        }
1747        break;
1748    case 5:
1749        if (op2_ll < 1 || op2_ll > 3) {
1750            unallocated_encoding(s);
1751            break;
1752        }
1753        /* DCPS1, DCPS2, DCPS3 */
1754        unsupported_encoding(s, insn);
1755        break;
1756    default:
1757        unallocated_encoding(s);
1758        break;
1759    }
1760}
1761
1762/* Unconditional branch (register)
1763 *  31           25 24   21 20   16 15   10 9    5 4     0
1764 * +---------------+-------+-------+-------+------+-------+
1765 * | 1 1 0 1 0 1 1 |  opc  |  op2  |  op3  |  Rn  |  op4  |
1766 * +---------------+-------+-------+-------+------+-------+
1767 */
1768static void disas_uncond_b_reg(DisasContext *s, uint32_t insn)
1769{
1770    unsigned int opc, op2, op3, rn, op4;
1771
1772    opc = extract32(insn, 21, 4);
1773    op2 = extract32(insn, 16, 5);
1774    op3 = extract32(insn, 10, 6);
1775    rn = extract32(insn, 5, 5);
1776    op4 = extract32(insn, 0, 5);
1777
1778    if (op4 != 0x0 || op3 != 0x0 || op2 != 0x1f) {
1779        unallocated_encoding(s);
1780        return;
1781    }
1782
1783    switch (opc) {
1784    case 0: /* BR */
1785    case 1: /* BLR */
1786    case 2: /* RET */
1787        gen_a64_set_pc(s, cpu_reg(s, rn));
1788        /* BLR also needs to write the return address into LR */
1789        if (opc == 1) {
1790            tcg_gen_movi_i64(cpu_reg(s, 30), s->pc);
1791        }
1792        break;
1793    case 4: /* ERET */
1794        if (s->current_el == 0) {
1795            unallocated_encoding(s);
1796            return;
1797        }
1798        gen_helper_exception_return(cpu_env);
1799        /* Must exit loop to check un-masked IRQs */
1800        s->base.is_jmp = DISAS_EXIT;
1801        return;
1802    case 5: /* DRPS */
1803        if (rn != 0x1f) {
1804            unallocated_encoding(s);
1805        } else {
1806            unsupported_encoding(s, insn);
1807        }
1808        return;
1809    default:
1810        unallocated_encoding(s);
1811        return;
1812    }
1813
1814    s->base.is_jmp = DISAS_JUMP;
1815}
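
/* e.g. BLR X8 (opc == 1) jumps to X8 and writes the address of the
 * following instruction (s->pc has already been advanced past this insn)
 * into the link register X30.
 */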
1816
1817/* Branches, exception generating and system instructions */
1818static void disas_b_exc_sys(DisasContext *s, uint32_t insn)
1819{
1820    switch (extract32(insn, 25, 7)) {
1821    case 0x0a: case 0x0b:
1822    case 0x4a: case 0x4b: /* Unconditional branch (immediate) */
1823        disas_uncond_b_imm(s, insn);
1824        break;
1825    case 0x1a: case 0x5a: /* Compare & branch (immediate) */
1826        disas_comp_b_imm(s, insn);
1827        break;
1828    case 0x1b: case 0x5b: /* Test & branch (immediate) */
1829        disas_test_b_imm(s, insn);
1830        break;
1831    case 0x2a: /* Conditional branch (immediate) */
1832        disas_cond_b_imm(s, insn);
1833        break;
1834    case 0x6a: /* Exception generation / System */
1835        if (insn & (1 << 24)) {
1836            disas_system(s, insn);
1837        } else {
1838            disas_exc(s, insn);
1839        }
1840        break;
1841    case 0x6b: /* Unconditional branch (register) */
1842        disas_uncond_b_reg(s, insn);
1843        break;
1844    default:
1845        unallocated_encoding(s);
1846        break;
1847    }
1848}
1849
1850/*
1851 * Load/Store exclusive instructions are implemented by remembering
1852 * the value/address loaded, and seeing if these are the same
1853 * when the store is performed. This is not actually the architecturally
1854 * mandated semantics, but it works for typical guest code sequences
1855 * and avoids having to monitor regular stores.
1856 *
1857 * The store exclusive uses the atomic cmpxchg primitives to avoid
1858 * races in multi-threaded linux-user and when MTTCG softmmu is
1859 * enabled.
1860 */
1861static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
1862                               TCGv_i64 addr, int size, bool is_pair)
1863{
1864    int idx = get_mem_index(s);
1865    TCGMemOp memop = s->be_data;
1866
1867    g_assert(size <= 3);
1868    if (is_pair) {
1869        g_assert(size >= 2);
1870        if (size == 2) {
1871            /* The pair must be single-copy atomic for the doubleword.  */
1872            memop |= MO_64 | MO_ALIGN;
1873            tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx, memop);
1874            if (s->be_data == MO_LE) {
1875                tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 0, 32);
1876                tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 32, 32);
1877            } else {
1878                tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 32, 32);
1879                tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 0, 32);
1880            }
1881        } else {
1882            /* The pair must be single-copy atomic for *each* doubleword, not
1883               the entire quadword; however, it must be quadword aligned.  */
1884            memop |= MO_64;
1885            tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx,
1886                                memop | MO_ALIGN_16);
1887
1888            TCGv_i64 addr2 = tcg_temp_new_i64();
1889            tcg_gen_addi_i64(addr2, addr, 8);
1890            tcg_gen_qemu_ld_i64(cpu_exclusive_high, addr2, idx, memop);
1891            tcg_temp_free_i64(addr2);
1892
1893            tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
1894            tcg_gen_mov_i64(cpu_reg(s, rt2), cpu_exclusive_high);
1895        }
1896    } else {
1897        memop |= size | MO_ALIGN;
1898        tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx, memop);
1899        tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
1900    }
1901    tcg_gen_mov_i64(cpu_exclusive_addr, addr);
1902}
1903
1904static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
1905                                TCGv_i64 addr, int size, int is_pair)
1906{
1907    /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]
1908     *     && (!is_pair || env->exclusive_high == [addr + datasize])) {
1909     *     [addr] = {Rt};
1910     *     if (is_pair) {
1911     *         [addr + datasize] = {Rt2};
1912     *     }
1913     *     {Rd} = 0;
1914     * } else {
1915     *     {Rd} = 1;
1916     * }
1917     * env->exclusive_addr = -1;
1918     */
1919    TCGLabel *fail_label = gen_new_label();
1920    TCGLabel *done_label = gen_new_label();
1921    TCGv_i64 tmp;
1922
1923    tcg_gen_brcond_i64(TCG_COND_NE, addr, cpu_exclusive_addr, fail_label);
1924
1925    tmp = tcg_temp_new_i64();
1926    if (is_pair) {
1927        if (size == 2) {
1928            if (s->be_data == MO_LE) {
1929                tcg_gen_concat32_i64(tmp, cpu_reg(s, rt), cpu_reg(s, rt2));
1930            } else {
1931                tcg_gen_concat32_i64(tmp, cpu_reg(s, rt2), cpu_reg(s, rt));
1932            }
1933            tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr,
1934                                       cpu_exclusive_val, tmp,
1935                                       get_mem_index(s),
1936                                       MO_64 | MO_ALIGN | s->be_data);
1937            tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
1938        } else if (s->be_data == MO_LE) {
1939            if (tb_cflags(s->base.tb) & CF_PARALLEL) {
1940                gen_helper_paired_cmpxchg64_le_parallel(tmp, cpu_env,
1941                                                        cpu_exclusive_addr,
1942                                                        cpu_reg(s, rt),
1943                                                        cpu_reg(s, rt2));
1944            } else {
1945                gen_helper_paired_cmpxchg64_le(tmp, cpu_env, cpu_exclusive_addr,
1946                                               cpu_reg(s, rt), cpu_reg(s, rt2));
1947            }
1948        } else {
1949            if (tb_cflags(s->base.tb) & CF_PARALLEL) {
1950                gen_helper_paired_cmpxchg64_be_parallel(tmp, cpu_env,
1951                                                        cpu_exclusive_addr,
1952                                                        cpu_reg(s, rt),
1953                                                        cpu_reg(s, rt2));
1954            } else {
1955                gen_helper_paired_cmpxchg64_be(tmp, cpu_env, cpu_exclusive_addr,
1956                                               cpu_reg(s, rt), cpu_reg(s, rt2));
1957            }
1958        }
1959    } else {
1960        tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, cpu_exclusive_val,
1961                                   cpu_reg(s, rt), get_mem_index(s),
1962                                   size | MO_ALIGN | s->be_data);
1963        tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
1964    }
1965    tcg_gen_mov_i64(cpu_reg(s, rd), tmp);
1966    tcg_temp_free_i64(tmp);
1967    tcg_gen_br(done_label);
1968
1969    gen_set_label(fail_label);
1970    tcg_gen_movi_i64(cpu_reg(s, rd), 1);
1971    gen_set_label(done_label);
1972    tcg_gen_movi_i64(cpu_exclusive_addr, -1);
1973}
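
/* A rough host-level sketch of the scheme described above, written as plain
 * C with GCC's __atomic builtin and hypothetical names (this is not the TCG
 * output or any QEMU API), for a single 64-bit LDXR/STXR pair:
 *
 *   static uint64_t exclusive_addr = -1, exclusive_val;
 *
 *   static uint64_t ldxr64(uint64_t *addr)
 *   {
 *       exclusive_val = *addr;
 *       exclusive_addr = (uintptr_t)addr;
 *       return exclusive_val;
 *   }
 *
 *   static int stxr64(uint64_t *addr, uint64_t newval)
 *   {
 *       int fail = 1;
 *       if ((uintptr_t)addr == exclusive_addr) {
 *           // Store only if the value is still the one the load saw.
 *           fail = !__atomic_compare_exchange_n(addr, &exclusive_val,
 *                                               newval, false,
 *                                               __ATOMIC_SEQ_CST,
 *                                               __ATOMIC_SEQ_CST);
 *       }
 *       exclusive_addr = -1;
 *       return fail; // 0 on success, 1 on failure, as {Rd} above
 *   }
 */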
1974
1975/* Compute the Sixty-Four bit (SF) register size indication. This logic is
1976 * derived from the ARMv8 specs for LDR (shared decode for all encodings).
1977 */
1978static bool disas_ldst_compute_iss_sf(int size, bool is_signed, int opc)
1979{
1980    int opc0 = extract32(opc, 0, 1);
1981    int regsize;
1982
1983    if (is_signed) {
1984        regsize = opc0 ? 32 : 64;
1985    } else {
1986        regsize = size == 3 ? 64 : 32;
1987    }
1988    return regsize == 64;
1989}
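
/* Worked instances of the decode above, using the opc/size values from the
 * load/store comments below: unsigned LDRB (size 0, opc 01) gives
 * regsize 32; LDRSW (size 2, opc 10, so opc0 == 0) gives regsize 64 and is
 * therefore reported with SF == 1.
 */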
1990
1991/* Load/store exclusive
1992 *
1993 *  31 30 29         24  23  22   21  20  16  15  14   10 9    5 4    0
1994 * +-----+-------------+----+---+----+------+----+-------+------+------+
1995 * | sz  | 0 0 1 0 0 0 | o2 | L | o1 |  Rs  | o0 |  Rt2  |  Rn  | Rt   |
1996 * +-----+-------------+----+---+----+------+----+-------+------+------+
1997 *
1998 *  sz: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64 bit
1999 *   L: 0 -> store, 1 -> load
2000 *  o2: 0 -> exclusive, 1 -> not
2001 *  o1: 0 -> single register, 1 -> register pair
2002 *  o0: 1 -> load-acquire/store-release, 0 -> not
2003 */
2004static void disas_ldst_excl(DisasContext *s, uint32_t insn)
2005{
2006    int rt = extract32(insn, 0, 5);
2007    int rn = extract32(insn, 5, 5);
2008    int rt2 = extract32(insn, 10, 5);
2009    int is_lasr = extract32(insn, 15, 1);
2010    int rs = extract32(insn, 16, 5);
2011    int is_pair = extract32(insn, 21, 1);
2012    int is_store = !extract32(insn, 22, 1);
2013    int is_excl = !extract32(insn, 23, 1);
2014    int size = extract32(insn, 30, 2);
2015    TCGv_i64 tcg_addr;
2016
2017    if ((!is_excl && !is_pair && !is_lasr) ||
2018        (!is_excl && is_pair) ||
2019        (is_pair && size < 2)) {
2020        unallocated_encoding(s);
2021        return;
2022    }
2023
2024    if (rn == 31) {
2025        gen_check_sp_alignment(s);
2026    }
2027    tcg_addr = read_cpu_reg_sp(s, rn, 1);
2028
2029    /* The required load-acquire/store-release ordering is provided by the
2030     * explicit barriers emitted below in the is_lasr cases.
2031     */
2032
2033    if (is_excl) {
2034        if (!is_store) {
2035            s->is_ldex = true;
2036            gen_load_exclusive(s, rt, rt2, tcg_addr, size, is_pair);
2037            if (is_lasr) {
2038                tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
2039            }
2040        } else {
2041            if (is_lasr) {
2042                tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
2043            }
2044            gen_store_exclusive(s, rs, rt, rt2, tcg_addr, size, is_pair);
2045        }
2046    } else {
2047        TCGv_i64 tcg_rt = cpu_reg(s, rt);
2048        bool iss_sf = disas_ldst_compute_iss_sf(size, false, 0);
2049
2050        /* Generate ISS for non-exclusive accesses including LASR.  */
2051        if (is_store) {
2052            if (is_lasr) {
2053                tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
2054            }
2055            do_gpr_st(s, tcg_rt, tcg_addr, size,
2056                      true, rt, iss_sf, is_lasr);
2057        } else {
2058            do_gpr_ld(s, tcg_rt, tcg_addr, size, false, false,
2059                      true, rt, iss_sf, is_lasr);
2060            if (is_lasr) {
2061                tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
2062            }
2063        }
2064    }
2065}
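
/* For instance, LDAXR X0, [X1] has sz == 11, o2 == 0, L == 1, o1 == 0 and
 * o0 == 1, so it takes the exclusive-load path above with size == 3 and
 * is_lasr set, placing a TCG_MO_ALL | TCG_BAR_LDAQ barrier after the load.
 */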
2066
2067/*
2068 * Load register (literal)
2069 *
2070 *  31 30 29   27  26 25 24 23                5 4     0
2071 * +-----+-------+---+-----+-------------------+-------+
2072 * | opc | 0 1 1 | V | 0 0 |     imm19         |  Rt   |
2073 * +-----+-------+---+-----+-------------------+-------+
2074 *
2075 * V: 1 -> vector (simd/fp)
2076 * opc (non-vector): 00 -> 32 bit, 01 -> 64 bit,
2077 *                   10 -> 32 bit signed, 11 -> prefetch
2078 * opc (vector): 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit (11 unallocated)
2079 */
2080static void disas_ld_lit(DisasContext *s, uint32_t insn)
2081{
2082    int rt = extract32(insn, 0, 5);
2083    int64_t imm = sextract32(insn, 5, 19) << 2;
2084    bool is_vector = extract32(insn, 26, 1);
2085    int opc = extract32(insn, 30, 2);
2086    bool is_signed = false;
2087    int size = 2;
2088    TCGv_i64 tcg_rt, tcg_addr;
2089
2090    if (is_vector) {
2091        if (opc == 3) {
2092            unallocated_encoding(s);
2093            return;
2094        }
2095        size = 2 + opc;
2096        if (!fp_access_check(s)) {
2097            return;
2098        }
2099    } else {
2100        if (opc == 3) {
2101            /* PRFM (literal) : prefetch */
2102            return;
2103        }
2104        size = 2 + extract32(opc, 0, 1);
2105        is_signed = extract32(opc, 1, 1);
2106    }
2107
2108    tcg_rt = cpu_reg(s, rt);
2109
2110    tcg_addr = tcg_const_i64((s->pc - 4) + imm);
2111    if (is_vector) {
2112        do_fp_ld(s, rt, tcg_addr, size);
2113    } else {
2114        /* Only unsigned 32bit loads target 32bit registers.  */
2115        bool iss_sf = opc != 0;
2116
2117        do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, false,
2118                  true, rt, iss_sf, false);
2119    }
2120    tcg_temp_free_i64(tcg_addr);
2121}
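
/* e.g. LDR X0, label (opc == 01) holds (label - PC) / 4 in imm19; the load
 * address above is the current instruction address (s->pc - 4) plus that
 * offset scaled back up by 2 bits.
 */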
2122
2123/*
2124 * LDNP (Load Pair - non-temporal hint)
2125 * LDP (Load Pair - non vector)
2126 * LDPSW (Load Pair Signed Word - non vector)
2127 * STNP (Store Pair - non-temporal hint)
2128 * STP (Store Pair - non vector)
2129 * LDNP (Load Pair of SIMD&FP - non-temporal hint)
2130 * LDP (Load Pair of SIMD&FP)
2131 * STNP (Store Pair of SIMD&FP - non-temporal hint)
2132 * STP (Store Pair of SIMD&FP)
2133 *
2134 *  31 30 29   27  26  25 24   23  22 21   15 14   10 9    5 4    0
2135 * +-----+-------+---+---+-------+---+-------+-------+------+------+
2136 * | opc | 1 0 1 | V | 0 | index | L |  imm7 |  Rt2  |  Rn  | Rt   |
2137 * +-----+-------+---+---+-------+---+-------+-------+------+------+
2138 *
2139 * opc: LDP/STP/LDNP/STNP        00 -> 32 bit, 10 -> 64 bit
2140 *      LDPSW                    01
2141 *      LDP/STP/LDNP/STNP (SIMD) 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit
2142 *   V: 0 -> GPR, 1 -> Vector
2143 * idx: 00 -> signed offset with non-temporal hint, 01 -> post-index,
2144 *      10 -> signed offset, 11 -> pre-index
2145 *   L: 0 -> Store 1 -> Load
2146 *
2147 * Rt, Rt2 = GPR or SIMD registers to be stored
2148 * Rn = general purpose register containing address
2149 * imm7 = signed offset (multiple of 4 or 8 depending on size)
2150 */
2151static void disas_ldst_pair(DisasContext *s, uint32_t insn)
2152{
2153    int rt = extract32(insn, 0, 5);
2154    int rn = extract32(insn, 5, 5);
2155    int rt2 = extract32(insn, 10, 5);
2156    uint64_t offset = sextract64(insn, 15, 7);
2157    int index = extract32(insn, 23, 2);
2158    bool is_vector = extract32(insn, 26, 1);
2159    bool is_load = extract32(insn, 22, 1);
2160    int opc = extract32(insn, 30, 2);
2161
2162    bool is_signed = false;
2163    bool postindex = false;
2164    bool wback = false;
2165
2166    TCGv_i64 tcg_addr; /* calculated address */
2167    int size;
2168
2169    if (opc == 3) {
2170        unallocated_encoding(s);
2171        return;
2172    }
2173
2174    if (is_vector) {
2175        size = 2 + opc;
2176    } else {
2177        size = 2 + extract32(opc, 1, 1);
2178        is_signed = extract32(opc, 0, 1);
2179        if (!is_load && is_signed) {
2180            unallocated_encoding(s);
2181            return;
2182        }
2183    }
2184
2185    switch (index) {
2186    case 1: /* post-index */
2187        postindex = true;
2188        wback = true;
2189        break;
2190    case 0:
2191        /* signed offset with "non-temporal" hint. Since we don't emulate
2192         * caches we don't care about hints to the cache system about
2193         * data access patterns, and handle this identically to plain
2194         * signed offset.
2195         */
2196        if (is_signed) {
2197            /* There is no non-temporal-hint version of LDPSW */
2198            unallocated_encoding(s);
2199            return;
2200        }
2201        postindex = false;
2202        break;
2203    case 2: /* signed offset, rn not updated */
2204        postindex = false;
2205        break;
2206    case 3: /* pre-index */
2207        postindex = false;
2208        wback = true;
2209        break;
2210    }
2211
2212    if (is_vector && !fp_access_check(s)) {
2213        return;
2214    }
2215
2216    offset <<= size;
2217
2218    if (rn == 31) {
2219        gen_check_sp_alignment(s);
2220    }
2221
2222    tcg_addr = read_cpu_reg_sp(s, rn, 1);
2223
2224    if (!postindex) {
2225        tcg_gen_addi_i64(tcg_addr, tcg_addr, offset);
2226    }
2227
2228    if (is_vector) {
2229        if (is_load) {
2230            do_fp_ld(s, rt, tcg_addr, size);
2231        } else {
2232            do_fp_st(s, rt, tcg_addr, size);
2233        }
2234        tcg_gen_addi_i64(tcg_addr, tcg_addr, 1 << size);
2235        if (is_load) {
2236            do_fp_ld(s, rt2, tcg_addr, size);
2237        } else {
2238            do_fp_st(s, rt2, tcg_addr, size);
2239        }
2240    } else {
2241        TCGv_i64 tcg_rt = cpu_reg(s, rt);
2242        TCGv_i64 tcg_rt2 = cpu_reg(s, rt2);
2243
2244        if (is_load) {
2245            TCGv_i64 tmp = tcg_temp_new_i64();
2246
2247            /* Do not modify tcg_rt before recognizing any exception
2248             * from the second load.
2249             */
2250            do_gpr_ld(s, tmp, tcg_addr, size, is_signed, false,
2251                      false, 0, false, false);
2252            tcg_gen_addi_i64(tcg_addr, tcg_addr, 1 << size);
2253            do_gpr_ld(s, tcg_rt2, tcg_addr, size, is_signed, false,
2254                      false, 0, false, false);
2255
2256            tcg_gen_mov_i64(tcg_rt, tmp);
2257            tcg_temp_free_i64(tmp);
2258        } else {
2259            do_gpr_st(s, tcg_rt, tcg_addr, size,
2260                      false, 0, false, false);
2261            tcg_gen_addi_i64(tcg_addr, tcg_addr, 1 << size);
2262            do_gpr_st(s, tcg_rt2, tcg_addr, size,
2263                      false, 0, false, false);
2264        }
2265    }
2266
2267    if (wback) {
2268        if (postindex) {
2269            tcg_gen_addi_i64(tcg_addr, tcg_addr, offset - (1 << size));
2270        } else {
2271            tcg_gen_subi_i64(tcg_addr, tcg_addr, 1 << size);
2272        }
2273        tcg_gen_mov_i64(cpu_reg_sp(s, rn), tcg_addr);
2274    }
2275}
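
/* For example, the prologue idiom STP X29, X30, [SP, #-16]! encodes
 * imm7 == -2 with size == 3 and index == 3 (pre-index), so the << size
 * above scales the offset to -16: the pair is stored at SP - 16 and
 * SP - 8, and SP - 16 is written back to SP.
 */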
2276
2277/*
2278 * Load/store (immediate post-indexed)
2279 * Load/store (immediate pre-indexed)
2280 * Load/store (unscaled immediate)
2281 *
2282 * 31 30 29   27  26 25 24 23 22 21  20    12 11 10 9    5 4    0
2283 * +----+-------+---+-----+-----+---+--------+-----+------+------+
2284 * |size| 1 1 1 | V | 0 0 | opc | 0 |  imm9  | idx |  Rn  |  Rt  |
2285 * +----+-------+---+-----+-----+---+--------+-----+------+------+
2286 *
2287 * idx = 01 -> post-indexed, 11 -> pre-indexed, 00 -> unscaled imm. (no writeback)
2288 *       10 -> unprivileged
2289 * V = 0 -> non-vector
2290 * size: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64bit
2291 * opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
2292 */
2293static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn,
2294                                int opc,
2295                                int size,
2296                                int rt,
2297                                bool is_vector)
2298{
2299    int rn = extract32(insn, 5, 5);
2300    int imm9 = sextract32(insn, 12, 9);
2301    int idx = extract32(insn, 10, 2);
2302    bool is_signed = false;
2303    bool is_store = false;
2304    bool is_extended = false;
2305    bool is_unpriv = (idx == 2);
2306    bool iss_valid = !is_vector;
2307    bool post_index;
2308    bool writeback;
2309
2310    TCGv_i64 tcg_addr;
2311
2312    if (is_vector) {
2313        size |= (opc & 2) << 1;
2314        if (size > 4 || is_unpriv) {
2315            unallocated_encoding(s);
2316            return;
2317        }
2318        is_store = ((opc & 1) == 0);
2319        if (!fp_access_check(s)) {
2320            return;
2321        }
2322    } else {
2323        if (size == 3 && opc == 2) {
2324            /* PRFM - prefetch */
2325            if (is_unpriv) {
2326                unallocated_encoding(s);
2327                return;
2328            }
2329            return;
2330        }
2331        if (opc == 3 && size > 1) {
2332            unallocated_encoding(s);
2333            return;
2334        }
2335        is_store = (opc == 0);
2336        is_signed = extract32(opc, 1, 1);
2337        is_extended = (size < 3) && extract32(opc, 0, 1);
2338    }
2339
2340    switch (idx) {
2341    case 0:
2342    case 2:
2343        post_index = false;
2344        writeback = false;
2345        break;
2346    case 1:
2347        post_index = true;
2348        writeback = true;
2349        break;
2350    case 3:
2351        post_index = false;
2352        writeback = true;
2353        break;
2354    default:
2355        g_assert_not_reached();
2356    }
2357
2358    if (rn == 31) {
2359        gen_check_sp_alignment(s);
2360    }
2361    tcg_addr = read_cpu_reg_sp(s, rn, 1);
2362
2363    if (!post_index) {
2364        tcg_gen_addi_i64(tcg_addr, tcg_addr, imm9);
2365    }
2366
2367    if (is_vector) {
2368        if (is_store) {
2369            do_fp_st(s, rt, tcg_addr, size);
2370        } else {
2371            do_fp_ld(s, rt, tcg_addr, size);
2372        }
2373    } else {
2374        TCGv_i64 tcg_rt = cpu_reg(s, rt);
2375        int memidx = is_unpriv ? get_a64_user_mem_index(s) : get_mem_index(s);
2376        bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
2377
2378        if (is_store) {
2379            do_gpr_st_memidx(s, tcg_rt, tcg_addr, size, memidx,
2380                             iss_valid, rt, iss_sf, false);
2381        } else {
2382            do_gpr_ld_memidx(s, tcg_rt, tcg_addr, size,
2383                             is_signed, is_extended, memidx,
2384                             iss_valid, rt, iss_sf, false);
2385        }
2386    }
2387
2388    if (writeback) {
2389        TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
2390        if (post_index) {
2391            tcg_gen_addi_i64(tcg_addr, tcg_addr, imm9);
2392        }
2393        tcg_gen_mov_i64(tcg_rn, tcg_addr);
2394    }
2395}
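
/* Concretely: LDR X0, [X1], #8 (idx == 01, post-index) loads from the
 * unmodified base and then writes X1 + 8 back; LDR X0, [X1, #8]!
 * (idx == 11, pre-index) adds the offset before the access and writes the
 * updated address back; LDUR X0, [X1, #8] (idx == 00) adds the offset but
 * performs no writeback.
 */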
2396
2397/*
2398 * Load/store (register offset)
2399 *
2400 * 31 30 29   27  26 25 24 23 22 21  20  16 15 13 12 11 10 9  5 4  0
2401 * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
2402 * |size| 1 1 1 | V | 0 0 | opc | 1 |  Rm  | opt | S| 1 0 | Rn | Rt |
2403 * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
2404 *
2405 * For non-vector:
2406 *   size: 00 -> byte, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64 bit
2407 *   opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
2408 * For vector:
2409 *   size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
2410 *   opc<0>: 0 -> store, 1 -> load
2411 * V: 1 -> vector/simd
2412 * opt: extend encoding (see DecodeRegExtend)
2413 * S: if S=1 then scale the offset by the transfer size (1 << size)
2414 * Rt: register to transfer into/out of
2415 * Rn: address register or SP for base
2416 * Rm: offset register or ZR for offset
2417 */
2418static void disas_ldst_reg_roffset(DisasContext *s, uint32_t insn,
2419                                   int opc,
2420                                   int size,
2421                                   int rt,
2422                                   bool is_vector)
2423{
2424    int rn = extract32(insn, 5, 5);
2425    int shift = extract32(insn, 12, 1);
2426    int rm = extract32(insn, 16, 5);
2427    int opt = extract32(insn, 13, 3);
2428    bool is_signed = false;
2429    bool is_store = false;
2430    bool is_extended = false;
2431
2432    TCGv_i64 tcg_rm;
2433    TCGv_i64 tcg_addr;
2434
2435    if (extract32(opt, 1, 1) == 0) {
2436        unallocated_encoding(s);
2437        return;
2438    }
2439
2440    if (is_vector) {
2441        size |= (opc & 2) << 1;
2442        if (size > 4) {
2443            unallocated_encoding(s);
2444            return;
2445        }
2446        is_store = !extract32(opc, 0, 1);
2447        if (!fp_access_check(s)) {
2448            return;
2449        }
2450    } else {
2451        if (size == 3 && opc == 2) {
2452            /* PRFM - prefetch */
2453            return;
2454        }
2455        if (opc == 3 && size > 1) {
2456            unallocated_encoding(s);
2457            return;
2458        }
2459        is_store = (opc == 0);
2460        is_signed = extract32(opc, 1, 1);
2461        is_extended = (size < 3) && extract32(opc, 0, 1);
2462    }
2463
2464    if (rn == 31) {
2465        gen_check_sp_alignment(s);
2466    }
2467    tcg_addr = read_cpu_reg_sp(s, rn, 1);
2468
2469    tcg_rm = read_cpu_reg(s, rm, 1);
2470    ext_and_shift_reg(tcg_rm, tcg_rm, opt, shift ? size : 0);
2471
2472    tcg_gen_add_i64(tcg_addr, tcg_addr, tcg_rm);
2473
2474    if (is_vector) {
2475        if (is_store) {
2476            do_fp_st(s, rt, tcg_addr, size);
2477        } else {
2478            do_fp_ld(s, rt, tcg_addr, size);
2479        }
2480    } else {
2481        TCGv_i64 tcg_rt = cpu_reg(s, rt);
2482        bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
2483        if (is_store) {
2484            do_gpr_st(s, tcg_rt, tcg_addr, size,
2485                      true, rt, iss_sf, false);
2486        } else {
2487            do_gpr_ld(s, tcg_rt, tcg_addr, size,
2488                      is_signed, is_extended,
2489                      true, rt, iss_sf, false);
2490        }
2491    }
2492}
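
/* e.g. LDR W0, [X1, W2, UXTW #2] has opt == 010 (UXTW) and S == 1, so the
 * 32-bit index in W2 is zero-extended and shifted left by size == 2 before
 * being added to the base register.
 */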
2493
2494/*
2495 * Load/store (unsigned immediate)
2496 *
2497 * 31 30 29   27  26 25 24 23 22 21        10 9     5 4    0
2498 * +----+-------+---+-----+-----+------------+-------+------+
2499 * |size| 1 1 1 | V | 0 1 | opc |   imm12    |  Rn   |  Rt  |
2500 * +----+-------+---+-----+-----+------------+-------+------+
2501 *
2502 * For non-vector:
2503 *   size: 00 -> byte, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64 bit
2504 *   opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
2505 * For vector:
2506 *   size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
2507 *   opc<0>: 0 -> store, 1 -> load
2508 * Rn: base address register (inc SP)
2509 * Rt: target register
2510 */
2511static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn,
2512                                        int opc,
2513                                        int size,
2514                                        int rt,
2515                                        bool is_vector)
2516{
2517    int rn = extract32(insn, 5, 5);
2518    unsigned int imm12 = extract32(insn, 10, 12);
2519    unsigned int offset;
2520
2521    TCGv_i64 tcg_addr;
2522
2523    bool is_store;
2524    bool is_signed = false;
2525    bool is_extended = false;
2526
2527    if (is_vector) {
2528        size |= (opc & 2) << 1;
2529        if (size > 4) {
2530            unallocated_encoding(s);
2531            return;
2532        }
2533        is_store = !extract32(opc, 0, 1);
2534        if (!fp_access_check(s)) {
2535            return;
2536        }
2537    } else {
2538        if (size == 3 && opc == 2) {
2539            /* PRFM - prefetch */
2540            return;
2541        }
2542        if (opc == 3 && size > 1) {
2543            unallocated_encoding(s);
2544            return;
2545        }
2546        is_store = (opc == 0);
2547        is_signed = extract32(opc, 1, 1);
2548        is_extended = (size < 3) && extract32(opc, 0, 1);
2549    }
2550
2551    if (rn == 31) {
2552        gen_check_sp_alignment(s);
2553    }
2554    tcg_addr = read_cpu_reg_sp(s, rn, 1);
2555    offset = imm12 << size;
2556    tcg_gen_addi_i64(tcg_addr, tcg_addr, offset);
2557
2558    if (is_vector) {
2559        if (is_store) {
2560            do_fp_st(s, rt, tcg_addr, size);
2561        } else {
2562            do_fp_ld(s, rt, tcg_addr, size);
2563        }
2564    } else {
2565        TCGv_i64 tcg_rt = cpu_reg(s, rt);
2566        bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
2567        if (is_store) {
2568            do_gpr_st(s, tcg_rt, tcg_addr, size,
2569                      true, rt, iss_sf, false);
2570        } else {
2571            do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, is_extended,
2572                      true, rt, iss_sf, false);
2573        }
2574    }
2575}
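
/* e.g. LDR X0, [X1, #16] uses this unsigned-offset form: size == 3, so the
 * assembler encodes imm12 == 2 and the shift above scales it back up to
 * 2 << 3 == 16.
 */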
2576
2577/* Load/store register (all forms) */
2578static void disas_ldst_reg(DisasContext *s, uint32_t insn)
2579{
2580    int rt = extract32(insn, 0, 5);
2581    int opc = extract32(insn, 22, 2);
2582    bool is_vector = extract32(insn, 26, 1);
2583    int size = extract32(insn, 30, 2);
2584
2585    switch (extract32(insn, 24, 2)) {
2586    case 0:
2587        if (extract32(insn, 21, 1) == 1 && extract32(insn, 10, 2) == 2) {
2588            disas_ldst_reg_roffset(s, insn, opc, size, rt, is_vector);
2589        } else {
2590            /* Load/store register (unscaled immediate)
2591             * Load/store immediate pre/post-indexed
2592             * Load/store register unprivileged
2593             */
2594            disas_ldst_reg_imm9(s, insn, opc, size, rt, is_vector);
2595        }
2596        break;
2597    case 1:
2598        disas_ldst_reg_unsigned_imm(s, insn, opc, size, rt, is_vector);
2599        break;
2600    default:
2601        unallocated_encoding(s);
2602        break;
2603    }
2604}
2605
2606/* AdvSIMD load/store multiple structures
2607 *
2608 *  31  30  29           23 22  21         16 15    12 11  10 9    5 4    0
2609 * +---+---+---------------+---+-------------+--------+------+------+------+
2610 * | 0 | Q | 0 0 1 1 0 0 0 | L | 0 0 0 0 0 0 | opcode | size |  Rn  |  Rt  |
2611 * +---+---+---------------+---+-------------+--------+------+------+------+
2612 *
2613 * AdvSIMD load/store multiple structures (post-indexed)
2614 *
2615 *  31  30  29           23 22  21  20     16 15    12 11  10 9    5 4    0
2616 * +---+---+---------------+---+---+---------+--------+------+------+------+
2617 * | 0 | Q | 0 0 1 1 0 0 1 | L | 0 |   Rm    | opcode | size |  Rn  |  Rt  |
2618 * +---+---+---------------+---+---+---------+--------+------+------+------+
2619 *
2620 * Rt: first (or only) SIMD&FP register to be transferred
2621 * Rn: base address or SP
2622 * Rm (post-index only): post-index register (when !31) or size dependent #imm
2623 */
2624static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn)
2625{
2626    int rt = extract32(insn, 0, 5);
2627    int rn = extract32(insn, 5, 5);
2628    int size = extract32(insn, 10, 2);
2629    int opcode = extract32(insn, 12, 4);
2630    bool is_store = !extract32(insn, 22, 1);
2631    bool is_postidx = extract32(insn, 23, 1);
2632    bool is_q = extract32(insn, 30, 1);
2633    TCGv_i64 tcg_addr, tcg_rn;
2634
2635    int ebytes = 1 << size;
2636    int elements = (is_q ? 128 : 64) / (8 << size);
2637    int rpt;    /* num iterations */
2638    int selem;  /* structure elements */
2639    int r;
2640
2641    if (extract32(insn, 31, 1) || extract32(insn, 21, 1)) {
2642        unallocated_encoding(s);
2643        return;
2644    }
2645
2646    /* From the shared decode logic */
2647    switch (opcode) {
2648    case 0x0: /* LD4/ST4 (4 registers) */
2649        rpt = 1;
2650        selem = 4;
2651        break;
2652    case 0x2: /* LD1/ST1 (4 registers) */
2653        rpt = 4;
2654        selem = 1;
2655        break;
2656    case 0x4: /* LD3/ST3 (3 registers) */
2657        rpt = 1;
2658        selem = 3;
2659        break;
2660    case 0x6: /* LD1/ST1 (3 registers) */
2661        rpt = 3;
2662        selem = 1;
2663        break;
2664    case 0x7: /* LD1/ST1 (1 register) */
2665        rpt = 1;
2666        selem = 1;
2667        break;
2668    case 0x8: /* LD2/ST2 (2 registers) */
2669        rpt = 1;
2670        selem = 2;
2671        break;
2672    case 0xa: /* LD1/ST1 (2 registers) */
2673        rpt = 2;
2674        selem = 1;
2675        break;
2676    default:
2677        unallocated_encoding(s);
2678        return;
2679    }
2680
2681    if (size == 3 && !is_q && selem != 1) {
2682        /* reserved */
2683        unallocated_encoding(s);
2684        return;
2685    }
2686
2687    if (!fp_access_check(s)) {
2688        return;
2689    }
2690
2691    if (rn == 31) {
2692        gen_check_sp_alignment(s);
2693    }
2694
2695    tcg_rn = cpu_reg_sp(s, rn);
2696    tcg_addr = tcg_temp_new_i64();
2697    tcg_gen_mov_i64(tcg_addr, tcg_rn);
2698
2699    for (r = 0; r < rpt; r++) {
2700        int e;
2701        for (e = 0; e < elements; e++) {
2702            int tt = (rt + r) % 32;
2703            int xs;
2704            for (xs = 0; xs < selem; xs++) {
2705                if (is_store) {
2706                    do_vec_st(s, tt, e, tcg_addr, size);
2707                } else {
2708                    do_vec_ld(s, tt, e, tcg_addr, size);
2709
2710                    /* For non-quad operations, setting a slice of the low
2711                     * 64 bits of the register clears the high 64 bits (in
2712                     * the ARM ARM pseudocode this is implicit in the fact
2713                     * that 'rval' is a 64 bit wide variable). We optimize
2714                     * by noticing that we only need to do this the first
2715                     * time we touch a register.
2716                     */
2717                    if (!is_q && e == 0 && (r == 0 || xs == selem - 1)) {
2718                        clear_vec_high(s, tt);
2719                    }
2720                }
2721                tcg_gen_addi_i64(tcg_addr, tcg_addr, ebytes);
2722                tt = (tt + 1) % 32;
2723            }
2724        }
2725    }
2726
2727    if (is_postidx) {
2728        int rm = extract32(insn, 16, 5);
2729        if (rm == 31) {
2730            tcg_gen_mov_i64(tcg_rn, tcg_addr);
2731        } else {
2732            tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
2733        }
2734    }
2735    tcg_temp_free_i64(tcg_addr);
2736}
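
/* e.g. LD4 {V0.4S-V3.4S}, [X0] decodes to opcode 0 (rpt == 1, selem == 4)
 * with Q == 1 and size == 2, so the loops above read sixteen consecutive
 * 32-bit elements and de-interleave them: memory element 4*e + xs lands in
 * element e of register V<xs>.
 */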
2737
2738/* AdvSIMD load/store single structure
2739 *
2740 *  31  30  29           23 22 21 20       16 15 13 12  11  10 9    5 4    0
2741 * +---+---+---------------+-----+-----------+-----+---+------+------+------+
2742 * | 0 | Q | 0 0 1 1 0 1 0 | L R | 0 0 0 0 0 | opc | S | size |  Rn  |  Rt  |
2743 * +---+---+---------------+-----+-----------+-----+---+------+------+------+
2744 *
2745 * AdvSIMD load/store single structure (post-indexed)
2746 *
2747 *  31  30  29           23 22 21 20       16 15 13 12  11  10 9    5 4    0
2748 * +---+---+---------------+-----+-----------+-----+---+------+------+------+
2749 * | 0 | Q | 0 0 1 1 0 1 1 | L R |     Rm    | opc | S | size |  Rn  |  Rt  |
2750 * +---+---+---------------+-----+-----------+-----+---+------+------+------+
2751 *
2752 * Rt: first (or only) SIMD&FP register to be transferred
2753 * Rn: base address or SP
2754 * Rm (post-index only): post-index register (when !31) or size dependent #imm
2755 * index = encoded in Q:S:size dependent on size
2756 *
2757 * lane_size = encoded in R, opc
2758 * transfer width = encoded in opc, S, size
2759 */
2760static void disas_ldst_single_struct(DisasContext *s, uint32_t insn)
2761{
2762    int rt = extract32(insn, 0, 5);
2763    int rn = extract32(insn, 5, 5);
2764    int size = extract32(insn, 10, 2);
2765    int S = extract32(insn, 12, 1);
2766    int opc = extract32(insn, 13, 3);
2767    int R = extract32(insn, 21, 1);
2768    int is_load = extract32(insn, 22, 1);
2769    int is_postidx = extract32(insn, 23, 1);
2770    int is_q = extract32(insn, 30, 1);
2771
2772    int scale = extract32(opc, 1, 2);
2773    int selem = (extract32(opc, 0, 1) << 1 | R) + 1;
2774    bool replicate = false;
2775    int index = is_q << 3 | S << 2 | size;
2776    int ebytes, xs;
2777    TCGv_i64 tcg_addr, tcg_rn;
2778
2779    switch (scale) {
2780    case 3:
2781        if (!is_load || S) {
2782            unallocated_encoding(s);
2783            return;
2784        }
2785        scale = size;
2786        replicate = true;
2787        break;
2788    case 0:
2789        break;
2790    case 1:
2791        if (extract32(size, 0, 1)) {
2792            unallocated_encoding(s);
2793            return;
2794        }
2795        index >>= 1;
2796        break;
2797    case 2:
2798        if (extract32(size, 1, 1)) {
2799            unallocated_encoding(s);
2800            return;
2801        }
2802        if (!extract32(size, 0, 1)) {
2803            index >>= 2;
2804        } else {
2805            if (S) {
2806                unallocated_encoding(s);
2807                return;
2808            }
2809            index >>= 3;
2810            scale = 3;
2811        }
2812        break;
2813    default:
2814        g_assert_not_reached();
2815    }
2816
2817    if (!fp_access_check(s)) {
2818        return;
2819    }
2820
2821    ebytes = 1 << scale;
2822
2823    if (rn == 31) {
2824        gen_check_sp_alignment(s);
2825    }
2826
2827    tcg_rn = cpu_reg_sp(s, rn);
2828    tcg_addr = tcg_temp_new_i64();
2829    tcg_gen_mov_i64(tcg_addr, tcg_rn);
2830
2831    for (xs = 0; xs < selem; xs++) {
2832        if (replicate) {
2833            /* Load and replicate to all elements */
2834            uint64_t mulconst;
2835            TCGv_i64 tcg_tmp = tcg_temp_new_i64();
2836
2837            tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr,
2838                                get_mem_index(s), s->be_data + scale);
2839            switch (scale) {
2840            case 0:
2841                mulconst = 0x0101010101010101ULL;
2842                break;
2843            case 1:
2844                mulconst = 0x0001000100010001ULL;
2845                break;
2846            case 2:
2847                mulconst = 0x0000000100000001ULL;
2848                break;
2849            case 3:
2850                mulconst = 0;
2851                break;
2852            default:
2853                g_assert_not_reached();
2854            }
2855            if (mulconst) {
2856                tcg_gen_muli_i64(tcg_tmp, tcg_tmp, mulconst);
2857            }
2858            write_vec_element(s, tcg_tmp, rt, 0, MO_64);
2859            if (is_q) {
2860                write_vec_element(s, tcg_tmp, rt, 1, MO_64);
2861            } else {
2862                clear_vec_high(s, rt);
2863            }
2864            tcg_temp_free_i64(tcg_tmp);
2865        } else {
2866            /* Load/store one element per register */
2867            if (is_load) {
2868                do_vec_ld(s, rt, index, tcg_addr, scale);
2869            } else {
2870                do_vec_st(s, rt, index, tcg_addr, scale);
2871            }
2872        }
2873        tcg_gen_addi_i64(tcg_addr, tcg_addr, ebytes);
2874        rt = (rt + 1) % 32;
2875    }
2876
2877    if (is_postidx) {
2878        int rm = extract32(insn, 16, 5);
2879        if (rm == 31) {
2880            tcg_gen_mov_i64(tcg_rn, tcg_addr);
2881        } else {
2882            tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
2883        }
2884    }
2885    tcg_temp_free_i64(tcg_addr);
2886}
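
/* e.g. LD1R {V0.8B}, [X0] (opc == 110, R == 0, size == 00) takes the
 * replicate path above: scale falls back to size, a single byte is loaded,
 * and the multiply by 0x0101010101010101 fans it out across the low
 * 64 bits of V0, with the high half cleared since Q == 0.
 */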
2887
2888/* Loads and stores */
2889static void disas_ldst(DisasContext *s, uint32_t insn)
2890{
2891    switch (extract32(insn, 24, 6)) {
2892    case 0x08: /* Load/store exclusive */
2893        disas_ldst_excl(s, insn);
2894        break;
2895    case 0x18: case 0x1c: /* Load register (literal) */
2896        disas_ld_lit(s, insn);
2897        break;
2898    case 0x28: case 0x29:
2899    case 0x2c: case 0x2d: /* Load/store pair (all forms) */
2900        disas_ldst_pair(s, insn);
2901        break;
2902    case 0x38: case 0x39:
2903    case 0x3c: case 0x3d: /* Load/store register (all forms) */
2904        disas_ldst_reg(s, insn);
2905        break;
2906    case 0x0c: /* AdvSIMD load/store multiple structures */
2907        disas_ldst_multiple_struct(s, insn);
2908        break;
2909    case 0x0d: /* AdvSIMD load/store single structure */
2910        disas_ldst_single_struct(s, insn);
2911        break;
2912    default:
2913        unallocated_encoding(s);
2914        break;
2915    }
2916}
2917
2918/* PC-rel. addressing
2919 *   31  30   29 28       24 23                5 4    0
2920 * +----+-------+-----------+-------------------+------+
2921 * | op | immlo | 1 0 0 0 0 |       immhi       |  Rd  |
2922 * +----+-------+-----------+-------------------+------+
2923 */
2924static void disas_pc_rel_adr(DisasContext *s, uint32_t insn)
2925{
2926    unsigned int page, rd;
2927    uint64_t base;
2928    uint64_t offset;
2929
2930    page = extract32(insn, 31, 1);
2931    /* SignExtend(immhi:immlo) -> offset */
2932    offset = sextract64(insn, 5, 19);
2933    offset = offset << 2 | extract32(insn, 29, 2);
2934    rd = extract32(insn, 0, 5);
2935    base = s->pc - 4;
2936
2937    if (page) {
2938        /* ADRP (page based) */
2939        base &= ~0xfff;
2940        offset <<= 12;
2941    }
2942
2943    tcg_gen_movi_i64(cpu_reg(s, rd), base + offset);
2944}
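
/* Example: with the instruction at 0x400a34 and immhi:immlo == 3, ADR
 * yields 0x400a37, while ADRP masks the base to 0x400000 and shifts the
 * offset to 0x3000, yielding 0x403000.
 */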
2945
2946/*
2947 * Add/subtract (immediate)
2948 *
2949 *  31 30 29 28       24 23 22 21         10 9   5 4   0
2950 * +--+--+--+-----------+-----+-------------+-----+-----+
2951 * |sf|op| S| 1 0 0 0 1 |shift|    imm12    |  Rn | Rd  |
2952 * +--+--+--+-----------+-----+-------------+-----+-----+
2953 *
2954 *    sf: 0 -> 32bit, 1 -> 64bit
2955 *    op: 0 -> add  , 1 -> sub
2956 *     S: 1 -> set flags
2957 * shift: 00 -> LSL imm by 0, 01 -> LSL imm by 12
2958 */
2959static void disas_add_sub_imm(DisasContext *s, uint32_t insn)
2960{
2961    int rd = extract32(insn, 0, 5);
2962    int rn = extract32(insn, 5, 5);
2963    uint64_t imm = extract32(insn, 10, 12);
2964    int shift = extract32(insn, 22, 2);
2965    bool setflags = extract32(insn, 29, 1);
2966    bool sub_op = extract32(insn, 30, 1);
2967    bool is_64bit = extract32(insn, 31, 1);
2968
2969    TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
2970    TCGv_i64 tcg_rd = setflags ? cpu_reg(s, rd) : cpu_reg_sp(s, rd);
2971    TCGv_i64 tcg_result;
2972
2973    switch (shift) {
2974    case 0x0:
2975        break;
2976    case 0x1:
2977        imm <<= 12;
2978        break;
2979    default:
2980        unallocated_encoding(s);
2981        return;
2982    }
2983
2984    tcg_result = tcg_temp_new_i64();
2985    if (!setflags) {
2986        if (sub_op) {
2987            tcg_gen_subi_i64(tcg_result, tcg_rn, imm);
2988        } else {
2989            tcg_gen_addi_i64(tcg_result, tcg_rn, imm);
2990        }
2991    } else {
2992        TCGv_i64 tcg_imm = tcg_const_i64(imm);
2993        if (sub_op) {
2994            gen_sub_CC(is_64bit, tcg_result, tcg_rn, tcg_imm);
2995        } else {
2996            gen_add_CC(is_64bit, tcg_result, tcg_rn, tcg_imm);
2997        }
2998        tcg_temp_free_i64(tcg_imm);
2999    }
3000
3001    if (is_64bit) {
3002        tcg_gen_mov_i64(tcg_rd, tcg_result);
3003    } else {
3004        tcg_gen_ext32u_i64(tcg_rd, tcg_result);
3005    }
3006
3007    tcg_temp_free_i64(tcg_result);
3008}
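
/* e.g. ADD SP, SP, #0x10 takes the !setflags path (so Rd may be SP), while
 * SUBS XZR, X0, #1, LSL #12 (the CMP X0, #4096 alias) shifts the immediate
 * to 0x1000 and sets the flags via gen_sub_CC.
 */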
3009
3010/* The input should be a value in the bottom e bits (with higher
3011 * bits zero); returns that value replicated into every element
3012 * of size e in a 64 bit integer.
3013 */
3014static uint64_t bitfield_replicate(uint64_t mask, unsigned int e)
3015{
3016    assert(e != 0);
3017    while (e < 64) {
3018        mask |= mask << e;
3019        e *= 2;
3020    }
3021    return mask;
3022}
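
/* Examples: bitfield_replicate(0x5, 4) == 0x5555555555555555ULL, and
 * bitfield_replicate(0xff, 16) == 0x00ff00ff00ff00ffULL.
 */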
3023
3024/* Return a value with the bottom len bits set (where 0 < len <= 64) */
3025static inline uint64_t bitmask64(unsigned int length)
3026{
3027    assert(length > 0 && length <= 64);
3028    return ~0ULL >> (64 - length);
3029}
3030
3031/* Simplified variant of pseudocode DecodeBitMasks() for the case where we
3032 * only require the wmask. Returns false if the imms/immr/immn are a reserved
3033 * value (ie should cause a guest UNDEF exception), and true if they are
3034 * valid, in which case the decoded bit pattern is written to result.
3035 */
3036static bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn,
3037                                   unsigned int imms, unsigned int immr)
3038{
3039    uint64_t mask;
3040    unsigned e, levels, s, r;
3041    int len;
3042
3043    assert(immn < 2 && imms < 64 && immr < 64);
3044
3045    /* The bit patterns we create here are 64 bit patterns which
3046     * are vectors of identical elements of size e = 2, 4, 8, 16, 32 or
3047     * 64 bits each. Each element contains the same value: a run
3048     * of between 1 and e-1 non-zero bits, rotated within the
3049     * element by between 0 and e-1 bits.
3050     *
3051     * The element size and run length are encoded into immn (1 bit)
3052     * and imms (6 bits) as follows:
3053     * 64 bit elements: immn = 1, imms = <length of run - 1>
3054     * 32 bit elements: immn = 0, imms = 0 : <length of run - 1>
3055     * 16 bit elements: immn = 0, imms = 10 : <length of run - 1>
3056     *  8 bit elements: immn = 0, imms = 110 : <length of run - 1>
3057     *  4 bit elements: immn = 0, imms = 1110 : <length of run - 1>
3058     *  2 bit elements: immn = 0, imms = 11110 : <length of run - 1>
3059     * Notice that immn = 0, imms = 11111x is the only combination
3060     * not covered by one of the above options; this is reserved.
3061     * Further, <length of run - 1> all-ones is a reserved pattern.
3062     *
3063     * In all cases the rotation is by immr % e (and immr is 6 bits).
3064     */
3065
3066    /* First determine the element size */
3067    len = 31 - clz32((immn << 6) | (~imms & 0x3f));
3068    if (len < 1) {
3069        /* This is the immn == 0, imms == 0b11111x case */
3070        return false;
3071    }
3072    e = 1 << len;
3073
3074    levels = e - 1;
3075    s = imms & levels;
3076    r = immr & levels;
3077
3078    if (s == levels) {
3079        /* <length of run - 1> mustn't be all-ones. */
3080        return false;
3081    }
3082
3083    /* Create the value of one element: s+1 set bits rotated
3084     * by r within the element (which is e bits wide)...
3085     */
3086    mask = bitmask64(s + 1);
3087    if (r) {
3088        mask = (mask >> r) | (mask << (e - r));
3089        mask &= bitmask64(e);
3090    }
3091    /* ...then replicate the element over the whole 64 bit value */
3092    mask = bitfield_replicate(mask, e);
3093    *result = mask;
3094    return true;
3095}
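
/* Worked example of the decode above: immn == 0, imms == 0b111100 and
 * immr == 0 give len == 1, hence e == 2 and levels == 1; then s == 0 and
 * r == 0, so each 2-bit element is a single set bit and the result is
 * 0x5555555555555555 (the pattern behind e.g. AND Xd, Xn, #0x5555555555555555).
 */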
3096
3097/* Logical (immediate)
3098 *   31  30 29 28         23 22  21  16 15  10 9    5 4    0
3099 * +----+-----+-------------+---+------+------+------+------+
3100 * | sf | opc | 1 0 0 1 0 0 | N | immr | imms |  Rn  |  Rd  |
3101 * +----+-----+-------------+---+------+------+------+------+
3102 */
3103static void disas_logic_imm(DisasContext *s, uint32_t insn)
3104{
3105    unsigned int sf, opc, is_n, immr, imms, rn, rd;
3106    TCGv_i64 tcg_rd, tcg_rn;
3107    uint64_t wmask;
3108    bool is_and = false;
3109
3110    sf = extract32(insn, 31, 1);
3111    opc = extract32(insn, 29, 2);
3112    is_n = extract32(insn, 22, 1);
3113    immr = extract32(insn, 16, 6);
3114    imms = extract32(insn, 10, 6);
3115    rn = extract32(insn, 5, 5);
3116    rd = extract32(insn, 0, 5);
3117
3118    if (!sf && is_n) {
3119        unallocated_encoding(s);
3120        return;
3121    }
3122
3123    if (opc == 0x3) { /* ANDS */
3124        tcg_rd = cpu_reg(s, rd);
3125    } else {
3126        tcg_rd = cpu_reg_sp(s, rd);
3127    }
3128    tcg_rn = cpu_reg(s, rn);
3129
3130    if (!logic_imm_decode_wmask(&wmask, is_n, imms, immr)) {
3131        /* some immediate field values are reserved */
3132        unallocated_encoding(s);
3133        return;
3134    }
3135
3136    if (!sf) {
3137        wmask &= 0xffffffff;
3138    }
3139
3140    switch (opc) {
3141    case 0x3: /* ANDS */
3142    case 0x0: /* AND */
3143        tcg_gen_andi_i64(tcg_rd, tcg_rn, wmask);
3144        is_and = true;
3145        break;
3146    case 0x1: /* ORR */
3147        tcg_gen_ori_i64(tcg_rd, tcg_rn, wmask);
3148        break;
3149    case 0x2: /* EOR */
3150        tcg_gen_xori_i64(tcg_rd, tcg_rn, wmask);
3151        break;
3152    default:
3153        g_assert_not_reached(); /* must handle all above */
3154        break;
3155    }
3156
3157    if (!sf && !is_and) {
3158        /* zero extend final result; we know we can skip this for AND
3159         * since the immediate had the high 32 bits clear.
3160         */
3161        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3162    }
3163
3164    if (opc == 3) { /* ANDS */
3165        gen_logic_CC(sf, tcg_rd);
3166    }
3167}
3168
3169/*
3170 * Move wide (immediate)
3171 *
3172 *  31 30 29 28         23 22 21 20             5 4    0
3173 * +--+-----+-------------+-----+----------------+------+
3174 * |sf| opc | 1 0 0 1 0 1 |  hw |  imm16         |  Rd  |
3175 * +--+-----+-------------+-----+----------------+------+
3176 *
3177 * sf: 0 -> 32 bit, 1 -> 64 bit
3178 * opc: 00 -> N, 10 -> Z, 11 -> K
3179 * hw: shift/16 (0 or 16; 32 and 48 for sf=1 only)
3180 */
3181static void disas_movw_imm(DisasContext *s, uint32_t insn)
3182{
3183    int rd = extract32(insn, 0, 5);
3184    uint64_t imm = extract32(insn, 5, 16);
3185    int sf = extract32(insn, 31, 1);
3186    int opc = extract32(insn, 29, 2);
3187    int pos = extract32(insn, 21, 2) << 4;
3188    TCGv_i64 tcg_rd = cpu_reg(s, rd);
3189    TCGv_i64 tcg_imm;
3190
3191    if (!sf && (pos >= 32)) {
3192        unallocated_encoding(s);
3193        return;
3194    }
3195
3196    switch (opc) {
3197    case 0: /* MOVN */
3198    case 2: /* MOVZ */
3199        imm <<= pos;
3200        if (opc == 0) {
3201            imm = ~imm;
3202        }
3203        if (!sf) {
3204            imm &= 0xffffffffu;
3205        }
3206        tcg_gen_movi_i64(tcg_rd, imm);
3207        break;
3208    case 3: /* MOVK */
3209        tcg_imm = tcg_const_i64(imm);
3210        tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_imm, pos, 16);
3211        tcg_temp_free_i64(tcg_imm);
3212        if (!sf) {
3213            tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3214        }
3215        break;
3216    default:
3217        unallocated_encoding(s);
3218        break;
3219    }
3220}
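
/* e.g. MOVZ X0, #0x1234, LSL #16 (opc == 2, hw == 1) writes 0x12340000; a
 * following MOVK X0, #0x5678 (opc == 3, hw == 0) deposits into bits [15:0],
 * leaving X0 == 0x12345678.
 */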
3221
/* Bitfield
 *   31  30 29 28         23 22  21  16 15  10 9    5 4    0
 * +----+-----+-------------+---+------+------+------+------+
 * | sf | opc | 1 0 0 1 1 0 | N | immr | imms |  Rn  |  Rd  |
 * +----+-----+-------------+---+------+------+------+------+
 */
static void disas_bitfield(DisasContext *s, uint32_t insn)
{
    unsigned int sf, n, opc, ri, si, rn, rd, bitsize, pos, len;
    TCGv_i64 tcg_rd, tcg_tmp;

    sf = extract32(insn, 31, 1);
    opc = extract32(insn, 29, 2);
    n = extract32(insn, 22, 1);
    ri = extract32(insn, 16, 6);
    si = extract32(insn, 10, 6);
    rn = extract32(insn, 5, 5);
    rd = extract32(insn, 0, 5);
    bitsize = sf ? 64 : 32;

    if (sf != n || ri >= bitsize || si >= bitsize || opc > 2) {
        unallocated_encoding(s);
        return;
    }

    tcg_rd = cpu_reg(s, rd);

    /* Suppress the zero-extend for !sf.  Since RI and SI are constrained
       to be smaller than bitsize, we'll never reference data outside the
       low 32-bits anyway.  */
    tcg_tmp = read_cpu_reg(s, rn, 1);

    /* Recognize simple(r) extractions.  */
    if (si >= ri) {
        /* Wd<s-r:0> = Wn<s:r> */
        len = (si - ri) + 1;
        if (opc == 0) { /* SBFM: ASR, SBFX, SXTB, SXTH, SXTW */
            tcg_gen_sextract_i64(tcg_rd, tcg_tmp, ri, len);
            goto done;
        } else if (opc == 2) { /* UBFM: UBFX, LSR, UXTB, UXTH */
            tcg_gen_extract_i64(tcg_rd, tcg_tmp, ri, len);
            return;
        }
        /* opc == 1, BFXIL fall through to deposit */
        tcg_gen_extract_i64(tcg_tmp, tcg_tmp, ri, len);
        pos = 0;
    } else {
        /* Handle the ri > si case with a deposit
         * Wd<32+s-r,32-r> = Wn<s:0>
         */
        len = si + 1;
        pos = (bitsize - ri) & (bitsize - 1);
    }

    if (opc == 0 && len < ri) {
        /* SBFM: sign extend the destination field from len to fill
           the balance of the word.  Let the deposit below insert all
           of those sign bits.  */
        tcg_gen_sextract_i64(tcg_tmp, tcg_tmp, 0, len);
        len = ri;
    }

    if (opc == 1) { /* BFM, BFXIL */
        tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, pos, len);
    } else {
        /* SBFM or UBFM: We start with zero, and we haven't modified
           any bits outside bitsize, therefore the zero-extension
           below is unneeded.  */
        tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len);
        return;
    }

 done:
    if (!sf) { /* zero extend final result */
        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
    }
}

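/* For illustration: the bitfield aliases map onto (immr, imms) so that the
 * two cases above cover them all.  UBFX x0, x1, #8, #4 is UBFM with ri=8,
 * si=11 (si >= ri, a plain 4-bit extract), while BFI x0, x1, #8, #4 is BFM
 * with ri=56, si=3 (ri > si, a 4-bit deposit at position (64-56)&63 = 8).
 */
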
/* Extract
 *   31  30  29 28         23 22   21  20  16 15    10 9    5 4    0
 * +----+------+-------------+---+----+------+--------+------+------+
 * | sf | op21 | 1 0 0 1 1 1 | N | o0 |  Rm  |  imms  |  Rn  |  Rd  |
 * +----+------+-------------+---+----+------+--------+------+------+
 */
static void disas_extract(DisasContext *s, uint32_t insn)
{
    unsigned int sf, n, rm, imm, rn, rd, bitsize, op21, op0;

    sf = extract32(insn, 31, 1);
    n = extract32(insn, 22, 1);
    rm = extract32(insn, 16, 5);
    imm = extract32(insn, 10, 6);
    rn = extract32(insn, 5, 5);
    rd = extract32(insn, 0, 5);
    op21 = extract32(insn, 29, 2);
    op0 = extract32(insn, 21, 1);
    bitsize = sf ? 64 : 32;

    if (sf != n || op21 || op0 || imm >= bitsize) {
        unallocated_encoding(s);
    } else {
        TCGv_i64 tcg_rd, tcg_rm, tcg_rn;

        tcg_rd = cpu_reg(s, rd);

        if (unlikely(imm == 0)) {
            /* tcg shl_i32/shl_i64 is undefined for 32/64 bit shifts,
             * so an extract from bit 0 is a special case.
             */
            if (sf) {
                tcg_gen_mov_i64(tcg_rd, cpu_reg(s, rm));
            } else {
                tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, rm));
            }
        } else if (rm == rn) { /* ROR */
            tcg_rm = cpu_reg(s, rm);
            if (sf) {
                tcg_gen_rotri_i64(tcg_rd, tcg_rm, imm);
            } else {
                TCGv_i32 tmp = tcg_temp_new_i32();
                tcg_gen_extrl_i64_i32(tmp, tcg_rm);
                tcg_gen_rotri_i32(tmp, tmp, imm);
                tcg_gen_extu_i32_i64(tcg_rd, tmp);
                tcg_temp_free_i32(tmp);
            }
        } else {
            tcg_rm = read_cpu_reg(s, rm, sf);
            tcg_rn = read_cpu_reg(s, rn, sf);
            tcg_gen_shri_i64(tcg_rm, tcg_rm, imm);
            tcg_gen_shli_i64(tcg_rn, tcg_rn, bitsize - imm);
            tcg_gen_or_i64(tcg_rd, tcg_rm, tcg_rn);
            if (!sf) {
                tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
            }
        }
    }
}

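/* For illustration: EXTR x0, x1, x2, #8 returns bits <71:8> of the 128-bit
 * concatenation x1:x2, i.e. (x2 >> 8) | (x1 << 56), which is exactly the
 * shri/shli/or sequence above.  When Rn == Rm this degenerates to a rotate,
 * hence the ROR alias special case.
 */
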
/* Data processing - immediate */
static void disas_data_proc_imm(DisasContext *s, uint32_t insn)
{
    switch (extract32(insn, 23, 6)) {
    case 0x20: case 0x21: /* PC-rel. addressing */
        disas_pc_rel_adr(s, insn);
        break;
    case 0x22: case 0x23: /* Add/subtract (immediate) */
        disas_add_sub_imm(s, insn);
        break;
    case 0x24: /* Logical (immediate) */
        disas_logic_imm(s, insn);
        break;
    case 0x25: /* Move wide (immediate) */
        disas_movw_imm(s, insn);
        break;
    case 0x26: /* Bitfield */
        disas_bitfield(s, insn);
        break;
    case 0x27: /* Extract */
        disas_extract(s, insn);
        break;
    default:
        unallocated_encoding(s);
        break;
    }
}

/* Shift a TCGv src by TCGv shift_amount, put result in dst.
 * Note that it is the caller's responsibility to ensure that the
 * shift amount is in range (ie 0..31 or 0..63) and provide the ARM
 * mandated semantics for out of range shifts.
 */
static void shift_reg(TCGv_i64 dst, TCGv_i64 src, int sf,
                      enum a64_shift_type shift_type, TCGv_i64 shift_amount)
{
    switch (shift_type) {
    case A64_SHIFT_TYPE_LSL:
        tcg_gen_shl_i64(dst, src, shift_amount);
        break;
    case A64_SHIFT_TYPE_LSR:
        tcg_gen_shr_i64(dst, src, shift_amount);
        break;
    case A64_SHIFT_TYPE_ASR:
        if (!sf) {
            tcg_gen_ext32s_i64(dst, src);
        }
        tcg_gen_sar_i64(dst, sf ? src : dst, shift_amount);
        break;
    case A64_SHIFT_TYPE_ROR:
        if (sf) {
            tcg_gen_rotr_i64(dst, src, shift_amount);
        } else {
            TCGv_i32 t0, t1;
            t0 = tcg_temp_new_i32();
            t1 = tcg_temp_new_i32();
            tcg_gen_extrl_i64_i32(t0, src);
            tcg_gen_extrl_i64_i32(t1, shift_amount);
            tcg_gen_rotr_i32(t0, t0, t1);
            tcg_gen_extu_i32_i64(dst, t0);
            tcg_temp_free_i32(t0);
            tcg_temp_free_i32(t1);
        }
        break;
    default:
        g_assert_not_reached(); /* all shift types should be handled */
        break;
    }

    if (!sf) { /* zero extend final result */
        tcg_gen_ext32u_i64(dst, dst);
    }
}

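/* For illustration: the register-controlled shifts (LSLV etc, see
 * handle_shift_reg() below) satisfy the in-range requirement by masking
 * the amount with (sf ? 63 : 31) first, which is also the architected
 * behaviour of those instructions.
 */
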
/* Shift a TCGv src by immediate, put result in dst.
 * The shift amount must be in range (this should always be true as the
 * relevant instructions will UNDEF on bad shift immediates).
 */
static void shift_reg_imm(TCGv_i64 dst, TCGv_i64 src, int sf,
                          enum a64_shift_type shift_type, unsigned int shift_i)
{
    assert(shift_i < (sf ? 64 : 32));

    if (shift_i == 0) {
        tcg_gen_mov_i64(dst, src);
    } else {
        TCGv_i64 shift_const;

        shift_const = tcg_const_i64(shift_i);
        shift_reg(dst, src, sf, shift_type, shift_const);
        tcg_temp_free_i64(shift_const);
    }
}

/* Logical (shifted register)
 *   31  30 29 28       24 23   22 21  20  16 15    10 9    5 4    0
 * +----+-----+-----------+-------+---+------+--------+------+------+
 * | sf | opc | 0 1 0 1 0 | shift | N |  Rm  |  imm6  |  Rn  |  Rd  |
 * +----+-----+-----------+-------+---+------+--------+------+------+
 */
static void disas_logic_reg(DisasContext *s, uint32_t insn)
{
    TCGv_i64 tcg_rd, tcg_rn, tcg_rm;
    unsigned int sf, opc, shift_type, invert, rm, shift_amount, rn, rd;

    sf = extract32(insn, 31, 1);
    opc = extract32(insn, 29, 2);
    shift_type = extract32(insn, 22, 2);
    invert = extract32(insn, 21, 1);
    rm = extract32(insn, 16, 5);
    shift_amount = extract32(insn, 10, 6);
    rn = extract32(insn, 5, 5);
    rd = extract32(insn, 0, 5);

    if (!sf && (shift_amount & (1 << 5))) {
        unallocated_encoding(s);
        return;
    }

    tcg_rd = cpu_reg(s, rd);

    if (opc == 1 && shift_amount == 0 && shift_type == 0 && rn == 31) {
        /* Unshifted ORR and ORN with WZR/XZR is the standard encoding for
         * register-register MOV and MVN, so it is worth special casing.
         */
        tcg_rm = cpu_reg(s, rm);
        if (invert) {
            tcg_gen_not_i64(tcg_rd, tcg_rm);
            if (!sf) {
                tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
            }
        } else {
            if (sf) {
                tcg_gen_mov_i64(tcg_rd, tcg_rm);
            } else {
                tcg_gen_ext32u_i64(tcg_rd, tcg_rm);
            }
        }
        return;
    }

    tcg_rm = read_cpu_reg(s, rm, sf);

    if (shift_amount) {
        shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, shift_amount);
    }

    tcg_rn = cpu_reg(s, rn);

    switch (opc | (invert << 2)) {
    case 0: /* AND */
    case 3: /* ANDS */
        tcg_gen_and_i64(tcg_rd, tcg_rn, tcg_rm);
        break;
    case 1: /* ORR */
        tcg_gen_or_i64(tcg_rd, tcg_rn, tcg_rm);
        break;
    case 2: /* EOR */
        tcg_gen_xor_i64(tcg_rd, tcg_rn, tcg_rm);
        break;
    case 4: /* BIC */
    case 7: /* BICS */
        tcg_gen_andc_i64(tcg_rd, tcg_rn, tcg_rm);
        break;
    case 5: /* ORN */
        tcg_gen_orc_i64(tcg_rd, tcg_rn, tcg_rm);
        break;
    case 6: /* EON */
        tcg_gen_eqv_i64(tcg_rd, tcg_rn, tcg_rm);
        break;
    default:
        g_assert_not_reached();
        break;
    }

    if (!sf) {
        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
    }

    if (opc == 3) {
        gen_logic_CC(sf, tcg_rd);
    }
}

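/* For illustration: the special case above is what MOV and MVN assemble to,
 * e.g. MOV x0, x1 is ORR x0, xzr, x1 and MVN w0, w1 is ORN w0, wzr, w1.
 */
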
/*
 * Add/subtract (extended register)
 *
 *  31|30|29|28       24|23 22|21|20   16|15  13|12  10|9  5|4  0|
 * +--+--+--+-----------+-----+--+-------+------+------+----+----+
 * |sf|op| S| 0 1 0 1 1 | opt | 1|  Rm   |option| imm3 | Rn | Rd |
 * +--+--+--+-----------+-----+--+-------+------+------+----+----+
 *
 *  sf: 0 -> 32bit, 1 -> 64bit
 *  op: 0 -> add  , 1 -> sub
 *   S: 1 -> set flags
 * opt: 00
 * option: extension type (see DecodeRegExtend)
 * imm3: optional shift to Rm
 *
 * Rd = Rn + LSL(extend(Rm), amount)
 */
static void disas_add_sub_ext_reg(DisasContext *s, uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int imm3 = extract32(insn, 10, 3);
    int option = extract32(insn, 13, 3);
    int rm = extract32(insn, 16, 5);
    bool setflags = extract32(insn, 29, 1);
    bool sub_op = extract32(insn, 30, 1);
    bool sf = extract32(insn, 31, 1);

    TCGv_i64 tcg_rm, tcg_rn; /* temps */
    TCGv_i64 tcg_rd;
    TCGv_i64 tcg_result;

    if (imm3 > 4) {
        unallocated_encoding(s);
        return;
    }

    /* non-flag setting ops may use SP */
    if (!setflags) {
        tcg_rd = cpu_reg_sp(s, rd);
    } else {
        tcg_rd = cpu_reg(s, rd);
    }
    tcg_rn = read_cpu_reg_sp(s, rn, sf);

    tcg_rm = read_cpu_reg(s, rm, sf);
    ext_and_shift_reg(tcg_rm, tcg_rm, option, imm3);

    tcg_result = tcg_temp_new_i64();

    if (!setflags) {
        if (sub_op) {
            tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
        } else {
            tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
        }
    } else {
        if (sub_op) {
            gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
        } else {
            gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
        }
    }

    if (sf) {
        tcg_gen_mov_i64(tcg_rd, tcg_result);
    } else {
        tcg_gen_ext32u_i64(tcg_rd, tcg_result);
    }

    tcg_temp_free_i64(tcg_result);
}

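/* For illustration: ADD x0, sp, w1, UXTW #2 takes this path, computing
 * x0 = sp + (zero_extend(w1) << 2); of the register add/subtract forms,
 * only this extended-register variant accepts SP as a source operand.
 */
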
/*
 * Add/subtract (shifted register)
 *
 *  31 30 29 28       24 23 22 21 20   16 15     10 9    5 4    0
 * +--+--+--+-----------+-----+--+-------+---------+------+------+
 * |sf|op| S| 0 1 0 1 1 |shift| 0|  Rm   |  imm6   |  Rn  |  Rd  |
 * +--+--+--+-----------+-----+--+-------+---------+------+------+
 *
 *    sf: 0 -> 32bit, 1 -> 64bit
 *    op: 0 -> add  , 1 -> sub
 *     S: 1 -> set flags
 * shift: 00 -> LSL, 01 -> LSR, 10 -> ASR, 11 -> RESERVED
 *  imm6: Shift amount to apply to Rm before the add/sub
 */
static void disas_add_sub_reg(DisasContext *s, uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int imm6 = extract32(insn, 10, 6);
    int rm = extract32(insn, 16, 5);
    int shift_type = extract32(insn, 22, 2);
    bool setflags = extract32(insn, 29, 1);
    bool sub_op = extract32(insn, 30, 1);
    bool sf = extract32(insn, 31, 1);

    TCGv_i64 tcg_rd = cpu_reg(s, rd);
    TCGv_i64 tcg_rn, tcg_rm;
    TCGv_i64 tcg_result;

    if ((shift_type == 3) || (!sf && (imm6 > 31))) {
        unallocated_encoding(s);
        return;
    }

    tcg_rn = read_cpu_reg(s, rn, sf);
    tcg_rm = read_cpu_reg(s, rm, sf);

    shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, imm6);

    tcg_result = tcg_temp_new_i64();

    if (!setflags) {
        if (sub_op) {
            tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
        } else {
            tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
        }
    } else {
        if (sub_op) {
            gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
        } else {
            gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
        }
    }

    if (sf) {
        tcg_gen_mov_i64(tcg_rd, tcg_result);
    } else {
        tcg_gen_ext32u_i64(tcg_rd, tcg_result);
    }

    tcg_temp_free_i64(tcg_result);
}

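/* For illustration: CMP x1, x2 is the alias SUBS xzr, x1, x2 and so is
 * handled here with setflags set and the result discarded into the zero
 * register; likewise CMN is ADDS with Rd = xzr.
 */
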
/* Data-processing (3 source)
 *
 *    31 30  29 28       24 23 21  20  16  15  14  10 9    5 4    0
 *  +--+------+-----------+------+------+----+------+------+------+
 *  |sf| op54 | 1 1 0 1 1 | op31 |  Rm  | o0 |  Ra  |  Rn  |  Rd  |
 *  +--+------+-----------+------+------+----+------+------+------+
 */
static void disas_data_proc_3src(DisasContext *s, uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int ra = extract32(insn, 10, 5);
    int rm = extract32(insn, 16, 5);
    int op_id = (extract32(insn, 29, 3) << 4) |
        (extract32(insn, 21, 3) << 1) |
        extract32(insn, 15, 1);
    bool sf = extract32(insn, 31, 1);
    bool is_sub = extract32(op_id, 0, 1);
    bool is_high = extract32(op_id, 2, 1);
    bool is_signed = false;
    TCGv_i64 tcg_op1;
    TCGv_i64 tcg_op2;
    TCGv_i64 tcg_tmp;

    /* Note that op_id is sf:op54:op31:o0 so it includes the 32/64 size flag */
    switch (op_id) {
    case 0x42: /* SMADDL */
    case 0x43: /* SMSUBL */
    case 0x44: /* SMULH */
        is_signed = true;
        break;
    case 0x0: /* MADD (32bit) */
    case 0x1: /* MSUB (32bit) */
    case 0x40: /* MADD (64bit) */
    case 0x41: /* MSUB (64bit) */
    case 0x4a: /* UMADDL */
    case 0x4b: /* UMSUBL */
    case 0x4c: /* UMULH */
        break;
    default:
        unallocated_encoding(s);
        return;
    }

    if (is_high) {
        TCGv_i64 low_bits = tcg_temp_new_i64(); /* low bits discarded */
        TCGv_i64 tcg_rd = cpu_reg(s, rd);
        TCGv_i64 tcg_rn = cpu_reg(s, rn);
        TCGv_i64 tcg_rm = cpu_reg(s, rm);

        if (is_signed) {
            tcg_gen_muls2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
        } else {
            tcg_gen_mulu2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
        }

        tcg_temp_free_i64(low_bits);
        return;
    }

    tcg_op1 = tcg_temp_new_i64();
    tcg_op2 = tcg_temp_new_i64();
    tcg_tmp = tcg_temp_new_i64();

    if (op_id < 0x42) {
        tcg_gen_mov_i64(tcg_op1, cpu_reg(s, rn));
        tcg_gen_mov_i64(tcg_op2, cpu_reg(s, rm));
    } else {
        if (is_signed) {
            tcg_gen_ext32s_i64(tcg_op1, cpu_reg(s, rn));
            tcg_gen_ext32s_i64(tcg_op2, cpu_reg(s, rm));
        } else {
            tcg_gen_ext32u_i64(tcg_op1, cpu_reg(s, rn));
            tcg_gen_ext32u_i64(tcg_op2, cpu_reg(s, rm));
        }
    }

    if (ra == 31 && !is_sub) {
        /* Special-case MADD with rA == XZR; it is the standard MUL alias */
        tcg_gen_mul_i64(cpu_reg(s, rd), tcg_op1, tcg_op2);
    } else {
        tcg_gen_mul_i64(tcg_tmp, tcg_op1, tcg_op2);
        if (is_sub) {
            tcg_gen_sub_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
        } else {
            tcg_gen_add_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
        }
    }

    if (!sf) {
        tcg_gen_ext32u_i64(cpu_reg(s, rd), cpu_reg(s, rd));
    }

    tcg_temp_free_i64(tcg_op1);
    tcg_temp_free_i64(tcg_op2);
    tcg_temp_free_i64(tcg_tmp);
}

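/* For illustration: SMADDL encodes as sf=1, op54=00, op31=001, o0=0, giving
 * op_id = (0b100 << 4) | (0b001 << 1) | 0 = 0x42 in the switch above, and
 * MUL x0, x1, x2 is simply MADD x0, x1, x2, xzr, caught by the ra == 31
 * special case.
 */
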
/* Add/subtract (with carry)
 *  31 30 29 28 27 26 25 24 23 22 21  20  16  15   10  9    5 4   0
 * +--+--+--+------------------------+------+---------+------+-----+
 * |sf|op| S| 1  1  0  1  0  0  0  0 |  rm  | opcode2 |  Rn  |  Rd |
 * +--+--+--+------------------------+------+---------+------+-----+
 *                                            [000000]
 */

static void disas_adc_sbc(DisasContext *s, uint32_t insn)
{
    unsigned int sf, op, setflags, rm, rn, rd;
    TCGv_i64 tcg_y, tcg_rn, tcg_rd;

    if (extract32(insn, 10, 6) != 0) {
        unallocated_encoding(s);
        return;
    }

    sf = extract32(insn, 31, 1);
    op = extract32(insn, 30, 1);
    setflags = extract32(insn, 29, 1);
    rm = extract32(insn, 16, 5);
    rn = extract32(insn, 5, 5);
    rd = extract32(insn, 0, 5);

    tcg_rd = cpu_reg(s, rd);
    tcg_rn = cpu_reg(s, rn);

    if (op) {
        tcg_y = new_tmp_a64(s);
        tcg_gen_not_i64(tcg_y, cpu_reg(s, rm));
    } else {
        tcg_y = cpu_reg(s, rm);
    }

    if (setflags) {
        gen_adc_CC(sf, tcg_rd, tcg_rn, tcg_y);
    } else {
        gen_adc(sf, tcg_rd, tcg_rn, tcg_y);
    }
}

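/* For illustration: SBC computes Rn - Rm - (1 - C), which by the identity
 * Rn - Rm - 1 + C == Rn + ~Rm + C is an add-with-carry of the inverted
 * operand; that is why the op (subtract) case above just inverts Rm and
 * reuses the gen_adc generators.
 */
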
/* Conditional compare (immediate / register)
 *  31 30 29 28 27 26 25 24 23 22 21  20    16 15  12  11  10  9   5  4 3   0
 * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
 * |sf|op| S| 1  1  0  1  0  0  1  0 |imm5/rm | cond |i/r |o2|  Rn  |o3|nzcv |
 * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
 *        [1]                             y                [0]       [0]
 */
static void disas_cc(DisasContext *s, uint32_t insn)
{
    unsigned int sf, op, y, cond, rn, nzcv, is_imm;
    TCGv_i32 tcg_t0, tcg_t1, tcg_t2;
    TCGv_i64 tcg_tmp, tcg_y, tcg_rn;
    DisasCompare c;

    if (!extract32(insn, 29, 1)) {
        unallocated_encoding(s);
        return;
    }
    if (insn & (1 << 10 | 1 << 4)) {
        unallocated_encoding(s);
        return;
    }
    sf = extract32(insn, 31, 1);
    op = extract32(insn, 30, 1);
    is_imm = extract32(insn, 11, 1);
    y = extract32(insn, 16, 5); /* y = rm (reg) or imm5 (imm) */
    cond = extract32(insn, 12, 4);
    rn = extract32(insn, 5, 5);
    nzcv = extract32(insn, 0, 4);

    /* Set T0 = !COND.  */
    tcg_t0 = tcg_temp_new_i32();
    arm_test_cc(&c, cond);
    tcg_gen_setcondi_i32(tcg_invert_cond(c.cond), tcg_t0, c.value, 0);
    arm_free_cc(&c);

    /* Load the arguments for the new comparison.  */
    if (is_imm) {
        tcg_y = new_tmp_a64(s);
        tcg_gen_movi_i64(tcg_y, y);
    } else {
        tcg_y = cpu_reg(s, y);
    }
    tcg_rn = cpu_reg(s, rn);

    /* Set the flags for the new comparison.  */
    tcg_tmp = tcg_temp_new_i64();
    if (op) {
        gen_sub_CC(sf, tcg_tmp, tcg_rn, tcg_y);
    } else {
        gen_add_CC(sf, tcg_tmp, tcg_rn, tcg_y);
    }
    tcg_temp_free_i64(tcg_tmp);

    /* If COND was false, force the flags to #nzcv.  Compute two masks
     * to help with this: T1 = (COND ? 0 : -1), T2 = (COND ? -1 : 0).
     * For tcg hosts that support ANDC, we can make do with just T1.
     * In either case, allow the tcg optimizer to delete any unused mask.
     */
    tcg_t1 = tcg_temp_new_i32();
    tcg_t2 = tcg_temp_new_i32();
    tcg_gen_neg_i32(tcg_t1, tcg_t0);
    tcg_gen_subi_i32(tcg_t2, tcg_t0, 1);
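
    /* For illustration: if COND is false then T0 = 1, T1 = -1 and T2 = 0,
     * so the ORs below force a flag to 1 and the ANDs force it to 0.  If
     * COND is true then T0 = 0, T1 = 0 and T2 = -1, and both operations
     * leave the flag unchanged.
     */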

    if (nzcv & 8) { /* N */
        tcg_gen_or_i32(cpu_NF, cpu_NF, tcg_t1);
    } else {
        if (TCG_TARGET_HAS_andc_i32) {
            tcg_gen_andc_i32(cpu_NF, cpu_NF, tcg_t1);
        } else {
            tcg_gen_and_i32(cpu_NF, cpu_NF, tcg_t2);
        }
    }
    if (nzcv & 4) { /* Z */
        if (TCG_TARGET_HAS_andc_i32) {
            tcg_gen_andc_i32(cpu_ZF, cpu_ZF, tcg_t1);
        } else {
            tcg_gen_and_i32(cpu_ZF, cpu_ZF, tcg_t2);
        }
    } else {
        tcg_gen_or_i32(cpu_ZF, cpu_ZF, tcg_t0);
    }
    if (nzcv & 2) { /* C */
        tcg_gen_or_i32(cpu_CF, cpu_CF, tcg_t0);
    } else {
        if (TCG_TARGET_HAS_andc_i32) {
            tcg_gen_andc_i32(cpu_CF, cpu_CF, tcg_t1);
        } else {
            tcg_gen_and_i32(cpu_CF, cpu_CF, tcg_t2);
        }
    }
    if (nzcv & 1) { /* V */
        tcg_gen_or_i32(cpu_VF, cpu_VF, tcg_t1);
    } else {
        if (TCG_TARGET_HAS_andc_i32) {
            tcg_gen_andc_i32(cpu_VF, cpu_VF, tcg_t1);
        } else {
            tcg_gen_and_i32(cpu_VF, cpu_VF, tcg_t2);
        }
    }
    tcg_temp_free_i32(tcg_t0);
    tcg_temp_free_i32(tcg_t1);
    tcg_temp_free_i32(tcg_t2);
}

/* Conditional select
 *   31   30  29  28             21 20  16 15  12 11 10 9    5 4    0
 * +----+----+---+-----------------+------+------+-----+------+------+
 * | sf | op | S | 1 1 0 1 0 1 0 0 |  Rm  | cond | op2 |  Rn  |  Rd  |
 * +----+----+---+-----------------+------+------+-----+------+------+
 */
static void disas_cond_select(DisasContext *s, uint32_t insn)
{
    unsigned int sf, else_inv, rm, cond, else_inc, rn, rd;
    TCGv_i64 tcg_rd, zero;
    DisasCompare64 c;

    if (extract32(insn, 29, 1) || extract32(insn, 11, 1)) {
        /* S == 1 or op2<1> == 1 */
        unallocated_encoding(s);
        return;
    }
    sf = extract32(insn, 31, 1);
    else_inv = extract32(insn, 30, 1);
    rm = extract32(insn, 16, 5);
    cond = extract32(insn, 12, 4);
    else_inc = extract32(insn, 10, 1);
    rn = extract32(insn, 5, 5);
    rd = extract32(insn, 0, 5);

    tcg_rd = cpu_reg(s, rd);

    a64_test_cc(&c, cond);
    zero = tcg_const_i64(0);

    if (rn == 31 && rm == 31 && (else_inc ^ else_inv)) {
        /* CSET & CSETM.  */
        tcg_gen_setcond_i64(tcg_invert_cond(c.cond), tcg_rd, c.value, zero);
        if (else_inv) {
            tcg_gen_neg_i64(tcg_rd, tcg_rd);
        }
    } else {
        TCGv_i64 t_true = cpu_reg(s, rn);
        TCGv_i64 t_false = read_cpu_reg(s, rm, 1);
        if (else_inv && else_inc) {
            tcg_gen_neg_i64(t_false, t_false);
        } else if (else_inv) {
            tcg_gen_not_i64(t_false, t_false);
        } else if (else_inc) {
            tcg_gen_addi_i64(t_false, t_false, 1);
        }
        tcg_gen_movcond_i64(c.cond, tcg_rd, c.value, zero, t_true, t_false);
    }

    tcg_temp_free_i64(zero);
    a64_free_cc(&c);

    if (!sf) {
        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
    }
}

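/* For illustration: CSINC x0, x1, x2, EQ yields x1 if EQ else x2 + 1, and
 * CSET w0, NE is the alias CSINC w0, wzr, wzr, EQ, which takes the
 * rn == rm == 31 fast path above.
 */
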
static void handle_clz(DisasContext *s, unsigned int sf,
                       unsigned int rn, unsigned int rd)
{
    TCGv_i64 tcg_rd, tcg_rn;
    tcg_rd = cpu_reg(s, rd);
    tcg_rn = cpu_reg(s, rn);

    if (sf) {
        tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64);
    } else {
        TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
        tcg_gen_clzi_i32(tcg_tmp32, tcg_tmp32, 32);
        tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
        tcg_temp_free_i32(tcg_tmp32);
    }
}

static void handle_cls(DisasContext *s, unsigned int sf,
                       unsigned int rn, unsigned int rd)
{
    TCGv_i64 tcg_rd, tcg_rn;
    tcg_rd = cpu_reg(s, rd);
    tcg_rn = cpu_reg(s, rn);

    if (sf) {
        tcg_gen_clrsb_i64(tcg_rd, tcg_rn);
    } else {
        TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
        tcg_gen_clrsb_i32(tcg_tmp32, tcg_tmp32);
        tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
        tcg_temp_free_i32(tcg_tmp32);
    }
}

static void handle_rbit(DisasContext *s, unsigned int sf,
                        unsigned int rn, unsigned int rd)
{
    TCGv_i64 tcg_rd, tcg_rn;
    tcg_rd = cpu_reg(s, rd);
    tcg_rn = cpu_reg(s, rn);

    if (sf) {
        gen_helper_rbit64(tcg_rd, tcg_rn);
    } else {
        TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
        gen_helper_rbit(tcg_tmp32, tcg_tmp32);
        tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
        tcg_temp_free_i32(tcg_tmp32);
    }
}

/* REV with sf==1, opcode==3 ("REV64") */
static void handle_rev64(DisasContext *s, unsigned int sf,
                         unsigned int rn, unsigned int rd)
{
    if (!sf) {
        unallocated_encoding(s);
        return;
    }
    tcg_gen_bswap64_i64(cpu_reg(s, rd), cpu_reg(s, rn));
}

/* REV with sf==0, opcode==2
 * REV32 (sf==1, opcode==2)
 */
static void handle_rev32(DisasContext *s, unsigned int sf,
                         unsigned int rn, unsigned int rd)
{
    TCGv_i64 tcg_rd = cpu_reg(s, rd);

    if (sf) {
        TCGv_i64 tcg_tmp = tcg_temp_new_i64();
        TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);

        /* bswap32_i64 requires zero high word */
        tcg_gen_ext32u_i64(tcg_tmp, tcg_rn);
        tcg_gen_bswap32_i64(tcg_rd, tcg_tmp);
        tcg_gen_shri_i64(tcg_tmp, tcg_rn, 32);
        tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp);
        tcg_gen_concat32_i64(tcg_rd, tcg_rd, tcg_tmp);

        tcg_temp_free_i64(tcg_tmp);
    } else {
        tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, rn));
        tcg_gen_bswap32_i64(tcg_rd, tcg_rd);
    }
}

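/* For illustration: REV32 byte-swaps each 32-bit word independently, so
 * x1 = 0x1122334455667788 gives 0x4433221188776655, whereas REV64 would
 * give the full reversal 0x8877665544332211.
 */
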
/* REV16 (opcode==1) */
static void handle_rev16(DisasContext *s, unsigned int sf,
                         unsigned int rn, unsigned int rd)
{
    TCGv_i64 tcg_rd = cpu_reg(s, rd);
    TCGv_i64 tcg_tmp = tcg_temp_new_i64();
    TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
    TCGv_i64 mask = tcg_const_i64(sf ? 0x00ff00ff00ff00ffull : 0x00ff00ff);

    tcg_gen_shri_i64(tcg_tmp, tcg_rn, 8);
    tcg_gen_and_i64(tcg_rd, tcg_rn, mask);
    tcg_gen_and_i64(tcg_tmp, tcg_tmp, mask);
    tcg_gen_shli_i64(tcg_rd, tcg_rd, 8);
    tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_tmp);

    tcg_temp_free_i64(mask);
    tcg_temp_free_i64(tcg_tmp);
}

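/* For illustration: the mask trick above swaps the bytes of every 16-bit
 * halfword in parallel, so x1 = 0x1122334455667788 gives
 * 0x2211443366558877 without needing a per-halfword loop.
 */
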
/* Data-processing (1 source)
 *   31  30  29  28             21 20     16 15    10 9    5 4    0
 * +----+---+---+-----------------+---------+--------+------+------+
 * | sf | 1 | S | 1 1 0 1 0 1 1 0 | opcode2 | opcode |  Rn  |  Rd  |
 * +----+---+---+-----------------+---------+--------+------+------+
 */
static void disas_data_proc_1src(DisasContext *s, uint32_t insn)
{
    unsigned int sf, opcode, rn, rd;

    if (extract32(insn, 29, 1) || extract32(insn, 16, 5)) {
        unallocated_encoding(s);
        return;
    }

    sf = extract32(insn, 31, 1);
    opcode = extract32(insn, 10, 6);
    rn = extract32(insn, 5, 5);
    rd = extract32(insn, 0, 5);

    switch (opcode) {
    case 0: /* RBIT */
        handle_rbit(s, sf, rn, rd);
        break;
    case 1: /* REV16 */
        handle_rev16(s, sf, rn, rd);
        break;
    case 2: /* REV32 */
        handle_rev32(s, sf, rn, rd);
        break;
    case 3: /* REV64 */
        handle_rev64(s, sf, rn, rd);
        break;
    case 4: /* CLZ */
        handle_clz(s, sf, rn, rd);
        break;
    case 5: /* CLS */
        handle_cls(s, sf, rn, rd);
        break;
    default: /* opcode > 5 is unallocated */
        unallocated_encoding(s);
        break;
    }
}

static void handle_div(DisasContext *s, bool is_signed, unsigned int sf,
                       unsigned int rm, unsigned int rn, unsigned int rd)
{
    TCGv_i64 tcg_n, tcg_m, tcg_rd;
    tcg_rd = cpu_reg(s, rd);

    if (!sf && is_signed) {
        tcg_n = new_tmp_a64(s);
        tcg_m = new_tmp_a64(s);
        tcg_gen_ext32s_i64(tcg_n, cpu_reg(s, rn));
        tcg_gen_ext32s_i64(tcg_m, cpu_reg(s, rm));
    } else {
        tcg_n = read_cpu_reg(s, rn, sf);
        tcg_m = read_cpu_reg(s, rm, sf);
    }

    if (is_signed) {
        gen_helper_sdiv64(tcg_rd, tcg_n, tcg_m);
    } else {
        gen_helper_udiv64(tcg_rd, tcg_n, tcg_m);
    }

    if (!sf) { /* zero extend final result */
        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
    }
}

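/* For illustration: the helpers implement the architected corner cases,
 * which never trap on A64: division by zero yields 0, and the signed
 * overflow case INT64_MIN / -1 yields INT64_MIN.
 */
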
/* LSLV, LSRV, ASRV, RORV */
static void handle_shift_reg(DisasContext *s,
                             enum a64_shift_type shift_type, unsigned int sf,
                             unsigned int rm, unsigned int rn, unsigned int rd)
{
    TCGv_i64 tcg_shift = tcg_temp_new_i64();
    TCGv_i64 tcg_rd = cpu_reg(s, rd);
    TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);

    tcg_gen_andi_i64(tcg_shift, cpu_reg(s, rm), sf ? 63 : 31);
    shift_reg(tcg_rd, tcg_rn, sf, shift_type, tcg_shift);
    tcg_temp_free_i64(tcg_shift);
}

/* CRC32[BHWX], CRC32C[BHWX] */
static void handle_crc32(DisasContext *s,
                         unsigned int sf, unsigned int sz, bool crc32c,
                         unsigned int rm, unsigned int rn, unsigned int rd)
{
    TCGv_i64 tcg_acc, tcg_val;
    TCGv_i32 tcg_bytes;

    if (!arm_dc_feature(s, ARM_FEATURE_CRC)
        || (sf == 1 && sz != 3)
        || (sf == 0 && sz == 3)) {
        unallocated_encoding(s);
        return;
    }

    if (sz == 3) {
        tcg_val = cpu_reg(s, rm);
    } else {
        uint64_t mask;
        switch (sz) {
        case 0:
            mask = 0xFF;
            break;
        case 1:
            mask = 0xFFFF;
            break;
        case 2:
            mask = 0xFFFFFFFF;
            break;
        default:
            g_assert_not_reached();
        }
        tcg_val = new_tmp_a64(s);
        tcg_gen_andi_i64(tcg_val, cpu_reg(s, rm), mask);
    }

    tcg_acc = cpu_reg(s, rn);
    tcg_bytes = tcg_const_i32(1 << sz);

    if (crc32c) {
        gen_helper_crc32c_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
    } else {
        gen_helper_crc32_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
    }

    tcg_temp_free_i32(tcg_bytes);
}

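/* For illustration: CRC32[BHWX] uses the IEEE 802.3 polynomial 0x04C11DB7
 * while CRC32C[BHWX] uses the Castagnoli polynomial 0x1EDC6F41; sz selects
 * how many low-order bytes of Rm (1, 2, 4 or 8) feed the accumulator in Rn.
 */
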
/* Data-processing (2 source)
 *   31   30  29 28             21 20  16 15    10 9    5 4    0
 * +----+---+---+-----------------+------+--------+------+------+
 * | sf | 0 | S | 1 1 0 1 0 1 1 0 |  Rm  | opcode |  Rn  |  Rd  |
 * +----+---+---+-----------------+------+--------+------+------+
 */
static void disas_data_proc_2src(DisasContext *s, uint32_t insn)
{
    unsigned int sf, rm, opcode, rn, rd;
    sf = extract32(insn, 31, 1);
    rm = extract32(insn, 16, 5);
    opcode = extract32(insn, 10, 6);
    rn = extract32(insn, 5, 5);
    rd = extract32(insn, 0, 5);

    if (extract32(insn, 29, 1)) {
        unallocated_encoding(s);
        return;
    }

    switch (opcode) {
    case 2: /* UDIV */
        handle_div(s, false, sf, rm, rn, rd);
        break;
    case 3: /* SDIV */
        handle_div(s, true, sf, rm, rn, rd);
        break;
    case 8: /* LSLV */
        handle_shift_reg(s, A64_SHIFT_TYPE_LSL, sf, rm, rn, rd);
        break;
    case 9: /* LSRV */
        handle_shift_reg(s, A64_SHIFT_TYPE_LSR, sf, rm, rn, rd);
        break;
    case 10: /* ASRV */
        handle_shift_reg(s, A64_SHIFT_TYPE_ASR, sf, rm, rn, rd);
        break;
    case 11: /* RORV */
        handle_shift_reg(s, A64_SHIFT_TYPE_ROR, sf, rm, rn, rd);
        break;
    case 16:
    case 17:
    case 18:
    case 19:
    case 20:
    case 21:
    case 22:
    case 23: /* CRC32 */
    {
        int sz = extract32(opcode, 0, 2);
        bool crc32c = extract32(opcode, 2, 1);
        handle_crc32(s, sf, sz, crc32c, rm, rn, rd);
        break;
    }
    default:
        unallocated_encoding(s);
        break;
    }
}

/* Data processing - register */
static void disas_data_proc_reg(DisasContext *s, uint32_t insn)
{
    switch (extract32(insn, 24, 5)) {
    case 0x0a: /* Logical (shifted register) */
        disas_logic_reg(s, insn);
        break;
    case 0x0b: /* Add/subtract */
        if (insn & (1 << 21)) { /* (extended register) */
            disas_add_sub_ext_reg(s, insn);
        } else {
            disas_add_sub_reg(s, insn);
        }
        break;
    case 0x1b: /* Data-processing (3 source) */
        disas_data_proc_3src(s, insn);
        break;
    case 0x1a:
        switch (extract32(insn, 21, 3)) {
        case 0x0: /* Add/subtract (with carry) */
            disas_adc_sbc(s, insn);
            break;
        case 0x2: /* Conditional compare */
            disas_cc(s, insn); /* both imm and reg forms */
            break;
        case 0x4: /* Conditional select */
            disas_cond_select(s, insn);
            break;
        case 0x6: /* Data-processing */
            if (insn & (1 << 30)) { /* (1 source) */
                disas_data_proc_1src(s, insn);
            } else {            /* (2 source) */
                disas_data_proc_2src(s, insn);
            }
            break;
        default:
            unallocated_encoding(s);
            break;
        }
        break;
    default:
        unallocated_encoding(s);
        break;
    }
}

static void handle_fp_compare(DisasContext *s, bool is_double,
                              unsigned int rn, unsigned int rm,
                              bool cmp_with_zero, bool signal_all_nans)
{
    TCGv_i64 tcg_flags = tcg_temp_new_i64();
    TCGv_ptr fpst = get_fpstatus_ptr();

    if (is_double) {
        TCGv_i64 tcg_vn, tcg_vm;

        tcg_vn = read_fp_dreg(s, rn);
        if (cmp_with_zero) {
            tcg_vm = tcg_const_i64(0);
        } else {
            tcg_vm = read_fp_dreg(s, rm);
        }
        if (signal_all_nans) {
            gen_helper_vfp_cmped_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
        } else {
            gen_helper_vfp_cmpd_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
        }
        tcg_temp_free_i64(tcg_vn);
        tcg_temp_free_i64(tcg_vm);
    } else {
        TCGv_i32 tcg_vn, tcg_vm;

        tcg_vn = read_fp_sreg(s, rn);
        if (cmp_with_zero) {
            tcg_vm = tcg_const_i32(0);
        } else {
            tcg_vm = read_fp_sreg(s, rm);
        }
        if (signal_all_nans) {
            gen_helper_vfp_cmpes_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
        } else {
            gen_helper_vfp_cmps_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
        }
        tcg_temp_free_i32(tcg_vn);
        tcg_temp_free_i32(tcg_vm);
    }

    tcg_temp_free_ptr(fpst);

    gen_set_nzcv(tcg_flags);

    tcg_temp_free_i64(tcg_flags);
}

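/* For illustration: the IEEE comparison result maps to NZCV as
 * equal -> 0110 (Z,C), less than -> 1000 (N), greater than -> 0010 (C),
 * unordered -> 0011 (C,V); the helpers return that encoding and
 * gen_set_nzcv() installs it into the flags.
 */
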
/* Floating point compare
 *   31  30  29 28       24 23  22  21 20  16 15 14 13  10    9    5 4     0
 * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
 * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | op  | 1 0 0 0 |  Rn  |  op2  |
 * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
 */
static void disas_fp_compare(DisasContext *s, uint32_t insn)
{
    unsigned int mos, type, rm, op, rn, opc, op2r;

    mos = extract32(insn, 29, 3);
    type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
    rm = extract32(insn, 16, 5);
    op = extract32(insn, 14, 2);
    rn = extract32(insn, 5, 5);
    opc = extract32(insn, 3, 2);
    op2r = extract32(insn, 0, 3);

    if (mos || op || op2r || type > 1) {
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    handle_fp_compare(s, type, rn, rm, opc & 1, opc & 2);
}

/* Floating point conditional compare
 *   31  30  29 28       24 23  22  21 20  16 15  12 11 10 9    5  4   3    0
 * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
 * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | cond | 0 1 |  Rn  | op | nzcv |
 * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
 */
static void disas_fp_ccomp(DisasContext *s, uint32_t insn)
{
    unsigned int mos, type, rm, cond, rn, op, nzcv;
    TCGv_i64 tcg_flags;
    TCGLabel *label_continue = NULL;

    mos = extract32(insn, 29, 3);
    type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
    rm = extract32(insn, 16, 5);
    cond = extract32(insn, 12, 4);
    rn = extract32(insn, 5, 5);
    op = extract32(insn, 4, 1);
    nzcv = extract32(insn, 0, 4);

    if (mos || type > 1) {
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    if (cond < 0x0e) { /* not always */
        TCGLabel *label_match = gen_new_label();
        label_continue = gen_new_label();
        arm_gen_test_cc(cond, label_match);
        /* nomatch: */
        tcg_flags = tcg_const_i64(nzcv << 28);
        gen_set_nzcv(tcg_flags);
        tcg_temp_free_i64(tcg_flags);
        tcg_gen_br(label_continue);
        gen_set_label(label_match);
    }

    handle_fp_compare(s, type, rn, rm, false, op);

    if (cond < 0x0e) {
        gen_set_label(label_continue);
    }
}

/* Floating point conditional select
 *   31  30  29 28       24 23  22  21 20  16 15  12 11 10 9    5 4    0
 * +---+---+---+-----------+------+---+------+------+-----+------+------+
 * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | cond | 1 1 |  Rn  |  Rd  |
 * +---+---+---+-----------+------+---+------+------+-----+------+------+
 */
static void disas_fp_csel(DisasContext *s, uint32_t insn)
{
    unsigned int mos, type, rm, cond, rn, rd;
    TCGv_i64 t_true, t_false, t_zero;
    DisasCompare64 c;

    mos = extract32(insn, 29, 3);
    type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
    rm = extract32(insn, 16, 5);
    cond = extract32(insn, 12, 4);
    rn = extract32(insn, 5, 5);
    rd = extract32(insn, 0, 5);

    if (mos || type > 1) {
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    /* Zero extend sreg inputs to 64 bits now.  */
    t_true = tcg_temp_new_i64();
    t_false = tcg_temp_new_i64();
    read_vec_element(s, t_true, rn, 0, type ? MO_64 : MO_32);
    read_vec_element(s, t_false, rm, 0, type ? MO_64 : MO_32);

    a64_test_cc(&c, cond);
    t_zero = tcg_const_i64(0);
    tcg_gen_movcond_i64(c.cond, t_true, c.value, t_zero, t_true, t_false);
    tcg_temp_free_i64(t_zero);
    tcg_temp_free_i64(t_false);
    a64_free_cc(&c);

    /* Note that sregs write back zeros to the high bits,
       and we've already done the zero-extension.  */
    write_fp_dreg(s, rd, t_true);
    tcg_temp_free_i64(t_true);
}

/* Floating-point data-processing (1 source) - single precision */
static void handle_fp_1src_single(DisasContext *s, int opcode, int rd, int rn)
{
    TCGv_ptr fpst;
    TCGv_i32 tcg_op;
    TCGv_i32 tcg_res;

    fpst = get_fpstatus_ptr();
    tcg_op = read_fp_sreg(s, rn);
    tcg_res = tcg_temp_new_i32();

    switch (opcode) {
    case 0x0: /* FMOV */
        tcg_gen_mov_i32(tcg_res, tcg_op);
        break;
    case 0x1: /* FABS */
        gen_helper_vfp_abss(tcg_res, tcg_op);
        break;
    case 0x2: /* FNEG */
        gen_helper_vfp_negs(tcg_res, tcg_op);
        break;
    case 0x3: /* FSQRT */
        gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env);
        break;
    case 0x8: /* FRINTN */
    case 0x9: /* FRINTP */
    case 0xa: /* FRINTM */
    case 0xb: /* FRINTZ */
    case 0xc: /* FRINTA */
    {
        TCGv_i32 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(opcode & 7));

        gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
        gen_helper_rints(tcg_res, tcg_op, fpst);

        gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
        tcg_temp_free_i32(tcg_rmode);
        break;
    }
    case 0xe: /* FRINTX */
        gen_helper_rints_exact(tcg_res, tcg_op, fpst);
        break;
    case 0xf: /* FRINTI */
        gen_helper_rints(tcg_res, tcg_op, fpst);
        break;
    default:
        abort();
    }

    write_fp_sreg(s, rd, tcg_res);

    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tcg_op);
    tcg_temp_free_i32(tcg_res);
}

/* Floating-point data-processing (1 source) - double precision */
static void handle_fp_1src_double(DisasContext *s, int opcode, int rd, int rn)
{
    TCGv_ptr fpst;
    TCGv_i64 tcg_op;
    TCGv_i64 tcg_res;

    fpst = get_fpstatus_ptr();
    tcg_op = read_fp_dreg(s, rn);
    tcg_res = tcg_temp_new_i64();

    switch (opcode) {
    case 0x0: /* FMOV */
        tcg_gen_mov_i64(tcg_res, tcg_op);
        break;
    case 0x1: /* FABS */
        gen_helper_vfp_absd(tcg_res, tcg_op);
        break;
    case 0x2: /* FNEG */
        gen_helper_vfp_negd(tcg_res, tcg_op);
        break;
    case 0x3: /* FSQRT */
        gen_helper_vfp_sqrtd(tcg_res, tcg_op, cpu_env);
        break;
    case 0x8: /* FRINTN */
    case 0x9: /* FRINTP */
    case 0xa: /* FRINTM */
    case 0xb: /* FRINTZ */
    case 0xc: /* FRINTA */
    {
        TCGv_i32 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(opcode & 7));

        gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
        gen_helper_rintd(tcg_res, tcg_op, fpst);

        gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
        tcg_temp_free_i32(tcg_rmode);
        break;
    }
    case 0xe: /* FRINTX */
        gen_helper_rintd_exact(tcg_res, tcg_op, fpst);
        break;
    case 0xf: /* FRINTI */
        gen_helper_rintd(tcg_res, tcg_op, fpst);
        break;
    default:
        abort();
    }

    write_fp_dreg(s, rd, tcg_res);

    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i64(tcg_op);
    tcg_temp_free_i64(tcg_res);
}

static void handle_fp_fcvt(DisasContext *s, int opcode,
                           int rd, int rn, int dtype, int ntype)
{
    switch (ntype) {
    case 0x0:
    {
        TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
        if (dtype == 1) {
            /* Single to double */
            TCGv_i64 tcg_rd = tcg_temp_new_i64();
            gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, cpu_env);
            write_fp_dreg(s, rd, tcg_rd);
            tcg_temp_free_i64(tcg_rd);
        } else {
            /* Single to half */
            TCGv_i32 tcg_rd = tcg_temp_new_i32();
            gen_helper_vfp_fcvt_f32_to_f16(tcg_rd, tcg_rn, cpu_env);
            /* write_fp_sreg is OK here because top half of tcg_rd is zero */
            write_fp_sreg(s, rd, tcg_rd);
            tcg_temp_free_i32(tcg_rd);
        }
        tcg_temp_free_i32(tcg_rn);
        break;
    }
    case 0x1:
    {
        TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
        TCGv_i32 tcg_rd = tcg_temp_new_i32();
        if (dtype == 0) {
            /* Double to single */
            gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, cpu_env);
        } else {
            /* Double to half */
            gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, cpu_env);
            /* write_fp_sreg is OK here because top half of tcg_rd is zero */
        }
        write_fp_sreg(s, rd, tcg_rd);
        tcg_temp_free_i32(tcg_rd);
        tcg_temp_free_i64(tcg_rn);
        break;
    }
    case 0x3:
    {
        TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
        tcg_gen_ext16u_i32(tcg_rn, tcg_rn);
        if (dtype == 0) {
            /* Half to single */
            TCGv_i32 tcg_rd = tcg_temp_new_i32();
            gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, cpu_env);
            write_fp_sreg(s, rd, tcg_rd);
            tcg_temp_free_i32(tcg_rd);
        } else {
            /* Half to double */
            TCGv_i64 tcg_rd = tcg_temp_new_i64();
            gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, cpu_env);
            write_fp_dreg(s, rd, tcg_rd);
            tcg_temp_free_i64(tcg_rd);
        }
        tcg_temp_free_i32(tcg_rn);
        break;
    }
    default:
        abort();
    }
}

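/* For illustration: FCVT is a plain IEEE format conversion, e.g. single
 * 1.0f (0x3f800000) converts to half 0x3c00 or to double
 * 0x3ff0000000000000; the narrowing cases round and can raise the usual
 * FP exceptions via the float_status the helpers take through cpu_env.
 */
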
/* Floating point data-processing (1 source)
 *   31  30  29 28       24 23  22  21 20    15 14       10 9    5 4    0
 * +---+---+---+-----------+------+---+--------+-----------+------+------+
 * | M | 0 | S | 1 1 1 1 0 | type | 1 | opcode | 1 0 0 0 0 |  Rn  |  Rd  |
 * +---+---+---+-----------+------+---+--------+-----------+------+------+
 */
static void disas_fp_1src(DisasContext *s, uint32_t insn)
{
    int type = extract32(insn, 22, 2);
    int opcode = extract32(insn, 15, 6);
    int rn = extract32(insn, 5, 5);
    int rd = extract32(insn, 0, 5);

    switch (opcode) {
    case 0x4: case 0x5: case 0x7:
    {
        /* FCVT between half, single and double precision */
        int dtype = extract32(opcode, 0, 2);
        if (type == 2 || dtype == type) {
            unallocated_encoding(s);
            return;
        }
        if (!fp_access_check(s)) {
            return;
        }

        handle_fp_fcvt(s, opcode, rd, rn, dtype, type);
        break;
    }
    case 0x0 ... 0x3:
    case 0x8 ... 0xc:
    case 0xe ... 0xf:
        /* 32-to-32 and 64-to-64 ops */
        switch (type) {
        case 0:
            if (!fp_access_check(s)) {
                return;
            }

            handle_fp_1src_single(s, opcode, rd, rn);
            break;
        case 1:
            if (!fp_access_check(s)) {
                return;
            }

            handle_fp_1src_double(s, opcode, rd, rn);
            break;
        default:
            unallocated_encoding(s);
        }
        break;
    default:
        unallocated_encoding(s);
        break;
    }
}

/* Floating-point data-processing (2 source) - single precision */
static void handle_fp_2src_single(DisasContext *s, int opcode,
                                  int rd, int rn, int rm)
{
    TCGv_i32 tcg_op1;
    TCGv_i32 tcg_op2;
    TCGv_i32 tcg_res;
    TCGv_ptr fpst;

    tcg_res = tcg_temp_new_i32();
    fpst = get_fpstatus_ptr();
    tcg_op1 = read_fp_sreg(s, rn);
    tcg_op2 = read_fp_sreg(s, rm);

    switch (opcode) {
    case 0x0: /* FMUL */
        gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x1: /* FDIV */
        gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x2: /* FADD */
        gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x3: /* FSUB */
        gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x4: /* FMAX */
        gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x5: /* FMIN */
        gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x6: /* FMAXNM */
        gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x7: /* FMINNM */
        gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x8: /* FNMUL */
        gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
        gen_helper_vfp_negs(tcg_res, tcg_res);
        break;
    }

    write_fp_sreg(s, rd, tcg_res);

    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tcg_op1);
    tcg_temp_free_i32(tcg_op2);
    tcg_temp_free_i32(tcg_res);
}

/* Floating-point data-processing (2 source) - double precision */
static void handle_fp_2src_double(DisasContext *s, int opcode,
                                  int rd, int rn, int rm)
{
    TCGv_i64 tcg_op1;
    TCGv_i64 tcg_op2;
    TCGv_i64 tcg_res;
    TCGv_ptr fpst;

    tcg_res = tcg_temp_new_i64();
    fpst = get_fpstatus_ptr();
    tcg_op1 = read_fp_dreg(s, rn);
    tcg_op2 = read_fp_dreg(s, rm);

    switch (opcode) {
    case 0x0: /* FMUL */
        gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x1: /* FDIV */
        gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x2: /* FADD */
        gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x3: /* FSUB */
        gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x4: /* FMAX */
        gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x5: /* FMIN */
        gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x6: /* FMAXNM */
        gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x7: /* FMINNM */
        gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x8: /* FNMUL */
        gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
        gen_helper_vfp_negd(tcg_res, tcg_res);
        break;
    }

    write_fp_dreg(s, rd, tcg_res);

    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i64(tcg_op1);
    tcg_temp_free_i64(tcg_op2);
    tcg_temp_free_i64(tcg_res);
}

/* Floating point data-processing (2 source)
 *   31  30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
 * +---+---+---+-----------+------+---+------+--------+-----+------+------+
 * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | opcode | 1 0 |  Rn  |  Rd  |
 * +---+---+---+-----------+------+---+------+--------+-----+------+------+
 */
static void disas_fp_2src(DisasContext *s, uint32_t insn)
{
    int type = extract32(insn, 22, 2);
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int rm = extract32(insn, 16, 5);
    int opcode = extract32(insn, 12, 4);

    if (opcode > 8) {
        unallocated_encoding(s);
        return;
    }

    switch (type) {
    case 0:
        if (!fp_access_check(s)) {
            return;
        }
        handle_fp_2src_single(s, opcode, rd, rn, rm);
        break;
    case 1:
        if (!fp_access_check(s)) {
            return;
        }
        handle_fp_2src_double(s, opcode, rd, rn, rm);
        break;
    default:
        unallocated_encoding(s);
    }
}

/* Floating-point data-processing (3 source) - single precision */
static void handle_fp_3src_single(DisasContext *s, bool o0, bool o1,
                                  int rd, int rn, int rm, int ra)
{
    TCGv_i32 tcg_op1, tcg_op2, tcg_op3;
    TCGv_i32 tcg_res = tcg_temp_new_i32();
    TCGv_ptr fpst = get_fpstatus_ptr();

    tcg_op1 = read_fp_sreg(s, rn);
    tcg_op2 = read_fp_sreg(s, rm);
    tcg_op3 = read_fp_sreg(s, ra);

    /* These are fused multiply-add, and must be done as one
     * floating point operation with no rounding between the
     * multiplication and addition steps.
     * NB that doing the negations here as separate steps is
     * correct: an input NaN should come out with its sign bit
     * flipped if it is a negated input.
     */
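
    /* For illustration, the o1/o0 combinations map to the four aliases:
     *   o1=0 o0=0  FMADD   rd =  ra + rn * rm
     *   o1=0 o0=1  FMSUB   rd =  ra - rn * rm
     *   o1=1 o0=0  FNMADD  rd = -ra - rn * rm
     *   o1=1 o0=1  FNMSUB  rd = -ra + rn * rm
     */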
    if (o1 == true) {
        gen_helper_vfp_negs(tcg_op3, tcg_op3);
    }

    if (o0 != o1) {
        gen_helper_vfp_negs(tcg_op1, tcg_op1);
    }

    gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);

    write_fp_sreg(s, rd, tcg_res);

    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tcg_op1);
    tcg_temp_free_i32(tcg_op2);
    tcg_temp_free_i32(tcg_op3);
    tcg_temp_free_i32(tcg_res);
}

/* Floating-point data-processing (3 source) - double precision */
static void handle_fp_3src_double(DisasContext *s, bool o0, bool o1,
                                  int rd, int rn, int rm, int ra)
{
    TCGv_i64 tcg_op1, tcg_op2, tcg_op3;
    TCGv_i64 tcg_res = tcg_temp_new_i64();
    TCGv_ptr fpst = get_fpstatus_ptr();

    tcg_op1 = read_fp_dreg(s, rn);
    tcg_op2 = read_fp_dreg(s, rm);
    tcg_op3 = read_fp_dreg(s, ra);

    /* These are fused multiply-add, and must be done as one
     * floating point operation with no rounding between the
     * multiplication and addition steps.
     * NB that doing the negations here as separate steps is
     * correct: an input NaN should come out with its sign bit
     * flipped if it is a negated input.
     */
4938    if (o1) {
4939        gen_helper_vfp_negd(tcg_op3, tcg_op3);
4940    }
4941
4942    if (o0 != o1) {
4943        gen_helper_vfp_negd(tcg_op1, tcg_op1);
4944    }
4945
4946    gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
4947
4948    write_fp_dreg(s, rd, tcg_res);
4949
4950    tcg_temp_free_ptr(fpst);
4951    tcg_temp_free_i64(tcg_op1);
4952    tcg_temp_free_i64(tcg_op2);
4953    tcg_temp_free_i64(tcg_op3);
4954    tcg_temp_free_i64(tcg_res);
4955}
4956
4957/* Floating point data-processing (3 source)
4958 *   31  30  29 28       24 23  22  21  20  16  15  14  10 9    5 4    0
4959 * +---+---+---+-----------+------+----+------+----+------+------+------+
4960 * | M | 0 | S | 1 1 1 1 1 | type | o1 |  Rm  | o0 |  Ra  |  Rn  |  Rd  |
4961 * +---+---+---+-----------+------+----+------+----+------+------+------+
4962 */
4963static void disas_fp_3src(DisasContext *s, uint32_t insn)
4964{
4965    int type = extract32(insn, 22, 2);
4966    int rd = extract32(insn, 0, 5);
4967    int rn = extract32(insn, 5, 5);
4968    int ra = extract32(insn, 10, 5);
4969    int rm = extract32(insn, 16, 5);
4970    bool o0 = extract32(insn, 15, 1);
4971    bool o1 = extract32(insn, 21, 1);
4972
4973    switch (type) {
4974    case 0:
4975        if (!fp_access_check(s)) {
4976            return;
4977        }
4978        handle_fp_3src_single(s, o0, o1, rd, rn, rm, ra);
4979        break;
4980    case 1:
4981        if (!fp_access_check(s)) {
4982            return;
4983        }
4984        handle_fp_3src_double(s, o0, o1, rd, rn, rm, ra);
4985        break;
4986    default:
4987        unallocated_encoding(s);
4988    }
4989}
4990
4991/* Floating point immediate
4992 *   31  30  29 28       24 23  22  21 20        13 12   10 9    5 4    0
4993 * +---+---+---+-----------+------+---+------------+-------+------+------+
4994 * | M | 0 | S | 1 1 1 1 0 | type | 1 |    imm8    | 1 0 0 | imm5 |  Rd  |
4995 * +---+---+---+-----------+------+---+------------+-------+------+------+
4996 */
4997static void disas_fp_imm(DisasContext *s, uint32_t insn)
4998{
4999    int rd = extract32(insn, 0, 5);
5000    int imm8 = extract32(insn, 13, 8);
5001    int is_double = extract32(insn, 22, 2);
5002    uint64_t imm;
5003    TCGv_i64 tcg_res;
5004
5005    if (is_double > 1) {
5006        unallocated_encoding(s);
5007        return;
5008    }
5009
5010    if (!fp_access_check(s)) {
5011        return;
5012    }
5013
5014    /* The imm8 encodes the sign bit, enough bits to represent
5015     * an exponent in the range 01....1xx to 10....0xx,
5016     * and the most significant 4 bits of the mantissa; see
5017     * VFPExpandImm() in the v8 ARM ARM.
5018     */
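        /* Worked example (values derived from the expansion below):
         * imm8 = 0x70 has sign 0, imm8<6> = 1 and imm8<5:0> = 0b110000,
         * giving top bits 0x3ff0 for doubles (0x3ff0000000000000 == 1.0)
         * and 0x3f80 for singles (0x3f800000 == 1.0f).
         */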
5019    if (is_double) {
5020        imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
5021            (extract32(imm8, 6, 1) ? 0x3fc0 : 0x4000) |
5022            extract32(imm8, 0, 6);
5023        imm <<= 48;
5024    } else {
5025        imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
5026            (extract32(imm8, 6, 1) ? 0x3e00 : 0x4000) |
5027            (extract32(imm8, 0, 6) << 3);
5028        imm <<= 16;
5029    }
5030
5031    tcg_res = tcg_const_i64(imm);
5032    write_fp_dreg(s, rd, tcg_res);
5033    tcg_temp_free_i64(tcg_res);
5034}
5035
5036/* Handle floating point <=> fixed point conversions. Note that we can
5037 * also deal with fp <=> integer conversions as a special case (scale == 64)
5038 * OPTME: consider handling that special case specially or at least skipping
5039 * the call to scalbn in the helpers for zero shifts.
5040 */
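    /* The scale field holds 64 - fbits, so the helpers below receive the
     * fraction bit count directly in tcg_shift: e.g. a conversion with 16
     * fraction bits is encoded with scale = 48, and the pure integer case
     * passes scale = 64 so that tcg_shift is 0.
     */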
5041static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode,
5042                           bool itof, int rmode, int scale, int sf, int type)
5043{
5044    bool is_signed = !(opcode & 1);
5045    bool is_double = type;
5046    TCGv_ptr tcg_fpstatus;
5047    TCGv_i32 tcg_shift;
5048
5049    tcg_fpstatus = get_fpstatus_ptr();
5050
5051    tcg_shift = tcg_const_i32(64 - scale);
5052
5053    if (itof) {
5054        TCGv_i64 tcg_int = cpu_reg(s, rn);
5055        if (!sf) {
5056            TCGv_i64 tcg_extend = new_tmp_a64(s);
5057
5058            if (is_signed) {
5059                tcg_gen_ext32s_i64(tcg_extend, tcg_int);
5060            } else {
5061                tcg_gen_ext32u_i64(tcg_extend, tcg_int);
5062            }
5063
5064            tcg_int = tcg_extend;
5065        }
5066
5067        if (is_double) {
5068            TCGv_i64 tcg_double = tcg_temp_new_i64();
5069            if (is_signed) {
5070                gen_helper_vfp_sqtod(tcg_double, tcg_int,
5071                                     tcg_shift, tcg_fpstatus);
5072            } else {
5073                gen_helper_vfp_uqtod(tcg_double, tcg_int,
5074                                     tcg_shift, tcg_fpstatus);
5075            }
5076            write_fp_dreg(s, rd, tcg_double);
5077            tcg_temp_free_i64(tcg_double);
5078        } else {
5079            TCGv_i32 tcg_single = tcg_temp_new_i32();
5080            if (is_signed) {
5081                gen_helper_vfp_sqtos(tcg_single, tcg_int,
5082                                     tcg_shift, tcg_fpstatus);
5083            } else {
5084                gen_helper_vfp_uqtos(tcg_single, tcg_int,
5085                                     tcg_shift, tcg_fpstatus);
5086            }
5087            write_fp_sreg(s, rd, tcg_single);
5088            tcg_temp_free_i32(tcg_single);
5089        }
5090    } else {
5091        TCGv_i64 tcg_int = cpu_reg(s, rd);
5092        TCGv_i32 tcg_rmode;
5093
5094        if (extract32(opcode, 2, 1)) {
5095            /* There are too many rounding modes to all fit into rmode,
5096             * so FCVTA[US] is a special case.
5097             */
5098            rmode = FPROUNDING_TIEAWAY;
5099        }
5100
5101        tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
5102
5103        gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
5104
5105        if (is_double) {
5106            TCGv_i64 tcg_double = read_fp_dreg(s, rn);
5107            if (is_signed) {
5108                if (!sf) {
5109                    gen_helper_vfp_tosld(tcg_int, tcg_double,
5110                                         tcg_shift, tcg_fpstatus);
5111                } else {
5112                    gen_helper_vfp_tosqd(tcg_int, tcg_double,
5113                                         tcg_shift, tcg_fpstatus);
5114                }
5115            } else {
5116                if (!sf) {
5117                    gen_helper_vfp_tould(tcg_int, tcg_double,
5118                                         tcg_shift, tcg_fpstatus);
5119                } else {
5120                    gen_helper_vfp_touqd(tcg_int, tcg_double,
5121                                         tcg_shift, tcg_fpstatus);
5122                }
5123            }
5124            tcg_temp_free_i64(tcg_double);
5125        } else {
5126            TCGv_i32 tcg_single = read_fp_sreg(s, rn);
5127            if (sf) {
5128                if (is_signed) {
5129                    gen_helper_vfp_tosqs(tcg_int, tcg_single,
5130                                         tcg_shift, tcg_fpstatus);
5131                } else {
5132                    gen_helper_vfp_touqs(tcg_int, tcg_single,
5133                                         tcg_shift, tcg_fpstatus);
5134                }
5135            } else {
5136                TCGv_i32 tcg_dest = tcg_temp_new_i32();
5137                if (is_signed) {
5138                    gen_helper_vfp_tosls(tcg_dest, tcg_single,
5139                                         tcg_shift, tcg_fpstatus);
5140                } else {
5141                    gen_helper_vfp_touls(tcg_dest, tcg_single,
5142                                         tcg_shift, tcg_fpstatus);
5143                }
5144                tcg_gen_extu_i32_i64(tcg_int, tcg_dest);
5145                tcg_temp_free_i32(tcg_dest);
5146            }
5147            tcg_temp_free_i32(tcg_single);
5148        }
5149
5150        gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
5151        tcg_temp_free_i32(tcg_rmode);
5152
5153        if (!sf) {
5154            tcg_gen_ext32u_i64(tcg_int, tcg_int);
5155        }
5156    }
5157
5158    tcg_temp_free_ptr(tcg_fpstatus);
5159    tcg_temp_free_i32(tcg_shift);
5160}
5161
5162/* Floating point <-> fixed point conversions
5163 *   31   30  29 28       24 23  22  21 20   19 18    16 15   10 9    5 4    0
5164 * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
5165 * | sf | 0 | S | 1 1 1 1 0 | type | 0 | rmode | opcode | scale |  Rn  |  Rd  |
5166 * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
5167 */
5168static void disas_fp_fixed_conv(DisasContext *s, uint32_t insn)
5169{
5170    int rd = extract32(insn, 0, 5);
5171    int rn = extract32(insn, 5, 5);
5172    int scale = extract32(insn, 10, 6);
5173    int opcode = extract32(insn, 16, 3);
5174    int rmode = extract32(insn, 19, 2);
5175    int type = extract32(insn, 22, 2);
5176    bool sbit = extract32(insn, 29, 1);
5177    bool sf = extract32(insn, 31, 1);
5178    bool itof;
5179
5180    if (sbit || (type > 1)
5181        || (!sf && scale < 32)) {
5182        unallocated_encoding(s);
5183        return;
5184    }
5185
5186    switch ((rmode << 3) | opcode) {
5187    case 0x2: /* SCVTF */
5188    case 0x3: /* UCVTF */
5189        itof = true;
5190        break;
5191    case 0x18: /* FCVTZS */
5192    case 0x19: /* FCVTZU */
5193        itof = false;
5194        break;
5195    default:
5196        unallocated_encoding(s);
5197        return;
5198    }
5199
5200    if (!fp_access_check(s)) {
5201        return;
5202    }
5203
5204    handle_fpfpcvt(s, rd, rn, opcode, itof, FPROUNDING_ZERO, scale, sf, type);
5205}
5206
5207static void handle_fmov(DisasContext *s, int rd, int rn, int type, bool itof)
5208{
5209    /* FMOV: gpr to or from float, double, or top half of quad fp reg,
5210     * without conversion.
5211     */
5212
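        /* type 0 is a 32 bit (S reg) move, type 1 a 64 bit (D reg) move,
         * and type 2 moves 64 bits to or from the top half of the 128 bit
         * vector register (FMOV Xt, Vn.D[1] and the reverse).
         */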
5213    if (itof) {
5214        TCGv_i64 tcg_rn = cpu_reg(s, rn);
5215
5216        switch (type) {
5217        case 0:
5218        {
5219            /* 32 bit */
5220            TCGv_i64 tmp = tcg_temp_new_i64();
5221            tcg_gen_ext32u_i64(tmp, tcg_rn);
5222            tcg_gen_st_i64(tmp, cpu_env, fp_reg_offset(s, rd, MO_64));
5223            tcg_gen_movi_i64(tmp, 0);
5224            tcg_gen_st_i64(tmp, cpu_env, fp_reg_hi_offset(s, rd));
5225            tcg_temp_free_i64(tmp);
5226            break;
5227        }
5228        case 1:
5229        {
5230            /* 64 bit */
5231            TCGv_i64 tmp = tcg_const_i64(0);
5232            tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_offset(s, rd, MO_64));
5233            tcg_gen_st_i64(tmp, cpu_env, fp_reg_hi_offset(s, rd));
5234            tcg_temp_free_i64(tmp);
5235            break;
5236        }
5237        case 2:
5238            /* 64 bit to top half. */
5239            tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_hi_offset(s, rd));
5240            break;
5241        }
5242    } else {
5243        TCGv_i64 tcg_rd = cpu_reg(s, rd);
5244
5245        switch (type) {
5246        case 0:
5247            /* 32 bit */
5248            tcg_gen_ld32u_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_32));
5249            break;
5250        case 1:
5251            /* 64 bit */
5252            tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_64));
5253            break;
5254        case 2:
5255            /* 64 bits from top half */
5256            tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_hi_offset(s, rn));
5257            break;
5258        }
5259    }
5260}
5261
5262/* Floating point <-> integer conversions
5263 *   31   30  29 28       24 23  22  21 20   19 18 16 15         10 9  5 4  0
5264 * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
5265 * | sf | 0 | S | 1 1 1 1 0 | type | 1 | rmode | opc | 0 0 0 0 0 0 | Rn | Rd |
5266 * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
5267 */
5268static void disas_fp_int_conv(DisasContext *s, uint32_t insn)
5269{
5270    int rd = extract32(insn, 0, 5);
5271    int rn = extract32(insn, 5, 5);
5272    int opcode = extract32(insn, 16, 3);
5273    int rmode = extract32(insn, 19, 2);
5274    int type = extract32(insn, 22, 2);
5275    bool sbit = extract32(insn, 29, 1);
5276    bool sf = extract32(insn, 31, 1);
5277
5278    if (sbit) {
5279        unallocated_encoding(s);
5280        return;
5281    }
5282
5283    if (opcode > 5) {
5284        /* FMOV */
5285        bool itof = opcode & 1;
5286
5287        if (rmode >= 2) {
5288            unallocated_encoding(s);
5289            return;
5290        }
5291
5292        switch (sf << 3 | type << 1 | rmode) {
5293        case 0x0: /* 32 bit */
5294        case 0xa: /* 64 bit */
5295        case 0xd: /* 64 bit to top half of quad */
5296            break;
5297        default:
5298            /* all other sf/type/rmode combinations are invalid */
5299            unallocated_encoding(s);
5300            return;
5301        }
5302
5303        if (!fp_access_check(s)) {
5304            return;
5305        }
5306        handle_fmov(s, rd, rn, type, itof);
5307    } else {
5308        /* actual FP conversions */
5309        bool itof = extract32(opcode, 1, 1);
5310
5311        if (type > 1 || (rmode != 0 && opcode > 1)) {
5312            unallocated_encoding(s);
5313            return;
5314        }
5315
5316        if (!fp_access_check(s)) {
5317            return;
5318        }
5319        handle_fpfpcvt(s, rd, rn, opcode, itof, rmode, 64, sf, type);
5320    }
5321}
5322
5323/* FP-specific subcases of table C3-6 (SIMD and FP data processing)
5324 *   31  30  29 28     25 24                          0
5325 * +---+---+---+---------+-----------------------------+
5326 * |   | 0 |   | 1 1 1 1 |                             |
5327 * +---+---+---+---------+-----------------------------+
5328 */
5329static void disas_data_proc_fp(DisasContext *s, uint32_t insn)
5330{
5331    if (extract32(insn, 24, 1)) {
5332        /* Floating point data-processing (3 source) */
5333        disas_fp_3src(s, insn);
5334    } else if (extract32(insn, 21, 1) == 0) {
5335        /* Floating point to fixed point conversions */
5336        disas_fp_fixed_conv(s, insn);
5337    } else {
5338        switch (extract32(insn, 10, 2)) {
5339        case 1:
5340            /* Floating point conditional compare */
5341            disas_fp_ccomp(s, insn);
5342            break;
5343        case 2:
5344            /* Floating point data-processing (2 source) */
5345            disas_fp_2src(s, insn);
5346            break;
5347        case 3:
5348            /* Floating point conditional select */
5349            disas_fp_csel(s, insn);
5350            break;
5351        case 0:
5352            switch (ctz32(extract32(insn, 12, 4))) {
5353            case 0: /* [15:12] == xxx1 */
5354                /* Floating point immediate */
5355                disas_fp_imm(s, insn);
5356                break;
5357            case 1: /* [15:12] == xx10 */
5358                /* Floating point compare */
5359                disas_fp_compare(s, insn);
5360                break;
5361            case 2: /* [15:12] == x100 */
5362                /* Floating point data-processing (1 source) */
5363                disas_fp_1src(s, insn);
5364                break;
5365            case 3: /* [15:12] == 1000 */
5366                unallocated_encoding(s);
5367                break;
5368            default: /* [15:12] == 0000 */
5369                /* Floating point <-> integer conversions */
5370                disas_fp_int_conv(s, insn);
5371                break;
5372            }
5373            break;
5374        }
5375    }
5376}
5377
5378static void do_ext64(DisasContext *s, TCGv_i64 tcg_left, TCGv_i64 tcg_right,
5379                     int pos)
5380{
5381    /* Extract 64 bits from the middle of two concatenated 64 bit
5382     * vector register slices left:right. The extracted bits start
5383     * at 'pos' bits into the right (least significant) side.
5384     * We return the result in tcg_right, and guarantee not to
5385     * trash tcg_left.
5386     */
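        /* For example, with pos = 8, right = 0x1122334455667788 and
         * left = 0x99aabbccddeeff00, the result is (right >> 8) |
         * (left << 56) = 0x0011223344556677.
         */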
5387    TCGv_i64 tcg_tmp = tcg_temp_new_i64();
5388    assert(pos > 0 && pos < 64);
5389
5390    tcg_gen_shri_i64(tcg_right, tcg_right, pos);
5391    tcg_gen_shli_i64(tcg_tmp, tcg_left, 64 - pos);
5392    tcg_gen_or_i64(tcg_right, tcg_right, tcg_tmp);
5393
5394    tcg_temp_free_i64(tcg_tmp);
5395}
5396
5397/* EXT
5398 *   31  30 29         24 23 22  21 20  16 15  14  11 10  9    5 4    0
5399 * +---+---+-------------+-----+---+------+---+------+---+------+------+
5400 * | 0 | Q | 1 0 1 1 1 0 | op2 | 0 |  Rm  | 0 | imm4 | 0 |  Rn  |  Rd  |
5401 * +---+---+-------------+-----+---+------+---+------+---+------+------+
5402 */
5403static void disas_simd_ext(DisasContext *s, uint32_t insn)
5404{
5405    int is_q = extract32(insn, 30, 1);
5406    int op2 = extract32(insn, 22, 2);
5407    int imm4 = extract32(insn, 11, 4);
5408    int rm = extract32(insn, 16, 5);
5409    int rn = extract32(insn, 5, 5);
5410    int rd = extract32(insn, 0, 5);
5411    int pos = imm4 << 3;
5412    TCGv_i64 tcg_resl, tcg_resh;
5413
5414    if (op2 != 0 || (!is_q && extract32(imm4, 3, 1))) {
5415        unallocated_encoding(s);
5416        return;
5417    }
5418
5419    if (!fp_access_check(s)) {
5420        return;
5421    }
5422
5423    tcg_resh = tcg_temp_new_i64();
5424    tcg_resl = tcg_temp_new_i64();
5425
5426    /* Vd gets bits starting at pos bits into Vm:Vn. This is
5427     * either extracting 128 bits from a 128:128 concatenation, or
5428     * extracting 64 bits from a 64:64 concatenation.
5429     */
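        /* For example, in the quad case imm4 = 9 gives pos = 72: we step
         * past Vn[0], combine Vn[1] and Vm[0] shifted down by the residual
         * 8 bits, and pull the final byte of the result from Vm[1].
         */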
5430    if (!is_q) {
5431        read_vec_element(s, tcg_resl, rn, 0, MO_64);
5432        if (pos != 0) {
5433            read_vec_element(s, tcg_resh, rm, 0, MO_64);
5434            do_ext64(s, tcg_resh, tcg_resl, pos);
5435        }
5436        tcg_gen_movi_i64(tcg_resh, 0);
5437    } else {
5438        TCGv_i64 tcg_hh;
5439        typedef struct {
5440            int reg;
5441            int elt;
5442        } EltPosns;
5443        EltPosns eltposns[] = { {rn, 0}, {rn, 1}, {rm, 0}, {rm, 1} };
5444        EltPosns *elt = eltposns;
5445
5446        if (pos >= 64) {
5447            elt++;
5448            pos -= 64;
5449        }
5450
5451        read_vec_element(s, tcg_resl, elt->reg, elt->elt, MO_64);
5452        elt++;
5453        read_vec_element(s, tcg_resh, elt->reg, elt->elt, MO_64);
5454        elt++;
5455        if (pos != 0) {
5456            do_ext64(s, tcg_resh, tcg_resl, pos);
5457            tcg_hh = tcg_temp_new_i64();
5458            read_vec_element(s, tcg_hh, elt->reg, elt->elt, MO_64);
5459            do_ext64(s, tcg_hh, tcg_resh, pos);
5460            tcg_temp_free_i64(tcg_hh);
5461        }
5462    }
5463
5464    write_vec_element(s, tcg_resl, rd, 0, MO_64);
5465    tcg_temp_free_i64(tcg_resl);
5466    write_vec_element(s, tcg_resh, rd, 1, MO_64);
5467    tcg_temp_free_i64(tcg_resh);
5468}
5469
5470/* TBL/TBX
5471 *   31  30 29         24 23 22  21 20  16 15  14 13  12  11 10 9    5 4    0
5472 * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
5473 * | 0 | Q | 0 0 1 1 1 0 | op2 | 0 |  Rm  | 0 | len | op | 0 0 |  Rn  |  Rd  |
5474 * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
5475 */
5476static void disas_simd_tb(DisasContext *s, uint32_t insn)
5477{
5478    int op2 = extract32(insn, 22, 2);
5479    int is_q = extract32(insn, 30, 1);
5480    int rm = extract32(insn, 16, 5);
5481    int rn = extract32(insn, 5, 5);
5482    int rd = extract32(insn, 0, 5);
5483    int is_tblx = extract32(insn, 12, 1);
5484    int len = extract32(insn, 13, 2);
5485    TCGv_i64 tcg_resl, tcg_resh, tcg_idx;
5486    TCGv_i32 tcg_regno, tcg_numregs;
5487
5488    if (op2 != 0) {
5489        unallocated_encoding(s);
5490        return;
5491    }
5492
5493    if (!fp_access_check(s)) {
5494        return;
5495    }
5496
5497    /* This does a table lookup: for every byte element in the input
5498     * we index into a table formed from up to four vector registers,
5499     * and then the output is the result of the lookups. Our helper
5500     * function does the lookup operation for a single 64 bit part of
5501     * the input.
5502     */
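        /* For example, a lookup with len = 1 forms a 32 byte table from
         * registers Vn and V(n+1); an index byte of 5 selects byte 5 of
         * that table, while an out of range index yields 0 for TBL and
         * leaves the destination byte unchanged for TBX.
         */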
5503    tcg_resl = tcg_temp_new_i64();
5504    tcg_resh = tcg_temp_new_i64();
5505
5506    if (is_tblx) {
5507        read_vec_element(s, tcg_resl, rd, 0, MO_64);
5508    } else {
5509        tcg_gen_movi_i64(tcg_resl, 0);
5510    }
5511    if (is_tblx && is_q) {
5512        read_vec_element(s, tcg_resh, rd, 1, MO_64);
5513    } else {
5514        tcg_gen_movi_i64(tcg_resh, 0);
5515    }
5516
5517    tcg_idx = tcg_temp_new_i64();
5518    tcg_regno = tcg_const_i32(rn);
5519    tcg_numregs = tcg_const_i32(len + 1);
5520    read_vec_element(s, tcg_idx, rm, 0, MO_64);
5521    gen_helper_simd_tbl(tcg_resl, cpu_env, tcg_resl, tcg_idx,
5522                        tcg_regno, tcg_numregs);
5523    if (is_q) {
5524        read_vec_element(s, tcg_idx, rm, 1, MO_64);
5525        gen_helper_simd_tbl(tcg_resh, cpu_env, tcg_resh, tcg_idx,
5526                            tcg_regno, tcg_numregs);
5527    }
5528    tcg_temp_free_i64(tcg_idx);
5529    tcg_temp_free_i32(tcg_regno);
5530    tcg_temp_free_i32(tcg_numregs);
5531
5532    write_vec_element(s, tcg_resl, rd, 0, MO_64);
5533    tcg_temp_free_i64(tcg_resl);
5534    write_vec_element(s, tcg_resh, rd, 1, MO_64);
5535    tcg_temp_free_i64(tcg_resh);
5536}
5537
5538/* ZIP/UZP/TRN
5539 *   31  30 29         24 23  22  21 20   16 15 14 12 11 10 9    5 4    0
5540 * +---+---+-------------+------+---+------+---+-----+-----+------+------+
5541 * | 0 | Q | 0 0 1 1 1 0 | size | 0 |  Rm  | 0 | opc | 1 0 |  Rn  |  Rd  |
5542 * +---+---+-------------+------+---+------+---+-----+-----+------+------+
5543 */
5544static void disas_simd_zip_trn(DisasContext *s, uint32_t insn)
5545{
5546    int rd = extract32(insn, 0, 5);
5547    int rn = extract32(insn, 5, 5);
5548    int rm = extract32(insn, 16, 5);
5549    int size = extract32(insn, 22, 2);
5550    /* opc field bits [1:0] indicate ZIP/UZP/TRN;
5551     * bit 2 indicates 1 vs 2 variant of the insn.
5552     */
5553    int opcode = extract32(insn, 12, 2);
5554    bool part = extract32(insn, 14, 1);
5555    bool is_q = extract32(insn, 30, 1);
5556    int esize = 8 << size;
5557    int i, ofs;
5558    int datasize = is_q ? 128 : 64;
5559    int elements = datasize / esize;
5560    TCGv_i64 tcg_res, tcg_resl, tcg_resh;
5561
5562    if (opcode == 0 || (size == 3 && !is_q)) {
5563        unallocated_encoding(s);
5564        return;
5565    }
5566
5567    if (!fp_access_check(s)) {
5568        return;
5569    }
5570
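        /* For example, with byte elements in the 64 bit case:
         *   UZP1: d0..d7 = n0 n2 n4 n6 m0 m2 m4 m6  (UZP2 takes the odds)
         *   TRN1: d0..d7 = n0 m0 n2 m2 n4 m4 n6 m6
         *   ZIP1: d0..d7 = n0 m0 n1 m1 n2 m2 n3 m3  (ZIP2 starts halfway)
         */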
5571    tcg_resl = tcg_const_i64(0);
5572    tcg_resh = tcg_const_i64(0);
5573    tcg_res = tcg_temp_new_i64();
5574
5575    for (i = 0; i < elements; i++) {
5576        switch (opcode) {
5577        case 1: /* UZP1/2 */
5578        {
5579            int midpoint = elements / 2;
5580            if (i < midpoint) {
5581                read_vec_element(s, tcg_res, rn, 2 * i + part, size);
5582            } else {
5583                read_vec_element(s, tcg_res, rm,
5584                                 2 * (i - midpoint) + part, size);
5585            }
5586            break;
5587        }
5588        case 2: /* TRN1/2 */
5589            if (i & 1) {
5590                read_vec_element(s, tcg_res, rm, (i & ~1) + part, size);
5591            } else {
5592                read_vec_element(s, tcg_res, rn, (i & ~1) + part, size);
5593            }
5594            break;
5595        case 3: /* ZIP1/2 */
5596        {
5597            int base = part * elements / 2;
5598            if (i & 1) {
5599                read_vec_element(s, tcg_res, rm, base + (i >> 1), size);
5600            } else {
5601                read_vec_element(s, tcg_res, rn, base + (i >> 1), size);
5602            }
5603            break;
5604        }
5605        default:
5606            g_assert_not_reached();
5607        }
5608
5609        ofs = i * esize;
5610        if (ofs < 64) {
5611            tcg_gen_shli_i64(tcg_res, tcg_res, ofs);
5612            tcg_gen_or_i64(tcg_resl, tcg_resl, tcg_res);
5613        } else {
5614            tcg_gen_shli_i64(tcg_res, tcg_res, ofs - 64);
5615            tcg_gen_or_i64(tcg_resh, tcg_resh, tcg_res);
5616        }
5617    }
5618
5619    tcg_temp_free_i64(tcg_res);
5620
5621    write_vec_element(s, tcg_resl, rd, 0, MO_64);
5622    tcg_temp_free_i64(tcg_resl);
5623    write_vec_element(s, tcg_resh, rd, 1, MO_64);
5624    tcg_temp_free_i64(tcg_resh);
5625}
5626
5627static void do_minmaxop(DisasContext *s, TCGv_i32 tcg_elt1, TCGv_i32 tcg_elt2,
5628                        int opc, bool is_min, TCGv_ptr fpst)
5629{
5630    /* Helper function for disas_simd_across_lanes: do a single precision
5631     * min/max operation on the specified two inputs,
5632     * and return the result in tcg_elt1.
5633     */
5634    if (opc == 0xc) {
5635        if (is_min) {
5636            gen_helper_vfp_minnums(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
5637        } else {
5638            gen_helper_vfp_maxnums(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
5639        }
5640    } else {
5641        assert(opc == 0xf);
5642        if (is_min) {
5643            gen_helper_vfp_mins(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
5644        } else {
5645            gen_helper_vfp_maxs(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
5646        }
5647    }
5648}
5649
5650/* AdvSIMD across lanes
5651 *   31  30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
5652 * +---+---+---+-----------+------+-----------+--------+-----+------+------+
5653 * | 0 | Q | U | 0 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
5654 * +---+---+---+-----------+------+-----------+--------+-----+------+------+
5655 */
5656static void disas_simd_across_lanes(DisasContext *s, uint32_t insn)
5657{
5658    int rd = extract32(insn, 0, 5);
5659    int rn = extract32(insn, 5, 5);
5660    int size = extract32(insn, 22, 2);
5661    int opcode = extract32(insn, 12, 5);
5662    bool is_q = extract32(insn, 30, 1);
5663    bool is_u = extract32(insn, 29, 1);
5664    bool is_fp = false;
5665    bool is_min = false;
5666    int esize;
5667    int elements;
5668    int i;
5669    TCGv_i64 tcg_res, tcg_elt;
5670
5671    switch (opcode) {
5672    case 0x1b: /* ADDV */
5673        if (is_u) {
5674            unallocated_encoding(s);
5675            return;
5676        }
5677        /* fall through */
5678    case 0x3: /* SADDLV, UADDLV */
5679    case 0xa: /* SMAXV, UMAXV */
5680    case 0x1a: /* SMINV, UMINV */
5681        if (size == 3 || (size == 2 && !is_q)) {
5682            unallocated_encoding(s);
5683            return;
5684        }
5685        break;
5686    case 0xc: /* FMAXNMV, FMINNMV */
5687    case 0xf: /* FMAXV, FMINV */
5688        if (!is_u || !is_q || extract32(size, 0, 1)) {
5689            unallocated_encoding(s);
5690            return;
5691        }
5692        /* Bit 1 of size field encodes min vs max, and actual size is always
5693         * 32 bits: adjust the size variable so following code can rely on it
5694         */
5695        is_min = extract32(size, 1, 1);
5696        is_fp = true;
5697        size = 2;
5698        break;
5699    default:
5700        unallocated_encoding(s);
5701        return;
5702    }
5703
5704    if (!fp_access_check(s)) {
5705        return;
5706    }
5707
5708    esize = 8 << size;
5709    elements = (is_q ? 128 : 64) / esize;
5710
5711    tcg_res = tcg_temp_new_i64();
5712    tcg_elt = tcg_temp_new_i64();
5713
5714    /* These instructions operate across all lanes of a vector
5715     * to produce a single result. We can guarantee that a 64
5716     * bit intermediate is sufficient:
5717     *  + for [US]ADDLV the maximum element size is 32 bits, and
5718     *    the result type is 64 bits
5719     *  + for FMAX*V, FMIN*V, ADDV the intermediate type is the
5720     *    same as the element size, which is 32 bits at most
5721     * For the integer operations we can choose to work at 64
5722     * or 32 bits and truncate at the end; for simplicity
5723     * we use 64 bits always. The floating point
5724     * ops do require 32 bit intermediates, though.
5725     */
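        /* For example, UADDLV H0, V1.16B sums sixteen unsigned bytes into
         * a 16 bit (2 * esize) result, which fits easily in 64 bits, while
         * ADDV B0, V1.16B wraps modulo 256 when truncated at the end.
         */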
5726    if (!is_fp) {
5727        read_vec_element(s, tcg_res, rn, 0, size | (is_u ? 0 : MO_SIGN));
5728
5729        for (i = 1; i < elements; i++) {
5730            read_vec_element(s, tcg_elt, rn, i, size | (is_u ? 0 : MO_SIGN));
5731
5732            switch (opcode) {
5733            case 0x03: /* SADDLV / UADDLV */
5734            case 0x1b: /* ADDV */
5735                tcg_gen_add_i64(tcg_res, tcg_res, tcg_elt);
5736                break;
5737            case 0x0a: /* SMAXV / UMAXV */
5738                tcg_gen_movcond_i64(is_u ? TCG_COND_GEU : TCG_COND_GE,
5739                                    tcg_res,
5740                                    tcg_res, tcg_elt, tcg_res, tcg_elt);
5741                break;
5742            case 0x1a: /* SMINV / UMINV */
5743                tcg_gen_movcond_i64(is_u ? TCG_COND_LEU : TCG_COND_LE,
5744                                    tcg_res,
5745                                    tcg_res, tcg_elt, tcg_res, tcg_elt);
5746                break;
5748            default:
5749                g_assert_not_reached();
5750            }
5751
5752        }
5753    } else {
5754        /* Floating point ops which work on 32 bit (single) intermediates.
5755         * Note that correct NaN propagation requires that we do these
5756         * operations in exactly the order specified by the pseudocode.
5757         */
5758        TCGv_i32 tcg_elt1 = tcg_temp_new_i32();
5759        TCGv_i32 tcg_elt2 = tcg_temp_new_i32();
5760        TCGv_i32 tcg_elt3 = tcg_temp_new_i32();
5761        TCGv_ptr fpst = get_fpstatus_ptr();
5762
5763        assert(esize == 32);
5764        assert(elements == 4);
5765
5766        read_vec_element(s, tcg_elt, rn, 0, MO_32);
5767        tcg_gen_extrl_i64_i32(tcg_elt1, tcg_elt);
5768        read_vec_element(s, tcg_elt, rn, 1, MO_32);
5769        tcg_gen_extrl_i64_i32(tcg_elt2, tcg_elt);
5770
5771        do_minmaxop(s, tcg_elt1, tcg_elt2, opcode, is_min, fpst);
5772
5773        read_vec_element(s, tcg_elt, rn, 2, MO_32);
5774        tcg_gen_extrl_i64_i32(tcg_elt2, tcg_elt);
5775        read_vec_element(s, tcg_elt, rn, 3, MO_32);
5776        tcg_gen_extrl_i64_i32(tcg_elt3, tcg_elt);
5777
5778        do_minmaxop(s, tcg_elt2, tcg_elt3, opcode, is_min, fpst);
5779
5780        do_minmaxop(s, tcg_elt1, tcg_elt2, opcode, is_min, fpst);
5781
5782        tcg_gen_extu_i32_i64(tcg_res, tcg_elt1);
5783        tcg_temp_free_i32(tcg_elt1);
5784        tcg_temp_free_i32(tcg_elt2);
5785        tcg_temp_free_i32(tcg_elt3);
5786        tcg_temp_free_ptr(fpst);
5787    }
5788
5789    tcg_temp_free_i64(tcg_elt);
5790
5791    /* Now truncate the result to the width required for the final output */
5792    if (opcode == 0x03) {
5793        /* SADDLV, UADDLV: result is 2*esize */
5794        size++;
5795    }
5796
5797    switch (size) {
5798    case 0:
5799        tcg_gen_ext8u_i64(tcg_res, tcg_res);
5800        break;
5801    case 1:
5802        tcg_gen_ext16u_i64(tcg_res, tcg_res);
5803        break;
5804    case 2:
5805        tcg_gen_ext32u_i64(tcg_res, tcg_res);
5806        break;
5807    case 3:
5808        break;
5809    default:
5810        g_assert_not_reached();
5811    }
5812
5813    write_fp_dreg(s, rd, tcg_res);
5814    tcg_temp_free_i64(tcg_res);
5815}
5816
5817/* DUP (Element, Vector)
5818 *
5819 *  31  30   29              21 20    16 15        10  9    5 4    0
5820 * +---+---+-------------------+--------+-------------+------+------+
5821 * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 0 1 |  Rn  |  Rd  |
5822 * +---+---+-------------------+--------+-------------+------+------+
5823 *
5824 * size: encoded in imm5 (see ARM ARM LowestSetBit())
5825 */
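    /* For example, imm5 = 0b00110 has its lowest set bit at position 1,
     * so size = 1 (16 bit elements) and the source index is
     * imm5<4:2> = 1.
     */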
5826static void handle_simd_dupe(DisasContext *s, int is_q, int rd, int rn,
5827                             int imm5)
5828{
5829    int size = ctz32(imm5);
5830    int esize = 8 << size;
5831    int elements = (is_q ? 128 : 64) / esize;
5832    int index, i;
5833    TCGv_i64 tmp;
5834
5835    if (size > 3 || (size == 3 && !is_q)) {
5836        unallocated_encoding(s);
5837        return;
5838    }
5839
5840    if (!fp_access_check(s)) {
5841        return;
5842    }
5843
5844    index = imm5 >> (size + 1);
5845
5846    tmp = tcg_temp_new_i64();
5847    read_vec_element(s, tmp, rn, index, size);
5848
5849    for (i = 0; i < elements; i++) {
5850        write_vec_element(s, tmp, rd, i, size);
5851    }
5852
5853    if (!is_q) {
5854        clear_vec_high(s, rd);
5855    }
5856
5857    tcg_temp_free_i64(tmp);
5858}
5859
5860/* DUP (element, scalar)
5861 *  31                   21 20    16 15        10  9    5 4    0
5862 * +-----------------------+--------+-------------+------+------+
5863 * | 0 1 0 1 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 0 1 |  Rn  |  Rd  |
5864 * +-----------------------+--------+-------------+------+------+
5865 */
5866static void handle_simd_dupes(DisasContext *s, int rd, int rn,
5867                              int imm5)
5868{
5869    int size = ctz32(imm5);
5870    int index;
5871    TCGv_i64 tmp;
5872
5873    if (size > 3) {
5874        unallocated_encoding(s);
5875        return;
5876    }
5877
5878    if (!fp_access_check(s)) {
5879        return;
5880    }
5881
5882    index = imm5 >> (size + 1);
5883
5884    /* This instruction just extracts the specified element and
5885     * zero-extends it into the bottom of the destination register.
5886     */
5887    tmp = tcg_temp_new_i64();
5888    read_vec_element(s, tmp, rn, index, size);
5889    write_fp_dreg(s, rd, tmp);
5890    tcg_temp_free_i64(tmp);
5891}
5892
5893/* DUP (General)
5894 *
5895 *  31  30   29              21 20    16 15        10  9    5 4    0
5896 * +---+---+-------------------+--------+-------------+------+------+
5897 * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 1 1 |  Rn  |  Rd  |
5898 * +---+---+-------------------+--------+-------------+------+------+
5899 *
5900 * size: encoded in imm5 (see ARM ARM LowestSetBit())
5901 */
5902static void handle_simd_dupg(DisasContext *s, int is_q, int rd, int rn,
5903                             int imm5)
5904{
5905    int size = ctz32(imm5);
5906    int esize = 8 << size;
5907    int elements = (is_q ? 128 : 64) / esize;
5908    int i = 0;
5909
5910    if (size > 3 || ((size == 3) && !is_q)) {
5911        unallocated_encoding(s);
5912        return;
5913    }
5914
5915    if (!fp_access_check(s)) {
5916        return;
5917    }
5918
5919    for (i = 0; i < elements; i++) {
5920        write_vec_element(s, cpu_reg(s, rn), rd, i, size);
5921    }
5922    if (!is_q) {
5923        clear_vec_high(s, rd);
5924    }
5925}
5926
5927/* INS (Element)
5928 *
5929 *  31                   21 20    16 15  14    11  10 9    5 4    0
5930 * +-----------------------+--------+------------+---+------+------+
5931 * | 0 1 1 0 1 1 1 0 0 0 0 |  imm5  | 0 |  imm4  | 1 |  Rn  |  Rd  |
5932 * +-----------------------+--------+------------+---+------+------+
5933 *
5934 * size: encoded in imm5 (see ARM ARM LowestSetBit())
5935 * index: encoded in imm5<4:size+1>
5936 */
5937static void handle_simd_inse(DisasContext *s, int rd, int rn,
5938                             int imm4, int imm5)
5939{
5940    int size = ctz32(imm5);
5941    int src_index, dst_index;
5942    TCGv_i64 tmp;
5943
5944    if (size > 3) {
5945        unallocated_encoding(s);
5946        return;
5947    }
5948
5949    if (!fp_access_check(s)) {
5950        return;
5951    }
5952
5953    dst_index = extract32(imm5, 1+size, 5);
5954    src_index = extract32(imm4, size, 4);
5955
5956    tmp = tcg_temp_new_i64();
5957
5958    read_vec_element(s, tmp, rn, src_index, size);
5959    write_vec_element(s, tmp, rd, dst_index, size);
5960
5961    tcg_temp_free_i64(tmp);
5962}
5963
5964
5965/* INS (General)
5966 *
5967 *  31                   21 20    16 15        10  9    5 4    0
5968 * +-----------------------+--------+-------------+------+------+
5969 * | 0 1 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 1 1 1 |  Rn  |  Rd  |
5970 * +-----------------------+--------+-------------+------+------+
5971 *
5972 * size: encoded in imm5 (see ARM ARM LowestSetBit())
5973 * index: encoded in imm5<4:size+1>
5974 */
5975static void handle_simd_insg(DisasContext *s, int rd, int rn, int imm5)
5976{
5977    int size = ctz32(imm5);
5978    int idx;
5979
5980    if (size > 3) {
5981        unallocated_encoding(s);
5982        return;
5983    }
5984
5985    if (!fp_access_check(s)) {
5986        return;
5987    }
5988
5989    idx = extract32(imm5, 1 + size, 4 - size);
5990    write_vec_element(s, cpu_reg(s, rn), rd, idx, size);
5991}
5992
5993/*
5994 * UMOV (General)
5995 * SMOV (General)
5996 *
5997 *  31  30   29              21 20    16 15    12   10 9    5 4    0
5998 * +---+---+-------------------+--------+-------------+------+------+
5999 * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 1 U 1 1 |  Rn  |  Rd  |
6000 * +---+---+-------------------+--------+-------------+------+------+
6001 *
6002 * U: unsigned when set
6003 * size: encoded in imm5 (see ARM ARM LowestSetBit())
6004 */
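    /* For example, SMOV Wd, Vn.H[2] has imm5 = 0b01010: size = 1 and
     * element = imm5<4:2> = 2; the halfword is read sign extended and
     * bits 63:32 of the result are then cleared for the W form.
     */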
6005static void handle_simd_umov_smov(DisasContext *s, int is_q, int is_signed,
6006                                  int rn, int rd, int imm5)
6007{
6008    int size = ctz32(imm5);
6009    int element;
6010    TCGv_i64 tcg_rd;
6011
6012    /* Check for UnallocatedEncodings */
6013    if (is_signed) {
6014        if (size > 2 || (size == 2 && !is_q)) {
6015            unallocated_encoding(s);
6016            return;
6017        }
6018    } else {
6019        if (size > 3
6020            || (size < 3 && is_q)
6021            || (size == 3 && !is_q)) {
6022            unallocated_encoding(s);
6023            return;
6024        }
6025    }
6026
6027    if (!fp_access_check(s)) {
6028        return;
6029    }
6030
6031    element = extract32(imm5, 1+size, 4);
6032
6033    tcg_rd = cpu_reg(s, rd);
6034    read_vec_element(s, tcg_rd, rn, element, size | (is_signed ? MO_SIGN : 0));
6035    if (is_signed && !is_q) {
6036        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
6037    }
6038}
6039
6040/* AdvSIMD copy
6041 *   31  30  29  28             21 20  16 15  14  11 10  9    5 4    0
6042 * +---+---+----+-----------------+------+---+------+---+------+------+
6043 * | 0 | Q | op | 0 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 |  Rn  |  Rd  |
6044 * +---+---+----+-----------------+------+---+------+---+------+------+
6045 */
6046static void disas_simd_copy(DisasContext *s, uint32_t insn)
6047{
6048    int rd = extract32(insn, 0, 5);
6049    int rn = extract32(insn, 5, 5);
6050    int imm4 = extract32(insn, 11, 4);
6051    int op = extract32(insn, 29, 1);
6052    int is_q = extract32(insn, 30, 1);
6053    int imm5 = extract32(insn, 16, 5);
6054
6055    if (op) {
6056        if (is_q) {
6057            /* INS (element) */
6058            handle_simd_inse(s, rd, rn, imm4, imm5);
6059        } else {
6060            unallocated_encoding(s);
6061        }
6062    } else {
6063        switch (imm4) {
6064        case 0:
6065            /* DUP (element - vector) */
6066            handle_simd_dupe(s, is_q, rd, rn, imm5);
6067            break;
6068        case 1:
6069            /* DUP (general) */
6070            handle_simd_dupg(s, is_q, rd, rn, imm5);
6071            break;
6072        case 3:
6073            if (is_q) {
6074                /* INS (general) */
6075                handle_simd_insg(s, rd, rn, imm5);
6076            } else {
6077                unallocated_encoding(s);
6078            }
6079            break;
6080        case 5:
6081        case 7:
6082            /* UMOV/SMOV (is_q indicates 32/64; imm4 indicates signedness) */
6083            handle_simd_umov_smov(s, is_q, (imm4 == 5), rn, rd, imm5);
6084            break;
6085        default:
6086            unallocated_encoding(s);
6087            break;
6088        }
6089    }
6090}
6091
6092/* AdvSIMD modified immediate
6093 *  31  30   29  28                 19 18 16 15   12  11  10  9     5 4    0
6094 * +---+---+----+---------------------+-----+-------+----+---+-------+------+
6095 * | 0 | Q | op | 0 1 1 1 1 0 0 0 0 0 | abc | cmode | o2 | 1 | defgh |  Rd  |
6096 * +---+---+----+---------------------+-----+-------+----+---+-------+------+
6097 *
6098 * There are a number of operations that can be carried out here:
6099 *   MOVI - move (shifted) imm into register
6100 *   MVNI - move inverted (shifted) imm into register
6101 *   ORR  - bitwise OR of (shifted) imm with register
6102 *   BIC  - bitwise clear of (shifted) imm with register
6103 */
6104static void disas_simd_mod_imm(DisasContext *s, uint32_t insn)
6105{
6106    int rd = extract32(insn, 0, 5);
6107    int cmode = extract32(insn, 12, 4);
6108    int cmode_3_1 = extract32(cmode, 1, 3);
6109    int cmode_0 = extract32(cmode, 0, 1);
6110    int o2 = extract32(insn, 11, 1);
6111    uint64_t abcdefgh = extract32(insn, 5, 5) | (extract32(insn, 16, 3) << 5);
6112    bool is_neg = extract32(insn, 29, 1);
6113    bool is_q = extract32(insn, 30, 1);
6114    uint64_t imm = 0;
6115    TCGv_i64 tcg_rd, tcg_imm;
6116    int i;
6117
6118    if (o2 != 0 || ((cmode == 0xf) && is_neg && !is_q)) {
6119        unallocated_encoding(s);
6120        return;
6121    }
6122
6123    if (!fp_access_check(s)) {
6124        return;
6125    }
6126
6127    /* See AdvSIMDExpandImm() in ARM ARM */
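        /* Worked example: cmode = 0b0000 with abcdefgh = 0x12 expands to
         * 0x0000001200000012 (imm8 replicated into each 32 bit lane); when
         * op (is_neg) is set the inverted value is used for MVNI/BIC.
         */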
6128    switch (cmode_3_1) {
6129    case 0: /* Replicate(Zeros(24):imm8, 2) */
6130    case 1: /* Replicate(Zeros(16):imm8:Zeros(8), 2) */
6131    case 2: /* Replicate(Zeros(8):imm8:Zeros(16), 2) */
6132    case 3: /* Replicate(imm8:Zeros(24), 2) */
6133    {
6134        int shift = cmode_3_1 * 8;
6135        imm = bitfield_replicate(abcdefgh << shift, 32);
6136        break;
6137    }
6138    case 4: /* Replicate(Zeros(8):imm8, 4) */
6139    case 5: /* Replicate(imm8:Zeros(8), 4) */
6140    {
6141        int shift = (cmode_3_1 & 0x1) * 8;
6142        imm = bitfield_replicate(abcdefgh << shift, 16);
6143        break;
6144    }
6145    case 6:
6146        if (cmode_0) {
6147            /* Replicate(Zeros(8):imm8:Ones(16), 2) */
6148            imm = (abcdefgh << 16) | 0xffff;
6149        } else {
6150            /* Replicate(Zeros(16):imm8:Ones(8), 2) */
6151            imm = (abcdefgh << 8) | 0xff;
6152        }
6153        imm = bitfield_replicate(imm, 32);
6154        break;
6155    case 7:
6156        if (!cmode_0 && !is_neg) {
6157            imm = bitfield_replicate(abcdefgh, 8);
6158        } else if (!cmode_0 && is_neg) {
6159            int i;
6160            imm = 0;
6161            for (i = 0; i < 8; i++) {
6162                if ((abcdefgh) & (1 << i)) {
6163                    imm |= 0xffULL << (i * 8);
6164                }
6165            }
6166        } else if (cmode_0) {
6167            if (is_neg) {
6168                imm = (abcdefgh & 0x3f) << 48;
6169                if (abcdefgh & 0x80) {
6170                    imm |= 0x8000000000000000ULL;
6171                }
6172                if (abcdefgh & 0x40) {
6173                    imm |= 0x3fc0000000000000ULL;
6174                } else {
6175                    imm |= 0x4000000000000000ULL;
6176                }
6177            } else {
6178                imm = (abcdefgh & 0x3f) << 19;
6179                if (abcdefgh & 0x80) {
6180                    imm |= 0x80000000;
6181                }
6182                if (abcdefgh & 0x40) {
6183                    imm |= 0x3e000000;
6184                } else {
6185                    imm |= 0x40000000;
6186                }
6187                imm |= (imm << 32);
6188            }
6189        }
6190        break;
6191    }
6192
6193    if (cmode_3_1 != 7 && is_neg) {
6194        imm = ~imm;
6195    }
6196
6197    tcg_imm = tcg_const_i64(imm);
6198    tcg_rd = new_tmp_a64(s);
6199
6200    for (i = 0; i < 2; i++) {
6201        int foffs = i ? fp_reg_hi_offset(s, rd) : fp_reg_offset(s, rd, MO_64);
6202
6203        if (i == 1 && !is_q) {
6204            /* non-quad ops clear high half of vector */
6205            tcg_gen_movi_i64(tcg_rd, 0);
6206        } else if ((cmode & 0x9) == 0x1 || (cmode & 0xd) == 0x9) {
6207            tcg_gen_ld_i64(tcg_rd, cpu_env, foffs);
6208            if (is_neg) {
6209                /* AND (BIC) */
6210                tcg_gen_and_i64(tcg_rd, tcg_rd, tcg_imm);
6211            } else {
6212                /* ORR */
6213                tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_imm);
6214            }
6215        } else {
6216            /* MOVI */
6217            tcg_gen_mov_i64(tcg_rd, tcg_imm);
6218        }
6219        tcg_gen_st_i64(tcg_rd, cpu_env, foffs);
6220    }
6221
6222    tcg_temp_free_i64(tcg_imm);
6223}
6224
6225/* AdvSIMD scalar copy
6226 *  31 30  29  28             21 20  16 15  14  11 10  9    5 4    0
6227 * +-----+----+-----------------+------+---+------+---+------+------+
6228 * | 0 1 | op | 1 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 |  Rn  |  Rd  |
6229 * +-----+----+-----------------+------+---+------+---+------+------+
6230 */
6231static void disas_simd_scalar_copy(DisasContext *s, uint32_t insn)
6232{
6233    int rd = extract32(insn, 0, 5);
6234    int rn = extract32(insn, 5, 5);
6235    int imm4 = extract32(insn, 11, 4);
6236    int imm5 = extract32(insn, 16, 5);
6237    int op = extract32(insn, 29, 1);
6238
6239    if (op != 0 || imm4 != 0) {
6240        unallocated_encoding(s);
6241        return;
6242    }
6243
6244    /* DUP (element, scalar) */
6245    handle_simd_dupes(s, rd, rn, imm5);
6246}
6247
6248/* AdvSIMD scalar pairwise
6249 *  31 30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
6250 * +-----+---+-----------+------+-----------+--------+-----+------+------+
6251 * | 0 1 | U | 1 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
6252 * +-----+---+-----------+------+-----------+--------+-----+------+------+
6253 */
6254static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn)
6255{
6256    int u = extract32(insn, 29, 1);
6257    int size = extract32(insn, 22, 2);
6258    int opcode = extract32(insn, 12, 5);
6259    int rn = extract32(insn, 5, 5);
6260    int rd = extract32(insn, 0, 5);
6261    TCGv_ptr fpst;
6262
6263    /* For some ops (the FP ones), size[1] is part of the encoding.
6264     * For ADDP it strictly is not, but size[1] is always 1 for all
6265     * valid encodings.
6266     */
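        /* For example, FMAXNMP and FMINNMP share opcode 0xc and differ
         * only in size[1], which the OR below folds into opcode bit 5
         * (0xc vs 0x2c); size[0] then selects single or double precision.
         */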
6267    opcode |= (extract32(size, 1, 1) << 5);
6268
6269    switch (opcode) {
6270    case 0x3b: /* ADDP */
6271        if (u || size != 3) {
6272            unallocated_encoding(s);
6273            return;
6274        }
6275        if (!fp_access_check(s)) {
6276            return;
6277        }
6278
6279        TCGV_UNUSED_PTR(fpst);
6280        break;
6281    case 0xc: /* FMAXNMP */
6282    case 0xd: /* FADDP */
6283    case 0xf: /* FMAXP */
6284    case 0x2c: /* FMINNMP */
6285    case 0x2f: /* FMINP */
6286        /* FP op, size[0] is 32 or 64 bit */
6287        if (!u) {
6288            unallocated_encoding(s);
6289            return;
6290        }
6291        if (!fp_access_check(s)) {
6292            return;
6293        }
6294
6295        size = extract32(size, 0, 1) ? 3 : 2;
6296        fpst = get_fpstatus_ptr();
6297        break;
6298    default:
6299        unallocated_encoding(s);
6300        return;
6301    }
6302
6303    if (size == 3) {
6304        TCGv_i64 tcg_op1 = tcg_temp_new_i64();
6305        TCGv_i64 tcg_op2 = tcg_temp_new_i64();
6306        TCGv_i64 tcg_res = tcg_temp_new_i64();
6307
6308        read_vec_element(s, tcg_op1, rn, 0, MO_64);
6309        read_vec_element(s, tcg_op2, rn, 1, MO_64);
6310
6311        switch (opcode) {
6312        case 0x3b: /* ADDP */
6313            tcg_gen_add_i64(tcg_res, tcg_op1, tcg_op2);
6314            break;
6315        case 0xc: /* FMAXNMP */
6316            gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
6317            break;
6318        case 0xd: /* FADDP */
6319            gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
6320            break;
6321        case 0xf: /* FMAXP */
6322            gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
6323            break;
6324        case 0x2c: /* FMINNMP */
6325            gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
6326            break;
6327        case 0x2f: /* FMINP */
6328            gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
6329            break;
6330        default:
6331            g_assert_not_reached();
6332        }
6333
6334        write_fp_dreg(s, rd, tcg_res);
6335
6336        tcg_temp_free_i64(tcg_op1);
6337        tcg_temp_free_i64(tcg_op2);
6338        tcg_temp_free_i64(tcg_res);
6339    } else {
6340        TCGv_i32 tcg_op1 = tcg_temp_new_i32();
6341        TCGv_i32 tcg_op2 = tcg_temp_new_i32();
6342        TCGv_i32 tcg_res = tcg_temp_new_i32();
6343
6344        read_vec_element_i32(s, tcg_op1, rn, 0, MO_32);
6345        read_vec_element_i32(s, tcg_op2, rn, 1, MO_32);
6346
6347        switch (opcode) {
6348        case 0xc: /* FMAXNMP */
6349            gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
6350            break;
6351        case 0xd: /* FADDP */
6352            gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
6353            break;
6354        case 0xf: /* FMAXP */
6355            gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
6356            break;
6357        case 0x2c: /* FMINNMP */
6358            gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
6359            break;
6360        case 0x2f: /* FMINP */
6361            gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
6362            break;
6363        default:
6364            g_assert_not_reached();
6365        }
6366
6367        write_fp_sreg(s, rd, tcg_res);
6368
6369        tcg_temp_free_i32(tcg_op1);
6370        tcg_temp_free_i32(tcg_op2);
6371        tcg_temp_free_i32(tcg_res);
6372    }
6373
6374    if (!TCGV_IS_UNUSED_PTR(fpst)) {
6375        tcg_temp_free_ptr(fpst);
6376    }
6377}
6378
6379/*
6380 * Common SSHR[RA]/USHR[RA] - Shift right (optional rounding/accumulate)
6381 *
6382 * This handles the common shift logic and is used by both
6383 * the vector and scalar code.
6384 */
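    /* Rounding adds 1 << (shift - 1) before shifting: e.g. SRSHR by 2
     * maps an input of 7 to (7 + 2) >> 2 = 2 rather than the truncated 1.
     * At size 3 that addition can carry out of 64 bits, which is why the
     * extended path below widens via tcg_gen_add2_i64.
     */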
6385static void handle_shri_with_rndacc(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
6386                                    TCGv_i64 tcg_rnd, bool accumulate,
6387                                    bool is_u, int size, int shift)
6388{
6389    bool extended_result = false;
6390    bool round = !TCGV_IS_UNUSED_I64(tcg_rnd);
6391    int ext_lshift = 0;
6392    TCGv_i64 tcg_src_hi;
6393
6394    if (round && size == 3) {
6395        extended_result = true;
6396        ext_lshift = 64 - shift;
6397        tcg_src_hi = tcg_temp_new_i64();
6398    } else if (shift == 64) {
6399        if (!accumulate && is_u) {
6400            /* result is zero */
6401            tcg_gen_movi_i64(tcg_res, 0);
6402            return;
6403        }
6404    }
6405
6406    /* Deal with the rounding step */
6407    if (round) {
6408        if (extended_result) {
6409            TCGv_i64 tcg_zero = tcg_const_i64(0);
6410            if (!is_u) {
6411                /* take care of sign extending tcg_res */
6412                tcg_gen_sari_i64(tcg_src_hi, tcg_src, 63);
6413                tcg_gen_add2_i64(tcg_src, tcg_src_hi,
6414                                 tcg_src, tcg_src_hi,
6415                                 tcg_rnd, tcg_zero);
6416            } else {
6417                tcg_gen_add2_i64(tcg_src, tcg_src_hi,
6418                                 tcg_src, tcg_zero,
6419                                 tcg_rnd, tcg_zero);
6420            }
6421            tcg_temp_free_i64(tcg_zero);
6422        } else {
6423            tcg_gen_add_i64(tcg_src, tcg_src, tcg_rnd);
6424        }
6425    }
6426
6427    /* Now do the shift right */
6428    if (round && extended_result) {
6429        /* extended case, >64 bit precision required */
6430        if (ext_lshift == 0) {
6431            /* special case, only high bits matter */
6432            tcg_gen_mov_i64(tcg_src, tcg_src_hi);
6433        } else {
6434            tcg_gen_shri_i64(tcg_src, tcg_src, shift);
6435            tcg_gen_shli_i64(tcg_src_hi, tcg_src_hi, ext_lshift);
6436            tcg_gen_or_i64(tcg_src, tcg_src, tcg_src_hi);
6437        }
6438    } else {
6439        if (is_u) {
6440            if (shift == 64) {
6441                /* essentially shifting in 64 zeros */
6442                tcg_gen_movi_i64(tcg_src, 0);
6443            } else {
6444                tcg_gen_shri_i64(tcg_src, tcg_src, shift);
6445            }
6446        } else {
6447            if (shift == 64) {
6448                /* effectively extending the sign-bit */
6449                tcg_gen_sari_i64(tcg_src, tcg_src, 63);
6450            } else {
6451                tcg_gen_sari_i64(tcg_src, tcg_src, shift);
6452            }
6453        }
6454    }
6455
6456    if (accumulate) {
6457        tcg_gen_add_i64(tcg_res, tcg_res, tcg_src);
6458    } else {
6459        tcg_gen_mov_i64(tcg_res, tcg_src);
6460    }
6461
6462    if (extended_result) {
6463        tcg_temp_free_i64(tcg_src_hi);
6464    }
6465}
6466
6467/* Common SHL/SLI - Shift left with an optional insert */
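    /* SLI keeps the low 'shift' bits of the destination: e.g. a shift of
     * 8 on a 64 bit element deposits bits 55:0 of tcg_src into bits 63:8
     * of tcg_res and leaves bits 7:0 of tcg_res intact.
     */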
6468static void handle_shli_with_ins(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
6469                                 bool insert, int shift)
6470{
6471    if (insert) { /* SLI */
6472        tcg_gen_deposit_i64(tcg_res, tcg_res, tcg_src, shift, 64 - shift);
6473    } else { /* SHL */
6474        tcg_gen_shli_i64(tcg_res, tcg_src, shift);
6475    }
6476}
6477
6478/* SRI: shift right with insert */
6479static void handle_shri_with_ins(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
6480                                 int size, int shift)
6481{
6482    int esize = 8 << size;
6483
6484    /* shift count same as element size is valid but does nothing;
6485     * special case to avoid potential shift by 64.
6486     */
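        /* For example, with esize = 64 and shift = 8 the deposit keeps
         * the top 8 bits of tcg_res and fills bits 55:0 with
         * tcg_src >> 8.
         */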
6487    if (shift != esize) {
6488        tcg_gen_shri_i64(tcg_src, tcg_src, shift);
6489        tcg_gen_deposit_i64(tcg_res, tcg_res, tcg_src, 0, esize - shift);
6490    }
6491}
6492
6493/* SSHR[RA]/USHR[RA] - Scalar shift right (optional rounding/accumulate) */
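    /* The shift amount decodes as 2 * esize - immh:immb; scalar shifts
     * here are always 64 bit, so e.g. immh:immb = 0b1111000 encodes a
     * shift right by 128 - 120 = 8.
     */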
6494static void handle_scalar_simd_shri(DisasContext *s,
6495                                    bool is_u, int immh, int immb,
6496                                    int opcode, int rn, int rd)
6497{
6498    const int size = 3;
6499    int immhb = immh << 3 | immb;
6500    int shift = 2 * (8 << size) - immhb;
6501    bool accumulate = false;
6502    bool round = false;
6503    bool insert = false;
6504    TCGv_i64 tcg_rn;
6505    TCGv_i64 tcg_rd;
6506    TCGv_i64 tcg_round;
6507
6508    if (!extract32(immh, 3, 1)) {
6509        unallocated_encoding(s);
6510        return;
6511    }
6512
6513    if (!fp_access_check(s)) {
6514        return;
6515    }
6516
6517    switch (opcode) {
6518    case 0x02: /* SSRA / USRA (accumulate) */
6519        accumulate = true;
6520        break;
6521    case 0x04: /* SRSHR / URSHR (rounding) */
6522        round = true;
6523        break;
6524    case 0x06: /* SRSRA / URSRA (accum + rounding) */
6525        accumulate = round = true;
6526        break;
6527    case 0x08: /* SRI */
6528        insert = true;
6529        break;
6530    }
6531
6532    if (round) {
6533        uint64_t round_const = 1ULL << (shift - 1);
6534        tcg_round = tcg_const_i64(round_const);
6535    } else {
6536        TCGV_UNUSED_I64(tcg_round);
6537    }
6538
6539    tcg_rn = read_fp_dreg(s, rn);
6540    tcg_rd = (accumulate || insert) ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
6541
6542    if (insert) {
6543        handle_shri_with_ins(tcg_rd, tcg_rn, size, shift);
6544    } else {
6545        handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
6546                                accumulate, is_u, size, shift);
6547    }
6548
6549    write_fp_dreg(s, rd, tcg_rd);
6550
6551    tcg_temp_free_i64(tcg_rn);
6552    tcg_temp_free_i64(tcg_rd);
6553    if (round) {
6554        tcg_temp_free_i64(tcg_round);
6555    }
6556}
6557
6558/* SHL/SLI - Scalar shift left */
6559static void handle_scalar_simd_shli(DisasContext *s, bool insert,
6560                                    int immh, int immb, int opcode,
6561                                    int rn, int rd)
6562{
6563    int size = 32 - clz32(immh) - 1;
6564    int immhb = immh << 3 | immb;
6565    int shift = immhb - (8 << size);
6566    TCGv_i64 tcg_rn;
6567    TCGv_i64 tcg_rd;
6568
6569    if (!extract32(immh, 3, 1)) {
6570        unallocated_encoding(s);
6571        return;
6572    }
6573
6574    if (!fp_access_check(s)) {
6575        return;
6576    }
6577
6578    tcg_rn = read_fp_dreg(s, rn);
6579    tcg_rd = insert ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
6580
6581    handle_shli_with_ins(tcg_rd, tcg_rn, insert, shift);
6582
6583    write_fp_dreg(s, rd, tcg_rd);
6584
6585    tcg_temp_free_i64(tcg_rn);
6586    tcg_temp_free_i64(tcg_rd);
6587}
6588
6589/* SQSHRN/SQSHRUN - Saturating (signed/unsigned) shift right with
6590 * (signed/unsigned) narrowing */
6591static void handle_vec_simd_sqshrn(DisasContext *s, bool is_scalar, bool is_q,
6592                                   bool is_u_shift, bool is_u_narrow,
6593                                   int immh, int immb, int opcode,
6594                                   int rn, int rd)
6595{
6596    int immhb = immh << 3 | immb;
6597    int size = 32 - clz32(immh) - 1;
6598    int esize = 8 << size;
6599    int shift = (2 * esize) - immhb;
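        /* Editor's note, worked decode: immh = 0b0001 selects byte results
         * (size = 0, esize = 8) narrowed from 16-bit source elements, with
         * shift = 16 - immhb in [1, 8] as immhb runs over [8, 15]; immh<3>
         * set would need a 128-bit source element and is rejected below.
         */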
6600    int elements = is_scalar ? 1 : (64 / esize);
6601    bool round = extract32(opcode, 0, 1);
6602    TCGMemOp ldop = (size + 1) | (is_u_shift ? 0 : MO_SIGN);
6603    TCGv_i64 tcg_rn, tcg_rd, tcg_round;
6604    TCGv_i32 tcg_rd_narrowed;
6605    TCGv_i64 tcg_final;
6606
6607    static NeonGenNarrowEnvFn * const signed_narrow_fns[4][2] = {
6608        { gen_helper_neon_narrow_sat_s8,
6609          gen_helper_neon_unarrow_sat8 },
6610        { gen_helper_neon_narrow_sat_s16,
6611          gen_helper_neon_unarrow_sat16 },
6612        { gen_helper_neon_narrow_sat_s32,
6613          gen_helper_neon_unarrow_sat32 },
6614        { NULL, NULL },
6615    };
6616    static NeonGenNarrowEnvFn * const unsigned_narrow_fns[4] = {
6617        gen_helper_neon_narrow_sat_u8,
6618        gen_helper_neon_narrow_sat_u16,
6619        gen_helper_neon_narrow_sat_u32,
6620        NULL
6621    };
6622    NeonGenNarrowEnvFn *narrowfn;
6623
6624    int i;
6625
6626    assert(size < 4);
6627
6628    if (extract32(immh, 3, 1)) {
6629        unallocated_encoding(s);
6630        return;
6631    }
6632
6633    if (!fp_access_check(s)) {
6634        return;
6635    }
6636
6637    if (is_u_shift) {
6638        narrowfn = unsigned_narrow_fns[size];
6639    } else {
6640        narrowfn = signed_narrow_fns[size][is_u_narrow ? 1 : 0];
6641    }
6642
6643    tcg_rn = tcg_temp_new_i64();
6644    tcg_rd = tcg_temp_new_i64();
6645    tcg_rd_narrowed = tcg_temp_new_i32();
6646    tcg_final = tcg_const_i64(0);
6647
6648    if (round) {
6649        uint64_t round_const = 1ULL << (shift - 1);
6650        tcg_round = tcg_const_i64(round_const);
6651    } else {
6652        TCGV_UNUSED_I64(tcg_round);
6653    }
6654
6655    for (i = 0; i < elements; i++) {
6656        read_vec_element(s, tcg_rn, rn, i, ldop);
6657        handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
6658                                false, is_u_shift, size+1, shift);
6659        narrowfn(tcg_rd_narrowed, cpu_env, tcg_rd);
6660        tcg_gen_extu_i32_i64(tcg_rd, tcg_rd_narrowed);
6661        tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
6662    }
6663
6664    if (!is_q) {
6665        clear_vec_high(s, rd);
6666        write_vec_element(s, tcg_final, rd, 0, MO_64);
6667    } else {
6668        write_vec_element(s, tcg_final, rd, 1, MO_64);
6669    }
6670
6671    if (round) {
6672        tcg_temp_free_i64(tcg_round);
6673    }
6674    tcg_temp_free_i64(tcg_rn);
6675    tcg_temp_free_i64(tcg_rd);
6676    tcg_temp_free_i32(tcg_rd_narrowed);
6677    tcg_temp_free_i64(tcg_final);
6678    return;
6679}
6680
6681/* SQSHLU, UQSHL, SQSHL: saturating left shifts */
6682static void handle_simd_qshl(DisasContext *s, bool scalar, bool is_q,
6683                             bool src_unsigned, bool dst_unsigned,
6684                             int immh, int immb, int rn, int rd)
6685{
6686    int immhb = immh << 3 | immb;
6687    int size = 32 - clz32(immh) - 1;
6688    int shift = immhb - (8 << size);
6689    int pass;
6690
6691    assert(immh != 0);
6692    assert(!(scalar && is_q));
6693
6694    if (!scalar) {
6695        if (!is_q && extract32(immh, 3, 1)) {
6696            unallocated_encoding(s);
6697            return;
6698        }
6699
6700        /* Since we use the variable-shift helpers we must
6701         * replicate the shift count into each element of
6702         * the tcg_shift value.
6703         */
6704        switch (size) {
6705        case 0:
6706            shift |= shift << 8;
6707            /* fall through */
6708        case 1:
6709            shift |= shift << 16;
6710            break;
6711        case 2:
6712        case 3:
6713            break;
6714        default:
6715            g_assert_not_reached();
6716        }
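            /* Editor's note: e.g. size == 0 with shift == 3 yields
             * 0x03030303, so every byte lane of the packed 32-bit value
             * seen by the helper carries the same count; size == 1 with
             * shift == 3 yields 0x00030003 for the two halfword lanes.
             */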
6717    }
6718
6719    if (!fp_access_check(s)) {
6720        return;
6721    }
6722
6723    if (size == 3) {
6724        TCGv_i64 tcg_shift = tcg_const_i64(shift);
6725        static NeonGenTwo64OpEnvFn * const fns[2][2] = {
6726            { gen_helper_neon_qshl_s64, gen_helper_neon_qshlu_s64 },
6727            { NULL, gen_helper_neon_qshl_u64 },
6728        };
6729        NeonGenTwo64OpEnvFn *genfn = fns[src_unsigned][dst_unsigned];
6730        int maxpass = is_q ? 2 : 1;
6731
6732        for (pass = 0; pass < maxpass; pass++) {
6733            TCGv_i64 tcg_op = tcg_temp_new_i64();
6734
6735            read_vec_element(s, tcg_op, rn, pass, MO_64);
6736            genfn(tcg_op, cpu_env, tcg_op, tcg_shift);
6737            write_vec_element(s, tcg_op, rd, pass, MO_64);
6738
6739            tcg_temp_free_i64(tcg_op);
6740        }
6741        tcg_temp_free_i64(tcg_shift);
6742
6743        if (!is_q) {
6744            clear_vec_high(s, rd);
6745        }
6746    } else {
6747        TCGv_i32 tcg_shift = tcg_const_i32(shift);
6748        static NeonGenTwoOpEnvFn * const fns[2][2][3] = {
6749            {
6750                { gen_helper_neon_qshl_s8,
6751                  gen_helper_neon_qshl_s16,
6752                  gen_helper_neon_qshl_s32 },
6753                { gen_helper_neon_qshlu_s8,
6754                  gen_helper_neon_qshlu_s16,
6755                  gen_helper_neon_qshlu_s32 }
6756            }, {
6757                { NULL, NULL, NULL },
6758                { gen_helper_neon_qshl_u8,
6759                  gen_helper_neon_qshl_u16,
6760                  gen_helper_neon_qshl_u32 }
6761            }
6762        };
6763        NeonGenTwoOpEnvFn *genfn = fns[src_unsigned][dst_unsigned][size];
6764        TCGMemOp memop = scalar ? size : MO_32;
6765        int maxpass = scalar ? 1 : is_q ? 4 : 2;
6766
6767        for (pass = 0; pass < maxpass; pass++) {
6768            TCGv_i32 tcg_op = tcg_temp_new_i32();
6769
6770            read_vec_element_i32(s, tcg_op, rn, pass, memop);
6771            genfn(tcg_op, cpu_env, tcg_op, tcg_shift);
6772            if (scalar) {
6773                switch (size) {
6774                case 0:
6775                    tcg_gen_ext8u_i32(tcg_op, tcg_op);
6776                    break;
6777                case 1:
6778                    tcg_gen_ext16u_i32(tcg_op, tcg_op);
6779                    break;
6780                case 2:
6781                    break;
6782                default:
6783                    g_assert_not_reached();
6784                }
6785                write_fp_sreg(s, rd, tcg_op);
6786            } else {
6787                write_vec_element_i32(s, tcg_op, rd, pass, MO_32);
6788            }
6789
6790            tcg_temp_free_i32(tcg_op);
6791        }
6792        tcg_temp_free_i32(tcg_shift);
6793
6794        if (!is_q && !scalar) {
6795            clear_vec_high(s, rd);
6796        }
6797    }
6798}
6799
6800/* Common vector code for handling integer to FP conversion */
6801static void handle_simd_intfp_conv(DisasContext *s, int rd, int rn,
6802                                   int elements, int is_signed,
6803                                   int fracbits, int size)
6804{
6805    bool is_double = (size == 3);
6806    TCGv_ptr tcg_fpst = get_fpstatus_ptr();
6807    TCGv_i32 tcg_shift = tcg_const_i32(fracbits);
6808    TCGv_i64 tcg_int = tcg_temp_new_i64();
6809    TCGMemOp mop = size | (is_signed ? MO_SIGN : 0);
6810    int pass;
6811
6812    for (pass = 0; pass < elements; pass++) {
6813        read_vec_element(s, tcg_int, rn, pass, mop);
6814
6815        if (is_double) {
6816            TCGv_i64 tcg_double = tcg_temp_new_i64();
6817            if (is_signed) {
6818                gen_helper_vfp_sqtod(tcg_double, tcg_int,
6819                                     tcg_shift, tcg_fpst);
6820            } else {
6821                gen_helper_vfp_uqtod(tcg_double, tcg_int,
6822                                     tcg_shift, tcg_fpst);
6823            }
6824            if (elements == 1) {
6825                write_fp_dreg(s, rd, tcg_double);
6826            } else {
6827                write_vec_element(s, tcg_double, rd, pass, MO_64);
6828            }
6829            tcg_temp_free_i64(tcg_double);
6830        } else {
6831            TCGv_i32 tcg_single = tcg_temp_new_i32();
6832            if (is_signed) {
6833                gen_helper_vfp_sqtos(tcg_single, tcg_int,
6834                                     tcg_shift, tcg_fpst);
6835            } else {
6836                gen_helper_vfp_uqtos(tcg_single, tcg_int,
6837                                     tcg_shift, tcg_fpst);
6838            }
6839            if (elements == 1) {
6840                write_fp_sreg(s, rd, tcg_single);
6841            } else {
6842                write_vec_element_i32(s, tcg_single, rd, pass, MO_32);
6843            }
6844            tcg_temp_free_i32(tcg_single);
6845        }
6846    }
6847
6848    if (!is_double && elements == 2) {
6849        clear_vec_high(s, rd);
6850    }
6851
6852    tcg_temp_free_i64(tcg_int);
6853    tcg_temp_free_ptr(tcg_fpst);
6854    tcg_temp_free_i32(tcg_shift);
6855}
6856
6857/* UCVTF/SCVTF - Integer to FP conversion */
6858static void handle_simd_shift_intfp_conv(DisasContext *s, bool is_scalar,
6859                                         bool is_q, bool is_u,
6860                                         int immh, int immb, int opcode,
6861                                         int rn, int rd)
6862{
6863    bool is_double = extract32(immh, 3, 1);
6864    int size = is_double ? MO_64 : MO_32;
6865    int elements;
6866    int immhb = immh << 3 | immb;
6867    int fracbits = (is_double ? 128 : 64) - immhb;
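        /* Editor's note, example: for single precision immh has the form
         * 01xx, so immhb is in [32, 63] and fracbits = 64 - immhb spans
         * [1, 32]. A plain integer convert (fracbits == 0) instead comes
         * from the 2-reg-misc SCVTF/UCVTF path further down, which calls
         * handle_simd_intfp_conv with fracbits = 0.
         */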
6868
6869    if (!extract32(immh, 2, 2)) {
6870        unallocated_encoding(s);
6871        return;
6872    }
6873
6874    if (is_scalar) {
6875        elements = 1;
6876    } else {
6877        elements = is_double ? 2 : is_q ? 4 : 2;
6878        if (is_double && !is_q) {
6879            unallocated_encoding(s);
6880            return;
6881        }
6882    }
6883
6884    if (!fp_access_check(s)) {
6885        return;
6886    }
6887
6888    /* immh == 0 would be a failure of the decode logic */
6889    g_assert(immh);
6890
6891    handle_simd_intfp_conv(s, rd, rn, elements, !is_u, fracbits, size);
6892}
6893
6894/* FCVTZS, FCVTZU - FP to fixed-point conversion */
6895static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar,
6896                                         bool is_q, bool is_u,
6897                                         int immh, int immb, int rn, int rd)
6898{
6899    bool is_double = extract32(immh, 3, 1);
6900    int immhb = immh << 3 | immb;
6901    int fracbits = (is_double ? 128 : 64) - immhb;
6902    int pass;
6903    TCGv_ptr tcg_fpstatus;
6904    TCGv_i32 tcg_rmode, tcg_shift;
6905
6906    if (!extract32(immh, 2, 2)) {
6907        unallocated_encoding(s);
6908        return;
6909    }
6910
6911    if (!is_scalar && !is_q && is_double) {
6912        unallocated_encoding(s);
6913        return;
6914    }
6915
6916    if (!fp_access_check(s)) {
6917        return;
6918    }
6919
6920    assert(!(is_scalar && is_q));
6921
6922    tcg_rmode = tcg_const_i32(arm_rmode_to_sf(FPROUNDING_ZERO));
6923    gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
6924    tcg_fpstatus = get_fpstatus_ptr();
6925    tcg_shift = tcg_const_i32(fracbits);
6926
6927    if (is_double) {
6928        int maxpass = is_scalar ? 1 : 2;
6929
6930        for (pass = 0; pass < maxpass; pass++) {
6931            TCGv_i64 tcg_op = tcg_temp_new_i64();
6932
6933            read_vec_element(s, tcg_op, rn, pass, MO_64);
6934            if (is_u) {
6935                gen_helper_vfp_touqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
6936            } else {
6937                gen_helper_vfp_tosqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
6938            }
6939            write_vec_element(s, tcg_op, rd, pass, MO_64);
6940            tcg_temp_free_i64(tcg_op);
6941        }
6942        if (!is_q) {
6943            clear_vec_high(s, rd);
6944        }
6945    } else {
6946        int maxpass = is_scalar ? 1 : is_q ? 4 : 2;
6947        for (pass = 0; pass < maxpass; pass++) {
6948            TCGv_i32 tcg_op = tcg_temp_new_i32();
6949
6950            read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
6951            if (is_u) {
6952                gen_helper_vfp_touls(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
6953            } else {
6954                gen_helper_vfp_tosls(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
6955            }
6956            if (is_scalar) {
6957                write_fp_sreg(s, rd, tcg_op);
6958            } else {
6959                write_vec_element_i32(s, tcg_op, rd, pass, MO_32);
6960            }
6961            tcg_temp_free_i32(tcg_op);
6962        }
6963        if (!is_q && !is_scalar) {
6964            clear_vec_high(s, rd);
6965        }
6966    }
6967
6968    tcg_temp_free_ptr(tcg_fpstatus);
6969    tcg_temp_free_i32(tcg_shift);
6970    gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
6971    tcg_temp_free_i32(tcg_rmode);
6972}
6973
6974/* AdvSIMD scalar shift by immediate
6975 *  31 30  29 28         23 22  19 18  16 15    11  10 9    5 4    0
6976 * +-----+---+-------------+------+------+--------+---+------+------+
6977 * | 0 1 | U | 1 1 1 1 1 0 | immh | immb | opcode | 1 |  Rn  |  Rd  |
6978 * +-----+---+-------------+------+------+--------+---+------+------+
6979 *
6980 * This is the scalar version, so it works on fixed-size (64-bit) registers.
6981 */
6982static void disas_simd_scalar_shift_imm(DisasContext *s, uint32_t insn)
6983{
6984    int rd = extract32(insn, 0, 5);
6985    int rn = extract32(insn, 5, 5);
6986    int opcode = extract32(insn, 11, 5);
6987    int immb = extract32(insn, 16, 3);
6988    int immh = extract32(insn, 19, 4);
6989    bool is_u = extract32(insn, 29, 1);
6990
6991    if (immh == 0) {
6992        unallocated_encoding(s);
6993        return;
6994    }
6995
6996    switch (opcode) {
6997    case 0x08: /* SRI */
6998        if (!is_u) {
6999            unallocated_encoding(s);
7000            return;
7001        }
7002        /* fall through */
7003    case 0x00: /* SSHR / USHR */
7004    case 0x02: /* SSRA / USRA */
7005    case 0x04: /* SRSHR / URSHR */
7006    case 0x06: /* SRSRA / URSRA */
7007        handle_scalar_simd_shri(s, is_u, immh, immb, opcode, rn, rd);
7008        break;
7009    case 0x0a: /* SHL / SLI */
7010        handle_scalar_simd_shli(s, is_u, immh, immb, opcode, rn, rd);
7011        break;
7012    case 0x1c: /* SCVTF, UCVTF */
7013        handle_simd_shift_intfp_conv(s, true, false, is_u, immh, immb,
7014                                     opcode, rn, rd);
7015        break;
7016    case 0x10: /* SQSHRUN, SQSHRUN2 */
7017    case 0x11: /* SQRSHRUN, SQRSHRUN2 */
7018        if (!is_u) {
7019            unallocated_encoding(s);
7020            return;
7021        }
7022        handle_vec_simd_sqshrn(s, true, false, false, true,
7023                               immh, immb, opcode, rn, rd);
7024        break;
7025    case 0x12: /* SQSHRN, SQSHRN2, UQSHRN, UQSHRN2 */
7026    case 0x13: /* SQRSHRN, SQRSHRN2, UQRSHRN, UQRSHRN2 */
7027        handle_vec_simd_sqshrn(s, true, false, is_u, is_u,
7028                               immh, immb, opcode, rn, rd);
7029        break;
7030    case 0xc: /* SQSHLU */
7031        if (!is_u) {
7032            unallocated_encoding(s);
7033            return;
7034        }
7035        handle_simd_qshl(s, true, false, false, true, immh, immb, rn, rd);
7036        break;
7037    case 0xe: /* SQSHL, UQSHL */
7038        handle_simd_qshl(s, true, false, is_u, is_u, immh, immb, rn, rd);
7039        break;
7040    case 0x1f: /* FCVTZS, FCVTZU */
7041        handle_simd_shift_fpint_conv(s, true, false, is_u, immh, immb, rn, rd);
7042        break;
7043    default:
7044        unallocated_encoding(s);
7045        break;
7046    }
7047}
7048
7049/* AdvSIMD scalar three different
7050 *  31 30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
7051 * +-----+---+-----------+------+---+------+--------+-----+------+------+
7052 * | 0 1 | U | 1 1 1 1 0 | size | 1 |  Rm  | opcode | 0 0 |  Rn  |  Rd  |
7053 * +-----+---+-----------+------+---+------+--------+-----+------+------+
7054 */
7055static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn)
7056{
7057    bool is_u = extract32(insn, 29, 1);
7058    int size = extract32(insn, 22, 2);
7059    int opcode = extract32(insn, 12, 4);
7060    int rm = extract32(insn, 16, 5);
7061    int rn = extract32(insn, 5, 5);
7062    int rd = extract32(insn, 0, 5);
7063
7064    if (is_u) {
7065        unallocated_encoding(s);
7066        return;
7067    }
7068
7069    switch (opcode) {
7070    case 0x9: /* SQDMLAL, SQDMLAL2 */
7071    case 0xb: /* SQDMLSL, SQDMLSL2 */
7072    case 0xd: /* SQDMULL, SQDMULL2 */
7073        if (size == 0 || size == 3) {
7074            unallocated_encoding(s);
7075            return;
7076        }
7077        break;
7078    default:
7079        unallocated_encoding(s);
7080        return;
7081    }
7082
7083    if (!fp_access_check(s)) {
7084        return;
7085    }
7086
7087    if (size == 2) {
7088        TCGv_i64 tcg_op1 = tcg_temp_new_i64();
7089        TCGv_i64 tcg_op2 = tcg_temp_new_i64();
7090        TCGv_i64 tcg_res = tcg_temp_new_i64();
7091
7092        read_vec_element(s, tcg_op1, rn, 0, MO_32 | MO_SIGN);
7093        read_vec_element(s, tcg_op2, rm, 0, MO_32 | MO_SIGN);
7094
7095        tcg_gen_mul_i64(tcg_res, tcg_op1, tcg_op2);
7096        gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env, tcg_res, tcg_res);
7097
7098        switch (opcode) {
7099        case 0xd: /* SQDMULL, SQDMULL2 */
7100            break;
7101        case 0xb: /* SQDMLSL, SQDMLSL2 */
7102            tcg_gen_neg_i64(tcg_res, tcg_res);
7103            /* fall through */
7104        case 0x9: /* SQDMLAL, SQDMLAL2 */
7105            read_vec_element(s, tcg_op1, rd, 0, MO_64);
7106            gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env,
7107                                              tcg_res, tcg_op1);
7108            break;
7109        default:
7110            g_assert_not_reached();
7111        }
7112
7113        write_fp_dreg(s, rd, tcg_res);
7114
7115        tcg_temp_free_i64(tcg_op1);
7116        tcg_temp_free_i64(tcg_op2);
7117        tcg_temp_free_i64(tcg_res);
7118    } else {
7119        TCGv_i32 tcg_op1 = tcg_temp_new_i32();
7120        TCGv_i32 tcg_op2 = tcg_temp_new_i32();
7121        TCGv_i64 tcg_res = tcg_temp_new_i64();
7122
7123        read_vec_element_i32(s, tcg_op1, rn, 0, MO_16);
7124        read_vec_element_i32(s, tcg_op2, rm, 0, MO_16);
7125
7126        gen_helper_neon_mull_s16(tcg_res, tcg_op1, tcg_op2);
7127        gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env, tcg_res, tcg_res);
7128
7129        switch (opcode) {
7130        case 0xd: /* SQDMULL, SQDMULL2 */
7131            break;
7132        case 0xb: /* SQDMLSL, SQDMLSL2 */
7133            gen_helper_neon_negl_u32(tcg_res, tcg_res);
7134            /* fall through */
7135        case 0x9: /* SQDMLAL, SQDMLAL2 */
7136        {
7137            TCGv_i64 tcg_op3 = tcg_temp_new_i64();
7138            read_vec_element(s, tcg_op3, rd, 0, MO_32);
7139            gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env,
7140                                              tcg_res, tcg_op3);
7141            tcg_temp_free_i64(tcg_op3);
7142            break;
7143        }
7144        default:
7145            g_assert_not_reached();
7146        }
7147
7148        tcg_gen_ext32u_i64(tcg_res, tcg_res);
7149        write_fp_dreg(s, rd, tcg_res);
7150
7151        tcg_temp_free_i32(tcg_op1);
7152        tcg_temp_free_i32(tcg_op2);
7153        tcg_temp_free_i64(tcg_res);
7154    }
7155}
7156
7157static void handle_3same_64(DisasContext *s, int opcode, bool u,
7158                            TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, TCGv_i64 tcg_rm)
7159{
7160    /* Handle 64x64->64 opcodes which are shared between the scalar
7161     * and vector 3-same groups. We cover every opcode where size == 3
7162     * is valid in either the three-reg-same (integer, not pairwise)
7163     * or scalar-three-reg-same groups. (Some opcodes are not yet
7164     * implemented.)
7165     */
7166    TCGCond cond;
7167
7168    switch (opcode) {
7169    case 0x1: /* SQADD */
7170        if (u) {
7171            gen_helper_neon_qadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7172        } else {
7173            gen_helper_neon_qadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7174        }
7175        break;
7176    case 0x5: /* SQSUB */
7177        if (u) {
7178            gen_helper_neon_qsub_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7179        } else {
7180            gen_helper_neon_qsub_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7181        }
7182        break;
7183    case 0x6: /* CMGT, CMHI */
7184        /* 64 bit integer comparison, result = test ? (2^64 - 1) : 0.
7185         * We implement this using setcond (test) and then negating.
7186         */
7187        cond = u ? TCG_COND_GTU : TCG_COND_GT;
7188    do_cmop:
7189        tcg_gen_setcond_i64(cond, tcg_rd, tcg_rn, tcg_rm);
7190        tcg_gen_neg_i64(tcg_rd, tcg_rd);
7191        break;
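            /* Editor's note: setcond produces 0 or 1, and negating maps 1
             * to all-ones (0xffffffffffffffff) and leaves 0 unchanged,
             * which is exactly the AdvSIMD comparison result format.
             */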
7192    case 0x7: /* CMGE, CMHS */
7193        cond = u ? TCG_COND_GEU : TCG_COND_GE;
7194        goto do_cmop;
7195    case 0x11: /* CMTST, CMEQ */
7196        if (u) {
7197            cond = TCG_COND_EQ;
7198            goto do_cmop;
7199        }
7200        /* CMTST: the test is "(X & Y) != 0". */
7201        tcg_gen_and_i64(tcg_rd, tcg_rn, tcg_rm);
7202        tcg_gen_setcondi_i64(TCG_COND_NE, tcg_rd, tcg_rd, 0);
7203        tcg_gen_neg_i64(tcg_rd, tcg_rd);
7204        break;
7205    case 0x8: /* SSHL, USHL */
7206        if (u) {
7207            gen_helper_neon_shl_u64(tcg_rd, tcg_rn, tcg_rm);
7208        } else {
7209            gen_helper_neon_shl_s64(tcg_rd, tcg_rn, tcg_rm);
7210        }
7211        break;
7212    case 0x9: /* SQSHL, UQSHL */
7213        if (u) {
7214            gen_helper_neon_qshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7215        } else {
7216            gen_helper_neon_qshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7217        }
7218        break;
7219    case 0xa: /* SRSHL, URSHL */
7220        if (u) {
7221            gen_helper_neon_rshl_u64(tcg_rd, tcg_rn, tcg_rm);
7222        } else {
7223            gen_helper_neon_rshl_s64(tcg_rd, tcg_rn, tcg_rm);
7224        }
7225        break;
7226    case 0xb: /* SQRSHL, UQRSHL */
7227        if (u) {
7228            gen_helper_neon_qrshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7229        } else {
7230            gen_helper_neon_qrshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7231        }
7232        break;
7233    case 0x10: /* ADD, SUB */
7234        if (u) {
7235            tcg_gen_sub_i64(tcg_rd, tcg_rn, tcg_rm);
7236        } else {
7237            tcg_gen_add_i64(tcg_rd, tcg_rn, tcg_rm);
7238        }
7239        break;
7240    default:
7241        g_assert_not_reached();
7242    }
7243}
7244
7245/* Handle the 3-same-operands float operations; shared by the scalar
7246 * and vector encodings. The caller must filter out any encodings
7247 * not allocated for the encoding it is dealing with.
7248 */
7249static void handle_3same_float(DisasContext *s, int size, int elements,
7250                               int fpopcode, int rd, int rn, int rm)
7251{
7252    int pass;
7253    TCGv_ptr fpst = get_fpstatus_ptr();
7254
7255    for (pass = 0; pass < elements; pass++) {
7256        if (size) {
7257            /* Double */
7258            TCGv_i64 tcg_op1 = tcg_temp_new_i64();
7259            TCGv_i64 tcg_op2 = tcg_temp_new_i64();
7260            TCGv_i64 tcg_res = tcg_temp_new_i64();
7261
7262            read_vec_element(s, tcg_op1, rn, pass, MO_64);
7263            read_vec_element(s, tcg_op2, rm, pass, MO_64);
7264
7265            switch (fpopcode) {
7266            case 0x39: /* FMLS */
7267                /* As usual for ARM, separate negation for fused multiply-add */
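                /* (Editor's note: i.e. FMLS is acc + (-op1 * op2) computed
                 * as one fused operation; negating the operand first keeps
                 * the single-rounding fused semantics, whereas forming the
                 * product and then subtracting would round twice.)
                 */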
7268                gen_helper_vfp_negd(tcg_op1, tcg_op1);
7269                /* fall through */
7270            case 0x19: /* FMLA */
7271                read_vec_element(s, tcg_res, rd, pass, MO_64);
7272                gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2,
7273                                       tcg_res, fpst);
7274                break;
7275            case 0x18: /* FMAXNM */
7276                gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
7277                break;
7278            case 0x1a: /* FADD */
7279                gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
7280                break;
7281            case 0x1b: /* FMULX */
7282                gen_helper_vfp_mulxd(tcg_res, tcg_op1, tcg_op2, fpst);
7283                break;
7284            case 0x1c: /* FCMEQ */
7285                gen_helper_neon_ceq_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7286                break;
7287            case 0x1e: /* FMAX */
7288                gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
7289                break;
7290            case 0x1f: /* FRECPS */
7291                gen_helper_recpsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7292                break;
7293            case 0x38: /* FMINNM */
7294                gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
7295                break;
7296            case 0x3a: /* FSUB */
7297                gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
7298                break;
7299            case 0x3e: /* FMIN */
7300                gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
7301                break;
7302            case 0x3f: /* FRSQRTS */
7303                gen_helper_rsqrtsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7304                break;
7305            case 0x5b: /* FMUL */
7306                gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
7307                break;
7308            case 0x5c: /* FCMGE */
7309                gen_helper_neon_cge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7310                break;
7311            case 0x5d: /* FACGE */
7312                gen_helper_neon_acge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7313                break;
7314            case 0x5f: /* FDIV */
7315                gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
7316                break;
7317            case 0x7a: /* FABD */
7318                gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
7319                gen_helper_vfp_absd(tcg_res, tcg_res);
7320                break;
7321            case 0x7c: /* FCMGT */
7322                gen_helper_neon_cgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7323                break;
7324            case 0x7d: /* FACGT */
7325                gen_helper_neon_acgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7326                break;
7327            default:
7328                g_assert_not_reached();
7329            }
7330
7331            write_vec_element(s, tcg_res, rd, pass, MO_64);
7332
7333            tcg_temp_free_i64(tcg_res);
7334            tcg_temp_free_i64(tcg_op1);
7335            tcg_temp_free_i64(tcg_op2);
7336        } else {
7337            /* Single */
7338            TCGv_i32 tcg_op1 = tcg_temp_new_i32();
7339            TCGv_i32 tcg_op2 = tcg_temp_new_i32();
7340            TCGv_i32 tcg_res = tcg_temp_new_i32();
7341
7342            read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
7343            read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);
7344
7345            switch (fpopcode) {
7346            case 0x39: /* FMLS */
7347                /* As usual for ARM, separate negation for fused multiply-add */
7348                gen_helper_vfp_negs(tcg_op1, tcg_op1);
7349                /* fall through */
7350            case 0x19: /* FMLA */
7351                read_vec_element_i32(s, tcg_res, rd, pass, MO_32);
7352                gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2,
7353                                       tcg_res, fpst);
7354                break;
7355            case 0x1a: /* FADD */
7356                gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
7357                break;
7358            case 0x1b: /* FMULX */
7359                gen_helper_vfp_mulxs(tcg_res, tcg_op1, tcg_op2, fpst);
7360                break;
7361            case 0x1c: /* FCMEQ */
7362                gen_helper_neon_ceq_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7363                break;
7364            case 0x1e: /* FMAX */
7365                gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
7366                break;
7367            case 0x1f: /* FRECPS */
7368                gen_helper_recpsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7369                break;
7370            case 0x18: /* FMAXNM */
7371                gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
7372                break;
7373            case 0x38: /* FMINNM */
7374                gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
7375                break;
7376            case 0x3a: /* FSUB */
7377                gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
7378                break;
7379            case 0x3e: /* FMIN */
7380                gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
7381                break;
7382            case 0x3f: /* FRSQRTS */
7383                gen_helper_rsqrtsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7384                break;
7385            case 0x5b: /* FMUL */
7386                gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
7387                break;
7388            case 0x5c: /* FCMGE */
7389                gen_helper_neon_cge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7390                break;
7391            case 0x5d: /* FACGE */
7392                gen_helper_neon_acge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7393                break;
7394            case 0x5f: /* FDIV */
7395                gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
7396                break;
7397            case 0x7a: /* FABD */
7398                gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
7399                gen_helper_vfp_abss(tcg_res, tcg_res);
7400                break;
7401            case 0x7c: /* FCMGT */
7402                gen_helper_neon_cgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7403                break;
7404            case 0x7d: /* FACGT */
7405                gen_helper_neon_acgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7406                break;
7407            default:
7408                g_assert_not_reached();
7409            }
7410
7411            if (elements == 1) {
7412                /* scalar single so clear high part */
7413                TCGv_i64 tcg_tmp = tcg_temp_new_i64();
7414
7415                tcg_gen_extu_i32_i64(tcg_tmp, tcg_res);
7416                write_vec_element(s, tcg_tmp, rd, pass, MO_64);
7417                tcg_temp_free_i64(tcg_tmp);
7418            } else {
7419                write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
7420            }
7421
7422            tcg_temp_free_i32(tcg_res);
7423            tcg_temp_free_i32(tcg_op1);
7424            tcg_temp_free_i32(tcg_op2);
7425        }
7426    }
7427
7428    tcg_temp_free_ptr(fpst);
7429
7430    if ((elements << size) < 4) {
7431        /* scalar, or non-quad vector op */
7432        clear_vec_high(s, rd);
7433    }
7434}
7435
7436/* AdvSIMD scalar three same
7437 *  31 30  29 28       24 23  22  21 20  16 15    11  10 9    5 4    0
7438 * +-----+---+-----------+------+---+------+--------+---+------+------+
7439 * | 0 1 | U | 1 1 1 1 0 | size | 1 |  Rm  | opcode | 1 |  Rn  |  Rd  |
7440 * +-----+---+-----------+------+---+------+--------+---+------+------+
7441 */
7442static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn)
7443{
7444    int rd = extract32(insn, 0, 5);
7445    int rn = extract32(insn, 5, 5);
7446    int opcode = extract32(insn, 11, 5);
7447    int rm = extract32(insn, 16, 5);
7448    int size = extract32(insn, 22, 2);
7449    bool u = extract32(insn, 29, 1);
7450    TCGv_i64 tcg_rd;
7451
7452    if (opcode >= 0x18) {
7453        /* Floating point: U, size[1] and opcode indicate operation */
7454        int fpopcode = opcode | (extract32(size, 1, 1) << 5) | (u << 6);
7455        switch (fpopcode) {
7456        case 0x1b: /* FMULX */
7457        case 0x1f: /* FRECPS */
7458        case 0x3f: /* FRSQRTS */
7459        case 0x5d: /* FACGE */
7460        case 0x7d: /* FACGT */
7461        case 0x1c: /* FCMEQ */
7462        case 0x5c: /* FCMGE */
7463        case 0x7c: /* FCMGT */
7464        case 0x7a: /* FABD */
7465            break;
7466        default:
7467            unallocated_encoding(s);
7468            return;
7469        }
7470
7471        if (!fp_access_check(s)) {
7472            return;
7473        }
7474
7475        handle_3same_float(s, extract32(size, 0, 1), 1, fpopcode, rd, rn, rm);
7476        return;
7477    }
7478
7479    switch (opcode) {
7480    case 0x1: /* SQADD, UQADD */
7481    case 0x5: /* SQSUB, UQSUB */
7482    case 0x9: /* SQSHL, UQSHL */
7483    case 0xb: /* SQRSHL, UQRSHL */
7484        break;
7485    case 0x8: /* SSHL, USHL */
7486    case 0xa: /* SRSHL, URSHL */
7487    case 0x6: /* CMGT, CMHI */
7488    case 0x7: /* CMGE, CMHS */
7489    case 0x11: /* CMTST, CMEQ */
7490    case 0x10: /* ADD, SUB (vector) */
7491        if (size != 3) {
7492            unallocated_encoding(s);
7493            return;
7494        }
7495        break;
7496    case 0x16: /* SQDMULH, SQRDMULH (vector) */
7497        if (size != 1 && size != 2) {
7498            unallocated_encoding(s);
7499            return;
7500        }
7501        break;
7502    default:
7503        unallocated_encoding(s);
7504        return;
7505    }
7506
7507    if (!fp_access_check(s)) {
7508        return;
7509    }
7510
7511    tcg_rd = tcg_temp_new_i64();
7512
7513    if (size == 3) {
7514        TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
7515        TCGv_i64 tcg_rm = read_fp_dreg(s, rm);
7516
7517        handle_3same_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rm);
7518        tcg_temp_free_i64(tcg_rn);
7519        tcg_temp_free_i64(tcg_rm);
7520    } else {
7521        /* Do a single operation on the lowest element in the vector.
7522         * We use the standard Neon helpers and rely on 0 OP 0 == 0 with
7523         * no side effects for all these operations.
7524         * OPTME: special-purpose helpers would avoid doing some
7525         * unnecessary work in the helper for the 8 and 16 bit cases.
7526         */
7527        NeonGenTwoOpEnvFn *genenvfn;
7528        TCGv_i32 tcg_rn = tcg_temp_new_i32();
7529        TCGv_i32 tcg_rm = tcg_temp_new_i32();
7530        TCGv_i32 tcg_rd32 = tcg_temp_new_i32();
7531
7532        read_vec_element_i32(s, tcg_rn, rn, 0, size);
7533        read_vec_element_i32(s, tcg_rm, rm, 0, size);
7534
7535        switch (opcode) {
7536        case 0x1: /* SQADD, UQADD */
7537        {
7538            static NeonGenTwoOpEnvFn * const fns[3][2] = {
7539                { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 },
7540                { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 },
7541                { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 },
7542            };
7543            genenvfn = fns[size][u];
7544            break;
7545        }
7546        case 0x5: /* SQSUB, UQSUB */
7547        {
7548            static NeonGenTwoOpEnvFn * const fns[3][2] = {
7549                { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 },
7550                { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 },
7551                { gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 },
7552            };
7553            genenvfn = fns[size][u];
7554            break;
7555        }
7556        case 0x9: /* SQSHL, UQSHL */
7557        {
7558            static NeonGenTwoOpEnvFn * const fns[3][2] = {
7559                { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
7560                { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
7561                { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
7562            };
7563            genenvfn = fns[size][u];
7564            break;
7565        }
7566        case 0xb: /* SQRSHL, UQRSHL */
7567        {
7568            static NeonGenTwoOpEnvFn * const fns[3][2] = {
7569                { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
7570                { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
7571                { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
7572            };
7573            genenvfn = fns[size][u];
7574            break;
7575        }
7576        case 0x16: /* SQDMULH, SQRDMULH */
7577        {
7578            static NeonGenTwoOpEnvFn * const fns[2][2] = {
7579                { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 },
7580                { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 },
7581            };
7582            assert(size == 1 || size == 2);
7583            genenvfn = fns[size - 1][u];
7584            break;
7585        }
7586        default:
7587            g_assert_not_reached();
7588        }
7589
7590        genenvfn(tcg_rd32, cpu_env, tcg_rn, tcg_rm);
7591        tcg_gen_extu_i32_i64(tcg_rd, tcg_rd32);
7592        tcg_temp_free_i32(tcg_rd32);
7593        tcg_temp_free_i32(tcg_rn);
7594        tcg_temp_free_i32(tcg_rm);
7595    }
7596
7597    write_fp_dreg(s, rd, tcg_rd);
7598
7599    tcg_temp_free_i64(tcg_rd);
7600}
7601
7602static void handle_2misc_64(DisasContext *s, int opcode, bool u,
7603                            TCGv_i64 tcg_rd, TCGv_i64 tcg_rn,
7604                            TCGv_i32 tcg_rmode, TCGv_ptr tcg_fpstatus)
7605{
7606    /* Handle 64->64 opcodes which are shared between the scalar and
7607     * vector 2-reg-misc groups. We cover every integer opcode where size == 3
7608     * is valid in either group and also the double-precision fp ops.
7609     * The caller need only provide tcg_rmode and tcg_fpstatus if the op
7610     * requires them.
7611     */
7612    TCGCond cond;
7613
7614    switch (opcode) {
7615    case 0x4: /* CLS, CLZ */
7616        if (u) {
7617            tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64);
7618        } else {
7619            tcg_gen_clrsb_i64(tcg_rd, tcg_rn);
7620        }
7621        break;
7622    case 0x5: /* NOT */
7623        /* This opcode is shared with CNT and RBIT but we have earlier
7624         * enforced that size == 3 if and only if this is the NOT insn.
7625         */
7626        tcg_gen_not_i64(tcg_rd, tcg_rn);
7627        break;
7628    case 0x7: /* SQABS, SQNEG */
7629        if (u) {
7630            gen_helper_neon_qneg_s64(tcg_rd, cpu_env, tcg_rn);
7631        } else {
7632            gen_helper_neon_qabs_s64(tcg_rd, cpu_env, tcg_rn);
7633        }
7634        break;
7635    case 0xa: /* CMLT */
7636        /* 64 bit integer comparison against zero, result is
7637         * test ? (2^64 - 1) : 0. As with the three-reg-same comparisons,
7638         * we implement this via setcond(test) and then negating.
7639         */
7640        cond = TCG_COND_LT;
7641    do_cmop:
7642        tcg_gen_setcondi_i64(cond, tcg_rd, tcg_rn, 0);
7643        tcg_gen_neg_i64(tcg_rd, tcg_rd);
7644        break;
7645    case 0x8: /* CMGT, CMGE */
7646        cond = u ? TCG_COND_GE : TCG_COND_GT;
7647        goto do_cmop;
7648    case 0x9: /* CMEQ, CMLE */
7649        cond = u ? TCG_COND_LE : TCG_COND_EQ;
7650        goto do_cmop;
7651    case 0xb: /* ABS, NEG */
7652        if (u) {
7653            tcg_gen_neg_i64(tcg_rd, tcg_rn);
7654        } else {
7655            TCGv_i64 tcg_zero = tcg_const_i64(0);
7656            tcg_gen_neg_i64(tcg_rd, tcg_rn);
7657            tcg_gen_movcond_i64(TCG_COND_GT, tcg_rd, tcg_rn, tcg_zero,
7658                                tcg_rn, tcg_rd);
7659            tcg_temp_free_i64(tcg_zero);
7660        }
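        /* Editor's note: the ABS path computes -rn and then uses movcond
         * to select rn when rn > 0; e.g. rn == -5 yields 5, and (like the
         * architectural ABS, which is not saturating) INT64_MIN maps to
         * itself.
         */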
7661        break;
7662    case 0x2f: /* FABS */
7663        gen_helper_vfp_absd(tcg_rd, tcg_rn);
7664        break;
7665    case 0x6f: /* FNEG */
7666        gen_helper_vfp_negd(tcg_rd, tcg_rn);
7667        break;
7668    case 0x7f: /* FSQRT */
7669        gen_helper_vfp_sqrtd(tcg_rd, tcg_rn, cpu_env);
7670        break;
7671    case 0x1a: /* FCVTNS */
7672    case 0x1b: /* FCVTMS */
7673    case 0x1c: /* FCVTAS */
7674    case 0x3a: /* FCVTPS */
7675    case 0x3b: /* FCVTZS */
7676    {
7677        TCGv_i32 tcg_shift = tcg_const_i32(0);
7678        gen_helper_vfp_tosqd(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
7679        tcg_temp_free_i32(tcg_shift);
7680        break;
7681    }
7682    case 0x5a: /* FCVTNU */
7683    case 0x5b: /* FCVTMU */
7684    case 0x5c: /* FCVTAU */
7685    case 0x7a: /* FCVTPU */
7686    case 0x7b: /* FCVTZU */
7687    {
7688        TCGv_i32 tcg_shift = tcg_const_i32(0);
7689        gen_helper_vfp_touqd(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
7690        tcg_temp_free_i32(tcg_shift);
7691        break;
7692    }
7693    case 0x18: /* FRINTN */
7694    case 0x19: /* FRINTM */
7695    case 0x38: /* FRINTP */
7696    case 0x39: /* FRINTZ */
7697    case 0x58: /* FRINTA */
7698    case 0x79: /* FRINTI */
7699        gen_helper_rintd(tcg_rd, tcg_rn, tcg_fpstatus);
7700        break;
7701    case 0x59: /* FRINTX */
7702        gen_helper_rintd_exact(tcg_rd, tcg_rn, tcg_fpstatus);
7703        break;
7704    default:
7705        g_assert_not_reached();
7706    }
7707}
7708
7709static void handle_2misc_fcmp_zero(DisasContext *s, int opcode,
7710                                   bool is_scalar, bool is_u, bool is_q,
7711                                   int size, int rn, int rd)
7712{
7713    bool is_double = (size == 3);
7714    TCGv_ptr fpst;
7715
7716    if (!fp_access_check(s)) {
7717        return;
7718    }
7719
7720    fpst = get_fpstatus_ptr();
7721
7722    if (is_double) {
7723        TCGv_i64 tcg_op = tcg_temp_new_i64();
7724        TCGv_i64 tcg_zero = tcg_const_i64(0);
7725        TCGv_i64 tcg_res = tcg_temp_new_i64();
7726        NeonGenTwoDoubleOPFn *genfn;
7727        bool swap = false;
7728        int pass;
7729
7730        switch (opcode) {
7731        case 0x2e: /* FCMLT (zero) */
7732            swap = true;
7733            /* fall through */
7734        case 0x2c: /* FCMGT (zero) */
7735            genfn = gen_helper_neon_cgt_f64;
7736            break;
7737        case 0x2d: /* FCMEQ (zero) */
7738            genfn = gen_helper_neon_ceq_f64;
7739            break;
7740        case 0x6d: /* FCMLE (zero) */
7741            swap = true;
7742            /* fall through */
7743        case 0x6c: /* FCMGE (zero) */
7744            genfn = gen_helper_neon_cge_f64;
7745            break;
7746        default:
7747            g_assert_not_reached();
7748        }
7749
7750        for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
7751            read_vec_element(s, tcg_op, rn, pass, MO_64);
7752            if (swap) {
7753                genfn(tcg_res, tcg_zero, tcg_op, fpst);
7754            } else {
7755                genfn(tcg_res, tcg_op, tcg_zero, fpst);
7756            }
7757            write_vec_element(s, tcg_res, rd, pass, MO_64);
7758        }
7759        if (is_scalar) {
7760            clear_vec_high(s, rd);
7761        }
7762
7763        tcg_temp_free_i64(tcg_res);
7764        tcg_temp_free_i64(tcg_zero);
7765        tcg_temp_free_i64(tcg_op);
7766    } else {
7767        TCGv_i32 tcg_op = tcg_temp_new_i32();
7768        TCGv_i32 tcg_zero = tcg_const_i32(0);
7769        TCGv_i32 tcg_res = tcg_temp_new_i32();
7770        NeonGenTwoSingleOPFn *genfn;
7771        bool swap = false;
7772        int pass, maxpasses;
7773
7774        switch (opcode) {
7775        case 0x2e: /* FCMLT (zero) */
7776            swap = true;
7777            /* fall through */
7778        case 0x2c: /* FCMGT (zero) */
7779            genfn = gen_helper_neon_cgt_f32;
7780            break;
7781        case 0x2d: /* FCMEQ (zero) */
7782            genfn = gen_helper_neon_ceq_f32;
7783            break;
7784        case 0x6d: /* FCMLE (zero) */
7785            swap = true;
7786            /* fall through */
7787        case 0x6c: /* FCMGE (zero) */
7788            genfn = gen_helper_neon_cge_f32;
7789            break;
7790        default:
7791            g_assert_not_reached();
7792        }
7793
7794        if (is_scalar) {
7795            maxpasses = 1;
7796        } else {
7797            maxpasses = is_q ? 4 : 2;
7798        }
7799
7800        for (pass = 0; pass < maxpasses; pass++) {
7801            read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
7802            if (swap) {
7803                genfn(tcg_res, tcg_zero, tcg_op, fpst);
7804            } else {
7805                genfn(tcg_res, tcg_op, tcg_zero, fpst);
7806            }
7807            if (is_scalar) {
7808                write_fp_sreg(s, rd, tcg_res);
7809            } else {
7810                write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
7811            }
7812        }
7813        tcg_temp_free_i32(tcg_res);
7814        tcg_temp_free_i32(tcg_zero);
7815        tcg_temp_free_i32(tcg_op);
7816        if (!is_q && !is_scalar) {
7817            clear_vec_high(s, rd);
7818        }
7819    }
7820
7821    tcg_temp_free_ptr(fpst);
7822}
7823
7824static void handle_2misc_reciprocal(DisasContext *s, int opcode,
7825                                    bool is_scalar, bool is_u, bool is_q,
7826                                    int size, int rn, int rd)
7827{
7828    bool is_double = (size == 3);
7829    TCGv_ptr fpst = get_fpstatus_ptr();
7830
7831    if (is_double) {
7832        TCGv_i64 tcg_op = tcg_temp_new_i64();
7833        TCGv_i64 tcg_res = tcg_temp_new_i64();
7834        int pass;
7835
7836        for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
7837            read_vec_element(s, tcg_op, rn, pass, MO_64);
7838            switch (opcode) {
7839            case 0x3d: /* FRECPE */
7840                gen_helper_recpe_f64(tcg_res, tcg_op, fpst);
7841                break;
7842            case 0x3f: /* FRECPX */
7843                gen_helper_frecpx_f64(tcg_res, tcg_op, fpst);
7844                break;
7845            case 0x7d: /* FRSQRTE */
7846                gen_helper_rsqrte_f64(tcg_res, tcg_op, fpst);
7847                break;
7848            default:
7849                g_assert_not_reached();
7850            }
7851            write_vec_element(s, tcg_res, rd, pass, MO_64);
7852        }
7853        if (is_scalar) {
7854            clear_vec_high(s, rd);
7855        }
7856
7857        tcg_temp_free_i64(tcg_res);
7858        tcg_temp_free_i64(tcg_op);
7859    } else {
7860        TCGv_i32 tcg_op = tcg_temp_new_i32();
7861        TCGv_i32 tcg_res = tcg_temp_new_i32();
7862        int pass, maxpasses;
7863
7864        if (is_scalar) {
7865            maxpasses = 1;
7866        } else {
7867            maxpasses = is_q ? 4 : 2;
7868        }
7869
7870        for (pass = 0; pass < maxpasses; pass++) {
7871            read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
7872
7873            switch (opcode) {
7874            case 0x3c: /* URECPE */
7875                gen_helper_recpe_u32(tcg_res, tcg_op, fpst);
7876                break;
7877            case 0x3d: /* FRECPE */
7878                gen_helper_recpe_f32(tcg_res, tcg_op, fpst);
7879                break;
7880            case 0x3f: /* FRECPX */
7881                gen_helper_frecpx_f32(tcg_res, tcg_op, fpst);
7882                break;
7883            case 0x7d: /* FRSQRTE */
7884                gen_helper_rsqrte_f32(tcg_res, tcg_op, fpst);
7885                break;
7886            default:
7887                g_assert_not_reached();
7888            }
7889
7890            if (is_scalar) {
7891                write_fp_sreg(s, rd, tcg_res);
7892            } else {
7893                write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
7894            }
7895        }
7896        tcg_temp_free_i32(tcg_res);
7897        tcg_temp_free_i32(tcg_op);
7898        if (!is_q && !is_scalar) {
7899            clear_vec_high(s, rd);
7900        }
7901    }
7902    tcg_temp_free_ptr(fpst);
7903}
7904
7905static void handle_2misc_narrow(DisasContext *s, bool scalar,
7906                                int opcode, bool u, bool is_q,
7907                                int size, int rn, int rd)
7908{
7909    /* Handle 2-reg-misc ops which are narrowing (so each source element
7910     * is twice the width of the corresponding destination element).
7911     */
7912    int pass;
7913    TCGv_i32 tcg_res[2];
7914    int destelt = is_q ? 2 : 0;
7915    int passes = scalar ? 1 : 2;
7916
7917    if (scalar) {
7918        tcg_res[1] = tcg_const_i32(0);
7919    }
7920
7921    for (pass = 0; pass < passes; pass++) {
7922        TCGv_i64 tcg_op = tcg_temp_new_i64();
7923        NeonGenNarrowFn *genfn = NULL;
7924        NeonGenNarrowEnvFn *genenvfn = NULL;
7925
7926        if (scalar) {
7927            read_vec_element(s, tcg_op, rn, pass, size + 1);
7928        } else {
7929            read_vec_element(s, tcg_op, rn, pass, MO_64);
7930        }
7931        tcg_res[pass] = tcg_temp_new_i32();
7932
7933        switch (opcode) {
7934        case 0x12: /* XTN, SQXTUN */
7935        {
7936            static NeonGenNarrowFn * const xtnfns[3] = {
7937                gen_helper_neon_narrow_u8,
7938                gen_helper_neon_narrow_u16,
7939                tcg_gen_extrl_i64_i32,
7940            };
7941            static NeonGenNarrowEnvFn * const sqxtunfns[3] = {
7942                gen_helper_neon_unarrow_sat8,
7943                gen_helper_neon_unarrow_sat16,
7944                gen_helper_neon_unarrow_sat32,
7945            };
7946            if (u) {
7947                genenvfn = sqxtunfns[size];
7948            } else {
7949                genfn = xtnfns[size];
7950            }
7951            break;
7952        }
7953        case 0x14: /* SQXTN, UQXTN */
7954        {
7955            static NeonGenNarrowEnvFn * const fns[3][2] = {
7956                { gen_helper_neon_narrow_sat_s8,
7957                  gen_helper_neon_narrow_sat_u8 },
7958                { gen_helper_neon_narrow_sat_s16,
7959                  gen_helper_neon_narrow_sat_u16 },
7960                { gen_helper_neon_narrow_sat_s32,
7961                  gen_helper_neon_narrow_sat_u32 },
7962            };
7963            genenvfn = fns[size][u];
7964            break;
7965        }
7966        case 0x16: /* FCVTN, FCVTN2 */
7967            /* 32 bit to 16 bit or 64 bit to 32 bit float conversion */
7968            if (size == 2) {
7969                gen_helper_vfp_fcvtsd(tcg_res[pass], tcg_op, cpu_env);
7970            } else {
7971                TCGv_i32 tcg_lo = tcg_temp_new_i32();
7972                TCGv_i32 tcg_hi = tcg_temp_new_i32();
7973                tcg_gen_extr_i64_i32(tcg_lo, tcg_hi, tcg_op);
7974                gen_helper_vfp_fcvt_f32_to_f16(tcg_lo, tcg_lo, cpu_env);
7975                gen_helper_vfp_fcvt_f32_to_f16(tcg_hi, tcg_hi, cpu_env);
7976                tcg_gen_deposit_i32(tcg_res[pass], tcg_lo, tcg_hi, 16, 16);
7977                tcg_temp_free_i32(tcg_lo);
7978                tcg_temp_free_i32(tcg_hi);
7979            }
7980            break;
7981        case 0x56:  /* FCVTXN, FCVTXN2 */
7982            /* 64 bit to 32 bit float conversion
7983             * with von Neumann rounding (round to odd)
7984             */
7985            assert(size == 2);
7986            gen_helper_fcvtx_f64_to_f32(tcg_res[pass], tcg_op, cpu_env);
7987            break;
7988        default:
7989            g_assert_not_reached();
7990        }
7991
7992        if (genfn) {
7993            genfn(tcg_res[pass], tcg_op);
7994        } else if (genenvfn) {
7995            genenvfn(tcg_res[pass], cpu_env, tcg_op);
7996        }
7997
7998        tcg_temp_free_i64(tcg_op);
7999    }
8000
8001    for (pass = 0; pass < 2; pass++) {
8002        write_vec_element_i32(s, tcg_res[pass], rd, destelt + pass, MO_32);
8003        tcg_temp_free_i32(tcg_res[pass]);
8004    }
8005    if (!is_q) {
8006        clear_vec_high(s, rd);
8007    }
8008}
8009
8010/* SUQADD/USQADD - Remaining saturating accumulating ops */
8011static void handle_2misc_satacc(DisasContext *s, bool is_scalar, bool is_u,
8012                                bool is_q, int size, int rn, int rd)
8013{
8014    bool is_double = (size == 3);
8015
8016    if (is_double) {
8017        TCGv_i64 tcg_rn = tcg_temp_new_i64();
8018        TCGv_i64 tcg_rd = tcg_temp_new_i64();
8019        int pass;
8020
8021        for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
8022            read_vec_element(s, tcg_rn, rn, pass, MO_64);
8023            read_vec_element(s, tcg_rd, rd, pass, MO_64);
8024
8025            if (is_u) { /* USQADD */
8026                gen_helper_neon_uqadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rd);
8027            } else { /* SUQADD */
8028                gen_helper_neon_sqadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rd);
8029            }
8030            write_vec_element(s, tcg_rd, rd, pass, MO_64);
8031        }
8032        if (is_scalar) {
8033            clear_vec_high(s, rd);
8034        }
8035
8036        tcg_temp_free_i64(tcg_rd);
8037        tcg_temp_free_i64(tcg_rn);
8038    } else {
8039        TCGv_i32 tcg_rn = tcg_temp_new_i32();
8040        TCGv_i32 tcg_rd = tcg_temp_new_i32();
8041        int pass, maxpasses;
8042
8043        if (is_scalar) {
8044            maxpasses = 1;
8045        } else {
8046            maxpasses = is_q ? 4 : 2;
8047        }
8048
8049        for (pass = 0; pass < maxpasses; pass++) {
8050            if (is_scalar) {
8051                read_vec_element_i32(s, tcg_rn, rn, pass, size);
8052                read_vec_element_i32(s, tcg_rd, rd, pass, size);
8053            } else {
8054                read_vec_element_i32(s, tcg_rn, rn, pass, MO_32);
8055                read_vec_element_i32(s, tcg_rd, rd, pass, MO_32);
8056            }
8057
8058            if (is_u) { /* USQADD */
8059                switch (size) {
8060                case 0:
8061                    gen_helper_neon_uqadd_s8(tcg_rd, cpu_env, tcg_rn, tcg_rd);
8062                    break;
8063                case 1:
8064                    gen_helper_neon_uqadd_s16(tcg_rd, cpu_env, tcg_rn, tcg_rd);
8065                    break;
8066                case 2:
8067                    gen_helper_neon_uqadd_s32(tcg_rd, cpu_env, tcg_rn, tcg_rd);
8068                    break;
8069                default:
8070                    g_assert_not_reached();
8071                }
8072            } else { /* SUQADD */
8073                switch (size) {
8074                case 0:
8075                    gen_helper_neon_sqadd_u8(tcg_rd, cpu_env, tcg_rn, tcg_rd);
8076                    break;
8077                case 1:
8078                    gen_helper_neon_sqadd_u16(tcg_rd, cpu_env, tcg_rn, tcg_rd);
8079                    break;
8080                case 2:
8081                    gen_helper_neon_sqadd_u32(tcg_rd, cpu_env, tcg_rn, tcg_rd);
8082                    break;
8083                default:
8084                    g_assert_not_reached();
8085                }
8086            }
8087
8088            if (is_scalar) {
8089                TCGv_i64 tcg_zero = tcg_const_i64(0);
8090                write_vec_element(s, tcg_zero, rd, 0, MO_64);
8091                tcg_temp_free_i64(tcg_zero);
8092            }
8093            write_vec_element_i32(s, tcg_rd, rd, pass, MO_32);
8094        }
8095
8096        if (!is_q) {
8097            clear_vec_high(s, rd);
8098        }
8099
8100        tcg_temp_free_i32(tcg_rd);
8101        tcg_temp_free_i32(tcg_rn);
8102    }
8103}
8104
8105/* AdvSIMD scalar two reg misc
8106 *  31 30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
8107 * +-----+---+-----------+------+-----------+--------+-----+------+------+
8108 * | 0 1 | U | 1 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
8109 * +-----+---+-----------+------+-----------+--------+-----+------+------+
8110 */
8111static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn)
8112{
8113    int rd = extract32(insn, 0, 5);
8114    int rn = extract32(insn, 5, 5);
8115    int opcode = extract32(insn, 12, 5);
8116    int size = extract32(insn, 22, 2);
8117    bool u = extract32(insn, 29, 1);
8118    bool is_fcvt = false;
8119    int rmode;
8120    TCGv_i32 tcg_rmode;
8121    TCGv_ptr tcg_fpstatus;
8122
8123    switch (opcode) {
8124    case 0x3: /* USQADD / SUQADD */
8125        if (!fp_access_check(s)) {
8126            return;
8127        }
8128        handle_2misc_satacc(s, true, u, false, size, rn, rd);
8129        return;
8130    case 0x7: /* SQABS / SQNEG */
8131        break;
8132    case 0xa: /* CMLT */
8133        if (u) {
8134            unallocated_encoding(s);
8135            return;
8136        }
8137        /* fall through */
8138    case 0x8: /* CMGT, CMGE */
8139    case 0x9: /* CMEQ, CMLE */
8140    case 0xb: /* ABS, NEG */
8141        if (size != 3) {
8142            unallocated_encoding(s);
8143            return;
8144        }
8145        break;
8146    case 0x12: /* SQXTUN */
8147        if (!u) {
8148            unallocated_encoding(s);
8149            return;
8150        }
8151        /* fall through */
8152    case 0x14: /* SQXTN, UQXTN */
8153        if (size == 3) {
8154            unallocated_encoding(s);
8155            return;
8156        }
8157        if (!fp_access_check(s)) {
8158            return;
8159        }
8160        handle_2misc_narrow(s, true, opcode, u, false, size, rn, rd);
8161        return;
8162    case 0xc ... 0xf:
8163    case 0x16 ... 0x1d:
8164    case 0x1f:
8165        /* Floating point: U, size[1] and opcode indicate operation;
8166         * size[0] indicates single or double precision.
8167         */
8168        opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
8169        size = extract32(size, 0, 1) ? 3 : 2;
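            /* E.g. FCVTZS (double): U=0, size=0b11, opcode=0x1b becomes
             * opcode 0x3b with size 3 (MO_64) after the adjustment above.
             */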
8170        switch (opcode) {
8171        case 0x2c: /* FCMGT (zero) */
8172        case 0x2d: /* FCMEQ (zero) */
8173        case 0x2e: /* FCMLT (zero) */
8174        case 0x6c: /* FCMGE (zero) */
8175        case 0x6d: /* FCMLE (zero) */
8176            handle_2misc_fcmp_zero(s, opcode, true, u, true, size, rn, rd);
8177            return;
8178        case 0x1d: /* SCVTF */
8179        case 0x5d: /* UCVTF */
8180        {
8181            bool is_signed = (opcode == 0x1d);
8182            if (!fp_access_check(s)) {
8183                return;
8184            }
8185            handle_simd_intfp_conv(s, rd, rn, 1, is_signed, 0, size);
8186            return;
8187        }
8188        case 0x3d: /* FRECPE */
8189        case 0x3f: /* FRECPX */
8190        case 0x7d: /* FRSQRTE */
8191            if (!fp_access_check(s)) {
8192                return;
8193            }
8194            handle_2misc_reciprocal(s, opcode, true, u, true, size, rn, rd);
8195            return;
8196        case 0x1a: /* FCVTNS */
8197        case 0x1b: /* FCVTMS */
8198        case 0x3a: /* FCVTPS */
8199        case 0x3b: /* FCVTZS */
8200        case 0x5a: /* FCVTNU */
8201        case 0x5b: /* FCVTMU */
8202        case 0x7a: /* FCVTPU */
8203        case 0x7b: /* FCVTZU */
8204            is_fcvt = true;
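                /* The extracted pair maps the N/P/M/Z flavours to
                 * FPROUNDING_TIEEVEN(0)/POSINF(1)/NEGINF(2)/ZERO(3),
                 * e.g. FCVTPS (0x3a) yields rmode 1.
                 */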
8205            rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
8206            break;
8207        case 0x1c: /* FCVTAS */
8208        case 0x5c: /* FCVTAU */
8209            /* TIEAWAY doesn't fit in the usual rounding mode encoding */
8210            is_fcvt = true;
8211            rmode = FPROUNDING_TIEAWAY;
8212            break;
8213        case 0x56: /* FCVTXN, FCVTXN2 */
8214            if (size == 2) {
8215                unallocated_encoding(s);
8216                return;
8217            }
8218            if (!fp_access_check(s)) {
8219                return;
8220            }
8221            handle_2misc_narrow(s, true, opcode, u, false, size - 1, rn, rd);
8222            return;
8223        default:
8224            unallocated_encoding(s);
8225            return;
8226        }
8227        break;
8228    default:
8229        unallocated_encoding(s);
8230        return;
8231    }
8232
8233    if (!fp_access_check(s)) {
8234        return;
8235    }
8236
8237    if (is_fcvt) {
8238        tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
8239        gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
8240        tcg_fpstatus = get_fpstatus_ptr();
8241    } else {
8242        TCGV_UNUSED_I32(tcg_rmode);
8243        TCGV_UNUSED_PTR(tcg_fpstatus);
8244    }
8245
8246    if (size == 3) {
8247        TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
8248        TCGv_i64 tcg_rd = tcg_temp_new_i64();
8249
8250        handle_2misc_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rmode, tcg_fpstatus);
8251        write_fp_dreg(s, rd, tcg_rd);
8252        tcg_temp_free_i64(tcg_rd);
8253        tcg_temp_free_i64(tcg_rn);
8254    } else {
8255        TCGv_i32 tcg_rn = tcg_temp_new_i32();
8256        TCGv_i32 tcg_rd = tcg_temp_new_i32();
8257
8258        read_vec_element_i32(s, tcg_rn, rn, 0, size);
8259
8260        switch (opcode) {
8261        case 0x7: /* SQABS, SQNEG */
8262        {
8263            NeonGenOneOpEnvFn *genfn;
8264            static NeonGenOneOpEnvFn * const fns[3][2] = {
8265                { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
8266                { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
8267                { gen_helper_neon_qabs_s32, gen_helper_neon_qneg_s32 },
8268            };
8269            genfn = fns[size][u];
8270            genfn(tcg_rd, cpu_env, tcg_rn);
8271            break;
8272        }
8273        case 0x1a: /* FCVTNS */
8274        case 0x1b: /* FCVTMS */
8275        case 0x1c: /* FCVTAS */
8276        case 0x3a: /* FCVTPS */
8277        case 0x3b: /* FCVTZS */
8278        {
8279            TCGv_i32 tcg_shift = tcg_const_i32(0);
8280            gen_helper_vfp_tosls(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
8281            tcg_temp_free_i32(tcg_shift);
8282            break;
8283        }
8284        case 0x5a: /* FCVTNU */
8285        case 0x5b: /* FCVTMU */
8286        case 0x5c: /* FCVTAU */
8287        case 0x7a: /* FCVTPU */
8288        case 0x7b: /* FCVTZU */
8289        {
8290            TCGv_i32 tcg_shift = tcg_const_i32(0);
8291            gen_helper_vfp_touls(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
8292            tcg_temp_free_i32(tcg_shift);
8293            break;
8294        }
8295        default:
8296            g_assert_not_reached();
8297        }
8298
8299        write_fp_sreg(s, rd, tcg_rd);
8300        tcg_temp_free_i32(tcg_rd);
8301        tcg_temp_free_i32(tcg_rn);
8302    }
8303
8304    if (is_fcvt) {
8305        gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
8306        tcg_temp_free_i32(tcg_rmode);
8307        tcg_temp_free_ptr(tcg_fpstatus);
8308    }
8309}
8310
8311/* SSHR[RA]/USHR[RA]/SRI - Vector shift right (optional rounding/accumulate/insert) */
8312static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u,
8313                                 int immh, int immb, int opcode, int rn, int rd)
8314{
8315    int size = 32 - clz32(immh) - 1;
8316    int immhb = immh << 3 | immb;
8317    int shift = 2 * (8 << size) - immhb;
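        /* The shift amount counts down from 2 * esize: e.g. immh:immb =
         * 0001:010 gives size 0 (bytes), immhb = 10 and shift = 16 - 10 = 6,
         * i.e. a right shift by 6 on each 8-bit element.
         */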
8318    bool accumulate = false;
8319    bool round = false;
8320    bool insert = false;
8321    int dsize = is_q ? 128 : 64;
8322    int esize = 8 << size;
8323    int elements = dsize/esize;
8324    TCGMemOp memop = size | (is_u ? 0 : MO_SIGN);
8325    TCGv_i64 tcg_rn = new_tmp_a64(s);
8326    TCGv_i64 tcg_rd = new_tmp_a64(s);
8327    TCGv_i64 tcg_round;
8328    int i;
8329
8330    if (extract32(immh, 3, 1) && !is_q) {
8331        unallocated_encoding(s);
8332        return;
8333    }
8334
8335    if (size > 3 && !is_q) {
8336        unallocated_encoding(s);
8337        return;
8338    }
8339
8340    if (!fp_access_check(s)) {
8341        return;
8342    }
8343
8344    switch (opcode) {
8345    case 0x02: /* SSRA / USRA (accumulate) */
8346        accumulate = true;
8347        break;
8348    case 0x04: /* SRSHR / URSHR (rounding) */
8349        round = true;
8350        break;
8351    case 0x06: /* SRSRA / URSRA (accum + rounding) */
8352        accumulate = round = true;
8353        break;
8354    case 0x08: /* SRI */
8355        insert = true;
8356        break;
8357    }
8358
8359    if (round) {
8360        uint64_t round_const = 1ULL << (shift - 1);
8361        tcg_round = tcg_const_i64(round_const);
8362    } else {
8363        TCGV_UNUSED_I64(tcg_round);
8364    }
8365
8366    for (i = 0; i < elements; i++) {
8367        read_vec_element(s, tcg_rn, rn, i, memop);
8368        if (accumulate || insert) {
8369            read_vec_element(s, tcg_rd, rd, i, memop);
8370        }
8371
8372        if (insert) {
8373            handle_shri_with_ins(tcg_rd, tcg_rn, size, shift);
8374        } else {
8375            handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
8376                                    accumulate, is_u, size, shift);
8377        }
8378
8379        write_vec_element(s, tcg_rd, rd, i, size);
8380    }
8381
8382    if (!is_q) {
8383        clear_vec_high(s, rd);
8384    }
8385
8386    if (round) {
8387        tcg_temp_free_i64(tcg_round);
8388    }
8389}
8390
8391/* SHL/SLI - Vector shift left */
8392static void handle_vec_simd_shli(DisasContext *s, bool is_q, bool insert,
8393                                int immh, int immb, int opcode, int rn, int rd)
8394{
8395    int size = 32 - clz32(immh) - 1;
8396    int immhb = immh << 3 | immb;
8397    int shift = immhb - (8 << size);
8398    int dsize = is_q ? 128 : 64;
8399    int esize = 8 << size;
8400    int elements = dsize/esize;
8401    TCGv_i64 tcg_rn = new_tmp_a64(s);
8402    TCGv_i64 tcg_rd = new_tmp_a64(s);
8403    int i;
8404
8405    if (extract32(immh, 3, 1) && !is_q) {
8406        unallocated_encoding(s);
8407        return;
8408    }
8409
8410    if (size > 3 && !is_q) {
8411        unallocated_encoding(s);
8412        return;
8413    }
8414
8415    if (!fp_access_check(s)) {
8416        return;
8417    }
8418
8419    for (i = 0; i < elements; i++) {
8420        read_vec_element(s, tcg_rn, rn, i, size);
8421        if (insert) {
8422            read_vec_element(s, tcg_rd, rd, i, size);
8423        }
8424
8425        handle_shli_with_ins(tcg_rd, tcg_rn, insert, shift);
8426
8427        write_vec_element(s, tcg_rd, rd, i, size);
8428    }
8429
8430    if (!is_q) {
8431        clear_vec_high(s, rd);
8432    }
8433}
8434
8435/* SSHLL/USHLL - Vector shift left with widening */
8436static void handle_vec_simd_wshli(DisasContext *s, bool is_q, bool is_u,
8437                                 int immh, int immb, int opcode, int rn, int rd)
8438{
8439    int size = 32 - clz32(immh) - 1;
8440    int immhb = immh << 3 | immb;
8441    int shift = immhb - (8 << size);
8442    int dsize = 64;
8443    int esize = 8 << size;
8444    int elements = dsize/esize;
8445    TCGv_i64 tcg_rn = new_tmp_a64(s);
8446    TCGv_i64 tcg_rd = new_tmp_a64(s);
8447    int i;
8448
8449    if (size >= 3) {
8450        unallocated_encoding(s);
8451        return;
8452    }
8453
8454    if (!fp_access_check(s)) {
8455        return;
8456    }
8457
8458    /* For the LL variants the store is larger than the load,
8459     * so if rd == rn we would overwrite parts of our input.
8460     * So load everything right now and use shifts in the main loop.
8461     */
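        /* E.g. for USHLL v0.8h, v0.8b, #1 with rd == rn: the eight source
         * bytes are fetched once into tcg_rn, and each pass below isolates
         * byte i by shifting, zero/sign-extends it, then applies the left
         * shift before writing out the 16-bit element.
         */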
8462    read_vec_element(s, tcg_rn, rn, is_q ? 1 : 0, MO_64);
8463
8464    for (i = 0; i < elements; i++) {
8465        tcg_gen_shri_i64(tcg_rd, tcg_rn, i * esize);
8466        ext_and_shift_reg(tcg_rd, tcg_rd, size | (!is_u << 2), 0);
8467        tcg_gen_shli_i64(tcg_rd, tcg_rd, shift);
8468        write_vec_element(s, tcg_rd, rd, i, size + 1);
8469    }
8470}
8471
8472/* SHRN/RSHRN - Shift right with narrowing (and potential rounding) */
8473static void handle_vec_simd_shrn(DisasContext *s, bool is_q,
8474                                 int immh, int immb, int opcode, int rn, int rd)
8475{
8476    int immhb = immh << 3 | immb;
8477    int size = 32 - clz32(immh) - 1;
8478    int dsize = 64;
8479    int esize = 8 << size;
8480    int elements = dsize/esize;
8481    int shift = (2 * esize) - immhb;
8482    bool round = extract32(opcode, 0, 1);
8483    TCGv_i64 tcg_rn, tcg_rd, tcg_final;
8484    TCGv_i64 tcg_round;
8485    int i;
8486
8487    if (extract32(immh, 3, 1)) {
8488        unallocated_encoding(s);
8489        return;
8490    }
8491
8492    if (!fp_access_check(s)) {
8493        return;
8494    }
8495
8496    tcg_rn = tcg_temp_new_i64();
8497    tcg_rd = tcg_temp_new_i64();
8498    tcg_final = tcg_temp_new_i64();
8499    read_vec_element(s, tcg_final, rd, is_q ? 1 : 0, MO_64);
8500
8501    if (round) {
8502        uint64_t round_const = 1ULL << (shift - 1);
8503        tcg_round = tcg_const_i64(round_const);
8504    } else {
8505        TCGV_UNUSED_I64(tcg_round);
8506    }
8507
8508    for (i = 0; i < elements; i++) {
8509        read_vec_element(s, tcg_rn, rn, i, size+1);
8510        handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
8511                                false, true, size+1, shift);
8512
8513        tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
8514    }
8515
8516    if (!is_q) {
8517        clear_vec_high(s, rd);
8518        write_vec_element(s, tcg_final, rd, 0, MO_64);
8519    } else {
8520        write_vec_element(s, tcg_final, rd, 1, MO_64);
8521    }
8522
8523    if (round) {
8524        tcg_temp_free_i64(tcg_round);
8525    }
8526    tcg_temp_free_i64(tcg_rn);
8527    tcg_temp_free_i64(tcg_rd);
8528    tcg_temp_free_i64(tcg_final);
8529    return;
8530}
8531
8532
8533/* AdvSIMD shift by immediate
8534 *  31  30   29 28         23 22  19 18  16 15    11  10 9    5 4    0
8535 * +---+---+---+-------------+------+------+--------+---+------+------+
8536 * | 0 | Q | U | 0 1 1 1 1 0 | immh | immb | opcode | 1 |  Rn  |  Rd  |
8537 * +---+---+---+-------------+------+------+--------+---+------+------+
8538 */
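    /* The element size is implied by the position of the leading 1 in immh:
     * 0001 -> 8-bit, 001x -> 16-bit, 01xx -> 32-bit, 1xxx -> 64-bit.
     * immh == 0 encodings belong to the modified-immediate group, which is
     * matched by an earlier entry in the decode table.
     */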
8539static void disas_simd_shift_imm(DisasContext *s, uint32_t insn)
8540{
8541    int rd = extract32(insn, 0, 5);
8542    int rn = extract32(insn, 5, 5);
8543    int opcode = extract32(insn, 11, 5);
8544    int immb = extract32(insn, 16, 3);
8545    int immh = extract32(insn, 19, 4);
8546    bool is_u = extract32(insn, 29, 1);
8547    bool is_q = extract32(insn, 30, 1);
8548
8549    switch (opcode) {
8550    case 0x08: /* SRI */
8551        if (!is_u) {
8552            unallocated_encoding(s);
8553            return;
8554        }
8555        /* fall through */
8556    case 0x00: /* SSHR / USHR */
8557    case 0x02: /* SSRA / USRA (accumulate) */
8558    case 0x04: /* SRSHR / URSHR (rounding) */
8559    case 0x06: /* SRSRA / URSRA (accum + rounding) */
8560        handle_vec_simd_shri(s, is_q, is_u, immh, immb, opcode, rn, rd);
8561        break;
8562    case 0x0a: /* SHL / SLI */
8563        handle_vec_simd_shli(s, is_q, is_u, immh, immb, opcode, rn, rd);
8564        break;
8565    case 0x10: /* SHRN / SQSHRUN */
8566    case 0x11: /* RSHRN / SQRSHRUN */
8567        if (is_u) {
8568            handle_vec_simd_sqshrn(s, false, is_q, false, true, immh, immb,
8569                                   opcode, rn, rd);
8570        } else {
8571            handle_vec_simd_shrn(s, is_q, immh, immb, opcode, rn, rd);
8572        }
8573        break;
8574    case 0x12: /* SQSHRN / UQSHRN */
8575    case 0x13: /* SQRSHRN / UQRSHRN */
8576        handle_vec_simd_sqshrn(s, false, is_q, is_u, is_u, immh, immb,
8577                               opcode, rn, rd);
8578        break;
8579    case 0x14: /* SSHLL / USHLL */
8580        handle_vec_simd_wshli(s, is_q, is_u, immh, immb, opcode, rn, rd);
8581        break;
8582    case 0x1c: /* SCVTF / UCVTF */
8583        handle_simd_shift_intfp_conv(s, false, is_q, is_u, immh, immb,
8584                                     opcode, rn, rd);
8585        break;
8586    case 0xc: /* SQSHLU */
8587        if (!is_u) {
8588            unallocated_encoding(s);
8589            return;
8590        }
8591        handle_simd_qshl(s, false, is_q, false, true, immh, immb, rn, rd);
8592        break;
8593    case 0xe: /* SQSHL, UQSHL */
8594        handle_simd_qshl(s, false, is_q, is_u, is_u, immh, immb, rn, rd);
8595        break;
8596    case 0x1f: /* FCVTZS/ FCVTZU */
8597        handle_simd_shift_fpint_conv(s, false, is_q, is_u, immh, immb, rn, rd);
8598        return;
8599    default:
8600        unallocated_encoding(s);
8601        return;
8602    }
8603}
8604
8605/* Generate code to do a "long" addition or subtraction, i.e. one done in
8606 * TCGv_i64 on vector lanes twice the width specified by size.
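     * For size 0 the helpers treat each TCGv_i64 as four 16-bit lanes and
     * for size 1 as two 32-bit lanes; size 2 is a plain 64-bit add/sub.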
8607 */
8608static void gen_neon_addl(int size, bool is_sub, TCGv_i64 tcg_res,
8609                          TCGv_i64 tcg_op1, TCGv_i64 tcg_op2)
8610{
8611    static NeonGenTwo64OpFn * const fns[3][2] = {
8612        { gen_helper_neon_addl_u16, gen_helper_neon_subl_u16 },
8613        { gen_helper_neon_addl_u32, gen_helper_neon_subl_u32 },
8614        { tcg_gen_add_i64, tcg_gen_sub_i64 },
8615    };
8616    NeonGenTwo64OpFn *genfn;
8617    assert(size < 3);
8618
8619    genfn = fns[size][is_sub];
8620    genfn(tcg_res, tcg_op1, tcg_op2);
8621}
8622
8623static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size,
8624                                int opcode, int rd, int rn, int rm)
8625{
8626    /* 3-reg-different widening insns: 64 x 64 -> 128 */
8627    TCGv_i64 tcg_res[2];
8628    int pass, accop;
8629
8630    tcg_res[0] = tcg_temp_new_i64();
8631    tcg_res[1] = tcg_temp_new_i64();
8632
8633    /* Does this op do an adding accumulate, a subtracting accumulate,
8634     * or no accumulate at all?
8635     */
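        /* E.g. SMLAL (8) accumulates by addition and SMLSL (10) by
         * subtraction, while SMULL (12) does not accumulate at all;
         * SQDMLAL (9) and SQDMLSL (11) also saturate when accumulating,
         * which is handled separately below.
         */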
8636    switch (opcode) {
8637    case 5:
8638    case 8:
8639    case 9:
8640        accop = 1;
8641        break;
8642    case 10:
8643    case 11:
8644        accop = -1;
8645        break;
8646    default:
8647        accop = 0;
8648        break;
8649    }
8650
8651    if (accop != 0) {
8652        read_vec_element(s, tcg_res[0], rd, 0, MO_64);
8653        read_vec_element(s, tcg_res[1], rd, 1, MO_64);
8654    }
8655
8656    /* size == 2 means two 32x32->64 operations; this is worth special
8657     * casing because we can generally handle it inline.
8658     */
8659    if (size == 2) {
8660        for (pass = 0; pass < 2; pass++) {
8661            TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8662            TCGv_i64 tcg_op2 = tcg_temp_new_i64();
8663            TCGv_i64 tcg_passres;
8664            TCGMemOp memop = MO_32 | (is_u ? 0 : MO_SIGN);
8665
8666            int elt = pass + is_q * 2;
8667
8668            read_vec_element(s, tcg_op1, rn, elt, memop);
8669            read_vec_element(s, tcg_op2, rm, elt, memop);
8670
8671            if (accop == 0) {
8672                tcg_passres = tcg_res[pass];
8673            } else {
8674                tcg_passres = tcg_temp_new_i64();
8675            }
8676
8677            switch (opcode) {
8678            case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
8679                tcg_gen_add_i64(tcg_passres, tcg_op1, tcg_op2);
8680                break;
8681            case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
8682                tcg_gen_sub_i64(tcg_passres, tcg_op1, tcg_op2);
8683                break;
8684            case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
8685            case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
8686            {
8687                TCGv_i64 tcg_tmp1 = tcg_temp_new_i64();
8688                TCGv_i64 tcg_tmp2 = tcg_temp_new_i64();
8689
8690                tcg_gen_sub_i64(tcg_tmp1, tcg_op1, tcg_op2);
8691                tcg_gen_sub_i64(tcg_tmp2, tcg_op2, tcg_op1);
8692                tcg_gen_movcond_i64(is_u ? TCG_COND_GEU : TCG_COND_GE,
8693                                    tcg_passres,
8694                                    tcg_op1, tcg_op2, tcg_tmp1, tcg_tmp2);
8695                tcg_temp_free_i64(tcg_tmp1);
8696                tcg_temp_free_i64(tcg_tmp2);
8697                break;
8698            }
8699            case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
8700            case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
8701            case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
8702                tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
8703                break;
8704            case 9: /* SQDMLAL, SQDMLAL2 */
8705            case 11: /* SQDMLSL, SQDMLSL2 */
8706            case 13: /* SQDMULL, SQDMULL2 */
8707                tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
8708                gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env,
8709                                                  tcg_passres, tcg_passres);
8710                break;
8711            default:
8712                g_assert_not_reached();
8713            }
8714
8715            if (opcode == 9 || opcode == 11) {
8716                /* saturating accumulate ops */
8717                if (accop < 0) {
8718                    tcg_gen_neg_i64(tcg_passres, tcg_passres);
8719                }
8720                gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env,
8721                                                  tcg_res[pass], tcg_passres);
8722            } else if (accop > 0) {
8723                tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
8724            } else if (accop < 0) {
8725                tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
8726            }
8727
8728            if (accop != 0) {
8729                tcg_temp_free_i64(tcg_passres);
8730            }
8731
8732            tcg_temp_free_i64(tcg_op1);
8733            tcg_temp_free_i64(tcg_op2);
8734        }
8735    } else {
8736        /* size 0 or 1, generally helper functions */
8737        for (pass = 0; pass < 2; pass++) {
8738            TCGv_i32 tcg_op1 = tcg_temp_new_i32();
8739            TCGv_i32 tcg_op2 = tcg_temp_new_i32();
8740            TCGv_i64 tcg_passres;
8741            int elt = pass + is_q * 2;
8742
8743            read_vec_element_i32(s, tcg_op1, rn, elt, MO_32);
8744            read_vec_element_i32(s, tcg_op2, rm, elt, MO_32);
8745
8746            if (accop == 0) {
8747                tcg_passres = tcg_res[pass];
8748            } else {
8749                tcg_passres = tcg_temp_new_i64();
8750            }
8751
8752            switch (opcode) {
8753            case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
8754            case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
8755            {
8756                TCGv_i64 tcg_op2_64 = tcg_temp_new_i64();
8757                static NeonGenWidenFn * const widenfns[2][2] = {
8758                    { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
8759                    { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
8760                };
8761                NeonGenWidenFn *widenfn = widenfns[size][is_u];
8762
8763                widenfn(tcg_op2_64, tcg_op2);
8764                widenfn(tcg_passres, tcg_op1);
8765                gen_neon_addl(size, (opcode == 2), tcg_passres,
8766                              tcg_passres, tcg_op2_64);
8767                tcg_temp_free_i64(tcg_op2_64);
8768                break;
8769            }
8770            case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
8771            case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
8772                if (size == 0) {
8773                    if (is_u) {
8774                        gen_helper_neon_abdl_u16(tcg_passres, tcg_op1, tcg_op2);
8775                    } else {
8776                        gen_helper_neon_abdl_s16(tcg_passres, tcg_op1, tcg_op2);
8777                    }
8778                } else {
8779                    if (is_u) {
8780                        gen_helper_neon_abdl_u32(tcg_passres, tcg_op1, tcg_op2);
8781                    } else {
8782                        gen_helper_neon_abdl_s32(tcg_passres, tcg_op1, tcg_op2);
8783                    }
8784                }
8785                break;
8786            case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
8787            case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
8788            case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
8789                if (size == 0) {
8790                    if (is_u) {
8791                        gen_helper_neon_mull_u8(tcg_passres, tcg_op1, tcg_op2);
8792                    } else {
8793                        gen_helper_neon_mull_s8(tcg_passres, tcg_op1, tcg_op2);
8794                    }
8795                } else {
8796                    if (is_u) {
8797                        gen_helper_neon_mull_u16(tcg_passres, tcg_op1, tcg_op2);
8798                    } else {
8799                        gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
8800                    }
8801                }
8802                break;
8803            case 9: /* SQDMLAL, SQDMLAL2 */
8804            case 11: /* SQDMLSL, SQDMLSL2 */
8805            case 13: /* SQDMULL, SQDMULL2 */
8806                assert(size == 1);
8807                gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
8808                gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
8809                                                  tcg_passres, tcg_passres);
8810                break;
8811            case 14: /* PMULL */
8812                assert(size == 0);
8813                gen_helper_neon_mull_p8(tcg_passres, tcg_op1, tcg_op2);
8814                break;
8815            default:
8816                g_assert_not_reached();
8817            }
8818            tcg_temp_free_i32(tcg_op1);
8819            tcg_temp_free_i32(tcg_op2);
8820
8821            if (accop != 0) {
8822                if (opcode == 9 || opcode == 11) {
8823                    /* saturating accumulate ops */
8824                    if (accop < 0) {
8825                        gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
8826                    }
8827                    gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env,
8828                                                      tcg_res[pass],
8829                                                      tcg_passres);
8830                } else {
8831                    gen_neon_addl(size, (accop < 0), tcg_res[pass],
8832                                  tcg_res[pass], tcg_passres);
8833                }
8834                tcg_temp_free_i64(tcg_passres);
8835            }
8836        }
8837    }
8838
8839    write_vec_element(s, tcg_res[0], rd, 0, MO_64);
8840    write_vec_element(s, tcg_res[1], rd, 1, MO_64);
8841    tcg_temp_free_i64(tcg_res[0]);
8842    tcg_temp_free_i64(tcg_res[1]);
8843}
8844
8845static void handle_3rd_wide(DisasContext *s, int is_q, int is_u, int size,
8846                            int opcode, int rd, int rn, int rm)
8847{
8848    TCGv_i64 tcg_res[2];
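        /* For the second-part forms (SADDW2 etc.) the narrow operand comes
         * from the high half of Vm, i.e. 32-bit chunks 2 and 3, regardless
         * of the lane size.
         */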
8849    int part = is_q ? 2 : 0;
8850    int pass;
8851
8852    for (pass = 0; pass < 2; pass++) {
8853        TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8854        TCGv_i32 tcg_op2 = tcg_temp_new_i32();
8855        TCGv_i64 tcg_op2_wide = tcg_temp_new_i64();
8856        static NeonGenWidenFn * const widenfns[3][2] = {
8857            { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
8858            { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
8859            { tcg_gen_ext_i32_i64, tcg_gen_extu_i32_i64 },
8860        };
8861        NeonGenWidenFn *widenfn = widenfns[size][is_u];
8862
8863        read_vec_element(s, tcg_op1, rn, pass, MO_64);
8864        read_vec_element_i32(s, tcg_op2, rm, part + pass, MO_32);
8865        widenfn(tcg_op2_wide, tcg_op2);
8866        tcg_temp_free_i32(tcg_op2);
8867        tcg_res[pass] = tcg_temp_new_i64();
8868        gen_neon_addl(size, (opcode == 3),
8869                      tcg_res[pass], tcg_op1, tcg_op2_wide);
8870        tcg_temp_free_i64(tcg_op1);
8871        tcg_temp_free_i64(tcg_op2_wide);
8872    }
8873
8874    for (pass = 0; pass < 2; pass++) {
8875        write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
8876        tcg_temp_free_i64(tcg_res[pass]);
8877    }
8878}
8879
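    /* Adding 1 << 31 (half of the discarded low 32 bits) before taking
     * the high word implements the round-to-nearest-up narrowing that the
     * u8/u16 round_high helpers provide for the smaller element sizes.
     */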
8880static void do_narrow_round_high_u32(TCGv_i32 res, TCGv_i64 in)
8881{
8882    tcg_gen_addi_i64(in, in, 1U << 31);
8883    tcg_gen_extrh_i64_i32(res, in);
8884}
8885
8886static void handle_3rd_narrowing(DisasContext *s, int is_q, int is_u, int size,
8887                                 int opcode, int rd, int rn, int rm)
8888{
8889    TCGv_i32 tcg_res[2];
8890    int part = is_q ? 2 : 0;
8891    int pass;
8892
8893    for (pass = 0; pass < 2; pass++) {
8894        TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8895        TCGv_i64 tcg_op2 = tcg_temp_new_i64();
8896        TCGv_i64 tcg_wideres = tcg_temp_new_i64();
8897        static NeonGenNarrowFn * const narrowfns[3][2] = {
8898            { gen_helper_neon_narrow_high_u8,
8899              gen_helper_neon_narrow_round_high_u8 },
8900            { gen_helper_neon_narrow_high_u16,
8901              gen_helper_neon_narrow_round_high_u16 },
8902            { tcg_gen_extrh_i64_i32, do_narrow_round_high_u32 },
8903        };
8904        NeonGenNarrowFn *gennarrow = narrowfns[size][is_u];
8905
8906        read_vec_element(s, tcg_op1, rn, pass, MO_64);
8907        read_vec_element(s, tcg_op2, rm, pass, MO_64);
8908
8909        gen_neon_addl(size, (opcode == 6), tcg_wideres, tcg_op1, tcg_op2);
8910
8911        tcg_temp_free_i64(tcg_op1);
8912        tcg_temp_free_i64(tcg_op2);
8913
8914        tcg_res[pass] = tcg_temp_new_i32();
8915        gennarrow(tcg_res[pass], tcg_wideres);
8916        tcg_temp_free_i64(tcg_wideres);
8917    }
8918
8919    for (pass = 0; pass < 2; pass++) {
8920        write_vec_element_i32(s, tcg_res[pass], rd, pass + part, MO_32);
8921        tcg_temp_free_i32(tcg_res[pass]);
8922    }
8923    if (!is_q) {
8924        clear_vec_high(s, rd);
8925    }
8926}
8927
8928static void handle_pmull_64(DisasContext *s, int is_q, int rd, int rn, int rm)
8929{
8930    /* PMULL of 64 x 64 -> 128 is an odd special case because it
8931     * is the only three-reg-diff instruction which produces a
8932     * 128-bit wide result from a single operation. However, since
8933     * it's possible to calculate the two halves more or less
8934     * separately, we just use two helper calls.
8935     */
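        /* PMULL is a carry-less (polynomial, GF(2)) multiply: partial
         * products are combined with XOR instead of addition, so e.g.
         * 0b11 * 0b11 = 0b101, i.e. (x + 1)^2 = x^2 + 1.  The two helper
         * calls produce the low and high 64 bits of the 127-bit product.
         */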
8936    TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8937    TCGv_i64 tcg_op2 = tcg_temp_new_i64();
8938    TCGv_i64 tcg_res = tcg_temp_new_i64();
8939
8940    read_vec_element(s, tcg_op1, rn, is_q, MO_64);
8941    read_vec_element(s, tcg_op2, rm, is_q, MO_64);
8942    gen_helper_neon_pmull_64_lo(tcg_res, tcg_op1, tcg_op2);
8943    write_vec_element(s, tcg_res, rd, 0, MO_64);
8944    gen_helper_neon_pmull_64_hi(tcg_res, tcg_op1, tcg_op2);
8945    write_vec_element(s, tcg_res, rd, 1, MO_64);
8946
8947    tcg_temp_free_i64(tcg_op1);
8948    tcg_temp_free_i64(tcg_op2);
8949    tcg_temp_free_i64(tcg_res);
8950}
8951
8952/* AdvSIMD three different
8953 *   31  30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
8954 * +---+---+---+-----------+------+---+------+--------+-----+------+------+
8955 * | 0 | Q | U | 0 1 1 1 0 | size | 1 |  Rm  | opcode | 0 0 |  Rn  |  Rd  |
8956 * +---+---+---+-----------+------+---+------+--------+-----+------+------+
8957 */
8958static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn)
8959{
8960    /* Instructions in this group fall into three basic classes
8961     * (in each case with the operation working on each element in
8962     * the input vectors):
8963     * (1) widening 64 x 64 -> 128 (with possibly Vd as an extra
8964     *     128 bit input)
8965     * (2) wide 64 x 128 -> 128
8966     * (3) narrowing 128 x 128 -> 64
8967     * Here we do initial decode, catch unallocated cases and
8968     * dispatch to separate functions for each class.
8969     */
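        /* For example, SADDL falls in class (1), SADDW in class (2) and
         * ADDHN in class (3).
         */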
8970    int is_q = extract32(insn, 30, 1);
8971    int is_u = extract32(insn, 29, 1);
8972    int size = extract32(insn, 22, 2);
8973    int opcode = extract32(insn, 12, 4);
8974    int rm = extract32(insn, 16, 5);
8975    int rn = extract32(insn, 5, 5);
8976    int rd = extract32(insn, 0, 5);
8977
8978    switch (opcode) {
8979    case 1: /* SADDW, SADDW2, UADDW, UADDW2 */
8980    case 3: /* SSUBW, SSUBW2, USUBW, USUBW2 */
8981        /* 64 x 128 -> 128 */
8982        if (size == 3) {
8983            unallocated_encoding(s);
8984            return;
8985        }
8986        if (!fp_access_check(s)) {
8987            return;
8988        }
8989        handle_3rd_wide(s, is_q, is_u, size, opcode, rd, rn, rm);
8990        break;
8991    case 4: /* ADDHN, ADDHN2, RADDHN, RADDHN2 */
8992    case 6: /* SUBHN, SUBHN2, RSUBHN, RSUBHN2 */
8993        /* 128 x 128 -> 64 */
8994        if (size == 3) {
8995            unallocated_encoding(s);
8996            return;
8997        }
8998        if (!fp_access_check(s)) {
8999            return;
9000        }
9001        handle_3rd_narrowing(s, is_q, is_u, size, opcode, rd, rn, rm);
9002        break;
9003    case 14: /* PMULL, PMULL2 */
9004        if (is_u || size == 1 || size == 2) {
9005            unallocated_encoding(s);
9006            return;
9007        }
9008        if (size == 3) {
9009            if (!arm_dc_feature(s, ARM_FEATURE_V8_PMULL)) {
9010                unallocated_encoding(s);
9011                return;
9012            }
9013            if (!fp_access_check(s)) {
9014                return;
9015            }
9016            handle_pmull_64(s, is_q, rd, rn, rm);
9017            return;
9018        }
9019        goto is_widening;
9020    case 9: /* SQDMLAL, SQDMLAL2 */
9021    case 11: /* SQDMLSL, SQDMLSL2 */
9022    case 13: /* SQDMULL, SQDMULL2 */
9023        if (is_u || size == 0) {
9024            unallocated_encoding(s);
9025            return;
9026        }
9027        /* fall through */
9028    case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
9029    case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
9030    case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
9031    case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
9032    case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
9033    case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
9034    case 12: /* SMULL, SMULL2, UMULL, UMULL2 */
9035        /* 64 x 64 -> 128 */
9036        if (size == 3) {
9037            unallocated_encoding(s);
9038            return;
9039        }
9040    is_widening:
9041        if (!fp_access_check(s)) {
9042            return;
9043        }
9044
9045        handle_3rd_widening(s, is_q, is_u, size, opcode, rd, rn, rm);
9046        break;
9047    default:
9048        /* opcode 15 not allocated */
9049        unallocated_encoding(s);
9050        break;
9051    }
9052}
9053
9054/* Logic op (opcode == 3) subgroup of C3.6.16. */
9055static void disas_simd_3same_logic(DisasContext *s, uint32_t insn)
9056{
9057    int rd = extract32(insn, 0, 5);
9058    int rn = extract32(insn, 5, 5);
9059    int rm = extract32(insn, 16, 5);
9060    int size = extract32(insn, 22, 2);
9061    bool is_u = extract32(insn, 29, 1);
9062    bool is_q = extract32(insn, 30, 1);
9063    TCGv_i64 tcg_op1, tcg_op2, tcg_res[2];
9064    int pass;
9065
9066    if (!fp_access_check(s)) {
9067        return;
9068    }
9069
9070    tcg_op1 = tcg_temp_new_i64();
9071    tcg_op2 = tcg_temp_new_i64();
9072    tcg_res[0] = tcg_temp_new_i64();
9073    tcg_res[1] = tcg_temp_new_i64();
9074
9075    for (pass = 0; pass < (is_q ? 2 : 1); pass++) {
9076        read_vec_element(s, tcg_op1, rn, pass, MO_64);
9077        read_vec_element(s, tcg_op2, rm, pass, MO_64);
9078
9079        if (!is_u) {
9080            switch (size) {
9081            case 0: /* AND */
9082                tcg_gen_and_i64(tcg_res[pass], tcg_op1, tcg_op2);
9083                break;
9084            case 1: /* BIC */
9085                tcg_gen_andc_i64(tcg_res[pass], tcg_op1, tcg_op2);
9086                break;
9087            case 2: /* ORR */
9088                tcg_gen_or_i64(tcg_res[pass], tcg_op1, tcg_op2);
9089                break;
9090            case 3: /* ORN */
9091                tcg_gen_orc_i64(tcg_res[pass], tcg_op1, tcg_op2);
9092                break;
9093            }
9094        } else {
9095            if (size != 0) {
9096                /* B* ops need res loaded to operate on */
9097                read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
9098            }
9099
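                /* BSL, BIT and BIF are all forms of the bitwise select
                 *   result = b ^ ((a ^ b) & mask)
                 * differing only in which register supplies the mask:
                 * rd for BSL, rm for BIT and ~rm for BIF.
                 */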
9100            switch (size) {
9101            case 0: /* EOR */
9102                tcg_gen_xor_i64(tcg_res[pass], tcg_op1, tcg_op2);
9103                break;
9104            case 1: /* BSL bitwise select */
9105                tcg_gen_xor_i64(tcg_op1, tcg_op1, tcg_op2);
9106                tcg_gen_and_i64(tcg_op1, tcg_op1, tcg_res[pass]);
9107                tcg_gen_xor_i64(tcg_res[pass], tcg_op2, tcg_op1);
9108                break;
9109            case 2: /* BIT, bitwise insert if true */
9110                tcg_gen_xor_i64(tcg_op1, tcg_op1, tcg_res[pass]);
9111                tcg_gen_and_i64(tcg_op1, tcg_op1, tcg_op2);
9112                tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
9113                break;
9114            case 3: /* BIF, bitwise insert if false */
9115                tcg_gen_xor_i64(tcg_op1, tcg_op1, tcg_res[pass]);
9116                tcg_gen_andc_i64(tcg_op1, tcg_op1, tcg_op2);
9117                tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
9118                break;
9119            }
9120        }
9121    }
9122
9123    write_vec_element(s, tcg_res[0], rd, 0, MO_64);
9124    if (!is_q) {
9125        tcg_gen_movi_i64(tcg_res[1], 0);
9126    }
9127    write_vec_element(s, tcg_res[1], rd, 1, MO_64);
9128
9129    tcg_temp_free_i64(tcg_op1);
9130    tcg_temp_free_i64(tcg_op2);
9131    tcg_temp_free_i64(tcg_res[0]);
9132    tcg_temp_free_i64(tcg_res[1]);
9133}
9134
9135/* Helper functions for 32 bit comparisons */
9136static void gen_max_s32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
9137{
9138    tcg_gen_movcond_i32(TCG_COND_GE, res, op1, op2, op1, op2);
9139}
9140
9141static void gen_max_u32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
9142{
9143    tcg_gen_movcond_i32(TCG_COND_GEU, res, op1, op2, op1, op2);
9144}
9145
9146static void gen_min_s32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
9147{
9148    tcg_gen_movcond_i32(TCG_COND_LE, res, op1, op2, op1, op2);
9149}
9150
9151static void gen_min_u32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
9152{
9153    tcg_gen_movcond_i32(TCG_COND_LEU, res, op1, op2, op1, op2);
9154}
9155
9156/* Pairwise op subgroup of C3.6.16.
9157 *
9158 * This is called either directly or via handle_3same_float for float pairwise
9159 * operations where the opcode and size are calculated differently.
9160 */
9161static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode,
9162                                   int size, int rn, int rm, int rd)
9163{
9164    TCGv_ptr fpst;
9165    int pass;
9166
9167    /* Floating point operations need fpst */
9168    if (opcode >= 0x58) {
9169        fpst = get_fpstatus_ptr();
9170    } else {
9171        TCGV_UNUSED_PTR(fpst);
9172    }
9173
9174    if (!fp_access_check(s)) {
9175        return;
9176    }
9177
9178    /* These operations work on the concatenated rm:rn, with each pair of
9179     * adjacent elements being operated on to produce an element in the result.
9180     */
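        /* E.g. for ADDP v0.4s, result elements 0-1 are the pair sums of
         * Vn and elements 2-3 are the pair sums of Vm.
         */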
9181    if (size == 3) {
9182        TCGv_i64 tcg_res[2];
9183
9184        for (pass = 0; pass < 2; pass++) {
9185            TCGv_i64 tcg_op1 = tcg_temp_new_i64();
9186            TCGv_i64 tcg_op2 = tcg_temp_new_i64();
9187            int passreg = (pass == 0) ? rn : rm;
9188
9189            read_vec_element(s, tcg_op1, passreg, 0, MO_64);
9190            read_vec_element(s, tcg_op2, passreg, 1, MO_64);
9191            tcg_res[pass] = tcg_temp_new_i64();
9192
9193            switch (opcode) {
9194            case 0x17: /* ADDP */
9195                tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
9196                break;
9197            case 0x58: /* FMAXNMP */
9198                gen_helper_vfp_maxnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9199                break;
9200            case 0x5a: /* FADDP */
9201                gen_helper_vfp_addd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9202                break;
9203            case 0x5e: /* FMAXP */
9204                gen_helper_vfp_maxd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9205                break;
9206            case 0x78: /* FMINNMP */
9207                gen_helper_vfp_minnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9208                break;
9209            case 0x7e: /* FMINP */
9210                gen_helper_vfp_mind(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9211                break;
9212            default:
9213                g_assert_not_reached();
9214            }
9215
9216            tcg_temp_free_i64(tcg_op1);
9217            tcg_temp_free_i64(tcg_op2);
9218        }
9219
9220        for (pass = 0; pass < 2; pass++) {
9221            write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
9222            tcg_temp_free_i64(tcg_res[pass]);
9223        }
9224    } else {
9225        int maxpass = is_q ? 4 : 2;
9226        TCGv_i32 tcg_res[4];
9227
9228        for (pass = 0; pass < maxpass; pass++) {
9229            TCGv_i32 tcg_op1 = tcg_temp_new_i32();
9230            TCGv_i32 tcg_op2 = tcg_temp_new_i32();
9231            NeonGenTwoOpFn *genfn = NULL;
9232            int passreg = pass < (maxpass / 2) ? rn : rm;
9233            int passelt = (is_q && (pass & 1)) ? 2 : 0;
9234
9235            read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_32);
9236            read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_32);
9237            tcg_res[pass] = tcg_temp_new_i32();
9238
9239            switch (opcode) {
9240            case 0x17: /* ADDP */
9241            {
9242                static NeonGenTwoOpFn * const fns[3] = {
9243                    gen_helper_neon_padd_u8,
9244                    gen_helper_neon_padd_u16,
9245                    tcg_gen_add_i32,
9246                };
9247                genfn = fns[size];
9248                break;
9249            }
9250            case 0x14: /* SMAXP, UMAXP */
9251            {
9252                static NeonGenTwoOpFn * const fns[3][2] = {
9253                    { gen_helper_neon_pmax_s8, gen_helper_neon_pmax_u8 },
9254                    { gen_helper_neon_pmax_s16, gen_helper_neon_pmax_u16 },
9255                    { gen_max_s32, gen_max_u32 },
9256                };
9257                genfn = fns[size][u];
9258                break;
9259            }
9260            case 0x15: /* SMINP, UMINP */
9261            {
9262                static NeonGenTwoOpFn * const fns[3][2] = {
9263                    { gen_helper_neon_pmin_s8, gen_helper_neon_pmin_u8 },
9264                    { gen_helper_neon_pmin_s16, gen_helper_neon_pmin_u16 },
9265                    { gen_min_s32, gen_min_u32 },
9266                };
9267                genfn = fns[size][u];
9268                break;
9269            }
9270            /* The FP operations are all on single floats (32 bit) */
9271            case 0x58: /* FMAXNMP */
9272                gen_helper_vfp_maxnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9273                break;
9274            case 0x5a: /* FADDP */
9275                gen_helper_vfp_adds(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9276                break;
9277            case 0x5e: /* FMAXP */
9278                gen_helper_vfp_maxs(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9279                break;
9280            case 0x78: /* FMINNMP */
9281                gen_helper_vfp_minnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9282                break;
9283            case 0x7e: /* FMINP */
9284                gen_helper_vfp_mins(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9285                break;
9286            default:
9287                g_assert_not_reached();
9288            }
9289
9290            /* The FP cases were generated directly above; call genfn for the rest */
9291            if (genfn) {
9292                genfn(tcg_res[pass], tcg_op1, tcg_op2);
9293            }
9294
9295            tcg_temp_free_i32(tcg_op1);
9296            tcg_temp_free_i32(tcg_op2);
9297        }
9298
9299        for (pass = 0; pass < maxpass; pass++) {
9300            write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
9301            tcg_temp_free_i32(tcg_res[pass]);
9302        }
9303        if (!is_q) {
9304            clear_vec_high(s, rd);
9305        }
9306    }
9307
9308    if (!TCGV_IS_UNUSED_PTR(fpst)) {
9309        tcg_temp_free_ptr(fpst);
9310    }
9311}
9312
9313/* Floating point op subgroup of C3.6.16. */
9314static void disas_simd_3same_float(DisasContext *s, uint32_t insn)
9315{
9316    /* For floating point ops, the U, size[1] and opcode bits
9317     * together indicate the operation. size[0] indicates single
9318     * or double.
9319     */
9320    int fpopcode = extract32(insn, 11, 5)
9321        | (extract32(insn, 23, 1) << 5)
9322        | (extract32(insn, 29, 1) << 6);
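        /* E.g. FADD is fpopcode 0x1a (U=0, size[1]=0, opcode 0x1a), FSUB is
         * 0x3a (size[1] set) and FDIV is 0x5f (U set, opcode 0x1f).
         */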
9323    int is_q = extract32(insn, 30, 1);
9324    int size = extract32(insn, 22, 1);
9325    int rm = extract32(insn, 16, 5);
9326    int rn = extract32(insn, 5, 5);
9327    int rd = extract32(insn, 0, 5);
9328
9329    int datasize = is_q ? 128 : 64;
9330    int esize = 32 << size;
9331    int elements = datasize / esize;
9332
9333    if (size == 1 && !is_q) {
9334        unallocated_encoding(s);
9335        return;
9336    }
9337
9338    switch (fpopcode) {
9339    case 0x58: /* FMAXNMP */
9340    case 0x5a: /* FADDP */
9341    case 0x5e: /* FMAXP */
9342    case 0x78: /* FMINNMP */
9343    case 0x7e: /* FMINP */
9344        if (size && !is_q) {
9345            unallocated_encoding(s);
9346            return;
9347        }
9348        handle_simd_3same_pair(s, is_q, 0, fpopcode, size ? MO_64 : MO_32,
9349                               rn, rm, rd);
9350        return;
9351    case 0x1b: /* FMULX */
9352    case 0x1f: /* FRECPS */
9353    case 0x3f: /* FRSQRTS */
9354    case 0x5d: /* FACGE */
9355    case 0x7d: /* FACGT */
9356    case 0x19: /* FMLA */
9357    case 0x39: /* FMLS */
9358    case 0x18: /* FMAXNM */
9359    case 0x1a: /* FADD */
9360    case 0x1c: /* FCMEQ */
9361    case 0x1e: /* FMAX */
9362    case 0x38: /* FMINNM */
9363    case 0x3a: /* FSUB */
9364    case 0x3e: /* FMIN */
9365    case 0x5b: /* FMUL */
9366    case 0x5c: /* FCMGE */
9367    case 0x5f: /* FDIV */
9368    case 0x7a: /* FABD */
9369    case 0x7c: /* FCMGT */
9370        if (!fp_access_check(s)) {
9371            return;
9372        }
9373
9374        handle_3same_float(s, size, elements, fpopcode, rd, rn, rm);
9375        return;
9376    default:
9377        unallocated_encoding(s);
9378        return;
9379    }
9380}
9381
9382/* Integer op subgroup of C3.6.16. */
9383static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
9384{
9385    int is_q = extract32(insn, 30, 1);
9386    int u = extract32(insn, 29, 1);
9387    int size = extract32(insn, 22, 2);
9388    int opcode = extract32(insn, 11, 5);
9389    int rm = extract32(insn, 16, 5);
9390    int rn = extract32(insn, 5, 5);
9391    int rd = extract32(insn, 0, 5);
9392    int pass;
9393
9394    switch (opcode) {
9395    case 0x13: /* MUL, PMUL */
9396        if (u && size != 0) {
9397            unallocated_encoding(s);
9398            return;
9399        }
9400        /* fall through */
9401    case 0x0: /* SHADD, UHADD */
9402    case 0x2: /* SRHADD, URHADD */
9403    case 0x4: /* SHSUB, UHSUB */
9404    case 0xc: /* SMAX, UMAX */
9405    case 0xd: /* SMIN, UMIN */
9406    case 0xe: /* SABD, UABD */
9407    case 0xf: /* SABA, UABA */
9408    case 0x12: /* MLA, MLS */
9409        if (size == 3) {
9410            unallocated_encoding(s);
9411            return;
9412        }
9413        break;
9414    case 0x16: /* SQDMULH, SQRDMULH */
9415        if (size == 0 || size == 3) {
9416            unallocated_encoding(s);
9417            return;
9418        }
9419        break;
9420    default:
9421        if (size == 3 && !is_q) {
9422            unallocated_encoding(s);
9423            return;
9424        }
9425        break;
9426    }
9427
9428    if (!fp_access_check(s)) {
9429        return;
9430    }
9431
9432    if (size == 3) {
9433        assert(is_q);
9434        for (pass = 0; pass < 2; pass++) {
9435            TCGv_i64 tcg_op1 = tcg_temp_new_i64();
9436            TCGv_i64 tcg_op2 = tcg_temp_new_i64();
9437            TCGv_i64 tcg_res = tcg_temp_new_i64();
9438
9439            read_vec_element(s, tcg_op1, rn, pass, MO_64);
9440            read_vec_element(s, tcg_op2, rm, pass, MO_64);
9441
9442            handle_3same_64(s, opcode, u, tcg_res, tcg_op1, tcg_op2);
9443
9444            write_vec_element(s, tcg_res, rd, pass, MO_64);
9445
9446            tcg_temp_free_i64(tcg_res);
9447            tcg_temp_free_i64(tcg_op1);
9448            tcg_temp_free_i64(tcg_op2);
9449        }
9450    } else {
9451        for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
9452            TCGv_i32 tcg_op1 = tcg_temp_new_i32();
9453            TCGv_i32 tcg_op2 = tcg_temp_new_i32();
9454            TCGv_i32 tcg_res = tcg_temp_new_i32();
9455            NeonGenTwoOpFn *genfn = NULL;
9456            NeonGenTwoOpEnvFn *genenvfn = NULL;
9457
9458            read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
9459            read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);
9460
9461            switch (opcode) {
9462            case 0x0: /* SHADD, UHADD */
9463            {
9464                static NeonGenTwoOpFn * const fns[3][2] = {
9465                    { gen_helper_neon_hadd_s8, gen_helper_neon_hadd_u8 },
9466                    { gen_helper_neon_hadd_s16, gen_helper_neon_hadd_u16 },
9467                    { gen_helper_neon_hadd_s32, gen_helper_neon_hadd_u32 },
9468                };
9469                genfn = fns[size][u];
9470                break;
9471            }
9472            case 0x1: /* SQADD, UQADD */
9473            {
9474                static NeonGenTwoOpEnvFn * const fns[3][2] = {
9475                    { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 },
9476                    { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 },
9477                    { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 },
9478                };
9479                genenvfn = fns[size][u];
9480                break;
9481            }
9482            case 0x2: /* SRHADD, URHADD */
9483            {
9484                static NeonGenTwoOpFn * const fns[3][2] = {
9485                    { gen_helper_neon_rhadd_s8, gen_helper_neon_rhadd_u8 },
9486                    { gen_helper_neon_rhadd_s16, gen_helper_neon_rhadd_u16 },
9487                    { gen_helper_neon_rhadd_s32, gen_helper_neon_rhadd_u32 },
9488                };
9489                genfn = fns[size][u];
9490                break;
9491            }
9492            case 0x4: /* SHSUB, UHSUB */
9493            {
9494                static NeonGenTwoOpFn * const fns[3][2] = {
9495                    { gen_helper_neon_hsub_s8, gen_helper_neon_hsub_u8 },
9496                    { gen_helper_neon_hsub_s16, gen_helper_neon_hsub_u16 },
9497                    { gen_helper_neon_hsub_s32, gen_helper_neon_hsub_u32 },
9498                };
9499                genfn = fns[size][u];
9500                break;
9501            }
9502            case 0x5: /* SQSUB, UQSUB */
9503            {
9504                static NeonGenTwoOpEnvFn * const fns[3][2] = {
9505                    { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 },
9506                    { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 },
9507                    { gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 },
9508                };
9509                genenvfn = fns[size][u];
9510                break;
9511            }
9512            case 0x6: /* CMGT, CMHI */
9513            {
9514                static NeonGenTwoOpFn * const fns[3][2] = {
9515                    { gen_helper_neon_cgt_s8, gen_helper_neon_cgt_u8 },
9516                    { gen_helper_neon_cgt_s16, gen_helper_neon_cgt_u16 },
9517                    { gen_helper_neon_cgt_s32, gen_helper_neon_cgt_u32 },
9518                };
9519                genfn = fns[size][u];
9520                break;
9521            }
9522            case 0x7: /* CMGE, CMHS */
9523            {
9524                static NeonGenTwoOpFn * const fns[3][2] = {
9525                    { gen_helper_neon_cge_s8, gen_helper_neon_cge_u8 },
9526                    { gen_helper_neon_cge_s16, gen_helper_neon_cge_u16 },
9527                    { gen_helper_neon_cge_s32, gen_helper_neon_cge_u32 },
9528                };
9529                genfn = fns[size][u];
9530                break;
9531            }
9532            case 0x8: /* SSHL, USHL */
9533            {
9534                static NeonGenTwoOpFn * const fns[3][2] = {
9535                    { gen_helper_neon_shl_s8, gen_helper_neon_shl_u8 },
9536                    { gen_helper_neon_shl_s16, gen_helper_neon_shl_u16 },
9537                    { gen_helper_neon_shl_s32, gen_helper_neon_shl_u32 },
9538                };
9539                genfn = fns[size][u];
9540                break;
9541            }
9542            case 0x9: /* SQSHL, UQSHL */
9543            {
9544                static NeonGenTwoOpEnvFn * const fns[3][2] = {
9545                    { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
9546                    { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
9547                    { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
9548                };
9549                genenvfn = fns[size][u];
9550                break;
9551            }
9552            case 0xa: /* SRSHL, URSHL */
9553            {
9554                static NeonGenTwoOpFn * const fns[3][2] = {
9555                    { gen_helper_neon_rshl_s8, gen_helper_neon_rshl_u8 },
9556                    { gen_helper_neon_rshl_s16, gen_helper_neon_rshl_u16 },
9557                    { gen_helper_neon_rshl_s32, gen_helper_neon_rshl_u32 },
9558                };
9559                genfn = fns[size][u];
9560                break;
9561            }
9562            case 0xb: /* SQRSHL, UQRSHL */
9563            {
9564                static NeonGenTwoOpEnvFn * const fns[3][2] = {
9565                    { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
9566                    { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
9567                    { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
9568                };
9569                genenvfn = fns[size][u];
9570                break;
9571            }
9572            case 0xc: /* SMAX, UMAX */
9573            {
9574                static NeonGenTwoOpFn * const fns[3][2] = {
9575                    { gen_helper_neon_max_s8, gen_helper_neon_max_u8 },
9576                    { gen_helper_neon_max_s16, gen_helper_neon_max_u16 },
9577                    { gen_max_s32, gen_max_u32 },
9578                };
9579                genfn = fns[size][u];
9580                break;
9581            }
9582
9583            case 0xd: /* SMIN, UMIN */
9584            {
9585                static NeonGenTwoOpFn * const fns[3][2] = {
9586                    { gen_helper_neon_min_s8, gen_helper_neon_min_u8 },
9587                    { gen_helper_neon_min_s16, gen_helper_neon_min_u16 },
9588                    { gen_min_s32, gen_min_u32 },
9589                };
9590                genfn = fns[size][u];
9591                break;
9592            }
9593            case 0xe: /* SABD, UABD */
9594            case 0xf: /* SABA, UABA */
9595            {
9596                static NeonGenTwoOpFn * const fns[3][2] = {
9597                    { gen_helper_neon_abd_s8, gen_helper_neon_abd_u8 },
9598                    { gen_helper_neon_abd_s16, gen_helper_neon_abd_u16 },
9599                    { gen_helper_neon_abd_s32, gen_helper_neon_abd_u32 },
9600                };
9601                genfn = fns[size][u];
9602                break;
9603            }
9604            case 0x10: /* ADD, SUB */
9605            {
9606                static NeonGenTwoOpFn * const fns[3][2] = {
9607                    { gen_helper_neon_add_u8, gen_helper_neon_sub_u8 },
9608                    { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
9609                    { tcg_gen_add_i32, tcg_gen_sub_i32 },
9610                };
9611                genfn = fns[size][u];
9612                break;
9613            }
9614            case 0x11: /* CMTST, CMEQ */
9615            {
9616                static NeonGenTwoOpFn * const fns[3][2] = {
9617                    { gen_helper_neon_tst_u8, gen_helper_neon_ceq_u8 },
9618                    { gen_helper_neon_tst_u16, gen_helper_neon_ceq_u16 },
9619                    { gen_helper_neon_tst_u32, gen_helper_neon_ceq_u32 },
9620                };
9621                genfn = fns[size][u];
9622                break;
9623            }
9624            case 0x13: /* MUL, PMUL */
9625                if (u) {
9626                    /* PMUL */
9627                    assert(size == 0);
9628                    genfn = gen_helper_neon_mul_p8;
9629                    break;
9630                }
9631                /* fall through : MUL */
9632            case 0x12: /* MLA, MLS */
9633            {
9634                static NeonGenTwoOpFn * const fns[3] = {
9635                    gen_helper_neon_mul_u8,
9636                    gen_helper_neon_mul_u16,
9637                    tcg_gen_mul_i32,
9638                };
9639                genfn = fns[size];
9640                break;
9641            }
9642            case 0x16: /* SQDMULH, SQRDMULH */
9643            {
9644                static NeonGenTwoOpEnvFn * const fns[2][2] = {
9645                    { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 },
9646                    { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 },
9647                };
9648                assert(size == 1 || size == 2);
9649                genenvfn = fns[size - 1][u];
9650                break;
9651            }
9652            default:
9653                g_assert_not_reached();
9654            }
9655
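                /* Helpers that take cpu_env (genenvfn) are the saturating
                 * ones, which may need to set the cumulative saturation
                 * flag (QC) in FPSR; the plain genfn helpers are pure
                 * value-to-value operations.
                 */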
9656            if (genenvfn) {
9657                genenvfn(tcg_res, cpu_env, tcg_op1, tcg_op2);
9658            } else {
9659                genfn(tcg_res, tcg_op1, tcg_op2);
9660            }
9661
9662            if (opcode == 0xf || opcode == 0x12) {
9663                /* SABA, UABA, MLA, MLS: accumulating ops */
9664                static NeonGenTwoOpFn * const fns[3][2] = {
9665                    { gen_helper_neon_add_u8, gen_helper_neon_sub_u8 },
9666                    { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
9667                    { tcg_gen_add_i32, tcg_gen_sub_i32 },
9668                };
9669                bool is_sub = (opcode == 0x12 && u); /* MLS */
9670
9671                genfn = fns[size][is_sub];
9672                read_vec_element_i32(s, tcg_op1, rd, pass, MO_32);
9673                genfn(tcg_res, tcg_op1, tcg_res);
9674            }
9675
9676            write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
9677
9678            tcg_temp_free_i32(tcg_res);
9679            tcg_temp_free_i32(tcg_op1);
9680            tcg_temp_free_i32(tcg_op2);
9681        }
9682    }
9683
9684    if (!is_q) {
9685        clear_vec_high(s, rd);
9686    }
9687}
9688
9689/* AdvSIMD three same
9690 *  31  30  29  28       24 23  22  21 20  16 15    11  10 9    5 4    0
9691 * +---+---+---+-----------+------+---+------+--------+---+------+------+
9692 * | 0 | Q | U | 0 1 1 1 0 | size | 1 |  Rm  | opcode | 1 |  Rn  |  Rd  |
9693 * +---+---+---+-----------+------+---+------+--------+---+------+------+
9694 */
9695static void disas_simd_three_reg_same(DisasContext *s, uint32_t insn)
9696{
9697    int opcode = extract32(insn, 11, 5);
9698
9699    switch (opcode) {
9700    case 0x3: /* logic ops */
9701        disas_simd_3same_logic(s, insn);
9702        break;
9703    case 0x17: /* ADDP */
9704    case 0x14: /* SMAXP, UMAXP */
9705    case 0x15: /* SMINP, UMINP */
9706    {
9707        /* Pairwise operations */
9708        int is_q = extract32(insn, 30, 1);
9709        int u = extract32(insn, 29, 1);
9710        int size = extract32(insn, 22, 2);
9711        int rm = extract32(insn, 16, 5);
9712        int rn = extract32(insn, 5, 5);
9713        int rd = extract32(insn, 0, 5);
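            /* ADDP has no unsigned form (U must be 0) and is the only
             * pairwise op here allowing 64-bit elements, and then only
             * in the 128-bit (Q=1) form; the pairwise min/max ops never
             * take size == 3.
             */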
9714        if (opcode == 0x17) {
9715            if (u || (size == 3 && !is_q)) {
9716                unallocated_encoding(s);
9717                return;
9718            }
9719        } else {
9720            if (size == 3) {
9721                unallocated_encoding(s);
9722                return;
9723            }
9724        }
9725        handle_simd_3same_pair(s, is_q, u, opcode, size, rn, rm, rd);
9726        break;
9727    }
9728    case 0x18 ... 0x31:
9729        /* floating point ops, sz[1] and U are part of opcode */
9730        disas_simd_3same_float(s, insn);
9731        break;
9732    default:
9733        disas_simd_3same_int(s, insn);
9734        break;
9735    }
9736}
9737
9738static void handle_2misc_widening(DisasContext *s, int opcode, bool is_q,
9739                                  int size, int rn, int rd)
9740{
9741    /* Handle 2-reg-misc ops which are widening (so each size element
9742     * in the source becomes a 2*size element in the destination).
9743     * The only instruction like this is FCVTL.
9744     */
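    /* FCVTL reads its inputs from the low half of the source register;
     * FCVTL2 (Q=1) reads the high half, which is why srcelt below starts
     * at the midpoint when is_q is set.
     */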
9745    int pass;
9746
9747    if (size == 3) {
9748        /* 32 -> 64 bit fp conversion */
9749        TCGv_i64 tcg_res[2];
9750        int srcelt = is_q ? 2 : 0;
9751
9752        for (pass = 0; pass < 2; pass++) {
9753            TCGv_i32 tcg_op = tcg_temp_new_i32();
9754            tcg_res[pass] = tcg_temp_new_i64();
9755
9756            read_vec_element_i32(s, tcg_op, rn, srcelt + pass, MO_32);
9757            gen_helper_vfp_fcvtds(tcg_res[pass], tcg_op, cpu_env);
9758            tcg_temp_free_i32(tcg_op);
9759        }
9760        for (pass = 0; pass < 2; pass++) {
9761            write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
9762            tcg_temp_free_i64(tcg_res[pass]);
9763        }
9764    } else {
9765        /* 16 -> 32 bit fp conversion */
9766        int srcelt = is_q ? 4 : 0;
9767        TCGv_i32 tcg_res[4];
9768
9769        for (pass = 0; pass < 4; pass++) {
9770            tcg_res[pass] = tcg_temp_new_i32();
9771
9772            read_vec_element_i32(s, tcg_res[pass], rn, srcelt + pass, MO_16);
9773            gen_helper_vfp_fcvt_f16_to_f32(tcg_res[pass], tcg_res[pass],
9774                                           cpu_env);
9775        }
9776        for (pass = 0; pass < 4; pass++) {
9777            write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
9778            tcg_temp_free_i32(tcg_res[pass]);
9779        }
9780    }
9781}
9782
9783static void handle_rev(DisasContext *s, int opcode, bool u,
9784                       bool is_q, int size, int rn, int rd)
9785{
9786    int op = (opcode << 1) | u;
9787    int opsz = op + size;
9788    int grp_size = 3 - opsz;
9789    int dsize = is_q ? 128 : 64;
9790    int i;
9791
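    /* op says which REV this is: 0 for REV64, 1 for REV32, 2 for REV16.
     * Elements are reversed within groups of (64 >> op) bits, so grp_size
     * ends up as log2 of the element count per group: e.g. REV32 on
     * 16-bit elements has op = 1, size = 1, grp_size = 1, i.e. swap each
     * pair of halfwords. opsz >= 3 would make the element at least as
     * wide as its group, which is an unallocated encoding.
     */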
9792    if (opsz >= 3) {
9793        unallocated_encoding(s);
9794        return;
9795    }
9796
9797    if (!fp_access_check(s)) {
9798        return;
9799    }
9800
9801    if (size == 0) {
9802        /* Special-case bytes: use a bswap op on each group of elements */
9803        int groups = dsize / (8 << grp_size);
9804
9805        for (i = 0; i < groups; i++) {
9806            TCGv_i64 tcg_tmp = tcg_temp_new_i64();
9807
9808            read_vec_element(s, tcg_tmp, rn, i, grp_size);
9809            switch (grp_size) {
9810            case MO_16:
9811                tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
9812                break;
9813            case MO_32:
9814                tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp);
9815                break;
9816            case MO_64:
9817                tcg_gen_bswap64_i64(tcg_tmp, tcg_tmp);
9818                break;
9819            default:
9820                g_assert_not_reached();
9821            }
9822            write_vec_element(s, tcg_tmp, rd, i, grp_size);
9823            tcg_temp_free_i64(tcg_tmp);
9824        }
9825        if (!is_q) {
9826            clear_vec_high(s, rd);
9827        }
9828    } else {
9829        int revmask = (1 << grp_size) - 1;
9830        int esize = 8 << size;
9831        int elements = dsize / esize;
9832        TCGv_i64 tcg_rn = tcg_temp_new_i64();
9833        TCGv_i64 tcg_rd = tcg_const_i64(0);
9834        TCGv_i64 tcg_rd_hi = tcg_const_i64(0);
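        /* Move each element to the mirrored position within its group:
         * XORing the index with revmask flips its low grp_size bits
         * (e.g. revmask == 1 swaps adjacent pairs), and the results are
         * assembled into the two 64-bit halves with deposits.
         */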
9835
9836        for (i = 0; i < elements; i++) {
9837            int e_rev = (i & 0xf) ^ revmask;
9838            int off = e_rev * esize;
9839            read_vec_element(s, tcg_rn, rn, i, size);
9840            if (off >= 64) {
9841                tcg_gen_deposit_i64(tcg_rd_hi, tcg_rd_hi,
9842                                    tcg_rn, off - 64, esize);
9843            } else {
9844                tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, off, esize);
9845            }
9846        }
9847        write_vec_element(s, tcg_rd, rd, 0, MO_64);
9848        write_vec_element(s, tcg_rd_hi, rd, 1, MO_64);
9849
9850        tcg_temp_free_i64(tcg_rd_hi);
9851        tcg_temp_free_i64(tcg_rd);
9852        tcg_temp_free_i64(tcg_rn);
9853    }
9854}
9855
9856static void handle_2misc_pairwise(DisasContext *s, int opcode, bool u,
9857                                  bool is_q, int size, int rn, int rd)
9858{
9859    /* Implement the pairwise operations from 2-misc:
9860     * SADDLP, UADDLP, SADALP, UADALP.
9861     * These all add pairs of elements in the input to produce a
9862     * double-width result element in the output (possibly accumulating).
9863     */
9864    bool accum = (opcode == 0x6);
9865    int maxpass = is_q ? 2 : 1;
9866    int pass;
9867    TCGv_i64 tcg_res[2];
9868
9869    if (size == 2) {
9870        /* 32 + 32 -> 64 op */
9871        TCGMemOp memop = size + (u ? 0 : MO_SIGN);
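        /* U distinguishes UADDLP/UADALP from SADDLP/SADALP: the signed
         * forms sign-extend the 32-bit inputs before the widening add.
         */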
9872
9873        for (pass = 0; pass < maxpass; pass++) {
9874            TCGv_i64 tcg_op1 = tcg_temp_new_i64();
9875            TCGv_i64 tcg_op2 = tcg_temp_new_i64();
9876
9877            tcg_res[pass] = tcg_temp_new_i64();
9878
9879            read_vec_element(s, tcg_op1, rn, pass * 2, memop);
9880            read_vec_element(s, tcg_op2, rn, pass * 2 + 1, memop);
9881            tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
9882            if (accum) {
9883                read_vec_element(s, tcg_op1, rd, pass, MO_64);
9884                tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
9885            }
9886
9887            tcg_temp_free_i64(tcg_op1);
9888            tcg_temp_free_i64(tcg_op2);
9889        }
9890    } else {
9891        for (pass = 0; pass < maxpass; pass++) {
9892            TCGv_i64 tcg_op = tcg_temp_new_i64();
9893            NeonGenOneOpFn *genfn;
9894            static NeonGenOneOpFn * const fns[2][2] = {
9895                { gen_helper_neon_addlp_s8,  gen_helper_neon_addlp_u8 },
9896                { gen_helper_neon_addlp_s16, gen_helper_neon_addlp_u16 },
9897            };
9898
9899            genfn = fns[size][u];
9900
9901            tcg_res[pass] = tcg_temp_new_i64();
9902
9903            read_vec_element(s, tcg_op, rn, pass, MO_64);
9904            genfn(tcg_res[pass], tcg_op);
9905
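            /* For the accumulating forms (SADALP/UADALP) the addition
             * must be lane-wise on the packed 16- or 32-bit result lanes,
             * hence the vector addl helpers rather than a plain 64-bit add.
             */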
9906            if (accum) {
9907                read_vec_element(s, tcg_op, rd, pass, MO_64);
9908                if (size == 0) {
9909                    gen_helper_neon_addl_u16(tcg_res[pass],
9910                                             tcg_res[pass], tcg_op);
9911                } else {
9912                    gen_helper_neon_addl_u32(tcg_res[pass],
9913                                             tcg_res[pass], tcg_op);
9914                }
9915            }
9916            tcg_temp_free_i64(tcg_op);
9917        }
9918    }
9919    if (!is_q) {
9920        tcg_res[1] = tcg_const_i64(0);
9921    }
9922    for (pass = 0; pass < 2; pass++) {
9923        write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
9924        tcg_temp_free_i64(tcg_res[pass]);
9925    }
9926}
9927
9928static void handle_shll(DisasContext *s, bool is_q, int size, int rn, int rd)
9929{
9930    /* Implement SHLL and SHLL2 */
9931    int pass;
9932    int part = is_q ? 2 : 0;
9933    TCGv_i64 tcg_res[2];
9934
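    /* SHLL2 (Q=1) takes its inputs from the high half of the source
     * register; each element is zero-extended to double width and then
     * shifted left by the original element size, i.e. by 8 << size bits.
     */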
9935    for (pass = 0; pass < 2; pass++) {
9936        static NeonGenWidenFn * const widenfns[3] = {
9937            gen_helper_neon_widen_u8,
9938            gen_helper_neon_widen_u16,
9939            tcg_gen_extu_i32_i64,
9940        };
9941        NeonGenWidenFn *widenfn = widenfns[size];
9942        TCGv_i32 tcg_op = tcg_temp_new_i32();
9943
9944        read_vec_element_i32(s, tcg_op, rn, part + pass, MO_32);
9945        tcg_res[pass] = tcg_temp_new_i64();
9946        widenfn(tcg_res[pass], tcg_op);
9947        tcg_gen_shli_i64(tcg_res[pass], tcg_res[pass], 8 << size);
9948
9949        tcg_temp_free_i32(tcg_op);
9950    }
9951
9952    for (pass = 0; pass < 2; pass++) {
9953        write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
9954        tcg_temp_free_i64(tcg_res[pass]);
9955    }
9956}
9957
9958/* AdvSIMD two reg misc
9959 *   31  30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
9960 * +---+---+---+-----------+------+-----------+--------+-----+------+------+
9961 * | 0 | Q | U | 0 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
9962 * +---+---+---+-----------+------+-----------+--------+-----+------+------+
9963 */
9964static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
9965{
9966    int size = extract32(insn, 22, 2);
9967    int opcode = extract32(insn, 12, 5);
9968    bool u = extract32(insn, 29, 1);
9969    bool is_q = extract32(insn, 30, 1);
9970    int rn = extract32(insn, 5, 5);
9971    int rd = extract32(insn, 0, 5);
9972    bool need_fpstatus = false;
9973    bool need_rmode = false;
9974    int rmode = -1;
9975    TCGv_i32 tcg_rmode;
9976    TCGv_ptr tcg_fpstatus;
9977
9978    switch (opcode) {
9979    case 0x0: /* REV64, REV32 */
9980    case 0x1: /* REV16 */
9981        handle_rev(s, opcode, u, is_q, size, rn, rd);
9982        return;
9983    case 0x5: /* CNT, NOT, RBIT */
9984        if (u && size == 0) {
9985            /* NOT: adjust size so we can use the 64-bits-at-a-time loop. */
9986            size = 3;
9987            break;
9988        } else if (u && size == 1) {
9989            /* RBIT */
9990            break;
9991        } else if (!u && size == 0) {
9992            /* CNT */
9993            break;
9994        }
9995        unallocated_encoding(s);
9996        return;
9997    case 0x12: /* XTN, XTN2, SQXTUN, SQXTUN2 */
9998    case 0x14: /* SQXTN, SQXTN2, UQXTN, UQXTN2 */
9999        if (size == 3) {
10000            unallocated_encoding(s);
10001            return;
10002        }
10003        if (!fp_access_check(s)) {
10004            return;
10005        }
10006
10007        handle_2misc_narrow(s, false, opcode, u, is_q, size, rn, rd);
10008        return;
10009    case 0x4: /* CLS, CLZ */
10010        if (size == 3) {
10011            unallocated_encoding(s);
10012            return;
10013        }
10014        break;
10015    case 0x2: /* SADDLP, UADDLP */
10016    case 0x6: /* SADALP, UADALP */
10017        if (size == 3) {
10018            unallocated_encoding(s);
10019            return;
10020        }
10021        if (!fp_access_check(s)) {
10022            return;
10023        }
10024        handle_2misc_pairwise(s, opcode, u, is_q, size, rn, rd);
10025        return;
10026    case 0x13: /* SHLL, SHLL2 */
10027        if (u == 0 || size == 3) {
10028            unallocated_encoding(s);
10029            return;
10030        }
10031        if (!fp_access_check(s)) {
10032            return;
10033        }
10034        handle_shll(s, is_q, size, rn, rd);
10035        return;
10036    case 0xa: /* CMLT */
10037        if (u == 1) {
10038            unallocated_encoding(s);
10039            return;
10040        }
10041        /* fall through */
10042    case 0x8: /* CMGT, CMGE */
10043    case 0x9: /* CMEQ, CMLE */
10044    case 0xb: /* ABS, NEG */
10045        if (size == 3 && !is_q) {
10046            unallocated_encoding(s);
10047            return;
10048        }
10049        break;
10050    case 0x3: /* SUQADD, USQADD */
10051        if (size == 3 && !is_q) {
10052            unallocated_encoding(s);
10053            return;
10054        }
10055        if (!fp_access_check(s)) {
10056            return;
10057        }
10058        handle_2misc_satacc(s, false, u, is_q, size, rn, rd);
10059        return;
10060    case 0x7: /* SQABS, SQNEG */
10061        if (size == 3 && !is_q) {
10062            unallocated_encoding(s);
10063            return;
10064        }
10065        break;
10066    case 0xc ... 0xf:
10067    case 0x16 ... 0x1d:
10068    case 0x1f:
10069    {
10070        /* Floating point: U, size[1] and opcode indicate operation;
10071         * size[0] indicates single or double precision.
10072         */
10073        int is_double = extract32(size, 0, 1);
10074        opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
10075        size = is_double ? 3 : 2;
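         /* From here on the opcode values in the cases below are 7 bits
          * wide: bit 6 is U and bit 5 is size[1] from the instruction.
          * For example FABS (opcode 0xf, U=0, size=1x) becomes 0x2f and
          * FNEG, its U=1 counterpart, becomes 0x6f.
          */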
10076        switch (opcode) {
10077        case 0x2f: /* FABS */
10078        case 0x6f: /* FNEG */
10079            if (size == 3 && !is_q) {
10080                unallocated_encoding(s);
10081                return;
10082            }
10083            break;
10084        case 0x1d: /* SCVTF */
10085        case 0x5d: /* UCVTF */
10086        {
10087            bool is_signed = (opcode == 0x1d);
10088            int elements = is_double ? 2 : is_q ? 4 : 2;
10089            if (is_double && !is_q) {
10090                unallocated_encoding(s);
10091                return;
10092            }
10093            if (!fp_access_check(s)) {
10094                return;
10095            }
10096            handle_simd_intfp_conv(s, rd, rn, elements, is_signed, 0, size);
10097            return;
10098        }
10099        case 0x2c: /* FCMGT (zero) */
10100        case 0x2d: /* FCMEQ (zero) */
10101        case 0x2e: /* FCMLT (zero) */
10102        case 0x6c: /* FCMGE (zero) */
10103        case 0x6d: /* FCMLE (zero) */
10104            if (size == 3 && !is_q) {
10105                unallocated_encoding(s);
10106                return;
10107            }
10108            handle_2misc_fcmp_zero(s, opcode, false, u, is_q, size, rn, rd);
10109            return;
10110        case 0x7f: /* FSQRT */
10111            if (size == 3 && !is_q) {
10112                unallocated_encoding(s);
10113                return;
10114            }
10115            break;
10116        case 0x1a: /* FCVTNS */
10117        case 0x1b: /* FCVTMS */
10118        case 0x3a: /* FCVTPS */
10119        case 0x3b: /* FCVTZS */
10120        case 0x5a: /* FCVTNU */
10121        case 0x5b: /* FCVTMU */
10122        case 0x7a: /* FCVTPU */
10123        case 0x7b: /* FCVTZU */
10124            need_fpstatus = true;
10125            need_rmode = true;
10126            rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
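             /* Bits 5 and 0 of the (extended) opcode pick the rounding
              * mode: FCVTN* use FPROUNDING_TIEEVEN, FCVTM* NEGINF,
              * FCVTP* POSINF and FCVTZ* ZERO. The same trick is used
              * for FRINTN/M/P/Z below.
              */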
10127            if (size == 3 && !is_q) {
10128                unallocated_encoding(s);
10129                return;
10130            }
10131            break;
10132        case 0x5c: /* FCVTAU */
10133        case 0x1c: /* FCVTAS */
10134            need_fpstatus = true;
10135            need_rmode = true;
10136            rmode = FPROUNDING_TIEAWAY;
10137            if (size == 3 && !is_q) {
10138                unallocated_encoding(s);
10139                return;
10140            }
10141            break;
10142        case 0x3c: /* URECPE */
10143            if (size == 3) {
10144                unallocated_encoding(s);
10145                return;
10146            }
10147            /* fall through */
10148        case 0x3d: /* FRECPE */
10149        case 0x7d: /* FRSQRTE */
10150            if (size == 3 && !is_q) {
10151                unallocated_encoding(s);
10152                return;
10153            }
10154            if (!fp_access_check(s)) {
10155                return;
10156            }
10157            handle_2misc_reciprocal(s, opcode, false, u, is_q, size, rn, rd);
10158            return;
10159        case 0x56: /* FCVTXN, FCVTXN2 */
10160            if (size == 2) {
10161                unallocated_encoding(s);
10162                return;
10163            }
10164            /* fall through */
10165        case 0x16: /* FCVTN, FCVTN2 */
10166            /* handle_2misc_narrow does a 2*size -> size operation, but these
10167             * instructions encode the source size rather than dest size.
10168             */
10169            if (!fp_access_check(s)) {
10170                return;
10171            }
10172            handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd);
10173            return;
10174        case 0x17: /* FCVTL, FCVTL2 */
10175            if (!fp_access_check(s)) {
10176                return;
10177            }
10178            handle_2misc_widening(s, opcode, is_q, size, rn, rd);
10179            return;
10180        case 0x18: /* FRINTN */
10181        case 0x19: /* FRINTM */
10182        case 0x38: /* FRINTP */
10183        case 0x39: /* FRINTZ */
10184            need_rmode = true;
10185            rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
10186            /* fall through */
10187        case 0x59: /* FRINTX */
10188        case 0x79: /* FRINTI */
10189            need_fpstatus = true;
10190            if (size == 3 && !is_q) {
10191                unallocated_encoding(s);
10192                return;
10193            }
10194            break;
10195        case 0x58: /* FRINTA */
10196            need_rmode = true;
10197            rmode = FPROUNDING_TIEAWAY;
10198            need_fpstatus = true;
10199            if (size == 3 && !is_q) {
10200                unallocated_encoding(s);
10201                return;
10202            }
10203            break;
10204        case 0x7c: /* URSQRTE */
10205            if (size == 3) {
10206                unallocated_encoding(s);
10207                return;
10208            }
10209            need_fpstatus = true;
10210            break;
10211        default:
10212            unallocated_encoding(s);
10213            return;
10214        }
10215        break;
10216    }
10217    default:
10218        unallocated_encoding(s);
10219        return;
10220    }
10221
10222    if (!fp_access_check(s)) {
10223        return;
10224    }
10225
10226    if (need_fpstatus) {
10227        tcg_fpstatus = get_fpstatus_ptr();
10228    } else {
10229        TCGV_UNUSED_PTR(tcg_fpstatus);
10230    }
10231    if (need_rmode) {
10232        tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
10233        gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
10234    } else {
10235        TCGV_UNUSED_I32(tcg_rmode);
10236    }
10237
10238    if (size == 3) {
10239        /* All 64-bit element operations can be shared with scalar 2misc */
10240        int pass;
10241
10242        for (pass = 0; pass < (is_q ? 2 : 1); pass++) {
10243            TCGv_i64 tcg_op = tcg_temp_new_i64();
10244            TCGv_i64 tcg_res = tcg_temp_new_i64();
10245
10246            read_vec_element(s, tcg_op, rn, pass, MO_64);
10247
10248            handle_2misc_64(s, opcode, u, tcg_res, tcg_op,
10249                            tcg_rmode, tcg_fpstatus);
10250
10251            write_vec_element(s, tcg_res, rd, pass, MO_64);
10252
10253            tcg_temp_free_i64(tcg_res);
10254            tcg_temp_free_i64(tcg_op);
10255        }
10256    } else {
10257        int pass;
10258
10259        for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
10260            TCGv_i32 tcg_op = tcg_temp_new_i32();
10261            TCGv_i32 tcg_res = tcg_temp_new_i32();
10262            TCGCond cond;
10263
10264            read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
10265
10266            if (size == 2) {
10267                /* Special cases for 32 bit elements */
10268                switch (opcode) {
10269                case 0xa: /* CMLT */
10270                    /* 32 bit integer comparison against zero, result is
10271                     * test ? (2^32 - 1) : 0. We implement via setcond(test)
10272                     * and negating the 0/1 result (-1 is all-ones).
10273                     */
10274                    cond = TCG_COND_LT;
10275                do_cmop:
10276                    tcg_gen_setcondi_i32(cond, tcg_res, tcg_op, 0);
10277                    tcg_gen_neg_i32(tcg_res, tcg_res);
10278                    break;
10279                case 0x8: /* CMGT, CMGE */
10280                    cond = u ? TCG_COND_GE : TCG_COND_GT;
10281                    goto do_cmop;
10282                case 0x9: /* CMEQ, CMLE */
10283                    cond = u ? TCG_COND_LE : TCG_COND_EQ;
10284                    goto do_cmop;
10285                case 0x4: /* CLS, CLZ */
10286                    if (u) {
10287                        tcg_gen_clzi_i32(tcg_res, tcg_op, 32);
10288                    } else {
10289                        tcg_gen_clrsb_i32(tcg_res, tcg_op);
10290                    }
10291                    break;
10292                case 0x7: /* SQABS, SQNEG */
10293                    if (u) {
10294                        gen_helper_neon_qneg_s32(tcg_res, cpu_env, tcg_op);
10295                    } else {
10296                        gen_helper_neon_qabs_s32(tcg_res, cpu_env, tcg_op);
10297                    }
10298                    break;
10299                case 0xb: /* ABS, NEG */
10300                    if (u) {
10301                        tcg_gen_neg_i32(tcg_res, tcg_op);
10302                    } else {
10303                        TCGv_i32 tcg_zero = tcg_const_i32(0);
10304                        tcg_gen_neg_i32(tcg_res, tcg_op);
10305                        tcg_gen_movcond_i32(TCG_COND_GT, tcg_res, tcg_op,
10306                                            tcg_zero, tcg_op, tcg_res);
10307                        tcg_temp_free_i32(tcg_zero);
10308                    }
10309                    break;
10310                case 0x2f: /* FABS */
10311                    gen_helper_vfp_abss(tcg_res, tcg_op);
10312                    break;
10313                case 0x6f: /* FNEG */
10314                    gen_helper_vfp_negs(tcg_res, tcg_op);
10315                    break;
10316                case 0x7f: /* FSQRT */
10317                    gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env);
10318                    break;
10319                case 0x1a: /* FCVTNS */
10320                case 0x1b: /* FCVTMS */
10321                case 0x1c: /* FCVTAS */
10322                case 0x3a: /* FCVTPS */
10323                case 0x3b: /* FCVTZS */
10324                {
10325                    TCGv_i32 tcg_shift = tcg_const_i32(0);
10326                    gen_helper_vfp_tosls(tcg_res, tcg_op,
10327                                         tcg_shift, tcg_fpstatus);
10328                    tcg_temp_free_i32(tcg_shift);
10329                    break;
10330                }
10331                case 0x5a: /* FCVTNU */
10332                case 0x5b: /* FCVTMU */
10333                case 0x5c: /* FCVTAU */
10334                case 0x7a: /* FCVTPU */
10335                case 0x7b: /* FCVTZU */
10336                {
10337                    TCGv_i32 tcg_shift = tcg_const_i32(0);
10338                    gen_helper_vfp_touls(tcg_res, tcg_op,
10339                                         tcg_shift, tcg_fpstatus);
10340                    tcg_temp_free_i32(tcg_shift);
10341                    break;
10342                }
10343                case 0x18: /* FRINTN */
10344                case 0x19: /* FRINTM */
10345                case 0x38: /* FRINTP */
10346                case 0x39: /* FRINTZ */
10347                case 0x58: /* FRINTA */
10348                case 0x79: /* FRINTI */
10349                    gen_helper_rints(tcg_res, tcg_op, tcg_fpstatus);
10350                    break;
10351                case 0x59: /* FRINTX */
10352                    gen_helper_rints_exact(tcg_res, tcg_op, tcg_fpstatus);
10353                    break;
10354                case 0x7c: /* URSQRTE */
10355                    gen_helper_rsqrte_u32(tcg_res, tcg_op, tcg_fpstatus);
10356                    break;
10357                default:
10358                    g_assert_not_reached();
10359                }
10360            } else {
10361                /* Use helpers for 8 and 16 bit elements */
10362                switch (opcode) {
10363                case 0x5: /* CNT, RBIT */
10364                    /* For these two insns size is part of the opcode specifier
10365                     * (handled earlier); they always operate on byte elements.
10366                     */
10367                    if (u) {
10368                        gen_helper_neon_rbit_u8(tcg_res, tcg_op);
10369                    } else {
10370                        gen_helper_neon_cnt_u8(tcg_res, tcg_op);
10371                    }
10372                    break;
10373                case 0x7: /* SQABS, SQNEG */
10374                {
10375                    NeonGenOneOpEnvFn *genfn;
10376                    static NeonGenOneOpEnvFn * const fns[2][2] = {
10377                        { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
10378                        { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
10379                    };
10380                    genfn = fns[size][u];
10381                    genfn(tcg_res, cpu_env, tcg_op);
10382                    break;
10383                }
10384                case 0x8: /* CMGT, CMGE */
10385                case 0x9: /* CMEQ, CMLE */
10386                case 0xa: /* CMLT */
10387                {
10388                    static NeonGenTwoOpFn * const fns[3][2] = {
10389                        { gen_helper_neon_cgt_s8, gen_helper_neon_cgt_s16 },
10390                        { gen_helper_neon_cge_s8, gen_helper_neon_cge_s16 },
10391                        { gen_helper_neon_ceq_u8, gen_helper_neon_ceq_u16 },
10392                    };
10393                    NeonGenTwoOpFn *genfn;
10394                    int comp;
10395                    bool reverse;
10396                    TCGv_i32 tcg_zero = tcg_const_i32(0);
10397
10398                    /* comp = index into [CMGT, CMGE, CMEQ, CMLE, CMLT] */
10399                    comp = (opcode - 0x8) * 2 + u;
10400                    /* ...but LE, LT are implemented as reverse GE, GT */
10401                    reverse = (comp > 2);
10402                    if (reverse) {
10403                        comp = 4 - comp;
10404                    }
10405                    genfn = fns[comp][size];
10406                    if (reverse) {
10407                        genfn(tcg_res, tcg_zero, tcg_op);
10408                    } else {
10409                        genfn(tcg_res, tcg_op, tcg_zero);
10410                    }
10411                    tcg_temp_free_i32(tcg_zero);
10412                    break;
10413                }
10414                case 0xb: /* ABS, NEG */
10415                    if (u) {
10416                        TCGv_i32 tcg_zero = tcg_const_i32(0);
10417                        if (size) {
10418                            gen_helper_neon_sub_u16(tcg_res, tcg_zero, tcg_op);
10419                        } else {
10420                            gen_helper_neon_sub_u8(tcg_res, tcg_zero, tcg_op);
10421                        }
10422                        tcg_temp_free_i32(tcg_zero);
10423                    } else {
10424                        if (size) {
10425                            gen_helper_neon_abs_s16(tcg_res, tcg_op);
10426                        } else {
10427                            gen_helper_neon_abs_s8(tcg_res, tcg_op);
10428                        }
10429                    }
10430                    break;
10431                case 0x4: /* CLS, CLZ */
10432                    if (u) {
10433                        if (size == 0) {
10434                            gen_helper_neon_clz_u8(tcg_res, tcg_op);
10435                        } else {
10436                            gen_helper_neon_clz_u16(tcg_res, tcg_op);
10437                        }
10438                    } else {
10439                        if (size == 0) {
10440                            gen_helper_neon_cls_s8(tcg_res, tcg_op);
10441                        } else {
10442                            gen_helper_neon_cls_s16(tcg_res, tcg_op);
10443                        }
10444                    }
10445                    break;
10446                default:
10447                    g_assert_not_reached();
10448                }
10449            }
10450
10451            write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
10452
10453            tcg_temp_free_i32(tcg_res);
10454            tcg_temp_free_i32(tcg_op);
10455        }
10456    }
10457    if (!is_q) {
10458        clear_vec_high(s, rd);
10459    }
10460
10461    if (need_rmode) {
10462        gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
10463        tcg_temp_free_i32(tcg_rmode);
10464    }
10465    if (need_fpstatus) {
10466        tcg_temp_free_ptr(tcg_fpstatus);
10467    }
10468}
10469
10470/* AdvSIMD scalar x indexed element
10471 *  31 30  29 28       24 23  22 21  20  19  16 15 12  11  10 9    5 4    0
10472 * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
10473 * | 0 1 | U | 1 1 1 1 1 | size | L | M |  Rm  | opc | H | 0 |  Rn  |  Rd  |
10474 * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
10475 * AdvSIMD vector x indexed element
10476 *   31  30  29 28       24 23  22 21  20  19  16 15 12  11  10 9    5 4    0
10477 * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
10478 * | 0 | Q | U | 0 1 1 1 1 | size | L | M |  Rm  | opc | H | 0 |  Rn  |  Rd  |
10479 * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
10480 */
10481static void disas_simd_indexed(DisasContext *s, uint32_t insn)
10482{
10483    /* This encoding has two kinds of instruction:
10484     *  normal, where we perform elt x idxelt => elt for each
10485     *     element in the vector
10486     *  long, where we perform elt x idxelt and generate a result of
10487     *     double the width of the input element
10488     * The long ops have a 'part' specifier (i.e. come in INSN, INSN2 pairs).
10489     */
10490    bool is_scalar = extract32(insn, 28, 1);
10491    bool is_q = extract32(insn, 30, 1);
10492    bool u = extract32(insn, 29, 1);
10493    int size = extract32(insn, 22, 2);
10494    int l = extract32(insn, 21, 1);
10495    int m = extract32(insn, 20, 1);
10496    /* Note that the Rm field here is only 4 bits, not 5 as it usually is */
10497    int rm = extract32(insn, 16, 4);
10498    int opcode = extract32(insn, 12, 4);
10499    int h = extract32(insn, 11, 1);
10500    int rn = extract32(insn, 5, 5);
10501    int rd = extract32(insn, 0, 5);
10502    bool is_long = false;
10503    bool is_fp = false;
10504    int index;
10505    TCGv_ptr fpst;
10506
10507    switch (opcode) {
10508    case 0x0: /* MLA */
10509    case 0x4: /* MLS */
10510        if (!u || is_scalar) {
10511            unallocated_encoding(s);
10512            return;
10513        }
10514        break;
10515    case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10516    case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10517    case 0xa: /* SMULL, SMULL2, UMULL, UMULL2 */
10518        if (is_scalar) {
10519            unallocated_encoding(s);
10520            return;
10521        }
10522        is_long = true;
10523        break;
10524    case 0x3: /* SQDMLAL, SQDMLAL2 */
10525    case 0x7: /* SQDMLSL, SQDMLSL2 */
10526    case 0xb: /* SQDMULL, SQDMULL2 */
10527        is_long = true;
10528        /* fall through */
10529    case 0xc: /* SQDMULH */
10530    case 0xd: /* SQRDMULH */
10531        if (u) {
10532            unallocated_encoding(s);
10533            return;
10534        }
10535        break;
10536    case 0x8: /* MUL */
10537        if (u || is_scalar) {
10538            unallocated_encoding(s);
10539            return;
10540        }
10541        break;
10542    case 0x1: /* FMLA */
10543    case 0x5: /* FMLS */
10544        if (u) {
10545            unallocated_encoding(s);
10546            return;
10547        }
10548        /* fall through */
10549    case 0x9: /* FMUL, FMULX */
10550        if (!extract32(size, 1, 1)) {
10551            unallocated_encoding(s);
10552            return;
10553        }
10554        is_fp = true;
10555        break;
10556    default:
10557        unallocated_encoding(s);
10558        return;
10559    }
10560
10561    if (is_fp) {
10562        /* low bit of size indicates single/double */
10563        size = extract32(size, 0, 1) ? 3 : 2;
10564        if (size == 2) {
10565            index = h << 1 | l;
10566        } else {
10567            if (l || !is_q) {
10568                unallocated_encoding(s);
10569                return;
10570            }
10571            index = h;
10572        }
10573        rm |= (m << 4);
10574    } else {
10575        switch (size) {
10576        case 1:
10577            index = h << 2 | l << 1 | m;
10578            break;
10579        case 2:
10580            index = h << 1 | l;
10581            rm |= (m << 4);
10582            break;
10583        default:
10584            unallocated_encoding(s);
10585            return;
10586        }
10587    }
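     /* So for 16-bit elements the index is H:L:M (0..7) and Rm stays
      * 4 bits; for 32-bit elements it is H:L, with M becoming bit 4 of
      * Rm; and for 64-bit (fp only) it is just H, with L required to
      * be zero.
      */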
10588
10589    if (!fp_access_check(s)) {
10590        return;
10591    }
10592
10593    if (is_fp) {
10594        fpst = get_fpstatus_ptr();
10595    } else {
10596        TCGV_UNUSED_PTR(fpst);
10597    }
10598
10599    if (size == 3) {
10600        TCGv_i64 tcg_idx = tcg_temp_new_i64();
10601        int pass;
10602
10603        assert(is_fp && is_q && !is_long);
10604
10605        read_vec_element(s, tcg_idx, rm, index, MO_64);
10606
10607        for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
10608            TCGv_i64 tcg_op = tcg_temp_new_i64();
10609            TCGv_i64 tcg_res = tcg_temp_new_i64();
10610
10611            read_vec_element(s, tcg_op, rn, pass, MO_64);
10612
10613            switch (opcode) {
10614            case 0x5: /* FMLS */
10615                /* As usual for ARM, separate negation for fused multiply-add */
10616                gen_helper_vfp_negd(tcg_op, tcg_op);
10617                /* fall through */
10618            case 0x1: /* FMLA */
10619                read_vec_element(s, tcg_res, rd, pass, MO_64);
10620                gen_helper_vfp_muladdd(tcg_res, tcg_op, tcg_idx, tcg_res, fpst);
10621                break;
10622            case 0x9: /* FMUL, FMULX */
10623                if (u) {
10624                    gen_helper_vfp_mulxd(tcg_res, tcg_op, tcg_idx, fpst);
10625                } else {
10626                    gen_helper_vfp_muld(tcg_res, tcg_op, tcg_idx, fpst);
10627                }
10628                break;
10629            default:
10630                g_assert_not_reached();
10631            }
10632
10633            write_vec_element(s, tcg_res, rd, pass, MO_64);
10634            tcg_temp_free_i64(tcg_op);
10635            tcg_temp_free_i64(tcg_res);
10636        }
10637
10638        if (is_scalar) {
10639            clear_vec_high(s, rd);
10640        }
10641
10642        tcg_temp_free_i64(tcg_idx);
10643    } else if (!is_long) {
10644        /* 32 bit floating point, or 16 or 32 bit integer.
10645         * For the 16 bit scalar case we use the usual Neon helpers and
10646         * rely on the fact that 0 op 0 == 0 with no side effects.
10647         */
10648        TCGv_i32 tcg_idx = tcg_temp_new_i32();
10649        int pass, maxpasses;
10650
10651        if (is_scalar) {
10652            maxpasses = 1;
10653        } else {
10654            maxpasses = is_q ? 4 : 2;
10655        }
10656
10657        read_vec_element_i32(s, tcg_idx, rm, index, size);
10658
10659        if (size == 1 && !is_scalar) {
10660            /* The simplest way to handle the 16x16 indexed ops is to duplicate
10661             * the index into both halves of the 32 bit tcg_idx and then use
10662             * the usual Neon helpers.
10663             */
10664            tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
10665        }
10666
10667        for (pass = 0; pass < maxpasses; pass++) {
10668            TCGv_i32 tcg_op = tcg_temp_new_i32();
10669            TCGv_i32 tcg_res = tcg_temp_new_i32();
10670
10671            read_vec_element_i32(s, tcg_op, rn, pass, is_scalar ? size : MO_32);
10672
10673            switch (opcode) {
10674            case 0x0: /* MLA */
10675            case 0x4: /* MLS */
10676            case 0x8: /* MUL */
10677            {
10678                static NeonGenTwoOpFn * const fns[2][2] = {
10679                    { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
10680                    { tcg_gen_add_i32, tcg_gen_sub_i32 },
10681                };
10682                NeonGenTwoOpFn *genfn;
10683                bool is_sub = opcode == 0x4;
10684
10685                if (size == 1) {
10686                    gen_helper_neon_mul_u16(tcg_res, tcg_op, tcg_idx);
10687                } else {
10688                    tcg_gen_mul_i32(tcg_res, tcg_op, tcg_idx);
10689                }
10690                if (opcode == 0x8) {
10691                    break;
10692                }
10693                read_vec_element_i32(s, tcg_op, rd, pass, MO_32);
10694                genfn = fns[size - 1][is_sub];
10695                genfn(tcg_res, tcg_op, tcg_res);
10696                break;
10697            }
10698            case 0x5: /* FMLS */
10699                /* As usual for ARM, separate negation for fused multiply-add */
10700                gen_helper_vfp_negs(tcg_op, tcg_op);
10701                /* fall through */
10702            case 0x1: /* FMLA */
10703                read_vec_element_i32(s, tcg_res, rd, pass, MO_32);
10704                gen_helper_vfp_muladds(tcg_res, tcg_op, tcg_idx, tcg_res, fpst);
10705                break;
10706            case 0x9: /* FMUL, FMULX */
10707                if (u) {
10708                    gen_helper_vfp_mulxs(tcg_res, tcg_op, tcg_idx, fpst);
10709                } else {
10710                    gen_helper_vfp_muls(tcg_res, tcg_op, tcg_idx, fpst);
10711                }
10712                break;
10713            case 0xc: /* SQDMULH */
10714                if (size == 1) {
10715                    gen_helper_neon_qdmulh_s16(tcg_res, cpu_env,
10716                                               tcg_op, tcg_idx);
10717                } else {
10718                    gen_helper_neon_qdmulh_s32(tcg_res, cpu_env,
10719                                               tcg_op, tcg_idx);
10720                }
10721                break;
10722            case 0xd: /* SQRDMULH */
10723                if (size == 1) {
10724                    gen_helper_neon_qrdmulh_s16(tcg_res, cpu_env,
10725                                                tcg_op, tcg_idx);
10726                } else {
10727                    gen_helper_neon_qrdmulh_s32(tcg_res, cpu_env,
10728                                                tcg_op, tcg_idx);
10729                }
10730                break;
10731            default:
10732                g_assert_not_reached();
10733            }
10734
10735            if (is_scalar) {
10736                write_fp_sreg(s, rd, tcg_res);
10737            } else {
10738                write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
10739            }
10740
10741            tcg_temp_free_i32(tcg_op);
10742            tcg_temp_free_i32(tcg_res);
10743        }
10744
10745        tcg_temp_free_i32(tcg_idx);
10746
10747        if (!is_q) {
10748            clear_vec_high(s, rd);
10749        }
10750    } else {
10751        /* long ops: 16x16->32 or 32x32->64 */
10752        TCGv_i64 tcg_res[2];
10753        int pass;
10754        bool satop = extract32(opcode, 0, 1);
10755        TCGMemOp memop = MO_32;
10756
10757        if (satop || !u) {
10758            memop |= MO_SIGN;
10759        }
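         /* The saturating-doubling ops (SQDMULL, SQDMLAL, SQDMLSL) are
          * always signed; for the others U distinguishes UMULL/UMLAL/UMLSL
          * from their signed counterparts.
          */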
10760
10761        if (size == 2) {
10762            TCGv_i64 tcg_idx = tcg_temp_new_i64();
10763
10764            read_vec_element(s, tcg_idx, rm, index, memop);
10765
10766            for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
10767                TCGv_i64 tcg_op = tcg_temp_new_i64();
10768                TCGv_i64 tcg_passres;
10769                int passelt;
10770
10771                if (is_scalar) {
10772                    passelt = 0;
10773                } else {
10774                    passelt = pass + (is_q * 2);
10775                }
10776
10777                read_vec_element(s, tcg_op, rn, passelt, memop);
10778
10779                tcg_res[pass] = tcg_temp_new_i64();
10780
10781                if (opcode == 0xa || opcode == 0xb) {
10782                    /* Non-accumulating ops */
10783                    tcg_passres = tcg_res[pass];
10784                } else {
10785                    tcg_passres = tcg_temp_new_i64();
10786                }
10787
10788                tcg_gen_mul_i64(tcg_passres, tcg_op, tcg_idx);
10789                tcg_temp_free_i64(tcg_op);
10790
10791                if (satop) {
10792                    /* saturating, doubling */
10793                    gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env,
10794                                                      tcg_passres, tcg_passres);
10795                }
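                 /* The doubling in SQDMULL etc is done by saturating-adding
                  * the product to itself, which also sets QC on overflow.
                  */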
10796
10797                if (opcode == 0xa || opcode == 0xb) {
10798                    continue;
10799                }
10800
10801                /* Accumulating op: handle accumulate step */
10802                read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10803
10804                switch (opcode) {
10805                case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10806                    tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
10807                    break;
10808                case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10809                    tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
10810                    break;
10811                case 0x7: /* SQDMLSL, SQDMLSL2 */
10812                    tcg_gen_neg_i64(tcg_passres, tcg_passres);
10813                    /* fall through */
10814                case 0x3: /* SQDMLAL, SQDMLAL2 */
10815                    gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env,
10816                                                      tcg_res[pass],
10817                                                      tcg_passres);
10818                    break;
10819                default:
10820                    g_assert_not_reached();
10821                }
10822                tcg_temp_free_i64(tcg_passres);
10823            }
10824            tcg_temp_free_i64(tcg_idx);
10825
10826            if (is_scalar) {
10827                clear_vec_high(s, rd);
10828            }
10829        } else {
10830            TCGv_i32 tcg_idx = tcg_temp_new_i32();
10831
10832            assert(size == 1);
10833            read_vec_element_i32(s, tcg_idx, rm, index, size);
10834
10835            if (!is_scalar) {
10836                /* The simplest way to handle the 16x16 indexed ops is to
10837                 * duplicate the index into both halves of the 32 bit tcg_idx
10838                 * and then use the usual Neon helpers.
10839                 */
10840                tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
10841            }
10842
10843            for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
10844                TCGv_i32 tcg_op = tcg_temp_new_i32();
10845                TCGv_i64 tcg_passres;
10846
10847                if (is_scalar) {
10848                    read_vec_element_i32(s, tcg_op, rn, pass, size);
10849                } else {
10850                    read_vec_element_i32(s, tcg_op, rn,
10851                                         pass + (is_q * 2), MO_32);
10852                }
10853
10854                tcg_res[pass] = tcg_temp_new_i64();
10855
10856                if (opcode == 0xa || opcode == 0xb) {
10857                    /* Non-accumulating ops */
10858                    tcg_passres = tcg_res[pass];
10859                } else {
10860                    tcg_passres = tcg_temp_new_i64();
10861                }
10862
10863                if (memop & MO_SIGN) {
10864                    gen_helper_neon_mull_s16(tcg_passres, tcg_op, tcg_idx);
10865                } else {
10866                    gen_helper_neon_mull_u16(tcg_passres, tcg_op, tcg_idx);
10867                }
10868                if (satop) {
10869                    gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
10870                                                      tcg_passres, tcg_passres);
10871                }
10872                tcg_temp_free_i32(tcg_op);
10873
10874                if (opcode == 0xa || opcode == 0xb) {
10875                    continue;
10876                }
10877
10878                /* Accumulating op: handle accumulate step */
10879                read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10880
10881                switch (opcode) {
10882                case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10883                    gen_helper_neon_addl_u32(tcg_res[pass], tcg_res[pass],
10884                                             tcg_passres);
10885                    break;
10886                case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10887                    gen_helper_neon_subl_u32(tcg_res[pass], tcg_res[pass],
10888                                             tcg_passres);
10889                    break;
10890                case 0x7: /* SQDMLSL, SQDMLSL2 */
10891                    gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
10892                    /* fall through */
10893                case 0x3: /* SQDMLAL, SQDMLAL2 */
10894                    gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env,
10895                                                      tcg_res[pass],
10896                                                      tcg_passres);
10897                    break;
10898                default:
10899                    g_assert_not_reached();
10900                }
10901                tcg_temp_free_i64(tcg_passres);
10902            }
10903            tcg_temp_free_i32(tcg_idx);
10904
10905            if (is_scalar) {
10906                tcg_gen_ext32u_i64(tcg_res[0], tcg_res[0]);
10907            }
10908        }
10909
10910        if (is_scalar) {
10911            tcg_res[1] = tcg_const_i64(0);
10912        }
10913
10914        for (pass = 0; pass < 2; pass++) {
10915            write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10916            tcg_temp_free_i64(tcg_res[pass]);
10917        }
10918    }
10919
10920    if (!TCGV_IS_UNUSED_PTR(fpst)) {
10921        tcg_temp_free_ptr(fpst);
10922    }
10923}
10924
10925/* Crypto AES
10926 *  31             24 23  22 21       17 16    12 11 10 9    5 4    0
10927 * +-----------------+------+-----------+--------+-----+------+------+
10928 * | 0 1 0 0 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
10929 * +-----------------+------+-----------+--------+-----+------+------+
10930 */
10931static void disas_crypto_aes(DisasContext *s, uint32_t insn)
10932{
10933    int size = extract32(insn, 22, 2);
10934    int opcode = extract32(insn, 12, 5);
10935    int rn = extract32(insn, 5, 5);
10936    int rd = extract32(insn, 0, 5);
10937    int decrypt;
10938    TCGv_i32 tcg_rd_regno, tcg_rn_regno, tcg_decrypt;
10939    CryptoThreeOpEnvFn *genfn;
10940
10941    if (!arm_dc_feature(s, ARM_FEATURE_V8_AES)
10942        || size != 0) {
10943        unallocated_encoding(s);
10944        return;
10945    }
10946
10947    switch (opcode) {
10948    case 0x4: /* AESE */
10949        decrypt = 0;
10950        genfn = gen_helper_crypto_aese;
10951        break;
10952    case 0x6: /* AESMC */
10953        decrypt = 0;
10954        genfn = gen_helper_crypto_aesmc;
10955        break;
10956    case 0x5: /* AESD */
10957        decrypt = 1;
10958        genfn = gen_helper_crypto_aese;
10959        break;
10960    case 0x7: /* AESIMC */
10961        decrypt = 1;
10962        genfn = gen_helper_crypto_aesmc;
10963        break;
10964    default:
10965        unallocated_encoding(s);
10966        return;
10967    }
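     /* AESD and AESIMC reuse the AESE and AESMC helpers: the decrypt
      * flag tells the helper to apply the inverse transformation.
      */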
10968
10969    if (!fp_access_check(s)) {
10970        return;
10971    }
10972
10973    /* Note that we convert the Vx register indexes into the
10974     * index within the vfp.regs[] array, so we can share the
10975     * helper with the AArch32 instructions.
10976     */
10977    tcg_rd_regno = tcg_const_i32(rd << 1);
10978    tcg_rn_regno = tcg_const_i32(rn << 1);
10979    tcg_decrypt = tcg_const_i32(decrypt);
10980
10981    genfn(cpu_env, tcg_rd_regno, tcg_rn_regno, tcg_decrypt);
10982
10983    tcg_temp_free_i32(tcg_rd_regno);
10984    tcg_temp_free_i32(tcg_rn_regno);
10985    tcg_temp_free_i32(tcg_decrypt);
10986}
10987
10988/* Crypto three-reg SHA
10989 *  31             24 23  22  21 20  16  15 14    12 11 10 9    5 4    0
10990 * +-----------------+------+---+------+---+--------+-----+------+------+
10991 * | 0 1 0 1 1 1 1 0 | size | 0 |  Rm  | 0 | opcode | 0 0 |  Rn  |  Rd  |
10992 * +-----------------+------+---+------+---+--------+-----+------+------+
10993 */
10994static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn)
10995{
10996    int size = extract32(insn, 22, 2);
10997    int opcode = extract32(insn, 12, 3);
10998    int rm = extract32(insn, 16, 5);
10999    int rn = extract32(insn, 5, 5);
11000    int rd = extract32(insn, 0, 5);
11001    CryptoThreeOpEnvFn *genfn;
11002    TCGv_i32 tcg_rd_regno, tcg_rn_regno, tcg_rm_regno;
11003    int feature = ARM_FEATURE_V8_SHA256;
11004
11005    if (size != 0) {
11006        unallocated_encoding(s);
11007        return;
11008    }
11009
11010    switch (opcode) {
11011    case 0: /* SHA1C */
11012    case 1: /* SHA1P */
11013    case 2: /* SHA1M */
11014    case 3: /* SHA1SU0 */
11015        genfn = NULL;
11016        feature = ARM_FEATURE_V8_SHA1;
11017        break;
11018    case 4: /* SHA256H */
11019        genfn = gen_helper_crypto_sha256h;
11020        break;
11021    case 5: /* SHA256H2 */
11022        genfn = gen_helper_crypto_sha256h2;
11023        break;
11024    case 6: /* SHA256SU1 */
11025        genfn = gen_helper_crypto_sha256su1;
11026        break;
11027    default:
11028        unallocated_encoding(s);
11029        return;
11030    }
11031
11032    if (!arm_dc_feature(s, feature)) {
11033        unallocated_encoding(s);
11034        return;
11035    }
11036
11037    if (!fp_access_check(s)) {
11038        return;
11039    }
11040
11041    tcg_rd_regno = tcg_const_i32(rd << 1);
11042    tcg_rn_regno = tcg_const_i32(rn << 1);
11043    tcg_rm_regno = tcg_const_i32(rm << 1);
11044
11045    if (genfn) {
11046        genfn(cpu_env, tcg_rd_regno, tcg_rn_regno, tcg_rm_regno);
11047    } else {
11048        TCGv_i32 tcg_opcode = tcg_const_i32(opcode);
11049
11050        gen_helper_crypto_sha1_3reg(cpu_env, tcg_rd_regno,
11051                                    tcg_rn_regno, tcg_rm_regno, tcg_opcode);
11052        tcg_temp_free_i32(tcg_opcode);
11053    }
11054
11055    tcg_temp_free_i32(tcg_rd_regno);
11056    tcg_temp_free_i32(tcg_rn_regno);
11057    tcg_temp_free_i32(tcg_rm_regno);
11058}
11059
11060/* Crypto two-reg SHA
11061 *  31             24 23  22 21       17 16    12 11 10 9    5 4    0
11062 * +-----------------+------+-----------+--------+-----+------+------+
11063 * | 0 1 0 1 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
11064 * +-----------------+------+-----------+--------+-----+------+------+
11065 */
11066static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn)
11067{
11068    int size = extract32(insn, 22, 2);
11069    int opcode = extract32(insn, 12, 5);
11070    int rn = extract32(insn, 5, 5);
11071    int rd = extract32(insn, 0, 5);
11072    CryptoTwoOpEnvFn *genfn;
11073    int feature;
11074    TCGv_i32 tcg_rd_regno, tcg_rn_regno;
11075
11076    if (size != 0) {
11077        unallocated_encoding(s);
11078        return;
11079    }
11080
11081    switch (opcode) {
11082    case 0: /* SHA1H */
11083        feature = ARM_FEATURE_V8_SHA1;
11084        genfn = gen_helper_crypto_sha1h;
11085        break;
11086    case 1: /* SHA1SU1 */
11087        feature = ARM_FEATURE_V8_SHA1;
11088        genfn = gen_helper_crypto_sha1su1;
11089        break;
11090    case 2: /* SHA256SU0 */
11091        feature = ARM_FEATURE_V8_SHA256;
11092        genfn = gen_helper_crypto_sha256su0;
11093        break;
11094    default:
11095        unallocated_encoding(s);
11096        return;
11097    }
11098
11099    if (!arm_dc_feature(s, feature)) {
11100        unallocated_encoding(s);
11101        return;
11102    }
11103
11104    if (!fp_access_check(s)) {
11105        return;
11106    }
11107
11108    tcg_rd_regno = tcg_const_i32(rd << 1);
11109    tcg_rn_regno = tcg_const_i32(rn << 1);
11110
11111    genfn(cpu_env, tcg_rd_regno, tcg_rn_regno);
11112
11113    tcg_temp_free_i32(tcg_rd_regno);
11114    tcg_temp_free_i32(tcg_rn_regno);
11115}
11116
11117/* C3.6 Data processing - SIMD, inc Crypto
11118 *
11119 * As the decode gets a little complex we are using a table based
11120 * approach for this part of the decode.
11121 */
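 /* An instruction matches a table entry when (insn & mask) == pattern;
  * lookup_disas_fn() scans the table in order and returns the first
  * match, so entries with more specific masks must precede overlapping
  * broader ones (see the simd_mod_imm note below).
  */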
static const AArch64DecodeTable data_proc_simd[] = {
    /* pattern  ,  mask     ,  fn                        */
    { 0x0e200400, 0x9f200400, disas_simd_three_reg_same },
    { 0x0e200000, 0x9f200c00, disas_simd_three_reg_diff },
    { 0x0e200800, 0x9f3e0c00, disas_simd_two_reg_misc },
    { 0x0e300800, 0x9f3e0c00, disas_simd_across_lanes },
    { 0x0e000400, 0x9fe08400, disas_simd_copy },
    { 0x0f000000, 0x9f000400, disas_simd_indexed }, /* vector indexed */
    /* simd_mod_imm decode is a subset of simd_shift_imm, so must precede it */
    { 0x0f000400, 0x9ff80400, disas_simd_mod_imm },
    { 0x0f000400, 0x9f800400, disas_simd_shift_imm },
    { 0x0e000000, 0xbf208c00, disas_simd_tb },
    { 0x0e000800, 0xbf208c00, disas_simd_zip_trn },
    { 0x2e000000, 0xbf208400, disas_simd_ext },
    { 0x5e200400, 0xdf200400, disas_simd_scalar_three_reg_same },
    { 0x5e200000, 0xdf200c00, disas_simd_scalar_three_reg_diff },
    { 0x5e200800, 0xdf3e0c00, disas_simd_scalar_two_reg_misc },
    { 0x5e300800, 0xdf3e0c00, disas_simd_scalar_pairwise },
    { 0x5e000400, 0xdfe08400, disas_simd_scalar_copy },
    { 0x5f000000, 0xdf000400, disas_simd_indexed }, /* scalar indexed */
    { 0x5f000400, 0xdf800400, disas_simd_scalar_shift_imm },
    { 0x4e280800, 0xff3e0c00, disas_crypto_aes },
    { 0x5e000000, 0xff208c00, disas_crypto_three_reg_sha },
    { 0x5e280800, 0xff3e0c00, disas_crypto_two_reg_sha },
    { 0x00000000, 0x00000000, NULL }
};
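
/* A table entry matches when (insn & mask) == pattern, and the first match
 * wins, which is why simd_mod_imm must sit above simd_shift_imm.  The walk
 * that lookup_disas_fn() (defined earlier in this file) performs amounts
 * to:
 *
 *     for (tptr = table; tptr->mask; tptr++) {
 *         if ((insn & tptr->mask) == tptr->pattern) {
 *             return tptr->disas_fn;
 *         }
 *     }
 *     return NULL;
 *
 * As a worked example: AESE v0.16B, v1.16B encodes as 0x4e284820, and
 * (0x4e284820 & 0xff3e0c00) == 0x4e280800, the disas_crypto_aes pattern.
 */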

static void disas_data_proc_simd(DisasContext *s, uint32_t insn)
{
    /* Note that this is called with all non-FP cases from
     * table C3-6 so it must UNDEF for entries not specifically
     * allocated to instructions in that table.
     */
    AArch64DecodeFn *fn = lookup_disas_fn(&data_proc_simd[0], insn);
    if (fn) {
        fn(s, insn);
    } else {
        unallocated_encoding(s);
    }
}

/* C3.6 Data processing - SIMD and floating point */
static void disas_data_proc_simd_fp(DisasContext *s, uint32_t insn)
{
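    /* The scalar floating-point encodings (the 0x1e/0x9e families) have
     * bit 28 set and bit 30 clear; the vector (0x0e/0x2e/0x4e) and
     * scalar-SIMD (0x5e/0x7e) families fail one of those two tests and so
     * fall through to the SIMD decoder.
     */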
    if (extract32(insn, 28, 1) == 1 && extract32(insn, 30, 1) == 0) {
        disas_data_proc_fp(s, insn);
    } else {
        /* SIMD, including crypto */
        disas_data_proc_simd(s, insn);
    }
}

/* C3.1 A64 instruction index by encoding */
static void disas_a64_insn(CPUARMState *env, DisasContext *s)
{
    uint32_t insn;

    insn = arm_ldl_code(env, s->pc, s->sctlr_b);
    s->insn = insn;
    s->pc += 4;

    s->fp_access_checked = false;

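    /* The primary selector is insn[28:25].  For example, NOP encodes as
     * 0xd503201f, and extract32(0xd503201f, 25, 4) == 0xa, so it is
     * dispatched to disas_b_exc_sys() below.
     */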
    switch (extract32(insn, 25, 4)) {
    case 0x0: case 0x1: case 0x2: case 0x3: /* UNALLOCATED */
        unallocated_encoding(s);
        break;
    case 0x8: case 0x9: /* Data processing - immediate */
        disas_data_proc_imm(s, insn);
        break;
    case 0xa: case 0xb: /* Branch, exception generation and system insns */
        disas_b_exc_sys(s, insn);
        break;
    case 0x4:
    case 0x6:
    case 0xc:
    case 0xe:      /* Loads and stores */
        disas_ldst(s, insn);
        break;
    case 0x5:
    case 0xd:      /* Data processing - register */
        disas_data_proc_reg(s, insn);
        break;
    case 0x7:
    case 0xf:      /* Data processing - SIMD and floating point */
        disas_data_proc_simd_fp(s, insn);
        break;
    default:
        assert(FALSE); /* all 16 values of insn[28:25] are handled above */
        break;
    }

    /* if we allocated any temporaries, free them here */
    free_tmp_a64(s);
}

static int aarch64_tr_init_disas_context(DisasContextBase *dcbase,
                                         CPUState *cpu, int max_insns)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);
    CPUARMState *env = cpu->env_ptr;
    ARMCPU *arm_cpu = arm_env_get_cpu(env);
    int bound;

    dc->pc = dc->base.pc_first;
    dc->condjmp = 0;

    dc->aarch64 = 1;
    /* If we are coming from secure EL0 in a system with a 32-bit EL3, then
     * there is no secure EL1, so we route exceptions to EL3.
     */
    dc->secure_routed_to_el3 = arm_feature(env, ARM_FEATURE_EL3) &&
                               !arm_el_is_aa64(env, 3);
    dc->thumb = 0;
    dc->sctlr_b = 0;
    dc->be_data = ARM_TBFLAG_BE_DATA(dc->base.tb->flags) ? MO_BE : MO_LE;
    dc->condexec_mask = 0;
    dc->condexec_cond = 0;
    dc->mmu_idx = core_to_arm_mmu_idx(env, ARM_TBFLAG_MMUIDX(dc->base.tb->flags));
    dc->tbi0 = ARM_TBFLAG_TBI0(dc->base.tb->flags);
    dc->tbi1 = ARM_TBFLAG_TBI1(dc->base.tb->flags);
    dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
#if !defined(CONFIG_USER_ONLY)
    dc->user = (dc->current_el == 0);
#endif
    dc->fp_excp_el = ARM_TBFLAG_FPEXC_EL(dc->base.tb->flags);
    dc->vec_len = 0;
    dc->vec_stride = 0;
    dc->cp_regs = arm_cpu->cp_regs;
    dc->features = env->features;

    /* Single step state. The code-generation logic here is:
     *  SS_ACTIVE == 0:
     *   generate code with no special handling for single-stepping (except
     *   that anything that can make us go to SS_ACTIVE == 1 must end the TB;
     *   this happens anyway because those changes are all system register or
     *   PSTATE writes).
     *  SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
     *   emit code for one insn
     *   emit code to clear PSTATE.SS
     *   emit code to generate software step exception for completed step
     *   end TB (as usual for having generated an exception)
     *  SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
     *   emit code to generate a software step exception
     *   end the TB
     */
    dc->ss_active = ARM_TBFLAG_SS_ACTIVE(dc->base.tb->flags);
    dc->pstate_ss = ARM_TBFLAG_PSTATE_SS(dc->base.tb->flags);
    dc->is_ldex = false;
    dc->ss_same_el = (arm_debug_target_el(env) == dc->current_el);

    /* Bound the number of insns to execute to those left on the page.  */
    bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;
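    /* A sketch of the arithmetic, assuming 4 KiB pages (TARGET_PAGE_MASK ==
     * ~0xfff): with pc_first == 0x4ff0, pc_first | TARGET_PAGE_MASK is
     * 0xfffffffffffffff0, whose negation is 0x10, so 16 bytes -- i.e. 4
     * insns -- remain before the page boundary.
     */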

    /* If architectural single step active, limit to 1.  */
    if (dc->ss_active) {
        bound = 1;
    }
    max_insns = MIN(max_insns, bound);

    init_tmp_a64_array(dc);

    return max_insns;
}

static void aarch64_tr_tb_start(DisasContextBase *db, CPUState *cpu)
{
    tcg_clear_temp_count();
}

static void aarch64_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);

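    /* Record the buffer index of the insn_start op so the load/store code
     * can back-patch a syndrome value into it with tcg_set_insn_param() if
     * the access later faults (see disas_set_insn_syndrome()).
     */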
    dc->insn_start_idx = tcg_op_buf_count();
    tcg_gen_insn_start(dc->pc, 0, 0);
}

static bool aarch64_tr_breakpoint_check(DisasContextBase *dcbase, CPUState *cpu,
                                        const CPUBreakpoint *bp)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);

    if (bp->flags & BP_CPU) {
        gen_a64_set_pc_im(dc->pc);
        gen_helper_check_breakpoints(cpu_env);
        /* End the TB early; it likely won't be executed */
        dc->base.is_jmp = DISAS_TOO_MANY;
    } else {
        gen_exception_internal_insn(dc, 0, EXCP_DEBUG);
        /* The address covered by the breakpoint must be
           included in [tb->pc, tb->pc + tb->size) in order
           for it to be properly cleared -- thus we
           increment the PC here so that the logic setting
           tb->size below does the right thing.  */
        dc->pc += 4;
        dc->base.is_jmp = DISAS_NORETURN;
    }

    return true;
}

static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);
    CPUARMState *env = cpu->env_ptr;

    if (dc->ss_active && !dc->pstate_ss) {
        /* Singlestep state is Active-pending.
         * If we're in this state at the start of a TB then either
         *  a) we just took an exception to an EL which is being debugged
         *     and this is the first insn in the exception handler
         *  b) debug exceptions were masked and we just unmasked them
         *     without changing EL (eg by clearing PSTATE.D)
         * In either case we're going to take a swstep exception in the
         * "did not step an insn" case, and so the syndrome ISV and EX
         * bits should be zero.
         */
        assert(dc->base.num_insns == 1);
        gen_exception(EXCP_UDEF, syn_swstep(dc->ss_same_el, 0, 0),
                      default_exception_el(dc));
        dc->base.is_jmp = DISAS_NORETURN;
    } else {
        disas_a64_insn(env, dc);
    }

    dc->base.pc_next = dc->pc;
    translator_loop_temp_check(&dc->base);
}

static void aarch64_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);

    if (unlikely(dc->base.singlestep_enabled || dc->ss_active)) {
        /* Note that this means single stepping WFI doesn't halt the CPU.
         * For conditional branch insns this is harmless unreachable code as
         * gen_goto_tb() has already handled emitting the debug exception
         * (and thus a tb-jump is not possible when singlestepping).
         */
        switch (dc->base.is_jmp) {
        default:
            gen_a64_set_pc_im(dc->pc);
            /* fall through */
        case DISAS_EXIT:
        case DISAS_JUMP:
            if (dc->base.singlestep_enabled) {
                gen_exception_internal(EXCP_DEBUG);
            } else {
                gen_step_complete_exception(dc);
            }
            break;
        case DISAS_NORETURN:
            break;
        }
    } else {
        switch (dc->base.is_jmp) {
        case DISAS_NEXT:
        case DISAS_TOO_MANY:
            gen_goto_tb(dc, 1, dc->pc);
            break;
        default:
        case DISAS_UPDATE:
            gen_a64_set_pc_im(dc->pc);
            /* fall through */
        case DISAS_JUMP:
            tcg_gen_lookup_and_goto_ptr();
            break;
        case DISAS_EXIT:
            tcg_gen_exit_tb(0);
            break;
        case DISAS_NORETURN:
        case DISAS_SWI:
            break;
        case DISAS_WFE:
            gen_a64_set_pc_im(dc->pc);
            gen_helper_wfe(cpu_env);
            break;
        case DISAS_YIELD:
            gen_a64_set_pc_im(dc->pc);
            gen_helper_yield(cpu_env);
            break;
        case DISAS_WFI:
        {
            /* This is a special case because we don't want to just halt
             * the CPU if trying to debug across a WFI.
             */
            TCGv_i32 tmp = tcg_const_i32(4);

            gen_a64_set_pc_im(dc->pc);
            gen_helper_wfi(cpu_env, tmp);
            tcg_temp_free_i32(tmp);
            /* The helper doesn't necessarily throw an exception, but we
             * must go back to the main loop to check for interrupts anyway.
             */
            tcg_gen_exit_tb(0);
            break;
        }
        }
    }

    /* Functions above can change dc->pc, so re-align dc->base.pc_next.  */
    dc->base.pc_next = dc->pc;
}

static void aarch64_tr_disas_log(const DisasContextBase *dcbase,
                                 CPUState *cpu)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);

    qemu_log("IN: %s\n", lookup_symbol(dc->base.pc_first));
    log_target_disas(cpu, dc->base.pc_first, dc->base.tb->size);
}

const TranslatorOps aarch64_translator_ops = {
    .init_disas_context = aarch64_tr_init_disas_context,
    .tb_start           = aarch64_tr_tb_start,
    .insn_start         = aarch64_tr_insn_start,
    .breakpoint_check   = aarch64_tr_breakpoint_check,
    .translate_insn     = aarch64_tr_translate_insn,
    .tb_stop            = aarch64_tr_tb_stop,
    .disas_log          = aarch64_tr_disas_log,
};
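
/* For reference, the generic translator_loop() in accel/tcg/translator.c
 * drives these hooks in roughly the following order (a sketch of the flow,
 * not the exact code):
 *
 *     max_insns = ops->init_disas_context(db, cpu, max_insns);
 *     ops->tb_start(db, cpu);
 *     do {
 *         ops->insn_start(db, cpu);
 *         ... ops->breakpoint_check(db, cpu, bp) for any breakpoint
 *             at db->pc_next ...
 *         ops->translate_insn(db, cpu);
 *     } while (db->is_jmp == DISAS_NEXT && db->num_insns < max_insns);
 *     ops->tb_stop(db, cpu);
 *     ... ops->disas_log(db, cpu) when IN_ASM logging is enabled ...
 */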