qemu/target-arm/translate-a64.c
/*
 *  AArch64 translation
 *
 *  Copyright (c) 2013 Alexander Graf <agraf@suse.de>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"

#include "cpu.h"
#include "tcg-op.h"
#include "qemu/log.h"
#include "arm_ldst.h"
#include "translate.h"
#include "internals.h"
#include "qemu/host-utils.h"

#include "exec/semihost.h"
#include "exec/gen-icount.h"

#include "exec/helper-proto.h"
#include "exec/helper-gen.h"
#include "exec/log.h"

#include "trace-tcg.h"

static TCGv_i64 cpu_X[32];
static TCGv_i64 cpu_pc;

/* Load/store exclusive handling */
static TCGv_i64 cpu_exclusive_high;

static const char *regnames[] = {
    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
    "x24", "x25", "x26", "x27", "x28", "x29", "lr", "sp"
};

enum a64_shift_type {
    A64_SHIFT_TYPE_LSL = 0,
    A64_SHIFT_TYPE_LSR = 1,
    A64_SHIFT_TYPE_ASR = 2,
    A64_SHIFT_TYPE_ROR = 3
};

/* Table based decoder typedefs - used when the relevant bits for decode
 * are too awkwardly scattered across the instruction (eg SIMD).
 */
typedef void AArch64DecodeFn(DisasContext *s, uint32_t insn);

typedef struct AArch64DecodeTable {
    uint32_t pattern;
    uint32_t mask;
    AArch64DecodeFn *disas_fn;
} AArch64DecodeTable;

/* Function prototype for gen_ functions for calling Neon helpers */
typedef void NeonGenOneOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32);
typedef void NeonGenTwoOpFn(TCGv_i32, TCGv_i32, TCGv_i32);
typedef void NeonGenTwoOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32, TCGv_i32);
typedef void NeonGenTwo64OpFn(TCGv_i64, TCGv_i64, TCGv_i64);
typedef void NeonGenTwo64OpEnvFn(TCGv_i64, TCGv_ptr, TCGv_i64, TCGv_i64);
typedef void NeonGenNarrowFn(TCGv_i32, TCGv_i64);
typedef void NeonGenNarrowEnvFn(TCGv_i32, TCGv_ptr, TCGv_i64);
typedef void NeonGenWidenFn(TCGv_i64, TCGv_i32);
typedef void NeonGenTwoSingleOPFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
typedef void NeonGenTwoDoubleOPFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr);
typedef void NeonGenOneOpFn(TCGv_i64, TCGv_i64);
typedef void CryptoTwoOpEnvFn(TCGv_ptr, TCGv_i32, TCGv_i32);
typedef void CryptoThreeOpEnvFn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);

/* initialize TCG globals.  */
void a64_translate_init(void)
{
    int i;

    cpu_pc = tcg_global_mem_new_i64(cpu_env,
                                    offsetof(CPUARMState, pc),
                                    "pc");
    for (i = 0; i < 32; i++) {
        cpu_X[i] = tcg_global_mem_new_i64(cpu_env,
                                          offsetof(CPUARMState, xregs[i]),
                                          regnames[i]);
    }

    cpu_exclusive_high = tcg_global_mem_new_i64(cpu_env,
        offsetof(CPUARMState, exclusive_high), "exclusive_high");
}

static inline ARMMMUIdx get_a64_user_mem_index(DisasContext *s)
{
    /* Return the mmu_idx to use for A64 "unprivileged load/store" insns:
     *  if EL1, access as if EL0; otherwise access at current EL
     */
    switch (s->mmu_idx) {
    case ARMMMUIdx_S12NSE1:
        return ARMMMUIdx_S12NSE0;
    case ARMMMUIdx_S1SE1:
        return ARMMMUIdx_S1SE0;
    case ARMMMUIdx_S2NS:
        g_assert_not_reached();
    default:
        return s->mmu_idx;
    }
}

void aarch64_cpu_dump_state(CPUState *cs, FILE *f,
                            fprintf_function cpu_fprintf, int flags)
{
    ARMCPU *cpu = ARM_CPU(cs);
    CPUARMState *env = &cpu->env;
    uint32_t psr = pstate_read(env);
    int i;
    int el = arm_current_el(env);
    const char *ns_status;

    cpu_fprintf(f, "PC=%016"PRIx64"  SP=%016"PRIx64"\n",
            env->pc, env->xregs[31]);
    for (i = 0; i < 31; i++) {
        cpu_fprintf(f, "X%02d=%016"PRIx64, i, env->xregs[i]);
        if ((i % 4) == 3) {
            cpu_fprintf(f, "\n");
        } else {
            cpu_fprintf(f, " ");
        }
    }

    if (arm_feature(env, ARM_FEATURE_EL3) && el != 3) {
        ns_status = env->cp15.scr_el3 & SCR_NS ? "NS " : "S ";
    } else {
        ns_status = "";
    }

    cpu_fprintf(f, "\nPSTATE=%08x %c%c%c%c %sEL%d%c\n",
                psr,
                psr & PSTATE_N ? 'N' : '-',
                psr & PSTATE_Z ? 'Z' : '-',
                psr & PSTATE_C ? 'C' : '-',
                psr & PSTATE_V ? 'V' : '-',
                ns_status,
                el,
                psr & PSTATE_SP ? 'h' : 't');

    if (flags & CPU_DUMP_FPU) {
        int numvfpregs = 32;
        for (i = 0; i < numvfpregs; i += 2) {
            uint64_t vlo = float64_val(env->vfp.regs[i * 2]);
            uint64_t vhi = float64_val(env->vfp.regs[(i * 2) + 1]);
            cpu_fprintf(f, "q%02d=%016" PRIx64 ":%016" PRIx64 " ",
                        i, vhi, vlo);
            vlo = float64_val(env->vfp.regs[(i + 1) * 2]);
            vhi = float64_val(env->vfp.regs[((i + 1) * 2) + 1]);
            cpu_fprintf(f, "q%02d=%016" PRIx64 ":%016" PRIx64 "\n",
                        i + 1, vhi, vlo);
        }
        cpu_fprintf(f, "FPCR: %08x  FPSR: %08x\n",
                    vfp_get_fpcr(env), vfp_get_fpsr(env));
    }
}

void gen_a64_set_pc_im(uint64_t val)
{
    tcg_gen_movi_i64(cpu_pc, val);
}

typedef struct DisasCompare64 {
    TCGCond cond;
    TCGv_i64 value;
} DisasCompare64;

static void a64_test_cc(DisasCompare64 *c64, int cc)
{
    DisasCompare c32;

    arm_test_cc(&c32, cc);

    /* Sign-extend the 32-bit value so that the GE/LT comparisons work
     * properly.  The NE/EQ comparisons are also fine with this choice.  */
    c64->cond = c32.cond;
    c64->value = tcg_temp_new_i64();
    tcg_gen_ext_i32_i64(c64->value, c32.value);

    arm_free_cc(&c32);
}

static void a64_free_cc(DisasCompare64 *c64)
{
    tcg_temp_free_i64(c64->value);
}

static void gen_exception_internal(int excp)
{
    TCGv_i32 tcg_excp = tcg_const_i32(excp);

    assert(excp_is_internal(excp));
    gen_helper_exception_internal(cpu_env, tcg_excp);
    tcg_temp_free_i32(tcg_excp);
}

static void gen_exception(int excp, uint32_t syndrome, uint32_t target_el)
{
    TCGv_i32 tcg_excp = tcg_const_i32(excp);
    TCGv_i32 tcg_syn = tcg_const_i32(syndrome);
    TCGv_i32 tcg_el = tcg_const_i32(target_el);

    gen_helper_exception_with_syndrome(cpu_env, tcg_excp,
                                       tcg_syn, tcg_el);
    tcg_temp_free_i32(tcg_el);
    tcg_temp_free_i32(tcg_syn);
    tcg_temp_free_i32(tcg_excp);
}

static void gen_exception_internal_insn(DisasContext *s, int offset, int excp)
{
    gen_a64_set_pc_im(s->pc - offset);
    gen_exception_internal(excp);
    s->is_jmp = DISAS_EXC;
}

static void gen_exception_insn(DisasContext *s, int offset, int excp,
                               uint32_t syndrome, uint32_t target_el)
{
    gen_a64_set_pc_im(s->pc - offset);
    gen_exception(excp, syndrome, target_el);
    s->is_jmp = DISAS_EXC;
}

static void gen_ss_advance(DisasContext *s)
{
    /* If the singlestep state is Active-not-pending, advance to
     * Active-pending.
     */
    if (s->ss_active) {
        s->pstate_ss = 0;
        gen_helper_clear_pstate_ss(cpu_env);
    }
}

static void gen_step_complete_exception(DisasContext *s)
{
    /* We just completed step of an insn. Move from Active-not-pending
     * to Active-pending, and then also take the swstep exception.
     * This corresponds to making the (IMPDEF) choice to prioritize
     * swstep exceptions over asynchronous exceptions taken to an exception
     * level where debug is disabled. This choice has the advantage that
     * we do not need to maintain internal state corresponding to the
     * ISV/EX syndrome bits between completion of the step and generation
     * of the exception, and our syndrome information is always correct.
     */
    gen_ss_advance(s);
    gen_exception(EXCP_UDEF, syn_swstep(s->ss_same_el, 1, s->is_ldex),
                  default_exception_el(s));
    s->is_jmp = DISAS_EXC;
}

static inline bool use_goto_tb(DisasContext *s, int n, uint64_t dest)
{
    /* No direct tb linking with singlestep (either QEMU's or the ARM
     * debug architecture kind) or deterministic io
     */
    if (s->singlestep_enabled || s->ss_active || (s->tb->cflags & CF_LAST_IO)) {
        return false;
    }

    /* Only link tbs from inside the same guest page */
    if ((s->tb->pc & TARGET_PAGE_MASK) != (dest & TARGET_PAGE_MASK)) {
        return false;
    }

    return true;
}

static inline void gen_goto_tb(DisasContext *s, int n, uint64_t dest)
{
    TranslationBlock *tb;

    tb = s->tb;
    if (use_goto_tb(s, n, dest)) {
        tcg_gen_goto_tb(n);
        gen_a64_set_pc_im(dest);
        tcg_gen_exit_tb((intptr_t)tb + n);
        s->is_jmp = DISAS_TB_JUMP;
    } else {
        gen_a64_set_pc_im(dest);
        if (s->ss_active) {
            gen_step_complete_exception(s);
        } else if (s->singlestep_enabled) {
            gen_exception_internal(EXCP_DEBUG);
        } else {
            tcg_gen_exit_tb(0);
            s->is_jmp = DISAS_TB_JUMP;
        }
    }
}

static void unallocated_encoding(DisasContext *s)
{
    /* Unallocated and reserved encodings are uncategorized */
    gen_exception_insn(s, 4, EXCP_UDEF, syn_uncategorized(),
                       default_exception_el(s));
}

#define unsupported_encoding(s, insn)                                    \
    do {                                                                 \
        qemu_log_mask(LOG_UNIMP,                                         \
                      "%s:%d: unsupported instruction encoding 0x%08x "  \
                      "at pc=%016" PRIx64 "\n",                          \
                      __FILE__, __LINE__, insn, s->pc - 4);              \
        unallocated_encoding(s);                                         \
    } while (0);

static void init_tmp_a64_array(DisasContext *s)
{
#ifdef CONFIG_DEBUG_TCG
    int i;
    for (i = 0; i < ARRAY_SIZE(s->tmp_a64); i++) {
        TCGV_UNUSED_I64(s->tmp_a64[i]);
    }
#endif
    s->tmp_a64_count = 0;
}

static void free_tmp_a64(DisasContext *s)
{
    int i;
    for (i = 0; i < s->tmp_a64_count; i++) {
        tcg_temp_free_i64(s->tmp_a64[i]);
    }
    init_tmp_a64_array(s);
}

static TCGv_i64 new_tmp_a64(DisasContext *s)
{
    assert(s->tmp_a64_count < TMP_A64_MAX);
    return s->tmp_a64[s->tmp_a64_count++] = tcg_temp_new_i64();
}

static TCGv_i64 new_tmp_a64_zero(DisasContext *s)
{
    TCGv_i64 t = new_tmp_a64(s);
    tcg_gen_movi_i64(t, 0);
    return t;
}

/*
 * Register access functions
 *
 * These functions are used for directly accessing a register in cases
 * where changes to the final register value are likely to be made. If you
 * need to use a register for temporary calculation (e.g. index type
 * operations) use the read_* form.
 *
 * B1.2.1 Register mappings
 *
 * In instruction register encoding 31 can refer to ZR (zero register) or
 * the SP (stack pointer) depending on context. In QEMU's case we map SP
 * to cpu_X[31] and ZR accesses to a temporary which can be discarded.
 * This is the point of the _sp forms.
 */
static TCGv_i64 cpu_reg(DisasContext *s, int reg)
{
    if (reg == 31) {
        return new_tmp_a64_zero(s);
    } else {
        return cpu_X[reg];
    }
}

/* register access for when 31 == SP */
static TCGv_i64 cpu_reg_sp(DisasContext *s, int reg)
{
    return cpu_X[reg];
}
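
/* Illustrative sketch (not part of the build): register 31 resolves
 * differently depending on which accessor the decoder picks. For a
 * hypothetical instruction with rd == rn == 31:
 *
 *   TCGv_i64 zr = cpu_reg(s, 31);     // fresh zeroed temp; ZR semantics
 *   TCGv_i64 sp = cpu_reg_sp(s, 31);  // the real cpu_X[31], i.e. SP
 *
 * Writes to the value returned by cpu_reg(s, 31) land in a discarded
 * temporary, which is exactly the architected zero-register behaviour.
 */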

/* read a cpu register in 32bit/64bit mode. Returns a TCGv_i64
 * representing the register contents. This TCGv is an auto-freed
 * temporary so it need not be explicitly freed, and may be modified.
 */
static TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf)
{
    TCGv_i64 v = new_tmp_a64(s);
    if (reg != 31) {
        if (sf) {
            tcg_gen_mov_i64(v, cpu_X[reg]);
        } else {
            tcg_gen_ext32u_i64(v, cpu_X[reg]);
        }
    } else {
        tcg_gen_movi_i64(v, 0);
    }
    return v;
}

static TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf)
{
    TCGv_i64 v = new_tmp_a64(s);
    if (sf) {
        tcg_gen_mov_i64(v, cpu_X[reg]);
    } else {
        tcg_gen_ext32u_i64(v, cpu_X[reg]);
    }
    return v;
}

/* We should have at some point before trying to access an FP register
 * done the necessary access check, so assert that
 * (a) we did the check and
 * (b) we didn't then just plough ahead anyway if it failed.
 * Print the instruction pattern in the abort message so we can figure
 * out what we need to fix if a user encounters this problem in the wild.
 */
static inline void assert_fp_access_checked(DisasContext *s)
{
#ifdef CONFIG_DEBUG_TCG
    if (unlikely(!s->fp_access_checked || s->fp_excp_el)) {
        fprintf(stderr, "target-arm: FP access check missing for "
                "instruction 0x%08x\n", s->insn);
        abort();
    }
#endif
}

/* Return the offset into CPUARMState of an element of specified
 * size, 'element' places in from the least significant end of
 * the FP/vector register Qn.
 */
static inline int vec_reg_offset(DisasContext *s, int regno,
                                 int element, TCGMemOp size)
{
    int offs = offsetof(CPUARMState, vfp.regs[regno * 2]);
#ifdef HOST_WORDS_BIGENDIAN
    /* This is complicated slightly because vfp.regs[2n] is
     * still the low half and vfp.regs[2n+1] the high half
     * of the 128 bit vector, even on big endian systems.
     * Calculate the offset assuming a fully bigendian 128 bits,
     * then XOR to account for the order of the two 64 bit halves.
     */
    offs += (16 - ((element + 1) * (1 << size)));
    offs ^= 8;
#else
    offs += element * (1 << size);
#endif
    assert_fp_access_checked(s);
    return offs;
}
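
/* Worked example (for reference only): element 1 of Qn at size MO_32,
 * i.e. bits [63:32]. On a little-endian host the offset is base + 4.
 * On a big-endian host the fully-bigendian layout gives
 * base + (16 - 2 * 4) = base + 8, and the XOR with 8 folds that back
 * to base + 0: the most significant bytes of the big-endian doubleword
 * vfp.regs[2n], which is where bits [63:32] actually live.
 */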

/* Return the offset into CPUARMState of a slice (from
 * the least significant end) of FP register Qn (ie
 * Dn, Sn, Hn or Bn).
 * (Note that this is not the same mapping as for A32; see cpu.h)
 */
static inline int fp_reg_offset(DisasContext *s, int regno, TCGMemOp size)
{
    int offs = offsetof(CPUARMState, vfp.regs[regno * 2]);
#ifdef HOST_WORDS_BIGENDIAN
    offs += (8 - (1 << size));
#endif
    assert_fp_access_checked(s);
    return offs;
}

/* Offset of the high half of the 128 bit vector Qn */
static inline int fp_reg_hi_offset(DisasContext *s, int regno)
{
    assert_fp_access_checked(s);
    return offsetof(CPUARMState, vfp.regs[regno * 2 + 1]);
}

/* Convenience accessors for reading and writing single and double
 * FP registers. Writing clears the upper parts of the associated
 * 128 bit vector register, as required by the architecture.
 * Note that unlike the GP register accessors, the values returned
 * by the read functions must be manually freed.
 */
static TCGv_i64 read_fp_dreg(DisasContext *s, int reg)
{
    TCGv_i64 v = tcg_temp_new_i64();

    tcg_gen_ld_i64(v, cpu_env, fp_reg_offset(s, reg, MO_64));
    return v;
}

static TCGv_i32 read_fp_sreg(DisasContext *s, int reg)
{
    TCGv_i32 v = tcg_temp_new_i32();

    tcg_gen_ld_i32(v, cpu_env, fp_reg_offset(s, reg, MO_32));
    return v;
}

static void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v)
{
    TCGv_i64 tcg_zero = tcg_const_i64(0);

    tcg_gen_st_i64(v, cpu_env, fp_reg_offset(s, reg, MO_64));
    tcg_gen_st_i64(tcg_zero, cpu_env, fp_reg_hi_offset(s, reg));
    tcg_temp_free_i64(tcg_zero);
}

static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v)
{
    TCGv_i64 tmp = tcg_temp_new_i64();

    tcg_gen_extu_i32_i64(tmp, v);
    write_fp_dreg(s, reg, tmp);
    tcg_temp_free_i64(tmp);
}
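
/* Usage sketch (illustrative, with a hypothetical result temp): a decoder
 * producing a single-precision scalar result would typically emit
 *
 *   TCGv_i32 tcg_res = tcg_temp_new_i32();
 *   ... generate tcg_res ...
 *   write_fp_sreg(s, rd, tcg_res);   // also zeroes V<rd> bits [127:32]
 *   tcg_temp_free_i32(tcg_res);
 *
 * because write_fp_sreg zero-extends to 64 bits and write_fp_dreg then
 * clears the high half of the 128 bit vector register.
 */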

static TCGv_ptr get_fpstatus_ptr(void)
{
    TCGv_ptr statusptr = tcg_temp_new_ptr();
    int offset;

    /* In A64 all instructions (both FP and Neon) use the FPCR;
     * there is no equivalent of the A32 Neon "standard FPSCR value"
     * and all operations use vfp.fp_status.
     */
    offset = offsetof(CPUARMState, vfp.fp_status);
    tcg_gen_addi_ptr(statusptr, cpu_env, offset);
    return statusptr;
}

/* Set ZF and NF based on a 64 bit result. This is alas fiddlier
 * than the 32 bit equivalent.
 */
static inline void gen_set_NZ64(TCGv_i64 result)
{
    tcg_gen_extr_i64_i32(cpu_ZF, cpu_NF, result);
    tcg_gen_or_i32(cpu_ZF, cpu_ZF, cpu_NF);
}
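
/* For reference: the extr above puts the low half of the result in
 * cpu_ZF and the high half in cpu_NF, then ORs the halves into cpu_ZF.
 * So cpu_ZF == 0 iff the full 64 bit result was zero, and cpu_NF ends
 * up with bit 63 of the result in its sign bit, matching QEMU's
 * convention that Z is (cpu_ZF == 0) and N is the sign of cpu_NF.
 */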

/* Set NZCV as for a logical operation: NZ as per result, CV cleared. */
static inline void gen_logic_CC(int sf, TCGv_i64 result)
{
    if (sf) {
        gen_set_NZ64(result);
    } else {
        tcg_gen_extrl_i64_i32(cpu_ZF, result);
        tcg_gen_mov_i32(cpu_NF, cpu_ZF);
    }
    tcg_gen_movi_i32(cpu_CF, 0);
    tcg_gen_movi_i32(cpu_VF, 0);
}

/* dest = T0 + T1; compute C, N, V and Z flags */
static void gen_add_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        TCGv_i64 result, flag, tmp;
        result = tcg_temp_new_i64();
        flag = tcg_temp_new_i64();
        tmp = tcg_temp_new_i64();

        tcg_gen_movi_i64(tmp, 0);
        tcg_gen_add2_i64(result, flag, t0, tmp, t1, tmp);

        tcg_gen_extrl_i64_i32(cpu_CF, flag);

        gen_set_NZ64(result);

        tcg_gen_xor_i64(flag, result, t0);
        tcg_gen_xor_i64(tmp, t0, t1);
        tcg_gen_andc_i64(flag, flag, tmp);
        tcg_temp_free_i64(tmp);
        tcg_gen_extrh_i64_i32(cpu_VF, flag);

        tcg_gen_mov_i64(dest, result);
        tcg_temp_free_i64(result);
        tcg_temp_free_i64(flag);
    } else {
        /* 32 bit arithmetic */
        TCGv_i32 t0_32 = tcg_temp_new_i32();
        TCGv_i32 t1_32 = tcg_temp_new_i32();
        TCGv_i32 tmp = tcg_temp_new_i32();

        tcg_gen_movi_i32(tmp, 0);
        tcg_gen_extrl_i64_i32(t0_32, t0);
        tcg_gen_extrl_i64_i32(t1_32, t1);
        tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, t1_32, tmp);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
        tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
        tcg_gen_xor_i32(tmp, t0_32, t1_32);
        tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
        tcg_gen_extu_i32_i64(dest, cpu_NF);

        tcg_temp_free_i32(tmp);
        tcg_temp_free_i32(t0_32);
        tcg_temp_free_i32(t1_32);
    }
}
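
/* Sanity check of the V computation above (illustrative): for the 64 bit
 * add 0x7fffffffffffffff + 1, result = 0x8000000000000000, so
 * result ^ t0 has bit 63 set while t0 ^ t1 has bit 63 clear; the andc
 * leaves bit 63 set and extrh copies it into cpu_VF's sign bit: signed
 * overflow, as expected. Operands of opposite sign can never overflow
 * on addition, which is exactly what masking with ~(t0 ^ t1) encodes.
 */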

/* dest = T0 - T1; compute C, N, V and Z flags */
static void gen_sub_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        /* 64 bit arithmetic */
        TCGv_i64 result, flag, tmp;

        result = tcg_temp_new_i64();
        flag = tcg_temp_new_i64();
        tcg_gen_sub_i64(result, t0, t1);

        gen_set_NZ64(result);

        tcg_gen_setcond_i64(TCG_COND_GEU, flag, t0, t1);
        tcg_gen_extrl_i64_i32(cpu_CF, flag);

        tcg_gen_xor_i64(flag, result, t0);
        tmp = tcg_temp_new_i64();
        tcg_gen_xor_i64(tmp, t0, t1);
        tcg_gen_and_i64(flag, flag, tmp);
        tcg_temp_free_i64(tmp);
        tcg_gen_extrh_i64_i32(cpu_VF, flag);
        tcg_gen_mov_i64(dest, result);
        tcg_temp_free_i64(flag);
        tcg_temp_free_i64(result);
    } else {
        /* 32 bit arithmetic */
        TCGv_i32 t0_32 = tcg_temp_new_i32();
        TCGv_i32 t1_32 = tcg_temp_new_i32();
        TCGv_i32 tmp;

        tcg_gen_extrl_i64_i32(t0_32, t0);
        tcg_gen_extrl_i64_i32(t1_32, t1);
        tcg_gen_sub_i32(cpu_NF, t0_32, t1_32);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
        tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0_32, t1_32);
        tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
        tmp = tcg_temp_new_i32();
        tcg_gen_xor_i32(tmp, t0_32, t1_32);
        tcg_temp_free_i32(t0_32);
        tcg_temp_free_i32(t1_32);
        tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
        tcg_temp_free_i32(tmp);
        tcg_gen_extu_i32_i64(dest, cpu_NF);
    }
}

/* dest = T0 + T1 + CF; do not compute flags. */
static void gen_adc(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    TCGv_i64 flag = tcg_temp_new_i64();
    tcg_gen_extu_i32_i64(flag, cpu_CF);
    tcg_gen_add_i64(dest, t0, t1);
    tcg_gen_add_i64(dest, dest, flag);
    tcg_temp_free_i64(flag);

    if (!sf) {
        tcg_gen_ext32u_i64(dest, dest);
    }
}

/* dest = T0 + T1 + CF; compute C, N, V and Z flags. */
static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        TCGv_i64 result, cf_64, vf_64, tmp;
        result = tcg_temp_new_i64();
        cf_64 = tcg_temp_new_i64();
        vf_64 = tcg_temp_new_i64();
        tmp = tcg_const_i64(0);

        tcg_gen_extu_i32_i64(cf_64, cpu_CF);
        tcg_gen_add2_i64(result, cf_64, t0, tmp, cf_64, tmp);
        tcg_gen_add2_i64(result, cf_64, result, cf_64, t1, tmp);
        tcg_gen_extrl_i64_i32(cpu_CF, cf_64);
        gen_set_NZ64(result);

        tcg_gen_xor_i64(vf_64, result, t0);
        tcg_gen_xor_i64(tmp, t0, t1);
        tcg_gen_andc_i64(vf_64, vf_64, tmp);
        tcg_gen_extrh_i64_i32(cpu_VF, vf_64);

        tcg_gen_mov_i64(dest, result);

        tcg_temp_free_i64(tmp);
        tcg_temp_free_i64(vf_64);
        tcg_temp_free_i64(cf_64);
        tcg_temp_free_i64(result);
    } else {
        TCGv_i32 t0_32, t1_32, tmp;
        t0_32 = tcg_temp_new_i32();
        t1_32 = tcg_temp_new_i32();
        tmp = tcg_const_i32(0);

        tcg_gen_extrl_i64_i32(t0_32, t0);
        tcg_gen_extrl_i64_i32(t1_32, t1);
        tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, cpu_CF, tmp);
        tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1_32, tmp);

        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
        tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
        tcg_gen_xor_i32(tmp, t0_32, t1_32);
        tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
        tcg_gen_extu_i32_i64(dest, cpu_NF);

        tcg_temp_free_i32(tmp);
        tcg_temp_free_i32(t1_32);
        tcg_temp_free_i32(t0_32);
    }
}

/*
 * Load/Store generators
 */

/*
 * Store from GPR register to memory.
 */
static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source,
                             TCGv_i64 tcg_addr, int size, int memidx)
{
    g_assert(size <= 3);
    tcg_gen_qemu_st_i64(source, tcg_addr, memidx, s->be_data + size);
}

static void do_gpr_st(DisasContext *s, TCGv_i64 source,
                      TCGv_i64 tcg_addr, int size)
{
    do_gpr_st_memidx(s, source, tcg_addr, size, get_mem_index(s));
}

/*
 * Load from memory to GPR register
 */
static void do_gpr_ld_memidx(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
                             int size, bool is_signed, bool extend, int memidx)
{
    TCGMemOp memop = s->be_data + size;

    g_assert(size <= 3);

    if (is_signed) {
        memop += MO_SIGN;
    }

    tcg_gen_qemu_ld_i64(dest, tcg_addr, memidx, memop);

    if (extend && is_signed) {
        g_assert(size < 3);
        tcg_gen_ext32u_i64(dest, dest);
    }
}

static void do_gpr_ld(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
                      int size, bool is_signed, bool extend)
{
    do_gpr_ld_memidx(s, dest, tcg_addr, size, is_signed, extend,
                     get_mem_index(s));
}

/*
 * Store from FP register to memory
 */
static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, int size)
{
    /* This writes the bottom N bits of a 128 bit wide vector to memory */
    TCGv_i64 tmp = tcg_temp_new_i64();
    tcg_gen_ld_i64(tmp, cpu_env, fp_reg_offset(s, srcidx, MO_64));
    if (size < 4) {
        tcg_gen_qemu_st_i64(tmp, tcg_addr, get_mem_index(s),
                            s->be_data + size);
    } else {
        bool be = s->be_data == MO_BE;
        TCGv_i64 tcg_hiaddr = tcg_temp_new_i64();

        tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
        tcg_gen_qemu_st_i64(tmp, be ? tcg_hiaddr : tcg_addr, get_mem_index(s),
                            s->be_data | MO_Q);
        tcg_gen_ld_i64(tmp, cpu_env, fp_reg_hi_offset(s, srcidx));
        tcg_gen_qemu_st_i64(tmp, be ? tcg_addr : tcg_hiaddr, get_mem_index(s),
                            s->be_data | MO_Q);
        tcg_temp_free_i64(tcg_hiaddr);
    }

    tcg_temp_free_i64(tmp);
}

/*
 * Load from memory to FP register
 */
static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, int size)
{
    /* This always zero-extends and writes to a full 128 bit wide vector */
    TCGv_i64 tmplo = tcg_temp_new_i64();
    TCGv_i64 tmphi;

    if (size < 4) {
        TCGMemOp memop = s->be_data + size;
        tmphi = tcg_const_i64(0);
        tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), memop);
    } else {
        bool be = s->be_data == MO_BE;
        TCGv_i64 tcg_hiaddr;

        tmphi = tcg_temp_new_i64();
        tcg_hiaddr = tcg_temp_new_i64();

        tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
        tcg_gen_qemu_ld_i64(tmplo, be ? tcg_hiaddr : tcg_addr, get_mem_index(s),
                            s->be_data | MO_Q);
        tcg_gen_qemu_ld_i64(tmphi, be ? tcg_addr : tcg_hiaddr, get_mem_index(s),
                            s->be_data | MO_Q);
        tcg_temp_free_i64(tcg_hiaddr);
    }

    tcg_gen_st_i64(tmplo, cpu_env, fp_reg_offset(s, destidx, MO_64));
    tcg_gen_st_i64(tmphi, cpu_env, fp_reg_hi_offset(s, destidx));

    tcg_temp_free_i64(tmplo);
    tcg_temp_free_i64(tmphi);
}

/*
 * Vector load/store helpers.
 *
 * The principal difference between this and a FP load is that we don't
 * zero extend as we are filling a partial chunk of the vector register.
 * These functions don't support 128 bit loads/stores, which would be
 * normal load/store operations.
 *
 * The _i32 versions are useful when operating on 32 bit quantities
 * (eg for floating point single or using Neon helper functions).
 */

/* Get value of an element within a vector register */
static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx,
                             int element, TCGMemOp memop)
{
    int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_ld8u_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_16:
        tcg_gen_ld16u_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_32:
        tcg_gen_ld32u_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_8|MO_SIGN:
        tcg_gen_ld8s_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_16|MO_SIGN:
        tcg_gen_ld16s_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_32|MO_SIGN:
        tcg_gen_ld32s_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_64:
    case MO_64|MO_SIGN:
        tcg_gen_ld_i64(tcg_dest, cpu_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

static void read_vec_element_i32(DisasContext *s, TCGv_i32 tcg_dest, int srcidx,
                                 int element, TCGMemOp memop)
{
    int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_ld8u_i32(tcg_dest, cpu_env, vect_off);
        break;
    case MO_16:
        tcg_gen_ld16u_i32(tcg_dest, cpu_env, vect_off);
        break;
    case MO_8|MO_SIGN:
        tcg_gen_ld8s_i32(tcg_dest, cpu_env, vect_off);
        break;
    case MO_16|MO_SIGN:
        tcg_gen_ld16s_i32(tcg_dest, cpu_env, vect_off);
        break;
    case MO_32:
    case MO_32|MO_SIGN:
        tcg_gen_ld_i32(tcg_dest, cpu_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

/* Set value of an element within a vector register */
static void write_vec_element(DisasContext *s, TCGv_i64 tcg_src, int destidx,
                              int element, TCGMemOp memop)
{
    int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_st8_i64(tcg_src, cpu_env, vect_off);
        break;
    case MO_16:
        tcg_gen_st16_i64(tcg_src, cpu_env, vect_off);
        break;
    case MO_32:
        tcg_gen_st32_i64(tcg_src, cpu_env, vect_off);
        break;
    case MO_64:
        tcg_gen_st_i64(tcg_src, cpu_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src,
                                  int destidx, int element, TCGMemOp memop)
{
    int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_st8_i32(tcg_src, cpu_env, vect_off);
        break;
    case MO_16:
        tcg_gen_st16_i32(tcg_src, cpu_env, vect_off);
        break;
    case MO_32:
        tcg_gen_st_i32(tcg_src, cpu_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

/* Clear the high 64 bits of a 128 bit vector (in general non-quad
 * vector ops all need to do this).
 */
static void clear_vec_high(DisasContext *s, int rd)
{
    TCGv_i64 tcg_zero = tcg_const_i64(0);

    write_vec_element(s, tcg_zero, rd, 1, MO_64);
    tcg_temp_free_i64(tcg_zero);
}

/* Store from vector register to memory */
static void do_vec_st(DisasContext *s, int srcidx, int element,
                      TCGv_i64 tcg_addr, int size)
{
    TCGMemOp memop = s->be_data + size;
    TCGv_i64 tcg_tmp = tcg_temp_new_i64();

    read_vec_element(s, tcg_tmp, srcidx, element, size);
    tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), memop);

    tcg_temp_free_i64(tcg_tmp);
}

/* Load from memory to vector register */
static void do_vec_ld(DisasContext *s, int destidx, int element,
                      TCGv_i64 tcg_addr, int size)
{
    TCGMemOp memop = s->be_data + size;
    TCGv_i64 tcg_tmp = tcg_temp_new_i64();

    tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), memop);
    write_vec_element(s, tcg_tmp, destidx, element, size);

    tcg_temp_free_i64(tcg_tmp);
}

/* Check that FP/Neon access is enabled. If it is, return
 * true. If not, emit code to generate an appropriate exception,
 * and return false; the caller should not emit any code for
 * the instruction. Note that this check must happen after all
 * unallocated-encoding checks (otherwise the syndrome information
 * for the resulting exception will be incorrect).
 */
static inline bool fp_access_check(DisasContext *s)
{
    assert(!s->fp_access_checked);
    s->fp_access_checked = true;

    if (!s->fp_excp_el) {
        return true;
    }

    gen_exception_insn(s, 4, EXCP_UDEF, syn_fp_access_trap(1, 0xe, false),
                       s->fp_excp_el);
    return false;
}

/*
 * This utility function is for doing register extension with an
 * optional shift. You will likely want to pass a temporary for the
 * destination register. See DecodeRegExtend() in the ARM ARM.
 */
static void ext_and_shift_reg(TCGv_i64 tcg_out, TCGv_i64 tcg_in,
                              int option, unsigned int shift)
{
    int extsize = extract32(option, 0, 2);
    bool is_signed = extract32(option, 2, 1);

    if (is_signed) {
        switch (extsize) {
        case 0:
            tcg_gen_ext8s_i64(tcg_out, tcg_in);
            break;
        case 1:
            tcg_gen_ext16s_i64(tcg_out, tcg_in);
            break;
        case 2:
            tcg_gen_ext32s_i64(tcg_out, tcg_in);
            break;
        case 3:
            tcg_gen_mov_i64(tcg_out, tcg_in);
            break;
        }
    } else {
        switch (extsize) {
        case 0:
            tcg_gen_ext8u_i64(tcg_out, tcg_in);
            break;
        case 1:
            tcg_gen_ext16u_i64(tcg_out, tcg_in);
            break;
        case 2:
            tcg_gen_ext32u_i64(tcg_out, tcg_in);
            break;
        case 3:
            tcg_gen_mov_i64(tcg_out, tcg_in);
            break;
        }
    }

    if (shift) {
        tcg_gen_shli_i64(tcg_out, tcg_out, shift);
    }
}
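
/* Example (illustrative): for an extended-register operand such as
 * "ADD X0, X1, W2, UXTW #2", option == 2 (UXTW) and shift == 2, so the
 * sequence above reduces to
 *
 *   tcg_gen_ext32u_i64(tcg_out, tcg_in);
 *   tcg_gen_shli_i64(tcg_out, tcg_out, 2);
 *
 * i.e. tcg_out = ((uint64_t)(uint32_t)Xm) << 2.
 */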

static inline void gen_check_sp_alignment(DisasContext *s)
{
    /* The AArch64 architecture mandates that (if enabled via PSTATE
     * or SCTLR bits) there is a check that SP is 16-aligned on every
     * SP-relative load or store (with an exception generated if it is not).
     * In line with general QEMU practice regarding misaligned accesses,
     * we omit these checks for the sake of guest program performance.
     * This function is provided as a hook so we can more easily add these
     * checks in future (possibly as a "favour catching guest program bugs
     * over speed" user selectable option).
     */
}

/*
 * This provides a simple table based lookup decoder. It is
 * intended to be used when the relevant bits for decode are too
 * awkwardly placed and switch/if based logic would be confusing and
 * deeply nested. Since it's a linear search through the table, tables
 * should be kept small.
 *
 * It returns the first handler where insn & mask == pattern, or
 * NULL if there is no match.
 * The table is terminated by an empty mask (i.e. 0)
 */
static inline AArch64DecodeFn *lookup_disas_fn(const AArch64DecodeTable *table,
                                               uint32_t insn)
{
    const AArch64DecodeTable *tptr = table;

    while (tptr->mask) {
        if ((insn & tptr->mask) == tptr->pattern) {
            return tptr->disas_fn;
        }
        tptr++;
    }
    return NULL;
}
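
/* Usage sketch (illustrative; the pattern/mask pair and disas_example_fn
 * are made up): a caller builds a zero-terminated table and falls back
 * to unallocated_encoding() when nothing matches:
 *
 *   static const AArch64DecodeTable example_table[] = {
 *       { 0x0e000400, 0x9fe08400, disas_example_fn },  // hypothetical
 *       { 0x00000000, 0x00000000, NULL }
 *   };
 *   AArch64DecodeFn *fn = lookup_disas_fn(example_table, insn);
 *   if (fn) {
 *       fn(s, insn);
 *   } else {
 *       unallocated_encoding(s);
 *   }
 */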

/*
 * the instruction disassembly implemented here matches
 * the instruction encoding classifications in chapter 3 (C3)
 * of the ARM Architecture Reference Manual (DDI0487A_a)
 */

/* C3.2.7 Unconditional branch (immediate)
 *   31  30       26 25                                  0
 * +----+-----------+-------------------------------------+
 * | op | 0 0 1 0 1 |                 imm26               |
 * +----+-----------+-------------------------------------+
 */
static void disas_uncond_b_imm(DisasContext *s, uint32_t insn)
{
    uint64_t addr = s->pc + sextract32(insn, 0, 26) * 4 - 4;

    if (insn & (1U << 31)) {
        /* C5.6.26 BL Branch with link */
        tcg_gen_movi_i64(cpu_reg(s, 30), s->pc);
    }

    /* C5.6.20 B Branch / C5.6.26 BL Branch with link */
    gen_goto_tb(s, 0, addr);
}
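
/* Address arithmetic check (illustrative): s->pc has already been
 * advanced past this insn, so the "- 4" rebases to the address of the
 * branch itself. E.g. a B with imm26 == 0x3ffffff (i.e. -1) at address
 * 0x1000 gives addr = 0x1004 + (-1) * 4 - 4 = 0xffc, a branch to the
 * preceding instruction, matching the architected PC-relative offset
 * of imm26 * 4.
 */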

/* C3.2.1 Compare & branch (immediate)
 *   31  30         25  24  23                  5 4      0
 * +----+-------------+----+---------------------+--------+
 * | sf | 0 1 1 0 1 0 | op |         imm19       |   Rt   |
 * +----+-------------+----+---------------------+--------+
 */
static void disas_comp_b_imm(DisasContext *s, uint32_t insn)
{
    unsigned int sf, op, rt;
    uint64_t addr;
    TCGLabel *label_match;
    TCGv_i64 tcg_cmp;

    sf = extract32(insn, 31, 1);
    op = extract32(insn, 24, 1); /* 0: CBZ; 1: CBNZ */
    rt = extract32(insn, 0, 5);
    addr = s->pc + sextract32(insn, 5, 19) * 4 - 4;

    tcg_cmp = read_cpu_reg(s, rt, sf);
    label_match = gen_new_label();

    tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
                        tcg_cmp, 0, label_match);

    gen_goto_tb(s, 0, s->pc);
    gen_set_label(label_match);
    gen_goto_tb(s, 1, addr);
}

/* C3.2.5 Test & branch (immediate)
 *   31  30         25  24  23   19 18          5 4    0
 * +----+-------------+----+-------+-------------+------+
 * | b5 | 0 1 1 0 1 1 | op |  b40  |    imm14    |  Rt  |
 * +----+-------------+----+-------+-------------+------+
 */
static void disas_test_b_imm(DisasContext *s, uint32_t insn)
{
    unsigned int bit_pos, op, rt;
    uint64_t addr;
    TCGLabel *label_match;
    TCGv_i64 tcg_cmp;

    bit_pos = (extract32(insn, 31, 1) << 5) | extract32(insn, 19, 5);
    op = extract32(insn, 24, 1); /* 0: TBZ; 1: TBNZ */
    addr = s->pc + sextract32(insn, 5, 14) * 4 - 4;
    rt = extract32(insn, 0, 5);

    tcg_cmp = tcg_temp_new_i64();
    tcg_gen_andi_i64(tcg_cmp, cpu_reg(s, rt), (1ULL << bit_pos));
    label_match = gen_new_label();
    tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
                        tcg_cmp, 0, label_match);
    tcg_temp_free_i64(tcg_cmp);
    gen_goto_tb(s, 0, s->pc);
    gen_set_label(label_match);
    gen_goto_tb(s, 1, addr);
}

/* C3.2.2 / C5.6.19 Conditional branch (immediate)
 *  31           25  24  23                  5   4  3    0
 * +---------------+----+---------------------+----+------+
 * | 0 1 0 1 0 1 0 | o1 |         imm19       | o0 | cond |
 * +---------------+----+---------------------+----+------+
 */
static void disas_cond_b_imm(DisasContext *s, uint32_t insn)
{
    unsigned int cond;
    uint64_t addr;

    if ((insn & (1 << 4)) || (insn & (1 << 24))) {
        unallocated_encoding(s);
        return;
    }
    addr = s->pc + sextract32(insn, 5, 19) * 4 - 4;
    cond = extract32(insn, 0, 4);

    if (cond < 0x0e) {
        /* genuinely conditional branches */
        TCGLabel *label_match = gen_new_label();
        arm_gen_test_cc(cond, label_match);
        gen_goto_tb(s, 0, s->pc);
        gen_set_label(label_match);
        gen_goto_tb(s, 1, addr);
    } else {
        /* 0xe and 0xf are both "always" conditions */
        gen_goto_tb(s, 0, addr);
    }
}

/* C5.6.68 HINT */
static void handle_hint(DisasContext *s, uint32_t insn,
                        unsigned int op1, unsigned int op2, unsigned int crm)
{
    unsigned int selector = crm << 3 | op2;

    if (op1 != 3) {
        unallocated_encoding(s);
        return;
    }

    switch (selector) {
    case 0: /* NOP */
        return;
    case 3: /* WFI */
        s->is_jmp = DISAS_WFI;
        return;
    case 1: /* YIELD */
        s->is_jmp = DISAS_YIELD;
        return;
    case 2: /* WFE */
        s->is_jmp = DISAS_WFE;
        return;
    case 4: /* SEV */
    case 5: /* SEVL */
        /* we treat all as NOP at least for now */
        return;
    default:
        /* default specified as NOP equivalent */
        return;
    }
}

static void gen_clrex(DisasContext *s, uint32_t insn)
{
    tcg_gen_movi_i64(cpu_exclusive_addr, -1);
}

/* CLREX, DSB, DMB, ISB */
static void handle_sync(DisasContext *s, uint32_t insn,
                        unsigned int op1, unsigned int op2, unsigned int crm)
{
    if (op1 != 3) {
        unallocated_encoding(s);
        return;
    }

    switch (op2) {
    case 2: /* CLREX */
        gen_clrex(s, insn);
        return;
    case 4: /* DSB */
    case 5: /* DMB */
        /* We don't emulate caches so barriers are no-ops */
        return;
    case 6: /* ISB */
        /* We need to break the TB after this insn to execute
         * self-modifying code correctly and also to take
         * any pending interrupts immediately.
         */
        s->is_jmp = DISAS_UPDATE;
        return;
    default:
        unallocated_encoding(s);
        return;
    }
}

/* C5.6.130 MSR (immediate) - move immediate to processor state field */
static void handle_msr_i(DisasContext *s, uint32_t insn,
                         unsigned int op1, unsigned int op2, unsigned int crm)
{
    int op = op1 << 3 | op2;
    switch (op) {
    case 0x05: /* SPSel */
        if (s->current_el == 0) {
            unallocated_encoding(s);
            return;
        }
        /* fall through */
    case 0x1e: /* DAIFSet */
    case 0x1f: /* DAIFClear */
    {
        TCGv_i32 tcg_imm = tcg_const_i32(crm);
        TCGv_i32 tcg_op = tcg_const_i32(op);
        gen_a64_set_pc_im(s->pc - 4);
        gen_helper_msr_i_pstate(cpu_env, tcg_op, tcg_imm);
        tcg_temp_free_i32(tcg_imm);
        tcg_temp_free_i32(tcg_op);
        s->is_jmp = DISAS_UPDATE;
        break;
    }
    default:
        unallocated_encoding(s);
        return;
    }
}

static void gen_get_nzcv(TCGv_i64 tcg_rt)
{
    TCGv_i32 tmp = tcg_temp_new_i32();
    TCGv_i32 nzcv = tcg_temp_new_i32();

    /* build bit 31, N */
    tcg_gen_andi_i32(nzcv, cpu_NF, (1U << 31));
    /* build bit 30, Z */
    tcg_gen_setcondi_i32(TCG_COND_EQ, tmp, cpu_ZF, 0);
    tcg_gen_deposit_i32(nzcv, nzcv, tmp, 30, 1);
    /* build bit 29, C */
    tcg_gen_deposit_i32(nzcv, nzcv, cpu_CF, 29, 1);
    /* build bit 28, V */
    tcg_gen_shri_i32(tmp, cpu_VF, 31);
    tcg_gen_deposit_i32(nzcv, nzcv, tmp, 28, 1);
    /* generate result */
    tcg_gen_extu_i32_i64(tcg_rt, nzcv);

    tcg_temp_free_i32(nzcv);
    tcg_temp_free_i32(tmp);
}

static void gen_set_nzcv(TCGv_i64 tcg_rt)
{
    TCGv_i32 nzcv = tcg_temp_new_i32();

    /* take NZCV from R[t] */
    tcg_gen_extrl_i64_i32(nzcv, tcg_rt);

    /* bit 31, N */
    tcg_gen_andi_i32(cpu_NF, nzcv, (1U << 31));
    /* bit 30, Z */
    tcg_gen_andi_i32(cpu_ZF, nzcv, (1 << 30));
    tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_ZF, cpu_ZF, 0);
    /* bit 29, C */
    tcg_gen_andi_i32(cpu_CF, nzcv, (1 << 29));
    tcg_gen_shri_i32(cpu_CF, cpu_CF, 29);
    /* bit 28, V */
    tcg_gen_andi_i32(cpu_VF, nzcv, (1 << 28));
    tcg_gen_shli_i32(cpu_VF, cpu_VF, 3);
    tcg_temp_free_i32(nzcv);
}
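
/* Note on the flag representation used above (for reference): QEMU keeps
 * N as the sign bit of cpu_NF, Z as "cpu_ZF == 0", C directly in bit 0
 * of cpu_CF and V as the sign bit of cpu_VF. That is why gen_set_nzcv
 * can leave bit 31 unshifted in cpu_NF, must invert the sense of the Z
 * bit via setcond, shifts C down to bit 0, and moves V up from bit 28
 * to bit 31.
 */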

/* C5.6.129 MRS - move from system register
 * C5.6.131 MSR (register) - move to system register
 * C5.6.204 SYS
 * C5.6.205 SYSL
 * These are all essentially the same insn in 'read' and 'write'
 * versions, with varying op0 fields.
 */
static void handle_sys(DisasContext *s, uint32_t insn, bool isread,
                       unsigned int op0, unsigned int op1, unsigned int op2,
                       unsigned int crn, unsigned int crm, unsigned int rt)
{
    const ARMCPRegInfo *ri;
    TCGv_i64 tcg_rt;

    ri = get_arm_cp_reginfo(s->cp_regs,
                            ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP,
                                               crn, crm, op0, op1, op2));

    if (!ri) {
        /* Unknown register; this might be a guest error or a QEMU
         * unimplemented feature.
         */
        qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch64 "
                      "system register op0:%d op1:%d crn:%d crm:%d op2:%d\n",
                      isread ? "read" : "write", op0, op1, crn, crm, op2);
        unallocated_encoding(s);
        return;
    }

    /* Check access permissions */
    if (!cp_access_ok(s->current_el, ri, isread)) {
        unallocated_encoding(s);
        return;
    }

    if (ri->accessfn) {
        /* Emit code to perform further access permissions checks at
         * runtime; this may result in an exception.
         */
        TCGv_ptr tmpptr;
        TCGv_i32 tcg_syn, tcg_isread;
        uint32_t syndrome;

        gen_a64_set_pc_im(s->pc - 4);
        tmpptr = tcg_const_ptr(ri);
        syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
        tcg_syn = tcg_const_i32(syndrome);
        tcg_isread = tcg_const_i32(isread);
        gen_helper_access_check_cp_reg(cpu_env, tmpptr, tcg_syn, tcg_isread);
        tcg_temp_free_ptr(tmpptr);
        tcg_temp_free_i32(tcg_syn);
        tcg_temp_free_i32(tcg_isread);
    }

    /* Handle special cases first */
    switch (ri->type & ~(ARM_CP_FLAG_MASK & ~ARM_CP_SPECIAL)) {
    case ARM_CP_NOP:
        return;
    case ARM_CP_NZCV:
        tcg_rt = cpu_reg(s, rt);
        if (isread) {
            gen_get_nzcv(tcg_rt);
        } else {
            gen_set_nzcv(tcg_rt);
        }
        return;
    case ARM_CP_CURRENTEL:
        /* Reads as current EL value from pstate, which is
         * guaranteed to be constant by the tb flags.
         */
        tcg_rt = cpu_reg(s, rt);
        tcg_gen_movi_i64(tcg_rt, s->current_el << 2);
        return;
    case ARM_CP_DC_ZVA:
        /* Writes clear the aligned block of memory which rt points into. */
        tcg_rt = cpu_reg(s, rt);
        gen_helper_dc_zva(cpu_env, tcg_rt);
        return;
    default:
        break;
    }

    if ((s->tb->cflags & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
        gen_io_start();
    }

    tcg_rt = cpu_reg(s, rt);

    if (isread) {
        if (ri->type & ARM_CP_CONST) {
            tcg_gen_movi_i64(tcg_rt, ri->resetvalue);
        } else if (ri->readfn) {
            TCGv_ptr tmpptr;
            tmpptr = tcg_const_ptr(ri);
            gen_helper_get_cp_reg64(tcg_rt, cpu_env, tmpptr);
            tcg_temp_free_ptr(tmpptr);
        } else {
            tcg_gen_ld_i64(tcg_rt, cpu_env, ri->fieldoffset);
        }
    } else {
        if (ri->type & ARM_CP_CONST) {
            /* If not forbidden by access permissions, treat as WI */
            return;
        } else if (ri->writefn) {
            TCGv_ptr tmpptr;
            tmpptr = tcg_const_ptr(ri);
            gen_helper_set_cp_reg64(cpu_env, tmpptr, tcg_rt);
            tcg_temp_free_ptr(tmpptr);
        } else {
            tcg_gen_st_i64(tcg_rt, cpu_env, ri->fieldoffset);
        }
    }

    if ((s->tb->cflags & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
        /* I/O operations must end the TB here (whether read or write) */
        gen_io_end();
        s->is_jmp = DISAS_UPDATE;
    } else if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
        /* We default to ending the TB on a coprocessor register write,
         * but allow this to be suppressed by the register definition
         * (usually only necessary to work around guest bugs).
         */
        s->is_jmp = DISAS_UPDATE;
    }
}

/* C3.2.4 System
 *  31                 22 21  20 19 18 16 15   12 11    8 7   5 4    0
 * +---------------------+---+-----+-----+-------+-------+-----+------+
 * | 1 1 0 1 0 1 0 1 0 0 | L | op0 | op1 |  CRn  |  CRm  | op2 |  Rt  |
 * +---------------------+---+-----+-----+-------+-------+-----+------+
 */
static void disas_system(DisasContext *s, uint32_t insn)
{
    unsigned int l, op0, op1, crn, crm, op2, rt;
    l = extract32(insn, 21, 1);
    op0 = extract32(insn, 19, 2);
    op1 = extract32(insn, 16, 3);
    crn = extract32(insn, 12, 4);
    crm = extract32(insn, 8, 4);
    op2 = extract32(insn, 5, 3);
    rt = extract32(insn, 0, 5);

    if (op0 == 0) {
        if (l || rt != 31) {
            unallocated_encoding(s);
            return;
        }
        switch (crn) {
        case 2: /* C5.6.68 HINT */
            handle_hint(s, insn, op1, op2, crm);
            break;
        case 3: /* CLREX, DSB, DMB, ISB */
            handle_sync(s, insn, op1, op2, crm);
            break;
        case 4: /* C5.6.130 MSR (immediate) */
            handle_msr_i(s, insn, op1, op2, crm);
            break;
        default:
            unallocated_encoding(s);
            break;
        }
        return;
    }
    handle_sys(s, insn, l, op0, op1, op2, crn, crm, rt);
}

/* C3.2.3 Exception generation
 *
 *  31             24 23 21 20                     5 4   2 1  0
 * +-----------------+-----+------------------------+-----+----+
 * | 1 1 0 1 0 1 0 0 | opc |          imm16         | op2 | LL |
 * +-----------------+-----+------------------------+-----+----+
 */
static void disas_exc(DisasContext *s, uint32_t insn)
{
    int opc = extract32(insn, 21, 3);
    int op2_ll = extract32(insn, 0, 5);
    int imm16 = extract32(insn, 5, 16);
    TCGv_i32 tmp;

    switch (opc) {
    case 0:
        /* For SVC, HVC and SMC we advance the single-step state
         * machine before taking the exception. This is architecturally
         * mandated, to ensure that single-stepping a system call
         * instruction works properly.
         */
        switch (op2_ll) {
        case 1:
            gen_ss_advance(s);
            gen_exception_insn(s, 0, EXCP_SWI, syn_aa64_svc(imm16),
                               default_exception_el(s));
            break;
        case 2:
            if (s->current_el == 0) {
                unallocated_encoding(s);
                break;
            }
            /* The pre HVC helper handles cases when HVC gets trapped
             * as an undefined insn by runtime configuration.
             */
            gen_a64_set_pc_im(s->pc - 4);
            gen_helper_pre_hvc(cpu_env);
            gen_ss_advance(s);
            gen_exception_insn(s, 0, EXCP_HVC, syn_aa64_hvc(imm16), 2);
            break;
        case 3:
            if (s->current_el == 0) {
                unallocated_encoding(s);
                break;
            }
            gen_a64_set_pc_im(s->pc - 4);
            tmp = tcg_const_i32(syn_aa64_smc(imm16));
            gen_helper_pre_smc(cpu_env, tmp);
            tcg_temp_free_i32(tmp);
            gen_ss_advance(s);
            gen_exception_insn(s, 0, EXCP_SMC, syn_aa64_smc(imm16), 3);
            break;
        default:
            unallocated_encoding(s);
            break;
        }
        break;
    case 1:
        if (op2_ll != 0) {
            unallocated_encoding(s);
            break;
        }
        /* BRK */
        gen_exception_insn(s, 4, EXCP_BKPT, syn_aa64_bkpt(imm16),
                           default_exception_el(s));
        break;
    case 2:
        if (op2_ll != 0) {
            unallocated_encoding(s);
            break;
        }
        /* HLT. This has two purposes.
         * Architecturally, it is an external halting debug instruction.
         * Since QEMU doesn't implement external debug, we treat it as
         * required when halting debug is disabled: it will UNDEF.
         * Secondly, "HLT 0xf000" is the A64 semihosting syscall instruction.
         */
1583        if (semihosting_enabled() && imm16 == 0xf000) {
1584#ifndef CONFIG_USER_ONLY
1585            /* In system mode, don't allow userspace access to semihosting,
1586             * to provide some semblance of security (and for consistency
1587             * with our 32-bit semihosting).
1588             */
1589            if (s->current_el == 0) {
1590                unsupported_encoding(s, insn);
1591                break;
1592            }
1593#endif
1594            gen_exception_internal_insn(s, 0, EXCP_SEMIHOST);
1595        } else {
1596            unsupported_encoding(s, insn);
1597        }
1598        break;
1599    case 5:
1600        if (op2_ll < 1 || op2_ll > 3) {
1601            unallocated_encoding(s);
1602            break;
1603        }
1604        /* DCPS1, DCPS2, DCPS3 */
1605        unsupported_encoding(s, insn);
1606        break;
1607    default:
1608        unallocated_encoding(s);
1609        break;
1610    }
1611}
1612
1613/* C3.2.7 Unconditional branch (register)
1614 *  31           25 24   21 20   16 15   10 9    5 4     0
1615 * +---------------+-------+-------+-------+------+-------+
1616 * | 1 1 0 1 0 1 1 |  opc  |  op2  |  op3  |  Rn  |  op4  |
1617 * +---------------+-------+-------+-------+------+-------+
1618 */
1619static void disas_uncond_b_reg(DisasContext *s, uint32_t insn)
1620{
1621    unsigned int opc, op2, op3, rn, op4;
1622
1623    opc = extract32(insn, 21, 4);
1624    op2 = extract32(insn, 16, 5);
1625    op3 = extract32(insn, 10, 6);
1626    rn = extract32(insn, 5, 5);
1627    op4 = extract32(insn, 0, 5);
1628
1629    if (op4 != 0x0 || op3 != 0x0 || op2 != 0x1f) {
1630        unallocated_encoding(s);
1631        return;
1632    }
1633
1634    switch (opc) {
1635    case 0: /* BR */
1636    case 2: /* RET */
1637        tcg_gen_mov_i64(cpu_pc, cpu_reg(s, rn));
1638        break;
1639    case 1: /* BLR */
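            /* Note that the new PC is read from Rn before the link
             * register is written, so "blr x30" behaves as architected.
             */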
1640        tcg_gen_mov_i64(cpu_pc, cpu_reg(s, rn));
1641        tcg_gen_movi_i64(cpu_reg(s, 30), s->pc);
1642        break;
1643    case 4: /* ERET */
1644        if (s->current_el == 0) {
1645            unallocated_encoding(s);
1646            return;
1647        }
1648        gen_helper_exception_return(cpu_env);
1649        s->is_jmp = DISAS_JUMP;
1650        return;
1651    case 5: /* DRPS */
1652        if (rn != 0x1f) {
1653            unallocated_encoding(s);
1654        } else {
1655            unsupported_encoding(s, insn);
1656        }
1657        return;
1658    default:
1659        unallocated_encoding(s);
1660        return;
1661    }
1662
1663    s->is_jmp = DISAS_JUMP;
1664}
1665
1666/* C3.2 Branches, exception generating and system instructions */
1667static void disas_b_exc_sys(DisasContext *s, uint32_t insn)
1668{
1669    switch (extract32(insn, 25, 7)) {
1670    case 0x0a: case 0x0b:
1671    case 0x4a: case 0x4b: /* Unconditional branch (immediate) */
1672        disas_uncond_b_imm(s, insn);
1673        break;
1674    case 0x1a: case 0x5a: /* Compare & branch (immediate) */
1675        disas_comp_b_imm(s, insn);
1676        break;
1677    case 0x1b: case 0x5b: /* Test & branch (immediate) */
1678        disas_test_b_imm(s, insn);
1679        break;
1680    case 0x2a: /* Conditional branch (immediate) */
1681        disas_cond_b_imm(s, insn);
1682        break;
1683    case 0x6a: /* Exception generation / System */
1684        if (insn & (1 << 24)) {
1685            disas_system(s, insn);
1686        } else {
1687            disas_exc(s, insn);
1688        }
1689        break;
1690    case 0x6b: /* Unconditional branch (register) */
1691        disas_uncond_b_reg(s, insn);
1692        break;
1693    default:
1694        unallocated_encoding(s);
1695        break;
1696    }
1697}
1698
1699/*
1700 * Load/Store exclusive instructions are implemented by remembering
1701 * the value/address loaded, and seeing if these are the same
1702 * when the store is performed. This is not actually the architecturally
1703 * mandated semantics, but it works for typical guest code sequences
1704 * and avoids having to monitor regular stores.
1705 *
1706 * In system emulation mode only one CPU will be running at once, so
1707 * this sequence is effectively atomic.  In user emulation mode we
1708 * throw an exception and handle the atomic operation elsewhere.
1709 */
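    /* Illustrative example: a typical guest atomic-increment loop
     *     loop: ldxr  w0, [x1]
     *           add   w0, w0, #1
     *           stxr  w2, w0, [x1]
     *           cbnz  w2, loop
     * succeeds under this scheme whenever [x1] still holds the value the
     * LDXR observed at the point the STXR's comparison is performed.
     */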
1710static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
1711                               TCGv_i64 addr, int size, bool is_pair)
1712{
1713    TCGv_i64 tmp = tcg_temp_new_i64();
1714    TCGMemOp memop = s->be_data + size;
1715
1716    g_assert(size <= 3);
1717    tcg_gen_qemu_ld_i64(tmp, addr, get_mem_index(s), memop);
1718
1719    if (is_pair) {
1720        TCGv_i64 addr2 = tcg_temp_new_i64();
1721        TCGv_i64 hitmp = tcg_temp_new_i64();
1722
1723        g_assert(size >= 2);
1724        tcg_gen_addi_i64(addr2, addr, 1 << size);
1725        tcg_gen_qemu_ld_i64(hitmp, addr2, get_mem_index(s), memop);
1726        tcg_temp_free_i64(addr2);
1727        tcg_gen_mov_i64(cpu_exclusive_high, hitmp);
1728        tcg_gen_mov_i64(cpu_reg(s, rt2), hitmp);
1729        tcg_temp_free_i64(hitmp);
1730    }
1731
1732    tcg_gen_mov_i64(cpu_exclusive_val, tmp);
1733    tcg_gen_mov_i64(cpu_reg(s, rt), tmp);
1734
1735    tcg_temp_free_i64(tmp);
1736    tcg_gen_mov_i64(cpu_exclusive_addr, addr);
1737}
1738
1739#ifdef CONFIG_USER_ONLY
1740static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
1741                                TCGv_i64 addr, int size, int is_pair)
1742{
1743    tcg_gen_mov_i64(cpu_exclusive_test, addr);
1744    tcg_gen_movi_i32(cpu_exclusive_info,
1745                     size | is_pair << 2 | (rd << 4) | (rt << 9) | (rt2 << 14));
1746    gen_exception_internal_insn(s, 4, EXCP_STREX);
1747}
1748#else
1749static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
1750                                TCGv_i64 inaddr, int size, int is_pair)
1751{
1752    /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]
1753     *     && (!is_pair || env->exclusive_high == [addr + datasize])) {
1754     *     [addr] = {Rt};
1755     *     if (is_pair) {
1756     *         [addr + datasize] = {Rt2};
1757     *     }
1758     *     {Rd} = 0;
1759     * } else {
1760     *     {Rd} = 1;
1761     * }
1762     * env->exclusive_addr = -1;
1763     */
1764    TCGLabel *fail_label = gen_new_label();
1765    TCGLabel *done_label = gen_new_label();
1766    TCGv_i64 addr = tcg_temp_local_new_i64();
1767    TCGv_i64 tmp;
1768
1769    /* Copy input into a local temp so it is not trashed when the
1770     * basic block ends at the branch insn.
1771     */
1772    tcg_gen_mov_i64(addr, inaddr);
1773    tcg_gen_brcond_i64(TCG_COND_NE, addr, cpu_exclusive_addr, fail_label);
1774
1775    tmp = tcg_temp_new_i64();
1776    tcg_gen_qemu_ld_i64(tmp, addr, get_mem_index(s), s->be_data + size);
1777    tcg_gen_brcond_i64(TCG_COND_NE, tmp, cpu_exclusive_val, fail_label);
1778    tcg_temp_free_i64(tmp);
1779
1780    if (is_pair) {
1781        TCGv_i64 addrhi = tcg_temp_new_i64();
1782        TCGv_i64 tmphi = tcg_temp_new_i64();
1783
1784        tcg_gen_addi_i64(addrhi, addr, 1 << size);
1785        tcg_gen_qemu_ld_i64(tmphi, addrhi, get_mem_index(s),
1786                            s->be_data + size);
1787        tcg_gen_brcond_i64(TCG_COND_NE, tmphi, cpu_exclusive_high, fail_label);
1788
1789        tcg_temp_free_i64(tmphi);
1790        tcg_temp_free_i64(addrhi);
1791    }
1792
1793    /* We seem to still have the exclusive monitor, so do the store */
1794    tcg_gen_qemu_st_i64(cpu_reg(s, rt), addr, get_mem_index(s),
1795                        s->be_data + size);
1796    if (is_pair) {
1797        TCGv_i64 addrhi = tcg_temp_new_i64();
1798
1799        tcg_gen_addi_i64(addrhi, addr, 1 << size);
1800        tcg_gen_qemu_st_i64(cpu_reg(s, rt2), addrhi,
1801                            get_mem_index(s), s->be_data + size);
1802        tcg_temp_free_i64(addrhi);
1803    }
1804
1805    tcg_temp_free_i64(addr);
1806
1807    tcg_gen_movi_i64(cpu_reg(s, rd), 0);
1808    tcg_gen_br(done_label);
1809    gen_set_label(fail_label);
1810    tcg_gen_movi_i64(cpu_reg(s, rd), 1);
1811    gen_set_label(done_label);
1812    tcg_gen_movi_i64(cpu_exclusive_addr, -1);
1813
1814}
1815#endif
1816
1817/* C3.3.6 Load/store exclusive
1818 *
1819 *  31 30 29         24  23  22   21  20  16  15  14   10 9    5 4    0
1820 * +-----+-------------+----+---+----+------+----+-------+------+------+
1821 * | sz  | 0 0 1 0 0 0 | o2 | L | o1 |  Rs  | o0 |  Rt2  |  Rn  | Rt   |
1822 * +-----+-------------+----+---+----+------+----+-------+------+------+
1823 *
1824 *  sz: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64 bit
1825 *   L: 0 -> store, 1 -> load
1826 *  o2: 0 -> exclusive, 1 -> not
1827 *  o1: 0 -> single register, 1 -> register pair
1828 *  o0: 1 -> load-acquire/store-release, 0 -> not
1829 */
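    /* Illustrative decode: LDAXR x0, [x1] has sz=11, o2=0, L=1, o1=0,
     * o0=1, i.e. a 64 bit single-register load-acquire exclusive.
     */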
1830static void disas_ldst_excl(DisasContext *s, uint32_t insn)
1831{
1832    int rt = extract32(insn, 0, 5);
1833    int rn = extract32(insn, 5, 5);
1834    int rt2 = extract32(insn, 10, 5);
1835    int is_lasr = extract32(insn, 15, 1);
1836    int rs = extract32(insn, 16, 5);
1837    int is_pair = extract32(insn, 21, 1);
1838    int is_store = !extract32(insn, 22, 1);
1839    int is_excl = !extract32(insn, 23, 1);
1840    int size = extract32(insn, 30, 2);
1841    TCGv_i64 tcg_addr;
1842
1843    if ((!is_excl && !is_pair && !is_lasr) ||
1844        (!is_excl && is_pair) ||
1845        (is_pair && size < 2)) {
1846        unallocated_encoding(s);
1847        return;
1848    }
1849
1850    if (rn == 31) {
1851        gen_check_sp_alignment(s);
1852    }
1853    tcg_addr = read_cpu_reg_sp(s, rn, 1);
1854
1855    /* Note that since TCG is single threaded, load-acquire/store-release
1856     * semantics require no extra if (is_lasr) { ... } handling.
1857     */
1858
1859    if (is_excl) {
1860        if (!is_store) {
1861            s->is_ldex = true;
1862            gen_load_exclusive(s, rt, rt2, tcg_addr, size, is_pair);
1863        } else {
1864            gen_store_exclusive(s, rs, rt, rt2, tcg_addr, size, is_pair);
1865        }
1866    } else {
1867        TCGv_i64 tcg_rt = cpu_reg(s, rt);
1868        if (is_store) {
1869            do_gpr_st(s, tcg_rt, tcg_addr, size);
1870        } else {
1871            do_gpr_ld(s, tcg_rt, tcg_addr, size, false, false);
1872        }
1873    }
1874}
1875
1876/*
1877 * C3.3.5 Load register (literal)
1878 *
1879 *  31 30 29   27  26 25 24 23                5 4     0
1880 * +-----+-------+---+-----+-------------------+-------+
1881 * | opc | 0 1 1 | V | 0 0 |     imm19         |  Rt   |
1882 * +-----+-------+---+-----+-------------------+-------+
1883 *
1884 * V: 1 -> vector (simd/fp)
1885 * opc (non-vector): 00 -> 32 bit, 01 -> 64 bit,
1886 *                   10 -> 32 bit signed, 11 -> prefetch
1887 * opc (vector): 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit (11 unallocated)
1888 */
1889static void disas_ld_lit(DisasContext *s, uint32_t insn)
1890{
1891    int rt = extract32(insn, 0, 5);
1892    int64_t imm = sextract32(insn, 5, 19) << 2;
1893    bool is_vector = extract32(insn, 26, 1);
1894    int opc = extract32(insn, 30, 2);
1895    bool is_signed = false;
1896    int size = 2;
1897    TCGv_i64 tcg_rt, tcg_addr;
1898
1899    if (is_vector) {
1900        if (opc == 3) {
1901            unallocated_encoding(s);
1902            return;
1903        }
1904        size = 2 + opc;
1905        if (!fp_access_check(s)) {
1906            return;
1907        }
1908    } else {
1909        if (opc == 3) {
1910            /* PRFM (literal) : prefetch */
1911            return;
1912        }
1913        size = 2 + extract32(opc, 0, 1);
1914        is_signed = extract32(opc, 1, 1);
1915    }
1916
1917    tcg_rt = cpu_reg(s, rt);
1918
1919    tcg_addr = tcg_const_i64((s->pc - 4) + imm);
1920    if (is_vector) {
1921        do_fp_ld(s, rt, tcg_addr, size);
1922    } else {
1923        do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, false);
1924    }
1925    tcg_temp_free_i64(tcg_addr);
1926}
1927
1928/*
1929 * C5.6.80 LDNP (Load Pair - non-temporal hint)
1930 * C5.6.81 LDP (Load Pair - non vector)
1931 * C5.6.82 LDPSW (Load Pair Signed Word - non vector)
1932 * C5.6.176 STNP (Store Pair - non-temporal hint)
1933 * C5.6.177 STP (Store Pair - non vector)
1934 * C6.3.165 LDNP (Load Pair of SIMD&FP - non-temporal hint)
1935 * C6.3.165 LDP (Load Pair of SIMD&FP)
1936 * C6.3.284 STNP (Store Pair of SIMD&FP - non-temporal hint)
1937 * C6.3.284 STP (Store Pair of SIMD&FP)
1938 *
1939 *  31 30 29   27  26  25 24   23  22 21   15 14   10 9    5 4    0
1940 * +-----+-------+---+---+-------+---+-------+-------+------+------+
1941 * | opc | 1 0 1 | V | 0 | index | L |  imm7 |  Rt2  |  Rn  |  Rt  |
1942 * +-----+-------+---+---+-------+---+-------+-------+------+------+
1943 *
1944 * opc: LDP/STP/LDNP/STNP        00 -> 32 bit, 10 -> 64 bit
1945 *      LDPSW                    01
1946 *      LDP/STP/LDNP/STNP (SIMD) 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit
1947 *   V: 0 -> GPR, 1 -> Vector
1948 * idx: 00 -> signed offset with non-temporal hint, 01 -> post-index,
1949 *      10 -> signed offset, 11 -> pre-index
1950 *   L: 0 -> Store 1 -> Load
1951 *
1952 * Rt, Rt2 = GPR or SIMD registers to be transferred
1953 * Rn = general purpose register containing address
1954 * imm7 = signed offset (multiple of 4 or 8 depending on size)
1955 */
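    /* Illustrative decode: LDP x0, x1, [sp, #16] has opc=10 (64 bit),
     * V=0, index=10 (signed offset), L=1 and imm7=2 (scaled by 8 to 16).
     */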
1956static void disas_ldst_pair(DisasContext *s, uint32_t insn)
1957{
1958    int rt = extract32(insn, 0, 5);
1959    int rn = extract32(insn, 5, 5);
1960    int rt2 = extract32(insn, 10, 5);
1961    uint64_t offset = sextract64(insn, 15, 7);
1962    int index = extract32(insn, 23, 2);
1963    bool is_vector = extract32(insn, 26, 1);
1964    bool is_load = extract32(insn, 22, 1);
1965    int opc = extract32(insn, 30, 2);
1966
1967    bool is_signed = false;
1968    bool postindex = false;
1969    bool wback = false;
1970
1971    TCGv_i64 tcg_addr; /* calculated address */
1972    int size;
1973
1974    if (opc == 3) {
1975        unallocated_encoding(s);
1976        return;
1977    }
1978
1979    if (is_vector) {
1980        size = 2 + opc;
1981    } else {
1982        size = 2 + extract32(opc, 1, 1);
1983        is_signed = extract32(opc, 0, 1);
1984        if (!is_load && is_signed) {
1985            unallocated_encoding(s);
1986            return;
1987        }
1988    }
1989
1990    switch (index) {
1991    case 1: /* post-index */
1992        postindex = true;
1993        wback = true;
1994        break;
1995    case 0:
1996        /* signed offset with "non-temporal" hint. Since we don't emulate
1997         * caches we don't care about hints to the cache system about
1998         * data access patterns, and handle this identically to plain
1999         * signed offset.
2000         */
2001        if (is_signed) {
2002            /* There is no non-temporal-hint version of LDPSW */
2003            unallocated_encoding(s);
2004            return;
2005        }
2006        postindex = false;
2007        break;
2008    case 2: /* signed offset, rn not updated */
2009        postindex = false;
2010        break;
2011    case 3: /* pre-index */
2012        postindex = false;
2013        wback = true;
2014        break;
2015    }
2016
2017    if (is_vector && !fp_access_check(s)) {
2018        return;
2019    }
2020
2021    offset <<= size;
2022
2023    if (rn == 31) {
2024        gen_check_sp_alignment(s);
2025    }
2026
2027    tcg_addr = read_cpu_reg_sp(s, rn, 1);
2028
2029    if (!postindex) {
2030        tcg_gen_addi_i64(tcg_addr, tcg_addr, offset);
2031    }
2032
2033    if (is_vector) {
2034        if (is_load) {
2035            do_fp_ld(s, rt, tcg_addr, size);
2036        } else {
2037            do_fp_st(s, rt, tcg_addr, size);
2038        }
2039    } else {
2040        TCGv_i64 tcg_rt = cpu_reg(s, rt);
2041        if (is_load) {
2042            do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, false);
2043        } else {
2044            do_gpr_st(s, tcg_rt, tcg_addr, size);
2045        }
2046    }
2047    tcg_gen_addi_i64(tcg_addr, tcg_addr, 1 << size);
2048    if (is_vector) {
2049        if (is_load) {
2050            do_fp_ld(s, rt2, tcg_addr, size);
2051        } else {
2052            do_fp_st(s, rt2, tcg_addr, size);
2053        }
2054    } else {
2055        TCGv_i64 tcg_rt2 = cpu_reg(s, rt2);
2056        if (is_load) {
2057            do_gpr_ld(s, tcg_rt2, tcg_addr, size, is_signed, false);
2058        } else {
2059            do_gpr_st(s, tcg_rt2, tcg_addr, size);
2060        }
2061    }
2062
2063    if (wback) {
2064        if (postindex) {
2065            tcg_gen_addi_i64(tcg_addr, tcg_addr, offset - (1 << size));
2066        } else {
2067            tcg_gen_subi_i64(tcg_addr, tcg_addr, 1 << size);
2068        }
2069        tcg_gen_mov_i64(cpu_reg_sp(s, rn), tcg_addr);
2070    }
2071}
2072
2073/*
2074 * C3.3.8 Load/store (immediate post-indexed)
2075 * C3.3.9 Load/store (immediate pre-indexed)
2076 * C3.3.12 Load/store (unscaled immediate)
2077 *
2078 * 31 30 29   27  26 25 24 23 22 21  20    12 11 10 9    5 4    0
2079 * +----+-------+---+-----+-----+---+--------+-----+------+------+
2080 * |size| 1 1 1 | V | 0 0 | opc | 0 |  imm9  | idx |  Rn  |  Rt  |
2081 * +----+-------+---+-----+-----+---+--------+-----+------+------+
2082 *
2083 * idx = 01 -> post-indexed, 11 -> pre-indexed, 00 -> unscaled imm. (no writeback),
2084 *       10 -> unprivileged
2085 * V = 0 -> non-vector
2086 * size: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64 bit
2087 * opc: 00 -> store, 01 -> unsigned load, 10 -> signed load 64, 11 -> signed load 32
2088 */
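    /* Illustrative decode: LDR x0, [x1, #8]! has size=11, opc=01, imm9=8,
     * idx=11: a pre-indexed load which also writes back x1 + 8 to x1.
     */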
2089static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn)
2090{
2091    int rt = extract32(insn, 0, 5);
2092    int rn = extract32(insn, 5, 5);
2093    int imm9 = sextract32(insn, 12, 9);
2094    int opc = extract32(insn, 22, 2);
2095    int size = extract32(insn, 30, 2);
2096    int idx = extract32(insn, 10, 2);
2097    bool is_signed = false;
2098    bool is_store = false;
2099    bool is_extended = false;
2100    bool is_unpriv = (idx == 2);
2101    bool is_vector = extract32(insn, 26, 1);
2102    bool post_index;
2103    bool writeback;
2104
2105    TCGv_i64 tcg_addr;
2106
2107    if (is_vector) {
2108        size |= (opc & 2) << 1;
2109        if (size > 4 || is_unpriv) {
2110            unallocated_encoding(s);
2111            return;
2112        }
2113        is_store = ((opc & 1) == 0);
2114        if (!fp_access_check(s)) {
2115            return;
2116        }
2117    } else {
2118        if (size == 3 && opc == 2) {
2119            /* PRFM - prefetch */
2120            if (is_unpriv) {
2121                unallocated_encoding(s);
2122                return;
2123            }
2124            return;
2125        }
2126        if (opc == 3 && size > 1) {
2127            unallocated_encoding(s);
2128            return;
2129        }
2130        is_store = (opc == 0);
2131        is_signed = opc & (1<<1);
2132        is_extended = (size < 3) && (opc & 1);
2133    }
2134
2135    switch (idx) {
2136    case 0:
2137    case 2:
2138        post_index = false;
2139        writeback = false;
2140        break;
2141    case 1:
2142        post_index = true;
2143        writeback = true;
2144        break;
2145    case 3:
2146        post_index = false;
2147        writeback = true;
2148        break;
2149    }
2150
2151    if (rn == 31) {
2152        gen_check_sp_alignment(s);
2153    }
2154    tcg_addr = read_cpu_reg_sp(s, rn, 1);
2155
2156    if (!post_index) {
2157        tcg_gen_addi_i64(tcg_addr, tcg_addr, imm9);
2158    }
2159
2160    if (is_vector) {
2161        if (is_store) {
2162            do_fp_st(s, rt, tcg_addr, size);
2163        } else {
2164            do_fp_ld(s, rt, tcg_addr, size);
2165        }
2166    } else {
2167        TCGv_i64 tcg_rt = cpu_reg(s, rt);
2168        int memidx = is_unpriv ? get_a64_user_mem_index(s) : get_mem_index(s);
2169
2170        if (is_store) {
2171            do_gpr_st_memidx(s, tcg_rt, tcg_addr, size, memidx);
2172        } else {
2173            do_gpr_ld_memidx(s, tcg_rt, tcg_addr, size,
2174                             is_signed, is_extended, memidx);
2175        }
2176    }
2177
2178    if (writeback) {
2179        TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
2180        if (post_index) {
2181            tcg_gen_addi_i64(tcg_addr, tcg_addr, imm9);
2182        }
2183        tcg_gen_mov_i64(tcg_rn, tcg_addr);
2184    }
2185}
2186
2187/*
2188 * C3.3.10 Load/store (register offset)
2189 *
2190 * 31 30 29   27  26 25 24 23 22 21  20  16 15 13 12 11 10 9  5 4  0
2191 * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
2192 * |size| 1 1 1 | V | 0 0 | opc | 1 |  Rm  | opt | S| 1 0 | Rn | Rt |
2193 * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
2194 *
2195 * For non-vector:
2196 *   size: 00 -> byte, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64 bit
2197 *   opc: 00 -> store, 01 -> unsigned load, 10 -> signed load 64, 11 -> signed load 32
2198 * For vector:
2199 *   size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
2200 *   opc<0>: 0 -> store, 1 -> load
2201 * V: 1 -> vector/simd
2202 * opt: extend encoding (see DecodeRegExtend)
2203 * S: if S=1 then scale (essentially index by sizeof(size))
2204 * Rt: register to transfer into/out of
2205 * Rn: address register or SP for base
2206 * Rm: offset register or ZR for offset
2207 */
2208static void disas_ldst_reg_roffset(DisasContext *s, uint32_t insn)
2209{
2210    int rt = extract32(insn, 0, 5);
2211    int rn = extract32(insn, 5, 5);
2212    int shift = extract32(insn, 12, 1);
2213    int rm = extract32(insn, 16, 5);
2214    int opc = extract32(insn, 22, 2);
2215    int opt = extract32(insn, 13, 3);
2216    int size = extract32(insn, 30, 2);
2217    bool is_signed = false;
2218    bool is_store = false;
2219    bool is_extended = false;
2220    bool is_vector = extract32(insn, 26, 1);
2221
2222    TCGv_i64 tcg_rm;
2223    TCGv_i64 tcg_addr;
2224
2225    if (extract32(opt, 1, 1) == 0) {
2226        unallocated_encoding(s);
2227        return;
2228    }
2229
2230    if (is_vector) {
2231        size |= (opc & 2) << 1;
2232        if (size > 4) {
2233            unallocated_encoding(s);
2234            return;
2235        }
2236        is_store = !extract32(opc, 0, 1);
2237        if (!fp_access_check(s)) {
2238            return;
2239        }
2240    } else {
2241        if (size == 3 && opc == 2) {
2242            /* PRFM - prefetch */
2243            return;
2244        }
2245        if (opc == 3 && size > 1) {
2246            unallocated_encoding(s);
2247            return;
2248        }
2249        is_store = (opc == 0);
2250        is_signed = extract32(opc, 1, 1);
2251        is_extended = (size < 3) && extract32(opc, 0, 1);
2252    }
2253
2254    if (rn == 31) {
2255        gen_check_sp_alignment(s);
2256    }
2257    tcg_addr = read_cpu_reg_sp(s, rn, 1);
2258
2259    tcg_rm = read_cpu_reg(s, rm, 1);
2260    ext_and_shift_reg(tcg_rm, tcg_rm, opt, shift ? size : 0);
2261
2262    tcg_gen_add_i64(tcg_addr, tcg_addr, tcg_rm);
2263
2264    if (is_vector) {
2265        if (is_store) {
2266            do_fp_st(s, rt, tcg_addr, size);
2267        } else {
2268            do_fp_ld(s, rt, tcg_addr, size);
2269        }
2270    } else {
2271        TCGv_i64 tcg_rt = cpu_reg(s, rt);
2272        if (is_store) {
2273            do_gpr_st(s, tcg_rt, tcg_addr, size);
2274        } else {
2275            do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, is_extended);
2276        }
2277    }
2278}
2279
2280/*
2281 * C3.3.13 Load/store (unsigned immediate)
2282 *
2283 * 31 30 29   27  26 25 24 23 22 21        10 9     5 4    0
2284 * +----+-------+---+-----+-----+------------+-------+------+
2285 * |size| 1 1 1 | V | 0 1 | opc |   imm12    |  Rn   |  Rt  |
2286 * +----+-------+---+-----+-----+------------+-------+------+
2287 *
2288 * For non-vector:
2289 *   size: 00 -> byte, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64 bit
2290 *   opc: 00 -> store, 01 -> unsigned load, 10 -> signed load 64, 11 -> signed load 32
2291 * For vector:
2292 *   size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
2293 *   opc<0>: 0 -> store, 1 -> load
2294 * Rn: base address register (inc SP)
2295 * Rt: target register
2296 */
2297static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn)
2298{
2299    int rt = extract32(insn, 0, 5);
2300    int rn = extract32(insn, 5, 5);
2301    unsigned int imm12 = extract32(insn, 10, 12);
2302    bool is_vector = extract32(insn, 26, 1);
2303    int size = extract32(insn, 30, 2);
2304    int opc = extract32(insn, 22, 2);
2305    unsigned int offset;
2306
2307    TCGv_i64 tcg_addr;
2308
2309    bool is_store;
2310    bool is_signed = false;
2311    bool is_extended = false;
2312
2313    if (is_vector) {
2314        size |= (opc & 2) << 1;
2315        if (size > 4) {
2316            unallocated_encoding(s);
2317            return;
2318        }
2319        is_store = !extract32(opc, 0, 1);
2320        if (!fp_access_check(s)) {
2321            return;
2322        }
2323    } else {
2324        if (size == 3 && opc == 2) {
2325            /* PRFM - prefetch */
2326            return;
2327        }
2328        if (opc == 3 && size > 1) {
2329            unallocated_encoding(s);
2330            return;
2331        }
2332        is_store = (opc == 0);
2333        is_signed = extract32(opc, 1, 1);
2334        is_extended = (size < 3) && extract32(opc, 0, 1);
2335    }
2336
2337    if (rn == 31) {
2338        gen_check_sp_alignment(s);
2339    }
2340    tcg_addr = read_cpu_reg_sp(s, rn, 1);
2341    offset = imm12 << size;
2342    tcg_gen_addi_i64(tcg_addr, tcg_addr, offset);
2343
2344    if (is_vector) {
2345        if (is_store) {
2346            do_fp_st(s, rt, tcg_addr, size);
2347        } else {
2348            do_fp_ld(s, rt, tcg_addr, size);
2349        }
2350    } else {
2351        TCGv_i64 tcg_rt = cpu_reg(s, rt);
2352        if (is_store) {
2353            do_gpr_st(s, tcg_rt, tcg_addr, size);
2354        } else {
2355            do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, is_extended);
2356        }
2357    }
2358}
2359
2360/* Load/store register (all forms) */
2361static void disas_ldst_reg(DisasContext *s, uint32_t insn)
2362{
2363    switch (extract32(insn, 24, 2)) {
2364    case 0:
2365        if (extract32(insn, 21, 1) == 1 && extract32(insn, 10, 2) == 2) {
2366            disas_ldst_reg_roffset(s, insn);
2367        } else {
2368            /* Load/store register (unscaled immediate)
2369             * Load/store immediate pre/post-indexed
2370             * Load/store register unprivileged
2371             */
2372            disas_ldst_reg_imm9(s, insn);
2373        }
2374        break;
2375    case 1:
2376        disas_ldst_reg_unsigned_imm(s, insn);
2377        break;
2378    default:
2379        unallocated_encoding(s);
2380        break;
2381    }
2382}
2383
2384/* C3.3.1 AdvSIMD load/store multiple structures
2385 *
2386 *  31  30  29           23 22  21         16 15    12 11  10 9    5 4    0
2387 * +---+---+---------------+---+-------------+--------+------+------+------+
2388 * | 0 | Q | 0 0 1 1 0 0 0 | L | 0 0 0 0 0 0 | opcode | size |  Rn  |  Rt  |
2389 * +---+---+---------------+---+-------------+--------+------+------+------+
2390 *
2391 * C3.3.2 AdvSIMD load/store multiple structures (post-indexed)
2392 *
2393 *  31  30  29           23 22  21  20     16 15    12 11  10 9    5 4    0
2394 * +---+---+---------------+---+---+---------+--------+------+------+------+
2395 * | 0 | Q | 0 0 1 1 0 0 1 | L | 0 |   Rm    | opcode | size |  Rn  |  Rt  |
2396 * +---+---+---------------+---+---+---------+--------+------+------+------+
2397 *
2398 * Rt: first (or only) SIMD&FP register to be transferred
2399 * Rn: base address or SP
2400 * Rm (post-index only): post-index register (when !31) or size dependent #imm
2401 */
2402static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn)
2403{
2404    int rt = extract32(insn, 0, 5);
2405    int rn = extract32(insn, 5, 5);
2406    int size = extract32(insn, 10, 2);
2407    int opcode = extract32(insn, 12, 4);
2408    bool is_store = !extract32(insn, 22, 1);
2409    bool is_postidx = extract32(insn, 23, 1);
2410    bool is_q = extract32(insn, 30, 1);
2411    TCGv_i64 tcg_addr, tcg_rn;
2412
2413    int ebytes = 1 << size;
2414    int elements = (is_q ? 128 : 64) / (8 << size);
2415    int rpt;    /* num iterations */
2416    int selem;  /* structure elements */
2417    int r;
2418
2419    if (extract32(insn, 31, 1) || extract32(insn, 21, 1)) {
2420        unallocated_encoding(s);
2421        return;
2422    }
2423
2424    /* From the shared decode logic */
2425    switch (opcode) {
2426    case 0x0: /* LD4/ST4 */
2427        rpt = 1;
2428        selem = 4;
2429        break;
2430    case 0x2: /* LD1/ST1 (4 registers) */
2431        rpt = 4;
2432        selem = 1;
2433        break;
2434    case 0x4: /* LD3/ST3 */
2435        rpt = 1;
2436        selem = 3;
2437        break;
2438    case 0x6: /* LD1/ST1 (3 registers) */
2439        rpt = 3;
2440        selem = 1;
2441        break;
2442    case 0x7: /* LD1/ST1 (1 register) */
2443        rpt = 1;
2444        selem = 1;
2445        break;
2446    case 0x8: /* LD2/ST2 */
2447        rpt = 1;
2448        selem = 2;
2449        break;
2450    case 0xa: /* LD1/ST1 (2 registers) */
2451        rpt = 2;
2452        selem = 1;
2453        break;
2454    default:
2455        unallocated_encoding(s);
2456        return;
2457    }
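        /* Illustrative example: LD4 {v0.8b-v3.8b}, [x0] has Q=0,
         * opcode=0000, size=00, giving rpt=1, selem=4, elements=8: the
         * loop below loads 32 bytes and de-interleaves them into v0..v3.
         */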
2458
2459    if (size == 3 && !is_q && selem != 1) {
2460        /* reserved */
2461        unallocated_encoding(s);
2462        return;
2463    }
2464
2465    if (!fp_access_check(s)) {
2466        return;
2467    }
2468
2469    if (rn == 31) {
2470        gen_check_sp_alignment(s);
2471    }
2472
2473    tcg_rn = cpu_reg_sp(s, rn);
2474    tcg_addr = tcg_temp_new_i64();
2475    tcg_gen_mov_i64(tcg_addr, tcg_rn);
2476
2477    for (r = 0; r < rpt; r++) {
2478        int e;
2479        for (e = 0; e < elements; e++) {
2480            int tt = (rt + r) % 32;
2481            int xs;
2482            for (xs = 0; xs < selem; xs++) {
2483                if (is_store) {
2484                    do_vec_st(s, tt, e, tcg_addr, size);
2485                } else {
2486                    do_vec_ld(s, tt, e, tcg_addr, size);
2487
2488                    /* For non-quad operations, setting a slice of the low
2489                     * 64 bits of the register clears the high 64 bits (in
2490                     * the ARM ARM pseudocode this is implicit in the fact
2491                     * that 'rval' is a 64 bit wide variable). We optimize
2492                     * by noticing that we only need to do this the first
2493                     * time we touch a register.
2494                     */
2495                    if (!is_q && e == 0 && (r == 0 || xs == selem - 1)) {
2496                        clear_vec_high(s, tt);
2497                    }
2498                }
2499                tcg_gen_addi_i64(tcg_addr, tcg_addr, ebytes);
2500                tt = (tt + 1) % 32;
2501            }
2502        }
2503    }
2504
2505    if (is_postidx) {
2506        int rm = extract32(insn, 16, 5);
2507        if (rm == 31) {
2508            tcg_gen_mov_i64(tcg_rn, tcg_addr);
2509        } else {
2510            tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
2511        }
2512    }
2513    tcg_temp_free_i64(tcg_addr);
2514}
2515
2516/* C3.3.3 AdvSIMD load/store single structure
2517 *
2518 *  31  30  29           23 22 21 20       16 15 13 12  11  10 9    5 4    0
2519 * +---+---+---------------+-----+-----------+-----+---+------+------+------+
2520 * | 0 | Q | 0 0 1 1 0 1 0 | L R | 0 0 0 0 0 | opc | S | size |  Rn  |  Rt  |
2521 * +---+---+---------------+-----+-----------+-----+---+------+------+------+
2522 *
2523 * C3.3.4 AdvSIMD load/store single structure (post-indexed)
2524 *
2525 *  31  30  29           23 22 21 20       16 15 13 12  11  10 9    5 4    0
2526 * +---+---+---------------+-----+-----------+-----+---+------+------+------+
2527 * | 0 | Q | 0 0 1 1 0 1 1 | L R |     Rm    | opc | S | size |  Rn  |  Rt  |
2528 * +---+---+---------------+-----+-----------+-----+---+------+------+------+
2529 *
2530 * Rt: first (or only) SIMD&FP register to be transferred
2531 * Rn: base address or SP
2532 * Rm (post-index only): post-index register (when !31) or size dependent #imm
2533 * index = encoded in Q:S:size dependent on size
2534 *
2535 * lane_size = encoded in R, opc
2536 * transfer width = encoded in opc, S, size
2537 */
2538static void disas_ldst_single_struct(DisasContext *s, uint32_t insn)
2539{
2540    int rt = extract32(insn, 0, 5);
2541    int rn = extract32(insn, 5, 5);
2542    int size = extract32(insn, 10, 2);
2543    int S = extract32(insn, 12, 1);
2544    int opc = extract32(insn, 13, 3);
2545    int R = extract32(insn, 21, 1);
2546    int is_load = extract32(insn, 22, 1);
2547    int is_postidx = extract32(insn, 23, 1);
2548    int is_q = extract32(insn, 30, 1);
2549
2550    int scale = extract32(opc, 1, 2);
2551    int selem = (extract32(opc, 0, 1) << 1 | R) + 1;
2552    bool replicate = false;
2553    int index = is_q << 3 | S << 2 | size;
2554    int ebytes, xs;
2555    TCGv_i64 tcg_addr, tcg_rn;
2556
2557    switch (scale) {
2558    case 3:
2559        if (!is_load || S) {
2560            unallocated_encoding(s);
2561            return;
2562        }
2563        scale = size;
2564        replicate = true;
2565        break;
2566    case 0:
2567        break;
2568    case 1:
2569        if (extract32(size, 0, 1)) {
2570            unallocated_encoding(s);
2571            return;
2572        }
2573        index >>= 1;
2574        break;
2575    case 2:
2576        if (extract32(size, 1, 1)) {
2577            unallocated_encoding(s);
2578            return;
2579        }
2580        if (!extract32(size, 0, 1)) {
2581            index >>= 2;
2582        } else {
2583            if (S) {
2584                unallocated_encoding(s);
2585                return;
2586            }
2587            index >>= 3;
2588            scale = 3;
2589        }
2590        break;
2591    default:
2592        g_assert_not_reached();
2593    }
2594
2595    if (!fp_access_check(s)) {
2596        return;
2597    }
2598
2599    ebytes = 1 << scale;
2600
2601    if (rn == 31) {
2602        gen_check_sp_alignment(s);
2603    }
2604
2605    tcg_rn = cpu_reg_sp(s, rn);
2606    tcg_addr = tcg_temp_new_i64();
2607    tcg_gen_mov_i64(tcg_addr, tcg_rn);
2608
2609    for (xs = 0; xs < selem; xs++) {
2610        if (replicate) {
2611            /* Load and replicate to all elements */
2612            uint64_t mulconst;
2613            TCGv_i64 tcg_tmp = tcg_temp_new_i64();
2614
2615            tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr,
2616                                get_mem_index(s), s->be_data + scale);
2617            switch (scale) {
2618            case 0:
2619                mulconst = 0x0101010101010101ULL;
2620                break;
2621            case 1:
2622                mulconst = 0x0001000100010001ULL;
2623                break;
2624            case 2:
2625                mulconst = 0x0000000100000001ULL;
2626                break;
2627            case 3:
2628                mulconst = 0;
2629                break;
2630            default:
2631                g_assert_not_reached();
2632            }
2633            if (mulconst) {
2634                tcg_gen_muli_i64(tcg_tmp, tcg_tmp, mulconst);
2635            }
2636            write_vec_element(s, tcg_tmp, rt, 0, MO_64);
2637            if (is_q) {
2638                write_vec_element(s, tcg_tmp, rt, 1, MO_64);
2639            } else {
2640                clear_vec_high(s, rt);
2641            }
2642            tcg_temp_free_i64(tcg_tmp);
2643        } else {
2644            /* Load/store one element per register */
2645            if (is_load) {
2646                do_vec_ld(s, rt, index, tcg_addr, s->be_data + scale);
2647            } else {
2648                do_vec_st(s, rt, index, tcg_addr, s->be_data + scale);
2649            }
2650        }
2651        tcg_gen_addi_i64(tcg_addr, tcg_addr, ebytes);
2652        rt = (rt + 1) % 32;
2653    }
2654
2655    if (is_postidx) {
2656        int rm = extract32(insn, 16, 5);
2657        if (rm == 31) {
2658            tcg_gen_mov_i64(tcg_rn, tcg_addr);
2659        } else {
2660            tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
2661        }
2662    }
2663    tcg_temp_free_i64(tcg_addr);
2664}
2665
2666/* C3.3 Loads and stores */
2667static void disas_ldst(DisasContext *s, uint32_t insn)
2668{
2669    switch (extract32(insn, 24, 6)) {
2670    case 0x08: /* Load/store exclusive */
2671        disas_ldst_excl(s, insn);
2672        break;
2673    case 0x18: case 0x1c: /* Load register (literal) */
2674        disas_ld_lit(s, insn);
2675        break;
2676    case 0x28: case 0x29:
2677    case 0x2c: case 0x2d: /* Load/store pair (all forms) */
2678        disas_ldst_pair(s, insn);
2679        break;
2680    case 0x38: case 0x39:
2681    case 0x3c: case 0x3d: /* Load/store register (all forms) */
2682        disas_ldst_reg(s, insn);
2683        break;
2684    case 0x0c: /* AdvSIMD load/store multiple structures */
2685        disas_ldst_multiple_struct(s, insn);
2686        break;
2687    case 0x0d: /* AdvSIMD load/store single structure */
2688        disas_ldst_single_struct(s, insn);
2689        break;
2690    default:
2691        unallocated_encoding(s);
2692        break;
2693    }
2694}
2695
2696/* C3.4.6 PC-rel. addressing
2697 *   31  30   29 28       24 23                5 4    0
2698 * +----+-------+-----------+-------------------+------+
2699 * | op | immlo | 1 0 0 0 0 |       immhi       |  Rd  |
2700 * +----+-------+-----------+-------------------+------+
2701 */
2702static void disas_pc_rel_adr(DisasContext *s, uint32_t insn)
2703{
2704    unsigned int page, rd;
2705    uint64_t base;
2706    uint64_t offset;
2707
2708    page = extract32(insn, 31, 1);
2709    /* SignExtend(immhi:immlo) -> offset */
2710    offset = sextract64(insn, 5, 19);
2711    offset = offset << 2 | extract32(insn, 29, 2);
2712    rd = extract32(insn, 0, 5);
2713    base = s->pc - 4;
2714
2715    if (page) {
2716        /* ADRP (page based) */
2717        base &= ~0xfff;
2718        offset <<= 12;
2719    }
2720
2721    tcg_gen_movi_i64(cpu_reg(s, rd), base + offset);
2722}
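
    /* Illustrative example: at PC 0x40001004, an ADRP with
     * immhi:immlo = 2 computes base = 0x40001004 & ~0xfff = 0x40001000
     * and offset = 2 << 12, so Rd = 0x40003000.
     */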
2723
2724/*
2725 * C3.4.1 Add/subtract (immediate)
2726 *
2727 *  31 30 29 28       24 23 22 21         10 9   5 4   0
2728 * +--+--+--+-----------+-----+-------------+-----+-----+
2729 * |sf|op| S| 1 0 0 0 1 |shift|    imm12    |  Rn | Rd  |
2730 * +--+--+--+-----------+-----+-------------+-----+-----+
2731 *
2732 *    sf: 0 -> 32bit, 1 -> 64bit
2733 *    op: 0 -> add  , 1 -> sub
2734 *     S: 1 -> set flags
2735 * shift: 00 -> LSL imm by 0, 01 -> LSL imm by 12
2736 */
2737static void disas_add_sub_imm(DisasContext *s, uint32_t insn)
2738{
2739    int rd = extract32(insn, 0, 5);
2740    int rn = extract32(insn, 5, 5);
2741    uint64_t imm = extract32(insn, 10, 12);
2742    int shift = extract32(insn, 22, 2);
2743    bool setflags = extract32(insn, 29, 1);
2744    bool sub_op = extract32(insn, 30, 1);
2745    bool is_64bit = extract32(insn, 31, 1);
2746
2747    TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
2748    TCGv_i64 tcg_rd = setflags ? cpu_reg(s, rd) : cpu_reg_sp(s, rd);
2749    TCGv_i64 tcg_result;
2750
2751    switch (shift) {
2752    case 0x0:
2753        break;
2754    case 0x1:
2755        imm <<= 12;
2756        break;
2757    default:
2758        unallocated_encoding(s);
2759        return;
2760    }
2761
2762    tcg_result = tcg_temp_new_i64();
2763    if (!setflags) {
2764        if (sub_op) {
2765            tcg_gen_subi_i64(tcg_result, tcg_rn, imm);
2766        } else {
2767            tcg_gen_addi_i64(tcg_result, tcg_rn, imm);
2768        }
2769    } else {
2770        TCGv_i64 tcg_imm = tcg_const_i64(imm);
2771        if (sub_op) {
2772            gen_sub_CC(is_64bit, tcg_result, tcg_rn, tcg_imm);
2773        } else {
2774            gen_add_CC(is_64bit, tcg_result, tcg_rn, tcg_imm);
2775        }
2776        tcg_temp_free_i64(tcg_imm);
2777    }
2778
2779    if (is_64bit) {
2780        tcg_gen_mov_i64(tcg_rd, tcg_result);
2781    } else {
2782        tcg_gen_ext32u_i64(tcg_rd, tcg_result);
2783    }
2784
2785    tcg_temp_free_i64(tcg_result);
2786}
2787
2788/* The input should be a value in the bottom e bits (with higher
2789 * bits zero); returns that value replicated into every element
2790 * of size e in a 64 bit integer.
2791 */
2792static uint64_t bitfield_replicate(uint64_t mask, unsigned int e)
2793{
2794    assert(e != 0);
2795    while (e < 64) {
2796        mask |= mask << e;
2797        e *= 2;
2798    }
2799    return mask;
2800}
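
    /* e.g. bitfield_replicate(0x03, 8) == 0x0303030303030303ULL */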
2801
2802/* Return a value with the bottom len bits set (where 0 < len <= 64) */
2803static inline uint64_t bitmask64(unsigned int length)
2804{
2805    assert(length > 0 && length <= 64);
2806    return ~0ULL >> (64 - length);
2807}
2808
2809/* Simplified variant of pseudocode DecodeBitMasks() for the case where we
2810 * only require the wmask. Returns false if the imms/immr/immn are a reserved
2811 * value (ie should cause a guest UNDEF exception), and true if they are
2812 * valid, in which case the decoded bit pattern is written to result.
2813 */
2814static bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn,
2815                                   unsigned int imms, unsigned int immr)
2816{
2817    uint64_t mask;
2818    unsigned e, levels, s, r;
2819    int len;
2820
2821    assert(immn < 2 && imms < 64 && immr < 64);
2822
2823    /* The bit patterns we create here are 64 bit patterns which
2824     * are vectors of identical elements of size e = 2, 4, 8, 16, 32 or
2825     * 64 bits each. Each element contains the same value: a run
2826     * of between 1 and e-1 non-zero bits, rotated within the
2827     * element by between 0 and e-1 bits.
2828     *
2829     * The element size and run length are encoded into immn (1 bit)
2830     * and imms (6 bits) as follows:
2831     * 64 bit elements: immn = 1, imms = <length of run - 1>
2832     * 32 bit elements: immn = 0, imms = 0 : <length of run - 1>
2833     * 16 bit elements: immn = 0, imms = 10 : <length of run - 1>
2834     *  8 bit elements: immn = 0, imms = 110 : <length of run - 1>
2835     *  4 bit elements: immn = 0, imms = 1110 : <length of run - 1>
2836     *  2 bit elements: immn = 0, imms = 11110 : <length of run - 1>
2837     * Notice that immn = 0, imms = 11111x is the only combination
2838     * not covered by one of the above options; this is reserved.
2839     * Further, <length of run - 1> all-ones is a reserved pattern.
2840     *
2841     * In all cases the rotation is by immr % e (and immr is 6 bits).
2842     */
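        /* Illustrative worked example: immn = 0, imms = 0b110001, immr = 1
         * gives len = 3 (e = 8), s = 1, r = 1: two set bits rotated right
         * by one within each byte, i.e. the element 0x81 replicated to
         * 0x8181818181818181.
         */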
2843
2844    /* First determine the element size */
2845    len = 31 - clz32((immn << 6) | (~imms & 0x3f));
2846    if (len < 1) {
2847        /* This is the immn == 0, imms == 0b11111x case */
2848        return false;
2849    }
2850    e = 1 << len;
2851
2852    levels = e - 1;
2853    s = imms & levels;
2854    r = immr & levels;
2855
2856    if (s == levels) {
2857        /* <length of run - 1> mustn't be all-ones. */
2858        return false;
2859    }
2860
2861    /* Create the value of one element: s+1 set bits rotated
2862     * by r within the element (which is e bits wide)...
2863     */
2864    mask = bitmask64(s + 1);
2865    if (r) {
2866        mask = (mask >> r) | (mask << (e - r));
2867        mask &= bitmask64(e);
2868    }
2869    /* ...then replicate the element over the whole 64 bit value */
2870    mask = bitfield_replicate(mask, e);
2871    *result = mask;
2872    return true;
2873}
2874
2875/* C3.4.4 Logical (immediate)
2876 *   31  30 29 28         23 22  21  16 15  10 9    5 4    0
2877 * +----+-----+-------------+---+------+------+------+------+
2878 * | sf | opc | 1 0 0 1 0 0 | N | immr | imms |  Rn  |  Rd  |
2879 * +----+-----+-------------+---+------+------+------+------+
2880 */
2881static void disas_logic_imm(DisasContext *s, uint32_t insn)
2882{
2883    unsigned int sf, opc, is_n, immr, imms, rn, rd;
2884    TCGv_i64 tcg_rd, tcg_rn;
2885    uint64_t wmask;
2886    bool is_and = false;
2887
2888    sf = extract32(insn, 31, 1);
2889    opc = extract32(insn, 29, 2);
2890    is_n = extract32(insn, 22, 1);
2891    immr = extract32(insn, 16, 6);
2892    imms = extract32(insn, 10, 6);
2893    rn = extract32(insn, 5, 5);
2894    rd = extract32(insn, 0, 5);
2895
2896    if (!sf && is_n) {
2897        unallocated_encoding(s);
2898        return;
2899    }
2900
2901    if (opc == 0x3) { /* ANDS */
2902        tcg_rd = cpu_reg(s, rd);
2903    } else {
2904        tcg_rd = cpu_reg_sp(s, rd);
2905    }
2906    tcg_rn = cpu_reg(s, rn);
2907
2908    if (!logic_imm_decode_wmask(&wmask, is_n, imms, immr)) {
2909        /* some immediate field values are reserved */
2910        unallocated_encoding(s);
2911        return;
2912    }
2913
2914    if (!sf) {
2915        wmask &= 0xffffffff;
2916    }
2917
2918    switch (opc) {
2919    case 0x3: /* ANDS */
2920    case 0x0: /* AND */
2921        tcg_gen_andi_i64(tcg_rd, tcg_rn, wmask);
2922        is_and = true;
2923        break;
2924    case 0x1: /* ORR */
2925        tcg_gen_ori_i64(tcg_rd, tcg_rn, wmask);
2926        break;
2927    case 0x2: /* EOR */
2928        tcg_gen_xori_i64(tcg_rd, tcg_rn, wmask);
2929        break;
2930    default:
2931        assert(FALSE); /* must handle all above */
2932        break;
2933    }
2934
2935    if (!sf && !is_and) {
2936        /* zero extend final result; we know we can skip this for AND
2937         * since the immediate had the high 32 bits clear.
2938         */
2939        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
2940    }
2941
2942    if (opc == 3) { /* ANDS */
2943        gen_logic_CC(sf, tcg_rd);
2944    }
2945}
2946
2947/*
2948 * C3.4.5 Move wide (immediate)
2949 *
2950 *  31 30 29 28         23 22 21 20             5 4    0
2951 * +--+-----+-------------+-----+----------------+------+
2952 * |sf| opc | 1 0 0 1 0 1 |  hw |  imm16         |  Rd  |
2953 * +--+-----+-------------+-----+----------------+------+
2954 *
2955 * sf: 0 -> 32 bit, 1 -> 64 bit
2956 * opc: 00 -> MOVN, 10 -> MOVZ, 11 -> MOVK (01 is unallocated)
2957 * hw: shift/16 (0 or 16; if sf then also 32 or 48)
2958 */
2959static void disas_movw_imm(DisasContext *s, uint32_t insn)
2960{
2961    int rd = extract32(insn, 0, 5);
2962    uint64_t imm = extract32(insn, 5, 16);
2963    int sf = extract32(insn, 31, 1);
2964    int opc = extract32(insn, 29, 2);
2965    int pos = extract32(insn, 21, 2) << 4;
2966    TCGv_i64 tcg_rd = cpu_reg(s, rd);
2967    TCGv_i64 tcg_imm;
2968
2969    if (!sf && (pos >= 32)) {
2970        unallocated_encoding(s);
2971        return;
2972    }
2973
2974    switch (opc) {
2975    case 0: /* MOVN */
2976    case 2: /* MOVZ */
2977        imm <<= pos;
2978        if (opc == 0) {
2979            imm = ~imm;
2980        }
2981        if (!sf) {
2982            imm &= 0xffffffffu;
2983        }
2984        tcg_gen_movi_i64(tcg_rd, imm);
2985        break;
2986    case 3: /* MOVK */
2987        tcg_imm = tcg_const_i64(imm);
2988        tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_imm, pos, 16);
2989        tcg_temp_free_i64(tcg_imm);
2990        if (!sf) {
2991            tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
2992        }
2993        break;
2994    default:
2995        unallocated_encoding(s);
2996        break;
2997    }
2998}
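
    /* Illustrative example: MOVZ x0, #0x1234, LSL #16 sets
     * x0 = 0x12340000; a following MOVK x0, #0x5678 deposits 0x5678 into
     * bits [15:0] and leaves the rest of x0 intact.
     */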
2999
3000/* C3.4.2 Bitfield
3001 *   31  30 29 28         23 22  21  16 15  10 9    5 4    0
3002 * +----+-----+-------------+---+------+------+------+------+
3003 * | sf | opc | 1 0 0 1 1 0 | N | immr | imms |  Rn  |  Rd  |
3004 * +----+-----+-------------+---+------+------+------+------+
3005 */
3006static void disas_bitfield(DisasContext *s, uint32_t insn)
3007{
3008    unsigned int sf, n, opc, ri, si, rn, rd, bitsize, pos, len;
3009    TCGv_i64 tcg_rd, tcg_tmp;
3010
3011    sf = extract32(insn, 31, 1);
3012    opc = extract32(insn, 29, 2);
3013    n = extract32(insn, 22, 1);
3014    ri = extract32(insn, 16, 6);
3015    si = extract32(insn, 10, 6);
3016    rn = extract32(insn, 5, 5);
3017    rd = extract32(insn, 0, 5);
3018    bitsize = sf ? 64 : 32;
3019
3020    if (sf != n || ri >= bitsize || si >= bitsize || opc > 2) {
3021        unallocated_encoding(s);
3022        return;
3023    }
3024
3025    tcg_rd = cpu_reg(s, rd);
3026
3027    /* Suppress the zero-extend for !sf.  Since RI and SI are constrained
3028       to be smaller than bitsize, we'll never reference data outside the
3029       low 32-bits anyway.  */
3030    tcg_tmp = read_cpu_reg(s, rn, 1);
3031
3032    /* Recognize the common aliases.  */
3033    if (opc == 0) { /* SBFM */
3034        if (ri == 0) {
3035            if (si == 7) { /* SXTB */
3036                tcg_gen_ext8s_i64(tcg_rd, tcg_tmp);
3037                goto done;
3038            } else if (si == 15) { /* SXTH */
3039                tcg_gen_ext16s_i64(tcg_rd, tcg_tmp);
3040                goto done;
3041            } else if (si == 31) { /* SXTW */
3042                tcg_gen_ext32s_i64(tcg_rd, tcg_tmp);
3043                goto done;
3044            }
3045        }
3046        if (si == 63 || (si == 31 && ri <= si)) { /* ASR */
3047            if (si == 31) {
3048                tcg_gen_ext32s_i64(tcg_tmp, tcg_tmp);
3049            }
3050            tcg_gen_sari_i64(tcg_rd, tcg_tmp, ri);
3051            goto done;
3052        }
3053    } else if (opc == 2) { /* UBFM */
3054        if (ri == 0) { /* UXTB, UXTH, plus non-canonical AND */
3055            tcg_gen_andi_i64(tcg_rd, tcg_tmp, bitmask64(si + 1));
3056            return;
3057        }
3058        if (si == 63 || (si == 31 && ri <= si)) { /* LSR */
3059            if (si == 31) {
3060                tcg_gen_ext32u_i64(tcg_tmp, tcg_tmp);
3061            }
3062            tcg_gen_shri_i64(tcg_rd, tcg_tmp, ri);
3063            return;
3064        }
3065        if (si + 1 == ri && si != bitsize - 1) { /* LSL */
3066            int shift = bitsize - 1 - si;
3067            tcg_gen_shli_i64(tcg_rd, tcg_tmp, shift);
3068            goto done;
3069        }
3070    }
3071
3072    if (opc != 1) { /* SBFM or UBFM */
3073        tcg_gen_movi_i64(tcg_rd, 0);
3074    }
3075
3076    /* do the bit move operation */
3077    if (si >= ri) {
3078        /* Wd<s-r:0> = Wn<s:r> */
3079        tcg_gen_shri_i64(tcg_tmp, tcg_tmp, ri);
3080        pos = 0;
3081        len = (si - ri) + 1;
3082    } else {
3083        /* Wd<32+s-r,32-r> = Wn<s:0> */
3084        pos = bitsize - ri;
3085        len = si + 1;
3086    }
3087
3088    tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, pos, len);
3089
3090    if (opc == 0) { /* SBFM - sign extend the destination field */
3091        tcg_gen_shli_i64(tcg_rd, tcg_rd, 64 - (pos + len));
3092        tcg_gen_sari_i64(tcg_rd, tcg_rd, 64 - (pos + len));
3093    }
3094
3095 done:
3096    if (!sf) { /* zero extend final result */
3097        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3098    }
3099}
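
    /* Illustrative example: UBFX w0, w1, #8, #4 is UBFM with ri = 8 and
     * si = 11, so the si >= ri path above extracts w1<11:8> into w0<3:0>.
     */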
3100
3101/* C3.4.3 Extract
3102 *   31  30  29 28         23 22   21  20  16 15    10 9    5 4    0
3103 * +----+------+-------------+---+----+------+--------+------+------+
3104 * | sf | op21 | 1 0 0 1 1 1 | N | o0 |  Rm  |  imms  |  Rn  |  Rd  |
3105 * +----+------+-------------+---+----+------+--------+------+------+
3106 */
3107static void disas_extract(DisasContext *s, uint32_t insn)
3108{
3109    unsigned int sf, n, rm, imm, rn, rd, bitsize, op21, op0;
3110
3111    sf = extract32(insn, 31, 1);
3112    n = extract32(insn, 22, 1);
3113    rm = extract32(insn, 16, 5);
3114    imm = extract32(insn, 10, 6);
3115    rn = extract32(insn, 5, 5);
3116    rd = extract32(insn, 0, 5);
3117    op21 = extract32(insn, 29, 2);
3118    op0 = extract32(insn, 21, 1);
3119    bitsize = sf ? 64 : 32;
3120
3121    if (sf != n || op21 || op0 || imm >= bitsize) {
3122        unallocated_encoding(s);
3123    } else {
3124        TCGv_i64 tcg_rd, tcg_rm, tcg_rn;
3125
3126        tcg_rd = cpu_reg(s, rd);
3127
3128        if (unlikely(imm == 0)) {
3129            /* tcg shl_i32/shl_i64 is undefined for 32/64 bit shifts,
3130             * so an extract from bit 0 is a special case.
3131             */
3132            if (sf) {
3133                tcg_gen_mov_i64(tcg_rd, cpu_reg(s, rm));
3134            } else {
3135                tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, rm));
3136            }
3137        } else if (rm == rn) { /* ROR */
3138            tcg_rm = cpu_reg(s, rm);
3139            if (sf) {
3140                tcg_gen_rotri_i64(tcg_rd, tcg_rm, imm);
3141            } else {
3142                TCGv_i32 tmp = tcg_temp_new_i32();
3143                tcg_gen_extrl_i64_i32(tmp, tcg_rm);
3144                tcg_gen_rotri_i32(tmp, tmp, imm);
3145                tcg_gen_extu_i32_i64(tcg_rd, tmp);
3146                tcg_temp_free_i32(tmp);
3147            }
3148        } else {
3149            tcg_rm = read_cpu_reg(s, rm, sf);
3150            tcg_rn = read_cpu_reg(s, rn, sf);
3151            tcg_gen_shri_i64(tcg_rm, tcg_rm, imm);
3152            tcg_gen_shli_i64(tcg_rn, tcg_rn, bitsize - imm);
3153            tcg_gen_or_i64(tcg_rd, tcg_rm, tcg_rn);
3154            if (!sf) {
3155                tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3156            }
3157        }
3158    }
3159}
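
    /* Illustrative example: EXTR x0, x1, x2, #8 computes
     * x0 = (x2 >> 8) | (x1 << 56), i.e. bits <71:8> of the
     * concatenation x1:x2.
     */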
3160
3161/* C3.4 Data processing - immediate */
3162static void disas_data_proc_imm(DisasContext *s, uint32_t insn)
3163{
3164    switch (extract32(insn, 23, 6)) {
3165    case 0x20: case 0x21: /* PC-rel. addressing */
3166        disas_pc_rel_adr(s, insn);
3167        break;
3168    case 0x22: case 0x23: /* Add/subtract (immediate) */
3169        disas_add_sub_imm(s, insn);
3170        break;
3171    case 0x24: /* Logical (immediate) */
3172        disas_logic_imm(s, insn);
3173        break;
3174    case 0x25: /* Move wide (immediate) */
3175        disas_movw_imm(s, insn);
3176        break;
3177    case 0x26: /* Bitfield */
3178        disas_bitfield(s, insn);
3179        break;
3180    case 0x27: /* Extract */
3181        disas_extract(s, insn);
3182        break;
3183    default:
3184        unallocated_encoding(s);
3185        break;
3186    }
3187}
3188
3189/* Shift a TCGv src by TCGv shift_amount, put result in dst.
3190 * Note that it is the caller's responsibility to ensure that the
3191 * shift amount is in range (ie 0..31 or 0..63) and provide the ARM
3192 * mandated semantics for out of range shifts.
3193 */
3194static void shift_reg(TCGv_i64 dst, TCGv_i64 src, int sf,
3195                      enum a64_shift_type shift_type, TCGv_i64 shift_amount)
3196{
3197    switch (shift_type) {
3198    case A64_SHIFT_TYPE_LSL:
3199        tcg_gen_shl_i64(dst, src, shift_amount);
3200        break;
3201    case A64_SHIFT_TYPE_LSR:
3202        tcg_gen_shr_i64(dst, src, shift_amount);
3203        break;
3204    case A64_SHIFT_TYPE_ASR:
3205        if (!sf) {
3206            tcg_gen_ext32s_i64(dst, src);
3207        }
3208        tcg_gen_sar_i64(dst, sf ? src : dst, shift_amount);
3209        break;
3210    case A64_SHIFT_TYPE_ROR:
3211        if (sf) {
3212            tcg_gen_rotr_i64(dst, src, shift_amount);
3213        } else {
3214            TCGv_i32 t0, t1;
3215            t0 = tcg_temp_new_i32();
3216            t1 = tcg_temp_new_i32();
3217            tcg_gen_extrl_i64_i32(t0, src);
3218            tcg_gen_extrl_i64_i32(t1, shift_amount);
3219            tcg_gen_rotr_i32(t0, t0, t1);
3220            tcg_gen_extu_i32_i64(dst, t0);
3221            tcg_temp_free_i32(t0);
3222            tcg_temp_free_i32(t1);
3223        }
3224        break;
3225    default:
3226        assert(FALSE); /* all shift types should be handled */
3227        break;
3228    }
3229
3230    if (!sf) { /* zero extend final result */
3231        tcg_gen_ext32u_i64(dst, dst);
3232    }
3233}
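
    /* Illustrative example: with sf = 0, rotating 0x80000001 right by 4
     * gives 0x18000000; the 32 bit rotate above is what makes the bits
     * wrap at bit 32 rather than bit 64.
     */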
3234
3235/* Shift a TCGv src by immediate, put result in dst.
3236 * The shift amount must be in range (this should always be true as the
3237 * relevant instructions will UNDEF on bad shift immediates).
3238 */
3239static void shift_reg_imm(TCGv_i64 dst, TCGv_i64 src, int sf,
3240                          enum a64_shift_type shift_type, unsigned int shift_i)
3241{
3242    assert(shift_i < (sf ? 64 : 32));
3243
3244    if (shift_i == 0) {
3245        tcg_gen_mov_i64(dst, src);
3246    } else {
3247        TCGv_i64 shift_const;
3248
3249        shift_const = tcg_const_i64(shift_i);
3250        shift_reg(dst, src, sf, shift_type, shift_const);
3251        tcg_temp_free_i64(shift_const);
3252    }
3253}
3254
3255/* C3.5.10 Logical (shifted register)
3256 *   31  30 29 28       24 23   22 21  20  16 15    10 9    5 4    0
3257 * +----+-----+-----------+-------+---+------+--------+------+------+
3258 * | sf | opc | 0 1 0 1 0 | shift | N |  Rm  |  imm6  |  Rn  |  Rd  |
3259 * +----+-----+-----------+-------+---+------+--------+------+------+
3260 */
3261static void disas_logic_reg(DisasContext *s, uint32_t insn)
3262{
3263    TCGv_i64 tcg_rd, tcg_rn, tcg_rm;
3264    unsigned int sf, opc, shift_type, invert, rm, shift_amount, rn, rd;
3265
3266    sf = extract32(insn, 31, 1);
3267    opc = extract32(insn, 29, 2);
3268    shift_type = extract32(insn, 22, 2);
3269    invert = extract32(insn, 21, 1);
3270    rm = extract32(insn, 16, 5);
3271    shift_amount = extract32(insn, 10, 6);
3272    rn = extract32(insn, 5, 5);
3273    rd = extract32(insn, 0, 5);
3274
3275    if (!sf && (shift_amount & (1 << 5))) {
3276        unallocated_encoding(s);
3277        return;
3278    }
3279
3280    tcg_rd = cpu_reg(s, rd);
3281
3282    if (opc == 1 && shift_amount == 0 && shift_type == 0 && rn == 31) {
3283        /* Unshifted ORR and ORN with WZR/XZR is the standard encoding for
3284         * register-register MOV and MVN, so it is worth special casing.
3285         */
3286        tcg_rm = cpu_reg(s, rm);
3287        if (invert) {
3288            tcg_gen_not_i64(tcg_rd, tcg_rm);
3289            if (!sf) {
3290                tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3291            }
3292        } else {
3293            if (sf) {
3294                tcg_gen_mov_i64(tcg_rd, tcg_rm);
3295            } else {
3296                tcg_gen_ext32u_i64(tcg_rd, tcg_rm);
3297            }
3298        }
3299        return;
3300    }
3301
3302    tcg_rm = read_cpu_reg(s, rm, sf);
3303
3304    if (shift_amount) {
3305        shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, shift_amount);
3306    }
3307
3308    tcg_rn = cpu_reg(s, rn);
3309
3310    switch (opc | (invert << 2)) {
3311    case 0: /* AND */
3312    case 3: /* ANDS */
3313        tcg_gen_and_i64(tcg_rd, tcg_rn, tcg_rm);
3314        break;
3315    case 1: /* ORR */
3316        tcg_gen_or_i64(tcg_rd, tcg_rn, tcg_rm);
3317        break;
3318    case 2: /* EOR */
3319        tcg_gen_xor_i64(tcg_rd, tcg_rn, tcg_rm);
3320        break;
3321    case 4: /* BIC */
3322    case 7: /* BICS */
3323        tcg_gen_andc_i64(tcg_rd, tcg_rn, tcg_rm);
3324        break;
3325    case 5: /* ORN */
3326        tcg_gen_orc_i64(tcg_rd, tcg_rn, tcg_rm);
3327        break;
3328    case 6: /* EON */
3329        tcg_gen_eqv_i64(tcg_rd, tcg_rn, tcg_rm);
3330        break;
3331    default:
3332        g_assert_not_reached();
3333        break;
3334    }
3335
3336    if (!sf) {
3337        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3338    }
3339
3340    if (opc == 3) {
3341        gen_logic_CC(sf, tcg_rd);
3342    }
3343}
3344
3345/*
3346 * C3.5.1 Add/subtract (extended register)
3347 *
3348 *  31|30|29|28       24|23 22|21|20   16|15  13|12  10|9  5|4  0|
3349 * +--+--+--+-----------+-----+--+-------+------+------+----+----+
3350 * |sf|op| S| 0 1 0 1 1 | opt | 1|  Rm   |option| imm3 | Rn | Rd |
3351 * +--+--+--+-----------+-----+--+-------+------+------+----+----+
3352 *
3353 *  sf: 0 -> 32bit, 1 -> 64bit
3354 *  op: 0 -> add  , 1 -> sub
3355 *   S: 1 -> set flags
3356 * opt: 00
3357 * option: extension type (see DecodeRegExtend)
3358 * imm3: left-shift amount (0-4) applied to the extended Rm
3359 *
3360 * Rd = Rn +/- LSL(extend(Rm), imm3)
3361 */
3362static void disas_add_sub_ext_reg(DisasContext *s, uint32_t insn)
3363{
3364    int rd = extract32(insn, 0, 5);
3365    int rn = extract32(insn, 5, 5);
3366    int imm3 = extract32(insn, 10, 3);
3367    int option = extract32(insn, 13, 3);
3368    int rm = extract32(insn, 16, 5);
3369    bool setflags = extract32(insn, 29, 1);
3370    bool sub_op = extract32(insn, 30, 1);
3371    bool sf = extract32(insn, 31, 1);
3372
3373    TCGv_i64 tcg_rm, tcg_rn; /* temps */
3374    TCGv_i64 tcg_rd;
3375    TCGv_i64 tcg_result;
3376
3377    if (imm3 > 4) {
3378        unallocated_encoding(s);
3379        return;
3380    }
3381
3382    /* non-flag setting ops may use SP */
3383    if (!setflags) {
3384        tcg_rd = cpu_reg_sp(s, rd);
3385    } else {
3386        tcg_rd = cpu_reg(s, rd);
3387    }
3388    tcg_rn = read_cpu_reg_sp(s, rn, sf);
3389
3390    tcg_rm = read_cpu_reg(s, rm, sf);
3391    ext_and_shift_reg(tcg_rm, tcg_rm, option, imm3);
3392
3393    tcg_result = tcg_temp_new_i64();
3394
3395    if (!setflags) {
3396        if (sub_op) {
3397            tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
3398        } else {
3399            tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
3400        }
3401    } else {
3402        if (sub_op) {
3403            gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
3404        } else {
3405            gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
3406        }
3407    }
3408
3409    if (sf) {
3410        tcg_gen_mov_i64(tcg_rd, tcg_result);
3411    } else {
3412        tcg_gen_ext32u_i64(tcg_rd, tcg_result);
3413    }
3414
3415    tcg_temp_free_i64(tcg_result);
3416}
3417
3418/*
3419 * C3.5.2 Add/subtract (shifted register)
3420 *
3421 *  31 30 29 28       24 23 22 21 20   16 15     10 9    5 4    0
3422 * +--+--+--+-----------+-----+--+-------+---------+------+------+
3423 * |sf|op| S| 0 1 0 1 1 |shift| 0|  Rm   |  imm6   |  Rn  |  Rd  |
3424 * +--+--+--+-----------+-----+--+-------+---------+------+------+
3425 *
3426 *    sf: 0 -> 32bit, 1 -> 64bit
3427 *    op: 0 -> add  , 1 -> sub
3428 *     S: 1 -> set flags
3429 * shift: 00 -> LSL, 01 -> LSR, 10 -> ASR, 11 -> RESERVED
3430 *  imm6: Shift amount to apply to Rm before the add/sub
3431 */
3432static void disas_add_sub_reg(DisasContext *s, uint32_t insn)
3433{
3434    int rd = extract32(insn, 0, 5);
3435    int rn = extract32(insn, 5, 5);
3436    int imm6 = extract32(insn, 10, 6);
3437    int rm = extract32(insn, 16, 5);
3438    int shift_type = extract32(insn, 22, 2);
3439    bool setflags = extract32(insn, 29, 1);
3440    bool sub_op = extract32(insn, 30, 1);
3441    bool sf = extract32(insn, 31, 1);
3442
3443    TCGv_i64 tcg_rd = cpu_reg(s, rd);
3444    TCGv_i64 tcg_rn, tcg_rm;
3445    TCGv_i64 tcg_result;
3446
3447    if ((shift_type == 3) || (!sf && (imm6 > 31))) {
3448        unallocated_encoding(s);
3449        return;
3450    }
3451
3452    tcg_rn = read_cpu_reg(s, rn, sf);
3453    tcg_rm = read_cpu_reg(s, rm, sf);
3454
3455    shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, imm6);
3456
3457    tcg_result = tcg_temp_new_i64();
3458
3459    if (!setflags) {
3460        if (sub_op) {
3461            tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
3462        } else {
3463            tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
3464        }
3465    } else {
3466        if (sub_op) {
3467            gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
3468        } else {
3469            gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
3470        }
3471    }
3472
3473    if (sf) {
3474        tcg_gen_mov_i64(tcg_rd, tcg_result);
3475    } else {
3476        tcg_gen_ext32u_i64(tcg_rd, tcg_result);
3477    }
3478
3479    tcg_temp_free_i64(tcg_result);
3480}
3481
3482/* C3.5.9 Data-processing (3 source)
3483 *
3484 *  31 30  29 28       24 23 21  20  16  15  14  10 9    5 4    0
3485 * +--+------+-----------+------+------+----+------+------+------+
3486 * |sf| op54 | 1 1 0 1 1 | op31 |  Rm  | o0 |  Ra  |  Rn  |  Rd  |
3487 * +--+------+-----------+------+------+----+------+------+------+
3488 *
3489 */
3490static void disas_data_proc_3src(DisasContext *s, uint32_t insn)
3491{
3492    int rd = extract32(insn, 0, 5);
3493    int rn = extract32(insn, 5, 5);
3494    int ra = extract32(insn, 10, 5);
3495    int rm = extract32(insn, 16, 5);
3496    int op_id = (extract32(insn, 29, 3) << 4) |
3497        (extract32(insn, 21, 3) << 1) |
3498        extract32(insn, 15, 1);
3499    bool sf = extract32(insn, 31, 1);
3500    bool is_sub = extract32(op_id, 0, 1);
3501    bool is_high = extract32(op_id, 2, 1);
3502    bool is_signed = false;
3503    TCGv_i64 tcg_op1;
3504    TCGv_i64 tcg_op2;
3505    TCGv_i64 tcg_tmp;
3506
3507    /* Note that op_id is sf:op54:op31:o0 so it includes the 32/64 size flag */
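    /* For example SMULH (sf=1, op54=00, op31=010, o0=0) gives op_id 0x44. */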
3508    switch (op_id) {
3509    case 0x42: /* SMADDL */
3510    case 0x43: /* SMSUBL */
3511    case 0x44: /* SMULH */
3512        is_signed = true;
3513        break;
3514    case 0x0: /* MADD (32bit) */
3515    case 0x1: /* MSUB (32bit) */
3516    case 0x40: /* MADD (64bit) */
3517    case 0x41: /* MSUB (64bit) */
3518    case 0x4a: /* UMADDL */
3519    case 0x4b: /* UMSUBL */
3520    case 0x4c: /* UMULH */
3521        break;
3522    default:
3523        unallocated_encoding(s);
3524        return;
3525    }
3526
3527    if (is_high) {
3528        TCGv_i64 low_bits = tcg_temp_new_i64(); /* low bits discarded */
3529        TCGv_i64 tcg_rd = cpu_reg(s, rd);
3530        TCGv_i64 tcg_rn = cpu_reg(s, rn);
3531        TCGv_i64 tcg_rm = cpu_reg(s, rm);
3532
3533        if (is_signed) {
3534            tcg_gen_muls2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
3535        } else {
3536            tcg_gen_mulu2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
3537        }
3538
3539        tcg_temp_free_i64(low_bits);
3540        return;
3541    }
3542
3543    tcg_op1 = tcg_temp_new_i64();
3544    tcg_op2 = tcg_temp_new_i64();
3545    tcg_tmp = tcg_temp_new_i64();
3546
3547    if (op_id < 0x42) {
3548        tcg_gen_mov_i64(tcg_op1, cpu_reg(s, rn));
3549        tcg_gen_mov_i64(tcg_op2, cpu_reg(s, rm));
3550    } else {
3551        if (is_signed) {
3552            tcg_gen_ext32s_i64(tcg_op1, cpu_reg(s, rn));
3553            tcg_gen_ext32s_i64(tcg_op2, cpu_reg(s, rm));
3554        } else {
3555            tcg_gen_ext32u_i64(tcg_op1, cpu_reg(s, rn));
3556            tcg_gen_ext32u_i64(tcg_op2, cpu_reg(s, rm));
3557        }
3558    }
3559
3560    if (ra == 31 && !is_sub) {
3561        /* Special-case MADD with rA == XZR; it is the standard MUL alias */
3562        tcg_gen_mul_i64(cpu_reg(s, rd), tcg_op1, tcg_op2);
3563    } else {
3564        tcg_gen_mul_i64(tcg_tmp, tcg_op1, tcg_op2);
3565        if (is_sub) {
3566            tcg_gen_sub_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
3567        } else {
3568            tcg_gen_add_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
3569        }
3570    }
3571
3572    if (!sf) {
3573        tcg_gen_ext32u_i64(cpu_reg(s, rd), cpu_reg(s, rd));
3574    }
3575
3576    tcg_temp_free_i64(tcg_op1);
3577    tcg_temp_free_i64(tcg_op2);
3578    tcg_temp_free_i64(tcg_tmp);
3579}
3580
3581/* C3.5.3 - Add/subtract (with carry)
3582 *  31 30 29 28 27 26 25 24 23 22 21  20  16  15   10  9    5 4   0
3583 * +--+--+--+------------------------+------+---------+------+-----+
3584 * |sf|op| S| 1  1  0  1  0  0  0  0 |  rm  | opcode2 |  Rn  |  Rd |
3585 * +--+--+--+------------------------+------+---------+------+-----+
3586 *                                            [000000]
3587 */
3588
3589static void disas_adc_sbc(DisasContext *s, uint32_t insn)
3590{
3591    unsigned int sf, op, setflags, rm, rn, rd;
3592    TCGv_i64 tcg_y, tcg_rn, tcg_rd;
3593
3594    if (extract32(insn, 10, 6) != 0) {
3595        unallocated_encoding(s);
3596        return;
3597    }
3598
3599    sf = extract32(insn, 31, 1);
3600    op = extract32(insn, 30, 1);
3601    setflags = extract32(insn, 29, 1);
3602    rm = extract32(insn, 16, 5);
3603    rn = extract32(insn, 5, 5);
3604    rd = extract32(insn, 0, 5);
3605
3606    tcg_rd = cpu_reg(s, rd);
3607    tcg_rn = cpu_reg(s, rn);
3608
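    /* SBC computes Rn + NOT(Rm) + C (== Rn - Rm - !C), so the subtract
     * forms just invert Rm and reuse the ADC generation below.
     */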
3609    if (op) {
3610        tcg_y = new_tmp_a64(s);
3611        tcg_gen_not_i64(tcg_y, cpu_reg(s, rm));
3612    } else {
3613        tcg_y = cpu_reg(s, rm);
3614    }
3615
3616    if (setflags) {
3617        gen_adc_CC(sf, tcg_rd, tcg_rn, tcg_y);
3618    } else {
3619        gen_adc(sf, tcg_rd, tcg_rn, tcg_y);
3620    }
3621}
3622
3623/* C3.5.4 - C3.5.5 Conditional compare (immediate / register)
3624 *  31 30 29 28 27 26 25 24 23 22 21  20    16 15  12  11  10  9   5  4 3   0
3625 * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
3626 * |sf|op| S| 1  1  0  1  0  0  1  0 |imm5/rm | cond |i/r |o2|  Rn  |o3|nzcv |
3627 * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
3628 *        [1]                             y                [0]       [0]
3629 */
3630static void disas_cc(DisasContext *s, uint32_t insn)
3631{
3632    unsigned int sf, op, y, cond, rn, nzcv, is_imm;
3633    TCGv_i32 tcg_t0, tcg_t1, tcg_t2;
3634    TCGv_i64 tcg_tmp, tcg_y, tcg_rn;
3635    DisasCompare c;
3636
3637    if (!extract32(insn, 29, 1)) {
3638        unallocated_encoding(s);
3639        return;
3640    }
3641    if (insn & (1 << 10 | 1 << 4)) {
3642        unallocated_encoding(s);
3643        return;
3644    }
3645    sf = extract32(insn, 31, 1);
3646    op = extract32(insn, 30, 1);
3647    is_imm = extract32(insn, 11, 1);
3648    y = extract32(insn, 16, 5); /* y = rm (reg) or imm5 (imm) */
3649    cond = extract32(insn, 12, 4);
3650    rn = extract32(insn, 5, 5);
3651    nzcv = extract32(insn, 0, 4);
3652
3653    /* Set T0 = !COND.  */
3654    tcg_t0 = tcg_temp_new_i32();
3655    arm_test_cc(&c, cond);
3656    tcg_gen_setcondi_i32(tcg_invert_cond(c.cond), tcg_t0, c.value, 0);
3657    arm_free_cc(&c);
3658
3659    /* Load the arguments for the new comparison.  */
3660    if (is_imm) {
3661        tcg_y = new_tmp_a64(s);
3662        tcg_gen_movi_i64(tcg_y, y);
3663    } else {
3664        tcg_y = cpu_reg(s, y);
3665    }
3666    tcg_rn = cpu_reg(s, rn);
3667
3668    /* Set the flags for the new comparison.  */
3669    tcg_tmp = tcg_temp_new_i64();
3670    if (op) {
3671        gen_sub_CC(sf, tcg_tmp, tcg_rn, tcg_y);
3672    } else {
3673        gen_add_CC(sf, tcg_tmp, tcg_rn, tcg_y);
3674    }
3675    tcg_temp_free_i64(tcg_tmp);
3676
3677    /* If COND was false, force the flags to #nzcv.  Compute two masks
3678     * to help with this: T1 = (COND ? 0 : -1), T2 = (COND ? -1 : 0).
3679     * For tcg hosts that support ANDC, we can make do with just T1.
3680     * In either case, allow the tcg optimizer to delete any unused mask.
3681     */
3682    tcg_t1 = tcg_temp_new_i32();
3683    tcg_t2 = tcg_temp_new_i32();
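    /* t0 is !COND as 0/1, so neg(t0) gives T1 and t0 - 1 gives T2. */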
3684    tcg_gen_neg_i32(tcg_t1, tcg_t0);
3685    tcg_gen_subi_i32(tcg_t2, tcg_t0, 1);
3686
3687    if (nzcv & 8) { /* N */
3688        tcg_gen_or_i32(cpu_NF, cpu_NF, tcg_t1);
3689    } else {
3690        if (TCG_TARGET_HAS_andc_i32) {
3691            tcg_gen_andc_i32(cpu_NF, cpu_NF, tcg_t1);
3692        } else {
3693            tcg_gen_and_i32(cpu_NF, cpu_NF, tcg_t2);
3694        }
3695    }
3696    if (nzcv & 4) { /* Z */
3697        if (TCG_TARGET_HAS_andc_i32) {
3698            tcg_gen_andc_i32(cpu_ZF, cpu_ZF, tcg_t1);
3699        } else {
3700            tcg_gen_and_i32(cpu_ZF, cpu_ZF, tcg_t2);
3701        }
3702    } else {
3703        tcg_gen_or_i32(cpu_ZF, cpu_ZF, tcg_t0);
3704    }
3705    if (nzcv & 2) { /* C */
3706        tcg_gen_or_i32(cpu_CF, cpu_CF, tcg_t0);
3707    } else {
3708        if (TCG_TARGET_HAS_andc_i32) {
3709            tcg_gen_andc_i32(cpu_CF, cpu_CF, tcg_t1);
3710        } else {
3711            tcg_gen_and_i32(cpu_CF, cpu_CF, tcg_t2);
3712        }
3713    }
3714    if (nzcv & 1) { /* V */
3715        tcg_gen_or_i32(cpu_VF, cpu_VF, tcg_t1);
3716    } else {
3717        if (TCG_TARGET_HAS_andc_i32) {
3718            tcg_gen_andc_i32(cpu_VF, cpu_VF, tcg_t1);
3719        } else {
3720            tcg_gen_and_i32(cpu_VF, cpu_VF, tcg_t2);
3721        }
3722    }
3723    tcg_temp_free_i32(tcg_t0);
3724    tcg_temp_free_i32(tcg_t1);
3725    tcg_temp_free_i32(tcg_t2);
3726}
3727
3728/* C3.5.6 Conditional select
3729 *   31   30  29  28             21 20  16 15  12 11 10 9    5 4    0
3730 * +----+----+---+-----------------+------+------+-----+------+------+
3731 * | sf | op | S | 1 1 0 1 0 1 0 0 |  Rm  | cond | op2 |  Rn  |  Rd  |
3732 * +----+----+---+-----------------+------+------+-----+------+------+
3733 */
3734static void disas_cond_select(DisasContext *s, uint32_t insn)
3735{
3736    unsigned int sf, else_inv, rm, cond, else_inc, rn, rd;
3737    TCGv_i64 tcg_rd, zero;
3738    DisasCompare64 c;
3739
3740    if (extract32(insn, 29, 1) || extract32(insn, 11, 1)) {
3741        /* S == 1 or op2<1> == 1 */
3742        unallocated_encoding(s);
3743        return;
3744    }
3745    sf = extract32(insn, 31, 1);
3746    else_inv = extract32(insn, 30, 1);
3747    rm = extract32(insn, 16, 5);
3748    cond = extract32(insn, 12, 4);
3749    else_inc = extract32(insn, 10, 1);
3750    rn = extract32(insn, 5, 5);
3751    rd = extract32(insn, 0, 5);
3752
3753    tcg_rd = cpu_reg(s, rd);
3754
3755    a64_test_cc(&c, cond);
3756    zero = tcg_const_i64(0);
3757
3758    if (rn == 31 && rm == 31 && (else_inc ^ else_inv)) {
3759        /* CSET & CSETM.  */
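        /* CSET Rd, c is an alias of CSINC Rd, ZR, ZR, inv(c), and
         * CSETM Rd, c of CSINV Rd, ZR, ZR, inv(c).
         */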
3760        tcg_gen_setcond_i64(tcg_invert_cond(c.cond), tcg_rd, c.value, zero);
3761        if (else_inv) {
3762            tcg_gen_neg_i64(tcg_rd, tcg_rd);
3763        }
3764    } else {
3765        TCGv_i64 t_true = cpu_reg(s, rn);
3766        TCGv_i64 t_false = read_cpu_reg(s, rm, 1);
3767        if (else_inv && else_inc) {
3768            tcg_gen_neg_i64(t_false, t_false);
3769        } else if (else_inv) {
3770            tcg_gen_not_i64(t_false, t_false);
3771        } else if (else_inc) {
3772            tcg_gen_addi_i64(t_false, t_false, 1);
3773        }
3774        tcg_gen_movcond_i64(c.cond, tcg_rd, c.value, zero, t_true, t_false);
3775    }
3776
3777    tcg_temp_free_i64(zero);
3778    a64_free_cc(&c);
3779
3780    if (!sf) {
3781        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3782    }
3783}
3784
3785static void handle_clz(DisasContext *s, unsigned int sf,
3786                       unsigned int rn, unsigned int rd)
3787{
3788    TCGv_i64 tcg_rd, tcg_rn;
3789    tcg_rd = cpu_reg(s, rd);
3790    tcg_rn = cpu_reg(s, rn);
3791
3792    if (sf) {
3793        gen_helper_clz64(tcg_rd, tcg_rn);
3794    } else {
3795        TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
3796        tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
3797        gen_helper_clz(tcg_tmp32, tcg_tmp32);
3798        tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
3799        tcg_temp_free_i32(tcg_tmp32);
3800    }
3801}
3802
3803static void handle_cls(DisasContext *s, unsigned int sf,
3804                       unsigned int rn, unsigned int rd)
3805{
3806    TCGv_i64 tcg_rd, tcg_rn;
3807    tcg_rd = cpu_reg(s, rd);
3808    tcg_rn = cpu_reg(s, rn);
3809
3810    if (sf) {
3811        gen_helper_cls64(tcg_rd, tcg_rn);
3812    } else {
3813        TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
3814        tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
3815        gen_helper_cls32(tcg_tmp32, tcg_tmp32);
3816        tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
3817        tcg_temp_free_i32(tcg_tmp32);
3818    }
3819}
3820
3821static void handle_rbit(DisasContext *s, unsigned int sf,
3822                        unsigned int rn, unsigned int rd)
3823{
3824    TCGv_i64 tcg_rd, tcg_rn;
3825    tcg_rd = cpu_reg(s, rd);
3826    tcg_rn = cpu_reg(s, rn);
3827
3828    if (sf) {
3829        gen_helper_rbit64(tcg_rd, tcg_rn);
3830    } else {
3831        TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
3832        tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
3833        gen_helper_rbit(tcg_tmp32, tcg_tmp32);
3834        tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
3835        tcg_temp_free_i32(tcg_tmp32);
3836    }
3837}
3838
3839/* C5.6.149 REV with sf==1, opcode==3 ("REV64") */
3840static void handle_rev64(DisasContext *s, unsigned int sf,
3841                         unsigned int rn, unsigned int rd)
3842{
3843    if (!sf) {
3844        unallocated_encoding(s);
3845        return;
3846    }
3847    tcg_gen_bswap64_i64(cpu_reg(s, rd), cpu_reg(s, rn));
3848}
3849
3850/* C5.6.149 REV with sf==0, opcode==2
3851 * C5.6.151 REV32 (sf==1, opcode==2)
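 * Each 32-bit word of the source is byte-swapped in place, e.g.
 * (for sf==1) 0x0123456789abcdef becomes 0x67452301efcdab89.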
3852 */
3853static void handle_rev32(DisasContext *s, unsigned int sf,
3854                         unsigned int rn, unsigned int rd)
3855{
3856    TCGv_i64 tcg_rd = cpu_reg(s, rd);
3857
3858    if (sf) {
3859        TCGv_i64 tcg_tmp = tcg_temp_new_i64();
3860        TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
3861
3862        /* bswap32_i64 requires zero high word */
3863        tcg_gen_ext32u_i64(tcg_tmp, tcg_rn);
3864        tcg_gen_bswap32_i64(tcg_rd, tcg_tmp);
3865        tcg_gen_shri_i64(tcg_tmp, tcg_rn, 32);
3866        tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp);
3867        tcg_gen_concat32_i64(tcg_rd, tcg_rd, tcg_tmp);
3868
3869        tcg_temp_free_i64(tcg_tmp);
3870    } else {
3871        tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, rn));
3872        tcg_gen_bswap32_i64(tcg_rd, tcg_rd);
3873    }
3874}
3875
3876/* C5.6.150 REV16 (opcode==1) */
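/* Byte-swaps each 16-bit halfword of the 32-bit or 64-bit source,
 * e.g. (for sf==1) 0x0123456789abcdef becomes 0x23016745ab89efcd.
 */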
3877static void handle_rev16(DisasContext *s, unsigned int sf,
3878                         unsigned int rn, unsigned int rd)
3879{
3880    TCGv_i64 tcg_rd = cpu_reg(s, rd);
3881    TCGv_i64 tcg_tmp = tcg_temp_new_i64();
3882    TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
3883
3884    tcg_gen_andi_i64(tcg_tmp, tcg_rn, 0xffff);
3885    tcg_gen_bswap16_i64(tcg_rd, tcg_tmp);
3886
3887    tcg_gen_shri_i64(tcg_tmp, tcg_rn, 16);
3888    tcg_gen_andi_i64(tcg_tmp, tcg_tmp, 0xffff);
3889    tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
3890    tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, 16, 16);
3891
3892    if (sf) {
3893        tcg_gen_shri_i64(tcg_tmp, tcg_rn, 32);
3894        tcg_gen_andi_i64(tcg_tmp, tcg_tmp, 0xffff);
3895        tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
3896        tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, 32, 16);
3897
3898        tcg_gen_shri_i64(tcg_tmp, tcg_rn, 48);
3899        tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
3900        tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, 48, 16);
3901    }
3902
3903    tcg_temp_free_i64(tcg_tmp);
3904}
3905
3906/* C3.5.7 Data-processing (1 source)
3907 *   31  30  29  28             21 20     16 15    10 9    5 4    0
3908 * +----+---+---+-----------------+---------+--------+------+------+
3909 * | sf | 1 | S | 1 1 0 1 0 1 1 0 | opcode2 | opcode |  Rn  |  Rd  |
3910 * +----+---+---+-----------------+---------+--------+------+------+
3911 */
3912static void disas_data_proc_1src(DisasContext *s, uint32_t insn)
3913{
3914    unsigned int sf, opcode, rn, rd;
3915
3916    if (extract32(insn, 29, 1) || extract32(insn, 16, 5)) {
3917        unallocated_encoding(s);
3918        return;
3919    }
3920
3921    sf = extract32(insn, 31, 1);
3922    opcode = extract32(insn, 10, 6);
3923    rn = extract32(insn, 5, 5);
3924    rd = extract32(insn, 0, 5);
3925
3926    switch (opcode) {
3927    case 0: /* RBIT */
3928        handle_rbit(s, sf, rn, rd);
3929        break;
3930    case 1: /* REV16 */
3931        handle_rev16(s, sf, rn, rd);
3932        break;
3933    case 2: /* REV32 */
3934        handle_rev32(s, sf, rn, rd);
3935        break;
3936    case 3: /* REV64 */
3937        handle_rev64(s, sf, rn, rd);
3938        break;
3939    case 4: /* CLZ */
3940        handle_clz(s, sf, rn, rd);
3941        break;
3942    case 5: /* CLS */
3943        handle_cls(s, sf, rn, rd);
3944        break;
    default: /* opcodes 6 and above are unallocated: UNDEF, don't fall through */
        unallocated_encoding(s);
        break;
3945    }
3946}
3947
3948static void handle_div(DisasContext *s, bool is_signed, unsigned int sf,
3949                       unsigned int rm, unsigned int rn, unsigned int rd)
3950{
3951    TCGv_i64 tcg_n, tcg_m, tcg_rd;
3952    tcg_rd = cpu_reg(s, rd);
3953
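    /* The 32-bit SDIV operands must be sign-extended to 64 bits so the
     * shared 64-bit helper sees the correct signed values; UDIV can use
     * the zero-extended register reads directly.
     */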
3954    if (!sf && is_signed) {
3955        tcg_n = new_tmp_a64(s);
3956        tcg_m = new_tmp_a64(s);
3957        tcg_gen_ext32s_i64(tcg_n, cpu_reg(s, rn));
3958        tcg_gen_ext32s_i64(tcg_m, cpu_reg(s, rm));
3959    } else {
3960        tcg_n = read_cpu_reg(s, rn, sf);
3961        tcg_m = read_cpu_reg(s, rm, sf);
3962    }
3963
3964    if (is_signed) {
3965        gen_helper_sdiv64(tcg_rd, tcg_n, tcg_m);
3966    } else {
3967        gen_helper_udiv64(tcg_rd, tcg_n, tcg_m);
3968    }
3969
3970    if (!sf) { /* zero extend final result */
3971        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3972    }
3973}
3974
3975/* C5.6.115 LSLV, C5.6.118 LSRV, C5.6.17 ASRV, C5.6.154 RORV */
3976static void handle_shift_reg(DisasContext *s,
3977                             enum a64_shift_type shift_type, unsigned int sf,
3978                             unsigned int rm, unsigned int rn, unsigned int rd)
3979{
3980    TCGv_i64 tcg_shift = tcg_temp_new_i64();
3981    TCGv_i64 tcg_rd = cpu_reg(s, rd);
3982    TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
3983
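    /* The architecture takes the shift count as Rm modulo the register width. */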
3984    tcg_gen_andi_i64(tcg_shift, cpu_reg(s, rm), sf ? 63 : 31);
3985    shift_reg(tcg_rd, tcg_rn, sf, shift_type, tcg_shift);
3986    tcg_temp_free_i64(tcg_shift);
3987}
3988
3989/* CRC32[BHWX], CRC32C[BHWX] */
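/* sz encodes the data size (1 << sz bytes): 0 = B, 1 = H, 2 = W, 3 = X;
 * the X forms additionally require sf == 1.
 */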
3990static void handle_crc32(DisasContext *s,
3991                         unsigned int sf, unsigned int sz, bool crc32c,
3992                         unsigned int rm, unsigned int rn, unsigned int rd)
3993{
3994    TCGv_i64 tcg_acc, tcg_val;
3995    TCGv_i32 tcg_bytes;
3996
3997    if (!arm_dc_feature(s, ARM_FEATURE_CRC)
3998        || (sf == 1 && sz != 3)
3999        || (sf == 0 && sz == 3)) {
4000        unallocated_encoding(s);
4001        return;
4002    }
4003
4004    if (sz == 3) {
4005        tcg_val = cpu_reg(s, rm);
4006    } else {
4007        uint64_t mask;
4008        switch (sz) {
4009        case 0:
4010            mask = 0xFF;
4011            break;
4012        case 1:
4013            mask = 0xFFFF;
4014            break;
4015        case 2:
4016            mask = 0xFFFFFFFF;
4017            break;
4018        default:
4019            g_assert_not_reached();
4020        }
4021        tcg_val = new_tmp_a64(s);
4022        tcg_gen_andi_i64(tcg_val, cpu_reg(s, rm), mask);
4023    }
4024
4025    tcg_acc = cpu_reg(s, rn);
4026    tcg_bytes = tcg_const_i32(1 << sz);
4027
4028    if (crc32c) {
4029        gen_helper_crc32c_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
4030    } else {
4031        gen_helper_crc32_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
4032    }
4033
4034    tcg_temp_free_i32(tcg_bytes);
4035}
4036
4037/* C3.5.8 Data-processing (2 source)
4038 *   31   30  29 28             21 20  16 15    10 9    5 4    0
4039 * +----+---+---+-----------------+------+--------+------+------+
4040 * | sf | 0 | S | 1 1 0 1 0 1 1 0 |  Rm  | opcode |  Rn  |  Rd  |
4041 * +----+---+---+-----------------+------+--------+------+------+
4042 */
4043static void disas_data_proc_2src(DisasContext *s, uint32_t insn)
4044{
4045    unsigned int sf, rm, opcode, rn, rd;
4046    sf = extract32(insn, 31, 1);
4047    rm = extract32(insn, 16, 5);
4048    opcode = extract32(insn, 10, 6);
4049    rn = extract32(insn, 5, 5);
4050    rd = extract32(insn, 0, 5);
4051
4052    if (extract32(insn, 29, 1)) {
4053        unallocated_encoding(s);
4054        return;
4055    }
4056
4057    switch (opcode) {
4058    case 2: /* UDIV */
4059        handle_div(s, false, sf, rm, rn, rd);
4060        break;
4061    case 3: /* SDIV */
4062        handle_div(s, true, sf, rm, rn, rd);
4063        break;
4064    case 8: /* LSLV */
4065        handle_shift_reg(s, A64_SHIFT_TYPE_LSL, sf, rm, rn, rd);
4066        break;
4067    case 9: /* LSRV */
4068        handle_shift_reg(s, A64_SHIFT_TYPE_LSR, sf, rm, rn, rd);
4069        break;
4070    case 10: /* ASRV */
4071        handle_shift_reg(s, A64_SHIFT_TYPE_ASR, sf, rm, rn, rd);
4072        break;
4073    case 11: /* RORV */
4074        handle_shift_reg(s, A64_SHIFT_TYPE_ROR, sf, rm, rn, rd);
4075        break;
4076    case 16:
4077    case 17:
4078    case 18:
4079    case 19:
4080    case 20:
4081    case 21:
4082    case 22:
4083    case 23: /* CRC32 */
4084    {
4085        int sz = extract32(opcode, 0, 2);
4086        bool crc32c = extract32(opcode, 2, 1);
4087        handle_crc32(s, sf, sz, crc32c, rm, rn, rd);
4088        break;
4089    }
4090    default:
4091        unallocated_encoding(s);
4092        break;
4093    }
4094}
4095
4096/* C3.5 Data processing - register */
4097static void disas_data_proc_reg(DisasContext *s, uint32_t insn)
4098{
4099    switch (extract32(insn, 24, 5)) {
4100    case 0x0a: /* Logical (shifted register) */
4101        disas_logic_reg(s, insn);
4102        break;
4103    case 0x0b: /* Add/subtract */
4104        if (insn & (1 << 21)) { /* (extended register) */
4105            disas_add_sub_ext_reg(s, insn);
4106        } else {
4107            disas_add_sub_reg(s, insn);
4108        }
4109        break;
4110    case 0x1b: /* Data-processing (3 source) */
4111        disas_data_proc_3src(s, insn);
4112        break;
4113    case 0x1a:
4114        switch (extract32(insn, 21, 3)) {
4115        case 0x0: /* Add/subtract (with carry) */
4116            disas_adc_sbc(s, insn);
4117            break;
4118        case 0x2: /* Conditional compare */
4119            disas_cc(s, insn); /* both imm and reg forms */
4120            break;
4121        case 0x4: /* Conditional select */
4122            disas_cond_select(s, insn);
4123            break;
4124        case 0x6: /* Data-processing */
4125            if (insn & (1 << 30)) { /* (1 source) */
4126                disas_data_proc_1src(s, insn);
4127            } else {            /* (2 source) */
4128                disas_data_proc_2src(s, insn);
4129            }
4130            break;
4131        default:
4132            unallocated_encoding(s);
4133            break;
4134        }
4135        break;
4136    default:
4137        unallocated_encoding(s);
4138        break;
4139    }
4140}
4141
4142static void handle_fp_compare(DisasContext *s, bool is_double,
4143                              unsigned int rn, unsigned int rm,
4144                              bool cmp_with_zero, bool signal_all_nans)
4145{
4146    TCGv_i64 tcg_flags = tcg_temp_new_i64();
4147    TCGv_ptr fpst = get_fpstatus_ptr();
4148
4149    if (is_double) {
4150        TCGv_i64 tcg_vn, tcg_vm;
4151
4152        tcg_vn = read_fp_dreg(s, rn);
4153        if (cmp_with_zero) {
4154            tcg_vm = tcg_const_i64(0);
4155        } else {
4156            tcg_vm = read_fp_dreg(s, rm);
4157        }
4158        if (signal_all_nans) {
4159            gen_helper_vfp_cmped_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
4160        } else {
4161            gen_helper_vfp_cmpd_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
4162        }
4163        tcg_temp_free_i64(tcg_vn);
4164        tcg_temp_free_i64(tcg_vm);
4165    } else {
4166        TCGv_i32 tcg_vn, tcg_vm;
4167
4168        tcg_vn = read_fp_sreg(s, rn);
4169        if (cmp_with_zero) {
4170            tcg_vm = tcg_const_i32(0);
4171        } else {
4172            tcg_vm = read_fp_sreg(s, rm);
4173        }
4174        if (signal_all_nans) {
4175            gen_helper_vfp_cmpes_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
4176        } else {
4177            gen_helper_vfp_cmps_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
4178        }
4179        tcg_temp_free_i32(tcg_vn);
4180        tcg_temp_free_i32(tcg_vm);
4181    }
4182
4183    tcg_temp_free_ptr(fpst);
4184
4185    gen_set_nzcv(tcg_flags);
4186
4187    tcg_temp_free_i64(tcg_flags);
4188}
4189
4190/* C3.6.22 Floating point compare
4191 *   31  30  29 28       24 23  22  21 20  16 15 14 13  10    9    5 4     0
4192 * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
4193 * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | op  | 1 0 0 0 |  Rn  |  op2  |
4194 * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
4195 */
4196static void disas_fp_compare(DisasContext *s, uint32_t insn)
4197{
4198    unsigned int mos, type, rm, op, rn, opc, op2r;
4199
4200    mos = extract32(insn, 29, 3);
4201    type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
4202    rm = extract32(insn, 16, 5);
4203    op = extract32(insn, 14, 2);
4204    rn = extract32(insn, 5, 5);
4205    opc = extract32(insn, 3, 2);
4206    op2r = extract32(insn, 0, 3);
4207
4208    if (mos || op || op2r || type > 1) {
4209        unallocated_encoding(s);
4210        return;
4211    }
4212
4213    if (!fp_access_check(s)) {
4214        return;
4215    }
4216
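    /* opc<0> selects comparison against zero (the "#0.0" forms) and
     * opc<1> selects the signaling variants (FCMPE).
     */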
4217    handle_fp_compare(s, type, rn, rm, opc & 1, opc & 2);
4218}
4219
4220/* C3.6.23 Floating point conditional compare
4221 *   31  30  29 28       24 23  22  21 20  16 15  12 11 10 9    5  4   3    0
4222 * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
4223 * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | cond | 0 1 |  Rn  | op | nzcv |
4224 * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
4225 */
4226static void disas_fp_ccomp(DisasContext *s, uint32_t insn)
4227{
4228    unsigned int mos, type, rm, cond, rn, op, nzcv;
4229    TCGv_i64 tcg_flags;
4230    TCGLabel *label_continue = NULL;
4231
4232    mos = extract32(insn, 29, 3);
4233    type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
4234    rm = extract32(insn, 16, 5);
4235    cond = extract32(insn, 12, 4);
4236    rn = extract32(insn, 5, 5);
4237    op = extract32(insn, 4, 1);
4238    nzcv = extract32(insn, 0, 4);
4239
4240    if (mos || type > 1) {
4241        unallocated_encoding(s);
4242        return;
4243    }
4244
4245    if (!fp_access_check(s)) {
4246        return;
4247    }
4248
4249    if (cond < 0x0e) { /* not always */
4250        TCGLabel *label_match = gen_new_label();
4251        label_continue = gen_new_label();
4252        arm_gen_test_cc(cond, label_match);
4253        /* nomatch: */
4254        tcg_flags = tcg_const_i64(nzcv << 28);
4255        gen_set_nzcv(tcg_flags);
4256        tcg_temp_free_i64(tcg_flags);
4257        tcg_gen_br(label_continue);
4258        gen_set_label(label_match);
4259    }
4260
4261    handle_fp_compare(s, type, rn, rm, false, op);
4262
4263    if (cond < 0x0e) {
4264        gen_set_label(label_continue);
4265    }
4266}
4267
4268/* C3.6.24 Floating point conditional select
4269 *   31  30  29 28       24 23  22  21 20  16 15  12 11 10 9    5 4    0
4270 * +---+---+---+-----------+------+---+------+------+-----+------+------+
4271 * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | cond | 1 1 |  Rn  |  Rd  |
4272 * +---+---+---+-----------+------+---+------+------+-----+------+------+
4273 */
4274static void disas_fp_csel(DisasContext *s, uint32_t insn)
4275{
4276    unsigned int mos, type, rm, cond, rn, rd;
4277    TCGv_i64 t_true, t_false, t_zero;
4278    DisasCompare64 c;
4279
4280    mos = extract32(insn, 29, 3);
4281    type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
4282    rm = extract32(insn, 16, 5);
4283    cond = extract32(insn, 12, 4);
4284    rn = extract32(insn, 5, 5);
4285    rd = extract32(insn, 0, 5);
4286
4287    if (mos || type > 1) {
4288        unallocated_encoding(s);
4289        return;
4290    }
4291
4292    if (!fp_access_check(s)) {
4293        return;
4294    }
4295
4296    /* Zero extend sreg inputs to 64 bits now.  */
4297    t_true = tcg_temp_new_i64();
4298    t_false = tcg_temp_new_i64();
4299    read_vec_element(s, t_true, rn, 0, type ? MO_64 : MO_32);
4300    read_vec_element(s, t_false, rm, 0, type ? MO_64 : MO_32);
4301
4302    a64_test_cc(&c, cond);
4303    t_zero = tcg_const_i64(0);
4304    tcg_gen_movcond_i64(c.cond, t_true, c.value, t_zero, t_true, t_false);
4305    tcg_temp_free_i64(t_zero);
4306    tcg_temp_free_i64(t_false);
4307    a64_free_cc(&c);
4308
4309    /* Note that sregs write back zeros to the high bits,
4310     * and we've already done the zero-extension.  */
4311    write_fp_dreg(s, rd, t_true);
4312    tcg_temp_free_i64(t_true);
4313}
4314
4315/* C3.6.25 Floating-point data-processing (1 source) - single precision */
4316static void handle_fp_1src_single(DisasContext *s, int opcode, int rd, int rn)
4317{
4318    TCGv_ptr fpst;
4319    TCGv_i32 tcg_op;
4320    TCGv_i32 tcg_res;
4321
4322    fpst = get_fpstatus_ptr();
4323    tcg_op = read_fp_sreg(s, rn);
4324    tcg_res = tcg_temp_new_i32();
4325
4326    switch (opcode) {
4327    case 0x0: /* FMOV */
4328        tcg_gen_mov_i32(tcg_res, tcg_op);
4329        break;
4330    case 0x1: /* FABS */
4331        gen_helper_vfp_abss(tcg_res, tcg_op);
4332        break;
4333    case 0x2: /* FNEG */
4334        gen_helper_vfp_negs(tcg_res, tcg_op);
4335        break;
4336    case 0x3: /* FSQRT */
4337        gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env);
4338        break;
4339    case 0x8: /* FRINTN */
4340    case 0x9: /* FRINTP */
4341    case 0xa: /* FRINTM */
4342    case 0xb: /* FRINTZ */
4343    case 0xc: /* FRINTA */
4344    {
4345        TCGv_i32 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(opcode & 7));
4346
4347        gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
4348        gen_helper_rints(tcg_res, tcg_op, fpst);
4349
4350        gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
4351        tcg_temp_free_i32(tcg_rmode);
4352        break;
4353    }
4354    case 0xe: /* FRINTX */
4355        gen_helper_rints_exact(tcg_res, tcg_op, fpst);
4356        break;
4357    case 0xf: /* FRINTI */
4358        gen_helper_rints(tcg_res, tcg_op, fpst);
4359        break;
4360    default:
4361        abort();
4362    }
4363
4364    write_fp_sreg(s, rd, tcg_res);
4365
4366    tcg_temp_free_ptr(fpst);
4367    tcg_temp_free_i32(tcg_op);
4368    tcg_temp_free_i32(tcg_res);
4369}
4370
4371/* C3.6.25 Floating-point data-processing (1 source) - double precision */
4372static void handle_fp_1src_double(DisasContext *s, int opcode, int rd, int rn)
4373{
4374    TCGv_ptr fpst;
4375    TCGv_i64 tcg_op;
4376    TCGv_i64 tcg_res;
4377
4378    fpst = get_fpstatus_ptr();
4379    tcg_op = read_fp_dreg(s, rn);
4380    tcg_res = tcg_temp_new_i64();
4381
4382    switch (opcode) {
4383    case 0x0: /* FMOV */
4384        tcg_gen_mov_i64(tcg_res, tcg_op);
4385        break;
4386    case 0x1: /* FABS */
4387        gen_helper_vfp_absd(tcg_res, tcg_op);
4388        break;
4389    case 0x2: /* FNEG */
4390        gen_helper_vfp_negd(tcg_res, tcg_op);
4391        break;
4392    case 0x3: /* FSQRT */
4393        gen_helper_vfp_sqrtd(tcg_res, tcg_op, cpu_env);
4394        break;
4395    case 0x8: /* FRINTN */
4396    case 0x9: /* FRINTP */
4397    case 0xa: /* FRINTM */
4398    case 0xb: /* FRINTZ */
4399    case 0xc: /* FRINTA */
4400    {
4401        TCGv_i32 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(opcode & 7));
4402
4403        gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
4404        gen_helper_rintd(tcg_res, tcg_op, fpst);
4405
4406        gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
4407        tcg_temp_free_i32(tcg_rmode);
4408        break;
4409    }
4410    case 0xe: /* FRINTX */
4411        gen_helper_rintd_exact(tcg_res, tcg_op, fpst);
4412        break;
4413    case 0xf: /* FRINTI */
4414        gen_helper_rintd(tcg_res, tcg_op, fpst);
4415        break;
4416    default:
4417        abort();
4418    }
4419
4420    write_fp_dreg(s, rd, tcg_res);
4421
4422    tcg_temp_free_ptr(fpst);
4423    tcg_temp_free_i64(tcg_op);
4424    tcg_temp_free_i64(tcg_res);
4425}
4426
4427static void handle_fp_fcvt(DisasContext *s, int opcode,
4428                           int rd, int rn, int dtype, int ntype)
4429{
4430    switch (ntype) {
4431    case 0x0:
4432    {
4433        TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
4434        if (dtype == 1) {
4435            /* Single to double */
4436            TCGv_i64 tcg_rd = tcg_temp_new_i64();
4437            gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, cpu_env);
4438            write_fp_dreg(s, rd, tcg_rd);
4439            tcg_temp_free_i64(tcg_rd);
4440        } else {
4441            /* Single to half */
4442            TCGv_i32 tcg_rd = tcg_temp_new_i32();
4443            gen_helper_vfp_fcvt_f32_to_f16(tcg_rd, tcg_rn, cpu_env);
4444            /* write_fp_sreg is OK here because top half of tcg_rd is zero */
4445            write_fp_sreg(s, rd, tcg_rd);
4446            tcg_temp_free_i32(tcg_rd);
4447        }
4448        tcg_temp_free_i32(tcg_rn);
4449        break;
4450    }
4451    case 0x1:
4452    {
4453        TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
4454        TCGv_i32 tcg_rd = tcg_temp_new_i32();
4455        if (dtype == 0) {
4456            /* Double to single */
4457            gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, cpu_env);
4458        } else {
4459            /* Double to half */
4460            gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, cpu_env);
4461            /* write_fp_sreg is OK here because top half of tcg_rd is zero */
4462        }
4463        write_fp_sreg(s, rd, tcg_rd);
4464        tcg_temp_free_i32(tcg_rd);
4465        tcg_temp_free_i64(tcg_rn);
4466        break;
4467    }
4468    case 0x3:
4469    {
4470        TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
4471        tcg_gen_ext16u_i32(tcg_rn, tcg_rn);
4472        if (dtype == 0) {
4473            /* Half to single */
4474            TCGv_i32 tcg_rd = tcg_temp_new_i32();
4475            gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, cpu_env);
4476            write_fp_sreg(s, rd, tcg_rd);
4477            tcg_temp_free_i32(tcg_rd);
4478        } else {
4479            /* Half to double */
4480            TCGv_i64 tcg_rd = tcg_temp_new_i64();
4481            gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, cpu_env);
4482            write_fp_dreg(s, rd, tcg_rd);
4483            tcg_temp_free_i64(tcg_rd);
4484        }
4485        tcg_temp_free_i32(tcg_rn);
4486        break;
4487    }
4488    default:
4489        abort();
4490    }
4491}
4492
4493/* C3.6.25 Floating point data-processing (1 source)
4494 *   31  30  29 28       24 23  22  21 20    15 14       10 9    5 4    0
4495 * +---+---+---+-----------+------+---+--------+-----------+------+------+
4496 * | M | 0 | S | 1 1 1 1 0 | type | 1 | opcode | 1 0 0 0 0 |  Rn  |  Rd  |
4497 * +---+---+---+-----------+------+---+--------+-----------+------+------+
4498 */
4499static void disas_fp_1src(DisasContext *s, uint32_t insn)
4500{
4501    int type = extract32(insn, 22, 2);
4502    int opcode = extract32(insn, 15, 6);
4503    int rn = extract32(insn, 5, 5);
4504    int rd = extract32(insn, 0, 5);
4505
4506    switch (opcode) {
4507    case 0x4: case 0x5: case 0x7:
4508    {
4509        /* FCVT between half, single and double precision */
4510        int dtype = extract32(opcode, 0, 2);
4511        if (type == 2 || dtype == type) {
4512            unallocated_encoding(s);
4513            return;
4514        }
4515        if (!fp_access_check(s)) {
4516            return;
4517        }
4518
4519        handle_fp_fcvt(s, opcode, rd, rn, dtype, type);
4520        break;
4521    }
4522    case 0x0 ... 0x3:
4523    case 0x8 ... 0xc:
4524    case 0xe ... 0xf:
4525        /* 32-to-32 and 64-to-64 ops */
4526        switch (type) {
4527        case 0:
4528            if (!fp_access_check(s)) {
4529                return;
4530            }
4531
4532            handle_fp_1src_single(s, opcode, rd, rn);
4533            break;
4534        case 1:
4535            if (!fp_access_check(s)) {
4536                return;
4537            }
4538
4539            handle_fp_1src_double(s, opcode, rd, rn);
4540            break;
4541        default:
4542            unallocated_encoding(s);
4543        }
4544        break;
4545    default:
4546        unallocated_encoding(s);
4547        break;
4548    }
4549}
4550
4551/* C3.6.26 Floating-point data-processing (2 source) - single precision */
4552static void handle_fp_2src_single(DisasContext *s, int opcode,
4553                                  int rd, int rn, int rm)
4554{
4555    TCGv_i32 tcg_op1;
4556    TCGv_i32 tcg_op2;
4557    TCGv_i32 tcg_res;
4558    TCGv_ptr fpst;
4559
4560    tcg_res = tcg_temp_new_i32();
4561    fpst = get_fpstatus_ptr();
4562    tcg_op1 = read_fp_sreg(s, rn);
4563    tcg_op2 = read_fp_sreg(s, rm);
4564
4565    switch (opcode) {
4566    case 0x0: /* FMUL */
4567        gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
4568        break;
4569    case 0x1: /* FDIV */
4570        gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
4571        break;
4572    case 0x2: /* FADD */
4573        gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
4574        break;
4575    case 0x3: /* FSUB */
4576        gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
4577        break;
4578    case 0x4: /* FMAX */
4579        gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
4580        break;
4581    case 0x5: /* FMIN */
4582        gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
4583        break;
4584    case 0x6: /* FMAXNM */
4585        gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
4586        break;
4587    case 0x7: /* FMINNM */
4588        gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
4589        break;
4590    case 0x8: /* FNMUL */
4591        gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
4592        gen_helper_vfp_negs(tcg_res, tcg_res);
4593        break;
4594    }
4595
4596    write_fp_sreg(s, rd, tcg_res);
4597
4598    tcg_temp_free_ptr(fpst);
4599    tcg_temp_free_i32(tcg_op1);
4600    tcg_temp_free_i32(tcg_op2);
4601    tcg_temp_free_i32(tcg_res);
4602}
4603
4604/* C3.6.26 Floating-point data-processing (2 source) - double precision */
4605static void handle_fp_2src_double(DisasContext *s, int opcode,
4606                                  int rd, int rn, int rm)
4607{
4608    TCGv_i64 tcg_op1;
4609    TCGv_i64 tcg_op2;
4610    TCGv_i64 tcg_res;
4611    TCGv_ptr fpst;
4612
4613    tcg_res = tcg_temp_new_i64();
4614    fpst = get_fpstatus_ptr();
4615    tcg_op1 = read_fp_dreg(s, rn);
4616    tcg_op2 = read_fp_dreg(s, rm);
4617
4618    switch (opcode) {
4619    case 0x0: /* FMUL */
4620        gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
4621        break;
4622    case 0x1: /* FDIV */
4623        gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
4624        break;
4625    case 0x2: /* FADD */
4626        gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
4627        break;
4628    case 0x3: /* FSUB */
4629        gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
4630        break;
4631    case 0x4: /* FMAX */
4632        gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
4633        break;
4634    case 0x5: /* FMIN */
4635        gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
4636        break;
4637    case 0x6: /* FMAXNM */
4638        gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
4639        break;
4640    case 0x7: /* FMINNM */
4641        gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
4642        break;
4643    case 0x8: /* FNMUL */
4644        gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
4645        gen_helper_vfp_negd(tcg_res, tcg_res);
4646        break;
4647    }
4648
4649    write_fp_dreg(s, rd, tcg_res);
4650
4651    tcg_temp_free_ptr(fpst);
4652    tcg_temp_free_i64(tcg_op1);
4653    tcg_temp_free_i64(tcg_op2);
4654    tcg_temp_free_i64(tcg_res);
4655}
4656
4657/* C3.6.26 Floating point data-processing (2 source)
4658 *   31  30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
4659 * +---+---+---+-----------+------+---+------+--------+-----+------+------+
4660 * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | opcode | 1 0 |  Rn  |  Rd  |
4661 * +---+---+---+-----------+------+---+------+--------+-----+------+------+
4662 */
4663static void disas_fp_2src(DisasContext *s, uint32_t insn)
4664{
4665    int type = extract32(insn, 22, 2);
4666    int rd = extract32(insn, 0, 5);
4667    int rn = extract32(insn, 5, 5);
4668    int rm = extract32(insn, 16, 5);
4669    int opcode = extract32(insn, 12, 4);
4670
4671    if (opcode > 8) {
4672        unallocated_encoding(s);
4673        return;
4674    }
4675
4676    switch (type) {
4677    case 0:
4678        if (!fp_access_check(s)) {
4679            return;
4680        }
4681        handle_fp_2src_single(s, opcode, rd, rn, rm);
4682        break;
4683    case 1:
4684        if (!fp_access_check(s)) {
4685            return;
4686        }
4687        handle_fp_2src_double(s, opcode, rd, rn, rm);
4688        break;
4689    default:
4690        unallocated_encoding(s);
4691    }
4692}
4693
4694/* C3.6.27 Floating-point data-processing (3 source) - single precision */
4695static void handle_fp_3src_single(DisasContext *s, bool o0, bool o1,
4696                                  int rd, int rn, int rm, int ra)
4697{
4698    TCGv_i32 tcg_op1, tcg_op2, tcg_op3;
4699    TCGv_i32 tcg_res = tcg_temp_new_i32();
4700    TCGv_ptr fpst = get_fpstatus_ptr();
4701
4702    tcg_op1 = read_fp_sreg(s, rn);
4703    tcg_op2 = read_fp_sreg(s, rm);
4704    tcg_op3 = read_fp_sreg(s, ra);
4705
4706    /* These are fused multiply-add, and must be done as one
4707     * floating point operation with no rounding between the
4708     * multiplication and addition steps.
4709     * NB that doing the negations here as separate steps is
4710     * correct: an input NaN should come out with its sign bit
4711     * flipped if it is a negated input.
4712     */
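    /* o1:o0 selects FMADD (00: a + n*m), FMSUB (01: a - n*m),
     * FNMADD (10: -a - n*m) or FNMSUB (11: -a + n*m).
     */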
4713    if (o1) {
4714        gen_helper_vfp_negs(tcg_op3, tcg_op3);
4715    }
4716
4717    if (o0 != o1) {
4718        gen_helper_vfp_negs(tcg_op1, tcg_op1);
4719    }
4720
4721    gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
4722
4723    write_fp_sreg(s, rd, tcg_res);
4724
4725    tcg_temp_free_ptr(fpst);
4726    tcg_temp_free_i32(tcg_op1);
4727    tcg_temp_free_i32(tcg_op2);
4728    tcg_temp_free_i32(tcg_op3);
4729    tcg_temp_free_i32(tcg_res);
4730}
4731
4732/* C3.6.27 Floating-point data-processing (3 source) - double precision */
4733static void handle_fp_3src_double(DisasContext *s, bool o0, bool o1,
4734                                  int rd, int rn, int rm, int ra)
4735{
4736    TCGv_i64 tcg_op1, tcg_op2, tcg_op3;
4737    TCGv_i64 tcg_res = tcg_temp_new_i64();
4738    TCGv_ptr fpst = get_fpstatus_ptr();
4739
4740    tcg_op1 = read_fp_dreg(s, rn);
4741    tcg_op2 = read_fp_dreg(s, rm);
4742    tcg_op3 = read_fp_dreg(s, ra);
4743
4744    /* These are fused multiply-add, and must be done as one
4745     * floating point operation with no rounding between the
4746     * multiplication and addition steps.
4747     * NB that doing the negations here as separate steps is
4748     * correct: an input NaN should come out with its sign bit
4749     * flipped if it is a negated input.
4750     */
4751    if (o1) {
4752        gen_helper_vfp_negd(tcg_op3, tcg_op3);
4753    }
4754
4755    if (o0 != o1) {
4756        gen_helper_vfp_negd(tcg_op1, tcg_op1);
4757    }
4758
4759    gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
4760
4761    write_fp_dreg(s, rd, tcg_res);
4762
4763    tcg_temp_free_ptr(fpst);
4764    tcg_temp_free_i64(tcg_op1);
4765    tcg_temp_free_i64(tcg_op2);
4766    tcg_temp_free_i64(tcg_op3);
4767    tcg_temp_free_i64(tcg_res);
4768}
4769
4770/* C3.6.27 Floating point data-processing (3 source)
4771 *   31  30  29 28       24 23  22  21  20  16  15  14  10 9    5 4    0
4772 * +---+---+---+-----------+------+----+------+----+------+------+------+
4773 * | M | 0 | S | 1 1 1 1 1 | type | o1 |  Rm  | o0 |  Ra  |  Rn  |  Rd  |
4774 * +---+---+---+-----------+------+----+------+----+------+------+------+
4775 */
4776static void disas_fp_3src(DisasContext *s, uint32_t insn)
4777{
4778    int type = extract32(insn, 22, 2);
4779    int rd = extract32(insn, 0, 5);
4780    int rn = extract32(insn, 5, 5);
4781    int ra = extract32(insn, 10, 5);
4782    int rm = extract32(insn, 16, 5);
4783    bool o0 = extract32(insn, 15, 1);
4784    bool o1 = extract32(insn, 21, 1);
4785
4786    switch (type) {
4787    case 0:
4788        if (!fp_access_check(s)) {
4789            return;
4790        }
4791        handle_fp_3src_single(s, o0, o1, rd, rn, rm, ra);
4792        break;
4793    case 1:
4794        if (!fp_access_check(s)) {
4795            return;
4796        }
4797        handle_fp_3src_double(s, o0, o1, rd, rn, rm, ra);
4798        break;
4799    default:
4800        unallocated_encoding(s);
4801    }
4802}
4803
4804/* C3.6.28 Floating point immediate
4805 *   31  30  29 28       24 23  22  21 20        13 12   10 9    5 4    0
4806 * +---+---+---+-----------+------+---+------------+-------+------+------+
4807 * | M | 0 | S | 1 1 1 1 0 | type | 1 |    imm8    | 1 0 0 | imm5 |  Rd  |
4808 * +---+---+---+-----------+------+---+------------+-------+------+------+
4809 */
4810static void disas_fp_imm(DisasContext *s, uint32_t insn)
4811{
4812    int rd = extract32(insn, 0, 5);
4813    int imm8 = extract32(insn, 13, 8);
4814    int is_double = extract32(insn, 22, 2);
4815    uint64_t imm;
4816    TCGv_i64 tcg_res;
4817
4818    if (is_double > 1) {
4819        unallocated_encoding(s);
4820        return;
4821    }
4822
4823    if (!fp_access_check(s)) {
4824        return;
4825    }
4826
4827    /* The imm8 encodes the sign bit, enough bits to represent
4828     * an exponent in the range 01....1xx to 10....0xx,
4829     * and the most significant 4 bits of the mantissa; see
4830     * VFPExpandImm() in the v8 ARM ARM.
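     * For example imm8 == 0x70 expands to 0x3f800000 (1.0f) in the
     * single-precision case.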
4831     */
4832    if (is_double) {
4833        imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
4834            (extract32(imm8, 6, 1) ? 0x3fc0 : 0x4000) |
4835            extract32(imm8, 0, 6);
4836        imm <<= 48;
4837    } else {
4838        imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
4839            (extract32(imm8, 6, 1) ? 0x3e00 : 0x4000) |
4840            (extract32(imm8, 0, 6) << 3);
4841        imm <<= 16;
4842    }
4843
4844    tcg_res = tcg_const_i64(imm);
4845    write_fp_dreg(s, rd, tcg_res);
4846    tcg_temp_free_i64(tcg_res);
4847}
4848
4849/* Handle floating point <=> fixed point conversions. Note that we can
4850 * also deal with fp <=> integer conversions as a special case (scale == 64).
4851 * OPTME: consider handling that special case specially or at least skipping
4852 * the call to scalbn in the helpers for zero shifts.
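 * (The helpers are passed a shift of 64 - scale, so scale == 64 means
 * a shift of zero.)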
4853 */
4854static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode,
4855                           bool itof, int rmode, int scale, int sf, int type)
4856{
4857    bool is_signed = !(opcode & 1);
4858    bool is_double = type;
4859    TCGv_ptr tcg_fpstatus;
4860    TCGv_i32 tcg_shift;
4861
4862    tcg_fpstatus = get_fpstatus_ptr();
4863
4864    tcg_shift = tcg_const_i32(64 - scale);
4865
4866    if (itof) {
4867        TCGv_i64 tcg_int = cpu_reg(s, rn);
4868        if (!sf) {
4869            TCGv_i64 tcg_extend = new_tmp_a64(s);
4870
4871            if (is_signed) {
4872                tcg_gen_ext32s_i64(tcg_extend, tcg_int);
4873            } else {
4874                tcg_gen_ext32u_i64(tcg_extend, tcg_int);
4875            }
4876
4877            tcg_int = tcg_extend;
4878        }
4879
4880        if (is_double) {
4881            TCGv_i64 tcg_double = tcg_temp_new_i64();
4882            if (is_signed) {
4883                gen_helper_vfp_sqtod(tcg_double, tcg_int,
4884                                     tcg_shift, tcg_fpstatus);
4885            } else {
4886                gen_helper_vfp_uqtod(tcg_double, tcg_int,
4887                                     tcg_shift, tcg_fpstatus);
4888            }
4889            write_fp_dreg(s, rd, tcg_double);
4890            tcg_temp_free_i64(tcg_double);
4891        } else {
4892            TCGv_i32 tcg_single = tcg_temp_new_i32();
4893            if (is_signed) {
4894                gen_helper_vfp_sqtos(tcg_single, tcg_int,
4895                                     tcg_shift, tcg_fpstatus);
4896            } else {
4897                gen_helper_vfp_uqtos(tcg_single, tcg_int,
4898                                     tcg_shift, tcg_fpstatus);
4899            }
4900            write_fp_sreg(s, rd, tcg_single);
4901            tcg_temp_free_i32(tcg_single);
4902        }
4903    } else {
4904        TCGv_i64 tcg_int = cpu_reg(s, rd);
4905        TCGv_i32 tcg_rmode;
4906
4907        if (extract32(opcode, 2, 1)) {
4908            /* There are too many rounding modes to all fit into rmode,
4909             * so FCVTA[US] is a special case.
4910             */
4911            rmode = FPROUNDING_TIEAWAY;
4912        }
4913
4914        tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
4915
4916        gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
4917
4918        if (is_double) {
4919            TCGv_i64 tcg_double = read_fp_dreg(s, rn);
4920            if (is_signed) {
4921                if (!sf) {
4922                    gen_helper_vfp_tosld(tcg_int, tcg_double,
4923                                         tcg_shift, tcg_fpstatus);
4924                } else {
4925                    gen_helper_vfp_tosqd(tcg_int, tcg_double,
4926                                         tcg_shift, tcg_fpstatus);
4927                }
4928            } else {
4929                if (!sf) {
4930                    gen_helper_vfp_tould(tcg_int, tcg_double,
4931                                         tcg_shift, tcg_fpstatus);
4932                } else {
4933                    gen_helper_vfp_touqd(tcg_int, tcg_double,
4934                                         tcg_shift, tcg_fpstatus);
4935                }
4936            }
4937            tcg_temp_free_i64(tcg_double);
4938        } else {
4939            TCGv_i32 tcg_single = read_fp_sreg(s, rn);
4940            if (sf) {
4941                if (is_signed) {
4942                    gen_helper_vfp_tosqs(tcg_int, tcg_single,
4943                                         tcg_shift, tcg_fpstatus);
4944                } else {
4945                    gen_helper_vfp_touqs(tcg_int, tcg_single,
4946                                         tcg_shift, tcg_fpstatus);
4947                }
4948            } else {
4949                TCGv_i32 tcg_dest = tcg_temp_new_i32();
4950                if (is_signed) {
4951                    gen_helper_vfp_tosls(tcg_dest, tcg_single,
4952                                         tcg_shift, tcg_fpstatus);
4953                } else {
4954                    gen_helper_vfp_touls(tcg_dest, tcg_single,
4955                                         tcg_shift, tcg_fpstatus);
4956                }
4957                tcg_gen_extu_i32_i64(tcg_int, tcg_dest);
4958                tcg_temp_free_i32(tcg_dest);
4959            }
4960            tcg_temp_free_i32(tcg_single);
4961        }
4962
4963        gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
4964        tcg_temp_free_i32(tcg_rmode);
4965
4966        if (!sf) {
4967            tcg_gen_ext32u_i64(tcg_int, tcg_int);
4968        }
4969    }
4970
4971    tcg_temp_free_ptr(tcg_fpstatus);
4972    tcg_temp_free_i32(tcg_shift);
4973}
4974
4975/* C3.6.29 Floating point <-> fixed point conversions
4976 *   31   30  29 28       24 23  22  21 20   19 18    16 15   10 9    5 4    0
4977 * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
4978 * | sf | 0 | S | 1 1 1 1 0 | type | 0 | rmode | opcode | scale |  Rn  |  Rd  |
4979 * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
4980 */
4981static void disas_fp_fixed_conv(DisasContext *s, uint32_t insn)
4982{
4983    int rd = extract32(insn, 0, 5);
4984    int rn = extract32(insn, 5, 5);
4985    int scale = extract32(insn, 10, 6);
4986    int opcode = extract32(insn, 16, 3);
4987    int rmode = extract32(insn, 19, 2);
4988    int type = extract32(insn, 22, 2);
4989    bool sbit = extract32(insn, 29, 1);
4990    bool sf = extract32(insn, 31, 1);
4991    bool itof;
4992
4993    if (sbit || (type > 1)
4994        || (!sf && scale < 32)) {
4995        unallocated_encoding(s);
4996        return;
4997    }
4998
4999    switch ((rmode << 3) | opcode) {
5000    case 0x2: /* SCVTF */
5001    case 0x3: /* UCVTF */
5002        itof = true;
5003        break;
5004    case 0x18: /* FCVTZS */
5005    case 0x19: /* FCVTZU */
5006        itof = false;
5007        break;
5008    default:
5009        unallocated_encoding(s);
5010        return;
5011    }
5012
5013    if (!fp_access_check(s)) {
5014        return;
5015    }
5016
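        /* Per the ARM ARM, fbits == 64 - scale, so e.g. scale == 63 requests
         * one fractional bit; the pure-integer conversions below pass
         * scale == 64, i.e. zero fractional bits.
         */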
5017    handle_fpfpcvt(s, rd, rn, opcode, itof, FPROUNDING_ZERO, scale, sf, type);
5018}
5019
5020static void handle_fmov(DisasContext *s, int rd, int rn, int type, bool itof)
5021{
5022    /* FMOV: gpr to or from float, double, or top half of quad fp reg,
5023     * without conversion.
5024     */
5025
5026    if (itof) {
5027        TCGv_i64 tcg_rn = cpu_reg(s, rn);
5028
5029        switch (type) {
5030        case 0:
5031        {
5032            /* 32 bit */
5033            TCGv_i64 tmp = tcg_temp_new_i64();
5034            tcg_gen_ext32u_i64(tmp, tcg_rn);
5035            tcg_gen_st_i64(tmp, cpu_env, fp_reg_offset(s, rd, MO_64));
5036            tcg_gen_movi_i64(tmp, 0);
5037            tcg_gen_st_i64(tmp, cpu_env, fp_reg_hi_offset(s, rd));
5038            tcg_temp_free_i64(tmp);
5039            break;
5040        }
5041        case 1:
5042        {
5043            /* 64 bit */
5044            TCGv_i64 tmp = tcg_const_i64(0);
5045            tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_offset(s, rd, MO_64));
5046            tcg_gen_st_i64(tmp, cpu_env, fp_reg_hi_offset(s, rd));
5047            tcg_temp_free_i64(tmp);
5048            break;
5049        }
5050        case 2:
5051            /* 64 bit to top half. */
5052            tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_hi_offset(s, rd));
5053            break;
5054        }
5055    } else {
5056        TCGv_i64 tcg_rd = cpu_reg(s, rd);
5057
5058        switch (type) {
5059        case 0:
5060            /* 32 bit */
5061            tcg_gen_ld32u_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_32));
5062            break;
5063        case 1:
5064            /* 64 bit */
5065            tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_64));
5066            break;
5067        case 2:
5068            /* 64 bits from top half */
5069            tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_hi_offset(s, rn));
5070            break;
5071        }
5072    }
5073}
5074
5075/* C3.6.30 Floating point <-> integer conversions
5076 *   31   30  29 28       24 23  22  21 20   19 18 16 15         10 9  5 4  0
5077 * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
5078 * | sf | 0 | S | 1 1 1 1 0 | type | 1 | rmode | opc | 0 0 0 0 0 0 | Rn | Rd |
5079 * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
5080 */
5081static void disas_fp_int_conv(DisasContext *s, uint32_t insn)
5082{
5083    int rd = extract32(insn, 0, 5);
5084    int rn = extract32(insn, 5, 5);
5085    int opcode = extract32(insn, 16, 3);
5086    int rmode = extract32(insn, 19, 2);
5087    int type = extract32(insn, 22, 2);
5088    bool sbit = extract32(insn, 29, 1);
5089    bool sf = extract32(insn, 31, 1);
5090
5091    if (sbit) {
5092        unallocated_encoding(s);
5093        return;
5094    }
5095
5096    if (opcode > 5) {
5097        /* FMOV */
5098        bool itof = opcode & 1;
5099
5100        if (rmode >= 2) {
5101            unallocated_encoding(s);
5102            return;
5103        }
5104
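            /* For FMOV, rmode does not name a rounding mode: together with
             * sf and type it selects which half of which register moves,
             * as the switch below checks.
             */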
5105        switch (sf << 3 | type << 1 | rmode) {
5106        case 0x0: /* 32 bit */
5107        case 0xa: /* 64 bit */
5108        case 0xd: /* 64 bit to top half of quad */
5109            break;
5110        default:
5111            /* all other sf/type/rmode combinations are invalid */
5112            unallocated_encoding(s);
5113            return;
5114        }
5115
5116        if (!fp_access_check(s)) {
5117            return;
5118        }
5119        handle_fmov(s, rd, rn, type, itof);
5120    } else {
5121        /* actual FP conversions */
5122        bool itof = extract32(opcode, 1, 1);
5123
5124        if (type > 1 || (rmode != 0 && opcode > 1)) {
5125            unallocated_encoding(s);
5126            return;
5127        }
5128
5129        if (!fp_access_check(s)) {
5130            return;
5131        }
5132        handle_fpfpcvt(s, rd, rn, opcode, itof, rmode, 64, sf, type);
5133    }
5134}
5135
5136/* FP-specific subcases of table C3-6 (SIMD and FP data processing)
5137 *   31  30  29 28     25 24                          0
5138 * +---+---+---+---------+-----------------------------+
5139 * |   | 0 |   | 1 1 1 1 |                             |
5140 * +---+---+---+---------+-----------------------------+
5141 */
5142static void disas_data_proc_fp(DisasContext *s, uint32_t insn)
5143{
5144    if (extract32(insn, 24, 1)) {
5145        /* Floating point data-processing (3 source) */
5146        disas_fp_3src(s, insn);
5147    } else if (extract32(insn, 21, 1) == 0) {
5148        /* Floating point to fixed point conversions */
5149        disas_fp_fixed_conv(s, insn);
5150    } else {
5151        switch (extract32(insn, 10, 2)) {
5152        case 1:
5153            /* Floating point conditional compare */
5154            disas_fp_ccomp(s, insn);
5155            break;
5156        case 2:
5157            /* Floating point data-processing (2 source) */
5158            disas_fp_2src(s, insn);
5159            break;
5160        case 3:
5161            /* Floating point conditional select */
5162            disas_fp_csel(s, insn);
5163            break;
5164        case 0:
5165            switch (ctz32(extract32(insn, 12, 4))) {
5166            case 0: /* [15:12] == xxx1 */
5167                /* Floating point immediate */
5168                disas_fp_imm(s, insn);
5169                break;
5170            case 1: /* [15:12] == xx10 */
5171                /* Floating point compare */
5172                disas_fp_compare(s, insn);
5173                break;
5174            case 2: /* [15:12] == x100 */
5175                /* Floating point data-processing (1 source) */
5176                disas_fp_1src(s, insn);
5177                break;
5178            case 3: /* [15:12] == 1000 */
5179                unallocated_encoding(s);
5180                break;
5181            default: /* [15:12] == 0000 */
5182                /* Floating point <-> integer conversions */
5183                disas_fp_int_conv(s, insn);
5184                break;
5185            }
5186            break;
5187        }
5188    }
5189}
5190
5191static void do_ext64(DisasContext *s, TCGv_i64 tcg_left, TCGv_i64 tcg_right,
5192                     int pos)
5193{
5194    /* Extract 64 bits from the middle of two concatenated 64 bit
5195     * vector register slices left:right. The extracted bits start
5196     * at 'pos' bits into the right (least significant) side.
5197     * We return the result in tcg_right, and guarantee not to
5198     * trash tcg_left.
5199     */
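        /* For example, pos == 8 yields (right >> 8) | (left << 56). */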
5200    TCGv_i64 tcg_tmp = tcg_temp_new_i64();
5201    assert(pos > 0 && pos < 64);
5202
5203    tcg_gen_shri_i64(tcg_right, tcg_right, pos);
5204    tcg_gen_shli_i64(tcg_tmp, tcg_left, 64 - pos);
5205    tcg_gen_or_i64(tcg_right, tcg_right, tcg_tmp);
5206
5207    tcg_temp_free_i64(tcg_tmp);
5208}
5209
5210/* C3.6.1 EXT
5211 *   31  30 29         24 23 22  21 20  16 15  14  11 10  9    5 4    0
5212 * +---+---+-------------+-----+---+------+---+------+---+------+------+
5213 * | 0 | Q | 1 0 1 1 1 0 | op2 | 0 |  Rm  | 0 | imm4 | 0 |  Rn  |  Rd  |
5214 * +---+---+-------------+-----+---+------+---+------+---+------+------+
5215 */
5216static void disas_simd_ext(DisasContext *s, uint32_t insn)
5217{
5218    int is_q = extract32(insn, 30, 1);
5219    int op2 = extract32(insn, 22, 2);
5220    int imm4 = extract32(insn, 11, 4);
5221    int rm = extract32(insn, 16, 5);
5222    int rn = extract32(insn, 5, 5);
5223    int rd = extract32(insn, 0, 5);
5224    int pos = imm4 << 3;
5225    TCGv_i64 tcg_resl, tcg_resh;
5226
5227    if (op2 != 0 || (!is_q && extract32(imm4, 3, 1))) {
5228        unallocated_encoding(s);
5229        return;
5230    }
5231
5232    if (!fp_access_check(s)) {
5233        return;
5234    }
5235
5236    tcg_resh = tcg_temp_new_i64();
5237    tcg_resl = tcg_temp_new_i64();
5238
5239    /* Vd gets bits starting at pos bits into Vm:Vn. This is
5240     * either extracting 128 bits from a 128:128 concatenation, or
5241     * extracting 64 bits from a 64:64 concatenation.
5242     */
5243    if (!is_q) {
5244        read_vec_element(s, tcg_resl, rn, 0, MO_64);
5245        if (pos != 0) {
5246            read_vec_element(s, tcg_resh, rm, 0, MO_64);
5247            do_ext64(s, tcg_resh, tcg_resl, pos);
5248        }
5249        tcg_gen_movi_i64(tcg_resh, 0);
5250    } else {
5251        TCGv_i64 tcg_hh;
5252        typedef struct {
5253            int reg;
5254            int elt;
5255        } EltPosns;
5256        EltPosns eltposns[] = { {rn, 0}, {rn, 1}, {rm, 0}, {rm, 1} };
5257        EltPosns *elt = eltposns;
5258
5259        if (pos >= 64) {
5260            elt++;
5261            pos -= 64;
5262        }
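            /* e.g. pos == 72 becomes pos == 8 starting at Vn's high half,
             * so the result is assembled from Vn[1], Vm[0] and Vm[1].
             */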
5263
5264        read_vec_element(s, tcg_resl, elt->reg, elt->elt, MO_64);
5265        elt++;
5266        read_vec_element(s, tcg_resh, elt->reg, elt->elt, MO_64);
5267        elt++;
5268        if (pos != 0) {
5269            do_ext64(s, tcg_resh, tcg_resl, pos);
5270            tcg_hh = tcg_temp_new_i64();
5271            read_vec_element(s, tcg_hh, elt->reg, elt->elt, MO_64);
5272            do_ext64(s, tcg_hh, tcg_resh, pos);
5273            tcg_temp_free_i64(tcg_hh);
5274        }
5275    }
5276
5277    write_vec_element(s, tcg_resl, rd, 0, MO_64);
5278    tcg_temp_free_i64(tcg_resl);
5279    write_vec_element(s, tcg_resh, rd, 1, MO_64);
5280    tcg_temp_free_i64(tcg_resh);
5281}
5282
5283/* C3.6.2 TBL/TBX
5284 *   31  30 29         24 23 22  21 20  16 15  14 13  12  11 10 9    5 4    0
5285 * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
5286 * | 0 | Q | 0 0 1 1 1 0 | op2 | 0 |  Rm  | 0 | len | op | 0 0 |  Rn  |  Rd  |
5287 * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
5288 */
5289static void disas_simd_tb(DisasContext *s, uint32_t insn)
5290{
5291    int op2 = extract32(insn, 22, 2);
5292    int is_q = extract32(insn, 30, 1);
5293    int rm = extract32(insn, 16, 5);
5294    int rn = extract32(insn, 5, 5);
5295    int rd = extract32(insn, 0, 5);
5296    int is_tblx = extract32(insn, 12, 1);
5297    int len = extract32(insn, 13, 2);
5298    TCGv_i64 tcg_resl, tcg_resh, tcg_idx;
5299    TCGv_i32 tcg_regno, tcg_numregs;
5300
5301    if (op2 != 0) {
5302        unallocated_encoding(s);
5303        return;
5304    }
5305
5306    if (!fp_access_check(s)) {
5307        return;
5308    }
5309
5310    /* This does a table lookup: for every byte element in the input
5311     * we index into a table formed from up to four vector registers,
5312     * and then the output is the result of the lookups. Our helper
5313     * function does the lookup operation for a single 64 bit part of
5314     * the input.
5315     */
5316    tcg_resl = tcg_temp_new_i64();
5317    tcg_resh = tcg_temp_new_i64();
5318
5319    if (is_tblx) {
5320        read_vec_element(s, tcg_resl, rd, 0, MO_64);
5321    } else {
5322        tcg_gen_movi_i64(tcg_resl, 0);
5323    }
5324    if (is_tblx && is_q) {
5325        read_vec_element(s, tcg_resh, rd, 1, MO_64);
5326    } else {
5327        tcg_gen_movi_i64(tcg_resh, 0);
5328    }
5329
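        /* Seeding the result with the destination value (zero for TBL, the
         * old Rd for TBX) lets the helper leave bytes whose index is out of
         * range with the correct contents.
         */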
5330    tcg_idx = tcg_temp_new_i64();
5331    tcg_regno = tcg_const_i32(rn);
5332    tcg_numregs = tcg_const_i32(len + 1);
5333    read_vec_element(s, tcg_idx, rm, 0, MO_64);
5334    gen_helper_simd_tbl(tcg_resl, cpu_env, tcg_resl, tcg_idx,
5335                        tcg_regno, tcg_numregs);
5336    if (is_q) {
5337        read_vec_element(s, tcg_idx, rm, 1, MO_64);
5338        gen_helper_simd_tbl(tcg_resh, cpu_env, tcg_resh, tcg_idx,
5339                            tcg_regno, tcg_numregs);
5340    }
5341    tcg_temp_free_i64(tcg_idx);
5342    tcg_temp_free_i32(tcg_regno);
5343    tcg_temp_free_i32(tcg_numregs);
5344
5345    write_vec_element(s, tcg_resl, rd, 0, MO_64);
5346    tcg_temp_free_i64(tcg_resl);
5347    write_vec_element(s, tcg_resh, rd, 1, MO_64);
5348    tcg_temp_free_i64(tcg_resh);
5349}
5350
5351/* C3.6.3 ZIP/UZP/TRN
5352 *   31  30 29         24 23  22  21 20   16 15 14   12 11 10 9    5 4    0
5353 * +---+---+-------------+------+---+------+---+-------+-----+------+------+
5354 * | 0 | Q | 0 0 1 1 1 0 | size | 0 |  Rm  | 0 |  opc  | 1 0 |  Rn  |  Rd  |
5355 * +---+---+-------------+------+---+------+---+-------+-----+------+------+
5356 */
5357static void disas_simd_zip_trn(DisasContext *s, uint32_t insn)
5358{
5359    int rd = extract32(insn, 0, 5);
5360    int rn = extract32(insn, 5, 5);
5361    int rm = extract32(insn, 16, 5);
5362    int size = extract32(insn, 22, 2);
5363    /* opc field bits [1:0] indicate ZIP/UZP/TRN;
5364     * bit 2 indicates 1 vs 2 variant of the insn.
5365     */
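        /* For example, UZP1 on .8B gathers the even-numbered bytes of Rn
         * followed by those of Rm, while ZIP1 interleaves the low halves
         * of the two registers.
         */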
5366    int opcode = extract32(insn, 12, 2);
5367    bool part = extract32(insn, 14, 1);
5368    bool is_q = extract32(insn, 30, 1);
5369    int esize = 8 << size;
5370    int i, ofs;
5371    int datasize = is_q ? 128 : 64;
5372    int elements = datasize / esize;
5373    TCGv_i64 tcg_res, tcg_resl, tcg_resh;
5374
5375    if (opcode == 0 || (size == 3 && !is_q)) {
5376        unallocated_encoding(s);
5377        return;
5378    }
5379
5380    if (!fp_access_check(s)) {
5381        return;
5382    }
5383
5384    tcg_resl = tcg_const_i64(0);
5385    tcg_resh = tcg_const_i64(0);
5386    tcg_res = tcg_temp_new_i64();
5387
5388    for (i = 0; i < elements; i++) {
5389        switch (opcode) {
5390        case 1: /* UZP1/2 */
5391        {
5392            int midpoint = elements / 2;
5393            if (i < midpoint) {
5394                read_vec_element(s, tcg_res, rn, 2 * i + part, size);
5395            } else {
5396                read_vec_element(s, tcg_res, rm,
5397                                 2 * (i - midpoint) + part, size);
5398            }
5399            break;
5400        }
5401        case 2: /* TRN1/2 */
5402            if (i & 1) {
5403                read_vec_element(s, tcg_res, rm, (i & ~1) + part, size);
5404            } else {
5405                read_vec_element(s, tcg_res, rn, (i & ~1) + part, size);
5406            }
5407            break;
5408        case 3: /* ZIP1/2 */
5409        {
5410            int base = part * elements / 2;
5411            if (i & 1) {
5412                read_vec_element(s, tcg_res, rm, base + (i >> 1), size);
5413            } else {
5414                read_vec_element(s, tcg_res, rn, base + (i >> 1), size);
5415            }
5416            break;
5417        }
5418        default:
5419            g_assert_not_reached();
5420        }
5421
5422        ofs = i * esize;
5423        if (ofs < 64) {
5424            tcg_gen_shli_i64(tcg_res, tcg_res, ofs);
5425            tcg_gen_or_i64(tcg_resl, tcg_resl, tcg_res);
5426        } else {
5427            tcg_gen_shli_i64(tcg_res, tcg_res, ofs - 64);
5428            tcg_gen_or_i64(tcg_resh, tcg_resh, tcg_res);
5429        }
5430    }
5431
5432    tcg_temp_free_i64(tcg_res);
5433
5434    write_vec_element(s, tcg_resl, rd, 0, MO_64);
5435    tcg_temp_free_i64(tcg_resl);
5436    write_vec_element(s, tcg_resh, rd, 1, MO_64);
5437    tcg_temp_free_i64(tcg_resh);
5438}
5439
5440static void do_minmaxop(DisasContext *s, TCGv_i32 tcg_elt1, TCGv_i32 tcg_elt2,
5441                        int opc, bool is_min, TCGv_ptr fpst)
5442{
5443    /* Helper function for disas_simd_across_lanes: do a single precision
5444     * min/max operation on the specified two inputs,
5445     * and return the result in tcg_elt1.
5446     */
5447    if (opc == 0xc) {
5448        if (is_min) {
5449            gen_helper_vfp_minnums(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
5450        } else {
5451            gen_helper_vfp_maxnums(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
5452        }
5453    } else {
5454        assert(opc == 0xf);
5455        if (is_min) {
5456            gen_helper_vfp_mins(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
5457        } else {
5458            gen_helper_vfp_maxs(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
5459        }
5460    }
5461}
5462
5463/* C3.6.4 AdvSIMD across lanes
5464 *   31  30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
5465 * +---+---+---+-----------+------+-----------+--------+-----+------+------+
5466 * | 0 | Q | U | 0 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
5467 * +---+---+---+-----------+------+-----------+--------+-----+------+------+
5468 */
5469static void disas_simd_across_lanes(DisasContext *s, uint32_t insn)
5470{
5471    int rd = extract32(insn, 0, 5);
5472    int rn = extract32(insn, 5, 5);
5473    int size = extract32(insn, 22, 2);
5474    int opcode = extract32(insn, 12, 5);
5475    bool is_q = extract32(insn, 30, 1);
5476    bool is_u = extract32(insn, 29, 1);
5477    bool is_fp = false;
5478    bool is_min = false;
5479    int esize;
5480    int elements;
5481    int i;
5482    TCGv_i64 tcg_res, tcg_elt;
5483
5484    switch (opcode) {
5485    case 0x1b: /* ADDV */
5486        if (is_u) {
5487            unallocated_encoding(s);
5488            return;
5489        }
5490        /* fall through */
5491    case 0x3: /* SADDLV, UADDLV */
5492    case 0xa: /* SMAXV, UMAXV */
5493    case 0x1a: /* SMINV, UMINV */
5494        if (size == 3 || (size == 2 && !is_q)) {
5495            unallocated_encoding(s);
5496            return;
5497        }
5498        break;
5499    case 0xc: /* FMAXNMV, FMINNMV */
5500    case 0xf: /* FMAXV, FMINV */
5501        if (!is_u || !is_q || extract32(size, 0, 1)) {
5502            unallocated_encoding(s);
5503            return;
5504        }
5505        /* Bit 1 of size field encodes min vs max, and actual size is always
5506         * 32 bits: adjust the size variable so following code can rely on it
5507         */
5508        is_min = extract32(size, 1, 1);
5509        is_fp = true;
5510        size = 2;
5511        break;
5512    default:
5513        unallocated_encoding(s);
5514        return;
5515    }
5516
5517    if (!fp_access_check(s)) {
5518        return;
5519    }
5520
5521    esize = 8 << size;
5522    elements = (is_q ? 128 : 64) / esize;
5523
5524    tcg_res = tcg_temp_new_i64();
5525    tcg_elt = tcg_temp_new_i64();
5526
5527    /* These instructions operate across all lanes of a vector
5528     * to produce a single result. We can guarantee that a 64
5529     * bit intermediate is sufficient:
5530     *  + for [US]ADDLV the maximum element size is 32 bits, and
5531     *    the result type is 64 bits
5532     *  + for FMAX*V, FMIN*V, ADDV the intermediate type is the
5533     *    same as the element size, which is 32 bits at most
5534     * For the integer operations we can choose to work at 64
5535     * or 32 bits and truncate at the end; for simplicity
5536     * we use 64 bits always. The floating point
5537     * ops do require 32 bit intermediates, though.
5538     */
5539    if (!is_fp) {
5540        read_vec_element(s, tcg_res, rn, 0, size | (is_u ? 0 : MO_SIGN));
5541
5542        for (i = 1; i < elements; i++) {
5543            read_vec_element(s, tcg_elt, rn, i, size | (is_u ? 0 : MO_SIGN));
5544
5545            switch (opcode) {
5546            case 0x03: /* SADDLV / UADDLV */
5547            case 0x1b: /* ADDV */
5548                tcg_gen_add_i64(tcg_res, tcg_res, tcg_elt);
5549                break;
5550            case 0x0a: /* SMAXV / UMAXV */
5551                tcg_gen_movcond_i64(is_u ? TCG_COND_GEU : TCG_COND_GE,
5552                                    tcg_res,
5553                                    tcg_res, tcg_elt, tcg_res, tcg_elt);
5554                break;
5555            case 0x1a: /* SMINV / UMINV */
5556                tcg_gen_movcond_i64(is_u ? TCG_COND_LEU : TCG_COND_LE,
5557                                    tcg_res,
5558                                    tcg_res, tcg_elt, tcg_res, tcg_elt);
5559                break;
5561            default:
5562                g_assert_not_reached();
5563            }
5564
5565        }
5566    } else {
5567        /* Floating point ops which work on 32 bit (single) intermediates.
5568         * Note that correct NaN propagation requires that we do these
5569         * operations in exactly the order specified by the pseudocode.
5570         */
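            /* i.e. min/max elements 0:1 and 2:3 first, then combine the
             * two partial results.
             */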
5571        TCGv_i32 tcg_elt1 = tcg_temp_new_i32();
5572        TCGv_i32 tcg_elt2 = tcg_temp_new_i32();
5573        TCGv_i32 tcg_elt3 = tcg_temp_new_i32();
5574        TCGv_ptr fpst = get_fpstatus_ptr();
5575
5576        assert(esize == 32);
5577        assert(elements == 4);
5578
5579        read_vec_element(s, tcg_elt, rn, 0, MO_32);
5580        tcg_gen_extrl_i64_i32(tcg_elt1, tcg_elt);
5581        read_vec_element(s, tcg_elt, rn, 1, MO_32);
5582        tcg_gen_extrl_i64_i32(tcg_elt2, tcg_elt);
5583
5584        do_minmaxop(s, tcg_elt1, tcg_elt2, opcode, is_min, fpst);
5585
5586        read_vec_element(s, tcg_elt, rn, 2, MO_32);
5587        tcg_gen_extrl_i64_i32(tcg_elt2, tcg_elt);
5588        read_vec_element(s, tcg_elt, rn, 3, MO_32);
5589        tcg_gen_extrl_i64_i32(tcg_elt3, tcg_elt);
5590
5591        do_minmaxop(s, tcg_elt2, tcg_elt3, opcode, is_min, fpst);
5592
5593        do_minmaxop(s, tcg_elt1, tcg_elt2, opcode, is_min, fpst);
5594
5595        tcg_gen_extu_i32_i64(tcg_res, tcg_elt1);
5596        tcg_temp_free_i32(tcg_elt1);
5597        tcg_temp_free_i32(tcg_elt2);
5598        tcg_temp_free_i32(tcg_elt3);
5599        tcg_temp_free_ptr(fpst);
5600    }
5601
5602    tcg_temp_free_i64(tcg_elt);
5603
5604    /* Now truncate the result to the width required for the final output */
5605    if (opcode == 0x03) {
5606        /* SADDLV, UADDLV: result is 2*esize */
5607        size++;
5608    }
5609
5610    switch (size) {
5611    case 0:
5612        tcg_gen_ext8u_i64(tcg_res, tcg_res);
5613        break;
5614    case 1:
5615        tcg_gen_ext16u_i64(tcg_res, tcg_res);
5616        break;
5617    case 2:
5618        tcg_gen_ext32u_i64(tcg_res, tcg_res);
5619        break;
5620    case 3:
5621        break;
5622    default:
5623        g_assert_not_reached();
5624    }
5625
5626    write_fp_dreg(s, rd, tcg_res);
5627    tcg_temp_free_i64(tcg_res);
5628}
5629
5630/* C6.3.31 DUP (Element, Vector)
5631 *
5632 *  31  30   29              21 20    16 15        10  9    5 4    0
5633 * +---+---+-------------------+--------+-------------+------+------+
5634 * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 0 1 |  Rn  |  Rd  |
5635 * +---+---+-------------------+--------+-------------+------+------+
5636 *
5637 * size: encoded in imm5 (see ARM ARM LowestSetBit())
5638 */
5639static void handle_simd_dupe(DisasContext *s, int is_q, int rd, int rn,
5640                             int imm5)
5641{
5642    int size = ctz32(imm5);
5643    int esize = 8 << size;
5644    int elements = (is_q ? 128 : 64) / esize;
5645    int index, i;
5646    TCGv_i64 tmp;
5647
5648    if (size > 3 || (size == 3 && !is_q)) {
5649        unallocated_encoding(s);
5650        return;
5651    }
5652
5653    if (!fp_access_check(s)) {
5654        return;
5655    }
5656
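        /* LowestSetBit(imm5) gave the element size; the bits above it form
         * the index, e.g. imm5 == 0b10100 selects lane 2 of the 32-bit
         * elements.
         */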
5657    index = imm5 >> (size + 1);
5658
5659    tmp = tcg_temp_new_i64();
5660    read_vec_element(s, tmp, rn, index, size);
5661
5662    for (i = 0; i < elements; i++) {
5663        write_vec_element(s, tmp, rd, i, size);
5664    }
5665
5666    if (!is_q) {
5667        clear_vec_high(s, rd);
5668    }
5669
5670    tcg_temp_free_i64(tmp);
5671}
5672
5673/* C6.3.31 DUP (element, scalar)
5674 *  31                   21 20    16 15        10  9    5 4    0
5675 * +-----------------------+--------+-------------+------+------+
5676 * | 0 1 0 1 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 0 1 |  Rn  |  Rd  |
5677 * +-----------------------+--------+-------------+------+------+
5678 */
5679static void handle_simd_dupes(DisasContext *s, int rd, int rn,
5680                              int imm5)
5681{
5682    int size = ctz32(imm5);
5683    int index;
5684    TCGv_i64 tmp;
5685
5686    if (size > 3) {
5687        unallocated_encoding(s);
5688        return;
5689    }
5690
5691    if (!fp_access_check(s)) {
5692        return;
5693    }
5694
5695    index = imm5 >> (size + 1);
5696
5697    /* This instruction just extracts the specified element and
5698     * zero-extends it into the bottom of the destination register.
5699     */
5700    tmp = tcg_temp_new_i64();
5701    read_vec_element(s, tmp, rn, index, size);
5702    write_fp_dreg(s, rd, tmp);
5703    tcg_temp_free_i64(tmp);
5704}
5705
5706/* C6.3.32 DUP (General)
5707 *
5708 *  31  30   29              21 20    16 15        10  9    5 4    0
5709 * +---+---+-------------------+--------+-------------+------+------+
5710 * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 1 1 |  Rn  |  Rd  |
5711 * +---+---+-------------------+--------+-------------+------+------+
5712 *
5713 * size: encoded in imm5 (see ARM ARM LowestSetBit())
5714 */
5715static void handle_simd_dupg(DisasContext *s, int is_q, int rd, int rn,
5716                             int imm5)
5717{
5718    int size = ctz32(imm5);
5719    int esize = 8 << size;
5720    int elements = (is_q ? 128 : 64) / esize;
5721    int i = 0;
5722
5723    if (size > 3 || ((size == 3) && !is_q)) {
5724        unallocated_encoding(s);
5725        return;
5726    }
5727
5728    if (!fp_access_check(s)) {
5729        return;
5730    }
5731
5732    for (i = 0; i < elements; i++) {
5733        write_vec_element(s, cpu_reg(s, rn), rd, i, size);
5734    }
5735    if (!is_q) {
5736        clear_vec_high(s, rd);
5737    }
5738}
5739
5740/* C6.3.150 INS (Element)
5741 *
5742 *  31                   21 20    16 15  14    11  10 9    5 4    0
5743 * +-----------------------+--------+------------+---+------+------+
5744 * | 0 1 1 0 1 1 1 0 0 0 0 |  imm5  | 0 |  imm4  | 1 |  Rn  |  Rd  |
5745 * +-----------------------+--------+------------+---+------+------+
5746 *
5747 * size: encoded in imm5 (see ARM ARM LowestSetBit())
5748 * index: encoded in imm5<4:size+1>
5749 */
5750static void handle_simd_inse(DisasContext *s, int rd, int rn,
5751                             int imm4, int imm5)
5752{
5753    int size = ctz32(imm5);
5754    int src_index, dst_index;
5755    TCGv_i64 tmp;
5756
5757    if (size > 3) {
5758        unallocated_encoding(s);
5759        return;
5760    }
5761
5762    if (!fp_access_check(s)) {
5763        return;
5764    }
5765
5766    dst_index = extract32(imm5, 1+size, 5);
5767    src_index = extract32(imm4, size, 4);
5768
5769    tmp = tcg_temp_new_i64();
5770
5771    read_vec_element(s, tmp, rn, src_index, size);
5772    write_vec_element(s, tmp, rd, dst_index, size);
5773
5774    tcg_temp_free_i64(tmp);
5775}
5776
5777
5778/* C6.3.151 INS (General)
5779 *
5780 *  31                   21 20    16 15        10  9    5 4    0
5781 * +-----------------------+--------+-------------+------+------+
5782 * | 0 1 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 1 1 1 |  Rn  |  Rd  |
5783 * +-----------------------+--------+-------------+------+------+
5784 *
5785 * size: encoded in imm5 (see ARM ARM LowestSetBit())
5786 * index: encoded in imm5<4:size+1>
5787 */
5788static void handle_simd_insg(DisasContext *s, int rd, int rn, int imm5)
5789{
5790    int size = ctz32(imm5);
5791    int idx;
5792
5793    if (size > 3) {
5794        unallocated_encoding(s);
5795        return;
5796    }
5797
5798    if (!fp_access_check(s)) {
5799        return;
5800    }
5801
5802    idx = extract32(imm5, 1 + size, 4 - size);
5803    write_vec_element(s, cpu_reg(s, rn), rd, idx, size);
5804}
5805
5806/*
5807 * C6.3.321 UMOV (General)
5808 * C6.3.237 SMOV (General)
5809 *
5810 *  31  30   29              21 20    16 15        10  9    5 4    0
5811 * +---+---+-------------------+--------+-------------+------+------+
5812 * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 1 U 1 1 |  Rn  |  Rd  |
5813 * +---+---+-------------------+--------+-------------+------+------+
5814 *
5815 * U: unsigned when set
5816 * size: encoded in imm5 (see ARM ARM LowestSetBit())
5817 */
5818static void handle_simd_umov_smov(DisasContext *s, int is_q, int is_signed,
5819                                  int rn, int rd, int imm5)
5820{
5821    int size = ctz32(imm5);
5822    int element;
5823    TCGv_i64 tcg_rd;
5824
5825    /* Check for UnallocatedEncodings */
5826    if (is_signed) {
5827        if (size > 2 || (size == 2 && !is_q)) {
5828            unallocated_encoding(s);
5829            return;
5830        }
5831    } else {
5832        if (size > 3
5833            || (size < 3 && is_q)
5834            || (size == 3 && !is_q)) {
5835            unallocated_encoding(s);
5836            return;
5837        }
5838    }
5839
5840    if (!fp_access_check(s)) {
5841        return;
5842    }
5843
5844    element = extract32(imm5, 1+size, 4);
5845
5846    tcg_rd = cpu_reg(s, rd);
5847    read_vec_element(s, tcg_rd, rn, element, size | (is_signed ? MO_SIGN : 0));
5848    if (is_signed && !is_q) {
5849        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
5850    }
5851}
5852
5853/* C3.6.5 AdvSIMD copy
5854 *   31  30  29  28             21 20  16 15  14  11 10  9    5 4    0
5855 * +---+---+----+-----------------+------+---+------+---+------+------+
5856 * | 0 | Q | op | 0 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 |  Rn  |  Rd  |
5857 * +---+---+----+-----------------+------+---+------+---+------+------+
5858 */
5859static void disas_simd_copy(DisasContext *s, uint32_t insn)
5860{
5861    int rd = extract32(insn, 0, 5);
5862    int rn = extract32(insn, 5, 5);
5863    int imm4 = extract32(insn, 11, 4);
5864    int op = extract32(insn, 29, 1);
5865    int is_q = extract32(insn, 30, 1);
5866    int imm5 = extract32(insn, 16, 5);
5867
5868    if (op) {
5869        if (is_q) {
5870            /* INS (element) */
5871            handle_simd_inse(s, rd, rn, imm4, imm5);
5872        } else {
5873            unallocated_encoding(s);
5874        }
5875    } else {
5876        switch (imm4) {
5877        case 0:
5878            /* DUP (element - vector) */
5879            handle_simd_dupe(s, is_q, rd, rn, imm5);
5880            break;
5881        case 1:
5882            /* DUP (general) */
5883            handle_simd_dupg(s, is_q, rd, rn, imm5);
5884            break;
5885        case 3:
5886            if (is_q) {
5887                /* INS (general) */
5888                handle_simd_insg(s, rd, rn, imm5);
5889            } else {
5890                unallocated_encoding(s);
5891            }
5892            break;
5893        case 5:
5894        case 7:
5895            /* UMOV/SMOV (is_q indicates 32/64; imm4 indicates signedness) */
5896            handle_simd_umov_smov(s, is_q, (imm4 == 5), rn, rd, imm5);
5897            break;
5898        default:
5899            unallocated_encoding(s);
5900            break;
5901        }
5902    }
5903}
5904
5905/* C3.6.6 AdvSIMD modified immediate
5906 *  31  30   29  28                 19 18 16 15   12  11  10  9     5 4    0
5907 * +---+---+----+---------------------+-----+-------+----+---+-------+------+
5908 * | 0 | Q | op | 0 1 1 1 1 0 0 0 0 0 | abc | cmode | o2 | 1 | defgh |  Rd  |
5909 * +---+---+----+---------------------+-----+-------+----+---+-------+------+
5910 *
5911 * There are a number of operations that can be carried out here:
5912 *   MOVI - move (shifted) imm into register
5913 *   MVNI - move inverted (shifted) imm into register
5914 *   ORR  - bitwise OR of (shifted) imm with register
5915 *   BIC  - bitwise clear of (shifted) imm with register
5916 */
5917static void disas_simd_mod_imm(DisasContext *s, uint32_t insn)
5918{
5919    int rd = extract32(insn, 0, 5);
5920    int cmode = extract32(insn, 12, 4);
5921    int cmode_3_1 = extract32(cmode, 1, 3);
5922    int cmode_0 = extract32(cmode, 0, 1);
5923    int o2 = extract32(insn, 11, 1);
5924    uint64_t abcdefgh = extract32(insn, 5, 5) | (extract32(insn, 16, 3) << 5);
5925    bool is_neg = extract32(insn, 29, 1);
5926    bool is_q = extract32(insn, 30, 1);
5927    uint64_t imm = 0;
5928    TCGv_i64 tcg_rd, tcg_imm;
5929    int i;
5930
5931    if (o2 != 0 || ((cmode == 0xf) && is_neg && !is_q)) {
5932        unallocated_encoding(s);
5933        return;
5934    }
5935
5936    if (!fp_access_check(s)) {
5937        return;
5938    }
5939
5940    /* See AdvSIMDExpandImm() in ARM ARM */
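        /* Worked example: cmode_3_1 == 1 with abcdefgh == 0xab shifts imm8
         * into byte 1 of each 32-bit chunk, giving 0x0000ab000000ab00.
         */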
5941    switch (cmode_3_1) {
5942    case 0: /* Replicate(Zeros(24):imm8, 2) */
5943    case 1: /* Replicate(Zeros(16):imm8:Zeros(8), 2) */
5944    case 2: /* Replicate(Zeros(8):imm8:Zeros(16), 2) */
5945    case 3: /* Replicate(imm8:Zeros(24), 2) */
5946    {
5947        int shift = cmode_3_1 * 8;
5948        imm = bitfield_replicate(abcdefgh << shift, 32);
5949        break;
5950    }
5951    case 4: /* Replicate(Zeros(8):imm8, 4) */
5952    case 5: /* Replicate(imm8:Zeros(8), 4) */
5953    {
5954        int shift = (cmode_3_1 & 0x1) * 8;
5955        imm = bitfield_replicate(abcdefgh << shift, 16);
5956        break;
5957    }
5958    case 6:
5959        if (cmode_0) {
5960            /* Replicate(Zeros(8):imm8:Ones(16), 2) */
5961            imm = (abcdefgh << 16) | 0xffff;
5962        } else {
5963            /* Replicate(Zeros(16):imm8:Ones(8), 2) */
5964            imm = (abcdefgh << 8) | 0xff;
5965        }
5966        imm = bitfield_replicate(imm, 32);
5967        break;
5968    case 7:
5969        if (!cmode_0 && !is_neg) {
5970            imm = bitfield_replicate(abcdefgh, 8);
5971        } else if (!cmode_0 && is_neg) {
5972            int i;
5973            imm = 0;
5974            for (i = 0; i < 8; i++) {
5975                if ((abcdefgh) & (1 << i)) {
5976                    imm |= 0xffULL << (i * 8);
5977                }
5978            }
5979        } else if (cmode_0) {
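                /* cmode == 0xf: abcdefgh is an 8-bit float immediate as for
                 * FMOV (vector, immediate); is_neg selects the double
                 * precision expansion.
                 */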
5980            if (is_neg) {
5981                imm = (abcdefgh & 0x3f) << 48;
5982                if (abcdefgh & 0x80) {
5983                    imm |= 0x8000000000000000ULL;
5984                }
5985                if (abcdefgh & 0x40) {
5986                    imm |= 0x3fc0000000000000ULL;
5987                } else {
5988                    imm |= 0x4000000000000000ULL;
5989                }
5990            } else {
5991                imm = (abcdefgh & 0x3f) << 19;
5992                if (abcdefgh & 0x80) {
5993                    imm |= 0x80000000;
5994                }
5995                if (abcdefgh & 0x40) {
5996                    imm |= 0x3e000000;
5997                } else {
5998                    imm |= 0x40000000;
5999                }
6000                imm |= (imm << 32);
6001            }
6002        }
6003        break;
6004    }
6005
6006    if (cmode_3_1 != 7 && is_neg) {
6007        imm = ~imm;
6008    }
6009
6010    tcg_imm = tcg_const_i64(imm);
6011    tcg_rd = new_tmp_a64(s);
6012
6013    for (i = 0; i < 2; i++) {
6014        int foffs = i ? fp_reg_hi_offset(s, rd) : fp_reg_offset(s, rd, MO_64);
6015
6016        if (i == 1 && !is_q) {
6017            /* non-quad ops clear high half of vector */
6018            tcg_gen_movi_i64(tcg_rd, 0);
6019        } else if ((cmode & 0x9) == 0x1 || (cmode & 0xd) == 0x9) {
6020            tcg_gen_ld_i64(tcg_rd, cpu_env, foffs);
6021            if (is_neg) {
6022                /* AND (BIC) */
6023                tcg_gen_and_i64(tcg_rd, tcg_rd, tcg_imm);
6024            } else {
6025                /* ORR */
6026                tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_imm);
6027            }
6028        } else {
6029            /* MOVI */
6030            tcg_gen_mov_i64(tcg_rd, tcg_imm);
6031        }
6032        tcg_gen_st_i64(tcg_rd, cpu_env, foffs);
6033    }
6034
6035    tcg_temp_free_i64(tcg_imm);
6036}
6037
6038/* C3.6.7 AdvSIMD scalar copy
6039 *  31 30  29  28             21 20  16 15  14  11 10  9    5 4    0
6040 * +-----+----+-----------------+------+---+------+---+------+------+
6041 * | 0 1 | op | 1 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 |  Rn  |  Rd  |
6042 * +-----+----+-----------------+------+---+------+---+------+------+
6043 */
6044static void disas_simd_scalar_copy(DisasContext *s, uint32_t insn)
6045{
6046    int rd = extract32(insn, 0, 5);
6047    int rn = extract32(insn, 5, 5);
6048    int imm4 = extract32(insn, 11, 4);
6049    int imm5 = extract32(insn, 16, 5);
6050    int op = extract32(insn, 29, 1);
6051
6052    if (op != 0 || imm4 != 0) {
6053        unallocated_encoding(s);
6054        return;
6055    }
6056
6057    /* DUP (element, scalar) */
6058    handle_simd_dupes(s, rd, rn, imm5);
6059}
6060
6061/* C3.6.8 AdvSIMD scalar pairwise
6062 *  31 30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
6063 * +-----+---+-----------+------+-----------+--------+-----+------+------+
6064 * | 0 1 | U | 1 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
6065 * +-----+---+-----------+------+-----------+--------+-----+------+------+
6066 */
6067static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn)
6068{
6069    int u = extract32(insn, 29, 1);
6070    int size = extract32(insn, 22, 2);
6071    int opcode = extract32(insn, 12, 5);
6072    int rn = extract32(insn, 5, 5);
6073    int rd = extract32(insn, 0, 5);
6074    TCGv_ptr fpst;
6075
6076    /* For some ops (the FP ones), size[1] is part of the encoding.
6077     * For ADDP strictly it is not but size[1] is always 1 for valid
6078     * encodings.
6079     */
6080    opcode |= (extract32(size, 1, 1) << 5);
6081
6082    switch (opcode) {
6083    case 0x3b: /* ADDP */
6084        if (u || size != 3) {
6085            unallocated_encoding(s);
6086            return;
6087        }
6088        if (!fp_access_check(s)) {
6089            return;
6090        }
6091
6092        TCGV_UNUSED_PTR(fpst);
6093        break;
6094    case 0xc: /* FMAXNMP */
6095    case 0xd: /* FADDP */
6096    case 0xf: /* FMAXP */
6097    case 0x2c: /* FMINNMP */
6098    case 0x2f: /* FMINP */
6099        /* FP op, size[0] is 32 or 64 bit */
6100        if (!u) {
6101            unallocated_encoding(s);
6102            return;
6103        }
6104        if (!fp_access_check(s)) {
6105            return;
6106        }
6107
6108        size = extract32(size, 0, 1) ? 3 : 2;
6109        fpst = get_fpstatus_ptr();
6110        break;
6111    default:
6112        unallocated_encoding(s);
6113        return;
6114    }
6115
6116    if (size == 3) {
6117        TCGv_i64 tcg_op1 = tcg_temp_new_i64();
6118        TCGv_i64 tcg_op2 = tcg_temp_new_i64();
6119        TCGv_i64 tcg_res = tcg_temp_new_i64();
6120
6121        read_vec_element(s, tcg_op1, rn, 0, MO_64);
6122        read_vec_element(s, tcg_op2, rn, 1, MO_64);
6123
6124        switch (opcode) {
6125        case 0x3b: /* ADDP */
6126            tcg_gen_add_i64(tcg_res, tcg_op1, tcg_op2);
6127            break;
6128        case 0xc: /* FMAXNMP */
6129            gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
6130            break;
6131        case 0xd: /* FADDP */
6132            gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
6133            break;
6134        case 0xf: /* FMAXP */
6135            gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
6136            break;
6137        case 0x2c: /* FMINNMP */
6138            gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
6139            break;
6140        case 0x2f: /* FMINP */
6141            gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
6142            break;
6143        default:
6144            g_assert_not_reached();
6145        }
6146
6147        write_fp_dreg(s, rd, tcg_res);
6148
6149        tcg_temp_free_i64(tcg_op1);
6150        tcg_temp_free_i64(tcg_op2);
6151        tcg_temp_free_i64(tcg_res);
6152    } else {
6153        TCGv_i32 tcg_op1 = tcg_temp_new_i32();
6154        TCGv_i32 tcg_op2 = tcg_temp_new_i32();
6155        TCGv_i32 tcg_res = tcg_temp_new_i32();
6156
6157        read_vec_element_i32(s, tcg_op1, rn, 0, MO_32);
6158        read_vec_element_i32(s, tcg_op2, rn, 1, MO_32);
6159
6160        switch (opcode) {
6161        case 0xc: /* FMAXNMP */
6162            gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
6163            break;
6164        case 0xd: /* FADDP */
6165            gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
6166            break;
6167        case 0xf: /* FMAXP */
6168            gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
6169            break;
6170        case 0x2c: /* FMINNMP */
6171            gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
6172            break;
6173        case 0x2f: /* FMINP */
6174            gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
6175            break;
6176        default:
6177            g_assert_not_reached();
6178        }
6179
6180        write_fp_sreg(s, rd, tcg_res);
6181
6182        tcg_temp_free_i32(tcg_op1);
6183        tcg_temp_free_i32(tcg_op2);
6184        tcg_temp_free_i32(tcg_res);
6185    }
6186
6187    if (!TCGV_IS_UNUSED_PTR(fpst)) {
6188        tcg_temp_free_ptr(fpst);
6189    }
6190}
6191
6192/*
6193 * Common SSHR[RA]/USHR[RA] - Shift right (optional rounding/accumulate)
6194 *
6195 * This handles the common shift logic and is used by both
6196 * the vector and scalar code.
6197 */
6198static void handle_shri_with_rndacc(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
6199                                    TCGv_i64 tcg_rnd, bool accumulate,
6200                                    bool is_u, int size, int shift)
6201{
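        /* Callers pass the rounding constant 1 << (shift - 1) in tcg_rnd,
         * or an unused TCGV to disable rounding. Adding it to a 64 bit
         * source can overflow, hence the 65 bit add2-based path for
         * size == 3 below.
         */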
6202    bool extended_result = false;
6203    bool round = !TCGV_IS_UNUSED_I64(tcg_rnd);
6204    int ext_lshift = 0;
6205    TCGv_i64 tcg_src_hi;
6206
6207    if (round && size == 3) {
6208        extended_result = true;
6209        ext_lshift = 64 - shift;
6210        tcg_src_hi = tcg_temp_new_i64();
6211    } else if (shift == 64) {
6212        if (!accumulate && is_u) {
6213            /* result is zero */
6214            tcg_gen_movi_i64(tcg_res, 0);
6215            return;
6216        }
6217    }
6218
6219    /* Deal with the rounding step */
6220    if (round) {
6221        if (extended_result) {
6222            TCGv_i64 tcg_zero = tcg_const_i64(0);
6223            if (!is_u) {
6224                /* take care of sign extending tcg_res */
6225                tcg_gen_sari_i64(tcg_src_hi, tcg_src, 63);
6226                tcg_gen_add2_i64(tcg_src, tcg_src_hi,
6227                                 tcg_src, tcg_src_hi,
6228                                 tcg_rnd, tcg_zero);
6229            } else {
6230                tcg_gen_add2_i64(tcg_src, tcg_src_hi,
6231                                 tcg_src, tcg_zero,
6232                                 tcg_rnd, tcg_zero);
6233            }
6234            tcg_temp_free_i64(tcg_zero);
6235        } else {
6236            tcg_gen_add_i64(tcg_src, tcg_src, tcg_rnd);
6237        }
6238    }
6239
6240    /* Now do the shift right */
6241    if (round && extended_result) {
6242        /* extended case, >64 bit precision required */
6243        if (ext_lshift == 0) {
6244            /* special case, only high bits matter */
6245            tcg_gen_mov_i64(tcg_src, tcg_src_hi);
6246        } else {
6247            tcg_gen_shri_i64(tcg_src, tcg_src, shift);
6248            tcg_gen_shli_i64(tcg_src_hi, tcg_src_hi, ext_lshift);
6249            tcg_gen_or_i64(tcg_src, tcg_src, tcg_src_hi);
6250        }
6251    } else {
6252        if (is_u) {
6253            if (shift == 64) {
6254                /* essentially shifting in 64 zeros */
6255                tcg_gen_movi_i64(tcg_src, 0);
6256            } else {
6257                tcg_gen_shri_i64(tcg_src, tcg_src, shift);
6258            }
6259        } else {
6260            if (shift == 64) {
6261                /* effectively extending the sign-bit */
6262                tcg_gen_sari_i64(tcg_src, tcg_src, 63);
6263            } else {
6264                tcg_gen_sari_i64(tcg_src, tcg_src, shift);
6265            }
6266        }
6267    }
6268
6269    if (accumulate) {
6270        tcg_gen_add_i64(tcg_res, tcg_res, tcg_src);
6271    } else {
6272        tcg_gen_mov_i64(tcg_res, tcg_src);
6273    }
6274
6275    if (extended_result) {
6276        tcg_temp_free_i64(tcg_src_hi);
6277    }
6278}
6279
6280/* Common SHL/SLI - Shift left with an optional insert */
6281static void handle_shli_with_ins(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
6282                                 bool insert, int shift)
6283{
6284    if (insert) { /* SLI */
6285        tcg_gen_deposit_i64(tcg_res, tcg_res, tcg_src, shift, 64 - shift);
6286    } else { /* SHL */
6287        tcg_gen_shli_i64(tcg_res, tcg_src, shift);
6288    }
6289}
6290
6291/* SRI: shift right with insert */
6292static void handle_shri_with_ins(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
6293                                 int size, int shift)
6294{
6295    int esize = 8 << size;
6296
6297    /* shift count same as element size is valid but does nothing;
6298     * special case to avoid potential shift by 64.
6299     */
6300    if (shift != esize) {
6301        tcg_gen_shri_i64(tcg_src, tcg_src, shift);
6302        tcg_gen_deposit_i64(tcg_res, tcg_res, tcg_src, 0, esize - shift);
6303    }
6304}
6305
6306/* SSHR[RA]/USHR[RA] - Scalar shift right (optional rounding/accumulate) */
6307static void handle_scalar_simd_shri(DisasContext *s,
6308                                    bool is_u, int immh, int immb,
6309                                    int opcode, int rn, int rd)
6310{
6311    const int size = 3;
6312    int immhb = immh << 3 | immb;
6313    int shift = 2 * (8 << size) - immhb;
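        /* With immh<3> set (checked below), immhb is 64..127, so the shift
         * count is 1..64.
         */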
6314    bool accumulate = false;
6315    bool round = false;
6316    bool insert = false;
6317    TCGv_i64 tcg_rn;
6318    TCGv_i64 tcg_rd;
6319    TCGv_i64 tcg_round;
6320
6321    if (!extract32(immh, 3, 1)) {
6322        unallocated_encoding(s);
6323        return;
6324    }
6325
6326    if (!fp_access_check(s)) {
6327        return;
6328    }
6329
6330    switch (opcode) {
6331    case 0x02: /* SSRA / USRA (accumulate) */
6332        accumulate = true;
6333        break;
6334    case 0x04: /* SRSHR / URSHR (rounding) */
6335        round = true;
6336        break;
6337    case 0x06: /* SRSRA / URSRA (accum + rounding) */
6338        accumulate = round = true;
6339        break;
6340    case 0x08: /* SRI */
6341        insert = true;
6342        break;
6343    }
6344
6345    if (round) {
6346        uint64_t round_const = 1ULL << (shift - 1);
6347        tcg_round = tcg_const_i64(round_const);
6348    } else {
6349        TCGV_UNUSED_I64(tcg_round);
6350    }
6351
6352    tcg_rn = read_fp_dreg(s, rn);
6353    tcg_rd = (accumulate || insert) ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
6354
6355    if (insert) {
6356        handle_shri_with_ins(tcg_rd, tcg_rn, size, shift);
6357    } else {
6358        handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
6359                                accumulate, is_u, size, shift);
6360    }
6361
6362    write_fp_dreg(s, rd, tcg_rd);
6363
6364    tcg_temp_free_i64(tcg_rn);
6365    tcg_temp_free_i64(tcg_rd);
6366    if (round) {
6367        tcg_temp_free_i64(tcg_round);
6368    }
6369}
6370
6371/* SHL/SLI - Scalar shift left */
6372static void handle_scalar_simd_shli(DisasContext *s, bool insert,
6373                                    int immh, int immb, int opcode,
6374                                    int rn, int rd)
6375{
6376    int size = 32 - clz32(immh) - 1;
6377    int immhb = immh << 3 | immb;
6378    int shift = immhb - (8 << size);
6379    TCGv_i64 tcg_rn;
6380    TCGv_i64 tcg_rd;
6381
6382    if (!extract32(immh, 3, 1)) {
6383        unallocated_encoding(s);
6384        return;
6385    }
6386
6387    if (!fp_access_check(s)) {
6388        return;
6389    }
6390
6391    tcg_rn = read_fp_dreg(s, rn);
6392    tcg_rd = insert ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
6393
6394    handle_shli_with_ins(tcg_rd, tcg_rn, insert, shift);
6395
6396    write_fp_dreg(s, rd, tcg_rd);
6397
6398    tcg_temp_free_i64(tcg_rn);
6399    tcg_temp_free_i64(tcg_rd);
6400}
6401
6402/* SQSHRN/SQSHRUN - Saturating (signed/unsigned) shift right with
6403 * (signed/unsigned) narrowing */
6404static void handle_vec_simd_sqshrn(DisasContext *s, bool is_scalar, bool is_q,
6405                                   bool is_u_shift, bool is_u_narrow,
6406                                   int immh, int immb, int opcode,
6407                                   int rn, int rd)
6408{
6409    int immhb = immh << 3 | immb;
6410    int size = 32 - clz32(immh) - 1;
6411    int esize = 8 << size;
6412    int shift = (2 * esize) - immhb;
6413    int elements = is_scalar ? 1 : (64 / esize);
6414    bool round = extract32(opcode, 0, 1);
6415    TCGMemOp ldop = (size + 1) | (is_u_shift ? 0 : MO_SIGN);
6416    TCGv_i64 tcg_rn, tcg_rd, tcg_round;
6417    TCGv_i32 tcg_rd_narrowed;
6418    TCGv_i64 tcg_final;
6419
6420    static NeonGenNarrowEnvFn * const signed_narrow_fns[4][2] = {
6421        { gen_helper_neon_narrow_sat_s8,
6422          gen_helper_neon_unarrow_sat8 },
6423        { gen_helper_neon_narrow_sat_s16,
6424          gen_helper_neon_unarrow_sat16 },
6425        { gen_helper_neon_narrow_sat_s32,
6426          gen_helper_neon_unarrow_sat32 },
6427        { NULL, NULL },
6428    };
6429    static NeonGenNarrowEnvFn * const unsigned_narrow_fns[4] = {
6430        gen_helper_neon_narrow_sat_u8,
6431        gen_helper_neon_narrow_sat_u16,
6432        gen_helper_neon_narrow_sat_u32,
6433        NULL
6434    };
6435    NeonGenNarrowEnvFn *narrowfn;
6436
6437    int i;
6438
6439    assert(size < 4);
6440
6441    if (extract32(immh, 3, 1)) {
6442        unallocated_encoding(s);
6443        return;
6444    }
6445
6446    if (!fp_access_check(s)) {
6447        return;
6448    }
6449
6450    if (is_u_shift) {
6451        narrowfn = unsigned_narrow_fns[size];
6452    } else {
6453        narrowfn = signed_narrow_fns[size][is_u_narrow ? 1 : 0];
6454    }
6455
6456    tcg_rn = tcg_temp_new_i64();
6457    tcg_rd = tcg_temp_new_i64();
6458    tcg_rd_narrowed = tcg_temp_new_i32();
6459    tcg_final = tcg_const_i64(0);
6460
6461    if (round) {
6462        uint64_t round_const = 1ULL << (shift - 1);
6463        tcg_round = tcg_const_i64(round_const);
6464    } else {
6465        TCGV_UNUSED_I64(tcg_round);
6466    }
6467
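        /* Per element: shift right (with optional rounding), saturate and
         * narrow to half width, then pack the result into tcg_final.
         */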
6468    for (i = 0; i < elements; i++) {
6469        read_vec_element(s, tcg_rn, rn, i, ldop);
6470        handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
6471                                false, is_u_shift, size+1, shift);
6472        narrowfn(tcg_rd_narrowed, cpu_env, tcg_rd);
6473        tcg_gen_extu_i32_i64(tcg_rd, tcg_rd_narrowed);
6474        tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
6475    }
6476
6477    if (!is_q) {
6478        clear_vec_high(s, rd);
6479        write_vec_element(s, tcg_final, rd, 0, MO_64);
6480    } else {
6481        write_vec_element(s, tcg_final, rd, 1, MO_64);
6482    }
6483
6484    if (round) {
6485        tcg_temp_free_i64(tcg_round);
6486    }
6487    tcg_temp_free_i64(tcg_rn);
6488    tcg_temp_free_i64(tcg_rd);
6489    tcg_temp_free_i32(tcg_rd_narrowed);
6490    tcg_temp_free_i64(tcg_final);
6492}
6493
6494/* SQSHLU, UQSHL, SQSHL: saturating left shifts */
6495static void handle_simd_qshl(DisasContext *s, bool scalar, bool is_q,
6496                             bool src_unsigned, bool dst_unsigned,
6497                             int immh, int immb, int rn, int rd)
6498{
6499    int immhb = immh << 3 | immb;
6500    int size = 32 - clz32(immh) - 1;
6501    int shift = immhb - (8 << size);
6502    int pass;
6503
6504    assert(immh != 0);
6505    assert(!(scalar && is_q));
6506
6507    if (!scalar) {
6508        if (!is_q && extract32(immh, 3, 1)) {
6509            unallocated_encoding(s);
6510            return;
6511        }
6512
6513        /* Since we use the variable-shift helpers we must
6514         * replicate the shift count into each element of
6515         * the tcg_shift value.
6516         */
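            /* e.g. for size 0 the 8 bit count ends up copied into all four
             * byte lanes of the 32 bit shift value.
             */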
6517        switch (size) {
6518        case 0:
6519            shift |= shift << 8;
6520            /* fall through */
6521        case 1:
6522            shift |= shift << 16;
6523            break;
6524        case 2:
6525        case 3:
6526            break;
6527        default:
6528            g_assert_not_reached();
6529        }
6530    }
6531
6532    if (!fp_access_check(s)) {
6533        return;
6534    }
6535
6536    if (size == 3) {
6537        TCGv_i64 tcg_shift = tcg_const_i64(shift);
6538        static NeonGenTwo64OpEnvFn * const fns[2][2] = {
6539            { gen_helper_neon_qshl_s64, gen_helper_neon_qshlu_s64 },
6540            { NULL, gen_helper_neon_qshl_u64 },
6541        };
6542        NeonGenTwo64OpEnvFn *genfn = fns[src_unsigned][dst_unsigned];
6543        int maxpass = is_q ? 2 : 1;
6544
6545        for (pass = 0; pass < maxpass; pass++) {
6546            TCGv_i64 tcg_op = tcg_temp_new_i64();
6547
6548            read_vec_element(s, tcg_op, rn, pass, MO_64);
6549            genfn(tcg_op, cpu_env, tcg_op, tcg_shift);
6550            write_vec_element(s, tcg_op, rd, pass, MO_64);
6551
6552            tcg_temp_free_i64(tcg_op);
6553        }
6554        tcg_temp_free_i64(tcg_shift);
6555
6556        if (!is_q) {
6557            clear_vec_high(s, rd);
6558        }
6559    } else {
6560        TCGv_i32 tcg_shift = tcg_const_i32(shift);
6561        static NeonGenTwoOpEnvFn * const fns[2][2][3] = {
6562            {
6563                { gen_helper_neon_qshl_s8,
6564                  gen_helper_neon_qshl_s16,
6565                  gen_helper_neon_qshl_s32 },
6566                { gen_helper_neon_qshlu_s8,
6567                  gen_helper_neon_qshlu_s16,
6568                  gen_helper_neon_qshlu_s32 }
6569            }, {
6570                { NULL, NULL, NULL },
6571                { gen_helper_neon_qshl_u8,
6572                  gen_helper_neon_qshl_u16,
6573                  gen_helper_neon_qshl_u32 }
6574            }
6575        };
6576        NeonGenTwoOpEnvFn *genfn = fns[src_unsigned][dst_unsigned][size];
6577        TCGMemOp memop = scalar ? size : MO_32;
6578        int maxpass = scalar ? 1 : is_q ? 4 : 2;
6579
6580        for (pass = 0; pass < maxpass; pass++) {
6581            TCGv_i32 tcg_op = tcg_temp_new_i32();
6582
6583            read_vec_element_i32(s, tcg_op, rn, pass, memop);
6584            genfn(tcg_op, cpu_env, tcg_op, tcg_shift);
6585            if (scalar) {
6586                switch (size) {
6587                case 0:
6588                    tcg_gen_ext8u_i32(tcg_op, tcg_op);
6589                    break;
6590                case 1:
6591                    tcg_gen_ext16u_i32(tcg_op, tcg_op);
6592                    break;
6593                case 2:
6594                    break;
6595                default:
6596                    g_assert_not_reached();
6597                }
6598                write_fp_sreg(s, rd, tcg_op);
6599            } else {
6600                write_vec_element_i32(s, tcg_op, rd, pass, MO_32);
6601            }
6602
6603            tcg_temp_free_i32(tcg_op);
6604        }
6605        tcg_temp_free_i32(tcg_shift);
6606
6607        if (!is_q && !scalar) {
6608            clear_vec_high(s, rd);
6609        }
6610    }
6611}
6612
6613/* Common vector code for handling integer to FP conversion */
6614static void handle_simd_intfp_conv(DisasContext *s, int rd, int rn,
6615                                   int elements, int is_signed,
6616                                   int fracbits, int size)
6617{
6618    bool is_double = (size == 3);
6619    TCGv_ptr tcg_fpst = get_fpstatus_ptr();
6620    TCGv_i32 tcg_shift = tcg_const_i32(fracbits);
6621    TCGv_i64 tcg_int = tcg_temp_new_i64();
6622    TCGMemOp mop = size | (is_signed ? MO_SIGN : 0);
6623    int pass;
6624
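        /* The helpers convert and then scale by 2^-fracbits, so
         * fracbits == 0 is a plain [US]CVTF.
         */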
6625    for (pass = 0; pass < elements; pass++) {
6626        read_vec_element(s, tcg_int, rn, pass, mop);
6627
6628        if (is_double) {
6629            TCGv_i64 tcg_double = tcg_temp_new_i64();
6630            if (is_signed) {
6631                gen_helper_vfp_sqtod(tcg_double, tcg_int,
6632                                     tcg_shift, tcg_fpst);
6633            } else {
6634                gen_helper_vfp_uqtod(tcg_double, tcg_int,
6635                                     tcg_shift, tcg_fpst);
6636            }
6637            if (elements == 1) {
6638                write_fp_dreg(s, rd, tcg_double);
6639            } else {
6640                write_vec_element(s, tcg_double, rd, pass, MO_64);
6641            }
6642            tcg_temp_free_i64(tcg_double);
6643        } else {
6644            TCGv_i32 tcg_single = tcg_temp_new_i32();
6645            if (is_signed) {
6646                gen_helper_vfp_sqtos(tcg_single, tcg_int,
6647                                     tcg_shift, tcg_fpst);
6648            } else {
6649                gen_helper_vfp_uqtos(tcg_single, tcg_int,
6650                                     tcg_shift, tcg_fpst);
6651            }
6652            if (elements == 1) {
6653                write_fp_sreg(s, rd, tcg_single);
6654            } else {
6655                write_vec_element_i32(s, tcg_single, rd, pass, MO_32);
6656            }
6657            tcg_temp_free_i32(tcg_single);
6658        }
6659    }
6660
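    /* A 64-bit (two single-precision lanes) op is the only case that
     * writes just the low half of the register: the scalar cases go via
     * write_fp_sreg/write_fp_dreg, which already clear the high bits.
     */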
6661    if (!is_double && elements == 2) {
6662        clear_vec_high(s, rd);
6663    }
6664
6665    tcg_temp_free_i64(tcg_int);
6666    tcg_temp_free_ptr(tcg_fpst);
6667    tcg_temp_free_i32(tcg_shift);
6668}
6669
6670/* UCVTF/SCVTF - Integer to FP conversion */
6671static void handle_simd_shift_intfp_conv(DisasContext *s, bool is_scalar,
6672                                         bool is_q, bool is_u,
6673                                         int immh, int immb, int opcode,
6674                                         int rn, int rd)
6675{
6676    bool is_double = extract32(immh, 3, 1);
6677    int size = is_double ? MO_64 : MO_32;
6678    int elements;
6679    int immhb = immh << 3 | immb;
6680    int fracbits = (is_double ? 128 : 64) - immhb;
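    /* immh:immb encodes (2 * esize) - fracbits; e.g. for a single
     * precision input, immh:immb = 0b0100000 (immhb = 32) means
     * 32 fractional bits.
     */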
6681
6682    if (!extract32(immh, 2, 2)) {
6683        unallocated_encoding(s);
6684        return;
6685    }
6686
6687    if (is_scalar) {
6688        elements = 1;
6689    } else {
6690        elements = is_double ? 2 : is_q ? 4 : 2;
6691        if (is_double && !is_q) {
6692            unallocated_encoding(s);
6693            return;
6694        }
6695    }
6696
6697    if (!fp_access_check(s)) {
6698        return;
6699    }
6700
6701    /* immh == 0 would be a failure of the decode logic */
6702    g_assert(immh);
6703
6704    handle_simd_intfp_conv(s, rd, rn, elements, !is_u, fracbits, size);
6705}
6706
6707/* FCVTZS, FCVTZU - FP to fixed-point conversion */
6708static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar,
6709                                         bool is_q, bool is_u,
6710                                         int immh, int immb, int rn, int rd)
6711{
6712    bool is_double = extract32(immh, 3, 1);
6713    int immhb = immh << 3 | immb;
6714    int fracbits = (is_double ? 128 : 64) - immhb;
6715    int pass;
6716    TCGv_ptr tcg_fpstatus;
6717    TCGv_i32 tcg_rmode, tcg_shift;
6718
6719    if (!extract32(immh, 2, 2)) {
6720        unallocated_encoding(s);
6721        return;
6722    }
6723
6724    if (!is_scalar && !is_q && is_double) {
6725        unallocated_encoding(s);
6726        return;
6727    }
6728
6729    if (!fp_access_check(s)) {
6730        return;
6731    }
6732
6733    assert(!(is_scalar && is_q));
6734
6735    tcg_rmode = tcg_const_i32(arm_rmode_to_sf(FPROUNDING_ZERO));
6736    gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
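    /* set_rmode returns the previous rounding mode in tcg_rmode, so the
     * second call at the end of this function restores it.
     */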
6737    tcg_fpstatus = get_fpstatus_ptr();
6738    tcg_shift = tcg_const_i32(fracbits);
6739
6740    if (is_double) {
6741        int maxpass = is_scalar ? 1 : 2;
6742
6743        for (pass = 0; pass < maxpass; pass++) {
6744            TCGv_i64 tcg_op = tcg_temp_new_i64();
6745
6746            read_vec_element(s, tcg_op, rn, pass, MO_64);
6747            if (is_u) {
6748                gen_helper_vfp_touqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
6749            } else {
6750                gen_helper_vfp_tosqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
6751            }
6752            write_vec_element(s, tcg_op, rd, pass, MO_64);
6753            tcg_temp_free_i64(tcg_op);
6754        }
6755        if (!is_q) {
6756            clear_vec_high(s, rd);
6757        }
6758    } else {
6759        int maxpass = is_scalar ? 1 : is_q ? 4 : 2;
6760        for (pass = 0; pass < maxpass; pass++) {
6761            TCGv_i32 tcg_op = tcg_temp_new_i32();
6762
6763            read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
6764            if (is_u) {
6765                gen_helper_vfp_touls(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
6766            } else {
6767                gen_helper_vfp_tosls(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
6768            }
6769            if (is_scalar) {
6770                write_fp_sreg(s, rd, tcg_op);
6771            } else {
6772                write_vec_element_i32(s, tcg_op, rd, pass, MO_32);
6773            }
6774            tcg_temp_free_i32(tcg_op);
6775        }
6776        if (!is_q && !is_scalar) {
6777            clear_vec_high(s, rd);
6778        }
6779    }
6780
6781    tcg_temp_free_ptr(tcg_fpstatus);
6782    tcg_temp_free_i32(tcg_shift);
6783    gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
6784    tcg_temp_free_i32(tcg_rmode);
6785}
6786
6787/* C3.6.9 AdvSIMD scalar shift by immediate
6788 *  31 30  29 28         23 22  19 18  16 15    11  10 9    5 4    0
6789 * +-----+---+-------------+------+------+--------+---+------+------+
6790 * | 0 1 | U | 1 1 1 1 1 0 | immh | immb | opcode | 1 |  Rn  |  Rd  |
6791 * +-----+---+-------------+------+------+--------+---+------+------+
6792 *
6793 * This is the scalar version, so it works on fixed size registers
6794 */
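/* A worked decode example, using the 2*esize - shift rule from
 * handle_vec_simd_shri() below: SSHR on a D register by 8 has U = 0,
 * opcode = 0b00000 and immh:immb = 128 - 8 = 120 = 0b1111000.
 */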
6795static void disas_simd_scalar_shift_imm(DisasContext *s, uint32_t insn)
6796{
6797    int rd = extract32(insn, 0, 5);
6798    int rn = extract32(insn, 5, 5);
6799    int opcode = extract32(insn, 11, 5);
6800    int immb = extract32(insn, 16, 3);
6801    int immh = extract32(insn, 19, 4);
6802    bool is_u = extract32(insn, 29, 1);
6803
6804    if (immh == 0) {
6805        unallocated_encoding(s);
6806        return;
6807    }
6808
6809    switch (opcode) {
6810    case 0x08: /* SRI */
6811        if (!is_u) {
6812            unallocated_encoding(s);
6813            return;
6814        }
6815        /* fall through */
6816    case 0x00: /* SSHR / USHR */
6817    case 0x02: /* SSRA / USRA */
6818    case 0x04: /* SRSHR / URSHR */
6819    case 0x06: /* SRSRA / URSRA */
6820        handle_scalar_simd_shri(s, is_u, immh, immb, opcode, rn, rd);
6821        break;
6822    case 0x0a: /* SHL / SLI */
6823        handle_scalar_simd_shli(s, is_u, immh, immb, opcode, rn, rd);
6824        break;
6825    case 0x1c: /* SCVTF, UCVTF */
6826        handle_simd_shift_intfp_conv(s, true, false, is_u, immh, immb,
6827                                     opcode, rn, rd);
6828        break;
6829    case 0x10: /* SQSHRUN, SQSHRUN2 */
6830    case 0x11: /* SQRSHRUN, SQRSHRUN2 */
6831        if (!is_u) {
6832            unallocated_encoding(s);
6833            return;
6834        }
6835        handle_vec_simd_sqshrn(s, true, false, false, true,
6836                               immh, immb, opcode, rn, rd);
6837        break;
6838    case 0x12: /* SQSHRN, SQSHRN2, UQSHRN, UQSHRN2 */
6839    case 0x13: /* SQRSHRN, SQRSHRN2, UQRSHRN, UQRSHRN2 */
6840        handle_vec_simd_sqshrn(s, true, false, is_u, is_u,
6841                               immh, immb, opcode, rn, rd);
6842        break;
6843    case 0xc: /* SQSHLU */
6844        if (!is_u) {
6845            unallocated_encoding(s);
6846            return;
6847        }
6848        handle_simd_qshl(s, true, false, false, true, immh, immb, rn, rd);
6849        break;
6850    case 0xe: /* SQSHL, UQSHL */
6851        handle_simd_qshl(s, true, false, is_u, is_u, immh, immb, rn, rd);
6852        break;
6853    case 0x1f: /* FCVTZS, FCVTZU */
6854        handle_simd_shift_fpint_conv(s, true, false, is_u, immh, immb, rn, rd);
6855        break;
6856    default:
6857        unallocated_encoding(s);
6858        break;
6859    }
6860}
6861
6862/* C3.6.10 AdvSIMD scalar three different
6863 *  31 30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
6864 * +-----+---+-----------+------+---+------+--------+-----+------+------+
6865 * | 0 1 | U | 1 1 1 1 0 | size | 1 |  Rm  | opcode | 0 0 |  Rn  |  Rd  |
6866 * +-----+---+-----------+------+---+------+--------+-----+------+------+
6867 */
6868static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn)
6869{
6870    bool is_u = extract32(insn, 29, 1);
6871    int size = extract32(insn, 22, 2);
6872    int opcode = extract32(insn, 12, 4);
6873    int rm = extract32(insn, 16, 5);
6874    int rn = extract32(insn, 5, 5);
6875    int rd = extract32(insn, 0, 5);
6876
6877    if (is_u) {
6878        unallocated_encoding(s);
6879        return;
6880    }
6881
6882    switch (opcode) {
6883    case 0x9: /* SQDMLAL, SQDMLAL2 */
6884    case 0xb: /* SQDMLSL, SQDMLSL2 */
6885    case 0xd: /* SQDMULL, SQDMULL2 */
6886        if (size == 0 || size == 3) {
6887            unallocated_encoding(s);
6888            return;
6889        }
6890        break;
6891    default:
6892        unallocated_encoding(s);
6893        return;
6894    }
6895
6896    if (!fp_access_check(s)) {
6897        return;
6898    }
6899
6900    if (size == 2) {
6901        TCGv_i64 tcg_op1 = tcg_temp_new_i64();
6902        TCGv_i64 tcg_op2 = tcg_temp_new_i64();
6903        TCGv_i64 tcg_res = tcg_temp_new_i64();
6904
6905        read_vec_element(s, tcg_op1, rn, 0, MO_32 | MO_SIGN);
6906        read_vec_element(s, tcg_op2, rm, 0, MO_32 | MO_SIGN);
6907
6908        tcg_gen_mul_i64(tcg_res, tcg_op1, tcg_op2);
6909        gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env, tcg_res, tcg_res);
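        /* The doubling in SQDMULL is done as a saturating add of the
         * product to itself, so saturation (and the QC flag) is
         * handled for us.
         */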
6910
6911        switch (opcode) {
6912        case 0xd: /* SQDMULL, SQDMULL2 */
6913            break;
6914        case 0xb: /* SQDMLSL, SQDMLSL2 */
6915            tcg_gen_neg_i64(tcg_res, tcg_res);
6916            /* fall through */
6917        case 0x9: /* SQDMLAL, SQDMLAL2 */
6918            read_vec_element(s, tcg_op1, rd, 0, MO_64);
6919            gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env,
6920                                              tcg_res, tcg_op1);
6921            break;
6922        default:
6923            g_assert_not_reached();
6924        }
6925
6926        write_fp_dreg(s, rd, tcg_res);
6927
6928        tcg_temp_free_i64(tcg_op1);
6929        tcg_temp_free_i64(tcg_op2);
6930        tcg_temp_free_i64(tcg_res);
6931    } else {
6932        TCGv_i32 tcg_op1 = tcg_temp_new_i32();
6933        TCGv_i32 tcg_op2 = tcg_temp_new_i32();
6934        TCGv_i64 tcg_res = tcg_temp_new_i64();
6935
6936        read_vec_element_i32(s, tcg_op1, rn, 0, MO_16);
6937        read_vec_element_i32(s, tcg_op2, rm, 0, MO_16);
6938
6939        gen_helper_neon_mull_s16(tcg_res, tcg_op1, tcg_op2);
6940        gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env, tcg_res, tcg_res);
6941
6942        switch (opcode) {
6943        case 0xd: /* SQDMULL, SQDMULL2 */
6944            break;
6945        case 0xb: /* SQDMLSL, SQDMLSL2 */
6946            gen_helper_neon_negl_u32(tcg_res, tcg_res);
6947            /* fall through */
6948        case 0x9: /* SQDMLAL, SQDMLAL2 */
6949        {
6950            TCGv_i64 tcg_op3 = tcg_temp_new_i64();
6951            read_vec_element(s, tcg_op3, rd, 0, MO_32);
6952            gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env,
6953                                              tcg_res, tcg_op3);
6954            tcg_temp_free_i64(tcg_op3);
6955            break;
6956        }
6957        default:
6958            g_assert_not_reached();
6959        }
6960
6961        tcg_gen_ext32u_i64(tcg_res, tcg_res);
6962        write_fp_dreg(s, rd, tcg_res);
6963
6964        tcg_temp_free_i32(tcg_op1);
6965        tcg_temp_free_i32(tcg_op2);
6966        tcg_temp_free_i64(tcg_res);
6967    }
6968}
6969
6970static void handle_3same_64(DisasContext *s, int opcode, bool u,
6971                            TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, TCGv_i64 tcg_rm)
6972{
6973    /* Handle 64x64->64 opcodes which are shared between the scalar
6974     * and vector 3-same groups. We cover every opcode where size == 3
6975     * is valid in either the three-reg-same (integer, not pairwise)
6976     * or scalar-three-reg-same groups. (Some opcodes are not yet
6977     * implemented.)
6978     */
6979    TCGCond cond;
6980
6981    switch (opcode) {
6982    case 0x1: /* SQADD */
6983        if (u) {
6984            gen_helper_neon_qadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
6985        } else {
6986            gen_helper_neon_qadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
6987        }
6988        break;
6989    case 0x5: /* SQSUB */
6990        if (u) {
6991            gen_helper_neon_qsub_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
6992        } else {
6993            gen_helper_neon_qsub_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
6994        }
6995        break;
6996    case 0x6: /* CMGT, CMHI */
6997        /* 64 bit integer comparison, result = test ? (2^64 - 1) : 0.
6998         * We implement this using setcond (test) and then negating.
6999         */
7000        cond = u ? TCG_COND_GTU : TCG_COND_GT;
7001    do_cmop:
7002        tcg_gen_setcond_i64(cond, tcg_rd, tcg_rn, tcg_rm);
7003        tcg_gen_neg_i64(tcg_rd, tcg_rd);
7004        break;
7005    case 0x7: /* CMGE, CMHS */
7006        cond = u ? TCG_COND_GEU : TCG_COND_GE;
7007        goto do_cmop;
7008    case 0x11: /* CMTST, CMEQ */
7009        if (u) {
7010            cond = TCG_COND_EQ;
7011            goto do_cmop;
7012        }
7013        /* CMTST : test is "if ((X & Y) != 0)". */
7014        tcg_gen_and_i64(tcg_rd, tcg_rn, tcg_rm);
7015        tcg_gen_setcondi_i64(TCG_COND_NE, tcg_rd, tcg_rd, 0);
7016        tcg_gen_neg_i64(tcg_rd, tcg_rd);
7017        break;
7018    case 0x8: /* SSHL, USHL */
7019        if (u) {
7020            gen_helper_neon_shl_u64(tcg_rd, tcg_rn, tcg_rm);
7021        } else {
7022            gen_helper_neon_shl_s64(tcg_rd, tcg_rn, tcg_rm);
7023        }
7024        break;
7025    case 0x9: /* SQSHL, UQSHL */
7026        if (u) {
7027            gen_helper_neon_qshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7028        } else {
7029            gen_helper_neon_qshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7030        }
7031        break;
7032    case 0xa: /* SRSHL, URSHL */
7033        if (u) {
7034            gen_helper_neon_rshl_u64(tcg_rd, tcg_rn, tcg_rm);
7035        } else {
7036            gen_helper_neon_rshl_s64(tcg_rd, tcg_rn, tcg_rm);
7037        }
7038        break;
7039    case 0xb: /* SQRSHL, UQRSHL */
7040        if (u) {
7041            gen_helper_neon_qrshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7042        } else {
7043            gen_helper_neon_qrshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7044        }
7045        break;
7046    case 0x10: /* ADD, SUB */
7047        if (u) {
7048            tcg_gen_sub_i64(tcg_rd, tcg_rn, tcg_rm);
7049        } else {
7050            tcg_gen_add_i64(tcg_rd, tcg_rn, tcg_rm);
7051        }
7052        break;
7053    default:
7054        g_assert_not_reached();
7055    }
7056}
7057
7058/* Handle the 3-same-operands float operations; shared by the scalar
7059 * and vector encodings. The caller must filter out any encodings
7060 * not allocated for the encoding it is dealing with.
7061 */
7062static void handle_3same_float(DisasContext *s, int size, int elements,
7063                               int fpopcode, int rd, int rn, int rm)
7064{
7065    int pass;
7066    TCGv_ptr fpst = get_fpstatus_ptr();
7067
7068    for (pass = 0; pass < elements; pass++) {
7069        if (size) {
7070            /* Double */
7071            TCGv_i64 tcg_op1 = tcg_temp_new_i64();
7072            TCGv_i64 tcg_op2 = tcg_temp_new_i64();
7073            TCGv_i64 tcg_res = tcg_temp_new_i64();
7074
7075            read_vec_element(s, tcg_op1, rn, pass, MO_64);
7076            read_vec_element(s, tcg_op2, rm, pass, MO_64);
7077
7078            switch (fpopcode) {
7079            case 0x39: /* FMLS */
7080                /* As usual for ARM, separate negation for fused multiply-add */
7081                gen_helper_vfp_negd(tcg_op1, tcg_op1);
7082                /* fall through */
7083            case 0x19: /* FMLA */
7084                read_vec_element(s, tcg_res, rd, pass, MO_64);
7085                gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2,
7086                                       tcg_res, fpst);
7087                break;
7088            case 0x18: /* FMAXNM */
7089                gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
7090                break;
7091            case 0x1a: /* FADD */
7092                gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
7093                break;
7094            case 0x1b: /* FMULX */
7095                gen_helper_vfp_mulxd(tcg_res, tcg_op1, tcg_op2, fpst);
7096                break;
7097            case 0x1c: /* FCMEQ */
7098                gen_helper_neon_ceq_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7099                break;
7100            case 0x1e: /* FMAX */
7101                gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
7102                break;
7103            case 0x1f: /* FRECPS */
7104                gen_helper_recpsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7105                break;
7106            case 0x38: /* FMINNM */
7107                gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
7108                break;
7109            case 0x3a: /* FSUB */
7110                gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
7111                break;
7112            case 0x3e: /* FMIN */
7113                gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
7114                break;
7115            case 0x3f: /* FRSQRTS */
7116                gen_helper_rsqrtsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7117                break;
7118            case 0x5b: /* FMUL */
7119                gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
7120                break;
7121            case 0x5c: /* FCMGE */
7122                gen_helper_neon_cge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7123                break;
7124            case 0x5d: /* FACGE */
7125                gen_helper_neon_acge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7126                break;
7127            case 0x5f: /* FDIV */
7128                gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
7129                break;
7130            case 0x7a: /* FABD */
7131                gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
7132                gen_helper_vfp_absd(tcg_res, tcg_res);
7133                break;
7134            case 0x7c: /* FCMGT */
7135                gen_helper_neon_cgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7136                break;
7137            case 0x7d: /* FACGT */
7138                gen_helper_neon_acgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7139                break;
7140            default:
7141                g_assert_not_reached();
7142            }
7143
7144            write_vec_element(s, tcg_res, rd, pass, MO_64);
7145
7146            tcg_temp_free_i64(tcg_res);
7147            tcg_temp_free_i64(tcg_op1);
7148            tcg_temp_free_i64(tcg_op2);
7149        } else {
7150            /* Single */
7151            TCGv_i32 tcg_op1 = tcg_temp_new_i32();
7152            TCGv_i32 tcg_op2 = tcg_temp_new_i32();
7153            TCGv_i32 tcg_res = tcg_temp_new_i32();
7154
7155            read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
7156            read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);
7157
7158            switch (fpopcode) {
7159            case 0x39: /* FMLS */
7160                /* As usual for ARM, separate negation for fused multiply-add */
7161                gen_helper_vfp_negs(tcg_op1, tcg_op1);
7162                /* fall through */
7163            case 0x19: /* FMLA */
7164                read_vec_element_i32(s, tcg_res, rd, pass, MO_32);
7165                gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2,
7166                                       tcg_res, fpst);
7167                break;
7168            case 0x1a: /* FADD */
7169                gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
7170                break;
7171            case 0x1b: /* FMULX */
7172                gen_helper_vfp_mulxs(tcg_res, tcg_op1, tcg_op2, fpst);
7173                break;
7174            case 0x1c: /* FCMEQ */
7175                gen_helper_neon_ceq_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7176                break;
7177            case 0x1e: /* FMAX */
7178                gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
7179                break;
7180            case 0x1f: /* FRECPS */
7181                gen_helper_recpsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7182                break;
7183            case 0x18: /* FMAXNM */
7184                gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
7185                break;
7186            case 0x38: /* FMINNM */
7187                gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
7188                break;
7189            case 0x3a: /* FSUB */
7190                gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
7191                break;
7192            case 0x3e: /* FMIN */
7193                gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
7194                break;
7195            case 0x3f: /* FRSQRTS */
7196                gen_helper_rsqrtsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7197                break;
7198            case 0x5b: /* FMUL */
7199                gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
7200                break;
7201            case 0x5c: /* FCMGE */
7202                gen_helper_neon_cge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7203                break;
7204            case 0x5d: /* FACGE */
7205                gen_helper_neon_acge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7206                break;
7207            case 0x5f: /* FDIV */
7208                gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
7209                break;
7210            case 0x7a: /* FABD */
7211                gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
7212                gen_helper_vfp_abss(tcg_res, tcg_res);
7213                break;
7214            case 0x7c: /* FCMGT */
7215                gen_helper_neon_cgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7216                break;
7217            case 0x7d: /* FACGT */
7218                gen_helper_neon_acgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7219                break;
7220            default:
7221                g_assert_not_reached();
7222            }
7223
7224            if (elements == 1) {
7225                /* scalar single so clear high part */
7226                TCGv_i64 tcg_tmp = tcg_temp_new_i64();
7227
7228                tcg_gen_extu_i32_i64(tcg_tmp, tcg_res);
7229                write_vec_element(s, tcg_tmp, rd, pass, MO_64);
7230                tcg_temp_free_i64(tcg_tmp);
7231            } else {
7232                write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
7233            }
7234
7235            tcg_temp_free_i32(tcg_res);
7236            tcg_temp_free_i32(tcg_op1);
7237            tcg_temp_free_i32(tcg_op2);
7238        }
7239    }
7240
7241    tcg_temp_free_ptr(fpst);
7242
7243    if ((elements << size) < 4) {
7244        /* scalar, or non-quad vector op */
7245        clear_vec_high(s, rd);
7246    }
7247}
7248
7249/* C3.6.11 AdvSIMD scalar three same
7250 *  31 30  29 28       24 23  22  21 20  16 15    11  10 9    5 4    0
7251 * +-----+---+-----------+------+---+------+--------+---+------+------+
7252 * | 0 1 | U | 1 1 1 1 0 | size | 1 |  Rm  | opcode | 1 |  Rn  |  Rd  |
7253 * +-----+---+-----------+------+---+------+--------+---+------+------+
7254 */
7255static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn)
7256{
7257    int rd = extract32(insn, 0, 5);
7258    int rn = extract32(insn, 5, 5);
7259    int opcode = extract32(insn, 11, 5);
7260    int rm = extract32(insn, 16, 5);
7261    int size = extract32(insn, 22, 2);
7262    bool u = extract32(insn, 29, 1);
7263    TCGv_i64 tcg_rd;
7264
7265    if (opcode >= 0x18) {
7266        /* Floating point: U, size[1] and opcode indicate operation */
7267        int fpopcode = opcode | (extract32(size, 1, 1) << 5) | (u << 6);
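        /* fpopcode is U:size<1>:opcode<4:0>, the same 7-bit values
         * handle_3same_float() switches on.
         */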
7268        switch (fpopcode) {
7269        case 0x1b: /* FMULX */
7270        case 0x1f: /* FRECPS */
7271        case 0x3f: /* FRSQRTS */
7272        case 0x5d: /* FACGE */
7273        case 0x7d: /* FACGT */
7274        case 0x1c: /* FCMEQ */
7275        case 0x5c: /* FCMGE */
7276        case 0x7c: /* FCMGT */
7277        case 0x7a: /* FABD */
7278            break;
7279        default:
7280            unallocated_encoding(s);
7281            return;
7282        }
7283
7284        if (!fp_access_check(s)) {
7285            return;
7286        }
7287
7288        handle_3same_float(s, extract32(size, 0, 1), 1, fpopcode, rd, rn, rm);
7289        return;
7290    }
7291
7292    switch (opcode) {
7293    case 0x1: /* SQADD, UQADD */
7294    case 0x5: /* SQSUB, UQSUB */
7295    case 0x9: /* SQSHL, UQSHL */
7296    case 0xb: /* SQRSHL, UQRSHL */
7297        break;
7298    case 0x8: /* SSHL, USHL */
7299    case 0xa: /* SRSHL, URSHL */
7300    case 0x6: /* CMGT, CMHI */
7301    case 0x7: /* CMGE, CMHS */
7302    case 0x11: /* CMTST, CMEQ */
7303    case 0x10: /* ADD, SUB (vector) */
7304        if (size != 3) {
7305            unallocated_encoding(s);
7306            return;
7307        }
7308        break;
7309    case 0x16: /* SQDMULH, SQRDMULH (vector) */
7310        if (size != 1 && size != 2) {
7311            unallocated_encoding(s);
7312            return;
7313        }
7314        break;
7315    default:
7316        unallocated_encoding(s);
7317        return;
7318    }
7319
7320    if (!fp_access_check(s)) {
7321        return;
7322    }
7323
7324    tcg_rd = tcg_temp_new_i64();
7325
7326    if (size == 3) {
7327        TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
7328        TCGv_i64 tcg_rm = read_fp_dreg(s, rm);
7329
7330        handle_3same_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rm);
7331        tcg_temp_free_i64(tcg_rn);
7332        tcg_temp_free_i64(tcg_rm);
7333    } else {
7334        /* Do a single operation on the lowest element in the vector.
7335         * We use the standard Neon helpers and rely on 0 OP 0 == 0 with
7336         * no side effects for all these operations.
7337         * OPTME: special-purpose helpers would avoid doing some
7338         * unnecessary work in the helper for the 8 and 16 bit cases.
7339         */
7340        NeonGenTwoOpEnvFn *genenvfn;
7341        TCGv_i32 tcg_rn = tcg_temp_new_i32();
7342        TCGv_i32 tcg_rm = tcg_temp_new_i32();
7343        TCGv_i32 tcg_rd32 = tcg_temp_new_i32();
7344
7345        read_vec_element_i32(s, tcg_rn, rn, 0, size);
7346        read_vec_element_i32(s, tcg_rm, rm, 0, size);
7347
7348        switch (opcode) {
7349        case 0x1: /* SQADD, UQADD */
7350        {
7351            static NeonGenTwoOpEnvFn * const fns[3][2] = {
7352                { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 },
7353                { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 },
7354                { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 },
7355            };
7356            genenvfn = fns[size][u];
7357            break;
7358        }
7359        case 0x5: /* SQSUB, UQSUB */
7360        {
7361            static NeonGenTwoOpEnvFn * const fns[3][2] = {
7362                { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 },
7363                { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 },
7364                { gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 },
7365            };
7366            genenvfn = fns[size][u];
7367            break;
7368        }
7369        case 0x9: /* SQSHL, UQSHL */
7370        {
7371            static NeonGenTwoOpEnvFn * const fns[3][2] = {
7372                { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
7373                { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
7374                { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
7375            };
7376            genenvfn = fns[size][u];
7377            break;
7378        }
7379        case 0xb: /* SQRSHL, UQRSHL */
7380        {
7381            static NeonGenTwoOpEnvFn * const fns[3][2] = {
7382                { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
7383                { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
7384                { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
7385            };
7386            genenvfn = fns[size][u];
7387            break;
7388        }
7389        case 0x16: /* SQDMULH, SQRDMULH */
7390        {
7391            static NeonGenTwoOpEnvFn * const fns[2][2] = {
7392                { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 },
7393                { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 },
7394            };
7395            assert(size == 1 || size == 2);
7396            genenvfn = fns[size - 1][u];
7397            break;
7398        }
7399        default:
7400            g_assert_not_reached();
7401        }
7402
7403        genenvfn(tcg_rd32, cpu_env, tcg_rn, tcg_rm);
7404        tcg_gen_extu_i32_i64(tcg_rd, tcg_rd32);
7405        tcg_temp_free_i32(tcg_rd32);
7406        tcg_temp_free_i32(tcg_rn);
7407        tcg_temp_free_i32(tcg_rm);
7408    }
7409
7410    write_fp_dreg(s, rd, tcg_rd);
7411
7412    tcg_temp_free_i64(tcg_rd);
7413}
7414
7415static void handle_2misc_64(DisasContext *s, int opcode, bool u,
7416                            TCGv_i64 tcg_rd, TCGv_i64 tcg_rn,
7417                            TCGv_i32 tcg_rmode, TCGv_ptr tcg_fpstatus)
7418{
7419    /* Handle 64->64 opcodes which are shared between the scalar and
7420     * vector 2-reg-misc groups. We cover every integer opcode where size == 3
7421     * is valid in either group and also the double-precision fp ops.
7422     * The caller need only provide tcg_rmode and tcg_fpstatus if the op
7423     * requires them.
7424     */
7425    TCGCond cond;
7426
7427    switch (opcode) {
7428    case 0x4: /* CLS, CLZ */
7429        if (u) {
7430            gen_helper_clz64(tcg_rd, tcg_rn);
7431        } else {
7432            gen_helper_cls64(tcg_rd, tcg_rn);
7433        }
7434        break;
7435    case 0x5: /* NOT */
7436        /* This opcode is shared with CNT and RBIT but we have earlier
7437         * enforced that size == 3 if and only if this is the NOT insn.
7438         */
7439        tcg_gen_not_i64(tcg_rd, tcg_rn);
7440        break;
7441    case 0x7: /* SQABS, SQNEG */
7442        if (u) {
7443            gen_helper_neon_qneg_s64(tcg_rd, cpu_env, tcg_rn);
7444        } else {
7445            gen_helper_neon_qabs_s64(tcg_rd, cpu_env, tcg_rn);
7446        }
7447        break;
7448    case 0xa: /* CMLT */
7449        /* 64 bit integer comparison against zero, result is
7450         * test ? (2^64 - 1) : 0. We implement this using setcond (test)
7451         * and then negating.
7452         */
7453        cond = TCG_COND_LT;
7454    do_cmop:
7455        tcg_gen_setcondi_i64(cond, tcg_rd, tcg_rn, 0);
7456        tcg_gen_neg_i64(tcg_rd, tcg_rd);
7457        break;
7458    case 0x8: /* CMGT, CMGE */
7459        cond = u ? TCG_COND_GE : TCG_COND_GT;
7460        goto do_cmop;
7461    case 0x9: /* CMEQ, CMLE */
7462        cond = u ? TCG_COND_LE : TCG_COND_EQ;
7463        goto do_cmop;
7464    case 0xb: /* ABS, NEG */
7465        if (u) {
7466            tcg_gen_neg_i64(tcg_rd, tcg_rn);
7467        } else {
7468            TCGv_i64 tcg_zero = tcg_const_i64(0);
7469            tcg_gen_neg_i64(tcg_rd, tcg_rn);
7470            tcg_gen_movcond_i64(TCG_COND_GT, tcg_rd, tcg_rn, tcg_zero,
7471                                tcg_rn, tcg_rd);
7472            tcg_temp_free_i64(tcg_zero);
7473        }
7474        break;
7475    case 0x2f: /* FABS */
7476        gen_helper_vfp_absd(tcg_rd, tcg_rn);
7477        break;
7478    case 0x6f: /* FNEG */
7479        gen_helper_vfp_negd(tcg_rd, tcg_rn);
7480        break;
7481    case 0x7f: /* FSQRT */
7482        gen_helper_vfp_sqrtd(tcg_rd, tcg_rn, cpu_env);
7483        break;
7484    case 0x1a: /* FCVTNS */
7485    case 0x1b: /* FCVTMS */
7486    case 0x1c: /* FCVTAS */
7487    case 0x3a: /* FCVTPS */
7488    case 0x3b: /* FCVTZS */
7489    {
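        /* These differ only in rounding mode, which the caller has
         * already installed via set_rmode; the conversion itself is
         * identical, with zero fracbits.
         */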
7490        TCGv_i32 tcg_shift = tcg_const_i32(0);
7491        gen_helper_vfp_tosqd(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
7492        tcg_temp_free_i32(tcg_shift);
7493        break;
7494    }
7495    case 0x5a: /* FCVTNU */
7496    case 0x5b: /* FCVTMU */
7497    case 0x5c: /* FCVTAU */
7498    case 0x7a: /* FCVTPU */
7499    case 0x7b: /* FCVTZU */
7500    {
7501        TCGv_i32 tcg_shift = tcg_const_i32(0);
7502        gen_helper_vfp_touqd(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
7503        tcg_temp_free_i32(tcg_shift);
7504        break;
7505    }
7506    case 0x18: /* FRINTN */
7507    case 0x19: /* FRINTM */
7508    case 0x38: /* FRINTP */
7509    case 0x39: /* FRINTZ */
7510    case 0x58: /* FRINTA */
7511    case 0x79: /* FRINTI */
7512        gen_helper_rintd(tcg_rd, tcg_rn, tcg_fpstatus);
7513        break;
7514    case 0x59: /* FRINTX */
7515        gen_helper_rintd_exact(tcg_rd, tcg_rn, tcg_fpstatus);
7516        break;
7517    default:
7518        g_assert_not_reached();
7519    }
7520}
7521
7522static void handle_2misc_fcmp_zero(DisasContext *s, int opcode,
7523                                   bool is_scalar, bool is_u, bool is_q,
7524                                   int size, int rn, int rd)
7525{
7526    bool is_double = (size == 3);
7527    TCGv_ptr fpst;
7528
7529    if (!fp_access_check(s)) {
7530        return;
7531    }
7532
7533    fpst = get_fpstatus_ptr();
7534
7535    if (is_double) {
7536        TCGv_i64 tcg_op = tcg_temp_new_i64();
7537        TCGv_i64 tcg_zero = tcg_const_i64(0);
7538        TCGv_i64 tcg_res = tcg_temp_new_i64();
7539        NeonGenTwoDoubleOPFn *genfn;
7540        bool swap = false;
7541        int pass;
7542
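        /* FCMLT/FCMLE (zero) are implemented by swapping the operands
         * of the GT/GE helper: x < 0 is the same as 0 > x.
         */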
7543        switch (opcode) {
7544        case 0x2e: /* FCMLT (zero) */
7545            swap = true;
7546            /* fall through */
7547        case 0x2c: /* FCMGT (zero) */
7548            genfn = gen_helper_neon_cgt_f64;
7549            break;
7550        case 0x2d: /* FCMEQ (zero) */
7551            genfn = gen_helper_neon_ceq_f64;
7552            break;
7553        case 0x6d: /* FCMLE (zero) */
7554            swap = true;
7555            /* fall through */
7556        case 0x6c: /* FCMGE (zero) */
7557            genfn = gen_helper_neon_cge_f64;
7558            break;
7559        default:
7560            g_assert_not_reached();
7561        }
7562
7563        for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
7564            read_vec_element(s, tcg_op, rn, pass, MO_64);
7565            if (swap) {
7566                genfn(tcg_res, tcg_zero, tcg_op, fpst);
7567            } else {
7568                genfn(tcg_res, tcg_op, tcg_zero, fpst);
7569            }
7570            write_vec_element(s, tcg_res, rd, pass, MO_64);
7571        }
7572        if (is_scalar) {
7573            clear_vec_high(s, rd);
7574        }
7575
7576        tcg_temp_free_i64(tcg_res);
7577        tcg_temp_free_i64(tcg_zero);
7578        tcg_temp_free_i64(tcg_op);
7579    } else {
7580        TCGv_i32 tcg_op = tcg_temp_new_i32();
7581        TCGv_i32 tcg_zero = tcg_const_i32(0);
7582        TCGv_i32 tcg_res = tcg_temp_new_i32();
7583        NeonGenTwoSingleOPFn *genfn;
7584        bool swap = false;
7585        int pass, maxpasses;
7586
7587        switch (opcode) {
7588        case 0x2e: /* FCMLT (zero) */
7589            swap = true;
7590            /* fall through */
7591        case 0x2c: /* FCMGT (zero) */
7592            genfn = gen_helper_neon_cgt_f32;
7593            break;
7594        case 0x2d: /* FCMEQ (zero) */
7595            genfn = gen_helper_neon_ceq_f32;
7596            break;
7597        case 0x6d: /* FCMLE (zero) */
7598            swap = true;
7599            /* fall through */
7600        case 0x6c: /* FCMGE (zero) */
7601            genfn = gen_helper_neon_cge_f32;
7602            break;
7603        default:
7604            g_assert_not_reached();
7605        }
7606
7607        if (is_scalar) {
7608            maxpasses = 1;
7609        } else {
7610            maxpasses = is_q ? 4 : 2;
7611        }
7612
7613        for (pass = 0; pass < maxpasses; pass++) {
7614            read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
7615            if (swap) {
7616                genfn(tcg_res, tcg_zero, tcg_op, fpst);
7617            } else {
7618                genfn(tcg_res, tcg_op, tcg_zero, fpst);
7619            }
7620            if (is_scalar) {
7621                write_fp_sreg(s, rd, tcg_res);
7622            } else {
7623                write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
7624            }
7625        }
7626        tcg_temp_free_i32(tcg_res);
7627        tcg_temp_free_i32(tcg_zero);
7628        tcg_temp_free_i32(tcg_op);
7629        if (!is_q && !is_scalar) {
7630            clear_vec_high(s, rd);
7631        }
7632    }
7633
7634    tcg_temp_free_ptr(fpst);
7635}
7636
7637static void handle_2misc_reciprocal(DisasContext *s, int opcode,
7638                                    bool is_scalar, bool is_u, bool is_q,
7639                                    int size, int rn, int rd)
7640{
7641    bool is_double = (size == 3);
7642    TCGv_ptr fpst = get_fpstatus_ptr();
7643
7644    if (is_double) {
7645        TCGv_i64 tcg_op = tcg_temp_new_i64();
7646        TCGv_i64 tcg_res = tcg_temp_new_i64();
7647        int pass;
7648
7649        for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
7650            read_vec_element(s, tcg_op, rn, pass, MO_64);
7651            switch (opcode) {
7652            case 0x3d: /* FRECPE */
7653                gen_helper_recpe_f64(tcg_res, tcg_op, fpst);
7654                break;
7655            case 0x3f: /* FRECPX */
7656                gen_helper_frecpx_f64(tcg_res, tcg_op, fpst);
7657                break;
7658            case 0x7d: /* FRSQRTE */
7659                gen_helper_rsqrte_f64(tcg_res, tcg_op, fpst);
7660                break;
7661            default:
7662                g_assert_not_reached();
7663            }
7664            write_vec_element(s, tcg_res, rd, pass, MO_64);
7665        }
7666        if (is_scalar) {
7667            clear_vec_high(s, rd);
7668        }
7669
7670        tcg_temp_free_i64(tcg_res);
7671        tcg_temp_free_i64(tcg_op);
7672    } else {
7673        TCGv_i32 tcg_op = tcg_temp_new_i32();
7674        TCGv_i32 tcg_res = tcg_temp_new_i32();
7675        int pass, maxpasses;
7676
7677        if (is_scalar) {
7678            maxpasses = 1;
7679        } else {
7680            maxpasses = is_q ? 4 : 2;
7681        }
7682
7683        for (pass = 0; pass < maxpasses; pass++) {
7684            read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
7685
7686            switch (opcode) {
7687            case 0x3c: /* URECPE */
7688                gen_helper_recpe_u32(tcg_res, tcg_op, fpst);
7689                break;
7690            case 0x3d: /* FRECPE */
7691                gen_helper_recpe_f32(tcg_res, tcg_op, fpst);
7692                break;
7693            case 0x3f: /* FRECPX */
7694                gen_helper_frecpx_f32(tcg_res, tcg_op, fpst);
7695                break;
7696            case 0x7d: /* FRSQRTE */
7697                gen_helper_rsqrte_f32(tcg_res, tcg_op, fpst);
7698                break;
7699            default:
7700                g_assert_not_reached();
7701            }
7702
7703            if (is_scalar) {
7704                write_fp_sreg(s, rd, tcg_res);
7705            } else {
7706                write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
7707            }
7708        }
7709        tcg_temp_free_i32(tcg_res);
7710        tcg_temp_free_i32(tcg_op);
7711        if (!is_q && !is_scalar) {
7712            clear_vec_high(s, rd);
7713        }
7714    }
7715    tcg_temp_free_ptr(fpst);
7716}
7717
7718static void handle_2misc_narrow(DisasContext *s, bool scalar,
7719                                int opcode, bool u, bool is_q,
7720                                int size, int rn, int rd)
7721{
7722    /* Handle 2-reg-misc ops which are narrowing (so each 2*size element
7723     * in the source becomes a size element in the destination).
7724     */
7725    int pass;
7726    TCGv_i32 tcg_res[2];
7727    int destelt = is_q ? 2 : 0;
7728    int passes = scalar ? 1 : 2;
7729
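    /* The "2" (second half) variants have is_q set: destelt selects the
     * upper pair of 32-bit destination elements. Scalar ops narrow only
     * one element, so tcg_res[1] is pre-zeroed and the store loop below
     * can unconditionally write both halves.
     */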
7730    if (scalar) {
7731        tcg_res[1] = tcg_const_i32(0);
7732    }
7733
7734    for (pass = 0; pass < passes; pass++) {
7735        TCGv_i64 tcg_op = tcg_temp_new_i64();
7736        NeonGenNarrowFn *genfn = NULL;
7737        NeonGenNarrowEnvFn *genenvfn = NULL;
7738
7739        if (scalar) {
7740            read_vec_element(s, tcg_op, rn, pass, size + 1);
7741        } else {
7742            read_vec_element(s, tcg_op, rn, pass, MO_64);
7743        }
7744        tcg_res[pass] = tcg_temp_new_i32();
7745
7746        switch (opcode) {
7747        case 0x12: /* XTN, SQXTUN */
7748        {
7749            static NeonGenNarrowFn * const xtnfns[3] = {
7750                gen_helper_neon_narrow_u8,
7751                gen_helper_neon_narrow_u16,
7752                tcg_gen_extrl_i64_i32,
7753            };
7754            static NeonGenNarrowEnvFn * const sqxtunfns[3] = {
7755                gen_helper_neon_unarrow_sat8,
7756                gen_helper_neon_unarrow_sat16,
7757                gen_helper_neon_unarrow_sat32,
7758            };
7759            if (u) {
7760                genenvfn = sqxtunfns[size];
7761            } else {
7762                genfn = xtnfns[size];
7763            }
7764            break;
7765        }
7766        case 0x14: /* SQXTN, UQXTN */
7767        {
7768            static NeonGenNarrowEnvFn * const fns[3][2] = {
7769                { gen_helper_neon_narrow_sat_s8,
7770                  gen_helper_neon_narrow_sat_u8 },
7771                { gen_helper_neon_narrow_sat_s16,
7772                  gen_helper_neon_narrow_sat_u16 },
7773                { gen_helper_neon_narrow_sat_s32,
7774                  gen_helper_neon_narrow_sat_u32 },
7775            };
7776            genenvfn = fns[size][u];
7777            break;
7778        }
7779        case 0x16: /* FCVTN, FCVTN2 */
7780            /* 32 bit to 16 bit or 64 bit to 32 bit float conversion */
7781            if (size == 2) {
7782                gen_helper_vfp_fcvtsd(tcg_res[pass], tcg_op, cpu_env);
7783            } else {
7784                TCGv_i32 tcg_lo = tcg_temp_new_i32();
7785                TCGv_i32 tcg_hi = tcg_temp_new_i32();
7786                tcg_gen_extr_i64_i32(tcg_lo, tcg_hi, tcg_op);
7787                gen_helper_vfp_fcvt_f32_to_f16(tcg_lo, tcg_lo, cpu_env);
7788                gen_helper_vfp_fcvt_f32_to_f16(tcg_hi, tcg_hi, cpu_env);
7789                tcg_gen_deposit_i32(tcg_res[pass], tcg_lo, tcg_hi, 16, 16);
7790                tcg_temp_free_i32(tcg_lo);
7791                tcg_temp_free_i32(tcg_hi);
7792            }
7793            break;
7794        case 0x56:  /* FCVTXN, FCVTXN2 */
7795            /* 64 bit to 32 bit float conversion
7796             * with von Neumann rounding (round to odd)
7797             */
7798            assert(size == 2);
7799            gen_helper_fcvtx_f64_to_f32(tcg_res[pass], tcg_op, cpu_env);
7800            break;
7801        default:
7802            g_assert_not_reached();
7803        }
7804
7805        if (genfn) {
7806            genfn(tcg_res[pass], tcg_op);
7807        } else if (genenvfn) {
7808            genenvfn(tcg_res[pass], cpu_env, tcg_op);
7809        }
7810
7811        tcg_temp_free_i64(tcg_op);
7812    }
7813
7814    for (pass = 0; pass < 2; pass++) {
7815        write_vec_element_i32(s, tcg_res[pass], rd, destelt + pass, MO_32);
7816        tcg_temp_free_i32(tcg_res[pass]);
7817    }
7818    if (!is_q) {
7819        clear_vec_high(s, rd);
7820    }
7821}
7822
7823/* Remaining saturating accumulating ops */
7824static void handle_2misc_satacc(DisasContext *s, bool is_scalar, bool is_u,
7825                                bool is_q, int size, int rn, int rd)
7826{
7827    bool is_double = (size == 3);
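    /* USQADD: Rd (unsigned) += Rn (signed), saturating unsigned;
     * SUQADD: Rd (signed) += Rn (unsigned), saturating signed.
     * Rd is both the accumulator and the destination.
     */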
7828
7829    if (is_double) {
7830        TCGv_i64 tcg_rn = tcg_temp_new_i64();
7831        TCGv_i64 tcg_rd = tcg_temp_new_i64();
7832        int pass;
7833
7834        for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
7835            read_vec_element(s, tcg_rn, rn, pass, MO_64);
7836            read_vec_element(s, tcg_rd, rd, pass, MO_64);
7837
7838            if (is_u) { /* USQADD */
7839                gen_helper_neon_uqadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rd);
7840            } else { /* SUQADD */
7841                gen_helper_neon_sqadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rd);
7842            }
7843            write_vec_element(s, tcg_rd, rd, pass, MO_64);
7844        }
7845        if (is_scalar) {
7846            clear_vec_high(s, rd);
7847        }
7848
7849        tcg_temp_free_i64(tcg_rd);
7850        tcg_temp_free_i64(tcg_rn);
7851    } else {
7852        TCGv_i32 tcg_rn = tcg_temp_new_i32();
7853        TCGv_i32 tcg_rd = tcg_temp_new_i32();
7854        int pass, maxpasses;
7855
7856        if (is_scalar) {
7857            maxpasses = 1;
7858        } else {
7859            maxpasses = is_q ? 4 : 2;
7860        }
7861
7862        for (pass = 0; pass < maxpasses; pass++) {
7863            if (is_scalar) {
7864                read_vec_element_i32(s, tcg_rn, rn, pass, size);
7865                read_vec_element_i32(s, tcg_rd, rd, pass, size);
7866            } else {
7867                read_vec_element_i32(s, tcg_rn, rn, pass, MO_32);
7868                read_vec_element_i32(s, tcg_rd, rd, pass, MO_32);
7869            }
7870
7871            if (is_u) { /* USQADD */
7872                switch (size) {
7873                case 0:
7874                    gen_helper_neon_uqadd_s8(tcg_rd, cpu_env, tcg_rn, tcg_rd);
7875                    break;
7876                case 1:
7877                    gen_helper_neon_uqadd_s16(tcg_rd, cpu_env, tcg_rn, tcg_rd);
7878                    break;
7879                case 2:
7880                    gen_helper_neon_uqadd_s32(tcg_rd, cpu_env, tcg_rn, tcg_rd);
7881                    break;
7882                default:
7883                    g_assert_not_reached();
7884                }
7885            } else { /* SUQADD */
7886                switch (size) {
7887                case 0:
7888                    gen_helper_neon_sqadd_u8(tcg_rd, cpu_env, tcg_rn, tcg_rd);
7889                    break;
7890                case 1:
7891                    gen_helper_neon_sqadd_u16(tcg_rd, cpu_env, tcg_rn, tcg_rd);
7892                    break;
7893                case 2:
7894                    gen_helper_neon_sqadd_u32(tcg_rd, cpu_env, tcg_rn, tcg_rd);
7895                    break;
7896                default:
7897                    g_assert_not_reached();
7898                }
7899            }
7900
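            /* For scalars, zero the whole low 64 bits of Rd first so
             * the 8/16/32-bit result written below leaves the rest of
             * the register clear.
             */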
7901            if (is_scalar) {
7902                TCGv_i64 tcg_zero = tcg_const_i64(0);
7903                write_vec_element(s, tcg_zero, rd, 0, MO_64);
7904                tcg_temp_free_i64(tcg_zero);
7905            }
7906            write_vec_element_i32(s, tcg_rd, rd, pass, MO_32);
7907        }
7908
7909        if (!is_q) {
7910            clear_vec_high(s, rd);
7911        }
7912
7913        tcg_temp_free_i32(tcg_rd);
7914        tcg_temp_free_i32(tcg_rn);
7915    }
7916}
7917
7918/* C3.6.12 AdvSIMD scalar two reg misc
7919 *  31 30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
7920 * +-----+---+-----------+------+-----------+--------+-----+------+------+
7921 * | 0 1 | U | 1 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
7922 * +-----+---+-----------+------+-----------+--------+-----+------+------+
7923 */
7924static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn)
7925{
7926    int rd = extract32(insn, 0, 5);
7927    int rn = extract32(insn, 5, 5);
7928    int opcode = extract32(insn, 12, 5);
7929    int size = extract32(insn, 22, 2);
7930    bool u = extract32(insn, 29, 1);
7931    bool is_fcvt = false;
7932    int rmode;
7933    TCGv_i32 tcg_rmode;
7934    TCGv_ptr tcg_fpstatus;
7935
7936    switch (opcode) {
7937    case 0x3: /* USQADD / SUQADD */
7938        if (!fp_access_check(s)) {
7939            return;
7940        }
7941        handle_2misc_satacc(s, true, u, false, size, rn, rd);
7942        return;
7943    case 0x7: /* SQABS / SQNEG */
7944        break;
7945    case 0xa: /* CMLT */
7946        if (u) {
7947            unallocated_encoding(s);
7948            return;
7949        }
7950        /* fall through */
7951    case 0x8: /* CMGT, CMGE */
7952    case 0x9: /* CMEQ, CMLE */
7953    case 0xb: /* ABS, NEG */
7954        if (size != 3) {
7955            unallocated_encoding(s);
7956            return;
7957        }
7958        break;
7959    case 0x12: /* SQXTUN */
7960        if (!u) {
7961            unallocated_encoding(s);
7962            return;
7963        }
7964        /* fall through */
7965    case 0x14: /* SQXTN, UQXTN */
7966        if (size == 3) {
7967            unallocated_encoding(s);
7968            return;
7969        }
7970        if (!fp_access_check(s)) {
7971            return;
7972        }
7973        handle_2misc_narrow(s, true, opcode, u, false, size, rn, rd);
7974        return;
7975    case 0xc ... 0xf:
7976    case 0x16 ... 0x1d:
7977    case 0x1f:
7978        /* Floating point: U, size[1] and opcode indicate operation;
7979         * size[0] indicates single or double precision.
7980         */
7981        opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
7982        size = extract32(size, 0, 1) ? 3 : 2;
7983        switch (opcode) {
7984        case 0x2c: /* FCMGT (zero) */
7985        case 0x2d: /* FCMEQ (zero) */
7986        case 0x2e: /* FCMLT (zero) */
7987        case 0x6c: /* FCMGE (zero) */
7988        case 0x6d: /* FCMLE (zero) */
7989            handle_2misc_fcmp_zero(s, opcode, true, u, true, size, rn, rd);
7990            return;
7991        case 0x1d: /* SCVTF */
7992        case 0x5d: /* UCVTF */
7993        {
7994            bool is_signed = (opcode == 0x1d);
7995            if (!fp_access_check(s)) {
7996                return;
7997            }
7998            handle_simd_intfp_conv(s, rd, rn, 1, is_signed, 0, size);
7999            return;
8000        }
8001        case 0x3d: /* FRECPE */
8002        case 0x3f: /* FRECPX */
8003        case 0x7d: /* FRSQRTE */
8004            if (!fp_access_check(s)) {
8005                return;
8006            }
8007            handle_2misc_reciprocal(s, opcode, true, u, true, size, rn, rd);
8008            return;
8009        case 0x1a: /* FCVTNS */
8010        case 0x1b: /* FCVTMS */
8011        case 0x3a: /* FCVTPS */
8012        case 0x3b: /* FCVTZS */
8013        case 0x5a: /* FCVTNU */
8014        case 0x5b: /* FCVTMU */
8015        case 0x7a: /* FCVTPU */
8016        case 0x7b: /* FCVTZU */
8017            is_fcvt = true;
8018            rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
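            /* opcode<5> becomes rmode<0> and opcode<0> becomes rmode<1>:
             * FCVTN* -> TIEEVEN, FCVTM* -> NEGINF, FCVTP* -> POSINF,
             * FCVTZ* -> ZERO, matching the FPROUNDING_* ordering.
             */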
8019            break;
8020        case 0x1c: /* FCVTAS */
8021        case 0x5c: /* FCVTAU */
8022            /* TIEAWAY doesn't fit in the usual rounding mode encoding */
8023            is_fcvt = true;
8024            rmode = FPROUNDING_TIEAWAY;
8025            break;
8026        case 0x56: /* FCVTXN, FCVTXN2 */
8027            if (size == 2) {
8028                unallocated_encoding(s);
8029                return;
8030            }
8031            if (!fp_access_check(s)) {
8032                return;
8033            }
8034            handle_2misc_narrow(s, true, opcode, u, false, size - 1, rn, rd);
8035            return;
8036        default:
8037            unallocated_encoding(s);
8038            return;
8039        }
8040        break;
8041    default:
8042        unallocated_encoding(s);
8043        return;
8044    }
8045
8046    if (!fp_access_check(s)) {
8047        return;
8048    }
8049
8050    if (is_fcvt) {
8051        tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
8052        gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
8053        tcg_fpstatus = get_fpstatus_ptr();
8054    } else {
8055        TCGV_UNUSED_I32(tcg_rmode);
8056        TCGV_UNUSED_PTR(tcg_fpstatus);
8057    }
8058
8059    if (size == 3) {
8060        TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
8061        TCGv_i64 tcg_rd = tcg_temp_new_i64();
8062
8063        handle_2misc_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rmode, tcg_fpstatus);
8064        write_fp_dreg(s, rd, tcg_rd);
8065        tcg_temp_free_i64(tcg_rd);
8066        tcg_temp_free_i64(tcg_rn);
8067    } else {
8068        TCGv_i32 tcg_rn = tcg_temp_new_i32();
8069        TCGv_i32 tcg_rd = tcg_temp_new_i32();
8070
8071        read_vec_element_i32(s, tcg_rn, rn, 0, size);
8072
8073        switch (opcode) {
8074        case 0x7: /* SQABS, SQNEG */
8075        {
8076            NeonGenOneOpEnvFn *genfn;
8077            static NeonGenOneOpEnvFn * const fns[3][2] = {
8078                { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
8079                { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
8080                { gen_helper_neon_qabs_s32, gen_helper_neon_qneg_s32 },
8081            };
8082            genfn = fns[size][u];
8083            genfn(tcg_rd, cpu_env, tcg_rn);
8084            break;
8085        }
8086        case 0x1a: /* FCVTNS */
8087        case 0x1b: /* FCVTMS */
8088        case 0x1c: /* FCVTAS */
8089        case 0x3a: /* FCVTPS */
8090        case 0x3b: /* FCVTZS */
8091        {
8092            TCGv_i32 tcg_shift = tcg_const_i32(0);
8093            gen_helper_vfp_tosls(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
8094            tcg_temp_free_i32(tcg_shift);
8095            break;
8096        }
8097        case 0x5a: /* FCVTNU */
8098        case 0x5b: /* FCVTMU */
8099        case 0x5c: /* FCVTAU */
8100        case 0x7a: /* FCVTPU */
8101        case 0x7b: /* FCVTZU */
8102        {
8103            TCGv_i32 tcg_shift = tcg_const_i32(0);
8104            gen_helper_vfp_touls(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
8105            tcg_temp_free_i32(tcg_shift);
8106            break;
8107        }
8108        default:
8109            g_assert_not_reached();
8110        }
8111
8112        write_fp_sreg(s, rd, tcg_rd);
8113        tcg_temp_free_i32(tcg_rd);
8114        tcg_temp_free_i32(tcg_rn);
8115    }
8116
8117    if (is_fcvt) {
8118        gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
8119        tcg_temp_free_i32(tcg_rmode);
8120        tcg_temp_free_ptr(tcg_fpstatus);
8121    }
8122}
8123
8124/* SSHR[RA]/USHR[RA] - Vector shift right (optional rounding/accumulate) */
8125static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u,
8126                                 int immh, int immb, int opcode, int rn, int rd)
8127{
8128    int size = 32 - clz32(immh) - 1;
8129    int immhb = immh << 3 | immb;
8130    int shift = 2 * (8 << size) - immhb;
8131    bool accumulate = false;
8132    bool round = false;
8133    bool insert = false;
8134    int dsize = is_q ? 128 : 64;
8135    int esize = 8 << size;
8136    int elements = dsize/esize;
8137    TCGMemOp memop = size | (is_u ? 0 : MO_SIGN);
8138    TCGv_i64 tcg_rn = new_tmp_a64(s);
8139    TCGv_i64 tcg_rd = new_tmp_a64(s);
8140    TCGv_i64 tcg_round;
8141    int i;
8142
8143    if (extract32(immh, 3, 1) && !is_q) {
8144        unallocated_encoding(s);
8145        return;
8146    }
8147
8148    if (size > 3 && !is_q) {
8149        unallocated_encoding(s);
8150        return;
8151    }
8152
8153    if (!fp_access_check(s)) {
8154        return;
8155    }
8156
8157    switch (opcode) {
8158    case 0x02: /* SSRA / USRA (accumulate) */
8159        accumulate = true;
8160        break;
8161    case 0x04: /* SRSHR / URSHR (rounding) */
8162        round = true;
8163        break;
8164    case 0x06: /* SRSRA / URSRA (accum + rounding) */
8165        accumulate = round = true;
8166        break;
8167    case 0x08: /* SRI */
8168        insert = true;
8169        break;
8170    }
8171
8172    if (round) {
8173        uint64_t round_const = 1ULL << (shift - 1);
8174        tcg_round = tcg_const_i64(round_const);
8175    } else {
8176        TCGV_UNUSED_I64(tcg_round);
8177    }
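    /* Rounding adds half an output LSB (1 << (shift - 1)) before the
     * shift; handle_shri_with_rndacc() treats an unset tcg_round as
     * "no rounding".
     */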
8178
8179    for (i = 0; i < elements; i++) {
8180        read_vec_element(s, tcg_rn, rn, i, memop);
8181        if (accumulate || insert) {
8182            read_vec_element(s, tcg_rd, rd, i, memop);
8183        }
8184
8185        if (insert) {
8186            handle_shri_with_ins(tcg_rd, tcg_rn, size, shift);
8187        } else {
8188            handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
8189                                    accumulate, is_u, size, shift);
8190        }
8191
8192        write_vec_element(s, tcg_rd, rd, i, size);
8193    }
8194
8195    if (!is_q) {
8196        clear_vec_high(s, rd);
8197    }
8198
8199    if (round) {
8200        tcg_temp_free_i64(tcg_round);
8201    }
8202}
8203
8204/* SHL/SLI - Vector shift left */
8205static void handle_vec_simd_shli(DisasContext *s, bool is_q, bool insert,
8206                                int immh, int immb, int opcode, int rn, int rd)
8207{
8208    int size = 32 - clz32(immh) - 1;
8209    int immhb = immh << 3 | immb;
8210    int shift = immhb - (8 << size);
8211    int dsize = is_q ? 128 : 64;
8212    int esize = 8 << size;
8213    int elements = dsize/esize;
8214    TCGv_i64 tcg_rn = new_tmp_a64(s);
8215    TCGv_i64 tcg_rd = new_tmp_a64(s);
8216    int i;
8217
8218    if (extract32(immh, 3, 1) && !is_q) {
8219        unallocated_encoding(s);
8220        return;
8221    }
8222
8223    if (size > 3 && !is_q) {
8224        unallocated_encoding(s);
8225        return;
8226    }
8227
8228    if (!fp_access_check(s)) {
8229        return;
8230    }
8231
8232    for (i = 0; i < elements; i++) {
8233        read_vec_element(s, tcg_rn, rn, i, size);
8234        if (insert) {
8235            read_vec_element(s, tcg_rd, rd, i, size);
8236        }
8237
8238        handle_shli_with_ins(tcg_rd, tcg_rn, insert, shift);
8239
8240        write_vec_element(s, tcg_rd, rd, i, size);
8241    }
8242
8243    if (!is_q) {
8244        clear_vec_high(s, rd);
8245    }
8246}
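
/* For comparison with the right-shift decode above: here the left-shift
 * amount counts up from esize, shift = immhb - esize, giving a range of
 * [0, esize - 1].  E.g. immh=0001 immb=000 encodes LSL #0 on bytes and
 * immh=0001 immb=111 encodes LSL #7.  (Illustrative note only.)
 */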
8247
8248/* SSHLL/USHLL - Vector shift left with widening */
8249static void handle_vec_simd_wshli(DisasContext *s, bool is_q, bool is_u,
8250                                 int immh, int immb, int opcode, int rn, int rd)
8251{
8252    int size = 32 - clz32(immh) - 1;
8253    int immhb = immh << 3 | immb;
8254    int shift = immhb - (8 << size);
8255    int dsize = 64;
8256    int esize = 8 << size;
8257    int elements = dsize/esize;
8258    TCGv_i64 tcg_rn = new_tmp_a64(s);
8259    TCGv_i64 tcg_rd = new_tmp_a64(s);
8260    int i;
8261
8262    if (size >= 3) {
8263        unallocated_encoding(s);
8264        return;
8265    }
8266
8267    if (!fp_access_check(s)) {
8268        return;
8269    }
8270
8271    /* For the LL variants the store is larger than the load, so if
8272     * rd == rn we would overwrite parts of our input. To avoid that,
8273     * load everything up front and extract elements with shifts below.
8274     */
8275    read_vec_element(s, tcg_rn, rn, is_q ? 1 : 0, MO_64);
8276
8277    for (i = 0; i < elements; i++) {
8278        tcg_gen_shri_i64(tcg_rd, tcg_rn, i * esize);
8279        ext_and_shift_reg(tcg_rd, tcg_rd, size | (!is_u << 2), 0);
8280        tcg_gen_shli_i64(tcg_rd, tcg_rd, shift);
8281        write_vec_element(s, tcg_rd, rd, i, size + 1);
8282    }
8283}
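
/* A minimal C model of the loop above for USHLL Vd.8H, Vn.8B, #shift
 * (is_u, size 0, is_q 0), kept under #if 0 as an illustration only:
 * each 8-bit source element is zero-extended to 16 bits, then shifted.
 */
#if 0
static void ushll_8h_model(uint16_t d[8], const uint8_t n[8], int shift)
{
    int i;

    for (i = 0; i < 8; i++) {
        d[i] = (uint16_t)n[i] << shift;   /* widen, then shift left */
    }
}
#endif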
8284
8285/* SHRN/RSHRN - Shift right with narrowing (and potential rounding) */
8286static void handle_vec_simd_shrn(DisasContext *s, bool is_q,
8287                                 int immh, int immb, int opcode, int rn, int rd)
8288{
8289    int immhb = immh << 3 | immb;
8290    int size = 32 - clz32(immh) - 1;
8291    int dsize = 64;
8292    int esize = 8 << size;
8293    int elements = dsize/esize;
8294    int shift = (2 * esize) - immhb;
8295    bool round = extract32(opcode, 0, 1);
8296    TCGv_i64 tcg_rn, tcg_rd, tcg_final;
8297    TCGv_i64 tcg_round;
8298    int i;
8299
8300    if (extract32(immh, 3, 1)) {
8301        unallocated_encoding(s);
8302        return;
8303    }
8304
8305    if (!fp_access_check(s)) {
8306        return;
8307    }
8308
8309    tcg_rn = tcg_temp_new_i64();
8310    tcg_rd = tcg_temp_new_i64();
8311    tcg_final = tcg_temp_new_i64();
8312    read_vec_element(s, tcg_final, rd, is_q ? 1 : 0, MO_64);
8313
8314    if (round) {
8315        uint64_t round_const = 1ULL << (shift - 1);
8316        tcg_round = tcg_const_i64(round_const);
8317    } else {
8318        TCGV_UNUSED_I64(tcg_round);
8319    }
8320
8321    for (i = 0; i < elements; i++) {
8322        read_vec_element(s, tcg_rn, rn, i, size+1);
8323        handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
8324                                false, true, size+1, shift);
8325
8326        tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
8327    }
8328
8329    if (!is_q) {
8330        clear_vec_high(s, rd);
8331        write_vec_element(s, tcg_final, rd, 0, MO_64);
8332    } else {
8333        write_vec_element(s, tcg_final, rd, 1, MO_64);
8334    }
8335
8336    if (round) {
8337        tcg_temp_free_i64(tcg_round);
8338    }
8339    tcg_temp_free_i64(tcg_rn);
8340    tcg_temp_free_i64(tcg_rd);
8341    tcg_temp_free_i64(tcg_final);
8343}
8344
8346/* C3.6.14 AdvSIMD shift by immediate
8347 *  31  30   29 28         23 22  19 18  16 15    11  10 9    5 4    0
8348 * +---+---+---+-------------+------+------+--------+---+------+------+
8349 * | 0 | Q | U | 0 1 1 1 1 0 | immh | immb | opcode | 1 |  Rn  |  Rd  |
8350 * +---+---+---+-------------+------+------+--------+---+------+------+
8351 */
8352static void disas_simd_shift_imm(DisasContext *s, uint32_t insn)
8353{
8354    int rd = extract32(insn, 0, 5);
8355    int rn = extract32(insn, 5, 5);
8356    int opcode = extract32(insn, 11, 5);
8357    int immb = extract32(insn, 16, 3);
8358    int immh = extract32(insn, 19, 4);
8359    bool is_u = extract32(insn, 29, 1);
8360    bool is_q = extract32(insn, 30, 1);
8361
8362    switch (opcode) {
8363    case 0x08: /* SRI */
8364        if (!is_u) {
8365            unallocated_encoding(s);
8366            return;
8367        }
8368        /* fall through */
8369    case 0x00: /* SSHR / USHR */
8370    case 0x02: /* SSRA / USRA (accumulate) */
8371    case 0x04: /* SRSHR / URSHR (rounding) */
8372    case 0x06: /* SRSRA / URSRA (accum + rounding) */
8373        handle_vec_simd_shri(s, is_q, is_u, immh, immb, opcode, rn, rd);
8374        break;
8375    case 0x0a: /* SHL / SLI */
8376        handle_vec_simd_shli(s, is_q, is_u, immh, immb, opcode, rn, rd);
8377        break;
8378    case 0x10: /* SHRN / SQSHRUN */
8379    case 0x11: /* RSHRN / SQRSHRUN */
8380        if (is_u) {
8381            handle_vec_simd_sqshrn(s, false, is_q, false, true, immh, immb,
8382                                   opcode, rn, rd);
8383        } else {
8384            handle_vec_simd_shrn(s, is_q, immh, immb, opcode, rn, rd);
8385        }
8386        break;
8387    case 0x12: /* SQSHRN / UQSHRN */
8388    case 0x13: /* SQRSHRN / UQRSHRN */
8389        handle_vec_simd_sqshrn(s, false, is_q, is_u, is_u, immh, immb,
8390                               opcode, rn, rd);
8391        break;
8392    case 0x14: /* SSHLL / USHLL */
8393        handle_vec_simd_wshli(s, is_q, is_u, immh, immb, opcode, rn, rd);
8394        break;
8395    case 0x1c: /* SCVTF / UCVTF */
8396        handle_simd_shift_intfp_conv(s, false, is_q, is_u, immh, immb,
8397                                     opcode, rn, rd);
8398        break;
8399    case 0xc: /* SQSHLU */
8400        if (!is_u) {
8401            unallocated_encoding(s);
8402            return;
8403        }
8404        handle_simd_qshl(s, false, is_q, false, true, immh, immb, rn, rd);
8405        break;
8406    case 0xe: /* SQSHL, UQSHL */
8407        handle_simd_qshl(s, false, is_q, is_u, is_u, immh, immb, rn, rd);
8408        break;
8409    case 0x1f: /* FCVTZS / FCVTZU */
8410        handle_simd_shift_fpint_conv(s, false, is_q, is_u, immh, immb, rn, rd);
8411        return;
8412    default:
8413        unallocated_encoding(s);
8414        return;
8415    }
8416}
8417
8418/* Generate code to do a "long" addition or subtraction, ie one done in
8419 * TCGv_i64 on vector lanes twice the width specified by size.
8420 */
8421static void gen_neon_addl(int size, bool is_sub, TCGv_i64 tcg_res,
8422                          TCGv_i64 tcg_op1, TCGv_i64 tcg_op2)
8423{
8424    static NeonGenTwo64OpFn * const fns[3][2] = {
8425        { gen_helper_neon_addl_u16, gen_helper_neon_subl_u16 },
8426        { gen_helper_neon_addl_u32, gen_helper_neon_subl_u32 },
8427        { tcg_gen_add_i64, tcg_gen_sub_i64 },
8428    };
8429    NeonGenTwo64OpFn *genfn;
8430    assert(size < 3);
8431
8432    genfn = fns[size][is_sub];
8433    genfn(tcg_res, tcg_op1, tcg_op2);
8434}
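
/* The size 0/1 cases above go through helpers because the i64 holds
 * several packed lanes.  A sketch of the classic SWAR trick such a
 * helper can use (an assumption about neon_addl_u16's internals, shown
 * only to illustrate why a plain 64-bit add would let carries cross
 * lane boundaries):
 */
#if 0
static uint64_t addl_u16_sketch(uint64_t a, uint64_t b)
{
    const uint64_t msb = 0x8000800080008000ULL;

    /* Add the low 15 bits of each 16-bit lane, then fix up the lane
     * MSBs with xor so no carry ever propagates into the next lane.
     */
    return ((a & ~msb) + (b & ~msb)) ^ ((a ^ b) & msb);
}
#endif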
8435
8436static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size,
8437                                int opcode, int rd, int rn, int rm)
8438{
8439    /* 3-reg-different widening insns: 64 x 64 -> 128 */
8440    TCGv_i64 tcg_res[2];
8441    int pass, accop;
8442
8443    tcg_res[0] = tcg_temp_new_i64();
8444    tcg_res[1] = tcg_temp_new_i64();
8445
8446    /* Does this op do an adding accumulate, a subtracting accumulate,
8447     * or no accumulate at all?
8448     */
8449    switch (opcode) {
8450    case 5:
8451    case 8:
8452    case 9:
8453        accop = 1;
8454        break;
8455    case 10:
8456    case 11:
8457        accop = -1;
8458        break;
8459    default:
8460        accop = 0;
8461        break;
8462    }
8463
8464    if (accop != 0) {
8465        read_vec_element(s, tcg_res[0], rd, 0, MO_64);
8466        read_vec_element(s, tcg_res[1], rd, 1, MO_64);
8467    }
8468
8469    /* size == 2 means two 32x32->64 operations; this is worth special
8470     * casing because we can generally handle it inline.
8471     */
8472    if (size == 2) {
8473        for (pass = 0; pass < 2; pass++) {
8474            TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8475            TCGv_i64 tcg_op2 = tcg_temp_new_i64();
8476            TCGv_i64 tcg_passres;
8477            TCGMemOp memop = MO_32 | (is_u ? 0 : MO_SIGN);
8478
8479            int elt = pass + is_q * 2;
8480
8481            read_vec_element(s, tcg_op1, rn, elt, memop);
8482            read_vec_element(s, tcg_op2, rm, elt, memop);
8483
8484            if (accop == 0) {
8485                tcg_passres = tcg_res[pass];
8486            } else {
8487                tcg_passres = tcg_temp_new_i64();
8488            }
8489
8490            switch (opcode) {
8491            case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
8492                tcg_gen_add_i64(tcg_passres, tcg_op1, tcg_op2);
8493                break;
8494            case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
8495                tcg_gen_sub_i64(tcg_passres, tcg_op1, tcg_op2);
8496                break;
8497            case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
8498            case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
8499            {
8500                TCGv_i64 tcg_tmp1 = tcg_temp_new_i64();
8501                TCGv_i64 tcg_tmp2 = tcg_temp_new_i64();
8502
8503                tcg_gen_sub_i64(tcg_tmp1, tcg_op1, tcg_op2);
8504                tcg_gen_sub_i64(tcg_tmp2, tcg_op2, tcg_op1);
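                /* Absolute difference: select op1 - op2 when
                 * op1 >= op2 (signedness per is_u), else op2 - op1.
                 */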
8505                tcg_gen_movcond_i64(is_u ? TCG_COND_GEU : TCG_COND_GE,
8506                                    tcg_passres,
8507                                    tcg_op1, tcg_op2, tcg_tmp1, tcg_tmp2);
8508                tcg_temp_free_i64(tcg_tmp1);
8509                tcg_temp_free_i64(tcg_tmp2);
8510                break;
8511            }
8512            case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
8513            case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
8514            case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
8515                tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
8516                break;
8517            case 9: /* SQDMLAL, SQDMLAL2 */
8518            case 11: /* SQDMLSL, SQDMLSL2 */
8519            case 13: /* SQDMULL, SQDMULL2 */
8520                tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
8521                gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env,
8522                                                  tcg_passres, tcg_passres);
8523                break;
8524            default:
8525                g_assert_not_reached();
8526            }
8527
8528            if (opcode == 9 || opcode == 11) {
8529                /* saturating accumulate ops */
8530                if (accop < 0) {
8531                    tcg_gen_neg_i64(tcg_passres, tcg_passres);
8532                }
8533                gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env,
8534                                                  tcg_res[pass], tcg_passres);
8535            } else if (accop > 0) {
8536                tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
8537            } else if (accop < 0) {
8538                tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
8539            }
8540
8541            if (accop != 0) {
8542                tcg_temp_free_i64(tcg_passres);
8543            }
8544
8545            tcg_temp_free_i64(tcg_op1);
8546            tcg_temp_free_i64(tcg_op2);
8547        }
8548    } else {
8549        /* size 0 or 1, generally helper functions */
8550        for (pass = 0; pass < 2; pass++) {
8551            TCGv_i32 tcg_op1 = tcg_temp_new_i32();
8552            TCGv_i32 tcg_op2 = tcg_temp_new_i32();
8553            TCGv_i64 tcg_passres;
8554            int elt = pass + is_q * 2;
8555
8556            read_vec_element_i32(s, tcg_op1, rn, elt, MO_32);
8557            read_vec_element_i32(s, tcg_op2, rm, elt, MO_32);
8558
8559            if (accop == 0) {
8560                tcg_passres = tcg_res[pass];
8561            } else {
8562                tcg_passres = tcg_temp_new_i64();
8563            }
8564
8565            switch (opcode) {
8566            case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
8567            case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
8568            {
8569                TCGv_i64 tcg_op2_64 = tcg_temp_new_i64();
8570                static NeonGenWidenFn * const widenfns[2][2] = {
8571                    { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
8572                    { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
8573                };
8574                NeonGenWidenFn *widenfn = widenfns[size][is_u];
8575
8576                widenfn(tcg_op2_64, tcg_op2);
8577                widenfn(tcg_passres, tcg_op1);
8578                gen_neon_addl(size, (opcode == 2), tcg_passres,
8579                              tcg_passres, tcg_op2_64);
8580                tcg_temp_free_i64(tcg_op2_64);
8581                break;
8582            }
8583            case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
8584            case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
8585                if (size == 0) {
8586                    if (is_u) {
8587                        gen_helper_neon_abdl_u16(tcg_passres, tcg_op1, tcg_op2);
8588                    } else {
8589                        gen_helper_neon_abdl_s16(tcg_passres, tcg_op1, tcg_op2);
8590                    }
8591                } else {
8592                    if (is_u) {
8593                        gen_helper_neon_abdl_u32(tcg_passres, tcg_op1, tcg_op2);
8594                    } else {
8595                        gen_helper_neon_abdl_s32(tcg_passres, tcg_op1, tcg_op2);
8596                    }
8597                }
8598                break;
8599            case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
8600            case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
8601            case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
8602                if (size == 0) {
8603                    if (is_u) {
8604                        gen_helper_neon_mull_u8(tcg_passres, tcg_op1, tcg_op2);
8605                    } else {
8606                        gen_helper_neon_mull_s8(tcg_passres, tcg_op1, tcg_op2);
8607                    }
8608                } else {
8609                    if (is_u) {
8610                        gen_helper_neon_mull_u16(tcg_passres, tcg_op1, tcg_op2);
8611                    } else {
8612                        gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
8613                    }
8614                }
8615                break;
8616            case 9: /* SQDMLAL, SQDMLAL2 */
8617            case 11: /* SQDMLSL, SQDMLSL2 */
8618            case 13: /* SQDMULL, SQDMULL2 */
8619                assert(size == 1);
8620                gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
8621                gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
8622                                                  tcg_passres, tcg_passres);
8623                break;
8624            case 14: /* PMULL */
8625                assert(size == 0);
8626                gen_helper_neon_mull_p8(tcg_passres, tcg_op1, tcg_op2);
8627                break;
8628            default:
8629                g_assert_not_reached();
8630            }
8631            tcg_temp_free_i32(tcg_op1);
8632            tcg_temp_free_i32(tcg_op2);
8633
8634            if (accop != 0) {
8635                if (opcode == 9 || opcode == 11) {
8636                    /* saturating accumulate ops */
8637                    if (accop < 0) {
8638                        gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
8639                    }
8640                    gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env,
8641                                                      tcg_res[pass],
8642                                                      tcg_passres);
8643                } else {
8644                    gen_neon_addl(size, (accop < 0), tcg_res[pass],
8645                                  tcg_res[pass], tcg_passres);
8646                }
8647                tcg_temp_free_i64(tcg_passres);
8648            }
8649        }
8650    }
8651
8652    write_vec_element(s, tcg_res[0], rd, 0, MO_64);
8653    write_vec_element(s, tcg_res[1], rd, 1, MO_64);
8654    tcg_temp_free_i64(tcg_res[0]);
8655    tcg_temp_free_i64(tcg_res[1]);
8656}
8657
8658static void handle_3rd_wide(DisasContext *s, int is_q, int is_u, int size,
8659                            int opcode, int rd, int rn, int rm)
8660{
8661    TCGv_i64 tcg_res[2];
8662    int part = is_q ? 2 : 0;
8663    int pass;
8664
8665    for (pass = 0; pass < 2; pass++) {
8666        TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8667        TCGv_i32 tcg_op2 = tcg_temp_new_i32();
8668        TCGv_i64 tcg_op2_wide = tcg_temp_new_i64();
8669        static NeonGenWidenFn * const widenfns[3][2] = {
8670            { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
8671            { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
8672            { tcg_gen_ext_i32_i64, tcg_gen_extu_i32_i64 },
8673        };
8674        NeonGenWidenFn *widenfn = widenfns[size][is_u];
8675
8676        read_vec_element(s, tcg_op1, rn, pass, MO_64);
8677        read_vec_element_i32(s, tcg_op2, rm, part + pass, MO_32);
8678        widenfn(tcg_op2_wide, tcg_op2);
8679        tcg_temp_free_i32(tcg_op2);
8680        tcg_res[pass] = tcg_temp_new_i64();
8681        gen_neon_addl(size, (opcode == 3),
8682                      tcg_res[pass], tcg_op1, tcg_op2_wide);
8683        tcg_temp_free_i64(tcg_op1);
8684        tcg_temp_free_i64(tcg_op2_wide);
8685    }
8686
8687    for (pass = 0; pass < 2; pass++) {
8688        write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
8689        tcg_temp_free_i64(tcg_res[pass]);
8690    }
8691}
8692
8693static void do_narrow_round_high_u32(TCGv_i32 res, TCGv_i64 in)
8694{
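    /* Round to nearest by adding half the weight of the discarded
     * low 32 bits before taking the high half.
     */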
8695    tcg_gen_addi_i64(in, in, 1U << 31);
8696    tcg_gen_extrh_i64_i32(res, in);
8697}
8698
8699static void handle_3rd_narrowing(DisasContext *s, int is_q, int is_u, int size,
8700                                 int opcode, int rd, int rn, int rm)
8701{
8702    TCGv_i32 tcg_res[2];
8703    int part = is_q ? 2 : 0;
8704    int pass;
8705
8706    for (pass = 0; pass < 2; pass++) {
8707        TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8708        TCGv_i64 tcg_op2 = tcg_temp_new_i64();
8709        TCGv_i64 tcg_wideres = tcg_temp_new_i64();
8710        static NeonGenNarrowFn * const narrowfns[3][2] = {
8711            { gen_helper_neon_narrow_high_u8,
8712              gen_helper_neon_narrow_round_high_u8 },
8713            { gen_helper_neon_narrow_high_u16,
8714              gen_helper_neon_narrow_round_high_u16 },
8715            { tcg_gen_extrh_i64_i32, do_narrow_round_high_u32 },
8716        };
8717        NeonGenNarrowFn *gennarrow = narrowfns[size][is_u];
8718
8719        read_vec_element(s, tcg_op1, rn, pass, MO_64);
8720        read_vec_element(s, tcg_op2, rm, pass, MO_64);
8721
8722        gen_neon_addl(size, (opcode == 6), tcg_wideres, tcg_op1, tcg_op2);
8723
8724        tcg_temp_free_i64(tcg_op1);
8725        tcg_temp_free_i64(tcg_op2);
8726
8727        tcg_res[pass] = tcg_temp_new_i32();
8728        gennarrow(tcg_res[pass], tcg_wideres);
8729        tcg_temp_free_i64(tcg_wideres);
8730    }
8731
8732    for (pass = 0; pass < 2; pass++) {
8733        write_vec_element_i32(s, tcg_res[pass], rd, pass + part, MO_32);
8734        tcg_temp_free_i32(tcg_res[pass]);
8735    }
8736    if (!is_q) {
8737        clear_vec_high(s, rd);
8738    }
8739}
8740
8741static void handle_pmull_64(DisasContext *s, int is_q, int rd, int rn, int rm)
8742{
8743    /* PMULL of 64 x 64 -> 128 is an odd special case because it
8744     * is the only three-reg-diff instruction which produces a
8745     * 128-bit wide result from a single operation. However, since
8746     * it's possible to calculate the two halves more or less
8747     * separately, we just use two helper calls.
8748     */
8749    TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8750    TCGv_i64 tcg_op2 = tcg_temp_new_i64();
8751    TCGv_i64 tcg_res = tcg_temp_new_i64();
8752
8753    read_vec_element(s, tcg_op1, rn, is_q, MO_64);
8754    read_vec_element(s, tcg_op2, rm, is_q, MO_64);
8755    gen_helper_neon_pmull_64_lo(tcg_res, tcg_op1, tcg_op2);
8756    write_vec_element(s, tcg_res, rd, 0, MO_64);
8757    gen_helper_neon_pmull_64_hi(tcg_res, tcg_op1, tcg_op2);
8758    write_vec_element(s, tcg_res, rd, 1, MO_64);
8759
8760    tcg_temp_free_i64(tcg_op1);
8761    tcg_temp_free_i64(tcg_op2);
8762    tcg_temp_free_i64(tcg_res);
8763}
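
/* For reference, a carry-less (polynomial, GF(2)) multiply producing
 * the low 64 bits of the 128-bit product -- a sketch of the operation
 * the neon_pmull_64_lo/_hi helpers perform (assumed shape, illustration
 * only, hence #if 0):
 */
#if 0
static uint64_t pmull64_lo_sketch(uint64_t a, uint64_t b)
{
    uint64_t r = 0;
    int i;

    for (i = 0; i < 64; i++) {
        if (b & (1ULL << i)) {
            r ^= a << i;   /* GF(2) addition is xor: no carries */
        }
    }
    return r;
}
#endif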
8764
8765/* C3.6.15 AdvSIMD three different
8766 *   31  30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
8767 * +---+---+---+-----------+------+---+------+--------+-----+------+------+
8768 * | 0 | Q | U | 0 1 1 1 0 | size | 1 |  Rm  | opcode | 0 0 |  Rn  |  Rd  |
8769 * +---+---+---+-----------+------+---+------+--------+-----+------+------+
8770 */
8771static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn)
8772{
8773    /* Instructions in this group fall into three basic classes
8774     * (in each case with the operation working on each element in
8775     * the input vectors):
8776     * (1) widening 64 x 64 -> 128 (possibly with Vd as an extra
8777     *     128-bit input)
8778     * (2) wide 64 x 128 -> 128
8779     * (3) narrowing 128 x 128 -> 64
8780     * Here we do initial decode, catch unallocated cases and
8781     * dispatch to separate functions for each class.
8782     */
8783    int is_q = extract32(insn, 30, 1);
8784    int is_u = extract32(insn, 29, 1);
8785    int size = extract32(insn, 22, 2);
8786    int opcode = extract32(insn, 12, 4);
8787    int rm = extract32(insn, 16, 5);
8788    int rn = extract32(insn, 5, 5);
8789    int rd = extract32(insn, 0, 5);
8790
8791    switch (opcode) {
8792    case 1: /* SADDW, SADDW2, UADDW, UADDW2 */
8793    case 3: /* SSUBW, SSUBW2, USUBW, USUBW2 */
8794        /* 64 x 128 -> 128 */
8795        if (size == 3) {
8796            unallocated_encoding(s);
8797            return;
8798        }
8799        if (!fp_access_check(s)) {
8800            return;
8801        }
8802        handle_3rd_wide(s, is_q, is_u, size, opcode, rd, rn, rm);
8803        break;
8804    case 4: /* ADDHN, ADDHN2, RADDHN, RADDHN2 */
8805    case 6: /* SUBHN, SUBHN2, RSUBHN, RSUBHN2 */
8806        /* 128 x 128 -> 64 */
8807        if (size == 3) {
8808            unallocated_encoding(s);
8809            return;
8810        }
8811        if (!fp_access_check(s)) {
8812            return;
8813        }
8814        handle_3rd_narrowing(s, is_q, is_u, size, opcode, rd, rn, rm);
8815        break;
8816    case 14: /* PMULL, PMULL2 */
8817        if (is_u || size == 1 || size == 2) {
8818            unallocated_encoding(s);
8819            return;
8820        }
8821        if (size == 3) {
8822            if (!arm_dc_feature(s, ARM_FEATURE_V8_PMULL)) {
8823                unallocated_encoding(s);
8824                return;
8825            }
8826            if (!fp_access_check(s)) {
8827                return;
8828            }
8829            handle_pmull_64(s, is_q, rd, rn, rm);
8830            return;
8831        }
8832        goto is_widening;
8833    case 9: /* SQDMLAL, SQDMLAL2 */
8834    case 11: /* SQDMLSL, SQDMLSL2 */
8835    case 13: /* SQDMULL, SQDMULL2 */
8836        if (is_u || size == 0) {
8837            unallocated_encoding(s);
8838            return;
8839        }
8840        /* fall through */
8841    case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
8842    case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
8843    case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
8844    case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
8845    case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
8846    case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
8847    case 12: /* SMULL, SMULL2, UMULL, UMULL2 */
8848        /* 64 x 64 -> 128 */
8849        if (size == 3) {
8850            unallocated_encoding(s);
8851            return;
8852        }
8853    is_widening:
8854        if (!fp_access_check(s)) {
8855            return;
8856        }
8857
8858        handle_3rd_widening(s, is_q, is_u, size, opcode, rd, rn, rm);
8859        break;
8860    default:
8861        /* opcode 15 not allocated */
8862        unallocated_encoding(s);
8863        break;
8864    }
8865}
8866
8867/* Logic op (opcode == 3) subgroup of C3.6.16. */
8868static void disas_simd_3same_logic(DisasContext *s, uint32_t insn)
8869{
8870    int rd = extract32(insn, 0, 5);
8871    int rn = extract32(insn, 5, 5);
8872    int rm = extract32(insn, 16, 5);
8873    int size = extract32(insn, 22, 2);
8874    bool is_u = extract32(insn, 29, 1);
8875    bool is_q = extract32(insn, 30, 1);
8876    TCGv_i64 tcg_op1, tcg_op2, tcg_res[2];
8877    int pass;
8878
8879    if (!fp_access_check(s)) {
8880        return;
8881    }
8882
8883    tcg_op1 = tcg_temp_new_i64();
8884    tcg_op2 = tcg_temp_new_i64();
8885    tcg_res[0] = tcg_temp_new_i64();
8886    tcg_res[1] = tcg_temp_new_i64();
8887
8888    for (pass = 0; pass < (is_q ? 2 : 1); pass++) {
8889        read_vec_element(s, tcg_op1, rn, pass, MO_64);
8890        read_vec_element(s, tcg_op2, rm, pass, MO_64);
8891
8892        if (!is_u) {
8893            switch (size) {
8894            case 0: /* AND */
8895                tcg_gen_and_i64(tcg_res[pass], tcg_op1, tcg_op2);
8896                break;
8897            case 1: /* BIC */
8898                tcg_gen_andc_i64(tcg_res[pass], tcg_op1, tcg_op2);
8899                break;
8900            case 2: /* ORR */
8901                tcg_gen_or_i64(tcg_res[pass], tcg_op1, tcg_op2);
8902                break;
8903            case 3: /* ORN */
8904                tcg_gen_orc_i64(tcg_res[pass], tcg_op1, tcg_op2);
8905                break;
8906            }
8907        } else {
8908            if (size != 0) {
8909                /* B* ops need res loaded to operate on */
8910                read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
8911            }
8912
8913            switch (size) {
8914            case 0: /* EOR */
8915                tcg_gen_xor_i64(tcg_res[pass], tcg_op1, tcg_op2);
8916                break;
8917            case 1: /* BSL, bitwise select */
8918                tcg_gen_xor_i64(tcg_op1, tcg_op1, tcg_op2);
8919                tcg_gen_and_i64(tcg_op1, tcg_op1, tcg_res[pass]);
8920                tcg_gen_xor_i64(tcg_res[pass], tcg_op2, tcg_op1);
8921                break;
8922            case 2: /* BIT, bitwise insert if true */
8923                tcg_gen_xor_i64(tcg_op1, tcg_op1, tcg_res[pass]);
8924                tcg_gen_and_i64(tcg_op1, tcg_op1, tcg_op2);
8925                tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
8926                break;
8927            case 3: /* BIF, bitwise insert if false */
8928                tcg_gen_xor_i64(tcg_op1, tcg_op1, tcg_res[pass]);
8929                tcg_gen_andc_i64(tcg_op1, tcg_op1, tcg_op2);
8930                tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
8931                break;
8932            }
8933        }
8934    }
8935
8936    write_vec_element(s, tcg_res[0], rd, 0, MO_64);
8937    if (!is_q) {
8938        tcg_gen_movi_i64(tcg_res[1], 0);
8939    }
8940    write_vec_element(s, tcg_res[1], rd, 1, MO_64);
8941
8942    tcg_temp_free_i64(tcg_op1);
8943    tcg_temp_free_i64(tcg_op2);
8944    tcg_temp_free_i64(tcg_res[0]);
8945    tcg_temp_free_i64(tcg_res[1]);
8946}
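
/* The xor/and sequences above are the standard branch-free forms of the
 * bitwise-select family.  Written out as plain C (illustration only):
 */
#if 0
static uint64_t bsl64(uint64_t d, uint64_t n, uint64_t m)
{
    return m ^ ((n ^ m) & d);    /* d selects: 1 -> n, 0 -> m */
}

static uint64_t bit64(uint64_t d, uint64_t n, uint64_t m)
{
    return d ^ ((d ^ n) & m);    /* insert n bits where m is 1 */
}

static uint64_t bif64(uint64_t d, uint64_t n, uint64_t m)
{
    return d ^ ((d ^ n) & ~m);   /* insert n bits where m is 0 */
}
#endif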
8947
8948/* Helper functions for 32 bit signed/unsigned min and max */
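/* tcg_gen_movcond_i32(cond, ret, c1, c2, v1, v2) computes
 * ret = cond(c1, c2) ? v1 : v2, so e.g. gen_max_s32 below yields
 * res = (op1 >= op2) ? op1 : op2.
 */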
8949static void gen_max_s32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
8950{
8951    tcg_gen_movcond_i32(TCG_COND_GE, res, op1, op2, op1, op2);
8952}
8953
8954static void gen_max_u32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
8955{
8956    tcg_gen_movcond_i32(TCG_COND_GEU, res, op1, op2, op1, op2);
8957}
8958
8959static void gen_min_s32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
8960{
8961    tcg_gen_movcond_i32(TCG_COND_LE, res, op1, op2, op1, op2);
8962}
8963
8964static void gen_min_u32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
8965{
8966    tcg_gen_movcond_i32(TCG_COND_LEU, res, op1, op2, op1, op2);
8967}
8968
8969/* Pairwise op subgroup of C3.6.16.
8970 *
8971 * This is called directly, or from disas_simd_3same_float for the float
8972 * pairwise operations, where the opcode and size are derived differently.
8973 */
8974static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode,
8975                                   int size, int rn, int rm, int rd)
8976{
8977    TCGv_ptr fpst;
8978    int pass;
8979
8980    if (!fp_access_check(s)) {
8981        return;
8982    }
8983
8984    /* FP ops need fpst; allocate it only after the access check succeeds */
8985    if (opcode >= 0x58) {
8986        fpst = get_fpstatus_ptr();
8987    } else {
8988        TCGV_UNUSED_PTR(fpst);
8989    }
8990
8991    /* These operations work on the concatenated rm:rn, with each pair of
8992     * adjacent elements being operated on to produce an element in the result.
8993     */
8994    if (size == 3) {
8995        TCGv_i64 tcg_res[2];
8996
8997        for (pass = 0; pass < 2; pass++) {
8998            TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8999            TCGv_i64 tcg_op2 = tcg_temp_new_i64();
9000            int passreg = (pass == 0) ? rn : rm;
9001
9002            read_vec_element(s, tcg_op1, passreg, 0, MO_64);
9003            read_vec_element(s, tcg_op2, passreg, 1, MO_64);
9004            tcg_res[pass] = tcg_temp_new_i64();
9005
9006            switch (opcode) {
9007            case 0x17: /* ADDP */
9008                tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
9009                break;
9010            case 0x58: /* FMAXNMP */
9011                gen_helper_vfp_maxnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9012                break;
9013            case 0x5a: /* FADDP */
9014                gen_helper_vfp_addd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9015                break;
9016            case 0x5e: /* FMAXP */
9017                gen_helper_vfp_maxd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9018                break;
9019            case 0x78: /* FMINNMP */
9020                gen_helper_vfp_minnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9021                break;
9022            case 0x7e: /* FMINP */
9023                gen_helper_vfp_mind(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9024                break;
9025            default:
9026                g_assert_not_reached();
9027            }
9028
9029            tcg_temp_free_i64(tcg_op1);
9030            tcg_temp_free_i64(tcg_op2);
9031        }
9032
9033        for (pass = 0; pass < 2; pass++) {
9034            write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
9035            tcg_temp_free_i64(tcg_res[pass]);
9036        }
9037    } else {
9038        int maxpass = is_q ? 4 : 2;
9039        TCGv_i32 tcg_res[4];
9040
9041        for (pass = 0; pass < maxpass; pass++) {
9042            TCGv_i32 tcg_op1 = tcg_temp_new_i32();
9043            TCGv_i32 tcg_op2 = tcg_temp_new_i32();
9044            NeonGenTwoOpFn *genfn = NULL;
9045            int passreg = pass < (maxpass / 2) ? rn : rm;
9046            int passelt = (is_q && (pass & 1)) ? 2 : 0;
9047
9048            read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_32);
9049            read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_32);
9050            tcg_res[pass] = tcg_temp_new_i32();
9051
9052            switch (opcode) {
9053            case 0x17: /* ADDP */
9054            {
9055                static NeonGenTwoOpFn * const fns[3] = {
9056                    gen_helper_neon_padd_u8,
9057                    gen_helper_neon_padd_u16,
9058                    tcg_gen_add_i32,
9059                };
9060                genfn = fns[size];
9061                break;
9062            }
9063            case 0x14: /* SMAXP, UMAXP */
9064            {
9065                static NeonGenTwoOpFn * const fns[3][2] = {
9066                    { gen_helper_neon_pmax_s8, gen_helper_neon_pmax_u8 },
9067                    { gen_helper_neon_pmax_s16, gen_helper_neon_pmax_u16 },
9068                    { gen_max_s32, gen_max_u32 },
9069                };
9070                genfn = fns[size][u];
9071                break;
9072            }
9073            case 0x15: /* SMINP, UMINP */
9074            {
9075                static NeonGenTwoOpFn * const fns[3][2] = {
9076                    { gen_helper_neon_pmin_s8, gen_helper_neon_pmin_u8 },
9077                    { gen_helper_neon_pmin_s16, gen_helper_neon_pmin_u16 },
9078                    { gen_min_s32, gen_min_u32 },
9079                };
9080                genfn = fns[size][u];
9081                break;
9082            }
9083            /* The FP operations are all on single floats (32 bit) */
9084            case 0x58: /* FMAXNMP */
9085                gen_helper_vfp_maxnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9086                break;
9087            case 0x5a: /* FADDP */
9088                gen_helper_vfp_adds(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9089                break;
9090            case 0x5e: /* FMAXP */
9091                gen_helper_vfp_maxs(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9092                break;
9093            case 0x78: /* FMINNMP */
9094                gen_helper_vfp_minnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9095                break;
9096            case 0x7e: /* FMINP */
9097                gen_helper_vfp_mins(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9098                break;
9099            default:
9100                g_assert_not_reached();
9101            }
9102
9103            /* The FP cases above emitted their helpers directly; call genfn now */
9104            if (genfn) {
9105                genfn(tcg_res[pass], tcg_op1, tcg_op2);
9106            }
9107
9108            tcg_temp_free_i32(tcg_op1);
9109            tcg_temp_free_i32(tcg_op2);
9110        }
9111
9112        for (pass = 0; pass < maxpass; pass++) {
9113            write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
9114            tcg_temp_free_i32(tcg_res[pass]);
9115        }
9116        if (!is_q) {
9117            clear_vec_high(s, rd);
9118        }
9119    }
9120
9121    if (!TCGV_IS_UNUSED_PTR(fpst)) {
9122        tcg_temp_free_ptr(fpst);
9123    }
9124}
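
/* The pairwise lane mapping above, written out for ADDP Vd.4S as an
 * illustration only: the low half of the result is formed from pairs
 * within Rn, the high half from pairs within Rm.
 */
#if 0
static void addp_4s_model(uint32_t d[4], const uint32_t n[4],
                          const uint32_t m[4])
{
    d[0] = n[0] + n[1];
    d[1] = n[2] + n[3];
    d[2] = m[0] + m[1];
    d[3] = m[2] + m[3];
}
#endif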
9125
9126/* Floating point op subgroup of C3.6.16. */
9127static void disas_simd_3same_float(DisasContext *s, uint32_t insn)
9128{
9129    /* For floating point ops, the U, size[1] and opcode bits
9130     * together indicate the operation. size[0] indicates single
9131     * or double.
9132     */
9133    int fpopcode = extract32(insn, 11, 5)
9134        | (extract32(insn, 23, 1) << 5)
9135        | (extract32(insn, 29, 1) << 6);
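    /* e.g. FADDP has U=1, size=0x, opcode=0x1a: fpopcode 0x5a */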
9136    int is_q = extract32(insn, 30, 1);
9137    int size = extract32(insn, 22, 1);
9138    int rm = extract32(insn, 16, 5);
9139    int rn = extract32(insn, 5, 5);
9140    int rd = extract32(insn, 0, 5);
9141
9142    int datasize = is_q ? 128 : 64;
9143    int esize = 32 << size;
9144    int elements = datasize / esize;
9145
9146    if (size == 1 && !is_q) {
9147        unallocated_encoding(s);
9148        return;
9149    }
9150
9151    switch (fpopcode) {
9152    case 0x58: /* FMAXNMP */
9153    case 0x5a: /* FADDP */
9154    case 0x5e: /* FMAXP */
9155    case 0x78: /* FMINNMP */
9156    case 0x7e: /* FMINP */
9157        if (size && !is_q) {
9158            unallocated_encoding(s);
9159            return;
9160        }
9161        handle_simd_3same_pair(s, is_q, 0, fpopcode, size ? MO_64 : MO_32,
9162                               rn, rm, rd);
9163        return;
9164    case 0x1b: /* FMULX */
9165    case 0x1f: /* FRECPS */
9166    case 0x3f: /* FRSQRTS */
9167    case 0x5d: /* FACGE */
9168    case 0x7d: /* FACGT */
9169    case 0x19: /* FMLA */
9170    case 0x39: /* FMLS */
9171    case 0x18: /* FMAXNM */
9172    case 0x1a: /* FADD */
9173    case 0x1c: /* FCMEQ */
9174    case 0x1e: /* FMAX */
9175    case 0x38: /* FMINNM */
9176    case 0x3a: /* FSUB */
9177    case 0x3e: /* FMIN */
9178    case 0x5b: /* FMUL */
9179    case 0x5c: /* FCMGE */
9180    case 0x5f: /* FDIV */
9181    case 0x7a: /* FABD */
9182    case 0x7c: /* FCMGT */
9183        if (!fp_access_check(s)) {
9184            return;
9185        }
9186
9187        handle_3same_float(s, size, elements, fpopcode, rd, rn, rm);
9188        return;
9189    default:
9190        unallocated_encoding(s);
9191        return;
9192    }
9193}
9194
9195/* Integer op subgroup of C3.6.16. */
9196static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
9197{
9198    int is_q = extract32(insn, 30, 1);
9199    int u = extract32(insn, 29, 1);
9200    int size = extract32(insn, 22, 2);
9201    int opcode = extract32(insn, 11, 5);
9202    int rm = extract32(insn, 16, 5);
9203    int rn = extract32(insn, 5, 5);
9204    int rd = extract32(insn, 0, 5);
9205    int pass;
9206
9207    switch (opcode) {
9208    case 0x13: /* MUL, PMUL */
9209        if (u && size != 0) {
9210            unallocated_encoding(s);
9211            return;
9212        }
9213        /* fall through */
9214    case 0x0: /* SHADD, UHADD */
9215    case 0x2: /* SRHADD, URHADD */
9216    case 0x4: /* SHSUB, UHSUB */
9217    case 0xc: /* SMAX, UMAX */
9218    case 0xd: /* SMIN, UMIN */
9219    case 0xe: /* SABD, UABD */
9220    case 0xf: /* SABA, UABA */
9221    case 0x12: /* MLA, MLS */
9222        if (size == 3) {
9223            unallocated_encoding(s);
9224            return;
9225        }
9226        break;
9227    case 0x16: /* SQDMULH, SQRDMULH */
9228        if (size == 0 || size == 3) {
9229            unallocated_encoding(s);
9230            return;
9231        }
9232        break;
9233    default:
9234        if (size == 3 && !is_q) {
9235            unallocated_encoding(s);
9236            return;
9237        }
9238        break;
9239    }
9240
9241    if (!fp_access_check(s)) {
9242        return;
9243    }
9244
9245    if (size == 3) {
9246        assert(is_q);
9247        for (pass = 0; pass < 2; pass++) {
9248            TCGv_i64 tcg_op1 = tcg_temp_new_i64();
9249            TCGv_i64 tcg_op2 = tcg_temp_new_i64();
9250            TCGv_i64 tcg_res = tcg_temp_new_i64();
9251
9252            read_vec_element(s, tcg_op1, rn, pass, MO_64);
9253            read_vec_element(s, tcg_op2, rm, pass, MO_64);
9254
9255            handle_3same_64(s, opcode, u, tcg_res, tcg_op1, tcg_op2);
9256
9257            write_vec_element(s, tcg_res, rd, pass, MO_64);
9258
9259            tcg_temp_free_i64(tcg_res);
9260            tcg_temp_free_i64(tcg_op1);
9261            tcg_temp_free_i64(tcg_op2);
9262        }
9263    } else {
9264        for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
9265            TCGv_i32 tcg_op1 = tcg_temp_new_i32();
9266            TCGv_i32 tcg_op2 = tcg_temp_new_i32();
9267            TCGv_i32 tcg_res = tcg_temp_new_i32();
9268            NeonGenTwoOpFn *genfn = NULL;
9269            NeonGenTwoOpEnvFn *genenvfn = NULL;
9270
9271            read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
9272            read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);
9273
9274            switch (opcode) {
9275            case 0x0: /* SHADD, UHADD */
9276            {
9277                static NeonGenTwoOpFn * const fns[3][2] = {
9278                    { gen_helper_neon_hadd_s8, gen_helper_neon_hadd_u8 },
9279                    { gen_helper_neon_hadd_s16, gen_helper_neon_hadd_u16 },
9280                    { gen_helper_neon_hadd_s32, gen_helper_neon_hadd_u32 },
9281                };
9282                genfn = fns[size][u];
9283                break;
9284            }
9285            case 0x1: /* SQADD, UQADD */
9286            {
9287                static NeonGenTwoOpEnvFn * const fns[3][2] = {
9288                    { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 },
9289                    { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 },
9290                    { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 },
9291                };
9292                genenvfn = fns[size][u];
9293                break;
9294            }
9295            case 0x2: /* SRHADD, URHADD */
9296            {
9297                static NeonGenTwoOpFn * const fns[3][2] = {
9298                    { gen_helper_neon_rhadd_s8, gen_helper_neon_rhadd_u8 },
9299                    { gen_helper_neon_rhadd_s16, gen_helper_neon_rhadd_u16 },
9300                    { gen_helper_neon_rhadd_s32, gen_helper_neon_rhadd_u32 },
9301                };
9302                genfn = fns[size][u];
9303                break;
9304            }
9305            case 0x4: /* SHSUB, UHSUB */
9306            {
9307                static NeonGenTwoOpFn * const fns[3][2] = {
9308                    { gen_helper_neon_hsub_s8, gen_helper_neon_hsub_u8 },
9309                    { gen_helper_neon_hsub_s16, gen_helper_neon_hsub_u16 },
9310                    { gen_helper_neon_hsub_s32, gen_helper_neon_hsub_u32 },
9311                };
9312                genfn = fns[size][u];
9313                break;
9314            }
9315            case 0x5: /* SQSUB, UQSUB */
9316            {
9317                static NeonGenTwoOpEnvFn * const fns[3][2] = {
9318                    { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 },
9319                    { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 },
9320                    { gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 },
9321                };
9322                genenvfn = fns[size][u];
9323                break;
9324            }
9325            case 0x6: /* CMGT, CMHI */
9326            {
9327                static NeonGenTwoOpFn * const fns[3][2] = {
9328                    { gen_helper_neon_cgt_s8, gen_helper_neon_cgt_u8 },
9329                    { gen_helper_neon_cgt_s16, gen_helper_neon_cgt_u16 },
9330                    { gen_helper_neon_cgt_s32, gen_helper_neon_cgt_u32 },
9331                };
9332                genfn = fns[size][u];
9333                break;
9334            }
9335            case 0x7: /* CMGE, CMHS */
9336            {
9337                static NeonGenTwoOpFn * const fns[3][2] = {
9338                    { gen_helper_neon_cge_s8, gen_helper_neon_cge_u8 },
9339                    { gen_helper_neon_cge_s16, gen_helper_neon_cge_u16 },
9340                    { gen_helper_neon_cge_s32, gen_helper_neon_cge_u32 },
9341                };
9342                genfn = fns[size][u];
9343                break;
9344            }
9345            case 0x8: /* SSHL, USHL */
9346            {
9347                static NeonGenTwoOpFn * const fns[3][2] = {
9348                    { gen_helper_neon_shl_s8, gen_helper_neon_shl_u8 },
9349                    { gen_helper_neon_shl_s16, gen_helper_neon_shl_u16 },
9350                    { gen_helper_neon_shl_s32, gen_helper_neon_shl_u32 },
9351                };
9352                genfn = fns[size][u];
9353                break;
9354            }
9355            case 0x9: /* SQSHL, UQSHL */
9356            {
9357                static NeonGenTwoOpEnvFn * const fns[3][2] = {
9358                    { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
9359                    { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
9360                    { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
9361                };
9362                genenvfn = fns[size][u];
9363                break;
9364            }
9365            case 0xa: /* SRSHL, URSHL */
9366            {
9367                static NeonGenTwoOpFn * const fns[3][2] = {
9368                    { gen_helper_neon_rshl_s8, gen_helper_neon_rshl_u8 },
9369                    { gen_helper_neon_rshl_s16, gen_helper_neon_rshl_u16 },
9370                    { gen_helper_neon_rshl_s32, gen_helper_neon_rshl_u32 },
9371                };
9372                genfn = fns[size][u];
9373                break;
9374            }
9375            case 0xb: /* SQRSHL, UQRSHL */
9376            {
9377                static NeonGenTwoOpEnvFn * const fns[3][2] = {
9378                    { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
9379                    { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
9380                    { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
9381                };
9382                genenvfn = fns[size][u];
9383                break;
9384            }
9385            case 0xc: /* SMAX, UMAX */
9386            {
9387                static NeonGenTwoOpFn * const fns[3][2] = {
9388                    { gen_helper_neon_max_s8, gen_helper_neon_max_u8 },
9389                    { gen_helper_neon_max_s16, gen_helper_neon_max_u16 },
9390                    { gen_max_s32, gen_max_u32 },
9391                };
9392                genfn = fns[size][u];
9393                break;
9394            }
9395
9396            case 0xd: /* SMIN, UMIN */
9397            {
9398                static NeonGenTwoOpFn * const fns[3][2] = {
9399                    { gen_helper_neon_min_s8, gen_helper_neon_min_u8 },
9400                    { gen_helper_neon_min_s16, gen_helper_neon_min_u16 },
9401                    { gen_min_s32, gen_min_u32 },
9402                };
9403                genfn = fns[size][u];
9404                break;
9405            }
9406            case 0xe: /* SABD, UABD */
9407            case 0xf: /* SABA, UABA */
9408            {
9409                static NeonGenTwoOpFn * const fns[3][2] = {
9410                    { gen_helper_neon_abd_s8, gen_helper_neon_abd_u8 },
9411                    { gen_helper_neon_abd_s16, gen_helper_neon_abd_u16 },
9412                    { gen_helper_neon_abd_s32, gen_helper_neon_abd_u32 },
9413                };
9414                genfn = fns[size][u];
9415                break;
9416            }
9417            case 0x10: /* ADD, SUB */
9418            {
9419                static NeonGenTwoOpFn * const fns[3][2] = {
9420                    { gen_helper_neon_add_u8, gen_helper_neon_sub_u8 },
9421                    { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
9422                    { tcg_gen_add_i32, tcg_gen_sub_i32 },
9423                };
9424                genfn = fns[size][u];
9425                break;
9426            }
9427            case 0x11: /* CMTST, CMEQ */
9428            {
9429                static NeonGenTwoOpFn * const fns[3][2] = {
9430                    { gen_helper_neon_tst_u8, gen_helper_neon_ceq_u8 },
9431                    { gen_helper_neon_tst_u16, gen_helper_neon_ceq_u16 },
9432                    { gen_helper_neon_tst_u32, gen_helper_neon_ceq_u32 },
9433                };
9434                genfn = fns[size][u];
9435                break;
9436            }
9437            case 0x13: /* MUL, PMUL */
9438                if (u) {
9439                    /* PMUL */
9440                    assert(size == 0);
9441                    genfn = gen_helper_neon_mul_p8;
9442                    break;
9443                }
9444                /* fall through: MUL */
9445            case 0x12: /* MLA, MLS */
9446            {
9447                static NeonGenTwoOpFn * const fns[3] = {
9448                    gen_helper_neon_mul_u8,
9449                    gen_helper_neon_mul_u16,
9450                    tcg_gen_mul_i32,
9451                };
9452                genfn = fns[size];
9453                break;
9454            }
9455            case 0x16: /* SQDMULH, SQRDMULH */
9456            {
9457                static NeonGenTwoOpEnvFn * const fns[2][2] = {
9458                    { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 },
9459                    { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 },
9460                };
9461                assert(size == 1 || size == 2);
9462                genenvfn = fns[size - 1][u];
9463                break;
9464            }
9465            default:
9466                g_assert_not_reached();
9467            }
9468
9469            if (genenvfn) {
9470                genenvfn(tcg_res, cpu_env, tcg_op1, tcg_op2);
9471            } else {
9472                genfn(tcg_res, tcg_op1, tcg_op2);
9473            }
9474
9475            if (opcode == 0xf || opcode == 0x12) {
9476                /* SABA, UABA, MLA, MLS: accumulating ops */
9477                static NeonGenTwoOpFn * const fns[3][2] = {
9478                    { gen_helper_neon_add_u8, gen_helper_neon_sub_u8 },
9479                    { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
9480                    { tcg_gen_add_i32, tcg_gen_sub_i32 },
9481                };
9482                bool is_sub = (opcode == 0x12 && u); /* MLS */
9483
9484                genfn = fns[size][is_sub];
9485                read_vec_element_i32(s, tcg_op1, rd, pass, MO_32);
9486                genfn(tcg_res, tcg_op1, tcg_res);
9487            }
9488
9489            write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
9490
9491            tcg_temp_free_i32(tcg_res);
9492            tcg_temp_free_i32(tcg_op1);
9493            tcg_temp_free_i32(tcg_op2);
9494        }
9495    }
9496
9497    if (!is_q) {
9498        clear_vec_high(s, rd);
9499    }
9500}
9501
9502/* C3.6.16 AdvSIMD three same
9503 *  31  30  29  28       24 23  22  21 20  16 15    11  10 9    5 4    0
9504 * +---+---+---+-----------+------+---+------+--------+---+------+------+
9505 * | 0 | Q | U | 0 1 1 1 0 | size | 1 |  Rm  | opcode | 1 |  Rn  |  Rd  |
9506 * +---+---+---+-----------+------+---+------+--------+---+------+------+
9507 */
9508static void disas_simd_three_reg_same(DisasContext *s, uint32_t insn)
9509{
9510    int opcode = extract32(insn, 11, 5);
9511
9512    switch (opcode) {
9513    case 0x3: /* logic ops */
9514        disas_simd_3same_logic(s, insn);
9515        break;
9516    case 0x17: /* ADDP */
9517    case 0x14: /* SMAXP, UMAXP */
9518    case 0x15: /* SMINP, UMINP */
9519    {
9520        /* Pairwise operations */
9521        int is_q = extract32(insn, 30, 1);
9522        int u = extract32(insn, 29, 1);
9523        int size = extract32(insn, 22, 2);
9524        int rm = extract32(insn, 16, 5);
9525        int rn = extract32(insn, 5, 5);
9526        int rd = extract32(insn, 0, 5);
9527        if (opcode == 0x17) {
9528            if (u || (size == 3 && !is_q)) {
9529                unallocated_encoding(s);
9530                return;
9531            }
9532        } else {
9533            if (size == 3) {
9534                unallocated_encoding(s);
9535                return;
9536            }
9537        }
9538        handle_simd_3same_pair(s, is_q, u, opcode, size, rn, rm, rd);
9539        break;
9540    }
9541    case 0x18 ... 0x31:
9542        /* floating point ops, sz[1] and U are part of opcode */
9543        disas_simd_3same_float(s, insn);
9544        break;
9545    default:
9546        disas_simd_3same_int(s, insn);
9547        break;
9548    }
9549}
9550
9551static void handle_2misc_widening(DisasContext *s, int opcode, bool is_q,
9552                                  int size, int rn, int rd)
9553{
9554    /* Handle 2-reg-misc ops which are widening (so each size element
9555     * in the source becomes a 2*size element in the destination).
9556     * The only instruction like this is FCVTL.
9557     */
9558    int pass;
9559
9560    if (size == 3) {
9561        /* 32 -> 64 bit fp conversion */
9562        TCGv_i64 tcg_res[2];
9563        int srcelt = is_q ? 2 : 0;
9564
9565        for (pass = 0; pass < 2; pass++) {
9566            TCGv_i32 tcg_op = tcg_temp_new_i32();
9567            tcg_res[pass] = tcg_temp_new_i64();
9568
9569            read_vec_element_i32(s, tcg_op, rn, srcelt + pass, MO_32);
9570            gen_helper_vfp_fcvtds(tcg_res[pass], tcg_op, cpu_env);
9571            tcg_temp_free_i32(tcg_op);
9572        }
9573        for (pass = 0; pass < 2; pass++) {
9574            write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
9575            tcg_temp_free_i64(tcg_res[pass]);
9576        }
9577    } else {
9578        /* 16 -> 32 bit fp conversion */
9579        int srcelt = is_q ? 4 : 0;
9580        TCGv_i32 tcg_res[4];
9581
9582        for (pass = 0; pass < 4; pass++) {
9583            tcg_res[pass] = tcg_temp_new_i32();
9584
9585            read_vec_element_i32(s, tcg_res[pass], rn, srcelt + pass, MO_16);
9586            gen_helper_vfp_fcvt_f16_to_f32(tcg_res[pass], tcg_res[pass],
9587                                           cpu_env);
9588        }
9589        for (pass = 0; pass < 4; pass++) {
9590            write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
9591            tcg_temp_free_i32(tcg_res[pass]);
9592        }
9593    }
9594}
9595
9596static void handle_rev(DisasContext *s, int opcode, bool u,
9597                       bool is_q, int size, int rn, int rd)
9598{
9599    int op = (opcode << 1) | u;
9600    int opsz = op + size;
9601    int grp_size = 3 - opsz;
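    /* e.g. REV64 (op 0) on bytes (size 0): opsz 0, grp_size 3, so we
     * byte-swap within each 64-bit group.
     */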
9602    int dsize = is_q ? 128 : 64;
9603    int i;
9604
9605    if (opsz >= 3) {
9606        unallocated_encoding(s);
9607        return;
9608    }
9609
9610    if (!fp_access_check(s)) {
9611        return;
9612    }
9613
9614    if (size == 0) {
9615        /* Special case bytes, use bswap op on each group of elements */
9616        int groups = dsize / (8 << grp_size);
9617
9618        for (i = 0; i < groups; i++) {
9619            TCGv_i64 tcg_tmp = tcg_temp_new_i64();
9620
9621            read_vec_element(s, tcg_tmp, rn, i, grp_size);
9622            switch (grp_size) {
9623            case MO_16:
9624                tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
9625                break;
9626            case MO_32:
9627                tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp);
9628                break;
9629            case MO_64:
9630                tcg_gen_bswap64_i64(tcg_tmp, tcg_tmp);
9631                break;
9632            default:
9633                g_assert_not_reached();
9634            }
9635            write_vec_element(s, tcg_tmp, rd, i, grp_size);
9636            tcg_temp_free_i64(tcg_tmp);
9637        }
9638        if (!is_q) {
9639            clear_vec_high(s, rd);
9640        }
9641    } else {
9642        int revmask = (1 << grp_size) - 1;
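        /* revmask flips the element index within its reversal group,
         * e.g. REV64 on 32-bit elements has grp_size 1, swapping
         * elements 0<->1 and 2<->3.
         */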
9643        int esize = 8 << size;
9644        int elements = dsize / esize;
9645        TCGv_i64 tcg_rn = tcg_temp_new_i64();
9646        TCGv_i64 tcg_rd = tcg_const_i64(0);
9647        TCGv_i64 tcg_rd_hi = tcg_const_i64(0);
9648
9649        for (i = 0; i < elements; i++) {
9650            int e_rev = (i & 0xf) ^ revmask;
9651            int off = e_rev * esize;
9652            read_vec_element(s, tcg_rn, rn, i, size);
9653            if (off >= 64) {
9654                tcg_gen_deposit_i64(tcg_rd_hi, tcg_rd_hi,
9655                                    tcg_rn, off - 64, esize);
9656            } else {
9657                tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, off, esize);
9658            }
9659        }
9660        write_vec_element(s, tcg_rd, rd, 0, MO_64);
9661        write_vec_element(s, tcg_rd_hi, rd, 1, MO_64);
9662
9663        tcg_temp_free_i64(tcg_rd_hi);
9664        tcg_temp_free_i64(tcg_rd);
9665        tcg_temp_free_i64(tcg_rn);
9666    }
9667}
9668
9669static void handle_2misc_pairwise(DisasContext *s, int opcode, bool u,
9670                                  bool is_q, int size, int rn, int rd)
9671{
9672    /* Implement the pairwise operations from 2-misc:
9673     * SADDLP, UADDLP, SADALP, UADALP.
9674     * These all add pairs of elements in the input to produce a
9675     * double-width result element in the output (possibly accumulating).
9676     */
9677    bool accum = (opcode == 0x6);
9678    int maxpass = is_q ? 2 : 1;
9679    int pass;
9680    TCGv_i64 tcg_res[2];
9681
9682    if (size == 2) {
9683        /* 32 + 32 -> 64 op */
9684        TCGMemOp memop = size + (u ? 0 : MO_SIGN);
9685
9686        for (pass = 0; pass < maxpass; pass++) {
9687            TCGv_i64 tcg_op1 = tcg_temp_new_i64();
9688            TCGv_i64 tcg_op2 = tcg_temp_new_i64();
9689
9690            tcg_res[pass] = tcg_temp_new_i64();
9691
9692            read_vec_element(s, tcg_op1, rn, pass * 2, memop);
9693            read_vec_element(s, tcg_op2, rn, pass * 2 + 1, memop);
9694            tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
9695            if (accum) {
9696                read_vec_element(s, tcg_op1, rd, pass, MO_64);
9697                tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
9698            }
9699
9700            tcg_temp_free_i64(tcg_op1);
9701            tcg_temp_free_i64(tcg_op2);
9702        }
9703    } else {
9704        for (pass = 0; pass < maxpass; pass++) {
9705            TCGv_i64 tcg_op = tcg_temp_new_i64();
9706            NeonGenOneOpFn *genfn;
9707            static NeonGenOneOpFn * const fns[2][2] = {
9708                { gen_helper_neon_addlp_s8,  gen_helper_neon_addlp_u8 },
9709                { gen_helper_neon_addlp_s16,  gen_helper_neon_addlp_u16 },
9710            };
9711
9712            genfn = fns[size][u];
9713
9714            tcg_res[pass] = tcg_temp_new_i64();
9715
9716            read_vec_element(s, tcg_op, rn, pass, MO_64);
9717            genfn(tcg_res[pass], tcg_op);
9718
9719            if (accum) {
9720                read_vec_element(s, tcg_op, rd, pass, MO_64);
9721                if (size == 0) {
9722                    gen_helper_neon_addl_u16(tcg_res[pass],
9723                                             tcg_res[pass], tcg_op);
9724                } else {
9725                    gen_helper_neon_addl_u32(tcg_res[pass],
9726                                             tcg_res[pass], tcg_op);
9727                }
9728            }
9729            tcg_temp_free_i64(tcg_op);
9730        }
9731    }
9732    if (!is_q) {
9733        tcg_res[1] = tcg_const_i64(0);
9734    }
9735    for (pass = 0; pass < 2; pass++) {
9736        write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
9737        tcg_temp_free_i64(tcg_res[pass]);
9738    }
9739}
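
/*
 * Minimal scalar sketch of the size == 0 pairwise step above (roughly
 * what gen_helper_neon_addlp_s8 computes for one 64-bit lane); the
 * helper name below is ours and purely illustrative.  Each pair of
 * adjacent signed bytes is summed into one halfword of the result.
 */
static inline uint64_t addlp_s8_sketch(uint64_t lane)
{
    uint64_t out = 0;
    int i;

    for (i = 0; i < 4; i++) {
        int8_t lo = (int8_t)(lane >> (16 * i));
        int8_t hi = (int8_t)(lane >> (16 * i + 8));

        out |= (uint64_t)(uint16_t)(lo + hi) << (16 * i);
    }
    return out;
}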
9740
9741static void handle_shll(DisasContext *s, bool is_q, int size, int rn, int rd)
9742{
9743    /* Implement SHLL and SHLL2 */
9744    int pass;
9745    int part = is_q ? 2 : 0;
9746    TCGv_i64 tcg_res[2];
9747
9748    for (pass = 0; pass < 2; pass++) {
9749        static NeonGenWidenFn * const widenfns[3] = {
9750            gen_helper_neon_widen_u8,
9751            gen_helper_neon_widen_u16,
9752            tcg_gen_extu_i32_i64,
9753        };
9754        NeonGenWidenFn *widenfn = widenfns[size];
9755        TCGv_i32 tcg_op = tcg_temp_new_i32();
9756
9757        read_vec_element_i32(s, tcg_op, rn, part + pass, MO_32);
9758        tcg_res[pass] = tcg_temp_new_i64();
9759        widenfn(tcg_res[pass], tcg_op);
9760        tcg_gen_shli_i64(tcg_res[pass], tcg_res[pass], 8 << size);
9761
9762        tcg_temp_free_i32(tcg_op);
9763    }
9764
9765    for (pass = 0; pass < 2; pass++) {
9766        write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
9767        tcg_temp_free_i64(tcg_res[pass]);
9768    }
9769}
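
/*
 * Illustrative sketch (assumed helper name, not used by the decoder)
 * of the per-element SHLL transform for size == 0: the operand is
 * widened and then shifted left by the element width, which is
 * implicit in the instruction rather than encoded in it.
 */
static inline uint16_t shll_byte_sketch(uint8_t b)
{
    return (uint16_t)b << 8;   /* e.g. 0x12 -> 0x1200 */
}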
9770
9771/* C3.6.17 AdvSIMD two reg misc
9772 *   31  30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
9773 * +---+---+---+-----------+------+-----------+--------+-----+------+------+
9774 * | 0 | Q | U | 0 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
9775 * +---+---+---+-----------+------+-----------+--------+-----+------+------+
9776 */
9777static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
9778{
9779    int size = extract32(insn, 22, 2);
9780    int opcode = extract32(insn, 12, 5);
9781    bool u = extract32(insn, 29, 1);
9782    bool is_q = extract32(insn, 30, 1);
9783    int rn = extract32(insn, 5, 5);
9784    int rd = extract32(insn, 0, 5);
9785    bool need_fpstatus = false;
9786    bool need_rmode = false;
9787    int rmode = -1;
9788    TCGv_i32 tcg_rmode;
9789    TCGv_ptr tcg_fpstatus;
9790
9791    switch (opcode) {
9792    case 0x0: /* REV64, REV32 */
9793    case 0x1: /* REV16 */
9794        handle_rev(s, opcode, u, is_q, size, rn, rd);
9795        return;
9796    case 0x5: /* CNT, NOT, RBIT */
9797        if (u && size == 0) {
9798            /* NOT: adjust size so we can use the 64-bits-at-a-time loop. */
9799            size = 3;
9800            break;
9801        } else if (u && size == 1) {
9802            /* RBIT */
9803            break;
9804        } else if (!u && size == 0) {
9805            /* CNT */
9806            break;
9807        }
9808        unallocated_encoding(s);
9809        return;
9810    case 0x12: /* XTN, XTN2, SQXTUN, SQXTUN2 */
9811    case 0x14: /* SQXTN, SQXTN2, UQXTN, UQXTN2 */
9812        if (size == 3) {
9813            unallocated_encoding(s);
9814            return;
9815        }
9816        if (!fp_access_check(s)) {
9817            return;
9818        }
9819
9820        handle_2misc_narrow(s, false, opcode, u, is_q, size, rn, rd);
9821        return;
9822    case 0x4: /* CLS, CLZ */
9823        if (size == 3) {
9824            unallocated_encoding(s);
9825            return;
9826        }
9827        break;
9828    case 0x2: /* SADDLP, UADDLP */
9829    case 0x6: /* SADALP, UADALP */
9830        if (size == 3) {
9831            unallocated_encoding(s);
9832            return;
9833        }
9834        if (!fp_access_check(s)) {
9835            return;
9836        }
9837        handle_2misc_pairwise(s, opcode, u, is_q, size, rn, rd);
9838        return;
9839    case 0x13: /* SHLL, SHLL2 */
9840        if (u == 0 || size == 3) {
9841            unallocated_encoding(s);
9842            return;
9843        }
9844        if (!fp_access_check(s)) {
9845            return;
9846        }
9847        handle_shll(s, is_q, size, rn, rd);
9848        return;
9849    case 0xa: /* CMLT */
9850        if (u == 1) {
9851            unallocated_encoding(s);
9852            return;
9853        }
9854        /* fall through */
9855    case 0x8: /* CMGT, CMGE */
9856    case 0x9: /* CMEQ, CMLE */
9857    case 0xb: /* ABS, NEG */
9858        if (size == 3 && !is_q) {
9859            unallocated_encoding(s);
9860            return;
9861        }
9862        break;
9863    case 0x3: /* SUQADD, USQADD */
9864        if (size == 3 && !is_q) {
9865            unallocated_encoding(s);
9866            return;
9867        }
9868        if (!fp_access_check(s)) {
9869            return;
9870        }
9871        handle_2misc_satacc(s, false, u, is_q, size, rn, rd);
9872        return;
9873    case 0x7: /* SQABS, SQNEG */
9874        if (size == 3 && !is_q) {
9875            unallocated_encoding(s);
9876            return;
9877        }
9878        break;
9879    case 0xc ... 0xf:
9880    case 0x16 ... 0x1d:
9881    case 0x1f:
9882    {
9883        /* Floating point: U, size[1] and opcode indicate operation;
9884         * size[0] indicates single or double precision.
9885         */
9886        int is_double = extract32(size, 0, 1);
9887        opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
9888        size = is_double ? 3 : 2;
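        /* For example, FNEG has opcode = 0xf, U = 1 and size = 1x, which
         * the remapping above turns into the 0x6f case below, while FABS
         * (U = 0) becomes 0x2f.
         */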
9889        switch (opcode) {
9890        case 0x2f: /* FABS */
9891        case 0x6f: /* FNEG */
9892            if (size == 3 && !is_q) {
9893                unallocated_encoding(s);
9894                return;
9895            }
9896            break;
9897        case 0x1d: /* SCVTF */
9898        case 0x5d: /* UCVTF */
9899        {
9900            bool is_signed = (opcode == 0x1d);
9901            int elements = is_double ? 2 : is_q ? 4 : 2;
9902            if (is_double && !is_q) {
9903                unallocated_encoding(s);
9904                return;
9905            }
9906            if (!fp_access_check(s)) {
9907                return;
9908            }
9909            handle_simd_intfp_conv(s, rd, rn, elements, is_signed, 0, size);
9910            return;
9911        }
9912        case 0x2c: /* FCMGT (zero) */
9913        case 0x2d: /* FCMEQ (zero) */
9914        case 0x2e: /* FCMLT (zero) */
9915        case 0x6c: /* FCMGE (zero) */
9916        case 0x6d: /* FCMLE (zero) */
9917            if (size == 3 && !is_q) {
9918                unallocated_encoding(s);
9919                return;
9920            }
9921            handle_2misc_fcmp_zero(s, opcode, false, u, is_q, size, rn, rd);
9922            return;
9923        case 0x7f: /* FSQRT */
9924            if (size == 3 && !is_q) {
9925                unallocated_encoding(s);
9926                return;
9927            }
9928            break;
9929        case 0x1a: /* FCVTNS */
9930        case 0x1b: /* FCVTMS */
9931        case 0x3a: /* FCVTPS */
9932        case 0x3b: /* FCVTZS */
9933        case 0x5a: /* FCVTNU */
9934        case 0x5b: /* FCVTMU */
9935        case 0x7a: /* FCVTPU */
9936        case 0x7b: /* FCVTZU */
9937            need_fpstatus = true;
9938            need_rmode = true;
9939            rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
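            /* Worked example, using the FPROUNDING_* values from
             * internals.h: FCVTNS (0x1a) -> 0 (tie-even), FCVTMS
             * (0x1b) -> 2 (-inf), FCVTPS (0x3a) -> 1 (+inf) and
             * FCVTZS (0x3b) -> 3 (to zero); the U bit only selects
             * unsigned results and never affects the rounding mode.
             * The FRINT group below reuses the same bit trick.
             */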
9940            if (size == 3 && !is_q) {
9941                unallocated_encoding(s);
9942                return;
9943            }
9944            break;
9945        case 0x5c: /* FCVTAU */
9946        case 0x1c: /* FCVTAS */
9947            need_fpstatus = true;
9948            need_rmode = true;
9949            rmode = FPROUNDING_TIEAWAY;
9950            if (size == 3 && !is_q) {
9951                unallocated_encoding(s);
9952                return;
9953            }
9954            break;
9955        case 0x3c: /* URECPE */
9956            if (size == 3) {
9957                unallocated_encoding(s);
9958                return;
9959            }
9960            /* fall through */
9961        case 0x3d: /* FRECPE */
9962        case 0x7d: /* FRSQRTE */
9963            if (size == 3 && !is_q) {
9964                unallocated_encoding(s);
9965                return;
9966            }
9967            if (!fp_access_check(s)) {
9968                return;
9969            }
9970            handle_2misc_reciprocal(s, opcode, false, u, is_q, size, rn, rd);
9971            return;
9972        case 0x56: /* FCVTXN, FCVTXN2 */
9973            if (size == 2) {
9974                unallocated_encoding(s);
9975                return;
9976            }
9977            /* fall through */
9978        case 0x16: /* FCVTN, FCVTN2 */
9979            /* handle_2misc_narrow does a 2*size -> size operation, but these
9980             * instructions encode the source size rather than dest size.
9981             */
9982            if (!fp_access_check(s)) {
9983                return;
9984            }
9985            handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd);
9986            return;
9987        case 0x17: /* FCVTL, FCVTL2 */
9988            if (!fp_access_check(s)) {
9989                return;
9990            }
9991            handle_2misc_widening(s, opcode, is_q, size, rn, rd);
9992            return;
9993        case 0x18: /* FRINTN */
9994        case 0x19: /* FRINTM */
9995        case 0x38: /* FRINTP */
9996        case 0x39: /* FRINTZ */
9997            need_rmode = true;
9998            rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
9999            /* fall through */
10000        case 0x59: /* FRINTX */
10001        case 0x79: /* FRINTI */
10002            need_fpstatus = true;
10003            if (size == 3 && !is_q) {
10004                unallocated_encoding(s);
10005                return;
10006            }
10007            break;
10008        case 0x58: /* FRINTA */
10009            need_rmode = true;
10010            rmode = FPROUNDING_TIEAWAY;
10011            need_fpstatus = true;
10012            if (size == 3 && !is_q) {
10013                unallocated_encoding(s);
10014                return;
10015            }
10016            break;
10017        case 0x7c: /* URSQRTE */
10018            if (size == 3) {
10019                unallocated_encoding(s);
10020                return;
10021            }
10022            need_fpstatus = true;
10023            break;
10024        default:
10025            unallocated_encoding(s);
10026            return;
10027        }
10028        break;
10029    }
10030    default:
10031        unallocated_encoding(s);
10032        return;
10033    }
10034
10035    if (!fp_access_check(s)) {
10036        return;
10037    }
10038
10039    if (need_fpstatus) {
10040        tcg_fpstatus = get_fpstatus_ptr();
10041    } else {
10042        TCGV_UNUSED_PTR(tcg_fpstatus);
10043    }
10044    if (need_rmode) {
10045        tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
10046        gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
10047    } else {
10048        TCGV_UNUSED_I32(tcg_rmode);
10049    }
10050
10051    if (size == 3) {
10052        /* All 64-bit element operations can be shared with scalar 2misc */
10053        int pass;
10054
10055        for (pass = 0; pass < (is_q ? 2 : 1); pass++) {
10056            TCGv_i64 tcg_op = tcg_temp_new_i64();
10057            TCGv_i64 tcg_res = tcg_temp_new_i64();
10058
10059            read_vec_element(s, tcg_op, rn, pass, MO_64);
10060
10061            handle_2misc_64(s, opcode, u, tcg_res, tcg_op,
10062                            tcg_rmode, tcg_fpstatus);
10063
10064            write_vec_element(s, tcg_res, rd, pass, MO_64);
10065
10066            tcg_temp_free_i64(tcg_res);
10067            tcg_temp_free_i64(tcg_op);
10068        }
10069    } else {
10070        int pass;
10071
10072        for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
10073            TCGv_i32 tcg_op = tcg_temp_new_i32();
10074            TCGv_i32 tcg_res = tcg_temp_new_i32();
10075            TCGCond cond;
10076
10077            read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
10078
10079            if (size == 2) {
10080                /* Special cases for 32 bit elements */
10081                switch (opcode) {
10082                case 0xa: /* CMLT */
10083                    /* 32 bit integer comparison against zero, result is
10084                     * test ? (2^32 - 1) : 0. We implement via setcond(test),
10085                     * which yields 0 or 1, and negating that to 0 or all-ones.
10086                     */
10087                    cond = TCG_COND_LT;
10088                do_cmop:
10089                    tcg_gen_setcondi_i32(cond, tcg_res, tcg_op, 0);
10090                    tcg_gen_neg_i32(tcg_res, tcg_res);
10091                    break;
10092                case 0x8: /* CMGT, CMGE */
10093                    cond = u ? TCG_COND_GE : TCG_COND_GT;
10094                    goto do_cmop;
10095                case 0x9: /* CMEQ, CMLE */
10096                    cond = u ? TCG_COND_LE : TCG_COND_EQ;
10097                    goto do_cmop;
10098                case 0x4: /* CLS, CLZ */
10099                    if (u) {
10100                        gen_helper_clz32(tcg_res, tcg_op);
10101                    } else {
10102                        gen_helper_cls32(tcg_res, tcg_op);
10103                    }
10104                    break;
10105                case 0x7: /* SQABS, SQNEG */
10106                    if (u) {
10107                        gen_helper_neon_qneg_s32(tcg_res, cpu_env, tcg_op);
10108                    } else {
10109                        gen_helper_neon_qabs_s32(tcg_res, cpu_env, tcg_op);
10110                    }
10111                    break;
10112                case 0xb: /* ABS, NEG */
10113                    if (u) {
10114                        tcg_gen_neg_i32(tcg_res, tcg_op);
10115                    } else {
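                        /* ABS: tcg_res = (op > 0) ? op : -op, using a
                         * neg plus movcond instead of a branch.
                         */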
10116                        TCGv_i32 tcg_zero = tcg_const_i32(0);
10117                        tcg_gen_neg_i32(tcg_res, tcg_op);
10118                        tcg_gen_movcond_i32(TCG_COND_GT, tcg_res, tcg_op,
10119                                            tcg_zero, tcg_op, tcg_res);
10120                        tcg_temp_free_i32(tcg_zero);
10121                    }
10122                    break;
10123                case 0x2f: /* FABS */
10124                    gen_helper_vfp_abss(tcg_res, tcg_op);
10125                    break;
10126                case 0x6f: /* FNEG */
10127                    gen_helper_vfp_negs(tcg_res, tcg_op);
10128                    break;
10129                case 0x7f: /* FSQRT */
10130                    gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env);
10131                    break;
10132                case 0x1a: /* FCVTNS */
10133                case 0x1b: /* FCVTMS */
10134                case 0x1c: /* FCVTAS */
10135                case 0x3a: /* FCVTPS */
10136                case 0x3b: /* FCVTZS */
10137                {
10138                    TCGv_i32 tcg_shift = tcg_const_i32(0);
10139                    gen_helper_vfp_tosls(tcg_res, tcg_op,
10140                                         tcg_shift, tcg_fpstatus);
10141                    tcg_temp_free_i32(tcg_shift);
10142                    break;
10143                }
10144                case 0x5a: /* FCVTNU */
10145                case 0x5b: /* FCVTMU */
10146                case 0x5c: /* FCVTAU */
10147                case 0x7a: /* FCVTPU */
10148                case 0x7b: /* FCVTZU */
10149                {
10150                    TCGv_i32 tcg_shift = tcg_const_i32(0);
10151                    gen_helper_vfp_touls(tcg_res, tcg_op,
10152                                         tcg_shift, tcg_fpstatus);
10153                    tcg_temp_free_i32(tcg_shift);
10154                    break;
10155                }
10156                case 0x18: /* FRINTN */
10157                case 0x19: /* FRINTM */
10158                case 0x38: /* FRINTP */
10159                case 0x39: /* FRINTZ */
10160                case 0x58: /* FRINTA */
10161                case 0x79: /* FRINTI */
10162                    gen_helper_rints(tcg_res, tcg_op, tcg_fpstatus);
10163                    break;
10164                case 0x59: /* FRINTX */
10165                    gen_helper_rints_exact(tcg_res, tcg_op, tcg_fpstatus);
10166                    break;
10167                case 0x7c: /* URSQRTE */
10168                    gen_helper_rsqrte_u32(tcg_res, tcg_op, tcg_fpstatus);
10169                    break;
10170                default:
10171                    g_assert_not_reached();
10172                }
10173            } else {
10174                /* Use helpers for 8 and 16 bit elements */
10175                switch (opcode) {
10176                case 0x5: /* CNT, RBIT */
10177                    /* For these two insns size is part of the opcode specifier
10178                     * (handled earlier); they always operate on byte elements.
10179                     */
10180                    if (u) {
10181                        gen_helper_neon_rbit_u8(tcg_res, tcg_op);
10182                    } else {
10183                        gen_helper_neon_cnt_u8(tcg_res, tcg_op);
10184                    }
10185                    break;
10186                case 0x7: /* SQABS, SQNEG */
10187                {
10188                    NeonGenOneOpEnvFn *genfn;
10189                    static NeonGenOneOpEnvFn * const fns[2][2] = {
10190                        { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
10191                        { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
10192                    };
10193                    genfn = fns[size][u];
10194                    genfn(tcg_res, cpu_env, tcg_op);
10195                    break;
10196                }
10197                case 0x8: /* CMGT, CMGE */
10198                case 0x9: /* CMEQ, CMLE */
10199                case 0xa: /* CMLT */
10200                {
10201                    static NeonGenTwoOpFn * const fns[3][2] = {
10202                        { gen_helper_neon_cgt_s8, gen_helper_neon_cgt_s16 },
10203                        { gen_helper_neon_cge_s8, gen_helper_neon_cge_s16 },
10204                        { gen_helper_neon_ceq_u8, gen_helper_neon_ceq_u16 },
10205                    };
10206                    NeonGenTwoOpFn *genfn;
10207                    int comp;
10208                    bool reverse;
10209                    TCGv_i32 tcg_zero = tcg_const_i32(0);
10210
10211                    /* comp = index into [CMGT, CMGE, CMEQ, CMLE, CMLT] */
10212                    comp = (opcode - 0x8) * 2 + u;
10213                    /* ...but LE, LT are implemented as reverse GE, GT */
10214                    reverse = (comp > 2);
10215                    if (reverse) {
10216                        comp = 4 - comp;
10217                    }
10218                    genfn = fns[comp][size];
10219                    if (reverse) {
10220                        genfn(tcg_res, tcg_zero, tcg_op);
10221                    } else {
10222                        genfn(tcg_res, tcg_op, tcg_zero);
10223                    }
10224                    tcg_temp_free_i32(tcg_zero);
10225                    break;
10226                }
10227                case 0xb: /* ABS, NEG */
10228                    if (u) {
10229                        TCGv_i32 tcg_zero = tcg_const_i32(0);
10230                        if (size) {
10231                            gen_helper_neon_sub_u16(tcg_res, tcg_zero, tcg_op);
10232                        } else {
10233                            gen_helper_neon_sub_u8(tcg_res, tcg_zero, tcg_op);
10234                        }
10235                        tcg_temp_free_i32(tcg_zero);
10236                    } else {
10237                        if (size) {
10238                            gen_helper_neon_abs_s16(tcg_res, tcg_op);
10239                        } else {
10240                            gen_helper_neon_abs_s8(tcg_res, tcg_op);
10241                        }
10242                    }
10243                    break;
10244                case 0x4: /* CLS, CLZ */
10245                    if (u) {
10246                        if (size == 0) {
10247                            gen_helper_neon_clz_u8(tcg_res, tcg_op);
10248                        } else {
10249                            gen_helper_neon_clz_u16(tcg_res, tcg_op);
10250                        }
10251                    } else {
10252                        if (size == 0) {
10253                            gen_helper_neon_cls_s8(tcg_res, tcg_op);
10254                        } else {
10255                            gen_helper_neon_cls_s16(tcg_res, tcg_op);
10256                        }
10257                    }
10258                    break;
10259                default:
10260                    g_assert_not_reached();
10261                }
10262            }
10263
10264            write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
10265
10266            tcg_temp_free_i32(tcg_res);
10267            tcg_temp_free_i32(tcg_op);
10268        }
10269    }
10270    if (!is_q) {
10271        clear_vec_high(s, rd);
10272    }
10273
10274    if (need_rmode) {
10275        gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
10276        tcg_temp_free_i32(tcg_rmode);
10277    }
10278    if (need_fpstatus) {
10279        tcg_temp_free_ptr(tcg_fpstatus);
10280    }
10281}
10282
10283/* C3.6.13 AdvSIMD scalar x indexed element
10284 *  31 30  29 28       24 23  22 21  20  19  16 15 12  11  10 9    5 4    0
10285 * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
10286 * | 0 1 | U | 1 1 1 1 1 | size | L | M |  Rm  | opc | H | 0 |  Rn  |  Rd  |
10287 * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
10288 * C3.6.18 AdvSIMD vector x indexed element
10289 *   31  30  29 28       24 23  22 21  20  19  16 15 12  11  10 9    5 4    0
10290 * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
10291 * | 0 | Q | U | 0 1 1 1 1 | size | L | M |  Rm  | opc | H | 0 |  Rn  |  Rd  |
10292 * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
10293 */
10294static void disas_simd_indexed(DisasContext *s, uint32_t insn)
10295{
10296    /* This encoding has two kinds of instruction:
10297     *  normal, where we perform elt x idxelt => elt for each
10298     *     element in the vector
10299     *  long, where we perform elt x idxelt and generate a result of
10300     *     double the width of the input element
10301     * The long ops have a 'part' specifier (ie come in INSN, INSN2 pairs).
10302     */
10303    bool is_scalar = extract32(insn, 28, 1);
10304    bool is_q = extract32(insn, 30, 1);
10305    bool u = extract32(insn, 29, 1);
10306    int size = extract32(insn, 22, 2);
10307    int l = extract32(insn, 21, 1);
10308    int m = extract32(insn, 20, 1);
10309    /* Note that the Rm field here is only 4 bits, not 5 as it usually is */
10310    int rm = extract32(insn, 16, 4);
10311    int opcode = extract32(insn, 12, 4);
10312    int h = extract32(insn, 11, 1);
10313    int rn = extract32(insn, 5, 5);
10314    int rd = extract32(insn, 0, 5);
10315    bool is_long = false;
10316    bool is_fp = false;
10317    int index;
10318    TCGv_ptr fpst;
10319
10320    switch (opcode) {
10321    case 0x0: /* MLA */
10322    case 0x4: /* MLS */
10323        if (!u || is_scalar) {
10324            unallocated_encoding(s);
10325            return;
10326        }
10327        break;
10328    case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10329    case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10330    case 0xa: /* SMULL, SMULL2, UMULL, UMULL2 */
10331        if (is_scalar) {
10332            unallocated_encoding(s);
10333            return;
10334        }
10335        is_long = true;
10336        break;
10337    case 0x3: /* SQDMLAL, SQDMLAL2 */
10338    case 0x7: /* SQDMLSL, SQDMLSL2 */
10339    case 0xb: /* SQDMULL, SQDMULL2 */
10340        is_long = true;
10341        /* fall through */
10342    case 0xc: /* SQDMULH */
10343    case 0xd: /* SQRDMULH */
10344        if (u) {
10345            unallocated_encoding(s);
10346            return;
10347        }
10348        break;
10349    case 0x8: /* MUL */
10350        if (u || is_scalar) {
10351            unallocated_encoding(s);
10352            return;
10353        }
10354        break;
10355    case 0x1: /* FMLA */
10356    case 0x5: /* FMLS */
10357        if (u) {
10358            unallocated_encoding(s);
10359            return;
10360        }
10361        /* fall through */
10362    case 0x9: /* FMUL, FMULX */
10363        if (!extract32(size, 1, 1)) {
10364            unallocated_encoding(s);
10365            return;
10366        }
10367        is_fp = true;
10368        break;
10369    default:
10370        unallocated_encoding(s);
10371        return;
10372    }
10373
10374    if (is_fp) {
10375        /* low bit of size indicates single/double */
10376        size = extract32(size, 0, 1) ? 3 : 2;
10377        if (size == 2) {
10378            index = h << 1 | l;
10379        } else {
10380            if (l || !is_q) {
10381                unallocated_encoding(s);
10382                return;
10383            }
10384            index = h;
10385        }
10386        rm |= (m << 4);
10387    } else {
10388        switch (size) {
10389        case 1:
10390            index = h << 2 | l << 1 | m;
10391            break;
10392        case 2:
10393            index = h << 1 | l;
10394            rm |= (m << 4);
10395            break;
10396        default:
10397            unallocated_encoding(s);
10398            return;
10399        }
10400    }
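    /* For example, MUL Vd.4H, Vn.4H, Vm.H[5] has size = 1 and packs the
     * element number as h:l:m = 0b101, which is why the 16-bit forms can
     * only name Vm in V0-V15; the 32-bit and single-precision forms use
     * just h:l as the index and m instead widens rm to the usual 5 bits.
     */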
10401
10402    if (!fp_access_check(s)) {
10403        return;
10404    }
10405
10406    if (is_fp) {
10407        fpst = get_fpstatus_ptr();
10408    } else {
10409        TCGV_UNUSED_PTR(fpst);
10410    }
10411
10412    if (size == 3) {
10413        TCGv_i64 tcg_idx = tcg_temp_new_i64();
10414        int pass;
10415
10416        assert(is_fp && is_q && !is_long);
10417
10418        read_vec_element(s, tcg_idx, rm, index, MO_64);
10419
10420        for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
10421            TCGv_i64 tcg_op = tcg_temp_new_i64();
10422            TCGv_i64 tcg_res = tcg_temp_new_i64();
10423
10424            read_vec_element(s, tcg_op, rn, pass, MO_64);
10425
10426            switch (opcode) {
10427            case 0x5: /* FMLS */
10428                /* As usual for ARM, separate negation for fused multiply-add */
10429                gen_helper_vfp_negd(tcg_op, tcg_op);
10430                /* fall through */
10431            case 0x1: /* FMLA */
10432                read_vec_element(s, tcg_res, rd, pass, MO_64);
10433                gen_helper_vfp_muladdd(tcg_res, tcg_op, tcg_idx, tcg_res, fpst);
10434                break;
10435            case 0x9: /* FMUL, FMULX */
10436                if (u) {
10437                    gen_helper_vfp_mulxd(tcg_res, tcg_op, tcg_idx, fpst);
10438                } else {
10439                    gen_helper_vfp_muld(tcg_res, tcg_op, tcg_idx, fpst);
10440                }
10441                break;
10442            default:
10443                g_assert_not_reached();
10444            }
10445
10446            write_vec_element(s, tcg_res, rd, pass, MO_64);
10447            tcg_temp_free_i64(tcg_op);
10448            tcg_temp_free_i64(tcg_res);
10449        }
10450
10451        if (is_scalar) {
10452            clear_vec_high(s, rd);
10453        }
10454
10455        tcg_temp_free_i64(tcg_idx);
10456    } else if (!is_long) {
10457        /* 32 bit floating point, or 16 or 32 bit integer.
10458         * For the 16 bit scalar case we use the usual Neon helpers and
10459         * rely on the fact that 0 op 0 == 0 with no side effects.
10460         */
10461        TCGv_i32 tcg_idx = tcg_temp_new_i32();
10462        int pass, maxpasses;
10463
10464        if (is_scalar) {
10465            maxpasses = 1;
10466        } else {
10467            maxpasses = is_q ? 4 : 2;
10468        }
10469
10470        read_vec_element_i32(s, tcg_idx, rm, index, size);
10471
10472        if (size == 1 && !is_scalar) {
10473            /* The simplest way to handle the 16x16 indexed ops is to duplicate
10474             * the index into both halves of the 32 bit tcg_idx and then use
10475             * the usual Neon helpers.
10476             */
10477            tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
10478        }
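        /* e.g. tcg_gen_deposit_i32(idx, idx, idx, 16, 16) turns an idx
         * of 0x0000abcd into 0xabcdabcd, so every 16-bit helper lane
         * sees the same multiplicand.
         */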
10479
10480        for (pass = 0; pass < maxpasses; pass++) {
10481            TCGv_i32 tcg_op = tcg_temp_new_i32();
10482            TCGv_i32 tcg_res = tcg_temp_new_i32();
10483
10484            read_vec_element_i32(s, tcg_op, rn, pass, is_scalar ? size : MO_32);
10485
10486            switch (opcode) {
10487            case 0x0: /* MLA */
10488            case 0x4: /* MLS */
10489            case 0x8: /* MUL */
10490            {
10491                static NeonGenTwoOpFn * const fns[2][2] = {
10492                    { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
10493                    { tcg_gen_add_i32, tcg_gen_sub_i32 },
10494                };
10495                NeonGenTwoOpFn *genfn;
10496                bool is_sub = opcode == 0x4;
10497
10498                if (size == 1) {
10499                    gen_helper_neon_mul_u16(tcg_res, tcg_op, tcg_idx);
10500                } else {
10501                    tcg_gen_mul_i32(tcg_res, tcg_op, tcg_idx);
10502                }
10503                if (opcode == 0x8) {
10504                    break;
10505                }
10506                read_vec_element_i32(s, tcg_op, rd, pass, MO_32);
10507                genfn = fns[size - 1][is_sub];
10508                genfn(tcg_res, tcg_op, tcg_res);
10509                break;
10510            }
10511            case 0x5: /* FMLS */
10512                /* As usual for ARM, separate negation for fused multiply-add */
10513                gen_helper_vfp_negs(tcg_op, tcg_op);
10514                /* fall through */
10515            case 0x1: /* FMLA */
10516                read_vec_element_i32(s, tcg_res, rd, pass, MO_32);
10517                gen_helper_vfp_muladds(tcg_res, tcg_op, tcg_idx, tcg_res, fpst);
10518                break;
10519            case 0x9: /* FMUL, FMULX */
10520                if (u) {
10521                    gen_helper_vfp_mulxs(tcg_res, tcg_op, tcg_idx, fpst);
10522                } else {
10523                    gen_helper_vfp_muls(tcg_res, tcg_op, tcg_idx, fpst);
10524                }
10525                break;
10526            case 0xc: /* SQDMULH */
10527                if (size == 1) {
10528                    gen_helper_neon_qdmulh_s16(tcg_res, cpu_env,
10529                                               tcg_op, tcg_idx);
10530                } else {
10531                    gen_helper_neon_qdmulh_s32(tcg_res, cpu_env,
10532                                               tcg_op, tcg_idx);
10533                }
10534                break;
10535            case 0xd: /* SQRDMULH */
10536                if (size == 1) {
10537                    gen_helper_neon_qrdmulh_s16(tcg_res, cpu_env,
10538                                                tcg_op, tcg_idx);
10539                } else {
10540                    gen_helper_neon_qrdmulh_s32(tcg_res, cpu_env,
10541                                                tcg_op, tcg_idx);
10542                }
10543                break;
10544            default:
10545                g_assert_not_reached();
10546            }
10547
10548            if (is_scalar) {
10549                write_fp_sreg(s, rd, tcg_res);
10550            } else {
10551                write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
10552            }
10553
10554            tcg_temp_free_i32(tcg_op);
10555            tcg_temp_free_i32(tcg_res);
10556        }
10557
10558        tcg_temp_free_i32(tcg_idx);
10559
10560        if (!is_q) {
10561            clear_vec_high(s, rd);
10562        }
10563    } else {
10564        /* long ops: 16x16->32 or 32x32->64 */
10565        TCGv_i64 tcg_res[2];
10566        int pass;
10567        bool satop = extract32(opcode, 0, 1);
10568        TCGMemOp memop = MO_32;
10569
10570        if (satop || !u) {
10571            memop |= MO_SIGN;
10572        }
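        /* e.g. SMULL/SMLAL (U = 0) and the always-signed SQDMxxL ops
         * read their sources sign-extended, while UMULL/UMLAL (U = 1)
         * read them zero-extended.
         */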
10573
10574        if (size == 2) {
10575            TCGv_i64 tcg_idx = tcg_temp_new_i64();
10576
10577            read_vec_element(s, tcg_idx, rm, index, memop);
10578
10579            for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
10580                TCGv_i64 tcg_op = tcg_temp_new_i64();
10581                TCGv_i64 tcg_passres;
10582                int passelt;
10583
10584                if (is_scalar) {
10585                    passelt = 0;
10586                } else {
10587                    passelt = pass + (is_q * 2);
10588                }
10589
10590                read_vec_element(s, tcg_op, rn, passelt, memop);
10591
10592                tcg_res[pass] = tcg_temp_new_i64();
10593
10594                if (opcode == 0xa || opcode == 0xb) {
10595                    /* Non-accumulating ops */
10596                    tcg_passres = tcg_res[pass];
10597                } else {
10598                    tcg_passres = tcg_temp_new_i64();
10599                }
10600
10601                tcg_gen_mul_i64(tcg_passres, tcg_op, tcg_idx);
10602                tcg_temp_free_i64(tcg_op);
10603
10604                if (satop) {
10605                    /* saturating, doubling */
10606                    gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env,
10607                                                      tcg_passres, tcg_passres);
10608                }
10609
10610                if (opcode == 0xa || opcode == 0xb) {
10611                    continue;
10612                }
10613
10614                /* Accumulating op: handle accumulate step */
10615                read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10616
10617                switch (opcode) {
10618                case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10619                    tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
10620                    break;
10621                case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10622                    tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
10623                    break;
10624                case 0x7: /* SQDMLSL, SQDMLSL2 */
10625                    tcg_gen_neg_i64(tcg_passres, tcg_passres);
10626                    /* fall through */
10627                case 0x3: /* SQDMLAL, SQDMLAL2 */
10628                    gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env,
10629                                                      tcg_res[pass],
10630                                                      tcg_passres);
10631                    break;
10632                default:
10633                    g_assert_not_reached();
10634                }
10635                tcg_temp_free_i64(tcg_passres);
10636            }
10637            tcg_temp_free_i64(tcg_idx);
10638
10639            if (is_scalar) {
10640                clear_vec_high(s, rd);
10641            }
10642        } else {
10643            TCGv_i32 tcg_idx = tcg_temp_new_i32();
10644
10645            assert(size == 1);
10646            read_vec_element_i32(s, tcg_idx, rm, index, size);
10647
10648            if (!is_scalar) {
10649                /* The simplest way to handle the 16x16 indexed ops is to
10650                 * duplicate the index into both halves of the 32 bit tcg_idx
10651                 * and then use the usual Neon helpers.
10652                 */
10653                tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
10654            }
10655
10656            for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
10657                TCGv_i32 tcg_op = tcg_temp_new_i32();
10658                TCGv_i64 tcg_passres;
10659
10660                if (is_scalar) {
10661                    read_vec_element_i32(s, tcg_op, rn, pass, size);
10662                } else {
10663                    read_vec_element_i32(s, tcg_op, rn,
10664                                         pass + (is_q * 2), MO_32);
10665                }
10666
10667                tcg_res[pass] = tcg_temp_new_i64();
10668
10669                if (opcode == 0xa || opcode == 0xb) {
10670                    /* Non-accumulating ops */
10671                    tcg_passres = tcg_res[pass];
10672                } else {
10673                    tcg_passres = tcg_temp_new_i64();
10674                }
10675
10676                if (memop & MO_SIGN) {
10677                    gen_helper_neon_mull_s16(tcg_passres, tcg_op, tcg_idx);
10678                } else {
10679                    gen_helper_neon_mull_u16(tcg_passres, tcg_op, tcg_idx);
10680                }
10681                if (satop) {
10682                    gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
10683                                                      tcg_passres, tcg_passres);
10684                }
10685                tcg_temp_free_i32(tcg_op);
10686
10687                if (opcode == 0xa || opcode == 0xb) {
10688                    continue;
10689                }
10690
10691                /* Accumulating op: handle accumulate step */
10692                read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10693
10694                switch (opcode) {
10695                case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10696                    gen_helper_neon_addl_u32(tcg_res[pass], tcg_res[pass],
10697                                             tcg_passres);
10698                    break;
10699                case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10700                    gen_helper_neon_subl_u32(tcg_res[pass], tcg_res[pass],
10701                                             tcg_passres);
10702                    break;
10703                case 0x7: /* SQDMLSL, SQDMLSL2 */
10704                    gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
10705                    /* fall through */
10706                case 0x3: /* SQDMLAL, SQDMLAL2 */
10707                    gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env,
10708                                                      tcg_res[pass],
10709                                                      tcg_passres);
10710                    break;
10711                default:
10712                    g_assert_not_reached();
10713                }
10714                tcg_temp_free_i64(tcg_passres);
10715            }
10716            tcg_temp_free_i32(tcg_idx);
10717
10718            if (is_scalar) {
10719                tcg_gen_ext32u_i64(tcg_res[0], tcg_res[0]);
10720            }
10721        }
10722
10723        if (is_scalar) {
10724            tcg_res[1] = tcg_const_i64(0);
10725        }
10726
10727        for (pass = 0; pass < 2; pass++) {
10728            write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10729            tcg_temp_free_i64(tcg_res[pass]);
10730        }
10731    }
10732
10733    if (!TCGV_IS_UNUSED_PTR(fpst)) {
10734        tcg_temp_free_ptr(fpst);
10735    }
10736}
10737
10738/* C3.6.19 Crypto AES
10739 *  31             24 23  22 21       17 16    12 11 10 9    5 4    0
10740 * +-----------------+------+-----------+--------+-----+------+------+
10741 * | 0 1 0 0 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
10742 * +-----------------+------+-----------+--------+-----+------+------+
10743 */
10744static void disas_crypto_aes(DisasContext *s, uint32_t insn)
10745{
10746    int size = extract32(insn, 22, 2);
10747    int opcode = extract32(insn, 12, 5);
10748    int rn = extract32(insn, 5, 5);
10749    int rd = extract32(insn, 0, 5);
10750    int decrypt;
10751    TCGv_i32 tcg_rd_regno, tcg_rn_regno, tcg_decrypt;
10752    CryptoThreeOpEnvFn *genfn;
10753
10754    if (!arm_dc_feature(s, ARM_FEATURE_V8_AES)
10755        || size != 0) {
10756        unallocated_encoding(s);
10757        return;
10758    }
10759
10760    switch (opcode) {
10761    case 0x4: /* AESE */
10762        decrypt = 0;
10763        genfn = gen_helper_crypto_aese;
10764        break;
10765    case 0x6: /* AESMC */
10766        decrypt = 0;
10767        genfn = gen_helper_crypto_aesmc;
10768        break;
10769    case 0x5: /* AESD */
10770        decrypt = 1;
10771        genfn = gen_helper_crypto_aese;
10772        break;
10773    case 0x7: /* AESIMC */
10774        decrypt = 1;
10775        genfn = gen_helper_crypto_aesmc;
10776        break;
10777    default:
10778        unallocated_encoding(s);
10779        return;
10780    }
10781
10782    /* Note that we convert the Vx register indexes into the
10783     * index within the vfp.regs[] array, so we can share the
10784     * helper with the AArch32 instructions.
10785     */
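    /* (Each 128-bit V register spans two consecutive 64-bit vfp.regs[]
     * entries, hence the << 1 below: Vn = 1 becomes array index 2.)
     */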
10786    tcg_rd_regno = tcg_const_i32(rd << 1);
10787    tcg_rn_regno = tcg_const_i32(rn << 1);
10788    tcg_decrypt = tcg_const_i32(decrypt);
10789
10790    genfn(cpu_env, tcg_rd_regno, tcg_rn_regno, tcg_decrypt);
10791
10792    tcg_temp_free_i32(tcg_rd_regno);
10793    tcg_temp_free_i32(tcg_rn_regno);
10794    tcg_temp_free_i32(tcg_decrypt);
10795}
10796
10797/* C3.6.20 Crypto three-reg SHA
10798 *  31             24 23  22  21 20  16  15 14    12 11 10 9    5 4    0
10799 * +-----------------+------+---+------+---+--------+-----+------+------+
10800 * | 0 1 0 1 1 1 1 0 | size | 0 |  Rm  | 0 | opcode | 0 0 |  Rn  |  Rd  |
10801 * +-----------------+------+---+------+---+--------+-----+------+------+
10802 */
10803static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn)
10804{
10805    int size = extract32(insn, 22, 2);
10806    int opcode = extract32(insn, 12, 3);
10807    int rm = extract32(insn, 16, 5);
10808    int rn = extract32(insn, 5, 5);
10809    int rd = extract32(insn, 0, 5);
10810    CryptoThreeOpEnvFn *genfn;
10811    TCGv_i32 tcg_rd_regno, tcg_rn_regno, tcg_rm_regno;
10812    int feature = ARM_FEATURE_V8_SHA256;
10813
10814    if (size != 0) {
10815        unallocated_encoding(s);
10816        return;
10817    }
10818
10819    switch (opcode) {
10820    case 0: /* SHA1C */
10821    case 1: /* SHA1P */
10822    case 2: /* SHA1M */
10823    case 3: /* SHA1SU0 */
10824        genfn = NULL;
10825        feature = ARM_FEATURE_V8_SHA1;
10826        break;
10827    case 4: /* SHA256H */
10828        genfn = gen_helper_crypto_sha256h;
10829        break;
10830    case 5: /* SHA256H2 */
10831        genfn = gen_helper_crypto_sha256h2;
10832        break;
10833    case 6: /* SHA256SU1 */
10834        genfn = gen_helper_crypto_sha256su1;
10835        break;
10836    default:
10837        unallocated_encoding(s);
10838        return;
10839    }
10840
10841    if (!arm_dc_feature(s, feature)) {
10842        unallocated_encoding(s);
10843        return;
10844    }
10845
10846    tcg_rd_regno = tcg_const_i32(rd << 1);
10847    tcg_rn_regno = tcg_const_i32(rn << 1);
10848    tcg_rm_regno = tcg_const_i32(rm << 1);
10849
10850    if (genfn) {
10851        genfn(cpu_env, tcg_rd_regno, tcg_rn_regno, tcg_rm_regno);
10852    } else {
10853        TCGv_i32 tcg_opcode = tcg_const_i32(opcode);
10854
10855        gen_helper_crypto_sha1_3reg(cpu_env, tcg_rd_regno,
10856                                    tcg_rn_regno, tcg_rm_regno, tcg_opcode);
10857        tcg_temp_free_i32(tcg_opcode);
10858    }
10859
10860    tcg_temp_free_i32(tcg_rd_regno);
10861    tcg_temp_free_i32(tcg_rn_regno);
10862    tcg_temp_free_i32(tcg_rm_regno);
10863}
10864
10865/* C3.6.21 Crypto two-reg SHA
10866 *  31             24 23  22 21       17 16    12 11 10 9    5 4    0
10867 * +-----------------+------+-----------+--------+-----+------+------+
10868 * | 0 1 0 1 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
10869 * +-----------------+------+-----------+--------+-----+------+------+
10870 */
10871static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn)
10872{
10873    int size = extract32(insn, 22, 2);
10874    int opcode = extract32(insn, 12, 5);
10875    int rn = extract32(insn, 5, 5);
10876    int rd = extract32(insn, 0, 5);
10877    CryptoTwoOpEnvFn *genfn;
10878    int feature;
10879    TCGv_i32 tcg_rd_regno, tcg_rn_regno;
10880
10881    if (size != 0) {
10882        unallocated_encoding(s);
10883        return;
10884    }
10885
10886    switch (opcode) {
10887    case 0: /* SHA1H */
10888        feature = ARM_FEATURE_V8_SHA1;
10889        genfn = gen_helper_crypto_sha1h;
10890        break;
10891    case 1: /* SHA1SU1 */
10892        feature = ARM_FEATURE_V8_SHA1;
10893        genfn = gen_helper_crypto_sha1su1;
10894        break;
10895    case 2: /* SHA256SU0 */
10896        feature = ARM_FEATURE_V8_SHA256;
10897        genfn = gen_helper_crypto_sha256su0;
10898        break;
10899    default:
10900        unallocated_encoding(s);
10901        return;
10902    }
10903
10904    if (!arm_dc_feature(s, feature)) {
10905        unallocated_encoding(s);
10906        return;
10907    }
10908
10909    tcg_rd_regno = tcg_const_i32(rd << 1);
10910    tcg_rn_regno = tcg_const_i32(rn << 1);
10911
10912    genfn(cpu_env, tcg_rd_regno, tcg_rn_regno);
10913
10914    tcg_temp_free_i32(tcg_rd_regno);
10915    tcg_temp_free_i32(tcg_rn_regno);
10916}
10917
10918/* C3.6 Data processing - SIMD, inc Crypto
10919 *
10920 * As the decode gets a little complex we are using a table based
10921 * approach for this part of the decode.
10922 */
10923static const AArch64DecodeTable data_proc_simd[] = {
10924    /* pattern  ,  mask     ,  fn                        */
10925    { 0x0e200400, 0x9f200400, disas_simd_three_reg_same },
10926    { 0x0e200000, 0x9f200c00, disas_simd_three_reg_diff },
10927    { 0x0e200800, 0x9f3e0c00, disas_simd_two_reg_misc },
10928    { 0x0e300800, 0x9f3e0c00, disas_simd_across_lanes },
10929    { 0x0e000400, 0x9fe08400, disas_simd_copy },
10930    { 0x0f000000, 0x9f000400, disas_simd_indexed }, /* vector indexed */
10931    /* simd_mod_imm decode is a subset of simd_shift_imm, so must precede it */
10932    { 0x0f000400, 0x9ff80400, disas_simd_mod_imm },
10933    { 0x0f000400, 0x9f800400, disas_simd_shift_imm },
10934    { 0x0e000000, 0xbf208c00, disas_simd_tb },
10935    { 0x0e000800, 0xbf208c00, disas_simd_zip_trn },
10936    { 0x2e000000, 0xbf208400, disas_simd_ext },
10937    { 0x5e200400, 0xdf200400, disas_simd_scalar_three_reg_same },
10938    { 0x5e200000, 0xdf200c00, disas_simd_scalar_three_reg_diff },
10939    { 0x5e200800, 0xdf3e0c00, disas_simd_scalar_two_reg_misc },
10940    { 0x5e300800, 0xdf3e0c00, disas_simd_scalar_pairwise },
10941    { 0x5e000400, 0xdfe08400, disas_simd_scalar_copy },
10942    { 0x5f000000, 0xdf000400, disas_simd_indexed }, /* scalar indexed */
10943    { 0x5f000400, 0xdf800400, disas_simd_scalar_shift_imm },
10944    { 0x4e280800, 0xff3e0c00, disas_crypto_aes },
10945    { 0x5e000000, 0xff208c00, disas_crypto_three_reg_sha },
10946    { 0x5e280800, 0xff3e0c00, disas_crypto_two_reg_sha },
10947    { 0x00000000, 0x00000000, NULL }
10948};
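
/* A worked lookup (illustrative; encoding hand-assembled): AESE
 * V0.16B, V1.16B should encode as 0x4e284820, and
 * 0x4e284820 & 0xff3e0c00 == 0x4e280800, so the first-match scan in
 * lookup_disas_fn() selects the disas_crypto_aes entry above.
 */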
10949
10950static void disas_data_proc_simd(DisasContext *s, uint32_t insn)
10951{
10952    /* Note that this is called with all non-FP cases from
10953     * table C3-6 so it must UNDEF for entries not specifically
10954     * allocated to instructions in that table.
10955     */
10956    AArch64DecodeFn *fn = lookup_disas_fn(&data_proc_simd[0], insn);
10957    if (fn) {
10958        fn(s, insn);
10959    } else {
10960        unallocated_encoding(s);
10961    }
10962}
10963
10964/* C3.6 Data processing - SIMD and floating point */
10965static void disas_data_proc_simd_fp(DisasContext *s, uint32_t insn)
10966{
10967    if (extract32(insn, 28, 1) == 1 && extract32(insn, 30, 1) == 0) {
10968        disas_data_proc_fp(s, insn);
10969    } else {
10970        /* SIMD, including crypto */
10971        disas_data_proc_simd(s, insn);
10972    }
10973}
10974
10975/* C3.1 A64 instruction index by encoding */
10976static void disas_a64_insn(CPUARMState *env, DisasContext *s)
10977{
10978    uint32_t insn;
10979
10980    insn = arm_ldl_code(env, s->pc, s->sctlr_b);
10981    s->insn = insn;
10982    s->pc += 4;
10983
10984    s->fp_access_checked = false;
10985
10986    switch (extract32(insn, 25, 4)) {
10987    case 0x0: case 0x1: case 0x2: case 0x3: /* UNALLOCATED */
10988        unallocated_encoding(s);
10989        break;
10990    case 0x8: case 0x9: /* Data processing - immediate */
10991        disas_data_proc_imm(s, insn);
10992        break;
10993    case 0xa: case 0xb: /* Branch, exception generation and system insns */
10994        disas_b_exc_sys(s, insn);
10995        break;
10996    case 0x4:
10997    case 0x6:
10998    case 0xc:
10999    case 0xe:      /* Loads and stores */
11000        disas_ldst(s, insn);
11001        break;
11002    case 0x5:
11003    case 0xd:      /* Data processing - register */
11004        disas_data_proc_reg(s, insn);
11005        break;
11006    case 0x7:
11007    case 0xf:      /* Data processing - SIMD and floating point */
11008        disas_data_proc_simd_fp(s, insn);
11009        break;
11010    default:
11011        assert(FALSE); /* all 16 cases should be handled above */
11012        break;
11013    }
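    /* Worked dispatch example (illustrative; encoding hand-assembled):
     * ADD X0, X1, X2 should encode as 0x8b020020, and
     * extract32(0x8b020020, 25, 4) == 0x5, so the switch above routes
     * it to disas_data_proc_reg().
     */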
11014
11015    /* if we allocated any temporaries, free them here */
11016    free_tmp_a64(s);
11017}
11018
11019void gen_intermediate_code_a64(ARMCPU *cpu, TranslationBlock *tb)
11020{
11021    CPUState *cs = CPU(cpu);
11022    CPUARMState *env = &cpu->env;
11023    DisasContext dc1, *dc = &dc1;
11024    target_ulong pc_start;
11025    target_ulong next_page_start;
11026    int num_insns;
11027    int max_insns;
11028
11029    pc_start = tb->pc;
11030
11031    dc->tb = tb;
11032
11033    dc->is_jmp = DISAS_NEXT;
11034    dc->pc = pc_start;
11035    dc->singlestep_enabled = cs->singlestep_enabled;
11036    dc->condjmp = 0;
11037
11038    dc->aarch64 = 1;
11039    /* If we are coming from secure EL0 in a system with a 32-bit EL3, then
11040     * there is no secure EL1, so we route exceptions to EL3.
11041     */
11042    dc->secure_routed_to_el3 = arm_feature(env, ARM_FEATURE_EL3) &&
11043                               !arm_el_is_aa64(env, 3);
11044    dc->thumb = 0;
11045    dc->sctlr_b = 0;
11046    dc->be_data = ARM_TBFLAG_BE_DATA(tb->flags) ? MO_BE : MO_LE;
11047    dc->condexec_mask = 0;
11048    dc->condexec_cond = 0;
11049    dc->mmu_idx = ARM_TBFLAG_MMUIDX(tb->flags);
11050    dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
11051#if !defined(CONFIG_USER_ONLY)
11052    dc->user = (dc->current_el == 0);
11053#endif
11054    dc->fp_excp_el = ARM_TBFLAG_FPEXC_EL(tb->flags);
11055    dc->vec_len = 0;
11056    dc->vec_stride = 0;
11057    dc->cp_regs = cpu->cp_regs;
11058    dc->features = env->features;
11059
11060    /* Single step state. The code-generation logic here is:
11061     *  SS_ACTIVE == 0:
11062     *   generate code with no special handling for single-stepping (except
11063     *   that anything that can make us go to SS_ACTIVE == 1 must end the TB;
11064     *   this happens anyway because those changes are all system register or
11065     *   PSTATE writes).
11066     *  SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
11067     *   emit code for one insn
11068     *   emit code to clear PSTATE.SS
11069     *   emit code to generate software step exception for completed step
11070     *   end TB (as usual for having generated an exception)
11071     *  SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
11072     *   emit code to generate a software step exception
11073     *   end the TB
11074     */
11075    dc->ss_active = ARM_TBFLAG_SS_ACTIVE(tb->flags);
11076    dc->pstate_ss = ARM_TBFLAG_PSTATE_SS(tb->flags);
11077    dc->is_ldex = false;
11078    dc->ss_same_el = (arm_debug_target_el(env) == dc->current_el);
11079
11080    init_tmp_a64_array(dc);
11081
11082    next_page_start = (pc_start & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
11083    num_insns = 0;
11084    max_insns = tb->cflags & CF_COUNT_MASK;
11085    if (max_insns == 0) {
11086        max_insns = CF_COUNT_MASK;
11087    }
11088    if (max_insns > TCG_MAX_INSNS) {
11089        max_insns = TCG_MAX_INSNS;
11090    }
11091
11092    gen_tb_start(tb);
11093
11094    tcg_clear_temp_count();
11095
11096    do {
11097        tcg_gen_insn_start(dc->pc, 0, 0);
11098        num_insns++;
11099
11100        if (unlikely(!QTAILQ_EMPTY(&cs->breakpoints))) {
11101            CPUBreakpoint *bp;
11102            QTAILQ_FOREACH(bp, &cs->breakpoints, entry) {
11103                if (bp->pc == dc->pc) {
11104                    if (bp->flags & BP_CPU) {
11105                        gen_a64_set_pc_im(dc->pc);
11106                        gen_helper_check_breakpoints(cpu_env);
11107                        /* End the TB early; it likely won't be executed */
11108                        dc->is_jmp = DISAS_UPDATE;
11109                    } else {
11110                        gen_exception_internal_insn(dc, 0, EXCP_DEBUG);
11111                        /* The address covered by the breakpoint must be
11112                           included in [tb->pc, tb->pc + tb->size) in order
11113                           for it to be properly cleared -- thus we
11114                           increment the PC here so that the logic setting
11115                           tb->size below does the right thing.  */
11116                        dc->pc += 4;
11117                        goto done_generating;
11118                    }
11119                    break;
11120                }
11121            }
11122        }
11123
11124        if (num_insns == max_insns && (tb->cflags & CF_LAST_IO)) {
11125            gen_io_start();
11126        }
11127
11128        if (dc->ss_active && !dc->pstate_ss) {
11129            /* Singlestep state is Active-pending.
11130             * If we're in this state at the start of a TB then either
11131             *  a) we just took an exception to an EL which is being debugged
11132             *     and this is the first insn in the exception handler
11133             *  b) debug exceptions were masked and we just unmasked them
11134             *     without changing EL (eg by clearing PSTATE.D)
11135             * In either case we're going to take a swstep exception in the
11136             * "did not step an insn" case, and so the syndrome ISV and EX
11137             * bits should be zero.
11138             */
11139            assert(num_insns == 1);
11140            gen_exception(EXCP_UDEF, syn_swstep(dc->ss_same_el, 0, 0),
11141                          default_exception_el(dc));
11142            dc->is_jmp = DISAS_EXC;
11143            break;
11144        }
11145
11146        disas_a64_insn(env, dc);
11147
11148        if (tcg_check_temp_count()) {
11149            fprintf(stderr, "TCG temporary leak before "TARGET_FMT_lx"\n",
11150                    dc->pc);
11151        }
11152
11153        /* Translation stops when a conditional branch is encountered.
11154         * Otherwise the subsequent code could get translated several times.
11155         * Also stop translation when a page boundary is reached.  This
11156         * ensures prefetch aborts occur at the right place.
11157         */
11158    } while (!dc->is_jmp && !tcg_op_buf_full() &&
11159             !cs->singlestep_enabled &&
11160             !singlestep &&
11161             !dc->ss_active &&
11162             dc->pc < next_page_start &&
11163             num_insns < max_insns);
11164
11165    if (tb->cflags & CF_LAST_IO) {
11166        gen_io_end();
11167    }
11168
11169    if (unlikely(cs->singlestep_enabled || dc->ss_active)
11170        && dc->is_jmp != DISAS_EXC) {
11171        /* Note that this means single stepping WFI doesn't halt the CPU.
11172         * For conditional branch insns this is harmless unreachable code as
11173         * gen_goto_tb() has already handled emitting the debug exception
11174         * (and thus a tb-jump is not possible when singlestepping).
11175         */
11176        assert(dc->is_jmp != DISAS_TB_JUMP);
11177        if (dc->is_jmp != DISAS_JUMP) {
11178            gen_a64_set_pc_im(dc->pc);
11179        }
11180        if (cs->singlestep_enabled) {
11181            gen_exception_internal(EXCP_DEBUG);
11182        } else {
11183            gen_step_complete_exception(dc);
11184        }
11185    } else {
11186        switch (dc->is_jmp) {
11187        case DISAS_NEXT:
11188            gen_goto_tb(dc, 1, dc->pc);
11189            break;
11190        default:
11191        case DISAS_UPDATE:
11192            gen_a64_set_pc_im(dc->pc);
11193            /* fall through */
11194        case DISAS_JUMP:
11195            /* indicate that the hash table must be used to find the next TB */
11196            tcg_gen_exit_tb(0);
11197            break;
11198        case DISAS_TB_JUMP:
11199        case DISAS_EXC:
11200        case DISAS_SWI:
11201            break;
11202        case DISAS_WFE:
11203            gen_a64_set_pc_im(dc->pc);
11204            gen_helper_wfe(cpu_env);
11205            break;
11206        case DISAS_YIELD:
11207            gen_a64_set_pc_im(dc->pc);
11208            gen_helper_yield(cpu_env);
11209            break;
11210        case DISAS_WFI:
11211            /* This is a special case because we don't want to just halt the CPU
11212             * if trying to debug across a WFI.
11213             */
11214            gen_a64_set_pc_im(dc->pc);
11215            gen_helper_wfi(cpu_env);
11216            /* The helper doesn't necessarily throw an exception, but we
11217             * must go back to the main loop to check for interrupts anyway.
11218             */
11219            tcg_gen_exit_tb(0);
11220            break;
11221        }
11222    }
11223
11224done_generating:
11225    gen_tb_end(tb, num_insns);
11226
11227#ifdef DEBUG_DISAS
11228    if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM) &&
11229        qemu_log_in_addr_range(pc_start)) {
11230        qemu_log("----------------\n");
11231        qemu_log("IN: %s\n", lookup_symbol(pc_start));
11232        log_target_disas(cs, pc_start, dc->pc - pc_start,
11233                         4 | (bswap_code(dc->sctlr_b) ? 2 : 0));
11234        qemu_log("\n");
11235    }
11236#endif
11237    tb->size = dc->pc - pc_start;
11238    tb->icount = num_insns;
11239}
11240