qemu/target/arm/translate-a64.c
/*
 *  AArch64 translation
 *
 *  Copyright (c) 2013 Alexander Graf <agraf@suse.de>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"

#include "cpu.h"
#include "exec/exec-all.h"
#include "tcg-op.h"
#include "tcg-op-gvec.h"
#include "qemu/log.h"
#include "arm_ldst.h"
#include "translate.h"
#include "internals.h"
#include "qemu/host-utils.h"

#include "exec/semihost.h"
#include "exec/gen-icount.h"

#include "exec/helper-proto.h"
#include "exec/helper-gen.h"
#include "exec/log.h"

#include "trace-tcg.h"

static TCGv_i64 cpu_X[32];
static TCGv_i64 cpu_pc;

/* Load/store exclusive handling */
static TCGv_i64 cpu_exclusive_high;
static TCGv_i64 cpu_reg(DisasContext *s, int reg);

static const char *regnames[] = {
    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
    "x24", "x25", "x26", "x27", "x28", "x29", "lr", "sp"
};

enum a64_shift_type {
    A64_SHIFT_TYPE_LSL = 0,
    A64_SHIFT_TYPE_LSR = 1,
    A64_SHIFT_TYPE_ASR = 2,
    A64_SHIFT_TYPE_ROR = 3
};

/* Table based decoder typedefs - used when the relevant bits for decode
 * are too awkwardly scattered across the instruction (eg SIMD).
 */
typedef void AArch64DecodeFn(DisasContext *s, uint32_t insn);

typedef struct AArch64DecodeTable {
    uint32_t pattern;
    uint32_t mask;
    AArch64DecodeFn *disas_fn;
} AArch64DecodeTable;
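
/* Illustrative only: a decoder table is a pattern/mask/handler list
 * terminated by a zero mask.  The entry shown mirrors one from the real
 * data_proc_simd[] table later in the full file; the handler name here
 * is purely an example.
 *
 *   static const AArch64DecodeTable example_table[] = {
 *       { 0x0e200400, 0x9f200400, disas_simd_three_reg_same },
 *       { 0x00000000, 0x00000000, NULL }
 *   };
 */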

/* Function prototype for gen_ functions for calling Neon helpers */
typedef void NeonGenOneOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32);
typedef void NeonGenTwoOpFn(TCGv_i32, TCGv_i32, TCGv_i32);
typedef void NeonGenTwoOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32, TCGv_i32);
typedef void NeonGenTwo64OpFn(TCGv_i64, TCGv_i64, TCGv_i64);
typedef void NeonGenTwo64OpEnvFn(TCGv_i64, TCGv_ptr, TCGv_i64, TCGv_i64);
typedef void NeonGenNarrowFn(TCGv_i32, TCGv_i64);
typedef void NeonGenNarrowEnvFn(TCGv_i32, TCGv_ptr, TCGv_i64);
typedef void NeonGenWidenFn(TCGv_i64, TCGv_i32);
typedef void NeonGenTwoSingleOPFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
typedef void NeonGenTwoDoubleOPFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr);
typedef void NeonGenOneOpFn(TCGv_i64, TCGv_i64);
typedef void CryptoTwoOpFn(TCGv_ptr, TCGv_ptr);
typedef void CryptoThreeOpIntFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void CryptoThreeOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);

/* Note that the gvec expanders operate on offsets + sizes.  */
typedef void GVecGen2Fn(unsigned, uint32_t, uint32_t, uint32_t, uint32_t);
typedef void GVecGen2iFn(unsigned, uint32_t, uint32_t, int64_t,
                         uint32_t, uint32_t);
typedef void GVecGen3Fn(unsigned, uint32_t, uint32_t,
                        uint32_t, uint32_t, uint32_t);
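/* For instance, tcg_gen_gvec_add() has exactly the GVecGen3Fn shape
 * (vece, dofs, aofs, bofs, oprsz, maxsz), so it can be handed to the
 * gen_gvec_fn3() expander defined below.
 */
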
/* initialize TCG globals.  */
void a64_translate_init(void)
{
    int i;

    cpu_pc = tcg_global_mem_new_i64(cpu_env,
                                    offsetof(CPUARMState, pc),
                                    "pc");
    for (i = 0; i < 32; i++) {
        cpu_X[i] = tcg_global_mem_new_i64(cpu_env,
                                          offsetof(CPUARMState, xregs[i]),
                                          regnames[i]);
    }

    cpu_exclusive_high = tcg_global_mem_new_i64(cpu_env,
        offsetof(CPUARMState, exclusive_high), "exclusive_high");
}

static inline int get_a64_user_mem_index(DisasContext *s)
{
    /* Return the core mmu_idx to use for A64 "unprivileged load/store" insns:
     *  if EL1, access as if EL0; otherwise access at current EL
     */
    ARMMMUIdx useridx;

    switch (s->mmu_idx) {
    case ARMMMUIdx_S12NSE1:
        useridx = ARMMMUIdx_S12NSE0;
        break;
    case ARMMMUIdx_S1SE1:
        useridx = ARMMMUIdx_S1SE0;
        break;
    case ARMMMUIdx_S2NS:
        g_assert_not_reached();
    default:
        useridx = s->mmu_idx;
        break;
    }
    return arm_to_core_mmu_idx(useridx);
}

void aarch64_cpu_dump_state(CPUState *cs, FILE *f,
                            fprintf_function cpu_fprintf, int flags)
{
    ARMCPU *cpu = ARM_CPU(cs);
    CPUARMState *env = &cpu->env;
    uint32_t psr = pstate_read(env);
    int i;
    int el = arm_current_el(env);
    const char *ns_status;

    cpu_fprintf(f, "PC=%016"PRIx64"  SP=%016"PRIx64"\n",
            env->pc, env->xregs[31]);
    for (i = 0; i < 31; i++) {
        cpu_fprintf(f, "X%02d=%016"PRIx64, i, env->xregs[i]);
        if ((i % 4) == 3) {
            cpu_fprintf(f, "\n");
        } else {
            cpu_fprintf(f, " ");
        }
    }

    if (arm_feature(env, ARM_FEATURE_EL3) && el != 3) {
        ns_status = env->cp15.scr_el3 & SCR_NS ? "NS " : "S ";
    } else {
        ns_status = "";
    }

    cpu_fprintf(f, "\nPSTATE=%08x %c%c%c%c %sEL%d%c\n",
                psr,
                psr & PSTATE_N ? 'N' : '-',
                psr & PSTATE_Z ? 'Z' : '-',
                psr & PSTATE_C ? 'C' : '-',
                psr & PSTATE_V ? 'V' : '-',
                ns_status,
                el,
                psr & PSTATE_SP ? 'h' : 't');

    if (flags & CPU_DUMP_FPU) {
        int numvfpregs = 32;
        for (i = 0; i < numvfpregs; i++) {
            uint64_t *q = aa64_vfp_qreg(env, i);
            uint64_t vlo = q[0];
            uint64_t vhi = q[1];
            cpu_fprintf(f, "q%02d=%016" PRIx64 ":%016" PRIx64 "%c",
                        i, vhi, vlo, (i & 1 ? '\n' : ' '));
        }
        cpu_fprintf(f, "FPCR: %08x  FPSR: %08x\n",
                    vfp_get_fpcr(env), vfp_get_fpsr(env));
    }
}

void gen_a64_set_pc_im(uint64_t val)
{
    tcg_gen_movi_i64(cpu_pc, val);
}

/* Load the PC from a generic TCG variable.
 *
 * If address tagging is enabled via the TCR TBI bits, then loading
 * an address into the PC will clear out any tag in it:
 *  + for EL2 and EL3 there is only one TBI bit, and if it is set
 *    then the address is zero-extended, clearing bits [63:56]
 *  + for EL0 and EL1, TBI0 controls addresses with bit 55 == 0
 *    and TBI1 controls addresses with bit 55 == 1.
 *    If the appropriate TBI bit is set for the address then
 *    the address is sign-extended from bit 55 into bits [63:56]
 *
 * We can avoid doing this for relative-branches, because the
 * PC + offset can never overflow into the tag bits (assuming
 * that virtual addresses are less than 56 bits wide, as they
 * are currently), but we must handle it for branch-to-register.
 */
static void gen_a64_set_pc(DisasContext *s, TCGv_i64 src)
{
    if (s->current_el <= 1) {
        /* Test if NEITHER or BOTH TBI values are set.  If so, no need to
         * examine bit 55 of address, can just generate code.
         * If mixed, then test via generated code
         */
        if (s->tbi0 && s->tbi1) {
            TCGv_i64 tmp_reg = tcg_temp_new_i64();
            /* Both bits set, sign extension from bit 55 into [63:56] will
             * cover both cases
             */
            tcg_gen_shli_i64(tmp_reg, src, 8);
            tcg_gen_sari_i64(cpu_pc, tmp_reg, 8);
            tcg_temp_free_i64(tmp_reg);
        } else if (!s->tbi0 && !s->tbi1) {
            /* Neither bit set, just load it as-is */
            tcg_gen_mov_i64(cpu_pc, src);
        } else {
            TCGv_i64 tcg_tmpval = tcg_temp_new_i64();
            TCGv_i64 tcg_bit55  = tcg_temp_new_i64();
            TCGv_i64 tcg_zero   = tcg_const_i64(0);

            tcg_gen_andi_i64(tcg_bit55, src, (1ull << 55));

            if (s->tbi0) {
                /* tbi0==1, tbi1==0, so 0-fill upper byte if bit 55 = 0 */
                tcg_gen_andi_i64(tcg_tmpval, src,
                                 0x00FFFFFFFFFFFFFFull);
                tcg_gen_movcond_i64(TCG_COND_EQ, cpu_pc, tcg_bit55, tcg_zero,
                                    tcg_tmpval, src);
            } else {
                /* tbi0==0, tbi1==1, so 1-fill upper byte if bit 55 = 1 */
                tcg_gen_ori_i64(tcg_tmpval, src,
                                0xFF00000000000000ull);
                tcg_gen_movcond_i64(TCG_COND_NE, cpu_pc, tcg_bit55, tcg_zero,
                                    tcg_tmpval, src);
            }
            tcg_temp_free_i64(tcg_zero);
            tcg_temp_free_i64(tcg_bit55);
            tcg_temp_free_i64(tcg_tmpval);
        }
    } else {  /* EL > 1 */
        if (s->tbi0) {
            /* Force tag byte to all zero */
            tcg_gen_andi_i64(cpu_pc, src, 0x00FFFFFFFFFFFFFFull);
        } else {
            /* Load unmodified address */
            tcg_gen_mov_i64(cpu_pc, src);
        }
    }
}
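
/* Worked example of the sign-extension path above (illustrative, not
 * part of the original file): with TBI0 == TBI1 == 1 and
 * src = 0x00ff800000001000, bit 55 is set, so (src << 8) followed by an
 * arithmetic >> 8 yields 0xffff800000001000; an address with bit 55
 * clear gets bits [63:56] zeroed by the same pair of shifts.
 */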

typedef struct DisasCompare64 {
    TCGCond cond;
    TCGv_i64 value;
} DisasCompare64;

static void a64_test_cc(DisasCompare64 *c64, int cc)
{
    DisasCompare c32;

    arm_test_cc(&c32, cc);

    /* Sign-extend the 32-bit value so that the GE/LT comparisons work
     * properly.  The NE/EQ comparisons are also fine with this choice.  */
    c64->cond = c32.cond;
    c64->value = tcg_temp_new_i64();
    tcg_gen_ext_i32_i64(c64->value, c32.value);

    arm_free_cc(&c32);
}

static void a64_free_cc(DisasCompare64 *c64)
{
    tcg_temp_free_i64(c64->value);
}

static void gen_exception_internal(int excp)
{
    TCGv_i32 tcg_excp = tcg_const_i32(excp);

    assert(excp_is_internal(excp));
    gen_helper_exception_internal(cpu_env, tcg_excp);
    tcg_temp_free_i32(tcg_excp);
}

static void gen_exception(int excp, uint32_t syndrome, uint32_t target_el)
{
    TCGv_i32 tcg_excp = tcg_const_i32(excp);
    TCGv_i32 tcg_syn = tcg_const_i32(syndrome);
    TCGv_i32 tcg_el = tcg_const_i32(target_el);

    gen_helper_exception_with_syndrome(cpu_env, tcg_excp,
                                       tcg_syn, tcg_el);
    tcg_temp_free_i32(tcg_el);
    tcg_temp_free_i32(tcg_syn);
    tcg_temp_free_i32(tcg_excp);
}

static void gen_exception_internal_insn(DisasContext *s, int offset, int excp)
{
    gen_a64_set_pc_im(s->pc - offset);
    gen_exception_internal(excp);
    s->base.is_jmp = DISAS_NORETURN;
}

static void gen_exception_insn(DisasContext *s, int offset, int excp,
                               uint32_t syndrome, uint32_t target_el)
{
    gen_a64_set_pc_im(s->pc - offset);
    gen_exception(excp, syndrome, target_el);
    s->base.is_jmp = DISAS_NORETURN;
}

static void gen_exception_bkpt_insn(DisasContext *s, int offset,
                                    uint32_t syndrome)
{
    TCGv_i32 tcg_syn;

    gen_a64_set_pc_im(s->pc - offset);
    tcg_syn = tcg_const_i32(syndrome);
    gen_helper_exception_bkpt_insn(cpu_env, tcg_syn);
    tcg_temp_free_i32(tcg_syn);
    s->base.is_jmp = DISAS_NORETURN;
}

static void gen_ss_advance(DisasContext *s)
{
    /* If the singlestep state is Active-not-pending, advance to
     * Active-pending.
     */
    if (s->ss_active) {
        s->pstate_ss = 0;
        gen_helper_clear_pstate_ss(cpu_env);
    }
}

static void gen_step_complete_exception(DisasContext *s)
{
    /* We just completed step of an insn. Move from Active-not-pending
     * to Active-pending, and then also take the swstep exception.
     * This corresponds to making the (IMPDEF) choice to prioritize
     * swstep exceptions over asynchronous exceptions taken to an exception
     * level where debug is disabled. This choice has the advantage that
     * we do not need to maintain internal state corresponding to the
     * ISV/EX syndrome bits between completion of the step and generation
     * of the exception, and our syndrome information is always correct.
     */
    gen_ss_advance(s);
    gen_exception(EXCP_UDEF, syn_swstep(s->ss_same_el, 1, s->is_ldex),
                  default_exception_el(s));
    s->base.is_jmp = DISAS_NORETURN;
}

static inline bool use_goto_tb(DisasContext *s, int n, uint64_t dest)
{
    /* No direct tb linking with singlestep (either QEMU's or the ARM
     * debug architecture kind) or deterministic io
     */
    if (s->base.singlestep_enabled || s->ss_active ||
        (tb_cflags(s->base.tb) & CF_LAST_IO)) {
        return false;
    }

#ifndef CONFIG_USER_ONLY
    /* Only link tbs from inside the same guest page */
    if ((s->base.tb->pc & TARGET_PAGE_MASK) != (dest & TARGET_PAGE_MASK)) {
        return false;
    }
#endif

    return true;
}

static inline void gen_goto_tb(DisasContext *s, int n, uint64_t dest)
{
    TranslationBlock *tb;

    tb = s->base.tb;
    if (use_goto_tb(s, n, dest)) {
        tcg_gen_goto_tb(n);
        gen_a64_set_pc_im(dest);
        tcg_gen_exit_tb((intptr_t)tb + n);
        s->base.is_jmp = DISAS_NORETURN;
    } else {
        gen_a64_set_pc_im(dest);
        if (s->ss_active) {
            gen_step_complete_exception(s);
        } else if (s->base.singlestep_enabled) {
            gen_exception_internal(EXCP_DEBUG);
        } else {
            tcg_gen_lookup_and_goto_ptr();
            s->base.is_jmp = DISAS_NORETURN;
        }
    }
}

static void unallocated_encoding(DisasContext *s)
{
    /* Unallocated and reserved encodings are uncategorized */
    gen_exception_insn(s, 4, EXCP_UDEF, syn_uncategorized(),
                       default_exception_el(s));
}

#define unsupported_encoding(s, insn)                                    \
    do {                                                                 \
        qemu_log_mask(LOG_UNIMP,                                         \
                      "%s:%d: unsupported instruction encoding 0x%08x "  \
                      "at pc=%016" PRIx64 "\n",                          \
                      __FILE__, __LINE__, insn, s->pc - 4);              \
        unallocated_encoding(s);                                         \
    } while (0)

static void init_tmp_a64_array(DisasContext *s)
{
#ifdef CONFIG_DEBUG_TCG
    memset(s->tmp_a64, 0, sizeof(s->tmp_a64));
#endif
    s->tmp_a64_count = 0;
}

static void free_tmp_a64(DisasContext *s)
{
    int i;
    for (i = 0; i < s->tmp_a64_count; i++) {
        tcg_temp_free_i64(s->tmp_a64[i]);
    }
    init_tmp_a64_array(s);
}

static TCGv_i64 new_tmp_a64(DisasContext *s)
{
    assert(s->tmp_a64_count < TMP_A64_MAX);
    return s->tmp_a64[s->tmp_a64_count++] = tcg_temp_new_i64();
}

static TCGv_i64 new_tmp_a64_zero(DisasContext *s)
{
    TCGv_i64 t = new_tmp_a64(s);
    tcg_gen_movi_i64(t, 0);
    return t;
}

/*
 * Register access functions
 *
 * These functions are used for directly accessing a register where
 * changes to the final register value are likely to be made. If you
 * need to use a register for temporary calculation (e.g. index type
 * operations) use the read_* form.
 *
 * B1.2.1 Register mappings
 *
 * In instruction register encoding 31 can refer to ZR (zero register) or
 * the SP (stack pointer) depending on context. In QEMU's case we map SP
 * to cpu_X[31] and ZR accesses to a temporary which can be discarded.
 * This is the point of the _sp forms.
 */
static TCGv_i64 cpu_reg(DisasContext *s, int reg)
{
    if (reg == 31) {
        return new_tmp_a64_zero(s);
    } else {
        return cpu_X[reg];
    }
}

/* register access for when 31 == SP */
static TCGv_i64 cpu_reg_sp(DisasContext *s, int reg)
{
    return cpu_X[reg];
}

/* read a cpu register in 32bit/64bit mode. Returns a TCGv_i64
 * representing the register contents. This TCGv is an auto-freed
 * temporary so it need not be explicitly freed, and may be modified.
 */
static TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf)
{
    TCGv_i64 v = new_tmp_a64(s);
    if (reg != 31) {
        if (sf) {
            tcg_gen_mov_i64(v, cpu_X[reg]);
        } else {
            tcg_gen_ext32u_i64(v, cpu_X[reg]);
        }
    } else {
        tcg_gen_movi_i64(v, 0);
    }
    return v;
}

static TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf)
{
    TCGv_i64 v = new_tmp_a64(s);
    if (sf) {
        tcg_gen_mov_i64(v, cpu_X[reg]);
    } else {
        tcg_gen_ext32u_i64(v, cpu_X[reg]);
    }
    return v;
}

/* We should have at some point before trying to access an FP register
 * done the necessary access check, so assert that
 * (a) we did the check and
 * (b) we didn't then just plough ahead anyway if it failed.
 * Print the instruction pattern in the abort message so we can figure
 * out what we need to fix if a user encounters this problem in the wild.
 */
static inline void assert_fp_access_checked(DisasContext *s)
{
#ifdef CONFIG_DEBUG_TCG
    if (unlikely(!s->fp_access_checked || s->fp_excp_el)) {
        fprintf(stderr, "target-arm: FP access check missing for "
                "instruction 0x%08x\n", s->insn);
        abort();
    }
#endif
}

/* Return the offset into CPUARMState of an element of specified
 * size, 'element' places in from the least significant end of
 * the FP/vector register Qn.
 */
static inline int vec_reg_offset(DisasContext *s, int regno,
                                 int element, TCGMemOp size)
{
    int offs = 0;
#ifdef HOST_WORDS_BIGENDIAN
    /* This is complicated slightly because vfp.zregs[n].d[0] is
     * still the low half and vfp.zregs[n].d[1] the high half
     * of the 128 bit vector, even on big endian systems.
     * Calculate the offset assuming a fully bigendian 128 bits,
     * then XOR to account for the order of the two 64 bit halves.
     */
    offs += (16 - ((element + 1) * (1 << size)));
    offs ^= 8;
#else
    offs += element * (1 << size);
#endif
    offs += offsetof(CPUARMState, vfp.zregs[regno]);
    assert_fp_access_checked(s);
    return offs;
}
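
/* Worked example of the big-endian XOR above (illustrative): for a
 * 32-bit element (size == MO_32), element 1 sits at byte offset 4 on a
 * little-endian host.  On a big-endian host the fully-BE offset would
 * be 16 - 2 * 4 = 8, and XORing with 8 gives 0 -- which is where a BE
 * store of the zregs[n].d[0] uint64_t puts bits [63:32].
 */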

/* Return the offset into CPUARMState of the "whole" vector register Qn.  */
static inline int vec_full_reg_offset(DisasContext *s, int regno)
{
    assert_fp_access_checked(s);
    return offsetof(CPUARMState, vfp.zregs[regno]);
}

/* Return a newly allocated pointer to the vector register.  */
static TCGv_ptr vec_full_reg_ptr(DisasContext *s, int regno)
{
    TCGv_ptr ret = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(ret, cpu_env, vec_full_reg_offset(s, regno));
    return ret;
}

/* Return the byte size of the "whole" vector register, VL / 8.  */
static inline int vec_full_reg_size(DisasContext *s)
{
    /* FIXME SVE: We should put the composite ZCR_EL* value into tb->flags.
       In the meantime this is just the AdvSIMD length of 128.  */
    return 128 / 8;
}

/* Return the offset into CPUARMState of a slice (from
 * the least significant end) of FP register Qn (ie
 * Dn, Sn, Hn or Bn).
 * (Note that this is not the same mapping as for A32; see cpu.h)
 */
static inline int fp_reg_offset(DisasContext *s, int regno, TCGMemOp size)
{
    return vec_reg_offset(s, regno, 0, size);
}

/* Offset of the high half of the 128 bit vector Qn */
static inline int fp_reg_hi_offset(DisasContext *s, int regno)
{
    return vec_reg_offset(s, regno, 1, MO_64);
}

/* Convenience accessors for reading and writing single and double
 * FP registers. Writing clears the upper parts of the associated
 * 128 bit vector register, as required by the architecture.
 * Note that unlike the GP register accessors, the values returned
 * by the read functions must be manually freed.
 */
static TCGv_i64 read_fp_dreg(DisasContext *s, int reg)
{
    TCGv_i64 v = tcg_temp_new_i64();

    tcg_gen_ld_i64(v, cpu_env, fp_reg_offset(s, reg, MO_64));
    return v;
}

static TCGv_i32 read_fp_sreg(DisasContext *s, int reg)
{
    TCGv_i32 v = tcg_temp_new_i32();

    tcg_gen_ld_i32(v, cpu_env, fp_reg_offset(s, reg, MO_32));
    return v;
}

/* Clear the bits above an N-bit vector, for N = (is_q ? 128 : 64).
 * If SVE is not enabled, then there are only 128 bits in the vector.
 */
static void clear_vec_high(DisasContext *s, bool is_q, int rd)
{
    unsigned ofs = fp_reg_offset(s, rd, MO_64);
    unsigned vsz = vec_full_reg_size(s);

    if (!is_q) {
        TCGv_i64 tcg_zero = tcg_const_i64(0);
        tcg_gen_st_i64(tcg_zero, cpu_env, ofs + 8);
        tcg_temp_free_i64(tcg_zero);
    }
    if (vsz > 16) {
        tcg_gen_gvec_dup8i(ofs + 16, vsz - 16, vsz - 16, 0);
    }
}

static void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v)
{
    unsigned ofs = fp_reg_offset(s, reg, MO_64);

    tcg_gen_st_i64(v, cpu_env, ofs);
    clear_vec_high(s, false, reg);
}

static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v)
{
    TCGv_i64 tmp = tcg_temp_new_i64();

    tcg_gen_extu_i32_i64(tmp, v);
    write_fp_dreg(s, reg, tmp);
    tcg_temp_free_i64(tmp);
}

static TCGv_ptr get_fpstatus_ptr(bool is_f16)
{
    TCGv_ptr statusptr = tcg_temp_new_ptr();
    int offset;

    /* In A64 all instructions (both FP and Neon) use the FPCR; there
     * is no equivalent of the A32 Neon "standard FPSCR value".
     * However half-precision operations operate under a different
     * FZ16 flag and use vfp.fp_status_f16 instead of vfp.fp_status.
     */
    if (is_f16) {
        offset = offsetof(CPUARMState, vfp.fp_status_f16);
    } else {
        offset = offsetof(CPUARMState, vfp.fp_status);
    }
    tcg_gen_addi_ptr(statusptr, cpu_env, offset);
    return statusptr;
}

/* Expand a 2-operand AdvSIMD vector operation using an expander function.  */
static void gen_gvec_fn2(DisasContext *s, bool is_q, int rd, int rn,
                         GVecGen2Fn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 2-operand + immediate AdvSIMD vector operation using
 * an expander function.
 */
static void gen_gvec_fn2i(DisasContext *s, bool is_q, int rd, int rn,
                          int64_t imm, GVecGen2iFn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            imm, is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 3-operand AdvSIMD vector operation using an expander function.  */
static void gen_gvec_fn3(DisasContext *s, bool is_q, int rd, int rn, int rm,
                         GVecGen3Fn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            vec_full_reg_offset(s, rm), is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 2-operand + immediate AdvSIMD vector operation using
 * an op descriptor.
 */
static void gen_gvec_op2i(DisasContext *s, bool is_q, int rd,
                          int rn, int64_t imm, const GVecGen2i *gvec_op)
{
    tcg_gen_gvec_2i(vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
                    is_q ? 16 : 8, vec_full_reg_size(s), imm, gvec_op);
}

/* Expand a 3-operand AdvSIMD vector operation using an op descriptor.  */
static void gen_gvec_op3(DisasContext *s, bool is_q, int rd,
                         int rn, int rm, const GVecGen3 *gvec_op)
{
    tcg_gen_gvec_3(vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
                   vec_full_reg_offset(s, rm), is_q ? 16 : 8,
                   vec_full_reg_size(s), gvec_op);
}

/* Expand a 3-operand + env pointer operation using
 * an out-of-line helper.
 */
static void gen_gvec_op3_env(DisasContext *s, bool is_q, int rd,
                             int rn, int rm, gen_helper_gvec_3_ptr *fn)
{
    tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm), cpu_env,
                       is_q ? 16 : 8, vec_full_reg_size(s), 0, fn);
}

/* Expand a 3-operand + fpstatus pointer + simd data value operation using
 * an out-of-line helper.
 */
static void gen_gvec_op3_fpst(DisasContext *s, bool is_q, int rd, int rn,
                              int rm, bool is_fp16, int data,
                              gen_helper_gvec_3_ptr *fn)
{
    TCGv_ptr fpst = get_fpstatus_ptr(is_fp16);
    tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm), fpst,
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
    tcg_temp_free_ptr(fpst);
}

/* Set ZF and NF based on a 64 bit result. This is alas fiddlier
 * than the 32 bit equivalent.
 */
static inline void gen_set_NZ64(TCGv_i64 result)
{
    tcg_gen_extr_i64_i32(cpu_ZF, cpu_NF, result);
    tcg_gen_or_i32(cpu_ZF, cpu_ZF, cpu_NF);
}
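
/* The flag variables follow the usual translate.c convention: Z is set
 * iff cpu_ZF == 0 and N is bit 31 of cpu_NF.  Illustrative example: for
 * result = 0x8000000000000000 the code above yields cpu_NF = 0x80000000
 * (N set) and cpu_ZF = 0x00000000 | 0x80000000 != 0 (Z clear).
 */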

/* Set NZCV as for a logical operation: NZ as per result, CV cleared. */
static inline void gen_logic_CC(int sf, TCGv_i64 result)
{
    if (sf) {
        gen_set_NZ64(result);
    } else {
        tcg_gen_extrl_i64_i32(cpu_ZF, result);
        tcg_gen_mov_i32(cpu_NF, cpu_ZF);
    }
    tcg_gen_movi_i32(cpu_CF, 0);
    tcg_gen_movi_i32(cpu_VF, 0);
}

/* dest = T0 + T1; compute C, N, V and Z flags */
static void gen_add_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        TCGv_i64 result, flag, tmp;
        result = tcg_temp_new_i64();
        flag = tcg_temp_new_i64();
        tmp = tcg_temp_new_i64();

        tcg_gen_movi_i64(tmp, 0);
        tcg_gen_add2_i64(result, flag, t0, tmp, t1, tmp);

        tcg_gen_extrl_i64_i32(cpu_CF, flag);

        gen_set_NZ64(result);

        tcg_gen_xor_i64(flag, result, t0);
        tcg_gen_xor_i64(tmp, t0, t1);
        tcg_gen_andc_i64(flag, flag, tmp);
        tcg_temp_free_i64(tmp);
        tcg_gen_extrh_i64_i32(cpu_VF, flag);

        tcg_gen_mov_i64(dest, result);
        tcg_temp_free_i64(result);
        tcg_temp_free_i64(flag);
    } else {
        /* 32 bit arithmetic */
        TCGv_i32 t0_32 = tcg_temp_new_i32();
        TCGv_i32 t1_32 = tcg_temp_new_i32();
        TCGv_i32 tmp = tcg_temp_new_i32();

        tcg_gen_movi_i32(tmp, 0);
        tcg_gen_extrl_i64_i32(t0_32, t0);
        tcg_gen_extrl_i64_i32(t1_32, t1);
        tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, t1_32, tmp);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
        tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
        tcg_gen_xor_i32(tmp, t0_32, t1_32);
        tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
        tcg_gen_extu_i32_i64(dest, cpu_NF);

        tcg_temp_free_i32(tmp);
        tcg_temp_free_i32(t0_32);
        tcg_temp_free_i32(t1_32);
    }
}

/* dest = T0 - T1; compute C, N, V and Z flags */
static void gen_sub_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        /* 64 bit arithmetic */
        TCGv_i64 result, flag, tmp;

        result = tcg_temp_new_i64();
        flag = tcg_temp_new_i64();
        tcg_gen_sub_i64(result, t0, t1);

        gen_set_NZ64(result);

        tcg_gen_setcond_i64(TCG_COND_GEU, flag, t0, t1);
        tcg_gen_extrl_i64_i32(cpu_CF, flag);

        tcg_gen_xor_i64(flag, result, t0);
        tmp = tcg_temp_new_i64();
        tcg_gen_xor_i64(tmp, t0, t1);
        tcg_gen_and_i64(flag, flag, tmp);
        tcg_temp_free_i64(tmp);
        tcg_gen_extrh_i64_i32(cpu_VF, flag);
        tcg_gen_mov_i64(dest, result);
        tcg_temp_free_i64(flag);
        tcg_temp_free_i64(result);
    } else {
        /* 32 bit arithmetic */
        TCGv_i32 t0_32 = tcg_temp_new_i32();
        TCGv_i32 t1_32 = tcg_temp_new_i32();
        TCGv_i32 tmp;

        tcg_gen_extrl_i64_i32(t0_32, t0);
        tcg_gen_extrl_i64_i32(t1_32, t1);
        tcg_gen_sub_i32(cpu_NF, t0_32, t1_32);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
        tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0_32, t1_32);
        tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
        tmp = tcg_temp_new_i32();
        tcg_gen_xor_i32(tmp, t0_32, t1_32);
        tcg_temp_free_i32(t0_32);
        tcg_temp_free_i32(t1_32);
        tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
        tcg_temp_free_i32(tmp);
        tcg_gen_extu_i32_i64(dest, cpu_NF);
    }
}

/* dest = T0 + T1 + CF; do not compute flags. */
static void gen_adc(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    TCGv_i64 flag = tcg_temp_new_i64();
    tcg_gen_extu_i32_i64(flag, cpu_CF);
    tcg_gen_add_i64(dest, t0, t1);
    tcg_gen_add_i64(dest, dest, flag);
    tcg_temp_free_i64(flag);

    if (!sf) {
        tcg_gen_ext32u_i64(dest, dest);
    }
}

/* dest = T0 + T1 + CF; compute C, N, V and Z flags. */
static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        TCGv_i64 result, cf_64, vf_64, tmp;
        result = tcg_temp_new_i64();
        cf_64 = tcg_temp_new_i64();
        vf_64 = tcg_temp_new_i64();
        tmp = tcg_const_i64(0);

        tcg_gen_extu_i32_i64(cf_64, cpu_CF);
        tcg_gen_add2_i64(result, cf_64, t0, tmp, cf_64, tmp);
        tcg_gen_add2_i64(result, cf_64, result, cf_64, t1, tmp);
        tcg_gen_extrl_i64_i32(cpu_CF, cf_64);
        gen_set_NZ64(result);

        tcg_gen_xor_i64(vf_64, result, t0);
        tcg_gen_xor_i64(tmp, t0, t1);
        tcg_gen_andc_i64(vf_64, vf_64, tmp);
        tcg_gen_extrh_i64_i32(cpu_VF, vf_64);

        tcg_gen_mov_i64(dest, result);

        tcg_temp_free_i64(tmp);
        tcg_temp_free_i64(vf_64);
        tcg_temp_free_i64(cf_64);
        tcg_temp_free_i64(result);
    } else {
        TCGv_i32 t0_32, t1_32, tmp;
        t0_32 = tcg_temp_new_i32();
        t1_32 = tcg_temp_new_i32();
        tmp = tcg_const_i32(0);

        tcg_gen_extrl_i64_i32(t0_32, t0);
        tcg_gen_extrl_i64_i32(t1_32, t1);
        tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, cpu_CF, tmp);
        tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1_32, tmp);

        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
        tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
        tcg_gen_xor_i32(tmp, t0_32, t1_32);
        tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
        tcg_gen_extu_i32_i64(dest, cpu_NF);

        tcg_temp_free_i32(tmp);
        tcg_temp_free_i32(t1_32);
        tcg_temp_free_i32(t0_32);
    }
}
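
/* The paired tcg_gen_add2_* calls above implement a wide add with
 * carry: the first computes t0 + CF into a (low, carry) pair, the
 * second adds t1 into the same pair, so the final high word holds the
 * carry-out used for the new C flag.  Illustrative (32-bit path):
 * t0 = 0xffffffff, CF = 1, t1 = 0 yields cpu_NF = 0 and cpu_CF = 1.
 */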

/*
 * Load/Store generators
 */

/*
 * Store from GPR register to memory.
 */
static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source,
                             TCGv_i64 tcg_addr, int size, int memidx,
                             bool iss_valid,
                             unsigned int iss_srt,
                             bool iss_sf, bool iss_ar)
{
    g_assert(size <= 3);
    tcg_gen_qemu_st_i64(source, tcg_addr, memidx, s->be_data + size);

    if (iss_valid) {
        uint32_t syn;

        syn = syn_data_abort_with_iss(0,
                                      size,
                                      false,
                                      iss_srt,
                                      iss_sf,
                                      iss_ar,
                                      0, 0, 0, 0, 0, false);
        disas_set_insn_syndrome(s, syn);
    }
}

static void do_gpr_st(DisasContext *s, TCGv_i64 source,
                      TCGv_i64 tcg_addr, int size,
                      bool iss_valid,
                      unsigned int iss_srt,
                      bool iss_sf, bool iss_ar)
{
    do_gpr_st_memidx(s, source, tcg_addr, size, get_mem_index(s),
                     iss_valid, iss_srt, iss_sf, iss_ar);
}

/*
 * Load from memory to GPR register
 */
static void do_gpr_ld_memidx(DisasContext *s,
                             TCGv_i64 dest, TCGv_i64 tcg_addr,
                             int size, bool is_signed,
                             bool extend, int memidx,
                             bool iss_valid, unsigned int iss_srt,
                             bool iss_sf, bool iss_ar)
{
    TCGMemOp memop = s->be_data + size;

    g_assert(size <= 3);

    if (is_signed) {
        memop += MO_SIGN;
    }

    tcg_gen_qemu_ld_i64(dest, tcg_addr, memidx, memop);

    if (extend && is_signed) {
        g_assert(size < 3);
        tcg_gen_ext32u_i64(dest, dest);
    }

    if (iss_valid) {
        uint32_t syn;

        syn = syn_data_abort_with_iss(0,
                                      size,
                                      is_signed,
                                      iss_srt,
                                      iss_sf,
                                      iss_ar,
                                      0, 0, 0, 0, 0, false);
        disas_set_insn_syndrome(s, syn);
    }
}

static void do_gpr_ld(DisasContext *s,
                      TCGv_i64 dest, TCGv_i64 tcg_addr,
                      int size, bool is_signed, bool extend,
                      bool iss_valid, unsigned int iss_srt,
                      bool iss_sf, bool iss_ar)
{
    do_gpr_ld_memidx(s, dest, tcg_addr, size, is_signed, extend,
                     get_mem_index(s),
                     iss_valid, iss_srt, iss_sf, iss_ar);
}

/*
 * Store from FP register to memory
 */
static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, int size)
{
    /* This writes the bottom N bits of a 128 bit wide vector to memory */
    TCGv_i64 tmp = tcg_temp_new_i64();
    tcg_gen_ld_i64(tmp, cpu_env, fp_reg_offset(s, srcidx, MO_64));
    if (size < 4) {
        tcg_gen_qemu_st_i64(tmp, tcg_addr, get_mem_index(s),
                            s->be_data + size);
    } else {
        bool be = s->be_data == MO_BE;
        TCGv_i64 tcg_hiaddr = tcg_temp_new_i64();

        tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
        tcg_gen_qemu_st_i64(tmp, be ? tcg_hiaddr : tcg_addr, get_mem_index(s),
                            s->be_data | MO_Q);
        tcg_gen_ld_i64(tmp, cpu_env, fp_reg_hi_offset(s, srcidx));
        tcg_gen_qemu_st_i64(tmp, be ? tcg_addr : tcg_hiaddr, get_mem_index(s),
                            s->be_data | MO_Q);
        tcg_temp_free_i64(tcg_hiaddr);
    }

    tcg_temp_free_i64(tmp);
}

/*
 * Load from memory to FP register
 */
static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, int size)
{
    /* This always zero-extends and writes to a full 128 bit wide vector */
    TCGv_i64 tmplo = tcg_temp_new_i64();
    TCGv_i64 tmphi;

    if (size < 4) {
        TCGMemOp memop = s->be_data + size;
        tmphi = tcg_const_i64(0);
        tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), memop);
    } else {
        bool be = s->be_data == MO_BE;
        TCGv_i64 tcg_hiaddr;

        tmphi = tcg_temp_new_i64();
        tcg_hiaddr = tcg_temp_new_i64();

        tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
        tcg_gen_qemu_ld_i64(tmplo, be ? tcg_hiaddr : tcg_addr, get_mem_index(s),
                            s->be_data | MO_Q);
        tcg_gen_qemu_ld_i64(tmphi, be ? tcg_addr : tcg_hiaddr, get_mem_index(s),
                            s->be_data | MO_Q);
        tcg_temp_free_i64(tcg_hiaddr);
    }

    tcg_gen_st_i64(tmplo, cpu_env, fp_reg_offset(s, destidx, MO_64));
    tcg_gen_st_i64(tmphi, cpu_env, fp_reg_hi_offset(s, destidx));

    tcg_temp_free_i64(tmplo);
    tcg_temp_free_i64(tmphi);

    clear_vec_high(s, true, destidx);
}

/*
 * Vector load/store helpers.
 *
 * The principal difference between this and a FP load is that we don't
 * zero extend as we are filling a partial chunk of the vector register.
 * These functions don't support 128 bit loads/stores, which would be
 * normal load/store operations.
 *
 * The _i32 versions are useful when operating on 32 bit quantities
 * (eg for floating point single or using Neon helper functions).
 */

/* Get value of an element within a vector register */
static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx,
                             int element, TCGMemOp memop)
{
    int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_ld8u_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_16:
        tcg_gen_ld16u_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_32:
        tcg_gen_ld32u_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_8|MO_SIGN:
        tcg_gen_ld8s_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_16|MO_SIGN:
        tcg_gen_ld16s_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_32|MO_SIGN:
        tcg_gen_ld32s_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_64:
    case MO_64|MO_SIGN:
        tcg_gen_ld_i64(tcg_dest, cpu_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

static void read_vec_element_i32(DisasContext *s, TCGv_i32 tcg_dest, int srcidx,
                                 int element, TCGMemOp memop)
{
    int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_ld8u_i32(tcg_dest, cpu_env, vect_off);
        break;
    case MO_16:
        tcg_gen_ld16u_i32(tcg_dest, cpu_env, vect_off);
        break;
    case MO_8|MO_SIGN:
        tcg_gen_ld8s_i32(tcg_dest, cpu_env, vect_off);
        break;
    case MO_16|MO_SIGN:
        tcg_gen_ld16s_i32(tcg_dest, cpu_env, vect_off);
        break;
    case MO_32:
    case MO_32|MO_SIGN:
        tcg_gen_ld_i32(tcg_dest, cpu_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

/* Set value of an element within a vector register */
static void write_vec_element(DisasContext *s, TCGv_i64 tcg_src, int destidx,
                              int element, TCGMemOp memop)
{
    int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_st8_i64(tcg_src, cpu_env, vect_off);
        break;
    case MO_16:
        tcg_gen_st16_i64(tcg_src, cpu_env, vect_off);
        break;
    case MO_32:
        tcg_gen_st32_i64(tcg_src, cpu_env, vect_off);
        break;
    case MO_64:
        tcg_gen_st_i64(tcg_src, cpu_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src,
                                  int destidx, int element, TCGMemOp memop)
{
    int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_st8_i32(tcg_src, cpu_env, vect_off);
        break;
    case MO_16:
        tcg_gen_st16_i32(tcg_src, cpu_env, vect_off);
        break;
    case MO_32:
        tcg_gen_st_i32(tcg_src, cpu_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

/* Store from vector register to memory */
static void do_vec_st(DisasContext *s, int srcidx, int element,
                      TCGv_i64 tcg_addr, int size)
{
    TCGMemOp memop = s->be_data + size;
    TCGv_i64 tcg_tmp = tcg_temp_new_i64();

    read_vec_element(s, tcg_tmp, srcidx, element, size);
    tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), memop);

    tcg_temp_free_i64(tcg_tmp);
}

/* Load from memory to vector register */
static void do_vec_ld(DisasContext *s, int destidx, int element,
                      TCGv_i64 tcg_addr, int size)
{
    TCGMemOp memop = s->be_data + size;
    TCGv_i64 tcg_tmp = tcg_temp_new_i64();

    tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), memop);
    write_vec_element(s, tcg_tmp, destidx, element, size);

    tcg_temp_free_i64(tcg_tmp);
}

/* Check that FP/Neon access is enabled. If it is, return
 * true. If not, emit code to generate an appropriate exception,
 * and return false; the caller should not emit any code for
 * the instruction. Note that this check must happen after all
 * unallocated-encoding checks (otherwise the syndrome information
 * for the resulting exception will be incorrect).
 */
static inline bool fp_access_check(DisasContext *s)
{
    assert(!s->fp_access_checked);
    s->fp_access_checked = true;

    if (!s->fp_excp_el) {
        return true;
    }

    gen_exception_insn(s, 4, EXCP_UDEF, syn_fp_access_trap(1, 0xe, false),
                       s->fp_excp_el);
    return false;
}
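
/* Callers use the idiom (as the disas_* functions later in the full
 * file do):
 *
 *     if (!fp_access_check(s)) {
 *         return;
 *     }
 */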

/* Check that SVE access is enabled.  If it is, return true.
 * If not, emit code to generate an appropriate exception and return false.
 */
static inline bool sve_access_check(DisasContext *s)
{
    if (s->sve_excp_el) {
        gen_exception_insn(s, 4, EXCP_UDEF, syn_sve_access_trap(),
                           s->sve_excp_el);
        return false;
    }
    return true;
}

/*
 * This utility function is for doing register extension with an
 * optional shift. You will likely want to pass a temporary for the
 * destination register. See DecodeRegExtend() in the ARM ARM.
 */
static void ext_and_shift_reg(TCGv_i64 tcg_out, TCGv_i64 tcg_in,
                              int option, unsigned int shift)
{
    int extsize = extract32(option, 0, 2);
    bool is_signed = extract32(option, 2, 1);

    if (is_signed) {
        switch (extsize) {
        case 0:
            tcg_gen_ext8s_i64(tcg_out, tcg_in);
            break;
        case 1:
            tcg_gen_ext16s_i64(tcg_out, tcg_in);
            break;
        case 2:
            tcg_gen_ext32s_i64(tcg_out, tcg_in);
            break;
        case 3:
            tcg_gen_mov_i64(tcg_out, tcg_in);
            break;
        }
    } else {
        switch (extsize) {
        case 0:
            tcg_gen_ext8u_i64(tcg_out, tcg_in);
            break;
        case 1:
            tcg_gen_ext16u_i64(tcg_out, tcg_in);
            break;
        case 2:
            tcg_gen_ext32u_i64(tcg_out, tcg_in);
            break;
        case 3:
            tcg_gen_mov_i64(tcg_out, tcg_in);
            break;
        }
    }

    if (shift) {
        tcg_gen_shli_i64(tcg_out, tcg_out, shift);
    }
}
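
/* Example (illustrative): for LDR Xt, [Xn, Wm, UXTW #3] the decoder
 * passes option == 0b010 (UXTW) and shift == 3, so the code above
 * zero-extends the low 32 bits of Wm and then shifts left by 3.
 */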

static inline void gen_check_sp_alignment(DisasContext *s)
{
    /* The AArch64 architecture mandates that (if enabled via PSTATE
     * or SCTLR bits) there is a check that SP is 16-aligned on every
     * SP-relative load or store (with an exception generated if it is not).
     * In line with general QEMU practice regarding misaligned accesses,
     * we omit these checks for the sake of guest program performance.
     * This function is provided as a hook so we can more easily add these
     * checks in future (possibly as a "favour catching guest program bugs
     * over speed" user selectable option).
     */
}

/*
 * This provides a simple table-based lookup decoder. It is
 * intended to be used when the relevant bits for decode are too
 * awkwardly placed and switch/if based logic would be confusing and
 * deeply nested. Since it's a linear search through the table, tables
 * should be kept small.
 *
 * It returns the first handler where insn & mask == pattern, or
 * NULL if there is no match.
 * The table is terminated by an empty mask (i.e. 0)
 */
static inline AArch64DecodeFn *lookup_disas_fn(const AArch64DecodeTable *table,
                                               uint32_t insn)
{
    const AArch64DecodeTable *tptr = table;

    while (tptr->mask) {
        if ((insn & tptr->mask) == tptr->pattern) {
            return tptr->disas_fn;
        }
        tptr++;
    }
    return NULL;
}
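
/* Typical usage (this mirrors the call in disas_data_proc_simd() later
 * in the full file):
 *
 *     AArch64DecodeFn *fn = lookup_disas_fn(&data_proc_simd[0], insn);
 *     if (fn) {
 *         fn(s, insn);
 *     } else {
 *         unallocated_encoding(s);
 *     }
 */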

/*
 * The instruction disassembly implemented here matches
 * the instruction encoding classifications in chapter C4
 * of the ARM Architecture Reference Manual (DDI0487B_a);
 * classification names and decode diagrams here should generally
 * match up with those in the manual.
 */

/* Unconditional branch (immediate)
 *   31  30       26 25                                  0
 * +----+-----------+-------------------------------------+
 * | op | 0 0 1 0 1 |                 imm26               |
 * +----+-----------+-------------------------------------+
 */
static void disas_uncond_b_imm(DisasContext *s, uint32_t insn)
{
    uint64_t addr = s->pc + sextract32(insn, 0, 26) * 4 - 4;

    if (insn & (1U << 31)) {
        /* BL Branch with link */
        tcg_gen_movi_i64(cpu_reg(s, 30), s->pc);
    }

    /* B Branch / BL Branch with link */
    gen_goto_tb(s, 0, addr);
}
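
/* Note on the "- 4" idiom above (illustrative): by the time we decode a
 * branch at address A, s->pc has already advanced to A + 4, so
 * "s->pc + imm * 4 - 4" computes A + imm * 4, i.e. the offset is taken
 * relative to the branch instruction itself, as the architecture
 * requires.  The same idiom recurs in the branch decoders below.
 */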

/* Compare and branch (immediate)
 *   31  30         25  24  23                  5 4      0
 * +----+-------------+----+---------------------+--------+
 * | sf | 0 1 1 0 1 0 | op |         imm19       |   Rt   |
 * +----+-------------+----+---------------------+--------+
 */
static void disas_comp_b_imm(DisasContext *s, uint32_t insn)
{
    unsigned int sf, op, rt;
    uint64_t addr;
    TCGLabel *label_match;
    TCGv_i64 tcg_cmp;

    sf = extract32(insn, 31, 1);
    op = extract32(insn, 24, 1); /* 0: CBZ; 1: CBNZ */
    rt = extract32(insn, 0, 5);
    addr = s->pc + sextract32(insn, 5, 19) * 4 - 4;

    tcg_cmp = read_cpu_reg(s, rt, sf);
    label_match = gen_new_label();

    tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
                        tcg_cmp, 0, label_match);

    gen_goto_tb(s, 0, s->pc);
    gen_set_label(label_match);
    gen_goto_tb(s, 1, addr);
}

/* Test and branch (immediate)
 *   31  30         25  24  23   19 18          5 4    0
 * +----+-------------+----+-------+-------------+------+
 * | b5 | 0 1 1 0 1 1 | op |  b40  |    imm14    |  Rt  |
 * +----+-------------+----+-------+-------------+------+
 */
static void disas_test_b_imm(DisasContext *s, uint32_t insn)
{
    unsigned int bit_pos, op, rt;
    uint64_t addr;
    TCGLabel *label_match;
    TCGv_i64 tcg_cmp;

    bit_pos = (extract32(insn, 31, 1) << 5) | extract32(insn, 19, 5);
    op = extract32(insn, 24, 1); /* 0: TBZ; 1: TBNZ */
    addr = s->pc + sextract32(insn, 5, 14) * 4 - 4;
    rt = extract32(insn, 0, 5);

    tcg_cmp = tcg_temp_new_i64();
    tcg_gen_andi_i64(tcg_cmp, cpu_reg(s, rt), (1ULL << bit_pos));
    label_match = gen_new_label();
    tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
                        tcg_cmp, 0, label_match);
    tcg_temp_free_i64(tcg_cmp);
    gen_goto_tb(s, 0, s->pc);
    gen_set_label(label_match);
    gen_goto_tb(s, 1, addr);
}

/* Conditional branch (immediate)
 *  31           25  24  23                  5   4  3    0
 * +---------------+----+---------------------+----+------+
 * | 0 1 0 1 0 1 0 | o1 |         imm19       | o0 | cond |
 * +---------------+----+---------------------+----+------+
 */
static void disas_cond_b_imm(DisasContext *s, uint32_t insn)
{
    unsigned int cond;
    uint64_t addr;

    if ((insn & (1 << 4)) || (insn & (1 << 24))) {
        unallocated_encoding(s);
        return;
    }
    addr = s->pc + sextract32(insn, 5, 19) * 4 - 4;
    cond = extract32(insn, 0, 4);

    if (cond < 0x0e) {
        /* genuinely conditional branches */
        TCGLabel *label_match = gen_new_label();
        arm_gen_test_cc(cond, label_match);
        gen_goto_tb(s, 0, s->pc);
        gen_set_label(label_match);
        gen_goto_tb(s, 1, addr);
    } else {
        /* 0xe and 0xf are both "always" conditions */
        gen_goto_tb(s, 0, addr);
    }
}

/* HINT instruction group, including various allocated HINTs */
static void handle_hint(DisasContext *s, uint32_t insn,
                        unsigned int op1, unsigned int op2, unsigned int crm)
{
    unsigned int selector = crm << 3 | op2;

    if (op1 != 3) {
        unallocated_encoding(s);
        return;
    }

    switch (selector) {
    case 0: /* NOP */
        return;
    case 3: /* WFI */
        s->base.is_jmp = DISAS_WFI;
        return;
        /* When running in MTTCG we don't generate jumps to the yield and
         * WFE helpers as it won't affect the scheduling of other vCPUs.
         * If we wanted to more completely model WFE/SEV so we don't busy
         * spin unnecessarily we would need to do something more involved.
         */
    case 1: /* YIELD */
        if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
            s->base.is_jmp = DISAS_YIELD;
        }
        return;
    case 2: /* WFE */
        if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
            s->base.is_jmp = DISAS_WFE;
        }
        return;
    case 4: /* SEV */
    case 5: /* SEVL */
        /* we treat all as NOP at least for now */
        return;
    default:
        /* default specified as NOP equivalent */
        return;
    }
}

static void gen_clrex(DisasContext *s, uint32_t insn)
{
    tcg_gen_movi_i64(cpu_exclusive_addr, -1);
}

/* CLREX, DSB, DMB, ISB */
static void handle_sync(DisasContext *s, uint32_t insn,
                        unsigned int op1, unsigned int op2, unsigned int crm)
{
    TCGBar bar;

    if (op1 != 3) {
        unallocated_encoding(s);
        return;
    }

    switch (op2) {
    case 2: /* CLREX */
        gen_clrex(s, insn);
        return;
    case 4: /* DSB */
    case 5: /* DMB */
        switch (crm & 3) {
        case 1: /* MBReqTypes_Reads */
            bar = TCG_BAR_SC | TCG_MO_LD_LD | TCG_MO_LD_ST;
            break;
        case 2: /* MBReqTypes_Writes */
            bar = TCG_BAR_SC | TCG_MO_ST_ST;
            break;
        default: /* MBReqTypes_All */
            bar = TCG_BAR_SC | TCG_MO_ALL;
            break;
        }
        tcg_gen_mb(bar);
        return;
    case 6: /* ISB */
        /* We need to break the TB after this insn to execute
         * self-modifying code correctly and also to take
         * any pending interrupts immediately.
         */
        gen_goto_tb(s, 0, s->pc);
        return;
    default:
        unallocated_encoding(s);
        return;
    }
}

/* MSR (immediate) - move immediate to processor state field */
static void handle_msr_i(DisasContext *s, uint32_t insn,
                         unsigned int op1, unsigned int op2, unsigned int crm)
{
    int op = op1 << 3 | op2;
    switch (op) {
    case 0x05: /* SPSel */
        if (s->current_el == 0) {
            unallocated_encoding(s);
            return;
        }
        /* fall through */
    case 0x1e: /* DAIFSet */
    case 0x1f: /* DAIFClear */
    {
        TCGv_i32 tcg_imm = tcg_const_i32(crm);
        TCGv_i32 tcg_op = tcg_const_i32(op);
        gen_a64_set_pc_im(s->pc - 4);
        gen_helper_msr_i_pstate(cpu_env, tcg_op, tcg_imm);
        tcg_temp_free_i32(tcg_imm);
        tcg_temp_free_i32(tcg_op);
        /* For DAIFClear, exit the cpu loop to re-evaluate pending IRQs.  */
        gen_a64_set_pc_im(s->pc);
        s->base.is_jmp = (op == 0x1f ? DISAS_EXIT : DISAS_JUMP);
        break;
    }
    default:
        unallocated_encoding(s);
        return;
    }
}

static void gen_get_nzcv(TCGv_i64 tcg_rt)
{
    TCGv_i32 tmp = tcg_temp_new_i32();
    TCGv_i32 nzcv = tcg_temp_new_i32();

    /* build bit 31, N */
    tcg_gen_andi_i32(nzcv, cpu_NF, (1U << 31));
    /* build bit 30, Z */
    tcg_gen_setcondi_i32(TCG_COND_EQ, tmp, cpu_ZF, 0);
    tcg_gen_deposit_i32(nzcv, nzcv, tmp, 30, 1);
    /* build bit 29, C */
    tcg_gen_deposit_i32(nzcv, nzcv, cpu_CF, 29, 1);
    /* build bit 28, V */
    tcg_gen_shri_i32(tmp, cpu_VF, 31);
    tcg_gen_deposit_i32(nzcv, nzcv, tmp, 28, 1);
    /* generate result */
    tcg_gen_extu_i32_i64(tcg_rt, nzcv);

    tcg_temp_free_i32(nzcv);
    tcg_temp_free_i32(tmp);
}
1595
1596static void gen_set_nzcv(TCGv_i64 tcg_rt)
1598{
1599    TCGv_i32 nzcv = tcg_temp_new_i32();
1600
1601    /* take NZCV from R[t] */
1602    tcg_gen_extrl_i64_i32(nzcv, tcg_rt);
1603
1604    /* bit 31, N */
1605    tcg_gen_andi_i32(cpu_NF, nzcv, (1U << 31));
1606    /* bit 30, Z */
1607    tcg_gen_andi_i32(cpu_ZF, nzcv, (1 << 30));
1608    tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_ZF, cpu_ZF, 0);
1609    /* bit 29, C */
1610    tcg_gen_andi_i32(cpu_CF, nzcv, (1 << 29));
1611    tcg_gen_shri_i32(cpu_CF, cpu_CF, 29);
1612    /* bit 28, V */
1613    tcg_gen_andi_i32(cpu_VF, nzcv, (1 << 28));
1614    tcg_gen_shli_i32(cpu_VF, cpu_VF, 3);
1615    tcg_temp_free_i32(nzcv);
1616}
1617
1618/* MRS - move from system register
1619 * MSR (register) - move to system register
1620 * SYS
1621 * SYSL
1622 * These are all essentially the same insn in 'read' and 'write'
1623 * versions, with varying op0 fields.
1624 */
1625static void handle_sys(DisasContext *s, uint32_t insn, bool isread,
1626                       unsigned int op0, unsigned int op1, unsigned int op2,
1627                       unsigned int crn, unsigned int crm, unsigned int rt)
1628{
1629    const ARMCPRegInfo *ri;
1630    TCGv_i64 tcg_rt;
1631
1632    ri = get_arm_cp_reginfo(s->cp_regs,
1633                            ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP,
1634                                               crn, crm, op0, op1, op2));
1635
1636    if (!ri) {
1637        /* Unknown register; this might be a guest error or a QEMU
1638         * unimplemented feature.
1639         */
1640        qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch64 "
1641                      "system register op0:%d op1:%d crn:%d crm:%d op2:%d\n",
1642                      isread ? "read" : "write", op0, op1, crn, crm, op2);
1643        unallocated_encoding(s);
1644        return;
1645    }
1646
1647    /* Check access permissions */
1648    if (!cp_access_ok(s->current_el, ri, isread)) {
1649        unallocated_encoding(s);
1650        return;
1651    }
1652
1653    if (ri->accessfn) {
1654        /* Emit code to perform further access permissions checks at
1655         * runtime; this may result in an exception.
1656         */
1657        TCGv_ptr tmpptr;
1658        TCGv_i32 tcg_syn, tcg_isread;
1659        uint32_t syndrome;
1660
1661        gen_a64_set_pc_im(s->pc - 4);
1662        tmpptr = tcg_const_ptr(ri);
1663        syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
1664        tcg_syn = tcg_const_i32(syndrome);
1665        tcg_isread = tcg_const_i32(isread);
1666        gen_helper_access_check_cp_reg(cpu_env, tmpptr, tcg_syn, tcg_isread);
1667        tcg_temp_free_ptr(tmpptr);
1668        tcg_temp_free_i32(tcg_syn);
1669        tcg_temp_free_i32(tcg_isread);
1670    }
1671
1672    /* Handle special cases first */
1673    switch (ri->type & ~(ARM_CP_FLAG_MASK & ~ARM_CP_SPECIAL)) {
1674    case ARM_CP_NOP:
1675        return;
1676    case ARM_CP_NZCV:
1677        tcg_rt = cpu_reg(s, rt);
1678        if (isread) {
1679            gen_get_nzcv(tcg_rt);
1680        } else {
1681            gen_set_nzcv(tcg_rt);
1682        }
1683        return;
1684    case ARM_CP_CURRENTEL:
1685        /* Reads as current EL value from pstate, which is
1686         * guaranteed to be constant by the tb flags.
1687         */
1688        tcg_rt = cpu_reg(s, rt);
1689        tcg_gen_movi_i64(tcg_rt, s->current_el << 2);
1690        return;
1691    case ARM_CP_DC_ZVA:
1692        /* Writes clear the aligned block of memory which rt points into. */
1693        tcg_rt = cpu_reg(s, rt);
1694        gen_helper_dc_zva(cpu_env, tcg_rt);
1695        return;
1696    default:
1697        break;
1698    }
1699    if ((ri->type & ARM_CP_SVE) && !sve_access_check(s)) {
1700        return;
1701    }
1702    if ((ri->type & ARM_CP_FPU) && !fp_access_check(s)) {
1703        return;
1704    }
1705
1706    if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
1707        gen_io_start();
1708    }
1709
1710    tcg_rt = cpu_reg(s, rt);
1711
1712    if (isread) {
1713        if (ri->type & ARM_CP_CONST) {
1714            tcg_gen_movi_i64(tcg_rt, ri->resetvalue);
1715        } else if (ri->readfn) {
1716            TCGv_ptr tmpptr;
1717            tmpptr = tcg_const_ptr(ri);
1718            gen_helper_get_cp_reg64(tcg_rt, cpu_env, tmpptr);
1719            tcg_temp_free_ptr(tmpptr);
1720        } else {
1721            tcg_gen_ld_i64(tcg_rt, cpu_env, ri->fieldoffset);
1722        }
1723    } else {
1724        if (ri->type & ARM_CP_CONST) {
1725            /* If not forbidden by access permissions, treat as WI */
1726            return;
1727        } else if (ri->writefn) {
1728            TCGv_ptr tmpptr;
1729            tmpptr = tcg_const_ptr(ri);
1730            gen_helper_set_cp_reg64(cpu_env, tmpptr, tcg_rt);
1731            tcg_temp_free_ptr(tmpptr);
1732        } else {
1733            tcg_gen_st_i64(tcg_rt, cpu_env, ri->fieldoffset);
1734        }
1735    }
1736
1737    if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
1738        /* I/O operations must end the TB here (whether read or write) */
1739        gen_io_end();
1740        s->base.is_jmp = DISAS_UPDATE;
1741    } else if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
1742        /* We default to ending the TB on a coprocessor register write,
1743         * but allow this to be suppressed by the register definition
1744         * (usually only necessary to work around guest bugs).
1745         */
1746        s->base.is_jmp = DISAS_UPDATE;
1747    }
1748}
1749
1750/* System
1751 *  31                 22 21  20 19 18 16 15   12 11    8 7   5 4    0
1752 * +---------------------+---+-----+-----+-------+-------+-----+------+
1753 * | 1 1 0 1 0 1 0 1 0 0 | L | op0 | op1 |  CRn  |  CRm  | op2 |  Rt  |
1754 * +---------------------+---+-----+-----+-------+-------+-----+------+
1755 */
1756static void disas_system(DisasContext *s, uint32_t insn)
1757{
1758    unsigned int l, op0, op1, crn, crm, op2, rt;
1759    l = extract32(insn, 21, 1);
1760    op0 = extract32(insn, 19, 2);
1761    op1 = extract32(insn, 16, 3);
1762    crn = extract32(insn, 12, 4);
1763    crm = extract32(insn, 8, 4);
1764    op2 = extract32(insn, 5, 3);
1765    rt = extract32(insn, 0, 5);
1766
1767    if (op0 == 0) {
1768        if (l || rt != 31) {
1769            unallocated_encoding(s);
1770            return;
1771        }
1772        switch (crn) {
1773        case 2: /* HINT (including allocated hints like NOP, YIELD, etc) */
1774            handle_hint(s, insn, op1, op2, crm);
1775            break;
1776        case 3: /* CLREX, DSB, DMB, ISB */
1777            handle_sync(s, insn, op1, op2, crm);
1778            break;
1779        case 4: /* MSR (immediate) */
1780            handle_msr_i(s, insn, op1, op2, crm);
1781            break;
1782        default:
1783            unallocated_encoding(s);
1784            break;
1785        }
1786        return;
1787    }
1788    handle_sys(s, insn, l, op0, op1, op2, crn, crm, rt);
1789}
1790
1791/* Exception generation
1792 *
1793 *  31             24 23 21 20                     5 4   2 1  0
1794 * +-----------------+-----+------------------------+-----+----+
1795 * | 1 1 0 1 0 1 0 0 | opc |          imm16         | op2 | LL |
1796 * +-----------------+-----+------------------------+-----+----+
1797 */
1798static void disas_exc(DisasContext *s, uint32_t insn)
1799{
1800    int opc = extract32(insn, 21, 3);
1801    int op2_ll = extract32(insn, 0, 5);
1802    int imm16 = extract32(insn, 5, 16);
1803    TCGv_i32 tmp;
1804
1805    switch (opc) {
1806    case 0:
1807        /* For SVC, HVC and SMC we advance the single-step state
1808         * machine before taking the exception. This is architecturally
1809         * mandated, to ensure that single-stepping a system call
1810         * instruction works properly.
1811         */
1812        switch (op2_ll) {
1813        case 1:                                                     /* SVC */
1814            gen_ss_advance(s);
1815            gen_exception_insn(s, 0, EXCP_SWI, syn_aa64_svc(imm16),
1816                               default_exception_el(s));
1817            break;
1818        case 2:                                                     /* HVC */
1819            if (s->current_el == 0) {
1820                unallocated_encoding(s);
1821                break;
1822            }
1823            /* The pre HVC helper handles cases when HVC gets trapped
1824             * as an undefined insn by runtime configuration.
1825             */
1826            gen_a64_set_pc_im(s->pc - 4);
1827            gen_helper_pre_hvc(cpu_env);
1828            gen_ss_advance(s);
1829            gen_exception_insn(s, 0, EXCP_HVC, syn_aa64_hvc(imm16), 2);
1830            break;
1831        case 3:                                                     /* SMC */
1832            if (s->current_el == 0) {
1833                unallocated_encoding(s);
1834                break;
1835            }
1836            gen_a64_set_pc_im(s->pc - 4);
1837            tmp = tcg_const_i32(syn_aa64_smc(imm16));
1838            gen_helper_pre_smc(cpu_env, tmp);
1839            tcg_temp_free_i32(tmp);
1840            gen_ss_advance(s);
1841            gen_exception_insn(s, 0, EXCP_SMC, syn_aa64_smc(imm16), 3);
1842            break;
1843        default:
1844            unallocated_encoding(s);
1845            break;
1846        }
1847        break;
1848    case 1:
1849        if (op2_ll != 0) {
1850            unallocated_encoding(s);
1851            break;
1852        }
1853        /* BRK */
1854        gen_exception_bkpt_insn(s, 4, syn_aa64_bkpt(imm16));
1855        break;
1856    case 2:
1857        if (op2_ll != 0) {
1858            unallocated_encoding(s);
1859            break;
1860        }
1861        /* HLT. This has two purposes.
1862         * Architecturally, it is an external halting debug instruction.
1863         * Since QEMU doesn't implement external debug, we treat this as
1864         * the architecture requires with halting debug disabled: it will UNDEF.
1865         * Secondly, "HLT 0xf000" is the A64 semihosting syscall instruction.
1866         */
1867        if (semihosting_enabled() && imm16 == 0xf000) {
1868#ifndef CONFIG_USER_ONLY
1869            /* In system mode, don't allow userspace access to semihosting,
1870             * to provide some semblance of security (and for consistency
1871             * with our 32-bit semihosting).
1872             */
1873            if (s->current_el == 0) {
1874                unsupported_encoding(s, insn);
1875                break;
1876            }
1877#endif
1878            gen_exception_internal_insn(s, 0, EXCP_SEMIHOST);
1879        } else {
1880            unsupported_encoding(s, insn);
1881        }
1882        break;
1883    case 5:
1884        if (op2_ll < 1 || op2_ll > 3) {
1885            unallocated_encoding(s);
1886            break;
1887        }
1888        /* DCPS1, DCPS2, DCPS3 */
1889        unsupported_encoding(s, insn);
1890        break;
1891    default:
1892        unallocated_encoding(s);
1893        break;
1894    }
1895}
1896
1897/* Unconditional branch (register)
1898 *  31           25 24   21 20   16 15   10 9    5 4     0
1899 * +---------------+-------+-------+-------+------+-------+
1900 * | 1 1 0 1 0 1 1 |  opc  |  op2  |  op3  |  Rn  |  op4  |
1901 * +---------------+-------+-------+-------+------+-------+
1902 */
1903static void disas_uncond_b_reg(DisasContext *s, uint32_t insn)
1904{
1905    unsigned int opc, op2, op3, rn, op4;
1906
1907    opc = extract32(insn, 21, 4);
1908    op2 = extract32(insn, 16, 5);
1909    op3 = extract32(insn, 10, 6);
1910    rn = extract32(insn, 5, 5);
1911    op4 = extract32(insn, 0, 5);
1912
1913    if (op4 != 0x0 || op3 != 0x0 || op2 != 0x1f) {
1914        unallocated_encoding(s);
1915        return;
1916    }
1917
1918    switch (opc) {
1919    case 0: /* BR */
1920    case 1: /* BLR */
1921    case 2: /* RET */
1922        gen_a64_set_pc(s, cpu_reg(s, rn));
1923        /* BLR also needs to load the return address (the next insn) */
1924        if (opc == 1) {
1925            tcg_gen_movi_i64(cpu_reg(s, 30), s->pc);
1926        }
1927        break;
1928    case 4: /* ERET */
1929        if (s->current_el == 0) {
1930            unallocated_encoding(s);
1931            return;
1932        }
1933        gen_helper_exception_return(cpu_env);
1934        /* Must exit loop to check unmasked IRQs */
1935        s->base.is_jmp = DISAS_EXIT;
1936        return;
1937    case 5: /* DRPS */
1938        if (rn != 0x1f) {
1939            unallocated_encoding(s);
1940        } else {
1941            unsupported_encoding(s, insn);
1942        }
1943        return;
1944    default:
1945        unallocated_encoding(s);
1946        return;
1947    }
1948
1949    s->base.is_jmp = DISAS_JUMP;
1950}
1951
1952/* Branches, exception generating and system instructions */
1953static void disas_b_exc_sys(DisasContext *s, uint32_t insn)
1954{
1955    switch (extract32(insn, 25, 7)) {
1956    case 0x0a: case 0x0b:
1957    case 0x4a: case 0x4b: /* Unconditional branch (immediate) */
1958        disas_uncond_b_imm(s, insn);
1959        break;
1960    case 0x1a: case 0x5a: /* Compare & branch (immediate) */
1961        disas_comp_b_imm(s, insn);
1962        break;
1963    case 0x1b: case 0x5b: /* Test & branch (immediate) */
1964        disas_test_b_imm(s, insn);
1965        break;
1966    case 0x2a: /* Conditional branch (immediate) */
1967        disas_cond_b_imm(s, insn);
1968        break;
1969    case 0x6a: /* Exception generation / System */
1970        if (insn & (1 << 24)) {
1971            disas_system(s, insn);
1972        } else {
1973            disas_exc(s, insn);
1974        }
1975        break;
1976    case 0x6b: /* Unconditional branch (register) */
1977        disas_uncond_b_reg(s, insn);
1978        break;
1979    default:
1980        unallocated_encoding(s);
1981        break;
1982    }
1983}
1984
1985/*
1986 * Load/Store exclusive instructions are implemented by remembering
1987 * the value/address loaded, and seeing if these are the same
1988 * when the store is performed. This is not actually the architecturally
1989 * mandated semantics, but it works for typical guest code sequences
1990 * and avoids having to monitor regular stores.
1991 *
1992 * The store exclusive uses the atomic cmpxchg primitives to avoid
1993 * races in multi-threaded linux-user and when MTTCG softmmu is
1994 * enabled.
1995 */
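/* A typical guest sequence this models (illustrative):
 *
 *     retry:
 *         ldxr    x0, [x2]        // gen_load_exclusive: record addr/value
 *         add     x0, x0, #1
 *         stxr    w1, x0, [x2]    // gen_store_exclusive: cmpxchg old value
 *         cbnz    w1, retry       // w1 == 1 means the store failed
 */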
1996static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
1997                               TCGv_i64 addr, int size, bool is_pair)
1998{
1999    int idx = get_mem_index(s);
2000    TCGMemOp memop = s->be_data;
2001
2002    g_assert(size <= 3);
2003    if (is_pair) {
2004        g_assert(size >= 2);
2005        if (size == 2) {
2006            /* The pair must be single-copy atomic for the doubleword.  */
2007            memop |= MO_64 | MO_ALIGN;
2008            tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx, memop);
2009            if (s->be_data == MO_LE) {
2010                tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 0, 32);
2011                tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 32, 32);
2012            } else {
2013                tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 32, 32);
2014                tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 0, 32);
2015            }
2016        } else {
2017            /* The pair must be single-copy atomic for *each* doubleword,
2018               not the entire quadword; however, it must be quadword aligned.  */
2019            memop |= MO_64;
2020            tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx,
2021                                memop | MO_ALIGN_16);
2022
2023            TCGv_i64 addr2 = tcg_temp_new_i64();
2024            tcg_gen_addi_i64(addr2, addr, 8);
2025            tcg_gen_qemu_ld_i64(cpu_exclusive_high, addr2, idx, memop);
2026            tcg_temp_free_i64(addr2);
2027
2028            tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
2029            tcg_gen_mov_i64(cpu_reg(s, rt2), cpu_exclusive_high);
2030        }
2031    } else {
2032        memop |= size | MO_ALIGN;
2033        tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx, memop);
2034        tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
2035    }
2036    tcg_gen_mov_i64(cpu_exclusive_addr, addr);
2037}
2038
2039static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
2040                                TCGv_i64 addr, int size, int is_pair)
2041{
2042    /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]
2043     *     && (!is_pair || env->exclusive_high == [addr + datasize])) {
2044     *     [addr] = {Rt};
2045     *     if (is_pair) {
2046     *         [addr + datasize] = {Rt2};
2047     *     }
2048     *     {Rd} = 0;
2049     * } else {
2050     *     {Rd} = 1;
2051     * }
2052     * env->exclusive_addr = -1;
2053     */
2054    TCGLabel *fail_label = gen_new_label();
2055    TCGLabel *done_label = gen_new_label();
2056    TCGv_i64 tmp;
2057
2058    tcg_gen_brcond_i64(TCG_COND_NE, addr, cpu_exclusive_addr, fail_label);
2059
2060    tmp = tcg_temp_new_i64();
2061    if (is_pair) {
2062        if (size == 2) {
2063            if (s->be_data == MO_LE) {
2064                tcg_gen_concat32_i64(tmp, cpu_reg(s, rt), cpu_reg(s, rt2));
2065            } else {
2066                tcg_gen_concat32_i64(tmp, cpu_reg(s, rt2), cpu_reg(s, rt));
2067            }
2068            tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr,
2069                                       cpu_exclusive_val, tmp,
2070                                       get_mem_index(s),
2071                                       MO_64 | MO_ALIGN | s->be_data);
2072            tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
2073        } else if (s->be_data == MO_LE) {
2074            if (tb_cflags(s->base.tb) & CF_PARALLEL) {
2075                gen_helper_paired_cmpxchg64_le_parallel(tmp, cpu_env,
2076                                                        cpu_exclusive_addr,
2077                                                        cpu_reg(s, rt),
2078                                                        cpu_reg(s, rt2));
2079            } else {
2080                gen_helper_paired_cmpxchg64_le(tmp, cpu_env, cpu_exclusive_addr,
2081                                               cpu_reg(s, rt), cpu_reg(s, rt2));
2082            }
2083        } else {
2084            if (tb_cflags(s->base.tb) & CF_PARALLEL) {
2085                gen_helper_paired_cmpxchg64_be_parallel(tmp, cpu_env,
2086                                                        cpu_exclusive_addr,
2087                                                        cpu_reg(s, rt),
2088                                                        cpu_reg(s, rt2));
2089            } else {
2090                gen_helper_paired_cmpxchg64_be(tmp, cpu_env, cpu_exclusive_addr,
2091                                               cpu_reg(s, rt), cpu_reg(s, rt2));
2092            }
2093        }
2094    } else {
2095        tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, cpu_exclusive_val,
2096                                   cpu_reg(s, rt), get_mem_index(s),
2097                                   size | MO_ALIGN | s->be_data);
2098        tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
2099    }
2100    tcg_gen_mov_i64(cpu_reg(s, rd), tmp);
2101    tcg_temp_free_i64(tmp);
2102    tcg_gen_br(done_label);
2103
2104    gen_set_label(fail_label);
2105    tcg_gen_movi_i64(cpu_reg(s, rd), 1);
2106    gen_set_label(done_label);
2107    tcg_gen_movi_i64(cpu_exclusive_addr, -1);
2108}
2109
2110/* Compute the Sixty-Four bit (SF) register size flag. This logic is derived
2111 * from the ARMv8 specs for LDR (Shared decode for all encodings).
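 * E.g. LDRSW (signed, opc<0> = 0) targets a 64-bit register, so SF = 1,
 * while an unsigned load is 64-bit only when size == 3 (LDR Xt).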
2112 */
2113static bool disas_ldst_compute_iss_sf(int size, bool is_signed, int opc)
2114{
2115    int opc0 = extract32(opc, 0, 1);
2116    int regsize;
2117
2118    if (is_signed) {
2119        regsize = opc0 ? 32 : 64;
2120    } else {
2121        regsize = size == 3 ? 64 : 32;
2122    }
2123    return regsize == 64;
2124}
2125
2126/* Load/store exclusive
2127 *
2128 *  31 30 29         24  23  22   21  20  16  15  14   10 9    5 4    0
2129 * +-----+-------------+----+---+----+------+----+-------+------+------+
2130 * | sz  | 0 0 1 0 0 0 | o2 | L | o1 |  Rs  | o0 |  Rt2  |  Rn  | Rt   |
2131 * +-----+-------------+----+---+----+------+----+-------+------+------+
2132 *
2133 *  sz: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64 bit
2134 *   L: 0 -> store, 1 -> load
2135 *  o2: 0 -> exclusive, 1 -> not
2136 *  o1: 0 -> single register, 1 -> register pair
2137 *  o0: 1 -> load-acquire/store-release, 0 -> not
2138 */
2139static void disas_ldst_excl(DisasContext *s, uint32_t insn)
2140{
2141    int rt = extract32(insn, 0, 5);
2142    int rn = extract32(insn, 5, 5);
2143    int rt2 = extract32(insn, 10, 5);
2144    int is_lasr = extract32(insn, 15, 1);
2145    int rs = extract32(insn, 16, 5);
2146    int is_pair = extract32(insn, 21, 1);
2147    int is_store = !extract32(insn, 22, 1);
2148    int is_excl = !extract32(insn, 23, 1);
2149    int size = extract32(insn, 30, 2);
2150    TCGv_i64 tcg_addr;
2151
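    /* In this encoding space the non-exclusive (o2 = 1) entries are only
     * LDAR/STLR, i.e. single register with o0 = 1, and register pairs
     * only exist for 32/64-bit sizes; everything else is unallocated.
     */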
2152    if ((!is_excl && !is_pair && !is_lasr) ||
2153        (!is_excl && is_pair) ||
2154        (is_pair && size < 2)) {
2155        unallocated_encoding(s);
2156        return;
2157    }
2158
2159    if (rn == 31) {
2160        gen_check_sp_alignment(s);
2161    }
2162    tcg_addr = read_cpu_reg_sp(s, rn, 1);
2163
2164    /* Note that since TCG is single-threaded, load-acquire/store-release
2165     * semantics require no extra if (is_lasr) { ... } handling.
2166     */
2167
2168    if (is_excl) {
2169        if (!is_store) {
2170            s->is_ldex = true;
2171            gen_load_exclusive(s, rt, rt2, tcg_addr, size, is_pair);
2172            if (is_lasr) {
2173                tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
2174            }
2175        } else {
2176            if (is_lasr) {
2177                tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
2178            }
2179            gen_store_exclusive(s, rs, rt, rt2, tcg_addr, size, is_pair);
2180        }
2181    } else {
2182        TCGv_i64 tcg_rt = cpu_reg(s, rt);
2183        bool iss_sf = disas_ldst_compute_iss_sf(size, false, 0);
2184
2185        /* Generate ISS for non-exclusive accesses including LASR.  */
2186        if (is_store) {
2187            if (is_lasr) {
2188                tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
2189            }
2190            do_gpr_st(s, tcg_rt, tcg_addr, size,
2191                      true, rt, iss_sf, is_lasr);
2192        } else {
2193            do_gpr_ld(s, tcg_rt, tcg_addr, size, false, false,
2194                      true, rt, iss_sf, is_lasr);
2195            if (is_lasr) {
2196                tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
2197            }
2198        }
2199    }
2200}
2201
2202/*
2203 * Load register (literal)
2204 *
2205 *  31 30 29   27  26 25 24 23                5 4     0
2206 * +-----+-------+---+-----+-------------------+-------+
2207 * | opc | 0 1 1 | V | 0 0 |     imm19         |  Rt   |
2208 * +-----+-------+---+-----+-------------------+-------+
2209 *
2210 * V: 1 -> vector (simd/fp)
2211 * opc (non-vector): 00 -> 32 bit, 01 -> 64 bit,
2212 *                   10 -> 32 bit signed, 11 -> prefetch
2213 * opc (vector): 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit (11 unallocated)
2214 */
2215static void disas_ld_lit(DisasContext *s, uint32_t insn)
2216{
2217    int rt = extract32(insn, 0, 5);
2218    int64_t imm = sextract32(insn, 5, 19) << 2;
2219    bool is_vector = extract32(insn, 26, 1);
2220    int opc = extract32(insn, 30, 2);
2221    bool is_signed = false;
2222    int size = 2;
2223    TCGv_i64 tcg_rt, tcg_addr;
2224
2225    if (is_vector) {
2226        if (opc == 3) {
2227            unallocated_encoding(s);
2228            return;
2229        }
2230        size = 2 + opc;
2231        if (!fp_access_check(s)) {
2232            return;
2233        }
2234    } else {
2235        if (opc == 3) {
2236            /* PRFM (literal) : prefetch */
2237            return;
2238        }
2239        size = 2 + extract32(opc, 0, 1);
2240        is_signed = extract32(opc, 1, 1);
2241    }
2242
2243    tcg_rt = cpu_reg(s, rt);
2244
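    /* The literal address is relative to this instruction's own PC;
     * s->pc has already been advanced past the insn, hence the -4.
     */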
2245    tcg_addr = tcg_const_i64((s->pc - 4) + imm);
2246    if (is_vector) {
2247        do_fp_ld(s, rt, tcg_addr, size);
2248    } else {
2249        /* Only unsigned 32bit loads target 32bit registers.  */
2250        bool iss_sf = opc != 0;
2251
2252        do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, false,
2253                  true, rt, iss_sf, false);
2254    }
2255    tcg_temp_free_i64(tcg_addr);
2256}
2257
2258/*
2259 * LDNP (Load Pair - non-temporal hint)
2260 * LDP (Load Pair - non vector)
2261 * LDPSW (Load Pair Signed Word - non vector)
2262 * STNP (Store Pair - non-temporal hint)
2263 * STP (Store Pair - non vector)
2264 * LDNP (Load Pair of SIMD&FP - non-temporal hint)
2265 * LDP (Load Pair of SIMD&FP)
2266 * STNP (Store Pair of SIMD&FP - non-temporal hint)
2267 * STP (Store Pair of SIMD&FP)
2268 *
2269 *  31 30 29   27  26  25 24   23  22 21   15 14   10 9    5 4    0
2270 * +-----+-------+---+---+-------+---+-------+-------+------+------+
2271 * | opc | 1 0 1 | V | 0 | index | L |  imm7 |  Rt2  |  Rn  | Rt   |
2272 * +-----+-------+---+---+-------+---+-------+-------+------+------+
2273 *
2274 * opc: LDP/STP/LDNP/STNP        00 -> 32 bit, 10 -> 64 bit
2275 *      LDPSW                    01
2276 *      LDP/STP/LDNP/STNP (SIMD) 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit
2277 *   V: 0 -> GPR, 1 -> Vector
2278 * idx: 00 -> signed offset with non-temporal hint, 01 -> post-index,
2279 *      10 -> signed offset, 11 -> pre-index
2280 *   L: 0 -> Store 1 -> Load
2281 *
2282 * Rt, Rt2 = GPR or SIMD registers to be transferred
2283 * Rn = general purpose register containing address
2284 * imm7 = signed offset (multiple of 4 or 8 depending on size)
2285 */
2286static void disas_ldst_pair(DisasContext *s, uint32_t insn)
2287{
2288    int rt = extract32(insn, 0, 5);
2289    int rn = extract32(insn, 5, 5);
2290    int rt2 = extract32(insn, 10, 5);
2291    uint64_t offset = sextract64(insn, 15, 7);
2292    int index = extract32(insn, 23, 2);
2293    bool is_vector = extract32(insn, 26, 1);
2294    bool is_load = extract32(insn, 22, 1);
2295    int opc = extract32(insn, 30, 2);
2296
2297    bool is_signed = false;
2298    bool postindex = false;
2299    bool wback = false;
2300
2301    TCGv_i64 tcg_addr; /* calculated address */
2302    int size;
2303
2304    if (opc == 3) {
2305        unallocated_encoding(s);
2306        return;
2307    }
2308
2309    if (is_vector) {
2310        size = 2 + opc;
2311    } else {
2312        size = 2 + extract32(opc, 1, 1);
2313        is_signed = extract32(opc, 0, 1);
2314        if (!is_load && is_signed) {
2315            unallocated_encoding(s);
2316            return;
2317        }
2318    }
2319
2320    switch (index) {
2321    case 1: /* post-index */
2322        postindex = true;
2323        wback = true;
2324        break;
2325    case 0:
2326        /* signed offset with "non-temporal" hint. Since we don't emulate
2327         * caches we don't care about hints to the cache system about
2328         * data access patterns, and handle this identically to plain
2329         * signed offset.
2330         */
2331        if (is_signed) {
2332            /* There is no non-temporal-hint version of LDPSW */
2333            unallocated_encoding(s);
2334            return;
2335        }
2336        postindex = false;
2337        break;
2338    case 2: /* signed offset, rn not updated */
2339        postindex = false;
2340        break;
2341    case 3: /* pre-index */
2342        postindex = false;
2343        wback = true;
2344        break;
2345    }
2346
2347    if (is_vector && !fp_access_check(s)) {
2348        return;
2349    }
2350
2351    offset <<= size;
2352
2353    if (rn == 31) {
2354        gen_check_sp_alignment(s);
2355    }
2356
2357    tcg_addr = read_cpu_reg_sp(s, rn, 1);
2358
2359    if (!postindex) {
2360        tcg_gen_addi_i64(tcg_addr, tcg_addr, offset);
2361    }
2362
2363    if (is_vector) {
2364        if (is_load) {
2365            do_fp_ld(s, rt, tcg_addr, size);
2366        } else {
2367            do_fp_st(s, rt, tcg_addr, size);
2368        }
2369        tcg_gen_addi_i64(tcg_addr, tcg_addr, 1 << size);
2370        if (is_load) {
2371            do_fp_ld(s, rt2, tcg_addr, size);
2372        } else {
2373            do_fp_st(s, rt2, tcg_addr, size);
2374        }
2375    } else {
2376        TCGv_i64 tcg_rt = cpu_reg(s, rt);
2377        TCGv_i64 tcg_rt2 = cpu_reg(s, rt2);
2378
2379        if (is_load) {
2380            TCGv_i64 tmp = tcg_temp_new_i64();
2381
2382            /* Do not modify tcg_rt before recognizing any exception
2383             * from the second load.
2384             */
2385            do_gpr_ld(s, tmp, tcg_addr, size, is_signed, false,
2386                      false, 0, false, false);
2387            tcg_gen_addi_i64(tcg_addr, tcg_addr, 1 << size);
2388            do_gpr_ld(s, tcg_rt2, tcg_addr, size, is_signed, false,
2389                      false, 0, false, false);
2390
2391            tcg_gen_mov_i64(tcg_rt, tmp);
2392            tcg_temp_free_i64(tmp);
2393        } else {
2394            do_gpr_st(s, tcg_rt, tcg_addr, size,
2395                      false, 0, false, false);
2396            tcg_gen_addi_i64(tcg_addr, tcg_addr, 1 << size);
2397            do_gpr_st(s, tcg_rt2, tcg_addr, size,
2398                      false, 0, false, false);
2399        }
2400    }
2401
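    /* At this point tcg_addr points one element past the first slot,
     * i.e. base [+ offset] + (1 << size), so the writeback value is
     * re-derived from it below rather than recomputed from rn.
     */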
2402    if (wback) {
2403        if (postindex) {
2404            tcg_gen_addi_i64(tcg_addr, tcg_addr, offset - (1 << size));
2405        } else {
2406            tcg_gen_subi_i64(tcg_addr, tcg_addr, 1 << size);
2407        }
2408        tcg_gen_mov_i64(cpu_reg_sp(s, rn), tcg_addr);
2409    }
2410}
2411
2412/*
2413 * Load/store (immediate post-indexed)
2414 * Load/store (immediate pre-indexed)
2415 * Load/store (unscaled immediate)
2416 *
2417 * 31 30 29   27  26 25 24 23 22 21  20    12 11 10 9    5 4    0
2418 * +----+-------+---+-----+-----+---+--------+-----+------+------+
2419 * |size| 1 1 1 | V | 0 0 | opc | 0 |  imm9  | idx |  Rn  |  Rt  |
2420 * +----+-------+---+-----+-----+---+--------+-----+------+------+
2421 *
2422 * idx = 01 -> post-indexed, 11 -> pre-indexed, 00 -> unscaled imm. (no writeback)
2423 *       10 -> unprivileged
2424 * V = 0 -> non-vector
2425 * size: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64bit
2426 * opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
2427 */
2428static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn,
2429                                int opc,
2430                                int size,
2431                                int rt,
2432                                bool is_vector)
2433{
2434    int rn = extract32(insn, 5, 5);
2435    int imm9 = sextract32(insn, 12, 9);
2436    int idx = extract32(insn, 10, 2);
2437    bool is_signed = false;
2438    bool is_store = false;
2439    bool is_extended = false;
2440    bool is_unpriv = (idx == 2);
2441    bool iss_valid = !is_vector;
2442    bool post_index;
2443    bool writeback;
2444
2445    TCGv_i64 tcg_addr;
2446
2447    if (is_vector) {
2448        size |= (opc & 2) << 1;
2449        if (size > 4 || is_unpriv) {
2450            unallocated_encoding(s);
2451            return;
2452        }
2453        is_store = ((opc & 1) == 0);
2454        if (!fp_access_check(s)) {
2455            return;
2456        }
2457    } else {
2458        if (size == 3 && opc == 2) {
2459            /* PRFM - prefetch */
2460            if (is_unpriv) {
2461                unallocated_encoding(s);
2462                return;
2463            }
2464            return;
2465        }
2466        if (opc == 3 && size > 1) {
2467            unallocated_encoding(s);
2468            return;
2469        }
2470        is_store = (opc == 0);
2471        is_signed = extract32(opc, 1, 1);
2472        is_extended = (size < 3) && extract32(opc, 0, 1);
2473    }
2474
2475    switch (idx) {
2476    case 0:
2477    case 2:
2478        post_index = false;
2479        writeback = false;
2480        break;
2481    case 1:
2482        post_index = true;
2483        writeback = true;
2484        break;
2485    case 3:
2486        post_index = false;
2487        writeback = true;
2488        break;
2489    default:
2490        g_assert_not_reached();
2491    }
2492
2493    if (rn == 31) {
2494        gen_check_sp_alignment(s);
2495    }
2496    tcg_addr = read_cpu_reg_sp(s, rn, 1);
2497
2498    if (!post_index) {
2499        tcg_gen_addi_i64(tcg_addr, tcg_addr, imm9);
2500    }
2501
2502    if (is_vector) {
2503        if (is_store) {
2504            do_fp_st(s, rt, tcg_addr, size);
2505        } else {
2506            do_fp_ld(s, rt, tcg_addr, size);
2507        }
2508    } else {
2509        TCGv_i64 tcg_rt = cpu_reg(s, rt);
2510        int memidx = is_unpriv ? get_a64_user_mem_index(s) : get_mem_index(s);
2511        bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
2512
2513        if (is_store) {
2514            do_gpr_st_memidx(s, tcg_rt, tcg_addr, size, memidx,
2515                             iss_valid, rt, iss_sf, false);
2516        } else {
2517            do_gpr_ld_memidx(s, tcg_rt, tcg_addr, size,
2518                             is_signed, is_extended, memidx,
2519                             iss_valid, rt, iss_sf, false);
2520        }
2521    }
2522
2523    if (writeback) {
2524        TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
2525        if (post_index) {
2526            tcg_gen_addi_i64(tcg_addr, tcg_addr, imm9);
2527        }
2528        tcg_gen_mov_i64(tcg_rn, tcg_addr);
2529    }
2530}
2531
2532/*
2533 * Load/store (register offset)
2534 *
2535 * 31 30 29   27  26 25 24 23 22 21  20  16 15 13 12 11 10 9  5 4  0
2536 * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
2537 * |size| 1 1 1 | V | 0 0 | opc | 1 |  Rm  | opt | S| 1 0 | Rn | Rt |
2538 * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
2539 *
2540 * For non-vector:
2541 *   size: 00 -> byte, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64 bit
2542 *   opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
2543 * For vector:
2544 *   size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
2545 *   opc<0>: 0 -> store, 1 -> load
2546 * V: 1 -> vector/simd
2547 * opt: extend encoding (see DecodeRegExtend)
2548 * S: if S=1 then scale (essentially index by sizeof(size))
2549 * Rt: register to transfer into/out of
2550 * Rn: address register or SP for base
2551 * Rm: offset register or ZR for offset
2552 */
2553static void disas_ldst_reg_roffset(DisasContext *s, uint32_t insn,
2554                                   int opc,
2555                                   int size,
2556                                   int rt,
2557                                   bool is_vector)
2558{
2559    int rn = extract32(insn, 5, 5);
2560    int shift = extract32(insn, 12, 1);
2561    int rm = extract32(insn, 16, 5);
2562    int opt = extract32(insn, 13, 3);
2563    bool is_signed = false;
2564    bool is_store = false;
2565    bool is_extended = false;
2566
2567    TCGv_i64 tcg_rm;
2568    TCGv_i64 tcg_addr;
2569
2570    if (extract32(opt, 1, 1) == 0) {
2571        unallocated_encoding(s);
2572        return;
2573    }
2574
2575    if (is_vector) {
2576        size |= (opc & 2) << 1;
2577        if (size > 4) {
2578            unallocated_encoding(s);
2579            return;
2580        }
2581        is_store = !extract32(opc, 0, 1);
2582        if (!fp_access_check(s)) {
2583            return;
2584        }
2585    } else {
2586        if (size == 3 && opc == 2) {
2587            /* PRFM - prefetch */
2588            return;
2589        }
2590        if (opc == 3 && size > 1) {
2591            unallocated_encoding(s);
2592            return;
2593        }
2594        is_store = (opc == 0);
2595        is_signed = extract32(opc, 1, 1);
2596        is_extended = (size < 3) && extract32(opc, 0, 1);
2597    }
2598
2599    if (rn == 31) {
2600        gen_check_sp_alignment(s);
2601    }
2602    tcg_addr = read_cpu_reg_sp(s, rn, 1);
2603
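    /* e.g. LDR X0, [X1, W2, UXTW #3]: opt = 010 zero-extends W2 and
     * S = 1 scales the offset by the access size (1 << 3 = 8 bytes).
     */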
2604    tcg_rm = read_cpu_reg(s, rm, 1);
2605    ext_and_shift_reg(tcg_rm, tcg_rm, opt, shift ? size : 0);
2606
2607    tcg_gen_add_i64(tcg_addr, tcg_addr, tcg_rm);
2608
2609    if (is_vector) {
2610        if (is_store) {
2611            do_fp_st(s, rt, tcg_addr, size);
2612        } else {
2613            do_fp_ld(s, rt, tcg_addr, size);
2614        }
2615    } else {
2616        TCGv_i64 tcg_rt = cpu_reg(s, rt);
2617        bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
2618        if (is_store) {
2619            do_gpr_st(s, tcg_rt, tcg_addr, size,
2620                      true, rt, iss_sf, false);
2621        } else {
2622            do_gpr_ld(s, tcg_rt, tcg_addr, size,
2623                      is_signed, is_extended,
2624                      true, rt, iss_sf, false);
2625        }
2626    }
2627}
2628
2629/*
2630 * Load/store (unsigned immediate)
2631 *
2632 * 31 30 29   27  26 25 24 23 22 21        10 9     5 4    0
2633 * +----+-------+---+-----+-----+------------+-------+------+
2634 * |size| 1 1 1 | V | 0 1 | opc |   imm12    |  Rn   |  Rt  |
2635 * +----+-------+---+-----+-----+------------+-------+------+
2636 *
2637 * For non-vector:
2638 *   size: 00 -> byte, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64 bit
2639 *   opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
2640 * For vector:
2641 *   size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
2642 *   opc<0>: 0 -> store, 1 -> load
2643 * Rn: base address register (inc SP)
2644 * Rt: target register
2645 */
2646static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn,
2647                                        int opc,
2648                                        int size,
2649                                        int rt,
2650                                        bool is_vector)
2651{
2652    int rn = extract32(insn, 5, 5);
2653    unsigned int imm12 = extract32(insn, 10, 12);
2654    unsigned int offset;
2655
2656    TCGv_i64 tcg_addr;
2657
2658    bool is_store;
2659    bool is_signed = false;
2660    bool is_extended = false;
2661
2662    if (is_vector) {
2663        size |= (opc & 2) << 1;
2664        if (size > 4) {
2665            unallocated_encoding(s);
2666            return;
2667        }
2668        is_store = !extract32(opc, 0, 1);
2669        if (!fp_access_check(s)) {
2670            return;
2671        }
2672    } else {
2673        if (size == 3 && opc == 2) {
2674            /* PRFM - prefetch */
2675            return;
2676        }
2677        if (opc == 3 && size > 1) {
2678            unallocated_encoding(s);
2679            return;
2680        }
2681        is_store = (opc == 0);
2682        is_signed = extract32(opc, 1, 1);
2683        is_extended = (size < 3) && extract32(opc, 0, 1);
2684    }
2685
2686    if (rn == 31) {
2687        gen_check_sp_alignment(s);
2688    }
2689    tcg_addr = read_cpu_reg_sp(s, rn, 1);
2690    offset = imm12 << size;
2691    tcg_gen_addi_i64(tcg_addr, tcg_addr, offset);
2692
2693    if (is_vector) {
2694        if (is_store) {
2695            do_fp_st(s, rt, tcg_addr, size);
2696        } else {
2697            do_fp_ld(s, rt, tcg_addr, size);
2698        }
2699    } else {
2700        TCGv_i64 tcg_rt = cpu_reg(s, rt);
2701        bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
2702        if (is_store) {
2703            do_gpr_st(s, tcg_rt, tcg_addr, size,
2704                      true, rt, iss_sf, false);
2705        } else {
2706            do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, is_extended,
2707                      true, rt, iss_sf, false);
2708        }
2709    }
2710}
2711
2712/* Load/store register (all forms) */
2713static void disas_ldst_reg(DisasContext *s, uint32_t insn)
2714{
2715    int rt = extract32(insn, 0, 5);
2716    int opc = extract32(insn, 22, 2);
2717    bool is_vector = extract32(insn, 26, 1);
2718    int size = extract32(insn, 30, 2);
2719
2720    switch (extract32(insn, 24, 2)) {
2721    case 0:
2722        if (extract32(insn, 21, 1) == 1 && extract32(insn, 10, 2) == 2) {
2723            disas_ldst_reg_roffset(s, insn, opc, size, rt, is_vector);
2724        } else {
2725            /* Load/store register (unscaled immediate)
2726             * Load/store immediate pre/post-indexed
2727             * Load/store register unprivileged
2728             */
2729            disas_ldst_reg_imm9(s, insn, opc, size, rt, is_vector);
2730        }
2731        break;
2732    case 1:
2733        disas_ldst_reg_unsigned_imm(s, insn, opc, size, rt, is_vector);
2734        break;
2735    default:
2736        unallocated_encoding(s);
2737        break;
2738    }
2739}
2740
2741/* AdvSIMD load/store multiple structures
2742 *
2743 *  31  30  29           23 22  21         16 15    12 11  10 9    5 4    0
2744 * +---+---+---------------+---+-------------+--------+------+------+------+
2745 * | 0 | Q | 0 0 1 1 0 0 0 | L | 0 0 0 0 0 0 | opcode | size |  Rn  |  Rt  |
2746 * +---+---+---------------+---+-------------+--------+------+------+------+
2747 *
2748 * AdvSIMD load/store multiple structures (post-indexed)
2749 *
2750 *  31  30  29           23 22  21  20     16 15    12 11  10 9    5 4    0
2751 * +---+---+---------------+---+---+---------+--------+------+------+------+
2752 * | 0 | Q | 0 0 1 1 0 0 1 | L | 0 |   Rm    | opcode | size |  Rn  |  Rt  |
2753 * +---+---+---------------+---+---+---------+--------+------+------+------+
2754 *
2755 * Rt: first (or only) SIMD&FP register to be transferred
2756 * Rn: base address or SP
2757 * Rm (post-index only): post-index register (when Rm != 31) or size dependent #imm
2758 */
2759static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn)
2760{
2761    int rt = extract32(insn, 0, 5);
2762    int rn = extract32(insn, 5, 5);
2763    int size = extract32(insn, 10, 2);
2764    int opcode = extract32(insn, 12, 4);
2765    bool is_store = !extract32(insn, 22, 1);
2766    bool is_postidx = extract32(insn, 23, 1);
2767    bool is_q = extract32(insn, 30, 1);
2768    TCGv_i64 tcg_addr, tcg_rn;
2769
2770    int ebytes = 1 << size;
2771    int elements = (is_q ? 128 : 64) / (8 << size);
2772    int rpt;    /* num iterations */
2773    int selem;  /* structure elements */
2774    int r;
2775
2776    if (extract32(insn, 31, 1) || extract32(insn, 21, 1)) {
2777        unallocated_encoding(s);
2778        return;
2779    }
2780
2781    /* From the shared decode logic */
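    /* e.g. LD4 {v0.8b-v3.8b}, [x0] has opcode 0000: one iteration
     * (rpt = 1) over 8 elements, each transferring selem = 4 structure
     * members into consecutive registers.
     */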
2782    switch (opcode) {
2783    case 0x0:
2784        rpt = 1;
2785        selem = 4;
2786        break;
2787    case 0x2:
2788        rpt = 4;
2789        selem = 1;
2790        break;
2791    case 0x4:
2792        rpt = 1;
2793        selem = 3;
2794        break;
2795    case 0x6:
2796        rpt = 3;
2797        selem = 1;
2798        break;
2799    case 0x7:
2800        rpt = 1;
2801        selem = 1;
2802        break;
2803    case 0x8:
2804        rpt = 1;
2805        selem = 2;
2806        break;
2807    case 0xa:
2808        rpt = 2;
2809        selem = 1;
2810        break;
2811    default:
2812        unallocated_encoding(s);
2813        return;
2814    }
2815
2816    if (size == 3 && !is_q && selem != 1) {
2817        /* reserved */
2818        unallocated_encoding(s);
2819        return;
2820    }
2821
2822    if (!fp_access_check(s)) {
2823        return;
2824    }
2825
2826    if (rn == 31) {
2827        gen_check_sp_alignment(s);
2828    }
2829
2830    tcg_rn = cpu_reg_sp(s, rn);
2831    tcg_addr = tcg_temp_new_i64();
2832    tcg_gen_mov_i64(tcg_addr, tcg_rn);
2833
2834    for (r = 0; r < rpt; r++) {
2835        int e;
2836        for (e = 0; e < elements; e++) {
2837            int tt = (rt + r) % 32;
2838            int xs;
2839            for (xs = 0; xs < selem; xs++) {
2840                if (is_store) {
2841                    do_vec_st(s, tt, e, tcg_addr, size);
2842                } else {
2843                    do_vec_ld(s, tt, e, tcg_addr, size);
2844
2845                    /* For non-quad operations, setting a slice of the low
2846                     * 64 bits of the register clears the high 64 bits (in
2847                     * the ARM ARM pseudocode this is implicit in the fact
2848                     * that 'rval' is a 64 bit wide variable).
2849                     * For quad operations, we might still need to zero the
2850                     * high (SVE) bits.  We optimize by noticing that we only
2851                     * need to do this the first time we touch a register.
2852                     */
2853                    if (e == 0 && (r == 0 || xs == selem - 1)) {
2854                        clear_vec_high(s, is_q, tt);
2855                    }
2856                }
2857                tcg_gen_addi_i64(tcg_addr, tcg_addr, ebytes);
2858                tt = (tt + 1) % 32;
2859            }
2860        }
2861    }
2862
2863    if (is_postidx) {
2864        int rm = extract32(insn, 16, 5);
2865        if (rm == 31) {
2866            tcg_gen_mov_i64(tcg_rn, tcg_addr);
2867        } else {
2868            tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
2869        }
2870    }
2871    tcg_temp_free_i64(tcg_addr);
2872}
2873
2874/* AdvSIMD load/store single structure
2875 *
2876 *  31  30  29           23 22 21 20       16 15 13 12  11  10 9    5 4    0
2877 * +---+---+---------------+-----+-----------+-----+---+------+------+------+
2878 * | 0 | Q | 0 0 1 1 0 1 0 | L R | 0 0 0 0 0 | opc | S | size |  Rn  |  Rt  |
2879 * +---+---+---------------+-----+-----------+-----+---+------+------+------+
2880 *
2881 * AdvSIMD load/store single structure (post-indexed)
2882 *
2883 *  31  30  29           23 22 21 20       16 15 13 12  11  10 9    5 4    0
2884 * +---+---+---------------+-----+-----------+-----+---+------+------+------+
2885 * | 0 | Q | 0 0 1 1 0 1 1 | L R |     Rm    | opc | S | size |  Rn  |  Rt  |
2886 * +---+---+---------------+-----+-----------+-----+---+------+------+------+
2887 *
2888 * Rt: first (or only) SIMD&FP register to be transferred
2889 * Rn: base address or SP
2890 * Rm (post-index only): post-index register (when Rm != 31) or size dependent #imm
2891 * index = encoded in Q:S:size dependent on size
2892 *
2893 * lane_size = encoded in R, opc
2894 * transfer width = encoded in opc, S, size
2895 */
2896static void disas_ldst_single_struct(DisasContext *s, uint32_t insn)
2897{
2898    int rt = extract32(insn, 0, 5);
2899    int rn = extract32(insn, 5, 5);
2900    int size = extract32(insn, 10, 2);
2901    int S = extract32(insn, 12, 1);
2902    int opc = extract32(insn, 13, 3);
2903    int R = extract32(insn, 21, 1);
2904    int is_load = extract32(insn, 22, 1);
2905    int is_postidx = extract32(insn, 23, 1);
2906    int is_q = extract32(insn, 30, 1);
2907
2908    int scale = extract32(opc, 1, 2);
2909    int selem = (extract32(opc, 0, 1) << 1 | R) + 1;
2910    bool replicate = false;
2911    int index = is_q << 3 | S << 2 | size;
2912    int ebytes, xs;
2913    TCGv_i64 tcg_addr, tcg_rn;
2914
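    /* The lane index comes from Q:S:size, with the low bits of size
     * reused as index bits for narrow elements: e.g. a 32-bit lane
     * (scale 2) is indexed by Q:S, so LD1 {Vt.S}[3] has Q = 1, S = 1.
     */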
2915    switch (scale) {
2916    case 3:
2917        if (!is_load || S) {
2918            unallocated_encoding(s);
2919            return;
2920        }
2921        scale = size;
2922        replicate = true;
2923        break;
2924    case 0:
2925        break;
2926    case 1:
2927        if (extract32(size, 0, 1)) {
2928            unallocated_encoding(s);
2929            return;
2930        }
2931        index >>= 1;
2932        break;
2933    case 2:
2934        if (extract32(size, 1, 1)) {
2935            unallocated_encoding(s);
2936            return;
2937        }
2938        if (!extract32(size, 0, 1)) {
2939            index >>= 2;
2940        } else {
2941            if (S) {
2942                unallocated_encoding(s);
2943                return;
2944            }
2945            index >>= 3;
2946            scale = 3;
2947        }
2948        break;
2949    default:
2950        g_assert_not_reached();
2951    }
2952
2953    if (!fp_access_check(s)) {
2954        return;
2955    }
2956
2957    ebytes = 1 << scale;
2958
2959    if (rn == 31) {
2960        gen_check_sp_alignment(s);
2961    }
2962
2963    tcg_rn = cpu_reg_sp(s, rn);
2964    tcg_addr = tcg_temp_new_i64();
2965    tcg_gen_mov_i64(tcg_addr, tcg_rn);
2966
2967    for (xs = 0; xs < selem; xs++) {
2968        if (replicate) {
2969            /* Load and replicate to all elements */
2970            uint64_t mulconst;
2971            TCGv_i64 tcg_tmp = tcg_temp_new_i64();
2972
2973            tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr,
2974                                get_mem_index(s), s->be_data + scale);
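            /* Multiplying the zero-extended element by a constant with a
             * 1 in each lane replicates it into every lane, e.g.
             * 0xab * 0x0101010101010101 == 0xabababababababab.
             */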
2975            switch (scale) {
2976            case 0:
2977                mulconst = 0x0101010101010101ULL;
2978                break;
2979            case 1:
2980                mulconst = 0x0001000100010001ULL;
2981                break;
2982            case 2:
2983                mulconst = 0x0000000100000001ULL;
2984                break;
2985            case 3:
2986                mulconst = 0;
2987                break;
2988            default:
2989                g_assert_not_reached();
2990            }
2991            if (mulconst) {
2992                tcg_gen_muli_i64(tcg_tmp, tcg_tmp, mulconst);
2993            }
2994            write_vec_element(s, tcg_tmp, rt, 0, MO_64);
2995            if (is_q) {
2996                write_vec_element(s, tcg_tmp, rt, 1, MO_64);
2997            }
2998            tcg_temp_free_i64(tcg_tmp);
2999            clear_vec_high(s, is_q, rt);
3000        } else {
3001            /* Load/store one element per register */
3002            if (is_load) {
3003                do_vec_ld(s, rt, index, tcg_addr, scale);
3004            } else {
3005                do_vec_st(s, rt, index, tcg_addr, scale);
3006            }
3007        }
3008        tcg_gen_addi_i64(tcg_addr, tcg_addr, ebytes);
3009        rt = (rt + 1) % 32;
3010    }
3011
3012    if (is_postidx) {
3013        int rm = extract32(insn, 16, 5);
3014        if (rm == 31) {
3015            tcg_gen_mov_i64(tcg_rn, tcg_addr);
3016        } else {
3017            tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
3018        }
3019    }
3020    tcg_temp_free_i64(tcg_addr);
3021}
3022
3023/* Loads and stores */
3024static void disas_ldst(DisasContext *s, uint32_t insn)
3025{
3026    switch (extract32(insn, 24, 6)) {
3027    case 0x08: /* Load/store exclusive */
3028        disas_ldst_excl(s, insn);
3029        break;
3030    case 0x18: case 0x1c: /* Load register (literal) */
3031        disas_ld_lit(s, insn);
3032        break;
3033    case 0x28: case 0x29:
3034    case 0x2c: case 0x2d: /* Load/store pair (all forms) */
3035        disas_ldst_pair(s, insn);
3036        break;
3037    case 0x38: case 0x39:
3038    case 0x3c: case 0x3d: /* Load/store register (all forms) */
3039        disas_ldst_reg(s, insn);
3040        break;
3041    case 0x0c: /* AdvSIMD load/store multiple structures */
3042        disas_ldst_multiple_struct(s, insn);
3043        break;
3044    case 0x0d: /* AdvSIMD load/store single structure */
3045        disas_ldst_single_struct(s, insn);
3046        break;
3047    default:
3048        unallocated_encoding(s);
3049        break;
3050    }
3051}
3052
3053/* PC-rel. addressing
3054 *   31  30   29 28       24 23                5 4    0
3055 * +----+-------+-----------+-------------------+------+
3056 * | op | immlo | 1 0 0 0 0 |       immhi       |  Rd  |
3057 * +----+-------+-----------+-------------------+------+
3058 */
3059static void disas_pc_rel_adr(DisasContext *s, uint32_t insn)
3060{
3061    unsigned int page, rd;
3062    uint64_t base;
3063    uint64_t offset;
3064
3065    page = extract32(insn, 31, 1);
3066    /* SignExtend(immhi:immlo) -> offset */
3067    offset = sextract64(insn, 5, 19);
3068    offset = offset << 2 | extract32(insn, 29, 2);
3069    rd = extract32(insn, 0, 5);
3070    base = s->pc - 4;
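    /* e.g. ADR with offset 1 yields this insn's PC + 1, while ADRP with
     * offset 1 executed anywhere in page 0x4000..0x4fff yields 0x5000.
     */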
3071
3072    if (page) {
3073        /* ADRP (page based) */
3074        base &= ~0xfff;
3075        offset <<= 12;
3076    }
3077
3078    tcg_gen_movi_i64(cpu_reg(s, rd), base + offset);
3079}
3080
3081/*
3082 * Add/subtract (immediate)
3083 *
3084 *  31 30 29 28       24 23 22 21         10 9   5 4   0
3085 * +--+--+--+-----------+-----+-------------+-----+-----+
3086 * |sf|op| S| 1 0 0 0 1 |shift|    imm12    |  Rn | Rd  |
3087 * +--+--+--+-----------+-----+-------------+-----+-----+
3088 *
3089 *    sf: 0 -> 32bit, 1 -> 64bit
3090 *    op: 0 -> add  , 1 -> sub
3091 *     S: 1 -> set flags
3092 * shift: 00 -> LSL imm by 0, 01 -> LSL imm by 12
3093 */
3094static void disas_add_sub_imm(DisasContext *s, uint32_t insn)
3095{
3096    int rd = extract32(insn, 0, 5);
3097    int rn = extract32(insn, 5, 5);
3098    uint64_t imm = extract32(insn, 10, 12);
3099    int shift = extract32(insn, 22, 2);
3100    bool setflags = extract32(insn, 29, 1);
3101    bool sub_op = extract32(insn, 30, 1);
3102    bool is_64bit = extract32(insn, 31, 1);
3103
3104    TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
3105    TCGv_i64 tcg_rd = setflags ? cpu_reg(s, rd) : cpu_reg_sp(s, rd);
3106    TCGv_i64 tcg_result;
3107
3108    switch (shift) {
3109    case 0x0:
3110        break;
3111    case 0x1:
3112        imm <<= 12;
3113        break;
3114    default:
3115        unallocated_encoding(s);
3116        return;
3117    }
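    /* e.g. ADD X0, X1, #1, LSL #12 (shift = 01) adds an immediate of 0x1000 */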
3118
3119    tcg_result = tcg_temp_new_i64();
3120    if (!setflags) {
3121        if (sub_op) {
3122            tcg_gen_subi_i64(tcg_result, tcg_rn, imm);
3123        } else {
3124            tcg_gen_addi_i64(tcg_result, tcg_rn, imm);
3125        }
3126    } else {
3127        TCGv_i64 tcg_imm = tcg_const_i64(imm);
3128        if (sub_op) {
3129            gen_sub_CC(is_64bit, tcg_result, tcg_rn, tcg_imm);
3130        } else {
3131            gen_add_CC(is_64bit, tcg_result, tcg_rn, tcg_imm);
3132        }
3133        tcg_temp_free_i64(tcg_imm);
3134    }
3135
3136    if (is_64bit) {
3137        tcg_gen_mov_i64(tcg_rd, tcg_result);
3138    } else {
3139        tcg_gen_ext32u_i64(tcg_rd, tcg_result);
3140    }
3141
3142    tcg_temp_free_i64(tcg_result);
3143}
3144
3145/* The input should be a value in the bottom e bits (with higher
3146 * bits zero); returns that value replicated into every element
3147 * of size e in a 64 bit integer.
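 * E.g. bitfield_replicate(0x3, 4) == 0x3333333333333333ULL.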
3148 */
3149static uint64_t bitfield_replicate(uint64_t mask, unsigned int e)
3150{
3151    assert(e != 0);
3152    while (e < 64) {
3153        mask |= mask << e;
3154        e *= 2;
3155    }
3156    return mask;
3157}
3158
3159/* Return a value with the bottom length bits set (where 0 < length <= 64) */
3160static inline uint64_t bitmask64(unsigned int length)
3161{
3162    assert(length > 0 && length <= 64);
3163    return ~0ULL >> (64 - length);
3164}
3165
3166/* Simplified variant of pseudocode DecodeBitMasks() for the case where we
3167 * only require the wmask. Returns false if the imms/immr/immn are a reserved
3168 * value (ie should cause a guest UNDEF exception), and true if they are
3169 * valid, in which case the decoded bit pattern is written to result.
3170 */
3171static bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn,
3172                                   unsigned int imms, unsigned int immr)
3173{
3174    uint64_t mask;
3175    unsigned e, levels, s, r;
3176    int len;
3177
3178    assert(immn < 2 && imms < 64 && immr < 64);
3179
3180    /* The bit patterns we create here are 64 bit patterns which
3181     * are vectors of identical elements of size e = 2, 4, 8, 16, 32 or
3182     * 64 bits each. Each element contains the same value: a run
3183     * of between 1 and e-1 non-zero bits, rotated within the
3184     * element by between 0 and e-1 bits.
3185     *
3186     * The element size and run length are encoded into immn (1 bit)
3187     * and imms (6 bits) as follows:
3188     * 64 bit elements: immn = 1, imms = <length of run - 1>
3189     * 32 bit elements: immn = 0, imms = 0 : <length of run - 1>
3190     * 16 bit elements: immn = 0, imms = 10 : <length of run - 1>
3191     *  8 bit elements: immn = 0, imms = 110 : <length of run - 1>
3192     *  4 bit elements: immn = 0, imms = 1110 : <length of run - 1>
3193     *  2 bit elements: immn = 0, imms = 11110 : <length of run - 1>
3194     * Notice that immn = 0, imms = 11111x is the only combination
3195     * not covered by one of the above options; this is reserved.
3196     * Further, <length of run - 1> all-ones is a reserved pattern.
3197     *
3198     * In all cases the rotation is by immr % e (and immr is 6 bits).
3199     */
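        /* Worked example (informal): immn = 0, imms = 0b111100,
         * immr = 0 selects 2 bit elements (len = 1, e = 2) with a run
         * of s + 1 = 1 set bit and no rotation, so each element is
         * 0b01 and the decoded pattern is 0x5555555555555555.
         */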
3200
3201    /* First determine the element size */
3202    len = 31 - clz32((immn << 6) | (~imms & 0x3f));
3203    if (len < 1) {
3204        /* This is the immn == 0, imms == 11111x case */
3205        return false;
3206    }
3207    e = 1 << len;
3208
3209    levels = e - 1;
3210    s = imms & levels;
3211    r = immr & levels;
3212
3213    if (s == levels) {
3214        /* <length of run - 1> mustn't be all-ones. */
3215        return false;
3216    }
3217
3218    /* Create the value of one element: s+1 set bits rotated
3219     * by r within the element (which is e bits wide)...
3220     */
3221    mask = bitmask64(s + 1);
3222    if (r) {
3223        mask = (mask >> r) | (mask << (e - r));
3224        mask &= bitmask64(e);
3225    }
3226    /* ...then replicate the element over the whole 64 bit value */
3227    mask = bitfield_replicate(mask, e);
3228    *result = mask;
3229    return true;
3230}
3231
3232/* Logical (immediate)
3233 *   31  30 29 28         23 22  21  16 15  10 9    5 4    0
3234 * +----+-----+-------------+---+------+------+------+------+
3235 * | sf | opc | 1 0 0 1 0 0 | N | immr | imms |  Rn  |  Rd  |
3236 * +----+-----+-------------+---+------+------+------+------+
3237 */
3238static void disas_logic_imm(DisasContext *s, uint32_t insn)
3239{
3240    unsigned int sf, opc, is_n, immr, imms, rn, rd;
3241    TCGv_i64 tcg_rd, tcg_rn;
3242    uint64_t wmask;
3243    bool is_and = false;
3244
3245    sf = extract32(insn, 31, 1);
3246    opc = extract32(insn, 29, 2);
3247    is_n = extract32(insn, 22, 1);
3248    immr = extract32(insn, 16, 6);
3249    imms = extract32(insn, 10, 6);
3250    rn = extract32(insn, 5, 5);
3251    rd = extract32(insn, 0, 5);
3252
3253    if (!sf && is_n) {
3254        unallocated_encoding(s);
3255        return;
3256    }
3257
3258    if (opc == 0x3) { /* ANDS */
3259        tcg_rd = cpu_reg(s, rd);
3260    } else {
3261        tcg_rd = cpu_reg_sp(s, rd);
3262    }
3263    tcg_rn = cpu_reg(s, rn);
3264
3265    if (!logic_imm_decode_wmask(&wmask, is_n, imms, immr)) {
3266        /* some immediate field values are reserved */
3267        unallocated_encoding(s);
3268        return;
3269    }
3270
3271    if (!sf) {
3272        wmask &= 0xffffffff;
3273    }
3274
3275    switch (opc) {
3276    case 0x3: /* ANDS */
3277    case 0x0: /* AND */
3278        tcg_gen_andi_i64(tcg_rd, tcg_rn, wmask);
3279        is_and = true;
3280        break;
3281    case 0x1: /* ORR */
3282        tcg_gen_ori_i64(tcg_rd, tcg_rn, wmask);
3283        break;
3284    case 0x2: /* EOR */
3285        tcg_gen_xori_i64(tcg_rd, tcg_rn, wmask);
3286        break;
3287    default:
3288        g_assert_not_reached(); /* must handle all above */
3289        break;
3290    }
3291
3292    if (!sf && !is_and) {
3293        /* zero extend final result; we know we can skip this for AND
3294         * since the immediate had the high 32 bits clear.
3295         */
3296        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3297    }
3298
3299    if (opc == 3) { /* ANDS */
3300        gen_logic_CC(sf, tcg_rd);
3301    }
3302}
3303
3304/*
3305 * Move wide (immediate)
3306 *
3307 *  31 30 29 28         23 22 21 20             5 4    0
3308 * +--+-----+-------------+-----+----------------+------+
3309 * |sf| opc | 1 0 0 1 0 1 |  hw |  imm16         |  Rd  |
3310 * +--+-----+-------------+-----+----------------+------+
3311 *
3312 * sf: 0 -> 32 bit, 1 -> 64 bit
3313 * opc: 00 -> N, 10 -> Z, 11 -> K
3314 * hw: shift/16 (0 or 16; 32 and 48 are valid only when sf == 1)
3315 */
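    /* For reference (not part of the encoding above): a full 64 bit
     * constant is typically materialised as a MOVZ followed by up to
     * three MOVKs, one 16 bit chunk per hw position.
     */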
3316static void disas_movw_imm(DisasContext *s, uint32_t insn)
3317{
3318    int rd = extract32(insn, 0, 5);
3319    uint64_t imm = extract32(insn, 5, 16);
3320    int sf = extract32(insn, 31, 1);
3321    int opc = extract32(insn, 29, 2);
3322    int pos = extract32(insn, 21, 2) << 4;
3323    TCGv_i64 tcg_rd = cpu_reg(s, rd);
3324    TCGv_i64 tcg_imm;
3325
3326    if (!sf && (pos >= 32)) {
3327        unallocated_encoding(s);
3328        return;
3329    }
3330
3331    switch (opc) {
3332    case 0: /* MOVN */
3333    case 2: /* MOVZ */
3334        imm <<= pos;
3335        if (opc == 0) {
3336            imm = ~imm;
3337        }
3338        if (!sf) {
3339            imm &= 0xffffffffu;
3340        }
3341        tcg_gen_movi_i64(tcg_rd, imm);
3342        break;
3343    case 3: /* MOVK */
3344        tcg_imm = tcg_const_i64(imm);
3345        tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_imm, pos, 16);
3346        tcg_temp_free_i64(tcg_imm);
3347        if (!sf) {
3348            tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3349        }
3350        break;
3351    default:
3352        unallocated_encoding(s);
3353        break;
3354    }
3355}
3356
3357/* Bitfield
3358 *   31  30 29 28         23 22  21  16 15  10 9    5 4    0
3359 * +----+-----+-------------+---+------+------+------+------+
3360 * | sf | opc | 1 0 0 1 1 0 | N | immr | imms |  Rn  |  Rd  |
3361 * +----+-----+-------------+---+------+------+------+------+
3362 */
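    /* Informal summary of the aliases decoded here: SBFM covers ASR,
     * SBFX, SBFIZ and SXT{B,H,W}; UBFM covers LSL, LSR, UBFX, UBFIZ
     * and UXT{B,H}; BFM covers BFI and BFXIL.
     */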
3363static void disas_bitfield(DisasContext *s, uint32_t insn)
3364{
3365    unsigned int sf, n, opc, ri, si, rn, rd, bitsize, pos, len;
3366    TCGv_i64 tcg_rd, tcg_tmp;
3367
3368    sf = extract32(insn, 31, 1);
3369    opc = extract32(insn, 29, 2);
3370    n = extract32(insn, 22, 1);
3371    ri = extract32(insn, 16, 6);
3372    si = extract32(insn, 10, 6);
3373    rn = extract32(insn, 5, 5);
3374    rd = extract32(insn, 0, 5);
3375    bitsize = sf ? 64 : 32;
3376
3377    if (sf != n || ri >= bitsize || si >= bitsize || opc > 2) {
3378        unallocated_encoding(s);
3379        return;
3380    }
3381
3382    tcg_rd = cpu_reg(s, rd);
3383
3384    /* Suppress the zero-extend for !sf.  Since RI and SI are constrained
3385       to be smaller than bitsize, we'll never reference data outside the
3386       low 32-bits anyway.  */
3387    tcg_tmp = read_cpu_reg(s, rn, 1);
3388
3389    /* Recognize simple(r) extractions.  */
3390    if (si >= ri) {
3391        /* Wd<s-r:0> = Wn<s:r> */
3392        len = (si - ri) + 1;
3393        if (opc == 0) { /* SBFM: ASR, SBFX, SXTB, SXTH, SXTW */
3394            tcg_gen_sextract_i64(tcg_rd, tcg_tmp, ri, len);
3395            goto done;
3396        } else if (opc == 2) { /* UBFM: UBFX, LSR, UXTB, UXTH */
3397            tcg_gen_extract_i64(tcg_rd, tcg_tmp, ri, len);
3398            return;
3399        }
3400        /* opc == 1, BFXIL fall through to deposit */
3401        tcg_gen_extract_i64(tcg_tmp, tcg_tmp, ri, len);
3402        pos = 0;
3403    } else {
3404        /* Handle the ri > si case with a deposit
3405         * Wd<32+s-r,32-r> = Wn<s:0>
3406         */
3407        len = si + 1;
3408        pos = (bitsize - ri) & (bitsize - 1);
3409    }
3410
3411    if (opc == 0 && len < ri) {
3412        /* SBFM: sign extend the destination field from len to fill
3413           the balance of the word.  Let the deposit below insert all
3414           of those sign bits.  */
3415        tcg_gen_sextract_i64(tcg_tmp, tcg_tmp, 0, len);
3416        len = ri;
3417    }
3418
3419    if (opc == 1) { /* BFM, BFXIL */
3420        tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, pos, len);
3421    } else {
3422        /* SBFM or UBFM: We start with zero, and we haven't modified
3423           any bits outside bitsize, therefore the zero-extension
3424           below is unneeded.  */
3425        tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len);
3426        return;
3427    }
3428
3429 done:
3430    if (!sf) { /* zero extend final result */
3431        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3432    }
3433}
3434
3435/* Extract
3436 *   31  30  29 28         23 22   21  20  16 15    10 9    5 4    0
3437 * +----+------+-------------+---+----+------+--------+------+------+
3438 * | sf | op21 | 1 0 0 1 1 1 | N | o0 |  Rm  |  imms  |  Rn  |  Rd  |
3439 * +----+------+-------------+---+----+------+--------+------+------+
3440 */
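    /* EXTR forms its result from the concatenation Rn:Rm (Rn in the
     * high half), taking a register-width field that starts at bit
     * imms of Rm; EXTR with Rn == Rm is the canonical ROR-by-immediate
     * alias and is special-cased below.
     */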
3441static void disas_extract(DisasContext *s, uint32_t insn)
3442{
3443    unsigned int sf, n, rm, imm, rn, rd, bitsize, op21, op0;
3444
3445    sf = extract32(insn, 31, 1);
3446    n = extract32(insn, 22, 1);
3447    rm = extract32(insn, 16, 5);
3448    imm = extract32(insn, 10, 6);
3449    rn = extract32(insn, 5, 5);
3450    rd = extract32(insn, 0, 5);
3451    op21 = extract32(insn, 29, 2);
3452    op0 = extract32(insn, 21, 1);
3453    bitsize = sf ? 64 : 32;
3454
3455    if (sf != n || op21 || op0 || imm >= bitsize) {
3456        unallocated_encoding(s);
3457    } else {
3458        TCGv_i64 tcg_rd, tcg_rm, tcg_rn;
3459
3460        tcg_rd = cpu_reg(s, rd);
3461
3462        if (unlikely(imm == 0)) {
3463            /* tcg shl_i32/shl_i64 is undefined for 32/64 bit shifts,
3464             * so an extract from bit 0 is a special case.
3465             */
3466            if (sf) {
3467                tcg_gen_mov_i64(tcg_rd, cpu_reg(s, rm));
3468            } else {
3469                tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, rm));
3470            }
3471        } else if (rm == rn) { /* ROR */
3472            tcg_rm = cpu_reg(s, rm);
3473            if (sf) {
3474                tcg_gen_rotri_i64(tcg_rd, tcg_rm, imm);
3475            } else {
3476                TCGv_i32 tmp = tcg_temp_new_i32();
3477                tcg_gen_extrl_i64_i32(tmp, tcg_rm);
3478                tcg_gen_rotri_i32(tmp, tmp, imm);
3479                tcg_gen_extu_i32_i64(tcg_rd, tmp);
3480                tcg_temp_free_i32(tmp);
3481            }
3482        } else {
3483            tcg_rm = read_cpu_reg(s, rm, sf);
3484            tcg_rn = read_cpu_reg(s, rn, sf);
3485            tcg_gen_shri_i64(tcg_rm, tcg_rm, imm);
3486            tcg_gen_shli_i64(tcg_rn, tcg_rn, bitsize - imm);
3487            tcg_gen_or_i64(tcg_rd, tcg_rm, tcg_rn);
3488            if (!sf) {
3489                tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3490            }
3491        }
3492    }
3493}
3494
3495/* Data processing - immediate */
3496static void disas_data_proc_imm(DisasContext *s, uint32_t insn)
3497{
3498    switch (extract32(insn, 23, 6)) {
3499    case 0x20: case 0x21: /* PC-rel. addressing */
3500        disas_pc_rel_adr(s, insn);
3501        break;
3502    case 0x22: case 0x23: /* Add/subtract (immediate) */
3503        disas_add_sub_imm(s, insn);
3504        break;
3505    case 0x24: /* Logical (immediate) */
3506        disas_logic_imm(s, insn);
3507        break;
3508    case 0x25: /* Move wide (immediate) */
3509        disas_movw_imm(s, insn);
3510        break;
3511    case 0x26: /* Bitfield */
3512        disas_bitfield(s, insn);
3513        break;
3514    case 0x27: /* Extract */
3515        disas_extract(s, insn);
3516        break;
3517    default:
3518        unallocated_encoding(s);
3519        break;
3520    }
3521}
3522
3523/* Shift a TCGv src by TCGv shift_amount, put result in dst.
3524 * Note that it is the caller's responsibility to ensure that the
3525 * shift amount is in range (ie 0..31 or 0..63) and provide the ARM
3526 * mandated semantics for out of range shifts.
3527 */
3528static void shift_reg(TCGv_i64 dst, TCGv_i64 src, int sf,
3529                      enum a64_shift_type shift_type, TCGv_i64 shift_amount)
3530{
3531    switch (shift_type) {
3532    case A64_SHIFT_TYPE_LSL:
3533        tcg_gen_shl_i64(dst, src, shift_amount);
3534        break;
3535    case A64_SHIFT_TYPE_LSR:
3536        tcg_gen_shr_i64(dst, src, shift_amount);
3537        break;
3538    case A64_SHIFT_TYPE_ASR:
3539        if (!sf) {
3540            tcg_gen_ext32s_i64(dst, src);
3541        }
3542        tcg_gen_sar_i64(dst, sf ? src : dst, shift_amount);
3543        break;
3544    case A64_SHIFT_TYPE_ROR:
3545        if (sf) {
3546            tcg_gen_rotr_i64(dst, src, shift_amount);
3547        } else {
3548            TCGv_i32 t0, t1;
3549            t0 = tcg_temp_new_i32();
3550            t1 = tcg_temp_new_i32();
3551            tcg_gen_extrl_i64_i32(t0, src);
3552            tcg_gen_extrl_i64_i32(t1, shift_amount);
3553            tcg_gen_rotr_i32(t0, t0, t1);
3554            tcg_gen_extu_i32_i64(dst, t0);
3555            tcg_temp_free_i32(t0);
3556            tcg_temp_free_i32(t1);
3557        }
3558        break;
3559    default:
3560        g_assert_not_reached(); /* all shift types should be handled */
3561        break;
3562    }
3563
3564    if (!sf) { /* zero extend final result */
3565        tcg_gen_ext32u_i64(dst, dst);
3566    }
3567}
3568
3569/* Shift a TCGv src by immediate, put result in dst.
3570 * The shift amount must be in range (this should always be true as the
3571 * relevant instructions will UNDEF on bad shift immediates).
3572 */
3573static void shift_reg_imm(TCGv_i64 dst, TCGv_i64 src, int sf,
3574                          enum a64_shift_type shift_type, unsigned int shift_i)
3575{
3576    assert(shift_i < (sf ? 64 : 32));
3577
3578    if (shift_i == 0) {
3579        tcg_gen_mov_i64(dst, src);
3580    } else {
3581        TCGv_i64 shift_const;
3582
3583        shift_const = tcg_const_i64(shift_i);
3584        shift_reg(dst, src, sf, shift_type, shift_const);
3585        tcg_temp_free_i64(shift_const);
3586    }
3587}
3588
3589/* Logical (shifted register)
3590 *   31  30 29 28       24 23   22 21  20  16 15    10 9    5 4    0
3591 * +----+-----+-----------+-------+---+------+--------+------+------+
3592 * | sf | opc | 0 1 0 1 0 | shift | N |  Rm  |  imm6  |  Rn  |  Rd  |
3593 * +----+-----+-----------+-------+---+------+--------+------+------+
3594 */
3595static void disas_logic_reg(DisasContext *s, uint32_t insn)
3596{
3597    TCGv_i64 tcg_rd, tcg_rn, tcg_rm;
3598    unsigned int sf, opc, shift_type, invert, rm, shift_amount, rn, rd;
3599
3600    sf = extract32(insn, 31, 1);
3601    opc = extract32(insn, 29, 2);
3602    shift_type = extract32(insn, 22, 2);
3603    invert = extract32(insn, 21, 1);
3604    rm = extract32(insn, 16, 5);
3605    shift_amount = extract32(insn, 10, 6);
3606    rn = extract32(insn, 5, 5);
3607    rd = extract32(insn, 0, 5);
3608
3609    if (!sf && (shift_amount & (1 << 5))) {
3610        unallocated_encoding(s);
3611        return;
3612    }
3613
3614    tcg_rd = cpu_reg(s, rd);
3615
3616    if (opc == 1 && shift_amount == 0 && shift_type == 0 && rn == 31) {
3617        /* Unshifted ORR and ORN with WZR/XZR is the standard encoding for
3618         * register-register MOV and MVN, so it is worth special casing.
3619         */
3620        tcg_rm = cpu_reg(s, rm);
3621        if (invert) {
3622            tcg_gen_not_i64(tcg_rd, tcg_rm);
3623            if (!sf) {
3624                tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3625            }
3626        } else {
3627            if (sf) {
3628                tcg_gen_mov_i64(tcg_rd, tcg_rm);
3629            } else {
3630                tcg_gen_ext32u_i64(tcg_rd, tcg_rm);
3631            }
3632        }
3633        return;
3634    }
3635
3636    tcg_rm = read_cpu_reg(s, rm, sf);
3637
3638    if (shift_amount) {
3639        shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, shift_amount);
3640    }
3641
3642    tcg_rn = cpu_reg(s, rn);
3643
3644    switch (opc | (invert << 2)) {
3645    case 0: /* AND */
3646    case 3: /* ANDS */
3647        tcg_gen_and_i64(tcg_rd, tcg_rn, tcg_rm);
3648        break;
3649    case 1: /* ORR */
3650        tcg_gen_or_i64(tcg_rd, tcg_rn, tcg_rm);
3651        break;
3652    case 2: /* EOR */
3653        tcg_gen_xor_i64(tcg_rd, tcg_rn, tcg_rm);
3654        break;
3655    case 4: /* BIC */
3656    case 7: /* BICS */
3657        tcg_gen_andc_i64(tcg_rd, tcg_rn, tcg_rm);
3658        break;
3659    case 5: /* ORN */
3660        tcg_gen_orc_i64(tcg_rd, tcg_rn, tcg_rm);
3661        break;
3662    case 6: /* EON */
3663        tcg_gen_eqv_i64(tcg_rd, tcg_rn, tcg_rm);
3664        break;
3665    default:
3666        g_assert_not_reached();
3667        break;
3668    }
3669
3670    if (!sf) {
3671        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3672    }
3673
3674    if (opc == 3) {
3675        gen_logic_CC(sf, tcg_rd);
3676    }
3677}
3678
3679/*
3680 * Add/subtract (extended register)
3681 *
3682 *  31|30|29|28       24|23 22|21|20   16|15  13|12  10|9  5|4  0|
3683 * +--+--+--+-----------+-----+--+-------+------+------+----+----+
3684 * |sf|op| S| 0 1 0 1 1 | opt | 1|  Rm   |option| imm3 | Rn | Rd |
3685 * +--+--+--+-----------+-----+--+-------+------+------+----+----+
3686 *
3687 *  sf: 0 -> 32bit, 1 -> 64bit
3688 *  op: 0 -> add  , 1 -> sub
3689 *   S: 1 -> set flags
3690 * opt: 00
3691 * option: extension type (see DecodeRegExtend)
3692 * imm3: optional shift to Rm
3693 *
3694 * Rd = Rn + LSL(extend(Rm), amount)
3695 */
3696static void disas_add_sub_ext_reg(DisasContext *s, uint32_t insn)
3697{
3698    int rd = extract32(insn, 0, 5);
3699    int rn = extract32(insn, 5, 5);
3700    int imm3 = extract32(insn, 10, 3);
3701    int option = extract32(insn, 13, 3);
3702    int rm = extract32(insn, 16, 5);
3703    bool setflags = extract32(insn, 29, 1);
3704    bool sub_op = extract32(insn, 30, 1);
3705    bool sf = extract32(insn, 31, 1);
3706
3707    TCGv_i64 tcg_rm, tcg_rn; /* temps */
3708    TCGv_i64 tcg_rd;
3709    TCGv_i64 tcg_result;
3710
3711    if (imm3 > 4) {
3712        unallocated_encoding(s);
3713        return;
3714    }
3715
3716    /* non-flag setting ops may use SP */
3717    if (!setflags) {
3718        tcg_rd = cpu_reg_sp(s, rd);
3719    } else {
3720        tcg_rd = cpu_reg(s, rd);
3721    }
3722    tcg_rn = read_cpu_reg_sp(s, rn, sf);
3723
3724    tcg_rm = read_cpu_reg(s, rm, sf);
3725    ext_and_shift_reg(tcg_rm, tcg_rm, option, imm3);
3726
3727    tcg_result = tcg_temp_new_i64();
3728
3729    if (!setflags) {
3730        if (sub_op) {
3731            tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
3732        } else {
3733            tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
3734        }
3735    } else {
3736        if (sub_op) {
3737            gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
3738        } else {
3739            gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
3740        }
3741    }
3742
3743    if (sf) {
3744        tcg_gen_mov_i64(tcg_rd, tcg_result);
3745    } else {
3746        tcg_gen_ext32u_i64(tcg_rd, tcg_result);
3747    }
3748
3749    tcg_temp_free_i64(tcg_result);
3750}
3751
3752/*
3753 * Add/subtract (shifted register)
3754 *
3755 *  31 30 29 28       24 23 22 21 20   16 15     10 9    5 4    0
3756 * +--+--+--+-----------+-----+--+-------+---------+------+------+
3757 * |sf|op| S| 0 1 0 1 1 |shift| 0|  Rm   |  imm6   |  Rn  |  Rd  |
3758 * +--+--+--+-----------+-----+--+-------+---------+------+------+
3759 *
3760 *    sf: 0 -> 32bit, 1 -> 64bit
3761 *    op: 0 -> add  , 1 -> sub
3762 *     S: 1 -> set flags
3763 * shift: 00 -> LSL, 01 -> LSR, 10 -> ASR, 11 -> RESERVED
3764 *  imm6: Shift amount to apply to Rm before the add/sub
3765 */
3766static void disas_add_sub_reg(DisasContext *s, uint32_t insn)
3767{
3768    int rd = extract32(insn, 0, 5);
3769    int rn = extract32(insn, 5, 5);
3770    int imm6 = extract32(insn, 10, 6);
3771    int rm = extract32(insn, 16, 5);
3772    int shift_type = extract32(insn, 22, 2);
3773    bool setflags = extract32(insn, 29, 1);
3774    bool sub_op = extract32(insn, 30, 1);
3775    bool sf = extract32(insn, 31, 1);
3776
3777    TCGv_i64 tcg_rd = cpu_reg(s, rd);
3778    TCGv_i64 tcg_rn, tcg_rm;
3779    TCGv_i64 tcg_result;
3780
3781    if ((shift_type == 3) || (!sf && (imm6 > 31))) {
3782        unallocated_encoding(s);
3783        return;
3784    }
3785
3786    tcg_rn = read_cpu_reg(s, rn, sf);
3787    tcg_rm = read_cpu_reg(s, rm, sf);
3788
3789    shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, imm6);
3790
3791    tcg_result = tcg_temp_new_i64();
3792
3793    if (!setflags) {
3794        if (sub_op) {
3795            tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
3796        } else {
3797            tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
3798        }
3799    } else {
3800        if (sub_op) {
3801            gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
3802        } else {
3803            gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
3804        }
3805    }
3806
3807    if (sf) {
3808        tcg_gen_mov_i64(tcg_rd, tcg_result);
3809    } else {
3810        tcg_gen_ext32u_i64(tcg_rd, tcg_result);
3811    }
3812
3813    tcg_temp_free_i64(tcg_result);
3814}
3815
3816/* Data-processing (3 source)
3817 *
3818 *    31 30  29 28       24 23 21  20  16  15  14  10 9    5 4    0
3819 *  +--+------+-----------+------+------+----+------+------+------+
3820 *  |sf| op54 | 1 1 0 1 1 | op31 |  Rm  | o0 |  Ra  |  Rn  |  Rd  |
3821 *  +--+------+-----------+------+------+----+------+------+------+
3822 */
3823static void disas_data_proc_3src(DisasContext *s, uint32_t insn)
3824{
3825    int rd = extract32(insn, 0, 5);
3826    int rn = extract32(insn, 5, 5);
3827    int ra = extract32(insn, 10, 5);
3828    int rm = extract32(insn, 16, 5);
3829    int op_id = (extract32(insn, 29, 3) << 4) |
3830        (extract32(insn, 21, 3) << 1) |
3831        extract32(insn, 15, 1);
3832    bool sf = extract32(insn, 31, 1);
3833    bool is_sub = extract32(op_id, 0, 1);
3834    bool is_high = extract32(op_id, 2, 1);
3835    bool is_signed = false;
3836    TCGv_i64 tcg_op1;
3837    TCGv_i64 tcg_op2;
3838    TCGv_i64 tcg_tmp;
3839
3840    /* Note that op_id is sf:op54:op31:o0 so it includes the 32/64 size flag */
3841    switch (op_id) {
3842    case 0x42: /* SMADDL */
3843    case 0x43: /* SMSUBL */
3844    case 0x44: /* SMULH */
3845        is_signed = true;
3846        break;
3847    case 0x0: /* MADD (32bit) */
3848    case 0x1: /* MSUB (32bit) */
3849    case 0x40: /* MADD (64bit) */
3850    case 0x41: /* MSUB (64bit) */
3851    case 0x4a: /* UMADDL */
3852    case 0x4b: /* UMSUBL */
3853    case 0x4c: /* UMULH */
3854        break;
3855    default:
3856        unallocated_encoding(s);
3857        return;
3858    }
3859
3860    if (is_high) {
3861        TCGv_i64 low_bits = tcg_temp_new_i64(); /* low bits discarded */
3862        TCGv_i64 tcg_rd = cpu_reg(s, rd);
3863        TCGv_i64 tcg_rn = cpu_reg(s, rn);
3864        TCGv_i64 tcg_rm = cpu_reg(s, rm);
3865
3866        if (is_signed) {
3867            tcg_gen_muls2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
3868        } else {
3869            tcg_gen_mulu2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
3870        }
3871
3872        tcg_temp_free_i64(low_bits);
3873        return;
3874    }
3875
3876    tcg_op1 = tcg_temp_new_i64();
3877    tcg_op2 = tcg_temp_new_i64();
3878    tcg_tmp = tcg_temp_new_i64();
3879
3880    if (op_id < 0x42) {
3881        tcg_gen_mov_i64(tcg_op1, cpu_reg(s, rn));
3882        tcg_gen_mov_i64(tcg_op2, cpu_reg(s, rm));
3883    } else {
3884        if (is_signed) {
3885            tcg_gen_ext32s_i64(tcg_op1, cpu_reg(s, rn));
3886            tcg_gen_ext32s_i64(tcg_op2, cpu_reg(s, rm));
3887        } else {
3888            tcg_gen_ext32u_i64(tcg_op1, cpu_reg(s, rn));
3889            tcg_gen_ext32u_i64(tcg_op2, cpu_reg(s, rm));
3890        }
3891    }
3892
3893    if (ra == 31 && !is_sub) {
3894        /* Special-case MADD with rA == XZR; it is the standard MUL alias */
3895        tcg_gen_mul_i64(cpu_reg(s, rd), tcg_op1, tcg_op2);
3896    } else {
3897        tcg_gen_mul_i64(tcg_tmp, tcg_op1, tcg_op2);
3898        if (is_sub) {
3899            tcg_gen_sub_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
3900        } else {
3901            tcg_gen_add_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
3902        }
3903    }
3904
3905    if (!sf) {
3906        tcg_gen_ext32u_i64(cpu_reg(s, rd), cpu_reg(s, rd));
3907    }
3908
3909    tcg_temp_free_i64(tcg_op1);
3910    tcg_temp_free_i64(tcg_op2);
3911    tcg_temp_free_i64(tcg_tmp);
3912}
3913
3914/* Add/subtract (with carry)
3915 *  31 30 29 28 27 26 25 24 23 22 21  20  16  15   10  9    5 4   0
3916 * +--+--+--+------------------------+------+---------+------+-----+
3917 * |sf|op| S| 1  1  0  1  0  0  0  0 |  rm  | opcode2 |  Rn  |  Rd |
3918 * +--+--+--+------------------------+------+---------+------+-----+
3919 *                                            [000000]
3920 */
3921
3922static void disas_adc_sbc(DisasContext *s, uint32_t insn)
3923{
3924    unsigned int sf, op, setflags, rm, rn, rd;
3925    TCGv_i64 tcg_y, tcg_rn, tcg_rd;
3926
3927    if (extract32(insn, 10, 6) != 0) {
3928        unallocated_encoding(s);
3929        return;
3930    }
3931
3932    sf = extract32(insn, 31, 1);
3933    op = extract32(insn, 30, 1);
3934    setflags = extract32(insn, 29, 1);
3935    rm = extract32(insn, 16, 5);
3936    rn = extract32(insn, 5, 5);
3937    rd = extract32(insn, 0, 5);
3938
3939    tcg_rd = cpu_reg(s, rd);
3940    tcg_rn = cpu_reg(s, rn);
3941
3942    if (op) {
3943        tcg_y = new_tmp_a64(s);
3944        tcg_gen_not_i64(tcg_y, cpu_reg(s, rm));
3945    } else {
3946        tcg_y = cpu_reg(s, rm);
3947    }
3948
3949    if (setflags) {
3950        gen_adc_CC(sf, tcg_rd, tcg_rn, tcg_y);
3951    } else {
3952        gen_adc(sf, tcg_rd, tcg_rn, tcg_y);
3953    }
3954}
3955
3956/* Conditional compare (immediate / register)
3957 *  31 30 29 28 27 26 25 24 23 22 21  20    16 15  12  11  10  9   5  4 3   0
3958 * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
3959 * |sf|op| S| 1  1  0  1  0  0  1  0 |imm5/rm | cond |i/r |o2|  Rn  |o3|nzcv |
3960 * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
3961 *        [1]                             y                [0]       [0]
3962 */
3963static void disas_cc(DisasContext *s, uint32_t insn)
3964{
3965    unsigned int sf, op, y, cond, rn, nzcv, is_imm;
3966    TCGv_i32 tcg_t0, tcg_t1, tcg_t2;
3967    TCGv_i64 tcg_tmp, tcg_y, tcg_rn;
3968    DisasCompare c;
3969
3970    if (!extract32(insn, 29, 1)) {
3971        unallocated_encoding(s);
3972        return;
3973    }
3974    if (insn & (1 << 10 | 1 << 4)) {
3975        unallocated_encoding(s);
3976        return;
3977    }
3978    sf = extract32(insn, 31, 1);
3979    op = extract32(insn, 30, 1);
3980    is_imm = extract32(insn, 11, 1);
3981    y = extract32(insn, 16, 5); /* y = rm (reg) or imm5 (imm) */
3982    cond = extract32(insn, 12, 4);
3983    rn = extract32(insn, 5, 5);
3984    nzcv = extract32(insn, 0, 4);
3985
3986    /* Set T0 = !COND.  */
3987    tcg_t0 = tcg_temp_new_i32();
3988    arm_test_cc(&c, cond);
3989    tcg_gen_setcondi_i32(tcg_invert_cond(c.cond), tcg_t0, c.value, 0);
3990    arm_free_cc(&c);
3991
3992    /* Load the arguments for the new comparison.  */
3993    if (is_imm) {
3994        tcg_y = new_tmp_a64(s);
3995        tcg_gen_movi_i64(tcg_y, y);
3996    } else {
3997        tcg_y = cpu_reg(s, y);
3998    }
3999    tcg_rn = cpu_reg(s, rn);
4000
4001    /* Set the flags for the new comparison.  */
4002    tcg_tmp = tcg_temp_new_i64();
4003    if (op) {
4004        gen_sub_CC(sf, tcg_tmp, tcg_rn, tcg_y);
4005    } else {
4006        gen_add_CC(sf, tcg_tmp, tcg_rn, tcg_y);
4007    }
4008    tcg_temp_free_i64(tcg_tmp);
4009
4010    /* If COND was false, force the flags to #nzcv.  Compute two masks
4011     * to help with this: T1 = (COND ? 0 : -1), T2 = (COND ? -1 : 0).
4012     * For tcg hosts that support ANDC, we can make do with just T1.
4013     * In either case, allow the tcg optimizer to delete any unused mask.
4014     */
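        /* Concretely: when COND is false, T0 = 1, T1 = -1 and T2 = 0,
         * so the OR/AND sequence for each flag below forces it to the
         * corresponding #nzcv bit; when COND is true, T0 = T1 = 0 and
         * T2 = -1, and all of them are no-ops.
         */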
4015    tcg_t1 = tcg_temp_new_i32();
4016    tcg_t2 = tcg_temp_new_i32();
4017    tcg_gen_neg_i32(tcg_t1, tcg_t0);
4018    tcg_gen_subi_i32(tcg_t2, tcg_t0, 1);
4019
4020    if (nzcv & 8) { /* N */
4021        tcg_gen_or_i32(cpu_NF, cpu_NF, tcg_t1);
4022    } else {
4023        if (TCG_TARGET_HAS_andc_i32) {
4024            tcg_gen_andc_i32(cpu_NF, cpu_NF, tcg_t1);
4025        } else {
4026            tcg_gen_and_i32(cpu_NF, cpu_NF, tcg_t2);
4027        }
4028    }
4029    if (nzcv & 4) { /* Z */
4030        if (TCG_TARGET_HAS_andc_i32) {
4031            tcg_gen_andc_i32(cpu_ZF, cpu_ZF, tcg_t1);
4032        } else {
4033            tcg_gen_and_i32(cpu_ZF, cpu_ZF, tcg_t2);
4034        }
4035    } else {
4036        tcg_gen_or_i32(cpu_ZF, cpu_ZF, tcg_t0);
4037    }
4038    if (nzcv & 2) { /* C */
4039        tcg_gen_or_i32(cpu_CF, cpu_CF, tcg_t0);
4040    } else {
4041        if (TCG_TARGET_HAS_andc_i32) {
4042            tcg_gen_andc_i32(cpu_CF, cpu_CF, tcg_t1);
4043        } else {
4044            tcg_gen_and_i32(cpu_CF, cpu_CF, tcg_t2);
4045        }
4046    }
4047    if (nzcv & 1) { /* V */
4048        tcg_gen_or_i32(cpu_VF, cpu_VF, tcg_t1);
4049    } else {
4050        if (TCG_TARGET_HAS_andc_i32) {
4051            tcg_gen_andc_i32(cpu_VF, cpu_VF, tcg_t1);
4052        } else {
4053            tcg_gen_and_i32(cpu_VF, cpu_VF, tcg_t2);
4054        }
4055    }
4056    tcg_temp_free_i32(tcg_t0);
4057    tcg_temp_free_i32(tcg_t1);
4058    tcg_temp_free_i32(tcg_t2);
4059}
4060
4061/* Conditional select
4062 *   31   30  29  28             21 20  16 15  12 11 10 9    5 4    0
4063 * +----+----+---+-----------------+------+------+-----+------+------+
4064 * | sf | op | S | 1 1 0 1 0 1 0 0 |  Rm  | cond | op2 |  Rn  |  Rd  |
4065 * +----+----+---+-----------------+------+------+-----+------+------+
4066 */
4067static void disas_cond_select(DisasContext *s, uint32_t insn)
4068{
4069    unsigned int sf, else_inv, rm, cond, else_inc, rn, rd;
4070    TCGv_i64 tcg_rd, zero;
4071    DisasCompare64 c;
4072
4073    if (extract32(insn, 29, 1) || extract32(insn, 11, 1)) {
4074        /* S == 1 or op2<1> == 1 */
4075        unallocated_encoding(s);
4076        return;
4077    }
4078    sf = extract32(insn, 31, 1);
4079    else_inv = extract32(insn, 30, 1);
4080    rm = extract32(insn, 16, 5);
4081    cond = extract32(insn, 12, 4);
4082    else_inc = extract32(insn, 10, 1);
4083    rn = extract32(insn, 5, 5);
4084    rd = extract32(insn, 0, 5);
4085
4086    tcg_rd = cpu_reg(s, rd);
4087
4088    a64_test_cc(&c, cond);
4089    zero = tcg_const_i64(0);
4090
4091    if (rn == 31 && rm == 31 && (else_inc ^ else_inv)) {
4092        /* CSET & CSETM.  */
4093        tcg_gen_setcond_i64(tcg_invert_cond(c.cond), tcg_rd, c.value, zero);
4094        if (else_inv) {
4095            tcg_gen_neg_i64(tcg_rd, tcg_rd);
4096        }
4097    } else {
4098        TCGv_i64 t_true = cpu_reg(s, rn);
4099        TCGv_i64 t_false = read_cpu_reg(s, rm, 1);
4100        if (else_inv && else_inc) {
4101            tcg_gen_neg_i64(t_false, t_false);
4102        } else if (else_inv) {
4103            tcg_gen_not_i64(t_false, t_false);
4104        } else if (else_inc) {
4105            tcg_gen_addi_i64(t_false, t_false, 1);
4106        }
4107        tcg_gen_movcond_i64(c.cond, tcg_rd, c.value, zero, t_true, t_false);
4108    }
4109
4110    tcg_temp_free_i64(zero);
4111    a64_free_cc(&c);
4112
4113    if (!sf) {
4114        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4115    }
4116}
4117
4118static void handle_clz(DisasContext *s, unsigned int sf,
4119                       unsigned int rn, unsigned int rd)
4120{
4121    TCGv_i64 tcg_rd, tcg_rn;
4122    tcg_rd = cpu_reg(s, rd);
4123    tcg_rn = cpu_reg(s, rn);
4124
4125    if (sf) {
4126        tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64);
4127    } else {
4128        TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
4129        tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
4130        tcg_gen_clzi_i32(tcg_tmp32, tcg_tmp32, 32);
4131        tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
4132        tcg_temp_free_i32(tcg_tmp32);
4133    }
4134}
4135
4136static void handle_cls(DisasContext *s, unsigned int sf,
4137                       unsigned int rn, unsigned int rd)
4138{
4139    TCGv_i64 tcg_rd, tcg_rn;
4140    tcg_rd = cpu_reg(s, rd);
4141    tcg_rn = cpu_reg(s, rn);
4142
4143    if (sf) {
4144        tcg_gen_clrsb_i64(tcg_rd, tcg_rn);
4145    } else {
4146        TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
4147        tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
4148        tcg_gen_clrsb_i32(tcg_tmp32, tcg_tmp32);
4149        tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
4150        tcg_temp_free_i32(tcg_tmp32);
4151    }
4152}
4153
4154static void handle_rbit(DisasContext *s, unsigned int sf,
4155                        unsigned int rn, unsigned int rd)
4156{
4157    TCGv_i64 tcg_rd, tcg_rn;
4158    tcg_rd = cpu_reg(s, rd);
4159    tcg_rn = cpu_reg(s, rn);
4160
4161    if (sf) {
4162        gen_helper_rbit64(tcg_rd, tcg_rn);
4163    } else {
4164        TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
4165        tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
4166        gen_helper_rbit(tcg_tmp32, tcg_tmp32);
4167        tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
4168        tcg_temp_free_i32(tcg_tmp32);
4169    }
4170}
4171
4172/* REV with sf==1, opcode==3 ("REV64") */
4173static void handle_rev64(DisasContext *s, unsigned int sf,
4174                         unsigned int rn, unsigned int rd)
4175{
4176    if (!sf) {
4177        unallocated_encoding(s);
4178        return;
4179    }
4180    tcg_gen_bswap64_i64(cpu_reg(s, rd), cpu_reg(s, rn));
4181}
4182
4183/* REV with sf==0, opcode==2
4184 * REV32 (sf==1, opcode==2)
4185 */
4186static void handle_rev32(DisasContext *s, unsigned int sf,
4187                         unsigned int rn, unsigned int rd)
4188{
4189    TCGv_i64 tcg_rd = cpu_reg(s, rd);
4190
4191    if (sf) {
4192        TCGv_i64 tcg_tmp = tcg_temp_new_i64();
4193        TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
4194
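            /* REV32 (sf == 1): byte-swap each 32 bit half of Rn
             * independently, keeping the two halves in place.
             */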
4195        /* bswap32_i64 requires zero high word */
4196        tcg_gen_ext32u_i64(tcg_tmp, tcg_rn);
4197        tcg_gen_bswap32_i64(tcg_rd, tcg_tmp);
4198        tcg_gen_shri_i64(tcg_tmp, tcg_rn, 32);
4199        tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp);
4200        tcg_gen_concat32_i64(tcg_rd, tcg_rd, tcg_tmp);
4201
4202        tcg_temp_free_i64(tcg_tmp);
4203    } else {
4204        tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, rn));
4205        tcg_gen_bswap32_i64(tcg_rd, tcg_rd);
4206    }
4207}
4208
4209/* REV16 (opcode==1) */
4210static void handle_rev16(DisasContext *s, unsigned int sf,
4211                         unsigned int rn, unsigned int rd)
4212{
4213    TCGv_i64 tcg_rd = cpu_reg(s, rd);
4214    TCGv_i64 tcg_tmp = tcg_temp_new_i64();
4215    TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
4216    TCGv_i64 mask = tcg_const_i64(sf ? 0x00ff00ff00ff00ffull : 0x00ff00ff);
4217
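        /* i.e. rd = ((rn & mask) << 8) | ((rn >> 8) & mask), a byte
         * swap within each halfword.
         */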
4218    tcg_gen_shri_i64(tcg_tmp, tcg_rn, 8);
4219    tcg_gen_and_i64(tcg_rd, tcg_rn, mask);
4220    tcg_gen_and_i64(tcg_tmp, tcg_tmp, mask);
4221    tcg_gen_shli_i64(tcg_rd, tcg_rd, 8);
4222    tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_tmp);
4223
4224    tcg_temp_free_i64(mask);
4225    tcg_temp_free_i64(tcg_tmp);
4226}
4227
4228/* Data-processing (1 source)
4229 *   31  30  29  28             21 20     16 15    10 9    5 4    0
4230 * +----+---+---+-----------------+---------+--------+------+------+
4231 * | sf | 1 | S | 1 1 0 1 0 1 1 0 | opcode2 | opcode |  Rn  |  Rd  |
4232 * +----+---+---+-----------------+---------+--------+------+------+
4233 */
4234static void disas_data_proc_1src(DisasContext *s, uint32_t insn)
4235{
4236    unsigned int sf, opcode, rn, rd;
4237
4238    if (extract32(insn, 29, 1) || extract32(insn, 16, 5)) {
4239        unallocated_encoding(s);
4240        return;
4241    }
4242
4243    sf = extract32(insn, 31, 1);
4244    opcode = extract32(insn, 10, 6);
4245    rn = extract32(insn, 5, 5);
4246    rd = extract32(insn, 0, 5);
4247
4248    switch (opcode) {
4249    case 0: /* RBIT */
4250        handle_rbit(s, sf, rn, rd);
4251        break;
4252    case 1: /* REV16 */
4253        handle_rev16(s, sf, rn, rd);
4254        break;
4255    case 2: /* REV32 */
4256        handle_rev32(s, sf, rn, rd);
4257        break;
4258    case 3: /* REV64 */
4259        handle_rev64(s, sf, rn, rd);
4260        break;
4261    case 4: /* CLZ */
4262        handle_clz(s, sf, rn, rd);
4263        break;
4264    case 5: /* CLS */
4265        handle_cls(s, sf, rn, rd);
4266        break;
        default:
            unallocated_encoding(s);
            break;
4267    }
4268}
4269
4270static void handle_div(DisasContext *s, bool is_signed, unsigned int sf,
4271                       unsigned int rm, unsigned int rn, unsigned int rd)
4272{
4273    TCGv_i64 tcg_n, tcg_m, tcg_rd;
4274    tcg_rd = cpu_reg(s, rd);
4275
4276    if (!sf && is_signed) {
4277        tcg_n = new_tmp_a64(s);
4278        tcg_m = new_tmp_a64(s);
4279        tcg_gen_ext32s_i64(tcg_n, cpu_reg(s, rn));
4280        tcg_gen_ext32s_i64(tcg_m, cpu_reg(s, rm));
4281    } else {
4282        tcg_n = read_cpu_reg(s, rn, sf);
4283        tcg_m = read_cpu_reg(s, rm, sf);
4284    }
4285
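        /* AArch64 division never traps: UDIV/SDIV by zero yield 0,
         * and the helpers implement those semantics.
         */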
4286    if (is_signed) {
4287        gen_helper_sdiv64(tcg_rd, tcg_n, tcg_m);
4288    } else {
4289        gen_helper_udiv64(tcg_rd, tcg_n, tcg_m);
4290    }
4291
4292    if (!sf) { /* zero extend final result */
4293        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4294    }
4295}
4296
4297/* LSLV, LSRV, ASRV, RORV */
4298static void handle_shift_reg(DisasContext *s,
4299                             enum a64_shift_type shift_type, unsigned int sf,
4300                             unsigned int rm, unsigned int rn, unsigned int rd)
4301{
4302    TCGv_i64 tcg_shift = tcg_temp_new_i64();
4303    TCGv_i64 tcg_rd = cpu_reg(s, rd);
4304    TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
4305
4306    tcg_gen_andi_i64(tcg_shift, cpu_reg(s, rm), sf ? 63 : 31);
4307    shift_reg(tcg_rd, tcg_rn, sf, shift_type, tcg_shift);
4308    tcg_temp_free_i64(tcg_shift);
4309}
4310
4311/* CRC32[BHWX], CRC32C[BHWX] */
4312static void handle_crc32(DisasContext *s,
4313                         unsigned int sf, unsigned int sz, bool crc32c,
4314                         unsigned int rm, unsigned int rn, unsigned int rd)
4315{
4316    TCGv_i64 tcg_acc, tcg_val;
4317    TCGv_i32 tcg_bytes;
4318
4319    if (!arm_dc_feature(s, ARM_FEATURE_CRC)
4320        || (sf == 1 && sz != 3)
4321        || (sf == 0 && sz == 3)) {
4322        unallocated_encoding(s);
4323        return;
4324    }
4325
4326    if (sz == 3) {
4327        tcg_val = cpu_reg(s, rm);
4328    } else {
4329        uint64_t mask;
4330        switch (sz) {
4331        case 0:
4332            mask = 0xFF;
4333            break;
4334        case 1:
4335            mask = 0xFFFF;
4336            break;
4337        case 2:
4338            mask = 0xFFFFFFFF;
4339            break;
4340        default:
4341            g_assert_not_reached();
4342        }
4343        tcg_val = new_tmp_a64(s);
4344        tcg_gen_andi_i64(tcg_val, cpu_reg(s, rm), mask);
4345    }
4346
4347    tcg_acc = cpu_reg(s, rn);
4348    tcg_bytes = tcg_const_i32(1 << sz);
4349
4350    if (crc32c) {
4351        gen_helper_crc32c_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
4352    } else {
4353        gen_helper_crc32_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
4354    }
4355
4356    tcg_temp_free_i32(tcg_bytes);
4357}
4358
4359/* Data-processing (2 source)
4360 *   31   30  29 28             21 20  16 15    10 9    5 4    0
4361 * +----+---+---+-----------------+------+--------+------+------+
4362 * | sf | 0 | S | 1 1 0 1 0 1 1 0 |  Rm  | opcode |  Rn  |  Rd  |
4363 * +----+---+---+-----------------+------+--------+------+------+
4364 */
4365static void disas_data_proc_2src(DisasContext *s, uint32_t insn)
4366{
4367    unsigned int sf, rm, opcode, rn, rd;
4368    sf = extract32(insn, 31, 1);
4369    rm = extract32(insn, 16, 5);
4370    opcode = extract32(insn, 10, 6);
4371    rn = extract32(insn, 5, 5);
4372    rd = extract32(insn, 0, 5);
4373
4374    if (extract32(insn, 29, 1)) {
4375        unallocated_encoding(s);
4376        return;
4377    }
4378
4379    switch (opcode) {
4380    case 2: /* UDIV */
4381        handle_div(s, false, sf, rm, rn, rd);
4382        break;
4383    case 3: /* SDIV */
4384        handle_div(s, true, sf, rm, rn, rd);
4385        break;
4386    case 8: /* LSLV */
4387        handle_shift_reg(s, A64_SHIFT_TYPE_LSL, sf, rm, rn, rd);
4388        break;
4389    case 9: /* LSRV */
4390        handle_shift_reg(s, A64_SHIFT_TYPE_LSR, sf, rm, rn, rd);
4391        break;
4392    case 10: /* ASRV */
4393        handle_shift_reg(s, A64_SHIFT_TYPE_ASR, sf, rm, rn, rd);
4394        break;
4395    case 11: /* RORV */
4396        handle_shift_reg(s, A64_SHIFT_TYPE_ROR, sf, rm, rn, rd);
4397        break;
4398    case 16:
4399    case 17:
4400    case 18:
4401    case 19:
4402    case 20:
4403    case 21:
4404    case 22:
4405    case 23: /* CRC32 */
4406    {
4407        int sz = extract32(opcode, 0, 2);
4408        bool crc32c = extract32(opcode, 2, 1);
4409        handle_crc32(s, sf, sz, crc32c, rm, rn, rd);
4410        break;
4411    }
4412    default:
4413        unallocated_encoding(s);
4414        break;
4415    }
4416}
4417
4418/* Data processing - register */
4419static void disas_data_proc_reg(DisasContext *s, uint32_t insn)
4420{
4421    switch (extract32(insn, 24, 5)) {
4422    case 0x0a: /* Logical (shifted register) */
4423        disas_logic_reg(s, insn);
4424        break;
4425    case 0x0b: /* Add/subtract */
4426        if (insn & (1 << 21)) { /* (extended register) */
4427            disas_add_sub_ext_reg(s, insn);
4428        } else {
4429            disas_add_sub_reg(s, insn);
4430        }
4431        break;
4432    case 0x1b: /* Data-processing (3 source) */
4433        disas_data_proc_3src(s, insn);
4434        break;
4435    case 0x1a:
4436        switch (extract32(insn, 21, 3)) {
4437        case 0x0: /* Add/subtract (with carry) */
4438            disas_adc_sbc(s, insn);
4439            break;
4440        case 0x2: /* Conditional compare */
4441            disas_cc(s, insn); /* both imm and reg forms */
4442            break;
4443        case 0x4: /* Conditional select */
4444            disas_cond_select(s, insn);
4445            break;
4446        case 0x6: /* Data-processing */
4447            if (insn & (1 << 30)) { /* (1 source) */
4448                disas_data_proc_1src(s, insn);
4449            } else {            /* (2 source) */
4450                disas_data_proc_2src(s, insn);
4451            }
4452            break;
4453        default:
4454            unallocated_encoding(s);
4455            break;
4456        }
4457        break;
4458    default:
4459        unallocated_encoding(s);
4460        break;
4461    }
4462}
4463
4464static void handle_fp_compare(DisasContext *s, bool is_double,
4465                              unsigned int rn, unsigned int rm,
4466                              bool cmp_with_zero, bool signal_all_nans)
4467{
4468    TCGv_i64 tcg_flags = tcg_temp_new_i64();
4469    TCGv_ptr fpst = get_fpstatus_ptr(false);
4470
4471    if (is_double) {
4472        TCGv_i64 tcg_vn, tcg_vm;
4473
4474        tcg_vn = read_fp_dreg(s, rn);
4475        if (cmp_with_zero) {
4476            tcg_vm = tcg_const_i64(0);
4477        } else {
4478            tcg_vm = read_fp_dreg(s, rm);
4479        }
4480        if (signal_all_nans) {
4481            gen_helper_vfp_cmped_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
4482        } else {
4483            gen_helper_vfp_cmpd_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
4484        }
4485        tcg_temp_free_i64(tcg_vn);
4486        tcg_temp_free_i64(tcg_vm);
4487    } else {
4488        TCGv_i32 tcg_vn, tcg_vm;
4489
4490        tcg_vn = read_fp_sreg(s, rn);
4491        if (cmp_with_zero) {
4492            tcg_vm = tcg_const_i32(0);
4493        } else {
4494            tcg_vm = read_fp_sreg(s, rm);
4495        }
4496        if (signal_all_nans) {
4497            gen_helper_vfp_cmpes_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
4498        } else {
4499            gen_helper_vfp_cmps_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
4500        }
4501        tcg_temp_free_i32(tcg_vn);
4502        tcg_temp_free_i32(tcg_vm);
4503    }
4504
4505    tcg_temp_free_ptr(fpst);
4506
4507    gen_set_nzcv(tcg_flags);
4508
4509    tcg_temp_free_i64(tcg_flags);
4510}
4511
4512/* Floating point compare
4513 *   31  30  29 28       24 23  22  21 20  16 15 14 13  10    9    5 4     0
4514 * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
4515 * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | op  | 1 0 0 0 |  Rn  |  op2  |
4516 * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
4517 */
4518static void disas_fp_compare(DisasContext *s, uint32_t insn)
4519{
4520    unsigned int mos, type, rm, op, rn, opc, op2r;
4521
4522    mos = extract32(insn, 29, 3);
4523    type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
4524    rm = extract32(insn, 16, 5);
4525    op = extract32(insn, 14, 2);
4526    rn = extract32(insn, 5, 5);
4527    opc = extract32(insn, 3, 2);
4528    op2r = extract32(insn, 0, 3);
4529
4530    if (mos || op || op2r || type > 1) {
4531        unallocated_encoding(s);
4532        return;
4533    }
4534
4535    if (!fp_access_check(s)) {
4536        return;
4537    }
4538
4539    handle_fp_compare(s, type, rn, rm, opc & 1, opc & 2);
4540}
4541
4542/* Floating point conditional compare
4543 *   31  30  29 28       24 23  22  21 20  16 15  12 11 10 9    5  4   3    0
4544 * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
4545 * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | cond | 0 1 |  Rn  | op | nzcv |
4546 * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
4547 */
4548static void disas_fp_ccomp(DisasContext *s, uint32_t insn)
4549{
4550    unsigned int mos, type, rm, cond, rn, op, nzcv;
4551    TCGv_i64 tcg_flags;
4552    TCGLabel *label_continue = NULL;
4553
4554    mos = extract32(insn, 29, 3);
4555    type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
4556    rm = extract32(insn, 16, 5);
4557    cond = extract32(insn, 12, 4);
4558    rn = extract32(insn, 5, 5);
4559    op = extract32(insn, 4, 1);
4560    nzcv = extract32(insn, 0, 4);
4561
4562    if (mos || type > 1) {
4563        unallocated_encoding(s);
4564        return;
4565    }
4566
4567    if (!fp_access_check(s)) {
4568        return;
4569    }
4570
4571    if (cond < 0x0e) { /* not always */
4572        TCGLabel *label_match = gen_new_label();
4573        label_continue = gen_new_label();
4574        arm_gen_test_cc(cond, label_match);
4575        /* nomatch: */
4576        tcg_flags = tcg_const_i64(nzcv << 28);
4577        gen_set_nzcv(tcg_flags);
4578        tcg_temp_free_i64(tcg_flags);
4579        tcg_gen_br(label_continue);
4580        gen_set_label(label_match);
4581    }
4582
4583    handle_fp_compare(s, type, rn, rm, false, op);
4584
4585    if (cond < 0x0e) {
4586        gen_set_label(label_continue);
4587    }
4588}
4589
4590/* Floating point conditional select
4591 *   31  30  29 28       24 23  22  21 20  16 15  12 11 10 9    5 4    0
4592 * +---+---+---+-----------+------+---+------+------+-----+------+------+
4593 * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | cond | 1 1 |  Rn  |  Rd  |
4594 * +---+---+---+-----------+------+---+------+------+-----+------+------+
4595 */
4596static void disas_fp_csel(DisasContext *s, uint32_t insn)
4597{
4598    unsigned int mos, type, rm, cond, rn, rd;
4599    TCGv_i64 t_true, t_false, t_zero;
4600    DisasCompare64 c;
4601
4602    mos = extract32(insn, 29, 3);
4603    type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
4604    rm = extract32(insn, 16, 5);
4605    cond = extract32(insn, 12, 4);
4606    rn = extract32(insn, 5, 5);
4607    rd = extract32(insn, 0, 5);
4608
4609    if (mos || type > 1) {
4610        unallocated_encoding(s);
4611        return;
4612    }
4613
4614    if (!fp_access_check(s)) {
4615        return;
4616    }
4617
4618    /* Zero extend sreg inputs to 64 bits now.  */
4619    t_true = tcg_temp_new_i64();
4620    t_false = tcg_temp_new_i64();
4621    read_vec_element(s, t_true, rn, 0, type ? MO_64 : MO_32);
4622    read_vec_element(s, t_false, rm, 0, type ? MO_64 : MO_32);
4623
4624    a64_test_cc(&c, cond);
4625    t_zero = tcg_const_i64(0);
4626    tcg_gen_movcond_i64(c.cond, t_true, c.value, t_zero, t_true, t_false);
4627    tcg_temp_free_i64(t_zero);
4628    tcg_temp_free_i64(t_false);
4629    a64_free_cc(&c);
4630
4631    /* Note that sregs write back zeros to the high bits,
4632       and we've already done the zero-extension.  */
4633    write_fp_dreg(s, rd, t_true);
4634    tcg_temp_free_i64(t_true);
4635}
4636
4637/* Floating-point data-processing (1 source) - half precision */
4638static void handle_fp_1src_half(DisasContext *s, int opcode, int rd, int rn)
4639{
4640    TCGv_ptr fpst = NULL;
4641    TCGv_i32 tcg_op = tcg_temp_new_i32();
4642    TCGv_i32 tcg_res = tcg_temp_new_i32();
4643
4644    read_vec_element_i32(s, tcg_op, rn, 0, MO_16);
4645
4646    switch (opcode) {
4647    case 0x0: /* FMOV */
4648        tcg_gen_mov_i32(tcg_res, tcg_op);
4649        break;
4650    case 0x1: /* FABS */
4651        tcg_gen_andi_i32(tcg_res, tcg_op, 0x7fff);
4652        break;
4653    case 0x2: /* FNEG */
4654        tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
4655        break;
4656    case 0x3: /* FSQRT */
4657        gen_helper_sqrt_f16(tcg_res, tcg_op, cpu_env);
4658        break;
4659    case 0x8: /* FRINTN */
4660    case 0x9: /* FRINTP */
4661    case 0xa: /* FRINTM */
4662    case 0xb: /* FRINTZ */
4663    case 0xc: /* FRINTA */
4664    {
4665        TCGv_i32 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(opcode & 7));
4666        fpst = get_fpstatus_ptr(true);
4667
4668        gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
4669        gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst);
4670
4671        gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
4672        tcg_temp_free_i32(tcg_rmode);
4673        break;
4674    }
4675    case 0xe: /* FRINTX */
4676        fpst = get_fpstatus_ptr(true);
4677        gen_helper_advsimd_rinth_exact(tcg_res, tcg_op, fpst);
4678        break;
4679    case 0xf: /* FRINTI */
4680        fpst = get_fpstatus_ptr(true);
4681        gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst);
4682        break;
4683    default:
4684        abort();
4685    }
4686
4687    write_fp_sreg(s, rd, tcg_res);
4688
4689    if (fpst) {
4690        tcg_temp_free_ptr(fpst);
4691    }
4692    tcg_temp_free_i32(tcg_op);
4693    tcg_temp_free_i32(tcg_res);
4694}
4695
4696/* Floating-point data-processing (1 source) - single precision */
4697static void handle_fp_1src_single(DisasContext *s, int opcode, int rd, int rn)
4698{
4699    TCGv_ptr fpst;
4700    TCGv_i32 tcg_op;
4701    TCGv_i32 tcg_res;
4702
4703    fpst = get_fpstatus_ptr(false);
4704    tcg_op = read_fp_sreg(s, rn);
4705    tcg_res = tcg_temp_new_i32();
4706
4707    switch (opcode) {
4708    case 0x0: /* FMOV */
4709        tcg_gen_mov_i32(tcg_res, tcg_op);
4710        break;
4711    case 0x1: /* FABS */
4712        gen_helper_vfp_abss(tcg_res, tcg_op);
4713        break;
4714    case 0x2: /* FNEG */
4715        gen_helper_vfp_negs(tcg_res, tcg_op);
4716        break;
4717    case 0x3: /* FSQRT */
4718        gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env);
4719        break;
4720    case 0x8: /* FRINTN */
4721    case 0x9: /* FRINTP */
4722    case 0xa: /* FRINTM */
4723    case 0xb: /* FRINTZ */
4724    case 0xc: /* FRINTA */
4725    {
4726        TCGv_i32 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(opcode & 7));
4727
4728        gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
4729        gen_helper_rints(tcg_res, tcg_op, fpst);
4730
4731        gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
4732        tcg_temp_free_i32(tcg_rmode);
4733        break;
4734    }
4735    case 0xe: /* FRINTX */
4736        gen_helper_rints_exact(tcg_res, tcg_op, fpst);
4737        break;
4738    case 0xf: /* FRINTI */
4739        gen_helper_rints(tcg_res, tcg_op, fpst);
4740        break;
4741    default:
4742        abort();
4743    }
4744
4745    write_fp_sreg(s, rd, tcg_res);
4746
4747    tcg_temp_free_ptr(fpst);
4748    tcg_temp_free_i32(tcg_op);
4749    tcg_temp_free_i32(tcg_res);
4750}
4751
4752/* Floating-point data-processing (1 source) - double precision */
4753static void handle_fp_1src_double(DisasContext *s, int opcode, int rd, int rn)
4754{
4755    TCGv_ptr fpst;
4756    TCGv_i64 tcg_op;
4757    TCGv_i64 tcg_res;
4758
4759    switch (opcode) {
4760    case 0x0: /* FMOV */
4761        gen_gvec_fn2(s, false, rd, rn, tcg_gen_gvec_mov, 0);
4762        return;
4763    }
4764
4765    fpst = get_fpstatus_ptr(false);
4766    tcg_op = read_fp_dreg(s, rn);
4767    tcg_res = tcg_temp_new_i64();
4768
4769    switch (opcode) {
4770    case 0x1: /* FABS */
4771        gen_helper_vfp_absd(tcg_res, tcg_op);
4772        break;
4773    case 0x2: /* FNEG */
4774        gen_helper_vfp_negd(tcg_res, tcg_op);
4775        break;
4776    case 0x3: /* FSQRT */
4777        gen_helper_vfp_sqrtd(tcg_res, tcg_op, cpu_env);
4778        break;
4779    case 0x8: /* FRINTN */
4780    case 0x9: /* FRINTP */
4781    case 0xa: /* FRINTM */
4782    case 0xb: /* FRINTZ */
4783    case 0xc: /* FRINTA */
4784    {
4785        TCGv_i32 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(opcode & 7));
4786
4787        gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
4788        gen_helper_rintd(tcg_res, tcg_op, fpst);
4789
4790        gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
4791        tcg_temp_free_i32(tcg_rmode);
4792        break;
4793    }
4794    case 0xe: /* FRINTX */
4795        gen_helper_rintd_exact(tcg_res, tcg_op, fpst);
4796        break;
4797    case 0xf: /* FRINTI */
4798        gen_helper_rintd(tcg_res, tcg_op, fpst);
4799        break;
4800    default:
4801        abort();
4802    }
4803
4804    write_fp_dreg(s, rd, tcg_res);
4805
4806    tcg_temp_free_ptr(fpst);
4807    tcg_temp_free_i64(tcg_op);
4808    tcg_temp_free_i64(tcg_res);
4809}
4810
4811static void handle_fp_fcvt(DisasContext *s, int opcode,
4812                           int rd, int rn, int dtype, int ntype)
4813{
4814    switch (ntype) {
4815    case 0x0:
4816    {
4817        TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
4818        if (dtype == 1) {
4819            /* Single to double */
4820            TCGv_i64 tcg_rd = tcg_temp_new_i64();
4821            gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, cpu_env);
4822            write_fp_dreg(s, rd, tcg_rd);
4823            tcg_temp_free_i64(tcg_rd);
4824        } else {
4825            /* Single to half */
4826            TCGv_i32 tcg_rd = tcg_temp_new_i32();
4827            gen_helper_vfp_fcvt_f32_to_f16(tcg_rd, tcg_rn, cpu_env);
4828            /* write_fp_sreg is OK here because top half of tcg_rd is zero */
4829            write_fp_sreg(s, rd, tcg_rd);
4830            tcg_temp_free_i32(tcg_rd);
4831        }
4832        tcg_temp_free_i32(tcg_rn);
4833        break;
4834    }
4835    case 0x1:
4836    {
4837        TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
4838        TCGv_i32 tcg_rd = tcg_temp_new_i32();
4839        if (dtype == 0) {
4840            /* Double to single */
4841            gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, cpu_env);
4842        } else {
4843            /* Double to half */
4844            gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, cpu_env);
4845            /* write_fp_sreg is OK here because top half of tcg_rd is zero */
4846        }
4847        write_fp_sreg(s, rd, tcg_rd);
4848        tcg_temp_free_i32(tcg_rd);
4849        tcg_temp_free_i64(tcg_rn);
4850        break;
4851    }
4852    case 0x3:
4853    {
4854        TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
4855        tcg_gen_ext16u_i32(tcg_rn, tcg_rn);
4856        if (dtype == 0) {
4857            /* Half to single */
4858            TCGv_i32 tcg_rd = tcg_temp_new_i32();
4859            gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, cpu_env);
4860            write_fp_sreg(s, rd, tcg_rd);
4861            tcg_temp_free_i32(tcg_rd);
4862        } else {
4863            /* Half to double */
4864            TCGv_i64 tcg_rd = tcg_temp_new_i64();
4865            gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, cpu_env);
4866            write_fp_dreg(s, rd, tcg_rd);
4867            tcg_temp_free_i64(tcg_rd);
4868        }
4869        tcg_temp_free_i32(tcg_rn);
4870        break;
4871    }
4872    default:
4873        abort();
4874    }
4875}
4876
4877/* Floating point data-processing (1 source)
4878 *   31  30  29 28       24 23  22  21 20    15 14       10 9    5 4    0
4879 * +---+---+---+-----------+------+---+--------+-----------+------+------+
4880 * | M | 0 | S | 1 1 1 1 0 | type | 1 | opcode | 1 0 0 0 0 |  Rn  |  Rd  |
4881 * +---+---+---+-----------+------+---+--------+-----------+------+------+
4882 */
4883static void disas_fp_1src(DisasContext *s, uint32_t insn)
4884{
4885    int type = extract32(insn, 22, 2);
4886    int opcode = extract32(insn, 15, 6);
4887    int rn = extract32(insn, 5, 5);
4888    int rd = extract32(insn, 0, 5);
4889
4890    switch (opcode) {
4891    case 0x4: case 0x5: case 0x7:
4892    {
4893        /* FCVT between half, single and double precision */
4894        int dtype = extract32(opcode, 0, 2);
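            /* A conversion to the same precision (dtype == type) and
             * the reserved type value 2 are both unallocated.
             */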
4895        if (type == 2 || dtype == type) {
4896            unallocated_encoding(s);
4897            return;
4898        }
4899        if (!fp_access_check(s)) {
4900            return;
4901        }
4902
4903        handle_fp_fcvt(s, opcode, rd, rn, dtype, type);
4904        break;
4905    }
4906    case 0x0 ... 0x3:
4907    case 0x8 ... 0xc:
4908    case 0xe ... 0xf:
4909        /* 32-to-32 and 64-to-64 ops */
4910        switch (type) {
4911        case 0:
4912            if (!fp_access_check(s)) {
4913                return;
4914            }
4915
4916            handle_fp_1src_single(s, opcode, rd, rn);
4917            break;
4918        case 1:
4919            if (!fp_access_check(s)) {
4920                return;
4921            }
4922
4923            handle_fp_1src_double(s, opcode, rd, rn);
4924            break;
4925        case 3:
4926            if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
4927                unallocated_encoding(s);
4928                return;
4929            }
4930
4931            if (!fp_access_check(s)) {
4932                return;
4933            }
4934
4935            handle_fp_1src_half(s, opcode, rd, rn);
4936            break;
4937        default:
4938            unallocated_encoding(s);
4939        }
4940        break;
4941    default:
4942        unallocated_encoding(s);
4943        break;
4944    }
4945}
4946
4947/* Floating-point data-processing (2 source) - single precision */
4948static void handle_fp_2src_single(DisasContext *s, int opcode,
4949                                  int rd, int rn, int rm)
4950{
4951    TCGv_i32 tcg_op1;
4952    TCGv_i32 tcg_op2;
4953    TCGv_i32 tcg_res;
4954    TCGv_ptr fpst;
4955
4956    tcg_res = tcg_temp_new_i32();
4957    fpst = get_fpstatus_ptr(false);
4958    tcg_op1 = read_fp_sreg(s, rn);
4959    tcg_op2 = read_fp_sreg(s, rm);
4960
4961    switch (opcode) {
4962    case 0x0: /* FMUL */
4963        gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
4964        break;
4965    case 0x1: /* FDIV */
4966        gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
4967        break;
4968    case 0x2: /* FADD */
4969        gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
4970        break;
4971    case 0x3: /* FSUB */
4972        gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
4973        break;
4974    case 0x4: /* FMAX */
4975        gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
4976        break;
4977    case 0x5: /* FMIN */
4978        gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
4979        break;
4980    case 0x6: /* FMAXNM */
4981        gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
4982        break;
4983    case 0x7: /* FMINNM */
4984        gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
4985        break;
4986    case 0x8: /* FNMUL */
4987        gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
4988        gen_helper_vfp_negs(tcg_res, tcg_res);
4989        break;
4990    }
4991
4992    write_fp_sreg(s, rd, tcg_res);
4993
4994    tcg_temp_free_ptr(fpst);
4995    tcg_temp_free_i32(tcg_op1);
4996    tcg_temp_free_i32(tcg_op2);
4997    tcg_temp_free_i32(tcg_res);
4998}
4999
5000/* Floating-point data-processing (2 source) - double precision */
5001static void handle_fp_2src_double(DisasContext *s, int opcode,
5002                                  int rd, int rn, int rm)
5003{
5004    TCGv_i64 tcg_op1;
5005    TCGv_i64 tcg_op2;
5006    TCGv_i64 tcg_res;
5007    TCGv_ptr fpst;
5008
5009    tcg_res = tcg_temp_new_i64();
5010    fpst = get_fpstatus_ptr(false);
5011    tcg_op1 = read_fp_dreg(s, rn);
5012    tcg_op2 = read_fp_dreg(s, rm);
5013
5014    switch (opcode) {
5015    case 0x0: /* FMUL */
5016        gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
5017        break;
5018    case 0x1: /* FDIV */
5019        gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
5020        break;
5021    case 0x2: /* FADD */
5022        gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
5023        break;
5024    case 0x3: /* FSUB */
5025        gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
5026        break;
5027    case 0x4: /* FMAX */
5028        gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
5029        break;
5030    case 0x5: /* FMIN */
5031        gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
5032        break;
5033    case 0x6: /* FMAXNM */
5034        gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
5035        break;
5036    case 0x7: /* FMINNM */
5037        gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
5038        break;
5039    case 0x8: /* FNMUL */
5040        gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
5041        gen_helper_vfp_negd(tcg_res, tcg_res);
5042        break;
5043    }
5044
5045    write_fp_dreg(s, rd, tcg_res);
5046
5047    tcg_temp_free_ptr(fpst);
5048    tcg_temp_free_i64(tcg_op1);
5049    tcg_temp_free_i64(tcg_op2);
5050    tcg_temp_free_i64(tcg_res);
5051}
5052
5053/* Floating point data-processing (2 source)
5054 *   31  30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
5055 * +---+---+---+-----------+------+---+------+--------+-----+------+------+
5056 * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | opcode | 1 0 |  Rn  |  Rd  |
5057 * +---+---+---+-----------+------+---+------+--------+-----+------+------+
5058 */
5059static void disas_fp_2src(DisasContext *s, uint32_t insn)
5060{
5061    int type = extract32(insn, 22, 2);
5062    int rd = extract32(insn, 0, 5);
5063    int rn = extract32(insn, 5, 5);
5064    int rm = extract32(insn, 16, 5);
5065    int opcode = extract32(insn, 12, 4);
5066
5067    if (opcode > 8) {
5068        unallocated_encoding(s);
5069        return;
5070    }
5071
5072    switch (type) {
5073    case 0:
5074        if (!fp_access_check(s)) {
5075            return;
5076        }
5077        handle_fp_2src_single(s, opcode, rd, rn, rm);
5078        break;
5079    case 1:
5080        if (!fp_access_check(s)) {
5081            return;
5082        }
5083        handle_fp_2src_double(s, opcode, rd, rn, rm);
5084        break;
5085    default:
5086        unallocated_encoding(s);
5087    }
5088}
5089
5090/* Floating-point data-processing (3 source) - single precision */
5091static void handle_fp_3src_single(DisasContext *s, bool o0, bool o1,
5092                                  int rd, int rn, int rm, int ra)
5093{
5094    TCGv_i32 tcg_op1, tcg_op2, tcg_op3;
5095    TCGv_i32 tcg_res = tcg_temp_new_i32();
5096    TCGv_ptr fpst = get_fpstatus_ptr(false);
5097
5098    tcg_op1 = read_fp_sreg(s, rn);
5099    tcg_op2 = read_fp_sreg(s, rm);
5100    tcg_op3 = read_fp_sreg(s, ra);
5101
5102    /* These are fused multiply-add, and must be done as one
5103     * floating point operation with no rounding between the
5104     * multiplication and addition steps.
5105     * NB that doing the negations here as separate steps is
5106     * correct: an input NaN should come out with its sign bit
5107     * flipped if it is a negated input.
5108     */
5109    if (o1) {
5110        gen_helper_vfp_negs(tcg_op3, tcg_op3);
5111    }
5112
5113    if (o0 != o1) {
5114        gen_helper_vfp_negs(tcg_op1, tcg_op1);
5115    }
5116
5117    gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
5118
5119    write_fp_sreg(s, rd, tcg_res);
5120
5121    tcg_temp_free_ptr(fpst);
5122    tcg_temp_free_i32(tcg_op1);
5123    tcg_temp_free_i32(tcg_op2);
5124    tcg_temp_free_i32(tcg_op3);
5125    tcg_temp_free_i32(tcg_res);
5126}
5127
5128/* Floating-point data-processing (3 source) - double precision */
5129static void handle_fp_3src_double(DisasContext *s, bool o0, bool o1,
5130                                  int rd, int rn, int rm, int ra)
5131{
5132    TCGv_i64 tcg_op1, tcg_op2, tcg_op3;
5133    TCGv_i64 tcg_res = tcg_temp_new_i64();
5134    TCGv_ptr fpst = get_fpstatus_ptr(false);
5135
5136    tcg_op1 = read_fp_dreg(s, rn);
5137    tcg_op2 = read_fp_dreg(s, rm);
5138    tcg_op3 = read_fp_dreg(s, ra);
5139
5140    /* These are fused multiply-add, and must be done as one
5141     * floating point operation with no rounding between the
5142     * multiplication and addition steps.
5143     * NB that doing the negations here as separate steps is
5144     * correct: an input NaN should come out with its sign bit
5145     * flipped if it is a negated input.
5146     */
5147    if (o1) {
5148        gen_helper_vfp_negd(tcg_op3, tcg_op3);
5149    }
5150
5151    if (o0 != o1) {
5152        gen_helper_vfp_negd(tcg_op1, tcg_op1);
5153    }
5154
5155    gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
5156
5157    write_fp_dreg(s, rd, tcg_res);
5158
5159    tcg_temp_free_ptr(fpst);
5160    tcg_temp_free_i64(tcg_op1);
5161    tcg_temp_free_i64(tcg_op2);
5162    tcg_temp_free_i64(tcg_op3);
5163    tcg_temp_free_i64(tcg_res);
5164}
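
/* Given those negations, the four (o1, o0) encodings decoded by
 * disas_fp_3src() below compute (a sketch, writing n, m, a for the
 * values of Rn, Rm, Ra):
 *   o1=0 o0=0  FMADD   d =  (n * m) + a
 *   o1=0 o0=1  FMSUB   d = -(n * m) + a
 *   o1=1 o0=0  FNMADD  d = -(n * m) - a
 *   o1=1 o0=1  FNMSUB  d =  (n * m) - a
 */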
5165
5166/* Floating point data-processing (3 source)
5167 *   31  30  29 28       24 23  22  21  20  16  15  14  10 9    5 4    0
5168 * +---+---+---+-----------+------+----+------+----+------+------+------+
5169 * | M | 0 | S | 1 1 1 1 1 | type | o1 |  Rm  | o0 |  Ra  |  Rn  |  Rd  |
5170 * +---+---+---+-----------+------+----+------+----+------+------+------+
5171 */
5172static void disas_fp_3src(DisasContext *s, uint32_t insn)
5173{
5174    int type = extract32(insn, 22, 2);
5175    int rd = extract32(insn, 0, 5);
5176    int rn = extract32(insn, 5, 5);
5177    int ra = extract32(insn, 10, 5);
5178    int rm = extract32(insn, 16, 5);
5179    bool o0 = extract32(insn, 15, 1);
5180    bool o1 = extract32(insn, 21, 1);
5181
5182    switch (type) {
5183    case 0:
5184        if (!fp_access_check(s)) {
5185            return;
5186        }
5187        handle_fp_3src_single(s, o0, o1, rd, rn, rm, ra);
5188        break;
5189    case 1:
5190        if (!fp_access_check(s)) {
5191            return;
5192        }
5193        handle_fp_3src_double(s, o0, o1, rd, rn, rm, ra);
5194        break;
5195    default:
5196        unallocated_encoding(s);
5197    }
5198}
5199
5200/* The imm8 encodes the sign bit, enough bits to represent an exponent in
5201 * the range 01....1xx to 10....0xx, and the most significant 4 bits of
5202 * the mantissa; see VFPExpandImm() in the v8 ARM ARM.
5203 */
5204static uint64_t vfp_expand_imm(int size, uint8_t imm8)
5205{
5206    uint64_t imm;
5207
5208    switch (size) {
5209    case MO_64:
5210        imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
5211            (extract32(imm8, 6, 1) ? 0x3fc0 : 0x4000) |
5212            extract32(imm8, 0, 6);
5213        imm <<= 48;
5214        break;
5215    case MO_32:
5216        imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
5217            (extract32(imm8, 6, 1) ? 0x3e00 : 0x4000) |
5218            (extract32(imm8, 0, 6) << 3);
5219        imm <<= 16;
5220        break;
5221    case MO_16:
5222        imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
5223            (extract32(imm8, 6, 1) ? 0x3000 : 0x4000) |
5224            (extract32(imm8, 0, 6) << 6);
5225        break;
5226    default:
5227        g_assert_not_reached();
5228    }
5229    return imm;
5230}
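
/* Worked example (illustrative): imm8 = 0x70 has sign = 0, bit 6 = 1
 * and imm8<5:0> = 0x30, so the cases above produce:
 *   MO_16: 0x3000 | (0x30 << 6)         = 0x3c00             (1.0 half)
 *   MO_32: (0x3e00 | (0x30 << 3)) << 16 = 0x3f800000         (1.0 single)
 *   MO_64: (0x3fc0 | 0x30) << 48        = 0x3ff0000000000000 (1.0 double)
 * i.e. imm8 = 0x70 is the FMOV encoding of #1.0 at every size.
 */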
5231
5232/* Floating point immediate
5233 *   31  30  29 28       24 23  22  21 20        13 12   10 9    5 4    0
5234 * +---+---+---+-----------+------+---+------------+-------+------+------+
5235 * | M | 0 | S | 1 1 1 1 0 | type | 1 |    imm8    | 1 0 0 | imm5 |  Rd  |
5236 * +---+---+---+-----------+------+---+------------+-------+------+------+
5237 */
5238static void disas_fp_imm(DisasContext *s, uint32_t insn)
5239{
5240    int rd = extract32(insn, 0, 5);
5241    int imm8 = extract32(insn, 13, 8);
5242    int is_double = extract32(insn, 22, 2);
5243    uint64_t imm;
5244    TCGv_i64 tcg_res;
5245
5246    if (is_double > 1) {
5247        unallocated_encoding(s);
5248        return;
5249    }
5250
5251    if (!fp_access_check(s)) {
5252        return;
5253    }
5254
5255    imm = vfp_expand_imm(MO_32 + is_double, imm8);
5256
5257    tcg_res = tcg_const_i64(imm);
5258    write_fp_dreg(s, rd, tcg_res);
5259    tcg_temp_free_i64(tcg_res);
5260}
5261
5262/* Handle floating point <=> fixed point conversions. Note that we can
5263 * also deal with fp <=> integer conversions as a special case (scale == 64)
5264 * OPTME: consider handling that special case specially or at least skipping
5265 * the call to scalbn in the helpers for zero shifts.
5266 */
5267static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode,
5268                           bool itof, int rmode, int scale, int sf, int type)
5269{
5270    bool is_signed = !(opcode & 1);
5271    bool is_double = type;
5272    TCGv_ptr tcg_fpstatus;
5273    TCGv_i32 tcg_shift;
5274
5275    tcg_fpstatus = get_fpstatus_ptr(false);
5276
5277    tcg_shift = tcg_const_i32(64 - scale);
5278
5279    if (itof) {
5280        TCGv_i64 tcg_int = cpu_reg(s, rn);
5281        if (!sf) {
5282            TCGv_i64 tcg_extend = new_tmp_a64(s);
5283
5284            if (is_signed) {
5285                tcg_gen_ext32s_i64(tcg_extend, tcg_int);
5286            } else {
5287                tcg_gen_ext32u_i64(tcg_extend, tcg_int);
5288            }
5289
5290            tcg_int = tcg_extend;
5291        }
5292
5293        if (is_double) {
5294            TCGv_i64 tcg_double = tcg_temp_new_i64();
5295            if (is_signed) {
5296                gen_helper_vfp_sqtod(tcg_double, tcg_int,
5297                                     tcg_shift, tcg_fpstatus);
5298            } else {
5299                gen_helper_vfp_uqtod(tcg_double, tcg_int,
5300                                     tcg_shift, tcg_fpstatus);
5301            }
5302            write_fp_dreg(s, rd, tcg_double);
5303            tcg_temp_free_i64(tcg_double);
5304        } else {
5305            TCGv_i32 tcg_single = tcg_temp_new_i32();
5306            if (is_signed) {
5307                gen_helper_vfp_sqtos(tcg_single, tcg_int,
5308                                     tcg_shift, tcg_fpstatus);
5309            } else {
5310                gen_helper_vfp_uqtos(tcg_single, tcg_int,
5311                                     tcg_shift, tcg_fpstatus);
5312            }
5313            write_fp_sreg(s, rd, tcg_single);
5314            tcg_temp_free_i32(tcg_single);
5315        }
5316    } else {
5317        TCGv_i64 tcg_int = cpu_reg(s, rd);
5318        TCGv_i32 tcg_rmode;
5319
5320        if (extract32(opcode, 2, 1)) {
5321            /* There are too many rounding modes to all fit into rmode,
5322             * so FCVTA[US] is a special case.
5323             */
5324            rmode = FPROUNDING_TIEAWAY;
5325        }
5326
5327        tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
5328
5329        gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
5330
5331        if (is_double) {
5332            TCGv_i64 tcg_double = read_fp_dreg(s, rn);
5333            if (is_signed) {
5334                if (!sf) {
5335                    gen_helper_vfp_tosld(tcg_int, tcg_double,
5336                                         tcg_shift, tcg_fpstatus);
5337                } else {
5338                    gen_helper_vfp_tosqd(tcg_int, tcg_double,
5339                                         tcg_shift, tcg_fpstatus);
5340                }
5341            } else {
5342                if (!sf) {
5343                    gen_helper_vfp_tould(tcg_int, tcg_double,
5344                                         tcg_shift, tcg_fpstatus);
5345                } else {
5346                    gen_helper_vfp_touqd(tcg_int, tcg_double,
5347                                         tcg_shift, tcg_fpstatus);
5348                }
5349            }
5350            tcg_temp_free_i64(tcg_double);
5351        } else {
5352            TCGv_i32 tcg_single = read_fp_sreg(s, rn);
5353            if (sf) {
5354                if (is_signed) {
5355                    gen_helper_vfp_tosqs(tcg_int, tcg_single,
5356                                         tcg_shift, tcg_fpstatus);
5357                } else {
5358                    gen_helper_vfp_touqs(tcg_int, tcg_single,
5359                                         tcg_shift, tcg_fpstatus);
5360                }
5361            } else {
5362                TCGv_i32 tcg_dest = tcg_temp_new_i32();
5363                if (is_signed) {
5364                    gen_helper_vfp_tosls(tcg_dest, tcg_single,
5365                                         tcg_shift, tcg_fpstatus);
5366                } else {
5367                    gen_helper_vfp_touls(tcg_dest, tcg_single,
5368                                         tcg_shift, tcg_fpstatus);
5369                }
5370                tcg_gen_extu_i32_i64(tcg_int, tcg_dest);
5371                tcg_temp_free_i32(tcg_dest);
5372            }
5373            tcg_temp_free_i32(tcg_single);
5374        }
5375
5376        gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
5377        tcg_temp_free_i32(tcg_rmode);
5378
5379        if (!sf) {
5380            tcg_gen_ext32u_i64(tcg_int, tcg_int);
5381        }
5382    }
5383
5384    tcg_temp_free_ptr(tcg_fpstatus);
5385    tcg_temp_free_i32(tcg_shift);
5386}
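
/* Worked example (illustrative): SCVTF D0, X1, #16 arrives here from
 * disas_fp_fixed_conv() with a scale field of 48, so tcg_shift is
 * 64 - 48 = 16 and gen_helper_vfp_sqtod() yields (double)X1 * 2^-16,
 * i.e. X1 treated as a signed fixed-point value with 16 fraction bits.
 * The pure integer forms pass scale = 64, making the scalbn in the
 * helper a shift by zero.
 */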
5387
5388/* Floating point <-> fixed point conversions
5389 *   31   30  29 28       24 23  22  21 20   19 18    16 15   10 9    5 4    0
5390 * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
5391 * | sf | 0 | S | 1 1 1 1 0 | type | 0 | rmode | opcode | scale |  Rn  |  Rd  |
5392 * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
5393 */
5394static void disas_fp_fixed_conv(DisasContext *s, uint32_t insn)
5395{
5396    int rd = extract32(insn, 0, 5);
5397    int rn = extract32(insn, 5, 5);
5398    int scale = extract32(insn, 10, 6);
5399    int opcode = extract32(insn, 16, 3);
5400    int rmode = extract32(insn, 19, 2);
5401    int type = extract32(insn, 22, 2);
5402    bool sbit = extract32(insn, 29, 1);
5403    bool sf = extract32(insn, 31, 1);
5404    bool itof;
5405
5406    if (sbit || (type > 1)
5407        || (!sf && scale < 32)) {
5408        unallocated_encoding(s);
5409        return;
5410    }
5411
5412    switch ((rmode << 3) | opcode) {
5413    case 0x2: /* SCVTF */
5414    case 0x3: /* UCVTF */
5415        itof = true;
5416        break;
5417    case 0x18: /* FCVTZS */
5418    case 0x19: /* FCVTZU */
5419        itof = false;
5420        break;
5421    default:
5422        unallocated_encoding(s);
5423        return;
5424    }
5425
5426    if (!fp_access_check(s)) {
5427        return;
5428    }
5429
5430    handle_fpfpcvt(s, rd, rn, opcode, itof, FPROUNDING_ZERO, scale, sf, type);
5431}
5432
5433static void handle_fmov(DisasContext *s, int rd, int rn, int type, bool itof)
5434{
5435    /* FMOV: gpr to or from float, double, or top half of quad fp reg,
5436     * without conversion.
5437     */
5438
5439    if (itof) {
5440        TCGv_i64 tcg_rn = cpu_reg(s, rn);
5441
5442        switch (type) {
5443        case 0:
5444        {
5445            /* 32 bit */
5446            TCGv_i64 tmp = tcg_temp_new_i64();
5447            tcg_gen_ext32u_i64(tmp, tcg_rn);
5448            tcg_gen_st_i64(tmp, cpu_env, fp_reg_offset(s, rd, MO_64));
5449            tcg_gen_movi_i64(tmp, 0);
5450            tcg_gen_st_i64(tmp, cpu_env, fp_reg_hi_offset(s, rd));
5451            tcg_temp_free_i64(tmp);
5452            break;
5453        }
5454        case 1:
5455        {
5456            /* 64 bit */
5457            TCGv_i64 tmp = tcg_const_i64(0);
5458            tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_offset(s, rd, MO_64));
5459            tcg_gen_st_i64(tmp, cpu_env, fp_reg_hi_offset(s, rd));
5460            tcg_temp_free_i64(tmp);
5461            break;
5462        }
5463        case 2:
5464            /* 64 bit to top half. */
5465            tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_hi_offset(s, rd));
5466            break;
5467        }
5468    } else {
5469        TCGv_i64 tcg_rd = cpu_reg(s, rd);
5470
5471        switch (type) {
5472        case 0:
5473            /* 32 bit */
5474            tcg_gen_ld32u_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_32));
5475            break;
5476        case 1:
5477            /* 64 bit */
5478            tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_64));
5479            break;
5480        case 2:
5481            /* 64 bits from top half */
5482            tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_hi_offset(s, rn));
5483            break;
5484        }
5485    }
5486}
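
/* Illustrative mapping of the type values used above (the legal
 * combinations are enforced by disas_fp_int_conv() below):
 *   type 0: FMOV Sd, Wn  /  FMOV Wd, Sn            (32 bit)
 *   type 1: FMOV Dd, Xn  /  FMOV Xd, Dn            (64 bit)
 *   type 2: FMOV Vd.D[1], Xn  /  FMOV Xd, Vn.D[1]  (top half of quad)
 */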
5487
5488/* Floating point <-> integer conversions
5489 *   31   30  29 28       24 23  22  21 20   19 18 16 15         10 9  5 4  0
5490 * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
5491 * | sf | 0 | S | 1 1 1 1 0 | type | 1 | rmode | opc | 0 0 0 0 0 0 | Rn | Rd |
5492 * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
5493 */
5494static void disas_fp_int_conv(DisasContext *s, uint32_t insn)
5495{
5496    int rd = extract32(insn, 0, 5);
5497    int rn = extract32(insn, 5, 5);
5498    int opcode = extract32(insn, 16, 3);
5499    int rmode = extract32(insn, 19, 2);
5500    int type = extract32(insn, 22, 2);
5501    bool sbit = extract32(insn, 29, 1);
5502    bool sf = extract32(insn, 31, 1);
5503
5504    if (sbit) {
5505        unallocated_encoding(s);
5506        return;
5507    }
5508
5509    if (opcode > 5) {
5510        /* FMOV */
5511        bool itof = opcode & 1;
5512
5513        if (rmode >= 2) {
5514            unallocated_encoding(s);
5515            return;
5516        }
5517
5518        switch (sf << 3 | type << 1 | rmode) {
5519        case 0x0: /* 32 bit */
5520        case 0xa: /* 64 bit */
5521        case 0xd: /* 64 bit to top half of quad */
5522            break;
5523        default:
5524            /* all other sf/type/rmode combinations are invalid */
5525            unallocated_encoding(s);
5526            return;
5527        }
5528
5529        if (!fp_access_check(s)) {
5530            return;
5531        }
5532        handle_fmov(s, rd, rn, type, itof);
5533    } else {
5534        /* actual FP conversions */
5535        bool itof = extract32(opcode, 1, 1);
5536
5537        if (type > 1 || (rmode != 0 && opcode > 1)) {
5538            unallocated_encoding(s);
5539            return;
5540        }
5541
5542        if (!fp_access_check(s)) {
5543            return;
5544        }
5545        handle_fpfpcvt(s, rd, rn, opcode, itof, rmode, 64, sf, type);
5546    }
5547}
5548
5549/* FP-specific subcases of table C3-6 (SIMD and FP data processing)
5550 *   31  30  29 28     25 24                          0
5551 * +---+---+---+---------+-----------------------------+
5552 * |   | 0 |   | 1 1 1 1 |                             |
5553 * +---+---+---+---------+-----------------------------+
5554 */
5555static void disas_data_proc_fp(DisasContext *s, uint32_t insn)
5556{
5557    if (extract32(insn, 24, 1)) {
5558        /* Floating point data-processing (3 source) */
5559        disas_fp_3src(s, insn);
5560    } else if (extract32(insn, 21, 1) == 0) {
5561        /* Floating point to fixed point conversions */
5562        disas_fp_fixed_conv(s, insn);
5563    } else {
5564        switch (extract32(insn, 10, 2)) {
5565        case 1:
5566            /* Floating point conditional compare */
5567            disas_fp_ccomp(s, insn);
5568            break;
5569        case 2:
5570            /* Floating point data-processing (2 source) */
5571            disas_fp_2src(s, insn);
5572            break;
5573        case 3:
5574            /* Floating point conditional select */
5575            disas_fp_csel(s, insn);
5576            break;
5577        case 0:
5578            switch (ctz32(extract32(insn, 12, 4))) {
5579            case 0: /* [15:12] == xxx1 */
5580                /* Floating point immediate */
5581                disas_fp_imm(s, insn);
5582                break;
5583            case 1: /* [15:12] == xx10 */
5584                /* Floating point compare */
5585                disas_fp_compare(s, insn);
5586                break;
5587            case 2: /* [15:12] == x100 */
5588                /* Floating point data-processing (1 source) */
5589                disas_fp_1src(s, insn);
5590                break;
5591            case 3: /* [15:12] == 1000 */
5592                unallocated_encoding(s);
5593                break;
5594            default: /* [15:12] == 0000 */
5595                /* Floating point <-> integer conversions */
5596                disas_fp_int_conv(s, insn);
5597                break;
5598            }
5599            break;
5600        }
5601    }
5602}
5603
5604static void do_ext64(DisasContext *s, TCGv_i64 tcg_left, TCGv_i64 tcg_right,
5605                     int pos)
5606{
5607    /* Extract 64 bits from the middle of two concatenated 64 bit
5608     * vector register slices left:right. The extracted bits start
5609     * at 'pos' bits into the right (least significant) side.
5610     * We return the result in tcg_right, and guarantee not to
5611     * trash tcg_left.
5612     */
5613    TCGv_i64 tcg_tmp = tcg_temp_new_i64();
5614    assert(pos > 0 && pos < 64);
5615
5616    tcg_gen_shri_i64(tcg_right, tcg_right, pos);
5617    tcg_gen_shli_i64(tcg_tmp, tcg_left, 64 - pos);
5618    tcg_gen_or_i64(tcg_right, tcg_right, tcg_tmp);
5619
5620    tcg_temp_free_i64(tcg_tmp);
5621}
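
/* Worked example (illustrative): with pos = 24 the two shifts give
 *   tcg_right = (right >> 24) | (left << 40)
 * so bits 39:0 of the result are right<63:24> and bits 63:40 are
 * left<23:0>: the 64 bits starting 24 bits up from the bottom of the
 * left:right pair.
 */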
5622
5623/* EXT
5624 *   31  30 29         24 23 22  21 20  16 15  14  11 10  9    5 4    0
5625 * +---+---+-------------+-----+---+------+---+------+---+------+------+
5626 * | 0 | Q | 1 0 1 1 1 0 | op2 | 0 |  Rm  | 0 | imm4 | 0 |  Rn  |  Rd  |
5627 * +---+---+-------------+-----+---+------+---+------+---+------+------+
5628 */
5629static void disas_simd_ext(DisasContext *s, uint32_t insn)
5630{
5631    int is_q = extract32(insn, 30, 1);
5632    int op2 = extract32(insn, 22, 2);
5633    int imm4 = extract32(insn, 11, 4);
5634    int rm = extract32(insn, 16, 5);
5635    int rn = extract32(insn, 5, 5);
5636    int rd = extract32(insn, 0, 5);
5637    int pos = imm4 << 3;
5638    TCGv_i64 tcg_resl, tcg_resh;
5639
5640    if (op2 != 0 || (!is_q && extract32(imm4, 3, 1))) {
5641        unallocated_encoding(s);
5642        return;
5643    }
5644
5645    if (!fp_access_check(s)) {
5646        return;
5647    }
5648
5649    tcg_resh = tcg_temp_new_i64();
5650    tcg_resl = tcg_temp_new_i64();
5651
5652    /* Vd gets bits starting at pos bits into Vm:Vn. This is
5653     * either extracting 128 bits from a 128:128 concatenation, or
5654     * extracting 64 bits from a 64:64 concatenation.
5655     */
5656    if (!is_q) {
5657        read_vec_element(s, tcg_resl, rn, 0, MO_64);
5658        if (pos != 0) {
5659            read_vec_element(s, tcg_resh, rm, 0, MO_64);
5660            do_ext64(s, tcg_resh, tcg_resl, pos);
5661        }
5662        tcg_gen_movi_i64(tcg_resh, 0);
5663    } else {
5664        TCGv_i64 tcg_hh;
5665        typedef struct {
5666            int reg;
5667            int elt;
5668        } EltPosns;
5669        EltPosns eltposns[] = { {rn, 0}, {rn, 1}, {rm, 0}, {rm, 1} };
5670        EltPosns *elt = eltposns;
5671
5672        if (pos >= 64) {
5673            elt++;
5674            pos -= 64;
5675        }
5676
5677        read_vec_element(s, tcg_resl, elt->reg, elt->elt, MO_64);
5678        elt++;
5679        read_vec_element(s, tcg_resh, elt->reg, elt->elt, MO_64);
5680        elt++;
5681        if (pos != 0) {
5682            do_ext64(s, tcg_resh, tcg_resl, pos);
5683            tcg_hh = tcg_temp_new_i64();
5684            read_vec_element(s, tcg_hh, elt->reg, elt->elt, MO_64);
5685            do_ext64(s, tcg_hh, tcg_resh, pos);
5686            tcg_temp_free_i64(tcg_hh);
5687        }
5688    }
5689
5690    write_vec_element(s, tcg_resl, rd, 0, MO_64);
5691    tcg_temp_free_i64(tcg_resl);
5692    write_vec_element(s, tcg_resh, rd, 1, MO_64);
5693    tcg_temp_free_i64(tcg_resh);
5694}
5695
5696/* TBL/TBX
5697 *   31  30 29         24 23 22  21 20  16 15  14 13  12  11 10 9    5 4    0
5698 * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
5699 * | 0 | Q | 0 0 1 1 1 0 | op2 | 0 |  Rm  | 0 | len | op | 0 0 |  Rn  |  Rd  |
5700 * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
5701 */
5702static void disas_simd_tb(DisasContext *s, uint32_t insn)
5703{
5704    int op2 = extract32(insn, 22, 2);
5705    int is_q = extract32(insn, 30, 1);
5706    int rm = extract32(insn, 16, 5);
5707    int rn = extract32(insn, 5, 5);
5708    int rd = extract32(insn, 0, 5);
5709    int is_tblx = extract32(insn, 12, 1);
5710    int len = extract32(insn, 13, 2);
5711    TCGv_i64 tcg_resl, tcg_resh, tcg_idx;
5712    TCGv_i32 tcg_regno, tcg_numregs;
5713
5714    if (op2 != 0) {
5715        unallocated_encoding(s);
5716        return;
5717    }
5718
5719    if (!fp_access_check(s)) {
5720        return;
5721    }
5722
5723    /* This does a table lookup: for every byte element in the input
5724     * we index into a table formed from up to four vector registers,
5725     * and then the output is the result of the lookups. Our helper
5726     * function does the lookup operation for a single 64 bit part of
5727     * the input.
5728     */
5729    tcg_resl = tcg_temp_new_i64();
5730    tcg_resh = tcg_temp_new_i64();
5731
5732    if (is_tblx) {
5733        read_vec_element(s, tcg_resl, rd, 0, MO_64);
5734    } else {
5735        tcg_gen_movi_i64(tcg_resl, 0);
5736    }
5737    if (is_tblx && is_q) {
5738        read_vec_element(s, tcg_resh, rd, 1, MO_64);
5739    } else {
5740        tcg_gen_movi_i64(tcg_resh, 0);
5741    }
5742
5743    tcg_idx = tcg_temp_new_i64();
5744    tcg_regno = tcg_const_i32(rn);
5745    tcg_numregs = tcg_const_i32(len + 1);
5746    read_vec_element(s, tcg_idx, rm, 0, MO_64);
5747    gen_helper_simd_tbl(tcg_resl, cpu_env, tcg_resl, tcg_idx,
5748                        tcg_regno, tcg_numregs);
5749    if (is_q) {
5750        read_vec_element(s, tcg_idx, rm, 1, MO_64);
5751        gen_helper_simd_tbl(tcg_resh, cpu_env, tcg_resh, tcg_idx,
5752                            tcg_regno, tcg_numregs);
5753    }
5754    tcg_temp_free_i64(tcg_idx);
5755    tcg_temp_free_i32(tcg_regno);
5756    tcg_temp_free_i32(tcg_numregs);
5757
5758    write_vec_element(s, tcg_resl, rd, 0, MO_64);
5759    tcg_temp_free_i64(tcg_resl);
5760    write_vec_element(s, tcg_resh, rd, 1, MO_64);
5761    tcg_temp_free_i64(tcg_resh);
5762}
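
/* Illustrative example (behaviour implemented by the helper, per the
 * TBL/TBX pseudocode): TBL Vd.16B, { V0.16B, V1.16B }, Vm.16B has
 * len = 1, so index bytes 0..31 select a byte from the V0:V1 table and
 * any larger index produces 0. TBX (is_tblx) instead leaves such bytes
 * unchanged, which is why the old contents of Rd are read into
 * tcg_resl/tcg_resh above.
 */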
5763
5764/* ZIP/UZP/TRN
5765 *   31  30 29         24 23  22  21 20  16 15 14   12 11 10 9    5 4    0
5766 * +---+---+-------------+------+---+------+---+-------+-----+------+------+
5767 * | 0 | Q | 0 0 1 1 1 0 | size | 0 |  Rm  | 0 |  opc  | 1 0 |  Rn  |  Rd  |
5768 * +---+---+-------------+------+---+------+---+-------+-----+------+------+
5769 */
5770static void disas_simd_zip_trn(DisasContext *s, uint32_t insn)
5771{
5772    int rd = extract32(insn, 0, 5);
5773    int rn = extract32(insn, 5, 5);
5774    int rm = extract32(insn, 16, 5);
5775    int size = extract32(insn, 22, 2);
5776    /* opc field bits [1:0] indicate ZIP/UZP/TRN;
5777     * bit 2 indicates 1 vs 2 variant of the insn.
5778     */
5779    int opcode = extract32(insn, 12, 2);
5780    bool part = extract32(insn, 14, 1);
5781    bool is_q = extract32(insn, 30, 1);
5782    int esize = 8 << size;
5783    int i, ofs;
5784    int datasize = is_q ? 128 : 64;
5785    int elements = datasize / esize;
5786    TCGv_i64 tcg_res, tcg_resl, tcg_resh;
5787
5788    if (opcode == 0 || (size == 3 && !is_q)) {
5789        unallocated_encoding(s);
5790        return;
5791    }
5792
5793    if (!fp_access_check(s)) {
5794        return;
5795    }
5796
5797    tcg_resl = tcg_const_i64(0);
5798    tcg_resh = tcg_const_i64(0);
5799    tcg_res = tcg_temp_new_i64();
5800
5801    for (i = 0; i < elements; i++) {
5802        switch (opcode) {
5803        case 1: /* UZP1/2 */
5804        {
5805            int midpoint = elements / 2;
5806            if (i < midpoint) {
5807                read_vec_element(s, tcg_res, rn, 2 * i + part, size);
5808            } else {
5809                read_vec_element(s, tcg_res, rm,
5810                                 2 * (i - midpoint) + part, size);
5811            }
5812            break;
5813        }
5814        case 2: /* TRN1/2 */
5815            if (i & 1) {
5816                read_vec_element(s, tcg_res, rm, (i & ~1) + part, size);
5817            } else {
5818                read_vec_element(s, tcg_res, rn, (i & ~1) + part, size);
5819            }
5820            break;
5821        case 3: /* ZIP1/2 */
5822        {
5823            int base = part * elements / 2;
5824            if (i & 1) {
5825                read_vec_element(s, tcg_res, rm, base + (i >> 1), size);
5826            } else {
5827                read_vec_element(s, tcg_res, rn, base + (i >> 1), size);
5828            }
5829            break;
5830        }
5831        default:
5832            g_assert_not_reached();
5833        }
5834
5835        ofs = i * esize;
5836        if (ofs < 64) {
5837            tcg_gen_shli_i64(tcg_res, tcg_res, ofs);
5838            tcg_gen_or_i64(tcg_resl, tcg_resl, tcg_res);
5839        } else {
5840            tcg_gen_shli_i64(tcg_res, tcg_res, ofs - 64);
5841            tcg_gen_or_i64(tcg_resh, tcg_resh, tcg_res);
5842        }
5843    }
5844
5845    tcg_temp_free_i64(tcg_res);
5846
5847    write_vec_element(s, tcg_resl, rd, 0, MO_64);
5848    tcg_temp_free_i64(tcg_resl);
5849    write_vec_element(s, tcg_resh, rd, 1, MO_64);
5850    tcg_temp_free_i64(tcg_resh);
5851}
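
/* Worked example (illustrative), for .4S vectors with part = 0:
 *   UZP1 Vd, Vn, Vm -> { n[0], n[2], m[0], m[2] }   (even elements)
 *   TRN1 Vd, Vn, Vm -> { n[0], m[0], n[2], m[2] }
 *   ZIP1 Vd, Vn, Vm -> { n[0], m[0], n[1], m[1] }   (low halves interleaved)
 */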
5852
5853/*
5854 * do_reduction_op helper
5855 *
5856 * This mirrors the Reduce() pseudocode in the ARM ARM. It is
5857 * important for correct NaN propagation that we do these
5858 * operations in exactly the order specified by the pseudocode.
5859 *
5860 * This is a recursive function; TCG temps should be freed by the
5861 * calling function once it is done with the values.
5862 */
5863static TCGv_i32 do_reduction_op(DisasContext *s, int fpopcode, int rn,
5864                                int esize, int size, int vmap, TCGv_ptr fpst)
5865{
5866    if (esize == size) {
5867        int element;
5868        TCGMemOp msize = esize == 16 ? MO_16 : MO_32;
5869        TCGv_i32 tcg_elem;
5870
5871        /* We should have one register left here */
5872        assert(ctpop8(vmap) == 1);
5873        element = ctz32(vmap);
5874        assert(element < 8);
5875
5876        tcg_elem = tcg_temp_new_i32();
5877        read_vec_element_i32(s, tcg_elem, rn, element, msize);
5878        return tcg_elem;
5879    } else {
5880        int bits = size / 2;
5881        int shift = ctpop8(vmap) / 2;
5882        int vmap_lo = (vmap >> shift) & vmap;
5883        int vmap_hi = (vmap & ~vmap_lo);
5884        TCGv_i32 tcg_hi, tcg_lo, tcg_res;
5885
5886        tcg_hi = do_reduction_op(s, fpopcode, rn, esize, bits, vmap_hi, fpst);
5887        tcg_lo = do_reduction_op(s, fpopcode, rn, esize, bits, vmap_lo, fpst);
5888        tcg_res = tcg_temp_new_i32();
5889
5890        switch (fpopcode) {
5891        case 0x0c: /* fmaxnmv half-precision */
5892            gen_helper_advsimd_maxnumh(tcg_res, tcg_lo, tcg_hi, fpst);
5893            break;
5894        case 0x0f: /* fmaxv half-precision */
5895            gen_helper_advsimd_maxh(tcg_res, tcg_lo, tcg_hi, fpst);
5896            break;
5897        case 0x1c: /* fminnmv half-precision */
5898            gen_helper_advsimd_minnumh(tcg_res, tcg_lo, tcg_hi, fpst);
5899            break;
5900        case 0x1f: /* fminv half-precision */
5901            gen_helper_advsimd_minh(tcg_res, tcg_lo, tcg_hi, fpst);
5902            break;
5903        case 0x2c: /* fmaxnmv */
5904            gen_helper_vfp_maxnums(tcg_res, tcg_lo, tcg_hi, fpst);
5905            break;
5906        case 0x2f: /* fmaxv */
5907            gen_helper_vfp_maxs(tcg_res, tcg_lo, tcg_hi, fpst);
5908            break;
5909        case 0x3c: /* fminnmv */
5910            gen_helper_vfp_minnums(tcg_res, tcg_lo, tcg_hi, fpst);
5911            break;
5912        case 0x3f: /* fminv */
5913            gen_helper_vfp_mins(tcg_res, tcg_lo, tcg_hi, fpst);
5914            break;
5915        default:
5916            g_assert_not_reached();
5917        }
5918
5919        tcg_temp_free_i32(tcg_hi);
5920        tcg_temp_free_i32(tcg_lo);
5921        return tcg_res;
5922    }
5923}
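
/* Worked example (illustrative): FMAXV Sd, Vn.4S enters with
 * vmap = 0b1111 and size = 128. The first split computes shift = 2,
 * vmap_lo = 0b0011 and vmap_hi = 0b1100, so the recursion evaluates
 *   fmax(fmax(elt[0], elt[1]), fmax(elt[2], elt[3]))
 * which is exactly the pairwise tree of the Reduce() pseudocode.
 */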
5924
5925/* AdvSIMD across lanes
5926 *   31  30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
5927 * +---+---+---+-----------+------+-----------+--------+-----+------+------+
5928 * | 0 | Q | U | 0 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
5929 * +---+---+---+-----------+------+-----------+--------+-----+------+------+
5930 */
5931static void disas_simd_across_lanes(DisasContext *s, uint32_t insn)
5932{
5933    int rd = extract32(insn, 0, 5);
5934    int rn = extract32(insn, 5, 5);
5935    int size = extract32(insn, 22, 2);
5936    int opcode = extract32(insn, 12, 5);
5937    bool is_q = extract32(insn, 30, 1);
5938    bool is_u = extract32(insn, 29, 1);
5939    bool is_fp = false;
5940    bool is_min = false;
5941    int esize;
5942    int elements;
5943    int i;
5944    TCGv_i64 tcg_res, tcg_elt;
5945
5946    switch (opcode) {
5947    case 0x1b: /* ADDV */
5948        if (is_u) {
5949            unallocated_encoding(s);
5950            return;
5951        }
5952        /* fall through */
5953    case 0x3: /* SADDLV, UADDLV */
5954    case 0xa: /* SMAXV, UMAXV */
5955    case 0x1a: /* SMINV, UMINV */
5956        if (size == 3 || (size == 2 && !is_q)) {
5957            unallocated_encoding(s);
5958            return;
5959        }
5960        break;
5961    case 0xc: /* FMAXNMV, FMINNMV */
5962    case 0xf: /* FMAXV, FMINV */
5963        /* Bit 1 of size field encodes min vs max and the actual size
5964         * depends on the encoding of the U bit. If not set (and FP16
5965         * enabled) then we do half-precision float instead of single
5966         * precision.
5967         */
5968        is_min = extract32(size, 1, 1);
5969        is_fp = true;
5970        if (!is_u && arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
5971            size = 1;
5972        } else if (!is_u || !is_q || extract32(size, 0, 1)) {
5973            unallocated_encoding(s);
5974            return;
5975        } else {
5976            size = 2;
5977        }
5978        break;
5979    default:
5980        unallocated_encoding(s);
5981        return;
5982    }
5983
5984    if (!fp_access_check(s)) {
5985        return;
5986    }
5987
5988    esize = 8 << size;
5989    elements = (is_q ? 128 : 64) / esize;
5990
5991    tcg_res = tcg_temp_new_i64();
5992    tcg_elt = tcg_temp_new_i64();
5993
5994    /* These instructions operate across all lanes of a vector
5995     * to produce a single result. We can guarantee that a 64
5996     * bit intermediate is sufficient:
5997     *  + for [US]ADDLV the maximum element size is 32 bits, and
5998     *    the result type is 64 bits
5999     *  + for FMAX*V, FMIN*V, ADDV the intermediate type is the
6000     *    same as the element size, which is 32 bits at most
6001     * For the integer operations we can choose to work at 64
6002     * or 32 bits and truncate at the end; for simplicity
6003     * we use 64 bits always. The floating point
6004     * ops do require 32 bit intermediates, though.
6005     */
6006    if (!is_fp) {
6007        read_vec_element(s, tcg_res, rn, 0, size | (is_u ? 0 : MO_SIGN));
6008
6009        for (i = 1; i < elements; i++) {
6010            read_vec_element(s, tcg_elt, rn, i, size | (is_u ? 0 : MO_SIGN));
6011
6012            switch (opcode) {
6013            case 0x03: /* SADDLV / UADDLV */
6014            case 0x1b: /* ADDV */
6015                tcg_gen_add_i64(tcg_res, tcg_res, tcg_elt);
6016                break;
6017            case 0x0a: /* SMAXV / UMAXV */
6018                tcg_gen_movcond_i64(is_u ? TCG_COND_GEU : TCG_COND_GE,
6019                                    tcg_res,
6020                                    tcg_res, tcg_elt, tcg_res, tcg_elt);
6021                break;
6022            case 0x1a: /* SMINV / UMINV */
6023                tcg_gen_movcond_i64(is_u ? TCG_COND_LEU : TCG_COND_LE,
6024                                    tcg_res,
6025                                    tcg_res, tcg_elt, tcg_res, tcg_elt);
6026                break;
6028            default:
6029                g_assert_not_reached();
6030            }
6031
6032        }
6033    } else {
6034        /* Floating point vector reduction ops which work across 32
6035         * bit (single) or 16 bit (half-precision) intermediates.
6036         * Note that correct NaN propagation requires that we do these
6037         * operations in exactly the order specified by the pseudocode.
6038         */
6039        TCGv_ptr fpst = get_fpstatus_ptr(size == MO_16);
6040        int fpopcode = opcode | is_min << 4 | is_u << 5;
6041        int vmap = (1 << elements) - 1;
6042        TCGv_i32 tcg_res32 = do_reduction_op(s, fpopcode, rn, esize,
6043                                             (is_q ? 128 : 64), vmap, fpst);
6044        tcg_gen_extu_i32_i64(tcg_res, tcg_res32);
6045        tcg_temp_free_i32(tcg_res32);
6046        tcg_temp_free_ptr(fpst);
6047    }
6048
6049    tcg_temp_free_i64(tcg_elt);
6050
6051    /* Now truncate the result to the width required for the final output */
6052    if (opcode == 0x03) {
6053        /* SADDLV, UADDLV: result is 2*esize */
6054        size++;
6055    }
6056
6057    switch (size) {
6058    case 0:
6059        tcg_gen_ext8u_i64(tcg_res, tcg_res);
6060        break;
6061    case 1:
6062        tcg_gen_ext16u_i64(tcg_res, tcg_res);
6063        break;
6064    case 2:
6065        tcg_gen_ext32u_i64(tcg_res, tcg_res);
6066        break;
6067    case 3:
6068        break;
6069    default:
6070        g_assert_not_reached();
6071    }
6072
6073    write_fp_dreg(s, rd, tcg_res);
6074    tcg_temp_free_i64(tcg_res);
6075}
6076
6077/* DUP (Element, Vector)
6078 *
6079 *  31  30   29              21 20    16 15        10  9    5 4    0
6080 * +---+---+-------------------+--------+-------------+------+------+
6081 * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 0 1 |  Rn  |  Rd  |
6082 * +---+---+-------------------+--------+-------------+------+------+
6083 *
6084 * size: encoded in imm5 (see ARM ARM LowestSetBit())
6085 */
6086static void handle_simd_dupe(DisasContext *s, int is_q, int rd, int rn,
6087                             int imm5)
6088{
6089    int size = ctz32(imm5);
6090    int index = imm5 >> (size + 1);
6091
6092    if (size > 3 || (size == 3 && !is_q)) {
6093        unallocated_encoding(s);
6094        return;
6095    }
6096
6097    if (!fp_access_check(s)) {
6098        return;
6099    }
6100
6101    tcg_gen_gvec_dup_mem(size, vec_full_reg_offset(s, rd),
6102                         vec_reg_offset(s, rn, index, size),
6103                         is_q ? 16 : 8, vec_full_reg_size(s));
6104}
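
/* imm5 decode example (illustrative): imm5 = 0b10010 has its lowest
 * set bit at position 1, so size = 1 (16-bit elements) and
 * index = imm5 >> 2 = 4, i.e. DUP Vd.8H, Vn.H[4] for the Q = 1 form.
 */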
6105
6106/* DUP (element, scalar)
6107 *  31                   21 20    16 15        10  9    5 4    0
6108 * +-----------------------+--------+-------------+------+------+
6109 * | 0 1 0 1 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 0 1 |  Rn  |  Rd  |
6110 * +-----------------------+--------+-------------+------+------+
6111 */
6112static void handle_simd_dupes(DisasContext *s, int rd, int rn,
6113                              int imm5)
6114{
6115    int size = ctz32(imm5);
6116    int index;
6117    TCGv_i64 tmp;
6118
6119    if (size > 3) {
6120        unallocated_encoding(s);
6121        return;
6122    }
6123
6124    if (!fp_access_check(s)) {
6125        return;
6126    }
6127
6128    index = imm5 >> (size + 1);
6129
6130    /* This instruction just extracts the specified element and
6131     * zero-extends it into the bottom of the destination register.
6132     */
6133    tmp = tcg_temp_new_i64();
6134    read_vec_element(s, tmp, rn, index, size);
6135    write_fp_dreg(s, rd, tmp);
6136    tcg_temp_free_i64(tmp);
6137}
6138
6139/* DUP (General)
6140 *
6141 *  31  30   29              21 20    16 15        10  9    5 4    0
6142 * +---+---+-------------------+--------+-------------+------+------+
6143 * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 1 1 |  Rn  |  Rd  |
6144 * +---+---+-------------------+--------+-------------+------+------+
6145 *
6146 * size: encoded in imm5 (see ARM ARM LowestSetBit())
6147 */
6148static void handle_simd_dupg(DisasContext *s, int is_q, int rd, int rn,
6149                             int imm5)
6150{
6151    int size = ctz32(imm5);
6152    uint32_t dofs, oprsz, maxsz;
6153
6154    if (size > 3 || ((size == 3) && !is_q)) {
6155        unallocated_encoding(s);
6156        return;
6157    }
6158
6159    if (!fp_access_check(s)) {
6160        return;
6161    }
6162
6163    dofs = vec_full_reg_offset(s, rd);
6164    oprsz = is_q ? 16 : 8;
6165    maxsz = vec_full_reg_size(s);
6166
6167    tcg_gen_gvec_dup_i64(size, dofs, oprsz, maxsz, cpu_reg(s, rn));
6168}
6169
6170/* INS (Element)
6171 *
6172 *  31                   21 20    16 15  14    11  10 9    5 4    0
6173 * +-----------------------+--------+------------+---+------+------+
6174 * | 0 1 1 0 1 1 1 0 0 0 0 |  imm5  | 0 |  imm4  | 1 |  Rn  |  Rd  |
6175 * +-----------------------+--------+------------+---+------+------+
6176 *
6177 * size: encoded in imm5 (see ARM ARM LowestSetBit())
6178 * index: encoded in imm5<4:size+1>
6179 */
6180static void handle_simd_inse(DisasContext *s, int rd, int rn,
6181                             int imm4, int imm5)
6182{
6183    int size = ctz32(imm5);
6184    int src_index, dst_index;
6185    TCGv_i64 tmp;
6186
6187    if (size > 3) {
6188        unallocated_encoding(s);
6189        return;
6190    }
6191
6192    if (!fp_access_check(s)) {
6193        return;
6194    }
6195
6196    dst_index = extract32(imm5, 1 + size, 5);
6197    src_index = extract32(imm4, size, 4);
6198
6199    tmp = tcg_temp_new_i64();
6200
6201    read_vec_element(s, tmp, rn, src_index, size);
6202    write_vec_element(s, tmp, rd, dst_index, size);
6203
6204    tcg_temp_free_i64(tmp);
6205}
6206
6208/* INS (General)
6209 *
6210 *  31                   21 20    16 15        10  9    5 4    0
6211 * +-----------------------+--------+-------------+------+------+
6212 * | 0 1 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 1 1 1 |  Rn  |  Rd  |
6213 * +-----------------------+--------+-------------+------+------+
6214 *
6215 * size: encoded in imm5 (see ARM ARM LowestSetBit())
6216 * index: encoded in imm5<4:size+1>
6217 */
6218static void handle_simd_insg(DisasContext *s, int rd, int rn, int imm5)
6219{
6220    int size = ctz32(imm5);
6221    int idx;
6222
6223    if (size > 3) {
6224        unallocated_encoding(s);
6225        return;
6226    }
6227
6228    if (!fp_access_check(s)) {
6229        return;
6230    }
6231
6232    idx = extract32(imm5, 1 + size, 4 - size);
6233    write_vec_element(s, cpu_reg(s, rn), rd, idx, size);
6234}
6235
6236/*
6237 * UMOV (General)
6238 * SMOV (General)
6239 *
6240 *  31  30   29              21 20    16 15        10  9    5 4    0
6241 * +---+---+-------------------+--------+-------------+------+------+
6242 * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 1 U 1 1 |  Rn  |  Rd  |
6243 * +---+---+-------------------+--------+-------------+------+------+
6244 *
6245 * U: unsigned when set
6246 * size: encoded in imm5 (see ARM ARM LowestSetBit())
6247 */
6248static void handle_simd_umov_smov(DisasContext *s, int is_q, int is_signed,
6249                                  int rn, int rd, int imm5)
6250{
6251    int size = ctz32(imm5);
6252    int element;
6253    TCGv_i64 tcg_rd;
6254
6255    /* Check for UnallocatedEncodings */
6256    if (is_signed) {
6257        if (size > 2 || (size == 2 && !is_q)) {
6258            unallocated_encoding(s);
6259            return;
6260        }
6261    } else {
6262        if (size > 3
6263            || (size < 3 && is_q)
6264            || (size == 3 && !is_q)) {
6265            unallocated_encoding(s);
6266            return;
6267        }
6268    }
6269
6270    if (!fp_access_check(s)) {
6271        return;
6272    }
6273
6274    element = extract32(imm5, 1 + size, 4);
6275
6276    tcg_rd = cpu_reg(s, rd);
6277    read_vec_element(s, tcg_rd, rn, element, size | (is_signed ? MO_SIGN : 0));
6278    if (is_signed && !is_q) {
6279        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
6280    }
6281}
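
/* Worked example (illustrative): UMOV W0, V1.B[3] encodes
 * imm5 = 0b00111, so size = 0 and element = imm5<4:1> = 3; the byte
 * is read zero-extended into W0. For SMOV the element is read
 * sign-extended, and the ext32u above then clears bits 63:32 when the
 * destination is a W register (!is_q).
 */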
6282
6283/* AdvSIMD copy
6284 *   31  30  29  28             21 20  16 15  14  11 10  9    5 4    0
6285 * +---+---+----+-----------------+------+---+------+---+------+------+
6286 * | 0 | Q | op | 0 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 |  Rn  |  Rd  |
6287 * +---+---+----+-----------------+------+---+------+---+------+------+
6288 */
6289static void disas_simd_copy(DisasContext *s, uint32_t insn)
6290{
6291    int rd = extract32(insn, 0, 5);
6292    int rn = extract32(insn, 5, 5);
6293    int imm4 = extract32(insn, 11, 4);
6294    int op = extract32(insn, 29, 1);
6295    int is_q = extract32(insn, 30, 1);
6296    int imm5 = extract32(insn, 16, 5);
6297
6298    if (op) {
6299        if (is_q) {
6300            /* INS (element) */
6301            handle_simd_inse(s, rd, rn, imm4, imm5);
6302        } else {
6303            unallocated_encoding(s);
6304        }
6305    } else {
6306        switch (imm4) {
6307        case 0:
6308            /* DUP (element - vector) */
6309            handle_simd_dupe(s, is_q, rd, rn, imm5);
6310            break;
6311        case 1:
6312            /* DUP (general) */
6313            handle_simd_dupg(s, is_q, rd, rn, imm5);
6314            break;
6315        case 3:
6316            if (is_q) {
6317                /* INS (general) */
6318                handle_simd_insg(s, rd, rn, imm5);
6319            } else {
6320                unallocated_encoding(s);
6321            }
6322            break;
6323        case 5:
6324        case 7:
6325            /* UMOV/SMOV (is_q indicates 32/64; imm4 indicates signedness) */
6326            handle_simd_umov_smov(s, is_q, (imm4 == 5), rn, rd, imm5);
6327            break;
6328        default:
6329            unallocated_encoding(s);
6330            break;
6331        }
6332    }
6333}
6334
6335/* AdvSIMD modified immediate
6336 *  31  30   29  28                 19 18 16 15   12  11  10  9     5 4    0
6337 * +---+---+----+---------------------+-----+-------+----+---+-------+------+
6338 * | 0 | Q | op | 0 1 1 1 1 0 0 0 0 0 | abc | cmode | o2 | 1 | defgh |  Rd  |
6339 * +---+---+----+---------------------+-----+-------+----+---+-------+------+
6340 *
6341 * There are a number of operations that can be carried out here:
6342 *   MOVI - move (shifted) imm into register
6343 *   MVNI - move inverted (shifted) imm into register
6344 *   ORR  - bitwise OR of (shifted) imm with register
6345 *   BIC  - bitwise clear of (shifted) imm with register
6346 * With ARMv8.2 we also have:
6347 *   FMOV half-precision
6348 */
6349static void disas_simd_mod_imm(DisasContext *s, uint32_t insn)
6350{
6351    int rd = extract32(insn, 0, 5);
6352    int cmode = extract32(insn, 12, 4);
6353    int cmode_3_1 = extract32(cmode, 1, 3);
6354    int cmode_0 = extract32(cmode, 0, 1);
6355    int o2 = extract32(insn, 11, 1);
6356    uint64_t abcdefgh = extract32(insn, 5, 5) | (extract32(insn, 16, 3) << 5);
6357    bool is_neg = extract32(insn, 29, 1);
6358    bool is_q = extract32(insn, 30, 1);
6359    uint64_t imm = 0;
6360
6361    if (o2 != 0 || ((cmode == 0xf) && is_neg && !is_q)) {
6362        /* Check for FMOV (vector, immediate) - half-precision */
6363        if (!(arm_dc_feature(s, ARM_FEATURE_V8_FP16) && o2 && cmode == 0xf)) {
6364            unallocated_encoding(s);
6365            return;
6366        }
6367    }
6368
6369    if (!fp_access_check(s)) {
6370        return;
6371    }
6372
6373    /* See AdvSIMDExpandImm() in ARM ARM */
6374    switch (cmode_3_1) {
6375    case 0: /* Replicate(Zeros(24):imm8, 2) */
6376    case 1: /* Replicate(Zeros(16):imm8:Zeros(8), 2) */
6377    case 2: /* Replicate(Zeros(8):imm8:Zeros(16), 2) */
6378    case 3: /* Replicate(imm8:Zeros(24), 2) */
6379    {
6380        int shift = cmode_3_1 * 8;
6381        imm = bitfield_replicate(abcdefgh << shift, 32);
6382        break;
6383    }
6384    case 4: /* Replicate(Zeros(8):imm8, 4) */
6385    case 5: /* Replicate(imm8:Zeros(8), 4) */
6386    {
6387        int shift = (cmode_3_1 & 0x1) * 8;
6388        imm = bitfield_replicate(abcdefgh << shift, 16);
6389        break;
6390    }
6391    case 6:
6392        if (cmode_0) {
6393            /* Replicate(Zeros(8):imm8:Ones(16), 2) */
6394            imm = (abcdefgh << 16) | 0xffff;
6395        } else {
6396            /* Replicate(Zeros(16):imm8:Ones(8), 2) */
6397            imm = (abcdefgh << 8) | 0xff;
6398        }
6399        imm = bitfield_replicate(imm, 32);
6400        break;
6401    case 7:
6402        if (!cmode_0 && !is_neg) {
6403            imm = bitfield_replicate(abcdefgh, 8);
6404        } else if (!cmode_0 && is_neg) {
6405            int i;
6406            imm = 0;
6407            for (i = 0; i < 8; i++) {
6408                if ((abcdefgh) & (1 << i)) {
6409                    imm |= 0xffULL << (i * 8);
6410                }
6411            }
6412        } else if (cmode_0) {
6413            if (is_neg) {
6414                imm = (abcdefgh & 0x3f) << 48;
6415                if (abcdefgh & 0x80) {
6416                    imm |= 0x8000000000000000ULL;
6417                }
6418                if (abcdefgh & 0x40) {
6419                    imm |= 0x3fc0000000000000ULL;
6420                } else {
6421                    imm |= 0x4000000000000000ULL;
6422                }
6423            } else {
6424                if (o2) {
6425                    /* FMOV (vector, immediate) - half-precision */
6426                    imm = vfp_expand_imm(MO_16, abcdefgh);
6427                    /* now duplicate across the lanes */
6428                    imm = bitfield_replicate(imm, 16);
6429                } else {
6430                    imm = (abcdefgh & 0x3f) << 19;
6431                    if (abcdefgh & 0x80) {
6432                        imm |= 0x80000000;
6433                    }
6434                    if (abcdefgh & 0x40) {
6435                        imm |= 0x3e000000;
6436                    } else {
6437                        imm |= 0x40000000;
6438                    }
6439                    imm |= (imm << 32);
6440                }
6441            }
6442        }
6443        break;
6444    default:
6445        fprintf(stderr, "%s: cmode_3_1: %x\n", __func__, cmode_3_1);
6446        g_assert_not_reached();
6447    }
6448
6449    if (cmode_3_1 != 7 && is_neg) {
6450        imm = ~imm;
6451    }
6452
6453    if (!((cmode & 0x9) == 0x1 || (cmode & 0xd) == 0x9)) {
6454        /* MOVI or MVNI, with MVNI negation handled above.  */
6455        tcg_gen_gvec_dup64i(vec_full_reg_offset(s, rd), is_q ? 16 : 8,
6456                            vec_full_reg_size(s), imm);
6457    } else {
6458        /* ORR or BIC, with BIC negation to AND handled above.  */
6459        if (is_neg) {
6460            gen_gvec_fn2i(s, is_q, rd, rd, imm, tcg_gen_gvec_andi, MO_64);
6461        } else {
6462            gen_gvec_fn2i(s, is_q, rd, rd, imm, tcg_gen_gvec_ori, MO_64);
6463        }
6464    }
6465}
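
/* Worked expansion example (illustrative): MOVI Vd.4S, #0x55, LSL #8
 * has cmode = 0b0010 (cmode_3_1 = 1) and abcdefgh = 0x55, so
 * imm = bitfield_replicate(0x55 << 8, 32) = 0x0000550000005500, which
 * tcg_gen_gvec_dup64i() then replicates across the whole vector.
 */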
6466
6467/* AdvSIMD scalar copy
6468 *  31 30  29  28             21 20  16 15  14  11 10  9    5 4    0
6469 * +-----+----+-----------------+------+---+------+---+------+------+
6470 * | 0 1 | op | 1 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 |  Rn  |  Rd  |
6471 * +-----+----+-----------------+------+---+------+---+------+------+
6472 */
6473static void disas_simd_scalar_copy(DisasContext *s, uint32_t insn)
6474{
6475    int rd = extract32(insn, 0, 5);
6476    int rn = extract32(insn, 5, 5);
6477    int imm4 = extract32(insn, 11, 4);
6478    int imm5 = extract32(insn, 16, 5);
6479    int op = extract32(insn, 29, 1);
6480
6481    if (op != 0 || imm4 != 0) {
6482        unallocated_encoding(s);
6483        return;
6484    }
6485
6486    /* DUP (element, scalar) */
6487    handle_simd_dupes(s, rd, rn, imm5);
6488}
6489
6490/* AdvSIMD scalar pairwise
6491 *  31 30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
6492 * +-----+---+-----------+------+-----------+--------+-----+------+------+
6493 * | 0 1 | U | 1 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
6494 * +-----+---+-----------+------+-----------+--------+-----+------+------+
6495 */
6496static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn)
6497{
6498    int u = extract32(insn, 29, 1);
6499    int size = extract32(insn, 22, 2);
6500    int opcode = extract32(insn, 12, 5);
6501    int rn = extract32(insn, 5, 5);
6502    int rd = extract32(insn, 0, 5);
6503    TCGv_ptr fpst;
6504
6505    /* For some ops (the FP ones), size[1] is part of the encoding.
6506     * For ADDP, strictly speaking, it is not, but size[1] is always 1 for valid
6507     * encodings.
6508     */
6509    opcode |= (extract32(size, 1, 1) << 5);
6510
6511    switch (opcode) {
6512    case 0x3b: /* ADDP */
6513        if (u || size != 3) {
6514            unallocated_encoding(s);
6515            return;
6516        }
6517        if (!fp_access_check(s)) {
6518            return;
6519        }
6520
6521        fpst = NULL;
6522        break;
6523    case 0xc: /* FMAXNMP */
6524    case 0xd: /* FADDP */
6525    case 0xf: /* FMAXP */
6526    case 0x2c: /* FMINNMP */
6527    case 0x2f: /* FMINP */
6528        /* FP op, size[0] selects 32-bit or 64-bit */
6529        if (!u) {
6530            if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
6531                unallocated_encoding(s);
6532                return;
6533            } else {
6534                size = MO_16;
6535            }
6536        } else {
6537            size = extract32(size, 0, 1) ? MO_64 : MO_32;
6538        }
6539
6540        if (!fp_access_check(s)) {
6541            return;
6542        }
6543
6544        fpst = get_fpstatus_ptr(size == MO_16);
6545        break;
6546    default:
6547        unallocated_encoding(s);
6548        return;
6549    }
6550
6551    if (size == MO_64) {
6552        TCGv_i64 tcg_op1 = tcg_temp_new_i64();
6553        TCGv_i64 tcg_op2 = tcg_temp_new_i64();
6554        TCGv_i64 tcg_res = tcg_temp_new_i64();
6555
6556        read_vec_element(s, tcg_op1, rn, 0, MO_64);
6557        read_vec_element(s, tcg_op2, rn, 1, MO_64);
6558
6559        switch (opcode) {
6560        case 0x3b: /* ADDP */
6561            tcg_gen_add_i64(tcg_res, tcg_op1, tcg_op2);
6562            break;
6563        case 0xc: /* FMAXNMP */
6564            gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
6565            break;
6566        case 0xd: /* FADDP */
6567            gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
6568            break;
6569        case 0xf: /* FMAXP */
6570            gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
6571            break;
6572        case 0x2c: /* FMINNMP */
6573            gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
6574            break;
6575        case 0x2f: /* FMINP */
6576            gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
6577            break;
6578        default:
6579            g_assert_not_reached();
6580        }
6581
6582        write_fp_dreg(s, rd, tcg_res);
6583
6584        tcg_temp_free_i64(tcg_op1);
6585        tcg_temp_free_i64(tcg_op2);
6586        tcg_temp_free_i64(tcg_res);
6587    } else {
6588        TCGv_i32 tcg_op1 = tcg_temp_new_i32();
6589        TCGv_i32 tcg_op2 = tcg_temp_new_i32();
6590        TCGv_i32 tcg_res = tcg_temp_new_i32();
6591
6592        read_vec_element_i32(s, tcg_op1, rn, 0, size);
6593        read_vec_element_i32(s, tcg_op2, rn, 1, size);
6594
6595        if (size == MO_16) {
6596            switch (opcode) {
6597            case 0xc: /* FMAXNMP */
6598                gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst);
6599                break;
6600            case 0xd: /* FADDP */
6601                gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst);
6602                break;
6603            case 0xf: /* FMAXP */
6604                gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst);
6605                break;
6606            case 0x2c: /* FMINNMP */
6607                gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst);
6608                break;
6609            case 0x2f: /* FMINP */
6610                gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst);
6611                break;
6612            default:
6613                g_assert_not_reached();
6614            }
6615        } else {
6616            switch (opcode) {
6617            case 0xc: /* FMAXNMP */
6618                gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
6619                break;
6620            case 0xd: /* FADDP */
6621                gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
6622                break;
6623            case 0xf: /* FMAXP */
6624                gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
6625                break;
6626            case 0x2c: /* FMINNMP */
6627                gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
6628                break;
6629            case 0x2f: /* FMINP */
6630                gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
6631                break;
6632            default:
6633                g_assert_not_reached();
6634            }
6635        }
6636
6637        write_fp_sreg(s, rd, tcg_res);
6638
6639        tcg_temp_free_i32(tcg_op1);
6640        tcg_temp_free_i32(tcg_op2);
6641        tcg_temp_free_i32(tcg_res);
6642    }
6643
6644    if (fpst) {
6645        tcg_temp_free_ptr(fpst);
6646    }
6647}
6648
6649/*
6650 * Common SSHR[RA]/USHR[RA] - Shift right (optional rounding/accumulate)
6651 *
6652 * This function handles the common shifting logic and is used by both
6653 * the vector and scalar code.
6654 */
6655static void handle_shri_with_rndacc(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
6656                                    TCGv_i64 tcg_rnd, bool accumulate,
6657                                    bool is_u, int size, int shift)
6658{
6659    bool extended_result = false;
6660    bool round = tcg_rnd != NULL;
6661    int ext_lshift = 0;
6662    TCGv_i64 tcg_src_hi;
6663
6664    if (round && size == 3) {
6665        extended_result = true;
6666        ext_lshift = 64 - shift;
6667        tcg_src_hi = tcg_temp_new_i64();
6668    } else if (shift == 64) {
6669        if (!accumulate && is_u) {
6670            /* result is zero */
6671            tcg_gen_movi_i64(tcg_res, 0);
6672            return;
6673        }
6674    }
6675
6676    /* Deal with the rounding step */
6677    if (round) {
6678        if (extended_result) {
6679            TCGv_i64 tcg_zero = tcg_const_i64(0);
6680            if (!is_u) {
6681                /* take care of sign extending tcg_res */
6682                tcg_gen_sari_i64(tcg_src_hi, tcg_src, 63);
6683                tcg_gen_add2_i64(tcg_src, tcg_src_hi,
6684                                 tcg_src, tcg_src_hi,
6685                                 tcg_rnd, tcg_zero);
6686            } else {
6687                tcg_gen_add2_i64(tcg_src, tcg_src_hi,
6688                                 tcg_src, tcg_zero,
6689                                 tcg_rnd, tcg_zero);
6690            }
6691            tcg_temp_free_i64(tcg_zero);
6692        } else {
6693            tcg_gen_add_i64(tcg_src, tcg_src, tcg_rnd);
6694        }
6695    }
6696
6697    /* Now do the shift right */
6698    if (round && extended_result) {
6699        /* extended case, >64 bit precision required */
6700        if (ext_lshift == 0) {
6701            /* special case, only high bits matter */
6702            tcg_gen_mov_i64(tcg_src, tcg_src_hi);
6703        } else {
6704            tcg_gen_shri_i64(tcg_src, tcg_src, shift);
6705            tcg_gen_shli_i64(tcg_src_hi, tcg_src_hi, ext_lshift);
6706            tcg_gen_or_i64(tcg_src, tcg_src, tcg_src_hi);
6707        }
6708    } else {
6709        if (is_u) {
6710            if (shift == 64) {
6711                /* essentially shifting in 64 zeros */
6712                tcg_gen_movi_i64(tcg_src, 0);
6713            } else {
6714                tcg_gen_shri_i64(tcg_src, tcg_src, shift);
6715            }
6716        } else {
6717            if (shift == 64) {
6718                /* effectively extending the sign-bit */
6719                tcg_gen_sari_i64(tcg_src, tcg_src, 63);
6720            } else {
6721                tcg_gen_sari_i64(tcg_src, tcg_src, shift);
6722            }
6723        }
6724    }
6725
6726    if (accumulate) {
6727        tcg_gen_add_i64(tcg_res, tcg_res, tcg_src);
6728    } else {
6729        tcg_gen_mov_i64(tcg_res, tcg_src);
6730    }
6731
6732    if (extended_result) {
6733        tcg_temp_free_i64(tcg_src_hi);
6734    }
6735}
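/*
 * Editor's note (illustrative, not in the original source): a worked
 * example of the rounding path above. For SRSHR #2 on the value 6 the
 * caller passes tcg_rnd == 1 << (2 - 1) == 2, so the result is
 * (6 + 2) >> 2 == 2, where a plain truncating shift gives 6 >> 2 == 1.
 */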
6736
6737/* SSHR[RA]/USHR[RA] - Scalar shift right (optional rounding/accumulate) */
6738static void handle_scalar_simd_shri(DisasContext *s,
6739                                    bool is_u, int immh, int immb,
6740                                    int opcode, int rn, int rd)
6741{
6742    const int size = 3;
6743    int immhb = immh << 3 | immb;
6744    int shift = 2 * (8 << size) - immhb;
6745    bool accumulate = false;
6746    bool round = false;
6747    bool insert = false;
6748    TCGv_i64 tcg_rn;
6749    TCGv_i64 tcg_rd;
6750    TCGv_i64 tcg_round;
6751
6752    if (!extract32(immh, 3, 1)) {
6753        unallocated_encoding(s);
6754        return;
6755    }
6756
6757    if (!fp_access_check(s)) {
6758        return;
6759    }
6760
6761    switch (opcode) {
6762    case 0x02: /* SSRA / USRA (accumulate) */
6763        accumulate = true;
6764        break;
6765    case 0x04: /* SRSHR / URSHR (rounding) */
6766        round = true;
6767        break;
6768    case 0x06: /* SRSRA / URSRA (accum + rounding) */
6769        accumulate = round = true;
6770        break;
6771    case 0x08: /* SRI */
6772        insert = true;
6773        break;
6774    }
6775
6776    if (round) {
6777        uint64_t round_const = 1ULL << (shift - 1);
6778        tcg_round = tcg_const_i64(round_const);
6779    } else {
6780        tcg_round = NULL;
6781    }
6782
6783    tcg_rn = read_fp_dreg(s, rn);
6784    tcg_rd = (accumulate || insert) ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
6785
6786    if (insert) {
6787        /* shift count same as element size is valid but does nothing;
6788         * special case to avoid potential shift by 64.
6789         */
6790        int esize = 8 << size;
6791        if (shift != esize) {
6792            tcg_gen_shri_i64(tcg_rn, tcg_rn, shift);
6793            tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, 0, esize - shift);
6794        }
6795    } else {
6796        handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
6797                                accumulate, is_u, size, shift);
6798    }
6799
6800    write_fp_dreg(s, rd, tcg_rd);
6801
6802    tcg_temp_free_i64(tcg_rn);
6803    tcg_temp_free_i64(tcg_rd);
6804    if (round) {
6805        tcg_temp_free_i64(tcg_round);
6806    }
6807}
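/*
 * Editor's note (illustrative): the scalar forms operate on a fixed
 * 64-bit element, so the right-shift amount decodes as
 * 128 - (immh:immb): immh:immb == 0b1111111 gives shift == 1, and
 * immh:immb == 0b1000000 gives the maximum shift of 64.
 */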
6808
6809/* SHL/SLI - Scalar shift left */
6810static void handle_scalar_simd_shli(DisasContext *s, bool insert,
6811                                    int immh, int immb, int opcode,
6812                                    int rn, int rd)
6813{
6814    int size = 32 - clz32(immh) - 1;
6815    int immhb = immh << 3 | immb;
6816    int shift = immhb - (8 << size);
6817    TCGv_i64 tcg_rn;
6818    TCGv_i64 tcg_rd;
6819
6820    if (!extract32(immh, 3, 1)) {
6821        unallocated_encoding(s);
6822        return;
6823    }
6824
6825    if (!fp_access_check(s)) {
6826        return;
6827    }
6828
6829    tcg_rn = read_fp_dreg(s, rn);
6830    tcg_rd = insert ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
6831
6832    if (insert) {
6833        tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, shift, 64 - shift);
6834    } else {
6835        tcg_gen_shli_i64(tcg_rd, tcg_rn, shift);
6836    }
6837
6838    write_fp_dreg(s, rd, tcg_rd);
6839
6840    tcg_temp_free_i64(tcg_rn);
6841    tcg_temp_free_i64(tcg_rd);
6842}
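/*
 * Editor's note (illustrative): for SLI the deposit above implements
 * the "insert" semantics: with shift == 8, bits [63:8] of Rd come from
 * Rn shifted left by 8 while the low 8 bits of Rd are left unchanged.
 */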
6843
6844/* SQSHRN/SQSHRUN - Saturating (signed/unsigned) shift right with
6845 * (signed/unsigned) narrowing */
6846static void handle_vec_simd_sqshrn(DisasContext *s, bool is_scalar, bool is_q,
6847                                   bool is_u_shift, bool is_u_narrow,
6848                                   int immh, int immb, int opcode,
6849                                   int rn, int rd)
6850{
6851    int immhb = immh << 3 | immb;
6852    int size = 32 - clz32(immh) - 1;
6853    int esize = 8 << size;
6854    int shift = (2 * esize) - immhb;
6855    int elements = is_scalar ? 1 : (64 / esize);
6856    bool round = extract32(opcode, 0, 1);
6857    TCGMemOp ldop = (size + 1) | (is_u_shift ? 0 : MO_SIGN);
6858    TCGv_i64 tcg_rn, tcg_rd, tcg_round;
6859    TCGv_i32 tcg_rd_narrowed;
6860    TCGv_i64 tcg_final;
6861
6862    static NeonGenNarrowEnvFn * const signed_narrow_fns[4][2] = {
6863        { gen_helper_neon_narrow_sat_s8,
6864          gen_helper_neon_unarrow_sat8 },
6865        { gen_helper_neon_narrow_sat_s16,
6866          gen_helper_neon_unarrow_sat16 },
6867        { gen_helper_neon_narrow_sat_s32,
6868          gen_helper_neon_unarrow_sat32 },
6869        { NULL, NULL },
6870    };
6871    static NeonGenNarrowEnvFn * const unsigned_narrow_fns[4] = {
6872        gen_helper_neon_narrow_sat_u8,
6873        gen_helper_neon_narrow_sat_u16,
6874        gen_helper_neon_narrow_sat_u32,
6875        NULL
6876    };
6877    NeonGenNarrowEnvFn *narrowfn;
6878
6879    int i;
6880
6881    assert(size < 4);
6882
6883    if (extract32(immh, 3, 1)) {
6884        unallocated_encoding(s);
6885        return;
6886    }
6887
6888    if (!fp_access_check(s)) {
6889        return;
6890    }
6891
6892    if (is_u_shift) {
6893        narrowfn = unsigned_narrow_fns[size];
6894    } else {
6895        narrowfn = signed_narrow_fns[size][is_u_narrow ? 1 : 0];
6896    }
6897
6898    tcg_rn = tcg_temp_new_i64();
6899    tcg_rd = tcg_temp_new_i64();
6900    tcg_rd_narrowed = tcg_temp_new_i32();
6901    tcg_final = tcg_const_i64(0);
6902
6903    if (round) {
6904        uint64_t round_const = 1ULL << (shift - 1);
6905        tcg_round = tcg_const_i64(round_const);
6906    } else {
6907        tcg_round = NULL;
6908    }
6909
6910    for (i = 0; i < elements; i++) {
6911        read_vec_element(s, tcg_rn, rn, i, ldop);
6912        handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
6913                                false, is_u_shift, size+1, shift);
6914        narrowfn(tcg_rd_narrowed, cpu_env, tcg_rd);
6915        tcg_gen_extu_i32_i64(tcg_rd, tcg_rd_narrowed);
6916        tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
6917    }
6918
6919    if (!is_q) {
6920        write_vec_element(s, tcg_final, rd, 0, MO_64);
6921    } else {
6922        write_vec_element(s, tcg_final, rd, 1, MO_64);
6923    }
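    /*
     * Editor's note: the is_q path above is the "2" variant (e.g.
     * SQSHRN2), which deposits the narrowed 64-bit result into the
     * upper half of Vd and preserves the lower half; the base forms
     * write element 0, and clear_vec_high() below zeroes the top half.
     */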
6924
6925    if (round) {
6926        tcg_temp_free_i64(tcg_round);
6927    }
6928    tcg_temp_free_i64(tcg_rn);
6929    tcg_temp_free_i64(tcg_rd);
6930    tcg_temp_free_i32(tcg_rd_narrowed);
6931    tcg_temp_free_i64(tcg_final);
6932
6933    clear_vec_high(s, is_q, rd);
6934}
6935
6936/* SQSHLU, UQSHL, SQSHL: saturating left shifts */
6937static void handle_simd_qshl(DisasContext *s, bool scalar, bool is_q,
6938                             bool src_unsigned, bool dst_unsigned,
6939                             int immh, int immb, int rn, int rd)
6940{
6941    int immhb = immh << 3 | immb;
6942    int size = 32 - clz32(immh) - 1;
6943    int shift = immhb - (8 << size);
6944    int pass;
6945
6946    assert(immh != 0);
6947    assert(!(scalar && is_q));
6948
6949    if (!scalar) {
6950        if (!is_q && extract32(immh, 3, 1)) {
6951            unallocated_encoding(s);
6952            return;
6953        }
6954
6955        /* Since we use the variable-shift helpers we must
6956         * replicate the shift count into each element of
6957         * the tcg_shift value.
6958         */
6959        switch (size) {
6960        case 0:
6961            shift |= shift << 8;
6962            /* fall through */
6963        case 1:
6964            shift |= shift << 16;
6965            break;
6966        case 2:
6967        case 3:
6968            break;
6969        default:
6970            g_assert_not_reached();
6971        }
6972    }
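    /*
     * Editor's note (illustrative): e.g. for size == 0 (bytes) with an
     * element shift count of 2, the ORs above produce 0x02020202,
     * giving each byte lane the count the variable-shift helper expects.
     */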
6973
6974    if (!fp_access_check(s)) {
6975        return;
6976    }
6977
6978    if (size == 3) {
6979        TCGv_i64 tcg_shift = tcg_const_i64(shift);
6980        static NeonGenTwo64OpEnvFn * const fns[2][2] = {
6981            { gen_helper_neon_qshl_s64, gen_helper_neon_qshlu_s64 },
6982            { NULL, gen_helper_neon_qshl_u64 },
6983        };
6984        NeonGenTwo64OpEnvFn *genfn = fns[src_unsigned][dst_unsigned];
6985        int maxpass = is_q ? 2 : 1;
6986
6987        for (pass = 0; pass < maxpass; pass++) {
6988            TCGv_i64 tcg_op = tcg_temp_new_i64();
6989
6990            read_vec_element(s, tcg_op, rn, pass, MO_64);
6991            genfn(tcg_op, cpu_env, tcg_op, tcg_shift);
6992            write_vec_element(s, tcg_op, rd, pass, MO_64);
6993
6994            tcg_temp_free_i64(tcg_op);
6995        }
6996        tcg_temp_free_i64(tcg_shift);
6997        clear_vec_high(s, is_q, rd);
6998    } else {
6999        TCGv_i32 tcg_shift = tcg_const_i32(shift);
7000        static NeonGenTwoOpEnvFn * const fns[2][2][3] = {
7001            {
7002                { gen_helper_neon_qshl_s8,
7003                  gen_helper_neon_qshl_s16,
7004                  gen_helper_neon_qshl_s32 },
7005                { gen_helper_neon_qshlu_s8,
7006                  gen_helper_neon_qshlu_s16,
7007                  gen_helper_neon_qshlu_s32 }
7008            }, {
7009                { NULL, NULL, NULL },
7010                { gen_helper_neon_qshl_u8,
7011                  gen_helper_neon_qshl_u16,
7012                  gen_helper_neon_qshl_u32 }
7013            }
7014        };
7015        NeonGenTwoOpEnvFn *genfn = fns[src_unsigned][dst_unsigned][size];
7016        TCGMemOp memop = scalar ? size : MO_32;
7017        int maxpass = scalar ? 1 : is_q ? 4 : 2;
7018
7019        for (pass = 0; pass < maxpass; pass++) {
7020            TCGv_i32 tcg_op = tcg_temp_new_i32();
7021
7022            read_vec_element_i32(s, tcg_op, rn, pass, memop);
7023            genfn(tcg_op, cpu_env, tcg_op, tcg_shift);
7024            if (scalar) {
7025                switch (size) {
7026                case 0:
7027                    tcg_gen_ext8u_i32(tcg_op, tcg_op);
7028                    break;
7029                case 1:
7030                    tcg_gen_ext16u_i32(tcg_op, tcg_op);
7031                    break;
7032                case 2:
7033                    break;
7034                default:
7035                    g_assert_not_reached();
7036                }
7037                write_fp_sreg(s, rd, tcg_op);
7038            } else {
7039                write_vec_element_i32(s, tcg_op, rd, pass, MO_32);
7040            }
7041
7042            tcg_temp_free_i32(tcg_op);
7043        }
7044        tcg_temp_free_i32(tcg_shift);
7045
7046        if (!scalar) {
7047            clear_vec_high(s, is_q, rd);
7048        }
7049    }
7050}
7051
7052/* Common vector code for handling integer to FP conversion */
7053static void handle_simd_intfp_conv(DisasContext *s, int rd, int rn,
7054                                   int elements, int is_signed,
7055                                   int fracbits, int size)
7056{
7057    TCGv_ptr tcg_fpst = get_fpstatus_ptr(size == MO_16);
7058    TCGv_i32 tcg_shift = NULL;
7059
7060    TCGMemOp mop = size | (is_signed ? MO_SIGN : 0);
7061    int pass;
7062
7063    if (fracbits || size == MO_64) {
7064        tcg_shift = tcg_const_i32(fracbits);
7065    }
7066
7067    if (size == MO_64) {
7068        TCGv_i64 tcg_int64 = tcg_temp_new_i64();
7069        TCGv_i64 tcg_double = tcg_temp_new_i64();
7070
7071        for (pass = 0; pass < elements; pass++) {
7072            read_vec_element(s, tcg_int64, rn, pass, mop);
7073
7074            if (is_signed) {
7075                gen_helper_vfp_sqtod(tcg_double, tcg_int64,
7076                                     tcg_shift, tcg_fpst);
7077            } else {
7078                gen_helper_vfp_uqtod(tcg_double, tcg_int64,
7079                                     tcg_shift, tcg_fpst);
7080            }
7081            if (elements == 1) {
7082                write_fp_dreg(s, rd, tcg_double);
7083            } else {
7084                write_vec_element(s, tcg_double, rd, pass, MO_64);
7085            }
7086        }
7087
7088        tcg_temp_free_i64(tcg_int64);
7089        tcg_temp_free_i64(tcg_double);
7090
7091    } else {
7092        TCGv_i32 tcg_int32 = tcg_temp_new_i32();
7093        TCGv_i32 tcg_float = tcg_temp_new_i32();
7094
7095        for (pass = 0; pass < elements; pass++) {
7096            read_vec_element_i32(s, tcg_int32, rn, pass, mop);
7097
7098            switch (size) {
7099            case MO_32:
7100                if (fracbits) {
7101                    if (is_signed) {
7102                        gen_helper_vfp_sltos(tcg_float, tcg_int32,
7103                                             tcg_shift, tcg_fpst);
7104                    } else {
7105                        gen_helper_vfp_ultos(tcg_float, tcg_int32,
7106                                             tcg_shift, tcg_fpst);
7107                    }
7108                } else {
7109                    if (is_signed) {
7110                        gen_helper_vfp_sitos(tcg_float, tcg_int32, tcg_fpst);
7111                    } else {
7112                        gen_helper_vfp_uitos(tcg_float, tcg_int32, tcg_fpst);
7113                    }
7114                }
7115                break;
7116            case MO_16:
7117                if (fracbits) {
7118                    if (is_signed) {
7119                        gen_helper_vfp_sltoh(tcg_float, tcg_int32,
7120                                             tcg_shift, tcg_fpst);
7121                    } else {
7122                        gen_helper_vfp_ultoh(tcg_float, tcg_int32,
7123                                             tcg_shift, tcg_fpst);
7124                    }
7125                } else {
7126                    if (is_signed) {
7127                        gen_helper_vfp_sitoh(tcg_float, tcg_int32, tcg_fpst);
7128                    } else {
7129                        gen_helper_vfp_uitoh(tcg_float, tcg_int32, tcg_fpst);
7130                    }
7131                }
7132                break;
7133            default:
7134                g_assert_not_reached();
7135            }
7136
7137            if (elements == 1) {
7138                write_fp_sreg(s, rd, tcg_float);
7139            } else {
7140                write_vec_element_i32(s, tcg_float, rd, pass, size);
7141            }
7142        }
7143
7144        tcg_temp_free_i32(tcg_int32);
7145        tcg_temp_free_i32(tcg_float);
7146    }
7147
7148    tcg_temp_free_ptr(tcg_fpst);
7149    if (tcg_shift) {
7150        tcg_temp_free_i32(tcg_shift);
7151    }
7152
7153    clear_vec_high(s, (elements << size) == 16, rd);
7154}
7155
7156/* UCVTF/SCVTF - Integer to FP conversion */
7157static void handle_simd_shift_intfp_conv(DisasContext *s, bool is_scalar,
7158                                         bool is_q, bool is_u,
7159                                         int immh, int immb, int opcode,
7160                                         int rn, int rd)
7161{
7162    bool is_double = extract32(immh, 3, 1);
7163    int size = is_double ? MO_64 : MO_32;
7164    int elements;
7165    int immhb = immh << 3 | immb;
7166    int fracbits = (is_double ? 128 : 64) - immhb;
7167
7168    if (!extract32(immh, 2, 2)) {
7169        unallocated_encoding(s);
7170        return;
7171    }
7172
7173    if (is_scalar) {
7174        elements = 1;
7175    } else {
7176        elements = is_double ? 2 : is_q ? 4 : 2;
7177        if (is_double && !is_q) {
7178            unallocated_encoding(s);
7179            return;
7180        }
7181    }
7182
7183    if (!fp_access_check(s)) {
7184        return;
7185    }
7186
7187    /* immh == 0 would be a failure of the decode logic */
7188    g_assert(immh);
7189
7190    handle_simd_intfp_conv(s, rd, rn, elements, !is_u, fracbits, size);
7191}
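/*
 * Editor's note (illustrative): fracbits counts down from the top of
 * the immh:immb range. For a single-precision SCVTF with
 * immh:immb == 0b0111000 (56), fracbits == 64 - 56 == 8, so the
 * integer value 384 converts to 384 / 2^8 == 1.5.
 */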
7192
7193/* FCVTZS, FCVTZU - FP to fixed-point conversion */
7194static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar,
7195                                         bool is_q, bool is_u,
7196                                         int immh, int immb, int rn, int rd)
7197{
7198    bool is_double = extract32(immh, 3, 1);
7199    int immhb = immh << 3 | immb;
7200    int fracbits = (is_double ? 128 : 64) - immhb;
7201    int pass;
7202    TCGv_ptr tcg_fpstatus;
7203    TCGv_i32 tcg_rmode, tcg_shift;
7204
7205    if (!extract32(immh, 2, 2)) {
7206        unallocated_encoding(s);
7207        return;
7208    }
7209
7210    if (!is_scalar && !is_q && is_double) {
7211        unallocated_encoding(s);
7212        return;
7213    }
7214
7215    if (!fp_access_check(s)) {
7216        return;
7217    }
7218
7219    assert(!(is_scalar && is_q));
7220
7221    tcg_rmode = tcg_const_i32(arm_rmode_to_sf(FPROUNDING_ZERO));
7222    tcg_fpstatus = get_fpstatus_ptr(false);
7223    gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
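    /*
     * Editor's note: gen_helper_set_rmode() installs the new rounding
     * mode and returns the previous one in tcg_rmode, so the matching
     * call at the end of this function restores the original mode.
     */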
7224    tcg_shift = tcg_const_i32(fracbits);
7225
7226    if (is_double) {
7227        int maxpass = is_scalar ? 1 : 2;
7228
7229        for (pass = 0; pass < maxpass; pass++) {
7230            TCGv_i64 tcg_op = tcg_temp_new_i64();
7231
7232            read_vec_element(s, tcg_op, rn, pass, MO_64);
7233            if (is_u) {
7234                gen_helper_vfp_touqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
7235            } else {
7236                gen_helper_vfp_tosqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
7237            }
7238            write_vec_element(s, tcg_op, rd, pass, MO_64);
7239            tcg_temp_free_i64(tcg_op);
7240        }
7241        clear_vec_high(s, is_q, rd);
7242    } else {
7243        int maxpass = is_scalar ? 1 : is_q ? 4 : 2;
7244        for (pass = 0; pass < maxpass; pass++) {
7245            TCGv_i32 tcg_op = tcg_temp_new_i32();
7246
7247            read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
7248            if (is_u) {
7249                gen_helper_vfp_touls(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
7250            } else {
7251                gen_helper_vfp_tosls(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
7252            }
7253            if (is_scalar) {
7254                write_fp_sreg(s, rd, tcg_op);
7255            } else {
7256                write_vec_element_i32(s, tcg_op, rd, pass, MO_32);
7257            }
7258            tcg_temp_free_i32(tcg_op);
7259        }
7260        if (!is_scalar) {
7261            clear_vec_high(s, is_q, rd);
7262        }
7263    }
7264
7265    tcg_temp_free_i32(tcg_shift);
7266    gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
7267    tcg_temp_free_i32(tcg_rmode);
7268    tcg_temp_free_ptr(tcg_fpstatus);
7269}
7270
7271/* AdvSIMD scalar shift by immediate
7272 *  31 30  29 28         23 22  19 18  16 15    11  10 9    5 4    0
7273 * +-----+---+-------------+------+------+--------+---+------+------+
7274 * | 0 1 | U | 1 1 1 1 1 0 | immh | immb | opcode | 1 |  Rn  |  Rd  |
7275 * +-----+---+-------------+------+------+--------+---+------+------+
7276 *
7277 * This is the scalar version, so it works on fixed-size registers.
7278 */
7279static void disas_simd_scalar_shift_imm(DisasContext *s, uint32_t insn)
7280{
7281    int rd = extract32(insn, 0, 5);
7282    int rn = extract32(insn, 5, 5);
7283    int opcode = extract32(insn, 11, 5);
7284    int immb = extract32(insn, 16, 3);
7285    int immh = extract32(insn, 19, 4);
7286    bool is_u = extract32(insn, 29, 1);
7287
7288    if (immh == 0) {
7289        unallocated_encoding(s);
7290        return;
7291    }
7292
7293    switch (opcode) {
7294    case 0x08: /* SRI */
7295        if (!is_u) {
7296            unallocated_encoding(s);
7297            return;
7298        }
7299        /* fall through */
7300    case 0x00: /* SSHR / USHR */
7301    case 0x02: /* SSRA / USRA */
7302    case 0x04: /* SRSHR / URSHR */
7303    case 0x06: /* SRSRA / URSRA */
7304        handle_scalar_simd_shri(s, is_u, immh, immb, opcode, rn, rd);
7305        break;
7306    case 0x0a: /* SHL / SLI */
7307        handle_scalar_simd_shli(s, is_u, immh, immb, opcode, rn, rd);
7308        break;
7309    case 0x1c: /* SCVTF, UCVTF */
7310        handle_simd_shift_intfp_conv(s, true, false, is_u, immh, immb,
7311                                     opcode, rn, rd);
7312        break;
7313    case 0x10: /* SQSHRUN, SQSHRUN2 */
7314    case 0x11: /* SQRSHRUN, SQRSHRUN2 */
7315        if (!is_u) {
7316            unallocated_encoding(s);
7317            return;
7318        }
7319        handle_vec_simd_sqshrn(s, true, false, false, true,
7320                               immh, immb, opcode, rn, rd);
7321        break;
7322    case 0x12: /* SQSHRN, SQSHRN2, UQSHRN */
7323    case 0x13: /* SQRSHRN, SQRSHRN2, UQRSHRN, UQRSHRN2 */
7324        handle_vec_simd_sqshrn(s, true, false, is_u, is_u,
7325                               immh, immb, opcode, rn, rd);
7326        break;
7327    case 0xc: /* SQSHLU */
7328        if (!is_u) {
7329            unallocated_encoding(s);
7330            return;
7331        }
7332        handle_simd_qshl(s, true, false, false, true, immh, immb, rn, rd);
7333        break;
7334    case 0xe: /* SQSHL, UQSHL */
7335        handle_simd_qshl(s, true, false, is_u, is_u, immh, immb, rn, rd);
7336        break;
7337    case 0x1f: /* FCVTZS, FCVTZU */
7338        handle_simd_shift_fpint_conv(s, true, false, is_u, immh, immb, rn, rd);
7339        break;
7340    default:
7341        unallocated_encoding(s);
7342        break;
7343    }
7344}
7345
7346/* AdvSIMD scalar three different
7347 *  31 30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
7348 * +-----+---+-----------+------+---+------+--------+-----+------+------+
7349 * | 0 1 | U | 1 1 1 1 0 | size | 1 |  Rm  | opcode | 0 0 |  Rn  |  Rd  |
7350 * +-----+---+-----------+------+---+------+--------+-----+------+------+
7351 */
7352static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn)
7353{
7354    bool is_u = extract32(insn, 29, 1);
7355    int size = extract32(insn, 22, 2);
7356    int opcode = extract32(insn, 12, 4);
7357    int rm = extract32(insn, 16, 5);
7358    int rn = extract32(insn, 5, 5);
7359    int rd = extract32(insn, 0, 5);
7360
7361    if (is_u) {
7362        unallocated_encoding(s);
7363        return;
7364    }
7365
7366    switch (opcode) {
7367    case 0x9: /* SQDMLAL, SQDMLAL2 */
7368    case 0xb: /* SQDMLSL, SQDMLSL2 */
7369    case 0xd: /* SQDMULL, SQDMULL2 */
7370        if (size == 0 || size == 3) {
7371            unallocated_encoding(s);
7372            return;
7373        }
7374        break;
7375    default:
7376        unallocated_encoding(s);
7377        return;
7378    }
7379
7380    if (!fp_access_check(s)) {
7381        return;
7382    }
7383
7384    if (size == 2) {
7385        TCGv_i64 tcg_op1 = tcg_temp_new_i64();
7386        TCGv_i64 tcg_op2 = tcg_temp_new_i64();
7387        TCGv_i64 tcg_res = tcg_temp_new_i64();
7388
7389        read_vec_element(s, tcg_op1, rn, 0, MO_32 | MO_SIGN);
7390        read_vec_element(s, tcg_op2, rm, 0, MO_32 | MO_SIGN);
7391
7392        tcg_gen_mul_i64(tcg_res, tcg_op1, tcg_op2);
7393        gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env, tcg_res, tcg_res);
7394
7395        switch (opcode) {
7396        case 0xd: /* SQDMULL, SQDMULL2 */
7397            break;
7398        case 0xb: /* SQDMLSL, SQDMLSL2 */
7399            tcg_gen_neg_i64(tcg_res, tcg_res);
7400            /* fall through */
7401        case 0x9: /* SQDMLAL, SQDMLAL2 */
7402            read_vec_element(s, tcg_op1, rd, 0, MO_64);
7403            gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env,
7404                                              tcg_res, tcg_op1);
7405            break;
7406        default:
7407            g_assert_not_reached();
7408        }
7409
7410        write_fp_dreg(s, rd, tcg_res);
7411
7412        tcg_temp_free_i64(tcg_op1);
7413        tcg_temp_free_i64(tcg_op2);
7414        tcg_temp_free_i64(tcg_res);
7415    } else {
7416        TCGv_i32 tcg_op1 = tcg_temp_new_i32();
7417        TCGv_i32 tcg_op2 = tcg_temp_new_i32();
7418        TCGv_i64 tcg_res = tcg_temp_new_i64();
7419
7420        read_vec_element_i32(s, tcg_op1, rn, 0, MO_16);
7421        read_vec_element_i32(s, tcg_op2, rm, 0, MO_16);
7422
7423        gen_helper_neon_mull_s16(tcg_res, tcg_op1, tcg_op2);
7424        gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env, tcg_res, tcg_res);
7425
7426        switch (opcode) {
7427        case 0xd: /* SQDMULL, SQDMULL2 */
7428            break;
7429        case 0xb: /* SQDMLSL, SQDMLSL2 */
7430            gen_helper_neon_negl_u32(tcg_res, tcg_res);
7431            /* fall through */
7432        case 0x9: /* SQDMLAL, SQDMLAL2 */
7433        {
7434            TCGv_i64 tcg_op3 = tcg_temp_new_i64();
7435            read_vec_element(s, tcg_op3, rd, 0, MO_32);
7436            gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env,
7437                                              tcg_res, tcg_op3);
7438            tcg_temp_free_i64(tcg_op3);
7439            break;
7440        }
7441        default:
7442            g_assert_not_reached();
7443        }
7444
7445        tcg_gen_ext32u_i64(tcg_res, tcg_res);
7446        write_fp_dreg(s, rd, tcg_res);
7447
7448        tcg_temp_free_i32(tcg_op1);
7449        tcg_temp_free_i32(tcg_op2);
7450        tcg_temp_free_i64(tcg_res);
7451    }
7452}
7453
7454/* CMTST : test is "if ((X & Y) != 0)". */
7455static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
7456{
7457    tcg_gen_and_i32(d, a, b);
7458    tcg_gen_setcondi_i32(TCG_COND_NE, d, d, 0);
7459    tcg_gen_neg_i32(d, d);
7460}
7461
7462static void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
7463{
7464    tcg_gen_and_i64(d, a, b);
7465    tcg_gen_setcondi_i64(TCG_COND_NE, d, d, 0);
7466    tcg_gen_neg_i64(d, d);
7467}
7468
7469static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
7470{
7471    tcg_gen_and_vec(vece, d, a, b);
7472    tcg_gen_dupi_vec(vece, a, 0);
7473    tcg_gen_cmp_vec(TCG_COND_NE, vece, d, d, a);
7474}
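/*
 * Editor's note (illustrative): all three expansions above yield an
 * all-ones element when the operands share any set bit. For a == 0x10
 * and b == 0x30, a & b == 0x10 != 0, so the result is -1 (every bit
 * set); disjoint operands give 0.
 */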
7475
7476static void handle_3same_64(DisasContext *s, int opcode, bool u,
7477                            TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, TCGv_i64 tcg_rm)
7478{
7479    /* Handle 64x64->64 opcodes which are shared between the scalar
7480     * and vector 3-same groups. We cover every opcode where size == 3
7481     * is valid in either the three-reg-same (integer, not pairwise)
7482     * or scalar-three-reg-same groups.
7483     */
7484    TCGCond cond;
7485
7486    switch (opcode) {
7487    case 0x1: /* SQADD */
7488        if (u) {
7489            gen_helper_neon_qadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7490        } else {
7491            gen_helper_neon_qadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7492        }
7493        break;
7494    case 0x5: /* SQSUB */
7495        if (u) {
7496            gen_helper_neon_qsub_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7497        } else {
7498            gen_helper_neon_qsub_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7499        }
7500        break;
7501    case 0x6: /* CMGT, CMHI */
7502        /* 64 bit integer comparison, result = test ? (2^64 - 1) : 0.
7503         * We implement this using setcond (test) and then negating.
7504         */
7505        cond = u ? TCG_COND_GTU : TCG_COND_GT;
7506    do_cmop:
7507        tcg_gen_setcond_i64(cond, tcg_rd, tcg_rn, tcg_rm);
7508        tcg_gen_neg_i64(tcg_rd, tcg_rd);
7509        break;
7510    case 0x7: /* CMGE, CMHS */
7511        cond = u ? TCG_COND_GEU : TCG_COND_GE;
7512        goto do_cmop;
7513    case 0x11: /* CMTST, CMEQ */
7514        if (u) {
7515            cond = TCG_COND_EQ;
7516            goto do_cmop;
7517        }
7518        gen_cmtst_i64(tcg_rd, tcg_rn, tcg_rm);
7519        break;
7520    case 0x8: /* SSHL, USHL */
7521        if (u) {
7522            gen_helper_neon_shl_u64(tcg_rd, tcg_rn, tcg_rm);
7523        } else {
7524            gen_helper_neon_shl_s64(tcg_rd, tcg_rn, tcg_rm);
7525        }
7526        break;
7527    case 0x9: /* SQSHL, UQSHL */
7528        if (u) {
7529            gen_helper_neon_qshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7530        } else {
7531            gen_helper_neon_qshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7532        }
7533        break;
7534    case 0xa: /* SRSHL, URSHL */
7535        if (u) {
7536            gen_helper_neon_rshl_u64(tcg_rd, tcg_rn, tcg_rm);
7537        } else {
7538            gen_helper_neon_rshl_s64(tcg_rd, tcg_rn, tcg_rm);
7539        }
7540        break;
7541    case 0xb: /* SQRSHL, UQRSHL */
7542        if (u) {
7543            gen_helper_neon_qrshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7544        } else {
7545            gen_helper_neon_qrshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7546        }
7547        break;
7548    case 0x10: /* ADD, SUB */
7549        if (u) {
7550            tcg_gen_sub_i64(tcg_rd, tcg_rn, tcg_rm);
7551        } else {
7552            tcg_gen_add_i64(tcg_rd, tcg_rn, tcg_rm);
7553        }
7554        break;
7555    default:
7556        g_assert_not_reached();
7557    }
7558}
7559
7560/* Handle the 3-same-operands float operations; shared by the scalar
7561 * and vector encodings. The caller must filter out any encodings
7562 * not allocated for the encoding it is dealing with.
7563 */
7564static void handle_3same_float(DisasContext *s, int size, int elements,
7565                               int fpopcode, int rd, int rn, int rm)
7566{
7567    int pass;
7568    TCGv_ptr fpst = get_fpstatus_ptr(false);
7569
7570    for (pass = 0; pass < elements; pass++) {
7571        if (size) {
7572            /* Double */
7573            TCGv_i64 tcg_op1 = tcg_temp_new_i64();
7574            TCGv_i64 tcg_op2 = tcg_temp_new_i64();
7575            TCGv_i64 tcg_res = tcg_temp_new_i64();
7576
7577            read_vec_element(s, tcg_op1, rn, pass, MO_64);
7578            read_vec_element(s, tcg_op2, rm, pass, MO_64);
7579
7580            switch (fpopcode) {
7581            case 0x39: /* FMLS */
7582                /* As usual for ARM, separate negation for fused multiply-add */
7583                gen_helper_vfp_negd(tcg_op1, tcg_op1);
7584                /* fall through */
7585            case 0x19: /* FMLA */
7586                read_vec_element(s, tcg_res, rd, pass, MO_64);
7587                gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2,
7588                                       tcg_res, fpst);
7589                break;
7590            case 0x18: /* FMAXNM */
7591                gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
7592                break;
7593            case 0x1a: /* FADD */
7594                gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
7595                break;
7596            case 0x1b: /* FMULX */
7597                gen_helper_vfp_mulxd(tcg_res, tcg_op1, tcg_op2, fpst);
7598                break;
7599            case 0x1c: /* FCMEQ */
7600                gen_helper_neon_ceq_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7601                break;
7602            case 0x1e: /* FMAX */
7603                gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
7604                break;
7605            case 0x1f: /* FRECPS */
7606                gen_helper_recpsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7607                break;
7608            case 0x38: /* FMINNM */
7609                gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
7610                break;
7611            case 0x3a: /* FSUB */
7612                gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
7613                break;
7614            case 0x3e: /* FMIN */
7615                gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
7616                break;
7617            case 0x3f: /* FRSQRTS */
7618                gen_helper_rsqrtsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7619                break;
7620            case 0x5b: /* FMUL */
7621                gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
7622                break;
7623            case 0x5c: /* FCMGE */
7624                gen_helper_neon_cge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7625                break;
7626            case 0x5d: /* FACGE */
7627                gen_helper_neon_acge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7628                break;
7629            case 0x5f: /* FDIV */
7630                gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
7631                break;
7632            case 0x7a: /* FABD */
7633                gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
7634                gen_helper_vfp_absd(tcg_res, tcg_res);
7635                break;
7636            case 0x7c: /* FCMGT */
7637                gen_helper_neon_cgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7638                break;
7639            case 0x7d: /* FACGT */
7640                gen_helper_neon_acgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7641                break;
7642            default:
7643                g_assert_not_reached();
7644            }
7645
7646            write_vec_element(s, tcg_res, rd, pass, MO_64);
7647
7648            tcg_temp_free_i64(tcg_res);
7649            tcg_temp_free_i64(tcg_op1);
7650            tcg_temp_free_i64(tcg_op2);
7651        } else {
7652            /* Single */
7653            TCGv_i32 tcg_op1 = tcg_temp_new_i32();
7654            TCGv_i32 tcg_op2 = tcg_temp_new_i32();
7655            TCGv_i32 tcg_res = tcg_temp_new_i32();
7656
7657            read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
7658            read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);
7659
7660            switch (fpopcode) {
7661            case 0x39: /* FMLS */
7662                /* As usual for ARM, separate negation for fused multiply-add */
7663                gen_helper_vfp_negs(tcg_op1, tcg_op1);
7664                /* fall through */
7665            case 0x19: /* FMLA */
7666                read_vec_element_i32(s, tcg_res, rd, pass, MO_32);
7667                gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2,
7668                                       tcg_res, fpst);
7669                break;
7670            case 0x1a: /* FADD */
7671                gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
7672                break;
7673            case 0x1b: /* FMULX */
7674                gen_helper_vfp_mulxs(tcg_res, tcg_op1, tcg_op2, fpst);
7675                break;
7676            case 0x1c: /* FCMEQ */
7677                gen_helper_neon_ceq_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7678                break;
7679            case 0x1e: /* FMAX */
7680                gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
7681                break;
7682            case 0x1f: /* FRECPS */
7683                gen_helper_recpsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7684                break;
7685            case 0x18: /* FMAXNM */
7686                gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
7687                break;
7688            case 0x38: /* FMINNM */
7689                gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
7690                break;
7691            case 0x3a: /* FSUB */
7692                gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
7693                break;
7694            case 0x3e: /* FMIN */
7695                gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
7696                break;
7697            case 0x3f: /* FRSQRTS */
7698                gen_helper_rsqrtsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7699                break;
7700            case 0x5b: /* FMUL */
7701                gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
7702                break;
7703            case 0x5c: /* FCMGE */
7704                gen_helper_neon_cge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7705                break;
7706            case 0x5d: /* FACGE */
7707                gen_helper_neon_acge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7708                break;
7709            case 0x5f: /* FDIV */
7710                gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
7711                break;
7712            case 0x7a: /* FABD */
7713                gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
7714                gen_helper_vfp_abss(tcg_res, tcg_res);
7715                break;
7716            case 0x7c: /* FCMGT */
7717                gen_helper_neon_cgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7718                break;
7719            case 0x7d: /* FACGT */
7720                gen_helper_neon_acgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7721                break;
7722            default:
7723                g_assert_not_reached();
7724            }
7725
7726            if (elements == 1) {
7727                /* scalar single so clear high part */
7728                TCGv_i64 tcg_tmp = tcg_temp_new_i64();
7729
7730                tcg_gen_extu_i32_i64(tcg_tmp, tcg_res);
7731                write_vec_element(s, tcg_tmp, rd, pass, MO_64);
7732                tcg_temp_free_i64(tcg_tmp);
7733            } else {
7734                write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
7735            }
7736
7737            tcg_temp_free_i32(tcg_res);
7738            tcg_temp_free_i32(tcg_op1);
7739            tcg_temp_free_i32(tcg_op2);
7740        }
7741    }
7742
7743    tcg_temp_free_ptr(fpst);
7744
7745    clear_vec_high(s, elements * (size ? 8 : 4) > 8, rd);
7746}
7747
7748/* AdvSIMD scalar three same
7749 *  31 30  29 28       24 23  22  21 20  16 15    11  10 9    5 4    0
7750 * +-----+---+-----------+------+---+------+--------+---+------+------+
7751 * | 0 1 | U | 1 1 1 1 0 | size | 1 |  Rm  | opcode | 1 |  Rn  |  Rd  |
7752 * +-----+---+-----------+------+---+------+--------+---+------+------+
7753 */
7754static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn)
7755{
7756    int rd = extract32(insn, 0, 5);
7757    int rn = extract32(insn, 5, 5);
7758    int opcode = extract32(insn, 11, 5);
7759    int rm = extract32(insn, 16, 5);
7760    int size = extract32(insn, 22, 2);
7761    bool u = extract32(insn, 29, 1);
7762    TCGv_i64 tcg_rd;
7763
7764    if (opcode >= 0x18) {
7765        /* Floating point: U, size[1] and opcode indicate operation */
7766        int fpopcode = opcode | (extract32(size, 1, 1) << 5) | (u << 6);
7767        switch (fpopcode) {
7768        case 0x1b: /* FMULX */
7769        case 0x1f: /* FRECPS */
7770        case 0x3f: /* FRSQRTS */
7771        case 0x5d: /* FACGE */
7772        case 0x7d: /* FACGT */
7773        case 0x1c: /* FCMEQ */
7774        case 0x5c: /* FCMGE */
7775        case 0x7c: /* FCMGT */
7776        case 0x7a: /* FABD */
7777            break;
7778        default:
7779            unallocated_encoding(s);
7780            return;
7781        }
7782
7783        if (!fp_access_check(s)) {
7784            return;
7785        }
7786
7787        handle_3same_float(s, extract32(size, 0, 1), 1, fpopcode, rd, rn, rm);
7788        return;
7789    }
7790
7791    switch (opcode) {
7792    case 0x1: /* SQADD, UQADD */
7793    case 0x5: /* SQSUB, UQSUB */
7794    case 0x9: /* SQSHL, UQSHL */
7795    case 0xb: /* SQRSHL, UQRSHL */
7796        break;
7797    case 0x8: /* SSHL, USHL */
7798    case 0xa: /* SRSHL, URSHL */
7799    case 0x6: /* CMGT, CMHI */
7800    case 0x7: /* CMGE, CMHS */
7801    case 0x11: /* CMTST, CMEQ */
7802    case 0x10: /* ADD, SUB (vector) */
7803        if (size != 3) {
7804            unallocated_encoding(s);
7805            return;
7806        }
7807        break;
7808    case 0x16: /* SQDMULH, SQRDMULH (vector) */
7809        if (size != 1 && size != 2) {
7810            unallocated_encoding(s);
7811            return;
7812        }
7813        break;
7814    default:
7815        unallocated_encoding(s);
7816        return;
7817    }
7818
7819    if (!fp_access_check(s)) {
7820        return;
7821    }
7822
7823    tcg_rd = tcg_temp_new_i64();
7824
7825    if (size == 3) {
7826        TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
7827        TCGv_i64 tcg_rm = read_fp_dreg(s, rm);
7828
7829        handle_3same_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rm);
7830        tcg_temp_free_i64(tcg_rn);
7831        tcg_temp_free_i64(tcg_rm);
7832    } else {
7833        /* Do a single operation on the lowest element in the vector.
7834         * We use the standard Neon helpers and rely on 0 OP 0 == 0 with
7835         * no side effects for all these operations.
7836         * OPTME: special-purpose helpers would avoid doing some
7837         * unnecessary work in the helper for the 8 and 16 bit cases.
7838         */
7839        NeonGenTwoOpEnvFn *genenvfn;
7840        TCGv_i32 tcg_rn = tcg_temp_new_i32();
7841        TCGv_i32 tcg_rm = tcg_temp_new_i32();
7842        TCGv_i32 tcg_rd32 = tcg_temp_new_i32();
7843
7844        read_vec_element_i32(s, tcg_rn, rn, 0, size);
7845        read_vec_element_i32(s, tcg_rm, rm, 0, size);
7846
7847        switch (opcode) {
7848        case 0x1: /* SQADD, UQADD */
7849        {
7850            static NeonGenTwoOpEnvFn * const fns[3][2] = {
7851                { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 },
7852                { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 },
7853                { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 },
7854            };
7855            genenvfn = fns[size][u];
7856            break;
7857        }
7858        case 0x5: /* SQSUB, UQSUB */
7859        {
7860            static NeonGenTwoOpEnvFn * const fns[3][2] = {
7861                { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 },
7862                { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 },
7863                { gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 },
7864            };
7865            genenvfn = fns[size][u];
7866            break;
7867        }
7868        case 0x9: /* SQSHL, UQSHL */
7869        {
7870            static NeonGenTwoOpEnvFn * const fns[3][2] = {
7871                { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
7872                { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
7873                { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
7874            };
7875            genenvfn = fns[size][u];
7876            break;
7877        }
7878        case 0xb: /* SQRSHL, UQRSHL */
7879        {
7880            static NeonGenTwoOpEnvFn * const fns[3][2] = {
7881                { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
7882                { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
7883                { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
7884            };
7885            genenvfn = fns[size][u];
7886            break;
7887        }
7888        case 0x16: /* SQDMULH, SQRDMULH */
7889        {
7890            static NeonGenTwoOpEnvFn * const fns[2][2] = {
7891                { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 },
7892                { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 },
7893            };
7894            assert(size == 1 || size == 2);
7895            genenvfn = fns[size - 1][u];
7896            break;
7897        }
7898        default:
7899            g_assert_not_reached();
7900        }
7901
7902        genenvfn(tcg_rd32, cpu_env, tcg_rn, tcg_rm);
7903        tcg_gen_extu_i32_i64(tcg_rd, tcg_rd32);
7904        tcg_temp_free_i32(tcg_rd32);
7905        tcg_temp_free_i32(tcg_rn);
7906        tcg_temp_free_i32(tcg_rm);
7907    }
7908
7909    write_fp_dreg(s, rd, tcg_rd);
7910
7911    tcg_temp_free_i64(tcg_rd);
7912}
7913
7914/* AdvSIMD scalar three same FP16
7915 *  31 30  29 28       24 23  22 21 20  16 15 14 13    11 10  9  5 4  0
7916 * +-----+---+-----------+---+-----+------+-----+--------+---+----+----+
7917 * | 0 1 | U | 1 1 1 1 0 | a | 1 0 |  Rm  | 0 0 | opcode | 1 | Rn | Rd |
7918 * +-----+---+-----------+---+-----+------+-----+--------+---+----+----+
7919 * v: 0101 1110 0100 0000 0000 0100 0000 0000 => 5e400400
7920 * m: 1101 1111 0110 0000 1100 0100 0000 0000 => df60c400
7921 */
7922static void disas_simd_scalar_three_reg_same_fp16(DisasContext *s,
7923                                                  uint32_t insn)
7924{
7925    int rd = extract32(insn, 0, 5);
7926    int rn = extract32(insn, 5, 5);
7927    int opcode = extract32(insn, 11, 3);
7928    int rm = extract32(insn, 16, 5);
7929    bool u = extract32(insn, 29, 1);
7930    bool a = extract32(insn, 23, 1);
7931    int fpopcode = opcode | (a << 3) | (u << 4);
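    /*
     * Editor's note (illustrative): e.g. FABD encodes opcode == 0b010
     * with a == 1 and U == 1, so fpopcode == 0x2 | (1 << 3) | (1 << 4)
     * == 0x1a, matching the case label below.
     */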
7932    TCGv_ptr fpst;
7933    TCGv_i32 tcg_op1;
7934    TCGv_i32 tcg_op2;
7935    TCGv_i32 tcg_res;
7936
7937    switch (fpopcode) {
7938    case 0x03: /* FMULX */
7939    case 0x04: /* FCMEQ (reg) */
7940    case 0x07: /* FRECPS */
7941    case 0x0f: /* FRSQRTS */
7942    case 0x14: /* FCMGE (reg) */
7943    case 0x15: /* FACGE */
7944    case 0x1a: /* FABD */
7945    case 0x1c: /* FCMGT (reg) */
7946    case 0x1d: /* FACGT */
7947        break;
7948    default:
7949        unallocated_encoding(s);
7950        return;
7951    }
7952
7953    if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
7954        unallocated_encoding(s);
7955        return;
7956    }
7956
7957    if (!fp_access_check(s)) {
7958        return;
7959    }
7960
7961    fpst = get_fpstatus_ptr(true);
7962
7963    tcg_op1 = tcg_temp_new_i32();
7964    tcg_op2 = tcg_temp_new_i32();
7965    tcg_res = tcg_temp_new_i32();
7966
7967    read_vec_element_i32(s, tcg_op1, rn, 0, MO_16);
7968    read_vec_element_i32(s, tcg_op2, rm, 0, MO_16);
7969
7970    switch (fpopcode) {
7971    case 0x03: /* FMULX */
7972        gen_helper_advsimd_mulxh(tcg_res, tcg_op1, tcg_op2, fpst);
7973        break;
7974    case 0x04: /* FCMEQ (reg) */
7975        gen_helper_advsimd_ceq_f16(tcg_res, tcg_op1, tcg_op2, fpst);
7976        break;
7977    case 0x07: /* FRECPS */
7978        gen_helper_recpsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
7979        break;
7980    case 0x0f: /* FRSQRTS */
7981        gen_helper_rsqrtsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
7982        break;
7983    case 0x14: /* FCMGE (reg) */
7984        gen_helper_advsimd_cge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
7985        break;
7986    case 0x15: /* FACGE */
7987        gen_helper_advsimd_acge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
7988        break;
7989    case 0x1a: /* FABD */
7990        gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
7991        tcg_gen_andi_i32(tcg_res, tcg_res, 0x7fff);
7992        break;
7993    case 0x1c: /* FCMGT (reg) */
7994        gen_helper_advsimd_cgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
7995        break;
7996    case 0x1d: /* FACGT */
7997        gen_helper_advsimd_acgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
7998        break;
7999    default:
8000        g_assert_not_reached();
8001    }
8002
8003    write_fp_sreg(s, rd, tcg_res);
8004
8006    tcg_temp_free_i32(tcg_res);
8007    tcg_temp_free_i32(tcg_op1);
8008    tcg_temp_free_i32(tcg_op2);
8009    tcg_temp_free_ptr(fpst);
8010}
8011
8012/* AdvSIMD scalar three same extra
8013 *  31 30  29 28       24 23  22  21 20  16  15 14    11  10 9  5 4  0
8014 * +-----+---+-----------+------+---+------+---+--------+---+----+----+
8015 * | 0 1 | U | 1 1 1 1 0 | size | 0 |  Rm  | 1 | opcode | 1 | Rn | Rd |
8016 * +-----+---+-----------+------+---+------+---+--------+---+----+----+
8017 */
8018static void disas_simd_scalar_three_reg_same_extra(DisasContext *s,
8019                                                   uint32_t insn)
8020{
8021    int rd = extract32(insn, 0, 5);
8022    int rn = extract32(insn, 5, 5);
8023    int opcode = extract32(insn, 11, 4);
8024    int rm = extract32(insn, 16, 5);
8025    int size = extract32(insn, 22, 2);
8026    bool u = extract32(insn, 29, 1);
8027    TCGv_i32 ele1, ele2, ele3;
8028    TCGv_i64 res;
8029    int feature;
8030
8031    switch (u * 16 + opcode) {
8032    case 0x10: /* SQRDMLAH (vector) */
8033    case 0x11: /* SQRDMLSH (vector) */
8034        if (size != 1 && size != 2) {
8035            unallocated_encoding(s);
8036            return;
8037        }
8038        feature = ARM_FEATURE_V8_RDM;
8039        break;
8040    default:
8041        unallocated_encoding(s);
8042        return;
8043    }
8044    if (!arm_dc_feature(s, feature)) {
8045        unallocated_encoding(s);
8046        return;
8047    }
8048    if (!fp_access_check(s)) {
8049        return;
8050    }
8051
8052    /* Do a single operation on the lowest element in the vector.
8053     * We use the standard Neon helpers and rely on 0 OP 0 == 0
8054     * with no side effects for all these operations.
8055     * OPTME: special-purpose helpers would avoid doing some
8056     * unnecessary work in the helper for the 16 bit cases.
8057     */
8058    ele1 = tcg_temp_new_i32();
8059    ele2 = tcg_temp_new_i32();
8060    ele3 = tcg_temp_new_i32();
8061
8062    read_vec_element_i32(s, ele1, rn, 0, size);
8063    read_vec_element_i32(s, ele2, rm, 0, size);
8064    read_vec_element_i32(s, ele3, rd, 0, size);
8065
8066    switch (opcode) {
8067    case 0x0: /* SQRDMLAH */
8068        if (size == 1) {
8069            gen_helper_neon_qrdmlah_s16(ele3, cpu_env, ele1, ele2, ele3);
8070        } else {
8071            gen_helper_neon_qrdmlah_s32(ele3, cpu_env, ele1, ele2, ele3);
8072        }
8073        break;
8074    case 0x1: /* SQRDMLSH */
8075        if (size == 1) {
8076            gen_helper_neon_qrdmlsh_s16(ele3, cpu_env, ele1, ele2, ele3);
8077        } else {
8078            gen_helper_neon_qrdmlsh_s32(ele3, cpu_env, ele1, ele2, ele3);
8079        }
8080        break;
8081    default:
8082        g_assert_not_reached();
8083    }
8084    tcg_temp_free_i32(ele1);
8085    tcg_temp_free_i32(ele2);
8086
8087    res = tcg_temp_new_i64();
8088    tcg_gen_extu_i32_i64(res, ele3);
8089    tcg_temp_free_i32(ele3);
8090
8091    write_fp_dreg(s, rd, res);
8092    tcg_temp_free_i64(res);
8093}
8094
8095static void handle_2misc_64(DisasContext *s, int opcode, bool u,
8096                            TCGv_i64 tcg_rd, TCGv_i64 tcg_rn,
8097                            TCGv_i32 tcg_rmode, TCGv_ptr tcg_fpstatus)
8098{
8099    /* Handle 64->64 opcodes which are shared between the scalar and
8100     * vector 2-reg-misc groups. We cover every integer opcode where size == 3
8101     * is valid in either group and also the double-precision fp ops.
8102     * The caller need only provide tcg_rmode and tcg_fpstatus if the op
8103     * requires them.
8104     */
8105    TCGCond cond;
8106
8107    switch (opcode) {
8108    case 0x4: /* CLS, CLZ */
8109        if (u) {
8110            tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64);
8111        } else {
8112            tcg_gen_clrsb_i64(tcg_rd, tcg_rn);
8113        }
8114        break;
8115    case 0x5: /* NOT */
8116        /* This opcode is shared with CNT and RBIT but we have earlier
8117         * enforced that size == 3 if and only if this is the NOT insn.
8118         */
8119        tcg_gen_not_i64(tcg_rd, tcg_rn);
8120        break;
8121    case 0x7: /* SQABS, SQNEG */
8122        if (u) {
8123            gen_helper_neon_qneg_s64(tcg_rd, cpu_env, tcg_rn);
8124        } else {
8125            gen_helper_neon_qabs_s64(tcg_rd, cpu_env, tcg_rn);
8126        }
8127        break;
8128    case 0xa: /* CMLT */
8129        /* 64 bit integer comparison against zero, result is
8130         * test ? (2^64 - 1) : 0. We implement this via setcond(test)
8131         * and then negating the 0/1 result.
8132         */
8133        cond = TCG_COND_LT;
8134    do_cmop:
8135        tcg_gen_setcondi_i64(cond, tcg_rd, tcg_rn, 0);
8136        tcg_gen_neg_i64(tcg_rd, tcg_rd);
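            /* e.g. CMLT with tcg_rn = -5: setcond(LT) yields 1 and the
             * negation turns it into 0xffffffffffffffff (2^64 - 1).
             */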
8137        break;
8138    case 0x8: /* CMGT, CMGE */
8139        cond = u ? TCG_COND_GE : TCG_COND_GT;
8140        goto do_cmop;
8141    case 0x9: /* CMEQ, CMLE */
8142        cond = u ? TCG_COND_LE : TCG_COND_EQ;
8143        goto do_cmop;
8144    case 0xb: /* ABS, NEG */
8145        if (u) {
8146            tcg_gen_neg_i64(tcg_rd, tcg_rn);
8147        } else {
8148            TCGv_i64 tcg_zero = tcg_const_i64(0);
8149            tcg_gen_neg_i64(tcg_rd, tcg_rn);
8150            tcg_gen_movcond_i64(TCG_COND_GT, tcg_rd, tcg_rn, tcg_zero,
8151                                tcg_rn, tcg_rd);
8152            tcg_temp_free_i64(tcg_zero);
8153        }
8154        break;
8155    case 0x2f: /* FABS */
8156        gen_helper_vfp_absd(tcg_rd, tcg_rn);
8157        break;
8158    case 0x6f: /* FNEG */
8159        gen_helper_vfp_negd(tcg_rd, tcg_rn);
8160        break;
8161    case 0x7f: /* FSQRT */
8162        gen_helper_vfp_sqrtd(tcg_rd, tcg_rn, cpu_env);
8163        break;
8164    case 0x1a: /* FCVTNS */
8165    case 0x1b: /* FCVTMS */
8166    case 0x1c: /* FCVTAS */
8167    case 0x3a: /* FCVTPS */
8168    case 0x3b: /* FCVTZS */
8169    {
8170        TCGv_i32 tcg_shift = tcg_const_i32(0);
8171        gen_helper_vfp_tosqd(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
8172        tcg_temp_free_i32(tcg_shift);
8173        break;
8174    }
8175    case 0x5a: /* FCVTNU */
8176    case 0x5b: /* FCVTMU */
8177    case 0x5c: /* FCVTAU */
8178    case 0x7a: /* FCVTPU */
8179    case 0x7b: /* FCVTZU */
8180    {
8181        TCGv_i32 tcg_shift = tcg_const_i32(0);
8182        gen_helper_vfp_touqd(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
8183        tcg_temp_free_i32(tcg_shift);
8184        break;
8185    }
8186    case 0x18: /* FRINTN */
8187    case 0x19: /* FRINTM */
8188    case 0x38: /* FRINTP */
8189    case 0x39: /* FRINTZ */
8190    case 0x58: /* FRINTA */
8191    case 0x79: /* FRINTI */
8192        gen_helper_rintd(tcg_rd, tcg_rn, tcg_fpstatus);
8193        break;
8194    case 0x59: /* FRINTX */
8195        gen_helper_rintd_exact(tcg_rd, tcg_rn, tcg_fpstatus);
8196        break;
8197    default:
8198        g_assert_not_reached();
8199    }
8200}
8201
8202static void handle_2misc_fcmp_zero(DisasContext *s, int opcode,
8203                                   bool is_scalar, bool is_u, bool is_q,
8204                                   int size, int rn, int rd)
8205{
8206    bool is_double = (size == MO_64);
8207    TCGv_ptr fpst;
8208
8209    if (!fp_access_check(s)) {
8210        return;
8211    }
8212
8213    fpst = get_fpstatus_ptr(size == MO_16);
8214
8215    if (is_double) {
8216        TCGv_i64 tcg_op = tcg_temp_new_i64();
8217        TCGv_i64 tcg_zero = tcg_const_i64(0);
8218        TCGv_i64 tcg_res = tcg_temp_new_i64();
8219        NeonGenTwoDoubleOPFn *genfn;
8220        bool swap = false;
8221        int pass;
8222
8223        switch (opcode) {
8224        case 0x2e: /* FCMLT (zero) */
8225            swap = true;
8226            /* fallthrough */
8227        case 0x2c: /* FCMGT (zero) */
8228            genfn = gen_helper_neon_cgt_f64;
8229            break;
8230        case 0x2d: /* FCMEQ (zero) */
8231            genfn = gen_helper_neon_ceq_f64;
8232            break;
8233        case 0x6d: /* FCMLE (zero) */
8234            swap = true;
8235            /* fall through */
8236        case 0x6c: /* FCMGE (zero) */
8237            genfn = gen_helper_neon_cge_f64;
8238            break;
8239        default:
8240            g_assert_not_reached();
8241        }
8242
8243        for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
8244            read_vec_element(s, tcg_op, rn, pass, MO_64);
8245            if (swap) {
8246                genfn(tcg_res, tcg_zero, tcg_op, fpst);
8247            } else {
8248                genfn(tcg_res, tcg_op, tcg_zero, fpst);
8249            }
8250            write_vec_element(s, tcg_res, rd, pass, MO_64);
8251        }
8252        tcg_temp_free_i64(tcg_res);
8253        tcg_temp_free_i64(tcg_zero);
8254        tcg_temp_free_i64(tcg_op);
8255
8256        clear_vec_high(s, !is_scalar, rd);
8257    } else {
8258        TCGv_i32 tcg_op = tcg_temp_new_i32();
8259        TCGv_i32 tcg_zero = tcg_const_i32(0);
8260        TCGv_i32 tcg_res = tcg_temp_new_i32();
8261        NeonGenTwoSingleOPFn *genfn;
8262        bool swap = false;
8263        int pass, maxpasses;
8264
8265        if (size == MO_16) {
8266            switch (opcode) {
8267            case 0x2e: /* FCMLT (zero) */
8268                swap = true;
8269                /* fall through */
8270            case 0x2c: /* FCMGT (zero) */
8271                genfn = gen_helper_advsimd_cgt_f16;
8272                break;
8273            case 0x2d: /* FCMEQ (zero) */
8274                genfn = gen_helper_advsimd_ceq_f16;
8275                break;
8276            case 0x6d: /* FCMLE (zero) */
8277                swap = true;
8278                /* fall through */
8279            case 0x6c: /* FCMGE (zero) */
8280                genfn = gen_helper_advsimd_cge_f16;
8281                break;
8282            default:
8283                g_assert_not_reached();
8284            }
8285        } else {
8286            switch (opcode) {
8287            case 0x2e: /* FCMLT (zero) */
8288                swap = true;
8289                /* fall through */
8290            case 0x2c: /* FCMGT (zero) */
8291                genfn = gen_helper_neon_cgt_f32;
8292                break;
8293            case 0x2d: /* FCMEQ (zero) */
8294                genfn = gen_helper_neon_ceq_f32;
8295                break;
8296            case 0x6d: /* FCMLE (zero) */
8297                swap = true;
8298                /* fall through */
8299            case 0x6c: /* FCMGE (zero) */
8300                genfn = gen_helper_neon_cge_f32;
8301                break;
8302            default:
8303                g_assert_not_reached();
8304            }
8305        }
8306
8307        if (is_scalar) {
8308            maxpasses = 1;
8309        } else {
8310            int vector_size = 8 << is_q;
8311            maxpasses = vector_size >> size;
8312        }
8313
8314        for (pass = 0; pass < maxpasses; pass++) {
8315            read_vec_element_i32(s, tcg_op, rn, pass, size);
8316            if (swap) {
8317                genfn(tcg_res, tcg_zero, tcg_op, fpst);
8318            } else {
8319                genfn(tcg_res, tcg_op, tcg_zero, fpst);
8320            }
8321            if (is_scalar) {
8322                write_fp_sreg(s, rd, tcg_res);
8323            } else {
8324                write_vec_element_i32(s, tcg_res, rd, pass, size);
8325            }
8326        }
8327        tcg_temp_free_i32(tcg_res);
8328        tcg_temp_free_i32(tcg_zero);
8329        tcg_temp_free_i32(tcg_op);
8330        if (!is_scalar) {
8331            clear_vec_high(s, is_q, rd);
8332        }
8333    }
8334
8335    tcg_temp_free_ptr(fpst);
8336}
8337
8338static void handle_2misc_reciprocal(DisasContext *s, int opcode,
8339                                    bool is_scalar, bool is_u, bool is_q,
8340                                    int size, int rn, int rd)
8341{
8342    bool is_double = (size == 3);
8343    TCGv_ptr fpst = get_fpstatus_ptr(false);
8344
8345    if (is_double) {
8346        TCGv_i64 tcg_op = tcg_temp_new_i64();
8347        TCGv_i64 tcg_res = tcg_temp_new_i64();
8348        int pass;
8349
8350        for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
8351            read_vec_element(s, tcg_op, rn, pass, MO_64);
8352            switch (opcode) {
8353            case 0x3d: /* FRECPE */
8354                gen_helper_recpe_f64(tcg_res, tcg_op, fpst);
8355                break;
8356            case 0x3f: /* FRECPX */
8357                gen_helper_frecpx_f64(tcg_res, tcg_op, fpst);
8358                break;
8359            case 0x7d: /* FRSQRTE */
8360                gen_helper_rsqrte_f64(tcg_res, tcg_op, fpst);
8361                break;
8362            default:
8363                g_assert_not_reached();
8364            }
8365            write_vec_element(s, tcg_res, rd, pass, MO_64);
8366        }
8367        tcg_temp_free_i64(tcg_res);
8368        tcg_temp_free_i64(tcg_op);
8369        clear_vec_high(s, !is_scalar, rd);
8370    } else {
8371        TCGv_i32 tcg_op = tcg_temp_new_i32();
8372        TCGv_i32 tcg_res = tcg_temp_new_i32();
8373        int pass, maxpasses;
8374
8375        if (is_scalar) {
8376            maxpasses = 1;
8377        } else {
8378            maxpasses = is_q ? 4 : 2;
8379        }
8380
8381        for (pass = 0; pass < maxpasses; pass++) {
8382            read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
8383
8384            switch (opcode) {
8385            case 0x3c: /* URECPE */
8386                gen_helper_recpe_u32(tcg_res, tcg_op, fpst);
8387                break;
8388            case 0x3d: /* FRECPE */
8389                gen_helper_recpe_f32(tcg_res, tcg_op, fpst);
8390                break;
8391            case 0x3f: /* FRECPX */
8392                gen_helper_frecpx_f32(tcg_res, tcg_op, fpst);
8393                break;
8394            case 0x7d: /* FRSQRTE */
8395                gen_helper_rsqrte_f32(tcg_res, tcg_op, fpst);
8396                break;
8397            default:
8398                g_assert_not_reached();
8399            }
8400
8401            if (is_scalar) {
8402                write_fp_sreg(s, rd, tcg_res);
8403            } else {
8404                write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
8405            }
8406        }
8407        tcg_temp_free_i32(tcg_res);
8408        tcg_temp_free_i32(tcg_op);
8409        if (!is_scalar) {
8410            clear_vec_high(s, is_q, rd);
8411        }
8412    }
8413    tcg_temp_free_ptr(fpst);
8414}
8415
8416static void handle_2misc_narrow(DisasContext *s, bool scalar,
8417                                int opcode, bool u, bool is_q,
8418                                int size, int rn, int rd)
8419{
8420    /* Handle 2-reg-misc ops which are narrowing (so each 2*size element
8421     * in the source becomes a size element in the destination).
8422     */
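        /* e.g. XTN Vd.4H, Vn.4S narrows four 32-bit elements into the low
         * 64 bits of Vd, while the "2" (is_q) forms write the high 64 bits
         * of Vd and leave its low half unchanged.
         */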
8423    int pass;
8424    TCGv_i32 tcg_res[2];
8425    int destelt = is_q ? 2 : 0;
8426    int passes = scalar ? 1 : 2;
8427
8428    if (scalar) {
8429        tcg_res[1] = tcg_const_i32(0);
8430    }
8431
8432    for (pass = 0; pass < passes; pass++) {
8433        TCGv_i64 tcg_op = tcg_temp_new_i64();
8434        NeonGenNarrowFn *genfn = NULL;
8435        NeonGenNarrowEnvFn *genenvfn = NULL;
8436
8437        if (scalar) {
8438            read_vec_element(s, tcg_op, rn, pass, size + 1);
8439        } else {
8440            read_vec_element(s, tcg_op, rn, pass, MO_64);
8441        }
8442        tcg_res[pass] = tcg_temp_new_i32();
8443
8444        switch (opcode) {
8445        case 0x12: /* XTN, SQXTUN */
8446        {
8447            static NeonGenNarrowFn * const xtnfns[3] = {
8448                gen_helper_neon_narrow_u8,
8449                gen_helper_neon_narrow_u16,
8450                tcg_gen_extrl_i64_i32,
8451            };
8452            static NeonGenNarrowEnvFn * const sqxtunfns[3] = {
8453                gen_helper_neon_unarrow_sat8,
8454                gen_helper_neon_unarrow_sat16,
8455                gen_helper_neon_unarrow_sat32,
8456            };
8457            if (u) {
8458                genenvfn = sqxtunfns[size];
8459            } else {
8460                genfn = xtnfns[size];
8461            }
8462            break;
8463        }
8464        case 0x14: /* SQXTN, UQXTN */
8465        {
8466            static NeonGenNarrowEnvFn * const fns[3][2] = {
8467                { gen_helper_neon_narrow_sat_s8,
8468                  gen_helper_neon_narrow_sat_u8 },
8469                { gen_helper_neon_narrow_sat_s16,
8470                  gen_helper_neon_narrow_sat_u16 },
8471                { gen_helper_neon_narrow_sat_s32,
8472                  gen_helper_neon_narrow_sat_u32 },
8473            };
8474            genenvfn = fns[size][u];
8475            break;
8476        }
8477        case 0x16: /* FCVTN, FCVTN2 */
8478            /* 32 bit to 16 bit or 64 bit to 32 bit float conversion */
8479            if (size == 2) {
8480                gen_helper_vfp_fcvtsd(tcg_res[pass], tcg_op, cpu_env);
8481            } else {
8482                TCGv_i32 tcg_lo = tcg_temp_new_i32();
8483                TCGv_i32 tcg_hi = tcg_temp_new_i32();
8484                tcg_gen_extr_i64_i32(tcg_lo, tcg_hi, tcg_op);
8485                gen_helper_vfp_fcvt_f32_to_f16(tcg_lo, tcg_lo, cpu_env);
8486                gen_helper_vfp_fcvt_f32_to_f16(tcg_hi, tcg_hi, cpu_env);
8487                tcg_gen_deposit_i32(tcg_res[pass], tcg_lo, tcg_hi, 16, 16);
8488                tcg_temp_free_i32(tcg_lo);
8489                tcg_temp_free_i32(tcg_hi);
8490            }
8491            break;
8492        case 0x56: /* FCVTXN, FCVTXN2 */
8493            /* 64 bit to 32 bit float conversion
8494             * with von Neumann rounding (round to odd)
8495             */
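                /* Round-to-odd sets the least significant result bit
                 * whenever any discarded bits are non-zero, which makes
                 * a later narrowing to half precision immune to double
                 * rounding.
                 */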
8496            assert(size == 2);
8497            gen_helper_fcvtx_f64_to_f32(tcg_res[pass], tcg_op, cpu_env);
8498            break;
8499        default:
8500            g_assert_not_reached();
8501        }
8502
8503        if (genfn) {
8504            genfn(tcg_res[pass], tcg_op);
8505        } else if (genenvfn) {
8506            genenvfn(tcg_res[pass], cpu_env, tcg_op);
8507        }
8508
8509        tcg_temp_free_i64(tcg_op);
8510    }
8511
8512    for (pass = 0; pass < 2; pass++) {
8513        write_vec_element_i32(s, tcg_res[pass], rd, destelt + pass, MO_32);
8514        tcg_temp_free_i32(tcg_res[pass]);
8515    }
8516    clear_vec_high(s, is_q, rd);
8517}
8518
8519/* Remaining saturating accumulating ops */
8520static void handle_2misc_satacc(DisasContext *s, bool is_scalar, bool is_u,
8521                                bool is_q, int size, int rn, int rd)
8522{
8523    bool is_double = (size == 3);
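        /* SUQADD is a signed saturating add of an unsigned value (Rn) to
         * a signed accumulator (Rd); USQADD is the unsigned saturating
         * add of a signed value. Hence the crossed helper names below:
         * neon_uqadd_s* is the unsigned-saturating add taking a signed
         * addend, and vice versa.
         */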
8524
8525    if (is_double) {
8526        TCGv_i64 tcg_rn = tcg_temp_new_i64();
8527        TCGv_i64 tcg_rd = tcg_temp_new_i64();
8528        int pass;
8529
8530        for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
8531            read_vec_element(s, tcg_rn, rn, pass, MO_64);
8532            read_vec_element(s, tcg_rd, rd, pass, MO_64);
8533
8534            if (is_u) { /* USQADD */
8535                gen_helper_neon_uqadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rd);
8536            } else { /* SUQADD */
8537                gen_helper_neon_sqadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rd);
8538            }
8539            write_vec_element(s, tcg_rd, rd, pass, MO_64);
8540        }
8541        tcg_temp_free_i64(tcg_rd);
8542        tcg_temp_free_i64(tcg_rn);
8543        clear_vec_high(s, !is_scalar, rd);
8544    } else {
8545        TCGv_i32 tcg_rn = tcg_temp_new_i32();
8546        TCGv_i32 tcg_rd = tcg_temp_new_i32();
8547        int pass, maxpasses;
8548
8549        if (is_scalar) {
8550            maxpasses = 1;
8551        } else {
8552            maxpasses = is_q ? 4 : 2;
8553        }
8554
8555        for (pass = 0; pass < maxpasses; pass++) {
8556            if (is_scalar) {
8557                read_vec_element_i32(s, tcg_rn, rn, pass, size);
8558                read_vec_element_i32(s, tcg_rd, rd, pass, size);
8559            } else {
8560                read_vec_element_i32(s, tcg_rn, rn, pass, MO_32);
8561                read_vec_element_i32(s, tcg_rd, rd, pass, MO_32);
8562            }
8563
8564            if (is_u) { /* USQADD */
8565                switch (size) {
8566                case 0:
8567                    gen_helper_neon_uqadd_s8(tcg_rd, cpu_env, tcg_rn, tcg_rd);
8568                    break;
8569                case 1:
8570                    gen_helper_neon_uqadd_s16(tcg_rd, cpu_env, tcg_rn, tcg_rd);
8571                    break;
8572                case 2:
8573                    gen_helper_neon_uqadd_s32(tcg_rd, cpu_env, tcg_rn, tcg_rd);
8574                    break;
8575                default:
8576                    g_assert_not_reached();
8577                }
8578            } else { /* SUQADD */
8579                switch (size) {
8580                case 0:
8581                    gen_helper_neon_sqadd_u8(tcg_rd, cpu_env, tcg_rn, tcg_rd);
8582                    break;
8583                case 1:
8584                    gen_helper_neon_sqadd_u16(tcg_rd, cpu_env, tcg_rn, tcg_rd);
8585                    break;
8586                case 2:
8587                    gen_helper_neon_sqadd_u32(tcg_rd, cpu_env, tcg_rn, tcg_rd);
8588                    break;
8589                default:
8590                    g_assert_not_reached();
8591                }
8592            }
8593
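                /* A scalar result must be zero-extended to 128 bits: zero
                 * the low 64 bits of Vd before depositing the 8/16/32-bit
                 * result; clear_vec_high() below zeroes the top half.
                 */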
8594            if (is_scalar) {
8595                TCGv_i64 tcg_zero = tcg_const_i64(0);
8596                write_vec_element(s, tcg_zero, rd, 0, MO_64);
8597                tcg_temp_free_i64(tcg_zero);
8598            }
8599            write_vec_element_i32(s, tcg_rd, rd, pass, MO_32);
8600        }
8601        tcg_temp_free_i32(tcg_rd);
8602        tcg_temp_free_i32(tcg_rn);
8603        clear_vec_high(s, is_q, rd);
8604    }
8605}
8606
8607/* AdvSIMD scalar two reg misc
8608 *  31 30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
8609 * +-----+---+-----------+------+-----------+--------+-----+------+------+
8610 * | 0 1 | U | 1 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
8611 * +-----+---+-----------+------+-----------+--------+-----+------+------+
8612 */
8613static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn)
8614{
8615    int rd = extract32(insn, 0, 5);
8616    int rn = extract32(insn, 5, 5);
8617    int opcode = extract32(insn, 12, 5);
8618    int size = extract32(insn, 22, 2);
8619    bool u = extract32(insn, 29, 1);
8620    bool is_fcvt = false;
8621    int rmode;
8622    TCGv_i32 tcg_rmode;
8623    TCGv_ptr tcg_fpstatus;
8624
8625    switch (opcode) {
8626    case 0x3: /* USQADD / SUQADD */
8627        if (!fp_access_check(s)) {
8628            return;
8629        }
8630        handle_2misc_satacc(s, true, u, false, size, rn, rd);
8631        return;
8632    case 0x7: /* SQABS / SQNEG */
8633        break;
8634    case 0xa: /* CMLT */
8635        if (u) {
8636            unallocated_encoding(s);
8637            return;
8638        }
8639        /* fall through */
8640    case 0x8: /* CMGT, CMGE */
8641    case 0x9: /* CMEQ, CMLE */
8642    case 0xb: /* ABS, NEG */
8643        if (size != 3) {
8644            unallocated_encoding(s);
8645            return;
8646        }
8647        break;
8648    case 0x12: /* SQXTUN */
8649        if (!u) {
8650            unallocated_encoding(s);
8651            return;
8652        }
8653        /* fall through */
8654    case 0x14: /* SQXTN, UQXTN */
8655        if (size == 3) {
8656            unallocated_encoding(s);
8657            return;
8658        }
8659        if (!fp_access_check(s)) {
8660            return;
8661        }
8662        handle_2misc_narrow(s, true, opcode, u, false, size, rn, rd);
8663        return;
8664    case 0xc ... 0xf:
8665    case 0x16 ... 0x1d:
8666    case 0x1f:
8667        /* Floating point: U, size[1] and opcode indicate operation;
8668         * size[0] indicates single or double precision.
8669         */
8670        opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
8671        size = extract32(size, 0, 1) ? 3 : 2;
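            /* e.g. FCVTZS (scalar, double): U == 0, size == 11 and
             * opcode == 11011 remap to opcode == 0x3b with size == 3.
             */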
8672        switch (opcode) {
8673        case 0x2c: /* FCMGT (zero) */
8674        case 0x2d: /* FCMEQ (zero) */
8675        case 0x2e: /* FCMLT (zero) */
8676        case 0x6c: /* FCMGE (zero) */
8677        case 0x6d: /* FCMLE (zero) */
8678            handle_2misc_fcmp_zero(s, opcode, true, u, true, size, rn, rd);
8679            return;
8680        case 0x1d: /* SCVTF */
8681        case 0x5d: /* UCVTF */
8682        {
8683            bool is_signed = (opcode == 0x1d);
8684            if (!fp_access_check(s)) {
8685                return;
8686            }
8687            handle_simd_intfp_conv(s, rd, rn, 1, is_signed, 0, size);
8688            return;
8689        }
8690        case 0x3d: /* FRECPE */
8691        case 0x3f: /* FRECPX */
8692        case 0x7d: /* FRSQRTE */
8693            if (!fp_access_check(s)) {
8694                return;
8695            }
8696            handle_2misc_reciprocal(s, opcode, true, u, true, size, rn, rd);
8697            return;
8698        case 0x1a: /* FCVTNS */
8699        case 0x1b: /* FCVTMS */
8700        case 0x3a: /* FCVTPS */
8701        case 0x3b: /* FCVTZS */
8702        case 0x5a: /* FCVTNU */
8703        case 0x5b: /* FCVTMU */
8704        case 0x7a: /* FCVTPU */
8705        case 0x7b: /* FCVTZU */
8706            is_fcvt = true;
8707            rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
8708            break;
8709        case 0x1c: /* FCVTAS */
8710        case 0x5c: /* FCVTAU */
8711            /* TIEAWAY doesn't fit in the usual rounding mode encoding */
8712            is_fcvt = true;
8713            rmode = FPROUNDING_TIEAWAY;
8714            break;
8715        case 0x56: /* FCVTXN, FCVTXN2 */
8716            if (size == 2) {
8717                unallocated_encoding(s);
8718                return;
8719            }
8720            if (!fp_access_check(s)) {
8721                return;
8722            }
8723            handle_2misc_narrow(s, true, opcode, u, false, size - 1, rn, rd);
8724            return;
8725        default:
8726            unallocated_encoding(s);
8727            return;
8728        }
8729        break;
8730    default:
8731        unallocated_encoding(s);
8732        return;
8733    }
8734
8735    if (!fp_access_check(s)) {
8736        return;
8737    }
8738
8739    if (is_fcvt) {
8740        tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
8741        tcg_fpstatus = get_fpstatus_ptr(false);
8742        gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
8743    } else {
8744        tcg_rmode = NULL;
8745        tcg_fpstatus = NULL;
8746    }
8747
8748    if (size == 3) {
8749        TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
8750        TCGv_i64 tcg_rd = tcg_temp_new_i64();
8751
8752        handle_2misc_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rmode, tcg_fpstatus);
8753        write_fp_dreg(s, rd, tcg_rd);
8754        tcg_temp_free_i64(tcg_rd);
8755        tcg_temp_free_i64(tcg_rn);
8756    } else {
8757        TCGv_i32 tcg_rn = tcg_temp_new_i32();
8758        TCGv_i32 tcg_rd = tcg_temp_new_i32();
8759
8760        read_vec_element_i32(s, tcg_rn, rn, 0, size);
8761
8762        switch (opcode) {
8763        case 0x7: /* SQABS, SQNEG */
8764        {
8765            NeonGenOneOpEnvFn *genfn;
8766            static NeonGenOneOpEnvFn * const fns[3][2] = {
8767                { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
8768                { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
8769                { gen_helper_neon_qabs_s32, gen_helper_neon_qneg_s32 },
8770            };
8771            genfn = fns[size][u];
8772            genfn(tcg_rd, cpu_env, tcg_rn);
8773            break;
8774        }
8775        case 0x1a: /* FCVTNS */
8776        case 0x1b: /* FCVTMS */
8777        case 0x1c: /* FCVTAS */
8778        case 0x3a: /* FCVTPS */
8779        case 0x3b: /* FCVTZS */
8780        {
8781            TCGv_i32 tcg_shift = tcg_const_i32(0);
8782            gen_helper_vfp_tosls(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
8783            tcg_temp_free_i32(tcg_shift);
8784            break;
8785        }
8786        case 0x5a: /* FCVTNU */
8787        case 0x5b: /* FCVTMU */
8788        case 0x5c: /* FCVTAU */
8789        case 0x7a: /* FCVTPU */
8790        case 0x7b: /* FCVTZU */
8791        {
8792            TCGv_i32 tcg_shift = tcg_const_i32(0);
8793            gen_helper_vfp_touls(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
8794            tcg_temp_free_i32(tcg_shift);
8795            break;
8796        }
8797        default:
8798            g_assert_not_reached();
8799        }
8800
8801        write_fp_sreg(s, rd, tcg_rd);
8802        tcg_temp_free_i32(tcg_rd);
8803        tcg_temp_free_i32(tcg_rn);
8804    }
8805
8806    if (is_fcvt) {
8807        gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
8808        tcg_temp_free_i32(tcg_rmode);
8809        tcg_temp_free_ptr(tcg_fpstatus);
8810    }
8811}
8812
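    /* Expanders for the shift-right-and-accumulate insns (SSRA/USRA):
     * every lane of 'a' is shifted right by 'shift' and added into the
     * matching lane of 'd'. e.g. an 8-bit SSRA by 2 turns a lane value
     * of 0xf0 (-16) into a -4 (0xfc) contribution to the accumulator.
     */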
8813static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
8814{
8815    tcg_gen_vec_sar8i_i64(a, a, shift);
8816    tcg_gen_vec_add8_i64(d, d, a);
8817}
8818
8819static void gen_ssra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
8820{
8821    tcg_gen_vec_sar16i_i64(a, a, shift);
8822    tcg_gen_vec_add16_i64(d, d, a);
8823}
8824
8825static void gen_ssra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
8826{
8827    tcg_gen_sari_i32(a, a, shift);
8828    tcg_gen_add_i32(d, d, a);
8829}
8830
8831static void gen_ssra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
8832{
8833    tcg_gen_sari_i64(a, a, shift);
8834    tcg_gen_add_i64(d, d, a);
8835}
8836
8837static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
8838{
8839    tcg_gen_sari_vec(vece, a, a, sh);
8840    tcg_gen_add_vec(vece, d, d, a);
8841}
8842
8843static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
8844{
8845    tcg_gen_vec_shr8i_i64(a, a, shift);
8846    tcg_gen_vec_add8_i64(d, d, a);
8847}
8848
8849static void gen_usra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
8850{
8851    tcg_gen_vec_shr16i_i64(a, a, shift);
8852    tcg_gen_vec_add16_i64(d, d, a);
8853}
8854
8855static void gen_usra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
8856{
8857    tcg_gen_shri_i32(a, a, shift);
8858    tcg_gen_add_i32(d, d, a);
8859}
8860
8861static void gen_usra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
8862{
8863    tcg_gen_shri_i64(a, a, shift);
8864    tcg_gen_add_i64(d, d, a);
8865}
8866
8867static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
8868{
8869    tcg_gen_shri_vec(vece, a, a, sh);
8870    tcg_gen_add_vec(vece, d, d, a);
8871}
8872
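    /* Expanders for SRI (shift right and insert): bits shifted in at
     * the top of each lane keep their old destination value. For an
     * 8-bit lane shifted by 3 the write mask is 0x1f, i.e.
     * d = (d & 0xe0) | (a >> 3).
     */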
8873static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
8874{
8875    uint64_t mask = dup_const(MO_8, 0xff >> shift);
8876    TCGv_i64 t = tcg_temp_new_i64();
8877
8878    tcg_gen_shri_i64(t, a, shift);
8879    tcg_gen_andi_i64(t, t, mask);
8880    tcg_gen_andi_i64(d, d, ~mask);
8881    tcg_gen_or_i64(d, d, t);
8882    tcg_temp_free_i64(t);
8883}
8884
8885static void gen_shr16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
8886{
8887    uint64_t mask = dup_const(MO_16, 0xffff >> shift);
8888    TCGv_i64 t = tcg_temp_new_i64();
8889
8890    tcg_gen_shri_i64(t, a, shift);
8891    tcg_gen_andi_i64(t, t, mask);
8892    tcg_gen_andi_i64(d, d, ~mask);
8893    tcg_gen_or_i64(d, d, t);
8894    tcg_temp_free_i64(t);
8895}
8896
8897static void gen_shr32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
8898{
8899    tcg_gen_shri_i32(a, a, shift);
8900    tcg_gen_deposit_i32(d, d, a, 0, 32 - shift);
8901}
8902
8903static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
8904{
8905    tcg_gen_shri_i64(a, a, shift);
8906    tcg_gen_deposit_i64(d, d, a, 0, 64 - shift);
8907}
8908
8909static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
8910{
8911    uint64_t mask = (2ull << ((8 << vece) - 1)) - 1;
8912    TCGv_vec t = tcg_temp_new_vec_matching(d);
8913    TCGv_vec m = tcg_temp_new_vec_matching(d);
8914
8915    tcg_gen_dupi_vec(vece, m, mask ^ (mask >> sh));
8916    tcg_gen_shri_vec(vece, t, a, sh);
8917    tcg_gen_and_vec(vece, d, d, m);
8918    tcg_gen_or_vec(vece, d, d, t);
8919
8920    tcg_temp_free_vec(t);
8921    tcg_temp_free_vec(m);
8922}
8923
8924/* SSHR[RA]/USHR[RA] - Vector shift right (optional rounding/accumulate) */
8925static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u,
8926                                 int immh, int immb, int opcode, int rn, int rd)
8927{
8928    static const GVecGen2i ssra_op[4] = {
8929        { .fni8 = gen_ssra8_i64,
8930          .fniv = gen_ssra_vec,
8931          .load_dest = true,
8932          .opc = INDEX_op_sari_vec,
8933          .vece = MO_8 },
8934        { .fni8 = gen_ssra16_i64,
8935          .fniv = gen_ssra_vec,
8936          .load_dest = true,
8937          .opc = INDEX_op_sari_vec,
8938          .vece = MO_16 },
8939        { .fni4 = gen_ssra32_i32,
8940          .fniv = gen_ssra_vec,
8941          .load_dest = true,
8942          .opc = INDEX_op_sari_vec,
8943          .vece = MO_32 },
8944        { .fni8 = gen_ssra64_i64,
8945          .fniv = gen_ssra_vec,
8946          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
8947          .load_dest = true,
8948          .opc = INDEX_op_sari_vec,
8949          .vece = MO_64 },
8950    };
8951    static const GVecGen2i usra_op[4] = {
8952        { .fni8 = gen_usra8_i64,
8953          .fniv = gen_usra_vec,
8954          .load_dest = true,
8955          .opc = INDEX_op_shri_vec,
8956          .vece = MO_8, },
8957        { .fni8 = gen_usra16_i64,
8958          .fniv = gen_usra_vec,
8959          .load_dest = true,
8960          .opc = INDEX_op_shri_vec,
8961          .vece = MO_16, },
8962        { .fni4 = gen_usra32_i32,
8963          .fniv = gen_usra_vec,
8964          .load_dest = true,
8965          .opc = INDEX_op_shri_vec,
8966          .vece = MO_32, },
8967        { .fni8 = gen_usra64_i64,
8968          .fniv = gen_usra_vec,
8969          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
8970          .load_dest = true,
8971          .opc = INDEX_op_shri_vec,
8972          .vece = MO_64, },
8973    };
8974    static const GVecGen2i sri_op[4] = {
8975        { .fni8 = gen_shr8_ins_i64,
8976          .fniv = gen_shr_ins_vec,
8977          .load_dest = true,
8978          .opc = INDEX_op_shri_vec,
8979          .vece = MO_8 },
8980        { .fni8 = gen_shr16_ins_i64,
8981          .fniv = gen_shr_ins_vec,
8982          .load_dest = true,
8983          .opc = INDEX_op_shri_vec,
8984          .vece = MO_16 },
8985        { .fni4 = gen_shr32_ins_i32,
8986          .fniv = gen_shr_ins_vec,
8987          .load_dest = true,
8988          .opc = INDEX_op_shri_vec,
8989          .vece = MO_32 },
8990        { .fni8 = gen_shr64_ins_i64,
8991          .fniv = gen_shr_ins_vec,
8992          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
8993          .load_dest = true,
8994          .opc = INDEX_op_shri_vec,
8995          .vece = MO_64 },
8996    };
8997
8998    int size = 32 - clz32(immh) - 1;
8999    int immhb = immh << 3 | immb;
9000    int shift = 2 * (8 << size) - immhb;
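        /* Right-shift amounts are encoded as 2 * esize - immhb: e.g. for
         * 8-bit lanes (immh == 0001) an immhb of 13 encodes a shift of 3.
         */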
9001    bool accumulate = false;
9002    int dsize = is_q ? 128 : 64;
9003    int esize = 8 << size;
9004    int elements = dsize/esize;
9005    TCGMemOp memop = size | (is_u ? 0 : MO_SIGN);
9006    TCGv_i64 tcg_rn = new_tmp_a64(s);
9007    TCGv_i64 tcg_rd = new_tmp_a64(s);
9008    TCGv_i64 tcg_round;
9009    uint64_t round_const;
9010    int i;
9011
9012    if (extract32(immh, 3, 1) && !is_q) {
9013        unallocated_encoding(s);
9014        return;
9015    }
9016
9017    if (size > 3 && !is_q) {
9018        unallocated_encoding(s);
9019        return;
9020    }
9021
9022    if (!fp_access_check(s)) {
9023        return;
9024    }
9025
9026    switch (opcode) {
9027    case 0x02: /* SSRA / USRA (accumulate) */
9028        if (is_u) {
9029            /* Shift count same as element size produces zero to add.  */
9030            if (shift == 8 << size) {
9031                goto done;
9032            }
9033            gen_gvec_op2i(s, is_q, rd, rn, shift, &usra_op[size]);
9034        } else {
9035            /* Shift count same as element size produces all sign to add.  */
9036            if (shift == 8 << size) {
9037                shift -= 1;
9038            }
9039            gen_gvec_op2i(s, is_q, rd, rn, shift, &ssra_op[size]);
9040        }
9041        return;
9042    case 0x08: /* SRI */
9043        /* Shift count same as element size is valid but does nothing.  */
9044        if (shift == 8 << size) {
9045            goto done;
9046        }
9047        gen_gvec_op2i(s, is_q, rd, rn, shift, &sri_op[size]);
9048        return;
9049
9050    case 0x00: /* SSHR / USHR */
9051        if (is_u) {
9052            if (shift == 8 << size) {
9053                /* Shift count the same size as element size produces zero.  */
9054                tcg_gen_gvec_dup8i(vec_full_reg_offset(s, rd),
9055                                   is_q ? 16 : 8, vec_full_reg_size(s), 0);
9056            } else {
9057                gen_gvec_fn2i(s, is_q, rd, rn, shift, tcg_gen_gvec_shri, size);
9058            }
9059        } else {
9060            /* Shift count the same size as element size produces all sign.  */
9061            if (shift == 8 << size) {
9062                shift -= 1;
9063            }
9064            gen_gvec_fn2i(s, is_q, rd, rn, shift, tcg_gen_gvec_sari, size);
9065        }
9066        return;
9067
9068    case 0x04: /* SRSHR / URSHR (rounding) */
9069        break;
9070    case 0x06: /* SRSRA / URSRA (accum + rounding) */
9071        accumulate = true;
9072        break;
9073    default:
9074        g_assert_not_reached();
9075    }
9076
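        /* The rounding variants add half of the shifted-out weight before
         * shifting: e.g. SRSHR #3 applied to 20 computes (20 + 4) >> 3 = 3,
         * where the plain SSHR would produce 2.
         */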
9077    round_const = 1ULL << (shift - 1);
9078    tcg_round = tcg_const_i64(round_const);
9079
9080    for (i = 0; i < elements; i++) {
9081        read_vec_element(s, tcg_rn, rn, i, memop);
9082        if (accumulate) {
9083            read_vec_element(s, tcg_rd, rd, i, memop);
9084        }
9085
9086        handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
9087                                accumulate, is_u, size, shift);
9088
9089        write_vec_element(s, tcg_rd, rd, i, size);
9090    }
9091    tcg_temp_free_i64(tcg_round);
9092
9093 done:
9094    clear_vec_high(s, is_q, rd);
9095}
9096
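    /* Expanders for SLI (shift left and insert): the low 'shift' bits
     * of each destination lane are preserved. For an 8-bit lane shifted
     * by 4, d = (a << 4) | (d & 0x0f).
     */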
9097static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
9098{
9099    uint64_t mask = dup_const(MO_8, 0xff << shift);
9100    TCGv_i64 t = tcg_temp_new_i64();
9101
9102    tcg_gen_shli_i64(t, a, shift);
9103    tcg_gen_andi_i64(t, t, mask);
9104    tcg_gen_andi_i64(d, d, ~mask);
9105    tcg_gen_or_i64(d, d, t);
9106    tcg_temp_free_i64(t);
9107}
9108
9109static void gen_shl16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
9110{
9111    uint64_t mask = dup_const(MO_16, 0xffff << shift);
9112    TCGv_i64 t = tcg_temp_new_i64();
9113
9114    tcg_gen_shli_i64(t, a, shift);
9115    tcg_gen_andi_i64(t, t, mask);
9116    tcg_gen_andi_i64(d, d, ~mask);
9117    tcg_gen_or_i64(d, d, t);
9118    tcg_temp_free_i64(t);
9119}
9120
9121static void gen_shl32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
9122{
9123    tcg_gen_deposit_i32(d, d, a, shift, 32 - shift);
9124}
9125
9126static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
9127{
9128    tcg_gen_deposit_i64(d, d, a, shift, 64 - shift);
9129}
9130
9131static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
9132{
9133    uint64_t mask = (1ull << sh) - 1;
9134    TCGv_vec t = tcg_temp_new_vec_matching(d);
9135    TCGv_vec m = tcg_temp_new_vec_matching(d);
9136
9137    tcg_gen_dupi_vec(vece, m, mask);
9138    tcg_gen_shli_vec(vece, t, a, sh);
9139    tcg_gen_and_vec(vece, d, d, m);
9140    tcg_gen_or_vec(vece, d, d, t);
9141
9142    tcg_temp_free_vec(t);
9143    tcg_temp_free_vec(m);
9144}
9145
9146/* SHL/SLI - Vector shift left */
9147static void handle_vec_simd_shli(DisasContext *s, bool is_q, bool insert,
9148                                 int immh, int immb, int opcode, int rn, int rd)
9149{
9150    static const GVecGen2i shi_op[4] = {
9151        { .fni8 = gen_shl8_ins_i64,
9152          .fniv = gen_shl_ins_vec,
9153          .opc = INDEX_op_shli_vec,
9154          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
9155          .load_dest = true,
9156          .vece = MO_8 },
9157        { .fni8 = gen_shl16_ins_i64,
9158          .fniv = gen_shl_ins_vec,
9159          .opc = INDEX_op_shli_vec,
9160          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
9161          .load_dest = true,
9162          .vece = MO_16 },
9163        { .fni4 = gen_shl32_ins_i32,
9164          .fniv = gen_shl_ins_vec,
9165          .opc = INDEX_op_shli_vec,
9166          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
9167          .load_dest = true,
9168          .vece = MO_32 },
9169        { .fni8 = gen_shl64_ins_i64,
9170          .fniv = gen_shl_ins_vec,
9171          .opc = INDEX_op_shli_vec,
9172          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
9173          .load_dest = true,
9174          .vece = MO_64 },
9175    };
9176    int size = 32 - clz32(immh) - 1;
9177    int immhb = immh << 3 | immb;
9178    int shift = immhb - (8 << size);
9179
9180    if (extract32(immh, 3, 1) && !is_q) {
9181        unallocated_encoding(s);
9182        return;
9183    }
9184
9185    if (size > 3 && !is_q) {
9186        unallocated_encoding(s);
9187        return;
9188    }
9189
9190    if (!fp_access_check(s)) {
9191        return;
9192    }
9193
9194    if (insert) {
9195        gen_gvec_op2i(s, is_q, rd, rn, shift, &shi_op[size]);
9196    } else {
9197        gen_gvec_fn2i(s, is_q, rd, rn, shift, tcg_gen_gvec_shli, size);
9198    }
9199}
9200
9201/* SSHLL/USHLL - Vector shift left with widening */
9202static void handle_vec_simd_wshli(DisasContext *s, bool is_q, bool is_u,
9203                                 int immh, int immb, int opcode, int rn, int rd)
9204{
9205    int size = 32 - clz32(immh) - 1;
9206    int immhb = immh << 3 | immb;
9207    int shift = immhb - (8 << size);
9208    int dsize = 64;
9209    int esize = 8 << size;
9210    int elements = dsize/esize;
9211    TCGv_i64 tcg_rn = new_tmp_a64(s);
9212    TCGv_i64 tcg_rd = new_tmp_a64(s);
9213    int i;
9214
9215    if (size >= 3) {
9216        unallocated_encoding(s);
9217        return;
9218    }
9219
9220    if (!fp_access_check(s)) {
9221        return;
9222    }
9223
9224    /* For the LL variants the store is larger than the load,
9225     * so if rd == rn we would overwrite parts of our input.
9226     * Load everything up front and extract each lane with shifts instead.
9227     */
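        /* e.g. SSHLL Vd.8H, Vn.8B, #3 sign-extends each byte in the low
         * 64 bits of Vn to 16 bits and then shifts it left by 3.
         */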
9228    read_vec_element(s, tcg_rn, rn, is_q ? 1 : 0, MO_64);
9229
9230    for (i = 0; i < elements; i++) {
9231        tcg_gen_shri_i64(tcg_rd, tcg_rn, i * esize);
9232        ext_and_shift_reg(tcg_rd, tcg_rd, size | (!is_u << 2), 0);
9233        tcg_gen_shli_i64(tcg_rd, tcg_rd, shift);
9234        write_vec_element(s, tcg_rd, rd, i, size + 1);
9235    }
9236}
9237
9238/* SHRN/RSHRN - Shift right with narrowing (and potential rounding) */
9239static void handle_vec_simd_shrn(DisasContext *s, bool is_q,
9240                                 int immh, int immb, int opcode, int rn, int rd)
9241{
9242    int immhb = immh << 3 | immb;
9243    int size = 32 - clz32(immh) - 1;
9244    int dsize = 64;
9245    int esize = 8 << size;
9246    int elements = dsize/esize;
9247    int shift = (2 * esize) - immhb;
9248    bool round = extract32(opcode, 0, 1);
9249    TCGv_i64 tcg_rn, tcg_rd, tcg_final;
9250    TCGv_i64 tcg_round;
9251    int i;
9252
9253    if (extract32(immh, 3, 1)) {
9254        unallocated_encoding(s);
9255        return;
9256    }
9257
9258    if (!fp_access_check(s)) {
9259        return;
9260    }
9261
9262    tcg_rn = tcg_temp_new_i64();
9263    tcg_rd = tcg_temp_new_i64();
9264    tcg_final = tcg_temp_new_i64();
9265    read_vec_element(s, tcg_final, rd, is_q ? 1 : 0, MO_64);
9266
9267    if (round) {
9268        uint64_t round_const = 1ULL << (shift - 1);
9269        tcg_round = tcg_const_i64(round_const);
9270    } else {
9271        tcg_round = NULL;
9272    }
9273
9274    for (i = 0; i < elements; i++) {
9275        read_vec_element(s, tcg_rn, rn, i, size+1);
9276        handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
9277                                false, true, size+1, shift);
9278
9279        tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
9280    }
9281
9282    if (!is_q) {
9283        write_vec_element(s, tcg_final, rd, 0, MO_64);
9284    } else {
9285        write_vec_element(s, tcg_final, rd, 1, MO_64);
9286    }
9287    if (round) {
9288        tcg_temp_free_i64(tcg_round);
9289    }
9290    tcg_temp_free_i64(tcg_rn);
9291    tcg_temp_free_i64(tcg_rd);
9292    tcg_temp_free_i64(tcg_final);
9293
9294    clear_vec_high(s, is_q, rd);
9295}
9296
9298/* AdvSIMD shift by immediate
9299 *  31  30   29 28         23 22  19 18  16 15    11  10 9    5 4    0
9300 * +---+---+---+-------------+------+------+--------+---+------+------+
9301 * | 0 | Q | U | 0 1 1 1 1 0 | immh | immb | opcode | 1 |  Rn  |  Rd  |
9302 * +---+---+---+-------------+------+------+--------+---+------+------+
9303 */
9304static void disas_simd_shift_imm(DisasContext *s, uint32_t insn)
9305{
9306    int rd = extract32(insn, 0, 5);
9307    int rn = extract32(insn, 5, 5);
9308    int opcode = extract32(insn, 11, 5);
9309    int immb = extract32(insn, 16, 3);
9310    int immh = extract32(insn, 19, 4);
9311    bool is_u = extract32(insn, 29, 1);
9312    bool is_q = extract32(insn, 30, 1);
9313
9314    switch (opcode) {
9315    case 0x08: /* SRI */
9316        if (!is_u) {
9317            unallocated_encoding(s);
9318            return;
9319        }
9320        /* fall through */
9321    case 0x00: /* SSHR / USHR */
9322    case 0x02: /* SSRA / USRA (accumulate) */
9323    case 0x04: /* SRSHR / URSHR (rounding) */
9324    case 0x06: /* SRSRA / URSRA (accum + rounding) */
9325        handle_vec_simd_shri(s, is_q, is_u, immh, immb, opcode, rn, rd);
9326        break;
9327    case 0x0a: /* SHL / SLI */
9328        handle_vec_simd_shli(s, is_q, is_u, immh, immb, opcode, rn, rd);
9329        break;
9330    case 0x10: /* SHRN */
9331    case 0x11: /* RSHRN / SQRSHRUN */
9332        if (is_u) {
9333            handle_vec_simd_sqshrn(s, false, is_q, false, true, immh, immb,
9334                                   opcode, rn, rd);
9335        } else {
9336            handle_vec_simd_shrn(s, is_q, immh, immb, opcode, rn, rd);
9337        }
9338        break;
9339    case 0x12: /* SQSHRN / UQSHRN */
9340    case 0x13: /* SQRSHRN / UQRSHRN */
9341        handle_vec_simd_sqshrn(s, false, is_q, is_u, is_u, immh, immb,
9342                               opcode, rn, rd);
9343        break;
9344    case 0x14: /* SSHLL / USHLL */
9345        handle_vec_simd_wshli(s, is_q, is_u, immh, immb, opcode, rn, rd);
9346        break;
9347    case 0x1c: /* SCVTF / UCVTF */
9348        handle_simd_shift_intfp_conv(s, false, is_q, is_u, immh, immb,
9349                                     opcode, rn, rd);
9350        break;
9351    case 0xc: /* SQSHLU */
9352        if (!is_u) {
9353            unallocated_encoding(s);
9354            return;
9355        }
9356        handle_simd_qshl(s, false, is_q, false, true, immh, immb, rn, rd);
9357        break;
9358    case 0xe: /* SQSHL, UQSHL */
9359        handle_simd_qshl(s, false, is_q, is_u, is_u, immh, immb, rn, rd);
9360        break;
9361    case 0x1f: /* FCVTZS / FCVTZU */
9362        handle_simd_shift_fpint_conv(s, false, is_q, is_u, immh, immb, rn, rd);
9363        return;
9364    default:
9365        unallocated_encoding(s);
9366        return;
9367    }
9368}
9369
9370/* Generate code to do a "long" addition or subtraction, i.e. one done in
9371 * TCGv_i64 on vector lanes twice the width specified by size.
9372 */
9373static void gen_neon_addl(int size, bool is_sub, TCGv_i64 tcg_res,
9374                          TCGv_i64 tcg_op1, TCGv_i64 tcg_op2)
9375{
9376    static NeonGenTwo64OpFn * const fns[3][2] = {
9377        { gen_helper_neon_addl_u16, gen_helper_neon_subl_u16 },
9378        { gen_helper_neon_addl_u32, gen_helper_neon_subl_u32 },
9379        { tcg_gen_add_i64, tcg_gen_sub_i64 },
9380    };
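        /* size is the source element width: sizes 0 and 1 use helpers
         * that add 16-bit or 32-bit lanes packed into an i64, while
         * size 2 is a single plain 64-bit add or subtract.
         */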
9381    NeonGenTwo64OpFn *genfn;
9382    assert(size < 3);
9383
9384    genfn = fns[size][is_sub];
9385    genfn(tcg_res, tcg_op1, tcg_op2);
9386}
9387
9388static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size,
9389                                int opcode, int rd, int rn, int rm)
9390{
9391    /* 3-reg-different widening insns: 64 x 64 -> 128 */
9392    TCGv_i64 tcg_res[2];
9393    int pass, accop;
9394
9395    tcg_res[0] = tcg_temp_new_i64();
9396    tcg_res[1] = tcg_temp_new_i64();
9397
9398    /* Does this op do an adding accumulate, a subtracting accumulate,
9399     * or no accumulate at all?
9400     */
9401    switch (opcode) {
9402    case 5:
9403    case 8:
9404    case 9:
9405        accop = 1;
9406        break;
9407    case 10:
9408    case 11:
9409        accop = -1;
9410        break;
9411    default:
9412        accop = 0;
9413        break;
9414    }
9415
9416    if (accop != 0) {
9417        read_vec_element(s, tcg_res[0], rd, 0, MO_64);
9418        read_vec_element(s, tcg_res[1], rd, 1, MO_64);
9419    }
9420
9421    /* size == 2 means two 32x32->64 operations; this is worth special
9422     * casing because we can generally handle it inline.
9423     */
9424    if (size == 2) {
9425        for (pass = 0; pass < 2; pass++) {
9426            TCGv_i64 tcg_op1 = tcg_temp_new_i64();
9427            TCGv_i64 tcg_op2 = tcg_temp_new_i64();
9428            TCGv_i64 tcg_passres;
9429            TCGMemOp memop = MO_32 | (is_u ? 0 : MO_SIGN);
9430
9431            int elt = pass + is_q * 2;
9432
9433            read_vec_element(s, tcg_op1, rn, elt, memop);
9434            read_vec_element(s, tcg_op2, rm, elt, memop);
9435
9436            if (accop == 0) {
9437                tcg_passres = tcg_res[pass];
9438            } else {
9439                tcg_passres = tcg_temp_new_i64();
9440            }
9441
9442            switch (opcode) {
9443            case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
9444                tcg_gen_add_i64(tcg_passres, tcg_op1, tcg_op2);
9445                break;
9446            case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
9447                tcg_gen_sub_i64(tcg_passres, tcg_op1, tcg_op2);
9448                break;
9449            case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
9450            case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
9451            {
9452                TCGv_i64 tcg_tmp1 = tcg_temp_new_i64();
9453                TCGv_i64 tcg_tmp2 = tcg_temp_new_i64();
9454
9455                tcg_gen_sub_i64(tcg_tmp1, tcg_op1, tcg_op2);
9456                tcg_gen_sub_i64(tcg_tmp2, tcg_op2, tcg_op1);
9457                tcg_gen_movcond_i64(is_u ? TCG_COND_GEU : TCG_COND_GE,
9458                                    tcg_passres,
9459                                    tcg_op1, tcg_op2, tcg_tmp1, tcg_tmp2);
9460                tcg_temp_free_i64(tcg_tmp1);
9461                tcg_temp_free_i64(tcg_tmp2);
9462                break;
9463            }
9464            case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
9465            case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
9466            case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
9467                tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
9468                break;
9469            case 9: /* SQDMLAL, SQDMLAL2 */
9470            case 11: /* SQDMLSL, SQDMLSL2 */
9471            case 13: /* SQDMULL, SQDMULL2 */
9472                tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
9473                gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env,
9474                                                  tcg_passres, tcg_passres);
9475                break;
9476            default:
9477                g_assert_not_reached();
9478            }
9479
9480            if (opcode == 9 || opcode == 11) {
9481                /* saturating accumulate ops */
9482                if (accop < 0) {
9483                    tcg_gen_neg_i64(tcg_passres, tcg_passres);
9484                }
9485                gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env,
9486                                                  tcg_res[pass], tcg_passres);
9487            } else if (accop > 0) {
9488                tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
9489            } else if (accop < 0) {
9490                tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
9491            }
9492
9493            if (accop != 0) {
9494                tcg_temp_free_i64(tcg_passres);
9495            }
9496
9497            tcg_temp_free_i64(tcg_op1);
9498            tcg_temp_free_i64(tcg_op2);
9499        }
9500    } else {
9501        /* size 0 or 1, generally helper functions */
9502        for (pass = 0; pass < 2; pass++) {
9503            TCGv_i32 tcg_op1 = tcg_temp_new_i32();
9504            TCGv_i32 tcg_op2 = tcg_temp_new_i32();
9505            TCGv_i64 tcg_passres;
9506            int elt = pass + is_q * 2;
9507
9508            read_vec_element_i32(s, tcg_op1, rn, elt, MO_32);
9509            read_vec_element_i32(s, tcg_op2, rm, elt, MO_32);
9510
9511            if (accop == 0) {
9512                tcg_passres = tcg_res[pass];
9513            } else {
9514                tcg_passres = tcg_temp_new_i64();
9515            }
9516
9517            switch (opcode) {
9518            case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
9519            case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
9520            {
9521                TCGv_i64 tcg_op2_64 = tcg_temp_new_i64();
9522                static NeonGenWidenFn * const widenfns[2][2] = {
9523                    { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
9524                    { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
9525                };
9526                NeonGenWidenFn *widenfn = widenfns[size][is_u];
9527
9528                widenfn(tcg_op2_64, tcg_op2);
9529                widenfn(tcg_passres, tcg_op1);
9530                gen_neon_addl(size, (opcode == 2), tcg_passres,
9531                              tcg_passres, tcg_op2_64);
9532                tcg_temp_free_i64(tcg_op2_64);
9533                break;
9534            }
9535            case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
9536            case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
9537                if (size == 0) {
9538                    if (is_u) {
9539                        gen_helper_neon_abdl_u16(tcg_passres, tcg_op1, tcg_op2);
9540                    } else {
9541                        gen_helper_neon_abdl_s16(tcg_passres, tcg_op1, tcg_op2);
9542                    }
9543                } else {
9544                    if (is_u) {
9545                        gen_helper_neon_abdl_u32(tcg_passres, tcg_op1, tcg_op2);
9546                    } else {
9547                        gen_helper_neon_abdl_s32(tcg_passres, tcg_op1, tcg_op2);
9548                    }
9549                }
9550                break;
9551            case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
9552            case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
9553            case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
9554                if (size == 0) {
9555                    if (is_u) {
9556                        gen_helper_neon_mull_u8(tcg_passres, tcg_op1, tcg_op2);
9557                    } else {
9558                        gen_helper_neon_mull_s8(tcg_passres, tcg_op1, tcg_op2);
9559                    }
9560                } else {
9561                    if (is_u) {
9562                        gen_helper_neon_mull_u16(tcg_passres, tcg_op1, tcg_op2);
9563                    } else {
9564                        gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
9565                    }
9566                }
9567                break;
9568            case 9: /* SQDMLAL, SQDMLAL2 */
9569            case 11: /* SQDMLSL, SQDMLSL2 */
9570            case 13: /* SQDMULL, SQDMULL2 */
9571                assert(size == 1);
9572                gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
9573                gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
9574                                                  tcg_passres, tcg_passres);
9575                break;
9576            case 14: /* PMULL */
9577                assert(size == 0);
9578                gen_helper_neon_mull_p8(tcg_passres, tcg_op1, tcg_op2);
9579                break;
9580            default:
9581                g_assert_not_reached();
9582            }
9583            tcg_temp_free_i32(tcg_op1);
9584            tcg_temp_free_i32(tcg_op2);
9585
9586            if (accop != 0) {
9587                if (opcode == 9 || opcode == 11) {
9588                    /* saturating accumulate ops */
9589                    if (accop < 0) {
9590                        gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
9591                    }
9592                    gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env,
9593                                                      tcg_res[pass],
9594                                                      tcg_passres);
9595                } else {
9596                    gen_neon_addl(size, (accop < 0), tcg_res[pass],
9597                                  tcg_res[pass], tcg_passres);
9598                }
9599                tcg_temp_free_i64(tcg_passres);
9600            }
9601        }
9602    }
9603
9604    write_vec_element(s, tcg_res[0], rd, 0, MO_64);
9605    write_vec_element(s, tcg_res[1], rd, 1, MO_64);
9606    tcg_temp_free_i64(tcg_res[0]);
9607    tcg_temp_free_i64(tcg_res[1]);
9608}
9609
9610static void handle_3rd_wide(DisasContext *s, int is_q, int is_u, int size,
9611                            int opcode, int rd, int rn, int rm)
9612{
9613    TCGv_i64 tcg_res[2];
9614    int part = is_q ? 2 : 0;
9615    int pass;
9616
9617    for (pass = 0; pass < 2; pass++) {
9618        TCGv_i64 tcg_op1 = tcg_temp_new_i64();
9619        TCGv_i32 tcg_op2 = tcg_temp_new_i32();
9620        TCGv_i64 tcg_op2_wide = tcg_temp_new_i64();
9621        static NeonGenWidenFn * const widenfns[3][2] = {
9622            { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
9623            { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
9624            { tcg_gen_ext_i32_i64, tcg_gen_extu_i32_i64 },
9625        };
9626        NeonGenWidenFn *widenfn = widenfns[size][is_u];
9627
9628        read_vec_element(s, tcg_op1, rn, pass, MO_64);
9629        read_vec_element_i32(s, tcg_op2, rm, part + pass, MO_32);
9630        widenfn(tcg_op2_wide, tcg_op2);
9631        tcg_temp_free_i32(tcg_op2);
9632        tcg_res[pass] = tcg_temp_new_i64();
9633        gen_neon_addl(size, (opcode == 3),
9634                      tcg_res[pass], tcg_op1, tcg_op2_wide);
9635        tcg_temp_free_i64(tcg_op1);
9636        tcg_temp_free_i64(tcg_op2_wide);
9637    }
9638
9639    for (pass = 0; pass < 2; pass++) {
9640        write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
9641        tcg_temp_free_i64(tcg_res[pass]);
9642    }
9643}
9644
9645static void do_narrow_round_high_u32(TCGv_i32 res, TCGv_i64 in)
9646{
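        /* Adding 2^31 rounds to nearest before truncating to the high
         * half: a carry propagates into bit 32 exactly when the
         * discarded low 32 bits are >= 0x80000000.
         */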
9647    tcg_gen_addi_i64(in, in, 1U << 31);
9648    tcg_gen_extrh_i64_i32(res, in);
9649}
9650
9651static void handle_3rd_narrowing(DisasContext *s, int is_q, int is_u, int size,
9652                                 int opcode, int rd, int rn, int rm)
9653{
9654    TCGv_i32 tcg_res[2];
9655    int part = is_q ? 2 : 0;
9656    int pass;
9657
9658    for (pass = 0; pass < 2; pass++) {
9659        TCGv_i64 tcg_op1 = tcg_temp_new_i64();
9660        TCGv_i64 tcg_op2 = tcg_temp_new_i64();
9661        TCGv_i64 tcg_wideres = tcg_temp_new_i64();
9662        static NeonGenNarrowFn * const narrowfns[3][2] = {
9663            { gen_helper_neon_narrow_high_u8,
9664              gen_helper_neon_narrow_round_high_u8 },
9665            { gen_helper_neon_narrow_high_u16,
9666              gen_helper_neon_narrow_round_high_u16 },
9667            { tcg_gen_extrh_i64_i32, do_narrow_round_high_u32 },
9668        };
9669        NeonGenNarrowFn *gennarrow = narrowfns[size][is_u];
9670
9671        read_vec_element(s, tcg_op1, rn, pass, MO_64);
9672        read_vec_element(s, tcg_op2, rm, pass, MO_64);
9673
9674        gen_neon_addl(size, (opcode == 6), tcg_wideres, tcg_op1, tcg_op2);
9675
9676        tcg_temp_free_i64(tcg_op1);
9677        tcg_temp_free_i64(tcg_op2);
9678
9679        tcg_res[pass] = tcg_temp_new_i32();
9680        gennarrow(tcg_res[pass], tcg_wideres);
9681        tcg_temp_free_i64(tcg_wideres);
9682    }
9683
9684    for (pass = 0; pass < 2; pass++) {
9685        write_vec_element_i32(s, tcg_res[pass], rd, pass + part, MO_32);
9686        tcg_temp_free_i32(tcg_res[pass]);
9687    }
9688    clear_vec_high(s, is_q, rd);
9689}
9690
9691static void handle_pmull_64(DisasContext *s, int is_q, int rd, int rn, int rm)
9692{
9693    /* PMULL of 64 x 64 -> 128 is an odd special case because it
9694     * is the only three-reg-diff instruction which produces a
9695     * 128-bit wide result from a single operation. However, since
9696     * it's possible to calculate the two halves more or less
9697     * separately, we just use two helper calls.
9698     */
9699    TCGv_i64 tcg_op1 = tcg_temp_new_i64();
9700    TCGv_i64 tcg_op2 = tcg_temp_new_i64();
9701    TCGv_i64 tcg_res = tcg_temp_new_i64();
9702
9703    read_vec_element(s, tcg_op1, rn, is_q, MO_64);
9704    read_vec_element(s, tcg_op2, rm, is_q, MO_64);
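    /* Each helper returns one half of the 128-bit carry-less
     * (polynomial) product of the two 64-bit operands.
     */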
9705    gen_helper_neon_pmull_64_lo(tcg_res, tcg_op1, tcg_op2);
9706    write_vec_element(s, tcg_res, rd, 0, MO_64);
9707    gen_helper_neon_pmull_64_hi(tcg_res, tcg_op1, tcg_op2);
9708    write_vec_element(s, tcg_res, rd, 1, MO_64);
9709
9710    tcg_temp_free_i64(tcg_op1);
9711    tcg_temp_free_i64(tcg_op2);
9712    tcg_temp_free_i64(tcg_res);
9713}
9714
9715/* AdvSIMD three different
9716 *   31  30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
9717 * +---+---+---+-----------+------+---+------+--------+-----+------+------+
9718 * | 0 | Q | U | 0 1 1 1 0 | size | 1 |  Rm  | opcode | 0 0 |  Rn  |  Rd  |
9719 * +---+---+---+-----------+------+---+------+--------+-----+------+------+
9720 */
9721static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn)
9722{
9723    /* Instructions in this group fall into three basic classes
9724     * (in each case with the operation working on each element in
9725     * the input vectors):
9726     * (1) widening 64 x 64 -> 128 (with possibly Vd as an extra
9727     *     128 bit input)
9728     * (2) wide 64 x 128 -> 128
9729     * (3) narrowing 128 x 128 -> 64
9730     * Here we do initial decode, catch unallocated cases and
9731     * dispatch to separate functions for each class.
9732     */
9733    int is_q = extract32(insn, 30, 1);
9734    int is_u = extract32(insn, 29, 1);
9735    int size = extract32(insn, 22, 2);
9736    int opcode = extract32(insn, 12, 4);
9737    int rm = extract32(insn, 16, 5);
9738    int rn = extract32(insn, 5, 5);
9739    int rd = extract32(insn, 0, 5);
9740
9741    switch (opcode) {
9742    case 1: /* SADDW, SADDW2, UADDW, UADDW2 */
9743    case 3: /* SSUBW, SSUBW2, USUBW, USUBW2 */
9744        /* 64 x 128 -> 128 */
9745        if (size == 3) {
9746            unallocated_encoding(s);
9747            return;
9748        }
9749        if (!fp_access_check(s)) {
9750            return;
9751        }
9752        handle_3rd_wide(s, is_q, is_u, size, opcode, rd, rn, rm);
9753        break;
9754    case 4: /* ADDHN, ADDHN2, RADDHN, RADDHN2 */
9755    case 6: /* SUBHN, SUBHN2, RSUBHN, RSUBHN2 */
9756        /* 128 x 128 -> 64 */
9757        if (size == 3) {
9758            unallocated_encoding(s);
9759            return;
9760        }
9761        if (!fp_access_check(s)) {
9762            return;
9763        }
9764        handle_3rd_narrowing(s, is_q, is_u, size, opcode, rd, rn, rm);
9765        break;
9766    case 14: /* PMULL, PMULL2 */
9767        if (is_u || size == 1 || size == 2) {
9768            unallocated_encoding(s);
9769            return;
9770        }
9771        if (size == 3) {
9772            if (!arm_dc_feature(s, ARM_FEATURE_V8_PMULL)) {
9773                unallocated_encoding(s);
9774                return;
9775            }
9776            if (!fp_access_check(s)) {
9777                return;
9778            }
9779            handle_pmull_64(s, is_q, rd, rn, rm);
9780            return;
9781        }
9782        goto is_widening;
9783    case 9: /* SQDMLAL, SQDMLAL2 */
9784    case 11: /* SQDMLSL, SQDMLSL2 */
9785    case 13: /* SQDMULL, SQDMULL2 */
9786        if (is_u || size == 0) {
9787            unallocated_encoding(s);
9788            return;
9789        }
9790        /* fall through */
9791    case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
9792    case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
9793    case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
9794    case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
9795    case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
9796    case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
9797    case 12: /* SMULL, SMULL2, UMULL, UMULL2 */
9798        /* 64 x 64 -> 128 */
9799        if (size == 3) {
9800            unallocated_encoding(s);
9801            return;
9802        }
9803    is_widening:
9804        if (!fp_access_check(s)) {
9805            return;
9806        }
9807
9808        handle_3rd_widening(s, is_q, is_u, size, opcode, rd, rn, rm);
9809        break;
9810    default:
9811        /* opcode 15 not allocated */
9812        unallocated_encoding(s);
9813        break;
9814    }
9815}
9816
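/* BSL, BIT and BIF all reduce to bit selection via the identity
 *   result = b ^ ((a ^ b) & mask)
 * which picks each bit from a where the mask bit is 1 and from b where
 * it is 0.  BSL uses rd as the mask (selecting between rn and rm);
 * BIT and BIF insert rn bits into rd using rm (respectively its
 * complement) as the mask.  Each expansion below is three logic ops.
 */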
9817static void gen_bsl_i64(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
9818{
9819    tcg_gen_xor_i64(rn, rn, rm);
9820    tcg_gen_and_i64(rn, rn, rd);
9821    tcg_gen_xor_i64(rd, rm, rn);
9822}
9823
9824static void gen_bit_i64(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
9825{
9826    tcg_gen_xor_i64(rn, rn, rd);
9827    tcg_gen_and_i64(rn, rn, rm);
9828    tcg_gen_xor_i64(rd, rd, rn);
9829}
9830
9831static void gen_bif_i64(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
9832{
9833    tcg_gen_xor_i64(rn, rn, rd);
9834    tcg_gen_andc_i64(rn, rn, rm);
9835    tcg_gen_xor_i64(rd, rd, rn);
9836}
9837
9838static void gen_bsl_vec(unsigned vece, TCGv_vec rd, TCGv_vec rn, TCGv_vec rm)
9839{
9840    tcg_gen_xor_vec(vece, rn, rn, rm);
9841    tcg_gen_and_vec(vece, rn, rn, rd);
9842    tcg_gen_xor_vec(vece, rd, rm, rn);
9843}
9844
9845static void gen_bit_vec(unsigned vece, TCGv_vec rd, TCGv_vec rn, TCGv_vec rm)
9846{
9847    tcg_gen_xor_vec(vece, rn, rn, rd);
9848    tcg_gen_and_vec(vece, rn, rn, rm);
9849    tcg_gen_xor_vec(vece, rd, rd, rn);
9850}
9851
9852static void gen_bif_vec(unsigned vece, TCGv_vec rd, TCGv_vec rn, TCGv_vec rm)
9853{
9854    tcg_gen_xor_vec(vece, rn, rn, rd);
9855    tcg_gen_andc_vec(vece, rn, rn, rm);
9856    tcg_gen_xor_vec(vece, rd, rd, rn);
9857}
9858
9859/* Logic op (opcode == 3) subgroup of C3.6.16. */
9860static void disas_simd_3same_logic(DisasContext *s, uint32_t insn)
9861{
9862    static const GVecGen3 bsl_op = {
9863        .fni8 = gen_bsl_i64,
9864        .fniv = gen_bsl_vec,
9865        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
9866        .load_dest = true
9867    };
9868    static const GVecGen3 bit_op = {
9869        .fni8 = gen_bit_i64,
9870        .fniv = gen_bit_vec,
9871        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
9872        .load_dest = true
9873    };
9874    static const GVecGen3 bif_op = {
9875        .fni8 = gen_bif_i64,
9876        .fniv = gen_bif_vec,
9877        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
9878        .load_dest = true
9879    };
9880
9881    int rd = extract32(insn, 0, 5);
9882    int rn = extract32(insn, 5, 5);
9883    int rm = extract32(insn, 16, 5);
9884    int size = extract32(insn, 22, 2);
9885    bool is_u = extract32(insn, 29, 1);
9886    bool is_q = extract32(insn, 30, 1);
9887
9888    if (!fp_access_check(s)) {
9889        return;
9890    }
9891
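    /* size and U together select the operation: U=0 gives
     * AND/BIC/ORR/ORN as size goes 0..3, U=1 gives EOR/BSL/BIT/BIF.
     */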
9892    switch (size + 4 * is_u) {
9893    case 0: /* AND */
9894        gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_and, 0);
9895        return;
9896    case 1: /* BIC */
9897        gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_andc, 0);
9898        return;
9899    case 2: /* ORR */
9900        if (rn == rm) { /* MOV */
9901            gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_mov, 0);
9902        } else {
9903            gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_or, 0);
9904        }
9905        return;
9906    case 3: /* ORN */
9907        gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_orc, 0);
9908        return;
9909    case 4: /* EOR */
9910        gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_xor, 0);
9911        return;
9912
9913    case 5: /* BSL bitwise select */
9914        gen_gvec_op3(s, is_q, rd, rn, rm, &bsl_op);
9915        return;
9916    case 6: /* BIT, bitwise insert if true */
9917        gen_gvec_op3(s, is_q, rd, rn, rm, &bit_op);
9918        return;
9919    case 7: /* BIF, bitwise insert if false */
9920        gen_gvec_op3(s, is_q, rd, rn, rm, &bif_op);
9921        return;
9922
9923    default:
9924        g_assert_not_reached();
9925    }
9926}
9927
9928/* Helper functions for 32 bit comparisons */
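/* tcg_gen_movcond_i32(cond, ret, c1, c2, v1, v2) computes
 * ret = cond(c1, c2) ? v1 : v2, so passing the two operands as both
 * the comparison and the value arguments yields max/min directly.
 */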
9929static void gen_max_s32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
9930{
9931    tcg_gen_movcond_i32(TCG_COND_GE, res, op1, op2, op1, op2);
9932}
9933
9934static void gen_max_u32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
9935{
9936    tcg_gen_movcond_i32(TCG_COND_GEU, res, op1, op2, op1, op2);
9937}
9938
9939static void gen_min_s32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
9940{
9941    tcg_gen_movcond_i32(TCG_COND_LE, res, op1, op2, op1, op2);
9942}
9943
9944static void gen_min_u32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
9945{
9946    tcg_gen_movcond_i32(TCG_COND_LEU, res, op1, op2, op1, op2);
9947}
9948
9949/* Pairwise op subgroup of C3.6.16.
9950 *
9951 * This is called directly, or via handle_3same_float for the float pairwise
9952 * operations (where the opcode and size are calculated differently).
9953 */
9954static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode,
9955                                   int size, int rn, int rm, int rd)
9956{
9957    TCGv_ptr fpst;
9958    int pass;
9959
9960    /* Floating point operations need fpst */
9961    if (opcode >= 0x58) {
9962        fpst = get_fpstatus_ptr(false);
9963    } else {
9964        fpst = NULL;
9965    }
9966
9967    if (!fp_access_check(s)) {
9968        return;
9969    }
9970
9971    /* These operations work on the concatenated rm:rn, with each pair of
9972     * adjacent elements being operated on to produce an element in the result.
9973     */
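    /* For example, ADDP with 64-bit elements computes
     *   rd[0] = rn[0] + rn[1];   rd[1] = rm[0] + rm[1];
     */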
9974    if (size == 3) {
9975        TCGv_i64 tcg_res[2];
9976
9977        for (pass = 0; pass < 2; pass++) {
9978            TCGv_i64 tcg_op1 = tcg_temp_new_i64();
9979            TCGv_i64 tcg_op2 = tcg_temp_new_i64();
9980            int passreg = (pass == 0) ? rn : rm;
9981
9982            read_vec_element(s, tcg_op1, passreg, 0, MO_64);
9983            read_vec_element(s, tcg_op2, passreg, 1, MO_64);
9984            tcg_res[pass] = tcg_temp_new_i64();
9985
9986            switch (opcode) {
9987            case 0x17: /* ADDP */
9988                tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
9989                break;
9990            case 0x58: /* FMAXNMP */
9991                gen_helper_vfp_maxnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9992                break;
9993            case 0x5a: /* FADDP */
9994                gen_helper_vfp_addd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9995                break;
9996            case 0x5e: /* FMAXP */
9997                gen_helper_vfp_maxd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9998                break;
9999            case 0x78: /* FMINNMP */
10000                gen_helper_vfp_minnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10001                break;
10002            case 0x7e: /* FMINP */
10003                gen_helper_vfp_mind(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10004                break;
10005            default:
10006                g_assert_not_reached();
10007            }
10008
10009            tcg_temp_free_i64(tcg_op1);
10010            tcg_temp_free_i64(tcg_op2);
10011        }
10012
10013        for (pass = 0; pass < 2; pass++) {
10014            write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10015            tcg_temp_free_i64(tcg_res[pass]);
10016        }
10017    } else {
10018        int maxpass = is_q ? 4 : 2;
10019        TCGv_i32 tcg_res[4];
10020
10021        for (pass = 0; pass < maxpass; pass++) {
10022            TCGv_i32 tcg_op1 = tcg_temp_new_i32();
10023            TCGv_i32 tcg_op2 = tcg_temp_new_i32();
10024            NeonGenTwoOpFn *genfn = NULL;
10025            int passreg = pass < (maxpass / 2) ? rn : rm;
10026            int passelt = (is_q && (pass & 1)) ? 2 : 0;
10027
10028            read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_32);
10029            read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_32);
10030            tcg_res[pass] = tcg_temp_new_i32();
10031
10032            switch (opcode) {
10033            case 0x17: /* ADDP */
10034            {
10035                static NeonGenTwoOpFn * const fns[3] = {
10036                    gen_helper_neon_padd_u8,
10037                    gen_helper_neon_padd_u16,
10038                    tcg_gen_add_i32,
10039                };
10040                genfn = fns[size];
10041                break;
10042            }
10043            case 0x14: /* SMAXP, UMAXP */
10044            {
10045                static NeonGenTwoOpFn * const fns[3][2] = {
10046                    { gen_helper_neon_pmax_s8, gen_helper_neon_pmax_u8 },
10047                    { gen_helper_neon_pmax_s16, gen_helper_neon_pmax_u16 },
10048                    { gen_max_s32, gen_max_u32 },
10049                };
10050                genfn = fns[size][u];
10051                break;
10052            }
10053            case 0x15: /* SMINP, UMINP */
10054            {
10055                static NeonGenTwoOpFn * const fns[3][2] = {
10056                    { gen_helper_neon_pmin_s8, gen_helper_neon_pmin_u8 },
10057                    { gen_helper_neon_pmin_s16, gen_helper_neon_pmin_u16 },
10058                    { gen_min_s32, gen_min_u32 },
10059                };
10060                genfn = fns[size][u];
10061                break;
10062            }
10063            /* The FP operations are all on single floats (32 bit) */
10064            case 0x58: /* FMAXNMP */
10065                gen_helper_vfp_maxnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10066                break;
10067            case 0x5a: /* FADDP */
10068                gen_helper_vfp_adds(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10069                break;
10070            case 0x5e: /* FMAXP */
10071                gen_helper_vfp_maxs(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10072                break;
10073            case 0x78: /* FMINNMP */
10074                gen_helper_vfp_minnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10075                break;
10076            case 0x7e: /* FMINP */
10077                gen_helper_vfp_mins(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10078                break;
10079            default:
10080                g_assert_not_reached();
10081            }
10082
10083            /* FP ops were emitted directly above; integer ops call genfn now */
10084            if (genfn) {
10085                genfn(tcg_res[pass], tcg_op1, tcg_op2);
10086            }
10087
10088            tcg_temp_free_i32(tcg_op1);
10089            tcg_temp_free_i32(tcg_op2);
10090        }
10091
10092        for (pass = 0; pass < maxpass; pass++) {
10093            write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
10094            tcg_temp_free_i32(tcg_res[pass]);
10095        }
10096        clear_vec_high(s, is_q, rd);
10097    }
10098
10099    if (fpst) {
10100        tcg_temp_free_ptr(fpst);
10101    }
10102}
10103
10104/* Floating point op subgroup of C3.6.16. */
10105static void disas_simd_3same_float(DisasContext *s, uint32_t insn)
10106{
10107    /* For floating point ops, the U, size[1] and opcode bits
10108     * together indicate the operation. size[0] indicates single
10109     * or double.
10110     */
10111    int fpopcode = extract32(insn, 11, 5)
10112        | (extract32(insn, 23, 1) << 5)
10113        | (extract32(insn, 29, 1) << 6);
10114    int is_q = extract32(insn, 30, 1);
10115    int size = extract32(insn, 22, 1);
10116    int rm = extract32(insn, 16, 5);
10117    int rn = extract32(insn, 5, 5);
10118    int rd = extract32(insn, 0, 5);
10119
10120    int datasize = is_q ? 128 : 64;
10121    int esize = 32 << size;
10122    int elements = datasize / esize;
10123
10124    if (size == 1 && !is_q) {
10125        unallocated_encoding(s);
10126        return;
10127    }
10128
10129    switch (fpopcode) {
10130    case 0x58: /* FMAXNMP */
10131    case 0x5a: /* FADDP */
10132    case 0x5e: /* FMAXP */
10133    case 0x78: /* FMINNMP */
10134    case 0x7e: /* FMINP */
10135        if (size && !is_q) {
10136            unallocated_encoding(s);
10137            return;
10138        }
10139        handle_simd_3same_pair(s, is_q, 0, fpopcode, size ? MO_64 : MO_32,
10140                               rn, rm, rd);
10141        return;
10142    case 0x1b: /* FMULX */
10143    case 0x1f: /* FRECPS */
10144    case 0x3f: /* FRSQRTS */
10145    case 0x5d: /* FACGE */
10146    case 0x7d: /* FACGT */
10147    case 0x19: /* FMLA */
10148    case 0x39: /* FMLS */
10149    case 0x18: /* FMAXNM */
10150    case 0x1a: /* FADD */
10151    case 0x1c: /* FCMEQ */
10152    case 0x1e: /* FMAX */
10153    case 0x38: /* FMINNM */
10154    case 0x3a: /* FSUB */
10155    case 0x3e: /* FMIN */
10156    case 0x5b: /* FMUL */
10157    case 0x5c: /* FCMGE */
10158    case 0x5f: /* FDIV */
10159    case 0x7a: /* FABD */
10160    case 0x7c: /* FCMGT */
10161        if (!fp_access_check(s)) {
10162            return;
10163        }
10164
10165        handle_3same_float(s, size, elements, fpopcode, rd, rn, rm);
10166        return;
10167    default:
10168        unallocated_encoding(s);
10169        return;
10170    }
10171}
10172
10173static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
10174{
10175    gen_helper_neon_mul_u8(a, a, b);
10176    gen_helper_neon_add_u8(d, d, a);
10177}
10178
10179static void gen_mla16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
10180{
10181    gen_helper_neon_mul_u16(a, a, b);
10182    gen_helper_neon_add_u16(d, d, a);
10183}
10184
10185static void gen_mla32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
10186{
10187    tcg_gen_mul_i32(a, a, b);
10188    tcg_gen_add_i32(d, d, a);
10189}
10190
10191static void gen_mla64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
10192{
10193    tcg_gen_mul_i64(a, a, b);
10194    tcg_gen_add_i64(d, d, a);
10195}
10196
10197static void gen_mla_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
10198{
10199    tcg_gen_mul_vec(vece, a, a, b);
10200    tcg_gen_add_vec(vece, d, d, a);
10201}
10202
10203static void gen_mls8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
10204{
10205    gen_helper_neon_mul_u8(a, a, b);
10206    gen_helper_neon_sub_u8(d, d, a);
10207}
10208
10209static void gen_mls16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
10210{
10211    gen_helper_neon_mul_u16(a, a, b);
10212    gen_helper_neon_sub_u16(d, d, a);
10213}
10214
10215static void gen_mls32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
10216{
10217    tcg_gen_mul_i32(a, a, b);
10218    tcg_gen_sub_i32(d, d, a);
10219}
10220
10221static void gen_mls64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
10222{
10223    tcg_gen_mul_i64(a, a, b);
10224    tcg_gen_sub_i64(d, d, a);
10225}
10226
10227static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
10228{
10229    tcg_gen_mul_vec(vece, a, a, b);
10230    tcg_gen_sub_vec(vece, d, d, a);
10231}
10232
10233/* Integer op subgroup of C3.6.16. */
10234static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
10235{
10236    static const GVecGen3 cmtst_op[4] = {
10237        { .fni4 = gen_helper_neon_tst_u8,
10238          .fniv = gen_cmtst_vec,
10239          .vece = MO_8 },
10240        { .fni4 = gen_helper_neon_tst_u16,
10241          .fniv = gen_cmtst_vec,
10242          .vece = MO_16 },
10243        { .fni4 = gen_cmtst_i32,
10244          .fniv = gen_cmtst_vec,
10245          .vece = MO_32 },
10246        { .fni8 = gen_cmtst_i64,
10247          .fniv = gen_cmtst_vec,
10248          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
10249          .vece = MO_64 },
10250    };
10251    static const GVecGen3 mla_op[4] = {
10252        { .fni4 = gen_mla8_i32,
10253          .fniv = gen_mla_vec,
10254          .opc = INDEX_op_mul_vec,
10255          .load_dest = true,
10256          .vece = MO_8 },
10257        { .fni4 = gen_mla16_i32,
10258          .fniv = gen_mla_vec,
10259          .opc = INDEX_op_mul_vec,
10260          .load_dest = true,
10261          .vece = MO_16 },
10262        { .fni4 = gen_mla32_i32,
10263          .fniv = gen_mla_vec,
10264          .opc = INDEX_op_mul_vec,
10265          .load_dest = true,
10266          .vece = MO_32 },
10267        { .fni8 = gen_mla64_i64,
10268          .fniv = gen_mla_vec,
10269          .opc = INDEX_op_mul_vec,
10270          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
10271          .load_dest = true,
10272          .vece = MO_64 },
10273    };
10274    static const GVecGen3 mls_op[4] = {
10275        { .fni4 = gen_mls8_i32,
10276          .fniv = gen_mls_vec,
10277          .opc = INDEX_op_mul_vec,
10278          .load_dest = true,
10279          .vece = MO_8 },
10280        { .fni4 = gen_mls16_i32,
10281          .fniv = gen_mls_vec,
10282          .opc = INDEX_op_mul_vec,
10283          .load_dest = true,
10284          .vece = MO_16 },
10285        { .fni4 = gen_mls32_i32,
10286          .fniv = gen_mls_vec,
10287          .opc = INDEX_op_mul_vec,
10288          .load_dest = true,
10289          .vece = MO_32 },
10290        { .fni8 = gen_mls64_i64,
10291          .fniv = gen_mls_vec,
10292          .opc = INDEX_op_mul_vec,
10293          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
10294          .load_dest = true,
10295          .vece = MO_64 },
10296    };
10297
10298    int is_q = extract32(insn, 30, 1);
10299    int u = extract32(insn, 29, 1);
10300    int size = extract32(insn, 22, 2);
10301    int opcode = extract32(insn, 11, 5);
10302    int rm = extract32(insn, 16, 5);
10303    int rn = extract32(insn, 5, 5);
10304    int rd = extract32(insn, 0, 5);
10305    int pass;
10306    TCGCond cond;
10307
10308    switch (opcode) {
10309    case 0x13: /* MUL, PMUL */
10310        if (u && size != 0) {
10311            unallocated_encoding(s);
10312            return;
10313        }
10314        /* fall through */
10315    case 0x0: /* SHADD, UHADD */
10316    case 0x2: /* SRHADD, URHADD */
10317    case 0x4: /* SHSUB, UHSUB */
10318    case 0xc: /* SMAX, UMAX */
10319    case 0xd: /* SMIN, UMIN */
10320    case 0xe: /* SABD, UABD */
10321    case 0xf: /* SABA, UABA */
10322    case 0x12: /* MLA, MLS */
10323        if (size == 3) {
10324            unallocated_encoding(s);
10325            return;
10326        }
10327        break;
10328    case 0x16: /* SQDMULH, SQRDMULH */
10329        if (size == 0 || size == 3) {
10330            unallocated_encoding(s);
10331            return;
10332        }
10333        break;
10334    default:
10335        if (size == 3 && !is_q) {
10336            unallocated_encoding(s);
10337            return;
10338        }
10339        break;
10340    }
10341
10342    if (!fp_access_check(s)) {
10343        return;
10344    }
10345
10346    switch (opcode) {
10347    case 0x10: /* ADD, SUB */
10348        if (u) {
10349            gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_sub, size);
10350        } else {
10351            gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_add, size);
10352        }
10353        return;
10354    case 0x13: /* MUL, PMUL */
10355        if (!u) { /* MUL */
10356            gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_mul, size);
10357            return;
10358        }
10359        break;
10360    case 0x12: /* MLA, MLS */
10361        if (u) {
10362            gen_gvec_op3(s, is_q, rd, rn, rm, &mls_op[size]);
10363        } else {
10364            gen_gvec_op3(s, is_q, rd, rn, rm, &mla_op[size]);
10365        }
10366        return;
10367    case 0x11:
10368        if (!u) { /* CMTST */
10369            gen_gvec_op3(s, is_q, rd, rn, rm, &cmtst_op[size]);
10370            return;
10371        }
10372        /* else CMEQ */
10373        cond = TCG_COND_EQ;
10374        goto do_gvec_cmp;
10375    case 0x06: /* CMGT, CMHI */
10376        cond = u ? TCG_COND_GTU : TCG_COND_GT;
10377        goto do_gvec_cmp;
10378    case 0x07: /* CMGE, CMHS */
10379        cond = u ? TCG_COND_GEU : TCG_COND_GE;
10380    do_gvec_cmp:
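        /* tcg_gen_gvec_cmp() sets each result element to all-ones when
         * the condition holds and all-zeroes otherwise, which is
         * exactly the AdvSIMD compare result format.
         */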
10381        tcg_gen_gvec_cmp(cond, size, vec_full_reg_offset(s, rd),
10382                         vec_full_reg_offset(s, rn),
10383                         vec_full_reg_offset(s, rm),
10384                         is_q ? 16 : 8, vec_full_reg_size(s));
10385        return;
10386    }
10387
10388    if (size == 3) {
10389        assert(is_q);
10390        for (pass = 0; pass < 2; pass++) {
10391            TCGv_i64 tcg_op1 = tcg_temp_new_i64();
10392            TCGv_i64 tcg_op2 = tcg_temp_new_i64();
10393            TCGv_i64 tcg_res = tcg_temp_new_i64();
10394
10395            read_vec_element(s, tcg_op1, rn, pass, MO_64);
10396            read_vec_element(s, tcg_op2, rm, pass, MO_64);
10397
10398            handle_3same_64(s, opcode, u, tcg_res, tcg_op1, tcg_op2);
10399
10400            write_vec_element(s, tcg_res, rd, pass, MO_64);
10401
10402            tcg_temp_free_i64(tcg_res);
10403            tcg_temp_free_i64(tcg_op1);
10404            tcg_temp_free_i64(tcg_op2);
10405        }
10406    } else {
10407        for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
10408            TCGv_i32 tcg_op1 = tcg_temp_new_i32();
10409            TCGv_i32 tcg_op2 = tcg_temp_new_i32();
10410            TCGv_i32 tcg_res = tcg_temp_new_i32();
10411            NeonGenTwoOpFn *genfn = NULL;
10412            NeonGenTwoOpEnvFn *genenvfn = NULL;
10413
10414            read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
10415            read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);
10416
10417            switch (opcode) {
10418            case 0x0: /* SHADD, UHADD */
10419            {
10420                static NeonGenTwoOpFn * const fns[3][2] = {
10421                    { gen_helper_neon_hadd_s8, gen_helper_neon_hadd_u8 },
10422                    { gen_helper_neon_hadd_s16, gen_helper_neon_hadd_u16 },
10423                    { gen_helper_neon_hadd_s32, gen_helper_neon_hadd_u32 },
10424                };
10425                genfn = fns[size][u];
10426                break;
10427            }
10428            case 0x1: /* SQADD, UQADD */
10429            {
10430                static NeonGenTwoOpEnvFn * const fns[3][2] = {
10431                    { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 },
10432                    { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 },
10433                    { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 },
10434                };
10435                genenvfn = fns[size][u];
10436                break;
10437            }
10438            case 0x2: /* SRHADD, URHADD */
10439            {
10440                static NeonGenTwoOpFn * const fns[3][2] = {
10441                    { gen_helper_neon_rhadd_s8, gen_helper_neon_rhadd_u8 },
10442                    { gen_helper_neon_rhadd_s16, gen_helper_neon_rhadd_u16 },
10443                    { gen_helper_neon_rhadd_s32, gen_helper_neon_rhadd_u32 },
10444                };
10445                genfn = fns[size][u];
10446                break;
10447            }
10448            case 0x4: /* SHSUB, UHSUB */
10449            {
10450                static NeonGenTwoOpFn * const fns[3][2] = {
10451                    { gen_helper_neon_hsub_s8, gen_helper_neon_hsub_u8 },
10452                    { gen_helper_neon_hsub_s16, gen_helper_neon_hsub_u16 },
10453                    { gen_helper_neon_hsub_s32, gen_helper_neon_hsub_u32 },
10454                };
10455                genfn = fns[size][u];
10456                break;
10457            }
10458            case 0x5: /* SQSUB, UQSUB */
10459            {
10460                static NeonGenTwoOpEnvFn * const fns[3][2] = {
10461                    { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 },
10462                    { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 },
10463                    { gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 },
10464                };
10465                genenvfn = fns[size][u];
10466                break;
10467            }
10468            case 0x8: /* SSHL, USHL */
10469            {
10470                static NeonGenTwoOpFn * const fns[3][2] = {
10471                    { gen_helper_neon_shl_s8, gen_helper_neon_shl_u8 },
10472                    { gen_helper_neon_shl_s16, gen_helper_neon_shl_u16 },
10473                    { gen_helper_neon_shl_s32, gen_helper_neon_shl_u32 },
10474                };
10475                genfn = fns[size][u];
10476                break;
10477            }
10478            case 0x9: /* SQSHL, UQSHL */
10479            {
10480                static NeonGenTwoOpEnvFn * const fns[3][2] = {
10481                    { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
10482                    { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
10483                    { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
10484                };
10485                genenvfn = fns[size][u];
10486                break;
10487            }
10488            case 0xa: /* SRSHL, URSHL */
10489            {
10490                static NeonGenTwoOpFn * const fns[3][2] = {
10491                    { gen_helper_neon_rshl_s8, gen_helper_neon_rshl_u8 },
10492                    { gen_helper_neon_rshl_s16, gen_helper_neon_rshl_u16 },
10493                    { gen_helper_neon_rshl_s32, gen_helper_neon_rshl_u32 },
10494                };
10495                genfn = fns[size][u];
10496                break;
10497            }
10498            case 0xb: /* SQRSHL, UQRSHL */
10499            {
10500                static NeonGenTwoOpEnvFn * const fns[3][2] = {
10501                    { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
10502                    { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
10503                    { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
10504                };
10505                genenvfn = fns[size][u];
10506                break;
10507            }
10508            case 0xc: /* SMAX, UMAX */
10509            {
10510                static NeonGenTwoOpFn * const fns[3][2] = {
10511                    { gen_helper_neon_max_s8, gen_helper_neon_max_u8 },
10512                    { gen_helper_neon_max_s16, gen_helper_neon_max_u16 },
10513                    { gen_max_s32, gen_max_u32 },
10514                };
10515                genfn = fns[size][u];
10516                break;
10517            }
10518
10519            case 0xd: /* SMIN, UMIN */
10520            {
10521                static NeonGenTwoOpFn * const fns[3][2] = {
10522                    { gen_helper_neon_min_s8, gen_helper_neon_min_u8 },
10523                    { gen_helper_neon_min_s16, gen_helper_neon_min_u16 },
10524                    { gen_min_s32, gen_min_u32 },
10525                };
10526                genfn = fns[size][u];
10527                break;
10528            }
10529            case 0xe: /* SABD, UABD */
10530            case 0xf: /* SABA, UABA */
10531            {
10532                static NeonGenTwoOpFn * const fns[3][2] = {
10533                    { gen_helper_neon_abd_s8, gen_helper_neon_abd_u8 },
10534                    { gen_helper_neon_abd_s16, gen_helper_neon_abd_u16 },
10535                    { gen_helper_neon_abd_s32, gen_helper_neon_abd_u32 },
10536                };
10537                genfn = fns[size][u];
10538                break;
10539            }
10540            case 0x13: /* MUL, PMUL */
10541                assert(u); /* PMUL */
10542                assert(size == 0);
10543                genfn = gen_helper_neon_mul_p8;
10544                break;
10545            case 0x16: /* SQDMULH, SQRDMULH */
10546            {
10547                static NeonGenTwoOpEnvFn * const fns[2][2] = {
10548                    { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 },
10549                    { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 },
10550                };
10551                assert(size == 1 || size == 2);
10552                genenvfn = fns[size - 1][u];
10553                break;
10554            }
10555            default:
10556                g_assert_not_reached();
10557            }
10558
10559            if (genenvfn) {
10560                genenvfn(tcg_res, cpu_env, tcg_op1, tcg_op2);
10561            } else {
10562                genfn(tcg_res, tcg_op1, tcg_op2);
10563            }
10564
10565            if (opcode == 0xf) {
10566                /* SABA, UABA: accumulating ops */
10567                static NeonGenTwoOpFn * const fns[3] = {
10568                    gen_helper_neon_add_u8,
10569                    gen_helper_neon_add_u16,
10570                    tcg_gen_add_i32,
10571                };
10572
10573                read_vec_element_i32(s, tcg_op1, rd, pass, MO_32);
10574                fns[size](tcg_res, tcg_op1, tcg_res);
10575            }
10576
10577            write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
10578
10579            tcg_temp_free_i32(tcg_res);
10580            tcg_temp_free_i32(tcg_op1);
10581            tcg_temp_free_i32(tcg_op2);
10582        }
10583    }
10584    clear_vec_high(s, is_q, rd);
10585}
10586
10587/* AdvSIMD three same
10588 *  31  30  29  28       24 23  22  21 20  16 15    11  10 9    5 4    0
10589 * +---+---+---+-----------+------+---+------+--------+---+------+------+
10590 * | 0 | Q | U | 0 1 1 1 0 | size | 1 |  Rm  | opcode | 1 |  Rn  |  Rd  |
10591 * +---+---+---+-----------+------+---+------+--------+---+------+------+
10592 */
10593static void disas_simd_three_reg_same(DisasContext *s, uint32_t insn)
10594{
10595    int opcode = extract32(insn, 11, 5);
10596
10597    switch (opcode) {
10598    case 0x3: /* logic ops */
10599        disas_simd_3same_logic(s, insn);
10600        break;
10601    case 0x17: /* ADDP */
10602    case 0x14: /* SMAXP, UMAXP */
10603    case 0x15: /* SMINP, UMINP */
10604    {
10605        /* Pairwise operations */
10606        int is_q = extract32(insn, 30, 1);
10607        int u = extract32(insn, 29, 1);
10608        int size = extract32(insn, 22, 2);
10609        int rm = extract32(insn, 16, 5);
10610        int rn = extract32(insn, 5, 5);
10611        int rd = extract32(insn, 0, 5);
10612        if (opcode == 0x17) {
10613            if (u || (size == 3 && !is_q)) {
10614                unallocated_encoding(s);
10615                return;
10616            }
10617        } else {
10618            if (size == 3) {
10619                unallocated_encoding(s);
10620                return;
10621            }
10622        }
10623        handle_simd_3same_pair(s, is_q, u, opcode, size, rn, rm, rd);
10624        break;
10625    }
10626    case 0x18 ... 0x31:
10627        /* floating point ops, sz[1] and U are part of opcode */
10628        disas_simd_3same_float(s, insn);
10629        break;
10630    default:
10631        disas_simd_3same_int(s, insn);
10632        break;
10633    }
10634}
10635
10636/*
10637 * Advanced SIMD three same (ARMv8.2 FP16 variants)
10638 *
10639 *  31  30  29  28       24 23  22 21 20  16 15 14 13    11 10  9    5 4    0
10640 * +---+---+---+-----------+---------+------+-----+--------+---+------+------+
10641 * | 0 | Q | U | 0 1 1 1 0 | a | 1 0 |  Rm  | 0 0 | opcode | 1 |  Rn  |  Rd  |
10642 * +---+---+---+-----------+---------+------+-----+--------+---+------+------+
10643 *
10644 * This includes FMULX, FCMEQ (register), FRECPS, FRSQRTS, FCMGE
10645 * (register), FACGE, FABD, FCMGT (register) and FACGT.
10646 *
10647 */
10648static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn)
10649{
10650    int opcode, fpopcode;
10651    int is_q, u, a, rm, rn, rd;
10652    int datasize, elements;
10653    int pass;
10654    TCGv_ptr fpst;
10655    bool pairwise = false;
10656
10657    if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
10658        unallocated_encoding(s);
10659        return;
10660    }
10661
10662    if (!fp_access_check(s)) {
10663        return;
10664    }
10665
10666    /* For these floating point ops, the U, a and opcode bits
10667     * together indicate the operation.
10668     */
10669    opcode = extract32(insn, 11, 3);
10670    u = extract32(insn, 29, 1);
10671    a = extract32(insn, 23, 1);
10672    is_q = extract32(insn, 30, 1);
10673    rm = extract32(insn, 16, 5);
10674    rn = extract32(insn, 5, 5);
10675    rd = extract32(insn, 0, 5);
10676
10677    fpopcode = opcode | (a << 3) | (u << 4);
10678    datasize = is_q ? 128 : 64;
10679    elements = datasize / 16;
10680
10681    switch (fpopcode) {
10682    case 0x10: /* FMAXNMP */
10683    case 0x12: /* FADDP */
10684    case 0x16: /* FMAXP */
10685    case 0x18: /* FMINNMP */
10686    case 0x1e: /* FMINP */
10687        pairwise = true;
10688        break;
10689    }
10690
10691    fpst = get_fpstatus_ptr(true);
10692
10693    if (pairwise) {
10694        int maxpass = is_q ? 8 : 4;
10695        TCGv_i32 tcg_op1 = tcg_temp_new_i32();
10696        TCGv_i32 tcg_op2 = tcg_temp_new_i32();
10697        TCGv_i32 tcg_res[8];
10698
10699        for (pass = 0; pass < maxpass; pass++) {
10700            int passreg = pass < (maxpass / 2) ? rn : rm;
10701            int passelt = (pass << 1) & (maxpass - 1);
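            /* The first half of the passes draws element pairs from rn
             * and the second half from rm; passelt wraps so each source
             * register is consumed from element 0 upwards.
             */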
10702
10703            read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_16);
10704            read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_16);
10705            tcg_res[pass] = tcg_temp_new_i32();
10706
10707            switch (fpopcode) {
10708            case 0x10: /* FMAXNMP */
10709                gen_helper_advsimd_maxnumh(tcg_res[pass], tcg_op1, tcg_op2,
10710                                           fpst);
10711                break;
10712            case 0x12: /* FADDP */
10713                gen_helper_advsimd_addh(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10714                break;
10715            case 0x16: /* FMAXP */
10716                gen_helper_advsimd_maxh(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10717                break;
10718            case 0x18: /* FMINNMP */
10719                gen_helper_advsimd_minnumh(tcg_res[pass], tcg_op1, tcg_op2,
10720                                           fpst);
10721                break;
10722            case 0x1e: /* FMINP */
10723                gen_helper_advsimd_minh(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10724                break;
10725            default:
10726                g_assert_not_reached();
10727            }
10728        }
10729
10730        for (pass = 0; pass < maxpass; pass++) {
10731            write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_16);
10732            tcg_temp_free_i32(tcg_res[pass]);
10733        }
10734
10735        tcg_temp_free_i32(tcg_op1);
10736        tcg_temp_free_i32(tcg_op2);
10737
10738    } else {
10739        for (pass = 0; pass < elements; pass++) {
10740            TCGv_i32 tcg_op1 = tcg_temp_new_i32();
10741            TCGv_i32 tcg_op2 = tcg_temp_new_i32();
10742            TCGv_i32 tcg_res = tcg_temp_new_i32();
10743
10744            read_vec_element_i32(s, tcg_op1, rn, pass, MO_16);
10745            read_vec_element_i32(s, tcg_op2, rm, pass, MO_16);
10746
10747            switch (fpopcode) {
10748            case 0x0: /* FMAXNM */
10749                gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst);
10750                break;
10751            case 0x1: /* FMLA */
10752                read_vec_element_i32(s, tcg_res, rd, pass, MO_16);
10753                gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_res,
10754                                           fpst);
10755                break;
10756            case 0x2: /* FADD */
10757                gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst);
10758                break;
10759            case 0x3: /* FMULX */
10760                gen_helper_advsimd_mulxh(tcg_res, tcg_op1, tcg_op2, fpst);
10761                break;
10762            case 0x4: /* FCMEQ */
10763                gen_helper_advsimd_ceq_f16(tcg_res, tcg_op1, tcg_op2, fpst);
10764                break;
10765            case 0x6: /* FMAX */
10766                gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst);
10767                break;
10768            case 0x7: /* FRECPS */
10769                gen_helper_recpsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
10770                break;
10771            case 0x8: /* FMINNM */
10772                gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst);
10773                break;
10774            case 0x9: /* FMLS */
10775                /* As usual for ARM, separate negation for fused multiply-add */
10776                tcg_gen_xori_i32(tcg_op1, tcg_op1, 0x8000);
10777                read_vec_element_i32(s, tcg_res, rd, pass, MO_16);
10778                gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_res,
10779                                           fpst);
10780                break;
10781            case 0xa: /* FSUB */
10782                gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
10783                break;
10784            case 0xe: /* FMIN */
10785                gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst);
10786                break;
10787            case 0xf: /* FRSQRTS */
10788                gen_helper_rsqrtsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
10789                break;
10790            case 0x13: /* FMUL */
10791                gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst);
10792                break;
10793            case 0x14: /* FCMGE */
10794                gen_helper_advsimd_cge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
10795                break;
10796            case 0x15: /* FACGE */
10797                gen_helper_advsimd_acge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
10798                break;
10799            case 0x17: /* FDIV */
10800                gen_helper_advsimd_divh(tcg_res, tcg_op1, tcg_op2, fpst);
10801                break;
10802            case 0x1a: /* FABD */
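                /* Compute the difference, then clear the sign bit of
                 * the half-precision result to take the absolute value.
                 */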
10803                gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
10804                tcg_gen_andi_i32(tcg_res, tcg_res, 0x7fff);
10805                break;
10806            case 0x1c: /* FCMGT */
10807                gen_helper_advsimd_cgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
10808                break;
10809            case 0x1d: /* FACGT */
10810                gen_helper_advsimd_acgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
10811                break;
10812            default:
10813                fprintf(stderr, "%s: insn %#04x, fpop %#2x @ %#" PRIx64 "\n",
10814                        __func__, insn, fpopcode, s->pc);
10815                g_assert_not_reached();
10816            }
10817
10818            write_vec_element_i32(s, tcg_res, rd, pass, MO_16);
10819            tcg_temp_free_i32(tcg_res);
10820            tcg_temp_free_i32(tcg_op1);
10821            tcg_temp_free_i32(tcg_op2);
10822        }
10823    }
10824
10825    tcg_temp_free_ptr(fpst);
10826
10827    clear_vec_high(s, is_q, rd);
10828}
10829
10830/* AdvSIMD three same extra
10831 *  31   30  29 28       24 23  22  21 20  16  15 14    11  10 9  5 4  0
10832 * +---+---+---+-----------+------+---+------+---+--------+---+----+----+
10833 * | 0 | Q | U | 0 1 1 1 0 | size | 0 |  Rm  | 1 | opcode | 1 | Rn | Rd |
10834 * +---+---+---+-----------+------+---+------+---+--------+---+----+----+
10835 */
10836static void disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn)
10837{
10838    int rd = extract32(insn, 0, 5);
10839    int rn = extract32(insn, 5, 5);
10840    int opcode = extract32(insn, 11, 4);
10841    int rm = extract32(insn, 16, 5);
10842    int size = extract32(insn, 22, 2);
10843    bool u = extract32(insn, 29, 1);
10844    bool is_q = extract32(insn, 30, 1);
10845    int feature, rot;
10846
10847    switch (u * 16 + opcode) {
10848    case 0x10: /* SQRDMLAH (vector) */
10849    case 0x11: /* SQRDMLSH (vector) */
10850        if (size != 1 && size != 2) {
10851            unallocated_encoding(s);
10852            return;
10853        }
10854        feature = ARM_FEATURE_V8_RDM;
10855        break;
10856    case 0x8: /* FCMLA, #0 */
10857    case 0x9: /* FCMLA, #90 */
10858    case 0xa: /* FCMLA, #180 */
10859    case 0xb: /* FCMLA, #270 */
10860    case 0xc: /* FCADD, #90 */
10861    case 0xe: /* FCADD, #270 */
10862        if (size == 0
10863            || (size == 1 && !arm_dc_feature(s, ARM_FEATURE_V8_FP16))
10864            || (size == 3 && !is_q)) {
10865            unallocated_encoding(s);
10866            return;
10867        }
10868        feature = ARM_FEATURE_V8_FCMA;
10869        break;
10870    default:
10871        unallocated_encoding(s);
10872        return;
10873    }
10874    if (!arm_dc_feature(s, feature)) {
10875        unallocated_encoding(s);
10876        return;
10877    }
10878    if (!fp_access_check(s)) {
10879        return;
10880    }
10881
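    /* For SQRDMLAH/SQRDMLSH the gvec helpers take cpu_env so that they
     * can set the QC (cumulative saturation) flag when a result
     * saturates.
     */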
10882    switch (opcode) {
10883    case 0x0: /* SQRDMLAH (vector) */
10884        switch (size) {
10885        case 1:
10886            gen_gvec_op3_env(s, is_q, rd, rn, rm, gen_helper_gvec_qrdmlah_s16);
10887            break;
10888        case 2:
10889            gen_gvec_op3_env(s, is_q, rd, rn, rm, gen_helper_gvec_qrdmlah_s32);
10890            break;
10891        default:
10892            g_assert_not_reached();
10893        }
10894        return;
10895
10896    case 0x1: /* SQRDMLSH (vector) */
10897        switch (size) {
10898        case 1:
10899            gen_gvec_op3_env(s, is_q, rd, rn, rm, gen_helper_gvec_qrdmlsh_s16);
10900            break;
10901        case 2:
10902            gen_gvec_op3_env(s, is_q, rd, rn, rm, gen_helper_gvec_qrdmlsh_s32);
10903            break;
10904        default:
10905            g_assert_not_reached();
10906        }
10907        return;
10908
10909    case 0x8: /* FCMLA, #0 */
10910    case 0x9: /* FCMLA, #90 */
10911    case 0xa: /* FCMLA, #180 */
10912    case 0xb: /* FCMLA, #270 */
10913        rot = extract32(opcode, 0, 2);
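        /* rot encodes the rotation: 0, 1, 2, 3 -> #0, #90, #180, #270 */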
10914        switch (size) {
10915        case 1:
10916            gen_gvec_op3_fpst(s, is_q, rd, rn, rm, true, rot,
10917                              gen_helper_gvec_fcmlah);
10918            break;
10919        case 2:
10920            gen_gvec_op3_fpst(s, is_q, rd, rn, rm, false, rot,
10921                              gen_helper_gvec_fcmlas);
10922            break;
10923        case 3:
10924            gen_gvec_op3_fpst(s, is_q, rd, rn, rm, false, rot,
10925                              gen_helper_gvec_fcmlad);
10926            break;
10927        default:
10928            g_assert_not_reached();
10929        }
10930        return;
10931
10932    case 0xc: /* FCADD, #90 */
10933    case 0xe: /* FCADD, #270 */
10934        rot = extract32(opcode, 1, 1);
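        /* rot is 0 for FCADD #90 and 1 for FCADD #270 */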
10935        switch (size) {
10936        case 1:
10937            gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot,
10938                              gen_helper_gvec_fcaddh);
10939            break;
10940        case 2:
10941            gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot,
10942                              gen_helper_gvec_fcadds);
10943            break;
10944        case 3:
10945            gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot,
10946                              gen_helper_gvec_fcaddd);
10947            break;
10948        default:
10949            g_assert_not_reached();
10950        }
10951        return;
10952
10953    default:
10954        g_assert_not_reached();
10955    }
10956}
10957
10958static void handle_2misc_widening(DisasContext *s, int opcode, bool is_q,
10959                                  int size, int rn, int rd)
10960{
10961    /* Handle 2-reg-misc ops which are widening (so each size element
10962     * in the source becomes a 2*size element in the destination).
10963     * The only instruction like this is FCVTL.
10964     */
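    /* For example, FCVTL Vd.2D, Vn.2S converts the two low single-
     * precision elements of Vn to double; FCVTL2 takes them from the
     * high half of Vn instead.
     */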
10965    int pass;
10966
10967    if (size == 3) {
10968        /* 32 -> 64 bit fp conversion */
10969        TCGv_i64 tcg_res[2];
10970        int srcelt = is_q ? 2 : 0;
10971
10972        for (pass = 0; pass < 2; pass++) {
10973            TCGv_i32 tcg_op = tcg_temp_new_i32();
10974            tcg_res[pass] = tcg_temp_new_i64();
10975
10976            read_vec_element_i32(s, tcg_op, rn, srcelt + pass, MO_32);
10977            gen_helper_vfp_fcvtds(tcg_res[pass], tcg_op, cpu_env);
10978            tcg_temp_free_i32(tcg_op);
10979        }
10980        for (pass = 0; pass < 2; pass++) {
10981            write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10982            tcg_temp_free_i64(tcg_res[pass]);
10983        }
10984    } else {
10985        /* 16 -> 32 bit fp conversion */
10986        int srcelt = is_q ? 4 : 0;
10987        TCGv_i32 tcg_res[4];
10988
10989        for (pass = 0; pass < 4; pass++) {
10990            tcg_res[pass] = tcg_temp_new_i32();
10991
10992            read_vec_element_i32(s, tcg_res[pass], rn, srcelt + pass, MO_16);
10993            gen_helper_vfp_fcvt_f16_to_f32(tcg_res[pass], tcg_res[pass],
10994                                           cpu_env);
10995        }
10996        for (pass = 0; pass < 4; pass++) {
10997            write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
10998            tcg_temp_free_i32(tcg_res[pass]);
10999        }
11000    }
11001}
11002
11003static void handle_rev(DisasContext *s, int opcode, bool u,
11004                       bool is_q, int size, int rn, int rd)
11005{
11006    int op = (opcode << 1) | u;
11007    int opsz = op + size;
11008    int grp_size = 3 - opsz;
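    /* grp_size is log2 of the number of elements in each group being
     * reversed: REV64/REV32/REV16 reverse within 64-, 32- and 16-bit
     * groups, so a group holds 2^(3 - op - size) elements of
     * 8 << size bits each.
     */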
11009    int dsize = is_q ? 128 : 64;
11010    int i;
11011
11012    if (opsz >= 3) {
11013        unallocated_encoding(s);
11014        return;
11015    }
11016
11017    if (!fp_access_check(s)) {
11018        return;
11019    }
11020
11021    if (size == 0) {
11022        /* Special-case bytes: use a bswap op on each group of elements */
11023        int groups = dsize / (8 << grp_size);
11024
11025        for (i = 0; i < groups; i++) {
11026            TCGv_i64 tcg_tmp = tcg_temp_new_i64();
11027
11028            read_vec_element(s, tcg_tmp, rn, i, grp_size);
11029            switch (grp_size) {
11030            case MO_16:
11031                tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
11032                break;
11033            case MO_32:
11034                tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp);
11035                break;
11036            case MO_64:
11037                tcg_gen_bswap64_i64(tcg_tmp, tcg_tmp);
11038                break;
11039            default:
11040                g_assert_not_reached();
11041            }
11042            write_vec_element(s, tcg_tmp, rd, i, grp_size);
11043            tcg_temp_free_i64(tcg_tmp);
11044        }
11045        clear_vec_high(s, is_q, rd);
11046    } else {
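        /* Reverse the element order within each group by flipping the
         * low grp_size bits of each element index (XOR with revmask),
         * then deposit the elements at their permuted offsets.
         */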
11047        int revmask = (1 << grp_size) - 1;
11048        int esize = 8 << size;
11049        int elements = dsize / esize;
11050        TCGv_i64 tcg_rn = tcg_temp_new_i64();
11051        TCGv_i64 tcg_rd = tcg_const_i64(0);
11052        TCGv_i64 tcg_rd_hi = tcg_const_i64(0);
11053
11054        for (i = 0; i < elements; i++) {
11055            int e_rev = (i & 0xf) ^ revmask;
11056            int off = e_rev * esize;
11057            read_vec_element(s, tcg_rn, rn, i, size);
11058            if (off >= 64) {
11059                tcg_gen_deposit_i64(tcg_rd_hi, tcg_rd_hi,
11060                                    tcg_rn, off - 64, esize);
11061            } else {
11062                tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, off, esize);
11063            }
11064        }
11065        write_vec_element(s, tcg_rd, rd, 0, MO_64);
11066        write_vec_element(s, tcg_rd_hi, rd, 1, MO_64);
11067
11068        tcg_temp_free_i64(tcg_rd_hi);
11069        tcg_temp_free_i64(tcg_rd);
11070        tcg_temp_free_i64(tcg_rn);
11071    }
11072}
11073
11074static void handle_2misc_pairwise(DisasContext *s, int opcode, bool u,
11075                                  bool is_q, int size, int rn, int rd)
11076{
11077    /* Implement the pairwise operations from 2-misc:
11078     * SADDLP, UADDLP, SADALP, UADALP.
11079     * These all add pairs of elements in the input to produce a
11080     * double-width result element in the output (possibly accumulating).
11081     */
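    /* For example, SADDLP Vd.4S, Vn.8H adds adjacent signed halfword
     * pairs to produce word results; SADALP additionally accumulates
     * the sums into Vd.
     */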
11082    bool accum = (opcode == 0x6);
11083    int maxpass = is_q ? 2 : 1;
11084    int pass;
11085    TCGv_i64 tcg_res[2];
11086
11087    if (size == 2) {
11088        /* 32 + 32 -> 64 op */
11089        TCGMemOp memop = size + (u ? 0 : MO_SIGN);
11090
11091        for (pass = 0; pass < maxpass; pass++) {
11092            TCGv_i64 tcg_op1 = tcg_temp_new_i64();
11093            TCGv_i64 tcg_op2 = tcg_temp_new_i64();
11094
11095            tcg_res[pass] = tcg_temp_new_i64();
11096
11097            read_vec_element(s, tcg_op1, rn, pass * 2, memop);
11098            read_vec_element(s, tcg_op2, rn, pass * 2 + 1, memop);
11099            tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
11100            if (accum) {
11101                read_vec_element(s, tcg_op1, rd, pass, MO_64);
11102                tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
11103            }
11104
11105            tcg_temp_free_i64(tcg_op1);
11106            tcg_temp_free_i64(tcg_op2);
11107        }
11108    } else {
11109        for (pass = 0; pass < maxpass; pass++) {
11110            TCGv_i64 tcg_op = tcg_temp_new_i64();
11111            NeonGenOneOpFn *genfn;
11112            static NeonGenOneOpFn * const fns[2][2] = {
11113                { gen_helper_neon_addlp_s8,  gen_helper_neon_addlp_u8 },
11114                { gen_helper_neon_addlp_s16,  gen_helper_neon_addlp_u16 },
11115            };
11116
11117            genfn = fns[size][u];
11118
11119            tcg_res[pass] = tcg_temp_new_i64();
11120
11121            read_vec_element(s, tcg_op, rn, pass, MO_64);
11122            genfn(tcg_res[pass], tcg_op);
11123
11124            if (accum) {
11125                read_vec_element(s, tcg_op, rd, pass, MO_64);
11126                if (size == 0) {
11127                    gen_helper_neon_addl_u16(tcg_res[pass],
11128                                             tcg_res[pass], tcg_op);
11129                } else {
11130                    gen_helper_neon_addl_u32(tcg_res[pass],
11131                                             tcg_res[pass], tcg_op);
11132                }
11133            }
11134            tcg_temp_free_i64(tcg_op);
11135        }
11136    }
11137    if (!is_q) {
11138        tcg_res[1] = tcg_const_i64(0);
11139    }
11140    for (pass = 0; pass < 2; pass++) {
11141        write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
11142        tcg_temp_free_i64(tcg_res[pass]);
11143    }
11144}
11145
11146static void handle_shll(DisasContext *s, bool is_q, int size, int rn, int rd)
11147{
11148    /* Implement SHLL and SHLL2 */
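    /* SHLL widens each element of the low (or, for SHLL2, high) half
     * of the source and then shifts it left by the element size, so
     * the shift amount is always 8 << size.
     */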
11149    int pass;
11150    int part = is_q ? 2 : 0;
11151    TCGv_i64 tcg_res[2];
11152
11153    for (pass = 0; pass < 2; pass++) {
11154        static NeonGenWidenFn * const widenfns[3] = {
11155            gen_helper_neon_widen_u8,
11156            gen_helper_neon_widen_u16,
11157            tcg_gen_extu_i32_i64,
11158        };
11159        NeonGenWidenFn *widenfn = widenfns[size];
11160        TCGv_i32 tcg_op = tcg_temp_new_i32();
11161
11162        read_vec_element_i32(s, tcg_op, rn, part + pass, MO_32);
11163        tcg_res[pass] = tcg_temp_new_i64();
11164        widenfn(tcg_res[pass], tcg_op);
11165        tcg_gen_shli_i64(tcg_res[pass], tcg_res[pass], 8 << size);
11166
11167        tcg_temp_free_i32(tcg_op);
11168    }
11169
11170    for (pass = 0; pass < 2; pass++) {
11171        write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
11172        tcg_temp_free_i64(tcg_res[pass]);
11173    }
11174}
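
/* For reference: SHLL widens each element and shifts it left by the
 * source element width, so SHLL Vd.8H, Vn.8B, #8 turns the byte 0x7f
 * into the halfword 0x7f00 (widenfn plus the shli by 8 << size
 * above). SHLL2 (part == 2) reads the upper half of Vn instead.
 */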
11175
11176/* AdvSIMD two reg misc
11177 *   31  30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
11178 * +---+---+---+-----------+------+-----------+--------+-----+------+------+
11179 * | 0 | Q | U | 0 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
11180 * +---+---+---+-----------+------+-----------+--------+-----+------+------+
11181 */
11182static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
11183{
11184    int size = extract32(insn, 22, 2);
11185    int opcode = extract32(insn, 12, 5);
11186    bool u = extract32(insn, 29, 1);
11187    bool is_q = extract32(insn, 30, 1);
11188    int rn = extract32(insn, 5, 5);
11189    int rd = extract32(insn, 0, 5);
11190    bool need_fpstatus = false;
11191    bool need_rmode = false;
11192    int rmode = -1;
11193    TCGv_i32 tcg_rmode;
11194    TCGv_ptr tcg_fpstatus;
11195
11196    switch (opcode) {
11197    case 0x0: /* REV64, REV32 */
11198    case 0x1: /* REV16 */
11199        handle_rev(s, opcode, u, is_q, size, rn, rd);
11200        return;
11201    case 0x5: /* CNT, NOT, RBIT */
11202        if (u && size == 0) {
11203            /* NOT */
11204            break;
11205        } else if (u && size == 1) {
11206            /* RBIT */
11207            break;
11208        } else if (!u && size == 0) {
11209            /* CNT */
11210            break;
11211        }
11212        unallocated_encoding(s);
11213        return;
11214    case 0x12: /* XTN, XTN2, SQXTUN, SQXTUN2 */
11215    case 0x14: /* SQXTN, SQXTN2, UQXTN, UQXTN2 */
11216        if (size == 3) {
11217            unallocated_encoding(s);
11218            return;
11219        }
11220        if (!fp_access_check(s)) {
11221            return;
11222        }
11223
11224        handle_2misc_narrow(s, false, opcode, u, is_q, size, rn, rd);
11225        return;
11226    case 0x4: /* CLS, CLZ */
11227        if (size == 3) {
11228            unallocated_encoding(s);
11229            return;
11230        }
11231        break;
11232    case 0x2: /* SADDLP, UADDLP */
11233    case 0x6: /* SADALP, UADALP */
11234        if (size == 3) {
11235            unallocated_encoding(s);
11236            return;
11237        }
11238        if (!fp_access_check(s)) {
11239            return;
11240        }
11241        handle_2misc_pairwise(s, opcode, u, is_q, size, rn, rd);
11242        return;
11243    case 0x13: /* SHLL, SHLL2 */
11244        if (u == 0 || size == 3) {
11245            unallocated_encoding(s);
11246            return;
11247        }
11248        if (!fp_access_check(s)) {
11249            return;
11250        }
11251        handle_shll(s, is_q, size, rn, rd);
11252        return;
11253    case 0xa: /* CMLT */
11254        if (u == 1) {
11255            unallocated_encoding(s);
11256            return;
11257        }
11258        /* fall through */
11259    case 0x8: /* CMGT, CMGE */
11260    case 0x9: /* CMEQ, CMLE */
11261    case 0xb: /* ABS, NEG */
11262        if (size == 3 && !is_q) {
11263            unallocated_encoding(s);
11264            return;
11265        }
11266        break;
11267    case 0x3: /* SUQADD, USQADD */
11268        if (size == 3 && !is_q) {
11269            unallocated_encoding(s);
11270            return;
11271        }
11272        if (!fp_access_check(s)) {
11273            return;
11274        }
11275        handle_2misc_satacc(s, false, u, is_q, size, rn, rd);
11276        return;
11277    case 0x7: /* SQABS, SQNEG */
11278        if (size == 3 && !is_q) {
11279            unallocated_encoding(s);
11280            return;
11281        }
11282        break;
11283    case 0xc ... 0xf:
11284    case 0x16 ... 0x1d:
11285    case 0x1f:
11286    {
11287        /* Floating point: U, size[1] and opcode indicate operation;
11288         * size[0] indicates single or double precision.
11289         */
11290        int is_double = extract32(size, 0, 1);
11291        opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
11292        size = is_double ? 3 : 2;
11293        switch (opcode) {
11294        case 0x2f: /* FABS */
11295        case 0x6f: /* FNEG */
11296            if (size == 3 && !is_q) {
11297                unallocated_encoding(s);
11298                return;
11299            }
11300            break;
11301        case 0x1d: /* SCVTF */
11302        case 0x5d: /* UCVTF */
11303        {
11304            bool is_signed = (opcode == 0x1d);
11305            int elements = is_double ? 2 : is_q ? 4 : 2;
11306            if (is_double && !is_q) {
11307                unallocated_encoding(s);
11308                return;
11309            }
11310            if (!fp_access_check(s)) {
11311                return;
11312            }
11313            handle_simd_intfp_conv(s, rd, rn, elements, is_signed, 0, size);
11314            return;
11315        }
11316        case 0x2c: /* FCMGT (zero) */
11317        case 0x2d: /* FCMEQ (zero) */
11318        case 0x2e: /* FCMLT (zero) */
11319        case 0x6c: /* FCMGE (zero) */
11320        case 0x6d: /* FCMLE (zero) */
11321            if (size == 3 && !is_q) {
11322                unallocated_encoding(s);
11323                return;
11324            }
11325            handle_2misc_fcmp_zero(s, opcode, false, u, is_q, size, rn, rd);
11326            return;
11327        case 0x7f: /* FSQRT */
11328            if (size == 3 && !is_q) {
11329                unallocated_encoding(s);
11330                return;
11331            }
11332            break;
11333        case 0x1a: /* FCVTNS */
11334        case 0x1b: /* FCVTMS */
11335        case 0x3a: /* FCVTPS */
11336        case 0x3b: /* FCVTZS */
11337        case 0x5a: /* FCVTNU */
11338        case 0x5b: /* FCVTMU */
11339        case 0x7a: /* FCVTPU */
11340        case 0x7b: /* FCVTZU */
11341            need_fpstatus = true;
11342            need_rmode = true;
11343            rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
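            /* These two bits index the ARMFPRounding enum: FCVTNS
             * 0x1a -> 0 (TIEEVEN), FCVTPS 0x3a -> 1 (POSINF), FCVTMS
             * 0x1b -> 2 (NEGINF), FCVTZS 0x3b -> 3 (ZERO), and
             * likewise for the unsigned forms.
             */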
11344            if (size == 3 && !is_q) {
11345                unallocated_encoding(s);
11346                return;
11347            }
11348            break;
11349        case 0x5c: /* FCVTAU */
11350        case 0x1c: /* FCVTAS */
11351            need_fpstatus = true;
11352            need_rmode = true;
11353            rmode = FPROUNDING_TIEAWAY;
11354            if (size == 3 && !is_q) {
11355                unallocated_encoding(s);
11356                return;
11357            }
11358            break;
11359        case 0x3c: /* URECPE */
11360            if (size == 3) {
11361                unallocated_encoding(s);
11362                return;
11363            }
11364            /* fall through */
11365        case 0x3d: /* FRECPE */
11366        case 0x7d: /* FRSQRTE */
11367            if (size == 3 && !is_q) {
11368                unallocated_encoding(s);
11369                return;
11370            }
11371            if (!fp_access_check(s)) {
11372                return;
11373            }
11374            handle_2misc_reciprocal(s, opcode, false, u, is_q, size, rn, rd);
11375            return;
11376        case 0x56: /* FCVTXN, FCVTXN2 */
11377            if (size == 2) {
11378                unallocated_encoding(s);
11379                return;
11380            }
11381            /* fall through */
11382        case 0x16: /* FCVTN, FCVTN2 */
11383            /* handle_2misc_narrow does a 2*size -> size operation, but these
11384             * instructions encode the source size rather than dest size.
11385             */
11386            if (!fp_access_check(s)) {
11387                return;
11388            }
11389            handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd);
11390            return;
11391        case 0x17: /* FCVTL, FCVTL2 */
11392            if (!fp_access_check(s)) {
11393                return;
11394            }
11395            handle_2misc_widening(s, opcode, is_q, size, rn, rd);
11396            return;
11397        case 0x18: /* FRINTN */
11398        case 0x19: /* FRINTM */
11399        case 0x38: /* FRINTP */
11400        case 0x39: /* FRINTZ */
11401            need_rmode = true;
11402            rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
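            /* Same trick as the FCVT* group above: FRINTN 0x18 -> 0,
             * FRINTP 0x38 -> 1, FRINTM 0x19 -> 2, FRINTZ 0x39 -> 3.
             */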
11403            /* fall through */
11404        case 0x59: /* FRINTX */
11405        case 0x79: /* FRINTI */
11406            need_fpstatus = true;
11407            if (size == 3 && !is_q) {
11408                unallocated_encoding(s);
11409                return;
11410            }
11411            break;
11412        case 0x58: /* FRINTA */
11413            need_rmode = true;
11414            rmode = FPROUNDING_TIEAWAY;
11415            need_fpstatus = true;
11416            if (size == 3 && !is_q) {
11417                unallocated_encoding(s);
11418                return;
11419            }
11420            break;
11421        case 0x7c: /* URSQRTE */
11422            if (size == 3) {
11423                unallocated_encoding(s);
11424                return;
11425            }
11426            need_fpstatus = true;
11427            break;
11428        default:
11429            unallocated_encoding(s);
11430            return;
11431        }
11432        break;
11433    }
11434    default:
11435        unallocated_encoding(s);
11436        return;
11437    }
11438
11439    if (!fp_access_check(s)) {
11440        return;
11441    }
11442
11443    if (need_fpstatus || need_rmode) {
11444        tcg_fpstatus = get_fpstatus_ptr(false);
11445    } else {
11446        tcg_fpstatus = NULL;
11447    }
11448    if (need_rmode) {
11449        tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
11450        gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
11451    } else {
11452        tcg_rmode = NULL;
11453    }
11454
11455    switch (opcode) {
11456    case 0x5:
11457        if (u && size == 0) { /* NOT */
11458            gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_not, 0);
11459            return;
11460        }
11461        break;
11462    case 0xb:
11463        if (u) { /* NEG */
11464            gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_neg, size);
11465            return;
11466        }
11467        break;
11468    }
11469
11470    if (size == 3) {
11471        /* All 64-bit element operations can be shared with scalar 2misc */
11472        int pass;
11473
11474        for (pass = 0; pass < (is_q ? 2 : 1); pass++) {
11475            TCGv_i64 tcg_op = tcg_temp_new_i64();
11476            TCGv_i64 tcg_res = tcg_temp_new_i64();
11477
11478            read_vec_element(s, tcg_op, rn, pass, MO_64);
11479
11480            handle_2misc_64(s, opcode, u, tcg_res, tcg_op,
11481                            tcg_rmode, tcg_fpstatus);
11482
11483            write_vec_element(s, tcg_res, rd, pass, MO_64);
11484
11485            tcg_temp_free_i64(tcg_res);
11486            tcg_temp_free_i64(tcg_op);
11487        }
11488    } else {
11489        int pass;
11490
11491        for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
11492            TCGv_i32 tcg_op = tcg_temp_new_i32();
11493            TCGv_i32 tcg_res = tcg_temp_new_i32();
11494            TCGCond cond;
11495
11496            read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
11497
11498            if (size == 2) {
11499                /* Special cases for 32 bit elements */
11500                switch (opcode) {
11501                case 0xa: /* CMLT */
11502                    /* 32 bit integer comparison against zero, result is
11503                     * test ? (2^32 - 1) : 0. We implement via setcond(test)
11504                     * and negating.
11505                     */
11506                    cond = TCG_COND_LT;
11507                do_cmop:
11508                    tcg_gen_setcondi_i32(cond, tcg_res, tcg_op, 0);
11509                    tcg_gen_neg_i32(tcg_res, tcg_res);
11510                    break;
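                /* e.g. tcg_op = -5: setcond(LT) yields 1, and negating
                 * gives 0xffffffff (all ones, i.e. true); a
                 * non-negative input yields 0 and stays 0.
                 */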
11511                case 0x8: /* CMGT, CMGE */
11512                    cond = u ? TCG_COND_GE : TCG_COND_GT;
11513                    goto do_cmop;
11514                case 0x9: /* CMEQ, CMLE */
11515                    cond = u ? TCG_COND_LE : TCG_COND_EQ;
11516                    goto do_cmop;
11517                case 0x4: /* CLS */
11518                    if (u) {
11519                        tcg_gen_clzi_i32(tcg_res, tcg_op, 32);
11520                    } else {
11521                        tcg_gen_clrsb_i32(tcg_res, tcg_op);
11522                    }
11523                    break;
11524                case 0x7: /* SQABS, SQNEG */
11525                    if (u) {
11526                        gen_helper_neon_qneg_s32(tcg_res, cpu_env, tcg_op);
11527                    } else {
11528                        gen_helper_neon_qabs_s32(tcg_res, cpu_env, tcg_op);
11529                    }
11530                    break;
11531                case 0xb: /* ABS, NEG */
11532                    if (u) {
11533                        tcg_gen_neg_i32(tcg_res, tcg_op);
11534                    } else {
11535                        TCGv_i32 tcg_zero = tcg_const_i32(0);
11536                        tcg_gen_neg_i32(tcg_res, tcg_op);
11537                        tcg_gen_movcond_i32(TCG_COND_GT, tcg_res, tcg_op,
11538                                            tcg_zero, tcg_op, tcg_res);
11539                        tcg_temp_free_i32(tcg_zero);
11540                    }
11541                    break;
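                /* The movcond keeps tcg_op when it is > 0 and takes
                 * the negation otherwise: a branchless ABS. Note that
                 * INT_MIN wraps to itself, matching the instruction
                 * semantics (only SQABS saturates).
                 */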
11542                case 0x2f: /* FABS */
11543                    gen_helper_vfp_abss(tcg_res, tcg_op);
11544                    break;
11545                case 0x6f: /* FNEG */
11546                    gen_helper_vfp_negs(tcg_res, tcg_op);
11547                    break;
11548                case 0x7f: /* FSQRT */
11549                    gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env);
11550                    break;
11551                case 0x1a: /* FCVTNS */
11552                case 0x1b: /* FCVTMS */
11553                case 0x1c: /* FCVTAS */
11554                case 0x3a: /* FCVTPS */
11555                case 0x3b: /* FCVTZS */
11556                {
11557                    TCGv_i32 tcg_shift = tcg_const_i32(0);
11558                    gen_helper_vfp_tosls(tcg_res, tcg_op,
11559                                         tcg_shift, tcg_fpstatus);
11560                    tcg_temp_free_i32(tcg_shift);
11561                    break;
11562                }
11563                case 0x5a: /* FCVTNU */
11564                case 0x5b: /* FCVTMU */
11565                case 0x5c: /* FCVTAU */
11566                case 0x7a: /* FCVTPU */
11567                case 0x7b: /* FCVTZU */
11568                {
11569                    TCGv_i32 tcg_shift = tcg_const_i32(0);
11570                    gen_helper_vfp_touls(tcg_res, tcg_op,
11571                                         tcg_shift, tcg_fpstatus);
11572                    tcg_temp_free_i32(tcg_shift);
11573                    break;
11574                }
11575                case 0x18: /* FRINTN */
11576                case 0x19: /* FRINTM */
11577                case 0x38: /* FRINTP */
11578                case 0x39: /* FRINTZ */
11579                case 0x58: /* FRINTA */
11580                case 0x79: /* FRINTI */
11581                    gen_helper_rints(tcg_res, tcg_op, tcg_fpstatus);
11582                    break;
11583                case 0x59: /* FRINTX */
11584                    gen_helper_rints_exact(tcg_res, tcg_op, tcg_fpstatus);
11585                    break;
11586                case 0x7c: /* URSQRTE */
11587                    gen_helper_rsqrte_u32(tcg_res, tcg_op, tcg_fpstatus);
11588                    break;
11589                default:
11590                    g_assert_not_reached();
11591                }
11592            } else {
11593                /* Use helpers for 8 and 16 bit elements */
11594                switch (opcode) {
11595                case 0x5: /* CNT, RBIT */
11596                    /* For these two insns size is part of the opcode specifier
11597                     * (handled earlier); they always operate on byte elements.
11598                     */
11599                    if (u) {
11600                        gen_helper_neon_rbit_u8(tcg_res, tcg_op);
11601                    } else {
11602                        gen_helper_neon_cnt_u8(tcg_res, tcg_op);
11603                    }
11604                    break;
11605                case 0x7: /* SQABS, SQNEG */
11606                {
11607                    NeonGenOneOpEnvFn *genfn;
11608                    static NeonGenOneOpEnvFn * const fns[2][2] = {
11609                        { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
11610                        { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
11611                    };
11612                    genfn = fns[size][u];
11613                    genfn(tcg_res, cpu_env, tcg_op);
11614                    break;
11615                }
11616                case 0x8: /* CMGT, CMGE */
11617                case 0x9: /* CMEQ, CMLE */
11618                case 0xa: /* CMLT */
11619                {
11620                    static NeonGenTwoOpFn * const fns[3][2] = {
11621                        { gen_helper_neon_cgt_s8, gen_helper_neon_cgt_s16 },
11622                        { gen_helper_neon_cge_s8, gen_helper_neon_cge_s16 },
11623                        { gen_helper_neon_ceq_u8, gen_helper_neon_ceq_u16 },
11624                    };
11625                    NeonGenTwoOpFn *genfn;
11626                    int comp;
11627                    bool reverse;
11628                    TCGv_i32 tcg_zero = tcg_const_i32(0);
11629
11630                    /* comp = index into [CMGT, CMGE, CMEQ, CMLE, CMLT] */
11631                    comp = (opcode - 0x8) * 2 + u;
11632                    /* ...but LE, LT are implemented as reverse GE, GT */
11633                    reverse = (comp > 2);
11634                    if (reverse) {
11635                        comp = 4 - comp;
11636                    }
11637                    genfn = fns[comp][size];
11638                    if (reverse) {
11639                        genfn(tcg_res, tcg_zero, tcg_op);
11640                    } else {
11641                        genfn(tcg_res, tcg_op, tcg_zero);
11642                    }
11643                    tcg_temp_free_i32(tcg_zero);
11644                    break;
11645                }
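                /* e.g. CMLE (opcode 0x9, u = 1): comp = 3, so reverse
                 * is set and comp becomes 1 (CMGE); computing
                 * 0 >= x is exactly x <= 0 with operands swapped.
                 */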
11646                case 0xb: /* ABS, NEG */
11647                    if (u) {
11648                        TCGv_i32 tcg_zero = tcg_const_i32(0);
11649                        if (size) {
11650                            gen_helper_neon_sub_u16(tcg_res, tcg_zero, tcg_op);
11651                        } else {
11652                            gen_helper_neon_sub_u8(tcg_res, tcg_zero, tcg_op);
11653                        }
11654                        tcg_temp_free_i32(tcg_zero);
11655                    } else {
11656                        if (size) {
11657                            gen_helper_neon_abs_s16(tcg_res, tcg_op);
11658                        } else {
11659                            gen_helper_neon_abs_s8(tcg_res, tcg_op);
11660                        }
11661                    }
11662                    break;
11663                case 0x4: /* CLS, CLZ */
11664                    if (u) {
11665                        if (size == 0) {
11666                            gen_helper_neon_clz_u8(tcg_res, tcg_op);
11667                        } else {
11668                            gen_helper_neon_clz_u16(tcg_res, tcg_op);
11669                        }
11670                    } else {
11671                        if (size == 0) {
11672                            gen_helper_neon_cls_s8(tcg_res, tcg_op);
11673                        } else {
11674                            gen_helper_neon_cls_s16(tcg_res, tcg_op);
11675                        }
11676                    }
11677                    break;
11678                default:
11679                    g_assert_not_reached();
11680                }
11681            }
11682
11683            write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
11684
11685            tcg_temp_free_i32(tcg_res);
11686            tcg_temp_free_i32(tcg_op);
11687        }
11688    }
11689    clear_vec_high(s, is_q, rd);
11690
11691    if (need_rmode) {
11692        gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
11693        tcg_temp_free_i32(tcg_rmode);
11694    }
11695    if (need_fpstatus) {
11696        tcg_temp_free_ptr(tcg_fpstatus);
11697    }
11698}
11699
11700/* AdvSIMD [scalar] two register miscellaneous (FP16)
11701 *
11702 *   31  30  29 28  27     24  23 22 21       17 16    12 11 10 9    5 4    0
11703 * +---+---+---+---+---------+---+-------------+--------+-----+------+------+
11704 * | 0 | Q | U | S | 1 1 1 0 | a | 1 1 1 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
11705 * +---+---+---+---+---------+---+-------------+--------+-----+------+------+
11706 *   mask: 1000 1111 0111 1110 0000 1100 0000 0000 0x8f7e 0c00
11707 *   val:  0000 1110 0111 1000 0000 1000 0000 0000 0x0e78 0800
11708 *
11709 * This actually covers two groups where scalar access is governed by
11710 * bit 28. A bunch of the instructions (float to integral) only exist
11711 * in the vector form and are unallocated for the scalar decode. Also
11712 * in the scalar decode Q is always 1.
11713 */
11714static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn)
11715{
11716    int fpop, opcode, a, u;
11717    int rn, rd;
11718    bool is_q;
11719    bool is_scalar;
11720    bool only_in_vector = false;
11721
11722    int pass;
11723    TCGv_i32 tcg_rmode = NULL;
11724    TCGv_ptr tcg_fpstatus = NULL;
11725    bool need_rmode = false;
11726    bool need_fpst = true;
11727    int rmode;
11728
11729    if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
11730        unallocated_encoding(s);
11731        return;
11732    }
11733
11734    rd = extract32(insn, 0, 5);
11735    rn = extract32(insn, 5, 5);
11736
11737    a = extract32(insn, 23, 1);
11738    u = extract32(insn, 29, 1);
11739    is_scalar = extract32(insn, 28, 1);
11740    is_q = extract32(insn, 30, 1);
11741
11742    opcode = extract32(insn, 12, 5);
11743    fpop = deposit32(opcode, 5, 1, a);
11744    fpop = deposit32(fpop, 6, 1, u);
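    /* fpop is thus U:a:opcode; e.g. FCVTZU has U = 1, a = 1,
     * opcode = 0x1b, giving fpop 0x7b as matched below.
     */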
11745
11749    switch (fpop) {
11750    case 0x1d: /* SCVTF */
11751    case 0x5d: /* UCVTF */
11752    {
11753        int elements;
11754
11755        if (is_scalar) {
11756            elements = 1;
11757        } else {
11758            elements = (is_q ? 8 : 4);
11759        }
11760
11761        if (!fp_access_check(s)) {
11762            return;
11763        }
11764        handle_simd_intfp_conv(s, rd, rn, elements, !u, 0, MO_16);
11765        return;
11766    }
11768    case 0x2c: /* FCMGT (zero) */
11769    case 0x2d: /* FCMEQ (zero) */
11770    case 0x2e: /* FCMLT (zero) */
11771    case 0x6c: /* FCMGE (zero) */
11772    case 0x6d: /* FCMLE (zero) */
11773        handle_2misc_fcmp_zero(s, fpop, is_scalar, 0, is_q, MO_16, rn, rd);
11774        return;
11775    case 0x3d: /* FRECPE */
11776    case 0x3f: /* FRECPX */
11777        break;
11778    case 0x18: /* FRINTN */
11779        need_rmode = true;
11780        only_in_vector = true;
11781        rmode = FPROUNDING_TIEEVEN;
11782        break;
11783    case 0x19: /* FRINTM */
11784        need_rmode = true;
11785        only_in_vector = true;
11786        rmode = FPROUNDING_NEGINF;
11787        break;
11788    case 0x38: /* FRINTP */
11789        need_rmode = true;
11790        only_in_vector = true;
11791        rmode = FPROUNDING_POSINF;
11792        break;
11793    case 0x39: /* FRINTZ */
11794        need_rmode = true;
11795        only_in_vector = true;
11796        rmode = FPROUNDING_ZERO;
11797        break;
11798    case 0x58: /* FRINTA */
11799        need_rmode = true;
11800        only_in_vector = true;
11801        rmode = FPROUNDING_TIEAWAY;
11802        break;
11803    case 0x59: /* FRINTX */
11804    case 0x79: /* FRINTI */
11805        only_in_vector = true;
11806        /* current rounding mode */
11807        break;
11808    case 0x1a: /* FCVTNS */
11809        need_rmode = true;
11810        rmode = FPROUNDING_TIEEVEN;
11811        break;
11812    case 0x1b: /* FCVTMS */
11813        need_rmode = true;
11814        rmode = FPROUNDING_NEGINF;
11815        break;
11816    case 0x1c: /* FCVTAS */
11817        need_rmode = true;
11818        rmode = FPROUNDING_TIEAWAY;
11819        break;
11820    case 0x3a: /* FCVTPS */
11821        need_rmode = true;
11822        rmode = FPROUNDING_POSINF;
11823        break;
11824    case 0x3b: /* FCVTZS */
11825        need_rmode = true;
11826        rmode = FPROUNDING_ZERO;
11827        break;
11828    case 0x5a: /* FCVTNU */
11829        need_rmode = true;
11830        rmode = FPROUNDING_TIEEVEN;
11831        break;
11832    case 0x5b: /* FCVTMU */
11833        need_rmode = true;
11834        rmode = FPROUNDING_NEGINF;
11835        break;
11836    case 0x5c: /* FCVTAU */
11837        need_rmode = true;
11838        rmode = FPROUNDING_TIEAWAY;
11839        break;
11840    case 0x7a: /* FCVTPU */
11841        need_rmode = true;
11842        rmode = FPROUNDING_POSINF;
11843        break;
11844    case 0x7b: /* FCVTZU */
11845        need_rmode = true;
11846        rmode = FPROUNDING_ZERO;
11847        break;
11848    case 0x2f: /* FABS */
11849    case 0x6f: /* FNEG */
11850        need_fpst = false;
11851        break;
11852    case 0x7d: /* FRSQRTE */
11853    case 0x7f: /* FSQRT (vector) */
11854        break;
11855    default:
11856        fprintf(stderr, "%s: insn %#08x fpop %#02x\n", __func__, insn, fpop);
11857        g_assert_not_reached();
11858    }
11859
11861    /* Check additional constraints for the scalar encoding */
11862    if (is_scalar) {
11863        if (!is_q) {
11864            unallocated_encoding(s);
11865            return;
11866        }
11867        /* FRINTxx is only in the vector form */
11868        if (only_in_vector) {
11869            unallocated_encoding(s);
11870            return;
11871        }
11872    }
11873
11874    if (!fp_access_check(s)) {
11875        return;
11876    }
11877
11878    if (need_rmode || need_fpst) {
11879        tcg_fpstatus = get_fpstatus_ptr(true);
11880    }
11881
11882    if (need_rmode) {
11883        tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
11884        gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
11885    }
11886
11887    if (is_scalar) {
11888        TCGv_i32 tcg_op = tcg_temp_new_i32();
11889        TCGv_i32 tcg_res = tcg_temp_new_i32();
11890
11891        read_vec_element_i32(s, tcg_op, rn, 0, MO_16);
11892
11893        switch (fpop) {
11894        case 0x1a: /* FCVTNS */
11895        case 0x1b: /* FCVTMS */
11896        case 0x1c: /* FCVTAS */
11897        case 0x3a: /* FCVTPS */
11898        case 0x3b: /* FCVTZS */
11899            gen_helper_advsimd_f16tosinth(tcg_res, tcg_op, tcg_fpstatus);
11900            break;
11901        case 0x3d: /* FRECPE */
11902            gen_helper_recpe_f16(tcg_res, tcg_op, tcg_fpstatus);
11903            break;
11904        case 0x3f: /* FRECPX */
11905            gen_helper_frecpx_f16(tcg_res, tcg_op, tcg_fpstatus);
11906            break;
11907        case 0x5a: /* FCVTNU */
11908        case 0x5b: /* FCVTMU */
11909        case 0x5c: /* FCVTAU */
11910        case 0x7a: /* FCVTPU */
11911        case 0x7b: /* FCVTZU */
11912            gen_helper_advsimd_f16touinth(tcg_res, tcg_op, tcg_fpstatus);
11913            break;
11914        case 0x6f: /* FNEG */
11915            tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
11916            break;
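        /* Bit 15 is the half-precision sign bit, so FNEG is a plain
         * XOR here: no fpstatus access and no exception flags raised.
         */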
11917        case 0x7d: /* FRSQRTE */
11918            gen_helper_rsqrte_f16(tcg_res, tcg_op, tcg_fpstatus);
11919            break;
11920        default:
11921            g_assert_not_reached();
11922        }
11923
11924        /* limit any sign extension going on */
11925        tcg_gen_andi_i32(tcg_res, tcg_res, 0xffff);
11926        write_fp_sreg(s, rd, tcg_res);
11927
11928        tcg_temp_free_i32(tcg_res);
11929        tcg_temp_free_i32(tcg_op);
11930    } else {
11931        for (pass = 0; pass < (is_q ? 8 : 4); pass++) {
11932            TCGv_i32 tcg_op = tcg_temp_new_i32();
11933            TCGv_i32 tcg_res = tcg_temp_new_i32();
11934
11935            read_vec_element_i32(s, tcg_op, rn, pass, MO_16);
11936
11937            switch (fpop) {
11938            case 0x1a: /* FCVTNS */
11939            case 0x1b: /* FCVTMS */
11940            case 0x1c: /* FCVTAS */
11941            case 0x3a: /* FCVTPS */
11942            case 0x3b: /* FCVTZS */
11943                gen_helper_advsimd_f16tosinth(tcg_res, tcg_op, tcg_fpstatus);
11944                break;
11945            case 0x3d: /* FRECPE */
11946                gen_helper_recpe_f16(tcg_res, tcg_op, tcg_fpstatus);
11947                break;
11948            case 0x5a: /* FCVTNU */
11949            case 0x5b: /* FCVTMU */
11950            case 0x5c: /* FCVTAU */
11951            case 0x7a: /* FCVTPU */
11952            case 0x7b: /* FCVTZU */
11953                gen_helper_advsimd_f16touinth(tcg_res, tcg_op, tcg_fpstatus);
11954                break;
11955            case 0x18: /* FRINTN */
11956            case 0x19: /* FRINTM */
11957            case 0x38: /* FRINTP */
11958            case 0x39: /* FRINTZ */
11959            case 0x58: /* FRINTA */
11960            case 0x79: /* FRINTI */
11961                gen_helper_advsimd_rinth(tcg_res, tcg_op, tcg_fpstatus);
11962                break;
11963            case 0x59: /* FRINTX */
11964                gen_helper_advsimd_rinth_exact(tcg_res, tcg_op, tcg_fpstatus);
11965                break;
11966            case 0x2f: /* FABS */
11967                tcg_gen_andi_i32(tcg_res, tcg_op, 0x7fff);
11968                break;
11969            case 0x6f: /* FNEG */
11970                tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
11971                break;
11972            case 0x7d: /* FRSQRTE */
11973                gen_helper_rsqrte_f16(tcg_res, tcg_op, tcg_fpstatus);
11974                break;
11975            case 0x7f: /* FSQRT */
11976                gen_helper_sqrt_f16(tcg_res, tcg_op, tcg_fpstatus);
11977                break;
11978            default:
11979                g_assert_not_reached();
11980            }
11981
11982            write_vec_element_i32(s, tcg_res, rd, pass, MO_16);
11983
11984            tcg_temp_free_i32(tcg_res);
11985            tcg_temp_free_i32(tcg_op);
11986        }
11987
11988        clear_vec_high(s, is_q, rd);
11989    }
11990
11991    if (tcg_rmode) {
11992        gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
11993        tcg_temp_free_i32(tcg_rmode);
11994    }
11995
11996    if (tcg_fpstatus) {
11997        tcg_temp_free_ptr(tcg_fpstatus);
11998    }
11999}
12000
12001/* AdvSIMD scalar x indexed element
12002 *  31 30  29 28       24 23  22 21  20  19  16 15 12  11  10 9    5 4    0
12003 * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
12004 * | 0 1 | U | 1 1 1 1 1 | size | L | M |  Rm  | opc | H | 0 |  Rn  |  Rd  |
12005 * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
12006 * AdvSIMD vector x indexed element
12007 *   31  30  29 28       24 23  22 21  20  19  16 15 12  11  10 9    5 4    0
12008 * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
12009 * | 0 | Q | U | 0 1 1 1 1 | size | L | M |  Rm  | opc | H | 0 |  Rn  |  Rd  |
12010 * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
12011 */
12012static void disas_simd_indexed(DisasContext *s, uint32_t insn)
12013{
12014    /* This encoding has two kinds of instruction:
12015     *  normal, where we perform elt x idxelt => elt for each
12016     *     element in the vector
12017     *  long, where we perform elt x idxelt and generate a result of
12018     *     double the width of the input element
12019     * The long ops have a 'part' specifier (ie come in INSN, INSN2 pairs).
12020     */
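    /* e.g. MUL Vd.4S, Vn.4S, Vm.S[1] is a 'normal' op, whereas
     * SMULL2 Vd.2D, Vn.4S, Vm.S[1] is a 'long' op whose sources come
     * from the upper half of Vn (the INSN2 part).
     */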
12021    bool is_scalar = extract32(insn, 28, 1);
12022    bool is_q = extract32(insn, 30, 1);
12023    bool u = extract32(insn, 29, 1);
12024    int size = extract32(insn, 22, 2);
12025    int l = extract32(insn, 21, 1);
12026    int m = extract32(insn, 20, 1);
12027    /* Note that the Rm field here is only 4 bits, not 5 as it usually is */
12028    int rm = extract32(insn, 16, 4);
12029    int opcode = extract32(insn, 12, 4);
12030    int h = extract32(insn, 11, 1);
12031    int rn = extract32(insn, 5, 5);
12032    int rd = extract32(insn, 0, 5);
12033    bool is_long = false;
12034    int is_fp = 0;
12035    bool is_fp16 = false;
12036    int index;
12037    TCGv_ptr fpst;
12038
12039    switch (16 * u + opcode) {
12040    case 0x08: /* MUL */
12041    case 0x10: /* MLA */
12042    case 0x14: /* MLS */
12043        if (is_scalar) {
12044            unallocated_encoding(s);
12045            return;
12046        }
12047        break;
12048    case 0x02: /* SMLAL, SMLAL2 */
12049    case 0x12: /* UMLAL, UMLAL2 */
12050    case 0x06: /* SMLSL, SMLSL2 */
12051    case 0x16: /* UMLSL, UMLSL2 */
12052    case 0x0a: /* SMULL, SMULL2 */
12053    case 0x1a: /* UMULL, UMULL2 */
12054        if (is_scalar) {
12055            unallocated_encoding(s);
12056            return;
12057        }
12058        is_long = true;
12059        break;
12060    case 0x03: /* SQDMLAL, SQDMLAL2 */
12061    case 0x07: /* SQDMLSL, SQDMLSL2 */
12062    case 0x0b: /* SQDMULL, SQDMULL2 */
12063        is_long = true;
12064        break;
12065    case 0x0c: /* SQDMULH */
12066    case 0x0d: /* SQRDMULH */
12067        break;
12068    case 0x01: /* FMLA */
12069    case 0x05: /* FMLS */
12070    case 0x09: /* FMUL */
12071    case 0x19: /* FMULX */
12072        is_fp = 1;
12073        break;
12074    case 0x1d: /* SQRDMLAH */
12075    case 0x1f: /* SQRDMLSH */
12076        if (!arm_dc_feature(s, ARM_FEATURE_V8_RDM)) {
12077            unallocated_encoding(s);
12078            return;
12079        }
12080        break;
12081    case 0x11: /* FCMLA #0 */
12082    case 0x13: /* FCMLA #90 */
12083    case 0x15: /* FCMLA #180 */
12084    case 0x17: /* FCMLA #270 */
12085        if (!arm_dc_feature(s, ARM_FEATURE_V8_FCMA)) {
12086            unallocated_encoding(s);
12087            return;
12088        }
12089        is_fp = 2;
12090        break;
12091    default:
12092        unallocated_encoding(s);
12093        return;
12094    }
12095
12096    switch (is_fp) {
12097    case 1: /* normal fp */
12098        /* convert insn encoded size to TCGMemOp size */
12099        switch (size) {
12100        case 0: /* half-precision */
12101            size = MO_16;
12102            is_fp16 = true;
12103            break;
12104        case MO_32: /* single precision */
12105        case MO_64: /* double precision */
12106            break;
12107        default:
12108            unallocated_encoding(s);
12109            return;
12110        }
12111        break;
12112
12113    case 2: /* complex fp */
12114        /* Each indexable element is a complex pair.  */
12115        size <<= 1;
12116        switch (size) {
12117        case MO_32:
12118            if (h && !is_q) {
12119                unallocated_encoding(s);
12120                return;
12121            }
12122            is_fp16 = true;
12123            break;
12124        case MO_64:
12125            break;
12126        default:
12127            unallocated_encoding(s);
12128            return;
12129        }
12130        break;
12131
12132    default: /* integer */
12133        switch (size) {
12134        case MO_8:
12135        case MO_64:
12136            unallocated_encoding(s);
12137            return;
12138        }
12139        break;
12140    }
12141    if (is_fp16 && !arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
12142        unallocated_encoding(s);
12143        return;
12144    }
12145
12146    /* Given TCGMemOp size, adjust register and indexing.  */
12147    switch (size) {
12148    case MO_16:
12149        index = h << 2 | l << 1 | m;
12150        break;
12151    case MO_32:
12152        index = h << 1 | l;
12153        rm |= m << 4;
12154        break;
12155    case MO_64:
12156        if (l || !is_q) {
12157            unallocated_encoding(s);
12158            return;
12159        }
12160        index = h;
12161        rm |= m << 4;
12162        break;
12163    default:
12164        g_assert_not_reached();
12165    }
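    /* e.g. with MO_16 the index is H:L:M (0..7) and Rm stays 4 bits
     * wide (V0-V15 only); with MO_32 the index is H:L and M instead
     * becomes bit 4 of Rm, making all 32 vector registers reachable.
     */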
12166
12167    if (!fp_access_check(s)) {
12168        return;
12169    }
12170
12171    if (is_fp) {
12172        fpst = get_fpstatus_ptr(is_fp16);
12173    } else {
12174        fpst = NULL;
12175    }
12176
12177    switch (16 * u + opcode) {
12178    case 0x11: /* FCMLA #0 */
12179    case 0x13: /* FCMLA #90 */
12180    case 0x15: /* FCMLA #180 */
12181    case 0x17: /* FCMLA #270 */
12182        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
12183                           vec_full_reg_offset(s, rn),
12184                           vec_reg_offset(s, rm, index, size), fpst,
12185                           is_q ? 16 : 8, vec_full_reg_size(s),
12186                           extract32(insn, 13, 2), /* rot */
12187                           size == MO_64
12188                           ? gen_helper_gvec_fcmlas_idx
12189                           : gen_helper_gvec_fcmlah_idx);
12190        tcg_temp_free_ptr(fpst);
12191        return;
12192    }
12193
12194    if (size == 3) {
12195        TCGv_i64 tcg_idx = tcg_temp_new_i64();
12196        int pass;
12197
12198        assert(is_fp && is_q && !is_long);
12199
12200        read_vec_element(s, tcg_idx, rm, index, MO_64);
12201
12202        for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
12203            TCGv_i64 tcg_op = tcg_temp_new_i64();
12204            TCGv_i64 tcg_res = tcg_temp_new_i64();
12205
12206            read_vec_element(s, tcg_op, rn, pass, MO_64);
12207
12208            switch (16 * u + opcode) {
12209            case 0x05: /* FMLS */
12210                /* As usual for ARM, separate negation for fused multiply-add */
12211                gen_helper_vfp_negd(tcg_op, tcg_op);
12212                /* fall through */
12213            case 0x01: /* FMLA */
12214                read_vec_element(s, tcg_res, rd, pass, MO_64);
12215                gen_helper_vfp_muladdd(tcg_res, tcg_op, tcg_idx, tcg_res, fpst);
12216                break;
12217            case 0x09: /* FMUL */
12218                gen_helper_vfp_muld(tcg_res, tcg_op, tcg_idx, fpst);
12219                break;
12220            case 0x19: /* FMULX */
12221                gen_helper_vfp_mulxd(tcg_res, tcg_op, tcg_idx, fpst);
12222                break;
12223            default:
12224                g_assert_not_reached();
12225            }
12226
12227            write_vec_element(s, tcg_res, rd, pass, MO_64);
12228            tcg_temp_free_i64(tcg_op);
12229            tcg_temp_free_i64(tcg_res);
12230        }
12231
12232        tcg_temp_free_i64(tcg_idx);
12233        clear_vec_high(s, !is_scalar, rd);
12234    } else if (!is_long) {
12235        /* 32 bit floating point, or 16 or 32 bit integer.
12236         * For the 16 bit scalar case we use the usual Neon helpers and
12237         * rely on the fact that 0 op 0 == 0 with no side effects.
12238         */
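        /* (Concretely: a 16-bit scalar read leaves the upper half of
         * the 32-bit temp zero, so the helper's idle lane computes
         * 0 op 0, producing 0 and never setting QC.)
         */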
12239        TCGv_i32 tcg_idx = tcg_temp_new_i32();
12240        int pass, maxpasses;
12241
12242        if (is_scalar) {
12243            maxpasses = 1;
12244        } else {
12245            maxpasses = is_q ? 4 : 2;
12246        }
12247
12248        read_vec_element_i32(s, tcg_idx, rm, index, size);
12249
12250        if (size == 1 && !is_scalar) {
12251            /* The simplest way to handle the 16x16 indexed ops is to duplicate
12252             * the index into both halves of the 32 bit tcg_idx and then use
12253             * the usual Neon helpers.
12254             */
12255            tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
12256        }
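        /* e.g. an index element of 0x1234 becomes 0x12341234, so one
         * 32-bit helper call multiplies both 16-bit lanes of tcg_op
         * by the same indexed value.
         */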
12257
12258        for (pass = 0; pass < maxpasses; pass++) {
12259            TCGv_i32 tcg_op = tcg_temp_new_i32();
12260            TCGv_i32 tcg_res = tcg_temp_new_i32();
12261
12262            read_vec_element_i32(s, tcg_op, rn, pass, is_scalar ? size : MO_32);
12263
12264            switch (16 * u + opcode) {
12265            case 0x08: /* MUL */
12266            case 0x10: /* MLA */
12267            case 0x14: /* MLS */
12268            {
12269                static NeonGenTwoOpFn * const fns[2][2] = {
12270                    { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
12271                    { tcg_gen_add_i32, tcg_gen_sub_i32 },
12272                };
12273                NeonGenTwoOpFn *genfn;
12274                bool is_sub = opcode == 0x4;
12275
12276                if (size == 1) {
12277                    gen_helper_neon_mul_u16(tcg_res, tcg_op, tcg_idx);
12278                } else {
12279                    tcg_gen_mul_i32(tcg_res, tcg_op, tcg_idx);
12280                }
12281                if (opcode == 0x8) {
12282                    break;
12283                }
12284                read_vec_element_i32(s, tcg_op, rd, pass, MO_32);
12285                genfn = fns[size - 1][is_sub];
12286                genfn(tcg_res, tcg_op, tcg_res);
12287                break;
12288            }
12289            case 0x05: /* FMLS */
12290            case 0x01: /* FMLA */
12291                read_vec_element_i32(s, tcg_res, rd, pass,
12292                                     is_scalar ? size : MO_32);
12293                switch (size) {
12294                case 1:
12295                    if (opcode == 0x5) {
12296                        /* As usual for ARM, separate negation for fused
12297                         * multiply-add */
12298                        tcg_gen_xori_i32(tcg_op, tcg_op, 0x80008000);
12299                    }
12300                    if (is_scalar) {
12301                        gen_helper_advsimd_muladdh(tcg_res, tcg_op, tcg_idx,
12302                                                   tcg_res, fpst);
12303                    } else {
12304                        gen_helper_advsimd_muladd2h(tcg_res, tcg_op, tcg_idx,
12305                                                    tcg_res, fpst);
12306                    }
12307                    break;
12308                case 2:
12309                    if (opcode == 0x5) {
12310                        /* As usual for ARM, separate negation for
12311                         * fused multiply-add */
12312                        tcg_gen_xori_i32(tcg_op, tcg_op, 0x80000000);
12313                    }
12314                    gen_helper_vfp_muladds(tcg_res, tcg_op, tcg_idx,
12315                                           tcg_res, fpst);
12316                    break;
12317                default:
12318                    g_assert_not_reached();
12319                }
12320                break;
12321            case 0x09: /* FMUL */
12322                switch (size) {
12323                case 1:
12324                    if (is_scalar) {
12325                        gen_helper_advsimd_mulh(tcg_res, tcg_op,
12326                                                tcg_idx, fpst);
12327                    } else {
12328                        gen_helper_advsimd_mul2h(tcg_res, tcg_op,
12329                                                 tcg_idx, fpst);
12330                    }
12331                    break;
12332                case 2:
12333                    gen_helper_vfp_muls(tcg_res, tcg_op, tcg_idx, fpst);
12334                    break;
12335                default:
12336                    g_assert_not_reached();
12337                }
12338                break;
12339            case 0x19: /* FMULX */
12340                switch (size) {
12341                case 1:
12342                    if (is_scalar) {
12343                        gen_helper_advsimd_mulxh(tcg_res, tcg_op,
12344                                                 tcg_idx, fpst);
12345                    } else {
12346                        gen_helper_advsimd_mulx2h(tcg_res, tcg_op,
12347                                                  tcg_idx, fpst);
12348                    }
12349                    break;
12350                case 2:
12351                    gen_helper_vfp_mulxs(tcg_res, tcg_op, tcg_idx, fpst);
12352                    break;
12353                default:
12354                    g_assert_not_reached();
12355                }
12356                break;
12357            case 0x0c: /* SQDMULH */
12358                if (size == 1) {
12359                    gen_helper_neon_qdmulh_s16(tcg_res, cpu_env,
12360                                               tcg_op, tcg_idx);
12361                } else {
12362                    gen_helper_neon_qdmulh_s32(tcg_res, cpu_env,
12363                                               tcg_op, tcg_idx);
12364                }
12365                break;
12366            case 0x0d: /* SQRDMULH */
12367                if (size == 1) {
12368                    gen_helper_neon_qrdmulh_s16(tcg_res, cpu_env,
12369                                                tcg_op, tcg_idx);
12370                } else {
12371                    gen_helper_neon_qrdmulh_s32(tcg_res, cpu_env,
12372                                                tcg_op, tcg_idx);
12373                }
12374                break;
12375            case 0x1d: /* SQRDMLAH */
12376                read_vec_element_i32(s, tcg_res, rd, pass,
12377                                     is_scalar ? size : MO_32);
12378                if (size == 1) {
12379                    gen_helper_neon_qrdmlah_s16(tcg_res, cpu_env,
12380                                                tcg_op, tcg_idx, tcg_res);
12381                } else {
12382                    gen_helper_neon_qrdmlah_s32(tcg_res, cpu_env,
12383                                                tcg_op, tcg_idx, tcg_res);
12384                }
12385                break;
12386            case 0x1f: /* SQRDMLSH */
12387                read_vec_element_i32(s, tcg_res, rd, pass,
12388                                     is_scalar ? size : MO_32);
12389                if (size == 1) {
12390                    gen_helper_neon_qrdmlsh_s16(tcg_res, cpu_env,
12391                                                tcg_op, tcg_idx, tcg_res);
12392                } else {
12393                    gen_helper_neon_qrdmlsh_s32(tcg_res, cpu_env,
12394                                                tcg_op, tcg_idx, tcg_res);
12395                }
12396                break;
12397            default:
12398                g_assert_not_reached();
12399            }
12400
12401            if (is_scalar) {
12402                write_fp_sreg(s, rd, tcg_res);
12403            } else {
12404                write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
12405            }
12406
12407            tcg_temp_free_i32(tcg_op);
12408            tcg_temp_free_i32(tcg_res);
12409        }
12410
12411        tcg_temp_free_i32(tcg_idx);
12412        clear_vec_high(s, is_q, rd);
12413    } else {
12414        /* long ops: 16x16->32 or 32x32->64 */
12415        TCGv_i64 tcg_res[2];
12416        int pass;
12417        bool satop = extract32(opcode, 0, 1);
12418        TCGMemOp memop = MO_32;
12419
12420        if (satop || !u) {
12421            memop |= MO_SIGN;
12422        }
12423
12424        if (size == 2) {
12425            TCGv_i64 tcg_idx = tcg_temp_new_i64();
12426
12427            read_vec_element(s, tcg_idx, rm, index, memop);
12428
12429            for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
12430                TCGv_i64 tcg_op = tcg_temp_new_i64();
12431                TCGv_i64 tcg_passres;
12432                int passelt;
12433
12434                if (is_scalar) {
12435                    passelt = 0;
12436                } else {
12437                    passelt = pass + (is_q * 2);
12438                }
12439
12440                read_vec_element(s, tcg_op, rn, passelt, memop);
12441
12442                tcg_res[pass] = tcg_temp_new_i64();
12443
12444                if (opcode == 0xa || opcode == 0xb) {
12445                    /* Non-accumulating ops */
12446                    tcg_passres = tcg_res[pass];
12447                } else {
12448                    tcg_passres = tcg_temp_new_i64();
12449                }
12450
12451                tcg_gen_mul_i64(tcg_passres, tcg_op, tcg_idx);
12452                tcg_temp_free_i64(tcg_op);
12453
12454                if (satop) {
12455                    /* saturating, doubling */
12456                    gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env,
12457                                                      tcg_passres, tcg_passres);
12458                }
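                /* Adding the product to itself through the saturating
                 * helper yields SQDMULL's doubled result with Q-flag
                 * semantics, e.g. (-2^31 * -2^31) = 2^62 doubles to
                 * the saturated 0x7fffffffffffffff.
                 */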
12459
12460                if (opcode == 0xa || opcode == 0xb) {
12461                    continue;
12462                }
12463
12464                /* Accumulating op: handle accumulate step */
12465                read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
12466
12467                switch (opcode) {
12468                case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
12469                    tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
12470                    break;
12471                case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
12472                    tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
12473                    break;
12474                case 0x7: /* SQDMLSL, SQDMLSL2 */
12475                    tcg_gen_neg_i64(tcg_passres, tcg_passres);
12476                    /* fall through */
12477                case 0x3: /* SQDMLAL, SQDMLAL2 */
12478                    gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env,
12479                                                      tcg_res[pass],
12480                                                      tcg_passres);
12481                    break;
12482                default:
12483                    g_assert_not_reached();
12484                }
12485                tcg_temp_free_i64(tcg_passres);
12486            }
12487            tcg_temp_free_i64(tcg_idx);
12488
12489            clear_vec_high(s, !is_scalar, rd);
12490        } else {
12491            TCGv_i32 tcg_idx = tcg_temp_new_i32();
12492
12493            assert(size == 1);
12494            read_vec_element_i32(s, tcg_idx, rm, index, size);
12495
12496            if (!is_scalar) {
12497                /* The simplest way to handle the 16x16 indexed ops is to
12498                 * duplicate the index into both halves of the 32 bit tcg_idx
12499                 * and then use the usual Neon helpers.
12500                 */
12501                tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
12502            }
12503
12504            for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
12505                TCGv_i32 tcg_op = tcg_temp_new_i32();
12506                TCGv_i64 tcg_passres;
12507
12508                if (is_scalar) {
12509                    read_vec_element_i32(s, tcg_op, rn, pass, size);
12510                } else {
12511                    read_vec_element_i32(s, tcg_op, rn,
12512                                         pass + (is_q * 2), MO_32);
12513                }
12514
12515                tcg_res[pass] = tcg_temp_new_i64();
12516
12517                if (opcode == 0xa || opcode == 0xb) {
12518                    /* Non-accumulating ops */
12519                    tcg_passres = tcg_res[pass];
12520                } else {
12521                    tcg_passres = tcg_temp_new_i64();
12522                }
12523
12524                if (memop & MO_SIGN) {
12525                    gen_helper_neon_mull_s16(tcg_passres, tcg_op, tcg_idx);
12526                } else {
12527                    gen_helper_neon_mull_u16(tcg_passres, tcg_op, tcg_idx);
12528                }
12529                if (satop) {
12530                    gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
12531                                                      tcg_passres, tcg_passres);
12532                }
12533                tcg_temp_free_i32(tcg_op);
12534
12535                if (opcode == 0xa || opcode == 0xb) {
12536                    continue;
12537                }
12538
12539                /* Accumulating op: handle accumulate step */
12540                read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
12541
12542                switch (opcode) {
12543                case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
12544                    gen_helper_neon_addl_u32(tcg_res[pass], tcg_res[pass],
12545                                             tcg_passres);
12546                    break;
12547                case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
12548                    gen_helper_neon_subl_u32(tcg_res[pass], tcg_res[pass],
12549                                             tcg_passres);
12550                    break;
12551                case 0x7: /* SQDMLSL, SQDMLSL2 */
12552                    gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
12553                    /* fall through */
12554                case 0x3: /* SQDMLAL, SQDMLAL2 */
12555                    gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env,
12556                                                      tcg_res[pass],
12557                                                      tcg_passres);
12558                    break;
12559                default:
12560                    g_assert_not_reached();
12561                }
12562                tcg_temp_free_i64(tcg_passres);
12563            }
12564            tcg_temp_free_i32(tcg_idx);
12565
12566            if (is_scalar) {
12567                tcg_gen_ext32u_i64(tcg_res[0], tcg_res[0]);
12568            }
12569        }
12570
12571        if (is_scalar) {
12572            tcg_res[1] = tcg_const_i64(0);
12573        }
12574
12575        for (pass = 0; pass < 2; pass++) {
12576            write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
12577            tcg_temp_free_i64(tcg_res[pass]);
12578        }
12579    }
12580
12581    if (fpst) {
12582        tcg_temp_free_ptr(fpst);
12583    }
12584}
12585
12586/* Crypto AES
12587 *  31             24 23  22 21       17 16    12 11 10 9    5 4    0
12588 * +-----------------+------+-----------+--------+-----+------+------+
12589 * | 0 1 0 0 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
12590 * +-----------------+------+-----------+--------+-----+------+------+
12591 */
12592static void disas_crypto_aes(DisasContext *s, uint32_t insn)
12593{
12594    int size = extract32(insn, 22, 2);
12595    int opcode = extract32(insn, 12, 5);
12596    int rn = extract32(insn, 5, 5);
12597    int rd = extract32(insn, 0, 5);
12598    int decrypt;
12599    TCGv_ptr tcg_rd_ptr, tcg_rn_ptr;
12600    TCGv_i32 tcg_decrypt;
12601    CryptoThreeOpIntFn *genfn;
12602
12603    if (!arm_dc_feature(s, ARM_FEATURE_V8_AES)
12604        || size != 0) {
12605        unallocated_encoding(s);
12606        return;
12607    }
12608
12609    switch (opcode) {
12610    case 0x4: /* AESE */
12611        decrypt = 0;
12612        genfn = gen_helper_crypto_aese;
12613        break;
12614    case 0x6: /* AESMC */
12615        decrypt = 0;
12616        genfn = gen_helper_crypto_aesmc;
12617        break;
12618    case 0x5: /* AESD */
12619        decrypt = 1;
12620        genfn = gen_helper_crypto_aese;
12621        break;
12622    case 0x7: /* AESIMC */
12623        decrypt = 1;
12624        genfn = gen_helper_crypto_aesmc;
12625        break;
12626    default:
12627        unallocated_encoding(s);
12628        return;
12629    }
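
    /* AESE/AESD share a single helper, as do AESMC/AESIMC: the
     * decrypt flag selects the forward or inverse transform inside
     * gen_helper_crypto_aese / gen_helper_crypto_aesmc.
     */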
12630
12631    if (!fp_access_check(s)) {
12632        return;
12633    }
12634
12635    tcg_rd_ptr = vec_full_reg_ptr(s, rd);
12636    tcg_rn_ptr = vec_full_reg_ptr(s, rn);
12637    tcg_decrypt = tcg_const_i32(decrypt);
12638
12639    genfn(tcg_rd_ptr, tcg_rn_ptr, tcg_decrypt);
12640
12641    tcg_temp_free_ptr(tcg_rd_ptr);
12642    tcg_temp_free_ptr(tcg_rn_ptr);
12643    tcg_temp_free_i32(tcg_decrypt);
12644}
12645
12646/* Crypto three-reg SHA
12647 *  31             24 23  22  21 20  16  15 14    12 11 10 9    5 4    0
12648 * +-----------------+------+---+------+---+--------+-----+------+------+
12649 * | 0 1 0 1 1 1 1 0 | size | 0 |  Rm  | 0 | opcode | 0 0 |  Rn  |  Rd  |
12650 * +-----------------+------+---+------+---+--------+-----+------+------+
12651 */
12652static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn)
12653{
12654    int size = extract32(insn, 22, 2);
12655    int opcode = extract32(insn, 12, 3);
12656    int rm = extract32(insn, 16, 5);
12657    int rn = extract32(insn, 5, 5);
12658    int rd = extract32(insn, 0, 5);
12659    CryptoThreeOpFn *genfn;
12660    TCGv_ptr tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr;
12661    int feature = ARM_FEATURE_V8_SHA256;
12662
12663    if (size != 0) {
12664        unallocated_encoding(s);
12665        return;
12666    }
12667
12668    switch (opcode) {
12669    case 0: /* SHA1C */
12670    case 1: /* SHA1P */
12671    case 2: /* SHA1M */
12672    case 3: /* SHA1SU0 */
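             /* All four SHA1 insns are handled by one helper that takes
              * the opcode as an operand; genfn == NULL selects that
              * gen_helper_crypto_sha1_3reg path below.
              */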
12673        genfn = NULL;
12674        feature = ARM_FEATURE_V8_SHA1;
12675        break;
12676    case 4: /* SHA256H */
12677        genfn = gen_helper_crypto_sha256h;
12678        break;
12679    case 5: /* SHA256H2 */
12680        genfn = gen_helper_crypto_sha256h2;
12681        break;
12682    case 6: /* SHA256SU1 */
12683        genfn = gen_helper_crypto_sha256su1;
12684        break;
12685    default:
12686        unallocated_encoding(s);
12687        return;
12688    }
12689
12690    if (!arm_dc_feature(s, feature)) {
12691        unallocated_encoding(s);
12692        return;
12693    }
12694
12695    if (!fp_access_check(s)) {
12696        return;
12697    }
12698
12699    tcg_rd_ptr = vec_full_reg_ptr(s, rd);
12700    tcg_rn_ptr = vec_full_reg_ptr(s, rn);
12701    tcg_rm_ptr = vec_full_reg_ptr(s, rm);
12702
12703    if (genfn) {
12704        genfn(tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr);
12705    } else {
12706        TCGv_i32 tcg_opcode = tcg_const_i32(opcode);
12707
12708        gen_helper_crypto_sha1_3reg(tcg_rd_ptr, tcg_rn_ptr,
12709                                    tcg_rm_ptr, tcg_opcode);
12710        tcg_temp_free_i32(tcg_opcode);
12711    }
12712
12713    tcg_temp_free_ptr(tcg_rd_ptr);
12714    tcg_temp_free_ptr(tcg_rn_ptr);
12715    tcg_temp_free_ptr(tcg_rm_ptr);
12716}
12717
12718/* Crypto two-reg SHA
12719 *  31             24 23  22 21       17 16    12 11 10 9    5 4    0
12720 * +-----------------+------+-----------+--------+-----+------+------+
12721 * | 0 1 0 1 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
12722 * +-----------------+------+-----------+--------+-----+------+------+
12723 */
12724static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn)
12725{
12726    int size = extract32(insn, 22, 2);
12727    int opcode = extract32(insn, 12, 5);
12728    int rn = extract32(insn, 5, 5);
12729    int rd = extract32(insn, 0, 5);
12730    CryptoTwoOpFn *genfn;
12731    int feature;
12732    TCGv_ptr tcg_rd_ptr, tcg_rn_ptr;
12733
12734    if (size != 0) {
12735        unallocated_encoding(s);
12736        return;
12737    }
12738
12739    switch (opcode) {
12740    case 0: /* SHA1H */
12741        feature = ARM_FEATURE_V8_SHA1;
12742        genfn = gen_helper_crypto_sha1h;
12743        break;
12744    case 1: /* SHA1SU1 */
12745        feature = ARM_FEATURE_V8_SHA1;
12746        genfn = gen_helper_crypto_sha1su1;
12747        break;
12748    case 2: /* SHA256SU0 */
12749        feature = ARM_FEATURE_V8_SHA256;
12750        genfn = gen_helper_crypto_sha256su0;
12751        break;
12752    default:
12753        unallocated_encoding(s);
12754        return;
12755    }
12756
12757    if (!arm_dc_feature(s, feature)) {
12758        unallocated_encoding(s);
12759        return;
12760    }
12761
12762    if (!fp_access_check(s)) {
12763        return;
12764    }
12765
12766    tcg_rd_ptr = vec_full_reg_ptr(s, rd);
12767    tcg_rn_ptr = vec_full_reg_ptr(s, rn);
12768
12769    genfn(tcg_rd_ptr, tcg_rn_ptr);
12770
12771    tcg_temp_free_ptr(tcg_rd_ptr);
12772    tcg_temp_free_ptr(tcg_rn_ptr);
12773}
12774
12775/* Crypto three-reg SHA512
12776 *  31                   21 20  16 15  14  13 12  11  10  9    5 4    0
12777 * +-----------------------+------+---+---+-----+--------+------+------+
12778 * | 1 1 0 0 1 1 1 0 0 1 1 |  Rm  | 1 | O | 0 0 | opcode |  Rn  |  Rd  |
12779 * +-----------------------+------+---+---+-----+--------+------+------+
12780 */
12781static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn)
12782{
12783    int opcode = extract32(insn, 10, 2);
12784    int o = extract32(insn, 14, 1);
12785    int rm = extract32(insn, 16, 5);
12786    int rn = extract32(insn, 5, 5);
12787    int rd = extract32(insn, 0, 5);
12788    int feature;
12789    CryptoThreeOpFn *genfn;
12790
12791    if (o == 0) {
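             /* opcode is a two-bit field, so this switch is exhaustive
              * and feature/genfn are always assigned before use.
              */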
12792        switch (opcode) {
12793        case 0: /* SHA512H */
12794            feature = ARM_FEATURE_V8_SHA512;
12795            genfn = gen_helper_crypto_sha512h;
12796            break;
12797        case 1: /* SHA512H2 */
12798            feature = ARM_FEATURE_V8_SHA512;
12799            genfn = gen_helper_crypto_sha512h2;
12800            break;
12801        case 2: /* SHA512SU1 */
12802            feature = ARM_FEATURE_V8_SHA512;
12803            genfn = gen_helper_crypto_sha512su1;
12804            break;
12805        case 3: /* RAX1 */
12806            feature = ARM_FEATURE_V8_SHA3;
12807            genfn = NULL;
12808            break;
12809        }
12810    } else {
12811        switch (opcode) {
12812        case 0: /* SM3PARTW1 */
12813            feature = ARM_FEATURE_V8_SM3;
12814            genfn = gen_helper_crypto_sm3partw1;
12815            break;
12816        case 1: /* SM3PARTW2 */
12817            feature = ARM_FEATURE_V8_SM3;
12818            genfn = gen_helper_crypto_sm3partw2;
12819            break;
12820        case 2: /* SM4EKEY */
12821            feature = ARM_FEATURE_V8_SM4;
12822            genfn = gen_helper_crypto_sm4ekey;
12823            break;
12824        default:
12825            unallocated_encoding(s);
12826            return;
12827        }
12828    }
12829
12830    if (!arm_dc_feature(s, feature)) {
12831        unallocated_encoding(s);
12832        return;
12833    }
12834
12835    if (!fp_access_check(s)) {
12836        return;
12837    }
12838
12839    if (genfn) {
12840        TCGv_ptr tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr;
12841
12842        tcg_rd_ptr = vec_full_reg_ptr(s, rd);
12843        tcg_rn_ptr = vec_full_reg_ptr(s, rn);
12844        tcg_rm_ptr = vec_full_reg_ptr(s, rm);
12845
12846        genfn(tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr);
12847
12848        tcg_temp_free_ptr(tcg_rd_ptr);
12849        tcg_temp_free_ptr(tcg_rn_ptr);
12850        tcg_temp_free_ptr(tcg_rm_ptr);
12851    } else {
12852        TCGv_i64 tcg_op1, tcg_op2, tcg_res[2];
12853        int pass;
12854
12855        tcg_op1 = tcg_temp_new_i64();
12856        tcg_op2 = tcg_temp_new_i64();
12857        tcg_res[0] = tcg_temp_new_i64();
12858        tcg_res[1] = tcg_temp_new_i64();
12859
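             /* RAX1: Vd.2D = Vn.2D ^ ROL(Vm.2D, 1).  This is cheap enough
              * to expand inline per 64-bit element rather than calling a
              * helper, hence genfn == NULL above.
              */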
12860        for (pass = 0; pass < 2; pass++) {
12861            read_vec_element(s, tcg_op1, rn, pass, MO_64);
12862            read_vec_element(s, tcg_op2, rm, pass, MO_64);
12863
12864            tcg_gen_rotli_i64(tcg_res[pass], tcg_op2, 1);
12865            tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
12866        }
12867        write_vec_element(s, tcg_res[0], rd, 0, MO_64);
12868        write_vec_element(s, tcg_res[1], rd, 1, MO_64);
12869
12870        tcg_temp_free_i64(tcg_op1);
12871        tcg_temp_free_i64(tcg_op2);
12872        tcg_temp_free_i64(tcg_res[0]);
12873        tcg_temp_free_i64(tcg_res[1]);
12874    }
12875}
12876
12877/* Crypto two-reg SHA512
12878 *  31                                     12  11  10  9    5 4    0
12879 * +-----------------------------------------+--------+------+------+
12880 * | 1 1 0 0 1 1 1 0 1 1 0 0 0 0 0 0 1 0 0 0 | opcode |  Rn  |  Rd  |
12881 * +-----------------------------------------+--------+------+------+
12882 */
12883static void disas_crypto_two_reg_sha512(DisasContext *s, uint32_t insn)
12884{
12885    int opcode = extract32(insn, 10, 2);
12886    int rn = extract32(insn, 5, 5);
12887    int rd = extract32(insn, 0, 5);
12888    TCGv_ptr tcg_rd_ptr, tcg_rn_ptr;
12889    int feature;
12890    CryptoTwoOpFn *genfn;
12891
12892    switch (opcode) {
12893    case 0: /* SHA512SU0 */
12894        feature = ARM_FEATURE_V8_SHA512;
12895        genfn = gen_helper_crypto_sha512su0;
12896        break;
12897    case 1: /* SM4E */
12898        feature = ARM_FEATURE_V8_SM4;
12899        genfn = gen_helper_crypto_sm4e;
12900        break;
12901    default:
12902        unallocated_encoding(s);
12903        return;
12904    }
12905
12906    if (!arm_dc_feature(s, feature)) {
12907        unallocated_encoding(s);
12908        return;
12909    }
12910
12911    if (!fp_access_check(s)) {
12912        return;
12913    }
12914
12915    tcg_rd_ptr = vec_full_reg_ptr(s, rd);
12916    tcg_rn_ptr = vec_full_reg_ptr(s, rn);
12917
12918    genfn(tcg_rd_ptr, tcg_rn_ptr);
12919
12920    tcg_temp_free_ptr(tcg_rd_ptr);
12921    tcg_temp_free_ptr(tcg_rn_ptr);
12922}
12923
12924/* Crypto four-register
12925 *  31               23 22 21 20  16 15  14  10 9    5 4    0
12926 * +-------------------+-----+------+---+------+------+------+
12927 * | 1 1 0 0 1 1 1 0 0 | Op0 |  Rm  | 0 |  Ra  |  Rn  |  Rd  |
12928 * +-------------------+-----+------+---+------+------+------+
12929 */
12930static void disas_crypto_four_reg(DisasContext *s, uint32_t insn)
12931{
12932    int op0 = extract32(insn, 21, 2);
12933    int rm = extract32(insn, 16, 5);
12934    int ra = extract32(insn, 10, 5);
12935    int rn = extract32(insn, 5, 5);
12936    int rd = extract32(insn, 0, 5);
12937    int feature;
12938
12939    switch (op0) {
12940    case 0: /* EOR3 */
12941    case 1: /* BCAX */
12942        feature = ARM_FEATURE_V8_SHA3;
12943        break;
12944    case 2: /* SM3SS1 */
12945        feature = ARM_FEATURE_V8_SM3;
12946        break;
12947    default:
12948        unallocated_encoding(s);
12949        return;
12950    }
12951
12952    if (!arm_dc_feature(s, feature)) {
12953        unallocated_encoding(s);
12954        return;
12955    }
12956
12957    if (!fp_access_check(s)) {
12958        return;
12959    }
12960
12961    if (op0 < 2) {
12962        TCGv_i64 tcg_op1, tcg_op2, tcg_op3, tcg_res[2];
12963        int pass;
12964
12965        tcg_op1 = tcg_temp_new_i64();
12966        tcg_op2 = tcg_temp_new_i64();
12967        tcg_op3 = tcg_temp_new_i64();
12968        tcg_res[0] = tcg_temp_new_i64();
12969        tcg_res[1] = tcg_temp_new_i64();
12970
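             /* EOR3: Vd = Vn ^ Vm ^ Va.  BCAX: Vd = Vn ^ (Vm & ~Va).
              * Both are plain bitwise ops, expanded per 64-bit element.
              */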
12971        for (pass = 0; pass < 2; pass++) {
12972            read_vec_element(s, tcg_op1, rn, pass, MO_64);
12973            read_vec_element(s, tcg_op2, rm, pass, MO_64);
12974            read_vec_element(s, tcg_op3, ra, pass, MO_64);
12975
12976            if (op0 == 0) {
12977                /* EOR3 */
12978                tcg_gen_xor_i64(tcg_res[pass], tcg_op2, tcg_op3);
12979            } else {
12980                /* BCAX */
12981                tcg_gen_andc_i64(tcg_res[pass], tcg_op2, tcg_op3);
12982            }
12983            tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
12984        }
12985        write_vec_element(s, tcg_res[0], rd, 0, MO_64);
12986        write_vec_element(s, tcg_res[1], rd, 1, MO_64);
12987
12988        tcg_temp_free_i64(tcg_op1);
12989        tcg_temp_free_i64(tcg_op2);
12990        tcg_temp_free_i64(tcg_op3);
12991        tcg_temp_free_i64(tcg_res[0]);
12992        tcg_temp_free_i64(tcg_res[1]);
12993    } else {
12994        TCGv_i32 tcg_op1, tcg_op2, tcg_op3, tcg_res, tcg_zero;
12995
12996        tcg_op1 = tcg_temp_new_i32();
12997        tcg_op2 = tcg_temp_new_i32();
12998        tcg_op3 = tcg_temp_new_i32();
12999        tcg_res = tcg_temp_new_i32();
13000        tcg_zero = tcg_const_i32(0);
13001
13002        read_vec_element_i32(s, tcg_op1, rn, 3, MO_32);
13003        read_vec_element_i32(s, tcg_op2, rm, 3, MO_32);
13004        read_vec_element_i32(s, tcg_op3, ra, 3, MO_32);
13005
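             /* SM3SS1 produces only element 3 of the result:
              * ROL32(ROL32(Vn[3], 12) + Vm[3] + Va[3], 7).  A rotate
              * right by 20 (resp. 25) is the rotate left by 12 (resp. 7);
              * elements 0..2 are zeroed below.
              */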
13006        tcg_gen_rotri_i32(tcg_res, tcg_op1, 20);
13007        tcg_gen_add_i32(tcg_res, tcg_res, tcg_op2);
13008        tcg_gen_add_i32(tcg_res, tcg_res, tcg_op3);
13009        tcg_gen_rotri_i32(tcg_res, tcg_res, 25);
13010
13011        write_vec_element_i32(s, tcg_zero, rd, 0, MO_32);
13012        write_vec_element_i32(s, tcg_zero, rd, 1, MO_32);
13013        write_vec_element_i32(s, tcg_zero, rd, 2, MO_32);
13014        write_vec_element_i32(s, tcg_res, rd, 3, MO_32);
13015
13016        tcg_temp_free_i32(tcg_op1);
13017        tcg_temp_free_i32(tcg_op2);
13018        tcg_temp_free_i32(tcg_op3);
13019        tcg_temp_free_i32(tcg_res);
13020        tcg_temp_free_i32(tcg_zero);
13021    }
13022}
13023
13024/* Crypto XAR
13025 *  31                   21 20  16 15    10 9    5 4    0
13026 * +-----------------------+------+--------+------+------+
13027 * | 1 1 0 0 1 1 1 0 1 0 0 |  Rm  |  imm6  |  Rn  |  Rd  |
13028 * +-----------------------+------+--------+------+------+
13029 */
13030static void disas_crypto_xar(DisasContext *s, uint32_t insn)
13031{
13032    int rm = extract32(insn, 16, 5);
13033    int imm6 = extract32(insn, 10, 6);
13034    int rn = extract32(insn, 5, 5);
13035    int rd = extract32(insn, 0, 5);
13036    TCGv_i64 tcg_op1, tcg_op2, tcg_res[2];
13037    int pass;
13038
13039    if (!arm_dc_feature(s, ARM_FEATURE_V8_SHA3)) {
13040        unallocated_encoding(s);
13041        return;
13042    }
13043
13044    if (!fp_access_check(s)) {
13045        return;
13046    }
13047
13048    tcg_op1 = tcg_temp_new_i64();
13049    tcg_op2 = tcg_temp_new_i64();
13050    tcg_res[0] = tcg_temp_new_i64();
13051    tcg_res[1] = tcg_temp_new_i64();
13052
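         /* XAR: Vd.2D = ROR(Vn.2D ^ Vm.2D, imm6), per 64-bit element. */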
13053    for (pass = 0; pass < 2; pass++) {
13054        read_vec_element(s, tcg_op1, rn, pass, MO_64);
13055        read_vec_element(s, tcg_op2, rm, pass, MO_64);
13056
13057        tcg_gen_xor_i64(tcg_res[pass], tcg_op1, tcg_op2);
13058        tcg_gen_rotri_i64(tcg_res[pass], tcg_res[pass], imm6);
13059    }
13060    write_vec_element(s, tcg_res[0], rd, 0, MO_64);
13061    write_vec_element(s, tcg_res[1], rd, 1, MO_64);
13062
13063    tcg_temp_free_i64(tcg_op1);
13064    tcg_temp_free_i64(tcg_op2);
13065    tcg_temp_free_i64(tcg_res[0]);
13066    tcg_temp_free_i64(tcg_res[1]);
13067}
13068
13069/* Crypto three-reg imm2
13070 *  31                   21 20  16 15  14 13 12  11  10  9    5 4    0
13071 * +-----------------------+------+-----+------+--------+------+------+
13072 * | 1 1 0 0 1 1 1 0 0 1 0 |  Rm  | 1 0 | imm2 | opcode |  Rn  |  Rd  |
13073 * +-----------------------+------+-----+------+--------+------+------+
13074 */
13075static void disas_crypto_three_reg_imm2(DisasContext *s, uint32_t insn)
13076{
13077    int opcode = extract32(insn, 10, 2);
13078    int imm2 = extract32(insn, 12, 2);
13079    int rm = extract32(insn, 16, 5);
13080    int rn = extract32(insn, 5, 5);
13081    int rd = extract32(insn, 0, 5);
13082    TCGv_ptr tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr;
13083    TCGv_i32 tcg_imm2, tcg_opcode;
13084
13085    if (!arm_dc_feature(s, ARM_FEATURE_V8_SM3)) {
13086        unallocated_encoding(s);
13087        return;
13088    }
13089
13090    if (!fp_access_check(s)) {
13091        return;
13092    }
13093
13094    tcg_rd_ptr = vec_full_reg_ptr(s, rd);
13095    tcg_rn_ptr = vec_full_reg_ptr(s, rn);
13096    tcg_rm_ptr = vec_full_reg_ptr(s, rm);
13097    tcg_imm2   = tcg_const_i32(imm2);
13098    tcg_opcode = tcg_const_i32(opcode);
13099
13100    gen_helper_crypto_sm3tt(tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr, tcg_imm2,
13101                            tcg_opcode);
13102
13103    tcg_temp_free_ptr(tcg_rd_ptr);
13104    tcg_temp_free_ptr(tcg_rn_ptr);
13105    tcg_temp_free_ptr(tcg_rm_ptr);
13106    tcg_temp_free_i32(tcg_imm2);
13107    tcg_temp_free_i32(tcg_opcode);
13108}
13109
13110/* C3.6 Data processing - SIMD, inc Crypto
13111 *
13112 * As the decode gets a little complex we are using a table based
13113 * approach for this part of the decode.
13114 */
13115static const AArch64DecodeTable data_proc_simd[] = {
13116    /* pattern  ,  mask     ,  fn                        */
13117    { 0x0e200400, 0x9f200400, disas_simd_three_reg_same },
13118    { 0x0e008400, 0x9f208400, disas_simd_three_reg_same_extra },
13119    { 0x0e200000, 0x9f200c00, disas_simd_three_reg_diff },
13120    { 0x0e200800, 0x9f3e0c00, disas_simd_two_reg_misc },
13121    { 0x0e300800, 0x9f3e0c00, disas_simd_across_lanes },
13122    { 0x0e000400, 0x9fe08400, disas_simd_copy },
13123    { 0x0f000000, 0x9f000400, disas_simd_indexed }, /* vector indexed */
13124    /* simd_mod_imm decode is a subset of simd_shift_imm, so must precede it */
13125    { 0x0f000400, 0x9ff80400, disas_simd_mod_imm },
13126    { 0x0f000400, 0x9f800400, disas_simd_shift_imm },
13127    { 0x0e000000, 0xbf208c00, disas_simd_tb },
13128    { 0x0e000800, 0xbf208c00, disas_simd_zip_trn },
13129    { 0x2e000000, 0xbf208400, disas_simd_ext },
13130    { 0x5e200400, 0xdf200400, disas_simd_scalar_three_reg_same },
13131    { 0x5e008400, 0xdf208400, disas_simd_scalar_three_reg_same_extra },
13132    { 0x5e200000, 0xdf200c00, disas_simd_scalar_three_reg_diff },
13133    { 0x5e200800, 0xdf3e0c00, disas_simd_scalar_two_reg_misc },
13134    { 0x5e300800, 0xdf3e0c00, disas_simd_scalar_pairwise },
13135    { 0x5e000400, 0xdfe08400, disas_simd_scalar_copy },
13136    { 0x5f000000, 0xdf000400, disas_simd_indexed }, /* scalar indexed */
13137    { 0x5f000400, 0xdf800400, disas_simd_scalar_shift_imm },
13138    { 0x4e280800, 0xff3e0c00, disas_crypto_aes },
13139    { 0x5e000000, 0xff208c00, disas_crypto_three_reg_sha },
13140    { 0x5e280800, 0xff3e0c00, disas_crypto_two_reg_sha },
13141    { 0xce608000, 0xffe0b000, disas_crypto_three_reg_sha512 },
13142    { 0xcec08000, 0xfffff000, disas_crypto_two_reg_sha512 },
13143    { 0xce000000, 0xff808000, disas_crypto_four_reg },
13144    { 0xce800000, 0xffe00000, disas_crypto_xar },
13145    { 0xce408000, 0xffe0c000, disas_crypto_three_reg_imm2 },
13146    { 0x0e400400, 0x9f60c400, disas_simd_three_reg_same_fp16 },
13147    { 0x0e780800, 0x8f7e0c00, disas_simd_two_reg_misc_fp16 },
13148    { 0x5e400400, 0xdf60c400, disas_simd_scalar_three_reg_same_fp16 },
13149    { 0x00000000, 0x00000000, NULL }
13150};
13151
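     /* lookup_disas_fn() scans the table linearly and returns the first
      * entry for which (insn & mask) == pattern, stopping at the all-zero
      * terminator; more specific patterns must therefore come first, as
      * the simd_mod_imm/simd_shift_imm note above shows.
      */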
13152static void disas_data_proc_simd(DisasContext *s, uint32_t insn)
13153{
13154    /* Note that this is called with all non-FP cases from
13155     * table C3-6 so it must UNDEF for entries not specifically
13156     * allocated to instructions in that table.
13157     */
13158    AArch64DecodeFn *fn = lookup_disas_fn(&data_proc_simd[0], insn);
13159    if (fn) {
13160        fn(s, insn);
13161    } else {
13162        unallocated_encoding(s);
13163    }
13164}
13165
13166/* C3.6 Data processing - SIMD and floating point */
13167static void disas_data_proc_simd_fp(DisasContext *s, uint32_t insn)
13168{
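         /* Scalar FP encodings have bit 28 set and bit 30 clear;
          * everything else in this space decodes as SIMD, including
          * the crypto insns.
          */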
13169    if (extract32(insn, 28, 1) == 1 && extract32(insn, 30, 1) == 0) {
13170        disas_data_proc_fp(s, insn);
13171    } else {
13172        /* SIMD, including crypto */
13173        disas_data_proc_simd(s, insn);
13174    }
13175}
13176
13177/* C3.1 A64 instruction index by encoding */
13178static void disas_a64_insn(CPUARMState *env, DisasContext *s)
13179{
13180    uint32_t insn;
13181
13182    insn = arm_ldl_code(env, s->pc, s->sctlr_b);
13183    s->insn = insn;
13184    s->pc += 4;
13185
13186    s->fp_access_checked = false;
13187
13188    switch (extract32(insn, 25, 4)) {
13189    case 0x0: case 0x1: case 0x2: case 0x3: /* UNALLOCATED */
13190        unallocated_encoding(s);
13191        break;
13192    case 0x8: case 0x9: /* Data processing - immediate */
13193        disas_data_proc_imm(s, insn);
13194        break;
13195    case 0xa: case 0xb: /* Branch, exception generation and system insns */
13196        disas_b_exc_sys(s, insn);
13197        break;
13198    case 0x4:
13199    case 0x6:
13200    case 0xc:
13201    case 0xe:      /* Loads and stores */
13202        disas_ldst(s, insn);
13203        break;
13204    case 0x5:
13205    case 0xd:      /* Data processing - register */
13206        disas_data_proc_reg(s, insn);
13207        break;
13208    case 0x7:
13209    case 0xf:      /* Data processing - SIMD and floating point */
13210        disas_data_proc_simd_fp(s, insn);
13211        break;
13212    default:
13213        assert(FALSE); /* all 16 cases should be handled above */
13214        break;
13215    }
13216
13217    /* if we allocated any temporaries, free them here */
13218    free_tmp_a64(s);
13219}
13220
13221static int aarch64_tr_init_disas_context(DisasContextBase *dcbase,
13222                                         CPUState *cpu, int max_insns)
13223{
13224    DisasContext *dc = container_of(dcbase, DisasContext, base);
13225    CPUARMState *env = cpu->env_ptr;
13226    ARMCPU *arm_cpu = arm_env_get_cpu(env);
13227    int bound;
13228
13229    dc->pc = dc->base.pc_first;
13230    dc->condjmp = 0;
13231
13232    dc->aarch64 = 1;
13233    /* If we are coming from secure EL0 in a system with a 32-bit EL3, then
13234     * there is no secure EL1, so we route exceptions to EL3.
13235     */
13236    dc->secure_routed_to_el3 = arm_feature(env, ARM_FEATURE_EL3) &&
13237                               !arm_el_is_aa64(env, 3);
13238    dc->thumb = 0;
13239    dc->sctlr_b = 0;
13240    dc->be_data = ARM_TBFLAG_BE_DATA(dc->base.tb->flags) ? MO_BE : MO_LE;
13241    dc->condexec_mask = 0;
13242    dc->condexec_cond = 0;
13243    dc->mmu_idx = core_to_arm_mmu_idx(env, ARM_TBFLAG_MMUIDX(dc->base.tb->flags));
13244    dc->tbi0 = ARM_TBFLAG_TBI0(dc->base.tb->flags);
13245    dc->tbi1 = ARM_TBFLAG_TBI1(dc->base.tb->flags);
13246    dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
13247#if !defined(CONFIG_USER_ONLY)
13248    dc->user = (dc->current_el == 0);
13249#endif
13250    dc->fp_excp_el = ARM_TBFLAG_FPEXC_EL(dc->base.tb->flags);
13251    dc->sve_excp_el = ARM_TBFLAG_SVEEXC_EL(dc->base.tb->flags);
13252    dc->sve_len = (ARM_TBFLAG_ZCR_LEN(dc->base.tb->flags) + 1) * 16;
13253    dc->vec_len = 0;
13254    dc->vec_stride = 0;
13255    dc->cp_regs = arm_cpu->cp_regs;
13256    dc->features = env->features;
13257
13258    /* Single step state. The code-generation logic here is:
13259     *  SS_ACTIVE == 0:
13260     *   generate code with no special handling for single-stepping (except
13261     *   that anything that can make us go to SS_ACTIVE == 1 must end the TB;
13262     *   this happens anyway because those changes are all system register or
13263     *   PSTATE writes).
13264     *  SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
13265     *   emit code for one insn
13266     *   emit code to clear PSTATE.SS
13267     *   emit code to generate software step exception for completed step
13268     *   end TB (as usual for having generated an exception)
13269     *  SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
13270     *   emit code to generate a software step exception
13271     *   end the TB
13272     */
13273    dc->ss_active = ARM_TBFLAG_SS_ACTIVE(dc->base.tb->flags);
13274    dc->pstate_ss = ARM_TBFLAG_PSTATE_SS(dc->base.tb->flags);
13275    dc->is_ldex = false;
13276    dc->ss_same_el = (arm_debug_target_el(env) == dc->current_el);
13277
13278    /* Bound the number of insns to execute to those left on the page.  */
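         /* TARGET_PAGE_MASK is a negative constant with the in-page offset
          * bits clear, so -(pc_first | TARGET_PAGE_MASK) is the number of
          * bytes from pc_first to the end of its page, and each A64 insn
          * is 4 bytes long.
          */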
13279    bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;
13280
13281    /* If architectural single step active, limit to 1.  */
13282    if (dc->ss_active) {
13283        bound = 1;
13284    }
13285    max_insns = MIN(max_insns, bound);
13286
13287    init_tmp_a64_array(dc);
13288
13289    return max_insns;
13290}
13291
13292static void aarch64_tr_tb_start(DisasContextBase *db, CPUState *cpu)
13293{
13294    tcg_clear_temp_count();
13295}
13296
13297static void aarch64_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
13298{
13299    DisasContext *dc = container_of(dcbase, DisasContext, base);
13300
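         /* The two extra insn_start words are the condexec bits (always 0
          * in AArch64 state) and a slot for the insn syndrome, which can
          * be patched later via dc->insn_start.
          */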
13301    tcg_gen_insn_start(dc->pc, 0, 0);
13302    dc->insn_start = tcg_last_op();
13303}
13304
13305static bool aarch64_tr_breakpoint_check(DisasContextBase *dcbase, CPUState *cpu,
13306                                        const CPUBreakpoint *bp)
13307{
13308    DisasContext *dc = container_of(dcbase, DisasContext, base);
13309
13310    if (bp->flags & BP_CPU) {
13311        gen_a64_set_pc_im(dc->pc);
13312        gen_helper_check_breakpoints(cpu_env);
13313        /* End the TB early; it likely won't be executed */
13314        dc->base.is_jmp = DISAS_TOO_MANY;
13315    } else {
13316        gen_exception_internal_insn(dc, 0, EXCP_DEBUG);
13317        /* The address covered by the breakpoint must be
13318           included in [tb->pc, tb->pc + tb->size) in order
13319           for it to be properly cleared -- thus we
13320           increment the PC here so that the logic setting
13321           tb->size below does the right thing.  */
13322        dc->pc += 4;
13323        dc->base.is_jmp = DISAS_NORETURN;
13324    }
13325
13326    return true;
13327}
13328
13329static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
13330{
13331    DisasContext *dc = container_of(dcbase, DisasContext, base);
13332    CPUARMState *env = cpu->env_ptr;
13333
13334    if (dc->ss_active && !dc->pstate_ss) {
13335        /* Singlestep state is Active-pending.
13336         * If we're in this state at the start of a TB then either
13337         *  a) we just took an exception to an EL which is being debugged
13338         *     and this is the first insn in the exception handler
13339         *  b) debug exceptions were masked and we just unmasked them
13340         *     without changing EL (eg by clearing PSTATE.D)
13341         * In either case we're going to take a swstep exception in the
13342         * "did not step an insn" case, and so the syndrome ISV and EX
13343         * bits should be zero.
13344         */
13345        assert(dc->base.num_insns == 1);
13346        gen_exception(EXCP_UDEF, syn_swstep(dc->ss_same_el, 0, 0),
13347                      default_exception_el(dc));
13348        dc->base.is_jmp = DISAS_NORETURN;
13349    } else {
13350        disas_a64_insn(env, dc);
13351    }
13352
13353    dc->base.pc_next = dc->pc;
13354    translator_loop_temp_check(&dc->base);
13355}
13356
13357static void aarch64_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
13358{
13359    DisasContext *dc = container_of(dcbase, DisasContext, base);
13360
13361    if (unlikely(dc->base.singlestep_enabled || dc->ss_active)) {
13362        /* Note that this means single-stepping a WFI doesn't halt the CPU.
13363         * For conditional branch insns this is harmless unreachable code as
13364         * gen_goto_tb() has already handled emitting the debug exception
13365         * (and thus a tb-jump is not possible when singlestepping).
13366         */
13367        switch (dc->base.is_jmp) {
13368        default:
13369            gen_a64_set_pc_im(dc->pc);
13370            /* fall through */
13371        case DISAS_EXIT:
13372        case DISAS_JUMP:
13373            if (dc->base.singlestep_enabled) {
13374                gen_exception_internal(EXCP_DEBUG);
13375            } else {
13376                gen_step_complete_exception(dc);
13377            }
13378            break;
13379        case DISAS_NORETURN:
13380            break;
13381        }
13382    } else {
13383        switch (dc->base.is_jmp) {
13384        case DISAS_NEXT:
13385        case DISAS_TOO_MANY:
13386            gen_goto_tb(dc, 1, dc->pc);
13387            break;
13388        default:
13389        case DISAS_UPDATE:
13390            gen_a64_set_pc_im(dc->pc);
13391            /* fall through */
13392        case DISAS_EXIT:
13393            tcg_gen_exit_tb(0);
13394            break;
13395        case DISAS_JUMP:
13396            tcg_gen_lookup_and_goto_ptr();
13397            break;
13398        case DISAS_NORETURN:
13399        case DISAS_SWI:
13400            break;
13401        case DISAS_WFE:
13402            gen_a64_set_pc_im(dc->pc);
13403            gen_helper_wfe(cpu_env);
13404            break;
13405        case DISAS_YIELD:
13406            gen_a64_set_pc_im(dc->pc);
13407            gen_helper_yield(cpu_env);
13408            break;
13409        case DISAS_WFI:
13410        {
13411            /* This is a special case because we don't want to just halt the CPU
13412             * if trying to debug across a WFI.
13413             */
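                 /* The constant 4 is the A64 insn length: if the WFI traps
                  * to a higher exception level, the helper uses it to back
                  * the PC up so the exception is taken on the WFI itself.
                  */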
13414            TCGv_i32 tmp = tcg_const_i32(4);
13415
13416            gen_a64_set_pc_im(dc->pc);
13417            gen_helper_wfi(cpu_env, tmp);
13418            tcg_temp_free_i32(tmp);
13419            /* The helper doesn't necessarily throw an exception, but we
13420             * must go back to the main loop to check for interrupts anyway.
13421             */
13422            tcg_gen_exit_tb(0);
13423            break;
13424        }
13425        }
13426    }
13427
13428    /* Functions above can change dc->pc, so re-align dc->base.pc_next */
13429    dc->base.pc_next = dc->pc;
13430}
13431
13432static void aarch64_tr_disas_log(const DisasContextBase *dcbase,
13433                                      CPUState *cpu)
13434{
13435    DisasContext *dc = container_of(dcbase, DisasContext, base);
13436
13437    qemu_log("IN: %s\n", lookup_symbol(dc->base.pc_first));
13438    log_target_disas(cpu, dc->base.pc_first, dc->base.tb->size);
13439}
13440
13441const TranslatorOps aarch64_translator_ops = {
13442    .init_disas_context = aarch64_tr_init_disas_context,
13443    .tb_start           = aarch64_tr_tb_start,
13444    .insn_start         = aarch64_tr_insn_start,
13445    .breakpoint_check   = aarch64_tr_breakpoint_check,
13446    .translate_insn     = aarch64_tr_translate_insn,
13447    .tb_stop            = aarch64_tr_tb_stop,
13448    .disas_log          = aarch64_tr_disas_log,
13449};
13450