qemu/target/arm/translate-a64.c
/*
 *  AArch64 translation
 *
 *  Copyright (c) 2013 Alexander Graf <agraf@suse.de>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"

#include "cpu.h"
#include "exec/exec-all.h"
#include "tcg/tcg-op.h"
#include "tcg/tcg-op-gvec.h"
#include "qemu/log.h"
#include "arm_ldst.h"
#include "translate.h"
#include "internals.h"
#include "qemu/host-utils.h"

#include "hw/semihosting/semihost.h"
#include "exec/gen-icount.h"

#include "exec/helper-proto.h"
#include "exec/helper-gen.h"
#include "exec/log.h"

#include "trace-tcg.h"
#include "translate-a64.h"
#include "qemu/atomic128.h"

static TCGv_i64 cpu_X[32];
static TCGv_i64 cpu_pc;

/* Load/store exclusive handling */
static TCGv_i64 cpu_exclusive_high;

static const char *regnames[] = {
    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
    "x24", "x25", "x26", "x27", "x28", "x29", "lr", "sp"
};

enum a64_shift_type {
    A64_SHIFT_TYPE_LSL = 0,
    A64_SHIFT_TYPE_LSR = 1,
    A64_SHIFT_TYPE_ASR = 2,
    A64_SHIFT_TYPE_ROR = 3
};

/* Table based decoder typedefs - used when the relevant bits for decode
 * are too awkwardly scattered across the instruction (eg SIMD).
 */
typedef void AArch64DecodeFn(DisasContext *s, uint32_t insn);

typedef struct AArch64DecodeTable {
    uint32_t pattern;
    uint32_t mask;
    AArch64DecodeFn *disas_fn;
} AArch64DecodeTable;

/* initialize TCG globals.  */
void a64_translate_init(void)
{
    int i;

    cpu_pc = tcg_global_mem_new_i64(cpu_env,
                                    offsetof(CPUARMState, pc),
                                    "pc");
    for (i = 0; i < 32; i++) {
        cpu_X[i] = tcg_global_mem_new_i64(cpu_env,
                                          offsetof(CPUARMState, xregs[i]),
                                          regnames[i]);
    }

    cpu_exclusive_high = tcg_global_mem_new_i64(cpu_env,
        offsetof(CPUARMState, exclusive_high), "exclusive_high");
}

/*
 * Return the core mmu_idx to use for A64 "unprivileged load/store" insns
 */
static int get_a64_user_mem_index(DisasContext *s)
{
    /*
     * If AccType_UNPRIV is not used, the insn uses AccType_NORMAL,
     * which is the usual mmu_idx for this cpu state.
     */
    ARMMMUIdx useridx = s->mmu_idx;

    if (s->unpriv) {
        /*
         * We have pre-computed the condition for AccType_UNPRIV.
         * Therefore we should never get here with a mmu_idx for
         * which we do not know the corresponding user mmu_idx.
         */
        switch (useridx) {
        case ARMMMUIdx_E10_1:
        case ARMMMUIdx_E10_1_PAN:
            useridx = ARMMMUIdx_E10_0;
            break;
        case ARMMMUIdx_E20_2:
        case ARMMMUIdx_E20_2_PAN:
            useridx = ARMMMUIdx_E20_0;
            break;
        case ARMMMUIdx_SE10_1:
        case ARMMMUIdx_SE10_1_PAN:
            useridx = ARMMMUIdx_SE10_0;
            break;
        default:
            g_assert_not_reached();
        }
    }
    return arm_to_core_mmu_idx(useridx);
}
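
/*
 * Added note (not in the original source): the "unprivileged" accesses
 * are the LDTR/STTR family.  For example, an LDTR executed at EL1 in
 * the EL1&0 regime arrives here with ARMMMUIdx_E10_1 and is translated
 * with ARMMMUIdx_E10_0, i.e. with EL0 permissions, which is exactly the
 * mapping the switch above performs.
 */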

static void reset_btype(DisasContext *s)
{
    if (s->btype != 0) {
        TCGv_i32 zero = tcg_const_i32(0);
        tcg_gen_st_i32(zero, cpu_env, offsetof(CPUARMState, btype));
        tcg_temp_free_i32(zero);
        s->btype = 0;
    }
}

static void set_btype(DisasContext *s, int val)
{
    TCGv_i32 tcg_val;

    /* BTYPE is a 2-bit field, and 0 should be done with reset_btype.  */
    tcg_debug_assert(val >= 1 && val <= 3);

    tcg_val = tcg_const_i32(val);
    tcg_gen_st_i32(tcg_val, cpu_env, offsetof(CPUARMState, btype));
    tcg_temp_free_i32(tcg_val);
    s->btype = -1;
}

void gen_a64_set_pc_im(uint64_t val)
{
    tcg_gen_movi_i64(cpu_pc, val);
}

/*
 * Handle Top Byte Ignore (TBI) bits.
 *
 * If address tagging is enabled via the TCR TBI bits:
 *  + for EL2 and EL3 there is only one TBI bit, and if it is set
 *    then the address is zero-extended, clearing bits [63:56]
 *  + for EL0 and EL1, TBI0 controls addresses with bit 55 == 0
 *    and TBI1 controls addresses with bit 55 == 1.
 *    If the appropriate TBI bit is set for the address then
 *    the address is sign-extended from bit 55 into bits [63:56]
 *
 * Here we have concatenated TBI{1,0} into tbi.
 */
static void gen_top_byte_ignore(DisasContext *s, TCGv_i64 dst,
                                TCGv_i64 src, int tbi)
{
    if (tbi == 0) {
        /* Load unmodified address */
        tcg_gen_mov_i64(dst, src);
    } else if (!regime_has_2_ranges(s->mmu_idx)) {
        /* Force tag byte to all zero */
        tcg_gen_extract_i64(dst, src, 0, 56);
    } else {
        /* Sign-extend from bit 55.  */
        tcg_gen_sextract_i64(dst, src, 0, 56);

        if (tbi != 3) {
            TCGv_i64 tcg_zero = tcg_const_i64(0);

            /*
             * The two TBI bits differ.
             * If tbi0, then !tbi1: only use the extension if positive.
             * if !tbi0, then tbi1: only use the extension if negative.
             */
            tcg_gen_movcond_i64(tbi == 1 ? TCG_COND_GE : TCG_COND_LT,
                                dst, dst, tcg_zero, dst, src);
            tcg_temp_free_i64(tcg_zero);
        }
    }
}
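
/*
 * Worked example (added annotation; the constants are illustrative):
 * tbi == 2 means TBI1 set, TBI0 clear.  For src = 0xaa80_ffff_ffff_0000,
 * bit 55 is 1, so the TCG_COND_LT movcond keeps the value sign-extended
 * from bit 55 and dst becomes 0xff80_ffff_ffff_0000.  Had bit 55 been 0,
 * the movcond would have restored the unmodified src, matching the
 * TBI0-clear half of the regime.
 */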

static void gen_a64_set_pc(DisasContext *s, TCGv_i64 src)
{
    /*
     * If address tagging is enabled for instructions via the TCR TBI bits,
     * then loading an address into the PC will clear out any tag.
     */
    gen_top_byte_ignore(s, cpu_pc, src, s->tbii);
}

/*
 * Handle MTE and/or TBI.
 *
 * For TBI, ideally, we would do nothing.  Proper behaviour on fault is
 * for the tag to be present in the FAR_ELx register.  But for user-only
 * mode we do not have a TLB with which to implement this, so we must
 * remove the top byte now.
 *
 * Always return a fresh temporary that we can increment independently
 * of the write-back address.
 */
TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr)
{
    TCGv_i64 clean = new_tmp_a64(s);
#ifdef CONFIG_USER_ONLY
    gen_top_byte_ignore(s, clean, addr, s->tbid);
#else
    tcg_gen_mov_i64(clean, addr);
#endif
    return clean;
}

/* Insert a zero tag into src, with the result at dst. */
static void gen_address_with_allocation_tag0(TCGv_i64 dst, TCGv_i64 src)
{
    tcg_gen_andi_i64(dst, src, ~MAKE_64BIT_MASK(56, 4));
}
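
/*
 * Added note: MAKE_64BIT_MASK(56, 4) is 0x0f00_0000_0000_0000, so the
 * andi above clears bits [59:56], the allocation-tag nibble, while
 * leaving bits [63:60] and [55:0] untouched.  For example,
 * 0x3400_0000_dead_beef becomes 0x3000_0000_dead_beef.
 */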

static void gen_probe_access(DisasContext *s, TCGv_i64 ptr,
                             MMUAccessType acc, int log2_size)
{
    TCGv_i32 t_acc = tcg_const_i32(acc);
    TCGv_i32 t_idx = tcg_const_i32(get_mem_index(s));
    TCGv_i32 t_size = tcg_const_i32(1 << log2_size);

    gen_helper_probe_access(cpu_env, ptr, t_acc, t_idx, t_size);
    tcg_temp_free_i32(t_acc);
    tcg_temp_free_i32(t_idx);
    tcg_temp_free_i32(t_size);
}

/*
 * For MTE, check a single logical or atomic access.  This probes a single
 * address, the exact one specified.  The size and alignment of the access
 * is not relevant to MTE, per se, but watchpoints do require the size,
 * and we want to recognize those before making any other changes to state.
 */
static TCGv_i64 gen_mte_check1_mmuidx(DisasContext *s, TCGv_i64 addr,
                                      bool is_write, bool tag_checked,
                                      int log2_size, bool is_unpriv,
                                      int core_idx)
{
    if (tag_checked && s->mte_active[is_unpriv]) {
        TCGv_i32 tcg_desc;
        TCGv_i64 ret;
        int desc = 0;

        desc = FIELD_DP32(desc, MTEDESC, MIDX, core_idx);
        desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
        desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
        desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
        desc = FIELD_DP32(desc, MTEDESC, ESIZE, 1 << log2_size);
        tcg_desc = tcg_const_i32(desc);

        ret = new_tmp_a64(s);
        gen_helper_mte_check1(ret, cpu_env, tcg_desc, addr);
        tcg_temp_free_i32(tcg_desc);

        return ret;
    }
    return clean_data_tbi(s, addr);
}

TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write,
                        bool tag_checked, int log2_size)
{
    return gen_mte_check1_mmuidx(s, addr, is_write, tag_checked, log2_size,
                                 false, get_mem_index(s));
}

/*
 * For MTE, check multiple logical sequential accesses.
 */
TCGv_i64 gen_mte_checkN(DisasContext *s, TCGv_i64 addr, bool is_write,
                        bool tag_checked, int log2_esize, int total_size)
{
    if (tag_checked && s->mte_active[0] && total_size != (1 << log2_esize)) {
        TCGv_i32 tcg_desc;
        TCGv_i64 ret;
        int desc = 0;

        desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
        desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
        desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
        desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
        desc = FIELD_DP32(desc, MTEDESC, ESIZE, 1 << log2_esize);
        desc = FIELD_DP32(desc, MTEDESC, TSIZE, total_size);
        tcg_desc = tcg_const_i32(desc);

        ret = new_tmp_a64(s);
        gen_helper_mte_checkN(ret, cpu_env, tcg_desc, addr);
        tcg_temp_free_i32(tcg_desc);

        return ret;
    }
    return gen_mte_check1(s, addr, is_write, tag_checked, log2_esize);
}
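
/*
 * Added note: for a tag-checked LDP of two 8-byte registers the
 * descriptor built above carries ESIZE = 8 and TSIZE = 16, giving the
 * helper the full span of the access; a single-register access
 * (total_size == 1 << log2_esize) instead falls through to
 * gen_mte_check1() above.
 */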

typedef struct DisasCompare64 {
    TCGCond cond;
    TCGv_i64 value;
} DisasCompare64;

static void a64_test_cc(DisasCompare64 *c64, int cc)
{
    DisasCompare c32;

    arm_test_cc(&c32, cc);

    /* Sign-extend the 32-bit value so that the GE/LT comparisons work
     * properly.  The NE/EQ comparisons are also fine with this choice.  */
    c64->cond = c32.cond;
    c64->value = tcg_temp_new_i64();
    tcg_gen_ext_i32_i64(c64->value, c32.value);

    arm_free_cc(&c32);
}

static void a64_free_cc(DisasCompare64 *c64)
{
    tcg_temp_free_i64(c64->value);
}
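
/*
 * Usage sketch (added; kept compiled out, not part of the original
 * file).  A typical consumer turns the DisasCompare64 into a movcond,
 * e.g. for a CSEL-style operation.  "cond", "tcg_rd", "t_true" and
 * "t_false" are hypothetical names standing in for decoded operands.
 */
#if 0
{
    DisasCompare64 c;
    TCGv_i64 zero = tcg_const_i64(0);

    a64_test_cc(&c, cond);
    /* rd = (condition holds) ? t_true : t_false */
    tcg_gen_movcond_i64(c.cond, tcg_rd, c.value, zero, t_true, t_false);
    a64_free_cc(&c);
    tcg_temp_free_i64(zero);
}
#endif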

static void gen_exception_internal(int excp)
{
    TCGv_i32 tcg_excp = tcg_const_i32(excp);

    assert(excp_is_internal(excp));
    gen_helper_exception_internal(cpu_env, tcg_excp);
    tcg_temp_free_i32(tcg_excp);
}

static void gen_exception_internal_insn(DisasContext *s, uint64_t pc, int excp)
{
    gen_a64_set_pc_im(pc);
    gen_exception_internal(excp);
    s->base.is_jmp = DISAS_NORETURN;
}

static void gen_exception_insn(DisasContext *s, uint64_t pc, int excp,
                               uint32_t syndrome, uint32_t target_el)
{
    gen_a64_set_pc_im(pc);
    gen_exception(excp, syndrome, target_el);
    s->base.is_jmp = DISAS_NORETURN;
}

static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syndrome)
{
    TCGv_i32 tcg_syn;

    gen_a64_set_pc_im(s->pc_curr);
    tcg_syn = tcg_const_i32(syndrome);
    gen_helper_exception_bkpt_insn(cpu_env, tcg_syn);
    tcg_temp_free_i32(tcg_syn);
    s->base.is_jmp = DISAS_NORETURN;
}

static void gen_step_complete_exception(DisasContext *s)
{
    /* We just completed a step of an insn. Move from Active-not-pending
     * to Active-pending, and then also take the swstep exception.
     * This corresponds to making the (IMPDEF) choice to prioritize
     * swstep exceptions over asynchronous exceptions taken to an exception
     * level where debug is disabled. This choice has the advantage that
     * we do not need to maintain internal state corresponding to the
     * ISV/EX syndrome bits between completion of the step and generation
     * of the exception, and our syndrome information is always correct.
     */
    gen_ss_advance(s);
    gen_swstep_exception(s, 1, s->is_ldex);
    s->base.is_jmp = DISAS_NORETURN;
}

static inline bool use_goto_tb(DisasContext *s, int n, uint64_t dest)
{
    /* No direct tb linking with singlestep (either QEMU's or the ARM
     * debug architecture kind) or deterministic io
     */
    if (s->base.singlestep_enabled || s->ss_active ||
        (tb_cflags(s->base.tb) & CF_LAST_IO)) {
        return false;
    }

#ifndef CONFIG_USER_ONLY
    /* Only link tbs from inside the same guest page */
    if ((s->base.tb->pc & TARGET_PAGE_MASK) != (dest & TARGET_PAGE_MASK)) {
        return false;
    }
#endif

    return true;
}

static inline void gen_goto_tb(DisasContext *s, int n, uint64_t dest)
{
    TranslationBlock *tb;

    tb = s->base.tb;
    if (use_goto_tb(s, n, dest)) {
        tcg_gen_goto_tb(n);
        gen_a64_set_pc_im(dest);
        tcg_gen_exit_tb(tb, n);
        s->base.is_jmp = DISAS_NORETURN;
    } else {
        gen_a64_set_pc_im(dest);
        if (s->ss_active) {
            gen_step_complete_exception(s);
        } else if (s->base.singlestep_enabled) {
            gen_exception_internal(EXCP_DEBUG);
        } else {
            tcg_gen_lookup_and_goto_ptr();
            s->base.is_jmp = DISAS_NORETURN;
        }
    }
}

void unallocated_encoding(DisasContext *s)
{
    /* Unallocated and reserved encodings are uncategorized */
    gen_exception_insn(s, s->pc_curr, EXCP_UDEF, syn_uncategorized(),
                       default_exception_el(s));
}

static void init_tmp_a64_array(DisasContext *s)
{
#ifdef CONFIG_DEBUG_TCG
    memset(s->tmp_a64, 0, sizeof(s->tmp_a64));
#endif
    s->tmp_a64_count = 0;
}

static void free_tmp_a64(DisasContext *s)
{
    int i;
    for (i = 0; i < s->tmp_a64_count; i++) {
        tcg_temp_free_i64(s->tmp_a64[i]);
    }
    init_tmp_a64_array(s);
}

TCGv_i64 new_tmp_a64(DisasContext *s)
{
    assert(s->tmp_a64_count < TMP_A64_MAX);
    return s->tmp_a64[s->tmp_a64_count++] = tcg_temp_new_i64();
}

TCGv_i64 new_tmp_a64_local(DisasContext *s)
{
    assert(s->tmp_a64_count < TMP_A64_MAX);
    return s->tmp_a64[s->tmp_a64_count++] = tcg_temp_local_new_i64();
}

TCGv_i64 new_tmp_a64_zero(DisasContext *s)
{
    TCGv_i64 t = new_tmp_a64(s);
    tcg_gen_movi_i64(t, 0);
    return t;
}

/*
 * Register access functions
 *
 * These functions are used for directly accessing a register where
 * changes to the final register value are likely to be made. If you
 * need to use a register for temporary calculation (e.g. index type
 * operations) use the read_* form.
 *
 * B1.2.1 Register mappings
 *
 * In instruction register encoding 31 can refer to ZR (zero register) or
 * the SP (stack pointer) depending on context. In QEMU's case we map SP
 * to cpu_X[31] and ZR accesses to a temporary which can be discarded.
 * This is the point of the _sp forms.
 */
TCGv_i64 cpu_reg(DisasContext *s, int reg)
{
    if (reg == 31) {
        return new_tmp_a64_zero(s);
    } else {
        return cpu_X[reg];
    }
}
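
/*
 * Added note: because cpu_reg(s, 31) hands back a fresh zero temporary,
 * a write such as tcg_gen_mov_i64(cpu_reg(s, 31), val) lands in the
 * discarded temp rather than in cpu_X[31], which is exactly the XZR
 * behaviour; cpu_reg_sp() below is the variant for SP-encoded uses.
 */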

/* register access for when 31 == SP */
TCGv_i64 cpu_reg_sp(DisasContext *s, int reg)
{
    return cpu_X[reg];
}

/* read a cpu register in 32bit/64bit mode. Returns a TCGv_i64
 * representing the register contents. This TCGv is an auto-freed
 * temporary so it need not be explicitly freed, and may be modified.
 */
TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf)
{
    TCGv_i64 v = new_tmp_a64(s);
    if (reg != 31) {
        if (sf) {
            tcg_gen_mov_i64(v, cpu_X[reg]);
        } else {
            tcg_gen_ext32u_i64(v, cpu_X[reg]);
        }
    } else {
        tcg_gen_movi_i64(v, 0);
    }
    return v;
}

TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf)
{
    TCGv_i64 v = new_tmp_a64(s);
    if (sf) {
        tcg_gen_mov_i64(v, cpu_X[reg]);
    } else {
        tcg_gen_ext32u_i64(v, cpu_X[reg]);
    }
    return v;
}

/* Return the offset into CPUARMState of a slice (from
 * the least significant end) of FP register Qn (ie
 * Dn, Sn, Hn or Bn).
 * (Note that this is not the same mapping as for A32; see cpu.h)
 */
static inline int fp_reg_offset(DisasContext *s, int regno, MemOp size)
{
    return vec_reg_offset(s, regno, 0, size);
}

/* Offset of the high half of the 128 bit vector Qn */
static inline int fp_reg_hi_offset(DisasContext *s, int regno)
{
    return vec_reg_offset(s, regno, 1, MO_64);
}

/* Convenience accessors for reading and writing single and double
 * FP registers. Writing clears the upper parts of the associated
 * 128 bit vector register, as required by the architecture.
 * Note that unlike the GP register accessors, the values returned
 * by the read functions must be manually freed.
 */
static TCGv_i64 read_fp_dreg(DisasContext *s, int reg)
{
    TCGv_i64 v = tcg_temp_new_i64();

    tcg_gen_ld_i64(v, cpu_env, fp_reg_offset(s, reg, MO_64));
    return v;
}

static TCGv_i32 read_fp_sreg(DisasContext *s, int reg)
{
    TCGv_i32 v = tcg_temp_new_i32();

    tcg_gen_ld_i32(v, cpu_env, fp_reg_offset(s, reg, MO_32));
    return v;
}

static TCGv_i32 read_fp_hreg(DisasContext *s, int reg)
{
    TCGv_i32 v = tcg_temp_new_i32();

    tcg_gen_ld16u_i32(v, cpu_env, fp_reg_offset(s, reg, MO_16));
    return v;
}

/* Clear the bits above an N-bit vector, for N = (is_q ? 128 : 64).
 * If SVE is not enabled, then there are only 128 bits in the vector.
 */
static void clear_vec_high(DisasContext *s, bool is_q, int rd)
{
    unsigned ofs = fp_reg_offset(s, rd, MO_64);
    unsigned vsz = vec_full_reg_size(s);

    /* Nop move, with side effect of clearing the tail. */
    tcg_gen_gvec_mov(MO_64, ofs, ofs, is_q ? 16 : 8, vsz);
}
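
/*
 * Added note: the mov above copies the low (is_q ? 16 : 8) bytes onto
 * themselves, a no-op, but passes the full vector size as maxsz, and
 * the gvec expander zeroes the bytes between oprsz and maxsz.  That is
 * what clears the high half of Qn and any SVE tail beyond 128 bits.
 */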

void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v)
{
    unsigned ofs = fp_reg_offset(s, reg, MO_64);

    tcg_gen_st_i64(v, cpu_env, ofs);
    clear_vec_high(s, false, reg);
}

static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v)
{
    TCGv_i64 tmp = tcg_temp_new_i64();

    tcg_gen_extu_i32_i64(tmp, v);
    write_fp_dreg(s, reg, tmp);
    tcg_temp_free_i64(tmp);
}

/* Expand a 2-operand AdvSIMD vector operation using an expander function.  */
static void gen_gvec_fn2(DisasContext *s, bool is_q, int rd, int rn,
                         GVecGen2Fn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 2-operand + immediate AdvSIMD vector operation using
 * an expander function.
 */
static void gen_gvec_fn2i(DisasContext *s, bool is_q, int rd, int rn,
                          int64_t imm, GVecGen2iFn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            imm, is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 3-operand AdvSIMD vector operation using an expander function.  */
static void gen_gvec_fn3(DisasContext *s, bool is_q, int rd, int rn, int rm,
                         GVecGen3Fn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            vec_full_reg_offset(s, rm), is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 4-operand AdvSIMD vector operation using an expander function.  */
static void gen_gvec_fn4(DisasContext *s, bool is_q, int rd, int rn, int rm,
                         int rx, GVecGen4Fn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            vec_full_reg_offset(s, rm), vec_full_reg_offset(s, rx),
            is_q ? 16 : 8, vec_full_reg_size(s));
}
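
/*
 * Usage sketch (added; kept compiled out).  A decode handler for an
 * integer ADD (vector) could expand through these wrappers with a
 * stock gvec expander; "is_q", "rd", "rn", "rm" and "size" stand for
 * the usual decoded instruction fields.
 */
#if 0
gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_add, size);
#endif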

/* Expand a 2-operand operation using an out-of-line helper.  */
static void gen_gvec_op2_ool(DisasContext *s, bool is_q, int rd,
                             int rn, int data, gen_helper_gvec_2 *fn)
{
    tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/* Expand a 3-operand operation using an out-of-line helper.  */
static void gen_gvec_op3_ool(DisasContext *s, bool is_q, int rd,
                             int rn, int rm, int data, gen_helper_gvec_3 *fn)
{
    tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/* Expand a 3-operand + fpstatus pointer + simd data value operation using
 * an out-of-line helper.
 */
static void gen_gvec_op3_fpst(DisasContext *s, bool is_q, int rd, int rn,
                              int rm, bool is_fp16, int data,
                              gen_helper_gvec_3_ptr *fn)
{
    TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
    tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm), fpst,
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
    tcg_temp_free_ptr(fpst);
}

/* Expand a 3-operand + qc + operation using an out-of-line helper.  */
static void gen_gvec_op3_qc(DisasContext *s, bool is_q, int rd, int rn,
                            int rm, gen_helper_gvec_3_ptr *fn)
{
    TCGv_ptr qc_ptr = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(qc_ptr, cpu_env, offsetof(CPUARMState, vfp.qc));
    tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm), qc_ptr,
                       is_q ? 16 : 8, vec_full_reg_size(s), 0, fn);
    tcg_temp_free_ptr(qc_ptr);
}

/* Set ZF and NF based on a 64 bit result. This is alas fiddlier
 * than the 32 bit equivalent.
 */
static inline void gen_set_NZ64(TCGv_i64 result)
{
    tcg_gen_extr_i64_i32(cpu_ZF, cpu_NF, result);
    tcg_gen_or_i32(cpu_ZF, cpu_ZF, cpu_NF);
}
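
/*
 * Added note: QEMU keeps the flags in "computed" form; the Z flag is
 * set iff cpu_ZF == 0 and N is bit 31 of cpu_NF.  Splitting the 64-bit
 * result into (low, high) halves and OR-ing them into cpu_ZF therefore
 * yields cpu_ZF == 0 exactly when the full result is zero, while the
 * high half placed in cpu_NF carries the correct sign in bit 31.
 */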

/* Set NZCV as for a logical operation: NZ as per result, CV cleared. */
static inline void gen_logic_CC(int sf, TCGv_i64 result)
{
    if (sf) {
        gen_set_NZ64(result);
    } else {
        tcg_gen_extrl_i64_i32(cpu_ZF, result);
        tcg_gen_mov_i32(cpu_NF, cpu_ZF);
    }
    tcg_gen_movi_i32(cpu_CF, 0);
    tcg_gen_movi_i32(cpu_VF, 0);
}

/* dest = T0 + T1; compute C, N, V and Z flags */
static void gen_add_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        TCGv_i64 result, flag, tmp;
        result = tcg_temp_new_i64();
        flag = tcg_temp_new_i64();
        tmp = tcg_temp_new_i64();

        tcg_gen_movi_i64(tmp, 0);
        tcg_gen_add2_i64(result, flag, t0, tmp, t1, tmp);

        tcg_gen_extrl_i64_i32(cpu_CF, flag);

        gen_set_NZ64(result);

        tcg_gen_xor_i64(flag, result, t0);
        tcg_gen_xor_i64(tmp, t0, t1);
        tcg_gen_andc_i64(flag, flag, tmp);
        tcg_temp_free_i64(tmp);
        tcg_gen_extrh_i64_i32(cpu_VF, flag);

        tcg_gen_mov_i64(dest, result);
        tcg_temp_free_i64(result);
        tcg_temp_free_i64(flag);
    } else {
        /* 32 bit arithmetic */
        TCGv_i32 t0_32 = tcg_temp_new_i32();
        TCGv_i32 t1_32 = tcg_temp_new_i32();
        TCGv_i32 tmp = tcg_temp_new_i32();

        tcg_gen_movi_i32(tmp, 0);
        tcg_gen_extrl_i64_i32(t0_32, t0);
        tcg_gen_extrl_i64_i32(t1_32, t1);
        tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, t1_32, tmp);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
        tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
        tcg_gen_xor_i32(tmp, t0_32, t1_32);
        tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
        tcg_gen_extu_i32_i64(dest, cpu_NF);

        tcg_temp_free_i32(tmp);
        tcg_temp_free_i32(t0_32);
        tcg_temp_free_i32(t1_32);
    }
}

/* dest = T0 - T1; compute C, N, V and Z flags */
static void gen_sub_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        /* 64 bit arithmetic */
        TCGv_i64 result, flag, tmp;

        result = tcg_temp_new_i64();
        flag = tcg_temp_new_i64();
        tcg_gen_sub_i64(result, t0, t1);

        gen_set_NZ64(result);

        tcg_gen_setcond_i64(TCG_COND_GEU, flag, t0, t1);
        tcg_gen_extrl_i64_i32(cpu_CF, flag);

        tcg_gen_xor_i64(flag, result, t0);
        tmp = tcg_temp_new_i64();
        tcg_gen_xor_i64(tmp, t0, t1);
        tcg_gen_and_i64(flag, flag, tmp);
        tcg_temp_free_i64(tmp);
        tcg_gen_extrh_i64_i32(cpu_VF, flag);
        tcg_gen_mov_i64(dest, result);
        tcg_temp_free_i64(flag);
        tcg_temp_free_i64(result);
    } else {
        /* 32 bit arithmetic */
        TCGv_i32 t0_32 = tcg_temp_new_i32();
        TCGv_i32 t1_32 = tcg_temp_new_i32();
        TCGv_i32 tmp;

        tcg_gen_extrl_i64_i32(t0_32, t0);
        tcg_gen_extrl_i64_i32(t1_32, t1);
        tcg_gen_sub_i32(cpu_NF, t0_32, t1_32);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
        tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0_32, t1_32);
        tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
        tmp = tcg_temp_new_i32();
        tcg_gen_xor_i32(tmp, t0_32, t1_32);
        tcg_temp_free_i32(t0_32);
        tcg_temp_free_i32(t1_32);
        tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
        tcg_temp_free_i32(tmp);
        tcg_gen_extu_i32_i64(dest, cpu_NF);
    }
}

/* dest = T0 + T1 + CF; do not compute flags. */
static void gen_adc(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    TCGv_i64 flag = tcg_temp_new_i64();
    tcg_gen_extu_i32_i64(flag, cpu_CF);
    tcg_gen_add_i64(dest, t0, t1);
    tcg_gen_add_i64(dest, dest, flag);
    tcg_temp_free_i64(flag);

    if (!sf) {
        tcg_gen_ext32u_i64(dest, dest);
    }
}

/* dest = T0 + T1 + CF; compute C, N, V and Z flags. */
static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        TCGv_i64 result, cf_64, vf_64, tmp;
        result = tcg_temp_new_i64();
        cf_64 = tcg_temp_new_i64();
        vf_64 = tcg_temp_new_i64();
        tmp = tcg_const_i64(0);

        tcg_gen_extu_i32_i64(cf_64, cpu_CF);
        tcg_gen_add2_i64(result, cf_64, t0, tmp, cf_64, tmp);
        tcg_gen_add2_i64(result, cf_64, result, cf_64, t1, tmp);
        tcg_gen_extrl_i64_i32(cpu_CF, cf_64);
        gen_set_NZ64(result);

        tcg_gen_xor_i64(vf_64, result, t0);
        tcg_gen_xor_i64(tmp, t0, t1);
        tcg_gen_andc_i64(vf_64, vf_64, tmp);
        tcg_gen_extrh_i64_i32(cpu_VF, vf_64);

        tcg_gen_mov_i64(dest, result);

        tcg_temp_free_i64(tmp);
        tcg_temp_free_i64(vf_64);
        tcg_temp_free_i64(cf_64);
        tcg_temp_free_i64(result);
    } else {
        TCGv_i32 t0_32, t1_32, tmp;
        t0_32 = tcg_temp_new_i32();
        t1_32 = tcg_temp_new_i32();
        tmp = tcg_const_i32(0);

        tcg_gen_extrl_i64_i32(t0_32, t0);
        tcg_gen_extrl_i64_i32(t1_32, t1);
        tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, cpu_CF, tmp);
        tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1_32, tmp);

        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
        tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
        tcg_gen_xor_i32(tmp, t0_32, t1_32);
        tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
        tcg_gen_extu_i32_i64(dest, cpu_NF);

        tcg_temp_free_i32(tmp);
        tcg_temp_free_i32(t1_32);
        tcg_temp_free_i32(t0_32);
    }
}

/*
 * Load/Store generators
 */

/*
 * Store from GPR register to memory.
 */
static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source,
                             TCGv_i64 tcg_addr, int size, int memidx,
                             bool iss_valid,
                             unsigned int iss_srt,
                             bool iss_sf, bool iss_ar)
{
    g_assert(size <= 3);
    tcg_gen_qemu_st_i64(source, tcg_addr, memidx, s->be_data + size);

    if (iss_valid) {
        uint32_t syn;

        syn = syn_data_abort_with_iss(0,
                                      size,
                                      false,
                                      iss_srt,
                                      iss_sf,
                                      iss_ar,
                                      0, 0, 0, 0, 0, false);
        disas_set_insn_syndrome(s, syn);
    }
}

static void do_gpr_st(DisasContext *s, TCGv_i64 source,
                      TCGv_i64 tcg_addr, int size,
                      bool iss_valid,
                      unsigned int iss_srt,
                      bool iss_sf, bool iss_ar)
{
    do_gpr_st_memidx(s, source, tcg_addr, size, get_mem_index(s),
                     iss_valid, iss_srt, iss_sf, iss_ar);
}

/*
 * Load from memory to GPR register
 */
static void do_gpr_ld_memidx(DisasContext *s,
                             TCGv_i64 dest, TCGv_i64 tcg_addr,
                             int size, bool is_signed,
                             bool extend, int memidx,
                             bool iss_valid, unsigned int iss_srt,
                             bool iss_sf, bool iss_ar)
{
    MemOp memop = s->be_data + size;

    g_assert(size <= 3);

    if (is_signed) {
        memop += MO_SIGN;
    }

    tcg_gen_qemu_ld_i64(dest, tcg_addr, memidx, memop);

    if (extend && is_signed) {
        g_assert(size < 3);
        tcg_gen_ext32u_i64(dest, dest);
    }

    if (iss_valid) {
        uint32_t syn;

        syn = syn_data_abort_with_iss(0,
                                      size,
                                      is_signed,
                                      iss_srt,
                                      iss_sf,
                                      iss_ar,
                                      0, 0, 0, 0, 0, false);
        disas_set_insn_syndrome(s, syn);
    }
}

static void do_gpr_ld(DisasContext *s,
                      TCGv_i64 dest, TCGv_i64 tcg_addr,
                      int size, bool is_signed, bool extend,
                      bool iss_valid, unsigned int iss_srt,
                      bool iss_sf, bool iss_ar)
{
    do_gpr_ld_memidx(s, dest, tcg_addr, size, is_signed, extend,
                     get_mem_index(s),
                     iss_valid, iss_srt, iss_sf, iss_ar);
}

/*
 * Store from FP register to memory
 */
static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, int size)
{
    /* This writes the bottom N bits of a 128 bit wide vector to memory */
    TCGv_i64 tmp = tcg_temp_new_i64();
    tcg_gen_ld_i64(tmp, cpu_env, fp_reg_offset(s, srcidx, MO_64));
    if (size < 4) {
        tcg_gen_qemu_st_i64(tmp, tcg_addr, get_mem_index(s),
                            s->be_data + size);
    } else {
        bool be = s->be_data == MO_BE;
        TCGv_i64 tcg_hiaddr = tcg_temp_new_i64();

        tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
        tcg_gen_qemu_st_i64(tmp, be ? tcg_hiaddr : tcg_addr, get_mem_index(s),
                            s->be_data | MO_Q);
        tcg_gen_ld_i64(tmp, cpu_env, fp_reg_hi_offset(s, srcidx));
        tcg_gen_qemu_st_i64(tmp, be ? tcg_addr : tcg_hiaddr, get_mem_index(s),
                            s->be_data | MO_Q);
        tcg_temp_free_i64(tcg_hiaddr);
    }

    tcg_temp_free_i64(tmp);
}

/*
 * Load from memory to FP register
 */
static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, int size)
{
    /* This always zero-extends and writes to a full 128 bit wide vector */
    TCGv_i64 tmplo = tcg_temp_new_i64();
    TCGv_i64 tmphi = NULL;

    if (size < 4) {
        MemOp memop = s->be_data + size;
        tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), memop);
    } else {
        bool be = s->be_data == MO_BE;
        TCGv_i64 tcg_hiaddr;

        tmphi = tcg_temp_new_i64();
        tcg_hiaddr = tcg_temp_new_i64();

        tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
        tcg_gen_qemu_ld_i64(tmplo, be ? tcg_hiaddr : tcg_addr, get_mem_index(s),
                            s->be_data | MO_Q);
        tcg_gen_qemu_ld_i64(tmphi, be ? tcg_addr : tcg_hiaddr, get_mem_index(s),
                            s->be_data | MO_Q);
        tcg_temp_free_i64(tcg_hiaddr);
    }

    tcg_gen_st_i64(tmplo, cpu_env, fp_reg_offset(s, destidx, MO_64));
    tcg_temp_free_i64(tmplo);

    if (tmphi) {
        tcg_gen_st_i64(tmphi, cpu_env, fp_reg_hi_offset(s, destidx));
        tcg_temp_free_i64(tmphi);
    }
    clear_vec_high(s, tmphi != NULL, destidx);
}

/*
 * Vector load/store helpers.
 *
 * The principal difference between this and a FP load is that we don't
 * zero extend as we are filling a partial chunk of the vector register.
 * These functions don't support 128 bit loads/stores, which would be
 * normal load/store operations.
 *
 * The _i32 versions are useful when operating on 32 bit quantities
 * (eg for floating point single or using Neon helper functions).
 */

/* Get value of an element within a vector register */
static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx,
                             int element, MemOp memop)
{
    int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_ld8u_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_16:
        tcg_gen_ld16u_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_32:
        tcg_gen_ld32u_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_8|MO_SIGN:
        tcg_gen_ld8s_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_16|MO_SIGN:
        tcg_gen_ld16s_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_32|MO_SIGN:
        tcg_gen_ld32s_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_64:
    case MO_64|MO_SIGN:
        tcg_gen_ld_i64(tcg_dest, cpu_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

static void read_vec_element_i32(DisasContext *s, TCGv_i32 tcg_dest, int srcidx,
                                 int element, MemOp memop)
{
    int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_ld8u_i32(tcg_dest, cpu_env, vect_off);
        break;
    case MO_16:
        tcg_gen_ld16u_i32(tcg_dest, cpu_env, vect_off);
        break;
    case MO_8|MO_SIGN:
        tcg_gen_ld8s_i32(tcg_dest, cpu_env, vect_off);
        break;
    case MO_16|MO_SIGN:
        tcg_gen_ld16s_i32(tcg_dest, cpu_env, vect_off);
        break;
    case MO_32:
    case MO_32|MO_SIGN:
        tcg_gen_ld_i32(tcg_dest, cpu_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

/* Set value of an element within a vector register */
static void write_vec_element(DisasContext *s, TCGv_i64 tcg_src, int destidx,
                              int element, MemOp memop)
{
    int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_st8_i64(tcg_src, cpu_env, vect_off);
        break;
    case MO_16:
        tcg_gen_st16_i64(tcg_src, cpu_env, vect_off);
        break;
    case MO_32:
        tcg_gen_st32_i64(tcg_src, cpu_env, vect_off);
        break;
    case MO_64:
        tcg_gen_st_i64(tcg_src, cpu_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src,
                                  int destidx, int element, MemOp memop)
{
    int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_st8_i32(tcg_src, cpu_env, vect_off);
        break;
    case MO_16:
        tcg_gen_st16_i32(tcg_src, cpu_env, vect_off);
        break;
    case MO_32:
        tcg_gen_st_i32(tcg_src, cpu_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

/* Store from vector register to memory */
static void do_vec_st(DisasContext *s, int srcidx, int element,
                      TCGv_i64 tcg_addr, int size, MemOp endian)
{
    TCGv_i64 tcg_tmp = tcg_temp_new_i64();

    read_vec_element(s, tcg_tmp, srcidx, element, size);
    tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), endian | size);

    tcg_temp_free_i64(tcg_tmp);
}

/* Load from memory to vector register */
static void do_vec_ld(DisasContext *s, int destidx, int element,
                      TCGv_i64 tcg_addr, int size, MemOp endian)
{
    TCGv_i64 tcg_tmp = tcg_temp_new_i64();

    tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), endian | size);
    write_vec_element(s, tcg_tmp, destidx, element, size);

    tcg_temp_free_i64(tcg_tmp);
}

/* Check that FP/Neon access is enabled. If it is, return
 * true. If not, emit code to generate an appropriate exception,
 * and return false; the caller should not emit any code for
 * the instruction. Note that this check must happen after all
 * unallocated-encoding checks (otherwise the syndrome information
 * for the resulting exception will be incorrect).
 */
static bool fp_access_check(DisasContext *s)
{
    if (s->fp_excp_el) {
        assert(!s->fp_access_checked);
        s->fp_access_checked = true;

        gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
                           syn_fp_access_trap(1, 0xe, false), s->fp_excp_el);
        return false;
    }
    s->fp_access_checked = true;
    return true;
}

/* Check that SVE access is enabled.  If it is, return true.
 * If not, emit code to generate an appropriate exception and return false.
 */
bool sve_access_check(DisasContext *s)
{
    if (s->sve_excp_el) {
        assert(!s->sve_access_checked);
        s->sve_access_checked = true;

        gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
                           syn_sve_access_trap(), s->sve_excp_el);
        return false;
    }
    s->sve_access_checked = true;
    return fp_access_check(s);
}

/*
 * This utility function is for doing register extension with an
 * optional shift. You will likely want to pass a temporary for the
 * destination register. See DecodeRegExtend() in the ARM ARM.
 */
static void ext_and_shift_reg(TCGv_i64 tcg_out, TCGv_i64 tcg_in,
                              int option, unsigned int shift)
{
    int extsize = extract32(option, 0, 2);
    bool is_signed = extract32(option, 2, 1);

    if (is_signed) {
        switch (extsize) {
        case 0:
            tcg_gen_ext8s_i64(tcg_out, tcg_in);
            break;
        case 1:
            tcg_gen_ext16s_i64(tcg_out, tcg_in);
            break;
        case 2:
            tcg_gen_ext32s_i64(tcg_out, tcg_in);
            break;
        case 3:
            tcg_gen_mov_i64(tcg_out, tcg_in);
            break;
        }
    } else {
        switch (extsize) {
        case 0:
            tcg_gen_ext8u_i64(tcg_out, tcg_in);
            break;
        case 1:
            tcg_gen_ext16u_i64(tcg_out, tcg_in);
            break;
        case 2:
            tcg_gen_ext32u_i64(tcg_out, tcg_in);
            break;
        case 3:
            tcg_gen_mov_i64(tcg_out, tcg_in);
            break;
        }
    }

    if (shift) {
        tcg_gen_shli_i64(tcg_out, tcg_out, shift);
    }
}
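
/*
 * Worked example (added): for ADD Xd, Xn, Wm, UXTW #2 the decoder passes
 * option = 0b010 (extsize 2, unsigned) and shift = 2, so the code above
 * zero-extends Wm to 64 bits and shifts left by 2, the scaled form used
 * by extended-register arithmetic and addressing operands.
 */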

static inline void gen_check_sp_alignment(DisasContext *s)
{
    /* The AArch64 architecture mandates that (if enabled via PSTATE
     * or SCTLR bits) there is a check that SP is 16-aligned on every
     * SP-relative load or store (with an exception generated if it is not).
     * In line with general QEMU practice regarding misaligned accesses,
     * we omit these checks for the sake of guest program performance.
     * This function is provided as a hook so we can more easily add these
     * checks in future (possibly as a "favour catching guest program bugs
     * over speed" user selectable option).
     */
}

/*
 * This provides a simple table-based lookup decoder. It is intended
 * to be used when the relevant bits for decode are too awkwardly
 * placed and switch/if based logic would be confusing and deeply
 * nested. Since it's a linear search through the table, tables
 * should be kept small.
 *
 * It returns the first handler where insn & mask == pattern, or
 * NULL if there is no match.
 * The table is terminated by an empty mask (i.e. 0)
 */
static inline AArch64DecodeFn *lookup_disas_fn(const AArch64DecodeTable *table,
                                               uint32_t insn)
{
    const AArch64DecodeTable *tptr = table;

    while (tptr->mask) {
        if ((insn & tptr->mask) == tptr->pattern) {
            return tptr->disas_fn;
        }
        tptr++;
    }
    return NULL;
}
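
/*
 * Usage sketch (added; kept compiled out).  A caller declares a table
 * whose last entry has mask == 0 and dispatches through
 * lookup_disas_fn(); the pattern/mask values and the handler name
 * "disas_example_group" are made up for illustration.
 */
#if 0
static const AArch64DecodeTable example_table[] = {
    { 0x0e200400, 0x9f200400, disas_example_group },
    { 0x00000000, 0x00000000, NULL }, /* terminator: empty mask */
};

AArch64DecodeFn *fn = lookup_disas_fn(&example_table[0], insn);
if (fn) {
    fn(s, insn);
} else {
    unallocated_encoding(s);
}
#endif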

/*
 * The instruction disassembly implemented here matches
 * the instruction encoding classifications in chapter C4
 * of the ARM Architecture Reference Manual (DDI0487B_a);
 * classification names and decode diagrams here should generally
 * match up with those in the manual.
 */

/* Unconditional branch (immediate)
 *   31  30       26 25                                  0
 * +----+-----------+-------------------------------------+
 * | op | 0 0 1 0 1 |                 imm26               |
 * +----+-----------+-------------------------------------+
 */
static void disas_uncond_b_imm(DisasContext *s, uint32_t insn)
{
    uint64_t addr = s->pc_curr + sextract32(insn, 0, 26) * 4;

    if (insn & (1U << 31)) {
        /* BL Branch with link */
        tcg_gen_movi_i64(cpu_reg(s, 30), s->base.pc_next);
    }

    /* B Branch / BL Branch with link */
    reset_btype(s);
    gen_goto_tb(s, 0, addr);
}
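
/*
 * Worked example (added): imm26 is a signed word offset, so for the
 * encoding 0x17ffffff (B with imm26 = -1) the target is pc_curr - 4,
 * i.e. the preceding instruction; BL additionally writes the return
 * address s->base.pc_next into X30 ("lr") before branching.
 */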

/* Compare and branch (immediate)
 *   31  30         25  24  23                  5 4      0
 * +----+-------------+----+---------------------+--------+
 * | sf | 0 1 1 0 1 0 | op |         imm19       |   Rt   |
 * +----+-------------+----+---------------------+--------+
 */
static void disas_comp_b_imm(DisasContext *s, uint32_t insn)
{
    unsigned int sf, op, rt;
    uint64_t addr;
    TCGLabel *label_match;
    TCGv_i64 tcg_cmp;

    sf = extract32(insn, 31, 1);
    op = extract32(insn, 24, 1); /* 0: CBZ; 1: CBNZ */
    rt = extract32(insn, 0, 5);
    addr = s->pc_curr + sextract32(insn, 5, 19) * 4;

    tcg_cmp = read_cpu_reg(s, rt, sf);
    label_match = gen_new_label();

    reset_btype(s);
    tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
                        tcg_cmp, 0, label_match);

    gen_goto_tb(s, 0, s->base.pc_next);
    gen_set_label(label_match);
    gen_goto_tb(s, 1, addr);
}

/* Test and branch (immediate)
 *   31  30         25  24  23   19 18          5 4    0
 * +----+-------------+----+-------+-------------+------+
 * | b5 | 0 1 1 0 1 1 | op |  b40  |    imm14    |  Rt  |
 * +----+-------------+----+-------+-------------+------+
 */
static void disas_test_b_imm(DisasContext *s, uint32_t insn)
{
    unsigned int bit_pos, op, rt;
    uint64_t addr;
    TCGLabel *label_match;
    TCGv_i64 tcg_cmp;

    bit_pos = (extract32(insn, 31, 1) << 5) | extract32(insn, 19, 5);
    op = extract32(insn, 24, 1); /* 0: TBZ; 1: TBNZ */
    addr = s->pc_curr + sextract32(insn, 5, 14) * 4;
    rt = extract32(insn, 0, 5);

    tcg_cmp = tcg_temp_new_i64();
    tcg_gen_andi_i64(tcg_cmp, cpu_reg(s, rt), (1ULL << bit_pos));
    label_match = gen_new_label();

    reset_btype(s);
    tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
                        tcg_cmp, 0, label_match);
    tcg_temp_free_i64(tcg_cmp);
    gen_goto_tb(s, 0, s->base.pc_next);
    gen_set_label(label_match);
    gen_goto_tb(s, 1, addr);
}

/* Conditional branch (immediate)
 *  31           25  24  23                  5   4  3    0
 * +---------------+----+---------------------+----+------+
 * | 0 1 0 1 0 1 0 | o1 |         imm19       | o0 | cond |
 * +---------------+----+---------------------+----+------+
 */
static void disas_cond_b_imm(DisasContext *s, uint32_t insn)
{
    unsigned int cond;
    uint64_t addr;

    if ((insn & (1 << 4)) || (insn & (1 << 24))) {
        unallocated_encoding(s);
        return;
    }
    addr = s->pc_curr + sextract32(insn, 5, 19) * 4;
    cond = extract32(insn, 0, 4);

    reset_btype(s);
    if (cond < 0x0e) {
        /* genuinely conditional branches */
        TCGLabel *label_match = gen_new_label();
        arm_gen_test_cc(cond, label_match);
        gen_goto_tb(s, 0, s->base.pc_next);
        gen_set_label(label_match);
        gen_goto_tb(s, 1, addr);
    } else {
        /* 0xe and 0xf are both "always" conditions */
        gen_goto_tb(s, 0, addr);
    }
}

/* HINT instruction group, including various allocated HINTs */
static void handle_hint(DisasContext *s, uint32_t insn,
                        unsigned int op1, unsigned int op2, unsigned int crm)
{
    unsigned int selector = crm << 3 | op2;

    if (op1 != 3) {
        unallocated_encoding(s);
        return;
    }

    switch (selector) {
    case 0b00000: /* NOP */
        break;
    case 0b00011: /* WFI */
        s->base.is_jmp = DISAS_WFI;
        break;
    case 0b00001: /* YIELD */
        /* When running in MTTCG we don't generate jumps to the yield and
         * WFE helpers as it won't affect the scheduling of other vCPUs.
         * If we wanted to more completely model WFE/SEV so we don't busy
         * spin unnecessarily we would need to do something more involved.
         */
        if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
            s->base.is_jmp = DISAS_YIELD;
        }
        break;
    case 0b00010: /* WFE */
        if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
            s->base.is_jmp = DISAS_WFE;
        }
        break;
    case 0b00100: /* SEV */
    case 0b00101: /* SEVL */
        /* we treat all as NOP at least for now */
        break;
    case 0b00111: /* XPACLRI */
        if (s->pauth_active) {
            gen_helper_xpaci(cpu_X[30], cpu_env, cpu_X[30]);
        }
        break;
    case 0b01000: /* PACIA1716 */
        if (s->pauth_active) {
            gen_helper_pacia(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]);
        }
        break;
    case 0b01010: /* PACIB1716 */
        if (s->pauth_active) {
            gen_helper_pacib(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]);
        }
        break;
    case 0b01100: /* AUTIA1716 */
        if (s->pauth_active) {
            gen_helper_autia(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]);
        }
        break;
    case 0b01110: /* AUTIB1716 */
        if (s->pauth_active) {
            gen_helper_autib(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]);
        }
        break;
    case 0b11000: /* PACIAZ */
        if (s->pauth_active) {
            gen_helper_pacia(cpu_X[30], cpu_env, cpu_X[30],
                             new_tmp_a64_zero(s));
        }
        break;
    case 0b11001: /* PACIASP */
        if (s->pauth_active) {
            gen_helper_pacia(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]);
        }
        break;
    case 0b11010: /* PACIBZ */
        if (s->pauth_active) {
            gen_helper_pacib(cpu_X[30], cpu_env, cpu_X[30],
                             new_tmp_a64_zero(s));
        }
        break;
    case 0b11011: /* PACIBSP */
        if (s->pauth_active) {
            gen_helper_pacib(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]);
        }
        break;
    case 0b11100: /* AUTIAZ */
        if (s->pauth_active) {
            gen_helper_autia(cpu_X[30], cpu_env, cpu_X[30],
                             new_tmp_a64_zero(s));
        }
        break;
    case 0b11101: /* AUTIASP */
        if (s->pauth_active) {
            gen_helper_autia(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]);
        }
        break;
    case 0b11110: /* AUTIBZ */
        if (s->pauth_active) {
            gen_helper_autib(cpu_X[30], cpu_env, cpu_X[30],
                             new_tmp_a64_zero(s));
        }
        break;
    case 0b11111: /* AUTIBSP */
        if (s->pauth_active) {
            gen_helper_autib(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]);
        }
        break;
    default:
        /* default specified as NOP equivalent */
        break;
    }
}
1521
1522static void gen_clrex(DisasContext *s, uint32_t insn)
1523{
1524    tcg_gen_movi_i64(cpu_exclusive_addr, -1);
1525}
1526
1527/* CLREX, DSB, DMB, ISB */
1528static void handle_sync(DisasContext *s, uint32_t insn,
1529                        unsigned int op1, unsigned int op2, unsigned int crm)
1530{
1531    TCGBar bar;
1532
1533    if (op1 != 3) {
1534        unallocated_encoding(s);
1535        return;
1536    }
1537
1538    switch (op2) {
1539    case 2: /* CLREX */
1540        gen_clrex(s, insn);
1541        return;
1542    case 4: /* DSB */
1543    case 5: /* DMB */
1544        switch (crm & 3) {
1545        case 1: /* MBReqTypes_Reads */
1546            bar = TCG_BAR_SC | TCG_MO_LD_LD | TCG_MO_LD_ST;
1547            break;
1548        case 2: /* MBReqTypes_Writes */
1549            bar = TCG_BAR_SC | TCG_MO_ST_ST;
1550            break;
1551        default: /* MBReqTypes_All */
1552            bar = TCG_BAR_SC | TCG_MO_ALL;
1553            break;
1554        }
1555        tcg_gen_mb(bar);
1556        return;
1557    case 6: /* ISB */
1558        /* We need to break the TB after this insn to execute
1559         * a self-modified code correctly and also to take
1560         * any pending interrupts immediately.
1561         */
1562        reset_btype(s);
1563        gen_goto_tb(s, 0, s->base.pc_next);
1564        return;
1565
1566    case 7: /* SB */
1567        if (crm != 0 || !dc_isar_feature(aa64_sb, s)) {
1568            goto do_unallocated;
1569        }
1570        /*
1571         * TODO: There is no speculation barrier opcode for TCG;
1572         * MB and end the TB instead.
1573         */
1574        tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
1575        gen_goto_tb(s, 0, s->base.pc_next);
1576        return;
1577
1578    default:
1579    do_unallocated:
1580        unallocated_encoding(s);
1581        return;
1582    }
1583}
1584
1585static void gen_xaflag(void)
1586{
1587    TCGv_i32 z = tcg_temp_new_i32();
1588
1589    tcg_gen_setcondi_i32(TCG_COND_EQ, z, cpu_ZF, 0);
1590
1591    /*
1592     * (!C & !Z) << 31
1593     * (!(C | Z)) << 31
1594     * ~((C | Z) << 31)
1595     * ~-(C | Z)
1596     * (C | Z) - 1
1597     */
1598    tcg_gen_or_i32(cpu_NF, cpu_CF, z);
1599    tcg_gen_subi_i32(cpu_NF, cpu_NF, 1);
1600
1601    /* !(Z & C) */
1602    tcg_gen_and_i32(cpu_ZF, z, cpu_CF);
1603    tcg_gen_xori_i32(cpu_ZF, cpu_ZF, 1);
1604
1605    /* (!C & Z) << 31 -> -(Z & ~C) */
1606    tcg_gen_andc_i32(cpu_VF, z, cpu_CF);
1607    tcg_gen_neg_i32(cpu_VF, cpu_VF);
1608
1609    /* C | Z */
1610    tcg_gen_or_i32(cpu_CF, cpu_CF, z);
1611
1612    tcg_temp_free_i32(z);
1613}
1614
1615static void gen_axflag(void)
1616{
1617    tcg_gen_sari_i32(cpu_VF, cpu_VF, 31);         /* V ? -1 : 0 */
1618    tcg_gen_andc_i32(cpu_CF, cpu_CF, cpu_VF);     /* C & !V */
1619
1620    /* !(Z | V) -> !(!ZF | V) -> ZF & !V -> ZF & ~VF */
1621    tcg_gen_andc_i32(cpu_ZF, cpu_ZF, cpu_VF);
1622
1623    tcg_gen_movi_i32(cpu_NF, 0);
1624    tcg_gen_movi_i32(cpu_VF, 0);
1625}
1626
1627/* MSR (immediate) - move immediate to processor state field */
1628static void handle_msr_i(DisasContext *s, uint32_t insn,
1629                         unsigned int op1, unsigned int op2, unsigned int crm)
1630{
1631    TCGv_i32 t1;
1632    int op = op1 << 3 | op2;
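
        /*
         * For example (illustrative): "msr pan, #1" encodes op1 = 0b000
         * and op2 = 0b100, so op == 0x04, with the immediate operand in crm.
         */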
1633
 1634    /* End the TB by default; chaining is OK.  */
1635    s->base.is_jmp = DISAS_TOO_MANY;
1636
1637    switch (op) {
1638    case 0x00: /* CFINV */
1639        if (crm != 0 || !dc_isar_feature(aa64_condm_4, s)) {
1640            goto do_unallocated;
1641        }
1642        tcg_gen_xori_i32(cpu_CF, cpu_CF, 1);
1643        s->base.is_jmp = DISAS_NEXT;
1644        break;
1645
1646    case 0x01: /* XAFlag */
1647        if (crm != 0 || !dc_isar_feature(aa64_condm_5, s)) {
1648            goto do_unallocated;
1649        }
1650        gen_xaflag();
1651        s->base.is_jmp = DISAS_NEXT;
1652        break;
1653
1654    case 0x02: /* AXFlag */
1655        if (crm != 0 || !dc_isar_feature(aa64_condm_5, s)) {
1656            goto do_unallocated;
1657        }
1658        gen_axflag();
1659        s->base.is_jmp = DISAS_NEXT;
1660        break;
1661
1662    case 0x03: /* UAO */
1663        if (!dc_isar_feature(aa64_uao, s) || s->current_el == 0) {
1664            goto do_unallocated;
1665        }
1666        if (crm & 1) {
1667            set_pstate_bits(PSTATE_UAO);
1668        } else {
1669            clear_pstate_bits(PSTATE_UAO);
1670        }
1671        t1 = tcg_const_i32(s->current_el);
1672        gen_helper_rebuild_hflags_a64(cpu_env, t1);
1673        tcg_temp_free_i32(t1);
1674        break;
1675
1676    case 0x04: /* PAN */
1677        if (!dc_isar_feature(aa64_pan, s) || s->current_el == 0) {
1678            goto do_unallocated;
1679        }
1680        if (crm & 1) {
1681            set_pstate_bits(PSTATE_PAN);
1682        } else {
1683            clear_pstate_bits(PSTATE_PAN);
1684        }
1685        t1 = tcg_const_i32(s->current_el);
1686        gen_helper_rebuild_hflags_a64(cpu_env, t1);
1687        tcg_temp_free_i32(t1);
1688        break;
1689
1690    case 0x05: /* SPSel */
1691        if (s->current_el == 0) {
1692            goto do_unallocated;
1693        }
1694        t1 = tcg_const_i32(crm & PSTATE_SP);
1695        gen_helper_msr_i_spsel(cpu_env, t1);
1696        tcg_temp_free_i32(t1);
1697        break;
1698
1699    case 0x1e: /* DAIFSet */
1700        t1 = tcg_const_i32(crm);
1701        gen_helper_msr_i_daifset(cpu_env, t1);
1702        tcg_temp_free_i32(t1);
1703        break;
1704
1705    case 0x1f: /* DAIFClear */
1706        t1 = tcg_const_i32(crm);
1707        gen_helper_msr_i_daifclear(cpu_env, t1);
1708        tcg_temp_free_i32(t1);
1709        /* For DAIFClear, exit the cpu loop to re-evaluate pending IRQs.  */
1710        s->base.is_jmp = DISAS_UPDATE_EXIT;
1711        break;
1712
1713    case 0x1c: /* TCO */
1714        if (dc_isar_feature(aa64_mte, s)) {
1715            /* Full MTE is enabled -- set the TCO bit as directed. */
1716            if (crm & 1) {
1717                set_pstate_bits(PSTATE_TCO);
1718            } else {
1719                clear_pstate_bits(PSTATE_TCO);
1720            }
1721            t1 = tcg_const_i32(s->current_el);
1722            gen_helper_rebuild_hflags_a64(cpu_env, t1);
1723            tcg_temp_free_i32(t1);
1724            /* Many factors, including TCO, go into MTE_ACTIVE. */
1725            s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
1726        } else if (dc_isar_feature(aa64_mte_insn_reg, s)) {
1727            /* Only "instructions accessible at EL0" -- PSTATE.TCO is WI.  */
1728            s->base.is_jmp = DISAS_NEXT;
1729        } else {
1730            goto do_unallocated;
1731        }
1732        break;
1733
1734    default:
1735    do_unallocated:
1736        unallocated_encoding(s);
1737        return;
1738    }
1739}
1740
1741static void gen_get_nzcv(TCGv_i64 tcg_rt)
1742{
1743    TCGv_i32 tmp = tcg_temp_new_i32();
1744    TCGv_i32 nzcv = tcg_temp_new_i32();
1745
1746    /* build bit 31, N */
1747    tcg_gen_andi_i32(nzcv, cpu_NF, (1U << 31));
1748    /* build bit 30, Z */
1749    tcg_gen_setcondi_i32(TCG_COND_EQ, tmp, cpu_ZF, 0);
1750    tcg_gen_deposit_i32(nzcv, nzcv, tmp, 30, 1);
1751    /* build bit 29, C */
1752    tcg_gen_deposit_i32(nzcv, nzcv, cpu_CF, 29, 1);
1753    /* build bit 28, V */
1754    tcg_gen_shri_i32(tmp, cpu_VF, 31);
1755    tcg_gen_deposit_i32(nzcv, nzcv, tmp, 28, 1);
1756    /* generate result */
1757    tcg_gen_extu_i32_i64(tcg_rt, nzcv);
1758
1759    tcg_temp_free_i32(nzcv);
1760    tcg_temp_free_i32(tmp);
1761}
1762
1763static void gen_set_nzcv(TCGv_i64 tcg_rt)
1764{
1765    TCGv_i32 nzcv = tcg_temp_new_i32();
1766
1767    /* take NZCV from R[t] */
1768    tcg_gen_extrl_i64_i32(nzcv, tcg_rt);
1769
1770    /* bit 31, N */
1771    tcg_gen_andi_i32(cpu_NF, nzcv, (1U << 31));
1772    /* bit 30, Z */
1773    tcg_gen_andi_i32(cpu_ZF, nzcv, (1 << 30));
1774    tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_ZF, cpu_ZF, 0);
1775    /* bit 29, C */
1776    tcg_gen_andi_i32(cpu_CF, nzcv, (1 << 29));
1777    tcg_gen_shri_i32(cpu_CF, cpu_CF, 29);
1778    /* bit 28, V */
1779    tcg_gen_andi_i32(cpu_VF, nzcv, (1 << 28));
1780    tcg_gen_shli_i32(cpu_VF, cpu_VF, 3);
1781    tcg_temp_free_i32(nzcv);
1782}
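
    /*
     * Together these implement the ARM_CP_NZCV special case in
     * handle_sys() below: e.g. "mrs x0, nzcv" packs N/Z/C/V into bits
     * [31:28] of X0 (all other bits zero), and "msr nzcv, x0" unpacks
     * them again, ignoring the remaining bits.
     */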
1783
1784/* MRS - move from system register
1785 * MSR (register) - move to system register
1786 * SYS
1787 * SYSL
1788 * These are all essentially the same insn in 'read' and 'write'
1789 * versions, with varying op0 fields.
1790 */
1791static void handle_sys(DisasContext *s, uint32_t insn, bool isread,
1792                       unsigned int op0, unsigned int op1, unsigned int op2,
1793                       unsigned int crn, unsigned int crm, unsigned int rt)
1794{
1795    const ARMCPRegInfo *ri;
1796    TCGv_i64 tcg_rt;
1797
1798    ri = get_arm_cp_reginfo(s->cp_regs,
1799                            ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP,
1800                                               crn, crm, op0, op1, op2));
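
        /*
         * For example (illustrative): "mrs x0, ctr_el0" arrives here with
         * op0 = 3, op1 = 3, crn = 0, crm = 0, op2 = 1, and this lookup
         * finds the reginfo registered for CTR_EL0.
         */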
1801
1802    if (!ri) {
1803        /* Unknown register; this might be a guest error or a QEMU
1804         * unimplemented feature.
1805         */
1806        qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch64 "
1807                      "system register op0:%d op1:%d crn:%d crm:%d op2:%d\n",
1808                      isread ? "read" : "write", op0, op1, crn, crm, op2);
1809        unallocated_encoding(s);
1810        return;
1811    }
1812
1813    /* Check access permissions */
1814    if (!cp_access_ok(s->current_el, ri, isread)) {
1815        unallocated_encoding(s);
1816        return;
1817    }
1818
1819    if (ri->accessfn) {
1820        /* Emit code to perform further access permissions checks at
1821         * runtime; this may result in an exception.
1822         */
1823        TCGv_ptr tmpptr;
1824        TCGv_i32 tcg_syn, tcg_isread;
1825        uint32_t syndrome;
1826
1827        gen_a64_set_pc_im(s->pc_curr);
1828        tmpptr = tcg_const_ptr(ri);
1829        syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
1830        tcg_syn = tcg_const_i32(syndrome);
1831        tcg_isread = tcg_const_i32(isread);
1832        gen_helper_access_check_cp_reg(cpu_env, tmpptr, tcg_syn, tcg_isread);
1833        tcg_temp_free_ptr(tmpptr);
1834        tcg_temp_free_i32(tcg_syn);
1835        tcg_temp_free_i32(tcg_isread);
1836    } else if (ri->type & ARM_CP_RAISES_EXC) {
1837        /*
1838         * The readfn or writefn might raise an exception;
1839         * synchronize the CPU state in case it does.
1840         */
1841        gen_a64_set_pc_im(s->pc_curr);
1842    }
1843
1844    /* Handle special cases first */
1845    switch (ri->type & ~(ARM_CP_FLAG_MASK & ~ARM_CP_SPECIAL)) {
1846    case ARM_CP_NOP:
1847        return;
1848    case ARM_CP_NZCV:
1849        tcg_rt = cpu_reg(s, rt);
1850        if (isread) {
1851            gen_get_nzcv(tcg_rt);
1852        } else {
1853            gen_set_nzcv(tcg_rt);
1854        }
1855        return;
1856    case ARM_CP_CURRENTEL:
 1857        /* Reads as the current EL value from pstate, which is
1858         * guaranteed to be constant by the tb flags.
1859         */
1860        tcg_rt = cpu_reg(s, rt);
1861        tcg_gen_movi_i64(tcg_rt, s->current_el << 2);
1862        return;
1863    case ARM_CP_DC_ZVA:
1864        /* Writes clear the aligned block of memory which rt points into. */
1865        if (s->mte_active[0]) {
1866            TCGv_i32 t_desc;
1867            int desc = 0;
1868
1869            desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
1870            desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
1871            desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
1872            t_desc = tcg_const_i32(desc);
1873
1874            tcg_rt = new_tmp_a64(s);
1875            gen_helper_mte_check_zva(tcg_rt, cpu_env, t_desc, cpu_reg(s, rt));
1876            tcg_temp_free_i32(t_desc);
1877        } else {
1878            tcg_rt = clean_data_tbi(s, cpu_reg(s, rt));
1879        }
1880        gen_helper_dc_zva(cpu_env, tcg_rt);
1881        return;
1882    case ARM_CP_DC_GVA:
1883        {
1884            TCGv_i64 clean_addr, tag;
1885
1886            /*
1887             * DC_GVA, like DC_ZVA, requires that we supply the original
1888             * pointer for an invalid page.  Probe that address first.
1889             */
1890            tcg_rt = cpu_reg(s, rt);
1891            clean_addr = clean_data_tbi(s, tcg_rt);
1892            gen_probe_access(s, clean_addr, MMU_DATA_STORE, MO_8);
1893
1894            if (s->ata) {
1895                /* Extract the tag from the register to match STZGM.  */
1896                tag = tcg_temp_new_i64();
1897                tcg_gen_shri_i64(tag, tcg_rt, 56);
1898                gen_helper_stzgm_tags(cpu_env, clean_addr, tag);
1899                tcg_temp_free_i64(tag);
1900            }
1901        }
1902        return;
1903    case ARM_CP_DC_GZVA:
1904        {
1905            TCGv_i64 clean_addr, tag;
1906
1907            /* For DC_GZVA, we can rely on DC_ZVA for the proper fault. */
1908            tcg_rt = cpu_reg(s, rt);
1909            clean_addr = clean_data_tbi(s, tcg_rt);
1910            gen_helper_dc_zva(cpu_env, clean_addr);
1911
1912            if (s->ata) {
1913                /* Extract the tag from the register to match STZGM.  */
1914                tag = tcg_temp_new_i64();
1915                tcg_gen_shri_i64(tag, tcg_rt, 56);
1916                gen_helper_stzgm_tags(cpu_env, clean_addr, tag);
1917                tcg_temp_free_i64(tag);
1918            }
1919        }
1920        return;
1921    default:
1922        break;
1923    }
1924    if ((ri->type & ARM_CP_FPU) && !fp_access_check(s)) {
1925        return;
1926    } else if ((ri->type & ARM_CP_SVE) && !sve_access_check(s)) {
1927        return;
1928    }
1929
1930    if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
1931        gen_io_start();
1932    }
1933
1934    tcg_rt = cpu_reg(s, rt);
1935
1936    if (isread) {
1937        if (ri->type & ARM_CP_CONST) {
1938            tcg_gen_movi_i64(tcg_rt, ri->resetvalue);
1939        } else if (ri->readfn) {
1940            TCGv_ptr tmpptr;
1941            tmpptr = tcg_const_ptr(ri);
1942            gen_helper_get_cp_reg64(tcg_rt, cpu_env, tmpptr);
1943            tcg_temp_free_ptr(tmpptr);
1944        } else {
1945            tcg_gen_ld_i64(tcg_rt, cpu_env, ri->fieldoffset);
1946        }
1947    } else {
1948        if (ri->type & ARM_CP_CONST) {
1949            /* If not forbidden by access permissions, treat as WI */
1950            return;
1951        } else if (ri->writefn) {
1952            TCGv_ptr tmpptr;
1953            tmpptr = tcg_const_ptr(ri);
1954            gen_helper_set_cp_reg64(cpu_env, tmpptr, tcg_rt);
1955            tcg_temp_free_ptr(tmpptr);
1956        } else {
1957            tcg_gen_st_i64(tcg_rt, cpu_env, ri->fieldoffset);
1958        }
1959    }
1960
1961    if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
1962        /* I/O operations must end the TB here (whether read or write) */
1963        s->base.is_jmp = DISAS_UPDATE_EXIT;
1964    }
1965    if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
1966        /*
 1967         * A write to any coprocessor register that ends a TB
1968         * must rebuild the hflags for the next TB.
1969         */
1970        TCGv_i32 tcg_el = tcg_const_i32(s->current_el);
1971        gen_helper_rebuild_hflags_a64(cpu_env, tcg_el);
1972        tcg_temp_free_i32(tcg_el);
1973        /*
1974         * We default to ending the TB on a coprocessor register write,
1975         * but allow this to be suppressed by the register definition
1976         * (usually only necessary to work around guest bugs).
1977         */
1978        s->base.is_jmp = DISAS_UPDATE_EXIT;
1979    }
1980}
1981
1982/* System
1983 *  31                 22 21  20 19 18 16 15   12 11    8 7   5 4    0
1984 * +---------------------+---+-----+-----+-------+-------+-----+------+
1985 * | 1 1 0 1 0 1 0 1 0 0 | L | op0 | op1 |  CRn  |  CRm  | op2 |  Rt  |
1986 * +---------------------+---+-----+-----+-------+-------+-----+------+
1987 */
1988static void disas_system(DisasContext *s, uint32_t insn)
1989{
1990    unsigned int l, op0, op1, crn, crm, op2, rt;
1991    l = extract32(insn, 21, 1);
1992    op0 = extract32(insn, 19, 2);
1993    op1 = extract32(insn, 16, 3);
1994    crn = extract32(insn, 12, 4);
1995    crm = extract32(insn, 8, 4);
1996    op2 = extract32(insn, 5, 3);
1997    rt = extract32(insn, 0, 5);
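
        /*
         * E.g. NOP (0xd503201f, illustrative) decodes as l = 0, op0 = 0,
         * crn = 2 and rt = 31, so it is routed to handle_hint() below.
         */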
1998
1999    if (op0 == 0) {
2000        if (l || rt != 31) {
2001            unallocated_encoding(s);
2002            return;
2003        }
2004        switch (crn) {
2005        case 2: /* HINT (including allocated hints like NOP, YIELD, etc) */
2006            handle_hint(s, insn, op1, op2, crm);
2007            break;
2008        case 3: /* CLREX, DSB, DMB, ISB */
2009            handle_sync(s, insn, op1, op2, crm);
2010            break;
2011        case 4: /* MSR (immediate) */
2012            handle_msr_i(s, insn, op1, op2, crm);
2013            break;
2014        default:
2015            unallocated_encoding(s);
2016            break;
2017        }
2018        return;
2019    }
2020    handle_sys(s, insn, l, op0, op1, op2, crn, crm, rt);
2021}
2022
2023/* Exception generation
2024 *
2025 *  31             24 23 21 20                     5 4   2 1  0
2026 * +-----------------+-----+------------------------+-----+----+
2027 * | 1 1 0 1 0 1 0 0 | opc |          imm16         | op2 | LL |
 2028 * +-----------------+-----+------------------------+-----+----+
2029 */
2030static void disas_exc(DisasContext *s, uint32_t insn)
2031{
2032    int opc = extract32(insn, 21, 3);
2033    int op2_ll = extract32(insn, 0, 5);
2034    int imm16 = extract32(insn, 5, 16);
2035    TCGv_i32 tmp;
2036
2037    switch (opc) {
2038    case 0:
2039        /* For SVC, HVC and SMC we advance the single-step state
2040         * machine before taking the exception. This is architecturally
2041         * mandated, to ensure that single-stepping a system call
2042         * instruction works properly.
2043         */
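            /*
             * For example, "svc #0" (imm16 == 0) is the usual AArch64
             * Linux system-call form; imm16 is reported in the syndrome
             * so the OS or hypervisor can inspect it.
             */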
2044        switch (op2_ll) {
2045        case 1:                                                     /* SVC */
2046            gen_ss_advance(s);
2047            gen_exception_insn(s, s->base.pc_next, EXCP_SWI,
2048                               syn_aa64_svc(imm16), default_exception_el(s));
2049            break;
2050        case 2:                                                     /* HVC */
2051            if (s->current_el == 0) {
2052                unallocated_encoding(s);
2053                break;
2054            }
2055            /* The pre HVC helper handles cases when HVC gets trapped
2056             * as an undefined insn by runtime configuration.
2057             */
2058            gen_a64_set_pc_im(s->pc_curr);
2059            gen_helper_pre_hvc(cpu_env);
2060            gen_ss_advance(s);
2061            gen_exception_insn(s, s->base.pc_next, EXCP_HVC,
2062                               syn_aa64_hvc(imm16), 2);
2063            break;
2064        case 3:                                                     /* SMC */
2065            if (s->current_el == 0) {
2066                unallocated_encoding(s);
2067                break;
2068            }
2069            gen_a64_set_pc_im(s->pc_curr);
2070            tmp = tcg_const_i32(syn_aa64_smc(imm16));
2071            gen_helper_pre_smc(cpu_env, tmp);
2072            tcg_temp_free_i32(tmp);
2073            gen_ss_advance(s);
2074            gen_exception_insn(s, s->base.pc_next, EXCP_SMC,
2075                               syn_aa64_smc(imm16), 3);
2076            break;
2077        default:
2078            unallocated_encoding(s);
2079            break;
2080        }
2081        break;
2082    case 1:
2083        if (op2_ll != 0) {
2084            unallocated_encoding(s);
2085            break;
2086        }
2087        /* BRK */
2088        gen_exception_bkpt_insn(s, syn_aa64_bkpt(imm16));
2089        break;
2090    case 2:
2091        if (op2_ll != 0) {
2092            unallocated_encoding(s);
2093            break;
2094        }
2095        /* HLT. This has two purposes.
2096         * Architecturally, it is an external halting debug instruction.
 2097         * Since QEMU doesn't implement external debug, we treat it as
 2098         * the architecture requires when halting debug is disabled: it UNDEFs.
2099         * Secondly, "HLT 0xf000" is the A64 semihosting syscall instruction.
2100         */
2101        if (semihosting_enabled() && imm16 == 0xf000) {
2102#ifndef CONFIG_USER_ONLY
2103            /* In system mode, don't allow userspace access to semihosting,
2104             * to provide some semblance of security (and for consistency
2105             * with our 32-bit semihosting).
2106             */
2107            if (s->current_el == 0) {
2108                unsupported_encoding(s, insn);
2109                break;
2110            }
2111#endif
2112            gen_exception_internal_insn(s, s->pc_curr, EXCP_SEMIHOST);
2113        } else {
2114            unsupported_encoding(s, insn);
2115        }
2116        break;
2117    case 5:
2118        if (op2_ll < 1 || op2_ll > 3) {
2119            unallocated_encoding(s);
2120            break;
2121        }
2122        /* DCPS1, DCPS2, DCPS3 */
2123        unsupported_encoding(s, insn);
2124        break;
2125    default:
2126        unallocated_encoding(s);
2127        break;
2128    }
2129}
2130
2131/* Unconditional branch (register)
2132 *  31           25 24   21 20   16 15   10 9    5 4     0
2133 * +---------------+-------+-------+-------+------+-------+
2134 * | 1 1 0 1 0 1 1 |  opc  |  op2  |  op3  |  Rn  |  op4  |
2135 * +---------------+-------+-------+-------+------+-------+
2136 */
2137static void disas_uncond_b_reg(DisasContext *s, uint32_t insn)
2138{
2139    unsigned int opc, op2, op3, rn, op4;
2140    unsigned btype_mod = 2;   /* 0: BR, 1: BLR, 2: other */
2141    TCGv_i64 dst;
2142    TCGv_i64 modifier;
2143
2144    opc = extract32(insn, 21, 4);
2145    op2 = extract32(insn, 16, 5);
2146    op3 = extract32(insn, 10, 6);
2147    rn = extract32(insn, 5, 5);
2148    op4 = extract32(insn, 0, 5);
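
        /*
         * For example (illustrative): a plain "ret" encodes opc = 2,
         * op2 = 0x1f, op3 = 0 and op4 = 0, with Rn = 30 (the link
         * register) as the branch target.
         */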
2149
2150    if (op2 != 0x1f) {
2151        goto do_unallocated;
2152    }
2153
2154    switch (opc) {
2155    case 0: /* BR */
2156    case 1: /* BLR */
2157    case 2: /* RET */
2158        btype_mod = opc;
2159        switch (op3) {
2160        case 0:
2161            /* BR, BLR, RET */
2162            if (op4 != 0) {
2163                goto do_unallocated;
2164            }
2165            dst = cpu_reg(s, rn);
2166            break;
2167
2168        case 2:
2169        case 3:
2170            if (!dc_isar_feature(aa64_pauth, s)) {
2171                goto do_unallocated;
2172            }
2173            if (opc == 2) {
2174                /* RETAA, RETAB */
2175                if (rn != 0x1f || op4 != 0x1f) {
2176                    goto do_unallocated;
2177                }
2178                rn = 30;
2179                modifier = cpu_X[31];
2180            } else {
2181                /* BRAAZ, BRABZ, BLRAAZ, BLRABZ */
2182                if (op4 != 0x1f) {
2183                    goto do_unallocated;
2184                }
2185                modifier = new_tmp_a64_zero(s);
2186            }
2187            if (s->pauth_active) {
2188                dst = new_tmp_a64(s);
2189                if (op3 == 2) {
2190                    gen_helper_autia(dst, cpu_env, cpu_reg(s, rn), modifier);
2191                } else {
2192                    gen_helper_autib(dst, cpu_env, cpu_reg(s, rn), modifier);
2193                }
2194            } else {
2195                dst = cpu_reg(s, rn);
2196            }
2197            break;
2198
2199        default:
2200            goto do_unallocated;
2201        }
2202        gen_a64_set_pc(s, dst);
 2203        /* BLR also needs to load the return address */
2204        if (opc == 1) {
2205            tcg_gen_movi_i64(cpu_reg(s, 30), s->base.pc_next);
2206        }
2207        break;
2208
2209    case 8: /* BRAA */
2210    case 9: /* BLRAA */
2211        if (!dc_isar_feature(aa64_pauth, s)) {
2212            goto do_unallocated;
2213        }
2214        if ((op3 & ~1) != 2) {
2215            goto do_unallocated;
2216        }
2217        btype_mod = opc & 1;
2218        if (s->pauth_active) {
2219            dst = new_tmp_a64(s);
2220            modifier = cpu_reg_sp(s, op4);
2221            if (op3 == 2) {
2222                gen_helper_autia(dst, cpu_env, cpu_reg(s, rn), modifier);
2223            } else {
2224                gen_helper_autib(dst, cpu_env, cpu_reg(s, rn), modifier);
2225            }
2226        } else {
2227            dst = cpu_reg(s, rn);
2228        }
2229        gen_a64_set_pc(s, dst);
 2230        /* BLRAA also needs to load the return address */
2231        if (opc == 9) {
2232            tcg_gen_movi_i64(cpu_reg(s, 30), s->base.pc_next);
2233        }
2234        break;
2235
2236    case 4: /* ERET */
2237        if (s->current_el == 0) {
2238            goto do_unallocated;
2239        }
2240        switch (op3) {
2241        case 0: /* ERET */
2242            if (op4 != 0) {
2243                goto do_unallocated;
2244            }
2245            dst = tcg_temp_new_i64();
2246            tcg_gen_ld_i64(dst, cpu_env,
2247                           offsetof(CPUARMState, elr_el[s->current_el]));
2248            break;
2249
2250        case 2: /* ERETAA */
2251        case 3: /* ERETAB */
2252            if (!dc_isar_feature(aa64_pauth, s)) {
2253                goto do_unallocated;
2254            }
2255            if (rn != 0x1f || op4 != 0x1f) {
2256                goto do_unallocated;
2257            }
2258            dst = tcg_temp_new_i64();
2259            tcg_gen_ld_i64(dst, cpu_env,
2260                           offsetof(CPUARMState, elr_el[s->current_el]));
2261            if (s->pauth_active) {
2262                modifier = cpu_X[31];
2263                if (op3 == 2) {
2264                    gen_helper_autia(dst, cpu_env, dst, modifier);
2265                } else {
2266                    gen_helper_autib(dst, cpu_env, dst, modifier);
2267                }
2268            }
2269            break;
2270
2271        default:
2272            goto do_unallocated;
2273        }
2274        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
2275            gen_io_start();
2276        }
2277
2278        gen_helper_exception_return(cpu_env, dst);
2279        tcg_temp_free_i64(dst);
2280        /* Must exit loop to check un-masked IRQs */
2281        s->base.is_jmp = DISAS_EXIT;
2282        return;
2283
2284    case 5: /* DRPS */
2285        if (op3 != 0 || op4 != 0 || rn != 0x1f) {
2286            goto do_unallocated;
2287        } else {
2288            unsupported_encoding(s, insn);
2289        }
2290        return;
2291
2292    default:
2293    do_unallocated:
2294        unallocated_encoding(s);
2295        return;
2296    }
2297
2298    switch (btype_mod) {
2299    case 0: /* BR */
2300        if (dc_isar_feature(aa64_bti, s)) {
2301            /* BR to {x16,x17} or !guard -> 1, else 3.  */
2302            set_btype(s, rn == 16 || rn == 17 || !s->guarded_page ? 1 : 3);
2303        }
2304        break;
2305
2306    case 1: /* BLR */
2307        if (dc_isar_feature(aa64_bti, s)) {
2308            /* BLR sets BTYPE to 2, regardless of source guarded page.  */
2309            set_btype(s, 2);
2310        }
2311        break;
2312
2313    default: /* RET or none of the above.  */
2314        /* BTYPE will be set to 0 by normal end-of-insn processing.  */
2315        break;
2316    }
2317
2318    s->base.is_jmp = DISAS_JUMP;
2319}
2320
2321/* Branches, exception generating and system instructions */
2322static void disas_b_exc_sys(DisasContext *s, uint32_t insn)
2323{
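        /*
         * Bits [31:25] suffice to dispatch here: e.g. B and BL have a
         * 6-bit opcode in bits [31:26], so each shows up as two case
         * values below depending on the top bit of imm26.
         */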
2324    switch (extract32(insn, 25, 7)) {
2325    case 0x0a: case 0x0b:
2326    case 0x4a: case 0x4b: /* Unconditional branch (immediate) */
2327        disas_uncond_b_imm(s, insn);
2328        break;
2329    case 0x1a: case 0x5a: /* Compare & branch (immediate) */
2330        disas_comp_b_imm(s, insn);
2331        break;
2332    case 0x1b: case 0x5b: /* Test & branch (immediate) */
2333        disas_test_b_imm(s, insn);
2334        break;
2335    case 0x2a: /* Conditional branch (immediate) */
2336        disas_cond_b_imm(s, insn);
2337        break;
2338    case 0x6a: /* Exception generation / System */
2339        if (insn & (1 << 24)) {
2340            if (extract32(insn, 22, 2) == 0) {
2341                disas_system(s, insn);
2342            } else {
2343                unallocated_encoding(s);
2344            }
2345        } else {
2346            disas_exc(s, insn);
2347        }
2348        break;
2349    case 0x6b: /* Unconditional branch (register) */
2350        disas_uncond_b_reg(s, insn);
2351        break;
2352    default:
2353        unallocated_encoding(s);
2354        break;
2355    }
2356}
2357
2358/*
2359 * Load/Store exclusive instructions are implemented by remembering
2360 * the value/address loaded, and seeing if these are the same
2361 * when the store is performed. This is not actually the architecturally
2362 * mandated semantics, but it works for typical guest code sequences
2363 * and avoids having to monitor regular stores.
2364 *
2365 * The store exclusive uses the atomic cmpxchg primitives to avoid
2366 * races in multi-threaded linux-user and when MTTCG softmmu is
2367 * enabled.
2368 */
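
    /*
     * A typical guest sequence (illustrative) is an atomic increment:
     *
     *    retry:
     *        ldxr    x0, [x2]
     *        add     x0, x0, #1
     *        stxr    w1, x0, [x2]
     *        cbnz    w1, retry
     *
     * The LDXR latches the address and value; the STXR succeeds (writes
     * 0 to its status register w1) only if the cmpxchg still sees that
     * value at that address.
     */
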
2369static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
2370                               TCGv_i64 addr, int size, bool is_pair)
2371{
2372    int idx = get_mem_index(s);
2373    MemOp memop = s->be_data;
2374
2375    g_assert(size <= 3);
2376    if (is_pair) {
2377        g_assert(size >= 2);
2378        if (size == 2) {
2379            /* The pair must be single-copy atomic for the doubleword.  */
2380            memop |= MO_64 | MO_ALIGN;
2381            tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx, memop);
2382            if (s->be_data == MO_LE) {
2383                tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 0, 32);
2384                tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 32, 32);
2385            } else {
2386                tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 32, 32);
2387                tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 0, 32);
2388            }
2389        } else {
2390            /* The pair must be single-copy atomic for *each* doubleword, not
 2391               the entire quadword; however, it must be quadword aligned.  */
2392            memop |= MO_64;
2393            tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx,
2394                                memop | MO_ALIGN_16);
2395
2396            TCGv_i64 addr2 = tcg_temp_new_i64();
2397            tcg_gen_addi_i64(addr2, addr, 8);
2398            tcg_gen_qemu_ld_i64(cpu_exclusive_high, addr2, idx, memop);
2399            tcg_temp_free_i64(addr2);
2400
2401            tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
2402            tcg_gen_mov_i64(cpu_reg(s, rt2), cpu_exclusive_high);
2403        }
2404    } else {
2405        memop |= size | MO_ALIGN;
2406        tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx, memop);
2407        tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
2408    }
2409    tcg_gen_mov_i64(cpu_exclusive_addr, addr);
2410}
2411
2412static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
2413                                TCGv_i64 addr, int size, int is_pair)
2414{
2415    /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]
2416     *     && (!is_pair || env->exclusive_high == [addr + datasize])) {
2417     *     [addr] = {Rt};
2418     *     if (is_pair) {
2419     *         [addr + datasize] = {Rt2};
2420     *     }
2421     *     {Rd} = 0;
2422     * } else {
2423     *     {Rd} = 1;
2424     * }
2425     * env->exclusive_addr = -1;
2426     */
2427    TCGLabel *fail_label = gen_new_label();
2428    TCGLabel *done_label = gen_new_label();
2429    TCGv_i64 tmp;
2430
2431    tcg_gen_brcond_i64(TCG_COND_NE, addr, cpu_exclusive_addr, fail_label);
2432
2433    tmp = tcg_temp_new_i64();
2434    if (is_pair) {
2435        if (size == 2) {
2436            if (s->be_data == MO_LE) {
2437                tcg_gen_concat32_i64(tmp, cpu_reg(s, rt), cpu_reg(s, rt2));
2438            } else {
2439                tcg_gen_concat32_i64(tmp, cpu_reg(s, rt2), cpu_reg(s, rt));
2440            }
2441            tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr,
2442                                       cpu_exclusive_val, tmp,
2443                                       get_mem_index(s),
2444                                       MO_64 | MO_ALIGN | s->be_data);
2445            tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
2446        } else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
2447            if (!HAVE_CMPXCHG128) {
2448                gen_helper_exit_atomic(cpu_env);
2449                s->base.is_jmp = DISAS_NORETURN;
2450            } else if (s->be_data == MO_LE) {
2451                gen_helper_paired_cmpxchg64_le_parallel(tmp, cpu_env,
2452                                                        cpu_exclusive_addr,
2453                                                        cpu_reg(s, rt),
2454                                                        cpu_reg(s, rt2));
2455            } else {
2456                gen_helper_paired_cmpxchg64_be_parallel(tmp, cpu_env,
2457                                                        cpu_exclusive_addr,
2458                                                        cpu_reg(s, rt),
2459                                                        cpu_reg(s, rt2));
2460            }
2461        } else if (s->be_data == MO_LE) {
2462            gen_helper_paired_cmpxchg64_le(tmp, cpu_env, cpu_exclusive_addr,
2463                                           cpu_reg(s, rt), cpu_reg(s, rt2));
2464        } else {
2465            gen_helper_paired_cmpxchg64_be(tmp, cpu_env, cpu_exclusive_addr,
2466                                           cpu_reg(s, rt), cpu_reg(s, rt2));
2467        }
2468    } else {
2469        tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, cpu_exclusive_val,
2470                                   cpu_reg(s, rt), get_mem_index(s),
2471                                   size | MO_ALIGN | s->be_data);
2472        tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
2473    }
2474    tcg_gen_mov_i64(cpu_reg(s, rd), tmp);
2475    tcg_temp_free_i64(tmp);
2476    tcg_gen_br(done_label);
2477
2478    gen_set_label(fail_label);
2479    tcg_gen_movi_i64(cpu_reg(s, rd), 1);
2480    gen_set_label(done_label);
2481    tcg_gen_movi_i64(cpu_exclusive_addr, -1);
2482}
2483
2484static void gen_compare_and_swap(DisasContext *s, int rs, int rt,
2485                                 int rn, int size)
2486{
2487    TCGv_i64 tcg_rs = cpu_reg(s, rs);
2488    TCGv_i64 tcg_rt = cpu_reg(s, rt);
2489    int memidx = get_mem_index(s);
2490    TCGv_i64 clean_addr;
2491
2492    if (rn == 31) {
2493        gen_check_sp_alignment(s);
2494    }
2495    clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, size);
2496    tcg_gen_atomic_cmpxchg_i64(tcg_rs, clean_addr, tcg_rs, tcg_rt, memidx,
2497                               size | MO_ALIGN | s->be_data);
2498}
2499
2500static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt,
2501                                      int rn, int size)
2502{
2503    TCGv_i64 s1 = cpu_reg(s, rs);
2504    TCGv_i64 s2 = cpu_reg(s, rs + 1);
2505    TCGv_i64 t1 = cpu_reg(s, rt);
2506    TCGv_i64 t2 = cpu_reg(s, rt + 1);
2507    TCGv_i64 clean_addr;
2508    int memidx = get_mem_index(s);
2509
2510    if (rn == 31) {
2511        gen_check_sp_alignment(s);
2512    }
2513
2514    /* This is a single atomic access, despite the "pair". */
2515    clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, size + 1);
2516
2517    if (size == 2) {
2518        TCGv_i64 cmp = tcg_temp_new_i64();
2519        TCGv_i64 val = tcg_temp_new_i64();
2520
2521        if (s->be_data == MO_LE) {
2522            tcg_gen_concat32_i64(val, t1, t2);
2523            tcg_gen_concat32_i64(cmp, s1, s2);
2524        } else {
2525            tcg_gen_concat32_i64(val, t2, t1);
2526            tcg_gen_concat32_i64(cmp, s2, s1);
2527        }
2528
2529        tcg_gen_atomic_cmpxchg_i64(cmp, clean_addr, cmp, val, memidx,
2530                                   MO_64 | MO_ALIGN | s->be_data);
2531        tcg_temp_free_i64(val);
2532
2533        if (s->be_data == MO_LE) {
2534            tcg_gen_extr32_i64(s1, s2, cmp);
2535        } else {
2536            tcg_gen_extr32_i64(s2, s1, cmp);
2537        }
2538        tcg_temp_free_i64(cmp);
2539    } else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
2540        if (HAVE_CMPXCHG128) {
2541            TCGv_i32 tcg_rs = tcg_const_i32(rs);
2542            if (s->be_data == MO_LE) {
2543                gen_helper_casp_le_parallel(cpu_env, tcg_rs,
2544                                            clean_addr, t1, t2);
2545            } else {
2546                gen_helper_casp_be_parallel(cpu_env, tcg_rs,
2547                                            clean_addr, t1, t2);
2548            }
2549            tcg_temp_free_i32(tcg_rs);
2550        } else {
2551            gen_helper_exit_atomic(cpu_env);
2552            s->base.is_jmp = DISAS_NORETURN;
2553        }
2554    } else {
2555        TCGv_i64 d1 = tcg_temp_new_i64();
2556        TCGv_i64 d2 = tcg_temp_new_i64();
2557        TCGv_i64 a2 = tcg_temp_new_i64();
2558        TCGv_i64 c1 = tcg_temp_new_i64();
2559        TCGv_i64 c2 = tcg_temp_new_i64();
2560        TCGv_i64 zero = tcg_const_i64(0);
2561
2562        /* Load the two words, in memory order.  */
2563        tcg_gen_qemu_ld_i64(d1, clean_addr, memidx,
2564                            MO_64 | MO_ALIGN_16 | s->be_data);
2565        tcg_gen_addi_i64(a2, clean_addr, 8);
2566        tcg_gen_qemu_ld_i64(d2, a2, memidx, MO_64 | s->be_data);
2567
2568        /* Compare the two words, also in memory order.  */
2569        tcg_gen_setcond_i64(TCG_COND_EQ, c1, d1, s1);
2570        tcg_gen_setcond_i64(TCG_COND_EQ, c2, d2, s2);
2571        tcg_gen_and_i64(c2, c2, c1);
2572
2573        /* If compare equal, write back new data, else write back old data.  */
2574        tcg_gen_movcond_i64(TCG_COND_NE, c1, c2, zero, t1, d1);
2575        tcg_gen_movcond_i64(TCG_COND_NE, c2, c2, zero, t2, d2);
2576        tcg_gen_qemu_st_i64(c1, clean_addr, memidx, MO_64 | s->be_data);
2577        tcg_gen_qemu_st_i64(c2, a2, memidx, MO_64 | s->be_data);
2578        tcg_temp_free_i64(a2);
2579        tcg_temp_free_i64(c1);
2580        tcg_temp_free_i64(c2);
2581        tcg_temp_free_i64(zero);
2582
2583        /* Write back the data from memory to Rs.  */
2584        tcg_gen_mov_i64(s1, d1);
2585        tcg_gen_mov_i64(s2, d2);
2586        tcg_temp_free_i64(d1);
2587        tcg_temp_free_i64(d2);
2588    }
2589}
2590
 2591/* Compute the ISS Sixty-Four bit (SF) register size flag. This logic is
 2592 * derived from the ARMv8 specs for LDR (Shared decode for all encodings).
2593 */
2594static bool disas_ldst_compute_iss_sf(int size, bool is_signed, int opc)
2595{
2596    int opc0 = extract32(opc, 0, 1);
2597    int regsize;
2598
2599    if (is_signed) {
2600        regsize = opc0 ? 32 : 64;
2601    } else {
2602        regsize = size == 3 ? 64 : 32;
2603    }
2604    return regsize == 64;
2605}
2606
2607/* Load/store exclusive
2608 *
2609 *  31 30 29         24  23  22   21  20  16  15  14   10 9    5 4    0
2610 * +-----+-------------+----+---+----+------+----+-------+------+------+
2611 * | sz  | 0 0 1 0 0 0 | o2 | L | o1 |  Rs  | o0 |  Rt2  |  Rn  | Rt   |
2612 * +-----+-------------+----+---+----+------+----+-------+------+------+
2613 *
2614 *  sz: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64 bit
2615 *   L: 0 -> store, 1 -> load
2616 *  o2: 0 -> exclusive, 1 -> not
2617 *  o1: 0 -> single register, 1 -> register pair
2618 *  o0: 1 -> load-acquire/store-release, 0 -> not
2619 */
2620static void disas_ldst_excl(DisasContext *s, uint32_t insn)
2621{
2622    int rt = extract32(insn, 0, 5);
2623    int rn = extract32(insn, 5, 5);
2624    int rt2 = extract32(insn, 10, 5);
2625    int rs = extract32(insn, 16, 5);
2626    int is_lasr = extract32(insn, 15, 1);
2627    int o2_L_o1_o0 = extract32(insn, 21, 3) * 2 | is_lasr;
2628    int size = extract32(insn, 30, 2);
2629    TCGv_i64 clean_addr;
2630
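        /*
         * Illustrative decode: LDAXR has o2 = 0, L = 1, o1 = 0, o0 = 1,
         * which folds to o2_L_o1_o0 == 0x5 below.
         */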
2631    switch (o2_L_o1_o0) {
2632    case 0x0: /* STXR */
2633    case 0x1: /* STLXR */
2634        if (rn == 31) {
2635            gen_check_sp_alignment(s);
2636        }
2637        if (is_lasr) {
2638            tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
2639        }
2640        clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn),
2641                                    true, rn != 31, size);
2642        gen_store_exclusive(s, rs, rt, rt2, clean_addr, size, false);
2643        return;
2644
2645    case 0x4: /* LDXR */
2646    case 0x5: /* LDAXR */
2647        if (rn == 31) {
2648            gen_check_sp_alignment(s);
2649        }
2650        clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn),
2651                                    false, rn != 31, size);
2652        s->is_ldex = true;
2653        gen_load_exclusive(s, rt, rt2, clean_addr, size, false);
2654        if (is_lasr) {
2655            tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
2656        }
2657        return;
2658
2659    case 0x8: /* STLLR */
2660        if (!dc_isar_feature(aa64_lor, s)) {
2661            break;
2662        }
2663        /* StoreLORelease is the same as Store-Release for QEMU.  */
2664        /* fall through */
2665    case 0x9: /* STLR */
2666        /* Generate ISS for non-exclusive accesses including LASR.  */
2667        if (rn == 31) {
2668            gen_check_sp_alignment(s);
2669        }
2670        tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
2671        clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn),
2672                                    true, rn != 31, size);
2673        do_gpr_st(s, cpu_reg(s, rt), clean_addr, size, true, rt,
2674                  disas_ldst_compute_iss_sf(size, false, 0), is_lasr);
2675        return;
2676
2677    case 0xc: /* LDLAR */
2678        if (!dc_isar_feature(aa64_lor, s)) {
2679            break;
2680        }
2681        /* LoadLOAcquire is the same as Load-Acquire for QEMU.  */
2682        /* fall through */
2683    case 0xd: /* LDAR */
2684        /* Generate ISS for non-exclusive accesses including LASR.  */
2685        if (rn == 31) {
2686            gen_check_sp_alignment(s);
2687        }
2688        clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn),
2689                                    false, rn != 31, size);
2690        do_gpr_ld(s, cpu_reg(s, rt), clean_addr, size, false, false, true, rt,
2691                  disas_ldst_compute_iss_sf(size, false, 0), is_lasr);
2692        tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
2693        return;
2694
2695    case 0x2: case 0x3: /* CASP / STXP */
2696        if (size & 2) { /* STXP / STLXP */
2697            if (rn == 31) {
2698                gen_check_sp_alignment(s);
2699            }
2700            if (is_lasr) {
2701                tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
2702            }
2703            clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn),
2704                                        true, rn != 31, size);
2705            gen_store_exclusive(s, rs, rt, rt2, clean_addr, size, true);
2706            return;
2707        }
2708        if (rt2 == 31
2709            && ((rt | rs) & 1) == 0
2710            && dc_isar_feature(aa64_atomics, s)) {
2711            /* CASP / CASPL */
2712            gen_compare_and_swap_pair(s, rs, rt, rn, size | 2);
2713            return;
2714        }
2715        break;
2716
2717    case 0x6: case 0x7: /* CASPA / LDXP */
2718        if (size & 2) { /* LDXP / LDAXP */
2719            if (rn == 31) {
2720                gen_check_sp_alignment(s);
2721            }
2722            clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn),
2723                                        false, rn != 31, size);
2724            s->is_ldex = true;
2725            gen_load_exclusive(s, rt, rt2, clean_addr, size, true);
2726            if (is_lasr) {
2727                tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
2728            }
2729            return;
2730        }
2731        if (rt2 == 31
2732            && ((rt | rs) & 1) == 0
2733            && dc_isar_feature(aa64_atomics, s)) {
2734            /* CASPA / CASPAL */
2735            gen_compare_and_swap_pair(s, rs, rt, rn, size | 2);
2736            return;
2737        }
2738        break;
2739
2740    case 0xa: /* CAS */
2741    case 0xb: /* CASL */
2742    case 0xe: /* CASA */
2743    case 0xf: /* CASAL */
2744        if (rt2 == 31 && dc_isar_feature(aa64_atomics, s)) {
2745            gen_compare_and_swap(s, rs, rt, rn, size);
2746            return;
2747        }
2748        break;
2749    }
2750    unallocated_encoding(s);
2751}
2752
2753/*
2754 * Load register (literal)
2755 *
2756 *  31 30 29   27  26 25 24 23                5 4     0
2757 * +-----+-------+---+-----+-------------------+-------+
2758 * | opc | 0 1 1 | V | 0 0 |     imm19         |  Rt   |
2759 * +-----+-------+---+-----+-------------------+-------+
2760 *
2761 * V: 1 -> vector (simd/fp)
2762 * opc (non-vector): 00 -> 32 bit, 01 -> 64 bit,
2763 *                   10-> 32 bit signed, 11 -> prefetch
2764 * opc (vector): 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit (11 unallocated)
2765 */
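
    /*
     * For example (illustrative): "ldr x0, <label>" computes its address
     * as pc + imm19 * 4, so a literal must lie within +/-1MiB of the
     * load instruction.
     */
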
2766static void disas_ld_lit(DisasContext *s, uint32_t insn)
2767{
2768    int rt = extract32(insn, 0, 5);
2769    int64_t imm = sextract32(insn, 5, 19) << 2;
2770    bool is_vector = extract32(insn, 26, 1);
2771    int opc = extract32(insn, 30, 2);
2772    bool is_signed = false;
2773    int size = 2;
2774    TCGv_i64 tcg_rt, clean_addr;
2775
2776    if (is_vector) {
2777        if (opc == 3) {
2778            unallocated_encoding(s);
2779            return;
2780        }
2781        size = 2 + opc;
2782        if (!fp_access_check(s)) {
2783            return;
2784        }
2785    } else {
2786        if (opc == 3) {
2787            /* PRFM (literal) : prefetch */
2788            return;
2789        }
2790        size = 2 + extract32(opc, 0, 1);
2791        is_signed = extract32(opc, 1, 1);
2792    }
2793
2794    tcg_rt = cpu_reg(s, rt);
2795
2796    clean_addr = tcg_const_i64(s->pc_curr + imm);
2797    if (is_vector) {
2798        do_fp_ld(s, rt, clean_addr, size);
2799    } else {
 2800        /* Only unsigned 32-bit loads target 32-bit registers.  */
2801        bool iss_sf = opc != 0;
2802
2803        do_gpr_ld(s, tcg_rt, clean_addr, size, is_signed, false,
2804                  true, rt, iss_sf, false);
2805    }
2806    tcg_temp_free_i64(clean_addr);
2807}
2808
2809/*
2810 * LDNP (Load Pair - non-temporal hint)
2811 * LDP (Load Pair - non vector)
2812 * LDPSW (Load Pair Signed Word - non vector)
2813 * STNP (Store Pair - non-temporal hint)
2814 * STP (Store Pair - non vector)
2815 * LDNP (Load Pair of SIMD&FP - non-temporal hint)
2816 * LDP (Load Pair of SIMD&FP)
2817 * STNP (Store Pair of SIMD&FP - non-temporal hint)
2818 * STP (Store Pair of SIMD&FP)
2819 *
2820 *  31 30 29   27  26  25 24   23  22 21   15 14   10 9    5 4    0
 2821 * +-----+-------+---+---+-------+---+-------+-------+------+------+
2822 * | opc | 1 0 1 | V | 0 | index | L |  imm7 |  Rt2  |  Rn  | Rt   |
2823 * +-----+-------+---+---+-------+---+-------+-------+------+------+
2824 *
2825 * opc: LDP/STP/LDNP/STNP        00 -> 32 bit, 10 -> 64 bit
2826 *      LDPSW/STGP               01
2827 *      LDP/STP/LDNP/STNP (SIMD) 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit
2828 *   V: 0 -> GPR, 1 -> Vector
2829 * idx: 00 -> signed offset with non-temporal hint, 01 -> post-index,
2830 *      10 -> signed offset, 11 -> pre-index
2831 *   L: 0 -> Store 1 -> Load
2832 *
2833 * Rt, Rt2 = GPR or SIMD registers to be stored
2834 * Rn = general purpose register containing address
2835 * imm7 = signed offset (multiple of 4 or 8 depending on size)
2836 */
2837static void disas_ldst_pair(DisasContext *s, uint32_t insn)
2838{
2839    int rt = extract32(insn, 0, 5);
2840    int rn = extract32(insn, 5, 5);
2841    int rt2 = extract32(insn, 10, 5);
2842    uint64_t offset = sextract64(insn, 15, 7);
2843    int index = extract32(insn, 23, 2);
2844    bool is_vector = extract32(insn, 26, 1);
2845    bool is_load = extract32(insn, 22, 1);
2846    int opc = extract32(insn, 30, 2);
2847
2848    bool is_signed = false;
2849    bool postindex = false;
2850    bool wback = false;
2851    bool set_tag = false;
2852
2853    TCGv_i64 clean_addr, dirty_addr;
2854
2855    int size;
2856
2857    if (opc == 3) {
2858        unallocated_encoding(s);
2859        return;
2860    }
2861
2862    if (is_vector) {
2863        size = 2 + opc;
2864    } else if (opc == 1 && !is_load) {
2865        /* STGP */
2866        if (!dc_isar_feature(aa64_mte_insn_reg, s) || index == 0) {
2867            unallocated_encoding(s);
2868            return;
2869        }
2870        size = 3;
2871        set_tag = true;
2872    } else {
2873        size = 2 + extract32(opc, 1, 1);
2874        is_signed = extract32(opc, 0, 1);
2875        if (!is_load && is_signed) {
2876            unallocated_encoding(s);
2877            return;
2878        }
2879    }
2880
2881    switch (index) {
2882    case 1: /* post-index */
2883        postindex = true;
2884        wback = true;
2885        break;
2886    case 0:
2887        /* signed offset with "non-temporal" hint. Since we don't emulate
 2888         * caches, we don't care about hints to the cache system about
2889         * data access patterns, and handle this identically to plain
2890         * signed offset.
2891         */
2892        if (is_signed) {
2893            /* There is no non-temporal-hint version of LDPSW */
2894            unallocated_encoding(s);
2895            return;
2896        }
2897        postindex = false;
2898        break;
2899    case 2: /* signed offset, rn not updated */
2900        postindex = false;
2901        break;
2902    case 3: /* pre-index */
2903        postindex = false;
2904        wback = true;
2905        break;
2906    }
2907
2908    if (is_vector && !fp_access_check(s)) {
2909        return;
2910    }
2911
2912    offset <<= (set_tag ? LOG2_TAG_GRANULE : size);
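
        /*
         * E.g. "stp x0, x1, [sp, #16]" (illustrative): size = 3, so the
         * encoded imm7 of 2 is scaled here to a byte offset of 16.
         */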
2913
2914    if (rn == 31) {
2915        gen_check_sp_alignment(s);
2916    }
2917
2918    dirty_addr = read_cpu_reg_sp(s, rn, 1);
2919    if (!postindex) {
2920        tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
2921    }
2922
2923    if (set_tag) {
2924        if (!s->ata) {
2925            /*
2926             * TODO: We could rely on the stores below, at least for
2927             * system mode, if we arrange to add MO_ALIGN_16.
2928             */
2929            gen_helper_stg_stub(cpu_env, dirty_addr);
2930        } else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
2931            gen_helper_stg_parallel(cpu_env, dirty_addr, dirty_addr);
2932        } else {
2933            gen_helper_stg(cpu_env, dirty_addr, dirty_addr);
2934        }
2935    }
2936
2937    clean_addr = gen_mte_checkN(s, dirty_addr, !is_load,
2938                                (wback || rn != 31) && !set_tag,
2939                                size, 2 << size);
2940
2941    if (is_vector) {
2942        if (is_load) {
2943            do_fp_ld(s, rt, clean_addr, size);
2944        } else {
2945            do_fp_st(s, rt, clean_addr, size);
2946        }
2947        tcg_gen_addi_i64(clean_addr, clean_addr, 1 << size);
2948        if (is_load) {
2949            do_fp_ld(s, rt2, clean_addr, size);
2950        } else {
2951            do_fp_st(s, rt2, clean_addr, size);
2952        }
2953    } else {
2954        TCGv_i64 tcg_rt = cpu_reg(s, rt);
2955        TCGv_i64 tcg_rt2 = cpu_reg(s, rt2);
2956
2957        if (is_load) {
2958            TCGv_i64 tmp = tcg_temp_new_i64();
2959
2960            /* Do not modify tcg_rt before recognizing any exception
2961             * from the second load.
2962             */
2963            do_gpr_ld(s, tmp, clean_addr, size, is_signed, false,
2964                      false, 0, false, false);
2965            tcg_gen_addi_i64(clean_addr, clean_addr, 1 << size);
2966            do_gpr_ld(s, tcg_rt2, clean_addr, size, is_signed, false,
2967                      false, 0, false, false);
2968
2969            tcg_gen_mov_i64(tcg_rt, tmp);
2970            tcg_temp_free_i64(tmp);
2971        } else {
2972            do_gpr_st(s, tcg_rt, clean_addr, size,
2973                      false, 0, false, false);
2974            tcg_gen_addi_i64(clean_addr, clean_addr, 1 << size);
2975            do_gpr_st(s, tcg_rt2, clean_addr, size,
2976                      false, 0, false, false);
2977        }
2978    }
2979
2980    if (wback) {
2981        if (postindex) {
2982            tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
2983        }
2984        tcg_gen_mov_i64(cpu_reg_sp(s, rn), dirty_addr);
2985    }
2986}
2987
2988/*
2989 * Load/store (immediate post-indexed)
2990 * Load/store (immediate pre-indexed)
2991 * Load/store (unscaled immediate)
2992 *
2993 * 31 30 29   27  26 25 24 23 22 21  20    12 11 10 9    5 4    0
2994 * +----+-------+---+-----+-----+---+--------+-----+------+------+
2995 * |size| 1 1 1 | V | 0 0 | opc | 0 |  imm9  | idx |  Rn  |  Rt  |
2996 * +----+-------+---+-----+-----+---+--------+-----+------+------+
2997 *
2998 * idx = 01 -> post-indexed, 11 pre-indexed, 00 unscaled imm. (no writeback)
 2999 *       10 -> unprivileged
3000 * V = 0 -> non-vector
 3001 * size: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64 bit
3002 * opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
3003 */
3004static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn,
3005                                int opc,
3006                                int size,
3007                                int rt,
3008                                bool is_vector)
3009{
3010    int rn = extract32(insn, 5, 5);
3011    int imm9 = sextract32(insn, 12, 9);
3012    int idx = extract32(insn, 10, 2);
3013    bool is_signed = false;
3014    bool is_store = false;
3015    bool is_extended = false;
3016    bool is_unpriv = (idx == 2);
3017    bool iss_valid = !is_vector;
3018    bool post_index;
3019    bool writeback;
3020    int memidx;
3021
3022    TCGv_i64 clean_addr, dirty_addr;
3023
3024    if (is_vector) {
3025        size |= (opc & 2) << 1;
3026        if (size > 4 || is_unpriv) {
3027            unallocated_encoding(s);
3028            return;
3029        }
3030        is_store = ((opc & 1) == 0);
3031        if (!fp_access_check(s)) {
3032            return;
3033        }
3034    } else {
3035        if (size == 3 && opc == 2) {
3036            /* PRFM - prefetch */
3037            if (idx != 0) {
3038                unallocated_encoding(s);
3039                return;
3040            }
3041            return;
3042        }
3043        if (opc == 3 && size > 1) {
3044            unallocated_encoding(s);
3045            return;
3046        }
3047        is_store = (opc == 0);
3048        is_signed = extract32(opc, 1, 1);
3049        is_extended = (size < 3) && extract32(opc, 0, 1);
3050    }
3051
3052    switch (idx) {
3053    case 0:
3054    case 2:
3055        post_index = false;
3056        writeback = false;
3057        break;
3058    case 1:
3059        post_index = true;
3060        writeback = true;
3061        break;
3062    case 3:
3063        post_index = false;
3064        writeback = true;
3065        break;
3066    default:
3067        g_assert_not_reached();
3068    }
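
        /*
         * E.g. "ldr x0, [x1], #8" (illustrative) is post-indexed
         * (idx == 1): the load uses the unmodified x1, and x1 += 8 only
         * in the writeback step at the end of this function.
         */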
3069
3070    if (rn == 31) {
3071        gen_check_sp_alignment(s);
3072    }
3073
3074    dirty_addr = read_cpu_reg_sp(s, rn, 1);
3075    if (!post_index) {
3076        tcg_gen_addi_i64(dirty_addr, dirty_addr, imm9);
3077    }
3078
3079    memidx = is_unpriv ? get_a64_user_mem_index(s) : get_mem_index(s);
3080    clean_addr = gen_mte_check1_mmuidx(s, dirty_addr, is_store,
3081                                       writeback || rn != 31,
3082                                       size, is_unpriv, memidx);
3083
3084    if (is_vector) {
3085        if (is_store) {
3086            do_fp_st(s, rt, clean_addr, size);
3087        } else {
3088            do_fp_ld(s, rt, clean_addr, size);
3089        }
3090    } else {
3091        TCGv_i64 tcg_rt = cpu_reg(s, rt);
3092        bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
3093
3094        if (is_store) {
3095            do_gpr_st_memidx(s, tcg_rt, clean_addr, size, memidx,
3096                             iss_valid, rt, iss_sf, false);
3097        } else {
3098            do_gpr_ld_memidx(s, tcg_rt, clean_addr, size,
3099                             is_signed, is_extended, memidx,
3100                             iss_valid, rt, iss_sf, false);
3101        }
3102    }
3103
3104    if (writeback) {
3105        TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
3106        if (post_index) {
3107            tcg_gen_addi_i64(dirty_addr, dirty_addr, imm9);
3108        }
3109        tcg_gen_mov_i64(tcg_rn, dirty_addr);
3110    }
3111}
3112
3113/*
3114 * Load/store (register offset)
3115 *
3116 * 31 30 29   27  26 25 24 23 22 21  20  16 15 13 12 11 10 9  5 4  0
3117 * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
3118 * |size| 1 1 1 | V | 0 0 | opc | 1 |  Rm  | opt | S| 1 0 | Rn | Rt |
3119 * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
3120 *
3121 * For non-vector:
 3122 *   size: 00 -> byte, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64 bit
3123 *   opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
3124 * For vector:
3125 *   size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
3126 *   opc<0>: 0 -> store, 1 -> load
3127 * V: 1 -> vector/simd
3128 * opt: extend encoding (see DecodeRegExtend)
 3129 * S: if S=1 then scale the offset by the access size (shift Rm left by size)
3130 * Rt: register to transfer into/out of
3131 * Rn: address register or SP for base
3132 * Rm: offset register or ZR for offset
3133 */
3134static void disas_ldst_reg_roffset(DisasContext *s, uint32_t insn,
3135                                   int opc,
3136                                   int size,
3137                                   int rt,
3138                                   bool is_vector)
3139{
3140    int rn = extract32(insn, 5, 5);
3141    int shift = extract32(insn, 12, 1);
3142    int rm = extract32(insn, 16, 5);
3143    int opt = extract32(insn, 13, 3);
3144    bool is_signed = false;
3145    bool is_store = false;
3146    bool is_extended = false;
3147
3148    TCGv_i64 tcg_rm, clean_addr, dirty_addr;
3149
3150    if (extract32(opt, 1, 1) == 0) {
3151        unallocated_encoding(s);
3152        return;
3153    }
3154
3155    if (is_vector) {
3156        size |= (opc & 2) << 1;
3157        if (size > 4) {
3158            unallocated_encoding(s);
3159            return;
3160        }
3161        is_store = !extract32(opc, 0, 1);
3162        if (!fp_access_check(s)) {
3163            return;
3164        }
3165    } else {
3166        if (size == 3 && opc == 2) {
3167            /* PRFM - prefetch */
3168            return;
3169        }
3170        if (opc == 3 && size > 1) {
3171            unallocated_encoding(s);
3172            return;
3173        }
3174        is_store = (opc == 0);
3175        is_signed = extract32(opc, 1, 1);
3176        is_extended = (size < 3) && extract32(opc, 0, 1);
3177    }
3178
3179    if (rn == 31) {
3180        gen_check_sp_alignment(s);
3181    }
3182    dirty_addr = read_cpu_reg_sp(s, rn, 1);
3183
3184    tcg_rm = read_cpu_reg(s, rm, 1);
3185    ext_and_shift_reg(tcg_rm, tcg_rm, opt, shift ? size : 0);
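
        /*
         * E.g. "ldr x0, [x1, x2, lsl #3]" (illustrative): opt = 0b011
         * (LSL) with S = 1 scales the index by the access size, so the
         * extended offset computed above is x2 << 3 for this 64-bit load.
         */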
3186
3187    tcg_gen_add_i64(dirty_addr, dirty_addr, tcg_rm);
3188    clean_addr = gen_mte_check1(s, dirty_addr, is_store, true, size);
3189
3190    if (is_vector) {
3191        if (is_store) {
3192            do_fp_st(s, rt, clean_addr, size);
3193        } else {
3194            do_fp_ld(s, rt, clean_addr, size);
3195        }
3196    } else {
3197        TCGv_i64 tcg_rt = cpu_reg(s, rt);
3198        bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
3199        if (is_store) {
3200            do_gpr_st(s, tcg_rt, clean_addr, size,
3201                      true, rt, iss_sf, false);
3202        } else {
3203            do_gpr_ld(s, tcg_rt, clean_addr, size,
3204                      is_signed, is_extended,
3205                      true, rt, iss_sf, false);
3206        }
3207    }
3208}
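
/*
 * Worked example (illustrative sketch, not part of the translator): for
 * "ldr x0, [x1, w2, uxtw #3]" the option field selects UXTW and S=1
 * scales the offset by the access size, so the guest address is:
 */
static inline uint64_t example_roffset_uxtw(uint64_t x1, uint32_t w2)
{
    /* zero-extend the 32-bit index, then shift left by size (3 -> 8 bytes) */
    return x1 + ((uint64_t)w2 << 3);
}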
3209
3210/*
3211 * Load/store (unsigned immediate)
3212 *
3213 * 31 30 29   27  26 25 24 23 22 21        10 9     5 4    0
3214 * +----+-------+---+-----+-----+------------+-------+------+
3215 * |size| 1 1 1 | V | 0 1 | opc |   imm12    |  Rn   |  Rt  |
3216 * +----+-------+---+-----+-----+------------+-------+------+
3217 *
3218 * For non-vector:
3219 *   size: 00 -> byte, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64 bit
3220 *   opc: 00 -> store, 01 -> unsigned load, 10/11 -> signed load (64/32-bit)
3221 * For vector:
3222 *   size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
3223 *   opc<0>: 0 -> store, 1 -> load
3224 * Rn: base address register (inc SP)
3225 * Rt: target register
3226 */
3227static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn,
3228                                        int opc,
3229                                        int size,
3230                                        int rt,
3231                                        bool is_vector)
3232{
3233    int rn = extract32(insn, 5, 5);
3234    unsigned int imm12 = extract32(insn, 10, 12);
3235    unsigned int offset;
3236
3237    TCGv_i64 clean_addr, dirty_addr;
3238
3239    bool is_store;
3240    bool is_signed = false;
3241    bool is_extended = false;
3242
3243    if (is_vector) {
3244        size |= (opc & 2) << 1;
3245        if (size > 4) {
3246            unallocated_encoding(s);
3247            return;
3248        }
3249        is_store = !extract32(opc, 0, 1);
3250        if (!fp_access_check(s)) {
3251            return;
3252        }
3253    } else {
3254        if (size == 3 && opc == 2) {
3255            /* PRFM - prefetch */
3256            return;
3257        }
3258        if (opc == 3 && size > 1) {
3259            unallocated_encoding(s);
3260            return;
3261        }
3262        is_store = (opc == 0);
3263        is_signed = extract32(opc, 1, 1);
3264        is_extended = (size < 3) && extract32(opc, 0, 1);
3265    }
3266
3267    if (rn == 31) {
3268        gen_check_sp_alignment(s);
3269    }
3270    dirty_addr = read_cpu_reg_sp(s, rn, 1);
3271    offset = imm12 << size;
3272    tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
3273    clean_addr = gen_mte_check1(s, dirty_addr, is_store, rn != 31, size);
3274
3275    if (is_vector) {
3276        if (is_store) {
3277            do_fp_st(s, rt, clean_addr, size);
3278        } else {
3279            do_fp_ld(s, rt, clean_addr, size);
3280        }
3281    } else {
3282        TCGv_i64 tcg_rt = cpu_reg(s, rt);
3283        bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
3284        if (is_store) {
3285            do_gpr_st(s, tcg_rt, clean_addr, size,
3286                      true, rt, iss_sf, false);
3287        } else {
3288            do_gpr_ld(s, tcg_rt, clean_addr, size, is_signed, is_extended,
3289                      true, rt, iss_sf, false);
3290        }
3291    }
3292}
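
/*
 * Worked example (illustrative): the unsigned immediate is scaled by the
 * access size, so "ldr x0, [x1, #8]" encodes imm12 = 1 with size = 3 and
 * the decoder reconstructs offset = 1 << 3 = 8; the largest encodable
 * offset is therefore 4095 << size.
 */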
3293
3294/* Atomic memory operations
3295 *
3296 *  31  30      27  26    24    22  21   16   15    12    10    5     0
3297 * +------+-------+---+-----+-----+---+----+----+-----+-----+----+-----+
3298 * | size | 1 1 1 | V | 0 0 | A R | 1 | Rs | o3 | opc | 0 0 | Rn |  Rt |
3299 * +------+-------+---+-----+-----+---+----+----+-----+-----+----+-----+
3300 *
3301 * Rt: the result register
3302 * Rn: base address or SP
3303 * Rs: the source register for the operation
3304 * V: vector flag (always 0 as of v8.3)
3305 * A: acquire flag
3306 * R: release flag
3307 */
3308static void disas_ldst_atomic(DisasContext *s, uint32_t insn,
3309                              int size, int rt, bool is_vector)
3310{
3311    int rs = extract32(insn, 16, 5);
3312    int rn = extract32(insn, 5, 5);
3313    int o3_opc = extract32(insn, 12, 4);
3314    bool r = extract32(insn, 22, 1);
3315    bool a = extract32(insn, 23, 1);
3316    TCGv_i64 tcg_rs, clean_addr;
3317    AtomicThreeOpFn *fn = NULL;
3318
3319    if (is_vector || !dc_isar_feature(aa64_atomics, s)) {
3320        unallocated_encoding(s);
3321        return;
3322    }
3323    switch (o3_opc) {
3324    case 000: /* LDADD */
3325        fn = tcg_gen_atomic_fetch_add_i64;
3326        break;
3327    case 001: /* LDCLR */
3328        fn = tcg_gen_atomic_fetch_and_i64;
3329        break;
3330    case 002: /* LDEOR */
3331        fn = tcg_gen_atomic_fetch_xor_i64;
3332        break;
3333    case 003: /* LDSET */
3334        fn = tcg_gen_atomic_fetch_or_i64;
3335        break;
3336    case 004: /* LDSMAX */
3337        fn = tcg_gen_atomic_fetch_smax_i64;
3338        break;
3339    case 005: /* LDSMIN */
3340        fn = tcg_gen_atomic_fetch_smin_i64;
3341        break;
3342    case 006: /* LDUMAX */
3343        fn = tcg_gen_atomic_fetch_umax_i64;
3344        break;
3345    case 007: /* LDUMIN */
3346        fn = tcg_gen_atomic_fetch_umin_i64;
3347        break;
3348    case 010: /* SWP */
3349        fn = tcg_gen_atomic_xchg_i64;
3350        break;
3351    case 014: /* LDAPR, LDAPRH, LDAPRB */
3352        if (!dc_isar_feature(aa64_rcpc_8_3, s) ||
3353            rs != 31 || a != 1 || r != 0) {
3354            unallocated_encoding(s);
3355            return;
3356        }
3357        break;
3358    default:
3359        unallocated_encoding(s);
3360        return;
3361    }
3362
3363    if (rn == 31) {
3364        gen_check_sp_alignment(s);
3365    }
3366    clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), false, rn != 31, size);
3367
3368    if (o3_opc == 014) {
3369        /*
3370         * LDAPR* are a special case because they are a simple load, not a
3371         * fetch-and-do-something op.
3372         * The architectural consistency requirements here are weaker than
3373         * full load-acquire (we only need "load-acquire processor consistent"),
3374         * but we choose to implement them as full LDAQ.
3375         */
3376        do_gpr_ld(s, cpu_reg(s, rt), clean_addr, size, false, false,
3377                  true, rt, disas_ldst_compute_iss_sf(size, false, 0), true);
3378        tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
3379        return;
3380    }
3381
3382    tcg_rs = read_cpu_reg(s, rs, true);
3383
3384    if (o3_opc == 1) { /* LDCLR */
3385        tcg_gen_not_i64(tcg_rs, tcg_rs);
3386    }
3387
3388    /* The tcg atomic primitives are all full barriers.  Therefore we
3389     * can ignore the Acquire and Release bits of this instruction.
3390     */
3391    fn(cpu_reg(s, rt), clean_addr, tcg_rs, get_mem_index(s),
3392       s->be_data | size | MO_ALIGN);
3393}
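
/*
 * Note on the LDCLR mapping above (illustrative, non-atomic host sketch):
 * the architectural operation is "Rt = Mem; Mem = Mem AND NOT Rs", while
 * TCG only provides an atomic fetch-and, so Rs is complemented first:
 */
static inline uint64_t example_ldclr(uint64_t *mem, uint64_t rs)
{
    uint64_t old = *mem;    /* the old value is returned in Rt */
    *mem = old & ~rs;       /* equivalent to fetch_and(mem, ~rs) */
    return old;
}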
3394
3395/*
3396 * PAC memory operations
3397 *
3398 *  31  30      27  26    24    22  21       12  11  10    5     0
3399 * +------+-------+---+-----+-----+---+--------+---+---+----+-----+
3400 * | size | 1 1 1 | V | 0 0 | M S | 1 |  imm9  | W | 1 | Rn |  Rt |
3401 * +------+-------+---+-----+-----+---+--------+---+---+----+-----+
3402 *
3403 * Rt: the result register
3404 * Rn: base address or SP
3405 * V: vector flag (always 0 as of v8.3)
3406 * M: clear for key DA, set for key DB
3407 * W: pre-indexing flag
3408 * S: sign bit for imm9 (S:imm9 is a 10-bit signed, scaled offset).
3409 */
3410static void disas_ldst_pac(DisasContext *s, uint32_t insn,
3411                           int size, int rt, bool is_vector)
3412{
3413    int rn = extract32(insn, 5, 5);
3414    bool is_wback = extract32(insn, 11, 1);
3415    bool use_key_a = !extract32(insn, 23, 1);
3416    int offset;
3417    TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3418
3419    if (size != 3 || is_vector || !dc_isar_feature(aa64_pauth, s)) {
3420        unallocated_encoding(s);
3421        return;
3422    }
3423
3424    if (rn == 31) {
3425        gen_check_sp_alignment(s);
3426    }
3427    dirty_addr = read_cpu_reg_sp(s, rn, 1);
3428
3429    if (s->pauth_active) {
3430        if (use_key_a) {
3431            gen_helper_autda(dirty_addr, cpu_env, dirty_addr,
3432                             new_tmp_a64_zero(s));
3433        } else {
3434            gen_helper_autdb(dirty_addr, cpu_env, dirty_addr,
3435                             new_tmp_a64_zero(s));
3436        }
3437    }
3438
3439    /* Form the 10-bit signed, scaled offset.  */
3440    offset = (extract32(insn, 22, 1) << 9) | extract32(insn, 12, 9);
3441    offset = sextract32(offset << size, 0, 10 + size);
3442    tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
3443
3444    /* Note that "clean" and "dirty" here refer to TBI not PAC.  */
3445    clean_addr = gen_mte_check1(s, dirty_addr, false,
3446                                is_wback || rn != 31, size);
3447
3448    tcg_rt = cpu_reg(s, rt);
3449    do_gpr_ld(s, tcg_rt, clean_addr, size, /* is_signed */ false,
3450              /* extend */ false, /* iss_valid */ !is_wback,
3451              /* iss_srt */ rt, /* iss_sf */ true, /* iss_ar */ false);
3452
3453    if (is_wback) {
3454        tcg_gen_mov_i64(cpu_reg_sp(s, rn), dirty_addr);
3455    }
3456}
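
/*
 * Worked example (illustrative): with S = 1 and imm9 = 0x1ff the
 * concatenation S:imm9 is 0x3ff (-1 as a 10-bit signed value); after
 * scaling by size = 3, sextract32(0x3ff << 3, 0, 13) yields -8.
 */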
3457
3458/*
3459 * LDAPR/STLR (unscaled immediate)
3460 *
3461 *  31  30            24    22  21       12    10    5     0
3462 * +------+-------------+-----+---+--------+-----+----+-----+
3463 * | size | 0 1 1 0 0 1 | opc | 0 |  imm9  | 0 0 | Rn |  Rt |
3464 * +------+-------------+-----+---+--------+-----+----+-----+
3465 *
3466 * Rt: source or destination register
3467 * Rn: base register
3468 * imm9: unscaled immediate offset
3469 * opc: 00: STLUR*, 01/10/11: various LDAPUR*
3470 * size: size of load/store
3471 */
3472static void disas_ldst_ldapr_stlr(DisasContext *s, uint32_t insn)
3473{
3474    int rt = extract32(insn, 0, 5);
3475    int rn = extract32(insn, 5, 5);
3476    int offset = sextract32(insn, 12, 9);
3477    int opc = extract32(insn, 22, 2);
3478    int size = extract32(insn, 30, 2);
3479    TCGv_i64 clean_addr, dirty_addr;
3480    bool is_store = false;
3481    bool is_signed = false;
3482    bool extend = false;
3483    bool iss_sf;
3484
3485    if (!dc_isar_feature(aa64_rcpc_8_4, s)) {
3486        unallocated_encoding(s);
3487        return;
3488    }
3489
3490    switch (opc) {
3491    case 0: /* STLURB */
3492        is_store = true;
3493        break;
3494    case 1: /* LDAPUR* */
3495        break;
3496    case 2: /* LDAPURS* 64-bit variant */
3497        if (size == 3) {
3498            unallocated_encoding(s);
3499            return;
3500        }
3501        is_signed = true;
3502        break;
3503    case 3: /* LDAPURS* 32-bit variant */
3504        if (size > 1) {
3505            unallocated_encoding(s);
3506            return;
3507        }
3508        is_signed = true;
3509        extend = true; /* zero-extend 32->64 after signed load */
3510        break;
3511    default:
3512        g_assert_not_reached();
3513    }
3514
3515    iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
3516
3517    if (rn == 31) {
3518        gen_check_sp_alignment(s);
3519    }
3520
3521    dirty_addr = read_cpu_reg_sp(s, rn, 1);
3522    tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
3523    clean_addr = clean_data_tbi(s, dirty_addr);
3524
3525    if (is_store) {
3526        /* Store-Release semantics */
3527        tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
3528        do_gpr_st(s, cpu_reg(s, rt), clean_addr, size, true, rt, iss_sf, true);
3529    } else {
3530        /*
3531         * Load-AcquirePC semantics; we implement them as the slightly more
3532         * restrictive Load-Acquire.
3533         */
3534        do_gpr_ld(s, cpu_reg(s, rt), clean_addr, size, is_signed, extend,
3535                  true, rt, iss_sf, true);
3536        tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
3537    }
3538}
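
/*
 * Barrier placement sketch (illustrative): release semantics must order
 * all earlier accesses before the store, so the fence is emitted before
 * it; acquire semantics must order the load before all later accesses,
 * so the fence is emitted after it:
 *
 *   STLUR:  <older accesses>  MB  store
 *   LDAPUR: load  MB  <younger accesses>
 */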
3539
3540/* Load/store register (all forms) */
3541static void disas_ldst_reg(DisasContext *s, uint32_t insn)
3542{
3543    int rt = extract32(insn, 0, 5);
3544    int opc = extract32(insn, 22, 2);
3545    bool is_vector = extract32(insn, 26, 1);
3546    int size = extract32(insn, 30, 2);
3547
3548    switch (extract32(insn, 24, 2)) {
3549    case 0:
3550        if (extract32(insn, 21, 1) == 0) {
3551            /* Load/store register (unscaled immediate)
3552             * Load/store immediate pre/post-indexed
3553             * Load/store register unprivileged
3554             */
3555            disas_ldst_reg_imm9(s, insn, opc, size, rt, is_vector);
3556            return;
3557        }
3558        switch (extract32(insn, 10, 2)) {
3559        case 0:
3560            disas_ldst_atomic(s, insn, size, rt, is_vector);
3561            return;
3562        case 2:
3563            disas_ldst_reg_roffset(s, insn, opc, size, rt, is_vector);
3564            return;
3565        default:
3566            disas_ldst_pac(s, insn, size, rt, is_vector);
3567            return;
3568        }
3569        break;
3570    case 1:
3571        disas_ldst_reg_unsigned_imm(s, insn, opc, size, rt, is_vector);
3572        return;
3573    }
3574    unallocated_encoding(s);
3575}
3576
3577/* AdvSIMD load/store multiple structures
3578 *
3579 *  31  30  29           23 22  21         16 15    12 11  10 9    5 4    0
3580 * +---+---+---------------+---+-------------+--------+------+------+------+
3581 * | 0 | Q | 0 0 1 1 0 0 0 | L | 0 0 0 0 0 0 | opcode | size |  Rn  |  Rt  |
3582 * +---+---+---------------+---+-------------+--------+------+------+------+
3583 *
3584 * AdvSIMD load/store multiple structures (post-indexed)
3585 *
3586 *  31  30  29           23 22  21  20     16 15    12 11  10 9    5 4    0
3587 * +---+---+---------------+---+---+---------+--------+------+------+------+
3588 * | 0 | Q | 0 0 1 1 0 0 1 | L | 0 |   Rm    | opcode | size |  Rn  |  Rt  |
3589 * +---+---+---------------+---+---+---------+--------+------+------+------+
3590 *
3591 * Rt: first (or only) SIMD&FP register to be transferred
3592 * Rn: base address or SP
3593 * Rm (post-index only): post-index register (when Rm != 31) or size-dependent #imm
3594 */
3595static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn)
3596{
3597    int rt = extract32(insn, 0, 5);
3598    int rn = extract32(insn, 5, 5);
3599    int rm = extract32(insn, 16, 5);
3600    int size = extract32(insn, 10, 2);
3601    int opcode = extract32(insn, 12, 4);
3602    bool is_store = !extract32(insn, 22, 1);
3603    bool is_postidx = extract32(insn, 23, 1);
3604    bool is_q = extract32(insn, 30, 1);
3605    TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
3606    MemOp endian = s->be_data;
3607
3608    int total;    /* total bytes */
3609    int elements; /* elements per vector */
3610    int rpt;    /* num iterations */
3611    int selem;  /* structure elements */
3612    int r;
3613
3614    if (extract32(insn, 31, 1) || extract32(insn, 21, 1)) {
3615        unallocated_encoding(s);
3616        return;
3617    }
3618
3619    if (!is_postidx && rm != 0) {
3620        unallocated_encoding(s);
3621        return;
3622    }
3623
3624    /* From the shared decode logic */
3625    switch (opcode) {
3626    case 0x0:
3627        rpt = 1;
3628        selem = 4;
3629        break;
3630    case 0x2:
3631        rpt = 4;
3632        selem = 1;
3633        break;
3634    case 0x4:
3635        rpt = 1;
3636        selem = 3;
3637        break;
3638    case 0x6:
3639        rpt = 3;
3640        selem = 1;
3641        break;
3642    case 0x7:
3643        rpt = 1;
3644        selem = 1;
3645        break;
3646    case 0x8:
3647        rpt = 1;
3648        selem = 2;
3649        break;
3650    case 0xa:
3651        rpt = 2;
3652        selem = 1;
3653        break;
3654    default:
3655        unallocated_encoding(s);
3656        return;
3657    }
3658
3659    if (size == 3 && !is_q && selem != 1) {
3660        /* reserved */
3661        unallocated_encoding(s);
3662        return;
3663    }
3664
3665    if (!fp_access_check(s)) {
3666        return;
3667    }
3668
3669    if (rn == 31) {
3670        gen_check_sp_alignment(s);
3671    }
3672
3673    /* For our purposes, bytes are always little-endian.  */
3674    if (size == 0) {
3675        endian = MO_LE;
3676    }
3677
3678    total = rpt * selem * (is_q ? 16 : 8);
3679    tcg_rn = cpu_reg_sp(s, rn);
3680
3681    /*
3682     * Issue the MTE check vs the logical repeat count, before we
3683     * promote consecutive little-endian elements below.
3684     */
3685    clean_addr = gen_mte_checkN(s, tcg_rn, is_store, is_postidx || rn != 31,
3686                                size, total);
3687
3688    /*
3689     * Consecutive little-endian elements from a single register
3690     * can be promoted to a larger little-endian operation.
3691     */
3692    if (selem == 1 && endian == MO_LE) {
3693        size = 3;
3694    }
3695    elements = (is_q ? 16 : 8) >> size;
3696
3697    tcg_ebytes = tcg_const_i64(1 << size);
3698    for (r = 0; r < rpt; r++) {
3699        int e;
3700        for (e = 0; e < elements; e++) {
3701            int xs;
3702            for (xs = 0; xs < selem; xs++) {
3703                int tt = (rt + r + xs) % 32;
3704                if (is_store) {
3705                    do_vec_st(s, tt, e, clean_addr, size, endian);
3706                } else {
3707                    do_vec_ld(s, tt, e, clean_addr, size, endian);
3708                }
3709                tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
3710            }
3711        }
3712    }
3713    tcg_temp_free_i64(tcg_ebytes);
3714
3715    if (!is_store) {
3716        /* For non-quad operations, setting a slice of the low
3717         * 64 bits of the register clears the high 64 bits (in
3718         * the ARM ARM pseudocode this is implicit in the fact
3719         * that 'rval' is a 64 bit wide variable).
3720     * For quad operations, we might still need to zero the
3721     * high bits of the SVE register.
3722         */
3723        for (r = 0; r < rpt * selem; r++) {
3724            int tt = (rt + r) % 32;
3725            clear_vec_high(s, is_q, tt);
3726        }
3727    }
3728
3729    if (is_postidx) {
3730        if (rm == 31) {
3731            tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
3732        } else {
3733            tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
3734        }
3735    }
3736}
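
/*
 * Worked example (illustrative): "ld4 {v0.16b-v3.16b}, [x0]" decodes to
 * rpt = 1, selem = 4, is_q = 1, so total = 1 * 4 * 16 = 64 bytes and the
 * MTE check above covers the whole transfer.  Because selem != 1, the
 * elements interleave across v0..v3 and cannot be promoted to 64-bit
 * little-endian loads.
 */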
3737
3738/* AdvSIMD load/store single structure
3739 *
3740 *  31  30  29           23 22 21 20       16 15 13 12  11  10 9    5 4    0
3741 * +---+---+---------------+-----+-----------+-----+---+------+------+------+
3742 * | 0 | Q | 0 0 1 1 0 1 0 | L R | 0 0 0 0 0 | opc | S | size |  Rn  |  Rt  |
3743 * +---+---+---------------+-----+-----------+-----+---+------+------+------+
3744 *
3745 * AdvSIMD load/store single structure (post-indexed)
3746 *
3747 *  31  30  29           23 22 21 20       16 15 13 12  11  10 9    5 4    0
3748 * +---+---+---------------+-----+-----------+-----+---+------+------+------+
3749 * | 0 | Q | 0 0 1 1 0 1 1 | L R |     Rm    | opc | S | size |  Rn  |  Rt  |
3750 * +---+---+---------------+-----+-----------+-----+---+------+------+------+
3751 *
3752 * Rt: first (or only) SIMD&FP register to be transferred
3753 * Rn: base address or SP
3754 * Rm (post-index only): post-index register (when Rm != 31) or size-dependent #imm
3755 * index: lane index, encoded in Q:S:size (interpretation depends on lane size)
3756 *
3757 * lane_size = encoded in R, opc
3758 * transfer width = encoded in opc, S, size
3759 */
3760static void disas_ldst_single_struct(DisasContext *s, uint32_t insn)
3761{
3762    int rt = extract32(insn, 0, 5);
3763    int rn = extract32(insn, 5, 5);
3764    int rm = extract32(insn, 16, 5);
3765    int size = extract32(insn, 10, 2);
3766    int S = extract32(insn, 12, 1);
3767    int opc = extract32(insn, 13, 3);
3768    int R = extract32(insn, 21, 1);
3769    int is_load = extract32(insn, 22, 1);
3770    int is_postidx = extract32(insn, 23, 1);
3771    int is_q = extract32(insn, 30, 1);
3772
3773    int scale = extract32(opc, 1, 2);
3774    int selem = (extract32(opc, 0, 1) << 1 | R) + 1;
3775    bool replicate = false;
3776    int index = is_q << 3 | S << 2 | size;
3777    int xs, total;
3778    TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
3779
3780    if (extract32(insn, 31, 1)) {
3781        unallocated_encoding(s);
3782        return;
3783    }
3784    if (!is_postidx && rm != 0) {
3785        unallocated_encoding(s);
3786        return;
3787    }
3788
3789    switch (scale) {
3790    case 3:
3791        if (!is_load || S) {
3792            unallocated_encoding(s);
3793            return;
3794        }
3795        scale = size;
3796        replicate = true;
3797        break;
3798    case 0:
3799        break;
3800    case 1:
3801        if (extract32(size, 0, 1)) {
3802            unallocated_encoding(s);
3803            return;
3804        }
3805        index >>= 1;
3806        break;
3807    case 2:
3808        if (extract32(size, 1, 1)) {
3809            unallocated_encoding(s);
3810            return;
3811        }
3812        if (!extract32(size, 0, 1)) {
3813            index >>= 2;
3814        } else {
3815            if (S) {
3816                unallocated_encoding(s);
3817                return;
3818            }
3819            index >>= 3;
3820            scale = 3;
3821        }
3822        break;
3823    default:
3824        g_assert_not_reached();
3825    }
3826
3827    if (!fp_access_check(s)) {
3828        return;
3829    }
3830
3831    if (rn == 31) {
3832        gen_check_sp_alignment(s);
3833    }
3834
3835    total = selem << scale;
3836    tcg_rn = cpu_reg_sp(s, rn);
3837
3838    clean_addr = gen_mte_checkN(s, tcg_rn, !is_load, is_postidx || rn != 31,
3839                                scale, total);
3840
3841    tcg_ebytes = tcg_const_i64(1 << scale);
3842    for (xs = 0; xs < selem; xs++) {
3843        if (replicate) {
3844            /* Load and replicate to all elements */
3845            TCGv_i64 tcg_tmp = tcg_temp_new_i64();
3846
3847            tcg_gen_qemu_ld_i64(tcg_tmp, clean_addr,
3848                                get_mem_index(s), s->be_data + scale);
3849            tcg_gen_gvec_dup_i64(scale, vec_full_reg_offset(s, rt),
3850                                 (is_q + 1) * 8, vec_full_reg_size(s),
3851                                 tcg_tmp);
3852            tcg_temp_free_i64(tcg_tmp);
3853        } else {
3854            /* Load/store one element per register */
3855            if (is_load) {
3856                do_vec_ld(s, rt, index, clean_addr, scale, s->be_data);
3857            } else {
3858                do_vec_st(s, rt, index, clean_addr, scale, s->be_data);
3859            }
3860        }
3861        tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
3862        rt = (rt + 1) % 32;
3863    }
3864    tcg_temp_free_i64(tcg_ebytes);
3865
3866    if (is_postidx) {
3867        if (rm == 31) {
3868            tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
3869        } else {
3870            tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
3871        }
3872    }
3873}
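
/*
 * Worked example (illustrative): for "ld1 {v0.s}[3], [x0]" the lane size
 * is 32 bits, so scale = 2 and size<1:0> must be 00; the lane number is
 * then Q:S:size >> 2 = Q:S, i.e. Q = 1, S = 1 selects lane 3.
 */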
3874
3875/*
3876 * Load/Store memory tags
3877 *
3878 *  31 30 29         24     22  21     12    10      5      0
3879 * +-----+-------------+-----+---+------+-----+------+------+
3880 * | 1 1 | 0 1 1 0 0 1 | op1 | 1 | imm9 | op2 |  Rn  |  Rt  |
3881 * +-----+-------------+-----+---+------+-----+------+------+
3882 */
3883static void disas_ldst_tag(DisasContext *s, uint32_t insn)
3884{
3885    int rt = extract32(insn, 0, 5);
3886    int rn = extract32(insn, 5, 5);
3887    uint64_t offset = sextract64(insn, 12, 9) << LOG2_TAG_GRANULE;
3888    int op2 = extract32(insn, 10, 2);
3889    int op1 = extract32(insn, 22, 2);
3890    bool is_load = false, is_pair = false, is_zero = false, is_mult = false;
3891    int index = 0;
3892    TCGv_i64 addr, clean_addr, tcg_rt;
3893
3894    /* We checked insn bits [29:24,21] in the caller.  */
3895    if (extract32(insn, 30, 2) != 3) {
3896        goto do_unallocated;
3897    }
3898
3899    /*
3900     * @index is a tri-state variable controlling the addressing mode:
3901     * < 0 : post-index, writeback
3902     * = 0 : signed offset
3903     * > 0 : pre-index, writeback
3904     */
3905    switch (op1) {
3906    case 0:
3907        if (op2 != 0) {
3908            /* STG */
3909            index = op2 - 2;
3910        } else {
3911            /* STZGM */
3912            if (s->current_el == 0 || offset != 0) {
3913                goto do_unallocated;
3914            }
3915            is_mult = is_zero = true;
3916        }
3917        break;
3918    case 1:
3919        if (op2 != 0) {
3920            /* STZG */
3921            is_zero = true;
3922            index = op2 - 2;
3923        } else {
3924            /* LDG */
3925            is_load = true;
3926        }
3927        break;
3928    case 2:
3929        if (op2 != 0) {
3930            /* ST2G */
3931            is_pair = true;
3932            index = op2 - 2;
3933        } else {
3934            /* STGM */
3935            if (s->current_el == 0 || offset != 0) {
3936                goto do_unallocated;
3937            }
3938            is_mult = true;
3939        }
3940        break;
3941    case 3:
3942        if (op2 != 0) {
3943            /* STZ2G */
3944            is_pair = is_zero = true;
3945            index = op2 - 2;
3946        } else {
3947            /* LDGM */
3948            if (s->current_el == 0 || offset != 0) {
3949                goto do_unallocated;
3950            }
3951            is_mult = is_load = true;
3952        }
3953        break;
3954
3955    default:
3956    do_unallocated:
3957        unallocated_encoding(s);
3958        return;
3959    }
3960
3961    if (is_mult
3962        ? !dc_isar_feature(aa64_mte, s)
3963        : !dc_isar_feature(aa64_mte_insn_reg, s)) {
3964        goto do_unallocated;
3965    }
3966
3967    if (rn == 31) {
3968        gen_check_sp_alignment(s);
3969    }
3970
3971    addr = read_cpu_reg_sp(s, rn, true);
3972    if (index >= 0) {
3973        /* pre-index or signed offset */
3974        tcg_gen_addi_i64(addr, addr, offset);
3975    }
3976
3977    if (is_mult) {
3978        tcg_rt = cpu_reg(s, rt);
3979
3980        if (is_zero) {
3981            int size = 4 << s->dcz_blocksize;
3982
3983            if (s->ata) {
3984                gen_helper_stzgm_tags(cpu_env, addr, tcg_rt);
3985            }
3986            /*
3987             * The non-tags portion of STZGM is mostly like DC_ZVA,
3988             * except the alignment happens before the access.
3989             */
3990            clean_addr = clean_data_tbi(s, addr);
3991            tcg_gen_andi_i64(clean_addr, clean_addr, -size);
3992            gen_helper_dc_zva(cpu_env, clean_addr);
3993        } else if (s->ata) {
3994            if (is_load) {
3995                gen_helper_ldgm(tcg_rt, cpu_env, addr);
3996            } else {
3997                gen_helper_stgm(cpu_env, addr, tcg_rt);
3998            }
3999        } else {
4000            MMUAccessType acc = is_load ? MMU_DATA_LOAD : MMU_DATA_STORE;
4001            int size = 4 << GMID_EL1_BS;
4002
4003            clean_addr = clean_data_tbi(s, addr);
4004            tcg_gen_andi_i64(clean_addr, clean_addr, -size);
4005            gen_probe_access(s, clean_addr, acc, size);
4006
4007            if (is_load) {
4008                /* The result tags are zeros.  */
4009                tcg_gen_movi_i64(tcg_rt, 0);
4010            }
4011        }
4012        return;
4013    }
4014
4015    if (is_load) {
4016        tcg_gen_andi_i64(addr, addr, -TAG_GRANULE);
4017        tcg_rt = cpu_reg(s, rt);
4018        if (s->ata) {
4019            gen_helper_ldg(tcg_rt, cpu_env, addr, tcg_rt);
4020        } else {
4021            clean_addr = clean_data_tbi(s, addr);
4022            gen_probe_access(s, clean_addr, MMU_DATA_LOAD, MO_8);
4023            gen_address_with_allocation_tag0(tcg_rt, addr);
4024        }
4025    } else {
4026        tcg_rt = cpu_reg_sp(s, rt);
4027        if (!s->ata) {
4028            /*
4029             * For STG and ST2G, we need to check alignment and probe memory.
4030             * TODO: For STZG and STZ2G, we could rely on the stores below,
4031             * at least for system mode; user-only won't enforce alignment.
4032             */
4033            if (is_pair) {
4034                gen_helper_st2g_stub(cpu_env, addr);
4035            } else {
4036                gen_helper_stg_stub(cpu_env, addr);
4037            }
4038        } else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
4039            if (is_pair) {
4040                gen_helper_st2g_parallel(cpu_env, addr, tcg_rt);
4041            } else {
4042                gen_helper_stg_parallel(cpu_env, addr, tcg_rt);
4043            }
4044        } else {
4045            if (is_pair) {
4046                gen_helper_st2g(cpu_env, addr, tcg_rt);
4047            } else {
4048                gen_helper_stg(cpu_env, addr, tcg_rt);
4049            }
4050        }
4051    }
4052
4053    if (is_zero) {
4054        TCGv_i64 clean_addr = clean_data_tbi(s, addr);
4055        TCGv_i64 tcg_zero = tcg_const_i64(0);
4056        int mem_index = get_mem_index(s);
4057        int i, n = (1 + is_pair) << LOG2_TAG_GRANULE;
4058
4059        tcg_gen_qemu_st_i64(tcg_zero, clean_addr, mem_index,
4060                            MO_Q | MO_ALIGN_16);
4061        for (i = 8; i < n; i += 8) {
4062            tcg_gen_addi_i64(clean_addr, clean_addr, 8);
4063            tcg_gen_qemu_st_i64(tcg_zero, clean_addr, mem_index, MO_Q);
4064        }
4065        tcg_temp_free_i64(tcg_zero);
4066    }
4067
4068    if (index != 0) {
4069        /* pre-index or post-index */
4070        if (index < 0) {
4071            /* post-index */
4072            tcg_gen_addi_i64(addr, addr, offset);
4073        }
4074        tcg_gen_mov_i64(cpu_reg_sp(s, rn), addr);
4075    }
4076}
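
/*
 * Worked example (illustrative): imm9 is scaled by the 16-byte tag
 * granule (LOG2_TAG_GRANULE = 4), so "stg x0, [x1, #-16]" encodes
 * imm9 = -1 and the decoder reconstructs offset = -1 * 16 = -16.
 * op2 selects the addressing mode via index = op2 - 2 above:
 * 1 -> post-index, 2 -> signed offset, 3 -> pre-index.
 */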
4077
4078/* Loads and stores */
4079static void disas_ldst(DisasContext *s, uint32_t insn)
4080{
4081    switch (extract32(insn, 24, 6)) {
4082    case 0x08: /* Load/store exclusive */
4083        disas_ldst_excl(s, insn);
4084        break;
4085    case 0x18: case 0x1c: /* Load register (literal) */
4086        disas_ld_lit(s, insn);
4087        break;
4088    case 0x28: case 0x29:
4089    case 0x2c: case 0x2d: /* Load/store pair (all forms) */
4090        disas_ldst_pair(s, insn);
4091        break;
4092    case 0x38: case 0x39:
4093    case 0x3c: case 0x3d: /* Load/store register (all forms) */
4094        disas_ldst_reg(s, insn);
4095        break;
4096    case 0x0c: /* AdvSIMD load/store multiple structures */
4097        disas_ldst_multiple_struct(s, insn);
4098        break;
4099    case 0x0d: /* AdvSIMD load/store single structure */
4100        disas_ldst_single_struct(s, insn);
4101        break;
4102    case 0x19:
4103        if (extract32(insn, 21, 1) != 0) {
4104            disas_ldst_tag(s, insn);
4105        } else if (extract32(insn, 10, 2) == 0) {
4106            disas_ldst_ldapr_stlr(s, insn);
4107        } else {
4108            unallocated_encoding(s);
4109        }
4110        break;
4111    default:
4112        unallocated_encoding(s);
4113        break;
4114    }
4115}
4116
4117/* PC-rel. addressing
4118 *   31  30   29 28       24 23                5 4    0
4119 * +----+-------+-----------+-------------------+------+
4120 * | op | immlo | 1 0 0 0 0 |       immhi       |  Rd  |
4121 * +----+-------+-----------+-------------------+------+
4122 */
4123static void disas_pc_rel_adr(DisasContext *s, uint32_t insn)
4124{
4125    unsigned int page, rd;
4126    uint64_t base;
4127    uint64_t offset;
4128
4129    page = extract32(insn, 31, 1);
4130    /* SignExtend(immhi:immlo) -> offset */
4131    offset = sextract64(insn, 5, 19);
4132    offset = offset << 2 | extract32(insn, 29, 2);
4133    rd = extract32(insn, 0, 5);
4134    base = s->pc_curr;
4135
4136    if (page) {
4137        /* ADRP (page based) */
4138        base &= ~0xfff;
4139        offset <<= 12;
4140    }
4141
4142    tcg_gen_movi_i64(cpu_reg(s, rd), base + offset);
4143}
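
/*
 * Worked example (illustrative): with pc_curr = 0x400123, an ADRP with
 * immhi:immlo = 1 computes (0x400123 & ~0xfff) + (1 << 12) = 0x401000;
 * plain ADR skips both the page masking and the extra shift by 12.
 */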
4144
4145/*
4146 * Add/subtract (immediate)
4147 *
4148 *  31 30 29 28         23 22 21         10 9   5 4   0
4149 * +--+--+--+-------------+--+-------------+-----+-----+
4150 * |sf|op| S| 1 0 0 0 1 0 |sh|    imm12    |  Rn | Rd  |
4151 * +--+--+--+-------------+--+-------------+-----+-----+
4152 *
4153 *    sf: 0 -> 32bit, 1 -> 64bit
4154 *    op: 0 -> add  , 1 -> sub
4155 *     S: 1 -> set flags
4156 *    sh: 1 -> LSL imm by 12
4157 */
4158static void disas_add_sub_imm(DisasContext *s, uint32_t insn)
4159{
4160    int rd = extract32(insn, 0, 5);
4161    int rn = extract32(insn, 5, 5);
4162    uint64_t imm = extract32(insn, 10, 12);
4163    bool shift = extract32(insn, 22, 1);
4164    bool setflags = extract32(insn, 29, 1);
4165    bool sub_op = extract32(insn, 30, 1);
4166    bool is_64bit = extract32(insn, 31, 1);
4167
4168    TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
4169    TCGv_i64 tcg_rd = setflags ? cpu_reg(s, rd) : cpu_reg_sp(s, rd);
4170    TCGv_i64 tcg_result;
4171
4172    if (shift) {
4173        imm <<= 12;
4174    }
4175
4176    tcg_result = tcg_temp_new_i64();
4177    if (!setflags) {
4178        if (sub_op) {
4179            tcg_gen_subi_i64(tcg_result, tcg_rn, imm);
4180        } else {
4181            tcg_gen_addi_i64(tcg_result, tcg_rn, imm);
4182        }
4183    } else {
4184        TCGv_i64 tcg_imm = tcg_const_i64(imm);
4185        if (sub_op) {
4186            gen_sub_CC(is_64bit, tcg_result, tcg_rn, tcg_imm);
4187        } else {
4188            gen_add_CC(is_64bit, tcg_result, tcg_rn, tcg_imm);
4189        }
4190        tcg_temp_free_i64(tcg_imm);
4191    }
4192
4193    if (is_64bit) {
4194        tcg_gen_mov_i64(tcg_rd, tcg_result);
4195    } else {
4196        tcg_gen_ext32u_i64(tcg_rd, tcg_result);
4197    }
4198
4199    tcg_temp_free_i64(tcg_result);
4200}
4201
4202/*
4203 * Add/subtract (immediate, with tags)
4204 *
4205 *  31 30 29 28         23 22 21     16 14      10 9   5 4   0
4206 * +--+--+--+-------------+--+---------+--+-------+-----+-----+
4207 * |sf|op| S| 1 0 0 0 1 1 |o2|  uimm6  |o3| uimm4 |  Rn | Rd  |
4208 * +--+--+--+-------------+--+---------+--+-------+-----+-----+
4209 *
4210 *    op: 0 -> add, 1 -> sub
4211 */
4212static void disas_add_sub_imm_with_tags(DisasContext *s, uint32_t insn)
4213{
4214    int rd = extract32(insn, 0, 5);
4215    int rn = extract32(insn, 5, 5);
4216    int uimm4 = extract32(insn, 10, 4);
4217    int uimm6 = extract32(insn, 16, 6);
4218    bool sub_op = extract32(insn, 30, 1);
4219    TCGv_i64 tcg_rn, tcg_rd;
4220    int imm;
4221
4222    /* Test all of sf=1, S=0, o2=0, o3=0.  */
4223    if ((insn & 0xa040c000u) != 0x80000000u ||
4224        !dc_isar_feature(aa64_mte_insn_reg, s)) {
4225        unallocated_encoding(s);
4226        return;
4227    }
4228
4229    imm = uimm6 << LOG2_TAG_GRANULE;
4230    if (sub_op) {
4231        imm = -imm;
4232    }
4233
4234    tcg_rn = cpu_reg_sp(s, rn);
4235    tcg_rd = cpu_reg_sp(s, rd);
4236
4237    if (s->ata) {
4238        TCGv_i32 offset = tcg_const_i32(imm);
4239        TCGv_i32 tag_offset = tcg_const_i32(uimm4);
4240
4241        gen_helper_addsubg(tcg_rd, cpu_env, tcg_rn, offset, tag_offset);
4242        tcg_temp_free_i32(tag_offset);
4243        tcg_temp_free_i32(offset);
4244    } else {
4245        tcg_gen_addi_i64(tcg_rd, tcg_rn, imm);
4246        gen_address_with_allocation_tag0(tcg_rd, tcg_rd);
4247    }
4248}
4249
4250/* The input should be a value in the bottom e bits (with higher
4251 * bits zero); returns that value replicated into every element
4252 * of size e in a 64 bit integer.
4253 */
4254static uint64_t bitfield_replicate(uint64_t mask, unsigned int e)
4255{
4256    assert(e != 0);
4257    while (e < 64) {
4258        mask |= mask << e;
4259        e *= 2;
4260    }
4261    return mask;
4262}
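
/*
 * For example (illustrative): bitfield_replicate(0x1, 2) yields
 * 0x5555555555555555 and bitfield_replicate(0x0f, 8) yields
 * 0x0f0f0f0f0f0f0f0f.
 */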
4263
4264/* Return a value with the bottom len bits set (where 0 < len <= 64) */
4265static inline uint64_t bitmask64(unsigned int length)
4266{
4267    assert(length > 0 && length <= 64);
4268    return ~0ULL >> (64 - length);
4269}
4270
4271/* Simplified variant of pseudocode DecodeBitMasks() for the case where we
4272 * only require the wmask. Returns false if the imms/immr/immn are a reserved
4273 * value (ie should cause a guest UNDEF exception), and true if they are
4274 * valid, in which case the decoded bit pattern is written to result.
4275 */
4276bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn,
4277                            unsigned int imms, unsigned int immr)
4278{
4279    uint64_t mask;
4280    unsigned e, levels, s, r;
4281    int len;
4282
4283    assert(immn < 2 && imms < 64 && immr < 64);
4284
4285    /* The bit patterns we create here are 64 bit patterns which
4286     * are vectors of identical elements of size e = 2, 4, 8, 16, 32 or
4287     * 64 bits each. Each element contains the same value: a run
4288     * of between 1 and e-1 non-zero bits, rotated within the
4289     * element by between 0 and e-1 bits.
4290     *
4291     * The element size and run length are encoded into immn (1 bit)
4292     * and imms (6 bits) as follows:
4293     * 64 bit elements: immn = 1, imms = <length of run - 1>
4294     * 32 bit elements: immn = 0, imms = 0 : <length of run - 1>
4295     * 16 bit elements: immn = 0, imms = 10 : <length of run - 1>
4296     *  8 bit elements: immn = 0, imms = 110 : <length of run - 1>
4297     *  4 bit elements: immn = 0, imms = 1110 : <length of run - 1>
4298     *  2 bit elements: immn = 0, imms = 11110 : <length of run - 1>
4299     * Notice that immn = 0, imms = 11111x is the only combination
4300     * not covered by one of the above options; this is reserved.
4301     * Further, <length of run - 1> all-ones is a reserved pattern.
4302     *
4303     * In all cases the rotation is by immr % e (and immr is 6 bits).
4304     */
4305
4306    /* First determine the element size */
4307    len = 31 - clz32((immn << 6) | (~imms & 0x3f));
4308    if (len < 1) {
4309        /* This is the immn == 0, imms == 11111x case */
4310        return false;
4311    }
4312    e = 1 << len;
4313
4314    levels = e - 1;
4315    s = imms & levels;
4316    r = immr & levels;
4317
4318    if (s == levels) {
4319        /* <length of run - 1> mustn't be all-ones. */
4320        return false;
4321    }
4322
4323    /* Create the value of one element: s+1 set bits rotated
4324     * by r within the element (which is e bits wide)...
4325     */
4326    mask = bitmask64(s + 1);
4327    if (r) {
4328        mask = (mask >> r) | (mask << (e - r));
4329        mask &= bitmask64(e);
4330    }
4331    /* ...then replicate the element over the whole 64 bit value */
4332    mask = bitfield_replicate(mask, e);
4333    *result = mask;
4334    return true;
4335}
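
/*
 * Worked example (illustrative): immn = 0, imms = 0b111100, immr = 0
 * selects 2-bit elements (len = 1, so e = 2) with s = 0 and r = 0, i.e.
 * a run of one set bit per element; replication gives
 * wmask = 0x5555555555555555.
 */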
4336
4337/* Logical (immediate)
4338 *   31  30 29 28         23 22  21  16 15  10 9    5 4    0
4339 * +----+-----+-------------+---+------+------+------+------+
4340 * | sf | opc | 1 0 0 1 0 0 | N | immr | imms |  Rn  |  Rd  |
4341 * +----+-----+-------------+---+------+------+------+------+
4342 */
4343static void disas_logic_imm(DisasContext *s, uint32_t insn)
4344{
4345    unsigned int sf, opc, is_n, immr, imms, rn, rd;
4346    TCGv_i64 tcg_rd, tcg_rn;
4347    uint64_t wmask;
4348    bool is_and = false;
4349
4350    sf = extract32(insn, 31, 1);
4351    opc = extract32(insn, 29, 2);
4352    is_n = extract32(insn, 22, 1);
4353    immr = extract32(insn, 16, 6);
4354    imms = extract32(insn, 10, 6);
4355    rn = extract32(insn, 5, 5);
4356    rd = extract32(insn, 0, 5);
4357
4358    if (!sf && is_n) {
4359        unallocated_encoding(s);
4360        return;
4361    }
4362
4363    if (opc == 0x3) { /* ANDS */
4364        tcg_rd = cpu_reg(s, rd);
4365    } else {
4366        tcg_rd = cpu_reg_sp(s, rd);
4367    }
4368    tcg_rn = cpu_reg(s, rn);
4369
4370    if (!logic_imm_decode_wmask(&wmask, is_n, imms, immr)) {
4371        /* some immediate field values are reserved */
4372        unallocated_encoding(s);
4373        return;
4374    }
4375
4376    if (!sf) {
4377        wmask &= 0xffffffff;
4378    }
4379
4380    switch (opc) {
4381    case 0x3: /* ANDS */
4382    case 0x0: /* AND */
4383        tcg_gen_andi_i64(tcg_rd, tcg_rn, wmask);
4384        is_and = true;
4385        break;
4386    case 0x1: /* ORR */
4387        tcg_gen_ori_i64(tcg_rd, tcg_rn, wmask);
4388        break;
4389    case 0x2: /* EOR */
4390        tcg_gen_xori_i64(tcg_rd, tcg_rn, wmask);
4391        break;
4392    default:
4393        assert(FALSE); /* must handle all above */
4394        break;
4395    }
4396
4397    if (!sf && !is_and) {
4398        /* zero extend final result; we know we can skip this for AND
4399         * since the immediate had the high 32 bits clear.
4400         */
4401        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4402    }
4403
4404    if (opc == 3) { /* ANDS */
4405        gen_logic_CC(sf, tcg_rd);
4406    }
4407}
4408
4409/*
4410 * Move wide (immediate)
4411 *
4412 *  31 30 29 28         23 22 21 20             5 4    0
4413 * +--+-----+-------------+-----+----------------+------+
4414 * |sf| opc | 1 0 0 1 0 1 |  hw |  imm16         |  Rd  |
4415 * +--+-----+-------------+-----+----------------+------+
4416 *
4417 * sf: 0 -> 32 bit, 1 -> 64 bit
4418 * opc: 00 -> MOVN, 10 -> MOVZ, 11 -> MOVK (01 unallocated)
4419 * hw: shift amount / 16 (0 or 16; 32 and 48 only when sf=1)
4420 */
4421static void disas_movw_imm(DisasContext *s, uint32_t insn)
4422{
4423    int rd = extract32(insn, 0, 5);
4424    uint64_t imm = extract32(insn, 5, 16);
4425    int sf = extract32(insn, 31, 1);
4426    int opc = extract32(insn, 29, 2);
4427    int pos = extract32(insn, 21, 2) << 4;
4428    TCGv_i64 tcg_rd = cpu_reg(s, rd);
4429    TCGv_i64 tcg_imm;
4430
4431    if (!sf && (pos >= 32)) {
4432        unallocated_encoding(s);
4433        return;
4434    }
4435
4436    switch (opc) {
4437    case 0: /* MOVN */
4438    case 2: /* MOVZ */
4439        imm <<= pos;
4440        if (opc == 0) {
4441            imm = ~imm;
4442        }
4443        if (!sf) {
4444            imm &= 0xffffffffu;
4445        }
4446        tcg_gen_movi_i64(tcg_rd, imm);
4447        break;
4448    case 3: /* MOVK */
4449        tcg_imm = tcg_const_i64(imm);
4450        tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_imm, pos, 16);
4451        tcg_temp_free_i64(tcg_imm);
4452        if (!sf) {
4453            tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4454        }
4455        break;
4456    default:
4457        unallocated_encoding(s);
4458        break;
4459    }
4460}
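
/*
 * Worked example (illustrative): a full 64-bit constant is typically
 * built as MOVZ plus up to three MOVKs, each depositing 16 bits at
 * hw * 16:
 *
 *   movz x0, #0x1234, lsl #48
 *   movk x0, #0x5678, lsl #32
 *   movk x0, #0x9abc, lsl #16
 *   movk x0, #0xdef0
 *
 * leaves x0 = 0x123456789abcdef0; each MOVK is one deposit_i64 above.
 */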
4461
4462/* Bitfield
4463 *   31  30 29 28         23 22  21  16 15  10 9    5 4    0
4464 * +----+-----+-------------+---+------+------+------+------+
4465 * | sf | opc | 1 0 0 1 1 0 | N | immr | imms |  Rn  |  Rd  |
4466 * +----+-----+-------------+---+------+------+------+------+
4467 */
4468static void disas_bitfield(DisasContext *s, uint32_t insn)
4469{
4470    unsigned int sf, n, opc, ri, si, rn, rd, bitsize, pos, len;
4471    TCGv_i64 tcg_rd, tcg_tmp;
4472
4473    sf = extract32(insn, 31, 1);
4474    opc = extract32(insn, 29, 2);
4475    n = extract32(insn, 22, 1);
4476    ri = extract32(insn, 16, 6);
4477    si = extract32(insn, 10, 6);
4478    rn = extract32(insn, 5, 5);
4479    rd = extract32(insn, 0, 5);
4480    bitsize = sf ? 64 : 32;
4481
4482    if (sf != n || ri >= bitsize || si >= bitsize || opc > 2) {
4483        unallocated_encoding(s);
4484        return;
4485    }
4486
4487    tcg_rd = cpu_reg(s, rd);
4488
4489    /* Suppress the zero-extend for !sf.  Since RI and SI are constrained
4490       to be smaller than bitsize, we'll never reference data outside the
4491       low 32-bits anyway.  */
4492    tcg_tmp = read_cpu_reg(s, rn, 1);
4493
4494    /* Recognize simple(r) extractions.  */
4495    if (si >= ri) {
4496        /* Wd<s-r:0> = Wn<s:r> */
4497        len = (si - ri) + 1;
4498        if (opc == 0) { /* SBFM: ASR, SBFX, SXTB, SXTH, SXTW */
4499            tcg_gen_sextract_i64(tcg_rd, tcg_tmp, ri, len);
4500            goto done;
4501        } else if (opc == 2) { /* UBFM: UBFX, LSR, UXTB, UXTH */
4502            tcg_gen_extract_i64(tcg_rd, tcg_tmp, ri, len);
4503            return;
4504        }
4505        /* opc == 1, BFXIL: fall through to the deposit below */
4506        tcg_gen_shri_i64(tcg_tmp, tcg_tmp, ri);
4507        pos = 0;
4508    } else {
4509        /* Handle the ri > si case with a deposit
4510         * Wd<32+s-r,32-r> = Wn<s:0>
4511         */
4512        len = si + 1;
4513        pos = (bitsize - ri) & (bitsize - 1);
4514    }
4515
4516    if (opc == 0 && len < ri) {
4517        /* SBFM: sign extend the destination field from len to fill
4518           the balance of the word.  Let the deposit below insert all
4519           of those sign bits.  */
4520        tcg_gen_sextract_i64(tcg_tmp, tcg_tmp, 0, len);
4521        len = ri;
4522    }
4523
4524    if (opc == 1) { /* BFM, BFXIL */
4525        tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, pos, len);
4526    } else {
4527        /* SBFM or UBFM: We start with zero, and we haven't modified
4528           any bits outside bitsize, therefore the zero-extension
4529           below is unneeded.  */
4530        tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len);
4531        return;
4532    }
4533
4534 done:
4535    if (!sf) { /* zero extend final result */
4536        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4537    }
4538}
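
/*
 * Worked example (illustrative): "ubfx w0, w1, #4, #8" is UBFM with
 * ri = 4 and si = 11, so si >= ri and len = 8, and the decoder reduces
 * it to tcg_gen_extract_i64(rd, rn, 4, 8); the opposite case (ri > si)
 * is the BFI/deposit path.
 */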
4539
4540/* Extract
4541 *   31  30  29 28         23 22   21  20  16 15    10 9    5 4    0
4542 * +----+------+-------------+---+----+------+--------+------+------+
4543 * | sf | op21 | 1 0 0 1 1 1 | N | o0 |  Rm  |  imms  |  Rn  |  Rd  |
4544 * +----+------+-------------+---+----+------+--------+------+------+
4545 */
4546static void disas_extract(DisasContext *s, uint32_t insn)
4547{
4548    unsigned int sf, n, rm, imm, rn, rd, bitsize, op21, op0;
4549
4550    sf = extract32(insn, 31, 1);
4551    n = extract32(insn, 22, 1);
4552    rm = extract32(insn, 16, 5);
4553    imm = extract32(insn, 10, 6);
4554    rn = extract32(insn, 5, 5);
4555    rd = extract32(insn, 0, 5);
4556    op21 = extract32(insn, 29, 2);
4557    op0 = extract32(insn, 21, 1);
4558    bitsize = sf ? 64 : 32;
4559
4560    if (sf != n || op21 || op0 || imm >= bitsize) {
4561        unallocated_encoding(s);
4562    } else {
4563        TCGv_i64 tcg_rd, tcg_rm, tcg_rn;
4564
4565        tcg_rd = cpu_reg(s, rd);
4566
4567        if (unlikely(imm == 0)) {
4568            /* tcg shl_i32/shl_i64 is undefined for 32/64 bit shifts,
4569             * so an extract from bit 0 is a special case.
4570             */
4571            if (sf) {
4572                tcg_gen_mov_i64(tcg_rd, cpu_reg(s, rm));
4573            } else {
4574                tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, rm));
4575            }
4576        } else {
4577            tcg_rm = cpu_reg(s, rm);
4578            tcg_rn = cpu_reg(s, rn);
4579
4580            if (sf) {
4581                /* Specialization to ROR happens in EXTRACT2.  */
4582                tcg_gen_extract2_i64(tcg_rd, tcg_rm, tcg_rn, imm);
4583            } else {
4584                TCGv_i32 t0 = tcg_temp_new_i32();
4585
4586                tcg_gen_extrl_i64_i32(t0, tcg_rm);
4587                if (rm == rn) {
4588                    tcg_gen_rotri_i32(t0, t0, imm);
4589                } else {
4590                    TCGv_i32 t1 = tcg_temp_new_i32();
4591                    tcg_gen_extrl_i64_i32(t1, tcg_rn);
4592                    tcg_gen_extract2_i32(t0, t0, t1, imm);
4593                    tcg_temp_free_i32(t1);
4594                }
4595                tcg_gen_extu_i32_i64(tcg_rd, t0);
4596                tcg_temp_free_i32(t0);
4597            }
4598        }
4599    }
4600}
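
/*
 * Worked example (illustrative): "ror w0, w1, #imm" is the preferred
 * alias of "extr w0, w1, w1, #imm"; the rm == rn test above catches it
 * and emits a single rotri instead of extract2 (for sf=1 the same
 * specialization happens inside tcg_gen_extract2_i64).
 */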
4601
4602/* Data processing - immediate */
4603static void disas_data_proc_imm(DisasContext *s, uint32_t insn)
4604{
4605    switch (extract32(insn, 23, 6)) {
4606    case 0x20: case 0x21: /* PC-rel. addressing */
4607        disas_pc_rel_adr(s, insn);
4608        break;
4609    case 0x22: /* Add/subtract (immediate) */
4610        disas_add_sub_imm(s, insn);
4611        break;
4612    case 0x23: /* Add/subtract (immediate, with tags) */
4613        disas_add_sub_imm_with_tags(s, insn);
4614        break;
4615    case 0x24: /* Logical (immediate) */
4616        disas_logic_imm(s, insn);
4617        break;
4618    case 0x25: /* Move wide (immediate) */
4619        disas_movw_imm(s, insn);
4620        break;
4621    case 0x26: /* Bitfield */
4622        disas_bitfield(s, insn);
4623        break;
4624    case 0x27: /* Extract */
4625        disas_extract(s, insn);
4626        break;
4627    default:
4628        unallocated_encoding(s);
4629        break;
4630    }
4631}
4632
4633/* Shift a TCGv src by TCGv shift_amount, put result in dst.
4634 * Note that it is the caller's responsibility to ensure that the
4635 * shift amount is in range (ie 0..31 or 0..63) and provide the ARM
4636 * mandated semantics for out of range shifts.
4637 */
4638static void shift_reg(TCGv_i64 dst, TCGv_i64 src, int sf,
4639                      enum a64_shift_type shift_type, TCGv_i64 shift_amount)
4640{
4641    switch (shift_type) {
4642    case A64_SHIFT_TYPE_LSL:
4643        tcg_gen_shl_i64(dst, src, shift_amount);
4644        break;
4645    case A64_SHIFT_TYPE_LSR:
4646        tcg_gen_shr_i64(dst, src, shift_amount);
4647        break;
4648    case A64_SHIFT_TYPE_ASR:
4649        if (!sf) {
4650            tcg_gen_ext32s_i64(dst, src);
4651        }
4652        tcg_gen_sar_i64(dst, sf ? src : dst, shift_amount);
4653        break;
4654    case A64_SHIFT_TYPE_ROR:
4655        if (sf) {
4656            tcg_gen_rotr_i64(dst, src, shift_amount);
4657        } else {
4658            TCGv_i32 t0, t1;
4659            t0 = tcg_temp_new_i32();
4660            t1 = tcg_temp_new_i32();
4661            tcg_gen_extrl_i64_i32(t0, src);
4662            tcg_gen_extrl_i64_i32(t1, shift_amount);
4663            tcg_gen_rotr_i32(t0, t0, t1);
4664            tcg_gen_extu_i32_i64(dst, t0);
4665            tcg_temp_free_i32(t0);
4666            tcg_temp_free_i32(t1);
4667        }
4668        break;
4669    default:
4670        assert(FALSE); /* all shift types should be handled */
4671        break;
4672    }
4673
4674    if (!sf) { /* zero extend final result */
4675        tcg_gen_ext32u_i64(dst, dst);
4676    }
4677}
4678
4679/* Shift a TCGv src by immediate, put result in dst.
4680 * The shift amount must be in range (this should always be true as the
4681 * relevant instructions will UNDEF on bad shift immediates).
4682 */
4683static void shift_reg_imm(TCGv_i64 dst, TCGv_i64 src, int sf,
4684                          enum a64_shift_type shift_type, unsigned int shift_i)
4685{
4686    assert(shift_i < (sf ? 64 : 32));
4687
4688    if (shift_i == 0) {
4689        tcg_gen_mov_i64(dst, src);
4690    } else {
4691        TCGv_i64 shift_const;
4692
4693        shift_const = tcg_const_i64(shift_i);
4694        shift_reg(dst, src, sf, shift_type, shift_const);
4695        tcg_temp_free_i64(shift_const);
4696    }
4697}
4698
4699/* Logical (shifted register)
4700 *   31  30 29 28       24 23   22 21  20  16 15    10 9    5 4    0
4701 * +----+-----+-----------+-------+---+------+--------+------+------+
4702 * | sf | opc | 0 1 0 1 0 | shift | N |  Rm  |  imm6  |  Rn  |  Rd  |
4703 * +----+-----+-----------+-------+---+------+--------+------+------+
4704 */
4705static void disas_logic_reg(DisasContext *s, uint32_t insn)
4706{
4707    TCGv_i64 tcg_rd, tcg_rn, tcg_rm;
4708    unsigned int sf, opc, shift_type, invert, rm, shift_amount, rn, rd;
4709
4710    sf = extract32(insn, 31, 1);
4711    opc = extract32(insn, 29, 2);
4712    shift_type = extract32(insn, 22, 2);
4713    invert = extract32(insn, 21, 1);
4714    rm = extract32(insn, 16, 5);
4715    shift_amount = extract32(insn, 10, 6);
4716    rn = extract32(insn, 5, 5);
4717    rd = extract32(insn, 0, 5);
4718
4719    if (!sf && (shift_amount & (1 << 5))) {
4720        unallocated_encoding(s);
4721        return;
4722    }
4723
4724    tcg_rd = cpu_reg(s, rd);
4725
4726    if (opc == 1 && shift_amount == 0 && shift_type == 0 && rn == 31) {
4727        /* Unshifted ORR and ORN with WZR/XZR is the standard encoding for
4728         * register-register MOV and MVN, so it is worth special casing.
4729         */
4730        tcg_rm = cpu_reg(s, rm);
4731        if (invert) {
4732            tcg_gen_not_i64(tcg_rd, tcg_rm);
4733            if (!sf) {
4734                tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4735            }
4736        } else {
4737            if (sf) {
4738                tcg_gen_mov_i64(tcg_rd, tcg_rm);
4739            } else {
4740                tcg_gen_ext32u_i64(tcg_rd, tcg_rm);
4741            }
4742        }
4743        return;
4744    }
4745
4746    tcg_rm = read_cpu_reg(s, rm, sf);
4747
4748    if (shift_amount) {
4749        shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, shift_amount);
4750    }
4751
4752    tcg_rn = cpu_reg(s, rn);
4753
4754    switch (opc | (invert << 2)) {
4755    case 0: /* AND */
4756    case 3: /* ANDS */
4757        tcg_gen_and_i64(tcg_rd, tcg_rn, tcg_rm);
4758        break;
4759    case 1: /* ORR */
4760        tcg_gen_or_i64(tcg_rd, tcg_rn, tcg_rm);
4761        break;
4762    case 2: /* EOR */
4763        tcg_gen_xor_i64(tcg_rd, tcg_rn, tcg_rm);
4764        break;
4765    case 4: /* BIC */
4766    case 7: /* BICS */
4767        tcg_gen_andc_i64(tcg_rd, tcg_rn, tcg_rm);
4768        break;
4769    case 5: /* ORN */
4770        tcg_gen_orc_i64(tcg_rd, tcg_rn, tcg_rm);
4771        break;
4772    case 6: /* EON */
4773        tcg_gen_eqv_i64(tcg_rd, tcg_rn, tcg_rm);
4774        break;
4775    default:
4776        assert(FALSE);
4777        break;
4778    }
4779
4780    if (!sf) {
4781        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4782    }
4783
4784    if (opc == 3) {
4785        gen_logic_CC(sf, tcg_rd);
4786    }
4787}
4788
4789/*
4790 * Add/subtract (extended register)
4791 *
4792 *  31|30|29|28       24|23 22|21|20   16|15  13|12  10|9  5|4  0|
4793 * +--+--+--+-----------+-----+--+-------+------+------+----+----+
4794 * |sf|op| S| 0 1 0 1 1 | opt | 1|  Rm   |option| imm3 | Rn | Rd |
4795 * +--+--+--+-----------+-----+--+-------+------+------+----+----+
4796 *
4797 *  sf: 0 -> 32bit, 1 -> 64bit
4798 *  op: 0 -> add  , 1 -> sub
4799 *   S: 1 -> set flags
4800 * opt: 00
4801 * option: extension type (see DecodeRegExtend)
4802 * imm3: optional shift to Rm
4803 *
4804 * Rd = Rn + LSL(extend(Rm), amount)
4805 */
4806static void disas_add_sub_ext_reg(DisasContext *s, uint32_t insn)
4807{
4808    int rd = extract32(insn, 0, 5);
4809    int rn = extract32(insn, 5, 5);
4810    int imm3 = extract32(insn, 10, 3);
4811    int option = extract32(insn, 13, 3);
4812    int rm = extract32(insn, 16, 5);
4813    int opt = extract32(insn, 22, 2);
4814    bool setflags = extract32(insn, 29, 1);
4815    bool sub_op = extract32(insn, 30, 1);
4816    bool sf = extract32(insn, 31, 1);
4817
4818    TCGv_i64 tcg_rm, tcg_rn; /* temps */
4819    TCGv_i64 tcg_rd;
4820    TCGv_i64 tcg_result;
4821
4822    if (imm3 > 4 || opt != 0) {
4823        unallocated_encoding(s);
4824        return;
4825    }
4826
4827    /* non-flag setting ops may use SP */
4828    if (!setflags) {
4829        tcg_rd = cpu_reg_sp(s, rd);
4830    } else {
4831        tcg_rd = cpu_reg(s, rd);
4832    }
4833    tcg_rn = read_cpu_reg_sp(s, rn, sf);
4834
4835    tcg_rm = read_cpu_reg(s, rm, sf);
4836    ext_and_shift_reg(tcg_rm, tcg_rm, option, imm3);
4837
4838    tcg_result = tcg_temp_new_i64();
4839
4840    if (!setflags) {
4841        if (sub_op) {
4842            tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
4843        } else {
4844            tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
4845        }
4846    } else {
4847        if (sub_op) {
4848            gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
4849        } else {
4850            gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
4851        }
4852    }
4853
4854    if (sf) {
4855        tcg_gen_mov_i64(tcg_rd, tcg_result);
4856    } else {
4857        tcg_gen_ext32u_i64(tcg_rd, tcg_result);
4858    }
4859
4860    tcg_temp_free_i64(tcg_result);
4861}
4862
4863/*
4864 * Add/subtract (shifted register)
4865 *
4866 *  31 30 29 28       24 23 22 21 20   16 15     10 9    5 4    0
4867 * +--+--+--+-----------+-----+--+-------+---------+------+------+
4868 * |sf|op| S| 0 1 0 1 1 |shift| 0|  Rm   |  imm6   |  Rn  |  Rd  |
4869 * +--+--+--+-----------+-----+--+-------+---------+------+------+
4870 *
4871 *    sf: 0 -> 32bit, 1 -> 64bit
4872 *    op: 0 -> add  , 1 -> sub
4873 *     S: 1 -> set flags
4874 * shift: 00 -> LSL, 01 -> LSR, 10 -> ASR, 11 -> RESERVED
4875 *  imm6: Shift amount to apply to Rm before the add/sub
4876 */
4877static void disas_add_sub_reg(DisasContext *s, uint32_t insn)
4878{
4879    int rd = extract32(insn, 0, 5);
4880    int rn = extract32(insn, 5, 5);
4881    int imm6 = extract32(insn, 10, 6);
4882    int rm = extract32(insn, 16, 5);
4883    int shift_type = extract32(insn, 22, 2);
4884    bool setflags = extract32(insn, 29, 1);
4885    bool sub_op = extract32(insn, 30, 1);
4886    bool sf = extract32(insn, 31, 1);
4887
4888    TCGv_i64 tcg_rd = cpu_reg(s, rd);
4889    TCGv_i64 tcg_rn, tcg_rm;
4890    TCGv_i64 tcg_result;
4891
4892    if ((shift_type == 3) || (!sf && (imm6 > 31))) {
4893        unallocated_encoding(s);
4894        return;
4895    }
4896
4897    tcg_rn = read_cpu_reg(s, rn, sf);
4898    tcg_rm = read_cpu_reg(s, rm, sf);
4899
4900    shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, imm6);
4901
4902    tcg_result = tcg_temp_new_i64();
4903
4904    if (!setflags) {
4905        if (sub_op) {
4906            tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
4907        } else {
4908            tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
4909        }
4910    } else {
4911        if (sub_op) {
4912            gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
4913        } else {
4914            gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
4915        }
4916    }
4917
4918    if (sf) {
4919        tcg_gen_mov_i64(tcg_rd, tcg_result);
4920    } else {
4921        tcg_gen_ext32u_i64(tcg_rd, tcg_result);
4922    }
4923
4924    tcg_temp_free_i64(tcg_result);
4925}
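
/*
 * Note that the flag-setting form with Rd == 31 yields the CMP/CMN
 * aliases: "CMP X1, X2" is "SUBS XZR, X1, X2", which runs gen_sub_CC
 * above and then discards the result by writing it to XZR.
 */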
4926
4927/* Data-processing (3 source)
4928 *
4929 *    31 30  29 28       24 23 21  20  16  15  14  10 9    5 4    0
4930 *  +--+------+-----------+------+------+----+------+------+------+
4931 *  |sf| op54 | 1 1 0 1 1 | op31 |  Rm  | o0 |  Ra  |  Rn  |  Rd  |
4932 *  +--+------+-----------+------+------+----+------+------+------+
4933 */
4934static void disas_data_proc_3src(DisasContext *s, uint32_t insn)
4935{
4936    int rd = extract32(insn, 0, 5);
4937    int rn = extract32(insn, 5, 5);
4938    int ra = extract32(insn, 10, 5);
4939    int rm = extract32(insn, 16, 5);
4940    int op_id = (extract32(insn, 29, 3) << 4) |
4941        (extract32(insn, 21, 3) << 1) |
4942        extract32(insn, 15, 1);
4943    bool sf = extract32(insn, 31, 1);
4944    bool is_sub = extract32(op_id, 0, 1);
4945    bool is_high = extract32(op_id, 2, 1);
4946    bool is_signed = false;
4947    TCGv_i64 tcg_op1;
4948    TCGv_i64 tcg_op2;
4949    TCGv_i64 tcg_tmp;
4950
4951    /* Note that op_id is sf:op54:op31:o0 so it includes the 32/64 size flag */
4952    switch (op_id) {
4953    case 0x42: /* SMADDL */
4954    case 0x43: /* SMSUBL */
4955    case 0x44: /* SMULH */
4956        is_signed = true;
4957        break;
4958    case 0x0: /* MADD (32bit) */
4959    case 0x1: /* MSUB (32bit) */
4960    case 0x40: /* MADD (64bit) */
4961    case 0x41: /* MSUB (64bit) */
4962    case 0x4a: /* UMADDL */
4963    case 0x4b: /* UMSUBL */
4964    case 0x4c: /* UMULH */
4965        break;
4966    default:
4967        unallocated_encoding(s);
4968        return;
4969    }
4970
4971    if (is_high) {
4972        TCGv_i64 low_bits = tcg_temp_new_i64(); /* low bits discarded */
4973        TCGv_i64 tcg_rd = cpu_reg(s, rd);
4974        TCGv_i64 tcg_rn = cpu_reg(s, rn);
4975        TCGv_i64 tcg_rm = cpu_reg(s, rm);
4976
4977        if (is_signed) {
4978            tcg_gen_muls2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
4979        } else {
4980            tcg_gen_mulu2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
4981        }
4982
4983        tcg_temp_free_i64(low_bits);
4984        return;
4985    }
4986
4987    tcg_op1 = tcg_temp_new_i64();
4988    tcg_op2 = tcg_temp_new_i64();
4989    tcg_tmp = tcg_temp_new_i64();
4990
4991    if (op_id < 0x42) {
4992        tcg_gen_mov_i64(tcg_op1, cpu_reg(s, rn));
4993        tcg_gen_mov_i64(tcg_op2, cpu_reg(s, rm));
4994    } else {
4995        if (is_signed) {
4996            tcg_gen_ext32s_i64(tcg_op1, cpu_reg(s, rn));
4997            tcg_gen_ext32s_i64(tcg_op2, cpu_reg(s, rm));
4998        } else {
4999            tcg_gen_ext32u_i64(tcg_op1, cpu_reg(s, rn));
5000            tcg_gen_ext32u_i64(tcg_op2, cpu_reg(s, rm));
5001        }
5002    }
5003
5004    if (ra == 31 && !is_sub) {
5005        /* Special-case MADD with Ra == XZR; it is the standard MUL alias */
5006        tcg_gen_mul_i64(cpu_reg(s, rd), tcg_op1, tcg_op2);
5007    } else {
5008        tcg_gen_mul_i64(tcg_tmp, tcg_op1, tcg_op2);
5009        if (is_sub) {
5010            tcg_gen_sub_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
5011        } else {
5012            tcg_gen_add_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
5013        }
5014    }
5015
5016    if (!sf) {
5017        tcg_gen_ext32u_i64(cpu_reg(s, rd), cpu_reg(s, rd));
5018    }
5019
5020    tcg_temp_free_i64(tcg_op1);
5021    tcg_temp_free_i64(tcg_op2);
5022    tcg_temp_free_i64(tcg_tmp);
5023}
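
/*
 * op_id worked example (illustrative): SMADDL has sf=1, op54=00,
 * op31=001, o0=0, so op_id = (0b100 << 4) | (0b001 << 1) | 0 = 0x42.
 * That both marks it signed above and fails the "op_id < 0x42" test,
 * so its operands are sign-extended from 32 bits before the multiply.
 */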
5024
5025/* Add/subtract (with carry)
5026 *  31 30 29 28 27 26 25 24 23 22 21  20  16  15       10  9    5 4   0
5027 * +--+--+--+------------------------+------+-------------+------+-----+
5028 * |sf|op| S| 1  1  0  1  0  0  0  0 |  rm  | 0 0 0 0 0 0 |  Rn  |  Rd |
5029 * +--+--+--+------------------------+------+-------------+------+-----+
5030 */
5031
5032static void disas_adc_sbc(DisasContext *s, uint32_t insn)
5033{
5034    unsigned int sf, op, setflags, rm, rn, rd;
5035    TCGv_i64 tcg_y, tcg_rn, tcg_rd;
5036
5037    sf = extract32(insn, 31, 1);
5038    op = extract32(insn, 30, 1);
5039    setflags = extract32(insn, 29, 1);
5040    rm = extract32(insn, 16, 5);
5041    rn = extract32(insn, 5, 5);
5042    rd = extract32(insn, 0, 5);
5043
5044    tcg_rd = cpu_reg(s, rd);
5045    tcg_rn = cpu_reg(s, rn);
5046
5047    if (op) {
5048        tcg_y = new_tmp_a64(s);
5049        tcg_gen_not_i64(tcg_y, cpu_reg(s, rm));
5050    } else {
5051        tcg_y = cpu_reg(s, rm);
5052    }
5053
5054    if (setflags) {
5055        gen_adc_CC(sf, tcg_rd, tcg_rn, tcg_y);
5056    } else {
5057        gen_adc(sf, tcg_rd, tcg_rn, tcg_y);
5058    }
5059}
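
/*
 * The subtract-with-carry forms reuse the add-with-carry path: the
 * architecture defines SBC as Rn + NOT(Rm) + C, so inverting Rm above
 * when op is set is all that is needed before gen_adc/gen_adc_CC.
 */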
5060
5061/*
5062 * Rotate right into flags
5063 *  31 30 29                21       15          10      5  4      0
5064 * +--+--+--+-----------------+--------+-----------+------+--+------+
5065 * |sf|op| S| 1 1 0 1 0 0 0 0 |  imm6  | 0 0 0 0 1 |  Rn  |o2| mask |
5066 * +--+--+--+-----------------+--------+-----------+------+--+------+
5067 */
5068static void disas_rotate_right_into_flags(DisasContext *s, uint32_t insn)
5069{
5070    int mask = extract32(insn, 0, 4);
5071    int o2 = extract32(insn, 4, 1);
5072    int rn = extract32(insn, 5, 5);
5073    int imm6 = extract32(insn, 15, 6);
5074    int sf_op_s = extract32(insn, 29, 3);
5075    TCGv_i64 tcg_rn;
5076    TCGv_i32 nzcv;
5077
5078    if (sf_op_s != 5 || o2 != 0 || !dc_isar_feature(aa64_condm_4, s)) {
5079        unallocated_encoding(s);
5080        return;
5081    }
5082
5083    tcg_rn = read_cpu_reg(s, rn, 1);
5084    tcg_gen_rotri_i64(tcg_rn, tcg_rn, imm6);
5085
5086    nzcv = tcg_temp_new_i32();
5087    tcg_gen_extrl_i64_i32(nzcv, tcg_rn);
5088
5089    if (mask & 8) { /* N */
5090        tcg_gen_shli_i32(cpu_NF, nzcv, 31 - 3);
5091    }
5092    if (mask & 4) { /* Z */
5093        tcg_gen_not_i32(cpu_ZF, nzcv);
5094        tcg_gen_andi_i32(cpu_ZF, cpu_ZF, 4);
5095    }
5096    if (mask & 2) { /* C */
5097        tcg_gen_extract_i32(cpu_CF, nzcv, 1, 1);
5098    }
5099    if (mask & 1) { /* V */
5100        tcg_gen_shli_i32(cpu_VF, nzcv, 31 - 0);
5101    }
5102
5103    tcg_temp_free_i32(nzcv);
5104}
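
/*
 * The masked updates above reflect QEMU's internal flag layout: cpu_NF
 * and cpu_VF keep their flag in bit 31, cpu_CF keeps C in bit 0, and
 * cpu_ZF is "Z is set iff ZF == 0".  Hence N (bit 3 of the rotated
 * value) is shifted up by 28, Z is bit 2 inverted in place, C is bit 1
 * extracted down to bit 0, and V is bit 0 shifted up by 31.
 */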
5105
5106/*
5107 * Evaluate into flags
5108 *  31 30 29                21        15   14        10      5  4      0
5109 * +--+--+--+-----------------+---------+----+---------+------+--+------+
5110 * |sf|op| S| 1 1 0 1 0 0 0 0 | opcode2 | sz | 0 0 1 0 |  Rn  |o3| mask |
5111 * +--+--+--+-----------------+---------+----+---------+------+--+------+
5112 */
5113static void disas_evaluate_into_flags(DisasContext *s, uint32_t insn)
5114{
5115    int o3_mask = extract32(insn, 0, 5);
5116    int rn = extract32(insn, 5, 5);
5117    int o2 = extract32(insn, 15, 6);
5118    int sz = extract32(insn, 14, 1);
5119    int sf_op_s = extract32(insn, 29, 3);
5120    TCGv_i32 tmp;
5121    int shift;
5122
5123    if (sf_op_s != 1 || o2 != 0 || o3_mask != 0xd ||
5124        !dc_isar_feature(aa64_condm_4, s)) {
5125        unallocated_encoding(s);
5126        return;
5127    }
5128    shift = sz ? 16 : 24;  /* SETF16 or SETF8 */
5129
5130    tmp = tcg_temp_new_i32();
5131    tcg_gen_extrl_i64_i32(tmp, cpu_reg(s, rn));
5132    tcg_gen_shli_i32(cpu_NF, tmp, shift);
5133    tcg_gen_shli_i32(cpu_VF, tmp, shift - 1);
5134    tcg_gen_mov_i32(cpu_ZF, cpu_NF);
5135    tcg_gen_xor_i32(cpu_VF, cpu_VF, cpu_NF);
5136    tcg_temp_free_i32(tmp);
5137}
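
/*
 * SETF8 sketch (shift == 24): NF receives bit 7 of Rn in bit 31, i.e.
 * the sign of the low byte; ZF is the same shifted value, so Z is set
 * iff the low byte is zero; and VF's sign bit becomes bit 7 XOR bit 8
 * of Rn, the architectural V result for SETF8.  C is left unchanged.
 */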
5138
5139/* Conditional compare (immediate / register)
5140 *  31 30 29 28 27 26 25 24 23 22 21  20    16 15  12  11  10  9   5  4 3   0
5141 * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
5142 * |sf|op| S| 1  1  0  1  0  0  1  0 |imm5/rm | cond |i/r |o2|  Rn  |o3|nzcv |
5143 * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
5144 *        [1]                             y                [0]       [0]
5145 */
5146static void disas_cc(DisasContext *s, uint32_t insn)
5147{
5148    unsigned int sf, op, y, cond, rn, nzcv, is_imm;
5149    TCGv_i32 tcg_t0, tcg_t1, tcg_t2;
5150    TCGv_i64 tcg_tmp, tcg_y, tcg_rn;
5151    DisasCompare c;
5152
5153    if (!extract32(insn, 29, 1)) {
5154        unallocated_encoding(s);
5155        return;
5156    }
5157    if (insn & (1 << 10 | 1 << 4)) {
5158        unallocated_encoding(s);
5159        return;
5160    }
5161    sf = extract32(insn, 31, 1);
5162    op = extract32(insn, 30, 1);
5163    is_imm = extract32(insn, 11, 1);
5164    y = extract32(insn, 16, 5); /* y = rm (reg) or imm5 (imm) */
5165    cond = extract32(insn, 12, 4);
5166    rn = extract32(insn, 5, 5);
5167    nzcv = extract32(insn, 0, 4);
5168
5169    /* Set T0 = !COND.  */
5170    tcg_t0 = tcg_temp_new_i32();
5171    arm_test_cc(&c, cond);
5172    tcg_gen_setcondi_i32(tcg_invert_cond(c.cond), tcg_t0, c.value, 0);
5173    arm_free_cc(&c);
5174
5175    /* Load the arguments for the new comparison.  */
5176    if (is_imm) {
5177        tcg_y = new_tmp_a64(s);
5178        tcg_gen_movi_i64(tcg_y, y);
5179    } else {
5180        tcg_y = cpu_reg(s, y);
5181    }
5182    tcg_rn = cpu_reg(s, rn);
5183
5184    /* Set the flags for the new comparison.  */
5185    tcg_tmp = tcg_temp_new_i64();
5186    if (op) {
5187        gen_sub_CC(sf, tcg_tmp, tcg_rn, tcg_y);
5188    } else {
5189        gen_add_CC(sf, tcg_tmp, tcg_rn, tcg_y);
5190    }
5191    tcg_temp_free_i64(tcg_tmp);
5192
5193    /* If COND was false, force the flags to #nzcv.  Compute two masks
5194     * to help with this: T1 = (COND ? 0 : -1), T2 = (COND ? -1 : 0).
5195     * For tcg hosts that support ANDC, we can make do with just T1.
5196     * In either case, allow the tcg optimizer to delete any unused mask.
5197     */
5198    tcg_t1 = tcg_temp_new_i32();
5199    tcg_t2 = tcg_temp_new_i32();
5200    tcg_gen_neg_i32(tcg_t1, tcg_t0);
5201    tcg_gen_subi_i32(tcg_t2, tcg_t0, 1);
5202
5203    if (nzcv & 8) { /* N */
5204        tcg_gen_or_i32(cpu_NF, cpu_NF, tcg_t1);
5205    } else {
5206        if (TCG_TARGET_HAS_andc_i32) {
5207            tcg_gen_andc_i32(cpu_NF, cpu_NF, tcg_t1);
5208        } else {
5209            tcg_gen_and_i32(cpu_NF, cpu_NF, tcg_t2);
5210        }
5211    }
5212    if (nzcv & 4) { /* Z */
5213        if (TCG_TARGET_HAS_andc_i32) {
5214            tcg_gen_andc_i32(cpu_ZF, cpu_ZF, tcg_t1);
5215        } else {
5216            tcg_gen_and_i32(cpu_ZF, cpu_ZF, tcg_t2);
5217        }
5218    } else {
5219        tcg_gen_or_i32(cpu_ZF, cpu_ZF, tcg_t0);
5220    }
5221    if (nzcv & 2) { /* C */
5222        tcg_gen_or_i32(cpu_CF, cpu_CF, tcg_t0);
5223    } else {
5224        if (TCG_TARGET_HAS_andc_i32) {
5225            tcg_gen_andc_i32(cpu_CF, cpu_CF, tcg_t1);
5226        } else {
5227            tcg_gen_and_i32(cpu_CF, cpu_CF, tcg_t2);
5228        }
5229    }
5230    if (nzcv & 1) { /* V */
5231        tcg_gen_or_i32(cpu_VF, cpu_VF, tcg_t1);
5232    } else {
5233        if (TCG_TARGET_HAS_andc_i32) {
5234            tcg_gen_andc_i32(cpu_VF, cpu_VF, tcg_t1);
5235        } else {
5236            tcg_gen_and_i32(cpu_VF, cpu_VF, tcg_t2);
5237        }
5238    }
5239    tcg_temp_free_i32(tcg_t0);
5240    tcg_temp_free_i32(tcg_t1);
5241    tcg_temp_free_i32(tcg_t2);
5242}
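
/*
 * Example (illustrative): "CCMP X1, X2, #5, EQ" always generates the
 * X1 - X2 compare; the masks above then branchlessly keep those flags
 * when EQ held on entry, or force NZCV to 0101 (#5) when it did not.
 */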
5243
5244/* Conditional select
5245 *   31   30  29  28             21 20  16 15  12 11 10 9    5 4    0
5246 * +----+----+---+-----------------+------+------+-----+------+------+
5247 * | sf | op | S | 1 1 0 1 0 1 0 0 |  Rm  | cond | op2 |  Rn  |  Rd  |
5248 * +----+----+---+-----------------+------+------+-----+------+------+
5249 */
5250static void disas_cond_select(DisasContext *s, uint32_t insn)
5251{
5252    unsigned int sf, else_inv, rm, cond, else_inc, rn, rd;
5253    TCGv_i64 tcg_rd, zero;
5254    DisasCompare64 c;
5255
5256    if (extract32(insn, 29, 1) || extract32(insn, 11, 1)) {
5257        /* S == 1 or op2<1> == 1 */
5258        unallocated_encoding(s);
5259        return;
5260    }
5261    sf = extract32(insn, 31, 1);
5262    else_inv = extract32(insn, 30, 1);
5263    rm = extract32(insn, 16, 5);
5264    cond = extract32(insn, 12, 4);
5265    else_inc = extract32(insn, 10, 1);
5266    rn = extract32(insn, 5, 5);
5267    rd = extract32(insn, 0, 5);
5268
5269    tcg_rd = cpu_reg(s, rd);
5270
5271    a64_test_cc(&c, cond);
5272    zero = tcg_const_i64(0);
5273
5274    if (rn == 31 && rm == 31 && (else_inc ^ else_inv)) {
5275        /* CSET & CSETM.  */
5276        tcg_gen_setcond_i64(tcg_invert_cond(c.cond), tcg_rd, c.value, zero);
5277        if (else_inv) {
5278            tcg_gen_neg_i64(tcg_rd, tcg_rd);
5279        }
5280    } else {
5281        TCGv_i64 t_true = cpu_reg(s, rn);
5282        TCGv_i64 t_false = read_cpu_reg(s, rm, 1);
5283        if (else_inv && else_inc) {
5284            tcg_gen_neg_i64(t_false, t_false);
5285        } else if (else_inv) {
5286            tcg_gen_not_i64(t_false, t_false);
5287        } else if (else_inc) {
5288            tcg_gen_addi_i64(t_false, t_false, 1);
5289        }
5290        tcg_gen_movcond_i64(c.cond, tcg_rd, c.value, zero, t_true, t_false);
5291    }
5292
5293    tcg_temp_free_i64(zero);
5294    a64_free_cc(&c);
5295
5296    if (!sf) {
5297        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
5298    }
5299}
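
/*
 * The Rn == Rm == 31 fast path implements the CSET/CSETM aliases:
 * "CSET Xd, cond" is "CSINC Xd, XZR, XZR, invert(cond)" and CSETM the
 * CSINV equivalent, so the result comes straight from setcond (with a
 * negate for CSETM) instead of a movcond between two zero registers.
 */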
5300
5301static void handle_clz(DisasContext *s, unsigned int sf,
5302                       unsigned int rn, unsigned int rd)
5303{
5304    TCGv_i64 tcg_rd, tcg_rn;
5305    tcg_rd = cpu_reg(s, rd);
5306    tcg_rn = cpu_reg(s, rn);
5307
5308    if (sf) {
5309        tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64);
5310    } else {
5311        TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
5312        tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
5313        tcg_gen_clzi_i32(tcg_tmp32, tcg_tmp32, 32);
5314        tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
5315        tcg_temp_free_i32(tcg_tmp32);
5316    }
5317}
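
/*
 * The final argument of clzi is the result for a zero input, so CLZ of
 * 0 is defined as the operand width (64 or 32), as the architecture
 * requires.
 */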
5318
5319static void handle_cls(DisasContext *s, unsigned int sf,
5320                       unsigned int rn, unsigned int rd)
5321{
5322    TCGv_i64 tcg_rd, tcg_rn;
5323    tcg_rd = cpu_reg(s, rd);
5324    tcg_rn = cpu_reg(s, rn);
5325
5326    if (sf) {
5327        tcg_gen_clrsb_i64(tcg_rd, tcg_rn);
5328    } else {
5329        TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
5330        tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
5331        tcg_gen_clrsb_i32(tcg_tmp32, tcg_tmp32);
5332        tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
5333        tcg_temp_free_i32(tcg_tmp32);
5334    }
5335}
5336
5337static void handle_rbit(DisasContext *s, unsigned int sf,
5338                        unsigned int rn, unsigned int rd)
5339{
5340    TCGv_i64 tcg_rd, tcg_rn;
5341    tcg_rd = cpu_reg(s, rd);
5342    tcg_rn = cpu_reg(s, rn);
5343
5344    if (sf) {
5345        gen_helper_rbit64(tcg_rd, tcg_rn);
5346    } else {
5347        TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
5348        tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
5349        gen_helper_rbit(tcg_tmp32, tcg_tmp32);
5350        tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
5351        tcg_temp_free_i32(tcg_tmp32);
5352    }
5353}
5354
5355/* REV with sf==1, opcode==3 ("REV64") */
5356static void handle_rev64(DisasContext *s, unsigned int sf,
5357                         unsigned int rn, unsigned int rd)
5358{
5359    if (!sf) {
5360        unallocated_encoding(s);
5361        return;
5362    }
5363    tcg_gen_bswap64_i64(cpu_reg(s, rd), cpu_reg(s, rn));
5364}
5365
5366/* REV (sf==0, opcode==2)
5367 * REV32 (sf==1, opcode==2)
5368 */
5369static void handle_rev32(DisasContext *s, unsigned int sf,
5370                         unsigned int rn, unsigned int rd)
5371{
5372    TCGv_i64 tcg_rd = cpu_reg(s, rd);
5373
5374    if (sf) {
5375        TCGv_i64 tcg_tmp = tcg_temp_new_i64();
5376        TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
5377
5378        /* bswap32_i64 requires zero high word */
5379        tcg_gen_ext32u_i64(tcg_tmp, tcg_rn);
5380        tcg_gen_bswap32_i64(tcg_rd, tcg_tmp);
5381        tcg_gen_shri_i64(tcg_tmp, tcg_rn, 32);
5382        tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp);
5383        tcg_gen_concat32_i64(tcg_rd, tcg_rd, tcg_tmp);
5384
5385        tcg_temp_free_i64(tcg_tmp);
5386    } else {
5387        tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, rn));
5388        tcg_gen_bswap32_i64(tcg_rd, tcg_rd);
5389    }
5390}
5391
5392/* REV16 (opcode==1) */
5393static void handle_rev16(DisasContext *s, unsigned int sf,
5394                         unsigned int rn, unsigned int rd)
5395{
5396    TCGv_i64 tcg_rd = cpu_reg(s, rd);
5397    TCGv_i64 tcg_tmp = tcg_temp_new_i64();
5398    TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
5399    TCGv_i64 mask = tcg_const_i64(sf ? 0x00ff00ff00ff00ffull : 0x00ff00ff);
5400
5401    tcg_gen_shri_i64(tcg_tmp, tcg_rn, 8);
5402    tcg_gen_and_i64(tcg_rd, tcg_rn, mask);
5403    tcg_gen_and_i64(tcg_tmp, tcg_tmp, mask);
5404    tcg_gen_shli_i64(tcg_rd, tcg_rd, 8);
5405    tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_tmp);
5406
5407    tcg_temp_free_i64(mask);
5408    tcg_temp_free_i64(tcg_tmp);
5409}
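
/*
 * Worked example: REV16 on 0x1122334455667788 byte-swaps each 16-bit
 * lane, giving 0x2211443366558877.  The mask selects the low byte of
 * every halfword, so the AND/shift pairs above exchange the byte pairs
 * of all lanes in parallel.
 */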
5410
5411/* Data-processing (1 source)
5412 *   31  30  29  28             21 20     16 15    10 9    5 4    0
5413 * +----+---+---+-----------------+---------+--------+------+------+
5414 * | sf | 1 | S | 1 1 0 1 0 1 1 0 | opcode2 | opcode |  Rn  |  Rd  |
5415 * +----+---+---+-----------------+---------+--------+------+------+
5416 */
5417static void disas_data_proc_1src(DisasContext *s, uint32_t insn)
5418{
5419    unsigned int sf, opcode, opcode2, rn, rd;
5420    TCGv_i64 tcg_rd;
5421
5422    if (extract32(insn, 29, 1)) {
5423        unallocated_encoding(s);
5424        return;
5425    }
5426
5427    sf = extract32(insn, 31, 1);
5428    opcode = extract32(insn, 10, 6);
5429    opcode2 = extract32(insn, 16, 5);
5430    rn = extract32(insn, 5, 5);
5431    rd = extract32(insn, 0, 5);
5432
5433#define MAP(SF, O2, O1) ((SF) | (O1 << 1) | (O2 << 7))
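
/*
 * For example, RBIT with sf=1 is MAP(1, 0x00, 0x00) == 0x01 and PACIA
 * is MAP(1, 0x01, 0x00) == 0x81: sf lands in bit 0, opcode in bits 6:1,
 * opcode2 in bits 11:7 of the composed key.
 */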
5434
5435    switch (MAP(sf, opcode2, opcode)) {
5436    case MAP(0, 0x00, 0x00): /* RBIT */
5437    case MAP(1, 0x00, 0x00):
5438        handle_rbit(s, sf, rn, rd);
5439        break;
5440    case MAP(0, 0x00, 0x01): /* REV16 */
5441    case MAP(1, 0x00, 0x01):
5442        handle_rev16(s, sf, rn, rd);
5443        break;
5444    case MAP(0, 0x00, 0x02): /* REV/REV32 */
5445    case MAP(1, 0x00, 0x02):
5446        handle_rev32(s, sf, rn, rd);
5447        break;
5448    case MAP(1, 0x00, 0x03): /* REV64 */
5449        handle_rev64(s, sf, rn, rd);
5450        break;
5451    case MAP(0, 0x00, 0x04): /* CLZ */
5452    case MAP(1, 0x00, 0x04):
5453        handle_clz(s, sf, rn, rd);
5454        break;
5455    case MAP(0, 0x00, 0x05): /* CLS */
5456    case MAP(1, 0x00, 0x05):
5457        handle_cls(s, sf, rn, rd);
5458        break;
5459    case MAP(1, 0x01, 0x00): /* PACIA */
5460        if (s->pauth_active) {
5461            tcg_rd = cpu_reg(s, rd);
5462            gen_helper_pacia(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
5463        } else if (!dc_isar_feature(aa64_pauth, s)) {
5464            goto do_unallocated;
5465        }
5466        break;
5467    case MAP(1, 0x01, 0x01): /* PACIB */
5468        if (s->pauth_active) {
5469            tcg_rd = cpu_reg(s, rd);
5470            gen_helper_pacib(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
5471        } else if (!dc_isar_feature(aa64_pauth, s)) {
5472            goto do_unallocated;
5473        }
5474        break;
5475    case MAP(1, 0x01, 0x02): /* PACDA */
5476        if (s->pauth_active) {
5477            tcg_rd = cpu_reg(s, rd);
5478            gen_helper_pacda(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
5479        } else if (!dc_isar_feature(aa64_pauth, s)) {
5480            goto do_unallocated;
5481        }
5482        break;
5483    case MAP(1, 0x01, 0x03): /* PACDB */
5484        if (s->pauth_active) {
5485            tcg_rd = cpu_reg(s, rd);
5486            gen_helper_pacdb(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
5487        } else if (!dc_isar_feature(aa64_pauth, s)) {
5488            goto do_unallocated;
5489        }
5490        break;
5491    case MAP(1, 0x01, 0x04): /* AUTIA */
5492        if (s->pauth_active) {
5493            tcg_rd = cpu_reg(s, rd);
5494            gen_helper_autia(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
5495        } else if (!dc_isar_feature(aa64_pauth, s)) {
5496            goto do_unallocated;
5497        }
5498        break;
5499    case MAP(1, 0x01, 0x05): /* AUTIB */
5500        if (s->pauth_active) {
5501            tcg_rd = cpu_reg(s, rd);
5502            gen_helper_autib(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
5503        } else if (!dc_isar_feature(aa64_pauth, s)) {
5504            goto do_unallocated;
5505        }
5506        break;
5507    case MAP(1, 0x01, 0x06): /* AUTDA */
5508        if (s->pauth_active) {
5509            tcg_rd = cpu_reg(s, rd);
5510            gen_helper_autda(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
5511        } else if (!dc_isar_feature(aa64_pauth, s)) {
5512            goto do_unallocated;
5513        }
5514        break;
5515    case MAP(1, 0x01, 0x07): /* AUTDB */
5516        if (s->pauth_active) {
5517            tcg_rd = cpu_reg(s, rd);
5518            gen_helper_autdb(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
5519        } else if (!dc_isar_feature(aa64_pauth, s)) {
5520            goto do_unallocated;
5521        }
5522        break;
5523    case MAP(1, 0x01, 0x08): /* PACIZA */
5524        if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5525            goto do_unallocated;
5526        } else if (s->pauth_active) {
5527            tcg_rd = cpu_reg(s, rd);
5528            gen_helper_pacia(tcg_rd, cpu_env, tcg_rd, new_tmp_a64_zero(s));
5529        }
5530        break;
5531    case MAP(1, 0x01, 0x09): /* PACIZB */
5532        if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5533            goto do_unallocated;
5534        } else if (s->pauth_active) {
5535            tcg_rd = cpu_reg(s, rd);
5536            gen_helper_pacib(tcg_rd, cpu_env, tcg_rd, new_tmp_a64_zero(s));
5537        }
5538        break;
5539    case MAP(1, 0x01, 0x0a): /* PACDZA */
5540        if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5541            goto do_unallocated;
5542        } else if (s->pauth_active) {
5543            tcg_rd = cpu_reg(s, rd);
5544            gen_helper_pacda(tcg_rd, cpu_env, tcg_rd, new_tmp_a64_zero(s));
5545        }
5546        break;
5547    case MAP(1, 0x01, 0x0b): /* PACDZB */
5548        if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5549            goto do_unallocated;
5550        } else if (s->pauth_active) {
5551            tcg_rd = cpu_reg(s, rd);
5552            gen_helper_pacdb(tcg_rd, cpu_env, tcg_rd, new_tmp_a64_zero(s));
5553        }
5554        break;
5555    case MAP(1, 0x01, 0x0c): /* AUTIZA */
5556        if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5557            goto do_unallocated;
5558        } else if (s->pauth_active) {
5559            tcg_rd = cpu_reg(s, rd);
5560            gen_helper_autia(tcg_rd, cpu_env, tcg_rd, new_tmp_a64_zero(s));
5561        }
5562        break;
5563    case MAP(1, 0x01, 0x0d): /* AUTIZB */
5564        if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5565            goto do_unallocated;
5566        } else if (s->pauth_active) {
5567            tcg_rd = cpu_reg(s, rd);
5568            gen_helper_autib(tcg_rd, cpu_env, tcg_rd, new_tmp_a64_zero(s));
5569        }
5570        break;
5571    case MAP(1, 0x01, 0x0e): /* AUTDZA */
5572        if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5573            goto do_unallocated;
5574        } else if (s->pauth_active) {
5575            tcg_rd = cpu_reg(s, rd);
5576            gen_helper_autda(tcg_rd, cpu_env, tcg_rd, new_tmp_a64_zero(s));
5577        }
5578        break;
5579    case MAP(1, 0x01, 0x0f): /* AUTDZB */
5580        if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5581            goto do_unallocated;
5582        } else if (s->pauth_active) {
5583            tcg_rd = cpu_reg(s, rd);
5584            gen_helper_autdb(tcg_rd, cpu_env, tcg_rd, new_tmp_a64_zero(s));
5585        }
5586        break;
5587    case MAP(1, 0x01, 0x10): /* XPACI */
5588        if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5589            goto do_unallocated;
5590        } else if (s->pauth_active) {
5591            tcg_rd = cpu_reg(s, rd);
5592            gen_helper_xpaci(tcg_rd, cpu_env, tcg_rd);
5593        }
5594        break;
5595    case MAP(1, 0x01, 0x11): /* XPACD */
5596        if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5597            goto do_unallocated;
5598        } else if (s->pauth_active) {
5599            tcg_rd = cpu_reg(s, rd);
5600            gen_helper_xpacd(tcg_rd, cpu_env, tcg_rd);
5601        }
5602        break;
5603    default:
5604    do_unallocated:
5605        unallocated_encoding(s);
5606        break;
5607    }
5608
5609#undef MAP
5610}
5611
5612static void handle_div(DisasContext *s, bool is_signed, unsigned int sf,
5613                       unsigned int rm, unsigned int rn, unsigned int rd)
5614{
5615    TCGv_i64 tcg_n, tcg_m, tcg_rd;
5616    tcg_rd = cpu_reg(s, rd);
5617
5618    if (!sf && is_signed) {
5619        tcg_n = new_tmp_a64(s);
5620        tcg_m = new_tmp_a64(s);
5621        tcg_gen_ext32s_i64(tcg_n, cpu_reg(s, rn));
5622        tcg_gen_ext32s_i64(tcg_m, cpu_reg(s, rm));
5623    } else {
5624        tcg_n = read_cpu_reg(s, rn, sf);
5625        tcg_m = read_cpu_reg(s, rm, sf);
5626    }
5627
5628    if (is_signed) {
5629        gen_helper_sdiv64(tcg_rd, tcg_n, tcg_m);
5630    } else {
5631        gen_helper_udiv64(tcg_rd, tcg_n, tcg_m);
5632    }
5633
5634    if (!sf) { /* zero extend final result */
5635        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
5636    }
5637}
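
/*
 * The 32-bit signed case must sign-extend both inputs so the 64-bit
 * helper sees the intended values: e.g. "SDIV W0, W1, W2" with
 * W1 = 0x80000000, W2 = -1 yields 0x80000000 (AArch64 division never
 * traps, and INT_MIN / -1 wraps); the final zero-extension then
 * re-narrows the result.
 */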
5638
5639/* LSLV, LSRV, ASRV, RORV */
5640static void handle_shift_reg(DisasContext *s,
5641                             enum a64_shift_type shift_type, unsigned int sf,
5642                             unsigned int rm, unsigned int rn, unsigned int rd)
5643{
5644    TCGv_i64 tcg_shift = tcg_temp_new_i64();
5645    TCGv_i64 tcg_rd = cpu_reg(s, rd);
5646    TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
5647
5648    tcg_gen_andi_i64(tcg_shift, cpu_reg(s, rm), sf ? 63 : 31);
5649    shift_reg(tcg_rd, tcg_rn, sf, shift_type, tcg_shift);
5650    tcg_temp_free_i64(tcg_shift);
5651}
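
/*
 * The AND with 63 (or 31) implements the architectural modulo on the
 * shift amount: "LSLV X0, X1, X2" with X2 == 65 shifts by 1.  It also
 * keeps the TCG shift ops within their defined operand range.
 */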
5652
5653/* CRC32[BHWX], CRC32C[BHWX] */
5654static void handle_crc32(DisasContext *s,
5655                         unsigned int sf, unsigned int sz, bool crc32c,
5656                         unsigned int rm, unsigned int rn, unsigned int rd)
5657{
5658    TCGv_i64 tcg_acc, tcg_val;
5659    TCGv_i32 tcg_bytes;
5660
5661    if (!dc_isar_feature(aa64_crc32, s)
5662        || (sf == 1 && sz != 3)
5663        || (sf == 0 && sz == 3)) {
5664        unallocated_encoding(s);
5665        return;
5666    }
5667
5668    if (sz == 3) {
5669        tcg_val = cpu_reg(s, rm);
5670    } else {
5671        uint64_t mask;
5672        switch (sz) {
5673        case 0:
5674            mask = 0xFF;
5675            break;
5676        case 1:
5677            mask = 0xFFFF;
5678            break;
5679        case 2:
5680            mask = 0xFFFFFFFF;
5681            break;
5682        default:
5683            g_assert_not_reached();
5684        }
5685        tcg_val = new_tmp_a64(s);
5686        tcg_gen_andi_i64(tcg_val, cpu_reg(s, rm), mask);
5687    }
5688
5689    tcg_acc = cpu_reg(s, rn);
5690    tcg_bytes = tcg_const_i32(1 << sz);
5691
5692    if (crc32c) {
5693        gen_helper_crc32c_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
5694    } else {
5695        gen_helper_crc32_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
5696    }
5697
5698    tcg_temp_free_i32(tcg_bytes);
5699}
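
/*
 * Example (illustrative): CRC32CB has sf=0, sz=0, crc32c=true, so the
 * Rm value is masked to 8 bits and tcg_bytes = 1 << 0 = 1: the helper
 * folds a single byte into the 32-bit accumulator read from Rn.
 */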
5700
5701/* Data-processing (2 source)
5702 *   31   30  29 28             21 20  16 15    10 9    5 4    0
5703 * +----+---+---+-----------------+------+--------+------+------+
5704 * | sf | 0 | S | 1 1 0 1 0 1 1 0 |  Rm  | opcode |  Rn  |  Rd  |
5705 * +----+---+---+-----------------+------+--------+------+------+
5706 */
5707static void disas_data_proc_2src(DisasContext *s, uint32_t insn)
5708{
5709    unsigned int sf, rm, opcode, rn, rd, setflag;
5710    sf = extract32(insn, 31, 1);
5711    setflag = extract32(insn, 29, 1);
5712    rm = extract32(insn, 16, 5);
5713    opcode = extract32(insn, 10, 6);
5714    rn = extract32(insn, 5, 5);
5715    rd = extract32(insn, 0, 5);
5716
5717    if (setflag && opcode != 0) {
5718        unallocated_encoding(s);
5719        return;
5720    }
5721
5722    switch (opcode) {
5723    case 0: /* SUBP(S) */
5724        if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) {
5725            goto do_unallocated;
5726        } else {
5727            TCGv_i64 tcg_n, tcg_m, tcg_d;
5728
5729            tcg_n = read_cpu_reg_sp(s, rn, true);
5730            tcg_m = read_cpu_reg_sp(s, rm, true);
5731            tcg_gen_sextract_i64(tcg_n, tcg_n, 0, 56);
5732            tcg_gen_sextract_i64(tcg_m, tcg_m, 0, 56);
5733            tcg_d = cpu_reg(s, rd);
5734
5735            if (setflag) {
5736                gen_sub_CC(true, tcg_d, tcg_n, tcg_m);
5737            } else {
5738                tcg_gen_sub_i64(tcg_d, tcg_n, tcg_m);
5739            }
5740        }
5741        break;
5742    case 2: /* UDIV */
5743        handle_div(s, false, sf, rm, rn, rd);
5744        break;
5745    case 3: /* SDIV */
5746        handle_div(s, true, sf, rm, rn, rd);
5747        break;
5748    case 4: /* IRG */
5749        if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) {
5750            goto do_unallocated;
5751        }
5752        if (s->ata) {
5753            gen_helper_irg(cpu_reg_sp(s, rd), cpu_env,
5754                           cpu_reg_sp(s, rn), cpu_reg(s, rm));
5755        } else {
5756            gen_address_with_allocation_tag0(cpu_reg_sp(s, rd),
5757                                             cpu_reg_sp(s, rn));
5758        }
5759        break;
5760    case 5: /* GMI */
5761        if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) {
5762            goto do_unallocated;
5763        } else {
5764            TCGv_i64 t1 = tcg_const_i64(1);
5765            TCGv_i64 t2 = tcg_temp_new_i64();
5766
5767            tcg_gen_extract_i64(t2, cpu_reg_sp(s, rn), 56, 4);
5768            tcg_gen_shl_i64(t1, t1, t2);
5769            tcg_gen_or_i64(cpu_reg(s, rd), cpu_reg(s, rm), t1);
5770
5771            tcg_temp_free_i64(t1);
5772            tcg_temp_free_i64(t2);
5773        }
5774        break;
5775    case 8: /* LSLV */
5776        handle_shift_reg(s, A64_SHIFT_TYPE_LSL, sf, rm, rn, rd);
5777        break;
5778    case 9: /* LSRV */
5779        handle_shift_reg(s, A64_SHIFT_TYPE_LSR, sf, rm, rn, rd);
5780        break;
5781    case 10: /* ASRV */
5782        handle_shift_reg(s, A64_SHIFT_TYPE_ASR, sf, rm, rn, rd);
5783        break;
5784    case 11: /* RORV */
5785        handle_shift_reg(s, A64_SHIFT_TYPE_ROR, sf, rm, rn, rd);
5786        break;
5787    case 12: /* PACGA */
5788        if (sf == 0 || !dc_isar_feature(aa64_pauth, s)) {
5789            goto do_unallocated;
5790        }
5791        gen_helper_pacga(cpu_reg(s, rd), cpu_env,
5792                         cpu_reg(s, rn), cpu_reg_sp(s, rm));
5793        break;
5794    case 16:
5795    case 17:
5796    case 18:
5797    case 19:
5798    case 20:
5799    case 21:
5800    case 22:
5801    case 23: /* CRC32 */
5802    {
5803        int sz = extract32(opcode, 0, 2);
5804        bool crc32c = extract32(opcode, 2, 1);
5805        handle_crc32(s, sf, sz, crc32c, rm, rn, rd);
5806        break;
5807    }
5808    default:
5809    do_unallocated:
5810        unallocated_encoding(s);
5811        break;
5812    }
5813}
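
/*
 * GMI sketch: with t the allocation tag in bits [59:56] of Xn, the
 * result is Xm | (1 << t); e.g. a tag of 3 ORs 0x8 into the exclusion
 * mask being accumulated in Xm.
 */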
5814
5815/*
5816 * Data processing - register
5817 *  31  30 29  28      25    21  20  16      10         0
5818 * +--+---+--+---+-------+-----+-------+-------+---------+
5819 * |  |op0|  |op1| 1 0 1 | op2 |       |  op3  |         |
5820 * +--+---+--+---+-------+-----+-------+-------+---------+
5821 */
5822static void disas_data_proc_reg(DisasContext *s, uint32_t insn)
5823{
5824    int op0 = extract32(insn, 30, 1);
5825    int op1 = extract32(insn, 28, 1);
5826    int op2 = extract32(insn, 21, 4);
5827    int op3 = extract32(insn, 10, 6);
5828
5829    if (!op1) {
5830        if (op2 & 8) {
5831            if (op2 & 1) {
5832                /* Add/sub (extended register) */
5833                disas_add_sub_ext_reg(s, insn);
5834            } else {
5835                /* Add/sub (shifted register) */
5836                disas_add_sub_reg(s, insn);
5837            }
5838        } else {
5839            /* Logical (shifted register) */
5840            disas_logic_reg(s, insn);
5841        }
5842        return;
5843    }
5844
5845    switch (op2) {
5846    case 0x0:
5847        switch (op3) {
5848        case 0x00: /* Add/subtract (with carry) */
5849            disas_adc_sbc(s, insn);
5850            break;
5851
5852        case 0x01: /* Rotate right into flags */
5853        case 0x21:
5854            disas_rotate_right_into_flags(s, insn);
5855            break;
5856
5857        case 0x02: /* Evaluate into flags */
5858        case 0x12:
5859        case 0x22:
5860        case 0x32:
5861            disas_evaluate_into_flags(s, insn);
5862            break;
5863
5864        default:
5865            goto do_unallocated;
5866        }
5867        break;
5868
5869    case 0x2: /* Conditional compare */
5870        disas_cc(s, insn); /* both imm and reg forms */
5871        break;
5872
5873    case 0x4: /* Conditional select */
5874        disas_cond_select(s, insn);
5875        break;
5876
5877    case 0x6: /* Data-processing */
5878        if (op0) {    /* (1 source) */
5879            disas_data_proc_1src(s, insn);
5880        } else {      /* (2 source) */
5881            disas_data_proc_2src(s, insn);
5882        }
5883        break;
5884    case 0x8 ... 0xf: /* (3 source) */
5885        disas_data_proc_3src(s, insn);
5886        break;
5887
5888    default:
5889    do_unallocated:
5890        unallocated_encoding(s);
5891        break;
5892    }
5893}
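
/*
 * Routing sketch (illustrative): "ADDS X0, X1, X2" (add/subtract,
 * shifted register) has op1 == 0 with bit 3 of op2 set and bit 0 clear,
 * so it reaches disas_add_sub_reg via the !op1 branch above.
 */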
5894
5895static void handle_fp_compare(DisasContext *s, int size,
5896                              unsigned int rn, unsigned int rm,
5897                              bool cmp_with_zero, bool signal_all_nans)
5898{
5899    TCGv_i64 tcg_flags = tcg_temp_new_i64();
5900    TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
5901
5902    if (size == MO_64) {
5903        TCGv_i64 tcg_vn, tcg_vm;
5904
5905        tcg_vn = read_fp_dreg(s, rn);
5906        if (cmp_with_zero) {
5907            tcg_vm = tcg_const_i64(0);
5908        } else {
5909            tcg_vm = read_fp_dreg(s, rm);
5910        }
5911        if (signal_all_nans) {
5912            gen_helper_vfp_cmped_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5913        } else {
5914            gen_helper_vfp_cmpd_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5915        }
5916        tcg_temp_free_i64(tcg_vn);
5917        tcg_temp_free_i64(tcg_vm);
5918    } else {
5919        TCGv_i32 tcg_vn = tcg_temp_new_i32();
5920        TCGv_i32 tcg_vm = tcg_temp_new_i32();
5921
5922        read_vec_element_i32(s, tcg_vn, rn, 0, size);
5923        if (cmp_with_zero) {
5924            tcg_gen_movi_i32(tcg_vm, 0);
5925        } else {
5926            read_vec_element_i32(s, tcg_vm, rm, 0, size);
5927        }
5928
5929        switch (size) {
5930        case MO_32:
5931            if (signal_all_nans) {
5932                gen_helper_vfp_cmpes_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5933            } else {
5934                gen_helper_vfp_cmps_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5935            }
5936            break;
5937        case MO_16:
5938            if (signal_all_nans) {
5939                gen_helper_vfp_cmpeh_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5940            } else {
5941                gen_helper_vfp_cmph_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5942            }
5943            break;
5944        default:
5945            g_assert_not_reached();
5946        }
5947
5948        tcg_temp_free_i32(tcg_vn);
5949        tcg_temp_free_i32(tcg_vm);
5950    }
5951
5952    tcg_temp_free_ptr(fpst);
5953
5954    gen_set_nzcv(tcg_flags);
5955
5956    tcg_temp_free_i64(tcg_flags);
5957}
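
/*
 * The compare helpers return the NZCV result in bits [31:28] of
 * tcg_flags, the format gen_set_nzcv consumes.  An unordered compare
 * (either input a NaN) yields 0011, i.e. C and V set.
 */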
5958
5959/* Floating point compare
5960 *   31  30  29 28       24 23  22  21 20  16 15 14 13  10    9    5 4     0
5961 * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
5962 * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | op  | 1 0 0 0 |  Rn  |  op2  |
5963 * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
5964 */
5965static void disas_fp_compare(DisasContext *s, uint32_t insn)
5966{
5967    unsigned int mos, type, rm, op, rn, opc, op2r;
5968    int size;
5969
5970    mos = extract32(insn, 29, 3);
5971    type = extract32(insn, 22, 2);
5972    rm = extract32(insn, 16, 5);
5973    op = extract32(insn, 14, 2);
5974    rn = extract32(insn, 5, 5);
5975    opc = extract32(insn, 3, 2);
5976    op2r = extract32(insn, 0, 3);
5977
5978    if (mos || op || op2r) {
5979        unallocated_encoding(s);
5980        return;
5981    }
5982
5983    switch (type) {
5984    case 0:
5985        size = MO_32;
5986        break;
5987    case 1:
5988        size = MO_64;
5989        break;
5990    case 3:
5991        size = MO_16;
5992        if (dc_isar_feature(aa64_fp16, s)) {
5993            break;
5994        }
5995        /* fallthru */
5996    default:
5997        unallocated_encoding(s);
5998        return;
5999    }
6000
6001    if (!fp_access_check(s)) {
6002        return;
6003    }
6004
6005    handle_fp_compare(s, size, rn, rm, opc & 1, opc & 2);
6006}
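
/*
 * The two opc bits select the four variants: opc<0> picks the
 * compare-with-zero forms (FCMP/FCMPE with #0.0) and opc<1> the
 * signalling FCMPE forms, hence passing them through as cmp_with_zero
 * and signal_all_nans.
 */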
6007
6008/* Floating point conditional compare
6009 *   31  30  29 28       24 23  22  21 20  16 15  12 11 10 9    5  4   3    0
6010 * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
6011 * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | cond | 0 1 |  Rn  | op | nzcv |
6012 * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
6013 */
6014static void disas_fp_ccomp(DisasContext *s, uint32_t insn)
6015{
6016    unsigned int mos, type, rm, cond, rn, op, nzcv;
6017    TCGv_i64 tcg_flags;
6018    TCGLabel *label_continue = NULL;
6019    int size;
6020
6021    mos = extract32(insn, 29, 3);
6022    type = extract32(insn, 22, 2);
6023    rm = extract32(insn, 16, 5);
6024    cond = extract32(insn, 12, 4);
6025    rn = extract32(insn, 5, 5);
6026    op = extract32(insn, 4, 1);
6027    nzcv = extract32(insn, 0, 4);
6028
6029    if (mos) {
6030        unallocated_encoding(s);
6031        return;
6032    }
6033
6034    switch (type) {
6035    case 0:
6036        size = MO_32;
6037        break;
6038    case 1:
6039        size = MO_64;
6040        break;
6041    case 3:
6042        size = MO_16;
6043        if (dc_isar_feature(aa64_fp16, s)) {
6044            break;
6045        }
6046        /* fallthru */
6047    default:
6048        unallocated_encoding(s);
6049        return;
6050    }
6051
6052    if (!fp_access_check(s)) {
6053        return;
6054    }
6055
6056    if (cond < 0x0e) { /* not always */
6057        TCGLabel *label_match = gen_new_label();
6058        label_continue = gen_new_label();
6059        arm_gen_test_cc(cond, label_match);
6060        /* nomatch: */
6061        tcg_flags = tcg_const_i64(nzcv << 28);
6062        gen_set_nzcv(tcg_flags);
6063        tcg_temp_free_i64(tcg_flags);
6064        tcg_gen_br(label_continue);
6065        gen_set_label(label_match);
6066    }
6067
6068    handle_fp_compare(s, size, rn, rm, false, op);
6069
6070    if (cond < 0x0e) {
6071        gen_set_label(label_continue);
6072    }
6073}
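
/*
 * Unlike the integer CCMP path, which splices flags in branchlessly,
 * the FP variant uses a real branch around the compare, presumably
 * because the helper call is heavyweight enough that skipping it when
 * the condition fails is the better trade-off.
 */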
6074
6075/* Floating point conditional select
6076 *   31  30  29 28       24 23  22  21 20  16 15  12 11 10 9    5 4    0
6077 * +---+---+---+-----------+------+---+------+------+-----+------+------+
6078 * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | cond | 1 1 |  Rn  |  Rd  |
6079 * +---+---+---+-----------+------+---+------+------+-----+------+------+
6080 */
6081static void disas_fp_csel(DisasContext *s, uint32_t insn)
6082{
6083    unsigned int mos, type, rm, cond, rn, rd;
6084    TCGv_i64 t_true, t_false, t_zero;
6085    DisasCompare64 c;
6086    MemOp sz;
6087
6088    mos = extract32(insn, 29, 3);
6089    type = extract32(insn, 22, 2);
6090    rm = extract32(insn, 16, 5);
6091    cond = extract32(insn, 12, 4);
6092    rn = extract32(insn, 5, 5);
6093    rd = extract32(insn, 0, 5);
6094
6095    if (mos) {
6096        unallocated_encoding(s);
6097        return;
6098    }
6099
6100    switch (type) {
6101    case 0:
6102        sz = MO_32;
6103        break;
6104    case 1:
6105        sz = MO_64;
6106        break;
6107    case 3:
6108        sz = MO_16;
6109        if (dc_isar_feature(aa64_fp16, s)) {
6110            break;
6111        }
6112        /* fallthru */
6113    default:
6114        unallocated_encoding(s);
6115        return;
6116    }
6117
6118    if (!fp_access_check(s)) {
6119        return;
6120    }
6121
6122    /* Zero extend sreg & hreg inputs to 64 bits now.  */
6123    t_true = tcg_temp_new_i64();
6124    t_false = tcg_temp_new_i64();
6125    read_vec_element(s, t_true, rn, 0, sz);
6126    read_vec_element(s, t_false, rm, 0, sz);
6127
6128    a64_test_cc(&c, cond);
6129    t_zero = tcg_const_i64(0);
6130    tcg_gen_movcond_i64(c.cond, t_true, c.value, t_zero, t_true, t_false);
6131    tcg_temp_free_i64(t_zero);
6132    tcg_temp_free_i64(t_false);
6133    a64_free_cc(&c);
6134
6135    /* Note that sregs & hregs write back zeros to the high bits,
6136       and we've already done the zero-extension.  */
6137    write_fp_dreg(s, rd, t_true);
6138    tcg_temp_free_i64(t_true);
6139}
6140
6141/* Floating-point data-processing (1 source) - half precision */
6142static void handle_fp_1src_half(DisasContext *s, int opcode, int rd, int rn)
6143{
6144    TCGv_ptr fpst = NULL;
6145    TCGv_i32 tcg_op = read_fp_hreg(s, rn);
6146    TCGv_i32 tcg_res = tcg_temp_new_i32();
6147
6148    switch (opcode) {
6149    case 0x0: /* FMOV */
6150        tcg_gen_mov_i32(tcg_res, tcg_op);
6151        break;
6152    case 0x1: /* FABS */
6153        tcg_gen_andi_i32(tcg_res, tcg_op, 0x7fff);
6154        break;
6155    case 0x2: /* FNEG */
6156        tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
6157        break;
6158    case 0x3: /* FSQRT */
6159        fpst = fpstatus_ptr(FPST_FPCR_F16);
6160        gen_helper_sqrt_f16(tcg_res, tcg_op, fpst);
6161        break;
6162    case 0x8: /* FRINTN */
6163    case 0x9: /* FRINTP */
6164    case 0xa: /* FRINTM */
6165    case 0xb: /* FRINTZ */
6166    case 0xc: /* FRINTA */
6167    {
6168        TCGv_i32 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(opcode & 7));
6169        fpst = fpstatus_ptr(FPST_FPCR_F16);
6170
6171        gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
6172        gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst);
6173
6174        gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
6175        tcg_temp_free_i32(tcg_rmode);
6176        break;
6177    }
6178    case 0xe: /* FRINTX */
6179        fpst = fpstatus_ptr(FPST_FPCR_F16);
6180        gen_helper_advsimd_rinth_exact(tcg_res, tcg_op, fpst);
6181        break;
6182    case 0xf: /* FRINTI */
6183        fpst = fpstatus_ptr(FPST_FPCR_F16);
6184        gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst);
6185        break;
6186    default:
6187        g_assert_not_reached();
6188    }
6189
6190    write_fp_sreg(s, rd, tcg_res);
6191
6192    if (fpst) {
6193        tcg_temp_free_ptr(fpst);
6194    }
6195    tcg_temp_free_i32(tcg_op);
6196    tcg_temp_free_i32(tcg_res);
6197}
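
/*
 * FABS and FNEG need no float_status: for IEEE binary16 the sign is
 * bit 15, so the AND with 0x7fff and XOR with 0x8000 above implement
 * them exactly, and they can never raise FP exceptions.
 */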
6198
6199/* Floating-point data-processing (1 source) - single precision */
6200static void handle_fp_1src_single(DisasContext *s, int opcode, int rd, int rn)
6201{
6202    void (*gen_fpst)(TCGv_i32, TCGv_i32, TCGv_ptr);
6203    TCGv_i32 tcg_op, tcg_res;
6204    TCGv_ptr fpst;
6205    int rmode = -1;
6206
6207    tcg_op = read_fp_sreg(s, rn);
6208    tcg_res = tcg_temp_new_i32();
6209
6210    switch (opcode) {
6211    case 0x0: /* FMOV */
6212        tcg_gen_mov_i32(tcg_res, tcg_op);
6213        goto done;
6214    case 0x1: /* FABS */
6215        gen_helper_vfp_abss(tcg_res, tcg_op);
6216        goto done;
6217    case 0x2: /* FNEG */
6218        gen_helper_vfp_negs(tcg_res, tcg_op);
6219        goto done;
6220    case 0x3: /* FSQRT */
6221        gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env);
6222        goto done;
6223    case 0x8: /* FRINTN */
6224    case 0x9: /* FRINTP */
6225    case 0xa: /* FRINTM */
6226    case 0xb: /* FRINTZ */
6227    case 0xc: /* FRINTA */
6228        rmode = arm_rmode_to_sf(opcode & 7);
6229        gen_fpst = gen_helper_rints;
6230        break;
6231    case 0xe: /* FRINTX */
6232        gen_fpst = gen_helper_rints_exact;
6233        break;
6234    case 0xf: /* FRINTI */
6235        gen_fpst = gen_helper_rints;
6236        break;
6237    case 0x10: /* FRINT32Z */
6238        rmode = float_round_to_zero;
6239        gen_fpst = gen_helper_frint32_s;
6240        break;
6241    case 0x11: /* FRINT32X */
6242        gen_fpst = gen_helper_frint32_s;
6243        break;
6244    case 0x12: /* FRINT64Z */
6245        rmode = float_round_to_zero;
6246        gen_fpst = gen_helper_frint64_s;
6247        break;
6248    case 0x13: /* FRINT64X */
6249        gen_fpst = gen_helper_frint64_s;
6250        break;
6251    default:
6252        g_assert_not_reached();
6253    }
6254
6255    fpst = fpstatus_ptr(FPST_FPCR);
6256    if (rmode >= 0) {
6257        TCGv_i32 tcg_rmode = tcg_const_i32(rmode);
6258        gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
6259        gen_fpst(tcg_res, tcg_op, fpst);
6260        gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
6261        tcg_temp_free_i32(tcg_rmode);
6262    } else {
6263        gen_fpst(tcg_res, tcg_op, fpst);
6264    }
6265    tcg_temp_free_ptr(fpst);
6266
6267 done:
6268    write_fp_sreg(s, rd, tcg_res);
6269    tcg_temp_free_i32(tcg_op);
6270    tcg_temp_free_i32(tcg_res);
6271}
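
/*
 * The rounding-mode bracketing above relies on gen_helper_set_rmode
 * writing the previous mode back into its first argument: the first
 * call installs the new mode and saves the old one in tcg_rmode, and
 * the second call restores it.  The half- and double-precision
 * variants use the same pattern.
 */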
6272
6273/* Floating-point data-processing (1 source) - double precision */
6274static void handle_fp_1src_double(DisasContext *s, int opcode, int rd, int rn)
6275{
6276    void (*gen_fpst)(TCGv_i64, TCGv_i64, TCGv_ptr);
6277    TCGv_i64 tcg_op, tcg_res;
6278    TCGv_ptr fpst;
6279    int rmode = -1;
6280
6281    switch (opcode) {
6282    case 0x0: /* FMOV */
6283        gen_gvec_fn2(s, false, rd, rn, tcg_gen_gvec_mov, 0);
6284        return;
6285    }
6286
6287    tcg_op = read_fp_dreg(s, rn);
6288    tcg_res = tcg_temp_new_i64();
6289
6290    switch (opcode) {
6291    case 0x1: /* FABS */
6292        gen_helper_vfp_absd(tcg_res, tcg_op);
6293        goto done;
6294    case 0x2: /* FNEG */
6295        gen_helper_vfp_negd(tcg_res, tcg_op);
6296        goto done;
6297    case 0x3: /* FSQRT */
6298        gen_helper_vfp_sqrtd(tcg_res, tcg_op, cpu_env);
6299        goto done;
6300    case 0x8: /* FRINTN */
6301    case 0x9: /* FRINTP */
6302    case 0xa: /* FRINTM */
6303    case 0xb: /* FRINTZ */
6304    case 0xc: /* FRINTA */
6305        rmode = arm_rmode_to_sf(opcode & 7);
6306        gen_fpst = gen_helper_rintd;
6307        break;
6308    case 0xe: /* FRINTX */
6309        gen_fpst = gen_helper_rintd_exact;
6310        break;
6311    case 0xf: /* FRINTI */
6312        gen_fpst = gen_helper_rintd;
6313        break;
6314    case 0x10: /* FRINT32Z */
6315        rmode = float_round_to_zero;
6316        gen_fpst = gen_helper_frint32_d;
6317        break;
6318    case 0x11: /* FRINT32X */
6319        gen_fpst = gen_helper_frint32_d;
6320        break;
6321    case 0x12: /* FRINT64Z */
6322        rmode = float_round_to_zero;
6323        gen_fpst = gen_helper_frint64_d;
6324        break;
6325    case 0x13: /* FRINT64X */
6326        gen_fpst = gen_helper_frint64_d;
6327        break;
6328    default:
6329        g_assert_not_reached();
6330    }
6331
6332    fpst = fpstatus_ptr(FPST_FPCR);
6333    if (rmode >= 0) {
6334        TCGv_i32 tcg_rmode = tcg_const_i32(rmode);
6335        gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
6336        gen_fpst(tcg_res, tcg_op, fpst);
6337        gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
6338        tcg_temp_free_i32(tcg_rmode);
6339    } else {
6340        gen_fpst(tcg_res, tcg_op, fpst);
6341    }
6342    tcg_temp_free_ptr(fpst);
6343
6344 done:
6345    write_fp_dreg(s, rd, tcg_res);
6346    tcg_temp_free_i64(tcg_op);
6347    tcg_temp_free_i64(tcg_res);
6348}
6349
6350static void handle_fp_fcvt(DisasContext *s, int opcode,
6351                           int rd, int rn, int dtype, int ntype)
6352{
6353    switch (ntype) {
6354    case 0x0:
6355    {
6356        TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
6357        if (dtype == 1) {
6358            /* Single to double */
6359            TCGv_i64 tcg_rd = tcg_temp_new_i64();
6360            gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, cpu_env);
6361            write_fp_dreg(s, rd, tcg_rd);
6362            tcg_temp_free_i64(tcg_rd);
6363        } else {
6364            /* Single to half */
6365            TCGv_i32 tcg_rd = tcg_temp_new_i32();
6366            TCGv_i32 ahp = get_ahp_flag();
6367            TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
6368
6369            gen_helper_vfp_fcvt_f32_to_f16(tcg_rd, tcg_rn, fpst, ahp);
6370            /* write_fp_sreg is OK here because top half of tcg_rd is zero */
6371            write_fp_sreg(s, rd, tcg_rd);
6372            tcg_temp_free_i32(tcg_rd);
6373            tcg_temp_free_i32(ahp);
6374            tcg_temp_free_ptr(fpst);
6375        }
6376        tcg_temp_free_i32(tcg_rn);
6377        break;
6378    }
6379    case 0x1:
6380    {
6381        TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
6382        TCGv_i32 tcg_rd = tcg_temp_new_i32();
6383        if (dtype == 0) {
6384            /* Double to single */
6385            gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, cpu_env);
6386        } else {
6387            TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
6388            TCGv_i32 ahp = get_ahp_flag();
6389            /* Double to half */
6390            gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, fpst, ahp);
6391            /* write_fp_sreg is OK here because top half of tcg_rd is zero */
6392            tcg_temp_free_ptr(fpst);
6393            tcg_temp_free_i32(ahp);
6394        }
6395        write_fp_sreg(s, rd, tcg_rd);
6396        tcg_temp_free_i32(tcg_rd);
6397        tcg_temp_free_i64(tcg_rn);
6398        break;
6399    }
6400    case 0x3:
6401    {
6402        TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
6403        TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_FPCR);
6404        TCGv_i32 tcg_ahp = get_ahp_flag();
6405        tcg_gen_ext16u_i32(tcg_rn, tcg_rn);
6406        if (dtype == 0) {
6407            /* Half to single */
6408            TCGv_i32 tcg_rd = tcg_temp_new_i32();
6409            gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp);
6410            write_fp_sreg(s, rd, tcg_rd);
6411            tcg_temp_free_i32(tcg_rd);
6412        } else {
6413            /* Half to double */
6414            TCGv_i64 tcg_rd = tcg_temp_new_i64();
6415            gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp);
6416            write_fp_dreg(s, rd, tcg_rd);
6417            tcg_temp_free_i64(tcg_rd);
6418        }
6419        tcg_temp_free_i32(tcg_rn);
6420        tcg_temp_free_ptr(tcg_fpst);
6421        tcg_temp_free_i32(tcg_ahp);
6422        break;
6423    }
6424    default:
6425        g_assert_not_reached();
6426    }
6427}
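
/*
 * Example (illustrative): "FCVT D0, S1" arrives with ntype == 0 (single
 * source) and dtype == 1 (double destination), taking the
 * single-to-double path.  Only the half-precision conversions consult
 * the AHP flag, which selects between IEEE and the alternative
 * half-precision format.
 */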
6428
6429/* Floating point data-processing (1 source)
6430 *   31  30  29 28       24 23  22  21 20    15 14       10 9    5 4    0
6431 * +---+---+---+-----------+------+---+--------+-----------+------+------+
6432 * | M | 0 | S | 1 1 1 1 0 | type | 1 | opcode | 1 0 0 0 0 |  Rn  |  Rd  |
6433 * +---+---+---+-----------+------+---+--------+-----------+------+------+
6434 */
6435static void disas_fp_1src(DisasContext *s, uint32_t insn)
6436{
6437    int mos = extract32(insn, 29, 3);
6438    int type = extract32(insn, 22, 2);
6439    int opcode = extract32(insn, 15, 6);
6440    int rn = extract32(insn, 5, 5);
6441    int rd = extract32(insn, 0, 5);
6442
6443    if (mos) {
6444        unallocated_encoding(s);
6445        return;
6446    }
6447
6448    switch (opcode) {
6449    case 0x4: case 0x5: case 0x7:
6450    {
6451        /* FCVT between half, single and double precision */
6452        int dtype = extract32(opcode, 0, 2);
6453        if (type == 2 || dtype == type) {
6454            unallocated_encoding(s);
6455            return;
6456        }
6457        if (!fp_access_check(s)) {
6458            return;
6459        }
6460
6461        handle_fp_fcvt(s, opcode, rd, rn, dtype, type);
6462        break;
6463    }
6464
6465    case 0x10 ... 0x13: /* FRINT{32,64}{X,Z} */
6466        if (type > 1 || !dc_isar_feature(aa64_frint, s)) {
6467            unallocated_encoding(s);
6468            return;
6469        }
6470        /* fall through */
6471    case 0x0 ... 0x3:
6472    case 0x8 ... 0xc:
6473    case 0xe ... 0xf:
6474        /* 32-to-32 and 64-to-64 ops */
6475        switch (type) {
6476        case 0:
6477            if (!fp_access_check(s)) {
6478                return;
6479            }
6480            handle_fp_1src_single(s, opcode, rd, rn);
6481            break;
6482        case 1:
6483            if (!fp_access_check(s)) {
6484                return;
6485            }
6486            handle_fp_1src_double(s, opcode, rd, rn);
6487            break;
6488        case 3:
6489            if (!dc_isar_feature(aa64_fp16, s)) {
6490                unallocated_encoding(s);
6491                return;
6492            }
6493
6494            if (!fp_access_check(s)) {
6495                return;
6496            }
6497            handle_fp_1src_half(s, opcode, rd, rn);
6498            break;
6499        default:
6500            unallocated_encoding(s);
6501        }
6502        break;
6503
6504    default:
6505        unallocated_encoding(s);
6506        break;
6507    }
6508}
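
/*
 * FRINT32Z/FRINT32X/FRINT64Z/FRINT64X come from the ARMv8.5 FRINT
 * extension: they round to an integral value that is also exactly
 * representable in 32 or 64 bits, hence the extra aa64_frint gate
 * above.
 */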
6509
6510/* Floating-point data-processing (2 source) - single precision */
6511static void handle_fp_2src_single(DisasContext *s, int opcode,
6512                                  int rd, int rn, int rm)
6513{
6514    TCGv_i32 tcg_op1;
6515    TCGv_i32 tcg_op2;
6516    TCGv_i32 tcg_res;
6517    TCGv_ptr fpst;
6518
6519    tcg_res = tcg_temp_new_i32();
6520    fpst = fpstatus_ptr(FPST_FPCR);
6521    tcg_op1 = read_fp_sreg(s, rn);
6522    tcg_op2 = read_fp_sreg(s, rm);
6523
6524    switch (opcode) {
6525    case 0x0: /* FMUL */
6526        gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
6527        break;
6528    case 0x1: /* FDIV */
6529        gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
6530        break;
6531    case 0x2: /* FADD */
6532        gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
6533        break;
6534    case 0x3: /* FSUB */
6535        gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
6536        break;
6537    case 0x4: /* FMAX */
6538        gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
6539        break;
6540    case 0x5: /* FMIN */
6541        gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
6542        break;
6543    case 0x6: /* FMAXNM */
6544        gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
6545        break;
6546    case 0x7: /* FMINNM */
6547        gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
6548        break;
6549    case 0x8: /* FNMUL */
6550        gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
6551        gen_helper_vfp_negs(tcg_res, tcg_res);
6552        break;
6553    }
6554
6555    write_fp_sreg(s, rd, tcg_res);
6556
6557    tcg_temp_free_ptr(fpst);
6558    tcg_temp_free_i32(tcg_op1);
6559    tcg_temp_free_i32(tcg_op2);
6560    tcg_temp_free_i32(tcg_res);
6561}
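
/*
 * FNMUL is just a multiply followed by a sign flip on the rounded
 * result; it is not fused with anything, matching the architectural
 * "negate the product" definition.
 */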
6562
6563/* Floating-point data-processing (2 source) - double precision */
6564static void handle_fp_2src_double(DisasContext *s, int opcode,
6565                                  int rd, int rn, int rm)
6566{
6567    TCGv_i64 tcg_op1;
6568    TCGv_i64 tcg_op2;
6569    TCGv_i64 tcg_res;
6570    TCGv_ptr fpst;
6571
6572    tcg_res = tcg_temp_new_i64();
6573    fpst = fpstatus_ptr(FPST_FPCR);
6574    tcg_op1 = read_fp_dreg(s, rn);
6575    tcg_op2 = read_fp_dreg(s, rm);
6576
6577    switch (opcode) {
6578    case 0x0: /* FMUL */
6579        gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
6580        break;
6581    case 0x1: /* FDIV */
6582        gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
6583        break;
6584    case 0x2: /* FADD */
6585        gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
6586        break;
6587    case 0x3: /* FSUB */
6588        gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
6589        break;
6590    case 0x4: /* FMAX */
6591        gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
6592        break;
6593    case 0x5: /* FMIN */
6594        gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
6595        break;
6596    case 0x6: /* FMAXNM */
6597        gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
6598        break;
6599    case 0x7: /* FMINNM */
6600        gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
6601        break;
6602    case 0x8: /* FNMUL */
6603        gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
6604        gen_helper_vfp_negd(tcg_res, tcg_res);
6605        break;
6606    }
6607
6608    write_fp_dreg(s, rd, tcg_res);
6609
6610    tcg_temp_free_ptr(fpst);
6611    tcg_temp_free_i64(tcg_op1);
6612    tcg_temp_free_i64(tcg_op2);
6613    tcg_temp_free_i64(tcg_res);
6614}
6615
6616/* Floating-point data-processing (2 source) - half precision */
6617static void handle_fp_2src_half(DisasContext *s, int opcode,
6618                                int rd, int rn, int rm)
6619{
6620    TCGv_i32 tcg_op1;
6621    TCGv_i32 tcg_op2;
6622    TCGv_i32 tcg_res;
6623    TCGv_ptr fpst;
6624
6625    tcg_res = tcg_temp_new_i32();
6626    fpst = fpstatus_ptr(FPST_FPCR_F16);
6627    tcg_op1 = read_fp_hreg(s, rn);
6628    tcg_op2 = read_fp_hreg(s, rm);
6629
6630    switch (opcode) {
6631    case 0x0: /* FMUL */
6632        gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst);
6633        break;
6634    case 0x1: /* FDIV */
6635        gen_helper_advsimd_divh(tcg_res, tcg_op1, tcg_op2, fpst);
6636        break;
6637    case 0x2: /* FADD */
6638        gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst);
6639        break;
6640    case 0x3: /* FSUB */
6641        gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
6642        break;
6643    case 0x4: /* FMAX */
6644        gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst);
6645        break;
6646    case 0x5: /* FMIN */
6647        gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst);
6648        break;
6649    case 0x6: /* FMAXNM */
6650        gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst);
6651        break;
6652    case 0x7: /* FMINNM */
6653        gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst);
6654        break;
6655    case 0x8: /* FNMUL */
6656        gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst);
6657        tcg_gen_xori_i32(tcg_res, tcg_res, 0x8000);
6658        break;
6659    default:
6660        g_assert_not_reached();
6661    }
6662
6663    write_fp_sreg(s, rd, tcg_res);
6664
6665    tcg_temp_free_ptr(fpst);
6666    tcg_temp_free_i32(tcg_op1);
6667    tcg_temp_free_i32(tcg_op2);
6668    tcg_temp_free_i32(tcg_res);
6669}
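
    /* A worked illustration of the FNMUL special case above: IEEE 754
     * binary16 keeps its sign in bit 15, so XORing with 0x8000 negates
     * the value; for example 3.0 is 0x4200 as a float16 and
     * 0x4200 ^ 0x8000 == 0xc200, which is -3.0. This flips the sign of
     * a NaN result too, matching what the vfp_negs/vfp_negd helpers do
     * for the single- and double-precision variants.
     */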
6670
6671/* Floating point data-processing (2 source)
6672 *   31  30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
6673 * +---+---+---+-----------+------+---+------+--------+-----+------+------+
6674 * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | opcode | 1 0 |  Rn  |  Rd  |
6675 * +---+---+---+-----------+------+---+------+--------+-----+------+------+
6676 */
6677static void disas_fp_2src(DisasContext *s, uint32_t insn)
6678{
6679    int mos = extract32(insn, 29, 3);
6680    int type = extract32(insn, 22, 2);
6681    int rd = extract32(insn, 0, 5);
6682    int rn = extract32(insn, 5, 5);
6683    int rm = extract32(insn, 16, 5);
6684    int opcode = extract32(insn, 12, 4);
6685
6686    if (opcode > 8 || mos) {
6687        unallocated_encoding(s);
6688        return;
6689    }
6690
6691    switch (type) {
6692    case 0:
6693        if (!fp_access_check(s)) {
6694            return;
6695        }
6696        handle_fp_2src_single(s, opcode, rd, rn, rm);
6697        break;
6698    case 1:
6699        if (!fp_access_check(s)) {
6700            return;
6701        }
6702        handle_fp_2src_double(s, opcode, rd, rn, rm);
6703        break;
6704    case 3:
6705        if (!dc_isar_feature(aa64_fp16, s)) {
6706            unallocated_encoding(s);
6707            return;
6708        }
6709        if (!fp_access_check(s)) {
6710            return;
6711        }
6712        handle_fp_2src_half(s, opcode, rd, rn, rm);
6713        break;
6714    default:
6715        unallocated_encoding(s);
6716    }
6717}
6718
6719/* Floating-point data-processing (3 source) - single precision */
6720static void handle_fp_3src_single(DisasContext *s, bool o0, bool o1,
6721                                  int rd, int rn, int rm, int ra)
6722{
6723    TCGv_i32 tcg_op1, tcg_op2, tcg_op3;
6724    TCGv_i32 tcg_res = tcg_temp_new_i32();
6725    TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
6726
6727    tcg_op1 = read_fp_sreg(s, rn);
6728    tcg_op2 = read_fp_sreg(s, rm);
6729    tcg_op3 = read_fp_sreg(s, ra);
6730
6731    /* These are fused multiply-add, and must be done as one
6732     * floating point operation with no rounding between the
6733     * multiplication and addition steps.
6734     * NB that doing the negations here as separate steps is
6735     * correct: an input NaN should come out with its sign bit
6736     * flipped if it is a negated input.
6737     */
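        /* For reference, the o1:o0 combinations correspond to the four
         * fused multiply-add variants (the same mapping is used by the
         * double- and half-precision handlers below):
         *   o1=0 o0=0  FMADD   rd = ra + rn * rm
         *   o1=0 o0=1  FMSUB   rd = ra + (-rn) * rm
         *   o1=1 o0=0  FNMADD  rd = (-ra) + (-rn) * rm
         *   o1=1 o0=1  FNMSUB  rd = (-ra) + rn * rm
         */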
6738    if (o1) {
6739        gen_helper_vfp_negs(tcg_op3, tcg_op3);
6740    }
6741
6742    if (o0 != o1) {
6743        gen_helper_vfp_negs(tcg_op1, tcg_op1);
6744    }
6745
6746    gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
6747
6748    write_fp_sreg(s, rd, tcg_res);
6749
6750    tcg_temp_free_ptr(fpst);
6751    tcg_temp_free_i32(tcg_op1);
6752    tcg_temp_free_i32(tcg_op2);
6753    tcg_temp_free_i32(tcg_op3);
6754    tcg_temp_free_i32(tcg_res);
6755}
6756
6757/* Floating-point data-processing (3 source) - double precision */
6758static void handle_fp_3src_double(DisasContext *s, bool o0, bool o1,
6759                                  int rd, int rn, int rm, int ra)
6760{
6761    TCGv_i64 tcg_op1, tcg_op2, tcg_op3;
6762    TCGv_i64 tcg_res = tcg_temp_new_i64();
6763    TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
6764
6765    tcg_op1 = read_fp_dreg(s, rn);
6766    tcg_op2 = read_fp_dreg(s, rm);
6767    tcg_op3 = read_fp_dreg(s, ra);
6768
6769    /* These are fused multiply-add, and must be done as one
6770     * floating point operation with no rounding between the
6771     * multiplication and addition steps.
6772     * NB that doing the negations here as separate steps is
6773     * correct: an input NaN should come out with its sign bit
6774     * flipped if it is a negated input.
6775     */
6776    if (o1) {
6777        gen_helper_vfp_negd(tcg_op3, tcg_op3);
6778    }
6779
6780    if (o0 != o1) {
6781        gen_helper_vfp_negd(tcg_op1, tcg_op1);
6782    }
6783
6784    gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
6785
6786    write_fp_dreg(s, rd, tcg_res);
6787
6788    tcg_temp_free_ptr(fpst);
6789    tcg_temp_free_i64(tcg_op1);
6790    tcg_temp_free_i64(tcg_op2);
6791    tcg_temp_free_i64(tcg_op3);
6792    tcg_temp_free_i64(tcg_res);
6793}
6794
6795/* Floating-point data-processing (3 source) - half precision */
6796static void handle_fp_3src_half(DisasContext *s, bool o0, bool o1,
6797                                int rd, int rn, int rm, int ra)
6798{
6799    TCGv_i32 tcg_op1, tcg_op2, tcg_op3;
6800    TCGv_i32 tcg_res = tcg_temp_new_i32();
6801    TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR_F16);
6802
6803    tcg_op1 = read_fp_hreg(s, rn);
6804    tcg_op2 = read_fp_hreg(s, rm);
6805    tcg_op3 = read_fp_hreg(s, ra);
6806
6807    /* These are fused multiply-add, and must be done as one
6808     * floating point operation with no rounding between the
6809     * multiplication and addition steps.
6810     * NB that doing the negations here as separate steps is
6811     * correct: an input NaN should come out with its sign bit
6812     * flipped if it is a negated input.
6813     */
6814    if (o1) {
6815        tcg_gen_xori_i32(tcg_op3, tcg_op3, 0x8000);
6816    }
6817
6818    if (o0 != o1) {
6819        tcg_gen_xori_i32(tcg_op1, tcg_op1, 0x8000);
6820    }
6821
6822    gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
6823
6824    write_fp_sreg(s, rd, tcg_res);
6825
6826    tcg_temp_free_ptr(fpst);
6827    tcg_temp_free_i32(tcg_op1);
6828    tcg_temp_free_i32(tcg_op2);
6829    tcg_temp_free_i32(tcg_op3);
6830    tcg_temp_free_i32(tcg_res);
6831}
6832
6833/* Floating point data-processing (3 source)
6834 *   31  30  29 28       24 23  22  21  20  16  15  14  10 9    5 4    0
6835 * +---+---+---+-----------+------+----+------+----+------+------+------+
6836 * | M | 0 | S | 1 1 1 1 1 | type | o1 |  Rm  | o0 |  Ra  |  Rn  |  Rd  |
6837 * +---+---+---+-----------+------+----+------+----+------+------+------+
6838 */
6839static void disas_fp_3src(DisasContext *s, uint32_t insn)
6840{
6841    int mos = extract32(insn, 29, 3);
6842    int type = extract32(insn, 22, 2);
6843    int rd = extract32(insn, 0, 5);
6844    int rn = extract32(insn, 5, 5);
6845    int ra = extract32(insn, 10, 5);
6846    int rm = extract32(insn, 16, 5);
6847    bool o0 = extract32(insn, 15, 1);
6848    bool o1 = extract32(insn, 21, 1);
6849
6850    if (mos) {
6851        unallocated_encoding(s);
6852        return;
6853    }
6854
6855    switch (type) {
6856    case 0:
6857        if (!fp_access_check(s)) {
6858            return;
6859        }
6860        handle_fp_3src_single(s, o0, o1, rd, rn, rm, ra);
6861        break;
6862    case 1:
6863        if (!fp_access_check(s)) {
6864            return;
6865        }
6866        handle_fp_3src_double(s, o0, o1, rd, rn, rm, ra);
6867        break;
6868    case 3:
6869        if (!dc_isar_feature(aa64_fp16, s)) {
6870            unallocated_encoding(s);
6871            return;
6872        }
6873        if (!fp_access_check(s)) {
6874            return;
6875        }
6876        handle_fp_3src_half(s, o0, o1, rd, rn, rm, ra);
6877        break;
6878    default:
6879        unallocated_encoding(s);
6880    }
6881}
6882
6883/* Floating point immediate
6884 *   31  30  29 28       24 23  22  21 20        13 12   10 9    5 4    0
6885 * +---+---+---+-----------+------+---+------------+-------+------+------+
6886 * | M | 0 | S | 1 1 1 1 0 | type | 1 |    imm8    | 1 0 0 | imm5 |  Rd  |
6887 * +---+---+---+-----------+------+---+------------+-------+------+------+
6888 */
6889static void disas_fp_imm(DisasContext *s, uint32_t insn)
6890{
6891    int rd = extract32(insn, 0, 5);
6892    int imm5 = extract32(insn, 5, 5);
6893    int imm8 = extract32(insn, 13, 8);
6894    int type = extract32(insn, 22, 2);
6895    int mos = extract32(insn, 29, 3);
6896    uint64_t imm;
6897    TCGv_i64 tcg_res;
6898    MemOp sz;
6899
6900    if (mos || imm5) {
6901        unallocated_encoding(s);
6902        return;
6903    }
6904
6905    switch (type) {
6906    case 0:
6907        sz = MO_32;
6908        break;
6909    case 1:
6910        sz = MO_64;
6911        break;
6912    case 3:
6913        sz = MO_16;
6914        if (dc_isar_feature(aa64_fp16, s)) {
6915            break;
6916        }
6917        /* fallthru */
6918    default:
6919        unallocated_encoding(s);
6920        return;
6921    }
6922
6923    if (!fp_access_check(s)) {
6924        return;
6925    }
6926
6927    imm = vfp_expand_imm(sz, imm8);
6928
6929    tcg_res = tcg_const_i64(imm);
6930    write_fp_dreg(s, rd, tcg_res);
6931    tcg_temp_free_i64(tcg_res);
6932}
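
    /* A worked example of the expansion: for type == 0 (single
     * precision) an imm8 of 0x70 expands to 0x3f800000, i.e. 1.0f,
     * because imm8 is sign(1):exp(3):frac(4) and the exponent bits
     * grow as NOT(b6):Replicate(b6):b<5:4>; FMOV S0, #1.0 therefore
     * just deposits that constant into the register via write_fp_dreg.
     */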
6933
6934/* Handle floating point <=> fixed point conversions. Note that we can
6935 * also deal with fp <=> integer conversions as a special case (scale == 64).
6936 * OPTME: consider handling that special case specially, or at least skipping
6937 * the call to scalbn in the helpers for zero shifts.
6938 */
6939static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode,
6940                           bool itof, int rmode, int scale, int sf, int type)
6941{
6942    bool is_signed = !(opcode & 1);
6943    TCGv_ptr tcg_fpstatus;
6944    TCGv_i32 tcg_shift, tcg_single;
6945    TCGv_i64 tcg_double;
6946
6947    tcg_fpstatus = fpstatus_ptr(type == 3 ? FPST_FPCR_F16 : FPST_FPCR);
6948
6949    tcg_shift = tcg_const_i32(64 - scale);
6950
6951    if (itof) {
6952        TCGv_i64 tcg_int = cpu_reg(s, rn);
6953        if (!sf) {
6954            TCGv_i64 tcg_extend = new_tmp_a64(s);
6955
6956            if (is_signed) {
6957                tcg_gen_ext32s_i64(tcg_extend, tcg_int);
6958            } else {
6959                tcg_gen_ext32u_i64(tcg_extend, tcg_int);
6960            }
6961
6962            tcg_int = tcg_extend;
6963        }
6964
6965        switch (type) {
6966        case 1: /* float64 */
6967            tcg_double = tcg_temp_new_i64();
6968            if (is_signed) {
6969                gen_helper_vfp_sqtod(tcg_double, tcg_int,
6970                                     tcg_shift, tcg_fpstatus);
6971            } else {
6972                gen_helper_vfp_uqtod(tcg_double, tcg_int,
6973                                     tcg_shift, tcg_fpstatus);
6974            }
6975            write_fp_dreg(s, rd, tcg_double);
6976            tcg_temp_free_i64(tcg_double);
6977            break;
6978
6979        case 0: /* float32 */
6980            tcg_single = tcg_temp_new_i32();
6981            if (is_signed) {
6982                gen_helper_vfp_sqtos(tcg_single, tcg_int,
6983                                     tcg_shift, tcg_fpstatus);
6984            } else {
6985                gen_helper_vfp_uqtos(tcg_single, tcg_int,
6986                                     tcg_shift, tcg_fpstatus);
6987            }
6988            write_fp_sreg(s, rd, tcg_single);
6989            tcg_temp_free_i32(tcg_single);
6990            break;
6991
6992        case 3: /* float16 */
6993            tcg_single = tcg_temp_new_i32();
6994            if (is_signed) {
6995                gen_helper_vfp_sqtoh(tcg_single, tcg_int,
6996                                     tcg_shift, tcg_fpstatus);
6997            } else {
6998                gen_helper_vfp_uqtoh(tcg_single, tcg_int,
6999                                     tcg_shift, tcg_fpstatus);
7000            }
7001            write_fp_sreg(s, rd, tcg_single);
7002            tcg_temp_free_i32(tcg_single);
7003            break;
7004
7005        default:
7006            g_assert_not_reached();
7007        }
7008    } else {
7009        TCGv_i64 tcg_int = cpu_reg(s, rd);
7010        TCGv_i32 tcg_rmode;
7011
7012        if (extract32(opcode, 2, 1)) {
7013            /* There are too many rounding modes to all fit into rmode,
7014             * so FCVTA[US] is a special case.
7015             */
7016            rmode = FPROUNDING_TIEAWAY;
7017        }
7018
7019        tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
7020
7021        gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
7022
7023        switch (type) {
7024        case 1: /* float64 */
7025            tcg_double = read_fp_dreg(s, rn);
7026            if (is_signed) {
7027                if (!sf) {
7028                    gen_helper_vfp_tosld(tcg_int, tcg_double,
7029                                         tcg_shift, tcg_fpstatus);
7030                } else {
7031                    gen_helper_vfp_tosqd(tcg_int, tcg_double,
7032                                         tcg_shift, tcg_fpstatus);
7033                }
7034            } else {
7035                if (!sf) {
7036                    gen_helper_vfp_tould(tcg_int, tcg_double,
7037                                         tcg_shift, tcg_fpstatus);
7038                } else {
7039                    gen_helper_vfp_touqd(tcg_int, tcg_double,
7040                                         tcg_shift, tcg_fpstatus);
7041                }
7042            }
7043            if (!sf) {
7044                tcg_gen_ext32u_i64(tcg_int, tcg_int);
7045            }
7046            tcg_temp_free_i64(tcg_double);
7047            break;
7048
7049        case 0: /* float32 */
7050            tcg_single = read_fp_sreg(s, rn);
7051            if (sf) {
7052                if (is_signed) {
7053                    gen_helper_vfp_tosqs(tcg_int, tcg_single,
7054                                         tcg_shift, tcg_fpstatus);
7055                } else {
7056                    gen_helper_vfp_touqs(tcg_int, tcg_single,
7057                                         tcg_shift, tcg_fpstatus);
7058                }
7059            } else {
7060                TCGv_i32 tcg_dest = tcg_temp_new_i32();
7061                if (is_signed) {
7062                    gen_helper_vfp_tosls(tcg_dest, tcg_single,
7063                                         tcg_shift, tcg_fpstatus);
7064                } else {
7065                    gen_helper_vfp_touls(tcg_dest, tcg_single,
7066                                         tcg_shift, tcg_fpstatus);
7067                }
7068                tcg_gen_extu_i32_i64(tcg_int, tcg_dest);
7069                tcg_temp_free_i32(tcg_dest);
7070            }
7071            tcg_temp_free_i32(tcg_single);
7072            break;
7073
7074        case 3: /* float16 */
7075            tcg_single = read_fp_sreg(s, rn);
7076            if (sf) {
7077                if (is_signed) {
7078                    gen_helper_vfp_tosqh(tcg_int, tcg_single,
7079                                         tcg_shift, tcg_fpstatus);
7080                } else {
7081                    gen_helper_vfp_touqh(tcg_int, tcg_single,
7082                                         tcg_shift, tcg_fpstatus);
7083                }
7084            } else {
7085                TCGv_i32 tcg_dest = tcg_temp_new_i32();
7086                if (is_signed) {
7087                    gen_helper_vfp_toslh(tcg_dest, tcg_single,
7088                                         tcg_shift, tcg_fpstatus);
7089                } else {
7090                    gen_helper_vfp_toulh(tcg_dest, tcg_single,
7091                                         tcg_shift, tcg_fpstatus);
7092                }
7093                tcg_gen_extu_i32_i64(tcg_int, tcg_dest);
7094                tcg_temp_free_i32(tcg_dest);
7095            }
7096            tcg_temp_free_i32(tcg_single);
7097            break;
7098
7099        default:
7100            g_assert_not_reached();
7101        }
7102
7103        gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
7104        tcg_temp_free_i32(tcg_rmode);
7105    }
7106
7107    tcg_temp_free_ptr(tcg_fpstatus);
7108    tcg_temp_free_i32(tcg_shift);
7109}
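
    /* To illustrate the scale handling: SCVTF S0, W1, #8 encodes a
     * scale field of 56, so tcg_shift == 8 and the helper divides by
     * 2^8 (an input of 256 yields 1.0). The pure integer conversions
     * come through with scale == 64, making the shift zero and the
     * scalbn inside the helper a no-op, as the OPTME note above says.
     */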
7110
7111/* Floating point <-> fixed point conversions
7112 *   31   30  29 28       24 23  22  21 20   19 18    16 15   10 9    5 4    0
7113 * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
7114 * | sf | 0 | S | 1 1 1 1 0 | type | 0 | rmode | opcode | scale |  Rn  |  Rd  |
7115 * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
7116 */
7117static void disas_fp_fixed_conv(DisasContext *s, uint32_t insn)
7118{
7119    int rd = extract32(insn, 0, 5);
7120    int rn = extract32(insn, 5, 5);
7121    int scale = extract32(insn, 10, 6);
7122    int opcode = extract32(insn, 16, 3);
7123    int rmode = extract32(insn, 19, 2);
7124    int type = extract32(insn, 22, 2);
7125    bool sbit = extract32(insn, 29, 1);
7126    bool sf = extract32(insn, 31, 1);
7127    bool itof;
7128
7129    if (sbit || (!sf && scale < 32)) {
7130        unallocated_encoding(s);
7131        return;
7132    }
7133
7134    switch (type) {
7135    case 0: /* float32 */
7136    case 1: /* float64 */
7137        break;
7138    case 3: /* float16 */
7139        if (dc_isar_feature(aa64_fp16, s)) {
7140            break;
7141        }
7142        /* fallthru */
7143    default:
7144        unallocated_encoding(s);
7145        return;
7146    }
7147
7148    switch ((rmode << 3) | opcode) {
7149    case 0x2: /* SCVTF */
7150    case 0x3: /* UCVTF */
7151        itof = true;
7152        break;
7153    case 0x18: /* FCVTZS */
7154    case 0x19: /* FCVTZU */
7155        itof = false;
7156        break;
7157    default:
7158        unallocated_encoding(s);
7159        return;
7160    }
7161
7162    if (!fp_access_check(s)) {
7163        return;
7164    }
7165
7166    handle_fpfpcvt(s, rd, rn, opcode, itof, FPROUNDING_ZERO, scale, sf, type);
7167}
7168
7169static void handle_fmov(DisasContext *s, int rd, int rn, int type, bool itof)
7170{
7171    /* FMOV: gpr to or from float, double, or top half of quad fp reg,
7172     * without conversion.
7173     */
7174
7175    if (itof) {
7176        TCGv_i64 tcg_rn = cpu_reg(s, rn);
7177        TCGv_i64 tmp;
7178
7179        switch (type) {
7180        case 0:
7181            /* 32 bit */
7182            tmp = tcg_temp_new_i64();
7183            tcg_gen_ext32u_i64(tmp, tcg_rn);
7184            write_fp_dreg(s, rd, tmp);
7185            tcg_temp_free_i64(tmp);
7186            break;
7187        case 1:
7188            /* 64 bit */
7189            write_fp_dreg(s, rd, tcg_rn);
7190            break;
7191        case 2:
7192            /* 64 bit to top half. */
7193            tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_hi_offset(s, rd));
7194            clear_vec_high(s, true, rd);
7195            break;
7196        case 3:
7197            /* 16 bit */
7198            tmp = tcg_temp_new_i64();
7199            tcg_gen_ext16u_i64(tmp, tcg_rn);
7200            write_fp_dreg(s, rd, tmp);
7201            tcg_temp_free_i64(tmp);
7202            break;
7203        default:
7204            g_assert_not_reached();
7205        }
7206    } else {
7207        TCGv_i64 tcg_rd = cpu_reg(s, rd);
7208
7209        switch (type) {
7210        case 0:
7211            /* 32 bit */
7212            tcg_gen_ld32u_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_32));
7213            break;
7214        case 1:
7215            /* 64 bit */
7216            tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_64));
7217            break;
7218        case 2:
7219            /* 64 bits from top half */
7220            tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_hi_offset(s, rn));
7221            break;
7222        case 3:
7223            /* 16 bit */
7224            tcg_gen_ld16u_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_16));
7225            break;
7226        default:
7227            g_assert_not_reached();
7228        }
7229    }
7230}
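
    /* Concretely, the type values accepted by handle_fmov correspond to:
     *   type 0: FMOV Sd, Wn / FMOV Wd, Sn
     *   type 1: FMOV Dd, Xn / FMOV Xd, Dn
     *   type 2: FMOV Vd.D[1], Xn / FMOV Xd, Vn.D[1]
     *   type 3: FMOV Hd, Wn or Xn / FMOV Wd or Xd, Hn (FEAT_FP16)
     */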
7231
7232static void handle_fjcvtzs(DisasContext *s, int rd, int rn)
7233{
7234    TCGv_i64 t = read_fp_dreg(s, rn);
7235    TCGv_ptr fpstatus = fpstatus_ptr(FPST_FPCR);
7236
7237    gen_helper_fjcvtzs(t, t, fpstatus);
7238
7239    tcg_temp_free_ptr(fpstatus);
7240
7241    tcg_gen_ext32u_i64(cpu_reg(s, rd), t);
7242    tcg_gen_extrh_i64_i32(cpu_ZF, t);
7243    tcg_gen_movi_i32(cpu_CF, 0);
7244    tcg_gen_movi_i32(cpu_NF, 0);
7245    tcg_gen_movi_i32(cpu_VF, 0);
7246
7247    tcg_temp_free_i64(t);
7248}
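
    /* Note that FJCVTZS leaves NZCV == 0Z00: the helper is expected to
     * pack the 32-bit result in the low half of t and the cpu_ZF
     * representation of Z in the high half (cpu_ZF reads as "Z set"
     * when it holds zero, i.e. when the conversion was exact and in
     * range), while C, N and V are cleared unconditionally above.
     */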
7249
7250/* Floating point <-> integer conversions
7251 *   31   30  29 28       24 23  22  21 20   19 18 16 15         10 9  5 4  0
7252 * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
7253 * | sf | 0 | S | 1 1 1 1 0 | type | 1 | rmode | opc | 0 0 0 0 0 0 | Rn | Rd |
7254 * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
7255 */
7256static void disas_fp_int_conv(DisasContext *s, uint32_t insn)
7257{
7258    int rd = extract32(insn, 0, 5);
7259    int rn = extract32(insn, 5, 5);
7260    int opcode = extract32(insn, 16, 3);
7261    int rmode = extract32(insn, 19, 2);
7262    int type = extract32(insn, 22, 2);
7263    bool sbit = extract32(insn, 29, 1);
7264    bool sf = extract32(insn, 31, 1);
7265    bool itof = false;
7266
7267    if (sbit) {
7268        goto do_unallocated;
7269    }
7270
7271    switch (opcode) {
7272    case 2: /* SCVTF */
7273    case 3: /* UCVTF */
7274        itof = true;
7275        /* fallthru */
7276    case 4: /* FCVTAS */
7277    case 5: /* FCVTAU */
7278        if (rmode != 0) {
7279            goto do_unallocated;
7280        }
7281        /* fallthru */
7282    case 0: /* FCVT[NPMZ]S */
7283    case 1: /* FCVT[NPMZ]U */
7284        switch (type) {
7285        case 0: /* float32 */
7286        case 1: /* float64 */
7287            break;
7288        case 3: /* float16 */
7289            if (!dc_isar_feature(aa64_fp16, s)) {
7290                goto do_unallocated;
7291            }
7292            break;
7293        default:
7294            goto do_unallocated;
7295        }
7296        if (!fp_access_check(s)) {
7297            return;
7298        }
7299        handle_fpfpcvt(s, rd, rn, opcode, itof, rmode, 64, sf, type);
7300        break;
7301
7302    default:
7303        switch (sf << 7 | type << 5 | rmode << 3 | opcode) {
7304        case 0b01100110: /* FMOV half <-> 32-bit int */
7305        case 0b01100111:
7306        case 0b11100110: /* FMOV half <-> 64-bit int */
7307        case 0b11100111:
7308            if (!dc_isar_feature(aa64_fp16, s)) {
7309                goto do_unallocated;
7310            }
7311            /* fallthru */
7312        case 0b00000110: /* FMOV 32-bit */
7313        case 0b00000111:
7314        case 0b10100110: /* FMOV 64-bit */
7315        case 0b10100111:
7316        case 0b11001110: /* FMOV top half of 128-bit */
7317        case 0b11001111:
7318            if (!fp_access_check(s)) {
7319                return;
7320            }
7321            itof = opcode & 1;
7322            handle_fmov(s, rd, rn, type, itof);
7323            break;
7324
7325        case 0b00111110: /* FJCVTZS */
7326            if (!dc_isar_feature(aa64_jscvt, s)) {
7327                goto do_unallocated;
7328            } else if (fp_access_check(s)) {
7329                handle_fjcvtzs(s, rd, rn);
7330            }
7331            break;
7332
7333        default:
7334        do_unallocated:
7335            unallocated_encoding(s);
7336            return;
7337        }
7338        break;
7339    }
7340}
7341
7342/* FP-specific subcases of table C3-6 (SIMD and FP data processing)
7343 *   31  30  29 28     25 24                          0
7344 * +---+---+---+---------+-----------------------------+
7345 * |   | 0 |   | 1 1 1 1 |                             |
7346 * +---+---+---+---------+-----------------------------+
7347 */
7348static void disas_data_proc_fp(DisasContext *s, uint32_t insn)
7349{
7350    if (extract32(insn, 24, 1)) {
7351        /* Floating point data-processing (3 source) */
7352        disas_fp_3src(s, insn);
7353    } else if (extract32(insn, 21, 1) == 0) {
7354        /* Floating point to fixed point conversions */
7355        disas_fp_fixed_conv(s, insn);
7356    } else {
7357        switch (extract32(insn, 10, 2)) {
7358        case 1:
7359            /* Floating point conditional compare */
7360            disas_fp_ccomp(s, insn);
7361            break;
7362        case 2:
7363            /* Floating point data-processing (2 source) */
7364            disas_fp_2src(s, insn);
7365            break;
7366        case 3:
7367            /* Floating point conditional select */
7368            disas_fp_csel(s, insn);
7369            break;
7370        case 0:
7371            switch (ctz32(extract32(insn, 12, 4))) {
7372            case 0: /* [15:12] == xxx1 */
7373                /* Floating point immediate */
7374                disas_fp_imm(s, insn);
7375                break;
7376            case 1: /* [15:12] == xx10 */
7377                /* Floating point compare */
7378                disas_fp_compare(s, insn);
7379                break;
7380            case 2: /* [15:12] == x100 */
7381                /* Floating point data-processing (1 source) */
7382                disas_fp_1src(s, insn);
7383                break;
7384            case 3: /* [15:12] == 1000 */
7385                unallocated_encoding(s);
7386                break;
7387            default: /* [15:12] == 0000 */
7388                /* Floating point <-> integer conversions */
7389                disas_fp_int_conv(s, insn);
7390                break;
7391            }
7392            break;
7393        }
7394    }
7395}
7396
7397static void do_ext64(DisasContext *s, TCGv_i64 tcg_left, TCGv_i64 tcg_right,
7398                     int pos)
7399{
7400    /* Extract 64 bits from the middle of two concatenated 64 bit
7401     * vector register slices left:right. The extracted bits start
7402     * at 'pos' bits into the right (least significant) side.
7403     * We return the result in tcg_right, and guarantee not to
7404     * trash tcg_left.
7405     */
7406    TCGv_i64 tcg_tmp = tcg_temp_new_i64();
7407    assert(pos > 0 && pos < 64);
7408
7409    tcg_gen_shri_i64(tcg_right, tcg_right, pos);
7410    tcg_gen_shli_i64(tcg_tmp, tcg_left, 64 - pos);
7411    tcg_gen_or_i64(tcg_right, tcg_right, tcg_tmp);
7412
7413    tcg_temp_free_i64(tcg_tmp);
7414}
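
    /* Worked example: with left == 0x1122334455667788,
     * right == 0x99aabbccddeeff00 and pos == 8 this computes
     * (right >> 8) | (left << 56) == 0x8899aabbccddeeff, i.e. the 64
     * bits starting one byte up from the bottom of left:right.
     */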
7415
7416/* EXT
7417 *   31  30 29         24 23 22  21 20  16 15  14  11 10  9    5 4    0
7418 * +---+---+-------------+-----+---+------+---+------+---+------+------+
7419 * | 0 | Q | 1 0 1 1 1 0 | op2 | 0 |  Rm  | 0 | imm4 | 0 |  Rn  |  Rd  |
7420 * +---+---+-------------+-----+---+------+---+------+---+------+------+
7421 */
7422static void disas_simd_ext(DisasContext *s, uint32_t insn)
7423{
7424    int is_q = extract32(insn, 30, 1);
7425    int op2 = extract32(insn, 22, 2);
7426    int imm4 = extract32(insn, 11, 4);
7427    int rm = extract32(insn, 16, 5);
7428    int rn = extract32(insn, 5, 5);
7429    int rd = extract32(insn, 0, 5);
7430    int pos = imm4 << 3;
7431    TCGv_i64 tcg_resl, tcg_resh;
7432
7433    if (op2 != 0 || (!is_q && extract32(imm4, 3, 1))) {
7434        unallocated_encoding(s);
7435        return;
7436    }
7437
7438    if (!fp_access_check(s)) {
7439        return;
7440    }
7441
7442    tcg_resh = tcg_temp_new_i64();
7443    tcg_resl = tcg_temp_new_i64();
7444
7445    /* Vd gets bits starting at pos bits into Vm:Vn. This is
7446     * either extracting 128 bits from a 128:128 concatenation, or
7447     * extracting 64 bits from a 64:64 concatenation.
7448     */
7449    if (!is_q) {
7450        read_vec_element(s, tcg_resl, rn, 0, MO_64);
7451        if (pos != 0) {
7452            read_vec_element(s, tcg_resh, rm, 0, MO_64);
7453            do_ext64(s, tcg_resh, tcg_resl, pos);
7454        }
7455    } else {
7456        TCGv_i64 tcg_hh;
7457        typedef struct {
7458            int reg;
7459            int elt;
7460        } EltPosns;
7461        EltPosns eltposns[] = { {rn, 0}, {rn, 1}, {rm, 0}, {rm, 1} };
7462        EltPosns *elt = eltposns;
7463
7464        if (pos >= 64) {
7465            elt++;
7466            pos -= 64;
7467        }
7468
7469        read_vec_element(s, tcg_resl, elt->reg, elt->elt, MO_64);
7470        elt++;
7471        read_vec_element(s, tcg_resh, elt->reg, elt->elt, MO_64);
7472        elt++;
7473        if (pos != 0) {
7474            do_ext64(s, tcg_resh, tcg_resl, pos);
7475            tcg_hh = tcg_temp_new_i64();
7476            read_vec_element(s, tcg_hh, elt->reg, elt->elt, MO_64);
7477            do_ext64(s, tcg_hh, tcg_resh, pos);
7478            tcg_temp_free_i64(tcg_hh);
7479        }
7480    }
7481
7482    write_vec_element(s, tcg_resl, rd, 0, MO_64);
7483    tcg_temp_free_i64(tcg_resl);
7484    if (is_q) {
7485        write_vec_element(s, tcg_resh, rd, 1, MO_64);
7486    }
7487    tcg_temp_free_i64(tcg_resh);
7488    clear_vec_high(s, is_q, rd);
7489}
7490
7491/* TBL/TBX
7492 *   31  30 29         24 23 22  21 20  16 15  14 13  12  11 10 9    5 4    0
7493 * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
7494 * | 0 | Q | 0 0 1 1 1 0 | op2 | 0 |  Rm  | 0 | len | op | 0 0 |  Rn  |  Rd  |
7495 * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
7496 */
7497static void disas_simd_tb(DisasContext *s, uint32_t insn)
7498{
7499    int op2 = extract32(insn, 22, 2);
7500    int is_q = extract32(insn, 30, 1);
7501    int rm = extract32(insn, 16, 5);
7502    int rn = extract32(insn, 5, 5);
7503    int rd = extract32(insn, 0, 5);
7504    int is_tblx = extract32(insn, 12, 1);
7505    int len = extract32(insn, 13, 2);
7506    TCGv_i64 tcg_resl, tcg_resh, tcg_idx;
7507    TCGv_i32 tcg_regno, tcg_numregs;
7508
7509    if (op2 != 0) {
7510        unallocated_encoding(s);
7511        return;
7512    }
7513
7514    if (!fp_access_check(s)) {
7515        return;
7516    }
7517
7518    /* This does a table lookup: for every byte element in the input
7519     * we index into a table formed from up to four vector registers,
7520     * and then the output is the result of the lookups. Our helper
7521     * function does the lookup operation for a single 64 bit part of
7522     * the input.
7523     */
7524    tcg_resl = tcg_temp_new_i64();
7525    tcg_resh = NULL;
7526
7527    if (is_tblx) {
7528        read_vec_element(s, tcg_resl, rd, 0, MO_64);
7529    } else {
7530        tcg_gen_movi_i64(tcg_resl, 0);
7531    }
7532
7533    if (is_q) {
7534        tcg_resh = tcg_temp_new_i64();
7535        if (is_tblx) {
7536            read_vec_element(s, tcg_resh, rd, 1, MO_64);
7537        } else {
7538            tcg_gen_movi_i64(tcg_resh, 0);
7539        }
7540    }
7541
7542    tcg_idx = tcg_temp_new_i64();
7543    tcg_regno = tcg_const_i32(rn);
7544    tcg_numregs = tcg_const_i32(len + 1);
7545    read_vec_element(s, tcg_idx, rm, 0, MO_64);
7546    gen_helper_simd_tbl(tcg_resl, cpu_env, tcg_resl, tcg_idx,
7547                        tcg_regno, tcg_numregs);
7548    if (is_q) {
7549        read_vec_element(s, tcg_idx, rm, 1, MO_64);
7550        gen_helper_simd_tbl(tcg_resh, cpu_env, tcg_resh, tcg_idx,
7551                            tcg_regno, tcg_numregs);
7552    }
7553    tcg_temp_free_i64(tcg_idx);
7554    tcg_temp_free_i32(tcg_regno);
7555    tcg_temp_free_i32(tcg_numregs);
7556
7557    write_vec_element(s, tcg_resl, rd, 0, MO_64);
7558    tcg_temp_free_i64(tcg_resl);
7559
7560    if (is_q) {
7561        write_vec_element(s, tcg_resh, rd, 1, MO_64);
7562        tcg_temp_free_i64(tcg_resh);
7563    }
7564    clear_vec_high(s, is_q, rd);
7565}
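
    /* The helper is also responsible for the architected out-of-range
     * behaviour: an index byte beyond the end of the table (16 bytes
     * per register, up to len + 1 registers) yields 0 for TBL but
     * leaves the destination byte unchanged for TBX, which is why
     * tcg_resl/tcg_resh are seeded with the old Rd value when is_tblx
     * is set above.
     */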
7566
7567/* ZIP/UZP/TRN
7568 *   31  30 29         24 23  22  21 20   16 15 14 12 11 10 9    5 4    0
7569 * +---+---+-------------+------+---+------+---+-----+-----+------+------+
7570 * | 0 | Q | 0 0 1 1 1 0 | size | 0 |  Rm  | 0 | opc | 1 0 |  Rn  |  Rd  |
7571 * +---+---+-------------+------+---+------+---+-----+-----+------+------+
7572 */
7573static void disas_simd_zip_trn(DisasContext *s, uint32_t insn)
7574{
7575    int rd = extract32(insn, 0, 5);
7576    int rn = extract32(insn, 5, 5);
7577    int rm = extract32(insn, 16, 5);
7578    int size = extract32(insn, 22, 2);
7579    /* opc field bits [1:0] indicate ZIP/UZP/TRN;
7580     * bit 2 indicates 1 vs 2 variant of the insn.
7581     */
7582    int opcode = extract32(insn, 12, 2);
7583    bool part = extract32(insn, 14, 1);
7584    bool is_q = extract32(insn, 30, 1);
7585    int esize = 8 << size;
7586    int i, ofs;
7587    int datasize = is_q ? 128 : 64;
7588    int elements = datasize / esize;
7589    TCGv_i64 tcg_res, tcg_resl, tcg_resh;
7590
7591    if (opcode == 0 || (size == 3 && !is_q)) {
7592        unallocated_encoding(s);
7593        return;
7594    }
7595
7596    if (!fp_access_check(s)) {
7597        return;
7598    }
7599
7600    tcg_resl = tcg_const_i64(0);
7601    tcg_resh = is_q ? tcg_const_i64(0) : NULL;
7602    tcg_res = tcg_temp_new_i64();
7603
7604    for (i = 0; i < elements; i++) {
7605        switch (opcode) {
7606        case 1: /* UZP1/2 */
7607        {
7608            int midpoint = elements / 2;
7609            if (i < midpoint) {
7610                read_vec_element(s, tcg_res, rn, 2 * i + part, size);
7611            } else {
7612                read_vec_element(s, tcg_res, rm,
7613                                 2 * (i - midpoint) + part, size);
7614            }
7615            break;
7616        }
7617        case 2: /* TRN1/2 */
7618            if (i & 1) {
7619                read_vec_element(s, tcg_res, rm, (i & ~1) + part, size);
7620            } else {
7621                read_vec_element(s, tcg_res, rn, (i & ~1) + part, size);
7622            }
7623            break;
7624        case 3: /* ZIP1/2 */
7625        {
7626            int base = part * elements / 2;
7627            if (i & 1) {
7628                read_vec_element(s, tcg_res, rm, base + (i >> 1), size);
7629            } else {
7630                read_vec_element(s, tcg_res, rn, base + (i >> 1), size);
7631            }
7632            break;
7633        }
7634        default:
7635            g_assert_not_reached();
7636        }
7637
7638        ofs = i * esize;
7639        if (ofs < 64) {
7640            tcg_gen_shli_i64(tcg_res, tcg_res, ofs);
7641            tcg_gen_or_i64(tcg_resl, tcg_resl, tcg_res);
7642        } else {
7643            tcg_gen_shli_i64(tcg_res, tcg_res, ofs - 64);
7644            tcg_gen_or_i64(tcg_resh, tcg_resh, tcg_res);
7645        }
7646    }
7647
7648    tcg_temp_free_i64(tcg_res);
7649
7650    write_vec_element(s, tcg_resl, rd, 0, MO_64);
7651    tcg_temp_free_i64(tcg_resl);
7652
7653    if (is_q) {
7654        write_vec_element(s, tcg_resh, rd, 1, MO_64);
7655        tcg_temp_free_i64(tcg_resh);
7656    }
7657    clear_vec_high(s, is_q, rd);
7658}
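
    /* As a worked example of the lane selection: ZIP1 V0.8B, V1.8B,
     * V2.8B (opcode == 3, part == 0) interleaves the low halves, so
     * V0 ends up as { n0, m0, n1, m1, n2, m2, n3, m3 } where n/m are
     * the byte lanes of Rn/Rm; ZIP2 (part == 1) does the same with
     * the high halves.
     */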
7659
7660/*
7661 * do_reduction_op helper
7662 *
7663 * This mirrors the Reduce() pseudocode in the ARM ARM. It is
7664 * important for correct NaN propagation that we do these
7665 * operations in exactly the order specified by the pseudocode.
7666 *
7667 * This is a recursive function; TCG temps should be freed by the
7668 * calling function once it is done with the values.
7669 */
7670static TCGv_i32 do_reduction_op(DisasContext *s, int fpopcode, int rn,
7671                                int esize, int size, int vmap, TCGv_ptr fpst)
7672{
7673    if (esize == size) {
7674        int element;
7675        MemOp msize = esize == 16 ? MO_16 : MO_32;
7676        TCGv_i32 tcg_elem;
7677
7678        /* We should have one register left here */
7679        assert(ctpop8(vmap) == 1);
7680        element = ctz32(vmap);
7681        assert(element < 8);
7682
7683        tcg_elem = tcg_temp_new_i32();
7684        read_vec_element_i32(s, tcg_elem, rn, element, msize);
7685        return tcg_elem;
7686    } else {
7687        int bits = size / 2;
7688        int shift = ctpop8(vmap) / 2;
7689        int vmap_lo = (vmap >> shift) & vmap;
7690        int vmap_hi = (vmap & ~vmap_lo);
7691        TCGv_i32 tcg_hi, tcg_lo, tcg_res;
7692
7693        tcg_hi = do_reduction_op(s, fpopcode, rn, esize, bits, vmap_hi, fpst);
7694        tcg_lo = do_reduction_op(s, fpopcode, rn, esize, bits, vmap_lo, fpst);
7695        tcg_res = tcg_temp_new_i32();
7696
7697        switch (fpopcode) {
7698        case 0x0c: /* fmaxnmv half-precision */
7699            gen_helper_advsimd_maxnumh(tcg_res, tcg_lo, tcg_hi, fpst);
7700            break;
7701        case 0x0f: /* fmaxv half-precision */
7702            gen_helper_advsimd_maxh(tcg_res, tcg_lo, tcg_hi, fpst);
7703            break;
7704        case 0x1c: /* fminnmv half-precision */
7705            gen_helper_advsimd_minnumh(tcg_res, tcg_lo, tcg_hi, fpst);
7706            break;
7707        case 0x1f: /* fminv half-precision */
7708            gen_helper_advsimd_minh(tcg_res, tcg_lo, tcg_hi, fpst);
7709            break;
7710        case 0x2c: /* fmaxnmv */
7711            gen_helper_vfp_maxnums(tcg_res, tcg_lo, tcg_hi, fpst);
7712            break;
7713        case 0x2f: /* fmaxv */
7714            gen_helper_vfp_maxs(tcg_res, tcg_lo, tcg_hi, fpst);
7715            break;
7716        case 0x3c: /* fminnmv */
7717            gen_helper_vfp_minnums(tcg_res, tcg_lo, tcg_hi, fpst);
7718            break;
7719        case 0x3f: /* fminv */
7720            gen_helper_vfp_mins(tcg_res, tcg_lo, tcg_hi, fpst);
7721            break;
7722        default:
7723            g_assert_not_reached();
7724        }
7725
7726        tcg_temp_free_i32(tcg_hi);
7727        tcg_temp_free_i32(tcg_lo);
7728        return tcg_res;
7729    }
7730}
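
    /* For instance, FMAXV over four single-precision lanes starts with
     * vmap == 0b1111, which splits into vmap_lo == 0b0011 and
     * vmap_hi == 0b1100, so the recursion computes
     * max(max(e0, e1), max(e2, e3)) - the same pairwise tree as the
     * Reduce() pseudocode, which is what makes NaN propagation match.
     */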
7731
7732/* AdvSIMD across lanes
7733 *   31  30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
7734 * +---+---+---+-----------+------+-----------+--------+-----+------+------+
7735 * | 0 | Q | U | 0 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
7736 * +---+---+---+-----------+------+-----------+--------+-----+------+------+
7737 */
7738static void disas_simd_across_lanes(DisasContext *s, uint32_t insn)
7739{
7740    int rd = extract32(insn, 0, 5);
7741    int rn = extract32(insn, 5, 5);
7742    int size = extract32(insn, 22, 2);
7743    int opcode = extract32(insn, 12, 5);
7744    bool is_q = extract32(insn, 30, 1);
7745    bool is_u = extract32(insn, 29, 1);
7746    bool is_fp = false;
7747    bool is_min = false;
7748    int esize;
7749    int elements;
7750    int i;
7751    TCGv_i64 tcg_res, tcg_elt;
7752
7753    switch (opcode) {
7754    case 0x1b: /* ADDV */
7755        if (is_u) {
7756            unallocated_encoding(s);
7757            return;
7758        }
7759        /* fall through */
7760    case 0x3: /* SADDLV, UADDLV */
7761    case 0xa: /* SMAXV, UMAXV */
7762    case 0x1a: /* SMINV, UMINV */
7763        if (size == 3 || (size == 2 && !is_q)) {
7764            unallocated_encoding(s);
7765            return;
7766        }
7767        break;
7768    case 0xc: /* FMAXNMV, FMINNMV */
7769    case 0xf: /* FMAXV, FMINV */
7770        /* Bit 1 of the size field encodes min vs max, and the actual size
7771         * depends on the encoding of the U bit. If U is not set (and FP16
7772         * is enabled) then we do half-precision float instead of single
7773         * precision.
7774         */
7775        is_min = extract32(size, 1, 1);
7776        is_fp = true;
7777        if (!is_u && dc_isar_feature(aa64_fp16, s)) {
7778            size = 1;
7779        } else if (!is_u || !is_q || extract32(size, 0, 1)) {
7780            unallocated_encoding(s);
7781            return;
7782        } else {
7783            size = 2;
7784        }
7785        break;
7786    default:
7787        unallocated_encoding(s);
7788        return;
7789    }
7790
7791    if (!fp_access_check(s)) {
7792        return;
7793    }
7794
7795    esize = 8 << size;
7796    elements = (is_q ? 128 : 64) / esize;
7797
7798    tcg_res = tcg_temp_new_i64();
7799    tcg_elt = tcg_temp_new_i64();
7800
7801    /* These instructions operate across all lanes of a vector
7802     * to produce a single result. We can guarantee that a 64
7803     * bit intermediate is sufficient:
7804     *  + for [US]ADDLV the maximum element size is 32 bits, and
7805     *    the result type is 64 bits
7806     *  + for FMAX*V, FMIN*V, ADDV the intermediate type is the
7807     *    same as the element size, which is 32 bits at most
7808     * For the integer operations we can choose to work at 64
7809     * or 32 bits and truncate at the end; for simplicity
7810     * we use 64 bits always. The floating point
7811     * ops do require 32 bit intermediates, though.
7812     */
7813    if (!is_fp) {
7814        read_vec_element(s, tcg_res, rn, 0, size | (is_u ? 0 : MO_SIGN));
7815
7816        for (i = 1; i < elements; i++) {
7817            read_vec_element(s, tcg_elt, rn, i, size | (is_u ? 0 : MO_SIGN));
7818
7819            switch (opcode) {
7820            case 0x03: /* SADDLV / UADDLV */
7821            case 0x1b: /* ADDV */
7822                tcg_gen_add_i64(tcg_res, tcg_res, tcg_elt);
7823                break;
7824            case 0x0a: /* SMAXV / UMAXV */
7825                if (is_u) {
7826                    tcg_gen_umax_i64(tcg_res, tcg_res, tcg_elt);
7827                } else {
7828                    tcg_gen_smax_i64(tcg_res, tcg_res, tcg_elt);
7829                }
7830                break;
7831            case 0x1a: /* SMINV / UMINV */
7832                if (is_u) {
7833                    tcg_gen_umin_i64(tcg_res, tcg_res, tcg_elt);
7834                } else {
7835                    tcg_gen_smin_i64(tcg_res, tcg_res, tcg_elt);
7836                }
7837                break;
7838            default:
7839                g_assert_not_reached();
7840            }
7841
7842        }
7843    } else {
7844        /* Floating point vector reduction ops which work across 32
7845         * bit (single) or 16 bit (half-precision) intermediates.
7846         * Note that correct NaN propagation requires that we do these
7847         * operations in exactly the order specified by the pseudocode.
7848         */
7849        TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
7850        int fpopcode = opcode | is_min << 4 | is_u << 5;
7851        int vmap = (1 << elements) - 1;
7852        TCGv_i32 tcg_res32 = do_reduction_op(s, fpopcode, rn, esize,
7853                                             (is_q ? 128 : 64), vmap, fpst);
7854        tcg_gen_extu_i32_i64(tcg_res, tcg_res32);
7855        tcg_temp_free_i32(tcg_res32);
7856        tcg_temp_free_ptr(fpst);
7857    }
7858
7859    tcg_temp_free_i64(tcg_elt);
7860
7861    /* Now truncate the result to the width required for the final output */
7862    if (opcode == 0x03) {
7863        /* SADDLV, UADDLV: result is 2*esize */
7864        size++;
7865    }
7866
7867    switch (size) {
7868    case 0:
7869        tcg_gen_ext8u_i64(tcg_res, tcg_res);
7870        break;
7871    case 1:
7872        tcg_gen_ext16u_i64(tcg_res, tcg_res);
7873        break;
7874    case 2:
7875        tcg_gen_ext32u_i64(tcg_res, tcg_res);
7876        break;
7877    case 3:
7878        break;
7879    default:
7880        g_assert_not_reached();
7881    }
7882
7883    write_fp_dreg(s, rd, tcg_res);
7884    tcg_temp_free_i64(tcg_res);
7885}
7886
7887/* DUP (Element, Vector)
7888 *
7889 *  31  30   29              21 20    16 15        10  9    5 4    0
7890 * +---+---+-------------------+--------+-------------+------+------+
7891 * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 0 1 |  Rn  |  Rd  |
7892 * +---+---+-------------------+--------+-------------+------+------+
7893 *
7894 * size: encoded in imm5 (see ARM ARM LowestSetBit())
7895 */
7896static void handle_simd_dupe(DisasContext *s, int is_q, int rd, int rn,
7897                             int imm5)
7898{
7899    int size = ctz32(imm5);
7900    int index;
7901
7902    if (size > 3 || (size == 3 && !is_q)) {
7903        unallocated_encoding(s);
7904        return;
7905    }
7906
7907    if (!fp_access_check(s)) {
7908        return;
7909    }
7910
7911    index = imm5 >> (size + 1);
7912    tcg_gen_gvec_dup_mem(size, vec_full_reg_offset(s, rd),
7913                         vec_reg_offset(s, rn, index, size),
7914                         is_q ? 16 : 8, vec_full_reg_size(s));
7915}
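
    /* Example of the imm5 decode: imm5 == 0b00110 has its lowest set
     * bit at position 1, so size == 1 (16-bit elements) and
     * index == imm5 >> 2 == 1, i.e. DUP Vd.8H, Vn.H[1] (or Vd.4H when
     * Q == 0).
     */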
7916
7917/* DUP (element, scalar)
7918 *  31                   21 20    16 15        10  9    5 4    0
7919 * +-----------------------+--------+-------------+------+------+
7920 * | 0 1 0 1 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 0 1 |  Rn  |  Rd  |
7921 * +-----------------------+--------+-------------+------+------+
7922 */
7923static void handle_simd_dupes(DisasContext *s, int rd, int rn,
7924                              int imm5)
7925{
7926    int size = ctz32(imm5);
7927    int index;
7928    TCGv_i64 tmp;
7929
7930    if (size > 3) {
7931        unallocated_encoding(s);
7932        return;
7933    }
7934
7935    if (!fp_access_check(s)) {
7936        return;
7937    }
7938
7939    index = imm5 >> (size + 1);
7940
7941    /* This instruction just extracts the specified element and
7942     * zero-extends it into the bottom of the destination register.
7943     */
7944    tmp = tcg_temp_new_i64();
7945    read_vec_element(s, tmp, rn, index, size);
7946    write_fp_dreg(s, rd, tmp);
7947    tcg_temp_free_i64(tmp);
7948}
7949
7950/* DUP (General)
7951 *
7952 *  31  30   29              21 20    16 15        10  9    5 4    0
7953 * +---+---+-------------------+--------+-------------+------+------+
7954 * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 1 1 |  Rn  |  Rd  |
7955 * +---+---+-------------------+--------+-------------+------+------+
7956 *
7957 * size: encoded in imm5 (see ARM ARM LowestSetBit())
7958 */
7959static void handle_simd_dupg(DisasContext *s, int is_q, int rd, int rn,
7960                             int imm5)
7961{
7962    int size = ctz32(imm5);
7963    uint32_t dofs, oprsz, maxsz;
7964
7965    if (size > 3 || ((size == 3) && !is_q)) {
7966        unallocated_encoding(s);
7967        return;
7968    }
7969
7970    if (!fp_access_check(s)) {
7971        return;
7972    }
7973
7974    dofs = vec_full_reg_offset(s, rd);
7975    oprsz = is_q ? 16 : 8;
7976    maxsz = vec_full_reg_size(s);
7977
7978    tcg_gen_gvec_dup_i64(size, dofs, oprsz, maxsz, cpu_reg(s, rn));
7979}
7980
7981/* INS (Element)
7982 *
7983 *  31                   21 20    16 15  14    11  10 9    5 4    0
7984 * +-----------------------+--------+------------+---+------+------+
7985 * | 0 1 1 0 1 1 1 0 0 0 0 |  imm5  | 0 |  imm4  | 1 |  Rn  |  Rd  |
7986 * +-----------------------+--------+------------+---+------+------+
7987 *
7988 * size: encoded in imm5 (see ARM ARM LowestSetBit())
7989 * index: encoded in imm5<4:size+1>
7990 */
7991static void handle_simd_inse(DisasContext *s, int rd, int rn,
7992                             int imm4, int imm5)
7993{
7994    int size = ctz32(imm5);
7995    int src_index, dst_index;
7996    TCGv_i64 tmp;
7997
7998    if (size > 3) {
7999        unallocated_encoding(s);
8000        return;
8001    }
8002
8003    if (!fp_access_check(s)) {
8004        return;
8005    }
8006
8007    dst_index = extract32(imm5, 1+size, 5);
8008    src_index = extract32(imm4, size, 4);
8009
8010    tmp = tcg_temp_new_i64();
8011
8012    read_vec_element(s, tmp, rn, src_index, size);
8013    write_vec_element(s, tmp, rd, dst_index, size);
8014
8015    tcg_temp_free_i64(tmp);
8016
8017    /* INS is considered a 128-bit write for SVE. */
8018    clear_vec_high(s, true, rd);
8019}
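
    /* Example: INS V0.S[3], V1.S[1] encodes imm5 == 0b11100 (lowest
     * set bit gives size == 2, so dst_index == imm5 >> 3 == 3) and
     * imm4 == 0b0100 (src_index == imm4 >> 2 == 1).
     */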
8020
8021
8022/* INS (General)
8023 *
8024 *  31                   21 20    16 15        10  9    5 4    0
8025 * +-----------------------+--------+-------------+------+------+
8026 * | 0 1 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 1 1 1 |  Rn  |  Rd  |
8027 * +-----------------------+--------+-------------+------+------+
8028 *
8029 * size: encoded in imm5 (see ARM ARM LowestSetBit())
8030 * index: encoded in imm5<4:size+1>
8031 */
8032static void handle_simd_insg(DisasContext *s, int rd, int rn, int imm5)
8033{
8034    int size = ctz32(imm5);
8035    int idx;
8036
8037    if (size > 3) {
8038        unallocated_encoding(s);
8039        return;
8040    }
8041
8042    if (!fp_access_check(s)) {
8043        return;
8044    }
8045
8046    idx = extract32(imm5, 1 + size, 4 - size);
8047    write_vec_element(s, cpu_reg(s, rn), rd, idx, size);
8048
8049    /* INS is considered a 128-bit write for SVE. */
8050    clear_vec_high(s, true, rd);
8051}
8052
8053/*
8054 * UMOV (General)
8055 * SMOV (General)
8056 *
8057 *  31  30   29              21 20    16 15    12   10 9    5 4    0
8058 * +---+---+-------------------+--------+-------------+------+------+
8059 * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 1 U 1 1 |  Rn  |  Rd  |
8060 * +---+---+-------------------+--------+-------------+------+------+
8061 *
8062 * U: unsigned when set
8063 * size: encoded in imm5 (see ARM ARM LowestSetBit())
8064 */
8065static void handle_simd_umov_smov(DisasContext *s, int is_q, int is_signed,
8066                                  int rn, int rd, int imm5)
8067{
8068    int size = ctz32(imm5);
8069    int element;
8070    TCGv_i64 tcg_rd;
8071
8072    /* Check for UnallocatedEncodings */
8073    if (is_signed) {
8074        if (size > 2 || (size == 2 && !is_q)) {
8075            unallocated_encoding(s);
8076            return;
8077        }
8078    } else {
8079        if (size > 3
8080            || (size < 3 && is_q)
8081            || (size == 3 && !is_q)) {
8082            unallocated_encoding(s);
8083            return;
8084        }
8085    }
8086
8087    if (!fp_access_check(s)) {
8088        return;
8089    }
8090
8091    element = extract32(imm5, 1+size, 4);
8092
8093    tcg_rd = cpu_reg(s, rd);
8094    read_vec_element(s, tcg_rd, rn, element, size | (is_signed ? MO_SIGN : 0));
8095    if (is_signed && !is_q) {
8096        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
8097    }
8098}
8099
8100/* AdvSIMD copy
8101 *   31  30  29  28             21 20  16 15  14  11 10  9    5 4    0
8102 * +---+---+----+-----------------+------+---+------+---+------+------+
8103 * | 0 | Q | op | 0 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 |  Rn  |  Rd  |
8104 * +---+---+----+-----------------+------+---+------+---+------+------+
8105 */
8106static void disas_simd_copy(DisasContext *s, uint32_t insn)
8107{
8108    int rd = extract32(insn, 0, 5);
8109    int rn = extract32(insn, 5, 5);
8110    int imm4 = extract32(insn, 11, 4);
8111    int op = extract32(insn, 29, 1);
8112    int is_q = extract32(insn, 30, 1);
8113    int imm5 = extract32(insn, 16, 5);
8114
8115    if (op) {
8116        if (is_q) {
8117            /* INS (element) */
8118            handle_simd_inse(s, rd, rn, imm4, imm5);
8119        } else {
8120            unallocated_encoding(s);
8121        }
8122    } else {
8123        switch (imm4) {
8124        case 0:
8125            /* DUP (element - vector) */
8126            handle_simd_dupe(s, is_q, rd, rn, imm5);
8127            break;
8128        case 1:
8129            /* DUP (general) */
8130            handle_simd_dupg(s, is_q, rd, rn, imm5);
8131            break;
8132        case 3:
8133            if (is_q) {
8134                /* INS (general) */
8135                handle_simd_insg(s, rd, rn, imm5);
8136            } else {
8137                unallocated_encoding(s);
8138            }
8139            break;
8140        case 5:
8141        case 7:
8142            /* UMOV/SMOV (is_q indicates 32/64; imm4 indicates signedness) */
8143            handle_simd_umov_smov(s, is_q, (imm4 == 5), rn, rd, imm5);
8144            break;
8145        default:
8146            unallocated_encoding(s);
8147            break;
8148        }
8149    }
8150}
8151
8152/* AdvSIMD modified immediate
8153 *  31  30   29  28                 19 18 16 15   12  11  10  9     5 4    0
8154 * +---+---+----+---------------------+-----+-------+----+---+-------+------+
8155 * | 0 | Q | op | 0 1 1 1 1 0 0 0 0 0 | abc | cmode | o2 | 1 | defgh |  Rd  |
8156 * +---+---+----+---------------------+-----+-------+----+---+-------+------+
8157 *
8158 * There are a number of operations that can be carried out here:
8159 *   MOVI - move (shifted) imm into register
8160 *   MVNI - move inverted (shifted) imm into register
8161 *   ORR  - bitwise OR of (shifted) imm with register
8162 *   BIC  - bitwise clear of (shifted) imm with register
8163 * With ARMv8.2 we also have:
8164 *   FMOV half-precision
8165 */
8166static void disas_simd_mod_imm(DisasContext *s, uint32_t insn)
8167{
8168    int rd = extract32(insn, 0, 5);
8169    int cmode = extract32(insn, 12, 4);
8170    int cmode_3_1 = extract32(cmode, 1, 3);
8171    int cmode_0 = extract32(cmode, 0, 1);
8172    int o2 = extract32(insn, 11, 1);
8173    uint64_t abcdefgh = extract32(insn, 5, 5) | (extract32(insn, 16, 3) << 5);
8174    bool is_neg = extract32(insn, 29, 1);
8175    bool is_q = extract32(insn, 30, 1);
8176    uint64_t imm = 0;
8177
8178    if (o2 != 0 || ((cmode == 0xf) && is_neg && !is_q)) {
8179        /* Check for FMOV (vector, immediate) - half-precision */
8180        if (!(dc_isar_feature(aa64_fp16, s) && o2 && cmode == 0xf)) {
8181            unallocated_encoding(s);
8182            return;
8183        }
8184    }
8185
8186    if (!fp_access_check(s)) {
8187        return;
8188    }
8189
8190    /* See AdvSIMDExpandImm() in ARM ARM */
8191    switch (cmode_3_1) {
8192    case 0: /* Replicate(Zeros(24):imm8, 2) */
8193    case 1: /* Replicate(Zeros(16):imm8:Zeros(8), 2) */
8194    case 2: /* Replicate(Zeros(8):imm8:Zeros(16), 2) */
8195    case 3: /* Replicate(imm8:Zeros(24), 2) */
8196    {
8197        int shift = cmode_3_1 * 8;
8198        imm = bitfield_replicate(abcdefgh << shift, 32);
8199        break;
8200    }
8201    case 4: /* Replicate(Zeros(8):imm8, 4) */
8202    case 5: /* Replicate(imm8:Zeros(8), 4) */
8203    {
8204        int shift = (cmode_3_1 & 0x1) * 8;
8205        imm = bitfield_replicate(abcdefgh << shift, 16);
8206        break;
8207    }
8208    case 6:
8209        if (cmode_0) {
8210            /* Replicate(Zeros(8):imm8:Ones(16), 2) */
8211            imm = (abcdefgh << 16) | 0xffff;
8212        } else {
8213            /* Replicate(Zeros(16):imm8:Ones(8), 2) */
8214            imm = (abcdefgh << 8) | 0xff;
8215        }
8216        imm = bitfield_replicate(imm, 32);
8217        break;
8218    case 7:
8219        if (!cmode_0 && !is_neg) {
8220            imm = bitfield_replicate(abcdefgh, 8);
8221        } else if (!cmode_0 && is_neg) {
8222            int i;
8223            imm = 0;
8224            for (i = 0; i < 8; i++) {
8225                if ((abcdefgh) & (1 << i)) {
8226                    imm |= 0xffULL << (i * 8);
8227                }
8228            }
8229        } else if (cmode_0) {
8230            if (is_neg) {
8231                imm = (abcdefgh & 0x3f) << 48;
8232                if (abcdefgh & 0x80) {
8233                    imm |= 0x8000000000000000ULL;
8234                }
8235                if (abcdefgh & 0x40) {
8236                    imm |= 0x3fc0000000000000ULL;
8237                } else {
8238                    imm |= 0x4000000000000000ULL;
8239                }
8240            } else {
8241                if (o2) {
8242                    /* FMOV (vector, immediate) - half-precision */
8243                    imm = vfp_expand_imm(MO_16, abcdefgh);
8244                    /* now duplicate across the lanes */
8245                    imm = bitfield_replicate(imm, 16);
8246                } else {
8247                    imm = (abcdefgh & 0x3f) << 19;
8248                    if (abcdefgh & 0x80) {
8249                        imm |= 0x80000000;
8250                    }
8251                    if (abcdefgh & 0x40) {
8252                        imm |= 0x3e000000;
8253                    } else {
8254                        imm |= 0x40000000;
8255                    }
8256                    imm |= (imm << 32);
8257                }
8258            }
8259        }
8260        break;
8261    default:
8262        fprintf(stderr, "%s: cmode_3_1: %x\n", __func__, cmode_3_1);
8263        g_assert_not_reached();
8264    }
8265
8266    if (cmode_3_1 != 7 && is_neg) {
8267        imm = ~imm;
8268    }
8269
8270    if (!((cmode & 0x9) == 0x1 || (cmode & 0xd) == 0x9)) {
8271        /* MOVI or MVNI, with MVNI negation handled above.  */
8272        tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), is_q ? 16 : 8,
8273                             vec_full_reg_size(s), imm);
8274    } else {
8275        /* ORR or BIC, with BIC negation to AND handled above.  */
8276        if (is_neg) {
8277            gen_gvec_fn2i(s, is_q, rd, rd, imm, tcg_gen_gvec_andi, MO_64);
8278        } else {
8279            gen_gvec_fn2i(s, is_q, rd, rd, imm, tcg_gen_gvec_ori, MO_64);
8280        }
8281    }
8282}
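
/*
 * Editor's note (illustrative, not part of the original file): a
 * concrete expansion of the cases above, assuming abcdefgh == 0xab:
 *   cmode<3:1> == 1 (shift 8): each 32-bit lane becomes 0x0000ab00,
 *     i.e. imm == 0x0000ab000000ab00 before any MVNI/BIC inversion;
 *   cmode == 0xe with op set: each bit of abcdefgh selects a whole
 *     byte, so abcdefgh == 0xaa expands to 0xff00ff00ff00ff00.
 * MOVI/MVNI broadcast imm with tcg_gen_gvec_dup_imm; ORR/BIC instead
 * combine it with the existing register contents.
 */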
8283
8284/* AdvSIMD scalar copy
8285 *  31 30  29  28             21 20  16 15  14  11 10  9    5 4    0
8286 * +-----+----+-----------------+------+---+------+---+------+------+
8287 * | 0 1 | op | 1 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 |  Rn  |  Rd  |
8288 * +-----+----+-----------------+------+---+------+---+------+------+
8289 */
8290static void disas_simd_scalar_copy(DisasContext *s, uint32_t insn)
8291{
8292    int rd = extract32(insn, 0, 5);
8293    int rn = extract32(insn, 5, 5);
8294    int imm4 = extract32(insn, 11, 4);
8295    int imm5 = extract32(insn, 16, 5);
8296    int op = extract32(insn, 29, 1);
8297
8298    if (op != 0 || imm4 != 0) {
8299        unallocated_encoding(s);
8300        return;
8301    }
8302
8303    /* DUP (element, scalar) */
8304    handle_simd_dupes(s, rd, rn, imm5);
8305}
8306
8307/* AdvSIMD scalar pairwise
8308 *  31 30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
8309 * +-----+---+-----------+------+-----------+--------+-----+------+------+
8310 * | 0 1 | U | 1 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
8311 * +-----+---+-----------+------+-----------+--------+-----+------+------+
8312 */
8313static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn)
8314{
8315    int u = extract32(insn, 29, 1);
8316    int size = extract32(insn, 22, 2);
8317    int opcode = extract32(insn, 12, 5);
8318    int rn = extract32(insn, 5, 5);
8319    int rd = extract32(insn, 0, 5);
8320    TCGv_ptr fpst;
8321
8322    /* For some ops (the FP ones), size[1] is part of the encoding.
8323     * For ADDP strictly it is not but size[1] is always 1 for valid
8324     * encodings.
8325     */
8326    opcode |= (extract32(size, 1, 1) << 5);
8327
8328    switch (opcode) {
8329    case 0x3b: /* ADDP */
8330        if (u || size != 3) {
8331            unallocated_encoding(s);
8332            return;
8333        }
8334        if (!fp_access_check(s)) {
8335            return;
8336        }
8337
8338        fpst = NULL;
8339        break;
8340    case 0xc: /* FMAXNMP */
8341    case 0xd: /* FADDP */
8342    case 0xf: /* FMAXP */
8343    case 0x2c: /* FMINNMP */
8344    case 0x2f: /* FMINP */
8345        /* FP op, size[0] is 32 or 64 bit */
8346        if (!u) {
8347            if (!dc_isar_feature(aa64_fp16, s)) {
8348                unallocated_encoding(s);
8349                return;
8350            } else {
8351                size = MO_16;
8352            }
8353        } else {
8354            size = extract32(size, 0, 1) ? MO_64 : MO_32;
8355        }
8356
8357        if (!fp_access_check(s)) {
8358            return;
8359        }
8360
8361        fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
8362        break;
8363    default:
8364        unallocated_encoding(s);
8365        return;
8366    }
8367
8368    if (size == MO_64) {
8369        TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8370        TCGv_i64 tcg_op2 = tcg_temp_new_i64();
8371        TCGv_i64 tcg_res = tcg_temp_new_i64();
8372
8373        read_vec_element(s, tcg_op1, rn, 0, MO_64);
8374        read_vec_element(s, tcg_op2, rn, 1, MO_64);
8375
8376        switch (opcode) {
8377        case 0x3b: /* ADDP */
8378            tcg_gen_add_i64(tcg_res, tcg_op1, tcg_op2);
8379            break;
8380        case 0xc: /* FMAXNMP */
8381            gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
8382            break;
8383        case 0xd: /* FADDP */
8384            gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
8385            break;
8386        case 0xf: /* FMAXP */
8387            gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
8388            break;
8389        case 0x2c: /* FMINNMP */
8390            gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
8391            break;
8392        case 0x2f: /* FMINP */
8393            gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
8394            break;
8395        default:
8396            g_assert_not_reached();
8397        }
8398
8399        write_fp_dreg(s, rd, tcg_res);
8400
8401        tcg_temp_free_i64(tcg_op1);
8402        tcg_temp_free_i64(tcg_op2);
8403        tcg_temp_free_i64(tcg_res);
8404    } else {
8405        TCGv_i32 tcg_op1 = tcg_temp_new_i32();
8406        TCGv_i32 tcg_op2 = tcg_temp_new_i32();
8407        TCGv_i32 tcg_res = tcg_temp_new_i32();
8408
8409        read_vec_element_i32(s, tcg_op1, rn, 0, size);
8410        read_vec_element_i32(s, tcg_op2, rn, 1, size);
8411
8412        if (size == MO_16) {
8413            switch (opcode) {
8414            case 0xc: /* FMAXNMP */
8415                gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst);
8416                break;
8417            case 0xd: /* FADDP */
8418                gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst);
8419                break;
8420            case 0xf: /* FMAXP */
8421                gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst);
8422                break;
8423            case 0x2c: /* FMINNMP */
8424                gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst);
8425                break;
8426            case 0x2f: /* FMINP */
8427                gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst);
8428                break;
8429            default:
8430                g_assert_not_reached();
8431            }
8432        } else {
8433            switch (opcode) {
8434            case 0xc: /* FMAXNMP */
8435                gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
8436                break;
8437            case 0xd: /* FADDP */
8438                gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
8439                break;
8440            case 0xf: /* FMAXP */
8441                gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
8442                break;
8443            case 0x2c: /* FMINNMP */
8444                gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
8445                break;
8446            case 0x2f: /* FMINP */
8447                gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
8448                break;
8449            default:
8450                g_assert_not_reached();
8451            }
8452        }
8453
8454        write_fp_sreg(s, rd, tcg_res);
8455
8456        tcg_temp_free_i32(tcg_op1);
8457        tcg_temp_free_i32(tcg_op2);
8458        tcg_temp_free_i32(tcg_res);
8459    }
8460
8461    if (fpst) {
8462        tcg_temp_free_ptr(fpst);
8463    }
8464}
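
/*
 * Editor's note (illustrative): the scalar pairwise forms reduce the
 * two elements of a pair in Rn, e.g. FADDP reads Vn.d[0] and Vn.d[1]
 * (or the two low singles/halves) and writes their sum to the scalar
 * Rd; ADDP is the only integer case and only exists for size == 3.
 */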
8465
8466/*
8467 * Common SSHR[RA]/USHR[RA] - Shift right (optional rounding/accumulate)
8468 *
8469 * This handles the common shifting code and is used by both
8470 * the vector and scalar code.
8471 */
8472static void handle_shri_with_rndacc(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
8473                                    TCGv_i64 tcg_rnd, bool accumulate,
8474                                    bool is_u, int size, int shift)
8475{
8476    bool extended_result = false;
8477    bool round = tcg_rnd != NULL;
8478    int ext_lshift = 0;
8479    TCGv_i64 tcg_src_hi;
8480
8481    if (round && size == 3) {
8482        extended_result = true;
8483        ext_lshift = 64 - shift;
8484        tcg_src_hi = tcg_temp_new_i64();
8485    } else if (shift == 64) {
8486        if (!accumulate && is_u) {
8487            /* result is zero */
8488            tcg_gen_movi_i64(tcg_res, 0);
8489            return;
8490        }
8491    }
8492
8493    /* Deal with the rounding step */
8494    if (round) {
8495        if (extended_result) {
8496            TCGv_i64 tcg_zero = tcg_const_i64(0);
8497            if (!is_u) {
8498                /* take care of sign extending tcg_res */
8499                tcg_gen_sari_i64(tcg_src_hi, tcg_src, 63);
8500                tcg_gen_add2_i64(tcg_src, tcg_src_hi,
8501                                 tcg_src, tcg_src_hi,
8502                                 tcg_rnd, tcg_zero);
8503            } else {
8504                tcg_gen_add2_i64(tcg_src, tcg_src_hi,
8505                                 tcg_src, tcg_zero,
8506                                 tcg_rnd, tcg_zero);
8507            }
8508            tcg_temp_free_i64(tcg_zero);
8509        } else {
8510            tcg_gen_add_i64(tcg_src, tcg_src, tcg_rnd);
8511        }
8512    }
8513
8514    /* Now do the shift right */
8515    if (round && extended_result) {
8516        /* extended case, >64 bit precision required */
8517        if (ext_lshift == 0) {
8518            /* special case, only high bits matter */
8519            tcg_gen_mov_i64(tcg_src, tcg_src_hi);
8520        } else {
8521            tcg_gen_shri_i64(tcg_src, tcg_src, shift);
8522            tcg_gen_shli_i64(tcg_src_hi, tcg_src_hi, ext_lshift);
8523            tcg_gen_or_i64(tcg_src, tcg_src, tcg_src_hi);
8524        }
8525    } else {
8526        if (is_u) {
8527            if (shift == 64) {
8528                /* essentially shifting in 64 zeros */
8529                tcg_gen_movi_i64(tcg_src, 0);
8530            } else {
8531                tcg_gen_shri_i64(tcg_src, tcg_src, shift);
8532            }
8533        } else {
8534            if (shift == 64) {
8535                /* effectively extending the sign-bit */
8536                tcg_gen_sari_i64(tcg_src, tcg_src, 63);
8537            } else {
8538                tcg_gen_sari_i64(tcg_src, tcg_src, shift);
8539            }
8540        }
8541    }
8542
8543    if (accumulate) {
8544        tcg_gen_add_i64(tcg_res, tcg_res, tcg_src);
8545    } else {
8546        tcg_gen_mov_i64(tcg_res, tcg_src);
8547    }
8548
8549    if (extended_result) {
8550        tcg_temp_free_i64(tcg_src_hi);
8551    }
8552}
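
/*
 * Editor's note (illustrative, not part of the original file): the
 * rounding step adds 1 << (shift - 1) before shifting, e.g. SRSHR #4
 * computes (x + 8) >> 4. For size == 3 that addition can carry out of
 * 64 bits, which is why the code widens to a {tcg_src_hi, tcg_src}
 * 128-bit pair with tcg_gen_add2_i64 and reassembles the shifted
 * result from both halves.
 */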
8553
8554/* SSHR[RA]/USHR[RA] - Scalar shift right (optional rounding/accumulate) */
8555static void handle_scalar_simd_shri(DisasContext *s,
8556                                    bool is_u, int immh, int immb,
8557                                    int opcode, int rn, int rd)
8558{
8559    const int size = 3;
8560    int immhb = immh << 3 | immb;
8561    int shift = 2 * (8 << size) - immhb;
8562    bool accumulate = false;
8563    bool round = false;
8564    bool insert = false;
8565    TCGv_i64 tcg_rn;
8566    TCGv_i64 tcg_rd;
8567    TCGv_i64 tcg_round;
8568
8569    if (!extract32(immh, 3, 1)) {
8570        unallocated_encoding(s);
8571        return;
8572    }
8573
8574    if (!fp_access_check(s)) {
8575        return;
8576    }
8577
8578    switch (opcode) {
8579    case 0x02: /* SSRA / USRA (accumulate) */
8580        accumulate = true;
8581        break;
8582    case 0x04: /* SRSHR / URSHR (rounding) */
8583        round = true;
8584        break;
8585    case 0x06: /* SRSRA / URSRA (accum + rounding) */
8586        accumulate = round = true;
8587        break;
8588    case 0x08: /* SRI */
8589        insert = true;
8590        break;
8591    }
8592
8593    if (round) {
8594        uint64_t round_const = 1ULL << (shift - 1);
8595        tcg_round = tcg_const_i64(round_const);
8596    } else {
8597        tcg_round = NULL;
8598    }
8599
8600    tcg_rn = read_fp_dreg(s, rn);
8601    tcg_rd = (accumulate || insert) ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
8602
8603    if (insert) {
8604        /* shift count same as element size is valid but does nothing;
8605         * special case to avoid potential shift by 64.
8606         */
8607        int esize = 8 << size;
8608        if (shift != esize) {
8609            tcg_gen_shri_i64(tcg_rn, tcg_rn, shift);
8610            tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, 0, esize - shift);
8611        }
8612    } else {
8613        handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
8614                                accumulate, is_u, size, shift);
8615    }
8616
8617    write_fp_dreg(s, rd, tcg_rd);
8618
8619    tcg_temp_free_i64(tcg_rn);
8620    tcg_temp_free_i64(tcg_rd);
8621    if (round) {
8622        tcg_temp_free_i64(tcg_round);
8623    }
8624}
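
/*
 * Editor's note (illustrative): in this scalar form the element size
 * is fixed at 64 bits, so shift = 128 - (immh:immb); e.g. immh = 0b1000
 * and immb = 0b111 give immhb = 71 and a right shift of 57. Encodings
 * with immh<3> clear would select a smaller element size and are
 * unallocated here.
 */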
8625
8626/* SHL/SLI - Scalar shift left */
8627static void handle_scalar_simd_shli(DisasContext *s, bool insert,
8628                                    int immh, int immb, int opcode,
8629                                    int rn, int rd)
8630{
8631    int size = 32 - clz32(immh) - 1;
8632    int immhb = immh << 3 | immb;
8633    int shift = immhb - (8 << size);
8634    TCGv_i64 tcg_rn;
8635    TCGv_i64 tcg_rd;
8636
8637    if (!extract32(immh, 3, 1)) {
8638        unallocated_encoding(s);
8639        return;
8640    }
8641
8642    if (!fp_access_check(s)) {
8643        return;
8644    }
8645
8646    tcg_rn = read_fp_dreg(s, rn);
8647    tcg_rd = insert ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
8648
8649    if (insert) {
8650        tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, shift, 64 - shift);
8651    } else {
8652        tcg_gen_shli_i64(tcg_rd, tcg_rn, shift);
8653    }
8654
8655    write_fp_dreg(s, rd, tcg_rd);
8656
8657    tcg_temp_free_i64(tcg_rn);
8658    tcg_temp_free_i64(tcg_rd);
8659}
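
/*
 * Editor's note (illustrative): SLI keeps the low 'shift' bits of Rd
 * and inserts Rn above them, which is what the deposit expresses: for
 * shift == 8, result == (Rd & 0xff) | (Rn << 8) truncated to 64 bits.
 * Plain SHL simply discards the old Rd value.
 */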
8660
8661/* SQSHRN/SQSHRUN - Saturating (signed/unsigned) shift right with
8662 * (signed/unsigned) narrowing */
8663static void handle_vec_simd_sqshrn(DisasContext *s, bool is_scalar, bool is_q,
8664                                   bool is_u_shift, bool is_u_narrow,
8665                                   int immh, int immb, int opcode,
8666                                   int rn, int rd)
8667{
8668    int immhb = immh << 3 | immb;
8669    int size = 32 - clz32(immh) - 1;
8670    int esize = 8 << size;
8671    int shift = (2 * esize) - immhb;
8672    int elements = is_scalar ? 1 : (64 / esize);
8673    bool round = extract32(opcode, 0, 1);
8674    MemOp ldop = (size + 1) | (is_u_shift ? 0 : MO_SIGN);
8675    TCGv_i64 tcg_rn, tcg_rd, tcg_round;
8676    TCGv_i32 tcg_rd_narrowed;
8677    TCGv_i64 tcg_final;
8678
8679    static NeonGenNarrowEnvFn * const signed_narrow_fns[4][2] = {
8680        { gen_helper_neon_narrow_sat_s8,
8681          gen_helper_neon_unarrow_sat8 },
8682        { gen_helper_neon_narrow_sat_s16,
8683          gen_helper_neon_unarrow_sat16 },
8684        { gen_helper_neon_narrow_sat_s32,
8685          gen_helper_neon_unarrow_sat32 },
8686        { NULL, NULL },
8687    };
8688    static NeonGenNarrowEnvFn * const unsigned_narrow_fns[4] = {
8689        gen_helper_neon_narrow_sat_u8,
8690        gen_helper_neon_narrow_sat_u16,
8691        gen_helper_neon_narrow_sat_u32,
8692        NULL
8693    };
8694    NeonGenNarrowEnvFn *narrowfn;
8695
8696    int i;
8697
8698    assert(size < 4);
8699
8700    if (extract32(immh, 3, 1)) {
8701        unallocated_encoding(s);
8702        return;
8703    }
8704
8705    if (!fp_access_check(s)) {
8706        return;
8707    }
8708
8709    if (is_u_shift) {
8710        narrowfn = unsigned_narrow_fns[size];
8711    } else {
8712        narrowfn = signed_narrow_fns[size][is_u_narrow ? 1 : 0];
8713    }
8714
8715    tcg_rn = tcg_temp_new_i64();
8716    tcg_rd = tcg_temp_new_i64();
8717    tcg_rd_narrowed = tcg_temp_new_i32();
8718    tcg_final = tcg_const_i64(0);
8719
8720    if (round) {
8721        uint64_t round_const = 1ULL << (shift - 1);
8722        tcg_round = tcg_const_i64(round_const);
8723    } else {
8724        tcg_round = NULL;
8725    }
8726
8727    for (i = 0; i < elements; i++) {
8728        read_vec_element(s, tcg_rn, rn, i, ldop);
8729        handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
8730                                false, is_u_shift, size+1, shift);
8731        narrowfn(tcg_rd_narrowed, cpu_env, tcg_rd);
8732        tcg_gen_extu_i32_i64(tcg_rd, tcg_rd_narrowed);
8733        tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
8734    }
8735
8736    if (!is_q) {
8737        write_vec_element(s, tcg_final, rd, 0, MO_64);
8738    } else {
8739        write_vec_element(s, tcg_final, rd, 1, MO_64);
8740    }
8741
8742    if (round) {
8743        tcg_temp_free_i64(tcg_round);
8744    }
8745    tcg_temp_free_i64(tcg_rn);
8746    tcg_temp_free_i64(tcg_rd);
8747    tcg_temp_free_i32(tcg_rd_narrowed);
8748    tcg_temp_free_i64(tcg_final);
8749
8750    clear_vec_high(s, is_q, rd);
8751}
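
/*
 * Editor's note (illustrative): each pass shifts one wide element,
 * saturates it to half width with the Neon narrowing helper, and
 * deposits it into tcg_final at bit offset esize * i. The !is_q form
 * writes the packed result to the low 64 bits of Vd; the "2" variants
 * (is_q) write the high 64 bits and leave the low half intact.
 */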
8752
8753/* SQSHLU, UQSHL, SQSHL: saturating left shifts */
8754static void handle_simd_qshl(DisasContext *s, bool scalar, bool is_q,
8755                             bool src_unsigned, bool dst_unsigned,
8756                             int immh, int immb, int rn, int rd)
8757{
8758    int immhb = immh << 3 | immb;
8759    int size = 32 - clz32(immh) - 1;
8760    int shift = immhb - (8 << size);
8761    int pass;
8762
8763    assert(immh != 0);
8764    assert(!(scalar && is_q));
8765
8766    if (!scalar) {
8767        if (!is_q && extract32(immh, 3, 1)) {
8768            unallocated_encoding(s);
8769            return;
8770        }
8771
8772        /* Since we use the variable-shift helpers we must
8773         * replicate the shift count into each element of
8774         * the tcg_shift value.
8775         */
8776        switch (size) {
8777        case 0:
8778            shift |= shift << 8;
8779            /* fall through */
8780        case 1:
8781            shift |= shift << 16;
8782            break;
8783        case 2:
8784        case 3:
8785            break;
8786        default:
8787            g_assert_not_reached();
8788        }
8789    }
8790
8791    if (!fp_access_check(s)) {
8792        return;
8793    }
8794
8795    if (size == 3) {
8796        TCGv_i64 tcg_shift = tcg_const_i64(shift);
8797        static NeonGenTwo64OpEnvFn * const fns[2][2] = {
8798            { gen_helper_neon_qshl_s64, gen_helper_neon_qshlu_s64 },
8799            { NULL, gen_helper_neon_qshl_u64 },
8800        };
8801        NeonGenTwo64OpEnvFn *genfn = fns[src_unsigned][dst_unsigned];
8802        int maxpass = is_q ? 2 : 1;
8803
8804        for (pass = 0; pass < maxpass; pass++) {
8805            TCGv_i64 tcg_op = tcg_temp_new_i64();
8806
8807            read_vec_element(s, tcg_op, rn, pass, MO_64);
8808            genfn(tcg_op, cpu_env, tcg_op, tcg_shift);
8809            write_vec_element(s, tcg_op, rd, pass, MO_64);
8810
8811            tcg_temp_free_i64(tcg_op);
8812        }
8813        tcg_temp_free_i64(tcg_shift);
8814        clear_vec_high(s, is_q, rd);
8815    } else {
8816        TCGv_i32 tcg_shift = tcg_const_i32(shift);
8817        static NeonGenTwoOpEnvFn * const fns[2][2][3] = {
8818            {
8819                { gen_helper_neon_qshl_s8,
8820                  gen_helper_neon_qshl_s16,
8821                  gen_helper_neon_qshl_s32 },
8822                { gen_helper_neon_qshlu_s8,
8823                  gen_helper_neon_qshlu_s16,
8824                  gen_helper_neon_qshlu_s32 }
8825            }, {
8826                { NULL, NULL, NULL },
8827                { gen_helper_neon_qshl_u8,
8828                  gen_helper_neon_qshl_u16,
8829                  gen_helper_neon_qshl_u32 }
8830            }
8831        };
8832        NeonGenTwoOpEnvFn *genfn = fns[src_unsigned][dst_unsigned][size];
8833        MemOp memop = scalar ? size : MO_32;
8834        int maxpass = scalar ? 1 : is_q ? 4 : 2;
8835
8836        for (pass = 0; pass < maxpass; pass++) {
8837            TCGv_i32 tcg_op = tcg_temp_new_i32();
8838
8839            read_vec_element_i32(s, tcg_op, rn, pass, memop);
8840            genfn(tcg_op, cpu_env, tcg_op, tcg_shift);
8841            if (scalar) {
8842                switch (size) {
8843                case 0:
8844                    tcg_gen_ext8u_i32(tcg_op, tcg_op);
8845                    break;
8846                case 1:
8847                    tcg_gen_ext16u_i32(tcg_op, tcg_op);
8848                    break;
8849                case 2:
8850                    break;
8851                default:
8852                    g_assert_not_reached();
8853                }
8854                write_fp_sreg(s, rd, tcg_op);
8855            } else {
8856                write_vec_element_i32(s, tcg_op, rd, pass, MO_32);
8857            }
8858
8859            tcg_temp_free_i32(tcg_op);
8860        }
8861        tcg_temp_free_i32(tcg_shift);
8862
8863        if (!scalar) {
8864            clear_vec_high(s, is_q, rd);
8865        }
8866    }
8867}
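
/*
 * Editor's note (illustrative): the variable-shift helpers take a
 * per-element count, so the immediate is replicated into every lane of
 * tcg_shift first; for size == 0 and shift == 3 the 32-bit constant
 * becomes 0x03030303, giving each byte lane the same count.
 */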
8868
8869/* Common vector code for handling integer to FP conversion */
8870static void handle_simd_intfp_conv(DisasContext *s, int rd, int rn,
8871                                   int elements, int is_signed,
8872                                   int fracbits, int size)
8873{
8874    TCGv_ptr tcg_fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
8875    TCGv_i32 tcg_shift = NULL;
8876
8877    MemOp mop = size | (is_signed ? MO_SIGN : 0);
8878    int pass;
8879
8880    if (fracbits || size == MO_64) {
8881        tcg_shift = tcg_const_i32(fracbits);
8882    }
8883
8884    if (size == MO_64) {
8885        TCGv_i64 tcg_int64 = tcg_temp_new_i64();
8886        TCGv_i64 tcg_double = tcg_temp_new_i64();
8887
8888        for (pass = 0; pass < elements; pass++) {
8889            read_vec_element(s, tcg_int64, rn, pass, mop);
8890
8891            if (is_signed) {
8892                gen_helper_vfp_sqtod(tcg_double, tcg_int64,
8893                                     tcg_shift, tcg_fpst);
8894            } else {
8895                gen_helper_vfp_uqtod(tcg_double, tcg_int64,
8896                                     tcg_shift, tcg_fpst);
8897            }
8898            if (elements == 1) {
8899                write_fp_dreg(s, rd, tcg_double);
8900            } else {
8901                write_vec_element(s, tcg_double, rd, pass, MO_64);
8902            }
8903        }
8904
8905        tcg_temp_free_i64(tcg_int64);
8906        tcg_temp_free_i64(tcg_double);
8907
8908    } else {
8909        TCGv_i32 tcg_int32 = tcg_temp_new_i32();
8910        TCGv_i32 tcg_float = tcg_temp_new_i32();
8911
8912        for (pass = 0; pass < elements; pass++) {
8913            read_vec_element_i32(s, tcg_int32, rn, pass, mop);
8914
8915            switch (size) {
8916            case MO_32:
8917                if (fracbits) {
8918                    if (is_signed) {
8919                        gen_helper_vfp_sltos(tcg_float, tcg_int32,
8920                                             tcg_shift, tcg_fpst);
8921                    } else {
8922                        gen_helper_vfp_ultos(tcg_float, tcg_int32,
8923                                             tcg_shift, tcg_fpst);
8924                    }
8925                } else {
8926                    if (is_signed) {
8927                        gen_helper_vfp_sitos(tcg_float, tcg_int32, tcg_fpst);
8928                    } else {
8929                        gen_helper_vfp_uitos(tcg_float, tcg_int32, tcg_fpst);
8930                    }
8931                }
8932                break;
8933            case MO_16:
8934                if (fracbits) {
8935                    if (is_signed) {
8936                        gen_helper_vfp_sltoh(tcg_float, tcg_int32,
8937                                             tcg_shift, tcg_fpst);
8938                    } else {
8939                        gen_helper_vfp_ultoh(tcg_float, tcg_int32,
8940                                             tcg_shift, tcg_fpst);
8941                    }
8942                } else {
8943                    if (is_signed) {
8944                        gen_helper_vfp_sitoh(tcg_float, tcg_int32, tcg_fpst);
8945                    } else {
8946                        gen_helper_vfp_uitoh(tcg_float, tcg_int32, tcg_fpst);
8947                    }
8948                }
8949                break;
8950            default:
8951                g_assert_not_reached();
8952            }
8953
8954            if (elements == 1) {
8955                write_fp_sreg(s, rd, tcg_float);
8956            } else {
8957                write_vec_element_i32(s, tcg_float, rd, pass, size);
8958            }
8959        }
8960
8961        tcg_temp_free_i32(tcg_int32);
8962        tcg_temp_free_i32(tcg_float);
8963    }
8964
8965    tcg_temp_free_ptr(tcg_fpst);
8966    if (tcg_shift) {
8967        tcg_temp_free_i32(tcg_shift);
8968    }
8969
8970    clear_vec_high(s, elements << size == 16, rd);
8971}
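
/*
 * Editor's note (illustrative): with fracbits == f the fixed-point
 * helpers divide the converted integer by 2^f, so SCVTF of 40 with
 * four fraction bits yields 2.5; fracbits == 0 is the plain [US]CVTF
 * case and uses the shift-less helpers for the 16- and 32-bit sizes.
 */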
8972
8973/* UCVTF/SCVTF - Integer to FP conversion */
8974static void handle_simd_shift_intfp_conv(DisasContext *s, bool is_scalar,
8975                                         bool is_q, bool is_u,
8976                                         int immh, int immb, int opcode,
8977                                         int rn, int rd)
8978{
8979    int size, elements, fracbits;
8980    int immhb = immh << 3 | immb;
8981
8982    if (immh & 8) {
8983        size = MO_64;
8984        if (!is_scalar && !is_q) {
8985            unallocated_encoding(s);
8986            return;
8987        }
8988    } else if (immh & 4) {
8989        size = MO_32;
8990    } else if (immh & 2) {
8991        size = MO_16;
8992        if (!dc_isar_feature(aa64_fp16, s)) {
8993            unallocated_encoding(s);
8994            return;
8995        }
8996    } else {
8997        /* immh == 0 would be a failure of the decode logic */
8998        g_assert(immh == 1);
8999        unallocated_encoding(s);
9000        return;
9001    }
9002
9003    if (is_scalar) {
9004        elements = 1;
9005    } else {
9006        elements = (8 << is_q) >> size;
9007    }
9008    fracbits = (16 << size) - immhb;
9009
9010    if (!fp_access_check(s)) {
9011        return;
9012    }
9013
9014    handle_simd_intfp_conv(s, rd, rn, elements, !is_u, fracbits, size);
9015}
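
/*
 * Editor's note (illustrative): the element size comes from the
 * highest set bit of immh (1xxx: 64-bit, 01xx: 32-bit, 001x: 16-bit),
 * and fracbits = (16 << size) - immh:immb; e.g. immh = 0b0100 with
 * immb = 0 gives fracbits = 64 - 32 = 32 for a 32-bit element.
 */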
9016
9017/* FCVTZS, FCVTZU - FP to fixed-point conversion */
9018static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar,
9019                                         bool is_q, bool is_u,
9020                                         int immh, int immb, int rn, int rd)
9021{
9022    int immhb = immh << 3 | immb;
9023    int pass, size, fracbits;
9024    TCGv_ptr tcg_fpstatus;
9025    TCGv_i32 tcg_rmode, tcg_shift;
9026
9027    if (immh & 0x8) {
9028        size = MO_64;
9029        if (!is_scalar && !is_q) {
9030            unallocated_encoding(s);
9031            return;
9032        }
9033    } else if (immh & 0x4) {
9034        size = MO_32;
9035    } else if (immh & 0x2) {
9036        size = MO_16;
9037        if (!dc_isar_feature(aa64_fp16, s)) {
9038            unallocated_encoding(s);
9039            return;
9040        }
9041    } else {
9042        /* Should have split out AdvSIMD modified immediate earlier.  */
9043        assert(immh == 1);
9044        unallocated_encoding(s);
9045        return;
9046    }
9047
9048    if (!fp_access_check(s)) {
9049        return;
9050    }
9051
9052    assert(!(is_scalar && is_q));
9053
9054    tcg_rmode = tcg_const_i32(arm_rmode_to_sf(FPROUNDING_ZERO));
9055    tcg_fpstatus = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
9056    gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
9057    fracbits = (16 << size) - immhb;
9058    tcg_shift = tcg_const_i32(fracbits);
9059
9060    if (size == MO_64) {
9061        int maxpass = is_scalar ? 1 : 2;
9062
9063        for (pass = 0; pass < maxpass; pass++) {
9064            TCGv_i64 tcg_op = tcg_temp_new_i64();
9065
9066            read_vec_element(s, tcg_op, rn, pass, MO_64);
9067            if (is_u) {
9068                gen_helper_vfp_touqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
9069            } else {
9070                gen_helper_vfp_tosqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
9071            }
9072            write_vec_element(s, tcg_op, rd, pass, MO_64);
9073            tcg_temp_free_i64(tcg_op);
9074        }
9075        clear_vec_high(s, is_q, rd);
9076    } else {
9077        void (*fn)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
9078        int maxpass = is_scalar ? 1 : ((8 << is_q) >> size);
9079
9080        switch (size) {
9081        case MO_16:
9082            if (is_u) {
9083                fn = gen_helper_vfp_touhh;
9084            } else {
9085                fn = gen_helper_vfp_toshh;
9086            }
9087            break;
9088        case MO_32:
9089            if (is_u) {
9090                fn = gen_helper_vfp_touls;
9091            } else {
9092                fn = gen_helper_vfp_tosls;
9093            }
9094            break;
9095        default:
9096            g_assert_not_reached();
9097        }
9098
9099        for (pass = 0; pass < maxpass; pass++) {
9100            TCGv_i32 tcg_op = tcg_temp_new_i32();
9101
9102            read_vec_element_i32(s, tcg_op, rn, pass, size);
9103            fn(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
9104            if (is_scalar) {
9105                write_fp_sreg(s, rd, tcg_op);
9106            } else {
9107                write_vec_element_i32(s, tcg_op, rd, pass, size);
9108            }
9109            tcg_temp_free_i32(tcg_op);
9110        }
9111        if (!is_scalar) {
9112            clear_vec_high(s, is_q, rd);
9113        }
9114    }
9115
9116    gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
9117    tcg_temp_free_ptr(tcg_fpstatus);
9118    tcg_temp_free_i32(tcg_shift);
9119    tcg_temp_free_i32(tcg_rmode);
9120}
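
/*
 * Editor's note (illustrative): FCVTZ* always truncates, so the code
 * temporarily forces FPROUNDING_ZERO. gen_helper_set_rmode swaps the
 * requested mode with the one held in the fp status and returns the
 * old mode in tcg_rmode, so calling it a second time with that value
 * restores the original rounding mode after the element loop.
 */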
9121
9122/* AdvSIMD scalar shift by immediate
9123 *  31 30  29 28         23 22  19 18  16 15    11  10 9    5 4    0
9124 * +-----+---+-------------+------+------+--------+---+------+------+
9125 * | 0 1 | U | 1 1 1 1 1 0 | immh | immb | opcode | 1 |  Rn  |  Rd  |
9126 * +-----+---+-------------+------+------+--------+---+------+------+
9127 *
9128 * This is the scalar version so it works on fixed size registers
9129 */
9130static void disas_simd_scalar_shift_imm(DisasContext *s, uint32_t insn)
9131{
9132    int rd = extract32(insn, 0, 5);
9133    int rn = extract32(insn, 5, 5);
9134    int opcode = extract32(insn, 11, 5);
9135    int immb = extract32(insn, 16, 3);
9136    int immh = extract32(insn, 19, 4);
9137    bool is_u = extract32(insn, 29, 1);
9138
9139    if (immh == 0) {
9140        unallocated_encoding(s);
9141        return;
9142    }
9143
9144    switch (opcode) {
9145    case 0x08: /* SRI */
9146        if (!is_u) {
9147            unallocated_encoding(s);
9148            return;
9149        }
9150        /* fall through */
9151    case 0x00: /* SSHR / USHR */
9152    case 0x02: /* SSRA / USRA */
9153    case 0x04: /* SRSHR / URSHR */
9154    case 0x06: /* SRSRA / URSRA */
9155        handle_scalar_simd_shri(s, is_u, immh, immb, opcode, rn, rd);
9156        break;
9157    case 0x0a: /* SHL / SLI */
9158        handle_scalar_simd_shli(s, is_u, immh, immb, opcode, rn, rd);
9159        break;
9160    case 0x1c: /* SCVTF, UCVTF */
9161        handle_simd_shift_intfp_conv(s, true, false, is_u, immh, immb,
9162                                     opcode, rn, rd);
9163        break;
9164    case 0x10: /* SQSHRUN, SQSHRUN2 */
9165    case 0x11: /* SQRSHRUN, SQRSHRUN2 */
9166        if (!is_u) {
9167            unallocated_encoding(s);
9168            return;
9169        }
9170        handle_vec_simd_sqshrn(s, true, false, false, true,
9171                               immh, immb, opcode, rn, rd);
9172        break;
9173    case 0x12: /* SQSHRN, SQSHRN2, UQSHRN */
9174    case 0x13: /* SQRSHRN, SQRSHRN2, UQRSHRN, UQRSHRN2 */
9175        handle_vec_simd_sqshrn(s, true, false, is_u, is_u,
9176                               immh, immb, opcode, rn, rd);
9177        break;
9178    case 0xc: /* SQSHLU */
9179        if (!is_u) {
9180            unallocated_encoding(s);
9181            return;
9182        }
9183        handle_simd_qshl(s, true, false, false, true, immh, immb, rn, rd);
9184        break;
9185    case 0xe: /* SQSHL, UQSHL */
9186        handle_simd_qshl(s, true, false, is_u, is_u, immh, immb, rn, rd);
9187        break;
9188    case 0x1f: /* FCVTZS, FCVTZU */
9189        handle_simd_shift_fpint_conv(s, true, false, is_u, immh, immb, rn, rd);
9190        break;
9191    default:
9192        unallocated_encoding(s);
9193        break;
9194    }
9195}
9196
9197/* AdvSIMD scalar three different
9198 *  31 30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
9199 * +-----+---+-----------+------+---+------+--------+-----+------+------+
9200 * | 0 1 | U | 1 1 1 1 0 | size | 1 |  Rm  | opcode | 0 0 |  Rn  |  Rd  |
9201 * +-----+---+-----------+------+---+------+--------+-----+------+------+
9202 */
9203static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn)
9204{
9205    bool is_u = extract32(insn, 29, 1);
9206    int size = extract32(insn, 22, 2);
9207    int opcode = extract32(insn, 12, 4);
9208    int rm = extract32(insn, 16, 5);
9209    int rn = extract32(insn, 5, 5);
9210    int rd = extract32(insn, 0, 5);
9211
9212    if (is_u) {
9213        unallocated_encoding(s);
9214        return;
9215    }
9216
9217    switch (opcode) {
9218    case 0x9: /* SQDMLAL, SQDMLAL2 */
9219    case 0xb: /* SQDMLSL, SQDMLSL2 */
9220    case 0xd: /* SQDMULL, SQDMULL2 */
9221        if (size == 0 || size == 3) {
9222            unallocated_encoding(s);
9223            return;
9224        }
9225        break;
9226    default:
9227        unallocated_encoding(s);
9228        return;
9229    }
9230
9231    if (!fp_access_check(s)) {
9232        return;
9233    }
9234
9235    if (size == 2) {
9236        TCGv_i64 tcg_op1 = tcg_temp_new_i64();
9237        TCGv_i64 tcg_op2 = tcg_temp_new_i64();
9238        TCGv_i64 tcg_res = tcg_temp_new_i64();
9239
9240        read_vec_element(s, tcg_op1, rn, 0, MO_32 | MO_SIGN);
9241        read_vec_element(s, tcg_op2, rm, 0, MO_32 | MO_SIGN);
9242
9243        tcg_gen_mul_i64(tcg_res, tcg_op1, tcg_op2);
9244        gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env, tcg_res, tcg_res);
9245
9246        switch (opcode) {
9247        case 0xd: /* SQDMULL, SQDMULL2 */
9248            break;
9249        case 0xb: /* SQDMLSL, SQDMLSL2 */
9250            tcg_gen_neg_i64(tcg_res, tcg_res);
9251            /* fall through */
9252        case 0x9: /* SQDMLAL, SQDMLAL2 */
9253            read_vec_element(s, tcg_op1, rd, 0, MO_64);
9254            gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env,
9255                                              tcg_res, tcg_op1);
9256            break;
9257        default:
9258            g_assert_not_reached();
9259        }
9260
9261        write_fp_dreg(s, rd, tcg_res);
9262
9263        tcg_temp_free_i64(tcg_op1);
9264        tcg_temp_free_i64(tcg_op2);
9265        tcg_temp_free_i64(tcg_res);
9266    } else {
9267        TCGv_i32 tcg_op1 = read_fp_hreg(s, rn);
9268        TCGv_i32 tcg_op2 = read_fp_hreg(s, rm);
9269        TCGv_i64 tcg_res = tcg_temp_new_i64();
9270
9271        gen_helper_neon_mull_s16(tcg_res, tcg_op1, tcg_op2);
9272        gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env, tcg_res, tcg_res);
9273
9274        switch (opcode) {
9275        case 0xd: /* SQDMULL, SQDMULL2 */
9276            break;
9277        case 0xb: /* SQDMLSL, SQDMLSL2 */
9278            gen_helper_neon_negl_u32(tcg_res, tcg_res);
9279            /* fall through */
9280        case 0x9: /* SQDMLAL, SQDMLAL2 */
9281        {
9282            TCGv_i64 tcg_op3 = tcg_temp_new_i64();
9283            read_vec_element(s, tcg_op3, rd, 0, MO_32);
9284            gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env,
9285                                              tcg_res, tcg_op3);
9286            tcg_temp_free_i64(tcg_op3);
9287            break;
9288        }
9289        default:
9290            g_assert_not_reached();
9291        }
9292
9293        tcg_gen_ext32u_i64(tcg_res, tcg_res);
9294        write_fp_dreg(s, rd, tcg_res);
9295
9296        tcg_temp_free_i32(tcg_op1);
9297        tcg_temp_free_i32(tcg_op2);
9298        tcg_temp_free_i64(tcg_res);
9299    }
9300}
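
/*
 * Editor's note (illustrative): the "doubling" in SQDMULL is done by
 * saturating-adding the product to itself rather than shifting, so
 * the INT_MIN * INT_MIN case saturates and sets QC exactly as the
 * architecture requires; the SQDMLAL/SQDMLSL forms then perform a
 * second saturating accumulate against Rd.
 */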
9301
9302static void handle_3same_64(DisasContext *s, int opcode, bool u,
9303                            TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, TCGv_i64 tcg_rm)
9304{
9305    /* Handle 64x64->64 opcodes which are shared between the scalar
9306     * and vector 3-same groups. We cover every opcode where size == 3
9307     * is valid in either the three-reg-same (integer, not pairwise)
9308     * or scalar-three-reg-same groups.
9309     */
9310    TCGCond cond;
9311
9312    switch (opcode) {
9313    case 0x1: /* SQADD */
9314        if (u) {
9315            gen_helper_neon_qadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
9316        } else {
9317            gen_helper_neon_qadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
9318        }
9319        break;
9320    case 0x5: /* SQSUB */
9321        if (u) {
9322            gen_helper_neon_qsub_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
9323        } else {
9324            gen_helper_neon_qsub_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
9325        }
9326        break;
9327    case 0x6: /* CMGT, CMHI */
9328        /* 64 bit integer comparison, result = test ? (2^64 - 1) : 0.
9329         * We implement this using setcond (test) and then negating.
9330         */
9331        cond = u ? TCG_COND_GTU : TCG_COND_GT;
9332    do_cmop:
9333        tcg_gen_setcond_i64(cond, tcg_rd, tcg_rn, tcg_rm);
9334        tcg_gen_neg_i64(tcg_rd, tcg_rd);
9335        break;
9336    case 0x7: /* CMGE, CMHS */
9337        cond = u ? TCG_COND_GEU : TCG_COND_GE;
9338        goto do_cmop;
9339    case 0x11: /* CMTST, CMEQ */
9340        if (u) {
9341            cond = TCG_COND_EQ;
9342            goto do_cmop;
9343        }
9344        gen_cmtst_i64(tcg_rd, tcg_rn, tcg_rm);
9345        break;
9346    case 0x8: /* SSHL, USHL */
9347        if (u) {
9348            gen_ushl_i64(tcg_rd, tcg_rn, tcg_rm);
9349        } else {
9350            gen_sshl_i64(tcg_rd, tcg_rn, tcg_rm);
9351        }
9352        break;
9353    case 0x9: /* SQSHL, UQSHL */
9354        if (u) {
9355            gen_helper_neon_qshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
9356        } else {
9357            gen_helper_neon_qshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
9358        }
9359        break;
9360    case 0xa: /* SRSHL, URSHL */
9361        if (u) {
9362            gen_helper_neon_rshl_u64(tcg_rd, tcg_rn, tcg_rm);
9363        } else {
9364            gen_helper_neon_rshl_s64(tcg_rd, tcg_rn, tcg_rm);
9365        }
9366        break;
9367    case 0xb: /* SQRSHL, UQRSHL */
9368        if (u) {
9369            gen_helper_neon_qrshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
9370        } else {
9371            gen_helper_neon_qrshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
9372        }
9373        break;
9374    case 0x10: /* ADD, SUB */
9375        if (u) {
9376            tcg_gen_sub_i64(tcg_rd, tcg_rn, tcg_rm);
9377        } else {
9378            tcg_gen_add_i64(tcg_rd, tcg_rn, tcg_rm);
9379        }
9380        break;
9381    default:
9382        g_assert_not_reached();
9383    }
9384}
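
/*
 * Editor's note (illustrative): the compare cases produce the
 * architectural all-ones/all-zeroes result by negating a setcond:
 * setcond yields 0 or 1, and -(1) is 0xffffffffffffffff. CMTST is the
 * exception, testing (Rn & Rm) != 0 via gen_cmtst_i64.
 */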
9385
9386/* Handle the 3-same-operands float operations; shared by the scalar
9387 * and vector encodings. The caller must filter out any encodings
9388 * not allocated for the group it is dealing with.
9389 */
9390static void handle_3same_float(DisasContext *s, int size, int elements,
9391                               int fpopcode, int rd, int rn, int rm)
9392{
9393    int pass;
9394    TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
9395
9396    for (pass = 0; pass < elements; pass++) {
9397        if (size) {
9398            /* Double */
9399            TCGv_i64 tcg_op1 = tcg_temp_new_i64();
9400            TCGv_i64 tcg_op2 = tcg_temp_new_i64();
9401            TCGv_i64 tcg_res = tcg_temp_new_i64();
9402
9403            read_vec_element(s, tcg_op1, rn, pass, MO_64);
9404            read_vec_element(s, tcg_op2, rm, pass, MO_64);
9405
9406            switch (fpopcode) {
9407            case 0x39: /* FMLS */
9408                /* As usual for ARM, separate negation for fused multiply-add */
9409                gen_helper_vfp_negd(tcg_op1, tcg_op1);
9410                /* fall through */
9411            case 0x19: /* FMLA */
9412                read_vec_element(s, tcg_res, rd, pass, MO_64);
9413                gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2,
9414                                       tcg_res, fpst);
9415                break;
9416            case 0x18: /* FMAXNM */
9417                gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
9418                break;
9419            case 0x1a: /* FADD */
9420                gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
9421                break;
9422            case 0x1b: /* FMULX */
9423                gen_helper_vfp_mulxd(tcg_res, tcg_op1, tcg_op2, fpst);
9424                break;
9425            case 0x1c: /* FCMEQ */
9426                gen_helper_neon_ceq_f64(tcg_res, tcg_op1, tcg_op2, fpst);
9427                break;
9428            case 0x1e: /* FMAX */
9429                gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
9430                break;
9431            case 0x1f: /* FRECPS */
9432                gen_helper_recpsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
9433                break;
9434            case 0x38: /* FMINNM */
9435                gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
9436                break;
9437            case 0x3a: /* FSUB */
9438                gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
9439                break;
9440            case 0x3e: /* FMIN */
9441                gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
9442                break;
9443            case 0x3f: /* FRSQRTS */
9444                gen_helper_rsqrtsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
9445                break;
9446            case 0x5b: /* FMUL */
9447                gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
9448                break;
9449            case 0x5c: /* FCMGE */
9450                gen_helper_neon_cge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
9451                break;
9452            case 0x5d: /* FACGE */
9453                gen_helper_neon_acge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
9454                break;
9455            case 0x5f: /* FDIV */
9456                gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
9457                break;
9458            case 0x7a: /* FABD */
9459                gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
9460                gen_helper_vfp_absd(tcg_res, tcg_res);
9461                break;
9462            case 0x7c: /* FCMGT */
9463                gen_helper_neon_cgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
9464                break;
9465            case 0x7d: /* FACGT */
9466                gen_helper_neon_acgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
9467                break;
9468            default:
9469                g_assert_not_reached();
9470            }
9471
9472            write_vec_element(s, tcg_res, rd, pass, MO_64);
9473
9474            tcg_temp_free_i64(tcg_res);
9475            tcg_temp_free_i64(tcg_op1);
9476            tcg_temp_free_i64(tcg_op2);
9477        } else {
9478            /* Single */
9479            TCGv_i32 tcg_op1 = tcg_temp_new_i32();
9480            TCGv_i32 tcg_op2 = tcg_temp_new_i32();
9481            TCGv_i32 tcg_res = tcg_temp_new_i32();
9482
9483            read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
9484            read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);
9485
9486            switch (fpopcode) {
9487            case 0x39: /* FMLS */
9488                /* As usual for ARM, separate negation for fused multiply-add */
9489                gen_helper_vfp_negs(tcg_op1, tcg_op1);
9490                /* fall through */
9491            case 0x19: /* FMLA */
9492                read_vec_element_i32(s, tcg_res, rd, pass, MO_32);
9493                gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2,
9494                                       tcg_res, fpst);
9495                break;
9496            case 0x1a: /* FADD */
9497                gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
9498                break;
9499            case 0x1b: /* FMULX */
9500                gen_helper_vfp_mulxs(tcg_res, tcg_op1, tcg_op2, fpst);
9501                break;
9502            case 0x1c: /* FCMEQ */
9503                gen_helper_neon_ceq_f32(tcg_res, tcg_op1, tcg_op2, fpst);
9504                break;
9505            case 0x1e: /* FMAX */
9506                gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
9507                break;
9508            case 0x1f: /* FRECPS */
9509                gen_helper_recpsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
9510                break;
9511            case 0x18: /* FMAXNM */
9512                gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
9513                break;
9514            case 0x38: /* FMINNM */
9515                gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
9516                break;
9517            case 0x3a: /* FSUB */
9518                gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
9519                break;
9520            case 0x3e: /* FMIN */
9521                gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
9522                break;
9523            case 0x3f: /* FRSQRTS */
9524                gen_helper_rsqrtsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
9525                break;
9526            case 0x5b: /* FMUL */
9527                gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
9528                break;
9529            case 0x5c: /* FCMGE */
9530                gen_helper_neon_cge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
9531                break;
9532            case 0x5d: /* FACGE */
9533                gen_helper_neon_acge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
9534                break;
9535            case 0x5f: /* FDIV */
9536                gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
9537                break;
9538            case 0x7a: /* FABD */
9539                gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
9540                gen_helper_vfp_abss(tcg_res, tcg_res);
9541                break;
9542            case 0x7c: /* FCMGT */
9543                gen_helper_neon_cgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
9544                break;
9545            case 0x7d: /* FACGT */
9546                gen_helper_neon_acgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
9547                break;
9548            default:
9549                g_assert_not_reached();
9550            }
9551
9552            if (elements == 1) {
9553                /* scalar single so clear high part */
9554                TCGv_i64 tcg_tmp = tcg_temp_new_i64();
9555
9556                tcg_gen_extu_i32_i64(tcg_tmp, tcg_res);
9557                write_vec_element(s, tcg_tmp, rd, pass, MO_64);
9558                tcg_temp_free_i64(tcg_tmp);
9559            } else {
9560                write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
9561            }
9562
9563            tcg_temp_free_i32(tcg_res);
9564            tcg_temp_free_i32(tcg_op1);
9565            tcg_temp_free_i32(tcg_op2);
9566        }
9567    }
9568
9569    tcg_temp_free_ptr(fpst);
9570
9571    clear_vec_high(s, elements * (size ? 8 : 4) > 8, rd);
9572}
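
/*
 * Editor's note (illustrative): FMLS negates the first operand and
 * falls through to the FMLA path, so it computes fma(-Rn, Rm, Rd)
 * with a single rounding, matching the architectural fused behaviour
 * rather than negating an already-rounded product.
 */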
9573
9574/* AdvSIMD scalar three same
9575 *  31 30  29 28       24 23  22  21 20  16 15    11  10 9    5 4    0
9576 * +-----+---+-----------+------+---+------+--------+---+------+------+
9577 * | 0 1 | U | 1 1 1 1 0 | size | 1 |  Rm  | opcode | 1 |  Rn  |  Rd  |
9578 * +-----+---+-----------+------+---+------+--------+---+------+------+
9579 */
9580static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn)
9581{
9582    int rd = extract32(insn, 0, 5);
9583    int rn = extract32(insn, 5, 5);
9584    int opcode = extract32(insn, 11, 5);
9585    int rm = extract32(insn, 16, 5);
9586    int size = extract32(insn, 22, 2);
9587    bool u = extract32(insn, 29, 1);
9588    TCGv_i64 tcg_rd;
9589
9590    if (opcode >= 0x18) {
9591        /* Floating point: U, size[1] and opcode indicate operation */
9592        int fpopcode = opcode | (extract32(size, 1, 1) << 5) | (u << 6);
9593        switch (fpopcode) {
9594        case 0x1b: /* FMULX */
9595        case 0x1f: /* FRECPS */
9596        case 0x3f: /* FRSQRTS */
9597        case 0x5d: /* FACGE */
9598        case 0x7d: /* FACGT */
9599        case 0x1c: /* FCMEQ */
9600        case 0x5c: /* FCMGE */
9601        case 0x7c: /* FCMGT */
9602        case 0x7a: /* FABD */
9603            break;
9604        default:
9605            unallocated_encoding(s);
9606            return;
9607        }
9608
9609        if (!fp_access_check(s)) {
9610            return;
9611        }
9612
9613        handle_3same_float(s, extract32(size, 0, 1), 1, fpopcode, rd, rn, rm);
9614        return;
9615    }
9616
9617    switch (opcode) {
9618    case 0x1: /* SQADD, UQADD */
9619    case 0x5: /* SQSUB, UQSUB */
9620    case 0x9: /* SQSHL, UQSHL */
9621    case 0xb: /* SQRSHL, UQRSHL */
9622        break;
9623    case 0x8: /* SSHL, USHL */
9624    case 0xa: /* SRSHL, URSHL */
9625    case 0x6: /* CMGT, CMHI */
9626    case 0x7: /* CMGE, CMHS */
9627    case 0x11: /* CMTST, CMEQ */
9628    case 0x10: /* ADD, SUB (vector) */
9629        if (size != 3) {
9630            unallocated_encoding(s);
9631            return;
9632        }
9633        break;
9634    case 0x16: /* SQDMULH, SQRDMULH (vector) */
9635        if (size != 1 && size != 2) {
9636            unallocated_encoding(s);
9637            return;
9638        }
9639        break;
9640    default:
9641        unallocated_encoding(s);
9642        return;
9643    }
9644
9645    if (!fp_access_check(s)) {
9646        return;
9647    }
9648
9649    tcg_rd = tcg_temp_new_i64();
9650
9651    if (size == 3) {
9652        TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
9653        TCGv_i64 tcg_rm = read_fp_dreg(s, rm);
9654
9655        handle_3same_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rm);
9656        tcg_temp_free_i64(tcg_rn);
9657        tcg_temp_free_i64(tcg_rm);
9658    } else {
9659        /* Do a single operation on the lowest element in the vector.
9660         * We use the standard Neon helpers and rely on 0 OP 0 == 0 with
9661         * no side effects for all these operations.
9662         * OPTME: special-purpose helpers would avoid doing some
9663         * unnecessary work in the helper for the 8 and 16 bit cases.
9664         */
9665        NeonGenTwoOpEnvFn *genenvfn;
9666        TCGv_i32 tcg_rn = tcg_temp_new_i32();
9667        TCGv_i32 tcg_rm = tcg_temp_new_i32();
9668        TCGv_i32 tcg_rd32 = tcg_temp_new_i32();
9669
9670        read_vec_element_i32(s, tcg_rn, rn, 0, size);
9671        read_vec_element_i32(s, tcg_rm, rm, 0, size);
9672
9673        switch (opcode) {
9674        case 0x1: /* SQADD, UQADD */
9675        {
9676            static NeonGenTwoOpEnvFn * const fns[3][2] = {
9677                { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 },
9678                { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 },
9679                { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 },
9680            };
9681            genenvfn = fns[size][u];
9682            break;
9683        }
9684        case 0x5: /* SQSUB, UQSUB */
9685        {
9686            static NeonGenTwoOpEnvFn * const fns[3][2] = {
9687                { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 },
9688                { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 },
9689                { gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 },
9690            };
9691            genenvfn = fns[size][u];
9692            break;
9693        }
9694        case 0x9: /* SQSHL, UQSHL */
9695        {
9696            static NeonGenTwoOpEnvFn * const fns[3][2] = {
9697                { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
9698                { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
9699                { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
9700            };
9701            genenvfn = fns[size][u];
9702            break;
9703        }
9704        case 0xb: /* SQRSHL, UQRSHL */
9705        {
9706            static NeonGenTwoOpEnvFn * const fns[3][2] = {
9707                { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
9708                { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
9709                { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
9710            };
9711            genenvfn = fns[size][u];
9712            break;
9713        }
9714        case 0x16: /* SQDMULH, SQRDMULH */
9715        {
9716            static NeonGenTwoOpEnvFn * const fns[2][2] = {
9717                { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 },
9718                { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 },
9719            };
9720            assert(size == 1 || size == 2);
9721            genenvfn = fns[size - 1][u];
9722            break;
9723        }
9724        default:
9725            g_assert_not_reached();
9726        }
9727
9728        genenvfn(tcg_rd32, cpu_env, tcg_rn, tcg_rm);
9729        tcg_gen_extu_i32_i64(tcg_rd, tcg_rd32);
9730        tcg_temp_free_i32(tcg_rd32);
9731        tcg_temp_free_i32(tcg_rn);
9732        tcg_temp_free_i32(tcg_rm);
9733    }
9734
9735    write_fp_dreg(s, rd, tcg_rd);
9736
9737    tcg_temp_free_i64(tcg_rd);
9738}
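
/*
 * Editor's note (illustrative): the FP dispatch above folds U and
 * size<1> into the opcode, fpopcode = opcode | size<1> << 5 | U << 6;
 * e.g. FABD is opcode 0x1a with U == 1 and size<1> == 1, i.e. 0x7a.
 */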
9739
9740/* AdvSIMD scalar three same FP16
9741 *  31 30  29 28       24 23  22 21 20  16 15 14 13    11 10  9  5 4  0
9742 * +-----+---+-----------+---+-----+------+-----+--------+---+----+----+
9743 * | 0 1 | U | 1 1 1 1 0 | a | 1 0 |  Rm  | 0 0 | opcode | 1 | Rn | Rd |
9744 * +-----+---+-----------+---+-----+------+-----+--------+---+----+----+
9745 * v: 0101 1110 0100 0000 0000 0100 0000 0000 => 5e400400
9746 * m: 1101 1111 0110 0000 1100 0100 0000 0000 => df60c400
9747 */
9748static void disas_simd_scalar_three_reg_same_fp16(DisasContext *s,
9749                                                  uint32_t insn)
9750{
9751    int rd = extract32(insn, 0, 5);
9752    int rn = extract32(insn, 5, 5);
9753    int opcode = extract32(insn, 11, 3);
9754    int rm = extract32(insn, 16, 5);
9755    bool u = extract32(insn, 29, 1);
9756    bool a = extract32(insn, 23, 1);
9757    int fpopcode = opcode | (a << 3) | (u << 4);
9758    TCGv_ptr fpst;
9759    TCGv_i32 tcg_op1;
9760    TCGv_i32 tcg_op2;
9761    TCGv_i32 tcg_res;
9762
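        /*
         * Worked example of the fpopcode assembly above: FABD is encoded
         * with opcode == 0b010, a == 1 and u == 1, giving
         * fpopcode == 0b11010 == 0x1a, the FABD case below.
         */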
9763    switch (fpopcode) {
9764    case 0x03: /* FMULX */
9765    case 0x04: /* FCMEQ (reg) */
9766    case 0x07: /* FRECPS */
9767    case 0x0f: /* FRSQRTS */
9768    case 0x14: /* FCMGE (reg) */
9769    case 0x15: /* FACGE */
9770    case 0x1a: /* FABD */
9771    case 0x1c: /* FCMGT (reg) */
9772    case 0x1d: /* FACGT */
9773        break;
9774    default:
9775        unallocated_encoding(s);
9776        return;
9777    }
9778
9779    if (!dc_isar_feature(aa64_fp16, s)) {
9780        unallocated_encoding(s);
            return;
9781    }
9782
9783    if (!fp_access_check(s)) {
9784        return;
9785    }
9786
9787    fpst = fpstatus_ptr(FPST_FPCR_F16);
9788
9789    tcg_op1 = read_fp_hreg(s, rn);
9790    tcg_op2 = read_fp_hreg(s, rm);
9791    tcg_res = tcg_temp_new_i32();
9792
9793    switch (fpopcode) {
9794    case 0x03: /* FMULX */
9795        gen_helper_advsimd_mulxh(tcg_res, tcg_op1, tcg_op2, fpst);
9796        break;
9797    case 0x04: /* FCMEQ (reg) */
9798        gen_helper_advsimd_ceq_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9799        break;
9800    case 0x07: /* FRECPS */
9801        gen_helper_recpsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9802        break;
9803    case 0x0f: /* FRSQRTS */
9804        gen_helper_rsqrtsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9805        break;
9806    case 0x14: /* FCMGE (reg) */
9807        gen_helper_advsimd_cge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9808        break;
9809    case 0x15: /* FACGE */
9810        gen_helper_advsimd_acge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9811        break;
9812    case 0x1a: /* FABD */
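            /*
             * FABD has no dedicated fp16 helper here: subtract, then clear
             * the sign bit (bit 15 of an IEEE half) to get |op1 - op2|.
             */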
9813        gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
9814        tcg_gen_andi_i32(tcg_res, tcg_res, 0x7fff);
9815        break;
9816    case 0x1c: /* FCMGT (reg) */
9817        gen_helper_advsimd_cgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9818        break;
9819    case 0x1d: /* FACGT */
9820        gen_helper_advsimd_acgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9821        break;
9822    default:
9823        g_assert_not_reached();
9824    }
9825
9826    write_fp_sreg(s, rd, tcg_res);
9827
9829    tcg_temp_free_i32(tcg_res);
9830    tcg_temp_free_i32(tcg_op1);
9831    tcg_temp_free_i32(tcg_op2);
9832    tcg_temp_free_ptr(fpst);
9833}
9834
9835/* AdvSIMD scalar three same extra
9836 *  31 30  29 28       24 23  22  21 20  16  15 14    11  10 9  5 4  0
9837 * +-----+---+-----------+------+---+------+---+--------+---+----+----+
9838 * | 0 1 | U | 1 1 1 1 0 | size | 0 |  Rm  | 1 | opcode | 1 | Rn | Rd |
9839 * +-----+---+-----------+------+---+------+---+--------+---+----+----+
9840 */
9841static void disas_simd_scalar_three_reg_same_extra(DisasContext *s,
9842                                                   uint32_t insn)
9843{
9844    int rd = extract32(insn, 0, 5);
9845    int rn = extract32(insn, 5, 5);
9846    int opcode = extract32(insn, 11, 4);
9847    int rm = extract32(insn, 16, 5);
9848    int size = extract32(insn, 22, 2);
9849    bool u = extract32(insn, 29, 1);
9850    TCGv_i32 ele1, ele2, ele3;
9851    TCGv_i64 res;
9852    bool feature;
9853
9854    switch (u * 16 + opcode) {
9855    case 0x10: /* SQRDMLAH (vector) */
9856    case 0x11: /* SQRDMLSH (vector) */
9857        if (size != 1 && size != 2) {
9858            unallocated_encoding(s);
9859            return;
9860        }
9861        feature = dc_isar_feature(aa64_rdm, s);
9862        break;
9863    default:
9864        unallocated_encoding(s);
9865        return;
9866    }
9867    if (!feature) {
9868        unallocated_encoding(s);
9869        return;
9870    }
9871    if (!fp_access_check(s)) {
9872        return;
9873    }
9874
9875    /* Do a single operation on the lowest element in the vector.
9876     * We use the standard Neon helpers and rely on 0 OP 0 == 0
9877     * with no side effects for all these operations.
9878     * OPTME: special-purpose helpers would avoid doing some
9879     * unnecessary work in the helper for the 16 bit cases.
9880     */
9881    ele1 = tcg_temp_new_i32();
9882    ele2 = tcg_temp_new_i32();
9883    ele3 = tcg_temp_new_i32();
9884
9885    read_vec_element_i32(s, ele1, rn, 0, size);
9886    read_vec_element_i32(s, ele2, rm, 0, size);
9887    read_vec_element_i32(s, ele3, rd, 0, size);
9888
9889    switch (opcode) {
9890    case 0x0: /* SQRDMLAH */
9891        if (size == 1) {
9892            gen_helper_neon_qrdmlah_s16(ele3, cpu_env, ele1, ele2, ele3);
9893        } else {
9894            gen_helper_neon_qrdmlah_s32(ele3, cpu_env, ele1, ele2, ele3);
9895        }
9896        break;
9897    case 0x1: /* SQRDMLSH */
9898        if (size == 1) {
9899            gen_helper_neon_qrdmlsh_s16(ele3, cpu_env, ele1, ele2, ele3);
9900        } else {
9901            gen_helper_neon_qrdmlsh_s32(ele3, cpu_env, ele1, ele2, ele3);
9902        }
9903        break;
9904    default:
9905        g_assert_not_reached();
9906    }
9907    tcg_temp_free_i32(ele1);
9908    tcg_temp_free_i32(ele2);
9909
9910    res = tcg_temp_new_i64();
9911    tcg_gen_extu_i32_i64(res, ele3);
9912    tcg_temp_free_i32(ele3);
9913
9914    write_fp_dreg(s, rd, res);
9915    tcg_temp_free_i64(res);
9916}
9917
9918static void handle_2misc_64(DisasContext *s, int opcode, bool u,
9919                            TCGv_i64 tcg_rd, TCGv_i64 tcg_rn,
9920                            TCGv_i32 tcg_rmode, TCGv_ptr tcg_fpstatus)
9921{
9922    /* Handle 64->64 opcodes which are shared between the scalar and
9923     * vector 2-reg-misc groups. We cover every integer opcode where size == 3
9924     * is valid in either group and also the double-precision fp ops.
9925     * The caller need only provide tcg_rmode and tcg_fpstatus if the op
9926     * requires them.
9927     */
9928    TCGCond cond;
9929
9930    switch (opcode) {
9931    case 0x4: /* CLS, CLZ */
9932        if (u) {
9933            tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64);
9934        } else {
9935            tcg_gen_clrsb_i64(tcg_rd, tcg_rn);
9936        }
9937        break;
9938    case 0x5: /* NOT */
9939        /* This opcode is shared with CNT and RBIT but we have earlier
9940         * enforced that size == 3 if and only if this is the NOT insn.
9941         */
9942        tcg_gen_not_i64(tcg_rd, tcg_rn);
9943        break;
9944    case 0x7: /* SQABS, SQNEG */
9945        if (u) {
9946            gen_helper_neon_qneg_s64(tcg_rd, cpu_env, tcg_rn);
9947        } else {
9948            gen_helper_neon_qabs_s64(tcg_rd, cpu_env, tcg_rn);
9949        }
9950        break;
9951    case 0xa: /* CMLT */
9952        /* 64 bit integer comparison against zero, result is
9953         * test ? (2^64 - 1) : 0. We implement via setcond(test) and
9954         * negating the result.
9955         */
9956        cond = TCG_COND_LT;
9957    do_cmop:
9958        tcg_gen_setcondi_i64(cond, tcg_rd, tcg_rn, 0);
9959        tcg_gen_neg_i64(tcg_rd, tcg_rd);
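            /* E.g. CMGT with tcg_rn > 0: setcond writes 1 and the negation
             * turns it into all-ones (2^64 - 1); a false compare stays 0.
             */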
9960        break;
9961    case 0x8: /* CMGT, CMGE */
9962        cond = u ? TCG_COND_GE : TCG_COND_GT;
9963        goto do_cmop;
9964    case 0x9: /* CMEQ, CMLE */
9965        cond = u ? TCG_COND_LE : TCG_COND_EQ;
9966        goto do_cmop;
9967    case 0xb: /* ABS, NEG */
9968        if (u) {
9969            tcg_gen_neg_i64(tcg_rd, tcg_rn);
9970        } else {
9971            tcg_gen_abs_i64(tcg_rd, tcg_rn);
9972        }
9973        break;
9974    case 0x2f: /* FABS */
9975        gen_helper_vfp_absd(tcg_rd, tcg_rn);
9976        break;
9977    case 0x6f: /* FNEG */
9978        gen_helper_vfp_negd(tcg_rd, tcg_rn);
9979        break;
9980    case 0x7f: /* FSQRT */
9981        gen_helper_vfp_sqrtd(tcg_rd, tcg_rn, cpu_env);
9982        break;
9983    case 0x1a: /* FCVTNS */
9984    case 0x1b: /* FCVTMS */
9985    case 0x1c: /* FCVTAS */
9986    case 0x3a: /* FCVTPS */
9987    case 0x3b: /* FCVTZS */
9988    {
9989        TCGv_i32 tcg_shift = tcg_const_i32(0);
9990        gen_helper_vfp_tosqd(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
9991        tcg_temp_free_i32(tcg_shift);
9992        break;
9993    }
9994    case 0x5a: /* FCVTNU */
9995    case 0x5b: /* FCVTMU */
9996    case 0x5c: /* FCVTAU */
9997    case 0x7a: /* FCVTPU */
9998    case 0x7b: /* FCVTZU */
9999    {
10000        TCGv_i32 tcg_shift = tcg_const_i32(0);
10001        gen_helper_vfp_touqd(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
10002        tcg_temp_free_i32(tcg_shift);
10003        break;
10004    }
10005    case 0x18: /* FRINTN */
10006    case 0x19: /* FRINTM */
10007    case 0x38: /* FRINTP */
10008    case 0x39: /* FRINTZ */
10009    case 0x58: /* FRINTA */
10010    case 0x79: /* FRINTI */
10011        gen_helper_rintd(tcg_rd, tcg_rn, tcg_fpstatus);
10012        break;
10013    case 0x59: /* FRINTX */
10014        gen_helper_rintd_exact(tcg_rd, tcg_rn, tcg_fpstatus);
10015        break;
10016    case 0x1e: /* FRINT32Z */
10017    case 0x5e: /* FRINT32X */
10018        gen_helper_frint32_d(tcg_rd, tcg_rn, tcg_fpstatus);
10019        break;
10020    case 0x1f: /* FRINT64Z */
10021    case 0x5f: /* FRINT64X */
10022        gen_helper_frint64_d(tcg_rd, tcg_rn, tcg_fpstatus);
10023        break;
10024    default:
10025        g_assert_not_reached();
10026    }
10027}
10028
10029static void handle_2misc_fcmp_zero(DisasContext *s, int opcode,
10030                                   bool is_scalar, bool is_u, bool is_q,
10031                                   int size, int rn, int rd)
10032{
10033    bool is_double = (size == MO_64);
10034    TCGv_ptr fpst;
10035
10036    if (!fp_access_check(s)) {
10037        return;
10038    }
10039
10040    fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
10041
10042    if (is_double) {
10043        TCGv_i64 tcg_op = tcg_temp_new_i64();
10044        TCGv_i64 tcg_zero = tcg_const_i64(0);
10045        TCGv_i64 tcg_res = tcg_temp_new_i64();
10046        NeonGenTwoDoubleOpFn *genfn;
10047        bool swap = false;
10048        int pass;
10049
10050        switch (opcode) {
10051        case 0x2e: /* FCMLT (zero) */
10052            swap = true;
10053            /* fallthrough */
10054        case 0x2c: /* FCMGT (zero) */
10055            genfn = gen_helper_neon_cgt_f64;
10056            break;
10057        case 0x2d: /* FCMEQ (zero) */
10058            genfn = gen_helper_neon_ceq_f64;
10059            break;
10060        case 0x6d: /* FCMLE (zero) */
10061            swap = true;
10062            /* fall through */
10063        case 0x6c: /* FCMGE (zero) */
10064            genfn = gen_helper_neon_cge_f64;
10065            break;
10066        default:
10067            g_assert_not_reached();
10068        }
10069
10070        for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
10071            read_vec_element(s, tcg_op, rn, pass, MO_64);
10072            if (swap) {
10073                genfn(tcg_res, tcg_zero, tcg_op, fpst);
10074            } else {
10075                genfn(tcg_res, tcg_op, tcg_zero, fpst);
10076            }
10077            write_vec_element(s, tcg_res, rd, pass, MO_64);
10078        }
10079        tcg_temp_free_i64(tcg_res);
10080        tcg_temp_free_i64(tcg_zero);
10081        tcg_temp_free_i64(tcg_op);
10082
10083        clear_vec_high(s, !is_scalar, rd);
10084    } else {
10085        TCGv_i32 tcg_op = tcg_temp_new_i32();
10086        TCGv_i32 tcg_zero = tcg_const_i32(0);
10087        TCGv_i32 tcg_res = tcg_temp_new_i32();
10088        NeonGenTwoSingleOpFn *genfn;
10089        bool swap = false;
10090        int pass, maxpasses;
10091
10092        if (size == MO_16) {
10093            switch (opcode) {
10094            case 0x2e: /* FCMLT (zero) */
10095                swap = true;
10096                /* fall through */
10097            case 0x2c: /* FCMGT (zero) */
10098                genfn = gen_helper_advsimd_cgt_f16;
10099                break;
10100            case 0x2d: /* FCMEQ (zero) */
10101                genfn = gen_helper_advsimd_ceq_f16;
10102                break;
10103            case 0x6d: /* FCMLE (zero) */
10104                swap = true;
10105                /* fall through */
10106            case 0x6c: /* FCMGE (zero) */
10107                genfn = gen_helper_advsimd_cge_f16;
10108                break;
10109            default:
10110                g_assert_not_reached();
10111            }
10112        } else {
10113            switch (opcode) {
10114            case 0x2e: /* FCMLT (zero) */
10115                swap = true;
10116                /* fall through */
10117            case 0x2c: /* FCMGT (zero) */
10118                genfn = gen_helper_neon_cgt_f32;
10119                break;
10120            case 0x2d: /* FCMEQ (zero) */
10121                genfn = gen_helper_neon_ceq_f32;
10122                break;
10123            case 0x6d: /* FCMLE (zero) */
10124                swap = true;
10125                /* fall through */
10126            case 0x6c: /* FCMGE (zero) */
10127                genfn = gen_helper_neon_cge_f32;
10128                break;
10129            default:
10130                g_assert_not_reached();
10131            }
10132        }
10133
10134        if (is_scalar) {
10135            maxpasses = 1;
10136        } else {
10137            int vector_size = 8 << is_q;
10138            maxpasses = vector_size >> size;
10139        }
10140
10141        for (pass = 0; pass < maxpasses; pass++) {
10142            read_vec_element_i32(s, tcg_op, rn, pass, size);
10143            if (swap) {
10144                genfn(tcg_res, tcg_zero, tcg_op, fpst);
10145            } else {
10146                genfn(tcg_res, tcg_op, tcg_zero, fpst);
10147            }
10148            if (is_scalar) {
10149                write_fp_sreg(s, rd, tcg_res);
10150            } else {
10151                write_vec_element_i32(s, tcg_res, rd, pass, size);
10152            }
10153        }
10154        tcg_temp_free_i32(tcg_res);
10155        tcg_temp_free_i32(tcg_zero);
10156        tcg_temp_free_i32(tcg_op);
10157        if (!is_scalar) {
10158            clear_vec_high(s, is_q, rd);
10159        }
10160    }
10161
10162    tcg_temp_free_ptr(fpst);
10163}
10164
10165static void handle_2misc_reciprocal(DisasContext *s, int opcode,
10166                                    bool is_scalar, bool is_u, bool is_q,
10167                                    int size, int rn, int rd)
10168{
10169    bool is_double = (size == 3);
10170    TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
10171
10172    if (is_double) {
10173        TCGv_i64 tcg_op = tcg_temp_new_i64();
10174        TCGv_i64 tcg_res = tcg_temp_new_i64();
10175        int pass;
10176
10177        for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
10178            read_vec_element(s, tcg_op, rn, pass, MO_64);
10179            switch (opcode) {
10180            case 0x3d: /* FRECPE */
10181                gen_helper_recpe_f64(tcg_res, tcg_op, fpst);
10182                break;
10183            case 0x3f: /* FRECPX */
10184                gen_helper_frecpx_f64(tcg_res, tcg_op, fpst);
10185                break;
10186            case 0x7d: /* FRSQRTE */
10187                gen_helper_rsqrte_f64(tcg_res, tcg_op, fpst);
10188                break;
10189            default:
10190                g_assert_not_reached();
10191            }
10192            write_vec_element(s, tcg_res, rd, pass, MO_64);
10193        }
10194        tcg_temp_free_i64(tcg_res);
10195        tcg_temp_free_i64(tcg_op);
10196        clear_vec_high(s, !is_scalar, rd);
10197    } else {
10198        TCGv_i32 tcg_op = tcg_temp_new_i32();
10199        TCGv_i32 tcg_res = tcg_temp_new_i32();
10200        int pass, maxpasses;
10201
10202        if (is_scalar) {
10203            maxpasses = 1;
10204        } else {
10205            maxpasses = is_q ? 4 : 2;
10206        }
10207
10208        for (pass = 0; pass < maxpasses; pass++) {
10209            read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
10210
10211            switch (opcode) {
10212            case 0x3c: /* URECPE */
10213                gen_helper_recpe_u32(tcg_res, tcg_op);
10214                break;
10215            case 0x3d: /* FRECPE */
10216                gen_helper_recpe_f32(tcg_res, tcg_op, fpst);
10217                break;
10218            case 0x3f: /* FRECPX */
10219                gen_helper_frecpx_f32(tcg_res, tcg_op, fpst);
10220                break;
10221            case 0x7d: /* FRSQRTE */
10222                gen_helper_rsqrte_f32(tcg_res, tcg_op, fpst);
10223                break;
10224            default:
10225                g_assert_not_reached();
10226            }
10227
10228            if (is_scalar) {
10229                write_fp_sreg(s, rd, tcg_res);
10230            } else {
10231                write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
10232            }
10233        }
10234        tcg_temp_free_i32(tcg_res);
10235        tcg_temp_free_i32(tcg_op);
10236        if (!is_scalar) {
10237            clear_vec_high(s, is_q, rd);
10238        }
10239    }
10240    tcg_temp_free_ptr(fpst);
10241}
10242
10243static void handle_2misc_narrow(DisasContext *s, bool scalar,
10244                                int opcode, bool u, bool is_q,
10245                                int size, int rn, int rd)
10246{
10247    /* Handle 2-reg-misc ops which are narrowing (so each 2*size element
10248     * in the source becomes a size element in the destination).
10249     */
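         /*
          * E.g. size == 1: each 32-bit source element narrows to 16 bits,
          * and destelt below steers the results into the low (Q == 0) or
          * high (Q == 1) 64-bit half of Rd.
          */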
10250    int pass;
10251    TCGv_i32 tcg_res[2];
10252    int destelt = is_q ? 2 : 0;
10253    int passes = scalar ? 1 : 2;
10254
10255    if (scalar) {
10256        tcg_res[1] = tcg_const_i32(0);
10257    }
10258
10259    for (pass = 0; pass < passes; pass++) {
10260        TCGv_i64 tcg_op = tcg_temp_new_i64();
10261        NeonGenNarrowFn *genfn = NULL;
10262        NeonGenNarrowEnvFn *genenvfn = NULL;
10263
10264        if (scalar) {
10265            read_vec_element(s, tcg_op, rn, pass, size + 1);
10266        } else {
10267            read_vec_element(s, tcg_op, rn, pass, MO_64);
10268        }
10269        tcg_res[pass] = tcg_temp_new_i32();
10270
10271        switch (opcode) {
10272        case 0x12: /* XTN, SQXTUN */
10273        {
10274            static NeonGenNarrowFn * const xtnfns[3] = {
10275                gen_helper_neon_narrow_u8,
10276                gen_helper_neon_narrow_u16,
10277                tcg_gen_extrl_i64_i32,
10278            };
10279            static NeonGenNarrowEnvFn * const sqxtunfns[3] = {
10280                gen_helper_neon_unarrow_sat8,
10281                gen_helper_neon_unarrow_sat16,
10282                gen_helper_neon_unarrow_sat32,
10283            };
10284            if (u) {
10285                genenvfn = sqxtunfns[size];
10286            } else {
10287                genfn = xtnfns[size];
10288            }
10289            break;
10290        }
10291        case 0x14: /* SQXTN, UQXTN */
10292        {
10293            static NeonGenNarrowEnvFn * const fns[3][2] = {
10294                { gen_helper_neon_narrow_sat_s8,
10295                  gen_helper_neon_narrow_sat_u8 },
10296                { gen_helper_neon_narrow_sat_s16,
10297                  gen_helper_neon_narrow_sat_u16 },
10298                { gen_helper_neon_narrow_sat_s32,
10299                  gen_helper_neon_narrow_sat_u32 },
10300            };
10301            genenvfn = fns[size][u];
10302            break;
10303        }
10304        case 0x16: /* FCVTN, FCVTN2 */
10305            /* 32 bit to 16 bit or 64 bit to 32 bit float conversion */
10306            if (size == 2) {
10307                gen_helper_vfp_fcvtsd(tcg_res[pass], tcg_op, cpu_env);
10308            } else {
10309                TCGv_i32 tcg_lo = tcg_temp_new_i32();
10310                TCGv_i32 tcg_hi = tcg_temp_new_i32();
10311                TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
10312                TCGv_i32 ahp = get_ahp_flag();
10313
10314                tcg_gen_extr_i64_i32(tcg_lo, tcg_hi, tcg_op);
10315                gen_helper_vfp_fcvt_f32_to_f16(tcg_lo, tcg_lo, fpst, ahp);
10316                gen_helper_vfp_fcvt_f32_to_f16(tcg_hi, tcg_hi, fpst, ahp);
10317                tcg_gen_deposit_i32(tcg_res[pass], tcg_lo, tcg_hi, 16, 16);
10318                tcg_temp_free_i32(tcg_lo);
10319                tcg_temp_free_i32(tcg_hi);
10320                tcg_temp_free_ptr(fpst);
10321                tcg_temp_free_i32(ahp);
10322            }
10323            break;
10324        case 0x56:  /* FCVTXN, FCVTXN2 */
10325            /* 64 bit to 32 bit float conversion
10326             * with von Neumann rounding (round to odd)
10327             */
10328            assert(size == 2);
10329            gen_helper_fcvtx_f64_to_f32(tcg_res[pass], tcg_op, cpu_env);
10330            break;
10331        default:
10332            g_assert_not_reached();
10333        }
10334
10335        if (genfn) {
10336            genfn(tcg_res[pass], tcg_op);
10337        } else if (genenvfn) {
10338            genenvfn(tcg_res[pass], cpu_env, tcg_op);
10339        }
10340
10341        tcg_temp_free_i64(tcg_op);
10342    }
10343
10344    for (pass = 0; pass < 2; pass++) {
10345        write_vec_element_i32(s, tcg_res[pass], rd, destelt + pass, MO_32);
10346        tcg_temp_free_i32(tcg_res[pass]);
10347    }
10348    clear_vec_high(s, is_q, rd);
10349}
10350
10351/* Remaining saturating accumulating ops */
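     /*
      * USQADD adds a signed operand into an unsigned accumulator with
      * unsigned saturation; SUQADD adds an unsigned operand into a signed
      * accumulator with signed saturation. That is why the is_u (USQADD)
      * path below calls the neon_uqadd_s* helpers and the SUQADD path the
      * neon_sqadd_u* helpers.
      */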
10352static void handle_2misc_satacc(DisasContext *s, bool is_scalar, bool is_u,
10353                                bool is_q, int size, int rn, int rd)
10354{
10355    bool is_double = (size == 3);
10356
10357    if (is_double) {
10358        TCGv_i64 tcg_rn = tcg_temp_new_i64();
10359        TCGv_i64 tcg_rd = tcg_temp_new_i64();
10360        int pass;
10361
10362        for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
10363            read_vec_element(s, tcg_rn, rn, pass, MO_64);
10364            read_vec_element(s, tcg_rd, rd, pass, MO_64);
10365
10366            if (is_u) { /* USQADD */
10367                gen_helper_neon_uqadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rd);
10368            } else { /* SUQADD */
10369                gen_helper_neon_sqadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rd);
10370            }
10371            write_vec_element(s, tcg_rd, rd, pass, MO_64);
10372        }
10373        tcg_temp_free_i64(tcg_rd);
10374        tcg_temp_free_i64(tcg_rn);
10375        clear_vec_high(s, !is_scalar, rd);
10376    } else {
10377        TCGv_i32 tcg_rn = tcg_temp_new_i32();
10378        TCGv_i32 tcg_rd = tcg_temp_new_i32();
10379        int pass, maxpasses;
10380
10381        if (is_scalar) {
10382            maxpasses = 1;
10383        } else {
10384            maxpasses = is_q ? 4 : 2;
10385        }
10386
10387        for (pass = 0; pass < maxpasses; pass++) {
10388            if (is_scalar) {
10389                read_vec_element_i32(s, tcg_rn, rn, pass, size);
10390                read_vec_element_i32(s, tcg_rd, rd, pass, size);
10391            } else {
10392                read_vec_element_i32(s, tcg_rn, rn, pass, MO_32);
10393                read_vec_element_i32(s, tcg_rd, rd, pass, MO_32);
10394            }
10395
10396            if (is_u) { /* USQADD */
10397                switch (size) {
10398                case 0:
10399                    gen_helper_neon_uqadd_s8(tcg_rd, cpu_env, tcg_rn, tcg_rd);
10400                    break;
10401                case 1:
10402                    gen_helper_neon_uqadd_s16(tcg_rd, cpu_env, tcg_rn, tcg_rd);
10403                    break;
10404                case 2:
10405                    gen_helper_neon_uqadd_s32(tcg_rd, cpu_env, tcg_rn, tcg_rd);
10406                    break;
10407                default:
10408                    g_assert_not_reached();
10409                }
10410            } else { /* SUQADD */
10411                switch (size) {
10412                case 0:
10413                    gen_helper_neon_sqadd_u8(tcg_rd, cpu_env, tcg_rn, tcg_rd);
10414                    break;
10415                case 1:
10416                    gen_helper_neon_sqadd_u16(tcg_rd, cpu_env, tcg_rn, tcg_rd);
10417                    break;
10418                case 2:
10419                    gen_helper_neon_sqadd_u32(tcg_rd, cpu_env, tcg_rn, tcg_rd);
10420                    break;
10421                default:
10422                    g_assert_not_reached();
10423                }
10424            }
10425
10426            if (is_scalar) {
10427                TCGv_i64 tcg_zero = tcg_const_i64(0);
10428                write_vec_element(s, tcg_zero, rd, 0, MO_64);
10429                tcg_temp_free_i64(tcg_zero);
10430            }
10431            write_vec_element_i32(s, tcg_rd, rd, pass, MO_32);
10432        }
10433        tcg_temp_free_i32(tcg_rd);
10434        tcg_temp_free_i32(tcg_rn);
10435        clear_vec_high(s, is_q, rd);
10436    }
10437}
10438
10439/* AdvSIMD scalar two reg misc
10440 *  31 30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
10441 * +-----+---+-----------+------+-----------+--------+-----+------+------+
10442 * | 0 1 | U | 1 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
10443 * +-----+---+-----------+------+-----------+--------+-----+------+------+
10444 */
10445static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn)
10446{
10447    int rd = extract32(insn, 0, 5);
10448    int rn = extract32(insn, 5, 5);
10449    int opcode = extract32(insn, 12, 5);
10450    int size = extract32(insn, 22, 2);
10451    bool u = extract32(insn, 29, 1);
10452    bool is_fcvt = false;
10453    int rmode;
10454    TCGv_i32 tcg_rmode;
10455    TCGv_ptr tcg_fpstatus;
10456
10457    switch (opcode) {
10458    case 0x3: /* USQADD / SUQADD */
10459        if (!fp_access_check(s)) {
10460            return;
10461        }
10462        handle_2misc_satacc(s, true, u, false, size, rn, rd);
10463        return;
10464    case 0x7: /* SQABS / SQNEG */
10465        break;
10466    case 0xa: /* CMLT */
10467        if (u) {
10468            unallocated_encoding(s);
10469            return;
10470        }
10471        /* fall through */
10472    case 0x8: /* CMGT, CMGE */
10473    case 0x9: /* CMEQ, CMLE */
10474    case 0xb: /* ABS, NEG */
10475        if (size != 3) {
10476            unallocated_encoding(s);
10477            return;
10478        }
10479        break;
10480    case 0x12: /* SQXTUN */
10481        if (!u) {
10482            unallocated_encoding(s);
10483            return;
10484        }
10485        /* fall through */
10486    case 0x14: /* SQXTN, UQXTN */
10487        if (size == 3) {
10488            unallocated_encoding(s);
10489            return;
10490        }
10491        if (!fp_access_check(s)) {
10492            return;
10493        }
10494        handle_2misc_narrow(s, true, opcode, u, false, size, rn, rd);
10495        return;
10496    case 0xc ... 0xf:
10497    case 0x16 ... 0x1d:
10498    case 0x1f:
10499        /* Floating point: U, size[1] and opcode indicate operation;
10500         * size[0] indicates single or double precision.
10501         */
10502        opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
10503        size = extract32(size, 0, 1) ? 3 : 2;
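             /*
              * Worked example: FCVTZS (double) arrives as U == 0,
              * size == 0b11, opcode == 0x1b; the remap gives
              * opcode == 0x3b (the FCVTZS case below) with size == 3.
              */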
10504        switch (opcode) {
10505        case 0x2c: /* FCMGT (zero) */
10506        case 0x2d: /* FCMEQ (zero) */
10507        case 0x2e: /* FCMLT (zero) */
10508        case 0x6c: /* FCMGE (zero) */
10509        case 0x6d: /* FCMLE (zero) */
10510            handle_2misc_fcmp_zero(s, opcode, true, u, true, size, rn, rd);
10511            return;
10512        case 0x1d: /* SCVTF */
10513        case 0x5d: /* UCVTF */
10514        {
10515            bool is_signed = (opcode == 0x1d);
10516            if (!fp_access_check(s)) {
10517                return;
10518            }
10519            handle_simd_intfp_conv(s, rd, rn, 1, is_signed, 0, size);
10520            return;
10521        }
10522        case 0x3d: /* FRECPE */
10523        case 0x3f: /* FRECPX */
10524        case 0x7d: /* FRSQRTE */
10525            if (!fp_access_check(s)) {
10526                return;
10527            }
10528            handle_2misc_reciprocal(s, opcode, true, u, true, size, rn, rd);
10529            return;
10530        case 0x1a: /* FCVTNS */
10531        case 0x1b: /* FCVTMS */
10532        case 0x3a: /* FCVTPS */
10533        case 0x3b: /* FCVTZS */
10534        case 0x5a: /* FCVTNU */
10535        case 0x5b: /* FCVTMU */
10536        case 0x7a: /* FCVTPU */
10537        case 0x7b: /* FCVTZU */
10538            is_fcvt = true;
10539            rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
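                 /*
                  * Assuming the usual FPROUNDING_* order (TIEEVEN, POSINF,
                  * NEGINF, ZERO), this maps FCVTN, FCVTM, FCVTP and FCVTZ
                  * to rmode 0, 2, 1 and 3 respectively; e.g. FCVTMS (0x1b)
                  * yields rmode == 2 == FPROUNDING_NEGINF.
                  */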
10540            break;
10541        case 0x1c: /* FCVTAS */
10542        case 0x5c: /* FCVTAU */
10543            /* TIEAWAY doesn't fit in the usual rounding mode encoding */
10544            is_fcvt = true;
10545            rmode = FPROUNDING_TIEAWAY;
10546            break;
10547        case 0x56: /* FCVTXN, FCVTXN2 */
10548            if (size == 2) {
10549                unallocated_encoding(s);
10550                return;
10551            }
10552            if (!fp_access_check(s)) {
10553                return;
10554            }
10555            handle_2misc_narrow(s, true, opcode, u, false, size - 1, rn, rd);
10556            return;
10557        default:
10558            unallocated_encoding(s);
10559            return;
10560        }
10561        break;
10562    default:
10563        unallocated_encoding(s);
10564        return;
10565    }
10566
10567    if (!fp_access_check(s)) {
10568        return;
10569    }
10570
10571    if (is_fcvt) {
10572        tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
10573        tcg_fpstatus = fpstatus_ptr(FPST_FPCR);
10574        gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
10575    } else {
10576        tcg_rmode = NULL;
10577        tcg_fpstatus = NULL;
10578    }
10579
10580    if (size == 3) {
10581        TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
10582        TCGv_i64 tcg_rd = tcg_temp_new_i64();
10583
10584        handle_2misc_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rmode, tcg_fpstatus);
10585        write_fp_dreg(s, rd, tcg_rd);
10586        tcg_temp_free_i64(tcg_rd);
10587        tcg_temp_free_i64(tcg_rn);
10588    } else {
10589        TCGv_i32 tcg_rn = tcg_temp_new_i32();
10590        TCGv_i32 tcg_rd = tcg_temp_new_i32();
10591
10592        read_vec_element_i32(s, tcg_rn, rn, 0, size);
10593
10594        switch (opcode) {
10595        case 0x7: /* SQABS, SQNEG */
10596        {
10597            NeonGenOneOpEnvFn *genfn;
10598            static NeonGenOneOpEnvFn * const fns[3][2] = {
10599                { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
10600                { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
10601                { gen_helper_neon_qabs_s32, gen_helper_neon_qneg_s32 },
10602            };
10603            genfn = fns[size][u];
10604            genfn(tcg_rd, cpu_env, tcg_rn);
10605            break;
10606        }
10607        case 0x1a: /* FCVTNS */
10608        case 0x1b: /* FCVTMS */
10609        case 0x1c: /* FCVTAS */
10610        case 0x3a: /* FCVTPS */
10611        case 0x3b: /* FCVTZS */
10612        {
10613            TCGv_i32 tcg_shift = tcg_const_i32(0);
10614            gen_helper_vfp_tosls(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
10615            tcg_temp_free_i32(tcg_shift);
10616            break;
10617        }
10618        case 0x5a: /* FCVTNU */
10619        case 0x5b: /* FCVTMU */
10620        case 0x5c: /* FCVTAU */
10621        case 0x7a: /* FCVTPU */
10622        case 0x7b: /* FCVTZU */
10623        {
10624            TCGv_i32 tcg_shift = tcg_const_i32(0);
10625            gen_helper_vfp_touls(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
10626            tcg_temp_free_i32(tcg_shift);
10627            break;
10628        }
10629        default:
10630            g_assert_not_reached();
10631        }
10632
10633        write_fp_sreg(s, rd, tcg_rd);
10634        tcg_temp_free_i32(tcg_rd);
10635        tcg_temp_free_i32(tcg_rn);
10636    }
10637
10638    if (is_fcvt) {
10639        gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
10640        tcg_temp_free_i32(tcg_rmode);
10641        tcg_temp_free_ptr(tcg_fpstatus);
10642    }
10643}
10644
10645/* SSHR[RA]/USHR[RA] - Vector shift right (optional rounding/accumulate) */
10646static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u,
10647                                 int immh, int immb, int opcode, int rn, int rd)
10648{
10649    int size = 32 - clz32(immh) - 1;
10650    int immhb = immh << 3 | immb;
10651    int shift = 2 * (8 << size) - immhb;
10652    GVecGen2iFn *gvec_fn;
10653
10654    if (extract32(immh, 3, 1) && !is_q) {
10655        unallocated_encoding(s);
10656        return;
10657    }
10658    tcg_debug_assert(size <= 3);
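         /*
          * Worked example: any immh of the form 0b01xx gives size == 2
          * (32-bit elements); immhb then lies in 32..63 and
          * shift == 64 - immhb covers the architectural range 32 down to 1.
          */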
10659
10660    if (!fp_access_check(s)) {
10661        return;
10662    }
10663
10664    switch (opcode) {
10665    case 0x02: /* SSRA / USRA (accumulate) */
10666        gvec_fn = is_u ? gen_gvec_usra : gen_gvec_ssra;
10667        break;
10668
10669    case 0x08: /* SRI */
10670        gvec_fn = gen_gvec_sri;
10671        break;
10672
10673    case 0x00: /* SSHR / USHR */
10674        if (is_u) {
10675            if (shift == 8 << size) {
10676                /* Shift count the same size as element size produces zero.  */
10677                tcg_gen_gvec_dup_imm(size, vec_full_reg_offset(s, rd),
10678                                     is_q ? 16 : 8, vec_full_reg_size(s), 0);
10679                return;
10680            }
10681            gvec_fn = tcg_gen_gvec_shri;
10682        } else {
10683            /* Shift count the same size as element size produces all sign.  */
10684            if (shift == 8 << size) {
10685                shift -= 1;
10686            }
10687            gvec_fn = tcg_gen_gvec_sari;
10688        }
10689        break;
10690
10691    case 0x04: /* SRSHR / URSHR (rounding) */
10692        gvec_fn = is_u ? gen_gvec_urshr : gen_gvec_srshr;
10693        break;
10694
10695    case 0x06: /* SRSRA / URSRA (accum + rounding) */
10696        gvec_fn = is_u ? gen_gvec_ursra : gen_gvec_srsra;
10697        break;
10698
10699    default:
10700        g_assert_not_reached();
10701    }
10702
10703    gen_gvec_fn2i(s, is_q, rd, rn, shift, gvec_fn, size);
10704}
10705
10706/* SHL/SLI - Vector shift left */
10707static void handle_vec_simd_shli(DisasContext *s, bool is_q, bool insert,
10708                                 int immh, int immb, int opcode, int rn, int rd)
10709{
10710    int size = 32 - clz32(immh) - 1;
10711    int immhb = immh << 3 | immb;
10712    int shift = immhb - (8 << size);
10713
10714    /* Range of size is limited by decode: immh is a non-zero 4 bit field */
10715    assert(size >= 0 && size <= 3);
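         /* E.g. immh == 0b0001, immb == 0b011: size == 0 (8-bit elements)
          * and shift == immhb - 8 == 3.
          */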
10716
10717    if (extract32(immh, 3, 1) && !is_q) {
10718        unallocated_encoding(s);
10719        return;
10720    }
10721
10722    if (!fp_access_check(s)) {
10723        return;
10724    }
10725
10726    if (insert) {
10727        gen_gvec_fn2i(s, is_q, rd, rn, shift, gen_gvec_sli, size);
10728    } else {
10729        gen_gvec_fn2i(s, is_q, rd, rn, shift, tcg_gen_gvec_shli, size);
10730    }
10731}
10732
10733/* USHLL/SHLL - Vector shift left with widening */
10734static void handle_vec_simd_wshli(DisasContext *s, bool is_q, bool is_u,
10735                                 int immh, int immb, int opcode, int rn, int rd)
10736{
10737    int size = 32 - clz32(immh) - 1;
10738    int immhb = immh << 3 | immb;
10739    int shift = immhb - (8 << size);
10740    int dsize = 64;
10741    int esize = 8 << size;
10742    int elements = dsize/esize;
10743    TCGv_i64 tcg_rn = new_tmp_a64(s);
10744    TCGv_i64 tcg_rd = new_tmp_a64(s);
10745    int i;
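         /*
          * E.g. SSHLL with size == 0 widens eight 8-bit elements from one
          * 64-bit half of the source into eight 16-bit results, each
          * shifted left by the encoded amount (0..7 at this size).
          */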
10746
10747    if (size >= 3) {
10748        unallocated_encoding(s);
10749        return;
10750    }
10751
10752    if (!fp_access_check(s)) {
10753        return;
10754    }
10755
10756    /* For the LL variants the store is larger than the load,
10757     * so if rd == rn we would overwrite parts of our input.
10758     * Load everything up front and use shifts in the main loop.
10759     */
10760    read_vec_element(s, tcg_rn, rn, is_q ? 1 : 0, MO_64);
10761
10762    for (i = 0; i < elements; i++) {
10763        tcg_gen_shri_i64(tcg_rd, tcg_rn, i * esize);
10764        ext_and_shift_reg(tcg_rd, tcg_rd, size | (!is_u << 2), 0);
10765        tcg_gen_shli_i64(tcg_rd, tcg_rd, shift);
10766        write_vec_element(s, tcg_rd, rd, i, size + 1);
10767    }
10768}
10769
10770/* SHRN/RSHRN - Shift right with narrowing (and potential rounding) */
10771static void handle_vec_simd_shrn(DisasContext *s, bool is_q,
10772                                 int immh, int immb, int opcode, int rn, int rd)
10773{
10774    int immhb = immh << 3 | immb;
10775    int size = 32 - clz32(immh) - 1;
10776    int dsize = 64;
10777    int esize = 8 << size;
10778    int elements = dsize/esize;
10779    int shift = (2 * esize) - immhb;
10780    bool round = extract32(opcode, 0, 1);
10781    TCGv_i64 tcg_rn, tcg_rd, tcg_final;
10782    TCGv_i64 tcg_round;
10783    int i;
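         /*
          * E.g. size == 0: 16-bit source elements narrow to 8 bits, and
          * with immh == 0b0001, shift == 16 - immhb covers right shifts
          * of 8 down to 1.
          */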
10784
10785    if (extract32(immh, 3, 1)) {
10786        unallocated_encoding(s);
10787        return;
10788    }
10789
10790    if (!fp_access_check(s)) {
10791        return;
10792    }
10793
10794    tcg_rn = tcg_temp_new_i64();
10795    tcg_rd = tcg_temp_new_i64();
10796    tcg_final = tcg_temp_new_i64();
10797    read_vec_element(s, tcg_final, rd, is_q ? 1 : 0, MO_64);
10798
10799    if (round) {
10800        uint64_t round_const = 1ULL << (shift - 1);
10801        tcg_round = tcg_const_i64(round_const);
10802    } else {
10803        tcg_round = NULL;
10804    }
10805
10806    for (i = 0; i < elements; i++) {
10807        read_vec_element(s, tcg_rn, rn, i, size+1);
10808        handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
10809                                false, true, size+1, shift);
10810
10811        tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
10812    }
10813
10814    if (!is_q) {
10815        write_vec_element(s, tcg_final, rd, 0, MO_64);
10816    } else {
10817        write_vec_element(s, tcg_final, rd, 1, MO_64);
10818    }
10819    if (round) {
10820        tcg_temp_free_i64(tcg_round);
10821    }
10822    tcg_temp_free_i64(tcg_rn);
10823    tcg_temp_free_i64(tcg_rd);
10824    tcg_temp_free_i64(tcg_final);
10825
10826    clear_vec_high(s, is_q, rd);
10827}
10828
10830/* AdvSIMD shift by immediate
10831 *  31  30   29 28         23 22  19 18  16 15    11  10 9    5 4    0
10832 * +---+---+---+-------------+------+------+--------+---+------+------+
10833 * | 0 | Q | U | 0 1 1 1 1 0 | immh | immb | opcode | 1 |  Rn  |  Rd  |
10834 * +---+---+---+-------------+------+------+--------+---+------+------+
10835 */
10836static void disas_simd_shift_imm(DisasContext *s, uint32_t insn)
10837{
10838    int rd = extract32(insn, 0, 5);
10839    int rn = extract32(insn, 5, 5);
10840    int opcode = extract32(insn, 11, 5);
10841    int immb = extract32(insn, 16, 3);
10842    int immh = extract32(insn, 19, 4);
10843    bool is_u = extract32(insn, 29, 1);
10844    bool is_q = extract32(insn, 30, 1);
10845
10846    /* data_proc_simd[] has sent immh == 0 to disas_simd_mod_imm. */
10847    assert(immh != 0);
10848
10849    switch (opcode) {
10850    case 0x08: /* SRI */
10851        if (!is_u) {
10852            unallocated_encoding(s);
10853            return;
10854        }
10855        /* fall through */
10856    case 0x00: /* SSHR / USHR */
10857    case 0x02: /* SSRA / USRA (accumulate) */
10858    case 0x04: /* SRSHR / URSHR (rounding) */
10859    case 0x06: /* SRSRA / URSRA (accum + rounding) */
10860        handle_vec_simd_shri(s, is_q, is_u, immh, immb, opcode, rn, rd);
10861        break;
10862    case 0x0a: /* SHL / SLI */
10863        handle_vec_simd_shli(s, is_q, is_u, immh, immb, opcode, rn, rd);
10864        break;
10865    case 0x10: /* SHRN */
10866    case 0x11: /* RSHRN / SQRSHRUN */
10867        if (is_u) {
10868            handle_vec_simd_sqshrn(s, false, is_q, false, true, immh, immb,
10869                                   opcode, rn, rd);
10870        } else {
10871            handle_vec_simd_shrn(s, is_q, immh, immb, opcode, rn, rd);
10872        }
10873        break;
10874    case 0x12: /* SQSHRN / UQSHRN */
10875    case 0x13: /* SQRSHRN / UQRSHRN */
10876        handle_vec_simd_sqshrn(s, false, is_q, is_u, is_u, immh, immb,
10877                               opcode, rn, rd);
10878        break;
10879    case 0x14: /* SSHLL / USHLL */
10880        handle_vec_simd_wshli(s, is_q, is_u, immh, immb, opcode, rn, rd);
10881        break;
10882    case 0x1c: /* SCVTF / UCVTF */
10883        handle_simd_shift_intfp_conv(s, false, is_q, is_u, immh, immb,
10884                                     opcode, rn, rd);
10885        break;
10886    case 0xc: /* SQSHLU */
10887        if (!is_u) {
10888            unallocated_encoding(s);
10889            return;
10890        }
10891        handle_simd_qshl(s, false, is_q, false, true, immh, immb, rn, rd);
10892        break;
10893    case 0xe: /* SQSHL, UQSHL */
10894        handle_simd_qshl(s, false, is_q, is_u, is_u, immh, immb, rn, rd);
10895        break;
10896    case 0x1f: /* FCVTZS/ FCVTZU */
10897        handle_simd_shift_fpint_conv(s, false, is_q, is_u, immh, immb, rn, rd);
10898        return;
10899    default:
10900        unallocated_encoding(s);
10901        return;
10902    }
10903}
10904
10905/* Generate code to do a "long" addition or subtraction, ie one done in
10906 * TCGv_i64 on vector lanes twice the width specified by size.
10907 */
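     /*
      * E.g. size == 1 treats each TCGv_i64 as two 32-bit lanes, each
      * holding a widened 16-bit element, and uses the neon_addl_u32 and
      * neon_subl_u32 helpers; size == 2 is a plain 64-bit add/subtract.
      */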
10908static void gen_neon_addl(int size, bool is_sub, TCGv_i64 tcg_res,
10909                          TCGv_i64 tcg_op1, TCGv_i64 tcg_op2)
10910{
10911    static NeonGenTwo64OpFn * const fns[3][2] = {
10912        { gen_helper_neon_addl_u16, gen_helper_neon_subl_u16 },
10913        { gen_helper_neon_addl_u32, gen_helper_neon_subl_u32 },
10914        { tcg_gen_add_i64, tcg_gen_sub_i64 },
10915    };
10916    NeonGenTwo64OpFn *genfn;
10917    assert(size < 3);
10918
10919    genfn = fns[size][is_sub];
10920    genfn(tcg_res, tcg_op1, tcg_op2);
10921}
10922
10923static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size,
10924                                int opcode, int rd, int rn, int rm)
10925{
10926    /* 3-reg-different widening insns: 64 x 64 -> 128 */
10927    TCGv_i64 tcg_res[2];
10928    int pass, accop;
10929
10930    tcg_res[0] = tcg_temp_new_i64();
10931    tcg_res[1] = tcg_temp_new_i64();
10932
10933    /* Does this op do an adding accumulate, a subtracting accumulate,
10934     * or no accumulate at all?
10935     */
10936    switch (opcode) {
10937    case 5:
10938    case 8:
10939    case 9:
10940        accop = 1;
10941        break;
10942    case 10:
10943    case 11:
10944        accop = -1;
10945        break;
10946    default:
10947        accop = 0;
10948        break;
10949    }
10950
10951    if (accop != 0) {
10952        read_vec_element(s, tcg_res[0], rd, 0, MO_64);
10953        read_vec_element(s, tcg_res[1], rd, 1, MO_64);
10954    }
10955
10956    /* size == 2 means two 32x32->64 operations; this is worth special
10957     * casing because we can generally handle it inline.
10958     */
10959    if (size == 2) {
10960        for (pass = 0; pass < 2; pass++) {
10961            TCGv_i64 tcg_op1 = tcg_temp_new_i64();
10962            TCGv_i64 tcg_op2 = tcg_temp_new_i64();
10963            TCGv_i64 tcg_passres;
10964            MemOp memop = MO_32 | (is_u ? 0 : MO_SIGN);
10965
10966            int elt = pass + is_q * 2;
10967
10968            read_vec_element(s, tcg_op1, rn, elt, memop);
10969            read_vec_element(s, tcg_op2, rm, elt, memop);
10970
10971            if (accop == 0) {
10972                tcg_passres = tcg_res[pass];
10973            } else {
10974                tcg_passres = tcg_temp_new_i64();
10975            }
10976
10977            switch (opcode) {
10978            case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
10979                tcg_gen_add_i64(tcg_passres, tcg_op1, tcg_op2);
10980                break;
10981            case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
10982                tcg_gen_sub_i64(tcg_passres, tcg_op1, tcg_op2);
10983                break;
10984            case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
10985            case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
10986            {
10987                TCGv_i64 tcg_tmp1 = tcg_temp_new_i64();
10988                TCGv_i64 tcg_tmp2 = tcg_temp_new_i64();
10989
10990                tcg_gen_sub_i64(tcg_tmp1, tcg_op1, tcg_op2);
10991                tcg_gen_sub_i64(tcg_tmp2, tcg_op2, tcg_op1);
10992                tcg_gen_movcond_i64(is_u ? TCG_COND_GEU : TCG_COND_GE,
10993                                    tcg_passres,
10994                                    tcg_op1, tcg_op2, tcg_tmp1, tcg_tmp2);
10995                tcg_temp_free_i64(tcg_tmp1);
10996                tcg_temp_free_i64(tcg_tmp2);
10997                break;
10998            }
10999            case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
11000            case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
11001            case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
11002                tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
11003                break;
11004            case 9: /* SQDMLAL, SQDMLAL2 */
11005            case 11: /* SQDMLSL, SQDMLSL2 */
11006            case 13: /* SQDMULL, SQDMULL2 */
11007                tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
11008                gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env,
11009                                                  tcg_passres, tcg_passres);
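                     /* The saturating self-addition doubles the product,
                      * giving the "D" in SQDMULL while detecting overflow.
                      */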
11010                break;
11011            default:
11012                g_assert_not_reached();
11013            }
11014
11015            if (opcode == 9 || opcode == 11) {
11016                /* saturating accumulate ops */
11017                if (accop < 0) {
11018                    tcg_gen_neg_i64(tcg_passres, tcg_passres);
11019                }
11020                gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env,
11021                                                  tcg_res[pass], tcg_passres);
11022            } else if (accop > 0) {
11023                tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
11024            } else if (accop < 0) {
11025                tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
11026            }
11027
11028            if (accop != 0) {
11029                tcg_temp_free_i64(tcg_passres);
11030            }
11031
11032            tcg_temp_free_i64(tcg_op1);
11033            tcg_temp_free_i64(tcg_op2);
11034        }
11035    } else {
11036        /* size 0 or 1, generally helper functions */
11037        for (pass = 0; pass < 2; pass++) {
11038            TCGv_i32 tcg_op1 = tcg_temp_new_i32();
11039            TCGv_i32 tcg_op2 = tcg_temp_new_i32();
11040            TCGv_i64 tcg_passres;
11041            int elt = pass + is_q * 2;
11042
11043            read_vec_element_i32(s, tcg_op1, rn, elt, MO_32);
11044            read_vec_element_i32(s, tcg_op2, rm, elt, MO_32);
11045
11046            if (accop == 0) {
11047                tcg_passres = tcg_res[pass];
11048            } else {
11049                tcg_passres = tcg_temp_new_i64();
11050            }
11051
11052            switch (opcode) {
11053            case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
11054            case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
11055            {
11056                TCGv_i64 tcg_op2_64 = tcg_temp_new_i64();
11057                static NeonGenWidenFn * const widenfns[2][2] = {
11058                    { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
11059                    { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
11060                };
11061                NeonGenWidenFn *widenfn = widenfns[size][is_u];
11062
11063                widenfn(tcg_op2_64, tcg_op2);
11064                widenfn(tcg_passres, tcg_op1);
11065                gen_neon_addl(size, (opcode == 2), tcg_passres,
11066                              tcg_passres, tcg_op2_64);
11067                tcg_temp_free_i64(tcg_op2_64);
11068                break;
11069            }
11070            case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
11071            case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
11072                if (size == 0) {
11073                    if (is_u) {
11074                        gen_helper_neon_abdl_u16(tcg_passres, tcg_op1, tcg_op2);
11075                    } else {
11076                        gen_helper_neon_abdl_s16(tcg_passres, tcg_op1, tcg_op2);
11077                    }
11078                } else {
11079                    if (is_u) {
11080                        gen_helper_neon_abdl_u32(tcg_passres, tcg_op1, tcg_op2);
11081                    } else {
11082                        gen_helper_neon_abdl_s32(tcg_passres, tcg_op1, tcg_op2);
11083                    }
11084                }
11085                break;
11086            case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
11087            case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
11088            case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
11089                if (size == 0) {
11090                    if (is_u) {
11091                        gen_helper_neon_mull_u8(tcg_passres, tcg_op1, tcg_op2);
11092                    } else {
11093                        gen_helper_neon_mull_s8(tcg_passres, tcg_op1, tcg_op2);
11094                    }
11095                } else {
11096                    if (is_u) {
11097                        gen_helper_neon_mull_u16(tcg_passres, tcg_op1, tcg_op2);
11098                    } else {
11099                        gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
11100                    }
11101                }
11102                break;
11103            case 9: /* SQDMLAL, SQDMLAL2 */
11104            case 11: /* SQDMLSL, SQDMLSL2 */
11105            case 13: /* SQDMULL, SQDMULL2 */
11106                assert(size == 1);
11107                gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
11108                gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
11109                                                  tcg_passres, tcg_passres);
11110                break;
11111            default:
11112                g_assert_not_reached();
11113            }
11114            tcg_temp_free_i32(tcg_op1);
11115            tcg_temp_free_i32(tcg_op2);
11116
11117            if (accop != 0) {
11118                if (opcode == 9 || opcode == 11) {
11119                    /* saturating accumulate ops */
11120                    if (accop < 0) {
11121                        gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
11122                    }
11123                    gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env,
11124                                                      tcg_res[pass],
11125                                                      tcg_passres);
11126                } else {
11127                    gen_neon_addl(size, (accop < 0), tcg_res[pass],
11128                                  tcg_res[pass], tcg_passres);
11129                }
11130                tcg_temp_free_i64(tcg_passres);
11131            }
11132        }
11133    }
11134
11135    write_vec_element(s, tcg_res[0], rd, 0, MO_64);
11136    write_vec_element(s, tcg_res[1], rd, 1, MO_64);
11137    tcg_temp_free_i64(tcg_res[0]);
11138    tcg_temp_free_i64(tcg_res[1]);
11139}
11140
11141static void handle_3rd_wide(DisasContext *s, int is_q, int is_u, int size,
11142                            int opcode, int rd, int rn, int rm)
11143{
11144    TCGv_i64 tcg_res[2];
11145    int part = is_q ? 2 : 0;
11146    int pass;
11147
11148    for (pass = 0; pass < 2; pass++) {
11149        TCGv_i64 tcg_op1 = tcg_temp_new_i64();
11150        TCGv_i32 tcg_op2 = tcg_temp_new_i32();
11151        TCGv_i64 tcg_op2_wide = tcg_temp_new_i64();
11152        static NeonGenWidenFn * const widenfns[3][2] = {
11153            { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
11154            { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
11155            { tcg_gen_ext_i32_i64, tcg_gen_extu_i32_i64 },
11156        };
11157        NeonGenWidenFn *widenfn = widenfns[size][is_u];
11158
11159        read_vec_element(s, tcg_op1, rn, pass, MO_64);
11160        read_vec_element_i32(s, tcg_op2, rm, part + pass, MO_32);
11161        widenfn(tcg_op2_wide, tcg_op2);
11162        tcg_temp_free_i32(tcg_op2);
11163        tcg_res[pass] = tcg_temp_new_i64();
11164        gen_neon_addl(size, (opcode == 3),
11165                      tcg_res[pass], tcg_op1, tcg_op2_wide);
11166        tcg_temp_free_i64(tcg_op1);
11167        tcg_temp_free_i64(tcg_op2_wide);
11168    }
11169
11170    for (pass = 0; pass < 2; pass++) {
11171        write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
11172        tcg_temp_free_i64(tcg_res[pass]);
11173    }
11174}
11175
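     /*
      * Rounded narrowing of the high 32 bits: adding 1 << 31 (half the
      * weight of the discarded low word) before taking the top half
      * computes (in + 0x80000000) >> 32, as required by RADDHN/RSUBHN.
      */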
11176static void do_narrow_round_high_u32(TCGv_i32 res, TCGv_i64 in)
11177{
11178    tcg_gen_addi_i64(in, in, 1U << 31);
11179    tcg_gen_extrh_i64_i32(res, in);
11180}
11181
11182static void handle_3rd_narrowing(DisasContext *s, int is_q, int is_u, int size,
11183                                 int opcode, int rd, int rn, int rm)
11184{
11185    TCGv_i32 tcg_res[2];
11186    int part = is_q ? 2 : 0;
11187    int pass;
11188
11189    for (pass = 0; pass < 2; pass++) {
11190        TCGv_i64 tcg_op1 = tcg_temp_new_i64();
11191        TCGv_i64 tcg_op2 = tcg_temp_new_i64();
11192        TCGv_i64 tcg_wideres = tcg_temp_new_i64();
11193        static NeonGenNarrowFn * const narrowfns[3][2] = {
11194            { gen_helper_neon_narrow_high_u8,
11195              gen_helper_neon_narrow_round_high_u8 },
11196            { gen_helper_neon_narrow_high_u16,
11197              gen_helper_neon_narrow_round_high_u16 },
11198            { tcg_gen_extrh_i64_i32, do_narrow_round_high_u32 },
11199        };
11200        NeonGenNarrowFn *gennarrow = narrowfns[size][is_u];
11201
11202        read_vec_element(s, tcg_op1, rn, pass, MO_64);
11203        read_vec_element(s, tcg_op2, rm, pass, MO_64);
11204
11205        gen_neon_addl(size, (opcode == 6), tcg_wideres, tcg_op1, tcg_op2);
11206
11207        tcg_temp_free_i64(tcg_op1);
11208        tcg_temp_free_i64(tcg_op2);
11209
11210        tcg_res[pass] = tcg_temp_new_i32();
11211        gennarrow(tcg_res[pass], tcg_wideres);
11212        tcg_temp_free_i64(tcg_wideres);
11213    }
11214
11215    for (pass = 0; pass < 2; pass++) {
11216        write_vec_element_i32(s, tcg_res[pass], rd, pass + part, MO_32);
11217        tcg_temp_free_i32(tcg_res[pass]);
11218    }
11219    clear_vec_high(s, is_q, rd);
11220}
11221
11222/* AdvSIMD three different
11223 *   31  30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
11224 * +---+---+---+-----------+------+---+------+--------+-----+------+------+
11225 * | 0 | Q | U | 0 1 1 1 0 | size | 1 |  Rm  | opcode | 0 0 |  Rn  |  Rd  |
11226 * +---+---+---+-----------+------+---+------+--------+-----+------+------+
11227 */
11228static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn)
11229{
11230    /* Instructions in this group fall into three basic classes
11231     * (in each case with the operation working on each element in
11232     * the input vectors):
11233     * (1) widening 64 x 64 -> 128 (with possibly Vd as an extra
11234     *     128 bit input)
11235     * (2) wide 64 x 128 -> 128
11236     * (3) narrowing 128 x 128 -> 64
11237     * Here we do initial decode, catch unallocated cases and
11238     * dispatch to separate functions for each class.
11239     */
11240    int is_q = extract32(insn, 30, 1);
11241    int is_u = extract32(insn, 29, 1);
11242    int size = extract32(insn, 22, 2);
11243    int opcode = extract32(insn, 12, 4);
11244    int rm = extract32(insn, 16, 5);
11245    int rn = extract32(insn, 5, 5);
11246    int rd = extract32(insn, 0, 5);
11247
11248    switch (opcode) {
11249    case 1: /* SADDW, SADDW2, UADDW, UADDW2 */
11250    case 3: /* SSUBW, SSUBW2, USUBW, USUBW2 */
11251        /* 64 x 128 -> 128 */
11252        if (size == 3) {
11253            unallocated_encoding(s);
11254            return;
11255        }
11256        if (!fp_access_check(s)) {
11257            return;
11258        }
11259        handle_3rd_wide(s, is_q, is_u, size, opcode, rd, rn, rm);
11260        break;
11261    case 4: /* ADDHN, ADDHN2, RADDHN, RADDHN2 */
11262    case 6: /* SUBHN, SUBHN2, RSUBHN, RSUBHN2 */
11263        /* 128 x 128 -> 64 */
11264        if (size == 3) {
11265            unallocated_encoding(s);
11266            return;
11267        }
11268        if (!fp_access_check(s)) {
11269            return;
11270        }
11271        handle_3rd_narrowing(s, is_q, is_u, size, opcode, rd, rn, rm);
11272        break;
11273    case 14: /* PMULL, PMULL2 */
11274        if (is_u) {
11275            unallocated_encoding(s);
11276            return;
11277        }
11278        switch (size) {
11279        case 0: /* PMULL.P8 */
11280            if (!fp_access_check(s)) {
11281                return;
11282            }
11283            /* The Q field specifies lo/hi half input for this insn.  */
11284            gen_gvec_op3_ool(s, true, rd, rn, rm, is_q,
11285                             gen_helper_neon_pmull_h);
11286            break;
11287
11288        case 3: /* PMULL.P64 */
11289            if (!dc_isar_feature(aa64_pmull, s)) {
11290                unallocated_encoding(s);
11291                return;
11292            }
11293            if (!fp_access_check(s)) {
11294                return;
11295            }
11296            /* The Q field specifies lo/hi half input for this insn.  */
11297            gen_gvec_op3_ool(s, true, rd, rn, rm, is_q,
11298                             gen_helper_gvec_pmull_q);
11299            break;
11300
11301        default:
11302            unallocated_encoding(s);
11303            break;
11304        }
11305        return;
11306    case 9: /* SQDMLAL, SQDMLAL2 */
11307    case 11: /* SQDMLSL, SQDMLSL2 */
11308    case 13: /* SQDMULL, SQDMULL2 */
11309        if (is_u || size == 0) {
11310            unallocated_encoding(s);
11311            return;
11312        }
11313        /* fall through */
11314    case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
11315    case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
11316    case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
11317    case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
11318    case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
11319    case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
11320    case 12: /* SMULL, SMULL2, UMULL, UMULL2 */
11321        /* 64 x 64 -> 128 */
11322        if (size == 3) {
11323            unallocated_encoding(s);
11324            return;
11325        }
11326        if (!fp_access_check(s)) {
11327            return;
11328        }
11329
11330        handle_3rd_widening(s, is_q, is_u, size, opcode, rd, rn, rm);
11331        break;
11332    default:
11333        /* opcode 15 not allocated */
11334        unallocated_encoding(s);
11335        break;
11336    }
11337}
11338
11339/* Logic op (opcode == 3) subgroup of C3.6.16. */
11340static void disas_simd_3same_logic(DisasContext *s, uint32_t insn)
11341{
11342    int rd = extract32(insn, 0, 5);
11343    int rn = extract32(insn, 5, 5);
11344    int rm = extract32(insn, 16, 5);
11345    int size = extract32(insn, 22, 2);
11346    bool is_u = extract32(insn, 29, 1);
11347    bool is_q = extract32(insn, 30, 1);
11348
11349    if (!fp_access_check(s)) {
11350        return;
11351    }
11352
11353    switch (size + 4 * is_u) {
11354    case 0: /* AND */
11355        gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_and, 0);
11356        return;
11357    case 1: /* BIC */
11358        gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_andc, 0);
11359        return;
11360    case 2: /* ORR */
11361        gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_or, 0);
11362        return;
11363    case 3: /* ORN */
11364        gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_orc, 0);
11365        return;
11366    case 4: /* EOR */
11367        gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_xor, 0);
11368        return;
11369
11370    case 5: /* BSL bitwise select */
11371        gen_gvec_fn4(s, is_q, rd, rd, rn, rm, tcg_gen_gvec_bitsel, 0);
11372        return;
11373    case 6: /* BIT, bitwise insert if true */
11374        gen_gvec_fn4(s, is_q, rd, rm, rn, rd, tcg_gen_gvec_bitsel, 0);
11375        return;
11376    case 7: /* BIF, bitwise insert if false */
11377        gen_gvec_fn4(s, is_q, rd, rm, rd, rn, tcg_gen_gvec_bitsel, 0);
11378        return;
11379
11380    default:
11381        g_assert_not_reached();
11382    }
11383}
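
/*
 * For reference: tcg_gen_gvec_bitsel(vece, d, a, b, c, ...) computes,
 * per bit, d = (b & a) | (c & ~a), i.e. 'a' selects between 'b' (set)
 * and 'c' (clear).  The three insns above differ only in which operand
 * supplies the selection mask:
 *   BSL: mask = Rd, true = Rn, false = Rm
 *   BIT: mask = Rm, true = Rn, false = Rd
 *   BIF: mask = Rm, true = Rd, false = Rn
 */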
11384
11385/* Pairwise op subgroup of C3.6.16.
11386 *
11387 * This is called directly, or from disas_simd_3same_float() for the
11388 * float pairwise operations, whose opcode and size are decoded differently.
11389 */
11390static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode,
11391                                   int size, int rn, int rm, int rd)
11392{
11393    TCGv_ptr fpst;
11394    int pass;
11395
11396    /* Floating point operations need fpst */
11397    if (opcode >= 0x58) {
11398        fpst = fpstatus_ptr(FPST_FPCR);
11399    } else {
11400        fpst = NULL;
11401    }
11402
11403    if (!fp_access_check(s)) {
11404        return;
11405    }
11406
11407    /* These operations work on the concatenated rm:rn, with each pair of
11408     * adjacent elements being operated on to produce an element in the result.
11409     */
11410    if (size == 3) {
11411        TCGv_i64 tcg_res[2];
11412
11413        for (pass = 0; pass < 2; pass++) {
11414            TCGv_i64 tcg_op1 = tcg_temp_new_i64();
11415            TCGv_i64 tcg_op2 = tcg_temp_new_i64();
11416            int passreg = (pass == 0) ? rn : rm;
11417
11418            read_vec_element(s, tcg_op1, passreg, 0, MO_64);
11419            read_vec_element(s, tcg_op2, passreg, 1, MO_64);
11420            tcg_res[pass] = tcg_temp_new_i64();
11421
11422            switch (opcode) {
11423            case 0x17: /* ADDP */
11424                tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
11425                break;
11426            case 0x58: /* FMAXNMP */
11427                gen_helper_vfp_maxnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
11428                break;
11429            case 0x5a: /* FADDP */
11430                gen_helper_vfp_addd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
11431                break;
11432            case 0x5e: /* FMAXP */
11433                gen_helper_vfp_maxd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
11434                break;
11435            case 0x78: /* FMINNMP */
11436                gen_helper_vfp_minnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
11437                break;
11438            case 0x7e: /* FMINP */
11439                gen_helper_vfp_mind(tcg_res[pass], tcg_op1, tcg_op2, fpst);
11440                break;
11441            default:
11442                g_assert_not_reached();
11443            }
11444
11445            tcg_temp_free_i64(tcg_op1);
11446            tcg_temp_free_i64(tcg_op2);
11447        }
11448
11449        for (pass = 0; pass < 2; pass++) {
11450            write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
11451            tcg_temp_free_i64(tcg_res[pass]);
11452        }
11453    } else {
11454        int maxpass = is_q ? 4 : 2;
11455        TCGv_i32 tcg_res[4];
11456
11457        for (pass = 0; pass < maxpass; pass++) {
11458            TCGv_i32 tcg_op1 = tcg_temp_new_i32();
11459            TCGv_i32 tcg_op2 = tcg_temp_new_i32();
11460            NeonGenTwoOpFn *genfn = NULL;
11461            int passreg = pass < (maxpass / 2) ? rn : rm;
11462            int passelt = (is_q && (pass & 1)) ? 2 : 0;
11463
11464            read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_32);
11465            read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_32);
11466            tcg_res[pass] = tcg_temp_new_i32();
11467
11468            switch (opcode) {
11469            case 0x17: /* ADDP */
11470            {
11471                static NeonGenTwoOpFn * const fns[3] = {
11472                    gen_helper_neon_padd_u8,
11473                    gen_helper_neon_padd_u16,
11474                    tcg_gen_add_i32,
11475                };
11476                genfn = fns[size];
11477                break;
11478            }
11479            case 0x14: /* SMAXP, UMAXP */
11480            {
11481                static NeonGenTwoOpFn * const fns[3][2] = {
11482                    { gen_helper_neon_pmax_s8, gen_helper_neon_pmax_u8 },
11483                    { gen_helper_neon_pmax_s16, gen_helper_neon_pmax_u16 },
11484                    { tcg_gen_smax_i32, tcg_gen_umax_i32 },
11485                };
11486                genfn = fns[size][u];
11487                break;
11488            }
11489            case 0x15: /* SMINP, UMINP */
11490            {
11491                static NeonGenTwoOpFn * const fns[3][2] = {
11492                    { gen_helper_neon_pmin_s8, gen_helper_neon_pmin_u8 },
11493                    { gen_helper_neon_pmin_s16, gen_helper_neon_pmin_u16 },
11494                    { tcg_gen_smin_i32, tcg_gen_umin_i32 },
11495                };
11496                genfn = fns[size][u];
11497                break;
11498            }
11499            /* The FP operations are all on single floats (32 bit) */
11500            case 0x58: /* FMAXNMP */
11501                gen_helper_vfp_maxnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
11502                break;
11503            case 0x5a: /* FADDP */
11504                gen_helper_vfp_adds(tcg_res[pass], tcg_op1, tcg_op2, fpst);
11505                break;
11506            case 0x5e: /* FMAXP */
11507                gen_helper_vfp_maxs(tcg_res[pass], tcg_op1, tcg_op2, fpst);
11508                break;
11509            case 0x78: /* FMINNMP */
11510                gen_helper_vfp_minnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
11511                break;
11512            case 0x7e: /* FMINP */
11513                gen_helper_vfp_mins(tcg_res[pass], tcg_op1, tcg_op2, fpst);
11514                break;
11515            default:
11516                g_assert_not_reached();
11517            }
11518
11519            /* FP ops wrote tcg_res above; call the integer genfn now */
11520            if (genfn) {
11521                genfn(tcg_res[pass], tcg_op1, tcg_op2);
11522            }
11523
11524            tcg_temp_free_i32(tcg_op1);
11525            tcg_temp_free_i32(tcg_op2);
11526        }
11527
11528        for (pass = 0; pass < maxpass; pass++) {
11529            write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
11530            tcg_temp_free_i32(tcg_res[pass]);
11531        }
11532        clear_vec_high(s, is_q, rd);
11533    }
11534
11535    if (fpst) {
11536        tcg_temp_free_ptr(fpst);
11537    }
11538}
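
/*
 * Worked example of the pass/element mapping above, for 32-bit lanes
 * with is_q set (maxpass == 4):
 *   d[0] = op(n[0], n[1]);  d[1] = op(n[2], n[3]);
 *   d[2] = op(m[0], m[1]);  d[3] = op(m[2], m[3]);
 * so the low half of Rd is built from pairs within Rn and the high
 * half from pairs within Rm, matching "concatenated rm:rn".
 */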
11539
11540/* Floating point op subgroup of C3.6.16. */
11541static void disas_simd_3same_float(DisasContext *s, uint32_t insn)
11542{
11543    /* For floating point ops, the U, size[1] and opcode bits
11544     * together indicate the operation. size[0] indicates single
11545     * or double.
11546     */
11547    int fpopcode = extract32(insn, 11, 5)
11548        | (extract32(insn, 23, 1) << 5)
11549        | (extract32(insn, 29, 1) << 6);
11550    int is_q = extract32(insn, 30, 1);
11551    int size = extract32(insn, 22, 1);
11552    int rm = extract32(insn, 16, 5);
11553    int rn = extract32(insn, 5, 5);
11554    int rd = extract32(insn, 0, 5);
11555
11556    int datasize = is_q ? 128 : 64;
11557    int esize = 32 << size;
11558    int elements = datasize / esize;
11559
11560    if (size == 1 && !is_q) {
11561        unallocated_encoding(s);
11562        return;
11563    }
11564
11565    switch (fpopcode) {
11566    case 0x58: /* FMAXNMP */
11567    case 0x5a: /* FADDP */
11568    case 0x5e: /* FMAXP */
11569    case 0x78: /* FMINNMP */
11570    case 0x7e: /* FMINP */
11571        if (size && !is_q) {
11572            unallocated_encoding(s);
11573            return;
11574        }
11575        handle_simd_3same_pair(s, is_q, 0, fpopcode, size ? MO_64 : MO_32,
11576                               rn, rm, rd);
11577        return;
11578    case 0x1b: /* FMULX */
11579    case 0x1f: /* FRECPS */
11580    case 0x3f: /* FRSQRTS */
11581    case 0x5d: /* FACGE */
11582    case 0x7d: /* FACGT */
11583    case 0x19: /* FMLA */
11584    case 0x39: /* FMLS */
11585    case 0x18: /* FMAXNM */
11586    case 0x1a: /* FADD */
11587    case 0x1c: /* FCMEQ */
11588    case 0x1e: /* FMAX */
11589    case 0x38: /* FMINNM */
11590    case 0x3a: /* FSUB */
11591    case 0x3e: /* FMIN */
11592    case 0x5b: /* FMUL */
11593    case 0x5c: /* FCMGE */
11594    case 0x5f: /* FDIV */
11595    case 0x7a: /* FABD */
11596    case 0x7c: /* FCMGT */
11597        if (!fp_access_check(s)) {
11598            return;
11599        }
11600        handle_3same_float(s, size, elements, fpopcode, rd, rn, rm);
11601        return;
11602
11603    case 0x1d: /* FMLAL  */
11604    case 0x3d: /* FMLSL  */
11605    case 0x59: /* FMLAL2 */
11606    case 0x79: /* FMLSL2 */
11607        if (size & 1 || !dc_isar_feature(aa64_fhm, s)) {
11608            unallocated_encoding(s);
11609            return;
11610        }
11611        if (fp_access_check(s)) {
11612            int is_s = extract32(insn, 23, 1);
11613            int is_2 = extract32(insn, 29, 1);
11614            int data = (is_2 << 1) | is_s;
11615            tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
11616                               vec_full_reg_offset(s, rn),
11617                               vec_full_reg_offset(s, rm), cpu_env,
11618                               is_q ? 16 : 8, vec_full_reg_size(s),
11619                               data, gen_helper_gvec_fmlal_a64);
11620        }
11621        return;
11622
11623    default:
11624        unallocated_encoding(s);
11625        return;
11626    }
11627}
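
/*
 * Example of the fpopcode packing above: FADDP is 0x5a, i.e. U = 1
 * (bit 6), size[1] = 0 (bit 5), opcode = 0x1a (bits 4:0), while
 * FMINNMP is 0x78 = U = 1, size[1] = 1, opcode = 0x18.  size[0] only
 * selects single versus double precision.
 */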
11628
11629/* Integer op subgroup of C3.6.16. */
11630static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
11631{
11632    int is_q = extract32(insn, 30, 1);
11633    int u = extract32(insn, 29, 1);
11634    int size = extract32(insn, 22, 2);
11635    int opcode = extract32(insn, 11, 5);
11636    int rm = extract32(insn, 16, 5);
11637    int rn = extract32(insn, 5, 5);
11638    int rd = extract32(insn, 0, 5);
11639    int pass;
11640    TCGCond cond;
11641
11642    switch (opcode) {
11643    case 0x13: /* MUL, PMUL */
11644        if (u && size != 0) {
11645            unallocated_encoding(s);
11646            return;
11647        }
11648        /* fall through */
11649    case 0x0: /* SHADD, UHADD */
11650    case 0x2: /* SRHADD, URHADD */
11651    case 0x4: /* SHSUB, UHSUB */
11652    case 0xc: /* SMAX, UMAX */
11653    case 0xd: /* SMIN, UMIN */
11654    case 0xe: /* SABD, UABD */
11655    case 0xf: /* SABA, UABA */
11656    case 0x12: /* MLA, MLS */
11657        if (size == 3) {
11658            unallocated_encoding(s);
11659            return;
11660        }
11661        break;
11662    case 0x16: /* SQDMULH, SQRDMULH */
11663        if (size == 0 || size == 3) {
11664            unallocated_encoding(s);
11665            return;
11666        }
11667        break;
11668    default:
11669        if (size == 3 && !is_q) {
11670            unallocated_encoding(s);
11671            return;
11672        }
11673        break;
11674    }
11675
11676    if (!fp_access_check(s)) {
11677        return;
11678    }
11679
11680    switch (opcode) {
11681    case 0x01: /* SQADD, UQADD */
11682        if (u) {
11683            gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uqadd_qc, size);
11684        } else {
11685            gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqadd_qc, size);
11686        }
11687        return;
11688    case 0x05: /* SQSUB, UQSUB */
11689        if (u) {
11690            gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uqsub_qc, size);
11691        } else {
11692            gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqsub_qc, size);
11693        }
11694        return;
11695    case 0x08: /* SSHL, USHL */
11696        if (u) {
11697            gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_ushl, size);
11698        } else {
11699            gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sshl, size);
11700        }
11701        return;
11702    case 0x0c: /* SMAX, UMAX */
11703        if (u) {
11704            gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_umax, size);
11705        } else {
11706            gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_smax, size);
11707        }
11708        return;
11709    case 0x0d: /* SMIN, UMIN */
11710        if (u) {
11711            gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_umin, size);
11712        } else {
11713            gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_smin, size);
11714        }
11715        return;
11716    case 0xe: /* SABD, UABD */
11717        if (u) {
11718            gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uabd, size);
11719        } else {
11720            gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sabd, size);
11721        }
11722        return;
11723    case 0xf: /* SABA, UABA */
11724        if (u) {
11725            gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uaba, size);
11726        } else {
11727            gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_saba, size);
11728        }
11729        return;
11730    case 0x10: /* ADD, SUB */
11731        if (u) {
11732            gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_sub, size);
11733        } else {
11734            gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_add, size);
11735        }
11736        return;
11737    case 0x13: /* MUL, PMUL */
11738        if (!u) { /* MUL */
11739            gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_mul, size);
11740        } else {  /* PMUL */
11741            gen_gvec_op3_ool(s, is_q, rd, rn, rm, 0, gen_helper_gvec_pmul_b);
11742        }
11743        return;
11744    case 0x12: /* MLA, MLS */
11745        if (u) {
11746            gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_mls, size);
11747        } else {
11748            gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_mla, size);
11749        }
11750        return;
11751    case 0x16: /* SQDMULH, SQRDMULH */
11752        {
11753            static gen_helper_gvec_3_ptr * const fns[2][2] = {
11754                { gen_helper_neon_sqdmulh_h, gen_helper_neon_sqrdmulh_h },
11755                { gen_helper_neon_sqdmulh_s, gen_helper_neon_sqrdmulh_s },
11756            };
11757            gen_gvec_op3_qc(s, is_q, rd, rn, rm, fns[size - 1][u]);
11758        }
11759        return;
11760    case 0x11:
11761        if (!u) { /* CMTST */
11762            gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_cmtst, size);
11763            return;
11764        }
11765        /* else CMEQ */
11766        cond = TCG_COND_EQ;
11767        goto do_gvec_cmp;
11768    case 0x06: /* CMGT, CMHI */
11769        cond = u ? TCG_COND_GTU : TCG_COND_GT;
11770        goto do_gvec_cmp;
11771    case 0x07: /* CMGE, CMHS */
11772        cond = u ? TCG_COND_GEU : TCG_COND_GE;
11773    do_gvec_cmp:
11774        tcg_gen_gvec_cmp(cond, size, vec_full_reg_offset(s, rd),
11775                         vec_full_reg_offset(s, rn),
11776                         vec_full_reg_offset(s, rm),
11777                         is_q ? 16 : 8, vec_full_reg_size(s));
11778        return;
11779    }
11780
11781    if (size == 3) {
11782        assert(is_q);
11783        for (pass = 0; pass < 2; pass++) {
11784            TCGv_i64 tcg_op1 = tcg_temp_new_i64();
11785            TCGv_i64 tcg_op2 = tcg_temp_new_i64();
11786            TCGv_i64 tcg_res = tcg_temp_new_i64();
11787
11788            read_vec_element(s, tcg_op1, rn, pass, MO_64);
11789            read_vec_element(s, tcg_op2, rm, pass, MO_64);
11790
11791            handle_3same_64(s, opcode, u, tcg_res, tcg_op1, tcg_op2);
11792
11793            write_vec_element(s, tcg_res, rd, pass, MO_64);
11794
11795            tcg_temp_free_i64(tcg_res);
11796            tcg_temp_free_i64(tcg_op1);
11797            tcg_temp_free_i64(tcg_op2);
11798        }
11799    } else {
11800        for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
11801            TCGv_i32 tcg_op1 = tcg_temp_new_i32();
11802            TCGv_i32 tcg_op2 = tcg_temp_new_i32();
11803            TCGv_i32 tcg_res = tcg_temp_new_i32();
11804            NeonGenTwoOpFn *genfn = NULL;
11805            NeonGenTwoOpEnvFn *genenvfn = NULL;
11806
11807            read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
11808            read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);
11809
11810            switch (opcode) {
11811            case 0x0: /* SHADD, UHADD */
11812            {
11813                static NeonGenTwoOpFn * const fns[3][2] = {
11814                    { gen_helper_neon_hadd_s8, gen_helper_neon_hadd_u8 },
11815                    { gen_helper_neon_hadd_s16, gen_helper_neon_hadd_u16 },
11816                    { gen_helper_neon_hadd_s32, gen_helper_neon_hadd_u32 },
11817                };
11818                genfn = fns[size][u];
11819                break;
11820            }
11821            case 0x2: /* SRHADD, URHADD */
11822            {
11823                static NeonGenTwoOpFn * const fns[3][2] = {
11824                    { gen_helper_neon_rhadd_s8, gen_helper_neon_rhadd_u8 },
11825                    { gen_helper_neon_rhadd_s16, gen_helper_neon_rhadd_u16 },
11826                    { gen_helper_neon_rhadd_s32, gen_helper_neon_rhadd_u32 },
11827                };
11828                genfn = fns[size][u];
11829                break;
11830            }
11831            case 0x4: /* SHSUB, UHSUB */
11832            {
11833                static NeonGenTwoOpFn * const fns[3][2] = {
11834                    { gen_helper_neon_hsub_s8, gen_helper_neon_hsub_u8 },
11835                    { gen_helper_neon_hsub_s16, gen_helper_neon_hsub_u16 },
11836                    { gen_helper_neon_hsub_s32, gen_helper_neon_hsub_u32 },
11837                };
11838                genfn = fns[size][u];
11839                break;
11840            }
11841            case 0x9: /* SQSHL, UQSHL */
11842            {
11843                static NeonGenTwoOpEnvFn * const fns[3][2] = {
11844                    { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
11845                    { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
11846                    { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
11847                };
11848                genenvfn = fns[size][u];
11849                break;
11850            }
11851            case 0xa: /* SRSHL, URSHL */
11852            {
11853                static NeonGenTwoOpFn * const fns[3][2] = {
11854                    { gen_helper_neon_rshl_s8, gen_helper_neon_rshl_u8 },
11855                    { gen_helper_neon_rshl_s16, gen_helper_neon_rshl_u16 },
11856                    { gen_helper_neon_rshl_s32, gen_helper_neon_rshl_u32 },
11857                };
11858                genfn = fns[size][u];
11859                break;
11860            }
11861            case 0xb: /* SQRSHL, UQRSHL */
11862            {
11863                static NeonGenTwoOpEnvFn * const fns[3][2] = {
11864                    { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
11865                    { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
11866                    { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
11867                };
11868                genenvfn = fns[size][u];
11869                break;
11870            }
11871            default:
11872                g_assert_not_reached();
11873            }
11874
11875            if (genenvfn) {
11876                genenvfn(tcg_res, cpu_env, tcg_op1, tcg_op2);
11877            } else {
11878                genfn(tcg_res, tcg_op1, tcg_op2);
11879            }
11880
11881            write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
11882
11883            tcg_temp_free_i32(tcg_res);
11884            tcg_temp_free_i32(tcg_op1);
11885            tcg_temp_free_i32(tcg_op2);
11886        }
11887    }
11888    clear_vec_high(s, is_q, rd);
11889}
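
/*
 * Note on the structure above: opcodes with a whole-vector (gvec)
 * expansion return directly from the second switch; only SHADD,
 * SRHADD, SHSUB, SQSHL, SRSHL and SQRSHL (plus their unsigned
 * counterparts) reach the per-element loops, as they have no gvec
 * expansion here.
 */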
11890
11891/* AdvSIMD three same
11892 *  31  30  29  28       24 23  22  21 20  16 15    11  10 9    5 4    0
11893 * +---+---+---+-----------+------+---+------+--------+---+------+------+
11894 * | 0 | Q | U | 0 1 1 1 0 | size | 1 |  Rm  | opcode | 1 |  Rn  |  Rd  |
11895 * +---+---+---+-----------+------+---+------+--------+---+------+------+
11896 */
11897static void disas_simd_three_reg_same(DisasContext *s, uint32_t insn)
11898{
11899    int opcode = extract32(insn, 11, 5);
11900
11901    switch (opcode) {
11902    case 0x3: /* logic ops */
11903        disas_simd_3same_logic(s, insn);
11904        break;
11905    case 0x17: /* ADDP */
11906    case 0x14: /* SMAXP, UMAXP */
11907    case 0x15: /* SMINP, UMINP */
11908    {
11909        /* Pairwise operations */
11910        int is_q = extract32(insn, 30, 1);
11911        int u = extract32(insn, 29, 1);
11912        int size = extract32(insn, 22, 2);
11913        int rm = extract32(insn, 16, 5);
11914        int rn = extract32(insn, 5, 5);
11915        int rd = extract32(insn, 0, 5);
11916        if (opcode == 0x17) {
11917            if (u || (size == 3 && !is_q)) {
11918                unallocated_encoding(s);
11919                return;
11920            }
11921        } else {
11922            if (size == 3) {
11923                unallocated_encoding(s);
11924                return;
11925            }
11926        }
11927        handle_simd_3same_pair(s, is_q, u, opcode, size, rn, rm, rd);
11928        break;
11929    }
11930    case 0x18 ... 0x31:
11931        /* floating point ops, sz[1] and U are part of opcode */
11932        disas_simd_3same_float(s, insn);
11933        break;
11934    default:
11935        disas_simd_3same_int(s, insn);
11936        break;
11937    }
11938}
11939
11940/*
11941 * Advanced SIMD three same (ARMv8.2 FP16 variants)
11942 *
11943 *  31  30  29  28       24 23  22 21 20  16 15 14 13    11 10  9    5 4    0
11944 * +---+---+---+-----------+---------+------+-----+--------+---+------+------+
11945 * | 0 | Q | U | 0 1 1 1 0 | a | 1 0 |  Rm  | 0 0 | opcode | 1 |  Rn  |  Rd  |
11946 * +---+---+---+-----------+---------+------+-----+--------+---+------+------+
11947 *
11948 * This includes FMULX, FCMEQ (register), FRECPS, FRSQRTS, FCMGE
11949 * (register), FACGE, FABD, FCMGT (register) and FACGT.
11950 *
11951 */
11952static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn)
11953{
11954    int opcode, fpopcode;
11955    int is_q, u, a, rm, rn, rd;
11956    int datasize, elements;
11957    int pass;
11958    TCGv_ptr fpst;
11959    bool pairwise = false;
11960
11961    if (!dc_isar_feature(aa64_fp16, s)) {
11962        unallocated_encoding(s);
11963        return;
11964    }
11965
11966    if (!fp_access_check(s)) {
11967        return;
11968    }
11969
11970    /* For these floating point ops, the U, a and opcode bits
11971     * together indicate the operation.
11972     */
11973    opcode = extract32(insn, 11, 3);
11974    u = extract32(insn, 29, 1);
11975    a = extract32(insn, 23, 1);
11976    is_q = extract32(insn, 30, 1);
11977    rm = extract32(insn, 16, 5);
11978    rn = extract32(insn, 5, 5);
11979    rd = extract32(insn, 0, 5);
11980
11981    fpopcode = opcode | (a << 3) | (u << 4);
11982    datasize = is_q ? 128 : 64;
11983    elements = datasize / 16;
11984
11985    switch (fpopcode) {
11986    case 0x10: /* FMAXNMP */
11987    case 0x12: /* FADDP */
11988    case 0x16: /* FMAXP */
11989    case 0x18: /* FMINNMP */
11990    case 0x1e: /* FMINP */
11991        pairwise = true;
11992        break;
11993    }
11994
11995    fpst = fpstatus_ptr(FPST_FPCR_F16);
11996
11997    if (pairwise) {
11998        int maxpass = is_q ? 8 : 4;
11999        TCGv_i32 tcg_op1 = tcg_temp_new_i32();
12000        TCGv_i32 tcg_op2 = tcg_temp_new_i32();
12001        TCGv_i32 tcg_res[8];
12002
12003        for (pass = 0; pass < maxpass; pass++) {
12004            int passreg = pass < (maxpass / 2) ? rn : rm;
12005            int passelt = (pass << 1) & (maxpass - 1);
12006
12007            read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_16);
12008            read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_16);
12009            tcg_res[pass] = tcg_temp_new_i32();
12010
12011            switch (fpopcode) {
12012            case 0x10: /* FMAXNMP */
12013                gen_helper_advsimd_maxnumh(tcg_res[pass], tcg_op1, tcg_op2,
12014                                           fpst);
12015                break;
12016            case 0x12: /* FADDP */
12017                gen_helper_advsimd_addh(tcg_res[pass], tcg_op1, tcg_op2, fpst);
12018                break;
12019            case 0x16: /* FMAXP */
12020                gen_helper_advsimd_maxh(tcg_res[pass], tcg_op1, tcg_op2, fpst);
12021                break;
12022            case 0x18: /* FMINNMP */
12023                gen_helper_advsimd_minnumh(tcg_res[pass], tcg_op1, tcg_op2,
12024                                           fpst);
12025                break;
12026            case 0x1e: /* FMINP */
12027                gen_helper_advsimd_minh(tcg_res[pass], tcg_op1, tcg_op2, fpst);
12028                break;
12029            default:
12030                g_assert_not_reached();
12031            }
12032        }
12033
12034        for (pass = 0; pass < maxpass; pass++) {
12035            write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_16);
12036            tcg_temp_free_i32(tcg_res[pass]);
12037        }
12038
12039        tcg_temp_free_i32(tcg_op1);
12040        tcg_temp_free_i32(tcg_op2);
12041
12042    } else {
12043        for (pass = 0; pass < elements; pass++) {
12044            TCGv_i32 tcg_op1 = tcg_temp_new_i32();
12045            TCGv_i32 tcg_op2 = tcg_temp_new_i32();
12046            TCGv_i32 tcg_res = tcg_temp_new_i32();
12047
12048            read_vec_element_i32(s, tcg_op1, rn, pass, MO_16);
12049            read_vec_element_i32(s, tcg_op2, rm, pass, MO_16);
12050
12051            switch (fpopcode) {
12052            case 0x0: /* FMAXNM */
12053                gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst);
12054                break;
12055            case 0x1: /* FMLA */
12056                read_vec_element_i32(s, tcg_res, rd, pass, MO_16);
12057                gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_res,
12058                                           fpst);
12059                break;
12060            case 0x2: /* FADD */
12061                gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst);
12062                break;
12063            case 0x3: /* FMULX */
12064                gen_helper_advsimd_mulxh(tcg_res, tcg_op1, tcg_op2, fpst);
12065                break;
12066            case 0x4: /* FCMEQ */
12067                gen_helper_advsimd_ceq_f16(tcg_res, tcg_op1, tcg_op2, fpst);
12068                break;
12069            case 0x6: /* FMAX */
12070                gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst);
12071                break;
12072            case 0x7: /* FRECPS */
12073                gen_helper_recpsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
12074                break;
12075            case 0x8: /* FMINNM */
12076                gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst);
12077                break;
12078            case 0x9: /* FMLS */
12079                /* As usual for ARM, separate negation for fused multiply-add */
12080                tcg_gen_xori_i32(tcg_op1, tcg_op1, 0x8000);
12081                read_vec_element_i32(s, tcg_res, rd, pass, MO_16);
12082                gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_res,
12083                                           fpst);
12084                break;
12085            case 0xa: /* FSUB */
12086                gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
12087                break;
12088            case 0xe: /* FMIN */
12089                gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst);
12090                break;
12091            case 0xf: /* FRSQRTS */
12092                gen_helper_rsqrtsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
12093                break;
12094            case 0x13: /* FMUL */
12095                gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst);
12096                break;
12097            case 0x14: /* FCMGE */
12098                gen_helper_advsimd_cge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
12099                break;
12100            case 0x15: /* FACGE */
12101                gen_helper_advsimd_acge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
12102                break;
12103            case 0x17: /* FDIV */
12104                gen_helper_advsimd_divh(tcg_res, tcg_op1, tcg_op2, fpst);
12105                break;
12106            case 0x1a: /* FABD */
12107                gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
12108                tcg_gen_andi_i32(tcg_res, tcg_res, 0x7fff);
12109                break;
12110            case 0x1c: /* FCMGT */
12111                gen_helper_advsimd_cgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
12112                break;
12113            case 0x1d: /* FACGT */
12114                gen_helper_advsimd_acgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
12115                break;
12116            default:
12117                fprintf(stderr, "%s: insn 0x%08x, fpop 0x%02x @ 0x%" PRIx64 "\n",
12118                        __func__, insn, fpopcode, s->pc_curr);
12119                g_assert_not_reached();
12120            }
12121
12122            write_vec_element_i32(s, tcg_res, rd, pass, MO_16);
12123            tcg_temp_free_i32(tcg_res);
12124            tcg_temp_free_i32(tcg_op1);
12125            tcg_temp_free_i32(tcg_op2);
12126        }
12127    }
12128
12129    tcg_temp_free_ptr(fpst);
12130
12131    clear_vec_high(s, is_q, rd);
12132}
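
/*
 * Example of the fp16 fpopcode packing: FABD is 0x1a, i.e. u = 1
 * (bit 4), a = 1 (bit 3), opcode = 2 (bits 2:0); it shares opcode 2
 * with FSUB (0xa), differing only in the u bit.  There is no FABD
 * helper: the case above subtracts and then clears the half-precision
 * sign bit with the 0x7fff mask.
 */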
12133
12134/* AdvSIMD three same extra
12135 *  31   30  29 28       24 23  22  21 20  16  15 14    11  10 9  5 4  0
12136 * +---+---+---+-----------+------+---+------+---+--------+---+----+----+
12137 * | 0 | Q | U | 0 1 1 1 0 | size | 0 |  Rm  | 1 | opcode | 1 | Rn | Rd |
12138 * +---+---+---+-----------+------+---+------+---+--------+---+----+----+
12139 */
12140static void disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn)
12141{
12142    int rd = extract32(insn, 0, 5);
12143    int rn = extract32(insn, 5, 5);
12144    int opcode = extract32(insn, 11, 4);
12145    int rm = extract32(insn, 16, 5);
12146    int size = extract32(insn, 22, 2);
12147    bool u = extract32(insn, 29, 1);
12148    bool is_q = extract32(insn, 30, 1);
12149    bool feature;
12150    int rot;
12151
12152    switch (u * 16 + opcode) {
12153    case 0x10: /* SQRDMLAH (vector) */
12154    case 0x11: /* SQRDMLSH (vector) */
12155        if (size != 1 && size != 2) {
12156            unallocated_encoding(s);
12157            return;
12158        }
12159        feature = dc_isar_feature(aa64_rdm, s);
12160        break;
12161    case 0x02: /* SDOT (vector) */
12162    case 0x12: /* UDOT (vector) */
12163        if (size != MO_32) {
12164            unallocated_encoding(s);
12165            return;
12166        }
12167        feature = dc_isar_feature(aa64_dp, s);
12168        break;
12169    case 0x18: /* FCMLA, #0 */
12170    case 0x19: /* FCMLA, #90 */
12171    case 0x1a: /* FCMLA, #180 */
12172    case 0x1b: /* FCMLA, #270 */
12173    case 0x1c: /* FCADD, #90 */
12174    case 0x1e: /* FCADD, #270 */
12175        if (size == 0
12176            || (size == 1 && !dc_isar_feature(aa64_fp16, s))
12177            || (size == 3 && !is_q)) {
12178            unallocated_encoding(s);
12179            return;
12180        }
12181        feature = dc_isar_feature(aa64_fcma, s);
12182        break;
12183    default:
12184        unallocated_encoding(s);
12185        return;
12186    }
12187    if (!feature) {
12188        unallocated_encoding(s);
12189        return;
12190    }
12191    if (!fp_access_check(s)) {
12192        return;
12193    }
12194
12195    switch (opcode) {
12196    case 0x0: /* SQRDMLAH (vector) */
12197        gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqrdmlah_qc, size);
12198        return;
12199
12200    case 0x1: /* SQRDMLSH (vector) */
12201        gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqrdmlsh_qc, size);
12202        return;
12203
12204    case 0x2: /* SDOT / UDOT */
12205        gen_gvec_op3_ool(s, is_q, rd, rn, rm, 0,
12206                         u ? gen_helper_gvec_udot_b : gen_helper_gvec_sdot_b);
12207        return;
12208
12209    case 0x8: /* FCMLA, #0 */
12210    case 0x9: /* FCMLA, #90 */
12211    case 0xa: /* FCMLA, #180 */
12212    case 0xb: /* FCMLA, #270 */
12213        rot = extract32(opcode, 0, 2);
12214        switch (size) {
12215        case 1:
12216            gen_gvec_op3_fpst(s, is_q, rd, rn, rm, true, rot,
12217                              gen_helper_gvec_fcmlah);
12218            break;
12219        case 2:
12220            gen_gvec_op3_fpst(s, is_q, rd, rn, rm, false, rot,
12221                              gen_helper_gvec_fcmlas);
12222            break;
12223        case 3:
12224            gen_gvec_op3_fpst(s, is_q, rd, rn, rm, false, rot,
12225                              gen_helper_gvec_fcmlad);
12226            break;
12227        default:
12228            g_assert_not_reached();
12229        }
12230        return;
12231
12232    case 0xc: /* FCADD, #90 */
12233    case 0xe: /* FCADD, #270 */
12234        rot = extract32(opcode, 1, 1);
12235        switch (size) {
12236        case 1:
12237            gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot,
12238                              gen_helper_gvec_fcaddh);
12239            break;
12240        case 2:
12241            gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot,
12242                              gen_helper_gvec_fcadds);
12243            break;
12244        case 3:
12245            gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot,
12246                              gen_helper_gvec_fcaddd);
12247            break;
12248        default:
12249            g_assert_not_reached();
12250        }
12251        return;
12252
12253    default:
12254        g_assert_not_reached();
12255    }
12256}
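
/*
 * For the FCMLA/FCADD cases above the rotation is re-derived from the
 * opcode: FCMLA encodes rot in opcode[1:0] (#0/#90/#180/#270 for
 * values 0-3), while FCADD has only #90/#270, encoded in opcode[1].
 * The rot value reaches the helper via the gvec 'data' argument.
 */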
12257
12258static void handle_2misc_widening(DisasContext *s, int opcode, bool is_q,
12259                                  int size, int rn, int rd)
12260{
12261    /* Handle 2-reg-misc ops which are widening (so each size element
12262     * in the source becomes a 2*size element in the destination).
12263     * The only instruction like this is FCVTL.
12264     */
12265    int pass;
12266
12267    if (size == 3) {
12268        /* 32 -> 64 bit fp conversion */
12269        TCGv_i64 tcg_res[2];
12270        int srcelt = is_q ? 2 : 0;
12271
12272        for (pass = 0; pass < 2; pass++) {
12273            TCGv_i32 tcg_op = tcg_temp_new_i32();
12274            tcg_res[pass] = tcg_temp_new_i64();
12275
12276            read_vec_element_i32(s, tcg_op, rn, srcelt + pass, MO_32);
12277            gen_helper_vfp_fcvtds(tcg_res[pass], tcg_op, cpu_env);
12278            tcg_temp_free_i32(tcg_op);
12279        }
12280        for (pass = 0; pass < 2; pass++) {
12281            write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
12282            tcg_temp_free_i64(tcg_res[pass]);
12283        }
12284    } else {
12285        /* 16 -> 32 bit fp conversion */
12286        int srcelt = is_q ? 4 : 0;
12287        TCGv_i32 tcg_res[4];
12288        TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
12289        TCGv_i32 ahp = get_ahp_flag();
12290
12291        for (pass = 0; pass < 4; pass++) {
12292            tcg_res[pass] = tcg_temp_new_i32();
12293
12294            read_vec_element_i32(s, tcg_res[pass], rn, srcelt + pass, MO_16);
12295            gen_helper_vfp_fcvt_f16_to_f32(tcg_res[pass], tcg_res[pass],
12296                                           fpst, ahp);
12297        }
12298        for (pass = 0; pass < 4; pass++) {
12299            write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
12300            tcg_temp_free_i32(tcg_res[pass]);
12301        }
12302
12303        tcg_temp_free_ptr(fpst);
12304        tcg_temp_free_i32(ahp);
12305    }
12306}
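
/*
 * For example, FCVTL2 (is_q set) with size == 3 converts the two
 * single-precision elements in the high half of Rn (srcelt == 2) to
 * the two double-precision elements of Rd, while FCVTL takes its
 * inputs from the low half instead.
 */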
12307
12308static void handle_rev(DisasContext *s, int opcode, bool u,
12309                       bool is_q, int size, int rn, int rd)
12310{
12311    int op = (opcode << 1) | u;
12312    int opsz = op + size;
12313    int grp_size = 3 - opsz;
12314    int dsize = is_q ? 128 : 64;
12315    int i;
12316
12317    if (opsz >= 3) {
12318        unallocated_encoding(s);
12319        return;
12320    }
12321
12322    if (!fp_access_check(s)) {
12323        return;
12324    }
12325
12326    if (size == 0) {
12327        /* Special case bytes, use bswap op on each group of elements */
12328        int groups = dsize / (8 << grp_size);
12329
12330        for (i = 0; i < groups; i++) {
12331            TCGv_i64 tcg_tmp = tcg_temp_new_i64();
12332
12333            read_vec_element(s, tcg_tmp, rn, i, grp_size);
12334            switch (grp_size) {
12335            case MO_16:
12336                tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
12337                break;
12338            case MO_32:
12339                tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp);
12340                break;
12341            case MO_64:
12342                tcg_gen_bswap64_i64(tcg_tmp, tcg_tmp);
12343                break;
12344            default:
12345                g_assert_not_reached();
12346            }
12347            write_vec_element(s, tcg_tmp, rd, i, grp_size);
12348            tcg_temp_free_i64(tcg_tmp);
12349        }
12350        clear_vec_high(s, is_q, rd);
12351    } else {
12352        int revmask = (1 << grp_size) - 1;
12353        int esize = 8 << size;
12354        int elements = dsize / esize;
12355        TCGv_i64 tcg_rn = tcg_temp_new_i64();
12356        TCGv_i64 tcg_rd = tcg_const_i64(0);
12357        TCGv_i64 tcg_rd_hi = tcg_const_i64(0);
12358
12359        for (i = 0; i < elements; i++) {
12360            int e_rev = (i & 0xf) ^ revmask;
12361            int off = e_rev * esize;
12362            read_vec_element(s, tcg_rn, rn, i, size);
12363            if (off >= 64) {
12364                tcg_gen_deposit_i64(tcg_rd_hi, tcg_rd_hi,
12365                                    tcg_rn, off - 64, esize);
12366            } else {
12367                tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, off, esize);
12368            }
12369        }
12370        write_vec_element(s, tcg_rd, rd, 0, MO_64);
12371        write_vec_element(s, tcg_rd_hi, rd, 1, MO_64);
12372
12373        tcg_temp_free_i64(tcg_rd_hi);
12374        tcg_temp_free_i64(tcg_rd);
12375        tcg_temp_free_i64(tcg_rn);
12376    }
12377}
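
/*
 * Worked example of the grp_size arithmetic above: REV32 is opcode 0
 * with u == 1, so op == 1; on byte elements (size == 0) opsz == 1 and
 * grp_size == MO_32, i.e. bswap32 on each 32-bit group.  REV16 on
 * halfword elements would give opsz == 3, which the opsz >= 3 check
 * rejects as unallocated.
 */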
12378
12379static void handle_2misc_pairwise(DisasContext *s, int opcode, bool u,
12380                                  bool is_q, int size, int rn, int rd)
12381{
12382    /* Implement the pairwise operations from 2-misc:
12383     * SADDLP, UADDLP, SADALP, UADALP.
12384     * These all add pairs of elements in the input to produce a
12385     * double-width result element in the output (possibly accumulating).
12386     */
12387    bool accum = (opcode == 0x6);
12388    int maxpass = is_q ? 2 : 1;
12389    int pass;
12390    TCGv_i64 tcg_res[2];
12391
12392    if (size == 2) {
12393        /* 32 + 32 -> 64 op */
12394        MemOp memop = size + (u ? 0 : MO_SIGN);
12395
12396        for (pass = 0; pass < maxpass; pass++) {
12397            TCGv_i64 tcg_op1 = tcg_temp_new_i64();
12398            TCGv_i64 tcg_op2 = tcg_temp_new_i64();
12399
12400            tcg_res[pass] = tcg_temp_new_i64();
12401
12402            read_vec_element(s, tcg_op1, rn, pass * 2, memop);
12403            read_vec_element(s, tcg_op2, rn, pass * 2 + 1, memop);
12404            tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
12405            if (accum) {
12406                read_vec_element(s, tcg_op1, rd, pass, MO_64);
12407                tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
12408            }
12409
12410            tcg_temp_free_i64(tcg_op1);
12411            tcg_temp_free_i64(tcg_op2);
12412        }
12413    } else {
12414        for (pass = 0; pass < maxpass; pass++) {
12415            TCGv_i64 tcg_op = tcg_temp_new_i64();
12416            NeonGenOne64OpFn *genfn;
12417            static NeonGenOne64OpFn * const fns[2][2] = {
12418                { gen_helper_neon_addlp_s8,  gen_helper_neon_addlp_u8 },
12419                { gen_helper_neon_addlp_s16,  gen_helper_neon_addlp_u16 },
12420            };
12421
12422            genfn = fns[size][u];
12423
12424            tcg_res[pass] = tcg_temp_new_i64();
12425
12426            read_vec_element(s, tcg_op, rn, pass, MO_64);
12427            genfn(tcg_res[pass], tcg_op);
12428
12429            if (accum) {
12430                read_vec_element(s, tcg_op, rd, pass, MO_64);
12431                if (size == 0) {
12432                    gen_helper_neon_addl_u16(tcg_res[pass],
12433                                             tcg_res[pass], tcg_op);
12434                } else {
12435                    gen_helper_neon_addl_u32(tcg_res[pass],
12436                                             tcg_res[pass], tcg_op);
12437                }
12438            }
12439            tcg_temp_free_i64(tcg_op);
12440        }
12441    }
12442    if (!is_q) {
12443        tcg_res[1] = tcg_const_i64(0);
12444    }
12445    for (pass = 0; pass < 2; pass++) {
12446        write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
12447        tcg_temp_free_i64(tcg_res[pass]);
12448    }
12449}
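
/*
 * For example, SADDLP with size == 2 and is_q set adds the four
 * signed 32-bit elements of Rn in adjacent pairs, giving two 64-bit
 * results; the accumulating forms (SADALP/UADALP, opcode 0x6) then
 * add in the previous contents of Rd.
 */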
12450
12451static void handle_shll(DisasContext *s, bool is_q, int size, int rn, int rd)
12452{
12453    /* Implement SHLL and SHLL2 */
12454    int pass;
12455    int part = is_q ? 2 : 0;
12456    TCGv_i64 tcg_res[2];
12457
12458    for (pass = 0; pass < 2; pass++) {
12459        static NeonGenWidenFn * const widenfns[3] = {
12460            gen_helper_neon_widen_u8,
12461            gen_helper_neon_widen_u16,
12462            tcg_gen_extu_i32_i64,
12463        };
12464        NeonGenWidenFn *widenfn = widenfns[size];
12465        TCGv_i32 tcg_op = tcg_temp_new_i32();
12466
12467        read_vec_element_i32(s, tcg_op, rn, part + pass, MO_32);
12468        tcg_res[pass] = tcg_temp_new_i64();
12469        widenfn(tcg_res[pass], tcg_op);
12470        tcg_gen_shli_i64(tcg_res[pass], tcg_res[pass], 8 << size);
12471
12472        tcg_temp_free_i32(tcg_op);
12473    }
12474
12475    for (pass = 0; pass < 2; pass++) {
12476        write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
12477        tcg_temp_free_i64(tcg_res[pass]);
12478    }
12479}
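
/*
 * Illustrative scalar model, not used by the decoder: one SHLL lane
 * for the 8 -> 16 bit case.  Each element is zero-extended to double
 * width and shifted left by its original element size (8 << size).
 */
static inline uint16_t ref_shll_lane_u8(uint8_t x)
{
    return (uint16_t)((uint32_t)x << 8);
}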
12480
12481/* AdvSIMD two reg misc
12482 *   31  30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
12483 * +---+---+---+-----------+------+-----------+--------+-----+------+------+
12484 * | 0 | Q | U | 0 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
12485 * +---+---+---+-----------+------+-----------+--------+-----+------+------+
12486 */
12487static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
12488{
12489    int size = extract32(insn, 22, 2);
12490    int opcode = extract32(insn, 12, 5);
12491    bool u = extract32(insn, 29, 1);
12492    bool is_q = extract32(insn, 30, 1);
12493    int rn = extract32(insn, 5, 5);
12494    int rd = extract32(insn, 0, 5);
12495    bool need_fpstatus = false;
12496    bool need_rmode = false;
12497    int rmode = -1;
12498    TCGv_i32 tcg_rmode;
12499    TCGv_ptr tcg_fpstatus;
12500
12501    switch (opcode) {
12502    case 0x0: /* REV64, REV32 */
12503    case 0x1: /* REV16 */
12504        handle_rev(s, opcode, u, is_q, size, rn, rd);
12505        return;
12506    case 0x5: /* CNT, NOT, RBIT */
12507        if (u && size == 0) {
12508            /* NOT */
12509            break;
12510        } else if (u && size == 1) {
12511            /* RBIT */
12512            break;
12513        } else if (!u && size == 0) {
12514            /* CNT */
12515            break;
12516        }
12517        unallocated_encoding(s);
12518        return;
12519    case 0x12: /* XTN, XTN2, SQXTUN, SQXTUN2 */
12520    case 0x14: /* SQXTN, SQXTN2, UQXTN, UQXTN2 */
12521        if (size == 3) {
12522            unallocated_encoding(s);
12523            return;
12524        }
12525        if (!fp_access_check(s)) {
12526            return;
12527        }
12528
12529        handle_2misc_narrow(s, false, opcode, u, is_q, size, rn, rd);
12530        return;
12531    case 0x4: /* CLS, CLZ */
12532        if (size == 3) {
12533            unallocated_encoding(s);
12534            return;
12535        }
12536        break;
12537    case 0x2: /* SADDLP, UADDLP */
12538    case 0x6: /* SADALP, UADALP */
12539        if (size == 3) {
12540            unallocated_encoding(s);
12541            return;
12542        }
12543        if (!fp_access_check(s)) {
12544            return;
12545        }
12546        handle_2misc_pairwise(s, opcode, u, is_q, size, rn, rd);
12547        return;
12548    case 0x13: /* SHLL, SHLL2 */
12549        if (u == 0 || size == 3) {
12550            unallocated_encoding(s);
12551            return;
12552        }
12553        if (!fp_access_check(s)) {
12554            return;
12555        }
12556        handle_shll(s, is_q, size, rn, rd);
12557        return;
12558    case 0xa: /* CMLT */
12559        if (u == 1) {
12560            unallocated_encoding(s);
12561            return;
12562        }
12563        /* fall through */
12564    case 0x8: /* CMGT, CMGE */
12565    case 0x9: /* CMEQ, CMLE */
12566    case 0xb: /* ABS, NEG */
12567        if (size == 3 && !is_q) {
12568            unallocated_encoding(s);
12569            return;
12570        }
12571        break;
12572    case 0x3: /* SUQADD, USQADD */
12573        if (size == 3 && !is_q) {
12574            unallocated_encoding(s);
12575            return;
12576        }
12577        if (!fp_access_check(s)) {
12578            return;
12579        }
12580        handle_2misc_satacc(s, false, u, is_q, size, rn, rd);
12581        return;
12582    case 0x7: /* SQABS, SQNEG */
12583        if (size == 3 && !is_q) {
12584            unallocated_encoding(s);
12585            return;
12586        }
12587        break;
12588    case 0xc ... 0xf:
12589    case 0x16 ... 0x1f:
12590    {
12591        /* Floating point: U, size[1] and opcode indicate operation;
12592         * size[0] indicates single or double precision.
12593         */
12594        int is_double = extract32(size, 0, 1);
12595        opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
12596        size = is_double ? 3 : 2;
12597        switch (opcode) {
12598        case 0x2f: /* FABS */
12599        case 0x6f: /* FNEG */
12600            if (size == 3 && !is_q) {
12601                unallocated_encoding(s);
12602                return;
12603            }
12604            break;
12605        case 0x1d: /* SCVTF */
12606        case 0x5d: /* UCVTF */
12607        {
12608            bool is_signed = (opcode == 0x1d);
12609            int elements = is_double ? 2 : is_q ? 4 : 2;
12610            if (is_double && !is_q) {
12611                unallocated_encoding(s);
12612                return;
12613            }
12614            if (!fp_access_check(s)) {
12615                return;
12616            }
12617            handle_simd_intfp_conv(s, rd, rn, elements, is_signed, 0, size);
12618            return;
12619        }
12620        case 0x2c: /* FCMGT (zero) */
12621        case 0x2d: /* FCMEQ (zero) */
12622        case 0x2e: /* FCMLT (zero) */
12623        case 0x6c: /* FCMGE (zero) */
12624        case 0x6d: /* FCMLE (zero) */
12625            if (size == 3 && !is_q) {
12626                unallocated_encoding(s);
12627                return;
12628            }
12629            handle_2misc_fcmp_zero(s, opcode, false, u, is_q, size, rn, rd);
12630            return;
12631        case 0x7f: /* FSQRT */
12632            if (size == 3 && !is_q) {
12633                unallocated_encoding(s);
12634                return;
12635            }
12636            break;
12637        case 0x1a: /* FCVTNS */
12638        case 0x1b: /* FCVTMS */
12639        case 0x3a: /* FCVTPS */
12640        case 0x3b: /* FCVTZS */
12641        case 0x5a: /* FCVTNU */
12642        case 0x5b: /* FCVTMU */
12643        case 0x7a: /* FCVTPU */
12644        case 0x7b: /* FCVTZU */
12645            need_fpstatus = true;
12646            need_rmode = true;
12647            rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
12648            if (size == 3 && !is_q) {
12649                unallocated_encoding(s);
12650                return;
12651            }
12652            break;
12653        case 0x5c: /* FCVTAU */
12654        case 0x1c: /* FCVTAS */
12655            need_fpstatus = true;
12656            need_rmode = true;
12657            rmode = FPROUNDING_TIEAWAY;
12658            if (size == 3 && !is_q) {
12659                unallocated_encoding(s);
12660                return;
12661            }
12662            break;
12663        case 0x3c: /* URECPE */
12664            if (size == 3) {
12665                unallocated_encoding(s);
12666                return;
12667            }
12668            /* fall through */
12669        case 0x3d: /* FRECPE */
12670        case 0x7d: /* FRSQRTE */
12671            if (size == 3 && !is_q) {
12672                unallocated_encoding(s);
12673                return;
12674            }
12675            if (!fp_access_check(s)) {
12676                return;
12677            }
12678            handle_2misc_reciprocal(s, opcode, false, u, is_q, size, rn, rd);
12679            return;
12680        case 0x56: /* FCVTXN, FCVTXN2 */
12681            if (size == 2) {
12682                unallocated_encoding(s);
12683                return;
12684            }
12685            /* fall through */
12686        case 0x16: /* FCVTN, FCVTN2 */
12687            /* handle_2misc_narrow does a 2*size -> size operation, but these
12688             * instructions encode the source size rather than dest size.
12689             */
12690            if (!fp_access_check(s)) {
12691                return;
12692            }
12693            handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd);
12694            return;
12695        case 0x17: /* FCVTL, FCVTL2 */
12696            if (!fp_access_check(s)) {
12697                return;
12698            }
12699            handle_2misc_widening(s, opcode, is_q, size, rn, rd);
12700            return;
12701        case 0x18: /* FRINTN */
12702        case 0x19: /* FRINTM */
12703        case 0x38: /* FRINTP */
12704        case 0x39: /* FRINTZ */
12705            need_rmode = true;
12706            rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
12707            /* fall through */
12708        case 0x59: /* FRINTX */
12709        case 0x79: /* FRINTI */
12710            need_fpstatus = true;
12711            if (size == 3 && !is_q) {
12712                unallocated_encoding(s);
12713                return;
12714            }
12715            break;
12716        case 0x58: /* FRINTA */
12717            need_rmode = true;
12718            rmode = FPROUNDING_TIEAWAY;
12719            need_fpstatus = true;
12720            if (size == 3 && !is_q) {
12721                unallocated_encoding(s);
12722                return;
12723            }
12724            break;
12725        case 0x7c: /* URSQRTE */
12726            if (size == 3) {
12727                unallocated_encoding(s);
12728                return;
12729            }
12730            break;
12731        case 0x1e: /* FRINT32Z */
12732        case 0x1f: /* FRINT64Z */
12733            need_rmode = true;
12734            rmode = FPROUNDING_ZERO;
12735            /* fall through */
12736        case 0x5e: /* FRINT32X */
12737        case 0x5f: /* FRINT64X */
12738            need_fpstatus = true;
12739            if ((size == 3 && !is_q) || !dc_isar_feature(aa64_frint, s)) {
12740                unallocated_encoding(s);
12741                return;
12742            }
12743            break;
12744        default:
12745            unallocated_encoding(s);
12746            return;
12747        }
12748        break;
12749    }
12750    default:
12751        unallocated_encoding(s);
12752        return;
12753    }
12754
12755    if (!fp_access_check(s)) {
12756        return;
12757    }
12758
12759    if (need_fpstatus || need_rmode) {
12760        tcg_fpstatus = fpstatus_ptr(FPST_FPCR);
12761    } else {
12762        tcg_fpstatus = NULL;
12763    }
12764    if (need_rmode) {
12765        tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
12766        gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
12767    } else {
12768        tcg_rmode = NULL;
12769    }
12770
12771    switch (opcode) {
12772    case 0x5:
12773        if (u && size == 0) { /* NOT */
12774            gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_not, 0);
12775            return;
12776        }
12777        break;
12778    case 0x8: /* CMGT, CMGE */
12779        if (u) {
12780            gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cge0, size);
12781        } else {
12782            gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cgt0, size);
12783        }
12784        return;
12785    case 0x9: /* CMEQ, CMLE */
12786        if (u) {
12787            gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cle0, size);
12788        } else {
12789            gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_ceq0, size);
12790        }
12791        return;
12792    case 0xa: /* CMLT */
12793        gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_clt0, size);
12794        return;
12795    case 0xb:
12796        if (u) { /* ABS, NEG */
12797            gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_neg, size);
12798        } else {
12799            gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_abs, size);
12800        }
12801        return;
12802    }
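         /*
          * Opcodes that can be expanded as whole-vector gvec operations
          * have all returned above; everything else is expanded per
          * 64-bit or 32-bit pass below.
          */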
12803
12804    if (size == 3) {
12805        /* All 64-bit element operations can be shared with scalar 2misc */
12806        int pass;
12807
12808        /* Coverity claims (size == 3 && !is_q) has been eliminated
12809         * from all paths leading to here.
12810         */
12811        tcg_debug_assert(is_q);
12812        for (pass = 0; pass < 2; pass++) {
12813            TCGv_i64 tcg_op = tcg_temp_new_i64();
12814            TCGv_i64 tcg_res = tcg_temp_new_i64();
12815
12816            read_vec_element(s, tcg_op, rn, pass, MO_64);
12817
12818            handle_2misc_64(s, opcode, u, tcg_res, tcg_op,
12819                            tcg_rmode, tcg_fpstatus);
12820
12821            write_vec_element(s, tcg_res, rd, pass, MO_64);
12822
12823            tcg_temp_free_i64(tcg_res);
12824            tcg_temp_free_i64(tcg_op);
12825        }
12826    } else {
12827        int pass;
12828
12829        for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
12830            TCGv_i32 tcg_op = tcg_temp_new_i32();
12831            TCGv_i32 tcg_res = tcg_temp_new_i32();
12832
12833            read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
12834
12835            if (size == 2) {
12836                /* Special cases for 32 bit elements */
12837                switch (opcode) {
12838                case 0x4: /* CLS */
12839                    if (u) {
12840                        tcg_gen_clzi_i32(tcg_res, tcg_op, 32);
12841                    } else {
12842                        tcg_gen_clrsb_i32(tcg_res, tcg_op);
12843                    }
12844                    break;
12845                case 0x7: /* SQABS, SQNEG */
12846                    if (u) {
12847                        gen_helper_neon_qneg_s32(tcg_res, cpu_env, tcg_op);
12848                    } else {
12849                        gen_helper_neon_qabs_s32(tcg_res, cpu_env, tcg_op);
12850                    }
12851                    break;
12852                case 0x2f: /* FABS */
12853                    gen_helper_vfp_abss(tcg_res, tcg_op);
12854                    break;
12855                case 0x6f: /* FNEG */
12856                    gen_helper_vfp_negs(tcg_res, tcg_op);
12857                    break;
12858                case 0x7f: /* FSQRT */
12859                    gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env);
12860                    break;
12861                case 0x1a: /* FCVTNS */
12862                case 0x1b: /* FCVTMS */
12863                case 0x1c: /* FCVTAS */
12864                case 0x3a: /* FCVTPS */
12865                case 0x3b: /* FCVTZS */
12866                {
12867                    TCGv_i32 tcg_shift = tcg_const_i32(0);
12868                    gen_helper_vfp_tosls(tcg_res, tcg_op,
12869                                         tcg_shift, tcg_fpstatus);
12870                    tcg_temp_free_i32(tcg_shift);
12871                    break;
12872                }
12873                case 0x5a: /* FCVTNU */
12874                case 0x5b: /* FCVTMU */
12875                case 0x5c: /* FCVTAU */
12876                case 0x7a: /* FCVTPU */
12877                case 0x7b: /* FCVTZU */
12878                {
12879                    TCGv_i32 tcg_shift = tcg_const_i32(0);
12880                    gen_helper_vfp_touls(tcg_res, tcg_op,
12881                                         tcg_shift, tcg_fpstatus);
12882                    tcg_temp_free_i32(tcg_shift);
12883                    break;
12884                }
12885                case 0x18: /* FRINTN */
12886                case 0x19: /* FRINTM */
12887                case 0x38: /* FRINTP */
12888                case 0x39: /* FRINTZ */
12889                case 0x58: /* FRINTA */
12890                case 0x79: /* FRINTI */
12891                    gen_helper_rints(tcg_res, tcg_op, tcg_fpstatus);
12892                    break;
12893                case 0x59: /* FRINTX */
12894                    gen_helper_rints_exact(tcg_res, tcg_op, tcg_fpstatus);
12895                    break;
12896                case 0x7c: /* URSQRTE */
12897                    gen_helper_rsqrte_u32(tcg_res, tcg_op);
12898                    break;
12899                case 0x1e: /* FRINT32Z */
12900                case 0x5e: /* FRINT32X */
12901                    gen_helper_frint32_s(tcg_res, tcg_op, tcg_fpstatus);
12902                    break;
12903                case 0x1f: /* FRINT64Z */
12904                case 0x5f: /* FRINT64X */
12905                    gen_helper_frint64_s(tcg_res, tcg_op, tcg_fpstatus);
12906                    break;
12907                default:
12908                    g_assert_not_reached();
12909                }
12910            } else {
12911                /* Use helpers for 8 and 16 bit elements */
12912                switch (opcode) {
12913                case 0x5: /* CNT, RBIT */
12914                    /* For these two insns size is part of the opcode specifier
12915                     * (handled earlier); they always operate on byte elements.
12916                     */
12917                    if (u) {
12918                        gen_helper_neon_rbit_u8(tcg_res, tcg_op);
12919                    } else {
12920                        gen_helper_neon_cnt_u8(tcg_res, tcg_op);
12921                    }
12922                    break;
12923                case 0x7: /* SQABS, SQNEG */
12924                {
12925                    NeonGenOneOpEnvFn *genfn;
12926                    static NeonGenOneOpEnvFn * const fns[2][2] = {
12927                        { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
12928                        { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
12929                    };
12930                    genfn = fns[size][u];
12931                    genfn(tcg_res, cpu_env, tcg_op);
12932                    break;
12933                }
12934                case 0x4: /* CLS, CLZ */
12935                    if (u) {
12936                        if (size == 0) {
12937                            gen_helper_neon_clz_u8(tcg_res, tcg_op);
12938                        } else {
12939                            gen_helper_neon_clz_u16(tcg_res, tcg_op);
12940                        }
12941                    } else {
12942                        if (size == 0) {
12943                            gen_helper_neon_cls_s8(tcg_res, tcg_op);
12944                        } else {
12945                            gen_helper_neon_cls_s16(tcg_res, tcg_op);
12946                        }
12947                    }
12948                    break;
12949                default:
12950                    g_assert_not_reached();
12951                }
12952            }
12953
12954            write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
12955
12956            tcg_temp_free_i32(tcg_res);
12957            tcg_temp_free_i32(tcg_op);
12958        }
12959    }
12960    clear_vec_high(s, is_q, rd);
12961
12962    if (need_rmode) {
12963        gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
12964        tcg_temp_free_i32(tcg_rmode);
12965    }
12966    if (need_fpstatus) {
12967        tcg_temp_free_ptr(tcg_fpstatus);
12968    }
12969}
12970
12971/* AdvSIMD [scalar] two register miscellaneous (FP16)
12972 *
12973 *   31  30  29 28  27     24  23 22 21       17 16    12 11 10 9    5 4    0
12974 * +---+---+---+---+---------+---+-------------+--------+-----+------+------+
12975 * | 0 | Q | U | S | 1 1 1 0 | a | 1 1 1 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
12976 * +---+---+---+---+---------+---+-------------+--------+-----+------+------+
12977 *   mask: 1000 1111 0111 1110 0000 1100 0000 0000 0x8f7e 0c00
12978 *   val:  0000 1110 0111 1000 0000 1000 0000 0000 0x0e78 0800
12979 *
12980 * This actually covers two groups, with scalar access governed by
12981 * bit 28. Several of the instructions (the float-to-integral ones)
12982 * exist only in the vector form and are unallocated in the scalar
12983 * decode. Also, in the scalar decode Q is always 1.
12984 */
12985static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn)
12986{
12987    int fpop, opcode, a, u;
12988    int rn, rd;
12989    bool is_q;
12990    bool is_scalar;
12991    bool only_in_vector = false;
12992
12993    int pass;
12994    TCGv_i32 tcg_rmode = NULL;
12995    TCGv_ptr tcg_fpstatus = NULL;
12996    bool need_rmode = false;
12997    bool need_fpst = true;
12998    int rmode;
12999
13000    if (!dc_isar_feature(aa64_fp16, s)) {
13001        unallocated_encoding(s);
13002        return;
13003    }
13004
13005    rd = extract32(insn, 0, 5);
13006    rn = extract32(insn, 5, 5);
13007
13008    a = extract32(insn, 23, 1);
13009    u = extract32(insn, 29, 1);
13010    is_scalar = extract32(insn, 28, 1);
13011    is_q = extract32(insn, 30, 1);
13012
13013    opcode = extract32(insn, 12, 5);
13014    fpop = deposit32(opcode, 5, 1, a);
13015    fpop = deposit32(fpop, 6, 1, u);
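         /*
          * fpop is the 5-bit opcode with a and u appended as bits 5 and 6,
          * i.e. fpop = opcode | a << 5 | u << 6.  For example FCVTZU
          * (u = 1, a = 1, opcode = 0x1b) yields fpop 0x7b, matching its
          * case label below.
          */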
13016
13017    switch (fpop) {
13018    case 0x1d: /* SCVTF */
13019    case 0x5d: /* UCVTF */
13020    {
13021        int elements;
13022
13023        if (is_scalar) {
13024            elements = 1;
13025        } else {
13026            elements = (is_q ? 8 : 4);
13027        }
13028
13029        if (!fp_access_check(s)) {
13030            return;
13031        }
13032        handle_simd_intfp_conv(s, rd, rn, elements, !u, 0, MO_16);
13033        return;
13034    }
13035    break;
13036    case 0x2c: /* FCMGT (zero) */
13037    case 0x2d: /* FCMEQ (zero) */
13038    case 0x2e: /* FCMLT (zero) */
13039    case 0x6c: /* FCMGE (zero) */
13040    case 0x6d: /* FCMLE (zero) */
13041        handle_2misc_fcmp_zero(s, fpop, is_scalar, 0, is_q, MO_16, rn, rd);
13042        return;
13043    case 0x3d: /* FRECPE */
13044    case 0x3f: /* FRECPX */
13045        break;
13046    case 0x18: /* FRINTN */
13047        need_rmode = true;
13048        only_in_vector = true;
13049        rmode = FPROUNDING_TIEEVEN;
13050        break;
13051    case 0x19: /* FRINTM */
13052        need_rmode = true;
13053        only_in_vector = true;
13054        rmode = FPROUNDING_NEGINF;
13055        break;
13056    case 0x38: /* FRINTP */
13057        need_rmode = true;
13058        only_in_vector = true;
13059        rmode = FPROUNDING_POSINF;
13060        break;
13061    case 0x39: /* FRINTZ */
13062        need_rmode = true;
13063        only_in_vector = true;
13064        rmode = FPROUNDING_ZERO;
13065        break;
13066    case 0x58: /* FRINTA */
13067        need_rmode = true;
13068        only_in_vector = true;
13069        rmode = FPROUNDING_TIEAWAY;
13070        break;
13071    case 0x59: /* FRINTX */
13072    case 0x79: /* FRINTI */
13073        only_in_vector = true;
13074        /* current rounding mode */
13075        break;
13076    case 0x1a: /* FCVTNS */
13077        need_rmode = true;
13078        rmode = FPROUNDING_TIEEVEN;
13079        break;
13080    case 0x1b: /* FCVTMS */
13081        need_rmode = true;
13082        rmode = FPROUNDING_NEGINF;
13083        break;
13084    case 0x1c: /* FCVTAS */
13085        need_rmode = true;
13086        rmode = FPROUNDING_TIEAWAY;
13087        break;
13088    case 0x3a: /* FCVTPS */
13089        need_rmode = true;
13090        rmode = FPROUNDING_POSINF;
13091        break;
13092    case 0x3b: /* FCVTZS */
13093        need_rmode = true;
13094        rmode = FPROUNDING_ZERO;
13095        break;
13096    case 0x5a: /* FCVTNU */
13097        need_rmode = true;
13098        rmode = FPROUNDING_TIEEVEN;
13099        break;
13100    case 0x5b: /* FCVTMU */
13101        need_rmode = true;
13102        rmode = FPROUNDING_NEGINF;
13103        break;
13104    case 0x5c: /* FCVTAU */
13105        need_rmode = true;
13106        rmode = FPROUNDING_TIEAWAY;
13107        break;
13108    case 0x7a: /* FCVTPU */
13109        need_rmode = true;
13110        rmode = FPROUNDING_POSINF;
13111        break;
13112    case 0x7b: /* FCVTZU */
13113        need_rmode = true;
13114        rmode = FPROUNDING_ZERO;
13115        break;
13116    case 0x2f: /* FABS */
13117    case 0x6f: /* FNEG */
13118        need_fpst = false;
13119        break;
13120    case 0x7d: /* FRSQRTE */
13121    case 0x7f: /* FSQRT (vector) */
13122        break;
13123    default:
13124        fprintf(stderr, "%s: insn 0x%08x fpop 0x%02x\n", __func__, insn, fpop);
13125        g_assert_not_reached();
13126    }
13127
13129    /* Check additional constraints for the scalar encoding */
13130    if (is_scalar) {
13131        if (!is_q) {
13132            unallocated_encoding(s);
13133            return;
13134        }
13135        /* FRINTxx is only in the vector form */
13136        if (only_in_vector) {
13137            unallocated_encoding(s);
13138            return;
13139        }
13140    }
13141
13142    if (!fp_access_check(s)) {
13143        return;
13144    }
13145
13146    if (need_rmode || need_fpst) {
13147        tcg_fpstatus = fpstatus_ptr(FPST_FPCR_F16);
13148    }
13149
13150    if (need_rmode) {
13151        tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
13152        gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
13153    }
13154
13155    if (is_scalar) {
13156        TCGv_i32 tcg_op = read_fp_hreg(s, rn);
13157        TCGv_i32 tcg_res = tcg_temp_new_i32();
13158
13159        switch (fpop) {
13160        case 0x1a: /* FCVTNS */
13161        case 0x1b: /* FCVTMS */
13162        case 0x1c: /* FCVTAS */
13163        case 0x3a: /* FCVTPS */
13164        case 0x3b: /* FCVTZS */
13165            gen_helper_advsimd_f16tosinth(tcg_res, tcg_op, tcg_fpstatus);
13166            break;
13167        case 0x3d: /* FRECPE */
13168            gen_helper_recpe_f16(tcg_res, tcg_op, tcg_fpstatus);
13169            break;
13170        case 0x3f: /* FRECPX */
13171            gen_helper_frecpx_f16(tcg_res, tcg_op, tcg_fpstatus);
13172            break;
13173        case 0x5a: /* FCVTNU */
13174        case 0x5b: /* FCVTMU */
13175        case 0x5c: /* FCVTAU */
13176        case 0x7a: /* FCVTPU */
13177        case 0x7b: /* FCVTZU */
13178            gen_helper_advsimd_f16touinth(tcg_res, tcg_op, tcg_fpstatus);
13179            break;
13180        case 0x6f: /* FNEG */
13181            tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
13182            break;
13183        case 0x7d: /* FRSQRTE */
13184            gen_helper_rsqrte_f16(tcg_res, tcg_op, tcg_fpstatus);
13185            break;
13186        default:
13187            g_assert_not_reached();
13188        }
13189
13190        /* limit any sign extension going on */
13191        tcg_gen_andi_i32(tcg_res, tcg_res, 0xffff);
13192        write_fp_sreg(s, rd, tcg_res);
13193
13194        tcg_temp_free_i32(tcg_res);
13195        tcg_temp_free_i32(tcg_op);
13196    } else {
13197        for (pass = 0; pass < (is_q ? 8 : 4); pass++) {
13198            TCGv_i32 tcg_op = tcg_temp_new_i32();
13199            TCGv_i32 tcg_res = tcg_temp_new_i32();
13200
13201            read_vec_element_i32(s, tcg_op, rn, pass, MO_16);
13202
13203            switch (fpop) {
13204            case 0x1a: /* FCVTNS */
13205            case 0x1b: /* FCVTMS */
13206            case 0x1c: /* FCVTAS */
13207            case 0x3a: /* FCVTPS */
13208            case 0x3b: /* FCVTZS */
13209                gen_helper_advsimd_f16tosinth(tcg_res, tcg_op, tcg_fpstatus);
13210                break;
13211            case 0x3d: /* FRECPE */
13212                gen_helper_recpe_f16(tcg_res, tcg_op, tcg_fpstatus);
13213                break;
13214            case 0x5a: /* FCVTNU */
13215            case 0x5b: /* FCVTMU */
13216            case 0x5c: /* FCVTAU */
13217            case 0x7a: /* FCVTPU */
13218            case 0x7b: /* FCVTZU */
13219                gen_helper_advsimd_f16touinth(tcg_res, tcg_op, tcg_fpstatus);
13220                break;
13221            case 0x18: /* FRINTN */
13222            case 0x19: /* FRINTM */
13223            case 0x38: /* FRINTP */
13224            case 0x39: /* FRINTZ */
13225            case 0x58: /* FRINTA */
13226            case 0x79: /* FRINTI */
13227                gen_helper_advsimd_rinth(tcg_res, tcg_op, tcg_fpstatus);
13228                break;
13229            case 0x59: /* FRINTX */
13230                gen_helper_advsimd_rinth_exact(tcg_res, tcg_op, tcg_fpstatus);
13231                break;
13232            case 0x2f: /* FABS */
13233                tcg_gen_andi_i32(tcg_res, tcg_op, 0x7fff);
13234                break;
13235            case 0x6f: /* FNEG */
13236                tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
13237                break;
13238            case 0x7d: /* FRSQRTE */
13239                gen_helper_rsqrte_f16(tcg_res, tcg_op, tcg_fpstatus);
13240                break;
13241            case 0x7f: /* FSQRT */
13242                gen_helper_sqrt_f16(tcg_res, tcg_op, tcg_fpstatus);
13243                break;
13244            default:
13245                g_assert_not_reached();
13246            }
13247
13248            write_vec_element_i32(s, tcg_res, rd, pass, MO_16);
13249
13250            tcg_temp_free_i32(tcg_res);
13251            tcg_temp_free_i32(tcg_op);
13252        }
13253
13254        clear_vec_high(s, is_q, rd);
13255    }
13256
13257    if (tcg_rmode) {
13258        gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
13259        tcg_temp_free_i32(tcg_rmode);
13260    }
13261
13262    if (tcg_fpstatus) {
13263        tcg_temp_free_ptr(tcg_fpstatus);
13264    }
13265}
13266
13267/* AdvSIMD scalar x indexed element
13268 *  31 30  29 28       24 23  22 21  20  19  16 15 12  11  10 9    5 4    0
13269 * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
13270 * | 0 1 | U | 1 1 1 1 1 | size | L | M |  Rm  | opc | H | 0 |  Rn  |  Rd  |
13271 * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
13272 * AdvSIMD vector x indexed element
13273 *   31  30  29 28       24 23  22 21  20  19  16 15 12  11  10 9    5 4    0
13274 * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
13275 * | 0 | Q | U | 0 1 1 1 1 | size | L | M |  Rm  | opc | H | 0 |  Rn  |  Rd  |
13276 * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
13277 */
13278static void disas_simd_indexed(DisasContext *s, uint32_t insn)
13279{
13280    /* This encoding has two kinds of instruction:
13281     *  normal, where we perform elt x idxelt => elt for each
13282     *     element in the vector
13283     *  long, where we perform elt x idxelt and generate a result of
13284     *     double the width of the input element
13285     * The long ops have a 'part' specifier (i.e. come in INSN, INSN2 pairs).
13286     */
13287    bool is_scalar = extract32(insn, 28, 1);
13288    bool is_q = extract32(insn, 30, 1);
13289    bool u = extract32(insn, 29, 1);
13290    int size = extract32(insn, 22, 2);
13291    int l = extract32(insn, 21, 1);
13292    int m = extract32(insn, 20, 1);
13293    /* Note that the Rm field here is only 4 bits, not 5 as it usually is */
13294    int rm = extract32(insn, 16, 4);
13295    int opcode = extract32(insn, 12, 4);
13296    int h = extract32(insn, 11, 1);
13297    int rn = extract32(insn, 5, 5);
13298    int rd = extract32(insn, 0, 5);
13299    bool is_long = false;
13300    int is_fp = 0;
13301    bool is_fp16 = false;
13302    int index;
13303    TCGv_ptr fpst;
13304
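         /*
          * The switch key concatenates the u bit with the 4-bit opcode
          * field, so e.g. UMULL (u = 1, opcode = 0xa) is case 0x1a.
          */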
13305    switch (16 * u + opcode) {
13306    case 0x08: /* MUL */
13307    case 0x10: /* MLA */
13308    case 0x14: /* MLS */
13309        if (is_scalar) {
13310            unallocated_encoding(s);
13311            return;
13312        }
13313        break;
13314    case 0x02: /* SMLAL, SMLAL2 */
13315    case 0x12: /* UMLAL, UMLAL2 */
13316    case 0x06: /* SMLSL, SMLSL2 */
13317    case 0x16: /* UMLSL, UMLSL2 */
13318    case 0x0a: /* SMULL, SMULL2 */
13319    case 0x1a: /* UMULL, UMULL2 */
13320        if (is_scalar) {
13321            unallocated_encoding(s);
13322            return;
13323        }
13324        is_long = true;
13325        break;
13326    case 0x03: /* SQDMLAL, SQDMLAL2 */
13327    case 0x07: /* SQDMLSL, SQDMLSL2 */
13328    case 0x0b: /* SQDMULL, SQDMULL2 */
13329        is_long = true;
13330        break;
13331    case 0x0c: /* SQDMULH */
13332    case 0x0d: /* SQRDMULH */
13333        break;
13334    case 0x01: /* FMLA */
13335    case 0x05: /* FMLS */
13336    case 0x09: /* FMUL */
13337    case 0x19: /* FMULX */
13338        is_fp = 1;
13339        break;
13340    case 0x1d: /* SQRDMLAH */
13341    case 0x1f: /* SQRDMLSH */
13342        if (!dc_isar_feature(aa64_rdm, s)) {
13343            unallocated_encoding(s);
13344            return;
13345        }
13346        break;
13347    case 0x0e: /* SDOT */
13348    case 0x1e: /* UDOT */
13349        if (is_scalar || size != MO_32 || !dc_isar_feature(aa64_dp, s)) {
13350            unallocated_encoding(s);
13351            return;
13352        }
13353        break;
13354    case 0x11: /* FCMLA #0 */
13355    case 0x13: /* FCMLA #90 */
13356    case 0x15: /* FCMLA #180 */
13357    case 0x17: /* FCMLA #270 */
13358        if (is_scalar || !dc_isar_feature(aa64_fcma, s)) {
13359            unallocated_encoding(s);
13360            return;
13361        }
13362        is_fp = 2;
13363        break;
13364    case 0x00: /* FMLAL */
13365    case 0x04: /* FMLSL */
13366    case 0x18: /* FMLAL2 */
13367    case 0x1c: /* FMLSL2 */
13368        if (is_scalar || size != MO_32 || !dc_isar_feature(aa64_fhm, s)) {
13369            unallocated_encoding(s);
13370            return;
13371        }
13372        size = MO_16;
13373        /* FP insns, but is_fp stays 0: these helpers take cpu_env, not fp_status.  */
13374        break;
13375    default:
13376        unallocated_encoding(s);
13377        return;
13378    }
13379
13380    switch (is_fp) {
13381    case 1: /* normal fp */
13382        /* convert insn encoded size to MemOp size */
13383        switch (size) {
13384        case 0: /* half-precision */
13385            size = MO_16;
13386            is_fp16 = true;
13387            break;
13388        case MO_32: /* single precision */
13389        case MO_64: /* double precision */
13390            break;
13391        default:
13392            unallocated_encoding(s);
13393            return;
13394        }
13395        break;
13396
13397    case 2: /* complex fp */
13398        /* Each indexable element is a complex pair.  */
13399        size += 1;
13400        switch (size) {
13401        case MO_32:
13402            if (h && !is_q) {
13403                unallocated_encoding(s);
13404                return;
13405            }
13406            is_fp16 = true;
13407            break;
13408        case MO_64:
13409            break;
13410        default:
13411            unallocated_encoding(s);
13412            return;
13413        }
13414        break;
13415
13416    default: /* integer */
13417        switch (size) {
13418        case MO_8:
13419        case MO_64:
13420            unallocated_encoding(s);
13421            return;
13422        }
13423        break;
13424    }
13425    if (is_fp16 && !dc_isar_feature(aa64_fp16, s)) {
13426        unallocated_encoding(s);
13427        return;
13428    }
13429
13430    /* Given MemOp size, adjust register and indexing.  */
13431    switch (size) {
13432    case MO_16:
13433        index = h << 2 | l << 1 | m;
13434        break;
13435    case MO_32:
13436        index = h << 1 | l;
13437        rm |= m << 4;
13438        break;
13439    case MO_64:
13440        if (l || !is_q) {
13441            unallocated_encoding(s);
13442            return;
13443        }
13444        index = h;
13445        rm |= m << 4;
13446        break;
13447    default:
13448        g_assert_not_reached();
13449    }
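         /*
          * For example, with MO_32 elements the index is h:l (0..3) and m
          * becomes bit 4 of Rm, extending it to cover V0-V31; with MO_16
          * all of h:l:m form the index (0..7) and Rm stays 4 bits, so only
          * V0-V15 are addressable.
          */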
13450
13451    if (!fp_access_check(s)) {
13452        return;
13453    }
13454
13455    if (is_fp) {
13456        fpst = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
13457    } else {
13458        fpst = NULL;
13459    }
13460
13461    switch (16 * u + opcode) {
13462    case 0x0e: /* SDOT */
13463    case 0x1e: /* UDOT */
13464        gen_gvec_op3_ool(s, is_q, rd, rn, rm, index,
13465                         u ? gen_helper_gvec_udot_idx_b
13466                         : gen_helper_gvec_sdot_idx_b);
13467        return;
13468    case 0x11: /* FCMLA #0 */
13469    case 0x13: /* FCMLA #90 */
13470    case 0x15: /* FCMLA #180 */
13471    case 0x17: /* FCMLA #270 */
13472        {
13473            int rot = extract32(insn, 13, 2);
13474            int data = (index << 2) | rot;
13475            tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
13476                               vec_full_reg_offset(s, rn),
13477                               vec_full_reg_offset(s, rm), fpst,
13478                               is_q ? 16 : 8, vec_full_reg_size(s), data,
13479                               size == MO_64
13480                               ? gen_helper_gvec_fcmlas_idx
13481                               : gen_helper_gvec_fcmlah_idx);
13482            tcg_temp_free_ptr(fpst);
13483        }
13484        return;
13485
13486    case 0x00: /* FMLAL */
13487    case 0x04: /* FMLSL */
13488    case 0x18: /* FMLAL2 */
13489    case 0x1c: /* FMLSL2 */
13490        {
13491            int is_s = extract32(opcode, 2, 1);
13492            int is_2 = u;
13493            int data = (index << 2) | (is_2 << 1) | is_s;
13494            tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
13495                               vec_full_reg_offset(s, rn),
13496                               vec_full_reg_offset(s, rm), cpu_env,
13497                               is_q ? 16 : 8, vec_full_reg_size(s),
13498                               data, gen_helper_gvec_fmlal_idx_a64);
13499        }
13500        return;
13501
13502    case 0x08: /* MUL */
13503        if (!is_long && !is_scalar) {
13504            static gen_helper_gvec_3 * const fns[3] = {
13505                gen_helper_gvec_mul_idx_h,
13506                gen_helper_gvec_mul_idx_s,
13507                gen_helper_gvec_mul_idx_d,
13508            };
13509            tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
13510                               vec_full_reg_offset(s, rn),
13511                               vec_full_reg_offset(s, rm),
13512                               is_q ? 16 : 8, vec_full_reg_size(s),
13513                               index, fns[size - 1]);
13514            return;
13515        }
13516        break;
13517
13518    case 0x10: /* MLA */
13519        if (!is_long && !is_scalar) {
13520            static gen_helper_gvec_4 * const fns[3] = {
13521                gen_helper_gvec_mla_idx_h,
13522                gen_helper_gvec_mla_idx_s,
13523                gen_helper_gvec_mla_idx_d,
13524            };
13525            tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
13526                               vec_full_reg_offset(s, rn),
13527                               vec_full_reg_offset(s, rm),
13528                               vec_full_reg_offset(s, rd),
13529                               is_q ? 16 : 8, vec_full_reg_size(s),
13530                               index, fns[size - 1]);
13531            return;
13532        }
13533        break;
13534
13535    case 0x14: /* MLS */
13536        if (!is_long && !is_scalar) {
13537            static gen_helper_gvec_4 * const fns[3] = {
13538                gen_helper_gvec_mls_idx_h,
13539                gen_helper_gvec_mls_idx_s,
13540                gen_helper_gvec_mls_idx_d,
13541            };
13542            tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
13543                               vec_full_reg_offset(s, rn),
13544                               vec_full_reg_offset(s, rm),
13545                               vec_full_reg_offset(s, rd),
13546                               is_q ? 16 : 8, vec_full_reg_size(s),
13547                               index, fns[size - 1]);
13548            return;
13549        }
13550        break;
13551    }
13552
13553    if (size == 3) {
13554        TCGv_i64 tcg_idx = tcg_temp_new_i64();
13555        int pass;
13556
13557        assert(is_fp && is_q && !is_long);
13558
13559        read_vec_element(s, tcg_idx, rm, index, MO_64);
13560
13561        for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
13562            TCGv_i64 tcg_op = tcg_temp_new_i64();
13563            TCGv_i64 tcg_res = tcg_temp_new_i64();
13564
13565            read_vec_element(s, tcg_op, rn, pass, MO_64);
13566
13567            switch (16 * u + opcode) {
13568            case 0x05: /* FMLS */
13569                /* As usual for ARM, separate negation for fused multiply-add */
13570                gen_helper_vfp_negd(tcg_op, tcg_op);
13571                /* fall through */
13572            case 0x01: /* FMLA */
13573                read_vec_element(s, tcg_res, rd, pass, MO_64);
13574                gen_helper_vfp_muladdd(tcg_res, tcg_op, tcg_idx, tcg_res, fpst);
13575                break;
13576            case 0x09: /* FMUL */
13577                gen_helper_vfp_muld(tcg_res, tcg_op, tcg_idx, fpst);
13578                break;
13579            case 0x19: /* FMULX */
13580                gen_helper_vfp_mulxd(tcg_res, tcg_op, tcg_idx, fpst);
13581                break;
13582            default:
13583                g_assert_not_reached();
13584            }
13585
13586            write_vec_element(s, tcg_res, rd, pass, MO_64);
13587            tcg_temp_free_i64(tcg_op);
13588            tcg_temp_free_i64(tcg_res);
13589        }
13590
13591        tcg_temp_free_i64(tcg_idx);
13592        clear_vec_high(s, !is_scalar, rd);
13593    } else if (!is_long) {
13594        /* 32 bit floating point, or 16 or 32 bit integer.
13595         * For the 16 bit scalar case we use the usual Neon helpers and
13596         * rely on the fact that 0 op 0 == 0 with no side effects.
13597         */
13598        TCGv_i32 tcg_idx = tcg_temp_new_i32();
13599        int pass, maxpasses;
13600
13601        if (is_scalar) {
13602            maxpasses = 1;
13603        } else {
13604            maxpasses = is_q ? 4 : 2;
13605        }
13606
13607        read_vec_element_i32(s, tcg_idx, rm, index, size);
13608
13609        if (size == 1 && !is_scalar) {
13610            /* The simplest way to handle the 16x16 indexed ops is to duplicate
13611             * the index into both halves of the 32 bit tcg_idx and then use
13612             * the usual Neon helpers.
13613             */
13614            tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
13615        }
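             /*
              * For example, an index element of 0x1234 becomes 0x12341234,
              * so a single 32-bit helper such as gen_helper_neon_mul_u16
              * multiplies both 16-bit lanes of tcg_op by the index element.
              */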
13616
13617        for (pass = 0; pass < maxpasses; pass++) {
13618            TCGv_i32 tcg_op = tcg_temp_new_i32();
13619            TCGv_i32 tcg_res = tcg_temp_new_i32();
13620
13621            read_vec_element_i32(s, tcg_op, rn, pass, is_scalar ? size : MO_32);
13622
13623            switch (16 * u + opcode) {
13624            case 0x08: /* MUL */
13625            case 0x10: /* MLA */
13626            case 0x14: /* MLS */
13627            {
13628                static NeonGenTwoOpFn * const fns[2][2] = {
13629                    { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
13630                    { tcg_gen_add_i32, tcg_gen_sub_i32 },
13631                };
13632                NeonGenTwoOpFn *genfn;
13633                bool is_sub = opcode == 0x4;
13634
13635                if (size == 1) {
13636                    gen_helper_neon_mul_u16(tcg_res, tcg_op, tcg_idx);
13637                } else {
13638                    tcg_gen_mul_i32(tcg_res, tcg_op, tcg_idx);
13639                }
13640                if (opcode == 0x8) {
13641                    break;
13642                }
13643                read_vec_element_i32(s, tcg_op, rd, pass, MO_32);
13644                genfn = fns[size - 1][is_sub];
13645                genfn(tcg_res, tcg_op, tcg_res);
13646                break;
13647            }
13648            case 0x05: /* FMLS */
13649            case 0x01: /* FMLA */
13650                read_vec_element_i32(s, tcg_res, rd, pass,
13651                                     is_scalar ? size : MO_32);
13652                switch (size) {
13653                case 1:
13654                    if (opcode == 0x5) {
13655                        /* As usual for ARM, separate negation for fused
13656                         * multiply-add */
13657                        tcg_gen_xori_i32(tcg_op, tcg_op, 0x80008000);
13658                    }
13659                    if (is_scalar) {
13660                        gen_helper_advsimd_muladdh(tcg_res, tcg_op, tcg_idx,
13661                                                   tcg_res, fpst);
13662                    } else {
13663                        gen_helper_advsimd_muladd2h(tcg_res, tcg_op, tcg_idx,
13664                                                    tcg_res, fpst);
13665                    }
13666                    break;
13667                case 2:
13668                    if (opcode == 0x5) {
13669                        /* As usual for ARM, separate negation for
13670                         * fused multiply-add */
13671                        tcg_gen_xori_i32(tcg_op, tcg_op, 0x80000000);
13672                    }
13673                    gen_helper_vfp_muladds(tcg_res, tcg_op, tcg_idx,
13674                                           tcg_res, fpst);
13675                    break;
13676                default:
13677                    g_assert_not_reached();
13678                }
13679                break;
13680            case 0x09: /* FMUL */
13681                switch (size) {
13682                case 1:
13683                    if (is_scalar) {
13684                        gen_helper_advsimd_mulh(tcg_res, tcg_op,
13685                                                tcg_idx, fpst);
13686                    } else {
13687                        gen_helper_advsimd_mul2h(tcg_res, tcg_op,
13688                                                 tcg_idx, fpst);
13689                    }
13690                    break;
13691                case 2:
13692                    gen_helper_vfp_muls(tcg_res, tcg_op, tcg_idx, fpst);
13693                    break;
13694                default:
13695                    g_assert_not_reached();
13696                }
13697                break;
13698            case 0x19: /* FMULX */
13699                switch (size) {
13700                case 1:
13701                    if (is_scalar) {
13702                        gen_helper_advsimd_mulxh(tcg_res, tcg_op,
13703                                                 tcg_idx, fpst);
13704                    } else {
13705                        gen_helper_advsimd_mulx2h(tcg_res, tcg_op,
13706                                                  tcg_idx, fpst);
13707                    }
13708                    break;
13709                case 2:
13710                    gen_helper_vfp_mulxs(tcg_res, tcg_op, tcg_idx, fpst);
13711                    break;
13712                default:
13713                    g_assert_not_reached();
13714                }
13715                break;
13716            case 0x0c: /* SQDMULH */
13717                if (size == 1) {
13718                    gen_helper_neon_qdmulh_s16(tcg_res, cpu_env,
13719                                               tcg_op, tcg_idx);
13720                } else {
13721                    gen_helper_neon_qdmulh_s32(tcg_res, cpu_env,
13722                                               tcg_op, tcg_idx);
13723                }
13724                break;
13725            case 0x0d: /* SQRDMULH */
13726                if (size == 1) {
13727                    gen_helper_neon_qrdmulh_s16(tcg_res, cpu_env,
13728                                                tcg_op, tcg_idx);
13729                } else {
13730                    gen_helper_neon_qrdmulh_s32(tcg_res, cpu_env,
13731                                                tcg_op, tcg_idx);
13732                }
13733                break;
13734            case 0x1d: /* SQRDMLAH */
13735                read_vec_element_i32(s, tcg_res, rd, pass,
13736                                     is_scalar ? size : MO_32);
13737                if (size == 1) {
13738                    gen_helper_neon_qrdmlah_s16(tcg_res, cpu_env,
13739                                                tcg_op, tcg_idx, tcg_res);
13740                } else {
13741                    gen_helper_neon_qrdmlah_s32(tcg_res, cpu_env,
13742                                                tcg_op, tcg_idx, tcg_res);
13743                }
13744                break;
13745            case 0x1f: /* SQRDMLSH */
13746                read_vec_element_i32(s, tcg_res, rd, pass,
13747                                     is_scalar ? size : MO_32);
13748                if (size == 1) {
13749                    gen_helper_neon_qrdmlsh_s16(tcg_res, cpu_env,
13750                                                tcg_op, tcg_idx, tcg_res);
13751                } else {
13752                    gen_helper_neon_qrdmlsh_s32(tcg_res, cpu_env,
13753                                                tcg_op, tcg_idx, tcg_res);
13754                }
13755                break;
13756            default:
13757                g_assert_not_reached();
13758            }
13759
13760            if (is_scalar) {
13761                write_fp_sreg(s, rd, tcg_res);
13762            } else {
13763                write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
13764            }
13765
13766            tcg_temp_free_i32(tcg_op);
13767            tcg_temp_free_i32(tcg_res);
13768        }
13769
13770        tcg_temp_free_i32(tcg_idx);
13771        clear_vec_high(s, is_q, rd);
13772    } else {
13773        /* long ops: 16x16->32 or 32x32->64 */
13774        TCGv_i64 tcg_res[2];
13775        int pass;
13776        bool satop = extract32(opcode, 0, 1);
13777        MemOp memop = MO_32;
13778
13779        if (satop || !u) {
13780            memop |= MO_SIGN;
13781        }
13782
13783        if (size == 2) {
13784            TCGv_i64 tcg_idx = tcg_temp_new_i64();
13785
13786            read_vec_element(s, tcg_idx, rm, index, memop);
13787
13788            for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
13789                TCGv_i64 tcg_op = tcg_temp_new_i64();
13790                TCGv_i64 tcg_passres;
13791                int passelt;
13792
13793                if (is_scalar) {
13794                    passelt = 0;
13795                } else {
13796                    passelt = pass + (is_q * 2);
13797                }
13798
13799                read_vec_element(s, tcg_op, rn, passelt, memop);
13800
13801                tcg_res[pass] = tcg_temp_new_i64();
13802
13803                if (opcode == 0xa || opcode == 0xb) {
13804                    /* Non-accumulating ops */
13805                    tcg_passres = tcg_res[pass];
13806                } else {
13807                    tcg_passres = tcg_temp_new_i64();
13808                }
13809
13810                tcg_gen_mul_i64(tcg_passres, tcg_op, tcg_idx);
13811                tcg_temp_free_i64(tcg_op);
13812
13813                if (satop) {
13814                    /* saturating, doubling */
13815                    gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env,
13816                                                      tcg_passres, tcg_passres);
13817                }
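                     /*
                      * The doubling is thus a saturating self-add, which
                      * also sets QC if the double overflows: for example
                      * 0x80000000 squared is 2^62, and doubling that
                      * saturates to INT64_MAX.
                      */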
13818
13819                if (opcode == 0xa || opcode == 0xb) {
13820                    continue;
13821                }
13822
13823                /* Accumulating op: handle accumulate step */
13824                read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
13825
13826                switch (opcode) {
13827                case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
13828                    tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
13829                    break;
13830                case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
13831                    tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
13832                    break;
13833                case 0x7: /* SQDMLSL, SQDMLSL2 */
13834                    tcg_gen_neg_i64(tcg_passres, tcg_passres);
13835                    /* fall through */
13836                case 0x3: /* SQDMLAL, SQDMLAL2 */
13837                    gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env,
13838                                                      tcg_res[pass],
13839                                                      tcg_passres);
13840                    break;
13841                default:
13842                    g_assert_not_reached();
13843                }
13844                tcg_temp_free_i64(tcg_passres);
13845            }
13846            tcg_temp_free_i64(tcg_idx);
13847
13848            clear_vec_high(s, !is_scalar, rd);
13849        } else {
13850            TCGv_i32 tcg_idx = tcg_temp_new_i32();
13851
13852            assert(size == 1);
13853            read_vec_element_i32(s, tcg_idx, rm, index, size);
13854
13855            if (!is_scalar) {
13856                /* The simplest way to handle the 16x16 indexed ops is to
13857                 * duplicate the index into both halves of the 32 bit tcg_idx
13858                 * and then use the usual Neon helpers.
13859                 */
13860                tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
13861            }
13862
13863            for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
13864                TCGv_i32 tcg_op = tcg_temp_new_i32();
13865                TCGv_i64 tcg_passres;
13866
13867                if (is_scalar) {
13868                    read_vec_element_i32(s, tcg_op, rn, pass, size);
13869                } else {
13870                    read_vec_element_i32(s, tcg_op, rn,
13871                                         pass + (is_q * 2), MO_32);
13872                }
13873
13874                tcg_res[pass] = tcg_temp_new_i64();
13875
13876                if (opcode == 0xa || opcode == 0xb) {
13877                    /* Non-accumulating ops */
13878                    tcg_passres = tcg_res[pass];
13879                } else {
13880                    tcg_passres = tcg_temp_new_i64();
13881                }
13882
13883                if (memop & MO_SIGN) {
13884                    gen_helper_neon_mull_s16(tcg_passres, tcg_op, tcg_idx);
13885                } else {
13886                    gen_helper_neon_mull_u16(tcg_passres, tcg_op, tcg_idx);
13887                }
13888                if (satop) {
13889                    gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
13890                                                      tcg_passres, tcg_passres);
13891                }
13892                tcg_temp_free_i32(tcg_op);
13893
13894                if (opcode == 0xa || opcode == 0xb) {
13895                    continue;
13896                }
13897
13898                /* Accumulating op: handle accumulate step */
13899                read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
13900
13901                switch (opcode) {
13902                case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
13903                    gen_helper_neon_addl_u32(tcg_res[pass], tcg_res[pass],
13904                                             tcg_passres);
13905                    break;
13906                case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
13907                    gen_helper_neon_subl_u32(tcg_res[pass], tcg_res[pass],
13908                                             tcg_passres);
13909                    break;
13910                case 0x7: /* SQDMLSL, SQDMLSL2 */
13911                    gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
13912                    /* fall through */
13913                case 0x3: /* SQDMLAL, SQDMLAL2 */
13914                    gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env,
13915                                                      tcg_res[pass],
13916                                                      tcg_passres);
13917                    break;
13918                default:
13919                    g_assert_not_reached();
13920                }
13921                tcg_temp_free_i64(tcg_passres);
13922            }
13923            tcg_temp_free_i32(tcg_idx);
13924
13925            if (is_scalar) {
13926                tcg_gen_ext32u_i64(tcg_res[0], tcg_res[0]);
13927            }
13928        }
13929
13930        if (is_scalar) {
13931            tcg_res[1] = tcg_const_i64(0);
13932        }
13933
13934        for (pass = 0; pass < 2; pass++) {
13935            write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
13936            tcg_temp_free_i64(tcg_res[pass]);
13937        }
13938    }
13939
13940    if (fpst) {
13941        tcg_temp_free_ptr(fpst);
13942    }
13943}
13944
13945/* Crypto AES
13946 *  31             24 23  22 21       17 16    12 11 10 9    5 4    0
13947 * +-----------------+------+-----------+--------+-----+------+------+
13948 * | 0 1 0 0 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
13949 * +-----------------+------+-----------+--------+-----+------+------+
13950 */
13951static void disas_crypto_aes(DisasContext *s, uint32_t insn)
13952{
13953    int size = extract32(insn, 22, 2);
13954    int opcode = extract32(insn, 12, 5);
13955    int rn = extract32(insn, 5, 5);
13956    int rd = extract32(insn, 0, 5);
13957    int decrypt;
13958    gen_helper_gvec_2 *genfn2 = NULL;
13959    gen_helper_gvec_3 *genfn3 = NULL;
13960
13961    if (!dc_isar_feature(aa64_aes, s) || size != 0) {
13962        unallocated_encoding(s);
13963        return;
13964    }
13965
13966    switch (opcode) {
13967    case 0x4: /* AESE */
13968        decrypt = 0;
13969        genfn3 = gen_helper_crypto_aese;
13970        break;
13971    case 0x6: /* AESMC */
13972        decrypt = 0;
13973        genfn2 = gen_helper_crypto_aesmc;
13974        break;
13975    case 0x5: /* AESD */
13976        decrypt = 1;
13977        genfn3 = gen_helper_crypto_aese;
13978        break;
13979    case 0x7: /* AESIMC */
13980        decrypt = 1;
13981        genfn2 = gen_helper_crypto_aesmc;
13982        break;
13983    default:
13984        unallocated_encoding(s);
13985        return;
13986    }
13987
13988    if (!fp_access_check(s)) {
13989        return;
13990    }
13991    if (genfn2) {
13992        gen_gvec_op2_ool(s, true, rd, rn, decrypt, genfn2);
13993    } else {
13994        gen_gvec_op3_ool(s, true, rd, rd, rn, decrypt, genfn3);
13995    }
13996}
13997
13998/* Crypto three-reg SHA
13999 *  31             24 23  22  21 20  16  15 14    12 11 10 9    5 4    0
14000 * +-----------------+------+---+------+---+--------+-----+------+------+
14001 * | 0 1 0 1 1 1 1 0 | size | 0 |  Rm  | 0 | opcode | 0 0 |  Rn  |  Rd  |
14002 * +-----------------+------+---+------+---+--------+-----+------+------+
14003 */
14004static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn)
14005{
14006    int size = extract32(insn, 22, 2);
14007    int opcode = extract32(insn, 12, 3);
14008    int rm = extract32(insn, 16, 5);
14009    int rn = extract32(insn, 5, 5);
14010    int rd = extract32(insn, 0, 5);
14011    gen_helper_gvec_3 *genfn;
14012    bool feature;
14013
14014    if (size != 0) {
14015        unallocated_encoding(s);
14016        return;
14017    }
14018
14019    switch (opcode) {
14020    case 0: /* SHA1C */
14021        genfn = gen_helper_crypto_sha1c;
14022        feature = dc_isar_feature(aa64_sha1, s);
14023        break;
14024    case 1: /* SHA1P */
14025        genfn = gen_helper_crypto_sha1p;
14026        feature = dc_isar_feature(aa64_sha1, s);
14027        break;
14028    case 2: /* SHA1M */
14029        genfn = gen_helper_crypto_sha1m;
14030        feature = dc_isar_feature(aa64_sha1, s);
14031        break;
14032    case 3: /* SHA1SU0 */
14033        genfn = gen_helper_crypto_sha1su0;
14034        feature = dc_isar_feature(aa64_sha1, s);
14035        break;
14036    case 4: /* SHA256H */
14037        genfn = gen_helper_crypto_sha256h;
14038        feature = dc_isar_feature(aa64_sha256, s);
14039        break;
14040    case 5: /* SHA256H2 */
14041        genfn = gen_helper_crypto_sha256h2;
14042        feature = dc_isar_feature(aa64_sha256, s);
14043        break;
14044    case 6: /* SHA256SU1 */
14045        genfn = gen_helper_crypto_sha256su1;
14046        feature = dc_isar_feature(aa64_sha256, s);
14047        break;
14048    default:
14049        unallocated_encoding(s);
14050        return;
14051    }
14052
14053    if (!feature) {
14054        unallocated_encoding(s);
14055        return;
14056    }
14057
14058    if (!fp_access_check(s)) {
14059        return;
14060    }
14061    gen_gvec_op3_ool(s, true, rd, rn, rm, 0, genfn);
14062}
14063
14064/* Crypto two-reg SHA
14065 *  31             24 23  22 21       17 16    12 11 10 9    5 4    0
14066 * +-----------------+------+-----------+--------+-----+------+------+
14067 * | 0 1 0 1 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
14068 * +-----------------+------+-----------+--------+-----+------+------+
14069 */
14070static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn)
14071{
14072    int size = extract32(insn, 22, 2);
14073    int opcode = extract32(insn, 12, 5);
14074    int rn = extract32(insn, 5, 5);
14075    int rd = extract32(insn, 0, 5);
14076    gen_helper_gvec_2 *genfn;
14077    bool feature;
14078
14079    if (size != 0) {
14080        unallocated_encoding(s);
14081        return;
14082    }
14083
14084    switch (opcode) {
14085    case 0: /* SHA1H */
14086        feature = dc_isar_feature(aa64_sha1, s);
14087        genfn = gen_helper_crypto_sha1h;
14088        break;
14089    case 1: /* SHA1SU1 */
14090        feature = dc_isar_feature(aa64_sha1, s);
14091        genfn = gen_helper_crypto_sha1su1;
14092        break;
14093    case 2: /* SHA256SU0 */
14094        feature = dc_isar_feature(aa64_sha256, s);
14095        genfn = gen_helper_crypto_sha256su0;
14096        break;
14097    default:
14098        unallocated_encoding(s);
14099        return;
14100    }
14101
14102    if (!feature) {
14103        unallocated_encoding(s);
14104        return;
14105    }
14106
14107    if (!fp_access_check(s)) {
14108        return;
14109    }
14110    gen_gvec_op2_ool(s, true, rd, rn, 0, genfn);
14111}
14112
14113static void gen_rax1_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
14114{
14115    tcg_gen_rotli_i64(d, m, 1);
14116    tcg_gen_xor_i64(d, d, n);
14117}
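     /*
      * i.e. RAX1 computes d = n ^ rol64(m, 1) in each 64-bit lane; the
      * vector expansion below is the same operation in TCG vector ops.
      */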
14118
14119static void gen_rax1_vec(unsigned vece, TCGv_vec d, TCGv_vec n, TCGv_vec m)
14120{
14121    tcg_gen_rotli_vec(vece, d, m, 1);
14122    tcg_gen_xor_vec(vece, d, d, n);
14123}
14124
14125void gen_gvec_rax1(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
14126                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
14127{
14128    static const TCGOpcode vecop_list[] = { INDEX_op_rotli_vec, 0 };
14129    static const GVecGen3 op = {
14130        .fni8 = gen_rax1_i64,
14131        .fniv = gen_rax1_vec,
14132        .opt_opc = vecop_list,
14133        .fno = gen_helper_crypto_rax1,
14134        .vece = MO_64,
14135    };
14136    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &op);
14137}
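     /*
      * The GVecGen3 descriptor gives the expander three strategies: the
      * fniv vector expansion when the host supports rotli_vec (advertised
      * via opt_opc), the fni8 64-bit integer fallback, and the out-of-line
      * helper fno as a last resort.
      */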
14138
14139/* Crypto three-reg SHA512
14140 *  31                   21 20  16 15  14  13 12  11  10  9    5 4    0
14141 * +-----------------------+------+---+---+-----+--------+------+------+
14142 * | 1 1 0 0 1 1 1 0 0 1 1 |  Rm  | 1 | O | 0 0 | opcode |  Rn  |  Rd  |
14143 * +-----------------------+------+---+---+-----+--------+------+------+
14144 */
14145static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn)
14146{
14147    int opcode = extract32(insn, 10, 2);
14148    int o = extract32(insn, 14, 1);
14149    int rm = extract32(insn, 16, 5);
14150    int rn = extract32(insn, 5, 5);
14151    int rd = extract32(insn, 0, 5);
14152    bool feature;
14153    gen_helper_gvec_3 *oolfn = NULL;
14154    GVecGen3Fn *gvecfn = NULL;
14155
14156    if (o == 0) {
14157        switch (opcode) {
14158        case 0: /* SHA512H */
14159            feature = dc_isar_feature(aa64_sha512, s);
14160            oolfn = gen_helper_crypto_sha512h;
14161            break;
14162        case 1: /* SHA512H2 */
14163            feature = dc_isar_feature(aa64_sha512, s);
14164            oolfn = gen_helper_crypto_sha512h2;
14165            break;
14166        case 2: /* SHA512SU1 */
14167            feature = dc_isar_feature(aa64_sha512, s);
14168            oolfn = gen_helper_crypto_sha512su1;
14169            break;
14170        case 3: /* RAX1 */
14171            feature = dc_isar_feature(aa64_sha3, s);
14172            gvecfn = gen_gvec_rax1;
14173            break;
14174        default:
14175            g_assert_not_reached();
14176        }
14177    } else {
14178        switch (opcode) {
14179        case 0: /* SM3PARTW1 */
14180            feature = dc_isar_feature(aa64_sm3, s);
14181            oolfn = gen_helper_crypto_sm3partw1;
14182            break;
14183        case 1: /* SM3PARTW2 */
14184            feature = dc_isar_feature(aa64_sm3, s);
14185            oolfn = gen_helper_crypto_sm3partw2;
14186            break;
14187        case 2: /* SM4EKEY */
14188            feature = dc_isar_feature(aa64_sm4, s);
14189            oolfn = gen_helper_crypto_sm4ekey;
14190            break;
14191        default:
14192            unallocated_encoding(s);
14193            return;
14194        }
14195    }
14196
14197    if (!feature) {
14198        unallocated_encoding(s);
14199        return;
14200    }
14201
14202    if (!fp_access_check(s)) {
14203        return;
14204    }
14205
14206    if (oolfn) {
14207        gen_gvec_op3_ool(s, true, rd, rn, rm, 0, oolfn);
14208    } else {
14209        gen_gvec_fn3(s, true, rd, rn, rm, gvecfn, MO_64);
14210    }
14211}
14212
14213/* Crypto two-reg SHA512
14214 *  31                                     12  11  10  9    5 4    0
14215 * +-----------------------------------------+--------+------+------+
14216 * | 1 1 0 0 1 1 1 0 1 1 0 0 0 0 0 0 1 0 0 0 | opcode |  Rn  |  Rd  |
14217 * +-----------------------------------------+--------+------+------+
14218 */
14219static void disas_crypto_two_reg_sha512(DisasContext *s, uint32_t insn)
14220{
14221    int opcode = extract32(insn, 10, 2);
14222    int rn = extract32(insn, 5, 5);
14223    int rd = extract32(insn, 0, 5);
14224    bool feature;
14225
14226    switch (opcode) {
14227    case 0: /* SHA512SU0 */
14228        feature = dc_isar_feature(aa64_sha512, s);
14229        break;
14230    case 1: /* SM4E */
14231        feature = dc_isar_feature(aa64_sm4, s);
14232        break;
14233    default:
14234        unallocated_encoding(s);
14235        return;
14236    }
14237
14238    if (!feature) {
14239        unallocated_encoding(s);
14240        return;
14241    }
14242
14243    if (!fp_access_check(s)) {
14244        return;
14245    }
14246
14247    switch (opcode) {
14248    case 0: /* SHA512SU0 */
14249        gen_gvec_op2_ool(s, true, rd, rn, 0, gen_helper_crypto_sha512su0);
14250        break;
14251    case 1: /* SM4E */
14252        gen_gvec_op3_ool(s, true, rd, rd, rn, 0, gen_helper_crypto_sm4e);
14253        break;
14254    default:
14255        g_assert_not_reached();
14256    }
14257}
14258
14259/* Crypto four-register
14260 *  31               23 22 21 20  16 15  14  10 9    5 4    0
14261 * +-------------------+-----+------+---+------+------+------+
14262 * | 1 1 0 0 1 1 1 0 0 | Op0 |  Rm  | 0 |  Ra  |  Rn  |  Rd  |
14263 * +-------------------+-----+------+---+------+------+------+
14264 */
14265static void disas_crypto_four_reg(DisasContext *s, uint32_t insn)
14266{
14267    int op0 = extract32(insn, 21, 2);
14268    int rm = extract32(insn, 16, 5);
14269    int ra = extract32(insn, 10, 5);
14270    int rn = extract32(insn, 5, 5);
14271    int rd = extract32(insn, 0, 5);
14272    bool feature;
14273
14274    switch (op0) {
14275    case 0: /* EOR3 */
14276    case 1: /* BCAX */
14277        feature = dc_isar_feature(aa64_sha3, s);
14278        break;
14279    case 2: /* SM3SS1 */
14280        feature = dc_isar_feature(aa64_sm3, s);
14281        break;
14282    default:
14283        unallocated_encoding(s);
14284        return;
14285    }
14286
14287    if (!feature) {
14288        unallocated_encoding(s);
14289        return;
14290    }
14291
14292    if (!fp_access_check(s)) {
14293        return;
14294    }
14295
14296    if (op0 < 2) {
14297        TCGv_i64 tcg_op1, tcg_op2, tcg_op3, tcg_res[2];
14298        int pass;
14299
14300        tcg_op1 = tcg_temp_new_i64();
14301        tcg_op2 = tcg_temp_new_i64();
14302        tcg_op3 = tcg_temp_new_i64();
14303        tcg_res[0] = tcg_temp_new_i64();
14304        tcg_res[1] = tcg_temp_new_i64();
14305
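             /*
              * EOR3: Vd = Vn ^ Vm ^ Va
              * BCAX: Vd = Vn ^ (Vm & ~Va)
              */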
14306        for (pass = 0; pass < 2; pass++) {
14307            read_vec_element(s, tcg_op1, rn, pass, MO_64);
14308            read_vec_element(s, tcg_op2, rm, pass, MO_64);
14309            read_vec_element(s, tcg_op3, ra, pass, MO_64);
14310
14311            if (op0 == 0) {
14312                /* EOR3 */
14313                tcg_gen_xor_i64(tcg_res[pass], tcg_op2, tcg_op3);
14314            } else {
14315                /* BCAX */
14316                tcg_gen_andc_i64(tcg_res[pass], tcg_op2, tcg_op3);
14317            }
14318            tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
14319        }
14320        write_vec_element(s, tcg_res[0], rd, 0, MO_64);
14321        write_vec_element(s, tcg_res[1], rd, 1, MO_64);
14322
14323        tcg_temp_free_i64(tcg_op1);
14324        tcg_temp_free_i64(tcg_op2);
14325        tcg_temp_free_i64(tcg_op3);
14326        tcg_temp_free_i64(tcg_res[0]);
14327        tcg_temp_free_i64(tcg_res[1]);
14328    } else {
14329        TCGv_i32 tcg_op1, tcg_op2, tcg_op3, tcg_res, tcg_zero;
14330
14331        tcg_op1 = tcg_temp_new_i32();
14332        tcg_op2 = tcg_temp_new_i32();
14333        tcg_op3 = tcg_temp_new_i32();
14334        tcg_res = tcg_temp_new_i32();
14335        tcg_zero = tcg_const_i32(0);
14336
14337        read_vec_element_i32(s, tcg_op1, rn, 3, MO_32);
14338        read_vec_element_i32(s, tcg_op2, rm, 3, MO_32);
14339        read_vec_element_i32(s, tcg_op3, ra, 3, MO_32);
14340
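             /*
              * SM3SS1: Vd.S[3] = ROL32(ROL32(Vn.S[3], 12) + Vm.S[3] + Va.S[3], 7),
              * with lanes 0-2 of Vd cleared.  The right-rotates below are the
              * equivalent left-rotates: ROR 20 == ROL 12, ROR 25 == ROL 7.
              */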
14341        tcg_gen_rotri_i32(tcg_res, tcg_op1, 20);
14342        tcg_gen_add_i32(tcg_res, tcg_res, tcg_op2);
14343        tcg_gen_add_i32(tcg_res, tcg_res, tcg_op3);
14344        tcg_gen_rotri_i32(tcg_res, tcg_res, 25);
14345
14346        write_vec_element_i32(s, tcg_zero, rd, 0, MO_32);
14347        write_vec_element_i32(s, tcg_zero, rd, 1, MO_32);
14348        write_vec_element_i32(s, tcg_zero, rd, 2, MO_32);
14349        write_vec_element_i32(s, tcg_res, rd, 3, MO_32);
14350
14351        tcg_temp_free_i32(tcg_op1);
14352        tcg_temp_free_i32(tcg_op2);
14353        tcg_temp_free_i32(tcg_op3);
14354        tcg_temp_free_i32(tcg_res);
14355        tcg_temp_free_i32(tcg_zero);
14356    }
14357}
14358
14359/* Crypto XAR
14360 *  31                   21 20  16 15    10 9    5 4    0
14361 * +-----------------------+------+--------+------+------+
14362 * | 1 1 0 0 1 1 1 0 1 0 0 |  Rm  |  imm6  |  Rn  |  Rd  |
14363 * +-----------------------+------+--------+------+------+
14364 */
14365static void disas_crypto_xar(DisasContext *s, uint32_t insn)
14366{
14367    int rm = extract32(insn, 16, 5);
14368    int imm6 = extract32(insn, 10, 6);
14369    int rn = extract32(insn, 5, 5);
14370    int rd = extract32(insn, 0, 5);
14371    TCGv_i64 tcg_op1, tcg_op2, tcg_res[2];
14372    int pass;
14373
14374    if (!dc_isar_feature(aa64_sha3, s)) {
14375        unallocated_encoding(s);
14376        return;
14377    }
14378
14379    if (!fp_access_check(s)) {
14380        return;
14381    }
14382
14383    tcg_op1 = tcg_temp_new_i64();
14384    tcg_op2 = tcg_temp_new_i64();
14385    tcg_res[0] = tcg_temp_new_i64();
14386    tcg_res[1] = tcg_temp_new_i64();
14387
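         /* XAR: Vd.D[i] = ROR64(Vn.D[i] ^ Vm.D[i], imm6) for each 64-bit lane. */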
14388    for (pass = 0; pass < 2; pass++) {
14389        read_vec_element(s, tcg_op1, rn, pass, MO_64);
14390        read_vec_element(s, tcg_op2, rm, pass, MO_64);
14391
14392        tcg_gen_xor_i64(tcg_res[pass], tcg_op1, tcg_op2);
14393        tcg_gen_rotri_i64(tcg_res[pass], tcg_res[pass], imm6);
14394    }
14395    write_vec_element(s, tcg_res[0], rd, 0, MO_64);
14396    write_vec_element(s, tcg_res[1], rd, 1, MO_64);
14397
14398    tcg_temp_free_i64(tcg_op1);
14399    tcg_temp_free_i64(tcg_op2);
14400    tcg_temp_free_i64(tcg_res[0]);
14401    tcg_temp_free_i64(tcg_res[1]);
14402}
14403
14404/* Crypto three-reg imm2
14405 *  31                   21 20  16 15  14 13 12  11  10  9    5 4    0
14406 * +-----------------------+------+-----+------+--------+------+------+
14407 * | 1 1 0 0 1 1 1 0 0 1 0 |  Rm  | 1 0 | imm2 | opcode |  Rn  |  Rd  |
14408 * +-----------------------+------+-----+------+--------+------+------+
14409 */
14410static void disas_crypto_three_reg_imm2(DisasContext *s, uint32_t insn)
14411{
14412    static gen_helper_gvec_3 * const fns[4] = {
14413        gen_helper_crypto_sm3tt1a, gen_helper_crypto_sm3tt1b,
14414        gen_helper_crypto_sm3tt2a, gen_helper_crypto_sm3tt2b,
14415    };
14416    int opcode = extract32(insn, 10, 2);
14417    int imm2 = extract32(insn, 12, 2);
14418    int rm = extract32(insn, 16, 5);
14419    int rn = extract32(insn, 5, 5);
14420    int rd = extract32(insn, 0, 5);
14421
14422    if (!dc_isar_feature(aa64_sm3, s)) {
14423        unallocated_encoding(s);
14424        return;
14425    }
14426
14427    if (!fp_access_check(s)) {
14428        return;
14429    }
14430
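         /*
          * imm2 selects the Vm source lane; it reaches the helper via the
          * gvec data argument.
          */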
14431    gen_gvec_op3_ool(s, true, rd, rn, rm, imm2, fns[opcode]);
14432}
14433
14434/* C3.6 Data processing - SIMD, including Crypto
14435 *
14436 * As the decode gets a little complex we are using a table based
14437 * approach for this part of the decode.
14438 */
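     /*
      * Entries are matched in order as (insn & mask) == pattern; the
      * all-zeroes terminator (mask == 0) marks the end of the table
      * (see lookup_disas_fn).
      */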
14439static const AArch64DecodeTable data_proc_simd[] = {
14440    /* pattern  ,  mask     ,  fn                        */
14441    { 0x0e200400, 0x9f200400, disas_simd_three_reg_same },
14442    { 0x0e008400, 0x9f208400, disas_simd_three_reg_same_extra },
14443    { 0x0e200000, 0x9f200c00, disas_simd_three_reg_diff },
14444    { 0x0e200800, 0x9f3e0c00, disas_simd_two_reg_misc },
14445    { 0x0e300800, 0x9f3e0c00, disas_simd_across_lanes },
14446    { 0x0e000400, 0x9fe08400, disas_simd_copy },
14447    { 0x0f000000, 0x9f000400, disas_simd_indexed }, /* vector indexed */
14448    /* simd_mod_imm decode is a subset of simd_shift_imm, so must precede it */
14449    { 0x0f000400, 0x9ff80400, disas_simd_mod_imm },
14450    { 0x0f000400, 0x9f800400, disas_simd_shift_imm },
14451    { 0x0e000000, 0xbf208c00, disas_simd_tb },
14452    { 0x0e000800, 0xbf208c00, disas_simd_zip_trn },
14453    { 0x2e000000, 0xbf208400, disas_simd_ext },
14454    { 0x5e200400, 0xdf200400, disas_simd_scalar_three_reg_same },
14455    { 0x5e008400, 0xdf208400, disas_simd_scalar_three_reg_same_extra },
14456    { 0x5e200000, 0xdf200c00, disas_simd_scalar_three_reg_diff },
14457    { 0x5e200800, 0xdf3e0c00, disas_simd_scalar_two_reg_misc },
14458    { 0x5e300800, 0xdf3e0c00, disas_simd_scalar_pairwise },
14459    { 0x5e000400, 0xdfe08400, disas_simd_scalar_copy },
14460    { 0x5f000000, 0xdf000400, disas_simd_indexed }, /* scalar indexed */
14461    { 0x5f000400, 0xdf800400, disas_simd_scalar_shift_imm },
14462    { 0x4e280800, 0xff3e0c00, disas_crypto_aes },
14463    { 0x5e000000, 0xff208c00, disas_crypto_three_reg_sha },
14464    { 0x5e280800, 0xff3e0c00, disas_crypto_two_reg_sha },
14465    { 0xce608000, 0xffe0b000, disas_crypto_three_reg_sha512 },
14466    { 0xcec08000, 0xfffff000, disas_crypto_two_reg_sha512 },
14467    { 0xce000000, 0xff808000, disas_crypto_four_reg },
14468    { 0xce800000, 0xffe00000, disas_crypto_xar },
14469    { 0xce408000, 0xffe0c000, disas_crypto_three_reg_imm2 },
14470    { 0x0e400400, 0x9f60c400, disas_simd_three_reg_same_fp16 },
14471    { 0x0e780800, 0x8f7e0c00, disas_simd_two_reg_misc_fp16 },
14472    { 0x5e400400, 0xdf60c400, disas_simd_scalar_three_reg_same_fp16 },
14473    { 0x00000000, 0x00000000, NULL }
14474};
14475
14476static void disas_data_proc_simd(DisasContext *s, uint32_t insn)
14477{
14478    /* Note that this is called with all non-FP cases from
14479     * table C3-6 so it must UNDEF for entries not specifically
14480     * allocated to instructions in that table.
14481     */
14482    AArch64DecodeFn *fn = lookup_disas_fn(&data_proc_simd[0], insn);
14483    if (fn) {
14484        fn(s, insn);
14485    } else {
14486        unallocated_encoding(s);
14487    }
14488}
14489
14490/* C3.6 Data processing - SIMD and floating point */
14491static void disas_data_proc_simd_fp(DisasContext *s, uint32_t insn)
14492{
14493    if (extract32(insn, 28, 1) == 1 && extract32(insn, 30, 1) == 0) {
14494        disas_data_proc_fp(s, insn);
14495    } else {
14496        /* SIMD, including crypto */
14497        disas_data_proc_simd(s, insn);
14498    }
14499}
14500
14501/**
14502 * is_guarded_page:
14503 * @env: The cpu environment
14504 * @s: The DisasContext
14505 *
14506 * Return true if the page is guarded.
14507 */
14508static bool is_guarded_page(CPUARMState *env, DisasContext *s)
14509{
14510    uint64_t addr = s->base.pc_first;
14511#ifdef CONFIG_USER_ONLY
14512    return page_get_flags(addr) & PAGE_BTI;
14513#else
14514    int mmu_idx = arm_to_core_mmu_idx(s->mmu_idx);
14515    unsigned int index = tlb_index(env, mmu_idx, addr);
14516    CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
14517
14518    /*
14519     * We test this immediately after reading an insn, which means
14520     * that any normal page must be in the TLB.  The only exception
14521     * would be for executing from flash or device memory, which
14522     * does not retain the TLB entry.
14523     *
14524     * FIXME: Assume false for those, for now.  We could use
14525     * arm_cpu_get_phys_page_attrs_debug to re-read the page
14526     * table entry even for that case.
14527     */
14528    return (tlb_hit(entry->addr_code, addr) &&
14529            arm_tlb_bti_gp(&env_tlb(env)->d[mmu_idx].iotlb[index].attrs));
14530#endif
14531}
14532
14533/**
14534 * btype_destination_ok:
14535 * @insn: The instruction at the branch destination
14536 * @bt: SCTLR_ELx.BT
14537 * @btype: PSTATE.BTYPE, known to be non-zero
14538 *
14539 * On a guarded page, there are a limited number of insns
14540 * that may be present at the branch target:
14541 *   - branch target identifiers,
14542 *   - PACIASP and PACIBSP,
14543 *   - the BRK insn,
14544 *   - the HLT insn.
14545 * Anything else causes a Branch Target Exception.
14546 *
14547 * Return true if the branch is compatible, false to raise BTITRAP.
14548 */
14549static bool btype_destination_ok(uint32_t insn, bool bt, int btype)
14550{
14551    if ((insn & 0xfffff01fu) == 0xd503201fu) {
14552        /* HINT space */
14553        switch (extract32(insn, 5, 7)) {
14554        case 0b011001: /* PACIASP */
14555        case 0b011011: /* PACIBSP */
14556            /*
14557             * If SCTLR_ELx.BT, then PACI*SP are not compatible
14558             * with btype == 3.  Otherwise all btype are ok.
14559             */
14560            return !bt || btype != 3;
14561        case 0b100000: /* BTI */
14562            /* Not compatible with any btype.  */
14563            return false;
14564        case 0b100010: /* BTI c */
14565            /* Not compatible with btype == 3 */
14566            return btype != 3;
14567        case 0b100100: /* BTI j */
14568            /* Not compatible with btype == 2 */
14569            return btype != 2;
14570        case 0b100110: /* BTI jc */
14571            /* Compatible with any btype.  */
14572            return true;
14573        }
14574    } else {
14575        switch (insn & 0xffe0001fu) {
14576        case 0xd4200000u: /* BRK */
14577        case 0xd4400000u: /* HLT */
14578            /* Give priority to the breakpoint exception.  */
14579            return true;
14580        }
14581    }
14582    return false;
14583}
14584
14585/* C3.1 A64 instruction index by encoding */
14586static void disas_a64_insn(CPUARMState *env, DisasContext *s)
14587{
14588    uint32_t insn;
14589
14590    s->pc_curr = s->base.pc_next;
14591    insn = arm_ldl_code(env, s->base.pc_next, s->sctlr_b);
14592    s->insn = insn;
14593    s->base.pc_next += 4;
14594
14595    s->fp_access_checked = false;
14596    s->sve_access_checked = false;
14597
14598    if (dc_isar_feature(aa64_bti, s)) {
14599        if (s->base.num_insns == 1) {
14600            /*
14601             * At the first insn of the TB, compute s->guarded_page.
14602             * We delayed computing this until successfully reading
14603             * the first insn of the TB, above.  This (mostly) ensures
14604             * that the softmmu tlb entry has been populated, and the
14605             * page table GP bit is available.
14606             *
14607             * Note that we need to compute this even if btype == 0,
14608             * because this value is used for BR instructions later
14609             * where ENV is not available.
14610             */
14611            s->guarded_page = is_guarded_page(env, s);
14612
14613            /* First insn can have btype set to non-zero.  */
14614            tcg_debug_assert(s->btype >= 0);
14615
14616            /*
14617             * Note that the Branch Target Exception has fairly high
14618             * priority -- below debugging exceptions but above almost
14619             * everything else.  This allows us to handle it now
14620             * instead of waiting until the insn is otherwise decoded.
14621             */
14622            if (s->btype != 0
14623                && s->guarded_page
14624                && !btype_destination_ok(insn, s->bt, s->btype)) {
14625                gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
14626                                   syn_btitrap(s->btype),
14627                                   default_exception_el(s));
14628                return;
14629            }
14630        } else {
14631            /* Not the first insn: btype must be 0.  */
14632            tcg_debug_assert(s->btype == 0);
14633        }
14634    }
14635
14636    switch (extract32(insn, 25, 4)) {
14637    case 0x0: case 0x1: case 0x3: /* UNALLOCATED */
14638        unallocated_encoding(s);
14639        break;
14640    case 0x2:
14641        if (!dc_isar_feature(aa64_sve, s) || !disas_sve(s, insn)) {
14642            unallocated_encoding(s);
14643        }
14644        break;
14645    case 0x8: case 0x9: /* Data processing - immediate */
14646        disas_data_proc_imm(s, insn);
14647        break;
14648    case 0xa: case 0xb: /* Branch, exception generation and system insns */
14649        disas_b_exc_sys(s, insn);
14650        break;
14651    case 0x4:
14652    case 0x6:
14653    case 0xc:
14654    case 0xe:      /* Loads and stores */
14655        disas_ldst(s, insn);
14656        break;
14657    case 0x5:
14658    case 0xd:      /* Data processing - register */
14659        disas_data_proc_reg(s, insn);
14660        break;
14661    case 0x7:
14662    case 0xf:      /* Data processing - SIMD and floating point */
14663        disas_data_proc_simd_fp(s, insn);
14664        break;
14665    default:
14666        assert(FALSE); /* all 16 cases should be handled above */
14667        break;
14668    }
14669
14670    /* if we allocated any temporaries, free them here */
14671    free_tmp_a64(s);
14672
14673    /*
14674     * After execution of most insns, btype is reset to 0.
14675     * Note that we set btype == -1 when the insn sets btype.
14676     */
14677    if (s->btype > 0 && s->base.is_jmp != DISAS_NORETURN) {
14678        reset_btype(s);
14679    }
14680}
14681
14682static void aarch64_tr_init_disas_context(DisasContextBase *dcbase,
14683                                          CPUState *cpu)
14684{
14685    DisasContext *dc = container_of(dcbase, DisasContext, base);
14686    CPUARMState *env = cpu->env_ptr;
14687    ARMCPU *arm_cpu = env_archcpu(env);
14688    uint32_t tb_flags = dc->base.tb->flags;
14689    int bound, core_mmu_idx;
14690
14691    dc->isar = &arm_cpu->isar;
14692    dc->condjmp = 0;
14693
14694    dc->aarch64 = 1;
14695    /* If we are coming from secure EL0 in a system with a 32-bit EL3, then
14696     * there is no secure EL1, so we route exceptions to EL3.
14697     */
14698    dc->secure_routed_to_el3 = arm_feature(env, ARM_FEATURE_EL3) &&
14699                               !arm_el_is_aa64(env, 3);
14700    dc->thumb = 0;
14701    dc->sctlr_b = 0;
14702    dc->be_data = FIELD_EX32(tb_flags, TBFLAG_ANY, BE_DATA) ? MO_BE : MO_LE;
14703    dc->condexec_mask = 0;
14704    dc->condexec_cond = 0;
14705    core_mmu_idx = FIELD_EX32(tb_flags, TBFLAG_ANY, MMUIDX);
14706    dc->mmu_idx = core_to_aa64_mmu_idx(core_mmu_idx);
14707    dc->tbii = FIELD_EX32(tb_flags, TBFLAG_A64, TBII);
14708    dc->tbid = FIELD_EX32(tb_flags, TBFLAG_A64, TBID);
14709    dc->tcma = FIELD_EX32(tb_flags, TBFLAG_A64, TCMA);
14710    dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
14711#if !defined(CONFIG_USER_ONLY)
14712    dc->user = (dc->current_el == 0);
14713#endif
14714    dc->fp_excp_el = FIELD_EX32(tb_flags, TBFLAG_ANY, FPEXC_EL);
14715    dc->sve_excp_el = FIELD_EX32(tb_flags, TBFLAG_A64, SVEEXC_EL);
14716    dc->sve_len = (FIELD_EX32(tb_flags, TBFLAG_A64, ZCR_LEN) + 1) * 16;
14717    dc->pauth_active = FIELD_EX32(tb_flags, TBFLAG_A64, PAUTH_ACTIVE);
14718    dc->bt = FIELD_EX32(tb_flags, TBFLAG_A64, BT);
14719    dc->btype = FIELD_EX32(tb_flags, TBFLAG_A64, BTYPE);
14720    dc->unpriv = FIELD_EX32(tb_flags, TBFLAG_A64, UNPRIV);
14721    dc->ata = FIELD_EX32(tb_flags, TBFLAG_A64, ATA);
14722    dc->mte_active[0] = FIELD_EX32(tb_flags, TBFLAG_A64, MTE_ACTIVE);
14723    dc->mte_active[1] = FIELD_EX32(tb_flags, TBFLAG_A64, MTE0_ACTIVE);
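         /*
          * mte_active[] is indexed by the unpriv flag: element [1] applies
          * to the unprivileged (LDTR/STTR-style) accesses.
          */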
14724    dc->vec_len = 0;
14725    dc->vec_stride = 0;
14726    dc->cp_regs = arm_cpu->cp_regs;
14727    dc->features = env->features;
14728    dc->dcz_blocksize = arm_cpu->dcz_blocksize;
14729
14730#ifdef CONFIG_USER_ONLY
14731    /* In sve_probe_page, we assume TBI is enabled. */
14732    tcg_debug_assert(dc->tbid & 1);
14733#endif
14734
14735    /* Single step state. The code-generation logic here is:
14736     *  SS_ACTIVE == 0:
14737     *   generate code with no special handling for single-stepping (except
14738     *   that anything that can make us go to SS_ACTIVE == 1 must end the TB;
14739     *   this happens anyway because those changes are all system register or
14740     *   PSTATE writes).
14741     *  SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
14742     *   emit code for one insn
14743     *   emit code to clear PSTATE.SS
14744     *   emit code to generate software step exception for completed step
14745     *   end TB (as usual for having generated an exception)
14746     *  SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
14747     *   emit code to generate a software step exception
14748     *   end the TB
14749     */
14750    dc->ss_active = FIELD_EX32(tb_flags, TBFLAG_ANY, SS_ACTIVE);
14751    dc->pstate_ss = FIELD_EX32(tb_flags, TBFLAG_ANY, PSTATE_SS);
14752    dc->is_ldex = false;
14753    dc->debug_target_el = FIELD_EX32(tb_flags, TBFLAG_ANY, DEBUG_TARGET_EL);
14754
14755    /* Bound the number of insns to execute to those left on the page.  */
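         /*
          * -(pc | TARGET_PAGE_MASK) is the number of bytes from pc to the
          * end of its page; e.g. with 4KiB pages, a pc 8 bytes before the
          * page boundary yields a bound of 2 insns.
          */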
14756    bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;
14757
14758    /* If architectural single step active, limit to 1.  */
14759    if (dc->ss_active) {
14760        bound = 1;
14761    }
14762    dc->base.max_insns = MIN(dc->base.max_insns, bound);
14763
14764    init_tmp_a64_array(dc);
14765}
14766
14767static void aarch64_tr_tb_start(DisasContextBase *db, CPUState *cpu)
14768{
14769}
14770
14771static void aarch64_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
14772{
14773    DisasContext *dc = container_of(dcbase, DisasContext, base);
14774
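         /*
          * The two extra insn_start words are the conditional execution
          * bits (always 0 for A64) and the partial data-abort syndrome.
          */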
14775    tcg_gen_insn_start(dc->base.pc_next, 0, 0);
14776    dc->insn_start = tcg_last_op();
14777}
14778
14779static bool aarch64_tr_breakpoint_check(DisasContextBase *dcbase, CPUState *cpu,
14780                                        const CPUBreakpoint *bp)
14781{
14782    DisasContext *dc = container_of(dcbase, DisasContext, base);
14783
14784    if (bp->flags & BP_CPU) {
14785        gen_a64_set_pc_im(dc->base.pc_next);
14786        gen_helper_check_breakpoints(cpu_env);
14787        /* End the TB early; it likely won't be executed */
14788        dc->base.is_jmp = DISAS_TOO_MANY;
14789    } else {
14790        gen_exception_internal_insn(dc, dc->base.pc_next, EXCP_DEBUG);
14791        /* The address covered by the breakpoint must be
14792           included in [tb->pc, tb->pc + tb->size) in order
14793           for it to be properly cleared -- thus we
14794           increment the PC here so that the logic setting
14795           tb->size below does the right thing.  */
14796        dc->base.pc_next += 4;
14797        dc->base.is_jmp = DISAS_NORETURN;
14798    }
14799
14800    return true;
14801}
14802
14803static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
14804{
14805    DisasContext *dc = container_of(dcbase, DisasContext, base);
14806    CPUARMState *env = cpu->env_ptr;
14807
14808    if (dc->ss_active && !dc->pstate_ss) {
14809        /* Singlestep state is Active-pending.
14810         * If we're in this state at the start of a TB then either
14811         *  a) we just took an exception to an EL which is being debugged
14812         *     and this is the first insn in the exception handler
14813         *  b) debug exceptions were masked and we just unmasked them
14814         *     without changing EL (eg by clearing PSTATE.D)
14815         * In either case we're going to take a swstep exception in the
14816         * "did not step an insn" case, and so the syndrome ISV and EX
14817         * bits should be zero.
14818         */
14819        assert(dc->base.num_insns == 1);
14820        gen_swstep_exception(dc, 0, 0);
14821        dc->base.is_jmp = DISAS_NORETURN;
14822    } else {
14823        disas_a64_insn(env, dc);
14824    }
14825
14826    translator_loop_temp_check(&dc->base);
14827}
14828
14829static void aarch64_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
14830{
14831    DisasContext *dc = container_of(dcbase, DisasContext, base);
14832
14833    if (unlikely(dc->base.singlestep_enabled || dc->ss_active)) {
14834        /* Note that this means single stepping WFI doesn't halt the CPU.
14835         * For conditional branch insns this is harmless unreachable code as
14836         * gen_goto_tb() has already handled emitting the debug exception
14837         * (and thus a tb-jump is not possible when singlestepping).
14838         */
14839        switch (dc->base.is_jmp) {
14840        default:
14841            gen_a64_set_pc_im(dc->base.pc_next);
14842            /* fall through */
14843        case DISAS_EXIT:
14844        case DISAS_JUMP:
14845            if (dc->base.singlestep_enabled) {
14846                gen_exception_internal(EXCP_DEBUG);
14847            } else {
14848                gen_step_complete_exception(dc);
14849            }
14850            break;
14851        case DISAS_NORETURN:
14852            break;
14853        }
14854    } else {
14855        switch (dc->base.is_jmp) {
14856        case DISAS_NEXT:
14857        case DISAS_TOO_MANY:
14858            gen_goto_tb(dc, 1, dc->base.pc_next);
14859            break;
14860        default:
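             /* Anything not listed is treated like DISAS_UPDATE_EXIT. */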
14861        case DISAS_UPDATE_EXIT:
14862            gen_a64_set_pc_im(dc->base.pc_next);
14863            /* fall through */
14864        case DISAS_EXIT:
14865            tcg_gen_exit_tb(NULL, 0);
14866            break;
14867        case DISAS_UPDATE_NOCHAIN:
14868            gen_a64_set_pc_im(dc->base.pc_next);
14869            /* fall through */
14870        case DISAS_JUMP:
14871            tcg_gen_lookup_and_goto_ptr();
14872            break;
14873        case DISAS_NORETURN:
14874        case DISAS_SWI:
14875            break;
14876        case DISAS_WFE:
14877            gen_a64_set_pc_im(dc->base.pc_next);
14878            gen_helper_wfe(cpu_env);
14879            break;
14880        case DISAS_YIELD:
14881            gen_a64_set_pc_im(dc->base.pc_next);
14882            gen_helper_yield(cpu_env);
14883            break;
14884        case DISAS_WFI:
14885        {
14886            /* This is a special case because we don't want to just halt the CPU
14887             * if trying to debug across a WFI.
14888             */
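                 /* The helper takes the insn length; an A64 WFI is always 4 bytes. */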
14889            TCGv_i32 tmp = tcg_const_i32(4);
14890
14891            gen_a64_set_pc_im(dc->base.pc_next);
14892            gen_helper_wfi(cpu_env, tmp);
14893            tcg_temp_free_i32(tmp);
14894            /* The helper doesn't necessarily throw an exception, but we
14895             * must go back to the main loop to check for interrupts anyway.
14896             */
14897            tcg_gen_exit_tb(NULL, 0);
14898            break;
14899        }
14900        }
14901    }
14902}
14903
14904static void aarch64_tr_disas_log(const DisasContextBase *dcbase,
14905                                 CPUState *cpu)
14906{
14907    DisasContext *dc = container_of(dcbase, DisasContext, base);
14908
14909    qemu_log("IN: %s\n", lookup_symbol(dc->base.pc_first));
14910    log_target_disas(cpu, dc->base.pc_first, dc->base.tb->size);
14911}
14912
14913const TranslatorOps aarch64_translator_ops = {
14914    .init_disas_context = aarch64_tr_init_disas_context,
14915    .tb_start           = aarch64_tr_tb_start,
14916    .insn_start         = aarch64_tr_insn_start,
14917    .breakpoint_check   = aarch64_tr_breakpoint_check,
14918    .translate_insn     = aarch64_tr_translate_insn,
14919    .tb_stop            = aarch64_tr_tb_stop,
14920    .disas_log          = aarch64_tr_disas_log,
14921};
14922