qemu/target/arm/translate.c
   1/*
   2 *  ARM translation
   3 *
   4 *  Copyright (c) 2003 Fabrice Bellard
   5 *  Copyright (c) 2005-2007 CodeSourcery
   6 *  Copyright (c) 2007 OpenedHand, Ltd.
   7 *
   8 * This library is free software; you can redistribute it and/or
   9 * modify it under the terms of the GNU Lesser General Public
  10 * License as published by the Free Software Foundation; either
  11 * version 2 of the License, or (at your option) any later version.
  12 *
  13 * This library is distributed in the hope that it will be useful,
  14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  16 * Lesser General Public License for more details.
  17 *
  18 * You should have received a copy of the GNU Lesser General Public
  19 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  20 */
  21#include "qemu/osdep.h"
  22
  23#include "cpu.h"
  24#include "internals.h"
  25#include "disas/disas.h"
  26#include "exec/exec-all.h"
  27#include "tcg-op.h"
  28#include "tcg-op-gvec.h"
  29#include "qemu/log.h"
  30#include "qemu/bitops.h"
  31#include "arm_ldst.h"
  32#include "hw/semihosting/semihost.h"
  33
  34#include "exec/helper-proto.h"
  35#include "exec/helper-gen.h"
  36
  37#include "trace-tcg.h"
  38#include "exec/log.h"
  39
  40
  41#define ENABLE_ARCH_4T    arm_dc_feature(s, ARM_FEATURE_V4T)
  42#define ENABLE_ARCH_5     arm_dc_feature(s, ARM_FEATURE_V5)
  43/* currently all emulated v5 cores are also v5TE, so don't bother */
  44#define ENABLE_ARCH_5TE   arm_dc_feature(s, ARM_FEATURE_V5)
  45#define ENABLE_ARCH_5J    dc_isar_feature(jazelle, s)
  46#define ENABLE_ARCH_6     arm_dc_feature(s, ARM_FEATURE_V6)
  47#define ENABLE_ARCH_6K    arm_dc_feature(s, ARM_FEATURE_V6K)
  48#define ENABLE_ARCH_6T2   arm_dc_feature(s, ARM_FEATURE_THUMB2)
  49#define ENABLE_ARCH_7     arm_dc_feature(s, ARM_FEATURE_V7)
  50#define ENABLE_ARCH_8     arm_dc_feature(s, ARM_FEATURE_V8)
  51
  52#define ARCH(x) do { if (!ENABLE_ARCH_##x) goto illegal_op; } while(0)
  53
  54#include "translate.h"
  55
  56#if defined(CONFIG_USER_ONLY)
  57#define IS_USER(s) 1
  58#else
  59#define IS_USER(s) (s->user)
  60#endif
  61
  62/* We reuse the same 64-bit temporaries for efficiency.  */
  63static TCGv_i64 cpu_V0, cpu_V1, cpu_M0;
  64static TCGv_i32 cpu_R[16];
  65TCGv_i32 cpu_CF, cpu_NF, cpu_VF, cpu_ZF;
  66TCGv_i64 cpu_exclusive_addr;
  67TCGv_i64 cpu_exclusive_val;
  68
  69#include "exec/gen-icount.h"
  70
  71static const char * const regnames[] =
  72    { "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
  73      "r8", "r9", "r10", "r11", "r12", "r13", "r14", "pc" };
  74
  75/* Function prototypes for gen_ functions calling Neon helpers.  */
  76typedef void NeonGenThreeOpEnvFn(TCGv_i32, TCGv_env, TCGv_i32,
  77                                 TCGv_i32, TCGv_i32);
   79/* Function prototypes for gen_ functions for fixed point conversions */
  79typedef void VFPGenFixPointFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
  80
  81/* initialize TCG globals.  */
  82void arm_translate_init(void)
  83{
  84    int i;
  85
  86    for (i = 0; i < 16; i++) {
  87        cpu_R[i] = tcg_global_mem_new_i32(cpu_env,
  88                                          offsetof(CPUARMState, regs[i]),
  89                                          regnames[i]);
  90    }
  91    cpu_CF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, CF), "CF");
  92    cpu_NF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, NF), "NF");
  93    cpu_VF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, VF), "VF");
  94    cpu_ZF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, ZF), "ZF");
  95
  96    cpu_exclusive_addr = tcg_global_mem_new_i64(cpu_env,
  97        offsetof(CPUARMState, exclusive_addr), "exclusive_addr");
  98    cpu_exclusive_val = tcg_global_mem_new_i64(cpu_env,
  99        offsetof(CPUARMState, exclusive_val), "exclusive_val");
 100
 101    a64_translate_init();
 102}
 103
 104/* Flags for the disas_set_da_iss info argument:
 105 * lower bits hold the Rt register number, higher bits are flags.
 106 */
 107typedef enum ISSInfo {
 108    ISSNone = 0,
 109    ISSRegMask = 0x1f,
 110    ISSInvalid = (1 << 5),
 111    ISSIsAcqRel = (1 << 6),
 112    ISSIsWrite = (1 << 7),
 113    ISSIs16Bit = (1 << 8),
 114} ISSInfo;
 115
 116/* Save the syndrome information for a Data Abort */
 117static void disas_set_da_iss(DisasContext *s, MemOp memop, ISSInfo issinfo)
 118{
 119    uint32_t syn;
 120    int sas = memop & MO_SIZE;
 121    bool sse = memop & MO_SIGN;
 122    bool is_acqrel = issinfo & ISSIsAcqRel;
 123    bool is_write = issinfo & ISSIsWrite;
 124    bool is_16bit = issinfo & ISSIs16Bit;
 125    int srt = issinfo & ISSRegMask;
 126
 127    if (issinfo & ISSInvalid) {
 128        /* Some callsites want to conditionally provide ISS info,
 129         * eg "only if this was not a writeback"
 130         */
 131        return;
 132    }
 133
 134    if (srt == 15) {
 135        /* For AArch32, insns where the src/dest is R15 never generate
 136         * ISS information. Catching that here saves checking at all
 137         * the call sites.
 138         */
 139        return;
 140    }
 141
 142    syn = syn_data_abort_with_iss(0, sas, sse, srt, 0, is_acqrel,
 143                                  0, 0, 0, is_write, 0, is_16bit);
 144    disas_set_insn_syndrome(s, syn);
 145}
 146
 147static inline int get_a32_user_mem_index(DisasContext *s)
 148{
 149    /* Return the core mmu_idx to use for A32/T32 "unprivileged load/store"
 150     * insns:
 151     *  if PL2, UNPREDICTABLE (we choose to implement as if PL0)
 152     *  otherwise, access as if at PL0.
 153     */
 154    switch (s->mmu_idx) {
 155    case ARMMMUIdx_S1E2:        /* this one is UNPREDICTABLE */
 156    case ARMMMUIdx_S12NSE0:
 157    case ARMMMUIdx_S12NSE1:
 158        return arm_to_core_mmu_idx(ARMMMUIdx_S12NSE0);
 159    case ARMMMUIdx_S1E3:
 160    case ARMMMUIdx_S1SE0:
 161    case ARMMMUIdx_S1SE1:
 162        return arm_to_core_mmu_idx(ARMMMUIdx_S1SE0);
 163    case ARMMMUIdx_MUser:
 164    case ARMMMUIdx_MPriv:
 165        return arm_to_core_mmu_idx(ARMMMUIdx_MUser);
 166    case ARMMMUIdx_MUserNegPri:
 167    case ARMMMUIdx_MPrivNegPri:
 168        return arm_to_core_mmu_idx(ARMMMUIdx_MUserNegPri);
 169    case ARMMMUIdx_MSUser:
 170    case ARMMMUIdx_MSPriv:
 171        return arm_to_core_mmu_idx(ARMMMUIdx_MSUser);
 172    case ARMMMUIdx_MSUserNegPri:
 173    case ARMMMUIdx_MSPrivNegPri:
 174        return arm_to_core_mmu_idx(ARMMMUIdx_MSUserNegPri);
 175    case ARMMMUIdx_S2NS:
 176    default:
 177        g_assert_not_reached();
 178    }
 179}
 180
 181static inline TCGv_i32 load_cpu_offset(int offset)
 182{
 183    TCGv_i32 tmp = tcg_temp_new_i32();
 184    tcg_gen_ld_i32(tmp, cpu_env, offset);
 185    return tmp;
 186}
 187
 188#define load_cpu_field(name) load_cpu_offset(offsetof(CPUARMState, name))
 189
 190static inline void store_cpu_offset(TCGv_i32 var, int offset)
 191{
 192    tcg_gen_st_i32(var, cpu_env, offset);
 193    tcg_temp_free_i32(var);
 194}
 195
 196#define store_cpu_field(var, name) \
 197    store_cpu_offset(var, offsetof(CPUARMState, name))
 198
 199/* The architectural value of PC.  */
 200static uint32_t read_pc(DisasContext *s)
 201{
 202    return s->pc_curr + (s->thumb ? 4 : 8);
 203}
 204
 205/* Set a variable to the value of a CPU register.  */
 206static void load_reg_var(DisasContext *s, TCGv_i32 var, int reg)
 207{
 208    if (reg == 15) {
 209        tcg_gen_movi_i32(var, read_pc(s));
 210    } else {
 211        tcg_gen_mov_i32(var, cpu_R[reg]);
 212    }
 213}
 214
 215/* Create a new temporary and set it to the value of a CPU register.  */
 216static inline TCGv_i32 load_reg(DisasContext *s, int reg)
 217{
 218    TCGv_i32 tmp = tcg_temp_new_i32();
 219    load_reg_var(s, tmp, reg);
 220    return tmp;
 221}
 222
 223/*
 224 * Create a new temp, REG + OFS, except PC is ALIGN(PC, 4).
 225 * This is used for load/store for which use of PC implies (literal),
 226 * or ADD that implies ADR.
 227 */
 228static TCGv_i32 add_reg_for_lit(DisasContext *s, int reg, int ofs)
 229{
 230    TCGv_i32 tmp = tcg_temp_new_i32();
 231
 232    if (reg == 15) {
 233        tcg_gen_movi_i32(tmp, (read_pc(s) & ~3) + ofs);
 234    } else {
 235        tcg_gen_addi_i32(tmp, cpu_R[reg], ofs);
 236    }
 237    return tmp;
 238}
 239
 240/* Set a CPU register.  The source must be a temporary and will be
 241   marked as dead.  */
 242static void store_reg(DisasContext *s, int reg, TCGv_i32 var)
 243{
 244    if (reg == 15) {
 245        /* In Thumb mode, we must ignore bit 0.
 246         * In ARM mode, for ARMv4 and ARMv5, it is UNPREDICTABLE if bits [1:0]
 247         * are not 0b00, but for ARMv6 and above, we must ignore bits [1:0].
 248         * We choose to ignore [1:0] in ARM mode for all architecture versions.
 249         */
 250        tcg_gen_andi_i32(var, var, s->thumb ? ~1 : ~3);
 251        s->base.is_jmp = DISAS_JUMP;
 252    }
 253    tcg_gen_mov_i32(cpu_R[reg], var);
 254    tcg_temp_free_i32(var);
 255}
 256
 257/*
 258 * Variant of store_reg which applies v8M stack-limit checks before updating
 259 * SP. If the check fails this will result in an exception being taken.
 260 * We disable the stack checks for CONFIG_USER_ONLY because we have
 261 * no idea what the stack limits should be in that case.
 262 * If stack checking is not being done this just acts like store_reg().
 263 */
 264static void store_sp_checked(DisasContext *s, TCGv_i32 var)
 265{
 266#ifndef CONFIG_USER_ONLY
 267    if (s->v8m_stackcheck) {
 268        gen_helper_v8m_stackcheck(cpu_env, var);
 269    }
 270#endif
 271    store_reg(s, 13, var);
 272}
 273
 274/* Value extensions.  */
 275#define gen_uxtb(var) tcg_gen_ext8u_i32(var, var)
 276#define gen_uxth(var) tcg_gen_ext16u_i32(var, var)
 277#define gen_sxtb(var) tcg_gen_ext8s_i32(var, var)
 278#define gen_sxth(var) tcg_gen_ext16s_i32(var, var)
 279
 280#define gen_sxtb16(var) gen_helper_sxtb16(var, var)
 281#define gen_uxtb16(var) gen_helper_uxtb16(var, var)
 282
 283
 284static inline void gen_set_cpsr(TCGv_i32 var, uint32_t mask)
 285{
 286    TCGv_i32 tmp_mask = tcg_const_i32(mask);
 287    gen_helper_cpsr_write(cpu_env, var, tmp_mask);
 288    tcg_temp_free_i32(tmp_mask);
 289}
 290/* Set NZCV flags from the high 4 bits of var.  */
 291#define gen_set_nzcv(var) gen_set_cpsr(var, CPSR_NZCV)
 292
 293static void gen_exception_internal(int excp)
 294{
 295    TCGv_i32 tcg_excp = tcg_const_i32(excp);
 296
 297    assert(excp_is_internal(excp));
 298    gen_helper_exception_internal(cpu_env, tcg_excp);
 299    tcg_temp_free_i32(tcg_excp);
 300}
 301
 302static void gen_step_complete_exception(DisasContext *s)
 303{
  304    /* We just completed a step of an insn. Move from Active-not-pending
 305     * to Active-pending, and then also take the swstep exception.
 306     * This corresponds to making the (IMPDEF) choice to prioritize
 307     * swstep exceptions over asynchronous exceptions taken to an exception
 308     * level where debug is disabled. This choice has the advantage that
 309     * we do not need to maintain internal state corresponding to the
 310     * ISV/EX syndrome bits between completion of the step and generation
 311     * of the exception, and our syndrome information is always correct.
 312     */
 313    gen_ss_advance(s);
 314    gen_swstep_exception(s, 1, s->is_ldex);
 315    s->base.is_jmp = DISAS_NORETURN;
 316}
 317
 318static void gen_singlestep_exception(DisasContext *s)
 319{
 320    /* Generate the right kind of exception for singlestep, which is
 321     * either the architectural singlestep or EXCP_DEBUG for QEMU's
 322     * gdb singlestepping.
 323     */
 324    if (s->ss_active) {
 325        gen_step_complete_exception(s);
 326    } else {
 327        gen_exception_internal(EXCP_DEBUG);
 328    }
 329}
 330
 331static inline bool is_singlestepping(DisasContext *s)
 332{
 333    /* Return true if we are singlestepping either because of
 334     * architectural singlestep or QEMU gdbstub singlestep. This does
 335     * not include the command line '-singlestep' mode which is rather
 336     * misnamed as it only means "one instruction per TB" and doesn't
 337     * affect the code we generate.
 338     */
 339    return s->base.singlestep_enabled || s->ss_active;
 340}
 341
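     /* Dual signed 16x16->32 multiply: on return, a holds the product of
      * the two low halfwords and b the product of the two high halfwords.
      */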
 342static void gen_smul_dual(TCGv_i32 a, TCGv_i32 b)
 343{
 344    TCGv_i32 tmp1 = tcg_temp_new_i32();
 345    TCGv_i32 tmp2 = tcg_temp_new_i32();
 346    tcg_gen_ext16s_i32(tmp1, a);
 347    tcg_gen_ext16s_i32(tmp2, b);
 348    tcg_gen_mul_i32(tmp1, tmp1, tmp2);
 349    tcg_temp_free_i32(tmp2);
 350    tcg_gen_sari_i32(a, a, 16);
 351    tcg_gen_sari_i32(b, b, 16);
 352    tcg_gen_mul_i32(b, b, a);
 353    tcg_gen_mov_i32(a, tmp1);
 354    tcg_temp_free_i32(tmp1);
 355}
 356
 357/* Byteswap each halfword.  */
 358static void gen_rev16(TCGv_i32 dest, TCGv_i32 var)
 359{
 360    TCGv_i32 tmp = tcg_temp_new_i32();
 361    TCGv_i32 mask = tcg_const_i32(0x00ff00ff);
 362    tcg_gen_shri_i32(tmp, var, 8);
 363    tcg_gen_and_i32(tmp, tmp, mask);
 364    tcg_gen_and_i32(var, var, mask);
 365    tcg_gen_shli_i32(var, var, 8);
 366    tcg_gen_or_i32(dest, var, tmp);
 367    tcg_temp_free_i32(mask);
 368    tcg_temp_free_i32(tmp);
 369}
 370
 371/* Byteswap low halfword and sign extend.  */
 372static void gen_revsh(TCGv_i32 dest, TCGv_i32 var)
 373{
 374    tcg_gen_ext16u_i32(var, var);
 375    tcg_gen_bswap16_i32(var, var);
 376    tcg_gen_ext16s_i32(dest, var);
 377}
 378
 379/* 32x32->64 multiply.  Marks inputs as dead.  */
 380static TCGv_i64 gen_mulu_i64_i32(TCGv_i32 a, TCGv_i32 b)
 381{
 382    TCGv_i32 lo = tcg_temp_new_i32();
 383    TCGv_i32 hi = tcg_temp_new_i32();
 384    TCGv_i64 ret;
 385
 386    tcg_gen_mulu2_i32(lo, hi, a, b);
 387    tcg_temp_free_i32(a);
 388    tcg_temp_free_i32(b);
 389
 390    ret = tcg_temp_new_i64();
 391    tcg_gen_concat_i32_i64(ret, lo, hi);
 392    tcg_temp_free_i32(lo);
 393    tcg_temp_free_i32(hi);
 394
 395    return ret;
 396}
 397
 398static TCGv_i64 gen_muls_i64_i32(TCGv_i32 a, TCGv_i32 b)
 399{
 400    TCGv_i32 lo = tcg_temp_new_i32();
 401    TCGv_i32 hi = tcg_temp_new_i32();
 402    TCGv_i64 ret;
 403
 404    tcg_gen_muls2_i32(lo, hi, a, b);
 405    tcg_temp_free_i32(a);
 406    tcg_temp_free_i32(b);
 407
 408    ret = tcg_temp_new_i64();
 409    tcg_gen_concat_i32_i64(ret, lo, hi);
 410    tcg_temp_free_i32(lo);
 411    tcg_temp_free_i32(hi);
 412
 413    return ret;
 414}
 415
 416/* Swap low and high halfwords.  */
 417static void gen_swap_half(TCGv_i32 var)
 418{
 419    tcg_gen_rotri_i32(var, var, 16);
 420}
 421
  422/* Dual 16-bit add.  Result placed in dest; t0 and t1 are clobbered but not freed.
 423    tmp = (t0 ^ t1) & 0x8000;
 424    t0 &= ~0x8000;
 425    t1 &= ~0x8000;
 426    t0 = (t0 + t1) ^ tmp;
 427 */
 428
 429static void gen_add16(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
 430{
 431    TCGv_i32 tmp = tcg_temp_new_i32();
 432    tcg_gen_xor_i32(tmp, t0, t1);
 433    tcg_gen_andi_i32(tmp, tmp, 0x8000);
 434    tcg_gen_andi_i32(t0, t0, ~0x8000);
 435    tcg_gen_andi_i32(t1, t1, ~0x8000);
 436    tcg_gen_add_i32(t0, t0, t1);
 437    tcg_gen_xor_i32(dest, t0, tmp);
 438    tcg_temp_free_i32(tmp);
 439}
 440
 441/* Set N and Z flags from var.  */
 442static inline void gen_logic_CC(TCGv_i32 var)
 443{
 444    tcg_gen_mov_i32(cpu_NF, var);
 445    tcg_gen_mov_i32(cpu_ZF, var);
 446}
 447
 448/* dest = T0 + T1 + CF. */
 449static void gen_add_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
 450{
 451    tcg_gen_add_i32(dest, t0, t1);
 452    tcg_gen_add_i32(dest, dest, cpu_CF);
 453}
 454
 455/* dest = T0 - T1 + CF - 1.  */
 456static void gen_sub_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
 457{
 458    tcg_gen_sub_i32(dest, t0, t1);
 459    tcg_gen_add_i32(dest, dest, cpu_CF);
 460    tcg_gen_subi_i32(dest, dest, 1);
 461}
 462
 463/* dest = T0 + T1. Compute C, N, V and Z flags */
 464static void gen_add_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
 465{
 466    TCGv_i32 tmp = tcg_temp_new_i32();
 467    tcg_gen_movi_i32(tmp, 0);
 468    tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, t1, tmp);
 469    tcg_gen_mov_i32(cpu_ZF, cpu_NF);
 470    tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
 471    tcg_gen_xor_i32(tmp, t0, t1);
 472    tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
 473    tcg_temp_free_i32(tmp);
 474    tcg_gen_mov_i32(dest, cpu_NF);
 475}
 476
 477/* dest = T0 + T1 + CF.  Compute C, N, V and Z flags */
 478static void gen_adc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
 479{
 480    TCGv_i32 tmp = tcg_temp_new_i32();
 481    if (TCG_TARGET_HAS_add2_i32) {
 482        tcg_gen_movi_i32(tmp, 0);
 483        tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, cpu_CF, tmp);
 484        tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1, tmp);
 485    } else {
 486        TCGv_i64 q0 = tcg_temp_new_i64();
 487        TCGv_i64 q1 = tcg_temp_new_i64();
 488        tcg_gen_extu_i32_i64(q0, t0);
 489        tcg_gen_extu_i32_i64(q1, t1);
 490        tcg_gen_add_i64(q0, q0, q1);
 491        tcg_gen_extu_i32_i64(q1, cpu_CF);
 492        tcg_gen_add_i64(q0, q0, q1);
 493        tcg_gen_extr_i64_i32(cpu_NF, cpu_CF, q0);
 494        tcg_temp_free_i64(q0);
 495        tcg_temp_free_i64(q1);
 496    }
 497    tcg_gen_mov_i32(cpu_ZF, cpu_NF);
 498    tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
 499    tcg_gen_xor_i32(tmp, t0, t1);
 500    tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
 501    tcg_temp_free_i32(tmp);
 502    tcg_gen_mov_i32(dest, cpu_NF);
 503}
 504
 505/* dest = T0 - T1. Compute C, N, V and Z flags */
 506static void gen_sub_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
 507{
 508    TCGv_i32 tmp;
 509    tcg_gen_sub_i32(cpu_NF, t0, t1);
 510    tcg_gen_mov_i32(cpu_ZF, cpu_NF);
 511    tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0, t1);
 512    tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
 513    tmp = tcg_temp_new_i32();
 514    tcg_gen_xor_i32(tmp, t0, t1);
 515    tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
 516    tcg_temp_free_i32(tmp);
 517    tcg_gen_mov_i32(dest, cpu_NF);
 518}
 519
 520/* dest = T0 + ~T1 + CF.  Compute C, N, V and Z flags */
 521static void gen_sbc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
 522{
 523    TCGv_i32 tmp = tcg_temp_new_i32();
 524    tcg_gen_not_i32(tmp, t1);
 525    gen_adc_CC(dest, t0, tmp);
 526    tcg_temp_free_i32(tmp);
 527}
 528
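     /* Variable shifts with ARM semantics: only the bottom byte of t1 is
      * used as the shift count, and counts of 32 or more yield zero.
      */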
 529#define GEN_SHIFT(name)                                               \
 530static void gen_##name(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)       \
 531{                                                                     \
 532    TCGv_i32 tmp1, tmp2, tmp3;                                        \
 533    tmp1 = tcg_temp_new_i32();                                        \
 534    tcg_gen_andi_i32(tmp1, t1, 0xff);                                 \
 535    tmp2 = tcg_const_i32(0);                                          \
 536    tmp3 = tcg_const_i32(0x1f);                                       \
 537    tcg_gen_movcond_i32(TCG_COND_GTU, tmp2, tmp1, tmp3, tmp2, t0);    \
 538    tcg_temp_free_i32(tmp3);                                          \
 539    tcg_gen_andi_i32(tmp1, tmp1, 0x1f);                               \
 540    tcg_gen_##name##_i32(dest, tmp2, tmp1);                           \
 541    tcg_temp_free_i32(tmp2);                                          \
 542    tcg_temp_free_i32(tmp1);                                          \
 543}
 544GEN_SHIFT(shl)
 545GEN_SHIFT(shr)
 546#undef GEN_SHIFT
 547
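     /* Variable ASR with ARM semantics: shift counts of 32 or more behave
      * as a shift by 31, i.e. the result is filled with the sign bit.
      */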
 548static void gen_sar(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
 549{
 550    TCGv_i32 tmp1, tmp2;
 551    tmp1 = tcg_temp_new_i32();
 552    tcg_gen_andi_i32(tmp1, t1, 0xff);
 553    tmp2 = tcg_const_i32(0x1f);
 554    tcg_gen_movcond_i32(TCG_COND_GTU, tmp1, tmp1, tmp2, tmp2, tmp1);
 555    tcg_temp_free_i32(tmp2);
 556    tcg_gen_sar_i32(dest, t0, tmp1);
 557    tcg_temp_free_i32(tmp1);
 558}
 559
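     /* Set the carry flag to bit SHIFT of var (the shifter carry-out).  */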
 560static void shifter_out_im(TCGv_i32 var, int shift)
 561{
 562    tcg_gen_extract_i32(cpu_CF, var, shift, 1);
 563}
 564
 565/* Shift by immediate.  Includes special handling for shift == 0.  */
 566static inline void gen_arm_shift_im(TCGv_i32 var, int shiftop,
 567                                    int shift, int flags)
 568{
 569    switch (shiftop) {
 570    case 0: /* LSL */
 571        if (shift != 0) {
 572            if (flags)
 573                shifter_out_im(var, 32 - shift);
 574            tcg_gen_shli_i32(var, var, shift);
 575        }
 576        break;
 577    case 1: /* LSR */
 578        if (shift == 0) {
 579            if (flags) {
 580                tcg_gen_shri_i32(cpu_CF, var, 31);
 581            }
 582            tcg_gen_movi_i32(var, 0);
 583        } else {
 584            if (flags)
 585                shifter_out_im(var, shift - 1);
 586            tcg_gen_shri_i32(var, var, shift);
 587        }
 588        break;
 589    case 2: /* ASR */
 590        if (shift == 0)
 591            shift = 32;
 592        if (flags)
 593            shifter_out_im(var, shift - 1);
 594        if (shift == 32)
 595          shift = 31;
 596        tcg_gen_sari_i32(var, var, shift);
 597        break;
 598    case 3: /* ROR/RRX */
 599        if (shift != 0) {
 600            if (flags)
 601                shifter_out_im(var, shift - 1);
 602            tcg_gen_rotri_i32(var, var, shift); break;
 603        } else {
 604            TCGv_i32 tmp = tcg_temp_new_i32();
 605            tcg_gen_shli_i32(tmp, cpu_CF, 31);
 606            if (flags)
 607                shifter_out_im(var, 0);
 608            tcg_gen_shri_i32(var, var, 1);
 609            tcg_gen_or_i32(var, var, tmp);
 610            tcg_temp_free_i32(tmp);
 611        }
 612    }
  613}
 614
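     /* Shift by register: if FLAGS is set, use the flag-setting helpers
      * (which also compute the shifter carry-out into CF); otherwise use
      * the plain shift generators above.  The shift temporary is freed.
      */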
 615static inline void gen_arm_shift_reg(TCGv_i32 var, int shiftop,
 616                                     TCGv_i32 shift, int flags)
 617{
 618    if (flags) {
 619        switch (shiftop) {
 620        case 0: gen_helper_shl_cc(var, cpu_env, var, shift); break;
 621        case 1: gen_helper_shr_cc(var, cpu_env, var, shift); break;
 622        case 2: gen_helper_sar_cc(var, cpu_env, var, shift); break;
 623        case 3: gen_helper_ror_cc(var, cpu_env, var, shift); break;
 624        }
 625    } else {
 626        switch (shiftop) {
 627        case 0:
 628            gen_shl(var, var, shift);
 629            break;
 630        case 1:
 631            gen_shr(var, var, shift);
 632            break;
 633        case 2:
 634            gen_sar(var, var, shift);
 635            break;
 636        case 3: tcg_gen_andi_i32(shift, shift, 0x1f);
 637                tcg_gen_rotr_i32(var, var, shift); break;
 638        }
 639    }
 640    tcg_temp_free_i32(shift);
 641}
 642
 643/*
 644 * Generate a conditional based on ARM condition code cc.
  645 * This is common between ARM and AArch64 targets.
 646 */
 647void arm_test_cc(DisasCompare *cmp, int cc)
 648{
 649    TCGv_i32 value;
 650    TCGCond cond;
 651    bool global = true;
 652
 653    switch (cc) {
 654    case 0: /* eq: Z */
 655    case 1: /* ne: !Z */
 656        cond = TCG_COND_EQ;
 657        value = cpu_ZF;
 658        break;
 659
 660    case 2: /* cs: C */
 661    case 3: /* cc: !C */
 662        cond = TCG_COND_NE;
 663        value = cpu_CF;
 664        break;
 665
 666    case 4: /* mi: N */
 667    case 5: /* pl: !N */
 668        cond = TCG_COND_LT;
 669        value = cpu_NF;
 670        break;
 671
 672    case 6: /* vs: V */
 673    case 7: /* vc: !V */
 674        cond = TCG_COND_LT;
 675        value = cpu_VF;
 676        break;
 677
 678    case 8: /* hi: C && !Z */
 679    case 9: /* ls: !C || Z -> !(C && !Z) */
 680        cond = TCG_COND_NE;
 681        value = tcg_temp_new_i32();
 682        global = false;
 683        /* CF is 1 for C, so -CF is an all-bits-set mask for C;
 684           ZF is non-zero for !Z; so AND the two subexpressions.  */
 685        tcg_gen_neg_i32(value, cpu_CF);
 686        tcg_gen_and_i32(value, value, cpu_ZF);
 687        break;
 688
 689    case 10: /* ge: N == V -> N ^ V == 0 */
 690    case 11: /* lt: N != V -> N ^ V != 0 */
 691        /* Since we're only interested in the sign bit, == 0 is >= 0.  */
 692        cond = TCG_COND_GE;
 693        value = tcg_temp_new_i32();
 694        global = false;
 695        tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
 696        break;
 697
 698    case 12: /* gt: !Z && N == V */
 699    case 13: /* le: Z || N != V */
 700        cond = TCG_COND_NE;
 701        value = tcg_temp_new_i32();
 702        global = false;
 703        /* (N == V) is equal to the sign bit of ~(NF ^ VF).  Propagate
 704         * the sign bit then AND with ZF to yield the result.  */
 705        tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
 706        tcg_gen_sari_i32(value, value, 31);
 707        tcg_gen_andc_i32(value, cpu_ZF, value);
 708        break;
 709
 710    case 14: /* always */
 711    case 15: /* always */
 712        /* Use the ALWAYS condition, which will fold early.
 713         * It doesn't matter what we use for the value.  */
 714        cond = TCG_COND_ALWAYS;
 715        value = cpu_ZF;
 716        goto no_invert;
 717
 718    default:
 719        fprintf(stderr, "Bad condition code 0x%x\n", cc);
 720        abort();
 721    }
 722
 723    if (cc & 1) {
 724        cond = tcg_invert_cond(cond);
 725    }
 726
 727 no_invert:
 728    cmp->cond = cond;
 729    cmp->value = value;
 730    cmp->value_global = global;
 731}
 732
 733void arm_free_cc(DisasCompare *cmp)
 734{
 735    if (!cmp->value_global) {
 736        tcg_temp_free_i32(cmp->value);
 737    }
 738}
 739
 740void arm_jump_cc(DisasCompare *cmp, TCGLabel *label)
 741{
 742    tcg_gen_brcondi_i32(cmp->cond, cmp->value, 0, label);
 743}
 744
 745void arm_gen_test_cc(int cc, TCGLabel *label)
 746{
 747    DisasCompare cmp;
 748    arm_test_cc(&cmp, cc);
 749    arm_jump_cc(&cmp, label);
 750    arm_free_cc(&cmp);
 751}
 752
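     /* If we are in an IT block, sync the IT execution state bits back
      * into the CPU state (condexec_bits).
      */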
 753static inline void gen_set_condexec(DisasContext *s)
 754{
 755    if (s->condexec_mask) {
 756        uint32_t val = (s->condexec_cond << 4) | (s->condexec_mask >> 1);
 757        TCGv_i32 tmp = tcg_temp_new_i32();
 758        tcg_gen_movi_i32(tmp, val);
 759        store_cpu_field(tmp, condexec_bits);
 760    }
 761}
 762
 763static inline void gen_set_pc_im(DisasContext *s, target_ulong val)
 764{
 765    tcg_gen_movi_i32(cpu_R[15], val);
 766}
 767
 768/* Set PC and Thumb state from var.  var is marked as dead.  */
 769static inline void gen_bx(DisasContext *s, TCGv_i32 var)
 770{
 771    s->base.is_jmp = DISAS_JUMP;
 772    tcg_gen_andi_i32(cpu_R[15], var, ~1);
 773    tcg_gen_andi_i32(var, var, 1);
 774    store_cpu_field(var, thumb);
 775}
 776
 777/*
 778 * Set PC and Thumb state from var. var is marked as dead.
 779 * For M-profile CPUs, include logic to detect exception-return
 780 * branches and handle them. This is needed for Thumb POP/LDM to PC, LDR to PC,
 781 * and BX reg, and no others, and happens only for code in Handler mode.
 782 * The Security Extension also requires us to check for the FNC_RETURN
 783 * which signals a function return from non-secure state; this can happen
 784 * in both Handler and Thread mode.
 785 * To avoid having to do multiple comparisons in inline generated code,
 786 * we make the check we do here loose, so it will match for EXC_RETURN
 787 * in Thread mode. For system emulation do_v7m_exception_exit() checks
 788 * for these spurious cases and returns without doing anything (giving
 789 * the same behaviour as for a branch to a non-magic address).
 790 *
 791 * In linux-user mode it is unclear what the right behaviour for an
 792 * attempted FNC_RETURN should be, because in real hardware this will go
 793 * directly to Secure code (ie not the Linux kernel) which will then treat
 794 * the error in any way it chooses. For QEMU we opt to make the FNC_RETURN
 795 * attempt behave the way it would on a CPU without the security extension,
 796 * which is to say "like a normal branch". That means we can simply treat
 797 * all branches as normal with no magic address behaviour.
 798 */
 799static inline void gen_bx_excret(DisasContext *s, TCGv_i32 var)
 800{
 801    /* Generate the same code here as for a simple bx, but flag via
 802     * s->base.is_jmp that we need to do the rest of the work later.
 803     */
 804    gen_bx(s, var);
 805#ifndef CONFIG_USER_ONLY
 806    if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY) ||
 807        (s->v7m_handler_mode && arm_dc_feature(s, ARM_FEATURE_M))) {
 808        s->base.is_jmp = DISAS_BX_EXCRET;
 809    }
 810#endif
 811}
 812
 813static inline void gen_bx_excret_final_code(DisasContext *s)
 814{
 815    /* Generate the code to finish possible exception return and end the TB */
 816    TCGLabel *excret_label = gen_new_label();
 817    uint32_t min_magic;
 818
 819    if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY)) {
 820        /* Covers FNC_RETURN and EXC_RETURN magic */
 821        min_magic = FNC_RETURN_MIN_MAGIC;
 822    } else {
 823        /* EXC_RETURN magic only */
 824        min_magic = EXC_RETURN_MIN_MAGIC;
 825    }
 826
 827    /* Is the new PC value in the magic range indicating exception return? */
 828    tcg_gen_brcondi_i32(TCG_COND_GEU, cpu_R[15], min_magic, excret_label);
 829    /* No: end the TB as we would for a DISAS_JMP */
 830    if (is_singlestepping(s)) {
 831        gen_singlestep_exception(s);
 832    } else {
 833        tcg_gen_exit_tb(NULL, 0);
 834    }
 835    gen_set_label(excret_label);
 836    /* Yes: this is an exception return.
 837     * At this point in runtime env->regs[15] and env->thumb will hold
 838     * the exception-return magic number, which do_v7m_exception_exit()
 839     * will read. Nothing else will be able to see those values because
 840     * the cpu-exec main loop guarantees that we will always go straight
 841     * from raising the exception to the exception-handling code.
 842     *
 843     * gen_ss_advance(s) does nothing on M profile currently but
 844     * calling it is conceptually the right thing as we have executed
 845     * this instruction (compare SWI, HVC, SMC handling).
 846     */
 847    gen_ss_advance(s);
 848    gen_exception_internal(EXCP_EXCEPTION_EXIT);
 849}
 850
 851static inline void gen_bxns(DisasContext *s, int rm)
 852{
 853    TCGv_i32 var = load_reg(s, rm);
 854
 855    /* The bxns helper may raise an EXCEPTION_EXIT exception, so in theory
 856     * we need to sync state before calling it, but:
 857     *  - we don't need to do gen_set_pc_im() because the bxns helper will
 858     *    always set the PC itself
 859     *  - we don't need to do gen_set_condexec() because BXNS is UNPREDICTABLE
 860     *    unless it's outside an IT block or the last insn in an IT block,
 861     *    so we know that condexec == 0 (already set at the top of the TB)
 862     *    is correct in the non-UNPREDICTABLE cases, and we can choose
 863     *    "zeroes the IT bits" as our UNPREDICTABLE behaviour otherwise.
 864     */
 865    gen_helper_v7m_bxns(cpu_env, var);
 866    tcg_temp_free_i32(var);
 867    s->base.is_jmp = DISAS_EXIT;
 868}
 869
 870static inline void gen_blxns(DisasContext *s, int rm)
 871{
 872    TCGv_i32 var = load_reg(s, rm);
 873
 874    /* We don't need to sync condexec state, for the same reason as bxns.
 875     * We do however need to set the PC, because the blxns helper reads it.
 876     * The blxns helper may throw an exception.
 877     */
 878    gen_set_pc_im(s, s->base.pc_next);
 879    gen_helper_v7m_blxns(cpu_env, var);
 880    tcg_temp_free_i32(var);
 881    s->base.is_jmp = DISAS_EXIT;
 882}
 883
 884/* Variant of store_reg which uses branch&exchange logic when storing
 885   to r15 in ARM architecture v7 and above. The source must be a temporary
 886   and will be marked as dead. */
 887static inline void store_reg_bx(DisasContext *s, int reg, TCGv_i32 var)
 888{
 889    if (reg == 15 && ENABLE_ARCH_7) {
 890        gen_bx(s, var);
 891    } else {
 892        store_reg(s, reg, var);
 893    }
 894}
 895
 896/* Variant of store_reg which uses branch&exchange logic when storing
 897 * to r15 in ARM architecture v5T and above. This is used for storing
 898 * the results of a LDR/LDM/POP into r15, and corresponds to the cases
 899 * in the ARM ARM which use the LoadWritePC() pseudocode function. */
 900static inline void store_reg_from_load(DisasContext *s, int reg, TCGv_i32 var)
 901{
 902    if (reg == 15 && ENABLE_ARCH_5) {
 903        gen_bx_excret(s, var);
 904    } else {
 905        store_reg(s, reg, var);
 906    }
 907}
 908
 909#ifdef CONFIG_USER_ONLY
 910#define IS_USER_ONLY 1
 911#else
 912#define IS_USER_ONLY 0
 913#endif
 914
 915/* Abstractions of "generate code to do a guest load/store for
 916 * AArch32", where a vaddr is always 32 bits (and is zero
  917 * extended if we're a 64 bit core) and data is also
 918 * 32 bits unless specifically doing a 64 bit access.
 919 * These functions work like tcg_gen_qemu_{ld,st}* except
 920 * that the address argument is TCGv_i32 rather than TCGv.
 921 */
 922
 923static inline TCGv gen_aa32_addr(DisasContext *s, TCGv_i32 a32, MemOp op)
 924{
 925    TCGv addr = tcg_temp_new();
 926    tcg_gen_extu_i32_tl(addr, a32);
 927
 928    /* Not needed for user-mode BE32, where we use MO_BE instead.  */
 929    if (!IS_USER_ONLY && s->sctlr_b && (op & MO_SIZE) < MO_32) {
 930        tcg_gen_xori_tl(addr, addr, 4 - (1 << (op & MO_SIZE)));
 931    }
 932    return addr;
 933}
 934
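     /* AArch32 load into a 32-bit value.  Baseline M-profile cores (those
      * without the Main Extension) require aligned accesses, so an
      * alignment check is forced for them.
      */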
 935static void gen_aa32_ld_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
 936                            int index, MemOp opc)
 937{
 938    TCGv addr;
 939
 940    if (arm_dc_feature(s, ARM_FEATURE_M) &&
 941        !arm_dc_feature(s, ARM_FEATURE_M_MAIN)) {
 942        opc |= MO_ALIGN;
 943    }
 944
 945    addr = gen_aa32_addr(s, a32, opc);
 946    tcg_gen_qemu_ld_i32(val, addr, index, opc);
 947    tcg_temp_free(addr);
 948}
 949
 950static void gen_aa32_st_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
 951                            int index, MemOp opc)
 952{
 953    TCGv addr;
 954
 955    if (arm_dc_feature(s, ARM_FEATURE_M) &&
 956        !arm_dc_feature(s, ARM_FEATURE_M_MAIN)) {
 957        opc |= MO_ALIGN;
 958    }
 959
 960    addr = gen_aa32_addr(s, a32, opc);
 961    tcg_gen_qemu_st_i32(val, addr, index, opc);
 962    tcg_temp_free(addr);
 963}
 964
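     /* Wrappers for the common load/store sizes that fold in the default
      * endianness (s->be_data).
      */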
 965#define DO_GEN_LD(SUFF, OPC)                                             \
 966static inline void gen_aa32_ld##SUFF(DisasContext *s, TCGv_i32 val,      \
 967                                     TCGv_i32 a32, int index)            \
 968{                                                                        \
 969    gen_aa32_ld_i32(s, val, a32, index, OPC | s->be_data);               \
 970}
 971
 972#define DO_GEN_ST(SUFF, OPC)                                             \
 973static inline void gen_aa32_st##SUFF(DisasContext *s, TCGv_i32 val,      \
 974                                     TCGv_i32 a32, int index)            \
 975{                                                                        \
 976    gen_aa32_st_i32(s, val, a32, index, OPC | s->be_data);               \
 977}
 978
 979static inline void gen_aa32_frob64(DisasContext *s, TCGv_i64 val)
 980{
 981    /* Not needed for user-mode BE32, where we use MO_BE instead.  */
 982    if (!IS_USER_ONLY && s->sctlr_b) {
 983        tcg_gen_rotri_i64(val, val, 32);
 984    }
 985}
 986
 987static void gen_aa32_ld_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
 988                            int index, MemOp opc)
 989{
 990    TCGv addr = gen_aa32_addr(s, a32, opc);
 991    tcg_gen_qemu_ld_i64(val, addr, index, opc);
 992    gen_aa32_frob64(s, val);
 993    tcg_temp_free(addr);
 994}
 995
 996static inline void gen_aa32_ld64(DisasContext *s, TCGv_i64 val,
 997                                 TCGv_i32 a32, int index)
 998{
 999    gen_aa32_ld_i64(s, val, a32, index, MO_Q | s->be_data);
1000}
1001
1002static void gen_aa32_st_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
1003                            int index, MemOp opc)
1004{
1005    TCGv addr = gen_aa32_addr(s, a32, opc);
1006
1007    /* Not needed for user-mode BE32, where we use MO_BE instead.  */
1008    if (!IS_USER_ONLY && s->sctlr_b) {
1009        TCGv_i64 tmp = tcg_temp_new_i64();
1010        tcg_gen_rotri_i64(tmp, val, 32);
1011        tcg_gen_qemu_st_i64(tmp, addr, index, opc);
1012        tcg_temp_free_i64(tmp);
1013    } else {
1014        tcg_gen_qemu_st_i64(val, addr, index, opc);
1015    }
1016    tcg_temp_free(addr);
1017}
1018
1019static inline void gen_aa32_st64(DisasContext *s, TCGv_i64 val,
1020                                 TCGv_i32 a32, int index)
1021{
1022    gen_aa32_st_i64(s, val, a32, index, MO_Q | s->be_data);
1023}
1024
1025DO_GEN_LD(8u, MO_UB)
1026DO_GEN_LD(16u, MO_UW)
1027DO_GEN_LD(32u, MO_UL)
1028DO_GEN_ST(8, MO_UB)
1029DO_GEN_ST(16, MO_UW)
1030DO_GEN_ST(32, MO_UL)
1031
1032static inline void gen_hvc(DisasContext *s, int imm16)
1033{
1034    /* The pre HVC helper handles cases when HVC gets trapped
1035     * as an undefined insn by runtime configuration (ie before
1036     * the insn really executes).
1037     */
1038    gen_set_pc_im(s, s->pc_curr);
1039    gen_helper_pre_hvc(cpu_env);
1040    /* Otherwise we will treat this as a real exception which
1041     * happens after execution of the insn. (The distinction matters
1042     * for the PC value reported to the exception handler and also
1043     * for single stepping.)
1044     */
1045    s->svc_imm = imm16;
1046    gen_set_pc_im(s, s->base.pc_next);
1047    s->base.is_jmp = DISAS_HVC;
1048}
1049
1050static inline void gen_smc(DisasContext *s)
1051{
1052    /* As with HVC, we may take an exception either before or after
1053     * the insn executes.
1054     */
1055    TCGv_i32 tmp;
1056
1057    gen_set_pc_im(s, s->pc_curr);
1058    tmp = tcg_const_i32(syn_aa32_smc());
1059    gen_helper_pre_smc(cpu_env, tmp);
1060    tcg_temp_free_i32(tmp);
1061    gen_set_pc_im(s, s->base.pc_next);
1062    s->base.is_jmp = DISAS_SMC;
1063}
1064
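     /* Generate an internal-to-QEMU exception at the given PC: sync the
      * condexec state, set the PC, raise the exception and end the TB.
      */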
1065static void gen_exception_internal_insn(DisasContext *s, uint32_t pc, int excp)
1066{
1067    gen_set_condexec(s);
1068    gen_set_pc_im(s, pc);
1069    gen_exception_internal(excp);
1070    s->base.is_jmp = DISAS_NORETURN;
1071}
1072
1073static void gen_exception_insn(DisasContext *s, uint32_t pc, int excp,
1074                               int syn, uint32_t target_el)
1075{
1076    gen_set_condexec(s);
1077    gen_set_pc_im(s, pc);
1078    gen_exception(excp, syn, target_el);
1079    s->base.is_jmp = DISAS_NORETURN;
1080}
1081
1082static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syn)
1083{
1084    TCGv_i32 tcg_syn;
1085
1086    gen_set_condexec(s);
1087    gen_set_pc_im(s, s->pc_curr);
1088    tcg_syn = tcg_const_i32(syn);
1089    gen_helper_exception_bkpt_insn(cpu_env, tcg_syn);
1090    tcg_temp_free_i32(tcg_syn);
1091    s->base.is_jmp = DISAS_NORETURN;
1092}
1093
1094static void unallocated_encoding(DisasContext *s)
1095{
1096    /* Unallocated and reserved encodings are uncategorized */
1097    gen_exception_insn(s, s->pc_curr, EXCP_UDEF, syn_uncategorized(),
1098                       default_exception_el(s));
1099}
1100
1101/* Force a TB lookup after an instruction that changes the CPU state.  */
1102static inline void gen_lookup_tb(DisasContext *s)
1103{
1104    tcg_gen_movi_i32(cpu_R[15], s->base.pc_next);
1105    s->base.is_jmp = DISAS_EXIT;
1106}
1107
1108static inline void gen_hlt(DisasContext *s, int imm)
1109{
1110    /* HLT. This has two purposes.
1111     * Architecturally, it is an external halting debug instruction.
1112     * Since QEMU doesn't implement external debug, we treat this as
 1113     * the architecture requires when halting debug is disabled: it will UNDEF.
1114     * Secondly, "HLT 0x3C" is a T32 semihosting trap instruction,
1115     * and "HLT 0xF000" is an A32 semihosting syscall. These traps
1116     * must trigger semihosting even for ARMv7 and earlier, where
1117     * HLT was an undefined encoding.
1118     * In system mode, we don't allow userspace access to
1119     * semihosting, to provide some semblance of security
 1120     * (and for consistency with our 64-bit semihosting).
1121     */
1122    if (semihosting_enabled() &&
1123#ifndef CONFIG_USER_ONLY
1124        s->current_el != 0 &&
1125#endif
1126        (imm == (s->thumb ? 0x3c : 0xf000))) {
1127        gen_exception_internal_insn(s, s->base.pc_next, EXCP_SEMIHOST);
1128        return;
1129    }
1130
1131    unallocated_encoding(s);
1132}
1133
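     /* Return a pointer to the float_status to use: the Neon "standard FP"
      * status if NEON is set, otherwise the normal VFP fp_status.
      */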
1134static TCGv_ptr get_fpstatus_ptr(int neon)
1135{
1136    TCGv_ptr statusptr = tcg_temp_new_ptr();
1137    int offset;
1138    if (neon) {
1139        offset = offsetof(CPUARMState, vfp.standard_fp_status);
1140    } else {
1141        offset = offsetof(CPUARMState, vfp.fp_status);
1142    }
1143    tcg_gen_addi_ptr(statusptr, cpu_env, offset);
1144    return statusptr;
1145}
1146
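     /* Return the byte offset of VFP register REG within CPUARMState:
      * a D register if DP is true, otherwise an S register (stored as
      * half of the corresponding D register).
      */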
1147static inline long vfp_reg_offset(bool dp, unsigned reg)
1148{
1149    if (dp) {
1150        return offsetof(CPUARMState, vfp.zregs[reg >> 1].d[reg & 1]);
1151    } else {
1152        long ofs = offsetof(CPUARMState, vfp.zregs[reg >> 2].d[(reg >> 1) & 1]);
1153        if (reg & 1) {
1154            ofs += offsetof(CPU_DoubleU, l.upper);
1155        } else {
1156            ofs += offsetof(CPU_DoubleU, l.lower);
1157        }
1158        return ofs;
1159    }
1160}
1161
1162/* Return the offset of a 32-bit piece of a NEON register.
1163   zero is the least significant end of the register.  */
1164static inline long
1165neon_reg_offset (int reg, int n)
1166{
1167    int sreg;
1168    sreg = reg * 2 + n;
1169    return vfp_reg_offset(0, sreg);
1170}
1171
1172/* Return the offset of a 2**SIZE piece of a NEON register, at index ELE,
1173 * where 0 is the least significant end of the register.
1174 */
1175static inline long
1176neon_element_offset(int reg, int element, MemOp size)
1177{
1178    int element_size = 1 << size;
1179    int ofs = element * element_size;
1180#ifdef HOST_WORDS_BIGENDIAN
1181    /* Calculate the offset assuming fully little-endian,
1182     * then XOR to account for the order of the 8-byte units.
1183     */
1184    if (element_size < 8) {
1185        ofs ^= 8 - element_size;
1186    }
1187#endif
1188    return neon_reg_offset(reg, 0) + ofs;
1189}
1190
1191static TCGv_i32 neon_load_reg(int reg, int pass)
1192{
1193    TCGv_i32 tmp = tcg_temp_new_i32();
1194    tcg_gen_ld_i32(tmp, cpu_env, neon_reg_offset(reg, pass));
1195    return tmp;
1196}
1197
1198static void neon_load_element(TCGv_i32 var, int reg, int ele, MemOp mop)
1199{
1200    long offset = neon_element_offset(reg, ele, mop & MO_SIZE);
1201
1202    switch (mop) {
1203    case MO_UB:
1204        tcg_gen_ld8u_i32(var, cpu_env, offset);
1205        break;
1206    case MO_UW:
1207        tcg_gen_ld16u_i32(var, cpu_env, offset);
1208        break;
1209    case MO_UL:
1210        tcg_gen_ld_i32(var, cpu_env, offset);
1211        break;
1212    default:
1213        g_assert_not_reached();
1214    }
1215}
1216
1217static void neon_load_element64(TCGv_i64 var, int reg, int ele, MemOp mop)
1218{
1219    long offset = neon_element_offset(reg, ele, mop & MO_SIZE);
1220
1221    switch (mop) {
1222    case MO_UB:
1223        tcg_gen_ld8u_i64(var, cpu_env, offset);
1224        break;
1225    case MO_UW:
1226        tcg_gen_ld16u_i64(var, cpu_env, offset);
1227        break;
1228    case MO_UL:
1229        tcg_gen_ld32u_i64(var, cpu_env, offset);
1230        break;
1231    case MO_Q:
1232        tcg_gen_ld_i64(var, cpu_env, offset);
1233        break;
1234    default:
1235        g_assert_not_reached();
1236    }
1237}
1238
1239static void neon_store_reg(int reg, int pass, TCGv_i32 var)
1240{
1241    tcg_gen_st_i32(var, cpu_env, neon_reg_offset(reg, pass));
1242    tcg_temp_free_i32(var);
1243}
1244
1245static void neon_store_element(int reg, int ele, MemOp size, TCGv_i32 var)
1246{
1247    long offset = neon_element_offset(reg, ele, size);
1248
1249    switch (size) {
1250    case MO_8:
1251        tcg_gen_st8_i32(var, cpu_env, offset);
1252        break;
1253    case MO_16:
1254        tcg_gen_st16_i32(var, cpu_env, offset);
1255        break;
1256    case MO_32:
1257        tcg_gen_st_i32(var, cpu_env, offset);
1258        break;
1259    default:
1260        g_assert_not_reached();
1261    }
1262}
1263
1264static void neon_store_element64(int reg, int ele, MemOp size, TCGv_i64 var)
1265{
1266    long offset = neon_element_offset(reg, ele, size);
1267
1268    switch (size) {
1269    case MO_8:
1270        tcg_gen_st8_i64(var, cpu_env, offset);
1271        break;
1272    case MO_16:
1273        tcg_gen_st16_i64(var, cpu_env, offset);
1274        break;
1275    case MO_32:
1276        tcg_gen_st32_i64(var, cpu_env, offset);
1277        break;
1278    case MO_64:
1279        tcg_gen_st_i64(var, cpu_env, offset);
1280        break;
1281    default:
1282        g_assert_not_reached();
1283    }
1284}
1285
1286static inline void neon_load_reg64(TCGv_i64 var, int reg)
1287{
1288    tcg_gen_ld_i64(var, cpu_env, vfp_reg_offset(1, reg));
1289}
1290
1291static inline void neon_store_reg64(TCGv_i64 var, int reg)
1292{
1293    tcg_gen_st_i64(var, cpu_env, vfp_reg_offset(1, reg));
1294}
1295
1296static inline void neon_load_reg32(TCGv_i32 var, int reg)
1297{
1298    tcg_gen_ld_i32(var, cpu_env, vfp_reg_offset(false, reg));
1299}
1300
1301static inline void neon_store_reg32(TCGv_i32 var, int reg)
1302{
1303    tcg_gen_st_i32(var, cpu_env, vfp_reg_offset(false, reg));
1304}
1305
1306static TCGv_ptr vfp_reg_ptr(bool dp, int reg)
1307{
1308    TCGv_ptr ret = tcg_temp_new_ptr();
1309    tcg_gen_addi_ptr(ret, cpu_env, vfp_reg_offset(dp, reg));
1310    return ret;
1311}
1312
1313#define ARM_CP_RW_BIT   (1 << 20)
1314
1315/* Include the VFP decoder */
1316#include "translate-vfp.inc.c"
1317
1318static inline void iwmmxt_load_reg(TCGv_i64 var, int reg)
1319{
1320    tcg_gen_ld_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
1321}
1322
1323static inline void iwmmxt_store_reg(TCGv_i64 var, int reg)
1324{
1325    tcg_gen_st_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
1326}
1327
1328static inline TCGv_i32 iwmmxt_load_creg(int reg)
1329{
1330    TCGv_i32 var = tcg_temp_new_i32();
1331    tcg_gen_ld_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
1332    return var;
1333}
1334
1335static inline void iwmmxt_store_creg(int reg, TCGv_i32 var)
1336{
1337    tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
1338    tcg_temp_free_i32(var);
1339}
1340
1341static inline void gen_op_iwmmxt_movq_wRn_M0(int rn)
1342{
1343    iwmmxt_store_reg(cpu_M0, rn);
1344}
1345
1346static inline void gen_op_iwmmxt_movq_M0_wRn(int rn)
1347{
1348    iwmmxt_load_reg(cpu_M0, rn);
1349}
1350
1351static inline void gen_op_iwmmxt_orq_M0_wRn(int rn)
1352{
1353    iwmmxt_load_reg(cpu_V1, rn);
1354    tcg_gen_or_i64(cpu_M0, cpu_M0, cpu_V1);
1355}
1356
1357static inline void gen_op_iwmmxt_andq_M0_wRn(int rn)
1358{
1359    iwmmxt_load_reg(cpu_V1, rn);
1360    tcg_gen_and_i64(cpu_M0, cpu_M0, cpu_V1);
1361}
1362
1363static inline void gen_op_iwmmxt_xorq_M0_wRn(int rn)
1364{
1365    iwmmxt_load_reg(cpu_V1, rn);
1366    tcg_gen_xor_i64(cpu_M0, cpu_M0, cpu_V1);
1367}
1368
1369#define IWMMXT_OP(name) \
1370static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
1371{ \
1372    iwmmxt_load_reg(cpu_V1, rn); \
1373    gen_helper_iwmmxt_##name(cpu_M0, cpu_M0, cpu_V1); \
1374}
1375
1376#define IWMMXT_OP_ENV(name) \
1377static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
1378{ \
1379    iwmmxt_load_reg(cpu_V1, rn); \
1380    gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0, cpu_V1); \
1381}
1382
1383#define IWMMXT_OP_ENV_SIZE(name) \
1384IWMMXT_OP_ENV(name##b) \
1385IWMMXT_OP_ENV(name##w) \
1386IWMMXT_OP_ENV(name##l)
1387
1388#define IWMMXT_OP_ENV1(name) \
1389static inline void gen_op_iwmmxt_##name##_M0(void) \
1390{ \
1391    gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0); \
1392}
1393
1394IWMMXT_OP(maddsq)
1395IWMMXT_OP(madduq)
1396IWMMXT_OP(sadb)
1397IWMMXT_OP(sadw)
1398IWMMXT_OP(mulslw)
1399IWMMXT_OP(mulshw)
1400IWMMXT_OP(mululw)
1401IWMMXT_OP(muluhw)
1402IWMMXT_OP(macsw)
1403IWMMXT_OP(macuw)
1404
1405IWMMXT_OP_ENV_SIZE(unpackl)
1406IWMMXT_OP_ENV_SIZE(unpackh)
1407
1408IWMMXT_OP_ENV1(unpacklub)
1409IWMMXT_OP_ENV1(unpackluw)
1410IWMMXT_OP_ENV1(unpacklul)
1411IWMMXT_OP_ENV1(unpackhub)
1412IWMMXT_OP_ENV1(unpackhuw)
1413IWMMXT_OP_ENV1(unpackhul)
1414IWMMXT_OP_ENV1(unpacklsb)
1415IWMMXT_OP_ENV1(unpacklsw)
1416IWMMXT_OP_ENV1(unpacklsl)
1417IWMMXT_OP_ENV1(unpackhsb)
1418IWMMXT_OP_ENV1(unpackhsw)
1419IWMMXT_OP_ENV1(unpackhsl)
1420
1421IWMMXT_OP_ENV_SIZE(cmpeq)
1422IWMMXT_OP_ENV_SIZE(cmpgtu)
1423IWMMXT_OP_ENV_SIZE(cmpgts)
1424
1425IWMMXT_OP_ENV_SIZE(mins)
1426IWMMXT_OP_ENV_SIZE(minu)
1427IWMMXT_OP_ENV_SIZE(maxs)
1428IWMMXT_OP_ENV_SIZE(maxu)
1429
1430IWMMXT_OP_ENV_SIZE(subn)
1431IWMMXT_OP_ENV_SIZE(addn)
1432IWMMXT_OP_ENV_SIZE(subu)
1433IWMMXT_OP_ENV_SIZE(addu)
1434IWMMXT_OP_ENV_SIZE(subs)
1435IWMMXT_OP_ENV_SIZE(adds)
1436
1437IWMMXT_OP_ENV(avgb0)
1438IWMMXT_OP_ENV(avgb1)
1439IWMMXT_OP_ENV(avgw0)
1440IWMMXT_OP_ENV(avgw1)
1441
1442IWMMXT_OP_ENV(packuw)
1443IWMMXT_OP_ENV(packul)
1444IWMMXT_OP_ENV(packuq)
1445IWMMXT_OP_ENV(packsw)
1446IWMMXT_OP_ENV(packsl)
1447IWMMXT_OP_ENV(packsq)
1448
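     /* Set the wCon bits that track updates: bit 1 ("MUP") when an iwMMXt
      * data register is written, bit 0 ("CUP") when a control register is
      * written.
      */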
1449static void gen_op_iwmmxt_set_mup(void)
1450{
1451    TCGv_i32 tmp;
1452    tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
1453    tcg_gen_ori_i32(tmp, tmp, 2);
1454    store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
1455}
1456
1457static void gen_op_iwmmxt_set_cup(void)
1458{
1459    TCGv_i32 tmp;
1460    tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
1461    tcg_gen_ori_i32(tmp, tmp, 1);
1462    store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
1463}
1464
1465static void gen_op_iwmmxt_setpsr_nz(void)
1466{
1467    TCGv_i32 tmp = tcg_temp_new_i32();
1468    gen_helper_iwmmxt_setpsr_nz(tmp, cpu_M0);
1469    store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCASF]);
1470}
1471
1472static inline void gen_op_iwmmxt_addl_M0_wRn(int rn)
1473{
1474    iwmmxt_load_reg(cpu_V1, rn);
1475    tcg_gen_ext32u_i64(cpu_V1, cpu_V1);
1476    tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
1477}
1478
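     /* Decode the addressing mode of an iwMMXt load/store, leaving the
      * address in DEST and performing any base register writeback.
      * Returns 1 if the encoding is invalid.
      */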
1479static inline int gen_iwmmxt_address(DisasContext *s, uint32_t insn,
1480                                     TCGv_i32 dest)
1481{
1482    int rd;
1483    uint32_t offset;
1484    TCGv_i32 tmp;
1485
1486    rd = (insn >> 16) & 0xf;
1487    tmp = load_reg(s, rd);
1488
1489    offset = (insn & 0xff) << ((insn >> 7) & 2);
1490    if (insn & (1 << 24)) {
1491        /* Pre indexed */
1492        if (insn & (1 << 23))
1493            tcg_gen_addi_i32(tmp, tmp, offset);
1494        else
1495            tcg_gen_addi_i32(tmp, tmp, -offset);
1496        tcg_gen_mov_i32(dest, tmp);
1497        if (insn & (1 << 21))
1498            store_reg(s, rd, tmp);
1499        else
1500            tcg_temp_free_i32(tmp);
1501    } else if (insn & (1 << 21)) {
1502        /* Post indexed */
1503        tcg_gen_mov_i32(dest, tmp);
1504        if (insn & (1 << 23))
1505            tcg_gen_addi_i32(tmp, tmp, offset);
1506        else
1507            tcg_gen_addi_i32(tmp, tmp, -offset);
1508        store_reg(s, rd, tmp);
1509    } else if (!(insn & (1 << 23)))
1510        return 1;
1511    return 0;
1512}
1513
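     /* Fetch the shift amount for an iwMMXt shift insn into DEST, either
      * from a wCGRn control register or from the low half of a wRn
      * register, masked with MASK.  Returns 1 for an invalid register.
      */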
1514static inline int gen_iwmmxt_shift(uint32_t insn, uint32_t mask, TCGv_i32 dest)
1515{
1516    int rd = (insn >> 0) & 0xf;
1517    TCGv_i32 tmp;
1518
1519    if (insn & (1 << 8)) {
1520        if (rd < ARM_IWMMXT_wCGR0 || rd > ARM_IWMMXT_wCGR3) {
1521            return 1;
1522        } else {
1523            tmp = iwmmxt_load_creg(rd);
1524        }
1525    } else {
1526        tmp = tcg_temp_new_i32();
1527        iwmmxt_load_reg(cpu_V0, rd);
1528        tcg_gen_extrl_i64_i32(tmp, cpu_V0);
1529    }
1530    tcg_gen_andi_i32(tmp, tmp, mask);
1531    tcg_gen_mov_i32(dest, tmp);
1532    tcg_temp_free_i32(tmp);
1533    return 0;
1534}
1535
1536/* Disassemble an iwMMXt instruction.  Returns nonzero if an error occurred
 1537   (i.e. an undefined instruction).  */
1538static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn)
1539{
1540    int rd, wrd;
1541    int rdhi, rdlo, rd0, rd1, i;
1542    TCGv_i32 addr;
1543    TCGv_i32 tmp, tmp2, tmp3;
1544
1545    if ((insn & 0x0e000e00) == 0x0c000000) {
1546        if ((insn & 0x0fe00ff0) == 0x0c400000) {
1547            wrd = insn & 0xf;
1548            rdlo = (insn >> 12) & 0xf;
1549            rdhi = (insn >> 16) & 0xf;
1550            if (insn & ARM_CP_RW_BIT) {                         /* TMRRC */
1551                iwmmxt_load_reg(cpu_V0, wrd);
1552                tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
1553                tcg_gen_extrh_i64_i32(cpu_R[rdhi], cpu_V0);
1554            } else {                                    /* TMCRR */
1555                tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
1556                iwmmxt_store_reg(cpu_V0, wrd);
1557                gen_op_iwmmxt_set_mup();
1558            }
1559            return 0;
1560        }
1561
1562        wrd = (insn >> 12) & 0xf;
1563        addr = tcg_temp_new_i32();
1564        if (gen_iwmmxt_address(s, insn, addr)) {
1565            tcg_temp_free_i32(addr);
1566            return 1;
1567        }
1568        if (insn & ARM_CP_RW_BIT) {
1569            if ((insn >> 28) == 0xf) {                  /* WLDRW wCx */
1570                tmp = tcg_temp_new_i32();
1571                gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
1572                iwmmxt_store_creg(wrd, tmp);
1573            } else {
1574                i = 1;
1575                if (insn & (1 << 8)) {
1576                    if (insn & (1 << 22)) {             /* WLDRD */
1577                        gen_aa32_ld64(s, cpu_M0, addr, get_mem_index(s));
1578                        i = 0;
1579                    } else {                            /* WLDRW wRd */
1580                        tmp = tcg_temp_new_i32();
1581                        gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
1582                    }
1583                } else {
1584                    tmp = tcg_temp_new_i32();
1585                    if (insn & (1 << 22)) {             /* WLDRH */
1586                        gen_aa32_ld16u(s, tmp, addr, get_mem_index(s));
1587                    } else {                            /* WLDRB */
1588                        gen_aa32_ld8u(s, tmp, addr, get_mem_index(s));
1589                    }
1590                }
1591                if (i) {
1592                    tcg_gen_extu_i32_i64(cpu_M0, tmp);
1593                    tcg_temp_free_i32(tmp);
1594                }
1595                gen_op_iwmmxt_movq_wRn_M0(wrd);
1596            }
1597        } else {
1598            if ((insn >> 28) == 0xf) {                  /* WSTRW wCx */
1599                tmp = iwmmxt_load_creg(wrd);
1600                gen_aa32_st32(s, tmp, addr, get_mem_index(s));
1601            } else {
1602                gen_op_iwmmxt_movq_M0_wRn(wrd);
1603                tmp = tcg_temp_new_i32();
1604                if (insn & (1 << 8)) {
1605                    if (insn & (1 << 22)) {             /* WSTRD */
1606                        gen_aa32_st64(s, cpu_M0, addr, get_mem_index(s));
1607                    } else {                            /* WSTRW wRd */
1608                        tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1609                        gen_aa32_st32(s, tmp, addr, get_mem_index(s));
1610                    }
1611                } else {
1612                    if (insn & (1 << 22)) {             /* WSTRH */
1613                        tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1614                        gen_aa32_st16(s, tmp, addr, get_mem_index(s));
1615                    } else {                            /* WSTRB */
1616                        tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1617                        gen_aa32_st8(s, tmp, addr, get_mem_index(s));
1618                    }
1619                }
1620            }
1621            tcg_temp_free_i32(tmp);
1622        }
1623        tcg_temp_free_i32(addr);
1624        return 0;
1625    }
1626
1627    if ((insn & 0x0f000000) != 0x0e000000)
1628        return 1;
1629
1630    switch (((insn >> 12) & 0xf00) | ((insn >> 4) & 0xff)) {
1631    case 0x000:                                                 /* WOR */
1632        wrd = (insn >> 12) & 0xf;
1633        rd0 = (insn >> 0) & 0xf;
1634        rd1 = (insn >> 16) & 0xf;
1635        gen_op_iwmmxt_movq_M0_wRn(rd0);
1636        gen_op_iwmmxt_orq_M0_wRn(rd1);
1637        gen_op_iwmmxt_setpsr_nz();
1638        gen_op_iwmmxt_movq_wRn_M0(wrd);
1639        gen_op_iwmmxt_set_mup();
1640        gen_op_iwmmxt_set_cup();
1641        break;
1642    case 0x011:                                                 /* TMCR */
1643        if (insn & 0xf)
1644            return 1;
1645        rd = (insn >> 12) & 0xf;
1646        wrd = (insn >> 16) & 0xf;
1647        switch (wrd) {
1648        case ARM_IWMMXT_wCID:
1649        case ARM_IWMMXT_wCASF:
1650            break;
1651        case ARM_IWMMXT_wCon:
1652            gen_op_iwmmxt_set_cup();
1653            /* Fall through.  */
1654        case ARM_IWMMXT_wCSSF:
1655            tmp = iwmmxt_load_creg(wrd);
1656            tmp2 = load_reg(s, rd);
1657            tcg_gen_andc_i32(tmp, tmp, tmp2);
1658            tcg_temp_free_i32(tmp2);
1659            iwmmxt_store_creg(wrd, tmp);
1660            break;
1661        case ARM_IWMMXT_wCGR0:
1662        case ARM_IWMMXT_wCGR1:
1663        case ARM_IWMMXT_wCGR2:
1664        case ARM_IWMMXT_wCGR3:
1665            gen_op_iwmmxt_set_cup();
1666            tmp = load_reg(s, rd);
1667            iwmmxt_store_creg(wrd, tmp);
1668            break;
1669        default:
1670            return 1;
1671        }
1672        break;
1673    case 0x100:                                                 /* WXOR */
1674        wrd = (insn >> 12) & 0xf;
1675        rd0 = (insn >> 0) & 0xf;
1676        rd1 = (insn >> 16) & 0xf;
1677        gen_op_iwmmxt_movq_M0_wRn(rd0);
1678        gen_op_iwmmxt_xorq_M0_wRn(rd1);
1679        gen_op_iwmmxt_setpsr_nz();
1680        gen_op_iwmmxt_movq_wRn_M0(wrd);
1681        gen_op_iwmmxt_set_mup();
1682        gen_op_iwmmxt_set_cup();
1683        break;
1684    case 0x111:                                                 /* TMRC */
1685        if (insn & 0xf)
1686            return 1;
1687        rd = (insn >> 12) & 0xf;
1688        wrd = (insn >> 16) & 0xf;
1689        tmp = iwmmxt_load_creg(wrd);
1690        store_reg(s, rd, tmp);
1691        break;
1692    case 0x300:                                                 /* WANDN */
1693        wrd = (insn >> 12) & 0xf;
1694        rd0 = (insn >> 0) & 0xf;
1695        rd1 = (insn >> 16) & 0xf;
1696        gen_op_iwmmxt_movq_M0_wRn(rd0);
1697        tcg_gen_not_i64(cpu_M0, cpu_M0);
1698        gen_op_iwmmxt_andq_M0_wRn(rd1);
1699        gen_op_iwmmxt_setpsr_nz();
1700        gen_op_iwmmxt_movq_wRn_M0(wrd);
1701        gen_op_iwmmxt_set_mup();
1702        gen_op_iwmmxt_set_cup();
1703        break;
1704    case 0x200:                                                 /* WAND */
1705        wrd = (insn >> 12) & 0xf;
1706        rd0 = (insn >> 0) & 0xf;
1707        rd1 = (insn >> 16) & 0xf;
1708        gen_op_iwmmxt_movq_M0_wRn(rd0);
1709        gen_op_iwmmxt_andq_M0_wRn(rd1);
1710        gen_op_iwmmxt_setpsr_nz();
1711        gen_op_iwmmxt_movq_wRn_M0(wrd);
1712        gen_op_iwmmxt_set_mup();
1713        gen_op_iwmmxt_set_cup();
1714        break;
1715    case 0x810: case 0xa10:                             /* WMADD */
1716        wrd = (insn >> 12) & 0xf;
1717        rd0 = (insn >> 0) & 0xf;
1718        rd1 = (insn >> 16) & 0xf;
1719        gen_op_iwmmxt_movq_M0_wRn(rd0);
1720        if (insn & (1 << 21))
1721            gen_op_iwmmxt_maddsq_M0_wRn(rd1);
1722        else
1723            gen_op_iwmmxt_madduq_M0_wRn(rd1);
1724        gen_op_iwmmxt_movq_wRn_M0(wrd);
1725        gen_op_iwmmxt_set_mup();
1726        break;
1727    case 0x10e: case 0x50e: case 0x90e: case 0xd0e:     /* WUNPCKIL */
1728        wrd = (insn >> 12) & 0xf;
1729        rd0 = (insn >> 16) & 0xf;
1730        rd1 = (insn >> 0) & 0xf;
1731        gen_op_iwmmxt_movq_M0_wRn(rd0);
1732        switch ((insn >> 22) & 3) {
1733        case 0:
1734            gen_op_iwmmxt_unpacklb_M0_wRn(rd1);
1735            break;
1736        case 1:
1737            gen_op_iwmmxt_unpacklw_M0_wRn(rd1);
1738            break;
1739        case 2:
1740            gen_op_iwmmxt_unpackll_M0_wRn(rd1);
1741            break;
1742        case 3:
1743            return 1;
1744        }
1745        gen_op_iwmmxt_movq_wRn_M0(wrd);
1746        gen_op_iwmmxt_set_mup();
1747        gen_op_iwmmxt_set_cup();
1748        break;
1749    case 0x10c: case 0x50c: case 0x90c: case 0xd0c:     /* WUNPCKIH */
1750        wrd = (insn >> 12) & 0xf;
1751        rd0 = (insn >> 16) & 0xf;
1752        rd1 = (insn >> 0) & 0xf;
1753        gen_op_iwmmxt_movq_M0_wRn(rd0);
1754        switch ((insn >> 22) & 3) {
1755        case 0:
1756            gen_op_iwmmxt_unpackhb_M0_wRn(rd1);
1757            break;
1758        case 1:
1759            gen_op_iwmmxt_unpackhw_M0_wRn(rd1);
1760            break;
1761        case 2:
1762            gen_op_iwmmxt_unpackhl_M0_wRn(rd1);
1763            break;
1764        case 3:
1765            return 1;
1766        }
1767        gen_op_iwmmxt_movq_wRn_M0(wrd);
1768        gen_op_iwmmxt_set_mup();
1769        gen_op_iwmmxt_set_cup();
1770        break;
1771    case 0x012: case 0x112: case 0x412: case 0x512:     /* WSAD */
1772        wrd = (insn >> 12) & 0xf;
1773        rd0 = (insn >> 16) & 0xf;
1774        rd1 = (insn >> 0) & 0xf;
1775        gen_op_iwmmxt_movq_M0_wRn(rd0);
1776        if (insn & (1 << 22))
1777            gen_op_iwmmxt_sadw_M0_wRn(rd1);
1778        else
1779            gen_op_iwmmxt_sadb_M0_wRn(rd1);
1780        if (!(insn & (1 << 20)))
1781            gen_op_iwmmxt_addl_M0_wRn(wrd);
1782        gen_op_iwmmxt_movq_wRn_M0(wrd);
1783        gen_op_iwmmxt_set_mup();
1784        break;
1785    case 0x010: case 0x110: case 0x210: case 0x310:     /* WMUL */
1786        wrd = (insn >> 12) & 0xf;
1787        rd0 = (insn >> 16) & 0xf;
1788        rd1 = (insn >> 0) & 0xf;
1789        gen_op_iwmmxt_movq_M0_wRn(rd0);
1790        if (insn & (1 << 21)) {
1791            if (insn & (1 << 20))
1792                gen_op_iwmmxt_mulshw_M0_wRn(rd1);
1793            else
1794                gen_op_iwmmxt_mulslw_M0_wRn(rd1);
1795        } else {
1796            if (insn & (1 << 20))
1797                gen_op_iwmmxt_muluhw_M0_wRn(rd1);
1798            else
1799                gen_op_iwmmxt_mululw_M0_wRn(rd1);
1800        }
1801        gen_op_iwmmxt_movq_wRn_M0(wrd);
1802        gen_op_iwmmxt_set_mup();
1803        break;
1804    case 0x410: case 0x510: case 0x610: case 0x710:     /* WMAC */
1805        wrd = (insn >> 12) & 0xf;
1806        rd0 = (insn >> 16) & 0xf;
1807        rd1 = (insn >> 0) & 0xf;
1808        gen_op_iwmmxt_movq_M0_wRn(rd0);
1809        if (insn & (1 << 21))
1810            gen_op_iwmmxt_macsw_M0_wRn(rd1);
1811        else
1812            gen_op_iwmmxt_macuw_M0_wRn(rd1);
1813        if (!(insn & (1 << 20))) {
1814            iwmmxt_load_reg(cpu_V1, wrd);
1815            tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
1816        }
1817        gen_op_iwmmxt_movq_wRn_M0(wrd);
1818        gen_op_iwmmxt_set_mup();
1819        break;
1820    case 0x006: case 0x406: case 0x806: case 0xc06:     /* WCMPEQ */
1821        wrd = (insn >> 12) & 0xf;
1822        rd0 = (insn >> 16) & 0xf;
1823        rd1 = (insn >> 0) & 0xf;
1824        gen_op_iwmmxt_movq_M0_wRn(rd0);
1825        switch ((insn >> 22) & 3) {
1826        case 0:
1827            gen_op_iwmmxt_cmpeqb_M0_wRn(rd1);
1828            break;
1829        case 1:
1830            gen_op_iwmmxt_cmpeqw_M0_wRn(rd1);
1831            break;
1832        case 2:
1833            gen_op_iwmmxt_cmpeql_M0_wRn(rd1);
1834            break;
1835        case 3:
1836            return 1;
1837        }
1838        gen_op_iwmmxt_movq_wRn_M0(wrd);
1839        gen_op_iwmmxt_set_mup();
1840        gen_op_iwmmxt_set_cup();
1841        break;
1842    case 0x800: case 0x900: case 0xc00: case 0xd00:     /* WAVG2 */
1843        wrd = (insn >> 12) & 0xf;
1844        rd0 = (insn >> 16) & 0xf;
1845        rd1 = (insn >> 0) & 0xf;
1846        gen_op_iwmmxt_movq_M0_wRn(rd0);
1847        if (insn & (1 << 22)) {
1848            if (insn & (1 << 20))
1849                gen_op_iwmmxt_avgw1_M0_wRn(rd1);
1850            else
1851                gen_op_iwmmxt_avgw0_M0_wRn(rd1);
1852        } else {
1853            if (insn & (1 << 20))
1854                gen_op_iwmmxt_avgb1_M0_wRn(rd1);
1855            else
1856                gen_op_iwmmxt_avgb0_M0_wRn(rd1);
1857        }
1858        gen_op_iwmmxt_movq_wRn_M0(wrd);
1859        gen_op_iwmmxt_set_mup();
1860        gen_op_iwmmxt_set_cup();
1861        break;
1862    case 0x802: case 0x902: case 0xa02: case 0xb02:     /* WALIGNR */
1863        wrd = (insn >> 12) & 0xf;
1864        rd0 = (insn >> 16) & 0xf;
1865        rd1 = (insn >> 0) & 0xf;
1866        gen_op_iwmmxt_movq_M0_wRn(rd0);
1867        tmp = iwmmxt_load_creg(ARM_IWMMXT_wCGR0 + ((insn >> 20) & 3));
1868        tcg_gen_andi_i32(tmp, tmp, 7);
1869        iwmmxt_load_reg(cpu_V1, rd1);
1870        gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp);
1871        tcg_temp_free_i32(tmp);
1872        gen_op_iwmmxt_movq_wRn_M0(wrd);
1873        gen_op_iwmmxt_set_mup();
1874        break;
1875    case 0x601: case 0x605: case 0x609: case 0x60d:     /* TINSR */
1876        if (((insn >> 6) & 3) == 3)
1877            return 1;
1878        rd = (insn >> 12) & 0xf;
1879        wrd = (insn >> 16) & 0xf;
1880        tmp = load_reg(s, rd);
1881        gen_op_iwmmxt_movq_M0_wRn(wrd);
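        /*
         * Build the insert mask (tmp2) and the bit offset (tmp3) of the
         * destination lane: the low insn bits select which byte, halfword
         * or word of wRd is replaced by the general register value.
         */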
1882        switch ((insn >> 6) & 3) {
1883        case 0:
1884            tmp2 = tcg_const_i32(0xff);
1885            tmp3 = tcg_const_i32((insn & 7) << 3);
1886            break;
1887        case 1:
1888            tmp2 = tcg_const_i32(0xffff);
1889            tmp3 = tcg_const_i32((insn & 3) << 4);
1890            break;
1891        case 2:
1892            tmp2 = tcg_const_i32(0xffffffff);
1893            tmp3 = tcg_const_i32((insn & 1) << 5);
1894            break;
1895        default:
1896            tmp2 = NULL;
1897            tmp3 = NULL;
1898        }
1899        gen_helper_iwmmxt_insr(cpu_M0, cpu_M0, tmp, tmp2, tmp3);
1900        tcg_temp_free_i32(tmp3);
1901        tcg_temp_free_i32(tmp2);
1902        tcg_temp_free_i32(tmp);
1903        gen_op_iwmmxt_movq_wRn_M0(wrd);
1904        gen_op_iwmmxt_set_mup();
1905        break;
1906    case 0x107: case 0x507: case 0x907: case 0xd07:     /* TEXTRM */
1907        rd = (insn >> 12) & 0xf;
1908        wrd = (insn >> 16) & 0xf;
1909        if (rd == 15 || ((insn >> 22) & 3) == 3)
1910            return 1;
1911        gen_op_iwmmxt_movq_M0_wRn(wrd);
1912        tmp = tcg_temp_new_i32();
1913        switch ((insn >> 22) & 3) {
1914        case 0:
1915            tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 7) << 3);
1916            tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1917            if (insn & 8) {
1918                tcg_gen_ext8s_i32(tmp, tmp);
1919            } else {
1920                tcg_gen_andi_i32(tmp, tmp, 0xff);
1921            }
1922            break;
1923        case 1:
1924            tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 3) << 4);
1925            tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1926            if (insn & 8) {
1927                tcg_gen_ext16s_i32(tmp, tmp);
1928            } else {
1929                tcg_gen_andi_i32(tmp, tmp, 0xffff);
1930            }
1931            break;
1932        case 2:
1933            tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 1) << 5);
1934            tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1935            break;
1936        }
1937        store_reg(s, rd, tmp);
1938        break;
1939    case 0x117: case 0x517: case 0x917: case 0xd17:     /* TEXTRC */
1940        if ((insn & 0x000ff008) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1941            return 1;
1942        tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1943        switch ((insn >> 22) & 3) {
1944        case 0:
1945            tcg_gen_shri_i32(tmp, tmp, ((insn & 7) << 2) + 0);
1946            break;
1947        case 1:
1948            tcg_gen_shri_i32(tmp, tmp, ((insn & 3) << 3) + 4);
1949            break;
1950        case 2:
1951            tcg_gen_shri_i32(tmp, tmp, ((insn & 1) << 4) + 12);
1952            break;
1953        }
1954        tcg_gen_shli_i32(tmp, tmp, 28);
1955        gen_set_nzcv(tmp);
1956        tcg_temp_free_i32(tmp);
1957        break;
1958    case 0x401: case 0x405: case 0x409: case 0x40d:     /* TBCST */
1959        if (((insn >> 6) & 3) == 3)
1960            return 1;
1961        rd = (insn >> 12) & 0xf;
1962        wrd = (insn >> 16) & 0xf;
1963        tmp = load_reg(s, rd);
1964        switch ((insn >> 6) & 3) {
1965        case 0:
1966            gen_helper_iwmmxt_bcstb(cpu_M0, tmp);
1967            break;
1968        case 1:
1969            gen_helper_iwmmxt_bcstw(cpu_M0, tmp);
1970            break;
1971        case 2:
1972            gen_helper_iwmmxt_bcstl(cpu_M0, tmp);
1973            break;
1974        }
1975        tcg_temp_free_i32(tmp);
1976        gen_op_iwmmxt_movq_wRn_M0(wrd);
1977        gen_op_iwmmxt_set_mup();
1978        break;
1979    case 0x113: case 0x513: case 0x913: case 0xd13:     /* TANDC */
1980        if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1981            return 1;
1982        tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1983        tmp2 = tcg_temp_new_i32();
1984        tcg_gen_mov_i32(tmp2, tmp);
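        /*
         * Fold the per-element flag fields of wCASF together with AND so
         * that the combined flags end up in the top bits of tmp, which is
         * where gen_set_nzcv() looks for them.
         */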
1985        switch ((insn >> 22) & 3) {
1986        case 0:
1987            for (i = 0; i < 7; i ++) {
1988                tcg_gen_shli_i32(tmp2, tmp2, 4);
1989                tcg_gen_and_i32(tmp, tmp, tmp2);
1990            }
1991            break;
1992        case 1:
1993            for (i = 0; i < 3; i ++) {
1994                tcg_gen_shli_i32(tmp2, tmp2, 8);
1995                tcg_gen_and_i32(tmp, tmp, tmp2);
1996            }
1997            break;
1998        case 2:
1999            tcg_gen_shli_i32(tmp2, tmp2, 16);
2000            tcg_gen_and_i32(tmp, tmp, tmp2);
2001            break;
2002        }
2003        gen_set_nzcv(tmp);
2004        tcg_temp_free_i32(tmp2);
2005        tcg_temp_free_i32(tmp);
2006        break;
2007    case 0x01c: case 0x41c: case 0x81c: case 0xc1c:     /* WACC */
2008        wrd = (insn >> 12) & 0xf;
2009        rd0 = (insn >> 16) & 0xf;
2010        gen_op_iwmmxt_movq_M0_wRn(rd0);
2011        switch ((insn >> 22) & 3) {
2012        case 0:
2013            gen_helper_iwmmxt_addcb(cpu_M0, cpu_M0);
2014            break;
2015        case 1:
2016            gen_helper_iwmmxt_addcw(cpu_M0, cpu_M0);
2017            break;
2018        case 2:
2019            gen_helper_iwmmxt_addcl(cpu_M0, cpu_M0);
2020            break;
2021        case 3:
2022            return 1;
2023        }
2024        gen_op_iwmmxt_movq_wRn_M0(wrd);
2025        gen_op_iwmmxt_set_mup();
2026        break;
2027    case 0x115: case 0x515: case 0x915: case 0xd15:     /* TORC */
2028        if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
2029            return 1;
2030        tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
2031        tmp2 = tcg_temp_new_i32();
2032        tcg_gen_mov_i32(tmp2, tmp);
2033        switch ((insn >> 22) & 3) {
2034        case 0:
2035            for (i = 0; i < 7; i ++) {
2036                tcg_gen_shli_i32(tmp2, tmp2, 4);
2037                tcg_gen_or_i32(tmp, tmp, tmp2);
2038            }
2039            break;
2040        case 1:
2041            for (i = 0; i < 3; i ++) {
2042                tcg_gen_shli_i32(tmp2, tmp2, 8);
2043                tcg_gen_or_i32(tmp, tmp, tmp2);
2044            }
2045            break;
2046        case 2:
2047            tcg_gen_shli_i32(tmp2, tmp2, 16);
2048            tcg_gen_or_i32(tmp, tmp, tmp2);
2049            break;
2050        }
2051        gen_set_nzcv(tmp);
2052        tcg_temp_free_i32(tmp2);
2053        tcg_temp_free_i32(tmp);
2054        break;
2055    case 0x103: case 0x503: case 0x903: case 0xd03:     /* TMOVMSK */
2056        rd = (insn >> 12) & 0xf;
2057        rd0 = (insn >> 16) & 0xf;
2058        if ((insn & 0xf) != 0 || ((insn >> 22) & 3) == 3)
2059            return 1;
2060        gen_op_iwmmxt_movq_M0_wRn(rd0);
2061        tmp = tcg_temp_new_i32();
2062        switch ((insn >> 22) & 3) {
2063        case 0:
2064            gen_helper_iwmmxt_msbb(tmp, cpu_M0);
2065            break;
2066        case 1:
2067            gen_helper_iwmmxt_msbw(tmp, cpu_M0);
2068            break;
2069        case 2:
2070            gen_helper_iwmmxt_msbl(tmp, cpu_M0);
2071            break;
2072        }
2073        store_reg(s, rd, tmp);
2074        break;
2075    case 0x106: case 0x306: case 0x506: case 0x706:     /* WCMPGT */
2076    case 0x906: case 0xb06: case 0xd06: case 0xf06:
2077        wrd = (insn >> 12) & 0xf;
2078        rd0 = (insn >> 16) & 0xf;
2079        rd1 = (insn >> 0) & 0xf;
2080        gen_op_iwmmxt_movq_M0_wRn(rd0);
2081        switch ((insn >> 22) & 3) {
2082        case 0:
2083            if (insn & (1 << 21))
2084                gen_op_iwmmxt_cmpgtsb_M0_wRn(rd1);
2085            else
2086                gen_op_iwmmxt_cmpgtub_M0_wRn(rd1);
2087            break;
2088        case 1:
2089            if (insn & (1 << 21))
2090                gen_op_iwmmxt_cmpgtsw_M0_wRn(rd1);
2091            else
2092                gen_op_iwmmxt_cmpgtuw_M0_wRn(rd1);
2093            break;
2094        case 2:
2095            if (insn & (1 << 21))
2096                gen_op_iwmmxt_cmpgtsl_M0_wRn(rd1);
2097            else
2098                gen_op_iwmmxt_cmpgtul_M0_wRn(rd1);
2099            break;
2100        case 3:
2101            return 1;
2102        }
2103        gen_op_iwmmxt_movq_wRn_M0(wrd);
2104        gen_op_iwmmxt_set_mup();
2105        gen_op_iwmmxt_set_cup();
2106        break;
2107    case 0x00e: case 0x20e: case 0x40e: case 0x60e:     /* WUNPCKEL */
2108    case 0x80e: case 0xa0e: case 0xc0e: case 0xe0e:
2109        wrd = (insn >> 12) & 0xf;
2110        rd0 = (insn >> 16) & 0xf;
2111        gen_op_iwmmxt_movq_M0_wRn(rd0);
2112        switch ((insn >> 22) & 3) {
2113        case 0:
2114            if (insn & (1 << 21))
2115                gen_op_iwmmxt_unpacklsb_M0();
2116            else
2117                gen_op_iwmmxt_unpacklub_M0();
2118            break;
2119        case 1:
2120            if (insn & (1 << 21))
2121                gen_op_iwmmxt_unpacklsw_M0();
2122            else
2123                gen_op_iwmmxt_unpackluw_M0();
2124            break;
2125        case 2:
2126            if (insn & (1 << 21))
2127                gen_op_iwmmxt_unpacklsl_M0();
2128            else
2129                gen_op_iwmmxt_unpacklul_M0();
2130            break;
2131        case 3:
2132            return 1;
2133        }
2134        gen_op_iwmmxt_movq_wRn_M0(wrd);
2135        gen_op_iwmmxt_set_mup();
2136        gen_op_iwmmxt_set_cup();
2137        break;
2138    case 0x00c: case 0x20c: case 0x40c: case 0x60c:     /* WUNPCKEH */
2139    case 0x80c: case 0xa0c: case 0xc0c: case 0xe0c:
2140        wrd = (insn >> 12) & 0xf;
2141        rd0 = (insn >> 16) & 0xf;
2142        gen_op_iwmmxt_movq_M0_wRn(rd0);
2143        switch ((insn >> 22) & 3) {
2144        case 0:
2145            if (insn & (1 << 21))
2146                gen_op_iwmmxt_unpackhsb_M0();
2147            else
2148                gen_op_iwmmxt_unpackhub_M0();
2149            break;
2150        case 1:
2151            if (insn & (1 << 21))
2152                gen_op_iwmmxt_unpackhsw_M0();
2153            else
2154                gen_op_iwmmxt_unpackhuw_M0();
2155            break;
2156        case 2:
2157            if (insn & (1 << 21))
2158                gen_op_iwmmxt_unpackhsl_M0();
2159            else
2160                gen_op_iwmmxt_unpackhul_M0();
2161            break;
2162        case 3:
2163            return 1;
2164        }
2165        gen_op_iwmmxt_movq_wRn_M0(wrd);
2166        gen_op_iwmmxt_set_mup();
2167        gen_op_iwmmxt_set_cup();
2168        break;
2169    case 0x204: case 0x604: case 0xa04: case 0xe04:     /* WSRL */
2170    case 0x214: case 0x614: case 0xa14: case 0xe14:
2171        if (((insn >> 22) & 3) == 0)
2172            return 1;
2173        wrd = (insn >> 12) & 0xf;
2174        rd0 = (insn >> 16) & 0xf;
2175        gen_op_iwmmxt_movq_M0_wRn(rd0);
2176        tmp = tcg_temp_new_i32();
2177        if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2178            tcg_temp_free_i32(tmp);
2179            return 1;
2180        }
2181        switch ((insn >> 22) & 3) {
2182        case 1:
2183            gen_helper_iwmmxt_srlw(cpu_M0, cpu_env, cpu_M0, tmp);
2184            break;
2185        case 2:
2186            gen_helper_iwmmxt_srll(cpu_M0, cpu_env, cpu_M0, tmp);
2187            break;
2188        case 3:
2189            gen_helper_iwmmxt_srlq(cpu_M0, cpu_env, cpu_M0, tmp);
2190            break;
2191        }
2192        tcg_temp_free_i32(tmp);
2193        gen_op_iwmmxt_movq_wRn_M0(wrd);
2194        gen_op_iwmmxt_set_mup();
2195        gen_op_iwmmxt_set_cup();
2196        break;
2197    case 0x004: case 0x404: case 0x804: case 0xc04:     /* WSRA */
2198    case 0x014: case 0x414: case 0x814: case 0xc14:
2199        if (((insn >> 22) & 3) == 0)
2200            return 1;
2201        wrd = (insn >> 12) & 0xf;
2202        rd0 = (insn >> 16) & 0xf;
2203        gen_op_iwmmxt_movq_M0_wRn(rd0);
2204        tmp = tcg_temp_new_i32();
2205        if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2206            tcg_temp_free_i32(tmp);
2207            return 1;
2208        }
2209        switch ((insn >> 22) & 3) {
2210        case 1:
2211            gen_helper_iwmmxt_sraw(cpu_M0, cpu_env, cpu_M0, tmp);
2212            break;
2213        case 2:
2214            gen_helper_iwmmxt_sral(cpu_M0, cpu_env, cpu_M0, tmp);
2215            break;
2216        case 3:
2217            gen_helper_iwmmxt_sraq(cpu_M0, cpu_env, cpu_M0, tmp);
2218            break;
2219        }
2220        tcg_temp_free_i32(tmp);
2221        gen_op_iwmmxt_movq_wRn_M0(wrd);
2222        gen_op_iwmmxt_set_mup();
2223        gen_op_iwmmxt_set_cup();
2224        break;
2225    case 0x104: case 0x504: case 0x904: case 0xd04:     /* WSLL */
2226    case 0x114: case 0x514: case 0x914: case 0xd14:
2227        if (((insn >> 22) & 3) == 0)
2228            return 1;
2229        wrd = (insn >> 12) & 0xf;
2230        rd0 = (insn >> 16) & 0xf;
2231        gen_op_iwmmxt_movq_M0_wRn(rd0);
2232        tmp = tcg_temp_new_i32();
2233        if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2234            tcg_temp_free_i32(tmp);
2235            return 1;
2236        }
2237        switch ((insn >> 22) & 3) {
2238        case 1:
2239            gen_helper_iwmmxt_sllw(cpu_M0, cpu_env, cpu_M0, tmp);
2240            break;
2241        case 2:
2242            gen_helper_iwmmxt_slll(cpu_M0, cpu_env, cpu_M0, tmp);
2243            break;
2244        case 3:
2245            gen_helper_iwmmxt_sllq(cpu_M0, cpu_env, cpu_M0, tmp);
2246            break;
2247        }
2248        tcg_temp_free_i32(tmp);
2249        gen_op_iwmmxt_movq_wRn_M0(wrd);
2250        gen_op_iwmmxt_set_mup();
2251        gen_op_iwmmxt_set_cup();
2252        break;
2253    case 0x304: case 0x704: case 0xb04: case 0xf04:     /* WROR */
2254    case 0x314: case 0x714: case 0xb14: case 0xf14:
2255        if (((insn >> 22) & 3) == 0)
2256            return 1;
2257        wrd = (insn >> 12) & 0xf;
2258        rd0 = (insn >> 16) & 0xf;
2259        gen_op_iwmmxt_movq_M0_wRn(rd0);
2260        tmp = tcg_temp_new_i32();
2261        switch ((insn >> 22) & 3) {
2262        case 1:
2263            if (gen_iwmmxt_shift(insn, 0xf, tmp)) {
2264                tcg_temp_free_i32(tmp);
2265                return 1;
2266            }
2267            gen_helper_iwmmxt_rorw(cpu_M0, cpu_env, cpu_M0, tmp);
2268            break;
2269        case 2:
2270            if (gen_iwmmxt_shift(insn, 0x1f, tmp)) {
2271                tcg_temp_free_i32(tmp);
2272                return 1;
2273            }
2274            gen_helper_iwmmxt_rorl(cpu_M0, cpu_env, cpu_M0, tmp);
2275            break;
2276        case 3:
2277            if (gen_iwmmxt_shift(insn, 0x3f, tmp)) {
2278                tcg_temp_free_i32(tmp);
2279                return 1;
2280            }
2281            gen_helper_iwmmxt_rorq(cpu_M0, cpu_env, cpu_M0, tmp);
2282            break;
2283        }
2284        tcg_temp_free_i32(tmp);
2285        gen_op_iwmmxt_movq_wRn_M0(wrd);
2286        gen_op_iwmmxt_set_mup();
2287        gen_op_iwmmxt_set_cup();
2288        break;
2289    case 0x116: case 0x316: case 0x516: case 0x716:     /* WMIN */
2290    case 0x916: case 0xb16: case 0xd16: case 0xf16:
2291        wrd = (insn >> 12) & 0xf;
2292        rd0 = (insn >> 16) & 0xf;
2293        rd1 = (insn >> 0) & 0xf;
2294        gen_op_iwmmxt_movq_M0_wRn(rd0);
2295        switch ((insn >> 22) & 3) {
2296        case 0:
2297            if (insn & (1 << 21))
2298                gen_op_iwmmxt_minsb_M0_wRn(rd1);
2299            else
2300                gen_op_iwmmxt_minub_M0_wRn(rd1);
2301            break;
2302        case 1:
2303            if (insn & (1 << 21))
2304                gen_op_iwmmxt_minsw_M0_wRn(rd1);
2305            else
2306                gen_op_iwmmxt_minuw_M0_wRn(rd1);
2307            break;
2308        case 2:
2309            if (insn & (1 << 21))
2310                gen_op_iwmmxt_minsl_M0_wRn(rd1);
2311            else
2312                gen_op_iwmmxt_minul_M0_wRn(rd1);
2313            break;
2314        case 3:
2315            return 1;
2316        }
2317        gen_op_iwmmxt_movq_wRn_M0(wrd);
2318        gen_op_iwmmxt_set_mup();
2319        break;
2320    case 0x016: case 0x216: case 0x416: case 0x616:     /* WMAX */
2321    case 0x816: case 0xa16: case 0xc16: case 0xe16:
2322        wrd = (insn >> 12) & 0xf;
2323        rd0 = (insn >> 16) & 0xf;
2324        rd1 = (insn >> 0) & 0xf;
2325        gen_op_iwmmxt_movq_M0_wRn(rd0);
2326        switch ((insn >> 22) & 3) {
2327        case 0:
2328            if (insn & (1 << 21))
2329                gen_op_iwmmxt_maxsb_M0_wRn(rd1);
2330            else
2331                gen_op_iwmmxt_maxub_M0_wRn(rd1);
2332            break;
2333        case 1:
2334            if (insn & (1 << 21))
2335                gen_op_iwmmxt_maxsw_M0_wRn(rd1);
2336            else
2337                gen_op_iwmmxt_maxuw_M0_wRn(rd1);
2338            break;
2339        case 2:
2340            if (insn & (1 << 21))
2341                gen_op_iwmmxt_maxsl_M0_wRn(rd1);
2342            else
2343                gen_op_iwmmxt_maxul_M0_wRn(rd1);
2344            break;
2345        case 3:
2346            return 1;
2347        }
2348        gen_op_iwmmxt_movq_wRn_M0(wrd);
2349        gen_op_iwmmxt_set_mup();
2350        break;
2351    case 0x002: case 0x102: case 0x202: case 0x302:     /* WALIGNI */
2352    case 0x402: case 0x502: case 0x602: case 0x702:
2353        wrd = (insn >> 12) & 0xf;
2354        rd0 = (insn >> 16) & 0xf;
2355        rd1 = (insn >> 0) & 0xf;
2356        gen_op_iwmmxt_movq_M0_wRn(rd0);
2357        tmp = tcg_const_i32((insn >> 20) & 3);
2358        iwmmxt_load_reg(cpu_V1, rd1);
2359        gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp);
2360        tcg_temp_free_i32(tmp);
2361        gen_op_iwmmxt_movq_wRn_M0(wrd);
2362        gen_op_iwmmxt_set_mup();
2363        break;
2364    case 0x01a: case 0x11a: case 0x21a: case 0x31a:     /* WSUB */
2365    case 0x41a: case 0x51a: case 0x61a: case 0x71a:
2366    case 0x81a: case 0x91a: case 0xa1a: case 0xb1a:
2367    case 0xc1a: case 0xd1a: case 0xe1a: case 0xf1a:
2368        wrd = (insn >> 12) & 0xf;
2369        rd0 = (insn >> 16) & 0xf;
2370        rd1 = (insn >> 0) & 0xf;
2371        gen_op_iwmmxt_movq_M0_wRn(rd0);
2372        switch ((insn >> 20) & 0xf) {
2373        case 0x0:
2374            gen_op_iwmmxt_subnb_M0_wRn(rd1);
2375            break;
2376        case 0x1:
2377            gen_op_iwmmxt_subub_M0_wRn(rd1);
2378            break;
2379        case 0x3:
2380            gen_op_iwmmxt_subsb_M0_wRn(rd1);
2381            break;
2382        case 0x4:
2383            gen_op_iwmmxt_subnw_M0_wRn(rd1);
2384            break;
2385        case 0x5:
2386            gen_op_iwmmxt_subuw_M0_wRn(rd1);
2387            break;
2388        case 0x7:
2389            gen_op_iwmmxt_subsw_M0_wRn(rd1);
2390            break;
2391        case 0x8:
2392            gen_op_iwmmxt_subnl_M0_wRn(rd1);
2393            break;
2394        case 0x9:
2395            gen_op_iwmmxt_subul_M0_wRn(rd1);
2396            break;
2397        case 0xb:
2398            gen_op_iwmmxt_subsl_M0_wRn(rd1);
2399            break;
2400        default:
2401            return 1;
2402        }
2403        gen_op_iwmmxt_movq_wRn_M0(wrd);
2404        gen_op_iwmmxt_set_mup();
2405        gen_op_iwmmxt_set_cup();
2406        break;
2407    case 0x01e: case 0x11e: case 0x21e: case 0x31e:     /* WSHUFH */
2408    case 0x41e: case 0x51e: case 0x61e: case 0x71e:
2409    case 0x81e: case 0x91e: case 0xa1e: case 0xb1e:
2410    case 0xc1e: case 0xd1e: case 0xe1e: case 0xf1e:
2411        wrd = (insn >> 12) & 0xf;
2412        rd0 = (insn >> 16) & 0xf;
2413        gen_op_iwmmxt_movq_M0_wRn(rd0);
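        /*
         * The 8-bit shuffle control is split across the encoding:
         * insn[23:20] supplies the high nibble and insn[3:0] the low nibble.
         */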
2414        tmp = tcg_const_i32(((insn >> 16) & 0xf0) | (insn & 0x0f));
2415        gen_helper_iwmmxt_shufh(cpu_M0, cpu_env, cpu_M0, tmp);
2416        tcg_temp_free_i32(tmp);
2417        gen_op_iwmmxt_movq_wRn_M0(wrd);
2418        gen_op_iwmmxt_set_mup();
2419        gen_op_iwmmxt_set_cup();
2420        break;
2421    case 0x018: case 0x118: case 0x218: case 0x318:     /* WADD */
2422    case 0x418: case 0x518: case 0x618: case 0x718:
2423    case 0x818: case 0x918: case 0xa18: case 0xb18:
2424    case 0xc18: case 0xd18: case 0xe18: case 0xf18:
2425        wrd = (insn >> 12) & 0xf;
2426        rd0 = (insn >> 16) & 0xf;
2427        rd1 = (insn >> 0) & 0xf;
2428        gen_op_iwmmxt_movq_M0_wRn(rd0);
2429        switch ((insn >> 20) & 0xf) {
2430        case 0x0:
2431            gen_op_iwmmxt_addnb_M0_wRn(rd1);
2432            break;
2433        case 0x1:
2434            gen_op_iwmmxt_addub_M0_wRn(rd1);
2435            break;
2436        case 0x3:
2437            gen_op_iwmmxt_addsb_M0_wRn(rd1);
2438            break;
2439        case 0x4:
2440            gen_op_iwmmxt_addnw_M0_wRn(rd1);
2441            break;
2442        case 0x5:
2443            gen_op_iwmmxt_adduw_M0_wRn(rd1);
2444            break;
2445        case 0x7:
2446            gen_op_iwmmxt_addsw_M0_wRn(rd1);
2447            break;
2448        case 0x8:
2449            gen_op_iwmmxt_addnl_M0_wRn(rd1);
2450            break;
2451        case 0x9:
2452            gen_op_iwmmxt_addul_M0_wRn(rd1);
2453            break;
2454        case 0xb:
2455            gen_op_iwmmxt_addsl_M0_wRn(rd1);
2456            break;
2457        default:
2458            return 1;
2459        }
2460        gen_op_iwmmxt_movq_wRn_M0(wrd);
2461        gen_op_iwmmxt_set_mup();
2462        gen_op_iwmmxt_set_cup();
2463        break;
2464    case 0x008: case 0x108: case 0x208: case 0x308:     /* WPACK */
2465    case 0x408: case 0x508: case 0x608: case 0x708:
2466    case 0x808: case 0x908: case 0xa08: case 0xb08:
2467    case 0xc08: case 0xd08: case 0xe08: case 0xf08:
2468        if (!(insn & (1 << 20)) || ((insn >> 22) & 3) == 0)
2469            return 1;
2470        wrd = (insn >> 12) & 0xf;
2471        rd0 = (insn >> 16) & 0xf;
2472        rd1 = (insn >> 0) & 0xf;
2473        gen_op_iwmmxt_movq_M0_wRn(rd0);
2474        switch ((insn >> 22) & 3) {
2475        case 1:
2476            if (insn & (1 << 21))
2477                gen_op_iwmmxt_packsw_M0_wRn(rd1);
2478            else
2479                gen_op_iwmmxt_packuw_M0_wRn(rd1);
2480            break;
2481        case 2:
2482            if (insn & (1 << 21))
2483                gen_op_iwmmxt_packsl_M0_wRn(rd1);
2484            else
2485                gen_op_iwmmxt_packul_M0_wRn(rd1);
2486            break;
2487        case 3:
2488            if (insn & (1 << 21))
2489                gen_op_iwmmxt_packsq_M0_wRn(rd1);
2490            else
2491                gen_op_iwmmxt_packuq_M0_wRn(rd1);
2492            break;
2493        }
2494        gen_op_iwmmxt_movq_wRn_M0(wrd);
2495        gen_op_iwmmxt_set_mup();
2496        gen_op_iwmmxt_set_cup();
2497        break;
2498    case 0x201: case 0x203: case 0x205: case 0x207:
2499    case 0x209: case 0x20b: case 0x20d: case 0x20f:
2500    case 0x211: case 0x213: case 0x215: case 0x217:
2501    case 0x219: case 0x21b: case 0x21d: case 0x21f:
2502        wrd = (insn >> 5) & 0xf;
2503        rd0 = (insn >> 12) & 0xf;
2504        rd1 = (insn >> 0) & 0xf;
2505        if (rd0 == 0xf || rd1 == 0xf)
2506            return 1;
2507        gen_op_iwmmxt_movq_M0_wRn(wrd);
2508        tmp = load_reg(s, rd0);
2509        tmp2 = load_reg(s, rd1);
2510        switch ((insn >> 16) & 0xf) {
2511        case 0x0:                                       /* TMIA */
2512            gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2513            break;
2514        case 0x8:                                       /* TMIAPH */
2515            gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2516            break;
2517        case 0xc: case 0xd: case 0xe: case 0xf:                 /* TMIAxy */
2518            if (insn & (1 << 16))
2519                tcg_gen_shri_i32(tmp, tmp, 16);
2520            if (insn & (1 << 17))
2521                tcg_gen_shri_i32(tmp2, tmp2, 16);
2522            gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2523            break;
2524        default:
2525            tcg_temp_free_i32(tmp2);
2526            tcg_temp_free_i32(tmp);
2527            return 1;
2528        }
2529        tcg_temp_free_i32(tmp2);
2530        tcg_temp_free_i32(tmp);
2531        gen_op_iwmmxt_movq_wRn_M0(wrd);
2532        gen_op_iwmmxt_set_mup();
2533        break;
2534    default:
2535        return 1;
2536    }
2537
2538    return 0;
2539}
2540
2541/* Disassemble an XScale DSP instruction.  Returns nonzero if an error occurred
2542   (i.e. an undefined instruction).  */
2543static int disas_dsp_insn(DisasContext *s, uint32_t insn)
2544{
2545    int acc, rd0, rd1, rdhi, rdlo;
2546    TCGv_i32 tmp, tmp2;
2547
2548    if ((insn & 0x0ff00f10) == 0x0e200010) {
2549        /* Multiply with Internal Accumulate Format */
2550        rd0 = (insn >> 12) & 0xf;
2551        rd1 = insn & 0xf;
2552        acc = (insn >> 5) & 7;
2553
2554        if (acc != 0)
2555            return 1;
2556
2557        tmp = load_reg(s, rd0);
2558        tmp2 = load_reg(s, rd1);
2559        switch ((insn >> 16) & 0xf) {
2560        case 0x0:                                       /* MIA */
2561            gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2562            break;
2563        case 0x8:                                       /* MIAPH */
2564            gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2565            break;
2566        case 0xc:                                       /* MIABB */
2567        case 0xd:                                       /* MIABT */
2568        case 0xe:                                       /* MIATB */
2569        case 0xf:                                       /* MIATT */
2570            if (insn & (1 << 16))
2571                tcg_gen_shri_i32(tmp, tmp, 16);
2572            if (insn & (1 << 17))
2573                tcg_gen_shri_i32(tmp2, tmp2, 16);
2574            gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2575            break;
2576        default:
2577            return 1;
2578        }
2579        tcg_temp_free_i32(tmp2);
2580        tcg_temp_free_i32(tmp);
2581
2582        gen_op_iwmmxt_movq_wRn_M0(acc);
2583        return 0;
2584    }
2585
2586    if ((insn & 0x0fe00ff8) == 0x0c400000) {
2587        /* Internal Accumulator Access Format */
2588        rdhi = (insn >> 16) & 0xf;
2589        rdlo = (insn >> 12) & 0xf;
2590        acc = insn & 7;
2591
2592        if (acc != 0)
2593            return 1;
2594
2595        if (insn & ARM_CP_RW_BIT) {                     /* MRA */
2596            iwmmxt_load_reg(cpu_V0, acc);
2597            tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
2598            tcg_gen_extrh_i64_i32(cpu_R[rdhi], cpu_V0);
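            /* The XScale accumulator is only 40 bits wide, so just
               bits [39:32] are visible in rdhi.  */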
2599            tcg_gen_andi_i32(cpu_R[rdhi], cpu_R[rdhi], (1 << (40 - 32)) - 1);
2600        } else {                                        /* MAR */
2601            tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
2602            iwmmxt_store_reg(cpu_V0, acc);
2603        }
2604        return 0;
2605    }
2606
2607    return 1;
2608}
2609
2610#define VFP_REG_SHR(x, n) (((n) > 0) ? (x) >> (n) : (x) << -(n))
2611#define VFP_SREG(insn, bigbit, smallbit) \
2612  ((VFP_REG_SHR(insn, bigbit - 1) & 0x1e) | (((insn) >> (smallbit)) & 1))
2613#define VFP_DREG(reg, insn, bigbit, smallbit) do { \
2614    if (arm_dc_feature(s, ARM_FEATURE_VFP3)) { \
2615        reg = (((insn) >> (bigbit)) & 0x0f) \
2616              | (((insn) >> ((smallbit) - 4)) & 0x10); \
2617    } else { \
2618        if (insn & (1 << (smallbit))) \
2619            return 1; \
2620        reg = ((insn) >> (bigbit)) & 0x0f; \
2621    }} while (0)
2622
2623#define VFP_SREG_D(insn) VFP_SREG(insn, 12, 22)
2624#define VFP_DREG_D(reg, insn) VFP_DREG(reg, insn, 12, 22)
2625#define VFP_SREG_N(insn) VFP_SREG(insn, 16,  7)
2626#define VFP_DREG_N(reg, insn) VFP_DREG(reg, insn, 16,  7)
2627#define VFP_SREG_M(insn) VFP_SREG(insn,  0,  5)
2628#define VFP_DREG_M(reg, insn) VFP_DREG(reg, insn,  0,  5)
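/*
 * Worked example (illustrative): for an insn with bits [15:12] == 0x5 and
 * bit 22 == 1, VFP_SREG_D yields (0x5 << 1) | 1 == 11 (s11), while on a
 * VFP3-capable core VFP_DREG_D yields 0x5 | (1 << 4) == 21 (d21).
 */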
2629
2630static void gen_neon_dup_low16(TCGv_i32 var)
2631{
2632    TCGv_i32 tmp = tcg_temp_new_i32();
2633    tcg_gen_ext16u_i32(var, var);
2634    tcg_gen_shli_i32(tmp, var, 16);
2635    tcg_gen_or_i32(var, var, tmp);
2636    tcg_temp_free_i32(tmp);
2637}
2638
2639static void gen_neon_dup_high16(TCGv_i32 var)
2640{
2641    TCGv_i32 tmp = tcg_temp_new_i32();
2642    tcg_gen_andi_i32(var, var, 0xffff0000);
2643    tcg_gen_shri_i32(tmp, var, 16);
2644    tcg_gen_or_i32(var, var, tmp);
2645    tcg_temp_free_i32(tmp);
2646}
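/*
 * Illustrative effect of the two helpers above: for var == 0x1234abcd,
 * gen_neon_dup_low16 leaves 0xabcdabcd in var and gen_neon_dup_high16
 * leaves 0x12341234.
 */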
2647
2648/*
2649 * Disassemble a VFP instruction.  Returns nonzero if an error occurred
2650 * (i.e. an undefined instruction).
2651 */
2652static int disas_vfp_insn(DisasContext *s, uint32_t insn)
2653{
2654    if (!arm_dc_feature(s, ARM_FEATURE_VFP)) {
2655        return 1;
2656    }
2657
2658    /*
2659     * If the decodetree decoder handles this insn it will always
2660     * emit code to either execute the insn or generate an appropriate
2661     * exception, so we never need to return nonzero to tell
2662     * the calling code to emit an UNDEF exception.
2663     */
2664    if (extract32(insn, 28, 4) == 0xf) {
2665        if (disas_vfp_uncond(s, insn)) {
2666            return 0;
2667        }
2668    } else {
2669        if (disas_vfp(s, insn)) {
2670            return 0;
2671        }
2672    }
2673    /* If the decodetree decoder didn't handle this insn, it must be UNDEF */
2674    return 1;
2675}
2676
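/*
 * In the softmmu case, direct block chaining (goto_tb) is restricted to
 * destinations on the same guest page as the code being translated, so a
 * stale direct jump cannot outlive a change to that page's mapping;
 * user-only emulation allows it unconditionally.
 */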
2677static inline bool use_goto_tb(DisasContext *s, target_ulong dest)
2678{
2679#ifndef CONFIG_USER_ONLY
2680    return (s->base.tb->pc & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK) ||
2681           ((s->base.pc_next - 1) & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK);
2682#else
2683    return true;
2684#endif
2685}
2686
2687static void gen_goto_ptr(void)
2688{
2689    tcg_gen_lookup_and_goto_ptr();
2690}
2691
2692/* This will end the TB but doesn't guarantee we'll return to
2693 * cpu_loop_exec. Any live exit_requests will be processed as we
2694 * enter the next TB.
2695 */
2696static void gen_goto_tb(DisasContext *s, int n, target_ulong dest)
2697{
2698    if (use_goto_tb(s, dest)) {
2699        tcg_gen_goto_tb(n);
2700        gen_set_pc_im(s, dest);
2701        tcg_gen_exit_tb(s->base.tb, n);
2702    } else {
2703        gen_set_pc_im(s, dest);
2704        gen_goto_ptr();
2705    }
2706    s->base.is_jmp = DISAS_NORETURN;
2707}
2708
2709static inline void gen_jmp(DisasContext *s, uint32_t dest)
2710{
2711    if (unlikely(is_singlestepping(s))) {
2712        /* An indirect jump so that we still trigger the debug exception.  */
2713        gen_set_pc_im(s, dest);
2714        s->base.is_jmp = DISAS_JUMP;
2715    } else {
2716        gen_goto_tb(s, 0, dest);
2717    }
2718}
2719
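/*
 * Signed 16x16->32 multiply for the xy multiply insns: x and y select the
 * top (1) or bottom (0) halfword of each operand.
 */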
2720static inline void gen_mulxy(TCGv_i32 t0, TCGv_i32 t1, int x, int y)
2721{
2722    if (x)
2723        tcg_gen_sari_i32(t0, t0, 16);
2724    else
2725        gen_sxth(t0);
2726    if (y)
2727        tcg_gen_sari_i32(t1, t1, 16);
2728    else
2729        gen_sxth(t1);
2730    tcg_gen_mul_i32(t0, t0, t1);
2731}
2732
2733/* Return the mask of PSR bits set by a MSR instruction.  */
2734static uint32_t msr_mask(DisasContext *s, int flags, int spsr)
2735{
2736    uint32_t mask;
2737
2738    mask = 0;
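    /*
     * flags is the MSR field mask: bit 0 selects the control field
     * (PSR[7:0]), bit 1 the extension field (PSR[15:8]), bit 2 the status
     * field (PSR[23:16]) and bit 3 the flags field (PSR[31:24]); e.g.
     * flags == 0x9 gives mask == 0xff0000ff before the feature checks below.
     */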
2739    if (flags & (1 << 0))
2740        mask |= 0xff;
2741    if (flags & (1 << 1))
2742        mask |= 0xff00;
2743    if (flags & (1 << 2))
2744        mask |= 0xff0000;
2745    if (flags & (1 << 3))
2746        mask |= 0xff000000;
2747
2748    /* Mask out undefined bits.  */
2749    mask &= ~CPSR_RESERVED;
2750    if (!arm_dc_feature(s, ARM_FEATURE_V4T)) {
2751        mask &= ~CPSR_T;
2752    }
2753    if (!arm_dc_feature(s, ARM_FEATURE_V5)) {
2754        mask &= ~CPSR_Q; /* V5TE in reality */
2755    }
2756    if (!arm_dc_feature(s, ARM_FEATURE_V6)) {
2757        mask &= ~(CPSR_E | CPSR_GE);
2758    }
2759    if (!arm_dc_feature(s, ARM_FEATURE_THUMB2)) {
2760        mask &= ~CPSR_IT;
2761    }
2762    /* Mask out execution state and reserved bits.  */
2763    if (!spsr) {
2764        mask &= ~(CPSR_EXEC | CPSR_RESERVED);
2765    }
2766    /* Mask out privileged bits.  */
2767    if (IS_USER(s))
2768        mask &= CPSR_USER;
2769    return mask;
2770}
2771
2772/* Returns nonzero if access to the PSR is not permitted. Marks t0 as dead. */
2773static int gen_set_psr(DisasContext *s, uint32_t mask, int spsr, TCGv_i32 t0)
2774{
2775    TCGv_i32 tmp;
2776    if (spsr) {
2777        /* ??? This is also undefined in system mode.  */
2778        if (IS_USER(s))
2779            return 1;
2780
2781        tmp = load_cpu_field(spsr);
2782        tcg_gen_andi_i32(tmp, tmp, ~mask);
2783        tcg_gen_andi_i32(t0, t0, mask);
2784        tcg_gen_or_i32(tmp, tmp, t0);
2785        store_cpu_field(tmp, spsr);
2786    } else {
2787        gen_set_cpsr(t0, mask);
2788    }
2789    tcg_temp_free_i32(t0);
2790    gen_lookup_tb(s);
2791    return 0;
2792}
2793
2794/* Returns nonzero if access to the PSR is not permitted.  */
2795static int gen_set_psr_im(DisasContext *s, uint32_t mask, int spsr, uint32_t val)
2796{
2797    TCGv_i32 tmp;
2798    tmp = tcg_temp_new_i32();
2799    tcg_gen_movi_i32(tmp, val);
2800    return gen_set_psr(s, mask, spsr, tmp);
2801}
2802
2803static bool msr_banked_access_decode(DisasContext *s, int r, int sysm, int rn,
2804                                     int *tgtmode, int *regno)
2805{
2806    /* Decode the r and sysm fields of MSR/MRS banked accesses into
2807     * the target mode and register number, and identify the various
2808     * unpredictable cases.
2809     * MSR (banked) and MRS (banked) are CONSTRAINED UNPREDICTABLE if:
2810     *  + executed in user mode
2811     *  + using R15 as the src/dest register
2812     *  + accessing an unimplemented register
2813     *  + accessing a register that's inaccessible at current PL/security state*
2814     *  + accessing a register that you could access with a different insn
2815     * We choose to UNDEF in all these cases.
2816     * Since we don't know which of the various AArch32 modes we are in
2817     * we have to defer some checks to runtime.
2818     * Accesses to Monitor mode registers from Secure EL1 (which implies
2819     * that EL3 is AArch64) must trap to EL3.
2820     *
2821     * If the access checks fail this function will emit code to take
2822     * an exception and return false. Otherwise it will return true,
2823     * and set *tgtmode and *regno appropriately.
2824     */
2825    int exc_target = default_exception_el(s);
2826
2827    /* These instructions are present only in ARMv8, or in ARMv7 with the
2828     * Virtualization Extensions.
2829     */
2830    if (!arm_dc_feature(s, ARM_FEATURE_V8) &&
2831        !arm_dc_feature(s, ARM_FEATURE_EL2)) {
2832        goto undef;
2833    }
2834
2835    if (IS_USER(s) || rn == 15) {
2836        goto undef;
2837    }
2838
2839    /* The table in the v8 ARM ARM section F5.2.3 describes the encoding
2840     * of registers into (r, sysm).
2841     */
2842    if (r) {
2843        /* SPSRs for other modes */
2844        switch (sysm) {
2845        case 0xe: /* SPSR_fiq */
2846            *tgtmode = ARM_CPU_MODE_FIQ;
2847            break;
2848        case 0x10: /* SPSR_irq */
2849            *tgtmode = ARM_CPU_MODE_IRQ;
2850            break;
2851        case 0x12: /* SPSR_svc */
2852            *tgtmode = ARM_CPU_MODE_SVC;
2853            break;
2854        case 0x14: /* SPSR_abt */
2855            *tgtmode = ARM_CPU_MODE_ABT;
2856            break;
2857        case 0x16: /* SPSR_und */
2858            *tgtmode = ARM_CPU_MODE_UND;
2859            break;
2860        case 0x1c: /* SPSR_mon */
2861            *tgtmode = ARM_CPU_MODE_MON;
2862            break;
2863        case 0x1e: /* SPSR_hyp */
2864            *tgtmode = ARM_CPU_MODE_HYP;
2865            break;
2866        default: /* unallocated */
2867            goto undef;
2868        }
2869        /* We arbitrarily assign SPSR a register number of 16. */
2870        *regno = 16;
2871    } else {
2872        /* general purpose registers for other modes */
2873        switch (sysm) {
2874        case 0x0 ... 0x6:   /* 0b00xxx : r8_usr ... r14_usr */
2875            *tgtmode = ARM_CPU_MODE_USR;
2876            *regno = sysm + 8;
2877            break;
2878        case 0x8 ... 0xe:   /* 0b01xxx : r8_fiq ... r14_fiq */
2879            *tgtmode = ARM_CPU_MODE_FIQ;
2880            *regno = sysm;
2881            break;
2882        case 0x10 ... 0x11: /* 0b1000x : r14_irq, r13_irq */
2883            *tgtmode = ARM_CPU_MODE_IRQ;
2884            *regno = sysm & 1 ? 13 : 14;
2885            break;
2886        case 0x12 ... 0x13: /* 0b1001x : r14_svc, r13_svc */
2887            *tgtmode = ARM_CPU_MODE_SVC;
2888            *regno = sysm & 1 ? 13 : 14;
2889            break;
2890        case 0x14 ... 0x15: /* 0b1010x : r14_abt, r13_abt */
2891            *tgtmode = ARM_CPU_MODE_ABT;
2892            *regno = sysm & 1 ? 13 : 14;
2893            break;
2894        case 0x16 ... 0x17: /* 0b1011x : r14_und, r13_und */
2895            *tgtmode = ARM_CPU_MODE_UND;
2896            *regno = sysm & 1 ? 13 : 14;
2897            break;
2898        case 0x1c ... 0x1d: /* 0b1110x : r14_mon, r13_mon */
2899            *tgtmode = ARM_CPU_MODE_MON;
2900            *regno = sysm & 1 ? 13 : 14;
2901            break;
2902        case 0x1e ... 0x1f: /* 0b1111x : elr_hyp, r13_hyp */
2903            *tgtmode = ARM_CPU_MODE_HYP;
2904            /* Arbitrarily pick 17 for ELR_Hyp (which is not a banked LR!) */
2905            *regno = sysm & 1 ? 13 : 17;
2906            break;
2907        default: /* unallocated */
2908            goto undef;
2909        }
2910    }
2911
2912    /* Catch the 'accessing inaccessible register' cases we can detect
2913     * at translate time.
2914     */
2915    switch (*tgtmode) {
2916    case ARM_CPU_MODE_MON:
2917        if (!arm_dc_feature(s, ARM_FEATURE_EL3) || s->ns) {
2918            goto undef;
2919        }
2920        if (s->current_el == 1) {
2921            /* If we're in Secure EL1 (which implies that EL3 is AArch64)
2922             * then accesses to Mon registers trap to EL3
2923             */
2924            exc_target = 3;
2925            goto undef;
2926        }
2927        break;
2928    case ARM_CPU_MODE_HYP:
2929        /*
2930         * SPSR_hyp and r13_hyp can only be accessed from Monitor mode
2931         * (and so we can forbid accesses from EL2 or below). elr_hyp
2932         * can be accessed also from Hyp mode, so forbid accesses from
2933         * EL0 or EL1.
2934         */
2935        if (!arm_dc_feature(s, ARM_FEATURE_EL2) || s->current_el < 2 ||
2936            (s->current_el < 3 && *regno != 17)) {
2937            goto undef;
2938        }
2939        break;
2940    default:
2941        break;
2942    }
2943
2944    return true;
2945
2946undef:
2947    /* If we get here then some access check did not pass */
2948    gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
2949                       syn_uncategorized(), exc_target);
2950    return false;
2951}
2952
2953static void gen_msr_banked(DisasContext *s, int r, int sysm, int rn)
2954{
2955    TCGv_i32 tcg_reg, tcg_tgtmode, tcg_regno;
2956    int tgtmode = 0, regno = 0;
2957
2958    if (!msr_banked_access_decode(s, r, sysm, rn, &tgtmode, &regno)) {
2959        return;
2960    }
2961
2962    /* Sync state because msr_banked() can raise exceptions */
2963    gen_set_condexec(s);
2964    gen_set_pc_im(s, s->pc_curr);
2965    tcg_reg = load_reg(s, rn);
2966    tcg_tgtmode = tcg_const_i32(tgtmode);
2967    tcg_regno = tcg_const_i32(regno);
2968    gen_helper_msr_banked(cpu_env, tcg_reg, tcg_tgtmode, tcg_regno);
2969    tcg_temp_free_i32(tcg_tgtmode);
2970    tcg_temp_free_i32(tcg_regno);
2971    tcg_temp_free_i32(tcg_reg);
2972    s->base.is_jmp = DISAS_UPDATE;
2973}
2974
2975static void gen_mrs_banked(DisasContext *s, int r, int sysm, int rn)
2976{
2977    TCGv_i32 tcg_reg, tcg_tgtmode, tcg_regno;
2978    int tgtmode = 0, regno = 0;
2979
2980    if (!msr_banked_access_decode(s, r, sysm, rn, &tgtmode, &regno)) {
2981        return;
2982    }
2983
2984    /* Sync state because mrs_banked() can raise exceptions */
2985    gen_set_condexec(s);
2986    gen_set_pc_im(s, s->pc_curr);
2987    tcg_reg = tcg_temp_new_i32();
2988    tcg_tgtmode = tcg_const_i32(tgtmode);
2989    tcg_regno = tcg_const_i32(regno);
2990    gen_helper_mrs_banked(tcg_reg, cpu_env, tcg_tgtmode, tcg_regno);
2991    tcg_temp_free_i32(tcg_tgtmode);
2992    tcg_temp_free_i32(tcg_regno);
2993    store_reg(s, rn, tcg_reg);
2994    s->base.is_jmp = DISAS_UPDATE;
2995}
2996
2997/* Store value to PC as for an exception return (i.e. don't
2998 * mask bits). The subsequent call to gen_helper_cpsr_write_eret()
2999 * will do the masking based on the new value of the Thumb bit.
3000 */
3001static void store_pc_exc_ret(DisasContext *s, TCGv_i32 pc)
3002{
3003    tcg_gen_mov_i32(cpu_R[15], pc);
3004    tcg_temp_free_i32(pc);
3005}
3006
3007/* Generate a v6 exception return.  Marks both values as dead.  */
3008static void gen_rfe(DisasContext *s, TCGv_i32 pc, TCGv_i32 cpsr)
3009{
3010    store_pc_exc_ret(s, pc);
3011    /* The cpsr_write_eret helper will mask the low bits of PC
3012     * appropriately depending on the new Thumb bit, so it must
3013     * be called after storing the new PC.
3014     */
3015    if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
3016        gen_io_start();
3017    }
3018    gen_helper_cpsr_write_eret(cpu_env, cpsr);
3019    tcg_temp_free_i32(cpsr);
3020    /* Must exit loop to check un-masked IRQs */
3021    s->base.is_jmp = DISAS_EXIT;
3022}
3023
3024/* Generate an old-style exception return. Marks pc as dead. */
3025static void gen_exception_return(DisasContext *s, TCGv_i32 pc)
3026{
3027    gen_rfe(s, pc, load_cpu_field(spsr));
3028}
3029
3030#define CPU_V001 cpu_V0, cpu_V0, cpu_V1
3031
3032static inline void gen_neon_add(int size, TCGv_i32 t0, TCGv_i32 t1)
3033{
3034    switch (size) {
3035    case 0: gen_helper_neon_add_u8(t0, t0, t1); break;
3036    case 1: gen_helper_neon_add_u16(t0, t0, t1); break;
3037    case 2: tcg_gen_add_i32(t0, t0, t1); break;
3038    default: abort();
3039    }
3040}
3041
3042static inline void gen_neon_rsb(int size, TCGv_i32 t0, TCGv_i32 t1)
3043{
3044    switch (size) {
3045    case 0: gen_helper_neon_sub_u8(t0, t1, t0); break;
3046    case 1: gen_helper_neon_sub_u16(t0, t1, t0); break;
3047    case 2: tcg_gen_sub_i32(t0, t1, t0); break;
3048    default: return;
3049    }
3050}
3051
3052/* 32-bit pairwise ops end up the same as the elementwise versions.  */
3053#define gen_helper_neon_pmax_s32  tcg_gen_smax_i32
3054#define gen_helper_neon_pmax_u32  tcg_gen_umax_i32
3055#define gen_helper_neon_pmin_s32  tcg_gen_smin_i32
3056#define gen_helper_neon_pmin_u32  tcg_gen_umin_i32
3057
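/*
 * Expand to the Neon integer helper selected by element size and signedness:
 * the switch index is (size << 1) | u, so e.g. size == 1, u == 0 picks the
 * signed 16-bit variant.
 */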
3058#define GEN_NEON_INTEGER_OP_ENV(name) do { \
3059    switch ((size << 1) | u) { \
3060    case 0: \
3061        gen_helper_neon_##name##_s8(tmp, cpu_env, tmp, tmp2); \
3062        break; \
3063    case 1: \
3064        gen_helper_neon_##name##_u8(tmp, cpu_env, tmp, tmp2); \
3065        break; \
3066    case 2: \
3067        gen_helper_neon_##name##_s16(tmp, cpu_env, tmp, tmp2); \
3068        break; \
3069    case 3: \
3070        gen_helper_neon_##name##_u16(tmp, cpu_env, tmp, tmp2); \
3071        break; \
3072    case 4: \
3073        gen_helper_neon_##name##_s32(tmp, cpu_env, tmp, tmp2); \
3074        break; \
3075    case 5: \
3076        gen_helper_neon_##name##_u32(tmp, cpu_env, tmp, tmp2); \
3077        break; \
3078    default: return 1; \
3079    }} while (0)
3080
3081#define GEN_NEON_INTEGER_OP(name) do { \
3082    switch ((size << 1) | u) { \
3083    case 0: \
3084        gen_helper_neon_##name##_s8(tmp, tmp, tmp2); \
3085        break; \
3086    case 1: \
3087        gen_helper_neon_##name##_u8(tmp, tmp, tmp2); \
3088        break; \
3089    case 2: \
3090        gen_helper_neon_##name##_s16(tmp, tmp, tmp2); \
3091        break; \
3092    case 3: \
3093        gen_helper_neon_##name##_u16(tmp, tmp, tmp2); \
3094        break; \
3095    case 4: \
3096        gen_helper_neon_##name##_s32(tmp, tmp, tmp2); \
3097        break; \
3098    case 5: \
3099        gen_helper_neon_##name##_u32(tmp, tmp, tmp2); \
3100        break; \
3101    default: return 1; \
3102    }} while (0)
3103
3104static TCGv_i32 neon_load_scratch(int scratch)
3105{
3106    TCGv_i32 tmp = tcg_temp_new_i32();
3107    tcg_gen_ld_i32(tmp, cpu_env, offsetof(CPUARMState, vfp.scratch[scratch]));
3108    return tmp;
3109}
3110
3111static void neon_store_scratch(int scratch, TCGv_i32 var)
3112{
3113    tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, vfp.scratch[scratch]));
3114    tcg_temp_free_i32(var);
3115}
3116
3117static inline TCGv_i32 neon_get_scalar(int size, int reg)
3118{
3119    TCGv_i32 tmp;
3120    if (size == 1) {
3121        tmp = neon_load_reg(reg & 7, reg >> 4);
3122        if (reg & 8) {
3123            gen_neon_dup_high16(tmp);
3124        } else {
3125            gen_neon_dup_low16(tmp);
3126        }
3127    } else {
3128        tmp = neon_load_reg(reg & 15, reg >> 4);
3129    }
3130    return tmp;
3131}
3132
3133static int gen_neon_unzip(int rd, int rm, int size, int q)
3134{
3135    TCGv_ptr pd, pm;
3136    
3137    if (!q && size == 2) {
3138        return 1;
3139    }
3140    pd = vfp_reg_ptr(true, rd);
3141    pm = vfp_reg_ptr(true, rm);
3142    if (q) {
3143        switch (size) {
3144        case 0:
3145            gen_helper_neon_qunzip8(pd, pm);
3146            break;
3147        case 1:
3148            gen_helper_neon_qunzip16(pd, pm);
3149            break;
3150        case 2:
3151            gen_helper_neon_qunzip32(pd, pm);
3152            break;
3153        default:
3154            abort();
3155        }
3156    } else {
3157        switch (size) {
3158        case 0:
3159            gen_helper_neon_unzip8(pd, pm);
3160            break;
3161        case 1:
3162            gen_helper_neon_unzip16(pd, pm);
3163            break;
3164        default:
3165            abort();
3166        }
3167    }
3168    tcg_temp_free_ptr(pd);
3169    tcg_temp_free_ptr(pm);
3170    return 0;
3171}
3172
3173static int gen_neon_zip(int rd, int rm, int size, int q)
3174{
3175    TCGv_ptr pd, pm;
3176
3177    if (!q && size == 2) {
3178        return 1;
3179    }
3180    pd = vfp_reg_ptr(true, rd);
3181    pm = vfp_reg_ptr(true, rm);
3182    if (q) {
3183        switch (size) {
3184        case 0:
3185            gen_helper_neon_qzip8(pd, pm);
3186            break;
3187        case 1:
3188            gen_helper_neon_qzip16(pd, pm);
3189            break;
3190        case 2:
3191            gen_helper_neon_qzip32(pd, pm);
3192            break;
3193        default:
3194            abort();
3195        }
3196    } else {
3197        switch (size) {
3198        case 0:
3199            gen_helper_neon_zip8(pd, pm);
3200            break;
3201        case 1:
3202            gen_helper_neon_zip16(pd, pm);
3203            break;
3204        default:
3205            abort();
3206        }
3207    }
3208    tcg_temp_free_ptr(pd);
3209    tcg_temp_free_ptr(pm);
3210    return 0;
3211}
3212
3213static void gen_neon_trn_u8(TCGv_i32 t0, TCGv_i32 t1)
3214{
3215    TCGv_i32 rd, tmp;
3216
3217    rd = tcg_temp_new_i32();
3218    tmp = tcg_temp_new_i32();
3219
3220    tcg_gen_shli_i32(rd, t0, 8);
3221    tcg_gen_andi_i32(rd, rd, 0xff00ff00);
3222    tcg_gen_andi_i32(tmp, t1, 0x00ff00ff);
3223    tcg_gen_or_i32(rd, rd, tmp);
3224
3225    tcg_gen_shri_i32(t1, t1, 8);
3226    tcg_gen_andi_i32(t1, t1, 0x00ff00ff);
3227    tcg_gen_andi_i32(tmp, t0, 0xff00ff00);
3228    tcg_gen_or_i32(t1, t1, tmp);
3229    tcg_gen_mov_i32(t0, rd);
3230
3231    tcg_temp_free_i32(tmp);
3232    tcg_temp_free_i32(rd);
3233}
3234
3235static void gen_neon_trn_u16(TCGv_i32 t0, TCGv_i32 t1)
3236{
3237    TCGv_i32 rd, tmp;
3238
3239    rd = tcg_temp_new_i32();
3240    tmp = tcg_temp_new_i32();
3241
3242    tcg_gen_shli_i32(rd, t0, 16);
3243    tcg_gen_andi_i32(tmp, t1, 0xffff);
3244    tcg_gen_or_i32(rd, rd, tmp);
3245    tcg_gen_shri_i32(t1, t1, 16);
3246    tcg_gen_andi_i32(tmp, t0, 0xffff0000);
3247    tcg_gen_or_i32(t1, t1, tmp);
3248    tcg_gen_mov_i32(t0, rd);
3249
3250    tcg_temp_free_i32(tmp);
3251    tcg_temp_free_i32(rd);
3252}
3253
3254
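    /* Indexed by the 'type' field, insn bits [11:8], of the "load/store
     * multiple structures" forms handled below.  nregs * interleave gives
     * the total number of D registers transferred; each structure is spread
     * across 'interleave' registers placed 'spacing' D registers apart.
     * For illustration (per the ARM ARM encoding table): {1, 4, 1} is
     * VLD4/VST4 with four consecutive registers, {2, 1, 1} is VLD1/VST1
     * of two registers.
     */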
3255static struct {
3256    int nregs;
3257    int interleave;
3258    int spacing;
3259} const neon_ls_element_type[11] = {
3260    {1, 4, 1},
3261    {1, 4, 2},
3262    {4, 1, 1},
3263    {2, 2, 2},
3264    {1, 3, 1},
3265    {1, 3, 2},
3266    {3, 1, 1},
3267    {1, 1, 1},
3268    {1, 2, 1},
3269    {1, 2, 2},
3270    {2, 1, 1}
3271};
3272
3273/* Translate a NEON load/store element instruction.  Return nonzero if the
3274   instruction is invalid.  */
3275static int disas_neon_ls_insn(DisasContext *s, uint32_t insn)
3276{
3277    int rd, rn, rm;
3278    int op;
3279    int nregs;
3280    int interleave;
3281    int spacing;
3282    int stride;
3283    int size;
3284    int reg;
3285    int load;
3286    int n;
3287    int vec_size;
3288    int mmu_idx;
3289    MemOp endian;
3290    TCGv_i32 addr;
3291    TCGv_i32 tmp;
3292    TCGv_i32 tmp2;
3293    TCGv_i64 tmp64;
3294
3295    /* FIXME: this access check should not take precedence over UNDEF
3296     * for invalid encodings; we will generate incorrect syndrome information
3297     * for attempts to execute invalid vfp/neon encodings with FP disabled.
3298     */
3299    if (s->fp_excp_el) {
3300        gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
3301                           syn_simd_access_trap(1, 0xe, false), s->fp_excp_el);
3302        return 0;
3303    }
3304
3305    if (!s->vfp_enabled)
3306      return 1;
3307    VFP_DREG_D(rd, insn);
3308    rn = (insn >> 16) & 0xf;
3309    rm = insn & 0xf;
3310    load = (insn & (1 << 21)) != 0;
3311    endian = s->be_data;
3312    mmu_idx = get_mem_index(s);
3313    if ((insn & (1 << 23)) == 0) {
3314        /* Load store all elements.  */
3315        op = (insn >> 8) & 0xf;
3316        size = (insn >> 6) & 3;
3317        if (op > 10)
3318            return 1;
3319        /* Catch UNDEF cases for bad values of align field */
3320        switch (op & 0xc) {
3321        case 4:
3322            if (((insn >> 5) & 1) == 1) {
3323                return 1;
3324            }
3325            break;
3326        case 8:
3327            if (((insn >> 4) & 3) == 3) {
3328                return 1;
3329            }
3330            break;
3331        default:
3332            break;
3333        }
3334        nregs = neon_ls_element_type[op].nregs;
3335        interleave = neon_ls_element_type[op].interleave;
3336        spacing = neon_ls_element_type[op].spacing;
3337        if (size == 3 && (interleave | spacing) != 1) {
3338            return 1;
3339        }
3340        /* For our purposes, bytes are always little-endian.  */
3341        if (size == 0) {
3342            endian = MO_LE;
3343        }
3344        /* Consecutive little-endian elements from a single register
3345         * can be promoted to a larger little-endian operation.
3346         */
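            /* For example (illustration only): with size == 0 and
             * interleave == 1, the eight byte elements of a D register are
             * transferred as one little-endian 64-bit access below rather
             * than as eight separate byte accesses.
             */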
3347        if (interleave == 1 && endian == MO_LE) {
3348            size = 3;
3349        }
3350        tmp64 = tcg_temp_new_i64();
3351        addr = tcg_temp_new_i32();
3352        tmp2 = tcg_const_i32(1 << size);
3353        load_reg_var(s, addr, rn);
3354        for (reg = 0; reg < nregs; reg++) {
3355            for (n = 0; n < 8 >> size; n++) {
3356                int xs;
3357                for (xs = 0; xs < interleave; xs++) {
3358                    int tt = rd + reg + spacing * xs;
3359
3360                    if (load) {
3361                        gen_aa32_ld_i64(s, tmp64, addr, mmu_idx, endian | size);
3362                        neon_store_element64(tt, n, size, tmp64);
3363                    } else {
3364                        neon_load_element64(tmp64, tt, n, size);
3365                        gen_aa32_st_i64(s, tmp64, addr, mmu_idx, endian | size);
3366                    }
3367                    tcg_gen_add_i32(addr, addr, tmp2);
3368                }
3369            }
3370        }
3371        tcg_temp_free_i32(addr);
3372        tcg_temp_free_i32(tmp2);
3373        tcg_temp_free_i64(tmp64);
3374        stride = nregs * interleave * 8;
3375    } else {
3376        size = (insn >> 10) & 3;
3377        if (size == 3) {
3378            /* Load single element to all lanes.  */
3379            int a = (insn >> 4) & 1;
3380            if (!load) {
3381                return 1;
3382            }
3383            size = (insn >> 6) & 3;
3384            nregs = ((insn >> 8) & 3) + 1;
3385
3386            if (size == 3) {
3387                if (nregs != 4 || a == 0) {
3388                    return 1;
3389                }
3390                /* For VLD4 size == 3, a == 1 means 32 bits at 16-byte alignment */
3391                size = 2;
3392            }
3393            if (nregs == 1 && a == 1 && size == 0) {
3394                return 1;
3395            }
3396            if (nregs == 3 && a == 1) {
3397                return 1;
3398            }
3399            addr = tcg_temp_new_i32();
3400            load_reg_var(s, addr, rn);
3401
3402            /* VLD1 to all lanes: bit 5 indicates how many Dregs to write.
3403             * VLD2/3/4 to all lanes: bit 5 indicates register stride.
3404             */
3405            stride = (insn & (1 << 5)) ? 2 : 1;
3406            vec_size = nregs == 1 ? stride * 8 : 8;
3407
3408            tmp = tcg_temp_new_i32();
3409            for (reg = 0; reg < nregs; reg++) {
3410                gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s),
3411                                s->be_data | size);
3412                if ((rd & 1) && vec_size == 16) {
3413                    /* We cannot write 16 bytes at once because the
3414                     * destination is unaligned.
3415                     */
3416                    tcg_gen_gvec_dup_i32(size, neon_reg_offset(rd, 0),
3417                                         8, 8, tmp);
3418                    tcg_gen_gvec_mov(0, neon_reg_offset(rd + 1, 0),
3419                                     neon_reg_offset(rd, 0), 8, 8);
3420                } else {
3421                    tcg_gen_gvec_dup_i32(size, neon_reg_offset(rd, 0),
3422                                         vec_size, vec_size, tmp);
3423                }
3424                tcg_gen_addi_i32(addr, addr, 1 << size);
3425                rd += stride;
3426            }
3427            tcg_temp_free_i32(tmp);
3428            tcg_temp_free_i32(addr);
3429            stride = (1 << size) * nregs;
3430        } else {
3431            /* Single element.  */
3432            int idx = (insn >> 4) & 0xf;
3433            int reg_idx;
3434            switch (size) {
3435            case 0:
3436                reg_idx = (insn >> 5) & 7;
3437                stride = 1;
3438                break;
3439            case 1:
3440                reg_idx = (insn >> 6) & 3;
3441                stride = (insn & (1 << 5)) ? 2 : 1;
3442                break;
3443            case 2:
3444                reg_idx = (insn >> 7) & 1;
3445                stride = (insn & (1 << 6)) ? 2 : 1;
3446                break;
3447            default:
3448                abort();
3449            }
3450            nregs = ((insn >> 8) & 3) + 1;
3451            /* Catch the UNDEF cases. This is unavoidably a bit messy. */
3452            switch (nregs) {
3453            case 1:
3454                if (((idx & (1 << size)) != 0) ||
3455                    (size == 2 && ((idx & 3) == 1 || (idx & 3) == 2))) {
3456                    return 1;
3457                }
3458                break;
3459            case 3:
3460                if ((idx & 1) != 0) {
3461                    return 1;
3462                }
3463                /* fall through */
3464            case 2:
3465                if (size == 2 && (idx & 2) != 0) {
3466                    return 1;
3467                }
3468                break;
3469            case 4:
3470                if ((size == 2) && ((idx & 3) == 3)) {
3471                    return 1;
3472                }
3473                break;
3474            default:
3475                abort();
3476            }
3477            if ((rd + stride * (nregs - 1)) > 31) {
3478                /* Attempts to write off the end of the register file
3479                 * are UNPREDICTABLE; we choose to UNDEF because otherwise
3480                 * the per-element loads/stores would run off the end of
3481                 * the register array.  */
3482                return 1;
3483            }
3484            tmp = tcg_temp_new_i32();
3485            addr = tcg_temp_new_i32();
3486            load_reg_var(s, addr, rn);
3487            for (reg = 0; reg < nregs; reg++) {
3488                if (load) {
3489                    gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s),
3490                                    s->be_data | size);
3491                    neon_store_element(rd, reg_idx, size, tmp);
3492                } else { /* Store */
3493                    neon_load_element(tmp, rd, reg_idx, size);
3494                    gen_aa32_st_i32(s, tmp, addr, get_mem_index(s),
3495                                    s->be_data | size);
3496                }
3497                rd += stride;
3498                tcg_gen_addi_i32(addr, addr, 1 << size);
3499            }
3500            tcg_temp_free_i32(addr);
3501            tcg_temp_free_i32(tmp);
3502            stride = nregs * (1 << size);
3503        }
3504    }
3505    if (rm != 15) {
3506        TCGv_i32 base;
3507
3508        base = load_reg(s, rn);
3509        if (rm == 13) {
3510            tcg_gen_addi_i32(base, base, stride);
3511        } else {
3512            TCGv_i32 index;
3513            index = load_reg(s, rm);
3514            tcg_gen_add_i32(base, base, index);
3515            tcg_temp_free_i32(index);
3516        }
3517        store_reg(s, rn, base);
3518    }
3519    return 0;
3520}
3521
3522static inline void gen_neon_narrow(int size, TCGv_i32 dest, TCGv_i64 src)
3523{
3524    switch (size) {
3525    case 0: gen_helper_neon_narrow_u8(dest, src); break;
3526    case 1: gen_helper_neon_narrow_u16(dest, src); break;
3527    case 2: tcg_gen_extrl_i64_i32(dest, src); break;
3528    default: abort();
3529    }
3530}
3531
3532static inline void gen_neon_narrow_sats(int size, TCGv_i32 dest, TCGv_i64 src)
3533{
3534    switch (size) {
3535    case 0: gen_helper_neon_narrow_sat_s8(dest, cpu_env, src); break;
3536    case 1: gen_helper_neon_narrow_sat_s16(dest, cpu_env, src); break;
3537    case 2: gen_helper_neon_narrow_sat_s32(dest, cpu_env, src); break;
3538    default: abort();
3539    }
3540}
3541
3542static inline void gen_neon_narrow_satu(int size, TCGv_i32 dest, TCGv_i64 src)
3543{
3544    switch (size) {
3545    case 0: gen_helper_neon_narrow_sat_u8(dest, cpu_env, src); break;
3546    case 1: gen_helper_neon_narrow_sat_u16(dest, cpu_env, src); break;
3547    case 2: gen_helper_neon_narrow_sat_u32(dest, cpu_env, src); break;
3548    default: abort();
3549    }
3550}
3551
3552static inline void gen_neon_unarrow_sats(int size, TCGv_i32 dest, TCGv_i64 src)
3553{
3554    switch (size) {
3555    case 0: gen_helper_neon_unarrow_sat8(dest, cpu_env, src); break;
3556    case 1: gen_helper_neon_unarrow_sat16(dest, cpu_env, src); break;
3557    case 2: gen_helper_neon_unarrow_sat32(dest, cpu_env, src); break;
3558    default: abort();
3559    }
3560}
3561
3562static inline void gen_neon_shift_narrow(int size, TCGv_i32 var, TCGv_i32 shift,
3563                                         int q, int u)
3564{
3565    if (q) {
3566        if (u) {
3567            switch (size) {
3568            case 1: gen_helper_neon_rshl_u16(var, var, shift); break;
3569            case 2: gen_helper_neon_rshl_u32(var, var, shift); break;
3570            default: abort();
3571            }
3572        } else {
3573            switch (size) {
3574            case 1: gen_helper_neon_rshl_s16(var, var, shift); break;
3575            case 2: gen_helper_neon_rshl_s32(var, var, shift); break;
3576            default: abort();
3577            }
3578        }
3579    } else {
3580        if (u) {
3581            switch (size) {
3582            case 1: gen_helper_neon_shl_u16(var, var, shift); break;
3583            case 2: gen_helper_neon_shl_u32(var, var, shift); break;
3584            default: abort();
3585            }
3586        } else {
3587            switch (size) {
3588            case 1: gen_helper_neon_shl_s16(var, var, shift); break;
3589            case 2: gen_helper_neon_shl_s32(var, var, shift); break;
3590            default: abort();
3591            }
3592        }
3593    }
3594}
3595
3596static inline void gen_neon_widen(TCGv_i64 dest, TCGv_i32 src, int size, int u)
3597{
3598    if (u) {
3599        switch (size) {
3600        case 0: gen_helper_neon_widen_u8(dest, src); break;
3601        case 1: gen_helper_neon_widen_u16(dest, src); break;
3602        case 2: tcg_gen_extu_i32_i64(dest, src); break;
3603        default: abort();
3604        }
3605    } else {
3606        switch (size) {
3607        case 0: gen_helper_neon_widen_s8(dest, src); break;
3608        case 1: gen_helper_neon_widen_s16(dest, src); break;
3609        case 2: tcg_gen_ext_i32_i64(dest, src); break;
3610        default: abort();
3611        }
3612    }
3613    tcg_temp_free_i32(src);
3614}
3615
3616static inline void gen_neon_addl(int size)
3617{
3618    switch (size) {
3619    case 0: gen_helper_neon_addl_u16(CPU_V001); break;
3620    case 1: gen_helper_neon_addl_u32(CPU_V001); break;
3621    case 2: tcg_gen_add_i64(CPU_V001); break;
3622    default: abort();
3623    }
3624}
3625
3626static inline void gen_neon_subl(int size)
3627{
3628    switch (size) {
3629    case 0: gen_helper_neon_subl_u16(CPU_V001); break;
3630    case 1: gen_helper_neon_subl_u32(CPU_V001); break;
3631    case 2: tcg_gen_sub_i64(CPU_V001); break;
3632    default: abort();
3633    }
3634}
3635
3636static inline void gen_neon_negl(TCGv_i64 var, int size)
3637{
3638    switch (size) {
3639    case 0: gen_helper_neon_negl_u16(var, var); break;
3640    case 1: gen_helper_neon_negl_u32(var, var); break;
3641    case 2:
3642        tcg_gen_neg_i64(var, var);
3643        break;
3644    default: abort();
3645    }
3646}
3647
3648static inline void gen_neon_addl_saturate(TCGv_i64 op0, TCGv_i64 op1, int size)
3649{
3650    switch (size) {
3651    case 1: gen_helper_neon_addl_saturate_s32(op0, cpu_env, op0, op1); break;
3652    case 2: gen_helper_neon_addl_saturate_s64(op0, cpu_env, op0, op1); break;
3653    default: abort();
3654    }
3655}
3656
3657static inline void gen_neon_mull(TCGv_i64 dest, TCGv_i32 a, TCGv_i32 b,
3658                                 int size, int u)
3659{
3660    TCGv_i64 tmp;
3661
3662    switch ((size << 1) | u) {
3663    case 0: gen_helper_neon_mull_s8(dest, a, b); break;
3664    case 1: gen_helper_neon_mull_u8(dest, a, b); break;
3665    case 2: gen_helper_neon_mull_s16(dest, a, b); break;
3666    case 3: gen_helper_neon_mull_u16(dest, a, b); break;
3667    case 4:
3668        tmp = gen_muls_i64_i32(a, b);
3669        tcg_gen_mov_i64(dest, tmp);
3670        tcg_temp_free_i64(tmp);
3671        break;
3672    case 5:
3673        tmp = gen_mulu_i64_i32(a, b);
3674        tcg_gen_mov_i64(dest, tmp);
3675        tcg_temp_free_i64(tmp);
3676        break;
3677    default: abort();
3678    }
3679
3680    /* gen_helper_neon_mull_[su]{8|16} do not free their parameters.
3681       Don't forget to clean them now.  */
3682    if (size < 2) {
3683        tcg_temp_free_i32(a);
3684        tcg_temp_free_i32(b);
3685    }
3686}
3687
3688static void gen_neon_narrow_op(int op, int u, int size,
3689                               TCGv_i32 dest, TCGv_i64 src)
3690{
3691    if (op) {
3692        if (u) {
3693            gen_neon_unarrow_sats(size, dest, src);
3694        } else {
3695            gen_neon_narrow(size, dest, src);
3696        }
3697    } else {
3698        if (u) {
3699            gen_neon_narrow_satu(size, dest, src);
3700        } else {
3701            gen_neon_narrow_sats(size, dest, src);
3702        }
3703    }
3704}
3705
3706/* Symbolic constants for op fields for Neon 3-register same-length.
3707 * The values correspond to bits [11:8,4]; see the ARM ARM DDI0406B
3708 * table A7-9.
3709 */
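    /* For illustration: disas_neon_data_insn() below recovers this value as
     *     op = ((insn >> 7) & 0x1e) | ((insn >> 4) & 1);
     * i.e. bits [11:8] become op[4:1] and bit [4] becomes op[0].
     */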
3710#define NEON_3R_VHADD 0
3711#define NEON_3R_VQADD 1
3712#define NEON_3R_VRHADD 2
3713#define NEON_3R_LOGIC 3 /* VAND,VBIC,VORR,VMOV,VORN,VEOR,VBIF,VBIT,VBSL */
3714#define NEON_3R_VHSUB 4
3715#define NEON_3R_VQSUB 5
3716#define NEON_3R_VCGT 6
3717#define NEON_3R_VCGE 7
3718#define NEON_3R_VSHL 8
3719#define NEON_3R_VQSHL 9
3720#define NEON_3R_VRSHL 10
3721#define NEON_3R_VQRSHL 11
3722#define NEON_3R_VMAX 12
3723#define NEON_3R_VMIN 13
3724#define NEON_3R_VABD 14
3725#define NEON_3R_VABA 15
3726#define NEON_3R_VADD_VSUB 16
3727#define NEON_3R_VTST_VCEQ 17
3728#define NEON_3R_VML 18 /* VMLA, VMLS */
3729#define NEON_3R_VMUL 19
3730#define NEON_3R_VPMAX 20
3731#define NEON_3R_VPMIN 21
3732#define NEON_3R_VQDMULH_VQRDMULH 22
3733#define NEON_3R_VPADD_VQRDMLAH 23
3734#define NEON_3R_SHA 24 /* SHA1C,SHA1P,SHA1M,SHA1SU0,SHA256H{2},SHA256SU1 */
3735#define NEON_3R_VFM_VQRDMLSH 25 /* VFMA, VFMS, VQRDMLSH */
3736#define NEON_3R_FLOAT_ARITH 26 /* float VADD, VSUB, VPADD, VABD */
3737#define NEON_3R_FLOAT_MULTIPLY 27 /* float VMLA, VMLS, VMUL */
3738#define NEON_3R_FLOAT_CMP 28 /* float VCEQ, VCGE, VCGT */
3739#define NEON_3R_FLOAT_ACMP 29 /* float VACGE, VACGT, VACLE, VACLT */
3740#define NEON_3R_FLOAT_MINMAX 30 /* float VMIN, VMAX */
3741#define NEON_3R_FLOAT_MISC 31 /* float VRECPS, VRSQRTS, VMAXNM/MINNM */
3742
3743static const uint8_t neon_3r_sizes[] = {
3744    [NEON_3R_VHADD] = 0x7,
3745    [NEON_3R_VQADD] = 0xf,
3746    [NEON_3R_VRHADD] = 0x7,
3747    [NEON_3R_LOGIC] = 0xf, /* size field encodes op type */
3748    [NEON_3R_VHSUB] = 0x7,
3749    [NEON_3R_VQSUB] = 0xf,
3750    [NEON_3R_VCGT] = 0x7,
3751    [NEON_3R_VCGE] = 0x7,
3752    [NEON_3R_VSHL] = 0xf,
3753    [NEON_3R_VQSHL] = 0xf,
3754    [NEON_3R_VRSHL] = 0xf,
3755    [NEON_3R_VQRSHL] = 0xf,
3756    [NEON_3R_VMAX] = 0x7,
3757    [NEON_3R_VMIN] = 0x7,
3758    [NEON_3R_VABD] = 0x7,
3759    [NEON_3R_VABA] = 0x7,
3760    [NEON_3R_VADD_VSUB] = 0xf,
3761    [NEON_3R_VTST_VCEQ] = 0x7,
3762    [NEON_3R_VML] = 0x7,
3763    [NEON_3R_VMUL] = 0x7,
3764    [NEON_3R_VPMAX] = 0x7,
3765    [NEON_3R_VPMIN] = 0x7,
3766    [NEON_3R_VQDMULH_VQRDMULH] = 0x6,
3767    [NEON_3R_VPADD_VQRDMLAH] = 0x7,
3768    [NEON_3R_SHA] = 0xf, /* size field encodes op type */
3769    [NEON_3R_VFM_VQRDMLSH] = 0x7, /* For VFM, size bit 1 encodes op */
3770    [NEON_3R_FLOAT_ARITH] = 0x5, /* size bit 1 encodes op */
3771    [NEON_3R_FLOAT_MULTIPLY] = 0x5, /* size bit 1 encodes op */
3772    [NEON_3R_FLOAT_CMP] = 0x5, /* size bit 1 encodes op */
3773    [NEON_3R_FLOAT_ACMP] = 0x5, /* size bit 1 encodes op */
3774    [NEON_3R_FLOAT_MINMAX] = 0x5, /* size bit 1 encodes op */
3775    [NEON_3R_FLOAT_MISC] = 0x5, /* size bit 1 encodes op */
3776};
3777
3778/* Symbolic constants for op fields for Neon 2-register miscellaneous.
3779 * The values correspond to bits [17:16,10:7]; see the ARM ARM DDI0406B
3780 * table A7-13.
3781 */
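    /* These op values are instruction bits [17:16] (as op[5:4]) concatenated
     * with bits [10:7] (as op[3:0]), giving 0..63; unallocated combinations
     * have no entry in neon_2rm_sizes[] below and therefore UNDEF.
     */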
3782#define NEON_2RM_VREV64 0
3783#define NEON_2RM_VREV32 1
3784#define NEON_2RM_VREV16 2
3785#define NEON_2RM_VPADDL 4
3786#define NEON_2RM_VPADDL_U 5
3787#define NEON_2RM_AESE 6 /* Includes AESD */
3788#define NEON_2RM_AESMC 7 /* Includes AESIMC */
3789#define NEON_2RM_VCLS 8
3790#define NEON_2RM_VCLZ 9
3791#define NEON_2RM_VCNT 10
3792#define NEON_2RM_VMVN 11
3793#define NEON_2RM_VPADAL 12
3794#define NEON_2RM_VPADAL_U 13
3795#define NEON_2RM_VQABS 14
3796#define NEON_2RM_VQNEG 15
3797#define NEON_2RM_VCGT0 16
3798#define NEON_2RM_VCGE0 17
3799#define NEON_2RM_VCEQ0 18
3800#define NEON_2RM_VCLE0 19
3801#define NEON_2RM_VCLT0 20
3802#define NEON_2RM_SHA1H 21
3803#define NEON_2RM_VABS 22
3804#define NEON_2RM_VNEG 23
3805#define NEON_2RM_VCGT0_F 24
3806#define NEON_2RM_VCGE0_F 25
3807#define NEON_2RM_VCEQ0_F 26
3808#define NEON_2RM_VCLE0_F 27
3809#define NEON_2RM_VCLT0_F 28
3810#define NEON_2RM_VABS_F 30
3811#define NEON_2RM_VNEG_F 31
3812#define NEON_2RM_VSWP 32
3813#define NEON_2RM_VTRN 33
3814#define NEON_2RM_VUZP 34
3815#define NEON_2RM_VZIP 35
3816#define NEON_2RM_VMOVN 36 /* Includes VQMOVN, VQMOVUN */
3817#define NEON_2RM_VQMOVN 37 /* Includes VQMOVUN */
3818#define NEON_2RM_VSHLL 38
3819#define NEON_2RM_SHA1SU1 39 /* Includes SHA256SU0 */
3820#define NEON_2RM_VRINTN 40
3821#define NEON_2RM_VRINTX 41
3822#define NEON_2RM_VRINTA 42
3823#define NEON_2RM_VRINTZ 43
3824#define NEON_2RM_VCVT_F16_F32 44
3825#define NEON_2RM_VRINTM 45
3826#define NEON_2RM_VCVT_F32_F16 46
3827#define NEON_2RM_VRINTP 47
3828#define NEON_2RM_VCVTAU 48
3829#define NEON_2RM_VCVTAS 49
3830#define NEON_2RM_VCVTNU 50
3831#define NEON_2RM_VCVTNS 51
3832#define NEON_2RM_VCVTPU 52
3833#define NEON_2RM_VCVTPS 53
3834#define NEON_2RM_VCVTMU 54
3835#define NEON_2RM_VCVTMS 55
3836#define NEON_2RM_VRECPE 56
3837#define NEON_2RM_VRSQRTE 57
3838#define NEON_2RM_VRECPE_F 58
3839#define NEON_2RM_VRSQRTE_F 59
3840#define NEON_2RM_VCVT_FS 60
3841#define NEON_2RM_VCVT_FU 61
3842#define NEON_2RM_VCVT_SF 62
3843#define NEON_2RM_VCVT_UF 63
3844
3845static bool neon_2rm_is_v8_op(int op)
3846{
3847    /* Return true if this neon 2reg-misc op is ARMv8 and up */
3848    switch (op) {
3849    case NEON_2RM_VRINTN:
3850    case NEON_2RM_VRINTA:
3851    case NEON_2RM_VRINTM:
3852    case NEON_2RM_VRINTP:
3853    case NEON_2RM_VRINTZ:
3854    case NEON_2RM_VRINTX:
3855    case NEON_2RM_VCVTAU:
3856    case NEON_2RM_VCVTAS:
3857    case NEON_2RM_VCVTNU:
3858    case NEON_2RM_VCVTNS:
3859    case NEON_2RM_VCVTPU:
3860    case NEON_2RM_VCVTPS:
3861    case NEON_2RM_VCVTMU:
3862    case NEON_2RM_VCVTMS:
3863        return true;
3864    default:
3865        return false;
3866    }
3867}
3868
3869/* Each entry in this array has bit n set if the insn allows
3870 * size value n (otherwise it will UNDEF). Since unallocated
3871 * op values will have no bits set, they always UNDEF.
3872 */
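    /* For example, [NEON_2RM_VREV32] = 0x3 permits size 0 (bytes) and
     * size 1 (halfwords) but not size 2, since reversing 32-bit elements
     * within a 32-bit group would be a no-op; VREV64 allows all three (0x7).
     */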
3873static const uint8_t neon_2rm_sizes[] = {
3874    [NEON_2RM_VREV64] = 0x7,
3875    [NEON_2RM_VREV32] = 0x3,
3876    [NEON_2RM_VREV16] = 0x1,
3877    [NEON_2RM_VPADDL] = 0x7,
3878    [NEON_2RM_VPADDL_U] = 0x7,
3879    [NEON_2RM_AESE] = 0x1,
3880    [NEON_2RM_AESMC] = 0x1,
3881    [NEON_2RM_VCLS] = 0x7,
3882    [NEON_2RM_VCLZ] = 0x7,
3883    [NEON_2RM_VCNT] = 0x1,
3884    [NEON_2RM_VMVN] = 0x1,
3885    [NEON_2RM_VPADAL] = 0x7,
3886    [NEON_2RM_VPADAL_U] = 0x7,
3887    [NEON_2RM_VQABS] = 0x7,
3888    [NEON_2RM_VQNEG] = 0x7,
3889    [NEON_2RM_VCGT0] = 0x7,
3890    [NEON_2RM_VCGE0] = 0x7,
3891    [NEON_2RM_VCEQ0] = 0x7,
3892    [NEON_2RM_VCLE0] = 0x7,
3893    [NEON_2RM_VCLT0] = 0x7,
3894    [NEON_2RM_SHA1H] = 0x4,
3895    [NEON_2RM_VABS] = 0x7,
3896    [NEON_2RM_VNEG] = 0x7,
3897    [NEON_2RM_VCGT0_F] = 0x4,
3898    [NEON_2RM_VCGE0_F] = 0x4,
3899    [NEON_2RM_VCEQ0_F] = 0x4,
3900    [NEON_2RM_VCLE0_F] = 0x4,
3901    [NEON_2RM_VCLT0_F] = 0x4,
3902    [NEON_2RM_VABS_F] = 0x4,
3903    [NEON_2RM_VNEG_F] = 0x4,
3904    [NEON_2RM_VSWP] = 0x1,
3905    [NEON_2RM_VTRN] = 0x7,
3906    [NEON_2RM_VUZP] = 0x7,
3907    [NEON_2RM_VZIP] = 0x7,
3908    [NEON_2RM_VMOVN] = 0x7,
3909    [NEON_2RM_VQMOVN] = 0x7,
3910    [NEON_2RM_VSHLL] = 0x7,
3911    [NEON_2RM_SHA1SU1] = 0x4,
3912    [NEON_2RM_VRINTN] = 0x4,
3913    [NEON_2RM_VRINTX] = 0x4,
3914    [NEON_2RM_VRINTA] = 0x4,
3915    [NEON_2RM_VRINTZ] = 0x4,
3916    [NEON_2RM_VCVT_F16_F32] = 0x2,
3917    [NEON_2RM_VRINTM] = 0x4,
3918    [NEON_2RM_VCVT_F32_F16] = 0x2,
3919    [NEON_2RM_VRINTP] = 0x4,
3920    [NEON_2RM_VCVTAU] = 0x4,
3921    [NEON_2RM_VCVTAS] = 0x4,
3922    [NEON_2RM_VCVTNU] = 0x4,
3923    [NEON_2RM_VCVTNS] = 0x4,
3924    [NEON_2RM_VCVTPU] = 0x4,
3925    [NEON_2RM_VCVTPS] = 0x4,
3926    [NEON_2RM_VCVTMU] = 0x4,
3927    [NEON_2RM_VCVTMS] = 0x4,
3928    [NEON_2RM_VRECPE] = 0x4,
3929    [NEON_2RM_VRSQRTE] = 0x4,
3930    [NEON_2RM_VRECPE_F] = 0x4,
3931    [NEON_2RM_VRSQRTE_F] = 0x4,
3932    [NEON_2RM_VCVT_FS] = 0x4,
3933    [NEON_2RM_VCVT_FU] = 0x4,
3934    [NEON_2RM_VCVT_SF] = 0x4,
3935    [NEON_2RM_VCVT_UF] = 0x4,
3936};
3937
3938
3939/* Expand v8.1 simd helper.  */
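    /* (For reference: opr_sz below is 8 bytes for a D-register operation and
     * 16 for a Q-register one; the nonzero return lets the caller UNDEF when
     * the RDM extension is not present.)
     */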
3940static int do_v81_helper(DisasContext *s, gen_helper_gvec_3_ptr *fn,
3941                         int q, int rd, int rn, int rm)
3942{
3943    if (dc_isar_feature(aa32_rdm, s)) {
3944        int opr_sz = (1 + q) * 8;
3945        tcg_gen_gvec_3_ptr(vfp_reg_offset(1, rd),
3946                           vfp_reg_offset(1, rn),
3947                           vfp_reg_offset(1, rm), cpu_env,
3948                           opr_sz, opr_sz, 0, fn);
3949        return 0;
3950    }
3951    return 1;
3952}
3953
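    /* The *sra_op expanders below implement accumulating shifts (SSRA/USRA):
     * each destination element is updated as d += (a >> shift), using an
     * arithmetic shift for ssra_op and a logical shift for usra_op.  Note
     * that the scalar helpers clobber 'a' as scratch.
     */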
3954static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3955{
3956    tcg_gen_vec_sar8i_i64(a, a, shift);
3957    tcg_gen_vec_add8_i64(d, d, a);
3958}
3959
3960static void gen_ssra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3961{
3962    tcg_gen_vec_sar16i_i64(a, a, shift);
3963    tcg_gen_vec_add16_i64(d, d, a);
3964}
3965
3966static void gen_ssra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3967{
3968    tcg_gen_sari_i32(a, a, shift);
3969    tcg_gen_add_i32(d, d, a);
3970}
3971
3972static void gen_ssra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3973{
3974    tcg_gen_sari_i64(a, a, shift);
3975    tcg_gen_add_i64(d, d, a);
3976}
3977
3978static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3979{
3980    tcg_gen_sari_vec(vece, a, a, sh);
3981    tcg_gen_add_vec(vece, d, d, a);
3982}
3983
3984static const TCGOpcode vecop_list_ssra[] = {
3985    INDEX_op_sari_vec, INDEX_op_add_vec, 0
3986};
3987
3988const GVecGen2i ssra_op[4] = {
3989    { .fni8 = gen_ssra8_i64,
3990      .fniv = gen_ssra_vec,
3991      .load_dest = true,
3992      .opt_opc = vecop_list_ssra,
3993      .vece = MO_8 },
3994    { .fni8 = gen_ssra16_i64,
3995      .fniv = gen_ssra_vec,
3996      .load_dest = true,
3997      .opt_opc = vecop_list_ssra,
3998      .vece = MO_16 },
3999    { .fni4 = gen_ssra32_i32,
4000      .fniv = gen_ssra_vec,
4001      .load_dest = true,
4002      .opt_opc = vecop_list_ssra,
4003      .vece = MO_32 },
4004    { .fni8 = gen_ssra64_i64,
4005      .fniv = gen_ssra_vec,
4006      .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4007      .opt_opc = vecop_list_ssra,
4008      .load_dest = true,
4009      .vece = MO_64 },
4010};
4011
4012static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4013{
4014    tcg_gen_vec_shr8i_i64(a, a, shift);
4015    tcg_gen_vec_add8_i64(d, d, a);
4016}
4017
4018static void gen_usra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4019{
4020    tcg_gen_vec_shr16i_i64(a, a, shift);
4021    tcg_gen_vec_add16_i64(d, d, a);
4022}
4023
4024static void gen_usra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
4025{
4026    tcg_gen_shri_i32(a, a, shift);
4027    tcg_gen_add_i32(d, d, a);
4028}
4029
4030static void gen_usra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4031{
4032    tcg_gen_shri_i64(a, a, shift);
4033    tcg_gen_add_i64(d, d, a);
4034}
4035
4036static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
4037{
4038    tcg_gen_shri_vec(vece, a, a, sh);
4039    tcg_gen_add_vec(vece, d, d, a);
4040}
4041
4042static const TCGOpcode vecop_list_usra[] = {
4043    INDEX_op_shri_vec, INDEX_op_add_vec, 0
4044};
4045
4046const GVecGen2i usra_op[4] = {
4047    { .fni8 = gen_usra8_i64,
4048      .fniv = gen_usra_vec,
4049      .load_dest = true,
4050      .opt_opc = vecop_list_usra,
4051      .vece = MO_8, },
4052    { .fni8 = gen_usra16_i64,
4053      .fniv = gen_usra_vec,
4054      .load_dest = true,
4055      .opt_opc = vecop_list_usra,
4056      .vece = MO_16, },
4057    { .fni4 = gen_usra32_i32,
4058      .fniv = gen_usra_vec,
4059      .load_dest = true,
4060      .opt_opc = vecop_list_usra,
4061      .vece = MO_32, },
4062    { .fni8 = gen_usra64_i64,
4063      .fniv = gen_usra_vec,
4064      .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4065      .load_dest = true,
4066      .opt_opc = vecop_list_usra,
4067      .vece = MO_64, },
4068};
4069
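    /* sri_op (shift right and insert) expanders: the shifted value replaces
     * only the bits it covers, i.e. d = (d & ~mask) | ((a >> shift) & mask)
     * with mask = element_mask >> shift, so the top 'shift' bits of each
     * destination element are preserved.
     */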
4070static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4071{
4072    uint64_t mask = dup_const(MO_8, 0xff >> shift);
4073    TCGv_i64 t = tcg_temp_new_i64();
4074
4075    tcg_gen_shri_i64(t, a, shift);
4076    tcg_gen_andi_i64(t, t, mask);
4077    tcg_gen_andi_i64(d, d, ~mask);
4078    tcg_gen_or_i64(d, d, t);
4079    tcg_temp_free_i64(t);
4080}
4081
4082static void gen_shr16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4083{
4084    uint64_t mask = dup_const(MO_16, 0xffff >> shift);
4085    TCGv_i64 t = tcg_temp_new_i64();
4086
4087    tcg_gen_shri_i64(t, a, shift);
4088    tcg_gen_andi_i64(t, t, mask);
4089    tcg_gen_andi_i64(d, d, ~mask);
4090    tcg_gen_or_i64(d, d, t);
4091    tcg_temp_free_i64(t);
4092}
4093
4094static void gen_shr32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
4095{
4096    tcg_gen_shri_i32(a, a, shift);
4097    tcg_gen_deposit_i32(d, d, a, 0, 32 - shift);
4098}
4099
4100static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4101{
4102    tcg_gen_shri_i64(a, a, shift);
4103    tcg_gen_deposit_i64(d, d, a, 0, 64 - shift);
4104}
4105
4106static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
4107{
4108    if (sh == 0) {
4109        tcg_gen_mov_vec(d, a);
4110    } else {
4111        TCGv_vec t = tcg_temp_new_vec_matching(d);
4112        TCGv_vec m = tcg_temp_new_vec_matching(d);
4113
4114        tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh));
4115        tcg_gen_shri_vec(vece, t, a, sh);
4116        tcg_gen_and_vec(vece, d, d, m);
4117        tcg_gen_or_vec(vece, d, d, t);
4118
4119        tcg_temp_free_vec(t);
4120        tcg_temp_free_vec(m);
4121    }
4122}
4123
4124static const TCGOpcode vecop_list_sri[] = { INDEX_op_shri_vec, 0 };
4125
4126const GVecGen2i sri_op[4] = {
4127    { .fni8 = gen_shr8_ins_i64,
4128      .fniv = gen_shr_ins_vec,
4129      .load_dest = true,
4130      .opt_opc = vecop_list_sri,
4131      .vece = MO_8 },
4132    { .fni8 = gen_shr16_ins_i64,
4133      .fniv = gen_shr_ins_vec,
4134      .load_dest = true,
4135      .opt_opc = vecop_list_sri,
4136      .vece = MO_16 },
4137    { .fni4 = gen_shr32_ins_i32,
4138      .fniv = gen_shr_ins_vec,
4139      .load_dest = true,
4140      .opt_opc = vecop_list_sri,
4141      .vece = MO_32 },
4142    { .fni8 = gen_shr64_ins_i64,
4143      .fniv = gen_shr_ins_vec,
4144      .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4145      .load_dest = true,
4146      .opt_opc = vecop_list_sri,
4147      .vece = MO_64 },
4148};
4149
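    /* sli_op (shift left and insert) expanders: the mirror image of SRI
     * above, d = (d & ~mask) | ((a << shift) & mask) with
     * mask = element_mask << shift, so the low 'shift' bits of each
     * destination element are preserved.
     */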
4150static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4151{
4152    uint64_t mask = dup_const(MO_8, 0xff << shift);
4153    TCGv_i64 t = tcg_temp_new_i64();
4154
4155    tcg_gen_shli_i64(t, a, shift);
4156    tcg_gen_andi_i64(t, t, mask);
4157    tcg_gen_andi_i64(d, d, ~mask);
4158    tcg_gen_or_i64(d, d, t);
4159    tcg_temp_free_i64(t);
4160}
4161
4162static void gen_shl16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4163{
4164    uint64_t mask = dup_const(MO_16, 0xffff << shift);
4165    TCGv_i64 t = tcg_temp_new_i64();
4166
4167    tcg_gen_shli_i64(t, a, shift);
4168    tcg_gen_andi_i64(t, t, mask);
4169    tcg_gen_andi_i64(d, d, ~mask);
4170    tcg_gen_or_i64(d, d, t);
4171    tcg_temp_free_i64(t);
4172}
4173
4174static void gen_shl32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
4175{
4176    tcg_gen_deposit_i32(d, d, a, shift, 32 - shift);
4177}
4178
4179static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4180{
4181    tcg_gen_deposit_i64(d, d, a, shift, 64 - shift);
4182}
4183
4184static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
4185{
4186    if (sh == 0) {
4187        tcg_gen_mov_vec(d, a);
4188    } else {
4189        TCGv_vec t = tcg_temp_new_vec_matching(d);
4190        TCGv_vec m = tcg_temp_new_vec_matching(d);
4191
4192        tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK(0, sh));
4193        tcg_gen_shli_vec(vece, t, a, sh);
4194        tcg_gen_and_vec(vece, d, d, m);
4195        tcg_gen_or_vec(vece, d, d, t);
4196
4197        tcg_temp_free_vec(t);
4198        tcg_temp_free_vec(m);
4199    }
4200}
4201
4202static const TCGOpcode vecop_list_sli[] = { INDEX_op_shli_vec, 0 };
4203
4204const GVecGen2i sli_op[4] = {
4205    { .fni8 = gen_shl8_ins_i64,
4206      .fniv = gen_shl_ins_vec,
4207      .load_dest = true,
4208      .opt_opc = vecop_list_sli,
4209      .vece = MO_8 },
4210    { .fni8 = gen_shl16_ins_i64,
4211      .fniv = gen_shl_ins_vec,
4212      .load_dest = true,
4213      .opt_opc = vecop_list_sli,
4214      .vece = MO_16 },
4215    { .fni4 = gen_shl32_ins_i32,
4216      .fniv = gen_shl_ins_vec,
4217      .load_dest = true,
4218      .opt_opc = vecop_list_sli,
4219      .vece = MO_32 },
4220    { .fni8 = gen_shl64_ins_i64,
4221      .fniv = gen_shl_ins_vec,
4222      .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4223      .load_dest = true,
4224      .opt_opc = vecop_list_sli,
4225      .vece = MO_64 },
4226};
4227
4228static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4229{
4230    gen_helper_neon_mul_u8(a, a, b);
4231    gen_helper_neon_add_u8(d, d, a);
4232}
4233
4234static void gen_mls8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4235{
4236    gen_helper_neon_mul_u8(a, a, b);
4237    gen_helper_neon_sub_u8(d, d, a);
4238}
4239
4240static void gen_mla16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4241{
4242    gen_helper_neon_mul_u16(a, a, b);
4243    gen_helper_neon_add_u16(d, d, a);
4244}
4245
4246static void gen_mls16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4247{
4248    gen_helper_neon_mul_u16(a, a, b);
4249    gen_helper_neon_sub_u16(d, d, a);
4250}
4251
4252static void gen_mla32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4253{
4254    tcg_gen_mul_i32(a, a, b);
4255    tcg_gen_add_i32(d, d, a);
4256}
4257
4258static void gen_mls32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4259{
4260    tcg_gen_mul_i32(a, a, b);
4261    tcg_gen_sub_i32(d, d, a);
4262}
4263
4264static void gen_mla64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4265{
4266    tcg_gen_mul_i64(a, a, b);
4267    tcg_gen_add_i64(d, d, a);
4268}
4269
4270static void gen_mls64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4271{
4272    tcg_gen_mul_i64(a, a, b);
4273    tcg_gen_sub_i64(d, d, a);
4274}
4275
4276static void gen_mla_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4277{
4278    tcg_gen_mul_vec(vece, a, a, b);
4279    tcg_gen_add_vec(vece, d, d, a);
4280}
4281
4282static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4283{
4284    tcg_gen_mul_vec(vece, a, a, b);
4285    tcg_gen_sub_vec(vece, d, d, a);
4286}
4287
4288/* Note that while NEON does not support VMLA and VMLS as 64-bit ops,
4289 * these tables are shared with AArch64 which does support them.
4290 */
4291
4292static const TCGOpcode vecop_list_mla[] = {
4293    INDEX_op_mul_vec, INDEX_op_add_vec, 0
4294};
4295
4296static const TCGOpcode vecop_list_mls[] = {
4297    INDEX_op_mul_vec, INDEX_op_sub_vec, 0
4298};
4299
4300const GVecGen3 mla_op[4] = {
4301    { .fni4 = gen_mla8_i32,
4302      .fniv = gen_mla_vec,
4303      .load_dest = true,
4304      .opt_opc = vecop_list_mla,
4305      .vece = MO_8 },
4306    { .fni4 = gen_mla16_i32,
4307      .fniv = gen_mla_vec,
4308      .load_dest = true,
4309      .opt_opc = vecop_list_mla,
4310      .vece = MO_16 },
4311    { .fni4 = gen_mla32_i32,
4312      .fniv = gen_mla_vec,
4313      .load_dest = true,
4314      .opt_opc = vecop_list_mla,
4315      .vece = MO_32 },
4316    { .fni8 = gen_mla64_i64,
4317      .fniv = gen_mla_vec,
4318      .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4319      .load_dest = true,
4320      .opt_opc = vecop_list_mla,
4321      .vece = MO_64 },
4322};
4323
4324const GVecGen3 mls_op[4] = {
4325    { .fni4 = gen_mls8_i32,
4326      .fniv = gen_mls_vec,
4327      .load_dest = true,
4328      .opt_opc = vecop_list_mls,
4329      .vece = MO_8 },
4330    { .fni4 = gen_mls16_i32,
4331      .fniv = gen_mls_vec,
4332      .load_dest = true,
4333      .opt_opc = vecop_list_mls,
4334      .vece = MO_16 },
4335    { .fni4 = gen_mls32_i32,
4336      .fniv = gen_mls_vec,
4337      .load_dest = true,
4338      .opt_opc = vecop_list_mls,
4339      .vece = MO_32 },
4340    { .fni8 = gen_mls64_i64,
4341      .fniv = gen_mls_vec,
4342      .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4343      .load_dest = true,
4344      .opt_opc = vecop_list_mls,
4345      .vece = MO_64 },
4346};
4347
4348/* CMTST : test is "if (X & Y != 0)". */
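    /* The expanders set each destination element to all ones when the test is
     * true and to zero otherwise; the 32-bit and 64-bit scalar versions below
     * get that from setcond followed by neg, the vector version from a
     * cmp_vec against zero.
     */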
4349static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4350{
4351    tcg_gen_and_i32(d, a, b);
4352    tcg_gen_setcondi_i32(TCG_COND_NE, d, d, 0);
4353    tcg_gen_neg_i32(d, d);
4354}
4355
4356void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4357{
4358    tcg_gen_and_i64(d, a, b);
4359    tcg_gen_setcondi_i64(TCG_COND_NE, d, d, 0);
4360    tcg_gen_neg_i64(d, d);
4361}
4362
4363static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4364{
4365    tcg_gen_and_vec(vece, d, a, b);
4366    tcg_gen_dupi_vec(vece, a, 0);
4367    tcg_gen_cmp_vec(TCG_COND_NE, vece, d, d, a);
4368}
4369
4370static const TCGOpcode vecop_list_cmtst[] = { INDEX_op_cmp_vec, 0 };
4371
4372const GVecGen3 cmtst_op[4] = {
4373    { .fni4 = gen_helper_neon_tst_u8,
4374      .fniv = gen_cmtst_vec,
4375      .opt_opc = vecop_list_cmtst,
4376      .vece = MO_8 },
4377    { .fni4 = gen_helper_neon_tst_u16,
4378      .fniv = gen_cmtst_vec,
4379      .opt_opc = vecop_list_cmtst,
4380      .vece = MO_16 },
4381    { .fni4 = gen_cmtst_i32,
4382      .fniv = gen_cmtst_vec,
4383      .opt_opc = vecop_list_cmtst,
4384      .vece = MO_32 },
4385    { .fni8 = gen_cmtst_i64,
4386      .fniv = gen_cmtst_vec,
4387      .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4388      .opt_opc = vecop_list_cmtst,
4389      .vece = MO_64 },
4390};
4391
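    /* Saturating add/sub expanders: 'sat' is the QC accumulator (expanded
     * below with offsetof(CPUARMState, vfp.qc) as its offset).  Each helper
     * computes both the wrapping result (x) and the saturating result (t);
     * lanes where they differ have saturated, and those lanes are ORed into
     * 'sat' so that QC becomes non-zero if any element saturated.
     */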
4392static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4393                          TCGv_vec a, TCGv_vec b)
4394{
4395    TCGv_vec x = tcg_temp_new_vec_matching(t);
4396    tcg_gen_add_vec(vece, x, a, b);
4397    tcg_gen_usadd_vec(vece, t, a, b);
4398    tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4399    tcg_gen_or_vec(vece, sat, sat, x);
4400    tcg_temp_free_vec(x);
4401}
4402
4403static const TCGOpcode vecop_list_uqadd[] = {
4404    INDEX_op_usadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
4405};
4406
4407const GVecGen4 uqadd_op[4] = {
4408    { .fniv = gen_uqadd_vec,
4409      .fno = gen_helper_gvec_uqadd_b,
4410      .write_aofs = true,
4411      .opt_opc = vecop_list_uqadd,
4412      .vece = MO_8 },
4413    { .fniv = gen_uqadd_vec,
4414      .fno = gen_helper_gvec_uqadd_h,
4415      .write_aofs = true,
4416      .opt_opc = vecop_list_uqadd,
4417      .vece = MO_16 },
4418    { .fniv = gen_uqadd_vec,
4419      .fno = gen_helper_gvec_uqadd_s,
4420      .write_aofs = true,
4421      .opt_opc = vecop_list_uqadd,
4422      .vece = MO_32 },
4423    { .fniv = gen_uqadd_vec,
4424      .fno = gen_helper_gvec_uqadd_d,
4425      .write_aofs = true,
4426      .opt_opc = vecop_list_uqadd,
4427      .vece = MO_64 },
4428};
4429
4430static void gen_sqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4431                          TCGv_vec a, TCGv_vec b)
4432{
4433    TCGv_vec x = tcg_temp_new_vec_matching(t);
4434    tcg_gen_add_vec(vece, x, a, b);
4435    tcg_gen_ssadd_vec(vece, t, a, b);
4436    tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4437    tcg_gen_or_vec(vece, sat, sat, x);
4438    tcg_temp_free_vec(x);
4439}
4440
4441static const TCGOpcode vecop_list_sqadd[] = {
4442    INDEX_op_ssadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
4443};
4444
4445const GVecGen4 sqadd_op[4] = {
4446    { .fniv = gen_sqadd_vec,
4447      .fno = gen_helper_gvec_sqadd_b,
4448      .opt_opc = vecop_list_sqadd,
4449      .write_aofs = true,
4450      .vece = MO_8 },
4451    { .fniv = gen_sqadd_vec,
4452      .fno = gen_helper_gvec_sqadd_h,
4453      .opt_opc = vecop_list_sqadd,
4454      .write_aofs = true,
4455      .vece = MO_16 },
4456    { .fniv = gen_sqadd_vec,
4457      .fno = gen_helper_gvec_sqadd_s,
4458      .opt_opc = vecop_list_sqadd,
4459      .write_aofs = true,
4460      .vece = MO_32 },
4461    { .fniv = gen_sqadd_vec,
4462      .fno = gen_helper_gvec_sqadd_d,
4463      .opt_opc = vecop_list_sqadd,
4464      .write_aofs = true,
4465      .vece = MO_64 },
4466};
4467
4468static void gen_uqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4469                          TCGv_vec a, TCGv_vec b)
4470{
4471    TCGv_vec x = tcg_temp_new_vec_matching(t);
4472    tcg_gen_sub_vec(vece, x, a, b);
4473    tcg_gen_ussub_vec(vece, t, a, b);
4474    tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4475    tcg_gen_or_vec(vece, sat, sat, x);
4476    tcg_temp_free_vec(x);
4477}
4478
4479static const TCGOpcode vecop_list_uqsub[] = {
4480    INDEX_op_ussub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
4481};
4482
4483const GVecGen4 uqsub_op[4] = {
4484    { .fniv = gen_uqsub_vec,
4485      .fno = gen_helper_gvec_uqsub_b,
4486      .opt_opc = vecop_list_uqsub,
4487      .write_aofs = true,
4488      .vece = MO_8 },
4489    { .fniv = gen_uqsub_vec,
4490      .fno = gen_helper_gvec_uqsub_h,
4491      .opt_opc = vecop_list_uqsub,
4492      .write_aofs = true,
4493      .vece = MO_16 },
4494    { .fniv = gen_uqsub_vec,
4495      .fno = gen_helper_gvec_uqsub_s,
4496      .opt_opc = vecop_list_uqsub,
4497      .write_aofs = true,
4498      .vece = MO_32 },
4499    { .fniv = gen_uqsub_vec,
4500      .fno = gen_helper_gvec_uqsub_d,
4501      .opt_opc = vecop_list_uqsub,
4502      .write_aofs = true,
4503      .vece = MO_64 },
4504};
4505
4506static void gen_sqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4507                          TCGv_vec a, TCGv_vec b)
4508{
4509    TCGv_vec x = tcg_temp_new_vec_matching(t);
4510    tcg_gen_sub_vec(vece, x, a, b);
4511    tcg_gen_sssub_vec(vece, t, a, b);
4512    tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4513    tcg_gen_or_vec(vece, sat, sat, x);
4514    tcg_temp_free_vec(x);
4515}
4516
4517static const TCGOpcode vecop_list_sqsub[] = {
4518    INDEX_op_sssub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
4519};
4520
4521const GVecGen4 sqsub_op[4] = {
4522    { .fniv = gen_sqsub_vec,
4523      .fno = gen_helper_gvec_sqsub_b,
4524      .opt_opc = vecop_list_sqsub,
4525      .write_aofs = true,
4526      .vece = MO_8 },
4527    { .fniv = gen_sqsub_vec,
4528      .fno = gen_helper_gvec_sqsub_h,
4529      .opt_opc = vecop_list_sqsub,
4530      .write_aofs = true,
4531      .vece = MO_16 },
4532    { .fniv = gen_sqsub_vec,
4533      .fno = gen_helper_gvec_sqsub_s,
4534      .opt_opc = vecop_list_sqsub,
4535      .write_aofs = true,
4536      .vece = MO_32 },
4537    { .fniv = gen_sqsub_vec,
4538      .fno = gen_helper_gvec_sqsub_d,
4539      .opt_opc = vecop_list_sqsub,
4540      .write_aofs = true,
4541      .vece = MO_64 },
4542};
4543
4544/* Translate a NEON data processing instruction.  Return nonzero if the
4545   instruction is invalid.
4546   We process data in a mixture of 32-bit and 64-bit chunks.
4547   Mostly we use 32-bit chunks so we can use normal scalar instructions.  */
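    /* Concretely, for element sizes below 64 bits the per-element loops run
     * two passes over a D register and four over a Q register, while 64-bit
     * element ops use one or two 64-bit passes; many common cases are
     * instead expanded through the gvec interfaces and never reach the
     * pass loops.
     */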
4548
4549static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
4550{
4551    int op;
4552    int q;
4553    int rd, rn, rm, rd_ofs, rn_ofs, rm_ofs;
4554    int size;
4555    int shift;
4556    int pass;
4557    int count;
4558    int pairwise;
4559    int u;
4560    int vec_size;
4561    uint32_t imm;
4562    TCGv_i32 tmp, tmp2, tmp3, tmp4, tmp5;
4563    TCGv_ptr ptr1, ptr2, ptr3;
4564    TCGv_i64 tmp64;
4565
4566    /* FIXME: this access check should not take precedence over UNDEF
4567     * for invalid encodings; we will generate incorrect syndrome information
4568     * for attempts to execute invalid vfp/neon encodings with FP disabled.
4569     */
4570    if (s->fp_excp_el) {
4571        gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
4572                           syn_simd_access_trap(1, 0xe, false), s->fp_excp_el);
4573        return 0;
4574    }
4575
4576    if (!s->vfp_enabled)
4577      return 1;
4578    q = (insn & (1 << 6)) != 0;
4579    u = (insn >> 24) & 1;
4580    VFP_DREG_D(rd, insn);
4581    VFP_DREG_N(rn, insn);
4582    VFP_DREG_M(rm, insn);
4583    size = (insn >> 20) & 3;
4584    vec_size = q ? 16 : 8;
4585    rd_ofs = neon_reg_offset(rd, 0);
4586    rn_ofs = neon_reg_offset(rn, 0);
4587    rm_ofs = neon_reg_offset(rm, 0);
4588
4589    if ((insn & (1 << 23)) == 0) {
4590        /* Three register same length.  */
4591        op = ((insn >> 7) & 0x1e) | ((insn >> 4) & 1);
4592        /* Catch invalid op and bad size combinations: UNDEF */
4593        if ((neon_3r_sizes[op] & (1 << size)) == 0) {
4594            return 1;
4595        }
4596        /* All insns of this form UNDEF for either this condition or the
4597         * superset of cases "Q==1"; we catch the latter later.
4598         */
4599        if (q && ((rd | rn | rm) & 1)) {
4600            return 1;
4601        }
4602        switch (op) {
4603        case NEON_3R_SHA:
4604            /* The SHA-1/SHA-256 3-register instructions require special
4605             * treatment here, as their size field is overloaded as an
4606             * op type selector, and they all consume their input in a
4607             * single pass.
4608             */
4609            if (!q) {
4610                return 1;
4611            }
4612            if (!u) { /* SHA-1 */
4613                if (!dc_isar_feature(aa32_sha1, s)) {
4614                    return 1;
4615                }
4616                ptr1 = vfp_reg_ptr(true, rd);
4617                ptr2 = vfp_reg_ptr(true, rn);
4618                ptr3 = vfp_reg_ptr(true, rm);
4619                tmp4 = tcg_const_i32(size);
4620                gen_helper_crypto_sha1_3reg(ptr1, ptr2, ptr3, tmp4);
4621                tcg_temp_free_i32(tmp4);
4622            } else { /* SHA-256 */
4623                if (!dc_isar_feature(aa32_sha2, s) || size == 3) {
4624                    return 1;
4625                }
4626                ptr1 = vfp_reg_ptr(true, rd);
4627                ptr2 = vfp_reg_ptr(true, rn);
4628                ptr3 = vfp_reg_ptr(true, rm);
4629                switch (size) {
4630                case 0:
4631                    gen_helper_crypto_sha256h(ptr1, ptr2, ptr3);
4632                    break;
4633                case 1:
4634                    gen_helper_crypto_sha256h2(ptr1, ptr2, ptr3);
4635                    break;
4636                case 2:
4637                    gen_helper_crypto_sha256su1(ptr1, ptr2, ptr3);
4638                    break;
4639                }
4640            }
4641            tcg_temp_free_ptr(ptr1);
4642            tcg_temp_free_ptr(ptr2);
4643            tcg_temp_free_ptr(ptr3);
4644            return 0;
4645
4646        case NEON_3R_VPADD_VQRDMLAH:
4647            if (!u) {
4648                break;  /* VPADD */
4649            }
4650            /* VQRDMLAH */
4651            switch (size) {
4652            case 1:
4653                return do_v81_helper(s, gen_helper_gvec_qrdmlah_s16,
4654                                     q, rd, rn, rm);
4655            case 2:
4656                return do_v81_helper(s, gen_helper_gvec_qrdmlah_s32,
4657                                     q, rd, rn, rm);
4658            }
4659            return 1;
4660
4661        case NEON_3R_VFM_VQRDMLSH:
4662            if (!u) {
4663                /* VFM, VFMS */
4664                if (size == 1) {
4665                    return 1;
4666                }
4667                break;
4668            }
4669            /* VQRDMLSH */
4670            switch (size) {
4671            case 1:
4672                return do_v81_helper(s, gen_helper_gvec_qrdmlsh_s16,
4673                                     q, rd, rn, rm);
4674            case 2:
4675                return do_v81_helper(s, gen_helper_gvec_qrdmlsh_s32,
4676                                     q, rd, rn, rm);
4677            }
4678            return 1;
4679
4680        case NEON_3R_LOGIC: /* Logic ops.  */
4681            switch ((u << 2) | size) {
4682            case 0: /* VAND */
4683                tcg_gen_gvec_and(0, rd_ofs, rn_ofs, rm_ofs,
4684                                 vec_size, vec_size);
4685                break;
4686            case 1: /* VBIC */
4687                tcg_gen_gvec_andc(0, rd_ofs, rn_ofs, rm_ofs,
4688                                  vec_size, vec_size);
4689                break;
4690            case 2: /* VORR */
4691                tcg_gen_gvec_or(0, rd_ofs, rn_ofs, rm_ofs,
4692                                vec_size, vec_size);
4693                break;
4694            case 3: /* VORN */
4695                tcg_gen_gvec_orc(0, rd_ofs, rn_ofs, rm_ofs,
4696                                 vec_size, vec_size);
4697                break;
4698            case 4: /* VEOR */
4699                tcg_gen_gvec_xor(0, rd_ofs, rn_ofs, rm_ofs,
4700                                 vec_size, vec_size);
4701                break;
4702            case 5: /* VBSL */
4703                tcg_gen_gvec_bitsel(MO_8, rd_ofs, rd_ofs, rn_ofs, rm_ofs,
4704                                    vec_size, vec_size);
4705                break;
4706            case 6: /* VBIT */
4707                tcg_gen_gvec_bitsel(MO_8, rd_ofs, rm_ofs, rn_ofs, rd_ofs,
4708                                    vec_size, vec_size);
4709                break;
4710            case 7: /* VBIF */
4711                tcg_gen_gvec_bitsel(MO_8, rd_ofs, rm_ofs, rd_ofs, rn_ofs,
4712                                    vec_size, vec_size);
4713                break;
4714            }
4715            return 0;
4716
4717        case NEON_3R_VADD_VSUB:
4718            if (u) {
4719                tcg_gen_gvec_sub(size, rd_ofs, rn_ofs, rm_ofs,
4720                                 vec_size, vec_size);
4721            } else {
4722                tcg_gen_gvec_add(size, rd_ofs, rn_ofs, rm_ofs,
4723                                 vec_size, vec_size);
4724            }
4725            return 0;
4726
4727        case NEON_3R_VQADD:
4728            tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4729                           rn_ofs, rm_ofs, vec_size, vec_size,
4730                           (u ? uqadd_op : sqadd_op) + size);
4731            return 0;
4732
4733        case NEON_3R_VQSUB:
4734            tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4735                           rn_ofs, rm_ofs, vec_size, vec_size,
4736                           (u ? uqsub_op : sqsub_op) + size);
4737            return 0;
4738
4739        case NEON_3R_VMUL: /* VMUL */
4740            if (u) {
4741                /* Polynomial case allows only P8 and is handled below.  */
4742                if (size != 0) {
4743                    return 1;
4744                }
4745            } else {
4746                tcg_gen_gvec_mul(size, rd_ofs, rn_ofs, rm_ofs,
4747                                 vec_size, vec_size);
4748                return 0;
4749            }
4750            break;
4751
4752        case NEON_3R_VML: /* VMLA, VMLS */
4753            tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size,
4754                           u ? &mls_op[size] : &mla_op[size]);
4755            return 0;
4756
4757        case NEON_3R_VTST_VCEQ:
4758            if (u) { /* VCEQ */
4759                tcg_gen_gvec_cmp(TCG_COND_EQ, size, rd_ofs, rn_ofs, rm_ofs,
4760                                 vec_size, vec_size);
4761            } else { /* VTST */
4762                tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs,
4763                               vec_size, vec_size, &cmtst_op[size]);
4764            }
4765            return 0;
4766
4767        case NEON_3R_VCGT:
4768            tcg_gen_gvec_cmp(u ? TCG_COND_GTU : TCG_COND_GT, size,
4769                             rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size);
4770            return 0;
4771
4772        case NEON_3R_VCGE:
4773            tcg_gen_gvec_cmp(u ? TCG_COND_GEU : TCG_COND_GE, size,
4774                             rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size);
4775            return 0;
4776
4777        case NEON_3R_VMAX:
4778            if (u) {
4779                tcg_gen_gvec_umax(size, rd_ofs, rn_ofs, rm_ofs,
4780                                  vec_size, vec_size);
4781            } else {
4782                tcg_gen_gvec_smax(size, rd_ofs, rn_ofs, rm_ofs,
4783                                  vec_size, vec_size);
4784            }
4785            return 0;
4786        case NEON_3R_VMIN:
4787            if (u) {
4788                tcg_gen_gvec_umin(size, rd_ofs, rn_ofs, rm_ofs,
4789                                  vec_size, vec_size);
4790            } else {
4791                tcg_gen_gvec_smin(size, rd_ofs, rn_ofs, rm_ofs,
4792                                  vec_size, vec_size);
4793            }
4794            return 0;
4795        }
4796
4797        if (size == 3) {
4798            /* 64-bit element instructions. */
4799            for (pass = 0; pass < (q ? 2 : 1); pass++) {
4800                neon_load_reg64(cpu_V0, rn + pass);
4801                neon_load_reg64(cpu_V1, rm + pass);
4802                switch (op) {
4803                case NEON_3R_VSHL:
4804                    if (u) {
4805                        gen_helper_neon_shl_u64(cpu_V0, cpu_V1, cpu_V0);
4806                    } else {
4807                        gen_helper_neon_shl_s64(cpu_V0, cpu_V1, cpu_V0);
4808                    }
4809                    break;
4810                case NEON_3R_VQSHL:
4811                    if (u) {
4812                        gen_helper_neon_qshl_u64(cpu_V0, cpu_env,
4813                                                 cpu_V1, cpu_V0);
4814                    } else {
4815                        gen_helper_neon_qshl_s64(cpu_V0, cpu_env,
4816                                                 cpu_V1, cpu_V0);
4817                    }
4818                    break;
4819                case NEON_3R_VRSHL:
4820                    if (u) {
4821                        gen_helper_neon_rshl_u64(cpu_V0, cpu_V1, cpu_V0);
4822                    } else {
4823                        gen_helper_neon_rshl_s64(cpu_V0, cpu_V1, cpu_V0);
4824                    }
4825                    break;
4826                case NEON_3R_VQRSHL:
4827                    if (u) {
4828                        gen_helper_neon_qrshl_u64(cpu_V0, cpu_env,
4829                                                  cpu_V1, cpu_V0);
4830                    } else {
4831                        gen_helper_neon_qrshl_s64(cpu_V0, cpu_env,
4832                                                  cpu_V1, cpu_V0);
4833                    }
4834                    break;
4835                default:
4836                    abort();
4837                }
4838                neon_store_reg64(cpu_V0, rd + pass);
4839            }
4840            return 0;
4841        }
4842        pairwise = 0;
4843        switch (op) {
4844        case NEON_3R_VSHL:
4845        case NEON_3R_VQSHL:
4846        case NEON_3R_VRSHL:
4847        case NEON_3R_VQRSHL:
4848            {
4849                int rtmp;
4850                /* Shift instruction operands are reversed.  */
4851                rtmp = rn;
4852                rn = rm;
4853                rm = rtmp;
4854            }
4855            break;
4856        case NEON_3R_VPADD_VQRDMLAH:
4857        case NEON_3R_VPMAX:
4858        case NEON_3R_VPMIN:
4859            pairwise = 1;
4860            break;
4861        case NEON_3R_FLOAT_ARITH:
4862            pairwise = (u && size < 2); /* if VPADD (float) */
4863            break;
4864        case NEON_3R_FLOAT_MINMAX:
4865            pairwise = u; /* if VPMIN/VPMAX (float) */
4866            break;
4867        case NEON_3R_FLOAT_CMP:
4868            if (!u && size) {
4869                /* no encoding for U=0 C=1x */
4870                return 1;
4871            }
4872            break;
4873        case NEON_3R_FLOAT_ACMP:
4874            if (!u) {
4875                return 1;
4876            }
4877            break;
4878        case NEON_3R_FLOAT_MISC:
4879            /* VMAXNM/VMINNM in ARMv8 */
4880            if (u && !arm_dc_feature(s, ARM_FEATURE_V8)) {
4881                return 1;
4882            }
4883            break;
4884        case NEON_3R_VFM_VQRDMLSH:
4885            if (!arm_dc_feature(s, ARM_FEATURE_VFP4)) {
4886                return 1;
4887            }
4888            break;
4889        default:
4890            break;
4891        }
4892
4893        if (pairwise && q) {
4894            /* All the pairwise insns UNDEF if Q is set */
4895            return 1;
4896        }
4897
4898        for (pass = 0; pass < (q ? 4 : 2); pass++) {
4899
4900        if (pairwise) {
4901            /* Pairwise.  */
4902            if (pass < 1) {
4903                tmp = neon_load_reg(rn, 0);
4904                tmp2 = neon_load_reg(rn, 1);
4905            } else {
4906                tmp = neon_load_reg(rm, 0);
4907                tmp2 = neon_load_reg(rm, 1);
4908            }
4909        } else {
4910            /* Elementwise.  */
4911            tmp = neon_load_reg(rn, pass);
4912            tmp2 = neon_load_reg(rm, pass);
4913        }
4914        switch (op) {
4915        case NEON_3R_VHADD:
4916            GEN_NEON_INTEGER_OP(hadd);
4917            break;
4918        case NEON_3R_VRHADD:
4919            GEN_NEON_INTEGER_OP(rhadd);
4920            break;
4921        case NEON_3R_VHSUB:
4922            GEN_NEON_INTEGER_OP(hsub);
4923            break;
4924        case NEON_3R_VSHL:
4925            GEN_NEON_INTEGER_OP(shl);
4926            break;
4927        case NEON_3R_VQSHL:
4928            GEN_NEON_INTEGER_OP_ENV(qshl);
4929            break;
4930        case NEON_3R_VRSHL:
4931            GEN_NEON_INTEGER_OP(rshl);
4932            break;
4933        case NEON_3R_VQRSHL:
4934            GEN_NEON_INTEGER_OP_ENV(qrshl);
4935            break;
4936        case NEON_3R_VABD:
4937            GEN_NEON_INTEGER_OP(abd);
4938            break;
4939        case NEON_3R_VABA:
4940            GEN_NEON_INTEGER_OP(abd);
4941            tcg_temp_free_i32(tmp2);
4942            tmp2 = neon_load_reg(rd, pass);
4943            gen_neon_add(size, tmp, tmp2);
4944            break;
4945        case NEON_3R_VMUL:
4946            /* VMUL.P8; other cases already eliminated.  */
4947            gen_helper_neon_mul_p8(tmp, tmp, tmp2);
4948            break;
4949        case NEON_3R_VPMAX:
4950            GEN_NEON_INTEGER_OP(pmax);
4951            break;
4952        case NEON_3R_VPMIN:
4953            GEN_NEON_INTEGER_OP(pmin);
4954            break;
4955        case NEON_3R_VQDMULH_VQRDMULH: /* Multiply high.  */
4956            if (!u) { /* VQDMULH */
4957                switch (size) {
4958                case 1:
4959                    gen_helper_neon_qdmulh_s16(tmp, cpu_env, tmp, tmp2);
4960                    break;
4961                case 2:
4962                    gen_helper_neon_qdmulh_s32(tmp, cpu_env, tmp, tmp2);
4963                    break;
4964                default: abort();
4965                }
4966            } else { /* VQRDMULH */
4967                switch (size) {
4968                case 1:
4969                    gen_helper_neon_qrdmulh_s16(tmp, cpu_env, tmp, tmp2);
4970                    break;
4971                case 2:
4972                    gen_helper_neon_qrdmulh_s32(tmp, cpu_env, tmp, tmp2);
4973                    break;
4974                default: abort();
4975                }
4976            }
4977            break;
4978        case NEON_3R_VPADD_VQRDMLAH:
4979            switch (size) {
4980            case 0: gen_helper_neon_padd_u8(tmp, tmp, tmp2); break;
4981            case 1: gen_helper_neon_padd_u16(tmp, tmp, tmp2); break;
4982            case 2: tcg_gen_add_i32(tmp, tmp, tmp2); break;
4983            default: abort();
4984            }
4985            break;
4986        case NEON_3R_FLOAT_ARITH: /* Floating point arithmetic. */
4987        {
4988            TCGv_ptr fpstatus = get_fpstatus_ptr(1);
4989            switch ((u << 2) | size) {
4990            case 0: /* VADD */
4991            case 4: /* VPADD */
4992                gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus);
4993                break;
4994            case 2: /* VSUB */
4995                gen_helper_vfp_subs(tmp, tmp, tmp2, fpstatus);
4996                break;
4997            case 6: /* VABD */
4998                gen_helper_neon_abd_f32(tmp, tmp, tmp2, fpstatus);
4999                break;
5000            default:
5001                abort();
5002            }
5003            tcg_temp_free_ptr(fpstatus);
5004            break;
5005        }
5006        case NEON_3R_FLOAT_MULTIPLY:
5007        {
5008            TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5009            gen_helper_vfp_muls(tmp, tmp, tmp2, fpstatus);
5010            if (!u) {
5011                tcg_temp_free_i32(tmp2);
5012                tmp2 = neon_load_reg(rd, pass);
5013                if (size == 0) {
5014                    gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus);
5015                } else {
5016                    gen_helper_vfp_subs(tmp, tmp2, tmp, fpstatus);
5017                }
5018            }
5019            tcg_temp_free_ptr(fpstatus);
5020            break;
5021        }
5022        case NEON_3R_FLOAT_CMP:
5023        {
5024            TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5025            if (!u) {
5026                gen_helper_neon_ceq_f32(tmp, tmp, tmp2, fpstatus);
5027            } else {
5028                if (size == 0) {
5029                    gen_helper_neon_cge_f32(tmp, tmp, tmp2, fpstatus);
5030                } else {
5031                    gen_helper_neon_cgt_f32(tmp, tmp, tmp2, fpstatus);
5032                }
5033            }
5034            tcg_temp_free_ptr(fpstatus);
5035            break;
5036        }
5037        case NEON_3R_FLOAT_ACMP:
5038        {
5039            TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5040            if (size == 0) {
5041                gen_helper_neon_acge_f32(tmp, tmp, tmp2, fpstatus);
5042            } else {
5043                gen_helper_neon_acgt_f32(tmp, tmp, tmp2, fpstatus);
5044            }
5045            tcg_temp_free_ptr(fpstatus);
5046            break;
5047        }
5048        case NEON_3R_FLOAT_MINMAX:
5049        {
5050            TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5051            if (size == 0) {
5052                gen_helper_vfp_maxs(tmp, tmp, tmp2, fpstatus);
5053            } else {
5054                gen_helper_vfp_mins(tmp, tmp, tmp2, fpstatus);
5055            }
5056            tcg_temp_free_ptr(fpstatus);
5057            break;
5058        }
5059        case NEON_3R_FLOAT_MISC:
5060            if (u) {
5061                /* VMAXNM/VMINNM */
5062                TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5063                if (size == 0) {
5064                    gen_helper_vfp_maxnums(tmp, tmp, tmp2, fpstatus);
5065                } else {
5066                    gen_helper_vfp_minnums(tmp, tmp, tmp2, fpstatus);
5067                }
5068                tcg_temp_free_ptr(fpstatus);
5069            } else {
5070                if (size == 0) {
5071                    gen_helper_recps_f32(tmp, tmp, tmp2, cpu_env);
5072                } else {
5073                    gen_helper_rsqrts_f32(tmp, tmp, tmp2, cpu_env);
5074                }
5075            }
5076            break;
5077        case NEON_3R_VFM_VQRDMLSH:
5078        {
5079            /* VFMA, VFMS: fused multiply-add */
5080            TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5081            TCGv_i32 tmp3 = neon_load_reg(rd, pass);
5082            if (size) {
5083                /* VFMS */
5084                gen_helper_vfp_negs(tmp, tmp);
5085            }
5086            gen_helper_vfp_muladds(tmp, tmp, tmp2, tmp3, fpstatus);
5087            tcg_temp_free_i32(tmp3);
5088            tcg_temp_free_ptr(fpstatus);
5089            break;
5090        }
5091        default:
5092            abort();
5093        }
5094        tcg_temp_free_i32(tmp2);
5095
5096        /* Save the result.  For elementwise operations we can put it
5097           straight into the destination register.  For pairwise operations
5098           we have to be careful to avoid clobbering the source operands.  */
5099        if (pairwise && rd == rm) {
5100            neon_store_scratch(pass, tmp);
5101        } else {
5102            neon_store_reg(rd, pass, tmp);
5103        }
5104
5105        } /* for pass */
5106        if (pairwise && rd == rm) {
5107            for (pass = 0; pass < (q ? 4 : 2); pass++) {
5108                tmp = neon_load_scratch(pass);
5109                neon_store_reg(rd, pass, tmp);
5110            }
5111        }
5112        /* End of 3 register same size operations.  */
5113    } else if (insn & (1 << 4)) {
5114        if ((insn & 0x00380080) != 0) {
5115            /* Two registers and shift.  */
5116            op = (insn >> 8) & 0xf;
5117            if (insn & (1 << 7)) {
5118                /* 64-bit shift. */
5119                if (op > 7) {
5120                    return 1;
5121                }
5122                size = 3;
5123            } else {
5124                size = 2;
5125                while ((insn & (1 << (size + 19))) == 0)
5126                    size--;
5127            }
5128            shift = (insn >> 16) & ((1 << (3 + size)) - 1);
5129            if (op < 8) {
5130                /* Shift by immediate:
5131                   VSHR, VSRA, VRSHR, VRSRA, VSRI, VSHL, VQSHL, VQSHLU.  */
5132                if (q && ((rd | rm) & 1)) {
5133                    return 1;
5134                }
5135                if (!u && (op == 4 || op == 6)) {
5136                    return 1;
5137                }
5138                /* Right shifts are encoded as N - shift, where N is the
5139                   element size in bits.  */
5140                if (op <= 4) {
5141                    shift = shift - (1 << (size + 3));
5142                }
5143
5144                switch (op) {
5145                case 0:  /* VSHR */
5146                    /* Right shift comes here negative.  */
5147                    shift = -shift;
5148                    /* Shifts larger than the element size are architecturally
5149                     * valid.  Unsigned results in all zeros; signed results
5150                     * in all sign bits.
5151                     */
5152                    if (!u) {
5153                        tcg_gen_gvec_sari(size, rd_ofs, rm_ofs,
5154                                          MIN(shift, (8 << size) - 1),
5155                                          vec_size, vec_size);
5156                    } else if (shift >= 8 << size) {
5157                        tcg_gen_gvec_dup8i(rd_ofs, vec_size, vec_size, 0);
5158                    } else {
5159                        tcg_gen_gvec_shri(size, rd_ofs, rm_ofs, shift,
5160                                          vec_size, vec_size);
5161                    }
5162                    return 0;
5163
5164                case 1:  /* VSRA */
5165                    /* Right shift comes here negative.  */
5166                    shift = -shift;
5167                    /* Shifts larger than the element size are architecturally
5168                     * valid.  Unsigned results in all zeros; signed results
5169                     * in all sign bits.
5170                     */
5171                    if (!u) {
5172                        tcg_gen_gvec_2i(rd_ofs, rm_ofs, vec_size, vec_size,
5173                                        MIN(shift, (8 << size) - 1),
5174                                        &ssra_op[size]);
5175                    } else if (shift >= 8 << size) {
5176                        /* rd += 0 */
5177                    } else {
5178                        tcg_gen_gvec_2i(rd_ofs, rm_ofs, vec_size, vec_size,
5179                                        shift, &usra_op[size]);
5180                    }
5181                    return 0;
5182
5183                case 4: /* VSRI */
5184                    if (!u) {
5185                        return 1;
5186                    }
5187                    /* Right shift comes here negative.  */
5188                    shift = -shift;
5189                    /* Shift out of range leaves destination unchanged.  */
5190                    if (shift < 8 << size) {
5191                        tcg_gen_gvec_2i(rd_ofs, rm_ofs, vec_size, vec_size,
5192                                        shift, &sri_op[size]);
5193                    }
5194                    return 0;
5195
5196                case 5: /* VSHL, VSLI */
5197                    if (u) { /* VSLI */
5198                        /* Shift out of range leaves destination unchanged.  */
5199                        if (shift < 8 << size) {
5200                            tcg_gen_gvec_2i(rd_ofs, rm_ofs, vec_size,
5201                                            vec_size, shift, &sli_op[size]);
5202                        }
5203                    } else { /* VSHL */
5204                        /* Shifts larger than the element size are
5205                         * architecturally valid and result in zero.
5206                         */
5207                        if (shift >= 8 << size) {
5208                            tcg_gen_gvec_dup8i(rd_ofs, vec_size, vec_size, 0);
5209                        } else {
5210                            tcg_gen_gvec_shli(size, rd_ofs, rm_ofs, shift,
5211                                              vec_size, vec_size);
5212                        }
5213                    }
5214                    return 0;
5215                }
5216
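                    /* The remaining immediate shifts (VRSHR, VRSRA, VQSHLU,
                     * VQSHL) are handled per element in the loop below.
                     */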
5217                if (size == 3) {
5218                    count = q + 1;
5219                } else {
5220                    count = q ? 4: 2;
5221                }
5222
5223                /* To avoid excessive duplication of ops we implement shift
5224                 * by immediate using the variable shift operations.
5225                 */
5226                imm = dup_const(size, shift);
5227
5228                for (pass = 0; pass < count; pass++) {
5229                    if (size == 3) {
5230                        neon_load_reg64(cpu_V0, rm + pass);
5231                        tcg_gen_movi_i64(cpu_V1, imm);
5232                        switch (op) {
5233                        case 2: /* VRSHR */
5234                        case 3: /* VRSRA */
5235                            if (u)
5236                                gen_helper_neon_rshl_u64(cpu_V0, cpu_V0, cpu_V1);
5237                            else
5238                                gen_helper_neon_rshl_s64(cpu_V0, cpu_V0, cpu_V1);
5239                            break;
5240                        case 6: /* VQSHLU */
5241                            gen_helper_neon_qshlu_s64(cpu_V0, cpu_env,
5242                                                      cpu_V0, cpu_V1);
5243                            break;
5244                        case 7: /* VQSHL */
5245                            if (u) {
5246                                gen_helper_neon_qshl_u64(cpu_V0, cpu_env,
5247                                                         cpu_V0, cpu_V1);
5248                            } else {
5249                                gen_helper_neon_qshl_s64(cpu_V0, cpu_env,
5250                                                         cpu_V0, cpu_V1);
5251                            }
5252                            break;
5253                        default:
5254                            g_assert_not_reached();
5255                        }
5256                        if (op == 3) {
5257                            /* Accumulate.  */
5258                            neon_load_reg64(cpu_V1, rd + pass);
5259                            tcg_gen_add_i64(cpu_V0, cpu_V0, cpu_V1);
5260                        }
5261                        neon_store_reg64(cpu_V0, rd + pass);
5262                    } else { /* size < 3 */
5263                        /* Operands in T0 and T1.  */
5264                        tmp = neon_load_reg(rm, pass);
5265                        tmp2 = tcg_temp_new_i32();
5266                        tcg_gen_movi_i32(tmp2, imm);
5267                        switch (op) {
5268                        case 2: /* VRSHR */
5269                        case 3: /* VRSRA */
5270                            GEN_NEON_INTEGER_OP(rshl);
5271                            break;
5272                        case 6: /* VQSHLU */
5273                            switch (size) {
5274                            case 0:
5275                                gen_helper_neon_qshlu_s8(tmp, cpu_env,
5276                                                         tmp, tmp2);
5277                                break;
5278                            case 1:
5279                                gen_helper_neon_qshlu_s16(tmp, cpu_env,
5280                                                          tmp, tmp2);
5281                                break;
5282                            case 2:
5283                                gen_helper_neon_qshlu_s32(tmp, cpu_env,
5284                                                          tmp, tmp2);
5285                                break;
5286                            default:
5287                                abort();
5288                            }
5289                            break;
5290                        case 7: /* VQSHL */
5291                            GEN_NEON_INTEGER_OP_ENV(qshl);
5292                            break;
5293                        default:
5294                            g_assert_not_reached();
5295                        }
5296                        tcg_temp_free_i32(tmp2);
5297
5298                        if (op == 3) {
5299                            /* Accumulate.  */
5300                            tmp2 = neon_load_reg(rd, pass);
5301                            gen_neon_add(size, tmp, tmp2);
5302                            tcg_temp_free_i32(tmp2);
5303                        }
5304                        neon_store_reg(rd, pass, tmp);
5305                    }
5306                } /* for pass */
5307            } else if (op < 10) {
5308                /* Shift by immediate and narrow:
5309                   VSHRN, VRSHRN, VQSHRN, VQRSHRN.  */
5310                int input_unsigned = (op == 8) ? !u : u;
5311                if (rm & 1) {
5312                    return 1;
5313                }
5314                shift = shift - (1 << (size + 3));
5315                size++;
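                    /* shift is now the negated right-shift count, and size
                     * has been bumped to describe the double-width source
                     * elements being narrowed.
                     */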
5316                if (size == 3) {
5317                    tmp64 = tcg_const_i64(shift);
5318                    neon_load_reg64(cpu_V0, rm);
5319                    neon_load_reg64(cpu_V1, rm + 1);
5320                    for (pass = 0; pass < 2; pass++) {
5321                        TCGv_i64 in;
5322                        if (pass == 0) {
5323                            in = cpu_V0;
5324                        } else {
5325                            in = cpu_V1;
5326                        }
5327                        if (q) {
5328                            if (input_unsigned) {
5329                                gen_helper_neon_rshl_u64(cpu_V0, in, tmp64);
5330                            } else {
5331                                gen_helper_neon_rshl_s64(cpu_V0, in, tmp64);
5332                            }
5333                        } else {
5334                            if (input_unsigned) {
5335                                gen_helper_neon_shl_u64(cpu_V0, in, tmp64);
5336                            } else {
5337                                gen_helper_neon_shl_s64(cpu_V0, in, tmp64);
5338                            }
5339                        }
5340                        tmp = tcg_temp_new_i32();
5341                        gen_neon_narrow_op(op == 8, u, size - 1, tmp, cpu_V0);
5342                        neon_store_reg(rd, pass, tmp);
5343                    } /* for pass */
5344                    tcg_temp_free_i64(tmp64);
5345                } else {
5346                    if (size == 1) {
5347                        imm = (uint16_t)shift;
5348                        imm |= imm << 16;
5349                    } else {
5350                        /* size == 2 */
5351                        imm = (uint32_t)shift;
5352                    }
5353                    tmp2 = tcg_const_i32(imm);
5354                    tmp4 = neon_load_reg(rm + 1, 0);
5355                    tmp5 = neon_load_reg(rm + 1, 1);
5356                    for (pass = 0; pass < 2; pass++) {
5357                        if (pass == 0) {
5358                            tmp = neon_load_reg(rm, 0);
5359                        } else {
5360                            tmp = tmp4;
5361                        }
5362                        gen_neon_shift_narrow(size, tmp, tmp2, q,
5363                                              input_unsigned);
5364                        if (pass == 0) {
5365                            tmp3 = neon_load_reg(rm, 1);
5366                        } else {
5367                            tmp3 = tmp5;
5368                        }
5369                        gen_neon_shift_narrow(size, tmp3, tmp2, q,
5370                                              input_unsigned);
5371                        tcg_gen_concat_i32_i64(cpu_V0, tmp, tmp3);
5372                        tcg_temp_free_i32(tmp);
5373                        tcg_temp_free_i32(tmp3);
5374                        tmp = tcg_temp_new_i32();
5375                        gen_neon_narrow_op(op == 8, u, size - 1, tmp, cpu_V0);
5376                        neon_store_reg(rd, pass, tmp);
5377                    } /* for pass */
5378                    tcg_temp_free_i32(tmp2);
5379                }
5380            } else if (op == 10) {
5381                /* VSHLL, VMOVL */
5382                if (q || (rd & 1)) {
5383                    return 1;
5384                }
5385                tmp = neon_load_reg(rm, 0);
5386                tmp2 = neon_load_reg(rm, 1);
5387                for (pass = 0; pass < 2; pass++) {
5388                    if (pass == 1)
5389                        tmp = tmp2;
5390
5391                    gen_neon_widen(cpu_V0, tmp, size, u);
5392
5393                    if (shift != 0) {
5394                        /* The shift is less than the width of the source
5395                           type, so we can just shift the whole register.  */
5396                        tcg_gen_shli_i64(cpu_V0, cpu_V0, shift);
5397                        /* Widen the result of the shift: we need to clear
5398                         * the potential overflow bits resulting from
5399                         * left bits of the narrow input appearing as
5400                         * right bits of the left neighbour narrow
5401                         * input.  */
5402                        if (size < 2 || !u) {
5403                            uint64_t imm64;
5404                            if (size == 0) {
5405                                imm = (0xffu >> (8 - shift));
5406                                imm |= imm << 16;
5407                            } else if (size == 1) {
5408                                imm = 0xffff >> (16 - shift);
5409                            } else {
5410                                /* size == 2 */
5411                                imm = 0xffffffff >> (32 - shift);
5412                            }
5413                            if (size < 2) {
5414                                imm64 = imm | (((uint64_t)imm) << 32);
5415                            } else {
5416                                imm64 = imm;
5417                            }
5418                            tcg_gen_andi_i64(cpu_V0, cpu_V0, ~imm64);
5419                        }
5420                    }
5421                    neon_store_reg64(cpu_V0, rd + pass);
5422                }
5423            } else if (op >= 14) {
5424                /* VCVT fixed-point.  */
5425                TCGv_ptr fpst;
5426                TCGv_i32 shiftv;
5427                VFPGenFixPointFn *fn;
5428
5429                if (!(insn & (1 << 21)) || (q && ((rd | rm) & 1))) {
5430                    return 1;
5431                }
5432
5433                if (!(op & 1)) {
5434                    if (u) {
5435                        fn = gen_helper_vfp_ultos;
5436                    } else {
5437                        fn = gen_helper_vfp_sltos;
5438                    }
5439                } else {
5440                    if (u) {
5441                        fn = gen_helper_vfp_touls_round_to_zero;
5442                    } else {
5443                        fn = gen_helper_vfp_tosls_round_to_zero;
5444                    }
5445                }
5446
5447                /* We have already masked out the must-be-1 top bit of imm6,
5448                 * hence this 32-shift where the ARM ARM has 64-imm6.
5449                 */
5450                shift = 32 - shift;
5451                fpst = get_fpstatus_ptr(1);
5452                shiftv = tcg_const_i32(shift);
5453                for (pass = 0; pass < (q ? 4 : 2); pass++) {
5454                    TCGv_i32 tmpf = neon_load_reg(rm, pass);
5455                    fn(tmpf, tmpf, shiftv, fpst);
5456                    neon_store_reg(rd, pass, tmpf);
5457                }
5458                tcg_temp_free_ptr(fpst);
5459                tcg_temp_free_i32(shiftv);
5460            } else {
5461                return 1;
5462            }
5463        } else { /* (insn & 0x00380080) == 0 */
5464            int invert, reg_ofs, vec_size;
5465
5466            if (q && (rd & 1)) {
5467                return 1;
5468            }
5469
5470            op = (insn >> 8) & 0xf;
5471            /* One register and immediate.  */
5472            imm = (u << 7) | ((insn >> 12) & 0x70) | (insn & 0xf);
5473            invert = (insn & (1 << 5)) != 0;
5474            /* Note that op = 2,3,4,5,6,7,10,11,12,13 imm=0 is UNPREDICTABLE.
5475             * We choose to not special-case this and will behave as if a
5476             * valid constant encoding of 0 had been given.
5477             */
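                /* Expand the 8-bit immediate according to cmode (held in
                 * 'op' here); op 15 builds the FP constant and op 14 with
                 * 'invert' is the per-byte mask form, expanded specially
                 * further down.
                 */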
5478            switch (op) {
5479            case 0: case 1:
5480                /* no-op */
5481                break;
5482            case 2: case 3:
5483                imm <<= 8;
5484                break;
5485            case 4: case 5:
5486                imm <<= 16;
5487                break;
5488            case 6: case 7:
5489                imm <<= 24;
5490                break;
5491            case 8: case 9:
5492                imm |= imm << 16;
5493                break;
5494            case 10: case 11:
5495                imm = (imm << 8) | (imm << 24);
5496                break;
5497            case 12:
5498                imm = (imm << 8) | 0xff;
5499                break;
5500            case 13:
5501                imm = (imm << 16) | 0xffff;
5502                break;
5503            case 14:
5504                imm |= (imm << 8) | (imm << 16) | (imm << 24);
5505                if (invert) {
5506                    imm = ~imm;
5507                }
5508                break;
5509            case 15:
5510                if (invert) {
5511                    return 1;
5512                }
5513                imm = ((imm & 0x80) << 24) | ((imm & 0x3f) << 19)
5514                      | ((imm & 0x40) ? (0x1f << 25) : (1 << 30));
5515                break;
5516            }
5517            if (invert) {
5518                imm = ~imm;
5519            }
5520
5521            reg_ofs = neon_reg_offset(rd, 0);
5522            vec_size = q ? 16 : 8;
5523
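                /* Odd cmode values below 12 are VORR/VBIC immediate, which
                 * read-modify-write Vd; everything else is VMOV/VMVN, which
                 * simply writes the expanded constant.
                 */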
5524            if (op & 1 && op < 12) {
5525                if (invert) {
5526                    /* The immediate value has already been inverted,
5527                     * so BIC becomes AND.
5528                     */
5529                    tcg_gen_gvec_andi(MO_32, reg_ofs, reg_ofs, imm,
5530                                      vec_size, vec_size);
5531                } else {
5532                    tcg_gen_gvec_ori(MO_32, reg_ofs, reg_ofs, imm,
5533                                     vec_size, vec_size);
5534                }
5535            } else {
5536                /* VMOV, VMVN.  */
5537                if (op == 14 && invert) {
5538                    TCGv_i64 t64 = tcg_temp_new_i64();
5539
5540                    for (pass = 0; pass <= q; ++pass) {
5541                        uint64_t val = 0;
5542                        int n;
5543
5544                        for (n = 0; n < 8; n++) {
5545                            if (imm & (1 << (n + pass * 8))) {
5546                                val |= 0xffull << (n * 8);
5547                            }
5548                        }
5549                        tcg_gen_movi_i64(t64, val);
5550                        neon_store_reg64(t64, rd + pass);
5551                    }
5552                    tcg_temp_free_i64(t64);
5553                } else {
5554                    tcg_gen_gvec_dup32i(reg_ofs, vec_size, vec_size, imm);
5555                }
5556            }
5557        }
5558    } else { /* (insn & 0x00800010 == 0x00800000) */
5559        if (size != 3) {
5560            op = (insn >> 8) & 0xf;
5561            if ((insn & (1 << 6)) == 0) {
5562                /* Three registers of different lengths.  */
5563                int src1_wide;
5564                int src2_wide;
5565                int prewiden;
5566                /* undefreq: bit 0 : UNDEF if size == 0
5567                 *           bit 1 : UNDEF if size == 1
5568                 *           bit 2 : UNDEF if size == 2
5569                 *           bit 3 : UNDEF if U == 1
5570                 * Note that [2:0] set implies 'always UNDEF'
5571                 */
5572                int undefreq;
5573                /* prewiden, src1_wide, src2_wide, undefreq */
5574                static const int neon_3reg_wide[16][4] = {
5575                    {1, 0, 0, 0}, /* VADDL */
5576                    {1, 1, 0, 0}, /* VADDW */
5577                    {1, 0, 0, 0}, /* VSUBL */
5578                    {1, 1, 0, 0}, /* VSUBW */
5579                    {0, 1, 1, 0}, /* VADDHN */
5580                    {0, 0, 0, 0}, /* VABAL */
5581                    {0, 1, 1, 0}, /* VSUBHN */
5582                    {0, 0, 0, 0}, /* VABDL */
5583                    {0, 0, 0, 0}, /* VMLAL */
5584                    {0, 0, 0, 9}, /* VQDMLAL */
5585                    {0, 0, 0, 0}, /* VMLSL */
5586                    {0, 0, 0, 9}, /* VQDMLSL */
5587                    {0, 0, 0, 0}, /* Integer VMULL */
5588                    {0, 0, 0, 1}, /* VQDMULL */
5589                    {0, 0, 0, 0xa}, /* Polynomial VMULL */
5590                    {0, 0, 0, 7}, /* Reserved: always UNDEF */
5591                };
5592
5593                prewiden = neon_3reg_wide[op][0];
5594                src1_wide = neon_3reg_wide[op][1];
5595                src2_wide = neon_3reg_wide[op][2];
5596                undefreq = neon_3reg_wide[op][3];
5597
5598                if ((undefreq & (1 << size)) ||
5599                    ((undefreq & 8) && u)) {
5600                    return 1;
5601                }
5602                if ((src1_wide && (rn & 1)) ||
5603                    (src2_wide && (rm & 1)) ||
5604                    (!src2_wide && (rd & 1))) {
5605                    return 1;
5606                }
5607
5608                /* Handle VMULL.P64 (Polynomial 64x64 to 128 bit multiply)
5609                 * outside the loop below as it only performs a single pass.
5610                 */
5611                if (op == 14 && size == 2) {
5612                    TCGv_i64 tcg_rn, tcg_rm, tcg_rd;
5613
5614                    if (!dc_isar_feature(aa32_pmull, s)) {
5615                        return 1;
5616                    }
5617                    tcg_rn = tcg_temp_new_i64();
5618                    tcg_rm = tcg_temp_new_i64();
5619                    tcg_rd = tcg_temp_new_i64();
5620                    neon_load_reg64(tcg_rn, rn);
5621                    neon_load_reg64(tcg_rm, rm);
5622                    gen_helper_neon_pmull_64_lo(tcg_rd, tcg_rn, tcg_rm);
5623                    neon_store_reg64(tcg_rd, rd);
5624                    gen_helper_neon_pmull_64_hi(tcg_rd, tcg_rn, tcg_rm);
5625                    neon_store_reg64(tcg_rd, rd + 1);
5626                    tcg_temp_free_i64(tcg_rn);
5627                    tcg_temp_free_i64(tcg_rm);
5628                    tcg_temp_free_i64(tcg_rd);
5629                    return 0;
5630                }
5631
5632                /* Avoid overlapping operands.  Wide source operands are
5633                   always aligned so will never overlap with wide
5634                   destinations in problematic ways.  */
5635                if (rd == rm && !src2_wide) {
5636                    tmp = neon_load_reg(rm, 1);
5637                    neon_store_scratch(2, tmp);
5638                } else if (rd == rn && !src1_wide) {
5639                    tmp = neon_load_reg(rn, 1);
5640                    neon_store_scratch(2, tmp);
5641                }
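                    /* Each pass computes a 64-bit intermediate in cpu_V0:
                     * widening ops store it to one half of Qd, while the
                     * narrowing ops (VADDHN, VSUBHN) narrow it to 32 bits
                     * first.
                     */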
5642                tmp3 = NULL;
5643                for (pass = 0; pass < 2; pass++) {
5644                    if (src1_wide) {
5645                        neon_load_reg64(cpu_V0, rn + pass);
5646                        tmp = NULL;
5647                    } else {
5648                        if (pass == 1 && rd == rn) {
5649                            tmp = neon_load_scratch(2);
5650                        } else {
5651                            tmp = neon_load_reg(rn, pass);
5652                        }
5653                        if (prewiden) {
5654                            gen_neon_widen(cpu_V0, tmp, size, u);
5655                        }
5656                    }
5657                    if (src2_wide) {
5658                        neon_load_reg64(cpu_V1, rm + pass);
5659                        tmp2 = NULL;
5660                    } else {
5661                        if (pass == 1 && rd == rm) {
5662                            tmp2 = neon_load_scratch(2);
5663                        } else {
5664                            tmp2 = neon_load_reg(rm, pass);
5665                        }
5666                        if (prewiden) {
5667                            gen_neon_widen(cpu_V1, tmp2, size, u);
5668                        }
5669                    }
5670                    switch (op) {
5671                    case 0: case 1: case 4: /* VADDL, VADDW, VADDHN, VRADDHN */
5672                        gen_neon_addl(size);
5673                        break;
5674                    case 2: case 3: case 6: /* VSUBL, VSUBW, VSUBHN, VRSUBHN */
5675                        gen_neon_subl(size);
5676                        break;
5677                    case 5: case 7: /* VABAL, VABDL */
5678                        switch ((size << 1) | u) {
5679                        case 0:
5680                            gen_helper_neon_abdl_s16(cpu_V0, tmp, tmp2);
5681                            break;
5682                        case 1:
5683                            gen_helper_neon_abdl_u16(cpu_V0, tmp, tmp2);
5684                            break;
5685                        case 2:
5686                            gen_helper_neon_abdl_s32(cpu_V0, tmp, tmp2);
5687                            break;
5688                        case 3:
5689                            gen_helper_neon_abdl_u32(cpu_V0, tmp, tmp2);
5690                            break;
5691                        case 4:
5692                            gen_helper_neon_abdl_s64(cpu_V0, tmp, tmp2);
5693                            break;
5694                        case 5:
5695                            gen_helper_neon_abdl_u64(cpu_V0, tmp, tmp2);
5696                            break;
5697                        default: abort();
5698                        }
5699                        tcg_temp_free_i32(tmp2);
5700                        tcg_temp_free_i32(tmp);
5701                        break;
5702                    case 8: case 9: case 10: case 11: case 12: case 13:
5703                        /* VMLAL, VQDMLAL, VMLSL, VQDMLSL, VMULL, VQDMULL */
5704                        gen_neon_mull(cpu_V0, tmp, tmp2, size, u);
5705                        break;
5706                    case 14: /* Polynomial VMULL */
5707                        gen_helper_neon_mull_p8(cpu_V0, tmp, tmp2);
5708                        tcg_temp_free_i32(tmp2);
5709                        tcg_temp_free_i32(tmp);
5710                        break;
5711                    default: /* 15 is RESERVED: caught earlier  */
5712                        abort();
5713                    }
5714                    if (op == 13) {
5715                        /* VQDMULL */
5716                        gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
5717                        neon_store_reg64(cpu_V0, rd + pass);
5718                    } else if (op == 5 || (op >= 8 && op <= 11)) {
5719                        /* Accumulate.  */
5720                        neon_load_reg64(cpu_V1, rd + pass);
5721                        switch (op) {
5722                        case 10: /* VMLSL */
5723                            gen_neon_negl(cpu_V0, size);
5724                            /* Fall through */
5725                        case 5: case 8: /* VABAL, VMLAL */
5726                            gen_neon_addl(size);
5727                            break;
5728                        case 9: case 11: /* VQDMLAL, VQDMLSL */
5729                            gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
5730                            if (op == 11) {
5731                                gen_neon_negl(cpu_V0, size);
5732                            }
5733                            gen_neon_addl_saturate(cpu_V0, cpu_V1, size);
5734                            break;
5735                        default:
5736                            abort();
5737                        }
5738                        neon_store_reg64(cpu_V0, rd + pass);
5739                    } else if (op == 4 || op == 6) {
5740                        /* Narrowing operation.  */
5741                        tmp = tcg_temp_new_i32();
5742                        if (!u) {
5743                            switch (size) {
5744                            case 0:
5745                                gen_helper_neon_narrow_high_u8(tmp, cpu_V0);
5746                                break;
5747                            case 1:
5748                                gen_helper_neon_narrow_high_u16(tmp, cpu_V0);
5749                                break;
5750                            case 2:
5751                                tcg_gen_extrh_i64_i32(tmp, cpu_V0);
5752                                break;
5753                            default: abort();
5754                            }
5755                        } else {
5756                            switch (size) {
5757                            case 0:
5758                                gen_helper_neon_narrow_round_high_u8(tmp, cpu_V0);
5759                                break;
5760                            case 1:
5761                                gen_helper_neon_narrow_round_high_u16(tmp, cpu_V0);
5762                                break;
5763                            case 2:
5764                                tcg_gen_addi_i64(cpu_V0, cpu_V0, 1u << 31);
5765                                tcg_gen_extrh_i64_i32(tmp, cpu_V0);
5766                                break;
5767                            default: abort();
5768                            }
5769                        }
5770                        if (pass == 0) {
5771                            tmp3 = tmp;
5772                        } else {
5773                            neon_store_reg(rd, 0, tmp3);
5774                            neon_store_reg(rd, 1, tmp);
5775                        }
5776                    } else {
5777                        /* Write back the result.  */
5778                        neon_store_reg64(cpu_V0, rd + pass);
5779                    }
5780                }
5781            } else {
5782                /* Two registers and a scalar. NB that for ops of this form
5783                 * the ARM ARM labels bit 24 as Q, but it is in our variable
5784                 * 'u', not 'q'.
5785                 */
5786                if (size == 0) {
5787                    return 1;
5788                }
5789                switch (op) {
5790                case 1: /* Float VMLA scalar */
5791                case 5: /* Floating point VMLS scalar */
5792                case 9: /* Floating point VMUL scalar */
5793                    if (size == 1) {
5794                        return 1;
5795                    }
5796                    /* fall through */
5797                case 0: /* Integer VMLA scalar */
5798                case 4: /* Integer VMLS scalar */
5799                case 8: /* Integer VMUL scalar */
5800                case 12: /* VQDMULH scalar */
5801                case 13: /* VQRDMULH scalar */
5802                    if (u && ((rd | rn) & 1)) {
5803                        return 1;
5804                    }
5805                    tmp = neon_get_scalar(size, rm);
5806                    neon_store_scratch(0, tmp);
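                        /* The scalar (duplicated into both halves of a 32-bit
                         * value when size == 1) is parked in a scratch slot so
                         * a fresh copy can be reloaded on every pass.
                         */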
5807                    for (pass = 0; pass < (u ? 4 : 2); pass++) {
5808                        tmp = neon_load_scratch(0);
5809                        tmp2 = neon_load_reg(rn, pass);
5810                        if (op == 12) {
5811                            if (size == 1) {
5812                                gen_helper_neon_qdmulh_s16(tmp, cpu_env, tmp, tmp2);
5813                            } else {
5814                                gen_helper_neon_qdmulh_s32(tmp, cpu_env, tmp, tmp2);
5815                            }
5816                        } else if (op == 13) {
5817                            if (size == 1) {
5818                                gen_helper_neon_qrdmulh_s16(tmp, cpu_env, tmp, tmp2);
5819                            } else {
5820                                gen_helper_neon_qrdmulh_s32(tmp, cpu_env, tmp, tmp2);
5821                            }
5822                        } else if (op & 1) {
5823                            TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5824                            gen_helper_vfp_muls(tmp, tmp, tmp2, fpstatus);
5825                            tcg_temp_free_ptr(fpstatus);
5826                        } else {
5827                            switch (size) {
5828                            case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break;
5829                            case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break;
5830                            case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break;
5831                            default: abort();
5832                            }
5833                        }
5834                        tcg_temp_free_i32(tmp2);
5835                        if (op < 8) {
5836                            /* Accumulate.  */
5837                            tmp2 = neon_load_reg(rd, pass);
5838                            switch (op) {
5839                            case 0:
5840                                gen_neon_add(size, tmp, tmp2);
5841                                break;
5842                            case 1:
5843                            {
5844                                TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5845                                gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus);
5846                                tcg_temp_free_ptr(fpstatus);
5847                                break;
5848                            }
5849                            case 4:
5850                                gen_neon_rsb(size, tmp, tmp2);
5851                                break;
5852                            case 5:
5853                            {
5854                                TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5855                                gen_helper_vfp_subs(tmp, tmp2, tmp, fpstatus);
5856                                tcg_temp_free_ptr(fpstatus);
5857                                break;
5858                            }
5859                            default:
5860                                abort();
5861                            }
5862                            tcg_temp_free_i32(tmp2);
5863                        }
5864                        neon_store_reg(rd, pass, tmp);
5865                    }
5866                    break;
5867                case 3: /* VQDMLAL scalar */
5868                case 7: /* VQDMLSL scalar */
5869                case 11: /* VQDMULL scalar */
5870                    if (u == 1) {
5871                        return 1;
5872                    }
5873                    /* fall through */
5874                case 2: /* VMLAL scalar */
5875                case 6: /* VMLSL scalar */
5876                case 10: /* VMULL scalar */
5877                    if (rd & 1) {
5878                        return 1;
5879                    }
5880                    tmp2 = neon_get_scalar(size, rm);
5881                    /* We need a copy of tmp2 because gen_neon_mull
5882                     * deletes it during pass 0.  */
5883                    tmp4 = tcg_temp_new_i32();
5884                    tcg_gen_mov_i32(tmp4, tmp2);
5885                    tmp3 = neon_load_reg(rn, 1);
5886
5887                    for (pass = 0; pass < 2; pass++) {
5888                        if (pass == 0) {
5889                            tmp = neon_load_reg(rn, 0);
5890                        } else {
5891                            tmp = tmp3;
5892                            tmp2 = tmp4;
5893                        }
5894                        gen_neon_mull(cpu_V0, tmp, tmp2, size, u);
5895                        if (op != 11) {
5896                            neon_load_reg64(cpu_V1, rd + pass);
5897                        }
5898                        switch (op) {
5899                        case 6:
5900                            gen_neon_negl(cpu_V0, size);
5901                            /* Fall through */
5902                        case 2:
5903                            gen_neon_addl(size);
5904                            break;
5905                        case 3: case 7:
5906                            gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
5907                            if (op == 7) {
5908                                gen_neon_negl(cpu_V0, size);
5909                            }
5910                            gen_neon_addl_saturate(cpu_V0, cpu_V1, size);
5911                            break;
5912                        case 10:
5913                            /* no-op */
5914                            break;
5915                        case 11:
5916                            gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
5917                            break;
5918                        default:
5919                            abort();
5920                        }
5921                        neon_store_reg64(cpu_V0, rd + pass);
5922                    }
5923                    break;
5924                case 14: /* VQRDMLAH scalar */
5925                case 15: /* VQRDMLSH scalar */
5926                    {
5927                        NeonGenThreeOpEnvFn *fn;
5928
5929                        if (!dc_isar_feature(aa32_rdm, s)) {
5930                            return 1;
5931                        }
5932                        if (u && ((rd | rn) & 1)) {
5933                            return 1;
5934                        }
5935                        if (op == 14) {
5936                            if (size == 1) {
5937                                fn = gen_helper_neon_qrdmlah_s16;
5938                            } else {
5939                                fn = gen_helper_neon_qrdmlah_s32;
5940                            }
5941                        } else {
5942                            if (size == 1) {
5943                                fn = gen_helper_neon_qrdmlsh_s16;
5944                            } else {
5945                                fn = gen_helper_neon_qrdmlsh_s32;
5946                            }
5947                        }
5948
5949                        tmp2 = neon_get_scalar(size, rm);
5950                        for (pass = 0; pass < (u ? 4 : 2); pass++) {
5951                            tmp = neon_load_reg(rn, pass);
5952                            tmp3 = neon_load_reg(rd, pass);
5953                            fn(tmp, cpu_env, tmp, tmp2, tmp3);
5954                            tcg_temp_free_i32(tmp3);
5955                            neon_store_reg(rd, pass, tmp);
5956                        }
5957                        tcg_temp_free_i32(tmp2);
5958                    }
5959                    break;
5960                default:
5961                    g_assert_not_reached();
5962                }
5963            }
5964        } else { /* size == 3 */
5965            if (!u) {
5966                /* Extract.  */
5967                imm = (insn >> 8) & 0xf;
5968
5969                if (imm > 7 && !q)
5970                    return 1;
5971
5972                if (q && ((rd | rn | rm) & 1)) {
5973                    return 1;
5974                }
5975
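                    /* VEXT: the result is regsize bytes starting at byte
                     * 'imm' of the concatenation Vm:Vn, with Vn supplying
                     * the least significant bytes.
                     */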
5976                if (imm == 0) {
5977                    neon_load_reg64(cpu_V0, rn);
5978                    if (q) {
5979                        neon_load_reg64(cpu_V1, rn + 1);
5980                    }
5981                } else if (imm == 8) {
5982                    neon_load_reg64(cpu_V0, rn + 1);
5983                    if (q) {
5984                        neon_load_reg64(cpu_V1, rm);
5985                    }
5986                } else if (q) {
5987                    tmp64 = tcg_temp_new_i64();
5988                    if (imm < 8) {
5989                        neon_load_reg64(cpu_V0, rn);
5990                        neon_load_reg64(tmp64, rn + 1);
5991                    } else {
5992                        neon_load_reg64(cpu_V0, rn + 1);
5993                        neon_load_reg64(tmp64, rm);
5994                    }
5995                    tcg_gen_shri_i64(cpu_V0, cpu_V0, (imm & 7) * 8);
5996                    tcg_gen_shli_i64(cpu_V1, tmp64, 64 - ((imm & 7) * 8));
5997                    tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
5998                    if (imm < 8) {
5999                        neon_load_reg64(cpu_V1, rm);
6000                    } else {
6001                        neon_load_reg64(cpu_V1, rm + 1);
6002                        imm -= 8;
6003                    }
6004                    tcg_gen_shli_i64(cpu_V1, cpu_V1, 64 - (imm * 8));
6005                    tcg_gen_shri_i64(tmp64, tmp64, imm * 8);
6006                    tcg_gen_or_i64(cpu_V1, cpu_V1, tmp64);
6007                    tcg_temp_free_i64(tmp64);
6008                } else {
6009                    /* Non-quad VEXT: extract from the single Vm:Vn pair.  */
6010                    neon_load_reg64(cpu_V0, rn);
6011                    tcg_gen_shri_i64(cpu_V0, cpu_V0, imm * 8);
6012                    neon_load_reg64(cpu_V1, rm);
6013                    tcg_gen_shli_i64(cpu_V1, cpu_V1, 64 - (imm * 8));
6014                    tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
6015                }
6016                neon_store_reg64(cpu_V0, rd);
6017                if (q) {
6018                    neon_store_reg64(cpu_V1, rd + 1);
6019                }
6020            } else if ((insn & (1 << 11)) == 0) {
6021                /* Two register misc.  */
6022                op = ((insn >> 12) & 0x30) | ((insn >> 7) & 0xf);
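                    /* op combines insn[17:16] (into bits [5:4]) with
                     * insn[10:7] (bits [3:0]) to index the two-register-misc
                     * op space.
                     */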
6023                size = (insn >> 18) & 3;
6024                /* UNDEF for unknown op values and bad op-size combinations */
6025                if ((neon_2rm_sizes[op] & (1 << size)) == 0) {
6026                    return 1;
6027                }
6028                if (neon_2rm_is_v8_op(op) &&
6029                    !arm_dc_feature(s, ARM_FEATURE_V8)) {
6030                    return 1;
6031                }
6032                if ((op != NEON_2RM_VMOVN && op != NEON_2RM_VQMOVN) &&
6033                    q && ((rm | rd) & 1)) {
6034                    return 1;
6035                }
6036                switch (op) {
6037                case NEON_2RM_VREV64:
6038                    for (pass = 0; pass < (q ? 2 : 1); pass++) {
6039                        tmp = neon_load_reg(rm, pass * 2);
6040                        tmp2 = neon_load_reg(rm, pass * 2 + 1);
6041                        switch (size) {
6042                        case 0: tcg_gen_bswap32_i32(tmp, tmp); break;
6043                        case 1: gen_swap_half(tmp); break;
6044                        case 2: /* no-op */ break;
6045                        default: abort();
6046                        }
6047                        neon_store_reg(rd, pass * 2 + 1, tmp);
6048                        if (size == 2) {
6049                            neon_store_reg(rd, pass * 2, tmp2);
6050                        } else {
6051                            switch (size) {
6052                            case 0: tcg_gen_bswap32_i32(tmp2, tmp2); break;
6053                            case 1: gen_swap_half(tmp2); break;
6054                            default: abort();
6055                            }
6056                            neon_store_reg(rd, pass * 2, tmp2);
6057                        }
6058                    }
6059                    break;
6060                case NEON_2RM_VPADDL: case NEON_2RM_VPADDL_U:
6061                case NEON_2RM_VPADAL: case NEON_2RM_VPADAL_U:
6062                    for (pass = 0; pass < q + 1; pass++) {
6063                        tmp = neon_load_reg(rm, pass * 2);
6064                        gen_neon_widen(cpu_V0, tmp, size, op & 1);
6065                        tmp = neon_load_reg(rm, pass * 2 + 1);
6066                        gen_neon_widen(cpu_V1, tmp, size, op & 1);
6067                        switch (size) {
6068                        case 0: gen_helper_neon_paddl_u16(CPU_V001); break;
6069                        case 1: gen_helper_neon_paddl_u32(CPU_V001); break;
6070                        case 2: tcg_gen_add_i64(CPU_V001); break;
6071                        default: abort();
6072                        }
6073                        if (op >= NEON_2RM_VPADAL) {
6074                            /* Accumulate.  */
6075                            neon_load_reg64(cpu_V1, rd + pass);
6076                            gen_neon_addl(size);
6077                        }
6078                        neon_store_reg64(cpu_V0, rd + pass);
6079                    }
6080                    break;
6081                case NEON_2RM_VTRN:
6082                    if (size == 2) {
6083                        int n;
6084                        for (n = 0; n < (q ? 4 : 2); n += 2) {
6085                            tmp = neon_load_reg(rm, n);
6086                            tmp2 = neon_load_reg(rd, n + 1);
6087                            neon_store_reg(rm, n, tmp2);
6088                            neon_store_reg(rd, n + 1, tmp);
6089                        }
6090                    } else {
6091                        goto elementwise;
6092                    }
6093                    break;
6094                case NEON_2RM_VUZP:
6095                    if (gen_neon_unzip(rd, rm, size, q)) {
6096                        return 1;
6097                    }
6098                    break;
6099                case NEON_2RM_VZIP:
6100                    if (gen_neon_zip(rd, rm, size, q)) {
6101                        return 1;
6102                    }
6103                    break;
6104                case NEON_2RM_VMOVN: case NEON_2RM_VQMOVN:
6105                    /* also VQMOVUN; op field and mnemonics don't line up */
6106                    if (rm & 1) {
6107                        return 1;
6108                    }
6109                    tmp2 = NULL;
6110                    for (pass = 0; pass < 2; pass++) {
6111                        neon_load_reg64(cpu_V0, rm + pass);
6112                        tmp = tcg_temp_new_i32();
6113                        gen_neon_narrow_op(op == NEON_2RM_VMOVN, q, size,
6114                                           tmp, cpu_V0);
6115                        if (pass == 0) {
6116                            tmp2 = tmp;
6117                        } else {
6118                            neon_store_reg(rd, 0, tmp2);
6119                            neon_store_reg(rd, 1, tmp);
6120                        }
6121                    }
6122                    break;
6123                case NEON_2RM_VSHLL:
6124                    if (q || (rd & 1)) {
6125                        return 1;
6126                    }
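                    /* This is the maximum-shift form of VSHLL (shift amount
                     * equal to the element size); smaller shifts are encoded
                     * in the two-registers-and-shift group handled earlier.
                     */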
6127                    tmp = neon_load_reg(rm, 0);
6128                    tmp2 = neon_load_reg(rm, 1);
6129                    for (pass = 0; pass < 2; pass++) {
6130                        if (pass == 1)
6131                            tmp = tmp2;
6132                        gen_neon_widen(cpu_V0, tmp, size, 1);
6133                        tcg_gen_shli_i64(cpu_V0, cpu_V0, 8 << size);
6134                        neon_store_reg64(cpu_V0, rd + pass);
6135                    }
6136                    break;
6137                case NEON_2RM_VCVT_F16_F32:
6138                {
6139                    TCGv_ptr fpst;
6140                    TCGv_i32 ahp;
6141
6142                    if (!dc_isar_feature(aa32_fp16_spconv, s) ||
6143                        q || (rm & 1)) {
6144                        return 1;
6145                    }
6146                    fpst = get_fpstatus_ptr(true);
6147                    ahp = get_ahp_flag();
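                    /* ahp is the FPSCR.AHP bit: it selects between IEEE and
                     * Alternative half-precision for the conversion helpers.
                     */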
6148                    tmp = neon_load_reg(rm, 0);
6149                    gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
6150                    tmp2 = neon_load_reg(rm, 1);
6151                    gen_helper_vfp_fcvt_f32_to_f16(tmp2, tmp2, fpst, ahp);
6152                    tcg_gen_shli_i32(tmp2, tmp2, 16);
6153                    tcg_gen_or_i32(tmp2, tmp2, tmp);
6154                    tcg_temp_free_i32(tmp);
6155                    tmp = neon_load_reg(rm, 2);
6156                    gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
6157                    tmp3 = neon_load_reg(rm, 3);
6158                    neon_store_reg(rd, 0, tmp2);
6159                    gen_helper_vfp_fcvt_f32_to_f16(tmp3, tmp3, fpst, ahp);
6160                    tcg_gen_shli_i32(tmp3, tmp3, 16);
6161                    tcg_gen_or_i32(tmp3, tmp3, tmp);
6162                    neon_store_reg(rd, 1, tmp3);
6163                    tcg_temp_free_i32(tmp);
6164                    tcg_temp_free_i32(ahp);
6165                    tcg_temp_free_ptr(fpst);
6166                    break;
6167                }
6168                case NEON_2RM_VCVT_F32_F16:
6169                {
6170                    TCGv_ptr fpst;
6171                    TCGv_i32 ahp;
6172                    if (!dc_isar_feature(aa32_fp16_spconv, s) ||
6173                        q || (rd & 1)) {
6174                        return 1;
6175                    }
6176                    fpst = get_fpstatus_ptr(true);
6177                    ahp = get_ahp_flag();
6178                    tmp3 = tcg_temp_new_i32();
6179                    tmp = neon_load_reg(rm, 0);
6180                    tmp2 = neon_load_reg(rm, 1);
6181                    tcg_gen_ext16u_i32(tmp3, tmp);
6182                    gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp);
6183                    neon_store_reg(rd, 0, tmp3);
6184                    tcg_gen_shri_i32(tmp, tmp, 16);
6185                    gen_helper_vfp_fcvt_f16_to_f32(tmp, tmp, fpst, ahp);
6186                    neon_store_reg(rd, 1, tmp);
6187                    tmp3 = tcg_temp_new_i32();
6188                    tcg_gen_ext16u_i32(tmp3, tmp2);
6189                    gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp);
6190                    neon_store_reg(rd, 2, tmp3);
6191                    tcg_gen_shri_i32(tmp2, tmp2, 16);
6192                    gen_helper_vfp_fcvt_f16_to_f32(tmp2, tmp2, fpst, ahp);
6193                    neon_store_reg(rd, 3, tmp2);
6194                    tcg_temp_free_i32(ahp);
6195                    tcg_temp_free_ptr(fpst);
6196                    break;
6197                }
6198                case NEON_2RM_AESE: case NEON_2RM_AESMC:
6199                    if (!dc_isar_feature(aa32_aes, s) || ((rm | rd) & 1)) {
6200                        return 1;
6201                    }
6202                    ptr1 = vfp_reg_ptr(true, rd);
6203                    ptr2 = vfp_reg_ptr(true, rm);
6204
6205                    /* Bit 6 is the lowest opcode bit; it distinguishes between
6206                     * encryption (AESE/AESMC) and decryption (AESD/AESIMC)
6207                     */
6208                    tmp3 = tcg_const_i32(extract32(insn, 6, 1));
6209
6210                    if (op == NEON_2RM_AESE) {
6211                        gen_helper_crypto_aese(ptr1, ptr2, tmp3);
6212                    } else {
6213                        gen_helper_crypto_aesmc(ptr1, ptr2, tmp3);
6214                    }
6215                    tcg_temp_free_ptr(ptr1);
6216                    tcg_temp_free_ptr(ptr2);
6217                    tcg_temp_free_i32(tmp3);
6218                    break;
6219                case NEON_2RM_SHA1H:
6220                    if (!dc_isar_feature(aa32_sha1, s) || ((rm | rd) & 1)) {
6221                        return 1;
6222                    }
6223                    ptr1 = vfp_reg_ptr(true, rd);
6224                    ptr2 = vfp_reg_ptr(true, rm);
6225
6226                    gen_helper_crypto_sha1h(ptr1, ptr2);
6227
6228                    tcg_temp_free_ptr(ptr1);
6229                    tcg_temp_free_ptr(ptr2);
6230                    break;
6231                case NEON_2RM_SHA1SU1:
6232                    if ((rm | rd) & 1) {
6233                        return 1;
6234                    }
6235                    /* bit 6 (q): set -> SHA256SU0, cleared -> SHA1SU1 */
6236                    if (q) {
6237                        if (!dc_isar_feature(aa32_sha2, s)) {
6238                            return 1;
6239                        }
6240                    } else if (!dc_isar_feature(aa32_sha1, s)) {
6241                        return 1;
6242                    }
6243                    ptr1 = vfp_reg_ptr(true, rd);
6244                    ptr2 = vfp_reg_ptr(true, rm);
6245                    if (q) {
6246                        gen_helper_crypto_sha256su0(ptr1, ptr2);
6247                    } else {
6248                        gen_helper_crypto_sha1su1(ptr1, ptr2);
6249                    }
6250                    tcg_temp_free_ptr(ptr1);
6251                    tcg_temp_free_ptr(ptr2);
6252                    break;
6253
6254                case NEON_2RM_VMVN:
6255                    tcg_gen_gvec_not(0, rd_ofs, rm_ofs, vec_size, vec_size);
6256                    break;
6257                case NEON_2RM_VNEG:
6258                    tcg_gen_gvec_neg(size, rd_ofs, rm_ofs, vec_size, vec_size);
6259                    break;
6260                case NEON_2RM_VABS:
6261                    tcg_gen_gvec_abs(size, rd_ofs, rm_ofs, vec_size, vec_size);
6262                    break;
6263
6264                default:
6265                elementwise:
6266                    for (pass = 0; pass < (q ? 4 : 2); pass++) {
6267                        tmp = neon_load_reg(rm, pass);
6268                        switch (op) {
6269                        case NEON_2RM_VREV32:
6270                            switch (size) {
6271                            case 0: tcg_gen_bswap32_i32(tmp, tmp); break;
6272                            case 1: gen_swap_half(tmp); break;
6273                            default: abort();
6274                            }
6275                            break;
6276                        case NEON_2RM_VREV16:
6277                            gen_rev16(tmp, tmp);
6278                            break;
6279                        case NEON_2RM_VCLS:
6280                            switch (size) {
6281                            case 0: gen_helper_neon_cls_s8(tmp, tmp); break;
6282                            case 1: gen_helper_neon_cls_s16(tmp, tmp); break;
6283                            case 2: gen_helper_neon_cls_s32(tmp, tmp); break;
6284                            default: abort();
6285                            }
6286                            break;
6287                        case NEON_2RM_VCLZ:
6288                            switch (size) {
6289                            case 0: gen_helper_neon_clz_u8(tmp, tmp); break;
6290                            case 1: gen_helper_neon_clz_u16(tmp, tmp); break;
6291                            case 2: tcg_gen_clzi_i32(tmp, tmp, 32); break;
6292                            default: abort();
6293                            }
6294                            break;
6295                        case NEON_2RM_VCNT:
6296                            gen_helper_neon_cnt_u8(tmp, tmp);
6297                            break;
6298                        case NEON_2RM_VQABS:
6299                            switch (size) {
6300                            case 0:
6301                                gen_helper_neon_qabs_s8(tmp, cpu_env, tmp);
6302                                break;
6303                            case 1:
6304                                gen_helper_neon_qabs_s16(tmp, cpu_env, tmp);
6305                                break;
6306                            case 2:
6307                                gen_helper_neon_qabs_s32(tmp, cpu_env, tmp);
6308                                break;
6309                            default: abort();
6310                            }
6311                            break;
6312                        case NEON_2RM_VQNEG:
6313                            switch (size) {
6314                            case 0:
6315                                gen_helper_neon_qneg_s8(tmp, cpu_env, tmp);
6316                                break;
6317                            case 1:
6318                                gen_helper_neon_qneg_s16(tmp, cpu_env, tmp);
6319                                break;
6320                            case 2:
6321                                gen_helper_neon_qneg_s32(tmp, cpu_env, tmp);
6322                                break;
6323                            default: abort();
6324                            }
6325                            break;
6326                        case NEON_2RM_VCGT0: case NEON_2RM_VCLE0:
6327                            tmp2 = tcg_const_i32(0);
6328                            switch(size) {
6329                            case 0: gen_helper_neon_cgt_s8(tmp, tmp, tmp2); break;
6330                            case 1: gen_helper_neon_cgt_s16(tmp, tmp, tmp2); break;
6331                            case 2: gen_helper_neon_cgt_s32(tmp, tmp, tmp2); break;
6332                            default: abort();
6333                            }
6334                            tcg_temp_free_i32(tmp2);
6335                            if (op == NEON_2RM_VCLE0) {
6336                                tcg_gen_not_i32(tmp, tmp);
6337                            }
6338                            break;
6339                        case NEON_2RM_VCGE0: case NEON_2RM_VCLT0:
6340                            tmp2 = tcg_const_i32(0);
6341                            switch(size) {
6342                            case 0: gen_helper_neon_cge_s8(tmp, tmp, tmp2); break;
6343                            case 1: gen_helper_neon_cge_s16(tmp, tmp, tmp2); break;
6344                            case 2: gen_helper_neon_cge_s32(tmp, tmp, tmp2); break;
6345                            default: abort();
6346                            }
6347                            tcg_temp_free_i32(tmp2);
6348                            if (op == NEON_2RM_VCLT0) {
6349                                tcg_gen_not_i32(tmp, tmp);
6350                            }
6351                            break;
6352                        case NEON_2RM_VCEQ0:
6353                            tmp2 = tcg_const_i32(0);
6354                            switch(size) {
6355                            case 0: gen_helper_neon_ceq_u8(tmp, tmp, tmp2); break;
6356                            case 1: gen_helper_neon_ceq_u16(tmp, tmp, tmp2); break;
6357                            case 2: gen_helper_neon_ceq_u32(tmp, tmp, tmp2); break;
6358                            default: abort();
6359                            }
6360                            tcg_temp_free_i32(tmp2);
6361                            break;
6362                        case NEON_2RM_VCGT0_F:
6363                        {
6364                            TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6365                            tmp2 = tcg_const_i32(0);
6366                            gen_helper_neon_cgt_f32(tmp, tmp, tmp2, fpstatus);
6367                            tcg_temp_free_i32(tmp2);
6368                            tcg_temp_free_ptr(fpstatus);
6369                            break;
6370                        }
6371                        case NEON_2RM_VCGE0_F:
6372                        {
6373                            TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6374                            tmp2 = tcg_const_i32(0);
6375                            gen_helper_neon_cge_f32(tmp, tmp, tmp2, fpstatus);
6376                            tcg_temp_free_i32(tmp2);
6377                            tcg_temp_free_ptr(fpstatus);
6378                            break;
6379                        }
6380                        case NEON_2RM_VCEQ0_F:
6381                        {
6382                            TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6383                            tmp2 = tcg_const_i32(0);
6384                            gen_helper_neon_ceq_f32(tmp, tmp, tmp2, fpstatus);
6385                            tcg_temp_free_i32(tmp2);
6386                            tcg_temp_free_ptr(fpstatus);
6387                            break;
6388                        }
6389                        case NEON_2RM_VCLE0_F:
6390                        {
6391                            TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6392                            tmp2 = tcg_const_i32(0);
6393                            gen_helper_neon_cge_f32(tmp, tmp2, tmp, fpstatus);
6394                            tcg_temp_free_i32(tmp2);
6395                            tcg_temp_free_ptr(fpstatus);
6396                            break;
6397                        }
6398                        case NEON_2RM_VCLT0_F:
6399                        {
6400                            TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6401                            tmp2 = tcg_const_i32(0);
6402                            gen_helper_neon_cgt_f32(tmp, tmp2, tmp, fpstatus);
6403                            tcg_temp_free_i32(tmp2);
6404                            tcg_temp_free_ptr(fpstatus);
6405                            break;
6406                        }
6407                        case NEON_2RM_VABS_F:
6408                            gen_helper_vfp_abss(tmp, tmp);
6409                            break;
6410                        case NEON_2RM_VNEG_F:
6411                            gen_helper_vfp_negs(tmp, tmp);
6412                            break;
6413                        case NEON_2RM_VSWP:
6414                            tmp2 = neon_load_reg(rd, pass);
6415                            neon_store_reg(rm, pass, tmp2);
6416                            break;
6417                        case NEON_2RM_VTRN:
6418                            tmp2 = neon_load_reg(rd, pass);
6419                            switch (size) {
6420                            case 0: gen_neon_trn_u8(tmp, tmp2); break;
6421                            case 1: gen_neon_trn_u16(tmp, tmp2); break;
6422                            default: abort();
6423                            }
6424                            neon_store_reg(rm, pass, tmp2);
6425                            break;
6426                        case NEON_2RM_VRINTN:
6427                        case NEON_2RM_VRINTA:
6428                        case NEON_2RM_VRINTM:
6429                        case NEON_2RM_VRINTP:
6430                        case NEON_2RM_VRINTZ:
6431                        {
6432                            TCGv_i32 tcg_rmode;
6433                            TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6434                            int rmode;
6435
6436                            if (op == NEON_2RM_VRINTZ) {
6437                                rmode = FPROUNDING_ZERO;
6438                            } else {
6439                                rmode = fp_decode_rm[((op & 0x6) >> 1) ^ 1];
6440                            }
6441
6442                            tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
6443                            gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
6444                                                      cpu_env);
6445                            gen_helper_rints(tmp, tmp, fpstatus);
6446                            gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
6447                                                      cpu_env);
6448                            tcg_temp_free_ptr(fpstatus);
6449                            tcg_temp_free_i32(tcg_rmode);
6450                            break;
6451                        }
6452                        case NEON_2RM_VRINTX:
6453                        {
6454                            TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6455                            gen_helper_rints_exact(tmp, tmp, fpstatus);
6456                            tcg_temp_free_ptr(fpstatus);
6457                            break;
6458                        }
6459                        case NEON_2RM_VCVTAU:
6460                        case NEON_2RM_VCVTAS:
6461                        case NEON_2RM_VCVTNU:
6462                        case NEON_2RM_VCVTNS:
6463                        case NEON_2RM_VCVTPU:
6464                        case NEON_2RM_VCVTPS:
6465                        case NEON_2RM_VCVTMU:
6466                        case NEON_2RM_VCVTMS:
6467                        {
6468                            bool is_signed = !extract32(insn, 7, 1);
6469                            TCGv_ptr fpst = get_fpstatus_ptr(1);
6470                            TCGv_i32 tcg_rmode, tcg_shift;
6471                            int rmode = fp_decode_rm[extract32(insn, 8, 2)];
6472
6473                            tcg_shift = tcg_const_i32(0);
6474                            tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
6475                            gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
6476                                                      cpu_env);
6477
6478                            if (is_signed) {
6479                                gen_helper_vfp_tosls(tmp, tmp,
6480                                                     tcg_shift, fpst);
6481                            } else {
6482                                gen_helper_vfp_touls(tmp, tmp,
6483                                                     tcg_shift, fpst);
6484                            }
6485
6486                            gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
6487                                                      cpu_env);
6488                            tcg_temp_free_i32(tcg_rmode);
6489                            tcg_temp_free_i32(tcg_shift);
6490                            tcg_temp_free_ptr(fpst);
6491                            break;
6492                        }
6493                        case NEON_2RM_VRECPE:
6494                        {
6495                            TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6496                            gen_helper_recpe_u32(tmp, tmp, fpstatus);
6497                            tcg_temp_free_ptr(fpstatus);
6498                            break;
6499                        }
6500                        case NEON_2RM_VRSQRTE:
6501                        {
6502                            TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6503                            gen_helper_rsqrte_u32(tmp, tmp, fpstatus);
6504                            tcg_temp_free_ptr(fpstatus);
6505                            break;
6506                        }
6507                        case NEON_2RM_VRECPE_F:
6508                        {
6509                            TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6510                            gen_helper_recpe_f32(tmp, tmp, fpstatus);
6511                            tcg_temp_free_ptr(fpstatus);
6512                            break;
6513                        }
6514                        case NEON_2RM_VRSQRTE_F:
6515                        {
6516                            TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6517                            gen_helper_rsqrte_f32(tmp, tmp, fpstatus);
6518                            tcg_temp_free_ptr(fpstatus);
6519                            break;
6520                        }
6521                        case NEON_2RM_VCVT_FS: /* VCVT.F32.S32 */
6522                        {
6523                            TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6524                            gen_helper_vfp_sitos(tmp, tmp, fpstatus);
6525                            tcg_temp_free_ptr(fpstatus);
6526                            break;
6527                        }
6528                        case NEON_2RM_VCVT_FU: /* VCVT.F32.U32 */
6529                        {
6530                            TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6531                            gen_helper_vfp_uitos(tmp, tmp, fpstatus);
6532                            tcg_temp_free_ptr(fpstatus);
6533                            break;
6534                        }
6535                        case NEON_2RM_VCVT_SF: /* VCVT.S32.F32 */
6536                        {
6537                            TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6538                            gen_helper_vfp_tosizs(tmp, tmp, fpstatus);
6539                            tcg_temp_free_ptr(fpstatus);
6540                            break;
6541                        }
6542                        case NEON_2RM_VCVT_UF: /* VCVT.U32.F32 */
6543                        {
6544                            TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6545                            gen_helper_vfp_touizs(tmp, tmp, fpstatus);
6546                            tcg_temp_free_ptr(fpstatus);
6547                            break;
6548                        }
6549                        default:
6550                            /* Reserved op values were caught by the
6551                             * neon_2rm_sizes[] check earlier.
6552                             */
6553                            abort();
6554                        }
6555                        neon_store_reg(rd, pass, tmp);
6556                    }
6557                    break;
6558                }
6559            } else if ((insn & (1 << 10)) == 0) {
6560                /* VTBL, VTBX.  */
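                /*
                 * The table is the list of n consecutive D registers starting
                 * at Dn.  Bit 6 selects VTBX, which feeds the existing
                 * destination bytes to the helper as the fallback for
                 * out-of-range indices; for VTBL the fallback is zero, which
                 * is why the destination is only loaded when bit 6 is set.
                 */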
6561                int n = ((insn >> 8) & 3) + 1;
6562                if ((rn + n) > 32) {
6563                    /* This is UNPREDICTABLE; we choose to UNDEF to avoid the
6564                     * helper function running off the end of the register file.
6565                     */
6566                    return 1;
6567                }
6568                n <<= 3;
6569                if (insn & (1 << 6)) {
6570                    tmp = neon_load_reg(rd, 0);
6571                } else {
6572                    tmp = tcg_temp_new_i32();
6573                    tcg_gen_movi_i32(tmp, 0);
6574                }
6575                tmp2 = neon_load_reg(rm, 0);
6576                ptr1 = vfp_reg_ptr(true, rn);
6577                tmp5 = tcg_const_i32(n);
6578                gen_helper_neon_tbl(tmp2, tmp2, tmp, ptr1, tmp5);
6579                tcg_temp_free_i32(tmp);
6580                if (insn & (1 << 6)) {
6581                    tmp = neon_load_reg(rd, 1);
6582                } else {
6583                    tmp = tcg_temp_new_i32();
6584                    tcg_gen_movi_i32(tmp, 0);
6585                }
6586                tmp3 = neon_load_reg(rm, 1);
6587                gen_helper_neon_tbl(tmp3, tmp3, tmp, ptr1, tmp5);
6588                tcg_temp_free_i32(tmp5);
6589                tcg_temp_free_ptr(ptr1);
6590                neon_store_reg(rd, 0, tmp2);
6591                neon_store_reg(rd, 1, tmp3);
6592                tcg_temp_free_i32(tmp);
6593            } else if ((insn & 0x380) == 0) {
6594                /* VDUP */
6595                int element;
6596                MemOp size;
6597
6598                if ((insn & (7 << 16)) == 0 || (q && (rd & 1))) {
6599                    return 1;
6600                }
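                /*
                 * The scalar index is encoded in insn[19:16] as xxx1 for
                 * bytes, xx10 for halfwords and x100 for words, hence the
                 * successive single-bit tests below.
                 */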
6601                if (insn & (1 << 16)) {
6602                    size = MO_8;
6603                    element = (insn >> 17) & 7;
6604                } else if (insn & (1 << 17)) {
6605                    size = MO_16;
6606                    element = (insn >> 18) & 3;
6607                } else {
6608                    size = MO_32;
6609                    element = (insn >> 19) & 1;
6610                }
6611                tcg_gen_gvec_dup_mem(size, neon_reg_offset(rd, 0),
6612                                     neon_element_offset(rm, element, size),
6613                                     q ? 16 : 8, q ? 16 : 8);
6614            } else {
6615                return 1;
6616            }
6617        }
6618    }
6619    return 0;
6620}
6621
6622/* Advanced SIMD three registers of the same length extension.
6623 *  31           25    23  22    20   16   12  11   10   9    8        3     0
6624 * +---------------+-----+---+-----+----+----+---+----+---+----+---------+----+
6625 * | 1 1 1 1 1 1 0 | op1 | D | op2 | Vn | Vd | 1 | o3 | 0 | o4 | N Q M U | Vm |
6626 * +---------------+-----+---+-----+----+----+---+----+---+----+---------+----+
6627 */
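/*
 * Reader's summary of the decode tests below: this space currently covers
 * VCMLA, VCADD, V[US]DOT and VFM[AS]L, and anything else UNDEFs.  This is
 * not an exhaustive description of the architectural allocation.
 */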
6628static int disas_neon_insn_3same_ext(DisasContext *s, uint32_t insn)
6629{
6630    gen_helper_gvec_3 *fn_gvec = NULL;
6631    gen_helper_gvec_3_ptr *fn_gvec_ptr = NULL;
6632    int rd, rn, rm, opr_sz;
6633    int data = 0;
6634    int off_rn, off_rm;
6635    bool is_long = false, q = extract32(insn, 6, 1);
6636    bool ptr_is_env = false;
6637
6638    if ((insn & 0xfe200f10) == 0xfc200800) {
6639        /* VCMLA -- 1111 110R R.1S .... .... 1000 ...0 .... */
6640        int size = extract32(insn, 20, 1);
6641        data = extract32(insn, 23, 2); /* rot */
6642        if (!dc_isar_feature(aa32_vcma, s)
6643            || (!size && !dc_isar_feature(aa32_fp16_arith, s))) {
6644            return 1;
6645        }
6646        fn_gvec_ptr = size ? gen_helper_gvec_fcmlas : gen_helper_gvec_fcmlah;
6647    } else if ((insn & 0xfea00f10) == 0xfc800800) {
6648        /* VCADD -- 1111 110R 1.0S .... .... 1000 ...0 .... */
6649        int size = extract32(insn, 20, 1);
6650        data = extract32(insn, 24, 1); /* rot */
6651        if (!dc_isar_feature(aa32_vcma, s)
6652            || (!size && !dc_isar_feature(aa32_fp16_arith, s))) {
6653            return 1;
6654        }
6655        fn_gvec_ptr = size ? gen_helper_gvec_fcadds : gen_helper_gvec_fcaddh;
6656    } else if ((insn & 0xfeb00f00) == 0xfc200d00) {
6657        /* V[US]DOT -- 1111 1100 0.10 .... .... 1101 .Q.U .... */
6658        bool u = extract32(insn, 4, 1);
6659        if (!dc_isar_feature(aa32_dp, s)) {
6660            return 1;
6661        }
6662        fn_gvec = u ? gen_helper_gvec_udot_b : gen_helper_gvec_sdot_b;
6663    } else if ((insn & 0xff300f10) == 0xfc200810) {
6664        /* VFM[AS]L -- 1111 1100 S.10 .... .... 1000 .Q.1 .... */
6665        int is_s = extract32(insn, 23, 1);
6666        if (!dc_isar_feature(aa32_fhm, s)) {
6667            return 1;
6668        }
6669        is_long = true;
6670        data = is_s; /* is_2 == 0 */
6671        fn_gvec_ptr = gen_helper_gvec_fmlal_a32;
6672        ptr_is_env = true;
6673    } else {
6674        return 1;
6675    }
6676
6677    VFP_DREG_D(rd, insn);
6678    if (rd & q) {
6679        return 1;
6680    }
6681    if (q || !is_long) {
6682        VFP_DREG_N(rn, insn);
6683        VFP_DREG_M(rm, insn);
6684        if ((rn | rm) & q & !is_long) {
6685            return 1;
6686        }
6687        off_rn = vfp_reg_offset(1, rn);
6688        off_rm = vfp_reg_offset(1, rm);
6689    } else {
6690        rn = VFP_SREG_N(insn);
6691        rm = VFP_SREG_M(insn);
6692        off_rn = vfp_reg_offset(0, rn);
6693        off_rm = vfp_reg_offset(0, rm);
6694    }
6695
6696    if (s->fp_excp_el) {
6697        gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
6698                           syn_simd_access_trap(1, 0xe, false), s->fp_excp_el);
6699        return 0;
6700    }
6701    if (!s->vfp_enabled) {
6702        return 1;
6703    }
6704
6705    opr_sz = (1 + q) * 8;
6706    if (fn_gvec_ptr) {
6707        TCGv_ptr ptr;
6708        if (ptr_is_env) {
6709            ptr = cpu_env;
6710        } else {
6711            ptr = get_fpstatus_ptr(1);
6712        }
6713        tcg_gen_gvec_3_ptr(vfp_reg_offset(1, rd), off_rn, off_rm, ptr,
6714                           opr_sz, opr_sz, data, fn_gvec_ptr);
6715        if (!ptr_is_env) {
6716            tcg_temp_free_ptr(ptr);
6717        }
6718    } else {
6719        tcg_gen_gvec_3_ool(vfp_reg_offset(1, rd), off_rn, off_rm,
6720                           opr_sz, opr_sz, data, fn_gvec);
6721    }
6722    return 0;
6723}
6724
6725/* Advanced SIMD two registers and a scalar extension.
6726 *  31             24   23  22   20   16   12  11   10   9    8        3     0
6727 * +-----------------+----+---+----+----+----+---+----+---+----+---------+----+
6728 * | 1 1 1 1 1 1 1 0 | o1 | D | o2 | Vn | Vd | 1 | o3 | 0 | o4 | N Q M U | Vm |
6729 * +-----------------+----+---+----+----+----+---+----+---+----+---------+----+
6730 *
6731 */
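/*
 * Reader's summary of the decode tests below: this space currently covers
 * VCMLA (indexed), V[US]DOT (indexed) and VFM[AS]L (indexed), and anything
 * else UNDEFs.  Again this describes the checks below, not the full
 * architectural allocation.
 */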
6732
6733static int disas_neon_insn_2reg_scalar_ext(DisasContext *s, uint32_t insn)
6734{
6735    gen_helper_gvec_3 *fn_gvec = NULL;
6736    gen_helper_gvec_3_ptr *fn_gvec_ptr = NULL;
6737    int rd, rn, rm, opr_sz, data;
6738    int off_rn, off_rm;
6739    bool is_long = false, q = extract32(insn, 6, 1);
6740    bool ptr_is_env = false;
6741
6742    if ((insn & 0xff000f10) == 0xfe000800) {
6743        /* VCMLA (indexed) -- 1111 1110 S.RR .... .... 1000 ...0 .... */
6744        int rot = extract32(insn, 20, 2);
6745        int size = extract32(insn, 23, 1);
6746        int index;
6747
6748        if (!dc_isar_feature(aa32_vcma, s)) {
6749            return 1;
6750        }
6751        if (size == 0) {
6752            if (!dc_isar_feature(aa32_fp16_arith, s)) {
6753                return 1;
6754            }
6755            /* For fp16, rm is just Vm, and index is M.  */
6756            rm = extract32(insn, 0, 4);
6757            index = extract32(insn, 5, 1);
6758        } else {
6759            /* For fp32, rm is the usual M:Vm, and index is 0.  */
6760            VFP_DREG_M(rm, insn);
6761            index = 0;
6762        }
6763        data = (index << 2) | rot;
6764        fn_gvec_ptr = (size ? gen_helper_gvec_fcmlas_idx
6765                       : gen_helper_gvec_fcmlah_idx);
6766    } else if ((insn & 0xffb00f00) == 0xfe200d00) {
6767        /* V[US]DOT -- 1111 1110 0.10 .... .... 1101 .Q.U .... */
6768        int u = extract32(insn, 4, 1);
6769
6770        if (!dc_isar_feature(aa32_dp, s)) {
6771            return 1;
6772        }
6773        fn_gvec = u ? gen_helper_gvec_udot_idx_b : gen_helper_gvec_sdot_idx_b;
6774        /* rm is just Vm, and index is M.  */
6775        data = extract32(insn, 5, 1); /* index */
6776        rm = extract32(insn, 0, 4);
6777    } else if ((insn & 0xffa00f10) == 0xfe000810) {
6778        /* VFM[AS]L -- 1111 1110 0.0S .... .... 1000 .Q.1 .... */
6779        int is_s = extract32(insn, 20, 1);
6780        int vm20 = extract32(insn, 0, 3);
6781        int vm3 = extract32(insn, 3, 1);
6782        int m = extract32(insn, 5, 1);
6783        int index;
6784
6785        if (!dc_isar_feature(aa32_fhm, s)) {
6786            return 1;
6787        }
6788        if (q) {
6789            rm = vm20;
6790            index = m * 2 + vm3;
6791        } else {
6792            rm = vm20 * 2 + m;
6793            index = vm3;
6794        }
6795        is_long = true;
6796        data = (index << 2) | is_s; /* is_2 == 0 */
6797        fn_gvec_ptr = gen_helper_gvec_fmlal_idx_a32;
6798        ptr_is_env = true;
6799    } else {
6800        return 1;
6801    }
6802
6803    VFP_DREG_D(rd, insn);
6804    if (rd & q) {
6805        return 1;
6806    }
6807    if (q || !is_long) {
6808        VFP_DREG_N(rn, insn);
6809        if (rn & q & !is_long) {
6810            return 1;
6811        }
6812        off_rn = vfp_reg_offset(1, rn);
6813        off_rm = vfp_reg_offset(1, rm);
6814    } else {
6815        rn = VFP_SREG_N(insn);
6816        off_rn = vfp_reg_offset(0, rn);
6817        off_rm = vfp_reg_offset(0, rm);
6818    }
6819    if (s->fp_excp_el) {
6820        gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
6821                           syn_simd_access_trap(1, 0xe, false), s->fp_excp_el);
6822        return 0;
6823    }
6824    if (!s->vfp_enabled) {
6825        return 1;
6826    }
6827
6828    opr_sz = (1 + q) * 8;
6829    if (fn_gvec_ptr) {
6830        TCGv_ptr ptr;
6831        if (ptr_is_env) {
6832            ptr = cpu_env;
6833        } else {
6834            ptr = get_fpstatus_ptr(1);
6835        }
6836        tcg_gen_gvec_3_ptr(vfp_reg_offset(1, rd), off_rn, off_rm, ptr,
6837                           opr_sz, opr_sz, data, fn_gvec_ptr);
6838        if (!ptr_is_env) {
6839            tcg_temp_free_ptr(ptr);
6840        }
6841    } else {
6842        tcg_gen_gvec_3_ool(vfp_reg_offset(1, rd), off_rn, off_rm,
6843                           opr_sz, opr_sz, data, fn_gvec);
6844    }
6845    return 0;
6846}
6847
6848static int disas_coproc_insn(DisasContext *s, uint32_t insn)
6849{
6850    int cpnum, is64, crn, crm, opc1, opc2, isread, rt, rt2;
6851    const ARMCPRegInfo *ri;
6852
6853    cpnum = (insn >> 8) & 0xf;
6854
6855    /* First check for coprocessor space used for XScale/iwMMXt insns */
6856    if (arm_dc_feature(s, ARM_FEATURE_XSCALE) && (cpnum < 2)) {
6857        if (extract32(s->c15_cpar, cpnum, 1) == 0) {
6858            return 1;
6859        }
6860        if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
6861            return disas_iwmmxt_insn(s, insn);
6862        } else if (arm_dc_feature(s, ARM_FEATURE_XSCALE)) {
6863            return disas_dsp_insn(s, insn);
6864        }
6865        return 1;
6866    }
6867
6868    /* Otherwise treat as a generic register access */
6869    is64 = (insn & (1 << 25)) == 0;
6870    if (!is64 && ((insn & (1 << 4)) == 0)) {
6871        /* cdp */
6872        return 1;
6873    }
6874
6875    crm = insn & 0xf;
6876    if (is64) {
6877        crn = 0;
6878        opc1 = (insn >> 4) & 0xf;
6879        opc2 = 0;
6880        rt2 = (insn >> 16) & 0xf;
6881    } else {
6882        crn = (insn >> 16) & 0xf;
6883        opc1 = (insn >> 21) & 7;
6884        opc2 = (insn >> 5) & 7;
6885        rt2 = 0;
6886    }
6887    isread = (insn >> 20) & 1;
6888    rt = (insn >> 12) & 0xf;
6889
6890    ri = get_arm_cp_reginfo(s->cp_regs,
6891            ENCODE_CP_REG(cpnum, is64, s->ns, crn, crm, opc1, opc2));
6892    if (ri) {
6893        bool need_exit_tb;
6894
6895        /* Check access permissions */
6896        if (!cp_access_ok(s->current_el, ri, isread)) {
6897            return 1;
6898        }
6899
6900        if (ri->accessfn ||
6901            (arm_dc_feature(s, ARM_FEATURE_XSCALE) && cpnum < 14)) {
6902            /* Emit code to perform further access permissions checks at
6903             * runtime; this may result in an exception.
6904             * Note that on XScale all cp0..c13 registers do an access check
6905             * call in order to handle c15_cpar.
6906             */
6907            TCGv_ptr tmpptr;
6908            TCGv_i32 tcg_syn, tcg_isread;
6909            uint32_t syndrome;
6910
6911            /* Note that since we are an implementation which takes an
6912             * exception on a trapped conditional instruction only if the
6913             * instruction passes its condition code check, we can take
6914             * advantage of the clause in the ARM ARM that allows us to set
6915             * the COND field in the instruction to 0xE in all cases.
6916             * We could fish the actual condition out of the insn (ARM)
6917             * or the condexec bits (Thumb) but it isn't necessary.
6918             */
6919            switch (cpnum) {
6920            case 14:
6921                if (is64) {
6922                    syndrome = syn_cp14_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
6923                                                 isread, false);
6924                } else {
6925                    syndrome = syn_cp14_rt_trap(1, 0xe, opc1, opc2, crn, crm,
6926                                                rt, isread, false);
6927                }
6928                break;
6929            case 15:
6930                if (is64) {
6931                    syndrome = syn_cp15_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
6932                                                 isread, false);
6933                } else {
6934                    syndrome = syn_cp15_rt_trap(1, 0xe, opc1, opc2, crn, crm,
6935                                                rt, isread, false);
6936                }
6937                break;
6938            default:
6939                /* ARMv8 defines that only coprocessors 14 and 15 exist,
6940                 * so this can only happen if this is an ARMv7 or earlier CPU,
6941                 * in which case the syndrome information won't actually be
6942                 * guest visible.
6943                 */
6944                assert(!arm_dc_feature(s, ARM_FEATURE_V8));
6945                syndrome = syn_uncategorized();
6946                break;
6947            }
6948
6949            gen_set_condexec(s);
6950            gen_set_pc_im(s, s->pc_curr);
6951            tmpptr = tcg_const_ptr(ri);
6952            tcg_syn = tcg_const_i32(syndrome);
6953            tcg_isread = tcg_const_i32(isread);
6954            gen_helper_access_check_cp_reg(cpu_env, tmpptr, tcg_syn,
6955                                           tcg_isread);
6956            tcg_temp_free_ptr(tmpptr);
6957            tcg_temp_free_i32(tcg_syn);
6958            tcg_temp_free_i32(tcg_isread);
6959        } else if (ri->type & ARM_CP_RAISES_EXC) {
6960            /*
6961             * The readfn or writefn might raise an exception;
6962             * synchronize the CPU state in case it does.
6963             */
6964            gen_set_condexec(s);
6965            gen_set_pc_im(s, s->pc_curr);
6966        }
6967
6968        /* Handle special cases first */
6969        switch (ri->type & ~(ARM_CP_FLAG_MASK & ~ARM_CP_SPECIAL)) {
6970        case ARM_CP_NOP:
6971            return 0;
6972        case ARM_CP_WFI:
6973            if (isread) {
6974                return 1;
6975            }
6976            gen_set_pc_im(s, s->base.pc_next);
6977            s->base.is_jmp = DISAS_WFI;
6978            return 0;
6979        default:
6980            break;
6981        }
6982
6983        if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
6984            gen_io_start();
6985        }
6986
6987        if (isread) {
6988            /* Read */
6989            if (is64) {
6990                TCGv_i64 tmp64;
6991                TCGv_i32 tmp;
6992                if (ri->type & ARM_CP_CONST) {
6993                    tmp64 = tcg_const_i64(ri->resetvalue);
6994                } else if (ri->readfn) {
6995                    TCGv_ptr tmpptr;
6996                    tmp64 = tcg_temp_new_i64();
6997                    tmpptr = tcg_const_ptr(ri);
6998                    gen_helper_get_cp_reg64(tmp64, cpu_env, tmpptr);
6999                    tcg_temp_free_ptr(tmpptr);
7000                } else {
7001                    tmp64 = tcg_temp_new_i64();
7002                    tcg_gen_ld_i64(tmp64, cpu_env, ri->fieldoffset);
7003                }
7004                tmp = tcg_temp_new_i32();
7005                tcg_gen_extrl_i64_i32(tmp, tmp64);
7006                store_reg(s, rt, tmp);
7007                tmp = tcg_temp_new_i32();
7008                tcg_gen_extrh_i64_i32(tmp, tmp64);
7009                tcg_temp_free_i64(tmp64);
7010                store_reg(s, rt2, tmp);
7011            } else {
7012                TCGv_i32 tmp;
7013                if (ri->type & ARM_CP_CONST) {
7014                    tmp = tcg_const_i32(ri->resetvalue);
7015                } else if (ri->readfn) {
7016                    TCGv_ptr tmpptr;
7017                    tmp = tcg_temp_new_i32();
7018                    tmpptr = tcg_const_ptr(ri);
7019                    gen_helper_get_cp_reg(tmp, cpu_env, tmpptr);
7020                    tcg_temp_free_ptr(tmpptr);
7021                } else {
7022                    tmp = load_cpu_offset(ri->fieldoffset);
7023                }
7024                if (rt == 15) {
7025                    /* A destination register of r15 for a 32-bit load sets
7026                     * the condition codes from the high 4 bits of the value.
7027                     */
7028                    gen_set_nzcv(tmp);
7029                    tcg_temp_free_i32(tmp);
7030                } else {
7031                    store_reg(s, rt, tmp);
7032                }
7033            }
7034        } else {
7035            /* Write */
7036            if (ri->type & ARM_CP_CONST) {
7037                /* If not forbidden by access permissions, treat as WI */
7038                return 0;
7039            }
7040
7041            if (is64) {
7042                TCGv_i32 tmplo, tmphi;
7043                TCGv_i64 tmp64 = tcg_temp_new_i64();
7044                tmplo = load_reg(s, rt);
7045                tmphi = load_reg(s, rt2);
7046                tcg_gen_concat_i32_i64(tmp64, tmplo, tmphi);
7047                tcg_temp_free_i32(tmplo);
7048                tcg_temp_free_i32(tmphi);
7049                if (ri->writefn) {
7050                    TCGv_ptr tmpptr = tcg_const_ptr(ri);
7051                    gen_helper_set_cp_reg64(cpu_env, tmpptr, tmp64);
7052                    tcg_temp_free_ptr(tmpptr);
7053                } else {
7054                    tcg_gen_st_i64(tmp64, cpu_env, ri->fieldoffset);
7055                }
7056                tcg_temp_free_i64(tmp64);
7057            } else {
7058                if (ri->writefn) {
7059                    TCGv_i32 tmp;
7060                    TCGv_ptr tmpptr;
7061                    tmp = load_reg(s, rt);
7062                    tmpptr = tcg_const_ptr(ri);
7063                    gen_helper_set_cp_reg(cpu_env, tmpptr, tmp);
7064                    tcg_temp_free_ptr(tmpptr);
7065                    tcg_temp_free_i32(tmp);
7066                } else {
7067                    TCGv_i32 tmp = load_reg(s, rt);
7068                    store_cpu_offset(tmp, ri->fieldoffset);
7069                }
7070            }
7071        }
7072
7073        /* I/O operations must end the TB here (whether read or write) */
7074        need_exit_tb = ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) &&
7075                        (ri->type & ARM_CP_IO));
7076
7077        if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
7078            /*
7079             * A write to any coprocessor register that ends a TB
7080             * must rebuild the hflags for the next TB.
7081             */
7082            TCGv_i32 tcg_el = tcg_const_i32(s->current_el);
7083            if (arm_dc_feature(s, ARM_FEATURE_M)) {
7084                gen_helper_rebuild_hflags_m32(cpu_env, tcg_el);
7085            } else {
7086                gen_helper_rebuild_hflags_a32(cpu_env, tcg_el);
7087            }
7088            tcg_temp_free_i32(tcg_el);
7089            /*
7090             * We default to ending the TB on a coprocessor register write,
7091             * but allow this to be suppressed by the register definition
7092             * (usually only necessary to work around guest bugs).
7093             */
7094            need_exit_tb = true;
7095        }
7096        if (need_exit_tb) {
7097            gen_lookup_tb(s);
7098        }
7099
7100        return 0;
7101    }
7102
7103    /* Unknown register; this might be a guest error or a QEMU
7104     * unimplemented feature.
7105     */
7106    if (is64) {
7107        qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
7108                      "64 bit system register cp:%d opc1: %d crm:%d "
7109                      "(%s)\n",
7110                      isread ? "read" : "write", cpnum, opc1, crm,
7111                      s->ns ? "non-secure" : "secure");
7112    } else {
7113        qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
7114                      "system register cp:%d opc1:%d crn:%d crm:%d opc2:%d "
7115                      "(%s)\n",
7116                      isread ? "read" : "write", cpnum, opc1, crn, crm, opc2,
7117                      s->ns ? "non-secure" : "secure");
7118    }
7119
7120    return 1;
7121}
7122
7123
7124/* Store a 64-bit value to a register pair.  Clobbers val.  */
7125static void gen_storeq_reg(DisasContext *s, int rlow, int rhigh, TCGv_i64 val)
7126{
7127    TCGv_i32 tmp;
7128    tmp = tcg_temp_new_i32();
7129    tcg_gen_extrl_i64_i32(tmp, val);
7130    store_reg(s, rlow, tmp);
7131    tmp = tcg_temp_new_i32();
7132    tcg_gen_extrh_i64_i32(tmp, val);
7133    store_reg(s, rhigh, tmp);
7134}
7135
7136/* Load and add a 64-bit value from a register pair.  */
7137static void gen_addq(DisasContext *s, TCGv_i64 val, int rlow, int rhigh)
7138{
7139    TCGv_i64 tmp;
7140    TCGv_i32 tmpl;
7141    TCGv_i32 tmph;
7142
7143    /* Load 64-bit value rd:rn.  */
7144    tmpl = load_reg(s, rlow);
7145    tmph = load_reg(s, rhigh);
7146    tmp = tcg_temp_new_i64();
7147    tcg_gen_concat_i32_i64(tmp, tmpl, tmph);
7148    tcg_temp_free_i32(tmpl);
7149    tcg_temp_free_i32(tmph);
7150    tcg_gen_add_i64(val, val, tmp);
7151    tcg_temp_free_i64(tmp);
7152}
7153
7154/* Set N and Z flags from hi|lo.  */
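/*
 * (Reminder of the flag representation used here: cpu_NF carries the N flag
 * in bit 31, and cpu_ZF is zero exactly when the Z flag is set, so OR-ing
 * the two halves into cpu_ZF is sufficient.)
 */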
7155static void gen_logicq_cc(TCGv_i32 lo, TCGv_i32 hi)
7156{
7157    tcg_gen_mov_i32(cpu_NF, hi);
7158    tcg_gen_or_i32(cpu_ZF, lo, hi);
7159}
7160
7161/* Load/Store exclusive instructions are implemented by remembering
7162   the value/address loaded, and seeing if these are the same
7163   when the store is performed.  This should be sufficient to implement
7164   the architecturally mandated semantics, and avoids having to monitor
7165   regular stores.  The compare vs the remembered value is done during
7166   the cmpxchg operation, but we must compare the addresses manually.  */
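/*
 * Illustrative guest sequence (not generated code): for
 *     ldrex r1, [r0]
 *     ...
 *     strex r2, r3, [r0]
 * the LDREX records r0 in cpu_exclusive_addr and the loaded data in
 * cpu_exclusive_val; the STREX branches to its fail path if the address no
 * longer matches, otherwise it cmpxchgs the remembered value against memory,
 * storing r3 on success and writing 0 or 1 to r2 accordingly.
 */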
7167static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
7168                               TCGv_i32 addr, int size)
7169{
7170    TCGv_i32 tmp = tcg_temp_new_i32();
7171    MemOp opc = size | MO_ALIGN | s->be_data;
7172
7173    s->is_ldex = true;
7174
7175    if (size == 3) {
7176        TCGv_i32 tmp2 = tcg_temp_new_i32();
7177        TCGv_i64 t64 = tcg_temp_new_i64();
7178
7179        /* For AArch32, architecturally the 32-bit word at the lowest
7180         * address is always Rt and the one at addr+4 is Rt2, even if
7181         * the CPU is big-endian. That means we don't want to do a
7182         * gen_aa32_ld_i64(), which invokes gen_aa32_frob64() as if
7183         * for an architecturally 64-bit access, but instead do a
7184         * 64-bit access using MO_BE if appropriate and then split
7185         * the two halves.
7186         * This only makes a difference for BE32 user-mode, where
7187         * frob64() must not flip the two halves of the 64-bit data
7188         * but this code must treat BE32 user-mode like BE32 system.
7189         */
7190        TCGv taddr = gen_aa32_addr(s, addr, opc);
7191
7192        tcg_gen_qemu_ld_i64(t64, taddr, get_mem_index(s), opc);
7193        tcg_temp_free(taddr);
7194        tcg_gen_mov_i64(cpu_exclusive_val, t64);
7195        if (s->be_data == MO_BE) {
7196            tcg_gen_extr_i64_i32(tmp2, tmp, t64);
7197        } else {
7198            tcg_gen_extr_i64_i32(tmp, tmp2, t64);
7199        }
7200        tcg_temp_free_i64(t64);
7201
7202        store_reg(s, rt2, tmp2);
7203    } else {
7204        gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), opc);
7205        tcg_gen_extu_i32_i64(cpu_exclusive_val, tmp);
7206    }
7207
7208    store_reg(s, rt, tmp);
7209    tcg_gen_extu_i32_i64(cpu_exclusive_addr, addr);
7210}
7211
7212static void gen_clrex(DisasContext *s)
7213{
7214    tcg_gen_movi_i64(cpu_exclusive_addr, -1);
7215}
7216
7217static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
7218                                TCGv_i32 addr, int size)
7219{
7220    TCGv_i32 t0, t1, t2;
7221    TCGv_i64 extaddr;
7222    TCGv taddr;
7223    TCGLabel *done_label;
7224    TCGLabel *fail_label;
7225    MemOp opc = size | MO_ALIGN | s->be_data;
7226
7227    /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]) {
7228         [addr] = {Rt};
7229         {Rd} = 0;
7230       } else {
7231         {Rd} = 1;
7232       } */
7233    fail_label = gen_new_label();
7234    done_label = gen_new_label();
7235    extaddr = tcg_temp_new_i64();
7236    tcg_gen_extu_i32_i64(extaddr, addr);
7237    tcg_gen_brcond_i64(TCG_COND_NE, extaddr, cpu_exclusive_addr, fail_label);
7238    tcg_temp_free_i64(extaddr);
7239
7240    taddr = gen_aa32_addr(s, addr, opc);
7241    t0 = tcg_temp_new_i32();
7242    t1 = load_reg(s, rt);
7243    if (size == 3) {
7244        TCGv_i64 o64 = tcg_temp_new_i64();
7245        TCGv_i64 n64 = tcg_temp_new_i64();
7246
7247        t2 = load_reg(s, rt2);
7248        /* For AArch32, architecturally the 32-bit word at the lowest
7249         * address is always Rt and the one at addr+4 is Rt2, even if
7250         * the CPU is big-endian. Since we're going to treat this as a
7251         * single 64-bit BE store, we need to put the two halves in the
7252         * opposite order for BE to LE, so that they end up in the right
7253         * places.
7254         * We don't want gen_aa32_frob64() because that does the wrong
7255         * thing for BE32 usermode.
7256         */
7257        if (s->be_data == MO_BE) {
7258            tcg_gen_concat_i32_i64(n64, t2, t1);
7259        } else {
7260            tcg_gen_concat_i32_i64(n64, t1, t2);
7261        }
7262        tcg_temp_free_i32(t2);
7263
7264        tcg_gen_atomic_cmpxchg_i64(o64, taddr, cpu_exclusive_val, n64,
7265                                   get_mem_index(s), opc);
7266        tcg_temp_free_i64(n64);
7267
7268        tcg_gen_setcond_i64(TCG_COND_NE, o64, o64, cpu_exclusive_val);
7269        tcg_gen_extrl_i64_i32(t0, o64);
7270
7271        tcg_temp_free_i64(o64);
7272    } else {
7273        t2 = tcg_temp_new_i32();
7274        tcg_gen_extrl_i64_i32(t2, cpu_exclusive_val);
7275        tcg_gen_atomic_cmpxchg_i32(t0, taddr, t2, t1, get_mem_index(s), opc);
7276        tcg_gen_setcond_i32(TCG_COND_NE, t0, t0, t2);
7277        tcg_temp_free_i32(t2);
7278    }
7279    tcg_temp_free_i32(t1);
7280    tcg_temp_free(taddr);
7281    tcg_gen_mov_i32(cpu_R[rd], t0);
7282    tcg_temp_free_i32(t0);
7283    tcg_gen_br(done_label);
7284
7285    gen_set_label(fail_label);
7286    tcg_gen_movi_i32(cpu_R[rd], 1);
7287    gen_set_label(done_label);
7288    tcg_gen_movi_i64(cpu_exclusive_addr, -1);
7289}
7290
7291/* gen_srs:
7292 * @env: CPUARMState
7293 * @s: DisasContext
7294 * @mode: mode field from insn (which stack to store to)
7295 * @amode: addressing mode (DA/IA/DB/IB), encoded as per P,U bits in ARM insn
7296 * @writeback: true if writeback bit set
7297 *
7298 * Generate code for the SRS (Store Return State) insn.
7299 */
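/*
 * Addressing-mode summary, derived from the offset switches below, where SP
 * is the banked r13 of the mode being stored to:
 *   DA: LR -> [SP-4], SPSR -> [SP],   writeback SP -= 8
 *   IA: LR -> [SP],   SPSR -> [SP+4], writeback SP += 8
 *   DB: LR -> [SP-8], SPSR -> [SP-4], writeback SP -= 8
 *   IB: LR -> [SP+4], SPSR -> [SP+8], writeback SP += 8
 */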
7300static void gen_srs(DisasContext *s,
7301                    uint32_t mode, uint32_t amode, bool writeback)
7302{
7303    int32_t offset;
7304    TCGv_i32 addr, tmp;
7305    bool undef = false;
7306
7307    /* SRS is:
7308     * - trapped to EL3 if EL3 is AArch64 and we are at Secure EL1
7309     *   and specified mode is monitor mode
7310     * - UNDEFINED in Hyp mode
7311     * - UNPREDICTABLE in User or System mode
7312     * - UNPREDICTABLE if the specified mode is:
7313     * -- not implemented
7314     * -- not a valid mode number
7315     * -- a mode that's at a higher exception level
7316     * -- Monitor, if we are Non-secure
7317     * For the UNPREDICTABLE cases we choose to UNDEF.
7318     */
7319    if (s->current_el == 1 && !s->ns && mode == ARM_CPU_MODE_MON) {
7320        gen_exception_insn(s, s->pc_curr, EXCP_UDEF, syn_uncategorized(), 3);
7321        return;
7322    }
7323
7324    if (s->current_el == 0 || s->current_el == 2) {
7325        undef = true;
7326    }
7327
7328    switch (mode) {
7329    case ARM_CPU_MODE_USR:
7330    case ARM_CPU_MODE_FIQ:
7331    case ARM_CPU_MODE_IRQ:
7332    case ARM_CPU_MODE_SVC:
7333    case ARM_CPU_MODE_ABT:
7334    case ARM_CPU_MODE_UND:
7335    case ARM_CPU_MODE_SYS:
7336        break;
7337    case ARM_CPU_MODE_HYP:
7338        if (s->current_el == 1 || !arm_dc_feature(s, ARM_FEATURE_EL2)) {
7339            undef = true;
7340        }
7341        break;
7342    case ARM_CPU_MODE_MON:
7343        /* No need to check specifically for "are we non-secure" because
7344         * we've already made EL0 UNDEF and handled the trap for S-EL1;
7345         * so if this isn't EL3 then we must be non-secure.
7346         */
7347        if (s->current_el != 3) {
7348            undef = true;
7349        }
7350        break;
7351    default:
7352        undef = true;
7353    }
7354
7355    if (undef) {
7356        unallocated_encoding(s);
7357        return;
7358    }
7359
7360    addr = tcg_temp_new_i32();
7361    tmp = tcg_const_i32(mode);
7362    /* get_r13_banked() will raise an exception if called from System mode */
7363    gen_set_condexec(s);
7364    gen_set_pc_im(s, s->pc_curr);
7365    gen_helper_get_r13_banked(addr, cpu_env, tmp);
7366    tcg_temp_free_i32(tmp);
7367    switch (amode) {
7368    case 0: /* DA */
7369        offset = -4;
7370        break;
7371    case 1: /* IA */
7372        offset = 0;
7373        break;
7374    case 2: /* DB */
7375        offset = -8;
7376        break;
7377    case 3: /* IB */
7378        offset = 4;
7379        break;
7380    default:
7381        abort();
7382    }
7383    tcg_gen_addi_i32(addr, addr, offset);
7384    tmp = load_reg(s, 14);
7385    gen_aa32_st32(s, tmp, addr, get_mem_index(s));
7386    tcg_temp_free_i32(tmp);
7387    tmp = load_cpu_field(spsr);
7388    tcg_gen_addi_i32(addr, addr, 4);
7389    gen_aa32_st32(s, tmp, addr, get_mem_index(s));
7390    tcg_temp_free_i32(tmp);
7391    if (writeback) {
7392        switch (amode) {
7393        case 0:
7394            offset = -8;
7395            break;
7396        case 1:
7397            offset = 4;
7398            break;
7399        case 2:
7400            offset = -4;
7401            break;
7402        case 3:
7403            offset = 0;
7404            break;
7405        default:
7406            abort();
7407        }
7408        tcg_gen_addi_i32(addr, addr, offset);
7409        tmp = tcg_const_i32(mode);
7410        gen_helper_set_r13_banked(cpu_env, tmp, addr);
7411        tcg_temp_free_i32(tmp);
7412    }
7413    tcg_temp_free_i32(addr);
7414    s->base.is_jmp = DISAS_UPDATE;
7415}
7416
7417/* Generate a label used for skipping this instruction */
7418static void arm_gen_condlabel(DisasContext *s)
7419{
7420    if (!s->condjmp) {
7421        s->condlabel = gen_new_label();
7422        s->condjmp = 1;
7423    }
7424}
7425
7426/* Skip this instruction if the ARM condition is false */
7427static void arm_skip_unless(DisasContext *s, uint32_t cond)
7428{
7429    arm_gen_condlabel(s);
7430    arm_gen_test_cc(cond ^ 1, s->condlabel);
7431}
7432
7433
7434/*
7435 * Constant expanders for the decoders.
7436 */
7437
7438static int negate(DisasContext *s, int x)
7439{
7440    return -x;
7441}
7442
7443static int plus_2(DisasContext *s, int x)
7444{
7445    return x + 2;
7446}
7447
7448static int times_2(DisasContext *s, int x)
7449{
7450    return x * 2;
7451}
7452
7453static int times_4(DisasContext *s, int x)
7454{
7455    return x * 4;
7456}
7457
7458/* Return only the rotation part of T32ExpandImm.  */
7459static int t32_expandimm_rot(DisasContext *s, int x)
7460{
7461    return x & 0xc00 ? extract32(x, 7, 5) : 0;
7462}
7463
7464/* Return the unrotated immediate from T32ExpandImm.  */
7465static int t32_expandimm_imm(DisasContext *s, int x)
7466{
7467    int imm = extract32(x, 0, 8);
7468
7469    switch (extract32(x, 8, 4)) {
7470    case 0: /* XY */
7471        /* Nothing to do.  */
7472        break;
7473    case 1: /* 00XY00XY */
7474        imm *= 0x00010001;
7475        break;
7476    case 2: /* XY00XY00 */
7477        imm *= 0x01000100;
7478        break;
7479    case 3: /* XYXYXYXY */
7480        imm *= 0x01010101;
7481        break;
7482    default:
7483        /* Rotated constant.  */
7484        imm |= 0x80;
7485        break;
7486    }
7487    return imm;
7488}
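/*
 * Worked examples of the T32ExpandImm split above:
 *   imm12 = 0x1ab: the top nibble 0x1 selects the 00XY00XY pattern, so the
 *                  expander returns 0x00ab00ab with rotation 0;
 *   imm12 = 0x8ab: bits [11:10] are non-zero, so this is a rotated constant:
 *                  the unrotated value is 0x80 | (0xab & 0x7f) = 0xab and the
 *                  rotation is bits [11:7] = 17.
 */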
7489
7490static int t32_branch24(DisasContext *s, int x)
7491{
7492    /* Convert J1:J2 at x[22:21] to I2:I1, which involves I=J^~S.  */
7493    x ^= !(x < 0) * (3 << 21);
7494    /* Append the final zero.  */
7495    return x << 1;
7496}
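/*
 * The I1/I2 offset bits for BL/BLX are defined as NOT(J1 EOR S) and
 * NOT(J2 EOR S): when the sign bit S is 0 (x >= 0 above) J1:J2 must be
 * complemented, and when S is 1 they already equal I1:I2, which is what the
 * conditional XOR with 3 << 21 implements.
 */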
7497
7498static int t16_setflags(DisasContext *s)
7499{
7500    return s->condexec_mask == 0;
7501}
7502
7503static int t16_push_list(DisasContext *s, int x)
7504{
7505    return (x & 0xff) | (x & 0x100) << (14 - 8);
7506}
7507
7508static int t16_pop_list(DisasContext *s, int x)
7509{
7510    return (x & 0xff) | (x & 0x100) << (15 - 8);
7511}
7512
7513/*
7514 * Include the generated decoders.
7515 */
7516
7517#include "decode-a32.inc.c"
7518#include "decode-a32-uncond.inc.c"
7519#include "decode-t32.inc.c"
7520#include "decode-t16.inc.c"
7521
7522/* Helpers to swap operands for reverse-subtract.  */
7523static void gen_rsb(TCGv_i32 dst, TCGv_i32 a, TCGv_i32 b)
7524{
7525    tcg_gen_sub_i32(dst, b, a);
7526}
7527
7528static void gen_rsb_CC(TCGv_i32 dst, TCGv_i32 a, TCGv_i32 b)
7529{
7530    gen_sub_CC(dst, b, a);
7531}
7532
7533static void gen_rsc(TCGv_i32 dest, TCGv_i32 a, TCGv_i32 b)
7534{
7535    gen_sub_carry(dest, b, a);
7536}
7537
7538static void gen_rsc_CC(TCGv_i32 dest, TCGv_i32 a, TCGv_i32 b)
7539{
7540    gen_sbc_CC(dest, b, a);
7541}
7542
7543/*
7544 * Helpers for the data processing routines.
7545 *
7546 * After the computation store the results back.
7547 * This may be suppressed altogether (STREG_NONE), require a runtime
7548 * check against the stack limits (STREG_SP_CHECK), generate an
7549 * exception return (STREG_EXC_RET), or store into a register (STREG_NORMAL).
7550 *
7551 * Always return true, indicating success for a trans_* function.
7552 */
7553typedef enum {
7554    STREG_NONE,
7555    STREG_NORMAL,
7556    STREG_SP_CHECK,
7557    STREG_EXC_RET,
7558} StoreRegKind;
7559
7560static bool store_reg_kind(DisasContext *s, int rd,
7561                            TCGv_i32 val, StoreRegKind kind)
7562{
7563    switch (kind) {
7564    case STREG_NONE:
7565        tcg_temp_free_i32(val);
7566        return true;
7567    case STREG_NORMAL:
7568        /* See ALUWritePC: Interworking only from a32 mode. */
7569        if (s->thumb) {
7570            store_reg(s, rd, val);
7571        } else {
7572            store_reg_bx(s, rd, val);
7573        }
7574        return true;
7575    case STREG_SP_CHECK:
7576        store_sp_checked(s, val);
7577        return true;
7578    case STREG_EXC_RET:
7579        gen_exception_return(s, val);
7580        return true;
7581    }
7582    g_assert_not_reached();
7583}
7584
7585/*
7586 * Data Processing (register)
7587 *
7588 * Operate, with set flags, one register source,
7589 * one immediate shifted register source, and a destination.
7590 */
7591static bool op_s_rrr_shi(DisasContext *s, arg_s_rrr_shi *a,
7592                         void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
7593                         int logic_cc, StoreRegKind kind)
7594{
7595    TCGv_i32 tmp1, tmp2;
7596
7597    tmp2 = load_reg(s, a->rm);
7598    gen_arm_shift_im(tmp2, a->shty, a->shim, logic_cc);
7599    tmp1 = load_reg(s, a->rn);
7600
7601    gen(tmp1, tmp1, tmp2);
7602    tcg_temp_free_i32(tmp2);
7603
7604    if (logic_cc) {
7605        gen_logic_CC(tmp1);
7606    }
7607    return store_reg_kind(s, a->rd, tmp1, kind);
7608}
7609
7610static bool op_s_rxr_shi(DisasContext *s, arg_s_rrr_shi *a,
7611                         void (*gen)(TCGv_i32, TCGv_i32),
7612                         int logic_cc, StoreRegKind kind)
7613{
7614    TCGv_i32 tmp;
7615
7616    tmp = load_reg(s, a->rm);
7617    gen_arm_shift_im(tmp, a->shty, a->shim, logic_cc);
7618
7619    gen(tmp, tmp);
7620    if (logic_cc) {
7621        gen_logic_CC(tmp);
7622    }
7623    return store_reg_kind(s, a->rd, tmp, kind);
7624}
7625
7626/*
7627 * Data-processing (register-shifted register)
7628 *
7629 * Operate, with set flags, one register source,
7630 * one register shifted register source, and a destination.
7631 */
7632static bool op_s_rrr_shr(DisasContext *s, arg_s_rrr_shr *a,
7633                         void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
7634                         int logic_cc, StoreRegKind kind)
7635{
7636    TCGv_i32 tmp1, tmp2;
7637
7638    tmp1 = load_reg(s, a->rs);
7639    tmp2 = load_reg(s, a->rm);
7640    gen_arm_shift_reg(tmp2, a->shty, tmp1, logic_cc);
7641    tmp1 = load_reg(s, a->rn);
7642
7643    gen(tmp1, tmp1, tmp2);
7644    tcg_temp_free_i32(tmp2);
7645
7646    if (logic_cc) {
7647        gen_logic_CC(tmp1);
7648    }
7649    return store_reg_kind(s, a->rd, tmp1, kind);
7650}
7651
7652static bool op_s_rxr_shr(DisasContext *s, arg_s_rrr_shr *a,
7653                         void (*gen)(TCGv_i32, TCGv_i32),
7654                         int logic_cc, StoreRegKind kind)
7655{
7656    TCGv_i32 tmp1, tmp2;
7657
7658    tmp1 = load_reg(s, a->rs);
7659    tmp2 = load_reg(s, a->rm);
7660    gen_arm_shift_reg(tmp2, a->shty, tmp1, logic_cc);
7661
7662    gen(tmp2, tmp2);
7663    if (logic_cc) {
7664        gen_logic_CC(tmp2);
7665    }
7666    return store_reg_kind(s, a->rd, tmp2, kind);
7667}
7668
7669/*
7670 * Data-processing (immediate)
7671 *
7672 * Operate, with set flags, one register source,
7673 * one rotated immediate, and a destination.
7674 *
7675 * Note that logic_cc && a->rot setting CF based on the msb of the
7676 * immediate is the reason why we must pass in the unrotated form
7677 * of the immediate.
7678 */
7679static bool op_s_rri_rot(DisasContext *s, arg_s_rri_rot *a,
7680                         void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
7681                         int logic_cc, StoreRegKind kind)
7682{
7683    TCGv_i32 tmp1, tmp2;
7684    uint32_t imm;
7685
7686    imm = ror32(a->imm, a->rot);
7687    if (logic_cc && a->rot) {
7688        tcg_gen_movi_i32(cpu_CF, imm >> 31);
7689    }
7690    tmp2 = tcg_const_i32(imm);
7691    tmp1 = load_reg(s, a->rn);
7692
7693    gen(tmp1, tmp1, tmp2);
7694    tcg_temp_free_i32(tmp2);
7695
7696    if (logic_cc) {
7697        gen_logic_CC(tmp1);
7698    }
7699    return store_reg_kind(s, a->rd, tmp1, kind);
7700}
7701
7702static bool op_s_rxi_rot(DisasContext *s, arg_s_rri_rot *a,
7703                         void (*gen)(TCGv_i32, TCGv_i32),
7704                         int logic_cc, StoreRegKind kind)
7705{
7706    TCGv_i32 tmp;
7707    uint32_t imm;
7708
7709    imm = ror32(a->imm, a->rot);
7710    if (logic_cc && a->rot) {
7711        tcg_gen_movi_i32(cpu_CF, imm >> 31);
7712    }
7713    tmp = tcg_const_i32(imm);
7714
7715    gen(tmp, tmp);
7716    if (logic_cc) {
7717        gen_logic_CC(tmp);
7718    }
7719    return store_reg_kind(s, a->rd, tmp, kind);
7720}
7721
7722#define DO_ANY3(NAME, OP, L, K)                                         \
7723    static bool trans_##NAME##_rrri(DisasContext *s, arg_s_rrr_shi *a)  \
7724    { StoreRegKind k = (K); return op_s_rrr_shi(s, a, OP, L, k); }      \
7725    static bool trans_##NAME##_rrrr(DisasContext *s, arg_s_rrr_shr *a)  \
7726    { StoreRegKind k = (K); return op_s_rrr_shr(s, a, OP, L, k); }      \
7727    static bool trans_##NAME##_rri(DisasContext *s, arg_s_rri_rot *a)   \
7728    { StoreRegKind k = (K); return op_s_rri_rot(s, a, OP, L, k); }
7729
7730#define DO_ANY2(NAME, OP, L, K)                                         \
7731    static bool trans_##NAME##_rxri(DisasContext *s, arg_s_rrr_shi *a)  \
7732    { StoreRegKind k = (K); return op_s_rxr_shi(s, a, OP, L, k); }      \
7733    static bool trans_##NAME##_rxrr(DisasContext *s, arg_s_rrr_shr *a)  \
7734    { StoreRegKind k = (K); return op_s_rxr_shr(s, a, OP, L, k); }      \
7735    static bool trans_##NAME##_rxi(DisasContext *s, arg_s_rri_rot *a)   \
7736    { StoreRegKind k = (K); return op_s_rxi_rot(s, a, OP, L, k); }
7737
7738#define DO_CMP2(NAME, OP, L)                                            \
7739    static bool trans_##NAME##_xrri(DisasContext *s, arg_s_rrr_shi *a)  \
7740    { return op_s_rrr_shi(s, a, OP, L, STREG_NONE); }                   \
7741    static bool trans_##NAME##_xrrr(DisasContext *s, arg_s_rrr_shr *a)  \
7742    { return op_s_rrr_shr(s, a, OP, L, STREG_NONE); }                   \
7743    static bool trans_##NAME##_xri(DisasContext *s, arg_s_rri_rot *a)   \
7744    { return op_s_rri_rot(s, a, OP, L, STREG_NONE); }
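
/*
 * For example, DO_ANY3(AND, tcg_gen_and_i32, a->s, STREG_NORMAL) below
 * expands to trans_AND_rrri, trans_AND_rrrr and trans_AND_rri, covering
 * the immediate-shifted-register, register-shifted-register and
 * rotated-immediate forms of AND respectively.
 */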
7745
7746DO_ANY3(AND, tcg_gen_and_i32, a->s, STREG_NORMAL)
7747DO_ANY3(EOR, tcg_gen_xor_i32, a->s, STREG_NORMAL)
7748DO_ANY3(ORR, tcg_gen_or_i32, a->s, STREG_NORMAL)
7749DO_ANY3(BIC, tcg_gen_andc_i32, a->s, STREG_NORMAL)
7750
7751DO_ANY3(RSB, a->s ? gen_rsb_CC : gen_rsb, false, STREG_NORMAL)
7752DO_ANY3(ADC, a->s ? gen_adc_CC : gen_add_carry, false, STREG_NORMAL)
7753DO_ANY3(SBC, a->s ? gen_sbc_CC : gen_sub_carry, false, STREG_NORMAL)
7754DO_ANY3(RSC, a->s ? gen_rsc_CC : gen_rsc, false, STREG_NORMAL)
7755
7756DO_CMP2(TST, tcg_gen_and_i32, true)
7757DO_CMP2(TEQ, tcg_gen_xor_i32, true)
7758DO_CMP2(CMN, gen_add_CC, false)
7759DO_CMP2(CMP, gen_sub_CC, false)
7760
7761DO_ANY3(ADD, a->s ? gen_add_CC : tcg_gen_add_i32, false,
7762        a->rd == 13 && a->rn == 13 ? STREG_SP_CHECK : STREG_NORMAL)
7763
7764/*
7765 * Note that in the computation of StoreRegKind we may return out of
7766 * the middle of the functions expanded by DO_ANY3, and that we modify
7767 * a->s via the K parameter before it is used by OP.
7768 */
7769DO_ANY3(SUB, a->s ? gen_sub_CC : tcg_gen_sub_i32, false,
7770        ({
7771            StoreRegKind ret = STREG_NORMAL;
7772            if (a->rd == 15 && a->s) {
7773                /*
7774                 * See ALUExceptionReturn:
7775                 * In User mode, UNPREDICTABLE; we choose UNDEF.
7776                 * In Hyp mode, UNDEFINED.
7777                 */
7778                if (IS_USER(s) || s->current_el == 2) {
7779                    unallocated_encoding(s);
7780                    return true;
7781                }
7782                /* There is no writeback of nzcv to PSTATE.  */
7783                a->s = 0;
7784                ret = STREG_EXC_RET;
7785            } else if (a->rd == 13 && a->rn == 13) {
7786                ret = STREG_SP_CHECK;
7787            }
7788            ret;
7789        }))
7790
7791DO_ANY2(MOV, tcg_gen_mov_i32, a->s,
7792        ({
7793            StoreRegKind ret = STREG_NORMAL;
7794            if (a->rd == 15 && a->s) {
7795                /*
7796                 * See ALUExceptionReturn:
7797                 * In User mode, UNPREDICTABLE; we choose UNDEF.
7798                 * In Hyp mode, UNDEFINED.
7799                 */
7800                if (IS_USER(s) || s->current_el == 2) {
7801                    unallocated_encoding(s);
7802                    return true;
7803                }
7804                /* There is no writeback of nzcv to PSTATE.  */
7805                a->s = 0;
7806                ret = STREG_EXC_RET;
7807            } else if (a->rd == 13) {
7808                ret = STREG_SP_CHECK;
7809            }
7810            ret;
7811        }))
7812
7813DO_ANY2(MVN, tcg_gen_not_i32, a->s, STREG_NORMAL)
7814
7815/*
7816 * ORN is only available with T32, so there is no register-shifted-register
7817 * form of the insn.  Using the DO_ANY3 macro would create an unused function.
7818 */
7819static bool trans_ORN_rrri(DisasContext *s, arg_s_rrr_shi *a)
7820{
7821    return op_s_rrr_shi(s, a, tcg_gen_orc_i32, a->s, STREG_NORMAL);
7822}
7823
7824static bool trans_ORN_rri(DisasContext *s, arg_s_rri_rot *a)
7825{
7826    return op_s_rri_rot(s, a, tcg_gen_orc_i32, a->s, STREG_NORMAL);
7827}
7828
7829#undef DO_ANY3
7830#undef DO_ANY2
7831#undef DO_CMP2
7832
7833static bool trans_ADR(DisasContext *s, arg_ri *a)
7834{
7835    store_reg_bx(s, a->rd, add_reg_for_lit(s, 15, a->imm));
7836    return true;
7837}
7838
7839static bool trans_MOVW(DisasContext *s, arg_MOVW *a)
7840{
7841    TCGv_i32 tmp;
7842
7843    if (!ENABLE_ARCH_6T2) {
7844        return false;
7845    }
7846
7847    tmp = tcg_const_i32(a->imm);
7848    store_reg(s, a->rd, tmp);
7849    return true;
7850}
7851
7852static bool trans_MOVT(DisasContext *s, arg_MOVW *a)
7853{
7854    TCGv_i32 tmp;
7855
7856    if (!ENABLE_ARCH_6T2) {
7857        return false;
7858    }
7859
7860    tmp = load_reg(s, a->rd);
7861    tcg_gen_ext16u_i32(tmp, tmp);
7862    tcg_gen_ori_i32(tmp, tmp, a->imm << 16);
7863    store_reg(s, a->rd, tmp);
7864    return true;
7865}
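
/*
 * MOVW writes a zero-extended 16-bit immediate and MOVT replaces only
 * the top halfword, so e.g. "movw r0, #0x5678; movt r0, #0x1234"
 * leaves r0 = 0x12345678.
 */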
7866
7867/*
7868 * Multiply and multiply accumulate
7869 */
7870
7871static bool op_mla(DisasContext *s, arg_s_rrrr *a, bool add)
7872{
7873    TCGv_i32 t1, t2;
7874
7875    t1 = load_reg(s, a->rn);
7876    t2 = load_reg(s, a->rm);
7877    tcg_gen_mul_i32(t1, t1, t2);
7878    tcg_temp_free_i32(t2);
7879    if (add) {
7880        t2 = load_reg(s, a->ra);
7881        tcg_gen_add_i32(t1, t1, t2);
7882        tcg_temp_free_i32(t2);
7883    }
7884    if (a->s) {
7885        gen_logic_CC(t1);
7886    }
7887    store_reg(s, a->rd, t1);
7888    return true;
7889}
7890
7891static bool trans_MUL(DisasContext *s, arg_MUL *a)
7892{
7893    return op_mla(s, a, false);
7894}
7895
7896static bool trans_MLA(DisasContext *s, arg_MLA *a)
7897{
7898    return op_mla(s, a, true);
7899}
7900
7901static bool trans_MLS(DisasContext *s, arg_MLS *a)
7902{
7903    TCGv_i32 t1, t2;
7904
7905    if (!ENABLE_ARCH_6T2) {
7906        return false;
7907    }
7908    t1 = load_reg(s, a->rn);
7909    t2 = load_reg(s, a->rm);
7910    tcg_gen_mul_i32(t1, t1, t2);
7911    tcg_temp_free_i32(t2);
7912    t2 = load_reg(s, a->ra);
7913    tcg_gen_sub_i32(t1, t2, t1);
7914    tcg_temp_free_i32(t2);
7915    store_reg(s, a->rd, t1);
7916    return true;
7917}
7918
7919static bool op_mlal(DisasContext *s, arg_s_rrrr *a, bool uns, bool add)
7920{
7921    TCGv_i32 t0, t1, t2, t3;
7922
7923    t0 = load_reg(s, a->rm);
7924    t1 = load_reg(s, a->rn);
7925    if (uns) {
7926        tcg_gen_mulu2_i32(t0, t1, t0, t1);
7927    } else {
7928        tcg_gen_muls2_i32(t0, t1, t0, t1);
7929    }
7930    if (add) {
7931        t2 = load_reg(s, a->ra);
7932        t3 = load_reg(s, a->rd);
7933        tcg_gen_add2_i32(t0, t1, t0, t1, t2, t3);
7934        tcg_temp_free_i32(t2);
7935        tcg_temp_free_i32(t3);
7936    }
7937    if (a->s) {
7938        gen_logicq_cc(t0, t1);
7939    }
7940    store_reg(s, a->ra, t0);
7941    store_reg(s, a->rd, t1);
7942    return true;
7943}
7944
7945static bool trans_UMULL(DisasContext *s, arg_UMULL *a)
7946{
7947    return op_mlal(s, a, true, false);
7948}
7949
7950static bool trans_SMULL(DisasContext *s, arg_SMULL *a)
7951{
7952    return op_mlal(s, a, false, false);
7953}
7954
7955static bool trans_UMLAL(DisasContext *s, arg_UMLAL *a)
7956{
7957    return op_mlal(s, a, true, true);
7958}
7959
7960static bool trans_SMLAL(DisasContext *s, arg_SMLAL *a)
7961{
7962    return op_mlal(s, a, false, true);
7963}
7964
7965static bool trans_UMAAL(DisasContext *s, arg_UMAAL *a)
7966{
7967    TCGv_i32 t0, t1, t2, zero;
7968
7969    if (s->thumb
7970        ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7971        : !ENABLE_ARCH_6) {
7972        return false;
7973    }
7974
7975    t0 = load_reg(s, a->rm);
7976    t1 = load_reg(s, a->rn);
7977    tcg_gen_mulu2_i32(t0, t1, t0, t1);
7978    zero = tcg_const_i32(0);
7979    t2 = load_reg(s, a->ra);
7980    tcg_gen_add2_i32(t0, t1, t0, t1, t2, zero);
7981    tcg_temp_free_i32(t2);
7982    t2 = load_reg(s, a->rd);
7983    tcg_gen_add2_i32(t0, t1, t0, t1, t2, zero);
7984    tcg_temp_free_i32(t2);
7985    tcg_temp_free_i32(zero);
7986    store_reg(s, a->ra, t0);
7987    store_reg(s, a->rd, t1);
7988    return true;
7989}
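
/*
 * UMAAL cannot overflow 64 bits: the worst case is
 * 0xffffffff * 0xffffffff + 0xffffffff + 0xffffffff = 0xffffffffffffffff,
 * which is why the two carrying additions above are sufficient.
 */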
7990
7991/*
7992 * Saturating addition and subtraction
7993 */
7994
7995static bool op_qaddsub(DisasContext *s, arg_rrr *a, bool add, bool doub)
7996{
7997    TCGv_i32 t0, t1;
7998
7999    if (s->thumb
8000        ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
8001        : !ENABLE_ARCH_5TE) {
8002        return false;
8003    }
8004
8005    t0 = load_reg(s, a->rm);
8006    t1 = load_reg(s, a->rn);
8007    if (doub) {
8008        gen_helper_add_saturate(t1, cpu_env, t1, t1);
8009    }
8010    if (add) {
8011        gen_helper_add_saturate(t0, cpu_env, t0, t1);
8012    } else {
8013        gen_helper_sub_saturate(t0, cpu_env, t0, t1);
8014    }
8015    tcg_temp_free_i32(t1);
8016    store_reg(s, a->rd, t0);
8017    return true;
8018}
8019
8020#define DO_QADDSUB(NAME, ADD, DOUB) \
8021static bool trans_##NAME(DisasContext *s, arg_rrr *a)    \
8022{                                                        \
8023    return op_qaddsub(s, a, ADD, DOUB);                  \
8024}
8025
8026DO_QADDSUB(QADD, true, false)
8027DO_QADDSUB(QSUB, false, false)
8028DO_QADDSUB(QDADD, true, true)
8029DO_QADDSUB(QDSUB, false, true)
8030
8031#undef DO_QADDSUB
8032
8033/*
8034 * Halfword multiply and multiply accumulate
8035 */
8036
8037static bool op_smlaxxx(DisasContext *s, arg_rrrr *a,
8038                       int add_long, bool nt, bool mt)
8039{
8040    TCGv_i32 t0, t1, tl, th;
8041
8042    if (s->thumb
8043        ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
8044        : !ENABLE_ARCH_5TE) {
8045        return false;
8046    }
8047
8048    t0 = load_reg(s, a->rn);
8049    t1 = load_reg(s, a->rm);
8050    gen_mulxy(t0, t1, nt, mt);
8051    tcg_temp_free_i32(t1);
8052
8053    switch (add_long) {
8054    case 0:
8055        store_reg(s, a->rd, t0);
8056        break;
8057    case 1:
8058        t1 = load_reg(s, a->ra);
8059        gen_helper_add_setq(t0, cpu_env, t0, t1);
8060        tcg_temp_free_i32(t1);
8061        store_reg(s, a->rd, t0);
8062        break;
8063    case 2:
8064        tl = load_reg(s, a->ra);
8065        th = load_reg(s, a->rd);
8066        /* Sign-extend the 32-bit product to 64 bits.  */
8067        t1 = tcg_temp_new_i32();
8068        tcg_gen_sari_i32(t1, t0, 31);
8069        tcg_gen_add2_i32(tl, th, tl, th, t0, t1);
8070        tcg_temp_free_i32(t0);
8071        tcg_temp_free_i32(t1);
8072        store_reg(s, a->ra, tl);
8073        store_reg(s, a->rd, th);
8074        break;
8075    default:
8076        g_assert_not_reached();
8077    }
8078    return true;
8079}
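
/*
 * Here nt/mt select the top (1) or bottom (0) halfword of rn and rm
 * respectively, and add_long selects a plain multiply (0), a 32-bit
 * accumulate with Q-flag saturation checking (1), or a 64-bit
 * accumulate (2); e.g. SMLATB below is the nt=1, mt=0, add_long=1 case.
 */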
8080
8081#define DO_SMLAX(NAME, add, nt, mt) \
8082static bool trans_##NAME(DisasContext *s, arg_rrrr *a)     \
8083{                                                          \
8084    return op_smlaxxx(s, a, add, nt, mt);                  \
8085}
8086
8087DO_SMLAX(SMULBB, 0, 0, 0)
8088DO_SMLAX(SMULBT, 0, 0, 1)
8089DO_SMLAX(SMULTB, 0, 1, 0)
8090DO_SMLAX(SMULTT, 0, 1, 1)
8091
8092DO_SMLAX(SMLABB, 1, 0, 0)
8093DO_SMLAX(SMLABT, 1, 0, 1)
8094DO_SMLAX(SMLATB, 1, 1, 0)
8095DO_SMLAX(SMLATT, 1, 1, 1)
8096
8097DO_SMLAX(SMLALBB, 2, 0, 0)
8098DO_SMLAX(SMLALBT, 2, 0, 1)
8099DO_SMLAX(SMLALTB, 2, 1, 0)
8100DO_SMLAX(SMLALTT, 2, 1, 1)
8101
8102#undef DO_SMLAX
8103
8104static bool op_smlawx(DisasContext *s, arg_rrrr *a, bool add, bool mt)
8105{
8106    TCGv_i32 t0, t1;
8107
8108    if (!ENABLE_ARCH_5TE) {
8109        return false;
8110    }
8111
8112    t0 = load_reg(s, a->rn);
8113    t1 = load_reg(s, a->rm);
8114    /*
8115     * Since the nominal result is product<47:16>, shift the 16-bit
8116     * input up by 16 bits, so that the result is at product<63:32>.
8117     */
8118    if (mt) {
8119        tcg_gen_andi_i32(t1, t1, 0xffff0000);
8120    } else {
8121        tcg_gen_shli_i32(t1, t1, 16);
8122    }
8123    tcg_gen_muls2_i32(t0, t1, t0, t1);
8124    tcg_temp_free_i32(t0);
8125    if (add) {
8126        t0 = load_reg(s, a->ra);
8127        gen_helper_add_setq(t1, cpu_env, t1, t0);
8128        tcg_temp_free_i32(t0);
8129    }
8130    store_reg(s, a->rd, t1);
8131    return true;
8132}
8133
8134#define DO_SMLAWX(NAME, add, mt) \
8135static bool trans_##NAME(DisasContext *s, arg_rrrr *a)     \
8136{                                                          \
8137    return op_smlawx(s, a, add, mt);                       \
8138}
8139
8140DO_SMLAWX(SMULWB, 0, 0)
8141DO_SMLAWX(SMULWT, 0, 1)
8142DO_SMLAWX(SMLAWB, 1, 0)
8143DO_SMLAWX(SMLAWT, 1, 1)
8144
8145#undef DO_SMLAWX
8146
8147/*
8148 * MSR (immediate) and hints
8149 */
8150
8151static bool trans_YIELD(DisasContext *s, arg_YIELD *a)
8152{
8153    /*
8154     * When running single-threaded TCG code, use the helper to ensure that
8155     * the next round-robin scheduled vCPU gets a crack.  When running in
8156     * MTTCG we don't generate jumps to the helper as it won't affect the
8157     * scheduling of other vCPUs.
8158     */
8159    if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
8160        gen_set_pc_im(s, s->base.pc_next);
8161        s->base.is_jmp = DISAS_YIELD;
8162    }
8163    return true;
8164}
8165
8166static bool trans_WFE(DisasContext *s, arg_WFE *a)
8167{
8168    /*
8169     * When running single-threaded TCG code, use the helper to ensure that
8170     * the next round-robin scheduled vCPU gets a crack.  In MTTCG mode we
8171     * just skip this instruction.  Currently the SEV/SEVL instructions,
8172     * which are *one* of many ways to wake the CPU from WFE, are not
8173     * implemented so we can't sleep like WFI does.
8174     */
8175    if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
8176        gen_set_pc_im(s, s->base.pc_next);
8177        s->base.is_jmp = DISAS_WFE;
8178    }
8179    return true;
8180}
8181
8182static bool trans_WFI(DisasContext *s, arg_WFI *a)
8183{
8184    /* For WFI, halt the vCPU until an IRQ. */
8185    gen_set_pc_im(s, s->base.pc_next);
8186    s->base.is_jmp = DISAS_WFI;
8187    return true;
8188}
8189
8190static bool trans_NOP(DisasContext *s, arg_NOP *a)
8191{
8192    return true;
8193}
8194
8195static bool trans_MSR_imm(DisasContext *s, arg_MSR_imm *a)
8196{
8197    uint32_t val = ror32(a->imm, a->rot * 2);
8198    uint32_t mask = msr_mask(s, a->mask, a->r);
8199
8200    if (gen_set_psr_im(s, mask, a->r, val)) {
8201        unallocated_encoding(s);
8202    }
8203    return true;
8204}
8205
8206/*
8207 * Cyclic Redundancy Check
8208 */
8209
8210static bool op_crc32(DisasContext *s, arg_rrr *a, bool c, MemOp sz)
8211{
8212    TCGv_i32 t1, t2, t3;
8213
8214    if (!dc_isar_feature(aa32_crc32, s)) {
8215        return false;
8216    }
8217
8218    t1 = load_reg(s, a->rn);
8219    t2 = load_reg(s, a->rm);
8220    switch (sz) {
8221    case MO_8:
8222        gen_uxtb(t2);
8223        break;
8224    case MO_16:
8225        gen_uxth(t2);
8226        break;
8227    case MO_32:
8228        break;
8229    default:
8230        g_assert_not_reached();
8231    }
8232    t3 = tcg_const_i32(1 << sz);
8233    if (c) {
8234        gen_helper_crc32c(t1, t1, t2, t3);
8235    } else {
8236        gen_helper_crc32(t1, t1, t2, t3);
8237    }
8238    tcg_temp_free_i32(t2);
8239    tcg_temp_free_i32(t3);
8240    store_reg(s, a->rd, t1);
8241    return true;
8242}
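
/*
 * The third argument to the crc32 helpers is the number of message
 * bytes, 1 << sz; e.g. CRC32H passes MO_16 here, so the helper is
 * handed a byte count of 2 along with the zero-extended halfword.
 */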
8243
8244#define DO_CRC32(NAME, c, sz) \
8245static bool trans_##NAME(DisasContext *s, arg_rrr *a)  \
8246    { return op_crc32(s, a, c, sz); }
8247
8248DO_CRC32(CRC32B, false, MO_8)
8249DO_CRC32(CRC32H, false, MO_16)
8250DO_CRC32(CRC32W, false, MO_32)
8251DO_CRC32(CRC32CB, true, MO_8)
8252DO_CRC32(CRC32CH, true, MO_16)
8253DO_CRC32(CRC32CW, true, MO_32)
8254
8255#undef DO_CRC32
8256
8257/*
8258 * Miscellaneous instructions
8259 */
8260
8261static bool trans_MRS_bank(DisasContext *s, arg_MRS_bank *a)
8262{
8263    if (arm_dc_feature(s, ARM_FEATURE_M)) {
8264        return false;
8265    }
8266    gen_mrs_banked(s, a->r, a->sysm, a->rd);
8267    return true;
8268}
8269
8270static bool trans_MSR_bank(DisasContext *s, arg_MSR_bank *a)
8271{
8272    if (arm_dc_feature(s, ARM_FEATURE_M)) {
8273        return false;
8274    }
8275    gen_msr_banked(s, a->r, a->sysm, a->rn);
8276    return true;
8277}
8278
8279static bool trans_MRS_reg(DisasContext *s, arg_MRS_reg *a)
8280{
8281    TCGv_i32 tmp;
8282
8283    if (arm_dc_feature(s, ARM_FEATURE_M)) {
8284        return false;
8285    }
8286    if (a->r) {
8287        if (IS_USER(s)) {
8288            unallocated_encoding(s);
8289            return true;
8290        }
8291        tmp = load_cpu_field(spsr);
8292    } else {
8293        tmp = tcg_temp_new_i32();
8294        gen_helper_cpsr_read(tmp, cpu_env);
8295    }
8296    store_reg(s, a->rd, tmp);
8297    return true;
8298}
8299
8300static bool trans_MSR_reg(DisasContext *s, arg_MSR_reg *a)
8301{
8302    TCGv_i32 tmp;
8303    uint32_t mask = msr_mask(s, a->mask, a->r);
8304
8305    if (arm_dc_feature(s, ARM_FEATURE_M)) {
8306        return false;
8307    }
8308    tmp = load_reg(s, a->rn);
8309    if (gen_set_psr(s, mask, a->r, tmp)) {
8310        unallocated_encoding(s);
8311    }
8312    return true;
8313}
8314
8315static bool trans_MRS_v7m(DisasContext *s, arg_MRS_v7m *a)
8316{
8317    TCGv_i32 tmp;
8318
8319    if (!arm_dc_feature(s, ARM_FEATURE_M)) {
8320        return false;
8321    }
8322    tmp = tcg_const_i32(a->sysm);
8323    gen_helper_v7m_mrs(tmp, cpu_env, tmp);
8324    store_reg(s, a->rd, tmp);
8325    return true;
8326}
8327
8328static bool trans_MSR_v7m(DisasContext *s, arg_MSR_v7m *a)
8329{
8330    TCGv_i32 addr, reg, el;
8331
8332    if (!arm_dc_feature(s, ARM_FEATURE_M)) {
8333        return false;
8334    }
8335    addr = tcg_const_i32((a->mask << 10) | a->sysm);
8336    reg = load_reg(s, a->rn);
8337    gen_helper_v7m_msr(cpu_env, addr, reg);
8338    tcg_temp_free_i32(addr);
8339    tcg_temp_free_i32(reg);
8340    el = tcg_const_i32(s->current_el);
8341    gen_helper_rebuild_hflags_m32(cpu_env, el);
8342    tcg_temp_free_i32(el);
8343    gen_lookup_tb(s);
8344    return true;
8345}
8346
8347static bool trans_BX(DisasContext *s, arg_BX *a)
8348{
8349    if (!ENABLE_ARCH_4T) {
8350        return false;
8351    }
8352    gen_bx_excret(s, load_reg(s, a->rm));
8353    return true;
8354}
8355
8356static bool trans_BXJ(DisasContext *s, arg_BXJ *a)
8357{
8358    if (!ENABLE_ARCH_5J || arm_dc_feature(s, ARM_FEATURE_M)) {
8359        return false;
8360    }
8361    /* Trivial implementation equivalent to bx.  */
8362    gen_bx(s, load_reg(s, a->rm));
8363    return true;
8364}
8365
8366static bool trans_BLX_r(DisasContext *s, arg_BLX_r *a)
8367{
8368    TCGv_i32 tmp;
8369
8370    if (!ENABLE_ARCH_5) {
8371        return false;
8372    }
8373    tmp = load_reg(s, a->rm);
8374    tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | s->thumb);
8375    gen_bx(s, tmp);
8376    return true;
8377}
8378
8379/*
8380 * BXNS/BLXNS: only exist for v8M with the security extensions,
8381 * and always UNDEF if NonSecure.  We don't implement these in
8382 * the user-only mode either (in theory you can use them from
8383 * Secure User mode but they are too tied in to system emulation).
8384 */
8385static bool trans_BXNS(DisasContext *s, arg_BXNS *a)
8386{
8387    if (!s->v8m_secure || IS_USER_ONLY) {
8388        unallocated_encoding(s);
8389    } else {
8390        gen_bxns(s, a->rm);
8391    }
8392    return true;
8393}
8394
8395static bool trans_BLXNS(DisasContext *s, arg_BLXNS *a)
8396{
8397    if (!s->v8m_secure || IS_USER_ONLY) {
8398        unallocated_encoding(s);
8399    } else {
8400        gen_blxns(s, a->rm);
8401    }
8402    return true;
8403}
8404
8405static bool trans_CLZ(DisasContext *s, arg_CLZ *a)
8406{
8407    TCGv_i32 tmp;
8408
8409    if (!ENABLE_ARCH_5) {
8410        return false;
8411    }
8412    tmp = load_reg(s, a->rm);
8413    tcg_gen_clzi_i32(tmp, tmp, 32);
8414    store_reg(s, a->rd, tmp);
8415    return true;
8416}
8417
8418static bool trans_ERET(DisasContext *s, arg_ERET *a)
8419{
8420    TCGv_i32 tmp;
8421
8422    if (!arm_dc_feature(s, ARM_FEATURE_V7VE)) {
8423        return false;
8424    }
8425    if (IS_USER(s)) {
8426        unallocated_encoding(s);
8427        return true;
8428    }
8429    if (s->current_el == 2) {
8430        /* ERET from Hyp uses ELR_Hyp, not LR */
8431        tmp = load_cpu_field(elr_el[2]);
8432    } else {
8433        tmp = load_reg(s, 14);
8434    }
8435    gen_exception_return(s, tmp);
8436    return true;
8437}
8438
8439static bool trans_HLT(DisasContext *s, arg_HLT *a)
8440{
8441    gen_hlt(s, a->imm);
8442    return true;
8443}
8444
8445static bool trans_BKPT(DisasContext *s, arg_BKPT *a)
8446{
8447    if (!ENABLE_ARCH_5) {
8448        return false;
8449    }
8450    if (arm_dc_feature(s, ARM_FEATURE_M) &&
8451        semihosting_enabled() &&
8452#ifndef CONFIG_USER_ONLY
8453        !IS_USER(s) &&
8454#endif
8455        (a->imm == 0xab)) {
8456        gen_exception_internal_insn(s, s->base.pc_next, EXCP_SEMIHOST);
8457    } else {
8458        gen_exception_bkpt_insn(s, syn_aa32_bkpt(a->imm, false));
8459    }
8460    return true;
8461}
8462
8463static bool trans_HVC(DisasContext *s, arg_HVC *a)
8464{
8465    if (!ENABLE_ARCH_7 || arm_dc_feature(s, ARM_FEATURE_M)) {
8466        return false;
8467    }
8468    if (IS_USER(s)) {
8469        unallocated_encoding(s);
8470    } else {
8471        gen_hvc(s, a->imm);
8472    }
8473    return true;
8474}
8475
8476static bool trans_SMC(DisasContext *s, arg_SMC *a)
8477{
8478    if (!ENABLE_ARCH_6K || arm_dc_feature(s, ARM_FEATURE_M)) {
8479        return false;
8480    }
8481    if (IS_USER(s)) {
8482        unallocated_encoding(s);
8483    } else {
8484        gen_smc(s);
8485    }
8486    return true;
8487}
8488
8489static bool trans_SG(DisasContext *s, arg_SG *a)
8490{
8491    if (!arm_dc_feature(s, ARM_FEATURE_M) ||
8492        !arm_dc_feature(s, ARM_FEATURE_V8)) {
8493        return false;
8494    }
8495    /*
8496     * SG (v8M only)
8497     * The bulk of the behaviour for this instruction is implemented
8498     * in v7m_handle_execute_nsc(), which deals with the insn when
8499     * it is executed by a CPU in non-secure state from memory
8500     * which is Secure & NonSecure-Callable.
8501     * Here we only need to handle the remaining cases:
8502     *  * in NS memory (including the "security extension not
8503     *    implemented" case) : NOP
8504     *  * in S memory but CPU already secure (clear IT bits)
8505     * We know that the attribute for the memory this insn is
8506     * in must match the current CPU state, because otherwise
8507     * get_phys_addr_pmsav8 would have generated an exception.
8508     */
8509    if (s->v8m_secure) {
8510        /* Like the IT insn, we don't need to generate any code */
8511        s->condexec_cond = 0;
8512        s->condexec_mask = 0;
8513    }
8514    return true;
8515}
8516
8517static bool trans_TT(DisasContext *s, arg_TT *a)
8518{
8519    TCGv_i32 addr, tmp;
8520
8521    if (!arm_dc_feature(s, ARM_FEATURE_M) ||
8522        !arm_dc_feature(s, ARM_FEATURE_V8)) {
8523        return false;
8524    }
8525    if (a->rd == 13 || a->rd == 15 || a->rn == 15) {
8526        /* We UNDEF for these UNPREDICTABLE cases */
8527        unallocated_encoding(s);
8528        return true;
8529    }
8530    if (a->A && !s->v8m_secure) {
8531        /* This case is UNDEFINED.  */
8532        unallocated_encoding(s);
8533        return true;
8534    }
8535
8536    addr = load_reg(s, a->rn);
8537    tmp = tcg_const_i32((a->A << 1) | a->T);
8538    gen_helper_v7m_tt(tmp, cpu_env, addr, tmp);
8539    tcg_temp_free_i32(addr);
8540    store_reg(s, a->rd, tmp);
8541    return true;
8542}
8543
8544/*
8545 * Load/store register index
8546 */
8547
8548static ISSInfo make_issinfo(DisasContext *s, int rd, bool p, bool w)
8549{
8550    ISSInfo ret;
8551
8552    /* ISS not valid if writeback */
8553    if (p && !w) {
8554        ret = rd;
8555    } else {
8556        ret = ISSInvalid;
8557    }
8558    return ret;
8559}
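
/*
 * For example, an immediate-offset "ldr r3, [r5, #8]" (P=1, W=0)
 * reports rt=3 as the syndrome register, while post-indexed or
 * writeback forms such as "ldr r3, [r5], #8" yield ISSInvalid,
 * as noted above.
 */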
8560
8561static TCGv_i32 op_addr_rr_pre(DisasContext *s, arg_ldst_rr *a)
8562{
8563    TCGv_i32 addr = load_reg(s, a->rn);
8564
8565    if (s->v8m_stackcheck && a->rn == 13 && a->w) {
8566        gen_helper_v8m_stackcheck(cpu_env, addr);
8567    }
8568
8569    if (a->p) {
8570        TCGv_i32 ofs = load_reg(s, a->rm);
8571        gen_arm_shift_im(ofs, a->shtype, a->shimm, 0);
8572        if (a->u) {
8573            tcg_gen_add_i32(addr, addr, ofs);
8574        } else {
8575            tcg_gen_sub_i32(addr, addr, ofs);
8576        }
8577        tcg_temp_free_i32(ofs);
8578    }
8579    return addr;
8580}
8581
8582static void op_addr_rr_post(DisasContext *s, arg_ldst_rr *a,
8583                            TCGv_i32 addr, int address_offset)
8584{
8585    if (!a->p) {
8586        TCGv_i32 ofs = load_reg(s, a->rm);
8587        gen_arm_shift_im(ofs, a->shtype, a->shimm, 0);
8588        if (a->u) {
8589            tcg_gen_add_i32(addr, addr, ofs);
8590        } else {
8591            tcg_gen_sub_i32(addr, addr, ofs);
8592        }
8593        tcg_temp_free_i32(ofs);
8594    } else if (!a->w) {
8595        tcg_temp_free_i32(addr);
8596        return;
8597    }
8598    tcg_gen_addi_i32(addr, addr, address_offset);
8599    store_reg(s, a->rn, addr);
8600}
8601
8602static bool op_load_rr(DisasContext *s, arg_ldst_rr *a,
8603                       MemOp mop, int mem_idx)
8604{
8605    ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w);
8606    TCGv_i32 addr, tmp;
8607
8608    addr = op_addr_rr_pre(s, a);
8609
8610    tmp = tcg_temp_new_i32();
8611    gen_aa32_ld_i32(s, tmp, addr, mem_idx, mop | s->be_data);
8612    disas_set_da_iss(s, mop, issinfo);
8613
8614    /*
8615     * Perform base writeback before the loaded value to
8616     * ensure correct behavior with overlapping index registers.
8617     */
8618    op_addr_rr_post(s, a, addr, 0);
8619    store_reg_from_load(s, a->rt, tmp);
8620    return true;
8621}
8622
8623static bool op_store_rr(DisasContext *s, arg_ldst_rr *a,
8624                        MemOp mop, int mem_idx)
8625{
8626    ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w) | ISSIsWrite;
8627    TCGv_i32 addr, tmp;
8628
8629    addr = op_addr_rr_pre(s, a);
8630
8631    tmp = load_reg(s, a->rt);
8632    gen_aa32_st_i32(s, tmp, addr, mem_idx, mop | s->be_data);
8633    disas_set_da_iss(s, mop, issinfo);
8634    tcg_temp_free_i32(tmp);
8635
8636    op_addr_rr_post(s, a, addr, 0);
8637    return true;
8638}
8639
8640static bool trans_LDRD_rr(DisasContext *s, arg_ldst_rr *a)
8641{
8642    int mem_idx = get_mem_index(s);
8643    TCGv_i32 addr, tmp;
8644
8645    if (!ENABLE_ARCH_5TE) {
8646        return false;
8647    }
8648    if (a->rt & 1) {
8649        unallocated_encoding(s);
8650        return true;
8651    }
8652    addr = op_addr_rr_pre(s, a);
8653
8654    tmp = tcg_temp_new_i32();
8655    gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
8656    store_reg(s, a->rt, tmp);
8657
8658    tcg_gen_addi_i32(addr, addr, 4);
8659
8660    tmp = tcg_temp_new_i32();
8661    gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
8662    store_reg(s, a->rt + 1, tmp);
8663
8664    /* LDRD w/ base writeback is undefined if the registers overlap.  */
8665    op_addr_rr_post(s, a, addr, -4);
8666    return true;
8667}
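
/*
 * Note the -4 passed to op_addr_rr_post above: addr was advanced by 4
 * to load the second word, so -4 compensates for that and any
 * post-indexing or writeback is applied relative to the original base.
 */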
8668
8669static bool trans_STRD_rr(DisasContext *s, arg_ldst_rr *a)
8670{
8671    int mem_idx = get_mem_index(s);
8672    TCGv_i32 addr, tmp;
8673
8674    if (!ENABLE_ARCH_5TE) {
8675        return false;
8676    }
8677    if (a->rt & 1) {
8678        unallocated_encoding(s);
8679        return true;
8680    }
8681    addr = op_addr_rr_pre(s, a);
8682
8683    tmp = load_reg(s, a->rt);
8684    gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
8685    tcg_temp_free_i32(tmp);
8686
8687    tcg_gen_addi_i32(addr, addr, 4);
8688
8689    tmp = load_reg(s, a->rt + 1);
8690    gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
8691    tcg_temp_free_i32(tmp);
8692
8693    op_addr_rr_post(s, a, addr, -4);
8694    return true;
8695}
8696
8697/*
8698 * Load/store immediate index
8699 */
8700
8701static TCGv_i32 op_addr_ri_pre(DisasContext *s, arg_ldst_ri *a)
8702{
8703    int ofs = a->imm;
8704
8705    if (!a->u) {
8706        ofs = -ofs;
8707    }
8708
8709    if (s->v8m_stackcheck && a->rn == 13 && a->w) {
8710        /*
8711         * Stackcheck. Here we know 'addr' is the current SP;
8712         * U is set if we're moving SP up, else down. It is
8713         * UNKNOWN whether the limit check triggers when SP starts
8714         * below the limit and ends up above it; we chose to do so.
8715         */
8716        if (!a->u) {
8717            TCGv_i32 newsp = tcg_temp_new_i32();
8718            tcg_gen_addi_i32(newsp, cpu_R[13], ofs);
8719            gen_helper_v8m_stackcheck(cpu_env, newsp);
8720            tcg_temp_free_i32(newsp);
8721        } else {
8722            gen_helper_v8m_stackcheck(cpu_env, cpu_R[13]);
8723        }
8724    }
8725
8726    return add_reg_for_lit(s, a->rn, a->p ? ofs : 0);
8727}
8728
8729static void op_addr_ri_post(DisasContext *s, arg_ldst_ri *a,
8730                            TCGv_i32 addr, int address_offset)
8731{
8732    if (!a->p) {
8733        if (a->u) {
8734            address_offset += a->imm;
8735        } else {
8736            address_offset -= a->imm;
8737        }
8738    } else if (!a->w) {
8739        tcg_temp_free_i32(addr);
8740        return;
8741    }
8742    tcg_gen_addi_i32(addr, addr, address_offset);
8743    store_reg(s, a->rn, addr);
8744}
8745
8746static bool op_load_ri(DisasContext *s, arg_ldst_ri *a,
8747                       MemOp mop, int mem_idx)
8748{
8749    ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w);
8750    TCGv_i32 addr, tmp;
8751
8752    addr = op_addr_ri_pre(s, a);
8753
8754    tmp = tcg_temp_new_i32();
8755    gen_aa32_ld_i32(s, tmp, addr, mem_idx, mop | s->be_data);
8756    disas_set_da_iss(s, mop, issinfo);
8757
8758    /*
8759     * Perform base writeback before the loaded value to
8760     * ensure correct behavior with overlapping index registers.
8761     */
8762    op_addr_ri_post(s, a, addr, 0);
8763    store_reg_from_load(s, a->rt, tmp);
8764    return true;
8765}
8766
8767static bool op_store_ri(DisasContext *s, arg_ldst_ri *a,
8768                        MemOp mop, int mem_idx)
8769{
8770    ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w) | ISSIsWrite;
8771    TCGv_i32 addr, tmp;
8772
8773    addr = op_addr_ri_pre(s, a);
8774
8775    tmp = load_reg(s, a->rt);
8776    gen_aa32_st_i32(s, tmp, addr, mem_idx, mop | s->be_data);
8777    disas_set_da_iss(s, mop, issinfo);
8778    tcg_temp_free_i32(tmp);
8779
8780    op_addr_ri_post(s, a, addr, 0);
8781    return true;
8782}
8783
8784static bool op_ldrd_ri(DisasContext *s, arg_ldst_ri *a, int rt2)
8785{
8786    int mem_idx = get_mem_index(s);
8787    TCGv_i32 addr, tmp;
8788
8789    addr = op_addr_ri_pre(s, a);
8790
8791    tmp = tcg_temp_new_i32();
8792    gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
8793    store_reg(s, a->rt, tmp);
8794
8795    tcg_gen_addi_i32(addr, addr, 4);
8796
8797    tmp = tcg_temp_new_i32();
8798    gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
8799    store_reg(s, rt2, tmp);
8800
8801    /* LDRD w/ base writeback is undefined if the registers overlap.  */
8802    op_addr_ri_post(s, a, addr, -4);
8803    return true;
8804}
8805
8806static bool trans_LDRD_ri_a32(DisasContext *s, arg_ldst_ri *a)
8807{
8808    if (!ENABLE_ARCH_5TE || (a->rt & 1)) {
8809        return false;
8810    }
8811    return op_ldrd_ri(s, a, a->rt + 1);
8812}
8813
8814static bool trans_LDRD_ri_t32(DisasContext *s, arg_ldst_ri2 *a)
8815{
8816    arg_ldst_ri b = {
8817        .u = a->u, .w = a->w, .p = a->p,
8818        .rn = a->rn, .rt = a->rt, .imm = a->imm
8819    };
8820    return op_ldrd_ri(s, &b, a->rt2);
8821}
8822
8823static bool op_strd_ri(DisasContext *s, arg_ldst_ri *a, int rt2)
8824{
8825    int mem_idx = get_mem_index(s);
8826    TCGv_i32 addr, tmp;
8827
8828    addr = op_addr_ri_pre(s, a);
8829
8830    tmp = load_reg(s, a->rt);
8831    gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
8832    tcg_temp_free_i32(tmp);
8833
8834    tcg_gen_addi_i32(addr, addr, 4);
8835
8836    tmp = load_reg(s, rt2);
8837    gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
8838    tcg_temp_free_i32(tmp);
8839
8840    op_addr_ri_post(s, a, addr, -4);
8841    return true;
8842}
8843
8844static bool trans_STRD_ri_a32(DisasContext *s, arg_ldst_ri *a)
8845{
8846    if (!ENABLE_ARCH_5TE || (a->rt & 1)) {
8847        return false;
8848    }
8849    return op_strd_ri(s, a, a->rt + 1);
8850}
8851
8852static bool trans_STRD_ri_t32(DisasContext *s, arg_ldst_ri2 *a)
8853{
8854    arg_ldst_ri b = {
8855        .u = a->u, .w = a->w, .p = a->p,
8856        .rn = a->rn, .rt = a->rt, .imm = a->imm
8857    };
8858    return op_strd_ri(s, &b, a->rt2);
8859}
8860
8861#define DO_LDST(NAME, WHICH, MEMOP) \
8862static bool trans_##NAME##_ri(DisasContext *s, arg_ldst_ri *a)        \
8863{                                                                     \
8864    return op_##WHICH##_ri(s, a, MEMOP, get_mem_index(s));            \
8865}                                                                     \
8866static bool trans_##NAME##T_ri(DisasContext *s, arg_ldst_ri *a)       \
8867{                                                                     \
8868    return op_##WHICH##_ri(s, a, MEMOP, get_a32_user_mem_index(s));   \
8869}                                                                     \
8870static bool trans_##NAME##_rr(DisasContext *s, arg_ldst_rr *a)        \
8871{                                                                     \
8872    return op_##WHICH##_rr(s, a, MEMOP, get_mem_index(s));            \
8873}                                                                     \
8874static bool trans_##NAME##T_rr(DisasContext *s, arg_ldst_rr *a)       \
8875{                                                                     \
8876    return op_##WHICH##_rr(s, a, MEMOP, get_a32_user_mem_index(s));   \
8877}
8878
8879DO_LDST(LDR, load, MO_UL)
8880DO_LDST(LDRB, load, MO_UB)
8881DO_LDST(LDRH, load, MO_UW)
8882DO_LDST(LDRSB, load, MO_SB)
8883DO_LDST(LDRSH, load, MO_SW)
8884
8885DO_LDST(STR, store, MO_UL)
8886DO_LDST(STRB, store, MO_UB)
8887DO_LDST(STRH, store, MO_UW)
8888
8889#undef DO_LDST
8890
8891/*
8892 * Synchronization primitives
8893 */
8894
8895static bool op_swp(DisasContext *s, arg_SWP *a, MemOp opc)
8896{
8897    TCGv_i32 addr, tmp;
8898    TCGv taddr;
8899
8900    opc |= s->be_data;
8901    addr = load_reg(s, a->rn);
8902    taddr = gen_aa32_addr(s, addr, opc);
8903    tcg_temp_free_i32(addr);
8904
8905    tmp = load_reg(s, a->rt2);
8906    tcg_gen_atomic_xchg_i32(tmp, taddr, tmp, get_mem_index(s), opc);
8907    tcg_temp_free(taddr);
8908
8909    store_reg(s, a->rt, tmp);
8910    return true;
8911}
8912
8913static bool trans_SWP(DisasContext *s, arg_SWP *a)
8914{
8915    return op_swp(s, a, MO_UL | MO_ALIGN);
8916}
8917
8918static bool trans_SWPB(DisasContext *s, arg_SWP *a)
8919{
8920    return op_swp(s, a, MO_UB);
8921}
8922
8923/*
8924 * Load/Store Exclusive and Load-Acquire/Store-Release
8925 */
8926
8927static bool op_strex(DisasContext *s, arg_STREX *a, MemOp mop, bool rel)
8928{
8929    TCGv_i32 addr;
8930    /* Some cases stopped being UNPREDICTABLE in v8A (but not v8M) */
8931    bool v8a = ENABLE_ARCH_8 && !arm_dc_feature(s, ARM_FEATURE_M);
8932
8933    /* We UNDEF for these UNPREDICTABLE cases.  */
8934    if (a->rd == 15 || a->rn == 15 || a->rt == 15
8935        || a->rd == a->rn || a->rd == a->rt
8936        || (!v8a && s->thumb && (a->rd == 13 || a->rt == 13))
8937        || (mop == MO_64
8938            && (a->rt2 == 15
8939                || a->rd == a->rt2
8940                || (!v8a && s->thumb && a->rt2 == 13)))) {
8941        unallocated_encoding(s);
8942        return true;
8943    }
8944
8945    if (rel) {
8946        tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
8947    }
8948
8949    addr = tcg_temp_local_new_i32();
8950    load_reg_var(s, addr, a->rn);
8951    tcg_gen_addi_i32(addr, addr, a->imm);
8952
8953    gen_store_exclusive(s, a->rd, a->rt, a->rt2, addr, mop);
8954    tcg_temp_free_i32(addr);
8955    return true;
8956}
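
/*
 * Note the barrier placement for the release forms: the TCG_BAR_STRL
 * barrier is emitted before the store-exclusive so that earlier memory
 * accesses are ordered before it, mirroring op_ldrex below, which emits
 * a TCG_BAR_LDAQ barrier after the load for the acquire forms.
 */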
8957
8958static bool trans_STREX(DisasContext *s, arg_STREX *a)
8959{
8960    if (!ENABLE_ARCH_6) {
8961        return false;
8962    }
8963    return op_strex(s, a, MO_32, false);
8964}
8965
8966static bool trans_STREXD_a32(DisasContext *s, arg_STREX *a)
8967{
8968    if (!ENABLE_ARCH_6K) {
8969        return false;
8970    }
8971    /* We UNDEF for these UNPREDICTABLE cases.  */
8972    if (a->rt & 1) {
8973        unallocated_encoding(s);
8974        return true;
8975    }
8976    a->rt2 = a->rt + 1;
8977    return op_strex(s, a, MO_64, false);
8978}
8979
8980static bool trans_STREXD_t32(DisasContext *s, arg_STREX *a)
8981{
8982    return op_strex(s, a, MO_64, false);
8983}
8984
8985static bool trans_STREXB(DisasContext *s, arg_STREX *a)
8986{
8987    if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
8988        return false;
8989    }
8990    return op_strex(s, a, MO_8, false);
8991}
8992
8993static bool trans_STREXH(DisasContext *s, arg_STREX *a)
8994{
8995    if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
8996        return false;
8997    }
8998    return op_strex(s, a, MO_16, false);
8999}
9000
9001static bool trans_STLEX(DisasContext *s, arg_STREX *a)
9002{
9003    if (!ENABLE_ARCH_8) {
9004        return false;
9005    }
9006    return op_strex(s, a, MO_32, true);
9007}
9008
9009static bool trans_STLEXD_a32(DisasContext *s, arg_STREX *a)
9010{
9011    if (!ENABLE_ARCH_8) {
9012        return false;
9013    }
9014    /* We UNDEF for these UNPREDICTABLE cases.  */
9015    if (a->rt & 1) {
9016        unallocated_encoding(s);
9017        return true;
9018    }
9019    a->rt2 = a->rt + 1;
9020    return op_strex(s, a, MO_64, true);
9021}
9022
9023static bool trans_STLEXD_t32(DisasContext *s, arg_STREX *a)
9024{
9025    if (!ENABLE_ARCH_8) {
9026        return false;
9027    }
9028    return op_strex(s, a, MO_64, true);
9029}
9030
9031static bool trans_STLEXB(DisasContext *s, arg_STREX *a)
9032{
9033    if (!ENABLE_ARCH_8) {
9034        return false;
9035    }
9036    return op_strex(s, a, MO_8, true);
9037}
9038
9039static bool trans_STLEXH(DisasContext *s, arg_STREX *a)
9040{
9041    if (!ENABLE_ARCH_8) {
9042        return false;
9043    }
9044    return op_strex(s, a, MO_16, true);
9045}
9046
9047static bool op_stl(DisasContext *s, arg_STL *a, MemOp mop)
9048{
9049    TCGv_i32 addr, tmp;
9050
9051    if (!ENABLE_ARCH_8) {
9052        return false;
9053    }
9054    /* We UNDEF for these UNPREDICTABLE cases.  */
9055    if (a->rn == 15 || a->rt == 15) {
9056        unallocated_encoding(s);
9057        return true;
9058    }
9059
9060    addr = load_reg(s, a->rn);
9061    tmp = load_reg(s, a->rt);
9062    tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
9063    gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), mop | s->be_data);
9064    disas_set_da_iss(s, mop, a->rt | ISSIsAcqRel | ISSIsWrite);
9065
9066    tcg_temp_free_i32(tmp);
9067    tcg_temp_free_i32(addr);
9068    return true;
9069}
9070
9071static bool trans_STL(DisasContext *s, arg_STL *a)
9072{
9073    return op_stl(s, a, MO_UL);
9074}
9075
9076static bool trans_STLB(DisasContext *s, arg_STL *a)
9077{
9078    return op_stl(s, a, MO_UB);
9079}
9080
9081static bool trans_STLH(DisasContext *s, arg_STL *a)
9082{
9083    return op_stl(s, a, MO_UW);
9084}
9085
9086static bool op_ldrex(DisasContext *s, arg_LDREX *a, MemOp mop, bool acq)
9087{
9088    TCGv_i32 addr;
9089    /* Some cases stopped being UNPREDICTABLE in v8A (but not v8M) */
9090    bool v8a = ENABLE_ARCH_8 && !arm_dc_feature(s, ARM_FEATURE_M);
9091
9092    /* We UNDEF for these UNPREDICTABLE cases.  */
9093    if (a->rn == 15 || a->rt == 15
9094        || (!v8a && s->thumb && a->rt == 13)
9095        || (mop == MO_64
9096            && (a->rt2 == 15 || a->rt == a->rt2
9097                || (!v8a && s->thumb && a->rt2 == 13)))) {
9098        unallocated_encoding(s);
9099        return true;
9100    }
9101
9102    addr = tcg_temp_local_new_i32();
9103    load_reg_var(s, addr, a->rn);
9104    tcg_gen_addi_i32(addr, addr, a->imm);
9105
9106    gen_load_exclusive(s, a->rt, a->rt2, addr, mop);
9107    tcg_temp_free_i32(addr);
9108
9109    if (acq) {
9110        tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
9111    }
9112    return true;
9113}
9114
9115static bool trans_LDREX(DisasContext *s, arg_LDREX *a)
9116{
9117    if (!ENABLE_ARCH_6) {
9118        return false;
9119    }
9120    return op_ldrex(s, a, MO_32, false);
9121}
9122
9123static bool trans_LDREXD_a32(DisasContext *s, arg_LDREX *a)
9124{
9125    if (!ENABLE_ARCH_6K) {
9126        return false;
9127    }
9128    /* We UNDEF for these UNPREDICTABLE cases.  */
9129    if (a->rt & 1) {
9130        unallocated_encoding(s);
9131        return true;
9132    }
9133    a->rt2 = a->rt + 1;
9134    return op_ldrex(s, a, MO_64, false);
9135}
9136
9137static bool trans_LDREXD_t32(DisasContext *s, arg_LDREX *a)
9138{
9139    return op_ldrex(s, a, MO_64, false);
9140}
9141
9142static bool trans_LDREXB(DisasContext *s, arg_LDREX *a)
9143{
9144    if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
9145        return false;
9146    }
9147    return op_ldrex(s, a, MO_8, false);
9148}
9149
9150static bool trans_LDREXH(DisasContext *s, arg_LDREX *a)
9151{
9152    if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
9153        return false;
9154    }
9155    return op_ldrex(s, a, MO_16, false);
9156}
9157
9158static bool trans_LDAEX(DisasContext *s, arg_LDREX *a)
9159{
9160    if (!ENABLE_ARCH_8) {
9161        return false;
9162    }
9163    return op_ldrex(s, a, MO_32, true);
9164}
9165
9166static bool trans_LDAEXD_a32(DisasContext *s, arg_LDREX *a)
9167{
9168    if (!ENABLE_ARCH_8) {
9169        return false;
9170    }
9171    /* We UNDEF for these UNPREDICTABLE cases.  */
9172    if (a->rt & 1) {
9173        unallocated_encoding(s);
9174        return true;
9175    }
9176    a->rt2 = a->rt + 1;
9177    return op_ldrex(s, a, MO_64, true);
9178}
9179
9180static bool trans_LDAEXD_t32(DisasContext *s, arg_LDREX *a)
9181{
9182    if (!ENABLE_ARCH_8) {
9183        return false;
9184    }
9185    return op_ldrex(s, a, MO_64, true);
9186}
9187
9188static bool trans_LDAEXB(DisasContext *s, arg_LDREX *a)
9189{
9190    if (!ENABLE_ARCH_8) {
9191        return false;
9192    }
9193    return op_ldrex(s, a, MO_8, true);
9194}
9195
9196static bool trans_LDAEXH(DisasContext *s, arg_LDREX *a)
9197{
9198    if (!ENABLE_ARCH_8) {
9199        return false;
9200    }
9201    return op_ldrex(s, a, MO_16, true);
9202}
9203
9204static bool op_lda(DisasContext *s, arg_LDA *a, MemOp mop)
9205{
9206    TCGv_i32 addr, tmp;
9207
9208    if (!ENABLE_ARCH_8) {
9209        return false;
9210    }
9211    /* We UNDEF for these UNPREDICTABLE cases.  */
9212    if (a->rn == 15 || a->rt == 15) {
9213        unallocated_encoding(s);
9214        return true;
9215    }
9216
9217    addr = load_reg(s, a->rn);
9218    tmp = tcg_temp_new_i32();
9219    gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), mop | s->be_data);
9220    disas_set_da_iss(s, mop, a->rt | ISSIsAcqRel);
9221    tcg_temp_free_i32(addr);
9222
9223    store_reg(s, a->rt, tmp);
9224    tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
9225    return true;
9226}
9227
9228static bool trans_LDA(DisasContext *s, arg_LDA *a)
9229{
9230    return op_lda(s, a, MO_UL);
9231}
9232
9233static bool trans_LDAB(DisasContext *s, arg_LDA *a)
9234{
9235    return op_lda(s, a, MO_UB);
9236}
9237
9238static bool trans_LDAH(DisasContext *s, arg_LDA *a)
9239{
9240    return op_lda(s, a, MO_UW);
9241}
9242
9243/*
9244 * Media instructions
9245 */
9246
9247static bool trans_USADA8(DisasContext *s, arg_USADA8 *a)
9248{
9249    TCGv_i32 t1, t2;
9250
9251    if (!ENABLE_ARCH_6) {
9252        return false;
9253    }
9254
9255    t1 = load_reg(s, a->rn);
9256    t2 = load_reg(s, a->rm);
9257    gen_helper_usad8(t1, t1, t2);
9258    tcg_temp_free_i32(t2);
9259    if (a->ra != 15) {
9260        t2 = load_reg(s, a->ra);
9261        tcg_gen_add_i32(t1, t1, t2);
9262        tcg_temp_free_i32(t2);
9263    }
9264    store_reg(s, a->rd, t1);
9265    return true;
9266}
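
/*
 * USAD8/USADA8 compute the sum of absolute differences of the four
 * byte pairs in rn and rm; ra == 15 encodes USAD8 (no accumulate),
 * which is why the accumulation above is skipped in that case.
 */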
9267
9268static bool op_bfx(DisasContext *s, arg_UBFX *a, bool u)
9269{
9270    TCGv_i32 tmp;
9271    int width = a->widthm1 + 1;
9272    int shift = a->lsb;
9273
9274    if (!ENABLE_ARCH_6T2) {
9275        return false;
9276    }
9277    if (shift + width > 32) {
9278        /* UNPREDICTABLE; we choose to UNDEF */
9279        unallocated_encoding(s);
9280        return true;
9281    }
9282
9283    tmp = load_reg(s, a->rn);
9284    if (u) {
9285        tcg_gen_extract_i32(tmp, tmp, shift, width);
9286    } else {
9287        tcg_gen_sextract_i32(tmp, tmp, shift, width);
9288    }
9289    store_reg(s, a->rd, tmp);
9290    return true;
9291}
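
/*
 * Example: "ubfx r0, r1, #4, #8" has lsb=4 and widthm1=7, extracting
 * r1<11:4> zero-extended into r0; the SBFX form sign-extends instead.
 */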
9292
9293static bool trans_SBFX(DisasContext *s, arg_SBFX *a)
9294{
9295    return op_bfx(s, a, false);
9296}
9297
9298static bool trans_UBFX(DisasContext *s, arg_UBFX *a)
9299{
9300    return op_bfx(s, a, true);
9301}
9302
9303static bool trans_BFCI(DisasContext *s, arg_BFCI *a)
9304{
9305    TCGv_i32 tmp;
9306    int msb = a->msb, lsb = a->lsb;
9307    int width;
9308
9309    if (!ENABLE_ARCH_6T2) {
9310        return false;
9311    }
9312    if (msb < lsb) {
9313        /* UNPREDICTABLE; we choose to UNDEF */
9314        unallocated_encoding(s);
9315        return true;
9316    }
9317
9318    width = msb + 1 - lsb;
9319    if (a->rn == 15) {
9320        /* BFC */
9321        tmp = tcg_const_i32(0);
9322    } else {
9323        /* BFI */
9324        tmp = load_reg(s, a->rn);
9325    }
9326    if (width != 32) {
9327        TCGv_i32 tmp2 = load_reg(s, a->rd);
9328        tcg_gen_deposit_i32(tmp, tmp2, tmp, lsb, width);
9329        tcg_temp_free_i32(tmp2);
9330    }
9331    store_reg(s, a->rd, tmp);
9332    return true;
9333}
9334
9335static bool trans_UDF(DisasContext *s, arg_UDF *a)
9336{
9337    unallocated_encoding(s);
9338    return true;
9339}
9340
9341/*
9342 * Parallel addition and subtraction
9343 */
9344
9345static bool op_par_addsub(DisasContext *s, arg_rrr *a,
9346                          void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32))
9347{
9348    TCGv_i32 t0, t1;
9349
9350    if (s->thumb
9351        ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
9352        : !ENABLE_ARCH_6) {
9353        return false;
9354    }
9355
9356    t0 = load_reg(s, a->rn);
9357    t1 = load_reg(s, a->rm);
9358
9359    gen(t0, t0, t1);
9360
9361    tcg_temp_free_i32(t1);
9362    store_reg(s, a->rd, t0);
9363    return true;
9364}
9365
9366static bool op_par_addsub_ge(DisasContext *s, arg_rrr *a,
9367                             void (*gen)(TCGv_i32, TCGv_i32,
9368                                         TCGv_i32, TCGv_ptr))
9369{
9370    TCGv_i32 t0, t1;
9371    TCGv_ptr ge;
9372
9373    if (s->thumb
9374        ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
9375        : !ENABLE_ARCH_6) {
9376        return false;
9377    }
9378
9379    t0 = load_reg(s, a->rn);
9380    t1 = load_reg(s, a->rm);
9381
9382    ge = tcg_temp_new_ptr();
9383    tcg_gen_addi_ptr(ge, cpu_env, offsetof(CPUARMState, GE));
9384    gen(t0, t0, t1, ge);
9385
9386    tcg_temp_free_ptr(ge);
9387    tcg_temp_free_i32(t1);
9388    store_reg(s, a->rd, t0);
9389    return true;
9390}
9391
9392#define DO_PAR_ADDSUB(NAME, helper) \
9393static bool trans_##NAME(DisasContext *s, arg_rrr *a)   \
9394{                                                       \
9395    return op_par_addsub(s, a, helper);                 \
9396}
9397
9398#define DO_PAR_ADDSUB_GE(NAME, helper) \
9399static bool trans_##NAME(DisasContext *s, arg_rrr *a)   \
9400{                                                       \
9401    return op_par_addsub_ge(s, a, helper);              \
9402}
9403
9404DO_PAR_ADDSUB_GE(SADD16, gen_helper_sadd16)
9405DO_PAR_ADDSUB_GE(SASX, gen_helper_saddsubx)
9406DO_PAR_ADDSUB_GE(SSAX, gen_helper_ssubaddx)
9407DO_PAR_ADDSUB_GE(SSUB16, gen_helper_ssub16)
9408DO_PAR_ADDSUB_GE(SADD8, gen_helper_sadd8)
9409DO_PAR_ADDSUB_GE(SSUB8, gen_helper_ssub8)
9410
9411DO_PAR_ADDSUB_GE(UADD16, gen_helper_uadd16)
9412DO_PAR_ADDSUB_GE(UASX, gen_helper_uaddsubx)
9413DO_PAR_ADDSUB_GE(USAX, gen_helper_usubaddx)
9414DO_PAR_ADDSUB_GE(USUB16, gen_helper_usub16)
9415DO_PAR_ADDSUB_GE(UADD8, gen_helper_uadd8)
9416DO_PAR_ADDSUB_GE(USUB8, gen_helper_usub8)
9417
9418DO_PAR_ADDSUB(QADD16, gen_helper_qadd16)
9419DO_PAR_ADDSUB(QASX, gen_helper_qaddsubx)
9420DO_PAR_ADDSUB(QSAX, gen_helper_qsubaddx)
9421DO_PAR_ADDSUB(QSUB16, gen_helper_qsub16)
9422DO_PAR_ADDSUB(QADD8, gen_helper_qadd8)
9423DO_PAR_ADDSUB(QSUB8, gen_helper_qsub8)
9424
9425DO_PAR_ADDSUB(UQADD16, gen_helper_uqadd16)
9426DO_PAR_ADDSUB(UQASX, gen_helper_uqaddsubx)
9427DO_PAR_ADDSUB(UQSAX, gen_helper_uqsubaddx)
9428DO_PAR_ADDSUB(UQSUB16, gen_helper_uqsub16)
9429DO_PAR_ADDSUB(UQADD8, gen_helper_uqadd8)
9430DO_PAR_ADDSUB(UQSUB8, gen_helper_uqsub8)
9431
9432DO_PAR_ADDSUB(SHADD16, gen_helper_shadd16)
9433DO_PAR_ADDSUB(SHASX, gen_helper_shaddsubx)
9434DO_PAR_ADDSUB(SHSAX, gen_helper_shsubaddx)
9435DO_PAR_ADDSUB(SHSUB16, gen_helper_shsub16)
9436DO_PAR_ADDSUB(SHADD8, gen_helper_shadd8)
9437DO_PAR_ADDSUB(SHSUB8, gen_helper_shsub8)
9438
9439DO_PAR_ADDSUB(UHADD16, gen_helper_uhadd16)
9440DO_PAR_ADDSUB(UHASX, gen_helper_uhaddsubx)
9441DO_PAR_ADDSUB(UHSAX, gen_helper_uhsubaddx)
9442DO_PAR_ADDSUB(UHSUB16, gen_helper_uhsub16)
9443DO_PAR_ADDSUB(UHADD8, gen_helper_uhadd8)
9444DO_PAR_ADDSUB(UHSUB8, gen_helper_uhsub8)
9445
9446#undef DO_PAR_ADDSUB
9447#undef DO_PAR_ADDSUB_GE
9448
9449/*
9450 * Packing, unpacking, saturation, and reversal
9451 */
9452
9453static bool trans_PKH(DisasContext *s, arg_PKH *a)
9454{
9455    TCGv_i32 tn, tm;
9456    int shift = a->imm;
9457
9458    if (s->thumb
9459        ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
9460        : !ENABLE_ARCH_6) {
9461        return false;
9462    }
9463
9464    tn = load_reg(s, a->rn);
9465    tm = load_reg(s, a->rm);
9466    if (a->tb) {
9467        /* PKHTB */
9468        if (shift == 0) {
9469            shift = 31;
9470        }
9471        tcg_gen_sari_i32(tm, tm, shift);
9472        tcg_gen_deposit_i32(tn, tn, tm, 0, 16);
9473    } else {
9474        /* PKHBT */
9475        tcg_gen_shli_i32(tm, tm, shift);
9476        tcg_gen_deposit_i32(tn, tm, tn, 0, 16);
9477    }
9478    tcg_temp_free_i32(tm);
9479    store_reg(s, a->rd, tn);
9480    return true;
9481}
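
/*
 * PKHBT combines the bottom halfword of rn with the top halfword of the
 * left-shifted rm, while PKHTB combines the top halfword of rn with the
 * bottom halfword of the arithmetically right-shifted rm; a PKHTB shift
 * amount of 0 encodes ASR #32, hence the substitution of 31 above.
 */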
9482
9483static bool op_sat(DisasContext *s, arg_sat *a,
9484                   void (*gen)(TCGv_i32, TCGv_env, TCGv_i32, TCGv_i32))
9485{
9486    TCGv_i32 tmp, satimm;
9487    int shift = a->imm;
9488
9489    if (!ENABLE_ARCH_6) {
9490        return false;
9491    }
9492
9493    tmp = load_reg(s, a->rn);
9494    if (a->sh) {
9495        tcg_gen_sari_i32(tmp, tmp, shift ? shift : 31);
9496    } else {
9497        tcg_gen_shli_i32(tmp, tmp, shift);
9498    }
9499
9500    satimm = tcg_const_i32(a->satimm);
9501    gen(tmp, cpu_env, tmp, satimm);
9502    tcg_temp_free_i32(satimm);
9503
9504    store_reg(s, a->rd, tmp);
9505    return true;
9506}
9507
9508static bool trans_SSAT(DisasContext *s, arg_sat *a)
9509{
9510    return op_sat(s, a, gen_helper_ssat);
9511}
9512
9513static bool trans_USAT(DisasContext *s, arg_sat *a)
9514{
9515    return op_sat(s, a, gen_helper_usat);
9516}
9517
9518static bool trans_SSAT16(DisasContext *s, arg_sat *a)
9519{
9520    if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
9521        return false;
9522    }
9523    return op_sat(s, a, gen_helper_ssat16);
9524}
9525
9526static bool trans_USAT16(DisasContext *s, arg_sat *a)
9527{
9528    if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
9529        return false;
9530    }
9531    return op_sat(s, a, gen_helper_usat16);
9532}
9533
9534static bool op_xta(DisasContext *s, arg_rrr_rot *a,
9535                   void (*gen_extract)(TCGv_i32, TCGv_i32),
9536                   void (*gen_add)(TCGv_i32, TCGv_i32, TCGv_i32))
9537{
9538    TCGv_i32 tmp;
9539
9540    if (!ENABLE_ARCH_6) {
9541        return false;
9542    }
9543
9544    tmp = load_reg(s, a->rm);
9545    /*
9546     * TODO: In many cases we could do a shift instead of a rotate.
9547     * Combined with a simple extend, that becomes an extract.
9548     */
9549    tcg_gen_rotri_i32(tmp, tmp, a->rot * 8);
9550    gen_extract(tmp, tmp);
9551
9552    if (a->rn != 15) {
9553        TCGv_i32 tmp2 = load_reg(s, a->rn);
9554        gen_add(tmp, tmp, tmp2);
9555        tcg_temp_free_i32(tmp2);
9556    }
9557    store_reg(s, a->rd, tmp);
9558    return true;
9559}
9560
9561static bool trans_SXTAB(DisasContext *s, arg_rrr_rot *a)
9562{
9563    return op_xta(s, a, tcg_gen_ext8s_i32, tcg_gen_add_i32);
9564}
9565
9566static bool trans_SXTAH(DisasContext *s, arg_rrr_rot *a)
9567{
9568    return op_xta(s, a, tcg_gen_ext16s_i32, tcg_gen_add_i32);
9569}
9570
9571static bool trans_SXTAB16(DisasContext *s, arg_rrr_rot *a)
9572{
9573    if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
9574        return false;
9575    }
9576    return op_xta(s, a, gen_helper_sxtb16, gen_add16);
9577}
9578
9579static bool trans_UXTAB(DisasContext *s, arg_rrr_rot *a)
9580{
9581    return op_xta(s, a, tcg_gen_ext8u_i32, tcg_gen_add_i32);
9582}
9583
9584static bool trans_UXTAH(DisasContext *s, arg_rrr_rot *a)
9585{
9586    return op_xta(s, a, tcg_gen_ext16u_i32, tcg_gen_add_i32);
9587}
9588
9589static bool trans_UXTAB16(DisasContext *s, arg_rrr_rot *a)
9590{
9591    if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
9592        return false;
9593    }
9594    return op_xta(s, a, gen_helper_uxtb16, gen_add16);
9595}
9596
9597static bool trans_SEL(DisasContext *s, arg_rrr *a)
9598{
9599    TCGv_i32 t1, t2, t3;
9600
9601    if (s->thumb
9602        ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
9603        : !ENABLE_ARCH_6) {
9604        return false;
9605    }
9606
9607    t1 = load_reg(s, a->rn);
9608    t2 = load_reg(s, a->rm);
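    /*
     * GE holds the per-byte flags set by the parallel add/sub insns;
     * the helper uses them to select each result byte from Rn or Rm.
     */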
9609    t3 = tcg_temp_new_i32();
9610    tcg_gen_ld_i32(t3, cpu_env, offsetof(CPUARMState, GE));
9611    gen_helper_sel_flags(t1, t3, t1, t2);
9612    tcg_temp_free_i32(t3);
9613    tcg_temp_free_i32(t2);
9614    store_reg(s, a->rd, t1);
9615    return true;
9616}
9617
9618static bool op_rr(DisasContext *s, arg_rr *a,
9619                  void (*gen)(TCGv_i32, TCGv_i32))
9620{
9621    TCGv_i32 tmp;
9622
9623    tmp = load_reg(s, a->rm);
9624    gen(tmp, tmp);
9625    store_reg(s, a->rd, tmp);
9626    return true;
9627}
9628
9629static bool trans_REV(DisasContext *s, arg_rr *a)
9630{
9631    if (!ENABLE_ARCH_6) {
9632        return false;
9633    }
9634    return op_rr(s, a, tcg_gen_bswap32_i32);
9635}
9636
9637static bool trans_REV16(DisasContext *s, arg_rr *a)
9638{
9639    if (!ENABLE_ARCH_6) {
9640        return false;
9641    }
9642    return op_rr(s, a, gen_rev16);
9643}
9644
9645static bool trans_REVSH(DisasContext *s, arg_rr *a)
9646{
9647    if (!ENABLE_ARCH_6) {
9648        return false;
9649    }
9650    return op_rr(s, a, gen_revsh);
9651}
9652
9653static bool trans_RBIT(DisasContext *s, arg_rr *a)
9654{
9655    if (!ENABLE_ARCH_6T2) {
9656        return false;
9657    }
9658    return op_rr(s, a, gen_helper_rbit);
9659}
9660
9661/*
9662 * Signed multiply, signed and unsigned divide
9663 */
9664
9665static bool op_smlad(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub)
9666{
9667    TCGv_i32 t1, t2;
9668
9669    if (!ENABLE_ARCH_6) {
9670        return false;
9671    }
9672
9673    t1 = load_reg(s, a->rn);
9674    t2 = load_reg(s, a->rm);
9675    if (m_swap) {
9676        gen_swap_half(t2);
9677    }
9678    gen_smul_dual(t1, t2);
9679
9680    if (sub) {
9681        /* This subtraction cannot overflow: each product magnitude is at most 2^30. */
9682        tcg_gen_sub_i32(t1, t1, t2);
9683    } else {
9684        /*
9685         * This addition cannot overflow 32 bits; however it may
9686         * overflow considered as a signed operation, in which case
9687         * we must set the Q flag.
9688         */
9689        gen_helper_add_setq(t1, cpu_env, t1, t2);
9690    }
9691    tcg_temp_free_i32(t2);
9692
9693    if (a->ra != 15) {
9694        t2 = load_reg(s, a->ra);
9695        gen_helper_add_setq(t1, cpu_env, t1, t2);
9696        tcg_temp_free_i32(t2);
9697    }
9698    store_reg(s, a->rd, t1);
9699    return true;
9700}
9701
9702static bool trans_SMLAD(DisasContext *s, arg_rrrr *a)
9703{
9704    return op_smlad(s, a, false, false);
9705}
9706
9707static bool trans_SMLADX(DisasContext *s, arg_rrrr *a)
9708{
9709    return op_smlad(s, a, true, false);
9710}
9711
9712static bool trans_SMLSD(DisasContext *s, arg_rrrr *a)
9713{
9714    return op_smlad(s, a, false, true);
9715}
9716
9717static bool trans_SMLSDX(DisasContext *s, arg_rrrr *a)
9718{
9719    return op_smlad(s, a, true, true);
9720}
9721
9722static bool op_smlald(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub)
9723{
9724    TCGv_i32 t1, t2;
9725    TCGv_i64 l1, l2;
9726
9727    if (!ENABLE_ARCH_6) {
9728        return false;
9729    }
9730
9731    t1 = load_reg(s, a->rn);
9732    t2 = load_reg(s, a->rm);
9733    if (m_swap) {
9734        gen_swap_half(t2);
9735    }
9736    gen_smul_dual(t1, t2);
9737
9738    l1 = tcg_temp_new_i64();
9739    l2 = tcg_temp_new_i64();
9740    tcg_gen_ext_i32_i64(l1, t1);
9741    tcg_gen_ext_i32_i64(l2, t2);
9742    tcg_temp_free_i32(t1);
9743    tcg_temp_free_i32(t2);
9744
9745    if (sub) {
9746        tcg_gen_sub_i64(l1, l1, l2);
9747    } else {
9748        tcg_gen_add_i64(l1, l1, l2);
9749    }
9750    tcg_temp_free_i64(l2);
9751
9752    gen_addq(s, l1, a->ra, a->rd);
9753    gen_storeq_reg(s, a->ra, a->rd, l1);
9754    tcg_temp_free_i64(l1);
9755    return true;
9756}
9757
9758static bool trans_SMLALD(DisasContext *s, arg_rrrr *a)
9759{
9760    return op_smlald(s, a, false, false);
9761}
9762
9763static bool trans_SMLALDX(DisasContext *s, arg_rrrr *a)
9764{
9765    return op_smlald(s, a, true, false);
9766}
9767
9768static bool trans_SMLSLD(DisasContext *s, arg_rrrr *a)
9769{
9770    return op_smlald(s, a, false, true);
9771}
9772
9773static bool trans_SMLSLDX(DisasContext *s, arg_rrrr *a)
9774{
9775    return op_smlald(s, a, true, true);
9776}
9777
9778static bool op_smmla(DisasContext *s, arg_rrrr *a, bool round, bool sub)
9779{
9780    TCGv_i32 t1, t2;
9781
9782    if (s->thumb
9783        ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
9784        : !ENABLE_ARCH_6) {
9785        return false;
9786    }
9787
9788    t1 = load_reg(s, a->rn);
9789    t2 = load_reg(s, a->rm);
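    /* muls2 puts the low 32 bits of the product in t2, the high 32 in t1. */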
9790    tcg_gen_muls2_i32(t2, t1, t1, t2);
9791
9792    if (a->ra != 15) {
9793        TCGv_i32 t3 = load_reg(s, a->ra);
9794        if (sub) {
9795            /*
9796             * For SMMLS we need a full 64-bit subtract: it supplies the
9797             * borrow caused by a non-zero multiplicand lowpart, and the
9798             * correct result lowpart for rounding.
9799             */
9800            TCGv_i32 zero = tcg_const_i32(0);
9801            tcg_gen_sub2_i32(t2, t1, zero, t3, t2, t1);
9802            tcg_temp_free_i32(zero);
9803        } else {
9804            tcg_gen_add_i32(t1, t1, t3);
9805        }
9806        tcg_temp_free_i32(t3);
9807    }
9808    if (round) {
9809        /*
9810         * Adding 0x80000000 to the 64-bit quantity means that we have
9811         * a carry into the high word when the low word has the msb set.
9812         */
9813        tcg_gen_shri_i32(t2, t2, 31);
9814        tcg_gen_add_i32(t1, t1, t2);
9815    }
9816    tcg_temp_free_i32(t2);
9817    store_reg(s, a->rd, t1);
9818    return true;
9819}
9820
9821static bool trans_SMMLA(DisasContext *s, arg_rrrr *a)
9822{
9823    return op_smmla(s, a, false, false);
9824}
9825
9826static bool trans_SMMLAR(DisasContext *s, arg_rrrr *a)
9827{
9828    return op_smmla(s, a, true, false);
9829}
9830
9831static bool trans_SMMLS(DisasContext *s, arg_rrrr *a)
9832{
9833    return op_smmla(s, a, false, true);
9834}
9835
9836static bool trans_SMMLSR(DisasContext *s, arg_rrrr *a)
9837{
9838    return op_smmla(s, a, true, true);
9839}
9840
9841static bool op_div(DisasContext *s, arg_rrr *a, bool u)
9842{
9843    TCGv_i32 t1, t2;
9844
9845    if (s->thumb
9846        ? !dc_isar_feature(thumb_div, s)
9847        : !dc_isar_feature(arm_div, s)) {
9848        return false;
9849    }
9850
9851    t1 = load_reg(s, a->rn);
9852    t2 = load_reg(s, a->rm);
9853    if (u) {
9854        gen_helper_udiv(t1, t1, t2);
9855    } else {
9856        gen_helper_sdiv(t1, t1, t2);
9857    }
9858    tcg_temp_free_i32(t2);
9859    store_reg(s, a->rd, t1);
9860    return true;
9861}
9862
9863static bool trans_SDIV(DisasContext *s, arg_rrr *a)
9864{
9865    return op_div(s, a, false);
9866}
9867
9868static bool trans_UDIV(DisasContext *s, arg_rrr *a)
9869{
9870    return op_div(s, a, true);
9871}
9872
9873/*
9874 * Block data transfer
9875 */
9876
9877static TCGv_i32 op_addr_block_pre(DisasContext *s, arg_ldst_block *a, int n)
9878{
9879    TCGv_i32 addr = load_reg(s, a->rn);
9880
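    /*
     * Adjust 'addr' to the lowest address that will be accessed, so that
     * the transfer loop below can simply step upwards by 4 each time.
     */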
9881    if (a->b) {
9882        if (a->i) {
9883            /* pre increment */
9884            tcg_gen_addi_i32(addr, addr, 4);
9885        } else {
9886            /* pre decrement */
9887            tcg_gen_addi_i32(addr, addr, -(n * 4));
9888        }
9889    } else if (!a->i && n != 1) {
9890        /* post decrement */
9891        tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
9892    }
9893
9894    if (s->v8m_stackcheck && a->rn == 13 && a->w) {
9895        /*
9896         * If the writeback is incrementing SP rather than
9897         * decrementing it, and the initial SP is below the
9898         * stack limit but the final written-back SP would
9899         * be above, then we must not perform any memory
9900         * accesses, but it is IMPDEF whether we generate
9901         * an exception. We choose to do so in this case.
9902         * At this point 'addr' is the lowest address: either
9903         * the original SP (if incrementing) or our final SP
9904         * (if decrementing), so that's what we check.
9905         */
9906        gen_helper_v8m_stackcheck(cpu_env, addr);
9907    }
9908
9909    return addr;
9910}
9911
9912static void op_addr_block_post(DisasContext *s, arg_ldst_block *a,
9913                               TCGv_i32 addr, int n)
9914{
9915    if (a->w) {
9916        /* write back */
9917        if (!a->b) {
9918            if (a->i) {
9919                /* post increment */
9920                tcg_gen_addi_i32(addr, addr, 4);
9921            } else {
9922                /* post decrement */
9923                tcg_gen_addi_i32(addr, addr, -(n * 4));
9924            }
9925        } else if (!a->i && n != 1) {
9926            /* pre decrement */
9927            tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
9928        }
9929        store_reg(s, a->rn, addr);
9930    } else {
9931        tcg_temp_free_i32(addr);
9932    }
9933}
9934
9935static bool op_stm(DisasContext *s, arg_ldst_block *a, int min_n)
9936{
9937    int i, j, n, list, mem_idx;
9938    bool user = a->u;
9939    TCGv_i32 addr, tmp, tmp2;
9940
9941    if (user) {
9942        /* STM (user) */
9943        if (IS_USER(s)) {
9944            /* Only usable in supervisor mode.  */
9945            unallocated_encoding(s);
9946            return true;
9947        }
9948    }
9949
9950    list = a->list;
9951    n = ctpop16(list);
9952    if (n < min_n || a->rn == 15) {
9953        unallocated_encoding(s);
9954        return true;
9955    }
9956
9957    addr = op_addr_block_pre(s, a, n);
9958    mem_idx = get_mem_index(s);
9959
9960    for (i = j = 0; i < 16; i++) {
9961        if (!(list & (1 << i))) {
9962            continue;
9963        }
9964
9965        if (user && i != 15) {
9966            tmp = tcg_temp_new_i32();
9967            tmp2 = tcg_const_i32(i);
9968            gen_helper_get_user_reg(tmp, cpu_env, tmp2);
9969            tcg_temp_free_i32(tmp2);
9970        } else {
9971            tmp = load_reg(s, i);
9972        }
9973        gen_aa32_st32(s, tmp, addr, mem_idx);
9974        tcg_temp_free_i32(tmp);
9975
9976        /* No need to add after the last transfer.  */
9977        if (++j != n) {
9978            tcg_gen_addi_i32(addr, addr, 4);
9979        }
9980    }
9981
9982    op_addr_block_post(s, a, addr, n);
9983    return true;
9984}
9985
9986static bool trans_STM(DisasContext *s, arg_ldst_block *a)
9987{
9988    /* BitCount(list) < 1 is UNPREDICTABLE */
9989    return op_stm(s, a, 1);
9990}
9991
9992static bool trans_STM_t32(DisasContext *s, arg_ldst_block *a)
9993{
9994    /* Writeback register in register list is UNPREDICTABLE for T32.  */
9995    if (a->w && (a->list & (1 << a->rn))) {
9996        unallocated_encoding(s);
9997        return true;
9998    }
9999    /* BitCount(list) < 2 is UNPREDICTABLE */
10000    return op_stm(s, a, 2);
10001}
10002
10003static bool do_ldm(DisasContext *s, arg_ldst_block *a, int min_n)
10004{
10005    int i, j, n, list, mem_idx;
10006    bool loaded_base;
10007    bool user = a->u;
10008    bool exc_return = false;
10009    TCGv_i32 addr, tmp, tmp2, loaded_var;
10010
10011    if (user) {
10012        /* LDM (user), LDM (exception return) */
10013        if (IS_USER(s)) {
10014            /* Only usable in supervisor mode.  */
10015            unallocated_encoding(s);
10016            return true;
10017        }
10018        if (extract32(a->list, 15, 1)) {
10019            exc_return = true;
10020            user = false;
10021        } else {
10022            /* LDM (user) does not allow writeback.  */
10023            if (a->w) {
10024                unallocated_encoding(s);
10025                return true;
10026            }
10027        }
10028    }
10029
10030    list = a->list;
10031    n = ctpop16(list);
10032    if (n < min_n || a->rn == 15) {
10033        unallocated_encoding(s);
10034        return true;
10035    }
10036
10037    addr = op_addr_block_pre(s, a, n);
10038    mem_idx = get_mem_index(s);
10039    loaded_base = false;
10040    loaded_var = NULL;
10041
10042    for (i = j = 0; i < 16; i++) {
10043        if (!(list & (1 << i))) {
10044            continue;
10045        }
10046
10047        tmp = tcg_temp_new_i32();
10048        gen_aa32_ld32u(s, tmp, addr, mem_idx);
10049        if (user) {
10050            tmp2 = tcg_const_i32(i);
10051            gen_helper_set_user_reg(cpu_env, tmp2, tmp);
10052            tcg_temp_free_i32(tmp2);
10053            tcg_temp_free_i32(tmp);
10054        } else if (i == a->rn) {
10055            loaded_var = tmp;
10056            loaded_base = true;
10057        } else if (i == 15 && exc_return) {
10058            store_pc_exc_ret(s, tmp);
10059        } else {
10060            store_reg_from_load(s, i, tmp);
10061        }
10062
10063        /* No need to add after the last transfer.  */
10064        if (++j != n) {
10065            tcg_gen_addi_i32(addr, addr, 4);
10066        }
10067    }
10068
10069    op_addr_block_post(s, a, addr, n);
10070
10071    if (loaded_base) {
10072        /* Note that we reject base == pc above.  */
10073        store_reg(s, a->rn, loaded_var);
10074    }
10075
10076    if (exc_return) {
10077        /* Restore CPSR from SPSR.  */
10078        tmp = load_cpu_field(spsr);
10079        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
10080            gen_io_start();
10081        }
10082        gen_helper_cpsr_write_eret(cpu_env, tmp);
10083        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
10084            gen_io_end();
10085        }
10086        tcg_temp_free_i32(tmp);
10087        /* Must exit loop to check un-masked IRQs */
10088        s->base.is_jmp = DISAS_EXIT;
10089    }
10090    return true;
10091}
10092
10093static bool trans_LDM_a32(DisasContext *s, arg_ldst_block *a)
10094{
10095    /*
10096     * Writeback register in register list is UNPREDICTABLE
10097     * for ArchVersion() >= 7.  Prior to v7, A32 would write
10098     * an UNKNOWN value to the base register.
10099     */
10100    if (ENABLE_ARCH_7 && a->w && (a->list & (1 << a->rn))) {
10101        unallocated_encoding(s);
10102        return true;
10103    }
10104    /* BitCount(list) < 1 is UNPREDICTABLE */
10105    return do_ldm(s, a, 1);
10106}
10107
10108static bool trans_LDM_t32(DisasContext *s, arg_ldst_block *a)
10109{
10110    /* Writeback register in register list is UNPREDICTABLE for T32. */
10111    if (a->w && (a->list & (1 << a->rn))) {
10112        unallocated_encoding(s);
10113        return true;
10114    }
10115    /* BitCount(list) < 2 is UNPREDICTABLE */
10116    return do_ldm(s, a, 2);
10117}
10118
10119static bool trans_LDM_t16(DisasContext *s, arg_ldst_block *a)
10120{
10121    /* Writeback is conditional on the base register not being loaded.  */
10122    a->w = !(a->list & (1 << a->rn));
10123    /* BitCount(list) < 1 is UNPREDICTABLE */
10124    return do_ldm(s, a, 1);
10125}
10126
10127/*
10128 * Branch, branch with link
10129 */
10130
10131static bool trans_B(DisasContext *s, arg_i *a)
10132{
10133    gen_jmp(s, read_pc(s) + a->imm);
10134    return true;
10135}
10136
10137static bool trans_B_cond_thumb(DisasContext *s, arg_ci *a)
10138{
10139    /* This has cond from encoding, required to be outside IT block.  */
10140    if (a->cond >= 0xe) {
10141        return false;
10142    }
10143    if (s->condexec_mask) {
10144        unallocated_encoding(s);
10145        return true;
10146    }
10147    arm_skip_unless(s, a->cond);
10148    gen_jmp(s, read_pc(s) + a->imm);
10149    return true;
10150}
10151
10152static bool trans_BL(DisasContext *s, arg_i *a)
10153{
10154    tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | s->thumb);
10155    gen_jmp(s, read_pc(s) + a->imm);
10156    return true;
10157}
10158
10159static bool trans_BLX_i(DisasContext *s, arg_BLX_i *a)
10160{
10161    TCGv_i32 tmp;
10162
10163    /* For A32, ARCH(5) is checked near the start of the uncond block. */
10164    if (s->thumb && (a->imm & 2)) {
10165        return false;
10166    }
10167    tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | s->thumb);
10168    tmp = tcg_const_i32(!s->thumb);
10169    store_cpu_field(tmp, thumb);
10170    gen_jmp(s, (read_pc(s) & ~3) + a->imm);
10171    return true;
10172}
10173
10174static bool trans_BL_BLX_prefix(DisasContext *s, arg_BL_BLX_prefix *a)
10175{
10176    assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
10177    tcg_gen_movi_i32(cpu_R[14], read_pc(s) + (a->imm << 12));
10178    return true;
10179}
10180
10181static bool trans_BL_suffix(DisasContext *s, arg_BL_suffix *a)
10182{
10183    TCGv_i32 tmp = tcg_temp_new_i32();
10184
10185    assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
10186    tcg_gen_addi_i32(tmp, cpu_R[14], (a->imm << 1) | 1);
10187    tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | 1);
10188    gen_bx(s, tmp);
10189    return true;
10190}
10191
10192static bool trans_BLX_suffix(DisasContext *s, arg_BLX_suffix *a)
10193{
10194    TCGv_i32 tmp;
10195
10196    assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
10197    if (!ENABLE_ARCH_5) {
10198        return false;
10199    }
10200    tmp = tcg_temp_new_i32();
10201    tcg_gen_addi_i32(tmp, cpu_R[14], a->imm << 1);
10202    tcg_gen_andi_i32(tmp, tmp, 0xfffffffc);
10203    tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | 1);
10204    gen_bx(s, tmp);
10205    return true;
10206}
10207
10208static bool op_tbranch(DisasContext *s, arg_tbranch *a, bool half)
10209{
10210    TCGv_i32 addr, tmp;
10211
10212    tmp = load_reg(s, a->rm);
10213    if (half) {
10214        tcg_gen_add_i32(tmp, tmp, tmp);
10215    }
10216    addr = load_reg(s, a->rn);
10217    tcg_gen_add_i32(addr, addr, tmp);
10218
10219    gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s),
10220                    half ? MO_UW | s->be_data : MO_UB);
10221    tcg_temp_free_i32(addr);
10222
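    /* The loaded value is an offset in halfwords from the PC: double it. */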
10223    tcg_gen_add_i32(tmp, tmp, tmp);
10224    tcg_gen_addi_i32(tmp, tmp, read_pc(s));
10225    store_reg(s, 15, tmp);
10226    return true;
10227}
10228
10229static bool trans_TBB(DisasContext *s, arg_tbranch *a)
10230{
10231    return op_tbranch(s, a, false);
10232}
10233
10234static bool trans_TBH(DisasContext *s, arg_tbranch *a)
10235{
10236    return op_tbranch(s, a, true);
10237}
10238
10239static bool trans_CBZ(DisasContext *s, arg_CBZ *a)
10240{
10241    TCGv_i32 tmp = load_reg(s, a->rn);
10242
10243    arm_gen_condlabel(s);
10244    tcg_gen_brcondi_i32(a->nz ? TCG_COND_EQ : TCG_COND_NE,
10245                        tmp, 0, s->condlabel);
10246    tcg_temp_free_i32(tmp);
10247    gen_jmp(s, read_pc(s) + a->imm);
10248    return true;
10249}
10250
10251/*
10252 * Supervisor call - both T32 & A32 come here so we need to check
10253 * which mode we are in when checking for semihosting.
10254 */
10255
10256static bool trans_SVC(DisasContext *s, arg_SVC *a)
10257{
10258    const uint32_t semihost_imm = s->thumb ? 0xab : 0x123456;
10259
10260    if (!arm_dc_feature(s, ARM_FEATURE_M) && semihosting_enabled() &&
10261#ifndef CONFIG_USER_ONLY
10262        !IS_USER(s) &&
10263#endif
10264        (a->imm == semihost_imm)) {
10265        gen_exception_internal_insn(s, s->base.pc_next, EXCP_SEMIHOST);
10266    } else {
10267        gen_set_pc_im(s, s->base.pc_next);
10268        s->svc_imm = a->imm;
10269        s->base.is_jmp = DISAS_SWI;
10270    }
10271    return true;
10272}
10273
10274/*
10275 * Unconditional system instructions
10276 */
10277
10278static bool trans_RFE(DisasContext *s, arg_RFE *a)
10279{
10280    static const int8_t pre_offset[4] = {
10281        /* DA */ -4, /* IA */ 0, /* DB */ -8, /* IB */ 4
10282    };
10283    static const int8_t post_offset[4] = {
10284        /* DA */ -8, /* IA */ 4, /* DB */ -4, /* IB */ 0
10285    };
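    /*
     * pre_offset gives the address of the first word to load (the saved
     * PC, with the SPSR in the following word); post_offset, applied
     * after both loads, produces the written-back value of Rn.
     */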
10286    TCGv_i32 addr, t1, t2;
10287
10288    if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
10289        return false;
10290    }
10291    if (IS_USER(s)) {
10292        unallocated_encoding(s);
10293        return true;
10294    }
10295
10296    addr = load_reg(s, a->rn);
10297    tcg_gen_addi_i32(addr, addr, pre_offset[a->pu]);
10298
10299    /* Load PC into t1 and CPSR into t2.  */
10300    t1 = tcg_temp_new_i32();
10301    gen_aa32_ld32u(s, t1, addr, get_mem_index(s));
10302    tcg_gen_addi_i32(addr, addr, 4);
10303    t2 = tcg_temp_new_i32();
10304    gen_aa32_ld32u(s, t2, addr, get_mem_index(s));
10305
10306    if (a->w) {
10307        /* Base writeback.  */
10308        tcg_gen_addi_i32(addr, addr, post_offset[a->pu]);
10309        store_reg(s, a->rn, addr);
10310    } else {
10311        tcg_temp_free_i32(addr);
10312    }
10313    gen_rfe(s, t1, t2);
10314    return true;
10315}
10316
10317static bool trans_SRS(DisasContext *s, arg_SRS *a)
10318{
10319    if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
10320        return false;
10321    }
10322    gen_srs(s, a->mode, a->pu, a->w);
10323    return true;
10324}
10325
10326static bool trans_CPS(DisasContext *s, arg_CPS *a)
10327{
10328    uint32_t mask, val;
10329
10330    if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
10331        return false;
10332    }
10333    if (IS_USER(s)) {
10334        /* Implemented as NOP in user mode.  */
10335        return true;
10336    }
10337    /* TODO: There are quite a lot of UNPREDICTABLE argument combinations. */
10338
10339    mask = val = 0;
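    /*
     * imod == 0b10 is CPSIE (clear the selected bits to enable),
     * imod == 0b11 is CPSID (set them to disable); a->M requests
     * a mode change in addition.
     */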
10340    if (a->imod & 2) {
10341        if (a->A) {
10342            mask |= CPSR_A;
10343        }
10344        if (a->I) {
10345            mask |= CPSR_I;
10346        }
10347        if (a->F) {
10348            mask |= CPSR_F;
10349        }
10350        if (a->imod & 1) {
10351            val |= mask;
10352        }
10353    }
10354    if (a->M) {
10355        mask |= CPSR_M;
10356        val |= a->mode;
10357    }
10358    if (mask) {
10359        gen_set_psr_im(s, mask, 0, val);
10360    }
10361    return true;
10362}
10363
10364static bool trans_CPS_v7m(DisasContext *s, arg_CPS_v7m *a)
10365{
10366    TCGv_i32 tmp, addr;
10367
10368    if (!arm_dc_feature(s, ARM_FEATURE_M)) {
10369        return false;
10370    }
10371    if (IS_USER(s)) {
10372        /* Implemented as NOP in user mode.  */
10373        return true;
10374    }
10375
10376    tmp = tcg_const_i32(a->im);
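    /* 16 and 19 are the v7M SYSm numbers for PRIMASK and FAULTMASK. */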
10377    /* FAULTMASK */
10378    if (a->F) {
10379        addr = tcg_const_i32(19);
10380        gen_helper_v7m_msr(cpu_env, addr, tmp);
10381        tcg_temp_free_i32(addr);
10382    }
10383    /* PRIMASK */
10384    if (a->I) {
10385        addr = tcg_const_i32(16);
10386        gen_helper_v7m_msr(cpu_env, addr, tmp);
10387        tcg_temp_free_i32(addr);
10388    }
10389    tcg_temp_free_i32(tmp);
10390    gen_lookup_tb(s);
10391    return true;
10392}
10393
10394/*
10395 * Clear-Exclusive, Barriers
10396 */
10397
10398static bool trans_CLREX(DisasContext *s, arg_CLREX *a)
10399{
10400    if (s->thumb
10401        ? !ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)
10402        : !ENABLE_ARCH_6K) {
10403        return false;
10404    }
10405    gen_clrex(s);
10406    return true;
10407}
10408
10409static bool trans_DSB(DisasContext *s, arg_DSB *a)
10410{
10411    if (!ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)) {
10412        return false;
10413    }
10414    tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
10415    return true;
10416}
10417
10418static bool trans_DMB(DisasContext *s, arg_DMB *a)
10419{
10420    return trans_DSB(s, NULL);
10421}
10422
10423static bool trans_ISB(DisasContext *s, arg_ISB *a)
10424{
10425    if (!ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)) {
10426        return false;
10427    }
10428    /*
10429     * We need to break the TB after this insn to execute
10430     * self-modifying code correctly and also to take
10431     * any pending interrupts immediately.
10432     */
10433    gen_goto_tb(s, 0, s->base.pc_next);
10434    return true;
10435}
10436
10437static bool trans_SB(DisasContext *s, arg_SB *a)
10438{
10439    if (!dc_isar_feature(aa32_sb, s)) {
10440        return false;
10441    }
10442    /*
10443     * TODO: There is no speculation barrier opcode
10444     * for TCG; MB and end the TB instead.
10445     */
10446    tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
10447    gen_goto_tb(s, 0, s->base.pc_next);
10448    return true;
10449}
10450
10451static bool trans_SETEND(DisasContext *s, arg_SETEND *a)
10452{
10453    if (!ENABLE_ARCH_6) {
10454        return false;
10455    }
10456    if (a->E != (s->be_data == MO_BE)) {
10457        gen_helper_setend(cpu_env);
10458        s->base.is_jmp = DISAS_UPDATE;
10459    }
10460    return true;
10461}
10462
10463/*
10464 * Preload instructions
10465 * All are nops, contingent on the appropriate arch level.
10466 */
10467
10468static bool trans_PLD(DisasContext *s, arg_PLD *a)
10469{
10470    return ENABLE_ARCH_5TE;
10471}
10472
10473static bool trans_PLDW(DisasContext *s, arg_PLD *a)
10474{
10475    return arm_dc_feature(s, ARM_FEATURE_V7MP);
10476}
10477
10478static bool trans_PLI(DisasContext *s, arg_PLD *a)
10479{
10480    return ENABLE_ARCH_7;
10481}
10482
10483/*
10484 * If-then
10485 */
10486
10487static bool trans_IT(DisasContext *s, arg_IT *a)
10488{
10489    int cond_mask = a->cond_mask;
10490
10491    /*
10492     * No actual code generated for this insn, just setup state.
10493     *
10494     * Combinations of firstcond and mask which set up an 0b1111
10495     * condition are UNPREDICTABLE; we take the CONSTRAINED
10496     * UNPREDICTABLE choice to treat 0b1111 the same as 0b1110,
10497     * i.e. both meaning "execute always".
10498     */
10499    s->condexec_cond = (cond_mask >> 4) & 0xe;
10500    s->condexec_mask = cond_mask & 0x1f;
10501    return true;
10502}
10503
10504/*
10505 * Legacy decoder.
10506 */
10507
10508static void disas_arm_insn(DisasContext *s, unsigned int insn)
10509{
10510    unsigned int cond = insn >> 28;
10511
10512    /* M variants do not implement ARM mode; this must raise the INVSTATE
10513     * UsageFault exception.
10514     */
10515    if (arm_dc_feature(s, ARM_FEATURE_M)) {
10516        gen_exception_insn(s, s->pc_curr, EXCP_INVSTATE, syn_uncategorized(),
10517                           default_exception_el(s));
10518        return;
10519    }
10520
10521    if (cond == 0xf) {
10522        /* In ARMv3 and v4 the NV condition is UNPREDICTABLE; we
10523         * choose to UNDEF. In ARMv5 and above the space is used
10524         * for miscellaneous unconditional instructions.
10525         */
10526        ARCH(5);
10527
10528        /* Unconditional instructions.  */
10529        if (disas_a32_uncond(s, insn)) {
10530            return;
10531        }
10532        /* fall back to legacy decoder */
10533
10534        if (((insn >> 25) & 7) == 1) {
10535            /* NEON Data processing.  */
10536            if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
10537                goto illegal_op;
10538            }
10539
10540            if (disas_neon_data_insn(s, insn)) {
10541                goto illegal_op;
10542            }
10543            return;
10544        }
10545        if ((insn & 0x0f100000) == 0x04000000) {
10546            /* NEON load/store.  */
10547            if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
10548                goto illegal_op;
10549            }
10550
10551            if (disas_neon_ls_insn(s, insn)) {
10552                goto illegal_op;
10553            }
10554            return;
10555        }
10556        if ((insn & 0x0f000e10) == 0x0e000a00) {
10557            /* VFP.  */
10558            if (disas_vfp_insn(s, insn)) {
10559                goto illegal_op;
10560            }
10561            return;
10562        }
10563        if ((insn & 0x0e000f00) == 0x0c000100) {
10564            if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
10565                /* iWMMXt register transfer.  */
10566                if (extract32(s->c15_cpar, 1, 1)) {
10567                    if (!disas_iwmmxt_insn(s, insn)) {
10568                        return;
10569                    }
10570                }
10571            }
10572        } else if ((insn & 0x0e000a00) == 0x0c000800
10573                   && arm_dc_feature(s, ARM_FEATURE_V8)) {
10574            if (disas_neon_insn_3same_ext(s, insn)) {
10575                goto illegal_op;
10576            }
10577            return;
10578        } else if ((insn & 0x0f000a00) == 0x0e000800
10579                   && arm_dc_feature(s, ARM_FEATURE_V8)) {
10580            if (disas_neon_insn_2reg_scalar_ext(s, insn)) {
10581                goto illegal_op;
10582            }
10583            return;
10584        }
10585        goto illegal_op;
10586    }
10587    if (cond != 0xe) {
10588        /* If the condition is not "always", generate a conditional
10589           jump to the next instruction.  */
10590        arm_skip_unless(s, cond);
10591    }
10592
10593    if (disas_a32(s, insn)) {
10594        return;
10595    }
10596    /* fall back to legacy decoder */
10597
10598    switch ((insn >> 24) & 0xf) {
10599    case 0xc:
10600    case 0xd:
10601    case 0xe:
10602        if (((insn >> 8) & 0xe) == 10) {
10603            /* VFP.  */
10604            if (disas_vfp_insn(s, insn)) {
10605                goto illegal_op;
10606            }
10607        } else if (disas_coproc_insn(s, insn)) {
10608            /* Coprocessor.  */
10609            goto illegal_op;
10610        }
10611        break;
10612    default:
10613    illegal_op:
10614        unallocated_encoding(s);
10615        break;
10616    }
10617}
10618
10619static bool thumb_insn_is_16bit(DisasContext *s, uint32_t pc, uint32_t insn)
10620{
10621    /*
10622     * Return true if this is a 16 bit instruction. We must be precise
10623     * about this (matching the decode).
10624     */
10625    if ((insn >> 11) < 0x1d) {
10626        /* Definitely a 16-bit instruction */
10627        return true;
10628    }
10629
10630    /* Top five bits 0b11101 / 0b11110 / 0b11111 : this is the
10631     * first half of a 32-bit Thumb insn. Thumb-1 cores might
10632     * end up actually treating this as two 16-bit insns, though,
10633     * if it's half of a bl/blx pair that might span a page boundary.
10634     */
10635    if (arm_dc_feature(s, ARM_FEATURE_THUMB2) ||
10636        arm_dc_feature(s, ARM_FEATURE_M)) {
10637        /* Thumb2 cores (including all M profile ones) always treat
10638         * 32-bit insns as 32-bit.
10639         */
10640        return false;
10641    }
10642
10643    if ((insn >> 11) == 0x1e && pc - s->page_start < TARGET_PAGE_SIZE - 3) {
10644        /* 0b1111_0xxx_xxxx_xxxx : BL/BLX prefix, and the suffix
10645         * is not on the next page; we merge this into a 32-bit
10646         * insn.
10647         */
10648        return false;
10649    }
10650    /* 0b1110_1xxx_xxxx_xxxx : BLX suffix (or UNDEF);
10651     * 0b1111_1xxx_xxxx_xxxx : BL suffix;
10652     * 0b1111_0xxx_xxxx_xxxx : BL/BLX prefix on the end of a page
10653     *  -- handle as single 16 bit insn
10654     */
10655    return true;
10656}
10657
10658/* Translate a 32-bit thumb instruction. */
10659static void disas_thumb2_insn(DisasContext *s, uint32_t insn)
10660{
10661    /*
10662     * ARMv6-M supports a limited subset of Thumb2 instructions.
10663     * Other Thumb1 architectures allow only 32-bit
10664     * combined BL/BLX prefix and suffix.
10665     */
10666    if (arm_dc_feature(s, ARM_FEATURE_M) &&
10667        !arm_dc_feature(s, ARM_FEATURE_V7)) {
10668        int i;
10669        bool found = false;
10670        static const uint32_t armv6m_insn[] = {0xf3808000 /* msr */,
10671                                               0xf3b08040 /* dsb */,
10672                                               0xf3b08050 /* dmb */,
10673                                               0xf3b08060 /* isb */,
10674                                               0xf3e08000 /* mrs */,
10675                                               0xf000d000 /* bl */};
10676        static const uint32_t armv6m_mask[] = {0xffe0d000,
10677                                               0xfff0d0f0,
10678                                               0xfff0d0f0,
10679                                               0xfff0d0f0,
10680                                               0xffe0d000,
10681                                               0xf800d000};
10682
10683        for (i = 0; i < ARRAY_SIZE(armv6m_insn); i++) {
10684            if ((insn & armv6m_mask[i]) == armv6m_insn[i]) {
10685                found = true;
10686                break;
10687            }
10688        }
10689        if (!found) {
10690            goto illegal_op;
10691        }
10692    } else if ((insn & 0xf800e800) != 0xf000e800)  {
10693        ARCH(6T2);
10694    }
10695
10696    if (disas_t32(s, insn)) {
10697        return;
10698    }
10699    /* fall back to legacy decoder */
10700
10701    switch ((insn >> 25) & 0xf) {
10702    case 0: case 1: case 2: case 3:
10703        /* 16-bit instructions.  Should never happen.  */
10704        abort();
10705    case 6: case 7: case 14: case 15:
10706        /* Coprocessor.  */
10707        if (arm_dc_feature(s, ARM_FEATURE_M)) {
10708            /* 0b111x_11xx_xxxx_xxxx_xxxx_xxxx_xxxx_xxxx */
10709            if (extract32(insn, 24, 2) == 3) {
10710                goto illegal_op; /* op0 = 0b11 : unallocated */
10711            }
10712
10713            /*
10714             * Decode VLLDM and VLSTM first: these are nonstandard because:
10715             *  * if there is no FPU then these insns must NOP in
10716             *    Secure state and UNDEF in Nonsecure state
10717             *  * if there is an FPU then these insns do not have
10718             *    the usual behaviour that disas_vfp_insn() provides of
10719             *    being controlled by CPACR/NSACR enable bits or the
10720             *    lazy-stacking logic.
10721             */
10722            if (arm_dc_feature(s, ARM_FEATURE_V8) &&
10723                (insn & 0xffa00f00) == 0xec200a00) {
10724                /* 0b1110_1100_0x1x_xxxx_xxxx_1010_xxxx_xxxx
10725                 *  - VLLDM, VLSTM
10726                 * We choose to UNDEF if the RAZ bits are non-zero.
10727                 */
10728                if (!s->v8m_secure || (insn & 0x0040f0ff)) {
10729                    goto illegal_op;
10730                }
10731
10732                if (arm_dc_feature(s, ARM_FEATURE_VFP)) {
10733                    uint32_t rn = (insn >> 16) & 0xf;
10734                    TCGv_i32 fptr = load_reg(s, rn);
10735
10736                    if (extract32(insn, 20, 1)) {
10737                        gen_helper_v7m_vlldm(cpu_env, fptr);
10738                    } else {
10739                        gen_helper_v7m_vlstm(cpu_env, fptr);
10740                    }
10741                    tcg_temp_free_i32(fptr);
10742
10743                    /* End the TB, because we have updated FP control bits */
10744                    s->base.is_jmp = DISAS_UPDATE;
10745                }
10746                break;
10747            }
10748            if (arm_dc_feature(s, ARM_FEATURE_VFP) &&
10749                ((insn >> 8) & 0xe) == 10) {
10750                /* FP, and the CPU supports it */
10751                if (disas_vfp_insn(s, insn)) {
10752                    goto illegal_op;
10753                }
10754                break;
10755            }
10756
10757            /* All other insns: NOCP */
10758            gen_exception_insn(s, s->pc_curr, EXCP_NOCP, syn_uncategorized(),
10759                               default_exception_el(s));
10760            break;
10761        }
10762        if ((insn & 0xfe000a00) == 0xfc000800
10763            && arm_dc_feature(s, ARM_FEATURE_V8)) {
10764            /* The Thumb2 and ARM encodings are identical.  */
10765            if (disas_neon_insn_3same_ext(s, insn)) {
10766                goto illegal_op;
10767            }
10768        } else if ((insn & 0xff000a00) == 0xfe000800
10769                   && arm_dc_feature(s, ARM_FEATURE_V8)) {
10770            /* The Thumb2 and ARM encodings are identical.  */
10771            if (disas_neon_insn_2reg_scalar_ext(s, insn)) {
10772                goto illegal_op;
10773            }
10774        } else if (((insn >> 24) & 3) == 3) {
10775            /* Translate into the equivalent ARM encoding.  */
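            /*
             * The T32 form has the U bit at bit 28; the A32 form uses
             * cond == 0b1111 with U at bit 24, so move U down and
             * rewrite the remaining top bits to the A32 pattern.
             */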
10776            insn = (insn & 0xe2ffffff) | ((insn & (1 << 28)) >> 4) | (1 << 28);
10777            if (disas_neon_data_insn(s, insn)) {
10778                goto illegal_op;
10779            }
10780        } else if (((insn >> 8) & 0xe) == 10) {
10781            if (disas_vfp_insn(s, insn)) {
10782                goto illegal_op;
10783            }
10784        } else {
10785            if (insn & (1 << 28)) {
10786                goto illegal_op;
            }
10787            if (disas_coproc_insn(s, insn)) {
10788                goto illegal_op;
10789            }
10790        }
10791        break;
10792    case 12:
10793        if ((insn & 0x01100000) == 0x01000000) {
10794            if (disas_neon_ls_insn(s, insn)) {
10795                goto illegal_op;
10796            }
10797            break;
10798        }
10799        goto illegal_op;
10800    default:
10801    illegal_op:
10802        unallocated_encoding(s);
10803    }
10804}
10805
10806static void disas_thumb_insn(DisasContext *s, uint32_t insn)
10807{
10808    if (!disas_t16(s, insn)) {
10809        unallocated_encoding(s);
10810    }
10811}
10812
10813static bool insn_crosses_page(CPUARMState *env, DisasContext *s)
10814{
10815    /* Return true if the insn at dc->base.pc_next might cross a page boundary.
10816     * (False positives are OK, false negatives are not.)
10817     * We know this is a Thumb insn, and our caller ensures we are
10818     * only called if dc->base.pc_next is less than 4 bytes from the page
10819     * boundary, so we cross the page if the first 16 bits indicate
10820     * that this is a 32 bit insn.
10821     */
10822    uint16_t insn = arm_lduw_code(env, s->base.pc_next, s->sctlr_b);
10823
10824    return !thumb_insn_is_16bit(s, s->base.pc_next, insn);
10825}
10826
10827static void arm_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
10828{
10829    DisasContext *dc = container_of(dcbase, DisasContext, base);
10830    CPUARMState *env = cs->env_ptr;
10831    ARMCPU *cpu = env_archcpu(env);
10832    uint32_t tb_flags = dc->base.tb->flags;
10833    uint32_t condexec, core_mmu_idx;
10834
10835    dc->isar = &cpu->isar;
10836    dc->condjmp = 0;
10837
10838    dc->aarch64 = 0;
10839    /* If we are coming from secure EL0 in a system with a 32-bit EL3, then
10840     * there is no secure EL1, so we route exceptions to EL3.
10841     */
10842    dc->secure_routed_to_el3 = arm_feature(env, ARM_FEATURE_EL3) &&
10843                               !arm_el_is_aa64(env, 3);
10844    dc->thumb = FIELD_EX32(tb_flags, TBFLAG_A32, THUMB);
10845    dc->sctlr_b = FIELD_EX32(tb_flags, TBFLAG_A32, SCTLR_B);
10846    dc->be_data = FIELD_EX32(tb_flags, TBFLAG_ANY, BE_DATA) ? MO_BE : MO_LE;
10847    condexec = FIELD_EX32(tb_flags, TBFLAG_A32, CONDEXEC);
10848    dc->condexec_mask = (condexec & 0xf) << 1;
10849    dc->condexec_cond = condexec >> 4;
10850    core_mmu_idx = FIELD_EX32(tb_flags, TBFLAG_ANY, MMUIDX);
10851    dc->mmu_idx = core_to_arm_mmu_idx(env, core_mmu_idx);
10852    dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
10853#if !defined(CONFIG_USER_ONLY)
10854    dc->user = (dc->current_el == 0);
10855#endif
10856    dc->ns = FIELD_EX32(tb_flags, TBFLAG_A32, NS);
10857    dc->fp_excp_el = FIELD_EX32(tb_flags, TBFLAG_ANY, FPEXC_EL);
10858    dc->vfp_enabled = FIELD_EX32(tb_flags, TBFLAG_A32, VFPEN);
10859    dc->vec_len = FIELD_EX32(tb_flags, TBFLAG_A32, VECLEN);
10860    if (arm_feature(env, ARM_FEATURE_XSCALE)) {
10861        dc->c15_cpar = FIELD_EX32(tb_flags, TBFLAG_A32, XSCALE_CPAR);
10862        dc->vec_stride = 0;
10863    } else {
10864        dc->vec_stride = FIELD_EX32(tb_flags, TBFLAG_A32, VECSTRIDE);
10865        dc->c15_cpar = 0;
10866    }
10867    dc->v7m_handler_mode = FIELD_EX32(tb_flags, TBFLAG_A32, HANDLER);
10868    dc->v8m_secure = arm_feature(env, ARM_FEATURE_M_SECURITY) &&
10869        regime_is_secure(env, dc->mmu_idx);
10870    dc->v8m_stackcheck = FIELD_EX32(tb_flags, TBFLAG_A32, STACKCHECK);
10871    dc->v8m_fpccr_s_wrong = FIELD_EX32(tb_flags, TBFLAG_A32, FPCCR_S_WRONG);
10872    dc->v7m_new_fp_ctxt_needed =
10873        FIELD_EX32(tb_flags, TBFLAG_A32, NEW_FP_CTXT_NEEDED);
10874    dc->v7m_lspact = FIELD_EX32(tb_flags, TBFLAG_A32, LSPACT);
10875    dc->cp_regs = cpu->cp_regs;
10876    dc->features = env->features;
10877
10878    /* Single step state. The code-generation logic here is:
10879     *  SS_ACTIVE == 0:
10880     *   generate code with no special handling for single-stepping (except
10881     *   that anything that can make us go to SS_ACTIVE == 1 must end the TB;
10882     *   this happens anyway because those changes are all system register or
10883     *   PSTATE writes).
10884     *  SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
10885     *   emit code for one insn
10886     *   emit code to clear PSTATE.SS
10887     *   emit code to generate software step exception for completed step
10888     *   end TB (as usual for having generated an exception)
10889     *  SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
10890     *   emit code to generate a software step exception
10891     *   end the TB
10892     */
10893    dc->ss_active = FIELD_EX32(tb_flags, TBFLAG_ANY, SS_ACTIVE);
10894    dc->pstate_ss = FIELD_EX32(tb_flags, TBFLAG_ANY, PSTATE_SS);
10895    dc->is_ldex = false;
10896    if (!arm_feature(env, ARM_FEATURE_M)) {
10897        dc->debug_target_el = FIELD_EX32(tb_flags, TBFLAG_ANY, DEBUG_TARGET_EL);
10898    }
10899
10900    dc->page_start = dc->base.pc_first & TARGET_PAGE_MASK;
10901
10902    /* If architectural single step active, limit to 1.  */
10903    if (is_singlestepping(dc)) {
10904        dc->base.max_insns = 1;
10905    }
10906
10907    /* ARM is a fixed-length ISA.  Bound the number of insns to execute
10908       to those left on the page.  */
10909    if (!dc->thumb) {
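        /*
         * -(pc_first | TARGET_PAGE_MASK) is the number of bytes
         * remaining on the current page.
         */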
10910        int bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;
10911        dc->base.max_insns = MIN(dc->base.max_insns, bound);
10912    }
10913
10914    cpu_V0 = tcg_temp_new_i64();
10915    cpu_V1 = tcg_temp_new_i64();
10916    /* FIXME: cpu_M0 can probably be the same as cpu_V0.  */
10917    cpu_M0 = tcg_temp_new_i64();
10918}
10919
10920static void arm_tr_tb_start(DisasContextBase *dcbase, CPUState *cpu)
10921{
10922    DisasContext *dc = container_of(dcbase, DisasContext, base);
10923
10924    /* A note on handling of the condexec (IT) bits:
10925     *
10926     * We want to avoid the overhead of having to write the updated condexec
10927     * bits back to the CPUARMState for every instruction in an IT block. So:
10928     * (1) if the condexec bits are not already zero then we write
10929     * zero back into the CPUARMState now. This avoids complications trying
10930     * to do it at the end of the block. (For example if we don't do this
10931     * it's hard to identify whether we can safely skip writing condexec
10932     * at the end of the TB, which we definitely want to do for the case
10933     * where a TB doesn't do anything with the IT state at all.)
10934     * (2) if we are going to leave the TB then we call gen_set_condexec()
10935     * which will write the correct value into CPUARMState if zero is wrong.
10936     * This is done both for leaving the TB at the end, and for leaving
10937     * it because of an exception we know will happen, which is done in
10938     * gen_exception_insn(). The latter is necessary because we need to
10939     * leave the TB with the PC/IT state just prior to execution of the
10940     * instruction which caused the exception.
10941     * (3) if we leave the TB unexpectedly (eg a data abort on a load)
10942     * then the CPUARMState will be wrong and we need to reset it.
10943     * This is handled in the same way as restoration of the
10944     * PC in these situations; we save the value of the condexec bits
10945     * for each PC via tcg_gen_insn_start(), and restore_state_to_opc()
10946     * then uses this to restore them after an exception.
10947     *
10948     * Note that there are no instructions which can read the condexec
10949     * bits, and none which can write non-static values to them, so
10950     * we don't need to care about whether CPUARMState is correct in the
10951     * middle of a TB.
10952     */
10953
10954    /* Reset the conditional execution bits immediately. This avoids
10955       complications trying to do it at the end of the block.  */
10956    if (dc->condexec_mask || dc->condexec_cond) {
10957        TCGv_i32 tmp = tcg_temp_new_i32();
10958        tcg_gen_movi_i32(tmp, 0);
10959        store_cpu_field(tmp, condexec_bits);
10960    }
10961}
10962
10963static void arm_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
10964{
10965    DisasContext *dc = container_of(dcbase, DisasContext, base);
10966
10967    tcg_gen_insn_start(dc->base.pc_next,
10968                       (dc->condexec_cond << 4) | (dc->condexec_mask >> 1),
10969                       0);
10970    dc->insn_start = tcg_last_op();
10971}
10972
10973static bool arm_tr_breakpoint_check(DisasContextBase *dcbase, CPUState *cpu,
10974                                    const CPUBreakpoint *bp)
10975{
10976    DisasContext *dc = container_of(dcbase, DisasContext, base);
10977
10978    if (bp->flags & BP_CPU) {
10979        gen_set_condexec(dc);
10980        gen_set_pc_im(dc, dc->base.pc_next);
10981        gen_helper_check_breakpoints(cpu_env);
10982        /* End the TB early; it's likely not going to be executed */
10983        dc->base.is_jmp = DISAS_TOO_MANY;
10984    } else {
10985        gen_exception_internal_insn(dc, dc->base.pc_next, EXCP_DEBUG);
10986        /* The address covered by the breakpoint must be
10987           included in [tb->pc, tb->pc + tb->size) in order
10988           for it to be properly cleared -- thus we
10989           increment the PC here so that the logic setting
10990           tb->size below does the right thing.  */
10991        /* TODO: Advance PC by correct instruction length to
10992         * avoid disassembler error messages */
10993        dc->base.pc_next += 2;
10994        dc->base.is_jmp = DISAS_NORETURN;
10995    }
10996
10997    return true;
10998}
10999
11000static bool arm_pre_translate_insn(DisasContext *dc)
11001{
11002#ifdef CONFIG_USER_ONLY
11003    /* Intercept jump to the magic kernel page.  */
11004    if (dc->base.pc_next >= 0xffff0000) {
11005        /* We always get here via a jump, so we know we are not in a
11006           conditional execution block.  */
11007        gen_exception_internal(EXCP_KERNEL_TRAP);
11008        dc->base.is_jmp = DISAS_NORETURN;
11009        return true;
11010    }
11011#endif
11012
11013    if (dc->ss_active && !dc->pstate_ss) {
11014        /* Singlestep state is Active-pending.
11015         * If we're in this state at the start of a TB then either
11016         *  a) we just took an exception to an EL which is being debugged
11017         *     and this is the first insn in the exception handler
11018         *  b) debug exceptions were masked and we just unmasked them
11019         *     without changing EL (eg by clearing PSTATE.D)
11020         * In either case we're going to take a swstep exception in the
11021         * "did not step an insn" case, and so the syndrome ISV and EX
11022         * bits should be zero.
11023         */
11024        assert(dc->base.num_insns == 1);
11025        gen_swstep_exception(dc, 0, 0);
11026        dc->base.is_jmp = DISAS_NORETURN;
11027        return true;
11028    }
11029
11030    return false;
11031}
11032
11033static void arm_post_translate_insn(DisasContext *dc)
11034{
11035    if (dc->condjmp && !dc->base.is_jmp) {
11036        gen_set_label(dc->condlabel);
11037        dc->condjmp = 0;
11038    }
11039    translator_loop_temp_check(&dc->base);
11040}
11041
11042static void arm_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
11043{
11044    DisasContext *dc = container_of(dcbase, DisasContext, base);
11045    CPUARMState *env = cpu->env_ptr;
11046    unsigned int insn;
11047
11048    if (arm_pre_translate_insn(dc)) {
11049        return;
11050    }
11051
11052    dc->pc_curr = dc->base.pc_next;
11053    insn = arm_ldl_code(env, dc->base.pc_next, dc->sctlr_b);
11054    dc->insn = insn;
11055    dc->base.pc_next += 4;
11056    disas_arm_insn(dc, insn);
11057
11058    arm_post_translate_insn(dc);
11059
11060    /* ARM is a fixed-length ISA.  We performed the cross-page check
11061       in init_disas_context by adjusting max_insns.  */
11062}
11063
11064static bool thumb_insn_is_unconditional(DisasContext *s, uint32_t insn)
11065{
11066    /* Return true if this Thumb insn is always unconditional,
11067     * even inside an IT block. This is true of only a very few
11068     * instructions: BKPT, HLT, and SG.
11069     *
11070     * A larger class of instructions are UNPREDICTABLE if used
11071     * inside an IT block; we do not need to detect those here, because
11072     * what we do by default (perform the cc check and update the IT
11073     * bits state machine) is a permitted CONSTRAINED UNPREDICTABLE
11074     * choice for those situations.
11075     *
11076     * insn is either a 16-bit or a 32-bit instruction; the two are
11077     * distinguishable because for the 16-bit case the top 16 bits
11078     * are zeroes, and that isn't a valid 32-bit encoding.
11079     */
11080    if ((insn & 0xffffff00) == 0xbe00) {
11081        /* BKPT */
11082        return true;
11083    }
11084
11085    if ((insn & 0xffffffc0) == 0xba80 && arm_dc_feature(s, ARM_FEATURE_V8) &&
11086        !arm_dc_feature(s, ARM_FEATURE_M)) {
11087        /* HLT: v8A only. This is unconditional even when it is going to
11088         * UNDEF; see the v8A ARM ARM DDI0487B.a H3.3.
11089         * For v7 cores this was a plain old undefined encoding and so
11090         * honours its cc check. (We might be using the encoding as
11091         * a semihosting trap, but we don't change the cc check behaviour
11092         * on that account, because a debugger connected to a real v7A
11093         * core and emulating semihosting traps by catching the UNDEF
11094         * exception would also only see cases where the cc check passed.
11095         * No guest code should be trying to do a HLT semihosting trap
11096         * in an IT block anyway.
11097         */
11098        return true;
11099    }
11100
11101    if (insn == 0xe97fe97f && arm_dc_feature(s, ARM_FEATURE_V8) &&
11102        arm_dc_feature(s, ARM_FEATURE_M)) {
11103        /* SG: v8M only */
11104        return true;
11105    }
11106
11107    return false;
11108}
11109
11110static void thumb_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
11111{
11112    DisasContext *dc = container_of(dcbase, DisasContext, base);
11113    CPUARMState *env = cpu->env_ptr;
11114    uint32_t insn;
11115    bool is_16bit;
11116
11117    if (arm_pre_translate_insn(dc)) {
11118        return;
11119    }
11120
11121    dc->pc_curr = dc->base.pc_next;
11122    insn = arm_lduw_code(env, dc->base.pc_next, dc->sctlr_b);
11123    is_16bit = thumb_insn_is_16bit(dc, dc->base.pc_next, insn);
11124    dc->base.pc_next += 2;
11125    if (!is_16bit) {
11126        uint32_t insn2 = arm_lduw_code(env, dc->base.pc_next, dc->sctlr_b);
11127
11128        insn = insn << 16 | insn2;
11129        dc->base.pc_next += 2;
11130    }
11131    dc->insn = insn;
11132
11133    if (dc->condexec_mask && !thumb_insn_is_unconditional(dc, insn)) {
11134        uint32_t cond = dc->condexec_cond;
11135
11136        /*
11137         * Conditionally skip the insn. Note that both 0xe and 0xf mean
11138         * "always"; 0xf is not "never".
11139         */
11140        if (cond < 0x0e) {
11141            arm_skip_unless(dc, cond);
11142        }
11143    }
11144
11145    if (is_16bit) {
11146        disas_thumb_insn(dc, insn);
11147    } else {
11148        disas_thumb2_insn(dc, insn);
11149    }
11150
11151    /* Advance the Thumb condexec condition.  */
11152    if (dc->condexec_mask) {
11153        dc->condexec_cond = ((dc->condexec_cond & 0xe) |
11154                             ((dc->condexec_mask >> 4) & 1));
11155        dc->condexec_mask = (dc->condexec_mask << 1) & 0x1f;
11156        if (dc->condexec_mask == 0) {
11157            dc->condexec_cond = 0;
11158        }
11159    }
11160
11161    arm_post_translate_insn(dc);
11162
11163    /* Thumb is a variable-length ISA.  Stop translation when the next insn
11164     * will touch a new page.  This ensures that prefetch aborts occur at
11165     * the right place.
11166     *
11167     * We want to stop the TB if the next insn starts in a new page,
11168     * or if it spans between this page and the next. This means that
11169     * if we're looking at the last halfword in the page we need to
11170     * see if it's a 16-bit Thumb insn (which will fit in this TB)
11171     * or a 32-bit Thumb insn (which won't).
11172     * This is to avoid generating a silly TB with a single 16-bit insn
11173     * in it at the end of this page (which would execute correctly
11174     * but isn't very efficient).
11175     */
11176    if (dc->base.is_jmp == DISAS_NEXT
11177        && (dc->base.pc_next - dc->page_start >= TARGET_PAGE_SIZE
11178            || (dc->base.pc_next - dc->page_start >= TARGET_PAGE_SIZE - 3
11179                && insn_crosses_page(env, dc)))) {
11180        dc->base.is_jmp = DISAS_TOO_MANY;
11181    }
11182}
11183
11184static void arm_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
11185{
11186    DisasContext *dc = container_of(dcbase, DisasContext, base);
11187
11188    if (tb_cflags(dc->base.tb) & CF_LAST_IO && dc->condjmp) {
11189        /* FIXME: This can theoretically happen with self-modifying code. */
11190        cpu_abort(cpu, "IO on conditional branch instruction");
11191    }
11192
11193    /* At this stage dc->condjmp will only be set when the skipped
11194     * instruction was a conditional branch or trap, and the PC has
11195     * already been written.  */
11196    gen_set_condexec(dc);
11197    if (dc->base.is_jmp == DISAS_BX_EXCRET) {
11198        /* Exception return branches need some special case code at the
11199         * end of the TB, which is complex enough that it has to
11200         * handle the single-step vs not and the condition-failed
11201         * insn codepath itself.
11202         */
11203        gen_bx_excret_final_code(dc);
11204    } else if (unlikely(is_singlestepping(dc))) {
11205        /* Unconditional and "condition passed" instruction codepath. */
11206        switch (dc->base.is_jmp) {
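        /*
         * For SWI/HVC/SMC we take the architectural exception rather than
         * the single-step debug exception; gen_ss_advance() updates the
         * single-step state machine first so the step is accounted for.
         */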
11207        case DISAS_SWI:
11208            gen_ss_advance(dc);
11209            gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb),
11210                          default_exception_el(dc));
11211            break;
11212        case DISAS_HVC:
11213            gen_ss_advance(dc);
11214            gen_exception(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2);
11215            break;
11216        case DISAS_SMC:
11217            gen_ss_advance(dc);
11218            gen_exception(EXCP_SMC, syn_aa32_smc(), 3);
11219            break;
11220        case DISAS_NEXT:
11221        case DISAS_TOO_MANY:
11222        case DISAS_UPDATE:
11223            gen_set_pc_im(dc, dc->base.pc_next);
11224            /* fall through */
11225        default:
11226            /* FIXME: Single stepping a WFI insn will not halt the CPU. */
11227            gen_singlestep_exception(dc);
11228            break;
11229        case DISAS_NORETURN:
11230            break;
11231        }
11232    } else {
11233        /* While branches must always occur at the end of an IT block,
11234         * there are a few other things that can cause us to terminate
11235         * the TB in the middle of an IT block:
11236         *  - Exception generating instructions (bkpt, swi, undefined).
11237         *  - Page boundaries.
11238         *  - Hardware watchpoints.
11239         * Hardware breakpoints have already been handled and skip this code.
11240         */
11241        switch (dc->base.is_jmp) {
11242        case DISAS_NEXT:
11243        case DISAS_TOO_MANY:
11244            gen_goto_tb(dc, 1, dc->base.pc_next);
11245            break;
11246        case DISAS_JUMP:
11247            gen_goto_ptr();
11248            break;
11249        case DISAS_UPDATE:
11250            gen_set_pc_im(dc, dc->base.pc_next);
11251            /* fall through */
11252        default:
11253            /* indicate that the hash table must be used to find the next TB */
11254            tcg_gen_exit_tb(NULL, 0);
11255            break;
11256        case DISAS_NORETURN:
11257            /* nothing more to generate */
11258            break;
11259        case DISAS_WFI:
11260        {
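            /*
             * Pass the insn length (2 for a 16-bit Thumb encoding, 4
             * otherwise); the wfi helper needs it if the WFI is trapped
             * to a higher exception level.
             */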
11261            TCGv_i32 tmp = tcg_const_i32((dc->thumb &&
11262                                          !(dc->insn & (1U << 31))) ? 2 : 4);
11263
11264            gen_helper_wfi(cpu_env, tmp);
11265            tcg_temp_free_i32(tmp);
11266            /* The helper doesn't necessarily throw an exception, but we
11267             * must go back to the main loop to check for interrupts anyway.
11268             */
11269            tcg_gen_exit_tb(NULL, 0);
11270            break;
11271        }
11272        case DISAS_WFE:
11273            gen_helper_wfe(cpu_env);
11274            break;
11275        case DISAS_YIELD:
11276            gen_helper_yield(cpu_env);
11277            break;
11278        case DISAS_SWI:
11279            gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb),
11280                          default_exception_el(dc));
11281            break;
11282        case DISAS_HVC:
11283            gen_exception(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2);
11284            break;
11285        case DISAS_SMC:
11286            gen_exception(EXCP_SMC, syn_aa32_smc(), 3);
11287            break;
11288        }
11289    }
11290
11291    if (dc->condjmp) {
11292        /* "Condition failed" instruction codepath for the branch/trap insn */
11293        gen_set_label(dc->condlabel);
11294        gen_set_condexec(dc);
11295        if (unlikely(is_singlestepping(dc))) {
11296            gen_set_pc_im(dc, dc->base.pc_next);
11297            gen_singlestep_exception(dc);
11298        } else {
11299            gen_goto_tb(dc, 1, dc->base.pc_next);
11300        }
11301    }
11302}
11303
11304static void arm_tr_disas_log(const DisasContextBase *dcbase, CPUState *cpu)
11305{
11306    DisasContext *dc = container_of(dcbase, DisasContext, base);
11307
11308    qemu_log("IN: %s\n", lookup_symbol(dc->base.pc_first));
11309    log_target_disas(cpu, dc->base.pc_first, dc->base.tb->size);
11310}
11311
11312static const TranslatorOps arm_translator_ops = {
11313    .init_disas_context = arm_tr_init_disas_context,
11314    .tb_start           = arm_tr_tb_start,
11315    .insn_start         = arm_tr_insn_start,
11316    .breakpoint_check   = arm_tr_breakpoint_check,
11317    .translate_insn     = arm_tr_translate_insn,
11318    .tb_stop            = arm_tr_tb_stop,
11319    .disas_log          = arm_tr_disas_log,
11320};
11321
11322static const TranslatorOps thumb_translator_ops = {
11323    .init_disas_context = arm_tr_init_disas_context,
11324    .tb_start           = arm_tr_tb_start,
11325    .insn_start         = arm_tr_insn_start,
11326    .breakpoint_check   = arm_tr_breakpoint_check,
11327    .translate_insn     = thumb_tr_translate_insn,
11328    .tb_stop            = arm_tr_tb_stop,
11329    .disas_log          = arm_tr_disas_log,
11330};
11331
11332/* generate intermediate code for basic block 'tb'.  */
11333void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns)
11334{
11335    DisasContext dc;
11336    const TranslatorOps *ops = &arm_translator_ops;
11337
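    /*
     * Pick the decoder for this TB from its flags: A32 by default, Thumb
     * if the THUMB flag is set, and AArch64 when built with AArch64
     * support and the TB is in AArch64 state.
     */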
11338    if (FIELD_EX32(tb->flags, TBFLAG_A32, THUMB)) {
11339        ops = &thumb_translator_ops;
11340    }
11341#ifdef TARGET_AARCH64
11342    if (FIELD_EX32(tb->flags, TBFLAG_ANY, AARCH64_STATE)) {
11343        ops = &aarch64_translator_ops;
11344    }
11345#endif
11346
11347    translator_loop(ops, &dc.base, cpu, tb, max_insns);
11348}
11349
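/*
 * Restore CPU state from the values that insn_start recorded for this insn:
 * data[0] is the PC, data[1] the IT/condexec bits (AArch32 only), and
 * data[2] the saved exception syndrome bits, shifted back into place here.
 */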
11350void restore_state_to_opc(CPUARMState *env, TranslationBlock *tb,
11351                          target_ulong *data)
11352{
11353    if (is_a64(env)) {
11354        env->pc = data[0];
11355        env->condexec_bits = 0;
11356        env->exception.syndrome = data[2] << ARM_INSN_START_WORD2_SHIFT;
11357    } else {
11358        env->regs[15] = data[0];
11359        env->condexec_bits = data[1];
11360        env->exception.syndrome = data[2] << ARM_INSN_START_WORD2_SHIFT;
11361    }
11362}
11363