/*
 *  ARM translation
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *  Copyright (c) 2005-2007 CodeSourcery
 *  Copyright (c) 2007 OpenedHand, Ltd.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"

#include "cpu.h"
#include "internals.h"
#include "disas/disas.h"
#include "exec/exec-all.h"
#include "tcg/tcg-op.h"
#include "tcg/tcg-op-gvec.h"
#include "qemu/log.h"
#include "qemu/bitops.h"
#include "arm_ldst.h"
#include "hw/semihosting/semihost.h"

#include "exec/helper-proto.h"
#include "exec/helper-gen.h"

#include "trace-tcg.h"
#include "exec/log.h"


#define ENABLE_ARCH_4T    arm_dc_feature(s, ARM_FEATURE_V4T)
#define ENABLE_ARCH_5     arm_dc_feature(s, ARM_FEATURE_V5)
/* currently all emulated v5 cores are also v5TE, so don't bother */
#define ENABLE_ARCH_5TE   arm_dc_feature(s, ARM_FEATURE_V5)
#define ENABLE_ARCH_5J    dc_isar_feature(aa32_jazelle, s)
#define ENABLE_ARCH_6     arm_dc_feature(s, ARM_FEATURE_V6)
#define ENABLE_ARCH_6K    arm_dc_feature(s, ARM_FEATURE_V6K)
#define ENABLE_ARCH_6T2   arm_dc_feature(s, ARM_FEATURE_THUMB2)
#define ENABLE_ARCH_7     arm_dc_feature(s, ARM_FEATURE_V7)
#define ENABLE_ARCH_8     arm_dc_feature(s, ARM_FEATURE_V8)

#include "translate.h"

#if defined(CONFIG_USER_ONLY)
#define IS_USER(s) 1
#else
#define IS_USER(s) (s->user)
#endif

/* These are TCG temporaries used only by the legacy iwMMXt decoder */
static TCGv_i64 cpu_V0, cpu_V1, cpu_M0;
/* These are TCG globals which alias CPUARMState fields */
static TCGv_i32 cpu_R[16];
TCGv_i32 cpu_CF, cpu_NF, cpu_VF, cpu_ZF;
TCGv_i64 cpu_exclusive_addr;
TCGv_i64 cpu_exclusive_val;

#include "exec/gen-icount.h"

static const char * const regnames[] =
    { "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
      "r8", "r9", "r10", "r11", "r12", "r13", "r14", "pc" };

/* Function prototypes for gen_ functions calling Neon helpers.  */
typedef void NeonGenThreeOpEnvFn(TCGv_i32, TCGv_env, TCGv_i32,
                                 TCGv_i32, TCGv_i32);
/* Function prototypes for gen_ functions for fixed-point conversions */
typedef void VFPGenFixPointFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);

/* initialize TCG globals.  */
void arm_translate_init(void)
{
    int i;

    for (i = 0; i < 16; i++) {
        cpu_R[i] = tcg_global_mem_new_i32(cpu_env,
                                          offsetof(CPUARMState, regs[i]),
                                          regnames[i]);
    }
    cpu_CF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, CF), "CF");
    cpu_NF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, NF), "NF");
    cpu_VF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, VF), "VF");
    cpu_ZF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, ZF), "ZF");

    cpu_exclusive_addr = tcg_global_mem_new_i64(cpu_env,
        offsetof(CPUARMState, exclusive_addr), "exclusive_addr");
    cpu_exclusive_val = tcg_global_mem_new_i64(cpu_env,
        offsetof(CPUARMState, exclusive_val), "exclusive_val");

    a64_translate_init();
}

/* Flags for the disas_set_da_iss info argument:
 * lower bits hold the Rt register number, higher bits are flags.
 */
typedef enum ISSInfo {
    ISSNone = 0,
    ISSRegMask = 0x1f,
    ISSInvalid = (1 << 5),
    ISSIsAcqRel = (1 << 6),
    ISSIsWrite = (1 << 7),
    ISSIs16Bit = (1 << 8),
} ISSInfo;

/* Save the syndrome information for a Data Abort */
static void disas_set_da_iss(DisasContext *s, MemOp memop, ISSInfo issinfo)
{
    uint32_t syn;
    int sas = memop & MO_SIZE;
    bool sse = memop & MO_SIGN;
    bool is_acqrel = issinfo & ISSIsAcqRel;
    bool is_write = issinfo & ISSIsWrite;
    bool is_16bit = issinfo & ISSIs16Bit;
    int srt = issinfo & ISSRegMask;

    if (issinfo & ISSInvalid) {
        /* Some callsites want to conditionally provide ISS info,
         * eg "only if this was not a writeback"
         */
        return;
    }

    if (srt == 15) {
        /* For AArch32, insns where the src/dest is R15 never generate
         * ISS information. Catching that here saves checking at all
         * the call sites.
         */
        return;
    }

    syn = syn_data_abort_with_iss(0, sas, sse, srt, 0, is_acqrel,
                                  0, 0, 0, is_write, 0, is_16bit);
    disas_set_insn_syndrome(s, syn);
}

static inline int get_a32_user_mem_index(DisasContext *s)
{
    /* Return the core mmu_idx to use for A32/T32 "unprivileged load/store"
     * insns:
     *  if PL2, UNPREDICTABLE (we choose to implement as if PL0)
     *  otherwise, access as if at PL0.
     */
    switch (s->mmu_idx) {
    case ARMMMUIdx_E2:        /* this one is UNPREDICTABLE */
    case ARMMMUIdx_E10_0:
    case ARMMMUIdx_E10_1:
    case ARMMMUIdx_E10_1_PAN:
        return arm_to_core_mmu_idx(ARMMMUIdx_E10_0);
    case ARMMMUIdx_SE3:
    case ARMMMUIdx_SE10_0:
    case ARMMMUIdx_SE10_1:
    case ARMMMUIdx_SE10_1_PAN:
        return arm_to_core_mmu_idx(ARMMMUIdx_SE10_0);
    case ARMMMUIdx_MUser:
    case ARMMMUIdx_MPriv:
        return arm_to_core_mmu_idx(ARMMMUIdx_MUser);
    case ARMMMUIdx_MUserNegPri:
    case ARMMMUIdx_MPrivNegPri:
        return arm_to_core_mmu_idx(ARMMMUIdx_MUserNegPri);
    case ARMMMUIdx_MSUser:
    case ARMMMUIdx_MSPriv:
        return arm_to_core_mmu_idx(ARMMMUIdx_MSUser);
    case ARMMMUIdx_MSUserNegPri:
    case ARMMMUIdx_MSPrivNegPri:
        return arm_to_core_mmu_idx(ARMMMUIdx_MSUserNegPri);
    default:
        g_assert_not_reached();
    }
}

static inline TCGv_i32 load_cpu_offset(int offset)
{
    TCGv_i32 tmp = tcg_temp_new_i32();
    tcg_gen_ld_i32(tmp, cpu_env, offset);
    return tmp;
}

#define load_cpu_field(name) load_cpu_offset(offsetof(CPUARMState, name))

static inline void store_cpu_offset(TCGv_i32 var, int offset)
{
    tcg_gen_st_i32(var, cpu_env, offset);
    tcg_temp_free_i32(var);
}

#define store_cpu_field(var, name) \
    store_cpu_offset(var, offsetof(CPUARMState, name))

/* The architectural value of PC.  */
static uint32_t read_pc(DisasContext *s)
{
    return s->pc_curr + (s->thumb ? 4 : 8);
}

/* Set a variable to the value of a CPU register.  */
static void load_reg_var(DisasContext *s, TCGv_i32 var, int reg)
{
    if (reg == 15) {
        tcg_gen_movi_i32(var, read_pc(s));
    } else {
        tcg_gen_mov_i32(var, cpu_R[reg]);
    }
}

/* Create a new temporary and set it to the value of a CPU register.  */
static inline TCGv_i32 load_reg(DisasContext *s, int reg)
{
    TCGv_i32 tmp = tcg_temp_new_i32();
    load_reg_var(s, tmp, reg);
    return tmp;
}

/*
 * Create a new temp, REG + OFS, except PC is ALIGN(PC, 4).
 * This is used for load/store for which use of PC implies (literal),
 * or ADD that implies ADR.
 */
static TCGv_i32 add_reg_for_lit(DisasContext *s, int reg, int ofs)
{
    TCGv_i32 tmp = tcg_temp_new_i32();

    if (reg == 15) {
        tcg_gen_movi_i32(tmp, (read_pc(s) & ~3) + ofs);
    } else {
        tcg_gen_addi_i32(tmp, cpu_R[reg], ofs);
    }
    return tmp;
}

/* Set a CPU register.  The source must be a temporary and will be
   marked as dead.  */
static void store_reg(DisasContext *s, int reg, TCGv_i32 var)
{
    if (reg == 15) {
        /* In Thumb mode, we must ignore bit 0.
         * In ARM mode, for ARMv4 and ARMv5, it is UNPREDICTABLE if bits [1:0]
         * are not 0b00, but for ARMv6 and above, we must ignore bits [1:0].
         * We choose to ignore [1:0] in ARM mode for all architecture versions.
         */
        tcg_gen_andi_i32(var, var, s->thumb ? ~1 : ~3);
        s->base.is_jmp = DISAS_JUMP;
    }
    tcg_gen_mov_i32(cpu_R[reg], var);
    tcg_temp_free_i32(var);
}

/*
 * Variant of store_reg which applies v8M stack-limit checks before updating
 * SP. If the check fails this will result in an exception being taken.
 * We disable the stack checks for CONFIG_USER_ONLY because we have
 * no idea what the stack limits should be in that case.
 * If stack checking is not being done this just acts like store_reg().
 */
static void store_sp_checked(DisasContext *s, TCGv_i32 var)
{
#ifndef CONFIG_USER_ONLY
    if (s->v8m_stackcheck) {
        gen_helper_v8m_stackcheck(cpu_env, var);
    }
#endif
    store_reg(s, 13, var);
}

/* Value extensions.  */
#define gen_uxtb(var) tcg_gen_ext8u_i32(var, var)
#define gen_uxth(var) tcg_gen_ext16u_i32(var, var)
#define gen_sxtb(var) tcg_gen_ext8s_i32(var, var)
#define gen_sxth(var) tcg_gen_ext16s_i32(var, var)

#define gen_sxtb16(var) gen_helper_sxtb16(var, var)
#define gen_uxtb16(var) gen_helper_uxtb16(var, var)


static inline void gen_set_cpsr(TCGv_i32 var, uint32_t mask)
{
    TCGv_i32 tmp_mask = tcg_const_i32(mask);
    gen_helper_cpsr_write(cpu_env, var, tmp_mask);
    tcg_temp_free_i32(tmp_mask);
}
/* Set NZCV flags from the high 4 bits of var.  */
#define gen_set_nzcv(var) gen_set_cpsr(var, CPSR_NZCV)

static void gen_exception_internal(int excp)
{
    TCGv_i32 tcg_excp = tcg_const_i32(excp);

    assert(excp_is_internal(excp));
    gen_helper_exception_internal(cpu_env, tcg_excp);
    tcg_temp_free_i32(tcg_excp);
}

static void gen_step_complete_exception(DisasContext *s)
{
    /* We just completed a step of an insn. Move from Active-not-pending
     * to Active-pending, and then also take the swstep exception.
     * This corresponds to making the (IMPDEF) choice to prioritize
     * swstep exceptions over asynchronous exceptions taken to an exception
     * level where debug is disabled. This choice has the advantage that
     * we do not need to maintain internal state corresponding to the
     * ISV/EX syndrome bits between completion of the step and generation
     * of the exception, and our syndrome information is always correct.
     */
    gen_ss_advance(s);
    gen_swstep_exception(s, 1, s->is_ldex);
    s->base.is_jmp = DISAS_NORETURN;
}

static void gen_singlestep_exception(DisasContext *s)
{
    /* Generate the right kind of exception for singlestep, which is
     * either the architectural singlestep or EXCP_DEBUG for QEMU's
     * gdb singlestepping.
     */
    if (s->ss_active) {
        gen_step_complete_exception(s);
    } else {
        gen_exception_internal(EXCP_DEBUG);
    }
}

static inline bool is_singlestepping(DisasContext *s)
{
    /* Return true if we are singlestepping either because of
     * architectural singlestep or QEMU gdbstub singlestep. This does
     * not include the command line '-singlestep' mode which is rather
     * misnamed as it only means "one instruction per TB" and doesn't
     * affect the code we generate.
     */
    return s->base.singlestep_enabled || s->ss_active;
}

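/*
 * Dual 16x16->32 signed multiply, as used by the dual-multiply insns
 * (SMUAD and friends): on return, a holds the product of the low
 * halves of the inputs and b the product of the high halves.
 */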
static void gen_smul_dual(TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 tmp1 = tcg_temp_new_i32();
    TCGv_i32 tmp2 = tcg_temp_new_i32();
    tcg_gen_ext16s_i32(tmp1, a);
    tcg_gen_ext16s_i32(tmp2, b);
    tcg_gen_mul_i32(tmp1, tmp1, tmp2);
    tcg_temp_free_i32(tmp2);
    tcg_gen_sari_i32(a, a, 16);
    tcg_gen_sari_i32(b, b, 16);
    tcg_gen_mul_i32(b, b, a);
    tcg_gen_mov_i32(a, tmp1);
    tcg_temp_free_i32(tmp1);
}

/* Byteswap each halfword.  */
static void gen_rev16(TCGv_i32 dest, TCGv_i32 var)
{
    TCGv_i32 tmp = tcg_temp_new_i32();
    TCGv_i32 mask = tcg_const_i32(0x00ff00ff);
    tcg_gen_shri_i32(tmp, var, 8);
    tcg_gen_and_i32(tmp, tmp, mask);
    tcg_gen_and_i32(var, var, mask);
    tcg_gen_shli_i32(var, var, 8);
    tcg_gen_or_i32(dest, var, tmp);
    tcg_temp_free_i32(mask);
    tcg_temp_free_i32(tmp);
}

/* Byteswap low halfword and sign extend.  */
static void gen_revsh(TCGv_i32 dest, TCGv_i32 var)
{
    tcg_gen_ext16u_i32(var, var);
    tcg_gen_bswap16_i32(var, var);
    tcg_gen_ext16s_i32(dest, var);
}

/* Swap low and high halfwords.  */
static void gen_swap_half(TCGv_i32 dest, TCGv_i32 var)
{
    tcg_gen_rotri_i32(dest, var, 16);
}

/* Dual 16-bit add.  Result placed in dest; t0 and t1 are clobbered:
    tmp = (t0 ^ t1) & 0x8000;
    t0 &= ~0x8000;
    t1 &= ~0x8000;
    dest = (t0 + t1) ^ tmp;
 */

static void gen_add16(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
{
    TCGv_i32 tmp = tcg_temp_new_i32();
    tcg_gen_xor_i32(tmp, t0, t1);
    tcg_gen_andi_i32(tmp, tmp, 0x8000);
    tcg_gen_andi_i32(t0, t0, ~0x8000);
    tcg_gen_andi_i32(t1, t1, ~0x8000);
    tcg_gen_add_i32(t0, t0, t1);
    tcg_gen_xor_i32(dest, t0, tmp);
    tcg_temp_free_i32(tmp);
}

/* Set N and Z flags from var.  */
static inline void gen_logic_CC(TCGv_i32 var)
{
    tcg_gen_mov_i32(cpu_NF, var);
    tcg_gen_mov_i32(cpu_ZF, var);
}

/* dest = T0 + T1 + CF. */
static void gen_add_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
{
    tcg_gen_add_i32(dest, t0, t1);
    tcg_gen_add_i32(dest, dest, cpu_CF);
}

/* dest = T0 - T1 + CF - 1.  */
static void gen_sub_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
{
    tcg_gen_sub_i32(dest, t0, t1);
    tcg_gen_add_i32(dest, dest, cpu_CF);
    tcg_gen_subi_i32(dest, dest, 1);
}

/* dest = T0 + T1. Compute C, N, V and Z flags */
static void gen_add_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
{
    TCGv_i32 tmp = tcg_temp_new_i32();
    tcg_gen_movi_i32(tmp, 0);
    tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, t1, tmp);
    tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
    tcg_gen_xor_i32(tmp, t0, t1);
    tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
    tcg_temp_free_i32(tmp);
    tcg_gen_mov_i32(dest, cpu_NF);
}

/* dest = T0 + T1 + CF.  Compute C, N, V and Z flags */
static void gen_adc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
{
    TCGv_i32 tmp = tcg_temp_new_i32();
    if (TCG_TARGET_HAS_add2_i32) {
        tcg_gen_movi_i32(tmp, 0);
        tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, cpu_CF, tmp);
        tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1, tmp);
    } else {
        TCGv_i64 q0 = tcg_temp_new_i64();
        TCGv_i64 q1 = tcg_temp_new_i64();
        tcg_gen_extu_i32_i64(q0, t0);
        tcg_gen_extu_i32_i64(q1, t1);
        tcg_gen_add_i64(q0, q0, q1);
        tcg_gen_extu_i32_i64(q1, cpu_CF);
        tcg_gen_add_i64(q0, q0, q1);
        tcg_gen_extr_i64_i32(cpu_NF, cpu_CF, q0);
        tcg_temp_free_i64(q0);
        tcg_temp_free_i64(q1);
    }
    tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
    tcg_gen_xor_i32(tmp, t0, t1);
    tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
    tcg_temp_free_i32(tmp);
    tcg_gen_mov_i32(dest, cpu_NF);
}

/* dest = T0 - T1. Compute C, N, V and Z flags */
static void gen_sub_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
{
    TCGv_i32 tmp;
    tcg_gen_sub_i32(cpu_NF, t0, t1);
    tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0, t1);
    tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
    tmp = tcg_temp_new_i32();
    tcg_gen_xor_i32(tmp, t0, t1);
    tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
    tcg_temp_free_i32(tmp);
    tcg_gen_mov_i32(dest, cpu_NF);
}

/* dest = T0 + ~T1 + CF.  Compute C, N, V and Z flags */
static void gen_sbc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
{
    TCGv_i32 tmp = tcg_temp_new_i32();
    tcg_gen_not_i32(tmp, t1);
    gen_adc_CC(dest, t0, tmp);
    tcg_temp_free_i32(tmp);
}

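/*
 * Variable shift by register: only the low 8 bits of the shift count
 * are significant, and counts of 32 or more yield zero, matching the
 * ARM LSL/LSR-by-register semantics.  For example, the generated
 * gen_shl(dest, t0, t1) computes
 *   dest = ((t1 & 0xff) > 31) ? 0 : t0 << (t1 & 0x1f).
 */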
#define GEN_SHIFT(name)                                               \
static void gen_##name(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)       \
{                                                                     \
    TCGv_i32 tmp1, tmp2, tmp3;                                        \
    tmp1 = tcg_temp_new_i32();                                        \
    tcg_gen_andi_i32(tmp1, t1, 0xff);                                 \
    tmp2 = tcg_const_i32(0);                                          \
    tmp3 = tcg_const_i32(0x1f);                                       \
    tcg_gen_movcond_i32(TCG_COND_GTU, tmp2, tmp1, tmp3, tmp2, t0);    \
    tcg_temp_free_i32(tmp3);                                          \
    tcg_gen_andi_i32(tmp1, tmp1, 0x1f);                               \
    tcg_gen_##name##_i32(dest, tmp2, tmp1);                           \
    tcg_temp_free_i32(tmp2);                                          \
    tcg_temp_free_i32(tmp1);                                          \
}
GEN_SHIFT(shl)
GEN_SHIFT(shr)
#undef GEN_SHIFT

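/* Variable ASR by register: shift counts of 32 or more are clamped
 * to 31, which replicates the sign bit as the architecture requires.
 */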
static void gen_sar(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
{
    TCGv_i32 tmp1, tmp2;
    tmp1 = tcg_temp_new_i32();
    tcg_gen_andi_i32(tmp1, t1, 0xff);
    tmp2 = tcg_const_i32(0x1f);
    tcg_gen_movcond_i32(TCG_COND_GTU, tmp1, tmp1, tmp2, tmp2, tmp1);
    tcg_temp_free_i32(tmp2);
    tcg_gen_sar_i32(dest, t0, tmp1);
    tcg_temp_free_i32(tmp1);
}

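/* Copy the shifter carry-out (bit SHIFT of var) into CF. */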
static void shifter_out_im(TCGv_i32 var, int shift)
{
    tcg_gen_extract_i32(cpu_CF, var, shift, 1);
}

/* Shift by immediate.  Includes special handling for shift == 0.  */
static inline void gen_arm_shift_im(TCGv_i32 var, int shiftop,
                                    int shift, int flags)
{
    switch (shiftop) {
    case 0: /* LSL */
        if (shift != 0) {
            if (flags)
                shifter_out_im(var, 32 - shift);
            tcg_gen_shli_i32(var, var, shift);
        }
        break;
    case 1: /* LSR */
        if (shift == 0) {
            if (flags) {
                tcg_gen_shri_i32(cpu_CF, var, 31);
            }
            tcg_gen_movi_i32(var, 0);
        } else {
            if (flags)
                shifter_out_im(var, shift - 1);
            tcg_gen_shri_i32(var, var, shift);
        }
        break;
    case 2: /* ASR */
        if (shift == 0)
            shift = 32;
        if (flags)
            shifter_out_im(var, shift - 1);
        if (shift == 32)
            shift = 31;
        tcg_gen_sari_i32(var, var, shift);
        break;
    case 3: /* ROR/RRX */
        if (shift != 0) {
            if (flags)
                shifter_out_im(var, shift - 1);
            tcg_gen_rotri_i32(var, var, shift);
            break;
        } else {
            TCGv_i32 tmp = tcg_temp_new_i32();
            tcg_gen_shli_i32(tmp, cpu_CF, 31);
            if (flags)
                shifter_out_im(var, 0);
            tcg_gen_shri_i32(var, var, 1);
            tcg_gen_or_i32(var, var, tmp);
            tcg_temp_free_i32(tmp);
        }
    }
}

static inline void gen_arm_shift_reg(TCGv_i32 var, int shiftop,
                                     TCGv_i32 shift, int flags)
{
    if (flags) {
        switch (shiftop) {
        case 0: gen_helper_shl_cc(var, cpu_env, var, shift); break;
        case 1: gen_helper_shr_cc(var, cpu_env, var, shift); break;
        case 2: gen_helper_sar_cc(var, cpu_env, var, shift); break;
        case 3: gen_helper_ror_cc(var, cpu_env, var, shift); break;
        }
    } else {
        switch (shiftop) {
        case 0:
            gen_shl(var, var, shift);
            break;
        case 1:
            gen_shr(var, var, shift);
            break;
        case 2:
            gen_sar(var, var, shift);
            break;
        case 3:
            tcg_gen_andi_i32(shift, shift, 0x1f);
            tcg_gen_rotr_i32(var, var, shift);
            break;
        }
    }
    tcg_temp_free_i32(shift);
}

/*
 * Generate a conditional based on ARM condition code cc.
 * This is common between ARM and AArch64 targets.
 */
void arm_test_cc(DisasCompare *cmp, int cc)
{
    TCGv_i32 value;
    TCGCond cond;
    bool global = true;

    switch (cc) {
    case 0: /* eq: Z */
    case 1: /* ne: !Z */
        cond = TCG_COND_EQ;
        value = cpu_ZF;
        break;

    case 2: /* cs: C */
    case 3: /* cc: !C */
        cond = TCG_COND_NE;
        value = cpu_CF;
        break;

    case 4: /* mi: N */
    case 5: /* pl: !N */
        cond = TCG_COND_LT;
        value = cpu_NF;
        break;

    case 6: /* vs: V */
    case 7: /* vc: !V */
        cond = TCG_COND_LT;
        value = cpu_VF;
        break;

    case 8: /* hi: C && !Z */
    case 9: /* ls: !C || Z -> !(C && !Z) */
        cond = TCG_COND_NE;
        value = tcg_temp_new_i32();
        global = false;
        /* CF is 1 for C, so -CF is an all-bits-set mask for C;
           ZF is non-zero for !Z; so AND the two subexpressions.  */
        tcg_gen_neg_i32(value, cpu_CF);
        tcg_gen_and_i32(value, value, cpu_ZF);
        break;

    case 10: /* ge: N == V -> N ^ V == 0 */
    case 11: /* lt: N != V -> N ^ V != 0 */
        /* Since we're only interested in the sign bit, == 0 is >= 0.  */
        cond = TCG_COND_GE;
        value = tcg_temp_new_i32();
        global = false;
        tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
        break;

    case 12: /* gt: !Z && N == V */
    case 13: /* le: Z || N != V */
        cond = TCG_COND_NE;
        value = tcg_temp_new_i32();
        global = false;
        /* (N == V) is equal to the sign bit of ~(NF ^ VF).  Propagate
         * the sign bit then AND with ZF to yield the result.  */
        tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
        tcg_gen_sari_i32(value, value, 31);
        tcg_gen_andc_i32(value, cpu_ZF, value);
        break;

    case 14: /* always */
    case 15: /* always */
        /* Use the ALWAYS condition, which will fold early.
         * It doesn't matter what we use for the value.  */
        cond = TCG_COND_ALWAYS;
        value = cpu_ZF;
        goto no_invert;

    default:
        fprintf(stderr, "Bad condition code 0x%x\n", cc);
        abort();
    }

    if (cc & 1) {
        cond = tcg_invert_cond(cond);
    }

 no_invert:
    cmp->cond = cond;
    cmp->value = value;
    cmp->value_global = global;
}

void arm_free_cc(DisasCompare *cmp)
{
    if (!cmp->value_global) {
        tcg_temp_free_i32(cmp->value);
    }
}

void arm_jump_cc(DisasCompare *cmp, TCGLabel *label)
{
    tcg_gen_brcondi_i32(cmp->cond, cmp->value, 0, label);
}

void arm_gen_test_cc(int cc, TCGLabel *label)
{
    DisasCompare cmp;
    arm_test_cc(&cmp, cc);
    arm_jump_cc(&cmp, label);
    arm_free_cc(&cmp);
}

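/*
 * Sync the IT-block state (condexec bits) back into CPUARMState so
 * that it is correct if an exception is taken part-way through an
 * IT block.
 */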
static inline void gen_set_condexec(DisasContext *s)
{
    if (s->condexec_mask) {
        uint32_t val = (s->condexec_cond << 4) | (s->condexec_mask >> 1);
        TCGv_i32 tmp = tcg_temp_new_i32();
        tcg_gen_movi_i32(tmp, val);
        store_cpu_field(tmp, condexec_bits);
    }
}

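/* Write VAL directly to the PC (r15); this does not end the TB by itself. */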
static inline void gen_set_pc_im(DisasContext *s, target_ulong val)
{
    tcg_gen_movi_i32(cpu_R[15], val);
}

/* Set PC and Thumb state from var.  var is marked as dead.  */
static inline void gen_bx(DisasContext *s, TCGv_i32 var)
{
    s->base.is_jmp = DISAS_JUMP;
    tcg_gen_andi_i32(cpu_R[15], var, ~1);
    tcg_gen_andi_i32(var, var, 1);
    store_cpu_field(var, thumb);
}

/*
 * Set PC and Thumb state from var. var is marked as dead.
 * For M-profile CPUs, include logic to detect exception-return
 * branches and handle them. This is needed for Thumb POP/LDM to PC, LDR to PC,
 * and BX reg, and no others, and happens only for code in Handler mode.
 * The Security Extension also requires us to check for the FNC_RETURN
 * which signals a function return from non-secure state; this can happen
 * in both Handler and Thread mode.
 * To avoid having to do multiple comparisons in inline generated code,
 * we make the check we do here loose, so it will match for EXC_RETURN
 * in Thread mode. For system emulation do_v7m_exception_exit() checks
 * for these spurious cases and returns without doing anything (giving
 * the same behaviour as for a branch to a non-magic address).
 *
 * In linux-user mode it is unclear what the right behaviour for an
 * attempted FNC_RETURN should be, because in real hardware this will go
 * directly to Secure code (ie not the Linux kernel) which will then treat
 * the error in any way it chooses. For QEMU we opt to make the FNC_RETURN
 * attempt behave the way it would on a CPU without the security extension,
 * which is to say "like a normal branch". That means we can simply treat
 * all branches as normal with no magic address behaviour.
 */
static inline void gen_bx_excret(DisasContext *s, TCGv_i32 var)
{
    /* Generate the same code here as for a simple bx, but flag via
     * s->base.is_jmp that we need to do the rest of the work later.
     */
    gen_bx(s, var);
#ifndef CONFIG_USER_ONLY
    if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY) ||
        (s->v7m_handler_mode && arm_dc_feature(s, ARM_FEATURE_M))) {
        s->base.is_jmp = DISAS_BX_EXCRET;
    }
#endif
}

static inline void gen_bx_excret_final_code(DisasContext *s)
{
    /* Generate the code to finish possible exception return and end the TB */
    TCGLabel *excret_label = gen_new_label();
    uint32_t min_magic;

    if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY)) {
        /* Covers FNC_RETURN and EXC_RETURN magic */
        min_magic = FNC_RETURN_MIN_MAGIC;
    } else {
        /* EXC_RETURN magic only */
        min_magic = EXC_RETURN_MIN_MAGIC;
    }

    /* Is the new PC value in the magic range indicating exception return? */
    tcg_gen_brcondi_i32(TCG_COND_GEU, cpu_R[15], min_magic, excret_label);
    /* No: end the TB as we would for a DISAS_JMP */
    if (is_singlestepping(s)) {
        gen_singlestep_exception(s);
    } else {
        tcg_gen_exit_tb(NULL, 0);
    }
    gen_set_label(excret_label);
    /* Yes: this is an exception return.
     * At this point in runtime env->regs[15] and env->thumb will hold
     * the exception-return magic number, which do_v7m_exception_exit()
     * will read. Nothing else will be able to see those values because
     * the cpu-exec main loop guarantees that we will always go straight
     * from raising the exception to the exception-handling code.
     *
     * gen_ss_advance(s) does nothing on M profile currently but
     * calling it is conceptually the right thing as we have executed
     * this instruction (compare SWI, HVC, SMC handling).
     */
    gen_ss_advance(s);
    gen_exception_internal(EXCP_EXCEPTION_EXIT);
}

static inline void gen_bxns(DisasContext *s, int rm)
{
    TCGv_i32 var = load_reg(s, rm);

    /* The bxns helper may raise an EXCEPTION_EXIT exception, so in theory
     * we need to sync state before calling it, but:
     *  - we don't need to do gen_set_pc_im() because the bxns helper will
     *    always set the PC itself
     *  - we don't need to do gen_set_condexec() because BXNS is UNPREDICTABLE
     *    unless it's outside an IT block or the last insn in an IT block,
     *    so we know that condexec == 0 (already set at the top of the TB)
     *    is correct in the non-UNPREDICTABLE cases, and we can choose
     *    "zeroes the IT bits" as our UNPREDICTABLE behaviour otherwise.
     */
    gen_helper_v7m_bxns(cpu_env, var);
    tcg_temp_free_i32(var);
    s->base.is_jmp = DISAS_EXIT;
}

static inline void gen_blxns(DisasContext *s, int rm)
{
    TCGv_i32 var = load_reg(s, rm);

    /* We don't need to sync condexec state, for the same reason as bxns.
     * We do however need to set the PC, because the blxns helper reads it.
     * The blxns helper may throw an exception.
     */
    gen_set_pc_im(s, s->base.pc_next);
    gen_helper_v7m_blxns(cpu_env, var);
    tcg_temp_free_i32(var);
    s->base.is_jmp = DISAS_EXIT;
}

/* Variant of store_reg which uses branch&exchange logic when storing
   to r15 in ARM architecture v7 and above. The source must be a temporary
   and will be marked as dead. */
static inline void store_reg_bx(DisasContext *s, int reg, TCGv_i32 var)
{
    if (reg == 15 && ENABLE_ARCH_7) {
        gen_bx(s, var);
    } else {
        store_reg(s, reg, var);
    }
}

/* Variant of store_reg which uses branch&exchange logic when storing
 * to r15 in ARM architecture v5T and above. This is used for storing
 * the results of a LDR/LDM/POP into r15, and corresponds to the cases
 * in the ARM ARM which use the LoadWritePC() pseudocode function. */
static inline void store_reg_from_load(DisasContext *s, int reg, TCGv_i32 var)
{
    if (reg == 15 && ENABLE_ARCH_5) {
        gen_bx_excret(s, var);
    } else {
        store_reg(s, reg, var);
    }
}

#ifdef CONFIG_USER_ONLY
#define IS_USER_ONLY 1
#else
#define IS_USER_ONLY 0
#endif

/* Abstractions of "generate code to do a guest load/store for
 * AArch32", where a vaddr is always 32 bits (and is zero
 * extended if we're a 64 bit core) and data is also
 * 32 bits unless specifically doing a 64 bit access.
 * These functions work like tcg_gen_qemu_{ld,st}* except
 * that the address argument is TCGv_i32 rather than TCGv.
 */

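/*
 * Widen a 32-bit guest address into a target-sized address value,
 * applying the SCTLR.B (BE32) address transformation to accesses
 * narrower than a word.
 */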
static inline TCGv gen_aa32_addr(DisasContext *s, TCGv_i32 a32, MemOp op)
{
    TCGv addr = tcg_temp_new();
    tcg_gen_extu_i32_tl(addr, a32);

    /* Not needed for user-mode BE32, where we use MO_BE instead.  */
    if (!IS_USER_ONLY && s->sctlr_b && (op & MO_SIZE) < MO_32) {
        tcg_gen_xori_tl(addr, addr, 4 - (1 << (op & MO_SIZE)));
    }
    return addr;
}

static void gen_aa32_ld_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
                            int index, MemOp opc)
{
    TCGv addr;

    if (arm_dc_feature(s, ARM_FEATURE_M) &&
        !arm_dc_feature(s, ARM_FEATURE_M_MAIN)) {
        opc |= MO_ALIGN;
    }

    addr = gen_aa32_addr(s, a32, opc);
    tcg_gen_qemu_ld_i32(val, addr, index, opc);
    tcg_temp_free(addr);
}

static void gen_aa32_st_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
                            int index, MemOp opc)
{
    TCGv addr;

    if (arm_dc_feature(s, ARM_FEATURE_M) &&
        !arm_dc_feature(s, ARM_FEATURE_M_MAIN)) {
        opc |= MO_ALIGN;
    }

    addr = gen_aa32_addr(s, a32, opc);
    tcg_gen_qemu_st_i32(val, addr, index, opc);
    tcg_temp_free(addr);
}

#define DO_GEN_LD(SUFF, OPC)                                             \
static inline void gen_aa32_ld##SUFF(DisasContext *s, TCGv_i32 val,      \
                                     TCGv_i32 a32, int index)            \
{                                                                        \
    gen_aa32_ld_i32(s, val, a32, index, OPC | s->be_data);               \
}

#define DO_GEN_ST(SUFF, OPC)                                             \
static inline void gen_aa32_st##SUFF(DisasContext *s, TCGv_i32 val,      \
                                     TCGv_i32 a32, int index)            \
{                                                                        \
    gen_aa32_st_i32(s, val, a32, index, OPC | s->be_data);               \
}

static inline void gen_aa32_frob64(DisasContext *s, TCGv_i64 val)
{
    /* Not needed for user-mode BE32, where we use MO_BE instead.  */
    if (!IS_USER_ONLY && s->sctlr_b) {
        tcg_gen_rotri_i64(val, val, 32);
    }
}

static void gen_aa32_ld_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
                            int index, MemOp opc)
{
    TCGv addr = gen_aa32_addr(s, a32, opc);
    tcg_gen_qemu_ld_i64(val, addr, index, opc);
    gen_aa32_frob64(s, val);
    tcg_temp_free(addr);
}

static inline void gen_aa32_ld64(DisasContext *s, TCGv_i64 val,
                                 TCGv_i32 a32, int index)
{
    gen_aa32_ld_i64(s, val, a32, index, MO_Q | s->be_data);
}

static void gen_aa32_st_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
                            int index, MemOp opc)
{
    TCGv addr = gen_aa32_addr(s, a32, opc);

    /* Not needed for user-mode BE32, where we use MO_BE instead.  */
    if (!IS_USER_ONLY && s->sctlr_b) {
        TCGv_i64 tmp = tcg_temp_new_i64();
        tcg_gen_rotri_i64(tmp, val, 32);
        tcg_gen_qemu_st_i64(tmp, addr, index, opc);
        tcg_temp_free_i64(tmp);
    } else {
        tcg_gen_qemu_st_i64(val, addr, index, opc);
    }
    tcg_temp_free(addr);
}

static inline void gen_aa32_st64(DisasContext *s, TCGv_i64 val,
                                 TCGv_i32 a32, int index)
{
    gen_aa32_st_i64(s, val, a32, index, MO_Q | s->be_data);
}

DO_GEN_LD(8u, MO_UB)
DO_GEN_LD(16u, MO_UW)
DO_GEN_LD(32u, MO_UL)
DO_GEN_ST(8, MO_UB)
DO_GEN_ST(16, MO_UW)
DO_GEN_ST(32, MO_UL)

static inline void gen_hvc(DisasContext *s, int imm16)
{
    /* The pre HVC helper handles cases when HVC gets trapped
     * as an undefined insn by runtime configuration (ie before
     * the insn really executes).
     */
    gen_set_pc_im(s, s->pc_curr);
    gen_helper_pre_hvc(cpu_env);
    /* Otherwise we will treat this as a real exception which
     * happens after execution of the insn. (The distinction matters
     * for the PC value reported to the exception handler and also
     * for single stepping.)
     */
    s->svc_imm = imm16;
    gen_set_pc_im(s, s->base.pc_next);
    s->base.is_jmp = DISAS_HVC;
}

static inline void gen_smc(DisasContext *s)
{
    /* As with HVC, we may take an exception either before or after
     * the insn executes.
     */
    TCGv_i32 tmp;

    gen_set_pc_im(s, s->pc_curr);
    tmp = tcg_const_i32(syn_aa32_smc());
    gen_helper_pre_smc(cpu_env, tmp);
    tcg_temp_free_i32(tmp);
    gen_set_pc_im(s, s->base.pc_next);
    s->base.is_jmp = DISAS_SMC;
}

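/*
 * Raise an internal (QEMU-only) exception for the instruction at PC,
 * syncing the condexec bits and the PC into CPUARMState first.
 */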
static void gen_exception_internal_insn(DisasContext *s, uint32_t pc, int excp)
{
    gen_set_condexec(s);
    gen_set_pc_im(s, pc);
    gen_exception_internal(excp);
    s->base.is_jmp = DISAS_NORETURN;
}

static void gen_exception_insn(DisasContext *s, uint32_t pc, int excp,
                               int syn, uint32_t target_el)
{
    gen_set_condexec(s);
    gen_set_pc_im(s, pc);
    gen_exception(excp, syn, target_el);
    s->base.is_jmp = DISAS_NORETURN;
}

static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syn)
{
    TCGv_i32 tcg_syn;

    gen_set_condexec(s);
    gen_set_pc_im(s, s->pc_curr);
    tcg_syn = tcg_const_i32(syn);
    gen_helper_exception_bkpt_insn(cpu_env, tcg_syn);
    tcg_temp_free_i32(tcg_syn);
    s->base.is_jmp = DISAS_NORETURN;
}

static void unallocated_encoding(DisasContext *s)
{
    /* Unallocated and reserved encodings are uncategorized */
    gen_exception_insn(s, s->pc_curr, EXCP_UDEF, syn_uncategorized(),
                       default_exception_el(s));
}

/* Force a TB lookup after an instruction that changes the CPU state.  */
static inline void gen_lookup_tb(DisasContext *s)
{
    tcg_gen_movi_i32(cpu_R[15], s->base.pc_next);
    s->base.is_jmp = DISAS_EXIT;
}

static inline void gen_hlt(DisasContext *s, int imm)
{
    /* HLT. This has two purposes.
     * Architecturally, it is an external halting debug instruction.
     * Since QEMU doesn't implement external debug, we implement the
     * behaviour required when halting debug is disabled: it will UNDEF.
     * Secondly, "HLT 0x3C" is a T32 semihosting trap instruction,
     * and "HLT 0xF000" is an A32 semihosting syscall. These traps
     * must trigger semihosting even for ARMv7 and earlier, where
     * HLT was an undefined encoding.
     * In system mode, we don't allow userspace access to
     * semihosting, to provide some semblance of security
     * (and for consistency with our 64-bit semihosting).
     */
    if (semihosting_enabled() &&
#ifndef CONFIG_USER_ONLY
        s->current_el != 0 &&
#endif
        (imm == (s->thumb ? 0x3c : 0xf000))) {
        gen_exception_internal_insn(s, s->pc_curr, EXCP_SEMIHOST);
        return;
    }

    unallocated_encoding(s);
}

/*
 * Return the offset of a "full" NEON Dreg.
 */
static long neon_full_reg_offset(unsigned reg)
{
    return offsetof(CPUARMState, vfp.zregs[reg >> 1].d[reg & 1]);
}

/*
 * Return the offset of a 2**SIZE piece of a NEON register, at index ELE,
 * where 0 is the least significant end of the register.
 */
static long neon_element_offset(int reg, int element, MemOp memop)
{
    int element_size = 1 << (memop & MO_SIZE);
    int ofs = element * element_size;
#ifdef HOST_WORDS_BIGENDIAN
    /*
     * Calculate the offset assuming fully little-endian,
     * then XOR to account for the order of the 8-byte units.
     */
    if (element_size < 8) {
        ofs ^= 8 - element_size;
    }
#endif
    return neon_full_reg_offset(reg) + ofs;
}

/* Return the offset of a VFP Dreg (dp = true) or VFP Sreg (dp = false). */
static long vfp_reg_offset(bool dp, unsigned reg)
{
    if (dp) {
        return neon_element_offset(reg, 0, MO_64);
    } else {
        return neon_element_offset(reg >> 1, reg & 1, MO_32);
    }
}

static inline void vfp_load_reg64(TCGv_i64 var, int reg)
{
    tcg_gen_ld_i64(var, cpu_env, vfp_reg_offset(true, reg));
}

static inline void vfp_store_reg64(TCGv_i64 var, int reg)
{
    tcg_gen_st_i64(var, cpu_env, vfp_reg_offset(true, reg));
}

static inline void vfp_load_reg32(TCGv_i32 var, int reg)
{
    tcg_gen_ld_i32(var, cpu_env, vfp_reg_offset(false, reg));
}

static inline void vfp_store_reg32(TCGv_i32 var, int reg)
{
    tcg_gen_st_i32(var, cpu_env, vfp_reg_offset(false, reg));
}

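/*
 * Read an 8/16/32-bit Neon element into a 32-bit temporary, with
 * sign- or zero-extension as selected by MEMOP.
 */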
static void read_neon_element32(TCGv_i32 dest, int reg, int ele, MemOp memop)
{
    long off = neon_element_offset(reg, ele, memop);

    switch (memop) {
    case MO_SB:
        tcg_gen_ld8s_i32(dest, cpu_env, off);
        break;
    case MO_UB:
        tcg_gen_ld8u_i32(dest, cpu_env, off);
        break;
    case MO_SW:
        tcg_gen_ld16s_i32(dest, cpu_env, off);
        break;
    case MO_UW:
        tcg_gen_ld16u_i32(dest, cpu_env, off);
        break;
    case MO_UL:
    case MO_SL:
        tcg_gen_ld_i32(dest, cpu_env, off);
        break;
    default:
        g_assert_not_reached();
    }
}

static void read_neon_element64(TCGv_i64 dest, int reg, int ele, MemOp memop)
{
    long off = neon_element_offset(reg, ele, memop);

    switch (memop) {
    case MO_SL:
        tcg_gen_ld32s_i64(dest, cpu_env, off);
        break;
    case MO_UL:
        tcg_gen_ld32u_i64(dest, cpu_env, off);
        break;
    case MO_Q:
        tcg_gen_ld_i64(dest, cpu_env, off);
        break;
    default:
        g_assert_not_reached();
    }
}

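/* Store the low 8/16/32 bits of SRC into a Neon element. */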
static void write_neon_element32(TCGv_i32 src, int reg, int ele, MemOp memop)
{
    long off = neon_element_offset(reg, ele, memop);

    switch (memop) {
    case MO_8:
        tcg_gen_st8_i32(src, cpu_env, off);
        break;
    case MO_16:
        tcg_gen_st16_i32(src, cpu_env, off);
        break;
    case MO_32:
        tcg_gen_st_i32(src, cpu_env, off);
        break;
    default:
        g_assert_not_reached();
    }
}

static void write_neon_element64(TCGv_i64 src, int reg, int ele, MemOp memop)
{
    long off = neon_element_offset(reg, ele, memop);

    switch (memop) {
    case MO_64:
        tcg_gen_st_i64(src, cpu_env, off);
        break;
    default:
        g_assert_not_reached();
    }
}

static TCGv_ptr vfp_reg_ptr(bool dp, int reg)
{
    TCGv_ptr ret = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(ret, cpu_env, vfp_reg_offset(dp, reg));
    return ret;
}

#define ARM_CP_RW_BIT   (1 << 20)

/* Include the VFP and Neon decoders */
#include "decode-m-nocp.c.inc"
#include "translate-vfp.c.inc"
#include "translate-neon.c.inc"

static inline void iwmmxt_load_reg(TCGv_i64 var, int reg)
{
    tcg_gen_ld_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
}

static inline void iwmmxt_store_reg(TCGv_i64 var, int reg)
{
    tcg_gen_st_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
}

static inline TCGv_i32 iwmmxt_load_creg(int reg)
{
    TCGv_i32 var = tcg_temp_new_i32();
    tcg_gen_ld_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
    return var;
}

static inline void iwmmxt_store_creg(int reg, TCGv_i32 var)
{
    tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
    tcg_temp_free_i32(var);
}

static inline void gen_op_iwmmxt_movq_wRn_M0(int rn)
{
    iwmmxt_store_reg(cpu_M0, rn);
}

static inline void gen_op_iwmmxt_movq_M0_wRn(int rn)
{
    iwmmxt_load_reg(cpu_M0, rn);
}

static inline void gen_op_iwmmxt_orq_M0_wRn(int rn)
{
    iwmmxt_load_reg(cpu_V1, rn);
    tcg_gen_or_i64(cpu_M0, cpu_M0, cpu_V1);
}

static inline void gen_op_iwmmxt_andq_M0_wRn(int rn)
{
    iwmmxt_load_reg(cpu_V1, rn);
    tcg_gen_and_i64(cpu_M0, cpu_M0, cpu_V1);
}

static inline void gen_op_iwmmxt_xorq_M0_wRn(int rn)
{
    iwmmxt_load_reg(cpu_V1, rn);
    tcg_gen_xor_i64(cpu_M0, cpu_M0, cpu_V1);
}

#define IWMMXT_OP(name) \
static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
{ \
    iwmmxt_load_reg(cpu_V1, rn); \
    gen_helper_iwmmxt_##name(cpu_M0, cpu_M0, cpu_V1); \
}

#define IWMMXT_OP_ENV(name) \
static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
{ \
    iwmmxt_load_reg(cpu_V1, rn); \
    gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0, cpu_V1); \
}

#define IWMMXT_OP_ENV_SIZE(name) \
IWMMXT_OP_ENV(name##b) \
IWMMXT_OP_ENV(name##w) \
IWMMXT_OP_ENV(name##l)

#define IWMMXT_OP_ENV1(name) \
static inline void gen_op_iwmmxt_##name##_M0(void) \
{ \
    gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0); \
}

IWMMXT_OP(maddsq)
IWMMXT_OP(madduq)
IWMMXT_OP(sadb)
IWMMXT_OP(sadw)
IWMMXT_OP(mulslw)
IWMMXT_OP(mulshw)
IWMMXT_OP(mululw)
IWMMXT_OP(muluhw)
IWMMXT_OP(macsw)
IWMMXT_OP(macuw)

IWMMXT_OP_ENV_SIZE(unpackl)
IWMMXT_OP_ENV_SIZE(unpackh)

IWMMXT_OP_ENV1(unpacklub)
IWMMXT_OP_ENV1(unpackluw)
IWMMXT_OP_ENV1(unpacklul)
IWMMXT_OP_ENV1(unpackhub)
IWMMXT_OP_ENV1(unpackhuw)
IWMMXT_OP_ENV1(unpackhul)
IWMMXT_OP_ENV1(unpacklsb)
IWMMXT_OP_ENV1(unpacklsw)
IWMMXT_OP_ENV1(unpacklsl)
IWMMXT_OP_ENV1(unpackhsb)
IWMMXT_OP_ENV1(unpackhsw)
IWMMXT_OP_ENV1(unpackhsl)

IWMMXT_OP_ENV_SIZE(cmpeq)
IWMMXT_OP_ENV_SIZE(cmpgtu)
IWMMXT_OP_ENV_SIZE(cmpgts)

IWMMXT_OP_ENV_SIZE(mins)
IWMMXT_OP_ENV_SIZE(minu)
IWMMXT_OP_ENV_SIZE(maxs)
IWMMXT_OP_ENV_SIZE(maxu)

IWMMXT_OP_ENV_SIZE(subn)
IWMMXT_OP_ENV_SIZE(addn)
IWMMXT_OP_ENV_SIZE(subu)
IWMMXT_OP_ENV_SIZE(addu)
IWMMXT_OP_ENV_SIZE(subs)
IWMMXT_OP_ENV_SIZE(adds)

IWMMXT_OP_ENV(avgb0)
IWMMXT_OP_ENV(avgb1)
IWMMXT_OP_ENV(avgw0)
IWMMXT_OP_ENV(avgw1)

IWMMXT_OP_ENV(packuw)
IWMMXT_OP_ENV(packul)
IWMMXT_OP_ENV(packuq)
IWMMXT_OP_ENV(packsw)
IWMMXT_OP_ENV(packsl)
IWMMXT_OP_ENV(packsq)

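/* Set the MUP ("MMX register file updated") bit in wCon. */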
static void gen_op_iwmmxt_set_mup(void)
{
    TCGv_i32 tmp;
    tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
    tcg_gen_ori_i32(tmp, tmp, 2);
    store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
}

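/* Set the CUP ("control register file updated") bit in wCon. */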
static void gen_op_iwmmxt_set_cup(void)
{
    TCGv_i32 tmp;
    tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
    tcg_gen_ori_i32(tmp, tmp, 1);
    store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
}

static void gen_op_iwmmxt_setpsr_nz(void)
{
    TCGv_i32 tmp = tcg_temp_new_i32();
    gen_helper_iwmmxt_setpsr_nz(tmp, cpu_M0);
    store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCASF]);
}

static inline void gen_op_iwmmxt_addl_M0_wRn(int rn)
{
    iwmmxt_load_reg(cpu_V1, rn);
    tcg_gen_ext32u_i64(cpu_V1, cpu_V1);
    tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
}

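/*
 * Compute the effective address for an iwMMXt load/store (immediate
 * offset, pre- or post-indexed, with optional base writeback) into
 * DEST.  Returns nonzero for an invalid addressing mode.
 */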
static inline int gen_iwmmxt_address(DisasContext *s, uint32_t insn,
                                     TCGv_i32 dest)
{
    int rd;
    uint32_t offset;
    TCGv_i32 tmp;

    rd = (insn >> 16) & 0xf;
    tmp = load_reg(s, rd);

    offset = (insn & 0xff) << ((insn >> 7) & 2);
    if (insn & (1 << 24)) {
        /* Pre indexed */
        if (insn & (1 << 23))
            tcg_gen_addi_i32(tmp, tmp, offset);
        else
            tcg_gen_addi_i32(tmp, tmp, -offset);
        tcg_gen_mov_i32(dest, tmp);
        if (insn & (1 << 21))
            store_reg(s, rd, tmp);
        else
            tcg_temp_free_i32(tmp);
    } else if (insn & (1 << 21)) {
        /* Post indexed */
        tcg_gen_mov_i32(dest, tmp);
        if (insn & (1 << 23))
            tcg_gen_addi_i32(tmp, tmp, offset);
        else
            tcg_gen_addi_i32(tmp, tmp, -offset);
        store_reg(s, rd, tmp);
    } else if (!(insn & (1 << 23)))
        return 1;
    return 0;
}

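/*
 * Load the shift amount for an iwMMXt shift insn into DEST, taken
 * either from a wCGR control register or from the low half of a wRn,
 * masked with MASK.  Returns nonzero for an invalid encoding.
 */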
static inline int gen_iwmmxt_shift(uint32_t insn, uint32_t mask, TCGv_i32 dest)
{
    int rd = (insn >> 0) & 0xf;
    TCGv_i32 tmp;

    if (insn & (1 << 8)) {
        if (rd < ARM_IWMMXT_wCGR0 || rd > ARM_IWMMXT_wCGR3) {
            return 1;
        } else {
            tmp = iwmmxt_load_creg(rd);
        }
    } else {
        tmp = tcg_temp_new_i32();
        iwmmxt_load_reg(cpu_V0, rd);
        tcg_gen_extrl_i64_i32(tmp, cpu_V0);
    }
    tcg_gen_andi_i32(tmp, tmp, mask);
    tcg_gen_mov_i32(dest, tmp);
    tcg_temp_free_i32(tmp);
    return 0;
}

/* Disassemble an iwMMXt instruction.  Returns nonzero if an error occurred
   (ie. an undefined instruction).  */
static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn)
{
    int rd, wrd;
    int rdhi, rdlo, rd0, rd1, i;
    TCGv_i32 addr;
    TCGv_i32 tmp, tmp2, tmp3;

    if ((insn & 0x0e000e00) == 0x0c000000) {
        if ((insn & 0x0fe00ff0) == 0x0c400000) {
            wrd = insn & 0xf;
            rdlo = (insn >> 12) & 0xf;
            rdhi = (insn >> 16) & 0xf;
            if (insn & ARM_CP_RW_BIT) {                         /* TMRRC */
                iwmmxt_load_reg(cpu_V0, wrd);
                tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
                tcg_gen_extrh_i64_i32(cpu_R[rdhi], cpu_V0);
            } else {                                    /* TMCRR */
                tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
                iwmmxt_store_reg(cpu_V0, wrd);
                gen_op_iwmmxt_set_mup();
            }
            return 0;
        }

        wrd = (insn >> 12) & 0xf;
        addr = tcg_temp_new_i32();
        if (gen_iwmmxt_address(s, insn, addr)) {
            tcg_temp_free_i32(addr);
            return 1;
        }
        if (insn & ARM_CP_RW_BIT) {
            if ((insn >> 28) == 0xf) {                  /* WLDRW wCx */
                tmp = tcg_temp_new_i32();
                gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
                iwmmxt_store_creg(wrd, tmp);
            } else {
                i = 1;
                if (insn & (1 << 8)) {
                    if (insn & (1 << 22)) {             /* WLDRD */
                        gen_aa32_ld64(s, cpu_M0, addr, get_mem_index(s));
                        i = 0;
                    } else {                            /* WLDRW wRd */
                        tmp = tcg_temp_new_i32();
                        gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
                    }
                } else {
                    tmp = tcg_temp_new_i32();
                    if (insn & (1 << 22)) {             /* WLDRH */
                        gen_aa32_ld16u(s, tmp, addr, get_mem_index(s));
                    } else {                            /* WLDRB */
                        gen_aa32_ld8u(s, tmp, addr, get_mem_index(s));
                    }
                }
                if (i) {
                    tcg_gen_extu_i32_i64(cpu_M0, tmp);
                    tcg_temp_free_i32(tmp);
                }
                gen_op_iwmmxt_movq_wRn_M0(wrd);
            }
        } else {
            if ((insn >> 28) == 0xf) {                  /* WSTRW wCx */
                tmp = iwmmxt_load_creg(wrd);
                gen_aa32_st32(s, tmp, addr, get_mem_index(s));
            } else {
                gen_op_iwmmxt_movq_M0_wRn(wrd);
                tmp = tcg_temp_new_i32();
                if (insn & (1 << 8)) {
                    if (insn & (1 << 22)) {             /* WSTRD */
                        gen_aa32_st64(s, cpu_M0, addr, get_mem_index(s));
                    } else {                            /* WSTRW wRd */
                        tcg_gen_extrl_i64_i32(tmp, cpu_M0);
                        gen_aa32_st32(s, tmp, addr, get_mem_index(s));
                    }
                } else {
                    if (insn & (1 << 22)) {             /* WSTRH */
                        tcg_gen_extrl_i64_i32(tmp, cpu_M0);
                        gen_aa32_st16(s, tmp, addr, get_mem_index(s));
                    } else {                            /* WSTRB */
                        tcg_gen_extrl_i64_i32(tmp, cpu_M0);
                        gen_aa32_st8(s, tmp, addr, get_mem_index(s));
                    }
                }
            }
            tcg_temp_free_i32(tmp);
        }
        tcg_temp_free_i32(addr);
        return 0;
    }

    if ((insn & 0x0f000000) != 0x0e000000)
        return 1;

    switch (((insn >> 12) & 0xf00) | ((insn >> 4) & 0xff)) {
    case 0x000:                                                 /* WOR */
        wrd = (insn >> 12) & 0xf;
        rd0 = (insn >> 0) & 0xf;
        rd1 = (insn >> 16) & 0xf;
        gen_op_iwmmxt_movq_M0_wRn(rd0);
        gen_op_iwmmxt_orq_M0_wRn(rd1);
        gen_op_iwmmxt_setpsr_nz();
        gen_op_iwmmxt_movq_wRn_M0(wrd);
        gen_op_iwmmxt_set_mup();
        gen_op_iwmmxt_set_cup();
        break;
    case 0x011:                                                 /* TMCR */
        if (insn & 0xf)
            return 1;
        rd = (insn >> 12) & 0xf;
        wrd = (insn >> 16) & 0xf;
        switch (wrd) {
        case ARM_IWMMXT_wCID:
        case ARM_IWMMXT_wCASF:
            break;
        case ARM_IWMMXT_wCon:
            gen_op_iwmmxt_set_cup();
            /* Fall through.  */
        case ARM_IWMMXT_wCSSF:
            tmp = iwmmxt_load_creg(wrd);
            tmp2 = load_reg(s, rd);
            tcg_gen_andc_i32(tmp, tmp, tmp2);
            tcg_temp_free_i32(tmp2);
            iwmmxt_store_creg(wrd, tmp);
            break;
        case ARM_IWMMXT_wCGR0:
        case ARM_IWMMXT_wCGR1:
        case ARM_IWMMXT_wCGR2:
        case ARM_IWMMXT_wCGR3:
            gen_op_iwmmxt_set_cup();
            tmp = load_reg(s, rd);
            iwmmxt_store_creg(wrd, tmp);
            break;
        default:
            return 1;
        }
        break;
    case 0x100:                                                 /* WXOR */
        wrd = (insn >> 12) & 0xf;
        rd0 = (insn >> 0) & 0xf;
        rd1 = (insn >> 16) & 0xf;
        gen_op_iwmmxt_movq_M0_wRn(rd0);
        gen_op_iwmmxt_xorq_M0_wRn(rd1);
        gen_op_iwmmxt_setpsr_nz();
        gen_op_iwmmxt_movq_wRn_M0(wrd);
        gen_op_iwmmxt_set_mup();
        gen_op_iwmmxt_set_cup();
        break;
    case 0x111:                                                 /* TMRC */
        if (insn & 0xf)
            return 1;
        rd = (insn >> 12) & 0xf;
        wrd = (insn >> 16) & 0xf;
        tmp = iwmmxt_load_creg(wrd);
        store_reg(s, rd, tmp);
        break;
    case 0x300:                                                 /* WANDN */
        wrd = (insn >> 12) & 0xf;
        rd0 = (insn >> 0) & 0xf;
        rd1 = (insn >> 16) & 0xf;
        gen_op_iwmmxt_movq_M0_wRn(rd0);
1625        tcg_gen_not_i64(cpu_M0, cpu_M0);    /* bitwise complement for AND-NOT */
1626        gen_op_iwmmxt_andq_M0_wRn(rd1);
1627        gen_op_iwmmxt_setpsr_nz();
1628        gen_op_iwmmxt_movq_wRn_M0(wrd);
1629        gen_op_iwmmxt_set_mup();
1630        gen_op_iwmmxt_set_cup();
1631        break;
1632    case 0x200:                                                 /* WAND */
1633        wrd = (insn >> 12) & 0xf;
1634        rd0 = (insn >> 0) & 0xf;
1635        rd1 = (insn >> 16) & 0xf;
1636        gen_op_iwmmxt_movq_M0_wRn(rd0);
1637        gen_op_iwmmxt_andq_M0_wRn(rd1);
1638        gen_op_iwmmxt_setpsr_nz();
1639        gen_op_iwmmxt_movq_wRn_M0(wrd);
1640        gen_op_iwmmxt_set_mup();
1641        gen_op_iwmmxt_set_cup();
1642        break;
1643    case 0x810: case 0xa10:                             /* WMADD */
1644        wrd = (insn >> 12) & 0xf;
1645        rd0 = (insn >> 0) & 0xf;
1646        rd1 = (insn >> 16) & 0xf;
1647        gen_op_iwmmxt_movq_M0_wRn(rd0);
1648        if (insn & (1 << 21))
1649            gen_op_iwmmxt_maddsq_M0_wRn(rd1);
1650        else
1651            gen_op_iwmmxt_madduq_M0_wRn(rd1);
1652        gen_op_iwmmxt_movq_wRn_M0(wrd);
1653        gen_op_iwmmxt_set_mup();
1654        break;
1655    case 0x10e: case 0x50e: case 0x90e: case 0xd0e:     /* WUNPCKIL */
1656        wrd = (insn >> 12) & 0xf;
1657        rd0 = (insn >> 16) & 0xf;
1658        rd1 = (insn >> 0) & 0xf;
1659        gen_op_iwmmxt_movq_M0_wRn(rd0);
1660        switch ((insn >> 22) & 3) {
1661        case 0:
1662            gen_op_iwmmxt_unpacklb_M0_wRn(rd1);
1663            break;
1664        case 1:
1665            gen_op_iwmmxt_unpacklw_M0_wRn(rd1);
1666            break;
1667        case 2:
1668            gen_op_iwmmxt_unpackll_M0_wRn(rd1);
1669            break;
1670        case 3:
1671            return 1;
1672        }
1673        gen_op_iwmmxt_movq_wRn_M0(wrd);
1674        gen_op_iwmmxt_set_mup();
1675        gen_op_iwmmxt_set_cup();
1676        break;
1677    case 0x10c: case 0x50c: case 0x90c: case 0xd0c:     /* WUNPCKIH */
1678        wrd = (insn >> 12) & 0xf;
1679        rd0 = (insn >> 16) & 0xf;
1680        rd1 = (insn >> 0) & 0xf;
1681        gen_op_iwmmxt_movq_M0_wRn(rd0);
1682        switch ((insn >> 22) & 3) {
1683        case 0:
1684            gen_op_iwmmxt_unpackhb_M0_wRn(rd1);
1685            break;
1686        case 1:
1687            gen_op_iwmmxt_unpackhw_M0_wRn(rd1);
1688            break;
1689        case 2:
1690            gen_op_iwmmxt_unpackhl_M0_wRn(rd1);
1691            break;
1692        case 3:
1693            return 1;
1694        }
1695        gen_op_iwmmxt_movq_wRn_M0(wrd);
1696        gen_op_iwmmxt_set_mup();
1697        gen_op_iwmmxt_set_cup();
1698        break;
1699    case 0x012: case 0x112: case 0x412: case 0x512:     /* WSAD */
1700        wrd = (insn >> 12) & 0xf;
1701        rd0 = (insn >> 16) & 0xf;
1702        rd1 = (insn >> 0) & 0xf;
1703        gen_op_iwmmxt_movq_M0_wRn(rd0);
1704        if (insn & (1 << 22))
1705            gen_op_iwmmxt_sadw_M0_wRn(rd1);
1706        else
1707            gen_op_iwmmxt_sadb_M0_wRn(rd1);
1708        if (!(insn & (1 << 20)))
1709            gen_op_iwmmxt_addl_M0_wRn(wrd);
1710        gen_op_iwmmxt_movq_wRn_M0(wrd);
1711        gen_op_iwmmxt_set_mup();
1712        break;
1713    case 0x010: case 0x110: case 0x210: case 0x310:     /* WMUL */
1714        wrd = (insn >> 12) & 0xf;
1715        rd0 = (insn >> 16) & 0xf;
1716        rd1 = (insn >> 0) & 0xf;
1717        gen_op_iwmmxt_movq_M0_wRn(rd0);
1718        if (insn & (1 << 21)) {
1719            if (insn & (1 << 20))
1720                gen_op_iwmmxt_mulshw_M0_wRn(rd1);
1721            else
1722                gen_op_iwmmxt_mulslw_M0_wRn(rd1);
1723        } else {
1724            if (insn & (1 << 20))
1725                gen_op_iwmmxt_muluhw_M0_wRn(rd1);
1726            else
1727                gen_op_iwmmxt_mululw_M0_wRn(rd1);
1728        }
1729        gen_op_iwmmxt_movq_wRn_M0(wrd);
1730        gen_op_iwmmxt_set_mup();
1731        break;
1732    case 0x410: case 0x510: case 0x610: case 0x710:     /* WMAC */
1733        wrd = (insn >> 12) & 0xf;
1734        rd0 = (insn >> 16) & 0xf;
1735        rd1 = (insn >> 0) & 0xf;
1736        gen_op_iwmmxt_movq_M0_wRn(rd0);
1737        if (insn & (1 << 21))
1738            gen_op_iwmmxt_macsw_M0_wRn(rd1);
1739        else
1740            gen_op_iwmmxt_macuw_M0_wRn(rd1);
1741        if (!(insn & (1 << 20))) {
1742            iwmmxt_load_reg(cpu_V1, wrd);
1743            tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
1744        }
1745        gen_op_iwmmxt_movq_wRn_M0(wrd);
1746        gen_op_iwmmxt_set_mup();
1747        break;
1748    case 0x006: case 0x406: case 0x806: case 0xc06:     /* WCMPEQ */
1749        wrd = (insn >> 12) & 0xf;
1750        rd0 = (insn >> 16) & 0xf;
1751        rd1 = (insn >> 0) & 0xf;
1752        gen_op_iwmmxt_movq_M0_wRn(rd0);
1753        switch ((insn >> 22) & 3) {
1754        case 0:
1755            gen_op_iwmmxt_cmpeqb_M0_wRn(rd1);
1756            break;
1757        case 1:
1758            gen_op_iwmmxt_cmpeqw_M0_wRn(rd1);
1759            break;
1760        case 2:
1761            gen_op_iwmmxt_cmpeql_M0_wRn(rd1);
1762            break;
1763        case 3:
1764            return 1;
1765        }
1766        gen_op_iwmmxt_movq_wRn_M0(wrd);
1767        gen_op_iwmmxt_set_mup();
1768        gen_op_iwmmxt_set_cup();
1769        break;
1770    case 0x800: case 0x900: case 0xc00: case 0xd00:     /* WAVG2 */
1771        wrd = (insn >> 12) & 0xf;
1772        rd0 = (insn >> 16) & 0xf;
1773        rd1 = (insn >> 0) & 0xf;
1774        gen_op_iwmmxt_movq_M0_wRn(rd0);
1775        if (insn & (1 << 22)) {
1776            if (insn & (1 << 20))
1777                gen_op_iwmmxt_avgw1_M0_wRn(rd1);
1778            else
1779                gen_op_iwmmxt_avgw0_M0_wRn(rd1);
1780        } else {
1781            if (insn & (1 << 20))
1782                gen_op_iwmmxt_avgb1_M0_wRn(rd1);
1783            else
1784                gen_op_iwmmxt_avgb0_M0_wRn(rd1);
1785        }
1786        gen_op_iwmmxt_movq_wRn_M0(wrd);
1787        gen_op_iwmmxt_set_mup();
1788        gen_op_iwmmxt_set_cup();
1789        break;
1790    case 0x802: case 0x902: case 0xa02: case 0xb02:     /* WALIGNR */
1791        wrd = (insn >> 12) & 0xf;
1792        rd0 = (insn >> 16) & 0xf;
1793        rd1 = (insn >> 0) & 0xf;
1794        gen_op_iwmmxt_movq_M0_wRn(rd0);
1795        tmp = iwmmxt_load_creg(ARM_IWMMXT_wCGR0 + ((insn >> 20) & 3));
1796        tcg_gen_andi_i32(tmp, tmp, 7);
1797        iwmmxt_load_reg(cpu_V1, rd1);
1798        gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp);
1799        tcg_temp_free_i32(tmp);
1800        gen_op_iwmmxt_movq_wRn_M0(wrd);
1801        gen_op_iwmmxt_set_mup();
1802        break;
1803    case 0x601: case 0x605: case 0x609: case 0x60d:     /* TINSR */
1804        if (((insn >> 6) & 3) == 3)
1805            return 1;
1806        rd = (insn >> 12) & 0xf;
1807        wrd = (insn >> 16) & 0xf;
1808        tmp = load_reg(s, rd);
1809        gen_op_iwmmxt_movq_M0_wRn(wrd);
1810        switch ((insn >> 6) & 3) {
1811        case 0:
1812            tmp2 = tcg_const_i32(0xff);
1813            tmp3 = tcg_const_i32((insn & 7) << 3);
1814            break;
1815        case 1:
1816            tmp2 = tcg_const_i32(0xffff);
1817            tmp3 = tcg_const_i32((insn & 3) << 4);
1818            break;
1819        case 2:
1820            tmp2 = tcg_const_i32(0xffffffff);
1821            tmp3 = tcg_const_i32((insn & 1) << 5);
1822            break;
1823        default:
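                /* Unreachable: size 3 was rejected above; the NULLs only
                   keep the compiler from warning about uninitialized use. */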
1824            tmp2 = NULL;
1825            tmp3 = NULL;
1826        }
1827        gen_helper_iwmmxt_insr(cpu_M0, cpu_M0, tmp, tmp2, tmp3);
1828        tcg_temp_free_i32(tmp3);
1829        tcg_temp_free_i32(tmp2);
1830        tcg_temp_free_i32(tmp);
1831        gen_op_iwmmxt_movq_wRn_M0(wrd);
1832        gen_op_iwmmxt_set_mup();
1833        break;
1834    case 0x107: case 0x507: case 0x907: case 0xd07:     /* TEXTRM */
1835        rd = (insn >> 12) & 0xf;
1836        wrd = (insn >> 16) & 0xf;
1837        if (rd == 15 || ((insn >> 22) & 3) == 3)
1838            return 1;
1839        gen_op_iwmmxt_movq_M0_wRn(wrd);
1840        tmp = tcg_temp_new_i32();
1841        switch ((insn >> 22) & 3) {
1842        case 0:
1843            tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 7) << 3);
1844            tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1845            if (insn & 8) {
1846                tcg_gen_ext8s_i32(tmp, tmp);
1847            } else {
1848                tcg_gen_andi_i32(tmp, tmp, 0xff);
1849            }
1850            break;
1851        case 1:
1852            tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 3) << 4);
1853            tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1854            if (insn & 8) {
1855                tcg_gen_ext16s_i32(tmp, tmp);
1856            } else {
1857                tcg_gen_andi_i32(tmp, tmp, 0xffff);
1858            }
1859            break;
1860        case 2:
1861            tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 1) << 5);
1862            tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1863            break;
1864        }
1865        store_reg(s, rd, tmp);
1866        break;
1867    case 0x117: case 0x517: case 0x917: case 0xd17:     /* TEXTRC */
1868        if ((insn & 0x000ff008) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1869            return 1;
1870        tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1871        switch ((insn >> 22) & 3) {
1872        case 0:
1873            tcg_gen_shri_i32(tmp, tmp, ((insn & 7) << 2) + 0);
1874            break;
1875        case 1:
1876            tcg_gen_shri_i32(tmp, tmp, ((insn & 3) << 3) + 4);
1877            break;
1878        case 2:
1879            tcg_gen_shri_i32(tmp, tmp, ((insn & 1) << 4) + 12);
1880            break;
1881        }
1882        tcg_gen_shli_i32(tmp, tmp, 28);
1883        gen_set_nzcv(tmp);
1884        tcg_temp_free_i32(tmp);
1885        break;
1886    case 0x401: case 0x405: case 0x409: case 0x40d:     /* TBCST */
1887        if (((insn >> 6) & 3) == 3)
1888            return 1;
1889        rd = (insn >> 12) & 0xf;
1890        wrd = (insn >> 16) & 0xf;
1891        tmp = load_reg(s, rd);
1892        switch ((insn >> 6) & 3) {
1893        case 0:
1894            gen_helper_iwmmxt_bcstb(cpu_M0, tmp);
1895            break;
1896        case 1:
1897            gen_helper_iwmmxt_bcstw(cpu_M0, tmp);
1898            break;
1899        case 2:
1900            gen_helper_iwmmxt_bcstl(cpu_M0, tmp);
1901            break;
1902        }
1903        tcg_temp_free_i32(tmp);
1904        gen_op_iwmmxt_movq_wRn_M0(wrd);
1905        gen_op_iwmmxt_set_mup();
1906        break;
1907    case 0x113: case 0x513: case 0x913: case 0xd13:     /* TANDC */
1908        if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1909            return 1;
1910        tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1911        tmp2 = tcg_temp_new_i32();
1912        tcg_gen_mov_i32(tmp2, tmp);
1913        switch ((insn >> 22) & 3) {
1914        case 0:
1915            for (i = 0; i < 7; i++) {
1916                tcg_gen_shli_i32(tmp2, tmp2, 4);
1917                tcg_gen_and_i32(tmp, tmp, tmp2);
1918            }
1919            break;
1920        case 1:
1921            for (i = 0; i < 3; i++) {
1922                tcg_gen_shli_i32(tmp2, tmp2, 8);
1923                tcg_gen_and_i32(tmp, tmp, tmp2);
1924            }
1925            break;
1926        case 2:
1927            tcg_gen_shli_i32(tmp2, tmp2, 16);
1928            tcg_gen_and_i32(tmp, tmp, tmp2);
1929            break;
1930        }
1931        gen_set_nzcv(tmp);
1932        tcg_temp_free_i32(tmp2);
1933        tcg_temp_free_i32(tmp);
1934        break;
1935    case 0x01c: case 0x41c: case 0x81c: case 0xc1c:     /* WACC */
1936        wrd = (insn >> 12) & 0xf;
1937        rd0 = (insn >> 16) & 0xf;
1938        gen_op_iwmmxt_movq_M0_wRn(rd0);
1939        switch ((insn >> 22) & 3) {
1940        case 0:
1941            gen_helper_iwmmxt_addcb(cpu_M0, cpu_M0);
1942            break;
1943        case 1:
1944            gen_helper_iwmmxt_addcw(cpu_M0, cpu_M0);
1945            break;
1946        case 2:
1947            gen_helper_iwmmxt_addcl(cpu_M0, cpu_M0);
1948            break;
1949        case 3:
1950            return 1;
1951        }
1952        gen_op_iwmmxt_movq_wRn_M0(wrd);
1953        gen_op_iwmmxt_set_mup();
1954        break;
1955    case 0x115: case 0x515: case 0x915: case 0xd15:     /* TORC */
1956        if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1957            return 1;
1958        tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1959        tmp2 = tcg_temp_new_i32();
1960        tcg_gen_mov_i32(tmp2, tmp);
1961        switch ((insn >> 22) & 3) {
1962        case 0:
1963            for (i = 0; i < 7; i++) {
1964                tcg_gen_shli_i32(tmp2, tmp2, 4);
1965                tcg_gen_or_i32(tmp, tmp, tmp2);
1966            }
1967            break;
1968        case 1:
1969            for (i = 0; i < 3; i++) {
1970                tcg_gen_shli_i32(tmp2, tmp2, 8);
1971                tcg_gen_or_i32(tmp, tmp, tmp2);
1972            }
1973            break;
1974        case 2:
1975            tcg_gen_shli_i32(tmp2, tmp2, 16);
1976            tcg_gen_or_i32(tmp, tmp, tmp2);
1977            break;
1978        }
1979        gen_set_nzcv(tmp);
1980        tcg_temp_free_i32(tmp2);
1981        tcg_temp_free_i32(tmp);
1982        break;
1983    case 0x103: case 0x503: case 0x903: case 0xd03:     /* TMOVMSK */
1984        rd = (insn >> 12) & 0xf;
1985        rd0 = (insn >> 16) & 0xf;
1986        if ((insn & 0xf) != 0 || ((insn >> 22) & 3) == 3)
1987            return 1;
1988        gen_op_iwmmxt_movq_M0_wRn(rd0);
1989        tmp = tcg_temp_new_i32();
1990        switch ((insn >> 22) & 3) {
1991        case 0:
1992            gen_helper_iwmmxt_msbb(tmp, cpu_M0);
1993            break;
1994        case 1:
1995            gen_helper_iwmmxt_msbw(tmp, cpu_M0);
1996            break;
1997        case 2:
1998            gen_helper_iwmmxt_msbl(tmp, cpu_M0);
1999            break;
2000        }
2001        store_reg(s, rd, tmp);
2002        break;
2003    case 0x106: case 0x306: case 0x506: case 0x706:     /* WCMPGT */
2004    case 0x906: case 0xb06: case 0xd06: case 0xf06:
2005        wrd = (insn >> 12) & 0xf;
2006        rd0 = (insn >> 16) & 0xf;
2007        rd1 = (insn >> 0) & 0xf;
2008        gen_op_iwmmxt_movq_M0_wRn(rd0);
2009        switch ((insn >> 22) & 3) {
2010        case 0:
2011            if (insn & (1 << 21))
2012                gen_op_iwmmxt_cmpgtsb_M0_wRn(rd1);
2013            else
2014                gen_op_iwmmxt_cmpgtub_M0_wRn(rd1);
2015            break;
2016        case 1:
2017            if (insn & (1 << 21))
2018                gen_op_iwmmxt_cmpgtsw_M0_wRn(rd1);
2019            else
2020                gen_op_iwmmxt_cmpgtuw_M0_wRn(rd1);
2021            break;
2022        case 2:
2023            if (insn & (1 << 21))
2024                gen_op_iwmmxt_cmpgtsl_M0_wRn(rd1);
2025            else
2026                gen_op_iwmmxt_cmpgtul_M0_wRn(rd1);
2027            break;
2028        case 3:
2029            return 1;
2030        }
2031        gen_op_iwmmxt_movq_wRn_M0(wrd);
2032        gen_op_iwmmxt_set_mup();
2033        gen_op_iwmmxt_set_cup();
2034        break;
2035    case 0x00e: case 0x20e: case 0x40e: case 0x60e:     /* WUNPCKEL */
2036    case 0x80e: case 0xa0e: case 0xc0e: case 0xe0e:
2037        wrd = (insn >> 12) & 0xf;
2038        rd0 = (insn >> 16) & 0xf;
2039        gen_op_iwmmxt_movq_M0_wRn(rd0);
2040        switch ((insn >> 22) & 3) {
2041        case 0:
2042            if (insn & (1 << 21))
2043                gen_op_iwmmxt_unpacklsb_M0();
2044            else
2045                gen_op_iwmmxt_unpacklub_M0();
2046            break;
2047        case 1:
2048            if (insn & (1 << 21))
2049                gen_op_iwmmxt_unpacklsw_M0();
2050            else
2051                gen_op_iwmmxt_unpackluw_M0();
2052            break;
2053        case 2:
2054            if (insn & (1 << 21))
2055                gen_op_iwmmxt_unpacklsl_M0();
2056            else
2057                gen_op_iwmmxt_unpacklul_M0();
2058            break;
2059        case 3:
2060            return 1;
2061        }
2062        gen_op_iwmmxt_movq_wRn_M0(wrd);
2063        gen_op_iwmmxt_set_mup();
2064        gen_op_iwmmxt_set_cup();
2065        break;
2066    case 0x00c: case 0x20c: case 0x40c: case 0x60c:     /* WUNPCKEH */
2067    case 0x80c: case 0xa0c: case 0xc0c: case 0xe0c:
2068        wrd = (insn >> 12) & 0xf;
2069        rd0 = (insn >> 16) & 0xf;
2070        gen_op_iwmmxt_movq_M0_wRn(rd0);
2071        switch ((insn >> 22) & 3) {
2072        case 0:
2073            if (insn & (1 << 21))
2074                gen_op_iwmmxt_unpackhsb_M0();
2075            else
2076                gen_op_iwmmxt_unpackhub_M0();
2077            break;
2078        case 1:
2079            if (insn & (1 << 21))
2080                gen_op_iwmmxt_unpackhsw_M0();
2081            else
2082                gen_op_iwmmxt_unpackhuw_M0();
2083            break;
2084        case 2:
2085            if (insn & (1 << 21))
2086                gen_op_iwmmxt_unpackhsl_M0();
2087            else
2088                gen_op_iwmmxt_unpackhul_M0();
2089            break;
2090        case 3:
2091            return 1;
2092        }
2093        gen_op_iwmmxt_movq_wRn_M0(wrd);
2094        gen_op_iwmmxt_set_mup();
2095        gen_op_iwmmxt_set_cup();
2096        break;
2097    case 0x204: case 0x604: case 0xa04: case 0xe04:     /* WSRL */
2098    case 0x214: case 0x614: case 0xa14: case 0xe14:
2099        if (((insn >> 22) & 3) == 0)
2100            return 1;
2101        wrd = (insn >> 12) & 0xf;
2102        rd0 = (insn >> 16) & 0xf;
2103        gen_op_iwmmxt_movq_M0_wRn(rd0);
2104        tmp = tcg_temp_new_i32();
2105        if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2106            tcg_temp_free_i32(tmp);
2107            return 1;
2108        }
2109        switch ((insn >> 22) & 3) {
2110        case 1:
2111            gen_helper_iwmmxt_srlw(cpu_M0, cpu_env, cpu_M0, tmp);
2112            break;
2113        case 2:
2114            gen_helper_iwmmxt_srll(cpu_M0, cpu_env, cpu_M0, tmp);
2115            break;
2116        case 3:
2117            gen_helper_iwmmxt_srlq(cpu_M0, cpu_env, cpu_M0, tmp);
2118            break;
2119        }
2120        tcg_temp_free_i32(tmp);
2121        gen_op_iwmmxt_movq_wRn_M0(wrd);
2122        gen_op_iwmmxt_set_mup();
2123        gen_op_iwmmxt_set_cup();
2124        break;
2125    case 0x004: case 0x404: case 0x804: case 0xc04:     /* WSRA */
2126    case 0x014: case 0x414: case 0x814: case 0xc14:
2127        if (((insn >> 22) & 3) == 0)
2128            return 1;
2129        wrd = (insn >> 12) & 0xf;
2130        rd0 = (insn >> 16) & 0xf;
2131        gen_op_iwmmxt_movq_M0_wRn(rd0);
2132        tmp = tcg_temp_new_i32();
2133        if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2134            tcg_temp_free_i32(tmp);
2135            return 1;
2136        }
2137        switch ((insn >> 22) & 3) {
2138        case 1:
2139            gen_helper_iwmmxt_sraw(cpu_M0, cpu_env, cpu_M0, tmp);
2140            break;
2141        case 2:
2142            gen_helper_iwmmxt_sral(cpu_M0, cpu_env, cpu_M0, tmp);
2143            break;
2144        case 3:
2145            gen_helper_iwmmxt_sraq(cpu_M0, cpu_env, cpu_M0, tmp);
2146            break;
2147        }
2148        tcg_temp_free_i32(tmp);
2149        gen_op_iwmmxt_movq_wRn_M0(wrd);
2150        gen_op_iwmmxt_set_mup();
2151        gen_op_iwmmxt_set_cup();
2152        break;
2153    case 0x104: case 0x504: case 0x904: case 0xd04:     /* WSLL */
2154    case 0x114: case 0x514: case 0x914: case 0xd14:
2155        if (((insn >> 22) & 3) == 0)
2156            return 1;
2157        wrd = (insn >> 12) & 0xf;
2158        rd0 = (insn >> 16) & 0xf;
2159        gen_op_iwmmxt_movq_M0_wRn(rd0);
2160        tmp = tcg_temp_new_i32();
2161        if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2162            tcg_temp_free_i32(tmp);
2163            return 1;
2164        }
2165        switch ((insn >> 22) & 3) {
2166        case 1:
2167            gen_helper_iwmmxt_sllw(cpu_M0, cpu_env, cpu_M0, tmp);
2168            break;
2169        case 2:
2170            gen_helper_iwmmxt_slll(cpu_M0, cpu_env, cpu_M0, tmp);
2171            break;
2172        case 3:
2173            gen_helper_iwmmxt_sllq(cpu_M0, cpu_env, cpu_M0, tmp);
2174            break;
2175        }
2176        tcg_temp_free_i32(tmp);
2177        gen_op_iwmmxt_movq_wRn_M0(wrd);
2178        gen_op_iwmmxt_set_mup();
2179        gen_op_iwmmxt_set_cup();
2180        break;
2181    case 0x304: case 0x704: case 0xb04: case 0xf04:     /* WROR */
2182    case 0x314: case 0x714: case 0xb14: case 0xf14:
2183        if (((insn >> 22) & 3) == 0)
2184            return 1;
2185        wrd = (insn >> 12) & 0xf;
2186        rd0 = (insn >> 16) & 0xf;
2187        gen_op_iwmmxt_movq_M0_wRn(rd0);
2188        tmp = tcg_temp_new_i32();
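            /* Unlike the WSRL/WSRA/WSLL cases above, which accept a full
               8-bit count, the rotate count is masked to the element width
               below (a rotate by the full width is a no-op). */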
2189        switch ((insn >> 22) & 3) {
2190        case 1:
2191            if (gen_iwmmxt_shift(insn, 0xf, tmp)) {
2192                tcg_temp_free_i32(tmp);
2193                return 1;
2194            }
2195            gen_helper_iwmmxt_rorw(cpu_M0, cpu_env, cpu_M0, tmp);
2196            break;
2197        case 2:
2198            if (gen_iwmmxt_shift(insn, 0x1f, tmp)) {
2199                tcg_temp_free_i32(tmp);
2200                return 1;
2201            }
2202            gen_helper_iwmmxt_rorl(cpu_M0, cpu_env, cpu_M0, tmp);
2203            break;
2204        case 3:
2205            if (gen_iwmmxt_shift(insn, 0x3f, tmp)) {
2206                tcg_temp_free_i32(tmp);
2207                return 1;
2208            }
2209            gen_helper_iwmmxt_rorq(cpu_M0, cpu_env, cpu_M0, tmp);
2210            break;
2211        }
2212        tcg_temp_free_i32(tmp);
2213        gen_op_iwmmxt_movq_wRn_M0(wrd);
2214        gen_op_iwmmxt_set_mup();
2215        gen_op_iwmmxt_set_cup();
2216        break;
2217    case 0x116: case 0x316: case 0x516: case 0x716:     /* WMIN */
2218    case 0x916: case 0xb16: case 0xd16: case 0xf16:
2219        wrd = (insn >> 12) & 0xf;
2220        rd0 = (insn >> 16) & 0xf;
2221        rd1 = (insn >> 0) & 0xf;
2222        gen_op_iwmmxt_movq_M0_wRn(rd0);
2223        switch ((insn >> 22) & 3) {
2224        case 0:
2225            if (insn & (1 << 21))
2226                gen_op_iwmmxt_minsb_M0_wRn(rd1);
2227            else
2228                gen_op_iwmmxt_minub_M0_wRn(rd1);
2229            break;
2230        case 1:
2231            if (insn & (1 << 21))
2232                gen_op_iwmmxt_minsw_M0_wRn(rd1);
2233            else
2234                gen_op_iwmmxt_minuw_M0_wRn(rd1);
2235            break;
2236        case 2:
2237            if (insn & (1 << 21))
2238                gen_op_iwmmxt_minsl_M0_wRn(rd1);
2239            else
2240                gen_op_iwmmxt_minul_M0_wRn(rd1);
2241            break;
2242        case 3:
2243            return 1;
2244        }
2245        gen_op_iwmmxt_movq_wRn_M0(wrd);
2246        gen_op_iwmmxt_set_mup();
2247        break;
2248    case 0x016: case 0x216: case 0x416: case 0x616:     /* WMAX */
2249    case 0x816: case 0xa16: case 0xc16: case 0xe16:
2250        wrd = (insn >> 12) & 0xf;
2251        rd0 = (insn >> 16) & 0xf;
2252        rd1 = (insn >> 0) & 0xf;
2253        gen_op_iwmmxt_movq_M0_wRn(rd0);
2254        switch ((insn >> 22) & 3) {
2255        case 0:
2256            if (insn & (1 << 21))
2257                gen_op_iwmmxt_maxsb_M0_wRn(rd1);
2258            else
2259                gen_op_iwmmxt_maxub_M0_wRn(rd1);
2260            break;
2261        case 1:
2262            if (insn & (1 << 21))
2263                gen_op_iwmmxt_maxsw_M0_wRn(rd1);
2264            else
2265                gen_op_iwmmxt_maxuw_M0_wRn(rd1);
2266            break;
2267        case 2:
2268            if (insn & (1 << 21))
2269                gen_op_iwmmxt_maxsl_M0_wRn(rd1);
2270            else
2271                gen_op_iwmmxt_maxul_M0_wRn(rd1);
2272            break;
2273        case 3:
2274            return 1;
2275        }
2276        gen_op_iwmmxt_movq_wRn_M0(wrd);
2277        gen_op_iwmmxt_set_mup();
2278        break;
2279    case 0x002: case 0x102: case 0x202: case 0x302:     /* WALIGNI */
2280    case 0x402: case 0x502: case 0x602: case 0x702:
2281        wrd = (insn >> 12) & 0xf;
2282        rd0 = (insn >> 16) & 0xf;
2283        rd1 = (insn >> 0) & 0xf;
2284        gen_op_iwmmxt_movq_M0_wRn(rd0);
2285        tmp = tcg_const_i32((insn >> 20) & 3);
2286        iwmmxt_load_reg(cpu_V1, rd1);
2287        gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp);
2288        tcg_temp_free_i32(tmp);
2289        gen_op_iwmmxt_movq_wRn_M0(wrd);
2290        gen_op_iwmmxt_set_mup();
2291        break;
2292    case 0x01a: case 0x11a: case 0x21a: case 0x31a:     /* WSUB */
2293    case 0x41a: case 0x51a: case 0x61a: case 0x71a:
2294    case 0x81a: case 0x91a: case 0xa1a: case 0xb1a:
2295    case 0xc1a: case 0xd1a: case 0xe1a: case 0xf1a:
2296        wrd = (insn >> 12) & 0xf;
2297        rd0 = (insn >> 16) & 0xf;
2298        rd1 = (insn >> 0) & 0xf;
2299        gen_op_iwmmxt_movq_M0_wRn(rd0);
2300        switch ((insn >> 20) & 0xf) {
2301        case 0x0:
2302            gen_op_iwmmxt_subnb_M0_wRn(rd1);
2303            break;
2304        case 0x1:
2305            gen_op_iwmmxt_subub_M0_wRn(rd1);
2306            break;
2307        case 0x3:
2308            gen_op_iwmmxt_subsb_M0_wRn(rd1);
2309            break;
2310        case 0x4:
2311            gen_op_iwmmxt_subnw_M0_wRn(rd1);
2312            break;
2313        case 0x5:
2314            gen_op_iwmmxt_subuw_M0_wRn(rd1);
2315            break;
2316        case 0x7:
2317            gen_op_iwmmxt_subsw_M0_wRn(rd1);
2318            break;
2319        case 0x8:
2320            gen_op_iwmmxt_subnl_M0_wRn(rd1);
2321            break;
2322        case 0x9:
2323            gen_op_iwmmxt_subul_M0_wRn(rd1);
2324            break;
2325        case 0xb:
2326            gen_op_iwmmxt_subsl_M0_wRn(rd1);
2327            break;
2328        default:
2329            return 1;
2330        }
2331        gen_op_iwmmxt_movq_wRn_M0(wrd);
2332        gen_op_iwmmxt_set_mup();
2333        gen_op_iwmmxt_set_cup();
2334        break;
2335    case 0x01e: case 0x11e: case 0x21e: case 0x31e:     /* WSHUFH */
2336    case 0x41e: case 0x51e: case 0x61e: case 0x71e:
2337    case 0x81e: case 0x91e: case 0xa1e: case 0xb1e:
2338    case 0xc1e: case 0xd1e: case 0xe1e: case 0xf1e:
2339        wrd = (insn >> 12) & 0xf;
2340        rd0 = (insn >> 16) & 0xf;
2341        gen_op_iwmmxt_movq_M0_wRn(rd0);
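            /* The 8-bit shuffle immediate is split across the encoding:
               insn[23:20] supply the high nibble, insn[3:0] the low one. */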
2342        tmp = tcg_const_i32(((insn >> 16) & 0xf0) | (insn & 0x0f));
2343        gen_helper_iwmmxt_shufh(cpu_M0, cpu_env, cpu_M0, tmp);
2344        tcg_temp_free_i32(tmp);
2345        gen_op_iwmmxt_movq_wRn_M0(wrd);
2346        gen_op_iwmmxt_set_mup();
2347        gen_op_iwmmxt_set_cup();
2348        break;
2349    case 0x018: case 0x118: case 0x218: case 0x318:     /* WADD */
2350    case 0x418: case 0x518: case 0x618: case 0x718:
2351    case 0x818: case 0x918: case 0xa18: case 0xb18:
2352    case 0xc18: case 0xd18: case 0xe18: case 0xf18:
2353        wrd = (insn >> 12) & 0xf;
2354        rd0 = (insn >> 16) & 0xf;
2355        rd1 = (insn >> 0) & 0xf;
2356        gen_op_iwmmxt_movq_M0_wRn(rd0);
2357        switch ((insn >> 20) & 0xf) {
2358        case 0x0:
2359            gen_op_iwmmxt_addnb_M0_wRn(rd1);
2360            break;
2361        case 0x1:
2362            gen_op_iwmmxt_addub_M0_wRn(rd1);
2363            break;
2364        case 0x3:
2365            gen_op_iwmmxt_addsb_M0_wRn(rd1);
2366            break;
2367        case 0x4:
2368            gen_op_iwmmxt_addnw_M0_wRn(rd1);
2369            break;
2370        case 0x5:
2371            gen_op_iwmmxt_adduw_M0_wRn(rd1);
2372            break;
2373        case 0x7:
2374            gen_op_iwmmxt_addsw_M0_wRn(rd1);
2375            break;
2376        case 0x8:
2377            gen_op_iwmmxt_addnl_M0_wRn(rd1);
2378            break;
2379        case 0x9:
2380            gen_op_iwmmxt_addul_M0_wRn(rd1);
2381            break;
2382        case 0xb:
2383            gen_op_iwmmxt_addsl_M0_wRn(rd1);
2384            break;
2385        default:
2386            return 1;
2387        }
2388        gen_op_iwmmxt_movq_wRn_M0(wrd);
2389        gen_op_iwmmxt_set_mup();
2390        gen_op_iwmmxt_set_cup();
2391        break;
2392    case 0x008: case 0x108: case 0x208: case 0x308:     /* WPACK */
2393    case 0x408: case 0x508: case 0x608: case 0x708:
2394    case 0x808: case 0x908: case 0xa08: case 0xb08:
2395    case 0xc08: case 0xd08: case 0xe08: case 0xf08:
2396        if (!(insn & (1 << 20)) || ((insn >> 22) & 3) == 0)
2397            return 1;
2398        wrd = (insn >> 12) & 0xf;
2399        rd0 = (insn >> 16) & 0xf;
2400        rd1 = (insn >> 0) & 0xf;
2401        gen_op_iwmmxt_movq_M0_wRn(rd0);
2402        switch ((insn >> 22) & 3) {
2403        case 1:
2404            if (insn & (1 << 21))
2405                gen_op_iwmmxt_packsw_M0_wRn(rd1);
2406            else
2407                gen_op_iwmmxt_packuw_M0_wRn(rd1);
2408            break;
2409        case 2:
2410            if (insn & (1 << 21))
2411                gen_op_iwmmxt_packsl_M0_wRn(rd1);
2412            else
2413                gen_op_iwmmxt_packul_M0_wRn(rd1);
2414            break;
2415        case 3:
2416            if (insn & (1 << 21))
2417                gen_op_iwmmxt_packsq_M0_wRn(rd1);
2418            else
2419                gen_op_iwmmxt_packuq_M0_wRn(rd1);
2420            break;
2421        }
2422        gen_op_iwmmxt_movq_wRn_M0(wrd);
2423        gen_op_iwmmxt_set_mup();
2424        gen_op_iwmmxt_set_cup();
2425        break;
2426    case 0x201: case 0x203: case 0x205: case 0x207:
2427    case 0x209: case 0x20b: case 0x20d: case 0x20f:
2428    case 0x211: case 0x213: case 0x215: case 0x217:
2429    case 0x219: case 0x21b: case 0x21d: case 0x21f:
2430        wrd = (insn >> 5) & 0xf;
2431        rd0 = (insn >> 12) & 0xf;
2432        rd1 = (insn >> 0) & 0xf;
2433        if (rd0 == 0xf || rd1 == 0xf)
2434            return 1;
2435        gen_op_iwmmxt_movq_M0_wRn(wrd);
2436        tmp = load_reg(s, rd0);
2437        tmp2 = load_reg(s, rd1);
2438        switch ((insn >> 16) & 0xf) {
2439        case 0x0:                                       /* TMIA */
2440            gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2441            break;
2442        case 0x8:                                       /* TMIAPH */
2443            gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2444            break;
2445        case 0xc: case 0xd: case 0xe: case 0xf:                 /* TMIAxy */
2446            if (insn & (1 << 16))
2447                tcg_gen_shri_i32(tmp, tmp, 16);
2448            if (insn & (1 << 17))
2449                tcg_gen_shri_i32(tmp2, tmp2, 16);
2450            gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2451            break;
2452        default:
2453            tcg_temp_free_i32(tmp2);
2454            tcg_temp_free_i32(tmp);
2455            return 1;
2456        }
2457        tcg_temp_free_i32(tmp2);
2458        tcg_temp_free_i32(tmp);
2459        gen_op_iwmmxt_movq_wRn_M0(wrd);
2460        gen_op_iwmmxt_set_mup();
2461        break;
2462    default:
2463        return 1;
2464    }
2465
2466    return 0;
2467}
2468
2469/* Disassemble an XScale DSP instruction.  Returns nonzero if an error occurred
2470   (i.e. an undefined instruction).  */
2471static int disas_dsp_insn(DisasContext *s, uint32_t insn)
2472{
2473    int acc, rd0, rd1, rdhi, rdlo;
2474    TCGv_i32 tmp, tmp2;
2475
2476    if ((insn & 0x0ff00f10) == 0x0e200010) {
2477        /* Multiply with Internal Accumulate Format */
2478        rd0 = (insn >> 12) & 0xf;
2479        rd1 = insn & 0xf;
2480        acc = (insn >> 5) & 7;
2481
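            /* XScale implements only the single 40-bit accumulator acc0,
               so any other accumulator number is rejected as UNDEF. */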
2482        if (acc != 0)
2483            return 1;
2484
2485        tmp = load_reg(s, rd0);
2486        tmp2 = load_reg(s, rd1);
2487        switch ((insn >> 16) & 0xf) {
2488        case 0x0:                                       /* MIA */
2489            gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2490            break;
2491        case 0x8:                                       /* MIAPH */
2492            gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2493            break;
2494        case 0xc:                                       /* MIABB */
2495        case 0xd:                                       /* MIABT */
2496        case 0xe:                                       /* MIATB */
2497        case 0xf:                                       /* MIATT */
2498            if (insn & (1 << 16))
2499                tcg_gen_shri_i32(tmp, tmp, 16);
2500            if (insn & (1 << 17))
2501                tcg_gen_shri_i32(tmp2, tmp2, 16);
2502            gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2503            break;
2504        default:
2505            return 1;
2506        }
2507        tcg_temp_free_i32(tmp2);
2508        tcg_temp_free_i32(tmp);
2509
2510        gen_op_iwmmxt_movq_wRn_M0(acc);
2511        return 0;
2512    }
2513
2514    if ((insn & 0x0fe00ff8) == 0x0c400000) {
2515        /* Internal Accumulator Access Format */
2516        rdhi = (insn >> 16) & 0xf;
2517        rdlo = (insn >> 12) & 0xf;
2518        acc = insn & 7;
2519
2520        if (acc != 0)
2521            return 1;
2522
2523        if (insn & ARM_CP_RW_BIT) {                     /* MRA */
2524            iwmmxt_load_reg(cpu_V0, acc);
2525            tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
2526            tcg_gen_extrh_i64_i32(cpu_R[rdhi], cpu_V0);
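                /* The accumulator is 40 bits wide; keep only bits [39:32]
                   of the high word. */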
2527            tcg_gen_andi_i32(cpu_R[rdhi], cpu_R[rdhi], (1 << (40 - 32)) - 1);
2528        } else {                                        /* MAR */
2529            tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
2530            iwmmxt_store_reg(cpu_V0, acc);
2531        }
2532        return 0;
2533    }
2534
2535    return 1;
2536}
2537
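    /*
     * Direct block chaining with goto_tb is only used when the destination
     * lies in a guest page this TB already spans (the page of the TB start,
     * or the page of the current instruction): a cross-page chain could not
     * be unlinked reliably when the destination page is invalidated.
     */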
2538static inline bool use_goto_tb(DisasContext *s, target_ulong dest)
2539{
2540#ifndef CONFIG_USER_ONLY
2541    return (s->base.tb->pc & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK) ||
2542           ((s->base.pc_next - 1) & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK);
2543#else
2544    return true;
2545#endif
2546}
2547
2548static void gen_goto_ptr(void)
2549{
2550    tcg_gen_lookup_and_goto_ptr();
2551}
2552
2553/* This will end the TB but doesn't guarantee we'll return to
2554 * cpu_loop_exec. Any live exit_requests will be processed as we
2555 * enter the next TB.
2556 */
2557static void gen_goto_tb(DisasContext *s, int n, target_ulong dest)
2558{
2559    if (use_goto_tb(s, dest)) {
2560        tcg_gen_goto_tb(n);
2561        gen_set_pc_im(s, dest);
2562        tcg_gen_exit_tb(s->base.tb, n);
2563    } else {
2564        gen_set_pc_im(s, dest);
2565        gen_goto_ptr();
2566    }
2567    s->base.is_jmp = DISAS_NORETURN;
2568}
2569
2570/* Jump, specifying which TB number to use if we gen_goto_tb() */
2571static inline void gen_jmp_tb(DisasContext *s, uint32_t dest, int tbno)
2572{
2573    if (unlikely(is_singlestepping(s))) {
2574        /* An indirect jump so that we still trigger the debug exception.  */
2575        gen_set_pc_im(s, dest);
2576        s->base.is_jmp = DISAS_JUMP;
2577    } else {
2578        gen_goto_tb(s, tbno, dest);
2579    }
2580}
2581
2582static inline void gen_jmp(DisasContext *s, uint32_t dest)
2583{
2584    gen_jmp_tb(s, dest, 0);
2585}
2586
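    /*
     * 16x16->32 signed multiply used by the SMULxy/SMLAxy family: x and y
     * select the top (1) or bottom (0) halfword of t0 and t1 respectively,
     * sign-extending it before the multiply.
     */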
2587static inline void gen_mulxy(TCGv_i32 t0, TCGv_i32 t1, int x, int y)
2588{
2589    if (x)
2590        tcg_gen_sari_i32(t0, t0, 16);
2591    else
2592        gen_sxth(t0);
2593    if (y)
2594        tcg_gen_sari_i32(t1, t1, 16);
2595    else
2596        gen_sxth(t1);
2597    tcg_gen_mul_i32(t0, t0, t1);
2598}
2599
2600/* Return the mask of PSR bits set by a MSR instruction.  */
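    /* The four flag bits select the c/x/s/f field masks of the instruction:
     * bit 0 -> PSR[7:0] (control), bit 1 -> PSR[15:8] (extension),
     * bit 2 -> PSR[23:16] (status), bit 3 -> PSR[31:24] (flags).
     */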
2601static uint32_t msr_mask(DisasContext *s, int flags, int spsr)
2602{
2603    uint32_t mask = 0;
2604
2605    if (flags & (1 << 0)) {
2606        mask |= 0xff;
2607    }
2608    if (flags & (1 << 1)) {
2609        mask |= 0xff00;
2610    }
2611    if (flags & (1 << 2)) {
2612        mask |= 0xff0000;
2613    }
2614    if (flags & (1 << 3)) {
2615        mask |= 0xff000000;
2616    }
2617
2618    /* Mask out undefined and reserved bits.  */
2619    mask &= aarch32_cpsr_valid_mask(s->features, s->isar);
2620
2621    /* Mask out execution state.  */
2622    if (!spsr) {
2623        mask &= ~CPSR_EXEC;
2624    }
2625
2626    /* Mask out privileged bits.  */
2627    if (IS_USER(s)) {
2628        mask &= CPSR_USER;
2629    }
2630    return mask;
2631}
2632
2633/* Returns nonzero if access to the PSR is not permitted. Marks t0 as dead. */
2634static int gen_set_psr(DisasContext *s, uint32_t mask, int spsr, TCGv_i32 t0)
2635{
2636    TCGv_i32 tmp;
2637    if (spsr) {
2638        /* ??? This is also undefined in system mode.  */
2639        if (IS_USER(s))
2640            return 1;
2641
2642        tmp = load_cpu_field(spsr);
2643        tcg_gen_andi_i32(tmp, tmp, ~mask);
2644        tcg_gen_andi_i32(t0, t0, mask);
2645        tcg_gen_or_i32(tmp, tmp, t0);
2646        store_cpu_field(tmp, spsr);
2647    } else {
2648        gen_set_cpsr(t0, mask);
2649    }
2650    tcg_temp_free_i32(t0);
2651    gen_lookup_tb(s);
2652    return 0;
2653}
2654
2655/* Returns nonzero if access to the PSR is not permitted.  */
2656static int gen_set_psr_im(DisasContext *s, uint32_t mask, int spsr, uint32_t val)
2657{
2658    TCGv_i32 tmp;
2659    tmp = tcg_temp_new_i32();
2660    tcg_gen_movi_i32(tmp, val);
2661    return gen_set_psr(s, mask, spsr, tmp);
2662}
2663
2664static bool msr_banked_access_decode(DisasContext *s, int r, int sysm, int rn,
2665                                     int *tgtmode, int *regno)
2666{
2667    /* Decode the r and sysm fields of MSR/MRS banked accesses into
2668     * the target mode and register number, and identify the various
2669     * unpredictable cases.
2670     * MSR (banked) and MRS (banked) are CONSTRAINED UNPREDICTABLE if:
2671     *  + executed in user mode
2672     *  + using R15 as the src/dest register
2673     *  + accessing an unimplemented register
2674     *  + accessing a register that's inaccessible at the current PL/security state
2675     *  + accessing a register that you could access with a different insn
2676     * We choose to UNDEF in all these cases.
2677     * Since we don't know which of the various AArch32 modes we are in
2678     * we have to defer some checks to runtime.
2679     * Accesses to Monitor mode registers from Secure EL1 (which implies
2680     * that EL3 is AArch64) must trap to EL3.
2681     *
2682     * If the access checks fail this function will emit code to take
2683     * an exception and return false. Otherwise it will return true,
2684     * and set *tgtmode and *regno appropriately.
2685     */
2686    int exc_target = default_exception_el(s);
2687
2688    /* These instructions are present only in ARMv8, or in ARMv7 with the
2689     * Virtualization Extensions.
2690     */
2691    if (!arm_dc_feature(s, ARM_FEATURE_V8) &&
2692        !arm_dc_feature(s, ARM_FEATURE_EL2)) {
2693        goto undef;
2694    }
2695
2696    if (IS_USER(s) || rn == 15) {
2697        goto undef;
2698    }
2699
2700    /* The table in the v8 ARM ARM section F5.2.3 describes the encoding
2701     * of registers into (r, sysm).
2702     */
2703    if (r) {
2704        /* SPSRs for other modes */
2705        switch (sysm) {
2706        case 0xe: /* SPSR_fiq */
2707            *tgtmode = ARM_CPU_MODE_FIQ;
2708            break;
2709        case 0x10: /* SPSR_irq */
2710            *tgtmode = ARM_CPU_MODE_IRQ;
2711            break;
2712        case 0x12: /* SPSR_svc */
2713            *tgtmode = ARM_CPU_MODE_SVC;
2714            break;
2715        case 0x14: /* SPSR_abt */
2716            *tgtmode = ARM_CPU_MODE_ABT;
2717            break;
2718        case 0x16: /* SPSR_und */
2719            *tgtmode = ARM_CPU_MODE_UND;
2720            break;
2721        case 0x1c: /* SPSR_mon */
2722            *tgtmode = ARM_CPU_MODE_MON;
2723            break;
2724        case 0x1e: /* SPSR_hyp */
2725            *tgtmode = ARM_CPU_MODE_HYP;
2726            break;
2727        default: /* unallocated */
2728            goto undef;
2729        }
2730        /* We arbitrarily assign SPSR a register number of 16. */
2731        *regno = 16;
2732    } else {
2733        /* general purpose registers for other modes */
2734        switch (sysm) {
2735        case 0x0 ... 0x6:   /* 0b00xxx : r8_usr ... r14_usr */
2736            *tgtmode = ARM_CPU_MODE_USR;
2737            *regno = sysm + 8;
2738            break;
2739        case 0x8 ... 0xe:   /* 0b01xxx : r8_fiq ... r14_fiq */
2740            *tgtmode = ARM_CPU_MODE_FIQ;
2741            *regno = sysm;
2742            break;
2743        case 0x10 ... 0x11: /* 0b1000x : r14_irq, r13_irq */
2744            *tgtmode = ARM_CPU_MODE_IRQ;
2745            *regno = sysm & 1 ? 13 : 14;
2746            break;
2747        case 0x12 ... 0x13: /* 0b1001x : r14_svc, r13_svc */
2748            *tgtmode = ARM_CPU_MODE_SVC;
2749            *regno = sysm & 1 ? 13 : 14;
2750            break;
2751        case 0x14 ... 0x15: /* 0b1010x : r14_abt, r13_abt */
2752            *tgtmode = ARM_CPU_MODE_ABT;
2753            *regno = sysm & 1 ? 13 : 14;
2754            break;
2755        case 0x16 ... 0x17: /* 0b1011x : r14_und, r13_und */
2756            *tgtmode = ARM_CPU_MODE_UND;
2757            *regno = sysm & 1 ? 13 : 14;
2758            break;
2759        case 0x1c ... 0x1d: /* 0b1110x : r14_mon, r13_mon */
2760            *tgtmode = ARM_CPU_MODE_MON;
2761            *regno = sysm & 1 ? 13 : 14;
2762            break;
2763        case 0x1e ... 0x1f: /* 0b1111x : elr_hyp, r13_hyp */
2764            *tgtmode = ARM_CPU_MODE_HYP;
2765            /* Arbitrarily pick 17 for ELR_Hyp (which is not a banked LR!) */
2766            *regno = sysm & 1 ? 13 : 17;
2767            break;
2768        default: /* unallocated */
2769            goto undef;
2770        }
2771    }
2772
2773    /* Catch the 'accessing inaccessible register' cases we can detect
2774     * at translate time.
2775     */
2776    switch (*tgtmode) {
2777    case ARM_CPU_MODE_MON:
2778        if (!arm_dc_feature(s, ARM_FEATURE_EL3) || s->ns) {
2779            goto undef;
2780        }
2781        if (s->current_el == 1) {
2782            /* If we're in Secure EL1 (which implies that EL3 is AArch64)
2783             * then accesses to Mon registers trap to EL3
2784             */
2785            exc_target = 3;
2786            goto undef;
2787        }
2788        break;
2789    case ARM_CPU_MODE_HYP:
2790        /*
2791         * SPSR_hyp and r13_hyp can only be accessed from Monitor mode
2792         * (and so we can forbid accesses from EL2 or below). elr_hyp
2793         * can be accessed also from Hyp mode, so forbid accesses from
2794         * EL0 or EL1.
2795         */
2796        if (!arm_dc_feature(s, ARM_FEATURE_EL2) || s->current_el < 2 ||
2797            (s->current_el < 3 && *regno != 17)) {
2798            goto undef;
2799        }
2800        break;
2801    default:
2802        break;
2803    }
2804
2805    return true;
2806
2807undef:
2808    /* If we get here then some access check did not pass */
2809    gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
2810                       syn_uncategorized(), exc_target);
2811    return false;
2812}
2813
2814static void gen_msr_banked(DisasContext *s, int r, int sysm, int rn)
2815{
2816    TCGv_i32 tcg_reg, tcg_tgtmode, tcg_regno;
2817    int tgtmode = 0, regno = 0;
2818
2819    if (!msr_banked_access_decode(s, r, sysm, rn, &tgtmode, &regno)) {
2820        return;
2821    }
2822
2823    /* Sync state because msr_banked() can raise exceptions */
2824    gen_set_condexec(s);
2825    gen_set_pc_im(s, s->pc_curr);
2826    tcg_reg = load_reg(s, rn);
2827    tcg_tgtmode = tcg_const_i32(tgtmode);
2828    tcg_regno = tcg_const_i32(regno);
2829    gen_helper_msr_banked(cpu_env, tcg_reg, tcg_tgtmode, tcg_regno);
2830    tcg_temp_free_i32(tcg_tgtmode);
2831    tcg_temp_free_i32(tcg_regno);
2832    tcg_temp_free_i32(tcg_reg);
2833    s->base.is_jmp = DISAS_UPDATE_EXIT;
2834}
2835
2836static void gen_mrs_banked(DisasContext *s, int r, int sysm, int rn)
2837{
2838    TCGv_i32 tcg_reg, tcg_tgtmode, tcg_regno;
2839    int tgtmode = 0, regno = 0;
2840
2841    if (!msr_banked_access_decode(s, r, sysm, rn, &tgtmode, &regno)) {
2842        return;
2843    }
2844
2845    /* Sync state because mrs_banked() can raise exceptions */
2846    gen_set_condexec(s);
2847    gen_set_pc_im(s, s->pc_curr);
2848    tcg_reg = tcg_temp_new_i32();
2849    tcg_tgtmode = tcg_const_i32(tgtmode);
2850    tcg_regno = tcg_const_i32(regno);
2851    gen_helper_mrs_banked(tcg_reg, cpu_env, tcg_tgtmode, tcg_regno);
2852    tcg_temp_free_i32(tcg_tgtmode);
2853    tcg_temp_free_i32(tcg_regno);
2854    store_reg(s, rn, tcg_reg);
2855    s->base.is_jmp = DISAS_UPDATE_EXIT;
2856}
2857
2858/* Store value to PC as for an exception return (i.e. don't
2859 * mask bits). The subsequent call to gen_helper_cpsr_write_eret()
2860 * will do the masking based on the new value of the Thumb bit.
2861 */
2862static void store_pc_exc_ret(DisasContext *s, TCGv_i32 pc)
2863{
2864    tcg_gen_mov_i32(cpu_R[15], pc);
2865    tcg_temp_free_i32(pc);
2866}
2867
2868/* Generate a v6 exception return.  Marks both values as dead.  */
2869static void gen_rfe(DisasContext *s, TCGv_i32 pc, TCGv_i32 cpsr)
2870{
2871    store_pc_exc_ret(s, pc);
2872    /* The cpsr_write_eret helper will mask the low bits of PC
2873     * appropriately depending on the new Thumb bit, so it must
2874     * be called after storing the new PC.
2875     */
2876    if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
2877        gen_io_start();
2878    }
2879    gen_helper_cpsr_write_eret(cpu_env, cpsr);
2880    tcg_temp_free_i32(cpsr);
2881    /* Must exit loop to check un-masked IRQs */
2882    s->base.is_jmp = DISAS_EXIT;
2883}
2884
2885/* Generate an old-style exception return. Marks pc as dead. */
2886static void gen_exception_return(DisasContext *s, TCGv_i32 pc)
2887{
2888    gen_rfe(s, pc, load_cpu_field(spsr));
2889}
2890
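    /*
     * Expand a three-operand gvec operation whose helper also needs a
     * pointer to the QC (cumulative saturation) flag in vfp.qc, as used
     * by the SQRDMLAH/SQRDMLSH expansions below.
     */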
2891static void gen_gvec_fn3_qc(uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs,
2892                            uint32_t opr_sz, uint32_t max_sz,
2893                            gen_helper_gvec_3_ptr *fn)
2894{
2895    TCGv_ptr qc_ptr = tcg_temp_new_ptr();
2896
2897    tcg_gen_addi_ptr(qc_ptr, cpu_env, offsetof(CPUARMState, vfp.qc));
2898    tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, qc_ptr,
2899                       opr_sz, max_sz, 0, fn);
2900    tcg_temp_free_ptr(qc_ptr);
2901}
2902
2903void gen_gvec_sqrdmlah_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
2904                          uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
2905{
2906    static gen_helper_gvec_3_ptr * const fns[2] = {
2907        gen_helper_gvec_qrdmlah_s16, gen_helper_gvec_qrdmlah_s32
2908    };
2909    tcg_debug_assert(vece >= 1 && vece <= 2);
2910    gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
2911}
2912
2913void gen_gvec_sqrdmlsh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
2914                          uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
2915{
2916    static gen_helper_gvec_3_ptr * const fns[2] = {
2917        gen_helper_gvec_qrdmlsh_s16, gen_helper_gvec_qrdmlsh_s32
2918    };
2919    tcg_debug_assert(vece >= 1 && vece <= 2);
2920    gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
2921}
2922
2923#define GEN_CMP0(NAME, COND)                                            \
2924    static void gen_##NAME##0_i32(TCGv_i32 d, TCGv_i32 a)               \
2925    {                                                                   \
2926        tcg_gen_setcondi_i32(COND, d, a, 0);                            \
2927        tcg_gen_neg_i32(d, d);                                          \
2928    }                                                                   \
2929    static void gen_##NAME##0_i64(TCGv_i64 d, TCGv_i64 a)               \
2930    {                                                                   \
2931        tcg_gen_setcondi_i64(COND, d, a, 0);                            \
2932        tcg_gen_neg_i64(d, d);                                          \
2933    }                                                                   \
2934    static void gen_##NAME##0_vec(unsigned vece, TCGv_vec d, TCGv_vec a) \
2935    {                                                                   \
2936        TCGv_vec zero = tcg_const_zeros_vec_matching(d);                \
2937        tcg_gen_cmp_vec(COND, vece, d, a, zero);                        \
2938        tcg_temp_free_vec(zero);                                        \
2939    }                                                                   \
2940    void gen_gvec_##NAME##0(unsigned vece, uint32_t d, uint32_t m,      \
2941                            uint32_t opr_sz, uint32_t max_sz)           \
2942    {                                                                   \
2943        const GVecGen2 op[4] = {                                        \
2944            { .fno = gen_helper_gvec_##NAME##0_b,                       \
2945              .fniv = gen_##NAME##0_vec,                                \
2946              .opt_opc = vecop_list_cmp,                                \
2947              .vece = MO_8 },                                           \
2948            { .fno = gen_helper_gvec_##NAME##0_h,                       \
2949              .fniv = gen_##NAME##0_vec,                                \
2950              .opt_opc = vecop_list_cmp,                                \
2951              .vece = MO_16 },                                          \
2952            { .fni4 = gen_##NAME##0_i32,                                \
2953              .fniv = gen_##NAME##0_vec,                                \
2954              .opt_opc = vecop_list_cmp,                                \
2955              .vece = MO_32 },                                          \
2956            { .fni8 = gen_##NAME##0_i64,                                \
2957              .fniv = gen_##NAME##0_vec,                                \
2958              .opt_opc = vecop_list_cmp,                                \
2959              .prefer_i64 = TCG_TARGET_REG_BITS == 64,                  \
2960              .vece = MO_64 },                                          \
2961        };                                                              \
2962        tcg_gen_gvec_2(d, m, opr_sz, max_sz, &op[vece]);                \
2963    }
2964
2965static const TCGOpcode vecop_list_cmp[] = {
2966    INDEX_op_cmp_vec, 0
2967};
2968
2969GEN_CMP0(ceq, TCG_COND_EQ)
2970GEN_CMP0(cle, TCG_COND_LE)
2971GEN_CMP0(cge, TCG_COND_GE)
2972GEN_CMP0(clt, TCG_COND_LT)
2973GEN_CMP0(cgt, TCG_COND_GT)
2974
2975#undef GEN_CMP0
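    /*
     * In the integer expansions above, setcond produces 0/1 and the
     * following neg turns that into the 0/-1 (all-zeros/all-ones) element
     * mask that the Neon compare-against-zero instructions are defined to
     * produce.
     */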
2976
2977static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
2978{
2979    tcg_gen_vec_sar8i_i64(a, a, shift);
2980    tcg_gen_vec_add8_i64(d, d, a);
2981}
2982
2983static void gen_ssra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
2984{
2985    tcg_gen_vec_sar16i_i64(a, a, shift);
2986    tcg_gen_vec_add16_i64(d, d, a);
2987}
2988
2989static void gen_ssra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
2990{
2991    tcg_gen_sari_i32(a, a, shift);
2992    tcg_gen_add_i32(d, d, a);
2993}
2994
2995static void gen_ssra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
2996{
2997    tcg_gen_sari_i64(a, a, shift);
2998    tcg_gen_add_i64(d, d, a);
2999}
3000
3001static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3002{
3003    tcg_gen_sari_vec(vece, a, a, sh);
3004    tcg_gen_add_vec(vece, d, d, a);
3005}
3006
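    /*
     * SSRA: signed shift right and accumulate.  The .fni4/.fni8 expansions
     * above operate on one 32- or 64-bit lane at a time; .fniv is used when
     * the host supports the vector ops in vecop_list, and .fno falls back
     * to an out-of-line helper otherwise.
     */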
3007void gen_gvec_ssra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3008                   int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3009{
3010    static const TCGOpcode vecop_list[] = {
3011        INDEX_op_sari_vec, INDEX_op_add_vec, 0
3012    };
3013    static const GVecGen2i ops[4] = {
3014        { .fni8 = gen_ssra8_i64,
3015          .fniv = gen_ssra_vec,
3016          .fno = gen_helper_gvec_ssra_b,
3017          .load_dest = true,
3018          .opt_opc = vecop_list,
3019          .vece = MO_8 },
3020        { .fni8 = gen_ssra16_i64,
3021          .fniv = gen_ssra_vec,
3022          .fno = gen_helper_gvec_ssra_h,
3023          .load_dest = true,
3024          .opt_opc = vecop_list,
3025          .vece = MO_16 },
3026        { .fni4 = gen_ssra32_i32,
3027          .fniv = gen_ssra_vec,
3028          .fno = gen_helper_gvec_ssra_s,
3029          .load_dest = true,
3030          .opt_opc = vecop_list,
3031          .vece = MO_32 },
3032        { .fni8 = gen_ssra64_i64,
3033          .fniv = gen_ssra_vec,
3034          .fno = gen_helper_gvec_ssra_d,
3035          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3036          .opt_opc = vecop_list,
3037          .load_dest = true,
3038          .vece = MO_64 },
3039    };
3040
3041    /* tszimm encoding produces immediates in the range [1..esize]. */
3042    tcg_debug_assert(shift > 0);
3043    tcg_debug_assert(shift <= (8 << vece));
3044
3045    /*
3046     * Shifts larger than the element size are architecturally valid.
3047     * Signed results in all sign bits.
3048     */
3049    shift = MIN(shift, (8 << vece) - 1);
3050    tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3051}
3052
3053static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3054{
3055    tcg_gen_vec_shr8i_i64(a, a, shift);
3056    tcg_gen_vec_add8_i64(d, d, a);
3057}
3058
3059static void gen_usra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3060{
3061    tcg_gen_vec_shr16i_i64(a, a, shift);
3062    tcg_gen_vec_add16_i64(d, d, a);
3063}
3064
3065static void gen_usra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3066{
3067    tcg_gen_shri_i32(a, a, shift);
3068    tcg_gen_add_i32(d, d, a);
3069}
3070
3071static void gen_usra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3072{
3073    tcg_gen_shri_i64(a, a, shift);
3074    tcg_gen_add_i64(d, d, a);
3075}
3076
3077static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3078{
3079    tcg_gen_shri_vec(vece, a, a, sh);
3080    tcg_gen_add_vec(vece, d, d, a);
3081}
3082
3083void gen_gvec_usra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3084                   int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3085{
3086    static const TCGOpcode vecop_list[] = {
3087        INDEX_op_shri_vec, INDEX_op_add_vec, 0
3088    };
3089    static const GVecGen2i ops[4] = {
3090        { .fni8 = gen_usra8_i64,
3091          .fniv = gen_usra_vec,
3092          .fno = gen_helper_gvec_usra_b,
3093          .load_dest = true,
3094          .opt_opc = vecop_list,
3095          .vece = MO_8, },
3096        { .fni8 = gen_usra16_i64,
3097          .fniv = gen_usra_vec,
3098          .fno = gen_helper_gvec_usra_h,
3099          .load_dest = true,
3100          .opt_opc = vecop_list,
3101          .vece = MO_16, },
3102        { .fni4 = gen_usra32_i32,
3103          .fniv = gen_usra_vec,
3104          .fno = gen_helper_gvec_usra_s,
3105          .load_dest = true,
3106          .opt_opc = vecop_list,
3107          .vece = MO_32, },
3108        { .fni8 = gen_usra64_i64,
3109          .fniv = gen_usra_vec,
3110          .fno = gen_helper_gvec_usra_d,
3111          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3112          .load_dest = true,
3113          .opt_opc = vecop_list,
3114          .vece = MO_64, },
3115    };
3116
3117    /* tszimm encoding produces immediates in the range [1..esize]. */
3118    tcg_debug_assert(shift > 0);
3119    tcg_debug_assert(shift <= (8 << vece));
3120
3121    /*
3122     * Shifts larger than the element size are architecturally valid.
3123     * Unsigned results in all zeros as input to accumulate: nop.
3124     */
3125    if (shift < (8 << vece)) {
3126        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3127    } else {
3128        /* Nop, but we do need to clear the tail. */
3129        tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
3130    }
3131}
3132
3133/*
3134 * Shift one less than the requested amount, and the low bit is
3135 * the rounding bit.  For the 8 and 16-bit operations, because we
3136 * mask the low bit, we can perform a normal integer shift instead
3137 * of a vector shift.
3138 */
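    /*
     * E.g. for sh == 3, the rounded result is (a >> 3) + ((a >> 2) & 1):
     * adding back bit (sh - 1) of the input is equivalent to adding
     * 1 << (sh - 1) before shifting, i.e. round-to-nearest.
     */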
3139static void gen_srshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3140{
3141    TCGv_i64 t = tcg_temp_new_i64();
3142
3143    tcg_gen_shri_i64(t, a, sh - 1);
3144    tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
3145    tcg_gen_vec_sar8i_i64(d, a, sh);
3146    tcg_gen_vec_add8_i64(d, d, t);
3147    tcg_temp_free_i64(t);
3148}
3149
3150static void gen_srshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3151{
3152    TCGv_i64 t = tcg_temp_new_i64();
3153
3154    tcg_gen_shri_i64(t, a, sh - 1);
3155    tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
3156    tcg_gen_vec_sar16i_i64(d, a, sh);
3157    tcg_gen_vec_add16_i64(d, d, t);
3158    tcg_temp_free_i64(t);
3159}
3160
3161static void gen_srshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3162{
3163    TCGv_i32 t = tcg_temp_new_i32();
3164
3165    tcg_gen_extract_i32(t, a, sh - 1, 1);
3166    tcg_gen_sari_i32(d, a, sh);
3167    tcg_gen_add_i32(d, d, t);
3168    tcg_temp_free_i32(t);
3169}
3170
3171static void gen_srshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3172{
3173    TCGv_i64 t = tcg_temp_new_i64();
3174
3175    tcg_gen_extract_i64(t, a, sh - 1, 1);
3176    tcg_gen_sari_i64(d, a, sh);
3177    tcg_gen_add_i64(d, d, t);
3178    tcg_temp_free_i64(t);
3179}
3180
3181static void gen_srshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3182{
3183    TCGv_vec t = tcg_temp_new_vec_matching(d);
3184    TCGv_vec ones = tcg_temp_new_vec_matching(d);
3185
3186    tcg_gen_shri_vec(vece, t, a, sh - 1);
3187    tcg_gen_dupi_vec(vece, ones, 1);
3188    tcg_gen_and_vec(vece, t, t, ones);
3189    tcg_gen_sari_vec(vece, d, a, sh);
3190    tcg_gen_add_vec(vece, d, d, t);
3191
3192    tcg_temp_free_vec(t);
3193    tcg_temp_free_vec(ones);
3194}
3195
3196void gen_gvec_srshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3197                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3198{
3199    static const TCGOpcode vecop_list[] = {
3200        INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
3201    };
3202    static const GVecGen2i ops[4] = {
3203        { .fni8 = gen_srshr8_i64,
3204          .fniv = gen_srshr_vec,
3205          .fno = gen_helper_gvec_srshr_b,
3206          .opt_opc = vecop_list,
3207          .vece = MO_8 },
3208        { .fni8 = gen_srshr16_i64,
3209          .fniv = gen_srshr_vec,
3210          .fno = gen_helper_gvec_srshr_h,
3211          .opt_opc = vecop_list,
3212          .vece = MO_16 },
3213        { .fni4 = gen_srshr32_i32,
3214          .fniv = gen_srshr_vec,
3215          .fno = gen_helper_gvec_srshr_s,
3216          .opt_opc = vecop_list,
3217          .vece = MO_32 },
3218        { .fni8 = gen_srshr64_i64,
3219          .fniv = gen_srshr_vec,
3220          .fno = gen_helper_gvec_srshr_d,
3221          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3222          .opt_opc = vecop_list,
3223          .vece = MO_64 },
3224    };
3225
3226    /* tszimm encoding produces immediates in the range [1..esize] */
3227    tcg_debug_assert(shift > 0);
3228    tcg_debug_assert(shift <= (8 << vece));
3229
3230    if (shift == (8 << vece)) {
3231        /*
3232         * Shifts larger than the element size are architecturally valid.
3233     * A signed shift results in all sign bits.  With rounding, this produces
3234         *   (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
3235         * I.e. always zero.
3236         */
3237        tcg_gen_gvec_dup_imm(vece, rd_ofs, opr_sz, max_sz, 0);
3238    } else {
3239        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3240    }
3241}
3242
3243static void gen_srsra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3244{
3245    TCGv_i64 t = tcg_temp_new_i64();
3246
3247    gen_srshr8_i64(t, a, sh);
3248    tcg_gen_vec_add8_i64(d, d, t);
3249    tcg_temp_free_i64(t);
3250}
3251
3252static void gen_srsra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3253{
3254    TCGv_i64 t = tcg_temp_new_i64();
3255
3256    gen_srshr16_i64(t, a, sh);
3257    tcg_gen_vec_add16_i64(d, d, t);
3258    tcg_temp_free_i64(t);
3259}
3260
3261static void gen_srsra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3262{
3263    TCGv_i32 t = tcg_temp_new_i32();
3264
3265    gen_srshr32_i32(t, a, sh);
3266    tcg_gen_add_i32(d, d, t);
3267    tcg_temp_free_i32(t);
3268}
3269
3270static void gen_srsra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3271{
3272    TCGv_i64 t = tcg_temp_new_i64();
3273
3274    gen_srshr64_i64(t, a, sh);
3275    tcg_gen_add_i64(d, d, t);
3276    tcg_temp_free_i64(t);
3277}
3278
3279static void gen_srsra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3280{
3281    TCGv_vec t = tcg_temp_new_vec_matching(d);
3282
3283    gen_srshr_vec(vece, t, a, sh);
3284    tcg_gen_add_vec(vece, d, d, t);
3285    tcg_temp_free_vec(t);
3286}
3287
3288void gen_gvec_srsra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3289                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3290{
3291    static const TCGOpcode vecop_list[] = {
3292        INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
3293    };
3294    static const GVecGen2i ops[4] = {
3295        { .fni8 = gen_srsra8_i64,
3296          .fniv = gen_srsra_vec,
3297          .fno = gen_helper_gvec_srsra_b,
3298          .opt_opc = vecop_list,
3299          .load_dest = true,
3300          .vece = MO_8 },
3301        { .fni8 = gen_srsra16_i64,
3302          .fniv = gen_srsra_vec,
3303          .fno = gen_helper_gvec_srsra_h,
3304          .opt_opc = vecop_list,
3305          .load_dest = true,
3306          .vece = MO_16 },
3307        { .fni4 = gen_srsra32_i32,
3308          .fniv = gen_srsra_vec,
3309          .fno = gen_helper_gvec_srsra_s,
3310          .opt_opc = vecop_list,
3311          .load_dest = true,
3312          .vece = MO_32 },
3313        { .fni8 = gen_srsra64_i64,
3314          .fniv = gen_srsra_vec,
3315          .fno = gen_helper_gvec_srsra_d,
3316          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3317          .opt_opc = vecop_list,
3318          .load_dest = true,
3319          .vece = MO_64 },
3320    };
3321
3322    /* tszimm encoding produces immediates in the range [1..esize] */
3323    tcg_debug_assert(shift > 0);
3324    tcg_debug_assert(shift <= (8 << vece));
3325
3326    /*
3327     * Shifts larger than the element size are architecturally valid.
3328     * A signed shift results in all sign bits.  With rounding, this produces
3329     *   (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
3330     * I.e. always zero.  With accumulation, this leaves D unchanged.
3331     */
3332    if (shift == (8 << vece)) {
3333        /* Nop, but we do need to clear the tail. */
3334        tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
3335    } else {
3336        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3337    }
3338}
3339
3340static void gen_urshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3341{
3342    TCGv_i64 t = tcg_temp_new_i64();
3343
3344    tcg_gen_shri_i64(t, a, sh - 1);
3345    tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
3346    tcg_gen_vec_shr8i_i64(d, a, sh);
3347    tcg_gen_vec_add8_i64(d, d, t);
3348    tcg_temp_free_i64(t);
3349}
3350
3351static void gen_urshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3352{
3353    TCGv_i64 t = tcg_temp_new_i64();
3354
3355    tcg_gen_shri_i64(t, a, sh - 1);
3356    tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
3357    tcg_gen_vec_shr16i_i64(d, a, sh);
3358    tcg_gen_vec_add16_i64(d, d, t);
3359    tcg_temp_free_i64(t);
3360}
3361
3362static void gen_urshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3363{
3364    TCGv_i32 t = tcg_temp_new_i32();
3365
3366    tcg_gen_extract_i32(t, a, sh - 1, 1);
3367    tcg_gen_shri_i32(d, a, sh);
3368    tcg_gen_add_i32(d, d, t);
3369    tcg_temp_free_i32(t);
3370}
3371
3372static void gen_urshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3373{
3374    TCGv_i64 t = tcg_temp_new_i64();
3375
3376    tcg_gen_extract_i64(t, a, sh - 1, 1);
3377    tcg_gen_shri_i64(d, a, sh);
3378    tcg_gen_add_i64(d, d, t);
3379    tcg_temp_free_i64(t);
3380}
3381
3382static void gen_urshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t shift)
3383{
3384    TCGv_vec t = tcg_temp_new_vec_matching(d);
3385    TCGv_vec ones = tcg_temp_new_vec_matching(d);
3386
3387    tcg_gen_shri_vec(vece, t, a, shift - 1);
3388    tcg_gen_dupi_vec(vece, ones, 1);
3389    tcg_gen_and_vec(vece, t, t, ones);
3390    tcg_gen_shri_vec(vece, d, a, shift);
3391    tcg_gen_add_vec(vece, d, d, t);
3392
3393    tcg_temp_free_vec(t);
3394    tcg_temp_free_vec(ones);
3395}
3396
3397void gen_gvec_urshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3398                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3399{
3400    static const TCGOpcode vecop_list[] = {
3401        INDEX_op_shri_vec, INDEX_op_add_vec, 0
3402    };
3403    static const GVecGen2i ops[4] = {
3404        { .fni8 = gen_urshr8_i64,
3405          .fniv = gen_urshr_vec,
3406          .fno = gen_helper_gvec_urshr_b,
3407          .opt_opc = vecop_list,
3408          .vece = MO_8 },
3409        { .fni8 = gen_urshr16_i64,
3410          .fniv = gen_urshr_vec,
3411          .fno = gen_helper_gvec_urshr_h,
3412          .opt_opc = vecop_list,
3413          .vece = MO_16 },
3414        { .fni4 = gen_urshr32_i32,
3415          .fniv = gen_urshr_vec,
3416          .fno = gen_helper_gvec_urshr_s,
3417          .opt_opc = vecop_list,
3418          .vece = MO_32 },
3419        { .fni8 = gen_urshr64_i64,
3420          .fniv = gen_urshr_vec,
3421          .fno = gen_helper_gvec_urshr_d,
3422          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3423          .opt_opc = vecop_list,
3424          .vece = MO_64 },
3425    };
3426
3427    /* tszimm encoding produces immediates in the range [1..esize] */
3428    tcg_debug_assert(shift > 0);
3429    tcg_debug_assert(shift <= (8 << vece));
3430
3431    if (shift == (8 << vece)) {
3432        /*
3433         * Shifts larger than the element size are architecturally valid.
3434         * An unsigned shift results in zero.  With rounding, this produces a
3435         * copy of the most significant bit.
3436         */
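            /*
             * E.g. for esize 8, shift 8: (x + 0x80) >> 8 is just bit 7
             * of x, so the single shift right by shift - 1 suffices.
             */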
3437        tcg_gen_gvec_shri(vece, rd_ofs, rm_ofs, shift - 1, opr_sz, max_sz);
3438    } else {
3439        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3440    }
3441}
3442
3443static void gen_ursra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3444{
3445    TCGv_i64 t = tcg_temp_new_i64();
3446
3447    if (sh == 8) {
3448        tcg_gen_vec_shr8i_i64(t, a, 7);
3449    } else {
3450        gen_urshr8_i64(t, a, sh);
3451    }
3452    tcg_gen_vec_add8_i64(d, d, t);
3453    tcg_temp_free_i64(t);
3454}
3455
3456static void gen_ursra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3457{
3458    TCGv_i64 t = tcg_temp_new_i64();
3459
3460    if (sh == 16) {
3461        tcg_gen_vec_shr16i_i64(t, a, 15);
3462    } else {
3463        gen_urshr16_i64(t, a, sh);
3464    }
3465    tcg_gen_vec_add16_i64(d, d, t);
3466    tcg_temp_free_i64(t);
3467}
3468
3469static void gen_ursra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3470{
3471    TCGv_i32 t = tcg_temp_new_i32();
3472
3473    if (sh == 32) {
3474        tcg_gen_shri_i32(t, a, 31);
3475    } else {
3476        gen_urshr32_i32(t, a, sh);
3477    }
3478    tcg_gen_add_i32(d, d, t);
3479    tcg_temp_free_i32(t);
3480}
3481
3482static void gen_ursra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3483{
3484    TCGv_i64 t = tcg_temp_new_i64();
3485
3486    if (sh == 64) {
3487        tcg_gen_shri_i64(t, a, 63);
3488    } else {
3489        gen_urshr64_i64(t, a, sh);
3490    }
3491    tcg_gen_add_i64(d, d, t);
3492    tcg_temp_free_i64(t);
3493}
3494
3495static void gen_ursra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3496{
3497    TCGv_vec t = tcg_temp_new_vec_matching(d);
3498
3499    if (sh == (8 << vece)) {
3500        tcg_gen_shri_vec(vece, t, a, sh - 1);
3501    } else {
3502        gen_urshr_vec(vece, t, a, sh);
3503    }
3504    tcg_gen_add_vec(vece, d, d, t);
3505    tcg_temp_free_vec(t);
3506}
3507
3508void gen_gvec_ursra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3509                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3510{
3511    static const TCGOpcode vecop_list[] = {
3512        INDEX_op_shri_vec, INDEX_op_add_vec, 0
3513    };
3514    static const GVecGen2i ops[4] = {
3515        { .fni8 = gen_ursra8_i64,
3516          .fniv = gen_ursra_vec,
3517          .fno = gen_helper_gvec_ursra_b,
3518          .opt_opc = vecop_list,
3519          .load_dest = true,
3520          .vece = MO_8 },
3521        { .fni8 = gen_ursra16_i64,
3522          .fniv = gen_ursra_vec,
3523          .fno = gen_helper_gvec_ursra_h,
3524          .opt_opc = vecop_list,
3525          .load_dest = true,
3526          .vece = MO_16 },
3527        { .fni4 = gen_ursra32_i32,
3528          .fniv = gen_ursra_vec,
3529          .fno = gen_helper_gvec_ursra_s,
3530          .opt_opc = vecop_list,
3531          .load_dest = true,
3532          .vece = MO_32 },
3533        { .fni8 = gen_ursra64_i64,
3534          .fniv = gen_ursra_vec,
3535          .fno = gen_helper_gvec_ursra_d,
3536          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3537          .opt_opc = vecop_list,
3538          .load_dest = true,
3539          .vece = MO_64 },
3540    };
3541
3542    /* tszimm encoding produces immediates in the range [1..esize] */
3543    tcg_debug_assert(shift > 0);
3544    tcg_debug_assert(shift <= (8 << vece));
3545
3546    tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3547}
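
    /*
     * Unlike USRA, URSRA with shift == esize is not a nop: the
     * rounding bit is the element's own most significant bit, so the
     * addend is msb(x) rather than zero -- hence the sh == esize
     * special case in each helper above.
     */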
3548
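    /*
     * SRI (shift right and insert): each element keeps the top <shift>
     * bits of the destination and takes the rest from the shifted
     * source, roughly
     *   d = (d & ~m) | ((a >> shift) & m),  m = element_mask >> shift
     * The 8- and 16-bit forms do this with masks on the full 64-bit
     * value because there is no per-lane deposit at those widths.
     */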
3549static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3550{
3551    uint64_t mask = dup_const(MO_8, 0xff >> shift);
3552    TCGv_i64 t = tcg_temp_new_i64();
3553
3554    tcg_gen_shri_i64(t, a, shift);
3555    tcg_gen_andi_i64(t, t, mask);
3556    tcg_gen_andi_i64(d, d, ~mask);
3557    tcg_gen_or_i64(d, d, t);
3558    tcg_temp_free_i64(t);
3559}
3560
3561static void gen_shr16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3562{
3563    uint64_t mask = dup_const(MO_16, 0xffff >> shift);
3564    TCGv_i64 t = tcg_temp_new_i64();
3565
3566    tcg_gen_shri_i64(t, a, shift);
3567    tcg_gen_andi_i64(t, t, mask);
3568    tcg_gen_andi_i64(d, d, ~mask);
3569    tcg_gen_or_i64(d, d, t);
3570    tcg_temp_free_i64(t);
3571}
3572
3573static void gen_shr32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3574{
3575    tcg_gen_shri_i32(a, a, shift);
3576    tcg_gen_deposit_i32(d, d, a, 0, 32 - shift);
3577}
3578
3579static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3580{
3581    tcg_gen_shri_i64(a, a, shift);
3582    tcg_gen_deposit_i64(d, d, a, 0, 64 - shift);
3583}
3584
3585static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3586{
3587    TCGv_vec t = tcg_temp_new_vec_matching(d);
3588    TCGv_vec m = tcg_temp_new_vec_matching(d);
3589
3590    tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh));
3591    tcg_gen_shri_vec(vece, t, a, sh);
3592    tcg_gen_and_vec(vece, d, d, m);
3593    tcg_gen_or_vec(vece, d, d, t);
3594
3595    tcg_temp_free_vec(t);
3596    tcg_temp_free_vec(m);
3597}
3598
3599void gen_gvec_sri(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3600                  int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3601{
3602    static const TCGOpcode vecop_list[] = { INDEX_op_shri_vec, 0 };
3603    const GVecGen2i ops[4] = {
3604        { .fni8 = gen_shr8_ins_i64,
3605          .fniv = gen_shr_ins_vec,
3606          .fno = gen_helper_gvec_sri_b,
3607          .load_dest = true,
3608          .opt_opc = vecop_list,
3609          .vece = MO_8 },
3610        { .fni8 = gen_shr16_ins_i64,
3611          .fniv = gen_shr_ins_vec,
3612          .fno = gen_helper_gvec_sri_h,
3613          .load_dest = true,
3614          .opt_opc = vecop_list,
3615          .vece = MO_16 },
3616        { .fni4 = gen_shr32_ins_i32,
3617          .fniv = gen_shr_ins_vec,
3618          .fno = gen_helper_gvec_sri_s,
3619          .load_dest = true,
3620          .opt_opc = vecop_list,
3621          .vece = MO_32 },
3622        { .fni8 = gen_shr64_ins_i64,
3623          .fniv = gen_shr_ins_vec,
3624          .fno = gen_helper_gvec_sri_d,
3625          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3626          .load_dest = true,
3627          .opt_opc = vecop_list,
3628          .vece = MO_64 },
3629    };
3630
3631    /* tszimm encoding produces immediates in the range [1..esize]. */
3632    tcg_debug_assert(shift > 0);
3633    tcg_debug_assert(shift <= (8 << vece));
3634
3635    /* Shift of esize leaves destination unchanged. */
3636    if (shift < (8 << vece)) {
3637        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3638    } else {
3639        /* Nop, but we do need to clear the tail. */
3640        tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
3641    }
3642}
3643
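    /*
     * SLI (shift left and insert) is the mirror image: the destination
     * keeps its low <shift> bits and the shifted source supplies the
     * rest, i.e. d = (d & ((1 << shift) - 1)) | (a << shift), again
     * masked per element for the narrow cases.
     */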
3644static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3645{
3646    uint64_t mask = dup_const(MO_8, 0xff << shift);
3647    TCGv_i64 t = tcg_temp_new_i64();
3648
3649    tcg_gen_shli_i64(t, a, shift);
3650    tcg_gen_andi_i64(t, t, mask);
3651    tcg_gen_andi_i64(d, d, ~mask);
3652    tcg_gen_or_i64(d, d, t);
3653    tcg_temp_free_i64(t);
3654}
3655
3656static void gen_shl16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3657{
3658    uint64_t mask = dup_const(MO_16, 0xffff << shift);
3659    TCGv_i64 t = tcg_temp_new_i64();
3660
3661    tcg_gen_shli_i64(t, a, shift);
3662    tcg_gen_andi_i64(t, t, mask);
3663    tcg_gen_andi_i64(d, d, ~mask);
3664    tcg_gen_or_i64(d, d, t);
3665    tcg_temp_free_i64(t);
3666}
3667
3668static void gen_shl32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3669{
3670    tcg_gen_deposit_i32(d, d, a, shift, 32 - shift);
3671}
3672
3673static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3674{
3675    tcg_gen_deposit_i64(d, d, a, shift, 64 - shift);
3676}
3677
3678static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3679{
3680    TCGv_vec t = tcg_temp_new_vec_matching(d);
3681    TCGv_vec m = tcg_temp_new_vec_matching(d);
3682
3683    tcg_gen_shli_vec(vece, t, a, sh);
3684    tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK(0, sh));
3685    tcg_gen_and_vec(vece, d, d, m);
3686    tcg_gen_or_vec(vece, d, d, t);
3687
3688    tcg_temp_free_vec(t);
3689    tcg_temp_free_vec(m);
3690}
3691
3692void gen_gvec_sli(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3693                  int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3694{
3695    static const TCGOpcode vecop_list[] = { INDEX_op_shli_vec, 0 };
3696    const GVecGen2i ops[4] = {
3697        { .fni8 = gen_shl8_ins_i64,
3698          .fniv = gen_shl_ins_vec,
3699          .fno = gen_helper_gvec_sli_b,
3700          .load_dest = true,
3701          .opt_opc = vecop_list,
3702          .vece = MO_8 },
3703        { .fni8 = gen_shl16_ins_i64,
3704          .fniv = gen_shl_ins_vec,
3705          .fno = gen_helper_gvec_sli_h,
3706          .load_dest = true,
3707          .opt_opc = vecop_list,
3708          .vece = MO_16 },
3709        { .fni4 = gen_shl32_ins_i32,
3710          .fniv = gen_shl_ins_vec,
3711          .fno = gen_helper_gvec_sli_s,
3712          .load_dest = true,
3713          .opt_opc = vecop_list,
3714          .vece = MO_32 },
3715        { .fni8 = gen_shl64_ins_i64,
3716          .fniv = gen_shl_ins_vec,
3717          .fno = gen_helper_gvec_sli_d,
3718          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3719          .load_dest = true,
3720          .opt_opc = vecop_list,
3721          .vece = MO_64 },
3722    };
3723
3724    /* tszimm encoding produces immediates in the range [0..esize-1]. */
3725    tcg_debug_assert(shift >= 0);
3726    tcg_debug_assert(shift < (8 << vece));
3727
3728    if (shift == 0) {
3729        tcg_gen_gvec_mov(vece, rd_ofs, rm_ofs, opr_sz, max_sz);
3730    } else {
3731        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3732    }
3733}
3734
3735static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3736{
3737    gen_helper_neon_mul_u8(a, a, b);
3738    gen_helper_neon_add_u8(d, d, a);
3739}
3740
3741static void gen_mls8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3742{
3743    gen_helper_neon_mul_u8(a, a, b);
3744    gen_helper_neon_sub_u8(d, d, a);
3745}
3746
3747static void gen_mla16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3748{
3749    gen_helper_neon_mul_u16(a, a, b);
3750    gen_helper_neon_add_u16(d, d, a);
3751}
3752
3753static void gen_mls16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3754{
3755    gen_helper_neon_mul_u16(a, a, b);
3756    gen_helper_neon_sub_u16(d, d, a);
3757}
3758
3759static void gen_mla32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3760{
3761    tcg_gen_mul_i32(a, a, b);
3762    tcg_gen_add_i32(d, d, a);
3763}
3764
3765static void gen_mls32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3766{
3767    tcg_gen_mul_i32(a, a, b);
3768    tcg_gen_sub_i32(d, d, a);
3769}
3770
3771static void gen_mla64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
3772{
3773    tcg_gen_mul_i64(a, a, b);
3774    tcg_gen_add_i64(d, d, a);
3775}
3776
3777static void gen_mls64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
3778{
3779    tcg_gen_mul_i64(a, a, b);
3780    tcg_gen_sub_i64(d, d, a);
3781}
3782
3783static void gen_mla_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
3784{
3785    tcg_gen_mul_vec(vece, a, a, b);
3786    tcg_gen_add_vec(vece, d, d, a);
3787}
3788
3789static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
3790{
3791    tcg_gen_mul_vec(vece, a, a, b);
3792    tcg_gen_sub_vec(vece, d, d, a);
3793}
3794
3795/* Note that while NEON does not support VMLA and VMLS as 64-bit ops,
3796 * these tables are shared with AArch64, which does support them.
3797 */
3798void gen_gvec_mla(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3799                  uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3800{
3801    static const TCGOpcode vecop_list[] = {
3802        INDEX_op_mul_vec, INDEX_op_add_vec, 0
3803    };
3804    static const GVecGen3 ops[4] = {
3805        { .fni4 = gen_mla8_i32,
3806          .fniv = gen_mla_vec,
3807          .load_dest = true,
3808          .opt_opc = vecop_list,
3809          .vece = MO_8 },
3810        { .fni4 = gen_mla16_i32,
3811          .fniv = gen_mla_vec,
3812          .load_dest = true,
3813          .opt_opc = vecop_list,
3814          .vece = MO_16 },
3815        { .fni4 = gen_mla32_i32,
3816          .fniv = gen_mla_vec,
3817          .load_dest = true,
3818          .opt_opc = vecop_list,
3819          .vece = MO_32 },
3820        { .fni8 = gen_mla64_i64,
3821          .fniv = gen_mla_vec,
3822          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3823          .load_dest = true,
3824          .opt_opc = vecop_list,
3825          .vece = MO_64 },
3826    };
3827    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
3828}
3829
3830void gen_gvec_mls(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3831                  uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3832{
3833    static const TCGOpcode vecop_list[] = {
3834        INDEX_op_mul_vec, INDEX_op_sub_vec, 0
3835    };
3836    static const GVecGen3 ops[4] = {
3837        { .fni4 = gen_mls8_i32,
3838          .fniv = gen_mls_vec,
3839          .load_dest = true,
3840          .opt_opc = vecop_list,
3841          .vece = MO_8 },
3842        { .fni4 = gen_mls16_i32,
3843          .fniv = gen_mls_vec,
3844          .load_dest = true,
3845          .opt_opc = vecop_list,
3846          .vece = MO_16 },
3847        { .fni4 = gen_mls32_i32,
3848          .fniv = gen_mls_vec,
3849          .load_dest = true,
3850          .opt_opc = vecop_list,
3851          .vece = MO_32 },
3852        { .fni8 = gen_mls64_i64,
3853          .fniv = gen_mls_vec,
3854          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3855          .load_dest = true,
3856          .opt_opc = vecop_list,
3857          .vece = MO_64 },
3858    };
3859    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
3860}
3861
3862/* CMTST : test is "if ((X & Y) != 0)"; true yields all ones, false zero. */
3863static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3864{
3865    tcg_gen_and_i32(d, a, b);
3866    tcg_gen_setcondi_i32(TCG_COND_NE, d, d, 0);
3867    tcg_gen_neg_i32(d, d);
3868}
3869
3870void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
3871{
3872    tcg_gen_and_i64(d, a, b);
3873    tcg_gen_setcondi_i64(TCG_COND_NE, d, d, 0);
3874    tcg_gen_neg_i64(d, d);
3875}
3876
3877static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
3878{
3879    tcg_gen_and_vec(vece, d, a, b);
3880    tcg_gen_dupi_vec(vece, a, 0);
3881    tcg_gen_cmp_vec(TCG_COND_NE, vece, d, d, a);
3882}
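
    /*
     * The scalar versions need the setcond+neg pair to turn the 0/1
     * comparison result into a 0/-1 mask; tcg_gen_cmp_vec already
     * produces all ones for true, so the vector version does not.
     */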
3883
3884void gen_gvec_cmtst(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3885                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3886{
3887    static const TCGOpcode vecop_list[] = { INDEX_op_cmp_vec, 0 };
3888    static const GVecGen3 ops[4] = {
3889        { .fni4 = gen_helper_neon_tst_u8,
3890          .fniv = gen_cmtst_vec,
3891          .opt_opc = vecop_list,
3892          .vece = MO_8 },
3893        { .fni4 = gen_helper_neon_tst_u16,
3894          .fniv = gen_cmtst_vec,
3895          .opt_opc = vecop_list,
3896          .vece = MO_16 },
3897        { .fni4 = gen_cmtst_i32,
3898          .fniv = gen_cmtst_vec,
3899          .opt_opc = vecop_list,
3900          .vece = MO_32 },
3901        { .fni8 = gen_cmtst_i64,
3902          .fniv = gen_cmtst_vec,
3903          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3904          .opt_opc = vecop_list,
3905          .vece = MO_64 },
3906    };
3907    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
3908}
3909
3910void gen_ushl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
3911{
3912    TCGv_i32 lval = tcg_temp_new_i32();
3913    TCGv_i32 rval = tcg_temp_new_i32();
3914    TCGv_i32 lsh = tcg_temp_new_i32();
3915    TCGv_i32 rsh = tcg_temp_new_i32();
3916    TCGv_i32 zero = tcg_const_i32(0);
3917    TCGv_i32 max = tcg_const_i32(32);
3918
3919    /*
3920     * Rely on the TCG guarantee that out of range shifts produce
3921     * unspecified results, not undefined behaviour (i.e. no trap).
3922     * Discard out-of-range results after the fact.
3923     */
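        /*
         * The count is the signed byte in the low 8 bits of SHIFT:
         * non-negative counts select the left shift, negative counts
         * the (negated) right shift, and any count of magnitude 32 or
         * more fails both movcond tests and leaves zero in DST.
         */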
3924    tcg_gen_ext8s_i32(lsh, shift);
3925    tcg_gen_neg_i32(rsh, lsh);
3926    tcg_gen_shl_i32(lval, src, lsh);
3927    tcg_gen_shr_i32(rval, src, rsh);
3928    tcg_gen_movcond_i32(TCG_COND_LTU, dst, lsh, max, lval, zero);
3929    tcg_gen_movcond_i32(TCG_COND_LTU, dst, rsh, max, rval, dst);
3930
3931    tcg_temp_free_i32(lval);
3932    tcg_temp_free_i32(rval);
3933    tcg_temp_free_i32(lsh);
3934    tcg_temp_free_i32(rsh);
3935    tcg_temp_free_i32(zero);
3936    tcg_temp_free_i32(max);
3937}
3938
3939void gen_ushl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
3940{
3941    TCGv_i64 lval = tcg_temp_new_i64();
3942    TCGv_i64 rval = tcg_temp_new_i64();
3943    TCGv_i64 lsh = tcg_temp_new_i64();
3944    TCGv_i64 rsh = tcg_temp_new_i64();
3945    TCGv_i64 zero = tcg_const_i64(0);
3946    TCGv_i64 max = tcg_const_i64(64);
3947
3948    /*
3949     * Rely on the TCG guarantee that out of range shifts produce
3950     * unspecified results, not undefined behaviour (i.e. no trap).
3951     * Discard out-of-range results after the fact.
3952     */
3953    tcg_gen_ext8s_i64(lsh, shift);
3954    tcg_gen_neg_i64(rsh, lsh);
3955    tcg_gen_shl_i64(lval, src, lsh);
3956    tcg_gen_shr_i64(rval, src, rsh);
3957    tcg_gen_movcond_i64(TCG_COND_LTU, dst, lsh, max, lval, zero);
3958    tcg_gen_movcond_i64(TCG_COND_LTU, dst, rsh, max, rval, dst);
3959
3960    tcg_temp_free_i64(lval);
3961    tcg_temp_free_i64(rval);
3962    tcg_temp_free_i64(lsh);
3963    tcg_temp_free_i64(rsh);
3964    tcg_temp_free_i64(zero);
3965    tcg_temp_free_i64(max);
3966}
3967
3968static void gen_ushl_vec(unsigned vece, TCGv_vec dst,
3969                         TCGv_vec src, TCGv_vec shift)
3970{
3971    TCGv_vec lval = tcg_temp_new_vec_matching(dst);
3972    TCGv_vec rval = tcg_temp_new_vec_matching(dst);
3973    TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
3974    TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
3975    TCGv_vec msk, max;
3976
3977    tcg_gen_neg_vec(vece, rsh, shift);
3978    if (vece == MO_8) {
3979        tcg_gen_mov_vec(lsh, shift);
3980    } else {
3981        msk = tcg_temp_new_vec_matching(dst);
3982        tcg_gen_dupi_vec(vece, msk, 0xff);
3983        tcg_gen_and_vec(vece, lsh, shift, msk);
3984        tcg_gen_and_vec(vece, rsh, rsh, msk);
3985        tcg_temp_free_vec(msk);
3986    }
3987
3988    /*
3989     * Rely on the TCG guarantee that out of range shifts produce
3990     * unspecified results, not undefined behaviour (i.e. no trap).
3991     * Discard out-of-range results after the fact.
3992     */
3993    tcg_gen_shlv_vec(vece, lval, src, lsh);
3994    tcg_gen_shrv_vec(vece, rval, src, rsh);
3995
3996    max = tcg_temp_new_vec_matching(dst);
3997    tcg_gen_dupi_vec(vece, max, 8 << vece);
3998
3999    /*
4000     * The choice of LT (signed) and GEU (unsigned) is biased toward
4001     * the instructions of the x86_64 host.  For MO_8, the whole byte
4002     * is significant so we must use an unsigned compare; otherwise we
4003     * have already masked to a byte and so a signed compare works.
4004     * Other tcg hosts have a full set of comparisons and do not care.
4005     */
4006    if (vece == MO_8) {
4007        tcg_gen_cmp_vec(TCG_COND_GEU, vece, lsh, lsh, max);
4008        tcg_gen_cmp_vec(TCG_COND_GEU, vece, rsh, rsh, max);
4009        tcg_gen_andc_vec(vece, lval, lval, lsh);
4010        tcg_gen_andc_vec(vece, rval, rval, rsh);
4011    } else {
4012        tcg_gen_cmp_vec(TCG_COND_LT, vece, lsh, lsh, max);
4013        tcg_gen_cmp_vec(TCG_COND_LT, vece, rsh, rsh, max);
4014        tcg_gen_and_vec(vece, lval, lval, lsh);
4015        tcg_gen_and_vec(vece, rval, rval, rsh);
4016    }
4017    tcg_gen_or_vec(vece, dst, lval, rval);
4018
4019    tcg_temp_free_vec(max);
4020    tcg_temp_free_vec(lval);
4021    tcg_temp_free_vec(rval);
4022    tcg_temp_free_vec(lsh);
4023    tcg_temp_free_vec(rsh);
4024}
4025
4026void gen_gvec_ushl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4027                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4028{
4029    static const TCGOpcode vecop_list[] = {
4030        INDEX_op_neg_vec, INDEX_op_shlv_vec,
4031        INDEX_op_shrv_vec, INDEX_op_cmp_vec, 0
4032    };
4033    static const GVecGen3 ops[4] = {
4034        { .fniv = gen_ushl_vec,
4035          .fno = gen_helper_gvec_ushl_b,
4036          .opt_opc = vecop_list,
4037          .vece = MO_8 },
4038        { .fniv = gen_ushl_vec,
4039          .fno = gen_helper_gvec_ushl_h,
4040          .opt_opc = vecop_list,
4041          .vece = MO_16 },
4042        { .fni4 = gen_ushl_i32,
4043          .fniv = gen_ushl_vec,
4044          .opt_opc = vecop_list,
4045          .vece = MO_32 },
4046        { .fni8 = gen_ushl_i64,
4047          .fniv = gen_ushl_vec,
4048          .opt_opc = vecop_list,
4049          .vece = MO_64 },
4050    };
4051    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4052}
4053
4054void gen_sshl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
4055{
4056    TCGv_i32 lval = tcg_temp_new_i32();
4057    TCGv_i32 rval = tcg_temp_new_i32();
4058    TCGv_i32 lsh = tcg_temp_new_i32();
4059    TCGv_i32 rsh = tcg_temp_new_i32();
4060    TCGv_i32 zero = tcg_const_i32(0);
4061    TCGv_i32 max = tcg_const_i32(31);
4062
4063    /*
4064     * Rely on the TCG guarantee that out of range shifts produce
4065     * unspecified results, not undefined behaviour (i.e. no trap).
4066     * Discard out-of-range results after the fact.
4067     */
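        /*
         * Unlike the unsigned case, an over-large right shift must
         * still produce all sign bits, so rsh is clamped to 31 before
         * the sar rather than discarded; only the left-shift result
         * is zeroed when its count is out of range.
         */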
4068    tcg_gen_ext8s_i32(lsh, shift);
4069    tcg_gen_neg_i32(rsh, lsh);
4070    tcg_gen_shl_i32(lval, src, lsh);
4071    tcg_gen_umin_i32(rsh, rsh, max);
4072    tcg_gen_sar_i32(rval, src, rsh);
4073    tcg_gen_movcond_i32(TCG_COND_LEU, lval, lsh, max, lval, zero);
4074    tcg_gen_movcond_i32(TCG_COND_LT, dst, lsh, zero, rval, lval);
4075
4076    tcg_temp_free_i32(lval);
4077    tcg_temp_free_i32(rval);
4078    tcg_temp_free_i32(lsh);
4079    tcg_temp_free_i32(rsh);
4080    tcg_temp_free_i32(zero);
4081    tcg_temp_free_i32(max);
4082}
4083
4084void gen_sshl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
4085{
4086    TCGv_i64 lval = tcg_temp_new_i64();
4087    TCGv_i64 rval = tcg_temp_new_i64();
4088    TCGv_i64 lsh = tcg_temp_new_i64();
4089    TCGv_i64 rsh = tcg_temp_new_i64();
4090    TCGv_i64 zero = tcg_const_i64(0);
4091    TCGv_i64 max = tcg_const_i64(63);
4092
4093    /*
4094     * Rely on the TCG guarantee that out of range shifts produce
4095     * unspecified results, not undefined behaviour (i.e. no trap).
4096     * Discard out-of-range results after the fact.
4097     */
4098    tcg_gen_ext8s_i64(lsh, shift);
4099    tcg_gen_neg_i64(rsh, lsh);
4100    tcg_gen_shl_i64(lval, src, lsh);
4101    tcg_gen_umin_i64(rsh, rsh, max);
4102    tcg_gen_sar_i64(rval, src, rsh);
4103    tcg_gen_movcond_i64(TCG_COND_LEU, lval, lsh, max, lval, zero);
4104    tcg_gen_movcond_i64(TCG_COND_LT, dst, lsh, zero, rval, lval);
4105
4106    tcg_temp_free_i64(lval);
4107    tcg_temp_free_i64(rval);
4108    tcg_temp_free_i64(lsh);
4109    tcg_temp_free_i64(rsh);
4110    tcg_temp_free_i64(zero);
4111    tcg_temp_free_i64(max);
4112}
4113
4114static void gen_sshl_vec(unsigned vece, TCGv_vec dst,
4115                         TCGv_vec src, TCGv_vec shift)
4116{
4117    TCGv_vec lval = tcg_temp_new_vec_matching(dst);
4118    TCGv_vec rval = tcg_temp_new_vec_matching(dst);
4119    TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
4120    TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
4121    TCGv_vec tmp = tcg_temp_new_vec_matching(dst);
4122
4123    /*
4124     * Rely on the TCG guarantee that out of range shifts produce
4125     * unspecified results, not undefined behaviour (i.e. no trap).
4126     * Discard out-of-range results after the fact.
4127     */
4128    tcg_gen_neg_vec(vece, rsh, shift);
4129    if (vece == MO_8) {
4130        tcg_gen_mov_vec(lsh, shift);
4131    } else {
4132        tcg_gen_dupi_vec(vece, tmp, 0xff);
4133        tcg_gen_and_vec(vece, lsh, shift, tmp);
4134        tcg_gen_and_vec(vece, rsh, rsh, tmp);
4135    }
4136
4137    /* Bound rsh so an out-of-range right shift becomes a shift by
         * esize - 1, which yields all sign bits.  */
4138    tcg_gen_dupi_vec(vece, tmp, (8 << vece) - 1);
4139    tcg_gen_umin_vec(vece, rsh, rsh, tmp);
4140    tcg_gen_cmp_vec(TCG_COND_GT, vece, tmp, lsh, tmp);
4141
4142    tcg_gen_shlv_vec(vece, lval, src, lsh);
4143    tcg_gen_sarv_vec(vece, rval, src, rsh);
4144
4145    /* Select in-bound left shift.  */
4146    tcg_gen_andc_vec(vece, lval, lval, tmp);
4147
4148    /* Select between left and right shift.  For MO_8 the count is
         * unmasked, so compare directly against zero; for wider elements
         * lsh was masked to [0, 255], so a "negative" count is one that
         * is >= 0x80 (hence the swapped cmpsel operand order).  */
4149    if (vece == MO_8) {
4150        tcg_gen_dupi_vec(vece, tmp, 0);
4151        tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, rval, lval);
4152    } else {
4153        tcg_gen_dupi_vec(vece, tmp, 0x80);
4154        tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, lval, rval);
4155    }
4156
4157    tcg_temp_free_vec(lval);
4158    tcg_temp_free_vec(rval);
4159    tcg_temp_free_vec(lsh);
4160    tcg_temp_free_vec(rsh);
4161    tcg_temp_free_vec(tmp);
4162}
4163
4164void gen_gvec_sshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4165                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4166{
4167    static const TCGOpcode vecop_list[] = {
4168        INDEX_op_neg_vec, INDEX_op_umin_vec, INDEX_op_shlv_vec,
4169        INDEX_op_sarv_vec, INDEX_op_cmp_vec, INDEX_op_cmpsel_vec, 0
4170    };
4171    static const GVecGen3 ops[4] = {
4172        { .fniv = gen_sshl_vec,
4173          .fno = gen_helper_gvec_sshl_b,
4174          .opt_opc = vecop_list,
4175          .vece = MO_8 },
4176        { .fniv = gen_sshl_vec,
4177          .fno = gen_helper_gvec_sshl_h,
4178          .opt_opc = vecop_list,
4179          .vece = MO_16 },
4180        { .fni4 = gen_sshl_i32,
4181          .fniv = gen_sshl_vec,
4182          .opt_opc = vecop_list,
4183          .vece = MO_32 },
4184        { .fni8 = gen_sshl_i64,
4185          .fniv = gen_sshl_vec,
4186          .opt_opc = vecop_list,
4187          .vece = MO_64 },
4188    };
4189    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4190}
4191
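    /*
     * For the saturating ops below, QC is computed by performing the
     * operation twice -- once wrapping, once saturating -- and OR-ing
     * any per-element difference into the sticky saturation flag.
     */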
4192static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4193                          TCGv_vec a, TCGv_vec b)
4194{
4195    TCGv_vec x = tcg_temp_new_vec_matching(t);
4196    tcg_gen_add_vec(vece, x, a, b);
4197    tcg_gen_usadd_vec(vece, t, a, b);
4198    tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4199    tcg_gen_or_vec(vece, sat, sat, x);
4200    tcg_temp_free_vec(x);
4201}
4202
4203void gen_gvec_uqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4204                       uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4205{
4206    static const TCGOpcode vecop_list[] = {
4207        INDEX_op_usadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
4208    };
4209    static const GVecGen4 ops[4] = {
4210        { .fniv = gen_uqadd_vec,
4211          .fno = gen_helper_gvec_uqadd_b,
4212          .write_aofs = true,
4213          .opt_opc = vecop_list,
4214          .vece = MO_8 },
4215        { .fniv = gen_uqadd_vec,
4216          .fno = gen_helper_gvec_uqadd_h,
4217          .write_aofs = true,
4218          .opt_opc = vecop_list,
4219          .vece = MO_16 },
4220        { .fniv = gen_uqadd_vec,
4221          .fno = gen_helper_gvec_uqadd_s,
4222          .write_aofs = true,
4223          .opt_opc = vecop_list,
4224          .vece = MO_32 },
4225        { .fniv = gen_uqadd_vec,
4226          .fno = gen_helper_gvec_uqadd_d,
4227          .write_aofs = true,
4228          .opt_opc = vecop_list,
4229          .vece = MO_64 },
4230    };
4231    tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4232                   rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4233}
4234
4235static void gen_sqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4236                          TCGv_vec a, TCGv_vec b)
4237{
4238    TCGv_vec x = tcg_temp_new_vec_matching(t);
4239    tcg_gen_add_vec(vece, x, a, b);
4240    tcg_gen_ssadd_vec(vece, t, a, b);
4241    tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4242    tcg_gen_or_vec(vece, sat, sat, x);
4243    tcg_temp_free_vec(x);
4244}
4245
4246void gen_gvec_sqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4247                       uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4248{
4249    static const TCGOpcode vecop_list[] = {
4250        INDEX_op_ssadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
4251    };
4252    static const GVecGen4 ops[4] = {
4253        { .fniv = gen_sqadd_vec,
4254          .fno = gen_helper_gvec_sqadd_b,
4255          .opt_opc = vecop_list,
4256          .write_aofs = true,
4257          .vece = MO_8 },
4258        { .fniv = gen_sqadd_vec,
4259          .fno = gen_helper_gvec_sqadd_h,
4260          .opt_opc = vecop_list,
4261          .write_aofs = true,
4262          .vece = MO_16 },
4263        { .fniv = gen_sqadd_vec,
4264          .fno = gen_helper_gvec_sqadd_s,
4265          .opt_opc = vecop_list,
4266          .write_aofs = true,
4267          .vece = MO_32 },
4268        { .fniv = gen_sqadd_vec,
4269          .fno = gen_helper_gvec_sqadd_d,
4270          .opt_opc = vecop_list,
4271          .write_aofs = true,
4272          .vece = MO_64 },
4273    };
4274    tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4275                   rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4276}
4277
4278static void gen_uqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4279                          TCGv_vec a, TCGv_vec b)
4280{
4281    TCGv_vec x = tcg_temp_new_vec_matching(t);
4282    tcg_gen_sub_vec(vece, x, a, b);
4283    tcg_gen_ussub_vec(vece, t, a, b);
4284    tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4285    tcg_gen_or_vec(vece, sat, sat, x);
4286    tcg_temp_free_vec(x);
4287}
4288
4289void gen_gvec_uqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4290                       uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4291{
4292    static const TCGOpcode vecop_list[] = {
4293        INDEX_op_ussub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
4294    };
4295    static const GVecGen4 ops[4] = {
4296        { .fniv = gen_uqsub_vec,
4297          .fno = gen_helper_gvec_uqsub_b,
4298          .opt_opc = vecop_list,
4299          .write_aofs = true,
4300          .vece = MO_8 },
4301        { .fniv = gen_uqsub_vec,
4302          .fno = gen_helper_gvec_uqsub_h,
4303          .opt_opc = vecop_list,
4304          .write_aofs = true,
4305          .vece = MO_16 },
4306        { .fniv = gen_uqsub_vec,
4307          .fno = gen_helper_gvec_uqsub_s,
4308          .opt_opc = vecop_list,
4309          .write_aofs = true,
4310          .vece = MO_32 },
4311        { .fniv = gen_uqsub_vec,
4312          .fno = gen_helper_gvec_uqsub_d,
4313          .opt_opc = vecop_list,
4314          .write_aofs = true,
4315          .vece = MO_64 },
4316    };
4317    tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4318                   rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4319}
4320
4321static void gen_sqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4322                          TCGv_vec a, TCGv_vec b)
4323{
4324    TCGv_vec x = tcg_temp_new_vec_matching(t);
4325    tcg_gen_sub_vec(vece, x, a, b);
4326    tcg_gen_sssub_vec(vece, t, a, b);
4327    tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4328    tcg_gen_or_vec(vece, sat, sat, x);
4329    tcg_temp_free_vec(x);
4330}
4331
4332void gen_gvec_sqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4333                       uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4334{
4335    static const TCGOpcode vecop_list[] = {
4336        INDEX_op_sssub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
4337    };
4338    static const GVecGen4 ops[4] = {
4339        { .fniv = gen_sqsub_vec,
4340          .fno = gen_helper_gvec_sqsub_b,
4341          .opt_opc = vecop_list,
4342          .write_aofs = true,
4343          .vece = MO_8 },
4344        { .fniv = gen_sqsub_vec,
4345          .fno = gen_helper_gvec_sqsub_h,
4346          .opt_opc = vecop_list,
4347          .write_aofs = true,
4348          .vece = MO_16 },
4349        { .fniv = gen_sqsub_vec,
4350          .fno = gen_helper_gvec_sqsub_s,
4351          .opt_opc = vecop_list,
4352          .write_aofs = true,
4353          .vece = MO_32 },
4354        { .fniv = gen_sqsub_vec,
4355          .fno = gen_helper_gvec_sqsub_d,
4356          .opt_opc = vecop_list,
4357          .write_aofs = true,
4358          .vece = MO_64 },
4359    };
4360    tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4361                   rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4362}
4363
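    /*
     * Absolute difference: the scalar forms compute both a - b and
     * b - a and pick the non-negative one with a movcond; the vector
     * forms use max(a, b) - min(a, b) and need no select at all.
     */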
4364static void gen_sabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4365{
4366    TCGv_i32 t = tcg_temp_new_i32();
4367
4368    tcg_gen_sub_i32(t, a, b);
4369    tcg_gen_sub_i32(d, b, a);
4370    tcg_gen_movcond_i32(TCG_COND_LT, d, a, b, d, t);
4371    tcg_temp_free_i32(t);
4372}
4373
4374static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4375{
4376    TCGv_i64 t = tcg_temp_new_i64();
4377
4378    tcg_gen_sub_i64(t, a, b);
4379    tcg_gen_sub_i64(d, b, a);
4380    tcg_gen_movcond_i64(TCG_COND_LT, d, a, b, d, t);
4381    tcg_temp_free_i64(t);
4382}
4383
4384static void gen_sabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4385{
4386    TCGv_vec t = tcg_temp_new_vec_matching(d);
4387
4388    tcg_gen_smin_vec(vece, t, a, b);
4389    tcg_gen_smax_vec(vece, d, a, b);
4390    tcg_gen_sub_vec(vece, d, d, t);
4391    tcg_temp_free_vec(t);
4392}
4393
4394void gen_gvec_sabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4395                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4396{
4397    static const TCGOpcode vecop_list[] = {
4398        INDEX_op_sub_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0
4399    };
4400    static const GVecGen3 ops[4] = {
4401        { .fniv = gen_sabd_vec,
4402          .fno = gen_helper_gvec_sabd_b,
4403          .opt_opc = vecop_list,
4404          .vece = MO_8 },
4405        { .fniv = gen_sabd_vec,
4406          .fno = gen_helper_gvec_sabd_h,
4407          .opt_opc = vecop_list,
4408          .vece = MO_16 },
4409        { .fni4 = gen_sabd_i32,
4410          .fniv = gen_sabd_vec,
4411          .fno = gen_helper_gvec_sabd_s,
4412          .opt_opc = vecop_list,
4413          .vece = MO_32 },
4414        { .fni8 = gen_sabd_i64,
4415          .fniv = gen_sabd_vec,
4416          .fno = gen_helper_gvec_sabd_d,
4417          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4418          .opt_opc = vecop_list,
4419          .vece = MO_64 },
4420    };
4421    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4422}
4423
4424static void gen_uabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4425{
4426    TCGv_i32 t = tcg_temp_new_i32();
4427
4428    tcg_gen_sub_i32(t, a, b);
4429    tcg_gen_sub_i32(d, b, a);
4430    tcg_gen_movcond_i32(TCG_COND_LTU, d, a, b, d, t);
4431    tcg_temp_free_i32(t);
4432}
4433
4434static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4435{
4436    TCGv_i64 t = tcg_temp_new_i64();
4437
4438    tcg_gen_sub_i64(t, a, b);
4439    tcg_gen_sub_i64(d, b, a);
4440    tcg_gen_movcond_i64(TCG_COND_LTU, d, a, b, d, t);
4441    tcg_temp_free_i64(t);
4442}
4443
4444static void gen_uabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4445{
4446    TCGv_vec t = tcg_temp_new_vec_matching(d);
4447
4448    tcg_gen_umin_vec(vece, t, a, b);
4449    tcg_gen_umax_vec(vece, d, a, b);
4450    tcg_gen_sub_vec(vece, d, d, t);
4451    tcg_temp_free_vec(t);
4452}
4453
4454void gen_gvec_uabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4455                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4456{
4457    static const TCGOpcode vecop_list[] = {
4458        INDEX_op_sub_vec, INDEX_op_umin_vec, INDEX_op_umax_vec, 0
4459    };
4460    static const GVecGen3 ops[4] = {
4461        { .fniv = gen_uabd_vec,
4462          .fno = gen_helper_gvec_uabd_b,
4463          .opt_opc = vecop_list,
4464          .vece = MO_8 },
4465        { .fniv = gen_uabd_vec,
4466          .fno = gen_helper_gvec_uabd_h,
4467          .opt_opc = vecop_list,
4468          .vece = MO_16 },
4469        { .fni4 = gen_uabd_i32,
4470          .fniv = gen_uabd_vec,
4471          .fno = gen_helper_gvec_uabd_s,
4472          .opt_opc = vecop_list,
4473          .vece = MO_32 },
4474        { .fni8 = gen_uabd_i64,
4475          .fniv = gen_uabd_vec,
4476          .fno = gen_helper_gvec_uabd_d,
4477          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4478          .opt_opc = vecop_list,
4479          .vece = MO_64 },
4480    };
4481    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4482}
4483
4484static void gen_saba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4485{
4486    TCGv_i32 t = tcg_temp_new_i32();
4487    gen_sabd_i32(t, a, b);
4488    tcg_gen_add_i32(d, d, t);
4489    tcg_temp_free_i32(t);
4490}
4491
4492static void gen_saba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4493{
4494    TCGv_i64 t = tcg_temp_new_i64();
4495    gen_sabd_i64(t, a, b);
4496    tcg_gen_add_i64(d, d, t);
4497    tcg_temp_free_i64(t);
4498}
4499
4500static void gen_saba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4501{
4502    TCGv_vec t = tcg_temp_new_vec_matching(d);
4503    gen_sabd_vec(vece, t, a, b);
4504    tcg_gen_add_vec(vece, d, d, t);
4505    tcg_temp_free_vec(t);
4506}
4507
4508void gen_gvec_saba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4509                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4510{
4511    static const TCGOpcode vecop_list[] = {
4512        INDEX_op_sub_vec, INDEX_op_add_vec,
4513        INDEX_op_smin_vec, INDEX_op_smax_vec, 0
4514    };
4515    static const GVecGen3 ops[4] = {
4516        { .fniv = gen_saba_vec,
4517          .fno = gen_helper_gvec_saba_b,
4518          .opt_opc = vecop_list,
4519          .load_dest = true,
4520          .vece = MO_8 },
4521        { .fniv = gen_saba_vec,
4522          .fno = gen_helper_gvec_saba_h,
4523          .opt_opc = vecop_list,
4524          .load_dest = true,
4525          .vece = MO_16 },
4526        { .fni4 = gen_saba_i32,
4527          .fniv = gen_saba_vec,
4528          .fno = gen_helper_gvec_saba_s,
4529          .opt_opc = vecop_list,
4530          .load_dest = true,
4531          .vece = MO_32 },
4532        { .fni8 = gen_saba_i64,
4533          .fniv = gen_saba_vec,
4534          .fno = gen_helper_gvec_saba_d,
4535          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4536          .opt_opc = vecop_list,
4537          .load_dest = true,
4538          .vece = MO_64 },
4539    };
4540    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4541}
4542
4543static void gen_uaba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4544{
4545    TCGv_i32 t = tcg_temp_new_i32();
4546    gen_uabd_i32(t, a, b);
4547    tcg_gen_add_i32(d, d, t);
4548    tcg_temp_free_i32(t);
4549}
4550
4551static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4552{
4553    TCGv_i64 t = tcg_temp_new_i64();
4554    gen_uabd_i64(t, a, b);
4555    tcg_gen_add_i64(d, d, t);
4556    tcg_temp_free_i64(t);
4557}
4558
4559static void gen_uaba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4560{
4561    TCGv_vec t = tcg_temp_new_vec_matching(d);
4562    gen_uabd_vec(vece, t, a, b);
4563    tcg_gen_add_vec(vece, d, d, t);
4564    tcg_temp_free_vec(t);
4565}
4566
4567void gen_gvec_uaba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4568                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4569{
4570    static const TCGOpcode vecop_list[] = {
4571        INDEX_op_sub_vec, INDEX_op_add_vec,
4572        INDEX_op_umin_vec, INDEX_op_umax_vec, 0
4573    };
4574    static const GVecGen3 ops[4] = {
4575        { .fniv = gen_uaba_vec,
4576          .fno = gen_helper_gvec_uaba_b,
4577          .opt_opc = vecop_list,
4578          .load_dest = true,
4579          .vece = MO_8 },
4580        { .fniv = gen_uaba_vec,
4581          .fno = gen_helper_gvec_uaba_h,
4582          .opt_opc = vecop_list,
4583          .load_dest = true,
4584          .vece = MO_16 },
4585        { .fni4 = gen_uaba_i32,
4586          .fniv = gen_uaba_vec,
4587          .fno = gen_helper_gvec_uaba_s,
4588          .opt_opc = vecop_list,
4589          .load_dest = true,
4590          .vece = MO_32 },
4591        { .fni8 = gen_uaba_i64,
4592          .fniv = gen_uaba_vec,
4593          .fno = gen_helper_gvec_uaba_d,
4594          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4595          .opt_opc = vecop_list,
4596          .load_dest = true,
4597          .vece = MO_64 },
4598    };
4599    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4600}
4601
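    /*
     * Decode and emit one AArch32 coprocessor access (MRC, MCR, MRRC
     * or MCRR style): look up the register by its encoding, emit any
     * runtime permission check, then perform the read or write.
     */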
4602static void do_coproc_insn(DisasContext *s, int cpnum, int is64,
4603                           int opc1, int crn, int crm, int opc2,
4604                           bool isread, int rt, int rt2)
4605{
4606    const ARMCPRegInfo *ri;
4607
4608    ri = get_arm_cp_reginfo(s->cp_regs,
4609            ENCODE_CP_REG(cpnum, is64, s->ns, crn, crm, opc1, opc2));
4610    if (ri) {
4611        bool need_exit_tb;
4612
4613        /* Check access permissions */
4614        if (!cp_access_ok(s->current_el, ri, isread)) {
4615            unallocated_encoding(s);
4616            return;
4617        }
4618
4619        if (s->hstr_active || ri->accessfn ||
4620            (arm_dc_feature(s, ARM_FEATURE_XSCALE) && cpnum < 14)) {
4621            /* Emit code to perform further access permission checks at
4622             * runtime; this may result in an exception.
4623             * Note that on XScale all cp0..cp13 registers do an access check
4624             * call in order to handle c15_cpar.
4625             */
4626            TCGv_ptr tmpptr;
4627            TCGv_i32 tcg_syn, tcg_isread;
4628            uint32_t syndrome;
4629
4630            /* Note that since we are an implementation which takes an
4631             * exception on a trapped conditional instruction only if the
4632             * instruction passes its condition code check, we can take
4633             * advantage of the clause in the ARM ARM that allows us to set
4634             * the COND field in the instruction to 0xE in all cases.
4635             * We could fish the actual condition out of the insn (ARM)
4636             * or the condexec bits (Thumb) but it isn't necessary.
4637             */
4638            switch (cpnum) {
4639            case 14:
4640                if (is64) {
4641                    syndrome = syn_cp14_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
4642                                                 isread, false);
4643                } else {
4644                    syndrome = syn_cp14_rt_trap(1, 0xe, opc1, opc2, crn, crm,
4645                                                rt, isread, false);
4646                }
4647                break;
4648            case 15:
4649                if (is64) {
4650                    syndrome = syn_cp15_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
4651                                                 isread, false);
4652                } else {
4653                    syndrome = syn_cp15_rt_trap(1, 0xe, opc1, opc2, crn, crm,
4654                                                rt, isread, false);
4655                }
4656                break;
4657            default:
4658                /* ARMv8 defines that only coprocessors 14 and 15 exist,
4659                 * so this can only happen if this is an ARMv7 or earlier CPU,
4660                 * in which case the syndrome information won't actually be
4661                 * guest visible.
4662                 */
4663                assert(!arm_dc_feature(s, ARM_FEATURE_V8));
4664                syndrome = syn_uncategorized();
4665                break;
4666            }
4667
4668            gen_set_condexec(s);
4669            gen_set_pc_im(s, s->pc_curr);
4670            tmpptr = tcg_const_ptr(ri);
4671            tcg_syn = tcg_const_i32(syndrome);
4672            tcg_isread = tcg_const_i32(isread);
4673            gen_helper_access_check_cp_reg(cpu_env, tmpptr, tcg_syn,
4674                                           tcg_isread);
4675            tcg_temp_free_ptr(tmpptr);
4676            tcg_temp_free_i32(tcg_syn);
4677            tcg_temp_free_i32(tcg_isread);
4678        } else if (ri->type & ARM_CP_RAISES_EXC) {
4679            /*
4680             * The readfn or writefn might raise an exception;
4681             * synchronize the CPU state in case it does.
4682             */
4683            gen_set_condexec(s);
4684            gen_set_pc_im(s, s->pc_curr);
4685        }
4686
4687        /* Handle special cases first */
4688        switch (ri->type & ~(ARM_CP_FLAG_MASK & ~ARM_CP_SPECIAL)) {
4689        case ARM_CP_NOP:
4690            return;
4691        case ARM_CP_WFI:
4692            if (isread) {
4693                unallocated_encoding(s);
4694                return;
4695            }
4696            gen_set_pc_im(s, s->base.pc_next);
4697            s->base.is_jmp = DISAS_WFI;
4698            return;
4699        default:
4700            break;
4701        }
4702
4703        if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
4704            gen_io_start();
4705        }
4706
4707        if (isread) {
4708            /* Read */
4709            if (is64) {
4710                TCGv_i64 tmp64;
4711                TCGv_i32 tmp;
4712                if (ri->type & ARM_CP_CONST) {
4713                    tmp64 = tcg_const_i64(ri->resetvalue);
4714                } else if (ri->readfn) {
4715                    TCGv_ptr tmpptr;
4716                    tmp64 = tcg_temp_new_i64();
4717                    tmpptr = tcg_const_ptr(ri);
4718                    gen_helper_get_cp_reg64(tmp64, cpu_env, tmpptr);
4719                    tcg_temp_free_ptr(tmpptr);
4720                } else {
4721                    tmp64 = tcg_temp_new_i64();
4722                    tcg_gen_ld_i64(tmp64, cpu_env, ri->fieldoffset);
4723                }
4724                tmp = tcg_temp_new_i32();
4725                tcg_gen_extrl_i64_i32(tmp, tmp64);
4726                store_reg(s, rt, tmp);
4727                tmp = tcg_temp_new_i32();
4728                tcg_gen_extrh_i64_i32(tmp, tmp64);
4729                tcg_temp_free_i64(tmp64);
4730                store_reg(s, rt2, tmp);
4731            } else {
4732                TCGv_i32 tmp;
4733                if (ri->type & ARM_CP_CONST) {
4734                    tmp = tcg_const_i32(ri->resetvalue);
4735                } else if (ri->readfn) {
4736                    TCGv_ptr tmpptr;
4737                    tmp = tcg_temp_new_i32();
4738                    tmpptr = tcg_const_ptr(ri);
4739                    gen_helper_get_cp_reg(tmp, cpu_env, tmpptr);
4740                    tcg_temp_free_ptr(tmpptr);
4741                } else {
4742                    tmp = load_cpu_offset(ri->fieldoffset);
4743                }
4744                if (rt == 15) {
4745                    /* A destination register of r15 for 32-bit loads sets
4746                     * the condition codes from the high 4 bits of the value.
4747                     */
4748                    gen_set_nzcv(tmp);
4749                    tcg_temp_free_i32(tmp);
4750                } else {
4751                    store_reg(s, rt, tmp);
4752                }
4753            }
4754        } else {
4755            /* Write */
4756            if (ri->type & ARM_CP_CONST) {
4757                /* If not forbidden by access permissions, treat as write-ignored */
4758                return;
4759            }
4760
4761            if (is64) {
4762                TCGv_i32 tmplo, tmphi;
4763                TCGv_i64 tmp64 = tcg_temp_new_i64();
4764                tmplo = load_reg(s, rt);
4765                tmphi = load_reg(s, rt2);
4766                tcg_gen_concat_i32_i64(tmp64, tmplo, tmphi);
4767                tcg_temp_free_i32(tmplo);
4768                tcg_temp_free_i32(tmphi);
4769                if (ri->writefn) {
4770                    TCGv_ptr tmpptr = tcg_const_ptr(ri);
4771                    gen_helper_set_cp_reg64(cpu_env, tmpptr, tmp64);
4772                    tcg_temp_free_ptr(tmpptr);
4773                } else {
4774                    tcg_gen_st_i64(tmp64, cpu_env, ri->fieldoffset);
4775                }
4776                tcg_temp_free_i64(tmp64);
4777            } else {
4778                if (ri->writefn) {
4779                    TCGv_i32 tmp;
4780                    TCGv_ptr tmpptr;
4781                    tmp = load_reg(s, rt);
4782                    tmpptr = tcg_const_ptr(ri);
4783                    gen_helper_set_cp_reg(cpu_env, tmpptr, tmp);
4784                    tcg_temp_free_ptr(tmpptr);
4785                    tcg_temp_free_i32(tmp);
4786                } else {
4787                    TCGv_i32 tmp = load_reg(s, rt);
4788                    store_cpu_offset(tmp, ri->fieldoffset);
4789                }
4790            }
4791        }
4792
4793        /* I/O operations must end the TB here (whether read or write) */
4794        need_exit_tb = ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) &&
4795                        (ri->type & ARM_CP_IO));
4796
4797        if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
4798            /*
4799             * A write to any coprocessor register that ends a TB
4800             * must rebuild the hflags for the next TB.
4801             */
4802            TCGv_i32 tcg_el = tcg_const_i32(s->current_el);
4803            if (arm_dc_feature(s, ARM_FEATURE_M)) {
4804                gen_helper_rebuild_hflags_m32(cpu_env, tcg_el);
4805            } else {
4806                if (ri->type & ARM_CP_NEWEL) {
4807                    gen_helper_rebuild_hflags_a32_newel(cpu_env);
4808                } else {
4809                    gen_helper_rebuild_hflags_a32(cpu_env, tcg_el);
4810                }
4811            }
4812            tcg_temp_free_i32(tcg_el);
4813            /*
4814             * We default to ending the TB on a coprocessor register write,
4815             * but allow this to be suppressed by the register definition
4816             * (usually only necessary to work around guest bugs).
4817             */
4818            need_exit_tb = true;
4819        }
4820        if (need_exit_tb) {
4821            gen_lookup_tb(s);
4822        }
4823
4824        return;
4825    }
4826
4827    /* Unknown register; this might be a guest error or an
4828     * unimplemented QEMU feature.
4829     */
4830    if (is64) {
4831        qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
4832                      "64 bit system register cp:%d opc1: %d crm:%d "
4833                      "(%s)\n",
4834                      isread ? "read" : "write", cpnum, opc1, crm,
4835                      s->ns ? "non-secure" : "secure");
4836    } else {
4837        qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
4838                      "system register cp:%d opc1:%d crn:%d crm:%d opc2:%d "
4839                      "(%s)\n",
4840                      isread ? "read" : "write", cpnum, opc1, crn, crm, opc2,
4841                      s->ns ? "non-secure" : "secure");
4842    }
4843
4844    unallocated_encoding(s);
4845    return;
4846}
4847
4848/* Decode XScale DSP or iWMMXt insn (in the copro space, cp=0 or 1) */
4849static void disas_xscale_insn(DisasContext *s, uint32_t insn)
4850{
4851    int cpnum = (insn >> 8) & 0xf;
4852
4853    if (extract32(s->c15_cpar, cpnum, 1) == 0) {
4854        unallocated_encoding(s);
4855    } else if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
4856        if (disas_iwmmxt_insn(s, insn)) {
4857            unallocated_encoding(s);
4858        }
4859    } else if (arm_dc_feature(s, ARM_FEATURE_XSCALE)) {
4860        if (disas_dsp_insn(s, insn)) {
4861            unallocated_encoding(s);
4862        }
4863    }
4864}
4865
4866/* Store a 64-bit value to a register pair.  Clobbers val.  */
4867static void gen_storeq_reg(DisasContext *s, int rlow, int rhigh, TCGv_i64 val)
4868{
4869    TCGv_i32 tmp;
4870    tmp = tcg_temp_new_i32();
4871    tcg_gen_extrl_i64_i32(tmp, val);
4872    store_reg(s, rlow, tmp);
4873    tmp = tcg_temp_new_i32();
4874    tcg_gen_extrh_i64_i32(tmp, val);
4875    store_reg(s, rhigh, tmp);
4876}
4877
4878/* Load a 64-bit value from a register pair and add it to val.  */
4879static void gen_addq(DisasContext *s, TCGv_i64 val, int rlow, int rhigh)
4880{
4881    TCGv_i64 tmp;
4882    TCGv_i32 tmpl;
4883    TCGv_i32 tmph;
4884
4885    /* Load the 64-bit value rhigh:rlow.  */
4886    tmpl = load_reg(s, rlow);
4887    tmph = load_reg(s, rhigh);
4888    tmp = tcg_temp_new_i64();
4889    tcg_gen_concat_i32_i64(tmp, tmpl, tmph);
4890    tcg_temp_free_i32(tmpl);
4891    tcg_temp_free_i32(tmph);
4892    tcg_gen_add_i64(val, val, tmp);
4893    tcg_temp_free_i64(tmp);
4894}
4895
4896/* Set N and Z flags from hi|lo.  */
4897static void gen_logicq_cc(TCGv_i32 lo, TCGv_i32 hi)
4898{
4899    tcg_gen_mov_i32(cpu_NF, hi);
4900    tcg_gen_or_i32(cpu_ZF, lo, hi);
4901}
4902
4903/* Load/Store exclusive instructions are implemented by remembering
4904   the value/address loaded, and seeing if these are the same
4905   when the store is performed.  This should be sufficient to implement
4906   the architecturally mandated semantics, and avoids having to monitor
4907   regular stores.  The compare vs the remembered value is done during
4908   the cmpxchg operation, but we must compare the addresses manually.  */
4909static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
4910                               TCGv_i32 addr, int size)
4911{
4912    TCGv_i32 tmp = tcg_temp_new_i32();
4913    MemOp opc = size | MO_ALIGN | s->be_data;
4914
4915    s->is_ldex = true;
4916
4917    if (size == 3) {
4918        TCGv_i32 tmp2 = tcg_temp_new_i32();
4919        TCGv_i64 t64 = tcg_temp_new_i64();
4920
4921        /* For AArch32, architecturally the 32-bit word at the lowest
4922         * address is always Rt and the one at addr+4 is Rt2, even if
4923         * the CPU is big-endian. That means we don't want to do a
4924         * gen_aa32_ld_i64(), which invokes gen_aa32_frob64() as if
4925         * for an architecturally 64-bit access, but instead do a
4926         * 64-bit access using MO_BE if appropriate and then split
4927         * the two halves.
4928         * This only makes a difference for BE32 user-mode, where
4929         * frob64() must not flip the two halves of the 64-bit data
4930         * but this code must treat BE32 user-mode like BE32 system.
4931         */
4932        TCGv taddr = gen_aa32_addr(s, addr, opc);
4933
4934        tcg_gen_qemu_ld_i64(t64, taddr, get_mem_index(s), opc);
4935        tcg_temp_free(taddr);
4936        tcg_gen_mov_i64(cpu_exclusive_val, t64);
4937        if (s->be_data == MO_BE) {
4938            tcg_gen_extr_i64_i32(tmp2, tmp, t64);
4939        } else {
4940            tcg_gen_extr_i64_i32(tmp, tmp2, t64);
4941        }
4942        tcg_temp_free_i64(t64);
4943
4944        store_reg(s, rt2, tmp2);
4945    } else {
4946        gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), opc);
4947        tcg_gen_extu_i32_i64(cpu_exclusive_val, tmp);
4948    }
4949
4950    store_reg(s, rt, tmp);
4951    tcg_gen_extu_i32_i64(cpu_exclusive_addr, addr);
4952}
4953
4954static void gen_clrex(DisasContext *s)
4955{
4956    tcg_gen_movi_i64(cpu_exclusive_addr, -1);
4957}
4958
4959static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
4960                                TCGv_i32 addr, int size)
4961{
4962    TCGv_i32 t0, t1, t2;
4963    TCGv_i64 extaddr;
4964    TCGv taddr;
4965    TCGLabel *done_label;
4966    TCGLabel *fail_label;
4967    MemOp opc = size | MO_ALIGN | s->be_data;
4968
4969    /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]) {
4970         [addr] = {Rt};
4971         {Rd} = 0;
4972       } else {
4973         {Rd} = 1;
4974       } */
4975    fail_label = gen_new_label();
4976    done_label = gen_new_label();
4977    extaddr = tcg_temp_new_i64();
4978    tcg_gen_extu_i32_i64(extaddr, addr);
4979    tcg_gen_brcond_i64(TCG_COND_NE, extaddr, cpu_exclusive_addr, fail_label);
4980    tcg_temp_free_i64(extaddr);
4981
4982    taddr = gen_aa32_addr(s, addr, opc);
4983    t0 = tcg_temp_new_i32();
4984    t1 = load_reg(s, rt);
4985    if (size == 3) {
4986        TCGv_i64 o64 = tcg_temp_new_i64();
4987        TCGv_i64 n64 = tcg_temp_new_i64();
4988
4989        t2 = load_reg(s, rt2);
4990        /* For AArch32, architecturally the 32-bit word at the lowest
4991         * address is always Rt and the one at addr+4 is Rt2, even if
4992         * the CPU is big-endian. Since we're going to treat this as a
4993         * single 64-bit BE store, we need to put the two halves in the
4994         * opposite order for BE to LE, so that they end up in the right
4995         * places.
4996         * We don't want gen_aa32_frob64() because that does the wrong
4997         * thing for BE32 usermode.
4998         */
4999        if (s->be_data == MO_BE) {
5000            tcg_gen_concat_i32_i64(n64, t2, t1);
5001        } else {
5002            tcg_gen_concat_i32_i64(n64, t1, t2);
5003        }
5004        tcg_temp_free_i32(t2);
5005
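            /*
             * The cmpxchg checks the remembered value and performs the
             * store only if it matches; either way o64 receives the old
             * memory contents for the success/failure test below.
             */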
5006        tcg_gen_atomic_cmpxchg_i64(o64, taddr, cpu_exclusive_val, n64,
5007                                   get_mem_index(s), opc);
5008        tcg_temp_free_i64(n64);
5009
5010        tcg_gen_setcond_i64(TCG_COND_NE, o64, o64, cpu_exclusive_val);
5011        tcg_gen_extrl_i64_i32(t0, o64);
5012
5013        tcg_temp_free_i64(o64);
5014    } else {
5015        t2 = tcg_temp_new_i32();
5016        tcg_gen_extrl_i64_i32(t2, cpu_exclusive_val);
5017        tcg_gen_atomic_cmpxchg_i32(t0, taddr, t2, t1, get_mem_index(s), opc);
5018        tcg_gen_setcond_i32(TCG_COND_NE, t0, t0, t2);
5019        tcg_temp_free_i32(t2);
5020    }
5021    tcg_temp_free_i32(t1);
5022    tcg_temp_free(taddr);
5023    tcg_gen_mov_i32(cpu_R[rd], t0);
5024    tcg_temp_free_i32(t0);
5025    tcg_gen_br(done_label);
5026
5027    gen_set_label(fail_label);
5028    tcg_gen_movi_i32(cpu_R[rd], 1);
5029    gen_set_label(done_label);
5030    tcg_gen_movi_i64(cpu_exclusive_addr, -1);
5031}
5032
5033/* gen_srs:
5035 * @s: DisasContext
5036 * @mode: mode field from insn (which stack to store to)
5037 * @amode: addressing mode (DA/IA/DB/IB), encoded as per P,U bits in ARM insn
5038 * @writeback: true if writeback bit set
5039 *
5040 * Generate code for the SRS (Store Return State) insn.
5041 */
5042static void gen_srs(DisasContext *s,
5043                    uint32_t mode, uint32_t amode, bool writeback)
5044{
5045    int32_t offset;
5046    TCGv_i32 addr, tmp;
5047    bool undef = false;
5048
5049    /* SRS is:
5050     * - trapped to EL3 if EL3 is AArch64 and we are at Secure EL1
5051     *   and specified mode is monitor mode
5052     * - UNDEFINED in Hyp mode
5053     * - UNPREDICTABLE in User or System mode
5054     * - UNPREDICTABLE if the specified mode is:
5055     * -- not implemented
5056     * -- not a valid mode number
5057     * -- a mode that's at a higher exception level
5058     * -- Monitor, if we are Non-secure
5059     * For the UNPREDICTABLE cases we choose to UNDEF.
5060     */
5061    if (s->current_el == 1 && !s->ns && mode == ARM_CPU_MODE_MON) {
5062        gen_exception_insn(s, s->pc_curr, EXCP_UDEF, syn_uncategorized(), 3);
5063        return;
5064    }
5065
5066    if (s->current_el == 0 || s->current_el == 2) {
5067        undef = true;
5068    }
5069
5070    switch (mode) {
5071    case ARM_CPU_MODE_USR:
5072    case ARM_CPU_MODE_FIQ:
5073    case ARM_CPU_MODE_IRQ:
5074    case ARM_CPU_MODE_SVC:
5075    case ARM_CPU_MODE_ABT:
5076    case ARM_CPU_MODE_UND:
5077    case ARM_CPU_MODE_SYS:
5078        break;
5079    case ARM_CPU_MODE_HYP:
5080        if (s->current_el == 1 || !arm_dc_feature(s, ARM_FEATURE_EL2)) {
5081            undef = true;
5082        }
5083        break;
5084    case ARM_CPU_MODE_MON:
5085        /* No need to check specifically for "are we non-secure" because
5086         * we've already made EL0 UNDEF and handled the trap for S-EL1;
5087         * so if this isn't EL3 then we must be non-secure.
5088         */
5089        if (s->current_el != 3) {
5090            undef = true;
5091        }
5092        break;
5093    default:
5094        undef = true;
5095    }
5096
5097    if (undef) {
5098        unallocated_encoding(s);
5099        return;
5100    }
5101
5102    addr = tcg_temp_new_i32();
5103    tmp = tcg_const_i32(mode);
5104    /* get_r13_banked() will raise an exception if called from System mode */
5105    gen_set_condexec(s);
5106    gen_set_pc_im(s, s->pc_curr);
5107    gen_helper_get_r13_banked(addr, cpu_env, tmp);
5108    tcg_temp_free_i32(tmp);
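        /*
         * Select the address of the lowest word to be stored, as for
         * an STM of two registers: DA -> SP - 4, IA -> SP, DB -> SP - 8,
         * IB -> SP + 4.
         */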
5109    switch (amode) {
5110    case 0: /* DA */
5111        offset = -4;
5112        break;
5113    case 1: /* IA */
5114        offset = 0;
5115        break;
5116    case 2: /* DB */
5117        offset = -8;
5118        break;
5119    case 3: /* IB */
5120        offset = 4;
5121        break;
5122    default:
5123        abort();
5124    }
5125    tcg_gen_addi_i32(addr, addr, offset);
5126    tmp = load_reg(s, 14);
5127    gen_aa32_st32(s, tmp, addr, get_mem_index(s));
5128    tcg_temp_free_i32(tmp);
5129    tmp = load_cpu_field(spsr);
5130    tcg_gen_addi_i32(addr, addr, 4);
5131    gen_aa32_st32(s, tmp, addr, get_mem_index(s));
5132    tcg_temp_free_i32(tmp);
5133    if (writeback) {
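            /*
             * addr is now 4 bytes past the first word stored, i.e.
             * base + initial offset + 4; adjust so that the final SP is
             * base - 8 for the decrement modes (DA, DB) and base + 8 for
             * the increment modes (IA, IB).
             */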
5134        switch (amode) {
5135        case 0:
5136            offset = -8;
5137            break;
5138        case 1:
5139            offset = 4;
5140            break;
5141        case 2:
5142            offset = -4;
5143            break;
5144        case 3:
5145            offset = 0;
5146            break;
5147        default:
5148            abort();
5149        }
5150        tcg_gen_addi_i32(addr, addr, offset);
5151        tmp = tcg_const_i32(mode);
5152        gen_helper_set_r13_banked(cpu_env, tmp, addr);
5153        tcg_temp_free_i32(tmp);
5154    }
5155    tcg_temp_free_i32(addr);
5156    s->base.is_jmp = DISAS_UPDATE_EXIT;
5157}
5158
5159/* Generate a label used for skipping this instruction */
5160static void arm_gen_condlabel(DisasContext *s)
5161{
5162    if (!s->condjmp) {
5163        s->condlabel = gen_new_label();
5164        s->condjmp = 1;
5165    }
5166}
5167
5168/* Skip this instruction if the ARM condition is false */
5169static void arm_skip_unless(DisasContext *s, uint32_t cond)
5170{
5171    arm_gen_condlabel(s);
5172    arm_gen_test_cc(cond ^ 1, s->condlabel);
5173}
5174
5175
5176/*
5177 * Constant expanders for the decoders.
5178 */
5179
5180static int negate(DisasContext *s, int x)
5181{
5182    return -x;
5183}
5184
5185static int plus_2(DisasContext *s, int x)
5186{
5187    return x + 2;
5188}
5189
5190static int times_2(DisasContext *s, int x)
5191{
5192    return x * 2;
5193}
5194
5195static int times_4(DisasContext *s, int x)
5196{
5197    return x * 4;
5198}
5199
5200/* Return only the rotation part of T32ExpandImm.  */
5201static int t32_expandimm_rot(DisasContext *s, int x)
5202{
5203    return x & 0xc00 ? extract32(x, 7, 5) : 0;
5204}
5205
5206/* Return the unrotated immediate from T32ExpandImm.  */
5207static int t32_expandimm_imm(DisasContext *s, int x)
5208{
5209    int imm = extract32(x, 0, 8);
5210
5211    switch (extract32(x, 8, 4)) {
5212    case 0: /* XY */
5213        /* Nothing to do.  */
5214        break;
5215    case 1: /* 00XY00XY */
5216        imm *= 0x00010001;
5217        break;
5218    case 2: /* XY00XY00 */
5219        imm *= 0x01000100;
5220        break;
5221    case 3: /* XYXYXYXY */
5222        imm *= 0x01010101;
5223        break;
5224    default:
5225        /* Rotated constant.  */
5226        imm |= 0x80;
5227        break;
5228    }
5229    return imm;
5230}
5231
5232static int t32_branch24(DisasContext *s, int x)
5233{
5234    /* Convert J1:J2 at x[22:21] to I2:I1, which involves I=J^~S.  */
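        /*
         * x is already sign-extended from bit 24 (S), so !(x < 0) is !S:
         * the XOR inverts both J bits when S is 0 and leaves them alone
         * when S is 1, as I = J ^ ~S requires.
         */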
5235    x ^= !(x < 0) * (3 << 21);
5236    /* Append the final zero.  */
5237    return x << 1;
5238}
5239
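    /*
     * 16-bit Thumb data-processing insns set the flags only when
     * executed outside an IT block (i.e. when condexec_mask is 0).
     */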
5240static int t16_setflags(DisasContext *s)
5241{
5242    return s->condexec_mask == 0;
5243}
5244
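    /* Expand the T16 PUSH register list: bit 8 is the 'M' bit (LR).  */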
5245static int t16_push_list(DisasContext *s, int x)
5246{
5247    return (x & 0xff) | (x & 0x100) << (14 - 8);
5248}
5249
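    /* Expand the T16 POP register list: bit 8 is the 'P' bit (PC).  */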
5250static int t16_pop_list(DisasContext *s, int x)
5251{
5252    return (x & 0xff) | (x & 0x100) << (15 - 8);
5253}
5254
5255/*
5256 * Include the generated decoders.
5257 */
5258
5259#include "decode-a32.c.inc"
5260#include "decode-a32-uncond.c.inc"
5261#include "decode-t32.c.inc"
5262#include "decode-t16.c.inc"
5263
5264static bool valid_cp(DisasContext *s, int cp)
5265{
5266    /*
5267     * Return true if this coprocessor field indicates something
5268     * that's really a possible coprocessor.
5269     * For v7 and earlier, coprocessors 8..15 were reserved for Arm use,
5270     * and of those only cp14 and cp15 were used for registers.
5271     * cp10 and cp11 were used for VFP and Neon, whose decode is
5272     * dealt with elsewhere. With the advent of fp16, cp9 is also
5273     * now part of VFP.
5274     * For v8A and later, the encoding has been tightened so that
5275     * only cp14 and cp15 are valid, and other values aren't considered
5276     * to be in the coprocessor-instruction space at all. v8M still
5277     * permits coprocessors 0..7.
5278     */
5279    if (arm_dc_feature(s, ARM_FEATURE_V8) &&
5280        !arm_dc_feature(s, ARM_FEATURE_M)) {
5281        return cp >= 14;
5282    }
5283    return cp < 8 || cp >= 14;
5284}
5285
5286static bool trans_MCR(DisasContext *s, arg_MCR *a)
5287{
5288    if (!valid_cp(s, a->cp)) {
5289        return false;
5290    }
5291    do_coproc_insn(s, a->cp, false, a->opc1, a->crn, a->crm, a->opc2,
5292                   false, a->rt, 0);
5293    return true;
5294}
5295
5296static bool trans_MRC(DisasContext *s, arg_MRC *a)
5297{
5298    if (!valid_cp(s, a->cp)) {
5299        return false;
5300    }
5301    do_coproc_insn(s, a->cp, false, a->opc1, a->crn, a->crm, a->opc2,
5302                   true, a->rt, 0);
5303    return true;
5304}
5305
5306static bool trans_MCRR(DisasContext *s, arg_MCRR *a)
5307{
5308    if (!valid_cp(s, a->cp)) {
5309        return false;
5310    }
5311    do_coproc_insn(s, a->cp, true, a->opc1, 0, a->crm, 0,
5312                   false, a->rt, a->rt2);
5313    return true;
5314}
5315
5316static bool trans_MRRC(DisasContext *s, arg_MRRC *a)
5317{
5318    if (!valid_cp(s, a->cp)) {
5319        return false;
5320    }
5321    do_coproc_insn(s, a->cp, true, a->opc1, 0, a->crm, 0,
5322                   true, a->rt, a->rt2);
5323    return true;
5324}
5325
5326/* Helpers to swap operands for reverse-subtract.  */
5327static void gen_rsb(TCGv_i32 dst, TCGv_i32 a, TCGv_i32 b)
5328{
5329    tcg_gen_sub_i32(dst, b, a);
5330}
5331
5332static void gen_rsb_CC(TCGv_i32 dst, TCGv_i32 a, TCGv_i32 b)
5333{
5334    gen_sub_CC(dst, b, a);
5335}
5336
5337static void gen_rsc(TCGv_i32 dest, TCGv_i32 a, TCGv_i32 b)
5338{
5339    gen_sub_carry(dest, b, a);
5340}
5341
5342static void gen_rsc_CC(TCGv_i32 dest, TCGv_i32 a, TCGv_i32 b)
5343{
5344    gen_sbc_CC(dest, b, a);
5345}
5346
5347/*
5348 * Helpers for the data processing routines.
5349 *
5350 * After the computation store the results back.
5351 * This may be suppressed altogether (STREG_NONE), require a runtime
5352 * check against the stack limits (STREG_SP_CHECK), or generate an
5353 * exception return.  Oh, or store into a register.
5354 *
5355 * Always return true, indicating success for a trans_* function.
5356 */
5357typedef enum {
5358    STREG_NONE,
5359    STREG_NORMAL,
5360    STREG_SP_CHECK,
5361    STREG_EXC_RET,
5362} StoreRegKind;
5363
5364static bool store_reg_kind(DisasContext *s, int rd,
5365                            TCGv_i32 val, StoreRegKind kind)
5366{
5367    switch (kind) {
5368    case STREG_NONE:
5369        tcg_temp_free_i32(val);
5370        return true;
5371    case STREG_NORMAL:
5372        /* See ALUWritePC: Interworking only from a32 mode. */
5373        if (s->thumb) {
5374            store_reg(s, rd, val);
5375        } else {
5376            store_reg_bx(s, rd, val);
5377        }
5378        return true;
5379    case STREG_SP_CHECK:
5380        store_sp_checked(s, val);
5381        return true;
5382    case STREG_EXC_RET:
5383        gen_exception_return(s, val);
5384        return true;
5385    }
5386    g_assert_not_reached();
5387}
5388
5389/*
5390 * Data Processing (register)
5391 *
5392 * Operate on one register source and one immediate-shifted register
5393 * source, optionally setting the flags, and store to a destination.
5394 */
5395static bool op_s_rrr_shi(DisasContext *s, arg_s_rrr_shi *a,
5396                         void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
5397                         int logic_cc, StoreRegKind kind)
5398{
5399    TCGv_i32 tmp1, tmp2;
5400
5401    tmp2 = load_reg(s, a->rm);
5402    gen_arm_shift_im(tmp2, a->shty, a->shim, logic_cc);
5403    tmp1 = load_reg(s, a->rn);
5404
5405    gen(tmp1, tmp1, tmp2);
5406    tcg_temp_free_i32(tmp2);
5407
5408    if (logic_cc) {
5409        gen_logic_CC(tmp1);
5410    }
5411    return store_reg_kind(s, a->rd, tmp1, kind);
5412}
5413
5414static bool op_s_rxr_shi(DisasContext *s, arg_s_rrr_shi *a,
5415                         void (*gen)(TCGv_i32, TCGv_i32),
5416                         int logic_cc, StoreRegKind kind)
5417{
5418    TCGv_i32 tmp;
5419
5420    tmp = load_reg(s, a->rm);
5421    gen_arm_shift_im(tmp, a->shty, a->shim, logic_cc);
5422
5423    gen(tmp, tmp);
5424    if (logic_cc) {
5425        gen_logic_CC(tmp);
5426    }
5427    return store_reg_kind(s, a->rd, tmp, kind);
5428}
5429
5430/*
5431 * Data-processing (register-shifted register)
5432 *
5433 * Operate on one register source and one register-shifted register
5434 * source, optionally setting the flags, and store to a destination.
5435 */
5436static bool op_s_rrr_shr(DisasContext *s, arg_s_rrr_shr *a,
5437                         void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
5438                         int logic_cc, StoreRegKind kind)
5439{
5440    TCGv_i32 tmp1, tmp2;
5441
5442    tmp1 = load_reg(s, a->rs);
5443    tmp2 = load_reg(s, a->rm);
5444    gen_arm_shift_reg(tmp2, a->shty, tmp1, logic_cc);
5445    tmp1 = load_reg(s, a->rn);
5446
5447    gen(tmp1, tmp1, tmp2);
5448    tcg_temp_free_i32(tmp2);
5449
5450    if (logic_cc) {
5451        gen_logic_CC(tmp1);
5452    }
5453    return store_reg_kind(s, a->rd, tmp1, kind);
5454}
5455
5456static bool op_s_rxr_shr(DisasContext *s, arg_s_rrr_shr *a,
5457                         void (*gen)(TCGv_i32, TCGv_i32),
5458                         int logic_cc, StoreRegKind kind)
5459{
5460    TCGv_i32 tmp1, tmp2;
5461
5462    tmp1 = load_reg(s, a->rs);
5463    tmp2 = load_reg(s, a->rm);
5464    gen_arm_shift_reg(tmp2, a->shty, tmp1, logic_cc);
5465
5466    gen(tmp2, tmp2);
5467    if (logic_cc) {
5468        gen_logic_CC(tmp2);
5469    }
5470    return store_reg_kind(s, a->rd, tmp2, kind);
5471}
5472
5473/*
5474 * Data-processing (immediate)
5475 *
5476 * Operate on one register source and one rotated immediate,
5477 * optionally setting the flags, and store to a destination.
5478 *
5479 * Note that logic_cc && a->rot setting CF based on the msb of the
5480 * immediate is the reason why we must pass in the unrotated form
5481 * of the immediate.
5482 */
5483static bool op_s_rri_rot(DisasContext *s, arg_s_rri_rot *a,
5484                         void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
5485                         int logic_cc, StoreRegKind kind)
5486{
5487    TCGv_i32 tmp1, tmp2;
5488    uint32_t imm;
5489
5490    imm = ror32(a->imm, a->rot);
5491    if (logic_cc && a->rot) {
5492        tcg_gen_movi_i32(cpu_CF, imm >> 31);
5493    }
5494    tmp2 = tcg_const_i32(imm);
5495    tmp1 = load_reg(s, a->rn);
5496
5497    gen(tmp1, tmp1, tmp2);
5498    tcg_temp_free_i32(tmp2);
5499
5500    if (logic_cc) {
5501        gen_logic_CC(tmp1);
5502    }
5503    return store_reg_kind(s, a->rd, tmp1, kind);
5504}
5505
5506static bool op_s_rxi_rot(DisasContext *s, arg_s_rri_rot *a,
5507                         void (*gen)(TCGv_i32, TCGv_i32),
5508                         int logic_cc, StoreRegKind kind)
5509{
5510    TCGv_i32 tmp;
5511    uint32_t imm;
5512
5513    imm = ror32(a->imm, a->rot);
5514    if (logic_cc && a->rot) {
5515        tcg_gen_movi_i32(cpu_CF, imm >> 31);
5516    }
5517    tmp = tcg_const_i32(imm);
5518
5519    gen(tmp, tmp);
5520    if (logic_cc) {
5521        gen_logic_CC(tmp);
5522    }
5523    return store_reg_kind(s, a->rd, tmp, kind);
5524}
5525
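    /*
     * Expand the trans_* functions for a data-processing insn in each
     * of its encodings: immediate-shifted register (rrri), register
     * shifted register (rrrr), and rotated immediate (rri).  DO_ANY2
     * is the one-source-operand variant, and DO_CMP2 is the flags-only
     * variant with no destination register.
     */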
5526#define DO_ANY3(NAME, OP, L, K)                                         \
5527    static bool trans_##NAME##_rrri(DisasContext *s, arg_s_rrr_shi *a)  \
5528    { StoreRegKind k = (K); return op_s_rrr_shi(s, a, OP, L, k); }      \
5529    static bool trans_##NAME##_rrrr(DisasContext *s, arg_s_rrr_shr *a)  \
5530    { StoreRegKind k = (K); return op_s_rrr_shr(s, a, OP, L, k); }      \
5531    static bool trans_##NAME##_rri(DisasContext *s, arg_s_rri_rot *a)   \
5532    { StoreRegKind k = (K); return op_s_rri_rot(s, a, OP, L, k); }
5533
5534#define DO_ANY2(NAME, OP, L, K)                                         \
5535    static bool trans_##NAME##_rxri(DisasContext *s, arg_s_rrr_shi *a)  \
5536    { StoreRegKind k = (K); return op_s_rxr_shi(s, a, OP, L, k); }      \
5537    static bool trans_##NAME##_rxrr(DisasContext *s, arg_s_rrr_shr *a)  \
5538    { StoreRegKind k = (K); return op_s_rxr_shr(s, a, OP, L, k); }      \
5539    static bool trans_##NAME##_rxi(DisasContext *s, arg_s_rri_rot *a)   \
5540    { StoreRegKind k = (K); return op_s_rxi_rot(s, a, OP, L, k); }
5541
5542#define DO_CMP2(NAME, OP, L)                                            \
5543    static bool trans_##NAME##_xrri(DisasContext *s, arg_s_rrr_shi *a)  \
5544    { return op_s_rrr_shi(s, a, OP, L, STREG_NONE); }                   \
5545    static bool trans_##NAME##_xrrr(DisasContext *s, arg_s_rrr_shr *a)  \
5546    { return op_s_rrr_shr(s, a, OP, L, STREG_NONE); }                   \
5547    static bool trans_##NAME##_xri(DisasContext *s, arg_s_rri_rot *a)   \
5548    { return op_s_rri_rot(s, a, OP, L, STREG_NONE); }
5549
5550DO_ANY3(AND, tcg_gen_and_i32, a->s, STREG_NORMAL)
5551DO_ANY3(EOR, tcg_gen_xor_i32, a->s, STREG_NORMAL)
5552DO_ANY3(ORR, tcg_gen_or_i32, a->s, STREG_NORMAL)
5553DO_ANY3(BIC, tcg_gen_andc_i32, a->s, STREG_NORMAL)
5554
5555DO_ANY3(RSB, a->s ? gen_rsb_CC : gen_rsb, false, STREG_NORMAL)
5556DO_ANY3(ADC, a->s ? gen_adc_CC : gen_add_carry, false, STREG_NORMAL)
5557DO_ANY3(SBC, a->s ? gen_sbc_CC : gen_sub_carry, false, STREG_NORMAL)
5558DO_ANY3(RSC, a->s ? gen_rsc_CC : gen_rsc, false, STREG_NORMAL)
5559
5560DO_CMP2(TST, tcg_gen_and_i32, true)
5561DO_CMP2(TEQ, tcg_gen_xor_i32, true)
5562DO_CMP2(CMN, gen_add_CC, false)
5563DO_CMP2(CMP, gen_sub_CC, false)
5564
5565DO_ANY3(ADD, a->s ? gen_add_CC : tcg_gen_add_i32, false,
5566        a->rd == 13 && a->rn == 13 ? STREG_SP_CHECK : STREG_NORMAL)
5567
5568/*
5569 * Note that the computation of StoreRegKind below may return out of
5570 * the middle of the functions expanded by DO_ANY3, and that it may
5571 * modify a->s via the K parameter before it is used by OP.
5572 */
5573DO_ANY3(SUB, a->s ? gen_sub_CC : tcg_gen_sub_i32, false,
5574        ({
5575            StoreRegKind ret = STREG_NORMAL;
5576            if (a->rd == 15 && a->s) {
5577                /*
5578                 * See ALUExceptionReturn:
5579                 * In User mode, UNPREDICTABLE; we choose UNDEF.
5580                 * In Hyp mode, UNDEFINED.
5581                 */
5582                if (IS_USER(s) || s->current_el == 2) {
5583                    unallocated_encoding(s);
5584                    return true;
5585                }
5586                /* There is no writeback of nzcv to PSTATE.  */
5587                a->s = 0;
5588                ret = STREG_EXC_RET;
5589            } else if (a->rd == 13 && a->rn == 13) {
5590                ret = STREG_SP_CHECK;
5591            }
5592            ret;
5593        }))
5594
5595DO_ANY2(MOV, tcg_gen_mov_i32, a->s,
5596        ({
5597            StoreRegKind ret = STREG_NORMAL;
5598            if (a->rd == 15 && a->s) {
5599                /*
5600                 * See ALUExceptionReturn:
5601                 * In User mode, UNPREDICTABLE; we choose UNDEF.
5602                 * In Hyp mode, UNDEFINED.
5603                 */
5604                if (IS_USER(s) || s->current_el == 2) {
5605                    unallocated_encoding(s);
5606                    return true;
5607                }
5608                /* There is no writeback of nzcv to PSTATE.  */
5609                a->s = 0;
5610                ret = STREG_EXC_RET;
5611            } else if (a->rd == 13) {
5612                ret = STREG_SP_CHECK;
5613            }
5614            ret;
5615        }))
5616
5617DO_ANY2(MVN, tcg_gen_not_i32, a->s, STREG_NORMAL)
5618
5619/*
5620 * ORN is only available with T32, so there is no register-shifted-register
5621 * form of the insn.  Using the DO_ANY3 macro would create an unused function.
5622 */
5623static bool trans_ORN_rrri(DisasContext *s, arg_s_rrr_shi *a)
5624{
5625    return op_s_rrr_shi(s, a, tcg_gen_orc_i32, a->s, STREG_NORMAL);
5626}
5627
5628static bool trans_ORN_rri(DisasContext *s, arg_s_rri_rot *a)
5629{
5630    return op_s_rri_rot(s, a, tcg_gen_orc_i32, a->s, STREG_NORMAL);
5631}
5632
5633#undef DO_ANY3
5634#undef DO_ANY2
5635#undef DO_CMP2
5636
5637static bool trans_ADR(DisasContext *s, arg_ri *a)
5638{
5639    store_reg_bx(s, a->rd, add_reg_for_lit(s, 15, a->imm));
5640    return true;
5641}
5642
5643static bool trans_MOVW(DisasContext *s, arg_MOVW *a)
5644{
5645    TCGv_i32 tmp;
5646
5647    if (!ENABLE_ARCH_6T2) {
5648        return false;
5649    }
5650
5651    tmp = tcg_const_i32(a->imm);
5652    store_reg(s, a->rd, tmp);
5653    return true;
5654}
5655
5656static bool trans_MOVT(DisasContext *s, arg_MOVW *a)
5657{
5658    TCGv_i32 tmp;
5659
5660    if (!ENABLE_ARCH_6T2) {
5661        return false;
5662    }
5663
5664    tmp = load_reg(s, a->rd);
5665    tcg_gen_ext16u_i32(tmp, tmp);
5666    tcg_gen_ori_i32(tmp, tmp, a->imm << 16);
5667    store_reg(s, a->rd, tmp);
5668    return true;
5669}
5670
5671/*
5672 * Multiply and multiply accumulate
5673 */
5674
5675static bool op_mla(DisasContext *s, arg_s_rrrr *a, bool add)
5676{
5677    TCGv_i32 t1, t2;
5678
5679    t1 = load_reg(s, a->rn);
5680    t2 = load_reg(s, a->rm);
5681    tcg_gen_mul_i32(t1, t1, t2);
5682    tcg_temp_free_i32(t2);
5683    if (add) {
5684        t2 = load_reg(s, a->ra);
5685        tcg_gen_add_i32(t1, t1, t2);
5686        tcg_temp_free_i32(t2);
5687    }
5688    if (a->s) {
5689        gen_logic_CC(t1);
5690    }
5691    store_reg(s, a->rd, t1);
5692    return true;
5693}
5694
5695static bool trans_MUL(DisasContext *s, arg_MUL *a)
5696{
5697    return op_mla(s, a, false);
5698}
5699
5700static bool trans_MLA(DisasContext *s, arg_MLA *a)
5701{
5702    return op_mla(s, a, true);
5703}
5704
5705static bool trans_MLS(DisasContext *s, arg_MLS *a)
5706{
5707    TCGv_i32 t1, t2;
5708
5709    if (!ENABLE_ARCH_6T2) {
5710        return false;
5711    }
5712    t1 = load_reg(s, a->rn);
5713    t2 = load_reg(s, a->rm);
5714    tcg_gen_mul_i32(t1, t1, t2);
5715    tcg_temp_free_i32(t2);
5716    t2 = load_reg(s, a->ra);
5717    tcg_gen_sub_i32(t1, t2, t1);
5718    tcg_temp_free_i32(t2);
5719    store_reg(s, a->rd, t1);
5720    return true;
5721}
5722
5723static bool op_mlal(DisasContext *s, arg_s_rrrr *a, bool uns, bool add)
5724{
5725    TCGv_i32 t0, t1, t2, t3;
5726
5727    t0 = load_reg(s, a->rm);
5728    t1 = load_reg(s, a->rn);
5729    if (uns) {
5730        tcg_gen_mulu2_i32(t0, t1, t0, t1);
5731    } else {
5732        tcg_gen_muls2_i32(t0, t1, t0, t1);
5733    }
5734    if (add) {
5735        t2 = load_reg(s, a->ra);
5736        t3 = load_reg(s, a->rd);
5737        tcg_gen_add2_i32(t0, t1, t0, t1, t2, t3);
5738        tcg_temp_free_i32(t2);
5739        tcg_temp_free_i32(t3);
5740    }
5741    if (a->s) {
5742        gen_logicq_cc(t0, t1);
5743    }
5744    store_reg(s, a->ra, t0);
5745    store_reg(s, a->rd, t1);
5746    return true;
5747}
5748
5749static bool trans_UMULL(DisasContext *s, arg_UMULL *a)
5750{
5751    return op_mlal(s, a, true, false);
5752}
5753
5754static bool trans_SMULL(DisasContext *s, arg_SMULL *a)
5755{
5756    return op_mlal(s, a, false, false);
5757}
5758
5759static bool trans_UMLAL(DisasContext *s, arg_UMLAL *a)
5760{
5761    return op_mlal(s, a, true, true);
5762}
5763
5764static bool trans_SMLAL(DisasContext *s, arg_SMLAL *a)
5765{
5766    return op_mlal(s, a, false, true);
5767}
5768
5769static bool trans_UMAAL(DisasContext *s, arg_UMAAL *a)
5770{
5771    TCGv_i32 t0, t1, t2, zero;
5772
5773    if (s->thumb
5774        ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
5775        : !ENABLE_ARCH_6) {
5776        return false;
5777    }
5778
5779    t0 = load_reg(s, a->rm);
5780    t1 = load_reg(s, a->rn);
5781    tcg_gen_mulu2_i32(t0, t1, t0, t1);
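        /*
         * The 64-bit product plus the two 32-bit addends cannot
         * overflow: (2^32 - 1)^2 + 2 * (2^32 - 1) == 2^64 - 1.
         */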
5782    zero = tcg_const_i32(0);
5783    t2 = load_reg(s, a->ra);
5784    tcg_gen_add2_i32(t0, t1, t0, t1, t2, zero);
5785    tcg_temp_free_i32(t2);
5786    t2 = load_reg(s, a->rd);
5787    tcg_gen_add2_i32(t0, t1, t0, t1, t2, zero);
5788    tcg_temp_free_i32(t2);
5789    tcg_temp_free_i32(zero);
5790    store_reg(s, a->ra, t0);
5791    store_reg(s, a->rd, t1);
5792    return true;
5793}
5794
5795/*
5796 * Saturating addition and subtraction
5797 */
5798
5799static bool op_qaddsub(DisasContext *s, arg_rrr *a, bool add, bool doub)
5800{
5801    TCGv_i32 t0, t1;
5802
5803    if (s->thumb
5804        ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
5805        : !ENABLE_ARCH_5TE) {
5806        return false;
5807    }
5808
5809    t0 = load_reg(s, a->rm);
5810    t1 = load_reg(s, a->rn);
5811    if (doub) {
5812        gen_helper_add_saturate(t1, cpu_env, t1, t1);
5813    }
5814    if (add) {
5815        gen_helper_add_saturate(t0, cpu_env, t0, t1);
5816    } else {
5817        gen_helper_sub_saturate(t0, cpu_env, t0, t1);
5818    }
5819    tcg_temp_free_i32(t1);
5820    store_reg(s, a->rd, t0);
5821    return true;
5822}
5823
5824#define DO_QADDSUB(NAME, ADD, DOUB) \
5825static bool trans_##NAME(DisasContext *s, arg_rrr *a)    \
5826{                                                        \
5827    return op_qaddsub(s, a, ADD, DOUB);                  \
5828}
5829
5830DO_QADDSUB(QADD, true, false)
5831DO_QADDSUB(QSUB, false, false)
5832DO_QADDSUB(QDADD, true, true)
5833DO_QADDSUB(QDSUB, false, true)
5834
5835#undef DO_QADDSUB
5836
5837/*
5838 * Halfword multiply and multiply accumulate
5839 */
5840
5841static bool op_smlaxxx(DisasContext *s, arg_rrrr *a,
5842                       int add_long, bool nt, bool mt)
5843{
5844    TCGv_i32 t0, t1, tl, th;
5845
5846    if (s->thumb
5847        ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
5848        : !ENABLE_ARCH_5TE) {
5849        return false;
5850    }
5851
5852    t0 = load_reg(s, a->rn);
5853    t1 = load_reg(s, a->rm);
5854    gen_mulxy(t0, t1, nt, mt);
5855    tcg_temp_free_i32(t1);
5856
5857    switch (add_long) {
5858    case 0:
5859        store_reg(s, a->rd, t0);
5860        break;
5861    case 1:
5862        t1 = load_reg(s, a->ra);
5863        gen_helper_add_setq(t0, cpu_env, t0, t1);
5864        tcg_temp_free_i32(t1);
5865        store_reg(s, a->rd, t0);
5866        break;
5867    case 2:
5868        tl = load_reg(s, a->ra);
5869        th = load_reg(s, a->rd);
5870        /* Sign-extend the 32-bit product to 64 bits.  */
5871        t1 = tcg_temp_new_i32();
5872        tcg_gen_sari_i32(t1, t0, 31);
5873        tcg_gen_add2_i32(tl, th, tl, th, t0, t1);
5874        tcg_temp_free_i32(t0);
5875        tcg_temp_free_i32(t1);
5876        store_reg(s, a->ra, tl);
5877        store_reg(s, a->rd, th);
5878        break;
5879    default:
5880        g_assert_not_reached();
5881    }
5882    return true;
5883}
5884
5885#define DO_SMLAX(NAME, add, nt, mt) \
5886static bool trans_##NAME(DisasContext *s, arg_rrrr *a)     \
5887{                                                          \
5888    return op_smlaxxx(s, a, add, nt, mt);                  \
5889}
5890
5891DO_SMLAX(SMULBB, 0, 0, 0)
5892DO_SMLAX(SMULBT, 0, 0, 1)
5893DO_SMLAX(SMULTB, 0, 1, 0)
5894DO_SMLAX(SMULTT, 0, 1, 1)
5895
5896DO_SMLAX(SMLABB, 1, 0, 0)
5897DO_SMLAX(SMLABT, 1, 0, 1)
5898DO_SMLAX(SMLATB, 1, 1, 0)
5899DO_SMLAX(SMLATT, 1, 1, 1)
5900
5901DO_SMLAX(SMLALBB, 2, 0, 0)
5902DO_SMLAX(SMLALBT, 2, 0, 1)
5903DO_SMLAX(SMLALTB, 2, 1, 0)
5904DO_SMLAX(SMLALTT, 2, 1, 1)
5905
5906#undef DO_SMLAX
5907
5908static bool op_smlawx(DisasContext *s, arg_rrrr *a, bool add, bool mt)
5909{
5910    TCGv_i32 t0, t1;
5911
5912    if (!ENABLE_ARCH_5TE) {
5913        return false;
5914    }
5915
5916    t0 = load_reg(s, a->rn);
5917    t1 = load_reg(s, a->rm);
5918    /*
5919     * Since the nominal result is product<47:16>, shift the 16-bit
5920     * input up by 16 bits, so that the result is at product<63:32>.
5921     */
5922    if (mt) {
5923        tcg_gen_andi_i32(t1, t1, 0xffff0000);
5924    } else {
5925        tcg_gen_shli_i32(t1, t1, 16);
5926    }
5927    tcg_gen_muls2_i32(t0, t1, t0, t1);
5928    tcg_temp_free_i32(t0);
5929    if (add) {
5930        t0 = load_reg(s, a->ra);
5931        gen_helper_add_setq(t1, cpu_env, t1, t0);
5932        tcg_temp_free_i32(t0);
5933    }
5934    store_reg(s, a->rd, t1);
5935    return true;
5936}
5937
5938#define DO_SMLAWX(NAME, add, mt) \
5939static bool trans_##NAME(DisasContext *s, arg_rrrr *a)     \
5940{                                                          \
5941    return op_smlawx(s, a, add, mt);                       \
5942}
5943
5944DO_SMLAWX(SMULWB, 0, 0)
5945DO_SMLAWX(SMULWT, 0, 1)
5946DO_SMLAWX(SMLAWB, 1, 0)
5947DO_SMLAWX(SMLAWT, 1, 1)
5948
5949#undef DO_SMLAWX
5950
5951/*
5952 * MSR (immediate) and hints
5953 */
5954
5955static bool trans_YIELD(DisasContext *s, arg_YIELD *a)
5956{
5957    /*
5958     * When running single-threaded TCG code, use the helper to ensure that
5959     * the next round-robin scheduled vCPU gets a chance to run.  When
5960     * running in MTTCG we don't generate jumps to the helper as it
5961     * won't affect the scheduling of other vCPUs.
5962     */
5963    if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
5964        gen_set_pc_im(s, s->base.pc_next);
5965        s->base.is_jmp = DISAS_YIELD;
5966    }
5967    return true;
5968}
5969
5970static bool trans_WFE(DisasContext *s, arg_WFE *a)
5971{
5972    /*
5973     * When running single-threaded TCG code, use the helper to ensure that
5974     * the next round-robin scheduled vCPU gets a chance to run.  In
5975     * MTTCG mode we just skip this instruction.  Currently the SEV/SEVL
5976     * instructions, which are just one of many ways to wake the CPU
5977     * from WFE, are not implemented, so we cannot sleep as WFI does.
5978     */
5979    if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
5980        gen_set_pc_im(s, s->base.pc_next);
5981        s->base.is_jmp = DISAS_WFE;
5982    }
5983    return true;
5984}
5985
5986static bool trans_WFI(DisasContext *s, arg_WFI *a)
5987{
5988    /* For WFI, halt the vCPU until an IRQ. */
5989    gen_set_pc_im(s, s->base.pc_next);
5990    s->base.is_jmp = DISAS_WFI;
5991    return true;
5992}
5993
5994static bool trans_NOP(DisasContext *s, arg_NOP *a)
5995{
5996    return true;
5997}
5998
5999static bool trans_MSR_imm(DisasContext *s, arg_MSR_imm *a)
6000{
6001    uint32_t val = ror32(a->imm, a->rot * 2);
6002    uint32_t mask = msr_mask(s, a->mask, a->r);
6003
6004    if (gen_set_psr_im(s, mask, a->r, val)) {
6005        unallocated_encoding(s);
6006    }
6007    return true;
6008}
6009
6010/*
6011 * Cyclic Redundancy Check
6012 */
6013
6014static bool op_crc32(DisasContext *s, arg_rrr *a, bool c, MemOp sz)
6015{
6016    TCGv_i32 t1, t2, t3;
6017
6018    if (!dc_isar_feature(aa32_crc32, s)) {
6019        return false;
6020    }
6021
6022    t1 = load_reg(s, a->rn);
6023    t2 = load_reg(s, a->rm);
6024    switch (sz) {
6025    case MO_8:
6026        gen_uxtb(t2);
6027        break;
6028    case MO_16:
6029        gen_uxth(t2);
6030        break;
6031    case MO_32:
6032        break;
6033    default:
6034        g_assert_not_reached();
6035    }
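        /* The helpers take the number of bytes to process (1, 2 or 4).  */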
6036    t3 = tcg_const_i32(1 << sz);
6037    if (c) {
6038        gen_helper_crc32c(t1, t1, t2, t3);
6039    } else {
6040        gen_helper_crc32(t1, t1, t2, t3);
6041    }
6042    tcg_temp_free_i32(t2);
6043    tcg_temp_free_i32(t3);
6044    store_reg(s, a->rd, t1);
6045    return true;
6046}
6047
6048#define DO_CRC32(NAME, c, sz) \
6049static bool trans_##NAME(DisasContext *s, arg_rrr *a)  \
6050    { return op_crc32(s, a, c, sz); }
6051
6052DO_CRC32(CRC32B, false, MO_8)
6053DO_CRC32(CRC32H, false, MO_16)
6054DO_CRC32(CRC32W, false, MO_32)
6055DO_CRC32(CRC32CB, true, MO_8)
6056DO_CRC32(CRC32CH, true, MO_16)
6057DO_CRC32(CRC32CW, true, MO_32)
6058
6059#undef DO_CRC32
6060
6061/*
6062 * Miscellaneous instructions
6063 */
6064
6065static bool trans_MRS_bank(DisasContext *s, arg_MRS_bank *a)
6066{
6067    if (arm_dc_feature(s, ARM_FEATURE_M)) {
6068        return false;
6069    }
6070    gen_mrs_banked(s, a->r, a->sysm, a->rd);
6071    return true;
6072}
6073
6074static bool trans_MSR_bank(DisasContext *s, arg_MSR_bank *a)
6075{
6076    if (arm_dc_feature(s, ARM_FEATURE_M)) {
6077        return false;
6078    }
6079    gen_msr_banked(s, a->r, a->sysm, a->rn);
6080    return true;
6081}
6082
6083static bool trans_MRS_reg(DisasContext *s, arg_MRS_reg *a)
6084{
6085    TCGv_i32 tmp;
6086
6087    if (arm_dc_feature(s, ARM_FEATURE_M)) {
6088        return false;
6089    }
6090    if (a->r) {
6091        if (IS_USER(s)) {
6092            unallocated_encoding(s);
6093            return true;
6094        }
6095        tmp = load_cpu_field(spsr);
6096    } else {
6097        tmp = tcg_temp_new_i32();
6098        gen_helper_cpsr_read(tmp, cpu_env);
6099    }
6100    store_reg(s, a->rd, tmp);
6101    return true;
6102}
6103
6104static bool trans_MSR_reg(DisasContext *s, arg_MSR_reg *a)
6105{
6106    TCGv_i32 tmp;
6107    uint32_t mask = msr_mask(s, a->mask, a->r);
6108
6109    if (arm_dc_feature(s, ARM_FEATURE_M)) {
6110        return false;
6111    }
6112    tmp = load_reg(s, a->rn);
6113    if (gen_set_psr(s, mask, a->r, tmp)) {
6114        unallocated_encoding(s);
6115    }
6116    return true;
6117}
6118
6119static bool trans_MRS_v7m(DisasContext *s, arg_MRS_v7m *a)
6120{
6121    TCGv_i32 tmp;
6122
6123    if (!arm_dc_feature(s, ARM_FEATURE_M)) {
6124        return false;
6125    }
6126    tmp = tcg_const_i32(a->sysm);
6127    gen_helper_v7m_mrs(tmp, cpu_env, tmp);
6128    store_reg(s, a->rd, tmp);
6129    return true;
6130}
6131
6132static bool trans_MSR_v7m(DisasContext *s, arg_MSR_v7m *a)
6133{
6134    TCGv_i32 addr, reg;
6135
6136    if (!arm_dc_feature(s, ARM_FEATURE_M)) {
6137        return false;
6138    }
6139    addr = tcg_const_i32((a->mask << 10) | a->sysm);
6140    reg = load_reg(s, a->rn);
6141    gen_helper_v7m_msr(cpu_env, addr, reg);
6142    tcg_temp_free_i32(addr);
6143    tcg_temp_free_i32(reg);
6144    /* If we wrote to CONTROL, the EL might have changed */
6145    gen_helper_rebuild_hflags_m32_newel(cpu_env);
6146    gen_lookup_tb(s);
6147    return true;
6148}
6149
6150static bool trans_BX(DisasContext *s, arg_BX *a)
6151{
6152    if (!ENABLE_ARCH_4T) {
6153        return false;
6154    }
6155    gen_bx_excret(s, load_reg(s, a->rm));
6156    return true;
6157}
6158
6159static bool trans_BXJ(DisasContext *s, arg_BXJ *a)
6160{
6161    if (!ENABLE_ARCH_5J || arm_dc_feature(s, ARM_FEATURE_M)) {
6162        return false;
6163    }
6164    /* Trivial implementation equivalent to bx.  */
6165    gen_bx(s, load_reg(s, a->rm));
6166    return true;
6167}
6168
6169static bool trans_BLX_r(DisasContext *s, arg_BLX_r *a)
6170{
6171    TCGv_i32 tmp;
6172
6173    if (!ENABLE_ARCH_5) {
6174        return false;
6175    }
6176    tmp = load_reg(s, a->rm);
6177    tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | s->thumb);
6178    gen_bx(s, tmp);
6179    return true;
6180}
6181
6182/*
6183 * BXNS/BLXNS: only exist for v8M with the security extensions,
6184 * and always UNDEF if NonSecure.  We don't implement these in
6185 * the user-only mode either (in theory you can use them from
6186 * Secure User mode but they are too tied in to system emulation).
6187 */
6188static bool trans_BXNS(DisasContext *s, arg_BXNS *a)
6189{
6190    if (!s->v8m_secure || IS_USER_ONLY) {
6191        unallocated_encoding(s);
6192    } else {
6193        gen_bxns(s, a->rm);
6194    }
6195    return true;
6196}
6197
6198static bool trans_BLXNS(DisasContext *s, arg_BLXNS *a)
6199{
6200    if (!s->v8m_secure || IS_USER_ONLY) {
6201        unallocated_encoding(s);
6202    } else {
6203        gen_blxns(s, a->rm);
6204    }
6205    return true;
6206}
6207
6208static bool trans_CLZ(DisasContext *s, arg_CLZ *a)
6209{
6210    TCGv_i32 tmp;
6211
6212    if (!ENABLE_ARCH_5) {
6213        return false;
6214    }
6215    tmp = load_reg(s, a->rm);
6216    tcg_gen_clzi_i32(tmp, tmp, 32);
6217    store_reg(s, a->rd, tmp);
6218    return true;
6219}
6220
6221static bool trans_ERET(DisasContext *s, arg_ERET *a)
6222{
6223    TCGv_i32 tmp;
6224
6225    if (!arm_dc_feature(s, ARM_FEATURE_V7VE)) {
6226        return false;
6227    }
6228    if (IS_USER(s)) {
6229        unallocated_encoding(s);
6230        return true;
6231    }
6232    if (s->current_el == 2) {
6233        /* ERET from Hyp uses ELR_Hyp, not LR */
6234        tmp = load_cpu_field(elr_el[2]);
6235    } else {
6236        tmp = load_reg(s, 14);
6237    }
6238    gen_exception_return(s, tmp);
6239    return true;
6240}
6241
6242static bool trans_HLT(DisasContext *s, arg_HLT *a)
6243{
6244    gen_hlt(s, a->imm);
6245    return true;
6246}
6247
6248static bool trans_BKPT(DisasContext *s, arg_BKPT *a)
6249{
6250    if (!ENABLE_ARCH_5) {
6251        return false;
6252    }
6253    if (arm_dc_feature(s, ARM_FEATURE_M) &&
6254        semihosting_enabled() &&
6255#ifndef CONFIG_USER_ONLY
6256        !IS_USER(s) &&
6257#endif
6258        (a->imm == 0xab)) {
6259        gen_exception_internal_insn(s, s->pc_curr, EXCP_SEMIHOST);
6260    } else {
6261        gen_exception_bkpt_insn(s, syn_aa32_bkpt(a->imm, false));
6262    }
6263    return true;
6264}
6265
6266static bool trans_HVC(DisasContext *s, arg_HVC *a)
6267{
6268    if (!ENABLE_ARCH_7 || arm_dc_feature(s, ARM_FEATURE_M)) {
6269        return false;
6270    }
6271    if (IS_USER(s)) {
6272        unallocated_encoding(s);
6273    } else {
6274        gen_hvc(s, a->imm);
6275    }
6276    return true;
6277}
6278
6279static bool trans_SMC(DisasContext *s, arg_SMC *a)
6280{
6281    if (!ENABLE_ARCH_6K || arm_dc_feature(s, ARM_FEATURE_M)) {
6282        return false;
6283    }
6284    if (IS_USER(s)) {
6285        unallocated_encoding(s);
6286    } else {
6287        gen_smc(s);
6288    }
6289    return true;
6290}
6291
6292static bool trans_SG(DisasContext *s, arg_SG *a)
6293{
6294    if (!arm_dc_feature(s, ARM_FEATURE_M) ||
6295        !arm_dc_feature(s, ARM_FEATURE_V8)) {
6296        return false;
6297    }
6298    /*
6299     * SG (v8M only)
6300     * The bulk of the behaviour for this instruction is implemented
6301     * in v7m_handle_execute_nsc(), which deals with the insn when
6302     * it is executed by a CPU in non-secure state from memory
6303     * which is Secure & NonSecure-Callable.
6304     * Here we only need to handle the remaining cases:
6305     *  * in NS memory (including the "security extension not
6306     *    implemented" case) : NOP
6307     *  * in S memory but CPU already secure (clear IT bits)
6308     * We know that the attribute for the memory this insn is
6309     * in must match the current CPU state, because otherwise
6310     * get_phys_addr_pmsav8 would have generated an exception.
6311     */
6312    if (s->v8m_secure) {
6313        /* Like the IT insn, we don't need to generate any code */
6314        s->condexec_cond = 0;
6315        s->condexec_mask = 0;
6316    }
6317    return true;
6318}
6319
6320static bool trans_TT(DisasContext *s, arg_TT *a)
6321{
6322    TCGv_i32 addr, tmp;
6323
6324    if (!arm_dc_feature(s, ARM_FEATURE_M) ||
6325        !arm_dc_feature(s, ARM_FEATURE_V8)) {
6326        return false;
6327    }
6328    if (a->rd == 13 || a->rd == 15 || a->rn == 15) {
6329        /* We UNDEF for these UNPREDICTABLE cases */
6330        unallocated_encoding(s);
6331        return true;
6332    }
6333    if (a->A && !s->v8m_secure) {
6334        /* This case is UNDEFINED.  */
6335        unallocated_encoding(s);
6336        return true;
6337    }
6338
6339    addr = load_reg(s, a->rn);
6340    tmp = tcg_const_i32((a->A << 1) | a->T);
6341    gen_helper_v7m_tt(tmp, cpu_env, addr, tmp);
6342    tcg_temp_free_i32(addr);
6343    store_reg(s, a->rd, tmp);
6344    return true;
6345}
6346
6347/*
6348 * Load/store register index
6349 */
6350
6351static ISSInfo make_issinfo(DisasContext *s, int rd, bool p, bool w)
6352{
6353    ISSInfo ret;
6354
6355    /* ISS not valid if writeback */
6356    if (p && !w) {
6357        ret = rd;
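            /* A 2-byte insn is a 16-bit Thumb encoding; flag it in the ISS.  */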
6358        if (s->base.pc_next - s->pc_curr == 2) {
6359            ret |= ISSIs16Bit;
6360        }
6361    } else {
6362        ret = ISSInvalid;
6363    }
6364    return ret;
6365}
6366
6367static TCGv_i32 op_addr_rr_pre(DisasContext *s, arg_ldst_rr *a)
6368{
6369    TCGv_i32 addr = load_reg(s, a->rn);
6370
6371    if (s->v8m_stackcheck && a->rn == 13 && a->w) {
6372        gen_helper_v8m_stackcheck(cpu_env, addr);
6373    }
6374
6375    if (a->p) {
6376        TCGv_i32 ofs = load_reg(s, a->rm);
6377        gen_arm_shift_im(ofs, a->shtype, a->shimm, 0);
6378        if (a->u) {
6379            tcg_gen_add_i32(addr, addr, ofs);
6380        } else {
6381            tcg_gen_sub_i32(addr, addr, ofs);
6382        }
6383        tcg_temp_free_i32(ofs);
6384    }
6385    return addr;
6386}
6387
6388static void op_addr_rr_post(DisasContext *s, arg_ldst_rr *a,
6389                            TCGv_i32 addr, int address_offset)
6390{
6391    if (!a->p) {
6392        TCGv_i32 ofs = load_reg(s, a->rm);
6393        gen_arm_shift_im(ofs, a->shtype, a->shimm, 0);
6394        if (a->u) {
6395            tcg_gen_add_i32(addr, addr, ofs);
6396        } else {
6397            tcg_gen_sub_i32(addr, addr, ofs);
6398        }
6399        tcg_temp_free_i32(ofs);
6400    } else if (!a->w) {
6401        tcg_temp_free_i32(addr);
6402        return;
6403    }
6404    tcg_gen_addi_i32(addr, addr, address_offset);
6405    store_reg(s, a->rn, addr);
6406}
6407
6408static bool op_load_rr(DisasContext *s, arg_ldst_rr *a,
6409                       MemOp mop, int mem_idx)
6410{
6411    ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w);
6412    TCGv_i32 addr, tmp;
6413
6414    addr = op_addr_rr_pre(s, a);
6415
6416    tmp = tcg_temp_new_i32();
6417    gen_aa32_ld_i32(s, tmp, addr, mem_idx, mop | s->be_data);
6418    disas_set_da_iss(s, mop, issinfo);
6419
6420    /*
6421     * Perform base writeback before the loaded value to
6422     * ensure correct behavior with overlapping index registers.
6423     */
6424    op_addr_rr_post(s, a, addr, 0);
6425    store_reg_from_load(s, a->rt, tmp);
6426    return true;
6427}
6428
6429static bool op_store_rr(DisasContext *s, arg_ldst_rr *a,
6430                        MemOp mop, int mem_idx)
6431{
6432    ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w) | ISSIsWrite;
6433    TCGv_i32 addr, tmp;
6434
6435    addr = op_addr_rr_pre(s, a);
6436
6437    tmp = load_reg(s, a->rt);
6438    gen_aa32_st_i32(s, tmp, addr, mem_idx, mop | s->be_data);
6439    disas_set_da_iss(s, mop, issinfo);
6440    tcg_temp_free_i32(tmp);
6441
6442    op_addr_rr_post(s, a, addr, 0);
6443    return true;
6444}
6445
6446static bool trans_LDRD_rr(DisasContext *s, arg_ldst_rr *a)
6447{
6448    int mem_idx = get_mem_index(s);
6449    TCGv_i32 addr, tmp;
6450
6451    if (!ENABLE_ARCH_5TE) {
6452        return false;
6453    }
6454    if (a->rt & 1) {
6455        unallocated_encoding(s);
6456        return true;
6457    }
6458    addr = op_addr_rr_pre(s, a);
6459
6460    tmp = tcg_temp_new_i32();
6461    gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
6462    store_reg(s, a->rt, tmp);
6463
6464    tcg_gen_addi_i32(addr, addr, 4);
6465
6466    tmp = tcg_temp_new_i32();
6467    gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
6468    store_reg(s, a->rt + 1, tmp);
6469
6470    /* LDRD w/ base writeback is undefined if the registers overlap.  */
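        /* The -4 undoes the addr increment above, so that any writeback
         * uses the correct indexed address.  */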
6471    op_addr_rr_post(s, a, addr, -4);
6472    return true;
6473}
6474
6475static bool trans_STRD_rr(DisasContext *s, arg_ldst_rr *a)
6476{
6477    int mem_idx = get_mem_index(s);
6478    TCGv_i32 addr, tmp;
6479
6480    if (!ENABLE_ARCH_5TE) {
6481        return false;
6482    }
6483    if (a->rt & 1) {
6484        unallocated_encoding(s);
6485        return true;
6486    }
6487    addr = op_addr_rr_pre(s, a);
6488
6489    tmp = load_reg(s, a->rt);
6490    gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
6491    tcg_temp_free_i32(tmp);
6492
6493    tcg_gen_addi_i32(addr, addr, 4);
6494
6495    tmp = load_reg(s, a->rt + 1);
6496    gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
6497    tcg_temp_free_i32(tmp);
6498
6499    op_addr_rr_post(s, a, addr, -4);
6500    return true;
6501}
6502
6503/*
6504 * Load/store immediate index
6505 */
6506
6507static TCGv_i32 op_addr_ri_pre(DisasContext *s, arg_ldst_ri *a)
6508{
6509    int ofs = a->imm;
6510
6511    if (!a->u) {
6512        ofs = -ofs;
6513    }
6514
6515    if (s->v8m_stackcheck && a->rn == 13 && a->w) {
6516        /*
6517         * Stackcheck. Here we know 'addr' is the current SP;
6518         * U is set if we're moving SP up, else down. It is
6519         * UNKNOWN whether the limit check triggers when SP starts
6520         * below the limit and ends up above it; we choose to do so.
6521         */
6522        if (!a->u) {
6523            TCGv_i32 newsp = tcg_temp_new_i32();
6524            tcg_gen_addi_i32(newsp, cpu_R[13], ofs);
6525            gen_helper_v8m_stackcheck(cpu_env, newsp);
6526            tcg_temp_free_i32(newsp);
6527        } else {
6528            gen_helper_v8m_stackcheck(cpu_env, cpu_R[13]);
6529        }
6530    }
6531
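        /*
         * For a pre-indexed access apply the offset now; for a
         * post-indexed access it is applied in op_addr_ri_post().
         * add_reg_for_lit() also handles the PC literal-address case.
         */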
6532    return add_reg_for_lit(s, a->rn, a->p ? ofs : 0);
6533}
6534
6535static void op_addr_ri_post(DisasContext *s, arg_ldst_ri *a,
6536                            TCGv_i32 addr, int address_offset)
6537{
6538    if (!a->p) {
6539        if (a->u) {
6540            address_offset += a->imm;
6541        } else {
6542            address_offset -= a->imm;
6543        }
6544    } else if (!a->w) {
6545        tcg_temp_free_i32(addr);
6546        return;
6547    }
6548    tcg_gen_addi_i32(addr, addr, address_offset);
6549    store_reg(s, a->rn, addr);
6550}
6551
6552static bool op_load_ri(DisasContext *s, arg_ldst_ri *a,
6553                       MemOp mop, int mem_idx)
6554{
6555    ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w);
6556    TCGv_i32 addr, tmp;
6557
6558    addr = op_addr_ri_pre(s, a);
6559
6560    tmp = tcg_temp_new_i32();
6561    gen_aa32_ld_i32(s, tmp, addr, mem_idx, mop | s->be_data);
6562    disas_set_da_iss(s, mop, issinfo);
6563
6564    /*
6565     * Perform base writeback before the loaded value to
6566     * ensure correct behavior with overlapping index registers.
6567     */
6568    op_addr_ri_post(s, a, addr, 0);
6569    store_reg_from_load(s, a->rt, tmp);
6570    return true;
6571}
6572
6573static bool op_store_ri(DisasContext *s, arg_ldst_ri *a,
6574                        MemOp mop, int mem_idx)
6575{
6576    ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w) | ISSIsWrite;
6577    TCGv_i32 addr, tmp;
6578
6579    addr = op_addr_ri_pre(s, a);
6580
6581    tmp = load_reg(s, a->rt);
6582    gen_aa32_st_i32(s, tmp, addr, mem_idx, mop | s->be_data);
6583    disas_set_da_iss(s, mop, issinfo);
6584    tcg_temp_free_i32(tmp);
6585
6586    op_addr_ri_post(s, a, addr, 0);
6587    return true;
6588}
6589
6590static bool op_ldrd_ri(DisasContext *s, arg_ldst_ri *a, int rt2)
6591{
6592    int mem_idx = get_mem_index(s);
6593    TCGv_i32 addr, tmp;
6594
6595    addr = op_addr_ri_pre(s, a);
6596
6597    tmp = tcg_temp_new_i32();
6598    gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
6599    store_reg(s, a->rt, tmp);
6600
6601    tcg_gen_addi_i32(addr, addr, 4);
6602
6603    tmp = tcg_temp_new_i32();
6604    gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
6605    store_reg(s, rt2, tmp);
6606
6607    /* LDRD with base writeback is UNPREDICTABLE if the registers overlap.  */
6608    op_addr_ri_post(s, a, addr, -4);
6609    return true;
6610}
6611
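/*
 * In the A32 encoding rt2 is implicit (rt must be even, rt2 == rt + 1);
 * the T32 encoding instead carries an explicit rt2 field.
 */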
6612static bool trans_LDRD_ri_a32(DisasContext *s, arg_ldst_ri *a)
6613{
6614    if (!ENABLE_ARCH_5TE || (a->rt & 1)) {
6615        return false;
6616    }
6617    return op_ldrd_ri(s, a, a->rt + 1);
6618}
6619
6620static bool trans_LDRD_ri_t32(DisasContext *s, arg_ldst_ri2 *a)
6621{
6622    arg_ldst_ri b = {
6623        .u = a->u, .w = a->w, .p = a->p,
6624        .rn = a->rn, .rt = a->rt, .imm = a->imm
6625    };
6626    return op_ldrd_ri(s, &b, a->rt2);
6627}
6628
6629static bool op_strd_ri(DisasContext *s, arg_ldst_ri *a, int rt2)
6630{
6631    int mem_idx = get_mem_index(s);
6632    TCGv_i32 addr, tmp;
6633
6634    addr = op_addr_ri_pre(s, a);
6635
6636    tmp = load_reg(s, a->rt);
6637    gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
6638    tcg_temp_free_i32(tmp);
6639
6640    tcg_gen_addi_i32(addr, addr, 4);
6641
6642    tmp = load_reg(s, rt2);
6643    gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
6644    tcg_temp_free_i32(tmp);
6645
6646    op_addr_ri_post(s, a, addr, -4);
6647    return true;
6648}
6649
6650static bool trans_STRD_ri_a32(DisasContext *s, arg_ldst_ri *a)
6651{
6652    if (!ENABLE_ARCH_5TE || (a->rt & 1)) {
6653        return false;
6654    }
6655    return op_strd_ri(s, a, a->rt + 1);
6656}
6657
6658static bool trans_STRD_ri_t32(DisasContext *s, arg_ldst_ri2 *a)
6659{
6660    arg_ldst_ri b = {
6661        .u = a->u, .w = a->w, .p = a->p,
6662        .rn = a->rn, .rt = a->rt, .imm = a->imm
6663    };
6664    return op_strd_ri(s, &b, a->rt2);
6665}
6666
6667#define DO_LDST(NAME, WHICH, MEMOP) \
6668static bool trans_##NAME##_ri(DisasContext *s, arg_ldst_ri *a)        \
6669{                                                                     \
6670    return op_##WHICH##_ri(s, a, MEMOP, get_mem_index(s));            \
6671}                                                                     \
6672static bool trans_##NAME##T_ri(DisasContext *s, arg_ldst_ri *a)       \
6673{                                                                     \
6674    return op_##WHICH##_ri(s, a, MEMOP, get_a32_user_mem_index(s));   \
6675}                                                                     \
6676static bool trans_##NAME##_rr(DisasContext *s, arg_ldst_rr *a)        \
6677{                                                                     \
6678    return op_##WHICH##_rr(s, a, MEMOP, get_mem_index(s));            \
6679}                                                                     \
6680static bool trans_##NAME##T_rr(DisasContext *s, arg_ldst_rr *a)       \
6681{                                                                     \
6682    return op_##WHICH##_rr(s, a, MEMOP, get_a32_user_mem_index(s));   \
6683}
6684
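/*
 * For example, DO_LDST(LDR, load, MO_UL) expands to trans_LDR_ri,
 * trans_LDRT_ri, trans_LDR_rr and trans_LDRT_rr; the T variants
 * (LDRT and friends) use the unprivileged memory index.
 */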
6685DO_LDST(LDR, load, MO_UL)
6686DO_LDST(LDRB, load, MO_UB)
6687DO_LDST(LDRH, load, MO_UW)
6688DO_LDST(LDRSB, load, MO_SB)
6689DO_LDST(LDRSH, load, MO_SW)
6690
6691DO_LDST(STR, store, MO_UL)
6692DO_LDST(STRB, store, MO_UB)
6693DO_LDST(STRH, store, MO_UW)
6694
6695#undef DO_LDST
6696
6697/*
6698 * Synchronization primitives
6699 */
6700
6701static bool op_swp(DisasContext *s, arg_SWP *a, MemOp opc)
6702{
6703    TCGv_i32 addr, tmp;
6704    TCGv taddr;
6705
6706    opc |= s->be_data;
6707    addr = load_reg(s, a->rn);
6708    taddr = gen_aa32_addr(s, addr, opc);
6709    tcg_temp_free_i32(addr);
6710
6711    tmp = load_reg(s, a->rt2);
6712    tcg_gen_atomic_xchg_i32(tmp, taddr, tmp, get_mem_index(s), opc);
6713    tcg_temp_free(taddr);
6714
6715    store_reg(s, a->rt, tmp);
6716    return true;
6717}
6718
6719static bool trans_SWP(DisasContext *s, arg_SWP *a)
6720{
6721    return op_swp(s, a, MO_UL | MO_ALIGN);
6722}
6723
6724static bool trans_SWPB(DisasContext *s, arg_SWP *a)
6725{
6726    return op_swp(s, a, MO_UB);
6727}
6728
6729/*
6730 * Load/Store Exclusive and Load-Acquire/Store-Release
6731 */
6732
6733static bool op_strex(DisasContext *s, arg_STREX *a, MemOp mop, bool rel)
6734{
6735    TCGv_i32 addr;
6736    /* Some cases stopped being UNPREDICTABLE in v8A (but not v8M) */
6737    bool v8a = ENABLE_ARCH_8 && !arm_dc_feature(s, ARM_FEATURE_M);
6738
6739    /* We UNDEF for these UNPREDICTABLE cases.  */
6740    if (a->rd == 15 || a->rn == 15 || a->rt == 15
6741        || a->rd == a->rn || a->rd == a->rt
6742        || (!v8a && s->thumb && (a->rd == 13 || a->rt == 13))
6743        || (mop == MO_64
6744            && (a->rt2 == 15
6745                || a->rd == a->rt2
6746                || (!v8a && s->thumb && a->rt2 == 13)))) {
6747        unallocated_encoding(s);
6748        return true;
6749    }
6750
6751    if (rel) {
6752        tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
6753    }
6754
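    /*
     * Note (our reading of the TCG rules): a "local" temp is needed
     * here because gen_store_exclusive emits branches internally and
     * ordinary TCG temps are not live across a branch.
     */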
6755    addr = tcg_temp_local_new_i32();
6756    load_reg_var(s, addr, a->rn);
6757    tcg_gen_addi_i32(addr, addr, a->imm);
6758
6759    gen_store_exclusive(s, a->rd, a->rt, a->rt2, addr, mop);
6760    tcg_temp_free_i32(addr);
6761    return true;
6762}
6763
6764static bool trans_STREX(DisasContext *s, arg_STREX *a)
6765{
6766    if (!ENABLE_ARCH_6) {
6767        return false;
6768    }
6769    return op_strex(s, a, MO_32, false);
6770}
6771
6772static bool trans_STREXD_a32(DisasContext *s, arg_STREX *a)
6773{
6774    if (!ENABLE_ARCH_6K) {
6775        return false;
6776    }
6777    /* We UNDEF for these UNPREDICTABLE cases.  */
6778    if (a->rt & 1) {
6779        unallocated_encoding(s);
6780        return true;
6781    }
6782    a->rt2 = a->rt + 1;
6783    return op_strex(s, a, MO_64, false);
6784}
6785
6786static bool trans_STREXD_t32(DisasContext *s, arg_STREX *a)
6787{
6788    return op_strex(s, a, MO_64, false);
6789}
6790
6791static bool trans_STREXB(DisasContext *s, arg_STREX *a)
6792{
6793    if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
6794        return false;
6795    }
6796    return op_strex(s, a, MO_8, false);
6797}
6798
6799static bool trans_STREXH(DisasContext *s, arg_STREX *a)
6800{
6801    if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
6802        return false;
6803    }
6804    return op_strex(s, a, MO_16, false);
6805}
6806
6807static bool trans_STLEX(DisasContext *s, arg_STREX *a)
6808{
6809    if (!ENABLE_ARCH_8) {
6810        return false;
6811    }
6812    return op_strex(s, a, MO_32, true);
6813}
6814
6815static bool trans_STLEXD_a32(DisasContext *s, arg_STREX *a)
6816{
6817    if (!ENABLE_ARCH_8) {
6818        return false;
6819    }
6820    /* We UNDEF for these UNPREDICTABLE cases.  */
6821    if (a->rt & 1) {
6822        unallocated_encoding(s);
6823        return true;
6824    }
6825    a->rt2 = a->rt + 1;
6826    return op_strex(s, a, MO_64, true);
6827}
6828
6829static bool trans_STLEXD_t32(DisasContext *s, arg_STREX *a)
6830{
6831    if (!ENABLE_ARCH_8) {
6832        return false;
6833    }
6834    return op_strex(s, a, MO_64, true);
6835}
6836
6837static bool trans_STLEXB(DisasContext *s, arg_STREX *a)
6838{
6839    if (!ENABLE_ARCH_8) {
6840        return false;
6841    }
6842    return op_strex(s, a, MO_8, true);
6843}
6844
6845static bool trans_STLEXH(DisasContext *s, arg_STREX *a)
6846{
6847    if (!ENABLE_ARCH_8) {
6848        return false;
6849    }
6850    return op_strex(s, a, MO_16, true);
6851}
6852
6853static bool op_stl(DisasContext *s, arg_STL *a, MemOp mop)
6854{
6855    TCGv_i32 addr, tmp;
6856
6857    if (!ENABLE_ARCH_8) {
6858        return false;
6859    }
6860    /* We UNDEF for these UNPREDICTABLE cases.  */
6861    if (a->rn == 15 || a->rt == 15) {
6862        unallocated_encoding(s);
6863        return true;
6864    }
6865
6866    addr = load_reg(s, a->rn);
6867    tmp = load_reg(s, a->rt);
6868    tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
6869    gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), mop | s->be_data);
6870    disas_set_da_iss(s, mop, a->rt | ISSIsAcqRel | ISSIsWrite);
6871
6872    tcg_temp_free_i32(tmp);
6873    tcg_temp_free_i32(addr);
6874    return true;
6875}
6876
6877static bool trans_STL(DisasContext *s, arg_STL *a)
6878{
6879    return op_stl(s, a, MO_UL);
6880}
6881
6882static bool trans_STLB(DisasContext *s, arg_STL *a)
6883{
6884    return op_stl(s, a, MO_UB);
6885}
6886
6887static bool trans_STLH(DisasContext *s, arg_STL *a)
6888{
6889    return op_stl(s, a, MO_UW);
6890}
6891
6892static bool op_ldrex(DisasContext *s, arg_LDREX *a, MemOp mop, bool acq)
6893{
6894    TCGv_i32 addr;
6895    /* Some cases stopped being UNPREDICTABLE in v8A (but not v8M) */
6896    bool v8a = ENABLE_ARCH_8 && !arm_dc_feature(s, ARM_FEATURE_M);
6897
6898    /* We UNDEF for these UNPREDICTABLE cases.  */
6899    if (a->rn == 15 || a->rt == 15
6900        || (!v8a && s->thumb && a->rt == 13)
6901        || (mop == MO_64
6902            && (a->rt2 == 15 || a->rt == a->rt2
6903                || (!v8a && s->thumb && a->rt2 == 13)))) {
6904        unallocated_encoding(s);
6905        return true;
6906    }
6907
6908    addr = tcg_temp_local_new_i32();
6909    load_reg_var(s, addr, a->rn);
6910    tcg_gen_addi_i32(addr, addr, a->imm);
6911
6912    gen_load_exclusive(s, a->rt, a->rt2, addr, mop);
6913    tcg_temp_free_i32(addr);
6914
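    /*
     * For acquire semantics the barrier is placed after the load,
     * mirroring op_strex, which puts its barrier before the store
     * for release semantics (conventional acquire/release placement).
     */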
6915    if (acq) {
6916        tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
6917    }
6918    return true;
6919}
6920
6921static bool trans_LDREX(DisasContext *s, arg_LDREX *a)
6922{
6923    if (!ENABLE_ARCH_6) {
6924        return false;
6925    }
6926    return op_ldrex(s, a, MO_32, false);
6927}
6928
6929static bool trans_LDREXD_a32(DisasContext *s, arg_LDREX *a)
6930{
6931    if (!ENABLE_ARCH_6K) {
6932        return false;
6933    }
6934    /* We UNDEF for these UNPREDICTABLE cases.  */
6935    if (a->rt & 1) {
6936        unallocated_encoding(s);
6937        return true;
6938    }
6939    a->rt2 = a->rt + 1;
6940    return op_ldrex(s, a, MO_64, false);
6941}
6942
6943static bool trans_LDREXD_t32(DisasContext *s, arg_LDREX *a)
6944{
6945    return op_ldrex(s, a, MO_64, false);
6946}
6947
6948static bool trans_LDREXB(DisasContext *s, arg_LDREX *a)
6949{
6950    if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
6951        return false;
6952    }
6953    return op_ldrex(s, a, MO_8, false);
6954}
6955
6956static bool trans_LDREXH(DisasContext *s, arg_LDREX *a)
6957{
6958    if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
6959        return false;
6960    }
6961    return op_ldrex(s, a, MO_16, false);
6962}
6963
6964static bool trans_LDAEX(DisasContext *s, arg_LDREX *a)
6965{
6966    if (!ENABLE_ARCH_8) {
6967        return false;
6968    }
6969    return op_ldrex(s, a, MO_32, true);
6970}
6971
6972static bool trans_LDAEXD_a32(DisasContext *s, arg_LDREX *a)
6973{
6974    if (!ENABLE_ARCH_8) {
6975        return false;
6976    }
6977    /* We UNDEF for these UNPREDICTABLE cases.  */
6978    if (a->rt & 1) {
6979        unallocated_encoding(s);
6980        return true;
6981    }
6982    a->rt2 = a->rt + 1;
6983    return op_ldrex(s, a, MO_64, true);
6984}
6985
6986static bool trans_LDAEXD_t32(DisasContext *s, arg_LDREX *a)
6987{
6988    if (!ENABLE_ARCH_8) {
6989        return false;
6990    }
6991    return op_ldrex(s, a, MO_64, true);
6992}
6993
6994static bool trans_LDAEXB(DisasContext *s, arg_LDREX *a)
6995{
6996    if (!ENABLE_ARCH_8) {
6997        return false;
6998    }
6999    return op_ldrex(s, a, MO_8, true);
7000}
7001
7002static bool trans_LDAEXH(DisasContext *s, arg_LDREX *a)
7003{
7004    if (!ENABLE_ARCH_8) {
7005        return false;
7006    }
7007    return op_ldrex(s, a, MO_16, true);
7008}
7009
7010static bool op_lda(DisasContext *s, arg_LDA *a, MemOp mop)
7011{
7012    TCGv_i32 addr, tmp;
7013
7014    if (!ENABLE_ARCH_8) {
7015        return false;
7016    }
7017    /* We UNDEF for these UNPREDICTABLE cases.  */
7018    if (a->rn == 15 || a->rt == 15) {
7019        unallocated_encoding(s);
7020        return true;
7021    }
7022
7023    addr = load_reg(s, a->rn);
7024    tmp = tcg_temp_new_i32();
7025    gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), mop | s->be_data);
7026    disas_set_da_iss(s, mop, a->rt | ISSIsAcqRel);
7027    tcg_temp_free_i32(addr);
7028
7029    store_reg(s, a->rt, tmp);
7030    tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
7031    return true;
7032}
7033
7034static bool trans_LDA(DisasContext *s, arg_LDA *a)
7035{
7036    return op_lda(s, a, MO_UL);
7037}
7038
7039static bool trans_LDAB(DisasContext *s, arg_LDA *a)
7040{
7041    return op_lda(s, a, MO_UB);
7042}
7043
7044static bool trans_LDAH(DisasContext *s, arg_LDA *a)
7045{
7046    return op_lda(s, a, MO_UW);
7047}
7048
7049/*
7050 * Media instructions
7051 */
7052
7053static bool trans_USADA8(DisasContext *s, arg_USADA8 *a)
7054{
7055    TCGv_i32 t1, t2;
7056
7057    if (!ENABLE_ARCH_6) {
7058        return false;
7059    }
7060
7061    t1 = load_reg(s, a->rn);
7062    t2 = load_reg(s, a->rm);
7063    gen_helper_usad8(t1, t1, t2);
7064    tcg_temp_free_i32(t2);
7065    if (a->ra != 15) {
7066        t2 = load_reg(s, a->ra);
7067        tcg_gen_add_i32(t1, t1, t2);
7068        tcg_temp_free_i32(t2);
7069    }
7070    store_reg(s, a->rd, t1);
7071    return true;
7072}
7073
7074static bool op_bfx(DisasContext *s, arg_UBFX *a, bool u)
7075{
7076    TCGv_i32 tmp;
7077    int width = a->widthm1 + 1;
7078    int shift = a->lsb;
7079
7080    if (!ENABLE_ARCH_6T2) {
7081        return false;
7082    }
7083    if (shift + width > 32) {
7084        /* UNPREDICTABLE; we choose to UNDEF */
7085        unallocated_encoding(s);
7086        return true;
7087    }
7088
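    /*
     * e.g. UBFX rd, rn, #8, #4 arrives here with shift == 8 and
     * width == 4, i.e. rd = (rn >> 8) & 0xf (illustrative).
     */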
7089    tmp = load_reg(s, a->rn);
7090    if (u) {
7091        tcg_gen_extract_i32(tmp, tmp, shift, width);
7092    } else {
7093        tcg_gen_sextract_i32(tmp, tmp, shift, width);
7094    }
7095    store_reg(s, a->rd, tmp);
7096    return true;
7097}
7098
7099static bool trans_SBFX(DisasContext *s, arg_SBFX *a)
7100{
7101    return op_bfx(s, a, false);
7102}
7103
7104static bool trans_UBFX(DisasContext *s, arg_UBFX *a)
7105{
7106    return op_bfx(s, a, true);
7107}
7108
7109static bool trans_BFCI(DisasContext *s, arg_BFCI *a)
7110{
7111    TCGv_i32 tmp;
7112    int msb = a->msb, lsb = a->lsb;
7113    int width;
7114
7115    if (!ENABLE_ARCH_6T2) {
7116        return false;
7117    }
7118    if (msb < lsb) {
7119        /* UNPREDICTABLE; we choose to UNDEF */
7120        unallocated_encoding(s);
7121        return true;
7122    }
7123
7124    width = msb + 1 - lsb;
7125    if (a->rn == 15) {
7126        /* BFC */
7127        tmp = tcg_const_i32(0);
7128    } else {
7129        /* BFI */
7130        tmp = load_reg(s, a->rn);
7131    }
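    /*
     * e.g. BFI rd, rn, #4, #8 (lsb 4, width 8) deposits rn[7:0] into
     * rd[11:4] and leaves the remaining bits of rd unchanged.
     */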
7132    if (width != 32) {
7133        TCGv_i32 tmp2 = load_reg(s, a->rd);
7134        tcg_gen_deposit_i32(tmp, tmp2, tmp, lsb, width);
7135        tcg_temp_free_i32(tmp2);
7136    }
7137    store_reg(s, a->rd, tmp);
7138    return true;
7139}
7140
7141static bool trans_UDF(DisasContext *s, arg_UDF *a)
7142{
7143    unallocated_encoding(s);
7144    return true;
7145}
7146
7147/*
7148 * Parallel addition and subtraction
7149 */
7150
7151static bool op_par_addsub(DisasContext *s, arg_rrr *a,
7152                          void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32))
7153{
7154    TCGv_i32 t0, t1;
7155
7156    if (s->thumb
7157        ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7158        : !ENABLE_ARCH_6) {
7159        return false;
7160    }
7161
7162    t0 = load_reg(s, a->rn);
7163    t1 = load_reg(s, a->rm);
7164
7165    gen(t0, t0, t1);
7166
7167    tcg_temp_free_i32(t1);
7168    store_reg(s, a->rd, t0);
7169    return true;
7170}
7171
7172static bool op_par_addsub_ge(DisasContext *s, arg_rrr *a,
7173                             void (*gen)(TCGv_i32, TCGv_i32,
7174                                         TCGv_i32, TCGv_ptr))
7175{
7176    TCGv_i32 t0, t1;
7177    TCGv_ptr ge;
7178
7179    if (s->thumb
7180        ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7181        : !ENABLE_ARCH_6) {
7182        return false;
7183    }
7184
7185    t0 = load_reg(s, a->rn);
7186    t1 = load_reg(s, a->rm);
7187
7188    ge = tcg_temp_new_ptr();
7189    tcg_gen_addi_ptr(ge, cpu_env, offsetof(CPUARMState, GE));
7190    gen(t0, t0, t1, ge);
7191
7192    tcg_temp_free_ptr(ge);
7193    tcg_temp_free_i32(t1);
7194    store_reg(s, a->rd, t0);
7195    return true;
7196}
7197
7198#define DO_PAR_ADDSUB(NAME, helper) \
7199static bool trans_##NAME(DisasContext *s, arg_rrr *a)   \
7200{                                                       \
7201    return op_par_addsub(s, a, helper);                 \
7202}
7203
7204#define DO_PAR_ADDSUB_GE(NAME, helper) \
7205static bool trans_##NAME(DisasContext *s, arg_rrr *a)   \
7206{                                                       \
7207    return op_par_addsub_ge(s, a, helper);              \
7208}
7209
7210DO_PAR_ADDSUB_GE(SADD16, gen_helper_sadd16)
7211DO_PAR_ADDSUB_GE(SASX, gen_helper_saddsubx)
7212DO_PAR_ADDSUB_GE(SSAX, gen_helper_ssubaddx)
7213DO_PAR_ADDSUB_GE(SSUB16, gen_helper_ssub16)
7214DO_PAR_ADDSUB_GE(SADD8, gen_helper_sadd8)
7215DO_PAR_ADDSUB_GE(SSUB8, gen_helper_ssub8)
7216
7217DO_PAR_ADDSUB_GE(UADD16, gen_helper_uadd16)
7218DO_PAR_ADDSUB_GE(UASX, gen_helper_uaddsubx)
7219DO_PAR_ADDSUB_GE(USAX, gen_helper_usubaddx)
7220DO_PAR_ADDSUB_GE(USUB16, gen_helper_usub16)
7221DO_PAR_ADDSUB_GE(UADD8, gen_helper_uadd8)
7222DO_PAR_ADDSUB_GE(USUB8, gen_helper_usub8)
7223
7224DO_PAR_ADDSUB(QADD16, gen_helper_qadd16)
7225DO_PAR_ADDSUB(QASX, gen_helper_qaddsubx)
7226DO_PAR_ADDSUB(QSAX, gen_helper_qsubaddx)
7227DO_PAR_ADDSUB(QSUB16, gen_helper_qsub16)
7228DO_PAR_ADDSUB(QADD8, gen_helper_qadd8)
7229DO_PAR_ADDSUB(QSUB8, gen_helper_qsub8)
7230
7231DO_PAR_ADDSUB(UQADD16, gen_helper_uqadd16)
7232DO_PAR_ADDSUB(UQASX, gen_helper_uqaddsubx)
7233DO_PAR_ADDSUB(UQSAX, gen_helper_uqsubaddx)
7234DO_PAR_ADDSUB(UQSUB16, gen_helper_uqsub16)
7235DO_PAR_ADDSUB(UQADD8, gen_helper_uqadd8)
7236DO_PAR_ADDSUB(UQSUB8, gen_helper_uqsub8)
7237
7238DO_PAR_ADDSUB(SHADD16, gen_helper_shadd16)
7239DO_PAR_ADDSUB(SHASX, gen_helper_shaddsubx)
7240DO_PAR_ADDSUB(SHSAX, gen_helper_shsubaddx)
7241DO_PAR_ADDSUB(SHSUB16, gen_helper_shsub16)
7242DO_PAR_ADDSUB(SHADD8, gen_helper_shadd8)
7243DO_PAR_ADDSUB(SHSUB8, gen_helper_shsub8)
7244
7245DO_PAR_ADDSUB(UHADD16, gen_helper_uhadd16)
7246DO_PAR_ADDSUB(UHASX, gen_helper_uhaddsubx)
7247DO_PAR_ADDSUB(UHSAX, gen_helper_uhsubaddx)
7248DO_PAR_ADDSUB(UHSUB16, gen_helper_uhsub16)
7249DO_PAR_ADDSUB(UHADD8, gen_helper_uhadd8)
7250DO_PAR_ADDSUB(UHSUB8, gen_helper_uhsub8)
7251
7252#undef DO_PAR_ADDSUB
7253#undef DO_PAR_ADDSUB_GE
7254
7255/*
7256 * Packing, unpacking, saturation, and reversal
7257 */
7258
7259static bool trans_PKH(DisasContext *s, arg_PKH *a)
7260{
7261    TCGv_i32 tn, tm;
7262    int shift = a->imm;
7263
7264    if (s->thumb
7265        ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7266        : !ENABLE_ARCH_6) {
7267        return false;
7268    }
7269
7270    tn = load_reg(s, a->rn);
7271    tm = load_reg(s, a->rm);
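    /*
     * Illustrative example: PKHBT rd, rn, rm, LSL #8 yields
     * rd[15:0] = rn[15:0] and rd[31:16] = (rm << 8)[31:16].
     */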
7272    if (a->tb) {
7273        /* PKHTB */
7274        if (shift == 0) {
7275            shift = 31;
7276        }
7277        tcg_gen_sari_i32(tm, tm, shift);
7278        tcg_gen_deposit_i32(tn, tn, tm, 0, 16);
7279    } else {
7280        /* PKHBT */
7281        tcg_gen_shli_i32(tm, tm, shift);
7282        tcg_gen_deposit_i32(tn, tm, tn, 0, 16);
7283    }
7284    tcg_temp_free_i32(tm);
7285    store_reg(s, a->rd, tn);
7286    return true;
7287}
7288
7289static bool op_sat(DisasContext *s, arg_sat *a,
7290                   void (*gen)(TCGv_i32, TCGv_env, TCGv_i32, TCGv_i32))
7291{
7292    TCGv_i32 tmp, satimm;
7293    int shift = a->imm;
7294
7295    if (!ENABLE_ARCH_6) {
7296        return false;
7297    }
7298
7299    tmp = load_reg(s, a->rn);
7300    if (a->sh) {
7301        tcg_gen_sari_i32(tmp, tmp, shift ? shift : 31);
7302    } else {
7303        tcg_gen_shli_i32(tmp, tmp, shift);
7304    }
7305
7306    satimm = tcg_const_i32(a->satimm);
7307    gen(tmp, cpu_env, tmp, satimm);
7308    tcg_temp_free_i32(satimm);
7309
7310    store_reg(s, a->rd, tmp);
7311    return true;
7312}
7313
7314static bool trans_SSAT(DisasContext *s, arg_sat *a)
7315{
7316    return op_sat(s, a, gen_helper_ssat);
7317}
7318
7319static bool trans_USAT(DisasContext *s, arg_sat *a)
7320{
7321    return op_sat(s, a, gen_helper_usat);
7322}
7323
7324static bool trans_SSAT16(DisasContext *s, arg_sat *a)
7325{
7326    if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
7327        return false;
7328    }
7329    return op_sat(s, a, gen_helper_ssat16);
7330}
7331
7332static bool trans_USAT16(DisasContext *s, arg_sat *a)
7333{
7334    if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
7335        return false;
7336    }
7337    return op_sat(s, a, gen_helper_usat16);
7338}
7339
7340static bool op_xta(DisasContext *s, arg_rrr_rot *a,
7341                   void (*gen_extract)(TCGv_i32, TCGv_i32),
7342                   void (*gen_add)(TCGv_i32, TCGv_i32, TCGv_i32))
7343{
7344    TCGv_i32 tmp;
7345
7346    if (!ENABLE_ARCH_6) {
7347        return false;
7348    }
7349
7350    tmp = load_reg(s, a->rm);
7351    /*
7352     * TODO: In many cases we could do a shift instead of a rotate.
7353     * Combined with a simple extend, that becomes an extract.
7354     */
7355    tcg_gen_rotri_i32(tmp, tmp, a->rot * 8);
7356    gen_extract(tmp, tmp);
7357
7358    if (a->rn != 15) {
7359        TCGv_i32 tmp2 = load_reg(s, a->rn);
7360        gen_add(tmp, tmp, tmp2);
7361        tcg_temp_free_i32(tmp2);
7362    }
7363    store_reg(s, a->rd, tmp);
7364    return true;
7365}
7366
7367static bool trans_SXTAB(DisasContext *s, arg_rrr_rot *a)
7368{
7369    return op_xta(s, a, tcg_gen_ext8s_i32, tcg_gen_add_i32);
7370}
7371
7372static bool trans_SXTAH(DisasContext *s, arg_rrr_rot *a)
7373{
7374    return op_xta(s, a, tcg_gen_ext16s_i32, tcg_gen_add_i32);
7375}
7376
7377static bool trans_SXTAB16(DisasContext *s, arg_rrr_rot *a)
7378{
7379    if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
7380        return false;
7381    }
7382    return op_xta(s, a, gen_helper_sxtb16, gen_add16);
7383}
7384
7385static bool trans_UXTAB(DisasContext *s, arg_rrr_rot *a)
7386{
7387    return op_xta(s, a, tcg_gen_ext8u_i32, tcg_gen_add_i32);
7388}
7389
7390static bool trans_UXTAH(DisasContext *s, arg_rrr_rot *a)
7391{
7392    return op_xta(s, a, tcg_gen_ext16u_i32, tcg_gen_add_i32);
7393}
7394
7395static bool trans_UXTAB16(DisasContext *s, arg_rrr_rot *a)
7396{
7397    if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
7398        return false;
7399    }
7400    return op_xta(s, a, gen_helper_uxtb16, gen_add16);
7401}
7402
7403static bool trans_SEL(DisasContext *s, arg_rrr *a)
7404{
7405    TCGv_i32 t1, t2, t3;
7406
7407    if (s->thumb
7408        ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7409        : !ENABLE_ARCH_6) {
7410        return false;
7411    }
7412
7413    t1 = load_reg(s, a->rn);
7414    t2 = load_reg(s, a->rm);
7415    t3 = tcg_temp_new_i32();
7416    tcg_gen_ld_i32(t3, cpu_env, offsetof(CPUARMState, GE));
7417    gen_helper_sel_flags(t1, t3, t1, t2);
7418    tcg_temp_free_i32(t3);
7419    tcg_temp_free_i32(t2);
7420    store_reg(s, a->rd, t1);
7421    return true;
7422}
7423
7424static bool op_rr(DisasContext *s, arg_rr *a,
7425                  void (*gen)(TCGv_i32, TCGv_i32))
7426{
7427    TCGv_i32 tmp;
7428
7429    tmp = load_reg(s, a->rm);
7430    gen(tmp, tmp);
7431    store_reg(s, a->rd, tmp);
7432    return true;
7433}
7434
7435static bool trans_REV(DisasContext *s, arg_rr *a)
7436{
7437    if (!ENABLE_ARCH_6) {
7438        return false;
7439    }
7440    return op_rr(s, a, tcg_gen_bswap32_i32);
7441}
7442
7443static bool trans_REV16(DisasContext *s, arg_rr *a)
7444{
7445    if (!ENABLE_ARCH_6) {
7446        return false;
7447    }
7448    return op_rr(s, a, gen_rev16);
7449}
7450
7451static bool trans_REVSH(DisasContext *s, arg_rr *a)
7452{
7453    if (!ENABLE_ARCH_6) {
7454        return false;
7455    }
7456    return op_rr(s, a, gen_revsh);
7457}
7458
7459static bool trans_RBIT(DisasContext *s, arg_rr *a)
7460{
7461    if (!ENABLE_ARCH_6T2) {
7462        return false;
7463    }
7464    return op_rr(s, a, gen_helper_rbit);
7465}
7466
7467/*
7468 * Signed multiply, signed and unsigned divide
7469 */
7470
7471static bool op_smlad(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub)
7472{
7473    TCGv_i32 t1, t2;
7474
7475    if (!ENABLE_ARCH_6) {
7476        return false;
7477    }
7478
7479    t1 = load_reg(s, a->rn);
7480    t2 = load_reg(s, a->rm);
7481    if (m_swap) {
7482        gen_swap_half(t2, t2);
7483    }
7484    gen_smul_dual(t1, t2);
7485
7486    if (sub) {
7487        /*
7488         * This subtraction cannot overflow, so we can do a simple
7489         * 32-bit subtraction and then a possible 32-bit saturating
7490         * addition of Ra.
7491         */
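        /*
         * (Bound check, our arithmetic: each 16x16 product lies in
         * [-(2^30 - 2^15), 2^30], so the difference stays within
         * +/-(2^31 - 2^15) and cannot overflow int32.)
         */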
7492        tcg_gen_sub_i32(t1, t1, t2);
7493        tcg_temp_free_i32(t2);
7494
7495        if (a->ra != 15) {
7496            t2 = load_reg(s, a->ra);
7497            gen_helper_add_setq(t1, cpu_env, t1, t2);
7498            tcg_temp_free_i32(t2);
7499        }
7500    } else if (a->ra == 15) {
7501        /* Single saturation-checking addition */
7502        gen_helper_add_setq(t1, cpu_env, t1, t2);
7503        tcg_temp_free_i32(t2);
7504    } else {
7505        /*
7506         * We need to add the products and Ra together and then
7507         * determine whether the final result overflowed. Doing
7508         * this as two separate add-and-check-overflow steps incorrectly
7509         * sets Q for cases like (-32768 * -32768) + (-32768 * -32768) + -1.
7510         * Do all the arithmetic at 64-bits and then check for overflow.
7511         */
7512        TCGv_i64 p64, q64;
7513        TCGv_i32 t3, qf, one;
7514
7515        p64 = tcg_temp_new_i64();
7516        q64 = tcg_temp_new_i64();
7517        tcg_gen_ext_i32_i64(p64, t1);
7518        tcg_gen_ext_i32_i64(q64, t2);
7519        tcg_gen_add_i64(p64, p64, q64);
7520        load_reg_var(s, t2, a->ra);
7521        tcg_gen_ext_i32_i64(q64, t2);
7522        tcg_gen_add_i64(p64, p64, q64);
7523        tcg_temp_free_i64(q64);
7524
7525        tcg_gen_extr_i64_i32(t1, t2, p64);
7526        tcg_temp_free_i64(p64);
7527        /*
7528         * t1 is the low half of the result which goes into Rd.
7529         * We have overflow and must set Q if the high half (t2)
7530         * is different from the sign-extension of t1.
7531         */
7532        t3 = tcg_temp_new_i32();
7533        tcg_gen_sari_i32(t3, t1, 31);
7534        qf = load_cpu_field(QF);
7535        one = tcg_const_i32(1);
7536        tcg_gen_movcond_i32(TCG_COND_NE, qf, t2, t3, one, qf);
7537        store_cpu_field(qf, QF);
7538        tcg_temp_free_i32(one);
7539        tcg_temp_free_i32(t3);
7540        tcg_temp_free_i32(t2);
7541    }
7542    store_reg(s, a->rd, t1);
7543    return true;
7544}
7545
7546static bool trans_SMLAD(DisasContext *s, arg_rrrr *a)
7547{
7548    return op_smlad(s, a, false, false);
7549}
7550
7551static bool trans_SMLADX(DisasContext *s, arg_rrrr *a)
7552{
7553    return op_smlad(s, a, true, false);
7554}
7555
7556static bool trans_SMLSD(DisasContext *s, arg_rrrr *a)
7557{
7558    return op_smlad(s, a, false, true);
7559}
7560
7561static bool trans_SMLSDX(DisasContext *s, arg_rrrr *a)
7562{
7563    return op_smlad(s, a, true, true);
7564}
7565
7566static bool op_smlald(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub)
7567{
7568    TCGv_i32 t1, t2;
7569    TCGv_i64 l1, l2;
7570
7571    if (!ENABLE_ARCH_6) {
7572        return false;
7573    }
7574
7575    t1 = load_reg(s, a->rn);
7576    t2 = load_reg(s, a->rm);
7577    if (m_swap) {
7578        gen_swap_half(t2, t2);
7579    }
7580    gen_smul_dual(t1, t2);
7581
7582    l1 = tcg_temp_new_i64();
7583    l2 = tcg_temp_new_i64();
7584    tcg_gen_ext_i32_i64(l1, t1);
7585    tcg_gen_ext_i32_i64(l2, t2);
7586    tcg_temp_free_i32(t1);
7587    tcg_temp_free_i32(t2);
7588
7589    if (sub) {
7590        tcg_gen_sub_i64(l1, l1, l2);
7591    } else {
7592        tcg_gen_add_i64(l1, l1, l2);
7593    }
7594    tcg_temp_free_i64(l2);
7595
7596    gen_addq(s, l1, a->ra, a->rd);
7597    gen_storeq_reg(s, a->ra, a->rd, l1);
7598    tcg_temp_free_i64(l1);
7599    return true;
7600}
7601
7602static bool trans_SMLALD(DisasContext *s, arg_rrrr *a)
7603{
7604    return op_smlald(s, a, false, false);
7605}
7606
7607static bool trans_SMLALDX(DisasContext *s, arg_rrrr *a)
7608{
7609    return op_smlald(s, a, true, false);
7610}
7611
7612static bool trans_SMLSLD(DisasContext *s, arg_rrrr *a)
7613{
7614    return op_smlald(s, a, false, true);
7615}
7616
7617static bool trans_SMLSLDX(DisasContext *s, arg_rrrr *a)
7618{
7619    return op_smlald(s, a, true, true);
7620}
7621
7622static bool op_smmla(DisasContext *s, arg_rrrr *a, bool round, bool sub)
7623{
7624    TCGv_i32 t1, t2;
7625
7626    if (s->thumb
7627        ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7628        : !ENABLE_ARCH_6) {
7629        return false;
7630    }
7631
7632    t1 = load_reg(s, a->rn);
7633    t2 = load_reg(s, a->rm);
7634    tcg_gen_muls2_i32(t2, t1, t1, t2);
7635
7636    if (a->ra != 15) {
7637        TCGv_i32 t3 = load_reg(s, a->ra);
7638        if (sub) {
7639            /*
7640             * For SMMLS, we need a 64-bit subtract: it propagates the
7641             * borrow caused by a non-zero multiplicand lowpart, and it
7642             * yields the correct result lowpart for rounding.
7643             */
7644            TCGv_i32 zero = tcg_const_i32(0);
7645            tcg_gen_sub2_i32(t2, t1, zero, t3, t2, t1);
7646            tcg_temp_free_i32(zero);
7647        } else {
7648            tcg_gen_add_i32(t1, t1, t3);
7649        }
7650        tcg_temp_free_i32(t3);
7651    }
7652    if (round) {
7653        /*
7654         * Adding 0x80000000 to the 64-bit quantity means that we have
7655         * carry into the high word when the low word has the msb set.
7656         */
7657        tcg_gen_shri_i32(t2, t2, 31);
7658        tcg_gen_add_i32(t1, t1, t2);
7659    }
7660    tcg_temp_free_i32(t2);
7661    store_reg(s, a->rd, t1);
7662    return true;
7663}
7664
7665static bool trans_SMMLA(DisasContext *s, arg_rrrr *a)
7666{
7667    return op_smmla(s, a, false, false);
7668}
7669
7670static bool trans_SMMLAR(DisasContext *s, arg_rrrr *a)
7671{
7672    return op_smmla(s, a, true, false);
7673}
7674
7675static bool trans_SMMLS(DisasContext *s, arg_rrrr *a)
7676{
7677    return op_smmla(s, a, false, true);
7678}
7679
7680static bool trans_SMMLSR(DisasContext *s, arg_rrrr *a)
7681{
7682    return op_smmla(s, a, true, true);
7683}
7684
7685static bool op_div(DisasContext *s, arg_rrr *a, bool u)
7686{
7687    TCGv_i32 t1, t2;
7688
7689    if (s->thumb
7690        ? !dc_isar_feature(aa32_thumb_div, s)
7691        : !dc_isar_feature(aa32_arm_div, s)) {
7692        return false;
7693    }
7694
7695    t1 = load_reg(s, a->rn);
7696    t2 = load_reg(s, a->rm);
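    /*
     * The helpers implement the architected corner cases: division by
     * zero returns 0, and SDIV of INT_MIN by -1 returns INT_MIN; no
     * exception is raised here.
     */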
7697    if (u) {
7698        gen_helper_udiv(t1, t1, t2);
7699    } else {
7700        gen_helper_sdiv(t1, t1, t2);
7701    }
7702    tcg_temp_free_i32(t2);
7703    store_reg(s, a->rd, t1);
7704    return true;
7705}
7706
7707static bool trans_SDIV(DisasContext *s, arg_rrr *a)
7708{
7709    return op_div(s, a, false);
7710}
7711
7712static bool trans_UDIV(DisasContext *s, arg_rrr *a)
7713{
7714    return op_div(s, a, true);
7715}
7716
7717/*
7718 * Block data transfer
7719 */
7720
7721static TCGv_i32 op_addr_block_pre(DisasContext *s, arg_ldst_block *a, int n)
7722{
7723    TCGv_i32 addr = load_reg(s, a->rn);
7724
7725    if (a->b) {
7726        if (a->i) {
7727            /* pre increment */
7728            tcg_gen_addi_i32(addr, addr, 4);
7729        } else {
7730            /* pre decrement */
7731            tcg_gen_addi_i32(addr, addr, -(n * 4));
7732        }
7733    } else if (!a->i && n != 1) {
7734        /* post decrement */
7735        tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
7736    }
7737
7738    if (s->v8m_stackcheck && a->rn == 13 && a->w) {
7739        /*
7740         * If the writeback is incrementing SP rather than
7741         * decrementing it, and the initial SP is below the
7742         * stack limit but the final written-back SP would
7743         * be above, then we must not perform any memory
7744         * accesses, but it is IMPDEF whether we generate
7745         * an exception. We choose to do so in this case.
7746         * At this point 'addr' is the lowest address: either the
7747         * original SP (if incrementing) or our final SP (if
7748         * decrementing), so that's what we check.
7749         */
7750        gen_helper_v8m_stackcheck(cpu_env, addr);
7751    }
7752
7753    return addr;
7754}
7755
7756static void op_addr_block_post(DisasContext *s, arg_ldst_block *a,
7757                               TCGv_i32 addr, int n)
7758{
7759    if (a->w) {
7760        /* write back */
7761        if (!a->b) {
7762            if (a->i) {
7763                /* post increment */
7764                tcg_gen_addi_i32(addr, addr, 4);
7765            } else {
7766                /* post decrement */
7767                tcg_gen_addi_i32(addr, addr, -(n * 4));
7768            }
7769        } else if (!a->i && n != 1) {
7770            /* pre decrement */
7771            tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
7772        }
7773        store_reg(s, a->rn, addr);
7774    } else {
7775        tcg_temp_free_i32(addr);
7776    }
7777}
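
/*
 * Worked example (illustrative): for STMDB sp!, {r4-r7, lr} (n == 5),
 * op_addr_block_pre lowers addr by 20 to the lowest address stored;
 * the store loop then advances addr by 16, and op_addr_block_post
 * subtracts another 16 so that sp is written back as the original
 * sp - 20.
 */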
7778
7779static bool op_stm(DisasContext *s, arg_ldst_block *a, int min_n)
7780{
7781    int i, j, n, list, mem_idx;
7782    bool user = a->u;
7783    TCGv_i32 addr, tmp, tmp2;
7784
7785    if (user) {
7786        /* STM (user) */
7787        if (IS_USER(s)) {
7788            /* Only usable in supervisor mode.  */
7789            unallocated_encoding(s);
7790            return true;
7791        }
7792    }
7793
7794    list = a->list;
7795    n = ctpop16(list);
7796    if (n < min_n || a->rn == 15) {
7797        unallocated_encoding(s);
7798        return true;
7799    }
7800
7801    addr = op_addr_block_pre(s, a, n);
7802    mem_idx = get_mem_index(s);
7803
7804    for (i = j = 0; i < 16; i++) {
7805        if (!(list & (1 << i))) {
7806            continue;
7807        }
7808
7809        if (user && i != 15) {
7810            tmp = tcg_temp_new_i32();
7811            tmp2 = tcg_const_i32(i);
7812            gen_helper_get_user_reg(tmp, cpu_env, tmp2);
7813            tcg_temp_free_i32(tmp2);
7814        } else {
7815            tmp = load_reg(s, i);
7816        }
7817        gen_aa32_st32(s, tmp, addr, mem_idx);
7818        tcg_temp_free_i32(tmp);
7819
7820        /* No need to add after the last transfer.  */
7821        if (++j != n) {
7822            tcg_gen_addi_i32(addr, addr, 4);
7823        }
7824    }
7825
7826    op_addr_block_post(s, a, addr, n);
7827    return true;
7828}
7829
7830static bool trans_STM(DisasContext *s, arg_ldst_block *a)
7831{
7832    /* BitCount(list) < 1 is UNPREDICTABLE */
7833    return op_stm(s, a, 1);
7834}
7835
7836static bool trans_STM_t32(DisasContext *s, arg_ldst_block *a)
7837{
7838    /* Writeback register in register list is UNPREDICTABLE for T32.  */
7839    if (a->w && (a->list & (1 << a->rn))) {
7840        unallocated_encoding(s);
7841        return true;
7842    }
7843    /* BitCount(list) < 2 is UNPREDICTABLE */
7844    return op_stm(s, a, 2);
7845}
7846
7847static bool do_ldm(DisasContext *s, arg_ldst_block *a, int min_n)
7848{
7849    int i, j, n, list, mem_idx;
7850    bool loaded_base;
7851    bool user = a->u;
7852    bool exc_return = false;
7853    TCGv_i32 addr, tmp, tmp2, loaded_var;
7854
7855    if (user) {
7856        /* LDM (user), LDM (exception return) */
7857        if (IS_USER(s)) {
7858            /* Only usable in supervisor mode.  */
7859            unallocated_encoding(s);
7860            return true;
7861        }
7862        if (extract32(a->list, 15, 1)) {
7863            exc_return = true;
7864            user = false;
7865        } else {
7866            /* LDM (user) does not allow writeback.  */
7867            if (a->w) {
7868                unallocated_encoding(s);
7869                return true;
7870            }
7871        }
7872    }
7873
7874    list = a->list;
7875    n = ctpop16(list);
7876    if (n < min_n || a->rn == 15) {
7877        unallocated_encoding(s);
7878        return true;
7879    }
7880
7881    addr = op_addr_block_pre(s, a, n);
7882    mem_idx = get_mem_index(s);
7883    loaded_base = false;
7884    loaded_var = NULL;
7885
7886    for (i = j = 0; i < 16; i++) {
7887        if (!(list & (1 << i))) {
7888            continue;
7889        }
7890
7891        tmp = tcg_temp_new_i32();
7892        gen_aa32_ld32u(s, tmp, addr, mem_idx);
7893        if (user) {
7894            tmp2 = tcg_const_i32(i);
7895            gen_helper_set_user_reg(cpu_env, tmp2, tmp);
7896            tcg_temp_free_i32(tmp2);
7897            tcg_temp_free_i32(tmp);
7898        } else if (i == a->rn) {
7899            loaded_var = tmp;
7900            loaded_base = true;
7901        } else if (i == 15 && exc_return) {
7902            store_pc_exc_ret(s, tmp);
7903        } else {
7904            store_reg_from_load(s, i, tmp);
7905        }
7906
7907        /* No need to add after the last transfer.  */
7908        if (++j != n) {
7909            tcg_gen_addi_i32(addr, addr, 4);
7910        }
7911    }
7912
7913    op_addr_block_post(s, a, addr, n);
7914
7915    if (loaded_base) {
7916        /* Note that we reject base == pc above.  */
7917        store_reg(s, a->rn, loaded_var);
7918    }
7919
7920    if (exc_return) {
7921        /* Restore CPSR from SPSR.  */
7922        tmp = load_cpu_field(spsr);
7923        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
7924            gen_io_start();
7925        }
7926        gen_helper_cpsr_write_eret(cpu_env, tmp);
7927        tcg_temp_free_i32(tmp);
7928        /* Must exit loop to check unmasked IRQs */
7929        s->base.is_jmp = DISAS_EXIT;
7930    }
7931    return true;
7932}
7933
7934static bool trans_LDM_a32(DisasContext *s, arg_ldst_block *a)
7935{
7936    /*
7937     * Writeback register in register list is UNPREDICTABLE
7938     * for ArchVersion() >= 7.  Prior to v7, A32 would write
7939     * an UNKNOWN value to the base register.
7940     */
7941    if (ENABLE_ARCH_7 && a->w && (a->list & (1 << a->rn))) {
7942        unallocated_encoding(s);
7943        return true;
7944    }
7945    /* BitCount(list) < 1 is UNPREDICTABLE */
7946    return do_ldm(s, a, 1);
7947}
7948
7949static bool trans_LDM_t32(DisasContext *s, arg_ldst_block *a)
7950{
7951    /* Writeback register in register list is UNPREDICTABLE for T32. */
7952    if (a->w && (a->list & (1 << a->rn))) {
7953        unallocated_encoding(s);
7954        return true;
7955    }
7956    /* BitCount(list) < 2 is UNPREDICTABLE */
7957    return do_ldm(s, a, 2);
7958}
7959
7960static bool trans_LDM_t16(DisasContext *s, arg_ldst_block *a)
7961{
7962    /* Writeback is conditional on the base register not being loaded.  */
7963    a->w = !(a->list & (1 << a->rn));
7964    /* BitCount(list) < 1 is UNPREDICTABLE */
7965    return do_ldm(s, a, 1);
7966}
7967
7968/*
7969 * Branch, branch with link
7970 */
7971
7972static bool trans_B(DisasContext *s, arg_i *a)
7973{
7974    gen_jmp(s, read_pc(s) + a->imm);
7975    return true;
7976}
7977
7978static bool trans_B_cond_thumb(DisasContext *s, arg_ci *a)
7979{
7980    /* The condition comes from the encoding; this must be outside an IT block.  */
7981    if (a->cond >= 0xe) {
7982        return false;
7983    }
7984    if (s->condexec_mask) {
7985        unallocated_encoding(s);
7986        return true;
7987    }
7988    arm_skip_unless(s, a->cond);
7989    gen_jmp(s, read_pc(s) + a->imm);
7990    return true;
7991}
7992
7993static bool trans_BL(DisasContext *s, arg_i *a)
7994{
7995    tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | s->thumb);
7996    gen_jmp(s, read_pc(s) + a->imm);
7997    return true;
7998}
7999
8000static bool trans_BLX_i(DisasContext *s, arg_BLX_i *a)
8001{
8002    TCGv_i32 tmp;
8003
8004    /*
8005     * BLX <imm> would be useless on M-profile; the encoding space
8006     * is used for other insns from v8.1M onward, and UNDEFs before that.
8007     */
8008    if (arm_dc_feature(s, ARM_FEATURE_M)) {
8009        return false;
8010    }
8011
8012    /* For A32, ARM_FEATURE_V5 is checked near the start of the uncond block. */
8013    if (s->thumb && (a->imm & 2)) {
8014        return false;
8015    }
8016    tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | s->thumb);
8017    tmp = tcg_const_i32(!s->thumb);
8018    store_cpu_field(tmp, thumb);
8019    gen_jmp(s, (read_pc(s) & ~3) + a->imm);
8020    return true;
8021}
8022
8023static bool trans_BL_BLX_prefix(DisasContext *s, arg_BL_BLX_prefix *a)
8024{
8025    assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
8026    tcg_gen_movi_i32(cpu_R[14], read_pc(s) + (a->imm << 12));
8027    return true;
8028}
8029
8030static bool trans_BL_suffix(DisasContext *s, arg_BL_suffix *a)
8031{
8032    TCGv_i32 tmp = tcg_temp_new_i32();
8033
8034    assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
8035    tcg_gen_addi_i32(tmp, cpu_R[14], (a->imm << 1) | 1);
8036    tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | 1);
8037    gen_bx(s, tmp);
8038    return true;
8039}
8040
8041static bool trans_BLX_suffix(DisasContext *s, arg_BLX_suffix *a)
8042{
8043    TCGv_i32 tmp;
8044
8045    assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
8046    if (!ENABLE_ARCH_5) {
8047        return false;
8048    }
8049    tmp = tcg_temp_new_i32();
8050    tcg_gen_addi_i32(tmp, cpu_R[14], a->imm << 1);
8051    tcg_gen_andi_i32(tmp, tmp, 0xfffffffc);
8052    tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | 1);
8053    gen_bx(s, tmp);
8054    return true;
8055}
8056
8057static bool trans_BF(DisasContext *s, arg_BF *a)
8058{
8059    /*
8060     * M-profile branch future insns. The architecture permits an
8061     * implementation to implement these as NOPs (equivalent to
8062     * discarding the LO_BRANCH_INFO cache immediately), and we
8063     * take that IMPDEF option because for QEMU a "real" implementation
8064     * would be complicated and wouldn't execute any faster.
8065     */
8066    if (!dc_isar_feature(aa32_lob, s)) {
8067        return false;
8068    }
8069    if (a->boff == 0) {
8070        /* SEE "Related encodings" (loop insns) */
8071        return false;
8072    }
8073    /* Handle as NOP */
8074    return true;
8075}
8076
8077static bool trans_DLS(DisasContext *s, arg_DLS *a)
8078{
8079    /* M-profile low-overhead loop start */
8080    TCGv_i32 tmp;
8081
8082    if (!dc_isar_feature(aa32_lob, s)) {
8083        return false;
8084    }
8085    if (a->rn == 13 || a->rn == 15) {
8086        /* CONSTRAINED UNPREDICTABLE: we choose to UNDEF */
8087        return false;
8088    }
8089
8090    /* Not a while loop, no tail predication: just set LR to the count */
8091    tmp = load_reg(s, a->rn);
8092    store_reg(s, 14, tmp);
8093    return true;
8094}
8095
8096static bool trans_WLS(DisasContext *s, arg_WLS *a)
8097{
8098    /* M-profile low-overhead while-loop start */
8099    TCGv_i32 tmp;
8100    TCGLabel *nextlabel;
8101
8102    if (!dc_isar_feature(aa32_lob, s)) {
8103        return false;
8104    }
8105    if (a->rn == 13 || a->rn == 15) {
8106        /* CONSTRAINED UNPREDICTABLE: we choose to UNDEF */
8107        return false;
8108    }
8109    if (s->condexec_mask) {
8110        /*
8111         * WLS in an IT block is CONSTRAINED UNPREDICTABLE;
8112         * we choose to UNDEF, because otherwise our use of
8113         * gen_goto_tb(1) would clash with the use of TB exit 1
8114         * in the dc->condjmp condition-failed codepath in
8115         * arm_tr_tb_stop() and we'd get an assertion.
8116         */
8117        return false;
8118    }
8119    nextlabel = gen_new_label();
8120    tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_R[a->rn], 0, nextlabel);
8121    tmp = load_reg(s, a->rn);
8122    store_reg(s, 14, tmp);
8123    gen_jmp_tb(s, s->base.pc_next, 1);
8124
8125    gen_set_label(nextlabel);
8126    gen_jmp(s, read_pc(s) + a->imm);
8127    return true;
8128}
8129
8130static bool trans_LE(DisasContext *s, arg_LE *a)
8131{
8132    /*
8133     * M-profile low-overhead loop end. The architecture permits an
8134     * implementation to discard the LO_BRANCH_INFO cache at any time,
8135     * and we take the IMPDEF option to never set it in the first place
8136     * (equivalent to always discarding it immediately), because for QEMU
8137     * a "real" implementation would be complicated and wouldn't execute
8138     * any faster.
8139     */
8140    TCGv_i32 tmp;
8141
8142    if (!dc_isar_feature(aa32_lob, s)) {
8143        return false;
8144    }
8145
8146    if (!a->f) {
8147        /* Not loop-forever. If LR <= 1 this is the last loop: do nothing. */
8148        arm_gen_condlabel(s);
8149        tcg_gen_brcondi_i32(TCG_COND_LEU, cpu_R[14], 1, s->condlabel);
8150        /* Decrement LR */
8151        tmp = load_reg(s, 14);
8152        tcg_gen_addi_i32(tmp, tmp, -1);
8153        store_reg(s, 14, tmp);
8154    }
8155    /* Jump back to the loop start */
8156    gen_jmp(s, read_pc(s) - a->imm);
8157    return true;
8158}
8159
8160static bool op_tbranch(DisasContext *s, arg_tbranch *a, bool half)
8161{
8162    TCGv_i32 addr, tmp;
8163
8164    tmp = load_reg(s, a->rm);
8165    if (half) {
8166        tcg_gen_add_i32(tmp, tmp, tmp);
8167    }
8168    addr = load_reg(s, a->rn);
8169    tcg_gen_add_i32(addr, addr, tmp);
8170
8171    gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s),
8172                    half ? MO_UW | s->be_data : MO_UB);
8173    tcg_temp_free_i32(addr);
8174
8175    tcg_gen_add_i32(tmp, tmp, tmp);
8176    tcg_gen_addi_i32(tmp, tmp, read_pc(s));
8177    store_reg(s, 15, tmp);
8178    return true;
8179}
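
/*
 * e.g. TBH [rn, rm, LSL #1] (illustrative): load the halfword at
 * rn + 2 * rm, then branch to read_pc(s) + 2 * that halfword.
 */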
8180
8181static bool trans_TBB(DisasContext *s, arg_tbranch *a)
8182{
8183    return op_tbranch(s, a, false);
8184}
8185
8186static bool trans_TBH(DisasContext *s, arg_tbranch *a)
8187{
8188    return op_tbranch(s, a, true);
8189}
8190
8191static bool trans_CBZ(DisasContext *s, arg_CBZ *a)
8192{
8193    TCGv_i32 tmp = load_reg(s, a->rn);
8194
8195    arm_gen_condlabel(s);
8196    tcg_gen_brcondi_i32(a->nz ? TCG_COND_EQ : TCG_COND_NE,
8197                        tmp, 0, s->condlabel);
8198    tcg_temp_free_i32(tmp);
8199    gen_jmp(s, read_pc(s) + a->imm);
8200    return true;
8201}
8202
8203/*
8204 * Supervisor call - both T32 & A32 come here so we need to check
8205 * which mode we are in when checking for semihosting.
8206 */
8207
8208static bool trans_SVC(DisasContext *s, arg_SVC *a)
8209{
8210    const uint32_t semihost_imm = s->thumb ? 0xab : 0x123456;
8211
8212    if (!arm_dc_feature(s, ARM_FEATURE_M) && semihosting_enabled() &&
8213#ifndef CONFIG_USER_ONLY
8214        !IS_USER(s) &&
8215#endif
8216        (a->imm == semihost_imm)) {
8217        gen_exception_internal_insn(s, s->pc_curr, EXCP_SEMIHOST);
8218    } else {
8219        gen_set_pc_im(s, s->base.pc_next);
8220        s->svc_imm = a->imm;
8221        s->base.is_jmp = DISAS_SWI;
8222    }
8223    return true;
8224}
8225
8226/*
8227 * Unconditional system instructions
8228 */
8229
8230static bool trans_RFE(DisasContext *s, arg_RFE *a)
8231{
8232    static const int8_t pre_offset[4] = {
8233        /* DA */ -4, /* IA */ 0, /* DB */ -8, /* IB */ 4
8234    };
8235    static const int8_t post_offset[4] = {
8236        /* DA */ -8, /* IA */ 4, /* DB */ -4, /* IB */ 0
8237    };
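    /*
     * e.g. for RFEIA: pre_offset 0 means PC is loaded from [Rn] and
     * CPSR from [Rn + 4]; with writeback, post_offset 4 leaves the
     * written-back Rn at Rn + 8.
     */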
8238    TCGv_i32 addr, t1, t2;
8239
8240    if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
8241        return false;
8242    }
8243    if (IS_USER(s)) {
8244        unallocated_encoding(s);
8245        return true;
8246    }
8247
8248    addr = load_reg(s, a->rn);
8249    tcg_gen_addi_i32(addr, addr, pre_offset[a->pu]);
8250
8251    /* Load PC into tmp and CPSR into tmp2.  */
8252    t1 = tcg_temp_new_i32();
8253    gen_aa32_ld32u(s, t1, addr, get_mem_index(s));
8254    tcg_gen_addi_i32(addr, addr, 4);
8255    t2 = tcg_temp_new_i32();
8256    gen_aa32_ld32u(s, t2, addr, get_mem_index(s));
8257
8258    if (a->w) {
8259        /* Base writeback.  */
8260        tcg_gen_addi_i32(addr, addr, post_offset[a->pu]);
8261        store_reg(s, a->rn, addr);
8262    } else {
8263        tcg_temp_free_i32(addr);
8264    }
8265    gen_rfe(s, t1, t2);
8266    return true;
8267}
8268
8269static bool trans_SRS(DisasContext *s, arg_SRS *a)
8270{
8271    if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
8272        return false;
8273    }
8274    gen_srs(s, a->mode, a->pu, a->w);
8275    return true;
8276}
8277
8278static bool trans_CPS(DisasContext *s, arg_CPS *a)
8279{
8280    uint32_t mask, val;
8281
8282    if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
8283        return false;
8284    }
8285    if (IS_USER(s)) {
8286        /* Implemented as NOP in user mode.  */
8287        return true;
8288    }
8289    /* TODO: There are quite a lot of UNPREDICTABLE argument combinations. */
8290
8291    mask = val = 0;
8292    if (a->imod & 2) {
8293        if (a->A) {
8294            mask |= CPSR_A;
8295        }
8296        if (a->I) {
8297            mask |= CPSR_I;
8298        }
8299        if (a->F) {
8300            mask |= CPSR_F;
8301        }
8302        if (a->imod & 1) {
8303            val |= mask;
8304        }
8305    }
8306    if (a->M) {
8307        mask |= CPSR_M;
8308        val |= a->mode;
8309    }
8310    if (mask) {
8311        gen_set_psr_im(s, mask, 0, val);
8312    }
8313    return true;
8314}
8315
8316static bool trans_CPS_v7m(DisasContext *s, arg_CPS_v7m *a)
8317{
8318    TCGv_i32 tmp, addr, el;
8319
8320    if (!arm_dc_feature(s, ARM_FEATURE_M)) {
8321        return false;
8322    }
8323    if (IS_USER(s)) {
8324        /* Implemented as NOP in user mode.  */
8325        return true;
8326    }
8327
8328    tmp = tcg_const_i32(a->im);
8329    /* FAULTMASK */
8330    if (a->F) {
8331        addr = tcg_const_i32(19);
8332        gen_helper_v7m_msr(cpu_env, addr, tmp);
8333        tcg_temp_free_i32(addr);
8334    }
8335    /* PRIMASK */
8336    if (a->I) {
8337        addr = tcg_const_i32(16);
8338        gen_helper_v7m_msr(cpu_env, addr, tmp);
8339        tcg_temp_free_i32(addr);
8340    }
8341    el = tcg_const_i32(s->current_el);
8342    gen_helper_rebuild_hflags_m32(cpu_env, el);
8343    tcg_temp_free_i32(el);
8344    tcg_temp_free_i32(tmp);
8345    gen_lookup_tb(s);
8346    return true;
8347}
8348
8349/*
8350 * Clear-Exclusive, Barriers
8351 */
8352
8353static bool trans_CLREX(DisasContext *s, arg_CLREX *a)
8354{
8355    if (s->thumb
8356        ? !ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)
8357        : !ENABLE_ARCH_6K) {
8358        return false;
8359    }
8360    gen_clrex(s);
8361    return true;
8362}
8363
8364static bool trans_DSB(DisasContext *s, arg_DSB *a)
8365{
8366    if (!ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)) {
8367        return false;
8368    }
8369    tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
8370    return true;
8371}
8372
8373static bool trans_DMB(DisasContext *s, arg_DMB *a)
8374{
8375    return trans_DSB(s, NULL);
8376}
8377
8378static bool trans_ISB(DisasContext *s, arg_ISB *a)
8379{
8380    if (!ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)) {
8381        return false;
8382    }
8383    /*
8384     * We need to break the TB after this insn to execute
8385     * self-modifying code correctly and also to take
8386     * any pending interrupts immediately.
8387     */
8388    gen_goto_tb(s, 0, s->base.pc_next);
8389    return true;
8390}
8391
8392static bool trans_SB(DisasContext *s, arg_SB *a)
8393{
8394    if (!dc_isar_feature(aa32_sb, s)) {
8395        return false;
8396    }
8397    /*
8398     * TODO: There is no speculation barrier opcode
8399     * for TCG; MB and end the TB instead.
8400     */
8401    tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
8402    gen_goto_tb(s, 0, s->base.pc_next);
8403    return true;
8404}
8405
8406static bool trans_SETEND(DisasContext *s, arg_SETEND *a)
8407{
8408    if (!ENABLE_ARCH_6) {
8409        return false;
8410    }
8411    if (a->E != (s->be_data == MO_BE)) {
8412        gen_helper_setend(cpu_env);
8413        s->base.is_jmp = DISAS_UPDATE_EXIT;
8414    }
8415    return true;
8416}
8417
8418/*
8419 * Preload instructions
8420 * All are nops, contingent on the appropriate arch level.
8421 */
8422
8423static bool trans_PLD(DisasContext *s, arg_PLD *a)
8424{
8425    return ENABLE_ARCH_5TE;
8426}
8427
8428static bool trans_PLDW(DisasContext *s, arg_PLD *a)
8429{
8430    return arm_dc_feature(s, ARM_FEATURE_V7MP);
8431}
8432
8433static bool trans_PLI(DisasContext *s, arg_PLD *a)
8434{
8435    return ENABLE_ARCH_7;
8436}
8437
8438/*
8439 * If-then
8440 */
8441
8442static bool trans_IT(DisasContext *s, arg_IT *a)
8443{
8444    int cond_mask = a->cond_mask;
8445
8446    /*
8447     * No actual code generated for this insn, just setup state.
8448     *
8449     * Combinations of firstcond and mask which set up a 0b1111
8450     * condition are UNPREDICTABLE; we take the CONSTRAINED
8451     * UNPREDICTABLE choice to treat 0b1111 the same as 0b1110,
8452     * i.e. both meaning "execute always".
8453     */
8454    s->condexec_cond = (cond_mask >> 4) & 0xe;
8455    s->condexec_mask = cond_mask & 0x1f;
8456    return true;
8457}
8458
8459/* v8.1M CSEL/CSINC/CSNEG/CSINV */
8460static bool trans_CSEL(DisasContext *s, arg_CSEL *a)
8461{
8462    TCGv_i32 rn, rm, zero;
8463    DisasCompare c;
8464
8465    if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
8466        return false;
8467    }
8468
8469    if (a->rm == 13) {
8470        /* SEE "Related encodings" (MVE shifts) */
8471        return false;
8472    }
8473
8474    if (a->rd == 13 || a->rd == 15 || a->rn == 13 || a->fcond >= 14) {
8475        /* CONSTRAINED UNPREDICTABLE: we choose to UNDEF */
8476        return false;
8477    }
8478
8479    /* In this insn input reg fields of 0b1111 mean "zero", not "PC" */
8480    if (a->rn == 15) {
8481        rn = tcg_const_i32(0);
8482    } else {
8483        rn = load_reg(s, a->rn);
8484    }
8485    if (a->rm == 15) {
8486        rm = tcg_const_i32(0);
8487    } else {
8488        rm = load_reg(s, a->rm);
8489    }
8490
8491    switch (a->op) {
8492    case 0: /* CSEL */
8493        break;
8494    case 1: /* CSINC */
8495        tcg_gen_addi_i32(rm, rm, 1);
8496        break;
8497    case 2: /* CSINV */
8498        tcg_gen_not_i32(rm, rm);
8499        break;
8500    case 3: /* CSNEG */
8501        tcg_gen_neg_i32(rm, rm);
8502        break;
8503    default:
8504        g_assert_not_reached();
8505    }
8506
8507    arm_test_cc(&c, a->fcond);
8508    zero = tcg_const_i32(0);
8509    tcg_gen_movcond_i32(c.cond, rn, c.value, zero, rn, rm);
8510    arm_free_cc(&c);
8511    tcg_temp_free_i32(zero);
8512
8513    store_reg(s, a->rd, rn);
8514    tcg_temp_free_i32(rm);
8515
8516    return true;
8517}
8518
8519/*
8520 * Legacy decoder.
8521 */
8522
8523static void disas_arm_insn(DisasContext *s, unsigned int insn)
8524{
8525    unsigned int cond = insn >> 28;
8526
8527    /* M variants do not implement ARM mode; this must raise the INVSTATE
8528     * UsageFault exception.
8529     */
8530    if (arm_dc_feature(s, ARM_FEATURE_M)) {
8531        gen_exception_insn(s, s->pc_curr, EXCP_INVSTATE, syn_uncategorized(),
8532                           default_exception_el(s));
8533        return;
8534    }
8535
8536    if (cond == 0xf) {
8537        /* In ARMv3 and v4 the NV condition is UNPREDICTABLE; we
8538         * choose to UNDEF. In ARMv5 and above the space is used
8539         * for miscellaneous unconditional instructions.
8540         */
8541        if (!arm_dc_feature(s, ARM_FEATURE_V5)) {
8542            unallocated_encoding(s);
8543            return;
8544        }
8545
8546        /* Unconditional instructions.  */
8547        /* TODO: Perhaps merge these into one decodetree output file.  */
8548        if (disas_a32_uncond(s, insn) ||
8549            disas_vfp_uncond(s, insn) ||
8550            disas_neon_dp(s, insn) ||
8551            disas_neon_ls(s, insn) ||
8552            disas_neon_shared(s, insn)) {
8553            return;
8554        }
8555        /* fall back to legacy decoder */
8556
8557        if ((insn & 0x0e000f00) == 0x0c000100) {
8558            if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
8559                /* iWMMXt register transfer.  */
8560                if (extract32(s->c15_cpar, 1, 1)) {
8561                    if (!disas_iwmmxt_insn(s, insn)) {
8562                        return;
8563                    }
8564                }
8565            }
8566        }
8567        goto illegal_op;
8568    }
8569    if (cond != 0xe) {
8570        /* If the condition is not "always execute", generate a
8571           conditional jump to the next instruction. */
8572        arm_skip_unless(s, cond);
8573    }
8574
8575    /* TODO: Perhaps merge these into one decodetree output file.  */
8576    if (disas_a32(s, insn) ||
8577        disas_vfp(s, insn)) {
8578        return;
8579    }
8580    /* fall back to legacy decoder */
8581    /* TODO: convert xscale/iwmmxt decoder to decodetree ?? */
8582    if (arm_dc_feature(s, ARM_FEATURE_XSCALE)) {
8583        if (((insn & 0x0c000e00) == 0x0c000000)
8584            && ((insn & 0x03000000) != 0x03000000)) {
8585            /* Coprocessor insn, coprocessor 0 or 1 */
8586            disas_xscale_insn(s, insn);
8587            return;
8588        }
8589    }
8590
8591illegal_op:
8592    unallocated_encoding(s);
8593}
8594
8595static bool thumb_insn_is_16bit(DisasContext *s, uint32_t pc, uint32_t insn)
8596{
8597    /*
8598     * Return true if this is a 16-bit instruction. We must be precise
8599     * about this (matching the decode).
8600     */
8601    if ((insn >> 11) < 0x1d) {
8602        /* Definitely a 16-bit instruction */
8603        return true;
8604    }
8605
8606    /* Top five bits 0b11101 / 0b11110 / 0b11111 : this is the
8607     * first half of a 32-bit Thumb insn. Thumb-1 cores might
8608     * end up actually treating this as two 16-bit insns, though,
8609     * if it's half of a bl/blx pair that might span a page boundary.
8610     */
8611    if (arm_dc_feature(s, ARM_FEATURE_THUMB2) ||
8612        arm_dc_feature(s, ARM_FEATURE_M)) {
8613        /* Thumb2 cores (including all M profile ones) always treat
8614         * 32-bit insns as 32-bit.
8615         */
8616        return false;
8617    }
8618
8619    if ((insn >> 11) == 0x1e && pc - s->page_start < TARGET_PAGE_SIZE - 3) {
8620        /* 0b1111_0xxx_xxxx_xxxx : BL/BLX prefix, and the suffix
8621         * is not on the next page; we merge this into a 32-bit
8622         * insn.
8623         */
8624        return false;
8625    }
8626    /* 0b1110_1xxx_xxxx_xxxx : BLX suffix (or UNDEF);
8627     * 0b1111_1xxx_xxxx_xxxx : BL suffix;
8628     * 0b1111_0xxx_xxxx_xxxx : BL/BLX prefix on the end of a page
8629     *  -- handle as a single 16-bit insn
8630     */
8631    return true;
8632}
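
    /*
     * For example, 0x4770 (BX lr) has (insn >> 11) == 0x08 < 0x1d and
     * so is 16 bits everywhere, while 0xf7ff, a BL/BLX prefix, gives
     * 0x1e and therefore starts a 32-bit insn on any Thumb-2 or
     * M-profile core.
     */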
8633
8634/* Translate a 32-bit thumb instruction. */
8635static void disas_thumb2_insn(DisasContext *s, uint32_t insn)
8636{
8637    /*
8638     * ARMv6-M supports a limited subset of Thumb2 instructions.
8639     * On other Thumb-1 architectures, the only 32-bit encoding is
8640     * the combined BL/BLX prefix and suffix.
8641     */
8642    if (arm_dc_feature(s, ARM_FEATURE_M) &&
8643        !arm_dc_feature(s, ARM_FEATURE_V7)) {
8644        int i;
8645        bool found = false;
8646        static const uint32_t armv6m_insn[] = {0xf3808000 /* msr */,
8647                                               0xf3b08040 /* dsb */,
8648                                               0xf3b08050 /* dmb */,
8649                                               0xf3b08060 /* isb */,
8650                                               0xf3e08000 /* mrs */,
8651                                               0xf000d000 /* bl */};
8652        static const uint32_t armv6m_mask[] = {0xffe0d000,
8653                                               0xfff0d0f0,
8654                                               0xfff0d0f0,
8655                                               0xfff0d0f0,
8656                                               0xffe0d000,
8657                                               0xf800d000};
8658
8659        for (i = 0; i < ARRAY_SIZE(armv6m_insn); i++) {
8660            if ((insn & armv6m_mask[i]) == armv6m_insn[i]) {
8661                found = true;
8662                break;
8663            }
8664        }
8665        if (!found) {
8666            goto illegal_op;
8667        }
8668    } else if ((insn & 0xf800e800) != 0xf000e800) {
8669        if (!arm_dc_feature(s, ARM_FEATURE_THUMB2)) {
8670            unallocated_encoding(s);
8671            return;
8672        }
8673    }
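
        /*
         * As an illustration, 0xf3bf8f4f (DSB SY) is accepted by the
         * v6-M whitelist above: 0xf3bf8f4f & 0xfff0d0f0 == 0xf3b08040,
         * the "dsb" pattern.
         */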
8674
8675    if (arm_dc_feature(s, ARM_FEATURE_M)) {
8676        /*
8677         * NOCP takes precedence over any UNDEF for (almost) the
8678         * entire wide range of coprocessor-space encodings, so check
8679         * for it first before proceeding to actually decode eg VFP
8680         * insns. This decode also handles the few insns which are
8681         * in copro space but do not have NOCP checks (eg VLLDM, VLSTM).
8682         */
8683        if (disas_m_nocp(s, insn)) {
8684            return;
8685        }
8686    }
8687
8688    if ((insn & 0xef000000) == 0xef000000) {
8689        /*
8690         * T32 encodings 0b111p_1111_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq
8691         * transform into
8692         * A32 encodings 0b1111_001p_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq
8693         */
8694        uint32_t a32_insn = (insn & 0xe2ffffff) |
8695            ((insn & (1 << 28)) >> 4) | (1 << 28);
8696
8697        if (disas_neon_dp(s, a32_insn)) {
8698            return;
8699        }
8700    }
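
        /*
         * For example, the T32 word 0xef123456 (p == 0) becomes the
         * A32 word 0xf2123456, and 0xff123456 (p == 1) becomes
         * 0xf3123456: the mask keeps bits [31:29], [25] and [23:0],
         * the shift moves p from bit 28 down to bit 24, and OR-ing in
         * (1 << 28) completes the 0b1111 top nibble.
         */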
8701
8702    if ((insn & 0xff100000) == 0xf9000000) {
8703        /*
8704         * T32 encodings 0b1111_1001_ppp0_qqqq_qqqq_qqqq_qqqq_qqqq
8705         * transform into
8706         * A32 encodings 0b1111_0100_ppp0_qqqq_qqqq_qqqq_qqqq_qqqq
8707         */
8708        uint32_t a32_insn = (insn & 0x00ffffff) | 0xf4000000;
8709
8710        if (disas_neon_ls(s, a32_insn)) {
8711            return;
8712        }
8713    }
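
        /*
         * Similarly, 0xf9201234 would be handed to disas_neon_ls() as
         * 0xf4201234: only the top byte changes, from 0b1111_1001 to
         * 0b1111_0100.
         */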
8714
8715    /*
8716     * TODO: Perhaps merge these into one decodetree output file.
8717     * Note disas_vfp is written for a32 with cond field in the
8718     * top nibble.  The t32 encoding requires 0xe in the top nibble.
8719     */
8720    if (disas_t32(s, insn) ||
8721        disas_vfp_uncond(s, insn) ||
8722        disas_neon_shared(s, insn) ||
8723        ((insn >> 28) == 0xe && disas_vfp(s, insn))) {
8724        return;
8725    }
8726
8727illegal_op:
8728    unallocated_encoding(s);
8729}
8730
8731static void disas_thumb_insn(DisasContext *s, uint32_t insn)
8732{
8733    if (!disas_t16(s, insn)) {
8734        unallocated_encoding(s);
8735    }
8736}
8737
8738static bool insn_crosses_page(CPUARMState *env, DisasContext *s)
8739{
8740    /* Return true if the insn at dc->base.pc_next might cross a page boundary.
8741     * (False positives are OK, false negatives are not.)
8742     * We know this is a Thumb insn, and our caller ensures we are
8743     * only called if dc->base.pc_next is less than 4 bytes from the page
8744     * boundary, so we cross the page if the first 16 bits indicate
8745     * that this is a 32 bit insn.
8746     */
8747    uint16_t insn = arm_lduw_code(env, s->base.pc_next, s->sctlr_b);
8748
8749    return !thumb_insn_is_16bit(s, s->base.pc_next, insn);
8750}
8751
8752static void arm_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
8753{
8754    DisasContext *dc = container_of(dcbase, DisasContext, base);
8755    CPUARMState *env = cs->env_ptr;
8756    ARMCPU *cpu = env_archcpu(env);
8757    uint32_t tb_flags = dc->base.tb->flags;
8758    uint32_t condexec, core_mmu_idx;
8759
8760    dc->isar = &cpu->isar;
8761    dc->condjmp = 0;
8762
8763    dc->aarch64 = 0;
8764    /* If we are coming from secure EL0 in a system with a 32-bit EL3, then
8765     * there is no secure EL1, so we route exceptions to EL3.
8766     */
8767    dc->secure_routed_to_el3 = arm_feature(env, ARM_FEATURE_EL3) &&
8768                               !arm_el_is_aa64(env, 3);
8769    dc->thumb = FIELD_EX32(tb_flags, TBFLAG_AM32, THUMB);
8770    dc->be_data = FIELD_EX32(tb_flags, TBFLAG_ANY, BE_DATA) ? MO_BE : MO_LE;
8771    condexec = FIELD_EX32(tb_flags, TBFLAG_AM32, CONDEXEC);
8772    dc->condexec_mask = (condexec & 0xf) << 1;
8773    dc->condexec_cond = condexec >> 4;
8774
8775    core_mmu_idx = FIELD_EX32(tb_flags, TBFLAG_ANY, MMUIDX);
8776    dc->mmu_idx = core_to_arm_mmu_idx(env, core_mmu_idx);
8777    dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
8778#if !defined(CONFIG_USER_ONLY)
8779    dc->user = (dc->current_el == 0);
8780#endif
8781    dc->fp_excp_el = FIELD_EX32(tb_flags, TBFLAG_ANY, FPEXC_EL);
8782
8783    if (arm_feature(env, ARM_FEATURE_M)) {
8784        dc->vfp_enabled = 1;
8785        dc->be_data = MO_TE;
8786        dc->v7m_handler_mode = FIELD_EX32(tb_flags, TBFLAG_M32, HANDLER);
8787        dc->v8m_secure = arm_feature(env, ARM_FEATURE_M_SECURITY) &&
8788            regime_is_secure(env, dc->mmu_idx);
8789        dc->v8m_stackcheck = FIELD_EX32(tb_flags, TBFLAG_M32, STACKCHECK);
8790        dc->v8m_fpccr_s_wrong =
8791            FIELD_EX32(tb_flags, TBFLAG_M32, FPCCR_S_WRONG);
8792        dc->v7m_new_fp_ctxt_needed =
8793            FIELD_EX32(tb_flags, TBFLAG_M32, NEW_FP_CTXT_NEEDED);
8794        dc->v7m_lspact = FIELD_EX32(tb_flags, TBFLAG_M32, LSPACT);
8795    } else {
8798        dc->debug_target_el =
8799            FIELD_EX32(tb_flags, TBFLAG_ANY, DEBUG_TARGET_EL);
8800        dc->sctlr_b = FIELD_EX32(tb_flags, TBFLAG_A32, SCTLR_B);
8801        dc->hstr_active = FIELD_EX32(tb_flags, TBFLAG_A32, HSTR_ACTIVE);
8802        dc->ns = FIELD_EX32(tb_flags, TBFLAG_A32, NS);
8803        dc->vfp_enabled = FIELD_EX32(tb_flags, TBFLAG_A32, VFPEN);
8804        if (arm_feature(env, ARM_FEATURE_XSCALE)) {
8805            dc->c15_cpar = FIELD_EX32(tb_flags, TBFLAG_A32, XSCALE_CPAR);
8806        } else {
8807            dc->vec_len = FIELD_EX32(tb_flags, TBFLAG_A32, VECLEN);
8808            dc->vec_stride = FIELD_EX32(tb_flags, TBFLAG_A32, VECSTRIDE);
8809        }
8810    }
8811    dc->cp_regs = cpu->cp_regs;
8812    dc->features = env->features;
8813
8814    /* Single step state. The code-generation logic here is:
8815     *  SS_ACTIVE == 0:
8816     *   generate code with no special handling for single-stepping (except
8817     *   that anything that can make us go to SS_ACTIVE == 1 must end the TB;
8818     *   this happens anyway because those changes are all system register or
8819     *   PSTATE writes).
8820     *  SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
8821     *   emit code for one insn
8822     *   emit code to clear PSTATE.SS
8823     *   emit code to generate software step exception for completed step
8824     *   end TB (as usual for having generated an exception)
8825     *  SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
8826     *   emit code to generate a software step exception
8827     *   end the TB
8828     */
8829    dc->ss_active = FIELD_EX32(tb_flags, TBFLAG_ANY, SS_ACTIVE);
8830    dc->pstate_ss = FIELD_EX32(tb_flags, TBFLAG_ANY, PSTATE_SS);
8831    dc->is_ldex = false;
8832
8833    dc->page_start = dc->base.pc_first & TARGET_PAGE_MASK;
8834
8835    /* If architectural single step active, limit to 1.  */
8836    if (is_singlestepping(dc)) {
8837        dc->base.max_insns = 1;
8838    }
8839
8840    /* ARM is a fixed-length ISA.  Bound the number of insns to execute
8841       to those left on the page.  */
8842    if (!dc->thumb) {
8843        int bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;
8844        dc->base.max_insns = MIN(dc->base.max_insns, bound);
8845    }
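
    /*
     * As a sketch, with 4K pages and a 32-bit target_ulong, a pc_first
     * of 0x1ff8 gives (pc_first | TARGET_PAGE_MASK) == 0xfffffff8,
     * i.e. -8, so bound is 8 / 4 == 2: exactly the two word-sized
     * insns left on the page.
     */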
8846
8847    cpu_V0 = tcg_temp_new_i64();
8848    cpu_V1 = tcg_temp_new_i64();
8849    cpu_M0 = tcg_temp_new_i64();
8850}
8851
8852static void arm_tr_tb_start(DisasContextBase *dcbase, CPUState *cpu)
8853{
8854    DisasContext *dc = container_of(dcbase, DisasContext, base);
8855
8856    /* A note on handling of the condexec (IT) bits:
8857     *
8858     * We want to avoid the overhead of having to write the updated condexec
8859     * bits back to the CPUARMState for every instruction in an IT block. So:
8860     * (1) if the condexec bits are not already zero then we write
8861     * zero back into the CPUARMState now. This avoids complications trying
8862     * to do it at the end of the block. (For example if we don't do this
8863     * it's hard to identify whether we can safely skip writing condexec
8864     * at the end of the TB, which we definitely want to do for the case
8865     * where a TB doesn't do anything with the IT state at all.)
8866     * (2) if we are going to leave the TB then we call gen_set_condexec()
8867     * which will write the correct value into CPUARMState if zero is wrong.
8868     * This is done both for leaving the TB at the end, and for leaving
8869     * it because of an exception we know will happen, which is done in
8870     * gen_exception_insn(). The latter is necessary because we need to
8871     * leave the TB with the PC/IT state just prior to execution of the
8872     * instruction which caused the exception.
8873     * (3) if we leave the TB unexpectedly (eg a data abort on a load)
8874     * then the CPUARMState will be wrong and we need to reset it.
8875     * This is handled in the same way as restoration of the
8876     * PC in these situations; we save the value of the condexec bits
8877     * for each PC via tcg_gen_insn_start(), and restore_state_to_opc()
8878     * then uses this to restore them after an exception.
8879     *
8880     * Note that there are no instructions which can read the condexec
8881     * bits, and none which can write non-static values to them, so
8882     * we don't need to care about whether CPUARMState is correct in the
8883     * middle of a TB.
8884     */
8885
8886    /* Reset the conditional execution bits immediately. This avoids
8887       complications trying to do it at the end of the block.  */
8888    if (dc->condexec_mask || dc->condexec_cond) {
8889        TCGv_i32 tmp = tcg_temp_new_i32();
8890        tcg_gen_movi_i32(tmp, 0);
8891        store_cpu_field(tmp, condexec_bits);
8892    }
8893}
8894
8895static void arm_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
8896{
8897    DisasContext *dc = container_of(dcbase, DisasContext, base);
8898
8899    tcg_gen_insn_start(dc->base.pc_next,
8900                       (dc->condexec_cond << 4) | (dc->condexec_mask >> 1),
8901                       0);
8902    dc->insn_start = tcg_last_op();
8903}
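
/*
 * The tuple written above is the exact inverse of the CONDEXEC
 * unpacking in arm_tr_init_disas_context(): word 1 holds firstcond in
 * bits [7:4] and the 4-bit IT mask in bits [3:0].  These are the words
 * that restore_state_to_opc() consumes when state must be rebuilt
 * after an exception.
 */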
8904
8905static bool arm_tr_breakpoint_check(DisasContextBase *dcbase, CPUState *cpu,
8906                                    const CPUBreakpoint *bp)
8907{
8908    DisasContext *dc = container_of(dcbase, DisasContext, base);
8909
8910    if (bp->flags & BP_CPU) {
8911        gen_set_condexec(dc);
8912        gen_set_pc_im(dc, dc->base.pc_next);
8913        gen_helper_check_breakpoints(cpu_env);
8914        /* End the TB early; it's likely not going to be executed */
8915        dc->base.is_jmp = DISAS_TOO_MANY;
8916    } else {
8917        gen_exception_internal_insn(dc, dc->base.pc_next, EXCP_DEBUG);
8918        /* The address covered by the breakpoint must be
8919           included in [tb->pc, tb->pc + tb->size) in order
8920           for it to be properly cleared -- thus we
8921           increment the PC here so that the logic setting
8922           tb->size below does the right thing.  */
8923        /* TODO: Advance PC by correct instruction length to
8924         * avoid disassembler error messages */
8925        dc->base.pc_next += 2;
8926        dc->base.is_jmp = DISAS_NORETURN;
8927    }
8928
8929    return true;
8930}
8931
8932static bool arm_pre_translate_insn(DisasContext *dc)
8933{
8934#ifdef CONFIG_USER_ONLY
8935    /* Intercept jump to the magic kernel page.  */
8936    if (dc->base.pc_next >= 0xffff0000) {
8937        /* We always get here via a jump, so we know we are not in a
8938           conditional execution block.  */
8939        gen_exception_internal(EXCP_KERNEL_TRAP);
8940        dc->base.is_jmp = DISAS_NORETURN;
8941        return true;
8942    }
8943#endif
8944
8945    if (dc->ss_active && !dc->pstate_ss) {
8946        /* Singlestep state is Active-pending.
8947         * If we're in this state at the start of a TB then either
8948         *  a) we just took an exception to an EL which is being debugged
8949         *     and this is the first insn in the exception handler
8950         *  b) debug exceptions were masked and we just unmasked them
8951         *     without changing EL (eg by clearing PSTATE.D)
8952         * In either case we're going to take a swstep exception in the
8953         * "did not step an insn" case, and so the syndrome ISV and EX
8954         * bits should be zero.
8955         */
8956        assert(dc->base.num_insns == 1);
8957        gen_swstep_exception(dc, 0, 0);
8958        dc->base.is_jmp = DISAS_NORETURN;
8959        return true;
8960    }
8961
8962    return false;
8963}
8964
8965static void arm_post_translate_insn(DisasContext *dc)
8966{
8967    if (dc->condjmp && !dc->base.is_jmp) {
8968        gen_set_label(dc->condlabel);
8969        dc->condjmp = 0;
8970    }
8971    translator_loop_temp_check(&dc->base);
8972}
8973
8974static void arm_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
8975{
8976    DisasContext *dc = container_of(dcbase, DisasContext, base);
8977    CPUARMState *env = cpu->env_ptr;
8978    unsigned int insn;
8979
8980    if (arm_pre_translate_insn(dc)) {
8981        return;
8982    }
8983
8984    dc->pc_curr = dc->base.pc_next;
8985    insn = arm_ldl_code(env, dc->base.pc_next, dc->sctlr_b);
8986    dc->insn = insn;
8987    dc->base.pc_next += 4;
8988    disas_arm_insn(dc, insn);
8989
8990    arm_post_translate_insn(dc);
8991
8992    /* ARM is a fixed-length ISA.  We performed the cross-page check
8993       in init_disas_context by adjusting max_insns.  */
8994}
8995
8996static bool thumb_insn_is_unconditional(DisasContext *s, uint32_t insn)
8997{
8998    /* Return true if this Thumb insn is always unconditional,
8999     * even inside an IT block. This is true of only a very few
9000     * instructions: BKPT, HLT, and SG.
9001     *
9002     * A larger class of instructions are UNPREDICTABLE if used
9003     * inside an IT block; we do not need to detect those here, because
9004     * what we do by default (perform the cc check and update the IT
9005     * bits state machine) is a permitted CONSTRAINED UNPREDICTABLE
9006     * choice for those situations.
9007     *
9008     * insn is either a 16-bit or a 32-bit instruction; the two are
9009     * distinguishable because for the 16-bit case the top 16 bits
9010     * are zeroes, and that isn't a valid 32-bit encoding.
9011     */
9012    if ((insn & 0xffffff00) == 0xbe00) {
9013        /* BKPT */
9014        return true;
9015    }
9016
9017    if ((insn & 0xffffffc0) == 0xba80 && arm_dc_feature(s, ARM_FEATURE_V8) &&
9018        !arm_dc_feature(s, ARM_FEATURE_M)) {
9019        /* HLT: v8A only. This is unconditional even when it is going to
9020         * UNDEF; see the v8A ARM ARM DDI0487B.a H3.3.
9021         * For v7 cores this was a plain old undefined encoding and so
9022         * honours its cc check. (We might be using the encoding as
9023         * a semihosting trap, but we don't change the cc check behaviour
9024         * on that account, because a debugger connected to a real v7A
9025         * core and emulating semihosting traps by catching the UNDEF
9026         * exception would also only see cases where the cc check passed.
9027         * No guest code should be trying to do a HLT semihosting trap
9028         * in an IT block anyway.
9029         */
9030        return true;
9031    }
9032
9033    if (insn == 0xe97fe97f && arm_dc_feature(s, ARM_FEATURE_V8) &&
9034        arm_dc_feature(s, ARM_FEATURE_M)) {
9035        /* SG: v8M only */
9036        return true;
9037    }
9038
9039    return false;
9040}
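
/*
 * For instance, 0xbe2a (BKPT #42) is unconditional: it matches the
 * first test above, since 0xbe2a & 0xffffff00 == 0xbe00 whatever the
 * immediate byte holds.
 */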
9041
9042static void thumb_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
9043{
9044    DisasContext *dc = container_of(dcbase, DisasContext, base);
9045    CPUARMState *env = cpu->env_ptr;
9046    uint32_t insn;
9047    bool is_16bit;
9048
9049    if (arm_pre_translate_insn(dc)) {
9050        return;
9051    }
9052
9053    dc->pc_curr = dc->base.pc_next;
9054    insn = arm_lduw_code(env, dc->base.pc_next, dc->sctlr_b);
9055    is_16bit = thumb_insn_is_16bit(dc, dc->base.pc_next, insn);
9056    dc->base.pc_next += 2;
9057    if (!is_16bit) {
9058        uint32_t insn2 = arm_lduw_code(env, dc->base.pc_next, dc->sctlr_b);
9059
9060        insn = insn << 16 | insn2;
9061        dc->base.pc_next += 2;
9062    }
9063    dc->insn = insn;
9064
9065    if (dc->condexec_mask && !thumb_insn_is_unconditional(dc, insn)) {
9066        uint32_t cond = dc->condexec_cond;
9067
9068        /*
9069         * Conditionally skip the insn. Note that both 0xe and 0xf mean
9070         * "always"; 0xf is not "never".
9071         */
9072        if (cond < 0x0e) {
9073            arm_skip_unless(dc, cond);
9074        }
9075    }
9076
9077    if (is_16bit) {
9078        disas_thumb_insn(dc, insn);
9079    } else {
9080        disas_thumb2_insn(dc, insn);
9081    }
9082
9083    /* Advance the Thumb condexec condition.  */
9084    if (dc->condexec_mask) {
9085        dc->condexec_cond = ((dc->condexec_cond & 0xe) |
9086                             ((dc->condexec_mask >> 4) & 1));
9087        dc->condexec_mask = (dc->condexec_mask << 1) & 0x1f;
9088        if (dc->condexec_mask == 0) {
9089            dc->condexec_cond = 0;
9090        }
9091    }
9092
9093    arm_post_translate_insn(dc);
9094
9095    /* Thumb is a variable-length ISA.  Stop translation when the next insn
9096     * will touch a new page.  This ensures that prefetch aborts occur at
9097     * the right place.
9098     *
9099     * We want to stop the TB if the next insn starts in a new page,
9100     * or if it spans between this page and the next. This means that
9101     * if we're looking at the last halfword in the page we need to
9102     * see if it's a 16-bit Thumb insn (which will fit in this TB)
9103     * or a 32-bit Thumb insn (which won't).
9104     * This is to avoid generating a silly TB with a single 16-bit insn
9105     * in it at the end of this page (which would execute correctly
9106     * but isn't very efficient).
9107     */
9108    if (dc->base.is_jmp == DISAS_NEXT
9109        && (dc->base.pc_next - dc->page_start >= TARGET_PAGE_SIZE
9110            || (dc->base.pc_next - dc->page_start >= TARGET_PAGE_SIZE - 3
9111                && insn_crosses_page(env, dc)))) {
9112        dc->base.is_jmp = DISAS_TOO_MANY;
9113    }
9114}
9115
9116static void arm_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
9117{
9118    DisasContext *dc = container_of(dcbase, DisasContext, base);
9119
9120    if (tb_cflags(dc->base.tb) & CF_LAST_IO && dc->condjmp) {
9121        /* FIXME: This can theoretically happen with self-modifying code. */
9122        cpu_abort(cpu, "IO on conditional branch instruction");
9123    }
9124
9125    /* At this stage dc->condjmp will only be set when the skipped
9126       instruction was a conditional branch or trap, and the PC has
9127       already been written.  */
9128    gen_set_condexec(dc);
9129    if (dc->base.is_jmp == DISAS_BX_EXCRET) {
9130        /* Exception return branches need some special case code at the
9131         * end of the TB, which is complex enough that it has to
9132         * handle the single-step vs not and the condition-failed
9133         * insn codepath itself.
9134         */
9135        gen_bx_excret_final_code(dc);
9136    } else if (unlikely(is_singlestepping(dc))) {
9137        /* Unconditional and "condition passed" instruction codepath. */
9138        switch (dc->base.is_jmp) {
9139        case DISAS_SWI:
9140            gen_ss_advance(dc);
9141            gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb),
9142                          default_exception_el(dc));
9143            break;
9144        case DISAS_HVC:
9145            gen_ss_advance(dc);
9146            gen_exception(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2);
9147            break;
9148        case DISAS_SMC:
9149            gen_ss_advance(dc);
9150            gen_exception(EXCP_SMC, syn_aa32_smc(), 3);
9151            break;
9152        case DISAS_NEXT:
9153        case DISAS_TOO_MANY:
9154        case DISAS_UPDATE_EXIT:
9155        case DISAS_UPDATE_NOCHAIN:
9156            gen_set_pc_im(dc, dc->base.pc_next);
9157            /* fall through */
9158        default:
9159            /* FIXME: Single stepping a WFI insn will not halt the CPU. */
9160            gen_singlestep_exception(dc);
9161            break;
9162        case DISAS_NORETURN:
9163            break;
9164        }
9165    } else {
9166        /* While branches must always occur at the end of an IT block,
9167           there are a few other things that can cause us to terminate
9168           the TB in the middle of an IT block:
9169            - Exception generating instructions (bkpt, swi, undefined).
9170            - Page boundaries.
9171            - Hardware watchpoints.
9172           Hardware breakpoints have already been handled and skip this code.
9173         */
9174        switch (dc->base.is_jmp) {
9175        case DISAS_NEXT:
9176        case DISAS_TOO_MANY:
9177            gen_goto_tb(dc, 1, dc->base.pc_next);
9178            break;
9179        case DISAS_UPDATE_NOCHAIN:
9180            gen_set_pc_im(dc, dc->base.pc_next);
9181            /* fall through */
9182        case DISAS_JUMP:
9183            gen_goto_ptr();
9184            break;
9185        case DISAS_UPDATE_EXIT:
9186            gen_set_pc_im(dc, dc->base.pc_next);
9187            /* fall through */
9188        default:
9189            /* indicate that the hash table must be used to find the next TB */
9190            tcg_gen_exit_tb(NULL, 0);
9191            break;
9192        case DISAS_NORETURN:
9193            /* nothing more to generate */
9194            break;
9195        case DISAS_WFI:
9196        {
9197            TCGv_i32 tmp = tcg_const_i32((dc->thumb &&
9198                                          !(dc->insn & (1U << 31))) ? 2 : 4);
9199
9200            gen_helper_wfi(cpu_env, tmp);
9201            tcg_temp_free_i32(tmp);
9202            /* The helper doesn't necessarily throw an exception, but we
9203             * must go back to the main loop to check for interrupts anyway.
9204             */
9205            tcg_gen_exit_tb(NULL, 0);
9206            break;
9207        }
9208        case DISAS_WFE:
9209            gen_helper_wfe(cpu_env);
9210            break;
9211        case DISAS_YIELD:
9212            gen_helper_yield(cpu_env);
9213            break;
9214        case DISAS_SWI:
9215            gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb),
9216                          default_exception_el(dc));
9217            break;
9218        case DISAS_HVC:
9219            gen_exception(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2);
9220            break;
9221        case DISAS_SMC:
9222            gen_exception(EXCP_SMC, syn_aa32_smc(), 3);
9223            break;
9224        }
9225    }
9226
9227    if (dc->condjmp) {
9228        /* "Condition failed" instruction codepath for the branch/trap insn */
9229        gen_set_label(dc->condlabel);
9230        gen_set_condexec(dc);
9231        if (unlikely(is_singlestepping(dc))) {
9232            gen_set_pc_im(dc, dc->base.pc_next);
9233            gen_singlestep_exception(dc);
9234        } else {
9235            gen_goto_tb(dc, 1, dc->base.pc_next);
9236        }
9237    }
9238}
9239
9240static void arm_tr_disas_log(const DisasContextBase *dcbase, CPUState *cpu)
9241{
9242    DisasContext *dc = container_of(dcbase, DisasContext, base);
9243
9244    qemu_log("IN: %s\n", lookup_symbol(dc->base.pc_first));
9245    log_target_disas(cpu, dc->base.pc_first, dc->base.tb->size);
9246}
9247
9248static const TranslatorOps arm_translator_ops = {
9249    .init_disas_context = arm_tr_init_disas_context,
9250    .tb_start           = arm_tr_tb_start,
9251    .insn_start         = arm_tr_insn_start,
9252    .breakpoint_check   = arm_tr_breakpoint_check,
9253    .translate_insn     = arm_tr_translate_insn,
9254    .tb_stop            = arm_tr_tb_stop,
9255    .disas_log          = arm_tr_disas_log,
9256};
9257
9258static const TranslatorOps thumb_translator_ops = {
9259    .init_disas_context = arm_tr_init_disas_context,
9260    .tb_start           = arm_tr_tb_start,
9261    .insn_start         = arm_tr_insn_start,
9262    .breakpoint_check   = arm_tr_breakpoint_check,
9263    .translate_insn     = thumb_tr_translate_insn,
9264    .tb_stop            = arm_tr_tb_stop,
9265    .disas_log          = arm_tr_disas_log,
9266};
9267
9268/* generate intermediate code for basic block 'tb'.  */
9269void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns)
9270{
9271    DisasContext dc = { };
9272    const TranslatorOps *ops = &arm_translator_ops;
9273
9274    if (FIELD_EX32(tb->flags, TBFLAG_AM32, THUMB)) {
9275        ops = &thumb_translator_ops;
9276    }
9277#ifdef TARGET_AARCH64
9278    if (FIELD_EX32(tb->flags, TBFLAG_ANY, AARCH64_STATE)) {
9279        ops = &aarch64_translator_ops;
9280    }
9281#endif
9282
9283    translator_loop(ops, &dc.base, cpu, tb, max_insns);
9284}
9285
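/*
 * A brief sketch of the recorded per-insn state consumed below:
 * data[0] is the PC, data[1] the packed condexec (IT) bits (always
 * zero in AArch64 state), and data[2] the value shifted into
 * syndrome word 2, exactly as written by the insn_start hooks.
 */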
9286void restore_state_to_opc(CPUARMState *env, TranslationBlock *tb,
9287                          target_ulong *data)
9288{
9289    if (is_a64(env)) {
9290        env->pc = data[0];
9291        env->condexec_bits = 0;
9292        env->exception.syndrome = data[2] << ARM_INSN_START_WORD2_SHIFT;
9293    } else {
9294        env->regs[15] = data[0];
9295        env->condexec_bits = data[1];
9296        env->exception.syndrome = data[2] << ARM_INSN_START_WORD2_SHIFT;
9297    }
9298}
9299