qemu/target/arm/translate.c
/*
 *  ARM translation
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *  Copyright (c) 2005-2007 CodeSourcery
 *  Copyright (c) 2007 OpenedHand, Ltd.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"

#include "cpu.h"
#include "internals.h"
#include "disas/disas.h"
#include "exec/exec-all.h"
#include "tcg/tcg-op.h"
#include "tcg/tcg-op-gvec.h"
#include "qemu/log.h"
#include "qemu/bitops.h"
#include "arm_ldst.h"
#include "semihosting/semihost.h"

#include "exec/helper-proto.h"
#include "exec/helper-gen.h"

#include "trace-tcg.h"
#include "exec/log.h"


#define ENABLE_ARCH_4T    arm_dc_feature(s, ARM_FEATURE_V4T)
#define ENABLE_ARCH_5     arm_dc_feature(s, ARM_FEATURE_V5)
/* currently all emulated v5 cores are also v5TE, so don't bother */
#define ENABLE_ARCH_5TE   arm_dc_feature(s, ARM_FEATURE_V5)
#define ENABLE_ARCH_5J    dc_isar_feature(aa32_jazelle, s)
#define ENABLE_ARCH_6     arm_dc_feature(s, ARM_FEATURE_V6)
#define ENABLE_ARCH_6K    arm_dc_feature(s, ARM_FEATURE_V6K)
#define ENABLE_ARCH_6T2   arm_dc_feature(s, ARM_FEATURE_THUMB2)
#define ENABLE_ARCH_7     arm_dc_feature(s, ARM_FEATURE_V7)
#define ENABLE_ARCH_8     arm_dc_feature(s, ARM_FEATURE_V8)

#include "translate.h"

#if defined(CONFIG_USER_ONLY)
#define IS_USER(s) 1
#else
#define IS_USER(s) (s->user)
#endif

/* These are TCG temporaries used only by the legacy iwMMXt decoder */
static TCGv_i64 cpu_V0, cpu_V1, cpu_M0;
/* These are TCG globals which alias CPUARMState fields */
static TCGv_i32 cpu_R[16];
TCGv_i32 cpu_CF, cpu_NF, cpu_VF, cpu_ZF;
TCGv_i64 cpu_exclusive_addr;
TCGv_i64 cpu_exclusive_val;

#include "exec/gen-icount.h"

static const char * const regnames[] =
    { "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
      "r8", "r9", "r10", "r11", "r12", "r13", "r14", "pc" };

/* Function prototypes for gen_ functions calling Neon helpers.  */
typedef void NeonGenThreeOpEnvFn(TCGv_i32, TCGv_env, TCGv_i32,
                                 TCGv_i32, TCGv_i32);
/* Function prototypes for gen_ functions for fixed-point conversions */
typedef void VFPGenFixPointFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);

/* initialize TCG globals.  */
void arm_translate_init(void)
{
    int i;

    for (i = 0; i < 16; i++) {
        cpu_R[i] = tcg_global_mem_new_i32(cpu_env,
                                          offsetof(CPUARMState, regs[i]),
                                          regnames[i]);
    }
    cpu_CF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, CF), "CF");
    cpu_NF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, NF), "NF");
    cpu_VF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, VF), "VF");
    cpu_ZF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, ZF), "ZF");

    cpu_exclusive_addr = tcg_global_mem_new_i64(cpu_env,
        offsetof(CPUARMState, exclusive_addr), "exclusive_addr");
    cpu_exclusive_val = tcg_global_mem_new_i64(cpu_env,
        offsetof(CPUARMState, exclusive_val), "exclusive_val");

    a64_translate_init();
}

/* Generate a label used for skipping this instruction */
static void arm_gen_condlabel(DisasContext *s)
{
    if (!s->condjmp) {
        s->condlabel = gen_new_label();
        s->condjmp = 1;
    }
}

/*
 * Constant expanders for the decoders.
 */

static int negate(DisasContext *s, int x)
{
    return -x;
}

static int plus_2(DisasContext *s, int x)
{
    return x + 2;
}

static int times_2(DisasContext *s, int x)
{
    return x * 2;
}

static int times_4(DisasContext *s, int x)
{
    return x * 4;
}

/* Flags for the disas_set_da_iss info argument:
 * lower bits hold the Rt register number, higher bits are flags.
 */
typedef enum ISSInfo {
    ISSNone = 0,
    ISSRegMask = 0x1f,
    ISSInvalid = (1 << 5),
    ISSIsAcqRel = (1 << 6),
    ISSIsWrite = (1 << 7),
    ISSIs16Bit = (1 << 8),
} ISSInfo;
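
/*
 * For example, a 16-bit-encoded "LDRB r3, [r1]" might pass
 * (3 | ISSIs16Bit) here, while a store with writeback, which cannot
 * report valid ISS, would pass ISSInvalid: the low 5 bits always carry
 * the Rt register number and the flags above qualify it.
 */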

/* Save the syndrome information for a Data Abort */
static void disas_set_da_iss(DisasContext *s, MemOp memop, ISSInfo issinfo)
{
    uint32_t syn;
    int sas = memop & MO_SIZE;
    bool sse = memop & MO_SIGN;
    bool is_acqrel = issinfo & ISSIsAcqRel;
    bool is_write = issinfo & ISSIsWrite;
    bool is_16bit = issinfo & ISSIs16Bit;
    int srt = issinfo & ISSRegMask;

    if (issinfo & ISSInvalid) {
        /* Some callsites want to conditionally provide ISS info,
         * e.g. "only if this was not a writeback"
         */
        return;
    }

    if (srt == 15) {
        /* For AArch32, insns where the src/dest is R15 never generate
         * ISS information. Catching that here saves checking at all
         * the call sites.
         */
        return;
    }

    syn = syn_data_abort_with_iss(0, sas, sse, srt, 0, is_acqrel,
                                  0, 0, 0, is_write, 0, is_16bit);
    disas_set_insn_syndrome(s, syn);
}

static inline int get_a32_user_mem_index(DisasContext *s)
{
    /* Return the core mmu_idx to use for A32/T32 "unprivileged load/store"
     * insns:
     *  if PL2, UNPREDICTABLE (we choose to implement as if PL0)
     *  otherwise, access as if at PL0.
     */
    switch (s->mmu_idx) {
    case ARMMMUIdx_E2:        /* this one is UNPREDICTABLE */
    case ARMMMUIdx_E10_0:
    case ARMMMUIdx_E10_1:
    case ARMMMUIdx_E10_1_PAN:
        return arm_to_core_mmu_idx(ARMMMUIdx_E10_0);
    case ARMMMUIdx_SE3:
    case ARMMMUIdx_SE10_0:
    case ARMMMUIdx_SE10_1:
    case ARMMMUIdx_SE10_1_PAN:
        return arm_to_core_mmu_idx(ARMMMUIdx_SE10_0);
    case ARMMMUIdx_MUser:
    case ARMMMUIdx_MPriv:
        return arm_to_core_mmu_idx(ARMMMUIdx_MUser);
    case ARMMMUIdx_MUserNegPri:
    case ARMMMUIdx_MPrivNegPri:
        return arm_to_core_mmu_idx(ARMMMUIdx_MUserNegPri);
    case ARMMMUIdx_MSUser:
    case ARMMMUIdx_MSPriv:
        return arm_to_core_mmu_idx(ARMMMUIdx_MSUser);
    case ARMMMUIdx_MSUserNegPri:
    case ARMMMUIdx_MSPrivNegPri:
        return arm_to_core_mmu_idx(ARMMMUIdx_MSUserNegPri);
    default:
        g_assert_not_reached();
    }
}

static inline TCGv_i32 load_cpu_offset(int offset)
{
    TCGv_i32 tmp = tcg_temp_new_i32();
    tcg_gen_ld_i32(tmp, cpu_env, offset);
    return tmp;
}

#define load_cpu_field(name) load_cpu_offset(offsetof(CPUARMState, name))

static inline void store_cpu_offset(TCGv_i32 var, int offset)
{
    tcg_gen_st_i32(var, cpu_env, offset);
    tcg_temp_free_i32(var);
}

#define store_cpu_field(var, name) \
    store_cpu_offset(var, offsetof(CPUARMState, name))

/* The architectural value of PC.  */
static uint32_t read_pc(DisasContext *s)
{
    return s->pc_curr + (s->thumb ? 4 : 8);
}
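
/*
 * E.g. for an A32 insn at 0x1000 this returns 0x1008, and for a T32
 * insn at 0x1000 it returns 0x1004, matching the architectural rule
 * that reads of the PC see the current insn address + 8 (ARM) or
 * + 4 (Thumb).
 */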

/* Set a variable to the value of a CPU register.  */
static void load_reg_var(DisasContext *s, TCGv_i32 var, int reg)
{
    if (reg == 15) {
        tcg_gen_movi_i32(var, read_pc(s));
    } else {
        tcg_gen_mov_i32(var, cpu_R[reg]);
    }
}

/* Create a new temporary and set it to the value of a CPU register.  */
static inline TCGv_i32 load_reg(DisasContext *s, int reg)
{
    TCGv_i32 tmp = tcg_temp_new_i32();
    load_reg_var(s, tmp, reg);
    return tmp;
}

/*
 * Create a new temp, REG + OFS, except PC is ALIGN(PC, 4).
 * This is used for load/store for which use of PC implies (literal),
 * or ADD that implies ADR.
 */
static TCGv_i32 add_reg_for_lit(DisasContext *s, int reg, int ofs)
{
    TCGv_i32 tmp = tcg_temp_new_i32();

    if (reg == 15) {
        tcg_gen_movi_i32(tmp, (read_pc(s) & ~3) + ofs);
    } else {
        tcg_gen_addi_i32(tmp, cpu_R[reg], ofs);
    }
    return tmp;
}
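
/*
 * E.g. a Thumb "LDR r0, [pc, #8]" at 0x2002 computes
 * Align(0x2002 + 4, 4) + 8 = 0x2004 + 8 = 0x200c, which is why the
 * PC case above masks with ~3 before adding the offset.
 */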

/* Set a CPU register.  The source must be a temporary and will be
   marked as dead.  */
static void store_reg(DisasContext *s, int reg, TCGv_i32 var)
{
    if (reg == 15) {
        /* In Thumb mode, we must ignore bit 0.
         * In ARM mode, for ARMv4 and ARMv5, it is UNPREDICTABLE if bits [1:0]
         * are not 0b00, but for ARMv6 and above, we must ignore bits [1:0].
         * We choose to ignore [1:0] in ARM mode for all architecture versions.
         */
        tcg_gen_andi_i32(var, var, s->thumb ? ~1 : ~3);
        s->base.is_jmp = DISAS_JUMP;
    }
    tcg_gen_mov_i32(cpu_R[reg], var);
    tcg_temp_free_i32(var);
}

/*
 * Variant of store_reg which applies v8M stack-limit checks before updating
 * SP. If the check fails this will result in an exception being taken.
 * We disable the stack checks for CONFIG_USER_ONLY because we have
 * no idea what the stack limits should be in that case.
 * If stack checking is not being done this just acts like store_reg().
 */
static void store_sp_checked(DisasContext *s, TCGv_i32 var)
{
#ifndef CONFIG_USER_ONLY
    if (s->v8m_stackcheck) {
        gen_helper_v8m_stackcheck(cpu_env, var);
    }
#endif
    store_reg(s, 13, var);
}

/* Value extensions.  */
#define gen_uxtb(var) tcg_gen_ext8u_i32(var, var)
#define gen_uxth(var) tcg_gen_ext16u_i32(var, var)
#define gen_sxtb(var) tcg_gen_ext8s_i32(var, var)
#define gen_sxth(var) tcg_gen_ext16s_i32(var, var)

#define gen_sxtb16(var) gen_helper_sxtb16(var, var)
#define gen_uxtb16(var) gen_helper_uxtb16(var, var)


static inline void gen_set_cpsr(TCGv_i32 var, uint32_t mask)
{
    TCGv_i32 tmp_mask = tcg_const_i32(mask);
    gen_helper_cpsr_write(cpu_env, var, tmp_mask);
    tcg_temp_free_i32(tmp_mask);
}
/* Set NZCV flags from the high 4 bits of var.  */
#define gen_set_nzcv(var) gen_set_cpsr(var, CPSR_NZCV)

static void gen_exception_internal(int excp)
{
    TCGv_i32 tcg_excp = tcg_const_i32(excp);

    assert(excp_is_internal(excp));
    gen_helper_exception_internal(cpu_env, tcg_excp);
    tcg_temp_free_i32(tcg_excp);
}

static void gen_step_complete_exception(DisasContext *s)
{
    /* We just completed a step of an insn. Move from Active-not-pending
     * to Active-pending, and then also take the swstep exception.
     * This corresponds to making the (IMPDEF) choice to prioritize
     * swstep exceptions over asynchronous exceptions taken to an exception
     * level where debug is disabled. This choice has the advantage that
     * we do not need to maintain internal state corresponding to the
     * ISV/EX syndrome bits between completion of the step and generation
     * of the exception, and our syndrome information is always correct.
     */
    gen_ss_advance(s);
    gen_swstep_exception(s, 1, s->is_ldex);
    s->base.is_jmp = DISAS_NORETURN;
}

static void gen_singlestep_exception(DisasContext *s)
{
    /* Generate the right kind of exception for singlestep, which is
     * either the architectural singlestep or EXCP_DEBUG for QEMU's
     * gdb singlestepping.
     */
    if (s->ss_active) {
        gen_step_complete_exception(s);
    } else {
        gen_exception_internal(EXCP_DEBUG);
    }
}

static inline bool is_singlestepping(DisasContext *s)
{
    /* Return true if we are singlestepping either because of
     * architectural singlestep or QEMU gdbstub singlestep. This does
     * not include the command line '-singlestep' mode which is rather
     * misnamed as it only means "one instruction per TB" and doesn't
     * affect the code we generate.
     */
    return s->base.singlestep_enabled || s->ss_active;
}

static void gen_smul_dual(TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 tmp1 = tcg_temp_new_i32();
    TCGv_i32 tmp2 = tcg_temp_new_i32();
    tcg_gen_ext16s_i32(tmp1, a);
    tcg_gen_ext16s_i32(tmp2, b);
    tcg_gen_mul_i32(tmp1, tmp1, tmp2);
    tcg_temp_free_i32(tmp2);
    tcg_gen_sari_i32(a, a, 16);
    tcg_gen_sari_i32(b, b, 16);
    tcg_gen_mul_i32(b, b, a);
    tcg_gen_mov_i32(a, tmp1);
    tcg_temp_free_i32(tmp1);
}

/* Byteswap each halfword.  */
static void gen_rev16(TCGv_i32 dest, TCGv_i32 var)
{
    TCGv_i32 tmp = tcg_temp_new_i32();
    TCGv_i32 mask = tcg_const_i32(0x00ff00ff);
    tcg_gen_shri_i32(tmp, var, 8);
    tcg_gen_and_i32(tmp, tmp, mask);
    tcg_gen_and_i32(var, var, mask);
    tcg_gen_shli_i32(var, var, 8);
    tcg_gen_or_i32(dest, var, tmp);
    tcg_temp_free_i32(mask);
    tcg_temp_free_i32(tmp);
}

/* Byteswap low halfword and sign extend.  */
static void gen_revsh(TCGv_i32 dest, TCGv_i32 var)
{
    tcg_gen_ext16u_i32(var, var);
    tcg_gen_bswap16_i32(var, var);
    tcg_gen_ext16s_i32(dest, var);
}

/* Swap low and high halfwords.  */
static void gen_swap_half(TCGv_i32 dest, TCGv_i32 var)
{
    tcg_gen_rotri_i32(dest, var, 16);
}

/* Dual 16-bit add.  Result placed in dest.
    tmp = (t0 ^ t1) & 0x8000;
    t0 &= ~0x8000;
    t1 &= ~0x8000;
    dest = (t0 + t1) ^ tmp;
 */

static void gen_add16(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
{
    TCGv_i32 tmp = tcg_temp_new_i32();
    tcg_gen_xor_i32(tmp, t0, t1);
    tcg_gen_andi_i32(tmp, tmp, 0x8000);
    tcg_gen_andi_i32(t0, t0, ~0x8000);
    tcg_gen_andi_i32(t1, t1, ~0x8000);
    tcg_gen_add_i32(t0, t0, t1);
    tcg_gen_xor_i32(dest, t0, tmp);
    tcg_temp_free_i32(tmp);
}
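
/*
 * Worked example: t0 = 0x8000, t1 = 0x8001. A plain 32-bit add would
 * give 0x00010001, letting the low-lane carry corrupt the high lane.
 * Here tmp = (t0 ^ t1) & 0x8000 = 0, the masked add gives
 * 0x0000 + 0x0001 = 0x0001, and XOR-ing tmp back yields 0x0001: the
 * correct per-lane result with the inter-lane carry suppressed.
 */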

/* Set N and Z flags from var.  */
static inline void gen_logic_CC(TCGv_i32 var)
{
    tcg_gen_mov_i32(cpu_NF, var);
    tcg_gen_mov_i32(cpu_ZF, var);
}

/* dest = T0 + T1 + CF. */
static void gen_add_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
{
    tcg_gen_add_i32(dest, t0, t1);
    tcg_gen_add_i32(dest, dest, cpu_CF);
}

/* dest = T0 - T1 + CF - 1.  */
static void gen_sub_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
{
    tcg_gen_sub_i32(dest, t0, t1);
    tcg_gen_add_i32(dest, dest, cpu_CF);
    tcg_gen_subi_i32(dest, dest, 1);
}
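
/*
 * This is the ARM SBC definition, T0 - T1 - NOT(CF), rewritten as
 * T0 - T1 + CF - 1 since CF is stored as 0 or 1; e.g. with CF = 1
 * (no borrow pending) it reduces to a plain subtract.
 */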

/* dest = T0 + T1. Compute C, N, V and Z flags */
static void gen_add_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
{
    TCGv_i32 tmp = tcg_temp_new_i32();
    tcg_gen_movi_i32(tmp, 0);
    tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, t1, tmp);
    tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
    tcg_gen_xor_i32(tmp, t0, t1);
    tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
    tcg_temp_free_i32(tmp);
    tcg_gen_mov_i32(dest, cpu_NF);
}

/* dest = T0 + T1 + CF.  Compute C, N, V and Z flags */
static void gen_adc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
{
    TCGv_i32 tmp = tcg_temp_new_i32();
    if (TCG_TARGET_HAS_add2_i32) {
        tcg_gen_movi_i32(tmp, 0);
        tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, cpu_CF, tmp);
        tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1, tmp);
    } else {
        TCGv_i64 q0 = tcg_temp_new_i64();
        TCGv_i64 q1 = tcg_temp_new_i64();
        tcg_gen_extu_i32_i64(q0, t0);
        tcg_gen_extu_i32_i64(q1, t1);
        tcg_gen_add_i64(q0, q0, q1);
        tcg_gen_extu_i32_i64(q1, cpu_CF);
        tcg_gen_add_i64(q0, q0, q1);
        tcg_gen_extr_i64_i32(cpu_NF, cpu_CF, q0);
        tcg_temp_free_i64(q0);
        tcg_temp_free_i64(q1);
    }
    tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
    tcg_gen_xor_i32(tmp, t0, t1);
    tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
    tcg_temp_free_i32(tmp);
    tcg_gen_mov_i32(dest, cpu_NF);
}

/* dest = T0 - T1. Compute C, N, V and Z flags */
static void gen_sub_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
{
    TCGv_i32 tmp;
    tcg_gen_sub_i32(cpu_NF, t0, t1);
    tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0, t1);
    tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
    tmp = tcg_temp_new_i32();
    tcg_gen_xor_i32(tmp, t0, t1);
    tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
    tcg_temp_free_i32(tmp);
    tcg_gen_mov_i32(dest, cpu_NF);
}
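
/*
 * The setcond above implements the ARM "C is NOT borrow" convention
 * for subtraction: e.g. 5 - 3 sets CF = 1 (5 >= 3 unsigned), while
 * 3 - 5 sets CF = 0.
 */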

/* dest = T0 + ~T1 + CF.  Compute C, N, V and Z flags */
static void gen_sbc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
{
    TCGv_i32 tmp = tcg_temp_new_i32();
    tcg_gen_not_i32(tmp, t1);
    gen_adc_CC(dest, t0, tmp);
    tcg_temp_free_i32(tmp);
}

#define GEN_SHIFT(name)                                               \
static void gen_##name(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)       \
{                                                                     \
    TCGv_i32 tmp1, tmp2, tmp3;                                        \
    tmp1 = tcg_temp_new_i32();                                        \
    tcg_gen_andi_i32(tmp1, t1, 0xff);                                 \
    tmp2 = tcg_const_i32(0);                                          \
    tmp3 = tcg_const_i32(0x1f);                                       \
    tcg_gen_movcond_i32(TCG_COND_GTU, tmp2, tmp1, tmp3, tmp2, t0);    \
    tcg_temp_free_i32(tmp3);                                          \
    tcg_gen_andi_i32(tmp1, tmp1, 0x1f);                               \
    tcg_gen_##name##_i32(dest, tmp2, tmp1);                           \
    tcg_temp_free_i32(tmp2);                                          \
    tcg_temp_free_i32(tmp1);                                          \
}
GEN_SHIFT(shl)
GEN_SHIFT(shr)
#undef GEN_SHIFT
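
/*
 * The movcond in GEN_SHIFT implements the ARM semantics for shifts by
 * register: only the bottom 8 bits of the count are used, and counts
 * of 32..255 must yield 0, while TCG shifts are undefined for counts
 * >= 32; hence the select of a zero operand when the count exceeds 31.
 * E.g. "LSL r0, r1, r2" with r2 = 0x120 shifts by 0x20 = 32 and so
 * produces 0.
 */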

static void gen_sar(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
{
    TCGv_i32 tmp1, tmp2;
    tmp1 = tcg_temp_new_i32();
    tcg_gen_andi_i32(tmp1, t1, 0xff);
    tmp2 = tcg_const_i32(0x1f);
    tcg_gen_movcond_i32(TCG_COND_GTU, tmp1, tmp1, tmp2, tmp2, tmp1);
    tcg_temp_free_i32(tmp2);
    tcg_gen_sar_i32(dest, t0, tmp1);
    tcg_temp_free_i32(tmp1);
}

static void shifter_out_im(TCGv_i32 var, int shift)
{
    tcg_gen_extract_i32(cpu_CF, var, shift, 1);
}

/* Shift by immediate.  Includes special handling for shift == 0.  */
static inline void gen_arm_shift_im(TCGv_i32 var, int shiftop,
                                    int shift, int flags)
{
    switch (shiftop) {
    case 0: /* LSL */
        if (shift != 0) {
            if (flags)
                shifter_out_im(var, 32 - shift);
            tcg_gen_shli_i32(var, var, shift);
        }
        break;
    case 1: /* LSR */
        if (shift == 0) {
            if (flags) {
                tcg_gen_shri_i32(cpu_CF, var, 31);
            }
            tcg_gen_movi_i32(var, 0);
        } else {
            if (flags)
                shifter_out_im(var, shift - 1);
            tcg_gen_shri_i32(var, var, shift);
        }
        break;
    case 2: /* ASR */
        if (shift == 0)
            shift = 32;
        if (flags)
            shifter_out_im(var, shift - 1);
        if (shift == 32)
            shift = 31;
        tcg_gen_sari_i32(var, var, shift);
        break;
    case 3: /* ROR/RRX */
        if (shift != 0) {
            if (flags)
                shifter_out_im(var, shift - 1);
            tcg_gen_rotri_i32(var, var, shift);
        } else {
            TCGv_i32 tmp = tcg_temp_new_i32();
            tcg_gen_shli_i32(tmp, cpu_CF, 31);
            if (flags)
                shifter_out_im(var, 0);
            tcg_gen_shri_i32(var, var, 1);
            tcg_gen_or_i32(var, var, tmp);
            tcg_temp_free_i32(tmp);
        }
        break;
    }
}

static inline void gen_arm_shift_reg(TCGv_i32 var, int shiftop,
                                     TCGv_i32 shift, int flags)
{
    if (flags) {
        switch (shiftop) {
        case 0: gen_helper_shl_cc(var, cpu_env, var, shift); break;
        case 1: gen_helper_shr_cc(var, cpu_env, var, shift); break;
        case 2: gen_helper_sar_cc(var, cpu_env, var, shift); break;
        case 3: gen_helper_ror_cc(var, cpu_env, var, shift); break;
        }
    } else {
        switch (shiftop) {
        case 0:
            gen_shl(var, var, shift);
            break;
        case 1:
            gen_shr(var, var, shift);
            break;
        case 2:
            gen_sar(var, var, shift);
            break;
        case 3:
            tcg_gen_andi_i32(shift, shift, 0x1f);
            tcg_gen_rotr_i32(var, var, shift);
            break;
        }
    }
    tcg_temp_free_i32(shift);
}

/*
 * Generate a conditional based on ARM condition code cc.
 * This is common between ARM and AArch64 targets.
 */
void arm_test_cc(DisasCompare *cmp, int cc)
{
    TCGv_i32 value;
    TCGCond cond;
    bool global = true;

    switch (cc) {
    case 0: /* eq: Z */
    case 1: /* ne: !Z */
        cond = TCG_COND_EQ;
        value = cpu_ZF;
        break;

    case 2: /* cs: C */
    case 3: /* cc: !C */
        cond = TCG_COND_NE;
        value = cpu_CF;
        break;

    case 4: /* mi: N */
    case 5: /* pl: !N */
        cond = TCG_COND_LT;
        value = cpu_NF;
        break;

    case 6: /* vs: V */
    case 7: /* vc: !V */
        cond = TCG_COND_LT;
        value = cpu_VF;
        break;

    case 8: /* hi: C && !Z */
    case 9: /* ls: !C || Z -> !(C && !Z) */
        cond = TCG_COND_NE;
        value = tcg_temp_new_i32();
        global = false;
        /* CF is 1 for C, so -CF is an all-bits-set mask for C;
           ZF is non-zero for !Z; so AND the two subexpressions.  */
        tcg_gen_neg_i32(value, cpu_CF);
        tcg_gen_and_i32(value, value, cpu_ZF);
        break;
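
        /*
         * E.g. for "hi" with C = 1 and Z clear: -CF = 0xffffffff and
         * ZF is non-zero, so the AND is non-zero and the TCG_COND_NE
         * test succeeds; either C = 0 or Z = 1 zeroes the AND instead.
         */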

    case 10: /* ge: N == V -> N ^ V == 0 */
    case 11: /* lt: N != V -> N ^ V != 0 */
        /* Since we're only interested in the sign bit, == 0 is >= 0.  */
        cond = TCG_COND_GE;
        value = tcg_temp_new_i32();
        global = false;
        tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
        break;

    case 12: /* gt: !Z && N == V */
    case 13: /* le: Z || N != V */
        cond = TCG_COND_NE;
        value = tcg_temp_new_i32();
        global = false;
        /* (N == V) is equal to the sign bit of ~(NF ^ VF).  Propagate
         * the sign bit then AND with ZF to yield the result.  */
        tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
        tcg_gen_sari_i32(value, value, 31);
        tcg_gen_andc_i32(value, cpu_ZF, value);
        break;

    case 14: /* always */
    case 15: /* always */
        /* Use the ALWAYS condition, which will fold early.
         * It doesn't matter what we use for the value.  */
        cond = TCG_COND_ALWAYS;
        value = cpu_ZF;
        goto no_invert;

    default:
        fprintf(stderr, "Bad condition code 0x%x\n", cc);
        abort();
    }

    if (cc & 1) {
        cond = tcg_invert_cond(cond);
    }

 no_invert:
    cmp->cond = cond;
    cmp->value = value;
    cmp->value_global = global;
}

void arm_free_cc(DisasCompare *cmp)
{
    if (!cmp->value_global) {
        tcg_temp_free_i32(cmp->value);
    }
}

void arm_jump_cc(DisasCompare *cmp, TCGLabel *label)
{
    tcg_gen_brcondi_i32(cmp->cond, cmp->value, 0, label);
}

void arm_gen_test_cc(int cc, TCGLabel *label)
{
    DisasCompare cmp;
    arm_test_cc(&cmp, cc);
    arm_jump_cc(&cmp, label);
    arm_free_cc(&cmp);
}

static inline void gen_set_condexec(DisasContext *s)
{
    if (s->condexec_mask) {
        uint32_t val = (s->condexec_cond << 4) | (s->condexec_mask >> 1);
        TCGv_i32 tmp = tcg_temp_new_i32();
        tcg_gen_movi_i32(tmp, val);
        store_cpu_field(tmp, condexec_bits);
    }
}

static inline void gen_set_pc_im(DisasContext *s, target_ulong val)
{
    tcg_gen_movi_i32(cpu_R[15], val);
}

/* Set PC and Thumb state from var.  var is marked as dead.  */
static inline void gen_bx(DisasContext *s, TCGv_i32 var)
{
    s->base.is_jmp = DISAS_JUMP;
    tcg_gen_andi_i32(cpu_R[15], var, ~1);
    tcg_gen_andi_i32(var, var, 1);
    store_cpu_field(var, thumb);
}

/*
 * Set PC and Thumb state from var. var is marked as dead.
 * For M-profile CPUs, include logic to detect exception-return
 * branches and handle them. This is needed for Thumb POP/LDM to PC, LDR to PC,
 * and BX reg, and no others, and happens only for code in Handler mode.
 * The Security Extension also requires us to check for the FNC_RETURN
 * which signals a function return from non-secure state; this can happen
 * in both Handler and Thread mode.
 * To avoid having to do multiple comparisons in inline generated code,
 * we make the check we do here loose, so it will match for EXC_RETURN
 * in Thread mode. For system emulation do_v7m_exception_exit() checks
 * for these spurious cases and returns without doing anything (giving
 * the same behaviour as for a branch to a non-magic address).
 *
 * In linux-user mode it is unclear what the right behaviour for an
 * attempted FNC_RETURN should be, because in real hardware this will go
 * directly to Secure code (i.e. not the Linux kernel) which will then treat
 * the error in any way it chooses. For QEMU we opt to make the FNC_RETURN
 * attempt behave the way it would on a CPU without the security extension,
 * which is to say "like a normal branch". That means we can simply treat
 * all branches as normal with no magic address behaviour.
 */
static inline void gen_bx_excret(DisasContext *s, TCGv_i32 var)
{
    /* Generate the same code here as for a simple bx, but flag via
     * s->base.is_jmp that we need to do the rest of the work later.
     */
    gen_bx(s, var);
#ifndef CONFIG_USER_ONLY
    if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY) ||
        (s->v7m_handler_mode && arm_dc_feature(s, ARM_FEATURE_M))) {
        s->base.is_jmp = DISAS_BX_EXCRET;
    }
#endif
}

static inline void gen_bx_excret_final_code(DisasContext *s)
{
    /* Generate the code to finish possible exception return and end the TB */
    TCGLabel *excret_label = gen_new_label();
    uint32_t min_magic;

    if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY)) {
        /* Covers FNC_RETURN and EXC_RETURN magic */
        min_magic = FNC_RETURN_MIN_MAGIC;
    } else {
        /* EXC_RETURN magic only */
        min_magic = EXC_RETURN_MIN_MAGIC;
    }

    /* Is the new PC value in the magic range indicating exception return? */
    tcg_gen_brcondi_i32(TCG_COND_GEU, cpu_R[15], min_magic, excret_label);
    /* No: end the TB as we would for a DISAS_JMP */
    if (is_singlestepping(s)) {
        gen_singlestep_exception(s);
    } else {
        tcg_gen_exit_tb(NULL, 0);
    }
    gen_set_label(excret_label);
    /* Yes: this is an exception return.
     * At this point in runtime env->regs[15] and env->thumb will hold
     * the exception-return magic number, which do_v7m_exception_exit()
     * will read. Nothing else will be able to see those values because
     * the cpu-exec main loop guarantees that we will always go straight
     * from raising the exception to the exception-handling code.
     *
     * gen_ss_advance(s) does nothing on M profile currently but
     * calling it is conceptually the right thing as we have executed
     * this instruction (compare SWI, HVC, SMC handling).
     */
    gen_ss_advance(s);
    gen_exception_internal(EXCP_EXCEPTION_EXIT);
}

static inline void gen_bxns(DisasContext *s, int rm)
{
    TCGv_i32 var = load_reg(s, rm);

    /* The bxns helper may raise an EXCEPTION_EXIT exception, so in theory
     * we need to sync state before calling it, but:
     *  - we don't need to do gen_set_pc_im() because the bxns helper will
     *    always set the PC itself
     *  - we don't need to do gen_set_condexec() because BXNS is UNPREDICTABLE
     *    unless it's outside an IT block or the last insn in an IT block,
     *    so we know that condexec == 0 (already set at the top of the TB)
     *    is correct in the non-UNPREDICTABLE cases, and we can choose
     *    "zeroes the IT bits" as our UNPREDICTABLE behaviour otherwise.
     */
    gen_helper_v7m_bxns(cpu_env, var);
    tcg_temp_free_i32(var);
    s->base.is_jmp = DISAS_EXIT;
}

static inline void gen_blxns(DisasContext *s, int rm)
{
    TCGv_i32 var = load_reg(s, rm);

    /* We don't need to sync condexec state, for the same reason as bxns.
     * We do however need to set the PC, because the blxns helper reads it.
     * The blxns helper may throw an exception.
     */
    gen_set_pc_im(s, s->base.pc_next);
    gen_helper_v7m_blxns(cpu_env, var);
    tcg_temp_free_i32(var);
    s->base.is_jmp = DISAS_EXIT;
}

/* Variant of store_reg which uses branch&exchange logic when storing
   to r15 in ARM architecture v7 and above. The source must be a temporary
   and will be marked as dead. */
static inline void store_reg_bx(DisasContext *s, int reg, TCGv_i32 var)
{
    if (reg == 15 && ENABLE_ARCH_7) {
        gen_bx(s, var);
    } else {
        store_reg(s, reg, var);
    }
}

/* Variant of store_reg which uses branch&exchange logic when storing
 * to r15 in ARM architecture v5T and above. This is used for storing
 * the results of a LDR/LDM/POP into r15, and corresponds to the cases
 * in the ARM ARM which use the LoadWritePC() pseudocode function. */
static inline void store_reg_from_load(DisasContext *s, int reg, TCGv_i32 var)
{
    if (reg == 15 && ENABLE_ARCH_5) {
        gen_bx_excret(s, var);
    } else {
        store_reg(s, reg, var);
    }
}

#ifdef CONFIG_USER_ONLY
#define IS_USER_ONLY 1
#else
#define IS_USER_ONLY 0
#endif

/* Abstractions of "generate code to do a guest load/store for
 * AArch32", where a vaddr is always 32 bits (and is zero
 * extended if we're a 64-bit core) and data is also
 * 32 bits unless specifically doing a 64-bit access.
 * These functions work like tcg_gen_qemu_{ld,st}* except
 * that the address argument is TCGv_i32 rather than TCGv.
 */

static inline TCGv gen_aa32_addr(DisasContext *s, TCGv_i32 a32, MemOp op)
{
    TCGv addr = tcg_temp_new();
    tcg_gen_extu_i32_tl(addr, a32);

    /* Not needed for user-mode BE32, where we use MO_BE instead.  */
    if (!IS_USER_ONLY && s->sctlr_b && (op & MO_SIZE) < MO_32) {
        tcg_gen_xori_tl(addr, addr, 4 - (1 << (op & MO_SIZE)));
    }
    return addr;
}
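
/*
 * Example of the BE32 XOR fixup above: with SCTLR.B set, a byte access
 * to a32 = 0x1002 is adjusted by 4 - 1 = 3, giving 0x1001, and a
 * halfword access is adjusted by 4 - 2 = 2, giving 0x1000; 32-bit and
 * larger accesses are left unchanged.
 */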

static void gen_aa32_ld_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
                            int index, MemOp opc)
{
    TCGv addr;

    if (arm_dc_feature(s, ARM_FEATURE_M) &&
        !arm_dc_feature(s, ARM_FEATURE_M_MAIN)) {
        opc |= MO_ALIGN;
    }

    addr = gen_aa32_addr(s, a32, opc);
    tcg_gen_qemu_ld_i32(val, addr, index, opc);
    tcg_temp_free(addr);
}

static void gen_aa32_st_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
                            int index, MemOp opc)
{
    TCGv addr;

    if (arm_dc_feature(s, ARM_FEATURE_M) &&
        !arm_dc_feature(s, ARM_FEATURE_M_MAIN)) {
        opc |= MO_ALIGN;
    }

    addr = gen_aa32_addr(s, a32, opc);
    tcg_gen_qemu_st_i32(val, addr, index, opc);
    tcg_temp_free(addr);
}

#define DO_GEN_LD(SUFF, OPC)                                             \
static inline void gen_aa32_ld##SUFF(DisasContext *s, TCGv_i32 val,      \
                                     TCGv_i32 a32, int index)            \
{                                                                        \
    gen_aa32_ld_i32(s, val, a32, index, OPC | s->be_data);               \
}

#define DO_GEN_ST(SUFF, OPC)                                             \
static inline void gen_aa32_st##SUFF(DisasContext *s, TCGv_i32 val,      \
                                     TCGv_i32 a32, int index)            \
{                                                                        \
    gen_aa32_st_i32(s, val, a32, index, OPC | s->be_data);               \
}

static inline void gen_aa32_frob64(DisasContext *s, TCGv_i64 val)
{
    /* Not needed for user-mode BE32, where we use MO_BE instead.  */
    if (!IS_USER_ONLY && s->sctlr_b) {
        tcg_gen_rotri_i64(val, val, 32);
    }
}
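
/*
 * E.g. a BE32 64-bit load of 0x0011223344556677 is rotated by 32 to
 * 0x4455667700112233, swapping the two words so that each 32-bit half
 * lands where the word-invariant big-endian layout expects it.
 */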

static void gen_aa32_ld_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
                            int index, MemOp opc)
{
    TCGv addr = gen_aa32_addr(s, a32, opc);
    tcg_gen_qemu_ld_i64(val, addr, index, opc);
    gen_aa32_frob64(s, val);
    tcg_temp_free(addr);
}

static inline void gen_aa32_ld64(DisasContext *s, TCGv_i64 val,
                                 TCGv_i32 a32, int index)
{
    gen_aa32_ld_i64(s, val, a32, index, MO_Q | s->be_data);
}

static void gen_aa32_st_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
                            int index, MemOp opc)
{
    TCGv addr = gen_aa32_addr(s, a32, opc);

    /* Not needed for user-mode BE32, where we use MO_BE instead.  */
    if (!IS_USER_ONLY && s->sctlr_b) {
        TCGv_i64 tmp = tcg_temp_new_i64();
        tcg_gen_rotri_i64(tmp, val, 32);
        tcg_gen_qemu_st_i64(tmp, addr, index, opc);
        tcg_temp_free_i64(tmp);
    } else {
        tcg_gen_qemu_st_i64(val, addr, index, opc);
    }
    tcg_temp_free(addr);
}

static inline void gen_aa32_st64(DisasContext *s, TCGv_i64 val,
                                 TCGv_i32 a32, int index)
{
    gen_aa32_st_i64(s, val, a32, index, MO_Q | s->be_data);
}

DO_GEN_LD(8u, MO_UB)
DO_GEN_LD(16u, MO_UW)
DO_GEN_LD(32u, MO_UL)
DO_GEN_ST(8, MO_UB)
DO_GEN_ST(16, MO_UW)
DO_GEN_ST(32, MO_UL)

static inline void gen_hvc(DisasContext *s, int imm16)
{
    /* The pre-HVC helper handles cases where HVC gets trapped
     * as an undefined insn by runtime configuration (i.e. before
     * the insn really executes).
     */
    gen_set_pc_im(s, s->pc_curr);
    gen_helper_pre_hvc(cpu_env);
    /* Otherwise we will treat this as a real exception which
     * happens after execution of the insn. (The distinction matters
     * for the PC value reported to the exception handler and also
     * for single stepping.)
     */
    s->svc_imm = imm16;
    gen_set_pc_im(s, s->base.pc_next);
    s->base.is_jmp = DISAS_HVC;
}

static inline void gen_smc(DisasContext *s)
{
    /* As with HVC, we may take an exception either before or after
     * the insn executes.
     */
    TCGv_i32 tmp;

    gen_set_pc_im(s, s->pc_curr);
    tmp = tcg_const_i32(syn_aa32_smc());
    gen_helper_pre_smc(cpu_env, tmp);
    tcg_temp_free_i32(tmp);
    gen_set_pc_im(s, s->base.pc_next);
    s->base.is_jmp = DISAS_SMC;
}

static void gen_exception_internal_insn(DisasContext *s, uint32_t pc, int excp)
{
    gen_set_condexec(s);
    gen_set_pc_im(s, pc);
    gen_exception_internal(excp);
    s->base.is_jmp = DISAS_NORETURN;
}

static void gen_exception_insn(DisasContext *s, uint32_t pc, int excp,
                               int syn, uint32_t target_el)
{
    gen_set_condexec(s);
    gen_set_pc_im(s, pc);
    gen_exception(excp, syn, target_el);
    s->base.is_jmp = DISAS_NORETURN;
}

static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syn)
{
    TCGv_i32 tcg_syn;

    gen_set_condexec(s);
    gen_set_pc_im(s, s->pc_curr);
    tcg_syn = tcg_const_i32(syn);
    gen_helper_exception_bkpt_insn(cpu_env, tcg_syn);
    tcg_temp_free_i32(tcg_syn);
    s->base.is_jmp = DISAS_NORETURN;
}

static void unallocated_encoding(DisasContext *s)
{
    /* Unallocated and reserved encodings are uncategorized */
    gen_exception_insn(s, s->pc_curr, EXCP_UDEF, syn_uncategorized(),
                       default_exception_el(s));
}

static void gen_exception_el(DisasContext *s, int excp, uint32_t syn,
                             TCGv_i32 tcg_el)
{
    TCGv_i32 tcg_excp;
    TCGv_i32 tcg_syn;

    gen_set_condexec(s);
    gen_set_pc_im(s, s->pc_curr);
    tcg_excp = tcg_const_i32(excp);
    tcg_syn = tcg_const_i32(syn);
    gen_helper_exception_with_syndrome(cpu_env, tcg_excp, tcg_syn, tcg_el);
    tcg_temp_free_i32(tcg_syn);
    tcg_temp_free_i32(tcg_excp);
    s->base.is_jmp = DISAS_NORETURN;
}

/* Force a TB lookup after an instruction that changes the CPU state.  */
static inline void gen_lookup_tb(DisasContext *s)
{
    tcg_gen_movi_i32(cpu_R[15], s->base.pc_next);
    s->base.is_jmp = DISAS_EXIT;
}

static inline void gen_hlt(DisasContext *s, int imm)
{
    /* HLT. This has two purposes.
     * Architecturally, it is an external halting debug instruction.
     * Since QEMU doesn't implement external debug, we treat it as
     * required when halting debug is disabled: it will UNDEF.
     * Secondly, "HLT 0x3C" is a T32 semihosting trap instruction,
     * and "HLT 0xF000" is an A32 semihosting syscall. These traps
     * must trigger semihosting even for ARMv7 and earlier, where
     * HLT was an undefined encoding.
     * In system mode, we don't allow userspace access to
     * semihosting, to provide some semblance of security
     * (and for consistency with our 32-bit semihosting).
     */
    if (semihosting_enabled() &&
#ifndef CONFIG_USER_ONLY
        s->current_el != 0 &&
#endif
        (imm == (s->thumb ? 0x3c : 0xf000))) {
        gen_exception_internal_insn(s, s->pc_curr, EXCP_SEMIHOST);
        return;
    }

    unallocated_encoding(s);
}

/*
 * Return the offset of a "full" NEON Dreg.
 */
static long neon_full_reg_offset(unsigned reg)
{
    return offsetof(CPUARMState, vfp.zregs[reg >> 1].d[reg & 1]);
}
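
/*
 * E.g. D5 maps to zregs[2].d[1], i.e. the high 64-bit half of Q2.
 */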

/*
 * Return the offset of a 2**SIZE piece of a NEON register, at index ELE,
 * where 0 is the least significant end of the register.
 */
static long neon_element_offset(int reg, int element, MemOp memop)
{
    int element_size = 1 << (memop & MO_SIZE);
    int ofs = element * element_size;
#ifdef HOST_WORDS_BIGENDIAN
    /*
     * Calculate the offset assuming fully little-endian,
     * then XOR to account for the order of the 8-byte units.
     */
    if (element_size < 8) {
        ofs ^= 8 - element_size;
    }
#endif
    return neon_full_reg_offset(reg) + ofs;
}
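
/*
 * E.g. on a big-endian host, the MO_16 element at index 1 has
 * little-endian byte offset 2; XOR-ing with 8 - 2 = 6 relocates it to
 * offset 4 within the byte-swapped 8-byte unit, which is where that
 * halfword actually sits in host memory.
 */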

/* Return the offset of a VFP Dreg (dp = true) or VFP Sreg (dp = false). */
static long vfp_reg_offset(bool dp, unsigned reg)
{
    if (dp) {
        return neon_element_offset(reg, 0, MO_64);
    } else {
        return neon_element_offset(reg >> 1, reg & 1, MO_32);
    }
}

static inline void vfp_load_reg64(TCGv_i64 var, int reg)
{
    tcg_gen_ld_i64(var, cpu_env, vfp_reg_offset(true, reg));
}

static inline void vfp_store_reg64(TCGv_i64 var, int reg)
{
    tcg_gen_st_i64(var, cpu_env, vfp_reg_offset(true, reg));
}

static inline void vfp_load_reg32(TCGv_i32 var, int reg)
{
    tcg_gen_ld_i32(var, cpu_env, vfp_reg_offset(false, reg));
}

static inline void vfp_store_reg32(TCGv_i32 var, int reg)
{
    tcg_gen_st_i32(var, cpu_env, vfp_reg_offset(false, reg));
}

static void read_neon_element32(TCGv_i32 dest, int reg, int ele, MemOp memop)
{
    long off = neon_element_offset(reg, ele, memop);

    switch (memop) {
    case MO_SB:
        tcg_gen_ld8s_i32(dest, cpu_env, off);
        break;
    case MO_UB:
        tcg_gen_ld8u_i32(dest, cpu_env, off);
        break;
    case MO_SW:
        tcg_gen_ld16s_i32(dest, cpu_env, off);
        break;
    case MO_UW:
        tcg_gen_ld16u_i32(dest, cpu_env, off);
        break;
    case MO_UL:
    case MO_SL:
        tcg_gen_ld_i32(dest, cpu_env, off);
        break;
    default:
        g_assert_not_reached();
    }
}

static void read_neon_element64(TCGv_i64 dest, int reg, int ele, MemOp memop)
{
    long off = neon_element_offset(reg, ele, memop);

    switch (memop) {
    case MO_SL:
        tcg_gen_ld32s_i64(dest, cpu_env, off);
        break;
    case MO_UL:
        tcg_gen_ld32u_i64(dest, cpu_env, off);
        break;
    case MO_Q:
        tcg_gen_ld_i64(dest, cpu_env, off);
        break;
    default:
        g_assert_not_reached();
    }
}

static void write_neon_element32(TCGv_i32 src, int reg, int ele, MemOp memop)
{
    long off = neon_element_offset(reg, ele, memop);

    switch (memop) {
    case MO_8:
        tcg_gen_st8_i32(src, cpu_env, off);
        break;
    case MO_16:
        tcg_gen_st16_i32(src, cpu_env, off);
        break;
    case MO_32:
        tcg_gen_st_i32(src, cpu_env, off);
        break;
    default:
        g_assert_not_reached();
    }
}

static void write_neon_element64(TCGv_i64 src, int reg, int ele, MemOp memop)
{
    long off = neon_element_offset(reg, ele, memop);

    switch (memop) {
    case MO_32:
        tcg_gen_st32_i64(src, cpu_env, off);
        break;
    case MO_64:
        tcg_gen_st_i64(src, cpu_env, off);
        break;
    default:
        g_assert_not_reached();
    }
}

static TCGv_ptr vfp_reg_ptr(bool dp, int reg)
{
    TCGv_ptr ret = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(ret, cpu_env, vfp_reg_offset(dp, reg));
    return ret;
}

#define ARM_CP_RW_BIT   (1 << 20)

/* Include the VFP and Neon decoders */
#include "decode-m-nocp.c.inc"
#include "translate-vfp.c.inc"
#include "translate-neon.c.inc"

static inline void iwmmxt_load_reg(TCGv_i64 var, int reg)
{
    tcg_gen_ld_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
}

static inline void iwmmxt_store_reg(TCGv_i64 var, int reg)
{
    tcg_gen_st_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
}

static inline TCGv_i32 iwmmxt_load_creg(int reg)
{
    TCGv_i32 var = tcg_temp_new_i32();
    tcg_gen_ld_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
    return var;
}

static inline void iwmmxt_store_creg(int reg, TCGv_i32 var)
{
    tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
    tcg_temp_free_i32(var);
}

static inline void gen_op_iwmmxt_movq_wRn_M0(int rn)
{
    iwmmxt_store_reg(cpu_M0, rn);
}

static inline void gen_op_iwmmxt_movq_M0_wRn(int rn)
{
    iwmmxt_load_reg(cpu_M0, rn);
}

static inline void gen_op_iwmmxt_orq_M0_wRn(int rn)
{
    iwmmxt_load_reg(cpu_V1, rn);
    tcg_gen_or_i64(cpu_M0, cpu_M0, cpu_V1);
}

static inline void gen_op_iwmmxt_andq_M0_wRn(int rn)
{
    iwmmxt_load_reg(cpu_V1, rn);
    tcg_gen_and_i64(cpu_M0, cpu_M0, cpu_V1);
}

static inline void gen_op_iwmmxt_xorq_M0_wRn(int rn)
{
    iwmmxt_load_reg(cpu_V1, rn);
    tcg_gen_xor_i64(cpu_M0, cpu_M0, cpu_V1);
}

#define IWMMXT_OP(name) \
static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
{ \
    iwmmxt_load_reg(cpu_V1, rn); \
    gen_helper_iwmmxt_##name(cpu_M0, cpu_M0, cpu_V1); \
}

#define IWMMXT_OP_ENV(name) \
static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
{ \
    iwmmxt_load_reg(cpu_V1, rn); \
    gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0, cpu_V1); \
}

#define IWMMXT_OP_ENV_SIZE(name) \
IWMMXT_OP_ENV(name##b) \
IWMMXT_OP_ENV(name##w) \
IWMMXT_OP_ENV(name##l)

#define IWMMXT_OP_ENV1(name) \
static inline void gen_op_iwmmxt_##name##_M0(void) \
{ \
    gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0); \
}

IWMMXT_OP(maddsq)
IWMMXT_OP(madduq)
IWMMXT_OP(sadb)
IWMMXT_OP(sadw)
IWMMXT_OP(mulslw)
IWMMXT_OP(mulshw)
IWMMXT_OP(mululw)
IWMMXT_OP(muluhw)
IWMMXT_OP(macsw)
IWMMXT_OP(macuw)

IWMMXT_OP_ENV_SIZE(unpackl)
IWMMXT_OP_ENV_SIZE(unpackh)

IWMMXT_OP_ENV1(unpacklub)
IWMMXT_OP_ENV1(unpackluw)
IWMMXT_OP_ENV1(unpacklul)
IWMMXT_OP_ENV1(unpackhub)
IWMMXT_OP_ENV1(unpackhuw)
IWMMXT_OP_ENV1(unpackhul)
IWMMXT_OP_ENV1(unpacklsb)
IWMMXT_OP_ENV1(unpacklsw)
IWMMXT_OP_ENV1(unpacklsl)
IWMMXT_OP_ENV1(unpackhsb)
IWMMXT_OP_ENV1(unpackhsw)
IWMMXT_OP_ENV1(unpackhsl)

IWMMXT_OP_ENV_SIZE(cmpeq)
IWMMXT_OP_ENV_SIZE(cmpgtu)
IWMMXT_OP_ENV_SIZE(cmpgts)

IWMMXT_OP_ENV_SIZE(mins)
IWMMXT_OP_ENV_SIZE(minu)
IWMMXT_OP_ENV_SIZE(maxs)
IWMMXT_OP_ENV_SIZE(maxu)

IWMMXT_OP_ENV_SIZE(subn)
IWMMXT_OP_ENV_SIZE(addn)
IWMMXT_OP_ENV_SIZE(subu)
IWMMXT_OP_ENV_SIZE(addu)
IWMMXT_OP_ENV_SIZE(subs)
IWMMXT_OP_ENV_SIZE(adds)

IWMMXT_OP_ENV(avgb0)
IWMMXT_OP_ENV(avgb1)
IWMMXT_OP_ENV(avgw0)
IWMMXT_OP_ENV(avgw1)

IWMMXT_OP_ENV(packuw)
IWMMXT_OP_ENV(packul)
IWMMXT_OP_ENV(packuq)
IWMMXT_OP_ENV(packsw)
IWMMXT_OP_ENV(packsl)
IWMMXT_OP_ENV(packsq)

static void gen_op_iwmmxt_set_mup(void)
{
    TCGv_i32 tmp;
    tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
    tcg_gen_ori_i32(tmp, tmp, 2);
    store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
}

static void gen_op_iwmmxt_set_cup(void)
{
    TCGv_i32 tmp;
    tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
    tcg_gen_ori_i32(tmp, tmp, 1);
    store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
}

static void gen_op_iwmmxt_setpsr_nz(void)
{
    TCGv_i32 tmp = tcg_temp_new_i32();
    gen_helper_iwmmxt_setpsr_nz(tmp, cpu_M0);
    store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCASF]);
}

static inline void gen_op_iwmmxt_addl_M0_wRn(int rn)
{
    iwmmxt_load_reg(cpu_V1, rn);
    tcg_gen_ext32u_i64(cpu_V1, cpu_V1);
    tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
}

static inline int gen_iwmmxt_address(DisasContext *s, uint32_t insn,
                                     TCGv_i32 dest)
{
    int rd;
    uint32_t offset;
    TCGv_i32 tmp;

    rd = (insn >> 16) & 0xf;
    tmp = load_reg(s, rd);

    offset = (insn & 0xff) << ((insn >> 7) & 2);
    if (insn & (1 << 24)) {
        /* Pre indexed */
        if (insn & (1 << 23))
            tcg_gen_addi_i32(tmp, tmp, offset);
        else
            tcg_gen_addi_i32(tmp, tmp, -offset);
        tcg_gen_mov_i32(dest, tmp);
        if (insn & (1 << 21))
            store_reg(s, rd, tmp);
        else
            tcg_temp_free_i32(tmp);
    } else if (insn & (1 << 21)) {
        /* Post indexed */
        tcg_gen_mov_i32(dest, tmp);
        if (insn & (1 << 23))
            tcg_gen_addi_i32(tmp, tmp, offset);
        else
            tcg_gen_addi_i32(tmp, tmp, -offset);
        store_reg(s, rd, tmp);
    } else if (!(insn & (1 << 23)))
        return 1;
    return 0;
}
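
/*
 * The offset decode above scales the 8-bit immediate by 1 or 4:
 * bit 8 of the insn selects the shift ((insn >> 7) & 2 is 0 or 2),
 * so e.g. insn bits [7:0] = 0x10 with bit 8 set yields offset 0x40.
 */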

static inline int gen_iwmmxt_shift(uint32_t insn, uint32_t mask, TCGv_i32 dest)
{
    int rd = (insn >> 0) & 0xf;
    TCGv_i32 tmp;

    if (insn & (1 << 8)) {
        if (rd < ARM_IWMMXT_wCGR0 || rd > ARM_IWMMXT_wCGR3) {
            return 1;
        } else {
            tmp = iwmmxt_load_creg(rd);
        }
    } else {
        tmp = tcg_temp_new_i32();
        iwmmxt_load_reg(cpu_V0, rd);
        tcg_gen_extrl_i64_i32(tmp, cpu_V0);
    }
    tcg_gen_andi_i32(tmp, tmp, mask);
    tcg_gen_mov_i32(dest, tmp);
    tcg_temp_free_i32(tmp);
    return 0;
}

/* Disassemble an iwMMXt instruction.  Returns nonzero if an error occurred
   (i.e. an undefined instruction).  */
static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn)
{
    int rd, wrd;
    int rdhi, rdlo, rd0, rd1, i;
    TCGv_i32 addr;
    TCGv_i32 tmp, tmp2, tmp3;

    if ((insn & 0x0e000e00) == 0x0c000000) {
        if ((insn & 0x0fe00ff0) == 0x0c400000) {
            wrd = insn & 0xf;
            rdlo = (insn >> 12) & 0xf;
            rdhi = (insn >> 16) & 0xf;
            if (insn & ARM_CP_RW_BIT) {                         /* TMRRC */
                iwmmxt_load_reg(cpu_V0, wrd);
                tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
                tcg_gen_extrh_i64_i32(cpu_R[rdhi], cpu_V0);
            } else {                                    /* TMCRR */
                tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
                iwmmxt_store_reg(cpu_V0, wrd);
                gen_op_iwmmxt_set_mup();
            }
            return 0;
        }

        wrd = (insn >> 12) & 0xf;
        addr = tcg_temp_new_i32();
        if (gen_iwmmxt_address(s, insn, addr)) {
            tcg_temp_free_i32(addr);
            return 1;
        }
        if (insn & ARM_CP_RW_BIT) {
            if ((insn >> 28) == 0xf) {                  /* WLDRW wCx */
                tmp = tcg_temp_new_i32();
                gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
                iwmmxt_store_creg(wrd, tmp);
            } else {
                i = 1;
                if (insn & (1 << 8)) {
                    if (insn & (1 << 22)) {             /* WLDRD */
                        gen_aa32_ld64(s, cpu_M0, addr, get_mem_index(s));
                        i = 0;
                    } else {                            /* WLDRW wRd */
                        tmp = tcg_temp_new_i32();
                        gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
                    }
                } else {
                    tmp = tcg_temp_new_i32();
                    if (insn & (1 << 22)) {             /* WLDRH */
                        gen_aa32_ld16u(s, tmp, addr, get_mem_index(s));
                    } else {                            /* WLDRB */
                        gen_aa32_ld8u(s, tmp, addr, get_mem_index(s));
                    }
                }
                if (i) {
                    tcg_gen_extu_i32_i64(cpu_M0, tmp);
                    tcg_temp_free_i32(tmp);
                }
                gen_op_iwmmxt_movq_wRn_M0(wrd);
            }
        } else {
            if ((insn >> 28) == 0xf) {                  /* WSTRW wCx */
                tmp = iwmmxt_load_creg(wrd);
                gen_aa32_st32(s, tmp, addr, get_mem_index(s));
            } else {
                gen_op_iwmmxt_movq_M0_wRn(wrd);
                tmp = tcg_temp_new_i32();
                if (insn & (1 << 8)) {
                    if (insn & (1 << 22)) {             /* WSTRD */
                        gen_aa32_st64(s, cpu_M0, addr, get_mem_index(s));
                    } else {                            /* WSTRW wRd */
                        tcg_gen_extrl_i64_i32(tmp, cpu_M0);
                        gen_aa32_st32(s, tmp, addr, get_mem_index(s));
                    }
                } else {
                    if (insn & (1 << 22)) {             /* WSTRH */
                        tcg_gen_extrl_i64_i32(tmp, cpu_M0);
                        gen_aa32_st16(s, tmp, addr, get_mem_index(s));
                    } else {                            /* WSTRB */
                        tcg_gen_extrl_i64_i32(tmp, cpu_M0);
                        gen_aa32_st8(s, tmp, addr, get_mem_index(s));
                    }
                }
            }
            tcg_temp_free_i32(tmp);
        }
        tcg_temp_free_i32(addr);
        return 0;
    }

    if ((insn & 0x0f000000) != 0x0e000000)
        return 1;

    switch (((insn >> 12) & 0xf00) | ((insn >> 4) & 0xff)) {
    case 0x000:                                                 /* WOR */
        wrd = (insn >> 12) & 0xf;
        rd0 = (insn >> 0) & 0xf;
        rd1 = (insn >> 16) & 0xf;
        gen_op_iwmmxt_movq_M0_wRn(rd0);
        gen_op_iwmmxt_orq_M0_wRn(rd1);
        gen_op_iwmmxt_setpsr_nz();
        gen_op_iwmmxt_movq_wRn_M0(wrd);
        gen_op_iwmmxt_set_mup();
        gen_op_iwmmxt_set_cup();
        break;
    case 0x011:                                                 /* TMCR */
        if (insn & 0xf)
            return 1;
        rd = (insn >> 12) & 0xf;
        wrd = (insn >> 16) & 0xf;
        switch (wrd) {
        case ARM_IWMMXT_wCID:
        case ARM_IWMMXT_wCASF:
            break;
        case ARM_IWMMXT_wCon:
            gen_op_iwmmxt_set_cup();
            /* Fall through.  */
        case ARM_IWMMXT_wCSSF:
            tmp = iwmmxt_load_creg(wrd);
            tmp2 = load_reg(s, rd);
            tcg_gen_andc_i32(tmp, tmp, tmp2);
            tcg_temp_free_i32(tmp2);
            iwmmxt_store_creg(wrd, tmp);
            break;
        case ARM_IWMMXT_wCGR0:
        case ARM_IWMMXT_wCGR1:
        case ARM_IWMMXT_wCGR2:
        case ARM_IWMMXT_wCGR3:
            gen_op_iwmmxt_set_cup();
            tmp = load_reg(s, rd);
            iwmmxt_store_creg(wrd, tmp);
            break;
        default:
            return 1;
        }
        break;
    case 0x100:                                                 /* WXOR */
        wrd = (insn >> 12) & 0xf;
        rd0 = (insn >> 0) & 0xf;
        rd1 = (insn >> 16) & 0xf;
        gen_op_iwmmxt_movq_M0_wRn(rd0);
        gen_op_iwmmxt_xorq_M0_wRn(rd1);
        gen_op_iwmmxt_setpsr_nz();
        gen_op_iwmmxt_movq_wRn_M0(wrd);
        gen_op_iwmmxt_set_mup();
        gen_op_iwmmxt_set_cup();
        break;
    case 0x111:                                                 /* TMRC */
        if (insn & 0xf)
            return 1;
        rd = (insn >> 12) & 0xf;
        wrd = (insn >> 16) & 0xf;
        tmp = iwmmxt_load_creg(wrd);
        store_reg(s, rd, tmp);
        break;
    case 0x300:                                                 /* WANDN */
        wrd = (insn >> 12) & 0xf;
        rd0 = (insn >> 0) & 0xf;
        rd1 = (insn >> 16) & 0xf;
        gen_op_iwmmxt_movq_M0_wRn(rd0);
1677        tcg_gen_not_i64(cpu_M0, cpu_M0);    /* bitwise NOT: wRd = wRn & ~wRm */
1678        gen_op_iwmmxt_andq_M0_wRn(rd1);
1679        gen_op_iwmmxt_setpsr_nz();
1680        gen_op_iwmmxt_movq_wRn_M0(wrd);
1681        gen_op_iwmmxt_set_mup();
1682        gen_op_iwmmxt_set_cup();
1683        break;
1684    case 0x200:                                                 /* WAND */
1685        wrd = (insn >> 12) & 0xf;
1686        rd0 = (insn >> 0) & 0xf;
1687        rd1 = (insn >> 16) & 0xf;
1688        gen_op_iwmmxt_movq_M0_wRn(rd0);
1689        gen_op_iwmmxt_andq_M0_wRn(rd1);
1690        gen_op_iwmmxt_setpsr_nz();
1691        gen_op_iwmmxt_movq_wRn_M0(wrd);
1692        gen_op_iwmmxt_set_mup();
1693        gen_op_iwmmxt_set_cup();
1694        break;
1695    case 0x810: case 0xa10:                             /* WMADD */
1696        wrd = (insn >> 12) & 0xf;
1697        rd0 = (insn >> 0) & 0xf;
1698        rd1 = (insn >> 16) & 0xf;
1699        gen_op_iwmmxt_movq_M0_wRn(rd0);
1700        if (insn & (1 << 21))
1701            gen_op_iwmmxt_maddsq_M0_wRn(rd1);
1702        else
1703            gen_op_iwmmxt_madduq_M0_wRn(rd1);
1704        gen_op_iwmmxt_movq_wRn_M0(wrd);
1705        gen_op_iwmmxt_set_mup();
1706        break;
1707    case 0x10e: case 0x50e: case 0x90e: case 0xd0e:     /* WUNPCKIL */
1708        wrd = (insn >> 12) & 0xf;
1709        rd0 = (insn >> 16) & 0xf;
1710        rd1 = (insn >> 0) & 0xf;
1711        gen_op_iwmmxt_movq_M0_wRn(rd0);
1712        switch ((insn >> 22) & 3) {
1713        case 0:
1714            gen_op_iwmmxt_unpacklb_M0_wRn(rd1);
1715            break;
1716        case 1:
1717            gen_op_iwmmxt_unpacklw_M0_wRn(rd1);
1718            break;
1719        case 2:
1720            gen_op_iwmmxt_unpackll_M0_wRn(rd1);
1721            break;
1722        case 3:
1723            return 1;
1724        }
1725        gen_op_iwmmxt_movq_wRn_M0(wrd);
1726        gen_op_iwmmxt_set_mup();
1727        gen_op_iwmmxt_set_cup();
1728        break;
1729    case 0x10c: case 0x50c: case 0x90c: case 0xd0c:     /* WUNPCKIH */
1730        wrd = (insn >> 12) & 0xf;
1731        rd0 = (insn >> 16) & 0xf;
1732        rd1 = (insn >> 0) & 0xf;
1733        gen_op_iwmmxt_movq_M0_wRn(rd0);
1734        switch ((insn >> 22) & 3) {
1735        case 0:
1736            gen_op_iwmmxt_unpackhb_M0_wRn(rd1);
1737            break;
1738        case 1:
1739            gen_op_iwmmxt_unpackhw_M0_wRn(rd1);
1740            break;
1741        case 2:
1742            gen_op_iwmmxt_unpackhl_M0_wRn(rd1);
1743            break;
1744        case 3:
1745            return 1;
1746        }
1747        gen_op_iwmmxt_movq_wRn_M0(wrd);
1748        gen_op_iwmmxt_set_mup();
1749        gen_op_iwmmxt_set_cup();
1750        break;
1751    case 0x012: case 0x112: case 0x412: case 0x512:     /* WSAD */
1752        wrd = (insn >> 12) & 0xf;
1753        rd0 = (insn >> 16) & 0xf;
1754        rd1 = (insn >> 0) & 0xf;
1755        gen_op_iwmmxt_movq_M0_wRn(rd0);
1756        if (insn & (1 << 22))
1757            gen_op_iwmmxt_sadw_M0_wRn(rd1);
1758        else
1759            gen_op_iwmmxt_sadb_M0_wRn(rd1);
1760        if (!(insn & (1 << 20)))
1761            gen_op_iwmmxt_addl_M0_wRn(wrd);
1762        gen_op_iwmmxt_movq_wRn_M0(wrd);
1763        gen_op_iwmmxt_set_mup();
1764        break;
1765    case 0x010: case 0x110: case 0x210: case 0x310:     /* WMUL */
1766        wrd = (insn >> 12) & 0xf;
1767        rd0 = (insn >> 16) & 0xf;
1768        rd1 = (insn >> 0) & 0xf;
1769        gen_op_iwmmxt_movq_M0_wRn(rd0);
1770        if (insn & (1 << 21)) {
1771            if (insn & (1 << 20))
1772                gen_op_iwmmxt_mulshw_M0_wRn(rd1);
1773            else
1774                gen_op_iwmmxt_mulslw_M0_wRn(rd1);
1775        } else {
1776            if (insn & (1 << 20))
1777                gen_op_iwmmxt_muluhw_M0_wRn(rd1);
1778            else
1779                gen_op_iwmmxt_mululw_M0_wRn(rd1);
1780        }
1781        gen_op_iwmmxt_movq_wRn_M0(wrd);
1782        gen_op_iwmmxt_set_mup();
1783        break;
1784    case 0x410: case 0x510: case 0x610: case 0x710:     /* WMAC */
1785        wrd = (insn >> 12) & 0xf;
1786        rd0 = (insn >> 16) & 0xf;
1787        rd1 = (insn >> 0) & 0xf;
1788        gen_op_iwmmxt_movq_M0_wRn(rd0);
1789        if (insn & (1 << 21))
1790            gen_op_iwmmxt_macsw_M0_wRn(rd1);
1791        else
1792            gen_op_iwmmxt_macuw_M0_wRn(rd1);
1793        if (!(insn & (1 << 20))) {
1794            iwmmxt_load_reg(cpu_V1, wrd);
1795            tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
1796        }
1797        gen_op_iwmmxt_movq_wRn_M0(wrd);
1798        gen_op_iwmmxt_set_mup();
1799        break;
1800    case 0x006: case 0x406: case 0x806: case 0xc06:     /* WCMPEQ */
1801        wrd = (insn >> 12) & 0xf;
1802        rd0 = (insn >> 16) & 0xf;
1803        rd1 = (insn >> 0) & 0xf;
1804        gen_op_iwmmxt_movq_M0_wRn(rd0);
1805        switch ((insn >> 22) & 3) {
1806        case 0:
1807            gen_op_iwmmxt_cmpeqb_M0_wRn(rd1);
1808            break;
1809        case 1:
1810            gen_op_iwmmxt_cmpeqw_M0_wRn(rd1);
1811            break;
1812        case 2:
1813            gen_op_iwmmxt_cmpeql_M0_wRn(rd1);
1814            break;
1815        case 3:
1816            return 1;
1817        }
1818        gen_op_iwmmxt_movq_wRn_M0(wrd);
1819        gen_op_iwmmxt_set_mup();
1820        gen_op_iwmmxt_set_cup();
1821        break;
1822    case 0x800: case 0x900: case 0xc00: case 0xd00:     /* WAVG2 */
1823        wrd = (insn >> 12) & 0xf;
1824        rd0 = (insn >> 16) & 0xf;
1825        rd1 = (insn >> 0) & 0xf;
1826        gen_op_iwmmxt_movq_M0_wRn(rd0);
1827        if (insn & (1 << 22)) {
1828            if (insn & (1 << 20))
1829                gen_op_iwmmxt_avgw1_M0_wRn(rd1);
1830            else
1831                gen_op_iwmmxt_avgw0_M0_wRn(rd1);
1832        } else {
1833            if (insn & (1 << 20))
1834                gen_op_iwmmxt_avgb1_M0_wRn(rd1);
1835            else
1836                gen_op_iwmmxt_avgb0_M0_wRn(rd1);
1837        }
1838        gen_op_iwmmxt_movq_wRn_M0(wrd);
1839        gen_op_iwmmxt_set_mup();
1840        gen_op_iwmmxt_set_cup();
1841        break;
1842    case 0x802: case 0x902: case 0xa02: case 0xb02:     /* WALIGNR */
1843        wrd = (insn >> 12) & 0xf;
1844        rd0 = (insn >> 16) & 0xf;
1845        rd1 = (insn >> 0) & 0xf;
1846        gen_op_iwmmxt_movq_M0_wRn(rd0);
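        /* The byte alignment amount comes from the wCGR control
         * register selected by insn[21:20], reduced modulo 8. */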
1847        tmp = iwmmxt_load_creg(ARM_IWMMXT_wCGR0 + ((insn >> 20) & 3));
1848        tcg_gen_andi_i32(tmp, tmp, 7);
1849        iwmmxt_load_reg(cpu_V1, rd1);
1850        gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp);
1851        tcg_temp_free_i32(tmp);
1852        gen_op_iwmmxt_movq_wRn_M0(wrd);
1853        gen_op_iwmmxt_set_mup();
1854        break;
1855    case 0x601: case 0x605: case 0x609: case 0x60d:     /* TINSR */
1856        if (((insn >> 6) & 3) == 3)
1857            return 1;
1858        rd = (insn >> 12) & 0xf;
1859        wrd = (insn >> 16) & 0xf;
1860        tmp = load_reg(s, rd);
1861        gen_op_iwmmxt_movq_M0_wRn(wrd);
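        /* tmp2 is the mask for the selected element width and tmp3 the
         * bit offset of that element within the 64-bit register. */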
1862        switch ((insn >> 6) & 3) {
1863        case 0:
1864            tmp2 = tcg_const_i32(0xff);
1865            tmp3 = tcg_const_i32((insn & 7) << 3);
1866            break;
1867        case 1:
1868            tmp2 = tcg_const_i32(0xffff);
1869            tmp3 = tcg_const_i32((insn & 3) << 4);
1870            break;
1871        case 2:
1872            tmp2 = tcg_const_i32(0xffffffff);
1873            tmp3 = tcg_const_i32((insn & 1) << 5);
1874            break;
1875        default:
1876            tmp2 = NULL;
1877            tmp3 = NULL;
1878        }
1879        gen_helper_iwmmxt_insr(cpu_M0, cpu_M0, tmp, tmp2, tmp3);
1880        tcg_temp_free_i32(tmp3);
1881        tcg_temp_free_i32(tmp2);
1882        tcg_temp_free_i32(tmp);
1883        gen_op_iwmmxt_movq_wRn_M0(wrd);
1884        gen_op_iwmmxt_set_mup();
1885        break;
1886    case 0x107: case 0x507: case 0x907: case 0xd07:     /* TEXTRM */
1887        rd = (insn >> 12) & 0xf;
1888        wrd = (insn >> 16) & 0xf;
1889        if (rd == 15 || ((insn >> 22) & 3) == 3)
1890            return 1;
1891        gen_op_iwmmxt_movq_M0_wRn(wrd);
1892        tmp = tcg_temp_new_i32();
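        /* Shift the selected element down to bit 0, then sign- or
         * zero-extend it according to insn bit 3. */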
1893        switch ((insn >> 22) & 3) {
1894        case 0:
1895            tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 7) << 3);
1896            tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1897            if (insn & 8) {
1898                tcg_gen_ext8s_i32(tmp, tmp);
1899            } else {
1900                tcg_gen_andi_i32(tmp, tmp, 0xff);
1901            }
1902            break;
1903        case 1:
1904            tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 3) << 4);
1905            tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1906            if (insn & 8) {
1907                tcg_gen_ext16s_i32(tmp, tmp);
1908            } else {
1909                tcg_gen_andi_i32(tmp, tmp, 0xffff);
1910            }
1911            break;
1912        case 2:
1913            tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 1) << 5);
1914            tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1915            break;
1916        }
1917        store_reg(s, rd, tmp);
1918        break;
1919    case 0x117: case 0x517: case 0x917: case 0xd17:     /* TEXTRC */
1920        if ((insn & 0x000ff008) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1921            return 1;
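        /* Extract the selected element's 4-bit SIMD flag field from
         * wCASF and move it into the NZCV position (bits [31:28]). */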
1922        tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1923        switch ((insn >> 22) & 3) {
1924        case 0:
1925            tcg_gen_shri_i32(tmp, tmp, ((insn & 7) << 2) + 0);
1926            break;
1927        case 1:
1928            tcg_gen_shri_i32(tmp, tmp, ((insn & 3) << 3) + 4);
1929            break;
1930        case 2:
1931            tcg_gen_shri_i32(tmp, tmp, ((insn & 1) << 4) + 12);
1932            break;
1933        }
1934        tcg_gen_shli_i32(tmp, tmp, 28);
1935        gen_set_nzcv(tmp);
1936        tcg_temp_free_i32(tmp);
1937        break;
1938    case 0x401: case 0x405: case 0x409: case 0x40d:     /* TBCST */
1939        if (((insn >> 6) & 3) == 3)
1940            return 1;
1941        rd = (insn >> 12) & 0xf;
1942        wrd = (insn >> 16) & 0xf;
1943        tmp = load_reg(s, rd);
1944        switch ((insn >> 6) & 3) {
1945        case 0:
1946            gen_helper_iwmmxt_bcstb(cpu_M0, tmp);
1947            break;
1948        case 1:
1949            gen_helper_iwmmxt_bcstw(cpu_M0, tmp);
1950            break;
1951        case 2:
1952            gen_helper_iwmmxt_bcstl(cpu_M0, tmp);
1953            break;
1954        }
1955        tcg_temp_free_i32(tmp);
1956        gen_op_iwmmxt_movq_wRn_M0(wrd);
1957        gen_op_iwmmxt_set_mup();
1958        break;
1959    case 0x113: case 0x513: case 0x913: case 0xd13:     /* TANDC */
1960        if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1961            return 1;
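        /* AND-reduce the per-element flag fields of wCASF so that the
         * top nibble ends up holding the conjunction of all of them. */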
1962        tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1963        tmp2 = tcg_temp_new_i32();
1964        tcg_gen_mov_i32(tmp2, tmp);
1965        switch ((insn >> 22) & 3) {
1966        case 0:
1967            for (i = 0; i < 7; i ++) {
1968                tcg_gen_shli_i32(tmp2, tmp2, 4);
1969                tcg_gen_and_i32(tmp, tmp, tmp2);
1970            }
1971            break;
1972        case 1:
1973            for (i = 0; i < 3; i ++) {
1974                tcg_gen_shli_i32(tmp2, tmp2, 8);
1975                tcg_gen_and_i32(tmp, tmp, tmp2);
1976            }
1977            break;
1978        case 2:
1979            tcg_gen_shli_i32(tmp2, tmp2, 16);
1980            tcg_gen_and_i32(tmp, tmp, tmp2);
1981            break;
1982        }
1983        gen_set_nzcv(tmp);
1984        tcg_temp_free_i32(tmp2);
1985        tcg_temp_free_i32(tmp);
1986        break;
1987    case 0x01c: case 0x41c: case 0x81c: case 0xc1c:     /* WACC */
1988        wrd = (insn >> 12) & 0xf;
1989        rd0 = (insn >> 16) & 0xf;
1990        gen_op_iwmmxt_movq_M0_wRn(rd0);
1991        switch ((insn >> 22) & 3) {
1992        case 0:
1993            gen_helper_iwmmxt_addcb(cpu_M0, cpu_M0);
1994            break;
1995        case 1:
1996            gen_helper_iwmmxt_addcw(cpu_M0, cpu_M0);
1997            break;
1998        case 2:
1999            gen_helper_iwmmxt_addcl(cpu_M0, cpu_M0);
2000            break;
2001        case 3:
2002            return 1;
2003        }
2004        gen_op_iwmmxt_movq_wRn_M0(wrd);
2005        gen_op_iwmmxt_set_mup();
2006        break;
2007    case 0x115: case 0x515: case 0x915: case 0xd15:     /* TORC */
2008        if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
2009            return 1;
2010        tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
2011        tmp2 = tcg_temp_new_i32();
2012        tcg_gen_mov_i32(tmp2, tmp);
2013        switch ((insn >> 22) & 3) {
2014        case 0:
2015            for (i = 0; i < 7; i ++) {
2016                tcg_gen_shli_i32(tmp2, tmp2, 4);
2017                tcg_gen_or_i32(tmp, tmp, tmp2);
2018            }
2019            break;
2020        case 1:
2021            for (i = 0; i < 3; i ++) {
2022                tcg_gen_shli_i32(tmp2, tmp2, 8);
2023                tcg_gen_or_i32(tmp, tmp, tmp2);
2024            }
2025            break;
2026        case 2:
2027            tcg_gen_shli_i32(tmp2, tmp2, 16);
2028            tcg_gen_or_i32(tmp, tmp, tmp2);
2029            break;
2030        }
2031        gen_set_nzcv(tmp);
2032        tcg_temp_free_i32(tmp2);
2033        tcg_temp_free_i32(tmp);
2034        break;
2035    case 0x103: case 0x503: case 0x903: case 0xd03:     /* TMOVMSK */
2036        rd = (insn >> 12) & 0xf;
2037        rd0 = (insn >> 16) & 0xf;
2038        if ((insn & 0xf) != 0 || ((insn >> 22) & 3) == 3)
2039            return 1;
2040        gen_op_iwmmxt_movq_M0_wRn(rd0);
2041        tmp = tcg_temp_new_i32();
2042        switch ((insn >> 22) & 3) {
2043        case 0:
2044            gen_helper_iwmmxt_msbb(tmp, cpu_M0);
2045            break;
2046        case 1:
2047            gen_helper_iwmmxt_msbw(tmp, cpu_M0);
2048            break;
2049        case 2:
2050            gen_helper_iwmmxt_msbl(tmp, cpu_M0);
2051            break;
2052        }
2053        store_reg(s, rd, tmp);
2054        break;
2055    case 0x106: case 0x306: case 0x506: case 0x706:     /* WCMPGT */
2056    case 0x906: case 0xb06: case 0xd06: case 0xf06:
2057        wrd = (insn >> 12) & 0xf;
2058        rd0 = (insn >> 16) & 0xf;
2059        rd1 = (insn >> 0) & 0xf;
2060        gen_op_iwmmxt_movq_M0_wRn(rd0);
2061        switch ((insn >> 22) & 3) {
2062        case 0:
2063            if (insn & (1 << 21))
2064                gen_op_iwmmxt_cmpgtsb_M0_wRn(rd1);
2065            else
2066                gen_op_iwmmxt_cmpgtub_M0_wRn(rd1);
2067            break;
2068        case 1:
2069            if (insn & (1 << 21))
2070                gen_op_iwmmxt_cmpgtsw_M0_wRn(rd1);
2071            else
2072                gen_op_iwmmxt_cmpgtuw_M0_wRn(rd1);
2073            break;
2074        case 2:
2075            if (insn & (1 << 21))
2076                gen_op_iwmmxt_cmpgtsl_M0_wRn(rd1);
2077            else
2078                gen_op_iwmmxt_cmpgtul_M0_wRn(rd1);
2079            break;
2080        case 3:
2081            return 1;
2082        }
2083        gen_op_iwmmxt_movq_wRn_M0(wrd);
2084        gen_op_iwmmxt_set_mup();
2085        gen_op_iwmmxt_set_cup();
2086        break;
2087    case 0x00e: case 0x20e: case 0x40e: case 0x60e:     /* WUNPCKEL */
2088    case 0x80e: case 0xa0e: case 0xc0e: case 0xe0e:
2089        wrd = (insn >> 12) & 0xf;
2090        rd0 = (insn >> 16) & 0xf;
2091        gen_op_iwmmxt_movq_M0_wRn(rd0);
2092        switch ((insn >> 22) & 3) {
2093        case 0:
2094            if (insn & (1 << 21))
2095                gen_op_iwmmxt_unpacklsb_M0();
2096            else
2097                gen_op_iwmmxt_unpacklub_M0();
2098            break;
2099        case 1:
2100            if (insn & (1 << 21))
2101                gen_op_iwmmxt_unpacklsw_M0();
2102            else
2103                gen_op_iwmmxt_unpackluw_M0();
2104            break;
2105        case 2:
2106            if (insn & (1 << 21))
2107                gen_op_iwmmxt_unpacklsl_M0();
2108            else
2109                gen_op_iwmmxt_unpacklul_M0();
2110            break;
2111        case 3:
2112            return 1;
2113        }
2114        gen_op_iwmmxt_movq_wRn_M0(wrd);
2115        gen_op_iwmmxt_set_mup();
2116        gen_op_iwmmxt_set_cup();
2117        break;
2118    case 0x00c: case 0x20c: case 0x40c: case 0x60c:     /* WUNPCKEH */
2119    case 0x80c: case 0xa0c: case 0xc0c: case 0xe0c:
2120        wrd = (insn >> 12) & 0xf;
2121        rd0 = (insn >> 16) & 0xf;
2122        gen_op_iwmmxt_movq_M0_wRn(rd0);
2123        switch ((insn >> 22) & 3) {
2124        case 0:
2125            if (insn & (1 << 21))
2126                gen_op_iwmmxt_unpackhsb_M0();
2127            else
2128                gen_op_iwmmxt_unpackhub_M0();
2129            break;
2130        case 1:
2131            if (insn & (1 << 21))
2132                gen_op_iwmmxt_unpackhsw_M0();
2133            else
2134                gen_op_iwmmxt_unpackhuw_M0();
2135            break;
2136        case 2:
2137            if (insn & (1 << 21))
2138                gen_op_iwmmxt_unpackhsl_M0();
2139            else
2140                gen_op_iwmmxt_unpackhul_M0();
2141            break;
2142        case 3:
2143            return 1;
2144        }
2145        gen_op_iwmmxt_movq_wRn_M0(wrd);
2146        gen_op_iwmmxt_set_mup();
2147        gen_op_iwmmxt_set_cup();
2148        break;
2149    case 0x204: case 0x604: case 0xa04: case 0xe04:     /* WSRL */
2150    case 0x214: case 0x614: case 0xa14: case 0xe14:
2151        if (((insn >> 22) & 3) == 0)
2152            return 1;
2153        wrd = (insn >> 12) & 0xf;
2154        rd0 = (insn >> 16) & 0xf;
2155        gen_op_iwmmxt_movq_M0_wRn(rd0);
2156        tmp = tcg_temp_new_i32();
2157        if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2158            tcg_temp_free_i32(tmp);
2159            return 1;
2160        }
2161        switch ((insn >> 22) & 3) {
2162        case 1:
2163            gen_helper_iwmmxt_srlw(cpu_M0, cpu_env, cpu_M0, tmp);
2164            break;
2165        case 2:
2166            gen_helper_iwmmxt_srll(cpu_M0, cpu_env, cpu_M0, tmp);
2167            break;
2168        case 3:
2169            gen_helper_iwmmxt_srlq(cpu_M0, cpu_env, cpu_M0, tmp);
2170            break;
2171        }
2172        tcg_temp_free_i32(tmp);
2173        gen_op_iwmmxt_movq_wRn_M0(wrd);
2174        gen_op_iwmmxt_set_mup();
2175        gen_op_iwmmxt_set_cup();
2176        break;
2177    case 0x004: case 0x404: case 0x804: case 0xc04:     /* WSRA */
2178    case 0x014: case 0x414: case 0x814: case 0xc14:
2179        if (((insn >> 22) & 3) == 0)
2180            return 1;
2181        wrd = (insn >> 12) & 0xf;
2182        rd0 = (insn >> 16) & 0xf;
2183        gen_op_iwmmxt_movq_M0_wRn(rd0);
2184        tmp = tcg_temp_new_i32();
2185        if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2186            tcg_temp_free_i32(tmp);
2187            return 1;
2188        }
2189        switch ((insn >> 22) & 3) {
2190        case 1:
2191            gen_helper_iwmmxt_sraw(cpu_M0, cpu_env, cpu_M0, tmp);
2192            break;
2193        case 2:
2194            gen_helper_iwmmxt_sral(cpu_M0, cpu_env, cpu_M0, tmp);
2195            break;
2196        case 3:
2197            gen_helper_iwmmxt_sraq(cpu_M0, cpu_env, cpu_M0, tmp);
2198            break;
2199        }
2200        tcg_temp_free_i32(tmp);
2201        gen_op_iwmmxt_movq_wRn_M0(wrd);
2202        gen_op_iwmmxt_set_mup();
2203        gen_op_iwmmxt_set_cup();
2204        break;
2205    case 0x104: case 0x504: case 0x904: case 0xd04:     /* WSLL */
2206    case 0x114: case 0x514: case 0x914: case 0xd14:
2207        if (((insn >> 22) & 3) == 0)
2208            return 1;
2209        wrd = (insn >> 12) & 0xf;
2210        rd0 = (insn >> 16) & 0xf;
2211        gen_op_iwmmxt_movq_M0_wRn(rd0);
2212        tmp = tcg_temp_new_i32();
2213        if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2214            tcg_temp_free_i32(tmp);
2215            return 1;
2216        }
2217        switch ((insn >> 22) & 3) {
2218        case 1:
2219            gen_helper_iwmmxt_sllw(cpu_M0, cpu_env, cpu_M0, tmp);
2220            break;
2221        case 2:
2222            gen_helper_iwmmxt_slll(cpu_M0, cpu_env, cpu_M0, tmp);
2223            break;
2224        case 3:
2225            gen_helper_iwmmxt_sllq(cpu_M0, cpu_env, cpu_M0, tmp);
2226            break;
2227        }
2228        tcg_temp_free_i32(tmp);
2229        gen_op_iwmmxt_movq_wRn_M0(wrd);
2230        gen_op_iwmmxt_set_mup();
2231        gen_op_iwmmxt_set_cup();
2232        break;
2233    case 0x304: case 0x704: case 0xb04: case 0xf04:     /* WROR */
2234    case 0x314: case 0x714: case 0xb14: case 0xf14:
2235        if (((insn >> 22) & 3) == 0)
2236            return 1;
2237        wrd = (insn >> 12) & 0xf;
2238        rd0 = (insn >> 16) & 0xf;
2239        gen_op_iwmmxt_movq_M0_wRn(rd0);
2240        tmp = tcg_temp_new_i32();
2241        switch ((insn >> 22) & 3) {
2242        case 1:
2243            if (gen_iwmmxt_shift(insn, 0xf, tmp)) {
2244                tcg_temp_free_i32(tmp);
2245                return 1;
2246            }
2247            gen_helper_iwmmxt_rorw(cpu_M0, cpu_env, cpu_M0, tmp);
2248            break;
2249        case 2:
2250            if (gen_iwmmxt_shift(insn, 0x1f, tmp)) {
2251                tcg_temp_free_i32(tmp);
2252                return 1;
2253            }
2254            gen_helper_iwmmxt_rorl(cpu_M0, cpu_env, cpu_M0, tmp);
2255            break;
2256        case 3:
2257            if (gen_iwmmxt_shift(insn, 0x3f, tmp)) {
2258                tcg_temp_free_i32(tmp);
2259                return 1;
2260            }
2261            gen_helper_iwmmxt_rorq(cpu_M0, cpu_env, cpu_M0, tmp);
2262            break;
2263        }
2264        tcg_temp_free_i32(tmp);
2265        gen_op_iwmmxt_movq_wRn_M0(wrd);
2266        gen_op_iwmmxt_set_mup();
2267        gen_op_iwmmxt_set_cup();
2268        break;
2269    case 0x116: case 0x316: case 0x516: case 0x716:     /* WMIN */
2270    case 0x916: case 0xb16: case 0xd16: case 0xf16:
2271        wrd = (insn >> 12) & 0xf;
2272        rd0 = (insn >> 16) & 0xf;
2273        rd1 = (insn >> 0) & 0xf;
2274        gen_op_iwmmxt_movq_M0_wRn(rd0);
2275        switch ((insn >> 22) & 3) {
2276        case 0:
2277            if (insn & (1 << 21))
2278                gen_op_iwmmxt_minsb_M0_wRn(rd1);
2279            else
2280                gen_op_iwmmxt_minub_M0_wRn(rd1);
2281            break;
2282        case 1:
2283            if (insn & (1 << 21))
2284                gen_op_iwmmxt_minsw_M0_wRn(rd1);
2285            else
2286                gen_op_iwmmxt_minuw_M0_wRn(rd1);
2287            break;
2288        case 2:
2289            if (insn & (1 << 21))
2290                gen_op_iwmmxt_minsl_M0_wRn(rd1);
2291            else
2292                gen_op_iwmmxt_minul_M0_wRn(rd1);
2293            break;
2294        case 3:
2295            return 1;
2296        }
2297        gen_op_iwmmxt_movq_wRn_M0(wrd);
2298        gen_op_iwmmxt_set_mup();
2299        break;
2300    case 0x016: case 0x216: case 0x416: case 0x616:     /* WMAX */
2301    case 0x816: case 0xa16: case 0xc16: case 0xe16:
2302        wrd = (insn >> 12) & 0xf;
2303        rd0 = (insn >> 16) & 0xf;
2304        rd1 = (insn >> 0) & 0xf;
2305        gen_op_iwmmxt_movq_M0_wRn(rd0);
2306        switch ((insn >> 22) & 3) {
2307        case 0:
2308            if (insn & (1 << 21))
2309                gen_op_iwmmxt_maxsb_M0_wRn(rd1);
2310            else
2311                gen_op_iwmmxt_maxub_M0_wRn(rd1);
2312            break;
2313        case 1:
2314            if (insn & (1 << 21))
2315                gen_op_iwmmxt_maxsw_M0_wRn(rd1);
2316            else
2317                gen_op_iwmmxt_maxuw_M0_wRn(rd1);
2318            break;
2319        case 2:
2320            if (insn & (1 << 21))
2321                gen_op_iwmmxt_maxsl_M0_wRn(rd1);
2322            else
2323                gen_op_iwmmxt_maxul_M0_wRn(rd1);
2324            break;
2325        case 3:
2326            return 1;
2327        }
2328        gen_op_iwmmxt_movq_wRn_M0(wrd);
2329        gen_op_iwmmxt_set_mup();
2330        break;
2331    case 0x002: case 0x102: case 0x202: case 0x302:     /* WALIGNI */
2332    case 0x402: case 0x502: case 0x602: case 0x702:
2333        wrd = (insn >> 12) & 0xf;
2334        rd0 = (insn >> 16) & 0xf;
2335        rd1 = (insn >> 0) & 0xf;
2336        gen_op_iwmmxt_movq_M0_wRn(rd0);
2337        tmp = tcg_const_i32((insn >> 20) & 3);
2338        iwmmxt_load_reg(cpu_V1, rd1);
2339        gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp);
2340        tcg_temp_free_i32(tmp);
2341        gen_op_iwmmxt_movq_wRn_M0(wrd);
2342        gen_op_iwmmxt_set_mup();
2343        break;
2344    case 0x01a: case 0x11a: case 0x21a: case 0x31a:     /* WSUB */
2345    case 0x41a: case 0x51a: case 0x61a: case 0x71a:
2346    case 0x81a: case 0x91a: case 0xa1a: case 0xb1a:
2347    case 0xc1a: case 0xd1a: case 0xe1a: case 0xf1a:
2348        wrd = (insn >> 12) & 0xf;
2349        rd0 = (insn >> 16) & 0xf;
2350        rd1 = (insn >> 0) & 0xf;
2351        gen_op_iwmmxt_movq_M0_wRn(rd0);
2352        switch ((insn >> 20) & 0xf) {
2353        case 0x0:
2354            gen_op_iwmmxt_subnb_M0_wRn(rd1);
2355            break;
2356        case 0x1:
2357            gen_op_iwmmxt_subub_M0_wRn(rd1);
2358            break;
2359        case 0x3:
2360            gen_op_iwmmxt_subsb_M0_wRn(rd1);
2361            break;
2362        case 0x4:
2363            gen_op_iwmmxt_subnw_M0_wRn(rd1);
2364            break;
2365        case 0x5:
2366            gen_op_iwmmxt_subuw_M0_wRn(rd1);
2367            break;
2368        case 0x7:
2369            gen_op_iwmmxt_subsw_M0_wRn(rd1);
2370            break;
2371        case 0x8:
2372            gen_op_iwmmxt_subnl_M0_wRn(rd1);
2373            break;
2374        case 0x9:
2375            gen_op_iwmmxt_subul_M0_wRn(rd1);
2376            break;
2377        case 0xb:
2378            gen_op_iwmmxt_subsl_M0_wRn(rd1);
2379            break;
2380        default:
2381            return 1;
2382        }
2383        gen_op_iwmmxt_movq_wRn_M0(wrd);
2384        gen_op_iwmmxt_set_mup();
2385        gen_op_iwmmxt_set_cup();
2386        break;
2387    case 0x01e: case 0x11e: case 0x21e: case 0x31e:     /* WSHUFH */
2388    case 0x41e: case 0x51e: case 0x61e: case 0x71e:
2389    case 0x81e: case 0x91e: case 0xa1e: case 0xb1e:
2390    case 0xc1e: case 0xd1e: case 0xe1e: case 0xf1e:
2391        wrd = (insn >> 12) & 0xf;
2392        rd0 = (insn >> 16) & 0xf;
2393        gen_op_iwmmxt_movq_M0_wRn(rd0);
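        /* The 8-bit shuffle immediate is split across the instruction:
         * high nibble in insn[23:20], low nibble in insn[3:0]. */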
2394        tmp = tcg_const_i32(((insn >> 16) & 0xf0) | (insn & 0x0f));
2395        gen_helper_iwmmxt_shufh(cpu_M0, cpu_env, cpu_M0, tmp);
2396        tcg_temp_free_i32(tmp);
2397        gen_op_iwmmxt_movq_wRn_M0(wrd);
2398        gen_op_iwmmxt_set_mup();
2399        gen_op_iwmmxt_set_cup();
2400        break;
2401    case 0x018: case 0x118: case 0x218: case 0x318:     /* WADD */
2402    case 0x418: case 0x518: case 0x618: case 0x718:
2403    case 0x818: case 0x918: case 0xa18: case 0xb18:
2404    case 0xc18: case 0xd18: case 0xe18: case 0xf18:
2405        wrd = (insn >> 12) & 0xf;
2406        rd0 = (insn >> 16) & 0xf;
2407        rd1 = (insn >> 0) & 0xf;
2408        gen_op_iwmmxt_movq_M0_wRn(rd0);
2409        switch ((insn >> 20) & 0xf) {
2410        case 0x0:
2411            gen_op_iwmmxt_addnb_M0_wRn(rd1);
2412            break;
2413        case 0x1:
2414            gen_op_iwmmxt_addub_M0_wRn(rd1);
2415            break;
2416        case 0x3:
2417            gen_op_iwmmxt_addsb_M0_wRn(rd1);
2418            break;
2419        case 0x4:
2420            gen_op_iwmmxt_addnw_M0_wRn(rd1);
2421            break;
2422        case 0x5:
2423            gen_op_iwmmxt_adduw_M0_wRn(rd1);
2424            break;
2425        case 0x7:
2426            gen_op_iwmmxt_addsw_M0_wRn(rd1);
2427            break;
2428        case 0x8:
2429            gen_op_iwmmxt_addnl_M0_wRn(rd1);
2430            break;
2431        case 0x9:
2432            gen_op_iwmmxt_addul_M0_wRn(rd1);
2433            break;
2434        case 0xb:
2435            gen_op_iwmmxt_addsl_M0_wRn(rd1);
2436            break;
2437        default:
2438            return 1;
2439        }
2440        gen_op_iwmmxt_movq_wRn_M0(wrd);
2441        gen_op_iwmmxt_set_mup();
2442        gen_op_iwmmxt_set_cup();
2443        break;
2444    case 0x008: case 0x108: case 0x208: case 0x308:     /* WPACK */
2445    case 0x408: case 0x508: case 0x608: case 0x708:
2446    case 0x808: case 0x908: case 0xa08: case 0xb08:
2447    case 0xc08: case 0xd08: case 0xe08: case 0xf08:
2448        if (!(insn & (1 << 20)) || ((insn >> 22) & 3) == 0)
2449            return 1;
2450        wrd = (insn >> 12) & 0xf;
2451        rd0 = (insn >> 16) & 0xf;
2452        rd1 = (insn >> 0) & 0xf;
2453        gen_op_iwmmxt_movq_M0_wRn(rd0);
2454        switch ((insn >> 22) & 3) {
2455        case 1:
2456            if (insn & (1 << 21))
2457                gen_op_iwmmxt_packsw_M0_wRn(rd1);
2458            else
2459                gen_op_iwmmxt_packuw_M0_wRn(rd1);
2460            break;
2461        case 2:
2462            if (insn & (1 << 21))
2463                gen_op_iwmmxt_packsl_M0_wRn(rd1);
2464            else
2465                gen_op_iwmmxt_packul_M0_wRn(rd1);
2466            break;
2467        case 3:
2468            if (insn & (1 << 21))
2469                gen_op_iwmmxt_packsq_M0_wRn(rd1);
2470            else
2471                gen_op_iwmmxt_packuq_M0_wRn(rd1);
2472            break;
2473        }
2474        gen_op_iwmmxt_movq_wRn_M0(wrd);
2475        gen_op_iwmmxt_set_mup();
2476        gen_op_iwmmxt_set_cup();
2477        break;
2478    case 0x201: case 0x203: case 0x205: case 0x207:
2479    case 0x209: case 0x20b: case 0x20d: case 0x20f:
2480    case 0x211: case 0x213: case 0x215: case 0x217:
2481    case 0x219: case 0x21b: case 0x21d: case 0x21f:
2482        wrd = (insn >> 5) & 0xf;
2483        rd0 = (insn >> 12) & 0xf;
2484        rd1 = (insn >> 0) & 0xf;
2485        if (rd0 == 0xf || rd1 == 0xf)
2486            return 1;
2487        gen_op_iwmmxt_movq_M0_wRn(wrd);
2488        tmp = load_reg(s, rd0);
2489        tmp2 = load_reg(s, rd1);
2490        switch ((insn >> 16) & 0xf) {
2491        case 0x0:                                       /* TMIA */
2492            gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2493            break;
2494        case 0x8:                                       /* TMIAPH */
2495            gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2496            break;
2497        case 0xc: case 0xd: case 0xe: case 0xf:                 /* TMIAxy */
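            /* Bits 16 and 17 select the top (1) or bottom (0) halfword
             * of each source operand for the widening multiply. */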
2498            if (insn & (1 << 16))
2499                tcg_gen_shri_i32(tmp, tmp, 16);
2500            if (insn & (1 << 17))
2501                tcg_gen_shri_i32(tmp2, tmp2, 16);
2502            gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2503            break;
2504        default:
2505            tcg_temp_free_i32(tmp2);
2506            tcg_temp_free_i32(tmp);
2507            return 1;
2508        }
2509        tcg_temp_free_i32(tmp2);
2510        tcg_temp_free_i32(tmp);
2511        gen_op_iwmmxt_movq_wRn_M0(wrd);
2512        gen_op_iwmmxt_set_mup();
2513        break;
2514    default:
2515        return 1;
2516    }
2517
2518    return 0;
2519}
2520
2521/* Disassemble an XScale DSP instruction.  Returns nonzero if an error occurred
2522   (ie. an undefined instruction).  */
2523static int disas_dsp_insn(DisasContext *s, uint32_t insn)
2524{
2525    int acc, rd0, rd1, rdhi, rdlo;
2526    TCGv_i32 tmp, tmp2;
2527
2528    if ((insn & 0x0ff00f10) == 0x0e200010) {
2529        /* Multiply with Internal Accumulate Format */
2530        rd0 = (insn >> 12) & 0xf;
2531        rd1 = insn & 0xf;
2532        acc = (insn >> 5) & 7;
2533
2534        if (acc != 0)
2535            return 1;
2536
2537        tmp = load_reg(s, rd0);
2538        tmp2 = load_reg(s, rd1);
2539        switch ((insn >> 16) & 0xf) {
2540        case 0x0:                                       /* MIA */
2541            gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2542            break;
2543        case 0x8:                                       /* MIAPH */
2544            gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2545            break;
2546        case 0xc:                                       /* MIABB */
2547        case 0xd:                                       /* MIABT */
2548        case 0xe:                                       /* MIATB */
2549        case 0xf:                                       /* MIATT */
2550            if (insn & (1 << 16))
2551                tcg_gen_shri_i32(tmp, tmp, 16);
2552            if (insn & (1 << 17))
2553                tcg_gen_shri_i32(tmp2, tmp2, 16);
2554            gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2555            break;
2556        default:
2557            return 1;
2558        }
2559        tcg_temp_free_i32(tmp2);
2560        tcg_temp_free_i32(tmp);
2561
2562        gen_op_iwmmxt_movq_wRn_M0(acc);
2563        return 0;
2564    }
2565
2566    if ((insn & 0x0fe00ff8) == 0x0c400000) {
2567        /* Internal Accumulator Access Format */
2568        rdhi = (insn >> 16) & 0xf;
2569        rdlo = (insn >> 12) & 0xf;
2570        acc = insn & 7;
2571
2572        if (acc != 0)
2573            return 1;
2574
2575        if (insn & ARM_CP_RW_BIT) {                     /* MRA */
2576            iwmmxt_load_reg(cpu_V0, acc);
2577            tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
2578            tcg_gen_extrh_i64_i32(cpu_R[rdhi], cpu_V0);
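            /* The accumulator is 40 bits wide; only bits [39:32] are
             * meaningful in the high word. */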
2579            tcg_gen_andi_i32(cpu_R[rdhi], cpu_R[rdhi], (1 << (40 - 32)) - 1);
2580        } else {                                        /* MAR */
2581            tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
2582            iwmmxt_store_reg(cpu_V0, acc);
2583        }
2584        return 0;
2585    }
2586
2587    return 1;
2588}
2589
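/*
 * Direct block chaining with goto_tb is only safe when the destination
 * lies on the same guest page as the start or end of the current TB,
 * so that invalidating the page also invalidates the chained jump;
 * otherwise gen_goto_tb() falls back to the indirect lookup below.
 */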
2590static inline bool use_goto_tb(DisasContext *s, target_ulong dest)
2591{
2592#ifndef CONFIG_USER_ONLY
2593    return (s->base.tb->pc & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK) ||
2594           ((s->base.pc_next - 1) & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK);
2595#else
2596    return true;
2597#endif
2598}
2599
2600static void gen_goto_ptr(void)
2601{
2602    tcg_gen_lookup_and_goto_ptr();
2603}
2604
2605/* This will end the TB but doesn't guarantee we'll return to
2606 * cpu_loop_exec. Any live exit_requests will be processed as we
2607 * enter the next TB.
2608 */
2609static void gen_goto_tb(DisasContext *s, int n, target_ulong dest)
2610{
2611    if (use_goto_tb(s, dest)) {
2612        tcg_gen_goto_tb(n);
2613        gen_set_pc_im(s, dest);
2614        tcg_gen_exit_tb(s->base.tb, n);
2615    } else {
2616        gen_set_pc_im(s, dest);
2617        gen_goto_ptr();
2618    }
2619    s->base.is_jmp = DISAS_NORETURN;
2620}
2621
2622/* Jump, specifying which TB number to use if we gen_goto_tb() */
2623static inline void gen_jmp_tb(DisasContext *s, uint32_t dest, int tbno)
2624{
2625    if (unlikely(is_singlestepping(s))) {
2626        /* An indirect jump so that we still trigger the debug exception.  */
2627        gen_set_pc_im(s, dest);
2628        s->base.is_jmp = DISAS_JUMP;
2629    } else {
2630        gen_goto_tb(s, tbno, dest);
2631    }
2632}
2633
2634static inline void gen_jmp(DisasContext *s, uint32_t dest)
2635{
2636    gen_jmp_tb(s, dest, 0);
2637}
2638
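/*
 * 16x16->32 signed multiply, with x/y selecting the top (1) or bottom (0)
 * halfword of each operand, as used by the SMULxy/SMLAxy family.
 */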
2639static inline void gen_mulxy(TCGv_i32 t0, TCGv_i32 t1, int x, int y)
2640{
2641    if (x)
2642        tcg_gen_sari_i32(t0, t0, 16);
2643    else
2644        gen_sxth(t0);
2645    if (y)
2646        tcg_gen_sari_i32(t1, t1, 16);
2647    else
2648        gen_sxth(t1);
2649    tcg_gen_mul_i32(t0, t0, t1);
2650}
2651
2652/* Return the mask of PSR bits set by a MSR instruction.  */
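/*
 * The flags argument mirrors the MSR field mask: bit 0 = c (bits 7:0),
 * bit 1 = x (15:8), bit 2 = s (23:16), bit 3 = f (31:24).  For example,
 * "MSR CPSR_fc, rN" gives flags 0b1001 and a raw mask of 0xff0000ff
 * before the validity and privilege masking below.
 */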
2653static uint32_t msr_mask(DisasContext *s, int flags, int spsr)
2654{
2655    uint32_t mask = 0;
2656
2657    if (flags & (1 << 0)) {
2658        mask |= 0xff;
2659    }
2660    if (flags & (1 << 1)) {
2661        mask |= 0xff00;
2662    }
2663    if (flags & (1 << 2)) {
2664        mask |= 0xff0000;
2665    }
2666    if (flags & (1 << 3)) {
2667        mask |= 0xff000000;
2668    }
2669
2670    /* Mask out undefined and reserved bits.  */
2671    mask &= aarch32_cpsr_valid_mask(s->features, s->isar);
2672
2673    /* Mask out execution state.  */
2674    if (!spsr) {
2675        mask &= ~CPSR_EXEC;
2676    }
2677
2678    /* Mask out privileged bits.  */
2679    if (IS_USER(s)) {
2680        mask &= CPSR_USER;
2681    }
2682    return mask;
2683}
2684
2685/* Returns nonzero if access to the PSR is not permitted. Marks t0 as dead. */
2686static int gen_set_psr(DisasContext *s, uint32_t mask, int spsr, TCGv_i32 t0)
2687{
2688    TCGv_i32 tmp;
2689    if (spsr) {
2690        /* ??? This is also undefined in system mode.  */
2691        if (IS_USER(s))
2692            return 1;
2693
2694        tmp = load_cpu_field(spsr);
2695        tcg_gen_andi_i32(tmp, tmp, ~mask);
2696        tcg_gen_andi_i32(t0, t0, mask);
2697        tcg_gen_or_i32(tmp, tmp, t0);
2698        store_cpu_field(tmp, spsr);
2699    } else {
2700        gen_set_cpsr(t0, mask);
2701    }
2702    tcg_temp_free_i32(t0);
2703    gen_lookup_tb(s);
2704    return 0;
2705}
2706
2707/* Returns nonzero if access to the PSR is not permitted.  */
2708static int gen_set_psr_im(DisasContext *s, uint32_t mask, int spsr, uint32_t val)
2709{
2710    TCGv_i32 tmp;
2711    tmp = tcg_temp_new_i32();
2712    tcg_gen_movi_i32(tmp, val);
2713    return gen_set_psr(s, mask, spsr, tmp);
2714}
2715
2716static bool msr_banked_access_decode(DisasContext *s, int r, int sysm, int rn,
2717                                     int *tgtmode, int *regno)
2718{
2719    /* Decode the r and sysm fields of MSR/MRS banked accesses into
2720     * the target mode and register number, and identify the various
2721     * unpredictable cases.
2722     * MSR (banked) and MRS (banked) are CONSTRAINED UNPREDICTABLE if:
2723     *  + executed in user mode
2724     *  + using R15 as the src/dest register
2725     *  + accessing an unimplemented register
2726     *  + accessing a register that's inaccessible at current PL/security state (*)
2727     *  + accessing a register that you could access with a different insn
2728     * We choose to UNDEF in all these cases.
2729     * Since we don't know which of the various AArch32 modes we are in
2730     * we have to defer some checks to runtime.
2731     * (*) Accesses to Monitor mode registers from Secure EL1 (which implies
2732     * that EL3 is AArch64) must trap to EL3.
2733     *
2734     * If the access checks fail this function will emit code to take
2735     * an exception and return false. Otherwise it will return true,
2736     * and set *tgtmode and *regno appropriately.
2737     */
2738    int exc_target = default_exception_el(s);
2739
2740    /* These instructions are present only in ARMv8, or in ARMv7 with the
2741     * Virtualization Extensions.
2742     */
2743    if (!arm_dc_feature(s, ARM_FEATURE_V8) &&
2744        !arm_dc_feature(s, ARM_FEATURE_EL2)) {
2745        goto undef;
2746    }
2747
2748    if (IS_USER(s) || rn == 15) {
2749        goto undef;
2750    }
2751
2752    /* The table in the v8 ARM ARM section F5.2.3 describes the encoding
2753     * of registers into (r, sysm).
2754     */
2755    if (r) {
2756        /* SPSRs for other modes */
2757        switch (sysm) {
2758        case 0xe: /* SPSR_fiq */
2759            *tgtmode = ARM_CPU_MODE_FIQ;
2760            break;
2761        case 0x10: /* SPSR_irq */
2762            *tgtmode = ARM_CPU_MODE_IRQ;
2763            break;
2764        case 0x12: /* SPSR_svc */
2765            *tgtmode = ARM_CPU_MODE_SVC;
2766            break;
2767        case 0x14: /* SPSR_abt */
2768            *tgtmode = ARM_CPU_MODE_ABT;
2769            break;
2770        case 0x16: /* SPSR_und */
2771            *tgtmode = ARM_CPU_MODE_UND;
2772            break;
2773        case 0x1c: /* SPSR_mon */
2774            *tgtmode = ARM_CPU_MODE_MON;
2775            break;
2776        case 0x1e: /* SPSR_hyp */
2777            *tgtmode = ARM_CPU_MODE_HYP;
2778            break;
2779        default: /* unallocated */
2780            goto undef;
2781        }
2782        /* We arbitrarily assign SPSR a register number of 16. */
2783        *regno = 16;
2784    } else {
2785        /* general purpose registers for other modes */
2786        switch (sysm) {
2787        case 0x0 ... 0x6:   /* 0b00xxx : r8_usr ... r14_usr */
2788            *tgtmode = ARM_CPU_MODE_USR;
2789            *regno = sysm + 8;
2790            break;
2791        case 0x8 ... 0xe:   /* 0b01xxx : r8_fiq ... r14_fiq */
2792            *tgtmode = ARM_CPU_MODE_FIQ;
2793            *regno = sysm;
2794            break;
2795        case 0x10 ... 0x11: /* 0b1000x : r14_irq, r13_irq */
2796            *tgtmode = ARM_CPU_MODE_IRQ;
2797            *regno = sysm & 1 ? 13 : 14;
2798            break;
2799        case 0x12 ... 0x13: /* 0b1001x : r14_svc, r13_svc */
2800            *tgtmode = ARM_CPU_MODE_SVC;
2801            *regno = sysm & 1 ? 13 : 14;
2802            break;
2803        case 0x14 ... 0x15: /* 0b1010x : r14_abt, r13_abt */
2804            *tgtmode = ARM_CPU_MODE_ABT;
2805            *regno = sysm & 1 ? 13 : 14;
2806            break;
2807        case 0x16 ... 0x17: /* 0b1011x : r14_und, r13_und */
2808            *tgtmode = ARM_CPU_MODE_UND;
2809            *regno = sysm & 1 ? 13 : 14;
2810            break;
2811        case 0x1c ... 0x1d: /* 0b1110x : r14_mon, r13_mon */
2812            *tgtmode = ARM_CPU_MODE_MON;
2813            *regno = sysm & 1 ? 13 : 14;
2814            break;
2815        case 0x1e ... 0x1f: /* 0b1111x : elr_hyp, r13_hyp */
2816            *tgtmode = ARM_CPU_MODE_HYP;
2817            /* Arbitrarily pick 17 for ELR_Hyp (which is not a banked LR!) */
2818            *regno = sysm & 1 ? 13 : 17;
2819            break;
2820        default: /* unallocated */
2821            goto undef;
2822        }
2823    }
2824
2825    /* Catch the 'accessing inaccessible register' cases we can detect
2826     * at translate time.
2827     */
2828    switch (*tgtmode) {
2829    case ARM_CPU_MODE_MON:
2830        if (!arm_dc_feature(s, ARM_FEATURE_EL3) || s->ns) {
2831            goto undef;
2832        }
2833        if (s->current_el == 1) {
2834            /* If we're in Secure EL1 (which implies that EL3 is AArch64)
2835             * then accesses to Mon registers trap to Secure EL2, if it exists,
2836             * otherwise EL3.
2837             */
2838            TCGv_i32 tcg_el;
2839
2840            if (arm_dc_feature(s, ARM_FEATURE_AARCH64) &&
2841                dc_isar_feature(aa64_sel2, s)) {
2842                /* Target EL is EL<3 minus SCR_EL3.EEL2> */
2843                tcg_el = load_cpu_field(cp15.scr_el3);
2844                tcg_gen_sextract_i32(tcg_el, tcg_el, ctz32(SCR_EEL2), 1);
2845                tcg_gen_addi_i32(tcg_el, tcg_el, 3);
2846            } else {
2847                tcg_el = tcg_const_i32(3);
2848            }
2849
2850            gen_exception_el(s, EXCP_UDEF, syn_uncategorized(), tcg_el);
2851            tcg_temp_free_i32(tcg_el);
2852            return false;
2853        }
2854        break;
2855    case ARM_CPU_MODE_HYP:
2856        /*
2857         * SPSR_hyp and r13_hyp can only be accessed from Monitor mode
2858         * (and so we can forbid accesses from EL2 or below). elr_hyp
2859         * can be accessed also from Hyp mode, so forbid accesses from
2860         * EL0 or EL1.
2861         */
2862        if (!arm_dc_feature(s, ARM_FEATURE_EL2) || s->current_el < 2 ||
2863            (s->current_el < 3 && *regno != 17)) {
2864            goto undef;
2865        }
2866        break;
2867    default:
2868        break;
2869    }
2870
2871    return true;
2872
2873undef:
2874    /* If we get here then some access check did not pass */
2875    gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
2876                       syn_uncategorized(), exc_target);
2877    return false;
2878}
2879
2880static void gen_msr_banked(DisasContext *s, int r, int sysm, int rn)
2881{
2882    TCGv_i32 tcg_reg, tcg_tgtmode, tcg_regno;
2883    int tgtmode = 0, regno = 0;
2884
2885    if (!msr_banked_access_decode(s, r, sysm, rn, &tgtmode, &regno)) {
2886        return;
2887    }
2888
2889    /* Sync state because msr_banked() can raise exceptions */
2890    gen_set_condexec(s);
2891    gen_set_pc_im(s, s->pc_curr);
2892    tcg_reg = load_reg(s, rn);
2893    tcg_tgtmode = tcg_const_i32(tgtmode);
2894    tcg_regno = tcg_const_i32(regno);
2895    gen_helper_msr_banked(cpu_env, tcg_reg, tcg_tgtmode, tcg_regno);
2896    tcg_temp_free_i32(tcg_tgtmode);
2897    tcg_temp_free_i32(tcg_regno);
2898    tcg_temp_free_i32(tcg_reg);
2899    s->base.is_jmp = DISAS_UPDATE_EXIT;
2900}
2901
2902static void gen_mrs_banked(DisasContext *s, int r, int sysm, int rn)
2903{
2904    TCGv_i32 tcg_reg, tcg_tgtmode, tcg_regno;
2905    int tgtmode = 0, regno = 0;
2906
2907    if (!msr_banked_access_decode(s, r, sysm, rn, &tgtmode, &regno)) {
2908        return;
2909    }
2910
2911    /* Sync state because mrs_banked() can raise exceptions */
2912    gen_set_condexec(s);
2913    gen_set_pc_im(s, s->pc_curr);
2914    tcg_reg = tcg_temp_new_i32();
2915    tcg_tgtmode = tcg_const_i32(tgtmode);
2916    tcg_regno = tcg_const_i32(regno);
2917    gen_helper_mrs_banked(tcg_reg, cpu_env, tcg_tgtmode, tcg_regno);
2918    tcg_temp_free_i32(tcg_tgtmode);
2919    tcg_temp_free_i32(tcg_regno);
2920    store_reg(s, rn, tcg_reg);
2921    s->base.is_jmp = DISAS_UPDATE_EXIT;
2922}
2923
2924/* Store value to PC as for an exception return (ie don't
2925 * mask bits). The subsequent call to gen_helper_cpsr_write_eret()
2926 * will do the masking based on the new value of the Thumb bit.
2927 */
2928static void store_pc_exc_ret(DisasContext *s, TCGv_i32 pc)
2929{
2930    tcg_gen_mov_i32(cpu_R[15], pc);
2931    tcg_temp_free_i32(pc);
2932}
2933
2934/* Generate a v6 exception return.  Marks both values as dead.  */
2935static void gen_rfe(DisasContext *s, TCGv_i32 pc, TCGv_i32 cpsr)
2936{
2937    store_pc_exc_ret(s, pc);
2938    /* The cpsr_write_eret helper will mask the low bits of PC
2939     * appropriately depending on the new Thumb bit, so it must
2940     * be called after storing the new PC.
2941     */
2942    if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
2943        gen_io_start();
2944    }
2945    gen_helper_cpsr_write_eret(cpu_env, cpsr);
2946    tcg_temp_free_i32(cpsr);
2947    /* Must exit loop to check un-masked IRQs */
2948    s->base.is_jmp = DISAS_EXIT;
2949}
2950
2951/* Generate an old-style exception return. Marks pc as dead. */
2952static void gen_exception_return(DisasContext *s, TCGv_i32 pc)
2953{
2954    gen_rfe(s, pc, load_cpu_field(spsr));
2955}
2956
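/*
 * Expand a three-operand gvec operation whose helper also updates the
 * QC (cumulative saturation) flag, by passing it a pointer to vfp.qc.
 */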
2957static void gen_gvec_fn3_qc(uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs,
2958                            uint32_t opr_sz, uint32_t max_sz,
2959                            gen_helper_gvec_3_ptr *fn)
2960{
2961    TCGv_ptr qc_ptr = tcg_temp_new_ptr();
2962
2963    tcg_gen_addi_ptr(qc_ptr, cpu_env, offsetof(CPUARMState, vfp.qc));
2964    tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, qc_ptr,
2965                       opr_sz, max_sz, 0, fn);
2966    tcg_temp_free_ptr(qc_ptr);
2967}
2968
2969void gen_gvec_sqrdmlah_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
2970                          uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
2971{
2972    static gen_helper_gvec_3_ptr * const fns[2] = {
2973        gen_helper_gvec_qrdmlah_s16, gen_helper_gvec_qrdmlah_s32
2974    };
2975    tcg_debug_assert(vece >= 1 && vece <= 2);
2976    gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
2977}
2978
2979void gen_gvec_sqrdmlsh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
2980                          uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
2981{
2982    static gen_helper_gvec_3_ptr * const fns[2] = {
2983        gen_helper_gvec_qrdmlsh_s16, gen_helper_gvec_qrdmlsh_s32
2984    };
2985    tcg_debug_assert(vece >= 1 && vece <= 2);
2986    gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
2987}
2988
2989#define GEN_CMP0(NAME, COND)                                            \
2990    static void gen_##NAME##0_i32(TCGv_i32 d, TCGv_i32 a)               \
2991    {                                                                   \
2992        tcg_gen_setcondi_i32(COND, d, a, 0);                            \
2993        tcg_gen_neg_i32(d, d);                                          \
2994    }                                                                   \
2995    static void gen_##NAME##0_i64(TCGv_i64 d, TCGv_i64 a)               \
2996    {                                                                   \
2997        tcg_gen_setcondi_i64(COND, d, a, 0);                            \
2998        tcg_gen_neg_i64(d, d);                                          \
2999    }                                                                   \
3000    static void gen_##NAME##0_vec(unsigned vece, TCGv_vec d, TCGv_vec a) \
3001    {                                                                   \
3002        TCGv_vec zero = tcg_const_zeros_vec_matching(d);                \
3003        tcg_gen_cmp_vec(COND, vece, d, a, zero);                        \
3004        tcg_temp_free_vec(zero);                                        \
3005    }                                                                   \
3006    void gen_gvec_##NAME##0(unsigned vece, uint32_t d, uint32_t m,      \
3007                            uint32_t opr_sz, uint32_t max_sz)           \
3008    {                                                                   \
3009        const GVecGen2 op[4] = {                                        \
3010            { .fno = gen_helper_gvec_##NAME##0_b,                       \
3011              .fniv = gen_##NAME##0_vec,                                \
3012              .opt_opc = vecop_list_cmp,                                \
3013              .vece = MO_8 },                                           \
3014            { .fno = gen_helper_gvec_##NAME##0_h,                       \
3015              .fniv = gen_##NAME##0_vec,                                \
3016              .opt_opc = vecop_list_cmp,                                \
3017              .vece = MO_16 },                                          \
3018            { .fni4 = gen_##NAME##0_i32,                                \
3019              .fniv = gen_##NAME##0_vec,                                \
3020              .opt_opc = vecop_list_cmp,                                \
3021              .vece = MO_32 },                                          \
3022            { .fni8 = gen_##NAME##0_i64,                                \
3023              .fniv = gen_##NAME##0_vec,                                \
3024              .opt_opc = vecop_list_cmp,                                \
3025              .prefer_i64 = TCG_TARGET_REG_BITS == 64,                  \
3026              .vece = MO_64 },                                          \
3027        };                                                              \
3028        tcg_gen_gvec_2(d, m, opr_sz, max_sz, &op[vece]);                \
3029    }
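
/*
 * Each expansion produces the Neon truth encoding: setcond yields 0 or 1
 * and the negation turns 1 into -1, i.e. an all-ones element.  E.g.
 * GEN_CMP0(ceq, TCG_COND_EQ) defines, among others:
 *
 *     static void gen_ceq0_i32(TCGv_i32 d, TCGv_i32 a)
 *     {
 *         tcg_gen_setcondi_i32(TCG_COND_EQ, d, a, 0);
 *         tcg_gen_neg_i32(d, d);
 *     }
 */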
3030
3031static const TCGOpcode vecop_list_cmp[] = {
3032    INDEX_op_cmp_vec, 0
3033};
3034
3035GEN_CMP0(ceq, TCG_COND_EQ)
3036GEN_CMP0(cle, TCG_COND_LE)
3037GEN_CMP0(cge, TCG_COND_GE)
3038GEN_CMP0(clt, TCG_COND_LT)
3039GEN_CMP0(cgt, TCG_COND_GT)
3040
3041#undef GEN_CMP0
3042
3043static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3044{
3045    tcg_gen_vec_sar8i_i64(a, a, shift);
3046    tcg_gen_vec_add8_i64(d, d, a);
3047}
3048
3049static void gen_ssra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3050{
3051    tcg_gen_vec_sar16i_i64(a, a, shift);
3052    tcg_gen_vec_add16_i64(d, d, a);
3053}
3054
3055static void gen_ssra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3056{
3057    tcg_gen_sari_i32(a, a, shift);
3058    tcg_gen_add_i32(d, d, a);
3059}
3060
3061static void gen_ssra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3062{
3063    tcg_gen_sari_i64(a, a, shift);
3064    tcg_gen_add_i64(d, d, a);
3065}
3066
3067static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3068{
3069    tcg_gen_sari_vec(vece, a, a, sh);
3070    tcg_gen_add_vec(vece, d, d, a);
3071}
3072
3073void gen_gvec_ssra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3074                   int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3075{
3076    static const TCGOpcode vecop_list[] = {
3077        INDEX_op_sari_vec, INDEX_op_add_vec, 0
3078    };
3079    static const GVecGen2i ops[4] = {
3080        { .fni8 = gen_ssra8_i64,
3081          .fniv = gen_ssra_vec,
3082          .fno = gen_helper_gvec_ssra_b,
3083          .load_dest = true,
3084          .opt_opc = vecop_list,
3085          .vece = MO_8 },
3086        { .fni8 = gen_ssra16_i64,
3087          .fniv = gen_ssra_vec,
3088          .fno = gen_helper_gvec_ssra_h,
3089          .load_dest = true,
3090          .opt_opc = vecop_list,
3091          .vece = MO_16 },
3092        { .fni4 = gen_ssra32_i32,
3093          .fniv = gen_ssra_vec,
3094          .fno = gen_helper_gvec_ssra_s,
3095          .load_dest = true,
3096          .opt_opc = vecop_list,
3097          .vece = MO_32 },
3098        { .fni8 = gen_ssra64_i64,
3099          .fniv = gen_ssra_vec,
3100          .fno = gen_helper_gvec_ssra_d,
3101          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3102          .opt_opc = vecop_list,
3103          .load_dest = true,
3104          .vece = MO_64 },
3105    };
3106
3107    /* tszimm encoding produces immediates in the range [1..esize]. */
3108    tcg_debug_assert(shift > 0);
3109    tcg_debug_assert(shift <= (8 << vece));
3110
3111    /*
3112     * Shifts larger than the element size are architecturally valid:
3113     * a signed shift of esize or more yields all sign bits, so clamp to esize - 1.
3114     */
3115    shift = MIN(shift, (8 << vece) - 1);
3116    tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3117}
3118
3119static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3120{
3121    tcg_gen_vec_shr8i_i64(a, a, shift);
3122    tcg_gen_vec_add8_i64(d, d, a);
3123}
3124
3125static void gen_usra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3126{
3127    tcg_gen_vec_shr16i_i64(a, a, shift);
3128    tcg_gen_vec_add16_i64(d, d, a);
3129}
3130
3131static void gen_usra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3132{
3133    tcg_gen_shri_i32(a, a, shift);
3134    tcg_gen_add_i32(d, d, a);
3135}
3136
3137static void gen_usra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3138{
3139    tcg_gen_shri_i64(a, a, shift);
3140    tcg_gen_add_i64(d, d, a);
3141}
3142
3143static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3144{
3145    tcg_gen_shri_vec(vece, a, a, sh);
3146    tcg_gen_add_vec(vece, d, d, a);
3147}
3148
3149void gen_gvec_usra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3150                   int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3151{
3152    static const TCGOpcode vecop_list[] = {
3153        INDEX_op_shri_vec, INDEX_op_add_vec, 0
3154    };
3155    static const GVecGen2i ops[4] = {
3156        { .fni8 = gen_usra8_i64,
3157          .fniv = gen_usra_vec,
3158          .fno = gen_helper_gvec_usra_b,
3159          .load_dest = true,
3160          .opt_opc = vecop_list,
3161          .vece = MO_8, },
3162        { .fni8 = gen_usra16_i64,
3163          .fniv = gen_usra_vec,
3164          .fno = gen_helper_gvec_usra_h,
3165          .load_dest = true,
3166          .opt_opc = vecop_list,
3167          .vece = MO_16, },
3168        { .fni4 = gen_usra32_i32,
3169          .fniv = gen_usra_vec,
3170          .fno = gen_helper_gvec_usra_s,
3171          .load_dest = true,
3172          .opt_opc = vecop_list,
3173          .vece = MO_32, },
3174        { .fni8 = gen_usra64_i64,
3175          .fniv = gen_usra_vec,
3176          .fno = gen_helper_gvec_usra_d,
3177          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3178          .load_dest = true,
3179          .opt_opc = vecop_list,
3180          .vece = MO_64, },
3181    };
3182
3183    /* tszimm encoding produces immediates in the range [1..esize]. */
3184    tcg_debug_assert(shift > 0);
3185    tcg_debug_assert(shift <= (8 << vece));
3186
3187    /*
3188     * Shifts larger than the element size are architecturally valid.
3189     * Unsigned results in all zeros as input to accumulate: nop.
3190     */
3191    if (shift < (8 << vece)) {
3192        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3193    } else {
3194        /* Nop, but we do need to clear the tail. */
3195        tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
3196    }
3197}
3198
3199/*
3200 * Shift one less than the requested amount, and the low bit is
3201 * the rounding bit.  For the 8 and 16-bit operations, because we
3202 * mask the low bit, we can perform a normal integer shift instead
3203 * of a vector shift.
3204 */
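/*
 * Illustrative example, esize = 8, sh = 3, a = 22 (0b00010110):
 * the rounding bit is (22 >> 2) & 1 = 1, and (22 >> 3) + 1 = 3,
 * i.e. 22/8 = 2.75 rounded to nearest.
 */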
3205static void gen_srshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3206{
3207    TCGv_i64 t = tcg_temp_new_i64();
3208
3209    tcg_gen_shri_i64(t, a, sh - 1);
3210    tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
3211    tcg_gen_vec_sar8i_i64(d, a, sh);
3212    tcg_gen_vec_add8_i64(d, d, t);
3213    tcg_temp_free_i64(t);
3214}
3215
3216static void gen_srshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3217{
3218    TCGv_i64 t = tcg_temp_new_i64();
3219
3220    tcg_gen_shri_i64(t, a, sh - 1);
3221    tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
3222    tcg_gen_vec_sar16i_i64(d, a, sh);
3223    tcg_gen_vec_add16_i64(d, d, t);
3224    tcg_temp_free_i64(t);
3225}
3226
3227static void gen_srshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3228{
3229    TCGv_i32 t = tcg_temp_new_i32();
3230
3231    tcg_gen_extract_i32(t, a, sh - 1, 1);
3232    tcg_gen_sari_i32(d, a, sh);
3233    tcg_gen_add_i32(d, d, t);
3234    tcg_temp_free_i32(t);
3235}
3236
3237static void gen_srshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3238{
3239    TCGv_i64 t = tcg_temp_new_i64();
3240
3241    tcg_gen_extract_i64(t, a, sh - 1, 1);
3242    tcg_gen_sari_i64(d, a, sh);
3243    tcg_gen_add_i64(d, d, t);
3244    tcg_temp_free_i64(t);
3245}
3246
3247static void gen_srshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3248{
3249    TCGv_vec t = tcg_temp_new_vec_matching(d);
3250    TCGv_vec ones = tcg_temp_new_vec_matching(d);
3251
3252    tcg_gen_shri_vec(vece, t, a, sh - 1);
3253    tcg_gen_dupi_vec(vece, ones, 1);
3254    tcg_gen_and_vec(vece, t, t, ones);
3255    tcg_gen_sari_vec(vece, d, a, sh);
3256    tcg_gen_add_vec(vece, d, d, t);
3257
3258    tcg_temp_free_vec(t);
3259    tcg_temp_free_vec(ones);
3260}
3261
3262void gen_gvec_srshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3263                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3264{
3265    static const TCGOpcode vecop_list[] = {
3266        INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
3267    };
3268    static const GVecGen2i ops[4] = {
3269        { .fni8 = gen_srshr8_i64,
3270          .fniv = gen_srshr_vec,
3271          .fno = gen_helper_gvec_srshr_b,
3272          .opt_opc = vecop_list,
3273          .vece = MO_8 },
3274        { .fni8 = gen_srshr16_i64,
3275          .fniv = gen_srshr_vec,
3276          .fno = gen_helper_gvec_srshr_h,
3277          .opt_opc = vecop_list,
3278          .vece = MO_16 },
3279        { .fni4 = gen_srshr32_i32,
3280          .fniv = gen_srshr_vec,
3281          .fno = gen_helper_gvec_srshr_s,
3282          .opt_opc = vecop_list,
3283          .vece = MO_32 },
3284        { .fni8 = gen_srshr64_i64,
3285          .fniv = gen_srshr_vec,
3286          .fno = gen_helper_gvec_srshr_d,
3287          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3288          .opt_opc = vecop_list,
3289          .vece = MO_64 },
3290    };
3291
3292    /* tszimm encoding produces immediates in the range [1..esize] */
3293    tcg_debug_assert(shift > 0);
3294    tcg_debug_assert(shift <= (8 << vece));
3295
3296    if (shift == (8 << vece)) {
3297        /*
3298         * Shifts larger than the element size are architecturally valid.
3299         * Signed results in all sign bits.  With rounding, this produces
3300         *   (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
3301         * I.e. always zero.
3302         */
3303        tcg_gen_gvec_dup_imm(vece, rd_ofs, opr_sz, max_sz, 0);
3304    } else {
3305        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3306    }
3307}
3308
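/*
 * SRSRA: rounding shift right and accumulate.  Each helper computes
 * the SRSHR result into a temporary and adds it into the destination.
 */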
3309static void gen_srsra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3310{
3311    TCGv_i64 t = tcg_temp_new_i64();
3312
3313    gen_srshr8_i64(t, a, sh);
3314    tcg_gen_vec_add8_i64(d, d, t);
3315    tcg_temp_free_i64(t);
3316}
3317
3318static void gen_srsra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3319{
3320    TCGv_i64 t = tcg_temp_new_i64();
3321
3322    gen_srshr16_i64(t, a, sh);
3323    tcg_gen_vec_add16_i64(d, d, t);
3324    tcg_temp_free_i64(t);
3325}
3326
3327static void gen_srsra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3328{
3329    TCGv_i32 t = tcg_temp_new_i32();
3330
3331    gen_srshr32_i32(t, a, sh);
3332    tcg_gen_add_i32(d, d, t);
3333    tcg_temp_free_i32(t);
3334}
3335
3336static void gen_srsra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3337{
3338    TCGv_i64 t = tcg_temp_new_i64();
3339
3340    gen_srshr64_i64(t, a, sh);
3341    tcg_gen_add_i64(d, d, t);
3342    tcg_temp_free_i64(t);
3343}
3344
3345static void gen_srsra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3346{
3347    TCGv_vec t = tcg_temp_new_vec_matching(d);
3348
3349    gen_srshr_vec(vece, t, a, sh);
3350    tcg_gen_add_vec(vece, d, d, t);
3351    tcg_temp_free_vec(t);
3352}
3353
3354void gen_gvec_srsra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3355                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3356{
3357    static const TCGOpcode vecop_list[] = {
3358        INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
3359    };
3360    static const GVecGen2i ops[4] = {
3361        { .fni8 = gen_srsra8_i64,
3362          .fniv = gen_srsra_vec,
3363          .fno = gen_helper_gvec_srsra_b,
3364          .opt_opc = vecop_list,
3365          .load_dest = true,
3366          .vece = MO_8 },
3367        { .fni8 = gen_srsra16_i64,
3368          .fniv = gen_srsra_vec,
3369          .fno = gen_helper_gvec_srsra_h,
3370          .opt_opc = vecop_list,
3371          .load_dest = true,
3372          .vece = MO_16 },
3373        { .fni4 = gen_srsra32_i32,
3374          .fniv = gen_srsra_vec,
3375          .fno = gen_helper_gvec_srsra_s,
3376          .opt_opc = vecop_list,
3377          .load_dest = true,
3378          .vece = MO_32 },
3379        { .fni8 = gen_srsra64_i64,
3380          .fniv = gen_srsra_vec,
3381          .fno = gen_helper_gvec_srsra_d,
3382          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3383          .opt_opc = vecop_list,
3384          .load_dest = true,
3385          .vece = MO_64 },
3386    };
3387
3388    /* tszimm encoding produces immediates in the range [1..esize] */
3389    tcg_debug_assert(shift > 0);
3390    tcg_debug_assert(shift <= (8 << vece));
3391
3392    /*
3393     * Shifts larger than the element size are architecturally valid.
3394     * Signed results in all sign bits.  With rounding, this produces
3395     *   (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
3396     * I.e. always zero.  With accumulation, this leaves D unchanged.
3397     */
3398    if (shift == (8 << vece)) {
3399        /* Nop, but we do need to clear the tail. */
3400        tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
3401    } else {
3402        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3403    }
3404}
3405
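/*
 * URSHR: as SRSHR above, but with logical (unsigned) shifts for the
 * element shift; the rounding-bit extraction is identical.
 */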
3406static void gen_urshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3407{
3408    TCGv_i64 t = tcg_temp_new_i64();
3409
3410    tcg_gen_shri_i64(t, a, sh - 1);
3411    tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
3412    tcg_gen_vec_shr8i_i64(d, a, sh);
3413    tcg_gen_vec_add8_i64(d, d, t);
3414    tcg_temp_free_i64(t);
3415}
3416
3417static void gen_urshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3418{
3419    TCGv_i64 t = tcg_temp_new_i64();
3420
3421    tcg_gen_shri_i64(t, a, sh - 1);
3422    tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
3423    tcg_gen_vec_shr16i_i64(d, a, sh);
3424    tcg_gen_vec_add16_i64(d, d, t);
3425    tcg_temp_free_i64(t);
3426}
3427
3428static void gen_urshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3429{
3430    TCGv_i32 t = tcg_temp_new_i32();
3431
3432    tcg_gen_extract_i32(t, a, sh - 1, 1);
3433    tcg_gen_shri_i32(d, a, sh);
3434    tcg_gen_add_i32(d, d, t);
3435    tcg_temp_free_i32(t);
3436}
3437
3438static void gen_urshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3439{
3440    TCGv_i64 t = tcg_temp_new_i64();
3441
3442    tcg_gen_extract_i64(t, a, sh - 1, 1);
3443    tcg_gen_shri_i64(d, a, sh);
3444    tcg_gen_add_i64(d, d, t);
3445    tcg_temp_free_i64(t);
3446}
3447
3448static void gen_urshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t shift)
3449{
3450    TCGv_vec t = tcg_temp_new_vec_matching(d);
3451    TCGv_vec ones = tcg_temp_new_vec_matching(d);
3452
3453    tcg_gen_shri_vec(vece, t, a, shift - 1);
3454    tcg_gen_dupi_vec(vece, ones, 1);
3455    tcg_gen_and_vec(vece, t, t, ones);
3456    tcg_gen_shri_vec(vece, d, a, shift);
3457    tcg_gen_add_vec(vece, d, d, t);
3458
3459    tcg_temp_free_vec(t);
3460    tcg_temp_free_vec(ones);
3461}
3462
3463void gen_gvec_urshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3464                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3465{
3466    static const TCGOpcode vecop_list[] = {
3467        INDEX_op_shri_vec, INDEX_op_add_vec, 0
3468    };
3469    static const GVecGen2i ops[4] = {
3470        { .fni8 = gen_urshr8_i64,
3471          .fniv = gen_urshr_vec,
3472          .fno = gen_helper_gvec_urshr_b,
3473          .opt_opc = vecop_list,
3474          .vece = MO_8 },
3475        { .fni8 = gen_urshr16_i64,
3476          .fniv = gen_urshr_vec,
3477          .fno = gen_helper_gvec_urshr_h,
3478          .opt_opc = vecop_list,
3479          .vece = MO_16 },
3480        { .fni4 = gen_urshr32_i32,
3481          .fniv = gen_urshr_vec,
3482          .fno = gen_helper_gvec_urshr_s,
3483          .opt_opc = vecop_list,
3484          .vece = MO_32 },
3485        { .fni8 = gen_urshr64_i64,
3486          .fniv = gen_urshr_vec,
3487          .fno = gen_helper_gvec_urshr_d,
3488          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3489          .opt_opc = vecop_list,
3490          .vece = MO_64 },
3491    };
3492
3493    /* tszimm encoding produces immediates in the range [1..esize] */
3494    tcg_debug_assert(shift > 0);
3495    tcg_debug_assert(shift <= (8 << vece));
3496
3497    if (shift == (8 << vece)) {
3498        /*
3499         * Shifts larger than the element size are architecturally valid.
3500         * Unsigned results in zero.  With rounding, this produces a
3501         * copy of the most significant bit.
3502         */
3503        tcg_gen_gvec_shri(vece, rd_ofs, rm_ofs, shift - 1, opr_sz, max_sz);
3504    } else {
3505        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3506    }
3507}
3508
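/*
 * URSRA: rounding shift right and accumulate.  Unlike in URSHR, the
 * sh == esize case must be handled here: the shifted value is zero,
 * but the rounding bit (the most significant bit of the input) is
 * still added, hence the special-cased shift by esize - 1.
 */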
3509static void gen_ursra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3510{
3511    TCGv_i64 t = tcg_temp_new_i64();
3512
3513    if (sh == 8) {
3514        tcg_gen_vec_shr8i_i64(t, a, 7);
3515    } else {
3516        gen_urshr8_i64(t, a, sh);
3517    }
3518    tcg_gen_vec_add8_i64(d, d, t);
3519    tcg_temp_free_i64(t);
3520}
3521
3522static void gen_ursra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3523{
3524    TCGv_i64 t = tcg_temp_new_i64();
3525
3526    if (sh == 16) {
3527        tcg_gen_vec_shr16i_i64(t, a, 15);
3528    } else {
3529        gen_urshr16_i64(t, a, sh);
3530    }
3531    tcg_gen_vec_add16_i64(d, d, t);
3532    tcg_temp_free_i64(t);
3533}
3534
3535static void gen_ursra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3536{
3537    TCGv_i32 t = tcg_temp_new_i32();
3538
3539    if (sh == 32) {
3540        tcg_gen_shri_i32(t, a, 31);
3541    } else {
3542        gen_urshr32_i32(t, a, sh);
3543    }
3544    tcg_gen_add_i32(d, d, t);
3545    tcg_temp_free_i32(t);
3546}
3547
3548static void gen_ursra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3549{
3550    TCGv_i64 t = tcg_temp_new_i64();
3551
3552    if (sh == 64) {
3553        tcg_gen_shri_i64(t, a, 63);
3554    } else {
3555        gen_urshr64_i64(t, a, sh);
3556    }
3557    tcg_gen_add_i64(d, d, t);
3558    tcg_temp_free_i64(t);
3559}
3560
3561static void gen_ursra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3562{
3563    TCGv_vec t = tcg_temp_new_vec_matching(d);
3564
3565    if (sh == (8 << vece)) {
3566        tcg_gen_shri_vec(vece, t, a, sh - 1);
3567    } else {
3568        gen_urshr_vec(vece, t, a, sh);
3569    }
3570    tcg_gen_add_vec(vece, d, d, t);
3571    tcg_temp_free_vec(t);
3572}
3573
3574void gen_gvec_ursra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3575                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3576{
3577    static const TCGOpcode vecop_list[] = {
3578        INDEX_op_shri_vec, INDEX_op_add_vec, 0
3579    };
3580    static const GVecGen2i ops[4] = {
3581        { .fni8 = gen_ursra8_i64,
3582          .fniv = gen_ursra_vec,
3583          .fno = gen_helper_gvec_ursra_b,
3584          .opt_opc = vecop_list,
3585          .load_dest = true,
3586          .vece = MO_8 },
3587        { .fni8 = gen_ursra16_i64,
3588          .fniv = gen_ursra_vec,
3589          .fno = gen_helper_gvec_ursra_h,
3590          .opt_opc = vecop_list,
3591          .load_dest = true,
3592          .vece = MO_16 },
3593        { .fni4 = gen_ursra32_i32,
3594          .fniv = gen_ursra_vec,
3595          .fno = gen_helper_gvec_ursra_s,
3596          .opt_opc = vecop_list,
3597          .load_dest = true,
3598          .vece = MO_32 },
3599        { .fni8 = gen_ursra64_i64,
3600          .fniv = gen_ursra_vec,
3601          .fno = gen_helper_gvec_ursra_d,
3602          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3603          .opt_opc = vecop_list,
3604          .load_dest = true,
3605          .vece = MO_64 },
3606    };
3607
3608    /* tszimm encoding produces immediates in the range [1..esize] */
3609    tcg_debug_assert(shift > 0);
3610    tcg_debug_assert(shift <= (8 << vece));
3611
3612    tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3613}
3614
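/*
 * SRI: shift right and insert.  Each destination element keeps its
 * top 'shift' bits; the rest are replaced by the right-shifted source.
 * For esize = 8 this is (illustrative):
 *
 *     mask = 0xff >> shift;
 *     d = (d & ~mask) | ((a >> shift) & mask);
 */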
3615static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3616{
3617    uint64_t mask = dup_const(MO_8, 0xff >> shift);
3618    TCGv_i64 t = tcg_temp_new_i64();
3619
3620    tcg_gen_shri_i64(t, a, shift);
3621    tcg_gen_andi_i64(t, t, mask);
3622    tcg_gen_andi_i64(d, d, ~mask);
3623    tcg_gen_or_i64(d, d, t);
3624    tcg_temp_free_i64(t);
3625}
3626
3627static void gen_shr16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3628{
3629    uint64_t mask = dup_const(MO_16, 0xffff >> shift);
3630    TCGv_i64 t = tcg_temp_new_i64();
3631
3632    tcg_gen_shri_i64(t, a, shift);
3633    tcg_gen_andi_i64(t, t, mask);
3634    tcg_gen_andi_i64(d, d, ~mask);
3635    tcg_gen_or_i64(d, d, t);
3636    tcg_temp_free_i64(t);
3637}
3638
3639static void gen_shr32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3640{
3641    tcg_gen_shri_i32(a, a, shift);
3642    tcg_gen_deposit_i32(d, d, a, 0, 32 - shift);
3643}
3644
3645static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3646{
3647    tcg_gen_shri_i64(a, a, shift);
3648    tcg_gen_deposit_i64(d, d, a, 0, 64 - shift);
3649}
3650
3651static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3652{
3653    TCGv_vec t = tcg_temp_new_vec_matching(d);
3654    TCGv_vec m = tcg_temp_new_vec_matching(d);
3655
3656    tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh));
3657    tcg_gen_shri_vec(vece, t, a, sh);
3658    tcg_gen_and_vec(vece, d, d, m);
3659    tcg_gen_or_vec(vece, d, d, t);
3660
3661    tcg_temp_free_vec(t);
3662    tcg_temp_free_vec(m);
3663}
3664
3665void gen_gvec_sri(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3666                  int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3667{
3668    static const TCGOpcode vecop_list[] = { INDEX_op_shri_vec, 0 };
3669    const GVecGen2i ops[4] = {
3670        { .fni8 = gen_shr8_ins_i64,
3671          .fniv = gen_shr_ins_vec,
3672          .fno = gen_helper_gvec_sri_b,
3673          .load_dest = true,
3674          .opt_opc = vecop_list,
3675          .vece = MO_8 },
3676        { .fni8 = gen_shr16_ins_i64,
3677          .fniv = gen_shr_ins_vec,
3678          .fno = gen_helper_gvec_sri_h,
3679          .load_dest = true,
3680          .opt_opc = vecop_list,
3681          .vece = MO_16 },
3682        { .fni4 = gen_shr32_ins_i32,
3683          .fniv = gen_shr_ins_vec,
3684          .fno = gen_helper_gvec_sri_s,
3685          .load_dest = true,
3686          .opt_opc = vecop_list,
3687          .vece = MO_32 },
3688        { .fni8 = gen_shr64_ins_i64,
3689          .fniv = gen_shr_ins_vec,
3690          .fno = gen_helper_gvec_sri_d,
3691          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3692          .load_dest = true,
3693          .opt_opc = vecop_list,
3694          .vece = MO_64 },
3695    };
3696
3697    /* tszimm encoding produces immediates in the range [1..esize]. */
3698    tcg_debug_assert(shift > 0);
3699    tcg_debug_assert(shift <= (8 << vece));
3700
3701    /* Shift of esize leaves destination unchanged. */
3702    if (shift < (8 << vece)) {
3703        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3704    } else {
3705        /* Nop, but we do need to clear the tail. */
3706        tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
3707    }
3708}
3709
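/*
 * SLI: shift left and insert, the mirror image of SRI.  Each
 * destination element keeps its low 'shift' bits and takes the
 * left-shifted source above them.
 */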
3710static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3711{
3712    uint64_t mask = dup_const(MO_8, 0xff << shift);
3713    TCGv_i64 t = tcg_temp_new_i64();
3714
3715    tcg_gen_shli_i64(t, a, shift);
3716    tcg_gen_andi_i64(t, t, mask);
3717    tcg_gen_andi_i64(d, d, ~mask);
3718    tcg_gen_or_i64(d, d, t);
3719    tcg_temp_free_i64(t);
3720}
3721
3722static void gen_shl16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3723{
3724    uint64_t mask = dup_const(MO_16, 0xffff << shift);
3725    TCGv_i64 t = tcg_temp_new_i64();
3726
3727    tcg_gen_shli_i64(t, a, shift);
3728    tcg_gen_andi_i64(t, t, mask);
3729    tcg_gen_andi_i64(d, d, ~mask);
3730    tcg_gen_or_i64(d, d, t);
3731    tcg_temp_free_i64(t);
3732}
3733
3734static void gen_shl32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3735{
3736    tcg_gen_deposit_i32(d, d, a, shift, 32 - shift);
3737}
3738
3739static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3740{
3741    tcg_gen_deposit_i64(d, d, a, shift, 64 - shift);
3742}
3743
3744static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3745{
3746    TCGv_vec t = tcg_temp_new_vec_matching(d);
3747    TCGv_vec m = tcg_temp_new_vec_matching(d);
3748
3749    tcg_gen_shli_vec(vece, t, a, sh);
3750    tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK(0, sh));
3751    tcg_gen_and_vec(vece, d, d, m);
3752    tcg_gen_or_vec(vece, d, d, t);
3753
3754    tcg_temp_free_vec(t);
3755    tcg_temp_free_vec(m);
3756}
3757
3758void gen_gvec_sli(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3759                  int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3760{
3761    static const TCGOpcode vecop_list[] = { INDEX_op_shli_vec, 0 };
3762    const GVecGen2i ops[4] = {
3763        { .fni8 = gen_shl8_ins_i64,
3764          .fniv = gen_shl_ins_vec,
3765          .fno = gen_helper_gvec_sli_b,
3766          .load_dest = true,
3767          .opt_opc = vecop_list,
3768          .vece = MO_8 },
3769        { .fni8 = gen_shl16_ins_i64,
3770          .fniv = gen_shl_ins_vec,
3771          .fno = gen_helper_gvec_sli_h,
3772          .load_dest = true,
3773          .opt_opc = vecop_list,
3774          .vece = MO_16 },
3775        { .fni4 = gen_shl32_ins_i32,
3776          .fniv = gen_shl_ins_vec,
3777          .fno = gen_helper_gvec_sli_s,
3778          .load_dest = true,
3779          .opt_opc = vecop_list,
3780          .vece = MO_32 },
3781        { .fni8 = gen_shl64_ins_i64,
3782          .fniv = gen_shl_ins_vec,
3783          .fno = gen_helper_gvec_sli_d,
3784          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3785          .load_dest = true,
3786          .opt_opc = vecop_list,
3787          .vece = MO_64 },
3788    };
3789
3790    /* tszimm encoding produces immediates in the range [0..esize-1]. */
3791    tcg_debug_assert(shift >= 0);
3792    tcg_debug_assert(shift < (8 << vece));
3793
3794    if (shift == 0) {
3795        tcg_gen_gvec_mov(vece, rd_ofs, rm_ofs, opr_sz, max_sz);
3796    } else {
3797        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3798    }
3799}
3800
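/*
 * Multiply-accumulate (VMLA) and multiply-subtract (VMLS) helpers.
 * These clobber 'a' as scratch; that is safe because the gvec
 * expansion loads the operands into fresh temporaries.
 */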
3801static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3802{
3803    gen_helper_neon_mul_u8(a, a, b);
3804    gen_helper_neon_add_u8(d, d, a);
3805}
3806
3807static void gen_mls8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3808{
3809    gen_helper_neon_mul_u8(a, a, b);
3810    gen_helper_neon_sub_u8(d, d, a);
3811}
3812
3813static void gen_mla16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3814{
3815    gen_helper_neon_mul_u16(a, a, b);
3816    gen_helper_neon_add_u16(d, d, a);
3817}
3818
3819static void gen_mls16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3820{
3821    gen_helper_neon_mul_u16(a, a, b);
3822    gen_helper_neon_sub_u16(d, d, a);
3823}
3824
3825static void gen_mla32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3826{
3827    tcg_gen_mul_i32(a, a, b);
3828    tcg_gen_add_i32(d, d, a);
3829}
3830
3831static void gen_mls32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3832{
3833    tcg_gen_mul_i32(a, a, b);
3834    tcg_gen_sub_i32(d, d, a);
3835}
3836
3837static void gen_mla64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
3838{
3839    tcg_gen_mul_i64(a, a, b);
3840    tcg_gen_add_i64(d, d, a);
3841}
3842
3843static void gen_mls64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
3844{
3845    tcg_gen_mul_i64(a, a, b);
3846    tcg_gen_sub_i64(d, d, a);
3847}
3848
3849static void gen_mla_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
3850{
3851    tcg_gen_mul_vec(vece, a, a, b);
3852    tcg_gen_add_vec(vece, d, d, a);
3853}
3854
3855static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
3856{
3857    tcg_gen_mul_vec(vece, a, a, b);
3858    tcg_gen_sub_vec(vece, d, d, a);
3859}
3860
3861/* Note that while NEON does not support VMLA and VMLS as 64-bit ops,
3862 * these tables are shared with AArch64, which does support them.
3863 */
3864void gen_gvec_mla(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3865                  uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3866{
3867    static const TCGOpcode vecop_list[] = {
3868        INDEX_op_mul_vec, INDEX_op_add_vec, 0
3869    };
3870    static const GVecGen3 ops[4] = {
3871        { .fni4 = gen_mla8_i32,
3872          .fniv = gen_mla_vec,
3873          .load_dest = true,
3874          .opt_opc = vecop_list,
3875          .vece = MO_8 },
3876        { .fni4 = gen_mla16_i32,
3877          .fniv = gen_mla_vec,
3878          .load_dest = true,
3879          .opt_opc = vecop_list,
3880          .vece = MO_16 },
3881        { .fni4 = gen_mla32_i32,
3882          .fniv = gen_mla_vec,
3883          .load_dest = true,
3884          .opt_opc = vecop_list,
3885          .vece = MO_32 },
3886        { .fni8 = gen_mla64_i64,
3887          .fniv = gen_mla_vec,
3888          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3889          .load_dest = true,
3890          .opt_opc = vecop_list,
3891          .vece = MO_64 },
3892    };
3893    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
3894}
3895
3896void gen_gvec_mls(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3897                  uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3898{
3899    static const TCGOpcode vecop_list[] = {
3900        INDEX_op_mul_vec, INDEX_op_sub_vec, 0
3901    };
3902    static const GVecGen3 ops[4] = {
3903        { .fni4 = gen_mls8_i32,
3904          .fniv = gen_mls_vec,
3905          .load_dest = true,
3906          .opt_opc = vecop_list,
3907          .vece = MO_8 },
3908        { .fni4 = gen_mls16_i32,
3909          .fniv = gen_mls_vec,
3910          .load_dest = true,
3911          .opt_opc = vecop_list,
3912          .vece = MO_16 },
3913        { .fni4 = gen_mls32_i32,
3914          .fniv = gen_mls_vec,
3915          .load_dest = true,
3916          .opt_opc = vecop_list,
3917          .vece = MO_32 },
3918        { .fni8 = gen_mls64_i64,
3919          .fniv = gen_mls_vec,
3920          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3921          .load_dest = true,
3922          .opt_opc = vecop_list,
3923          .vece = MO_64 },
3924    };
3925    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
3926}
3927
3928/* CMTST: test is "if ((X & Y) != 0)". */
3929static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3930{
3931    tcg_gen_and_i32(d, a, b);
3932    tcg_gen_setcondi_i32(TCG_COND_NE, d, d, 0);
3933    tcg_gen_neg_i32(d, d);
3934}
3935
3936void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
3937{
3938    tcg_gen_and_i64(d, a, b);
3939    tcg_gen_setcondi_i64(TCG_COND_NE, d, d, 0);
3940    tcg_gen_neg_i64(d, d);
3941}
3942
3943static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
3944{
3945    tcg_gen_and_vec(vece, d, a, b);
3946    tcg_gen_dupi_vec(vece, a, 0);
3947    tcg_gen_cmp_vec(TCG_COND_NE, vece, d, d, a);
3948}
3949
3950void gen_gvec_cmtst(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3951                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3952{
3953    static const TCGOpcode vecop_list[] = { INDEX_op_cmp_vec, 0 };
3954    static const GVecGen3 ops[4] = {
3955        { .fni4 = gen_helper_neon_tst_u8,
3956          .fniv = gen_cmtst_vec,
3957          .opt_opc = vecop_list,
3958          .vece = MO_8 },
3959        { .fni4 = gen_helper_neon_tst_u16,
3960          .fniv = gen_cmtst_vec,
3961          .opt_opc = vecop_list,
3962          .vece = MO_16 },
3963        { .fni4 = gen_cmtst_i32,
3964          .fniv = gen_cmtst_vec,
3965          .opt_opc = vecop_list,
3966          .vece = MO_32 },
3967        { .fni8 = gen_cmtst_i64,
3968          .fniv = gen_cmtst_vec,
3969          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3970          .opt_opc = vecop_list,
3971          .vece = MO_64 },
3972    };
3973    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
3974}
3975
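/*
 * USHL: unsigned shift by a signed, per-element shift count taken
 * from the low byte of 'shift'.  Positive counts shift left, negative
 * counts shift right, and any count whose magnitude is >= the element
 * size produces zero.
 */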
3976void gen_ushl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
3977{
3978    TCGv_i32 lval = tcg_temp_new_i32();
3979    TCGv_i32 rval = tcg_temp_new_i32();
3980    TCGv_i32 lsh = tcg_temp_new_i32();
3981    TCGv_i32 rsh = tcg_temp_new_i32();
3982    TCGv_i32 zero = tcg_const_i32(0);
3983    TCGv_i32 max = tcg_const_i32(32);
3984
3985    /*
3986     * Rely on the TCG guarantee that out of range shifts produce
3987     * unspecified results, not undefined behaviour (i.e. no trap).
3988     * Discard out-of-range results after the fact.
3989     */
3990    tcg_gen_ext8s_i32(lsh, shift);
3991    tcg_gen_neg_i32(rsh, lsh);
3992    tcg_gen_shl_i32(lval, src, lsh);
3993    tcg_gen_shr_i32(rval, src, rsh);
3994    tcg_gen_movcond_i32(TCG_COND_LTU, dst, lsh, max, lval, zero);
3995    tcg_gen_movcond_i32(TCG_COND_LTU, dst, rsh, max, rval, dst);
3996
3997    tcg_temp_free_i32(lval);
3998    tcg_temp_free_i32(rval);
3999    tcg_temp_free_i32(lsh);
4000    tcg_temp_free_i32(rsh);
4001    tcg_temp_free_i32(zero);
4002    tcg_temp_free_i32(max);
4003}
4004
4005void gen_ushl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
4006{
4007    TCGv_i64 lval = tcg_temp_new_i64();
4008    TCGv_i64 rval = tcg_temp_new_i64();
4009    TCGv_i64 lsh = tcg_temp_new_i64();
4010    TCGv_i64 rsh = tcg_temp_new_i64();
4011    TCGv_i64 zero = tcg_const_i64(0);
4012    TCGv_i64 max = tcg_const_i64(64);
4013
4014    /*
4015     * Rely on the TCG guarantee that out of range shifts produce
4016     * unspecified results, not undefined behaviour (i.e. no trap).
4017     * Discard out-of-range results after the fact.
4018     */
4019    tcg_gen_ext8s_i64(lsh, shift);
4020    tcg_gen_neg_i64(rsh, lsh);
4021    tcg_gen_shl_i64(lval, src, lsh);
4022    tcg_gen_shr_i64(rval, src, rsh);
4023    tcg_gen_movcond_i64(TCG_COND_LTU, dst, lsh, max, lval, zero);
4024    tcg_gen_movcond_i64(TCG_COND_LTU, dst, rsh, max, rval, dst);
4025
4026    tcg_temp_free_i64(lval);
4027    tcg_temp_free_i64(rval);
4028    tcg_temp_free_i64(lsh);
4029    tcg_temp_free_i64(rsh);
4030    tcg_temp_free_i64(zero);
4031    tcg_temp_free_i64(max);
4032}
4033
4034static void gen_ushl_vec(unsigned vece, TCGv_vec dst,
4035                         TCGv_vec src, TCGv_vec shift)
4036{
4037    TCGv_vec lval = tcg_temp_new_vec_matching(dst);
4038    TCGv_vec rval = tcg_temp_new_vec_matching(dst);
4039    TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
4040    TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
4041    TCGv_vec msk, max;
4042
4043    tcg_gen_neg_vec(vece, rsh, shift);
4044    if (vece == MO_8) {
4045        tcg_gen_mov_vec(lsh, shift);
4046    } else {
4047        msk = tcg_temp_new_vec_matching(dst);
4048        tcg_gen_dupi_vec(vece, msk, 0xff);
4049        tcg_gen_and_vec(vece, lsh, shift, msk);
4050        tcg_gen_and_vec(vece, rsh, rsh, msk);
4051        tcg_temp_free_vec(msk);
4052    }
4053
4054    /*
4055     * Rely on the TCG guarantee that out of range shifts produce
4056     * unspecified results, not undefined behaviour (i.e. no trap).
4057     * Discard out-of-range results after the fact.
4058     */
4059    tcg_gen_shlv_vec(vece, lval, src, lsh);
4060    tcg_gen_shrv_vec(vece, rval, src, rsh);
4061
4062    max = tcg_temp_new_vec_matching(dst);
4063    tcg_gen_dupi_vec(vece, max, 8 << vece);
4064
4065    /*
4066     * The choice of LT (signed) and GEU (unsigned) is biased toward
4067     * the instructions of the x86_64 host.  For MO_8, the whole byte
4068     * is significant so we must use an unsigned compare; otherwise we
4069     * have already masked to a byte and so a signed compare works.
4070     * Other tcg hosts have a full set of comparisons and do not care.
4071     */
4072    if (vece == MO_8) {
4073        tcg_gen_cmp_vec(TCG_COND_GEU, vece, lsh, lsh, max);
4074        tcg_gen_cmp_vec(TCG_COND_GEU, vece, rsh, rsh, max);
4075        tcg_gen_andc_vec(vece, lval, lval, lsh);
4076        tcg_gen_andc_vec(vece, rval, rval, rsh);
4077    } else {
4078        tcg_gen_cmp_vec(TCG_COND_LT, vece, lsh, lsh, max);
4079        tcg_gen_cmp_vec(TCG_COND_LT, vece, rsh, rsh, max);
4080        tcg_gen_and_vec(vece, lval, lval, lsh);
4081        tcg_gen_and_vec(vece, rval, rval, rsh);
4082    }
4083    tcg_gen_or_vec(vece, dst, lval, rval);
4084
4085    tcg_temp_free_vec(max);
4086    tcg_temp_free_vec(lval);
4087    tcg_temp_free_vec(rval);
4088    tcg_temp_free_vec(lsh);
4089    tcg_temp_free_vec(rsh);
4090}
4091
4092void gen_gvec_ushl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4093                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4094{
4095    static const TCGOpcode vecop_list[] = {
4096        INDEX_op_neg_vec, INDEX_op_shlv_vec,
4097        INDEX_op_shrv_vec, INDEX_op_cmp_vec, 0
4098    };
4099    static const GVecGen3 ops[4] = {
4100        { .fniv = gen_ushl_vec,
4101          .fno = gen_helper_gvec_ushl_b,
4102          .opt_opc = vecop_list,
4103          .vece = MO_8 },
4104        { .fniv = gen_ushl_vec,
4105          .fno = gen_helper_gvec_ushl_h,
4106          .opt_opc = vecop_list,
4107          .vece = MO_16 },
4108        { .fni4 = gen_ushl_i32,
4109          .fniv = gen_ushl_vec,
4110          .opt_opc = vecop_list,
4111          .vece = MO_32 },
4112        { .fni8 = gen_ushl_i64,
4113          .fniv = gen_ushl_vec,
4114          .opt_opc = vecop_list,
4115          .vece = MO_64 },
4116    };
4117    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4118}
4119
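/*
 * SSHL: as USHL, but right shifts are arithmetic.  An out-of-range
 * right shift therefore yields 0 or -1 (all sign bits) rather than
 * zero, which is why rsh is clamped to the maximum shift instead of
 * being discarded.
 */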
4120void gen_sshl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
4121{
4122    TCGv_i32 lval = tcg_temp_new_i32();
4123    TCGv_i32 rval = tcg_temp_new_i32();
4124    TCGv_i32 lsh = tcg_temp_new_i32();
4125    TCGv_i32 rsh = tcg_temp_new_i32();
4126    TCGv_i32 zero = tcg_const_i32(0);
4127    TCGv_i32 max = tcg_const_i32(31);
4128
4129    /*
4130     * Rely on the TCG guarantee that out of range shifts produce
4131     * unspecified results, not undefined behaviour (i.e. no trap).
4132     * Discard out-of-range results after the fact.
4133     */
4134    tcg_gen_ext8s_i32(lsh, shift);
4135    tcg_gen_neg_i32(rsh, lsh);
4136    tcg_gen_shl_i32(lval, src, lsh);
4137    tcg_gen_umin_i32(rsh, rsh, max);
4138    tcg_gen_sar_i32(rval, src, rsh);
4139    tcg_gen_movcond_i32(TCG_COND_LEU, lval, lsh, max, lval, zero);
4140    tcg_gen_movcond_i32(TCG_COND_LT, dst, lsh, zero, rval, lval);
4141
4142    tcg_temp_free_i32(lval);
4143    tcg_temp_free_i32(rval);
4144    tcg_temp_free_i32(lsh);
4145    tcg_temp_free_i32(rsh);
4146    tcg_temp_free_i32(zero);
4147    tcg_temp_free_i32(max);
4148}
4149
4150void gen_sshl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
4151{
4152    TCGv_i64 lval = tcg_temp_new_i64();
4153    TCGv_i64 rval = tcg_temp_new_i64();
4154    TCGv_i64 lsh = tcg_temp_new_i64();
4155    TCGv_i64 rsh = tcg_temp_new_i64();
4156    TCGv_i64 zero = tcg_const_i64(0);
4157    TCGv_i64 max = tcg_const_i64(63);
4158
4159    /*
4160     * Rely on the TCG guarantee that out of range shifts produce
4161     * unspecified results, not undefined behaviour (i.e. no trap).
4162     * Discard out-of-range results after the fact.
4163     */
4164    tcg_gen_ext8s_i64(lsh, shift);
4165    tcg_gen_neg_i64(rsh, lsh);
4166    tcg_gen_shl_i64(lval, src, lsh);
4167    tcg_gen_umin_i64(rsh, rsh, max);
4168    tcg_gen_sar_i64(rval, src, rsh);
4169    tcg_gen_movcond_i64(TCG_COND_LEU, lval, lsh, max, lval, zero);
4170    tcg_gen_movcond_i64(TCG_COND_LT, dst, lsh, zero, rval, lval);
4171
4172    tcg_temp_free_i64(lval);
4173    tcg_temp_free_i64(rval);
4174    tcg_temp_free_i64(lsh);
4175    tcg_temp_free_i64(rsh);
4176    tcg_temp_free_i64(zero);
4177    tcg_temp_free_i64(max);
4178}
4179
4180static void gen_sshl_vec(unsigned vece, TCGv_vec dst,
4181                         TCGv_vec src, TCGv_vec shift)
4182{
4183    TCGv_vec lval = tcg_temp_new_vec_matching(dst);
4184    TCGv_vec rval = tcg_temp_new_vec_matching(dst);
4185    TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
4186    TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
4187    TCGv_vec tmp = tcg_temp_new_vec_matching(dst);
4188
4189    /*
4190     * Rely on the TCG guarantee that out of range shifts produce
4191     * unspecified results, not undefined behaviour (i.e. no trap).
4192     * Discard out-of-range results after the fact.
4193     */
4194    tcg_gen_neg_vec(vece, rsh, shift);
4195    if (vece == MO_8) {
4196        tcg_gen_mov_vec(lsh, shift);
4197    } else {
4198        tcg_gen_dupi_vec(vece, tmp, 0xff);
4199        tcg_gen_and_vec(vece, lsh, shift, tmp);
4200        tcg_gen_and_vec(vece, rsh, rsh, tmp);
4201    }
4202
4203    /* Bound rsh so an out-of-range right shift gets -1.  */
4204    tcg_gen_dupi_vec(vece, tmp, (8 << vece) - 1);
4205    tcg_gen_umin_vec(vece, rsh, rsh, tmp);
4206    tcg_gen_cmp_vec(TCG_COND_GT, vece, tmp, lsh, tmp);
4207
4208    tcg_gen_shlv_vec(vece, lval, src, lsh);
4209    tcg_gen_sarv_vec(vece, rval, src, rsh);
4210
4211    /* Select in-bound left shift.  */
4212    tcg_gen_andc_vec(vece, lval, lval, tmp);
4213
4214    /* Select between left and right shift.  */
4215    if (vece == MO_8) {
4216        tcg_gen_dupi_vec(vece, tmp, 0);
4217        tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, rval, lval);
4218    } else {
4219        tcg_gen_dupi_vec(vece, tmp, 0x80);
4220        tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, lval, rval);
4221    }
4222
4223    tcg_temp_free_vec(lval);
4224    tcg_temp_free_vec(rval);
4225    tcg_temp_free_vec(lsh);
4226    tcg_temp_free_vec(rsh);
4227    tcg_temp_free_vec(tmp);
4228}
4229
4230void gen_gvec_sshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4231                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4232{
4233    static const TCGOpcode vecop_list[] = {
4234        INDEX_op_neg_vec, INDEX_op_umin_vec, INDEX_op_shlv_vec,
4235        INDEX_op_sarv_vec, INDEX_op_cmp_vec, INDEX_op_cmpsel_vec, 0
4236    };
4237    static const GVecGen3 ops[4] = {
4238        { .fniv = gen_sshl_vec,
4239          .fno = gen_helper_gvec_sshl_b,
4240          .opt_opc = vecop_list,
4241          .vece = MO_8 },
4242        { .fniv = gen_sshl_vec,
4243          .fno = gen_helper_gvec_sshl_h,
4244          .opt_opc = vecop_list,
4245          .vece = MO_16 },
4246        { .fni4 = gen_sshl_i32,
4247          .fniv = gen_sshl_vec,
4248          .opt_opc = vecop_list,
4249          .vece = MO_32 },
4250        { .fni8 = gen_sshl_i64,
4251          .fniv = gen_sshl_vec,
4252          .opt_opc = vecop_list,
4253          .vece = MO_64 },
4254    };
4255    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4256}
4257
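/*
 * Saturating arithmetic with the cumulative saturation flag QC.
 * Each expansion computes both the wrapping and the saturating
 * result; any per-element difference between the two sets bits in
 * vfp.qc, passed as the aofs operand of the GVecGen4 expansion.
 */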
4258static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4259                          TCGv_vec a, TCGv_vec b)
4260{
4261    TCGv_vec x = tcg_temp_new_vec_matching(t);
4262    tcg_gen_add_vec(vece, x, a, b);
4263    tcg_gen_usadd_vec(vece, t, a, b);
4264    tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4265    tcg_gen_or_vec(vece, sat, sat, x);
4266    tcg_temp_free_vec(x);
4267}
4268
4269void gen_gvec_uqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4270                       uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4271{
4272    static const TCGOpcode vecop_list[] = {
4273        INDEX_op_usadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
4274    };
4275    static const GVecGen4 ops[4] = {
4276        { .fniv = gen_uqadd_vec,
4277          .fno = gen_helper_gvec_uqadd_b,
4278          .write_aofs = true,
4279          .opt_opc = vecop_list,
4280          .vece = MO_8 },
4281        { .fniv = gen_uqadd_vec,
4282          .fno = gen_helper_gvec_uqadd_h,
4283          .write_aofs = true,
4284          .opt_opc = vecop_list,
4285          .vece = MO_16 },
4286        { .fniv = gen_uqadd_vec,
4287          .fno = gen_helper_gvec_uqadd_s,
4288          .write_aofs = true,
4289          .opt_opc = vecop_list,
4290          .vece = MO_32 },
4291        { .fniv = gen_uqadd_vec,
4292          .fno = gen_helper_gvec_uqadd_d,
4293          .write_aofs = true,
4294          .opt_opc = vecop_list,
4295          .vece = MO_64 },
4296    };
4297    tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4298                   rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4299}
4300
4301static void gen_sqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4302                          TCGv_vec a, TCGv_vec b)
4303{
4304    TCGv_vec x = tcg_temp_new_vec_matching(t);
4305    tcg_gen_add_vec(vece, x, a, b);
4306    tcg_gen_ssadd_vec(vece, t, a, b);
4307    tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4308    tcg_gen_or_vec(vece, sat, sat, x);
4309    tcg_temp_free_vec(x);
4310}
4311
4312void gen_gvec_sqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4313                       uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4314{
4315    static const TCGOpcode vecop_list[] = {
4316        INDEX_op_ssadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
4317    };
4318    static const GVecGen4 ops[4] = {
4319        { .fniv = gen_sqadd_vec,
4320          .fno = gen_helper_gvec_sqadd_b,
4321          .opt_opc = vecop_list,
4322          .write_aofs = true,
4323          .vece = MO_8 },
4324        { .fniv = gen_sqadd_vec,
4325          .fno = gen_helper_gvec_sqadd_h,
4326          .opt_opc = vecop_list,
4327          .write_aofs = true,
4328          .vece = MO_16 },
4329        { .fniv = gen_sqadd_vec,
4330          .fno = gen_helper_gvec_sqadd_s,
4331          .opt_opc = vecop_list,
4332          .write_aofs = true,
4333          .vece = MO_32 },
4334        { .fniv = gen_sqadd_vec,
4335          .fno = gen_helper_gvec_sqadd_d,
4336          .opt_opc = vecop_list,
4337          .write_aofs = true,
4338          .vece = MO_64 },
4339    };
4340    tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4341                   rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4342}
4343
4344static void gen_uqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4345                          TCGv_vec a, TCGv_vec b)
4346{
4347    TCGv_vec x = tcg_temp_new_vec_matching(t);
4348    tcg_gen_sub_vec(vece, x, a, b);
4349    tcg_gen_ussub_vec(vece, t, a, b);
4350    tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4351    tcg_gen_or_vec(vece, sat, sat, x);
4352    tcg_temp_free_vec(x);
4353}
4354
4355void gen_gvec_uqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4356                       uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4357{
4358    static const TCGOpcode vecop_list[] = {
4359        INDEX_op_ussub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
4360    };
4361    static const GVecGen4 ops[4] = {
4362        { .fniv = gen_uqsub_vec,
4363          .fno = gen_helper_gvec_uqsub_b,
4364          .opt_opc = vecop_list,
4365          .write_aofs = true,
4366          .vece = MO_8 },
4367        { .fniv = gen_uqsub_vec,
4368          .fno = gen_helper_gvec_uqsub_h,
4369          .opt_opc = vecop_list,
4370          .write_aofs = true,
4371          .vece = MO_16 },
4372        { .fniv = gen_uqsub_vec,
4373          .fno = gen_helper_gvec_uqsub_s,
4374          .opt_opc = vecop_list,
4375          .write_aofs = true,
4376          .vece = MO_32 },
4377        { .fniv = gen_uqsub_vec,
4378          .fno = gen_helper_gvec_uqsub_d,
4379          .opt_opc = vecop_list,
4380          .write_aofs = true,
4381          .vece = MO_64 },
4382    };
4383    tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4384                   rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4385}
4386
4387static void gen_sqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4388                          TCGv_vec a, TCGv_vec b)
4389{
4390    TCGv_vec x = tcg_temp_new_vec_matching(t);
4391    tcg_gen_sub_vec(vece, x, a, b);
4392    tcg_gen_sssub_vec(vece, t, a, b);
4393    tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4394    tcg_gen_or_vec(vece, sat, sat, x);
4395    tcg_temp_free_vec(x);
4396}
4397
4398void gen_gvec_sqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4399                       uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4400{
4401    static const TCGOpcode vecop_list[] = {
4402        INDEX_op_sssub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
4403    };
4404    static const GVecGen4 ops[4] = {
4405        { .fniv = gen_sqsub_vec,
4406          .fno = gen_helper_gvec_sqsub_b,
4407          .opt_opc = vecop_list,
4408          .write_aofs = true,
4409          .vece = MO_8 },
4410        { .fniv = gen_sqsub_vec,
4411          .fno = gen_helper_gvec_sqsub_h,
4412          .opt_opc = vecop_list,
4413          .write_aofs = true,
4414          .vece = MO_16 },
4415        { .fniv = gen_sqsub_vec,
4416          .fno = gen_helper_gvec_sqsub_s,
4417          .opt_opc = vecop_list,
4418          .write_aofs = true,
4419          .vece = MO_32 },
4420        { .fniv = gen_sqsub_vec,
4421          .fno = gen_helper_gvec_sqsub_d,
4422          .opt_opc = vecop_list,
4423          .write_aofs = true,
4424          .vece = MO_64 },
4425    };
4426    tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4427                   rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4428}
4429
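/*
 * SABD: signed absolute difference, d = |a - b|.  The integer
 * versions compute both a - b and b - a and select the non-negative
 * one; the vector version uses smax(a, b) - smin(a, b).
 */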
4430static void gen_sabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4431{
4432    TCGv_i32 t = tcg_temp_new_i32();
4433
4434    tcg_gen_sub_i32(t, a, b);
4435    tcg_gen_sub_i32(d, b, a);
4436    tcg_gen_movcond_i32(TCG_COND_LT, d, a, b, d, t);
4437    tcg_temp_free_i32(t);
4438}
4439
4440static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4441{
4442    TCGv_i64 t = tcg_temp_new_i64();
4443
4444    tcg_gen_sub_i64(t, a, b);
4445    tcg_gen_sub_i64(d, b, a);
4446    tcg_gen_movcond_i64(TCG_COND_LT, d, a, b, d, t);
4447    tcg_temp_free_i64(t);
4448}
4449
4450static void gen_sabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4451{
4452    TCGv_vec t = tcg_temp_new_vec_matching(d);
4453
4454    tcg_gen_smin_vec(vece, t, a, b);
4455    tcg_gen_smax_vec(vece, d, a, b);
4456    tcg_gen_sub_vec(vece, d, d, t);
4457    tcg_temp_free_vec(t);
4458}
4459
4460void gen_gvec_sabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4461                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4462{
4463    static const TCGOpcode vecop_list[] = {
4464        INDEX_op_sub_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0
4465    };
4466    static const GVecGen3 ops[4] = {
4467        { .fniv = gen_sabd_vec,
4468          .fno = gen_helper_gvec_sabd_b,
4469          .opt_opc = vecop_list,
4470          .vece = MO_8 },
4471        { .fniv = gen_sabd_vec,
4472          .fno = gen_helper_gvec_sabd_h,
4473          .opt_opc = vecop_list,
4474          .vece = MO_16 },
4475        { .fni4 = gen_sabd_i32,
4476          .fniv = gen_sabd_vec,
4477          .fno = gen_helper_gvec_sabd_s,
4478          .opt_opc = vecop_list,
4479          .vece = MO_32 },
4480        { .fni8 = gen_sabd_i64,
4481          .fniv = gen_sabd_vec,
4482          .fno = gen_helper_gvec_sabd_d,
4483          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4484          .opt_opc = vecop_list,
4485          .vece = MO_64 },
4486    };
4487    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4488}
4489
4490static void gen_uabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4491{
4492    TCGv_i32 t = tcg_temp_new_i32();
4493
4494    tcg_gen_sub_i32(t, a, b);
4495    tcg_gen_sub_i32(d, b, a);
4496    tcg_gen_movcond_i32(TCG_COND_LTU, d, a, b, d, t);
4497    tcg_temp_free_i32(t);
4498}
4499
4500static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4501{
4502    TCGv_i64 t = tcg_temp_new_i64();
4503
4504    tcg_gen_sub_i64(t, a, b);
4505    tcg_gen_sub_i64(d, b, a);
4506    tcg_gen_movcond_i64(TCG_COND_LTU, d, a, b, d, t);
4507    tcg_temp_free_i64(t);
4508}
4509
4510static void gen_uabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4511{
4512    TCGv_vec t = tcg_temp_new_vec_matching(d);
4513
4514    tcg_gen_umin_vec(vece, t, a, b);
4515    tcg_gen_umax_vec(vece, d, a, b);
4516    tcg_gen_sub_vec(vece, d, d, t);
4517    tcg_temp_free_vec(t);
4518}
4519
4520void gen_gvec_uabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4521                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4522{
4523    static const TCGOpcode vecop_list[] = {
4524        INDEX_op_sub_vec, INDEX_op_umin_vec, INDEX_op_umax_vec, 0
4525    };
4526    static const GVecGen3 ops[4] = {
4527        { .fniv = gen_uabd_vec,
4528          .fno = gen_helper_gvec_uabd_b,
4529          .opt_opc = vecop_list,
4530          .vece = MO_8 },
4531        { .fniv = gen_uabd_vec,
4532          .fno = gen_helper_gvec_uabd_h,
4533          .opt_opc = vecop_list,
4534          .vece = MO_16 },
4535        { .fni4 = gen_uabd_i32,
4536          .fniv = gen_uabd_vec,
4537          .fno = gen_helper_gvec_uabd_s,
4538          .opt_opc = vecop_list,
4539          .vece = MO_32 },
4540        { .fni8 = gen_uabd_i64,
4541          .fniv = gen_uabd_vec,
4542          .fno = gen_helper_gvec_uabd_d,
4543          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4544          .opt_opc = vecop_list,
4545          .vece = MO_64 },
4546    };
4547    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4548}
4549
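/*
 * SABA/UABA: absolute difference and accumulate, d += |a - b|,
 * built directly on the SABD/UABD helpers above.
 */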
4550static void gen_saba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4551{
4552    TCGv_i32 t = tcg_temp_new_i32();
4553    gen_sabd_i32(t, a, b);
4554    tcg_gen_add_i32(d, d, t);
4555    tcg_temp_free_i32(t);
4556}
4557
4558static void gen_saba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4559{
4560    TCGv_i64 t = tcg_temp_new_i64();
4561    gen_sabd_i64(t, a, b);
4562    tcg_gen_add_i64(d, d, t);
4563    tcg_temp_free_i64(t);
4564}
4565
4566static void gen_saba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4567{
4568    TCGv_vec t = tcg_temp_new_vec_matching(d);
4569    gen_sabd_vec(vece, t, a, b);
4570    tcg_gen_add_vec(vece, d, d, t);
4571    tcg_temp_free_vec(t);
4572}
4573
4574void gen_gvec_saba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4575                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4576{
4577    static const TCGOpcode vecop_list[] = {
4578        INDEX_op_sub_vec, INDEX_op_add_vec,
4579        INDEX_op_smin_vec, INDEX_op_smax_vec, 0
4580    };
4581    static const GVecGen3 ops[4] = {
4582        { .fniv = gen_saba_vec,
4583          .fno = gen_helper_gvec_saba_b,
4584          .opt_opc = vecop_list,
4585          .load_dest = true,
4586          .vece = MO_8 },
4587        { .fniv = gen_saba_vec,
4588          .fno = gen_helper_gvec_saba_h,
4589          .opt_opc = vecop_list,
4590          .load_dest = true,
4591          .vece = MO_16 },
4592        { .fni4 = gen_saba_i32,
4593          .fniv = gen_saba_vec,
4594          .fno = gen_helper_gvec_saba_s,
4595          .opt_opc = vecop_list,
4596          .load_dest = true,
4597          .vece = MO_32 },
4598        { .fni8 = gen_saba_i64,
4599          .fniv = gen_saba_vec,
4600          .fno = gen_helper_gvec_saba_d,
4601          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4602          .opt_opc = vecop_list,
4603          .load_dest = true,
4604          .vece = MO_64 },
4605    };
4606    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4607}
4608
4609static void gen_uaba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4610{
4611    TCGv_i32 t = tcg_temp_new_i32();
4612    gen_uabd_i32(t, a, b);
4613    tcg_gen_add_i32(d, d, t);
4614    tcg_temp_free_i32(t);
4615}
4616
4617static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4618{
4619    TCGv_i64 t = tcg_temp_new_i64();
4620    gen_uabd_i64(t, a, b);
4621    tcg_gen_add_i64(d, d, t);
4622    tcg_temp_free_i64(t);
4623}
4624
4625static void gen_uaba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4626{
4627    TCGv_vec t = tcg_temp_new_vec_matching(d);
4628    gen_uabd_vec(vece, t, a, b);
4629    tcg_gen_add_vec(vece, d, d, t);
4630    tcg_temp_free_vec(t);
4631}
4632
4633void gen_gvec_uaba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4634                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4635{
4636    static const TCGOpcode vecop_list[] = {
4637        INDEX_op_sub_vec, INDEX_op_add_vec,
4638        INDEX_op_umin_vec, INDEX_op_umax_vec, 0
4639    };
4640    static const GVecGen3 ops[4] = {
4641        { .fniv = gen_uaba_vec,
4642          .fno = gen_helper_gvec_uaba_b,
4643          .opt_opc = vecop_list,
4644          .load_dest = true,
4645          .vece = MO_8 },
4646        { .fniv = gen_uaba_vec,
4647          .fno = gen_helper_gvec_uaba_h,
4648          .opt_opc = vecop_list,
4649          .load_dest = true,
4650          .vece = MO_16 },
4651        { .fni4 = gen_uaba_i32,
4652          .fniv = gen_uaba_vec,
4653          .fno = gen_helper_gvec_uaba_s,
4654          .opt_opc = vecop_list,
4655          .load_dest = true,
4656          .vece = MO_32 },
4657        { .fni8 = gen_uaba_i64,
4658          .fniv = gen_uaba_vec,
4659          .fno = gen_helper_gvec_uaba_d,
4660          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4661          .opt_opc = vecop_list,
4662          .load_dest = true,
4663          .vece = MO_64 },
4664    };
4665    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4666}
4667
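/*
 * Emit code for a coprocessor register access (MCR, MRC, MCRR, MRRC).
 * This looks up the ARMCPRegInfo for the encoding, performs the static
 * and, where required, runtime permission checks, and then emits the
 * actual read or write, either inline via ri->fieldoffset or through
 * the readfn/writefn helpers.
 */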
4668static void do_coproc_insn(DisasContext *s, int cpnum, int is64,
4669                           int opc1, int crn, int crm, int opc2,
4670                           bool isread, int rt, int rt2)
4671{
4672    const ARMCPRegInfo *ri;
4673
4674    ri = get_arm_cp_reginfo(s->cp_regs,
4675            ENCODE_CP_REG(cpnum, is64, s->ns, crn, crm, opc1, opc2));
4676    if (ri) {
4677        bool need_exit_tb;
4678
4679        /* Check access permissions */
4680        if (!cp_access_ok(s->current_el, ri, isread)) {
4681            unallocated_encoding(s);
4682            return;
4683        }
4684
4685        if (s->hstr_active || ri->accessfn ||
4686            (arm_dc_feature(s, ARM_FEATURE_XSCALE) && cpnum < 14)) {
4687            /* Emit code to perform further access permission checks at
4688             * runtime; this may result in an exception.
4689             * Note that on XScale all cp0..cp13 registers do an access check
4690             * call in order to handle c15_cpar.
4691             */
4692            TCGv_ptr tmpptr;
4693            TCGv_i32 tcg_syn, tcg_isread;
4694            uint32_t syndrome;
4695
4696            /* Note that since we are an implementation which takes an
4697             * exception on a trapped conditional instruction only if the
4698             * instruction passes its condition code check, we can take
4699             * advantage of the clause in the ARM ARM that allows us to set
4700             * the COND field in the instruction to 0xE in all cases.
4701             * We could fish the actual condition out of the insn (ARM)
4702             * or the condexec bits (Thumb) but it isn't necessary.
4703             */
4704            switch (cpnum) {
4705            case 14:
4706                if (is64) {
4707                    syndrome = syn_cp14_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
4708                                                 isread, false);
4709                } else {
4710                    syndrome = syn_cp14_rt_trap(1, 0xe, opc1, opc2, crn, crm,
4711                                                rt, isread, false);
4712                }
4713                break;
4714            case 15:
4715                if (is64) {
4716                    syndrome = syn_cp15_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
4717                                                 isread, false);
4718                } else {
4719                    syndrome = syn_cp15_rt_trap(1, 0xe, opc1, opc2, crn, crm,
4720                                                rt, isread, false);
4721                }
4722                break;
4723            default:
4724                /* ARMv8 defines that only coprocessors 14 and 15 exist,
4725                 * so this can only happen if this is an ARMv7 or earlier CPU,
4726                 * in which case the syndrome information won't actually be
4727                 * guest visible.
4728                 */
4729                assert(!arm_dc_feature(s, ARM_FEATURE_V8));
4730                syndrome = syn_uncategorized();
4731                break;
4732            }
4733
4734            gen_set_condexec(s);
4735            gen_set_pc_im(s, s->pc_curr);
4736            tmpptr = tcg_const_ptr(ri);
4737            tcg_syn = tcg_const_i32(syndrome);
4738            tcg_isread = tcg_const_i32(isread);
4739            gen_helper_access_check_cp_reg(cpu_env, tmpptr, tcg_syn,
4740                                           tcg_isread);
4741            tcg_temp_free_ptr(tmpptr);
4742            tcg_temp_free_i32(tcg_syn);
4743            tcg_temp_free_i32(tcg_isread);
4744        } else if (ri->type & ARM_CP_RAISES_EXC) {
4745            /*
4746             * The readfn or writefn might raise an exception;
4747             * synchronize the CPU state in case it does.
4748             */
4749            gen_set_condexec(s);
4750            gen_set_pc_im(s, s->pc_curr);
4751        }
4752
4753        /* Handle special cases first */
4754        switch (ri->type & ~(ARM_CP_FLAG_MASK & ~ARM_CP_SPECIAL)) {
4755        case ARM_CP_NOP:
4756            return;
4757        case ARM_CP_WFI:
4758            if (isread) {
4759                unallocated_encoding(s);
4760                return;
4761            }
4762            gen_set_pc_im(s, s->base.pc_next);
4763            s->base.is_jmp = DISAS_WFI;
4764            return;
4765        default:
4766            break;
4767        }
4768
4769        if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
4770            gen_io_start();
4771        }
4772
4773        if (isread) {
4774            /* Read */
4775            if (is64) {
4776                TCGv_i64 tmp64;
4777                TCGv_i32 tmp;
4778                if (ri->type & ARM_CP_CONST) {
4779                    tmp64 = tcg_const_i64(ri->resetvalue);
4780                } else if (ri->readfn) {
4781                    TCGv_ptr tmpptr;
4782                    tmp64 = tcg_temp_new_i64();
4783                    tmpptr = tcg_const_ptr(ri);
4784                    gen_helper_get_cp_reg64(tmp64, cpu_env, tmpptr);
4785                    tcg_temp_free_ptr(tmpptr);
4786                } else {
4787                    tmp64 = tcg_temp_new_i64();
4788                    tcg_gen_ld_i64(tmp64, cpu_env, ri->fieldoffset);
4789                }
4790                tmp = tcg_temp_new_i32();
4791                tcg_gen_extrl_i64_i32(tmp, tmp64);
4792                store_reg(s, rt, tmp);
4793                tmp = tcg_temp_new_i32();
4794                tcg_gen_extrh_i64_i32(tmp, tmp64);
4795                tcg_temp_free_i64(tmp64);
4796                store_reg(s, rt2, tmp);
4797            } else {
4798                TCGv_i32 tmp;
4799                if (ri->type & ARM_CP_CONST) {
4800                    tmp = tcg_const_i32(ri->resetvalue);
4801                } else if (ri->readfn) {
4802                    TCGv_ptr tmpptr;
4803                    tmp = tcg_temp_new_i32();
4804                    tmpptr = tcg_const_ptr(ri);
4805                    gen_helper_get_cp_reg(tmp, cpu_env, tmpptr);
4806                    tcg_temp_free_ptr(tmpptr);
4807                } else {
4808                    tmp = load_cpu_offset(ri->fieldoffset);
4809                }
4810                if (rt == 15) {
4811                    /* A destination register of r15 for a 32-bit load sets
4812                     * the condition codes from the top 4 bits of the loaded value.
4813                     */
4814                    gen_set_nzcv(tmp);
4815                    tcg_temp_free_i32(tmp);
4816                } else {
4817                    store_reg(s, rt, tmp);
4818                }
4819            }
4820        } else {
4821            /* Write */
4822            if (ri->type & ARM_CP_CONST) {
4823                /* If not forbidden by access permissions, treat as write-ignored (WI) */
4824                return;
4825            }
4826
4827            if (is64) {
4828                TCGv_i32 tmplo, tmphi;
4829                TCGv_i64 tmp64 = tcg_temp_new_i64();
4830                tmplo = load_reg(s, rt);
4831                tmphi = load_reg(s, rt2);
4832                tcg_gen_concat_i32_i64(tmp64, tmplo, tmphi);
4833                tcg_temp_free_i32(tmplo);
4834                tcg_temp_free_i32(tmphi);
4835                if (ri->writefn) {
4836                    TCGv_ptr tmpptr = tcg_const_ptr(ri);
4837                    gen_helper_set_cp_reg64(cpu_env, tmpptr, tmp64);
4838                    tcg_temp_free_ptr(tmpptr);
4839                } else {
4840                    tcg_gen_st_i64(tmp64, cpu_env, ri->fieldoffset);
4841                }
4842                tcg_temp_free_i64(tmp64);
4843            } else {
4844                if (ri->writefn) {
4845                    TCGv_i32 tmp;
4846                    TCGv_ptr tmpptr;
4847                    tmp = load_reg(s, rt);
4848                    tmpptr = tcg_const_ptr(ri);
4849                    gen_helper_set_cp_reg(cpu_env, tmpptr, tmp);
4850                    tcg_temp_free_ptr(tmpptr);
4851                    tcg_temp_free_i32(tmp);
4852                } else {
4853                    TCGv_i32 tmp = load_reg(s, rt);
4854                    store_cpu_offset(tmp, ri->fieldoffset);
4855                }
4856            }
4857        }
4858
4859        /* I/O operations must end the TB here (whether read or write) */
4860        need_exit_tb = ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) &&
4861                        (ri->type & ARM_CP_IO));
4862
4863        if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
4864            /*
4865             * A write to any coprocessor register that ends a TB
4866             * must rebuild the hflags for the next TB.
4867             */
4868            TCGv_i32 tcg_el = tcg_const_i32(s->current_el);
4869            if (arm_dc_feature(s, ARM_FEATURE_M)) {
4870                gen_helper_rebuild_hflags_m32(cpu_env, tcg_el);
4871            } else {
4872                if (ri->type & ARM_CP_NEWEL) {
4873                    gen_helper_rebuild_hflags_a32_newel(cpu_env);
4874                } else {
4875                    gen_helper_rebuild_hflags_a32(cpu_env, tcg_el);
4876                }
4877            }
4878            tcg_temp_free_i32(tcg_el);
4879            /*
4880             * We default to ending the TB on a coprocessor register write,
4881             * but allow this to be suppressed by the register definition
4882             * (usually only necessary to work around guest bugs).
4883             */
4884            need_exit_tb = true;
4885        }
4886        if (need_exit_tb) {
4887            gen_lookup_tb(s);
4888        }
4889
4890        return;
4891    }
4892
4893    /* Unknown register; this might be a guest error or a QEMU
4894     * unimplemented feature.
4895     */
4896    if (is64) {
4897        qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
4898                      "64 bit system register cp:%d opc1:%d crm:%d "
4899                      "(%s)\n",
4900                      isread ? "read" : "write", cpnum, opc1, crm,
4901                      s->ns ? "non-secure" : "secure");
4902    } else {
4903        qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
4904                      "system register cp:%d opc1:%d crn:%d crm:%d opc2:%d "
4905                      "(%s)\n",
4906                      isread ? "read" : "write", cpnum, opc1, crn, crm, opc2,
4907                      s->ns ? "non-secure" : "secure");
4908    }
4909
4910    unallocated_encoding(s);
4911    return;
4912}
4913
4914/* Decode XScale DSP or iWMMXt insn (in the copro space, cp=0 or 1) */
4915static void disas_xscale_insn(DisasContext *s, uint32_t insn)
4916{
4917    int cpnum = (insn >> 8) & 0xf;
4918
4919    if (extract32(s->c15_cpar, cpnum, 1) == 0) {
4920        unallocated_encoding(s);
4921    } else if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
4922        if (disas_iwmmxt_insn(s, insn)) {
4923            unallocated_encoding(s);
4924        }
4925    } else if (arm_dc_feature(s, ARM_FEATURE_XSCALE)) {
4926        if (disas_dsp_insn(s, insn)) {
4927            unallocated_encoding(s);
4928        }
4929    }
4930}
4931
4932/* Store a 64-bit value to a register pair.  Clobbers val.  */
4933static void gen_storeq_reg(DisasContext *s, int rlow, int rhigh, TCGv_i64 val)
4934{
4935    TCGv_i32 tmp;
4936    tmp = tcg_temp_new_i32();
4937    tcg_gen_extrl_i64_i32(tmp, val);
4938    store_reg(s, rlow, tmp);
4939    tmp = tcg_temp_new_i32();
4940    tcg_gen_extrh_i64_i32(tmp, val);
4941    store_reg(s, rhigh, tmp);
4942}
4943
4944/* Load a 64-bit value from a register pair and add it to val.  */
4945static void gen_addq(DisasContext *s, TCGv_i64 val, int rlow, int rhigh)
4946{
4947    TCGv_i64 tmp;
4948    TCGv_i32 tmpl;
4949    TCGv_i32 tmph;
4950
4951    /* Load the 64-bit value rhigh:rlow.  */
4952    tmpl = load_reg(s, rlow);
4953    tmph = load_reg(s, rhigh);
4954    tmp = tcg_temp_new_i64();
4955    tcg_gen_concat_i32_i64(tmp, tmpl, tmph);
4956    tcg_temp_free_i32(tmpl);
4957    tcg_temp_free_i32(tmph);
4958    tcg_gen_add_i64(val, val, tmp);
4959    tcg_temp_free_i64(tmp);
4960}
4961
4962/* Set N and Z flags from hi|lo.  */
4963static void gen_logicq_cc(TCGv_i32 lo, TCGv_i32 hi)
4964{
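    /*
     * QEMU's flag convention: Z is set iff cpu_ZF == 0, and N is bit 31
     * of cpu_NF, so OR-ing the two halves yields Z for the 64-bit result.
     */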
4965    tcg_gen_mov_i32(cpu_NF, hi);
4966    tcg_gen_or_i32(cpu_ZF, lo, hi);
4967}
4968
4969/* Load/Store exclusive instructions are implemented by remembering
4970   the value/address loaded, and seeing if these are the same
4971   when the store is performed.  This should be sufficient to implement
4972   the architecturally mandated semantics, and avoids having to monitor
4973   regular stores.  The compare vs the remembered value is done during
4974   the cmpxchg operation, but we must compare the addresses manually.  */
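/* For example, for the guest sequence
 *     ldrex r0, [r1]       ; record r1 and the loaded value
 *     strex r2, r3, [r1]   ; r2 = 0 only if the cmpxchg still sees
 *                          ; the remembered value at that address
 * a store to [r1] by another CPU in between makes the STREX fail.
 */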
4975static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
4976                               TCGv_i32 addr, int size)
4977{
4978    TCGv_i32 tmp = tcg_temp_new_i32();
4979    MemOp opc = size | MO_ALIGN | s->be_data;
4980
4981    s->is_ldex = true;
4982
4983    if (size == 3) {
4984        TCGv_i32 tmp2 = tcg_temp_new_i32();
4985        TCGv_i64 t64 = tcg_temp_new_i64();
4986
4987        /* For AArch32, architecturally the 32-bit word at the lowest
4988         * address is always Rt and the one at addr+4 is Rt2, even if
4989         * the CPU is big-endian. That means we don't want to do a
4990         * gen_aa32_ld_i64(), which invokes gen_aa32_frob64() as if
4991         * for an architecturally 64-bit access, but instead do a
4992         * 64-bit access using MO_BE if appropriate and then split
4993         * the two halves.
4994         * This only makes a difference for BE32 user-mode, where
4995         * frob64() must not flip the two halves of the 64-bit data
4996         * but this code must treat BE32 user-mode like BE32 system.
4997         */
4998        TCGv taddr = gen_aa32_addr(s, addr, opc);
4999
5000        tcg_gen_qemu_ld_i64(t64, taddr, get_mem_index(s), opc);
5001        tcg_temp_free(taddr);
5002        tcg_gen_mov_i64(cpu_exclusive_val, t64);
5003        if (s->be_data == MO_BE) {
5004            tcg_gen_extr_i64_i32(tmp2, tmp, t64);
5005        } else {
5006            tcg_gen_extr_i64_i32(tmp, tmp2, t64);
5007        }
5008        tcg_temp_free_i64(t64);
5009
5010        store_reg(s, rt2, tmp2);
5011    } else {
5012        gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), opc);
5013        tcg_gen_extu_i32_i64(cpu_exclusive_val, tmp);
5014    }
5015
5016    store_reg(s, rt, tmp);
5017    tcg_gen_extu_i32_i64(cpu_exclusive_addr, addr);
5018}
5019
5020static void gen_clrex(DisasContext *s)
5021{
5022    tcg_gen_movi_i64(cpu_exclusive_addr, -1);
5023}
5024
5025static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
5026                                TCGv_i32 addr, int size)
5027{
5028    TCGv_i32 t0, t1, t2;
5029    TCGv_i64 extaddr;
5030    TCGv taddr;
5031    TCGLabel *done_label;
5032    TCGLabel *fail_label;
5033    MemOp opc = size | MO_ALIGN | s->be_data;
5034
5035    /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]) {
5036         [addr] = {Rt};
5037         {Rd} = 0;
5038       } else {
5039         {Rd} = 1;
5040       } */
5041    fail_label = gen_new_label();
5042    done_label = gen_new_label();
5043    extaddr = tcg_temp_new_i64();
5044    tcg_gen_extu_i32_i64(extaddr, addr);
5045    tcg_gen_brcond_i64(TCG_COND_NE, extaddr, cpu_exclusive_addr, fail_label);
5046    tcg_temp_free_i64(extaddr);
5047
5048    taddr = gen_aa32_addr(s, addr, opc);
5049    t0 = tcg_temp_new_i32();
5050    t1 = load_reg(s, rt);
5051    if (size == 3) {
5052        TCGv_i64 o64 = tcg_temp_new_i64();
5053        TCGv_i64 n64 = tcg_temp_new_i64();
5054
5055        t2 = load_reg(s, rt2);
5056        /* For AArch32, architecturally the 32-bit word at the lowest
5057         * address is always Rt and the one at addr+4 is Rt2, even if
5058         * the CPU is big-endian. Since we're going to treat this as a
5059         * single 64-bit BE store, we need to put the two halves in the
5060         * opposite order for BE to LE, so that they end up in the right
5061         * places.
5062         * We don't want gen_aa32_frob64() because that does the wrong
5063         * thing for BE32 usermode.
5064         */
5065        if (s->be_data == MO_BE) {
5066            tcg_gen_concat_i32_i64(n64, t2, t1);
5067        } else {
5068            tcg_gen_concat_i32_i64(n64, t1, t2);
5069        }
5070        tcg_temp_free_i32(t2);
5071
5072        tcg_gen_atomic_cmpxchg_i64(o64, taddr, cpu_exclusive_val, n64,
5073                                   get_mem_index(s), opc);
5074        tcg_temp_free_i64(n64);
5075
5076        tcg_gen_setcond_i64(TCG_COND_NE, o64, o64, cpu_exclusive_val);
5077        tcg_gen_extrl_i64_i32(t0, o64);
5078
5079        tcg_temp_free_i64(o64);
5080    } else {
5081        t2 = tcg_temp_new_i32();
5082        tcg_gen_extrl_i64_i32(t2, cpu_exclusive_val);
5083        tcg_gen_atomic_cmpxchg_i32(t0, taddr, t2, t1, get_mem_index(s), opc);
5084        tcg_gen_setcond_i32(TCG_COND_NE, t0, t0, t2);
5085        tcg_temp_free_i32(t2);
5086    }
5087    tcg_temp_free_i32(t1);
5088    tcg_temp_free(taddr);
5089    tcg_gen_mov_i32(cpu_R[rd], t0);
5090    tcg_temp_free_i32(t0);
5091    tcg_gen_br(done_label);
5092
5093    gen_set_label(fail_label);
5094    tcg_gen_movi_i32(cpu_R[rd], 1);
5095    gen_set_label(done_label);
5096    tcg_gen_movi_i64(cpu_exclusive_addr, -1);
5097}
5098
5099/* gen_srs:
5101 * @s: DisasContext
5102 * @mode: mode field from insn (which stack to store to)
5103 * @amode: addressing mode (DA/IA/DB/IB), encoded as per P,U bits in ARM insn
5104 * @writeback: true if writeback bit set
5105 *
5106 * Generate code for the SRS (Store Return State) insn.
5107 */
5108static void gen_srs(DisasContext *s,
5109                    uint32_t mode, uint32_t amode, bool writeback)
5110{
5111    int32_t offset;
5112    TCGv_i32 addr, tmp;
5113    bool undef = false;
5114
5115    /* SRS is:
5116     * - trapped to EL3 if EL3 is AArch64 and we are at Secure EL1
5117     *   and specified mode is monitor mode
5118     * - UNDEFINED in Hyp mode
5119     * - UNPREDICTABLE in User or System mode
5120     * - UNPREDICTABLE if the specified mode is:
5121     * -- not implemented
5122     * -- not a valid mode number
5123     * -- a mode that's at a higher exception level
5124     * -- Monitor, if we are Non-secure
5125     * For the UNPREDICTABLE cases we choose to UNDEF.
5126     */
5127    if (s->current_el == 1 && !s->ns && mode == ARM_CPU_MODE_MON) {
5128        gen_exception_insn(s, s->pc_curr, EXCP_UDEF, syn_uncategorized(), 3);
5129        return;
5130    }
5131
5132    if (s->current_el == 0 || s->current_el == 2) {
5133        undef = true;
5134    }
5135
5136    switch (mode) {
5137    case ARM_CPU_MODE_USR:
5138    case ARM_CPU_MODE_FIQ:
5139    case ARM_CPU_MODE_IRQ:
5140    case ARM_CPU_MODE_SVC:
5141    case ARM_CPU_MODE_ABT:
5142    case ARM_CPU_MODE_UND:
5143    case ARM_CPU_MODE_SYS:
5144        break;
5145    case ARM_CPU_MODE_HYP:
5146        if (s->current_el == 1 || !arm_dc_feature(s, ARM_FEATURE_EL2)) {
5147            undef = true;
5148        }
5149        break;
5150    case ARM_CPU_MODE_MON:
5151        /* No need to check specifically for "are we non-secure" because
5152         * we've already made EL0 UNDEF and handled the trap for S-EL1;
5153         * so if this isn't EL3 then we must be non-secure.
5154         */
5155        if (s->current_el != 3) {
5156            undef = true;
5157        }
5158        break;
5159    default:
5160        undef = true;
5161    }
5162
5163    if (undef) {
5164        unallocated_encoding(s);
5165        return;
5166    }
5167
5168    addr = tcg_temp_new_i32();
5169    tmp = tcg_const_i32(mode);
5170    /* get_r13_banked() will raise an exception if called from System mode */
5171    gen_set_condexec(s);
5172    gen_set_pc_im(s, s->pc_curr);
5173    gen_helper_get_r13_banked(addr, cpu_env, tmp);
5174    tcg_temp_free_i32(tmp);
5175    switch (amode) {
5176    case 0: /* DA */
5177        offset = -4;
5178        break;
5179    case 1: /* IA */
5180        offset = 0;
5181        break;
5182    case 2: /* DB */
5183        offset = -8;
5184        break;
5185    case 3: /* IB */
5186        offset = 4;
5187        break;
5188    default:
5189        abort();
5190    }
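    /*
     * Combined with the fixed +4 between the two stores below, this
     * places, e.g. for DB (amode == 2), LR at SP-8 and the SPSR at
     * SP-4, matching a descending store-multiple.
     */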
5191    tcg_gen_addi_i32(addr, addr, offset);
5192    tmp = load_reg(s, 14);
5193    gen_aa32_st32(s, tmp, addr, get_mem_index(s));
5194    tcg_temp_free_i32(tmp);
5195    tmp = load_cpu_field(spsr);
5196    tcg_gen_addi_i32(addr, addr, 4);
5197    gen_aa32_st32(s, tmp, addr, get_mem_index(s));
5198    tcg_temp_free_i32(tmp);
5199    if (writeback) {
5200        switch (amode) {
5201        case 0:
5202            offset = -8;
5203            break;
5204        case 1:
5205            offset = 4;
5206            break;
5207        case 2:
5208            offset = -4;
5209            break;
5210        case 3:
5211            offset = 0;
5212            break;
5213        default:
5214            abort();
5215        }
5216        tcg_gen_addi_i32(addr, addr, offset);
5217        tmp = tcg_const_i32(mode);
5218        gen_helper_set_r13_banked(cpu_env, tmp, addr);
5219        tcg_temp_free_i32(tmp);
5220    }
5221    tcg_temp_free_i32(addr);
5222    s->base.is_jmp = DISAS_UPDATE_EXIT;
5223}
5224
5225/* Skip this instruction if the ARM condition is false */
5226static void arm_skip_unless(DisasContext *s, uint32_t cond)
5227{
5228    arm_gen_condlabel(s);
5229    arm_gen_test_cc(cond ^ 1, s->condlabel);
5230}
5231
5232
5233/*
5234 * Constant expanders used by T16/T32 decode
5235 */
5236
5237/* Return only the rotation part of T32ExpandImm.  */
5238static int t32_expandimm_rot(DisasContext *s, int x)
5239{
5240    return x & 0xc00 ? extract32(x, 7, 5) : 0;
5241}
5242
5243/* Return the unrotated immediate from T32ExpandImm.  */
5244static int t32_expandimm_imm(DisasContext *s, int x)
5245{
5246    int imm = extract32(x, 0, 8);
5247
5248    switch (extract32(x, 8, 4)) {
5249    case 0: /* XY */
5250        /* Nothing to do.  */
5251        break;
5252    case 1: /* 00XY00XY */
5253        imm *= 0x00010001;
5254        break;
5255    case 2: /* XY00XY00 */
5256        imm *= 0x01000100;
5257        break;
5258    case 3: /* XYXYXYXY */
5259        imm *= 0x01010101;
5260        break;
5261    default:
5262        /* Rotated constant.  */
5263        imm |= 0x80;
5264        break;
5265    }
5266    return imm;
5267}
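
/*
 * For example, x == 0x1ab (pattern 1) yields 0x00ab00ab.  For the
 * rotated-constant patterns (x[11:10] != 0), OR-ing in 0x80 supplies
 * the implied leading one that t32_expandimm_rot() then rotates.
 */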
5268
5269static int t32_branch24(DisasContext *s, int x)
5270{
5271    /* Convert J1:J2 at x[22:21] to I2:I1, which involves I=J^~S.  */
5272    x ^= !(x < 0) * (3 << 21);
5273    /* Append the final zero.  */
5274    return x << 1;
5275}
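
/*
 * For example, when S == 1 (x negative) the architecture specifies
 * I = J, so no flip is needed; when S == 0 it specifies I = NOT(J),
 * which the XOR of bits [22:21] above provides.
 */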
5276
5277static int t16_setflags(DisasContext *s)
5278{
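    /* T16 data-processing insns set flags only outside an IT block. */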
5279    return s->condexec_mask == 0;
5280}
5281
5282static int t16_push_list(DisasContext *s, int x)
5283{
5284    return (x & 0xff) | (x & 0x100) << (14 - 8);
5285}
5286
5287static int t16_pop_list(DisasContext *s, int x)
5288{
5289    return (x & 0xff) | (x & 0x100) << (15 - 8);
5290}
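
/*
 * For example, a T16 register list of 0x1ff (r0-r7 plus the extra bit)
 * becomes 0x40ff for PUSH (adding lr) and 0x80ff for POP (adding pc).
 */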
5291
5292/*
5293 * Include the generated decoders.
5294 */
5295
5296#include "decode-a32.c.inc"
5297#include "decode-a32-uncond.c.inc"
5298#include "decode-t32.c.inc"
5299#include "decode-t16.c.inc"
5300
5301static bool valid_cp(DisasContext *s, int cp)
5302{
5303    /*
5304     * Return true if this coprocessor field indicates something
5305     * that's really a possible coprocessor.
5306     * For v7 and earlier, coprocessors 8..15 were reserved for Arm use,
5307     * and of those only cp14 and cp15 were used for registers.
5308     * cp10 and cp11 were used for VFP and Neon, whose decode is
5309     * dealt with elsewhere. With the advent of fp16, cp9 is also
5310     * now part of VFP.
5311     * For v8A and later, the encoding has been tightened so that
5312     * only cp14 and cp15 are valid, and other values aren't considered
5313     * to be in the coprocessor-instruction space at all. v8M still
5314     * permits coprocessors 0..7.
5315     * For XScale, we must not decode the XScale cp0, cp1 space as
5316     * a standard coprocessor insn, because we want to fall through to
5317     * the legacy disas_xscale_insn() decoder after decodetree is done.
5318     */
5319    if (arm_dc_feature(s, ARM_FEATURE_XSCALE) && (cp == 0 || cp == 1)) {
5320        return false;
5321    }
5322
5323    if (arm_dc_feature(s, ARM_FEATURE_V8) &&
5324        !arm_dc_feature(s, ARM_FEATURE_M)) {
5325        return cp >= 14;
5326    }
5327    return cp < 8 || cp >= 14;
5328}
5329
5330static bool trans_MCR(DisasContext *s, arg_MCR *a)
5331{
5332    if (!valid_cp(s, a->cp)) {
5333        return false;
5334    }
5335    do_coproc_insn(s, a->cp, false, a->opc1, a->crn, a->crm, a->opc2,
5336                   false, a->rt, 0);
5337    return true;
5338}
5339
5340static bool trans_MRC(DisasContext *s, arg_MRC *a)
5341{
5342    if (!valid_cp(s, a->cp)) {
5343        return false;
5344    }
5345    do_coproc_insn(s, a->cp, false, a->opc1, a->crn, a->crm, a->opc2,
5346                   true, a->rt, 0);
5347    return true;
5348}
5349
5350static bool trans_MCRR(DisasContext *s, arg_MCRR *a)
5351{
5352    if (!valid_cp(s, a->cp)) {
5353        return false;
5354    }
5355    do_coproc_insn(s, a->cp, true, a->opc1, 0, a->crm, 0,
5356                   false, a->rt, a->rt2);
5357    return true;
5358}
5359
5360static bool trans_MRRC(DisasContext *s, arg_MRRC *a)
5361{
5362    if (!valid_cp(s, a->cp)) {
5363        return false;
5364    }
5365    do_coproc_insn(s, a->cp, true, a->opc1, 0, a->crm, 0,
5366                   true, a->rt, a->rt2);
5367    return true;
5368}
5369
5370/* Helpers to swap operands for reverse-subtract.  */
5371static void gen_rsb(TCGv_i32 dst, TCGv_i32 a, TCGv_i32 b)
5372{
5373    tcg_gen_sub_i32(dst, b, a);
5374}
5375
5376static void gen_rsb_CC(TCGv_i32 dst, TCGv_i32 a, TCGv_i32 b)
5377{
5378    gen_sub_CC(dst, b, a);
5379}
5380
5381static void gen_rsc(TCGv_i32 dest, TCGv_i32 a, TCGv_i32 b)
5382{
5383    gen_sub_carry(dest, b, a);
5384}
5385
5386static void gen_rsc_CC(TCGv_i32 dest, TCGv_i32 a, TCGv_i32 b)
5387{
5388    gen_sbc_CC(dest, b, a);
5389}
5390
5391/*
5392 * Helpers for the data processing routines.
5393 *
5394 * After the computation, store the result back.
5395 * This may be a plain store to a register (STREG_NORMAL), be suppressed
5396 * altogether (STREG_NONE), require a runtime check against the stack
5397 * limits (STREG_SP_CHECK), or generate an exception return (STREG_EXC_RET).
5398 *
5399 * Always return true, indicating success for a trans_* function.
5400 */
5401typedef enum {
5402   STREG_NONE,
5403   STREG_NORMAL,
5404   STREG_SP_CHECK,
5405   STREG_EXC_RET,
5406} StoreRegKind;
5407
5408static bool store_reg_kind(DisasContext *s, int rd,
5409                            TCGv_i32 val, StoreRegKind kind)
5410{
5411    switch (kind) {
5412    case STREG_NONE:
5413        tcg_temp_free_i32(val);
5414        return true;
5415    case STREG_NORMAL:
5416        /* See ALUWritePC: Interworking only from a32 mode. */
5417        if (s->thumb) {
5418            store_reg(s, rd, val);
5419        } else {
5420            store_reg_bx(s, rd, val);
5421        }
5422        return true;
5423    case STREG_SP_CHECK:
5424        store_sp_checked(s, val);
5425        return true;
5426    case STREG_EXC_RET:
5427        gen_exception_return(s, val);
5428        return true;
5429    }
5430    g_assert_not_reached();
5431}
5432
5433/*
5434 * Data Processing (register)
5435 *
5436 * Operate, optionally setting flags, on one register source,
5437 * one immediate-shifted register source, and a destination.
5438 */
5439static bool op_s_rrr_shi(DisasContext *s, arg_s_rrr_shi *a,
5440                         void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
5441                         int logic_cc, StoreRegKind kind)
5442{
5443    TCGv_i32 tmp1, tmp2;
5444
5445    tmp2 = load_reg(s, a->rm);
5446    gen_arm_shift_im(tmp2, a->shty, a->shim, logic_cc);
5447    tmp1 = load_reg(s, a->rn);
5448
5449    gen(tmp1, tmp1, tmp2);
5450    tcg_temp_free_i32(tmp2);
5451
5452    if (logic_cc) {
5453        gen_logic_CC(tmp1);
5454    }
5455    return store_reg_kind(s, a->rd, tmp1, kind);
5456}
5457
5458static bool op_s_rxr_shi(DisasContext *s, arg_s_rrr_shi *a,
5459                         void (*gen)(TCGv_i32, TCGv_i32),
5460                         int logic_cc, StoreRegKind kind)
5461{
5462    TCGv_i32 tmp;
5463
5464    tmp = load_reg(s, a->rm);
5465    gen_arm_shift_im(tmp, a->shty, a->shim, logic_cc);
5466
5467    gen(tmp, tmp);
5468    if (logic_cc) {
5469        gen_logic_CC(tmp);
5470    }
5471    return store_reg_kind(s, a->rd, tmp, kind);
5472}
5473
5474/*
5475 * Data-processing (register-shifted register)
5476 *
5477 * Operate, optionally setting flags, on one register source,
5478 * one register-shifted register source, and a destination.
5479 */
5480static bool op_s_rrr_shr(DisasContext *s, arg_s_rrr_shr *a,
5481                         void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
5482                         int logic_cc, StoreRegKind kind)
5483{
5484    TCGv_i32 tmp1, tmp2;
5485
5486    tmp1 = load_reg(s, a->rs);
5487    tmp2 = load_reg(s, a->rm);
5488    gen_arm_shift_reg(tmp2, a->shty, tmp1, logic_cc);
5489    tmp1 = load_reg(s, a->rn);
5490
5491    gen(tmp1, tmp1, tmp2);
5492    tcg_temp_free_i32(tmp2);
5493
5494    if (logic_cc) {
5495        gen_logic_CC(tmp1);
5496    }
5497    return store_reg_kind(s, a->rd, tmp1, kind);
5498}
5499
5500static bool op_s_rxr_shr(DisasContext *s, arg_s_rrr_shr *a,
5501                         void (*gen)(TCGv_i32, TCGv_i32),
5502                         int logic_cc, StoreRegKind kind)
5503{
5504    TCGv_i32 tmp1, tmp2;
5505
5506    tmp1 = load_reg(s, a->rs);
5507    tmp2 = load_reg(s, a->rm);
5508    gen_arm_shift_reg(tmp2, a->shty, tmp1, logic_cc);
5509
5510    gen(tmp2, tmp2);
5511    if (logic_cc) {
5512        gen_logic_CC(tmp2);
5513    }
5514    return store_reg_kind(s, a->rd, tmp2, kind);
5515}
5516
5517/*
5518 * Data-processing (immediate)
5519 *
5520 * Operate, optionally setting flags, on one register source,
5521 * one rotated immediate, and a destination.
5522 *
5523 * Note that logic_cc && a->rot setting CF based on the msb of the
5524 * immediate is the reason why we must pass in the unrotated form
5525 * of the immediate.
5526 */
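/*
 * For example, a->imm == 0xff with a->rot == 8 produces imm == 0xff000000,
 * and a flag-setting logical op then sets CF from its bit 31 (here 1).
 */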
5527static bool op_s_rri_rot(DisasContext *s, arg_s_rri_rot *a,
5528                         void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
5529                         int logic_cc, StoreRegKind kind)
5530{
5531    TCGv_i32 tmp1, tmp2;
5532    uint32_t imm;
5533
5534    imm = ror32(a->imm, a->rot);
5535    if (logic_cc && a->rot) {
5536        tcg_gen_movi_i32(cpu_CF, imm >> 31);
5537    }
5538    tmp2 = tcg_const_i32(imm);
5539    tmp1 = load_reg(s, a->rn);
5540
5541    gen(tmp1, tmp1, tmp2);
5542    tcg_temp_free_i32(tmp2);
5543
5544    if (logic_cc) {
5545        gen_logic_CC(tmp1);
5546    }
5547    return store_reg_kind(s, a->rd, tmp1, kind);
5548}
5549
5550static bool op_s_rxi_rot(DisasContext *s, arg_s_rri_rot *a,
5551                         void (*gen)(TCGv_i32, TCGv_i32),
5552                         int logic_cc, StoreRegKind kind)
5553{
5554    TCGv_i32 tmp;
5555    uint32_t imm;
5556
5557    imm = ror32(a->imm, a->rot);
5558    if (logic_cc && a->rot) {
5559        tcg_gen_movi_i32(cpu_CF, imm >> 31);
5560    }
5561    tmp = tcg_const_i32(imm);
5562
5563    gen(tmp, tmp);
5564    if (logic_cc) {
5565        gen_logic_CC(tmp);
5566    }
5567    return store_reg_kind(s, a->rd, tmp, kind);
5568}
5569
5570#define DO_ANY3(NAME, OP, L, K)                                         \
5571    static bool trans_##NAME##_rrri(DisasContext *s, arg_s_rrr_shi *a)  \
5572    { StoreRegKind k = (K); return op_s_rrr_shi(s, a, OP, L, k); }      \
5573    static bool trans_##NAME##_rrrr(DisasContext *s, arg_s_rrr_shr *a)  \
5574    { StoreRegKind k = (K); return op_s_rrr_shr(s, a, OP, L, k); }      \
5575    static bool trans_##NAME##_rri(DisasContext *s, arg_s_rri_rot *a)   \
5576    { StoreRegKind k = (K); return op_s_rri_rot(s, a, OP, L, k); }
5577
5578#define DO_ANY2(NAME, OP, L, K)                                         \
5579    static bool trans_##NAME##_rxri(DisasContext *s, arg_s_rrr_shi *a)  \
5580    { StoreRegKind k = (K); return op_s_rxr_shi(s, a, OP, L, k); }      \
5581    static bool trans_##NAME##_rxrr(DisasContext *s, arg_s_rrr_shr *a)  \
5582    { StoreRegKind k = (K); return op_s_rxr_shr(s, a, OP, L, k); }      \
5583    static bool trans_##NAME##_rxi(DisasContext *s, arg_s_rri_rot *a)   \
5584    { StoreRegKind k = (K); return op_s_rxi_rot(s, a, OP, L, k); }
5585
5586#define DO_CMP2(NAME, OP, L)                                            \
5587    static bool trans_##NAME##_xrri(DisasContext *s, arg_s_rrr_shi *a)  \
5588    { return op_s_rrr_shi(s, a, OP, L, STREG_NONE); }                   \
5589    static bool trans_##NAME##_xrrr(DisasContext *s, arg_s_rrr_shr *a)  \
5590    { return op_s_rrr_shr(s, a, OP, L, STREG_NONE); }                   \
5591    static bool trans_##NAME##_xri(DisasContext *s, arg_s_rri_rot *a)   \
5592    { return op_s_rri_rot(s, a, OP, L, STREG_NONE); }
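
/*
 * For example, DO_ANY3(AND, ...) below expands to trans_AND_rrri(),
 * trans_AND_rrrr() and trans_AND_rri(), covering the immediate-shift,
 * register-shift and rotated-immediate operand forms respectively.
 */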
5593
5594DO_ANY3(AND, tcg_gen_and_i32, a->s, STREG_NORMAL)
5595DO_ANY3(EOR, tcg_gen_xor_i32, a->s, STREG_NORMAL)
5596DO_ANY3(ORR, tcg_gen_or_i32, a->s, STREG_NORMAL)
5597DO_ANY3(BIC, tcg_gen_andc_i32, a->s, STREG_NORMAL)
5598
5599DO_ANY3(RSB, a->s ? gen_rsb_CC : gen_rsb, false, STREG_NORMAL)
5600DO_ANY3(ADC, a->s ? gen_adc_CC : gen_add_carry, false, STREG_NORMAL)
5601DO_ANY3(SBC, a->s ? gen_sbc_CC : gen_sub_carry, false, STREG_NORMAL)
5602DO_ANY3(RSC, a->s ? gen_rsc_CC : gen_rsc, false, STREG_NORMAL)
5603
5604DO_CMP2(TST, tcg_gen_and_i32, true)
5605DO_CMP2(TEQ, tcg_gen_xor_i32, true)
5606DO_CMP2(CMN, gen_add_CC, false)
5607DO_CMP2(CMP, gen_sub_CC, false)
5608
5609DO_ANY3(ADD, a->s ? gen_add_CC : tcg_gen_add_i32, false,
5610        a->rd == 13 && a->rn == 13 ? STREG_SP_CHECK : STREG_NORMAL)
5611
5612/*
5613 * Note that for the computation of StoreRegKind we may return out of
5614 * the middle of the functions expanded by DO_ANY3, and that we may
5615 * modify a->s via the K parameter before it is used by OP.
5616 */
5617DO_ANY3(SUB, a->s ? gen_sub_CC : tcg_gen_sub_i32, false,
5618        ({
5619            StoreRegKind ret = STREG_NORMAL;
5620            if (a->rd == 15 && a->s) {
5621                /*
5622                 * See ALUExceptionReturn:
5623                 * In User mode, UNPREDICTABLE; we choose UNDEF.
5624                 * In Hyp mode, UNDEFINED.
5625                 */
5626                if (IS_USER(s) || s->current_el == 2) {
5627                    unallocated_encoding(s);
5628                    return true;
5629                }
5630                /* There is no writeback of nzcv to PSTATE.  */
5631                a->s = 0;
5632                ret = STREG_EXC_RET;
5633            } else if (a->rd == 13 && a->rn == 13) {
5634                ret = STREG_SP_CHECK;
5635            }
5636            ret;
5637        }))
5638
5639DO_ANY2(MOV, tcg_gen_mov_i32, a->s,
5640        ({
5641            StoreRegKind ret = STREG_NORMAL;
5642            if (a->rd == 15 && a->s) {
5643                /*
5644                 * See ALUExceptionReturn:
5645                 * In User mode, UNPREDICTABLE; we choose UNDEF.
5646                 * In Hyp mode, UNDEFINED.
5647                 */
5648                if (IS_USER(s) || s->current_el == 2) {
5649                    unallocated_encoding(s);
5650                    return true;
5651                }
5652                /* There is no writeback of nzcv to PSTATE.  */
5653                a->s = 0;
5654                ret = STREG_EXC_RET;
5655            } else if (a->rd == 13) {
5656                ret = STREG_SP_CHECK;
5657            }
5658            ret;
5659        }))
5660
5661DO_ANY2(MVN, tcg_gen_not_i32, a->s, STREG_NORMAL)
5662
5663/*
5664 * ORN is only available with T32, so there is no register-shifted-register
5665 * form of the insn.  Using the DO_ANY3 macro would create an unused function.
5666 */
5667static bool trans_ORN_rrri(DisasContext *s, arg_s_rrr_shi *a)
5668{
5669    return op_s_rrr_shi(s, a, tcg_gen_orc_i32, a->s, STREG_NORMAL);
5670}
5671
5672static bool trans_ORN_rri(DisasContext *s, arg_s_rri_rot *a)
5673{
5674    return op_s_rri_rot(s, a, tcg_gen_orc_i32, a->s, STREG_NORMAL);
5675}
5676
5677#undef DO_ANY3
5678#undef DO_ANY2
5679#undef DO_CMP2
5680
5681static bool trans_ADR(DisasContext *s, arg_ri *a)
5682{
5683    store_reg_bx(s, a->rd, add_reg_for_lit(s, 15, a->imm));
5684    return true;
5685}
5686
5687static bool trans_MOVW(DisasContext *s, arg_MOVW *a)
5688{
5689    TCGv_i32 tmp;
5690
5691    if (!ENABLE_ARCH_6T2) {
5692        return false;
5693    }
5694
5695    tmp = tcg_const_i32(a->imm);
5696    store_reg(s, a->rd, tmp);
5697    return true;
5698}
5699
5700static bool trans_MOVT(DisasContext *s, arg_MOVW *a)
5701{
5702    TCGv_i32 tmp;
5703
5704    if (!ENABLE_ARCH_6T2) {
5705        return false;
5706    }
5707
5708    tmp = load_reg(s, a->rd);
5709    tcg_gen_ext16u_i32(tmp, tmp);
5710    tcg_gen_ori_i32(tmp, tmp, a->imm << 16);
5711    store_reg(s, a->rd, tmp);
5712    return true;
5713}
5714
5715/*
5716 * Multiply and multiply accumulate
5717 */
5718
5719static bool op_mla(DisasContext *s, arg_s_rrrr *a, bool add)
5720{
5721    TCGv_i32 t1, t2;
5722
5723    t1 = load_reg(s, a->rn);
5724    t2 = load_reg(s, a->rm);
5725    tcg_gen_mul_i32(t1, t1, t2);
5726    tcg_temp_free_i32(t2);
5727    if (add) {
5728        t2 = load_reg(s, a->ra);
5729        tcg_gen_add_i32(t1, t1, t2);
5730        tcg_temp_free_i32(t2);
5731    }
5732    if (a->s) {
5733        gen_logic_CC(t1);
5734    }
5735    store_reg(s, a->rd, t1);
5736    return true;
5737}
5738
5739static bool trans_MUL(DisasContext *s, arg_MUL *a)
5740{
5741    return op_mla(s, a, false);
5742}
5743
5744static bool trans_MLA(DisasContext *s, arg_MLA *a)
5745{
5746    return op_mla(s, a, true);
5747}
5748
5749static bool trans_MLS(DisasContext *s, arg_MLS *a)
5750{
5751    TCGv_i32 t1, t2;
5752
5753    if (!ENABLE_ARCH_6T2) {
5754        return false;
5755    }
5756    t1 = load_reg(s, a->rn);
5757    t2 = load_reg(s, a->rm);
5758    tcg_gen_mul_i32(t1, t1, t2);
5759    tcg_temp_free_i32(t2);
5760    t2 = load_reg(s, a->ra);
5761    tcg_gen_sub_i32(t1, t2, t1);
5762    tcg_temp_free_i32(t2);
5763    store_reg(s, a->rd, t1);
5764    return true;
5765}
5766
5767static bool op_mlal(DisasContext *s, arg_s_rrrr *a, bool uns, bool add)
5768{
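    /* {rd:ra} = rn * rm (+ {rd:ra}); ra holds the low half, rd the high. */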
5769    TCGv_i32 t0, t1, t2, t3;
5770
5771    t0 = load_reg(s, a->rm);
5772    t1 = load_reg(s, a->rn);
5773    if (uns) {
5774        tcg_gen_mulu2_i32(t0, t1, t0, t1);
5775    } else {
5776        tcg_gen_muls2_i32(t0, t1, t0, t1);
5777    }
5778    if (add) {
5779        t2 = load_reg(s, a->ra);
5780        t3 = load_reg(s, a->rd);
5781        tcg_gen_add2_i32(t0, t1, t0, t1, t2, t3);
5782        tcg_temp_free_i32(t2);
5783        tcg_temp_free_i32(t3);
5784    }
5785    if (a->s) {
5786        gen_logicq_cc(t0, t1);
5787    }
5788    store_reg(s, a->ra, t0);
5789    store_reg(s, a->rd, t1);
5790    return true;
5791}
5792
5793static bool trans_UMULL(DisasContext *s, arg_UMULL *a)
5794{
5795    return op_mlal(s, a, true, false);
5796}
5797
5798static bool trans_SMULL(DisasContext *s, arg_SMULL *a)
5799{
5800    return op_mlal(s, a, false, false);
5801}
5802
5803static bool trans_UMLAL(DisasContext *s, arg_UMLAL *a)
5804{
5805    return op_mlal(s, a, true, true);
5806}
5807
5808static bool trans_SMLAL(DisasContext *s, arg_SMLAL *a)
5809{
5810    return op_mlal(s, a, false, true);
5811}
5812
5813static bool trans_UMAAL(DisasContext *s, arg_UMAAL *a)
5814{
5815    TCGv_i32 t0, t1, t2, zero;
5816
5817    if (s->thumb
5818        ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
5819        : !ENABLE_ARCH_6) {
5820        return false;
5821    }
5822
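    /*
     * UMAAL computes {rd:ra} = rn * rm + ra + rd; this cannot overflow
     * 64 bits, since (2^32-1)^2 + 2*(2^32-1) == 2^64-1.
     */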
5823    t0 = load_reg(s, a->rm);
5824    t1 = load_reg(s, a->rn);
5825    tcg_gen_mulu2_i32(t0, t1, t0, t1);
5826    zero = tcg_const_i32(0);
5827    t2 = load_reg(s, a->ra);
5828    tcg_gen_add2_i32(t0, t1, t0, t1, t2, zero);
5829    tcg_temp_free_i32(t2);
5830    t2 = load_reg(s, a->rd);
5831    tcg_gen_add2_i32(t0, t1, t0, t1, t2, zero);
5832    tcg_temp_free_i32(t2);
5833    tcg_temp_free_i32(zero);
5834    store_reg(s, a->ra, t0);
5835    store_reg(s, a->rd, t1);
5836    return true;
5837}
5838
5839/*
5840 * Saturating addition and subtraction
5841 */
5842
5843static bool op_qaddsub(DisasContext *s, arg_rrr *a, bool add, bool doub)
5844{
5845    TCGv_i32 t0, t1;
5846
5847    if (s->thumb
5848        ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
5849        : !ENABLE_ARCH_5TE) {
5850        return false;
5851    }
5852
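    /* For QDADD/QDSUB, the doubling of rn saturates (setting Q) first. */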
5853    t0 = load_reg(s, a->rm);
5854    t1 = load_reg(s, a->rn);
5855    if (doub) {
5856        gen_helper_add_saturate(t1, cpu_env, t1, t1);
5857    }
5858    if (add) {
5859        gen_helper_add_saturate(t0, cpu_env, t0, t1);
5860    } else {
5861        gen_helper_sub_saturate(t0, cpu_env, t0, t1);
5862    }
5863    tcg_temp_free_i32(t1);
5864    store_reg(s, a->rd, t0);
5865    return true;
5866}
5867
5868#define DO_QADDSUB(NAME, ADD, DOUB) \
5869static bool trans_##NAME(DisasContext *s, arg_rrr *a)    \
5870{                                                        \
5871    return op_qaddsub(s, a, ADD, DOUB);                  \
5872}
5873
5874DO_QADDSUB(QADD, true, false)
5875DO_QADDSUB(QSUB, false, false)
5876DO_QADDSUB(QDADD, true, true)
5877DO_QADDSUB(QDSUB, false, true)
5878
5879#undef DO_QADDSUB
5880
5881/*
5882 * Halfword multiply and multiply accumulate
5883 */
5884
5885static bool op_smlaxxx(DisasContext *s, arg_rrrr *a,
5886                       int add_long, bool nt, bool mt)
5887{
5888    TCGv_i32 t0, t1, tl, th;
5889
5890    if (s->thumb
5891        ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
5892        : !ENABLE_ARCH_5TE) {
5893        return false;
5894    }
5895
5896    t0 = load_reg(s, a->rn);
5897    t1 = load_reg(s, a->rm);
5898    gen_mulxy(t0, t1, nt, mt);
5899    tcg_temp_free_i32(t1);
5900
5901    switch (add_long) {
5902    case 0:
5903        store_reg(s, a->rd, t0);
5904        break;
5905    case 1:
5906        t1 = load_reg(s, a->ra);
5907        gen_helper_add_setq(t0, cpu_env, t0, t1);
5908        tcg_temp_free_i32(t1);
5909        store_reg(s, a->rd, t0);
5910        break;
5911    case 2:
5912        tl = load_reg(s, a->ra);
5913        th = load_reg(s, a->rd);
5914        /* Sign-extend the 32-bit product to 64 bits.  */
5915        t1 = tcg_temp_new_i32();
5916        tcg_gen_sari_i32(t1, t0, 31);
5917        tcg_gen_add2_i32(tl, th, tl, th, t0, t1);
5918        tcg_temp_free_i32(t0);
5919        tcg_temp_free_i32(t1);
5920        store_reg(s, a->ra, tl);
5921        store_reg(s, a->rd, th);
5922        break;
5923    default:
5924        g_assert_not_reached();
5925    }
5926    return true;
5927}
5928
5929#define DO_SMLAX(NAME, add, nt, mt) \
5930static bool trans_##NAME(DisasContext *s, arg_rrrr *a)     \
5931{                                                          \
5932    return op_smlaxxx(s, a, add, nt, mt);                  \
5933}
5934
5935DO_SMLAX(SMULBB, 0, 0, 0)
5936DO_SMLAX(SMULBT, 0, 0, 1)
5937DO_SMLAX(SMULTB, 0, 1, 0)
5938DO_SMLAX(SMULTT, 0, 1, 1)
5939
5940DO_SMLAX(SMLABB, 1, 0, 0)
5941DO_SMLAX(SMLABT, 1, 0, 1)
5942DO_SMLAX(SMLATB, 1, 1, 0)
5943DO_SMLAX(SMLATT, 1, 1, 1)
5944
5945DO_SMLAX(SMLALBB, 2, 0, 0)
5946DO_SMLAX(SMLALBT, 2, 0, 1)
5947DO_SMLAX(SMLALTB, 2, 1, 0)
5948DO_SMLAX(SMLALTT, 2, 1, 1)
5949
5950#undef DO_SMLAX
5951
5952static bool op_smlawx(DisasContext *s, arg_rrrr *a, bool add, bool mt)
5953{
5954    TCGv_i32 t0, t1;
5955
5956    if (!ENABLE_ARCH_5TE) {
5957        return false;
5958    }
5959
5960    t0 = load_reg(s, a->rn);
5961    t1 = load_reg(s, a->rm);
5962    /*
5963     * Since the nominal result is product<47:16>, shift the 16-bit
5964     * input up by 16 bits, so that the result is at product<63:32>.
5965     */
5966    if (mt) {
5967        tcg_gen_andi_i32(t1, t1, 0xffff0000);
5968    } else {
5969        tcg_gen_shli_i32(t1, t1, 16);
5970    }
5971    tcg_gen_muls2_i32(t0, t1, t0, t1);
5972    tcg_temp_free_i32(t0);
5973    if (add) {
5974        t0 = load_reg(s, a->ra);
5975        gen_helper_add_setq(t1, cpu_env, t1, t0);
5976        tcg_temp_free_i32(t0);
5977    }
5978    store_reg(s, a->rd, t1);
5979    return true;
5980}
5981
5982#define DO_SMLAWX(NAME, add, mt) \
5983static bool trans_##NAME(DisasContext *s, arg_rrrr *a)     \
5984{                                                          \
5985    return op_smlawx(s, a, add, mt);                       \
5986}
5987
5988DO_SMLAWX(SMULWB, 0, 0)
5989DO_SMLAWX(SMULWT, 0, 1)
5990DO_SMLAWX(SMLAWB, 1, 0)
5991DO_SMLAWX(SMLAWT, 1, 1)
5992
5993#undef DO_SMLAWX
5994
5995/*
5996 * MSR (immediate) and hints
5997 */
5998
5999static bool trans_YIELD(DisasContext *s, arg_YIELD *a)
6000{
6001    /*
6002     * When running single-threaded TCG code, use the helper to ensure that
6003     * the next round-robin scheduled vCPU gets a crack.  When running in
6004     * MTTCG we don't generate jumps to the helper as it won't affect the
6005     * scheduling of other vCPUs.
6006     */
6007    if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
6008        gen_set_pc_im(s, s->base.pc_next);
6009        s->base.is_jmp = DISAS_YIELD;
6010    }
6011    return true;
6012}
6013
6014static bool trans_WFE(DisasContext *s, arg_WFE *a)
6015{
6016    /*
6017     * When running single-threaded TCG code, use the helper to ensure that
6018     * the next round-robin scheduled vCPU gets a crack.  In MTTCG mode we
6019     * just skip this instruction.  Currently the SEV/SEVL instructions,
6020     * which are *one* of many ways to wake the CPU from WFE, are not
6021     * implemented so we can't sleep like WFI does.
6022     */
6023    if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
6024        gen_set_pc_im(s, s->base.pc_next);
6025        s->base.is_jmp = DISAS_WFE;
6026    }
6027    return true;
6028}
6029
6030static bool trans_WFI(DisasContext *s, arg_WFI *a)
6031{
6032    /* For WFI, halt the vCPU until an IRQ. */
6033    gen_set_pc_im(s, s->base.pc_next);
6034    s->base.is_jmp = DISAS_WFI;
6035    return true;
6036}
6037
6038static bool trans_NOP(DisasContext *s, arg_NOP *a)
6039{
6040    return true;
6041}
6042
6043static bool trans_MSR_imm(DisasContext *s, arg_MSR_imm *a)
6044{
6045    uint32_t val = ror32(a->imm, a->rot * 2);
6046    uint32_t mask = msr_mask(s, a->mask, a->r);
6047
6048    if (gen_set_psr_im(s, mask, a->r, val)) {
6049        unallocated_encoding(s);
6050    }
6051    return true;
6052}
6053
6054/*
6055 * Cyclic Redundancy Check
6056 */
6057
6058static bool op_crc32(DisasContext *s, arg_rrr *a, bool c, MemOp sz)
6059{
6060    TCGv_i32 t1, t2, t3;
6061
6062    if (!dc_isar_feature(aa32_crc32, s)) {
6063        return false;
6064    }
6065
6066    t1 = load_reg(s, a->rn);
6067    t2 = load_reg(s, a->rm);
6068    switch (sz) {
6069    case MO_8:
6070        gen_uxtb(t2);
6071        break;
6072    case MO_16:
6073        gen_uxth(t2);
6074        break;
6075    case MO_32:
6076        break;
6077    default:
6078        g_assert_not_reached();
6079    }
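    /* The crc32 helpers take the number of input bytes as their last operand. */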
6080    t3 = tcg_const_i32(1 << sz);
6081    if (c) {
6082        gen_helper_crc32c(t1, t1, t2, t3);
6083    } else {
6084        gen_helper_crc32(t1, t1, t2, t3);
6085    }
6086    tcg_temp_free_i32(t2);
6087    tcg_temp_free_i32(t3);
6088    store_reg(s, a->rd, t1);
6089    return true;
6090}
6091
6092#define DO_CRC32(NAME, c, sz) \
6093static bool trans_##NAME(DisasContext *s, arg_rrr *a)  \
6094    { return op_crc32(s, a, c, sz); }
6095
6096DO_CRC32(CRC32B, false, MO_8)
6097DO_CRC32(CRC32H, false, MO_16)
6098DO_CRC32(CRC32W, false, MO_32)
6099DO_CRC32(CRC32CB, true, MO_8)
6100DO_CRC32(CRC32CH, true, MO_16)
6101DO_CRC32(CRC32CW, true, MO_32)
6102
6103#undef DO_CRC32
6104
6105/*
6106 * Miscellaneous instructions
6107 */
6108
6109static bool trans_MRS_bank(DisasContext *s, arg_MRS_bank *a)
6110{
6111    if (arm_dc_feature(s, ARM_FEATURE_M)) {
6112        return false;
6113    }
6114    gen_mrs_banked(s, a->r, a->sysm, a->rd);
6115    return true;
6116}
6117
6118static bool trans_MSR_bank(DisasContext *s, arg_MSR_bank *a)
6119{
6120    if (arm_dc_feature(s, ARM_FEATURE_M)) {
6121        return false;
6122    }
6123    gen_msr_banked(s, a->r, a->sysm, a->rn);
6124    return true;
6125}
6126
6127static bool trans_MRS_reg(DisasContext *s, arg_MRS_reg *a)
6128{
6129    TCGv_i32 tmp;
6130
6131    if (arm_dc_feature(s, ARM_FEATURE_M)) {
6132        return false;
6133    }
6134    if (a->r) {
6135        if (IS_USER(s)) {
6136            unallocated_encoding(s);
6137            return true;
6138        }
6139        tmp = load_cpu_field(spsr);
6140    } else {
6141        tmp = tcg_temp_new_i32();
6142        gen_helper_cpsr_read(tmp, cpu_env);
6143    }
6144    store_reg(s, a->rd, tmp);
6145    return true;
6146}
6147
6148static bool trans_MSR_reg(DisasContext *s, arg_MSR_reg *a)
6149{
6150    TCGv_i32 tmp;
6151    uint32_t mask = msr_mask(s, a->mask, a->r);
6152
6153    if (arm_dc_feature(s, ARM_FEATURE_M)) {
6154        return false;
6155    }
6156    tmp = load_reg(s, a->rn);
6157    if (gen_set_psr(s, mask, a->r, tmp)) {
6158        unallocated_encoding(s);
6159    }
6160    return true;
6161}
6162
6163static bool trans_MRS_v7m(DisasContext *s, arg_MRS_v7m *a)
6164{
6165    TCGv_i32 tmp;
6166
6167    if (!arm_dc_feature(s, ARM_FEATURE_M)) {
6168        return false;
6169    }
6170    tmp = tcg_const_i32(a->sysm);
6171    gen_helper_v7m_mrs(tmp, cpu_env, tmp);
6172    store_reg(s, a->rd, tmp);
6173    return true;
6174}
6175
6176static bool trans_MSR_v7m(DisasContext *s, arg_MSR_v7m *a)
6177{
6178    TCGv_i32 addr, reg;
6179
6180    if (!arm_dc_feature(s, ARM_FEATURE_M)) {
6181        return false;
6182    }
6183    addr = tcg_const_i32((a->mask << 10) | a->sysm);
6184    reg = load_reg(s, a->rn);
6185    gen_helper_v7m_msr(cpu_env, addr, reg);
6186    tcg_temp_free_i32(addr);
6187    tcg_temp_free_i32(reg);
6188    /* If we wrote to CONTROL, the EL might have changed */
6189    gen_helper_rebuild_hflags_m32_newel(cpu_env);
6190    gen_lookup_tb(s);
6191    return true;
6192}
6193
6194static bool trans_BX(DisasContext *s, arg_BX *a)
6195{
6196    if (!ENABLE_ARCH_4T) {
6197        return false;
6198    }
6199    gen_bx_excret(s, load_reg(s, a->rm));
6200    return true;
6201}
6202
6203static bool trans_BXJ(DisasContext *s, arg_BXJ *a)
6204{
6205    if (!ENABLE_ARCH_5J || arm_dc_feature(s, ARM_FEATURE_M)) {
6206        return false;
6207    }
6208    /* Trivial implementation equivalent to bx.  */
6209    gen_bx(s, load_reg(s, a->rm));
6210    return true;
6211}
6212
6213static bool trans_BLX_r(DisasContext *s, arg_BLX_r *a)
6214{
6215    TCGv_i32 tmp;
6216
6217    if (!ENABLE_ARCH_5) {
6218        return false;
6219    }
6220    tmp = load_reg(s, a->rm);
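    /* LR gets bit 0 set iff we are in Thumb state, as a later BX expects. */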
6221    tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | s->thumb);
6222    gen_bx(s, tmp);
6223    return true;
6224}
6225
6226/*
6227 * BXNS/BLXNS: only exist for v8M with the security extensions,
6228 * and always UNDEF if NonSecure.  We don't implement these in
6229 * the user-only mode either (in theory you can use them from
6230 * Secure User mode but they are too tied in to system emulation).
6231 */
6232static bool trans_BXNS(DisasContext *s, arg_BXNS *a)
6233{
6234    if (!s->v8m_secure || IS_USER_ONLY) {
6235        unallocated_encoding(s);
6236    } else {
6237        gen_bxns(s, a->rm);
6238    }
6239    return true;
6240}
6241
6242static bool trans_BLXNS(DisasContext *s, arg_BLXNS *a)
6243{
6244    if (!s->v8m_secure || IS_USER_ONLY) {
6245        unallocated_encoding(s);
6246    } else {
6247        gen_blxns(s, a->rm);
6248    }
6249    return true;
6250}
6251
6252static bool trans_CLZ(DisasContext *s, arg_CLZ *a)
6253{
6254    TCGv_i32 tmp;
6255
6256    if (!ENABLE_ARCH_5) {
6257        return false;
6258    }
6259    tmp = load_reg(s, a->rm);
6260    tcg_gen_clzi_i32(tmp, tmp, 32);
6261    store_reg(s, a->rd, tmp);
6262    return true;
6263}
6264
6265static bool trans_ERET(DisasContext *s, arg_ERET *a)
6266{
6267    TCGv_i32 tmp;
6268
6269    if (!arm_dc_feature(s, ARM_FEATURE_V7VE)) {
6270        return false;
6271    }
6272    if (IS_USER(s)) {
6273        unallocated_encoding(s);
6274        return true;
6275    }
6276    if (s->current_el == 2) {
6277        /* ERET from Hyp uses ELR_Hyp, not LR */
6278        tmp = load_cpu_field(elr_el[2]);
6279    } else {
6280        tmp = load_reg(s, 14);
6281    }
6282    gen_exception_return(s, tmp);
6283    return true;
6284}
6285
6286static bool trans_HLT(DisasContext *s, arg_HLT *a)
6287{
6288    gen_hlt(s, a->imm);
6289    return true;
6290}
6291
6292static bool trans_BKPT(DisasContext *s, arg_BKPT *a)
6293{
6294    if (!ENABLE_ARCH_5) {
6295        return false;
6296    }
6297    if (arm_dc_feature(s, ARM_FEATURE_M) &&
6298        semihosting_enabled() &&
6299#ifndef CONFIG_USER_ONLY
6300        !IS_USER(s) &&
6301#endif
6302        (a->imm == 0xab)) {
6303        gen_exception_internal_insn(s, s->pc_curr, EXCP_SEMIHOST);
6304    } else {
6305        gen_exception_bkpt_insn(s, syn_aa32_bkpt(a->imm, false));
6306    }
6307    return true;
6308}
6309
6310static bool trans_HVC(DisasContext *s, arg_HVC *a)
6311{
6312    if (!ENABLE_ARCH_7 || arm_dc_feature(s, ARM_FEATURE_M)) {
6313        return false;
6314    }
6315    if (IS_USER(s)) {
6316        unallocated_encoding(s);
6317    } else {
6318        gen_hvc(s, a->imm);
6319    }
6320    return true;
6321}
6322
6323static bool trans_SMC(DisasContext *s, arg_SMC *a)
6324{
6325    if (!ENABLE_ARCH_6K || arm_dc_feature(s, ARM_FEATURE_M)) {
6326        return false;
6327    }
6328    if (IS_USER(s)) {
6329        unallocated_encoding(s);
6330    } else {
6331        gen_smc(s);
6332    }
6333    return true;
6334}
6335
6336static bool trans_SG(DisasContext *s, arg_SG *a)
6337{
6338    if (!arm_dc_feature(s, ARM_FEATURE_M) ||
6339        !arm_dc_feature(s, ARM_FEATURE_V8)) {
6340        return false;
6341    }
6342    /*
6343     * SG (v8M only)
6344     * The bulk of the behaviour for this instruction is implemented
6345     * in v7m_handle_execute_nsc(), which deals with the insn when
6346     * it is executed by a CPU in non-secure state from memory
6347     * which is Secure & NonSecure-Callable.
6348     * Here we only need to handle the remaining cases:
6349     *  * in NS memory (including the "security extension not
6350     *    implemented" case) : NOP
6351     *  * in S memory but CPU already secure (clear IT bits)
6352     * We know that the attribute for the memory this insn is
6353     * in must match the current CPU state, because otherwise
6354     * get_phys_addr_pmsav8 would have generated an exception.
6355     */
6356    if (s->v8m_secure) {
6357        /* Like the IT insn, we don't need to generate any code */
6358        s->condexec_cond = 0;
6359        s->condexec_mask = 0;
6360    }
6361    return true;
6362}
6363
6364static bool trans_TT(DisasContext *s, arg_TT *a)
6365{
6366    TCGv_i32 addr, tmp;
6367
6368    if (!arm_dc_feature(s, ARM_FEATURE_M) ||
6369        !arm_dc_feature(s, ARM_FEATURE_V8)) {
6370        return false;
6371    }
6372    if (a->rd == 13 || a->rd == 15 || a->rn == 15) {
6373        /* We UNDEF for these UNPREDICTABLE cases */
6374        unallocated_encoding(s);
6375        return true;
6376    }
6377    if (a->A && !s->v8m_secure) {
6378        /* This case is UNDEFINED.  */
6379        unallocated_encoding(s);
6380        return true;
6381    }
6382
6383    addr = load_reg(s, a->rn);
6384    tmp = tcg_const_i32((a->A << 1) | a->T);
6385    gen_helper_v7m_tt(tmp, cpu_env, addr, tmp);
6386    tcg_temp_free_i32(addr);
6387    store_reg(s, a->rd, tmp);
6388    return true;
6389}
6390
6391/*
6392 * Load/store register index
6393 */
6394
6395static ISSInfo make_issinfo(DisasContext *s, int rd, bool p, bool w)
6396{
6397    ISSInfo ret;
6398
6399    /* ISS not valid if writeback */
6400    if (p && !w) {
6401        ret = rd;
6402        if (s->base.pc_next - s->pc_curr == 2) {
6403            ret |= ISSIs16Bit;
6404        }
6405    } else {
6406        ret = ISSInvalid;
6407    }
6408    return ret;
6409}
6410
6411static TCGv_i32 op_addr_rr_pre(DisasContext *s, arg_ldst_rr *a)
6412{
6413    TCGv_i32 addr = load_reg(s, a->rn);
6414
6415    if (s->v8m_stackcheck && a->rn == 13 && a->w) {
6416        gen_helper_v8m_stackcheck(cpu_env, addr);
6417    }
6418
6419    if (a->p) {
6420        TCGv_i32 ofs = load_reg(s, a->rm);
6421        gen_arm_shift_im(ofs, a->shtype, a->shimm, 0);
6422        if (a->u) {
6423            tcg_gen_add_i32(addr, addr, ofs);
6424        } else {
6425            tcg_gen_sub_i32(addr, addr, ofs);
6426        }
6427        tcg_temp_free_i32(ofs);
6428    }
6429    return addr;
6430}
6431
6432static void op_addr_rr_post(DisasContext *s, arg_ldst_rr *a,
6433                            TCGv_i32 addr, int address_offset)
6434{
6435    if (!a->p) {
6436        TCGv_i32 ofs = load_reg(s, a->rm);
6437        gen_arm_shift_im(ofs, a->shtype, a->shimm, 0);
6438        if (a->u) {
6439            tcg_gen_add_i32(addr, addr, ofs);
6440        } else {
6441            tcg_gen_sub_i32(addr, addr, ofs);
6442        }
6443        tcg_temp_free_i32(ofs);
6444    } else if (!a->w) {
6445        tcg_temp_free_i32(addr);
6446        return;
6447    }
6448    tcg_gen_addi_i32(addr, addr, address_offset);
6449    store_reg(s, a->rn, addr);
6450}
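
/*
 * LDRD/STRD pass address_offset == -4 here to undo the addr += 4 done
 * between their two word accesses, so that the base writeback value is
 * computed from the original address.
 */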
6451
6452static bool op_load_rr(DisasContext *s, arg_ldst_rr *a,
6453                       MemOp mop, int mem_idx)
6454{
6455    ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w);
6456    TCGv_i32 addr, tmp;
6457
6458    addr = op_addr_rr_pre(s, a);
6459
6460    tmp = tcg_temp_new_i32();
6461    gen_aa32_ld_i32(s, tmp, addr, mem_idx, mop | s->be_data);
6462    disas_set_da_iss(s, mop, issinfo);
6463
6464    /*
6465     * Perform base writeback before the loaded value to
6466     * ensure correct behavior with overlapping index registers.
6467     */
6468    op_addr_rr_post(s, a, addr, 0);
6469    store_reg_from_load(s, a->rt, tmp);
6470    return true;
6471}
6472
6473static bool op_store_rr(DisasContext *s, arg_ldst_rr *a,
6474                        MemOp mop, int mem_idx)
6475{
6476    ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w) | ISSIsWrite;
6477    TCGv_i32 addr, tmp;
6478
6479    addr = op_addr_rr_pre(s, a);
6480
6481    tmp = load_reg(s, a->rt);
6482    gen_aa32_st_i32(s, tmp, addr, mem_idx, mop | s->be_data);
6483    disas_set_da_iss(s, mop, issinfo);
6484    tcg_temp_free_i32(tmp);
6485
6486    op_addr_rr_post(s, a, addr, 0);
6487    return true;
6488}
6489
6490static bool trans_LDRD_rr(DisasContext *s, arg_ldst_rr *a)
6491{
6492    int mem_idx = get_mem_index(s);
6493    TCGv_i32 addr, tmp;
6494
6495    if (!ENABLE_ARCH_5TE) {
6496        return false;
6497    }
6498    if (a->rt & 1) {
6499        unallocated_encoding(s);
6500        return true;
6501    }
6502    addr = op_addr_rr_pre(s, a);
6503
6504    tmp = tcg_temp_new_i32();
6505    gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
6506    store_reg(s, a->rt, tmp);
6507
6508    tcg_gen_addi_i32(addr, addr, 4);
6509
6510    tmp = tcg_temp_new_i32();
6511    gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
6512    store_reg(s, a->rt + 1, tmp);
6513
6514    /* LDRD w/ base writeback is undefined if the registers overlap.  */
6515    op_addr_rr_post(s, a, addr, -4);
6516    return true;
6517}
6518
6519static bool trans_STRD_rr(DisasContext *s, arg_ldst_rr *a)
6520{
6521    int mem_idx = get_mem_index(s);
6522    TCGv_i32 addr, tmp;
6523
6524    if (!ENABLE_ARCH_5TE) {
6525        return false;
6526    }
6527    if (a->rt & 1) {
6528        unallocated_encoding(s);
6529        return true;
6530    }
6531    addr = op_addr_rr_pre(s, a);
6532
6533    tmp = load_reg(s, a->rt);
6534    gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
6535    tcg_temp_free_i32(tmp);
6536
6537    tcg_gen_addi_i32(addr, addr, 4);
6538
6539    tmp = load_reg(s, a->rt + 1);
6540    gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
6541    tcg_temp_free_i32(tmp);
6542
6543    op_addr_rr_post(s, a, addr, -4);
6544    return true;
6545}
6546
6547/*
6548 * Load/store immediate index
6549 */
6550
6551static TCGv_i32 op_addr_ri_pre(DisasContext *s, arg_ldst_ri *a)
6552{
6553    int ofs = a->imm;
6554
6555    if (!a->u) {
6556        ofs = -ofs;
6557    }
6558
6559    if (s->v8m_stackcheck && a->rn == 13 && a->w) {
6560        /*
6561         * Stackcheck. Here we know 'addr' is the current SP;
6562         * U is set if we're moving SP up, else down. It is
6563         * UNKNOWN whether the limit check triggers when SP starts
6564         * below the limit and ends up above it; we choose to trigger it.
6565         */
6566        if (!a->u) {
6567            TCGv_i32 newsp = tcg_temp_new_i32();
6568            tcg_gen_addi_i32(newsp, cpu_R[13], ofs);
6569            gen_helper_v8m_stackcheck(cpu_env, newsp);
6570            tcg_temp_free_i32(newsp);
6571        } else {
6572            gen_helper_v8m_stackcheck(cpu_env, cpu_R[13]);
6573        }
6574    }
6575
6576    return add_reg_for_lit(s, a->rn, a->p ? ofs : 0);
6577}
6578
6579static void op_addr_ri_post(DisasContext *s, arg_ldst_ri *a,
6580                            TCGv_i32 addr, int address_offset)
6581{
6582    if (!a->p) {
6583        if (a->u) {
6584            address_offset += a->imm;
6585        } else {
6586            address_offset -= a->imm;
6587        }
6588    } else if (!a->w) {
6589        tcg_temp_free_i32(addr);
6590        return;
6591    }
6592    tcg_gen_addi_i32(addr, addr, address_offset);
6593    store_reg(s, a->rn, addr);
6594}
6595
6596static bool op_load_ri(DisasContext *s, arg_ldst_ri *a,
6597                       MemOp mop, int mem_idx)
6598{
6599    ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w);
6600    TCGv_i32 addr, tmp;
6601
6602    addr = op_addr_ri_pre(s, a);
6603
6604    tmp = tcg_temp_new_i32();
6605    gen_aa32_ld_i32(s, tmp, addr, mem_idx, mop | s->be_data);
6606    disas_set_da_iss(s, mop, issinfo);
6607
6608    /*
6609     * Perform base writeback before the loaded value to
6610     * ensure correct behavior with overlapping index registers.
6611     */
6612    op_addr_ri_post(s, a, addr, 0);
6613    store_reg_from_load(s, a->rt, tmp);
6614    return true;
6615}
6616
6617static bool op_store_ri(DisasContext *s, arg_ldst_ri *a,
6618                        MemOp mop, int mem_idx)
6619{
6620    ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w) | ISSIsWrite;
6621    TCGv_i32 addr, tmp;
6622
6623    addr = op_addr_ri_pre(s, a);
6624
6625    tmp = load_reg(s, a->rt);
6626    gen_aa32_st_i32(s, tmp, addr, mem_idx, mop | s->be_data);
6627    disas_set_da_iss(s, mop, issinfo);
6628    tcg_temp_free_i32(tmp);
6629
6630    op_addr_ri_post(s, a, addr, 0);
6631    return true;
6632}
6633
6634static bool op_ldrd_ri(DisasContext *s, arg_ldst_ri *a, int rt2)
6635{
6636    int mem_idx = get_mem_index(s);
6637    TCGv_i32 addr, tmp;
6638
6639    addr = op_addr_ri_pre(s, a);
6640
6641    tmp = tcg_temp_new_i32();
6642    gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
6643    store_reg(s, a->rt, tmp);
6644
6645    tcg_gen_addi_i32(addr, addr, 4);
6646
6647    tmp = tcg_temp_new_i32();
6648    gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
6649    store_reg(s, rt2, tmp);
6650
6651    /* LDRD w/ base writeback is UNPREDICTABLE if the registers overlap.  */
6652    op_addr_ri_post(s, a, addr, -4);
6653    return true;
6654}
6655
6656static bool trans_LDRD_ri_a32(DisasContext *s, arg_ldst_ri *a)
6657{
6658    if (!ENABLE_ARCH_5TE || (a->rt & 1)) {
6659        return false;
6660    }
6661    return op_ldrd_ri(s, a, a->rt + 1);
6662}
6663
6664static bool trans_LDRD_ri_t32(DisasContext *s, arg_ldst_ri2 *a)
6665{
6666    arg_ldst_ri b = {
6667        .u = a->u, .w = a->w, .p = a->p,
6668        .rn = a->rn, .rt = a->rt, .imm = a->imm
6669    };
6670    return op_ldrd_ri(s, &b, a->rt2);
6671}
6672
6673static bool op_strd_ri(DisasContext *s, arg_ldst_ri *a, int rt2)
6674{
6675    int mem_idx = get_mem_index(s);
6676    TCGv_i32 addr, tmp;
6677
6678    addr = op_addr_ri_pre(s, a);
6679
6680    tmp = load_reg(s, a->rt);
6681    gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
6682    tcg_temp_free_i32(tmp);
6683
6684    tcg_gen_addi_i32(addr, addr, 4);
6685
6686    tmp = load_reg(s, rt2);
6687    gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
6688    tcg_temp_free_i32(tmp);
6689
6690    op_addr_ri_post(s, a, addr, -4);
6691    return true;
6692}
6693
6694static bool trans_STRD_ri_a32(DisasContext *s, arg_ldst_ri *a)
6695{
6696    if (!ENABLE_ARCH_5TE || (a->rt & 1)) {
6697        return false;
6698    }
6699    return op_strd_ri(s, a, a->rt + 1);
6700}
6701
6702static bool trans_STRD_ri_t32(DisasContext *s, arg_ldst_ri2 *a)
6703{
6704    arg_ldst_ri b = {
6705        .u = a->u, .w = a->w, .p = a->p,
6706        .rn = a->rn, .rt = a->rt, .imm = a->imm
6707    };
6708    return op_strd_ri(s, &b, a->rt2);
6709}
6710
6711#define DO_LDST(NAME, WHICH, MEMOP) \
6712static bool trans_##NAME##_ri(DisasContext *s, arg_ldst_ri *a)        \
6713{                                                                     \
6714    return op_##WHICH##_ri(s, a, MEMOP, get_mem_index(s));            \
6715}                                                                     \
6716static bool trans_##NAME##T_ri(DisasContext *s, arg_ldst_ri *a)       \
6717{                                                                     \
6718    return op_##WHICH##_ri(s, a, MEMOP, get_a32_user_mem_index(s));   \
6719}                                                                     \
6720static bool trans_##NAME##_rr(DisasContext *s, arg_ldst_rr *a)        \
6721{                                                                     \
6722    return op_##WHICH##_rr(s, a, MEMOP, get_mem_index(s));            \
6723}                                                                     \
6724static bool trans_##NAME##T_rr(DisasContext *s, arg_ldst_rr *a)       \
6725{                                                                     \
6726    return op_##WHICH##_rr(s, a, MEMOP, get_a32_user_mem_index(s));   \
6727}
6728
6729DO_LDST(LDR, load, MO_UL)
6730DO_LDST(LDRB, load, MO_UB)
6731DO_LDST(LDRH, load, MO_UW)
6732DO_LDST(LDRSB, load, MO_SB)
6733DO_LDST(LDRSH, load, MO_SW)
6734
6735DO_LDST(STR, store, MO_UL)
6736DO_LDST(STRB, store, MO_UB)
6737DO_LDST(STRH, store, MO_UW)
6738
6739#undef DO_LDST
6740
6741/*
6742 * Synchronization primitives
6743 */
6744
6745static bool op_swp(DisasContext *s, arg_SWP *a, MemOp opc)
6746{
6747    TCGv_i32 addr, tmp;
6748    TCGv taddr;
6749
6750    opc |= s->be_data;
6751    addr = load_reg(s, a->rn);
6752    taddr = gen_aa32_addr(s, addr, opc);
6753    tcg_temp_free_i32(addr);
6754
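        /* SWP{B}: Rt <- [Rn] and [Rn] <- Rt2, as one atomic exchange. */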
6755    tmp = load_reg(s, a->rt2);
6756    tcg_gen_atomic_xchg_i32(tmp, taddr, tmp, get_mem_index(s), opc);
6757    tcg_temp_free(taddr);
6758
6759    store_reg(s, a->rt, tmp);
6760    return true;
6761}
6762
6763static bool trans_SWP(DisasContext *s, arg_SWP *a)
6764{
6765    return op_swp(s, a, MO_UL | MO_ALIGN);
6766}
6767
6768static bool trans_SWPB(DisasContext *s, arg_SWP *a)
6769{
6770    return op_swp(s, a, MO_UB);
6771}
6772
6773/*
6774 * Load/Store Exclusive and Load-Acquire/Store-Release
6775 */
6776
6777static bool op_strex(DisasContext *s, arg_STREX *a, MemOp mop, bool rel)
6778{
6779    TCGv_i32 addr;
6780    /* Some cases stopped being UNPREDICTABLE in v8A (but not v8M) */
6781    bool v8a = ENABLE_ARCH_8 && !arm_dc_feature(s, ARM_FEATURE_M);
6782
6783    /* We UNDEF for these UNPREDICTABLE cases.  */
6784    if (a->rd == 15 || a->rn == 15 || a->rt == 15
6785        || a->rd == a->rn || a->rd == a->rt
6786        || (!v8a && s->thumb && (a->rd == 13 || a->rt == 13))
6787        || (mop == MO_64
6788            && (a->rt2 == 15
6789                || a->rd == a->rt2
6790                || (!v8a && s->thumb && a->rt2 == 13)))) {
6791        unallocated_encoding(s);
6792        return true;
6793    }
6794
6795    if (rel) {
6796        tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
6797    }
6798
6799    addr = tcg_temp_local_new_i32();
6800    load_reg_var(s, addr, a->rn);
6801    tcg_gen_addi_i32(addr, addr, a->imm);
6802
6803    gen_store_exclusive(s, a->rd, a->rt, a->rt2, addr, mop);
6804    tcg_temp_free_i32(addr);
6805    return true;
6806}
6807
6808static bool trans_STREX(DisasContext *s, arg_STREX *a)
6809{
6810    if (!ENABLE_ARCH_6) {
6811        return false;
6812    }
6813    return op_strex(s, a, MO_32, false);
6814}
6815
6816static bool trans_STREXD_a32(DisasContext *s, arg_STREX *a)
6817{
6818    if (!ENABLE_ARCH_6K) {
6819        return false;
6820    }
6821    /* We UNDEF for these UNPREDICTABLE cases.  */
6822    if (a->rt & 1) {
6823        unallocated_encoding(s);
6824        return true;
6825    }
6826    a->rt2 = a->rt + 1;
6827    return op_strex(s, a, MO_64, false);
6828}
6829
6830static bool trans_STREXD_t32(DisasContext *s, arg_STREX *a)
6831{
6832    return op_strex(s, a, MO_64, false);
6833}
6834
6835static bool trans_STREXB(DisasContext *s, arg_STREX *a)
6836{
6837    if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
6838        return false;
6839    }
6840    return op_strex(s, a, MO_8, false);
6841}
6842
6843static bool trans_STREXH(DisasContext *s, arg_STREX *a)
6844{
6845    if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
6846        return false;
6847    }
6848    return op_strex(s, a, MO_16, false);
6849}
6850
6851static bool trans_STLEX(DisasContext *s, arg_STREX *a)
6852{
6853    if (!ENABLE_ARCH_8) {
6854        return false;
6855    }
6856    return op_strex(s, a, MO_32, true);
6857}
6858
6859static bool trans_STLEXD_a32(DisasContext *s, arg_STREX *a)
6860{
6861    if (!ENABLE_ARCH_8) {
6862        return false;
6863    }
6864    /* We UNDEF for these UNPREDICTABLE cases.  */
6865    if (a->rt & 1) {
6866        unallocated_encoding(s);
6867        return true;
6868    }
6869    a->rt2 = a->rt + 1;
6870    return op_strex(s, a, MO_64, true);
6871}
6872
6873static bool trans_STLEXD_t32(DisasContext *s, arg_STREX *a)
6874{
6875    if (!ENABLE_ARCH_8) {
6876        return false;
6877    }
6878    return op_strex(s, a, MO_64, true);
6879}
6880
6881static bool trans_STLEXB(DisasContext *s, arg_STREX *a)
6882{
6883    if (!ENABLE_ARCH_8) {
6884        return false;
6885    }
6886    return op_strex(s, a, MO_8, true);
6887}
6888
6889static bool trans_STLEXH(DisasContext *s, arg_STREX *a)
6890{
6891    if (!ENABLE_ARCH_8) {
6892        return false;
6893    }
6894    return op_strex(s, a, MO_16, true);
6895}
6896
6897static bool op_stl(DisasContext *s, arg_STL *a, MemOp mop)
6898{
6899    TCGv_i32 addr, tmp;
6900
6901    if (!ENABLE_ARCH_8) {
6902        return false;
6903    }
6904    /* We UNDEF for these UNPREDICTABLE cases.  */
6905    if (a->rn == 15 || a->rt == 15) {
6906        unallocated_encoding(s);
6907        return true;
6908    }
6909
6910    addr = load_reg(s, a->rn);
6911    tmp = load_reg(s, a->rt);
6912    tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
6913    gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), mop | s->be_data);
6914    disas_set_da_iss(s, mop, a->rt | ISSIsAcqRel | ISSIsWrite);
6915
6916    tcg_temp_free_i32(tmp);
6917    tcg_temp_free_i32(addr);
6918    return true;
6919}
6920
6921static bool trans_STL(DisasContext *s, arg_STL *a)
6922{
6923    return op_stl(s, a, MO_UL);
6924}
6925
6926static bool trans_STLB(DisasContext *s, arg_STL *a)
6927{
6928    return op_stl(s, a, MO_UB);
6929}
6930
6931static bool trans_STLH(DisasContext *s, arg_STL *a)
6932{
6933    return op_stl(s, a, MO_UW);
6934}
6935
6936static bool op_ldrex(DisasContext *s, arg_LDREX *a, MemOp mop, bool acq)
6937{
6938    TCGv_i32 addr;
6939    /* Some cases stopped being UNPREDICTABLE in v8A (but not v8M) */
6940    bool v8a = ENABLE_ARCH_8 && !arm_dc_feature(s, ARM_FEATURE_M);
6941
6942    /* We UNDEF for these UNPREDICTABLE cases.  */
6943    if (a->rn == 15 || a->rt == 15
6944        || (!v8a && s->thumb && a->rt == 13)
6945        || (mop == MO_64
6946            && (a->rt2 == 15 || a->rt == a->rt2
6947                || (!v8a && s->thumb && a->rt2 == 13)))) {
6948        unallocated_encoding(s);
6949        return true;
6950    }
6951
6952    addr = tcg_temp_local_new_i32();
6953    load_reg_var(s, addr, a->rn);
6954    tcg_gen_addi_i32(addr, addr, a->imm);
6955
6956    gen_load_exclusive(s, a->rt, a->rt2, addr, mop);
6957    tcg_temp_free_i32(addr);
6958
6959    if (acq) {
6960        tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
6961    }
6962    return true;
6963}
6964
6965static bool trans_LDREX(DisasContext *s, arg_LDREX *a)
6966{
6967    if (!ENABLE_ARCH_6) {
6968        return false;
6969    }
6970    return op_ldrex(s, a, MO_32, false);
6971}
6972
6973static bool trans_LDREXD_a32(DisasContext *s, arg_LDREX *a)
6974{
6975    if (!ENABLE_ARCH_6K) {
6976        return false;
6977    }
6978    /* We UNDEF for these UNPREDICTABLE cases.  */
6979    if (a->rt & 1) {
6980        unallocated_encoding(s);
6981        return true;
6982    }
6983    a->rt2 = a->rt + 1;
6984    return op_ldrex(s, a, MO_64, false);
6985}
6986
6987static bool trans_LDREXD_t32(DisasContext *s, arg_LDREX *a)
6988{
6989    return op_ldrex(s, a, MO_64, false);
6990}
6991
6992static bool trans_LDREXB(DisasContext *s, arg_LDREX *a)
6993{
6994    if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
6995        return false;
6996    }
6997    return op_ldrex(s, a, MO_8, false);
6998}
6999
7000static bool trans_LDREXH(DisasContext *s, arg_LDREX *a)
7001{
7002    if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
7003        return false;
7004    }
7005    return op_ldrex(s, a, MO_16, false);
7006}
7007
7008static bool trans_LDAEX(DisasContext *s, arg_LDREX *a)
7009{
7010    if (!ENABLE_ARCH_8) {
7011        return false;
7012    }
7013    return op_ldrex(s, a, MO_32, true);
7014}
7015
7016static bool trans_LDAEXD_a32(DisasContext *s, arg_LDREX *a)
7017{
7018    if (!ENABLE_ARCH_8) {
7019        return false;
7020    }
7021    /* We UNDEF for these UNPREDICTABLE cases.  */
7022    if (a->rt & 1) {
7023        unallocated_encoding(s);
7024        return true;
7025    }
7026    a->rt2 = a->rt + 1;
7027    return op_ldrex(s, a, MO_64, true);
7028}
7029
7030static bool trans_LDAEXD_t32(DisasContext *s, arg_LDREX *a)
7031{
7032    if (!ENABLE_ARCH_8) {
7033        return false;
7034    }
7035    return op_ldrex(s, a, MO_64, true);
7036}
7037
7038static bool trans_LDAEXB(DisasContext *s, arg_LDREX *a)
7039{
7040    if (!ENABLE_ARCH_8) {
7041        return false;
7042    }
7043    return op_ldrex(s, a, MO_8, true);
7044}
7045
7046static bool trans_LDAEXH(DisasContext *s, arg_LDREX *a)
7047{
7048    if (!ENABLE_ARCH_8) {
7049        return false;
7050    }
7051    return op_ldrex(s, a, MO_16, true);
7052}
7053
7054static bool op_lda(DisasContext *s, arg_LDA *a, MemOp mop)
7055{
7056    TCGv_i32 addr, tmp;
7057
7058    if (!ENABLE_ARCH_8) {
7059        return false;
7060    }
7061    /* We UNDEF for these UNPREDICTABLE cases.  */
7062    if (a->rn == 15 || a->rt == 15) {
7063        unallocated_encoding(s);
7064        return true;
7065    }
7066
7067    addr = load_reg(s, a->rn);
7068    tmp = tcg_temp_new_i32();
7069    gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), mop | s->be_data);
7070    disas_set_da_iss(s, mop, a->rt | ISSIsAcqRel);
7071    tcg_temp_free_i32(addr);
7072
7073    store_reg(s, a->rt, tmp);
7074    tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
7075    return true;
7076}
7077
7078static bool trans_LDA(DisasContext *s, arg_LDA *a)
7079{
7080    return op_lda(s, a, MO_UL);
7081}
7082
7083static bool trans_LDAB(DisasContext *s, arg_LDA *a)
7084{
7085    return op_lda(s, a, MO_UB);
7086}
7087
7088static bool trans_LDAH(DisasContext *s, arg_LDA *a)
7089{
7090    return op_lda(s, a, MO_UW);
7091}
7092
7093/*
7094 * Media instructions
7095 */
7096
7097static bool trans_USADA8(DisasContext *s, arg_USADA8 *a)
7098{
7099    TCGv_i32 t1, t2;
7100
7101    if (!ENABLE_ARCH_6) {
7102        return false;
7103    }
7104
7105    t1 = load_reg(s, a->rn);
7106    t2 = load_reg(s, a->rm);
7107    gen_helper_usad8(t1, t1, t2);
7108    tcg_temp_free_i32(t2);
7109    if (a->ra != 15) {
7110        t2 = load_reg(s, a->ra);
7111        tcg_gen_add_i32(t1, t1, t2);
7112        tcg_temp_free_i32(t2);
7113    }
7114    store_reg(s, a->rd, t1);
7115    return true;
7116}
7117
7118static bool op_bfx(DisasContext *s, arg_UBFX *a, bool u)
7119{
7120    TCGv_i32 tmp;
7121    int width = a->widthm1 + 1;
7122    int shift = a->lsb;
7123
7124    if (!ENABLE_ARCH_6T2) {
7125        return false;
7126    }
7127    if (shift + width > 32) {
7128        /* UNPREDICTABLE; we choose to UNDEF */
7129        unallocated_encoding(s);
7130        return true;
7131    }
7132
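        /* Rd = Rn[shift+width-1 : shift], zero- (UBFX) or sign- (SBFX) extended. */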
7133    tmp = load_reg(s, a->rn);
7134    if (u) {
7135        tcg_gen_extract_i32(tmp, tmp, shift, width);
7136    } else {
7137        tcg_gen_sextract_i32(tmp, tmp, shift, width);
7138    }
7139    store_reg(s, a->rd, tmp);
7140    return true;
7141}
7142
7143static bool trans_SBFX(DisasContext *s, arg_SBFX *a)
7144{
7145    return op_bfx(s, a, false);
7146}
7147
7148static bool trans_UBFX(DisasContext *s, arg_UBFX *a)
7149{
7150    return op_bfx(s, a, true);
7151}
7152
7153static bool trans_BFCI(DisasContext *s, arg_BFCI *a)
7154{
7155    TCGv_i32 tmp;
7156    int msb = a->msb, lsb = a->lsb;
7157    int width;
7158
7159    if (!ENABLE_ARCH_6T2) {
7160        return false;
7161    }
7162    if (msb < lsb) {
7163        /* UNPREDICTABLE; we choose to UNDEF */
7164        unallocated_encoding(s);
7165        return true;
7166    }
7167
7168    width = msb + 1 - lsb;
7169    if (a->rn == 15) {
7170        /* BFC */
7171        tmp = tcg_const_i32(0);
7172    } else {
7173        /* BFI */
7174        tmp = load_reg(s, a->rn);
7175    }
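        /* width == 32 implies lsb == 0: the insert replaces Rd entirely. */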
7176    if (width != 32) {
7177        TCGv_i32 tmp2 = load_reg(s, a->rd);
7178        tcg_gen_deposit_i32(tmp, tmp2, tmp, lsb, width);
7179        tcg_temp_free_i32(tmp2);
7180    }
7181    store_reg(s, a->rd, tmp);
7182    return true;
7183}
7184
7185static bool trans_UDF(DisasContext *s, arg_UDF *a)
7186{
7187    unallocated_encoding(s);
7188    return true;
7189}
7190
7191/*
7192 * Parallel addition and subtraction
7193 */
7194
7195static bool op_par_addsub(DisasContext *s, arg_rrr *a,
7196                          void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32))
7197{
7198    TCGv_i32 t0, t1;
7199
7200    if (s->thumb
7201        ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7202        : !ENABLE_ARCH_6) {
7203        return false;
7204    }
7205
7206    t0 = load_reg(s, a->rn);
7207    t1 = load_reg(s, a->rm);
7208
7209    gen(t0, t0, t1);
7210
7211    tcg_temp_free_i32(t1);
7212    store_reg(s, a->rd, t0);
7213    return true;
7214}
7215
7216static bool op_par_addsub_ge(DisasContext *s, arg_rrr *a,
7217                             void (*gen)(TCGv_i32, TCGv_i32,
7218                                         TCGv_i32, TCGv_ptr))
7219{
7220    TCGv_i32 t0, t1;
7221    TCGv_ptr ge;
7222
7223    if (s->thumb
7224        ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7225        : !ENABLE_ARCH_6) {
7226        return false;
7227    }
7228
7229    t0 = load_reg(s, a->rn);
7230    t1 = load_reg(s, a->rm);
7231
7232    ge = tcg_temp_new_ptr();
7233    tcg_gen_addi_ptr(ge, cpu_env, offsetof(CPUARMState, GE));
7234    gen(t0, t0, t1, ge);
7235
7236    tcg_temp_free_ptr(ge);
7237    tcg_temp_free_i32(t1);
7238    store_reg(s, a->rd, t0);
7239    return true;
7240}
7241
7242#define DO_PAR_ADDSUB(NAME, helper) \
7243static bool trans_##NAME(DisasContext *s, arg_rrr *a)   \
7244{                                                       \
7245    return op_par_addsub(s, a, helper);                 \
7246}
7247
7248#define DO_PAR_ADDSUB_GE(NAME, helper) \
7249static bool trans_##NAME(DisasContext *s, arg_rrr *a)   \
7250{                                                       \
7251    return op_par_addsub_ge(s, a, helper);              \
7252}
7253
7254DO_PAR_ADDSUB_GE(SADD16, gen_helper_sadd16)
7255DO_PAR_ADDSUB_GE(SASX, gen_helper_saddsubx)
7256DO_PAR_ADDSUB_GE(SSAX, gen_helper_ssubaddx)
7257DO_PAR_ADDSUB_GE(SSUB16, gen_helper_ssub16)
7258DO_PAR_ADDSUB_GE(SADD8, gen_helper_sadd8)
7259DO_PAR_ADDSUB_GE(SSUB8, gen_helper_ssub8)
7260
7261DO_PAR_ADDSUB_GE(UADD16, gen_helper_uadd16)
7262DO_PAR_ADDSUB_GE(UASX, gen_helper_uaddsubx)
7263DO_PAR_ADDSUB_GE(USAX, gen_helper_usubaddx)
7264DO_PAR_ADDSUB_GE(USUB16, gen_helper_usub16)
7265DO_PAR_ADDSUB_GE(UADD8, gen_helper_uadd8)
7266DO_PAR_ADDSUB_GE(USUB8, gen_helper_usub8)
7267
7268DO_PAR_ADDSUB(QADD16, gen_helper_qadd16)
7269DO_PAR_ADDSUB(QASX, gen_helper_qaddsubx)
7270DO_PAR_ADDSUB(QSAX, gen_helper_qsubaddx)
7271DO_PAR_ADDSUB(QSUB16, gen_helper_qsub16)
7272DO_PAR_ADDSUB(QADD8, gen_helper_qadd8)
7273DO_PAR_ADDSUB(QSUB8, gen_helper_qsub8)
7274
7275DO_PAR_ADDSUB(UQADD16, gen_helper_uqadd16)
7276DO_PAR_ADDSUB(UQASX, gen_helper_uqaddsubx)
7277DO_PAR_ADDSUB(UQSAX, gen_helper_uqsubaddx)
7278DO_PAR_ADDSUB(UQSUB16, gen_helper_uqsub16)
7279DO_PAR_ADDSUB(UQADD8, gen_helper_uqadd8)
7280DO_PAR_ADDSUB(UQSUB8, gen_helper_uqsub8)
7281
7282DO_PAR_ADDSUB(SHADD16, gen_helper_shadd16)
7283DO_PAR_ADDSUB(SHASX, gen_helper_shaddsubx)
7284DO_PAR_ADDSUB(SHSAX, gen_helper_shsubaddx)
7285DO_PAR_ADDSUB(SHSUB16, gen_helper_shsub16)
7286DO_PAR_ADDSUB(SHADD8, gen_helper_shadd8)
7287DO_PAR_ADDSUB(SHSUB8, gen_helper_shsub8)
7288
7289DO_PAR_ADDSUB(UHADD16, gen_helper_uhadd16)
7290DO_PAR_ADDSUB(UHASX, gen_helper_uhaddsubx)
7291DO_PAR_ADDSUB(UHSAX, gen_helper_uhsubaddx)
7292DO_PAR_ADDSUB(UHSUB16, gen_helper_uhsub16)
7293DO_PAR_ADDSUB(UHADD8, gen_helper_uhadd8)
7294DO_PAR_ADDSUB(UHSUB8, gen_helper_uhsub8)
7295
7296#undef DO_PAR_ADDSUB
7297#undef DO_PAR_ADDSUB_GE
7298
7299/*
7300 * Packing, unpacking, saturation, and reversal
7301 */
7302
7303static bool trans_PKH(DisasContext *s, arg_PKH *a)
7304{
7305    TCGv_i32 tn, tm;
7306    int shift = a->imm;
7307
7308    if (s->thumb
7309        ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7310        : !ENABLE_ARCH_6) {
7311        return false;
7312    }
7313
7314    tn = load_reg(s, a->rn);
7315    tm = load_reg(s, a->rm);
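        /*
         * PKHTB: Rd[31:16] = Rn[31:16], Rd[15:0] = (Rm ASR imm)[15:0]
         * PKHBT: Rd[31:16] = (Rm LSL imm)[31:16], Rd[15:0] = Rn[15:0]
         */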
7316    if (a->tb) {
7317        /* PKHTB */
7318        if (shift == 0) {
7319            shift = 31;
7320        }
7321        tcg_gen_sari_i32(tm, tm, shift);
7322        tcg_gen_deposit_i32(tn, tn, tm, 0, 16);
7323    } else {
7324        /* PKHBT */
7325        tcg_gen_shli_i32(tm, tm, shift);
7326        tcg_gen_deposit_i32(tn, tm, tn, 0, 16);
7327    }
7328    tcg_temp_free_i32(tm);
7329    store_reg(s, a->rd, tn);
7330    return true;
7331}
7332
7333static bool op_sat(DisasContext *s, arg_sat *a,
7334                   void (*gen)(TCGv_i32, TCGv_env, TCGv_i32, TCGv_i32))
7335{
7336    TCGv_i32 tmp, satimm;
7337    int shift = a->imm;
7338
7339    if (!ENABLE_ARCH_6) {
7340        return false;
7341    }
7342
7343    tmp = load_reg(s, a->rn);
7344    if (a->sh) {
7345        tcg_gen_sari_i32(tmp, tmp, shift ? shift : 31);
7346    } else {
7347        tcg_gen_shli_i32(tmp, tmp, shift);
7348    }
7349
7350    satimm = tcg_const_i32(a->satimm);
7351    gen(tmp, cpu_env, tmp, satimm);
7352    tcg_temp_free_i32(satimm);
7353
7354    store_reg(s, a->rd, tmp);
7355    return true;
7356}
7357
7358static bool trans_SSAT(DisasContext *s, arg_sat *a)
7359{
7360    return op_sat(s, a, gen_helper_ssat);
7361}
7362
7363static bool trans_USAT(DisasContext *s, arg_sat *a)
7364{
7365    return op_sat(s, a, gen_helper_usat);
7366}
7367
7368static bool trans_SSAT16(DisasContext *s, arg_sat *a)
7369{
7370    if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
7371        return false;
7372    }
7373    return op_sat(s, a, gen_helper_ssat16);
7374}
7375
7376static bool trans_USAT16(DisasContext *s, arg_sat *a)
7377{
7378    if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
7379        return false;
7380    }
7381    return op_sat(s, a, gen_helper_usat16);
7382}
7383
7384static bool op_xta(DisasContext *s, arg_rrr_rot *a,
7385                   void (*gen_extract)(TCGv_i32, TCGv_i32),
7386                   void (*gen_add)(TCGv_i32, TCGv_i32, TCGv_i32))
7387{
7388    TCGv_i32 tmp;
7389
7390    if (!ENABLE_ARCH_6) {
7391        return false;
7392    }
7393
7394    tmp = load_reg(s, a->rm);
7395    /*
7396     * TODO: In many cases we could do a shift instead of a rotate.
7397     * Combined with a simple extend, that becomes an extract.
7398     */
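        /*
         * e.g. rot == 1 rotates right by 8, so a following byte extend
         * picks out byte 1 of the original Rm.
         */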
7399    tcg_gen_rotri_i32(tmp, tmp, a->rot * 8);
7400    gen_extract(tmp, tmp);
7401
7402    if (a->rn != 15) {
7403        TCGv_i32 tmp2 = load_reg(s, a->rn);
7404        gen_add(tmp, tmp, tmp2);
7405        tcg_temp_free_i32(tmp2);
7406    }
7407    store_reg(s, a->rd, tmp);
7408    return true;
7409}
7410
7411static bool trans_SXTAB(DisasContext *s, arg_rrr_rot *a)
7412{
7413    return op_xta(s, a, tcg_gen_ext8s_i32, tcg_gen_add_i32);
7414}
7415
7416static bool trans_SXTAH(DisasContext *s, arg_rrr_rot *a)
7417{
7418    return op_xta(s, a, tcg_gen_ext16s_i32, tcg_gen_add_i32);
7419}
7420
7421static bool trans_SXTAB16(DisasContext *s, arg_rrr_rot *a)
7422{
7423    if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
7424        return false;
7425    }
7426    return op_xta(s, a, gen_helper_sxtb16, gen_add16);
7427}
7428
7429static bool trans_UXTAB(DisasContext *s, arg_rrr_rot *a)
7430{
7431    return op_xta(s, a, tcg_gen_ext8u_i32, tcg_gen_add_i32);
7432}
7433
7434static bool trans_UXTAH(DisasContext *s, arg_rrr_rot *a)
7435{
7436    return op_xta(s, a, tcg_gen_ext16u_i32, tcg_gen_add_i32);
7437}
7438
7439static bool trans_UXTAB16(DisasContext *s, arg_rrr_rot *a)
7440{
7441    if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
7442        return false;
7443    }
7444    return op_xta(s, a, gen_helper_uxtb16, gen_add16);
7445}
7446
7447static bool trans_SEL(DisasContext *s, arg_rrr *a)
7448{
7449    TCGv_i32 t1, t2, t3;
7450
7451    if (s->thumb
7452        ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7453        : !ENABLE_ARCH_6) {
7454        return false;
7455    }
7456
7457    t1 = load_reg(s, a->rn);
7458    t2 = load_reg(s, a->rm);
7459    t3 = tcg_temp_new_i32();
7460    tcg_gen_ld_i32(t3, cpu_env, offsetof(CPUARMState, GE));
7461    gen_helper_sel_flags(t1, t3, t1, t2);
7462    tcg_temp_free_i32(t3);
7463    tcg_temp_free_i32(t2);
7464    store_reg(s, a->rd, t1);
7465    return true;
7466}
7467
7468static bool op_rr(DisasContext *s, arg_rr *a,
7469                  void (*gen)(TCGv_i32, TCGv_i32))
7470{
7471    TCGv_i32 tmp;
7472
7473    tmp = load_reg(s, a->rm);
7474    gen(tmp, tmp);
7475    store_reg(s, a->rd, tmp);
7476    return true;
7477}
7478
7479static bool trans_REV(DisasContext *s, arg_rr *a)
7480{
7481    if (!ENABLE_ARCH_6) {
7482        return false;
7483    }
7484    return op_rr(s, a, tcg_gen_bswap32_i32);
7485}
7486
7487static bool trans_REV16(DisasContext *s, arg_rr *a)
7488{
7489    if (!ENABLE_ARCH_6) {
7490        return false;
7491    }
7492    return op_rr(s, a, gen_rev16);
7493}
7494
7495static bool trans_REVSH(DisasContext *s, arg_rr *a)
7496{
7497    if (!ENABLE_ARCH_6) {
7498        return false;
7499    }
7500    return op_rr(s, a, gen_revsh);
7501}
7502
7503static bool trans_RBIT(DisasContext *s, arg_rr *a)
7504{
7505    if (!ENABLE_ARCH_6T2) {
7506        return false;
7507    }
7508    return op_rr(s, a, gen_helper_rbit);
7509}
7510
7511/*
7512 * Signed multiply, signed and unsigned divide
7513 */
7514
7515static bool op_smlad(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub)
7516{
7517    TCGv_i32 t1, t2;
7518
7519    if (!ENABLE_ARCH_6) {
7520        return false;
7521    }
7522
7523    t1 = load_reg(s, a->rn);
7524    t2 = load_reg(s, a->rm);
7525    if (m_swap) {
7526        gen_swap_half(t2, t2);
7527    }
7528    gen_smul_dual(t1, t2);
7529
7530    if (sub) {
7531        /*
7532         * This subtraction cannot overflow, so we can do a simple
7533         * 32-bit subtraction and then a possible 32-bit saturating
7534         * addition of Ra.
7535         */
7536        tcg_gen_sub_i32(t1, t1, t2);
7537        tcg_temp_free_i32(t2);
7538
7539        if (a->ra != 15) {
7540            t2 = load_reg(s, a->ra);
7541            gen_helper_add_setq(t1, cpu_env, t1, t2);
7542            tcg_temp_free_i32(t2);
7543        }
7544    } else if (a->ra == 15) {
7545        /* Single saturation-checking addition */
7546        gen_helper_add_setq(t1, cpu_env, t1, t2);
7547        tcg_temp_free_i32(t2);
7548    } else {
7549        /*
7550         * We need to add the products and Ra together and then
7551         * determine whether the final result overflowed. Doing
7552         * this as two separate add-and-check-overflow steps incorrectly
7553         * sets Q for cases like (-32768 * -32768) + (-32768 * -32768) + -1.
7554         * Do all the arithmetic at 64-bits and then check for overflow.
7555         */
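            /*
             * Concretely: 0x40000000 + 0x40000000 overflows to 0x80000000
             * and would set Q after the first step, yet with Ra == -1 the
             * final sum 0x7fffffff is representable, so Q must stay clear.
             */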
7556        TCGv_i64 p64, q64;
7557        TCGv_i32 t3, qf, one;
7558
7559        p64 = tcg_temp_new_i64();
7560        q64 = tcg_temp_new_i64();
7561        tcg_gen_ext_i32_i64(p64, t1);
7562        tcg_gen_ext_i32_i64(q64, t2);
7563        tcg_gen_add_i64(p64, p64, q64);
7564        load_reg_var(s, t2, a->ra);
7565        tcg_gen_ext_i32_i64(q64, t2);
7566        tcg_gen_add_i64(p64, p64, q64);
7567        tcg_temp_free_i64(q64);
7568
7569        tcg_gen_extr_i64_i32(t1, t2, p64);
7570        tcg_temp_free_i64(p64);
7571        /*
7572         * t1 is the low half of the result which goes into Rd.
7573         * We have overflow and must set Q if the high half (t2)
7574         * is different from the sign-extension of t1.
7575         */
7576        t3 = tcg_temp_new_i32();
7577        tcg_gen_sari_i32(t3, t1, 31);
7578        qf = load_cpu_field(QF);
7579        one = tcg_const_i32(1);
7580        tcg_gen_movcond_i32(TCG_COND_NE, qf, t2, t3, one, qf);
7581        store_cpu_field(qf, QF);
7582        tcg_temp_free_i32(one);
7583        tcg_temp_free_i32(t3);
7584        tcg_temp_free_i32(t2);
7585    }
7586    store_reg(s, a->rd, t1);
7587    return true;
7588}
7589
7590static bool trans_SMLAD(DisasContext *s, arg_rrrr *a)
7591{
7592    return op_smlad(s, a, false, false);
7593}
7594
7595static bool trans_SMLADX(DisasContext *s, arg_rrrr *a)
7596{
7597    return op_smlad(s, a, true, false);
7598}
7599
7600static bool trans_SMLSD(DisasContext *s, arg_rrrr *a)
7601{
7602    return op_smlad(s, a, false, true);
7603}
7604
7605static bool trans_SMLSDX(DisasContext *s, arg_rrrr *a)
7606{
7607    return op_smlad(s, a, true, true);
7608}
7609
7610static bool op_smlald(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub)
7611{
7612    TCGv_i32 t1, t2;
7613    TCGv_i64 l1, l2;
7614
7615    if (!ENABLE_ARCH_6) {
7616        return false;
7617    }
7618
7619    t1 = load_reg(s, a->rn);
7620    t2 = load_reg(s, a->rm);
7621    if (m_swap) {
7622        gen_swap_half(t2, t2);
7623    }
7624    gen_smul_dual(t1, t2);
7625
7626    l1 = tcg_temp_new_i64();
7627    l2 = tcg_temp_new_i64();
7628    tcg_gen_ext_i32_i64(l1, t1);
7629    tcg_gen_ext_i32_i64(l2, t2);
7630    tcg_temp_free_i32(t1);
7631    tcg_temp_free_i32(t2);
7632
7633    if (sub) {
7634        tcg_gen_sub_i64(l1, l1, l2);
7635    } else {
7636        tcg_gen_add_i64(l1, l1, l2);
7637    }
7638    tcg_temp_free_i64(l2);
7639
7640    gen_addq(s, l1, a->ra, a->rd);
7641    gen_storeq_reg(s, a->ra, a->rd, l1);
7642    tcg_temp_free_i64(l1);
7643    return true;
7644}
7645
7646static bool trans_SMLALD(DisasContext *s, arg_rrrr *a)
7647{
7648    return op_smlald(s, a, false, false);
7649}
7650
7651static bool trans_SMLALDX(DisasContext *s, arg_rrrr *a)
7652{
7653    return op_smlald(s, a, true, false);
7654}
7655
7656static bool trans_SMLSLD(DisasContext *s, arg_rrrr *a)
7657{
7658    return op_smlald(s, a, false, true);
7659}
7660
7661static bool trans_SMLSLDX(DisasContext *s, arg_rrrr *a)
7662{
7663    return op_smlald(s, a, true, true);
7664}
7665
7666static bool op_smmla(DisasContext *s, arg_rrrr *a, bool round, bool sub)
7667{
7668    TCGv_i32 t1, t2;
7669
7670    if (s->thumb
7671        ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7672        : !ENABLE_ARCH_6) {
7673        return false;
7674    }
7675
7676    t1 = load_reg(s, a->rn);
7677    t2 = load_reg(s, a->rm);
7678    tcg_gen_muls2_i32(t2, t1, t1, t2);
7679
7680    if (a->ra != 15) {
7681        TCGv_i32 t3 = load_reg(s, a->ra);
7682        if (sub) {
7683            /*
7684             * For SMMLS, we need a 64-bit subtract: it captures the
7685             * borrow caused by a non-zero multiplicand lowpart, and
7686             * yields the correct result lowpart for the rounding step.
7687             */
7688            TCGv_i32 zero = tcg_const_i32(0);
7689            tcg_gen_sub2_i32(t2, t1, zero, t3, t2, t1);
7690            tcg_temp_free_i32(zero);
7691        } else {
7692            tcg_gen_add_i32(t1, t1, t3);
7693        }
7694        tcg_temp_free_i32(t3);
7695    }
7696    if (round) {
7697        /*
7698         * Adding 0x80000000 to the 64-bit quantity means that we have
7699         * a carry into the high word when the low word has the msb set.
7700         */
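            /* i.e. t1 += carry-out of (low + 0x80000000), which is low >> 31. */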
7701        tcg_gen_shri_i32(t2, t2, 31);
7702        tcg_gen_add_i32(t1, t1, t2);
7703    }
7704    tcg_temp_free_i32(t2);
7705    store_reg(s, a->rd, t1);
7706    return true;
7707}
7708
7709static bool trans_SMMLA(DisasContext *s, arg_rrrr *a)
7710{
7711    return op_smmla(s, a, false, false);
7712}
7713
7714static bool trans_SMMLAR(DisasContext *s, arg_rrrr *a)
7715{
7716    return op_smmla(s, a, true, false);
7717}
7718
7719static bool trans_SMMLS(DisasContext *s, arg_rrrr *a)
7720{
7721    return op_smmla(s, a, false, true);
7722}
7723
7724static bool trans_SMMLSR(DisasContext *s, arg_rrrr *a)
7725{
7726    return op_smmla(s, a, true, true);
7727}
7728
7729static bool op_div(DisasContext *s, arg_rrr *a, bool u)
7730{
7731    TCGv_i32 t1, t2;
7732
7733    if (s->thumb
7734        ? !dc_isar_feature(aa32_thumb_div, s)
7735        : !dc_isar_feature(aa32_arm_div, s)) {
7736        return false;
7737    }
7738
7739    t1 = load_reg(s, a->rn);
7740    t2 = load_reg(s, a->rm);
7741    if (u) {
7742        gen_helper_udiv(t1, t1, t2);
7743    } else {
7744        gen_helper_sdiv(t1, t1, t2);
7745    }
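        /*
         * The helpers supply the architectural results for division by
         * zero (0) and for INT_MIN / -1 (INT_MIN).
         */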
7746    tcg_temp_free_i32(t2);
7747    store_reg(s, a->rd, t1);
7748    return true;
7749}
7750
7751static bool trans_SDIV(DisasContext *s, arg_rrr *a)
7752{
7753    return op_div(s, a, false);
7754}
7755
7756static bool trans_UDIV(DisasContext *s, arg_rrr *a)
7757{
7758    return op_div(s, a, true);
7759}
7760
7761/*
7762 * Block data transfer
7763 */
7764
7765static TCGv_i32 op_addr_block_pre(DisasContext *s, arg_ldst_block *a, int n)
7766{
7767    TCGv_i32 addr = load_reg(s, a->rn);
7768
7769    if (a->b) {
7770        if (a->i) {
7771            /* pre increment */
7772            tcg_gen_addi_i32(addr, addr, 4);
7773        } else {
7774            /* pre decrement */
7775            tcg_gen_addi_i32(addr, addr, -(n * 4));
7776        }
7777    } else if (!a->i && n != 1) {
7778        /* post decrement */
7779        tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
7780    }
7781
7782    if (s->v8m_stackcheck && a->rn == 13 && a->w) {
7783        /*
7784         * If the writeback is incrementing SP rather than
7785         * decrementing it, and the initial SP is below the
7786         * stack limit but the final written-back SP would
7787         * be above, then we must not perform any memory
7788         * accesses, but it is IMPDEF whether we generate
7789         * an exception. We choose to do so in this case.
7790         * At this point 'addr' is the lowest address, so
7791         * either the original SP (if incrementing) or our
7792         * final SP (if decrementing), and that's what we check.
7793         */
7794        gen_helper_v8m_stackcheck(cpu_env, addr);
7795    }
7796
7797    return addr;
7798}
7799
7800static void op_addr_block_post(DisasContext *s, arg_ldst_block *a,
7801                               TCGv_i32 addr, int n)
7802{
7803    if (a->w) {
7804        /* write back */
7805        if (!a->b) {
7806            if (a->i) {
7807                /* post increment */
7808                tcg_gen_addi_i32(addr, addr, 4);
7809            } else {
7810                /* post decrement */
7811                tcg_gen_addi_i32(addr, addr, -(n * 4));
7812            }
7813        } else if (!a->i && n != 1) {
7814            /* pre decrement */
7815            tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
7816        }
7817        store_reg(s, a->rn, addr);
7818    } else {
7819        tcg_temp_free_i32(addr);
7820    }
7821}
7822
7823static bool op_stm(DisasContext *s, arg_ldst_block *a, int min_n)
7824{
7825    int i, j, n, list, mem_idx;
7826    bool user = a->u;
7827    TCGv_i32 addr, tmp, tmp2;
7828
7829    if (user) {
7830        /* STM (user) */
7831        if (IS_USER(s)) {
7832            /* Only usable in supervisor mode.  */
7833            unallocated_encoding(s);
7834            return true;
7835        }
7836    }
7837
7838    list = a->list;
7839    n = ctpop16(list);
7840    if (n < min_n || a->rn == 15) {
7841        unallocated_encoding(s);
7842        return true;
7843    }
7844
7845    addr = op_addr_block_pre(s, a, n);
7846    mem_idx = get_mem_index(s);
7847
7848    for (i = j = 0; i < 16; i++) {
7849        if (!(list & (1 << i))) {
7850            continue;
7851        }
7852
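            /* For STM (user), transfer the USR-mode banked copy of R[i]. */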
7853        if (user && i != 15) {
7854            tmp = tcg_temp_new_i32();
7855            tmp2 = tcg_const_i32(i);
7856            gen_helper_get_user_reg(tmp, cpu_env, tmp2);
7857            tcg_temp_free_i32(tmp2);
7858        } else {
7859            tmp = load_reg(s, i);
7860        }
7861        gen_aa32_st32(s, tmp, addr, mem_idx);
7862        tcg_temp_free_i32(tmp);
7863
7864        /* No need to add after the last transfer.  */
7865        if (++j != n) {
7866            tcg_gen_addi_i32(addr, addr, 4);
7867        }
7868    }
7869
7870    op_addr_block_post(s, a, addr, n);
7871    return true;
7872}
7873
7874static bool trans_STM(DisasContext *s, arg_ldst_block *a)
7875{
7876    /* BitCount(list) < 1 is UNPREDICTABLE */
7877    return op_stm(s, a, 1);
7878}
7879
7880static bool trans_STM_t32(DisasContext *s, arg_ldst_block *a)
7881{
7882    /* Writeback register in register list is UNPREDICTABLE for T32.  */
7883    if (a->w && (a->list & (1 << a->rn))) {
7884        unallocated_encoding(s);
7885        return true;
7886    }
7887    /* BitCount(list) < 2 is UNPREDICTABLE */
7888    return op_stm(s, a, 2);
7889}
7890
7891static bool do_ldm(DisasContext *s, arg_ldst_block *a, int min_n)
7892{
7893    int i, j, n, list, mem_idx;
7894    bool loaded_base;
7895    bool user = a->u;
7896    bool exc_return = false;
7897    TCGv_i32 addr, tmp, tmp2, loaded_var;
7898
7899    if (user) {
7900        /* LDM (user), LDM (exception return) */
7901        if (IS_USER(s)) {
7902            /* Only usable in supervisor mode.  */
7903            unallocated_encoding(s);
7904            return true;
7905        }
7906        if (extract32(a->list, 15, 1)) {
7907            exc_return = true;
7908            user = false;
7909        } else {
7910            /* LDM (user) does not allow writeback.  */
7911            if (a->w) {
7912                unallocated_encoding(s);
7913                return true;
7914            }
7915        }
7916    }
7917
7918    list = a->list;
7919    n = ctpop16(list);
7920    if (n < min_n || a->rn == 15) {
7921        unallocated_encoding(s);
7922        return true;
7923    }
7924
7925    addr = op_addr_block_pre(s, a, n);
7926    mem_idx = get_mem_index(s);
7927    loaded_base = false;
7928    loaded_var = NULL;
7929
7930    for (i = j = 0; i < 16; i++) {
7931        if (!(list & (1 << i))) {
7932            continue;
7933        }
7934
7935        tmp = tcg_temp_new_i32();
7936        gen_aa32_ld32u(s, tmp, addr, mem_idx);
7937        if (user) {
7938            tmp2 = tcg_const_i32(i);
7939            gen_helper_set_user_reg(cpu_env, tmp2, tmp);
7940            tcg_temp_free_i32(tmp2);
7941            tcg_temp_free_i32(tmp);
7942        } else if (i == a->rn) {
7943            loaded_var = tmp;
7944            loaded_base = true;
7945        } else if (i == 15 && exc_return) {
7946            store_pc_exc_ret(s, tmp);
7947        } else {
7948            store_reg_from_load(s, i, tmp);
7949        }
7950
7951        /* No need to add after the last transfer.  */
7952        if (++j != n) {
7953            tcg_gen_addi_i32(addr, addr, 4);
7954        }
7955    }
7956
7957    op_addr_block_post(s, a, addr, n);
7958
7959    if (loaded_base) {
7960        /* Note that we reject base == pc above.  */
7961        store_reg(s, a->rn, loaded_var);
7962    }
7963
7964    if (exc_return) {
7965        /* Restore CPSR from SPSR.  */
7966        tmp = load_cpu_field(spsr);
7967        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
7968            gen_io_start();
7969        }
7970        gen_helper_cpsr_write_eret(cpu_env, tmp);
7971        tcg_temp_free_i32(tmp);
7972        /* Must exit loop to check un-masked IRQs */
7973        s->base.is_jmp = DISAS_EXIT;
7974    }
7975    return true;
7976}
7977
7978static bool trans_LDM_a32(DisasContext *s, arg_ldst_block *a)
7979{
7980    /*
7981     * Writeback register in register list is UNPREDICTABLE
7982     * for ArchVersion() >= 7.  Prior to v7, A32 would write
7983     * an UNKNOWN value to the base register.
7984     */
7985    if (ENABLE_ARCH_7 && a->w && (a->list & (1 << a->rn))) {
7986        unallocated_encoding(s);
7987        return true;
7988    }
7989    /* BitCount(list) < 1 is UNPREDICTABLE */
7990    return do_ldm(s, a, 1);
7991}
7992
7993static bool trans_LDM_t32(DisasContext *s, arg_ldst_block *a)
7994{
7995    /* Writeback register in register list is UNPREDICTABLE for T32. */
7996    if (a->w && (a->list & (1 << a->rn))) {
7997        unallocated_encoding(s);
7998        return true;
7999    }
8000    /* BitCount(list) < 2 is UNPREDICTABLE */
8001    return do_ldm(s, a, 2);
8002}
8003
8004static bool trans_LDM_t16(DisasContext *s, arg_ldst_block *a)
8005{
8006    /* Writeback is conditional on the base register not being loaded.  */
8007    a->w = !(a->list & (1 << a->rn));
8008    /* BitCount(list) < 1 is UNPREDICTABLE */
8009    return do_ldm(s, a, 1);
8010}
8011
8012static bool trans_CLRM(DisasContext *s, arg_CLRM *a)
8013{
8014    int i;
8015    TCGv_i32 zero;
8016
8017    if (!dc_isar_feature(aa32_m_sec_state, s)) {
8018        return false;
8019    }
8020
8021    if (extract32(a->list, 13, 1)) {
8022        return false;
8023    }
8024
8025    if (!a->list) {
8026        /* UNPREDICTABLE; we choose to UNDEF */
8027        return false;
8028    }
8029
8030    zero = tcg_const_i32(0);
8031    for (i = 0; i < 15; i++) {
8032        if (extract32(a->list, i, 1)) {
8033            /* Clear R[i] */
8034            tcg_gen_mov_i32(cpu_R[i], zero);
8035        }
8036    }
8037    if (extract32(a->list, 15, 1)) {
8038        /*
8039         * Clear APSR (by calling the MSR helper with the same argument
8040         * as for "MSR APSR_nzcvqg, Rn": mask = 0b1100, SYSM=0)
8041         */
8042        TCGv_i32 maskreg = tcg_const_i32(0xc << 8);
8043        gen_helper_v7m_msr(cpu_env, maskreg, zero);
8044        tcg_temp_free_i32(maskreg);
8045    }
8046    tcg_temp_free_i32(zero);
8047    return true;
8048}
8049
8050/*
8051 * Branch, branch with link
8052 */
8053
8054static bool trans_B(DisasContext *s, arg_i *a)
8055{
8056    gen_jmp(s, read_pc(s) + a->imm);
8057    return true;
8058}
8059
8060static bool trans_B_cond_thumb(DisasContext *s, arg_ci *a)
8061{
8062    /* This insn has its condition in the encoding and must be outside an IT block.  */
8063    if (a->cond >= 0xe) {
8064        return false;
8065    }
8066    if (s->condexec_mask) {
8067        unallocated_encoding(s);
8068        return true;
8069    }
8070    arm_skip_unless(s, a->cond);
8071    gen_jmp(s, read_pc(s) + a->imm);
8072    return true;
8073}
8074
8075static bool trans_BL(DisasContext *s, arg_i *a)
8076{
8077    tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | s->thumb);
8078    gen_jmp(s, read_pc(s) + a->imm);
8079    return true;
8080}
8081
8082static bool trans_BLX_i(DisasContext *s, arg_BLX_i *a)
8083{
8084    TCGv_i32 tmp;
8085
8086    /*
8087     * BLX <imm> would be useless on M-profile; the encoding space
8088     * is used for other insns from v8.1M onward, and UNDEFs before that.
8089     */
8090    if (arm_dc_feature(s, ARM_FEATURE_M)) {
8091        return false;
8092    }
8093
8094    /* For A32, ARM_FEATURE_V5 is checked near the start of the uncond block. */
8095    if (s->thumb && (a->imm & 2)) {
8096        return false;
8097    }
8098    tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | s->thumb);
8099    tmp = tcg_const_i32(!s->thumb);
8100    store_cpu_field(tmp, thumb);
8101    gen_jmp(s, (read_pc(s) & ~3) + a->imm);
8102    return true;
8103}
8104
8105static bool trans_BL_BLX_prefix(DisasContext *s, arg_BL_BLX_prefix *a)
8106{
8107    assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
8108    tcg_gen_movi_i32(cpu_R[14], read_pc(s) + (a->imm << 12));
8109    return true;
8110}
8111
8112static bool trans_BL_suffix(DisasContext *s, arg_BL_suffix *a)
8113{
8114    TCGv_i32 tmp = tcg_temp_new_i32();
8115
8116    assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
8117    tcg_gen_addi_i32(tmp, cpu_R[14], (a->imm << 1) | 1);
8118    tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | 1);
8119    gen_bx(s, tmp);
8120    return true;
8121}
8122
8123static bool trans_BLX_suffix(DisasContext *s, arg_BLX_suffix *a)
8124{
8125    TCGv_i32 tmp;
8126
8127    assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
8128    if (!ENABLE_ARCH_5) {
8129        return false;
8130    }
8131    tmp = tcg_temp_new_i32();
8132    tcg_gen_addi_i32(tmp, cpu_R[14], a->imm << 1);
8133    tcg_gen_andi_i32(tmp, tmp, 0xfffffffc);
8134    tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | 1);
8135    gen_bx(s, tmp);
8136    return true;
8137}
8138
8139static bool trans_BF(DisasContext *s, arg_BF *a)
8140{
8141    /*
8142     * M-profile branch future insns. The architecture permits an
8143     * implementation to implement these as NOPs (equivalent to
8144     * discarding the LO_BRANCH_INFO cache immediately), and we
8145     * take that IMPDEF option because for QEMU a "real" implementation
8146     * would be complicated and wouldn't execute any faster.
8147     */
8148    if (!dc_isar_feature(aa32_lob, s)) {
8149        return false;
8150    }
8151    if (a->boff == 0) {
8152        /* SEE "Related encodings" (loop insns) */
8153        return false;
8154    }
8155    /* Handle as NOP */
8156    return true;
8157}
8158
8159static bool trans_DLS(DisasContext *s, arg_DLS *a)
8160{
8161    /* M-profile low-overhead loop start */
8162    TCGv_i32 tmp;
8163
8164    if (!dc_isar_feature(aa32_lob, s)) {
8165        return false;
8166    }
8167    if (a->rn == 13 || a->rn == 15) {
8168        /* CONSTRAINED UNPREDICTABLE: we choose to UNDEF */
8169        return false;
8170    }
8171
8172    /* Not a while loop, no tail predication: just set LR to the count */
8173    tmp = load_reg(s, a->rn);
8174    store_reg(s, 14, tmp);
8175    return true;
8176}
8177
8178static bool trans_WLS(DisasContext *s, arg_WLS *a)
8179{
8180    /* M-profile low-overhead while-loop start */
8181    TCGv_i32 tmp;
8182    TCGLabel *nextlabel;
8183
8184    if (!dc_isar_feature(aa32_lob, s)) {
8185        return false;
8186    }
8187    if (a->rn == 13 || a->rn == 15) {
8188        /* CONSTRAINED UNPREDICTABLE: we choose to UNDEF */
8189        return false;
8190    }
8191    if (s->condexec_mask) {
8192        /*
8193         * WLS in an IT block is CONSTRAINED UNPREDICTABLE;
8194         * we choose to UNDEF, because otherwise our use of
8195         * gen_goto_tb(1) would clash with the use of TB exit 1
8196         * in the dc->condjmp condition-failed codepath in
8197         * arm_tr_tb_stop() and we'd get an assertion.
8198         */
8199        return false;
8200    }
8201    nextlabel = gen_new_label();
8202    tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_R[a->rn], 0, nextlabel);
8203    tmp = load_reg(s, a->rn);
8204    store_reg(s, 14, tmp);
8205    gen_jmp_tb(s, s->base.pc_next, 1);
8206
8207    gen_set_label(nextlabel);
8208    gen_jmp(s, read_pc(s) + a->imm);
8209    return true;
8210}
8211
8212static bool trans_LE(DisasContext *s, arg_LE *a)
8213{
8214    /*
8215     * M-profile low-overhead loop end. The architecture permits an
8216     * implementation to discard the LO_BRANCH_INFO cache at any time,
8217     * and we take the IMPDEF option to never set it in the first place
8218     * (equivalent to always discarding it immediately), because for QEMU
8219     * a "real" implementation would be complicated and wouldn't execute
8220     * any faster.
8221     */
8222    TCGv_i32 tmp;
8223
8224    if (!dc_isar_feature(aa32_lob, s)) {
8225        return false;
8226    }
8227
8228    if (!a->f) {
8229        /* Not loop-forever. If LR <= 1 this is the last loop: do nothing. */
8230        arm_gen_condlabel(s);
8231        tcg_gen_brcondi_i32(TCG_COND_LEU, cpu_R[14], 1, s->condlabel);
8232        /* Decrement LR */
8233        tmp = load_reg(s, 14);
8234        tcg_gen_addi_i32(tmp, tmp, -1);
8235        store_reg(s, 14, tmp);
8236    }
8237    /* Jump back to the loop start */
8238    gen_jmp(s, read_pc(s) - a->imm);
8239    return true;
8240}
8241
8242static bool op_tbranch(DisasContext *s, arg_tbranch *a, bool half)
8243{
8244    TCGv_i32 addr, tmp;
8245
8246    tmp = load_reg(s, a->rm);
8247    if (half) {
8248        tcg_gen_add_i32(tmp, tmp, tmp);
8249    }
8250    addr = load_reg(s, a->rn);
8251    tcg_gen_add_i32(addr, addr, tmp);
8252
8253    gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s),
8254                    half ? MO_UW | s->be_data : MO_UB);
8255    tcg_temp_free_i32(addr);
8256
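        /* The loaded entry counts halfwords: double it and add it to the PC. */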
8257    tcg_gen_add_i32(tmp, tmp, tmp);
8258    tcg_gen_addi_i32(tmp, tmp, read_pc(s));
8259    store_reg(s, 15, tmp);
8260    return true;
8261}
8262
8263static bool trans_TBB(DisasContext *s, arg_tbranch *a)
8264{
8265    return op_tbranch(s, a, false);
8266}
8267
8268static bool trans_TBH(DisasContext *s, arg_tbranch *a)
8269{
8270    return op_tbranch(s, a, true);
8271}
8272
8273static bool trans_CBZ(DisasContext *s, arg_CBZ *a)
8274{
8275    TCGv_i32 tmp = load_reg(s, a->rn);
8276
8277    arm_gen_condlabel(s);
8278    tcg_gen_brcondi_i32(a->nz ? TCG_COND_EQ : TCG_COND_NE,
8279                        tmp, 0, s->condlabel);
8280    tcg_temp_free_i32(tmp);
8281    gen_jmp(s, read_pc(s) + a->imm);
8282    return true;
8283}
8284
8285/*
8286 * Supervisor call - both T32 & A32 come here so we need to check
8287 * which mode we are in when checking for semihosting.
8288 */
8289
8290static bool trans_SVC(DisasContext *s, arg_SVC *a)
8291{
8292    const uint32_t semihost_imm = s->thumb ? 0xab : 0x123456;
8293
8294    if (!arm_dc_feature(s, ARM_FEATURE_M) && semihosting_enabled() &&
8295#ifndef CONFIG_USER_ONLY
8296        !IS_USER(s) &&
8297#endif
8298        (a->imm == semihost_imm)) {
8299        gen_exception_internal_insn(s, s->pc_curr, EXCP_SEMIHOST);
8300    } else {
8301        gen_set_pc_im(s, s->base.pc_next);
8302        s->svc_imm = a->imm;
8303        s->base.is_jmp = DISAS_SWI;
8304    }
8305    return true;
8306}
8307
8308/*
8309 * Unconditional system instructions
8310 */
8311
8312static bool trans_RFE(DisasContext *s, arg_RFE *a)
8313{
8314    static const int8_t pre_offset[4] = {
8315        /* DA */ -4, /* IA */ 0, /* DB */ -8, /* IB */ 4
8316    };
8317    static const int8_t post_offset[4] = {
8318        /* DA */ -8, /* IA */ 4, /* DB */ -4, /* IB */ 0
8319    };
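        /*
         * pre_offset places 'addr' at the lower of the two words to load;
         * post_offset applies after 'addr' has advanced by 4, yielding
         * the architectural writeback of Rn +/- 8.
         */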
8320    TCGv_i32 addr, t1, t2;
8321
8322    if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
8323        return false;
8324    }
8325    if (IS_USER(s)) {
8326        unallocated_encoding(s);
8327        return true;
8328    }
8329
8330    addr = load_reg(s, a->rn);
8331    tcg_gen_addi_i32(addr, addr, pre_offset[a->pu]);
8332
8333    /* Load PC into tmp and CPSR into tmp2.  */
8334    t1 = tcg_temp_new_i32();
8335    gen_aa32_ld32u(s, t1, addr, get_mem_index(s));
8336    tcg_gen_addi_i32(addr, addr, 4);
8337    t2 = tcg_temp_new_i32();
8338    gen_aa32_ld32u(s, t2, addr, get_mem_index(s));
8339
8340    if (a->w) {
8341        /* Base writeback.  */
8342        tcg_gen_addi_i32(addr, addr, post_offset[a->pu]);
8343        store_reg(s, a->rn, addr);
8344    } else {
8345        tcg_temp_free_i32(addr);
8346    }
8347    gen_rfe(s, t1, t2);
8348    return true;
8349}
8350
8351static bool trans_SRS(DisasContext *s, arg_SRS *a)
8352{
8353    if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
8354        return false;
8355    }
8356    gen_srs(s, a->mode, a->pu, a->w);
8357    return true;
8358}
8359
8360static bool trans_CPS(DisasContext *s, arg_CPS *a)
8361{
8362    uint32_t mask, val;
8363
8364    if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
8365        return false;
8366    }
8367    if (IS_USER(s)) {
8368        /* Implemented as NOP in user mode.  */
8369        return true;
8370    }
8371    /* TODO: There are quite a lot of UNPREDICTABLE argument combinations. */
8372
8373    mask = val = 0;
8374    if (a->imod & 2) {
8375        if (a->A) {
8376            mask |= CPSR_A;
8377        }
8378        if (a->I) {
8379            mask |= CPSR_I;
8380        }
8381        if (a->F) {
8382            mask |= CPSR_F;
8383        }
8384        if (a->imod & 1) {
8385            val |= mask;
8386        }
8387    }
8388    if (a->M) {
8389        mask |= CPSR_M;
8390        val |= a->mode;
8391    }
8392    if (mask) {
8393        gen_set_psr_im(s, mask, 0, val);
8394    }
8395    return true;
8396}
8397
8398static bool trans_CPS_v7m(DisasContext *s, arg_CPS_v7m *a)
8399{
8400    TCGv_i32 tmp, addr, el;
8401
8402    if (!arm_dc_feature(s, ARM_FEATURE_M)) {
8403        return false;
8404    }
8405    if (IS_USER(s)) {
8406        /* Implemented as NOP in user mode.  */
8407        return true;
8408    }
8409
8410    tmp = tcg_const_i32(a->im);
8411    /* FAULTMASK */
8412    if (a->F) {
8413        addr = tcg_const_i32(19);
8414        gen_helper_v7m_msr(cpu_env, addr, tmp);
8415        tcg_temp_free_i32(addr);
8416    }
8417    /* PRIMASK */
8418    if (a->I) {
8419        addr = tcg_const_i32(16);
8420        gen_helper_v7m_msr(cpu_env, addr, tmp);
8421        tcg_temp_free_i32(addr);
8422    }
8423    el = tcg_const_i32(s->current_el);
8424    gen_helper_rebuild_hflags_m32(cpu_env, el);
8425    tcg_temp_free_i32(el);
8426    tcg_temp_free_i32(tmp);
8427    gen_lookup_tb(s);
8428    return true;
8429}
8430
8431/*
8432 * Clear-Exclusive, Barriers
8433 */
8434
8435static bool trans_CLREX(DisasContext *s, arg_CLREX *a)
8436{
8437    if (s->thumb
8438        ? !ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)
8439        : !ENABLE_ARCH_6K) {
8440        return false;
8441    }
8442    gen_clrex(s);
8443    return true;
8444}
8445
8446static bool trans_DSB(DisasContext *s, arg_DSB *a)
8447{
8448    if (!ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)) {
8449        return false;
8450    }
8451    tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
8452    return true;
8453}
8454
8455static bool trans_DMB(DisasContext *s, arg_DMB *a)
8456{
8457    return trans_DSB(s, NULL);
8458}
8459
8460static bool trans_ISB(DisasContext *s, arg_ISB *a)
8461{
8462    if (!ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)) {
8463        return false;
8464    }
8465    /*
8466     * We need to break the TB after this insn to execute
8467     * self-modifying code correctly and also to take
8468     * any pending interrupts immediately.
8469     */
8470    gen_goto_tb(s, 0, s->base.pc_next);
8471    return true;
8472}
8473
8474static bool trans_SB(DisasContext *s, arg_SB *a)
8475{
8476    if (!dc_isar_feature(aa32_sb, s)) {
8477        return false;
8478    }
8479    /*
8480     * TODO: There is no speculation barrier opcode
8481     * for TCG; MB and end the TB instead.
8482     */
8483    tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
8484    gen_goto_tb(s, 0, s->base.pc_next);
8485    return true;
8486}
8487
8488static bool trans_SETEND(DisasContext *s, arg_SETEND *a)
8489{
8490    if (!ENABLE_ARCH_6) {
8491        return false;
8492    }
8493    if (a->E != (s->be_data == MO_BE)) {
8494        gen_helper_setend(cpu_env);
8495        s->base.is_jmp = DISAS_UPDATE_EXIT;
8496    }
8497    return true;
8498}
8499
8500/*
8501 * Preload instructions
8502 * All are nops, contingent on the appropriate arch level.
8503 */
8504
8505static bool trans_PLD(DisasContext *s, arg_PLD *a)
8506{
8507    return ENABLE_ARCH_5TE;
8508}
8509
8510static bool trans_PLDW(DisasContext *s, arg_PLD *a)
8511{
8512    return arm_dc_feature(s, ARM_FEATURE_V7MP);
8513}
8514
8515static bool trans_PLI(DisasContext *s, arg_PLD *a)
8516{
8517    return ENABLE_ARCH_7;
8518}
8519
8520/*
8521 * If-then
8522 */
8523
8524static bool trans_IT(DisasContext *s, arg_IT *a)
8525{
8526    int cond_mask = a->cond_mask;
8527
8528    /*
8529     * No actual code is generated for this insn; it just sets up state.
8530     *
8531     * Combinations of firstcond and mask which set up a 0b1111
8532     * condition are UNPREDICTABLE; we take the CONSTRAINED
8533     * UNPREDICTABLE choice to treat 0b1111 the same as 0b1110,
8534     * i.e. both meaning "execute always".
8535     */
8536    s->condexec_cond = (cond_mask >> 4) & 0xe;
8537    s->condexec_mask = cond_mask & 0x1f;
8538    return true;
8539}
8540
/* v8.1M CSEL/CSINC/CSNEG/CSINV */
static bool trans_CSEL(DisasContext *s, arg_CSEL *a)
{
    TCGv_i32 rn, rm, zero;
    DisasCompare c;

    if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
        return false;
    }

    if (a->rm == 13) {
        /* SEE "Related encodings" (MVE shifts) */
        return false;
    }

    if (a->rd == 13 || a->rd == 15 || a->rn == 13 || a->fcond >= 14) {
        /* CONSTRAINED UNPREDICTABLE: we choose to UNDEF */
        return false;
    }

    /* In this insn input reg fields of 0b1111 mean "zero", not "PC" */
    if (a->rn == 15) {
        rn = tcg_const_i32(0);
    } else {
        rn = load_reg(s, a->rn);
    }
    if (a->rm == 15) {
        rm = tcg_const_i32(0);
    } else {
        rm = load_reg(s, a->rm);
    }

    switch (a->op) {
    case 0: /* CSEL */
        break;
    case 1: /* CSINC */
        tcg_gen_addi_i32(rm, rm, 1);
        break;
    case 2: /* CSINV */
        tcg_gen_not_i32(rm, rm);
        break;
    case 3: /* CSNEG */
        tcg_gen_neg_i32(rm, rm);
        break;
    default:
        g_assert_not_reached();
    }

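    /* Select: Rd = condition passed ? Rn : adjusted Rm. */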
    arm_test_cc(&c, a->fcond);
    zero = tcg_const_i32(0);
    tcg_gen_movcond_i32(c.cond, rn, c.value, zero, rn, rm);
    arm_free_cc(&c);
    tcg_temp_free_i32(zero);

    store_reg(s, a->rd, rn);
    tcg_temp_free_i32(rm);

    return true;
}

/*
 * Legacy decoder.
 */

static void disas_arm_insn(DisasContext *s, unsigned int insn)
{
    unsigned int cond = insn >> 28;

    /* M variants do not implement ARM mode; this must raise the INVSTATE
     * UsageFault exception.
     */
    if (arm_dc_feature(s, ARM_FEATURE_M)) {
        gen_exception_insn(s, s->pc_curr, EXCP_INVSTATE, syn_uncategorized(),
                           default_exception_el(s));
        return;
    }

    if (cond == 0xf) {
        /* In ARMv3 and v4 the NV condition is UNPREDICTABLE; we
         * choose to UNDEF. In ARMv5 and above the space is used
         * for miscellaneous unconditional instructions.
         */
        if (!arm_dc_feature(s, ARM_FEATURE_V5)) {
            unallocated_encoding(s);
            return;
        }

        /* Unconditional instructions.  */
        /* TODO: Perhaps merge these into one decodetree output file.  */
        if (disas_a32_uncond(s, insn) ||
            disas_vfp_uncond(s, insn) ||
            disas_neon_dp(s, insn) ||
            disas_neon_ls(s, insn) ||
            disas_neon_shared(s, insn)) {
            return;
        }
        /* fall back to legacy decoder */

        if ((insn & 0x0e000f00) == 0x0c000100) {
            if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
                /* iWMMXt register transfer.  */
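                /* Access is gated by the coprocessor access bits in CPAR. */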
                if (extract32(s->c15_cpar, 1, 1)) {
                    if (!disas_iwmmxt_insn(s, insn)) {
                        return;
                    }
                }
            }
        }
        goto illegal_op;
    }
    if (cond != 0xe) {
        /* If not always executing, generate a conditional jump to
           the next instruction */
        arm_skip_unless(s, cond);
    }

    /* TODO: Perhaps merge these into one decodetree output file.  */
    if (disas_a32(s, insn) ||
        disas_vfp(s, insn)) {
        return;
    }
    /* fall back to legacy decoder */
    /* TODO: convert xscale/iwmmxt decoder to decodetree ?? */
    if (arm_dc_feature(s, ARM_FEATURE_XSCALE)) {
        if (((insn & 0x0c000e00) == 0x0c000000)
            && ((insn & 0x03000000) != 0x03000000)) {
            /* Coprocessor insn, coprocessor 0 or 1 */
            disas_xscale_insn(s, insn);
            return;
        }
    }

illegal_op:
    unallocated_encoding(s);
}

static bool thumb_insn_is_16bit(DisasContext *s, uint32_t pc, uint32_t insn)
{
    /*
     * Return true if this is a 16 bit instruction. We must be precise
     * about this (matching the decode).
     */
    if ((insn >> 11) < 0x1d) {
        /* Definitely a 16-bit instruction */
        return true;
    }

    /* Top five bits 0b11101 / 0b11110 / 0b11111 : this is the
     * first half of a 32-bit Thumb insn. Thumb-1 cores might
     * end up actually treating this as two 16-bit insns, though,
     * if it's half of a bl/blx pair that might span a page boundary.
     */
    if (arm_dc_feature(s, ARM_FEATURE_THUMB2) ||
        arm_dc_feature(s, ARM_FEATURE_M)) {
        /* Thumb2 cores (including all M profile ones) always treat
         * 32-bit insns as 32-bit.
         */
        return false;
    }

    if ((insn >> 11) == 0x1e && pc - s->page_start < TARGET_PAGE_SIZE - 3) {
        /* 0b1111_0xxx_xxxx_xxxx : BL/BLX prefix, and the suffix
         * is not on the next page; we merge this into a 32-bit
         * insn.
         */
        return false;
    }
    /* 0b1110_1xxx_xxxx_xxxx : BLX suffix (or UNDEF);
     * 0b1111_1xxx_xxxx_xxxx : BL suffix;
     * 0b1111_0xxx_xxxx_xxxx : BL/BLX prefix on the end of a page
     *  -- handle as single 16 bit insn
     */
    return true;
}

/* Translate a 32-bit thumb instruction. */
static void disas_thumb2_insn(DisasContext *s, uint32_t insn)
{
    /*
     * ARMv6-M supports a limited subset of Thumb2 instructions.
     * Other Thumb1 architectures allow only 32-bit
     * combined BL/BLX prefix and suffix.
     */
    if (arm_dc_feature(s, ARM_FEATURE_M) &&
        !arm_dc_feature(s, ARM_FEATURE_V7)) {
        int i;
        bool found = false;
        static const uint32_t armv6m_insn[] = {0xf3808000 /* msr */,
                                               0xf3b08040 /* dsb */,
                                               0xf3b08050 /* dmb */,
                                               0xf3b08060 /* isb */,
                                               0xf3e08000 /* mrs */,
                                               0xf000d000 /* bl */};
        static const uint32_t armv6m_mask[] = {0xffe0d000,
                                               0xfff0d0f0,
                                               0xfff0d0f0,
                                               0xfff0d0f0,
                                               0xffe0d000,
                                               0xf800d000};

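        /* Accept the insn only if it matches one of the patterns above. */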
        for (i = 0; i < ARRAY_SIZE(armv6m_insn); i++) {
            if ((insn & armv6m_mask[i]) == armv6m_insn[i]) {
                found = true;
                break;
            }
        }
        if (!found) {
            goto illegal_op;
        }
    } else if ((insn & 0xf800e800) != 0xf000e800)  {
        if (!arm_dc_feature(s, ARM_FEATURE_THUMB2)) {
            unallocated_encoding(s);
            return;
        }
    }

    if (arm_dc_feature(s, ARM_FEATURE_M)) {
        /*
         * NOCP takes precedence over any UNDEF for (almost) the
         * entire wide range of coprocessor-space encodings, so check
         * for it first before proceeding to actually decode eg VFP
         * insns. This decode also handles the few insns which are
         * in copro space but do not have NOCP checks (eg VLLDM, VLSTM).
         */
        if (disas_m_nocp(s, insn)) {
            return;
        }
    }

    if ((insn & 0xef000000) == 0xef000000) {
        /*
         * T32 encodings 0b111p_1111_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq
         * transform into
         * A32 encodings 0b1111_001p_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq
         */
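        /*
         * That is: keep bit 25, clear bits 27:26, move bit 28 (p)
         * down into bit 24, and force bits 31:28 to 0b1111.
         */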
        uint32_t a32_insn = (insn & 0xe2ffffff) |
            ((insn & (1 << 28)) >> 4) | (1 << 28);

        if (disas_neon_dp(s, a32_insn)) {
            return;
        }
    }

    if ((insn & 0xff100000) == 0xf9000000) {
        /*
         * T32 encodings 0b1111_1001_ppp0_qqqq_qqqq_qqqq_qqqq_qqqq
         * transform into
         * A32 encodings 0b1111_0100_ppp0_qqqq_qqqq_qqqq_qqqq_qqqq
         */
        uint32_t a32_insn = (insn & 0x00ffffff) | 0xf4000000;

        if (disas_neon_ls(s, a32_insn)) {
            return;
        }
    }

    /*
     * TODO: Perhaps merge these into one decodetree output file.
     * Note disas_vfp is written for a32 with cond field in the
     * top nibble.  The t32 encoding requires 0xe in the top nibble.
     */
    if (disas_t32(s, insn) ||
        disas_vfp_uncond(s, insn) ||
        disas_neon_shared(s, insn) ||
        ((insn >> 28) == 0xe && disas_vfp(s, insn))) {
        return;
    }

illegal_op:
    unallocated_encoding(s);
}

static void disas_thumb_insn(DisasContext *s, uint32_t insn)
{
    if (!disas_t16(s, insn)) {
        unallocated_encoding(s);
    }
}

static bool insn_crosses_page(CPUARMState *env, DisasContext *s)
{
    /* Return true if the insn at dc->base.pc_next might cross a page boundary.
     * (False positives are OK, false negatives are not.)
     * We know this is a Thumb insn, and our caller ensures we are
     * only called if dc->base.pc_next is less than 4 bytes from the page
     * boundary, so we cross the page if the first 16 bits indicate
     * that this is a 32 bit insn.
     */
    uint16_t insn = arm_lduw_code(env, s->base.pc_next, s->sctlr_b);

    return !thumb_insn_is_16bit(s, s->base.pc_next, insn);
}

static void arm_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);
    CPUARMState *env = cs->env_ptr;
    ARMCPU *cpu = env_archcpu(env);
    uint32_t tb_flags = dc->base.tb->flags;
    uint32_t condexec, core_mmu_idx;

    dc->isar = &cpu->isar;
    dc->condjmp = 0;

    dc->aarch64 = 0;
    /* If we are coming from secure EL0 in a system with a 32-bit EL3, then
     * there is no secure EL1, so we route exceptions to EL3.
     */
    dc->secure_routed_to_el3 = arm_feature(env, ARM_FEATURE_EL3) &&
                               !arm_el_is_aa64(env, 3);
    dc->thumb = FIELD_EX32(tb_flags, TBFLAG_AM32, THUMB);
    dc->be_data = FIELD_EX32(tb_flags, TBFLAG_ANY, BE_DATA) ? MO_BE : MO_LE;
    condexec = FIELD_EX32(tb_flags, TBFLAG_AM32, CONDEXEC);
    dc->condexec_mask = (condexec & 0xf) << 1;
    dc->condexec_cond = condexec >> 4;

    core_mmu_idx = FIELD_EX32(tb_flags, TBFLAG_ANY, MMUIDX);
    dc->mmu_idx = core_to_arm_mmu_idx(env, core_mmu_idx);
    dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
#if !defined(CONFIG_USER_ONLY)
    dc->user = (dc->current_el == 0);
#endif
    dc->fp_excp_el = FIELD_EX32(tb_flags, TBFLAG_ANY, FPEXC_EL);

    if (arm_feature(env, ARM_FEATURE_M)) {
        dc->vfp_enabled = 1;
        dc->be_data = MO_TE;
        dc->v7m_handler_mode = FIELD_EX32(tb_flags, TBFLAG_M32, HANDLER);
        dc->v8m_secure = arm_feature(env, ARM_FEATURE_M_SECURITY) &&
            regime_is_secure(env, dc->mmu_idx);
        dc->v8m_stackcheck = FIELD_EX32(tb_flags, TBFLAG_M32, STACKCHECK);
        dc->v8m_fpccr_s_wrong =
            FIELD_EX32(tb_flags, TBFLAG_M32, FPCCR_S_WRONG);
        dc->v7m_new_fp_ctxt_needed =
            FIELD_EX32(tb_flags, TBFLAG_M32, NEW_FP_CTXT_NEEDED);
        dc->v7m_lspact = FIELD_EX32(tb_flags, TBFLAG_M32, LSPACT);
    } else {
        dc->be_data =
            FIELD_EX32(tb_flags, TBFLAG_ANY, BE_DATA) ? MO_BE : MO_LE;
        dc->debug_target_el =
            FIELD_EX32(tb_flags, TBFLAG_ANY, DEBUG_TARGET_EL);
        dc->sctlr_b = FIELD_EX32(tb_flags, TBFLAG_A32, SCTLR_B);
        dc->hstr_active = FIELD_EX32(tb_flags, TBFLAG_A32, HSTR_ACTIVE);
        dc->ns = FIELD_EX32(tb_flags, TBFLAG_A32, NS);
        dc->vfp_enabled = FIELD_EX32(tb_flags, TBFLAG_A32, VFPEN);
        if (arm_feature(env, ARM_FEATURE_XSCALE)) {
            dc->c15_cpar = FIELD_EX32(tb_flags, TBFLAG_A32, XSCALE_CPAR);
        } else {
            dc->vec_len = FIELD_EX32(tb_flags, TBFLAG_A32, VECLEN);
            dc->vec_stride = FIELD_EX32(tb_flags, TBFLAG_A32, VECSTRIDE);
        }
    }
    dc->cp_regs = cpu->cp_regs;
    dc->features = env->features;

    /* Single step state. The code-generation logic here is:
     *  SS_ACTIVE == 0:
     *   generate code with no special handling for single-stepping (except
     *   that anything that can make us go to SS_ACTIVE == 1 must end the TB;
     *   this happens anyway because those changes are all system register or
     *   PSTATE writes).
     *  SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
     *   emit code for one insn
     *   emit code to clear PSTATE.SS
     *   emit code to generate software step exception for completed step
     *   end TB (as usual for having generated an exception)
     *  SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
     *   emit code to generate a software step exception
     *   end the TB
     */
    dc->ss_active = FIELD_EX32(tb_flags, TBFLAG_ANY, SS_ACTIVE);
    dc->pstate_ss = FIELD_EX32(tb_flags, TBFLAG_ANY, PSTATE_SS);
    dc->is_ldex = false;

    dc->page_start = dc->base.pc_first & TARGET_PAGE_MASK;

    /* If architectural single step active, limit to 1.  */
    if (is_singlestepping(dc)) {
        dc->base.max_insns = 1;
    }

    /* ARM is a fixed-length ISA.  Bound the number of insns to execute
       to those left on the page.  */
    if (!dc->thumb) {
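        /*
         * -(pc | TARGET_PAGE_MASK) is the number of bytes remaining
         * on this page; each A32 insn occupies 4 of them.
         */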
        int bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;
        dc->base.max_insns = MIN(dc->base.max_insns, bound);
    }

    cpu_V0 = tcg_temp_new_i64();
    cpu_V1 = tcg_temp_new_i64();
    cpu_M0 = tcg_temp_new_i64();
}

static void arm_tr_tb_start(DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);

    /* A note on handling of the condexec (IT) bits:
     *
     * We want to avoid the overhead of having to write the updated condexec
     * bits back to the CPUARMState for every instruction in an IT block. So:
     * (1) if the condexec bits are not already zero then we write
     * zero back into the CPUARMState now. This avoids complications trying
     * to do it at the end of the block. (For example if we don't do this
     * it's hard to identify whether we can safely skip writing condexec
     * at the end of the TB, which we definitely want to do for the case
     * where a TB doesn't do anything with the IT state at all.)
     * (2) if we are going to leave the TB then we call gen_set_condexec()
     * which will write the correct value into CPUARMState if zero is wrong.
     * This is done both for leaving the TB at the end, and for leaving
     * it because of an exception we know will happen, which is done in
     * gen_exception_insn(). The latter is necessary because we need to
     * leave the TB with the PC/IT state just prior to execution of the
     * instruction which caused the exception.
     * (3) if we leave the TB unexpectedly (eg a data abort on a load)
     * then the CPUARMState will be wrong and we need to reset it.
     * This is handled in the same way as restoration of the
     * PC in these situations; we save the value of the condexec bits
     * for each PC via tcg_gen_insn_start(), and restore_state_to_opc()
     * then uses this to restore them after an exception.
     *
     * Note that there are no instructions which can read the condexec
     * bits, and none which can write non-static values to them, so
     * we don't need to care about whether CPUARMState is correct in the
     * middle of a TB.
     */

    /* Reset the conditional execution bits immediately. This avoids
       complications trying to do it at the end of the block.  */
    if (dc->condexec_mask || dc->condexec_cond) {
        TCGv_i32 tmp = tcg_temp_new_i32();
        tcg_gen_movi_i32(tmp, 0);
        store_cpu_field(tmp, condexec_bits);
    }
}

static void arm_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);

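    /*
     * The three arguments become the insn_start words that
     * restore_state_to_opc() sees as data[0..2]: the PC, the packed
     * condexec bits, and a syndrome slot filled in later through
     * dc->insn_start.
     */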
    tcg_gen_insn_start(dc->base.pc_next,
                       (dc->condexec_cond << 4) | (dc->condexec_mask >> 1),
                       0);
    dc->insn_start = tcg_last_op();
}

static bool arm_tr_breakpoint_check(DisasContextBase *dcbase, CPUState *cpu,
                                    const CPUBreakpoint *bp)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);

    if (bp->flags & BP_CPU) {
        gen_set_condexec(dc);
        gen_set_pc_im(dc, dc->base.pc_next);
        gen_helper_check_breakpoints(cpu_env);
        /* End the TB early; it's likely not going to be executed */
        dc->base.is_jmp = DISAS_TOO_MANY;
    } else {
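        /* A gdb breakpoint: raise a debug exception unconditionally. */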
        gen_exception_internal_insn(dc, dc->base.pc_next, EXCP_DEBUG);
        /* The address covered by the breakpoint must be
           included in [tb->pc, tb->pc + tb->size) in order
           for it to be properly cleared -- thus we
           increment the PC here so that the logic setting
           tb->size below does the right thing.  */
        /* TODO: Advance PC by correct instruction length to
         * avoid disassembler error messages */
        dc->base.pc_next += 2;
        dc->base.is_jmp = DISAS_NORETURN;
    }

    return true;
}

static bool arm_pre_translate_insn(DisasContext *dc)
{
#ifdef CONFIG_USER_ONLY
    /* Intercept jump to the magic kernel page.  */
    if (dc->base.pc_next >= 0xffff0000) {
        /* We always get here via a jump, so we know we are not in a
           conditional execution block.  */
        gen_exception_internal(EXCP_KERNEL_TRAP);
        dc->base.is_jmp = DISAS_NORETURN;
        return true;
    }
#endif

    if (dc->ss_active && !dc->pstate_ss) {
        /* Singlestep state is Active-pending.
         * If we're in this state at the start of a TB then either
         *  a) we just took an exception to an EL which is being debugged
         *     and this is the first insn in the exception handler
         *  b) debug exceptions were masked and we just unmasked them
         *     without changing EL (eg by clearing PSTATE.D)
         * In either case we're going to take a swstep exception in the
         * "did not step an insn" case, and so the syndrome ISV and EX
         * bits should be zero.
         */
        assert(dc->base.num_insns == 1);
        gen_swstep_exception(dc, 0, 0);
        dc->base.is_jmp = DISAS_NORETURN;
        return true;
    }

    return false;
}

static void arm_post_translate_insn(DisasContext *dc)
{
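    /* If a condition check skipped the insn and the TB continues,
       resolve the skip label here. */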
    if (dc->condjmp && !dc->base.is_jmp) {
        gen_set_label(dc->condlabel);
        dc->condjmp = 0;
    }
    translator_loop_temp_check(&dc->base);
}

static void arm_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);
    CPUARMState *env = cpu->env_ptr;
    unsigned int insn;

    if (arm_pre_translate_insn(dc)) {
        return;
    }

    dc->pc_curr = dc->base.pc_next;
    insn = arm_ldl_code(env, dc->base.pc_next, dc->sctlr_b);
    dc->insn = insn;
    dc->base.pc_next += 4;
    disas_arm_insn(dc, insn);

    arm_post_translate_insn(dc);

    /* ARM is a fixed-length ISA.  We performed the cross-page check
       in init_disas_context by adjusting max_insns.  */
}

static bool thumb_insn_is_unconditional(DisasContext *s, uint32_t insn)
{
    /* Return true if this Thumb insn is always unconditional,
     * even inside an IT block. This is true of only a very few
     * instructions: BKPT, HLT, and SG.
     *
     * A larger class of instructions are UNPREDICTABLE if used
     * inside an IT block; we do not need to detect those here, because
     * what we do by default (perform the cc check and update the IT
     * bits state machine) is a permitted CONSTRAINED UNPREDICTABLE
     * choice for those situations.
     *
     * insn is either a 16-bit or a 32-bit instruction; the two are
     * distinguishable because for the 16-bit case the top 16 bits
     * are zeroes, and that isn't a valid 32-bit encoding.
     */
    if ((insn & 0xffffff00) == 0xbe00) {
        /* BKPT */
        return true;
    }

    if ((insn & 0xffffffc0) == 0xba80 && arm_dc_feature(s, ARM_FEATURE_V8) &&
        !arm_dc_feature(s, ARM_FEATURE_M)) {
        /* HLT: v8A only. This is unconditional even when it is going to
         * UNDEF; see the v8A ARM ARM DDI0487B.a H3.3.
         * For v7 cores this was a plain old undefined encoding and so
         * honours its cc check. (We might be using the encoding as
         * a semihosting trap, but we don't change the cc check behaviour
         * on that account, because a debugger connected to a real v7A
         * core and emulating semihosting traps by catching the UNDEF
         * exception would also only see cases where the cc check passed.
         * No guest code should be trying to do a HLT semihosting trap
         * in an IT block anyway.
         */
        return true;
    }

    if (insn == 0xe97fe97f && arm_dc_feature(s, ARM_FEATURE_V8) &&
        arm_dc_feature(s, ARM_FEATURE_M)) {
        /* SG: v8M only */
        return true;
    }

    return false;
}

static void thumb_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);
    CPUARMState *env = cpu->env_ptr;
    uint32_t insn;
    bool is_16bit;

    if (arm_pre_translate_insn(dc)) {
        return;
    }

    dc->pc_curr = dc->base.pc_next;
    insn = arm_lduw_code(env, dc->base.pc_next, dc->sctlr_b);
    is_16bit = thumb_insn_is_16bit(dc, dc->base.pc_next, insn);
    dc->base.pc_next += 2;
    if (!is_16bit) {
        uint32_t insn2 = arm_lduw_code(env, dc->base.pc_next, dc->sctlr_b);

        insn = insn << 16 | insn2;
        dc->base.pc_next += 2;
    }
    dc->insn = insn;

    if (dc->condexec_mask && !thumb_insn_is_unconditional(dc, insn)) {
        uint32_t cond = dc->condexec_cond;

        /*
         * Conditionally skip the insn. Note that both 0xe and 0xf mean
         * "always"; 0xf is not "never".
         */
        if (cond < 0x0e) {
            arm_skip_unless(dc, cond);
        }
    }

    if (is_16bit) {
        disas_thumb_insn(dc, insn);
    } else {
        disas_thumb2_insn(dc, insn);
    }

    /* Advance the Thumb condexec condition.  */
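    /*
     * The next then/else bit rotates from the top of the mask into
     * the low bit of the condition; once the mask is empty the IT
     * block is over.
     */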
    if (dc->condexec_mask) {
        dc->condexec_cond = ((dc->condexec_cond & 0xe) |
                             ((dc->condexec_mask >> 4) & 1));
        dc->condexec_mask = (dc->condexec_mask << 1) & 0x1f;
        if (dc->condexec_mask == 0) {
            dc->condexec_cond = 0;
        }
    }

    arm_post_translate_insn(dc);

    /* Thumb is a variable-length ISA.  Stop translation when the next insn
     * will touch a new page.  This ensures that prefetch aborts occur at
     * the right place.
     *
     * We want to stop the TB if the next insn starts in a new page,
     * or if it spans between this page and the next. This means that
     * if we're looking at the last halfword in the page we need to
     * see if it's a 16-bit Thumb insn (which will fit in this TB)
     * or a 32-bit Thumb insn (which won't).
     * This is to avoid generating a silly TB with a single 16-bit insn
     * in it at the end of this page (which would execute correctly
     * but isn't very efficient).
     */
    if (dc->base.is_jmp == DISAS_NEXT
        && (dc->base.pc_next - dc->page_start >= TARGET_PAGE_SIZE
            || (dc->base.pc_next - dc->page_start >= TARGET_PAGE_SIZE - 3
                && insn_crosses_page(env, dc)))) {
        dc->base.is_jmp = DISAS_TOO_MANY;
    }
}

static void arm_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);

    /* At this stage dc->condjmp will only be set when the skipped
       instruction was a conditional branch or trap, and the PC has
       already been written.  */
    gen_set_condexec(dc);
    if (dc->base.is_jmp == DISAS_BX_EXCRET) {
        /* Exception return branches need some special case code at the
         * end of the TB, which is complex enough that it has to
         * handle the single-step vs not and the condition-failed
         * insn codepath itself.
         */
        gen_bx_excret_final_code(dc);
    } else if (unlikely(is_singlestepping(dc))) {
        /* Unconditional and "condition passed" instruction codepath. */
        switch (dc->base.is_jmp) {
        case DISAS_SWI:
            gen_ss_advance(dc);
            gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb),
                          default_exception_el(dc));
            break;
        case DISAS_HVC:
            gen_ss_advance(dc);
            gen_exception(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2);
            break;
        case DISAS_SMC:
            gen_ss_advance(dc);
            gen_exception(EXCP_SMC, syn_aa32_smc(), 3);
            break;
        case DISAS_NEXT:
        case DISAS_TOO_MANY:
        case DISAS_UPDATE_EXIT:
        case DISAS_UPDATE_NOCHAIN:
            gen_set_pc_im(dc, dc->base.pc_next);
            /* fall through */
        default:
            /* FIXME: Single stepping a WFI insn will not halt the CPU. */
            gen_singlestep_exception(dc);
            break;
        case DISAS_NORETURN:
            break;
        }
    } else {
        /* While branches must always occur at the end of an IT block,
           there are a few other things that can cause us to terminate
           the TB in the middle of an IT block:
            - Exception generating instructions (bkpt, swi, undefined).
            - Page boundaries.
            - Hardware watchpoints.
           Hardware breakpoints have already been handled and skip this code.
         */
        switch (dc->base.is_jmp) {
        case DISAS_NEXT:
        case DISAS_TOO_MANY:
            gen_goto_tb(dc, 1, dc->base.pc_next);
            break;
        case DISAS_UPDATE_NOCHAIN:
            gen_set_pc_im(dc, dc->base.pc_next);
            /* fall through */
        case DISAS_JUMP:
            gen_goto_ptr();
            break;
        case DISAS_UPDATE_EXIT:
            gen_set_pc_im(dc, dc->base.pc_next);
            /* fall through */
        default:
            /* indicate that the hash table must be used to find the next TB */
            tcg_gen_exit_tb(NULL, 0);
            break;
        case DISAS_NORETURN:
            /* nothing more to generate */
            break;
        case DISAS_WFI:
        {
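            /*
             * Pass the insn length (2 for a 16-bit Thumb WFI, else 4);
             * the helper uses it to back the PC up to the WFI itself
             * if the insn has to trap to a higher exception level.
             */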
            TCGv_i32 tmp = tcg_const_i32((dc->thumb &&
                                          !(dc->insn & (1U << 31))) ? 2 : 4);

            gen_helper_wfi(cpu_env, tmp);
            tcg_temp_free_i32(tmp);
            /* The helper doesn't necessarily throw an exception, but we
             * must go back to the main loop to check for interrupts anyway.
             */
            tcg_gen_exit_tb(NULL, 0);
            break;
        }
        case DISAS_WFE:
            gen_helper_wfe(cpu_env);
            break;
        case DISAS_YIELD:
            gen_helper_yield(cpu_env);
            break;
        case DISAS_SWI:
            gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb),
                          default_exception_el(dc));
            break;
        case DISAS_HVC:
            gen_exception(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2);
            break;
        case DISAS_SMC:
            gen_exception(EXCP_SMC, syn_aa32_smc(), 3);
            break;
        }
    }

    if (dc->condjmp) {
        /* "Condition failed" instruction codepath for the branch/trap insn */
        gen_set_label(dc->condlabel);
        gen_set_condexec(dc);
        if (unlikely(is_singlestepping(dc))) {
            gen_set_pc_im(dc, dc->base.pc_next);
            gen_singlestep_exception(dc);
        } else {
            gen_goto_tb(dc, 1, dc->base.pc_next);
        }
    }
}

static void arm_tr_disas_log(const DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);

    qemu_log("IN: %s\n", lookup_symbol(dc->base.pc_first));
    log_target_disas(cpu, dc->base.pc_first, dc->base.tb->size);
}

static const TranslatorOps arm_translator_ops = {
    .init_disas_context = arm_tr_init_disas_context,
    .tb_start           = arm_tr_tb_start,
    .insn_start         = arm_tr_insn_start,
    .breakpoint_check   = arm_tr_breakpoint_check,
    .translate_insn     = arm_tr_translate_insn,
    .tb_stop            = arm_tr_tb_stop,
    .disas_log          = arm_tr_disas_log,
};

static const TranslatorOps thumb_translator_ops = {
    .init_disas_context = arm_tr_init_disas_context,
    .tb_start           = arm_tr_tb_start,
    .insn_start         = arm_tr_insn_start,
    .breakpoint_check   = arm_tr_breakpoint_check,
    .translate_insn     = thumb_tr_translate_insn,
    .tb_stop            = arm_tr_tb_stop,
    .disas_log          = arm_tr_disas_log,
};

/* generate intermediate code for basic block 'tb'.  */
void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns)
{
    DisasContext dc = { };
    const TranslatorOps *ops = &arm_translator_ops;

    if (FIELD_EX32(tb->flags, TBFLAG_AM32, THUMB)) {
        ops = &thumb_translator_ops;
    }
#ifdef TARGET_AARCH64
    if (FIELD_EX32(tb->flags, TBFLAG_ANY, AARCH64_STATE)) {
        ops = &aarch64_translator_ops;
    }
#endif

    translator_loop(ops, &dc.base, cpu, tb, max_insns);
}

void restore_state_to_opc(CPUARMState *env, TranslationBlock *tb,
                          target_ulong *data)
{
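    /*
     * data[] holds the insn_start words written during translation:
     * data[0] is the PC, data[1] the condexec bits, data[2] the
     * syndrome information.
     */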
    if (is_a64(env)) {
        env->pc = data[0];
        env->condexec_bits = 0;
        env->exception.syndrome = data[2] << ARM_INSN_START_WORD2_SHIFT;
    } else {
        env->regs[15] = data[0];
        env->condexec_bits = data[1];
        env->exception.syndrome = data[2] << ARM_INSN_START_WORD2_SHIFT;
    }
}
