qemu/target/arm/translate.c
   1/*
   2 *  ARM translation
   3 *
   4 *  Copyright (c) 2003 Fabrice Bellard
   5 *  Copyright (c) 2005-2007 CodeSourcery
   6 *  Copyright (c) 2007 OpenedHand, Ltd.
   7 *
   8 * This library is free software; you can redistribute it and/or
   9 * modify it under the terms of the GNU Lesser General Public
  10 * License as published by the Free Software Foundation; either
  11 * version 2.1 of the License, or (at your option) any later version.
  12 *
  13 * This library is distributed in the hope that it will be useful,
  14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  16 * Lesser General Public License for more details.
  17 *
  18 * You should have received a copy of the GNU Lesser General Public
  19 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  20 */
  21#include "qemu/osdep.h"
  22
  23#include "cpu.h"
  24#include "internals.h"
  25#include "disas/disas.h"
  26#include "exec/exec-all.h"
  27#include "tcg/tcg-op.h"
  28#include "tcg/tcg-op-gvec.h"
  29#include "qemu/log.h"
  30#include "qemu/bitops.h"
  31#include "arm_ldst.h"
  32#include "semihosting/semihost.h"
  33#include "exec/helper-proto.h"
  34#include "exec/helper-gen.h"
  35#include "exec/log.h"
  36#include "cpregs.h"
  37
  38
  39#define ENABLE_ARCH_4T    arm_dc_feature(s, ARM_FEATURE_V4T)
  40#define ENABLE_ARCH_5     arm_dc_feature(s, ARM_FEATURE_V5)
  41/* currently all emulated v5 cores are also v5TE, so don't bother */
  42#define ENABLE_ARCH_5TE   arm_dc_feature(s, ARM_FEATURE_V5)
  43#define ENABLE_ARCH_5J    dc_isar_feature(aa32_jazelle, s)
  44#define ENABLE_ARCH_6     arm_dc_feature(s, ARM_FEATURE_V6)
  45#define ENABLE_ARCH_6K    arm_dc_feature(s, ARM_FEATURE_V6K)
  46#define ENABLE_ARCH_6T2   arm_dc_feature(s, ARM_FEATURE_THUMB2)
  47#define ENABLE_ARCH_7     arm_dc_feature(s, ARM_FEATURE_V7)
  48#define ENABLE_ARCH_8     arm_dc_feature(s, ARM_FEATURE_V8)
  49
  50#include "translate.h"
  51#include "translate-a32.h"
  52
  53/* These are TCG temporaries used only by the legacy iwMMXt decoder */
  54static TCGv_i64 cpu_V0, cpu_V1, cpu_M0;
  55/* These are TCG globals which alias CPUARMState fields */
  56static TCGv_i32 cpu_R[16];
  57TCGv_i32 cpu_CF, cpu_NF, cpu_VF, cpu_ZF;
  58TCGv_i64 cpu_exclusive_addr;
  59TCGv_i64 cpu_exclusive_val;
  60
  61#include "exec/gen-icount.h"
  62
  63static const char * const regnames[] =
  64    { "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
  65      "r8", "r9", "r10", "r11", "r12", "r13", "r14", "pc" };
  66
  67
  68/* initialize TCG globals.  */
  69void arm_translate_init(void)
  70{
  71    int i;
  72
  73    for (i = 0; i < 16; i++) {
  74        cpu_R[i] = tcg_global_mem_new_i32(cpu_env,
  75                                          offsetof(CPUARMState, regs[i]),
  76                                          regnames[i]);
  77    }
  78    cpu_CF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, CF), "CF");
  79    cpu_NF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, NF), "NF");
  80    cpu_VF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, VF), "VF");
  81    cpu_ZF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, ZF), "ZF");
  82
  83    cpu_exclusive_addr = tcg_global_mem_new_i64(cpu_env,
  84        offsetof(CPUARMState, exclusive_addr), "exclusive_addr");
  85    cpu_exclusive_val = tcg_global_mem_new_i64(cpu_env,
  86        offsetof(CPUARMState, exclusive_val), "exclusive_val");
  87
  88    a64_translate_init();
  89}
  90
  91uint64_t asimd_imm_const(uint32_t imm, int cmode, int op)
  92{
  93    /* Expand the encoded constant as per AdvSIMDExpandImm pseudocode */
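        /*
         * Worked example (illustrative): cmode=3, op=0, imm=0xab selects the
         * "imm8 << 8" case below, giving 0x0000ab00, which dup_const() then
         * replicates into both 32-bit halves: 0x0000ab000000ab00.
         */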
  94    switch (cmode) {
  95    case 0: case 1:
  96        /* no-op */
  97        break;
  98    case 2: case 3:
  99        imm <<= 8;
 100        break;
 101    case 4: case 5:
 102        imm <<= 16;
 103        break;
 104    case 6: case 7:
 105        imm <<= 24;
 106        break;
 107    case 8: case 9:
 108        imm |= imm << 16;
 109        break;
 110    case 10: case 11:
 111        imm = (imm << 8) | (imm << 24);
 112        break;
 113    case 12:
 114        imm = (imm << 8) | 0xff;
 115        break;
 116    case 13:
 117        imm = (imm << 16) | 0xffff;
 118        break;
 119    case 14:
 120        if (op) {
 121            /*
 122             * This and cmode == 15 op == 1 are the only cases where
 123             * the top and bottom 32 bits of the encoded constant differ.
 124             */
 125            uint64_t imm64 = 0;
 126            int n;
 127
 128            for (n = 0; n < 8; n++) {
 129                if (imm & (1 << n)) {
 130                    imm64 |= (0xffULL << (n * 8));
 131                }
 132            }
 133            return imm64;
 134        }
 135        imm |= (imm << 8) | (imm << 16) | (imm << 24);
 136        break;
 137    case 15:
 138        if (op) {
 139            /* Reserved encoding for AArch32; valid for AArch64 */
 140            uint64_t imm64 = (uint64_t)(imm & 0x3f) << 48;
 141            if (imm & 0x80) {
 142                imm64 |= 0x8000000000000000ULL;
 143            }
 144            if (imm & 0x40) {
 145                imm64 |= 0x3fc0000000000000ULL;
 146            } else {
 147                imm64 |= 0x4000000000000000ULL;
 148            }
 149            return imm64;
 150        }
 151        imm = ((imm & 0x80) << 24) | ((imm & 0x3f) << 19)
 152            | ((imm & 0x40) ? (0x1f << 25) : (1 << 30));
 153        break;
 154    }
 155    if (op) {
 156        imm = ~imm;
 157    }
 158    return dup_const(MO_32, imm);
 159}
 160
 161/* Generate a label used for skipping this instruction */
 162void arm_gen_condlabel(DisasContext *s)
 163{
 164    if (!s->condjmp) {
 165        s->condlabel = gen_disas_label(s);
 166        s->condjmp = 1;
 167    }
 168}
 169
 170/* Flags for the disas_set_da_iss info argument:
 171 * lower bits hold the Rt register number, higher bits are flags.
 172 */
 173typedef enum ISSInfo {
 174    ISSNone = 0,
 175    ISSRegMask = 0x1f,
 176    ISSInvalid = (1 << 5),
 177    ISSIsAcqRel = (1 << 6),
 178    ISSIsWrite = (1 << 7),
 179    ISSIs16Bit = (1 << 8),
 180} ISSInfo;
 181
 182/*
 183 * Store var into env + offset to a member with size bytes.
 184 * Free var after use.
 185 */
 186void store_cpu_offset(TCGv_i32 var, int offset, int size)
 187{
 188    switch (size) {
 189    case 1:
 190        tcg_gen_st8_i32(var, cpu_env, offset);
 191        break;
 192    case 4:
 193        tcg_gen_st_i32(var, cpu_env, offset);
 194        break;
 195    default:
 196        g_assert_not_reached();
 197    }
 198    tcg_temp_free_i32(var);
 199}
 200
 201/* Save the syndrome information for a Data Abort */
 202static void disas_set_da_iss(DisasContext *s, MemOp memop, ISSInfo issinfo)
 203{
 204    uint32_t syn;
 205    int sas = memop & MO_SIZE;
 206    bool sse = memop & MO_SIGN;
 207    bool is_acqrel = issinfo & ISSIsAcqRel;
 208    bool is_write = issinfo & ISSIsWrite;
 209    bool is_16bit = issinfo & ISSIs16Bit;
 210    int srt = issinfo & ISSRegMask;
 211
 212    if (issinfo & ISSInvalid) {
 213        /* Some callsites want to conditionally provide ISS info,
 214         * eg "only if this was not a writeback"
 215         */
 216        return;
 217    }
 218
 219    if (srt == 15) {
 220        /* For AArch32, insns where the src/dest is R15 never generate
 221         * ISS information. Catching that here saves checking at all
 222         * the call sites.
 223         */
 224        return;
 225    }
 226
 227    syn = syn_data_abort_with_iss(0, sas, sse, srt, 0, is_acqrel,
 228                                  0, 0, 0, is_write, 0, is_16bit);
 229    disas_set_insn_syndrome(s, syn);
 230}
 231
 232static inline int get_a32_user_mem_index(DisasContext *s)
 233{
 234    /* Return the core mmu_idx to use for A32/T32 "unprivileged load/store"
 235     * insns:
 236     *  if PL2, UNPREDICTABLE (we choose to implement as if PL0)
 237     *  otherwise, access as if at PL0.
 238     */
 239    switch (s->mmu_idx) {
 240    case ARMMMUIdx_E3:
 241    case ARMMMUIdx_E2:        /* this one is UNPREDICTABLE */
 242    case ARMMMUIdx_E10_0:
 243    case ARMMMUIdx_E10_1:
 244    case ARMMMUIdx_E10_1_PAN:
 245        return arm_to_core_mmu_idx(ARMMMUIdx_E10_0);
 246    case ARMMMUIdx_MUser:
 247    case ARMMMUIdx_MPriv:
 248        return arm_to_core_mmu_idx(ARMMMUIdx_MUser);
 249    case ARMMMUIdx_MUserNegPri:
 250    case ARMMMUIdx_MPrivNegPri:
 251        return arm_to_core_mmu_idx(ARMMMUIdx_MUserNegPri);
 252    case ARMMMUIdx_MSUser:
 253    case ARMMMUIdx_MSPriv:
 254        return arm_to_core_mmu_idx(ARMMMUIdx_MSUser);
 255    case ARMMMUIdx_MSUserNegPri:
 256    case ARMMMUIdx_MSPrivNegPri:
 257        return arm_to_core_mmu_idx(ARMMMUIdx_MSUserNegPri);
 258    default:
 259        g_assert_not_reached();
 260    }
 261}
 262
 263/* The pc_curr difference for an architectural jump. */
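    /*
     * An AArch32 read of the PC yields the instruction's address plus 8 in
     * ARM state or plus 4 in Thumb state, which is why that constant is
     * added to the instruction-relative displacement here.
     */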
 264static target_long jmp_diff(DisasContext *s, target_long diff)
 265{
 266    return diff + (s->thumb ? 4 : 8);
 267}
 268
 269static void gen_pc_plus_diff(DisasContext *s, TCGv_i32 var, target_long diff)
 270{
 271    assert(s->pc_save != -1);
 272    if (TARGET_TB_PCREL) {
 273        tcg_gen_addi_i32(var, cpu_R[15], (s->pc_curr - s->pc_save) + diff);
 274    } else {
 275        tcg_gen_movi_i32(var, s->pc_curr + diff);
 276    }
 277}
 278
 279/* Set a variable to the value of a CPU register.  */
 280void load_reg_var(DisasContext *s, TCGv_i32 var, int reg)
 281{
 282    if (reg == 15) {
 283        gen_pc_plus_diff(s, var, jmp_diff(s, 0));
 284    } else {
 285        tcg_gen_mov_i32(var, cpu_R[reg]);
 286    }
 287}
 288
 289/*
 290 * Create a new temp, REG + OFS, except PC is ALIGN(PC, 4).
 291 * This is used for load/store for which use of PC implies (literal),
 292 * or ADD that implies ADR.
 293 */
 294TCGv_i32 add_reg_for_lit(DisasContext *s, int reg, int ofs)
 295{
 296    TCGv_i32 tmp = tcg_temp_new_i32();
 297
 298    if (reg == 15) {
 299        /*
 300         * This address is computed from an aligned PC:
 301         * subtract off the low bits.
 302         */
 303        gen_pc_plus_diff(s, tmp, jmp_diff(s, ofs - (s->pc_curr & 3)));
 304    } else {
 305        tcg_gen_addi_i32(tmp, cpu_R[reg], ofs);
 306    }
 307    return tmp;
 308}
 309
 310/* Set a CPU register.  The source must be a temporary and will be
 311   marked as dead.  */
 312void store_reg(DisasContext *s, int reg, TCGv_i32 var)
 313{
 314    if (reg == 15) {
 315        /* In Thumb mode, we must ignore bit 0.
 316         * In ARM mode, for ARMv4 and ARMv5, it is UNPREDICTABLE if bits [1:0]
 317         * are not 0b00, but for ARMv6 and above, we must ignore bits [1:0].
 318         * We choose to ignore [1:0] in ARM mode for all architecture versions.
 319         */
 320        tcg_gen_andi_i32(var, var, s->thumb ? ~1 : ~3);
 321        s->base.is_jmp = DISAS_JUMP;
 322        s->pc_save = -1;
 323    } else if (reg == 13 && arm_dc_feature(s, ARM_FEATURE_M)) {
 324        /* For M-profile SP bits [1:0] are always zero */
 325        tcg_gen_andi_i32(var, var, ~3);
 326    }
 327    tcg_gen_mov_i32(cpu_R[reg], var);
 328    tcg_temp_free_i32(var);
 329}
 330
 331/*
 332 * Variant of store_reg which applies v8M stack-limit checks before updating
 333 * SP. If the check fails this will result in an exception being taken.
 334 * We disable the stack checks for CONFIG_USER_ONLY because we have
 335 * no idea what the stack limits should be in that case.
 336 * If stack checking is not being done this just acts like store_reg().
 337 */
 338static void store_sp_checked(DisasContext *s, TCGv_i32 var)
 339{
 340#ifndef CONFIG_USER_ONLY
 341    if (s->v8m_stackcheck) {
 342        gen_helper_v8m_stackcheck(cpu_env, var);
 343    }
 344#endif
 345    store_reg(s, 13, var);
 346}
 347
 348/* Value extensions.  */
 349#define gen_uxtb(var) tcg_gen_ext8u_i32(var, var)
 350#define gen_uxth(var) tcg_gen_ext16u_i32(var, var)
 351#define gen_sxtb(var) tcg_gen_ext8s_i32(var, var)
 352#define gen_sxth(var) tcg_gen_ext16s_i32(var, var)
 353
 354#define gen_sxtb16(var) gen_helper_sxtb16(var, var)
 355#define gen_uxtb16(var) gen_helper_uxtb16(var, var)
 356
 357void gen_set_cpsr(TCGv_i32 var, uint32_t mask)
 358{
 359    gen_helper_cpsr_write(cpu_env, var, tcg_constant_i32(mask));
 360}
 361
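    /*
     * Regenerate the cached hflags after an instruction that changes CPU
     * state. new_el means the exception level itself may have changed, so
     * the *_newel helpers recompute it from the CPU state rather than using
     * the translation-time s->current_el.
     */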
 362static void gen_rebuild_hflags(DisasContext *s, bool new_el)
 363{
 364    bool m_profile = arm_dc_feature(s, ARM_FEATURE_M);
 365
 366    if (new_el) {
 367        if (m_profile) {
 368            gen_helper_rebuild_hflags_m32_newel(cpu_env);
 369        } else {
 370            gen_helper_rebuild_hflags_a32_newel(cpu_env);
 371        }
 372    } else {
 373        TCGv_i32 tcg_el = tcg_constant_i32(s->current_el);
 374        if (m_profile) {
 375            gen_helper_rebuild_hflags_m32(cpu_env, tcg_el);
 376        } else {
 377            gen_helper_rebuild_hflags_a32(cpu_env, tcg_el);
 378        }
 379    }
 380}
 381
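    /*
     * "Internal" exceptions are QEMU-private exits (semihosting, the v7M
     * exception-return magic, etc.) that are never made architecturally
     * visible, unlike the syndrome-carrying exceptions generated further
     * down in this file.
     */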
 382static void gen_exception_internal(int excp)
 383{
 384    assert(excp_is_internal(excp));
 385    gen_helper_exception_internal(cpu_env, tcg_constant_i32(excp));
 386}
 387
 388static void gen_singlestep_exception(DisasContext *s)
 389{
 390    /* We have just completed a step of an insn. Move from Active-not-pending
 391     * to Active-pending, and then also take the swstep exception.
 392     * This corresponds to making the (IMPDEF) choice to prioritize
 393     * swstep exceptions over asynchronous exceptions taken to an exception
 394     * level where debug is disabled. This choice has the advantage that
 395     * we do not need to maintain internal state corresponding to the
 396     * ISV/EX syndrome bits between completion of the step and generation
 397     * of the exception, and our syndrome information is always correct.
 398     */
 399    gen_ss_advance(s);
 400    gen_swstep_exception(s, 1, s->is_ldex);
 401    s->base.is_jmp = DISAS_NORETURN;
 402}
 403
 404void clear_eci_state(DisasContext *s)
 405{
 406    /*
 407     * Clear any ECI/ICI state: used when a load multiple/store
 408     * multiple insn executes.
 409     */
 410    if (s->eci) {
 411        store_cpu_field_constant(0, condexec_bits);
 412        s->eci = 0;
 413    }
 414}
 415
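    /*
     * Signed dual 16x16->32 multiply: on return a holds the product of the
     * two low halfwords and b the product of the two high halfwords.
     */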
 416static void gen_smul_dual(TCGv_i32 a, TCGv_i32 b)
 417{
 418    TCGv_i32 tmp1 = tcg_temp_new_i32();
 419    TCGv_i32 tmp2 = tcg_temp_new_i32();
 420    tcg_gen_ext16s_i32(tmp1, a);
 421    tcg_gen_ext16s_i32(tmp2, b);
 422    tcg_gen_mul_i32(tmp1, tmp1, tmp2);
 423    tcg_temp_free_i32(tmp2);
 424    tcg_gen_sari_i32(a, a, 16);
 425    tcg_gen_sari_i32(b, b, 16);
 426    tcg_gen_mul_i32(b, b, a);
 427    tcg_gen_mov_i32(a, tmp1);
 428    tcg_temp_free_i32(tmp1);
 429}
 430
 431/* Byteswap each halfword.  */
 432void gen_rev16(TCGv_i32 dest, TCGv_i32 var)
 433{
 434    TCGv_i32 tmp = tcg_temp_new_i32();
 435    TCGv_i32 mask = tcg_constant_i32(0x00ff00ff);
 436    tcg_gen_shri_i32(tmp, var, 8);
 437    tcg_gen_and_i32(tmp, tmp, mask);
 438    tcg_gen_and_i32(var, var, mask);
 439    tcg_gen_shli_i32(var, var, 8);
 440    tcg_gen_or_i32(dest, var, tmp);
 441    tcg_temp_free_i32(tmp);
 442}
 443
 444/* Byteswap low halfword and sign extend.  */
 445static void gen_revsh(TCGv_i32 dest, TCGv_i32 var)
 446{
 447    tcg_gen_bswap16_i32(dest, var, TCG_BSWAP_OS);
 448}
 449
 450/* Dual 16-bit add.  The result is placed in dest; t0 and t1 are clobbered.
 451    tmp = (t0 ^ t1) & 0x8000;
 452    t0 &= ~0x8000;
 453    t1 &= ~0x8000;
 454    dest = (t0 + t1) ^ tmp;
 455 */
 456
 457static void gen_add16(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
 458{
 459    TCGv_i32 tmp = tcg_temp_new_i32();
 460    tcg_gen_xor_i32(tmp, t0, t1);
 461    tcg_gen_andi_i32(tmp, tmp, 0x8000);
 462    tcg_gen_andi_i32(t0, t0, ~0x8000);
 463    tcg_gen_andi_i32(t1, t1, ~0x8000);
 464    tcg_gen_add_i32(t0, t0, t1);
 465    tcg_gen_xor_i32(dest, t0, tmp);
 466    tcg_temp_free_i32(tmp);
 467}
 468
 469/* Set N and Z flags from var.  */
 470static inline void gen_logic_CC(TCGv_i32 var)
 471{
 472    tcg_gen_mov_i32(cpu_NF, var);
 473    tcg_gen_mov_i32(cpu_ZF, var);
 474}
 475
 476/* dest = T0 + T1 + CF. */
 477static void gen_add_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
 478{
 479    tcg_gen_add_i32(dest, t0, t1);
 480    tcg_gen_add_i32(dest, dest, cpu_CF);
 481}
 482
 483/* dest = T0 - T1 + CF - 1.  */
 484static void gen_sub_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
 485{
 486    tcg_gen_sub_i32(dest, t0, t1);
 487    tcg_gen_add_i32(dest, dest, cpu_CF);
 488    tcg_gen_subi_i32(dest, dest, 1);
 489}
 490
 491/* dest = T0 + T1. Compute C, N, V and Z flags */
 492static void gen_add_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
 493{
 494    TCGv_i32 tmp = tcg_temp_new_i32();
 495    tcg_gen_movi_i32(tmp, 0);
 496    tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, t1, tmp);
 497    tcg_gen_mov_i32(cpu_ZF, cpu_NF);
 498    tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
 499    tcg_gen_xor_i32(tmp, t0, t1);
 500    tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
 501    tcg_temp_free_i32(tmp);
 502    tcg_gen_mov_i32(dest, cpu_NF);
 503}
 504
 505/* dest = T0 + T1 + CF.  Compute C, N, V and Z flags */
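    /*
     * The carry-in is folded in with two add2 ops when the TCG backend has
     * a 32-bit add-with-carry; otherwise we widen to 64 bits, where bit 32
     * of the sum is the carry out.
     */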
 506static void gen_adc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
 507{
 508    TCGv_i32 tmp = tcg_temp_new_i32();
 509    if (TCG_TARGET_HAS_add2_i32) {
 510        tcg_gen_movi_i32(tmp, 0);
 511        tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, cpu_CF, tmp);
 512        tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1, tmp);
 513    } else {
 514        TCGv_i64 q0 = tcg_temp_new_i64();
 515        TCGv_i64 q1 = tcg_temp_new_i64();
 516        tcg_gen_extu_i32_i64(q0, t0);
 517        tcg_gen_extu_i32_i64(q1, t1);
 518        tcg_gen_add_i64(q0, q0, q1);
 519        tcg_gen_extu_i32_i64(q1, cpu_CF);
 520        tcg_gen_add_i64(q0, q0, q1);
 521        tcg_gen_extr_i64_i32(cpu_NF, cpu_CF, q0);
 522        tcg_temp_free_i64(q0);
 523        tcg_temp_free_i64(q1);
 524    }
 525    tcg_gen_mov_i32(cpu_ZF, cpu_NF);
 526    tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
 527    tcg_gen_xor_i32(tmp, t0, t1);
 528    tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
 529    tcg_temp_free_i32(tmp);
 530    tcg_gen_mov_i32(dest, cpu_NF);
 531}
 532
 533/* dest = T0 - T1. Compute C, N, V and Z flags */
 534static void gen_sub_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
 535{
 536    TCGv_i32 tmp;
 537    tcg_gen_sub_i32(cpu_NF, t0, t1);
 538    tcg_gen_mov_i32(cpu_ZF, cpu_NF);
 539    tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0, t1);
 540    tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
 541    tmp = tcg_temp_new_i32();
 542    tcg_gen_xor_i32(tmp, t0, t1);
 543    tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
 544    tcg_temp_free_i32(tmp);
 545    tcg_gen_mov_i32(dest, cpu_NF);
 546}
 547
 548/* dest = T0 + ~T1 + CF.  Compute C, N, V and Z flags */
 549static void gen_sbc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
 550{
 551    TCGv_i32 tmp = tcg_temp_new_i32();
 552    tcg_gen_not_i32(tmp, t1);
 553    gen_adc_CC(dest, t0, tmp);
 554    tcg_temp_free_i32(tmp);
 555}
 556
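    /*
     * A32 register-specified LSL/LSR use only the bottom byte of the shift
     * register, and any amount of 32 or more (bits [7:5] non-zero) yields
     * zero; the movcond below selects that case.
     */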
 557#define GEN_SHIFT(name)                                               \
 558static void gen_##name(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)       \
 559{                                                                     \
 560    TCGv_i32 tmpd = tcg_temp_new_i32();                               \
 561    TCGv_i32 tmp1 = tcg_temp_new_i32();                               \
 562    TCGv_i32 zero = tcg_constant_i32(0);                              \
 563    tcg_gen_andi_i32(tmp1, t1, 0x1f);                                 \
 564    tcg_gen_##name##_i32(tmpd, t0, tmp1);                             \
 565    tcg_gen_andi_i32(tmp1, t1, 0xe0);                                 \
 566    tcg_gen_movcond_i32(TCG_COND_NE, dest, tmp1, zero, zero, tmpd);   \
 567    tcg_temp_free_i32(tmpd);                                          \
 568    tcg_temp_free_i32(tmp1);                                          \
 569}
 570GEN_SHIFT(shl)
 571GEN_SHIFT(shr)
 572#undef GEN_SHIFT
 573
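    /*
     * Register-specified ASR: amounts of 32 or more all produce the same
     * result as ASR #31 (every bit becomes the sign bit), so the amount is
     * clamped to 31.
     */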
 574static void gen_sar(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
 575{
 576    TCGv_i32 tmp1 = tcg_temp_new_i32();
 577
 578    tcg_gen_andi_i32(tmp1, t1, 0xff);
 579    tcg_gen_umin_i32(tmp1, tmp1, tcg_constant_i32(31));
 580    tcg_gen_sar_i32(dest, t0, tmp1);
 581    tcg_temp_free_i32(tmp1);
 582}
 583
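    /* Copy bit <shift> of var, i.e. the last bit shifted out, into CF.  */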
 584static void shifter_out_im(TCGv_i32 var, int shift)
 585{
 586    tcg_gen_extract_i32(cpu_CF, var, shift, 1);
 587}
 588
 589/* Shift by immediate.  Includes special handling for shift == 0.  */
 590static inline void gen_arm_shift_im(TCGv_i32 var, int shiftop,
 591                                    int shift, int flags)
 592{
 593    switch (shiftop) {
 594    case 0: /* LSL */
 595        if (shift != 0) {
 596            if (flags)
 597                shifter_out_im(var, 32 - shift);
 598            tcg_gen_shli_i32(var, var, shift);
 599        }
 600        break;
 601    case 1: /* LSR */
 602        if (shift == 0) {
 603            if (flags) {
 604                tcg_gen_shri_i32(cpu_CF, var, 31);
 605            }
 606            tcg_gen_movi_i32(var, 0);
 607        } else {
 608            if (flags)
 609                shifter_out_im(var, shift - 1);
 610            tcg_gen_shri_i32(var, var, shift);
 611        }
 612        break;
 613    case 2: /* ASR */
 614        if (shift == 0)
 615            shift = 32;
 616        if (flags)
 617            shifter_out_im(var, shift - 1);
 618        if (shift == 32)
 619          shift = 31;
 620        tcg_gen_sari_i32(var, var, shift);
 621        break;
 622    case 3: /* ROR/RRX */
 623        if (shift != 0) {
 624            if (flags)
 625                shifter_out_im(var, shift - 1);
 626            tcg_gen_rotri_i32(var, var, shift); break;
 627        } else {
 628            TCGv_i32 tmp = tcg_temp_new_i32();
 629            tcg_gen_shli_i32(tmp, cpu_CF, 31);
 630            if (flags)
 631                shifter_out_im(var, 0);
 632            tcg_gen_shri_i32(var, var, 1);
 633            tcg_gen_or_i32(var, var, tmp);
 634            tcg_temp_free_i32(tmp);
 635        }
 636    }
 637}
 638
 639static inline void gen_arm_shift_reg(TCGv_i32 var, int shiftop,
 640                                     TCGv_i32 shift, int flags)
 641{
 642    if (flags) {
 643        switch (shiftop) {
 644        case 0: gen_helper_shl_cc(var, cpu_env, var, shift); break;
 645        case 1: gen_helper_shr_cc(var, cpu_env, var, shift); break;
 646        case 2: gen_helper_sar_cc(var, cpu_env, var, shift); break;
 647        case 3: gen_helper_ror_cc(var, cpu_env, var, shift); break;
 648        }
 649    } else {
 650        switch (shiftop) {
 651        case 0:
 652            gen_shl(var, var, shift);
 653            break;
 654        case 1:
 655            gen_shr(var, var, shift);
 656            break;
 657        case 2:
 658            gen_sar(var, var, shift);
 659            break;
 660        case 3: tcg_gen_andi_i32(shift, shift, 0x1f);
 661                tcg_gen_rotr_i32(var, var, shift); break;
 662        }
 663    }
 664    tcg_temp_free_i32(shift);
 665}
 666
 667/*
 668 * Generate a conditional based on ARM condition code cc.
 669 * This is common between ARM and AArch64 targets.
 670 */
 671void arm_test_cc(DisasCompare *cmp, int cc)
 672{
 673    TCGv_i32 value;
 674    TCGCond cond;
 675    bool global = true;
 676
 677    switch (cc) {
 678    case 0: /* eq: Z */
 679    case 1: /* ne: !Z */
 680        cond = TCG_COND_EQ;
 681        value = cpu_ZF;
 682        break;
 683
 684    case 2: /* cs: C */
 685    case 3: /* cc: !C */
 686        cond = TCG_COND_NE;
 687        value = cpu_CF;
 688        break;
 689
 690    case 4: /* mi: N */
 691    case 5: /* pl: !N */
 692        cond = TCG_COND_LT;
 693        value = cpu_NF;
 694        break;
 695
 696    case 6: /* vs: V */
 697    case 7: /* vc: !V */
 698        cond = TCG_COND_LT;
 699        value = cpu_VF;
 700        break;
 701
 702    case 8: /* hi: C && !Z */
 703    case 9: /* ls: !C || Z -> !(C && !Z) */
 704        cond = TCG_COND_NE;
 705        value = tcg_temp_new_i32();
 706        global = false;
 707        /* CF is 1 for C, so -CF is an all-bits-set mask for C;
 708           ZF is non-zero for !Z; so AND the two subexpressions.  */
 709        tcg_gen_neg_i32(value, cpu_CF);
 710        tcg_gen_and_i32(value, value, cpu_ZF);
 711        break;
 712
 713    case 10: /* ge: N == V -> N ^ V == 0 */
 714    case 11: /* lt: N != V -> N ^ V != 0 */
 715        /* Since we're only interested in the sign bit, == 0 is >= 0.  */
 716        cond = TCG_COND_GE;
 717        value = tcg_temp_new_i32();
 718        global = false;
 719        tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
 720        break;
 721
 722    case 12: /* gt: !Z && N == V */
 723    case 13: /* le: Z || N != V */
 724        cond = TCG_COND_NE;
 725        value = tcg_temp_new_i32();
 726        global = false;
 727        /* (N == V) is equal to the sign bit of ~(NF ^ VF).  Propagate
 728         * the sign bit then AND with ZF to yield the result.  */
 729        tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
 730        tcg_gen_sari_i32(value, value, 31);
 731        tcg_gen_andc_i32(value, cpu_ZF, value);
 732        break;
 733
 734    case 14: /* always */
 735    case 15: /* always */
 736        /* Use the ALWAYS condition, which will fold early.
 737         * It doesn't matter what we use for the value.  */
 738        cond = TCG_COND_ALWAYS;
 739        value = cpu_ZF;
 740        goto no_invert;
 741
 742    default:
 743        fprintf(stderr, "Bad condition code 0x%x\n", cc);
 744        abort();
 745    }
 746
 747    if (cc & 1) {
 748        cond = tcg_invert_cond(cond);
 749    }
 750
 751 no_invert:
 752    cmp->cond = cond;
 753    cmp->value = value;
 754    cmp->value_global = global;
 755}
 756
 757void arm_free_cc(DisasCompare *cmp)
 758{
 759    if (!cmp->value_global) {
 760        tcg_temp_free_i32(cmp->value);
 761    }
 762}
 763
 764void arm_jump_cc(DisasCompare *cmp, TCGLabel *label)
 765{
 766    tcg_gen_brcondi_i32(cmp->cond, cmp->value, 0, label);
 767}
 768
 769void arm_gen_test_cc(int cc, TCGLabel *label)
 770{
 771    DisasCompare cmp;
 772    arm_test_cc(&cmp, cc);
 773    arm_jump_cc(&cmp, label);
 774    arm_free_cc(&cmp);
 775}
 776
 777void gen_set_condexec(DisasContext *s)
 778{
 779    if (s->condexec_mask) {
 780        uint32_t val = (s->condexec_cond << 4) | (s->condexec_mask >> 1);
 781
 782        store_cpu_field_constant(val, condexec_bits);
 783    }
 784}
 785
 786void gen_update_pc(DisasContext *s, target_long diff)
 787{
 788    gen_pc_plus_diff(s, cpu_R[15], diff);
 789    s->pc_save = s->pc_curr + diff;
 790}
 791
 792/* Set PC and Thumb state from var.  var is marked as dead.  */
 793static inline void gen_bx(DisasContext *s, TCGv_i32 var)
 794{
 795    s->base.is_jmp = DISAS_JUMP;
 796    tcg_gen_andi_i32(cpu_R[15], var, ~1);
 797    tcg_gen_andi_i32(var, var, 1);
 798    store_cpu_field(var, thumb);
 799    s->pc_save = -1;
 800}
 801
 802/*
 803 * Set PC and Thumb state from var. var is marked as dead.
 804 * For M-profile CPUs, include logic to detect exception-return
 805 * branches and handle them. This is needed for Thumb POP/LDM to PC, LDR to PC,
 806 * and BX reg, and no others, and happens only for code in Handler mode.
 807 * The Security Extension also requires us to check for the FNC_RETURN
 808 * which signals a function return from non-secure state; this can happen
 809 * in both Handler and Thread mode.
 810 * To avoid having to do multiple comparisons in inline generated code,
 811 * we make the check we do here loose, so it will match for EXC_RETURN
 812 * in Thread mode. For system emulation do_v7m_exception_exit() checks
 813 * for these spurious cases and returns without doing anything (giving
 814 * the same behaviour as for a branch to a non-magic address).
 815 *
 816 * In linux-user mode it is unclear what the right behaviour for an
 817 * attempted FNC_RETURN should be, because in real hardware this will go
 818 * directly to Secure code (ie not the Linux kernel) which will then treat
 819 * the error in any way it chooses. For QEMU we opt to make the FNC_RETURN
 820 * attempt behave the way it would on a CPU without the security extension,
 821 * which is to say "like a normal branch". That means we can simply treat
 822 * all branches as normal with no magic address behaviour.
 823 */
 824static inline void gen_bx_excret(DisasContext *s, TCGv_i32 var)
 825{
 826    /* Generate the same code here as for a simple bx, but flag via
 827     * s->base.is_jmp that we need to do the rest of the work later.
 828     */
 829    gen_bx(s, var);
 830#ifndef CONFIG_USER_ONLY
 831    if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY) ||
 832        (s->v7m_handler_mode && arm_dc_feature(s, ARM_FEATURE_M))) {
 833        s->base.is_jmp = DISAS_BX_EXCRET;
 834    }
 835#endif
 836}
 837
 838static inline void gen_bx_excret_final_code(DisasContext *s)
 839{
 840    /* Generate the code to finish possible exception return and end the TB */
 841    DisasLabel excret_label = gen_disas_label(s);
 842    uint32_t min_magic;
 843
 844    if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY)) {
 845        /* Covers FNC_RETURN and EXC_RETURN magic */
 846        min_magic = FNC_RETURN_MIN_MAGIC;
 847    } else {
 848        /* EXC_RETURN magic only */
 849        min_magic = EXC_RETURN_MIN_MAGIC;
 850    }
 851
 852    /* Is the new PC value in the magic range indicating exception return? */
 853    tcg_gen_brcondi_i32(TCG_COND_GEU, cpu_R[15], min_magic, excret_label.label);
 854    /* No: end the TB as we would for a DISAS_JMP */
 855    if (s->ss_active) {
 856        gen_singlestep_exception(s);
 857    } else {
 858        tcg_gen_exit_tb(NULL, 0);
 859    }
 860    set_disas_label(s, excret_label);
 861    /* Yes: this is an exception return.
 862     * At this point in runtime env->regs[15] and env->thumb will hold
 863     * the exception-return magic number, which do_v7m_exception_exit()
 864     * will read. Nothing else will be able to see those values because
 865     * the cpu-exec main loop guarantees that we will always go straight
 866     * from raising the exception to the exception-handling code.
 867     *
 868     * gen_ss_advance(s) does nothing on M profile currently but
 869     * calling it is conceptually the right thing as we have executed
 870     * this instruction (compare SWI, HVC, SMC handling).
 871     */
 872    gen_ss_advance(s);
 873    gen_exception_internal(EXCP_EXCEPTION_EXIT);
 874}
 875
 876static inline void gen_bxns(DisasContext *s, int rm)
 877{
 878    TCGv_i32 var = load_reg(s, rm);
 879
 880    /* The bxns helper may raise an EXCEPTION_EXIT exception, so in theory
 881     * we need to sync state before calling it, but:
 882     *  - we don't need to do gen_update_pc() because the bxns helper will
 883     *    always set the PC itself
 884     *  - we don't need to do gen_set_condexec() because BXNS is UNPREDICTABLE
 885     *    unless it's outside an IT block or the last insn in an IT block,
 886     *    so we know that condexec == 0 (already set at the top of the TB)
 887     *    is correct in the non-UNPREDICTABLE cases, and we can choose
 888     *    "zeroes the IT bits" as our UNPREDICTABLE behaviour otherwise.
 889     */
 890    gen_helper_v7m_bxns(cpu_env, var);
 891    tcg_temp_free_i32(var);
 892    s->base.is_jmp = DISAS_EXIT;
 893}
 894
 895static inline void gen_blxns(DisasContext *s, int rm)
 896{
 897    TCGv_i32 var = load_reg(s, rm);
 898
 899    /* We don't need to sync condexec state, for the same reason as bxns.
 900     * We do however need to set the PC, because the blxns helper reads it.
 901     * The blxns helper may throw an exception.
 902     */
 903    gen_update_pc(s, curr_insn_len(s));
 904    gen_helper_v7m_blxns(cpu_env, var);
 905    tcg_temp_free_i32(var);
 906    s->base.is_jmp = DISAS_EXIT;
 907}
 908
 909/* Variant of store_reg which uses branch&exchange logic when storing
 910   to r15 in ARM architecture v7 and above. The source must be a temporary
 911   and will be marked as dead. */
 912static inline void store_reg_bx(DisasContext *s, int reg, TCGv_i32 var)
 913{
 914    if (reg == 15 && ENABLE_ARCH_7) {
 915        gen_bx(s, var);
 916    } else {
 917        store_reg(s, reg, var);
 918    }
 919}
 920
 921/* Variant of store_reg which uses branch&exchange logic when storing
 922 * to r15 in ARM architecture v5T and above. This is used for storing
 923 * the results of a LDR/LDM/POP into r15, and corresponds to the cases
 924 * in the ARM ARM which use the LoadWritePC() pseudocode function. */
 925static inline void store_reg_from_load(DisasContext *s, int reg, TCGv_i32 var)
 926{
 927    if (reg == 15 && ENABLE_ARCH_5) {
 928        gen_bx_excret(s, var);
 929    } else {
 930        store_reg(s, reg, var);
 931    }
 932}
 933
 934#ifdef CONFIG_USER_ONLY
 935#define IS_USER_ONLY 1
 936#else
 937#define IS_USER_ONLY 0
 938#endif
 939
 940MemOp pow2_align(unsigned i)
 941{
 942    static const MemOp mop_align[] = {
 943        0, MO_ALIGN_2, MO_ALIGN_4, MO_ALIGN_8, MO_ALIGN_16,
 944        /*
 945         * FIXME: TARGET_PAGE_BITS_MIN affects TLB_FLAGS_MASK such
 946         * that 256-bit alignment (MO_ALIGN_32) cannot be supported:
 947         * see get_alignment_bits(). Enforce only 128-bit alignment for now.
 948         */
 949        MO_ALIGN_16
 950    };
 951    g_assert(i < ARRAY_SIZE(mop_align));
 952    return mop_align[i];
 953}
 954
 955/*
 956 * Abstractions of "generate code to do a guest load/store for
 957 * AArch32", where a vaddr is always 32 bits (and is zero
 958 * extended if we're a 64 bit core) and data is also
 959 * 32 bits unless specifically doing a 64 bit access.
 960 * These functions work like tcg_gen_qemu_{ld,st}* except
 961 * that the address argument is TCGv_i32 rather than TCGv.
 962 */
 963
 964static TCGv gen_aa32_addr(DisasContext *s, TCGv_i32 a32, MemOp op)
 965{
 966    TCGv addr = tcg_temp_new();
 967    tcg_gen_extu_i32_tl(addr, a32);
 968
 969    /* Not needed for user-mode BE32, where we use MO_BE instead.  */
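        /*
         * BE32 (SCTLR.B) system emulation: implement word-invariant
         * big-endian by XORing the low address bits of sub-word accesses,
         * e.g. byte accesses XOR the address with 3, halfwords with 2.
         */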
 970    if (!IS_USER_ONLY && s->sctlr_b && (op & MO_SIZE) < MO_32) {
 971        tcg_gen_xori_tl(addr, addr, 4 - (1 << (op & MO_SIZE)));
 972    }
 973    return addr;
 974}
 975
 976/*
 977 * Internal routines are used for NEON cases where the endianness
 978 * and/or alignment has already been taken into account and manipulated.
 979 */
 980void gen_aa32_ld_internal_i32(DisasContext *s, TCGv_i32 val,
 981                              TCGv_i32 a32, int index, MemOp opc)
 982{
 983    TCGv addr = gen_aa32_addr(s, a32, opc);
 984    tcg_gen_qemu_ld_i32(val, addr, index, opc);
 985    tcg_temp_free(addr);
 986}
 987
 988void gen_aa32_st_internal_i32(DisasContext *s, TCGv_i32 val,
 989                              TCGv_i32 a32, int index, MemOp opc)
 990{
 991    TCGv addr = gen_aa32_addr(s, a32, opc);
 992    tcg_gen_qemu_st_i32(val, addr, index, opc);
 993    tcg_temp_free(addr);
 994}
 995
 996void gen_aa32_ld_internal_i64(DisasContext *s, TCGv_i64 val,
 997                              TCGv_i32 a32, int index, MemOp opc)
 998{
 999    TCGv addr = gen_aa32_addr(s, a32, opc);
1000
1001    tcg_gen_qemu_ld_i64(val, addr, index, opc);
1002
1003    /* Not needed for user-mode BE32, where we use MO_BE instead.  */
1004    if (!IS_USER_ONLY && s->sctlr_b && (opc & MO_SIZE) == MO_64) {
1005        tcg_gen_rotri_i64(val, val, 32);
1006    }
1007    tcg_temp_free(addr);
1008}
1009
1010void gen_aa32_st_internal_i64(DisasContext *s, TCGv_i64 val,
1011                              TCGv_i32 a32, int index, MemOp opc)
1012{
1013    TCGv addr = gen_aa32_addr(s, a32, opc);
1014
1015    /* Not needed for user-mode BE32, where we use MO_BE instead.  */
1016    if (!IS_USER_ONLY && s->sctlr_b && (opc & MO_SIZE) == MO_64) {
1017        TCGv_i64 tmp = tcg_temp_new_i64();
1018        tcg_gen_rotri_i64(tmp, val, 32);
1019        tcg_gen_qemu_st_i64(tmp, addr, index, opc);
1020        tcg_temp_free_i64(tmp);
1021    } else {
1022        tcg_gen_qemu_st_i64(val, addr, index, opc);
1023    }
1024    tcg_temp_free(addr);
1025}
1026
1027void gen_aa32_ld_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
1028                     int index, MemOp opc)
1029{
1030    gen_aa32_ld_internal_i32(s, val, a32, index, finalize_memop(s, opc));
1031}
1032
1033void gen_aa32_st_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
1034                     int index, MemOp opc)
1035{
1036    gen_aa32_st_internal_i32(s, val, a32, index, finalize_memop(s, opc));
1037}
1038
1039void gen_aa32_ld_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
1040                     int index, MemOp opc)
1041{
1042    gen_aa32_ld_internal_i64(s, val, a32, index, finalize_memop(s, opc));
1043}
1044
1045void gen_aa32_st_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
1046                     int index, MemOp opc)
1047{
1048    gen_aa32_st_internal_i64(s, val, a32, index, finalize_memop(s, opc));
1049}
1050
1051#define DO_GEN_LD(SUFF, OPC)                                            \
1052    static inline void gen_aa32_ld##SUFF(DisasContext *s, TCGv_i32 val, \
1053                                         TCGv_i32 a32, int index)       \
1054    {                                                                   \
1055        gen_aa32_ld_i32(s, val, a32, index, OPC);                       \
1056    }
1057
1058#define DO_GEN_ST(SUFF, OPC)                                            \
1059    static inline void gen_aa32_st##SUFF(DisasContext *s, TCGv_i32 val, \
1060                                         TCGv_i32 a32, int index)       \
1061    {                                                                   \
1062        gen_aa32_st_i32(s, val, a32, index, OPC);                       \
1063    }
1064
1065static inline void gen_hvc(DisasContext *s, int imm16)
1066{
1067    /* The pre HVC helper handles cases when HVC gets trapped
1068     * as an undefined insn by runtime configuration (ie before
1069     * the insn really executes).
1070     */
1071    gen_update_pc(s, 0);
1072    gen_helper_pre_hvc(cpu_env);
1073    /* Otherwise we will treat this as a real exception which
1074     * happens after execution of the insn. (The distinction matters
1075     * for the PC value reported to the exception handler and also
1076     * for single stepping.)
1077     */
1078    s->svc_imm = imm16;
1079    gen_update_pc(s, curr_insn_len(s));
1080    s->base.is_jmp = DISAS_HVC;
1081}
1082
1083static inline void gen_smc(DisasContext *s)
1084{
1085    /* As with HVC, we may take an exception either before or after
1086     * the insn executes.
1087     */
1088    gen_update_pc(s, 0);
1089    gen_helper_pre_smc(cpu_env, tcg_constant_i32(syn_aa32_smc()));
1090    gen_update_pc(s, curr_insn_len(s));
1091    s->base.is_jmp = DISAS_SMC;
1092}
1093
1094static void gen_exception_internal_insn(DisasContext *s, int excp)
1095{
1096    gen_set_condexec(s);
1097    gen_update_pc(s, 0);
1098    gen_exception_internal(excp);
1099    s->base.is_jmp = DISAS_NORETURN;
1100}
1101
1102static void gen_exception_el_v(int excp, uint32_t syndrome, TCGv_i32 tcg_el)
1103{
1104    gen_helper_exception_with_syndrome_el(cpu_env, tcg_constant_i32(excp),
1105                                          tcg_constant_i32(syndrome), tcg_el);
1106}
1107
1108static void gen_exception_el(int excp, uint32_t syndrome, uint32_t target_el)
1109{
1110    gen_exception_el_v(excp, syndrome, tcg_constant_i32(target_el));
1111}
1112
1113static void gen_exception(int excp, uint32_t syndrome)
1114{
1115    gen_helper_exception_with_syndrome(cpu_env, tcg_constant_i32(excp),
1116                                       tcg_constant_i32(syndrome));
1117}
1118
1119static void gen_exception_insn_el_v(DisasContext *s, target_long pc_diff,
1120                                    int excp, uint32_t syn, TCGv_i32 tcg_el)
1121{
1122    if (s->aarch64) {
1123        gen_a64_update_pc(s, pc_diff);
1124    } else {
1125        gen_set_condexec(s);
1126        gen_update_pc(s, pc_diff);
1127    }
1128    gen_exception_el_v(excp, syn, tcg_el);
1129    s->base.is_jmp = DISAS_NORETURN;
1130}
1131
1132void gen_exception_insn_el(DisasContext *s, target_long pc_diff, int excp,
1133                           uint32_t syn, uint32_t target_el)
1134{
1135    gen_exception_insn_el_v(s, pc_diff, excp, syn,
1136                            tcg_constant_i32(target_el));
1137}
1138
1139void gen_exception_insn(DisasContext *s, target_long pc_diff,
1140                        int excp, uint32_t syn)
1141{
1142    if (s->aarch64) {
1143        gen_a64_update_pc(s, pc_diff);
1144    } else {
1145        gen_set_condexec(s);
1146        gen_update_pc(s, pc_diff);
1147    }
1148    gen_exception(excp, syn);
1149    s->base.is_jmp = DISAS_NORETURN;
1150}
1151
1152static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syn)
1153{
1154    gen_set_condexec(s);
1155    gen_update_pc(s, 0);
1156    gen_helper_exception_bkpt_insn(cpu_env, tcg_constant_i32(syn));
1157    s->base.is_jmp = DISAS_NORETURN;
1158}
1159
1160void unallocated_encoding(DisasContext *s)
1161{
1162    /* Unallocated and reserved encodings are uncategorized */
1163    gen_exception_insn(s, 0, EXCP_UDEF, syn_uncategorized());
1164}
1165
1166/* Force a TB lookup after an instruction that changes the CPU state.  */
1167void gen_lookup_tb(DisasContext *s)
1168{
1169    gen_pc_plus_diff(s, cpu_R[15], curr_insn_len(s));
1170    s->base.is_jmp = DISAS_EXIT;
1171}
1172
1173static inline void gen_hlt(DisasContext *s, int imm)
1174{
1175    /* HLT. This has two purposes.
1176     * Architecturally, it is an external halting debug instruction.
1177     * Since QEMU doesn't implement external debug, we treat it as
1178     * the architecture requires when halting debug is disabled: it will UNDEF.
1179     * Secondly, "HLT 0x3C" is a T32 semihosting trap instruction,
1180     * and "HLT 0xF000" is an A32 semihosting syscall. These traps
1181     * must trigger semihosting even for ARMv7 and earlier, where
1182     * HLT was an undefined encoding.
1183     * In system mode, we don't allow userspace access to
1184     * semihosting, to provide some semblance of security
1185     * (and for consistency with our 32-bit semihosting).
1186     */
1187    if (semihosting_enabled(s->current_el == 0) &&
1188        (imm == (s->thumb ? 0x3c : 0xf000))) {
1189        gen_exception_internal_insn(s, EXCP_SEMIHOST);
1190        return;
1191    }
1192
1193    unallocated_encoding(s);
1194}
1195
1196/*
1197 * Return the offset of a "full" NEON Dreg.
1198 */
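    /*
     * D registers are stored two to a 128-bit vector register: D(2n) and
     * D(2n+1) live in d[0] and d[1] of vfp.zregs[n] respectively.
     */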
1199long neon_full_reg_offset(unsigned reg)
1200{
1201    return offsetof(CPUARMState, vfp.zregs[reg >> 1].d[reg & 1]);
1202}
1203
1204/*
1205 * Return the offset of a 2**SIZE piece of a NEON register, at index ELE,
1206 * where 0 is the least significant end of the register.
1207 */
1208long neon_element_offset(int reg, int element, MemOp memop)
1209{
1210    int element_size = 1 << (memop & MO_SIZE);
1211    int ofs = element * element_size;
1212#if HOST_BIG_ENDIAN
1213    /*
1214     * Calculate the offset assuming fully little-endian,
1215     * then XOR to account for the order of the 8-byte units.
1216     */
1217    if (element_size < 8) {
1218        ofs ^= 8 - element_size;
1219    }
1220#endif
1221    return neon_full_reg_offset(reg) + ofs;
1222}
1223
1224/* Return the offset of a VFP Dreg (dp = true) or VFP Sreg (dp = false). */
1225long vfp_reg_offset(bool dp, unsigned reg)
1226{
1227    if (dp) {
1228        return neon_element_offset(reg, 0, MO_64);
1229    } else {
1230        return neon_element_offset(reg >> 1, reg & 1, MO_32);
1231    }
1232}
1233
1234void read_neon_element32(TCGv_i32 dest, int reg, int ele, MemOp memop)
1235{
1236    long off = neon_element_offset(reg, ele, memop);
1237
1238    switch (memop) {
1239    case MO_SB:
1240        tcg_gen_ld8s_i32(dest, cpu_env, off);
1241        break;
1242    case MO_UB:
1243        tcg_gen_ld8u_i32(dest, cpu_env, off);
1244        break;
1245    case MO_SW:
1246        tcg_gen_ld16s_i32(dest, cpu_env, off);
1247        break;
1248    case MO_UW:
1249        tcg_gen_ld16u_i32(dest, cpu_env, off);
1250        break;
1251    case MO_UL:
1252    case MO_SL:
1253        tcg_gen_ld_i32(dest, cpu_env, off);
1254        break;
1255    default:
1256        g_assert_not_reached();
1257    }
1258}
1259
1260void read_neon_element64(TCGv_i64 dest, int reg, int ele, MemOp memop)
1261{
1262    long off = neon_element_offset(reg, ele, memop);
1263
1264    switch (memop) {
1265    case MO_SL:
1266        tcg_gen_ld32s_i64(dest, cpu_env, off);
1267        break;
1268    case MO_UL:
1269        tcg_gen_ld32u_i64(dest, cpu_env, off);
1270        break;
1271    case MO_UQ:
1272        tcg_gen_ld_i64(dest, cpu_env, off);
1273        break;
1274    default:
1275        g_assert_not_reached();
1276    }
1277}
1278
1279void write_neon_element32(TCGv_i32 src, int reg, int ele, MemOp memop)
1280{
1281    long off = neon_element_offset(reg, ele, memop);
1282
1283    switch (memop) {
1284    case MO_8:
1285        tcg_gen_st8_i32(src, cpu_env, off);
1286        break;
1287    case MO_16:
1288        tcg_gen_st16_i32(src, cpu_env, off);
1289        break;
1290    case MO_32:
1291        tcg_gen_st_i32(src, cpu_env, off);
1292        break;
1293    default:
1294        g_assert_not_reached();
1295    }
1296}
1297
1298void write_neon_element64(TCGv_i64 src, int reg, int ele, MemOp memop)
1299{
1300    long off = neon_element_offset(reg, ele, memop);
1301
1302    switch (memop) {
1303    case MO_32:
1304        tcg_gen_st32_i64(src, cpu_env, off);
1305        break;
1306    case MO_64:
1307        tcg_gen_st_i64(src, cpu_env, off);
1308        break;
1309    default:
1310        g_assert_not_reached();
1311    }
1312}
1313
1314#define ARM_CP_RW_BIT   (1 << 20)
1315
1316static inline void iwmmxt_load_reg(TCGv_i64 var, int reg)
1317{
1318    tcg_gen_ld_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
1319}
1320
1321static inline void iwmmxt_store_reg(TCGv_i64 var, int reg)
1322{
1323    tcg_gen_st_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
1324}
1325
1326static inline TCGv_i32 iwmmxt_load_creg(int reg)
1327{
1328    TCGv_i32 var = tcg_temp_new_i32();
1329    tcg_gen_ld_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
1330    return var;
1331}
1332
1333static inline void iwmmxt_store_creg(int reg, TCGv_i32 var)
1334{
1335    tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
1336    tcg_temp_free_i32(var);
1337}
1338
1339static inline void gen_op_iwmmxt_movq_wRn_M0(int rn)
1340{
1341    iwmmxt_store_reg(cpu_M0, rn);
1342}
1343
1344static inline void gen_op_iwmmxt_movq_M0_wRn(int rn)
1345{
1346    iwmmxt_load_reg(cpu_M0, rn);
1347}
1348
1349static inline void gen_op_iwmmxt_orq_M0_wRn(int rn)
1350{
1351    iwmmxt_load_reg(cpu_V1, rn);
1352    tcg_gen_or_i64(cpu_M0, cpu_M0, cpu_V1);
1353}
1354
1355static inline void gen_op_iwmmxt_andq_M0_wRn(int rn)
1356{
1357    iwmmxt_load_reg(cpu_V1, rn);
1358    tcg_gen_and_i64(cpu_M0, cpu_M0, cpu_V1);
1359}
1360
1361static inline void gen_op_iwmmxt_xorq_M0_wRn(int rn)
1362{
1363    iwmmxt_load_reg(cpu_V1, rn);
1364    tcg_gen_xor_i64(cpu_M0, cpu_M0, cpu_V1);
1365}
1366
1367#define IWMMXT_OP(name) \
1368static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
1369{ \
1370    iwmmxt_load_reg(cpu_V1, rn); \
1371    gen_helper_iwmmxt_##name(cpu_M0, cpu_M0, cpu_V1); \
1372}
1373
1374#define IWMMXT_OP_ENV(name) \
1375static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
1376{ \
1377    iwmmxt_load_reg(cpu_V1, rn); \
1378    gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0, cpu_V1); \
1379}
1380
1381#define IWMMXT_OP_ENV_SIZE(name) \
1382IWMMXT_OP_ENV(name##b) \
1383IWMMXT_OP_ENV(name##w) \
1384IWMMXT_OP_ENV(name##l)
1385
1386#define IWMMXT_OP_ENV1(name) \
1387static inline void gen_op_iwmmxt_##name##_M0(void) \
1388{ \
1389    gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0); \
1390}
1391
1392IWMMXT_OP(maddsq)
1393IWMMXT_OP(madduq)
1394IWMMXT_OP(sadb)
1395IWMMXT_OP(sadw)
1396IWMMXT_OP(mulslw)
1397IWMMXT_OP(mulshw)
1398IWMMXT_OP(mululw)
1399IWMMXT_OP(muluhw)
1400IWMMXT_OP(macsw)
1401IWMMXT_OP(macuw)
1402
1403IWMMXT_OP_ENV_SIZE(unpackl)
1404IWMMXT_OP_ENV_SIZE(unpackh)
1405
1406IWMMXT_OP_ENV1(unpacklub)
1407IWMMXT_OP_ENV1(unpackluw)
1408IWMMXT_OP_ENV1(unpacklul)
1409IWMMXT_OP_ENV1(unpackhub)
1410IWMMXT_OP_ENV1(unpackhuw)
1411IWMMXT_OP_ENV1(unpackhul)
1412IWMMXT_OP_ENV1(unpacklsb)
1413IWMMXT_OP_ENV1(unpacklsw)
1414IWMMXT_OP_ENV1(unpacklsl)
1415IWMMXT_OP_ENV1(unpackhsb)
1416IWMMXT_OP_ENV1(unpackhsw)
1417IWMMXT_OP_ENV1(unpackhsl)
1418
1419IWMMXT_OP_ENV_SIZE(cmpeq)
1420IWMMXT_OP_ENV_SIZE(cmpgtu)
1421IWMMXT_OP_ENV_SIZE(cmpgts)
1422
1423IWMMXT_OP_ENV_SIZE(mins)
1424IWMMXT_OP_ENV_SIZE(minu)
1425IWMMXT_OP_ENV_SIZE(maxs)
1426IWMMXT_OP_ENV_SIZE(maxu)
1427
1428IWMMXT_OP_ENV_SIZE(subn)
1429IWMMXT_OP_ENV_SIZE(addn)
1430IWMMXT_OP_ENV_SIZE(subu)
1431IWMMXT_OP_ENV_SIZE(addu)
1432IWMMXT_OP_ENV_SIZE(subs)
1433IWMMXT_OP_ENV_SIZE(adds)
1434
1435IWMMXT_OP_ENV(avgb0)
1436IWMMXT_OP_ENV(avgb1)
1437IWMMXT_OP_ENV(avgw0)
1438IWMMXT_OP_ENV(avgw1)
1439
1440IWMMXT_OP_ENV(packuw)
1441IWMMXT_OP_ENV(packul)
1442IWMMXT_OP_ENV(packuq)
1443IWMMXT_OP_ENV(packsw)
1444IWMMXT_OP_ENV(packsl)
1445IWMMXT_OP_ENV(packsq)
1446
1447static void gen_op_iwmmxt_set_mup(void)
1448{
1449    TCGv_i32 tmp;
1450    tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
1451    tcg_gen_ori_i32(tmp, tmp, 2);
1452    store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
1453}
1454
1455static void gen_op_iwmmxt_set_cup(void)
1456{
1457    TCGv_i32 tmp;
1458    tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
1459    tcg_gen_ori_i32(tmp, tmp, 1);
1460    store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
1461}
1462
1463static void gen_op_iwmmxt_setpsr_nz(void)
1464{
1465    TCGv_i32 tmp = tcg_temp_new_i32();
1466    gen_helper_iwmmxt_setpsr_nz(tmp, cpu_M0);
1467    store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCASF]);
1468}
1469
1470static inline void gen_op_iwmmxt_addl_M0_wRn(int rn)
1471{
1472    iwmmxt_load_reg(cpu_V1, rn);
1473    tcg_gen_ext32u_i64(cpu_V1, cpu_V1);
1474    tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
1475}
1476
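    /*
     * Compute the address for an iwMMXt load/store into dest, handling the
     * pre- and post-indexed forms with optional base-register writeback.
     * Returns nonzero for an addressing form we treat as UNDEF.
     */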
1477static inline int gen_iwmmxt_address(DisasContext *s, uint32_t insn,
1478                                     TCGv_i32 dest)
1479{
1480    int rd;
1481    uint32_t offset;
1482    TCGv_i32 tmp;
1483
1484    rd = (insn >> 16) & 0xf;
1485    tmp = load_reg(s, rd);
1486
1487    offset = (insn & 0xff) << ((insn >> 7) & 2);
1488    if (insn & (1 << 24)) {
1489        /* Pre indexed */
1490        if (insn & (1 << 23))
1491            tcg_gen_addi_i32(tmp, tmp, offset);
1492        else
1493            tcg_gen_addi_i32(tmp, tmp, -offset);
1494        tcg_gen_mov_i32(dest, tmp);
1495        if (insn & (1 << 21))
1496            store_reg(s, rd, tmp);
1497        else
1498            tcg_temp_free_i32(tmp);
1499    } else if (insn & (1 << 21)) {
1500        /* Post indexed */
1501        tcg_gen_mov_i32(dest, tmp);
1502        if (insn & (1 << 23))
1503            tcg_gen_addi_i32(tmp, tmp, offset);
1504        else
1505            tcg_gen_addi_i32(tmp, tmp, -offset);
1506        store_reg(s, rd, tmp);
1507    } else if (!(insn & (1 << 23)))
1508        return 1;
1509    return 0;
1510}
1511
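    /*
     * Fetch the shift amount for an iwMMXt shift instruction into dest,
     * either from one of the wCGR control registers or from the low half
     * of a wR data register, masked as requested by the caller.
     * Returns nonzero if the encoding is invalid.
     */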
1512static inline int gen_iwmmxt_shift(uint32_t insn, uint32_t mask, TCGv_i32 dest)
1513{
1514    int rd = (insn >> 0) & 0xf;
1515    TCGv_i32 tmp;
1516
1517    if (insn & (1 << 8)) {
1518        if (rd < ARM_IWMMXT_wCGR0 || rd > ARM_IWMMXT_wCGR3) {
1519            return 1;
1520        } else {
1521            tmp = iwmmxt_load_creg(rd);
1522        }
1523    } else {
1524        tmp = tcg_temp_new_i32();
1525        iwmmxt_load_reg(cpu_V0, rd);
1526        tcg_gen_extrl_i64_i32(tmp, cpu_V0);
1527    }
1528    tcg_gen_andi_i32(tmp, tmp, mask);
1529    tcg_gen_mov_i32(dest, tmp);
1530    tcg_temp_free_i32(tmp);
1531    return 0;
1532}
1533
1534/* Disassemble an iwMMXt instruction.  Returns nonzero if an error occurred
1535   (ie. an undefined instruction).  */
1536static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn)
1537{
1538    int rd, wrd;
1539    int rdhi, rdlo, rd0, rd1, i;
1540    TCGv_i32 addr;
1541    TCGv_i32 tmp, tmp2, tmp3;
1542
1543    if ((insn & 0x0e000e00) == 0x0c000000) {
1544        if ((insn & 0x0fe00ff0) == 0x0c400000) {
1545            wrd = insn & 0xf;
1546            rdlo = (insn >> 12) & 0xf;
1547            rdhi = (insn >> 16) & 0xf;
1548            if (insn & ARM_CP_RW_BIT) {                         /* TMRRC */
1549                iwmmxt_load_reg(cpu_V0, wrd);
1550                tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
1551                tcg_gen_extrh_i64_i32(cpu_R[rdhi], cpu_V0);
1552            } else {                                    /* TMCRR */
1553                tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
1554                iwmmxt_store_reg(cpu_V0, wrd);
1555                gen_op_iwmmxt_set_mup();
1556            }
1557            return 0;
1558        }
1559
1560        wrd = (insn >> 12) & 0xf;
1561        addr = tcg_temp_new_i32();
1562        if (gen_iwmmxt_address(s, insn, addr)) {
1563            tcg_temp_free_i32(addr);
1564            return 1;
1565        }
1566        if (insn & ARM_CP_RW_BIT) {
1567            if ((insn >> 28) == 0xf) {                  /* WLDRW wCx */
1568                tmp = tcg_temp_new_i32();
1569                gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
1570                iwmmxt_store_creg(wrd, tmp);
1571            } else {
1572                i = 1;
1573                if (insn & (1 << 8)) {
1574                    if (insn & (1 << 22)) {             /* WLDRD */
1575                        gen_aa32_ld64(s, cpu_M0, addr, get_mem_index(s));
1576                        i = 0;
1577                    } else {                            /* WLDRW wRd */
1578                        tmp = tcg_temp_new_i32();
1579                        gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
1580                    }
1581                } else {
1582                    tmp = tcg_temp_new_i32();
1583                    if (insn & (1 << 22)) {             /* WLDRH */
1584                        gen_aa32_ld16u(s, tmp, addr, get_mem_index(s));
1585                    } else {                            /* WLDRB */
1586                        gen_aa32_ld8u(s, tmp, addr, get_mem_index(s));
1587                    }
1588                }
1589                if (i) {
1590                    tcg_gen_extu_i32_i64(cpu_M0, tmp);
1591                    tcg_temp_free_i32(tmp);
1592                }
1593                gen_op_iwmmxt_movq_wRn_M0(wrd);
1594            }
1595        } else {
1596            if ((insn >> 28) == 0xf) {                  /* WSTRW wCx */
1597                tmp = iwmmxt_load_creg(wrd);
1598                gen_aa32_st32(s, tmp, addr, get_mem_index(s));
1599            } else {
1600                gen_op_iwmmxt_movq_M0_wRn(wrd);
1601                tmp = tcg_temp_new_i32();
1602                if (insn & (1 << 8)) {
1603                    if (insn & (1 << 22)) {             /* WSTRD */
1604                        gen_aa32_st64(s, cpu_M0, addr, get_mem_index(s));
1605                    } else {                            /* WSTRW wRd */
1606                        tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1607                        gen_aa32_st32(s, tmp, addr, get_mem_index(s));
1608                    }
1609                } else {
1610                    if (insn & (1 << 22)) {             /* WSTRH */
1611                        tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1612                        gen_aa32_st16(s, tmp, addr, get_mem_index(s));
1613                    } else {                            /* WSTRB */
1614                        tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1615                        gen_aa32_st8(s, tmp, addr, get_mem_index(s));
1616                    }
1617                }
1618            }
1619            tcg_temp_free_i32(tmp);
1620        }
1621        tcg_temp_free_i32(addr);
1622        return 0;
1623    }
1624
1625    if ((insn & 0x0f000000) != 0x0e000000)
1626        return 1;
1627
1628    switch (((insn >> 12) & 0xf00) | ((insn >> 4) & 0xff)) {
1629    case 0x000:                                                 /* WOR */
1630        wrd = (insn >> 12) & 0xf;
1631        rd0 = (insn >> 0) & 0xf;
1632        rd1 = (insn >> 16) & 0xf;
1633        gen_op_iwmmxt_movq_M0_wRn(rd0);
1634        gen_op_iwmmxt_orq_M0_wRn(rd1);
1635        gen_op_iwmmxt_setpsr_nz();
1636        gen_op_iwmmxt_movq_wRn_M0(wrd);
1637        gen_op_iwmmxt_set_mup();
1638        gen_op_iwmmxt_set_cup();
1639        break;
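        /* TMCR: move an ARM core register to an iwMMXt control register.
         * As implemented here, writes to wCID and wCASF are ignored, and for
         * wCSSF and wCon the value written clears the corresponding bits
         * rather than overwriting the register.
         */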
1640    case 0x011:                                                 /* TMCR */
1641        if (insn & 0xf)
1642            return 1;
1643        rd = (insn >> 12) & 0xf;
1644        wrd = (insn >> 16) & 0xf;
1645        switch (wrd) {
1646        case ARM_IWMMXT_wCID:
1647        case ARM_IWMMXT_wCASF:
1648            break;
1649        case ARM_IWMMXT_wCon:
1650            gen_op_iwmmxt_set_cup();
1651            /* Fall through.  */
1652        case ARM_IWMMXT_wCSSF:
1653            tmp = iwmmxt_load_creg(wrd);
1654            tmp2 = load_reg(s, rd);
1655            tcg_gen_andc_i32(tmp, tmp, tmp2);
1656            tcg_temp_free_i32(tmp2);
1657            iwmmxt_store_creg(wrd, tmp);
1658            break;
1659        case ARM_IWMMXT_wCGR0:
1660        case ARM_IWMMXT_wCGR1:
1661        case ARM_IWMMXT_wCGR2:
1662        case ARM_IWMMXT_wCGR3:
1663            gen_op_iwmmxt_set_cup();
1664            tmp = load_reg(s, rd);
1665            iwmmxt_store_creg(wrd, tmp);
1666            break;
1667        default:
1668            return 1;
1669        }
1670        break;
1671    case 0x100:                                                 /* WXOR */
1672        wrd = (insn >> 12) & 0xf;
1673        rd0 = (insn >> 0) & 0xf;
1674        rd1 = (insn >> 16) & 0xf;
1675        gen_op_iwmmxt_movq_M0_wRn(rd0);
1676        gen_op_iwmmxt_xorq_M0_wRn(rd1);
1677        gen_op_iwmmxt_setpsr_nz();
1678        gen_op_iwmmxt_movq_wRn_M0(wrd);
1679        gen_op_iwmmxt_set_mup();
1680        gen_op_iwmmxt_set_cup();
1681        break;
1682    case 0x111:                                                 /* TMRC */
1683        if (insn & 0xf)
1684            return 1;
1685        rd = (insn >> 12) & 0xf;
1686        wrd = (insn >> 16) & 0xf;
1687        tmp = iwmmxt_load_creg(wrd);
1688        store_reg(s, rd, tmp);
1689        break;
1690    case 0x300:                                                 /* WANDN */
1691        wrd = (insn >> 12) & 0xf;
1692        rd0 = (insn >> 0) & 0xf;
1693        rd1 = (insn >> 16) & 0xf;
1694        gen_op_iwmmxt_movq_M0_wRn(rd0);
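            /* WANDN computes wRd = wRn AND NOT wRm, so complement wRm here.  */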
1695        tcg_gen_not_i64(cpu_M0, cpu_M0);
1696        gen_op_iwmmxt_andq_M0_wRn(rd1);
1697        gen_op_iwmmxt_setpsr_nz();
1698        gen_op_iwmmxt_movq_wRn_M0(wrd);
1699        gen_op_iwmmxt_set_mup();
1700        gen_op_iwmmxt_set_cup();
1701        break;
1702    case 0x200:                                                 /* WAND */
1703        wrd = (insn >> 12) & 0xf;
1704        rd0 = (insn >> 0) & 0xf;
1705        rd1 = (insn >> 16) & 0xf;
1706        gen_op_iwmmxt_movq_M0_wRn(rd0);
1707        gen_op_iwmmxt_andq_M0_wRn(rd1);
1708        gen_op_iwmmxt_setpsr_nz();
1709        gen_op_iwmmxt_movq_wRn_M0(wrd);
1710        gen_op_iwmmxt_set_mup();
1711        gen_op_iwmmxt_set_cup();
1712        break;
1713    case 0x810: case 0xa10:                             /* WMADD */
1714        wrd = (insn >> 12) & 0xf;
1715        rd0 = (insn >> 0) & 0xf;
1716        rd1 = (insn >> 16) & 0xf;
1717        gen_op_iwmmxt_movq_M0_wRn(rd0);
1718        if (insn & (1 << 21))
1719            gen_op_iwmmxt_maddsq_M0_wRn(rd1);
1720        else
1721            gen_op_iwmmxt_madduq_M0_wRn(rd1);
1722        gen_op_iwmmxt_movq_wRn_M0(wrd);
1723        gen_op_iwmmxt_set_mup();
1724        break;
1725    case 0x10e: case 0x50e: case 0x90e: case 0xd0e:     /* WUNPCKIL */
1726        wrd = (insn >> 12) & 0xf;
1727        rd0 = (insn >> 16) & 0xf;
1728        rd1 = (insn >> 0) & 0xf;
1729        gen_op_iwmmxt_movq_M0_wRn(rd0);
1730        switch ((insn >> 22) & 3) {
1731        case 0:
1732            gen_op_iwmmxt_unpacklb_M0_wRn(rd1);
1733            break;
1734        case 1:
1735            gen_op_iwmmxt_unpacklw_M0_wRn(rd1);
1736            break;
1737        case 2:
1738            gen_op_iwmmxt_unpackll_M0_wRn(rd1);
1739            break;
1740        case 3:
1741            return 1;
1742        }
1743        gen_op_iwmmxt_movq_wRn_M0(wrd);
1744        gen_op_iwmmxt_set_mup();
1745        gen_op_iwmmxt_set_cup();
1746        break;
1747    case 0x10c: case 0x50c: case 0x90c: case 0xd0c:     /* WUNPCKIH */
1748        wrd = (insn >> 12) & 0xf;
1749        rd0 = (insn >> 16) & 0xf;
1750        rd1 = (insn >> 0) & 0xf;
1751        gen_op_iwmmxt_movq_M0_wRn(rd0);
1752        switch ((insn >> 22) & 3) {
1753        case 0:
1754            gen_op_iwmmxt_unpackhb_M0_wRn(rd1);
1755            break;
1756        case 1:
1757            gen_op_iwmmxt_unpackhw_M0_wRn(rd1);
1758            break;
1759        case 2:
1760            gen_op_iwmmxt_unpackhl_M0_wRn(rd1);
1761            break;
1762        case 3:
1763            return 1;
1764        }
1765        gen_op_iwmmxt_movq_wRn_M0(wrd);
1766        gen_op_iwmmxt_set_mup();
1767        gen_op_iwmmxt_set_cup();
1768        break;
1769    case 0x012: case 0x112: case 0x412: case 0x512:     /* WSAD */
1770        wrd = (insn >> 12) & 0xf;
1771        rd0 = (insn >> 16) & 0xf;
1772        rd1 = (insn >> 0) & 0xf;
1773        gen_op_iwmmxt_movq_M0_wRn(rd0);
1774        if (insn & (1 << 22))
1775            gen_op_iwmmxt_sadw_M0_wRn(rd1);
1776        else
1777            gen_op_iwmmxt_sadb_M0_wRn(rd1);
1778        if (!(insn & (1 << 20)))
1779            gen_op_iwmmxt_addl_M0_wRn(wrd);
1780        gen_op_iwmmxt_movq_wRn_M0(wrd);
1781        gen_op_iwmmxt_set_mup();
1782        break;
1783    case 0x010: case 0x110: case 0x210: case 0x310:     /* WMUL */
1784        wrd = (insn >> 12) & 0xf;
1785        rd0 = (insn >> 16) & 0xf;
1786        rd1 = (insn >> 0) & 0xf;
1787        gen_op_iwmmxt_movq_M0_wRn(rd0);
1788        if (insn & (1 << 21)) {
1789            if (insn & (1 << 20))
1790                gen_op_iwmmxt_mulshw_M0_wRn(rd1);
1791            else
1792                gen_op_iwmmxt_mulslw_M0_wRn(rd1);
1793        } else {
1794            if (insn & (1 << 20))
1795                gen_op_iwmmxt_muluhw_M0_wRn(rd1);
1796            else
1797                gen_op_iwmmxt_mululw_M0_wRn(rd1);
1798        }
1799        gen_op_iwmmxt_movq_wRn_M0(wrd);
1800        gen_op_iwmmxt_set_mup();
1801        break;
1802    case 0x410: case 0x510: case 0x610: case 0x710:     /* WMAC */
1803        wrd = (insn >> 12) & 0xf;
1804        rd0 = (insn >> 16) & 0xf;
1805        rd1 = (insn >> 0) & 0xf;
1806        gen_op_iwmmxt_movq_M0_wRn(rd0);
1807        if (insn & (1 << 21))
1808            gen_op_iwmmxt_macsw_M0_wRn(rd1);
1809        else
1810            gen_op_iwmmxt_macuw_M0_wRn(rd1);
1811        if (!(insn & (1 << 20))) {
1812            iwmmxt_load_reg(cpu_V1, wrd);
1813            tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
1814        }
1815        gen_op_iwmmxt_movq_wRn_M0(wrd);
1816        gen_op_iwmmxt_set_mup();
1817        break;
1818    case 0x006: case 0x406: case 0x806: case 0xc06:     /* WCMPEQ */
1819        wrd = (insn >> 12) & 0xf;
1820        rd0 = (insn >> 16) & 0xf;
1821        rd1 = (insn >> 0) & 0xf;
1822        gen_op_iwmmxt_movq_M0_wRn(rd0);
1823        switch ((insn >> 22) & 3) {
1824        case 0:
1825            gen_op_iwmmxt_cmpeqb_M0_wRn(rd1);
1826            break;
1827        case 1:
1828            gen_op_iwmmxt_cmpeqw_M0_wRn(rd1);
1829            break;
1830        case 2:
1831            gen_op_iwmmxt_cmpeql_M0_wRn(rd1);
1832            break;
1833        case 3:
1834            return 1;
1835        }
1836        gen_op_iwmmxt_movq_wRn_M0(wrd);
1837        gen_op_iwmmxt_set_mup();
1838        gen_op_iwmmxt_set_cup();
1839        break;
1840    case 0x800: case 0x900: case 0xc00: case 0xd00:     /* WAVG2 */
1841        wrd = (insn >> 12) & 0xf;
1842        rd0 = (insn >> 16) & 0xf;
1843        rd1 = (insn >> 0) & 0xf;
1844        gen_op_iwmmxt_movq_M0_wRn(rd0);
1845        if (insn & (1 << 22)) {
1846            if (insn & (1 << 20))
1847                gen_op_iwmmxt_avgw1_M0_wRn(rd1);
1848            else
1849                gen_op_iwmmxt_avgw0_M0_wRn(rd1);
1850        } else {
1851            if (insn & (1 << 20))
1852                gen_op_iwmmxt_avgb1_M0_wRn(rd1);
1853            else
1854                gen_op_iwmmxt_avgb0_M0_wRn(rd1);
1855        }
1856        gen_op_iwmmxt_movq_wRn_M0(wrd);
1857        gen_op_iwmmxt_set_mup();
1858        gen_op_iwmmxt_set_cup();
1859        break;
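        /* WALIGNR: extract 64 bits from the concatenation of wRn and wRm,
         * starting at a byte offset taken from wCGR0..wCGR3 (selected by
         * insn[21:20]); only the low 3 bits of that register are used.
         */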
1860    case 0x802: case 0x902: case 0xa02: case 0xb02:     /* WALIGNR */
1861        wrd = (insn >> 12) & 0xf;
1862        rd0 = (insn >> 16) & 0xf;
1863        rd1 = (insn >> 0) & 0xf;
1864        gen_op_iwmmxt_movq_M0_wRn(rd0);
1865        tmp = iwmmxt_load_creg(ARM_IWMMXT_wCGR0 + ((insn >> 20) & 3));
1866        tcg_gen_andi_i32(tmp, tmp, 7);
1867        iwmmxt_load_reg(cpu_V1, rd1);
1868        gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp);
1869        tcg_temp_free_i32(tmp);
1870        gen_op_iwmmxt_movq_wRn_M0(wrd);
1871        gen_op_iwmmxt_set_mup();
1872        break;
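        /* TINSR: insert the low byte/halfword/word of an ARM core register
         * into one element of wRd; tmp2 is the element mask and tmp3 the bit
         * offset of the selected element.
         */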
1873    case 0x601: case 0x605: case 0x609: case 0x60d:     /* TINSR */
1874        if (((insn >> 6) & 3) == 3)
1875            return 1;
1876        rd = (insn >> 12) & 0xf;
1877        wrd = (insn >> 16) & 0xf;
1878        tmp = load_reg(s, rd);
1879        gen_op_iwmmxt_movq_M0_wRn(wrd);
1880        switch ((insn >> 6) & 3) {
1881        case 0:
1882            tmp2 = tcg_constant_i32(0xff);
1883            tmp3 = tcg_constant_i32((insn & 7) << 3);
1884            break;
1885        case 1:
1886            tmp2 = tcg_constant_i32(0xffff);
1887            tmp3 = tcg_constant_i32((insn & 3) << 4);
1888            break;
1889        case 2:
1890            tmp2 = tcg_constant_i32(0xffffffff);
1891            tmp3 = tcg_constant_i32((insn & 1) << 5);
1892            break;
1893        default:
1894            g_assert_not_reached();
1895        }
1896        gen_helper_iwmmxt_insr(cpu_M0, cpu_M0, tmp, tmp2, tmp3);
1897        tcg_temp_free_i32(tmp);
1898        gen_op_iwmmxt_movq_wRn_M0(wrd);
1899        gen_op_iwmmxt_set_mup();
1900        break;
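        /* TEXTRM: extract one byte/halfword/word element of wRd into an ARM
         * core register; insn bit 3 selects sign extension for the byte and
         * halfword forms.
         */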
1901    case 0x107: case 0x507: case 0x907: case 0xd07:     /* TEXTRM */
1902        rd = (insn >> 12) & 0xf;
1903        wrd = (insn >> 16) & 0xf;
1904        if (rd == 15 || ((insn >> 22) & 3) == 3)
1905            return 1;
1906        gen_op_iwmmxt_movq_M0_wRn(wrd);
1907        tmp = tcg_temp_new_i32();
1908        switch ((insn >> 22) & 3) {
1909        case 0:
1910            tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 7) << 3);
1911            tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1912            if (insn & 8) {
1913                tcg_gen_ext8s_i32(tmp, tmp);
1914            } else {
1915                tcg_gen_andi_i32(tmp, tmp, 0xff);
1916            }
1917            break;
1918        case 1:
1919            tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 3) << 4);
1920            tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1921            if (insn & 8) {
1922                tcg_gen_ext16s_i32(tmp, tmp);
1923            } else {
1924                tcg_gen_andi_i32(tmp, tmp, 0xffff);
1925            }
1926            break;
1927        case 2:
1928            tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 1) << 5);
1929            tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1930            break;
1931        }
1932        store_reg(s, rd, tmp);
1933        break;
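        /* TEXTRC: copy the SIMD condition flags of the selected wCASF field
         * into the ARM NZCV flags; the shifts move that field's 4-bit flag
         * group up to bits [31:28].
         */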
1934    case 0x117: case 0x517: case 0x917: case 0xd17:     /* TEXTRC */
1935        if ((insn & 0x000ff008) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1936            return 1;
1937        tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1938        switch ((insn >> 22) & 3) {
1939        case 0:
1940            tcg_gen_shri_i32(tmp, tmp, ((insn & 7) << 2) + 0);
1941            break;
1942        case 1:
1943            tcg_gen_shri_i32(tmp, tmp, ((insn & 3) << 3) + 4);
1944            break;
1945        case 2:
1946            tcg_gen_shri_i32(tmp, tmp, ((insn & 1) << 4) + 12);
1947            break;
1948        }
1949        tcg_gen_shli_i32(tmp, tmp, 28);
1950        gen_set_nzcv(tmp);
1951        tcg_temp_free_i32(tmp);
1952        break;
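        /* TBCST: broadcast an ARM core register to every byte/halfword/word
         * element of wRd.
         */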
1953    case 0x401: case 0x405: case 0x409: case 0x40d:     /* TBCST */
1954        if (((insn >> 6) & 3) == 3)
1955            return 1;
1956        rd = (insn >> 12) & 0xf;
1957        wrd = (insn >> 16) & 0xf;
1958        tmp = load_reg(s, rd);
1959        switch ((insn >> 6) & 3) {
1960        case 0:
1961            gen_helper_iwmmxt_bcstb(cpu_M0, tmp);
1962            break;
1963        case 1:
1964            gen_helper_iwmmxt_bcstw(cpu_M0, tmp);
1965            break;
1966        case 2:
1967            gen_helper_iwmmxt_bcstl(cpu_M0, tmp);
1968            break;
1969        }
1970        tcg_temp_free_i32(tmp);
1971        gen_op_iwmmxt_movq_wRn_M0(wrd);
1972        gen_op_iwmmxt_set_mup();
1973        break;
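        /* TANDC: AND together the per-field flag nibbles of wCASF and set the
         * ARM NZCV flags from the result, i.e. the condition must have held
         * for every SIMD field.
         */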
1974    case 0x113: case 0x513: case 0x913: case 0xd13:     /* TANDC */
1975        if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1976            return 1;
1977        tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1978        tmp2 = tcg_temp_new_i32();
1979        tcg_gen_mov_i32(tmp2, tmp);
1980        switch ((insn >> 22) & 3) {
1981        case 0:
1982            for (i = 0; i < 7; i ++) {
1983                tcg_gen_shli_i32(tmp2, tmp2, 4);
1984                tcg_gen_and_i32(tmp, tmp, tmp2);
1985            }
1986            break;
1987        case 1:
1988            for (i = 0; i < 3; i ++) {
1989                tcg_gen_shli_i32(tmp2, tmp2, 8);
1990                tcg_gen_and_i32(tmp, tmp, tmp2);
1991            }
1992            break;
1993        case 2:
1994            tcg_gen_shli_i32(tmp2, tmp2, 16);
1995            tcg_gen_and_i32(tmp, tmp, tmp2);
1996            break;
1997        }
1998        gen_set_nzcv(tmp);
1999        tcg_temp_free_i32(tmp2);
2000        tcg_temp_free_i32(tmp);
2001        break;
2002    case 0x01c: case 0x41c: case 0x81c: case 0xc1c:     /* WACC */
2003        wrd = (insn >> 12) & 0xf;
2004        rd0 = (insn >> 16) & 0xf;
2005        gen_op_iwmmxt_movq_M0_wRn(rd0);
2006        switch ((insn >> 22) & 3) {
2007        case 0:
2008            gen_helper_iwmmxt_addcb(cpu_M0, cpu_M0);
2009            break;
2010        case 1:
2011            gen_helper_iwmmxt_addcw(cpu_M0, cpu_M0);
2012            break;
2013        case 2:
2014            gen_helper_iwmmxt_addcl(cpu_M0, cpu_M0);
2015            break;
2016        case 3:
2017            return 1;
2018        }
2019        gen_op_iwmmxt_movq_wRn_M0(wrd);
2020        gen_op_iwmmxt_set_mup();
2021        break;
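        /* TORC: OR together the per-field flag nibbles of wCASF and set the
         * ARM NZCV flags from the result, i.e. the condition need only have
         * held for at least one SIMD field.
         */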
2022    case 0x115: case 0x515: case 0x915: case 0xd15:     /* TORC */
2023        if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
2024            return 1;
2025        tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
2026        tmp2 = tcg_temp_new_i32();
2027        tcg_gen_mov_i32(tmp2, tmp);
2028        switch ((insn >> 22) & 3) {
2029        case 0:
2030            for (i = 0; i < 7; i ++) {
2031                tcg_gen_shli_i32(tmp2, tmp2, 4);
2032                tcg_gen_or_i32(tmp, tmp, tmp2);
2033            }
2034            break;
2035        case 1:
2036            for (i = 0; i < 3; i ++) {
2037                tcg_gen_shli_i32(tmp2, tmp2, 8);
2038                tcg_gen_or_i32(tmp, tmp, tmp2);
2039            }
2040            break;
2041        case 2:
2042            tcg_gen_shli_i32(tmp2, tmp2, 16);
2043            tcg_gen_or_i32(tmp, tmp, tmp2);
2044            break;
2045        }
2046        gen_set_nzcv(tmp);
2047        tcg_temp_free_i32(tmp2);
2048        tcg_temp_free_i32(tmp);
2049        break;
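        /* TMOVMSK: gather the most significant bit of each element of wRn
         * into a bit mask in the ARM destination register.
         */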
2050    case 0x103: case 0x503: case 0x903: case 0xd03:     /* TMOVMSK */
2051        rd = (insn >> 12) & 0xf;
2052        rd0 = (insn >> 16) & 0xf;
2053        if ((insn & 0xf) != 0 || ((insn >> 22) & 3) == 3)
2054            return 1;
2055        gen_op_iwmmxt_movq_M0_wRn(rd0);
2056        tmp = tcg_temp_new_i32();
2057        switch ((insn >> 22) & 3) {
2058        case 0:
2059            gen_helper_iwmmxt_msbb(tmp, cpu_M0);
2060            break;
2061        case 1:
2062            gen_helper_iwmmxt_msbw(tmp, cpu_M0);
2063            break;
2064        case 2:
2065            gen_helper_iwmmxt_msbl(tmp, cpu_M0);
2066            break;
2067        }
2068        store_reg(s, rd, tmp);
2069        break;
2070    case 0x106: case 0x306: case 0x506: case 0x706:     /* WCMPGT */
2071    case 0x906: case 0xb06: case 0xd06: case 0xf06:
2072        wrd = (insn >> 12) & 0xf;
2073        rd0 = (insn >> 16) & 0xf;
2074        rd1 = (insn >> 0) & 0xf;
2075        gen_op_iwmmxt_movq_M0_wRn(rd0);
2076        switch ((insn >> 22) & 3) {
2077        case 0:
2078            if (insn & (1 << 21))
2079                gen_op_iwmmxt_cmpgtsb_M0_wRn(rd1);
2080            else
2081                gen_op_iwmmxt_cmpgtub_M0_wRn(rd1);
2082            break;
2083        case 1:
2084            if (insn & (1 << 21))
2085                gen_op_iwmmxt_cmpgtsw_M0_wRn(rd1);
2086            else
2087                gen_op_iwmmxt_cmpgtuw_M0_wRn(rd1);
2088            break;
2089        case 2:
2090            if (insn & (1 << 21))
2091                gen_op_iwmmxt_cmpgtsl_M0_wRn(rd1);
2092            else
2093                gen_op_iwmmxt_cmpgtul_M0_wRn(rd1);
2094            break;
2095        case 3:
2096            return 1;
2097        }
2098        gen_op_iwmmxt_movq_wRn_M0(wrd);
2099        gen_op_iwmmxt_set_mup();
2100        gen_op_iwmmxt_set_cup();
2101        break;
2102    case 0x00e: case 0x20e: case 0x40e: case 0x60e:     /* WUNPCKEL */
2103    case 0x80e: case 0xa0e: case 0xc0e: case 0xe0e:
2104        wrd = (insn >> 12) & 0xf;
2105        rd0 = (insn >> 16) & 0xf;
2106        gen_op_iwmmxt_movq_M0_wRn(rd0);
2107        switch ((insn >> 22) & 3) {
2108        case 0:
2109            if (insn & (1 << 21))
2110                gen_op_iwmmxt_unpacklsb_M0();
2111            else
2112                gen_op_iwmmxt_unpacklub_M0();
2113            break;
2114        case 1:
2115            if (insn & (1 << 21))
2116                gen_op_iwmmxt_unpacklsw_M0();
2117            else
2118                gen_op_iwmmxt_unpackluw_M0();
2119            break;
2120        case 2:
2121            if (insn & (1 << 21))
2122                gen_op_iwmmxt_unpacklsl_M0();
2123            else
2124                gen_op_iwmmxt_unpacklul_M0();
2125            break;
2126        case 3:
2127            return 1;
2128        }
2129        gen_op_iwmmxt_movq_wRn_M0(wrd);
2130        gen_op_iwmmxt_set_mup();
2131        gen_op_iwmmxt_set_cup();
2132        break;
2133    case 0x00c: case 0x20c: case 0x40c: case 0x60c:     /* WUNPCKEH */
2134    case 0x80c: case 0xa0c: case 0xc0c: case 0xe0c:
2135        wrd = (insn >> 12) & 0xf;
2136        rd0 = (insn >> 16) & 0xf;
2137        gen_op_iwmmxt_movq_M0_wRn(rd0);
2138        switch ((insn >> 22) & 3) {
2139        case 0:
2140            if (insn & (1 << 21))
2141                gen_op_iwmmxt_unpackhsb_M0();
2142            else
2143                gen_op_iwmmxt_unpackhub_M0();
2144            break;
2145        case 1:
2146            if (insn & (1 << 21))
2147                gen_op_iwmmxt_unpackhsw_M0();
2148            else
2149                gen_op_iwmmxt_unpackhuw_M0();
2150            break;
2151        case 2:
2152            if (insn & (1 << 21))
2153                gen_op_iwmmxt_unpackhsl_M0();
2154            else
2155                gen_op_iwmmxt_unpackhul_M0();
2156            break;
2157        case 3:
2158            return 1;
2159        }
2160        gen_op_iwmmxt_movq_wRn_M0(wrd);
2161        gen_op_iwmmxt_set_mup();
2162        gen_op_iwmmxt_set_cup();
2163        break;
2164    case 0x204: case 0x604: case 0xa04: case 0xe04:     /* WSRL */
2165    case 0x214: case 0x614: case 0xa14: case 0xe14:
2166        if (((insn >> 22) & 3) == 0)
2167            return 1;
2168        wrd = (insn >> 12) & 0xf;
2169        rd0 = (insn >> 16) & 0xf;
2170        gen_op_iwmmxt_movq_M0_wRn(rd0);
2171        tmp = tcg_temp_new_i32();
2172        if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2173            tcg_temp_free_i32(tmp);
2174            return 1;
2175        }
2176        switch ((insn >> 22) & 3) {
2177        case 1:
2178            gen_helper_iwmmxt_srlw(cpu_M0, cpu_env, cpu_M0, tmp);
2179            break;
2180        case 2:
2181            gen_helper_iwmmxt_srll(cpu_M0, cpu_env, cpu_M0, tmp);
2182            break;
2183        case 3:
2184            gen_helper_iwmmxt_srlq(cpu_M0, cpu_env, cpu_M0, tmp);
2185            break;
2186        }
2187        tcg_temp_free_i32(tmp);
2188        gen_op_iwmmxt_movq_wRn_M0(wrd);
2189        gen_op_iwmmxt_set_mup();
2190        gen_op_iwmmxt_set_cup();
2191        break;
2192    case 0x004: case 0x404: case 0x804: case 0xc04:     /* WSRA */
2193    case 0x014: case 0x414: case 0x814: case 0xc14:
2194        if (((insn >> 22) & 3) == 0)
2195            return 1;
2196        wrd = (insn >> 12) & 0xf;
2197        rd0 = (insn >> 16) & 0xf;
2198        gen_op_iwmmxt_movq_M0_wRn(rd0);
2199        tmp = tcg_temp_new_i32();
2200        if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2201            tcg_temp_free_i32(tmp);
2202            return 1;
2203        }
2204        switch ((insn >> 22) & 3) {
2205        case 1:
2206            gen_helper_iwmmxt_sraw(cpu_M0, cpu_env, cpu_M0, tmp);
2207            break;
2208        case 2:
2209            gen_helper_iwmmxt_sral(cpu_M0, cpu_env, cpu_M0, tmp);
2210            break;
2211        case 3:
2212            gen_helper_iwmmxt_sraq(cpu_M0, cpu_env, cpu_M0, tmp);
2213            break;
2214        }
2215        tcg_temp_free_i32(tmp);
2216        gen_op_iwmmxt_movq_wRn_M0(wrd);
2217        gen_op_iwmmxt_set_mup();
2218        gen_op_iwmmxt_set_cup();
2219        break;
2220    case 0x104: case 0x504: case 0x904: case 0xd04:     /* WSLL */
2221    case 0x114: case 0x514: case 0x914: case 0xd14:
2222        if (((insn >> 22) & 3) == 0)
2223            return 1;
2224        wrd = (insn >> 12) & 0xf;
2225        rd0 = (insn >> 16) & 0xf;
2226        gen_op_iwmmxt_movq_M0_wRn(rd0);
2227        tmp = tcg_temp_new_i32();
2228        if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2229            tcg_temp_free_i32(tmp);
2230            return 1;
2231        }
2232        switch ((insn >> 22) & 3) {
2233        case 1:
2234            gen_helper_iwmmxt_sllw(cpu_M0, cpu_env, cpu_M0, tmp);
2235            break;
2236        case 2:
2237            gen_helper_iwmmxt_slll(cpu_M0, cpu_env, cpu_M0, tmp);
2238            break;
2239        case 3:
2240            gen_helper_iwmmxt_sllq(cpu_M0, cpu_env, cpu_M0, tmp);
2241            break;
2242        }
2243        tcg_temp_free_i32(tmp);
2244        gen_op_iwmmxt_movq_wRn_M0(wrd);
2245        gen_op_iwmmxt_set_mup();
2246        gen_op_iwmmxt_set_cup();
2247        break;
2248    case 0x304: case 0x704: case 0xb04: case 0xf04:     /* WROR */
2249    case 0x314: case 0x714: case 0xb14: case 0xf14:
2250        if (((insn >> 22) & 3) == 0)
2251            return 1;
2252        wrd = (insn >> 12) & 0xf;
2253        rd0 = (insn >> 16) & 0xf;
2254        gen_op_iwmmxt_movq_M0_wRn(rd0);
2255        tmp = tcg_temp_new_i32();
2256        switch ((insn >> 22) & 3) {
2257        case 1:
2258            if (gen_iwmmxt_shift(insn, 0xf, tmp)) {
2259                tcg_temp_free_i32(tmp);
2260                return 1;
2261            }
2262            gen_helper_iwmmxt_rorw(cpu_M0, cpu_env, cpu_M0, tmp);
2263            break;
2264        case 2:
2265            if (gen_iwmmxt_shift(insn, 0x1f, tmp)) {
2266                tcg_temp_free_i32(tmp);
2267                return 1;
2268            }
2269            gen_helper_iwmmxt_rorl(cpu_M0, cpu_env, cpu_M0, tmp);
2270            break;
2271        case 3:
2272            if (gen_iwmmxt_shift(insn, 0x3f, tmp)) {
2273                tcg_temp_free_i32(tmp);
2274                return 1;
2275            }
2276            gen_helper_iwmmxt_rorq(cpu_M0, cpu_env, cpu_M0, tmp);
2277            break;
2278        }
2279        tcg_temp_free_i32(tmp);
2280        gen_op_iwmmxt_movq_wRn_M0(wrd);
2281        gen_op_iwmmxt_set_mup();
2282        gen_op_iwmmxt_set_cup();
2283        break;
2284    case 0x116: case 0x316: case 0x516: case 0x716:     /* WMIN */
2285    case 0x916: case 0xb16: case 0xd16: case 0xf16:
2286        wrd = (insn >> 12) & 0xf;
2287        rd0 = (insn >> 16) & 0xf;
2288        rd1 = (insn >> 0) & 0xf;
2289        gen_op_iwmmxt_movq_M0_wRn(rd0);
2290        switch ((insn >> 22) & 3) {
2291        case 0:
2292            if (insn & (1 << 21))
2293                gen_op_iwmmxt_minsb_M0_wRn(rd1);
2294            else
2295                gen_op_iwmmxt_minub_M0_wRn(rd1);
2296            break;
2297        case 1:
2298            if (insn & (1 << 21))
2299                gen_op_iwmmxt_minsw_M0_wRn(rd1);
2300            else
2301                gen_op_iwmmxt_minuw_M0_wRn(rd1);
2302            break;
2303        case 2:
2304            if (insn & (1 << 21))
2305                gen_op_iwmmxt_minsl_M0_wRn(rd1);
2306            else
2307                gen_op_iwmmxt_minul_M0_wRn(rd1);
2308            break;
2309        case 3:
2310            return 1;
2311        }
2312        gen_op_iwmmxt_movq_wRn_M0(wrd);
2313        gen_op_iwmmxt_set_mup();
2314        break;
2315    case 0x016: case 0x216: case 0x416: case 0x616:     /* WMAX */
2316    case 0x816: case 0xa16: case 0xc16: case 0xe16:
2317        wrd = (insn >> 12) & 0xf;
2318        rd0 = (insn >> 16) & 0xf;
2319        rd1 = (insn >> 0) & 0xf;
2320        gen_op_iwmmxt_movq_M0_wRn(rd0);
2321        switch ((insn >> 22) & 3) {
2322        case 0:
2323            if (insn & (1 << 21))
2324                gen_op_iwmmxt_maxsb_M0_wRn(rd1);
2325            else
2326                gen_op_iwmmxt_maxub_M0_wRn(rd1);
2327            break;
2328        case 1:
2329            if (insn & (1 << 21))
2330                gen_op_iwmmxt_maxsw_M0_wRn(rd1);
2331            else
2332                gen_op_iwmmxt_maxuw_M0_wRn(rd1);
2333            break;
2334        case 2:
2335            if (insn & (1 << 21))
2336                gen_op_iwmmxt_maxsl_M0_wRn(rd1);
2337            else
2338                gen_op_iwmmxt_maxul_M0_wRn(rd1);
2339            break;
2340        case 3:
2341            return 1;
2342        }
2343        gen_op_iwmmxt_movq_wRn_M0(wrd);
2344        gen_op_iwmmxt_set_mup();
2345        break;
2346    case 0x002: case 0x102: case 0x202: case 0x302:     /* WALIGNI */
2347    case 0x402: case 0x502: case 0x602: case 0x702:
2348        wrd = (insn >> 12) & 0xf;
2349        rd0 = (insn >> 16) & 0xf;
2350        rd1 = (insn >> 0) & 0xf;
2351        gen_op_iwmmxt_movq_M0_wRn(rd0);
2352        iwmmxt_load_reg(cpu_V1, rd1);
2353        gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1,
2354                                tcg_constant_i32((insn >> 20) & 3));
2355        gen_op_iwmmxt_movq_wRn_M0(wrd);
2356        gen_op_iwmmxt_set_mup();
2357        break;
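        /* For WSUB (and WADD further down) insn[23:22] selects the element
         * size and insn[21:20] the overflow behaviour: 00 modulo wrap-around,
         * 01 unsigned saturation, 11 signed saturation; the remaining
         * combination is treated as UNDEFINED.
         */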
2358    case 0x01a: case 0x11a: case 0x21a: case 0x31a:     /* WSUB */
2359    case 0x41a: case 0x51a: case 0x61a: case 0x71a:
2360    case 0x81a: case 0x91a: case 0xa1a: case 0xb1a:
2361    case 0xc1a: case 0xd1a: case 0xe1a: case 0xf1a:
2362        wrd = (insn >> 12) & 0xf;
2363        rd0 = (insn >> 16) & 0xf;
2364        rd1 = (insn >> 0) & 0xf;
2365        gen_op_iwmmxt_movq_M0_wRn(rd0);
2366        switch ((insn >> 20) & 0xf) {
2367        case 0x0:
2368            gen_op_iwmmxt_subnb_M0_wRn(rd1);
2369            break;
2370        case 0x1:
2371            gen_op_iwmmxt_subub_M0_wRn(rd1);
2372            break;
2373        case 0x3:
2374            gen_op_iwmmxt_subsb_M0_wRn(rd1);
2375            break;
2376        case 0x4:
2377            gen_op_iwmmxt_subnw_M0_wRn(rd1);
2378            break;
2379        case 0x5:
2380            gen_op_iwmmxt_subuw_M0_wRn(rd1);
2381            break;
2382        case 0x7:
2383            gen_op_iwmmxt_subsw_M0_wRn(rd1);
2384            break;
2385        case 0x8:
2386            gen_op_iwmmxt_subnl_M0_wRn(rd1);
2387            break;
2388        case 0x9:
2389            gen_op_iwmmxt_subul_M0_wRn(rd1);
2390            break;
2391        case 0xb:
2392            gen_op_iwmmxt_subsl_M0_wRn(rd1);
2393            break;
2394        default:
2395            return 1;
2396        }
2397        gen_op_iwmmxt_movq_wRn_M0(wrd);
2398        gen_op_iwmmxt_set_mup();
2399        gen_op_iwmmxt_set_cup();
2400        break;
2401    case 0x01e: case 0x11e: case 0x21e: case 0x31e:     /* WSHUFH */
2402    case 0x41e: case 0x51e: case 0x61e: case 0x71e:
2403    case 0x81e: case 0x91e: case 0xa1e: case 0xb1e:
2404    case 0xc1e: case 0xd1e: case 0xe1e: case 0xf1e:
2405        wrd = (insn >> 12) & 0xf;
2406        rd0 = (insn >> 16) & 0xf;
2407        gen_op_iwmmxt_movq_M0_wRn(rd0);
2408        tmp = tcg_constant_i32(((insn >> 16) & 0xf0) | (insn & 0x0f));
2409        gen_helper_iwmmxt_shufh(cpu_M0, cpu_env, cpu_M0, tmp);
2410        gen_op_iwmmxt_movq_wRn_M0(wrd);
2411        gen_op_iwmmxt_set_mup();
2412        gen_op_iwmmxt_set_cup();
2413        break;
2414    case 0x018: case 0x118: case 0x218: case 0x318:     /* WADD */
2415    case 0x418: case 0x518: case 0x618: case 0x718:
2416    case 0x818: case 0x918: case 0xa18: case 0xb18:
2417    case 0xc18: case 0xd18: case 0xe18: case 0xf18:
2418        wrd = (insn >> 12) & 0xf;
2419        rd0 = (insn >> 16) & 0xf;
2420        rd1 = (insn >> 0) & 0xf;
2421        gen_op_iwmmxt_movq_M0_wRn(rd0);
2422        switch ((insn >> 20) & 0xf) {
2423        case 0x0:
2424            gen_op_iwmmxt_addnb_M0_wRn(rd1);
2425            break;
2426        case 0x1:
2427            gen_op_iwmmxt_addub_M0_wRn(rd1);
2428            break;
2429        case 0x3:
2430            gen_op_iwmmxt_addsb_M0_wRn(rd1);
2431            break;
2432        case 0x4:
2433            gen_op_iwmmxt_addnw_M0_wRn(rd1);
2434            break;
2435        case 0x5:
2436            gen_op_iwmmxt_adduw_M0_wRn(rd1);
2437            break;
2438        case 0x7:
2439            gen_op_iwmmxt_addsw_M0_wRn(rd1);
2440            break;
2441        case 0x8:
2442            gen_op_iwmmxt_addnl_M0_wRn(rd1);
2443            break;
2444        case 0x9:
2445            gen_op_iwmmxt_addul_M0_wRn(rd1);
2446            break;
2447        case 0xb:
2448            gen_op_iwmmxt_addsl_M0_wRn(rd1);
2449            break;
2450        default:
2451            return 1;
2452        }
2453        gen_op_iwmmxt_movq_wRn_M0(wrd);
2454        gen_op_iwmmxt_set_mup();
2455        gen_op_iwmmxt_set_cup();
2456        break;
2457    case 0x008: case 0x108: case 0x208: case 0x308:     /* WPACK */
2458    case 0x408: case 0x508: case 0x608: case 0x708:
2459    case 0x808: case 0x908: case 0xa08: case 0xb08:
2460    case 0xc08: case 0xd08: case 0xe08: case 0xf08:
2461        if (!(insn & (1 << 20)) || ((insn >> 22) & 3) == 0)
2462            return 1;
2463        wrd = (insn >> 12) & 0xf;
2464        rd0 = (insn >> 16) & 0xf;
2465        rd1 = (insn >> 0) & 0xf;
2466        gen_op_iwmmxt_movq_M0_wRn(rd0);
2467        switch ((insn >> 22) & 3) {
2468        case 1:
2469            if (insn & (1 << 21))
2470                gen_op_iwmmxt_packsw_M0_wRn(rd1);
2471            else
2472                gen_op_iwmmxt_packuw_M0_wRn(rd1);
2473            break;
2474        case 2:
2475            if (insn & (1 << 21))
2476                gen_op_iwmmxt_packsl_M0_wRn(rd1);
2477            else
2478                gen_op_iwmmxt_packul_M0_wRn(rd1);
2479            break;
2480        case 3:
2481            if (insn & (1 << 21))
2482                gen_op_iwmmxt_packsq_M0_wRn(rd1);
2483            else
2484                gen_op_iwmmxt_packuq_M0_wRn(rd1);
2485            break;
2486        }
2487        gen_op_iwmmxt_movq_wRn_M0(wrd);
2488        gen_op_iwmmxt_set_mup();
2489        gen_op_iwmmxt_set_cup();
2490        break;
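        /* TMIA, TMIAPH and TMIA<x><y>: multiply two ARM core registers and
         * accumulate into wRd (32x32, packed 16x16 pair, and single 16x16
         * with halfword selection via insn bits 16/17, respectively).
         */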
2491    case 0x201: case 0x203: case 0x205: case 0x207:
2492    case 0x209: case 0x20b: case 0x20d: case 0x20f:
2493    case 0x211: case 0x213: case 0x215: case 0x217:
2494    case 0x219: case 0x21b: case 0x21d: case 0x21f:
2495        wrd = (insn >> 5) & 0xf;
2496        rd0 = (insn >> 12) & 0xf;
2497        rd1 = (insn >> 0) & 0xf;
2498        if (rd0 == 0xf || rd1 == 0xf)
2499            return 1;
2500        gen_op_iwmmxt_movq_M0_wRn(wrd);
2501        tmp = load_reg(s, rd0);
2502        tmp2 = load_reg(s, rd1);
2503        switch ((insn >> 16) & 0xf) {
2504        case 0x0:                                       /* TMIA */
2505            gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2506            break;
2507        case 0x8:                                       /* TMIAPH */
2508            gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2509            break;
2510        case 0xc: case 0xd: case 0xe: case 0xf:                 /* TMIAxy */
2511            if (insn & (1 << 16))
2512                tcg_gen_shri_i32(tmp, tmp, 16);
2513            if (insn & (1 << 17))
2514                tcg_gen_shri_i32(tmp2, tmp2, 16);
2515            gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2516            break;
2517        default:
2518            tcg_temp_free_i32(tmp2);
2519            tcg_temp_free_i32(tmp);
2520            return 1;
2521        }
2522        tcg_temp_free_i32(tmp2);
2523        tcg_temp_free_i32(tmp);
2524        gen_op_iwmmxt_movq_wRn_M0(wrd);
2525        gen_op_iwmmxt_set_mup();
2526        break;
2527    default:
2528        return 1;
2529    }
2530
2531    return 0;
2532}
2533
2534/* Disassemble an XScale DSP instruction.  Returns nonzero if an error occurred
2535   (i.e. the instruction is undefined).  */
2536static int disas_dsp_insn(DisasContext *s, uint32_t insn)
2537{
2538    int acc, rd0, rd1, rdhi, rdlo;
2539    TCGv_i32 tmp, tmp2;
2540
2541    if ((insn & 0x0ff00f10) == 0x0e200010) {
2542        /* Multiply with Internal Accumulate Format */
2543        rd0 = (insn >> 12) & 0xf;
2544        rd1 = insn & 0xf;
2545        acc = (insn >> 5) & 7;
2546
2547        if (acc != 0)
2548            return 1;
2549
2550        tmp = load_reg(s, rd0);
2551        tmp2 = load_reg(s, rd1);
2552        switch ((insn >> 16) & 0xf) {
2553        case 0x0:                                       /* MIA */
2554            gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2555            break;
2556        case 0x8:                                       /* MIAPH */
2557            gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2558            break;
2559        case 0xc:                                       /* MIABB */
2560        case 0xd:                                       /* MIABT */
2561        case 0xe:                                       /* MIATB */
2562        case 0xf:                                       /* MIATT */
2563            if (insn & (1 << 16))
2564                tcg_gen_shri_i32(tmp, tmp, 16);
2565            if (insn & (1 << 17))
2566                tcg_gen_shri_i32(tmp2, tmp2, 16);
2567            gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2568            break;
2569        default:
2570            return 1;
2571        }
2572        tcg_temp_free_i32(tmp2);
2573        tcg_temp_free_i32(tmp);
2574
2575        gen_op_iwmmxt_movq_wRn_M0(acc);
2576        return 0;
2577    }
2578
2579    if ((insn & 0x0fe00ff8) == 0x0c400000) {
2580        /* Internal Accumulator Access Format */
2581        rdhi = (insn >> 16) & 0xf;
2582        rdlo = (insn >> 12) & 0xf;
2583        acc = insn & 7;
2584
2585        if (acc != 0)
2586            return 1;
2587
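            /* MRA copies the 40-bit accumulator to two core registers: the
             * low 32 bits to RdLo and bits [39:32] to RdHi (hence the 8-bit
             * mask); MAR concatenates RdLo:RdHi back into the accumulator.
             */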
2588        if (insn & ARM_CP_RW_BIT) {                     /* MRA */
2589            iwmmxt_load_reg(cpu_V0, acc);
2590            tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
2591            tcg_gen_extrh_i64_i32(cpu_R[rdhi], cpu_V0);
2592            tcg_gen_andi_i32(cpu_R[rdhi], cpu_R[rdhi], (1 << (40 - 32)) - 1);
2593        } else {                                        /* MAR */
2594            tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
2595            iwmmxt_store_reg(cpu_V0, acc);
2596        }
2597        return 0;
2598    }
2599
2600    return 1;
2601}
2602
2603static void gen_goto_ptr(void)
2604{
2605    tcg_gen_lookup_and_goto_ptr();
2606}
2607
2608/* This will end the TB but doesn't guarantee we'll return to
2609 * cpu_loop_exec. Any live exit_requests will be processed as we
2610 * enter the next TB.
2611 */
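    /* Here 'diff' is the byte offset of the branch target from s->pc_curr.  */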
2612static void gen_goto_tb(DisasContext *s, int n, target_long diff)
2613{
2614    if (translator_use_goto_tb(&s->base, s->pc_curr + diff)) {
2615        /*
2616         * For pcrel, the pc must always be up-to-date on entry to
2617         * the linked TB, so that it can use simple additions for all
2618         * further adjustments.  For !pcrel, the linked TB is compiled
2619         * to know its full virtual address, so we can delay the
2620         * update to pc to the unlinked path.  A long chain of links
2621         * can thus avoid many updates to the PC.
2622         */
2623        if (TARGET_TB_PCREL) {
2624            gen_update_pc(s, diff);
2625            tcg_gen_goto_tb(n);
2626        } else {
2627            tcg_gen_goto_tb(n);
2628            gen_update_pc(s, diff);
2629        }
2630        tcg_gen_exit_tb(s->base.tb, n);
2631    } else {
2632        gen_update_pc(s, diff);
2633        gen_goto_ptr();
2634    }
2635    s->base.is_jmp = DISAS_NORETURN;
2636}
2637
2638/* Jump, specifying which TB number to use if we gen_goto_tb() */
2639static void gen_jmp_tb(DisasContext *s, target_long diff, int tbno)
2640{
2641    if (unlikely(s->ss_active)) {
2642        /* An indirect jump so that we still trigger the debug exception.  */
2643        gen_update_pc(s, diff);
2644        s->base.is_jmp = DISAS_JUMP;
2645        return;
2646    }
2647    switch (s->base.is_jmp) {
2648    case DISAS_NEXT:
2649    case DISAS_TOO_MANY:
2650    case DISAS_NORETURN:
2651        /*
2652         * The normal case: just go to the destination TB.
2653         * NB: NORETURN happens if we generate code like
2654         *    gen_brcondi(l);
2655         *    gen_jmp();
2656         *    gen_set_label(l);
2657         *    gen_jmp();
2658         * on the second call to gen_jmp().
2659         */
2660        gen_goto_tb(s, tbno, diff);
2661        break;
2662    case DISAS_UPDATE_NOCHAIN:
2663    case DISAS_UPDATE_EXIT:
2664        /*
2665         * We already decided we're leaving the TB for some other reason.
2666         * Avoid using goto_tb so we really do exit back to the main loop
2667         * and don't chain to another TB.
2668         */
2669        gen_update_pc(s, diff);
2670        gen_goto_ptr();
2671        s->base.is_jmp = DISAS_NORETURN;
2672        break;
2673    default:
2674        /*
2675         * We shouldn't be emitting code for a jump and also have
2676         * is_jmp set to one of the special cases like DISAS_SWI.
2677         */
2678        g_assert_not_reached();
2679    }
2680}
2681
2682static inline void gen_jmp(DisasContext *s, target_long diff)
2683{
2684    gen_jmp_tb(s, diff, 0);
2685}
2686
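    /* Used by the signed 16x16 multiplies (SMUL<x><y> and friends): x and y
     * select the top or bottom halfword of each operand, which is
     * sign-extended before the multiply.
     */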
2687static inline void gen_mulxy(TCGv_i32 t0, TCGv_i32 t1, int x, int y)
2688{
2689    if (x)
2690        tcg_gen_sari_i32(t0, t0, 16);
2691    else
2692        gen_sxth(t0);
2693    if (y)
2694        tcg_gen_sari_i32(t1, t1, 16);
2695    else
2696        gen_sxth(t1);
2697    tcg_gen_mul_i32(t0, t0, t1);
2698}
2699
2700/* Return the mask of PSR bits that a given MSR instruction may write.  */
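    /* The flags argument is the 4-bit MSR field mask: bit 0 = 'c' (PSR[7:0]),
     * bit 1 = 'x' (PSR[15:8]), bit 2 = 's' (PSR[23:16]), bit 3 = 'f'
     * (PSR[31:24]).  For example MSR CPSR_fc, Rn has flags 0b1001 and starts
     * from a mask of 0xff0000ff before the validity masking below.
     */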
2701static uint32_t msr_mask(DisasContext *s, int flags, int spsr)
2702{
2703    uint32_t mask = 0;
2704
2705    if (flags & (1 << 0)) {
2706        mask |= 0xff;
2707    }
2708    if (flags & (1 << 1)) {
2709        mask |= 0xff00;
2710    }
2711    if (flags & (1 << 2)) {
2712        mask |= 0xff0000;
2713    }
2714    if (flags & (1 << 3)) {
2715        mask |= 0xff000000;
2716    }
2717
2718    /* Mask out undefined and reserved bits.  */
2719    mask &= aarch32_cpsr_valid_mask(s->features, s->isar);
2720
2721    /* Mask out execution state.  */
2722    if (!spsr) {
2723        mask &= ~CPSR_EXEC;
2724    }
2725
2726    /* Mask out privileged bits.  */
2727    if (IS_USER(s)) {
2728        mask &= CPSR_USER;
2729    }
2730    return mask;
2731}
2732
2733/* Returns nonzero if access to the PSR is not permitted. Marks t0 as dead. */
2734static int gen_set_psr(DisasContext *s, uint32_t mask, int spsr, TCGv_i32 t0)
2735{
2736    TCGv_i32 tmp;
2737    if (spsr) {
2738        /* ??? This is also undefined in system mode.  */
2739        if (IS_USER(s))
2740            return 1;
2741
2742        tmp = load_cpu_field(spsr);
2743        tcg_gen_andi_i32(tmp, tmp, ~mask);
2744        tcg_gen_andi_i32(t0, t0, mask);
2745        tcg_gen_or_i32(tmp, tmp, t0);
2746        store_cpu_field(tmp, spsr);
2747    } else {
2748        gen_set_cpsr(t0, mask);
2749    }
2750    tcg_temp_free_i32(t0);
2751    gen_lookup_tb(s);
2752    return 0;
2753}
2754
2755/* Returns nonzero if access to the PSR is not permitted.  */
2756static int gen_set_psr_im(DisasContext *s, uint32_t mask, int spsr, uint32_t val)
2757{
2758    TCGv_i32 tmp;
2759    tmp = tcg_temp_new_i32();
2760    tcg_gen_movi_i32(tmp, val);
2761    return gen_set_psr(s, mask, spsr, tmp);
2762}
2763
2764static bool msr_banked_access_decode(DisasContext *s, int r, int sysm, int rn,
2765                                     int *tgtmode, int *regno)
2766{
2767    /* Decode the r and sysm fields of MSR/MRS banked accesses into
2768     * the target mode and register number, and identify the various
2769     * unpredictable cases.
2770     * MSR (banked) and MRS (banked) are CONSTRAINED UNPREDICTABLE if:
2771     *  + executed in user mode
2772     *  + using R15 as the src/dest register
2773     *  + accessing an unimplemented register
2774     *  + accessing a register that's inaccessible at the current PL/security state
2775     *  + accessing a register that you could access with a different insn
2776     * We choose to UNDEF in all these cases.
2777     * Since we don't know which of the various AArch32 modes we are in
2778     * we have to defer some checks to runtime.
2779     * Accesses to Monitor mode registers from Secure EL1 (which implies
2780     * that EL3 is AArch64) must trap to Secure EL2 if it is enabled, otherwise EL3.
2781     *
2782     * If the access checks fail this function will emit code to take
2783     * an exception and return false. Otherwise it will return true,
2784     * and set *tgtmode and *regno appropriately.
2785     */
2786    /* These instructions are present only in ARMv8, or in ARMv7 with the
2787     * Virtualization Extensions.
2788     */
2789    if (!arm_dc_feature(s, ARM_FEATURE_V8) &&
2790        !arm_dc_feature(s, ARM_FEATURE_EL2)) {
2791        goto undef;
2792    }
2793
2794    if (IS_USER(s) || rn == 15) {
2795        goto undef;
2796    }
2797
2798    /* The table in the v8 ARM ARM section F5.2.3 describes the encoding
2799     * of registers into (r, sysm).
2800     */
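        /* For example r == 0, sysm == 0b01010 selects r10_fiq, and r == 1,
         * sysm == 0x10 selects SPSR_irq.
         */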
2801    if (r) {
2802        /* SPSRs for other modes */
2803        switch (sysm) {
2804        case 0xe: /* SPSR_fiq */
2805            *tgtmode = ARM_CPU_MODE_FIQ;
2806            break;
2807        case 0x10: /* SPSR_irq */
2808            *tgtmode = ARM_CPU_MODE_IRQ;
2809            break;
2810        case 0x12: /* SPSR_svc */
2811            *tgtmode = ARM_CPU_MODE_SVC;
2812            break;
2813        case 0x14: /* SPSR_abt */
2814            *tgtmode = ARM_CPU_MODE_ABT;
2815            break;
2816        case 0x16: /* SPSR_und */
2817            *tgtmode = ARM_CPU_MODE_UND;
2818            break;
2819        case 0x1c: /* SPSR_mon */
2820            *tgtmode = ARM_CPU_MODE_MON;
2821            break;
2822        case 0x1e: /* SPSR_hyp */
2823            *tgtmode = ARM_CPU_MODE_HYP;
2824            break;
2825        default: /* unallocated */
2826            goto undef;
2827        }
2828        /* We arbitrarily assign SPSR a register number of 16. */
2829        *regno = 16;
2830    } else {
2831        /* general purpose registers for other modes */
2832        switch (sysm) {
2833        case 0x0 ... 0x6:   /* 0b00xxx : r8_usr ... r14_usr */
2834            *tgtmode = ARM_CPU_MODE_USR;
2835            *regno = sysm + 8;
2836            break;
2837        case 0x8 ... 0xe:   /* 0b01xxx : r8_fiq ... r14_fiq */
2838            *tgtmode = ARM_CPU_MODE_FIQ;
2839            *regno = sysm;
2840            break;
2841        case 0x10 ... 0x11: /* 0b1000x : r14_irq, r13_irq */
2842            *tgtmode = ARM_CPU_MODE_IRQ;
2843            *regno = sysm & 1 ? 13 : 14;
2844            break;
2845        case 0x12 ... 0x13: /* 0b1001x : r14_svc, r13_svc */
2846            *tgtmode = ARM_CPU_MODE_SVC;
2847            *regno = sysm & 1 ? 13 : 14;
2848            break;
2849        case 0x14 ... 0x15: /* 0b1010x : r14_abt, r13_abt */
2850            *tgtmode = ARM_CPU_MODE_ABT;
2851            *regno = sysm & 1 ? 13 : 14;
2852            break;
2853        case 0x16 ... 0x17: /* 0b1011x : r14_und, r13_und */
2854            *tgtmode = ARM_CPU_MODE_UND;
2855            *regno = sysm & 1 ? 13 : 14;
2856            break;
2857        case 0x1c ... 0x1d: /* 0b1110x : r14_mon, r13_mon */
2858            *tgtmode = ARM_CPU_MODE_MON;
2859            *regno = sysm & 1 ? 13 : 14;
2860            break;
2861        case 0x1e ... 0x1f: /* 0b1111x : elr_hyp, r13_hyp */
2862            *tgtmode = ARM_CPU_MODE_HYP;
2863            /* Arbitrarily pick 17 for ELR_Hyp (which is not a banked LR!) */
2864            *regno = sysm & 1 ? 13 : 17;
2865            break;
2866        default: /* unallocated */
2867            goto undef;
2868        }
2869    }
2870
2871    /* Catch the 'accessing inaccessible register' cases we can detect
2872     * at translate time.
2873     */
2874    switch (*tgtmode) {
2875    case ARM_CPU_MODE_MON:
2876        if (!arm_dc_feature(s, ARM_FEATURE_EL3) || s->ns) {
2877            goto undef;
2878        }
2879        if (s->current_el == 1) {
2880            /* If we're in Secure EL1 (which implies that EL3 is AArch64)
2881             * then accesses to Mon registers trap to Secure EL2, if it exists,
2882             * otherwise EL3.
2883             */
2884            TCGv_i32 tcg_el;
2885
2886            if (arm_dc_feature(s, ARM_FEATURE_AARCH64) &&
2887                dc_isar_feature(aa64_sel2, s)) {
2888                /* Target EL is EL<3 minus SCR_EL3.EEL2> */
2889                tcg_el = load_cpu_field(cp15.scr_el3);
2890                tcg_gen_sextract_i32(tcg_el, tcg_el, ctz32(SCR_EEL2), 1);
2891                tcg_gen_addi_i32(tcg_el, tcg_el, 3);
2892            } else {
2893                tcg_el = tcg_constant_i32(3);
2894            }
2895
2896            gen_exception_insn_el_v(s, 0, EXCP_UDEF,
2897                                    syn_uncategorized(), tcg_el);
2898            tcg_temp_free_i32(tcg_el);
2899            return false;
2900        }
2901        break;
2902    case ARM_CPU_MODE_HYP:
2903        /*
2904         * SPSR_hyp and r13_hyp can only be accessed from Monitor mode
2905         * (and so we can forbid accesses from EL2 or below). elr_hyp
2906         * can be accessed also from Hyp mode, so forbid accesses from
2907         * EL0 or EL1.
2908         */
2909        if (!arm_dc_feature(s, ARM_FEATURE_EL2) || s->current_el < 2 ||
2910            (s->current_el < 3 && *regno != 17)) {
2911            goto undef;
2912        }
2913        break;
2914    default:
2915        break;
2916    }
2917
2918    return true;
2919
2920undef:
2921    /* If we get here then some access check did not pass */
2922    gen_exception_insn(s, 0, EXCP_UDEF, syn_uncategorized());
2923    return false;
2924}
2925
2926static void gen_msr_banked(DisasContext *s, int r, int sysm, int rn)
2927{
2928    TCGv_i32 tcg_reg;
2929    int tgtmode = 0, regno = 0;
2930
2931    if (!msr_banked_access_decode(s, r, sysm, rn, &tgtmode, &regno)) {
2932        return;
2933    }
2934
2935    /* Sync state because msr_banked() can raise exceptions */
2936    gen_set_condexec(s);
2937    gen_update_pc(s, 0);
2938    tcg_reg = load_reg(s, rn);
2939    gen_helper_msr_banked(cpu_env, tcg_reg,
2940                          tcg_constant_i32(tgtmode),
2941                          tcg_constant_i32(regno));
2942    tcg_temp_free_i32(tcg_reg);
2943    s->base.is_jmp = DISAS_UPDATE_EXIT;
2944}
2945
2946static void gen_mrs_banked(DisasContext *s, int r, int sysm, int rn)
2947{
2948    TCGv_i32 tcg_reg;
2949    int tgtmode = 0, regno = 0;
2950
2951    if (!msr_banked_access_decode(s, r, sysm, rn, &tgtmode, &regno)) {
2952        return;
2953    }
2954
2955    /* Sync state because mrs_banked() can raise exceptions */
2956    gen_set_condexec(s);
2957    gen_update_pc(s, 0);
2958    tcg_reg = tcg_temp_new_i32();
2959    gen_helper_mrs_banked(tcg_reg, cpu_env,
2960                          tcg_constant_i32(tgtmode),
2961                          tcg_constant_i32(regno));
2962    store_reg(s, rn, tcg_reg);
2963    s->base.is_jmp = DISAS_UPDATE_EXIT;
2964}
2965
2966/* Store value to PC as for an exception return (i.e. don't
2967 * mask bits). The subsequent call to gen_helper_cpsr_write_eret()
2968 * will do the masking based on the new value of the Thumb bit.
2969 */
2970static void store_pc_exc_ret(DisasContext *s, TCGv_i32 pc)
2971{
2972    tcg_gen_mov_i32(cpu_R[15], pc);
2973    tcg_temp_free_i32(pc);
2974}
2975
2976/* Generate a v6 exception return.  Marks both values as dead.  */
2977static void gen_rfe(DisasContext *s, TCGv_i32 pc, TCGv_i32 cpsr)
2978{
2979    store_pc_exc_ret(s, pc);
2980    /* The cpsr_write_eret helper will mask the low bits of PC
2981     * appropriately depending on the new Thumb bit, so it must
2982     * be called after storing the new PC.
2983     */
2984    if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
2985        gen_io_start();
2986    }
2987    gen_helper_cpsr_write_eret(cpu_env, cpsr);
2988    tcg_temp_free_i32(cpsr);
2989    /* Must exit loop to check un-masked IRQs */
2990    s->base.is_jmp = DISAS_EXIT;
2991}
2992
2993/* Generate an old-style exception return. Marks pc as dead. */
2994static void gen_exception_return(DisasContext *s, TCGv_i32 pc)
2995{
2996    gen_rfe(s, pc, load_cpu_field(spsr));
2997}
2998
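    /* Expand a three-operand gvec operation whose out-of-line helper takes a
     * pointer to the QC (cumulative saturation) flag as its extra argument.
     */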
2999static void gen_gvec_fn3_qc(uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs,
3000                            uint32_t opr_sz, uint32_t max_sz,
3001                            gen_helper_gvec_3_ptr *fn)
3002{
3003    TCGv_ptr qc_ptr = tcg_temp_new_ptr();
3004
3005    tcg_gen_addi_ptr(qc_ptr, cpu_env, offsetof(CPUARMState, vfp.qc));
3006    tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, qc_ptr,
3007                       opr_sz, max_sz, 0, fn);
3008    tcg_temp_free_ptr(qc_ptr);
3009}
3010
3011void gen_gvec_sqrdmlah_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3012                          uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3013{
3014    static gen_helper_gvec_3_ptr * const fns[2] = {
3015        gen_helper_gvec_qrdmlah_s16, gen_helper_gvec_qrdmlah_s32
3016    };
3017    tcg_debug_assert(vece >= 1 && vece <= 2);
3018    gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
3019}
3020
3021void gen_gvec_sqrdmlsh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3022                          uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3023{
3024    static gen_helper_gvec_3_ptr * const fns[2] = {
3025        gen_helper_gvec_qrdmlsh_s16, gen_helper_gvec_qrdmlsh_s32
3026    };
3027    tcg_debug_assert(vece >= 1 && vece <= 2);
3028    gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
3029}
3030
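    /* Compare-against-zero expanders: each element becomes all ones when the
     * condition holds and zero otherwise.  The scalar forms use setcond
     * (which yields 0/1) followed by negation; the vector form can use
     * cmp_vec directly.
     */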
3031#define GEN_CMP0(NAME, COND)                                            \
3032    static void gen_##NAME##0_i32(TCGv_i32 d, TCGv_i32 a)               \
3033    {                                                                   \
3034        tcg_gen_setcondi_i32(COND, d, a, 0);                            \
3035        tcg_gen_neg_i32(d, d);                                          \
3036    }                                                                   \
3037    static void gen_##NAME##0_i64(TCGv_i64 d, TCGv_i64 a)               \
3038    {                                                                   \
3039        tcg_gen_setcondi_i64(COND, d, a, 0);                            \
3040        tcg_gen_neg_i64(d, d);                                          \
3041    }                                                                   \
3042    static void gen_##NAME##0_vec(unsigned vece, TCGv_vec d, TCGv_vec a) \
3043    {                                                                   \
3044        TCGv_vec zero = tcg_constant_vec_matching(d, vece, 0);          \
3045        tcg_gen_cmp_vec(COND, vece, d, a, zero);                        \
3046    }                                                                   \
3047    void gen_gvec_##NAME##0(unsigned vece, uint32_t d, uint32_t m,      \
3048                            uint32_t opr_sz, uint32_t max_sz)           \
3049    {                                                                   \
3050        const GVecGen2 op[4] = {                                        \
3051            { .fno = gen_helper_gvec_##NAME##0_b,                       \
3052              .fniv = gen_##NAME##0_vec,                                \
3053              .opt_opc = vecop_list_cmp,                                \
3054              .vece = MO_8 },                                           \
3055            { .fno = gen_helper_gvec_##NAME##0_h,                       \
3056              .fniv = gen_##NAME##0_vec,                                \
3057              .opt_opc = vecop_list_cmp,                                \
3058              .vece = MO_16 },                                          \
3059            { .fni4 = gen_##NAME##0_i32,                                \
3060              .fniv = gen_##NAME##0_vec,                                \
3061              .opt_opc = vecop_list_cmp,                                \
3062              .vece = MO_32 },                                          \
3063            { .fni8 = gen_##NAME##0_i64,                                \
3064              .fniv = gen_##NAME##0_vec,                                \
3065              .opt_opc = vecop_list_cmp,                                \
3066              .prefer_i64 = TCG_TARGET_REG_BITS == 64,                  \
3067              .vece = MO_64 },                                          \
3068        };                                                              \
3069        tcg_gen_gvec_2(d, m, opr_sz, max_sz, &op[vece]);                \
3070    }
3071
3072static const TCGOpcode vecop_list_cmp[] = {
3073    INDEX_op_cmp_vec, 0
3074};
3075
3076GEN_CMP0(ceq, TCG_COND_EQ)
3077GEN_CMP0(cle, TCG_COND_LE)
3078GEN_CMP0(cge, TCG_COND_GE)
3079GEN_CMP0(clt, TCG_COND_LT)
3080GEN_CMP0(cgt, TCG_COND_GT)
3081
3082#undef GEN_CMP0
3083
3084static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3085{
3086    tcg_gen_vec_sar8i_i64(a, a, shift);
3087    tcg_gen_vec_add8_i64(d, d, a);
3088}
3089
3090static void gen_ssra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3091{
3092    tcg_gen_vec_sar16i_i64(a, a, shift);
3093    tcg_gen_vec_add16_i64(d, d, a);
3094}
3095
3096static void gen_ssra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3097{
3098    tcg_gen_sari_i32(a, a, shift);
3099    tcg_gen_add_i32(d, d, a);
3100}
3101
3102static void gen_ssra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3103{
3104    tcg_gen_sari_i64(a, a, shift);
3105    tcg_gen_add_i64(d, d, a);
3106}
3107
3108static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3109{
3110    tcg_gen_sari_vec(vece, a, a, sh);
3111    tcg_gen_add_vec(vece, d, d, a);
3112}
3113
3114void gen_gvec_ssra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3115                   int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3116{
3117    static const TCGOpcode vecop_list[] = {
3118        INDEX_op_sari_vec, INDEX_op_add_vec, 0
3119    };
3120    static const GVecGen2i ops[4] = {
3121        { .fni8 = gen_ssra8_i64,
3122          .fniv = gen_ssra_vec,
3123          .fno = gen_helper_gvec_ssra_b,
3124          .load_dest = true,
3125          .opt_opc = vecop_list,
3126          .vece = MO_8 },
3127        { .fni8 = gen_ssra16_i64,
3128          .fniv = gen_ssra_vec,
3129          .fno = gen_helper_gvec_ssra_h,
3130          .load_dest = true,
3131          .opt_opc = vecop_list,
3132          .vece = MO_16 },
3133        { .fni4 = gen_ssra32_i32,
3134          .fniv = gen_ssra_vec,
3135          .fno = gen_helper_gvec_ssra_s,
3136          .load_dest = true,
3137          .opt_opc = vecop_list,
3138          .vece = MO_32 },
3139        { .fni8 = gen_ssra64_i64,
3140          .fniv = gen_ssra_vec,
3141          .fno = gen_helper_gvec_ssra_d,
3142          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3143          .opt_opc = vecop_list,
3144          .load_dest = true,
3145          .vece = MO_64 },
3146    };
3147
3148    /* tszimm encoding produces immediates in the range [1..esize]. */
3149    tcg_debug_assert(shift > 0);
3150    tcg_debug_assert(shift <= (8 << vece));
3151
3152    /*
3153     * Shifts larger than the element size are architecturally valid.
3154     * Signed input yields all sign bits; clamp the shift to esize - 1.
3155     */
3156    shift = MIN(shift, (8 << vece) - 1);
3157    tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3158}
3159
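/*
 * USRA: unsigned shift right by immediate and accumulate into the
 * destination.
 */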
3160static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3161{
3162    tcg_gen_vec_shr8i_i64(a, a, shift);
3163    tcg_gen_vec_add8_i64(d, d, a);
3164}
3165
3166static void gen_usra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3167{
3168    tcg_gen_vec_shr16i_i64(a, a, shift);
3169    tcg_gen_vec_add16_i64(d, d, a);
3170}
3171
3172static void gen_usra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3173{
3174    tcg_gen_shri_i32(a, a, shift);
3175    tcg_gen_add_i32(d, d, a);
3176}
3177
3178static void gen_usra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3179{
3180    tcg_gen_shri_i64(a, a, shift);
3181    tcg_gen_add_i64(d, d, a);
3182}
3183
3184static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3185{
3186    tcg_gen_shri_vec(vece, a, a, sh);
3187    tcg_gen_add_vec(vece, d, d, a);
3188}
3189
3190void gen_gvec_usra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3191                   int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3192{
3193    static const TCGOpcode vecop_list[] = {
3194        INDEX_op_shri_vec, INDEX_op_add_vec, 0
3195    };
3196    static const GVecGen2i ops[4] = {
3197        { .fni8 = gen_usra8_i64,
3198          .fniv = gen_usra_vec,
3199          .fno = gen_helper_gvec_usra_b,
3200          .load_dest = true,
3201          .opt_opc = vecop_list,
3202          .vece = MO_8, },
3203        { .fni8 = gen_usra16_i64,
3204          .fniv = gen_usra_vec,
3205          .fno = gen_helper_gvec_usra_h,
3206          .load_dest = true,
3207          .opt_opc = vecop_list,
3208          .vece = MO_16, },
3209        { .fni4 = gen_usra32_i32,
3210          .fniv = gen_usra_vec,
3211          .fno = gen_helper_gvec_usra_s,
3212          .load_dest = true,
3213          .opt_opc = vecop_list,
3214          .vece = MO_32, },
3215        { .fni8 = gen_usra64_i64,
3216          .fniv = gen_usra_vec,
3217          .fno = gen_helper_gvec_usra_d,
3218          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3219          .load_dest = true,
3220          .opt_opc = vecop_list,
3221          .vece = MO_64, },
3222    };
3223
3224    /* tszimm encoding produces immediates in the range [1..esize]. */
3225    tcg_debug_assert(shift > 0);
3226    tcg_debug_assert(shift <= (8 << vece));
3227
3228    /*
3229     * Shifts larger than the element size are architecturally valid.
3230     * Unsigned input yields all zeros, so the accumulate is a nop.
3231     */
3232    if (shift < (8 << vece)) {
3233        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3234    } else {
3235        /* Nop, but we do need to clear the tail. */
3236        tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
3237    }
3238}
3239
3240/*
3241 * Shift one less than the requested amount, and the low bit is
3242 * the rounding bit.  For the 8 and 16-bit operations, because we
3243 * mask the low bit, we can perform a normal integer shift instead
3244 * of a vector shift.
3245 */
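/*
 * I.e. for each element x, the rounded right shift used by the
 * S/URSHR and S/URSRA expanders below is
 *     (x >> sh) + ((x >> (sh - 1)) & 1)
 * which equals (x + (1 << (sh - 1))) >> sh without risking overflow
 * of the intermediate addition.
 */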
3246static void gen_srshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3247{
3248    TCGv_i64 t = tcg_temp_new_i64();
3249
3250    tcg_gen_shri_i64(t, a, sh - 1);
3251    tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
3252    tcg_gen_vec_sar8i_i64(d, a, sh);
3253    tcg_gen_vec_add8_i64(d, d, t);
3254    tcg_temp_free_i64(t);
3255}
3256
3257static void gen_srshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3258{
3259    TCGv_i64 t = tcg_temp_new_i64();
3260
3261    tcg_gen_shri_i64(t, a, sh - 1);
3262    tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
3263    tcg_gen_vec_sar16i_i64(d, a, sh);
3264    tcg_gen_vec_add16_i64(d, d, t);
3265    tcg_temp_free_i64(t);
3266}
3267
3268static void gen_srshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3269{
3270    TCGv_i32 t;
3271
3272    /* Handle shift by the input size for the benefit of trans_SRSHR_ri */
3273    if (sh == 32) {
3274        tcg_gen_movi_i32(d, 0);
3275        return;
3276    }
3277    t = tcg_temp_new_i32();
3278    tcg_gen_extract_i32(t, a, sh - 1, 1);
3279    tcg_gen_sari_i32(d, a, sh);
3280    tcg_gen_add_i32(d, d, t);
3281    tcg_temp_free_i32(t);
3282}
3283
3284static void gen_srshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3285{
3286    TCGv_i64 t = tcg_temp_new_i64();
3287
3288    tcg_gen_extract_i64(t, a, sh - 1, 1);
3289    tcg_gen_sari_i64(d, a, sh);
3290    tcg_gen_add_i64(d, d, t);
3291    tcg_temp_free_i64(t);
3292}
3293
3294static void gen_srshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3295{
3296    TCGv_vec t = tcg_temp_new_vec_matching(d);
3297    TCGv_vec ones = tcg_temp_new_vec_matching(d);
3298
3299    tcg_gen_shri_vec(vece, t, a, sh - 1);
3300    tcg_gen_dupi_vec(vece, ones, 1);
3301    tcg_gen_and_vec(vece, t, t, ones);
3302    tcg_gen_sari_vec(vece, d, a, sh);
3303    tcg_gen_add_vec(vece, d, d, t);
3304
3305    tcg_temp_free_vec(t);
3306    tcg_temp_free_vec(ones);
3307}
3308
3309void gen_gvec_srshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3310                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3311{
3312    static const TCGOpcode vecop_list[] = {
3313        INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
3314    };
3315    static const GVecGen2i ops[4] = {
3316        { .fni8 = gen_srshr8_i64,
3317          .fniv = gen_srshr_vec,
3318          .fno = gen_helper_gvec_srshr_b,
3319          .opt_opc = vecop_list,
3320          .vece = MO_8 },
3321        { .fni8 = gen_srshr16_i64,
3322          .fniv = gen_srshr_vec,
3323          .fno = gen_helper_gvec_srshr_h,
3324          .opt_opc = vecop_list,
3325          .vece = MO_16 },
3326        { .fni4 = gen_srshr32_i32,
3327          .fniv = gen_srshr_vec,
3328          .fno = gen_helper_gvec_srshr_s,
3329          .opt_opc = vecop_list,
3330          .vece = MO_32 },
3331        { .fni8 = gen_srshr64_i64,
3332          .fniv = gen_srshr_vec,
3333          .fno = gen_helper_gvec_srshr_d,
3334          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3335          .opt_opc = vecop_list,
3336          .vece = MO_64 },
3337    };
3338
3339    /* tszimm encoding produces immediates in the range [1..esize] */
3340    tcg_debug_assert(shift > 0);
3341    tcg_debug_assert(shift <= (8 << vece));
3342
3343    if (shift == (8 << vece)) {
3344        /*
3345         * Shifts larger than the element size are architecturally valid.
3346         * Signed input yields all sign bits.  With rounding, this produces
3347         *   (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
3348         * I.e. always zero.
3349         */
3350        tcg_gen_gvec_dup_imm(vece, rd_ofs, opr_sz, max_sz, 0);
3351    } else {
3352        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3353    }
3354}
3355
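/* SRSRA: signed rounding shift right by immediate and accumulate. */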
3356static void gen_srsra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3357{
3358    TCGv_i64 t = tcg_temp_new_i64();
3359
3360    gen_srshr8_i64(t, a, sh);
3361    tcg_gen_vec_add8_i64(d, d, t);
3362    tcg_temp_free_i64(t);
3363}
3364
3365static void gen_srsra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3366{
3367    TCGv_i64 t = tcg_temp_new_i64();
3368
3369    gen_srshr16_i64(t, a, sh);
3370    tcg_gen_vec_add16_i64(d, d, t);
3371    tcg_temp_free_i64(t);
3372}
3373
3374static void gen_srsra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3375{
3376    TCGv_i32 t = tcg_temp_new_i32();
3377
3378    gen_srshr32_i32(t, a, sh);
3379    tcg_gen_add_i32(d, d, t);
3380    tcg_temp_free_i32(t);
3381}
3382
3383static void gen_srsra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3384{
3385    TCGv_i64 t = tcg_temp_new_i64();
3386
3387    gen_srshr64_i64(t, a, sh);
3388    tcg_gen_add_i64(d, d, t);
3389    tcg_temp_free_i64(t);
3390}
3391
3392static void gen_srsra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3393{
3394    TCGv_vec t = tcg_temp_new_vec_matching(d);
3395
3396    gen_srshr_vec(vece, t, a, sh);
3397    tcg_gen_add_vec(vece, d, d, t);
3398    tcg_temp_free_vec(t);
3399}
3400
3401void gen_gvec_srsra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3402                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3403{
3404    static const TCGOpcode vecop_list[] = {
3405        INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
3406    };
3407    static const GVecGen2i ops[4] = {
3408        { .fni8 = gen_srsra8_i64,
3409          .fniv = gen_srsra_vec,
3410          .fno = gen_helper_gvec_srsra_b,
3411          .opt_opc = vecop_list,
3412          .load_dest = true,
3413          .vece = MO_8 },
3414        { .fni8 = gen_srsra16_i64,
3415          .fniv = gen_srsra_vec,
3416          .fno = gen_helper_gvec_srsra_h,
3417          .opt_opc = vecop_list,
3418          .load_dest = true,
3419          .vece = MO_16 },
3420        { .fni4 = gen_srsra32_i32,
3421          .fniv = gen_srsra_vec,
3422          .fno = gen_helper_gvec_srsra_s,
3423          .opt_opc = vecop_list,
3424          .load_dest = true,
3425          .vece = MO_32 },
3426        { .fni8 = gen_srsra64_i64,
3427          .fniv = gen_srsra_vec,
3428          .fno = gen_helper_gvec_srsra_d,
3429          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3430          .opt_opc = vecop_list,
3431          .load_dest = true,
3432          .vece = MO_64 },
3433    };
3434
3435    /* tszimm encoding produces immediates in the range [1..esize] */
3436    tcg_debug_assert(shift > 0);
3437    tcg_debug_assert(shift <= (8 << vece));
3438
3439    /*
3440     * Shifts larger than the element size are architecturally valid.
3441     * Signed input yields all sign bits.  With rounding, this produces
3442     *   (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
3443     * I.e. always zero.  With accumulation, this leaves D unchanged.
3444     */
3445    if (shift == (8 << vece)) {
3446        /* Nop, but we do need to clear the tail. */
3447        tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
3448    } else {
3449        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3450    }
3451}
3452
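/* URSHR: unsigned rounding shift right by immediate. */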
3453static void gen_urshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3454{
3455    TCGv_i64 t = tcg_temp_new_i64();
3456
3457    tcg_gen_shri_i64(t, a, sh - 1);
3458    tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
3459    tcg_gen_vec_shr8i_i64(d, a, sh);
3460    tcg_gen_vec_add8_i64(d, d, t);
3461    tcg_temp_free_i64(t);
3462}
3463
3464static void gen_urshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3465{
3466    TCGv_i64 t = tcg_temp_new_i64();
3467
3468    tcg_gen_shri_i64(t, a, sh - 1);
3469    tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
3470    tcg_gen_vec_shr16i_i64(d, a, sh);
3471    tcg_gen_vec_add16_i64(d, d, t);
3472    tcg_temp_free_i64(t);
3473}
3474
3475static void gen_urshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3476{
3477    TCGv_i32 t;
3478
3479    /* Handle shift by the input size for the benefit of trans_URSHR_ri */
3480    if (sh == 32) {
3481        tcg_gen_extract_i32(d, a, sh - 1, 1);
3482        return;
3483    }
3484    t = tcg_temp_new_i32();
3485    tcg_gen_extract_i32(t, a, sh - 1, 1);
3486    tcg_gen_shri_i32(d, a, sh);
3487    tcg_gen_add_i32(d, d, t);
3488    tcg_temp_free_i32(t);
3489}
3490
3491static void gen_urshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3492{
3493    TCGv_i64 t = tcg_temp_new_i64();
3494
3495    tcg_gen_extract_i64(t, a, sh - 1, 1);
3496    tcg_gen_shri_i64(d, a, sh);
3497    tcg_gen_add_i64(d, d, t);
3498    tcg_temp_free_i64(t);
3499}
3500
3501static void gen_urshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t shift)
3502{
3503    TCGv_vec t = tcg_temp_new_vec_matching(d);
3504    TCGv_vec ones = tcg_temp_new_vec_matching(d);
3505
3506    tcg_gen_shri_vec(vece, t, a, shift - 1);
3507    tcg_gen_dupi_vec(vece, ones, 1);
3508    tcg_gen_and_vec(vece, t, t, ones);
3509    tcg_gen_shri_vec(vece, d, a, shift);
3510    tcg_gen_add_vec(vece, d, d, t);
3511
3512    tcg_temp_free_vec(t);
3513    tcg_temp_free_vec(ones);
3514}
3515
3516void gen_gvec_urshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3517                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3518{
3519    static const TCGOpcode vecop_list[] = {
3520        INDEX_op_shri_vec, INDEX_op_add_vec, 0
3521    };
3522    static const GVecGen2i ops[4] = {
3523        { .fni8 = gen_urshr8_i64,
3524          .fniv = gen_urshr_vec,
3525          .fno = gen_helper_gvec_urshr_b,
3526          .opt_opc = vecop_list,
3527          .vece = MO_8 },
3528        { .fni8 = gen_urshr16_i64,
3529          .fniv = gen_urshr_vec,
3530          .fno = gen_helper_gvec_urshr_h,
3531          .opt_opc = vecop_list,
3532          .vece = MO_16 },
3533        { .fni4 = gen_urshr32_i32,
3534          .fniv = gen_urshr_vec,
3535          .fno = gen_helper_gvec_urshr_s,
3536          .opt_opc = vecop_list,
3537          .vece = MO_32 },
3538        { .fni8 = gen_urshr64_i64,
3539          .fniv = gen_urshr_vec,
3540          .fno = gen_helper_gvec_urshr_d,
3541          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3542          .opt_opc = vecop_list,
3543          .vece = MO_64 },
3544    };
3545
3546    /* tszimm encoding produces immediates in the range [1..esize] */
3547    tcg_debug_assert(shift > 0);
3548    tcg_debug_assert(shift <= (8 << vece));
3549
3550    if (shift == (8 << vece)) {
3551        /*
3552         * Shifts larger than the element size are architecturally valid.
3553         * Unsigned input yields zero.  With rounding, this produces a
3554         * copy of the most significant bit.
3555         */
3556        tcg_gen_gvec_shri(vece, rd_ofs, rm_ofs, shift - 1, opr_sz, max_sz);
3557    } else {
3558        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3559    }
3560}
3561
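/*
 * URSRA: unsigned rounding shift right by immediate and accumulate.
 * A shift by the full element size cannot go through gen_urshr*,
 * because the underlying per-element shift would be out of range;
 * in that case the rounded result is just the top bit of each element.
 */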
3562static void gen_ursra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3563{
3564    TCGv_i64 t = tcg_temp_new_i64();
3565
3566    if (sh == 8) {
3567        tcg_gen_vec_shr8i_i64(t, a, 7);
3568    } else {
3569        gen_urshr8_i64(t, a, sh);
3570    }
3571    tcg_gen_vec_add8_i64(d, d, t);
3572    tcg_temp_free_i64(t);
3573}
3574
3575static void gen_ursra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3576{
3577    TCGv_i64 t = tcg_temp_new_i64();
3578
3579    if (sh == 16) {
3580        tcg_gen_vec_shr16i_i64(t, a, 15);
3581    } else {
3582        gen_urshr16_i64(t, a, sh);
3583    }
3584    tcg_gen_vec_add16_i64(d, d, t);
3585    tcg_temp_free_i64(t);
3586}
3587
3588static void gen_ursra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3589{
3590    TCGv_i32 t = tcg_temp_new_i32();
3591
3592    if (sh == 32) {
3593        tcg_gen_shri_i32(t, a, 31);
3594    } else {
3595        gen_urshr32_i32(t, a, sh);
3596    }
3597    tcg_gen_add_i32(d, d, t);
3598    tcg_temp_free_i32(t);
3599}
3600
3601static void gen_ursra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3602{
3603    TCGv_i64 t = tcg_temp_new_i64();
3604
3605    if (sh == 64) {
3606        tcg_gen_shri_i64(t, a, 63);
3607    } else {
3608        gen_urshr64_i64(t, a, sh);
3609    }
3610    tcg_gen_add_i64(d, d, t);
3611    tcg_temp_free_i64(t);
3612}
3613
3614static void gen_ursra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3615{
3616    TCGv_vec t = tcg_temp_new_vec_matching(d);
3617
3618    if (sh == (8 << vece)) {
3619        tcg_gen_shri_vec(vece, t, a, sh - 1);
3620    } else {
3621        gen_urshr_vec(vece, t, a, sh);
3622    }
3623    tcg_gen_add_vec(vece, d, d, t);
3624    tcg_temp_free_vec(t);
3625}
3626
3627void gen_gvec_ursra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3628                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3629{
3630    static const TCGOpcode vecop_list[] = {
3631        INDEX_op_shri_vec, INDEX_op_add_vec, 0
3632    };
3633    static const GVecGen2i ops[4] = {
3634        { .fni8 = gen_ursra8_i64,
3635          .fniv = gen_ursra_vec,
3636          .fno = gen_helper_gvec_ursra_b,
3637          .opt_opc = vecop_list,
3638          .load_dest = true,
3639          .vece = MO_8 },
3640        { .fni8 = gen_ursra16_i64,
3641          .fniv = gen_ursra_vec,
3642          .fno = gen_helper_gvec_ursra_h,
3643          .opt_opc = vecop_list,
3644          .load_dest = true,
3645          .vece = MO_16 },
3646        { .fni4 = gen_ursra32_i32,
3647          .fniv = gen_ursra_vec,
3648          .fno = gen_helper_gvec_ursra_s,
3649          .opt_opc = vecop_list,
3650          .load_dest = true,
3651          .vece = MO_32 },
3652        { .fni8 = gen_ursra64_i64,
3653          .fniv = gen_ursra_vec,
3654          .fno = gen_helper_gvec_ursra_d,
3655          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3656          .opt_opc = vecop_list,
3657          .load_dest = true,
3658          .vece = MO_64 },
3659    };
3660
3661    /* tszimm encoding produces immediates in the range [1..esize] */
3662    tcg_debug_assert(shift > 0);
3663    tcg_debug_assert(shift <= (8 << vece));
3664
3665    tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3666}
3667
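/*
 * SRI: shift right by immediate and insert.  The low esize - shift
 * bits of each destination element are replaced by the shifted
 * source element; the top shift bits are left unchanged.
 */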
3668static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3669{
3670    uint64_t mask = dup_const(MO_8, 0xff >> shift);
3671    TCGv_i64 t = tcg_temp_new_i64();
3672
3673    tcg_gen_shri_i64(t, a, shift);
3674    tcg_gen_andi_i64(t, t, mask);
3675    tcg_gen_andi_i64(d, d, ~mask);
3676    tcg_gen_or_i64(d, d, t);
3677    tcg_temp_free_i64(t);
3678}
3679
3680static void gen_shr16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3681{
3682    uint64_t mask = dup_const(MO_16, 0xffff >> shift);
3683    TCGv_i64 t = tcg_temp_new_i64();
3684
3685    tcg_gen_shri_i64(t, a, shift);
3686    tcg_gen_andi_i64(t, t, mask);
3687    tcg_gen_andi_i64(d, d, ~mask);
3688    tcg_gen_or_i64(d, d, t);
3689    tcg_temp_free_i64(t);
3690}
3691
3692static void gen_shr32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3693{
3694    tcg_gen_shri_i32(a, a, shift);
3695    tcg_gen_deposit_i32(d, d, a, 0, 32 - shift);
3696}
3697
3698static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3699{
3700    tcg_gen_shri_i64(a, a, shift);
3701    tcg_gen_deposit_i64(d, d, a, 0, 64 - shift);
3702}
3703
3704static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3705{
3706    TCGv_vec t = tcg_temp_new_vec_matching(d);
3707    TCGv_vec m = tcg_temp_new_vec_matching(d);
3708
3709    tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh));
3710    tcg_gen_shri_vec(vece, t, a, sh);
3711    tcg_gen_and_vec(vece, d, d, m);
3712    tcg_gen_or_vec(vece, d, d, t);
3713
3714    tcg_temp_free_vec(t);
3715    tcg_temp_free_vec(m);
3716}
3717
3718void gen_gvec_sri(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3719                  int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3720{
3721    static const TCGOpcode vecop_list[] = { INDEX_op_shri_vec, 0 };
3722    const GVecGen2i ops[4] = {
3723        { .fni8 = gen_shr8_ins_i64,
3724          .fniv = gen_shr_ins_vec,
3725          .fno = gen_helper_gvec_sri_b,
3726          .load_dest = true,
3727          .opt_opc = vecop_list,
3728          .vece = MO_8 },
3729        { .fni8 = gen_shr16_ins_i64,
3730          .fniv = gen_shr_ins_vec,
3731          .fno = gen_helper_gvec_sri_h,
3732          .load_dest = true,
3733          .opt_opc = vecop_list,
3734          .vece = MO_16 },
3735        { .fni4 = gen_shr32_ins_i32,
3736          .fniv = gen_shr_ins_vec,
3737          .fno = gen_helper_gvec_sri_s,
3738          .load_dest = true,
3739          .opt_opc = vecop_list,
3740          .vece = MO_32 },
3741        { .fni8 = gen_shr64_ins_i64,
3742          .fniv = gen_shr_ins_vec,
3743          .fno = gen_helper_gvec_sri_d,
3744          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3745          .load_dest = true,
3746          .opt_opc = vecop_list,
3747          .vece = MO_64 },
3748    };
3749
3750    /* tszimm encoding produces immediates in the range [1..esize]. */
3751    tcg_debug_assert(shift > 0);
3752    tcg_debug_assert(shift <= (8 << vece));
3753
3754    /* Shift of esize leaves destination unchanged. */
3755    if (shift < (8 << vece)) {
3756        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3757    } else {
3758        /* Nop, but we do need to clear the tail. */
3759        tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
3760    }
3761}
3762
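/*
 * SLI: shift left by immediate and insert.  The top esize - shift
 * bits of each destination element are replaced by the shifted
 * source element; the low shift bits are left unchanged.
 */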
3763static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3764{
3765    uint64_t mask = dup_const(MO_8, 0xff << shift);
3766    TCGv_i64 t = tcg_temp_new_i64();
3767
3768    tcg_gen_shli_i64(t, a, shift);
3769    tcg_gen_andi_i64(t, t, mask);
3770    tcg_gen_andi_i64(d, d, ~mask);
3771    tcg_gen_or_i64(d, d, t);
3772    tcg_temp_free_i64(t);
3773}
3774
3775static void gen_shl16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3776{
3777    uint64_t mask = dup_const(MO_16, 0xffff << shift);
3778    TCGv_i64 t = tcg_temp_new_i64();
3779
3780    tcg_gen_shli_i64(t, a, shift);
3781    tcg_gen_andi_i64(t, t, mask);
3782    tcg_gen_andi_i64(d, d, ~mask);
3783    tcg_gen_or_i64(d, d, t);
3784    tcg_temp_free_i64(t);
3785}
3786
3787static void gen_shl32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3788{
3789    tcg_gen_deposit_i32(d, d, a, shift, 32 - shift);
3790}
3791
3792static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3793{
3794    tcg_gen_deposit_i64(d, d, a, shift, 64 - shift);
3795}
3796
3797static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3798{
3799    TCGv_vec t = tcg_temp_new_vec_matching(d);
3800    TCGv_vec m = tcg_temp_new_vec_matching(d);
3801
3802    tcg_gen_shli_vec(vece, t, a, sh);
3803    tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK(0, sh));
3804    tcg_gen_and_vec(vece, d, d, m);
3805    tcg_gen_or_vec(vece, d, d, t);
3806
3807    tcg_temp_free_vec(t);
3808    tcg_temp_free_vec(m);
3809}
3810
3811void gen_gvec_sli(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3812                  int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3813{
3814    static const TCGOpcode vecop_list[] = { INDEX_op_shli_vec, 0 };
3815    const GVecGen2i ops[4] = {
3816        { .fni8 = gen_shl8_ins_i64,
3817          .fniv = gen_shl_ins_vec,
3818          .fno = gen_helper_gvec_sli_b,
3819          .load_dest = true,
3820          .opt_opc = vecop_list,
3821          .vece = MO_8 },
3822        { .fni8 = gen_shl16_ins_i64,
3823          .fniv = gen_shl_ins_vec,
3824          .fno = gen_helper_gvec_sli_h,
3825          .load_dest = true,
3826          .opt_opc = vecop_list,
3827          .vece = MO_16 },
3828        { .fni4 = gen_shl32_ins_i32,
3829          .fniv = gen_shl_ins_vec,
3830          .fno = gen_helper_gvec_sli_s,
3831          .load_dest = true,
3832          .opt_opc = vecop_list,
3833          .vece = MO_32 },
3834        { .fni8 = gen_shl64_ins_i64,
3835          .fniv = gen_shl_ins_vec,
3836          .fno = gen_helper_gvec_sli_d,
3837          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3838          .load_dest = true,
3839          .opt_opc = vecop_list,
3840          .vece = MO_64 },
3841    };
3842
3843    /* tszimm encoding produces immediates in the range [0..esize-1]. */
3844    tcg_debug_assert(shift >= 0);
3845    tcg_debug_assert(shift < (8 << vece));
3846
3847    if (shift == 0) {
3848        tcg_gen_gvec_mov(vece, rd_ofs, rm_ofs, opr_sz, max_sz);
3849    } else {
3850        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3851    }
3852}
3853
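/*
 * Integer multiply-accumulate (VMLA) and multiply-subtract (VMLS)
 * per-element helpers.
 */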
3854static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3855{
3856    gen_helper_neon_mul_u8(a, a, b);
3857    gen_helper_neon_add_u8(d, d, a);
3858}
3859
3860static void gen_mls8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3861{
3862    gen_helper_neon_mul_u8(a, a, b);
3863    gen_helper_neon_sub_u8(d, d, a);
3864}
3865
3866static void gen_mla16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3867{
3868    gen_helper_neon_mul_u16(a, a, b);
3869    gen_helper_neon_add_u16(d, d, a);
3870}
3871
3872static void gen_mls16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3873{
3874    gen_helper_neon_mul_u16(a, a, b);
3875    gen_helper_neon_sub_u16(d, d, a);
3876}
3877
3878static void gen_mla32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3879{
3880    tcg_gen_mul_i32(a, a, b);
3881    tcg_gen_add_i32(d, d, a);
3882}
3883
3884static void gen_mls32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3885{
3886    tcg_gen_mul_i32(a, a, b);
3887    tcg_gen_sub_i32(d, d, a);
3888}
3889
3890static void gen_mla64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
3891{
3892    tcg_gen_mul_i64(a, a, b);
3893    tcg_gen_add_i64(d, d, a);
3894}
3895
3896static void gen_mls64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
3897{
3898    tcg_gen_mul_i64(a, a, b);
3899    tcg_gen_sub_i64(d, d, a);
3900}
3901
3902static void gen_mla_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
3903{
3904    tcg_gen_mul_vec(vece, a, a, b);
3905    tcg_gen_add_vec(vece, d, d, a);
3906}
3907
3908static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
3909{
3910    tcg_gen_mul_vec(vece, a, a, b);
3911    tcg_gen_sub_vec(vece, d, d, a);
3912}
3913
3914/* Note that while NEON does not support VMLA and VMLS as 64-bit ops,
3915 * these tables are shared with AArch64, which does support them.
3916 */
3917void gen_gvec_mla(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3918                  uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3919{
3920    static const TCGOpcode vecop_list[] = {
3921        INDEX_op_mul_vec, INDEX_op_add_vec, 0
3922    };
3923    static const GVecGen3 ops[4] = {
3924        { .fni4 = gen_mla8_i32,
3925          .fniv = gen_mla_vec,
3926          .load_dest = true,
3927          .opt_opc = vecop_list,
3928          .vece = MO_8 },
3929        { .fni4 = gen_mla16_i32,
3930          .fniv = gen_mla_vec,
3931          .load_dest = true,
3932          .opt_opc = vecop_list,
3933          .vece = MO_16 },
3934        { .fni4 = gen_mla32_i32,
3935          .fniv = gen_mla_vec,
3936          .load_dest = true,
3937          .opt_opc = vecop_list,
3938          .vece = MO_32 },
3939        { .fni8 = gen_mla64_i64,
3940          .fniv = gen_mla_vec,
3941          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3942          .load_dest = true,
3943          .opt_opc = vecop_list,
3944          .vece = MO_64 },
3945    };
3946    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
3947}
3948
3949void gen_gvec_mls(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3950                  uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3951{
3952    static const TCGOpcode vecop_list[] = {
3953        INDEX_op_mul_vec, INDEX_op_sub_vec, 0
3954    };
3955    static const GVecGen3 ops[4] = {
3956        { .fni4 = gen_mls8_i32,
3957          .fniv = gen_mls_vec,
3958          .load_dest = true,
3959          .opt_opc = vecop_list,
3960          .vece = MO_8 },
3961        { .fni4 = gen_mls16_i32,
3962          .fniv = gen_mls_vec,
3963          .load_dest = true,
3964          .opt_opc = vecop_list,
3965          .vece = MO_16 },
3966        { .fni4 = gen_mls32_i32,
3967          .fniv = gen_mls_vec,
3968          .load_dest = true,
3969          .opt_opc = vecop_list,
3970          .vece = MO_32 },
3971        { .fni8 = gen_mls64_i64,
3972          .fniv = gen_mls_vec,
3973          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3974          .load_dest = true,
3975          .opt_opc = vecop_list,
3976          .vece = MO_64 },
3977    };
3978    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
3979}
3980
3981/* CMTST: test is "if ((X & Y) != 0)". */
3982static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3983{
3984    tcg_gen_and_i32(d, a, b);
3985    tcg_gen_setcondi_i32(TCG_COND_NE, d, d, 0);
3986    tcg_gen_neg_i32(d, d);
3987}
3988
3989void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
3990{
3991    tcg_gen_and_i64(d, a, b);
3992    tcg_gen_setcondi_i64(TCG_COND_NE, d, d, 0);
3993    tcg_gen_neg_i64(d, d);
3994}
3995
3996static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
3997{
3998    tcg_gen_and_vec(vece, d, a, b);
3999    tcg_gen_dupi_vec(vece, a, 0);
4000    tcg_gen_cmp_vec(TCG_COND_NE, vece, d, d, a);
4001}
4002
4003void gen_gvec_cmtst(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4004                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4005{
4006    static const TCGOpcode vecop_list[] = { INDEX_op_cmp_vec, 0 };
4007    static const GVecGen3 ops[4] = {
4008        { .fni4 = gen_helper_neon_tst_u8,
4009          .fniv = gen_cmtst_vec,
4010          .opt_opc = vecop_list,
4011          .vece = MO_8 },
4012        { .fni4 = gen_helper_neon_tst_u16,
4013          .fniv = gen_cmtst_vec,
4014          .opt_opc = vecop_list,
4015          .vece = MO_16 },
4016        { .fni4 = gen_cmtst_i32,
4017          .fniv = gen_cmtst_vec,
4018          .opt_opc = vecop_list,
4019          .vece = MO_32 },
4020        { .fni8 = gen_cmtst_i64,
4021          .fniv = gen_cmtst_vec,
4022          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4023          .opt_opc = vecop_list,
4024          .vece = MO_64 },
4025    };
4026    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4027}
4028
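/*
 * USHL: unsigned shift by a signed, per-element register amount.
 * The shift count is the low byte of each shift element, sign
 * extended: positive counts shift left, negative counts shift
 * right, and counts whose magnitude is >= esize give zero.
 */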
4029void gen_ushl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
4030{
4031    TCGv_i32 lval = tcg_temp_new_i32();
4032    TCGv_i32 rval = tcg_temp_new_i32();
4033    TCGv_i32 lsh = tcg_temp_new_i32();
4034    TCGv_i32 rsh = tcg_temp_new_i32();
4035    TCGv_i32 zero = tcg_constant_i32(0);
4036    TCGv_i32 max = tcg_constant_i32(32);
4037
4038    /*
4039     * Rely on the TCG guarantee that out of range shifts produce
4040     * unspecified results, not undefined behaviour (i.e. no trap).
4041     * Discard out-of-range results after the fact.
4042     */
4043    tcg_gen_ext8s_i32(lsh, shift);
4044    tcg_gen_neg_i32(rsh, lsh);
4045    tcg_gen_shl_i32(lval, src, lsh);
4046    tcg_gen_shr_i32(rval, src, rsh);
4047    tcg_gen_movcond_i32(TCG_COND_LTU, dst, lsh, max, lval, zero);
4048    tcg_gen_movcond_i32(TCG_COND_LTU, dst, rsh, max, rval, dst);
4049
4050    tcg_temp_free_i32(lval);
4051    tcg_temp_free_i32(rval);
4052    tcg_temp_free_i32(lsh);
4053    tcg_temp_free_i32(rsh);
4054}
4055
4056void gen_ushl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
4057{
4058    TCGv_i64 lval = tcg_temp_new_i64();
4059    TCGv_i64 rval = tcg_temp_new_i64();
4060    TCGv_i64 lsh = tcg_temp_new_i64();
4061    TCGv_i64 rsh = tcg_temp_new_i64();
4062    TCGv_i64 zero = tcg_constant_i64(0);
4063    TCGv_i64 max = tcg_constant_i64(64);
4064
4065    /*
4066     * Rely on the TCG guarantee that out of range shifts produce
4067     * unspecified results, not undefined behaviour (i.e. no trap).
4068     * Discard out-of-range results after the fact.
4069     */
4070    tcg_gen_ext8s_i64(lsh, shift);
4071    tcg_gen_neg_i64(rsh, lsh);
4072    tcg_gen_shl_i64(lval, src, lsh);
4073    tcg_gen_shr_i64(rval, src, rsh);
4074    tcg_gen_movcond_i64(TCG_COND_LTU, dst, lsh, max, lval, zero);
4075    tcg_gen_movcond_i64(TCG_COND_LTU, dst, rsh, max, rval, dst);
4076
4077    tcg_temp_free_i64(lval);
4078    tcg_temp_free_i64(rval);
4079    tcg_temp_free_i64(lsh);
4080    tcg_temp_free_i64(rsh);
4081}
4082
4083static void gen_ushl_vec(unsigned vece, TCGv_vec dst,
4084                         TCGv_vec src, TCGv_vec shift)
4085{
4086    TCGv_vec lval = tcg_temp_new_vec_matching(dst);
4087    TCGv_vec rval = tcg_temp_new_vec_matching(dst);
4088    TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
4089    TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
4090    TCGv_vec msk, max;
4091
4092    tcg_gen_neg_vec(vece, rsh, shift);
4093    if (vece == MO_8) {
4094        tcg_gen_mov_vec(lsh, shift);
4095    } else {
4096        msk = tcg_temp_new_vec_matching(dst);
4097        tcg_gen_dupi_vec(vece, msk, 0xff);
4098        tcg_gen_and_vec(vece, lsh, shift, msk);
4099        tcg_gen_and_vec(vece, rsh, rsh, msk);
4100        tcg_temp_free_vec(msk);
4101    }
4102
4103    /*
4104     * Rely on the TCG guarantee that out of range shifts produce
4105     * unspecified results, not undefined behaviour (i.e. no trap).
4106     * Discard out-of-range results after the fact.
4107     */
4108    tcg_gen_shlv_vec(vece, lval, src, lsh);
4109    tcg_gen_shrv_vec(vece, rval, src, rsh);
4110
4111    max = tcg_temp_new_vec_matching(dst);
4112    tcg_gen_dupi_vec(vece, max, 8 << vece);
4113
4114    /*
4115     * The choice of LT (signed) and GEU (unsigned) is biased toward
4116     * the instructions of the x86_64 host.  For MO_8, the whole byte
4117     * is significant so we must use an unsigned compare; otherwise we
4118     * have already masked to a byte and so a signed compare works.
4119     * Other tcg hosts have a full set of comparisons and do not care.
4120     */
4121    if (vece == MO_8) {
4122        tcg_gen_cmp_vec(TCG_COND_GEU, vece, lsh, lsh, max);
4123        tcg_gen_cmp_vec(TCG_COND_GEU, vece, rsh, rsh, max);
4124        tcg_gen_andc_vec(vece, lval, lval, lsh);
4125        tcg_gen_andc_vec(vece, rval, rval, rsh);
4126    } else {
4127        tcg_gen_cmp_vec(TCG_COND_LT, vece, lsh, lsh, max);
4128        tcg_gen_cmp_vec(TCG_COND_LT, vece, rsh, rsh, max);
4129        tcg_gen_and_vec(vece, lval, lval, lsh);
4130        tcg_gen_and_vec(vece, rval, rval, rsh);
4131    }
4132    tcg_gen_or_vec(vece, dst, lval, rval);
4133
4134    tcg_temp_free_vec(max);
4135    tcg_temp_free_vec(lval);
4136    tcg_temp_free_vec(rval);
4137    tcg_temp_free_vec(lsh);
4138    tcg_temp_free_vec(rsh);
4139}
4140
4141void gen_gvec_ushl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4142                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4143{
4144    static const TCGOpcode vecop_list[] = {
4145        INDEX_op_neg_vec, INDEX_op_shlv_vec,
4146        INDEX_op_shrv_vec, INDEX_op_cmp_vec, 0
4147    };
4148    static const GVecGen3 ops[4] = {
4149        { .fniv = gen_ushl_vec,
4150          .fno = gen_helper_gvec_ushl_b,
4151          .opt_opc = vecop_list,
4152          .vece = MO_8 },
4153        { .fniv = gen_ushl_vec,
4154          .fno = gen_helper_gvec_ushl_h,
4155          .opt_opc = vecop_list,
4156          .vece = MO_16 },
4157        { .fni4 = gen_ushl_i32,
4158          .fniv = gen_ushl_vec,
4159          .opt_opc = vecop_list,
4160          .vece = MO_32 },
4161        { .fni8 = gen_ushl_i64,
4162          .fniv = gen_ushl_vec,
4163          .opt_opc = vecop_list,
4164          .vece = MO_64 },
4165    };
4166    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4167}
4168
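/*
 * SSHL: signed shift by a signed, per-element register amount.
 * As for USHL, but negative counts perform an arithmetic shift
 * right: left shift counts >= esize give zero, while right shift
 * counts >= esize give all sign bits (the count is clamped to
 * esize - 1).
 */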
4169void gen_sshl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
4170{
4171    TCGv_i32 lval = tcg_temp_new_i32();
4172    TCGv_i32 rval = tcg_temp_new_i32();
4173    TCGv_i32 lsh = tcg_temp_new_i32();
4174    TCGv_i32 rsh = tcg_temp_new_i32();
4175    TCGv_i32 zero = tcg_constant_i32(0);
4176    TCGv_i32 max = tcg_constant_i32(31);
4177
4178    /*
4179     * Rely on the TCG guarantee that out of range shifts produce
4180     * unspecified results, not undefined behaviour (i.e. no trap).
4181     * Discard out-of-range results after the fact.
4182     */
4183    tcg_gen_ext8s_i32(lsh, shift);
4184    tcg_gen_neg_i32(rsh, lsh);
4185    tcg_gen_shl_i32(lval, src, lsh);
4186    tcg_gen_umin_i32(rsh, rsh, max);
4187    tcg_gen_sar_i32(rval, src, rsh);
4188    tcg_gen_movcond_i32(TCG_COND_LEU, lval, lsh, max, lval, zero);
4189    tcg_gen_movcond_i32(TCG_COND_LT, dst, lsh, zero, rval, lval);
4190
4191    tcg_temp_free_i32(lval);
4192    tcg_temp_free_i32(rval);
4193    tcg_temp_free_i32(lsh);
4194    tcg_temp_free_i32(rsh);
4195}
4196
4197void gen_sshl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
4198{
4199    TCGv_i64 lval = tcg_temp_new_i64();
4200    TCGv_i64 rval = tcg_temp_new_i64();
4201    TCGv_i64 lsh = tcg_temp_new_i64();
4202    TCGv_i64 rsh = tcg_temp_new_i64();
4203    TCGv_i64 zero = tcg_constant_i64(0);
4204    TCGv_i64 max = tcg_constant_i64(63);
4205
4206    /*
4207     * Rely on the TCG guarantee that out of range shifts produce
4208     * unspecified results, not undefined behaviour (i.e. no trap).
4209     * Discard out-of-range results after the fact.
4210     */
4211    tcg_gen_ext8s_i64(lsh, shift);
4212    tcg_gen_neg_i64(rsh, lsh);
4213    tcg_gen_shl_i64(lval, src, lsh);
4214    tcg_gen_umin_i64(rsh, rsh, max);
4215    tcg_gen_sar_i64(rval, src, rsh);
4216    tcg_gen_movcond_i64(TCG_COND_LEU, lval, lsh, max, lval, zero);
4217    tcg_gen_movcond_i64(TCG_COND_LT, dst, lsh, zero, rval, lval);
4218
4219    tcg_temp_free_i64(lval);
4220    tcg_temp_free_i64(rval);
4221    tcg_temp_free_i64(lsh);
4222    tcg_temp_free_i64(rsh);
4223}
4224
4225static void gen_sshl_vec(unsigned vece, TCGv_vec dst,
4226                         TCGv_vec src, TCGv_vec shift)
4227{
4228    TCGv_vec lval = tcg_temp_new_vec_matching(dst);
4229    TCGv_vec rval = tcg_temp_new_vec_matching(dst);
4230    TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
4231    TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
4232    TCGv_vec tmp = tcg_temp_new_vec_matching(dst);
4233
4234    /*
4235     * Rely on the TCG guarantee that out of range shifts produce
4236     * unspecified results, not undefined behaviour (i.e. no trap).
4237     * Discard out-of-range results after the fact.
4238     */
4239    tcg_gen_neg_vec(vece, rsh, shift);
4240    if (vece == MO_8) {
4241        tcg_gen_mov_vec(lsh, shift);
4242    } else {
4243        tcg_gen_dupi_vec(vece, tmp, 0xff);
4244        tcg_gen_and_vec(vece, lsh, shift, tmp);
4245        tcg_gen_and_vec(vece, rsh, rsh, tmp);
4246    }
4247
4248    /* Bound rsh so out of bound right shift gets -1.  */
4249    tcg_gen_dupi_vec(vece, tmp, (8 << vece) - 1);
4250    tcg_gen_umin_vec(vece, rsh, rsh, tmp);
4251    tcg_gen_cmp_vec(TCG_COND_GT, vece, tmp, lsh, tmp);
4252
4253    tcg_gen_shlv_vec(vece, lval, src, lsh);
4254    tcg_gen_sarv_vec(vece, rval, src, rsh);
4255
4256    /* Select in-bound left shift.  */
4257    tcg_gen_andc_vec(vece, lval, lval, tmp);
4258
4259    /* Select between left and right shift.  */
4260    if (vece == MO_8) {
4261        tcg_gen_dupi_vec(vece, tmp, 0);
4262        tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, rval, lval);
4263    } else {
4264        tcg_gen_dupi_vec(vece, tmp, 0x80);
4265        tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, lval, rval);
4266    }
4267
4268    tcg_temp_free_vec(lval);
4269    tcg_temp_free_vec(rval);
4270    tcg_temp_free_vec(lsh);
4271    tcg_temp_free_vec(rsh);
4272    tcg_temp_free_vec(tmp);
4273}
4274
4275void gen_gvec_sshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4276                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4277{
4278    static const TCGOpcode vecop_list[] = {
4279        INDEX_op_neg_vec, INDEX_op_umin_vec, INDEX_op_shlv_vec,
4280        INDEX_op_sarv_vec, INDEX_op_cmp_vec, INDEX_op_cmpsel_vec, 0
4281    };
4282    static const GVecGen3 ops[4] = {
4283        { .fniv = gen_sshl_vec,
4284          .fno = gen_helper_gvec_sshl_b,
4285          .opt_opc = vecop_list,
4286          .vece = MO_8 },
4287        { .fniv = gen_sshl_vec,
4288          .fno = gen_helper_gvec_sshl_h,
4289          .opt_opc = vecop_list,
4290          .vece = MO_16 },
4291        { .fni4 = gen_sshl_i32,
4292          .fniv = gen_sshl_vec,
4293          .opt_opc = vecop_list,
4294          .vece = MO_32 },
4295        { .fni8 = gen_sshl_i64,
4296          .fniv = gen_sshl_vec,
4297          .opt_opc = vecop_list,
4298          .vece = MO_64 },
4299    };
4300    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4301}
4302
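/*
 * Saturating add/subtract expanders which also set the cumulative
 * saturation flag: each computes both the wrapped and the saturated
 * result and ORs any per-element difference into the QC vector
 * (vfp.qc), passed as the second (write_aofs) operand of
 * tcg_gen_gvec_4.
 */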
4303static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4304                          TCGv_vec a, TCGv_vec b)
4305{
4306    TCGv_vec x = tcg_temp_new_vec_matching(t);
4307    tcg_gen_add_vec(vece, x, a, b);
4308    tcg_gen_usadd_vec(vece, t, a, b);
4309    tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4310    tcg_gen_or_vec(vece, sat, sat, x);
4311    tcg_temp_free_vec(x);
4312}
4313
4314void gen_gvec_uqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4315                       uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4316{
4317    static const TCGOpcode vecop_list[] = {
4318        INDEX_op_usadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
4319    };
4320    static const GVecGen4 ops[4] = {
4321        { .fniv = gen_uqadd_vec,
4322          .fno = gen_helper_gvec_uqadd_b,
4323          .write_aofs = true,
4324          .opt_opc = vecop_list,
4325          .vece = MO_8 },
4326        { .fniv = gen_uqadd_vec,
4327          .fno = gen_helper_gvec_uqadd_h,
4328          .write_aofs = true,
4329          .opt_opc = vecop_list,
4330          .vece = MO_16 },
4331        { .fniv = gen_uqadd_vec,
4332          .fno = gen_helper_gvec_uqadd_s,
4333          .write_aofs = true,
4334          .opt_opc = vecop_list,
4335          .vece = MO_32 },
4336        { .fniv = gen_uqadd_vec,
4337          .fno = gen_helper_gvec_uqadd_d,
4338          .write_aofs = true,
4339          .opt_opc = vecop_list,
4340          .vece = MO_64 },
4341    };
4342    tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4343                   rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4344}
4345
4346static void gen_sqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4347                          TCGv_vec a, TCGv_vec b)
4348{
4349    TCGv_vec x = tcg_temp_new_vec_matching(t);
4350    tcg_gen_add_vec(vece, x, a, b);
4351    tcg_gen_ssadd_vec(vece, t, a, b);
4352    tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4353    tcg_gen_or_vec(vece, sat, sat, x);
4354    tcg_temp_free_vec(x);
4355}
4356
4357void gen_gvec_sqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4358                       uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4359{
4360    static const TCGOpcode vecop_list[] = {
4361        INDEX_op_ssadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
4362    };
4363    static const GVecGen4 ops[4] = {
4364        { .fniv = gen_sqadd_vec,
4365          .fno = gen_helper_gvec_sqadd_b,
4366          .opt_opc = vecop_list,
4367          .write_aofs = true,
4368          .vece = MO_8 },
4369        { .fniv = gen_sqadd_vec,
4370          .fno = gen_helper_gvec_sqadd_h,
4371          .opt_opc = vecop_list,
4372          .write_aofs = true,
4373          .vece = MO_16 },
4374        { .fniv = gen_sqadd_vec,
4375          .fno = gen_helper_gvec_sqadd_s,
4376          .opt_opc = vecop_list,
4377          .write_aofs = true,
4378          .vece = MO_32 },
4379        { .fniv = gen_sqadd_vec,
4380          .fno = gen_helper_gvec_sqadd_d,
4381          .opt_opc = vecop_list,
4382          .write_aofs = true,
4383          .vece = MO_64 },
4384    };
4385    tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4386                   rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4387}
4388
4389static void gen_uqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4390                          TCGv_vec a, TCGv_vec b)
4391{
4392    TCGv_vec x = tcg_temp_new_vec_matching(t);
4393    tcg_gen_sub_vec(vece, x, a, b);
4394    tcg_gen_ussub_vec(vece, t, a, b);
4395    tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4396    tcg_gen_or_vec(vece, sat, sat, x);
4397    tcg_temp_free_vec(x);
4398}
4399
4400void gen_gvec_uqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4401                       uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4402{
4403    static const TCGOpcode vecop_list[] = {
4404        INDEX_op_ussub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
4405    };
4406    static const GVecGen4 ops[4] = {
4407        { .fniv = gen_uqsub_vec,
4408          .fno = gen_helper_gvec_uqsub_b,
4409          .opt_opc = vecop_list,
4410          .write_aofs = true,
4411          .vece = MO_8 },
4412        { .fniv = gen_uqsub_vec,
4413          .fno = gen_helper_gvec_uqsub_h,
4414          .opt_opc = vecop_list,
4415          .write_aofs = true,
4416          .vece = MO_16 },
4417        { .fniv = gen_uqsub_vec,
4418          .fno = gen_helper_gvec_uqsub_s,
4419          .opt_opc = vecop_list,
4420          .write_aofs = true,
4421          .vece = MO_32 },
4422        { .fniv = gen_uqsub_vec,
4423          .fno = gen_helper_gvec_uqsub_d,
4424          .opt_opc = vecop_list,
4425          .write_aofs = true,
4426          .vece = MO_64 },
4427    };
4428    tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4429                   rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4430}
4431
4432static void gen_sqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4433                          TCGv_vec a, TCGv_vec b)
4434{
4435    TCGv_vec x = tcg_temp_new_vec_matching(t);
4436    tcg_gen_sub_vec(vece, x, a, b);
4437    tcg_gen_sssub_vec(vece, t, a, b);
4438    tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4439    tcg_gen_or_vec(vece, sat, sat, x);
4440    tcg_temp_free_vec(x);
4441}
4442
4443void gen_gvec_sqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4444                       uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4445{
4446    static const TCGOpcode vecop_list[] = {
4447        INDEX_op_sssub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
4448    };
4449    static const GVecGen4 ops[4] = {
4450        { .fniv = gen_sqsub_vec,
4451          .fno = gen_helper_gvec_sqsub_b,
4452          .opt_opc = vecop_list,
4453          .write_aofs = true,
4454          .vece = MO_8 },
4455        { .fniv = gen_sqsub_vec,
4456          .fno = gen_helper_gvec_sqsub_h,
4457          .opt_opc = vecop_list,
4458          .write_aofs = true,
4459          .vece = MO_16 },
4460        { .fniv = gen_sqsub_vec,
4461          .fno = gen_helper_gvec_sqsub_s,
4462          .opt_opc = vecop_list,
4463          .write_aofs = true,
4464          .vece = MO_32 },
4465        { .fniv = gen_sqsub_vec,
4466          .fno = gen_helper_gvec_sqsub_d,
4467          .opt_opc = vecop_list,
4468          .write_aofs = true,
4469          .vece = MO_64 },
4470    };
4471    tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4472                   rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4473}
4474
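/*
 * SABD: signed absolute difference, i.e. |a - b| treating the
 * elements as signed.  Vector forms use max(a, b) - min(a, b).
 */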
4475static void gen_sabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4476{
4477    TCGv_i32 t = tcg_temp_new_i32();
4478
4479    tcg_gen_sub_i32(t, a, b);
4480    tcg_gen_sub_i32(d, b, a);
4481    tcg_gen_movcond_i32(TCG_COND_LT, d, a, b, d, t);
4482    tcg_temp_free_i32(t);
4483}
4484
4485static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4486{
4487    TCGv_i64 t = tcg_temp_new_i64();
4488
4489    tcg_gen_sub_i64(t, a, b);
4490    tcg_gen_sub_i64(d, b, a);
4491    tcg_gen_movcond_i64(TCG_COND_LT, d, a, b, d, t);
4492    tcg_temp_free_i64(t);
4493}
4494
4495static void gen_sabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4496{
4497    TCGv_vec t = tcg_temp_new_vec_matching(d);
4498
4499    tcg_gen_smin_vec(vece, t, a, b);
4500    tcg_gen_smax_vec(vece, d, a, b);
4501    tcg_gen_sub_vec(vece, d, d, t);
4502    tcg_temp_free_vec(t);
4503}
4504
4505void gen_gvec_sabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4506                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4507{
4508    static const TCGOpcode vecop_list[] = {
4509        INDEX_op_sub_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0
4510    };
4511    static const GVecGen3 ops[4] = {
4512        { .fniv = gen_sabd_vec,
4513          .fno = gen_helper_gvec_sabd_b,
4514          .opt_opc = vecop_list,
4515          .vece = MO_8 },
4516        { .fniv = gen_sabd_vec,
4517          .fno = gen_helper_gvec_sabd_h,
4518          .opt_opc = vecop_list,
4519          .vece = MO_16 },
4520        { .fni4 = gen_sabd_i32,
4521          .fniv = gen_sabd_vec,
4522          .fno = gen_helper_gvec_sabd_s,
4523          .opt_opc = vecop_list,
4524          .vece = MO_32 },
4525        { .fni8 = gen_sabd_i64,
4526          .fniv = gen_sabd_vec,
4527          .fno = gen_helper_gvec_sabd_d,
4528          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4529          .opt_opc = vecop_list,
4530          .vece = MO_64 },
4531    };
4532    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4533}
4534
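/* UABD: unsigned absolute difference. */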
4535static void gen_uabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4536{
4537    TCGv_i32 t = tcg_temp_new_i32();
4538
4539    tcg_gen_sub_i32(t, a, b);
4540    tcg_gen_sub_i32(d, b, a);
4541    tcg_gen_movcond_i32(TCG_COND_LTU, d, a, b, d, t);
4542    tcg_temp_free_i32(t);
4543}
4544
4545static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4546{
4547    TCGv_i64 t = tcg_temp_new_i64();
4548
4549    tcg_gen_sub_i64(t, a, b);
4550    tcg_gen_sub_i64(d, b, a);
4551    tcg_gen_movcond_i64(TCG_COND_LTU, d, a, b, d, t);
4552    tcg_temp_free_i64(t);
4553}
4554
4555static void gen_uabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4556{
4557    TCGv_vec t = tcg_temp_new_vec_matching(d);
4558
4559    tcg_gen_umin_vec(vece, t, a, b);
4560    tcg_gen_umax_vec(vece, d, a, b);
4561    tcg_gen_sub_vec(vece, d, d, t);
4562    tcg_temp_free_vec(t);
4563}
4564
4565void gen_gvec_uabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4566                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4567{
4568    static const TCGOpcode vecop_list[] = {
4569        INDEX_op_sub_vec, INDEX_op_umin_vec, INDEX_op_umax_vec, 0
4570    };
4571    static const GVecGen3 ops[4] = {
4572        { .fniv = gen_uabd_vec,
4573          .fno = gen_helper_gvec_uabd_b,
4574          .opt_opc = vecop_list,
4575          .vece = MO_8 },
4576        { .fniv = gen_uabd_vec,
4577          .fno = gen_helper_gvec_uabd_h,
4578          .opt_opc = vecop_list,
4579          .vece = MO_16 },
4580        { .fni4 = gen_uabd_i32,
4581          .fniv = gen_uabd_vec,
4582          .fno = gen_helper_gvec_uabd_s,
4583          .opt_opc = vecop_list,
4584          .vece = MO_32 },
4585        { .fni8 = gen_uabd_i64,
4586          .fniv = gen_uabd_vec,
4587          .fno = gen_helper_gvec_uabd_d,
4588          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4589          .opt_opc = vecop_list,
4590          .vece = MO_64 },
4591    };
4592    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4593}
4594
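/* SABA: signed absolute difference and accumulate. */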
4595static void gen_saba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4596{
4597    TCGv_i32 t = tcg_temp_new_i32();
4598    gen_sabd_i32(t, a, b);
4599    tcg_gen_add_i32(d, d, t);
4600    tcg_temp_free_i32(t);
4601}
4602
4603static void gen_saba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4604{
4605    TCGv_i64 t = tcg_temp_new_i64();
4606    gen_sabd_i64(t, a, b);
4607    tcg_gen_add_i64(d, d, t);
4608    tcg_temp_free_i64(t);
4609}
4610
4611static void gen_saba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4612{
4613    TCGv_vec t = tcg_temp_new_vec_matching(d);
4614    gen_sabd_vec(vece, t, a, b);
4615    tcg_gen_add_vec(vece, d, d, t);
4616    tcg_temp_free_vec(t);
4617}
4618
4619void gen_gvec_saba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4620                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4621{
4622    static const TCGOpcode vecop_list[] = {
4623        INDEX_op_sub_vec, INDEX_op_add_vec,
4624        INDEX_op_smin_vec, INDEX_op_smax_vec, 0
4625    };
4626    static const GVecGen3 ops[4] = {
4627        { .fniv = gen_saba_vec,
4628          .fno = gen_helper_gvec_saba_b,
4629          .opt_opc = vecop_list,
4630          .load_dest = true,
4631          .vece = MO_8 },
4632        { .fniv = gen_saba_vec,
4633          .fno = gen_helper_gvec_saba_h,
4634          .opt_opc = vecop_list,
4635          .load_dest = true,
4636          .vece = MO_16 },
4637        { .fni4 = gen_saba_i32,
4638          .fniv = gen_saba_vec,
4639          .fno = gen_helper_gvec_saba_s,
4640          .opt_opc = vecop_list,
4641          .load_dest = true,
4642          .vece = MO_32 },
4643        { .fni8 = gen_saba_i64,
4644          .fniv = gen_saba_vec,
4645          .fno = gen_helper_gvec_saba_d,
4646          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4647          .opt_opc = vecop_list,
4648          .load_dest = true,
4649          .vece = MO_64 },
4650    };
4651    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4652}
4653
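/* UABA: unsigned absolute difference and accumulate. */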
4654static void gen_uaba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4655{
4656    TCGv_i32 t = tcg_temp_new_i32();
4657    gen_uabd_i32(t, a, b);
4658    tcg_gen_add_i32(d, d, t);
4659    tcg_temp_free_i32(t);
4660}
4661
4662static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4663{
4664    TCGv_i64 t = tcg_temp_new_i64();
4665    gen_uabd_i64(t, a, b);
4666    tcg_gen_add_i64(d, d, t);
4667    tcg_temp_free_i64(t);
4668}
4669
4670static void gen_uaba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4671{
4672    TCGv_vec t = tcg_temp_new_vec_matching(d);
4673    gen_uabd_vec(vece, t, a, b);
4674    tcg_gen_add_vec(vece, d, d, t);
4675    tcg_temp_free_vec(t);
4676}
4677
4678void gen_gvec_uaba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4679                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4680{
4681    static const TCGOpcode vecop_list[] = {
4682        INDEX_op_sub_vec, INDEX_op_add_vec,
4683        INDEX_op_umin_vec, INDEX_op_umax_vec, 0
4684    };
4685    static const GVecGen3 ops[4] = {
4686        { .fniv = gen_uaba_vec,
4687          .fno = gen_helper_gvec_uaba_b,
4688          .opt_opc = vecop_list,
4689          .load_dest = true,
4690          .vece = MO_8 },
4691        { .fniv = gen_uaba_vec,
4692          .fno = gen_helper_gvec_uaba_h,
4693          .opt_opc = vecop_list,
4694          .load_dest = true,
4695          .vece = MO_16 },
4696        { .fni4 = gen_uaba_i32,
4697          .fniv = gen_uaba_vec,
4698          .fno = gen_helper_gvec_uaba_s,
4699          .opt_opc = vecop_list,
4700          .load_dest = true,
4701          .vece = MO_32 },
4702        { .fni8 = gen_uaba_i64,
4703          .fniv = gen_uaba_vec,
4704          .fno = gen_helper_gvec_uaba_d,
4705          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4706          .opt_opc = vecop_list,
4707          .load_dest = true,
4708          .vece = MO_64 },
4709    };
4710    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4711}
4712
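/*
 * Emit code for an MRC/MCR/MRRC/MCRR coprocessor register access:
 * look up the register in the cpreg hashtable, perform the static
 * and (where needed) runtime access checks, handle any special-case
 * register types, and finally emit the read or write itself.
 */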
4713static void do_coproc_insn(DisasContext *s, int cpnum, int is64,
4714                           int opc1, int crn, int crm, int opc2,
4715                           bool isread, int rt, int rt2)
4716{
4717    const ARMCPRegInfo *ri;
4718
4719    ri = get_arm_cp_reginfo(s->cp_regs,
4720            ENCODE_CP_REG(cpnum, is64, s->ns, crn, crm, opc1, opc2));
4721    if (ri) {
4722        bool need_exit_tb;
4723
4724        /* Check access permissions */
4725        if (!cp_access_ok(s->current_el, ri, isread)) {
4726            unallocated_encoding(s);
4727            return;
4728        }
4729
4730        if (s->hstr_active || ri->accessfn ||
4731            (arm_dc_feature(s, ARM_FEATURE_XSCALE) && cpnum < 14)) {
4732            /* Emit code to perform further access permission checks at
4733             * runtime; this may result in an exception.
4734             * Note that on XScale all cp0..cp13 registers do an access check
4735             * call in order to handle c15_cpar.
4736             */
4737            uint32_t syndrome;
4738
4739            /* Note that since we are an implementation which takes an
4740             * exception on a trapped conditional instruction only if the
4741             * instruction passes its condition code check, we can take
4742             * advantage of the clause in the ARM ARM that allows us to set
4743             * the COND field in the instruction to 0xE in all cases.
4744             * We could fish the actual condition out of the insn (ARM)
4745             * or the condexec bits (Thumb) but it isn't necessary.
4746             */
4747            switch (cpnum) {
4748            case 14:
4749                if (is64) {
4750                    syndrome = syn_cp14_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
4751                                                 isread, false);
4752                } else {
4753                    syndrome = syn_cp14_rt_trap(1, 0xe, opc1, opc2, crn, crm,
4754                                                rt, isread, false);
4755                }
4756                break;
4757            case 15:
4758                if (is64) {
4759                    syndrome = syn_cp15_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
4760                                                 isread, false);
4761                } else {
4762                    syndrome = syn_cp15_rt_trap(1, 0xe, opc1, opc2, crn, crm,
4763                                                rt, isread, false);
4764                }
4765                break;
4766            default:
4767                /* ARMv8 defines that only coprocessors 14 and 15 exist,
4768                 * so this can only happen if this is an ARMv7 or earlier CPU,
4769                 * in which case the syndrome information won't actually be
4770                 * guest visible.
4771                 */
4772                assert(!arm_dc_feature(s, ARM_FEATURE_V8));
4773                syndrome = syn_uncategorized();
4774                break;
4775            }
4776
4777            gen_set_condexec(s);
4778            gen_update_pc(s, 0);
4779            gen_helper_access_check_cp_reg(cpu_env,
4780                                           tcg_constant_ptr(ri),
4781                                           tcg_constant_i32(syndrome),
4782                                           tcg_constant_i32(isread));
4783        } else if (ri->type & ARM_CP_RAISES_EXC) {
4784            /*
4785             * The readfn or writefn might raise an exception;
4786             * synchronize the CPU state in case it does.
4787             */
4788            gen_set_condexec(s);
4789            gen_update_pc(s, 0);
4790        }
4791
4792        /* Handle special cases first */
4793        switch (ri->type & ARM_CP_SPECIAL_MASK) {
4794        case 0:
4795            break;
4796        case ARM_CP_NOP:
4797            return;
4798        case ARM_CP_WFI:
4799            if (isread) {
4800                unallocated_encoding(s);
4801                return;
4802            }
4803            gen_update_pc(s, curr_insn_len(s));
4804            s->base.is_jmp = DISAS_WFI;
4805            return;
4806        default:
4807            g_assert_not_reached();
4808        }
4809
4810        if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
4811            gen_io_start();
4812        }
4813
4814        if (isread) {
4815            /* Read */
4816            if (is64) {
4817                TCGv_i64 tmp64;
4818                TCGv_i32 tmp;
4819                if (ri->type & ARM_CP_CONST) {
4820                    tmp64 = tcg_constant_i64(ri->resetvalue);
4821                } else if (ri->readfn) {
4822                    tmp64 = tcg_temp_new_i64();
4823                    gen_helper_get_cp_reg64(tmp64, cpu_env,
4824                                            tcg_constant_ptr(ri));
4825                } else {
4826                    tmp64 = tcg_temp_new_i64();
4827                    tcg_gen_ld_i64(tmp64, cpu_env, ri->fieldoffset);
4828                }
4829                tmp = tcg_temp_new_i32();
4830                tcg_gen_extrl_i64_i32(tmp, tmp64);
4831                store_reg(s, rt, tmp);
4832                tmp = tcg_temp_new_i32();
4833                tcg_gen_extrh_i64_i32(tmp, tmp64);
4834                tcg_temp_free_i64(tmp64);
4835                store_reg(s, rt2, tmp);
4836            } else {
4837                TCGv_i32 tmp;
4838                if (ri->type & ARM_CP_CONST) {
4839                    tmp = tcg_constant_i32(ri->resetvalue);
4840                } else if (ri->readfn) {
4841                    tmp = tcg_temp_new_i32();
4842                    gen_helper_get_cp_reg(tmp, cpu_env, tcg_constant_ptr(ri));
4843                } else {
4844                    tmp = load_cpu_offset(ri->fieldoffset);
4845                }
4846                if (rt == 15) {
4847                    /* A destination register of r15 for 32-bit loads sets
4848                     * the condition codes from the high 4 bits of the value.
4849                     */
4850                    gen_set_nzcv(tmp);
4851                    tcg_temp_free_i32(tmp);
4852                } else {
4853                    store_reg(s, rt, tmp);
4854                }
4855            }
4856        } else {
4857            /* Write */
4858            if (ri->type & ARM_CP_CONST) {
4859                /* If not forbidden by access permissions, treat as WI */
4860                return;
4861            }
4862
4863            if (is64) {
4864                TCGv_i32 tmplo, tmphi;
4865                TCGv_i64 tmp64 = tcg_temp_new_i64();
4866                tmplo = load_reg(s, rt);
4867                tmphi = load_reg(s, rt2);
4868                tcg_gen_concat_i32_i64(tmp64, tmplo, tmphi);
4869                tcg_temp_free_i32(tmplo);
4870                tcg_temp_free_i32(tmphi);
4871                if (ri->writefn) {
4872                    gen_helper_set_cp_reg64(cpu_env, tcg_constant_ptr(ri),
4873                                            tmp64);
4874                } else {
4875                    tcg_gen_st_i64(tmp64, cpu_env, ri->fieldoffset);
4876                }
4877                tcg_temp_free_i64(tmp64);
4878            } else {
4879                TCGv_i32 tmp = load_reg(s, rt);
4880                if (ri->writefn) {
4881                    gen_helper_set_cp_reg(cpu_env, tcg_constant_ptr(ri), tmp);
4882                    tcg_temp_free_i32(tmp);
4883                } else {
4884                    store_cpu_offset(tmp, ri->fieldoffset, 4);
4885                }
4886            }
4887        }
4888
4889        /* I/O operations must end the TB here (whether read or write) */
4890        need_exit_tb = ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) &&
4891                        (ri->type & ARM_CP_IO));
4892
4893        if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
4894            /*
4895             * A write to any coprocessor register that ends a TB
4896             * must rebuild the hflags for the next TB.
4897             */
4898            gen_rebuild_hflags(s, ri->type & ARM_CP_NEWEL);
4899            /*
4900             * We default to ending the TB on a coprocessor register write,
4901             * but allow this to be suppressed by the register definition
4902             * (usually only necessary to work around guest bugs).
4903             */
4904            need_exit_tb = true;
4905        }
4906        if (need_exit_tb) {
4907            gen_lookup_tb(s);
4908        }
4909
4910        return;
4911    }
4912
4913    /* Unknown register; this might be a guest error or a QEMU
4914     * unimplemented feature.
4915     */
4916    if (is64) {
4917        qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
4918                      "64 bit system register cp:%d opc1: %d crm:%d "
4919                      "(%s)\n",
4920                      isread ? "read" : "write", cpnum, opc1, crm,
4921                      s->ns ? "non-secure" : "secure");
4922    } else {
4923        qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
4924                      "system register cp:%d opc1:%d crn:%d crm:%d opc2:%d "
4925                      "(%s)\n",
4926                      isread ? "read" : "write", cpnum, opc1, crn, crm, opc2,
4927                      s->ns ? "non-secure" : "secure");
4928    }
4929
4930    unallocated_encoding(s);
4931    return;
4932}
4933
4934/* Decode XScale DSP or iWMMXt insn (in the copro space, cp=0 or 1) */
4935static void disas_xscale_insn(DisasContext *s, uint32_t insn)
4936{
4937    int cpnum = (insn >> 8) & 0xf;
4938
4939    if (extract32(s->c15_cpar, cpnum, 1) == 0) {
4940        unallocated_encoding(s);
4941    } else if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
4942        if (disas_iwmmxt_insn(s, insn)) {
4943            unallocated_encoding(s);
4944        }
4945    } else if (arm_dc_feature(s, ARM_FEATURE_XSCALE)) {
4946        if (disas_dsp_insn(s, insn)) {
4947            unallocated_encoding(s);
4948        }
4949    }
4950}
4951
4952/* Store a 64-bit value to a register pair.  Clobbers val.  */
4953static void gen_storeq_reg(DisasContext *s, int rlow, int rhigh, TCGv_i64 val)
4954{
4955    TCGv_i32 tmp;
4956    tmp = tcg_temp_new_i32();
4957    tcg_gen_extrl_i64_i32(tmp, val);
4958    store_reg(s, rlow, tmp);
4959    tmp = tcg_temp_new_i32();
4960    tcg_gen_extrh_i64_i32(tmp, val);
4961    store_reg(s, rhigh, tmp);
4962}
4963
4964/* Load and add a 64-bit value from a register pair.  */
4965static void gen_addq(DisasContext *s, TCGv_i64 val, int rlow, int rhigh)
4966{
4967    TCGv_i64 tmp;
4968    TCGv_i32 tmpl;
4969    TCGv_i32 tmph;
4970
4971    /* Load the 64-bit value rhigh:rlow.  */
4972    tmpl = load_reg(s, rlow);
4973    tmph = load_reg(s, rhigh);
4974    tmp = tcg_temp_new_i64();
4975    tcg_gen_concat_i32_i64(tmp, tmpl, tmph);
4976    tcg_temp_free_i32(tmpl);
4977    tcg_temp_free_i32(tmph);
4978    tcg_gen_add_i64(val, val, tmp);
4979    tcg_temp_free_i64(tmp);
4980}
4981
4982/* Set N and Z flags from the 64-bit value hi:lo.  */
4983static void gen_logicq_cc(TCGv_i32 lo, TCGv_i32 hi)
4984{
4985    tcg_gen_mov_i32(cpu_NF, hi);     /* N = bit 31 of hi */
4986    tcg_gen_or_i32(cpu_ZF, lo, hi);  /* Z set iff (hi | lo) == 0 */
4987}
4988
4989/* Load/Store exclusive instructions are implemented by remembering
4990   the value/address loaded, and seeing if these are the same
4991   when the store is performed.  This should be sufficient to implement
4992   the architecturally mandated semantics, and avoids having to monitor
4993   regular stores.  The compare vs the remembered value is done during
4994   the cmpxchg operation, but we must compare the addresses manually.  */
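    /*
     * As a rough sketch, for the 32-bit case the code below does
     *     env->exclusive_addr = addr;
     *     env->exclusive_val  = Rt = [addr];
     * and gen_store_exclusive() then checks the address with a branch and
     * the value with a cmpxchg to decide whether the store may succeed.
     */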
4995static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
4996                               TCGv_i32 addr, int size)
4997{
4998    TCGv_i32 tmp = tcg_temp_new_i32();
4999    MemOp opc = size | MO_ALIGN | s->be_data;
5000
5001    s->is_ldex = true;
5002
5003    if (size == 3) {
5004        TCGv_i32 tmp2 = tcg_temp_new_i32();
5005        TCGv_i64 t64 = tcg_temp_new_i64();
5006
5007        /*
5008         * For AArch32, architecturally the 32-bit word at the lowest
5009         * address is always Rt and the one at addr+4 is Rt2, even if
5010         * the CPU is big-endian. That means we don't want to do a
5011         * gen_aa32_ld_i64(), which checks SCTLR_B as if for an
5012         * architecturally 64-bit access, but instead do a 64-bit access
5013         * using MO_BE if appropriate and then split the two halves.
5014         */
5015        TCGv taddr = gen_aa32_addr(s, addr, opc);
5016
5017        tcg_gen_qemu_ld_i64(t64, taddr, get_mem_index(s), opc);
5018        tcg_temp_free(taddr);
5019        tcg_gen_mov_i64(cpu_exclusive_val, t64);
5020        if (s->be_data == MO_BE) {
5021            tcg_gen_extr_i64_i32(tmp2, tmp, t64);
5022        } else {
5023            tcg_gen_extr_i64_i32(tmp, tmp2, t64);
5024        }
5025        tcg_temp_free_i64(t64);
5026
5027        store_reg(s, rt2, tmp2);
5028    } else {
5029        gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), opc);
5030        tcg_gen_extu_i32_i64(cpu_exclusive_val, tmp);
5031    }
5032
5033    store_reg(s, rt, tmp);
5034    tcg_gen_extu_i32_i64(cpu_exclusive_addr, addr);
5035}
5036
5037static void gen_clrex(DisasContext *s)
5038{
5039    tcg_gen_movi_i64(cpu_exclusive_addr, -1);
5040}
5041
5042static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
5043                                TCGv_i32 addr, int size)
5044{
5045    TCGv_i32 t0, t1, t2;
5046    TCGv_i64 extaddr;
5047    TCGv taddr;
5048    TCGLabel *done_label;
5049    TCGLabel *fail_label;
5050    MemOp opc = size | MO_ALIGN | s->be_data;
5051
5052    /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]) {
5053         [addr] = {Rt};
5054         {Rd} = 0;
5055       } else {
5056         {Rd} = 1;
5057       } */
5058    fail_label = gen_new_label();
5059    done_label = gen_new_label();
5060    extaddr = tcg_temp_new_i64();
5061    tcg_gen_extu_i32_i64(extaddr, addr);
5062    tcg_gen_brcond_i64(TCG_COND_NE, extaddr, cpu_exclusive_addr, fail_label);
5063    tcg_temp_free_i64(extaddr);
5064
5065    taddr = gen_aa32_addr(s, addr, opc);
5066    t0 = tcg_temp_new_i32();
5067    t1 = load_reg(s, rt);
5068    if (size == 3) {
5069        TCGv_i64 o64 = tcg_temp_new_i64();
5070        TCGv_i64 n64 = tcg_temp_new_i64();
5071
5072        t2 = load_reg(s, rt2);
5073
5074        /*
5075         * For AArch32, architecturally the 32-bit word at the lowest
5076         * address is always Rt and the one at addr+4 is Rt2, even if
5077         * the CPU is big-endian. Since we're going to treat this as a
5078         * single 64-bit BE store, we need to put the two halves in the
5079         * opposite order for BE to LE, so that they end up in the right
5080         * places.  We don't want gen_aa32_st_i64, because that checks
5081         * SCTLR_B as if for an architectural 64-bit access.
5082         */
5083        if (s->be_data == MO_BE) {
5084            tcg_gen_concat_i32_i64(n64, t2, t1);
5085        } else {
5086            tcg_gen_concat_i32_i64(n64, t1, t2);
5087        }
5088        tcg_temp_free_i32(t2);
5089
5090        tcg_gen_atomic_cmpxchg_i64(o64, taddr, cpu_exclusive_val, n64,
5091                                   get_mem_index(s), opc);
5092        tcg_temp_free_i64(n64);
5093
5094        tcg_gen_setcond_i64(TCG_COND_NE, o64, o64, cpu_exclusive_val);
5095        tcg_gen_extrl_i64_i32(t0, o64);
5096
5097        tcg_temp_free_i64(o64);
5098    } else {
5099        t2 = tcg_temp_new_i32();
5100        tcg_gen_extrl_i64_i32(t2, cpu_exclusive_val);
5101        tcg_gen_atomic_cmpxchg_i32(t0, taddr, t2, t1, get_mem_index(s), opc);
5102        tcg_gen_setcond_i32(TCG_COND_NE, t0, t0, t2);
5103        tcg_temp_free_i32(t2);
5104    }
5105    tcg_temp_free_i32(t1);
5106    tcg_temp_free(taddr);
5107    tcg_gen_mov_i32(cpu_R[rd], t0);
5108    tcg_temp_free_i32(t0);
5109    tcg_gen_br(done_label);
5110
5111    gen_set_label(fail_label);
5112    tcg_gen_movi_i32(cpu_R[rd], 1);
5113    gen_set_label(done_label);
5114    tcg_gen_movi_i64(cpu_exclusive_addr, -1);
5115}
5116
5117/* gen_srs:
5119 * @s: DisasContext
5120 * @mode: mode field from insn (which stack to store to)
5121 * @amode: addressing mode (DA/IA/DB/IB), encoded as per P,U bits in ARM insn
5122 * @writeback: true if writeback bit set
5123 *
5124 * Generate code for the SRS (Store Return State) insn.
5125 */
5126static void gen_srs(DisasContext *s,
5127                    uint32_t mode, uint32_t amode, bool writeback)
5128{
5129    int32_t offset;
5130    TCGv_i32 addr, tmp;
5131    bool undef = false;
5132
5133    /* SRS is:
5134     * - trapped to EL3 if EL3 is AArch64 and we are at Secure EL1
5135     *   and specified mode is monitor mode
5136     * - UNDEFINED in Hyp mode
5137     * - UNPREDICTABLE in User or System mode
5138     * - UNPREDICTABLE if the specified mode is:
5139     * -- not implemented
5140     * -- not a valid mode number
5141     * -- a mode that's at a higher exception level
5142     * -- Monitor, if we are Non-secure
5143     * For the UNPREDICTABLE cases we choose to UNDEF.
5144     */
5145    if (s->current_el == 1 && !s->ns && mode == ARM_CPU_MODE_MON) {
5146        gen_exception_insn_el(s, 0, EXCP_UDEF, syn_uncategorized(), 3);
5147        return;
5148    }
5149
5150    if (s->current_el == 0 || s->current_el == 2) {
5151        undef = true;
5152    }
5153
5154    switch (mode) {
5155    case ARM_CPU_MODE_USR:
5156    case ARM_CPU_MODE_FIQ:
5157    case ARM_CPU_MODE_IRQ:
5158    case ARM_CPU_MODE_SVC:
5159    case ARM_CPU_MODE_ABT:
5160    case ARM_CPU_MODE_UND:
5161    case ARM_CPU_MODE_SYS:
5162        break;
5163    case ARM_CPU_MODE_HYP:
5164        if (s->current_el == 1 || !arm_dc_feature(s, ARM_FEATURE_EL2)) {
5165            undef = true;
5166        }
5167        break;
5168    case ARM_CPU_MODE_MON:
5169        /* No need to check specifically for "are we non-secure" because
5170         * we've already made EL0 UNDEF and handled the trap for S-EL1;
5171         * so if this isn't EL3 then we must be non-secure.
5172         */
5173        if (s->current_el != 3) {
5174            undef = true;
5175        }
5176        break;
5177    default:
5178        undef = true;
5179    }
5180
5181    if (undef) {
5182        unallocated_encoding(s);
5183        return;
5184    }
5185
5186    addr = tcg_temp_new_i32();
5187    /* get_r13_banked() will raise an exception if called from System mode */
5188    gen_set_condexec(s);
5189    gen_update_pc(s, 0);
5190    gen_helper_get_r13_banked(addr, cpu_env, tcg_constant_i32(mode));
5191    switch (amode) {
5192    case 0: /* DA */
5193        offset = -4;
5194        break;
5195    case 1: /* IA */
5196        offset = 0;
5197        break;
5198    case 2: /* DB */
5199        offset = -8;
5200        break;
5201    case 3: /* IB */
5202        offset = 4;
5203        break;
5204    default:
5205        g_assert_not_reached();
5206    }
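        /*
         * For example, with amode DB (2) and writeback set, the code below
         * stores LR at SP_<mode> - 8 and the SPSR at SP_<mode> - 4, and
         * finally writes SP_<mode> - 8 back to the banked SP.
         */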
5207    tcg_gen_addi_i32(addr, addr, offset);
5208    tmp = load_reg(s, 14);
5209    gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN);
5210    tcg_temp_free_i32(tmp);
5211    tmp = load_cpu_field(spsr);
5212    tcg_gen_addi_i32(addr, addr, 4);
5213    gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN);
5214    tcg_temp_free_i32(tmp);
5215    if (writeback) {
5216        switch (amode) {
5217        case 0:
5218            offset = -8;
5219            break;
5220        case 1:
5221            offset = 4;
5222            break;
5223        case 2:
5224            offset = -4;
5225            break;
5226        case 3:
5227            offset = 0;
5228            break;
5229        default:
5230            g_assert_not_reached();
5231        }
5232        tcg_gen_addi_i32(addr, addr, offset);
5233        gen_helper_set_r13_banked(cpu_env, tcg_constant_i32(mode), addr);
5234    }
5235    tcg_temp_free_i32(addr);
5236    s->base.is_jmp = DISAS_UPDATE_EXIT;
5237}
5238
5239/* Skip this instruction if the ARM condition is false */
5240static void arm_skip_unless(DisasContext *s, uint32_t cond)
5241{
5242    arm_gen_condlabel(s);
5243    arm_gen_test_cc(cond ^ 1, s->condlabel.label);
5244}
5245
5246
5247/*
5248 * Constant expanders used by T16/T32 decode
5249 */
5250
5251/* Return only the rotation part of T32ExpandImm.  */
5252static int t32_expandimm_rot(DisasContext *s, int x)
5253{
5254    return x & 0xc00 ? extract32(x, 7, 5) : 0;
5255}
5256
5257/* Return the unrotated immediate from T32ExpandImm.  */
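    /*
     * As a worked example of how the two expanders combine: the encoded
     * constant 0x4ab has bits [11:10] != 0, so t32_expandimm_rot() returns
     * bits [11:7] = 9 and the function below returns 0xab (with bit 7
     * forced to 1); the user of the immediate then computes
     * ror32(0xab, 9) = 0x55800000.
     */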
5258static int t32_expandimm_imm(DisasContext *s, int x)
5259{
5260    int imm = extract32(x, 0, 8);
5261
5262    switch (extract32(x, 8, 4)) {
5263    case 0: /* XY */
5264        /* Nothing to do.  */
5265        break;
5266    case 1: /* 00XY00XY */
5267        imm *= 0x00010001;
5268        break;
5269    case 2: /* XY00XY00 */
5270        imm *= 0x01000100;
5271        break;
5272    case 3: /* XYXYXYXY */
5273        imm *= 0x01010101;
5274        break;
5275    default:
5276        /* Rotated constant.  */
5277        imm |= 0x80;
5278        break;
5279    }
5280    return imm;
5281}
5282
5283static int t32_branch24(DisasContext *s, int x)
5284{
5285    /* Convert J1:J2 at x[22:21] to I2:I1, which involves I=J^~S.
         * Since the field has already been sign-extended, x < 0 iff S == 1;
         * when S == 0 both J bits must be inverted, hence the XOR with 3 << 21.
         */
5286    x ^= !(x < 0) * (3 << 21);
5287    /* Append the final zero.  */
5288    return x << 1;
5289}
5290
5291static int t16_setflags(DisasContext *s)
5292{
5293    return s->condexec_mask == 0;
5294}
5295
5296static int t16_push_list(DisasContext *s, int x)
5297{
5298    return (x & 0xff) | (x & 0x100) << (14 - 8);  /* the M bit selects r14 (lr) */
5299}
5300
5301static int t16_pop_list(DisasContext *s, int x)
5302{
5303    return (x & 0xff) | (x & 0x100) << (15 - 8);  /* the P bit selects r15 (pc) */
5304}
5305
5306/*
5307 * Include the generated decoders.
5308 */
5309
5310#include "decode-a32.c.inc"
5311#include "decode-a32-uncond.c.inc"
5312#include "decode-t32.c.inc"
5313#include "decode-t16.c.inc"
5314
5315static bool valid_cp(DisasContext *s, int cp)
5316{
5317    /*
5318     * Return true if this coprocessor field indicates something
5319     * that's really a possible coprocessor.
5320     * For v7 and earlier, coprocessors 8..15 were reserved for Arm use,
5321     * and of those only cp14 and cp15 were used for registers.
5322     * cp10 and cp11 were used for VFP and Neon, whose decode is
5323     * dealt with elsewhere. With the advent of fp16, cp9 is also
5324     * now part of VFP.
5325     * For v8A and later, the encoding has been tightened so that
5326     * only cp14 and cp15 are valid, and other values aren't considered
5327     * to be in the coprocessor-instruction space at all. v8M still
5328     * permits coprocessors 0..7.
5329     * For XScale, we must not decode the XScale cp0, cp1 space as
5330     * a standard coprocessor insn, because we want to fall through to
5331     * the legacy disas_xscale_insn() decoder after decodetree is done.
5332     */
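        /*
         * Concretely: for v8A only cp14 and cp15 are accepted here, while
         * for earlier cores and for M-profile cp0..cp7 are also accepted
         * (minus the XScale cp0/cp1 special case below).
         */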
5333    if (arm_dc_feature(s, ARM_FEATURE_XSCALE) && (cp == 0 || cp == 1)) {
5334        return false;
5335    }
5336
5337    if (arm_dc_feature(s, ARM_FEATURE_V8) &&
5338        !arm_dc_feature(s, ARM_FEATURE_M)) {
5339        return cp >= 14;
5340    }
5341    return cp < 8 || cp >= 14;
5342}
5343
5344static bool trans_MCR(DisasContext *s, arg_MCR *a)
5345{
5346    if (!valid_cp(s, a->cp)) {
5347        return false;
5348    }
5349    do_coproc_insn(s, a->cp, false, a->opc1, a->crn, a->crm, a->opc2,
5350                   false, a->rt, 0);
5351    return true;
5352}
5353
5354static bool trans_MRC(DisasContext *s, arg_MRC *a)
5355{
5356    if (!valid_cp(s, a->cp)) {
5357        return false;
5358    }
5359    do_coproc_insn(s, a->cp, false, a->opc1, a->crn, a->crm, a->opc2,
5360                   true, a->rt, 0);
5361    return true;
5362}
5363
5364static bool trans_MCRR(DisasContext *s, arg_MCRR *a)
5365{
5366    if (!valid_cp(s, a->cp)) {
5367        return false;
5368    }
5369    do_coproc_insn(s, a->cp, true, a->opc1, 0, a->crm, 0,
5370                   false, a->rt, a->rt2);
5371    return true;
5372}
5373
5374static bool trans_MRRC(DisasContext *s, arg_MRRC *a)
5375{
5376    if (!valid_cp(s, a->cp)) {
5377        return false;
5378    }
5379    do_coproc_insn(s, a->cp, true, a->opc1, 0, a->crm, 0,
5380                   true, a->rt, a->rt2);
5381    return true;
5382}
5383
5384/* Helpers to swap operands for reverse-subtract.  */
5385static void gen_rsb(TCGv_i32 dst, TCGv_i32 a, TCGv_i32 b)
5386{
5387    tcg_gen_sub_i32(dst, b, a);
5388}
5389
5390static void gen_rsb_CC(TCGv_i32 dst, TCGv_i32 a, TCGv_i32 b)
5391{
5392    gen_sub_CC(dst, b, a);
5393}
5394
5395static void gen_rsc(TCGv_i32 dest, TCGv_i32 a, TCGv_i32 b)
5396{
5397    gen_sub_carry(dest, b, a);
5398}
5399
5400static void gen_rsc_CC(TCGv_i32 dest, TCGv_i32 a, TCGv_i32 b)
5401{
5402    gen_sbc_CC(dest, b, a);
5403}
5404
5405/*
5406 * Helpers for the data processing routines.
5407 *
5408 * After the computation, store the result back.  This may be a plain
5409 * store to a register (STREG_NORMAL), suppressed altogether (STREG_NONE),
5410 * a store with a runtime check against the stack limits (STREG_SP_CHECK),
5411 * or an exception return (STREG_EXC_RET).
5412 *
5413 * Always return true, indicating success for a trans_* function.
5414 */
5415typedef enum {
5416   STREG_NONE,
5417   STREG_NORMAL,
5418   STREG_SP_CHECK,
5419   STREG_EXC_RET,
5420} StoreRegKind;
5421
5422static bool store_reg_kind(DisasContext *s, int rd,
5423                            TCGv_i32 val, StoreRegKind kind)
5424{
5425    switch (kind) {
5426    case STREG_NONE:
5427        tcg_temp_free_i32(val);
5428        return true;
5429    case STREG_NORMAL:
5430        /* See ALUWritePC: Interworking only from a32 mode. */
5431        if (s->thumb) {
5432            store_reg(s, rd, val);
5433        } else {
5434            store_reg_bx(s, rd, val);
5435        }
5436        return true;
5437    case STREG_SP_CHECK:
5438        store_sp_checked(s, val);
5439        return true;
5440    case STREG_EXC_RET:
5441        gen_exception_return(s, val);
5442        return true;
5443    }
5444    g_assert_not_reached();
5445}
5446
5447/*
5448 * Data Processing (register)
5449 *
5450 * Operate, with set flags, one register source,
5451 * one immediate shifted register source, and a destination.
5452 */
5453static bool op_s_rrr_shi(DisasContext *s, arg_s_rrr_shi *a,
5454                         void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
5455                         int logic_cc, StoreRegKind kind)
5456{
5457    TCGv_i32 tmp1, tmp2;
5458
5459    tmp2 = load_reg(s, a->rm);
5460    gen_arm_shift_im(tmp2, a->shty, a->shim, logic_cc);
5461    tmp1 = load_reg(s, a->rn);
5462
5463    gen(tmp1, tmp1, tmp2);
5464    tcg_temp_free_i32(tmp2);
5465
5466    if (logic_cc) {
5467        gen_logic_CC(tmp1);
5468    }
5469    return store_reg_kind(s, a->rd, tmp1, kind);
5470}
5471
5472static bool op_s_rxr_shi(DisasContext *s, arg_s_rrr_shi *a,
5473                         void (*gen)(TCGv_i32, TCGv_i32),
5474                         int logic_cc, StoreRegKind kind)
5475{
5476    TCGv_i32 tmp;
5477
5478    tmp = load_reg(s, a->rm);
5479    gen_arm_shift_im(tmp, a->shty, a->shim, logic_cc);
5480
5481    gen(tmp, tmp);
5482    if (logic_cc) {
5483        gen_logic_CC(tmp);
5484    }
5485    return store_reg_kind(s, a->rd, tmp, kind);
5486}
5487
5488/*
5489 * Data-processing (register-shifted register)
5490 *
5491 * Operate, with set flags, one register source,
5492 * one register shifted register source, and a destination.
5493 */
5494static bool op_s_rrr_shr(DisasContext *s, arg_s_rrr_shr *a,
5495                         void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
5496                         int logic_cc, StoreRegKind kind)
5497{
5498    TCGv_i32 tmp1, tmp2;
5499
5500    tmp1 = load_reg(s, a->rs);
5501    tmp2 = load_reg(s, a->rm);
5502    gen_arm_shift_reg(tmp2, a->shty, tmp1, logic_cc);
5503    tmp1 = load_reg(s, a->rn);
5504
5505    gen(tmp1, tmp1, tmp2);
5506    tcg_temp_free_i32(tmp2);
5507
5508    if (logic_cc) {
5509        gen_logic_CC(tmp1);
5510    }
5511    return store_reg_kind(s, a->rd, tmp1, kind);
5512}
5513
5514static bool op_s_rxr_shr(DisasContext *s, arg_s_rrr_shr *a,
5515                         void (*gen)(TCGv_i32, TCGv_i32),
5516                         int logic_cc, StoreRegKind kind)
5517{
5518    TCGv_i32 tmp1, tmp2;
5519
5520    tmp1 = load_reg(s, a->rs);
5521    tmp2 = load_reg(s, a->rm);
5522    gen_arm_shift_reg(tmp2, a->shty, tmp1, logic_cc);
5523
5524    gen(tmp2, tmp2);
5525    if (logic_cc) {
5526        gen_logic_CC(tmp2);
5527    }
5528    return store_reg_kind(s, a->rd, tmp2, kind);
5529}
5530
5531/*
5532 * Data-processing (immediate)
5533 *
5534 * Operate, with set flags, one register source,
5535 * one rotated immediate, and a destination.
5536 *
5537 * Note that because logic_cc && a->rot set CF from the msb of the
5538 * rotated immediate, we must be passed the unrotated form of the
5539 * immediate and perform the rotation here.
5540 */
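    /*
     * For example, an A32 AND (immediate) with imm12 = 0x2ab encodes a
     * rotation of 4, giving imm = ror32(0xab, 4) = 0xb000000a; with the
     * S bit set, CF is then set from bit 31 of that value, here 1.
     */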
5541static bool op_s_rri_rot(DisasContext *s, arg_s_rri_rot *a,
5542                         void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
5543                         int logic_cc, StoreRegKind kind)
5544{
5545    TCGv_i32 tmp1;
5546    uint32_t imm;
5547
5548    imm = ror32(a->imm, a->rot);
5549    if (logic_cc && a->rot) {
5550        tcg_gen_movi_i32(cpu_CF, imm >> 31);
5551    }
5552    tmp1 = load_reg(s, a->rn);
5553
5554    gen(tmp1, tmp1, tcg_constant_i32(imm));
5555
5556    if (logic_cc) {
5557        gen_logic_CC(tmp1);
5558    }
5559    return store_reg_kind(s, a->rd, tmp1, kind);
5560}
5561
5562static bool op_s_rxi_rot(DisasContext *s, arg_s_rri_rot *a,
5563                         void (*gen)(TCGv_i32, TCGv_i32),
5564                         int logic_cc, StoreRegKind kind)
5565{
5566    TCGv_i32 tmp;
5567    uint32_t imm;
5568
5569    imm = ror32(a->imm, a->rot);
5570    if (logic_cc && a->rot) {
5571        tcg_gen_movi_i32(cpu_CF, imm >> 31);
5572    }
5573
5574    tmp = tcg_temp_new_i32();
5575    gen(tmp, tcg_constant_i32(imm));
5576
5577    if (logic_cc) {
5578        gen_logic_CC(tmp);
5579    }
5580    return store_reg_kind(s, a->rd, tmp, kind);
5581}
5582
5583#define DO_ANY3(NAME, OP, L, K)                                         \
5584    static bool trans_##NAME##_rrri(DisasContext *s, arg_s_rrr_shi *a)  \
5585    { StoreRegKind k = (K); return op_s_rrr_shi(s, a, OP, L, k); }      \
5586    static bool trans_##NAME##_rrrr(DisasContext *s, arg_s_rrr_shr *a)  \
5587    { StoreRegKind k = (K); return op_s_rrr_shr(s, a, OP, L, k); }      \
5588    static bool trans_##NAME##_rri(DisasContext *s, arg_s_rri_rot *a)   \
5589    { StoreRegKind k = (K); return op_s_rri_rot(s, a, OP, L, k); }
5590
5591#define DO_ANY2(NAME, OP, L, K)                                         \
5592    static bool trans_##NAME##_rxri(DisasContext *s, arg_s_rrr_shi *a)  \
5593    { StoreRegKind k = (K); return op_s_rxr_shi(s, a, OP, L, k); }      \
5594    static bool trans_##NAME##_rxrr(DisasContext *s, arg_s_rrr_shr *a)  \
5595    { StoreRegKind k = (K); return op_s_rxr_shr(s, a, OP, L, k); }      \
5596    static bool trans_##NAME##_rxi(DisasContext *s, arg_s_rri_rot *a)   \
5597    { StoreRegKind k = (K); return op_s_rxi_rot(s, a, OP, L, k); }
5598
5599#define DO_CMP2(NAME, OP, L)                                            \
5600    static bool trans_##NAME##_xrri(DisasContext *s, arg_s_rrr_shi *a)  \
5601    { return op_s_rrr_shi(s, a, OP, L, STREG_NONE); }                   \
5602    static bool trans_##NAME##_xrrr(DisasContext *s, arg_s_rrr_shr *a)  \
5603    { return op_s_rrr_shr(s, a, OP, L, STREG_NONE); }                   \
5604    static bool trans_##NAME##_xri(DisasContext *s, arg_s_rri_rot *a)   \
5605    { return op_s_rri_rot(s, a, OP, L, STREG_NONE); }
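    /*
     * As an illustration, DO_ANY3(AND, tcg_gen_and_i32, a->s, STREG_NORMAL)
     * below expands to trans_AND_rrri, trans_AND_rrrr and trans_AND_rri,
     * each of which evaluates K into a local StoreRegKind and then calls
     * the corresponding op_s_* helper with OP = tcg_gen_and_i32, L = a->s.
     */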
5606
5607DO_ANY3(AND, tcg_gen_and_i32, a->s, STREG_NORMAL)
5608DO_ANY3(EOR, tcg_gen_xor_i32, a->s, STREG_NORMAL)
5609DO_ANY3(ORR, tcg_gen_or_i32, a->s, STREG_NORMAL)
5610DO_ANY3(BIC, tcg_gen_andc_i32, a->s, STREG_NORMAL)
5611
5612DO_ANY3(RSB, a->s ? gen_rsb_CC : gen_rsb, false, STREG_NORMAL)
5613DO_ANY3(ADC, a->s ? gen_adc_CC : gen_add_carry, false, STREG_NORMAL)
5614DO_ANY3(SBC, a->s ? gen_sbc_CC : gen_sub_carry, false, STREG_NORMAL)
5615DO_ANY3(RSC, a->s ? gen_rsc_CC : gen_rsc, false, STREG_NORMAL)
5616
5617DO_CMP2(TST, tcg_gen_and_i32, true)
5618DO_CMP2(TEQ, tcg_gen_xor_i32, true)
5619DO_CMP2(CMN, gen_add_CC, false)
5620DO_CMP2(CMP, gen_sub_CC, false)
5621
5622DO_ANY3(ADD, a->s ? gen_add_CC : tcg_gen_add_i32, false,
5623        a->rd == 13 && a->rn == 13 ? STREG_SP_CHECK : STREG_NORMAL)
5624
5625/*
5626 * Note that for the computation of StoreRegKind we may return out of
5627 * the middle of the functions expanded by DO_ANY3, and that we modify
5628 * a->s via the K parameter before it is used by OP.
5629 */
5630DO_ANY3(SUB, a->s ? gen_sub_CC : tcg_gen_sub_i32, false,
5631        ({
5632            StoreRegKind ret = STREG_NORMAL;
5633            if (a->rd == 15 && a->s) {
5634                /*
5635                 * See ALUExceptionReturn:
5636                 * In User mode, UNPREDICTABLE; we choose UNDEF.
5637                 * In Hyp mode, UNDEFINED.
5638                 */
5639                if (IS_USER(s) || s->current_el == 2) {
5640                    unallocated_encoding(s);
5641                    return true;
5642                }
5643                /* There is no writeback of nzcv to PSTATE.  */
5644                a->s = 0;
5645                ret = STREG_EXC_RET;
5646            } else if (a->rd == 13 && a->rn == 13) {
5647                ret = STREG_SP_CHECK;
5648            }
5649            ret;
5650        }))
5651
5652DO_ANY2(MOV, tcg_gen_mov_i32, a->s,
5653        ({
5654            StoreRegKind ret = STREG_NORMAL;
5655            if (a->rd == 15 && a->s) {
5656                /*
5657                 * See ALUExceptionReturn:
5658                 * In User mode, UNPREDICTABLE; we choose UNDEF.
5659                 * In Hyp mode, UNDEFINED.
5660                 */
5661                if (IS_USER(s) || s->current_el == 2) {
5662                    unallocated_encoding(s);
5663                    return true;
5664                }
5665                /* There is no writeback of nzcv to PSTATE.  */
5666                a->s = 0;
5667                ret = STREG_EXC_RET;
5668            } else if (a->rd == 13) {
5669                ret = STREG_SP_CHECK;
5670            }
5671            ret;
5672        }))
5673
5674DO_ANY2(MVN, tcg_gen_not_i32, a->s, STREG_NORMAL)
5675
5676/*
5677 * ORN is only available with T32, so there is no register-shifted-register
5678 * form of the insn.  Using the DO_ANY3 macro would create an unused function.
5679 */
5680static bool trans_ORN_rrri(DisasContext *s, arg_s_rrr_shi *a)
5681{
5682    return op_s_rrr_shi(s, a, tcg_gen_orc_i32, a->s, STREG_NORMAL);
5683}
5684
5685static bool trans_ORN_rri(DisasContext *s, arg_s_rri_rot *a)
5686{
5687    return op_s_rri_rot(s, a, tcg_gen_orc_i32, a->s, STREG_NORMAL);
5688}
5689
5690#undef DO_ANY3
5691#undef DO_ANY2
5692#undef DO_CMP2
5693
5694static bool trans_ADR(DisasContext *s, arg_ri *a)
5695{
5696    store_reg_bx(s, a->rd, add_reg_for_lit(s, 15, a->imm));
5697    return true;
5698}
5699
5700static bool trans_MOVW(DisasContext *s, arg_MOVW *a)
5701{
5702    if (!ENABLE_ARCH_6T2) {
5703        return false;
5704    }
5705
5706    store_reg(s, a->rd, tcg_constant_i32(a->imm));
5707    return true;
5708}
5709
5710static bool trans_MOVT(DisasContext *s, arg_MOVW *a)
5711{
5712    TCGv_i32 tmp;
5713
5714    if (!ENABLE_ARCH_6T2) {
5715        return false;
5716    }
5717
5718    tmp = load_reg(s, a->rd);
5719    tcg_gen_ext16u_i32(tmp, tmp);
5720    tcg_gen_ori_i32(tmp, tmp, a->imm << 16);
5721    store_reg(s, a->rd, tmp);
5722    return true;
5723}
5724
5725/*
5726 * v8.1M MVE wide-shifts
5727 */
5728static bool do_mve_shl_ri(DisasContext *s, arg_mve_shl_ri *a,
5729                          WideShiftImmFn *fn)
5730{
5731    TCGv_i64 rda;
5732    TCGv_i32 rdalo, rdahi;
5733
5734    if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
5735        /* Decode falls through to ORR/MOV UNPREDICTABLE handling */
5736        return false;
5737    }
5738    if (a->rdahi == 15) {
5739        /* These are a different encoding (SQSHL/SRSHR/UQSHL/URSHR) */
5740        return false;
5741    }
5742    if (!dc_isar_feature(aa32_mve, s) ||
5743        !arm_dc_feature(s, ARM_FEATURE_M_MAIN) ||
5744        a->rdahi == 13) {
5745        /* RdaHi == 13 is UNPREDICTABLE; we choose to UNDEF */
5746        unallocated_encoding(s);
5747        return true;
5748    }
5749
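        /* An encoded shift count of 0 here stands for a shift by 32 */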
5750    if (a->shim == 0) {
5751        a->shim = 32;
5752    }
5753
5754    rda = tcg_temp_new_i64();
5755    rdalo = load_reg(s, a->rdalo);
5756    rdahi = load_reg(s, a->rdahi);
5757    tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
5758
5759    fn(rda, rda, a->shim);
5760
5761    tcg_gen_extrl_i64_i32(rdalo, rda);
5762    tcg_gen_extrh_i64_i32(rdahi, rda);
5763    store_reg(s, a->rdalo, rdalo);
5764    store_reg(s, a->rdahi, rdahi);
5765    tcg_temp_free_i64(rda);
5766
5767    return true;
5768}
5769
5770static bool trans_ASRL_ri(DisasContext *s, arg_mve_shl_ri *a)
5771{
5772    return do_mve_shl_ri(s, a, tcg_gen_sari_i64);
5773}
5774
5775static bool trans_LSLL_ri(DisasContext *s, arg_mve_shl_ri *a)
5776{
5777    return do_mve_shl_ri(s, a, tcg_gen_shli_i64);
5778}
5779
5780static bool trans_LSRL_ri(DisasContext *s, arg_mve_shl_ri *a)
5781{
5782    return do_mve_shl_ri(s, a, tcg_gen_shri_i64);
5783}
5784
5785static void gen_mve_sqshll(TCGv_i64 r, TCGv_i64 n, int64_t shift)
5786{
5787    gen_helper_mve_sqshll(r, cpu_env, n, tcg_constant_i32(shift));
5788}
5789
5790static bool trans_SQSHLL_ri(DisasContext *s, arg_mve_shl_ri *a)
5791{
5792    return do_mve_shl_ri(s, a, gen_mve_sqshll);
5793}
5794
5795static void gen_mve_uqshll(TCGv_i64 r, TCGv_i64 n, int64_t shift)
5796{
5797    gen_helper_mve_uqshll(r, cpu_env, n, tcg_constant_i32(shift));
5798}
5799
5800static bool trans_UQSHLL_ri(DisasContext *s, arg_mve_shl_ri *a)
5801{
5802    return do_mve_shl_ri(s, a, gen_mve_uqshll);
5803}
5804
5805static bool trans_SRSHRL_ri(DisasContext *s, arg_mve_shl_ri *a)
5806{
5807    return do_mve_shl_ri(s, a, gen_srshr64_i64);
5808}
5809
5810static bool trans_URSHRL_ri(DisasContext *s, arg_mve_shl_ri *a)
5811{
5812    return do_mve_shl_ri(s, a, gen_urshr64_i64);
5813}
5814
5815static bool do_mve_shl_rr(DisasContext *s, arg_mve_shl_rr *a, WideShiftFn *fn)
5816{
5817    TCGv_i64 rda;
5818    TCGv_i32 rdalo, rdahi;
5819
5820    if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
5821        /* Decode falls through to ORR/MOV UNPREDICTABLE handling */
5822        return false;
5823    }
5824    if (a->rdahi == 15) {
5825        /* These are a different encoding (SQSHL/SRSHR/UQSHL/URSHR) */
5826        return false;
5827    }
5828    if (!dc_isar_feature(aa32_mve, s) ||
5829        !arm_dc_feature(s, ARM_FEATURE_M_MAIN) ||
5830        a->rdahi == 13 || a->rm == 13 || a->rm == 15 ||
5831        a->rm == a->rdahi || a->rm == a->rdalo) {
5832        /* These rdahi/rdalo/rm cases are UNPREDICTABLE; we choose to UNDEF */
5833        unallocated_encoding(s);
5834        return true;
5835    }
5836
5837    rda = tcg_temp_new_i64();
5838    rdalo = load_reg(s, a->rdalo);
5839    rdahi = load_reg(s, a->rdahi);
5840    tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
5841
5842    /* The helper takes care of the sign-extension of the low 8 bits of Rm */
5843    fn(rda, cpu_env, rda, cpu_R[a->rm]);
5844
5845    tcg_gen_extrl_i64_i32(rdalo, rda);
5846    tcg_gen_extrh_i64_i32(rdahi, rda);
5847    store_reg(s, a->rdalo, rdalo);
5848    store_reg(s, a->rdahi, rdahi);
5849    tcg_temp_free_i64(rda);
5850
5851    return true;
5852}
5853
5854static bool trans_LSLL_rr(DisasContext *s, arg_mve_shl_rr *a)
5855{
5856    return do_mve_shl_rr(s, a, gen_helper_mve_ushll);
5857}
5858
5859static bool trans_ASRL_rr(DisasContext *s, arg_mve_shl_rr *a)
5860{
5861    return do_mve_shl_rr(s, a, gen_helper_mve_sshrl);
5862}
5863
5864static bool trans_UQRSHLL64_rr(DisasContext *s, arg_mve_shl_rr *a)
5865{
5866    return do_mve_shl_rr(s, a, gen_helper_mve_uqrshll);
5867}
5868
5869static bool trans_SQRSHRL64_rr(DisasContext *s, arg_mve_shl_rr *a)
5870{
5871    return do_mve_shl_rr(s, a, gen_helper_mve_sqrshrl);
5872}
5873
5874static bool trans_UQRSHLL48_rr(DisasContext *s, arg_mve_shl_rr *a)
5875{
5876    return do_mve_shl_rr(s, a, gen_helper_mve_uqrshll48);
5877}
5878
5879static bool trans_SQRSHRL48_rr(DisasContext *s, arg_mve_shl_rr *a)
5880{
5881    return do_mve_shl_rr(s, a, gen_helper_mve_sqrshrl48);
5882}
5883
5884static bool do_mve_sh_ri(DisasContext *s, arg_mve_sh_ri *a, ShiftImmFn *fn)
5885{
5886    if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
5887        /* Decode falls through to ORR/MOV UNPREDICTABLE handling */
5888        return false;
5889    }
5890    if (!dc_isar_feature(aa32_mve, s) ||
5891        !arm_dc_feature(s, ARM_FEATURE_M_MAIN) ||
5892        a->rda == 13 || a->rda == 15) {
5893        /* These rda cases are UNPREDICTABLE; we choose to UNDEF */
5894        unallocated_encoding(s);
5895        return true;
5896    }
5897
5898    if (a->shim == 0) {
5899        a->shim = 32;
5900    }
5901    fn(cpu_R[a->rda], cpu_R[a->rda], a->shim);
5902
5903    return true;
5904}
5905
5906static bool trans_URSHR_ri(DisasContext *s, arg_mve_sh_ri *a)
5907{
5908    return do_mve_sh_ri(s, a, gen_urshr32_i32);
5909}
5910
5911static bool trans_SRSHR_ri(DisasContext *s, arg_mve_sh_ri *a)
5912{
5913    return do_mve_sh_ri(s, a, gen_srshr32_i32);
5914}
5915
5916static void gen_mve_sqshl(TCGv_i32 r, TCGv_i32 n, int32_t shift)
5917{
5918    gen_helper_mve_sqshl(r, cpu_env, n, tcg_constant_i32(shift));
5919}
5920
5921static bool trans_SQSHL_ri(DisasContext *s, arg_mve_sh_ri *a)
5922{
5923    return do_mve_sh_ri(s, a, gen_mve_sqshl);
5924}
5925
5926static void gen_mve_uqshl(TCGv_i32 r, TCGv_i32 n, int32_t shift)
5927{
5928    gen_helper_mve_uqshl(r, cpu_env, n, tcg_constant_i32(shift));
5929}
5930
5931static bool trans_UQSHL_ri(DisasContext *s, arg_mve_sh_ri *a)
5932{
5933    return do_mve_sh_ri(s, a, gen_mve_uqshl);
5934}
5935
5936static bool do_mve_sh_rr(DisasContext *s, arg_mve_sh_rr *a, ShiftFn *fn)
5937{
5938    if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
5939        /* Decode falls through to ORR/MOV UNPREDICTABLE handling */
5940        return false;
5941    }
5942    if (!dc_isar_feature(aa32_mve, s) ||
5943        !arm_dc_feature(s, ARM_FEATURE_M_MAIN) ||
5944        a->rda == 13 || a->rda == 15 || a->rm == 13 || a->rm == 15 ||
5945        a->rm == a->rda) {
5946        /* These rda/rm cases are UNPREDICTABLE; we choose to UNDEF */
5947        unallocated_encoding(s);
5948        return true;
5949    }
5950
5951    /* The helper takes care of the sign-extension of the low 8 bits of Rm */
5952    fn(cpu_R[a->rda], cpu_env, cpu_R[a->rda], cpu_R[a->rm]);
5953    return true;
5954}
5955
5956static bool trans_SQRSHR_rr(DisasContext *s, arg_mve_sh_rr *a)
5957{
5958    return do_mve_sh_rr(s, a, gen_helper_mve_sqrshr);
5959}
5960
5961static bool trans_UQRSHL_rr(DisasContext *s, arg_mve_sh_rr *a)
5962{
5963    return do_mve_sh_rr(s, a, gen_helper_mve_uqrshl);
5964}
5965
5966/*
5967 * Multiply and multiply accumulate
5968 */
5969
5970static bool op_mla(DisasContext *s, arg_s_rrrr *a, bool add)
5971{
5972    TCGv_i32 t1, t2;
5973
5974    t1 = load_reg(s, a->rn);
5975    t2 = load_reg(s, a->rm);
5976    tcg_gen_mul_i32(t1, t1, t2);
5977    tcg_temp_free_i32(t2);
5978    if (add) {
5979        t2 = load_reg(s, a->ra);
5980        tcg_gen_add_i32(t1, t1, t2);
5981        tcg_temp_free_i32(t2);
5982    }
5983    if (a->s) {
5984        gen_logic_CC(t1);
5985    }
5986    store_reg(s, a->rd, t1);
5987    return true;
5988}
5989
5990static bool trans_MUL(DisasContext *s, arg_MUL *a)
5991{
5992    return op_mla(s, a, false);
5993}
5994
5995static bool trans_MLA(DisasContext *s, arg_MLA *a)
5996{
5997    return op_mla(s, a, true);
5998}
5999
6000static bool trans_MLS(DisasContext *s, arg_MLS *a)
6001{
6002    TCGv_i32 t1, t2;
6003
6004    if (!ENABLE_ARCH_6T2) {
6005        return false;
6006    }
6007    t1 = load_reg(s, a->rn);
6008    t2 = load_reg(s, a->rm);
6009    tcg_gen_mul_i32(t1, t1, t2);
6010    tcg_temp_free_i32(t2);
6011    t2 = load_reg(s, a->ra);
6012    tcg_gen_sub_i32(t1, t2, t1);
6013    tcg_temp_free_i32(t2);
6014    store_reg(s, a->rd, t1);
6015    return true;
6016}
6017
6018static bool op_mlal(DisasContext *s, arg_s_rrrr *a, bool uns, bool add)
6019{
6020    TCGv_i32 t0, t1, t2, t3;
6021
6022    t0 = load_reg(s, a->rm);
6023    t1 = load_reg(s, a->rn);
6024    if (uns) {
6025        tcg_gen_mulu2_i32(t0, t1, t0, t1);
6026    } else {
6027        tcg_gen_muls2_i32(t0, t1, t0, t1);
6028    }
6029    if (add) {
6030        t2 = load_reg(s, a->ra);
6031        t3 = load_reg(s, a->rd);
6032        tcg_gen_add2_i32(t0, t1, t0, t1, t2, t3);
6033        tcg_temp_free_i32(t2);
6034        tcg_temp_free_i32(t3);
6035    }
6036    if (a->s) {
6037        gen_logicq_cc(t0, t1);
6038    }
6039    store_reg(s, a->ra, t0);
6040    store_reg(s, a->rd, t1);
6041    return true;
6042}
6043
6044static bool trans_UMULL(DisasContext *s, arg_UMULL *a)
6045{
6046    return op_mlal(s, a, true, false);
6047}
6048
6049static bool trans_SMULL(DisasContext *s, arg_SMULL *a)
6050{
6051    return op_mlal(s, a, false, false);
6052}
6053
6054static bool trans_UMLAL(DisasContext *s, arg_UMLAL *a)
6055{
6056    return op_mlal(s, a, true, true);
6057}
6058
6059static bool trans_SMLAL(DisasContext *s, arg_SMLAL *a)
6060{
6061    return op_mlal(s, a, false, true);
6062}
6063
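    /*
     * UMAAL computes RdHi:RdLo = Rn * Rm + RdHi + RdLo, all unsigned.
     * Note the two 32-bit accumulations below cannot overflow 64 bits:
     * (2^32 - 1)^2 + 2 * (2^32 - 1) == 2^64 - 1.
     */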
6064static bool trans_UMAAL(DisasContext *s, arg_UMAAL *a)
6065{
6066    TCGv_i32 t0, t1, t2, zero;
6067
6068    if (s->thumb
6069        ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
6070        : !ENABLE_ARCH_6) {
6071        return false;
6072    }
6073
6074    t0 = load_reg(s, a->rm);
6075    t1 = load_reg(s, a->rn);
6076    tcg_gen_mulu2_i32(t0, t1, t0, t1);
6077    zero = tcg_constant_i32(0);
6078    t2 = load_reg(s, a->ra);
6079    tcg_gen_add2_i32(t0, t1, t0, t1, t2, zero);
6080    tcg_temp_free_i32(t2);
6081    t2 = load_reg(s, a->rd);
6082    tcg_gen_add2_i32(t0, t1, t0, t1, t2, zero);
6083    tcg_temp_free_i32(t2);
6084    store_reg(s, a->ra, t0);
6085    store_reg(s, a->rd, t1);
6086    return true;
6087}
6088
6089/*
6090 * Saturating addition and subtraction
6091 */
6092
6093static bool op_qaddsub(DisasContext *s, arg_rrr *a, bool add, bool doub)
6094{
6095    TCGv_i32 t0, t1;
6096
6097    if (s->thumb
6098        ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
6099        : !ENABLE_ARCH_5TE) {
6100        return false;
6101    }
6102
6103    t0 = load_reg(s, a->rm);
6104    t1 = load_reg(s, a->rn);
6105    if (doub) {
6106        gen_helper_add_saturate(t1, cpu_env, t1, t1);
6107    }
6108    if (add) {
6109        gen_helper_add_saturate(t0, cpu_env, t0, t1);
6110    } else {
6111        gen_helper_sub_saturate(t0, cpu_env, t0, t1);
6112    }
6113    tcg_temp_free_i32(t1);
6114    store_reg(s, a->rd, t0);
6115    return true;
6116}
6117
6118#define DO_QADDSUB(NAME, ADD, DOUB) \
6119static bool trans_##NAME(DisasContext *s, arg_rrr *a)    \
6120{                                                        \
6121    return op_qaddsub(s, a, ADD, DOUB);                  \
6122}
6123
6124DO_QADDSUB(QADD, true, false)
6125DO_QADDSUB(QSUB, false, false)
6126DO_QADDSUB(QDADD, true, true)
6127DO_QADDSUB(QDSUB, false, true)
6128
6129#undef DO_QADDSUB
6130
6131/*
6132 * Halfword multiply and multiply accumulate
6133 */
6134
6135static bool op_smlaxxx(DisasContext *s, arg_rrrr *a,
6136                       int add_long, bool nt, bool mt)
6137{
6138    TCGv_i32 t0, t1, tl, th;
6139
6140    if (s->thumb
6141        ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
6142        : !ENABLE_ARCH_5TE) {
6143        return false;
6144    }
6145
6146    t0 = load_reg(s, a->rn);
6147    t1 = load_reg(s, a->rm);
6148    gen_mulxy(t0, t1, nt, mt);
6149    tcg_temp_free_i32(t1);
6150
6151    switch (add_long) {
6152    case 0:
6153        store_reg(s, a->rd, t0);
6154        break;
6155    case 1:
6156        t1 = load_reg(s, a->ra);
6157        gen_helper_add_setq(t0, cpu_env, t0, t1);
6158        tcg_temp_free_i32(t1);
6159        store_reg(s, a->rd, t0);
6160        break;
6161    case 2:
6162        tl = load_reg(s, a->ra);
6163        th = load_reg(s, a->rd);
6164        /* Sign-extend the 32-bit product to 64 bits.  */
6165        t1 = tcg_temp_new_i32();
6166        tcg_gen_sari_i32(t1, t0, 31);
6167        tcg_gen_add2_i32(tl, th, tl, th, t0, t1);
6168        tcg_temp_free_i32(t0);
6169        tcg_temp_free_i32(t1);
6170        store_reg(s, a->ra, tl);
6171        store_reg(s, a->rd, th);
6172        break;
6173    default:
6174        g_assert_not_reached();
6175    }
6176    return true;
6177}
6178
6179#define DO_SMLAX(NAME, add, nt, mt) \
6180static bool trans_##NAME(DisasContext *s, arg_rrrr *a)     \
6181{                                                          \
6182    return op_smlaxxx(s, a, add, nt, mt);                  \
6183}
6184
6185DO_SMLAX(SMULBB, 0, 0, 0)
6186DO_SMLAX(SMULBT, 0, 0, 1)
6187DO_SMLAX(SMULTB, 0, 1, 0)
6188DO_SMLAX(SMULTT, 0, 1, 1)
6189
6190DO_SMLAX(SMLABB, 1, 0, 0)
6191DO_SMLAX(SMLABT, 1, 0, 1)
6192DO_SMLAX(SMLATB, 1, 1, 0)
6193DO_SMLAX(SMLATT, 1, 1, 1)
6194
6195DO_SMLAX(SMLALBB, 2, 0, 0)
6196DO_SMLAX(SMLALBT, 2, 0, 1)
6197DO_SMLAX(SMLALTB, 2, 1, 0)
6198DO_SMLAX(SMLALTT, 2, 1, 1)
6199
6200#undef DO_SMLAX
6201
6202static bool op_smlawx(DisasContext *s, arg_rrrr *a, bool add, bool mt)
6203{
6204    TCGv_i32 t0, t1;
6205
6206    if (!ENABLE_ARCH_5TE) {
6207        return false;
6208    }
6209
6210    t0 = load_reg(s, a->rn);
6211    t1 = load_reg(s, a->rm);
6212    /*
6213     * Since the nominal result is product<47:16>, shift the 16-bit
6214     * input up by 16 bits, so that the result is at product<63:32>.
6215     */
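        /*
         * For example, SMULWB's result is (Rn * SInt(Rm<15:0>))<47:16>;
         * after the shift those bits are exactly the high word of the
         * 64-bit product, which tcg_gen_muls2_i32() leaves in t1.
         */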
6216    if (mt) {
6217        tcg_gen_andi_i32(t1, t1, 0xffff0000);
6218    } else {
6219        tcg_gen_shli_i32(t1, t1, 16);
6220    }
6221    tcg_gen_muls2_i32(t0, t1, t0, t1);
6222    tcg_temp_free_i32(t0);
6223    if (add) {
6224        t0 = load_reg(s, a->ra);
6225        gen_helper_add_setq(t1, cpu_env, t1, t0);
6226        tcg_temp_free_i32(t0);
6227    }
6228    store_reg(s, a->rd, t1);
6229    return true;
6230}
6231
6232#define DO_SMLAWX(NAME, add, mt) \
6233static bool trans_##NAME(DisasContext *s, arg_rrrr *a)     \
6234{                                                          \
6235    return op_smlawx(s, a, add, mt);                       \
6236}
6237
6238DO_SMLAWX(SMULWB, 0, 0)
6239DO_SMLAWX(SMULWT, 0, 1)
6240DO_SMLAWX(SMLAWB, 1, 0)
6241DO_SMLAWX(SMLAWT, 1, 1)
6242
6243#undef DO_SMLAWX
6244
6245/*
6246 * MSR (immediate) and hints
6247 */
6248
6249static bool trans_YIELD(DisasContext *s, arg_YIELD *a)
6250{
6251    /*
6252     * When running single-threaded TCG code, use the helper to ensure that
6253     * the next round-robin scheduled vCPU gets a crack.  When running in
6254     * MTTCG we don't generate jumps to the helper as it won't affect the
6255     * scheduling of other vCPUs.
6256     */
6257    if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
6258        gen_update_pc(s, curr_insn_len(s));
6259        s->base.is_jmp = DISAS_YIELD;
6260    }
6261    return true;
6262}
6263
6264static bool trans_WFE(DisasContext *s, arg_WFE *a)
6265{
6266    /*
6267     * When running single-threaded TCG code, use the helper to ensure that
6268     * the next round-robin scheduled vCPU gets a crack.  In MTTCG mode we
6269     * just skip this instruction.  Currently the SEV/SEVL instructions,
6270     * which are *one* of many ways to wake the CPU from WFE, are not
6271     * implemented so we can't sleep like WFI does.
6272     */
6273    if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
6274        gen_update_pc(s, curr_insn_len(s));
6275        s->base.is_jmp = DISAS_WFE;
6276    }
6277    return true;
6278}
6279
6280static bool trans_WFI(DisasContext *s, arg_WFI *a)
6281{
6282    /* For WFI, halt the vCPU until an interrupt arrives. */
6283    gen_update_pc(s, curr_insn_len(s));
6284    s->base.is_jmp = DISAS_WFI;
6285    return true;
6286}
6287
6288static bool trans_ESB(DisasContext *s, arg_ESB *a)
6289{
6290    /*
6291     * For M-profile, minimal-RAS ESB can be a NOP.
6292     * Without RAS, we must implement this as NOP.
6293     */
6294    if (!arm_dc_feature(s, ARM_FEATURE_M) && dc_isar_feature(aa32_ras, s)) {
6295        /*
6296         * QEMU does not have a source of physical SErrors,
6297         * so we are only concerned with virtual SErrors.
6298         * The pseudocode in the Arm ARM for this case is
6299         *   if PSTATE.EL IN {EL0, EL1} && EL2Enabled() then
6300         *      AArch32.vESBOperation();
6301         * Most of the condition can be evaluated at translation time.
6302         * Test for EL2 present, and defer test for SEL2 to runtime.
6303         */
6304        if (s->current_el <= 1 && arm_dc_feature(s, ARM_FEATURE_EL2)) {
6305            gen_helper_vesb(cpu_env);
6306        }
6307    }
6308    return true;
6309}
6310
6311static bool trans_NOP(DisasContext *s, arg_NOP *a)
6312{
6313    return true;
6314}
6315
6316static bool trans_MSR_imm(DisasContext *s, arg_MSR_imm *a)
6317{
6318    uint32_t val = ror32(a->imm, a->rot * 2);
6319    uint32_t mask = msr_mask(s, a->mask, a->r);
6320
6321    if (gen_set_psr_im(s, mask, a->r, val)) {
6322        unallocated_encoding(s);
6323    }
6324    return true;
6325}
6326
6327/*
6328 * Cyclic Redundancy Check
6329 */
6330
6331static bool op_crc32(DisasContext *s, arg_rrr *a, bool c, MemOp sz)
6332{
6333    TCGv_i32 t1, t2, t3;
6334
6335    if (!dc_isar_feature(aa32_crc32, s)) {
6336        return false;
6337    }
6338
6339    t1 = load_reg(s, a->rn);
6340    t2 = load_reg(s, a->rm);
6341    switch (sz) {
6342    case MO_8:
6343        gen_uxtb(t2);
6344        break;
6345    case MO_16:
6346        gen_uxth(t2);
6347        break;
6348    case MO_32:
6349        break;
6350    default:
6351        g_assert_not_reached();
6352    }
6353    t3 = tcg_constant_i32(1 << sz);  /* operand size in bytes */
6354    if (c) {
6355        gen_helper_crc32c(t1, t1, t2, t3);
6356    } else {
6357        gen_helper_crc32(t1, t1, t2, t3);
6358    }
6359    tcg_temp_free_i32(t2);
6360    store_reg(s, a->rd, t1);
6361    return true;
6362}
6363
6364#define DO_CRC32(NAME, c, sz) \
6365static bool trans_##NAME(DisasContext *s, arg_rrr *a)  \
6366    { return op_crc32(s, a, c, sz); }
6367
6368DO_CRC32(CRC32B, false, MO_8)
6369DO_CRC32(CRC32H, false, MO_16)
6370DO_CRC32(CRC32W, false, MO_32)
6371DO_CRC32(CRC32CB, true, MO_8)
6372DO_CRC32(CRC32CH, true, MO_16)
6373DO_CRC32(CRC32CW, true, MO_32)
6374
6375#undef DO_CRC32
6376
6377/*
6378 * Miscellaneous instructions
6379 */
6380
6381static bool trans_MRS_bank(DisasContext *s, arg_MRS_bank *a)
6382{
6383    if (arm_dc_feature(s, ARM_FEATURE_M)) {
6384        return false;
6385    }
6386    gen_mrs_banked(s, a->r, a->sysm, a->rd);
6387    return true;
6388}
6389
6390static bool trans_MSR_bank(DisasContext *s, arg_MSR_bank *a)
6391{
6392    if (arm_dc_feature(s, ARM_FEATURE_M)) {
6393        return false;
6394    }
6395    gen_msr_banked(s, a->r, a->sysm, a->rn);
6396    return true;
6397}
6398
6399static bool trans_MRS_reg(DisasContext *s, arg_MRS_reg *a)
6400{
6401    TCGv_i32 tmp;
6402
6403    if (arm_dc_feature(s, ARM_FEATURE_M)) {
6404        return false;
6405    }
6406    if (a->r) {
6407        if (IS_USER(s)) {
6408            unallocated_encoding(s);
6409            return true;
6410        }
6411        tmp = load_cpu_field(spsr);
6412    } else {
6413        tmp = tcg_temp_new_i32();
6414        gen_helper_cpsr_read(tmp, cpu_env);
6415    }
6416    store_reg(s, a->rd, tmp);
6417    return true;
6418}
6419
6420static bool trans_MSR_reg(DisasContext *s, arg_MSR_reg *a)
6421{
6422    TCGv_i32 tmp;
6423    uint32_t mask = msr_mask(s, a->mask, a->r);
6424
6425    if (arm_dc_feature(s, ARM_FEATURE_M)) {
6426        return false;
6427    }
6428    tmp = load_reg(s, a->rn);
6429    if (gen_set_psr(s, mask, a->r, tmp)) {
6430        unallocated_encoding(s);
6431    }
6432    return true;
6433}
6434
6435static bool trans_MRS_v7m(DisasContext *s, arg_MRS_v7m *a)
6436{
6437    TCGv_i32 tmp;
6438
6439    if (!arm_dc_feature(s, ARM_FEATURE_M)) {
6440        return false;
6441    }
6442    tmp = tcg_temp_new_i32();
6443    gen_helper_v7m_mrs(tmp, cpu_env, tcg_constant_i32(a->sysm));
6444    store_reg(s, a->rd, tmp);
6445    return true;
6446}
6447
6448static bool trans_MSR_v7m(DisasContext *s, arg_MSR_v7m *a)
6449{
6450    TCGv_i32 addr, reg;
6451
6452    if (!arm_dc_feature(s, ARM_FEATURE_M)) {
6453        return false;
6454    }
6455    addr = tcg_constant_i32((a->mask << 10) | a->sysm);
6456    reg = load_reg(s, a->rn);
6457    gen_helper_v7m_msr(cpu_env, addr, reg);
6458    tcg_temp_free_i32(reg);
6459    /* If we wrote to CONTROL, the EL might have changed */
6460    gen_rebuild_hflags(s, true);
6461    gen_lookup_tb(s);
6462    return true;
6463}
6464
6465static bool trans_BX(DisasContext *s, arg_BX *a)
6466{
6467    if (!ENABLE_ARCH_4T) {
6468        return false;
6469    }
6470    gen_bx_excret(s, load_reg(s, a->rm));
6471    return true;
6472}
6473
6474static bool trans_BXJ(DisasContext *s, arg_BXJ *a)
6475{
6476    if (!ENABLE_ARCH_5J || arm_dc_feature(s, ARM_FEATURE_M)) {
6477        return false;
6478    }
6479    /*
6480     * v7A allows BXJ to be trapped via HSTR.TJDBX. We don't waste a
6481     * TBFLAGS bit on a basically-never-happens case, so call a helper
6482     * function to check for the trap and raise the exception if needed
6483     * (passing it the register number for the syndrome value).
6484     * v8A doesn't have this HSTR bit.
6485     */
6486    if (!arm_dc_feature(s, ARM_FEATURE_V8) &&
6487        arm_dc_feature(s, ARM_FEATURE_EL2) &&
6488        s->current_el < 2 && s->ns) {
6489        gen_helper_check_bxj_trap(cpu_env, tcg_constant_i32(a->rm));
6490    }
6491    /* Trivial implementation equivalent to bx.  */
6492    gen_bx(s, load_reg(s, a->rm));
6493    return true;
6494}
6495
6496static bool trans_BLX_r(DisasContext *s, arg_BLX_r *a)
6497{
6498    TCGv_i32 tmp;
6499
6500    if (!ENABLE_ARCH_5) {
6501        return false;
6502    }
6503    tmp = load_reg(s, a->rm);
6504    gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | s->thumb);
6505    gen_bx(s, tmp);
6506    return true;
6507}
6508
6509/*
6510 * BXNS/BLXNS: only exist for v8M with the security extensions,
6511 * and always UNDEF if NonSecure.  We don't implement these in
6512 * the user-only mode either (in theory you can use them from
6513 * Secure User mode but they are too tied in to system emulation).
6514 */
6515static bool trans_BXNS(DisasContext *s, arg_BXNS *a)
6516{
6517    if (!s->v8m_secure || IS_USER_ONLY) {
6518        unallocated_encoding(s);
6519    } else {
6520        gen_bxns(s, a->rm);
6521    }
6522    return true;
6523}
6524
6525static bool trans_BLXNS(DisasContext *s, arg_BLXNS *a)
6526{
6527    if (!s->v8m_secure || IS_USER_ONLY) {
6528        unallocated_encoding(s);
6529    } else {
6530        gen_blxns(s, a->rm);
6531    }
6532    return true;
6533}
6534
6535static bool trans_CLZ(DisasContext *s, arg_CLZ *a)
6536{
6537    TCGv_i32 tmp;
6538
6539    if (!ENABLE_ARCH_5) {
6540        return false;
6541    }
6542    tmp = load_reg(s, a->rm);
6543    tcg_gen_clzi_i32(tmp, tmp, 32);
6544    store_reg(s, a->rd, tmp);
6545    return true;
6546}
6547
6548static bool trans_ERET(DisasContext *s, arg_ERET *a)
6549{
6550    TCGv_i32 tmp;
6551
6552    if (!arm_dc_feature(s, ARM_FEATURE_V7VE)) {
6553        return false;
6554    }
6555    if (IS_USER(s)) {
6556        unallocated_encoding(s);
6557        return true;
6558    }
6559    if (s->current_el == 2) {
6560        /* ERET from Hyp uses ELR_Hyp, not LR */
6561        tmp = load_cpu_field(elr_el[2]);
6562    } else {
6563        tmp = load_reg(s, 14);
6564    }
6565    gen_exception_return(s, tmp);
6566    return true;
6567}
6568
6569static bool trans_HLT(DisasContext *s, arg_HLT *a)
6570{
6571    gen_hlt(s, a->imm);
6572    return true;
6573}
6574
6575static bool trans_BKPT(DisasContext *s, arg_BKPT *a)
6576{
6577    if (!ENABLE_ARCH_5) {
6578        return false;
6579    }
6580    /* BKPT is OK with ECI set and leaves it untouched */
6581    s->eci_handled = true;
6582    if (arm_dc_feature(s, ARM_FEATURE_M) &&
6583        semihosting_enabled(s->current_el == 0) &&
6584        (a->imm == 0xab)) {
6585        gen_exception_internal_insn(s, EXCP_SEMIHOST);
6586    } else {
6587        gen_exception_bkpt_insn(s, syn_aa32_bkpt(a->imm, false));
6588    }
6589    return true;
6590}
6591
6592static bool trans_HVC(DisasContext *s, arg_HVC *a)
6593{
6594    if (!ENABLE_ARCH_7 || arm_dc_feature(s, ARM_FEATURE_M)) {
6595        return false;
6596    }
6597    if (IS_USER(s)) {
6598        unallocated_encoding(s);
6599    } else {
6600        gen_hvc(s, a->imm);
6601    }
6602    return true;
6603}
6604
6605static bool trans_SMC(DisasContext *s, arg_SMC *a)
6606{
6607    if (!ENABLE_ARCH_6K || arm_dc_feature(s, ARM_FEATURE_M)) {
6608        return false;
6609    }
6610    if (IS_USER(s)) {
6611        unallocated_encoding(s);
6612    } else {
6613        gen_smc(s);
6614    }
6615    return true;
6616}
6617
6618static bool trans_SG(DisasContext *s, arg_SG *a)
6619{
6620    if (!arm_dc_feature(s, ARM_FEATURE_M) ||
6621        !arm_dc_feature(s, ARM_FEATURE_V8)) {
6622        return false;
6623    }
6624    /*
6625     * SG (v8M only)
6626     * The bulk of the behaviour for this instruction is implemented
6627     * in v7m_handle_execute_nsc(), which deals with the insn when
6628     * it is executed by a CPU in non-secure state from memory
6629     * which is Secure & NonSecure-Callable.
6630     * Here we only need to handle the remaining cases:
6631     *  * in NS memory (including the "security extension not
6632     *    implemented" case) : NOP
6633     *  * in S memory but CPU already secure (clear IT bits)
6634     * We know that the attribute for the memory this insn is
6635     * in must match the current CPU state, because otherwise
6636     * get_phys_addr_pmsav8 would have generated an exception.
6637     */
6638    if (s->v8m_secure) {
6639        /* Like the IT insn, we don't need to generate any code */
6640        s->condexec_cond = 0;
6641        s->condexec_mask = 0;
6642    }
6643    return true;
6644}
6645
6646static bool trans_TT(DisasContext *s, arg_TT *a)
6647{
6648    TCGv_i32 addr, tmp;
6649
6650    if (!arm_dc_feature(s, ARM_FEATURE_M) ||
6651        !arm_dc_feature(s, ARM_FEATURE_V8)) {
6652        return false;
6653    }
6654    if (a->rd == 13 || a->rd == 15 || a->rn == 15) {
6655        /* We UNDEF for these UNPREDICTABLE cases */
6656        unallocated_encoding(s);
6657        return true;
6658    }
6659    if (a->A && !s->v8m_secure) {
6660        /* This case is UNDEFINED.  */
6661        unallocated_encoding(s);
6662        return true;
6663    }
6664
6665    addr = load_reg(s, a->rn);
6666    tmp = tcg_temp_new_i32();
6667    gen_helper_v7m_tt(tmp, cpu_env, addr, tcg_constant_i32((a->A << 1) | a->T));
6668    tcg_temp_free_i32(addr);
6669    store_reg(s, a->rd, tmp);
6670    return true;
6671}
6672
6673/*
6674 * Load/store register index
6675 */
6676
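    /*
     * Build the ISS (syndrome) information that disas_set_da_iss() will
     * record for a data abort on this access; it is only valid for the
     * addressing forms without writeback.
     */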
6677static ISSInfo make_issinfo(DisasContext *s, int rd, bool p, bool w)
6678{
6679    ISSInfo ret;
6680
6681    /* ISS not valid if writeback */
6682    if (p && !w) {
6683        ret = rd;
6684        if (curr_insn_len(s) == 2) {
6685            ret |= ISSIs16Bit;
6686        }
6687    } else {
6688        ret = ISSInvalid;
6689    }
6690    return ret;
6691}
6692
6693static TCGv_i32 op_addr_rr_pre(DisasContext *s, arg_ldst_rr *a)
6694{
6695    TCGv_i32 addr = load_reg(s, a->rn);
6696
6697    if (s->v8m_stackcheck && a->rn == 13 && a->w) {
6698        gen_helper_v8m_stackcheck(cpu_env, addr);
6699    }
6700
6701    if (a->p) {
6702        TCGv_i32 ofs = load_reg(s, a->rm);
6703        gen_arm_shift_im(ofs, a->shtype, a->shimm, 0);
6704        if (a->u) {
6705            tcg_gen_add_i32(addr, addr, ofs);
6706        } else {
6707            tcg_gen_sub_i32(addr, addr, ofs);
6708        }
6709        tcg_temp_free_i32(ofs);
6710    }
6711    return addr;
6712}
6713
6714static void op_addr_rr_post(DisasContext *s, arg_ldst_rr *a,
6715                            TCGv_i32 addr, int address_offset)
6716{
6717    if (!a->p) {
6718        TCGv_i32 ofs = load_reg(s, a->rm);
6719        gen_arm_shift_im(ofs, a->shtype, a->shimm, 0);
6720        if (a->u) {
6721            tcg_gen_add_i32(addr, addr, ofs);
6722        } else {
6723            tcg_gen_sub_i32(addr, addr, ofs);
6724        }
6725        tcg_temp_free_i32(ofs);
6726    } else if (!a->w) {
6727        tcg_temp_free_i32(addr);
6728        return;
6729    }
6730    tcg_gen_addi_i32(addr, addr, address_offset);
6731    store_reg(s, a->rn, addr);
6732}
6733
6734static bool op_load_rr(DisasContext *s, arg_ldst_rr *a,
6735                       MemOp mop, int mem_idx)
6736{
6737    ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w);
6738    TCGv_i32 addr, tmp;
6739
6740    addr = op_addr_rr_pre(s, a);
6741
6742    tmp = tcg_temp_new_i32();
6743    gen_aa32_ld_i32(s, tmp, addr, mem_idx, mop);
6744    disas_set_da_iss(s, mop, issinfo);
6745
6746    /*
6747     * Perform base writeback before the loaded value to
6748     * ensure correct behavior with overlapping index registers.
6749     */
6750    op_addr_rr_post(s, a, addr, 0);
6751    store_reg_from_load(s, a->rt, tmp);
6752    return true;
6753}
6754
6755static bool op_store_rr(DisasContext *s, arg_ldst_rr *a,
6756                        MemOp mop, int mem_idx)
6757{
6758    ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w) | ISSIsWrite;
6759    TCGv_i32 addr, tmp;
6760
6761    /*
6762     * In Thumb encodings of stores Rn=1111 is UNDEF; for Arm it
6763     * is either UNPREDICTABLE or has defined behaviour
6764     */
6765    if (s->thumb && a->rn == 15) {
6766        return false;
6767    }
6768
6769    addr = op_addr_rr_pre(s, a);
6770
6771    tmp = load_reg(s, a->rt);
6772    gen_aa32_st_i32(s, tmp, addr, mem_idx, mop);
6773    disas_set_da_iss(s, mop, issinfo);
6774    tcg_temp_free_i32(tmp);
6775
6776    op_addr_rr_post(s, a, addr, 0);
6777    return true;
6778}
6779
6780static bool trans_LDRD_rr(DisasContext *s, arg_ldst_rr *a)
6781{
6782    int mem_idx = get_mem_index(s);
6783    TCGv_i32 addr, tmp;
6784
6785    if (!ENABLE_ARCH_5TE) {
6786        return false;
6787    }
6788    if (a->rt & 1) {
6789        unallocated_encoding(s);
6790        return true;
6791    }
6792    addr = op_addr_rr_pre(s, a);
6793
6794    tmp = tcg_temp_new_i32();
6795    gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6796    store_reg(s, a->rt, tmp);
6797
6798    tcg_gen_addi_i32(addr, addr, 4);
6799
6800    tmp = tcg_temp_new_i32();
6801    gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6802    store_reg(s, a->rt + 1, tmp);
6803
6804    /* LDRD w/ base writeback is undefined if the registers overlap.  */
6805    op_addr_rr_post(s, a, addr, -4);
6806    return true;
6807}
6808
6809static bool trans_STRD_rr(DisasContext *s, arg_ldst_rr *a)
6810{
6811    int mem_idx = get_mem_index(s);
6812    TCGv_i32 addr, tmp;
6813
6814    if (!ENABLE_ARCH_5TE) {
6815        return false;
6816    }
6817    if (a->rt & 1) {
6818        unallocated_encoding(s);
6819        return true;
6820    }
6821    addr = op_addr_rr_pre(s, a);
6822
6823    tmp = load_reg(s, a->rt);
6824    gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6825    tcg_temp_free_i32(tmp);
6826
6827    tcg_gen_addi_i32(addr, addr, 4);
6828
6829    tmp = load_reg(s, a->rt + 1);
6830    gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6831    tcg_temp_free_i32(tmp);
6832
6833    op_addr_rr_post(s, a, addr, -4);
6834    return true;
6835}
6836
6837/*
6838 * Load/store immediate index
6839 */
6840
6841static TCGv_i32 op_addr_ri_pre(DisasContext *s, arg_ldst_ri *a)
6842{
6843    int ofs = a->imm;
6844
6845    if (!a->u) {
6846        ofs = -ofs;
6847    }
6848
6849    if (s->v8m_stackcheck && a->rn == 13 && a->w) {
6850        /*
6851         * Stackcheck. Here we know 'addr' is the current SP;
6852         * U is set if we're moving SP up, else down. It is
6853         * UNKNOWN whether the limit check triggers when SP starts
6854         * below the limit and ends up above it; we choose to trigger it.
6855         */
6856        if (!a->u) {
6857            TCGv_i32 newsp = tcg_temp_new_i32();
6858            tcg_gen_addi_i32(newsp, cpu_R[13], ofs);
6859            gen_helper_v8m_stackcheck(cpu_env, newsp);
6860            tcg_temp_free_i32(newsp);
6861        } else {
6862            gen_helper_v8m_stackcheck(cpu_env, cpu_R[13]);
6863        }
6864    }
6865
6866    return add_reg_for_lit(s, a->rn, a->p ? ofs : 0);
6867}
6868
6869static void op_addr_ri_post(DisasContext *s, arg_ldst_ri *a,
6870                            TCGv_i32 addr, int address_offset)
6871{
6872    if (!a->p) {
6873        if (a->u) {
6874            address_offset += a->imm;
6875        } else {
6876            address_offset -= a->imm;
6877        }
6878    } else if (!a->w) {
6879        tcg_temp_free_i32(addr);
6880        return;
6881    }
6882    tcg_gen_addi_i32(addr, addr, address_offset);
6883    store_reg(s, a->rn, addr);
6884}
6885
6886static bool op_load_ri(DisasContext *s, arg_ldst_ri *a,
6887                       MemOp mop, int mem_idx)
6888{
6889    ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w);
6890    TCGv_i32 addr, tmp;
6891
6892    addr = op_addr_ri_pre(s, a);
6893
6894    tmp = tcg_temp_new_i32();
6895    gen_aa32_ld_i32(s, tmp, addr, mem_idx, mop);
6896    disas_set_da_iss(s, mop, issinfo);
6897
6898    /*
6899     * Perform base writeback before the loaded value to
6900     * ensure correct behavior with overlapping index registers.
6901     */
6902    op_addr_ri_post(s, a, addr, 0);
6903    store_reg_from_load(s, a->rt, tmp);
6904    return true;
6905}
6906
6907static bool op_store_ri(DisasContext *s, arg_ldst_ri *a,
6908                        MemOp mop, int mem_idx)
6909{
6910    ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w) | ISSIsWrite;
6911    TCGv_i32 addr, tmp;
6912
6913    /*
6914     * In Thumb encodings of stores Rn=1111 is UNDEF; for Arm it
6915     * is either UNPREDICTABLE or has defined behaviour
6916     */
6917    if (s->thumb && a->rn == 15) {
6918        return false;
6919    }
6920
6921    addr = op_addr_ri_pre(s, a);
6922
6923    tmp = load_reg(s, a->rt);
6924    gen_aa32_st_i32(s, tmp, addr, mem_idx, mop);
6925    disas_set_da_iss(s, mop, issinfo);
6926    tcg_temp_free_i32(tmp);
6927
6928    op_addr_ri_post(s, a, addr, 0);
6929    return true;
6930}
6931
6932static bool op_ldrd_ri(DisasContext *s, arg_ldst_ri *a, int rt2)
6933{
6934    int mem_idx = get_mem_index(s);
6935    TCGv_i32 addr, tmp;
6936
6937    addr = op_addr_ri_pre(s, a);
6938
6939    tmp = tcg_temp_new_i32();
6940    gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6941    store_reg(s, a->rt, tmp);
6942
6943    tcg_gen_addi_i32(addr, addr, 4);
6944
6945    tmp = tcg_temp_new_i32();
6946    gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6947    store_reg(s, rt2, tmp);
6948
6949    /* LDRD w/ base writeback is undefined if the registers overlap.  */
6950    op_addr_ri_post(s, a, addr, -4);
6951    return true;
6952}
6953
6954static bool trans_LDRD_ri_a32(DisasContext *s, arg_ldst_ri *a)
6955{
6956    if (!ENABLE_ARCH_5TE || (a->rt & 1)) {
6957        return false;
6958    }
6959    return op_ldrd_ri(s, a, a->rt + 1);
6960}
6961
6962static bool trans_LDRD_ri_t32(DisasContext *s, arg_ldst_ri2 *a)
6963{
6964    arg_ldst_ri b = {
6965        .u = a->u, .w = a->w, .p = a->p,
6966        .rn = a->rn, .rt = a->rt, .imm = a->imm
6967    };
6968    return op_ldrd_ri(s, &b, a->rt2);
6969}
6970
6971static bool op_strd_ri(DisasContext *s, arg_ldst_ri *a, int rt2)
6972{
6973    int mem_idx = get_mem_index(s);
6974    TCGv_i32 addr, tmp;
6975
6976    addr = op_addr_ri_pre(s, a);
6977
6978    tmp = load_reg(s, a->rt);
6979    gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6980    tcg_temp_free_i32(tmp);
6981
6982    tcg_gen_addi_i32(addr, addr, 4);
6983
6984    tmp = load_reg(s, rt2);
6985    gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6986    tcg_temp_free_i32(tmp);
6987
6988    op_addr_ri_post(s, a, addr, -4);
6989    return true;
6990}
6991
6992static bool trans_STRD_ri_a32(DisasContext *s, arg_ldst_ri *a)
6993{
6994    if (!ENABLE_ARCH_5TE || (a->rt & 1)) {
6995        return false;
6996    }
6997    return op_strd_ri(s, a, a->rt + 1);
6998}
6999
7000static bool trans_STRD_ri_t32(DisasContext *s, arg_ldst_ri2 *a)
7001{
7002    arg_ldst_ri b = {
7003        .u = a->u, .w = a->w, .p = a->p,
7004        .rn = a->rn, .rt = a->rt, .imm = a->imm
7005    };
7006    return op_strd_ri(s, &b, a->rt2);
7007}
7008
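    /*
     * Expand one load/store insn into its four translate functions:
     * immediate (_ri) and register (_rr) addressing, each in a normal
     * form and a 'T' form that uses the unprivileged memory index.
     */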
7009#define DO_LDST(NAME, WHICH, MEMOP) \
7010static bool trans_##NAME##_ri(DisasContext *s, arg_ldst_ri *a)        \
7011{                                                                     \
7012    return op_##WHICH##_ri(s, a, MEMOP, get_mem_index(s));            \
7013}                                                                     \
7014static bool trans_##NAME##T_ri(DisasContext *s, arg_ldst_ri *a)       \
7015{                                                                     \
7016    return op_##WHICH##_ri(s, a, MEMOP, get_a32_user_mem_index(s));   \
7017}                                                                     \
7018static bool trans_##NAME##_rr(DisasContext *s, arg_ldst_rr *a)        \
7019{                                                                     \
7020    return op_##WHICH##_rr(s, a, MEMOP, get_mem_index(s));            \
7021}                                                                     \
7022static bool trans_##NAME##T_rr(DisasContext *s, arg_ldst_rr *a)       \
7023{                                                                     \
7024    return op_##WHICH##_rr(s, a, MEMOP, get_a32_user_mem_index(s));   \
7025}
7026
7027DO_LDST(LDR, load, MO_UL)
7028DO_LDST(LDRB, load, MO_UB)
7029DO_LDST(LDRH, load, MO_UW)
7030DO_LDST(LDRSB, load, MO_SB)
7031DO_LDST(LDRSH, load, MO_SW)
7032
7033DO_LDST(STR, store, MO_UL)
7034DO_LDST(STRB, store, MO_UB)
7035DO_LDST(STRH, store, MO_UW)
7036
7037#undef DO_LDST
7038
7039/*
7040 * Synchronization primitives
7041 */
7042
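    /* SWP/SWPB: the legacy swap instructions, implemented as a single atomic exchange */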
7043static bool op_swp(DisasContext *s, arg_SWP *a, MemOp opc)
7044{
7045    TCGv_i32 addr, tmp;
7046    TCGv taddr;
7047
7048    opc |= s->be_data;
7049    addr = load_reg(s, a->rn);
7050    taddr = gen_aa32_addr(s, addr, opc);
7051    tcg_temp_free_i32(addr);
7052
7053    tmp = load_reg(s, a->rt2);
7054    tcg_gen_atomic_xchg_i32(tmp, taddr, tmp, get_mem_index(s), opc);
7055    tcg_temp_free(taddr);
7056
7057    store_reg(s, a->rt, tmp);
7058    return true;
7059}
7060
7061static bool trans_SWP(DisasContext *s, arg_SWP *a)
7062{
7063    return op_swp(s, a, MO_UL | MO_ALIGN);
7064}
7065
7066static bool trans_SWPB(DisasContext *s, arg_SWP *a)
7067{
7068    return op_swp(s, a, MO_UB);
7069}
7070
7071/*
7072 * Load/Store Exclusive and Load-Acquire/Store-Release
7073 */
7074
7075static bool op_strex(DisasContext *s, arg_STREX *a, MemOp mop, bool rel)
7076{
7077    TCGv_i32 addr;
7078    /* Some cases stopped being UNPREDICTABLE in v8A (but not v8M) */
7079    bool v8a = ENABLE_ARCH_8 && !arm_dc_feature(s, ARM_FEATURE_M);
7080
7081    /* We UNDEF for these UNPREDICTABLE cases.  */
7082    if (a->rd == 15 || a->rn == 15 || a->rt == 15
7083        || a->rd == a->rn || a->rd == a->rt
7084        || (!v8a && s->thumb && (a->rd == 13 || a->rt == 13))
7085        || (mop == MO_64
7086            && (a->rt2 == 15
7087                || a->rd == a->rt2
7088                || (!v8a && s->thumb && a->rt2 == 13)))) {
7089        unallocated_encoding(s);
7090        return true;
7091    }
7092
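        /*
         * 'rel' is set for the Store-Release forms (STLEX*): the release
         * barrier must come before the store-exclusive itself.
         */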
7093    if (rel) {
7094        tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
7095    }
7096
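        /*
         * Use a local temp: gen_store_exclusive() branches internally and
         * the address must survive across those branches.
         */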
7097    addr = tcg_temp_local_new_i32();
7098    load_reg_var(s, addr, a->rn);
7099    tcg_gen_addi_i32(addr, addr, a->imm);
7100
7101    gen_store_exclusive(s, a->rd, a->rt, a->rt2, addr, mop);
7102    tcg_temp_free_i32(addr);
7103    return true;
7104}
7105
7106static bool trans_STREX(DisasContext *s, arg_STREX *a)
7107{
7108    if (!ENABLE_ARCH_6) {
7109        return false;
7110    }
7111    return op_strex(s, a, MO_32, false);
7112}
7113
7114static bool trans_STREXD_a32(DisasContext *s, arg_STREX *a)
7115{
7116    if (!ENABLE_ARCH_6K) {
7117        return false;
7118    }
7119    /* We UNDEF for these UNPREDICTABLE cases.  */
7120    if (a->rt & 1) {
7121        unallocated_encoding(s);
7122        return true;
7123    }
7124    a->rt2 = a->rt + 1;
7125    return op_strex(s, a, MO_64, false);
7126}
7127
7128static bool trans_STREXD_t32(DisasContext *s, arg_STREX *a)
7129{
7130    return op_strex(s, a, MO_64, false);
7131}
7132
7133static bool trans_STREXB(DisasContext *s, arg_STREX *a)
7134{
7135    if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
7136        return false;
7137    }
7138    return op_strex(s, a, MO_8, false);
7139}
7140
7141static bool trans_STREXH(DisasContext *s, arg_STREX *a)
7142{
7143    if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
7144        return false;
7145    }
7146    return op_strex(s, a, MO_16, false);
7147}
7148
7149static bool trans_STLEX(DisasContext *s, arg_STREX *a)
7150{
7151    if (!ENABLE_ARCH_8) {
7152        return false;
7153    }
7154    return op_strex(s, a, MO_32, true);
7155}
7156
7157static bool trans_STLEXD_a32(DisasContext *s, arg_STREX *a)
7158{
7159    if (!ENABLE_ARCH_8) {
7160        return false;
7161    }
7162    /* We UNDEF for these UNPREDICTABLE cases.  */
7163    if (a->rt & 1) {
7164        unallocated_encoding(s);
7165        return true;
7166    }
7167    a->rt2 = a->rt + 1;
7168    return op_strex(s, a, MO_64, true);
7169}
7170
7171static bool trans_STLEXD_t32(DisasContext *s, arg_STREX *a)
7172{
7173    if (!ENABLE_ARCH_8) {
7174        return false;
7175    }
7176    return op_strex(s, a, MO_64, true);
7177}
7178
7179static bool trans_STLEXB(DisasContext *s, arg_STREX *a)
7180{
7181    if (!ENABLE_ARCH_8) {
7182        return false;
7183    }
7184    return op_strex(s, a, MO_8, true);
7185}
7186
7187static bool trans_STLEXH(DisasContext *s, arg_STREX *a)
7188{
7189    if (!ENABLE_ARCH_8) {
7190        return false;
7191    }
7192    return op_strex(s, a, MO_16, true);
7193}
7194
7195static bool op_stl(DisasContext *s, arg_STL *a, MemOp mop)
7196{
7197    TCGv_i32 addr, tmp;
7198
7199    if (!ENABLE_ARCH_8) {
7200        return false;
7201    }
7202    /* We UNDEF for these UNPREDICTABLE cases.  */
7203    if (a->rn == 15 || a->rt == 15) {
7204        unallocated_encoding(s);
7205        return true;
7206    }
7207
7208    addr = load_reg(s, a->rn);
7209    tmp = load_reg(s, a->rt);
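        /* Store-Release: order all earlier accesses before the store itself */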
7210    tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
7211    gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), mop | MO_ALIGN);
7212    disas_set_da_iss(s, mop, a->rt | ISSIsAcqRel | ISSIsWrite);
7213
7214    tcg_temp_free_i32(tmp);
7215    tcg_temp_free_i32(addr);
7216    return true;
7217}
7218
7219static bool trans_STL(DisasContext *s, arg_STL *a)
7220{
7221    return op_stl(s, a, MO_UL);
7222}
7223
7224static bool trans_STLB(DisasContext *s, arg_STL *a)
7225{
7226    return op_stl(s, a, MO_UB);
7227}
7228
7229static bool trans_STLH(DisasContext *s, arg_STL *a)
7230{
7231    return op_stl(s, a, MO_UW);
7232}
7233
7234static bool op_ldrex(DisasContext *s, arg_LDREX *a, MemOp mop, bool acq)
7235{
7236    TCGv_i32 addr;
7237    /* Some cases stopped being UNPREDICTABLE in v8A (but not v8M) */
7238    bool v8a = ENABLE_ARCH_8 && !arm_dc_feature(s, ARM_FEATURE_M);
7239
7240    /* We UNDEF for these UNPREDICTABLE cases.  */
7241    if (a->rn == 15 || a->rt == 15
7242        || (!v8a && s->thumb && a->rt == 13)
7243        || (mop == MO_64
7244            && (a->rt2 == 15 || a->rt == a->rt2
7245                || (!v8a && s->thumb && a->rt2 == 13)))) {
7246        unallocated_encoding(s);
7247        return true;
7248    }
7249
7250    addr = tcg_temp_local_new_i32();
7251    load_reg_var(s, addr, a->rn);
7252    tcg_gen_addi_i32(addr, addr, a->imm);
7253
7254    gen_load_exclusive(s, a->rt, a->rt2, addr, mop);
7255    tcg_temp_free_i32(addr);
7256
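        /* 'acq' is set for the Load-Acquire forms (LDAEX*): the acquire barrier follows the load */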
7257    if (acq) {
7258        tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
7259    }
7260    return true;
7261}
7262
7263static bool trans_LDREX(DisasContext *s, arg_LDREX *a)
7264{
7265    if (!ENABLE_ARCH_6) {
7266        return false;
7267    }
7268    return op_ldrex(s, a, MO_32, false);
7269}
7270
7271static bool trans_LDREXD_a32(DisasContext *s, arg_LDREX *a)
7272{
7273    if (!ENABLE_ARCH_6K) {
7274        return false;
7275    }
7276    /* We UNDEF for these UNPREDICTABLE cases.  */
7277    if (a->rt & 1) {
7278        unallocated_encoding(s);
7279        return true;
7280    }
7281    a->rt2 = a->rt + 1;
7282    return op_ldrex(s, a, MO_64, false);
7283}
7284
7285static bool trans_LDREXD_t32(DisasContext *s, arg_LDREX *a)
7286{
7287    return op_ldrex(s, a, MO_64, false);
7288}
7289
7290static bool trans_LDREXB(DisasContext *s, arg_LDREX *a)
7291{
7292    if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
7293        return false;
7294    }
7295    return op_ldrex(s, a, MO_8, false);
7296}
7297
7298static bool trans_LDREXH(DisasContext *s, arg_LDREX *a)
7299{
7300    if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
7301        return false;
7302    }
7303    return op_ldrex(s, a, MO_16, false);
7304}
7305
7306static bool trans_LDAEX(DisasContext *s, arg_LDREX *a)
7307{
7308    if (!ENABLE_ARCH_8) {
7309        return false;
7310    }
7311    return op_ldrex(s, a, MO_32, true);
7312}
7313
7314static bool trans_LDAEXD_a32(DisasContext *s, arg_LDREX *a)
7315{
7316    if (!ENABLE_ARCH_8) {
7317        return false;
7318    }
7319    /* We UNDEF for these UNPREDICTABLE cases.  */
7320    if (a->rt & 1) {
7321        unallocated_encoding(s);
7322        return true;
7323    }
7324    a->rt2 = a->rt + 1;
7325    return op_ldrex(s, a, MO_64, true);
7326}
7327
7328static bool trans_LDAEXD_t32(DisasContext *s, arg_LDREX *a)
7329{
7330    if (!ENABLE_ARCH_8) {
7331        return false;
7332    }
7333    return op_ldrex(s, a, MO_64, true);
7334}
7335
7336static bool trans_LDAEXB(DisasContext *s, arg_LDREX *a)
7337{
7338    if (!ENABLE_ARCH_8) {
7339        return false;
7340    }
7341    return op_ldrex(s, a, MO_8, true);
7342}
7343
7344static bool trans_LDAEXH(DisasContext *s, arg_LDREX *a)
7345{
7346    if (!ENABLE_ARCH_8) {
7347        return false;
7348    }
7349    return op_ldrex(s, a, MO_16, true);
7350}
7351
7352static bool op_lda(DisasContext *s, arg_LDA *a, MemOp mop)
7353{
7354    TCGv_i32 addr, tmp;
7355
7356    if (!ENABLE_ARCH_8) {
7357        return false;
7358    }
7359    /* We UNDEF for these UNPREDICTABLE cases.  */
7360    if (a->rn == 15 || a->rt == 15) {
7361        unallocated_encoding(s);
7362        return true;
7363    }
7364
7365    addr = load_reg(s, a->rn);
7366    tmp = tcg_temp_new_i32();
7367    gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), mop | MO_ALIGN);
7368    disas_set_da_iss(s, mop, a->rt | ISSIsAcqRel);
7369    tcg_temp_free_i32(addr);
7370
7371    store_reg(s, a->rt, tmp);
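        /* Load-Acquire: no later access may be reordered before the load */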
7372    tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
7373    return true;
7374}
7375
7376static bool trans_LDA(DisasContext *s, arg_LDA *a)
7377{
7378    return op_lda(s, a, MO_UL);
7379}
7380
7381static bool trans_LDAB(DisasContext *s, arg_LDA *a)
7382{
7383    return op_lda(s, a, MO_UB);
7384}
7385
7386static bool trans_LDAH(DisasContext *s, arg_LDA *a)
7387{
7388    return op_lda(s, a, MO_UW);
7389}
7390
7391/*
7392 * Media instructions
7393 */
7394
7395static bool trans_USADA8(DisasContext *s, arg_USADA8 *a)
7396{
7397    TCGv_i32 t1, t2;
7398
7399    if (!ENABLE_ARCH_6) {
7400        return false;
7401    }
7402
7403    t1 = load_reg(s, a->rn);
7404    t2 = load_reg(s, a->rm);
7405    gen_helper_usad8(t1, t1, t2);
7406    tcg_temp_free_i32(t2);
7407    if (a->ra != 15) {
7408        t2 = load_reg(s, a->ra);
7409        tcg_gen_add_i32(t1, t1, t2);
7410        tcg_temp_free_i32(t2);
7411    }
7412    store_reg(s, a->rd, t1);
7413    return true;
7414}
7415
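    /*
     * SBFX/UBFX: extract a (widthm1 + 1)-bit field starting at bit 'lsb',
     * sign- or zero-extending it according to 'u'.
     */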
7416static bool op_bfx(DisasContext *s, arg_UBFX *a, bool u)
7417{
7418    TCGv_i32 tmp;
7419    int width = a->widthm1 + 1;
7420    int shift = a->lsb;
7421
7422    if (!ENABLE_ARCH_6T2) {
7423        return false;
7424    }
7425    if (shift + width > 32) {
7426        /* UNPREDICTABLE; we choose to UNDEF */
7427        unallocated_encoding(s);
7428        return true;
7429    }
7430
7431    tmp = load_reg(s, a->rn);
7432    if (u) {
7433        tcg_gen_extract_i32(tmp, tmp, shift, width);
7434    } else {
7435        tcg_gen_sextract_i32(tmp, tmp, shift, width);
7436    }
7437    store_reg(s, a->rd, tmp);
7438    return true;
7439}
7440
7441static bool trans_SBFX(DisasContext *s, arg_SBFX *a)
7442{
7443    return op_bfx(s, a, false);
7444}
7445
7446static bool trans_UBFX(DisasContext *s, arg_UBFX *a)
7447{
7448    return op_bfx(s, a, true);
7449}
7450
7451static bool trans_BFCI(DisasContext *s, arg_BFCI *a)
7452{
7453    TCGv_i32 tmp;
7454    int msb = a->msb, lsb = a->lsb;
7455    int width;
7456
7457    if (!ENABLE_ARCH_6T2) {
7458        return false;
7459    }
7460    if (msb < lsb) {
7461        /* UNPREDICTABLE; we choose to UNDEF */
7462        unallocated_encoding(s);
7463        return true;
7464    }
7465
7466    width = msb + 1 - lsb;
7467    if (a->rn == 15) {
7468        /* BFC */
7469        tmp = tcg_const_i32(0);
7470    } else {
7471        /* BFI */
7472        tmp = load_reg(s, a->rn);
7473    }
7474    if (width != 32) {
7475        TCGv_i32 tmp2 = load_reg(s, a->rd);
7476        tcg_gen_deposit_i32(tmp, tmp2, tmp, lsb, width);
7477        tcg_temp_free_i32(tmp2);
7478    }
7479    store_reg(s, a->rd, tmp);
7480    return true;
7481}
7482
7483static bool trans_UDF(DisasContext *s, arg_UDF *a)
7484{
7485    unallocated_encoding(s);
7486    return true;
7487}
7488
7489/*
7490 * Parallel addition and subtraction
7491 */
7492
7493static bool op_par_addsub(DisasContext *s, arg_rrr *a,
7494                          void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32))
7495{
7496    TCGv_i32 t0, t1;
7497
7498    if (s->thumb
7499        ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7500        : !ENABLE_ARCH_6) {
7501        return false;
7502    }
7503
7504    t0 = load_reg(s, a->rn);
7505    t1 = load_reg(s, a->rm);
7506
7507    gen(t0, t0, t1);
7508
7509    tcg_temp_free_i32(t1);
7510    store_reg(s, a->rd, t0);
7511    return true;
7512}
7513
7514static bool op_par_addsub_ge(DisasContext *s, arg_rrr *a,
7515                             void (*gen)(TCGv_i32, TCGv_i32,
7516                                         TCGv_i32, TCGv_ptr))
7517{
7518    TCGv_i32 t0, t1;
7519    TCGv_ptr ge;
7520
7521    if (s->thumb
7522        ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7523        : !ENABLE_ARCH_6) {
7524        return false;
7525    }
7526
7527    t0 = load_reg(s, a->rn);
7528    t1 = load_reg(s, a->rm);
7529
7530    ge = tcg_temp_new_ptr();
7531    tcg_gen_addi_ptr(ge, cpu_env, offsetof(CPUARMState, GE));
7532    gen(t0, t0, t1, ge);
7533
7534    tcg_temp_free_ptr(ge);
7535    tcg_temp_free_i32(t1);
7536    store_reg(s, a->rd, t0);
7537    return true;
7538}
7539
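    /*
     * Two macro flavours: the basic signed and unsigned forms also update
     * the GE flags (via a pointer to CPUARMState.GE); the saturating and
     * halving forms do not.
     */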
7540#define DO_PAR_ADDSUB(NAME, helper) \
7541static bool trans_##NAME(DisasContext *s, arg_rrr *a)   \
7542{                                                       \
7543    return op_par_addsub(s, a, helper);                 \
7544}
7545
7546#define DO_PAR_ADDSUB_GE(NAME, helper) \
7547static bool trans_##NAME(DisasContext *s, arg_rrr *a)   \
7548{                                                       \
7549    return op_par_addsub_ge(s, a, helper);              \
7550}
7551
7552DO_PAR_ADDSUB_GE(SADD16, gen_helper_sadd16)
7553DO_PAR_ADDSUB_GE(SASX, gen_helper_saddsubx)
7554DO_PAR_ADDSUB_GE(SSAX, gen_helper_ssubaddx)
7555DO_PAR_ADDSUB_GE(SSUB16, gen_helper_ssub16)
7556DO_PAR_ADDSUB_GE(SADD8, gen_helper_sadd8)
7557DO_PAR_ADDSUB_GE(SSUB8, gen_helper_ssub8)
7558
7559DO_PAR_ADDSUB_GE(UADD16, gen_helper_uadd16)
7560DO_PAR_ADDSUB_GE(UASX, gen_helper_uaddsubx)
7561DO_PAR_ADDSUB_GE(USAX, gen_helper_usubaddx)
7562DO_PAR_ADDSUB_GE(USUB16, gen_helper_usub16)
7563DO_PAR_ADDSUB_GE(UADD8, gen_helper_uadd8)
7564DO_PAR_ADDSUB_GE(USUB8, gen_helper_usub8)
7565
7566DO_PAR_ADDSUB(QADD16, gen_helper_qadd16)
7567DO_PAR_ADDSUB(QASX, gen_helper_qaddsubx)
7568DO_PAR_ADDSUB(QSAX, gen_helper_qsubaddx)
7569DO_PAR_ADDSUB(QSUB16, gen_helper_qsub16)
7570DO_PAR_ADDSUB(QADD8, gen_helper_qadd8)
7571DO_PAR_ADDSUB(QSUB8, gen_helper_qsub8)
7572
7573DO_PAR_ADDSUB(UQADD16, gen_helper_uqadd16)
7574DO_PAR_ADDSUB(UQASX, gen_helper_uqaddsubx)
7575DO_PAR_ADDSUB(UQSAX, gen_helper_uqsubaddx)
7576DO_PAR_ADDSUB(UQSUB16, gen_helper_uqsub16)
7577DO_PAR_ADDSUB(UQADD8, gen_helper_uqadd8)
7578DO_PAR_ADDSUB(UQSUB8, gen_helper_uqsub8)
7579
7580DO_PAR_ADDSUB(SHADD16, gen_helper_shadd16)
7581DO_PAR_ADDSUB(SHASX, gen_helper_shaddsubx)
7582DO_PAR_ADDSUB(SHSAX, gen_helper_shsubaddx)
7583DO_PAR_ADDSUB(SHSUB16, gen_helper_shsub16)
7584DO_PAR_ADDSUB(SHADD8, gen_helper_shadd8)
7585DO_PAR_ADDSUB(SHSUB8, gen_helper_shsub8)
7586
7587DO_PAR_ADDSUB(UHADD16, gen_helper_uhadd16)
7588DO_PAR_ADDSUB(UHASX, gen_helper_uhaddsubx)
7589DO_PAR_ADDSUB(UHSAX, gen_helper_uhsubaddx)
7590DO_PAR_ADDSUB(UHSUB16, gen_helper_uhsub16)
7591DO_PAR_ADDSUB(UHADD8, gen_helper_uhadd8)
7592DO_PAR_ADDSUB(UHSUB8, gen_helper_uhsub8)
7593
7594#undef DO_PAR_ADDSUB
7595#undef DO_PAR_ADDSUB_GE
7596
7597/*
7598 * Packing, unpacking, saturation, and reversal
7599 */
7600
7601static bool trans_PKH(DisasContext *s, arg_PKH *a)
7602{
7603    TCGv_i32 tn, tm;
7604    int shift = a->imm;
7605
7606    if (s->thumb
7607        ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7608        : !ENABLE_ARCH_6) {
7609        return false;
7610    }
7611
7612    tn = load_reg(s, a->rn);
7613    tm = load_reg(s, a->rm);
7614    if (a->tb) {
7615        /* PKHTB */
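            /*
             * An immediate of 0 encodes ASR #32; ASR #31 produces the same
             * all-sign-bits value and keeps the TCG shift count in range.
             */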
7616        if (shift == 0) {
7617            shift = 31;
7618        }
7619        tcg_gen_sari_i32(tm, tm, shift);
7620        tcg_gen_deposit_i32(tn, tn, tm, 0, 16);
7621    } else {
7622        /* PKHBT */
7623        tcg_gen_shli_i32(tm, tm, shift);
7624        tcg_gen_deposit_i32(tn, tm, tn, 0, 16);
7625    }
7626    tcg_temp_free_i32(tm);
7627    store_reg(s, a->rd, tn);
7628    return true;
7629}
7630
7631static bool op_sat(DisasContext *s, arg_sat *a,
7632                   void (*gen)(TCGv_i32, TCGv_env, TCGv_i32, TCGv_i32))
7633{
7634    TCGv_i32 tmp;
7635    int shift = a->imm;
7636
7637    if (!ENABLE_ARCH_6) {
7638        return false;
7639    }
7640
7641    tmp = load_reg(s, a->rn);
7642    if (a->sh) {
7643        tcg_gen_sari_i32(tmp, tmp, shift ? shift : 31);
7644    } else {
7645        tcg_gen_shli_i32(tmp, tmp, shift);
7646    }
7647
7648    gen(tmp, cpu_env, tmp, tcg_constant_i32(a->satimm));
7649
7650    store_reg(s, a->rd, tmp);
7651    return true;
7652}
7653
7654static bool trans_SSAT(DisasContext *s, arg_sat *a)
7655{
7656    return op_sat(s, a, gen_helper_ssat);
7657}
7658
7659static bool trans_USAT(DisasContext *s, arg_sat *a)
7660{
7661    return op_sat(s, a, gen_helper_usat);
7662}
7663
7664static bool trans_SSAT16(DisasContext *s, arg_sat *a)
7665{
7666    if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
7667        return false;
7668    }
7669    return op_sat(s, a, gen_helper_ssat16);
7670}
7671
7672static bool trans_USAT16(DisasContext *s, arg_sat *a)
7673{
7674    if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
7675        return false;
7676    }
7677    return op_sat(s, a, gen_helper_usat16);
7678}
7679
7680static bool op_xta(DisasContext *s, arg_rrr_rot *a,
7681                   void (*gen_extract)(TCGv_i32, TCGv_i32),
7682                   void (*gen_add)(TCGv_i32, TCGv_i32, TCGv_i32))
7683{
7684    TCGv_i32 tmp;
7685
7686    if (!ENABLE_ARCH_6) {
7687        return false;
7688    }
7689
7690    tmp = load_reg(s, a->rm);
7691    /*
7692     * TODO: In many cases we could do a shift instead of a rotate.
7693     * Combined with a simple extend, that becomes an extract.
7694     */
7695    tcg_gen_rotri_i32(tmp, tmp, a->rot * 8);
7696    gen_extract(tmp, tmp);
7697
7698    if (a->rn != 15) {
7699        TCGv_i32 tmp2 = load_reg(s, a->rn);
7700        gen_add(tmp, tmp, tmp2);
7701        tcg_temp_free_i32(tmp2);
7702    }
7703    store_reg(s, a->rd, tmp);
7704    return true;
7705}
7706
7707static bool trans_SXTAB(DisasContext *s, arg_rrr_rot *a)
7708{
7709    return op_xta(s, a, tcg_gen_ext8s_i32, tcg_gen_add_i32);
7710}
7711
7712static bool trans_SXTAH(DisasContext *s, arg_rrr_rot *a)
7713{
7714    return op_xta(s, a, tcg_gen_ext16s_i32, tcg_gen_add_i32);
7715}
7716
7717static bool trans_SXTAB16(DisasContext *s, arg_rrr_rot *a)
7718{
7719    if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
7720        return false;
7721    }
7722    return op_xta(s, a, gen_helper_sxtb16, gen_add16);
7723}
7724
7725static bool trans_UXTAB(DisasContext *s, arg_rrr_rot *a)
7726{
7727    return op_xta(s, a, tcg_gen_ext8u_i32, tcg_gen_add_i32);
7728}
7729
7730static bool trans_UXTAH(DisasContext *s, arg_rrr_rot *a)
7731{
7732    return op_xta(s, a, tcg_gen_ext16u_i32, tcg_gen_add_i32);
7733}
7734
7735static bool trans_UXTAB16(DisasContext *s, arg_rrr_rot *a)
7736{
7737    if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
7738        return false;
7739    }
7740    return op_xta(s, a, gen_helper_uxtb16, gen_add16);
7741}
7742
7743static bool trans_SEL(DisasContext *s, arg_rrr *a)
7744{
7745    TCGv_i32 t1, t2, t3;
7746
7747    if (s->thumb
7748        ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7749        : !ENABLE_ARCH_6) {
7750        return false;
7751    }
7752
7753    t1 = load_reg(s, a->rn);
7754    t2 = load_reg(s, a->rm);
7755    t3 = tcg_temp_new_i32();
7756    tcg_gen_ld_i32(t3, cpu_env, offsetof(CPUARMState, GE));
7757    gen_helper_sel_flags(t1, t3, t1, t2);
7758    tcg_temp_free_i32(t3);
7759    tcg_temp_free_i32(t2);
7760    store_reg(s, a->rd, t1);
7761    return true;
7762}
7763
7764static bool op_rr(DisasContext *s, arg_rr *a,
7765                  void (*gen)(TCGv_i32, TCGv_i32))
7766{
7767    TCGv_i32 tmp;
7768
7769    tmp = load_reg(s, a->rm);
7770    gen(tmp, tmp);
7771    store_reg(s, a->rd, tmp);
7772    return true;
7773}
7774
7775static bool trans_REV(DisasContext *s, arg_rr *a)
7776{
7777    if (!ENABLE_ARCH_6) {
7778        return false;
7779    }
7780    return op_rr(s, a, tcg_gen_bswap32_i32);
7781}
7782
7783static bool trans_REV16(DisasContext *s, arg_rr *a)
7784{
7785    if (!ENABLE_ARCH_6) {
7786        return false;
7787    }
7788    return op_rr(s, a, gen_rev16);
7789}
7790
7791static bool trans_REVSH(DisasContext *s, arg_rr *a)
7792{
7793    if (!ENABLE_ARCH_6) {
7794        return false;
7795    }
7796    return op_rr(s, a, gen_revsh);
7797}
7798
7799static bool trans_RBIT(DisasContext *s, arg_rr *a)
7800{
7801    if (!ENABLE_ARCH_6T2) {
7802        return false;
7803    }
7804    return op_rr(s, a, gen_helper_rbit);
7805}
7806
7807/*
7808 * Signed multiply, signed and unsigned divide
7809 */
7810
7811static bool op_smlad(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub)
7812{
7813    TCGv_i32 t1, t2;
7814
7815    if (!ENABLE_ARCH_6) {
7816        return false;
7817    }
7818
7819    t1 = load_reg(s, a->rn);
7820    t2 = load_reg(s, a->rm);
7821    if (m_swap) {
7822        gen_swap_half(t2, t2);
7823    }
7824    gen_smul_dual(t1, t2);
7825
7826    if (sub) {
7827        /*
7828         * This subtraction cannot overflow, so we can do a simple
7829         * 32-bit subtraction and then a possible 32-bit saturating
7830         * addition of Ra.
7831         */
7832        tcg_gen_sub_i32(t1, t1, t2);
7833        tcg_temp_free_i32(t2);
7834
7835        if (a->ra != 15) {
7836            t2 = load_reg(s, a->ra);
7837            gen_helper_add_setq(t1, cpu_env, t1, t2);
7838            tcg_temp_free_i32(t2);
7839        }
7840    } else if (a->ra == 15) {
7841        /* Single saturation-checking addition */
7842        gen_helper_add_setq(t1, cpu_env, t1, t2);
7843        tcg_temp_free_i32(t2);
7844    } else {
7845        /*
7846         * We need to add the products and Ra together and then
7847         * determine whether the final result overflowed. Doing
7848         * this as two separate add-and-check-overflow steps incorrectly
7849         * sets Q for cases like (-32768 * -32768) + (-32768 * -32768) + -1.
7850         * Do all the arithmetic at 64-bits and then check for overflow.
7851         */
7852        TCGv_i64 p64, q64;
7853        TCGv_i32 t3, qf, one;
7854
7855        p64 = tcg_temp_new_i64();
7856        q64 = tcg_temp_new_i64();
7857        tcg_gen_ext_i32_i64(p64, t1);
7858        tcg_gen_ext_i32_i64(q64, t2);
7859        tcg_gen_add_i64(p64, p64, q64);
7860        load_reg_var(s, t2, a->ra);
7861        tcg_gen_ext_i32_i64(q64, t2);
7862        tcg_gen_add_i64(p64, p64, q64);
7863        tcg_temp_free_i64(q64);
7864
7865        tcg_gen_extr_i64_i32(t1, t2, p64);
7866        tcg_temp_free_i64(p64);
7867        /*
7868         * t1 is the low half of the result which goes into Rd.
7869         * We have overflow and must set Q if the high half (t2)
7870         * is different from the sign-extension of t1.
7871         */
7872        t3 = tcg_temp_new_i32();
7873        tcg_gen_sari_i32(t3, t1, 31);
7874        qf = load_cpu_field(QF);
7875        one = tcg_constant_i32(1);
7876        tcg_gen_movcond_i32(TCG_COND_NE, qf, t2, t3, one, qf);
7877        store_cpu_field(qf, QF);
7878        tcg_temp_free_i32(t3);
7879        tcg_temp_free_i32(t2);
7880    }
7881    store_reg(s, a->rd, t1);
7882    return true;
7883}
7884
7885static bool trans_SMLAD(DisasContext *s, arg_rrrr *a)
7886{
7887    return op_smlad(s, a, false, false);
7888}
7889
7890static bool trans_SMLADX(DisasContext *s, arg_rrrr *a)
7891{
7892    return op_smlad(s, a, true, false);
7893}
7894
7895static bool trans_SMLSD(DisasContext *s, arg_rrrr *a)
7896{
7897    return op_smlad(s, a, false, true);
7898}
7899
7900static bool trans_SMLSDX(DisasContext *s, arg_rrrr *a)
7901{
7902    return op_smlad(s, a, true, true);
7903}
7904
7905static bool op_smlald(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub)
7906{
7907    TCGv_i32 t1, t2;
7908    TCGv_i64 l1, l2;
7909
7910    if (!ENABLE_ARCH_6) {
7911        return false;
7912    }
7913
7914    t1 = load_reg(s, a->rn);
7915    t2 = load_reg(s, a->rm);
7916    if (m_swap) {
7917        gen_swap_half(t2, t2);
7918    }
7919    gen_smul_dual(t1, t2);
7920
7921    l1 = tcg_temp_new_i64();
7922    l2 = tcg_temp_new_i64();
7923    tcg_gen_ext_i32_i64(l1, t1);
7924    tcg_gen_ext_i32_i64(l2, t2);
7925    tcg_temp_free_i32(t1);
7926    tcg_temp_free_i32(t2);
7927
7928    if (sub) {
7929        tcg_gen_sub_i64(l1, l1, l2);
7930    } else {
7931        tcg_gen_add_i64(l1, l1, l2);
7932    }
7933    tcg_temp_free_i64(l2);
7934
7935    gen_addq(s, l1, a->ra, a->rd);
7936    gen_storeq_reg(s, a->ra, a->rd, l1);
7937    tcg_temp_free_i64(l1);
7938    return true;
7939}
7940
7941static bool trans_SMLALD(DisasContext *s, arg_rrrr *a)
7942{
7943    return op_smlald(s, a, false, false);
7944}
7945
7946static bool trans_SMLALDX(DisasContext *s, arg_rrrr *a)
7947{
7948    return op_smlald(s, a, true, false);
7949}
7950
7951static bool trans_SMLSLD(DisasContext *s, arg_rrrr *a)
7952{
7953    return op_smlald(s, a, false, true);
7954}
7955
7956static bool trans_SMLSLDX(DisasContext *s, arg_rrrr *a)
7957{
7958    return op_smlald(s, a, true, true);
7959}
7960
7961static bool op_smmla(DisasContext *s, arg_rrrr *a, bool round, bool sub)
7962{
7963    TCGv_i32 t1, t2;
7964
7965    if (s->thumb
7966        ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7967        : !ENABLE_ARCH_6) {
7968        return false;
7969    }
7970
7971    t1 = load_reg(s, a->rn);
7972    t2 = load_reg(s, a->rm);
7973    tcg_gen_muls2_i32(t2, t1, t1, t2);
7974
7975    if (a->ra != 15) {
7976        TCGv_i32 t3 = load_reg(s, a->ra);
7977        if (sub) {
7978            /*
7979             * For SMMLS, we need a 64-bit subtract: a non-zero product
7980             * lowpart causes a borrow into the high part, and we also
7981             * need the correct result lowpart for the rounding step.
7982             */
7983            tcg_gen_sub2_i32(t2, t1, tcg_constant_i32(0), t3, t2, t1);
7984        } else {
7985            tcg_gen_add_i32(t1, t1, t3);
7986        }
7987        tcg_temp_free_i32(t3);
7988    }
7989    if (round) {
7990        /*
7991         * Adding 0x80000000 to the 64-bit quantity means that we have
7992         * carry in to the high word when the low word has the msb set.
7993         */
7994        tcg_gen_shri_i32(t2, t2, 31);
7995        tcg_gen_add_i32(t1, t1, t2);
7996    }
7997    tcg_temp_free_i32(t2);
7998    store_reg(s, a->rd, t1);
7999    return true;
8000}
8001
8002static bool trans_SMMLA(DisasContext *s, arg_rrrr *a)
8003{
8004    return op_smmla(s, a, false, false);
8005}
8006
8007static bool trans_SMMLAR(DisasContext *s, arg_rrrr *a)
8008{
8009    return op_smmla(s, a, true, false);
8010}
8011
8012static bool trans_SMMLS(DisasContext *s, arg_rrrr *a)
8013{
8014    return op_smmla(s, a, false, true);
8015}
8016
8017static bool trans_SMMLSR(DisasContext *s, arg_rrrr *a)
8018{
8019    return op_smmla(s, a, true, true);
8020}
8021
8022static bool op_div(DisasContext *s, arg_rrr *a, bool u)
8023{
8024    TCGv_i32 t1, t2;
8025
8026    if (s->thumb
8027        ? !dc_isar_feature(aa32_thumb_div, s)
8028        : !dc_isar_feature(aa32_arm_div, s)) {
8029        return false;
8030    }
8031
8032    t1 = load_reg(s, a->rn);
8033    t2 = load_reg(s, a->rm);
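        /*
         * The helpers handle the architected corner cases: divide by zero
         * returns 0 (or traps when trapping is enabled), and SDIV of
         * INT_MIN by -1 returns INT_MIN.
         */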
8034    if (u) {
8035        gen_helper_udiv(t1, cpu_env, t1, t2);
8036    } else {
8037        gen_helper_sdiv(t1, cpu_env, t1, t2);
8038    }
8039    tcg_temp_free_i32(t2);
8040    store_reg(s, a->rd, t1);
8041    return true;
8042}
8043
8044static bool trans_SDIV(DisasContext *s, arg_rrr *a)
8045{
8046    return op_div(s, a, false);
8047}
8048
8049static bool trans_UDIV(DisasContext *s, arg_rrr *a)
8050{
8051    return op_div(s, a, true);
8052}
8053
8054/*
8055 * Block data transfer
8056 */
8057
8058static TCGv_i32 op_addr_block_pre(DisasContext *s, arg_ldst_block *a, int n)
8059{
8060    TCGv_i32 addr = load_reg(s, a->rn);
8061
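        /*
         * a->i selects increment vs decrement and a->b before vs after;
         * fold all four LDM/STM addressing modes into a single walk
         * upwards from the lowest accessed address in the transfer loops.
         */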
8062    if (a->b) {
8063        if (a->i) {
8064            /* pre increment */
8065            tcg_gen_addi_i32(addr, addr, 4);
8066        } else {
8067            /* pre decrement */
8068            tcg_gen_addi_i32(addr, addr, -(n * 4));
8069        }
8070    } else if (!a->i && n != 1) {
8071        /* post decrement */
8072        tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
8073    }
8074
8075    if (s->v8m_stackcheck && a->rn == 13 && a->w) {
8076        /*
8077         * If the writeback is incrementing SP rather than
8078         * decrementing it, and the initial SP is below the
8079         * stack limit but the final written-back SP would
8080         * be above, then we must not perform any memory
8081         * accesses, but it is IMPDEF whether we generate
8082         * an exception. We choose to do so in this case.
8083         * At this point 'addr' is the lowest address, so
8084         * either the original SP (if incrementing) or our
8085         * final SP (if decrementing), so that's what we check.
8086         */
8087        gen_helper_v8m_stackcheck(cpu_env, addr);
8088    }
8089
8090    return addr;
8091}
8092
8093static void op_addr_block_post(DisasContext *s, arg_ldst_block *a,
8094                               TCGv_i32 addr, int n)
8095{
8096    if (a->w) {
8097        /* write back */
8098        if (!a->b) {
8099            if (a->i) {
8100                /* post increment */
8101                tcg_gen_addi_i32(addr, addr, 4);
8102            } else {
8103                /* post decrement */
8104                tcg_gen_addi_i32(addr, addr, -(n * 4));
8105            }
8106        } else if (!a->i && n != 1) {
8107            /* pre decrement */
8108            tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
8109        }
8110        store_reg(s, a->rn, addr);
8111    } else {
8112        tcg_temp_free_i32(addr);
8113    }
8114}
8115
8116static bool op_stm(DisasContext *s, arg_ldst_block *a, int min_n)
8117{
8118    int i, j, n, list, mem_idx;
8119    bool user = a->u;
8120    TCGv_i32 addr, tmp;
8121
8122    if (user) {
8123        /* STM (user) */
8124        if (IS_USER(s)) {
8125            /* Only usable in supervisor mode.  */
8126            unallocated_encoding(s);
8127            return true;
8128        }
8129    }
8130
8131    list = a->list;
8132    n = ctpop16(list);
8133    if (n < min_n || a->rn == 15) {
8134        unallocated_encoding(s);
8135        return true;
8136    }
8137
8138    s->eci_handled = true;
8139
8140    addr = op_addr_block_pre(s, a, n);
8141    mem_idx = get_mem_index(s);
8142
8143    for (i = j = 0; i < 16; i++) {
8144        if (!(list & (1 << i))) {
8145            continue;
8146        }
8147
8148        if (user && i != 15) {
8149            tmp = tcg_temp_new_i32();
8150            gen_helper_get_user_reg(tmp, cpu_env, tcg_constant_i32(i));
8151        } else {
8152            tmp = load_reg(s, i);
8153        }
8154        gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
8155        tcg_temp_free_i32(tmp);
8156
8157        /* No need to add after the last transfer.  */
8158        if (++j != n) {
8159            tcg_gen_addi_i32(addr, addr, 4);
8160        }
8161    }
8162
8163    op_addr_block_post(s, a, addr, n);
8164    clear_eci_state(s);
8165    return true;
8166}
8167
8168static bool trans_STM(DisasContext *s, arg_ldst_block *a)
8169{
8170    /* BitCount(list) < 1 is UNPREDICTABLE */
8171    return op_stm(s, a, 1);
8172}
8173
8174static bool trans_STM_t32(DisasContext *s, arg_ldst_block *a)
8175{
8176    /* Writeback register in register list is UNPREDICTABLE for T32.  */
8177    if (a->w && (a->list & (1 << a->rn))) {
8178        unallocated_encoding(s);
8179        return true;
8180    }
8181    /* BitCount(list) < 2 is UNPREDICTABLE */
8182    return op_stm(s, a, 2);
8183}
8184
8185static bool do_ldm(DisasContext *s, arg_ldst_block *a, int min_n)
8186{
8187    int i, j, n, list, mem_idx;
8188    bool loaded_base;
8189    bool user = a->u;
8190    bool exc_return = false;
8191    TCGv_i32 addr, tmp, loaded_var;
8192
8193    if (user) {
8194        /* LDM (user), LDM (exception return) */
8195        if (IS_USER(s)) {
8196            /* Only usable in supervisor mode.  */
8197            unallocated_encoding(s);
8198            return true;
8199        }
8200        if (extract32(a->list, 15, 1)) {
8201            exc_return = true;
8202            user = false;
8203        } else {
8204            /* LDM (user) does not allow writeback.  */
8205            if (a->w) {
8206                unallocated_encoding(s);
8207                return true;
8208            }
8209        }
8210    }
8211
8212    list = a->list;
8213    n = ctpop16(list);
8214    if (n < min_n || a->rn == 15) {
8215        unallocated_encoding(s);
8216        return true;
8217    }
8218
8219    s->eci_handled = true;
8220
8221    addr = op_addr_block_pre(s, a, n);
8222    mem_idx = get_mem_index(s);
8223    loaded_base = false;
8224    loaded_var = NULL;
8225
8226    for (i = j = 0; i < 16; i++) {
8227        if (!(list & (1 << i))) {
8228            continue;
8229        }
8230
8231        tmp = tcg_temp_new_i32();
8232        gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
8233        if (user) {
8234            gen_helper_set_user_reg(cpu_env, tcg_constant_i32(i), tmp);
8235            tcg_temp_free_i32(tmp);
8236        } else if (i == a->rn) {
8237            loaded_var = tmp;
8238            loaded_base = true;
8239        } else if (i == 15 && exc_return) {
8240            store_pc_exc_ret(s, tmp);
8241        } else {
8242            store_reg_from_load(s, i, tmp);
8243        }
8244
8245        /* No need to add after the last transfer.  */
8246        if (++j != n) {
8247            tcg_gen_addi_i32(addr, addr, 4);
8248        }
8249    }
8250
8251    op_addr_block_post(s, a, addr, n);
8252
8253    if (loaded_base) {
8254        /* Note that we reject base == pc above.  */
8255        store_reg(s, a->rn, loaded_var);
8256    }
8257
8258    if (exc_return) {
8259        /* Restore CPSR from SPSR.  */
8260        tmp = load_cpu_field(spsr);
8261        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
8262            gen_io_start();
8263        }
8264        gen_helper_cpsr_write_eret(cpu_env, tmp);
8265        tcg_temp_free_i32(tmp);
8266        /* Must exit loop to check un-masked IRQs */
8267        s->base.is_jmp = DISAS_EXIT;
8268    }
8269    clear_eci_state(s);
8270    return true;
8271}
8272
8273static bool trans_LDM_a32(DisasContext *s, arg_ldst_block *a)
8274{
8275    /*
8276     * Writeback register in register list is UNPREDICTABLE
8277     * for ArchVersion() >= 7.  Prior to v7, A32 would write
8278     * an UNKNOWN value to the base register.
8279     */
8280    if (ENABLE_ARCH_7 && a->w && (a->list & (1 << a->rn))) {
8281        unallocated_encoding(s);
8282        return true;
8283    }
8284    /* BitCount(list) < 1 is UNPREDICTABLE */
8285    return do_ldm(s, a, 1);
8286}
8287
8288static bool trans_LDM_t32(DisasContext *s, arg_ldst_block *a)
8289{
8290    /* Writeback register in register list is UNPREDICTABLE for T32. */
8291    if (a->w && (a->list & (1 << a->rn))) {
8292        unallocated_encoding(s);
8293        return true;
8294    }
8295    /* BitCount(list) < 2 is UNPREDICTABLE */
8296    return do_ldm(s, a, 2);
8297}
8298
8299static bool trans_LDM_t16(DisasContext *s, arg_ldst_block *a)
8300{
8301    /* Writeback is conditional on the base register not being loaded.  */
8302    a->w = !(a->list & (1 << a->rn));
8303    /* BitCount(list) < 1 is UNPREDICTABLE */
8304    return do_ldm(s, a, 1);
8305}
8306
8307static bool trans_CLRM(DisasContext *s, arg_CLRM *a)
8308{
8309    int i;
8310    TCGv_i32 zero;
8311
8312    if (!dc_isar_feature(aa32_m_sec_state, s)) {
8313        return false;
8314    }
8315
8316    if (extract32(a->list, 13, 1)) {
8317        return false;
8318    }
8319
8320    if (!a->list) {
8321        /* UNPREDICTABLE; we choose to UNDEF */
8322        return false;
8323    }
8324
8325    s->eci_handled = true;
8326
8327    zero = tcg_constant_i32(0);
8328    for (i = 0; i < 15; i++) {
8329        if (extract32(a->list, i, 1)) {
8330            /* Clear R[i] */
8331            tcg_gen_mov_i32(cpu_R[i], zero);
8332        }
8333    }
8334    if (extract32(a->list, 15, 1)) {
8335        /*
8336         * Clear APSR (by calling the MSR helper with the same argument
8337         * as for "MSR APSR_nzcvqg, Rn": mask = 0b1100, SYSM=0)
8338         */
8339        gen_helper_v7m_msr(cpu_env, tcg_constant_i32(0xc00), zero);
8340    }
8341    clear_eci_state(s);
8342    return true;
8343}
8344
8345/*
8346 * Branch, branch with link
8347 */
8348
8349static bool trans_B(DisasContext *s, arg_i *a)
8350{
8351    gen_jmp(s, jmp_diff(s, a->imm));
8352    return true;
8353}
8354
8355static bool trans_B_cond_thumb(DisasContext *s, arg_ci *a)
8356{
8357    /* This has cond from encoding, required to be outside IT block.  */
8358    if (a->cond >= 0xe) {
8359        return false;
8360    }
8361    if (s->condexec_mask) {
8362        unallocated_encoding(s);
8363        return true;
8364    }
8365    arm_skip_unless(s, a->cond);
8366    gen_jmp(s, jmp_diff(s, a->imm));
8367    return true;
8368}
8369
8370static bool trans_BL(DisasContext *s, arg_i *a)
8371{
8372    gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | s->thumb);
8373    gen_jmp(s, jmp_diff(s, a->imm));
8374    return true;
8375}
8376
8377static bool trans_BLX_i(DisasContext *s, arg_BLX_i *a)
8378{
8379    /*
8380     * BLX <imm> would be useless on M-profile; the encoding space
8381     * is used for other insns from v8.1M onward, and UNDEFs before that.
8382     */
8383    if (arm_dc_feature(s, ARM_FEATURE_M)) {
8384        return false;
8385    }
8386
8387    /* For A32, ARM_FEATURE_V5 is checked near the start of the uncond block. */
8388    if (s->thumb && (a->imm & 2)) {
8389        return false;
8390    }
8391    gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | s->thumb);
8392    store_cpu_field_constant(!s->thumb, thumb);
8393    /* This jump is computed from an aligned PC: subtract off the low bits. */
8394    gen_jmp(s, jmp_diff(s, a->imm - (s->pc_curr & 3)));
8395    return true;
8396}
8397
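    /*
     * On Thumb-1 (no Thumb2), BL/BLX is a pair of 16-bit insns: the prefix
     * stashes the upper offset bits in LR, and the suffix combines them
     * with the lower bits and performs the call.
     */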
8398static bool trans_BL_BLX_prefix(DisasContext *s, arg_BL_BLX_prefix *a)
8399{
8400    assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
8401    gen_pc_plus_diff(s, cpu_R[14], jmp_diff(s, a->imm << 12));
8402    return true;
8403}
8404
8405static bool trans_BL_suffix(DisasContext *s, arg_BL_suffix *a)
8406{
8407    TCGv_i32 tmp = tcg_temp_new_i32();
8408
8409    assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
8410    tcg_gen_addi_i32(tmp, cpu_R[14], (a->imm << 1) | 1);
8411    gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | 1);
8412    gen_bx(s, tmp);
8413    return true;
8414}
8415
8416static bool trans_BLX_suffix(DisasContext *s, arg_BLX_suffix *a)
8417{
8418    TCGv_i32 tmp;
8419
8420    assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
8421    if (!ENABLE_ARCH_5) {
8422        return false;
8423    }
8424    tmp = tcg_temp_new_i32();
8425    tcg_gen_addi_i32(tmp, cpu_R[14], a->imm << 1);
8426    tcg_gen_andi_i32(tmp, tmp, 0xfffffffc);
8427    gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | 1);
8428    gen_bx(s, tmp);
8429    return true;
8430}
8431
8432static bool trans_BF(DisasContext *s, arg_BF *a)
8433{
8434    /*
8435     * M-profile branch future insns. The architecture permits an
8436     * implementation to implement these as NOPs (equivalent to
8437     * discarding the LO_BRANCH_INFO cache immediately), and we
8438     * take that IMPDEF option because for QEMU a "real" implementation
8439     * would be complicated and wouldn't execute any faster.
8440     */
8441    if (!dc_isar_feature(aa32_lob, s)) {
8442        return false;
8443    }
8444    if (a->boff == 0) {
8445        /* SEE "Related encodings" (loop insns) */
8446        return false;
8447    }
8448    /* Handle as NOP */
8449    return true;
8450}
8451
8452static bool trans_DLS(DisasContext *s, arg_DLS *a)
8453{
8454    /* M-profile low-overhead loop start */
8455    TCGv_i32 tmp;
8456
8457    if (!dc_isar_feature(aa32_lob, s)) {
8458        return false;
8459    }
8460    if (a->rn == 13 || a->rn == 15) {
8461        /*
8462         * For DLSTP rn == 15 is a related encoding (LCTP); the
8463         * other cases caught by this condition are all
8464         * CONSTRAINED UNPREDICTABLE: we choose to UNDEF
8465         */
8466        return false;
8467    }
8468
8469    if (a->size != 4) {
8470        /* DLSTP */
8471        if (!dc_isar_feature(aa32_mve, s)) {
8472            return false;
8473        }
8474        if (!vfp_access_check(s)) {
8475            return true;
8476        }
8477    }
8478
8479    /* Not a while loop: set LR to the count, and set LTPSIZE for DLSTP */
8480    tmp = load_reg(s, a->rn);
8481    store_reg(s, 14, tmp);
8482    if (a->size != 4) {
8483        /* DLSTP: set FPSCR.LTPSIZE */
8484        store_cpu_field(tcg_constant_i32(a->size), v7m.ltpsize);
8485        s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
8486    }
8487    return true;
8488}
8489
8490static bool trans_WLS(DisasContext *s, arg_WLS *a)
8491{
8492    /* M-profile low-overhead while-loop start */
8493    TCGv_i32 tmp;
8494    DisasLabel nextlabel;
8495
8496    if (!dc_isar_feature(aa32_lob, s)) {
8497        return false;
8498    }
8499    if (a->rn == 13 || a->rn == 15) {
8500        /*
8501         * For WLSTP rn == 15 is a related encoding (LE); the
8502         * other cases caught by this condition are all
8503         * CONSTRAINED UNPREDICTABLE: we choose to UNDEF
8504         */
8505        return false;
8506    }
8507    if (s->condexec_mask) {
8508        /*
8509         * WLS in an IT block is CONSTRAINED UNPREDICTABLE;
8510         * we choose to UNDEF, because otherwise our use of
8511         * gen_goto_tb(1) would clash with the use of TB exit 1
8512         * in the dc->condjmp condition-failed codepath in
8513         * arm_tr_tb_stop() and we'd get an assertion.
8514         */
8515        return false;
8516    }
8517    if (a->size != 4) {
8518        /* WLSTP */
8519        if (!dc_isar_feature(aa32_mve, s)) {
8520            return false;
8521        }
8522        /*
8523         * We need to check that the FPU is enabled here, but mustn't
8524         * call vfp_access_check() to do that because we don't want to
8525         * do the lazy state preservation in the "loop count is zero" case.
8526         * Do the check-and-raise-exception by hand.
8527         */
8528        if (s->fp_excp_el) {
8529            gen_exception_insn_el(s, 0, EXCP_NOCP,
8530                                  syn_uncategorized(), s->fp_excp_el);
8531            return true;
8532        }
8533    }
8534
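        /*
         * Two exits are generated below: if Rn is zero we branch straight
         * to the WLS branch target (past the loop body); otherwise we set
         * LR to the loop count and fall through to the following insn.
         */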
8535    nextlabel = gen_disas_label(s);
8536    tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_R[a->rn], 0, nextlabel.label);
8537    tmp = load_reg(s, a->rn);
8538    store_reg(s, 14, tmp);
8539    if (a->size != 4) {
8540        /*
8541         * WLSTP: set FPSCR.LTPSIZE. This requires that we do the
8542         * lazy state preservation, new FP context creation, etc,
8543         * that vfp_access_check() does. We know that the actual
8544         * access check will succeed (ie it won't generate code that
8545         * throws an exception) because we did that check by hand earlier.
8546         */
8547        bool ok = vfp_access_check(s);
8548        assert(ok);
8549        store_cpu_field(tcg_constant_i32(a->size), v7m.ltpsize);
8550        /*
8551         * LTPSIZE updated, but MVE_NO_PRED will always be the same thing (0)
8552         * when we take this upcoming exit from this TB, so gen_jmp_tb() is OK.
8553         */
8554    }
8555    gen_jmp_tb(s, curr_insn_len(s), 1);
8556
8557    set_disas_label(s, nextlabel);
8558    gen_jmp(s, jmp_diff(s, a->imm));
8559    return true;
8560}
8561
8562static bool trans_LE(DisasContext *s, arg_LE *a)
8563{
8564    /*
8565     * M-profile low-overhead loop end. The architecture permits an
8566     * implementation to discard the LO_BRANCH_INFO cache at any time,
8567     * and we take the IMPDEF option to never set it in the first place
8568     * (equivalent to always discarding it immediately), because for QEMU
8569     * a "real" implementation would be complicated and wouldn't execute
8570     * any faster.
8571     */
8572    TCGv_i32 tmp;
8573    DisasLabel loopend;
8574    bool fpu_active;
8575
8576    if (!dc_isar_feature(aa32_lob, s)) {
8577        return false;
8578    }
8579    if (a->f && a->tp) {
8580        return false;
8581    }
8582    if (s->condexec_mask) {
8583        /*
8584         * LE in an IT block is CONSTRAINED UNPREDICTABLE;
8585         * we choose to UNDEF, because otherwise our use of
8586         * gen_goto_tb(1) would clash with the use of TB exit 1
8587         * in the dc->condjmp condition-failed codepath in
8588         * arm_tr_tb_stop() and we'd get an assertion.
8589         */
8590        return false;
8591    }
8592    if (a->tp) {
8593        /* LETP */
8594        if (!dc_isar_feature(aa32_mve, s)) {
8595            return false;
8596        }
8597        if (!vfp_access_check(s)) {
8598            s->eci_handled = true;
8599            return true;
8600        }
8601    }
8602
8603    /* LE/LETP is OK with ECI set and leaves it untouched */
8604    s->eci_handled = true;
8605
8606    /*
8607     * With MVE, LTPSIZE might not be 4, and we must emit an INVSTATE
8608     * UsageFault exception for the LE insn in that case. Note that we
8609     * are not directly checking FPSCR.LTPSIZE but instead check the
8610     * pseudocode LTPSIZE() function, which returns 4 if the FPU is
8611     * not currently active (ie ActiveFPState() returns false). We
8612     * can identify not-active purely from our TB state flags, as the
8613     * FPU is active only if:
8614     *  the FPU is enabled
8615     *  AND lazy state preservation is not active
8616     *  AND we do not need a new fp context (this is the ASPEN/FPCA check)
8617     *
8618     * Usually we don't need to care about this distinction between
8619     * LTPSIZE and FPSCR.LTPSIZE, because the code in vfp_access_check()
8620     * will either take an exception or clear the conditions that make
8621     * the FPU not active. But LE is an unusual case of a non-FP insn
8622     * that looks at LTPSIZE.
8623     */
8624    fpu_active = !s->fp_excp_el && !s->v7m_lspact && !s->v7m_new_fp_ctxt_needed;
8625
8626    if (!a->tp && dc_isar_feature(aa32_mve, s) && fpu_active) {
8627        /* Need to do a runtime check for LTPSIZE != 4 */
8628        DisasLabel skipexc = gen_disas_label(s);
8629        tmp = load_cpu_field(v7m.ltpsize);
8630        tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 4, skipexc.label);
8631        tcg_temp_free_i32(tmp);
8632        gen_exception_insn(s, 0, EXCP_INVSTATE, syn_uncategorized());
8633        set_disas_label(s, skipexc);
8634    }
8635
8636    if (a->f) {
8637        /* Loop-forever: just jump back to the loop start */
8638        gen_jmp(s, jmp_diff(s, -a->imm));
8639        return true;
8640    }
8641
8642    /*
8643     * Not loop-forever. If LR <= loop-decrement-value this is the last iteration.
8644     * For LE, we know at this point that LTPSIZE must be 4 and the
8645     * loop decrement value is 1. For LETP we need to calculate the decrement
8646     * value from LTPSIZE.
8647     */
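        /*
         * For example, an LETP loop with LTPSIZE == 2 (32-bit elements)
         * computes a decrement of 1 << (4 - 2) == 4 below, i.e. four
         * elements are consumed per iteration; LTPSIZE == 0 gives 16.
         */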
8648    loopend = gen_disas_label(s);
8649    if (!a->tp) {
8650        tcg_gen_brcondi_i32(TCG_COND_LEU, cpu_R[14], 1, loopend.label);
8651        tcg_gen_addi_i32(cpu_R[14], cpu_R[14], -1);
8652    } else {
8653        /*
8654         * Decrement by 1 << (4 - LTPSIZE). We need to use a TCG local
8655         * so that decr stays live after the brcondi.
8656         */
8657        TCGv_i32 decr = tcg_temp_local_new_i32();
8658        TCGv_i32 ltpsize = load_cpu_field(v7m.ltpsize);
8659        tcg_gen_sub_i32(decr, tcg_constant_i32(4), ltpsize);
8660        tcg_gen_shl_i32(decr, tcg_constant_i32(1), decr);
8661        tcg_temp_free_i32(ltpsize);
8662
8663        tcg_gen_brcond_i32(TCG_COND_LEU, cpu_R[14], decr, loopend.label);
8664
8665        tcg_gen_sub_i32(cpu_R[14], cpu_R[14], decr);
8666        tcg_temp_free_i32(decr);
8667    }
8668    /* Jump back to the loop start */
8669    gen_jmp(s, jmp_diff(s, -a->imm));
8670
8671    set_disas_label(s, loopend);
8672    if (a->tp) {
8673        /* Exits from tail-pred loops must reset LTPSIZE to 4 */
8674        store_cpu_field(tcg_constant_i32(4), v7m.ltpsize);
8675    }
8676    /* End TB, continuing to following insn */
8677    gen_jmp_tb(s, curr_insn_len(s), 1);
8678    return true;
8679}
8680
8681static bool trans_LCTP(DisasContext *s, arg_LCTP *a)
8682{
8683    /*
8684     * M-profile Loop Clear with Tail Predication. Since our implementation
8685     * doesn't cache branch information, all we need to do is reset
8686     * FPSCR.LTPSIZE to 4.
8687     */
8688
8689    if (!dc_isar_feature(aa32_lob, s) ||
8690        !dc_isar_feature(aa32_mve, s)) {
8691        return false;
8692    }
8693
8694    if (!vfp_access_check(s)) {
8695        return true;
8696    }
8697
8698    store_cpu_field_constant(4, v7m.ltpsize);
8699    return true;
8700}
8701
8702static bool trans_VCTP(DisasContext *s, arg_VCTP *a)
8703{
8704    /*
8705     * M-profile Create Vector Tail Predicate. This insn is itself
8706     * predicated and is subject to beatwise execution.
8707     */
8708    TCGv_i32 rn_shifted, masklen;
8709
8710    if (!dc_isar_feature(aa32_mve, s) || a->rn == 13 || a->rn == 15) {
8711        return false;
8712    }
8713
8714    if (!mve_eci_check(s) || !vfp_access_check(s)) {
8715        return true;
8716    }
8717
8718    /*
8719     * We pre-calculate the mask length here to avoid having
8720     * to have multiple helpers specialized for size.
8721     * We pass the helper "rn <= (1 << (4 - size)) ? (rn << size) : 16".
8722     */
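        /*
         * For example, with a->size == 2 the cut-off is 1 << (4 - 2) == 4:
         * an Rn of 3 gives a mask length of 3 << 2 == 12 bytes of the
         * 16-byte vector, while any Rn above 4 saturates to the full 16.
         */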
8723    rn_shifted = tcg_temp_new_i32();
8724    masklen = load_reg(s, a->rn);
8725    tcg_gen_shli_i32(rn_shifted, masklen, a->size);
8726    tcg_gen_movcond_i32(TCG_COND_LEU, masklen,
8727                        masklen, tcg_constant_i32(1 << (4 - a->size)),
8728                        rn_shifted, tcg_constant_i32(16));
8729    gen_helper_mve_vctp(cpu_env, masklen);
8730    tcg_temp_free_i32(masklen);
8731    tcg_temp_free_i32(rn_shifted);
8732    /* This insn updates predication bits */
8733    s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
8734    mve_update_eci(s);
8735    return true;
8736}
8737
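    /*
     * Table Branch (TBB/TBH): load an unsigned byte or halfword offset
     * from [Rn + Rm] (Rm is doubled first for TBH), double it, and add
     * it to the PC value (this insn's address + 4) to form the target.
     */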
8738static bool op_tbranch(DisasContext *s, arg_tbranch *a, bool half)
8739{
8740    TCGv_i32 addr, tmp;
8741
8742    tmp = load_reg(s, a->rm);
8743    if (half) {
8744        tcg_gen_add_i32(tmp, tmp, tmp);
8745    }
8746    addr = load_reg(s, a->rn);
8747    tcg_gen_add_i32(addr, addr, tmp);
8748
8749    gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), half ? MO_UW : MO_UB);
8750
8751    tcg_gen_add_i32(tmp, tmp, tmp);
8752    gen_pc_plus_diff(s, addr, jmp_diff(s, 0));
8753    tcg_gen_add_i32(tmp, tmp, addr);
8754    tcg_temp_free_i32(addr);
8755    store_reg(s, 15, tmp);
8756    return true;
8757}
8758
8759static bool trans_TBB(DisasContext *s, arg_tbranch *a)
8760{
8761    return op_tbranch(s, a, false);
8762}
8763
8764static bool trans_TBH(DisasContext *s, arg_tbranch *a)
8765{
8766    return op_tbranch(s, a, true);
8767}
8768
8769static bool trans_CBZ(DisasContext *s, arg_CBZ *a)
8770{
8771    TCGv_i32 tmp = load_reg(s, a->rn);
8772
8773    arm_gen_condlabel(s);
8774    tcg_gen_brcondi_i32(a->nz ? TCG_COND_EQ : TCG_COND_NE,
8775                        tmp, 0, s->condlabel.label);
8776    tcg_temp_free_i32(tmp);
8777    gen_jmp(s, jmp_diff(s, a->imm));
8778    return true;
8779}
8780
8781/*
8782 * Supervisor call - both T32 & A32 come here so we need to check
8783 * which mode we are in when checking for semihosting.
8784 */
8785
8786static bool trans_SVC(DisasContext *s, arg_SVC *a)
8787{
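        /*
         * These are the semihosting call immediates defined by the Arm
         * semihosting specification: SVC 0xab in Thumb state and
         * SVC 0x123456 in Arm state.
         */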
8788    const uint32_t semihost_imm = s->thumb ? 0xab : 0x123456;
8789
8790    if (!arm_dc_feature(s, ARM_FEATURE_M) &&
8791        semihosting_enabled(s->current_el == 0) &&
8792        (a->imm == semihost_imm)) {
8793        gen_exception_internal_insn(s, EXCP_SEMIHOST);
8794    } else {
8795        gen_update_pc(s, curr_insn_len(s));
8796        s->svc_imm = a->imm;
8797        s->base.is_jmp = DISAS_SWI;
8798    }
8799    return true;
8800}
8801
8802/*
8803 * Unconditional system instructions
8804 */
8805
8806static bool trans_RFE(DisasContext *s, arg_RFE *a)
8807{
8808    static const int8_t pre_offset[4] = {
8809        /* DA */ -4, /* IA */ 0, /* DB */ -8, /* IB */ 4
8810    };
8811    static const int8_t post_offset[4] = {
8812        /* DA */ -8, /* IA */ 4, /* DB */ -4, /* IB */ 0
8813    };
8814    TCGv_i32 addr, t1, t2;
8815
8816    if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
8817        return false;
8818    }
8819    if (IS_USER(s)) {
8820        unallocated_encoding(s);
8821        return true;
8822    }
8823
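        /*
         * For example RFEIA (a->pu == 1): the PC is loaded from [Rn] and
         * the CPSR from [Rn + 4]; with writeback Rn ends up incremented
         * by 8, just past the two words that were loaded.
         */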
8824    addr = load_reg(s, a->rn);
8825    tcg_gen_addi_i32(addr, addr, pre_offset[a->pu]);
8826
8827    /* Load the PC into t1 and the CPSR into t2.  */
8828    t1 = tcg_temp_new_i32();
8829    gen_aa32_ld_i32(s, t1, addr, get_mem_index(s), MO_UL | MO_ALIGN);
8830    tcg_gen_addi_i32(addr, addr, 4);
8831    t2 = tcg_temp_new_i32();
8832    gen_aa32_ld_i32(s, t2, addr, get_mem_index(s), MO_UL | MO_ALIGN);
8833
8834    if (a->w) {
8835        /* Base writeback.  */
8836        tcg_gen_addi_i32(addr, addr, post_offset[a->pu]);
8837        store_reg(s, a->rn, addr);
8838    } else {
8839        tcg_temp_free_i32(addr);
8840    }
8841    gen_rfe(s, t1, t2);
8842    return true;
8843}
8844
8845static bool trans_SRS(DisasContext *s, arg_SRS *a)
8846{
8847    if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
8848        return false;
8849    }
8850    gen_srs(s, a->mode, a->pu, a->w);
8851    return true;
8852}
8853
8854static bool trans_CPS(DisasContext *s, arg_CPS *a)
8855{
8856    uint32_t mask, val;
8857
8858    if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
8859        return false;
8860    }
8861    if (IS_USER(s)) {
8862        /* Implemented as NOP in user mode.  */
8863        return true;
8864    }
8865    /* TODO: There are quite a lot of UNPREDICTABLE argument combinations. */
8866
8867    mask = val = 0;
8868    if (a->imod & 2) {
8869        if (a->A) {
8870            mask |= CPSR_A;
8871        }
8872        if (a->I) {
8873            mask |= CPSR_I;
8874        }
8875        if (a->F) {
8876            mask |= CPSR_F;
8877        }
8878        if (a->imod & 1) {
8879            val |= mask;
8880        }
8881    }
8882    if (a->M) {
8883        mask |= CPSR_M;
8884        val |= a->mode;
8885    }
8886    if (mask) {
8887        gen_set_psr_im(s, mask, 0, val);
8888    }
8889    return true;
8890}
8891
8892static bool trans_CPS_v7m(DisasContext *s, arg_CPS_v7m *a)
8893{
8894    TCGv_i32 tmp, addr;
8895
8896    if (!arm_dc_feature(s, ARM_FEATURE_M)) {
8897        return false;
8898    }
8899    if (IS_USER(s)) {
8900        /* Implemented as NOP in user mode.  */
8901        return true;
8902    }
8903
8904    tmp = tcg_constant_i32(a->im);
8905    /* FAULTMASK */
8906    if (a->F) {
8907        addr = tcg_constant_i32(19);
8908        gen_helper_v7m_msr(cpu_env, addr, tmp);
8909    }
8910    /* PRIMASK */
8911    if (a->I) {
8912        addr = tcg_constant_i32(16);
8913        gen_helper_v7m_msr(cpu_env, addr, tmp);
8914    }
8915    gen_rebuild_hflags(s, false);
8916    gen_lookup_tb(s);
8917    return true;
8918}
8919
8920/*
8921 * Clear-Exclusive, Barriers
8922 */
8923
8924static bool trans_CLREX(DisasContext *s, arg_CLREX *a)
8925{
8926    if (s->thumb
8927        ? !ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)
8928        : !ENABLE_ARCH_6K) {
8929        return false;
8930    }
8931    gen_clrex(s);
8932    return true;
8933}
8934
8935static bool trans_DSB(DisasContext *s, arg_DSB *a)
8936{
8937    if (!ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)) {
8938        return false;
8939    }
8940    tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
8941    return true;
8942}
8943
8944static bool trans_DMB(DisasContext *s, arg_DMB *a)
8945{
8946    return trans_DSB(s, NULL);
8947}
8948
8949static bool trans_ISB(DisasContext *s, arg_ISB *a)
8950{
8951    if (!ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)) {
8952        return false;
8953    }
8954    /*
8955     * We need to break the TB after this insn to execute
8956     * self-modifying code correctly and also to take
8957     * any pending interrupts immediately.
8958     */
8959    s->base.is_jmp = DISAS_TOO_MANY;
8960    return true;
8961}
8962
8963static bool trans_SB(DisasContext *s, arg_SB *a)
8964{
8965    if (!dc_isar_feature(aa32_sb, s)) {
8966        return false;
8967    }
8968    /*
8969     * TODO: There is no speculation barrier opcode
8970     * for TCG; MB and end the TB instead.
8971     */
8972    tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
8973    s->base.is_jmp = DISAS_TOO_MANY;
8974    return true;
8975}
8976
8977static bool trans_SETEND(DisasContext *s, arg_SETEND *a)
8978{
8979    if (!ENABLE_ARCH_6) {
8980        return false;
8981    }
8982    if (a->E != (s->be_data == MO_BE)) {
8983        gen_helper_setend(cpu_env);
8984        s->base.is_jmp = DISAS_UPDATE_EXIT;
8985    }
8986    return true;
8987}
8988
8989/*
8990 * Preload instructions
8991 * All are nops, contingent on the appropriate arch level.
8992 */
8993
8994static bool trans_PLD(DisasContext *s, arg_PLD *a)
8995{
8996    return ENABLE_ARCH_5TE;
8997}
8998
8999static bool trans_PLDW(DisasContext *s, arg_PLD *a)
9000{
9001    return arm_dc_feature(s, ARM_FEATURE_V7MP);
9002}
9003
9004static bool trans_PLI(DisasContext *s, arg_PLD *a)
9005{
9006    return ENABLE_ARCH_7;
9007}
9008
9009/*
9010 * If-then
9011 */
9012
9013static bool trans_IT(DisasContext *s, arg_IT *a)
9014{
9015    int cond_mask = a->cond_mask;
9016
9017    /*
9018     * No actual code generated for this insn, just setup state.
9019     *
9020     * Combinations of firstcond and mask which set up a 0b1111
9021     * condition are UNPREDICTABLE; we take the CONSTRAINED
9022     * UNPREDICTABLE choice to treat 0b1111 the same as 0b1110,
9023     * i.e. both meaning "execute always".
9024     */
9025    s->condexec_cond = (cond_mask >> 4) & 0xe;
9026    s->condexec_mask = cond_mask & 0x1f;
9027    return true;
9028}
9029
9030/* v8.1M CSEL/CSINC/CSNEG/CSINV */
9031static bool trans_CSEL(DisasContext *s, arg_CSEL *a)
9032{
9033    TCGv_i32 rn, rm, zero;
9034    DisasCompare c;
9035
9036    if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
9037        return false;
9038    }
9039
9040    if (a->rm == 13) {
9041        /* SEE "Related encodings" (MVE shifts) */
9042        return false;
9043    }
9044
9045    if (a->rd == 13 || a->rd == 15 || a->rn == 13 || a->fcond >= 14) {
9046        /* CONSTRAINED UNPREDICTABLE: we choose to UNDEF */
9047        return false;
9048    }
9049
9050    /* In this insn input reg fields of 0b1111 mean "zero", not "PC" */
9051    zero = tcg_constant_i32(0);
9052    if (a->rn == 15) {
            /* rn is written by the movcond below; don't write to the constant */
9053        rn = tcg_temp_new_i32();
            tcg_gen_movi_i32(rn, 0);
9054    } else {
9055        rn = load_reg(s, a->rn);
9056    }
9057    if (a->rm == 15) {
            /* rm is written by the CSINC/CSINV/CSNEG cases; likewise use a temp */
9058        rm = tcg_temp_new_i32();
            tcg_gen_movi_i32(rm, 0);
9059    } else {
9060        rm = load_reg(s, a->rm);
9061    }
9062
9063    switch (a->op) {
9064    case 0: /* CSEL */
9065        break;
9066    case 1: /* CSINC */
9067        tcg_gen_addi_i32(rm, rm, 1);
9068        break;
9069    case 2: /* CSINV */
9070        tcg_gen_not_i32(rm, rm);
9071        break;
9072    case 3: /* CSNEG */
9073        tcg_gen_neg_i32(rm, rm);
9074        break;
9075    default:
9076        g_assert_not_reached();
9077    }
9078
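        /*
         * Rd gets Rn if the condition passes, otherwise the (possibly
         * incremented, inverted or negated) Rm computed above.
         */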
9079    arm_test_cc(&c, a->fcond);
9080    tcg_gen_movcond_i32(c.cond, rn, c.value, zero, rn, rm);
9081    arm_free_cc(&c);
9082
9083    store_reg(s, a->rd, rn);
9084    tcg_temp_free_i32(rm);
9085
9086    return true;
9087}
9088
9089/*
9090 * Legacy decoder.
9091 */
9092
9093static void disas_arm_insn(DisasContext *s, unsigned int insn)
9094{
9095    unsigned int cond = insn >> 28;
9096
9097    /* M variants do not implement ARM mode; this must raise the INVSTATE
9098     * UsageFault exception.
9099     */
9100    if (arm_dc_feature(s, ARM_FEATURE_M)) {
9101        gen_exception_insn(s, 0, EXCP_INVSTATE, syn_uncategorized());
9102        return;
9103    }
9104
9105    if (s->pstate_il) {
9106        /*
9107         * Illegal execution state. This has priority over BTI
9108         * exceptions, but comes after instruction abort exceptions.
9109         */
9110        gen_exception_insn(s, 0, EXCP_UDEF, syn_illegalstate());
9111        return;
9112    }
9113
9114    if (cond == 0xf) {
9115        /* In ARMv3 and v4 the NV condition is UNPREDICTABLE; we
9116         * choose to UNDEF. In ARMv5 and above the space is used
9117         * for miscellaneous unconditional instructions.
9118         */
9119        if (!arm_dc_feature(s, ARM_FEATURE_V5)) {
9120            unallocated_encoding(s);
9121            return;
9122        }
9123
9124        /* Unconditional instructions.  */
9125        /* TODO: Perhaps merge these into one decodetree output file.  */
9126        if (disas_a32_uncond(s, insn) ||
9127            disas_vfp_uncond(s, insn) ||
9128            disas_neon_dp(s, insn) ||
9129            disas_neon_ls(s, insn) ||
9130            disas_neon_shared(s, insn)) {
9131            return;
9132        }
9133        /* fall back to legacy decoder */
9134
9135        if ((insn & 0x0e000f00) == 0x0c000100) {
9136            if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
9137                /* iWMMXt register transfer.  */
9138                if (extract32(s->c15_cpar, 1, 1)) {
9139                    if (!disas_iwmmxt_insn(s, insn)) {
9140                        return;
9141                    }
9142                }
9143            }
9144        }
9145        goto illegal_op;
9146    }
9147    if (cond != 0xe) {
9148        /* If not executing unconditionally, generate a conditional jump
9149           to the next instruction.  */
9150        arm_skip_unless(s, cond);
9151    }
9152
9153    /* TODO: Perhaps merge these into one decodetree output file.  */
9154    if (disas_a32(s, insn) ||
9155        disas_vfp(s, insn)) {
9156        return;
9157    }
9158    /* fall back to legacy decoder */
9159    /* TODO: convert xscale/iwmmxt decoder to decodetree ?? */
9160    if (arm_dc_feature(s, ARM_FEATURE_XSCALE)) {
9161        if (((insn & 0x0c000e00) == 0x0c000000)
9162            && ((insn & 0x03000000) != 0x03000000)) {
9163            /* Coprocessor insn, coprocessor 0 or 1 */
9164            disas_xscale_insn(s, insn);
9165            return;
9166        }
9167    }
9168
9169illegal_op:
9170    unallocated_encoding(s);
9171}
9172
9173static bool thumb_insn_is_16bit(DisasContext *s, uint32_t pc, uint32_t insn)
9174{
9175    /*
9176     * Return true if this is a 16 bit instruction. We must be precise
9177     * about this (matching the decode).
9178     */
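    /*
     * The only 32-bit encodings have 0b11101, 0b11110 or 0b11111 in
     * the top five bits, i.e. (insn >> 11) >= 0x1d.
     */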
9179    if ((insn >> 11) < 0x1d) {
9180        /* Definitely a 16-bit instruction */
9181        return true;
9182    }
9183
9184    /* Top five bits 0b11101 / 0b11110 / 0b11111 : this is the
9185     * first half of a 32-bit Thumb insn. Thumb-1 cores might
9186     * end up actually treating this as two 16-bit insns, though,
9187     * if it's half of a bl/blx pair that might span a page boundary.
9188     */
9189    if (arm_dc_feature(s, ARM_FEATURE_THUMB2) ||
9190        arm_dc_feature(s, ARM_FEATURE_M)) {
9191        /* Thumb2 cores (including all M profile ones) always treat
9192         * 32-bit insns as 32-bit.
9193         */
9194        return false;
9195    }
9196
9197    if ((insn >> 11) == 0x1e && pc - s->page_start < TARGET_PAGE_SIZE - 3) {
9198        /* 0b1111_0xxx_xxxx_xxxx : BL/BLX prefix, and the suffix
9199         * is not on the next page; we merge this into a 32-bit
9200         * insn.
9201         */
9202        return false;
9203    }
9204    /* 0b1110_1xxx_xxxx_xxxx : BLX suffix (or UNDEF);
9205     * 0b1111_1xxx_xxxx_xxxx : BL suffix;
9206     * 0b1111_0xxx_xxxx_xxxx : BL/BLX prefix on the end of a page
9207     *  -- handle as single 16 bit insn
9208     */
9209    return true;
9210}
9211
9212/* Translate a 32-bit thumb instruction. */
9213static void disas_thumb2_insn(DisasContext *s, uint32_t insn)
9214{
9215    /*
9216     * ARMv6-M supports a limited subset of Thumb2 instructions.
9217     * Other Thumb1 architectures allow only the combined
9218     * BL/BLX prefix and suffix as a 32-bit insn.
9219     */
9220    if (arm_dc_feature(s, ARM_FEATURE_M) &&
9221        !arm_dc_feature(s, ARM_FEATURE_V7)) {
9222        int i;
9223        bool found = false;
9224        static const uint32_t armv6m_insn[] = {0xf3808000 /* msr */,
9225                                               0xf3b08040 /* dsb */,
9226                                               0xf3b08050 /* dmb */,
9227                                               0xf3b08060 /* isb */,
9228                                               0xf3e08000 /* mrs */,
9229                                               0xf000d000 /* bl */};
9230        static const uint32_t armv6m_mask[] = {0xffe0d000,
9231                                               0xfff0d0f0,
9232                                               0xfff0d0f0,
9233                                               0xfff0d0f0,
9234                                               0xffe0d000,
9235                                               0xf800d000};
9236
9237        for (i = 0; i < ARRAY_SIZE(armv6m_insn); i++) {
9238            if ((insn & armv6m_mask[i]) == armv6m_insn[i]) {
9239                found = true;
9240                break;
9241            }
9242        }
9243        if (!found) {
9244            goto illegal_op;
9245        }
9246    } else if ((insn & 0xf800e800) != 0xf000e800)  {
9247        if (!arm_dc_feature(s, ARM_FEATURE_THUMB2)) {
9248            unallocated_encoding(s);
9249            return;
9250        }
9251    }
9252
9253    if (arm_dc_feature(s, ARM_FEATURE_M)) {
9254        /*
9255         * NOCP takes precedence over any UNDEF for (almost) the
9256         * entire wide range of coprocessor-space encodings, so check
9257         * for it first before proceeding to actually decode eg VFP
9258         * insns. This decode also handles the few insns which are
9259         * in copro space but do not have NOCP checks (eg VLLDM, VLSTM).
9260         */
9261        if (disas_m_nocp(s, insn)) {
9262            return;
9263        }
9264    }
9265
9266    if ((insn & 0xef000000) == 0xef000000) {
9267        /*
9268         * T32 encodings 0b111p_1111_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq
9269         * transform into
9270         * A32 encodings 0b1111_001p_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq
9271         */
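            /*
             * Concretely: T32 bit 28 (the 'p' bit) moves down to bit 24,
             * bits [27:26] and the old bit 24 are cleared, and bit 28 is
             * forced to 1.
             */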
9272        uint32_t a32_insn = (insn & 0xe2ffffff) |
9273            ((insn & (1 << 28)) >> 4) | (1 << 28);
9274
9275        if (disas_neon_dp(s, a32_insn)) {
9276            return;
9277        }
9278    }
9279
9280    if ((insn & 0xff100000) == 0xf9000000) {
9281        /*
9282         * T32 encodings 0b1111_1001_ppp0_qqqq_qqqq_qqqq_qqqq_qqqq
9283         * transform into
9284         * A32 encodings 0b1111_0100_ppp0_qqqq_qqqq_qqqq_qqqq_qqqq
9285         */
9286        uint32_t a32_insn = (insn & 0x00ffffff) | 0xf4000000;
9287
9288        if (disas_neon_ls(s, a32_insn)) {
9289            return;
9290        }
9291    }
9292
9293    /*
9294     * TODO: Perhaps merge these into one decodetree output file.
9295     * Note disas_vfp is written for a32 with cond field in the
9296     * top nibble.  The t32 encoding requires 0xe in the top nibble.
9297     */
9298    if (disas_t32(s, insn) ||
9299        disas_vfp_uncond(s, insn) ||
9300        disas_neon_shared(s, insn) ||
9301        disas_mve(s, insn) ||
9302        ((insn >> 28) == 0xe && disas_vfp(s, insn))) {
9303        return;
9304    }
9305
9306illegal_op:
9307    unallocated_encoding(s);
9308}
9309
9310static void disas_thumb_insn(DisasContext *s, uint32_t insn)
9311{
9312    if (!disas_t16(s, insn)) {
9313        unallocated_encoding(s);
9314    }
9315}
9316
9317static bool insn_crosses_page(CPUARMState *env, DisasContext *s)
9318{
9319    /* Return true if the insn at dc->base.pc_next might cross a page boundary.
9320     * (False positives are OK, false negatives are not.)
9321     * We know this is a Thumb insn, and our caller ensures we are
9322     * only called if dc->base.pc_next is less than 4 bytes from the page
9323     * boundary, so we cross the page if the first 16 bits indicate
9324     * that this is a 32 bit insn.
9325     */
9326    uint16_t insn = arm_lduw_code(env, &s->base, s->base.pc_next, s->sctlr_b);
9327
9328    return !thumb_insn_is_16bit(s, s->base.pc_next, insn);
9329}
9330
9331static void arm_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
9332{
9333    DisasContext *dc = container_of(dcbase, DisasContext, base);
9334    CPUARMState *env = cs->env_ptr;
9335    ARMCPU *cpu = env_archcpu(env);
9336    CPUARMTBFlags tb_flags = arm_tbflags_from_tb(dc->base.tb);
9337    uint32_t condexec, core_mmu_idx;
9338
9339    dc->isar = &cpu->isar;
9340    dc->condjmp = 0;
9341    dc->pc_save = dc->base.pc_first;
9342    dc->aarch64 = false;
9343    dc->thumb = EX_TBFLAG_AM32(tb_flags, THUMB);
9344    dc->be_data = EX_TBFLAG_ANY(tb_flags, BE_DATA) ? MO_BE : MO_LE;
9345    condexec = EX_TBFLAG_AM32(tb_flags, CONDEXEC);
9346    /*
9347     * The CONDEXEC TB flags are CPSR bits [15:10][26:25]. On A-profile this
9348     * is always the IT bits. On M-profile, some of the reserved encodings
9349     * of IT are used instead to indicate either ICI or ECI, which
9350     * indicate partial progress of a restartable insn that was interrupted
9351     * partway through by an exception:
9352     *  * if CONDEXEC[3:0] != 0b0000 : CONDEXEC is IT bits
9353     *  * if CONDEXEC[3:0] == 0b0000 : CONDEXEC is ICI or ECI bits
9354     * In all cases CONDEXEC == 0 means "not in IT block or restartable
9355     * insn, behave normally".
9356     */
9357    dc->eci = dc->condexec_mask = dc->condexec_cond = 0;
9358    dc->eci_handled = false;
9359    if (condexec & 0xf) {
9360        dc->condexec_mask = (condexec & 0xf) << 1;
9361        dc->condexec_cond = condexec >> 4;
9362    } else {
9363        if (arm_feature(env, ARM_FEATURE_M)) {
9364            dc->eci = condexec >> 4;
9365        }
9366    }
9367
9368    core_mmu_idx = EX_TBFLAG_ANY(tb_flags, MMUIDX);
9369    dc->mmu_idx = core_to_arm_mmu_idx(env, core_mmu_idx);
9370    dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
9371#if !defined(CONFIG_USER_ONLY)
9372    dc->user = (dc->current_el == 0);
9373#endif
9374    dc->fp_excp_el = EX_TBFLAG_ANY(tb_flags, FPEXC_EL);
9375    dc->align_mem = EX_TBFLAG_ANY(tb_flags, ALIGN_MEM);
9376    dc->pstate_il = EX_TBFLAG_ANY(tb_flags, PSTATE__IL);
9377
9378    if (arm_feature(env, ARM_FEATURE_M)) {
9379        dc->vfp_enabled = 1;
9380        dc->be_data = MO_TE;
9381        dc->v7m_handler_mode = EX_TBFLAG_M32(tb_flags, HANDLER);
9382        dc->v8m_secure = EX_TBFLAG_M32(tb_flags, SECURE);
9383        dc->v8m_stackcheck = EX_TBFLAG_M32(tb_flags, STACKCHECK);
9384        dc->v8m_fpccr_s_wrong = EX_TBFLAG_M32(tb_flags, FPCCR_S_WRONG);
9385        dc->v7m_new_fp_ctxt_needed =
9386            EX_TBFLAG_M32(tb_flags, NEW_FP_CTXT_NEEDED);
9387        dc->v7m_lspact = EX_TBFLAG_M32(tb_flags, LSPACT);
9388        dc->mve_no_pred = EX_TBFLAG_M32(tb_flags, MVE_NO_PRED);
9389    } else {
9390        dc->sctlr_b = EX_TBFLAG_A32(tb_flags, SCTLR__B);
9391        dc->hstr_active = EX_TBFLAG_A32(tb_flags, HSTR_ACTIVE);
9392        dc->ns = EX_TBFLAG_A32(tb_flags, NS);
9393        dc->vfp_enabled = EX_TBFLAG_A32(tb_flags, VFPEN);
9394        if (arm_feature(env, ARM_FEATURE_XSCALE)) {
9395            dc->c15_cpar = EX_TBFLAG_A32(tb_flags, XSCALE_CPAR);
9396        } else {
9397            dc->vec_len = EX_TBFLAG_A32(tb_flags, VECLEN);
9398            dc->vec_stride = EX_TBFLAG_A32(tb_flags, VECSTRIDE);
9399        }
9400        dc->sme_trap_nonstreaming =
9401            EX_TBFLAG_A32(tb_flags, SME_TRAP_NONSTREAMING);
9402    }
9403    dc->cp_regs = cpu->cp_regs;
9404    dc->features = env->features;
9405
9406    /* Single step state. The code-generation logic here is:
9407     *  SS_ACTIVE == 0:
9408     *   generate code with no special handling for single-stepping (except
9409     *   that anything that can make us go to SS_ACTIVE == 1 must end the TB;
9410     *   this happens anyway because those changes are all system register or
9411     *   PSTATE writes).
9412     *  SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
9413     *   emit code for one insn
9414     *   emit code to clear PSTATE.SS
9415     *   emit code to generate software step exception for completed step
9416     *   end TB (as usual for having generated an exception)
9417     *  SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
9418     *   emit code to generate a software step exception
9419     *   end the TB
9420     */
9421    dc->ss_active = EX_TBFLAG_ANY(tb_flags, SS_ACTIVE);
9422    dc->pstate_ss = EX_TBFLAG_ANY(tb_flags, PSTATE__SS);
9423    dc->is_ldex = false;
9424
9425    dc->page_start = dc->base.pc_first & TARGET_PAGE_MASK;
9426
9427    /* If architectural single step active, limit to 1.  */
9428    if (dc->ss_active) {
9429        dc->base.max_insns = 1;
9430    }
9431
9432    /* ARM is a fixed-length ISA.  Bound the number of insns to execute
9433       to those left on the page.  */
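        /*
         * TARGET_PAGE_MASK is a negative power-of-two mask, so
         * -(pc_first | TARGET_PAGE_MASK) is the number of bytes from
         * pc_first to the end of its page; each A32 insn is 4 bytes.
         */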
9434    if (!dc->thumb) {
9435        int bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;
9436        dc->base.max_insns = MIN(dc->base.max_insns, bound);
9437    }
9438
9439    cpu_V0 = tcg_temp_new_i64();
9440    cpu_V1 = tcg_temp_new_i64();
9441    cpu_M0 = tcg_temp_new_i64();
9442}
9443
9444static void arm_tr_tb_start(DisasContextBase *dcbase, CPUState *cpu)
9445{
9446    DisasContext *dc = container_of(dcbase, DisasContext, base);
9447
9448    /* A note on handling of the condexec (IT) bits:
9449     *
9450     * We want to avoid the overhead of having to write the updated condexec
9451     * bits back to the CPUARMState for every instruction in an IT block. So:
9452     * (1) if the condexec bits are not already zero then we write
9453     * zero back into the CPUARMState now. This avoids complications trying
9454     * to do it at the end of the block. (For example if we don't do this
9455     * it's hard to identify whether we can safely skip writing condexec
9456     * at the end of the TB, which we definitely want to do for the case
9457     * where a TB doesn't do anything with the IT state at all.)
9458     * (2) if we are going to leave the TB then we call gen_set_condexec()
9459     * which will write the correct value into CPUARMState if zero is wrong.
9460     * This is done both for leaving the TB at the end, and for leaving
9461     * it because of an exception we know will happen, which is done in
9462     * gen_exception_insn(). The latter is necessary because we need to
9463     * leave the TB with the PC/IT state just prior to execution of the
9464     * instruction which caused the exception.
9465     * (3) if we leave the TB unexpectedly (eg a data abort on a load)
9466     * then the CPUARMState will be wrong and we need to reset it.
9467     * This is handled in the same way as restoration of the
9468     * PC in these situations; we save the value of the condexec bits
9469     * for each PC via tcg_gen_insn_start(), and restore_state_to_opc()
9470     * then uses this to restore them after an exception.
9471     *
9472     * Note that there are no instructions which can read the condexec
9473     * bits, and none which can write non-static values to them, so
9474     * we don't need to care about whether CPUARMState is correct in the
9475     * middle of a TB.
9476     */
9477
9478    /* Reset the conditional execution bits immediately. This avoids
9479       complications trying to do it at the end of the block.  */
9480    if (dc->condexec_mask || dc->condexec_cond) {
9481        store_cpu_field_constant(0, condexec_bits);
9482    }
9483}
9484
9485static void arm_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
9486{
9487    DisasContext *dc = container_of(dcbase, DisasContext, base);
9488    /*
9489     * The ECI/ICI bits share PSR bits with the IT bits, so we
9490     * need to reconstitute the bits from the split-out DisasContext
9491     * fields here.
9492     */
9493    uint32_t condexec_bits;
9494    target_ulong pc_arg = dc->base.pc_next;
9495
9496    if (TARGET_TB_PCREL) {
9497        pc_arg &= ~TARGET_PAGE_MASK;
9498    }
9499    if (dc->eci) {
9500        condexec_bits = dc->eci << 4;
9501    } else {
9502        condexec_bits = (dc->condexec_cond << 4) | (dc->condexec_mask >> 1);
9503    }
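        /*
         * (cond << 4) | (mask >> 1) reverses the split done in
         * arm_tr_init_disas_context(), reassembling CPSR-format IT bits.
         */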
9504    tcg_gen_insn_start(pc_arg, condexec_bits, 0);
9505    dc->insn_start = tcg_last_op();
9506}
9507
9508static bool arm_check_kernelpage(DisasContext *dc)
9509{
9510#ifdef CONFIG_USER_ONLY
9511    /* Intercept jump to the magic kernel page.  */
9512    if (dc->base.pc_next >= 0xffff0000) {
9513        /* We always get here via a jump, so we know we are not in a
9514           conditional execution block.  */
9515        gen_exception_internal(EXCP_KERNEL_TRAP);
9516        dc->base.is_jmp = DISAS_NORETURN;
9517        return true;
9518    }
9519#endif
9520    return false;
9521}
9522
9523static bool arm_check_ss_active(DisasContext *dc)
9524{
9525    if (dc->ss_active && !dc->pstate_ss) {
9526        /* Singlestep state is Active-pending.
9527         * If we're in this state at the start of a TB then either
9528         *  a) we just took an exception to an EL which is being debugged
9529         *     and this is the first insn in the exception handler
9530         *  b) debug exceptions were masked and we just unmasked them
9531         *     without changing EL (eg by clearing PSTATE.D)
9532         * In either case we're going to take a swstep exception in the
9533         * "did not step an insn" case, and so the syndrome ISV and EX
9534         * bits should be zero.
9535         */
9536        assert(dc->base.num_insns == 1);
9537        gen_swstep_exception(dc, 0, 0);
9538        dc->base.is_jmp = DISAS_NORETURN;
9539        return true;
9540    }
9541
9542    return false;
9543}
9544
9545static void arm_post_translate_insn(DisasContext *dc)
9546{
9547    if (dc->condjmp && dc->base.is_jmp == DISAS_NEXT) {
9548        if (dc->pc_save != dc->condlabel.pc_save) {
9549            gen_update_pc(dc, dc->condlabel.pc_save - dc->pc_save);
9550        }
9551        gen_set_label(dc->condlabel.label);
9552        dc->condjmp = 0;
9553    }
9554    translator_loop_temp_check(&dc->base);
9555}
9556
9557static void arm_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
9558{
9559    DisasContext *dc = container_of(dcbase, DisasContext, base);
9560    CPUARMState *env = cpu->env_ptr;
9561    uint32_t pc = dc->base.pc_next;
9562    unsigned int insn;
9563
9564    /* Singlestep exceptions have the highest priority. */
9565    if (arm_check_ss_active(dc)) {
9566        dc->base.pc_next = pc + 4;
9567        return;
9568    }
9569
9570    if (pc & 3) {
9571        /*
9572         * PC alignment fault.  This has priority over the instruction abort
9573         * that we would receive from a translation fault via arm_ldl_code
9574         * (or the execution of the kernelpage entrypoint). This should only
9575         * be possible after an indirect branch, at the start of the TB.
9576         */
9577        assert(dc->base.num_insns == 1);
9578        gen_helper_exception_pc_alignment(cpu_env, tcg_constant_tl(pc));
9579        dc->base.is_jmp = DISAS_NORETURN;
9580        dc->base.pc_next = QEMU_ALIGN_UP(pc, 4);
9581        return;
9582    }
9583
9584    if (arm_check_kernelpage(dc)) {
9585        dc->base.pc_next = pc + 4;
9586        return;
9587    }
9588
9589    dc->pc_curr = pc;
9590    insn = arm_ldl_code(env, &dc->base, pc, dc->sctlr_b);
9591    dc->insn = insn;
9592    dc->base.pc_next = pc + 4;
9593    disas_arm_insn(dc, insn);
9594
9595    arm_post_translate_insn(dc);
9596
9597    /* ARM is a fixed-length ISA.  We performed the cross-page check
9598       in init_disas_context by adjusting max_insns.  */
9599}
9600
9601static bool thumb_insn_is_unconditional(DisasContext *s, uint32_t insn)
9602{
9603    /* Return true if this Thumb insn is always unconditional,
9604     * even inside an IT block. This is true of only a very few
9605     * instructions: BKPT, HLT, and SG.
9606     *
9607     * A larger class of instructions are UNPREDICTABLE if used
9608     * inside an IT block; we do not need to detect those here, because
9609     * what we do by default (perform the cc check and update the IT
9610     * bits state machine) is a permitted CONSTRAINED UNPREDICTABLE
9611     * choice for those situations.
9612     *
9613     * insn is either a 16-bit or a 32-bit instruction; the two are
9614     * distinguishable because for the 16-bit case the top 16 bits
9615     * are zeroes, and that isn't a valid 32-bit encoding.
9616     */
9617    if ((insn & 0xffffff00) == 0xbe00) {
9618        /* BKPT */
9619        return true;
9620    }
9621
9622    if ((insn & 0xffffffc0) == 0xba80 && arm_dc_feature(s, ARM_FEATURE_V8) &&
9623        !arm_dc_feature(s, ARM_FEATURE_M)) {
9624        /* HLT: v8A only. This is unconditional even when it is going to
9625         * UNDEF; see the v8A ARM ARM DDI0487B.a H3.3.
9626         * For v7 cores this was a plain old undefined encoding and so
9627         * honours its cc check. (We might be using the encoding as
9628         * a semihosting trap, but we don't change the cc check behaviour
9629         * on that account, because a debugger connected to a real v7A
9630         * core and emulating semihosting traps by catching the UNDEF
9631         * exception would also only see cases where the cc check passed.
9632         * No guest code should be trying to do a HLT semihosting trap
9633         * in an IT block anyway.
9634         */
9635        return true;
9636    }
9637
9638    if (insn == 0xe97fe97f && arm_dc_feature(s, ARM_FEATURE_V8) &&
9639        arm_dc_feature(s, ARM_FEATURE_M)) {
9640        /* SG: v8M only */
9641        return true;
9642    }
9643
9644    return false;
9645}
9646
9647static void thumb_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
9648{
9649    DisasContext *dc = container_of(dcbase, DisasContext, base);
9650    CPUARMState *env = cpu->env_ptr;
9651    uint32_t pc = dc->base.pc_next;
9652    uint32_t insn;
9653    bool is_16bit;
9654    /* TCG op to rewind to if this turns out to be an invalid ECI state */
9655    TCGOp *insn_eci_rewind = NULL;
9656    target_ulong insn_eci_pc_save = -1;
9657
9658    /* A misaligned Thumb PC is architecturally impossible. */
9659    assert((dc->base.pc_next & 1) == 0);
9660
9661    if (arm_check_ss_active(dc) || arm_check_kernelpage(dc)) {
9662        dc->base.pc_next = pc + 2;
9663        return;
9664    }
9665
9666    dc->pc_curr = pc;
9667    insn = arm_lduw_code(env, &dc->base, pc, dc->sctlr_b);
9668    is_16bit = thumb_insn_is_16bit(dc, dc->base.pc_next, insn);
9669    pc += 2;
9670    if (!is_16bit) {
9671        uint32_t insn2 = arm_lduw_code(env, &dc->base, pc, dc->sctlr_b);
9672        insn = insn << 16 | insn2;
9673        pc += 2;
9674    }
9675    dc->base.pc_next = pc;
9676    dc->insn = insn;
9677
9678    if (dc->pstate_il) {
9679        /*
9680         * Illegal execution state. This has priority over BTI
9681         * exceptions, but comes after instruction abort exceptions.
9682         */
9683        gen_exception_insn(dc, 0, EXCP_UDEF, syn_illegalstate());
9684        return;
9685    }
9686
9687    if (dc->eci) {
9688        /*
9689         * For M-profile continuable instructions, ECI/ICI handling
9690         * falls into these cases:
9691         *  - interrupt-continuable instructions
9692         *     These are the various load/store multiple insns (both
9693         *     integer and fp). The ICI bits indicate the register
9694         *     where the load/store can resume. We make the IMPDEF
9695         *     choice to always do "instruction restart", ie ignore
9696         *     the ICI value and always execute the ldm/stm from the
9697         *     start. So all we need to do is zero PSR.ICI if the
9698         *     insn executes.
9699         *  - MVE instructions subject to beat-wise execution
9700         *     Here the ECI bits indicate which beats have already been
9701         *     executed, and we must honour this. Each insn of this
9702         *     type will handle it correctly. We will update PSR.ECI
9703         *     in the helper function for the insn (some ECI values
9704         *     mean that the following insn also has been partially
9705         *     executed).
9706         *  - Special cases which don't advance ECI
9707         *     The insns LE, LETP and BKPT leave the ECI/ICI state
9708         *     bits untouched.
9709         *  - all other insns (the common case)
9710         *     Non-zero ECI/ICI means an INVSTATE UsageFault.
9711         *     We place a rewind-marker here. Insns in the previous
9712         *     three categories will set a flag in the DisasContext.
9713         *     If the flag isn't set after we call disas_thumb_insn()
9714         *     or disas_thumb2_insn() then we know we have a "some other
9715         *     insn" case. We will rewind to the marker (ie throwing away
9716         *     all the generated code) and instead emit "take exception".
9717         */
9718        insn_eci_rewind = tcg_last_op();
9719        insn_eci_pc_save = dc->pc_save;
9720    }
9721
9722    if (dc->condexec_mask && !thumb_insn_is_unconditional(dc, insn)) {
9723        uint32_t cond = dc->condexec_cond;
9724
9725        /*
9726         * Conditionally skip the insn. Note that both 0xe and 0xf mean
9727         * "always"; 0xf is not "never".
9728         */
9729        if (cond < 0x0e) {
9730            arm_skip_unless(dc, cond);
9731        }
9732    }
9733
9734    if (is_16bit) {
9735        disas_thumb_insn(dc, insn);
9736    } else {
9737        disas_thumb2_insn(dc, insn);
9738    }
9739
9740    /* Advance the Thumb condexec condition.  */
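        /*
         * This is effectively the Arm ARM pseudocode ITAdvance(): the top
         * bit of the remaining mask supplies the then/else (low) bit of
         * the condition for the next insn, the mask shifts up by one, and
         * once it is exhausted we are no longer in an IT block.
         */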
9741    if (dc->condexec_mask) {
9742        dc->condexec_cond = ((dc->condexec_cond & 0xe) |
9743                             ((dc->condexec_mask >> 4) & 1));
9744        dc->condexec_mask = (dc->condexec_mask << 1) & 0x1f;
9745        if (dc->condexec_mask == 0) {
9746            dc->condexec_cond = 0;
9747        }
9748    }
9749
9750    if (dc->eci && !dc->eci_handled) {
9751        /*
9752         * Insn wasn't valid for ECI/ICI at all: undo what we
9753         * just generated and instead emit an exception
9754         */
9755        tcg_remove_ops_after(insn_eci_rewind);
9756        dc->pc_save = insn_eci_pc_save;
9757        dc->condjmp = 0;
9758        gen_exception_insn(dc, 0, EXCP_INVSTATE, syn_uncategorized());
9759    }
9760
9761    arm_post_translate_insn(dc);
9762
9763    /* Thumb is a variable-length ISA.  Stop translation when the next insn
9764     * will touch a new page.  This ensures that prefetch aborts occur at
9765     * the right place.
9766     *
9767     * We want to stop the TB if the next insn starts in a new page,
9768     * or if it spans between this page and the next. This means that
9769     * if we're looking at the last halfword in the page we need to
9770     * see if it's a 16-bit Thumb insn (which will fit in this TB)
9771     * or a 32-bit Thumb insn (which won't).
9772     * This is to avoid generating a silly TB with a single 16-bit insn
9773     * in it at the end of this page (which would execute correctly
9774     * but isn't very efficient).
9775     */
9776    if (dc->base.is_jmp == DISAS_NEXT
9777        && (dc->base.pc_next - dc->page_start >= TARGET_PAGE_SIZE
9778            || (dc->base.pc_next - dc->page_start >= TARGET_PAGE_SIZE - 3
9779                && insn_crosses_page(env, dc)))) {
9780        dc->base.is_jmp = DISAS_TOO_MANY;
9781    }
9782}
9783
9784static void arm_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
9785{
9786    DisasContext *dc = container_of(dcbase, DisasContext, base);
9787
9788    /* At this stage dc->condjmp will only be set when the skipped
9789       instruction was a conditional branch or trap, and the PC has
9790       already been written.  */
9791    gen_set_condexec(dc);
9792    if (dc->base.is_jmp == DISAS_BX_EXCRET) {
9793        /* Exception return branches need some special case code at the
9794         * end of the TB, which is complex enough that it has to
9795         * handle the single-step vs not and the condition-failed
9796         * insn codepath itself.
9797         */
9798        gen_bx_excret_final_code(dc);
9799    } else if (unlikely(dc->ss_active)) {
9800        /* Unconditional and "condition passed" instruction codepath. */
9801        switch (dc->base.is_jmp) {
9802        case DISAS_SWI:
9803            gen_ss_advance(dc);
9804            gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb));
9805            break;
9806        case DISAS_HVC:
9807            gen_ss_advance(dc);
9808            gen_exception_el(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2);
9809            break;
9810        case DISAS_SMC:
9811            gen_ss_advance(dc);
9812            gen_exception_el(EXCP_SMC, syn_aa32_smc(), 3);
9813            break;
9814        case DISAS_NEXT:
9815        case DISAS_TOO_MANY:
9816        case DISAS_UPDATE_EXIT:
9817        case DISAS_UPDATE_NOCHAIN:
9818            gen_update_pc(dc, curr_insn_len(dc));
9819            /* fall through */
9820        default:
9821            /* FIXME: Single stepping a WFI insn will not halt the CPU. */
9822            gen_singlestep_exception(dc);
9823            break;
9824        case DISAS_NORETURN:
9825            break;
9826        }
9827    } else {
9828        /* While branches must always occur at the end of an IT block,
9829           there are a few other things that can cause us to terminate
9830           the TB in the middle of an IT block:
9831            - Exception generating instructions (bkpt, swi, undefined).
9832            - Page boundaries.
9833            - Hardware watchpoints.
9834           Hardware breakpoints have already been handled and skip this code.
9835         */
9836        switch (dc->base.is_jmp) {
9837        case DISAS_NEXT:
9838        case DISAS_TOO_MANY:
9839            gen_goto_tb(dc, 1, curr_insn_len(dc));
9840            break;
9841        case DISAS_UPDATE_NOCHAIN:
9842            gen_update_pc(dc, curr_insn_len(dc));
9843            /* fall through */
9844        case DISAS_JUMP:
9845            gen_goto_ptr();
9846            break;
9847        case DISAS_UPDATE_EXIT:
9848            gen_update_pc(dc, curr_insn_len(dc));
9849            /* fall through */
9850        default:
9851            /* indicate that the hash table must be used to find the next TB */
9852            tcg_gen_exit_tb(NULL, 0);
9853            break;
9854        case DISAS_NORETURN:
9855            /* nothing more to generate */
9856            break;
9857        case DISAS_WFI:
9858            gen_helper_wfi(cpu_env, tcg_constant_i32(curr_insn_len(dc)));
9859            /*
9860             * The helper doesn't necessarily throw an exception, but we
9861             * must go back to the main loop to check for interrupts anyway.
9862             */
9863            tcg_gen_exit_tb(NULL, 0);
9864            break;
9865        case DISAS_WFE:
9866            gen_helper_wfe(cpu_env);
9867            break;
9868        case DISAS_YIELD:
9869            gen_helper_yield(cpu_env);
9870            break;
9871        case DISAS_SWI:
9872            gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb));
9873            break;
9874        case DISAS_HVC:
9875            gen_exception_el(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2);
9876            break;
9877        case DISAS_SMC:
9878            gen_exception_el(EXCP_SMC, syn_aa32_smc(), 3);
9879            break;
9880        }
9881    }
9882
9883    if (dc->condjmp) {
9884        /* "Condition failed" instruction codepath for the branch/trap insn */
9885        set_disas_label(dc, dc->condlabel);
9886        gen_set_condexec(dc);
9887        if (unlikely(dc->ss_active)) {
9888            gen_update_pc(dc, curr_insn_len(dc));
9889            gen_singlestep_exception(dc);
9890        } else {
9891            gen_goto_tb(dc, 1, curr_insn_len(dc));
9892        }
9893    }
9894}
9895
9896static void arm_tr_disas_log(const DisasContextBase *dcbase,
9897                             CPUState *cpu, FILE *logfile)
9898{
9899    DisasContext *dc = container_of(dcbase, DisasContext, base);
9900
9901    fprintf(logfile, "IN: %s\n", lookup_symbol(dc->base.pc_first));
9902    target_disas(logfile, cpu, dc->base.pc_first, dc->base.tb->size);
9903}
9904
9905static const TranslatorOps arm_translator_ops = {
9906    .init_disas_context = arm_tr_init_disas_context,
9907    .tb_start           = arm_tr_tb_start,
9908    .insn_start         = arm_tr_insn_start,
9909    .translate_insn     = arm_tr_translate_insn,
9910    .tb_stop            = arm_tr_tb_stop,
9911    .disas_log          = arm_tr_disas_log,
9912};
9913
9914static const TranslatorOps thumb_translator_ops = {
9915    .init_disas_context = arm_tr_init_disas_context,
9916    .tb_start           = arm_tr_tb_start,
9917    .insn_start         = arm_tr_insn_start,
9918    .translate_insn     = thumb_tr_translate_insn,
9919    .tb_stop            = arm_tr_tb_stop,
9920    .disas_log          = arm_tr_disas_log,
9921};
9922
9923/* generate intermediate code for basic block 'tb'.  */
9924void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns,
9925                           target_ulong pc, void *host_pc)
9926{
9927    DisasContext dc = { };
9928    const TranslatorOps *ops = &arm_translator_ops;
9929    CPUARMTBFlags tb_flags = arm_tbflags_from_tb(tb);
9930
9931    if (EX_TBFLAG_AM32(tb_flags, THUMB)) {
9932        ops = &thumb_translator_ops;
9933    }
9934#ifdef TARGET_AARCH64
9935    if (EX_TBFLAG_ANY(tb_flags, AARCH64_STATE)) {
9936        ops = &aarch64_translator_ops;
9937    }
9938#endif
9939
9940    translator_loop(cpu, tb, max_insns, pc, host_pc, ops, &dc.base);
9941}
9942