qemu/target/arm/tcg/translate.c
   1/*
   2 *  ARM translation
   3 *
   4 *  Copyright (c) 2003 Fabrice Bellard
   5 *  Copyright (c) 2005-2007 CodeSourcery
   6 *  Copyright (c) 2007 OpenedHand, Ltd.
   7 *
   8 * This library is free software; you can redistribute it and/or
   9 * modify it under the terms of the GNU Lesser General Public
  10 * License as published by the Free Software Foundation; either
  11 * version 2.1 of the License, or (at your option) any later version.
  12 *
  13 * This library is distributed in the hope that it will be useful,
  14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  16 * Lesser General Public License for more details.
  17 *
  18 * You should have received a copy of the GNU Lesser General Public
  19 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  20 */
  21#include "qemu/osdep.h"
  22
  23#include "cpu.h"
  24#include "internals.h"
  25#include "disas/disas.h"
  26#include "exec/exec-all.h"
  27#include "tcg/tcg-op.h"
  28#include "tcg/tcg-op-gvec.h"
  29#include "qemu/log.h"
  30#include "qemu/bitops.h"
  31#include "arm_ldst.h"
  32#include "semihosting/semihost.h"
  33#include "exec/helper-proto.h"
  34#include "exec/helper-gen.h"
  35#include "exec/log.h"
  36#include "cpregs.h"
  37
  38
  39#define ENABLE_ARCH_4T    arm_dc_feature(s, ARM_FEATURE_V4T)
  40#define ENABLE_ARCH_5     arm_dc_feature(s, ARM_FEATURE_V5)
  41/* currently all emulated v5 cores are also v5TE, so don't bother */
  42#define ENABLE_ARCH_5TE   arm_dc_feature(s, ARM_FEATURE_V5)
  43#define ENABLE_ARCH_5J    dc_isar_feature(aa32_jazelle, s)
  44#define ENABLE_ARCH_6     arm_dc_feature(s, ARM_FEATURE_V6)
  45#define ENABLE_ARCH_6K    arm_dc_feature(s, ARM_FEATURE_V6K)
  46#define ENABLE_ARCH_6T2   arm_dc_feature(s, ARM_FEATURE_THUMB2)
  47#define ENABLE_ARCH_7     arm_dc_feature(s, ARM_FEATURE_V7)
  48#define ENABLE_ARCH_8     arm_dc_feature(s, ARM_FEATURE_V8)
  49
  50#include "translate.h"
  51#include "translate-a32.h"
  52
  53/* These are TCG temporaries used only by the legacy iwMMXt decoder */
  54static TCGv_i64 cpu_V0, cpu_V1, cpu_M0;
  55/* These are TCG globals which alias CPUARMState fields */
  56static TCGv_i32 cpu_R[16];
  57TCGv_i32 cpu_CF, cpu_NF, cpu_VF, cpu_ZF;
  58TCGv_i64 cpu_exclusive_addr;
  59TCGv_i64 cpu_exclusive_val;
  60
  61#include "exec/gen-icount.h"
  62
  63static const char * const regnames[] =
  64    { "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
  65      "r8", "r9", "r10", "r11", "r12", "r13", "r14", "pc" };
  66
  67
  68/* initialize TCG globals.  */
  69void arm_translate_init(void)
  70{
  71    int i;
  72
  73    for (i = 0; i < 16; i++) {
  74        cpu_R[i] = tcg_global_mem_new_i32(cpu_env,
  75                                          offsetof(CPUARMState, regs[i]),
  76                                          regnames[i]);
  77    }
  78    cpu_CF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, CF), "CF");
  79    cpu_NF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, NF), "NF");
  80    cpu_VF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, VF), "VF");
  81    cpu_ZF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, ZF), "ZF");
  82
  83    cpu_exclusive_addr = tcg_global_mem_new_i64(cpu_env,
  84        offsetof(CPUARMState, exclusive_addr), "exclusive_addr");
  85    cpu_exclusive_val = tcg_global_mem_new_i64(cpu_env,
  86        offsetof(CPUARMState, exclusive_val), "exclusive_val");
  87
  88    a64_translate_init();
  89}
  90
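/*
 * Expand an AdvSIMD 8-bit encoded immediate according to cmode/op and
 * return the resulting 64-bit constant (the 32-bit expansion replicated
 * to both halves where applicable).  Illustrative example: cmode == 14
 * with op == 1 turns each set bit n of imm into an all-ones byte n, so
 * imm == 0x81 expands to 0xff000000000000ffULL.
 */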
  91uint64_t asimd_imm_const(uint32_t imm, int cmode, int op)
  92{
  93    /* Expand the encoded constant as per AdvSIMDExpandImm pseudocode */
  94    switch (cmode) {
  95    case 0: case 1:
  96        /* no-op */
  97        break;
  98    case 2: case 3:
  99        imm <<= 8;
 100        break;
 101    case 4: case 5:
 102        imm <<= 16;
 103        break;
 104    case 6: case 7:
 105        imm <<= 24;
 106        break;
 107    case 8: case 9:
 108        imm |= imm << 16;
 109        break;
 110    case 10: case 11:
 111        imm = (imm << 8) | (imm << 24);
 112        break;
 113    case 12:
 114        imm = (imm << 8) | 0xff;
 115        break;
 116    case 13:
 117        imm = (imm << 16) | 0xffff;
 118        break;
 119    case 14:
 120        if (op) {
 121            /*
 122             * This and cmode == 15 op == 1 are the only cases where
 123             * the top and bottom 32 bits of the encoded constant differ.
 124             */
 125            uint64_t imm64 = 0;
 126            int n;
 127
 128            for (n = 0; n < 8; n++) {
 129                if (imm & (1 << n)) {
 130                    imm64 |= (0xffULL << (n * 8));
 131                }
 132            }
 133            return imm64;
 134        }
 135        imm |= (imm << 8) | (imm << 16) | (imm << 24);
 136        break;
 137    case 15:
 138        if (op) {
 139            /* Reserved encoding for AArch32; valid for AArch64 */
 140            uint64_t imm64 = (uint64_t)(imm & 0x3f) << 48;
 141            if (imm & 0x80) {
 142                imm64 |= 0x8000000000000000ULL;
 143            }
 144            if (imm & 0x40) {
 145                imm64 |= 0x3fc0000000000000ULL;
 146            } else {
 147                imm64 |= 0x4000000000000000ULL;
 148            }
 149            return imm64;
 150        }
 151        imm = ((imm & 0x80) << 24) | ((imm & 0x3f) << 19)
 152            | ((imm & 0x40) ? (0x1f << 25) : (1 << 30));
 153        break;
 154    }
 155    if (op) {
 156        imm = ~imm;
 157    }
 158    return dup_const(MO_32, imm);
 159}
 160
 161/* Generate a label used for skipping this instruction */
 162void arm_gen_condlabel(DisasContext *s)
 163{
 164    if (!s->condjmp) {
 165        s->condlabel = gen_disas_label(s);
 166        s->condjmp = 1;
 167    }
 168}
 169
 170/* Flags for the disas_set_da_iss info argument:
 171 * lower bits hold the Rt register number, higher bits are flags.
 172 */
 173typedef enum ISSInfo {
 174    ISSNone = 0,
 175    ISSRegMask = 0x1f,
 176    ISSInvalid = (1 << 5),
 177    ISSIsAcqRel = (1 << 6),
 178    ISSIsWrite = (1 << 7),
 179    ISSIs16Bit = (1 << 8),
 180} ISSInfo;
 181
 182/*
 183 * Store var into env + offset to a member with size bytes.
 184 * Free var after use.
 185 */
 186void store_cpu_offset(TCGv_i32 var, int offset, int size)
 187{
 188    switch (size) {
 189    case 1:
 190        tcg_gen_st8_i32(var, cpu_env, offset);
 191        break;
 192    case 4:
 193        tcg_gen_st_i32(var, cpu_env, offset);
 194        break;
 195    default:
 196        g_assert_not_reached();
 197    }
 198}
 199
 200/* Save the syndrome information for a Data Abort */
 201static void disas_set_da_iss(DisasContext *s, MemOp memop, ISSInfo issinfo)
 202{
 203    uint32_t syn;
 204    int sas = memop & MO_SIZE;
 205    bool sse = memop & MO_SIGN;
 206    bool is_acqrel = issinfo & ISSIsAcqRel;
 207    bool is_write = issinfo & ISSIsWrite;
 208    bool is_16bit = issinfo & ISSIs16Bit;
 209    int srt = issinfo & ISSRegMask;
 210
 211    if (issinfo & ISSInvalid) {
 212        /* Some callsites want to conditionally provide ISS info,
 213         * eg "only if this was not a writeback"
 214         */
 215        return;
 216    }
 217
 218    if (srt == 15) {
 219        /* For AArch32, insns where the src/dest is R15 never generate
 220         * ISS information. Catching that here saves checking at all
 221         * the call sites.
 222         */
 223        return;
 224    }
 225
 226    syn = syn_data_abort_with_iss(0, sas, sse, srt, 0, is_acqrel,
 227                                  0, 0, 0, is_write, 0, is_16bit);
 228    disas_set_insn_syndrome(s, syn);
 229}
 230
 231static inline int get_a32_user_mem_index(DisasContext *s)
 232{
 233    /* Return the core mmu_idx to use for A32/T32 "unprivileged load/store"
 234     * insns:
 235     *  if PL2, UNPREDICTABLE (we choose to implement as if PL0)
 236     *  otherwise, access as if at PL0.
 237     */
 238    switch (s->mmu_idx) {
 239    case ARMMMUIdx_E3:
 240    case ARMMMUIdx_E2:        /* this one is UNPREDICTABLE */
 241    case ARMMMUIdx_E10_0:
 242    case ARMMMUIdx_E10_1:
 243    case ARMMMUIdx_E10_1_PAN:
 244        return arm_to_core_mmu_idx(ARMMMUIdx_E10_0);
 245    case ARMMMUIdx_MUser:
 246    case ARMMMUIdx_MPriv:
 247        return arm_to_core_mmu_idx(ARMMMUIdx_MUser);
 248    case ARMMMUIdx_MUserNegPri:
 249    case ARMMMUIdx_MPrivNegPri:
 250        return arm_to_core_mmu_idx(ARMMMUIdx_MUserNegPri);
 251    case ARMMMUIdx_MSUser:
 252    case ARMMMUIdx_MSPriv:
 253        return arm_to_core_mmu_idx(ARMMMUIdx_MSUser);
 254    case ARMMMUIdx_MSUserNegPri:
 255    case ARMMMUIdx_MSPrivNegPri:
 256        return arm_to_core_mmu_idx(ARMMMUIdx_MSUserNegPri);
 257    default:
 258        g_assert_not_reached();
 259    }
 260}
 261
 262/* The pc_curr difference for an architectural jump. */
 263static target_long jmp_diff(DisasContext *s, target_long diff)
 264{
 265    return diff + (s->thumb ? 4 : 8);
 266}
 267
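/*
 * Set var to the architectural PC value pc_curr + diff.  When the TB may
 * run at a different virtual address (CF_PCREL) this is computed relative
 * to the current contents of r15 rather than as an absolute constant.
 */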
 268static void gen_pc_plus_diff(DisasContext *s, TCGv_i32 var, target_long diff)
 269{
 270    assert(s->pc_save != -1);
 271    if (tb_cflags(s->base.tb) & CF_PCREL) {
 272        tcg_gen_addi_i32(var, cpu_R[15], (s->pc_curr - s->pc_save) + diff);
 273    } else {
 274        tcg_gen_movi_i32(var, s->pc_curr + diff);
 275    }
 276}
 277
 278/* Set a variable to the value of a CPU register.  */
 279void load_reg_var(DisasContext *s, TCGv_i32 var, int reg)
 280{
 281    if (reg == 15) {
 282        gen_pc_plus_diff(s, var, jmp_diff(s, 0));
 283    } else {
 284        tcg_gen_mov_i32(var, cpu_R[reg]);
 285    }
 286}
 287
 288/*
 289 * Create a new temp, REG + OFS, except PC is ALIGN(PC, 4).
 290 * This is used for load/store for which use of PC implies (literal),
 291 * or ADD that implies ADR.
 292 */
 293TCGv_i32 add_reg_for_lit(DisasContext *s, int reg, int ofs)
 294{
 295    TCGv_i32 tmp = tcg_temp_new_i32();
 296
 297    if (reg == 15) {
 298        /*
 299         * This address is computed from an aligned PC:
 300         * subtract off the low bits.
 301         */
 302        gen_pc_plus_diff(s, tmp, jmp_diff(s, ofs - (s->pc_curr & 3)));
 303    } else {
 304        tcg_gen_addi_i32(tmp, cpu_R[reg], ofs);
 305    }
 306    return tmp;
 307}
 308
 309/* Set a CPU register.  The source must be a temporary and will be
 310   marked as dead.  */
 311void store_reg(DisasContext *s, int reg, TCGv_i32 var)
 312{
 313    if (reg == 15) {
 314        /* In Thumb mode, we must ignore bit 0.
 315         * In ARM mode, for ARMv4 and ARMv5, it is UNPREDICTABLE if bits [1:0]
 316         * are not 0b00, but for ARMv6 and above, we must ignore bits [1:0].
 317         * We choose to ignore [1:0] in ARM mode for all architecture versions.
 318         */
 319        tcg_gen_andi_i32(var, var, s->thumb ? ~1 : ~3);
 320        s->base.is_jmp = DISAS_JUMP;
 321        s->pc_save = -1;
 322    } else if (reg == 13 && arm_dc_feature(s, ARM_FEATURE_M)) {
 323        /* For M-profile SP bits [1:0] are always zero */
 324        tcg_gen_andi_i32(var, var, ~3);
 325    }
 326    tcg_gen_mov_i32(cpu_R[reg], var);
 327}
 328
 329/*
 330 * Variant of store_reg which applies v8M stack-limit checks before updating
 331 * SP. If the check fails this will result in an exception being taken.
 332 * We disable the stack checks for CONFIG_USER_ONLY because we have
 333 * no idea what the stack limits should be in that case.
 334 * If stack checking is not being done this just acts like store_reg().
 335 */
 336static void store_sp_checked(DisasContext *s, TCGv_i32 var)
 337{
 338#ifndef CONFIG_USER_ONLY
 339    if (s->v8m_stackcheck) {
 340        gen_helper_v8m_stackcheck(cpu_env, var);
 341    }
 342#endif
 343    store_reg(s, 13, var);
 344}
 345
 346/* Value extensions.  */
 347#define gen_uxtb(var) tcg_gen_ext8u_i32(var, var)
 348#define gen_uxth(var) tcg_gen_ext16u_i32(var, var)
 349#define gen_sxtb(var) tcg_gen_ext8s_i32(var, var)
 350#define gen_sxth(var) tcg_gen_ext16s_i32(var, var)
 351
 352#define gen_sxtb16(var) gen_helper_sxtb16(var, var)
 353#define gen_uxtb16(var) gen_helper_uxtb16(var, var)
 354
 355void gen_set_cpsr(TCGv_i32 var, uint32_t mask)
 356{
 357    gen_helper_cpsr_write(cpu_env, var, tcg_constant_i32(mask));
 358}
 359
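/*
 * Regenerate the cached hflags after an operation that may have changed
 * CPU state; new_el is true if the exception level itself may have changed.
 */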
 360static void gen_rebuild_hflags(DisasContext *s, bool new_el)
 361{
 362    bool m_profile = arm_dc_feature(s, ARM_FEATURE_M);
 363
 364    if (new_el) {
 365        if (m_profile) {
 366            gen_helper_rebuild_hflags_m32_newel(cpu_env);
 367        } else {
 368            gen_helper_rebuild_hflags_a32_newel(cpu_env);
 369        }
 370    } else {
 371        TCGv_i32 tcg_el = tcg_constant_i32(s->current_el);
 372        if (m_profile) {
 373            gen_helper_rebuild_hflags_m32(cpu_env, tcg_el);
 374        } else {
 375            gen_helper_rebuild_hflags_a32(cpu_env, tcg_el);
 376        }
 377    }
 378}
 379
 380static void gen_exception_internal(int excp)
 381{
 382    assert(excp_is_internal(excp));
 383    gen_helper_exception_internal(cpu_env, tcg_constant_i32(excp));
 384}
 385
 386static void gen_singlestep_exception(DisasContext *s)
 387{
 388    /* We just completed step of an insn. Move from Active-not-pending
 389     * to Active-pending, and then also take the swstep exception.
 390     * This corresponds to making the (IMPDEF) choice to prioritize
 391     * swstep exceptions over asynchronous exceptions taken to an exception
 392     * level where debug is disabled. This choice has the advantage that
 393     * we do not need to maintain internal state corresponding to the
 394     * ISV/EX syndrome bits between completion of the step and generation
 395     * of the exception, and our syndrome information is always correct.
 396     */
 397    gen_ss_advance(s);
 398    gen_swstep_exception(s, 1, s->is_ldex);
 399    s->base.is_jmp = DISAS_NORETURN;
 400}
 401
 402void clear_eci_state(DisasContext *s)
 403{
 404    /*
 405     * Clear any ECI/ICI state: used when a load multiple/store
 406     * multiple insn executes.
 407     */
 408    if (s->eci) {
 409        store_cpu_field_constant(0, condexec_bits);
 410        s->eci = 0;
 411    }
 412}
 413
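/*
 * Dual signed 16x16->32 multiply: on return, a holds the product of the
 * two low halfwords and b holds the product of the two high halfwords.
 */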
 414static void gen_smul_dual(TCGv_i32 a, TCGv_i32 b)
 415{
 416    TCGv_i32 tmp1 = tcg_temp_new_i32();
 417    TCGv_i32 tmp2 = tcg_temp_new_i32();
 418    tcg_gen_ext16s_i32(tmp1, a);
 419    tcg_gen_ext16s_i32(tmp2, b);
 420    tcg_gen_mul_i32(tmp1, tmp1, tmp2);
 421    tcg_gen_sari_i32(a, a, 16);
 422    tcg_gen_sari_i32(b, b, 16);
 423    tcg_gen_mul_i32(b, b, a);
 424    tcg_gen_mov_i32(a, tmp1);
 425}
 426
 427/* Byteswap each halfword.  */
 428void gen_rev16(TCGv_i32 dest, TCGv_i32 var)
 429{
 430    TCGv_i32 tmp = tcg_temp_new_i32();
 431    TCGv_i32 mask = tcg_constant_i32(0x00ff00ff);
 432    tcg_gen_shri_i32(tmp, var, 8);
 433    tcg_gen_and_i32(tmp, tmp, mask);
 434    tcg_gen_and_i32(var, var, mask);
 435    tcg_gen_shli_i32(var, var, 8);
 436    tcg_gen_or_i32(dest, var, tmp);
 437}
 438
 439/* Byteswap low halfword and sign extend.  */
 440static void gen_revsh(TCGv_i32 dest, TCGv_i32 var)
 441{
 442    tcg_gen_bswap16_i32(var, var, TCG_BSWAP_OS);
 443}
 444
  445/* Dual 16-bit add.  Result placed in dest:
  446    tmp = (t0 ^ t1) & 0x8000;
  447    t0 &= ~0x8000;
  448    t1 &= ~0x8000;
  449    dest = (t0 + t1) ^ tmp;
  450 */
 451
 452static void gen_add16(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
 453{
 454    TCGv_i32 tmp = tcg_temp_new_i32();
 455    tcg_gen_xor_i32(tmp, t0, t1);
 456    tcg_gen_andi_i32(tmp, tmp, 0x8000);
 457    tcg_gen_andi_i32(t0, t0, ~0x8000);
 458    tcg_gen_andi_i32(t1, t1, ~0x8000);
 459    tcg_gen_add_i32(t0, t0, t1);
 460    tcg_gen_xor_i32(dest, t0, tmp);
 461}
 462
 463/* Set N and Z flags from var.  */
 464static inline void gen_logic_CC(TCGv_i32 var)
 465{
 466    tcg_gen_mov_i32(cpu_NF, var);
 467    tcg_gen_mov_i32(cpu_ZF, var);
 468}
 469
 470/* dest = T0 + T1 + CF. */
 471static void gen_add_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
 472{
 473    tcg_gen_add_i32(dest, t0, t1);
 474    tcg_gen_add_i32(dest, dest, cpu_CF);
 475}
 476
 477/* dest = T0 - T1 + CF - 1.  */
 478static void gen_sub_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
 479{
 480    tcg_gen_sub_i32(dest, t0, t1);
 481    tcg_gen_add_i32(dest, dest, cpu_CF);
 482    tcg_gen_subi_i32(dest, dest, 1);
 483}
 484
 485/* dest = T0 + T1. Compute C, N, V and Z flags */
 486static void gen_add_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
 487{
 488    TCGv_i32 tmp = tcg_temp_new_i32();
 489    tcg_gen_movi_i32(tmp, 0);
 490    tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, t1, tmp);
 491    tcg_gen_mov_i32(cpu_ZF, cpu_NF);
 492    tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
 493    tcg_gen_xor_i32(tmp, t0, t1);
 494    tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
 495    tcg_gen_mov_i32(dest, cpu_NF);
 496}
 497
 498/* dest = T0 + T1 + CF.  Compute C, N, V and Z flags */
 499static void gen_adc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
 500{
 501    TCGv_i32 tmp = tcg_temp_new_i32();
 502    if (TCG_TARGET_HAS_add2_i32) {
 503        tcg_gen_movi_i32(tmp, 0);
 504        tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, cpu_CF, tmp);
 505        tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1, tmp);
 506    } else {
 507        TCGv_i64 q0 = tcg_temp_new_i64();
 508        TCGv_i64 q1 = tcg_temp_new_i64();
 509        tcg_gen_extu_i32_i64(q0, t0);
 510        tcg_gen_extu_i32_i64(q1, t1);
 511        tcg_gen_add_i64(q0, q0, q1);
 512        tcg_gen_extu_i32_i64(q1, cpu_CF);
 513        tcg_gen_add_i64(q0, q0, q1);
 514        tcg_gen_extr_i64_i32(cpu_NF, cpu_CF, q0);
 515    }
 516    tcg_gen_mov_i32(cpu_ZF, cpu_NF);
 517    tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
 518    tcg_gen_xor_i32(tmp, t0, t1);
 519    tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
 520    tcg_gen_mov_i32(dest, cpu_NF);
 521}
 522
 523/* dest = T0 - T1. Compute C, N, V and Z flags */
 524static void gen_sub_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
 525{
 526    TCGv_i32 tmp;
 527    tcg_gen_sub_i32(cpu_NF, t0, t1);
 528    tcg_gen_mov_i32(cpu_ZF, cpu_NF);
 529    tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0, t1);
 530    tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
 531    tmp = tcg_temp_new_i32();
 532    tcg_gen_xor_i32(tmp, t0, t1);
 533    tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
 534    tcg_gen_mov_i32(dest, cpu_NF);
 535}
 536
 537/* dest = T0 + ~T1 + CF.  Compute C, N, V and Z flags */
 538static void gen_sbc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
 539{
 540    TCGv_i32 tmp = tcg_temp_new_i32();
 541    tcg_gen_not_i32(tmp, t1);
 542    gen_adc_CC(dest, t0, tmp);
 543}
 544
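/*
 * Shift by register for LSL/LSR: only the bottom byte of t1 is significant,
 * and any shift amount of 32 or more (i.e. any of bits [7:5] set) yields 0.
 */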
 545#define GEN_SHIFT(name)                                               \
 546static void gen_##name(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)       \
 547{                                                                     \
 548    TCGv_i32 tmpd = tcg_temp_new_i32();                               \
 549    TCGv_i32 tmp1 = tcg_temp_new_i32();                               \
 550    TCGv_i32 zero = tcg_constant_i32(0);                              \
 551    tcg_gen_andi_i32(tmp1, t1, 0x1f);                                 \
 552    tcg_gen_##name##_i32(tmpd, t0, tmp1);                             \
 553    tcg_gen_andi_i32(tmp1, t1, 0xe0);                                 \
 554    tcg_gen_movcond_i32(TCG_COND_NE, dest, tmp1, zero, zero, tmpd);   \
 555}
 556GEN_SHIFT(shl)
 557GEN_SHIFT(shr)
 558#undef GEN_SHIFT
 559
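/*
 * ASR by register: use the bottom byte of t1, clamping shift amounts of
 * 32 or more to 31 so that the sign bit is replicated.
 */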
 560static void gen_sar(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
 561{
 562    TCGv_i32 tmp1 = tcg_temp_new_i32();
 563
 564    tcg_gen_andi_i32(tmp1, t1, 0xff);
 565    tcg_gen_umin_i32(tmp1, tmp1, tcg_constant_i32(31));
 566    tcg_gen_sar_i32(dest, t0, tmp1);
 567}
 568
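/* Set CF to bit 'shift' of var, i.e. the shifter carry-out. */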
 569static void shifter_out_im(TCGv_i32 var, int shift)
 570{
 571    tcg_gen_extract_i32(cpu_CF, var, shift, 1);
 572}
 573
 574/* Shift by immediate.  Includes special handling for shift == 0.  */
 575static inline void gen_arm_shift_im(TCGv_i32 var, int shiftop,
 576                                    int shift, int flags)
 577{
 578    switch (shiftop) {
 579    case 0: /* LSL */
 580        if (shift != 0) {
 581            if (flags)
 582                shifter_out_im(var, 32 - shift);
 583            tcg_gen_shli_i32(var, var, shift);
 584        }
 585        break;
 586    case 1: /* LSR */
 587        if (shift == 0) {
 588            if (flags) {
 589                tcg_gen_shri_i32(cpu_CF, var, 31);
 590            }
 591            tcg_gen_movi_i32(var, 0);
 592        } else {
 593            if (flags)
 594                shifter_out_im(var, shift - 1);
 595            tcg_gen_shri_i32(var, var, shift);
 596        }
 597        break;
 598    case 2: /* ASR */
 599        if (shift == 0)
 600            shift = 32;
 601        if (flags)
 602            shifter_out_im(var, shift - 1);
 603        if (shift == 32)
  604            shift = 31;
 605        tcg_gen_sari_i32(var, var, shift);
 606        break;
 607    case 3: /* ROR/RRX */
 608        if (shift != 0) {
 609            if (flags)
 610                shifter_out_im(var, shift - 1);
 611            tcg_gen_rotri_i32(var, var, shift); break;
 612        } else {
 613            TCGv_i32 tmp = tcg_temp_new_i32();
 614            tcg_gen_shli_i32(tmp, cpu_CF, 31);
 615            if (flags)
 616                shifter_out_im(var, 0);
 617            tcg_gen_shri_i32(var, var, 1);
 618            tcg_gen_or_i32(var, var, tmp);
 619        }
 620    }
  621}
 622
 623static inline void gen_arm_shift_reg(TCGv_i32 var, int shiftop,
 624                                     TCGv_i32 shift, int flags)
 625{
 626    if (flags) {
 627        switch (shiftop) {
 628        case 0: gen_helper_shl_cc(var, cpu_env, var, shift); break;
 629        case 1: gen_helper_shr_cc(var, cpu_env, var, shift); break;
 630        case 2: gen_helper_sar_cc(var, cpu_env, var, shift); break;
 631        case 3: gen_helper_ror_cc(var, cpu_env, var, shift); break;
 632        }
 633    } else {
 634        switch (shiftop) {
 635        case 0:
 636            gen_shl(var, var, shift);
 637            break;
 638        case 1:
 639            gen_shr(var, var, shift);
 640            break;
 641        case 2:
 642            gen_sar(var, var, shift);
 643            break;
 644        case 3: tcg_gen_andi_i32(shift, shift, 0x1f);
 645                tcg_gen_rotr_i32(var, var, shift); break;
 646        }
 647    }
 648}
 649
 650/*
 651 * Generate a conditional based on ARM condition code cc.
 652 * This is common between ARM and Aarch64 targets.
 653 */
 654void arm_test_cc(DisasCompare *cmp, int cc)
 655{
 656    TCGv_i32 value;
 657    TCGCond cond;
 658
 659    switch (cc) {
 660    case 0: /* eq: Z */
 661    case 1: /* ne: !Z */
 662        cond = TCG_COND_EQ;
 663        value = cpu_ZF;
 664        break;
 665
 666    case 2: /* cs: C */
 667    case 3: /* cc: !C */
 668        cond = TCG_COND_NE;
 669        value = cpu_CF;
 670        break;
 671
 672    case 4: /* mi: N */
 673    case 5: /* pl: !N */
 674        cond = TCG_COND_LT;
 675        value = cpu_NF;
 676        break;
 677
 678    case 6: /* vs: V */
 679    case 7: /* vc: !V */
 680        cond = TCG_COND_LT;
 681        value = cpu_VF;
 682        break;
 683
 684    case 8: /* hi: C && !Z */
 685    case 9: /* ls: !C || Z -> !(C && !Z) */
 686        cond = TCG_COND_NE;
 687        value = tcg_temp_new_i32();
 688        /* CF is 1 for C, so -CF is an all-bits-set mask for C;
 689           ZF is non-zero for !Z; so AND the two subexpressions.  */
 690        tcg_gen_neg_i32(value, cpu_CF);
 691        tcg_gen_and_i32(value, value, cpu_ZF);
 692        break;
 693
 694    case 10: /* ge: N == V -> N ^ V == 0 */
 695    case 11: /* lt: N != V -> N ^ V != 0 */
 696        /* Since we're only interested in the sign bit, == 0 is >= 0.  */
 697        cond = TCG_COND_GE;
 698        value = tcg_temp_new_i32();
 699        tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
 700        break;
 701
 702    case 12: /* gt: !Z && N == V */
 703    case 13: /* le: Z || N != V */
 704        cond = TCG_COND_NE;
 705        value = tcg_temp_new_i32();
 706        /* (N == V) is equal to the sign bit of ~(NF ^ VF).  Propagate
 707         * the sign bit then AND with ZF to yield the result.  */
 708        tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
 709        tcg_gen_sari_i32(value, value, 31);
 710        tcg_gen_andc_i32(value, cpu_ZF, value);
 711        break;
 712
 713    case 14: /* always */
 714    case 15: /* always */
 715        /* Use the ALWAYS condition, which will fold early.
 716         * It doesn't matter what we use for the value.  */
 717        cond = TCG_COND_ALWAYS;
 718        value = cpu_ZF;
 719        goto no_invert;
 720
 721    default:
 722        fprintf(stderr, "Bad condition code 0x%x\n", cc);
 723        abort();
 724    }
 725
 726    if (cc & 1) {
 727        cond = tcg_invert_cond(cond);
 728    }
 729
 730 no_invert:
 731    cmp->cond = cond;
 732    cmp->value = value;
 733}
 734
 735void arm_jump_cc(DisasCompare *cmp, TCGLabel *label)
 736{
 737    tcg_gen_brcondi_i32(cmp->cond, cmp->value, 0, label);
 738}
 739
 740void arm_gen_test_cc(int cc, TCGLabel *label)
 741{
 742    DisasCompare cmp;
 743    arm_test_cc(&cmp, cc);
 744    arm_jump_cc(&cmp, label);
 745}
 746
 747void gen_set_condexec(DisasContext *s)
 748{
 749    if (s->condexec_mask) {
 750        uint32_t val = (s->condexec_cond << 4) | (s->condexec_mask >> 1);
 751
 752        store_cpu_field_constant(val, condexec_bits);
 753    }
 754}
 755
 756void gen_update_pc(DisasContext *s, target_long diff)
 757{
 758    gen_pc_plus_diff(s, cpu_R[15], diff);
 759    s->pc_save = s->pc_curr + diff;
 760}
 761
 762/* Set PC and Thumb state from var.  var is marked as dead.  */
 763static inline void gen_bx(DisasContext *s, TCGv_i32 var)
 764{
 765    s->base.is_jmp = DISAS_JUMP;
 766    tcg_gen_andi_i32(cpu_R[15], var, ~1);
 767    tcg_gen_andi_i32(var, var, 1);
 768    store_cpu_field(var, thumb);
 769    s->pc_save = -1;
 770}
 771
 772/*
 773 * Set PC and Thumb state from var. var is marked as dead.
 774 * For M-profile CPUs, include logic to detect exception-return
 775 * branches and handle them. This is needed for Thumb POP/LDM to PC, LDR to PC,
 776 * and BX reg, and no others, and happens only for code in Handler mode.
 777 * The Security Extension also requires us to check for the FNC_RETURN
 778 * which signals a function return from non-secure state; this can happen
 779 * in both Handler and Thread mode.
 780 * To avoid having to do multiple comparisons in inline generated code,
 781 * we make the check we do here loose, so it will match for EXC_RETURN
 782 * in Thread mode. For system emulation do_v7m_exception_exit() checks
 783 * for these spurious cases and returns without doing anything (giving
 784 * the same behaviour as for a branch to a non-magic address).
 785 *
 786 * In linux-user mode it is unclear what the right behaviour for an
 787 * attempted FNC_RETURN should be, because in real hardware this will go
 788 * directly to Secure code (ie not the Linux kernel) which will then treat
 789 * the error in any way it chooses. For QEMU we opt to make the FNC_RETURN
 790 * attempt behave the way it would on a CPU without the security extension,
 791 * which is to say "like a normal branch". That means we can simply treat
 792 * all branches as normal with no magic address behaviour.
 793 */
 794static inline void gen_bx_excret(DisasContext *s, TCGv_i32 var)
 795{
 796    /* Generate the same code here as for a simple bx, but flag via
 797     * s->base.is_jmp that we need to do the rest of the work later.
 798     */
 799    gen_bx(s, var);
 800#ifndef CONFIG_USER_ONLY
 801    if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY) ||
 802        (s->v7m_handler_mode && arm_dc_feature(s, ARM_FEATURE_M))) {
 803        s->base.is_jmp = DISAS_BX_EXCRET;
 804    }
 805#endif
 806}
 807
 808static inline void gen_bx_excret_final_code(DisasContext *s)
 809{
 810    /* Generate the code to finish possible exception return and end the TB */
 811    DisasLabel excret_label = gen_disas_label(s);
 812    uint32_t min_magic;
 813
 814    if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY)) {
 815        /* Covers FNC_RETURN and EXC_RETURN magic */
 816        min_magic = FNC_RETURN_MIN_MAGIC;
 817    } else {
 818        /* EXC_RETURN magic only */
 819        min_magic = EXC_RETURN_MIN_MAGIC;
 820    }
 821
 822    /* Is the new PC value in the magic range indicating exception return? */
 823    tcg_gen_brcondi_i32(TCG_COND_GEU, cpu_R[15], min_magic, excret_label.label);
 824    /* No: end the TB as we would for a DISAS_JMP */
 825    if (s->ss_active) {
 826        gen_singlestep_exception(s);
 827    } else {
 828        tcg_gen_exit_tb(NULL, 0);
 829    }
 830    set_disas_label(s, excret_label);
 831    /* Yes: this is an exception return.
 832     * At this point in runtime env->regs[15] and env->thumb will hold
 833     * the exception-return magic number, which do_v7m_exception_exit()
 834     * will read. Nothing else will be able to see those values because
 835     * the cpu-exec main loop guarantees that we will always go straight
 836     * from raising the exception to the exception-handling code.
 837     *
 838     * gen_ss_advance(s) does nothing on M profile currently but
 839     * calling it is conceptually the right thing as we have executed
 840     * this instruction (compare SWI, HVC, SMC handling).
 841     */
 842    gen_ss_advance(s);
 843    gen_exception_internal(EXCP_EXCEPTION_EXIT);
 844}
 845
 846static inline void gen_bxns(DisasContext *s, int rm)
 847{
 848    TCGv_i32 var = load_reg(s, rm);
 849
 850    /* The bxns helper may raise an EXCEPTION_EXIT exception, so in theory
 851     * we need to sync state before calling it, but:
 852     *  - we don't need to do gen_update_pc() because the bxns helper will
 853     *    always set the PC itself
 854     *  - we don't need to do gen_set_condexec() because BXNS is UNPREDICTABLE
 855     *    unless it's outside an IT block or the last insn in an IT block,
 856     *    so we know that condexec == 0 (already set at the top of the TB)
 857     *    is correct in the non-UNPREDICTABLE cases, and we can choose
 858     *    "zeroes the IT bits" as our UNPREDICTABLE behaviour otherwise.
 859     */
 860    gen_helper_v7m_bxns(cpu_env, var);
 861    s->base.is_jmp = DISAS_EXIT;
 862}
 863
 864static inline void gen_blxns(DisasContext *s, int rm)
 865{
 866    TCGv_i32 var = load_reg(s, rm);
 867
 868    /* We don't need to sync condexec state, for the same reason as bxns.
 869     * We do however need to set the PC, because the blxns helper reads it.
 870     * The blxns helper may throw an exception.
 871     */
 872    gen_update_pc(s, curr_insn_len(s));
 873    gen_helper_v7m_blxns(cpu_env, var);
 874    s->base.is_jmp = DISAS_EXIT;
 875}
 876
 877/* Variant of store_reg which uses branch&exchange logic when storing
 878   to r15 in ARM architecture v7 and above. The source must be a temporary
 879   and will be marked as dead. */
 880static inline void store_reg_bx(DisasContext *s, int reg, TCGv_i32 var)
 881{
 882    if (reg == 15 && ENABLE_ARCH_7) {
 883        gen_bx(s, var);
 884    } else {
 885        store_reg(s, reg, var);
 886    }
 887}
 888
 889/* Variant of store_reg which uses branch&exchange logic when storing
 890 * to r15 in ARM architecture v5T and above. This is used for storing
 891 * the results of a LDR/LDM/POP into r15, and corresponds to the cases
 892 * in the ARM ARM which use the LoadWritePC() pseudocode function. */
 893static inline void store_reg_from_load(DisasContext *s, int reg, TCGv_i32 var)
 894{
 895    if (reg == 15 && ENABLE_ARCH_5) {
 896        gen_bx_excret(s, var);
 897    } else {
 898        store_reg(s, reg, var);
 899    }
 900}
 901
 902#ifdef CONFIG_USER_ONLY
 903#define IS_USER_ONLY 1
 904#else
 905#define IS_USER_ONLY 0
 906#endif
 907
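/* Map an alignment of 2**i bytes to the corresponding MO_ALIGN_* MemOp. */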
 908MemOp pow2_align(unsigned i)
 909{
 910    static const MemOp mop_align[] = {
 911        0, MO_ALIGN_2, MO_ALIGN_4, MO_ALIGN_8, MO_ALIGN_16,
 912        /*
 913         * FIXME: TARGET_PAGE_BITS_MIN affects TLB_FLAGS_MASK such
 914         * that 256-bit alignment (MO_ALIGN_32) cannot be supported:
 915         * see get_alignment_bits(). Enforce only 128-bit alignment for now.
 916         */
 917        MO_ALIGN_16
 918    };
 919    g_assert(i < ARRAY_SIZE(mop_align));
 920    return mop_align[i];
 921}
 922
 923/*
 924 * Abstractions of "generate code to do a guest load/store for
 925 * AArch32", where a vaddr is always 32 bits (and is zero
  926 * extended if we're a 64-bit core) and data is also
  927 * 32 bits unless specifically doing a 64-bit access.
 928 * These functions work like tcg_gen_qemu_{ld,st}* except
 929 * that the address argument is TCGv_i32 rather than TCGv.
 930 */
 931
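/*
 * Produce the full-width address for an AArch32 access: zero-extend the
 * 32-bit address, and in BE32 system mode (SCTLR.B set) XOR the low address
 * bits of sub-word accesses to give big-endian byte numbering.
 */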
 932static TCGv gen_aa32_addr(DisasContext *s, TCGv_i32 a32, MemOp op)
 933{
 934    TCGv addr = tcg_temp_new();
 935    tcg_gen_extu_i32_tl(addr, a32);
 936
 937    /* Not needed for user-mode BE32, where we use MO_BE instead.  */
 938    if (!IS_USER_ONLY && s->sctlr_b && (op & MO_SIZE) < MO_32) {
 939        tcg_gen_xori_tl(addr, addr, 4 - (1 << (op & MO_SIZE)));
 940    }
 941    return addr;
 942}
 943
 944/*
 945 * Internal routines are used for NEON cases where the endianness
 946 * and/or alignment has already been taken into account and manipulated.
 947 */
 948void gen_aa32_ld_internal_i32(DisasContext *s, TCGv_i32 val,
 949                              TCGv_i32 a32, int index, MemOp opc)
 950{
 951    TCGv addr = gen_aa32_addr(s, a32, opc);
 952    tcg_gen_qemu_ld_i32(val, addr, index, opc);
 953}
 954
 955void gen_aa32_st_internal_i32(DisasContext *s, TCGv_i32 val,
 956                              TCGv_i32 a32, int index, MemOp opc)
 957{
 958    TCGv addr = gen_aa32_addr(s, a32, opc);
 959    tcg_gen_qemu_st_i32(val, addr, index, opc);
 960}
 961
 962void gen_aa32_ld_internal_i64(DisasContext *s, TCGv_i64 val,
 963                              TCGv_i32 a32, int index, MemOp opc)
 964{
 965    TCGv addr = gen_aa32_addr(s, a32, opc);
 966
 967    tcg_gen_qemu_ld_i64(val, addr, index, opc);
 968
 969    /* Not needed for user-mode BE32, where we use MO_BE instead.  */
 970    if (!IS_USER_ONLY && s->sctlr_b && (opc & MO_SIZE) == MO_64) {
 971        tcg_gen_rotri_i64(val, val, 32);
 972    }
 973}
 974
 975void gen_aa32_st_internal_i64(DisasContext *s, TCGv_i64 val,
 976                              TCGv_i32 a32, int index, MemOp opc)
 977{
 978    TCGv addr = gen_aa32_addr(s, a32, opc);
 979
 980    /* Not needed for user-mode BE32, where we use MO_BE instead.  */
 981    if (!IS_USER_ONLY && s->sctlr_b && (opc & MO_SIZE) == MO_64) {
 982        TCGv_i64 tmp = tcg_temp_new_i64();
 983        tcg_gen_rotri_i64(tmp, val, 32);
 984        tcg_gen_qemu_st_i64(tmp, addr, index, opc);
 985    } else {
 986        tcg_gen_qemu_st_i64(val, addr, index, opc);
 987    }
 988}
 989
 990void gen_aa32_ld_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
 991                     int index, MemOp opc)
 992{
 993    gen_aa32_ld_internal_i32(s, val, a32, index, finalize_memop(s, opc));
 994}
 995
 996void gen_aa32_st_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
 997                     int index, MemOp opc)
 998{
 999    gen_aa32_st_internal_i32(s, val, a32, index, finalize_memop(s, opc));
1000}
1001
1002void gen_aa32_ld_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
1003                     int index, MemOp opc)
1004{
1005    gen_aa32_ld_internal_i64(s, val, a32, index, finalize_memop(s, opc));
1006}
1007
1008void gen_aa32_st_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
1009                     int index, MemOp opc)
1010{
1011    gen_aa32_st_internal_i64(s, val, a32, index, finalize_memop(s, opc));
1012}
1013
1014#define DO_GEN_LD(SUFF, OPC)                                            \
1015    static inline void gen_aa32_ld##SUFF(DisasContext *s, TCGv_i32 val, \
1016                                         TCGv_i32 a32, int index)       \
1017    {                                                                   \
1018        gen_aa32_ld_i32(s, val, a32, index, OPC);                       \
1019    }
1020
1021#define DO_GEN_ST(SUFF, OPC)                                            \
1022    static inline void gen_aa32_st##SUFF(DisasContext *s, TCGv_i32 val, \
1023                                         TCGv_i32 a32, int index)       \
1024    {                                                                   \
1025        gen_aa32_st_i32(s, val, a32, index, OPC);                       \
1026    }
1027
1028static inline void gen_hvc(DisasContext *s, int imm16)
1029{
1030    /* The pre HVC helper handles cases when HVC gets trapped
1031     * as an undefined insn by runtime configuration (ie before
1032     * the insn really executes).
1033     */
1034    gen_update_pc(s, 0);
1035    gen_helper_pre_hvc(cpu_env);
1036    /* Otherwise we will treat this as a real exception which
1037     * happens after execution of the insn. (The distinction matters
1038     * for the PC value reported to the exception handler and also
1039     * for single stepping.)
1040     */
1041    s->svc_imm = imm16;
1042    gen_update_pc(s, curr_insn_len(s));
1043    s->base.is_jmp = DISAS_HVC;
1044}
1045
1046static inline void gen_smc(DisasContext *s)
1047{
1048    /* As with HVC, we may take an exception either before or after
1049     * the insn executes.
1050     */
1051    gen_update_pc(s, 0);
1052    gen_helper_pre_smc(cpu_env, tcg_constant_i32(syn_aa32_smc()));
1053    gen_update_pc(s, curr_insn_len(s));
1054    s->base.is_jmp = DISAS_SMC;
1055}
1056
1057static void gen_exception_internal_insn(DisasContext *s, int excp)
1058{
1059    gen_set_condexec(s);
1060    gen_update_pc(s, 0);
1061    gen_exception_internal(excp);
1062    s->base.is_jmp = DISAS_NORETURN;
1063}
1064
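/*
 * Raise an exception with the specified syndrome, targeting the exception
 * level passed as a TCG value.
 */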
1065static void gen_exception_el_v(int excp, uint32_t syndrome, TCGv_i32 tcg_el)
1066{
1067    gen_helper_exception_with_syndrome_el(cpu_env, tcg_constant_i32(excp),
1068                                          tcg_constant_i32(syndrome), tcg_el);
1069}
1070
1071static void gen_exception_el(int excp, uint32_t syndrome, uint32_t target_el)
1072{
1073    gen_exception_el_v(excp, syndrome, tcg_constant_i32(target_el));
1074}
1075
1076static void gen_exception(int excp, uint32_t syndrome)
1077{
1078    gen_helper_exception_with_syndrome(cpu_env, tcg_constant_i32(excp),
1079                                       tcg_constant_i32(syndrome));
1080}
1081
1082static void gen_exception_insn_el_v(DisasContext *s, target_long pc_diff,
1083                                    int excp, uint32_t syn, TCGv_i32 tcg_el)
1084{
1085    if (s->aarch64) {
1086        gen_a64_update_pc(s, pc_diff);
1087    } else {
1088        gen_set_condexec(s);
1089        gen_update_pc(s, pc_diff);
1090    }
1091    gen_exception_el_v(excp, syn, tcg_el);
1092    s->base.is_jmp = DISAS_NORETURN;
1093}
1094
1095void gen_exception_insn_el(DisasContext *s, target_long pc_diff, int excp,
1096                           uint32_t syn, uint32_t target_el)
1097{
1098    gen_exception_insn_el_v(s, pc_diff, excp, syn,
1099                            tcg_constant_i32(target_el));
1100}
1101
1102void gen_exception_insn(DisasContext *s, target_long pc_diff,
1103                        int excp, uint32_t syn)
1104{
1105    if (s->aarch64) {
1106        gen_a64_update_pc(s, pc_diff);
1107    } else {
1108        gen_set_condexec(s);
1109        gen_update_pc(s, pc_diff);
1110    }
1111    gen_exception(excp, syn);
1112    s->base.is_jmp = DISAS_NORETURN;
1113}
1114
1115static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syn)
1116{
1117    gen_set_condexec(s);
1118    gen_update_pc(s, 0);
1119    gen_helper_exception_bkpt_insn(cpu_env, tcg_constant_i32(syn));
1120    s->base.is_jmp = DISAS_NORETURN;
1121}
1122
1123void unallocated_encoding(DisasContext *s)
1124{
1125    /* Unallocated and reserved encodings are uncategorized */
1126    gen_exception_insn(s, 0, EXCP_UDEF, syn_uncategorized());
1127}
1128
1129/* Force a TB lookup after an instruction that changes the CPU state.  */
1130void gen_lookup_tb(DisasContext *s)
1131{
1132    gen_pc_plus_diff(s, cpu_R[15], curr_insn_len(s));
1133    s->base.is_jmp = DISAS_EXIT;
1134}
1135
1136static inline void gen_hlt(DisasContext *s, int imm)
1137{
1138    /* HLT. This has two purposes.
1139     * Architecturally, it is an external halting debug instruction.
 1140     * Since QEMU doesn't implement external debug, we treat this as
 1141     * the architecture requires when halting debug is disabled: it will UNDEF.
1142     * Secondly, "HLT 0x3C" is a T32 semihosting trap instruction,
1143     * and "HLT 0xF000" is an A32 semihosting syscall. These traps
1144     * must trigger semihosting even for ARMv7 and earlier, where
1145     * HLT was an undefined encoding.
1146     * In system mode, we don't allow userspace access to
1147     * semihosting, to provide some semblance of security
1148     * (and for consistency with our 32-bit semihosting).
1149     */
1150    if (semihosting_enabled(s->current_el == 0) &&
1151        (imm == (s->thumb ? 0x3c : 0xf000))) {
1152        gen_exception_internal_insn(s, EXCP_SEMIHOST);
1153        return;
1154    }
1155
1156    unallocated_encoding(s);
1157}
1158
1159/*
1160 * Return the offset of a "full" NEON Dreg.
1161 */
1162long neon_full_reg_offset(unsigned reg)
1163{
1164    return offsetof(CPUARMState, vfp.zregs[reg >> 1].d[reg & 1]);
1165}
1166
1167/*
1168 * Return the offset of a 2**SIZE piece of a NEON register, at index ELE,
1169 * where 0 is the least significant end of the register.
1170 */
1171long neon_element_offset(int reg, int element, MemOp memop)
1172{
1173    int element_size = 1 << (memop & MO_SIZE);
1174    int ofs = element * element_size;
1175#if HOST_BIG_ENDIAN
1176    /*
1177     * Calculate the offset assuming fully little-endian,
1178     * then XOR to account for the order of the 8-byte units.
1179     */
1180    if (element_size < 8) {
1181        ofs ^= 8 - element_size;
1182    }
1183#endif
1184    return neon_full_reg_offset(reg) + ofs;
1185}
1186
1187/* Return the offset of a VFP Dreg (dp = true) or VFP Sreg (dp = false). */
1188long vfp_reg_offset(bool dp, unsigned reg)
1189{
1190    if (dp) {
1191        return neon_element_offset(reg, 0, MO_64);
1192    } else {
1193        return neon_element_offset(reg >> 1, reg & 1, MO_32);
1194    }
1195}
1196
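/*
 * Load one element of a NEON register into a 32-bit TCG value, sign- or
 * zero-extending narrow elements according to memop.
 */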
1197void read_neon_element32(TCGv_i32 dest, int reg, int ele, MemOp memop)
1198{
1199    long off = neon_element_offset(reg, ele, memop);
1200
1201    switch (memop) {
1202    case MO_SB:
1203        tcg_gen_ld8s_i32(dest, cpu_env, off);
1204        break;
1205    case MO_UB:
1206        tcg_gen_ld8u_i32(dest, cpu_env, off);
1207        break;
1208    case MO_SW:
1209        tcg_gen_ld16s_i32(dest, cpu_env, off);
1210        break;
1211    case MO_UW:
1212        tcg_gen_ld16u_i32(dest, cpu_env, off);
1213        break;
1214    case MO_UL:
1215    case MO_SL:
1216        tcg_gen_ld_i32(dest, cpu_env, off);
1217        break;
1218    default:
1219        g_assert_not_reached();
1220    }
1221}
1222
1223void read_neon_element64(TCGv_i64 dest, int reg, int ele, MemOp memop)
1224{
1225    long off = neon_element_offset(reg, ele, memop);
1226
1227    switch (memop) {
1228    case MO_SL:
1229        tcg_gen_ld32s_i64(dest, cpu_env, off);
1230        break;
1231    case MO_UL:
1232        tcg_gen_ld32u_i64(dest, cpu_env, off);
1233        break;
1234    case MO_UQ:
1235        tcg_gen_ld_i64(dest, cpu_env, off);
1236        break;
1237    default:
1238        g_assert_not_reached();
1239    }
1240}
1241
1242void write_neon_element32(TCGv_i32 src, int reg, int ele, MemOp memop)
1243{
1244    long off = neon_element_offset(reg, ele, memop);
1245
1246    switch (memop) {
1247    case MO_8:
1248        tcg_gen_st8_i32(src, cpu_env, off);
1249        break;
1250    case MO_16:
1251        tcg_gen_st16_i32(src, cpu_env, off);
1252        break;
1253    case MO_32:
1254        tcg_gen_st_i32(src, cpu_env, off);
1255        break;
1256    default:
1257        g_assert_not_reached();
1258    }
1259}
1260
1261void write_neon_element64(TCGv_i64 src, int reg, int ele, MemOp memop)
1262{
1263    long off = neon_element_offset(reg, ele, memop);
1264
1265    switch (memop) {
1266    case MO_32:
1267        tcg_gen_st32_i64(src, cpu_env, off);
1268        break;
1269    case MO_64:
1270        tcg_gen_st_i64(src, cpu_env, off);
1271        break;
1272    default:
1273        g_assert_not_reached();
1274    }
1275}
1276
1277#define ARM_CP_RW_BIT   (1 << 20)
1278
1279static inline void iwmmxt_load_reg(TCGv_i64 var, int reg)
1280{
1281    tcg_gen_ld_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
1282}
1283
1284static inline void iwmmxt_store_reg(TCGv_i64 var, int reg)
1285{
1286    tcg_gen_st_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
1287}
1288
1289static inline TCGv_i32 iwmmxt_load_creg(int reg)
1290{
1291    TCGv_i32 var = tcg_temp_new_i32();
1292    tcg_gen_ld_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
1293    return var;
1294}
1295
1296static inline void iwmmxt_store_creg(int reg, TCGv_i32 var)
1297{
1298    tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
1299}
1300
1301static inline void gen_op_iwmmxt_movq_wRn_M0(int rn)
1302{
1303    iwmmxt_store_reg(cpu_M0, rn);
1304}
1305
1306static inline void gen_op_iwmmxt_movq_M0_wRn(int rn)
1307{
1308    iwmmxt_load_reg(cpu_M0, rn);
1309}
1310
1311static inline void gen_op_iwmmxt_orq_M0_wRn(int rn)
1312{
1313    iwmmxt_load_reg(cpu_V1, rn);
1314    tcg_gen_or_i64(cpu_M0, cpu_M0, cpu_V1);
1315}
1316
1317static inline void gen_op_iwmmxt_andq_M0_wRn(int rn)
1318{
1319    iwmmxt_load_reg(cpu_V1, rn);
1320    tcg_gen_and_i64(cpu_M0, cpu_M0, cpu_V1);
1321}
1322
1323static inline void gen_op_iwmmxt_xorq_M0_wRn(int rn)
1324{
1325    iwmmxt_load_reg(cpu_V1, rn);
1326    tcg_gen_xor_i64(cpu_M0, cpu_M0, cpu_V1);
1327}
1328
1329#define IWMMXT_OP(name) \
1330static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
1331{ \
1332    iwmmxt_load_reg(cpu_V1, rn); \
1333    gen_helper_iwmmxt_##name(cpu_M0, cpu_M0, cpu_V1); \
1334}
1335
1336#define IWMMXT_OP_ENV(name) \
1337static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
1338{ \
1339    iwmmxt_load_reg(cpu_V1, rn); \
1340    gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0, cpu_V1); \
1341}
1342
1343#define IWMMXT_OP_ENV_SIZE(name) \
1344IWMMXT_OP_ENV(name##b) \
1345IWMMXT_OP_ENV(name##w) \
1346IWMMXT_OP_ENV(name##l)
1347
1348#define IWMMXT_OP_ENV1(name) \
1349static inline void gen_op_iwmmxt_##name##_M0(void) \
1350{ \
1351    gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0); \
1352}
1353
1354IWMMXT_OP(maddsq)
1355IWMMXT_OP(madduq)
1356IWMMXT_OP(sadb)
1357IWMMXT_OP(sadw)
1358IWMMXT_OP(mulslw)
1359IWMMXT_OP(mulshw)
1360IWMMXT_OP(mululw)
1361IWMMXT_OP(muluhw)
1362IWMMXT_OP(macsw)
1363IWMMXT_OP(macuw)
1364
1365IWMMXT_OP_ENV_SIZE(unpackl)
1366IWMMXT_OP_ENV_SIZE(unpackh)
1367
1368IWMMXT_OP_ENV1(unpacklub)
1369IWMMXT_OP_ENV1(unpackluw)
1370IWMMXT_OP_ENV1(unpacklul)
1371IWMMXT_OP_ENV1(unpackhub)
1372IWMMXT_OP_ENV1(unpackhuw)
1373IWMMXT_OP_ENV1(unpackhul)
1374IWMMXT_OP_ENV1(unpacklsb)
1375IWMMXT_OP_ENV1(unpacklsw)
1376IWMMXT_OP_ENV1(unpacklsl)
1377IWMMXT_OP_ENV1(unpackhsb)
1378IWMMXT_OP_ENV1(unpackhsw)
1379IWMMXT_OP_ENV1(unpackhsl)
1380
1381IWMMXT_OP_ENV_SIZE(cmpeq)
1382IWMMXT_OP_ENV_SIZE(cmpgtu)
1383IWMMXT_OP_ENV_SIZE(cmpgts)
1384
1385IWMMXT_OP_ENV_SIZE(mins)
1386IWMMXT_OP_ENV_SIZE(minu)
1387IWMMXT_OP_ENV_SIZE(maxs)
1388IWMMXT_OP_ENV_SIZE(maxu)
1389
1390IWMMXT_OP_ENV_SIZE(subn)
1391IWMMXT_OP_ENV_SIZE(addn)
1392IWMMXT_OP_ENV_SIZE(subu)
1393IWMMXT_OP_ENV_SIZE(addu)
1394IWMMXT_OP_ENV_SIZE(subs)
1395IWMMXT_OP_ENV_SIZE(adds)
1396
1397IWMMXT_OP_ENV(avgb0)
1398IWMMXT_OP_ENV(avgb1)
1399IWMMXT_OP_ENV(avgw0)
1400IWMMXT_OP_ENV(avgw1)
1401
1402IWMMXT_OP_ENV(packuw)
1403IWMMXT_OP_ENV(packul)
1404IWMMXT_OP_ENV(packuq)
1405IWMMXT_OP_ENV(packsw)
1406IWMMXT_OP_ENV(packsl)
1407IWMMXT_OP_ENV(packsq)
1408
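/* Set bit 1 of wCon, flagging an update to an iwMMXt data register. */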
1409static void gen_op_iwmmxt_set_mup(void)
1410{
1411    TCGv_i32 tmp;
1412    tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
1413    tcg_gen_ori_i32(tmp, tmp, 2);
1414    store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
1415}
1416
1417static void gen_op_iwmmxt_set_cup(void)
1418{
1419    TCGv_i32 tmp;
1420    tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
1421    tcg_gen_ori_i32(tmp, tmp, 1);
1422    store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
1423}
1424
1425static void gen_op_iwmmxt_setpsr_nz(void)
1426{
1427    TCGv_i32 tmp = tcg_temp_new_i32();
1428    gen_helper_iwmmxt_setpsr_nz(tmp, cpu_M0);
1429    store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCASF]);
1430}
1431
1432static inline void gen_op_iwmmxt_addl_M0_wRn(int rn)
1433{
1434    iwmmxt_load_reg(cpu_V1, rn);
1435    tcg_gen_ext32u_i64(cpu_V1, cpu_V1);
1436    tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
1437}
1438
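/*
 * Compute the address for an iwMMXt load/store into dest, handling
 * pre/post-indexing and base register writeback.  Returns nonzero if the
 * addressing mode encoding is invalid.
 */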
1439static inline int gen_iwmmxt_address(DisasContext *s, uint32_t insn,
1440                                     TCGv_i32 dest)
1441{
1442    int rd;
1443    uint32_t offset;
1444    TCGv_i32 tmp;
1445
1446    rd = (insn >> 16) & 0xf;
1447    tmp = load_reg(s, rd);
1448
1449    offset = (insn & 0xff) << ((insn >> 7) & 2);
1450    if (insn & (1 << 24)) {
1451        /* Pre indexed */
1452        if (insn & (1 << 23))
1453            tcg_gen_addi_i32(tmp, tmp, offset);
1454        else
1455            tcg_gen_addi_i32(tmp, tmp, -offset);
1456        tcg_gen_mov_i32(dest, tmp);
1457        if (insn & (1 << 21)) {
1458            store_reg(s, rd, tmp);
1459        }
1460    } else if (insn & (1 << 21)) {
1461        /* Post indexed */
1462        tcg_gen_mov_i32(dest, tmp);
1463        if (insn & (1 << 23))
1464            tcg_gen_addi_i32(tmp, tmp, offset);
1465        else
1466            tcg_gen_addi_i32(tmp, tmp, -offset);
1467        store_reg(s, rd, tmp);
1468    } else if (!(insn & (1 << 23)))
1469        return 1;
1470    return 0;
1471}
1472
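/*
 * Fetch the shift amount for an iwMMXt shift insn into dest, either from
 * one of wCGR0-wCGR3 or from the low half of a data register, masking it
 * with 'mask'.  Returns nonzero if the encoding is invalid.
 */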
1473static inline int gen_iwmmxt_shift(uint32_t insn, uint32_t mask, TCGv_i32 dest)
1474{
1475    int rd = (insn >> 0) & 0xf;
1476    TCGv_i32 tmp;
1477
1478    if (insn & (1 << 8)) {
1479        if (rd < ARM_IWMMXT_wCGR0 || rd > ARM_IWMMXT_wCGR3) {
1480            return 1;
1481        } else {
1482            tmp = iwmmxt_load_creg(rd);
1483        }
1484    } else {
1485        tmp = tcg_temp_new_i32();
1486        iwmmxt_load_reg(cpu_V0, rd);
1487        tcg_gen_extrl_i64_i32(tmp, cpu_V0);
1488    }
1489    tcg_gen_andi_i32(tmp, tmp, mask);
1490    tcg_gen_mov_i32(dest, tmp);
1491    return 0;
1492}
1493
1494/* Disassemble an iwMMXt instruction.  Returns nonzero if an error occurred
1495   (ie. an undefined instruction).  */
1496static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn)
1497{
1498    int rd, wrd;
1499    int rdhi, rdlo, rd0, rd1, i;
1500    TCGv_i32 addr;
1501    TCGv_i32 tmp, tmp2, tmp3;
1502
1503    if ((insn & 0x0e000e00) == 0x0c000000) {
1504        if ((insn & 0x0fe00ff0) == 0x0c400000) {
1505            wrd = insn & 0xf;
1506            rdlo = (insn >> 12) & 0xf;
1507            rdhi = (insn >> 16) & 0xf;
1508            if (insn & ARM_CP_RW_BIT) {                         /* TMRRC */
1509                iwmmxt_load_reg(cpu_V0, wrd);
1510                tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
1511                tcg_gen_extrh_i64_i32(cpu_R[rdhi], cpu_V0);
1512            } else {                                    /* TMCRR */
1513                tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
1514                iwmmxt_store_reg(cpu_V0, wrd);
1515                gen_op_iwmmxt_set_mup();
1516            }
1517            return 0;
1518        }
1519
1520        wrd = (insn >> 12) & 0xf;
1521        addr = tcg_temp_new_i32();
1522        if (gen_iwmmxt_address(s, insn, addr)) {
1523            return 1;
1524        }
1525        if (insn & ARM_CP_RW_BIT) {
1526            if ((insn >> 28) == 0xf) {                  /* WLDRW wCx */
1527                tmp = tcg_temp_new_i32();
1528                gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
1529                iwmmxt_store_creg(wrd, tmp);
1530            } else {
1531                i = 1;
1532                if (insn & (1 << 8)) {
1533                    if (insn & (1 << 22)) {             /* WLDRD */
1534                        gen_aa32_ld64(s, cpu_M0, addr, get_mem_index(s));
1535                        i = 0;
1536                    } else {                            /* WLDRW wRd */
1537                        tmp = tcg_temp_new_i32();
1538                        gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
1539                    }
1540                } else {
1541                    tmp = tcg_temp_new_i32();
1542                    if (insn & (1 << 22)) {             /* WLDRH */
1543                        gen_aa32_ld16u(s, tmp, addr, get_mem_index(s));
1544                    } else {                            /* WLDRB */
1545                        gen_aa32_ld8u(s, tmp, addr, get_mem_index(s));
1546                    }
1547                }
1548                if (i) {
1549                    tcg_gen_extu_i32_i64(cpu_M0, tmp);
1550                }
1551                gen_op_iwmmxt_movq_wRn_M0(wrd);
1552            }
1553        } else {
1554            if ((insn >> 28) == 0xf) {                  /* WSTRW wCx */
1555                tmp = iwmmxt_load_creg(wrd);
1556                gen_aa32_st32(s, tmp, addr, get_mem_index(s));
1557            } else {
1558                gen_op_iwmmxt_movq_M0_wRn(wrd);
1559                tmp = tcg_temp_new_i32();
1560                if (insn & (1 << 8)) {
1561                    if (insn & (1 << 22)) {             /* WSTRD */
1562                        gen_aa32_st64(s, cpu_M0, addr, get_mem_index(s));
1563                    } else {                            /* WSTRW wRd */
1564                        tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1565                        gen_aa32_st32(s, tmp, addr, get_mem_index(s));
1566                    }
1567                } else {
1568                    if (insn & (1 << 22)) {             /* WSTRH */
1569                        tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1570                        gen_aa32_st16(s, tmp, addr, get_mem_index(s));
1571                    } else {                            /* WSTRB */
1572                        tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1573                        gen_aa32_st8(s, tmp, addr, get_mem_index(s));
1574                    }
1575                }
1576            }
1577        }
1578        return 0;
1579    }
1580
1581    if ((insn & 0x0f000000) != 0x0e000000)
1582        return 1;
1583
1584    switch (((insn >> 12) & 0xf00) | ((insn >> 4) & 0xff)) {
1585    case 0x000:                                                 /* WOR */
1586        wrd = (insn >> 12) & 0xf;
1587        rd0 = (insn >> 0) & 0xf;
1588        rd1 = (insn >> 16) & 0xf;
1589        gen_op_iwmmxt_movq_M0_wRn(rd0);
1590        gen_op_iwmmxt_orq_M0_wRn(rd1);
1591        gen_op_iwmmxt_setpsr_nz();
1592        gen_op_iwmmxt_movq_wRn_M0(wrd);
1593        gen_op_iwmmxt_set_mup();
1594        gen_op_iwmmxt_set_cup();
1595        break;
1596    case 0x011:                                                 /* TMCR */
1597        if (insn & 0xf)
1598            return 1;
1599        rd = (insn >> 12) & 0xf;
1600        wrd = (insn >> 16) & 0xf;
1601        switch (wrd) {
1602        case ARM_IWMMXT_wCID:
1603        case ARM_IWMMXT_wCASF:
1604            break;
1605        case ARM_IWMMXT_wCon:
1606            gen_op_iwmmxt_set_cup();
1607            /* Fall through.  */
1608        case ARM_IWMMXT_wCSSF:
1609            tmp = iwmmxt_load_creg(wrd);
1610            tmp2 = load_reg(s, rd);
1611            tcg_gen_andc_i32(tmp, tmp, tmp2);
1612            iwmmxt_store_creg(wrd, tmp);
1613            break;
1614        case ARM_IWMMXT_wCGR0:
1615        case ARM_IWMMXT_wCGR1:
1616        case ARM_IWMMXT_wCGR2:
1617        case ARM_IWMMXT_wCGR3:
1618            gen_op_iwmmxt_set_cup();
1619            tmp = load_reg(s, rd);
1620            iwmmxt_store_creg(wrd, tmp);
1621            break;
1622        default:
1623            return 1;
1624        }
1625        break;
1626    case 0x100:                                                 /* WXOR */
1627        wrd = (insn >> 12) & 0xf;
1628        rd0 = (insn >> 0) & 0xf;
1629        rd1 = (insn >> 16) & 0xf;
1630        gen_op_iwmmxt_movq_M0_wRn(rd0);
1631        gen_op_iwmmxt_xorq_M0_wRn(rd1);
1632        gen_op_iwmmxt_setpsr_nz();
1633        gen_op_iwmmxt_movq_wRn_M0(wrd);
1634        gen_op_iwmmxt_set_mup();
1635        gen_op_iwmmxt_set_cup();
1636        break;
1637    case 0x111:                                                 /* TMRC */
1638        if (insn & 0xf)
1639            return 1;
1640        rd = (insn >> 12) & 0xf;
1641        wrd = (insn >> 16) & 0xf;
1642        tmp = iwmmxt_load_creg(wrd);
1643        store_reg(s, rd, tmp);
1644        break;
1645    case 0x300:                                                 /* WANDN */
1646        wrd = (insn >> 12) & 0xf;
1647        rd0 = (insn >> 0) & 0xf;
1648        rd1 = (insn >> 16) & 0xf;
1649        gen_op_iwmmxt_movq_M0_wRn(rd0);
1650        tcg_gen_neg_i64(cpu_M0, cpu_M0);
1651        gen_op_iwmmxt_andq_M0_wRn(rd1);
1652        gen_op_iwmmxt_setpsr_nz();
1653        gen_op_iwmmxt_movq_wRn_M0(wrd);
1654        gen_op_iwmmxt_set_mup();
1655        gen_op_iwmmxt_set_cup();
1656        break;
1657    case 0x200:                                                 /* WAND */
1658        wrd = (insn >> 12) & 0xf;
1659        rd0 = (insn >> 0) & 0xf;
1660        rd1 = (insn >> 16) & 0xf;
1661        gen_op_iwmmxt_movq_M0_wRn(rd0);
1662        gen_op_iwmmxt_andq_M0_wRn(rd1);
1663        gen_op_iwmmxt_setpsr_nz();
1664        gen_op_iwmmxt_movq_wRn_M0(wrd);
1665        gen_op_iwmmxt_set_mup();
1666        gen_op_iwmmxt_set_cup();
1667        break;
1668    case 0x810: case 0xa10:                             /* WMADD */
1669        wrd = (insn >> 12) & 0xf;
1670        rd0 = (insn >> 0) & 0xf;
1671        rd1 = (insn >> 16) & 0xf;
1672        gen_op_iwmmxt_movq_M0_wRn(rd0);
1673        if (insn & (1 << 21))
1674            gen_op_iwmmxt_maddsq_M0_wRn(rd1);
1675        else
1676            gen_op_iwmmxt_madduq_M0_wRn(rd1);
1677        gen_op_iwmmxt_movq_wRn_M0(wrd);
1678        gen_op_iwmmxt_set_mup();
1679        break;
1680    case 0x10e: case 0x50e: case 0x90e: case 0xd0e:     /* WUNPCKIL */
1681        wrd = (insn >> 12) & 0xf;
1682        rd0 = (insn >> 16) & 0xf;
1683        rd1 = (insn >> 0) & 0xf;
1684        gen_op_iwmmxt_movq_M0_wRn(rd0);
1685        switch ((insn >> 22) & 3) {
1686        case 0:
1687            gen_op_iwmmxt_unpacklb_M0_wRn(rd1);
1688            break;
1689        case 1:
1690            gen_op_iwmmxt_unpacklw_M0_wRn(rd1);
1691            break;
1692        case 2:
1693            gen_op_iwmmxt_unpackll_M0_wRn(rd1);
1694            break;
1695        case 3:
1696            return 1;
1697        }
1698        gen_op_iwmmxt_movq_wRn_M0(wrd);
1699        gen_op_iwmmxt_set_mup();
1700        gen_op_iwmmxt_set_cup();
1701        break;
1702    case 0x10c: case 0x50c: case 0x90c: case 0xd0c:     /* WUNPCKIH */
1703        wrd = (insn >> 12) & 0xf;
1704        rd0 = (insn >> 16) & 0xf;
1705        rd1 = (insn >> 0) & 0xf;
1706        gen_op_iwmmxt_movq_M0_wRn(rd0);
1707        switch ((insn >> 22) & 3) {
1708        case 0:
1709            gen_op_iwmmxt_unpackhb_M0_wRn(rd1);
1710            break;
1711        case 1:
1712            gen_op_iwmmxt_unpackhw_M0_wRn(rd1);
1713            break;
1714        case 2:
1715            gen_op_iwmmxt_unpackhl_M0_wRn(rd1);
1716            break;
1717        case 3:
1718            return 1;
1719        }
1720        gen_op_iwmmxt_movq_wRn_M0(wrd);
1721        gen_op_iwmmxt_set_mup();
1722        gen_op_iwmmxt_set_cup();
1723        break;
1724    case 0x012: case 0x112: case 0x412: case 0x512:     /* WSAD */
1725        wrd = (insn >> 12) & 0xf;
1726        rd0 = (insn >> 16) & 0xf;
1727        rd1 = (insn >> 0) & 0xf;
1728        gen_op_iwmmxt_movq_M0_wRn(rd0);
1729        if (insn & (1 << 22))
1730            gen_op_iwmmxt_sadw_M0_wRn(rd1);
1731        else
1732            gen_op_iwmmxt_sadb_M0_wRn(rd1);
1733        if (!(insn & (1 << 20)))
1734            gen_op_iwmmxt_addl_M0_wRn(wrd);
1735        gen_op_iwmmxt_movq_wRn_M0(wrd);
1736        gen_op_iwmmxt_set_mup();
1737        break;
1738    case 0x010: case 0x110: case 0x210: case 0x310:     /* WMUL */
1739        wrd = (insn >> 12) & 0xf;
1740        rd0 = (insn >> 16) & 0xf;
1741        rd1 = (insn >> 0) & 0xf;
1742        gen_op_iwmmxt_movq_M0_wRn(rd0);
1743        if (insn & (1 << 21)) {
1744            if (insn & (1 << 20))
1745                gen_op_iwmmxt_mulshw_M0_wRn(rd1);
1746            else
1747                gen_op_iwmmxt_mulslw_M0_wRn(rd1);
1748        } else {
1749            if (insn & (1 << 20))
1750                gen_op_iwmmxt_muluhw_M0_wRn(rd1);
1751            else
1752                gen_op_iwmmxt_mululw_M0_wRn(rd1);
1753        }
1754        gen_op_iwmmxt_movq_wRn_M0(wrd);
1755        gen_op_iwmmxt_set_mup();
1756        break;
1757    case 0x410: case 0x510: case 0x610: case 0x710:     /* WMAC */
1758        wrd = (insn >> 12) & 0xf;
1759        rd0 = (insn >> 16) & 0xf;
1760        rd1 = (insn >> 0) & 0xf;
1761        gen_op_iwmmxt_movq_M0_wRn(rd0);
1762        if (insn & (1 << 21))
1763            gen_op_iwmmxt_macsw_M0_wRn(rd1);
1764        else
1765            gen_op_iwmmxt_macuw_M0_wRn(rd1);
1766        if (!(insn & (1 << 20))) {
1767            iwmmxt_load_reg(cpu_V1, wrd);
1768            tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
1769        }
1770        gen_op_iwmmxt_movq_wRn_M0(wrd);
1771        gen_op_iwmmxt_set_mup();
1772        break;
1773    case 0x006: case 0x406: case 0x806: case 0xc06:     /* WCMPEQ */
1774        wrd = (insn >> 12) & 0xf;
1775        rd0 = (insn >> 16) & 0xf;
1776        rd1 = (insn >> 0) & 0xf;
1777        gen_op_iwmmxt_movq_M0_wRn(rd0);
1778        switch ((insn >> 22) & 3) {
1779        case 0:
1780            gen_op_iwmmxt_cmpeqb_M0_wRn(rd1);
1781            break;
1782        case 1:
1783            gen_op_iwmmxt_cmpeqw_M0_wRn(rd1);
1784            break;
1785        case 2:
1786            gen_op_iwmmxt_cmpeql_M0_wRn(rd1);
1787            break;
1788        case 3:
1789            return 1;
1790        }
1791        gen_op_iwmmxt_movq_wRn_M0(wrd);
1792        gen_op_iwmmxt_set_mup();
1793        gen_op_iwmmxt_set_cup();
1794        break;
1795    case 0x800: case 0x900: case 0xc00: case 0xd00:     /* WAVG2 */
1796        wrd = (insn >> 12) & 0xf;
1797        rd0 = (insn >> 16) & 0xf;
1798        rd1 = (insn >> 0) & 0xf;
1799        gen_op_iwmmxt_movq_M0_wRn(rd0);
1800        if (insn & (1 << 22)) {
1801            if (insn & (1 << 20))
1802                gen_op_iwmmxt_avgw1_M0_wRn(rd1);
1803            else
1804                gen_op_iwmmxt_avgw0_M0_wRn(rd1);
1805        } else {
1806            if (insn & (1 << 20))
1807                gen_op_iwmmxt_avgb1_M0_wRn(rd1);
1808            else
1809                gen_op_iwmmxt_avgb0_M0_wRn(rd1);
1810        }
1811        gen_op_iwmmxt_movq_wRn_M0(wrd);
1812        gen_op_iwmmxt_set_mup();
1813        gen_op_iwmmxt_set_cup();
1814        break;
1815    case 0x802: case 0x902: case 0xa02: case 0xb02:     /* WALIGNR */
1816        wrd = (insn >> 12) & 0xf;
1817        rd0 = (insn >> 16) & 0xf;
1818        rd1 = (insn >> 0) & 0xf;
1819        gen_op_iwmmxt_movq_M0_wRn(rd0);
1820        tmp = iwmmxt_load_creg(ARM_IWMMXT_wCGR0 + ((insn >> 20) & 3));
1821        tcg_gen_andi_i32(tmp, tmp, 7);
1822        iwmmxt_load_reg(cpu_V1, rd1);
1823        gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp);
1824        gen_op_iwmmxt_movq_wRn_M0(wrd);
1825        gen_op_iwmmxt_set_mup();
1826        break;
1827    case 0x601: case 0x605: case 0x609: case 0x60d:     /* TINSR */
1828        if (((insn >> 6) & 3) == 3)
1829            return 1;
1830        rd = (insn >> 12) & 0xf;
1831        wrd = (insn >> 16) & 0xf;
1832        tmp = load_reg(s, rd);
1833        gen_op_iwmmxt_movq_M0_wRn(wrd);
1834        switch ((insn >> 6) & 3) {
1835        case 0:
1836            tmp2 = tcg_constant_i32(0xff);
1837            tmp3 = tcg_constant_i32((insn & 7) << 3);
1838            break;
1839        case 1:
1840            tmp2 = tcg_constant_i32(0xffff);
1841            tmp3 = tcg_constant_i32((insn & 3) << 4);
1842            break;
1843        case 2:
1844            tmp2 = tcg_constant_i32(0xffffffff);
1845            tmp3 = tcg_constant_i32((insn & 1) << 5);
1846            break;
1847        default:
1848            g_assert_not_reached();
1849        }
1850        gen_helper_iwmmxt_insr(cpu_M0, cpu_M0, tmp, tmp2, tmp3);
1851        gen_op_iwmmxt_movq_wRn_M0(wrd);
1852        gen_op_iwmmxt_set_mup();
1853        break;
1854    case 0x107: case 0x507: case 0x907: case 0xd07:     /* TEXTRM */
1855        rd = (insn >> 12) & 0xf;
1856        wrd = (insn >> 16) & 0xf;
1857        if (rd == 15 || ((insn >> 22) & 3) == 3)
1858            return 1;
1859        gen_op_iwmmxt_movq_M0_wRn(wrd);
1860        tmp = tcg_temp_new_i32();
1861        switch ((insn >> 22) & 3) {
1862        case 0:
1863            tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 7) << 3);
1864            tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1865            if (insn & 8) {
1866                tcg_gen_ext8s_i32(tmp, tmp);
1867            } else {
1868                tcg_gen_andi_i32(tmp, tmp, 0xff);
1869            }
1870            break;
1871        case 1:
1872            tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 3) << 4);
1873            tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1874            if (insn & 8) {
1875                tcg_gen_ext16s_i32(tmp, tmp);
1876            } else {
1877                tcg_gen_andi_i32(tmp, tmp, 0xffff);
1878            }
1879            break;
1880        case 2:
1881            tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 1) << 5);
1882            tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1883            break;
1884        }
1885        store_reg(s, rd, tmp);
1886        break;
1887    case 0x117: case 0x517: case 0x917: case 0xd17:     /* TEXTRC */
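        /*
         * TEXTRC: copy the SIMD flag nibble for the selected element out
         * of wCASF into the CPSR NZCV flags.
         */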
1888        if ((insn & 0x000ff008) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1889            return 1;
1890        tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1891        switch ((insn >> 22) & 3) {
1892        case 0:
1893            tcg_gen_shri_i32(tmp, tmp, ((insn & 7) << 2) + 0);
1894            break;
1895        case 1:
1896            tcg_gen_shri_i32(tmp, tmp, ((insn & 3) << 3) + 4);
1897            break;
1898        case 2:
1899            tcg_gen_shri_i32(tmp, tmp, ((insn & 1) << 4) + 12);
1900            break;
1901        }
1902        tcg_gen_shli_i32(tmp, tmp, 28);
1903        gen_set_nzcv(tmp);
1904        break;
1905    case 0x401: case 0x405: case 0x409: case 0x40d:     /* TBCST */
1906        if (((insn >> 6) & 3) == 3)
1907            return 1;
1908        rd = (insn >> 12) & 0xf;
1909        wrd = (insn >> 16) & 0xf;
1910        tmp = load_reg(s, rd);
1911        switch ((insn >> 6) & 3) {
1912        case 0:
1913            gen_helper_iwmmxt_bcstb(cpu_M0, tmp);
1914            break;
1915        case 1:
1916            gen_helper_iwmmxt_bcstw(cpu_M0, tmp);
1917            break;
1918        case 2:
1919            gen_helper_iwmmxt_bcstl(cpu_M0, tmp);
1920            break;
1921        }
1922        gen_op_iwmmxt_movq_wRn_M0(wrd);
1923        gen_op_iwmmxt_set_mup();
1924        break;
1925    case 0x113: case 0x513: case 0x913: case 0xd13:     /* TANDC */
1926        if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1927            return 1;
1928        tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1929        tmp2 = tcg_temp_new_i32();
1930        tcg_gen_mov_i32(tmp2, tmp);
1931        switch ((insn >> 22) & 3) {
1932        case 0:
1933            for (i = 0; i < 7; i ++) {
1934                tcg_gen_shli_i32(tmp2, tmp2, 4);
1935                tcg_gen_and_i32(tmp, tmp, tmp2);
1936            }
1937            break;
1938        case 1:
1939            for (i = 0; i < 3; i ++) {
1940                tcg_gen_shli_i32(tmp2, tmp2, 8);
1941                tcg_gen_and_i32(tmp, tmp, tmp2);
1942            }
1943            break;
1944        case 2:
1945            tcg_gen_shli_i32(tmp2, tmp2, 16);
1946            tcg_gen_and_i32(tmp, tmp, tmp2);
1947            break;
1948        }
1949        gen_set_nzcv(tmp);
1950        break;
1951    case 0x01c: case 0x41c: case 0x81c: case 0xc1c:     /* WACC */
1952        wrd = (insn >> 12) & 0xf;
1953        rd0 = (insn >> 16) & 0xf;
1954        gen_op_iwmmxt_movq_M0_wRn(rd0);
1955        switch ((insn >> 22) & 3) {
1956        case 0:
1957            gen_helper_iwmmxt_addcb(cpu_M0, cpu_M0);
1958            break;
1959        case 1:
1960            gen_helper_iwmmxt_addcw(cpu_M0, cpu_M0);
1961            break;
1962        case 2:
1963            gen_helper_iwmmxt_addcl(cpu_M0, cpu_M0);
1964            break;
1965        case 3:
1966            return 1;
1967        }
1968        gen_op_iwmmxt_movq_wRn_M0(wrd);
1969        gen_op_iwmmxt_set_mup();
1970        break;
1971    case 0x115: case 0x515: case 0x915: case 0xd15:     /* TORC */
1972        if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1973            return 1;
1974        tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1975        tmp2 = tcg_temp_new_i32();
1976        tcg_gen_mov_i32(tmp2, tmp);
1977        switch ((insn >> 22) & 3) {
1978        case 0:
1979            for (i = 0; i < 7; i ++) {
1980                tcg_gen_shli_i32(tmp2, tmp2, 4);
1981                tcg_gen_or_i32(tmp, tmp, tmp2);
1982            }
1983            break;
1984        case 1:
1985            for (i = 0; i < 3; i ++) {
1986                tcg_gen_shli_i32(tmp2, tmp2, 8);
1987                tcg_gen_or_i32(tmp, tmp, tmp2);
1988            }
1989            break;
1990        case 2:
1991            tcg_gen_shli_i32(tmp2, tmp2, 16);
1992            tcg_gen_or_i32(tmp, tmp, tmp2);
1993            break;
1994        }
1995        gen_set_nzcv(tmp);
1996        break;
1997    case 0x103: case 0x503: case 0x903: case 0xd03:     /* TMOVMSK */
1998        rd = (insn >> 12) & 0xf;
1999        rd0 = (insn >> 16) & 0xf;
2000        if ((insn & 0xf) != 0 || ((insn >> 22) & 3) == 3)
2001            return 1;
2002        gen_op_iwmmxt_movq_M0_wRn(rd0);
2003        tmp = tcg_temp_new_i32();
2004        switch ((insn >> 22) & 3) {
2005        case 0:
2006            gen_helper_iwmmxt_msbb(tmp, cpu_M0);
2007            break;
2008        case 1:
2009            gen_helper_iwmmxt_msbw(tmp, cpu_M0);
2010            break;
2011        case 2:
2012            gen_helper_iwmmxt_msbl(tmp, cpu_M0);
2013            break;
2014        }
2015        store_reg(s, rd, tmp);
2016        break;
2017    case 0x106: case 0x306: case 0x506: case 0x706:     /* WCMPGT */
2018    case 0x906: case 0xb06: case 0xd06: case 0xf06:
2019        wrd = (insn >> 12) & 0xf;
2020        rd0 = (insn >> 16) & 0xf;
2021        rd1 = (insn >> 0) & 0xf;
2022        gen_op_iwmmxt_movq_M0_wRn(rd0);
2023        switch ((insn >> 22) & 3) {
2024        case 0:
2025            if (insn & (1 << 21))
2026                gen_op_iwmmxt_cmpgtsb_M0_wRn(rd1);
2027            else
2028                gen_op_iwmmxt_cmpgtub_M0_wRn(rd1);
2029            break;
2030        case 1:
2031            if (insn & (1 << 21))
2032                gen_op_iwmmxt_cmpgtsw_M0_wRn(rd1);
2033            else
2034                gen_op_iwmmxt_cmpgtuw_M0_wRn(rd1);
2035            break;
2036        case 2:
2037            if (insn & (1 << 21))
2038                gen_op_iwmmxt_cmpgtsl_M0_wRn(rd1);
2039            else
2040                gen_op_iwmmxt_cmpgtul_M0_wRn(rd1);
2041            break;
2042        case 3:
2043            return 1;
2044        }
2045        gen_op_iwmmxt_movq_wRn_M0(wrd);
2046        gen_op_iwmmxt_set_mup();
2047        gen_op_iwmmxt_set_cup();
2048        break;
2049    case 0x00e: case 0x20e: case 0x40e: case 0x60e:     /* WUNPCKEL */
2050    case 0x80e: case 0xa0e: case 0xc0e: case 0xe0e:
2051        wrd = (insn >> 12) & 0xf;
2052        rd0 = (insn >> 16) & 0xf;
2053        gen_op_iwmmxt_movq_M0_wRn(rd0);
2054        switch ((insn >> 22) & 3) {
2055        case 0:
2056            if (insn & (1 << 21))
2057                gen_op_iwmmxt_unpacklsb_M0();
2058            else
2059                gen_op_iwmmxt_unpacklub_M0();
2060            break;
2061        case 1:
2062            if (insn & (1 << 21))
2063                gen_op_iwmmxt_unpacklsw_M0();
2064            else
2065                gen_op_iwmmxt_unpackluw_M0();
2066            break;
2067        case 2:
2068            if (insn & (1 << 21))
2069                gen_op_iwmmxt_unpacklsl_M0();
2070            else
2071                gen_op_iwmmxt_unpacklul_M0();
2072            break;
2073        case 3:
2074            return 1;
2075        }
2076        gen_op_iwmmxt_movq_wRn_M0(wrd);
2077        gen_op_iwmmxt_set_mup();
2078        gen_op_iwmmxt_set_cup();
2079        break;
2080    case 0x00c: case 0x20c: case 0x40c: case 0x60c:     /* WUNPCKEH */
2081    case 0x80c: case 0xa0c: case 0xc0c: case 0xe0c:
2082        wrd = (insn >> 12) & 0xf;
2083        rd0 = (insn >> 16) & 0xf;
2084        gen_op_iwmmxt_movq_M0_wRn(rd0);
2085        switch ((insn >> 22) & 3) {
2086        case 0:
2087            if (insn & (1 << 21))
2088                gen_op_iwmmxt_unpackhsb_M0();
2089            else
2090                gen_op_iwmmxt_unpackhub_M0();
2091            break;
2092        case 1:
2093            if (insn & (1 << 21))
2094                gen_op_iwmmxt_unpackhsw_M0();
2095            else
2096                gen_op_iwmmxt_unpackhuw_M0();
2097            break;
2098        case 2:
2099            if (insn & (1 << 21))
2100                gen_op_iwmmxt_unpackhsl_M0();
2101            else
2102                gen_op_iwmmxt_unpackhul_M0();
2103            break;
2104        case 3:
2105            return 1;
2106        }
2107        gen_op_iwmmxt_movq_wRn_M0(wrd);
2108        gen_op_iwmmxt_set_mup();
2109        gen_op_iwmmxt_set_cup();
2110        break;
2111    case 0x204: case 0x604: case 0xa04: case 0xe04:     /* WSRL */
2112    case 0x214: case 0x614: case 0xa14: case 0xe14:
2113        if (((insn >> 22) & 3) == 0)
2114            return 1;
2115        wrd = (insn >> 12) & 0xf;
2116        rd0 = (insn >> 16) & 0xf;
2117        gen_op_iwmmxt_movq_M0_wRn(rd0);
2118        tmp = tcg_temp_new_i32();
2119        if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2120            return 1;
2121        }
2122        switch ((insn >> 22) & 3) {
2123        case 1:
2124            gen_helper_iwmmxt_srlw(cpu_M0, cpu_env, cpu_M0, tmp);
2125            break;
2126        case 2:
2127            gen_helper_iwmmxt_srll(cpu_M0, cpu_env, cpu_M0, tmp);
2128            break;
2129        case 3:
2130            gen_helper_iwmmxt_srlq(cpu_M0, cpu_env, cpu_M0, tmp);
2131            break;
2132        }
2133        gen_op_iwmmxt_movq_wRn_M0(wrd);
2134        gen_op_iwmmxt_set_mup();
2135        gen_op_iwmmxt_set_cup();
2136        break;
2137    case 0x004: case 0x404: case 0x804: case 0xc04:     /* WSRA */
2138    case 0x014: case 0x414: case 0x814: case 0xc14:
2139        if (((insn >> 22) & 3) == 0)
2140            return 1;
2141        wrd = (insn >> 12) & 0xf;
2142        rd0 = (insn >> 16) & 0xf;
2143        gen_op_iwmmxt_movq_M0_wRn(rd0);
2144        tmp = tcg_temp_new_i32();
2145        if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2146            return 1;
2147        }
2148        switch ((insn >> 22) & 3) {
2149        case 1:
2150            gen_helper_iwmmxt_sraw(cpu_M0, cpu_env, cpu_M0, tmp);
2151            break;
2152        case 2:
2153            gen_helper_iwmmxt_sral(cpu_M0, cpu_env, cpu_M0, tmp);
2154            break;
2155        case 3:
2156            gen_helper_iwmmxt_sraq(cpu_M0, cpu_env, cpu_M0, tmp);
2157            break;
2158        }
2159        gen_op_iwmmxt_movq_wRn_M0(wrd);
2160        gen_op_iwmmxt_set_mup();
2161        gen_op_iwmmxt_set_cup();
2162        break;
2163    case 0x104: case 0x504: case 0x904: case 0xd04:     /* WSLL */
2164    case 0x114: case 0x514: case 0x914: case 0xd14:
2165        if (((insn >> 22) & 3) == 0)
2166            return 1;
2167        wrd = (insn >> 12) & 0xf;
2168        rd0 = (insn >> 16) & 0xf;
2169        gen_op_iwmmxt_movq_M0_wRn(rd0);
2170        tmp = tcg_temp_new_i32();
2171        if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2172            return 1;
2173        }
2174        switch ((insn >> 22) & 3) {
2175        case 1:
2176            gen_helper_iwmmxt_sllw(cpu_M0, cpu_env, cpu_M0, tmp);
2177            break;
2178        case 2:
2179            gen_helper_iwmmxt_slll(cpu_M0, cpu_env, cpu_M0, tmp);
2180            break;
2181        case 3:
2182            gen_helper_iwmmxt_sllq(cpu_M0, cpu_env, cpu_M0, tmp);
2183            break;
2184        }
2185        gen_op_iwmmxt_movq_wRn_M0(wrd);
2186        gen_op_iwmmxt_set_mup();
2187        gen_op_iwmmxt_set_cup();
2188        break;
2189    case 0x304: case 0x704: case 0xb04: case 0xf04:     /* WROR */
2190    case 0x314: case 0x714: case 0xb14: case 0xf14:
2191        if (((insn >> 22) & 3) == 0)
2192            return 1;
2193        wrd = (insn >> 12) & 0xf;
2194        rd0 = (insn >> 16) & 0xf;
2195        gen_op_iwmmxt_movq_M0_wRn(rd0);
2196        tmp = tcg_temp_new_i32();
2197        switch ((insn >> 22) & 3) {
2198        case 1:
2199            if (gen_iwmmxt_shift(insn, 0xf, tmp)) {
2200                return 1;
2201            }
2202            gen_helper_iwmmxt_rorw(cpu_M0, cpu_env, cpu_M0, tmp);
2203            break;
2204        case 2:
2205            if (gen_iwmmxt_shift(insn, 0x1f, tmp)) {
2206                return 1;
2207            }
2208            gen_helper_iwmmxt_rorl(cpu_M0, cpu_env, cpu_M0, tmp);
2209            break;
2210        case 3:
2211            if (gen_iwmmxt_shift(insn, 0x3f, tmp)) {
2212                return 1;
2213            }
2214            gen_helper_iwmmxt_rorq(cpu_M0, cpu_env, cpu_M0, tmp);
2215            break;
2216        }
2217        gen_op_iwmmxt_movq_wRn_M0(wrd);
2218        gen_op_iwmmxt_set_mup();
2219        gen_op_iwmmxt_set_cup();
2220        break;
2221    case 0x116: case 0x316: case 0x516: case 0x716:     /* WMIN */
2222    case 0x916: case 0xb16: case 0xd16: case 0xf16:
2223        wrd = (insn >> 12) & 0xf;
2224        rd0 = (insn >> 16) & 0xf;
2225        rd1 = (insn >> 0) & 0xf;
2226        gen_op_iwmmxt_movq_M0_wRn(rd0);
2227        switch ((insn >> 22) & 3) {
2228        case 0:
2229            if (insn & (1 << 21))
2230                gen_op_iwmmxt_minsb_M0_wRn(rd1);
2231            else
2232                gen_op_iwmmxt_minub_M0_wRn(rd1);
2233            break;
2234        case 1:
2235            if (insn & (1 << 21))
2236                gen_op_iwmmxt_minsw_M0_wRn(rd1);
2237            else
2238                gen_op_iwmmxt_minuw_M0_wRn(rd1);
2239            break;
2240        case 2:
2241            if (insn & (1 << 21))
2242                gen_op_iwmmxt_minsl_M0_wRn(rd1);
2243            else
2244                gen_op_iwmmxt_minul_M0_wRn(rd1);
2245            break;
2246        case 3:
2247            return 1;
2248        }
2249        gen_op_iwmmxt_movq_wRn_M0(wrd);
2250        gen_op_iwmmxt_set_mup();
2251        break;
2252    case 0x016: case 0x216: case 0x416: case 0x616:     /* WMAX */
2253    case 0x816: case 0xa16: case 0xc16: case 0xe16:
2254        wrd = (insn >> 12) & 0xf;
2255        rd0 = (insn >> 16) & 0xf;
2256        rd1 = (insn >> 0) & 0xf;
2257        gen_op_iwmmxt_movq_M0_wRn(rd0);
2258        switch ((insn >> 22) & 3) {
2259        case 0:
2260            if (insn & (1 << 21))
2261                gen_op_iwmmxt_maxsb_M0_wRn(rd1);
2262            else
2263                gen_op_iwmmxt_maxub_M0_wRn(rd1);
2264            break;
2265        case 1:
2266            if (insn & (1 << 21))
2267                gen_op_iwmmxt_maxsw_M0_wRn(rd1);
2268            else
2269                gen_op_iwmmxt_maxuw_M0_wRn(rd1);
2270            break;
2271        case 2:
2272            if (insn & (1 << 21))
2273                gen_op_iwmmxt_maxsl_M0_wRn(rd1);
2274            else
2275                gen_op_iwmmxt_maxul_M0_wRn(rd1);
2276            break;
2277        case 3:
2278            return 1;
2279        }
2280        gen_op_iwmmxt_movq_wRn_M0(wrd);
2281        gen_op_iwmmxt_set_mup();
2282        break;
2283    case 0x002: case 0x102: case 0x202: case 0x302:     /* WALIGNI */
2284    case 0x402: case 0x502: case 0x602: case 0x702:
2285        wrd = (insn >> 12) & 0xf;
2286        rd0 = (insn >> 16) & 0xf;
2287        rd1 = (insn >> 0) & 0xf;
2288        gen_op_iwmmxt_movq_M0_wRn(rd0);
2289        iwmmxt_load_reg(cpu_V1, rd1);
2290        gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1,
2291                                tcg_constant_i32((insn >> 20) & 3));
2292        gen_op_iwmmxt_movq_wRn_M0(wrd);
2293        gen_op_iwmmxt_set_mup();
2294        break;
2295    case 0x01a: case 0x11a: case 0x21a: case 0x31a:     /* WSUB */
2296    case 0x41a: case 0x51a: case 0x61a: case 0x71a:
2297    case 0x81a: case 0x91a: case 0xa1a: case 0xb1a:
2298    case 0xc1a: case 0xd1a: case 0xe1a: case 0xf1a:
2299        wrd = (insn >> 12) & 0xf;
2300        rd0 = (insn >> 16) & 0xf;
2301        rd1 = (insn >> 0) & 0xf;
2302        gen_op_iwmmxt_movq_M0_wRn(rd0);
2303        switch ((insn >> 20) & 0xf) {
2304        case 0x0:
2305            gen_op_iwmmxt_subnb_M0_wRn(rd1);
2306            break;
2307        case 0x1:
2308            gen_op_iwmmxt_subub_M0_wRn(rd1);
2309            break;
2310        case 0x3:
2311            gen_op_iwmmxt_subsb_M0_wRn(rd1);
2312            break;
2313        case 0x4:
2314            gen_op_iwmmxt_subnw_M0_wRn(rd1);
2315            break;
2316        case 0x5:
2317            gen_op_iwmmxt_subuw_M0_wRn(rd1);
2318            break;
2319        case 0x7:
2320            gen_op_iwmmxt_subsw_M0_wRn(rd1);
2321            break;
2322        case 0x8:
2323            gen_op_iwmmxt_subnl_M0_wRn(rd1);
2324            break;
2325        case 0x9:
2326            gen_op_iwmmxt_subul_M0_wRn(rd1);
2327            break;
2328        case 0xb:
2329            gen_op_iwmmxt_subsl_M0_wRn(rd1);
2330            break;
2331        default:
2332            return 1;
2333        }
2334        gen_op_iwmmxt_movq_wRn_M0(wrd);
2335        gen_op_iwmmxt_set_mup();
2336        gen_op_iwmmxt_set_cup();
2337        break;
2338    case 0x01e: case 0x11e: case 0x21e: case 0x31e:     /* WSHUFH */
2339    case 0x41e: case 0x51e: case 0x61e: case 0x71e:
2340    case 0x81e: case 0x91e: case 0xa1e: case 0xb1e:
2341    case 0xc1e: case 0xd1e: case 0xe1e: case 0xf1e:
2342        wrd = (insn >> 12) & 0xf;
2343        rd0 = (insn >> 16) & 0xf;
2344        gen_op_iwmmxt_movq_M0_wRn(rd0);
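        /*
         * The 8-bit lane-select immediate is split across the insn:
         * bits [23:20] supply imm[7:4] and bits [3:0] supply imm[3:0].
         */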
2345        tmp = tcg_constant_i32(((insn >> 16) & 0xf0) | (insn & 0x0f));
2346        gen_helper_iwmmxt_shufh(cpu_M0, cpu_env, cpu_M0, tmp);
2347        gen_op_iwmmxt_movq_wRn_M0(wrd);
2348        gen_op_iwmmxt_set_mup();
2349        gen_op_iwmmxt_set_cup();
2350        break;
2351    case 0x018: case 0x118: case 0x218: case 0x318:     /* WADD */
2352    case 0x418: case 0x518: case 0x618: case 0x718:
2353    case 0x818: case 0x918: case 0xa18: case 0xb18:
2354    case 0xc18: case 0xd18: case 0xe18: case 0xf18:
2355        wrd = (insn >> 12) & 0xf;
2356        rd0 = (insn >> 16) & 0xf;
2357        rd1 = (insn >> 0) & 0xf;
2358        gen_op_iwmmxt_movq_M0_wRn(rd0);
2359        switch ((insn >> 20) & 0xf) {
2360        case 0x0:
2361            gen_op_iwmmxt_addnb_M0_wRn(rd1);
2362            break;
2363        case 0x1:
2364            gen_op_iwmmxt_addub_M0_wRn(rd1);
2365            break;
2366        case 0x3:
2367            gen_op_iwmmxt_addsb_M0_wRn(rd1);
2368            break;
2369        case 0x4:
2370            gen_op_iwmmxt_addnw_M0_wRn(rd1);
2371            break;
2372        case 0x5:
2373            gen_op_iwmmxt_adduw_M0_wRn(rd1);
2374            break;
2375        case 0x7:
2376            gen_op_iwmmxt_addsw_M0_wRn(rd1);
2377            break;
2378        case 0x8:
2379            gen_op_iwmmxt_addnl_M0_wRn(rd1);
2380            break;
2381        case 0x9:
2382            gen_op_iwmmxt_addul_M0_wRn(rd1);
2383            break;
2384        case 0xb:
2385            gen_op_iwmmxt_addsl_M0_wRn(rd1);
2386            break;
2387        default:
2388            return 1;
2389        }
2390        gen_op_iwmmxt_movq_wRn_M0(wrd);
2391        gen_op_iwmmxt_set_mup();
2392        gen_op_iwmmxt_set_cup();
2393        break;
2394    case 0x008: case 0x108: case 0x208: case 0x308:     /* WPACK */
2395    case 0x408: case 0x508: case 0x608: case 0x708:
2396    case 0x808: case 0x908: case 0xa08: case 0xb08:
2397    case 0xc08: case 0xd08: case 0xe08: case 0xf08:
2398        if (!(insn & (1 << 20)) || ((insn >> 22) & 3) == 0)
2399            return 1;
2400        wrd = (insn >> 12) & 0xf;
2401        rd0 = (insn >> 16) & 0xf;
2402        rd1 = (insn >> 0) & 0xf;
2403        gen_op_iwmmxt_movq_M0_wRn(rd0);
2404        switch ((insn >> 22) & 3) {
2405        case 1:
2406            if (insn & (1 << 21))
2407                gen_op_iwmmxt_packsw_M0_wRn(rd1);
2408            else
2409                gen_op_iwmmxt_packuw_M0_wRn(rd1);
2410            break;
2411        case 2:
2412            if (insn & (1 << 21))
2413                gen_op_iwmmxt_packsl_M0_wRn(rd1);
2414            else
2415                gen_op_iwmmxt_packul_M0_wRn(rd1);
2416            break;
2417        case 3:
2418            if (insn & (1 << 21))
2419                gen_op_iwmmxt_packsq_M0_wRn(rd1);
2420            else
2421                gen_op_iwmmxt_packuq_M0_wRn(rd1);
2422            break;
2423        }
2424        gen_op_iwmmxt_movq_wRn_M0(wrd);
2425        gen_op_iwmmxt_set_mup();
2426        gen_op_iwmmxt_set_cup();
2427        break;
2428    case 0x201: case 0x203: case 0x205: case 0x207:
2429    case 0x209: case 0x20b: case 0x20d: case 0x20f:
2430    case 0x211: case 0x213: case 0x215: case 0x217:
2431    case 0x219: case 0x21b: case 0x21d: case 0x21f:
2432        wrd = (insn >> 5) & 0xf;
2433        rd0 = (insn >> 12) & 0xf;
2434        rd1 = (insn >> 0) & 0xf;
2435        if (rd0 == 0xf || rd1 == 0xf)
2436            return 1;
2437        gen_op_iwmmxt_movq_M0_wRn(wrd);
2438        tmp = load_reg(s, rd0);
2439        tmp2 = load_reg(s, rd1);
2440        switch ((insn >> 16) & 0xf) {
2441        case 0x0:                                       /* TMIA */
2442            gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2443            break;
2444        case 0x8:                                       /* TMIAPH */
2445            gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2446            break;
2447        case 0xc: case 0xd: case 0xe: case 0xf:                 /* TMIAxy */
2448            if (insn & (1 << 16))
2449                tcg_gen_shri_i32(tmp, tmp, 16);
2450            if (insn & (1 << 17))
2451                tcg_gen_shri_i32(tmp2, tmp2, 16);
2452            gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2453            break;
2454        default:
2455            return 1;
2456        }
2457        gen_op_iwmmxt_movq_wRn_M0(wrd);
2458        gen_op_iwmmxt_set_mup();
2459        break;
2460    default:
2461        return 1;
2462    }
2463
2464    return 0;
2465}
2466
2467/* Disassemble an XScale DSP instruction.  Returns nonzero if an error occurred
2468   (i.e. an undefined instruction).  */
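/*
 * XScale has a single 40-bit DSP accumulator, acc0; in this implementation
 * it shares storage with iwMMXt register wR0, hence the M0/wRn moves below.
 */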
2469static int disas_dsp_insn(DisasContext *s, uint32_t insn)
2470{
2471    int acc, rd0, rd1, rdhi, rdlo;
2472    TCGv_i32 tmp, tmp2;
2473
2474    if ((insn & 0x0ff00f10) == 0x0e200010) {
2475        /* Multiply with Internal Accumulate Format */
2476        rd0 = (insn >> 12) & 0xf;
2477        rd1 = insn & 0xf;
2478        acc = (insn >> 5) & 7;
2479
2480        if (acc != 0)
2481            return 1;
2482
2483        tmp = load_reg(s, rd0);
2484        tmp2 = load_reg(s, rd1);
2485        switch ((insn >> 16) & 0xf) {
2486        case 0x0:                                       /* MIA */
2487            gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2488            break;
2489        case 0x8:                                       /* MIAPH */
2490            gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2491            break;
2492        case 0xc:                                       /* MIABB */
2493        case 0xd:                                       /* MIABT */
2494        case 0xe:                                       /* MIATB */
2495        case 0xf:                                       /* MIATT */
2496            if (insn & (1 << 16))
2497                tcg_gen_shri_i32(tmp, tmp, 16);
2498            if (insn & (1 << 17))
2499                tcg_gen_shri_i32(tmp2, tmp2, 16);
2500            gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2501            break;
2502        default:
2503            return 1;
2504        }
2505
2506        gen_op_iwmmxt_movq_wRn_M0(acc);
2507        return 0;
2508    }
2509
2510    if ((insn & 0x0fe00ff8) == 0x0c400000) {
2511        /* Internal Accumulator Access Format */
2512        rdhi = (insn >> 16) & 0xf;
2513        rdlo = (insn >> 12) & 0xf;
2514        acc = insn & 7;
2515
2516        if (acc != 0)
2517            return 1;
2518
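        /*
         * MRA/MAR move the 40-bit acc0 to/from an rdhi:rdlo pair;
         * on a read only the low 8 bits of rdhi are significant.
         */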
2519        if (insn & ARM_CP_RW_BIT) {                     /* MRA */
2520            iwmmxt_load_reg(cpu_V0, acc);
2521            tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
2522            tcg_gen_extrh_i64_i32(cpu_R[rdhi], cpu_V0);
2523            tcg_gen_andi_i32(cpu_R[rdhi], cpu_R[rdhi], (1 << (40 - 32)) - 1);
2524        } else {                                        /* MAR */
2525            tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
2526            iwmmxt_store_reg(cpu_V0, acc);
2527        }
2528        return 0;
2529    }
2530
2531    return 1;
2532}
2533
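/*
 * Emit a runtime lookup of the TB for the new CPU state and jump to it,
 * falling back to the main execution loop if no matching TB is found.
 */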
2534static void gen_goto_ptr(void)
2535{
2536    tcg_gen_lookup_and_goto_ptr();
2537}
2538
2539/* This will end the TB but doesn't guarantee we'll return to
2540 * cpu_loop_exec. Any live exit_requests will be processed as we
2541 * enter the next TB.
2542 */
2543static void gen_goto_tb(DisasContext *s, int n, target_long diff)
2544{
2545    if (translator_use_goto_tb(&s->base, s->pc_curr + diff)) {
2546        /*
2547         * For pcrel, the pc must always be up-to-date on entry to
2548         * the linked TB, so that it can use simple additions for all
2549         * further adjustments.  For !pcrel, the linked TB is compiled
2550         * to know its full virtual address, so we can delay the
2551         * update to pc to the unlinked path.  A long chain of links
2552         * can thus avoid many updates to the PC.
2553         */
2554        if (tb_cflags(s->base.tb) & CF_PCREL) {
2555            gen_update_pc(s, diff);
2556            tcg_gen_goto_tb(n);
2557        } else {
2558            tcg_gen_goto_tb(n);
2559            gen_update_pc(s, diff);
2560        }
2561        tcg_gen_exit_tb(s->base.tb, n);
2562    } else {
2563        gen_update_pc(s, diff);
2564        gen_goto_ptr();
2565    }
2566    s->base.is_jmp = DISAS_NORETURN;
2567}
2568
2569/* Jump, specifying which TB number to use if we gen_goto_tb() */
2570static void gen_jmp_tb(DisasContext *s, target_long diff, int tbno)
2571{
2572    if (unlikely(s->ss_active)) {
2573        /* An indirect jump so that we still trigger the debug exception.  */
2574        gen_update_pc(s, diff);
2575        s->base.is_jmp = DISAS_JUMP;
2576        return;
2577    }
2578    switch (s->base.is_jmp) {
2579    case DISAS_NEXT:
2580    case DISAS_TOO_MANY:
2581    case DISAS_NORETURN:
2582        /*
2583         * The normal case: just go to the destination TB.
2584         * NB: NORETURN happens if we generate code like
2585         *    gen_brcondi(l);
2586         *    gen_jmp();
2587         *    gen_set_label(l);
2588         *    gen_jmp();
2589         * on the second call to gen_jmp().
2590         */
2591        gen_goto_tb(s, tbno, diff);
2592        break;
2593    case DISAS_UPDATE_NOCHAIN:
2594    case DISAS_UPDATE_EXIT:
2595        /*
2596         * We already decided we're leaving the TB for some other reason.
2597         * Avoid using goto_tb so we really do exit back to the main loop
2598         * and don't chain to another TB.
2599         */
2600        gen_update_pc(s, diff);
2601        gen_goto_ptr();
2602        s->base.is_jmp = DISAS_NORETURN;
2603        break;
2604    default:
2605        /*
2606         * We shouldn't be emitting code for a jump and also have
2607         * is_jmp set to one of the special cases like DISAS_SWI.
2608         */
2609        g_assert_not_reached();
2610    }
2611}
2612
2613static inline void gen_jmp(DisasContext *s, target_long diff)
2614{
2615    gen_jmp_tb(s, diff, 0);
2616}
2617
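/*
 * Signed 16x16->32 multiply of the selected halfwords of t0 and t1:
 * x and y pick the top (1) or bottom (0) half of each operand, as in
 * the SMULxy/SMLAxy family.
 */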
2618static inline void gen_mulxy(TCGv_i32 t0, TCGv_i32 t1, int x, int y)
2619{
2620    if (x)
2621        tcg_gen_sari_i32(t0, t0, 16);
2622    else
2623        gen_sxth(t0);
2624    if (y)
2625        tcg_gen_sari_i32(t1, t1, 16);
2626    else
2627        gen_sxth(t1);
2628    tcg_gen_mul_i32(t0, t0, t1);
2629}
2630
2631/* Return the mask of PSR bits set by a MSR instruction.  */
2632static uint32_t msr_mask(DisasContext *s, int flags, int spsr)
2633{
2634    uint32_t mask = 0;
2635
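    /*
     * flags is the MSR field mask: bit 0 = c (PSR[7:0]), bit 1 = x
     * (PSR[15:8]), bit 2 = s (PSR[23:16]), bit 3 = f (PSR[31:24]).
     */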
2636    if (flags & (1 << 0)) {
2637        mask |= 0xff;
2638    }
2639    if (flags & (1 << 1)) {
2640        mask |= 0xff00;
2641    }
2642    if (flags & (1 << 2)) {
2643        mask |= 0xff0000;
2644    }
2645    if (flags & (1 << 3)) {
2646        mask |= 0xff000000;
2647    }
2648
2649    /* Mask out undefined and reserved bits.  */
2650    mask &= aarch32_cpsr_valid_mask(s->features, s->isar);
2651
2652    /* Mask out execution state.  */
2653    if (!spsr) {
2654        mask &= ~CPSR_EXEC;
2655    }
2656
2657    /* Mask out privileged bits.  */
2658    if (IS_USER(s)) {
2659        mask &= CPSR_USER;
2660    }
2661    return mask;
2662}
2663
2664/* Returns nonzero if access to the PSR is not permitted. Marks t0 as dead. */
2665static int gen_set_psr(DisasContext *s, uint32_t mask, int spsr, TCGv_i32 t0)
2666{
2667    TCGv_i32 tmp;
2668    if (spsr) {
2669        /* ??? This is also undefined in system mode.  */
2670        if (IS_USER(s))
2671            return 1;
2672
2673        tmp = load_cpu_field(spsr);
2674        tcg_gen_andi_i32(tmp, tmp, ~mask);
2675        tcg_gen_andi_i32(t0, t0, mask);
2676        tcg_gen_or_i32(tmp, tmp, t0);
2677        store_cpu_field(tmp, spsr);
2678    } else {
2679        gen_set_cpsr(t0, mask);
2680    }
2681    gen_lookup_tb(s);
2682    return 0;
2683}
2684
2685/* Returns nonzero if access to the PSR is not permitted.  */
2686static int gen_set_psr_im(DisasContext *s, uint32_t mask, int spsr, uint32_t val)
2687{
2688    TCGv_i32 tmp;
2689    tmp = tcg_temp_new_i32();
2690    tcg_gen_movi_i32(tmp, val);
2691    return gen_set_psr(s, mask, spsr, tmp);
2692}
2693
2694static bool msr_banked_access_decode(DisasContext *s, int r, int sysm, int rn,
2695                                     int *tgtmode, int *regno)
2696{
2697    /* Decode the r and sysm fields of MSR/MRS banked accesses into
2698     * the target mode and register number, and identify the various
2699     * unpredictable cases.
2700     * MSR (banked) and MRS (banked) are CONSTRAINED UNPREDICTABLE if:
2701     *  + executed in user mode
2702     *  + using R15 as the src/dest register
2703     *  + accessing an unimplemented register
2704     *  + accessing a register that's inaccessible at current PL/security state*
2705     *  + accessing a register that you could access with a different insn
2706     * We choose to UNDEF in all these cases.
2707     * Since we don't know which of the various AArch32 modes we are in
2708     * we have to defer some checks to runtime.
2709     * Accesses to Monitor mode registers from Secure EL1 (which implies
2710     * that EL3 is AArch64) must trap to EL3.
2711     *
2712     * If the access checks fail this function will emit code to take
2713     * an exception and return false. Otherwise it will return true,
2714     * and set *tgtmode and *regno appropriately.
2715     */
2716    /* These instructions are present only in ARMv8, or in ARMv7 with the
2717     * Virtualization Extensions.
2718     */
2719    if (!arm_dc_feature(s, ARM_FEATURE_V8) &&
2720        !arm_dc_feature(s, ARM_FEATURE_EL2)) {
2721        goto undef;
2722    }
2723
2724    if (IS_USER(s) || rn == 15) {
2725        goto undef;
2726    }
2727
2728    /* The table in the v8 ARM ARM section F5.2.3 describes the encoding
2729     * of registers into (r, sysm).
2730     */
2731    if (r) {
2732        /* SPSRs for other modes */
2733        switch (sysm) {
2734        case 0xe: /* SPSR_fiq */
2735            *tgtmode = ARM_CPU_MODE_FIQ;
2736            break;
2737        case 0x10: /* SPSR_irq */
2738            *tgtmode = ARM_CPU_MODE_IRQ;
2739            break;
2740        case 0x12: /* SPSR_svc */
2741            *tgtmode = ARM_CPU_MODE_SVC;
2742            break;
2743        case 0x14: /* SPSR_abt */
2744            *tgtmode = ARM_CPU_MODE_ABT;
2745            break;
2746        case 0x16: /* SPSR_und */
2747            *tgtmode = ARM_CPU_MODE_UND;
2748            break;
2749        case 0x1c: /* SPSR_mon */
2750            *tgtmode = ARM_CPU_MODE_MON;
2751            break;
2752        case 0x1e: /* SPSR_hyp */
2753            *tgtmode = ARM_CPU_MODE_HYP;
2754            break;
2755        default: /* unallocated */
2756            goto undef;
2757        }
2758        /* We arbitrarily assign SPSR a register number of 16. */
2759        *regno = 16;
2760    } else {
2761        /* general purpose registers for other modes */
2762        switch (sysm) {
2763        case 0x0 ... 0x6:   /* 0b00xxx : r8_usr ... r14_usr */
2764            *tgtmode = ARM_CPU_MODE_USR;
2765            *regno = sysm + 8;
2766            break;
2767        case 0x8 ... 0xe:   /* 0b01xxx : r8_fiq ... r14_fiq */
2768            *tgtmode = ARM_CPU_MODE_FIQ;
2769            *regno = sysm;
2770            break;
2771        case 0x10 ... 0x11: /* 0b1000x : r14_irq, r13_irq */
2772            *tgtmode = ARM_CPU_MODE_IRQ;
2773            *regno = sysm & 1 ? 13 : 14;
2774            break;
2775        case 0x12 ... 0x13: /* 0b1001x : r14_svc, r13_svc */
2776            *tgtmode = ARM_CPU_MODE_SVC;
2777            *regno = sysm & 1 ? 13 : 14;
2778            break;
2779        case 0x14 ... 0x15: /* 0b1010x : r14_abt, r13_abt */
2780            *tgtmode = ARM_CPU_MODE_ABT;
2781            *regno = sysm & 1 ? 13 : 14;
2782            break;
2783        case 0x16 ... 0x17: /* 0b1011x : r14_und, r13_und */
2784            *tgtmode = ARM_CPU_MODE_UND;
2785            *regno = sysm & 1 ? 13 : 14;
2786            break;
2787        case 0x1c ... 0x1d: /* 0b1110x : r14_mon, r13_mon */
2788            *tgtmode = ARM_CPU_MODE_MON;
2789            *regno = sysm & 1 ? 13 : 14;
2790            break;
2791        case 0x1e ... 0x1f: /* 0b1111x : elr_hyp, r13_hyp */
2792            *tgtmode = ARM_CPU_MODE_HYP;
2793            /* Arbitrarily pick 17 for ELR_Hyp (which is not a banked LR!) */
2794            *regno = sysm & 1 ? 13 : 17;
2795            break;
2796        default: /* unallocated */
2797            goto undef;
2798        }
2799    }
2800
2801    /* Catch the 'accessing inaccessible register' cases we can detect
2802     * at translate time.
2803     */
2804    switch (*tgtmode) {
2805    case ARM_CPU_MODE_MON:
2806        if (!arm_dc_feature(s, ARM_FEATURE_EL3) || s->ns) {
2807            goto undef;
2808        }
2809        if (s->current_el == 1) {
2810            /* If we're in Secure EL1 (which implies that EL3 is AArch64)
2811             * then accesses to Mon registers trap to Secure EL2, if it exists,
2812             * otherwise EL3.
2813             */
2814            TCGv_i32 tcg_el;
2815
2816            if (arm_dc_feature(s, ARM_FEATURE_AARCH64) &&
2817                dc_isar_feature(aa64_sel2, s)) {
2818                /* Target EL is EL<3 minus SCR_EL3.EEL2> */
2819                tcg_el = load_cpu_field_low32(cp15.scr_el3);
2820                tcg_gen_sextract_i32(tcg_el, tcg_el, ctz32(SCR_EEL2), 1);
2821                tcg_gen_addi_i32(tcg_el, tcg_el, 3);
2822            } else {
2823                tcg_el = tcg_constant_i32(3);
2824            }
2825
2826            gen_exception_insn_el_v(s, 0, EXCP_UDEF,
2827                                    syn_uncategorized(), tcg_el);
2828            return false;
2829        }
2830        break;
2831    case ARM_CPU_MODE_HYP:
2832        /*
2833         * SPSR_hyp and r13_hyp can only be accessed from Monitor mode
2834         * (and so we can forbid accesses from EL2 or below). elr_hyp
2835         * can be accessed also from Hyp mode, so forbid accesses from
2836         * EL0 or EL1.
2837         */
2838        if (!arm_dc_feature(s, ARM_FEATURE_EL2) || s->current_el < 2 ||
2839            (s->current_el < 3 && *regno != 17)) {
2840            goto undef;
2841        }
2842        break;
2843    default:
2844        break;
2845    }
2846
2847    return true;
2848
2849undef:
2850    /* If we get here then some access check did not pass */
2851    gen_exception_insn(s, 0, EXCP_UDEF, syn_uncategorized());
2852    return false;
2853}
2854
2855static void gen_msr_banked(DisasContext *s, int r, int sysm, int rn)
2856{
2857    TCGv_i32 tcg_reg;
2858    int tgtmode = 0, regno = 0;
2859
2860    if (!msr_banked_access_decode(s, r, sysm, rn, &tgtmode, &regno)) {
2861        return;
2862    }
2863
2864    /* Sync state because msr_banked() can raise exceptions */
2865    gen_set_condexec(s);
2866    gen_update_pc(s, 0);
2867    tcg_reg = load_reg(s, rn);
2868    gen_helper_msr_banked(cpu_env, tcg_reg,
2869                          tcg_constant_i32(tgtmode),
2870                          tcg_constant_i32(regno));
2871    s->base.is_jmp = DISAS_UPDATE_EXIT;
2872}
2873
2874static void gen_mrs_banked(DisasContext *s, int r, int sysm, int rn)
2875{
2876    TCGv_i32 tcg_reg;
2877    int tgtmode = 0, regno = 0;
2878
2879    if (!msr_banked_access_decode(s, r, sysm, rn, &tgtmode, &regno)) {
2880        return;
2881    }
2882
2883    /* Sync state because mrs_banked() can raise exceptions */
2884    gen_set_condexec(s);
2885    gen_update_pc(s, 0);
2886    tcg_reg = tcg_temp_new_i32();
2887    gen_helper_mrs_banked(tcg_reg, cpu_env,
2888                          tcg_constant_i32(tgtmode),
2889                          tcg_constant_i32(regno));
2890    store_reg(s, rn, tcg_reg);
2891    s->base.is_jmp = DISAS_UPDATE_EXIT;
2892}
2893
2894/* Store value to PC as for an exception return (i.e. don't
2895 * mask bits). The subsequent call to gen_helper_cpsr_write_eret()
2896 * will do the masking based on the new value of the Thumb bit.
2897 */
2898static void store_pc_exc_ret(DisasContext *s, TCGv_i32 pc)
2899{
2900    tcg_gen_mov_i32(cpu_R[15], pc);
2901}
2902
2903/* Generate a v6 exception return.  Marks both values as dead.  */
2904static void gen_rfe(DisasContext *s, TCGv_i32 pc, TCGv_i32 cpsr)
2905{
2906    store_pc_exc_ret(s, pc);
2907    /* The cpsr_write_eret helper will mask the low bits of PC
2908     * appropriately depending on the new Thumb bit, so it must
2909     * be called after storing the new PC.
2910     */
2911    if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
2912        gen_io_start();
2913    }
2914    gen_helper_cpsr_write_eret(cpu_env, cpsr);
2915    /* Must exit loop to check un-masked IRQs */
2916    s->base.is_jmp = DISAS_EXIT;
2917}
2918
2919/* Generate an old-style exception return. Marks pc as dead. */
2920static void gen_exception_return(DisasContext *s, TCGv_i32 pc)
2921{
2922    gen_rfe(s, pc, load_cpu_field(spsr));
2923}
2924
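/*
 * Expand a three-operand gvec operation whose helper also needs a pointer
 * to the cumulative saturation flag, FPSCR.QC.
 */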
2925static void gen_gvec_fn3_qc(uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs,
2926                            uint32_t opr_sz, uint32_t max_sz,
2927                            gen_helper_gvec_3_ptr *fn)
2928{
2929    TCGv_ptr qc_ptr = tcg_temp_new_ptr();
2930
2931    tcg_gen_addi_ptr(qc_ptr, cpu_env, offsetof(CPUARMState, vfp.qc));
2932    tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, qc_ptr,
2933                       opr_sz, max_sz, 0, fn);
2934}
2935
2936void gen_gvec_sqrdmlah_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
2937                          uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
2938{
2939    static gen_helper_gvec_3_ptr * const fns[2] = {
2940        gen_helper_gvec_qrdmlah_s16, gen_helper_gvec_qrdmlah_s32
2941    };
2942    tcg_debug_assert(vece >= 1 && vece <= 2);
2943    gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
2944}
2945
2946void gen_gvec_sqrdmlsh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
2947                          uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
2948{
2949    static gen_helper_gvec_3_ptr * const fns[2] = {
2950        gen_helper_gvec_qrdmlsh_s16, gen_helper_gvec_qrdmlsh_s32
2951    };
2952    tcg_debug_assert(vece >= 1 && vece <= 2);
2953    gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
2954}
2955
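/*
 * Compare-against-zero expanders: each element of the result is all ones
 * if the condition holds and all zeros otherwise (setcond yields 0/1, so
 * the scalar variants negate to get 0/-1).
 */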
2956#define GEN_CMP0(NAME, COND)                                            \
2957    static void gen_##NAME##0_i32(TCGv_i32 d, TCGv_i32 a)               \
2958    {                                                                   \
2959        tcg_gen_setcondi_i32(COND, d, a, 0);                            \
2960        tcg_gen_neg_i32(d, d);                                          \
2961    }                                                                   \
2962    static void gen_##NAME##0_i64(TCGv_i64 d, TCGv_i64 a)               \
2963    {                                                                   \
2964        tcg_gen_setcondi_i64(COND, d, a, 0);                            \
2965        tcg_gen_neg_i64(d, d);                                          \
2966    }                                                                   \
2967    static void gen_##NAME##0_vec(unsigned vece, TCGv_vec d, TCGv_vec a) \
2968    {                                                                   \
2969        TCGv_vec zero = tcg_constant_vec_matching(d, vece, 0);          \
2970        tcg_gen_cmp_vec(COND, vece, d, a, zero);                        \
2971    }                                                                   \
2972    void gen_gvec_##NAME##0(unsigned vece, uint32_t d, uint32_t m,      \
2973                            uint32_t opr_sz, uint32_t max_sz)           \
2974    {                                                                   \
2975        const GVecGen2 op[4] = {                                        \
2976            { .fno = gen_helper_gvec_##NAME##0_b,                       \
2977              .fniv = gen_##NAME##0_vec,                                \
2978              .opt_opc = vecop_list_cmp,                                \
2979              .vece = MO_8 },                                           \
2980            { .fno = gen_helper_gvec_##NAME##0_h,                       \
2981              .fniv = gen_##NAME##0_vec,                                \
2982              .opt_opc = vecop_list_cmp,                                \
2983              .vece = MO_16 },                                          \
2984            { .fni4 = gen_##NAME##0_i32,                                \
2985              .fniv = gen_##NAME##0_vec,                                \
2986              .opt_opc = vecop_list_cmp,                                \
2987              .vece = MO_32 },                                          \
2988            { .fni8 = gen_##NAME##0_i64,                                \
2989              .fniv = gen_##NAME##0_vec,                                \
2990              .opt_opc = vecop_list_cmp,                                \
2991              .prefer_i64 = TCG_TARGET_REG_BITS == 64,                  \
2992              .vece = MO_64 },                                          \
2993        };                                                              \
2994        tcg_gen_gvec_2(d, m, opr_sz, max_sz, &op[vece]);                \
2995    }
2996
2997static const TCGOpcode vecop_list_cmp[] = {
2998    INDEX_op_cmp_vec, 0
2999};
3000
3001GEN_CMP0(ceq, TCG_COND_EQ)
3002GEN_CMP0(cle, TCG_COND_LE)
3003GEN_CMP0(cge, TCG_COND_GE)
3004GEN_CMP0(clt, TCG_COND_LT)
3005GEN_CMP0(cgt, TCG_COND_GT)
3006
3007#undef GEN_CMP0
3008
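/*
 * Signed shift-right-and-accumulate expanders. The 8- and 16-bit variants
 * use the tcg_gen_vec_* helpers, which operate on lanes packed into a
 * single i64.
 */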
3009static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3010{
3011    tcg_gen_vec_sar8i_i64(a, a, shift);
3012    tcg_gen_vec_add8_i64(d, d, a);
3013}
3014
3015static void gen_ssra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3016{
3017    tcg_gen_vec_sar16i_i64(a, a, shift);
3018    tcg_gen_vec_add16_i64(d, d, a);
3019}
3020
3021static void gen_ssra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3022{
3023    tcg_gen_sari_i32(a, a, shift);
3024    tcg_gen_add_i32(d, d, a);
3025}
3026
3027static void gen_ssra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3028{
3029    tcg_gen_sari_i64(a, a, shift);
3030    tcg_gen_add_i64(d, d, a);
3031}
3032
3033static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3034{
3035    tcg_gen_sari_vec(vece, a, a, sh);
3036    tcg_gen_add_vec(vece, d, d, a);
3037}
3038
3039void gen_gvec_ssra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3040                   int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3041{
3042    static const TCGOpcode vecop_list[] = {
3043        INDEX_op_sari_vec, INDEX_op_add_vec, 0
3044    };
3045    static const GVecGen2i ops[4] = {
3046        { .fni8 = gen_ssra8_i64,
3047          .fniv = gen_ssra_vec,
3048          .fno = gen_helper_gvec_ssra_b,
3049          .load_dest = true,
3050          .opt_opc = vecop_list,
3051          .vece = MO_8 },
3052        { .fni8 = gen_ssra16_i64,
3053          .fniv = gen_ssra_vec,
3054          .fno = gen_helper_gvec_ssra_h,
3055          .load_dest = true,
3056          .opt_opc = vecop_list,
3057          .vece = MO_16 },
3058        { .fni4 = gen_ssra32_i32,
3059          .fniv = gen_ssra_vec,
3060          .fno = gen_helper_gvec_ssra_s,
3061          .load_dest = true,
3062          .opt_opc = vecop_list,
3063          .vece = MO_32 },
3064        { .fni8 = gen_ssra64_i64,
3065          .fniv = gen_ssra_vec,
3066          .fno = gen_helper_gvec_ssra_d,
3067          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3068          .opt_opc = vecop_list,
3069          .load_dest = true,
3070          .vece = MO_64 },
3071    };
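    /*
     * .load_dest makes the expander pass the current destination value to
     * the per-element callbacks, so the shifted input accumulates into it.
     */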
3072
3073    /* tszimm encoding produces immediates in the range [1..esize]. */
3074    tcg_debug_assert(shift > 0);
3075    tcg_debug_assert(shift <= (8 << vece));
3076
3077    /*
3078     * Shifts larger than the element size are architecturally valid.
3079     * A signed shift of that size yields all sign bits, so clamp to esize - 1.
3080     */
3081    shift = MIN(shift, (8 << vece) - 1);
3082    tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3083}
3084
3085static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3086{
3087    tcg_gen_vec_shr8i_i64(a, a, shift);
3088    tcg_gen_vec_add8_i64(d, d, a);
3089}
3090
3091static void gen_usra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3092{
3093    tcg_gen_vec_shr16i_i64(a, a, shift);
3094    tcg_gen_vec_add16_i64(d, d, a);
3095}
3096
3097static void gen_usra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3098{
3099    tcg_gen_shri_i32(a, a, shift);
3100    tcg_gen_add_i32(d, d, a);
3101}
3102
3103static void gen_usra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3104{
3105    tcg_gen_shri_i64(a, a, shift);
3106    tcg_gen_add_i64(d, d, a);
3107}
3108
3109static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3110{
3111    tcg_gen_shri_vec(vece, a, a, sh);
3112    tcg_gen_add_vec(vece, d, d, a);
3113}
3114
3115void gen_gvec_usra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3116                   int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3117{
3118    static const TCGOpcode vecop_list[] = {
3119        INDEX_op_shri_vec, INDEX_op_add_vec, 0
3120    };
3121    static const GVecGen2i ops[4] = {
3122        { .fni8 = gen_usra8_i64,
3123          .fniv = gen_usra_vec,
3124          .fno = gen_helper_gvec_usra_b,
3125          .load_dest = true,
3126          .opt_opc = vecop_list,
3127          .vece = MO_8, },
3128        { .fni8 = gen_usra16_i64,
3129          .fniv = gen_usra_vec,
3130          .fno = gen_helper_gvec_usra_h,
3131          .load_dest = true,
3132          .opt_opc = vecop_list,
3133          .vece = MO_16, },
3134        { .fni4 = gen_usra32_i32,
3135          .fniv = gen_usra_vec,
3136          .fno = gen_helper_gvec_usra_s,
3137          .load_dest = true,
3138          .opt_opc = vecop_list,
3139          .vece = MO_32, },
3140        { .fni8 = gen_usra64_i64,
3141          .fniv = gen_usra_vec,
3142          .fno = gen_helper_gvec_usra_d,
3143          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3144          .load_dest = true,
3145          .opt_opc = vecop_list,
3146          .vece = MO_64, },
3147    };
3148
3149    /* tszimm encoding produces immediates in the range [1..esize]. */
3150    tcg_debug_assert(shift > 0);
3151    tcg_debug_assert(shift <= (8 << vece));
3152
3153    /*
3154     * Shifts larger than the element size are architecturally valid.
3155     * An unsigned shift by esize or more yields all zeros to accumulate: a nop.
3156     */
3157    if (shift < (8 << vece)) {
3158        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3159    } else {
3160        /* Nop, but we do need to clear the tail. */
3161        tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
3162    }
3163}
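
/*
 * Note that even when the shift degenerates to a nop, the gvec move is
 * still required so that the tail bytes between opr_sz and max_sz are
 * zeroed, as the generic gvec expansion contract requires.
 */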
3164
3165/*
3166 * Shift one less than the requested amount, and the low bit is
3167 * the rounding bit.  For the 8 and 16-bit operations, because we
3168 * mask the low bit, we can perform a normal integer shift instead
3169 * of a vector shift.
3170 */
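/*
 * Illustrative example (arbitrary values): a rounding shift right of 7
 * by 2 computes t = (7 >> 1) & 1 = 1 and then (7 >> 2) + t = 1 + 1 = 2,
 * i.e. (7 + 2) >> 2, the rounded quotient.
 */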
3171static void gen_srshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3172{
3173    TCGv_i64 t = tcg_temp_new_i64();
3174
3175    tcg_gen_shri_i64(t, a, sh - 1);
3176    tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
3177    tcg_gen_vec_sar8i_i64(d, a, sh);
3178    tcg_gen_vec_add8_i64(d, d, t);
3179}
3180
3181static void gen_srshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3182{
3183    TCGv_i64 t = tcg_temp_new_i64();
3184
3185    tcg_gen_shri_i64(t, a, sh - 1);
3186    tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
3187    tcg_gen_vec_sar16i_i64(d, a, sh);
3188    tcg_gen_vec_add16_i64(d, d, t);
3189}
3190
3191static void gen_srshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3192{
3193    TCGv_i32 t;
3194
3195    /* Handle shift by the input size for the benefit of trans_SRSHR_ri */
3196    if (sh == 32) {
3197        tcg_gen_movi_i32(d, 0);
3198        return;
3199    }
3200    t = tcg_temp_new_i32();
3201    tcg_gen_extract_i32(t, a, sh - 1, 1);
3202    tcg_gen_sari_i32(d, a, sh);
3203    tcg_gen_add_i32(d, d, t);
3204}
3205
3206static void gen_srshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3207{
3208    TCGv_i64 t = tcg_temp_new_i64();
3209
3210    tcg_gen_extract_i64(t, a, sh - 1, 1);
3211    tcg_gen_sari_i64(d, a, sh);
3212    tcg_gen_add_i64(d, d, t);
3213}
3214
3215static void gen_srshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3216{
3217    TCGv_vec t = tcg_temp_new_vec_matching(d);
3218    TCGv_vec ones = tcg_temp_new_vec_matching(d);
3219
3220    tcg_gen_shri_vec(vece, t, a, sh - 1);
3221    tcg_gen_dupi_vec(vece, ones, 1);
3222    tcg_gen_and_vec(vece, t, t, ones);
3223    tcg_gen_sari_vec(vece, d, a, sh);
3224    tcg_gen_add_vec(vece, d, d, t);
3225}
3226
3227void gen_gvec_srshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3228                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3229{
3230    static const TCGOpcode vecop_list[] = {
3231        INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
3232    };
3233    static const GVecGen2i ops[4] = {
3234        { .fni8 = gen_srshr8_i64,
3235          .fniv = gen_srshr_vec,
3236          .fno = gen_helper_gvec_srshr_b,
3237          .opt_opc = vecop_list,
3238          .vece = MO_8 },
3239        { .fni8 = gen_srshr16_i64,
3240          .fniv = gen_srshr_vec,
3241          .fno = gen_helper_gvec_srshr_h,
3242          .opt_opc = vecop_list,
3243          .vece = MO_16 },
3244        { .fni4 = gen_srshr32_i32,
3245          .fniv = gen_srshr_vec,
3246          .fno = gen_helper_gvec_srshr_s,
3247          .opt_opc = vecop_list,
3248          .vece = MO_32 },
3249        { .fni8 = gen_srshr64_i64,
3250          .fniv = gen_srshr_vec,
3251          .fno = gen_helper_gvec_srshr_d,
3252          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3253          .opt_opc = vecop_list,
3254          .vece = MO_64 },
3255    };
3256
3257    /* tszimm encoding produces immediates in the range [1..esize] */
3258    tcg_debug_assert(shift > 0);
3259    tcg_debug_assert(shift <= (8 << vece));
3260
3261    if (shift == (8 << vece)) {
3262        /*
3263         * Shifts larger than the element size are architecturally valid.
3264         * A signed shift by esize yields all sign bits.  With rounding, this produces
3265         *   (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
3266         * I.e. always zero.
3267         */
3268        tcg_gen_gvec_dup_imm(vece, rd_ofs, opr_sz, max_sz, 0);
3269    } else {
3270        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3271    }
3272}
3273
3274static void gen_srsra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3275{
3276    TCGv_i64 t = tcg_temp_new_i64();
3277
3278    gen_srshr8_i64(t, a, sh);
3279    tcg_gen_vec_add8_i64(d, d, t);
3280}
3281
3282static void gen_srsra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3283{
3284    TCGv_i64 t = tcg_temp_new_i64();
3285
3286    gen_srshr16_i64(t, a, sh);
3287    tcg_gen_vec_add16_i64(d, d, t);
3288}
3289
3290static void gen_srsra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3291{
3292    TCGv_i32 t = tcg_temp_new_i32();
3293
3294    gen_srshr32_i32(t, a, sh);
3295    tcg_gen_add_i32(d, d, t);
3296}
3297
3298static void gen_srsra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3299{
3300    TCGv_i64 t = tcg_temp_new_i64();
3301
3302    gen_srshr64_i64(t, a, sh);
3303    tcg_gen_add_i64(d, d, t);
3304}
3305
3306static void gen_srsra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3307{
3308    TCGv_vec t = tcg_temp_new_vec_matching(d);
3309
3310    gen_srshr_vec(vece, t, a, sh);
3311    tcg_gen_add_vec(vece, d, d, t);
3312}
3313
3314void gen_gvec_srsra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3315                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3316{
3317    static const TCGOpcode vecop_list[] = {
3318        INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
3319    };
3320    static const GVecGen2i ops[4] = {
3321        { .fni8 = gen_srsra8_i64,
3322          .fniv = gen_srsra_vec,
3323          .fno = gen_helper_gvec_srsra_b,
3324          .opt_opc = vecop_list,
3325          .load_dest = true,
3326          .vece = MO_8 },
3327        { .fni8 = gen_srsra16_i64,
3328          .fniv = gen_srsra_vec,
3329          .fno = gen_helper_gvec_srsra_h,
3330          .opt_opc = vecop_list,
3331          .load_dest = true,
3332          .vece = MO_16 },
3333        { .fni4 = gen_srsra32_i32,
3334          .fniv = gen_srsra_vec,
3335          .fno = gen_helper_gvec_srsra_s,
3336          .opt_opc = vecop_list,
3337          .load_dest = true,
3338          .vece = MO_32 },
3339        { .fni8 = gen_srsra64_i64,
3340          .fniv = gen_srsra_vec,
3341          .fno = gen_helper_gvec_srsra_d,
3342          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3343          .opt_opc = vecop_list,
3344          .load_dest = true,
3345          .vece = MO_64 },
3346    };
3347
3348    /* tszimm encoding produces immediates in the range [1..esize] */
3349    tcg_debug_assert(shift > 0);
3350    tcg_debug_assert(shift <= (8 << vece));
3351
3352    /*
3353     * Shifts larger than the element size are architecturally valid.
3354     * A signed shift by esize yields all sign bits.  With rounding, this produces
3355     *   (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
3356     * I.e. always zero.  With accumulation, this leaves D unchanged.
3357     */
3358    if (shift == (8 << vece)) {
3359        /* Nop, but we do need to clear the tail. */
3360        tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
3361    } else {
3362        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3363    }
3364}
3365
3366static void gen_urshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3367{
3368    TCGv_i64 t = tcg_temp_new_i64();
3369
3370    tcg_gen_shri_i64(t, a, sh - 1);
3371    tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
3372    tcg_gen_vec_shr8i_i64(d, a, sh);
3373    tcg_gen_vec_add8_i64(d, d, t);
3374}
3375
3376static void gen_urshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3377{
3378    TCGv_i64 t = tcg_temp_new_i64();
3379
3380    tcg_gen_shri_i64(t, a, sh - 1);
3381    tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
3382    tcg_gen_vec_shr16i_i64(d, a, sh);
3383    tcg_gen_vec_add16_i64(d, d, t);
3384}
3385
3386static void gen_urshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3387{
3388    TCGv_i32 t;
3389
3390    /* Handle shift by the input size for the benefit of trans_URSHR_ri */
3391    if (sh == 32) {
3392        tcg_gen_extract_i32(d, a, sh - 1, 1);
3393        return;
3394    }
3395    t = tcg_temp_new_i32();
3396    tcg_gen_extract_i32(t, a, sh - 1, 1);
3397    tcg_gen_shri_i32(d, a, sh);
3398    tcg_gen_add_i32(d, d, t);
3399}
3400
3401static void gen_urshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3402{
3403    TCGv_i64 t = tcg_temp_new_i64();
3404
3405    tcg_gen_extract_i64(t, a, sh - 1, 1);
3406    tcg_gen_shri_i64(d, a, sh);
3407    tcg_gen_add_i64(d, d, t);
3408}
3409
3410static void gen_urshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t shift)
3411{
3412    TCGv_vec t = tcg_temp_new_vec_matching(d);
3413    TCGv_vec ones = tcg_temp_new_vec_matching(d);
3414
3415    tcg_gen_shri_vec(vece, t, a, shift - 1);
3416    tcg_gen_dupi_vec(vece, ones, 1);
3417    tcg_gen_and_vec(vece, t, t, ones);
3418    tcg_gen_shri_vec(vece, d, a, shift);
3419    tcg_gen_add_vec(vece, d, d, t);
3420}
3421
3422void gen_gvec_urshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3423                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3424{
3425    static const TCGOpcode vecop_list[] = {
3426        INDEX_op_shri_vec, INDEX_op_add_vec, 0
3427    };
3428    static const GVecGen2i ops[4] = {
3429        { .fni8 = gen_urshr8_i64,
3430          .fniv = gen_urshr_vec,
3431          .fno = gen_helper_gvec_urshr_b,
3432          .opt_opc = vecop_list,
3433          .vece = MO_8 },
3434        { .fni8 = gen_urshr16_i64,
3435          .fniv = gen_urshr_vec,
3436          .fno = gen_helper_gvec_urshr_h,
3437          .opt_opc = vecop_list,
3438          .vece = MO_16 },
3439        { .fni4 = gen_urshr32_i32,
3440          .fniv = gen_urshr_vec,
3441          .fno = gen_helper_gvec_urshr_s,
3442          .opt_opc = vecop_list,
3443          .vece = MO_32 },
3444        { .fni8 = gen_urshr64_i64,
3445          .fniv = gen_urshr_vec,
3446          .fno = gen_helper_gvec_urshr_d,
3447          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3448          .opt_opc = vecop_list,
3449          .vece = MO_64 },
3450    };
3451
3452    /* tszimm encoding produces immediates in the range [1..esize] */
3453    tcg_debug_assert(shift > 0);
3454    tcg_debug_assert(shift <= (8 << vece));
3455
3456    if (shift == (8 << vece)) {
3457        /*
3458         * Shifts larger than the element size are architecturally valid.
3459         * An unsigned shift by esize yields zero.  With rounding, this produces a
3460         * copy of the most significant bit.
3461         */
3462        tcg_gen_gvec_shri(vece, rd_ofs, rm_ofs, shift - 1, opr_sz, max_sz);
3463    } else {
3464        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3465    }
3466}
3467
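/*
 * For the rounding accumulate (URSRA) a shift by esize is not a nop:
 * the rounding bit is the most significant bit of the input, so the
 * special cases below add just that bit to the accumulator.
 */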
3468static void gen_ursra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3469{
3470    TCGv_i64 t = tcg_temp_new_i64();
3471
3472    if (sh == 8) {
3473        tcg_gen_vec_shr8i_i64(t, a, 7);
3474    } else {
3475        gen_urshr8_i64(t, a, sh);
3476    }
3477    tcg_gen_vec_add8_i64(d, d, t);
3478}
3479
3480static void gen_ursra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3481{
3482    TCGv_i64 t = tcg_temp_new_i64();
3483
3484    if (sh == 16) {
3485        tcg_gen_vec_shr16i_i64(t, a, 15);
3486    } else {
3487        gen_urshr16_i64(t, a, sh);
3488    }
3489    tcg_gen_vec_add16_i64(d, d, t);
3490}
3491
3492static void gen_ursra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3493{
3494    TCGv_i32 t = tcg_temp_new_i32();
3495
3496    if (sh == 32) {
3497        tcg_gen_shri_i32(t, a, 31);
3498    } else {
3499        gen_urshr32_i32(t, a, sh);
3500    }
3501    tcg_gen_add_i32(d, d, t);
3502}
3503
3504static void gen_ursra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3505{
3506    TCGv_i64 t = tcg_temp_new_i64();
3507
3508    if (sh == 64) {
3509        tcg_gen_shri_i64(t, a, 63);
3510    } else {
3511        gen_urshr64_i64(t, a, sh);
3512    }
3513    tcg_gen_add_i64(d, d, t);
3514}
3515
3516static void gen_ursra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3517{
3518    TCGv_vec t = tcg_temp_new_vec_matching(d);
3519
3520    if (sh == (8 << vece)) {
3521        tcg_gen_shri_vec(vece, t, a, sh - 1);
3522    } else {
3523        gen_urshr_vec(vece, t, a, sh);
3524    }
3525    tcg_gen_add_vec(vece, d, d, t);
3526}
3527
3528void gen_gvec_ursra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3529                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3530{
3531    static const TCGOpcode vecop_list[] = {
3532        INDEX_op_shri_vec, INDEX_op_add_vec, 0
3533    };
3534    static const GVecGen2i ops[4] = {
3535        { .fni8 = gen_ursra8_i64,
3536          .fniv = gen_ursra_vec,
3537          .fno = gen_helper_gvec_ursra_b,
3538          .opt_opc = vecop_list,
3539          .load_dest = true,
3540          .vece = MO_8 },
3541        { .fni8 = gen_ursra16_i64,
3542          .fniv = gen_ursra_vec,
3543          .fno = gen_helper_gvec_ursra_h,
3544          .opt_opc = vecop_list,
3545          .load_dest = true,
3546          .vece = MO_16 },
3547        { .fni4 = gen_ursra32_i32,
3548          .fniv = gen_ursra_vec,
3549          .fno = gen_helper_gvec_ursra_s,
3550          .opt_opc = vecop_list,
3551          .load_dest = true,
3552          .vece = MO_32 },
3553        { .fni8 = gen_ursra64_i64,
3554          .fniv = gen_ursra_vec,
3555          .fno = gen_helper_gvec_ursra_d,
3556          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3557          .opt_opc = vecop_list,
3558          .load_dest = true,
3559          .vece = MO_64 },
3560    };
3561
3562    /* tszimm encoding produces immediates in the range [1..esize] */
3563    tcg_debug_assert(shift > 0);
3564    tcg_debug_assert(shift <= (8 << vece));
3565
3566    tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3567}
3568
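/*
 * SRI (shift right and insert): the shifted source supplies the low
 * (esize - shift) bits of each destination element, while the top
 * 'shift' bits of the destination are preserved.
 */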
3569static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3570{
3571    uint64_t mask = dup_const(MO_8, 0xff >> shift);
3572    TCGv_i64 t = tcg_temp_new_i64();
3573
3574    tcg_gen_shri_i64(t, a, shift);
3575    tcg_gen_andi_i64(t, t, mask);
3576    tcg_gen_andi_i64(d, d, ~mask);
3577    tcg_gen_or_i64(d, d, t);
3578}
3579
3580static void gen_shr16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3581{
3582    uint64_t mask = dup_const(MO_16, 0xffff >> shift);
3583    TCGv_i64 t = tcg_temp_new_i64();
3584
3585    tcg_gen_shri_i64(t, a, shift);
3586    tcg_gen_andi_i64(t, t, mask);
3587    tcg_gen_andi_i64(d, d, ~mask);
3588    tcg_gen_or_i64(d, d, t);
3589}
3590
3591static void gen_shr32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3592{
3593    tcg_gen_shri_i32(a, a, shift);
3594    tcg_gen_deposit_i32(d, d, a, 0, 32 - shift);
3595}
3596
3597static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3598{
3599    tcg_gen_shri_i64(a, a, shift);
3600    tcg_gen_deposit_i64(d, d, a, 0, 64 - shift);
3601}
3602
3603static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3604{
3605    TCGv_vec t = tcg_temp_new_vec_matching(d);
3606    TCGv_vec m = tcg_temp_new_vec_matching(d);
3607
3608    tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh));
3609    tcg_gen_shri_vec(vece, t, a, sh);
3610    tcg_gen_and_vec(vece, d, d, m);
3611    tcg_gen_or_vec(vece, d, d, t);
3612}
3613
3614void gen_gvec_sri(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3615                  int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3616{
3617    static const TCGOpcode vecop_list[] = { INDEX_op_shri_vec, 0 };
3618    const GVecGen2i ops[4] = {
3619        { .fni8 = gen_shr8_ins_i64,
3620          .fniv = gen_shr_ins_vec,
3621          .fno = gen_helper_gvec_sri_b,
3622          .load_dest = true,
3623          .opt_opc = vecop_list,
3624          .vece = MO_8 },
3625        { .fni8 = gen_shr16_ins_i64,
3626          .fniv = gen_shr_ins_vec,
3627          .fno = gen_helper_gvec_sri_h,
3628          .load_dest = true,
3629          .opt_opc = vecop_list,
3630          .vece = MO_16 },
3631        { .fni4 = gen_shr32_ins_i32,
3632          .fniv = gen_shr_ins_vec,
3633          .fno = gen_helper_gvec_sri_s,
3634          .load_dest = true,
3635          .opt_opc = vecop_list,
3636          .vece = MO_32 },
3637        { .fni8 = gen_shr64_ins_i64,
3638          .fniv = gen_shr_ins_vec,
3639          .fno = gen_helper_gvec_sri_d,
3640          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3641          .load_dest = true,
3642          .opt_opc = vecop_list,
3643          .vece = MO_64 },
3644    };
3645
3646    /* tszimm encoding produces immediates in the range [1..esize]. */
3647    tcg_debug_assert(shift > 0);
3648    tcg_debug_assert(shift <= (8 << vece));
3649
3650    /* Shift of esize leaves destination unchanged. */
3651    if (shift < (8 << vece)) {
3652        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3653    } else {
3654        /* Nop, but we do need to clear the tail. */
3655        tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
3656    }
3657}
3658
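/*
 * SLI (shift left and insert): the shifted source supplies the top
 * (esize - shift) bits of each destination element, while the low
 * 'shift' bits of the destination are preserved.
 */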
3659static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3660{
3661    uint64_t mask = dup_const(MO_8, 0xff << shift);
3662    TCGv_i64 t = tcg_temp_new_i64();
3663
3664    tcg_gen_shli_i64(t, a, shift);
3665    tcg_gen_andi_i64(t, t, mask);
3666    tcg_gen_andi_i64(d, d, ~mask);
3667    tcg_gen_or_i64(d, d, t);
3668}
3669
3670static void gen_shl16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3671{
3672    uint64_t mask = dup_const(MO_16, 0xffff << shift);
3673    TCGv_i64 t = tcg_temp_new_i64();
3674
3675    tcg_gen_shli_i64(t, a, shift);
3676    tcg_gen_andi_i64(t, t, mask);
3677    tcg_gen_andi_i64(d, d, ~mask);
3678    tcg_gen_or_i64(d, d, t);
3679}
3680
3681static void gen_shl32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3682{
3683    tcg_gen_deposit_i32(d, d, a, shift, 32 - shift);
3684}
3685
3686static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3687{
3688    tcg_gen_deposit_i64(d, d, a, shift, 64 - shift);
3689}
3690
3691static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3692{
3693    TCGv_vec t = tcg_temp_new_vec_matching(d);
3694    TCGv_vec m = tcg_temp_new_vec_matching(d);
3695
3696    tcg_gen_shli_vec(vece, t, a, sh);
3697    tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK(0, sh));
3698    tcg_gen_and_vec(vece, d, d, m);
3699    tcg_gen_or_vec(vece, d, d, t);
3700}
3701
3702void gen_gvec_sli(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3703                  int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3704{
3705    static const TCGOpcode vecop_list[] = { INDEX_op_shli_vec, 0 };
3706    const GVecGen2i ops[4] = {
3707        { .fni8 = gen_shl8_ins_i64,
3708          .fniv = gen_shl_ins_vec,
3709          .fno = gen_helper_gvec_sli_b,
3710          .load_dest = true,
3711          .opt_opc = vecop_list,
3712          .vece = MO_8 },
3713        { .fni8 = gen_shl16_ins_i64,
3714          .fniv = gen_shl_ins_vec,
3715          .fno = gen_helper_gvec_sli_h,
3716          .load_dest = true,
3717          .opt_opc = vecop_list,
3718          .vece = MO_16 },
3719        { .fni4 = gen_shl32_ins_i32,
3720          .fniv = gen_shl_ins_vec,
3721          .fno = gen_helper_gvec_sli_s,
3722          .load_dest = true,
3723          .opt_opc = vecop_list,
3724          .vece = MO_32 },
3725        { .fni8 = gen_shl64_ins_i64,
3726          .fniv = gen_shl_ins_vec,
3727          .fno = gen_helper_gvec_sli_d,
3728          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3729          .load_dest = true,
3730          .opt_opc = vecop_list,
3731          .vece = MO_64 },
3732    };
3733
3734    /* tszimm encoding produces immediates in the range [0..esize-1]. */
3735    tcg_debug_assert(shift >= 0);
3736    tcg_debug_assert(shift < (8 << vece));
3737
3738    if (shift == 0) {
3739        tcg_gen_gvec_mov(vece, rd_ofs, rm_ofs, opr_sz, max_sz);
3740    } else {
3741        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3742    }
3743}
3744
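/*
 * Per-element expanders for MLA and MLS: d += a * b and d -= a * b.
 * Note that the 'a' operand is clobbered as a scratch.
 */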
3745static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3746{
3747    gen_helper_neon_mul_u8(a, a, b);
3748    gen_helper_neon_add_u8(d, d, a);
3749}
3750
3751static void gen_mls8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3752{
3753    gen_helper_neon_mul_u8(a, a, b);
3754    gen_helper_neon_sub_u8(d, d, a);
3755}
3756
3757static void gen_mla16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3758{
3759    gen_helper_neon_mul_u16(a, a, b);
3760    gen_helper_neon_add_u16(d, d, a);
3761}
3762
3763static void gen_mls16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3764{
3765    gen_helper_neon_mul_u16(a, a, b);
3766    gen_helper_neon_sub_u16(d, d, a);
3767}
3768
3769static void gen_mla32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3770{
3771    tcg_gen_mul_i32(a, a, b);
3772    tcg_gen_add_i32(d, d, a);
3773}
3774
3775static void gen_mls32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3776{
3777    tcg_gen_mul_i32(a, a, b);
3778    tcg_gen_sub_i32(d, d, a);
3779}
3780
3781static void gen_mla64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
3782{
3783    tcg_gen_mul_i64(a, a, b);
3784    tcg_gen_add_i64(d, d, a);
3785}
3786
3787static void gen_mls64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
3788{
3789    tcg_gen_mul_i64(a, a, b);
3790    tcg_gen_sub_i64(d, d, a);
3791}
3792
3793static void gen_mla_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
3794{
3795    tcg_gen_mul_vec(vece, a, a, b);
3796    tcg_gen_add_vec(vece, d, d, a);
3797}
3798
3799static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
3800{
3801    tcg_gen_mul_vec(vece, a, a, b);
3802    tcg_gen_sub_vec(vece, d, d, a);
3803}
3804
3805/* Note that while NEON does not support VMLA and VMLS as 64-bit ops,
3806 * these tables are shared with AArch64, which does support them.
3807 */
3808void gen_gvec_mla(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3809                  uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3810{
3811    static const TCGOpcode vecop_list[] = {
3812        INDEX_op_mul_vec, INDEX_op_add_vec, 0
3813    };
3814    static const GVecGen3 ops[4] = {
3815        { .fni4 = gen_mla8_i32,
3816          .fniv = gen_mla_vec,
3817          .load_dest = true,
3818          .opt_opc = vecop_list,
3819          .vece = MO_8 },
3820        { .fni4 = gen_mla16_i32,
3821          .fniv = gen_mla_vec,
3822          .load_dest = true,
3823          .opt_opc = vecop_list,
3824          .vece = MO_16 },
3825        { .fni4 = gen_mla32_i32,
3826          .fniv = gen_mla_vec,
3827          .load_dest = true,
3828          .opt_opc = vecop_list,
3829          .vece = MO_32 },
3830        { .fni8 = gen_mla64_i64,
3831          .fniv = gen_mla_vec,
3832          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3833          .load_dest = true,
3834          .opt_opc = vecop_list,
3835          .vece = MO_64 },
3836    };
3837    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
3838}
3839
3840void gen_gvec_mls(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3841                  uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3842{
3843    static const TCGOpcode vecop_list[] = {
3844        INDEX_op_mul_vec, INDEX_op_sub_vec, 0
3845    };
3846    static const GVecGen3 ops[4] = {
3847        { .fni4 = gen_mls8_i32,
3848          .fniv = gen_mls_vec,
3849          .load_dest = true,
3850          .opt_opc = vecop_list,
3851          .vece = MO_8 },
3852        { .fni4 = gen_mls16_i32,
3853          .fniv = gen_mls_vec,
3854          .load_dest = true,
3855          .opt_opc = vecop_list,
3856          .vece = MO_16 },
3857        { .fni4 = gen_mls32_i32,
3858          .fniv = gen_mls_vec,
3859          .load_dest = true,
3860          .opt_opc = vecop_list,
3861          .vece = MO_32 },
3862        { .fni8 = gen_mls64_i64,
3863          .fniv = gen_mls_vec,
3864          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3865          .load_dest = true,
3866          .opt_opc = vecop_list,
3867          .vece = MO_64 },
3868    };
3869    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
3870}
3871
3872/* CMTST: test is "if ((X & Y) != 0)". */
3873static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3874{
3875    tcg_gen_and_i32(d, a, b);
3876    tcg_gen_setcondi_i32(TCG_COND_NE, d, d, 0);
3877    tcg_gen_neg_i32(d, d);
3878}
3879
3880void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
3881{
3882    tcg_gen_and_i64(d, a, b);
3883    tcg_gen_setcondi_i64(TCG_COND_NE, d, d, 0);
3884    tcg_gen_neg_i64(d, d);
3885}
3886
3887static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
3888{
3889    tcg_gen_and_vec(vece, d, a, b);
3890    tcg_gen_dupi_vec(vece, a, 0);
3891    tcg_gen_cmp_vec(TCG_COND_NE, vece, d, d, a);
3892}
3893
3894void gen_gvec_cmtst(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3895                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3896{
3897    static const TCGOpcode vecop_list[] = { INDEX_op_cmp_vec, 0 };
3898    static const GVecGen3 ops[4] = {
3899        { .fni4 = gen_helper_neon_tst_u8,
3900          .fniv = gen_cmtst_vec,
3901          .opt_opc = vecop_list,
3902          .vece = MO_8 },
3903        { .fni4 = gen_helper_neon_tst_u16,
3904          .fniv = gen_cmtst_vec,
3905          .opt_opc = vecop_list,
3906          .vece = MO_16 },
3907        { .fni4 = gen_cmtst_i32,
3908          .fniv = gen_cmtst_vec,
3909          .opt_opc = vecop_list,
3910          .vece = MO_32 },
3911        { .fni8 = gen_cmtst_i64,
3912          .fniv = gen_cmtst_vec,
3913          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3914          .opt_opc = vecop_list,
3915          .vece = MO_64 },
3916    };
3917    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
3918}
3919
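/*
 * USHL/VSHL (register): the shift count is the signed low byte of the
 * shift operand.  A positive count shifts left, a negative count shifts
 * right logically, and a magnitude of esize or more yields zero; e.g.
 * for MO_8 a count byte of 0x02 shifts left by 2, 0xfe (-2) shifts
 * right by 2, and 0x08 or 0xf8 both produce 0.
 *
 * A decoder would expand a 128-bit operation roughly as follows
 * (sketch only, assuming the usual Neon register-offset helper):
 *
 *   gen_gvec_ushl(MO_8, neon_full_reg_offset(vd),
 *                 neon_full_reg_offset(vn), neon_full_reg_offset(vm),
 *                 16, 16);
 */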
3920void gen_ushl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
3921{
3922    TCGv_i32 lval = tcg_temp_new_i32();
3923    TCGv_i32 rval = tcg_temp_new_i32();
3924    TCGv_i32 lsh = tcg_temp_new_i32();
3925    TCGv_i32 rsh = tcg_temp_new_i32();
3926    TCGv_i32 zero = tcg_constant_i32(0);
3927    TCGv_i32 max = tcg_constant_i32(32);
3928
3929    /*
3930     * Rely on the TCG guarantee that out of range shifts produce
3931     * unspecified results, not undefined behaviour (i.e. no trap).
3932     * Discard out-of-range results after the fact.
3933     */
3934    tcg_gen_ext8s_i32(lsh, shift);
3935    tcg_gen_neg_i32(rsh, lsh);
3936    tcg_gen_shl_i32(lval, src, lsh);
3937    tcg_gen_shr_i32(rval, src, rsh);
3938    tcg_gen_movcond_i32(TCG_COND_LTU, dst, lsh, max, lval, zero);
3939    tcg_gen_movcond_i32(TCG_COND_LTU, dst, rsh, max, rval, dst);
3940}
3941
3942void gen_ushl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
3943{
3944    TCGv_i64 lval = tcg_temp_new_i64();
3945    TCGv_i64 rval = tcg_temp_new_i64();
3946    TCGv_i64 lsh = tcg_temp_new_i64();
3947    TCGv_i64 rsh = tcg_temp_new_i64();
3948    TCGv_i64 zero = tcg_constant_i64(0);
3949    TCGv_i64 max = tcg_constant_i64(64);
3950
3951    /*
3952     * Rely on the TCG guarantee that out of range shifts produce
3953     * unspecified results, not undefined behaviour (i.e. no trap).
3954     * Discard out-of-range results after the fact.
3955     */
3956    tcg_gen_ext8s_i64(lsh, shift);
3957    tcg_gen_neg_i64(rsh, lsh);
3958    tcg_gen_shl_i64(lval, src, lsh);
3959    tcg_gen_shr_i64(rval, src, rsh);
3960    tcg_gen_movcond_i64(TCG_COND_LTU, dst, lsh, max, lval, zero);
3961    tcg_gen_movcond_i64(TCG_COND_LTU, dst, rsh, max, rval, dst);
3962}
3963
3964static void gen_ushl_vec(unsigned vece, TCGv_vec dst,
3965                         TCGv_vec src, TCGv_vec shift)
3966{
3967    TCGv_vec lval = tcg_temp_new_vec_matching(dst);
3968    TCGv_vec rval = tcg_temp_new_vec_matching(dst);
3969    TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
3970    TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
3971    TCGv_vec msk, max;
3972
3973    tcg_gen_neg_vec(vece, rsh, shift);
3974    if (vece == MO_8) {
3975        tcg_gen_mov_vec(lsh, shift);
3976    } else {
3977        msk = tcg_temp_new_vec_matching(dst);
3978        tcg_gen_dupi_vec(vece, msk, 0xff);
3979        tcg_gen_and_vec(vece, lsh, shift, msk);
3980        tcg_gen_and_vec(vece, rsh, rsh, msk);
3981    }
3982
3983    /*
3984     * Rely on the TCG guarantee that out of range shifts produce
3985     * unspecified results, not undefined behaviour (i.e. no trap).
3986     * Discard out-of-range results after the fact.
3987     */
3988    tcg_gen_shlv_vec(vece, lval, src, lsh);
3989    tcg_gen_shrv_vec(vece, rval, src, rsh);
3990
3991    max = tcg_temp_new_vec_matching(dst);
3992    tcg_gen_dupi_vec(vece, max, 8 << vece);
3993
3994    /*
3995     * The choice of LT (signed) and GEU (unsigned) is biased toward
3996     * the instructions of the x86_64 host.  For MO_8, the whole byte
3997     * is significant so we must use an unsigned compare; otherwise we
3998     * have already masked to a byte and so a signed compare works.
3999     * Other tcg hosts have a full set of comparisons and do not care.
4000     */
4001    if (vece == MO_8) {
4002        tcg_gen_cmp_vec(TCG_COND_GEU, vece, lsh, lsh, max);
4003        tcg_gen_cmp_vec(TCG_COND_GEU, vece, rsh, rsh, max);
4004        tcg_gen_andc_vec(vece, lval, lval, lsh);
4005        tcg_gen_andc_vec(vece, rval, rval, rsh);
4006    } else {
4007        tcg_gen_cmp_vec(TCG_COND_LT, vece, lsh, lsh, max);
4008        tcg_gen_cmp_vec(TCG_COND_LT, vece, rsh, rsh, max);
4009        tcg_gen_and_vec(vece, lval, lval, lsh);
4010        tcg_gen_and_vec(vece, rval, rval, rsh);
4011    }
4012    tcg_gen_or_vec(vece, dst, lval, rval);
4013}
4014
4015void gen_gvec_ushl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4016                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4017{
4018    static const TCGOpcode vecop_list[] = {
4019        INDEX_op_neg_vec, INDEX_op_shlv_vec,
4020        INDEX_op_shrv_vec, INDEX_op_cmp_vec, 0
4021    };
4022    static const GVecGen3 ops[4] = {
4023        { .fniv = gen_ushl_vec,
4024          .fno = gen_helper_gvec_ushl_b,
4025          .opt_opc = vecop_list,
4026          .vece = MO_8 },
4027        { .fniv = gen_ushl_vec,
4028          .fno = gen_helper_gvec_ushl_h,
4029          .opt_opc = vecop_list,
4030          .vece = MO_16 },
4031        { .fni4 = gen_ushl_i32,
4032          .fniv = gen_ushl_vec,
4033          .opt_opc = vecop_list,
4034          .vece = MO_32 },
4035        { .fni8 = gen_ushl_i64,
4036          .fniv = gen_ushl_vec,
4037          .opt_opc = vecop_list,
4038          .vece = MO_64 },
4039    };
4040    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4041}
4042
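/*
 * SSHL/VSHL (register) on signed elements: as for USHL the count is the
 * signed low byte of the shift operand, but negative counts shift right
 * arithmetically, with out-of-range right shifts clamped to esize - 1
 * so that they produce all sign bits.  E.g. for MO_8 a count of 0xf8
 * (-8) yields 0x00 or 0xff depending on the sign of the element.
 */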
4043void gen_sshl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
4044{
4045    TCGv_i32 lval = tcg_temp_new_i32();
4046    TCGv_i32 rval = tcg_temp_new_i32();
4047    TCGv_i32 lsh = tcg_temp_new_i32();
4048    TCGv_i32 rsh = tcg_temp_new_i32();
4049    TCGv_i32 zero = tcg_constant_i32(0);
4050    TCGv_i32 max = tcg_constant_i32(31);
4051
4052    /*
4053     * Rely on the TCG guarantee that out of range shifts produce
4054     * unspecified results, not undefined behaviour (i.e. no trap).
4055     * Discard out-of-range results after the fact.
4056     */
4057    tcg_gen_ext8s_i32(lsh, shift);
4058    tcg_gen_neg_i32(rsh, lsh);
4059    tcg_gen_shl_i32(lval, src, lsh);
4060    tcg_gen_umin_i32(rsh, rsh, max);
4061    tcg_gen_sar_i32(rval, src, rsh);
4062    tcg_gen_movcond_i32(TCG_COND_LEU, lval, lsh, max, lval, zero);
4063    tcg_gen_movcond_i32(TCG_COND_LT, dst, lsh, zero, rval, lval);
4064}
4065
4066void gen_sshl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
4067{
4068    TCGv_i64 lval = tcg_temp_new_i64();
4069    TCGv_i64 rval = tcg_temp_new_i64();
4070    TCGv_i64 lsh = tcg_temp_new_i64();
4071    TCGv_i64 rsh = tcg_temp_new_i64();
4072    TCGv_i64 zero = tcg_constant_i64(0);
4073    TCGv_i64 max = tcg_constant_i64(63);
4074
4075    /*
4076     * Rely on the TCG guarantee that out of range shifts produce
4077     * unspecified results, not undefined behaviour (i.e. no trap).
4078     * Discard out-of-range results after the fact.
4079     */
4080    tcg_gen_ext8s_i64(lsh, shift);
4081    tcg_gen_neg_i64(rsh, lsh);
4082    tcg_gen_shl_i64(lval, src, lsh);
4083    tcg_gen_umin_i64(rsh, rsh, max);
4084    tcg_gen_sar_i64(rval, src, rsh);
4085    tcg_gen_movcond_i64(TCG_COND_LEU, lval, lsh, max, lval, zero);
4086    tcg_gen_movcond_i64(TCG_COND_LT, dst, lsh, zero, rval, lval);
4087}
4088
4089static void gen_sshl_vec(unsigned vece, TCGv_vec dst,
4090                         TCGv_vec src, TCGv_vec shift)
4091{
4092    TCGv_vec lval = tcg_temp_new_vec_matching(dst);
4093    TCGv_vec rval = tcg_temp_new_vec_matching(dst);
4094    TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
4095    TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
4096    TCGv_vec tmp = tcg_temp_new_vec_matching(dst);
4097
4098    /*
4099     * Rely on the TCG guarantee that out of range shifts produce
4100     * unspecified results, not undefined behaviour (i.e. no trap).
4101     * Discard out-of-range results after the fact.
4102     */
4103    tcg_gen_neg_vec(vece, rsh, shift);
4104    if (vece == MO_8) {
4105        tcg_gen_mov_vec(lsh, shift);
4106    } else {
4107        tcg_gen_dupi_vec(vece, tmp, 0xff);
4108        tcg_gen_and_vec(vece, lsh, shift, tmp);
4109        tcg_gen_and_vec(vece, rsh, rsh, tmp);
4110    }
4111
4112    /* Bound rsh so an out-of-range right shift gets -1.  */
4113    tcg_gen_dupi_vec(vece, tmp, (8 << vece) - 1);
4114    tcg_gen_umin_vec(vece, rsh, rsh, tmp);
4115    tcg_gen_cmp_vec(TCG_COND_GT, vece, tmp, lsh, tmp);
4116
4117    tcg_gen_shlv_vec(vece, lval, src, lsh);
4118    tcg_gen_sarv_vec(vece, rval, src, rsh);
4119
4120    /* Select in-bound left shift.  */
4121    tcg_gen_andc_vec(vece, lval, lval, tmp);
4122
4123    /* Select between left and right shift.  */
4124    if (vece == MO_8) {
4125        tcg_gen_dupi_vec(vece, tmp, 0);
4126        tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, rval, lval);
4127    } else {
4128        tcg_gen_dupi_vec(vece, tmp, 0x80);
4129        tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, lval, rval);
4130    }
4131}
4132
4133void gen_gvec_sshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4134                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4135{
4136    static const TCGOpcode vecop_list[] = {
4137        INDEX_op_neg_vec, INDEX_op_umin_vec, INDEX_op_shlv_vec,
4138        INDEX_op_sarv_vec, INDEX_op_cmp_vec, INDEX_op_cmpsel_vec, 0
4139    };
4140    static const GVecGen3 ops[4] = {
4141        { .fniv = gen_sshl_vec,
4142          .fno = gen_helper_gvec_sshl_b,
4143          .opt_opc = vecop_list,
4144          .vece = MO_8 },
4145        { .fniv = gen_sshl_vec,
4146          .fno = gen_helper_gvec_sshl_h,
4147          .opt_opc = vecop_list,
4148          .vece = MO_16 },
4149        { .fni4 = gen_sshl_i32,
4150          .fniv = gen_sshl_vec,
4151          .opt_opc = vecop_list,
4152          .vece = MO_32 },
4153        { .fni8 = gen_sshl_i64,
4154          .fniv = gen_sshl_vec,
4155          .opt_opc = vecop_list,
4156          .vece = MO_64 },
4157    };
4158    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4159}
4160
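/*
 * Saturating add/sub with QC: each expander below computes both the
 * wrapping result and the saturated result, compares them, and ORs the
 * resulting mask into the cumulative saturation flag held in vfp.qc.
 * For example, with MO_8 the lane 0xff + 0x01 saturates to 0xff while
 * the wrapped sum is 0x00, so the NE compare flags that lane in QC.
 */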
4161static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4162                          TCGv_vec a, TCGv_vec b)
4163{
4164    TCGv_vec x = tcg_temp_new_vec_matching(t);
4165    tcg_gen_add_vec(vece, x, a, b);
4166    tcg_gen_usadd_vec(vece, t, a, b);
4167    tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4168    tcg_gen_or_vec(vece, sat, sat, x);
4169}
4170
4171void gen_gvec_uqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4172                       uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4173{
4174    static const TCGOpcode vecop_list[] = {
4175        INDEX_op_usadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
4176    };
4177    static const GVecGen4 ops[4] = {
4178        { .fniv = gen_uqadd_vec,
4179          .fno = gen_helper_gvec_uqadd_b,
4180          .write_aofs = true,
4181          .opt_opc = vecop_list,
4182          .vece = MO_8 },
4183        { .fniv = gen_uqadd_vec,
4184          .fno = gen_helper_gvec_uqadd_h,
4185          .write_aofs = true,
4186          .opt_opc = vecop_list,
4187          .vece = MO_16 },
4188        { .fniv = gen_uqadd_vec,
4189          .fno = gen_helper_gvec_uqadd_s,
4190          .write_aofs = true,
4191          .opt_opc = vecop_list,
4192          .vece = MO_32 },
4193        { .fniv = gen_uqadd_vec,
4194          .fno = gen_helper_gvec_uqadd_d,
4195          .write_aofs = true,
4196          .opt_opc = vecop_list,
4197          .vece = MO_64 },
4198    };
4199    tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4200                   rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4201}
4202
4203static void gen_sqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4204                          TCGv_vec a, TCGv_vec b)
4205{
4206    TCGv_vec x = tcg_temp_new_vec_matching(t);
4207    tcg_gen_add_vec(vece, x, a, b);
4208    tcg_gen_ssadd_vec(vece, t, a, b);
4209    tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4210    tcg_gen_or_vec(vece, sat, sat, x);
4211}
4212
4213void gen_gvec_sqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4214                       uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4215{
4216    static const TCGOpcode vecop_list[] = {
4217        INDEX_op_ssadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
4218    };
4219    static const GVecGen4 ops[4] = {
4220        { .fniv = gen_sqadd_vec,
4221          .fno = gen_helper_gvec_sqadd_b,
4222          .opt_opc = vecop_list,
4223          .write_aofs = true,
4224          .vece = MO_8 },
4225        { .fniv = gen_sqadd_vec,
4226          .fno = gen_helper_gvec_sqadd_h,
4227          .opt_opc = vecop_list,
4228          .write_aofs = true,
4229          .vece = MO_16 },
4230        { .fniv = gen_sqadd_vec,
4231          .fno = gen_helper_gvec_sqadd_s,
4232          .opt_opc = vecop_list,
4233          .write_aofs = true,
4234          .vece = MO_32 },
4235        { .fniv = gen_sqadd_vec,
4236          .fno = gen_helper_gvec_sqadd_d,
4237          .opt_opc = vecop_list,
4238          .write_aofs = true,
4239          .vece = MO_64 },
4240    };
4241    tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4242                   rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4243}
4244
4245static void gen_uqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4246                          TCGv_vec a, TCGv_vec b)
4247{
4248    TCGv_vec x = tcg_temp_new_vec_matching(t);
4249    tcg_gen_sub_vec(vece, x, a, b);
4250    tcg_gen_ussub_vec(vece, t, a, b);
4251    tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4252    tcg_gen_or_vec(vece, sat, sat, x);
4253}
4254
4255void gen_gvec_uqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4256                       uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4257{
4258    static const TCGOpcode vecop_list[] = {
4259        INDEX_op_ussub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
4260    };
4261    static const GVecGen4 ops[4] = {
4262        { .fniv = gen_uqsub_vec,
4263          .fno = gen_helper_gvec_uqsub_b,
4264          .opt_opc = vecop_list,
4265          .write_aofs = true,
4266          .vece = MO_8 },
4267        { .fniv = gen_uqsub_vec,
4268          .fno = gen_helper_gvec_uqsub_h,
4269          .opt_opc = vecop_list,
4270          .write_aofs = true,
4271          .vece = MO_16 },
4272        { .fniv = gen_uqsub_vec,
4273          .fno = gen_helper_gvec_uqsub_s,
4274          .opt_opc = vecop_list,
4275          .write_aofs = true,
4276          .vece = MO_32 },
4277        { .fniv = gen_uqsub_vec,
4278          .fno = gen_helper_gvec_uqsub_d,
4279          .opt_opc = vecop_list,
4280          .write_aofs = true,
4281          .vece = MO_64 },
4282    };
4283    tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4284                   rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4285}
4286
4287static void gen_sqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4288                          TCGv_vec a, TCGv_vec b)
4289{
4290    TCGv_vec x = tcg_temp_new_vec_matching(t);
4291    tcg_gen_sub_vec(vece, x, a, b);
4292    tcg_gen_sssub_vec(vece, t, a, b);
4293    tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4294    tcg_gen_or_vec(vece, sat, sat, x);
4295}
4296
4297void gen_gvec_sqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4298                       uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4299{
4300    static const TCGOpcode vecop_list[] = {
4301        INDEX_op_sssub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
4302    };
4303    static const GVecGen4 ops[4] = {
4304        { .fniv = gen_sqsub_vec,
4305          .fno = gen_helper_gvec_sqsub_b,
4306          .opt_opc = vecop_list,
4307          .write_aofs = true,
4308          .vece = MO_8 },
4309        { .fniv = gen_sqsub_vec,
4310          .fno = gen_helper_gvec_sqsub_h,
4311          .opt_opc = vecop_list,
4312          .write_aofs = true,
4313          .vece = MO_16 },
4314        { .fniv = gen_sqsub_vec,
4315          .fno = gen_helper_gvec_sqsub_s,
4316          .opt_opc = vecop_list,
4317          .write_aofs = true,
4318          .vece = MO_32 },
4319        { .fniv = gen_sqsub_vec,
4320          .fno = gen_helper_gvec_sqsub_d,
4321          .opt_opc = vecop_list,
4322          .write_aofs = true,
4323          .vece = MO_64 },
4324    };
4325    tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4326                   rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4327}
4328
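/*
 * SABD/UABD (absolute difference): the vector forms compute
 * max(a, b) - min(a, b); the scalar forms compute both a - b and b - a
 * and select the non-negative one with a movcond.
 */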
4329static void gen_sabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4330{
4331    TCGv_i32 t = tcg_temp_new_i32();
4332
4333    tcg_gen_sub_i32(t, a, b);
4334    tcg_gen_sub_i32(d, b, a);
4335    tcg_gen_movcond_i32(TCG_COND_LT, d, a, b, d, t);
4336}
4337
4338static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4339{
4340    TCGv_i64 t = tcg_temp_new_i64();
4341
4342    tcg_gen_sub_i64(t, a, b);
4343    tcg_gen_sub_i64(d, b, a);
4344    tcg_gen_movcond_i64(TCG_COND_LT, d, a, b, d, t);
4345}
4346
4347static void gen_sabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4348{
4349    TCGv_vec t = tcg_temp_new_vec_matching(d);
4350
4351    tcg_gen_smin_vec(vece, t, a, b);
4352    tcg_gen_smax_vec(vece, d, a, b);
4353    tcg_gen_sub_vec(vece, d, d, t);
4354}
4355
4356void gen_gvec_sabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4357                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4358{
4359    static const TCGOpcode vecop_list[] = {
4360        INDEX_op_sub_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0
4361    };
4362    static const GVecGen3 ops[4] = {
4363        { .fniv = gen_sabd_vec,
4364          .fno = gen_helper_gvec_sabd_b,
4365          .opt_opc = vecop_list,
4366          .vece = MO_8 },
4367        { .fniv = gen_sabd_vec,
4368          .fno = gen_helper_gvec_sabd_h,
4369          .opt_opc = vecop_list,
4370          .vece = MO_16 },
4371        { .fni4 = gen_sabd_i32,
4372          .fniv = gen_sabd_vec,
4373          .fno = gen_helper_gvec_sabd_s,
4374          .opt_opc = vecop_list,
4375          .vece = MO_32 },
4376        { .fni8 = gen_sabd_i64,
4377          .fniv = gen_sabd_vec,
4378          .fno = gen_helper_gvec_sabd_d,
4379          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4380          .opt_opc = vecop_list,
4381          .vece = MO_64 },
4382    };
4383    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4384}
4385
4386static void gen_uabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4387{
4388    TCGv_i32 t = tcg_temp_new_i32();
4389
4390    tcg_gen_sub_i32(t, a, b);
4391    tcg_gen_sub_i32(d, b, a);
4392    tcg_gen_movcond_i32(TCG_COND_LTU, d, a, b, d, t);
4393}
4394
4395static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4396{
4397    TCGv_i64 t = tcg_temp_new_i64();
4398
4399    tcg_gen_sub_i64(t, a, b);
4400    tcg_gen_sub_i64(d, b, a);
4401    tcg_gen_movcond_i64(TCG_COND_LTU, d, a, b, d, t);
4402}
4403
4404static void gen_uabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4405{
4406    TCGv_vec t = tcg_temp_new_vec_matching(d);
4407
4408    tcg_gen_umin_vec(vece, t, a, b);
4409    tcg_gen_umax_vec(vece, d, a, b);
4410    tcg_gen_sub_vec(vece, d, d, t);
4411}
4412
4413void gen_gvec_uabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4414                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4415{
4416    static const TCGOpcode vecop_list[] = {
4417        INDEX_op_sub_vec, INDEX_op_umin_vec, INDEX_op_umax_vec, 0
4418    };
4419    static const GVecGen3 ops[4] = {
4420        { .fniv = gen_uabd_vec,
4421          .fno = gen_helper_gvec_uabd_b,
4422          .opt_opc = vecop_list,
4423          .vece = MO_8 },
4424        { .fniv = gen_uabd_vec,
4425          .fno = gen_helper_gvec_uabd_h,
4426          .opt_opc = vecop_list,
4427          .vece = MO_16 },
4428        { .fni4 = gen_uabd_i32,
4429          .fniv = gen_uabd_vec,
4430          .fno = gen_helper_gvec_uabd_s,
4431          .opt_opc = vecop_list,
4432          .vece = MO_32 },
4433        { .fni8 = gen_uabd_i64,
4434          .fniv = gen_uabd_vec,
4435          .fno = gen_helper_gvec_uabd_d,
4436          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4437          .opt_opc = vecop_list,
4438          .vece = MO_64 },
4439    };
4440    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4441}
4442
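/*
 * SABA/UABA accumulate the absolute difference into the destination,
 * d += |a - b|, reusing the SABD/UABD expanders above.
 */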
4443static void gen_saba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4444{
4445    TCGv_i32 t = tcg_temp_new_i32();
4446    gen_sabd_i32(t, a, b);
4447    tcg_gen_add_i32(d, d, t);
4448}
4449
4450static void gen_saba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4451{
4452    TCGv_i64 t = tcg_temp_new_i64();
4453    gen_sabd_i64(t, a, b);
4454    tcg_gen_add_i64(d, d, t);
4455}
4456
4457static void gen_saba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4458{
4459    TCGv_vec t = tcg_temp_new_vec_matching(d);
4460    gen_sabd_vec(vece, t, a, b);
4461    tcg_gen_add_vec(vece, d, d, t);
4462}
4463
4464void gen_gvec_saba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4465                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4466{
4467    static const TCGOpcode vecop_list[] = {
4468        INDEX_op_sub_vec, INDEX_op_add_vec,
4469        INDEX_op_smin_vec, INDEX_op_smax_vec, 0
4470    };
4471    static const GVecGen3 ops[4] = {
4472        { .fniv = gen_saba_vec,
4473          .fno = gen_helper_gvec_saba_b,
4474          .opt_opc = vecop_list,
4475          .load_dest = true,
4476          .vece = MO_8 },
4477        { .fniv = gen_saba_vec,
4478          .fno = gen_helper_gvec_saba_h,
4479          .opt_opc = vecop_list,
4480          .load_dest = true,
4481          .vece = MO_16 },
4482        { .fni4 = gen_saba_i32,
4483          .fniv = gen_saba_vec,
4484          .fno = gen_helper_gvec_saba_s,
4485          .opt_opc = vecop_list,
4486          .load_dest = true,
4487          .vece = MO_32 },
4488        { .fni8 = gen_saba_i64,
4489          .fniv = gen_saba_vec,
4490          .fno = gen_helper_gvec_saba_d,
4491          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4492          .opt_opc = vecop_list,
4493          .load_dest = true,
4494          .vece = MO_64 },
4495    };
4496    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4497}
4498
4499static void gen_uaba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4500{
4501    TCGv_i32 t = tcg_temp_new_i32();
4502    gen_uabd_i32(t, a, b);
4503    tcg_gen_add_i32(d, d, t);
4504}
4505
4506static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4507{
4508    TCGv_i64 t = tcg_temp_new_i64();
4509    gen_uabd_i64(t, a, b);
4510    tcg_gen_add_i64(d, d, t);
4511}
4512
4513static void gen_uaba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4514{
4515    TCGv_vec t = tcg_temp_new_vec_matching(d);
4516    gen_uabd_vec(vece, t, a, b);
4517    tcg_gen_add_vec(vece, d, d, t);
4518}
4519
4520void gen_gvec_uaba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4521                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4522{
4523    static const TCGOpcode vecop_list[] = {
4524        INDEX_op_sub_vec, INDEX_op_add_vec,
4525        INDEX_op_umin_vec, INDEX_op_umax_vec, 0
4526    };
4527    static const GVecGen3 ops[4] = {
4528        { .fniv = gen_uaba_vec,
4529          .fno = gen_helper_gvec_uaba_b,
4530          .opt_opc = vecop_list,
4531          .load_dest = true,
4532          .vece = MO_8 },
4533        { .fniv = gen_uaba_vec,
4534          .fno = gen_helper_gvec_uaba_h,
4535          .opt_opc = vecop_list,
4536          .load_dest = true,
4537          .vece = MO_16 },
4538        { .fni4 = gen_uaba_i32,
4539          .fniv = gen_uaba_vec,
4540          .fno = gen_helper_gvec_uaba_s,
4541          .opt_opc = vecop_list,
4542          .load_dest = true,
4543          .vece = MO_32 },
4544        { .fni8 = gen_uaba_i64,
4545          .fniv = gen_uaba_vec,
4546          .fno = gen_helper_gvec_uaba_d,
4547          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4548          .opt_opc = vecop_list,
4549          .load_dest = true,
4550          .vece = MO_64 },
4551    };
4552    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4553}
4554
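/*
 * Emit code for an AArch32 coprocessor/system register access
 * (MRC/MCR/MRRC/MCRR): look up the ARMCPRegInfo for this encoding,
 * perform the translate-time and (where required) runtime permission
 * checks, and then emit the read or write itself.  For example, a guest
 * "MRC p15, 0, r0, c1, c0, 0" (an SCTLR read) arrives here with
 * cpnum=15, is64=0, opc1=0, crn=1, crm=0, opc2=0, isread=true, rt=0.
 */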
4555static void do_coproc_insn(DisasContext *s, int cpnum, int is64,
4556                           int opc1, int crn, int crm, int opc2,
4557                           bool isread, int rt, int rt2)
4558{
4559    uint32_t key = ENCODE_CP_REG(cpnum, is64, s->ns, crn, crm, opc1, opc2);
4560    const ARMCPRegInfo *ri = get_arm_cp_reginfo(s->cp_regs, key);
4561    TCGv_ptr tcg_ri = NULL;
4562    bool need_exit_tb;
4563    uint32_t syndrome;
4564
4565    /*
4566     * Note that since we are an implementation which takes an
4567     * exception on a trapped conditional instruction only if the
4568     * instruction passes its condition code check, we can take
4569     * advantage of the clause in the ARM ARM that allows us to set
4570     * the COND field in the instruction to 0xE in all cases.
4571     * We could fish the actual condition out of the insn (ARM)
4572     * or the condexec bits (Thumb) but it isn't necessary.
4573     */
4574    switch (cpnum) {
4575    case 14:
4576        if (is64) {
4577            syndrome = syn_cp14_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
4578                                         isread, false);
4579        } else {
4580            syndrome = syn_cp14_rt_trap(1, 0xe, opc1, opc2, crn, crm,
4581                                        rt, isread, false);
4582        }
4583        break;
4584    case 15:
4585        if (is64) {
4586            syndrome = syn_cp15_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
4587                                         isread, false);
4588        } else {
4589            syndrome = syn_cp15_rt_trap(1, 0xe, opc1, opc2, crn, crm,
4590                                        rt, isread, false);
4591        }
4592        break;
4593    default:
4594        /*
4595         * ARMv8 defines that only coprocessors 14 and 15 exist,
4596         * so this can only happen if this is an ARMv7 or earlier CPU,
4597         * in which case the syndrome information won't actually be
4598         * guest visible.
4599         */
4600        assert(!arm_dc_feature(s, ARM_FEATURE_V8));
4601        syndrome = syn_uncategorized();
4602        break;
4603    }
4604
4605    if (s->hstr_active && cpnum == 15 && s->current_el == 1) {
4606        /*
4607         * At EL1, check for a HSTR_EL2 trap, which must take precedence
4608         * over the UNDEF for "no such register" or the UNDEF for "access
4609         * permissions forbid this EL1 access". HSTR_EL2 traps from EL0
4610         * only happen if the cpreg doesn't UNDEF at EL0, so we do those in
4611         * access_check_cp_reg(), after the checks for whether the access
4612         * is configurably trapped to EL1.
4613         */
4614        uint32_t maskbit = is64 ? crm : crn;
4615
4616        if (maskbit != 4 && maskbit != 14) {
4617            /* T4 and T14 are RES0 so never cause traps */
4618            TCGv_i32 t;
4619            DisasLabel over = gen_disas_label(s);
4620
4621            t = load_cpu_offset(offsetoflow32(CPUARMState, cp15.hstr_el2));
4622            tcg_gen_andi_i32(t, t, 1u << maskbit);
4623            tcg_gen_brcondi_i32(TCG_COND_EQ, t, 0, over.label);
4624
4625            gen_exception_insn(s, 0, EXCP_UDEF, syndrome);
4626            /*
4627             * gen_exception_insn() will set is_jmp to DISAS_NORETURN,
4628             * but since we're conditionally branching over it, we want
4629             * to assume continue-to-next-instruction.
4630             */
4631            s->base.is_jmp = DISAS_NEXT;
4632            set_disas_label(s, over);
4633        }
4634    }
4635
4636    if (!ri) {
4637        /*
4638         * Unknown register; this might be a guest error or a QEMU
4639         * unimplemented feature.
4640         */
4641        if (is64) {
4642            qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
4643                          "64 bit system register cp:%d opc1: %d crm:%d "
4644                          "(%s)\n",
4645                          isread ? "read" : "write", cpnum, opc1, crm,
4646                          s->ns ? "non-secure" : "secure");
4647        } else {
4648            qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
4649                          "system register cp:%d opc1:%d crn:%d crm:%d "
4650                          "opc2:%d (%s)\n",
4651                          isread ? "read" : "write", cpnum, opc1, crn,
4652                          crm, opc2, s->ns ? "non-secure" : "secure");
4653        }
4654        unallocated_encoding(s);
4655        return;
4656    }
4657
4658    /* Check access permissions */
4659    if (!cp_access_ok(s->current_el, ri, isread)) {
4660        unallocated_encoding(s);
4661        return;
4662    }
4663
4664    if ((s->hstr_active && s->current_el == 0) || ri->accessfn ||
4665        (ri->fgt && s->fgt_active) ||
4666        (arm_dc_feature(s, ARM_FEATURE_XSCALE) && cpnum < 14)) {
4667        /*
4668         * Emit code to perform further access permissions checks at
4669         * runtime; this may result in an exception.
4670         * Note that on XScale all cp0..cp13 registers do an access check
4671         * call in order to handle c15_cpar.
4672         */
4673        gen_set_condexec(s);
4674        gen_update_pc(s, 0);
4675        tcg_ri = tcg_temp_new_ptr();
4676        gen_helper_access_check_cp_reg(tcg_ri, cpu_env,
4677                                       tcg_constant_i32(key),
4678                                       tcg_constant_i32(syndrome),
4679                                       tcg_constant_i32(isread));
4680    } else if (ri->type & ARM_CP_RAISES_EXC) {
4681        /*
4682         * The readfn or writefn might raise an exception;
4683         * synchronize the CPU state in case it does.
4684         */
4685        gen_set_condexec(s);
4686        gen_update_pc(s, 0);
4687    }
4688
4689    /* Handle special cases first */
4690    switch (ri->type & ARM_CP_SPECIAL_MASK) {
4691    case 0:
4692        break;
4693    case ARM_CP_NOP:
4694        return;
4695    case ARM_CP_WFI:
4696        if (isread) {
4697            unallocated_encoding(s);
4698        } else {
4699            gen_update_pc(s, curr_insn_len(s));
4700            s->base.is_jmp = DISAS_WFI;
4701        }
4702        return;
4703    default:
4704        g_assert_not_reached();
4705    }
4706
4707    if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
4708        gen_io_start();
4709    }
4710
4711    if (isread) {
4712        /* Read */
4713        if (is64) {
4714            TCGv_i64 tmp64;
4715            TCGv_i32 tmp;
4716            if (ri->type & ARM_CP_CONST) {
4717                tmp64 = tcg_constant_i64(ri->resetvalue);
4718            } else if (ri->readfn) {
4719                if (!tcg_ri) {
4720                    tcg_ri = gen_lookup_cp_reg(key);
4721                }
4722                tmp64 = tcg_temp_new_i64();
4723                gen_helper_get_cp_reg64(tmp64, cpu_env, tcg_ri);
4724            } else {
4725                tmp64 = tcg_temp_new_i64();
4726                tcg_gen_ld_i64(tmp64, cpu_env, ri->fieldoffset);
4727            }
4728            tmp = tcg_temp_new_i32();
4729            tcg_gen_extrl_i64_i32(tmp, tmp64);
4730            store_reg(s, rt, tmp);
4731            tmp = tcg_temp_new_i32();
4732            tcg_gen_extrh_i64_i32(tmp, tmp64);
4733            store_reg(s, rt2, tmp);
4734        } else {
4735            TCGv_i32 tmp;
4736            if (ri->type & ARM_CP_CONST) {
4737                tmp = tcg_constant_i32(ri->resetvalue);
4738            } else if (ri->readfn) {
4739                if (!tcg_ri) {
4740                    tcg_ri = gen_lookup_cp_reg(key);
4741                }
4742                tmp = tcg_temp_new_i32();
4743                gen_helper_get_cp_reg(tmp, cpu_env, tcg_ri);
4744            } else {
4745                tmp = load_cpu_offset(ri->fieldoffset);
4746            }
4747            if (rt == 15) {
4748                /* A destination register of r15 for a 32-bit load sets
4749                 * the condition codes from the high 4 bits of the value.
4750                 */
4751                gen_set_nzcv(tmp);
4752            } else {
4753                store_reg(s, rt, tmp);
4754            }
4755        }
4756    } else {
4757        /* Write */
4758        if (ri->type & ARM_CP_CONST) {
4759            /* If not forbidden by access permissions, treat as WI */
4760            return;
4761        }
4762
4763        if (is64) {
4764            TCGv_i32 tmplo, tmphi;
4765            TCGv_i64 tmp64 = tcg_temp_new_i64();
4766            tmplo = load_reg(s, rt);
4767            tmphi = load_reg(s, rt2);
4768            tcg_gen_concat_i32_i64(tmp64, tmplo, tmphi);
4769            if (ri->writefn) {
4770                if (!tcg_ri) {
4771                    tcg_ri = gen_lookup_cp_reg(key);
4772                }
4773                gen_helper_set_cp_reg64(cpu_env, tcg_ri, tmp64);
4774            } else {
4775                tcg_gen_st_i64(tmp64, cpu_env, ri->fieldoffset);
4776            }
4777        } else {
4778            TCGv_i32 tmp = load_reg(s, rt);
4779            if (ri->writefn) {
4780                if (!tcg_ri) {
4781                    tcg_ri = gen_lookup_cp_reg(key);
4782                }
4783                gen_helper_set_cp_reg(cpu_env, tcg_ri, tmp);
4784            } else {
4785                store_cpu_offset(tmp, ri->fieldoffset, 4);
4786            }
4787        }
4788    }
4789
4790    /* I/O operations must end the TB here (whether read or write) */
4791    need_exit_tb = ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) &&
4792                    (ri->type & ARM_CP_IO));
4793
4794    if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
4795        /*
4796         * A write to any coprocessor register that ends a TB
4797         * must rebuild the hflags for the next TB.
4798         */
4799        gen_rebuild_hflags(s, ri->type & ARM_CP_NEWEL);
4800        /*
4801         * We default to ending the TB on a coprocessor register write,
4802         * but allow this to be suppressed by the register definition
4803         * (usually only necessary to work around guest bugs).
4804         */
4805        need_exit_tb = true;
4806    }
4807    if (need_exit_tb) {
4808        gen_lookup_tb(s);
4809    }
4810}
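
    /*
     * Illustrative flow (not exhaustive): a write such as
     *   MCR p15, 0, Rt, c1, c0, 0   @ SCTLR
     * usually takes the writefn path above, and since SCTLR is not marked
     * ARM_CP_SUPPRESS_TB_END the translator then rebuilds the hflags and
     * ends the TB so that subsequent insns are translated with the new
     * control state.
     */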
4811
4812/* Decode XScale DSP or iWMMXt insn (in the copro space, cp=0 or 1) */
4813static void disas_xscale_insn(DisasContext *s, uint32_t insn)
4814{
4815    int cpnum = (insn >> 8) & 0xf;
4816
4817    if (extract32(s->c15_cpar, cpnum, 1) == 0) {
4818        unallocated_encoding(s);
4819    } else if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
4820        if (disas_iwmmxt_insn(s, insn)) {
4821            unallocated_encoding(s);
4822        }
4823    } else if (arm_dc_feature(s, ARM_FEATURE_XSCALE)) {
4824        if (disas_dsp_insn(s, insn)) {
4825            unallocated_encoding(s);
4826        }
4827    }
4828}
4829
4830/* Store a 64-bit value to a register pair.  Clobbers val.  */
4831static void gen_storeq_reg(DisasContext *s, int rlow, int rhigh, TCGv_i64 val)
4832{
4833    TCGv_i32 tmp;
4834    tmp = tcg_temp_new_i32();
4835    tcg_gen_extrl_i64_i32(tmp, val);
4836    store_reg(s, rlow, tmp);
4837    tmp = tcg_temp_new_i32();
4838    tcg_gen_extrh_i64_i32(tmp, val);
4839    store_reg(s, rhigh, tmp);
4840}
4841
4842/* Load a 64-bit value from a register pair and add it to val.  */
4843static void gen_addq(DisasContext *s, TCGv_i64 val, int rlow, int rhigh)
4844{
4845    TCGv_i64 tmp;
4846    TCGv_i32 tmpl;
4847    TCGv_i32 tmph;
4848
4849    /* Load the 64-bit value rhigh:rlow.  */
4850    tmpl = load_reg(s, rlow);
4851    tmph = load_reg(s, rhigh);
4852    tmp = tcg_temp_new_i64();
4853    tcg_gen_concat_i32_i64(tmp, tmpl, tmph);
4854    tcg_gen_add_i64(val, val, tmp);
4855}
4856
4857/* Set N and Z flags from hi|lo.  */
4858static void gen_logicq_cc(TCGv_i32 lo, TCGv_i32 hi)
4859{
4860    tcg_gen_mov_i32(cpu_NF, hi);
4861    tcg_gen_or_i32(cpu_ZF, lo, hi);
4862}
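
    /*
     * Note: in this translator's flag representation N is bit 31 of cpu_NF
     * and Z is "set" when cpu_ZF == 0, so copying the high word and OR-ing
     * the two halves is sufficient for a 64-bit result.
     */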
4863
4864/* Load/Store exclusive instructions are implemented by remembering
4865   the value/address loaded, and seeing if these are the same
4866   when the store is performed.  This should be sufficient to implement
4867   the architecturally mandated semantics, and avoids having to monitor
4868   regular stores.  The compare vs the remembered value is done during
4869   the cmpxchg operation, but we must compare the addresses manually.  */
4870static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
4871                               TCGv_i32 addr, int size)
4872{
4873    TCGv_i32 tmp = tcg_temp_new_i32();
4874    MemOp opc = size | MO_ALIGN | s->be_data;
4875
4876    s->is_ldex = true;
4877
4878    if (size == 3) {
4879        TCGv_i32 tmp2 = tcg_temp_new_i32();
4880        TCGv_i64 t64 = tcg_temp_new_i64();
4881
4882        /*
4883         * For AArch32, architecturally the 32-bit word at the lowest
4884         * address is always Rt and the one at addr+4 is Rt2, even if
4885         * the CPU is big-endian. That means we don't want to do a
4886         * gen_aa32_ld_i64(), which checks SCTLR_B as if for an
4887         * architecturally 64-bit access, but instead do a 64-bit access
4888         * using MO_BE if appropriate and then split the two halves.
4889         */
4890        TCGv taddr = gen_aa32_addr(s, addr, opc);
4891
4892        tcg_gen_qemu_ld_i64(t64, taddr, get_mem_index(s), opc);
4893        tcg_gen_mov_i64(cpu_exclusive_val, t64);
4894        if (s->be_data == MO_BE) {
4895            tcg_gen_extr_i64_i32(tmp2, tmp, t64);
4896        } else {
4897            tcg_gen_extr_i64_i32(tmp, tmp2, t64);
4898        }
4899        store_reg(s, rt2, tmp2);
4900    } else {
4901        gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), opc);
4902        tcg_gen_extu_i32_i64(cpu_exclusive_val, tmp);
4903    }
4904
4905    store_reg(s, rt, tmp);
4906    tcg_gen_extu_i32_i64(cpu_exclusive_addr, addr);
4907}
4908
4909static void gen_clrex(DisasContext *s)
4910{
4911    tcg_gen_movi_i64(cpu_exclusive_addr, -1);
4912}
4913
4914static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
4915                                TCGv_i32 addr, int size)
4916{
4917    TCGv_i32 t0, t1, t2;
4918    TCGv_i64 extaddr;
4919    TCGv taddr;
4920    TCGLabel *done_label;
4921    TCGLabel *fail_label;
4922    MemOp opc = size | MO_ALIGN | s->be_data;
4923
4924    /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]) {
4925         [addr] = {Rt};
4926         {Rd} = 0;
4927       } else {
4928         {Rd} = 1;
4929       } */
4930    fail_label = gen_new_label();
4931    done_label = gen_new_label();
4932    extaddr = tcg_temp_new_i64();
4933    tcg_gen_extu_i32_i64(extaddr, addr);
4934    tcg_gen_brcond_i64(TCG_COND_NE, extaddr, cpu_exclusive_addr, fail_label);
4935
4936    taddr = gen_aa32_addr(s, addr, opc);
4937    t0 = tcg_temp_new_i32();
4938    t1 = load_reg(s, rt);
4939    if (size == 3) {
4940        TCGv_i64 o64 = tcg_temp_new_i64();
4941        TCGv_i64 n64 = tcg_temp_new_i64();
4942
4943        t2 = load_reg(s, rt2);
4944
4945        /*
4946         * For AArch32, architecturally the 32-bit word at the lowest
4947         * address is always Rt and the one at addr+4 is Rt2, even if
4948         * the CPU is big-endian. Since we're going to treat this as a
4949         * single 64-bit BE store, we need to put the two halves in the
4950         * opposite order for BE to LE, so that they end up in the right
4951         * places.  We don't want gen_aa32_st_i64, because that checks
4952         * SCTLR_B as if for an architectural 64-bit access.
4953         */
4954        if (s->be_data == MO_BE) {
4955            tcg_gen_concat_i32_i64(n64, t2, t1);
4956        } else {
4957            tcg_gen_concat_i32_i64(n64, t1, t2);
4958        }
4959
4960        tcg_gen_atomic_cmpxchg_i64(o64, taddr, cpu_exclusive_val, n64,
4961                                   get_mem_index(s), opc);
4962
4963        tcg_gen_setcond_i64(TCG_COND_NE, o64, o64, cpu_exclusive_val);
4964        tcg_gen_extrl_i64_i32(t0, o64);
4965    } else {
4966        t2 = tcg_temp_new_i32();
4967        tcg_gen_extrl_i64_i32(t2, cpu_exclusive_val);
4968        tcg_gen_atomic_cmpxchg_i32(t0, taddr, t2, t1, get_mem_index(s), opc);
4969        tcg_gen_setcond_i32(TCG_COND_NE, t0, t0, t2);
4970    }
4971    tcg_gen_mov_i32(cpu_R[rd], t0);
4972    tcg_gen_br(done_label);
4973
4974    gen_set_label(fail_label);
4975    tcg_gen_movi_i32(cpu_R[rd], 1);
4976    gen_set_label(done_label);
4977    tcg_gen_movi_i64(cpu_exclusive_addr, -1);
4978}
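
    /*
     * Illustrative guest sequence (A32 syntax) exercising the two helpers
     * above:
     *     retry:  ldrex   r1, [r0]
     *             add     r1, r1, #1
     *             strex   r2, r1, [r0]
     *             cmp     r2, #0
     *             bne     retry
     * The LDREX records the address and value in cpu_exclusive_{addr,val};
     * the STREX succeeds (r2 == 0) only if the cmpxchg still observes that
     * value at the recorded address.
     */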
4979
4980/* gen_srs:
4982 * @s: DisasContext
4983 * @mode: mode field from insn (which stack to store to)
4984 * @amode: addressing mode (DA/IA/DB/IB), encoded as per P,U bits in ARM insn
4985 * @writeback: true if writeback bit set
4986 *
4987 * Generate code for the SRS (Store Return State) insn.
4988 */
4989static void gen_srs(DisasContext *s,
4990                    uint32_t mode, uint32_t amode, bool writeback)
4991{
4992    int32_t offset;
4993    TCGv_i32 addr, tmp;
4994    bool undef = false;
4995
4996    /* SRS is:
4997     * - trapped to EL3 if EL3 is AArch64 and we are at Secure EL1
4998     *   and specified mode is monitor mode
4999     * - UNDEFINED in Hyp mode
5000     * - UNPREDICTABLE in User or System mode
5001     * - UNPREDICTABLE if the specified mode is:
5002     * -- not implemented
5003     * -- not a valid mode number
5004     * -- a mode that's at a higher exception level
5005     * -- Monitor, if we are Non-secure
5006     * For the UNPREDICTABLE cases we choose to UNDEF.
5007     */
5008    if (s->current_el == 1 && !s->ns && mode == ARM_CPU_MODE_MON) {
5009        gen_exception_insn_el(s, 0, EXCP_UDEF, syn_uncategorized(), 3);
5010        return;
5011    }
5012
5013    if (s->current_el == 0 || s->current_el == 2) {
5014        undef = true;
5015    }
5016
5017    switch (mode) {
5018    case ARM_CPU_MODE_USR:
5019    case ARM_CPU_MODE_FIQ:
5020    case ARM_CPU_MODE_IRQ:
5021    case ARM_CPU_MODE_SVC:
5022    case ARM_CPU_MODE_ABT:
5023    case ARM_CPU_MODE_UND:
5024    case ARM_CPU_MODE_SYS:
5025        break;
5026    case ARM_CPU_MODE_HYP:
5027        if (s->current_el == 1 || !arm_dc_feature(s, ARM_FEATURE_EL2)) {
5028            undef = true;
5029        }
5030        break;
5031    case ARM_CPU_MODE_MON:
5032        /* No need to check specifically for "are we non-secure" because
5033         * we've already made EL0 UNDEF and handled the trap for S-EL1;
5034         * so if this isn't EL3 then we must be non-secure.
5035         */
5036        if (s->current_el != 3) {
5037            undef = true;
5038        }
5039        break;
5040    default:
5041        undef = true;
5042    }
5043
5044    if (undef) {
5045        unallocated_encoding(s);
5046        return;
5047    }
5048
5049    addr = tcg_temp_new_i32();
5050    /* get_r13_banked() will raise an exception if called from System mode */
5051    gen_set_condexec(s);
5052    gen_update_pc(s, 0);
5053    gen_helper_get_r13_banked(addr, cpu_env, tcg_constant_i32(mode));
5054    switch (amode) {
5055    case 0: /* DA */
5056        offset = -4;
5057        break;
5058    case 1: /* IA */
5059        offset = 0;
5060        break;
5061    case 2: /* DB */
5062        offset = -8;
5063        break;
5064    case 3: /* IB */
5065        offset = 4;
5066        break;
5067    default:
5068        g_assert_not_reached();
5069    }
5070    tcg_gen_addi_i32(addr, addr, offset);
5071    tmp = load_reg(s, 14);
5072    gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN);
5073    tmp = load_cpu_field(spsr);
5074    tcg_gen_addi_i32(addr, addr, 4);
5075    gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN);
5076    if (writeback) {
5077        switch (amode) {
5078        case 0:
5079            offset = -8;
5080            break;
5081        case 1:
5082            offset = 4;
5083            break;
5084        case 2:
5085            offset = -4;
5086            break;
5087        case 3:
5088            offset = 0;
5089            break;
5090        default:
5091            g_assert_not_reached();
5092        }
5093        tcg_gen_addi_i32(addr, addr, offset);
5094        gen_helper_set_r13_banked(cpu_env, tcg_constant_i32(mode), addr);
5095    }
5096    s->base.is_jmp = DISAS_UPDATE_EXIT;
5097}
5098
5099/* Skip this instruction if the ARM condition is false */
5100static void arm_skip_unless(DisasContext *s, uint32_t cond)
5101{
5102    arm_gen_condlabel(s);
5103    arm_gen_test_cc(cond ^ 1, s->condlabel.label);
5104}
5105
5106
5107/*
5108 * Constant expanders used by T16/T32 decode
5109 */
5110
5111/* Return only the rotation part of T32ExpandImm.  */
5112static int t32_expandimm_rot(DisasContext *s, int x)
5113{
5114    return x & 0xc00 ? extract32(x, 7, 5) : 0;
5115}
5116
5117/* Return the unrotated immediate from T32ExpandImm.  */
5118static int t32_expandimm_imm(DisasContext *s, int x)
5119{
5120    int imm = extract32(x, 0, 8);
5121
5122    switch (extract32(x, 8, 4)) {
5123    case 0: /* XY */
5124        /* Nothing to do.  */
5125        break;
5126    case 1: /* 00XY00XY */
5127        imm *= 0x00010001;
5128        break;
5129    case 2: /* XY00XY00 */
5130        imm *= 0x01000100;
5131        break;
5132    case 3: /* XYXYXYXY */
5133        imm *= 0x01010101;
5134        break;
5135    default:
5136        /* Rotated constant.  */
5137        imm |= 0x80;
5138        break;
5139    }
5140    return imm;
5141}
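
    /*
     * Worked example (illustrative): imm12 = 0x1AB has bits [11:8] == 1, so
     * the 8-bit value 0xAB is replicated to 0x00AB00AB.  imm12 = 0x42B hits
     * the default case: the unrotated value is 0x2B | 0x80 = 0xAB and
     * t32_expandimm_rot() extracts a rotation of 8, so the final constant
     * is ror32(0xAB, 8) = 0xAB000000.
     */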
5142
5143static int t32_branch24(DisasContext *s, int x)
5144{
5145    /* Convert J1:J2 at x[22:21] to I2:I1, which involves I=J^~S.  */
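        /*
         * When S (the sign of the sign-extended x) is 1, I equals J and no
         * change is needed; when S is 0 the J bits must be inverted, which
         * the branch-free multiply by !(x < 0) below achieves.
         */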
5146    x ^= !(x < 0) * (3 << 21);
5147    /* Append the final zero.  */
5148    return x << 1;
5149}
5150
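    /* T16 data-processing insns set the flags only when not in an IT block. */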
5151static int t16_setflags(DisasContext *s)
5152{
5153    return s->condexec_mask == 0;
5154}
5155
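    /*
     * In the T16 PUSH/POP register lists, bit 8 of the encoding adds LR (r14)
     * for PUSH and PC (r15) for POP; the two expanders below shift that bit
     * into the corresponding position of the full register mask.
     */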
5156static int t16_push_list(DisasContext *s, int x)
5157{
5158    return (x & 0xff) | (x & 0x100) << (14 - 8);
5159}
5160
5161static int t16_pop_list(DisasContext *s, int x)
5162{
5163    return (x & 0xff) | (x & 0x100) << (15 - 8);
5164}
5165
5166/*
5167 * Include the generated decoders.
5168 */
5169
5170#include "decode-a32.c.inc"
5171#include "decode-a32-uncond.c.inc"
5172#include "decode-t32.c.inc"
5173#include "decode-t16.c.inc"
5174
5175static bool valid_cp(DisasContext *s, int cp)
5176{
5177    /*
5178     * Return true if this coprocessor field indicates something
5179     * that's really a possible coprocessor.
5180     * For v7 and earlier, coprocessors 8..15 were reserved for Arm use,
5181     * and of those only cp14 and cp15 were used for registers.
5182     * cp10 and cp11 were used for VFP and Neon, whose decode is
5183     * dealt with elsewhere. With the advent of fp16, cp9 is also
5184     * now part of VFP.
5185     * For v8A and later, the encoding has been tightened so that
5186     * only cp14 and cp15 are valid, and other values aren't considered
5187     * to be in the coprocessor-instruction space at all. v8M still
5188     * permits coprocessors 0..7.
5189     * For XScale, we must not decode the XScale cp0, cp1 space as
5190     * a standard coprocessor insn, because we want to fall through to
5191     * the legacy disas_xscale_insn() decoder after decodetree is done.
5192     */
5193    if (arm_dc_feature(s, ARM_FEATURE_XSCALE) && (cp == 0 || cp == 1)) {
5194        return false;
5195    }
5196
5197    if (arm_dc_feature(s, ARM_FEATURE_V8) &&
5198        !arm_dc_feature(s, ARM_FEATURE_M)) {
5199        return cp >= 14;
5200    }
5201    return cp < 8 || cp >= 14;
5202}
5203
5204static bool trans_MCR(DisasContext *s, arg_MCR *a)
5205{
5206    if (!valid_cp(s, a->cp)) {
5207        return false;
5208    }
5209    do_coproc_insn(s, a->cp, false, a->opc1, a->crn, a->crm, a->opc2,
5210                   false, a->rt, 0);
5211    return true;
5212}
5213
5214static bool trans_MRC(DisasContext *s, arg_MRC *a)
5215{
5216    if (!valid_cp(s, a->cp)) {
5217        return false;
5218    }
5219    do_coproc_insn(s, a->cp, false, a->opc1, a->crn, a->crm, a->opc2,
5220                   true, a->rt, 0);
5221    return true;
5222}
5223
5224static bool trans_MCRR(DisasContext *s, arg_MCRR *a)
5225{
5226    if (!valid_cp(s, a->cp)) {
5227        return false;
5228    }
5229    do_coproc_insn(s, a->cp, true, a->opc1, 0, a->crm, 0,
5230                   false, a->rt, a->rt2);
5231    return true;
5232}
5233
5234static bool trans_MRRC(DisasContext *s, arg_MRRC *a)
5235{
5236    if (!valid_cp(s, a->cp)) {
5237        return false;
5238    }
5239    do_coproc_insn(s, a->cp, true, a->opc1, 0, a->crm, 0,
5240                   true, a->rt, a->rt2);
5241    return true;
5242}
5243
5244/* Helpers to swap operands for reverse-subtract.  */
5245static void gen_rsb(TCGv_i32 dst, TCGv_i32 a, TCGv_i32 b)
5246{
5247    tcg_gen_sub_i32(dst, b, a);
5248}
5249
5250static void gen_rsb_CC(TCGv_i32 dst, TCGv_i32 a, TCGv_i32 b)
5251{
5252    gen_sub_CC(dst, b, a);
5253}
5254
5255static void gen_rsc(TCGv_i32 dest, TCGv_i32 a, TCGv_i32 b)
5256{
5257    gen_sub_carry(dest, b, a);
5258}
5259
5260static void gen_rsc_CC(TCGv_i32 dest, TCGv_i32 a, TCGv_i32 b)
5261{
5262    gen_sbc_CC(dest, b, a);
5263}
5264
5265/*
5266 * Helpers for the data processing routines.
5267 *
5268 * After the computation, store the result back.
5269 * This may be suppressed altogether (STREG_NONE), stored to a register
5270 * (STREG_NORMAL), checked at runtime against the stack limits
5271 * (STREG_SP_CHECK), or used to generate an exception return (STREG_EXC_RET).
5272 *
5273 * Always return true, indicating success for a trans_* function.
5274 */
5275typedef enum {
5276   STREG_NONE,
5277   STREG_NORMAL,
5278   STREG_SP_CHECK,
5279   STREG_EXC_RET,
5280} StoreRegKind;
5281
5282static bool store_reg_kind(DisasContext *s, int rd,
5283                            TCGv_i32 val, StoreRegKind kind)
5284{
5285    switch (kind) {
5286    case STREG_NONE:
5287        return true;
5288    case STREG_NORMAL:
5289        /* See ALUWritePC: Interworking only from a32 mode. */
5290        if (s->thumb) {
5291            store_reg(s, rd, val);
5292        } else {
5293            store_reg_bx(s, rd, val);
5294        }
5295        return true;
5296    case STREG_SP_CHECK:
5297        store_sp_checked(s, val);
5298        return true;
5299    case STREG_EXC_RET:
5300        gen_exception_return(s, val);
5301        return true;
5302    }
5303    g_assert_not_reached();
5304}
5305
5306/*
5307 * Data Processing (register)
5308 *
5309 * Operate, optionally setting flags, on one register source,
5310 * one immediate-shifted register source, and a destination.
5311 */
5312static bool op_s_rrr_shi(DisasContext *s, arg_s_rrr_shi *a,
5313                         void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
5314                         int logic_cc, StoreRegKind kind)
5315{
5316    TCGv_i32 tmp1, tmp2;
5317
5318    tmp2 = load_reg(s, a->rm);
5319    gen_arm_shift_im(tmp2, a->shty, a->shim, logic_cc);
5320    tmp1 = load_reg(s, a->rn);
5321
5322    gen(tmp1, tmp1, tmp2);
5323
5324    if (logic_cc) {
5325        gen_logic_CC(tmp1);
5326    }
5327    return store_reg_kind(s, a->rd, tmp1, kind);
5328}
5329
5330static bool op_s_rxr_shi(DisasContext *s, arg_s_rrr_shi *a,
5331                         void (*gen)(TCGv_i32, TCGv_i32),
5332                         int logic_cc, StoreRegKind kind)
5333{
5334    TCGv_i32 tmp;
5335
5336    tmp = load_reg(s, a->rm);
5337    gen_arm_shift_im(tmp, a->shty, a->shim, logic_cc);
5338
5339    gen(tmp, tmp);
5340    if (logic_cc) {
5341        gen_logic_CC(tmp);
5342    }
5343    return store_reg_kind(s, a->rd, tmp, kind);
5344}
5345
5346/*
5347 * Data-processing (register-shifted register)
5348 *
5349 * Operate, optionally setting flags, on one register source,
5350 * one register-shifted register source, and a destination.
5351 */
5352static bool op_s_rrr_shr(DisasContext *s, arg_s_rrr_shr *a,
5353                         void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
5354                         int logic_cc, StoreRegKind kind)
5355{
5356    TCGv_i32 tmp1, tmp2;
5357
5358    tmp1 = load_reg(s, a->rs);
5359    tmp2 = load_reg(s, a->rm);
5360    gen_arm_shift_reg(tmp2, a->shty, tmp1, logic_cc);
5361    tmp1 = load_reg(s, a->rn);
5362
5363    gen(tmp1, tmp1, tmp2);
5364
5365    if (logic_cc) {
5366        gen_logic_CC(tmp1);
5367    }
5368    return store_reg_kind(s, a->rd, tmp1, kind);
5369}
5370
5371static bool op_s_rxr_shr(DisasContext *s, arg_s_rrr_shr *a,
5372                         void (*gen)(TCGv_i32, TCGv_i32),
5373                         int logic_cc, StoreRegKind kind)
5374{
5375    TCGv_i32 tmp1, tmp2;
5376
5377    tmp1 = load_reg(s, a->rs);
5378    tmp2 = load_reg(s, a->rm);
5379    gen_arm_shift_reg(tmp2, a->shty, tmp1, logic_cc);
5380
5381    gen(tmp2, tmp2);
5382    if (logic_cc) {
5383        gen_logic_CC(tmp2);
5384    }
5385    return store_reg_kind(s, a->rd, tmp2, kind);
5386}
5387
5388/*
5389 * Data-processing (immediate)
5390 *
5391 * Operate, optionally setting flags, on one register source,
5392 * one rotated immediate, and a destination.
5393 *
5394 * Note that logic_cc && a->rot setting CF based on the msb of the
5395 * immediate is the reason why we must pass in the unrotated form
5396 * of the immediate.
5397 */
5398static bool op_s_rri_rot(DisasContext *s, arg_s_rri_rot *a,
5399                         void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
5400                         int logic_cc, StoreRegKind kind)
5401{
5402    TCGv_i32 tmp1;
5403    uint32_t imm;
5404
5405    imm = ror32(a->imm, a->rot);
5406    if (logic_cc && a->rot) {
5407        tcg_gen_movi_i32(cpu_CF, imm >> 31);
5408    }
5409    tmp1 = load_reg(s, a->rn);
5410
5411    gen(tmp1, tmp1, tcg_constant_i32(imm));
5412
5413    if (logic_cc) {
5414        gen_logic_CC(tmp1);
5415    }
5416    return store_reg_kind(s, a->rd, tmp1, kind);
5417}
5418
5419static bool op_s_rxi_rot(DisasContext *s, arg_s_rri_rot *a,
5420                         void (*gen)(TCGv_i32, TCGv_i32),
5421                         int logic_cc, StoreRegKind kind)
5422{
5423    TCGv_i32 tmp;
5424    uint32_t imm;
5425
5426    imm = ror32(a->imm, a->rot);
5427    if (logic_cc && a->rot) {
5428        tcg_gen_movi_i32(cpu_CF, imm >> 31);
5429    }
5430
5431    tmp = tcg_temp_new_i32();
5432    gen(tmp, tcg_constant_i32(imm));
5433
5434    if (logic_cc) {
5435        gen_logic_CC(tmp);
5436    }
5437    return store_reg_kind(s, a->rd, tmp, kind);
5438}
5439
5440#define DO_ANY3(NAME, OP, L, K)                                         \
5441    static bool trans_##NAME##_rrri(DisasContext *s, arg_s_rrr_shi *a)  \
5442    { StoreRegKind k = (K); return op_s_rrr_shi(s, a, OP, L, k); }      \
5443    static bool trans_##NAME##_rrrr(DisasContext *s, arg_s_rrr_shr *a)  \
5444    { StoreRegKind k = (K); return op_s_rrr_shr(s, a, OP, L, k); }      \
5445    static bool trans_##NAME##_rri(DisasContext *s, arg_s_rri_rot *a)   \
5446    { StoreRegKind k = (K); return op_s_rri_rot(s, a, OP, L, k); }
5447
5448#define DO_ANY2(NAME, OP, L, K)                                         \
5449    static bool trans_##NAME##_rxri(DisasContext *s, arg_s_rrr_shi *a)  \
5450    { StoreRegKind k = (K); return op_s_rxr_shi(s, a, OP, L, k); }      \
5451    static bool trans_##NAME##_rxrr(DisasContext *s, arg_s_rrr_shr *a)  \
5452    { StoreRegKind k = (K); return op_s_rxr_shr(s, a, OP, L, k); }      \
5453    static bool trans_##NAME##_rxi(DisasContext *s, arg_s_rri_rot *a)   \
5454    { StoreRegKind k = (K); return op_s_rxi_rot(s, a, OP, L, k); }
5455
5456#define DO_CMP2(NAME, OP, L)                                            \
5457    static bool trans_##NAME##_xrri(DisasContext *s, arg_s_rrr_shi *a)  \
5458    { return op_s_rrr_shi(s, a, OP, L, STREG_NONE); }                   \
5459    static bool trans_##NAME##_xrrr(DisasContext *s, arg_s_rrr_shr *a)  \
5460    { return op_s_rrr_shr(s, a, OP, L, STREG_NONE); }                   \
5461    static bool trans_##NAME##_xri(DisasContext *s, arg_s_rri_rot *a)   \
5462    { return op_s_rri_rot(s, a, OP, L, STREG_NONE); }
5463
5464DO_ANY3(AND, tcg_gen_and_i32, a->s, STREG_NORMAL)
5465DO_ANY3(EOR, tcg_gen_xor_i32, a->s, STREG_NORMAL)
5466DO_ANY3(ORR, tcg_gen_or_i32, a->s, STREG_NORMAL)
5467DO_ANY3(BIC, tcg_gen_andc_i32, a->s, STREG_NORMAL)
5468
5469DO_ANY3(RSB, a->s ? gen_rsb_CC : gen_rsb, false, STREG_NORMAL)
5470DO_ANY3(ADC, a->s ? gen_adc_CC : gen_add_carry, false, STREG_NORMAL)
5471DO_ANY3(SBC, a->s ? gen_sbc_CC : gen_sub_carry, false, STREG_NORMAL)
5472DO_ANY3(RSC, a->s ? gen_rsc_CC : gen_rsc, false, STREG_NORMAL)
5473
5474DO_CMP2(TST, tcg_gen_and_i32, true)
5475DO_CMP2(TEQ, tcg_gen_xor_i32, true)
5476DO_CMP2(CMN, gen_add_CC, false)
5477DO_CMP2(CMP, gen_sub_CC, false)
5478
5479DO_ANY3(ADD, a->s ? gen_add_CC : tcg_gen_add_i32, false,
5480        a->rd == 13 && a->rn == 13 ? STREG_SP_CHECK : STREG_NORMAL)
5481
5482/*
5483 * Note that in the computation of StoreRegKind below we may return out
5484 * of the middle of the functions expanded by DO_ANY3, and that we modify
5485 * a->s inside the K parameter before it is used by OP.
5486 */
5487DO_ANY3(SUB, a->s ? gen_sub_CC : tcg_gen_sub_i32, false,
5488        ({
5489            StoreRegKind ret = STREG_NORMAL;
5490            if (a->rd == 15 && a->s) {
5491                /*
5492                 * See ALUExceptionReturn:
5493                 * In User mode, UNPREDICTABLE; we choose UNDEF.
5494                 * In Hyp mode, UNDEFINED.
5495                 */
5496                if (IS_USER(s) || s->current_el == 2) {
5497                    unallocated_encoding(s);
5498                    return true;
5499                }
5500                /* There is no writeback of nzcv to PSTATE.  */
5501                a->s = 0;
5502                ret = STREG_EXC_RET;
5503            } else if (a->rd == 13 && a->rn == 13) {
5504                ret = STREG_SP_CHECK;
5505            }
5506            ret;
5507        }))
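
    /*
     * The rd == 15 && a->s case above is the classic A32 exception-return
     * idiom, e.g. "SUBS PC, LR, #4" at the end of an IRQ handler: the result
     * goes to the PC and SPSR is copied back to CPSR by gen_exception_return().
     */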
5508
5509DO_ANY2(MOV, tcg_gen_mov_i32, a->s,
5510        ({
5511            StoreRegKind ret = STREG_NORMAL;
5512            if (a->rd == 15 && a->s) {
5513                /*
5514                 * See ALUExceptionReturn:
5515                 * In User mode, UNPREDICTABLE; we choose UNDEF.
5516                 * In Hyp mode, UNDEFINED.
5517                 */
5518                if (IS_USER(s) || s->current_el == 2) {
5519                    unallocated_encoding(s);
5520                    return true;
5521                }
5522                /* There is no writeback of nzcv to PSTATE.  */
5523                a->s = 0;
5524                ret = STREG_EXC_RET;
5525            } else if (a->rd == 13) {
5526                ret = STREG_SP_CHECK;
5527            }
5528            ret;
5529        }))
5530
5531DO_ANY2(MVN, tcg_gen_not_i32, a->s, STREG_NORMAL)
5532
5533/*
5534 * ORN is only available with T32, so there is no register-shifted-register
5535 * form of the insn.  Using the DO_ANY3 macro would create an unused function.
5536 */
5537static bool trans_ORN_rrri(DisasContext *s, arg_s_rrr_shi *a)
5538{
5539    return op_s_rrr_shi(s, a, tcg_gen_orc_i32, a->s, STREG_NORMAL);
5540}
5541
5542static bool trans_ORN_rri(DisasContext *s, arg_s_rri_rot *a)
5543{
5544    return op_s_rri_rot(s, a, tcg_gen_orc_i32, a->s, STREG_NORMAL);
5545}
5546
5547#undef DO_ANY3
5548#undef DO_ANY2
5549#undef DO_CMP2
5550
5551static bool trans_ADR(DisasContext *s, arg_ri *a)
5552{
5553    store_reg_bx(s, a->rd, add_reg_for_lit(s, 15, a->imm));
5554    return true;
5555}
5556
5557static bool trans_MOVW(DisasContext *s, arg_MOVW *a)
5558{
5559    if (!ENABLE_ARCH_6T2) {
5560        return false;
5561    }
5562
5563    store_reg(s, a->rd, tcg_constant_i32(a->imm));
5564    return true;
5565}
5566
5567static bool trans_MOVT(DisasContext *s, arg_MOVW *a)
5568{
5569    TCGv_i32 tmp;
5570
5571    if (!ENABLE_ARCH_6T2) {
5572        return false;
5573    }
5574
5575    tmp = load_reg(s, a->rd);
5576    tcg_gen_ext16u_i32(tmp, tmp);
5577    tcg_gen_ori_i32(tmp, tmp, a->imm << 16);
5578    store_reg(s, a->rd, tmp);
5579    return true;
5580}
5581
5582/*
5583 * v8.1M MVE wide-shifts
5584 */
5585static bool do_mve_shl_ri(DisasContext *s, arg_mve_shl_ri *a,
5586                          WideShiftImmFn *fn)
5587{
5588    TCGv_i64 rda;
5589    TCGv_i32 rdalo, rdahi;
5590
5591    if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
5592        /* Decode falls through to ORR/MOV UNPREDICTABLE handling */
5593        return false;
5594    }
5595    if (a->rdahi == 15) {
5596        /* These are a different encoding (SQSHL/SRSHR/UQSHL/URSHR) */
5597        return false;
5598    }
5599    if (!dc_isar_feature(aa32_mve, s) ||
5600        !arm_dc_feature(s, ARM_FEATURE_M_MAIN) ||
5601        a->rdahi == 13) {
5602        /* RdaHi == 13 is UNPREDICTABLE; we choose to UNDEF */
5603        unallocated_encoding(s);
5604        return true;
5605    }
5606
5607    if (a->shim == 0) {
5608        a->shim = 32;
5609    }
5610
5611    rda = tcg_temp_new_i64();
5612    rdalo = load_reg(s, a->rdalo);
5613    rdahi = load_reg(s, a->rdahi);
5614    tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
5615
5616    fn(rda, rda, a->shim);
5617
5618    tcg_gen_extrl_i64_i32(rdalo, rda);
5619    tcg_gen_extrh_i64_i32(rdahi, rda);
5620    store_reg(s, a->rdalo, rdalo);
5621    store_reg(s, a->rdahi, rdahi);
5622
5623    return true;
5624}
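
    /*
     * Illustrative example (assuming the usual MVE assembler syntax): for
     * "ASRL r0, r1, #8", RdaLo is r0 and RdaHi is r1, so the code above
     * shifts the 64-bit value r1:r0 by the immediate and writes both halves
     * back.
     */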
5625
5626static bool trans_ASRL_ri(DisasContext *s, arg_mve_shl_ri *a)
5627{
5628    return do_mve_shl_ri(s, a, tcg_gen_sari_i64);
5629}
5630
5631static bool trans_LSLL_ri(DisasContext *s, arg_mve_shl_ri *a)
5632{
5633    return do_mve_shl_ri(s, a, tcg_gen_shli_i64);
5634}
5635
5636static bool trans_LSRL_ri(DisasContext *s, arg_mve_shl_ri *a)
5637{
5638    return do_mve_shl_ri(s, a, tcg_gen_shri_i64);
5639}
5640
5641static void gen_mve_sqshll(TCGv_i64 r, TCGv_i64 n, int64_t shift)
5642{
5643    gen_helper_mve_sqshll(r, cpu_env, n, tcg_constant_i32(shift));
5644}
5645
5646static bool trans_SQSHLL_ri(DisasContext *s, arg_mve_shl_ri *a)
5647{
5648    return do_mve_shl_ri(s, a, gen_mve_sqshll);
5649}
5650
5651static void gen_mve_uqshll(TCGv_i64 r, TCGv_i64 n, int64_t shift)
5652{
5653    gen_helper_mve_uqshll(r, cpu_env, n, tcg_constant_i32(shift));
5654}
5655
5656static bool trans_UQSHLL_ri(DisasContext *s, arg_mve_shl_ri *a)
5657{
5658    return do_mve_shl_ri(s, a, gen_mve_uqshll);
5659}
5660
5661static bool trans_SRSHRL_ri(DisasContext *s, arg_mve_shl_ri *a)
5662{
5663    return do_mve_shl_ri(s, a, gen_srshr64_i64);
5664}
5665
5666static bool trans_URSHRL_ri(DisasContext *s, arg_mve_shl_ri *a)
5667{
5668    return do_mve_shl_ri(s, a, gen_urshr64_i64);
5669}
5670
5671static bool do_mve_shl_rr(DisasContext *s, arg_mve_shl_rr *a, WideShiftFn *fn)
5672{
5673    TCGv_i64 rda;
5674    TCGv_i32 rdalo, rdahi;
5675
5676    if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
5677        /* Decode falls through to ORR/MOV UNPREDICTABLE handling */
5678        return false;
5679    }
5680    if (a->rdahi == 15) {
5681        /* These are a different encoding (SQSHL/SRSHR/UQSHL/URSHR) */
5682        return false;
5683    }
5684    if (!dc_isar_feature(aa32_mve, s) ||
5685        !arm_dc_feature(s, ARM_FEATURE_M_MAIN) ||
5686        a->rdahi == 13 || a->rm == 13 || a->rm == 15 ||
5687        a->rm == a->rdahi || a->rm == a->rdalo) {
5688        /* These rdahi/rdalo/rm cases are UNPREDICTABLE; we choose to UNDEF */
5689        unallocated_encoding(s);
5690        return true;
5691    }
5692
5693    rda = tcg_temp_new_i64();
5694    rdalo = load_reg(s, a->rdalo);
5695    rdahi = load_reg(s, a->rdahi);
5696    tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
5697
5698    /* The helper takes care of the sign-extension of the low 8 bits of Rm */
5699    fn(rda, cpu_env, rda, cpu_R[a->rm]);
5700
5701    tcg_gen_extrl_i64_i32(rdalo, rda);
5702    tcg_gen_extrh_i64_i32(rdahi, rda);
5703    store_reg(s, a->rdalo, rdalo);
5704    store_reg(s, a->rdahi, rdahi);
5705
5706    return true;
5707}
5708
5709static bool trans_LSLL_rr(DisasContext *s, arg_mve_shl_rr *a)
5710{
5711    return do_mve_shl_rr(s, a, gen_helper_mve_ushll);
5712}
5713
5714static bool trans_ASRL_rr(DisasContext *s, arg_mve_shl_rr *a)
5715{
5716    return do_mve_shl_rr(s, a, gen_helper_mve_sshrl);
5717}
5718
5719static bool trans_UQRSHLL64_rr(DisasContext *s, arg_mve_shl_rr *a)
5720{
5721    return do_mve_shl_rr(s, a, gen_helper_mve_uqrshll);
5722}
5723
5724static bool trans_SQRSHRL64_rr(DisasContext *s, arg_mve_shl_rr *a)
5725{
5726    return do_mve_shl_rr(s, a, gen_helper_mve_sqrshrl);
5727}
5728
5729static bool trans_UQRSHLL48_rr(DisasContext *s, arg_mve_shl_rr *a)
5730{
5731    return do_mve_shl_rr(s, a, gen_helper_mve_uqrshll48);
5732}
5733
5734static bool trans_SQRSHRL48_rr(DisasContext *s, arg_mve_shl_rr *a)
5735{
5736    return do_mve_shl_rr(s, a, gen_helper_mve_sqrshrl48);
5737}
5738
5739static bool do_mve_sh_ri(DisasContext *s, arg_mve_sh_ri *a, ShiftImmFn *fn)
5740{
5741    if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
5742        /* Decode falls through to ORR/MOV UNPREDICTABLE handling */
5743        return false;
5744    }
5745    if (!dc_isar_feature(aa32_mve, s) ||
5746        !arm_dc_feature(s, ARM_FEATURE_M_MAIN) ||
5747        a->rda == 13 || a->rda == 15) {
5748        /* These rda cases are UNPREDICTABLE; we choose to UNDEF */
5749        unallocated_encoding(s);
5750        return true;
5751    }
5752
5753    if (a->shim == 0) {
5754        a->shim = 32;
5755    }
5756    fn(cpu_R[a->rda], cpu_R[a->rda], a->shim);
5757
5758    return true;
5759}
5760
5761static bool trans_URSHR_ri(DisasContext *s, arg_mve_sh_ri *a)
5762{
5763    return do_mve_sh_ri(s, a, gen_urshr32_i32);
5764}
5765
5766static bool trans_SRSHR_ri(DisasContext *s, arg_mve_sh_ri *a)
5767{
5768    return do_mve_sh_ri(s, a, gen_srshr32_i32);
5769}
5770
5771static void gen_mve_sqshl(TCGv_i32 r, TCGv_i32 n, int32_t shift)
5772{
5773    gen_helper_mve_sqshl(r, cpu_env, n, tcg_constant_i32(shift));
5774}
5775
5776static bool trans_SQSHL_ri(DisasContext *s, arg_mve_sh_ri *a)
5777{
5778    return do_mve_sh_ri(s, a, gen_mve_sqshl);
5779}
5780
5781static void gen_mve_uqshl(TCGv_i32 r, TCGv_i32 n, int32_t shift)
5782{
5783    gen_helper_mve_uqshl(r, cpu_env, n, tcg_constant_i32(shift));
5784}
5785
5786static bool trans_UQSHL_ri(DisasContext *s, arg_mve_sh_ri *a)
5787{
5788    return do_mve_sh_ri(s, a, gen_mve_uqshl);
5789}
5790
5791static bool do_mve_sh_rr(DisasContext *s, arg_mve_sh_rr *a, ShiftFn *fn)
5792{
5793    if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
5794        /* Decode falls through to ORR/MOV UNPREDICTABLE handling */
5795        return false;
5796    }
5797    if (!dc_isar_feature(aa32_mve, s) ||
5798        !arm_dc_feature(s, ARM_FEATURE_M_MAIN) ||
5799        a->rda == 13 || a->rda == 15 || a->rm == 13 || a->rm == 15 ||
5800        a->rm == a->rda) {
5801        /* These rda/rm cases are UNPREDICTABLE; we choose to UNDEF */
5802        unallocated_encoding(s);
5803        return true;
5804    }
5805
5806    /* The helper takes care of the sign-extension of the low 8 bits of Rm */
5807    fn(cpu_R[a->rda], cpu_env, cpu_R[a->rda], cpu_R[a->rm]);
5808    return true;
5809}
5810
5811static bool trans_SQRSHR_rr(DisasContext *s, arg_mve_sh_rr *a)
5812{
5813    return do_mve_sh_rr(s, a, gen_helper_mve_sqrshr);
5814}
5815
5816static bool trans_UQRSHL_rr(DisasContext *s, arg_mve_sh_rr *a)
5817{
5818    return do_mve_sh_rr(s, a, gen_helper_mve_uqrshl);
5819}
5820
5821/*
5822 * Multiply and multiply accumulate
5823 */
5824
5825static bool op_mla(DisasContext *s, arg_s_rrrr *a, bool add)
5826{
5827    TCGv_i32 t1, t2;
5828
5829    t1 = load_reg(s, a->rn);
5830    t2 = load_reg(s, a->rm);
5831    tcg_gen_mul_i32(t1, t1, t2);
5832    if (add) {
5833        t2 = load_reg(s, a->ra);
5834        tcg_gen_add_i32(t1, t1, t2);
5835    }
5836    if (a->s) {
5837        gen_logic_CC(t1);
5838    }
5839    store_reg(s, a->rd, t1);
5840    return true;
5841}
5842
5843static bool trans_MUL(DisasContext *s, arg_MUL *a)
5844{
5845    return op_mla(s, a, false);
5846}
5847
5848static bool trans_MLA(DisasContext *s, arg_MLA *a)
5849{
5850    return op_mla(s, a, true);
5851}
5852
5853static bool trans_MLS(DisasContext *s, arg_MLS *a)
5854{
5855    TCGv_i32 t1, t2;
5856
5857    if (!ENABLE_ARCH_6T2) {
5858        return false;
5859    }
5860    t1 = load_reg(s, a->rn);
5861    t2 = load_reg(s, a->rm);
5862    tcg_gen_mul_i32(t1, t1, t2);
5863    t2 = load_reg(s, a->ra);
5864    tcg_gen_sub_i32(t1, t2, t1);
5865    store_reg(s, a->rd, t1);
5866    return true;
5867}
5868
5869static bool op_mlal(DisasContext *s, arg_s_rrrr *a, bool uns, bool add)
5870{
5871    TCGv_i32 t0, t1, t2, t3;
5872
5873    t0 = load_reg(s, a->rm);
5874    t1 = load_reg(s, a->rn);
5875    if (uns) {
5876        tcg_gen_mulu2_i32(t0, t1, t0, t1);
5877    } else {
5878        tcg_gen_muls2_i32(t0, t1, t0, t1);
5879    }
5880    if (add) {
5881        t2 = load_reg(s, a->ra);
5882        t3 = load_reg(s, a->rd);
5883        tcg_gen_add2_i32(t0, t1, t0, t1, t2, t3);
5884    }
5885    if (a->s) {
5886        gen_logicq_cc(t0, t1);
5887    }
5888    store_reg(s, a->ra, t0);
5889    store_reg(s, a->rd, t1);
5890    return true;
5891}
5892
5893static bool trans_UMULL(DisasContext *s, arg_UMULL *a)
5894{
5895    return op_mlal(s, a, true, false);
5896}
5897
5898static bool trans_SMULL(DisasContext *s, arg_SMULL *a)
5899{
5900    return op_mlal(s, a, false, false);
5901}
5902
5903static bool trans_UMLAL(DisasContext *s, arg_UMLAL *a)
5904{
5905    return op_mlal(s, a, true, true);
5906}
5907
5908static bool trans_SMLAL(DisasContext *s, arg_SMLAL *a)
5909{
5910    return op_mlal(s, a, false, true);
5911}
5912
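    /*
     * UMAAL: RdHi:RdLo = Rn * Rm + RdHi + RdLo (all unsigned); the sum
     * cannot overflow 64 bits.  Here a->ra is RdLo and a->rd is RdHi,
     * matching the stores at the end of the function.
     */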
5913static bool trans_UMAAL(DisasContext *s, arg_UMAAL *a)
5914{
5915    TCGv_i32 t0, t1, t2, zero;
5916
5917    if (s->thumb
5918        ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
5919        : !ENABLE_ARCH_6) {
5920        return false;
5921    }
5922
5923    t0 = load_reg(s, a->rm);
5924    t1 = load_reg(s, a->rn);
5925    tcg_gen_mulu2_i32(t0, t1, t0, t1);
5926    zero = tcg_constant_i32(0);
5927    t2 = load_reg(s, a->ra);
5928    tcg_gen_add2_i32(t0, t1, t0, t1, t2, zero);
5929    t2 = load_reg(s, a->rd);
5930    tcg_gen_add2_i32(t0, t1, t0, t1, t2, zero);
5931    store_reg(s, a->ra, t0);
5932    store_reg(s, a->rd, t1);
5933    return true;
5934}
5935
5936/*
5937 * Saturating addition and subtraction
5938 */
5939
5940static bool op_qaddsub(DisasContext *s, arg_rrr *a, bool add, bool doub)
5941{
5942    TCGv_i32 t0, t1;
5943
5944    if (s->thumb
5945        ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
5946        : !ENABLE_ARCH_5TE) {
5947        return false;
5948    }
5949
5950    t0 = load_reg(s, a->rm);
5951    t1 = load_reg(s, a->rn);
5952    if (doub) {
5953        gen_helper_add_saturate(t1, cpu_env, t1, t1);
5954    }
5955    if (add) {
5956        gen_helper_add_saturate(t0, cpu_env, t0, t1);
5957    } else {
5958        gen_helper_sub_saturate(t0, cpu_env, t0, t1);
5959    }
5960    store_reg(s, a->rd, t0);
5961    return true;
5962}
5963
5964#define DO_QADDSUB(NAME, ADD, DOUB) \
5965static bool trans_##NAME(DisasContext *s, arg_rrr *a)    \
5966{                                                        \
5967    return op_qaddsub(s, a, ADD, DOUB);                  \
5968}
5969
5970DO_QADDSUB(QADD, true, false)
5971DO_QADDSUB(QSUB, false, false)
5972DO_QADDSUB(QDADD, true, true)
5973DO_QADDSUB(QDSUB, false, true)
5974
5975#undef DO_QADDSUB
5976
5977/*
5978 * Halfword multiply and multiply accumulate
5979 */
5980
5981static bool op_smlaxxx(DisasContext *s, arg_rrrr *a,
5982                       int add_long, bool nt, bool mt)
5983{
5984    TCGv_i32 t0, t1, tl, th;
5985
5986    if (s->thumb
5987        ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
5988        : !ENABLE_ARCH_5TE) {
5989        return false;
5990    }
5991
5992    t0 = load_reg(s, a->rn);
5993    t1 = load_reg(s, a->rm);
5994    gen_mulxy(t0, t1, nt, mt);
5995
5996    switch (add_long) {
5997    case 0:
5998        store_reg(s, a->rd, t0);
5999        break;
6000    case 1:
6001        t1 = load_reg(s, a->ra);
6002        gen_helper_add_setq(t0, cpu_env, t0, t1);
6003        store_reg(s, a->rd, t0);
6004        break;
6005    case 2:
6006        tl = load_reg(s, a->ra);
6007        th = load_reg(s, a->rd);
6008        /* Sign-extend the 32-bit product to 64 bits.  */
6009        t1 = tcg_temp_new_i32();
6010        tcg_gen_sari_i32(t1, t0, 31);
6011        tcg_gen_add2_i32(tl, th, tl, th, t0, t1);
6012        store_reg(s, a->ra, tl);
6013        store_reg(s, a->rd, th);
6014        break;
6015    default:
6016        g_assert_not_reached();
6017    }
6018    return true;
6019}
6020
6021#define DO_SMLAX(NAME, add, nt, mt) \
6022static bool trans_##NAME(DisasContext *s, arg_rrrr *a)     \
6023{                                                          \
6024    return op_smlaxxx(s, a, add, nt, mt);                  \
6025}
6026
6027DO_SMLAX(SMULBB, 0, 0, 0)
6028DO_SMLAX(SMULBT, 0, 0, 1)
6029DO_SMLAX(SMULTB, 0, 1, 0)
6030DO_SMLAX(SMULTT, 0, 1, 1)
6031
6032DO_SMLAX(SMLABB, 1, 0, 0)
6033DO_SMLAX(SMLABT, 1, 0, 1)
6034DO_SMLAX(SMLATB, 1, 1, 0)
6035DO_SMLAX(SMLATT, 1, 1, 1)
6036
6037DO_SMLAX(SMLALBB, 2, 0, 0)
6038DO_SMLAX(SMLALBT, 2, 0, 1)
6039DO_SMLAX(SMLALTB, 2, 1, 0)
6040DO_SMLAX(SMLALTT, 2, 1, 1)
6041
6042#undef DO_SMLAX
6043
6044static bool op_smlawx(DisasContext *s, arg_rrrr *a, bool add, bool mt)
6045{
6046    TCGv_i32 t0, t1;
6047
6048    if (!ENABLE_ARCH_5TE) {
6049        return false;
6050    }
6051
6052    t0 = load_reg(s, a->rn);
6053    t1 = load_reg(s, a->rm);
6054    /*
6055     * Since the nominal result is product<47:16>, shift the 16-bit
6056     * input up by 16 bits, so that the result is at product<63:32>.
6057     */
6058    if (mt) {
6059        tcg_gen_andi_i32(t1, t1, 0xffff0000);
6060    } else {
6061        tcg_gen_shli_i32(t1, t1, 16);
6062    }
6063    tcg_gen_muls2_i32(t0, t1, t0, t1);
6064    if (add) {
6065        t0 = load_reg(s, a->ra);
6066        gen_helper_add_setq(t1, cpu_env, t1, t0);
6067    }
6068    store_reg(s, a->rd, t1);
6069    return true;
6070}
6071
6072#define DO_SMLAWX(NAME, add, mt) \
6073static bool trans_##NAME(DisasContext *s, arg_rrrr *a)     \
6074{                                                          \
6075    return op_smlawx(s, a, add, mt);                       \
6076}
6077
6078DO_SMLAWX(SMULWB, 0, 0)
6079DO_SMLAWX(SMULWT, 0, 1)
6080DO_SMLAWX(SMLAWB, 1, 0)
6081DO_SMLAWX(SMLAWT, 1, 1)
6082
6083#undef DO_SMLAWX
6084
6085/*
6086 * MSR (immediate) and hints
6087 */
6088
6089static bool trans_YIELD(DisasContext *s, arg_YIELD *a)
6090{
6091    /*
6092     * When running single-threaded TCG code, use the helper to ensure that
6093     * the next round-robin scheduled vCPU gets a turn.  When running in
6094     * MTTCG we don't generate jumps to the helper as it won't affect the
6095     * scheduling of other vCPUs.
6096     */
6097    if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
6098        gen_update_pc(s, curr_insn_len(s));
6099        s->base.is_jmp = DISAS_YIELD;
6100    }
6101    return true;
6102}
6103
6104static bool trans_WFE(DisasContext *s, arg_WFE *a)
6105{
6106    /*
6107     * When running single-threaded TCG code, use the helper to ensure that
6108     * the next round-robin scheduled vCPU gets a turn.  In MTTCG mode we
6109     * just skip this instruction.  Currently the SEV/SEVL instructions,
6110     * which are *one* of many ways to wake the CPU from WFE, are not
6111     * implemented so we can't sleep like WFI does.
6112     */
6113    if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
6114        gen_update_pc(s, curr_insn_len(s));
6115        s->base.is_jmp = DISAS_WFE;
6116    }
6117    return true;
6118}
6119
6120static bool trans_WFI(DisasContext *s, arg_WFI *a)
6121{
6122    /* For WFI, halt the vCPU until an interrupt arrives. */
6123    gen_update_pc(s, curr_insn_len(s));
6124    s->base.is_jmp = DISAS_WFI;
6125    return true;
6126}
6127
6128static bool trans_ESB(DisasContext *s, arg_ESB *a)
6129{
6130    /*
6131     * For M-profile, minimal-RAS ESB can be a NOP.
6132     * Without RAS, we must implement this as NOP.
6133     */
6134    if (!arm_dc_feature(s, ARM_FEATURE_M) && dc_isar_feature(aa32_ras, s)) {
6135        /*
6136         * QEMU does not have a source of physical SErrors,
6137         * so we are only concerned with virtual SErrors.
6138         * The pseudocode in the ARM for this case is
6139         *   if PSTATE.EL IN {EL0, EL1} && EL2Enabled() then
6140         *      AArch32.vESBOperation();
6141         * Most of the condition can be evaluated at translation time.
6142         * Test for EL2 present, and defer test for SEL2 to runtime.
6143         */
6144        if (s->current_el <= 1 && arm_dc_feature(s, ARM_FEATURE_EL2)) {
6145            gen_helper_vesb(cpu_env);
6146        }
6147    }
6148    return true;
6149}
6150
6151static bool trans_NOP(DisasContext *s, arg_NOP *a)
6152{
6153    return true;
6154}
6155
6156static bool trans_MSR_imm(DisasContext *s, arg_MSR_imm *a)
6157{
6158    uint32_t val = ror32(a->imm, a->rot * 2);
6159    uint32_t mask = msr_mask(s, a->mask, a->r);
6160
6161    if (gen_set_psr_im(s, mask, a->r, val)) {
6162        unallocated_encoding(s);
6163    }
6164    return true;
6165}
6166
6167/*
6168 * Cyclic Redundancy Check
6169 */
6170
6171static bool op_crc32(DisasContext *s, arg_rrr *a, bool c, MemOp sz)
6172{
6173    TCGv_i32 t1, t2, t3;
6174
6175    if (!dc_isar_feature(aa32_crc32, s)) {
6176        return false;
6177    }
6178
6179    t1 = load_reg(s, a->rn);
6180    t2 = load_reg(s, a->rm);
6181    switch (sz) {
6182    case MO_8:
6183        gen_uxtb(t2);
6184        break;
6185    case MO_16:
6186        gen_uxth(t2);
6187        break;
6188    case MO_32:
6189        break;
6190    default:
6191        g_assert_not_reached();
6192    }
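        /* The crc32/crc32c helpers take the operand width in bytes: 1, 2 or 4. */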
6193    t3 = tcg_constant_i32(1 << sz);
6194    if (c) {
6195        gen_helper_crc32c(t1, t1, t2, t3);
6196    } else {
6197        gen_helper_crc32(t1, t1, t2, t3);
6198    }
6199    store_reg(s, a->rd, t1);
6200    return true;
6201}
6202
6203#define DO_CRC32(NAME, c, sz) \
6204static bool trans_##NAME(DisasContext *s, arg_rrr *a)  \
6205    { return op_crc32(s, a, c, sz); }
6206
6207DO_CRC32(CRC32B, false, MO_8)
6208DO_CRC32(CRC32H, false, MO_16)
6209DO_CRC32(CRC32W, false, MO_32)
6210DO_CRC32(CRC32CB, true, MO_8)
6211DO_CRC32(CRC32CH, true, MO_16)
6212DO_CRC32(CRC32CW, true, MO_32)
6213
6214#undef DO_CRC32
6215
6216/*
6217 * Miscellaneous instructions
6218 */
6219
6220static bool trans_MRS_bank(DisasContext *s, arg_MRS_bank *a)
6221{
6222    if (arm_dc_feature(s, ARM_FEATURE_M)) {
6223        return false;
6224    }
6225    gen_mrs_banked(s, a->r, a->sysm, a->rd);
6226    return true;
6227}
6228
6229static bool trans_MSR_bank(DisasContext *s, arg_MSR_bank *a)
6230{
6231    if (arm_dc_feature(s, ARM_FEATURE_M)) {
6232        return false;
6233    }
6234    gen_msr_banked(s, a->r, a->sysm, a->rn);
6235    return true;
6236}
6237
6238static bool trans_MRS_reg(DisasContext *s, arg_MRS_reg *a)
6239{
6240    TCGv_i32 tmp;
6241
6242    if (arm_dc_feature(s, ARM_FEATURE_M)) {
6243        return false;
6244    }
6245    if (a->r) {
6246        if (IS_USER(s)) {
6247            unallocated_encoding(s);
6248            return true;
6249        }
6250        tmp = load_cpu_field(spsr);
6251    } else {
6252        tmp = tcg_temp_new_i32();
6253        gen_helper_cpsr_read(tmp, cpu_env);
6254    }
6255    store_reg(s, a->rd, tmp);
6256    return true;
6257}
6258
6259static bool trans_MSR_reg(DisasContext *s, arg_MSR_reg *a)
6260{
6261    TCGv_i32 tmp;
6262    uint32_t mask = msr_mask(s, a->mask, a->r);
6263
6264    if (arm_dc_feature(s, ARM_FEATURE_M)) {
6265        return false;
6266    }
6267    tmp = load_reg(s, a->rn);
6268    if (gen_set_psr(s, mask, a->r, tmp)) {
6269        unallocated_encoding(s);
6270    }
6271    return true;
6272}
6273
6274static bool trans_MRS_v7m(DisasContext *s, arg_MRS_v7m *a)
6275{
6276    TCGv_i32 tmp;
6277
6278    if (!arm_dc_feature(s, ARM_FEATURE_M)) {
6279        return false;
6280    }
6281    tmp = tcg_temp_new_i32();
6282    gen_helper_v7m_mrs(tmp, cpu_env, tcg_constant_i32(a->sysm));
6283    store_reg(s, a->rd, tmp);
6284    return true;
6285}
6286
6287static bool trans_MSR_v7m(DisasContext *s, arg_MSR_v7m *a)
6288{
6289    TCGv_i32 addr, reg;
6290
6291    if (!arm_dc_feature(s, ARM_FEATURE_M)) {
6292        return false;
6293    }
6294    addr = tcg_constant_i32((a->mask << 10) | a->sysm);
6295    reg = load_reg(s, a->rn);
6296    gen_helper_v7m_msr(cpu_env, addr, reg);
6297    /* If we wrote to CONTROL, the EL might have changed */
6298    gen_rebuild_hflags(s, true);
6299    gen_lookup_tb(s);
6300    return true;
6301}
6302
6303static bool trans_BX(DisasContext *s, arg_BX *a)
6304{
6305    if (!ENABLE_ARCH_4T) {
6306        return false;
6307    }
6308    gen_bx_excret(s, load_reg(s, a->rm));
6309    return true;
6310}
6311
6312static bool trans_BXJ(DisasContext *s, arg_BXJ *a)
6313{
6314    if (!ENABLE_ARCH_5J || arm_dc_feature(s, ARM_FEATURE_M)) {
6315        return false;
6316    }
6317    /*
6318     * v7A allows BXJ to be trapped via HSTR.TJDBX. We don't waste a
6319     * TBFLAGS bit on a basically-never-happens case, so call a helper
6320     * function to check for the trap and raise the exception if needed
6321     * (passing it the register number for the syndrome value).
6322     * v8A doesn't have this HSTR bit.
6323     */
6324    if (!arm_dc_feature(s, ARM_FEATURE_V8) &&
6325        arm_dc_feature(s, ARM_FEATURE_EL2) &&
6326        s->current_el < 2 && s->ns) {
6327        gen_helper_check_bxj_trap(cpu_env, tcg_constant_i32(a->rm));
6328    }
6329    /* Trivial implementation equivalent to bx.  */
6330    gen_bx(s, load_reg(s, a->rm));
6331    return true;
6332}
6333
6334static bool trans_BLX_r(DisasContext *s, arg_BLX_r *a)
6335{
6336    TCGv_i32 tmp;
6337
6338    if (!ENABLE_ARCH_5) {
6339        return false;
6340    }
6341    tmp = load_reg(s, a->rm);
6342    gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | s->thumb);
6343    gen_bx(s, tmp);
6344    return true;
6345}
6346
6347/*
6348 * BXNS/BLXNS: only exist for v8M with the security extensions,
6349 * and always UNDEF if NonSecure.  We don't implement these in
6350 * the user-only mode either (in theory you can use them from
6351 * Secure User mode but they are too tied in to system emulation).
6352 */
6353static bool trans_BXNS(DisasContext *s, arg_BXNS *a)
6354{
6355    if (!s->v8m_secure || IS_USER_ONLY) {
6356        unallocated_encoding(s);
6357    } else {
6358        gen_bxns(s, a->rm);
6359    }
6360    return true;
6361}
6362
6363static bool trans_BLXNS(DisasContext *s, arg_BLXNS *a)
6364{
6365    if (!s->v8m_secure || IS_USER_ONLY) {
6366        unallocated_encoding(s);
6367    } else {
6368        gen_blxns(s, a->rm);
6369    }
6370    return true;
6371}
6372
6373static bool trans_CLZ(DisasContext *s, arg_CLZ *a)
6374{
6375    TCGv_i32 tmp;
6376
6377    if (!ENABLE_ARCH_5) {
6378        return false;
6379    }
6380    tmp = load_reg(s, a->rm);
6381    tcg_gen_clzi_i32(tmp, tmp, 32);
6382    store_reg(s, a->rd, tmp);
6383    return true;
6384}
6385
6386static bool trans_ERET(DisasContext *s, arg_ERET *a)
6387{
6388    TCGv_i32 tmp;
6389
6390    if (!arm_dc_feature(s, ARM_FEATURE_V7VE)) {
6391        return false;
6392    }
6393    if (IS_USER(s)) {
6394        unallocated_encoding(s);
6395        return true;
6396    }
6397    if (s->current_el == 2) {
6398        /* ERET from Hyp uses ELR_Hyp, not LR */
6399        tmp = load_cpu_field_low32(elr_el[2]);
6400    } else {
6401        tmp = load_reg(s, 14);
6402    }
6403    gen_exception_return(s, tmp);
6404    return true;
6405}
6406
6407static bool trans_HLT(DisasContext *s, arg_HLT *a)
6408{
6409    gen_hlt(s, a->imm);
6410    return true;
6411}
6412
6413static bool trans_BKPT(DisasContext *s, arg_BKPT *a)
6414{
6415    if (!ENABLE_ARCH_5) {
6416        return false;
6417    }
6418    /* BKPT is OK with ECI set and leaves it untouched */
6419    s->eci_handled = true;
6420    if (arm_dc_feature(s, ARM_FEATURE_M) &&
6421        semihosting_enabled(s->current_el == 0) &&
6422        (a->imm == 0xab)) {
6423        gen_exception_internal_insn(s, EXCP_SEMIHOST);
6424    } else {
6425        gen_exception_bkpt_insn(s, syn_aa32_bkpt(a->imm, false));
6426    }
6427    return true;
6428}
6429
6430static bool trans_HVC(DisasContext *s, arg_HVC *a)
6431{
6432    if (!ENABLE_ARCH_7 || arm_dc_feature(s, ARM_FEATURE_M)) {
6433        return false;
6434    }
6435    if (IS_USER(s)) {
6436        unallocated_encoding(s);
6437    } else {
6438        gen_hvc(s, a->imm);
6439    }
6440    return true;
6441}
6442
6443static bool trans_SMC(DisasContext *s, arg_SMC *a)
6444{
6445    if (!ENABLE_ARCH_6K || arm_dc_feature(s, ARM_FEATURE_M)) {
6446        return false;
6447    }
6448    if (IS_USER(s)) {
6449        unallocated_encoding(s);
6450    } else {
6451        gen_smc(s);
6452    }
6453    return true;
6454}
6455
6456static bool trans_SG(DisasContext *s, arg_SG *a)
6457{
6458    if (!arm_dc_feature(s, ARM_FEATURE_M) ||
6459        !arm_dc_feature(s, ARM_FEATURE_V8)) {
6460        return false;
6461    }
6462    /*
6463     * SG (v8M only)
6464     * The bulk of the behaviour for this instruction is implemented
6465     * in v7m_handle_execute_nsc(), which deals with the insn when
6466     * it is executed by a CPU in non-secure state from memory
6467     * which is Secure & NonSecure-Callable.
6468     * Here we only need to handle the remaining cases:
6469     *  * in NS memory (including the "security extension not
6470     *    implemented" case) : NOP
6471     *  * in S memory but CPU already secure (clear IT bits)
6472     * We know that the attribute for the memory this insn is
6473     * in must match the current CPU state, because otherwise
6474     * get_phys_addr_pmsav8 would have generated an exception.
6475     */
6476    if (s->v8m_secure) {
6477        /* Like the IT insn, we don't need to generate any code */
6478        s->condexec_cond = 0;
6479        s->condexec_mask = 0;
6480    }
6481    return true;
6482}
6483
6484static bool trans_TT(DisasContext *s, arg_TT *a)
6485{
6486    TCGv_i32 addr, tmp;
6487
6488    if (!arm_dc_feature(s, ARM_FEATURE_M) ||
6489        !arm_dc_feature(s, ARM_FEATURE_V8)) {
6490        return false;
6491    }
6492    if (a->rd == 13 || a->rd == 15 || a->rn == 15) {
6493        /* We UNDEF for these UNPREDICTABLE cases */
6494        unallocated_encoding(s);
6495        return true;
6496    }
6497    if (a->A && !s->v8m_secure) {
6498        /* This case is UNDEFINED.  */
6499        unallocated_encoding(s);
6500        return true;
6501    }
6502
6503    addr = load_reg(s, a->rn);
6504    tmp = tcg_temp_new_i32();
6505    gen_helper_v7m_tt(tmp, cpu_env, addr, tcg_constant_i32((a->A << 1) | a->T));
6506    store_reg(s, a->rd, tmp);
6507    return true;
6508}
6509
6510/*
6511 * Load/store register index
6512 */
6513
6514static ISSInfo make_issinfo(DisasContext *s, int rd, bool p, bool w)
6515{
6516    ISSInfo ret;
6517
6518    /* ISS not valid if writeback */
6519    if (p && !w) {
6520        ret = rd;
6521        if (curr_insn_len(s) == 2) {
6522            ret |= ISSIs16Bit;
6523        }
6524    } else {
6525        ret = ISSInvalid;
6526    }
6527    return ret;
6528}
6529
6530static TCGv_i32 op_addr_rr_pre(DisasContext *s, arg_ldst_rr *a)
6531{
6532    TCGv_i32 addr = load_reg(s, a->rn);
6533
6534    if (s->v8m_stackcheck && a->rn == 13 && a->w) {
6535        gen_helper_v8m_stackcheck(cpu_env, addr);
6536    }
6537
6538    if (a->p) {
6539        TCGv_i32 ofs = load_reg(s, a->rm);
6540        gen_arm_shift_im(ofs, a->shtype, a->shimm, 0);
6541        if (a->u) {
6542            tcg_gen_add_i32(addr, addr, ofs);
6543        } else {
6544            tcg_gen_sub_i32(addr, addr, ofs);
6545        }
6546    }
6547    return addr;
6548}
6549
6550static void op_addr_rr_post(DisasContext *s, arg_ldst_rr *a,
6551                            TCGv_i32 addr, int address_offset)
6552{
6553    if (!a->p) {
6554        TCGv_i32 ofs = load_reg(s, a->rm);
6555        gen_arm_shift_im(ofs, a->shtype, a->shimm, 0);
6556        if (a->u) {
6557            tcg_gen_add_i32(addr, addr, ofs);
6558        } else {
6559            tcg_gen_sub_i32(addr, addr, ofs);
6560        }
6561    } else if (!a->w) {
6562        return;
6563    }
6564    tcg_gen_addi_i32(addr, addr, address_offset);
6565    store_reg(s, a->rn, addr);
6566}
6567
6568static bool op_load_rr(DisasContext *s, arg_ldst_rr *a,
6569                       MemOp mop, int mem_idx)
6570{
6571    ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w);
6572    TCGv_i32 addr, tmp;
6573
6574    addr = op_addr_rr_pre(s, a);
6575
6576    tmp = tcg_temp_new_i32();
6577    gen_aa32_ld_i32(s, tmp, addr, mem_idx, mop);
6578    disas_set_da_iss(s, mop, issinfo);
6579
6580    /*
6581     * Perform base writeback before the loaded value to
6582     * ensure correct behavior with overlapping index registers.
6583     */
6584    op_addr_rr_post(s, a, addr, 0);
6585    store_reg_from_load(s, a->rt, tmp);
6586    return true;
6587}
6588
6589static bool op_store_rr(DisasContext *s, arg_ldst_rr *a,
6590                        MemOp mop, int mem_idx)
6591{
6592    ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w) | ISSIsWrite;
6593    TCGv_i32 addr, tmp;
6594
6595    /*
6596     * In Thumb encodings of stores Rn=1111 is UNDEF; for Arm it
6597     * is either UNPREDICTABLE or has defined behaviour
6598     */
6599    if (s->thumb && a->rn == 15) {
6600        return false;
6601    }
6602
6603    addr = op_addr_rr_pre(s, a);
6604
6605    tmp = load_reg(s, a->rt);
6606    gen_aa32_st_i32(s, tmp, addr, mem_idx, mop);
6607    disas_set_da_iss(s, mop, issinfo);
6608
6609    op_addr_rr_post(s, a, addr, 0);
6610    return true;
6611}
6612
6613static bool trans_LDRD_rr(DisasContext *s, arg_ldst_rr *a)
6614{
6615    int mem_idx = get_mem_index(s);
6616    TCGv_i32 addr, tmp;
6617
6618    if (!ENABLE_ARCH_5TE) {
6619        return false;
6620    }
6621    if (a->rt & 1) {
6622        unallocated_encoding(s);
6623        return true;
6624    }
6625    addr = op_addr_rr_pre(s, a);
6626
6627    tmp = tcg_temp_new_i32();
6628    gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6629    store_reg(s, a->rt, tmp);
6630
6631    tcg_gen_addi_i32(addr, addr, 4);
6632
6633    tmp = tcg_temp_new_i32();
6634    gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6635    store_reg(s, a->rt + 1, tmp);
6636
6637    /* LDRD w/ base writeback is undefined if the registers overlap.  */
6638    op_addr_rr_post(s, a, addr, -4);
6639    return true;
6640}
6641
6642static bool trans_STRD_rr(DisasContext *s, arg_ldst_rr *a)
6643{
6644    int mem_idx = get_mem_index(s);
6645    TCGv_i32 addr, tmp;
6646
6647    if (!ENABLE_ARCH_5TE) {
6648        return false;
6649    }
6650    if (a->rt & 1) {
6651        unallocated_encoding(s);
6652        return true;
6653    }
6654    addr = op_addr_rr_pre(s, a);
6655
6656    tmp = load_reg(s, a->rt);
6657    gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6658
6659    tcg_gen_addi_i32(addr, addr, 4);
6660
6661    tmp = load_reg(s, a->rt + 1);
6662    gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6663
6664    op_addr_rr_post(s, a, addr, -4);
6665    return true;
6666}
6667
6668/*
6669 * Load/store immediate index
6670 */
6671
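/*
 * Compute the base address for an immediate-offset load/store.
 * As with the register forms, p selects pre/post indexing and u
 * selects add versus subtract; writeback is done in op_addr_ri_post().
 * PC-relative (literal) accesses go via add_reg_for_lit(), which
 * uses the word-aligned PC as the base.
 */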
6672static TCGv_i32 op_addr_ri_pre(DisasContext *s, arg_ldst_ri *a)
6673{
6674    int ofs = a->imm;
6675
6676    if (!a->u) {
6677        ofs = -ofs;
6678    }
6679
6680    if (s->v8m_stackcheck && a->rn == 13 && a->w) {
6681        /*
6682         * Stackcheck. Here we know 'addr' is the current SP;
6683         * U is set if we're moving SP up, else down. It is
6684         * UNKNOWN whether the limit check triggers when SP starts
6685         * below the limit and ends up above it; we chose to do so.
6686         */
6687        if (!a->u) {
6688            TCGv_i32 newsp = tcg_temp_new_i32();
6689            tcg_gen_addi_i32(newsp, cpu_R[13], ofs);
6690            gen_helper_v8m_stackcheck(cpu_env, newsp);
6691        } else {
6692            gen_helper_v8m_stackcheck(cpu_env, cpu_R[13]);
6693        }
6694    }
6695
6696    return add_reg_for_lit(s, a->rn, a->p ? ofs : 0);
6697}
6698
6699static void op_addr_ri_post(DisasContext *s, arg_ldst_ri *a,
6700                            TCGv_i32 addr, int address_offset)
6701{
6702    if (!a->p) {
6703        if (a->u) {
6704            address_offset += a->imm;
6705        } else {
6706            address_offset -= a->imm;
6707        }
6708    } else if (!a->w) {
6709        return;
6710    }
6711    tcg_gen_addi_i32(addr, addr, address_offset);
6712    store_reg(s, a->rn, addr);
6713}
6714
6715static bool op_load_ri(DisasContext *s, arg_ldst_ri *a,
6716                       MemOp mop, int mem_idx)
6717{
6718    ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w);
6719    TCGv_i32 addr, tmp;
6720
6721    addr = op_addr_ri_pre(s, a);
6722
6723    tmp = tcg_temp_new_i32();
6724    gen_aa32_ld_i32(s, tmp, addr, mem_idx, mop);
6725    disas_set_da_iss(s, mop, issinfo);
6726
6727    /*
6728     * Perform base writeback before the loaded value to
6729     * ensure correct behavior with overlapping index registers.
6730     */
6731    op_addr_ri_post(s, a, addr, 0);
6732    store_reg_from_load(s, a->rt, tmp);
6733    return true;
6734}
6735
6736static bool op_store_ri(DisasContext *s, arg_ldst_ri *a,
6737                        MemOp mop, int mem_idx)
6738{
6739    ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w) | ISSIsWrite;
6740    TCGv_i32 addr, tmp;
6741
6742    /*
6743     * In Thumb encodings of stores Rn=1111 is UNDEF; for Arm it
6744     * is either UNPREDICTABLE or has defined behaviour
6745     */
6746    if (s->thumb && a->rn == 15) {
6747        return false;
6748    }
6749
6750    addr = op_addr_ri_pre(s, a);
6751
6752    tmp = load_reg(s, a->rt);
6753    gen_aa32_st_i32(s, tmp, addr, mem_idx, mop);
6754    disas_set_da_iss(s, mop, issinfo);
6755
6756    op_addr_ri_post(s, a, addr, 0);
6757    return true;
6758}
6759
6760static bool op_ldrd_ri(DisasContext *s, arg_ldst_ri *a, int rt2)
6761{
6762    int mem_idx = get_mem_index(s);
6763    TCGv_i32 addr, tmp;
6764
6765    addr = op_addr_ri_pre(s, a);
6766
6767    tmp = tcg_temp_new_i32();
6768    gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6769    store_reg(s, a->rt, tmp);
6770
6771    tcg_gen_addi_i32(addr, addr, 4);
6772
6773    tmp = tcg_temp_new_i32();
6774    gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6775    store_reg(s, rt2, tmp);
6776
6777    /* LDRD w/ base writeback is undefined if the registers overlap.  */
6778    op_addr_ri_post(s, a, addr, -4);
6779    return true;
6780}
6781
6782static bool trans_LDRD_ri_a32(DisasContext *s, arg_ldst_ri *a)
6783{
6784    if (!ENABLE_ARCH_5TE || (a->rt & 1)) {
6785        return false;
6786    }
6787    return op_ldrd_ri(s, a, a->rt + 1);
6788}
6789
6790static bool trans_LDRD_ri_t32(DisasContext *s, arg_ldst_ri2 *a)
6791{
6792    arg_ldst_ri b = {
6793        .u = a->u, .w = a->w, .p = a->p,
6794        .rn = a->rn, .rt = a->rt, .imm = a->imm
6795    };
6796    return op_ldrd_ri(s, &b, a->rt2);
6797}
6798
6799static bool op_strd_ri(DisasContext *s, arg_ldst_ri *a, int rt2)
6800{
6801    int mem_idx = get_mem_index(s);
6802    TCGv_i32 addr, tmp;
6803
6804    addr = op_addr_ri_pre(s, a);
6805
6806    tmp = load_reg(s, a->rt);
6807    gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6808
6809    tcg_gen_addi_i32(addr, addr, 4);
6810
6811    tmp = load_reg(s, rt2);
6812    gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6813
6814    op_addr_ri_post(s, a, addr, -4);
6815    return true;
6816}
6817
6818static bool trans_STRD_ri_a32(DisasContext *s, arg_ldst_ri *a)
6819{
6820    if (!ENABLE_ARCH_5TE || (a->rt & 1)) {
6821        return false;
6822    }
6823    return op_strd_ri(s, a, a->rt + 1);
6824}
6825
6826static bool trans_STRD_ri_t32(DisasContext *s, arg_ldst_ri2 *a)
6827{
6828    arg_ldst_ri b = {
6829        .u = a->u, .w = a->w, .p = a->p,
6830        .rn = a->rn, .rt = a->rt, .imm = a->imm
6831    };
6832    return op_strd_ri(s, &b, a->rt2);
6833}
6834
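/*
 * Expand the four translate functions for each basic load/store insn:
 * the immediate and register offset forms, plus the unprivileged
 * LDRT/STRT-style variants, which differ only in using the user-mode
 * memory index from get_a32_user_mem_index().
 */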
6835#define DO_LDST(NAME, WHICH, MEMOP) \
6836static bool trans_##NAME##_ri(DisasContext *s, arg_ldst_ri *a)        \
6837{                                                                     \
6838    return op_##WHICH##_ri(s, a, MEMOP, get_mem_index(s));            \
6839}                                                                     \
6840static bool trans_##NAME##T_ri(DisasContext *s, arg_ldst_ri *a)       \
6841{                                                                     \
6842    return op_##WHICH##_ri(s, a, MEMOP, get_a32_user_mem_index(s));   \
6843}                                                                     \
6844static bool trans_##NAME##_rr(DisasContext *s, arg_ldst_rr *a)        \
6845{                                                                     \
6846    return op_##WHICH##_rr(s, a, MEMOP, get_mem_index(s));            \
6847}                                                                     \
6848static bool trans_##NAME##T_rr(DisasContext *s, arg_ldst_rr *a)       \
6849{                                                                     \
6850    return op_##WHICH##_rr(s, a, MEMOP, get_a32_user_mem_index(s));   \
6851}
6852
6853DO_LDST(LDR, load, MO_UL)
6854DO_LDST(LDRB, load, MO_UB)
6855DO_LDST(LDRH, load, MO_UW)
6856DO_LDST(LDRSB, load, MO_SB)
6857DO_LDST(LDRSH, load, MO_SW)
6858
6859DO_LDST(STR, store, MO_UL)
6860DO_LDST(STRB, store, MO_UB)
6861DO_LDST(STRH, store, MO_UW)
6862
6863#undef DO_LDST
6864
6865/*
6866 * Synchronization primitives
6867 */
6868
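/*
 * SWP/SWPB: the legacy swap instruction, implemented as a single
 * TCG atomic exchange on the guest address.
 */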
6869static bool op_swp(DisasContext *s, arg_SWP *a, MemOp opc)
6870{
6871    TCGv_i32 addr, tmp;
6872    TCGv taddr;
6873
6874    opc |= s->be_data;
6875    addr = load_reg(s, a->rn);
6876    taddr = gen_aa32_addr(s, addr, opc);
6877
6878    tmp = load_reg(s, a->rt2);
6879    tcg_gen_atomic_xchg_i32(tmp, taddr, tmp, get_mem_index(s), opc);
6880
6881    store_reg(s, a->rt, tmp);
6882    return true;
6883}
6884
6885static bool trans_SWP(DisasContext *s, arg_SWP *a)
6886{
6887    return op_swp(s, a, MO_UL | MO_ALIGN);
6888}
6889
6890static bool trans_SWPB(DisasContext *s, arg_SWP *a)
6891{
6892    return op_swp(s, a, MO_UB);
6893}
6894
6895/*
6896 * Load/Store Exclusive and Load-Acquire/Store-Release
6897 */
6898
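/*
 * Common translation for STREX and friends: 'mop' gives the access
 * size and 'rel' adds the release barrier used by the STL*EX forms.
 * The exclusive-monitor bookkeeping itself is in gen_store_exclusive().
 */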
6899static bool op_strex(DisasContext *s, arg_STREX *a, MemOp mop, bool rel)
6900{
6901    TCGv_i32 addr;
6902    /* Some cases stopped being UNPREDICTABLE in v8A (but not v8M) */
6903    bool v8a = ENABLE_ARCH_8 && !arm_dc_feature(s, ARM_FEATURE_M);
6904
6905    /* We UNDEF for these UNPREDICTABLE cases.  */
6906    if (a->rd == 15 || a->rn == 15 || a->rt == 15
6907        || a->rd == a->rn || a->rd == a->rt
6908        || (!v8a && s->thumb && (a->rd == 13 || a->rt == 13))
6909        || (mop == MO_64
6910            && (a->rt2 == 15
6911                || a->rd == a->rt2
6912                || (!v8a && s->thumb && a->rt2 == 13)))) {
6913        unallocated_encoding(s);
6914        return true;
6915    }
6916
6917    if (rel) {
6918        tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
6919    }
6920
6921    addr = tcg_temp_new_i32();
6922    load_reg_var(s, addr, a->rn);
6923    tcg_gen_addi_i32(addr, addr, a->imm);
6924
6925    gen_store_exclusive(s, a->rd, a->rt, a->rt2, addr, mop);
6926    return true;
6927}
6928
6929static bool trans_STREX(DisasContext *s, arg_STREX *a)
6930{
6931    if (!ENABLE_ARCH_6) {
6932        return false;
6933    }
6934    return op_strex(s, a, MO_32, false);
6935}
6936
6937static bool trans_STREXD_a32(DisasContext *s, arg_STREX *a)
6938{
6939    if (!ENABLE_ARCH_6K) {
6940        return false;
6941    }
6942    /* We UNDEF for these UNPREDICTABLE cases.  */
6943    if (a->rt & 1) {
6944        unallocated_encoding(s);
6945        return true;
6946    }
6947    a->rt2 = a->rt + 1;
6948    return op_strex(s, a, MO_64, false);
6949}
6950
6951static bool trans_STREXD_t32(DisasContext *s, arg_STREX *a)
6952{
6953    return op_strex(s, a, MO_64, false);
6954}
6955
6956static bool trans_STREXB(DisasContext *s, arg_STREX *a)
6957{
6958    if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
6959        return false;
6960    }
6961    return op_strex(s, a, MO_8, false);
6962}
6963
6964static bool trans_STREXH(DisasContext *s, arg_STREX *a)
6965{
6966    if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
6967        return false;
6968    }
6969    return op_strex(s, a, MO_16, false);
6970}
6971
6972static bool trans_STLEX(DisasContext *s, arg_STREX *a)
6973{
6974    if (!ENABLE_ARCH_8) {
6975        return false;
6976    }
6977    return op_strex(s, a, MO_32, true);
6978}
6979
6980static bool trans_STLEXD_a32(DisasContext *s, arg_STREX *a)
6981{
6982    if (!ENABLE_ARCH_8) {
6983        return false;
6984    }
6985    /* We UNDEF for these UNPREDICTABLE cases.  */
6986    if (a->rt & 1) {
6987        unallocated_encoding(s);
6988        return true;
6989    }
6990    a->rt2 = a->rt + 1;
6991    return op_strex(s, a, MO_64, true);
6992}
6993
6994static bool trans_STLEXD_t32(DisasContext *s, arg_STREX *a)
6995{
6996    if (!ENABLE_ARCH_8) {
6997        return false;
6998    }
6999    return op_strex(s, a, MO_64, true);
7000}
7001
7002static bool trans_STLEXB(DisasContext *s, arg_STREX *a)
7003{
7004    if (!ENABLE_ARCH_8) {
7005        return false;
7006    }
7007    return op_strex(s, a, MO_8, true);
7008}
7009
7010static bool trans_STLEXH(DisasContext *s, arg_STREX *a)
7011{
7012    if (!ENABLE_ARCH_8) {
7013        return false;
7014    }
7015    return op_strex(s, a, MO_16, true);
7016}
7017
7018static bool op_stl(DisasContext *s, arg_STL *a, MemOp mop)
7019{
7020    TCGv_i32 addr, tmp;
7021
7022    if (!ENABLE_ARCH_8) {
7023        return false;
7024    }
7025    /* We UNDEF for these UNPREDICTABLE cases.  */
7026    if (a->rn == 15 || a->rt == 15) {
7027        unallocated_encoding(s);
7028        return true;
7029    }
7030
7031    addr = load_reg(s, a->rn);
7032    tmp = load_reg(s, a->rt);
7033    tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
7034    gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), mop | MO_ALIGN);
7035    disas_set_da_iss(s, mop, a->rt | ISSIsAcqRel | ISSIsWrite);
7036
7037    return true;
7038}
7039
7040static bool trans_STL(DisasContext *s, arg_STL *a)
7041{
7042    return op_stl(s, a, MO_UL);
7043}
7044
7045static bool trans_STLB(DisasContext *s, arg_STL *a)
7046{
7047    return op_stl(s, a, MO_UB);
7048}
7049
7050static bool trans_STLH(DisasContext *s, arg_STL *a)
7051{
7052    return op_stl(s, a, MO_UW);
7053}
7054
7055static bool op_ldrex(DisasContext *s, arg_LDREX *a, MemOp mop, bool acq)
7056{
7057    TCGv_i32 addr;
7058    /* Some cases stopped being UNPREDICTABLE in v8A (but not v8M) */
7059    bool v8a = ENABLE_ARCH_8 && !arm_dc_feature(s, ARM_FEATURE_M);
7060
7061    /* We UNDEF for these UNPREDICTABLE cases.  */
7062    if (a->rn == 15 || a->rt == 15
7063        || (!v8a && s->thumb && a->rt == 13)
7064        || (mop == MO_64
7065            && (a->rt2 == 15 || a->rt == a->rt2
7066                || (!v8a && s->thumb && a->rt2 == 13)))) {
7067        unallocated_encoding(s);
7068        return true;
7069    }
7070
7071    addr = tcg_temp_new_i32();
7072    load_reg_var(s, addr, a->rn);
7073    tcg_gen_addi_i32(addr, addr, a->imm);
7074
7075    gen_load_exclusive(s, a->rt, a->rt2, addr, mop);
7076
7077    if (acq) {
7078        tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
7079    }
7080    return true;
7081}
7082
7083static bool trans_LDREX(DisasContext *s, arg_LDREX *a)
7084{
7085    if (!ENABLE_ARCH_6) {
7086        return false;
7087    }
7088    return op_ldrex(s, a, MO_32, false);
7089}
7090
7091static bool trans_LDREXD_a32(DisasContext *s, arg_LDREX *a)
7092{
7093    if (!ENABLE_ARCH_6K) {
7094        return false;
7095    }
7096    /* We UNDEF for these UNPREDICTABLE cases.  */
7097    if (a->rt & 1) {
7098        unallocated_encoding(s);
7099        return true;
7100    }
7101    a->rt2 = a->rt + 1;
7102    return op_ldrex(s, a, MO_64, false);
7103}
7104
7105static bool trans_LDREXD_t32(DisasContext *s, arg_LDREX *a)
7106{
7107    return op_ldrex(s, a, MO_64, false);
7108}
7109
7110static bool trans_LDREXB(DisasContext *s, arg_LDREX *a)
7111{
7112    if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
7113        return false;
7114    }
7115    return op_ldrex(s, a, MO_8, false);
7116}
7117
7118static bool trans_LDREXH(DisasContext *s, arg_LDREX *a)
7119{
7120    if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
7121        return false;
7122    }
7123    return op_ldrex(s, a, MO_16, false);
7124}
7125
7126static bool trans_LDAEX(DisasContext *s, arg_LDREX *a)
7127{
7128    if (!ENABLE_ARCH_8) {
7129        return false;
7130    }
7131    return op_ldrex(s, a, MO_32, true);
7132}
7133
7134static bool trans_LDAEXD_a32(DisasContext *s, arg_LDREX *a)
7135{
7136    if (!ENABLE_ARCH_8) {
7137        return false;
7138    }
7139    /* We UNDEF for these UNPREDICTABLE cases.  */
7140    if (a->rt & 1) {
7141        unallocated_encoding(s);
7142        return true;
7143    }
7144    a->rt2 = a->rt + 1;
7145    return op_ldrex(s, a, MO_64, true);
7146}
7147
7148static bool trans_LDAEXD_t32(DisasContext *s, arg_LDREX *a)
7149{
7150    if (!ENABLE_ARCH_8) {
7151        return false;
7152    }
7153    return op_ldrex(s, a, MO_64, true);
7154}
7155
7156static bool trans_LDAEXB(DisasContext *s, arg_LDREX *a)
7157{
7158    if (!ENABLE_ARCH_8) {
7159        return false;
7160    }
7161    return op_ldrex(s, a, MO_8, true);
7162}
7163
7164static bool trans_LDAEXH(DisasContext *s, arg_LDREX *a)
7165{
7166    if (!ENABLE_ARCH_8) {
7167        return false;
7168    }
7169    return op_ldrex(s, a, MO_16, true);
7170}
7171
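/*
 * LDA/LDAB/LDAH: load-acquire without an exclusive monitor; the
 * barrier after the load provides the acquire ordering.
 */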
7172static bool op_lda(DisasContext *s, arg_LDA *a, MemOp mop)
7173{
7174    TCGv_i32 addr, tmp;
7175
7176    if (!ENABLE_ARCH_8) {
7177        return false;
7178    }
7179    /* We UNDEF for these UNPREDICTABLE cases.  */
7180    if (a->rn == 15 || a->rt == 15) {
7181        unallocated_encoding(s);
7182        return true;
7183    }
7184
7185    addr = load_reg(s, a->rn);
7186    tmp = tcg_temp_new_i32();
7187    gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), mop | MO_ALIGN);
7188    disas_set_da_iss(s, mop, a->rt | ISSIsAcqRel);
7189
7190    store_reg(s, a->rt, tmp);
7191    tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
7192    return true;
7193}
7194
7195static bool trans_LDA(DisasContext *s, arg_LDA *a)
7196{
7197    return op_lda(s, a, MO_UL);
7198}
7199
7200static bool trans_LDAB(DisasContext *s, arg_LDA *a)
7201{
7202    return op_lda(s, a, MO_UB);
7203}
7204
7205static bool trans_LDAH(DisasContext *s, arg_LDA *a)
7206{
7207    return op_lda(s, a, MO_UW);
7208}
7209
7210/*
7211 * Media instructions
7212 */
7213
7214static bool trans_USADA8(DisasContext *s, arg_USADA8 *a)
7215{
7216    TCGv_i32 t1, t2;
7217
7218    if (!ENABLE_ARCH_6) {
7219        return false;
7220    }
7221
7222    t1 = load_reg(s, a->rn);
7223    t2 = load_reg(s, a->rm);
7224    gen_helper_usad8(t1, t1, t2);
7225    if (a->ra != 15) {
7226        t2 = load_reg(s, a->ra);
7227        tcg_gen_add_i32(t1, t1, t2);
7228    }
7229    store_reg(s, a->rd, t1);
7230    return true;
7231}
7232
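/*
 * SBFX/UBFX: extract a widthm1+1 bit field starting at lsb, with
 * sign or zero extension; this maps directly onto the TCG
 * (s)extract operations.
 */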
7233static bool op_bfx(DisasContext *s, arg_UBFX *a, bool u)
7234{
7235    TCGv_i32 tmp;
7236    int width = a->widthm1 + 1;
7237    int shift = a->lsb;
7238
7239    if (!ENABLE_ARCH_6T2) {
7240        return false;
7241    }
7242    if (shift + width > 32) {
7243        /* UNPREDICTABLE; we choose to UNDEF */
7244        unallocated_encoding(s);
7245        return true;
7246    }
7247
7248    tmp = load_reg(s, a->rn);
7249    if (u) {
7250        tcg_gen_extract_i32(tmp, tmp, shift, width);
7251    } else {
7252        tcg_gen_sextract_i32(tmp, tmp, shift, width);
7253    }
7254    store_reg(s, a->rd, tmp);
7255    return true;
7256}
7257
7258static bool trans_SBFX(DisasContext *s, arg_SBFX *a)
7259{
7260    return op_bfx(s, a, false);
7261}
7262
7263static bool trans_UBFX(DisasContext *s, arg_UBFX *a)
7264{
7265    return op_bfx(s, a, true);
7266}
7267
7268static bool trans_BFCI(DisasContext *s, arg_BFCI *a)
7269{
7270    int msb = a->msb, lsb = a->lsb;
7271    TCGv_i32 t_in, t_rd;
7272    int width;
7273
7274    if (!ENABLE_ARCH_6T2) {
7275        return false;
7276    }
7277    if (msb < lsb) {
7278        /* UNPREDICTABLE; we choose to UNDEF */
7279        unallocated_encoding(s);
7280        return true;
7281    }
7282
7283    width = msb + 1 - lsb;
7284    if (a->rn == 15) {
7285        /* BFC */
7286        t_in = tcg_constant_i32(0);
7287    } else {
7288        /* BFI */
7289        t_in = load_reg(s, a->rn);
7290    }
7291    t_rd = load_reg(s, a->rd);
7292    tcg_gen_deposit_i32(t_rd, t_rd, t_in, lsb, width);
7293    store_reg(s, a->rd, t_rd);
7294    return true;
7295}
7296
7297static bool trans_UDF(DisasContext *s, arg_UDF *a)
7298{
7299    unallocated_encoding(s);
7300    return true;
7301}
7302
7303/*
7304 * Parallel addition and subtraction
7305 */
7306
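/*
 * The parallel add/sub helpers come in two flavours: the plain form
 * below, and the *_ge form whose helpers also update the CPSR.GE
 * bits (later consumed by SEL) via a pointer to env->GE.
 */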
7307static bool op_par_addsub(DisasContext *s, arg_rrr *a,
7308                          void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32))
7309{
7310    TCGv_i32 t0, t1;
7311
7312    if (s->thumb
7313        ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7314        : !ENABLE_ARCH_6) {
7315        return false;
7316    }
7317
7318    t0 = load_reg(s, a->rn);
7319    t1 = load_reg(s, a->rm);
7320
7321    gen(t0, t0, t1);
7322
7323    store_reg(s, a->rd, t0);
7324    return true;
7325}
7326
7327static bool op_par_addsub_ge(DisasContext *s, arg_rrr *a,
7328                             void (*gen)(TCGv_i32, TCGv_i32,
7329                                         TCGv_i32, TCGv_ptr))
7330{
7331    TCGv_i32 t0, t1;
7332    TCGv_ptr ge;
7333
7334    if (s->thumb
7335        ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7336        : !ENABLE_ARCH_6) {
7337        return false;
7338    }
7339
7340    t0 = load_reg(s, a->rn);
7341    t1 = load_reg(s, a->rm);
7342
7343    ge = tcg_temp_new_ptr();
7344    tcg_gen_addi_ptr(ge, cpu_env, offsetof(CPUARMState, GE));
7345    gen(t0, t0, t1, ge);
7346
7347    store_reg(s, a->rd, t0);
7348    return true;
7349}
7350
7351#define DO_PAR_ADDSUB(NAME, helper) \
7352static bool trans_##NAME(DisasContext *s, arg_rrr *a)   \
7353{                                                       \
7354    return op_par_addsub(s, a, helper);                 \
7355}
7356
7357#define DO_PAR_ADDSUB_GE(NAME, helper) \
7358static bool trans_##NAME(DisasContext *s, arg_rrr *a)   \
7359{                                                       \
7360    return op_par_addsub_ge(s, a, helper);              \
7361}
7362
7363DO_PAR_ADDSUB_GE(SADD16, gen_helper_sadd16)
7364DO_PAR_ADDSUB_GE(SASX, gen_helper_saddsubx)
7365DO_PAR_ADDSUB_GE(SSAX, gen_helper_ssubaddx)
7366DO_PAR_ADDSUB_GE(SSUB16, gen_helper_ssub16)
7367DO_PAR_ADDSUB_GE(SADD8, gen_helper_sadd8)
7368DO_PAR_ADDSUB_GE(SSUB8, gen_helper_ssub8)
7369
7370DO_PAR_ADDSUB_GE(UADD16, gen_helper_uadd16)
7371DO_PAR_ADDSUB_GE(UASX, gen_helper_uaddsubx)
7372DO_PAR_ADDSUB_GE(USAX, gen_helper_usubaddx)
7373DO_PAR_ADDSUB_GE(USUB16, gen_helper_usub16)
7374DO_PAR_ADDSUB_GE(UADD8, gen_helper_uadd8)
7375DO_PAR_ADDSUB_GE(USUB8, gen_helper_usub8)
7376
7377DO_PAR_ADDSUB(QADD16, gen_helper_qadd16)
7378DO_PAR_ADDSUB(QASX, gen_helper_qaddsubx)
7379DO_PAR_ADDSUB(QSAX, gen_helper_qsubaddx)
7380DO_PAR_ADDSUB(QSUB16, gen_helper_qsub16)
7381DO_PAR_ADDSUB(QADD8, gen_helper_qadd8)
7382DO_PAR_ADDSUB(QSUB8, gen_helper_qsub8)
7383
7384DO_PAR_ADDSUB(UQADD16, gen_helper_uqadd16)
7385DO_PAR_ADDSUB(UQASX, gen_helper_uqaddsubx)
7386DO_PAR_ADDSUB(UQSAX, gen_helper_uqsubaddx)
7387DO_PAR_ADDSUB(UQSUB16, gen_helper_uqsub16)
7388DO_PAR_ADDSUB(UQADD8, gen_helper_uqadd8)
7389DO_PAR_ADDSUB(UQSUB8, gen_helper_uqsub8)
7390
7391DO_PAR_ADDSUB(SHADD16, gen_helper_shadd16)
7392DO_PAR_ADDSUB(SHASX, gen_helper_shaddsubx)
7393DO_PAR_ADDSUB(SHSAX, gen_helper_shsubaddx)
7394DO_PAR_ADDSUB(SHSUB16, gen_helper_shsub16)
7395DO_PAR_ADDSUB(SHADD8, gen_helper_shadd8)
7396DO_PAR_ADDSUB(SHSUB8, gen_helper_shsub8)
7397
7398DO_PAR_ADDSUB(UHADD16, gen_helper_uhadd16)
7399DO_PAR_ADDSUB(UHASX, gen_helper_uhaddsubx)
7400DO_PAR_ADDSUB(UHSAX, gen_helper_uhsubaddx)
7401DO_PAR_ADDSUB(UHSUB16, gen_helper_uhsub16)
7402DO_PAR_ADDSUB(UHADD8, gen_helper_uhadd8)
7403DO_PAR_ADDSUB(UHSUB8, gen_helper_uhsub8)
7404
7405#undef DO_PAR_ADDSUB
7406#undef DO_PAR_ADDSUB_GE
7407
7408/*
7409 * Packing, unpacking, saturation, and reversal
7410 */
7411
7412static bool trans_PKH(DisasContext *s, arg_PKH *a)
7413{
7414    TCGv_i32 tn, tm;
7415    int shift = a->imm;
7416
7417    if (s->thumb
7418        ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7419        : !ENABLE_ARCH_6) {
7420        return false;
7421    }
7422
7423    tn = load_reg(s, a->rn);
7424    tm = load_reg(s, a->rm);
7425    if (a->tb) {
7426        /* PKHTB */
7427        if (shift == 0) {
7428            shift = 31;
7429        }
7430        tcg_gen_sari_i32(tm, tm, shift);
7431        tcg_gen_deposit_i32(tn, tn, tm, 0, 16);
7432    } else {
7433        /* PKHBT */
7434        tcg_gen_shli_i32(tm, tm, shift);
7435        tcg_gen_deposit_i32(tn, tm, tn, 0, 16);
7436    }
7437    store_reg(s, a->rd, tn);
7438    return true;
7439}
7440
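/*
 * SSAT/USAT: the operand is first shifted (LSL, or ASR when 'sh' is
 * set), then saturated by the helper, which also sets QF when
 * saturation occurs.
 */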
7441static bool op_sat(DisasContext *s, arg_sat *a,
7442                   void (*gen)(TCGv_i32, TCGv_env, TCGv_i32, TCGv_i32))
7443{
7444    TCGv_i32 tmp;
7445    int shift = a->imm;
7446
7447    if (!ENABLE_ARCH_6) {
7448        return false;
7449    }
7450
7451    tmp = load_reg(s, a->rn);
7452    if (a->sh) {
7453        tcg_gen_sari_i32(tmp, tmp, shift ? shift : 31);
7454    } else {
7455        tcg_gen_shli_i32(tmp, tmp, shift);
7456    }
7457
7458    gen(tmp, cpu_env, tmp, tcg_constant_i32(a->satimm));
7459
7460    store_reg(s, a->rd, tmp);
7461    return true;
7462}
7463
7464static bool trans_SSAT(DisasContext *s, arg_sat *a)
7465{
7466    return op_sat(s, a, gen_helper_ssat);
7467}
7468
7469static bool trans_USAT(DisasContext *s, arg_sat *a)
7470{
7471    return op_sat(s, a, gen_helper_usat);
7472}
7473
7474static bool trans_SSAT16(DisasContext *s, arg_sat *a)
7475{
7476    if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
7477        return false;
7478    }
7479    return op_sat(s, a, gen_helper_ssat16);
7480}
7481
7482static bool trans_USAT16(DisasContext *s, arg_sat *a)
7483{
7484    if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
7485        return false;
7486    }
7487    return op_sat(s, a, gen_helper_usat16);
7488}
7489
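/*
 * SXTAB/SXTAH/UXTAB/UXTAH and the B16 forms: rotate Rm right by
 * 0/8/16/24, extend, and optionally accumulate into Rn. Rn == 15
 * encodes the plain extend (SXTB, UXTH, ...) with no addition.
 */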
7490static bool op_xta(DisasContext *s, arg_rrr_rot *a,
7491                   void (*gen_extract)(TCGv_i32, TCGv_i32),
7492                   void (*gen_add)(TCGv_i32, TCGv_i32, TCGv_i32))
7493{
7494    TCGv_i32 tmp;
7495
7496    if (!ENABLE_ARCH_6) {
7497        return false;
7498    }
7499
7500    tmp = load_reg(s, a->rm);
7501    /*
7502     * TODO: In many cases we could do a shift instead of a rotate.
7503     * Combined with a simple extend, that becomes an extract.
7504     */
7505    tcg_gen_rotri_i32(tmp, tmp, a->rot * 8);
7506    gen_extract(tmp, tmp);
7507
7508    if (a->rn != 15) {
7509        TCGv_i32 tmp2 = load_reg(s, a->rn);
7510        gen_add(tmp, tmp, tmp2);
7511    }
7512    store_reg(s, a->rd, tmp);
7513    return true;
7514}
7515
7516static bool trans_SXTAB(DisasContext *s, arg_rrr_rot *a)
7517{
7518    return op_xta(s, a, tcg_gen_ext8s_i32, tcg_gen_add_i32);
7519}
7520
7521static bool trans_SXTAH(DisasContext *s, arg_rrr_rot *a)
7522{
7523    return op_xta(s, a, tcg_gen_ext16s_i32, tcg_gen_add_i32);
7524}
7525
7526static bool trans_SXTAB16(DisasContext *s, arg_rrr_rot *a)
7527{
7528    if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
7529        return false;
7530    }
7531    return op_xta(s, a, gen_helper_sxtb16, gen_add16);
7532}
7533
7534static bool trans_UXTAB(DisasContext *s, arg_rrr_rot *a)
7535{
7536    return op_xta(s, a, tcg_gen_ext8u_i32, tcg_gen_add_i32);
7537}
7538
7539static bool trans_UXTAH(DisasContext *s, arg_rrr_rot *a)
7540{
7541    return op_xta(s, a, tcg_gen_ext16u_i32, tcg_gen_add_i32);
7542}
7543
7544static bool trans_UXTAB16(DisasContext *s, arg_rrr_rot *a)
7545{
7546    if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
7547        return false;
7548    }
7549    return op_xta(s, a, gen_helper_uxtb16, gen_add16);
7550}
7551
7552static bool trans_SEL(DisasContext *s, arg_rrr *a)
7553{
7554    TCGv_i32 t1, t2, t3;
7555
7556    if (s->thumb
7557        ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7558        : !ENABLE_ARCH_6) {
7559        return false;
7560    }
7561
7562    t1 = load_reg(s, a->rn);
7563    t2 = load_reg(s, a->rm);
7564    t3 = tcg_temp_new_i32();
7565    tcg_gen_ld_i32(t3, cpu_env, offsetof(CPUARMState, GE));
7566    gen_helper_sel_flags(t1, t3, t1, t2);
7567    store_reg(s, a->rd, t1);
7568    return true;
7569}
7570
7571static bool op_rr(DisasContext *s, arg_rr *a,
7572                  void (*gen)(TCGv_i32, TCGv_i32))
7573{
7574    TCGv_i32 tmp;
7575
7576    tmp = load_reg(s, a->rm);
7577    gen(tmp, tmp);
7578    store_reg(s, a->rd, tmp);
7579    return true;
7580}
7581
7582static bool trans_REV(DisasContext *s, arg_rr *a)
7583{
7584    if (!ENABLE_ARCH_6) {
7585        return false;
7586    }
7587    return op_rr(s, a, tcg_gen_bswap32_i32);
7588}
7589
7590static bool trans_REV16(DisasContext *s, arg_rr *a)
7591{
7592    if (!ENABLE_ARCH_6) {
7593        return false;
7594    }
7595    return op_rr(s, a, gen_rev16);
7596}
7597
7598static bool trans_REVSH(DisasContext *s, arg_rr *a)
7599{
7600    if (!ENABLE_ARCH_6) {
7601        return false;
7602    }
7603    return op_rr(s, a, gen_revsh);
7604}
7605
7606static bool trans_RBIT(DisasContext *s, arg_rr *a)
7607{
7608    if (!ENABLE_ARCH_6T2) {
7609        return false;
7610    }
7611    return op_rr(s, a, gen_helper_rbit);
7612}
7613
7614/*
7615 * Signed multiply, signed and unsigned divide
7616 */
7617
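/*
 * SMLAD/SMLSD (the X forms swap the halves of Rm first): two signed
 * 16x16 multiplies whose products are added or subtracted and then
 * accumulated into Ra, setting Q if the signed addition overflows.
 */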
7618static bool op_smlad(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub)
7619{
7620    TCGv_i32 t1, t2;
7621
7622    if (!ENABLE_ARCH_6) {
7623        return false;
7624    }
7625
7626    t1 = load_reg(s, a->rn);
7627    t2 = load_reg(s, a->rm);
7628    if (m_swap) {
7629        gen_swap_half(t2, t2);
7630    }
7631    gen_smul_dual(t1, t2);
7632
7633    if (sub) {
7634        /*
7635         * This subtraction cannot overflow, so we can do a simple
7636         * 32-bit subtraction and then a possible 32-bit saturating
7637         * addition of Ra.
7638         */
7639        tcg_gen_sub_i32(t1, t1, t2);
7640
7641        if (a->ra != 15) {
7642            t2 = load_reg(s, a->ra);
7643            gen_helper_add_setq(t1, cpu_env, t1, t2);
7644        }
7645    } else if (a->ra == 15) {
7646        /* Single saturation-checking addition */
7647        gen_helper_add_setq(t1, cpu_env, t1, t2);
7648    } else {
7649        /*
7650         * We need to add the products and Ra together and then
7651         * determine whether the final result overflowed. Doing
7652         * this as two separate add-and-check-overflow steps incorrectly
7653         * sets Q for cases like (-32768 * -32768) + (-32768 * -32768) + -1.
7654         * Do all the arithmetic at 64-bits and then check for overflow.
7655         */
7656        TCGv_i64 p64, q64;
7657        TCGv_i32 t3, qf, one;
7658
7659        p64 = tcg_temp_new_i64();
7660        q64 = tcg_temp_new_i64();
7661        tcg_gen_ext_i32_i64(p64, t1);
7662        tcg_gen_ext_i32_i64(q64, t2);
7663        tcg_gen_add_i64(p64, p64, q64);
7664        load_reg_var(s, t2, a->ra);
7665        tcg_gen_ext_i32_i64(q64, t2);
7666        tcg_gen_add_i64(p64, p64, q64);
7667
7668        tcg_gen_extr_i64_i32(t1, t2, p64);
7669        /*
7670         * t1 is the low half of the result which goes into Rd.
7671         * We have overflow and must set Q if the high half (t2)
7672         * is different from the sign-extension of t1.
7673         */
7674        t3 = tcg_temp_new_i32();
7675        tcg_gen_sari_i32(t3, t1, 31);
7676        qf = load_cpu_field(QF);
7677        one = tcg_constant_i32(1);
7678        tcg_gen_movcond_i32(TCG_COND_NE, qf, t2, t3, one, qf);
7679        store_cpu_field(qf, QF);
7680    }
7681    store_reg(s, a->rd, t1);
7682    return true;
7683}
7684
7685static bool trans_SMLAD(DisasContext *s, arg_rrrr *a)
7686{
7687    return op_smlad(s, a, false, false);
7688}
7689
7690static bool trans_SMLADX(DisasContext *s, arg_rrrr *a)
7691{
7692    return op_smlad(s, a, true, false);
7693}
7694
7695static bool trans_SMLSD(DisasContext *s, arg_rrrr *a)
7696{
7697    return op_smlad(s, a, false, true);
7698}
7699
7700static bool trans_SMLSDX(DisasContext *s, arg_rrrr *a)
7701{
7702    return op_smlad(s, a, true, true);
7703}
7704
7705static bool op_smlald(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub)
7706{
7707    TCGv_i32 t1, t2;
7708    TCGv_i64 l1, l2;
7709
7710    if (!ENABLE_ARCH_6) {
7711        return false;
7712    }
7713
7714    t1 = load_reg(s, a->rn);
7715    t2 = load_reg(s, a->rm);
7716    if (m_swap) {
7717        gen_swap_half(t2, t2);
7718    }
7719    gen_smul_dual(t1, t2);
7720
7721    l1 = tcg_temp_new_i64();
7722    l2 = tcg_temp_new_i64();
7723    tcg_gen_ext_i32_i64(l1, t1);
7724    tcg_gen_ext_i32_i64(l2, t2);
7725
7726    if (sub) {
7727        tcg_gen_sub_i64(l1, l1, l2);
7728    } else {
7729        tcg_gen_add_i64(l1, l1, l2);
7730    }
7731
7732    gen_addq(s, l1, a->ra, a->rd);
7733    gen_storeq_reg(s, a->ra, a->rd, l1);
7734    return true;
7735}
7736
7737static bool trans_SMLALD(DisasContext *s, arg_rrrr *a)
7738{
7739    return op_smlald(s, a, false, false);
7740}
7741
7742static bool trans_SMLALDX(DisasContext *s, arg_rrrr *a)
7743{
7744    return op_smlald(s, a, true, false);
7745}
7746
7747static bool trans_SMLSLD(DisasContext *s, arg_rrrr *a)
7748{
7749    return op_smlald(s, a, false, true);
7750}
7751
7752static bool trans_SMLSLDX(DisasContext *s, arg_rrrr *a)
7753{
7754    return op_smlald(s, a, true, true);
7755}
7756
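/*
 * SMMUL/SMMLA/SMMLS (R forms add rounding): 32x32->64 signed multiply
 * of which only the most significant 32 bits are kept, with optional
 * accumulation from Ra (SMMUL is the Ra == 15 encoding of SMMLA).
 */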
7757static bool op_smmla(DisasContext *s, arg_rrrr *a, bool round, bool sub)
7758{
7759    TCGv_i32 t1, t2;
7760
7761    if (s->thumb
7762        ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7763        : !ENABLE_ARCH_6) {
7764        return false;
7765    }
7766
7767    t1 = load_reg(s, a->rn);
7768    t2 = load_reg(s, a->rm);
7769    tcg_gen_muls2_i32(t2, t1, t1, t2);
7770
7771    if (a->ra != 15) {
7772        TCGv_i32 t3 = load_reg(s, a->ra);
7773        if (sub) {
7774            /*
7775             * For SMMLS we need a 64-bit subtract: this accounts for the
7776             * borrow caused by a non-zero multiplicand lowpart, and gives
7777             * the correct result lowpart for the rounding step below.
7778             */
7779            tcg_gen_sub2_i32(t2, t1, tcg_constant_i32(0), t3, t2, t1);
7780        } else {
7781            tcg_gen_add_i32(t1, t1, t3);
7782        }
7783    }
7784    if (round) {
7785        /*
7786         * Adding 0x80000000 to the 64-bit quantity means that we have
7787         * a carry into the high word when the low word has the msb set.
7788         */
7789        tcg_gen_shri_i32(t2, t2, 31);
7790        tcg_gen_add_i32(t1, t1, t2);
7791    }
7792    store_reg(s, a->rd, t1);
7793    return true;
7794}
7795
7796static bool trans_SMMLA(DisasContext *s, arg_rrrr *a)
7797{
7798    return op_smmla(s, a, false, false);
7799}
7800
7801static bool trans_SMMLAR(DisasContext *s, arg_rrrr *a)
7802{
7803    return op_smmla(s, a, true, false);
7804}
7805
7806static bool trans_SMMLS(DisasContext *s, arg_rrrr *a)
7807{
7808    return op_smmla(s, a, false, true);
7809}
7810
7811static bool trans_SMMLSR(DisasContext *s, arg_rrrr *a)
7812{
7813    return op_smmla(s, a, true, true);
7814}
7815
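/*
 * SDIV/UDIV. The helpers take cpu_env because division by zero may
 * need to raise an exception (depending on the trap-enable controls)
 * instead of simply returning zero.
 */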
7816static bool op_div(DisasContext *s, arg_rrr *a, bool u)
7817{
7818    TCGv_i32 t1, t2;
7819
7820    if (s->thumb
7821        ? !dc_isar_feature(aa32_thumb_div, s)
7822        : !dc_isar_feature(aa32_arm_div, s)) {
7823        return false;
7824    }
7825
7826    t1 = load_reg(s, a->rn);
7827    t2 = load_reg(s, a->rm);
7828    if (u) {
7829        gen_helper_udiv(t1, cpu_env, t1, t2);
7830    } else {
7831        gen_helper_sdiv(t1, cpu_env, t1, t2);
7832    }
7833    store_reg(s, a->rd, t1);
7834    return true;
7835}
7836
7837static bool trans_SDIV(DisasContext *s, arg_rrr *a)
7838{
7839    return op_div(s, a, false);
7840}
7841
7842static bool trans_UDIV(DisasContext *s, arg_rrr *a)
7843{
7844    return op_div(s, a, true);
7845}
7846
7847/*
7848 * Block data transfer
7849 */
7850
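/*
 * Compute the lowest address accessed by an LDM/STM. The four
 * addressing modes (IA/IB/DA/DB, from the 'i' and 'b' decode bits)
 * are normalised here so that the transfer loop can always walk
 * upwards in steps of 4; op_addr_block_post() then applies any
 * writeback to the base register.
 */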
7851static TCGv_i32 op_addr_block_pre(DisasContext *s, arg_ldst_block *a, int n)
7852{
7853    TCGv_i32 addr = load_reg(s, a->rn);
7854
7855    if (a->b) {
7856        if (a->i) {
7857            /* pre increment */
7858            tcg_gen_addi_i32(addr, addr, 4);
7859        } else {
7860            /* pre decrement */
7861            tcg_gen_addi_i32(addr, addr, -(n * 4));
7862        }
7863    } else if (!a->i && n != 1) {
7864        /* post decrement */
7865        tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
7866    }
7867
7868    if (s->v8m_stackcheck && a->rn == 13 && a->w) {
7869        /*
7870         * If the writeback is incrementing SP rather than
7871         * decrementing it, and the initial SP is below the
7872         * stack limit but the final written-back SP would
7873         * be above, then we must not perform any memory
7874         * accesses, but it is IMPDEF whether we generate
7875         * an exception. We choose to do so in this case.
7876         * At this point 'addr' is the lowest address, so
7877         * either the original SP (if incrementing) or our
7878         * final SP (if decrementing), so that's what we check.
7879         */
7880        gen_helper_v8m_stackcheck(cpu_env, addr);
7881    }
7882
7883    return addr;
7884}
7885
7886static void op_addr_block_post(DisasContext *s, arg_ldst_block *a,
7887                               TCGv_i32 addr, int n)
7888{
7889    if (a->w) {
7890        /* write back */
7891        if (!a->b) {
7892            if (a->i) {
7893                /* post increment */
7894                tcg_gen_addi_i32(addr, addr, 4);
7895            } else {
7896                /* post decrement */
7897                tcg_gen_addi_i32(addr, addr, -(n * 4));
7898            }
7899        } else if (!a->i && n != 1) {
7900            /* pre decrement */
7901            tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
7902        }
7903        store_reg(s, a->rn, addr);
7904    }
7905}
7906
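/*
 * STM and its addressing-mode variants: min_n is the smallest
 * register-list size that is not UNPREDICTABLE for the encoding;
 * smaller lists (and rn == 15) are treated as UNDEF.
 */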
7907static bool op_stm(DisasContext *s, arg_ldst_block *a, int min_n)
7908{
7909    int i, j, n, list, mem_idx;
7910    bool user = a->u;
7911    TCGv_i32 addr, tmp;
7912
7913    if (user) {
7914        /* STM (user) */
7915        if (IS_USER(s)) {
7916            /* Only usable in supervisor mode.  */
7917            unallocated_encoding(s);
7918            return true;
7919        }
7920    }
7921
7922    list = a->list;
7923    n = ctpop16(list);
7924    if (n < min_n || a->rn == 15) {
7925        unallocated_encoding(s);
7926        return true;
7927    }
7928
7929    s->eci_handled = true;
7930
7931    addr = op_addr_block_pre(s, a, n);
7932    mem_idx = get_mem_index(s);
7933
7934    for (i = j = 0; i < 16; i++) {
7935        if (!(list & (1 << i))) {
7936            continue;
7937        }
7938
7939        if (user && i != 15) {
7940            tmp = tcg_temp_new_i32();
7941            gen_helper_get_user_reg(tmp, cpu_env, tcg_constant_i32(i));
7942        } else {
7943            tmp = load_reg(s, i);
7944        }
7945        gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
7946
7947        /* No need to add after the last transfer.  */
7948        if (++j != n) {
7949            tcg_gen_addi_i32(addr, addr, 4);
7950        }
7951    }
7952
7953    op_addr_block_post(s, a, addr, n);
7954    clear_eci_state(s);
7955    return true;
7956}
7957
7958static bool trans_STM(DisasContext *s, arg_ldst_block *a)
7959{
7960    /* BitCount(list) < 1 is UNPREDICTABLE */
7961    return op_stm(s, a, 1);
7962}
7963
7964static bool trans_STM_t32(DisasContext *s, arg_ldst_block *a)
7965{
7966    /* Writeback register in register list is UNPREDICTABLE for T32.  */
7967    if (a->w && (a->list & (1 << a->rn))) {
7968        unallocated_encoding(s);
7969        return true;
7970    }
7971    /* BitCount(list) < 2 is UNPREDICTABLE */
7972    return op_stm(s, a, 2);
7973}
7974
7975static bool do_ldm(DisasContext *s, arg_ldst_block *a, int min_n)
7976{
7977    int i, j, n, list, mem_idx;
7978    bool loaded_base;
7979    bool user = a->u;
7980    bool exc_return = false;
7981    TCGv_i32 addr, tmp, loaded_var;
7982
7983    if (user) {
7984        /* LDM (user), LDM (exception return) */
7985        if (IS_USER(s)) {
7986            /* Only usable in supervisor mode.  */
7987            unallocated_encoding(s);
7988            return true;
7989        }
7990        if (extract32(a->list, 15, 1)) {
7991            exc_return = true;
7992            user = false;
7993        } else {
7994            /* LDM (user) does not allow writeback.  */
7995            if (a->w) {
7996                unallocated_encoding(s);
7997                return true;
7998            }
7999        }
8000    }
8001
8002    list = a->list;
8003    n = ctpop16(list);
8004    if (n < min_n || a->rn == 15) {
8005        unallocated_encoding(s);
8006        return true;
8007    }
8008
8009    s->eci_handled = true;
8010
8011    addr = op_addr_block_pre(s, a, n);
8012    mem_idx = get_mem_index(s);
8013    loaded_base = false;
8014    loaded_var = NULL;
8015
8016    for (i = j = 0; i < 16; i++) {
8017        if (!(list & (1 << i))) {
8018            continue;
8019        }
8020
8021        tmp = tcg_temp_new_i32();
8022        gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
8023        if (user) {
8024            gen_helper_set_user_reg(cpu_env, tcg_constant_i32(i), tmp);
8025        } else if (i == a->rn) {
8026            loaded_var = tmp;
8027            loaded_base = true;
8028        } else if (i == 15 && exc_return) {
8029            store_pc_exc_ret(s, tmp);
8030        } else {
8031            store_reg_from_load(s, i, tmp);
8032        }
8033
8034        /* No need to add after the last transfer.  */
8035        if (++j != n) {
8036            tcg_gen_addi_i32(addr, addr, 4);
8037        }
8038    }
8039
8040    op_addr_block_post(s, a, addr, n);
8041
8042    if (loaded_base) {
8043        /* Note that we reject base == pc above.  */
8044        store_reg(s, a->rn, loaded_var);
8045    }
8046
8047    if (exc_return) {
8048        /* Restore CPSR from SPSR.  */
8049        tmp = load_cpu_field(spsr);
8050        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
8051            gen_io_start();
8052        }
8053        gen_helper_cpsr_write_eret(cpu_env, tmp);
8054        /* Must exit loop to check un-masked IRQs */
8055        s->base.is_jmp = DISAS_EXIT;
8056    }
8057    clear_eci_state(s);
8058    return true;
8059}
8060
8061static bool trans_LDM_a32(DisasContext *s, arg_ldst_block *a)
8062{
8063    /*
8064     * Writeback register in register list is UNPREDICTABLE
8065     * for ArchVersion() >= 7.  Prior to v7, A32 would write
8066     * an UNKNOWN value to the base register.
8067     */
8068    if (ENABLE_ARCH_7 && a->w && (a->list & (1 << a->rn))) {
8069        unallocated_encoding(s);
8070        return true;
8071    }
8072    /* BitCount(list) < 1 is UNPREDICTABLE */
8073    return do_ldm(s, a, 1);
8074}
8075
8076static bool trans_LDM_t32(DisasContext *s, arg_ldst_block *a)
8077{
8078    /* Writeback register in register list is UNPREDICTABLE for T32. */
8079    if (a->w && (a->list & (1 << a->rn))) {
8080        unallocated_encoding(s);
8081        return true;
8082    }
8083    /* BitCount(list) < 2 is UNPREDICTABLE */
8084    return do_ldm(s, a, 2);
8085}
8086
8087static bool trans_LDM_t16(DisasContext *s, arg_ldst_block *a)
8088{
8089    /* Writeback is conditional on the base register not being loaded.  */
8090    a->w = !(a->list & (1 << a->rn));
8091    /* BitCount(list) < 1 is UNPREDICTABLE */
8092    return do_ldm(s, a, 1);
8093}
8094
8095static bool trans_CLRM(DisasContext *s, arg_CLRM *a)
8096{
8097    int i;
8098    TCGv_i32 zero;
8099
8100    if (!dc_isar_feature(aa32_m_sec_state, s)) {
8101        return false;
8102    }
8103
8104    if (extract32(a->list, 13, 1)) {
8105        return false;
8106    }
8107
8108    if (!a->list) {
8109        /* UNPREDICTABLE; we choose to UNDEF */
8110        return false;
8111    }
8112
8113    s->eci_handled = true;
8114
8115    zero = tcg_constant_i32(0);
8116    for (i = 0; i < 15; i++) {
8117        if (extract32(a->list, i, 1)) {
8118            /* Clear R[i] */
8119            tcg_gen_mov_i32(cpu_R[i], zero);
8120        }
8121    }
8122    if (extract32(a->list, 15, 1)) {
8123        /*
8124         * Clear APSR (by calling the MSR helper with the same argument
8125         * as for "MSR APSR_nzcvqg, Rn": mask = 0b1100, SYSM=0)
8126         */
8127        gen_helper_v7m_msr(cpu_env, tcg_constant_i32(0xc00), zero);
8128    }
8129    clear_eci_state(s);
8130    return true;
8131}
8132
8133/*
8134 * Branch, branch with link
8135 */
8136
8137static bool trans_B(DisasContext *s, arg_i *a)
8138{
8139    gen_jmp(s, jmp_diff(s, a->imm));
8140    return true;
8141}
8142
8143static bool trans_B_cond_thumb(DisasContext *s, arg_ci *a)
8144{
8145    /* This has cond from encoding, required to be outside IT block.  */
8146    if (a->cond >= 0xe) {
8147        return false;
8148    }
8149    if (s->condexec_mask) {
8150        unallocated_encoding(s);
8151        return true;
8152    }
8153    arm_skip_unless(s, a->cond);
8154    gen_jmp(s, jmp_diff(s, a->imm));
8155    return true;
8156}
8157
8158static bool trans_BL(DisasContext *s, arg_i *a)
8159{
8160    gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | s->thumb);
8161    gen_jmp(s, jmp_diff(s, a->imm));
8162    return true;
8163}
8164
8165static bool trans_BLX_i(DisasContext *s, arg_BLX_i *a)
8166{
8167    /*
8168     * BLX <imm> would be useless on M-profile; the encoding space
8169     * is used for other insns from v8.1M onward, and UNDEFs before that.
8170     */
8171    if (arm_dc_feature(s, ARM_FEATURE_M)) {
8172        return false;
8173    }
8174
8175    /* For A32, ARM_FEATURE_V5 is checked near the start of the uncond block. */
8176    if (s->thumb && (a->imm & 2)) {
8177        return false;
8178    }
8179    gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | s->thumb);
8180    store_cpu_field_constant(!s->thumb, thumb);
8181    /* This jump is computed from an aligned PC: subtract off the low bits. */
8182    gen_jmp(s, jmp_diff(s, a->imm - (s->pc_curr & 3)));
8183    return true;
8184}
8185
8186static bool trans_BL_BLX_prefix(DisasContext *s, arg_BL_BLX_prefix *a)
8187{
8188    assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
8189    gen_pc_plus_diff(s, cpu_R[14], jmp_diff(s, a->imm << 12));
8190    return true;
8191}
8192
8193static bool trans_BL_suffix(DisasContext *s, arg_BL_suffix *a)
8194{
8195    TCGv_i32 tmp = tcg_temp_new_i32();
8196
8197    assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
8198    tcg_gen_addi_i32(tmp, cpu_R[14], (a->imm << 1) | 1);
8199    gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | 1);
8200    gen_bx(s, tmp);
8201    return true;
8202}
8203
8204static bool trans_BLX_suffix(DisasContext *s, arg_BLX_suffix *a)
8205{
8206    TCGv_i32 tmp;
8207
8208    assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
8209    if (!ENABLE_ARCH_5) {
8210        return false;
8211    }
8212    tmp = tcg_temp_new_i32();
8213    tcg_gen_addi_i32(tmp, cpu_R[14], a->imm << 1);
8214    tcg_gen_andi_i32(tmp, tmp, 0xfffffffc);
8215    gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | 1);
8216    gen_bx(s, tmp);
8217    return true;
8218}
8219
8220static bool trans_BF(DisasContext *s, arg_BF *a)
8221{
8222    /*
8223     * M-profile branch future insns. The architecture permits an
8224     * implementation to implement these as NOPs (equivalent to
8225     * discarding the LO_BRANCH_INFO cache immediately), and we
8226     * take that IMPDEF option because for QEMU a "real" implementation
8227     * would be complicated and wouldn't execute any faster.
8228     */
8229    if (!dc_isar_feature(aa32_lob, s)) {
8230        return false;
8231    }
8232    if (a->boff == 0) {
8233        /* SEE "Related encodings" (loop insns) */
8234        return false;
8235    }
8236    /* Handle as NOP */
8237    return true;
8238}
8239
8240static bool trans_DLS(DisasContext *s, arg_DLS *a)
8241{
8242    /* M-profile low-overhead loop start */
8243    TCGv_i32 tmp;
8244
8245    if (!dc_isar_feature(aa32_lob, s)) {
8246        return false;
8247    }
8248    if (a->rn == 13 || a->rn == 15) {
8249        /*
8250         * For DLSTP rn == 15 is a related encoding (LCTP); the
8251         * other cases caught by this condition are all
8252         * CONSTRAINED UNPREDICTABLE: we choose to UNDEF
8253         */
8254        return false;
8255    }
8256
8257    if (a->size != 4) {
8258        /* DLSTP */
8259        if (!dc_isar_feature(aa32_mve, s)) {
8260            return false;
8261        }
8262        if (!vfp_access_check(s)) {
8263            return true;
8264        }
8265    }
8266
8267    /* Not a while loop: set LR to the count, and set LTPSIZE for DLSTP */
8268    tmp = load_reg(s, a->rn);
8269    store_reg(s, 14, tmp);
8270    if (a->size != 4) {
8271        /* DLSTP: set FPSCR.LTPSIZE */
8272        store_cpu_field(tcg_constant_i32(a->size), v7m.ltpsize);
8273        s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
8274    }
8275    return true;
8276}
8277
8278static bool trans_WLS(DisasContext *s, arg_WLS *a)
8279{
8280    /* M-profile low-overhead while-loop start */
8281    TCGv_i32 tmp;
8282    DisasLabel nextlabel;
8283
8284    if (!dc_isar_feature(aa32_lob, s)) {
8285        return false;
8286    }
8287    if (a->rn == 13 || a->rn == 15) {
8288        /*
8289         * For WLSTP rn == 15 is a related encoding (LE); the
8290         * other cases caught by this condition are all
8291         * CONSTRAINED UNPREDICTABLE: we choose to UNDEF
8292         */
8293        return false;
8294    }
8295    if (s->condexec_mask) {
8296        /*
8297         * WLS in an IT block is CONSTRAINED UNPREDICTABLE;
8298         * we choose to UNDEF, because otherwise our use of
8299         * gen_goto_tb(1) would clash with the use of TB exit 1
8300         * in the dc->condjmp condition-failed codepath in
8301         * arm_tr_tb_stop() and we'd get an assertion.
8302         */
8303        return false;
8304    }
8305    if (a->size != 4) {
8306        /* WLSTP */
8307        if (!dc_isar_feature(aa32_mve, s)) {
8308            return false;
8309        }
8310        /*
8311         * We need to check that the FPU is enabled here, but mustn't
8312         * call vfp_access_check() to do that because we don't want to
8313         * do the lazy state preservation in the "loop count is zero" case.
8314         * Do the check-and-raise-exception by hand.
8315         */
8316        if (s->fp_excp_el) {
8317            gen_exception_insn_el(s, 0, EXCP_NOCP,
8318                                  syn_uncategorized(), s->fp_excp_el);
8319            return true;
8320        }
8321    }
8322
8323    nextlabel = gen_disas_label(s);
8324    tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_R[a->rn], 0, nextlabel.label);
8325    tmp = load_reg(s, a->rn);
8326    store_reg(s, 14, tmp);
8327    if (a->size != 4) {
8328        /*
8329         * WLSTP: set FPSCR.LTPSIZE. This requires that we do the
8330         * lazy state preservation, new FP context creation, etc,
8331         * that vfp_access_check() does. We know that the actual
8332         * access check will succeed (ie it won't generate code that
8333         * throws an exception) because we did that check by hand earlier.
8334         */
8335        bool ok = vfp_access_check(s);
8336        assert(ok);
8337        store_cpu_field(tcg_constant_i32(a->size), v7m.ltpsize);
8338        /*
8339         * LTPSIZE updated, but MVE_NO_PRED will always be the same thing (0)
8340         * when we take this upcoming exit from this TB, so gen_jmp_tb() is OK.
8341         */
8342    }
8343    gen_jmp_tb(s, curr_insn_len(s), 1);
8344
8345    set_disas_label(s, nextlabel);
8346    gen_jmp(s, jmp_diff(s, a->imm));
8347    return true;
8348}
8349
8350static bool trans_LE(DisasContext *s, arg_LE *a)
8351{
8352    /*
8353     * M-profile low-overhead loop end. The architecture permits an
8354     * implementation to discard the LO_BRANCH_INFO cache at any time,
8355     * and we take the IMPDEF option to never set it in the first place
8356     * (equivalent to always discarding it immediately), because for QEMU
8357     * a "real" implementation would be complicated and wouldn't execute
8358     * any faster.
8359     */
8360    TCGv_i32 tmp;
8361    DisasLabel loopend;
8362    bool fpu_active;
8363
8364    if (!dc_isar_feature(aa32_lob, s)) {
8365        return false;
8366    }
8367    if (a->f && a->tp) {
8368        return false;
8369    }
8370    if (s->condexec_mask) {
8371        /*
8372         * LE in an IT block is CONSTRAINED UNPREDICTABLE;
8373         * we choose to UNDEF, because otherwise our use of
8374         * gen_goto_tb(1) would clash with the use of TB exit 1
8375         * in the dc->condjmp condition-failed codepath in
8376         * arm_tr_tb_stop() and we'd get an assertion.
8377         */
8378        return false;
8379    }
8380    if (a->tp) {
8381        /* LETP */
8382        if (!dc_isar_feature(aa32_mve, s)) {
8383            return false;
8384        }
8385        if (!vfp_access_check(s)) {
8386            s->eci_handled = true;
8387            return true;
8388        }
8389    }
8390
8391    /* LE/LETP is OK with ECI set and leaves it untouched */
8392    s->eci_handled = true;
8393
8394    /*
8395     * With MVE, LTPSIZE might not be 4, and we must emit an INVSTATE
8396     * UsageFault exception for the LE insn in that case. Note that we
8397     * are not directly checking FPSCR.LTPSIZE but instead check the
8398     * pseudocode LTPSIZE() function, which returns 4 if the FPU is
8399     * not currently active (ie ActiveFPState() returns false). We
8400     * can identify not-active purely from our TB state flags, as the
8401     * FPU is active only if:
8402     *  the FPU is enabled
8403     *  AND lazy state preservation is not active
8404     *  AND we do not need a new fp context (this is the ASPEN/FPCA check)
8405     *
8406     * Usually we don't need to care about this distinction between
8407     * LTPSIZE and FPSCR.LTPSIZE, because the code in vfp_access_check()
8408     * will either take an exception or clear the conditions that make
8409     * the FPU not active. But LE is an unusual case of a non-FP insn
8410     * that looks at LTPSIZE.
8411     */
8412    fpu_active = !s->fp_excp_el && !s->v7m_lspact && !s->v7m_new_fp_ctxt_needed;
8413
8414    if (!a->tp && dc_isar_feature(aa32_mve, s) && fpu_active) {
8415        /* Need to do a runtime check for LTPSIZE != 4 */
8416        DisasLabel skipexc = gen_disas_label(s);
8417        tmp = load_cpu_field(v7m.ltpsize);
8418        tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 4, skipexc.label);
8419        gen_exception_insn(s, 0, EXCP_INVSTATE, syn_uncategorized());
8420        set_disas_label(s, skipexc);
8421    }
8422
8423    if (a->f) {
8424        /* Loop-forever: just jump back to the loop start */
8425        gen_jmp(s, jmp_diff(s, -a->imm));
8426        return true;
8427    }
8428
8429    /*
8430     * Not loop-forever. If LR <= loop-decrement-value this is the last loop.
8431     * For LE, we know at this point that LTPSIZE must be 4 and the
8432     * loop decrement value is 1. For LETP we need to calculate the decrement
8433     * value from LTPSIZE.
8434     */
8435    loopend = gen_disas_label(s);
8436    if (!a->tp) {
8437        tcg_gen_brcondi_i32(TCG_COND_LEU, cpu_R[14], 1, loopend.label);
8438        tcg_gen_addi_i32(cpu_R[14], cpu_R[14], -1);
8439    } else {
8440        /*
8441         * Decrement by 1 << (4 - LTPSIZE). We need to use a TCG local
8442         * so that decr stays live after the brcondi.
8443         */
8444        TCGv_i32 decr = tcg_temp_new_i32();
8445        TCGv_i32 ltpsize = load_cpu_field(v7m.ltpsize);
8446        tcg_gen_sub_i32(decr, tcg_constant_i32(4), ltpsize);
8447        tcg_gen_shl_i32(decr, tcg_constant_i32(1), decr);
8448
8449        tcg_gen_brcond_i32(TCG_COND_LEU, cpu_R[14], decr, loopend.label);
8450
8451        tcg_gen_sub_i32(cpu_R[14], cpu_R[14], decr);
8452    }
8453    /* Jump back to the loop start */
8454    gen_jmp(s, jmp_diff(s, -a->imm));
8455
8456    set_disas_label(s, loopend);
8457    if (a->tp) {
8458        /* Exits from tail-pred loops must reset LTPSIZE to 4 */
8459        store_cpu_field(tcg_constant_i32(4), v7m.ltpsize);
8460    }
8461    /* End TB, continuing to following insn */
8462    gen_jmp_tb(s, curr_insn_len(s), 1);
8463    return true;
8464}
8465
8466static bool trans_LCTP(DisasContext *s, arg_LCTP *a)
8467{
8468    /*
8469     * M-profile Loop Clear with Tail Predication. Since our implementation
8470     * doesn't cache branch information, all we need to do is reset
8471     * FPSCR.LTPSIZE to 4.
8472     */
8473
8474    if (!dc_isar_feature(aa32_lob, s) ||
8475        !dc_isar_feature(aa32_mve, s)) {
8476        return false;
8477    }
8478
8479    if (!vfp_access_check(s)) {
8480        return true;
8481    }
8482
8483    store_cpu_field_constant(4, v7m.ltpsize);
8484    return true;
8485}
8486
8487static bool trans_VCTP(DisasContext *s, arg_VCTP *a)
8488{
8489    /*
8490     * M-profile Create Vector Tail Predicate. This insn is itself
8491     * predicated and is subject to beatwise execution.
8492     */
8493    TCGv_i32 rn_shifted, masklen;
8494
8495    if (!dc_isar_feature(aa32_mve, s) || a->rn == 13 || a->rn == 15) {
8496        return false;
8497    }
8498
8499    if (!mve_eci_check(s) || !vfp_access_check(s)) {
8500        return true;
8501    }
8502
8503    /*
8504     * We pre-calculate the mask length here to avoid needing
8505     * multiple helpers specialized for each element size.
8506     * We pass the helper "rn <= (1 << (4 - size)) ? (rn << size) : 16".
8507     */
8508    rn_shifted = tcg_temp_new_i32();
8509    masklen = load_reg(s, a->rn);
8510    tcg_gen_shli_i32(rn_shifted, masklen, a->size);
8511    tcg_gen_movcond_i32(TCG_COND_LEU, masklen,
8512                        masklen, tcg_constant_i32(1 << (4 - a->size)),
8513                        rn_shifted, tcg_constant_i32(16));
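        /*
         * masklen is now min(rn << size, 16), i.e. the number of leading
         * bytes of the vector for which the VCTP predicate is set true.
         */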
8514    gen_helper_mve_vctp(cpu_env, masklen);
8515    /* This insn updates predication bits */
8516    s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
8517    mve_update_eci(s);
8518    return true;
8519}
8520
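    /*
     * Table Branch: TBB/TBH load an unsigned byte or halfword offset from a
     * table at [Rn, Rm] (Rm scaled by 2 for TBH) and branch forward to
     * PC + 2 * offset, where PC reads as the address of this insn plus 4.
     */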
8521static bool op_tbranch(DisasContext *s, arg_tbranch *a, bool half)
8522{
8523    TCGv_i32 addr, tmp;
8524
8525    tmp = load_reg(s, a->rm);
8526    if (half) {
8527        tcg_gen_add_i32(tmp, tmp, tmp);
8528    }
8529    addr = load_reg(s, a->rn);
8530    tcg_gen_add_i32(addr, addr, tmp);
8531
8532    gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), half ? MO_UW : MO_UB);
8533
8534    tcg_gen_add_i32(tmp, tmp, tmp);
8535    gen_pc_plus_diff(s, addr, jmp_diff(s, 0));
8536    tcg_gen_add_i32(tmp, tmp, addr);
8537    store_reg(s, 15, tmp);
8538    return true;
8539}
8540
8541static bool trans_TBB(DisasContext *s, arg_tbranch *a)
8542{
8543    return op_tbranch(s, a, false);
8544}
8545
8546static bool trans_TBH(DisasContext *s, arg_tbranch *a)
8547{
8548    return op_tbranch(s, a, true);
8549}
8550
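    /*
     * Compare and Branch on Zero / Non-zero: a short forward branch taken
     * if Rn is zero (CBZ) or non-zero (CBNZ). It does not read the
     * condition flags.
     */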
8551static bool trans_CBZ(DisasContext *s, arg_CBZ *a)
8552{
8553    TCGv_i32 tmp = load_reg(s, a->rn);
8554
8555    arm_gen_condlabel(s);
8556    tcg_gen_brcondi_i32(a->nz ? TCG_COND_EQ : TCG_COND_NE,
8557                        tmp, 0, s->condlabel.label);
8558    gen_jmp(s, jmp_diff(s, a->imm));
8559    return true;
8560}
8561
8562/*
8563 * Supervisor call - both T32 & A32 come here so we need to check
8564 * which mode we are in when checking for semihosting.
8565 */
8566
8567static bool trans_SVC(DisasContext *s, arg_SVC *a)
8568{
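        /*
         * The AArch32 semihosting ABI uses SVC 0xab in Thumb state and
         * SVC 0x123456 in Arm state.
         */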
8569    const uint32_t semihost_imm = s->thumb ? 0xab : 0x123456;
8570
8571    if (!arm_dc_feature(s, ARM_FEATURE_M) &&
8572        semihosting_enabled(s->current_el == 0) &&
8573        (a->imm == semihost_imm)) {
8574        gen_exception_internal_insn(s, EXCP_SEMIHOST);
8575    } else {
8576        if (s->fgt_svc) {
8577            uint32_t syndrome = syn_aa32_svc(a->imm, s->thumb);
8578            gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2);
8579        } else {
8580            gen_update_pc(s, curr_insn_len(s));
8581            s->svc_imm = a->imm;
8582            s->base.is_jmp = DISAS_SWI;
8583        }
8584    }
8585    return true;
8586}
8587
8588/*
8589 * Unconditional system instructions
8590 */
8591
8592static bool trans_RFE(DisasContext *s, arg_RFE *a)
8593{
8594    static const int8_t pre_offset[4] = {
8595        /* DA */ -4, /* IA */ 0, /* DB */ -8, /* IB */ 4
8596    };
8597    static const int8_t post_offset[4] = {
8598        /* DA */ -8, /* IA */ 4, /* DB */ -4, /* IB */ 0
8599    };
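        /*
         * pre_offset positions addr at the word holding the return PC (the
         * CPSR image is at addr + 4); post_offset is applied after addr has
         * already advanced by 4 and gives the value written back to Rn.
         */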
8600    TCGv_i32 addr, t1, t2;
8601
8602    if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
8603        return false;
8604    }
8605    if (IS_USER(s)) {
8606        unallocated_encoding(s);
8607        return true;
8608    }
8609
8610    addr = load_reg(s, a->rn);
8611    tcg_gen_addi_i32(addr, addr, pre_offset[a->pu]);
8612
8613    /* Load PC into tmp and CPSR into tmp2.  */
8614    t1 = tcg_temp_new_i32();
8615    gen_aa32_ld_i32(s, t1, addr, get_mem_index(s), MO_UL | MO_ALIGN);
8616    tcg_gen_addi_i32(addr, addr, 4);
8617    t2 = tcg_temp_new_i32();
8618    gen_aa32_ld_i32(s, t2, addr, get_mem_index(s), MO_UL | MO_ALIGN);
8619
8620    if (a->w) {
8621        /* Base writeback.  */
8622        tcg_gen_addi_i32(addr, addr, post_offset[a->pu]);
8623        store_reg(s, a->rn, addr);
8624    }
8625    gen_rfe(s, t1, t2);
8626    return true;
8627}
8628
8629static bool trans_SRS(DisasContext *s, arg_SRS *a)
8630{
8631    if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
8632        return false;
8633    }
8634    gen_srs(s, a->mode, a->pu, a->w);
8635    return true;
8636}
8637
8638static bool trans_CPS(DisasContext *s, arg_CPS *a)
8639{
8640    uint32_t mask, val;
8641
8642    if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
8643        return false;
8644    }
8645    if (IS_USER(s)) {
8646        /* Implemented as NOP in user mode.  */
8647        return true;
8648    }
8649    /* TODO: There are quite a lot of UNPREDICTABLE argument combinations. */
8650
8651    mask = val = 0;
8652    if (a->imod & 2) {
8653        if (a->A) {
8654            mask |= CPSR_A;
8655        }
8656        if (a->I) {
8657            mask |= CPSR_I;
8658        }
8659        if (a->F) {
8660            mask |= CPSR_F;
8661        }
8662        if (a->imod & 1) {
8663            val |= mask;
8664        }
8665    }
8666    if (a->M) {
8667        mask |= CPSR_M;
8668        val |= a->mode;
8669    }
8670    if (mask) {
8671        gen_set_psr_im(s, mask, 0, val);
8672    }
8673    return true;
8674}
8675
8676static bool trans_CPS_v7m(DisasContext *s, arg_CPS_v7m *a)
8677{
8678    TCGv_i32 tmp, addr;
8679
8680    if (!arm_dc_feature(s, ARM_FEATURE_M)) {
8681        return false;
8682    }
8683    if (IS_USER(s)) {
8684        /* Implemented as NOP in user mode.  */
8685        return true;
8686    }
8687
8688    tmp = tcg_constant_i32(a->im);
8689    /* FAULTMASK */
8690    if (a->F) {
8691        addr = tcg_constant_i32(19);
8692        gen_helper_v7m_msr(cpu_env, addr, tmp);
8693    }
8694    /* PRIMASK */
8695    if (a->I) {
8696        addr = tcg_constant_i32(16);
8697        gen_helper_v7m_msr(cpu_env, addr, tmp);
8698    }
8699    gen_rebuild_hflags(s, false);
8700    gen_lookup_tb(s);
8701    return true;
8702}
8703
8704/*
8705 * Clear-Exclusive, Barriers
8706 */
8707
8708static bool trans_CLREX(DisasContext *s, arg_CLREX *a)
8709{
8710    if (s->thumb
8711        ? !ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)
8712        : !ENABLE_ARCH_6K) {
8713        return false;
8714    }
8715    gen_clrex(s);
8716    return true;
8717}
8718
8719static bool trans_DSB(DisasContext *s, arg_DSB *a)
8720{
8721    if (!ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)) {
8722        return false;
8723    }
8724    tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
8725    return true;
8726}
8727
8728static bool trans_DMB(DisasContext *s, arg_DMB *a)
8729{
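        /*
         * DMB is architecturally a weaker barrier than DSB, but under TCG
         * both are implemented as a full memory barrier.
         */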
8730    return trans_DSB(s, NULL);
8731}
8732
8733static bool trans_ISB(DisasContext *s, arg_ISB *a)
8734{
8735    if (!ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)) {
8736        return false;
8737    }
8738    /*
8739     * We need to break the TB after this insn to execute
8740     * self-modifying code correctly and also to take
8741     * any pending interrupts immediately.
8742     */
8743    s->base.is_jmp = DISAS_TOO_MANY;
8744    return true;
8745}
8746
8747static bool trans_SB(DisasContext *s, arg_SB *a)
8748{
8749    if (!dc_isar_feature(aa32_sb, s)) {
8750        return false;
8751    }
8752    /*
8753     * TODO: There is no speculation barrier opcode
8754     * for TCG; MB and end the TB instead.
8755     */
8756    tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
8757    s->base.is_jmp = DISAS_TOO_MANY;
8758    return true;
8759}
8760
8761static bool trans_SETEND(DisasContext *s, arg_SETEND *a)
8762{
8763    if (!ENABLE_ARCH_6) {
8764        return false;
8765    }
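        /* Only call the helper (and end the TB) if the endianness changes */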
8766    if (a->E != (s->be_data == MO_BE)) {
8767        gen_helper_setend(cpu_env);
8768        s->base.is_jmp = DISAS_UPDATE_EXIT;
8769    }
8770    return true;
8771}
8772
8773/*
8774 * Preload instructions
8775 * All are nops, contingent on the appropriate arch level.
8776 */
8777
8778static bool trans_PLD(DisasContext *s, arg_PLD *a)
8779{
8780    return ENABLE_ARCH_5TE;
8781}
8782
8783static bool trans_PLDW(DisasContext *s, arg_PLD *a)
8784{
8785    return arm_dc_feature(s, ARM_FEATURE_V7MP);
8786}
8787
8788static bool trans_PLI(DisasContext *s, arg_PLD *a)
8789{
8790    return ENABLE_ARCH_7;
8791}
8792
8793/*
8794 * If-then
8795 */
8796
8797static bool trans_IT(DisasContext *s, arg_IT *a)
8798{
8799    int cond_mask = a->cond_mask;
8800
8801    /*
8802     * No actual code generated for this insn, just setup state.
8803     *
8804     * Combinations of firstcond and mask which set up a 0b1111
8805     * condition are UNPREDICTABLE; we take the CONSTRAINED
8806     * UNPREDICTABLE choice to treat 0b1111 the same as 0b1110,
8807     * i.e. both meaning "execute always".
8808     */
8809    s->condexec_cond = (cond_mask >> 4) & 0xe;
8810    s->condexec_mask = cond_mask & 0x1f;
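        /*
         * condexec_cond now holds firstcond with its low bit forced to zero,
         * and condexec_mask holds {firstcond[0], mask[3:0]}; this matches
         * the way the IT state is advanced after each insn in
         * thumb_tr_translate_insn().
         */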
8811    return true;
8812}
8813
8814/* v8.1M CSEL/CSINC/CSNEG/CSINV */
8815static bool trans_CSEL(DisasContext *s, arg_CSEL *a)
8816{
8817    TCGv_i32 rn, rm;
8818    DisasCompare c;
8819
8820    if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
8821        return false;
8822    }
8823
8824    if (a->rm == 13) {
8825        /* SEE "Related encodings" (MVE shifts) */
8826        return false;
8827    }
8828
8829    if (a->rd == 13 || a->rd == 15 || a->rn == 13 || a->fcond >= 14) {
8830        /* CONSTRAINED UNPREDICTABLE: we choose to UNDEF */
8831        return false;
8832    }
8833
8834    /* In this insn input reg fields of 0b1111 mean "zero", not "PC" */
8835    rn = tcg_temp_new_i32();
8836    rm = tcg_temp_new_i32();
8837    if (a->rn == 15) {
8838        tcg_gen_movi_i32(rn, 0);
8839    } else {
8840        load_reg_var(s, rn, a->rn);
8841    }
8842    if (a->rm == 15) {
8843        tcg_gen_movi_i32(rm, 0);
8844    } else {
8845        load_reg_var(s, rm, a->rm);
8846    }
8847
8848    switch (a->op) {
8849    case 0: /* CSEL */
8850        break;
8851    case 1: /* CSINC */
8852        tcg_gen_addi_i32(rm, rm, 1);
8853        break;
8854    case 2: /* CSINV */
8855        tcg_gen_not_i32(rm, rm);
8856        break;
8857    case 3: /* CSNEG */
8858        tcg_gen_neg_i32(rm, rm);
8859        break;
8860    default:
8861        g_assert_not_reached();
8862    }
8863
8864    arm_test_cc(&c, a->fcond);
8865    tcg_gen_movcond_i32(c.cond, rn, c.value, tcg_constant_i32(0), rn, rm);
8866
8867    store_reg(s, a->rd, rn);
8868    return true;
8869}
8870
8871/*
8872 * Legacy decoder.
8873 */
8874
8875static void disas_arm_insn(DisasContext *s, unsigned int insn)
8876{
8877    unsigned int cond = insn >> 28;
8878
8879    /* M variants do not implement ARM mode; this must raise the INVSTATE
8880     * UsageFault exception.
8881     */
8882    if (arm_dc_feature(s, ARM_FEATURE_M)) {
8883        gen_exception_insn(s, 0, EXCP_INVSTATE, syn_uncategorized());
8884        return;
8885    }
8886
8887    if (s->pstate_il) {
8888        /*
8889         * Illegal execution state. This has priority over BTI
8890         * exceptions, but comes after instruction abort exceptions.
8891         */
8892        gen_exception_insn(s, 0, EXCP_UDEF, syn_illegalstate());
8893        return;
8894    }
8895
8896    if (cond == 0xf) {
8897        /* In ARMv3 and v4 the NV condition is UNPREDICTABLE; we
8898         * choose to UNDEF. In ARMv5 and above the space is used
8899         * for miscellaneous unconditional instructions.
8900         */
8901        if (!arm_dc_feature(s, ARM_FEATURE_V5)) {
8902            unallocated_encoding(s);
8903            return;
8904        }
8905
8906        /* Unconditional instructions.  */
8907        /* TODO: Perhaps merge these into one decodetree output file.  */
8908        if (disas_a32_uncond(s, insn) ||
8909            disas_vfp_uncond(s, insn) ||
8910            disas_neon_dp(s, insn) ||
8911            disas_neon_ls(s, insn) ||
8912            disas_neon_shared(s, insn)) {
8913            return;
8914        }
8915        /* fall back to legacy decoder */
8916
8917        if ((insn & 0x0e000f00) == 0x0c000100) {
8918            if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
8919                /* iWMMXt register transfer.  */
8920                if (extract32(s->c15_cpar, 1, 1)) {
8921                    if (!disas_iwmmxt_insn(s, insn)) {
8922                        return;
8923                    }
8924                }
8925            }
8926        }
8927        goto illegal_op;
8928    }
8929    if (cond != 0xe) {
8930        /* If the condition is not "always", generate a conditional jump
8931           which skips this instruction when the condition fails.  */
8932        arm_skip_unless(s, cond);
8933    }
8934
8935    /* TODO: Perhaps merge these into one decodetree output file.  */
8936    if (disas_a32(s, insn) ||
8937        disas_vfp(s, insn)) {
8938        return;
8939    }
8940    /* fall back to legacy decoder */
8941    /* TODO: convert xscale/iwmmxt decoder to decodetree ?? */
8942    if (arm_dc_feature(s, ARM_FEATURE_XSCALE)) {
8943        if (((insn & 0x0c000e00) == 0x0c000000)
8944            && ((insn & 0x03000000) != 0x03000000)) {
8945            /* Coprocessor insn, coprocessor 0 or 1 */
8946            disas_xscale_insn(s, insn);
8947            return;
8948        }
8949    }
8950
8951illegal_op:
8952    unallocated_encoding(s);
8953}
8954
8955static bool thumb_insn_is_16bit(DisasContext *s, uint32_t pc, uint32_t insn)
8956{
8957    /*
8958     * Return true if this is a 16-bit instruction. We must be precise
8959     * about this (matching the decode).
8960     */
8961    if ((insn >> 11) < 0x1d) {
8962        /* Definitely a 16-bit instruction */
8963        return true;
8964    }
8965
8966    /* Top five bits 0b11101 / 0b11110 / 0b11111 : this is the
8967     * first half of a 32-bit Thumb insn. Thumb-1 cores might
8968     * end up actually treating this as two 16-bit insns, though,
8969     * if it's half of a bl/blx pair that might span a page boundary.
8970     */
8971    if (arm_dc_feature(s, ARM_FEATURE_THUMB2) ||
8972        arm_dc_feature(s, ARM_FEATURE_M)) {
8973        /* Thumb2 cores (including all M profile ones) always treat
8974         * 32-bit insns as 32-bit.
8975         */
8976        return false;
8977    }
8978
8979    if ((insn >> 11) == 0x1e && pc - s->page_start < TARGET_PAGE_SIZE - 3) {
8980        /* 0b1111_0xxx_xxxx_xxxx : BL/BLX prefix, and the suffix
8981         * is not on the next page; we merge this into a 32-bit
8982         * insn.
8983         */
8984        return false;
8985    }
8986    /* 0b1110_1xxx_xxxx_xxxx : BLX suffix (or UNDEF);
8987     * 0b1111_1xxx_xxxx_xxxx : BL suffix;
8988     * 0b1111_0xxx_xxxx_xxxx : BL/BLX prefix on the end of a page
8989     *  -- handle as single 16 bit insn
8990     */
8991    return true;
8992}
8993
8994/* Translate a 32-bit thumb instruction. */
8995static void disas_thumb2_insn(DisasContext *s, uint32_t insn)
8996{
8997    /*
8998     * ARMv6-M supports a limited subset of Thumb2 instructions.
8999     * Other Thumb1 architectures allow only 32-bit
9000     * combined BL/BLX prefix and suffix.
9001     */
9002    if (arm_dc_feature(s, ARM_FEATURE_M) &&
9003        !arm_dc_feature(s, ARM_FEATURE_V7)) {
9004        int i;
9005        bool found = false;
9006        static const uint32_t armv6m_insn[] = {0xf3808000 /* msr */,
9007                                               0xf3b08040 /* dsb */,
9008                                               0xf3b08050 /* dmb */,
9009                                               0xf3b08060 /* isb */,
9010                                               0xf3e08000 /* mrs */,
9011                                               0xf000d000 /* bl */};
9012        static const uint32_t armv6m_mask[] = {0xffe0d000,
9013                                               0xfff0d0f0,
9014                                               0xfff0d0f0,
9015                                               0xfff0d0f0,
9016                                               0xffe0d000,
9017                                               0xf800d000};
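            /*
             * Each value/mask pair above matches one permitted encoding
             * with its variable operand bits masked off.
             */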
9018
9019        for (i = 0; i < ARRAY_SIZE(armv6m_insn); i++) {
9020            if ((insn & armv6m_mask[i]) == armv6m_insn[i]) {
9021                found = true;
9022                break;
9023            }
9024        }
9025        if (!found) {
9026            goto illegal_op;
9027        }
9028    } else if ((insn & 0xf800e800) != 0xf000e800)  {
9029        if (!arm_dc_feature(s, ARM_FEATURE_THUMB2)) {
9030            unallocated_encoding(s);
9031            return;
9032        }
9033    }
9034
9035    if (arm_dc_feature(s, ARM_FEATURE_M)) {
9036        /*
9037         * NOCP takes precedence over any UNDEF for (almost) the
9038         * entire wide range of coprocessor-space encodings, so check
9039         * for it first before proceeding to actually decode eg VFP
9040         * insns. This decode also handles the few insns which are
9041         * in copro space but do not have NOCP checks (eg VLLDM, VLSTM).
9042         */
9043        if (disas_m_nocp(s, insn)) {
9044            return;
9045        }
9046    }
9047
9048    if ((insn & 0xef000000) == 0xef000000) {
9049        /*
9050         * T32 encodings 0b111p_1111_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq
9051         * transform into
9052         * A32 encodings 0b1111_001p_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq
9053         */
9054        uint32_t a32_insn = (insn & 0xe2ffffff) |
9055            ((insn & (1 << 28)) >> 4) | (1 << 28);
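            /*
             * i.e. set the top nibble to 0b1111, set bits [27:25] to 0b001
             * and move the old bit 28 (the 'p' bit) down to bit 24.
             */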
9056
9057        if (disas_neon_dp(s, a32_insn)) {
9058            return;
9059        }
9060    }
9061
9062    if ((insn & 0xff100000) == 0xf9000000) {
9063        /*
9064         * T32 encodings 0b1111_1001_ppp0_qqqq_qqqq_qqqq_qqqq_qqqq
9065         * transform into
9066         * A32 encodings 0b1111_0100_ppp0_qqqq_qqqq_qqqq_qqqq_qqqq
9067         */
9068        uint32_t a32_insn = (insn & 0x00ffffff) | 0xf4000000;
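            /* i.e. replace the 0xf9 top byte with 0xf4 */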
9069
9070        if (disas_neon_ls(s, a32_insn)) {
9071            return;
9072        }
9073    }
9074
9075    /*
9076     * TODO: Perhaps merge these into one decodetree output file.
9077     * Note disas_vfp is written for a32 with cond field in the
9078     * top nibble.  The t32 encoding requires 0xe in the top nibble.
9079     */
9080    if (disas_t32(s, insn) ||
9081        disas_vfp_uncond(s, insn) ||
9082        disas_neon_shared(s, insn) ||
9083        disas_mve(s, insn) ||
9084        ((insn >> 28) == 0xe && disas_vfp(s, insn))) {
9085        return;
9086    }
9087
9088illegal_op:
9089    unallocated_encoding(s);
9090}
9091
9092static void disas_thumb_insn(DisasContext *s, uint32_t insn)
9093{
9094    if (!disas_t16(s, insn)) {
9095        unallocated_encoding(s);
9096    }
9097}
9098
9099static bool insn_crosses_page(CPUARMState *env, DisasContext *s)
9100{
9101    /* Return true if the insn at dc->base.pc_next might cross a page boundary.
9102     * (False positives are OK, false negatives are not.)
9103     * We know this is a Thumb insn, and our caller ensures we are
9104     * only called if dc->base.pc_next is less than 4 bytes from the page
9105     * boundary, so we cross the page if the first 16 bits indicate
9106     * that this is a 32 bit insn.
9107     */
9108    uint16_t insn = arm_lduw_code(env, &s->base, s->base.pc_next, s->sctlr_b);
9109
9110    return !thumb_insn_is_16bit(s, s->base.pc_next, insn);
9111}
9112
9113static void arm_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
9114{
9115    DisasContext *dc = container_of(dcbase, DisasContext, base);
9116    CPUARMState *env = cs->env_ptr;
9117    ARMCPU *cpu = env_archcpu(env);
9118    CPUARMTBFlags tb_flags = arm_tbflags_from_tb(dc->base.tb);
9119    uint32_t condexec, core_mmu_idx;
9120
9121    dc->isar = &cpu->isar;
9122    dc->condjmp = 0;
9123    dc->pc_save = dc->base.pc_first;
9124    dc->aarch64 = false;
9125    dc->thumb = EX_TBFLAG_AM32(tb_flags, THUMB);
9126    dc->be_data = EX_TBFLAG_ANY(tb_flags, BE_DATA) ? MO_BE : MO_LE;
9127    condexec = EX_TBFLAG_AM32(tb_flags, CONDEXEC);
9128    /*
9129     * The CONDEXEC TB flags are CPSR bits [15:10][26:25]. On A-profile this
9130     * is always the IT bits. On M-profile, some of the reserved encodings
9131     * of IT are used instead to indicate either ICI or ECI, which
9132     * indicate partial progress of a restartable insn that was interrupted
9133     * partway through by an exception:
9134     *  * if CONDEXEC[3:0] != 0b0000 : CONDEXEC is IT bits
9135     *  * if CONDEXEC[3:0] == 0b0000 : CONDEXEC is ICI or ECI bits
9136     * In all cases CONDEXEC == 0 means "not in IT block or restartable
9137     * insn, behave normally".
9138     */
9139    dc->eci = dc->condexec_mask = dc->condexec_cond = 0;
9140    dc->eci_handled = false;
9141    if (condexec & 0xf) {
9142        dc->condexec_mask = (condexec & 0xf) << 1;
9143        dc->condexec_cond = condexec >> 4;
9144    } else {
9145        if (arm_feature(env, ARM_FEATURE_M)) {
9146            dc->eci = condexec >> 4;
9147        }
9148    }
9149
9150    core_mmu_idx = EX_TBFLAG_ANY(tb_flags, MMUIDX);
9151    dc->mmu_idx = core_to_arm_mmu_idx(env, core_mmu_idx);
9152    dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
9153#if !defined(CONFIG_USER_ONLY)
9154    dc->user = (dc->current_el == 0);
9155#endif
9156    dc->fp_excp_el = EX_TBFLAG_ANY(tb_flags, FPEXC_EL);
9157    dc->align_mem = EX_TBFLAG_ANY(tb_flags, ALIGN_MEM);
9158    dc->pstate_il = EX_TBFLAG_ANY(tb_flags, PSTATE__IL);
9159    dc->fgt_active = EX_TBFLAG_ANY(tb_flags, FGT_ACTIVE);
9160    dc->fgt_svc = EX_TBFLAG_ANY(tb_flags, FGT_SVC);
9161
9162    if (arm_feature(env, ARM_FEATURE_M)) {
9163        dc->vfp_enabled = 1;
9164        dc->be_data = MO_TE;
9165        dc->v7m_handler_mode = EX_TBFLAG_M32(tb_flags, HANDLER);
9166        dc->v8m_secure = EX_TBFLAG_M32(tb_flags, SECURE);
9167        dc->v8m_stackcheck = EX_TBFLAG_M32(tb_flags, STACKCHECK);
9168        dc->v8m_fpccr_s_wrong = EX_TBFLAG_M32(tb_flags, FPCCR_S_WRONG);
9169        dc->v7m_new_fp_ctxt_needed =
9170            EX_TBFLAG_M32(tb_flags, NEW_FP_CTXT_NEEDED);
9171        dc->v7m_lspact = EX_TBFLAG_M32(tb_flags, LSPACT);
9172        dc->mve_no_pred = EX_TBFLAG_M32(tb_flags, MVE_NO_PRED);
9173    } else {
9174        dc->sctlr_b = EX_TBFLAG_A32(tb_flags, SCTLR__B);
9175        dc->hstr_active = EX_TBFLAG_A32(tb_flags, HSTR_ACTIVE);
9176        dc->ns = EX_TBFLAG_A32(tb_flags, NS);
9177        dc->vfp_enabled = EX_TBFLAG_A32(tb_flags, VFPEN);
9178        if (arm_feature(env, ARM_FEATURE_XSCALE)) {
9179            dc->c15_cpar = EX_TBFLAG_A32(tb_flags, XSCALE_CPAR);
9180        } else {
9181            dc->vec_len = EX_TBFLAG_A32(tb_flags, VECLEN);
9182            dc->vec_stride = EX_TBFLAG_A32(tb_flags, VECSTRIDE);
9183        }
9184        dc->sme_trap_nonstreaming =
9185            EX_TBFLAG_A32(tb_flags, SME_TRAP_NONSTREAMING);
9186    }
9187    dc->cp_regs = cpu->cp_regs;
9188    dc->features = env->features;
9189
9190    /* Single step state. The code-generation logic here is:
9191     *  SS_ACTIVE == 0:
9192     *   generate code with no special handling for single-stepping (except
9193     *   that anything that can make us go to SS_ACTIVE == 1 must end the TB;
9194     *   this happens anyway because those changes are all system register or
9195     *   PSTATE writes).
9196     *  SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
9197     *   emit code for one insn
9198     *   emit code to clear PSTATE.SS
9199     *   emit code to generate software step exception for completed step
9200     *   end TB (as usual for having generated an exception)
9201     *  SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
9202     *   emit code to generate a software step exception
9203     *   end the TB
9204     */
9205    dc->ss_active = EX_TBFLAG_ANY(tb_flags, SS_ACTIVE);
9206    dc->pstate_ss = EX_TBFLAG_ANY(tb_flags, PSTATE__SS);
9207    dc->is_ldex = false;
9208
9209    dc->page_start = dc->base.pc_first & TARGET_PAGE_MASK;
9210
9211    /* If architectural single step active, limit to 1.  */
9212    if (dc->ss_active) {
9213        dc->base.max_insns = 1;
9214    }
9215
9216    /* ARM is a fixed-length ISA.  Bound the number of insns to execute
9217       to those left on the page.  */
9218    if (!dc->thumb) {
9219        int bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;
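            /*
             * -(pc_first | TARGET_PAGE_MASK) is the number of bytes left on
             * this page, so bound is the number of 4-byte insns left on it.
             */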
9220        dc->base.max_insns = MIN(dc->base.max_insns, bound);
9221    }
9222
9223    cpu_V0 = tcg_temp_new_i64();
9224    cpu_V1 = tcg_temp_new_i64();
9225    cpu_M0 = tcg_temp_new_i64();
9226}
9227
9228static void arm_tr_tb_start(DisasContextBase *dcbase, CPUState *cpu)
9229{
9230    DisasContext *dc = container_of(dcbase, DisasContext, base);
9231
9232    /* A note on handling of the condexec (IT) bits:
9233     *
9234     * We want to avoid the overhead of having to write the updated condexec
9235     * bits back to the CPUARMState for every instruction in an IT block. So:
9236     * (1) if the condexec bits are not already zero then we write
9237     * zero back into the CPUARMState now. This avoids complications trying
9238     * to do it at the end of the block. (For example if we don't do this
9239     * it's hard to identify whether we can safely skip writing condexec
9240     * at the end of the TB, which we definitely want to do for the case
9241     * where a TB doesn't do anything with the IT state at all.)
9242     * (2) if we are going to leave the TB then we call gen_set_condexec()
9243     * which will write the correct value into CPUARMState if zero is wrong.
9244     * This is done both for leaving the TB at the end, and for leaving
9245     * it because of an exception we know will happen, which is done in
9246     * gen_exception_insn(). The latter is necessary because we need to
9247     * leave the TB with the PC/IT state just prior to execution of the
9248     * instruction which caused the exception.
9249     * (3) if we leave the TB unexpectedly (eg a data abort on a load)
9250     * then the CPUARMState will be wrong and we need to reset it.
9251     * This is handled in the same way as restoration of the
9252     * PC in these situations; we save the value of the condexec bits
9253     * for each PC via tcg_gen_insn_start(), and restore_state_to_opc()
9254     * then uses this to restore them after an exception.
9255     *
9256     * Note that there are no instructions which can read the condexec
9257     * bits, and none which can write non-static values to them, so
9258     * we don't need to care about whether CPUARMState is correct in the
9259     * middle of a TB.
9260     */
9261
9262    /* Reset the conditional execution bits immediately. This avoids
9263       complications trying to do it at the end of the block.  */
9264    if (dc->condexec_mask || dc->condexec_cond) {
9265        store_cpu_field_constant(0, condexec_bits);
9266    }
9267}
9268
9269static void arm_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
9270{
9271    DisasContext *dc = container_of(dcbase, DisasContext, base);
9272    /*
9273     * The ECI/ICI bits share PSR bits with the IT bits, so we
9274     * need to reconstitute the bits from the split-out DisasContext
9275     * fields here.
9276     */
9277    uint32_t condexec_bits;
9278    target_ulong pc_arg = dc->base.pc_next;
9279
9280    if (tb_cflags(dcbase->tb) & CF_PCREL) {
9281        pc_arg &= ~TARGET_PAGE_MASK;
9282    }
9283    if (dc->eci) {
9284        condexec_bits = dc->eci << 4;
9285    } else {
9286        condexec_bits = (dc->condexec_cond << 4) | (dc->condexec_mask >> 1);
9287    }
9288    tcg_gen_insn_start(pc_arg, condexec_bits, 0);
9289    dc->insn_start = tcg_last_op();
9290}
9291
9292static bool arm_check_kernelpage(DisasContext *dc)
9293{
9294#ifdef CONFIG_USER_ONLY
9295    /* Intercept jump to the magic kernel page.  */
9296    if (dc->base.pc_next >= 0xffff0000) {
9297        /* We always get here via a jump, so we know we are not in a
9298           conditional execution block.  */
9299        gen_exception_internal(EXCP_KERNEL_TRAP);
9300        dc->base.is_jmp = DISAS_NORETURN;
9301        return true;
9302    }
9303#endif
9304    return false;
9305}
9306
9307static bool arm_check_ss_active(DisasContext *dc)
9308{
9309    if (dc->ss_active && !dc->pstate_ss) {
9310        /* Singlestep state is Active-pending.
9311         * If we're in this state at the start of a TB then either
9312         *  a) we just took an exception to an EL which is being debugged
9313         *     and this is the first insn in the exception handler
9314         *  b) debug exceptions were masked and we just unmasked them
9315         *     without changing EL (eg by clearing PSTATE.D)
9316         * In either case we're going to take a swstep exception in the
9317         * "did not step an insn" case, and so the syndrome ISV and EX
9318         * bits should be zero.
9319         */
9320        assert(dc->base.num_insns == 1);
9321        gen_swstep_exception(dc, 0, 0);
9322        dc->base.is_jmp = DISAS_NORETURN;
9323        return true;
9324    }
9325
9326    return false;
9327}
9328
9329static void arm_post_translate_insn(DisasContext *dc)
9330{
9331    if (dc->condjmp && dc->base.is_jmp == DISAS_NEXT) {
9332        if (dc->pc_save != dc->condlabel.pc_save) {
9333            gen_update_pc(dc, dc->condlabel.pc_save - dc->pc_save);
9334        }
9335        gen_set_label(dc->condlabel.label);
9336        dc->condjmp = 0;
9337    }
9338}
9339
9340static void arm_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
9341{
9342    DisasContext *dc = container_of(dcbase, DisasContext, base);
9343    CPUARMState *env = cpu->env_ptr;
9344    uint32_t pc = dc->base.pc_next;
9345    unsigned int insn;
9346
9347    /* Singlestep exceptions have the highest priority. */
9348    if (arm_check_ss_active(dc)) {
9349        dc->base.pc_next = pc + 4;
9350        return;
9351    }
9352
9353    if (pc & 3) {
9354        /*
9355         * PC alignment fault.  This has priority over the instruction abort
9356         * that we would receive from a translation fault via arm_ldl_code
9357         * (or the execution of the kernelpage entrypoint). This should only
9358         * be possible after an indirect branch, at the start of the TB.
9359         */
9360        assert(dc->base.num_insns == 1);
9361        gen_helper_exception_pc_alignment(cpu_env, tcg_constant_tl(pc));
9362        dc->base.is_jmp = DISAS_NORETURN;
9363        dc->base.pc_next = QEMU_ALIGN_UP(pc, 4);
9364        return;
9365    }
9366
9367    if (arm_check_kernelpage(dc)) {
9368        dc->base.pc_next = pc + 4;
9369        return;
9370    }
9371
9372    dc->pc_curr = pc;
9373    insn = arm_ldl_code(env, &dc->base, pc, dc->sctlr_b);
9374    dc->insn = insn;
9375    dc->base.pc_next = pc + 4;
9376    disas_arm_insn(dc, insn);
9377
9378    arm_post_translate_insn(dc);
9379
9380    /* ARM is a fixed-length ISA.  We performed the cross-page check
9381       in init_disas_context by adjusting max_insns.  */
9382}
9383
9384static bool thumb_insn_is_unconditional(DisasContext *s, uint32_t insn)
9385{
9386    /* Return true if this Thumb insn is always unconditional,
9387     * even inside an IT block. This is true of only a very few
9388     * instructions: BKPT, HLT, and SG.
9389     *
9390     * A larger class of instructions are UNPREDICTABLE if used
9391     * inside an IT block; we do not need to detect those here, because
9392     * what we do by default (perform the cc check and update the IT
9393     * bits state machine) is a permitted CONSTRAINED UNPREDICTABLE
9394     * choice for those situations.
9395     *
9396     * insn is either a 16-bit or a 32-bit instruction; the two are
9397     * distinguishable because for the 16-bit case the top 16 bits
9398     * are zeroes, and that isn't a valid 32-bit encoding.
9399     */
9400    if ((insn & 0xffffff00) == 0xbe00) {
9401        /* BKPT */
9402        return true;
9403    }
9404
9405    if ((insn & 0xffffffc0) == 0xba80 && arm_dc_feature(s, ARM_FEATURE_V8) &&
9406        !arm_dc_feature(s, ARM_FEATURE_M)) {
9407        /* HLT: v8A only. This is unconditional even when it is going to
9408         * UNDEF; see the v8A ARM ARM DDI0487B.a H3.3.
9409         * For v7 cores this was a plain old undefined encoding and so
9410         * honours its cc check. (We might be using the encoding as
9411         * a semihosting trap, but we don't change the cc check behaviour
9412         * on that account, because a debugger connected to a real v7A
9413         * core and emulating semihosting traps by catching the UNDEF
9414         * exception would also only see cases where the cc check passed.
9415         * No guest code should be trying to do a HLT semihosting trap
9416         * in an IT block anyway.
9417         */
9418        return true;
9419    }
9420
9421    if (insn == 0xe97fe97f && arm_dc_feature(s, ARM_FEATURE_V8) &&
9422        arm_dc_feature(s, ARM_FEATURE_M)) {
9423        /* SG: v8M only */
9424        return true;
9425    }
9426
9427    return false;
9428}
9429
9430static void thumb_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
9431{
9432    DisasContext *dc = container_of(dcbase, DisasContext, base);
9433    CPUARMState *env = cpu->env_ptr;
9434    uint32_t pc = dc->base.pc_next;
9435    uint32_t insn;
9436    bool is_16bit;
9437    /* TCG op to rewind to if this turns out to be an invalid ECI state */
9438    TCGOp *insn_eci_rewind = NULL;
9439    target_ulong insn_eci_pc_save = -1;
9440
9441    /* Misaligned thumb PC is architecturally impossible. */
9442    assert((dc->base.pc_next & 1) == 0);
9443
9444    if (arm_check_ss_active(dc) || arm_check_kernelpage(dc)) {
9445        dc->base.pc_next = pc + 2;
9446        return;
9447    }
9448
9449    dc->pc_curr = pc;
9450    insn = arm_lduw_code(env, &dc->base, pc, dc->sctlr_b);
9451    is_16bit = thumb_insn_is_16bit(dc, dc->base.pc_next, insn);
9452    pc += 2;
9453    if (!is_16bit) {
9454        uint32_t insn2 = arm_lduw_code(env, &dc->base, pc, dc->sctlr_b);
9455        insn = insn << 16 | insn2;
9456        pc += 2;
9457    }
9458    dc->base.pc_next = pc;
9459    dc->insn = insn;
9460
9461    if (dc->pstate_il) {
9462        /*
9463         * Illegal execution state. This has priority over BTI
9464         * exceptions, but comes after instruction abort exceptions.
9465         */
9466        gen_exception_insn(dc, 0, EXCP_UDEF, syn_illegalstate());
9467        return;
9468    }
9469
9470    if (dc->eci) {
9471        /*
9472         * For M-profile continuable instructions, ECI/ICI handling
9473         * falls into these cases:
9474         *  - interrupt-continuable instructions
9475         *     These are the various load/store multiple insns (both
9476         *     integer and fp). The ICI bits indicate the register
9477         *     where the load/store can resume. We make the IMPDEF
9478         *     choice to always do "instruction restart", ie ignore
9479         *     the ICI value and always execute the ldm/stm from the
9480         *     start. So all we need to do is zero PSR.ICI if the
9481         *     insn executes.
9482         *  - MVE instructions subject to beat-wise execution
9483         *     Here the ECI bits indicate which beats have already been
9484         *     executed, and we must honour this. Each insn of this
9485         *     type will handle it correctly. We will update PSR.ECI
9486         *     in the helper function for the insn (some ECI values
9487         *     mean that the following insn also has been partially
9488         *     executed).
9489         *  - Special cases which don't advance ECI
9490         *     The insns LE, LETP and BKPT leave the ECI/ICI state
9491         *     bits untouched.
9492         *  - all other insns (the common case)
9493         *     Non-zero ECI/ICI means an INVSTATE UsageFault.
9494         *     We place a rewind-marker here. Insns in the previous
9495         *     three categories will set a flag in the DisasContext.
9496         *     If the flag isn't set after we call disas_thumb_insn()
9497         *     or disas_thumb2_insn() then we know we have a "some other
9498         *     insn" case. We will rewind to the marker (ie throwing away
9499         *     all the generated code) and instead emit "take exception".
9500         */
9501        insn_eci_rewind = tcg_last_op();
9502        insn_eci_pc_save = dc->pc_save;
9503    }
9504
9505    if (dc->condexec_mask && !thumb_insn_is_unconditional(dc, insn)) {
9506        uint32_t cond = dc->condexec_cond;
9507
9508        /*
9509         * Conditionally skip the insn. Note that both 0xe and 0xf mean
9510         * "always"; 0xf is not "never".
9511         */
9512        if (cond < 0x0e) {
9513            arm_skip_unless(dc, cond);
9514        }
9515    }
9516
9517    if (is_16bit) {
9518        disas_thumb_insn(dc, insn);
9519    } else {
9520        disas_thumb2_insn(dc, insn);
9521    }
9522
9523    /* Advance the Thumb condexec condition.  */
9524    if (dc->condexec_mask) {
9525        dc->condexec_cond = ((dc->condexec_cond & 0xe) |
9526                             ((dc->condexec_mask >> 4) & 1));
9527        dc->condexec_mask = (dc->condexec_mask << 1) & 0x1f;
9528        if (dc->condexec_mask == 0) {
9529            dc->condexec_cond = 0;
9530        }
9531    }
9532
9533    if (dc->eci && !dc->eci_handled) {
9534        /*
9535         * Insn wasn't valid for ECI/ICI at all: undo what we
9536         * just generated and instead emit an exception
9537         */
9538        tcg_remove_ops_after(insn_eci_rewind);
9539        dc->pc_save = insn_eci_pc_save;
9540        dc->condjmp = 0;
9541        gen_exception_insn(dc, 0, EXCP_INVSTATE, syn_uncategorized());
9542    }
9543
9544    arm_post_translate_insn(dc);
9545
9546    /* Thumb is a variable-length ISA.  Stop translation when the next insn
9547     * will touch a new page.  This ensures that prefetch aborts occur at
9548     * the right place.
9549     *
9550     * We want to stop the TB if the next insn starts in a new page,
9551     * or if it spans between this page and the next. This means that
9552     * if we're looking at the last halfword in the page we need to
9553     * see if it's a 16-bit Thumb insn (which will fit in this TB)
9554     * or a 32-bit Thumb insn (which won't).
9555     * This is to avoid generating a silly TB with a single 16-bit insn
9556     * in it at the end of this page (which would execute correctly
9557     * but isn't very efficient).
9558     */
9559    if (dc->base.is_jmp == DISAS_NEXT
9560        && (dc->base.pc_next - dc->page_start >= TARGET_PAGE_SIZE
9561            || (dc->base.pc_next - dc->page_start >= TARGET_PAGE_SIZE - 3
9562                && insn_crosses_page(env, dc)))) {
9563        dc->base.is_jmp = DISAS_TOO_MANY;
9564    }
9565}
9566
9567static void arm_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
9568{
9569    DisasContext *dc = container_of(dcbase, DisasContext, base);
9570
9571    /* At this stage dc->condjmp will only be set when the skipped
9572       instruction was a conditional branch or trap, and the PC has
9573       already been written.  */
9574    gen_set_condexec(dc);
9575    if (dc->base.is_jmp == DISAS_BX_EXCRET) {
9576        /* Exception return branches need some special case code at the
9577         * end of the TB, which is complex enough that it has to
9578         * handle the single-step vs not and the condition-failed
9579         * insn codepath itself.
9580         */
9581        gen_bx_excret_final_code(dc);
9582    } else if (unlikely(dc->ss_active)) {
9583        /* Unconditional and "condition passed" instruction codepath. */
9584        switch (dc->base.is_jmp) {
9585        case DISAS_SWI:
9586            gen_ss_advance(dc);
9587            gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb));
9588            break;
9589        case DISAS_HVC:
9590            gen_ss_advance(dc);
9591            gen_exception_el(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2);
9592            break;
9593        case DISAS_SMC:
9594            gen_ss_advance(dc);
9595            gen_exception_el(EXCP_SMC, syn_aa32_smc(), 3);
9596            break;
9597        case DISAS_NEXT:
9598        case DISAS_TOO_MANY:
9599        case DISAS_UPDATE_EXIT:
9600        case DISAS_UPDATE_NOCHAIN:
9601            gen_update_pc(dc, curr_insn_len(dc));
9602            /* fall through */
9603        default:
9604            /* FIXME: Single stepping a WFI insn will not halt the CPU. */
9605            gen_singlestep_exception(dc);
9606            break;
9607        case DISAS_NORETURN:
9608            break;
9609        }
9610    } else {
9611        /* While branches must always occur at the end of an IT block,
9612           there are a few other things that can cause us to terminate
9613           the TB in the middle of an IT block:
9614            - Exception generating instructions (bkpt, swi, undefined).
9615            - Page boundaries.
9616            - Hardware watchpoints.
9617           Hardware breakpoints have already been handled and skip this code.
9618         */
9619        switch (dc->base.is_jmp) {
9620        case DISAS_NEXT:
9621        case DISAS_TOO_MANY:
9622            gen_goto_tb(dc, 1, curr_insn_len(dc));
9623            break;
9624        case DISAS_UPDATE_NOCHAIN:
9625            gen_update_pc(dc, curr_insn_len(dc));
9626            /* fall through */
9627        case DISAS_JUMP:
9628            gen_goto_ptr();
9629            break;
9630        case DISAS_UPDATE_EXIT:
9631            gen_update_pc(dc, curr_insn_len(dc));
9632            /* fall through */
9633        default:
9634            /* indicate that the hash table must be used to find the next TB */
9635            tcg_gen_exit_tb(NULL, 0);
9636            break;
9637        case DISAS_NORETURN:
9638            /* nothing more to generate */
9639            break;
9640        case DISAS_WFI:
9641            gen_helper_wfi(cpu_env, tcg_constant_i32(curr_insn_len(dc)));
9642            /*
9643             * The helper doesn't necessarily throw an exception, but we
9644             * must go back to the main loop to check for interrupts anyway.
9645             */
9646            tcg_gen_exit_tb(NULL, 0);
9647            break;
9648        case DISAS_WFE:
9649            gen_helper_wfe(cpu_env);
9650            break;
9651        case DISAS_YIELD:
9652            gen_helper_yield(cpu_env);
9653            break;
9654        case DISAS_SWI:
9655            gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb));
9656            break;
9657        case DISAS_HVC:
9658            gen_exception_el(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2);
9659            break;
9660        case DISAS_SMC:
9661            gen_exception_el(EXCP_SMC, syn_aa32_smc(), 3);
9662            break;
9663        }
9664    }
9665
9666    if (dc->condjmp) {
9667        /* "Condition failed" instruction codepath for the branch/trap insn */
9668        set_disas_label(dc, dc->condlabel);
9669        gen_set_condexec(dc);
9670        if (unlikely(dc->ss_active)) {
9671            gen_update_pc(dc, curr_insn_len(dc));
9672            gen_singlestep_exception(dc);
9673        } else {
9674            gen_goto_tb(dc, 1, curr_insn_len(dc));
9675        }
9676    }
9677}
9678
9679static void arm_tr_disas_log(const DisasContextBase *dcbase,
9680                             CPUState *cpu, FILE *logfile)
9681{
9682    DisasContext *dc = container_of(dcbase, DisasContext, base);
9683
9684    fprintf(logfile, "IN: %s\n", lookup_symbol(dc->base.pc_first));
9685    target_disas(logfile, cpu, dc->base.pc_first, dc->base.tb->size);
9686}
9687
9688static const TranslatorOps arm_translator_ops = {
9689    .init_disas_context = arm_tr_init_disas_context,
9690    .tb_start           = arm_tr_tb_start,
9691    .insn_start         = arm_tr_insn_start,
9692    .translate_insn     = arm_tr_translate_insn,
9693    .tb_stop            = arm_tr_tb_stop,
9694    .disas_log          = arm_tr_disas_log,
9695};
9696
9697static const TranslatorOps thumb_translator_ops = {
9698    .init_disas_context = arm_tr_init_disas_context,
9699    .tb_start           = arm_tr_tb_start,
9700    .insn_start         = arm_tr_insn_start,
9701    .translate_insn     = thumb_tr_translate_insn,
9702    .tb_stop            = arm_tr_tb_stop,
9703    .disas_log          = arm_tr_disas_log,
9704};
9705
9706/* generate intermediate code for basic block 'tb'.  */
9707void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int *max_insns,
9708                           target_ulong pc, void *host_pc)
9709{
9710    DisasContext dc = { };
9711    const TranslatorOps *ops = &arm_translator_ops;
9712    CPUARMTBFlags tb_flags = arm_tbflags_from_tb(tb);
9713
9714    if (EX_TBFLAG_AM32(tb_flags, THUMB)) {
9715        ops = &thumb_translator_ops;
9716    }
9717#ifdef TARGET_AARCH64
9718    if (EX_TBFLAG_ANY(tb_flags, AARCH64_STATE)) {
9719        ops = &aarch64_translator_ops;
9720    }
9721#endif
9722
9723    translator_loop(cpu, tb, max_insns, pc, host_pc, ops, &dc.base);
9724}
9725