qemu/target/arm/tcg/translate.c
   1/*
   2 *  ARM translation
   3 *
   4 *  Copyright (c) 2003 Fabrice Bellard
   5 *  Copyright (c) 2005-2007 CodeSourcery
   6 *  Copyright (c) 2007 OpenedHand, Ltd.
   7 *
   8 * This library is free software; you can redistribute it and/or
   9 * modify it under the terms of the GNU Lesser General Public
  10 * License as published by the Free Software Foundation; either
  11 * version 2.1 of the License, or (at your option) any later version.
  12 *
  13 * This library is distributed in the hope that it will be useful,
  14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  16 * Lesser General Public License for more details.
  17 *
  18 * You should have received a copy of the GNU Lesser General Public
  19 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  20 */
  21#include "qemu/osdep.h"
  22
  23#include "translate.h"
  24#include "translate-a32.h"
  25#include "qemu/log.h"
  26#include "disas/disas.h"
  27#include "arm_ldst.h"
  28#include "semihosting/semihost.h"
  29#include "cpregs.h"
  30#include "exec/helper-proto.h"
  31
  32#define HELPER_H "helper.h"
  33#include "exec/helper-info.c.inc"
  34#undef  HELPER_H
  35
  36#define ENABLE_ARCH_4T    arm_dc_feature(s, ARM_FEATURE_V4T)
  37#define ENABLE_ARCH_5     arm_dc_feature(s, ARM_FEATURE_V5)
  38/* currently all emulated v5 cores are also v5TE, so don't bother */
  39#define ENABLE_ARCH_5TE   arm_dc_feature(s, ARM_FEATURE_V5)
  40#define ENABLE_ARCH_5J    dc_isar_feature(aa32_jazelle, s)
  41#define ENABLE_ARCH_6     arm_dc_feature(s, ARM_FEATURE_V6)
  42#define ENABLE_ARCH_6K    arm_dc_feature(s, ARM_FEATURE_V6K)
  43#define ENABLE_ARCH_6T2   arm_dc_feature(s, ARM_FEATURE_THUMB2)
  44#define ENABLE_ARCH_7     arm_dc_feature(s, ARM_FEATURE_V7)
  45#define ENABLE_ARCH_8     arm_dc_feature(s, ARM_FEATURE_V8)
  46
  47/* These are TCG temporaries used only by the legacy iwMMXt decoder */
  48static TCGv_i64 cpu_V0, cpu_V1, cpu_M0;
  49/* These are TCG globals which alias CPUARMState fields */
  50static TCGv_i32 cpu_R[16];
  51TCGv_i32 cpu_CF, cpu_NF, cpu_VF, cpu_ZF;
  52TCGv_i64 cpu_exclusive_addr;
  53TCGv_i64 cpu_exclusive_val;
  54
  55static const char * const regnames[] =
  56    { "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
  57      "r8", "r9", "r10", "r11", "r12", "r13", "r14", "pc" };
  58
  59
  60/* initialize TCG globals.  */
  61void arm_translate_init(void)
  62{
  63    int i;
  64
  65    for (i = 0; i < 16; i++) {
  66        cpu_R[i] = tcg_global_mem_new_i32(cpu_env,
  67                                          offsetof(CPUARMState, regs[i]),
  68                                          regnames[i]);
  69    }
  70    cpu_CF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, CF), "CF");
  71    cpu_NF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, NF), "NF");
  72    cpu_VF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, VF), "VF");
  73    cpu_ZF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, ZF), "ZF");
  74
  75    cpu_exclusive_addr = tcg_global_mem_new_i64(cpu_env,
  76        offsetof(CPUARMState, exclusive_addr), "exclusive_addr");
  77    cpu_exclusive_val = tcg_global_mem_new_i64(cpu_env,
  78        offsetof(CPUARMState, exclusive_val), "exclusive_val");
  79
  80    a64_translate_init();
  81}
  82
  83uint64_t asimd_imm_const(uint32_t imm, int cmode, int op)
  84{
  85    /* Expand the encoded constant as per AdvSIMDExpandImm pseudocode */
  86    switch (cmode) {
  87    case 0: case 1:
  88        /* no-op */
  89        break;
  90    case 2: case 3:
  91        imm <<= 8;
  92        break;
  93    case 4: case 5:
  94        imm <<= 16;
  95        break;
  96    case 6: case 7:
  97        imm <<= 24;
  98        break;
  99    case 8: case 9:
 100        imm |= imm << 16;
 101        break;
 102    case 10: case 11:
 103        imm = (imm << 8) | (imm << 24);
 104        break;
 105    case 12:
 106        imm = (imm << 8) | 0xff;
 107        break;
 108    case 13:
 109        imm = (imm << 16) | 0xffff;
 110        break;
 111    case 14:
 112        if (op) {
 113            /*
 114             * This and cmode == 15 op == 1 are the only cases where
 115             * the top and bottom 32 bits of the encoded constant differ.
 116             */
 117            uint64_t imm64 = 0;
 118            int n;
 119
 120            for (n = 0; n < 8; n++) {
 121                if (imm & (1 << n)) {
 122                    imm64 |= (0xffULL << (n * 8));
 123                }
 124            }
 125            return imm64;
 126        }
 127        imm |= (imm << 8) | (imm << 16) | (imm << 24);
 128        break;
 129    case 15:
 130        if (op) {
 131            /* Reserved encoding for AArch32; valid for AArch64 */
 132            uint64_t imm64 = (uint64_t)(imm & 0x3f) << 48;
 133            if (imm & 0x80) {
 134                imm64 |= 0x8000000000000000ULL;
 135            }
 136            if (imm & 0x40) {
 137                imm64 |= 0x3fc0000000000000ULL;
 138            } else {
 139                imm64 |= 0x4000000000000000ULL;
 140            }
 141            return imm64;
 142        }
 143        imm = ((imm & 0x80) << 24) | ((imm & 0x3f) << 19)
 144            | ((imm & 0x40) ? (0x1f << 25) : (1 << 30));
 145        break;
 146    }
 147    if (op) {
 148        imm = ~imm;
 149    }
 150    return dup_const(MO_32, imm);
 151}
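    /*
     * Worked example of the expansion above (values chosen purely for
     * illustration): imm = 0xab with cmode = 12, op = 0 becomes
     * (0xab << 8) | 0xff = 0x0000abff, which dup_const(MO_32, ...)
     * replicates to 0x0000abff0000abff.  With cmode = 14, op = 1 each of
     * the eight imm bits selects a whole byte, so imm = 0x81 expands to
     * 0xff000000000000ff.
     */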
 152
 153/* Generate a label used for skipping this instruction */
 154void arm_gen_condlabel(DisasContext *s)
 155{
 156    if (!s->condjmp) {
 157        s->condlabel = gen_disas_label(s);
 158        s->condjmp = 1;
 159    }
 160}
 161
 162/* Flags for the disas_set_da_iss info argument:
 163 * lower bits hold the Rt register number, higher bits are flags.
 164 */
 165typedef enum ISSInfo {
 166    ISSNone = 0,
 167    ISSRegMask = 0x1f,
 168    ISSInvalid = (1 << 5),
 169    ISSIsAcqRel = (1 << 6),
 170    ISSIsWrite = (1 << 7),
 171    ISSIs16Bit = (1 << 8),
 172} ISSInfo;
 173
 174/*
 175 * Store var into env + offset to a member with size bytes.
 176 * Free var after use.
 177 */
 178void store_cpu_offset(TCGv_i32 var, int offset, int size)
 179{
 180    switch (size) {
 181    case 1:
 182        tcg_gen_st8_i32(var, cpu_env, offset);
 183        break;
 184    case 4:
 185        tcg_gen_st_i32(var, cpu_env, offset);
 186        break;
 187    default:
 188        g_assert_not_reached();
 189    }
 190}
 191
 192/* Save the syndrome information for a Data Abort */
 193static void disas_set_da_iss(DisasContext *s, MemOp memop, ISSInfo issinfo)
 194{
 195    uint32_t syn;
 196    int sas = memop & MO_SIZE;
 197    bool sse = memop & MO_SIGN;
 198    bool is_acqrel = issinfo & ISSIsAcqRel;
 199    bool is_write = issinfo & ISSIsWrite;
 200    bool is_16bit = issinfo & ISSIs16Bit;
 201    int srt = issinfo & ISSRegMask;
 202
 203    if (issinfo & ISSInvalid) {
 204        /* Some callsites want to conditionally provide ISS info,
 205         * eg "only if this was not a writeback"
 206         */
 207        return;
 208    }
 209
 210    if (srt == 15) {
 211        /* For AArch32, insns where the src/dest is R15 never generate
 212         * ISS information. Catching that here saves checking at all
 213         * the call sites.
 214         */
 215        return;
 216    }
 217
 218    syn = syn_data_abort_with_iss(0, sas, sse, srt, 0, is_acqrel,
 219                                  0, 0, 0, is_write, 0, is_16bit);
 220    disas_set_insn_syndrome(s, syn);
 221}
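    /*
     * Illustrative example: a 16-bit Thumb LDRH into r3 would typically be
     * reported with issinfo = 3 | ISSIs16Bit and memop = MO_UW, producing a
     * syndrome with SAS = 1 (halfword), SSE = 0, SRT = 3 and the IL bit
     * clear to mark the 16-bit encoding.
     */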
 222
 223static inline int get_a32_user_mem_index(DisasContext *s)
 224{
 225    /* Return the core mmu_idx to use for A32/T32 "unprivileged load/store"
 226     * insns:
 227     *  if PL2, UNPREDICTABLE (we choose to implement as if PL0)
 228     *  otherwise, access as if at PL0.
 229     */
 230    switch (s->mmu_idx) {
 231    case ARMMMUIdx_E3:
 232    case ARMMMUIdx_E2:        /* this one is UNPREDICTABLE */
 233    case ARMMMUIdx_E10_0:
 234    case ARMMMUIdx_E10_1:
 235    case ARMMMUIdx_E10_1_PAN:
 236        return arm_to_core_mmu_idx(ARMMMUIdx_E10_0);
 237    case ARMMMUIdx_MUser:
 238    case ARMMMUIdx_MPriv:
 239        return arm_to_core_mmu_idx(ARMMMUIdx_MUser);
 240    case ARMMMUIdx_MUserNegPri:
 241    case ARMMMUIdx_MPrivNegPri:
 242        return arm_to_core_mmu_idx(ARMMMUIdx_MUserNegPri);
 243    case ARMMMUIdx_MSUser:
 244    case ARMMMUIdx_MSPriv:
 245        return arm_to_core_mmu_idx(ARMMMUIdx_MSUser);
 246    case ARMMMUIdx_MSUserNegPri:
 247    case ARMMMUIdx_MSPrivNegPri:
 248        return arm_to_core_mmu_idx(ARMMMUIdx_MSUserNegPri);
 249    default:
 250        g_assert_not_reached();
 251    }
 252}
 253
 254/* The pc_curr difference for an architectural jump. */
 255static target_long jmp_diff(DisasContext *s, target_long diff)
 256{
 257    return diff + (s->thumb ? 4 : 8);
 258}
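    /*
     * This mirrors the architectural pipeline offset: an instruction that
     * reads PC in ARM state sees its own address + 8, and in Thumb state
     * its own address + 4.  For example, a branch at 0x8000 in ARM state
     * with encoded offset 'diff' targets 0x8008 + diff.
     */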
 259
 260static void gen_pc_plus_diff(DisasContext *s, TCGv_i32 var, target_long diff)
 261{
 262    assert(s->pc_save != -1);
 263    if (tb_cflags(s->base.tb) & CF_PCREL) {
 264        tcg_gen_addi_i32(var, cpu_R[15], (s->pc_curr - s->pc_save) + diff);
 265    } else {
 266        tcg_gen_movi_i32(var, s->pc_curr + diff);
 267    }
 268}
 269
 270/* Set a variable to the value of a CPU register.  */
 271void load_reg_var(DisasContext *s, TCGv_i32 var, int reg)
 272{
 273    if (reg == 15) {
 274        gen_pc_plus_diff(s, var, jmp_diff(s, 0));
 275    } else {
 276        tcg_gen_mov_i32(var, cpu_R[reg]);
 277    }
 278}
 279
 280/*
 281 * Create a new temp, REG + OFS, except PC is ALIGN(PC, 4).
 282 * This is used for load/store for which use of PC implies (literal),
 283 * or ADD that implies ADR.
 284 */
 285TCGv_i32 add_reg_for_lit(DisasContext *s, int reg, int ofs)
 286{
 287    TCGv_i32 tmp = tcg_temp_new_i32();
 288
 289    if (reg == 15) {
 290        /*
 291         * This address is computed from an aligned PC:
 292         * subtract off the low bits.
 293         */
 294        gen_pc_plus_diff(s, tmp, jmp_diff(s, ofs - (s->pc_curr & 3)));
 295    } else {
 296        tcg_gen_addi_i32(tmp, cpu_R[reg], ofs);
 297    }
 298    return tmp;
 299}
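    /*
     * Example (Thumb): for a literal access at pc_curr = 0x8002 with
     * ofs = 8, jmp_diff() adds 4 and the low PC bits are subtracted off,
     * giving Align(0x8002 + 4, 4) + 8 = 0x800c, which matches the
     * architectural Align(PC, 4) base used by LDR (literal) and ADR.
     */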
 300
 301/* Set a CPU register.  The source must be a temporary and will be
 302   marked as dead.  */
 303void store_reg(DisasContext *s, int reg, TCGv_i32 var)
 304{
 305    if (reg == 15) {
 306        /* In Thumb mode, we must ignore bit 0.
 307         * In ARM mode, for ARMv4 and ARMv5, it is UNPREDICTABLE if bits [1:0]
 308         * are not 0b00, but for ARMv6 and above, we must ignore bits [1:0].
 309         * We choose to ignore [1:0] in ARM mode for all architecture versions.
 310         */
 311        tcg_gen_andi_i32(var, var, s->thumb ? ~1 : ~3);
 312        s->base.is_jmp = DISAS_JUMP;
 313        s->pc_save = -1;
 314    } else if (reg == 13 && arm_dc_feature(s, ARM_FEATURE_M)) {
 315        /* For M-profile SP bits [1:0] are always zero */
 316        tcg_gen_andi_i32(var, var, ~3);
 317    }
 318    tcg_gen_mov_i32(cpu_R[reg], var);
 319}
 320
 321/*
 322 * Variant of store_reg which applies v8M stack-limit checks before updating
 323 * SP. If the check fails this will result in an exception being taken.
 324 * We disable the stack checks for CONFIG_USER_ONLY because we have
 325 * no idea what the stack limits should be in that case.
 326 * If stack checking is not being done this just acts like store_reg().
 327 */
 328static void store_sp_checked(DisasContext *s, TCGv_i32 var)
 329{
 330#ifndef CONFIG_USER_ONLY
 331    if (s->v8m_stackcheck) {
 332        gen_helper_v8m_stackcheck(cpu_env, var);
 333    }
 334#endif
 335    store_reg(s, 13, var);
 336}
 337
 338/* Value extensions.  */
 339#define gen_uxtb(var) tcg_gen_ext8u_i32(var, var)
 340#define gen_uxth(var) tcg_gen_ext16u_i32(var, var)
 341#define gen_sxtb(var) tcg_gen_ext8s_i32(var, var)
 342#define gen_sxth(var) tcg_gen_ext16s_i32(var, var)
 343
 344#define gen_sxtb16(var) gen_helper_sxtb16(var, var)
 345#define gen_uxtb16(var) gen_helper_uxtb16(var, var)
 346
 347void gen_set_cpsr(TCGv_i32 var, uint32_t mask)
 348{
 349    gen_helper_cpsr_write(cpu_env, var, tcg_constant_i32(mask));
 350}
 351
 352static void gen_rebuild_hflags(DisasContext *s, bool new_el)
 353{
 354    bool m_profile = arm_dc_feature(s, ARM_FEATURE_M);
 355
 356    if (new_el) {
 357        if (m_profile) {
 358            gen_helper_rebuild_hflags_m32_newel(cpu_env);
 359        } else {
 360            gen_helper_rebuild_hflags_a32_newel(cpu_env);
 361        }
 362    } else {
 363        TCGv_i32 tcg_el = tcg_constant_i32(s->current_el);
 364        if (m_profile) {
 365            gen_helper_rebuild_hflags_m32(cpu_env, tcg_el);
 366        } else {
 367            gen_helper_rebuild_hflags_a32(cpu_env, tcg_el);
 368        }
 369    }
 370}
 371
 372static void gen_exception_internal(int excp)
 373{
 374    assert(excp_is_internal(excp));
 375    gen_helper_exception_internal(cpu_env, tcg_constant_i32(excp));
 376}
 377
 378static void gen_singlestep_exception(DisasContext *s)
 379{
 380    /* We just completed a step of an insn. Move from Active-not-pending
 381     * to Active-pending, and then also take the swstep exception.
 382     * This corresponds to making the (IMPDEF) choice to prioritize
 383     * swstep exceptions over asynchronous exceptions taken to an exception
 384     * level where debug is disabled. This choice has the advantage that
 385     * we do not need to maintain internal state corresponding to the
 386     * ISV/EX syndrome bits between completion of the step and generation
 387     * of the exception, and our syndrome information is always correct.
 388     */
 389    gen_ss_advance(s);
 390    gen_swstep_exception(s, 1, s->is_ldex);
 391    s->base.is_jmp = DISAS_NORETURN;
 392}
 393
 394void clear_eci_state(DisasContext *s)
 395{
 396    /*
 397     * Clear any ECI/ICI state: used when a load multiple/store
 398     * multiple insn executes.
 399     */
 400    if (s->eci) {
 401        store_cpu_field_constant(0, condexec_bits);
 402        s->eci = 0;
 403    }
 404}
 405
 406static void gen_smul_dual(TCGv_i32 a, TCGv_i32 b)
 407{
 408    TCGv_i32 tmp1 = tcg_temp_new_i32();
 409    TCGv_i32 tmp2 = tcg_temp_new_i32();
 410    tcg_gen_ext16s_i32(tmp1, a);
 411    tcg_gen_ext16s_i32(tmp2, b);
 412    tcg_gen_mul_i32(tmp1, tmp1, tmp2);
 413    tcg_gen_sari_i32(a, a, 16);
 414    tcg_gen_sari_i32(b, b, 16);
 415    tcg_gen_mul_i32(b, b, a);
 416    tcg_gen_mov_i32(a, tmp1);
 417}
 418
 419/* Byteswap each halfword.  */
 420void gen_rev16(TCGv_i32 dest, TCGv_i32 var)
 421{
 422    TCGv_i32 tmp = tcg_temp_new_i32();
 423    TCGv_i32 mask = tcg_constant_i32(0x00ff00ff);
 424    tcg_gen_shri_i32(tmp, var, 8);
 425    tcg_gen_and_i32(tmp, tmp, mask);
 426    tcg_gen_and_i32(var, var, mask);
 427    tcg_gen_shli_i32(var, var, 8);
 428    tcg_gen_or_i32(dest, var, tmp);
 429}
 430
 431/* Byteswap low halfword and sign extend.  */
 432static void gen_revsh(TCGv_i32 dest, TCGv_i32 var)
 433{
 434    tcg_gen_bswap16_i32(var, var, TCG_BSWAP_OS);
 435}
 436
 437/* Dual 16-bit add.  Result placed in dest; t0 and t1 are marked as dead.
 438    tmp = (t0 ^ t1) & 0x8000;
 439    t0 &= ~0x8000;
 440    t1 &= ~0x8000;
 441    dest = (t0 + t1) ^ tmp;
 442 */
 443
 444static void gen_add16(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
 445{
 446    TCGv_i32 tmp = tcg_temp_new_i32();
 447    tcg_gen_xor_i32(tmp, t0, t1);
 448    tcg_gen_andi_i32(tmp, tmp, 0x8000);
 449    tcg_gen_andi_i32(t0, t0, ~0x8000);
 450    tcg_gen_andi_i32(t1, t1, ~0x8000);
 451    tcg_gen_add_i32(t0, t0, t1);
 452    tcg_gen_xor_i32(dest, t0, tmp);
 453}
 454
 455/* Set N and Z flags from var.  */
 456static inline void gen_logic_CC(TCGv_i32 var)
 457{
 458    tcg_gen_mov_i32(cpu_NF, var);
 459    tcg_gen_mov_i32(cpu_ZF, var);
 460}
 461
 462/* dest = T0 + T1 + CF. */
 463static void gen_add_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
 464{
 465    tcg_gen_add_i32(dest, t0, t1);
 466    tcg_gen_add_i32(dest, dest, cpu_CF);
 467}
 468
 469/* dest = T0 - T1 + CF - 1.  */
 470static void gen_sub_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
 471{
 472    tcg_gen_sub_i32(dest, t0, t1);
 473    tcg_gen_add_i32(dest, dest, cpu_CF);
 474    tcg_gen_subi_i32(dest, dest, 1);
 475}
 476
 477/* dest = T0 + T1. Compute C, N, V and Z flags */
 478static void gen_add_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
 479{
 480    TCGv_i32 tmp = tcg_temp_new_i32();
 481    tcg_gen_movi_i32(tmp, 0);
 482    tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, t1, tmp);
 483    tcg_gen_mov_i32(cpu_ZF, cpu_NF);
 484    tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
 485    tcg_gen_xor_i32(tmp, t0, t1);
 486    tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
 487    tcg_gen_mov_i32(dest, cpu_NF);
 488}
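    /*
     * The overflow calculation above is the usual identity
     *   V = (result ^ t0) & ~(t0 ^ t1)
     * evaluated in bit 31: overflow occurs only when both addends have
     * the same sign and the result's sign differs from it.
     */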
 489
 490/* dest = T0 + T1 + CF.  Compute C, N, V and Z flags */
 491static void gen_adc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
 492{
 493    TCGv_i32 tmp = tcg_temp_new_i32();
 494    if (TCG_TARGET_HAS_add2_i32) {
 495        tcg_gen_movi_i32(tmp, 0);
 496        tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, cpu_CF, tmp);
 497        tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1, tmp);
 498    } else {
 499        TCGv_i64 q0 = tcg_temp_new_i64();
 500        TCGv_i64 q1 = tcg_temp_new_i64();
 501        tcg_gen_extu_i32_i64(q0, t0);
 502        tcg_gen_extu_i32_i64(q1, t1);
 503        tcg_gen_add_i64(q0, q0, q1);
 504        tcg_gen_extu_i32_i64(q1, cpu_CF);
 505        tcg_gen_add_i64(q0, q0, q1);
 506        tcg_gen_extr_i64_i32(cpu_NF, cpu_CF, q0);
 507    }
 508    tcg_gen_mov_i32(cpu_ZF, cpu_NF);
 509    tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
 510    tcg_gen_xor_i32(tmp, t0, t1);
 511    tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
 512    tcg_gen_mov_i32(dest, cpu_NF);
 513}
 514
 515/* dest = T0 - T1. Compute C, N, V and Z flags */
 516static void gen_sub_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
 517{
 518    TCGv_i32 tmp;
 519    tcg_gen_sub_i32(cpu_NF, t0, t1);
 520    tcg_gen_mov_i32(cpu_ZF, cpu_NF);
 521    tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0, t1);
 522    tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
 523    tmp = tcg_temp_new_i32();
 524    tcg_gen_xor_i32(tmp, t0, t1);
 525    tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
 526    tcg_gen_mov_i32(dest, cpu_NF);
 527}
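    /*
     * Note that the A32 carry flag for subtraction is "NOT borrow": C is
     * set when t0 >= t1 as unsigned values, hence TCG_COND_GEU above.
     * Overflow uses (result ^ t0) & (t0 ^ t1), the subtraction counterpart
     * of the identity used in gen_add_CC().
     */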
 528
 529/* dest = T0 + ~T1 + CF.  Compute C, N, V and Z flags */
 530static void gen_sbc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
 531{
 532    TCGv_i32 tmp = tcg_temp_new_i32();
 533    tcg_gen_not_i32(tmp, t1);
 534    gen_adc_CC(dest, t0, tmp);
 535}
 536
 537#define GEN_SHIFT(name)                                               \
 538static void gen_##name(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)       \
 539{                                                                     \
 540    TCGv_i32 tmpd = tcg_temp_new_i32();                               \
 541    TCGv_i32 tmp1 = tcg_temp_new_i32();                               \
 542    TCGv_i32 zero = tcg_constant_i32(0);                              \
 543    tcg_gen_andi_i32(tmp1, t1, 0x1f);                                 \
 544    tcg_gen_##name##_i32(tmpd, t0, tmp1);                             \
 545    tcg_gen_andi_i32(tmp1, t1, 0xe0);                                 \
 546    tcg_gen_movcond_i32(TCG_COND_NE, dest, tmp1, zero, zero, tmpd);   \
 547}
 548GEN_SHIFT(shl)
 549GEN_SHIFT(shr)
 550#undef GEN_SHIFT
 551
 552static void gen_sar(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
 553{
 554    TCGv_i32 tmp1 = tcg_temp_new_i32();
 555
 556    tcg_gen_andi_i32(tmp1, t1, 0xff);
 557    tcg_gen_umin_i32(tmp1, tmp1, tcg_constant_i32(31));
 558    tcg_gen_sar_i32(dest, t0, tmp1);
 559}
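    /*
     * Clamping the count with umin to 31 gives the architectural result
     * for register-specified ASR: counts of 32 or more yield 0 or
     * 0xffffffff according to the sign bit, exactly as an arithmetic
     * shift by 31 does.
     */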
 560
 561static void shifter_out_im(TCGv_i32 var, int shift)
 562{
 563    tcg_gen_extract_i32(cpu_CF, var, shift, 1);
 564}
 565
 566/* Shift by immediate.  Includes special handling for shift == 0.  */
 567static inline void gen_arm_shift_im(TCGv_i32 var, int shiftop,
 568                                    int shift, int flags)
 569{
 570    switch (shiftop) {
 571    case 0: /* LSL */
 572        if (shift != 0) {
 573            if (flags)
 574                shifter_out_im(var, 32 - shift);
 575            tcg_gen_shli_i32(var, var, shift);
 576        }
 577        break;
 578    case 1: /* LSR */
 579        if (shift == 0) {
 580            if (flags) {
 581                tcg_gen_shri_i32(cpu_CF, var, 31);
 582            }
 583            tcg_gen_movi_i32(var, 0);
 584        } else {
 585            if (flags)
 586                shifter_out_im(var, shift - 1);
 587            tcg_gen_shri_i32(var, var, shift);
 588        }
 589        break;
 590    case 2: /* ASR */
 591        if (shift == 0)
 592            shift = 32;
 593        if (flags)
 594            shifter_out_im(var, shift - 1);
 595        if (shift == 32)
 596          shift = 31;
 597        tcg_gen_sari_i32(var, var, shift);
 598        break;
 599    case 3: /* ROR/RRX */
 600        if (shift != 0) {
 601            if (flags)
 602                shifter_out_im(var, shift - 1);
 603            tcg_gen_rotri_i32(var, var, shift); break;
 604        } else {
 605            TCGv_i32 tmp = tcg_temp_new_i32();
 606            tcg_gen_shli_i32(tmp, cpu_CF, 31);
 607            if (flags)
 608                shifter_out_im(var, 0);
 609            tcg_gen_shri_i32(var, var, 1);
 610            tcg_gen_or_i32(var, var, tmp);
 611        }
 612    }
 613}
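    /*
     * In the immediate forms above, a shift field of 0 is special-cased
     * as the architecture requires: LSR #0 and ASR #0 encode shifts by
     * 32, and ROR #0 encodes RRX, i.e. result = (CF << 31) | (value >> 1)
     * with the old bit 0 becoming the carry when flags are requested.
     */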
 614
 615static inline void gen_arm_shift_reg(TCGv_i32 var, int shiftop,
 616                                     TCGv_i32 shift, int flags)
 617{
 618    if (flags) {
 619        switch (shiftop) {
 620        case 0: gen_helper_shl_cc(var, cpu_env, var, shift); break;
 621        case 1: gen_helper_shr_cc(var, cpu_env, var, shift); break;
 622        case 2: gen_helper_sar_cc(var, cpu_env, var, shift); break;
 623        case 3: gen_helper_ror_cc(var, cpu_env, var, shift); break;
 624        }
 625    } else {
 626        switch (shiftop) {
 627        case 0:
 628            gen_shl(var, var, shift);
 629            break;
 630        case 1:
 631            gen_shr(var, var, shift);
 632            break;
 633        case 2:
 634            gen_sar(var, var, shift);
 635            break;
 636        case 3: tcg_gen_andi_i32(shift, shift, 0x1f);
 637                tcg_gen_rotr_i32(var, var, shift); break;
 638        }
 639    }
 640}
 641
 642/*
 643 * Generate a conditional based on ARM condition code cc.
 644 * This is common between ARM and Aarch64 targets.
 645 */
 646void arm_test_cc(DisasCompare *cmp, int cc)
 647{
 648    TCGv_i32 value;
 649    TCGCond cond;
 650
 651    switch (cc) {
 652    case 0: /* eq: Z */
 653    case 1: /* ne: !Z */
 654        cond = TCG_COND_EQ;
 655        value = cpu_ZF;
 656        break;
 657
 658    case 2: /* cs: C */
 659    case 3: /* cc: !C */
 660        cond = TCG_COND_NE;
 661        value = cpu_CF;
 662        break;
 663
 664    case 4: /* mi: N */
 665    case 5: /* pl: !N */
 666        cond = TCG_COND_LT;
 667        value = cpu_NF;
 668        break;
 669
 670    case 6: /* vs: V */
 671    case 7: /* vc: !V */
 672        cond = TCG_COND_LT;
 673        value = cpu_VF;
 674        break;
 675
 676    case 8: /* hi: C && !Z */
 677    case 9: /* ls: !C || Z -> !(C && !Z) */
 678        cond = TCG_COND_NE;
 679        value = tcg_temp_new_i32();
 680        /* CF is 1 for C, so -CF is an all-bits-set mask for C;
 681           ZF is non-zero for !Z; so AND the two subexpressions.  */
 682        tcg_gen_neg_i32(value, cpu_CF);
 683        tcg_gen_and_i32(value, value, cpu_ZF);
 684        break;
 685
 686    case 10: /* ge: N == V -> N ^ V == 0 */
 687    case 11: /* lt: N != V -> N ^ V != 0 */
 688        /* Since we're only interested in the sign bit, == 0 is >= 0.  */
 689        cond = TCG_COND_GE;
 690        value = tcg_temp_new_i32();
 691        tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
 692        break;
 693
 694    case 12: /* gt: !Z && N == V */
 695    case 13: /* le: Z || N != V */
 696        cond = TCG_COND_NE;
 697        value = tcg_temp_new_i32();
 698        /* (N == V) is equal to the sign bit of ~(NF ^ VF).  Propagate
 699         * the sign bit then AND with ZF to yield the result.  */
 700        tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
 701        tcg_gen_sari_i32(value, value, 31);
 702        tcg_gen_andc_i32(value, cpu_ZF, value);
 703        break;
 704
 705    case 14: /* always */
 706    case 15: /* always */
 707        /* Use the ALWAYS condition, which will fold early.
 708         * It doesn't matter what we use for the value.  */
 709        cond = TCG_COND_ALWAYS;
 710        value = cpu_ZF;
 711        goto no_invert;
 712
 713    default:
 714        fprintf(stderr, "Bad condition code 0x%x\n", cc);
 715        abort();
 716    }
 717
 718    if (cc & 1) {
 719        cond = tcg_invert_cond(cond);
 720    }
 721
 722 no_invert:
 723    cmp->cond = cond;
 724    cmp->value = value;
 725}
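    /*
     * Worked example: cc = 11 (lt) takes the ge/lt case, yielding
     * cond = TCG_COND_GE on (VF ^ NF); since bit 0 of cc is set, the
     * condition is then inverted to TCG_COND_LT, i.e. "sign bits of NF
     * and VF differ", which is the architectural N != V test.
     */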
 726
 727void arm_jump_cc(DisasCompare *cmp, TCGLabel *label)
 728{
 729    tcg_gen_brcondi_i32(cmp->cond, cmp->value, 0, label);
 730}
 731
 732void arm_gen_test_cc(int cc, TCGLabel *label)
 733{
 734    DisasCompare cmp;
 735    arm_test_cc(&cmp, cc);
 736    arm_jump_cc(&cmp, label);
 737}
 738
 739void gen_set_condexec(DisasContext *s)
 740{
 741    if (s->condexec_mask) {
 742        uint32_t val = (s->condexec_cond << 4) | (s->condexec_mask >> 1);
 743
 744        store_cpu_field_constant(val, condexec_bits);
 745    }
 746}
 747
 748void gen_update_pc(DisasContext *s, target_long diff)
 749{
 750    gen_pc_plus_diff(s, cpu_R[15], diff);
 751    s->pc_save = s->pc_curr + diff;
 752}
 753
 754/* Set PC and Thumb state from var.  var is marked as dead.  */
 755static inline void gen_bx(DisasContext *s, TCGv_i32 var)
 756{
 757    s->base.is_jmp = DISAS_JUMP;
 758    tcg_gen_andi_i32(cpu_R[15], var, ~1);
 759    tcg_gen_andi_i32(var, var, 1);
 760    store_cpu_field(var, thumb);
 761    s->pc_save = -1;
 762}
 763
 764/*
 765 * Set PC and Thumb state from var. var is marked as dead.
 766 * For M-profile CPUs, include logic to detect exception-return
 767 * branches and handle them. This is needed for Thumb POP/LDM to PC, LDR to PC,
 768 * and BX reg, and no others, and happens only for code in Handler mode.
 769 * The Security Extension also requires us to check for the FNC_RETURN
 770 * which signals a function return from non-secure state; this can happen
 771 * in both Handler and Thread mode.
 772 * To avoid having to do multiple comparisons in inline generated code,
 773 * we make the check we do here loose, so it will match for EXC_RETURN
 774 * in Thread mode. For system emulation do_v7m_exception_exit() checks
 775 * for these spurious cases and returns without doing anything (giving
 776 * the same behaviour as for a branch to a non-magic address).
 777 *
 778 * In linux-user mode it is unclear what the right behaviour for an
 779 * attempted FNC_RETURN should be, because in real hardware this will go
 780 * directly to Secure code (ie not the Linux kernel) which will then treat
 781 * the error in any way it chooses. For QEMU we opt to make the FNC_RETURN
 782 * attempt behave the way it would on a CPU without the security extension,
 783 * which is to say "like a normal branch". That means we can simply treat
 784 * all branches as normal with no magic address behaviour.
 785 */
 786static inline void gen_bx_excret(DisasContext *s, TCGv_i32 var)
 787{
 788    /* Generate the same code here as for a simple bx, but flag via
 789     * s->base.is_jmp that we need to do the rest of the work later.
 790     */
 791    gen_bx(s, var);
 792#ifndef CONFIG_USER_ONLY
 793    if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY) ||
 794        (s->v7m_handler_mode && arm_dc_feature(s, ARM_FEATURE_M))) {
 795        s->base.is_jmp = DISAS_BX_EXCRET;
 796    }
 797#endif
 798}
 799
 800static inline void gen_bx_excret_final_code(DisasContext *s)
 801{
 802    /* Generate the code to finish possible exception return and end the TB */
 803    DisasLabel excret_label = gen_disas_label(s);
 804    uint32_t min_magic;
 805
 806    if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY)) {
 807        /* Covers FNC_RETURN and EXC_RETURN magic */
 808        min_magic = FNC_RETURN_MIN_MAGIC;
 809    } else {
 810        /* EXC_RETURN magic only */
 811        min_magic = EXC_RETURN_MIN_MAGIC;
 812    }
 813
 814    /* Is the new PC value in the magic range indicating exception return? */
 815    tcg_gen_brcondi_i32(TCG_COND_GEU, cpu_R[15], min_magic, excret_label.label);
 816    /* No: end the TB as we would for a DISAS_JMP */
 817    if (s->ss_active) {
 818        gen_singlestep_exception(s);
 819    } else {
 820        tcg_gen_exit_tb(NULL, 0);
 821    }
 822    set_disas_label(s, excret_label);
 823    /* Yes: this is an exception return.
 824     * At this point in runtime env->regs[15] and env->thumb will hold
 825     * the exception-return magic number, which do_v7m_exception_exit()
 826     * will read. Nothing else will be able to see those values because
 827     * the cpu-exec main loop guarantees that we will always go straight
 828     * from raising the exception to the exception-handling code.
 829     *
 830     * gen_ss_advance(s) does nothing on M profile currently but
 831     * calling it is conceptually the right thing as we have executed
 832     * this instruction (compare SWI, HVC, SMC handling).
 833     */
 834    gen_ss_advance(s);
 835    gen_exception_internal(EXCP_EXCEPTION_EXIT);
 836}
 837
 838static inline void gen_bxns(DisasContext *s, int rm)
 839{
 840    TCGv_i32 var = load_reg(s, rm);
 841
 842    /* The bxns helper may raise an EXCEPTION_EXIT exception, so in theory
 843     * we need to sync state before calling it, but:
 844     *  - we don't need to do gen_update_pc() because the bxns helper will
 845     *    always set the PC itself
 846     *  - we don't need to do gen_set_condexec() because BXNS is UNPREDICTABLE
 847     *    unless it's outside an IT block or the last insn in an IT block,
 848     *    so we know that condexec == 0 (already set at the top of the TB)
 849     *    is correct in the non-UNPREDICTABLE cases, and we can choose
 850     *    "zeroes the IT bits" as our UNPREDICTABLE behaviour otherwise.
 851     */
 852    gen_helper_v7m_bxns(cpu_env, var);
 853    s->base.is_jmp = DISAS_EXIT;
 854}
 855
 856static inline void gen_blxns(DisasContext *s, int rm)
 857{
 858    TCGv_i32 var = load_reg(s, rm);
 859
 860    /* We don't need to sync condexec state, for the same reason as bxns.
 861     * We do however need to set the PC, because the blxns helper reads it.
 862     * The blxns helper may throw an exception.
 863     */
 864    gen_update_pc(s, curr_insn_len(s));
 865    gen_helper_v7m_blxns(cpu_env, var);
 866    s->base.is_jmp = DISAS_EXIT;
 867}
 868
 869/* Variant of store_reg which uses branch&exchange logic when storing
 870   to r15 in ARM architecture v7 and above. The source must be a temporary
 871   and will be marked as dead. */
 872static inline void store_reg_bx(DisasContext *s, int reg, TCGv_i32 var)
 873{
 874    if (reg == 15 && ENABLE_ARCH_7) {
 875        gen_bx(s, var);
 876    } else {
 877        store_reg(s, reg, var);
 878    }
 879}
 880
 881/* Variant of store_reg which uses branch&exchange logic when storing
 882 * to r15 in ARM architecture v5T and above. This is used for storing
 883 * the results of a LDR/LDM/POP into r15, and corresponds to the cases
 884 * in the ARM ARM which use the LoadWritePC() pseudocode function. */
 885static inline void store_reg_from_load(DisasContext *s, int reg, TCGv_i32 var)
 886{
 887    if (reg == 15 && ENABLE_ARCH_5) {
 888        gen_bx_excret(s, var);
 889    } else {
 890        store_reg(s, reg, var);
 891    }
 892}
 893
 894#ifdef CONFIG_USER_ONLY
 895#define IS_USER_ONLY 1
 896#else
 897#define IS_USER_ONLY 0
 898#endif
 899
 900MemOp pow2_align(unsigned i)
 901{
 902    static const MemOp mop_align[] = {
 903        0, MO_ALIGN_2, MO_ALIGN_4, MO_ALIGN_8, MO_ALIGN_16,
 904        /*
 905         * FIXME: TARGET_PAGE_BITS_MIN affects TLB_FLAGS_MASK such
 906         * that 256-bit alignment (MO_ALIGN_32) cannot be supported:
 907         * see get_alignment_bits(). Enforce only 128-bit alignment for now.
 908         */
 909        MO_ALIGN_16
 910    };
 911    g_assert(i < ARRAY_SIZE(mop_align));
 912    return mop_align[i];
 913}
 914
 915/*
 916 * Abstractions of "generate code to do a guest load/store for
 917 * AArch32", where a vaddr is always 32 bits (and is zero
 918 * extended if we're a 64-bit core) and data is also
 919 * 32 bits unless specifically doing a 64 bit access.
 920 * These functions work like tcg_gen_qemu_{ld,st}* except
 921 * that the address argument is TCGv_i32 rather than TCGv.
 922 */
 923
 924static TCGv gen_aa32_addr(DisasContext *s, TCGv_i32 a32, MemOp op)
 925{
 926    TCGv addr = tcg_temp_new();
 927    tcg_gen_extu_i32_tl(addr, a32);
 928
 929    /* Not needed for user-mode BE32, where we use MO_BE instead.  */
 930    if (!IS_USER_ONLY && s->sctlr_b && (op & MO_SIZE) < MO_32) {
 931        tcg_gen_xori_tl(addr, addr, 4 - (1 << (op & MO_SIZE)));
 932    }
 933    return addr;
 934}
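    /*
     * The XOR above implements the legacy BE32 byte-lane swizzle: with
     * SCTLR.B set, a byte access to address A is performed at A ^ 3 and a
     * halfword access at A ^ 2, while word-sized accesses are unchanged,
     * since 4 - (1 << size) is 3, 2 and 0 for sizes 0, 1 and 2.
     */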
 935
 936/*
 937 * Internal routines are used for NEON cases where the endianness
 938 * and/or alignment has already been taken into account and manipulated.
 939 */
 940void gen_aa32_ld_internal_i32(DisasContext *s, TCGv_i32 val,
 941                              TCGv_i32 a32, int index, MemOp opc)
 942{
 943    TCGv addr = gen_aa32_addr(s, a32, opc);
 944    tcg_gen_qemu_ld_i32(val, addr, index, opc);
 945}
 946
 947void gen_aa32_st_internal_i32(DisasContext *s, TCGv_i32 val,
 948                              TCGv_i32 a32, int index, MemOp opc)
 949{
 950    TCGv addr = gen_aa32_addr(s, a32, opc);
 951    tcg_gen_qemu_st_i32(val, addr, index, opc);
 952}
 953
 954void gen_aa32_ld_internal_i64(DisasContext *s, TCGv_i64 val,
 955                              TCGv_i32 a32, int index, MemOp opc)
 956{
 957    TCGv addr = gen_aa32_addr(s, a32, opc);
 958
 959    tcg_gen_qemu_ld_i64(val, addr, index, opc);
 960
 961    /* Not needed for user-mode BE32, where we use MO_BE instead.  */
 962    if (!IS_USER_ONLY && s->sctlr_b && (opc & MO_SIZE) == MO_64) {
 963        tcg_gen_rotri_i64(val, val, 32);
 964    }
 965}
 966
 967void gen_aa32_st_internal_i64(DisasContext *s, TCGv_i64 val,
 968                              TCGv_i32 a32, int index, MemOp opc)
 969{
 970    TCGv addr = gen_aa32_addr(s, a32, opc);
 971
 972    /* Not needed for user-mode BE32, where we use MO_BE instead.  */
 973    if (!IS_USER_ONLY && s->sctlr_b && (opc & MO_SIZE) == MO_64) {
 974        TCGv_i64 tmp = tcg_temp_new_i64();
 975        tcg_gen_rotri_i64(tmp, val, 32);
 976        tcg_gen_qemu_st_i64(tmp, addr, index, opc);
 977    } else {
 978        tcg_gen_qemu_st_i64(val, addr, index, opc);
 979    }
 980}
 981
 982void gen_aa32_ld_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
 983                     int index, MemOp opc)
 984{
 985    gen_aa32_ld_internal_i32(s, val, a32, index, finalize_memop(s, opc));
 986}
 987
 988void gen_aa32_st_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
 989                     int index, MemOp opc)
 990{
 991    gen_aa32_st_internal_i32(s, val, a32, index, finalize_memop(s, opc));
 992}
 993
 994void gen_aa32_ld_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
 995                     int index, MemOp opc)
 996{
 997    gen_aa32_ld_internal_i64(s, val, a32, index, finalize_memop(s, opc));
 998}
 999
1000void gen_aa32_st_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
1001                     int index, MemOp opc)
1002{
1003    gen_aa32_st_internal_i64(s, val, a32, index, finalize_memop(s, opc));
1004}
1005
1006#define DO_GEN_LD(SUFF, OPC)                                            \
1007    static inline void gen_aa32_ld##SUFF(DisasContext *s, TCGv_i32 val, \
1008                                         TCGv_i32 a32, int index)       \
1009    {                                                                   \
1010        gen_aa32_ld_i32(s, val, a32, index, OPC);                       \
1011    }
1012
1013#define DO_GEN_ST(SUFF, OPC)                                            \
1014    static inline void gen_aa32_st##SUFF(DisasContext *s, TCGv_i32 val, \
1015                                         TCGv_i32 a32, int index)       \
1016    {                                                                   \
1017        gen_aa32_st_i32(s, val, a32, index, OPC);                       \
1018    }
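    /*
     * These macros are presumably instantiated further down in this file to
     * create the fixed-size helpers (such as gen_aa32_ld8u() and
     * gen_aa32_st32()) used by the iwMMXt decoder below; each expansion is
     * just a thin wrapper passing a fixed MemOp to gen_aa32_ld_i32()/
     * gen_aa32_st_i32().
     */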
1019
1020static inline void gen_hvc(DisasContext *s, int imm16)
1021{
1022    /* The pre HVC helper handles cases when HVC gets trapped
1023     * as an undefined insn by runtime configuration (ie before
1024     * the insn really executes).
1025     */
1026    gen_update_pc(s, 0);
1027    gen_helper_pre_hvc(cpu_env);
1028    /* Otherwise we will treat this as a real exception which
1029     * happens after execution of the insn. (The distinction matters
1030     * for the PC value reported to the exception handler and also
1031     * for single stepping.)
1032     */
1033    s->svc_imm = imm16;
1034    gen_update_pc(s, curr_insn_len(s));
1035    s->base.is_jmp = DISAS_HVC;
1036}
1037
1038static inline void gen_smc(DisasContext *s)
1039{
1040    /* As with HVC, we may take an exception either before or after
1041     * the insn executes.
1042     */
1043    gen_update_pc(s, 0);
1044    gen_helper_pre_smc(cpu_env, tcg_constant_i32(syn_aa32_smc()));
1045    gen_update_pc(s, curr_insn_len(s));
1046    s->base.is_jmp = DISAS_SMC;
1047}
1048
1049static void gen_exception_internal_insn(DisasContext *s, int excp)
1050{
1051    gen_set_condexec(s);
1052    gen_update_pc(s, 0);
1053    gen_exception_internal(excp);
1054    s->base.is_jmp = DISAS_NORETURN;
1055}
1056
1057static void gen_exception_el_v(int excp, uint32_t syndrome, TCGv_i32 tcg_el)
1058{
1059    gen_helper_exception_with_syndrome_el(cpu_env, tcg_constant_i32(excp),
1060                                          tcg_constant_i32(syndrome), tcg_el);
1061}
1062
1063static void gen_exception_el(int excp, uint32_t syndrome, uint32_t target_el)
1064{
1065    gen_exception_el_v(excp, syndrome, tcg_constant_i32(target_el));
1066}
1067
1068static void gen_exception(int excp, uint32_t syndrome)
1069{
1070    gen_helper_exception_with_syndrome(cpu_env, tcg_constant_i32(excp),
1071                                       tcg_constant_i32(syndrome));
1072}
1073
1074static void gen_exception_insn_el_v(DisasContext *s, target_long pc_diff,
1075                                    int excp, uint32_t syn, TCGv_i32 tcg_el)
1076{
1077    if (s->aarch64) {
1078        gen_a64_update_pc(s, pc_diff);
1079    } else {
1080        gen_set_condexec(s);
1081        gen_update_pc(s, pc_diff);
1082    }
1083    gen_exception_el_v(excp, syn, tcg_el);
1084    s->base.is_jmp = DISAS_NORETURN;
1085}
1086
1087void gen_exception_insn_el(DisasContext *s, target_long pc_diff, int excp,
1088                           uint32_t syn, uint32_t target_el)
1089{
1090    gen_exception_insn_el_v(s, pc_diff, excp, syn,
1091                            tcg_constant_i32(target_el));
1092}
1093
1094void gen_exception_insn(DisasContext *s, target_long pc_diff,
1095                        int excp, uint32_t syn)
1096{
1097    if (s->aarch64) {
1098        gen_a64_update_pc(s, pc_diff);
1099    } else {
1100        gen_set_condexec(s);
1101        gen_update_pc(s, pc_diff);
1102    }
1103    gen_exception(excp, syn);
1104    s->base.is_jmp = DISAS_NORETURN;
1105}
1106
1107static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syn)
1108{
1109    gen_set_condexec(s);
1110    gen_update_pc(s, 0);
1111    gen_helper_exception_bkpt_insn(cpu_env, tcg_constant_i32(syn));
1112    s->base.is_jmp = DISAS_NORETURN;
1113}
1114
1115void unallocated_encoding(DisasContext *s)
1116{
1117    /* Unallocated and reserved encodings are uncategorized */
1118    gen_exception_insn(s, 0, EXCP_UDEF, syn_uncategorized());
1119}
1120
1121/* Force a TB lookup after an instruction that changes the CPU state.  */
1122void gen_lookup_tb(DisasContext *s)
1123{
1124    gen_pc_plus_diff(s, cpu_R[15], curr_insn_len(s));
1125    s->base.is_jmp = DISAS_EXIT;
1126}
1127
1128static inline void gen_hlt(DisasContext *s, int imm)
1129{
1130    /* HLT. This has two purposes.
1131     * Architecturally, it is an external halting debug instruction.
1132     * Since QEMU doesn't implement external debug, we treat this as
1133     * it is required to behave when halting debug is disabled: it will UNDEF.
1134     * Secondly, "HLT 0x3C" is a T32 semihosting trap instruction,
1135     * and "HLT 0xF000" is an A32 semihosting syscall. These traps
1136     * must trigger semihosting even for ARMv7 and earlier, where
1137     * HLT was an undefined encoding.
1138     * In system mode, we don't allow userspace access to
1139     * semihosting, to provide some semblance of security
1140     * (and for consistency with our 32-bit semihosting).
1141     */
1142    if (semihosting_enabled(s->current_el == 0) &&
1143        (imm == (s->thumb ? 0x3c : 0xf000))) {
1144        gen_exception_internal_insn(s, EXCP_SEMIHOST);
1145        return;
1146    }
1147
1148    unallocated_encoding(s);
1149}
1150
1151/*
1152 * Return the offset of a "full" NEON Dreg.
1153 */
1154long neon_full_reg_offset(unsigned reg)
1155{
1156    return offsetof(CPUARMState, vfp.zregs[reg >> 1].d[reg & 1]);
1157}
1158
1159/*
1160 * Return the offset of a 2**SIZE piece of a NEON register, at index ELE,
1161 * where 0 is the least significant end of the register.
1162 */
1163long neon_element_offset(int reg, int element, MemOp memop)
1164{
1165    int element_size = 1 << (memop & MO_SIZE);
1166    int ofs = element * element_size;
1167#if HOST_BIG_ENDIAN
1168    /*
1169     * Calculate the offset assuming fully little-endian,
1170     * then XOR to account for the order of the 8-byte units.
1171     */
1172    if (element_size < 8) {
1173        ofs ^= 8 - element_size;
1174    }
1175#endif
1176    return neon_full_reg_offset(reg) + ofs;
1177}
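    /*
     * Illustrative example: viewing a Dreg as four MO_16 lanes, lane 1
     * starts at little-endian byte offset 2; on a big-endian host the XOR
     * with (8 - 2) turns that into offset 4 within the 8-byte unit, so the
     * load/store still touches the same guest lane.
     */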
1178
1179/* Return the offset of a VFP Dreg (dp = true) or VFP Sreg (dp = false). */
1180long vfp_reg_offset(bool dp, unsigned reg)
1181{
1182    if (dp) {
1183        return neon_element_offset(reg, 0, MO_64);
1184    } else {
1185        return neon_element_offset(reg >> 1, reg & 1, MO_32);
1186    }
1187}
1188
1189void read_neon_element32(TCGv_i32 dest, int reg, int ele, MemOp memop)
1190{
1191    long off = neon_element_offset(reg, ele, memop);
1192
1193    switch (memop) {
1194    case MO_SB:
1195        tcg_gen_ld8s_i32(dest, cpu_env, off);
1196        break;
1197    case MO_UB:
1198        tcg_gen_ld8u_i32(dest, cpu_env, off);
1199        break;
1200    case MO_SW:
1201        tcg_gen_ld16s_i32(dest, cpu_env, off);
1202        break;
1203    case MO_UW:
1204        tcg_gen_ld16u_i32(dest, cpu_env, off);
1205        break;
1206    case MO_UL:
1207    case MO_SL:
1208        tcg_gen_ld_i32(dest, cpu_env, off);
1209        break;
1210    default:
1211        g_assert_not_reached();
1212    }
1213}
1214
1215void read_neon_element64(TCGv_i64 dest, int reg, int ele, MemOp memop)
1216{
1217    long off = neon_element_offset(reg, ele, memop);
1218
1219    switch (memop) {
1220    case MO_SL:
1221        tcg_gen_ld32s_i64(dest, cpu_env, off);
1222        break;
1223    case MO_UL:
1224        tcg_gen_ld32u_i64(dest, cpu_env, off);
1225        break;
1226    case MO_UQ:
1227        tcg_gen_ld_i64(dest, cpu_env, off);
1228        break;
1229    default:
1230        g_assert_not_reached();
1231    }
1232}
1233
1234void write_neon_element32(TCGv_i32 src, int reg, int ele, MemOp memop)
1235{
1236    long off = neon_element_offset(reg, ele, memop);
1237
1238    switch (memop) {
1239    case MO_8:
1240        tcg_gen_st8_i32(src, cpu_env, off);
1241        break;
1242    case MO_16:
1243        tcg_gen_st16_i32(src, cpu_env, off);
1244        break;
1245    case MO_32:
1246        tcg_gen_st_i32(src, cpu_env, off);
1247        break;
1248    default:
1249        g_assert_not_reached();
1250    }
1251}
1252
1253void write_neon_element64(TCGv_i64 src, int reg, int ele, MemOp memop)
1254{
1255    long off = neon_element_offset(reg, ele, memop);
1256
1257    switch (memop) {
1258    case MO_32:
1259        tcg_gen_st32_i64(src, cpu_env, off);
1260        break;
1261    case MO_64:
1262        tcg_gen_st_i64(src, cpu_env, off);
1263        break;
1264    default:
1265        g_assert_not_reached();
1266    }
1267}
1268
1269#define ARM_CP_RW_BIT   (1 << 20)
1270
1271static inline void iwmmxt_load_reg(TCGv_i64 var, int reg)
1272{
1273    tcg_gen_ld_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
1274}
1275
1276static inline void iwmmxt_store_reg(TCGv_i64 var, int reg)
1277{
1278    tcg_gen_st_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
1279}
1280
1281static inline TCGv_i32 iwmmxt_load_creg(int reg)
1282{
1283    TCGv_i32 var = tcg_temp_new_i32();
1284    tcg_gen_ld_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
1285    return var;
1286}
1287
1288static inline void iwmmxt_store_creg(int reg, TCGv_i32 var)
1289{
1290    tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
1291}
1292
1293static inline void gen_op_iwmmxt_movq_wRn_M0(int rn)
1294{
1295    iwmmxt_store_reg(cpu_M0, rn);
1296}
1297
1298static inline void gen_op_iwmmxt_movq_M0_wRn(int rn)
1299{
1300    iwmmxt_load_reg(cpu_M0, rn);
1301}
1302
1303static inline void gen_op_iwmmxt_orq_M0_wRn(int rn)
1304{
1305    iwmmxt_load_reg(cpu_V1, rn);
1306    tcg_gen_or_i64(cpu_M0, cpu_M0, cpu_V1);
1307}
1308
1309static inline void gen_op_iwmmxt_andq_M0_wRn(int rn)
1310{
1311    iwmmxt_load_reg(cpu_V1, rn);
1312    tcg_gen_and_i64(cpu_M0, cpu_M0, cpu_V1);
1313}
1314
1315static inline void gen_op_iwmmxt_xorq_M0_wRn(int rn)
1316{
1317    iwmmxt_load_reg(cpu_V1, rn);
1318    tcg_gen_xor_i64(cpu_M0, cpu_M0, cpu_V1);
1319}
1320
1321#define IWMMXT_OP(name) \
1322static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
1323{ \
1324    iwmmxt_load_reg(cpu_V1, rn); \
1325    gen_helper_iwmmxt_##name(cpu_M0, cpu_M0, cpu_V1); \
1326}
1327
1328#define IWMMXT_OP_ENV(name) \
1329static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
1330{ \
1331    iwmmxt_load_reg(cpu_V1, rn); \
1332    gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0, cpu_V1); \
1333}
1334
1335#define IWMMXT_OP_ENV_SIZE(name) \
1336IWMMXT_OP_ENV(name##b) \
1337IWMMXT_OP_ENV(name##w) \
1338IWMMXT_OP_ENV(name##l)
1339
1340#define IWMMXT_OP_ENV1(name) \
1341static inline void gen_op_iwmmxt_##name##_M0(void) \
1342{ \
1343    gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0); \
1344}
1345
1346IWMMXT_OP(maddsq)
1347IWMMXT_OP(madduq)
1348IWMMXT_OP(sadb)
1349IWMMXT_OP(sadw)
1350IWMMXT_OP(mulslw)
1351IWMMXT_OP(mulshw)
1352IWMMXT_OP(mululw)
1353IWMMXT_OP(muluhw)
1354IWMMXT_OP(macsw)
1355IWMMXT_OP(macuw)
1356
1357IWMMXT_OP_ENV_SIZE(unpackl)
1358IWMMXT_OP_ENV_SIZE(unpackh)
1359
1360IWMMXT_OP_ENV1(unpacklub)
1361IWMMXT_OP_ENV1(unpackluw)
1362IWMMXT_OP_ENV1(unpacklul)
1363IWMMXT_OP_ENV1(unpackhub)
1364IWMMXT_OP_ENV1(unpackhuw)
1365IWMMXT_OP_ENV1(unpackhul)
1366IWMMXT_OP_ENV1(unpacklsb)
1367IWMMXT_OP_ENV1(unpacklsw)
1368IWMMXT_OP_ENV1(unpacklsl)
1369IWMMXT_OP_ENV1(unpackhsb)
1370IWMMXT_OP_ENV1(unpackhsw)
1371IWMMXT_OP_ENV1(unpackhsl)
1372
1373IWMMXT_OP_ENV_SIZE(cmpeq)
1374IWMMXT_OP_ENV_SIZE(cmpgtu)
1375IWMMXT_OP_ENV_SIZE(cmpgts)
1376
1377IWMMXT_OP_ENV_SIZE(mins)
1378IWMMXT_OP_ENV_SIZE(minu)
1379IWMMXT_OP_ENV_SIZE(maxs)
1380IWMMXT_OP_ENV_SIZE(maxu)
1381
1382IWMMXT_OP_ENV_SIZE(subn)
1383IWMMXT_OP_ENV_SIZE(addn)
1384IWMMXT_OP_ENV_SIZE(subu)
1385IWMMXT_OP_ENV_SIZE(addu)
1386IWMMXT_OP_ENV_SIZE(subs)
1387IWMMXT_OP_ENV_SIZE(adds)
1388
1389IWMMXT_OP_ENV(avgb0)
1390IWMMXT_OP_ENV(avgb1)
1391IWMMXT_OP_ENV(avgw0)
1392IWMMXT_OP_ENV(avgw1)
1393
1394IWMMXT_OP_ENV(packuw)
1395IWMMXT_OP_ENV(packul)
1396IWMMXT_OP_ENV(packuq)
1397IWMMXT_OP_ENV(packsw)
1398IWMMXT_OP_ENV(packsl)
1399IWMMXT_OP_ENV(packsq)
1400
1401static void gen_op_iwmmxt_set_mup(void)
1402{
1403    TCGv_i32 tmp;
1404    tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
1405    tcg_gen_ori_i32(tmp, tmp, 2);
1406    store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
1407}
1408
1409static void gen_op_iwmmxt_set_cup(void)
1410{
1411    TCGv_i32 tmp;
1412    tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
1413    tcg_gen_ori_i32(tmp, tmp, 1);
1414    store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
1415}
1416
1417static void gen_op_iwmmxt_setpsr_nz(void)
1418{
1419    TCGv_i32 tmp = tcg_temp_new_i32();
1420    gen_helper_iwmmxt_setpsr_nz(tmp, cpu_M0);
1421    store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCASF]);
1422}
1423
1424static inline void gen_op_iwmmxt_addl_M0_wRn(int rn)
1425{
1426    iwmmxt_load_reg(cpu_V1, rn);
1427    tcg_gen_ext32u_i64(cpu_V1, cpu_V1);
1428    tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
1429}
1430
1431static inline int gen_iwmmxt_address(DisasContext *s, uint32_t insn,
1432                                     TCGv_i32 dest)
1433{
1434    int rd;
1435    uint32_t offset;
1436    TCGv_i32 tmp;
1437
1438    rd = (insn >> 16) & 0xf;
1439    tmp = load_reg(s, rd);
1440
1441    offset = (insn & 0xff) << ((insn >> 7) & 2);
1442    if (insn & (1 << 24)) {
1443        /* Pre indexed */
1444        if (insn & (1 << 23))
1445            tcg_gen_addi_i32(tmp, tmp, offset);
1446        else
1447            tcg_gen_addi_i32(tmp, tmp, -offset);
1448        tcg_gen_mov_i32(dest, tmp);
1449        if (insn & (1 << 21)) {
1450            store_reg(s, rd, tmp);
1451        }
1452    } else if (insn & (1 << 21)) {
1453        /* Post indexed */
1454        tcg_gen_mov_i32(dest, tmp);
1455        if (insn & (1 << 23))
1456            tcg_gen_addi_i32(tmp, tmp, offset);
1457        else
1458            tcg_gen_addi_i32(tmp, tmp, -offset);
1459        store_reg(s, rd, tmp);
1460    } else if (!(insn & (1 << 23)))
1461        return 1;
1462    return 0;
1463}
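    /*
     * Decoding note: the 8-bit immediate is shifted left by (insn >> 7) & 2,
     * i.e. by 0 or by 2, so the word/doubleword forms use a byte offset of
     * imm8 * 4 while the byte/halfword forms use imm8 directly; bits 24, 23
     * and 21 select pre/post indexing, add/subtract and writeback in the
     * usual Arm fashion.
     */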
1464
1465static inline int gen_iwmmxt_shift(uint32_t insn, uint32_t mask, TCGv_i32 dest)
1466{
1467    int rd = (insn >> 0) & 0xf;
1468    TCGv_i32 tmp;
1469
1470    if (insn & (1 << 8)) {
1471        if (rd < ARM_IWMMXT_wCGR0 || rd > ARM_IWMMXT_wCGR3) {
1472            return 1;
1473        } else {
1474            tmp = iwmmxt_load_creg(rd);
1475        }
1476    } else {
1477        tmp = tcg_temp_new_i32();
1478        iwmmxt_load_reg(cpu_V0, rd);
1479        tcg_gen_extrl_i64_i32(tmp, cpu_V0);
1480    }
1481    tcg_gen_andi_i32(tmp, tmp, mask);
1482    tcg_gen_mov_i32(dest, tmp);
1483    return 0;
1484}
1485
1486/* Disassemble an iwMMXt instruction.  Returns nonzero if an error occurred
1487   (ie. an undefined instruction).  */
1488static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn)
1489{
1490    int rd, wrd;
1491    int rdhi, rdlo, rd0, rd1, i;
1492    TCGv_i32 addr;
1493    TCGv_i32 tmp, tmp2, tmp3;
1494
1495    if ((insn & 0x0e000e00) == 0x0c000000) {
1496        if ((insn & 0x0fe00ff0) == 0x0c400000) {
1497            wrd = insn & 0xf;
1498            rdlo = (insn >> 12) & 0xf;
1499            rdhi = (insn >> 16) & 0xf;
1500            if (insn & ARM_CP_RW_BIT) {                         /* TMRRC */
1501                iwmmxt_load_reg(cpu_V0, wrd);
1502                tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
1503                tcg_gen_extrh_i64_i32(cpu_R[rdhi], cpu_V0);
1504            } else {                                    /* TMCRR */
1505                tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
1506                iwmmxt_store_reg(cpu_V0, wrd);
1507                gen_op_iwmmxt_set_mup();
1508            }
1509            return 0;
1510        }
1511
1512        wrd = (insn >> 12) & 0xf;
1513        addr = tcg_temp_new_i32();
1514        if (gen_iwmmxt_address(s, insn, addr)) {
1515            return 1;
1516        }
1517        if (insn & ARM_CP_RW_BIT) {
1518            if ((insn >> 28) == 0xf) {                  /* WLDRW wCx */
1519                tmp = tcg_temp_new_i32();
1520                gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
1521                iwmmxt_store_creg(wrd, tmp);
1522            } else {
1523                i = 1;
1524                if (insn & (1 << 8)) {
1525                    if (insn & (1 << 22)) {             /* WLDRD */
1526                        gen_aa32_ld64(s, cpu_M0, addr, get_mem_index(s));
1527                        i = 0;
1528                    } else {                            /* WLDRW wRd */
1529                        tmp = tcg_temp_new_i32();
1530                        gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
1531                    }
1532                } else {
1533                    tmp = tcg_temp_new_i32();
1534                    if (insn & (1 << 22)) {             /* WLDRH */
1535                        gen_aa32_ld16u(s, tmp, addr, get_mem_index(s));
1536                    } else {                            /* WLDRB */
1537                        gen_aa32_ld8u(s, tmp, addr, get_mem_index(s));
1538                    }
1539                }
1540                if (i) {
1541                    tcg_gen_extu_i32_i64(cpu_M0, tmp);
1542                }
1543                gen_op_iwmmxt_movq_wRn_M0(wrd);
1544            }
1545        } else {
1546            if ((insn >> 28) == 0xf) {                  /* WSTRW wCx */
1547                tmp = iwmmxt_load_creg(wrd);
1548                gen_aa32_st32(s, tmp, addr, get_mem_index(s));
1549            } else {
1550                gen_op_iwmmxt_movq_M0_wRn(wrd);
1551                tmp = tcg_temp_new_i32();
1552                if (insn & (1 << 8)) {
1553                    if (insn & (1 << 22)) {             /* WSTRD */
1554                        gen_aa32_st64(s, cpu_M0, addr, get_mem_index(s));
1555                    } else {                            /* WSTRW wRd */
1556                        tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1557                        gen_aa32_st32(s, tmp, addr, get_mem_index(s));
1558                    }
1559                } else {
1560                    if (insn & (1 << 22)) {             /* WSTRH */
1561                        tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1562                        gen_aa32_st16(s, tmp, addr, get_mem_index(s));
1563                    } else {                            /* WSTRB */
1564                        tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1565                        gen_aa32_st8(s, tmp, addr, get_mem_index(s));
1566                    }
1567                }
1568            }
1569        }
1570        return 0;
1571    }
1572
1573    if ((insn & 0x0f000000) != 0x0e000000)
1574        return 1;
1575
1576    switch (((insn >> 12) & 0xf00) | ((insn >> 4) & 0xff)) {
1577    case 0x000:                                                 /* WOR */
1578        wrd = (insn >> 12) & 0xf;
1579        rd0 = (insn >> 0) & 0xf;
1580        rd1 = (insn >> 16) & 0xf;
1581        gen_op_iwmmxt_movq_M0_wRn(rd0);
1582        gen_op_iwmmxt_orq_M0_wRn(rd1);
1583        gen_op_iwmmxt_setpsr_nz();
1584        gen_op_iwmmxt_movq_wRn_M0(wrd);
1585        gen_op_iwmmxt_set_mup();
1586        gen_op_iwmmxt_set_cup();
1587        break;
1588    case 0x011:                                                 /* TMCR */
1589        if (insn & 0xf)
1590            return 1;
1591        rd = (insn >> 12) & 0xf;
1592        wrd = (insn >> 16) & 0xf;
1593        switch (wrd) {
1594        case ARM_IWMMXT_wCID:
1595        case ARM_IWMMXT_wCASF:
1596            break;
1597        case ARM_IWMMXT_wCon:
1598            gen_op_iwmmxt_set_cup();
1599            /* Fall through.  */
1600        case ARM_IWMMXT_wCSSF:
1601            tmp = iwmmxt_load_creg(wrd);
1602            tmp2 = load_reg(s, rd);
1603            tcg_gen_andc_i32(tmp, tmp, tmp2);
1604            iwmmxt_store_creg(wrd, tmp);
1605            break;
1606        case ARM_IWMMXT_wCGR0:
1607        case ARM_IWMMXT_wCGR1:
1608        case ARM_IWMMXT_wCGR2:
1609        case ARM_IWMMXT_wCGR3:
1610            gen_op_iwmmxt_set_cup();
1611            tmp = load_reg(s, rd);
1612            iwmmxt_store_creg(wrd, tmp);
1613            break;
1614        default:
1615            return 1;
1616        }
1617        break;
1618    case 0x100:                                                 /* WXOR */
1619        wrd = (insn >> 12) & 0xf;
1620        rd0 = (insn >> 0) & 0xf;
1621        rd1 = (insn >> 16) & 0xf;
1622        gen_op_iwmmxt_movq_M0_wRn(rd0);
1623        gen_op_iwmmxt_xorq_M0_wRn(rd1);
1624        gen_op_iwmmxt_setpsr_nz();
1625        gen_op_iwmmxt_movq_wRn_M0(wrd);
1626        gen_op_iwmmxt_set_mup();
1627        gen_op_iwmmxt_set_cup();
1628        break;
1629    case 0x111:                                                 /* TMRC */
1630        if (insn & 0xf)
1631            return 1;
1632        rd = (insn >> 12) & 0xf;
1633        wrd = (insn >> 16) & 0xf;
1634        tmp = iwmmxt_load_creg(wrd);
1635        store_reg(s, rd, tmp);
1636        break;
1637    case 0x300:                                                 /* WANDN */
1638        wrd = (insn >> 12) & 0xf;
1639        rd0 = (insn >> 0) & 0xf;
1640        rd1 = (insn >> 16) & 0xf;
1641        gen_op_iwmmxt_movq_M0_wRn(rd0);
1642        tcg_gen_neg_i64(cpu_M0, cpu_M0);
1643        gen_op_iwmmxt_andq_M0_wRn(rd1);
1644        gen_op_iwmmxt_setpsr_nz();
1645        gen_op_iwmmxt_movq_wRn_M0(wrd);
1646        gen_op_iwmmxt_set_mup();
1647        gen_op_iwmmxt_set_cup();
1648        break;
1649    case 0x200:                                                 /* WAND */
1650        wrd = (insn >> 12) & 0xf;
1651        rd0 = (insn >> 0) & 0xf;
1652        rd1 = (insn >> 16) & 0xf;
1653        gen_op_iwmmxt_movq_M0_wRn(rd0);
1654        gen_op_iwmmxt_andq_M0_wRn(rd1);
1655        gen_op_iwmmxt_setpsr_nz();
1656        gen_op_iwmmxt_movq_wRn_M0(wrd);
1657        gen_op_iwmmxt_set_mup();
1658        gen_op_iwmmxt_set_cup();
1659        break;
1660    case 0x810: case 0xa10:                             /* WMADD */
1661        wrd = (insn >> 12) & 0xf;
1662        rd0 = (insn >> 0) & 0xf;
1663        rd1 = (insn >> 16) & 0xf;
1664        gen_op_iwmmxt_movq_M0_wRn(rd0);
1665        if (insn & (1 << 21))
1666            gen_op_iwmmxt_maddsq_M0_wRn(rd1);
1667        else
1668            gen_op_iwmmxt_madduq_M0_wRn(rd1);
1669        gen_op_iwmmxt_movq_wRn_M0(wrd);
1670        gen_op_iwmmxt_set_mup();
1671        break;
1672    case 0x10e: case 0x50e: case 0x90e: case 0xd0e:     /* WUNPCKIL */
1673        wrd = (insn >> 12) & 0xf;
1674        rd0 = (insn >> 16) & 0xf;
1675        rd1 = (insn >> 0) & 0xf;
1676        gen_op_iwmmxt_movq_M0_wRn(rd0);
1677        switch ((insn >> 22) & 3) {
1678        case 0:
1679            gen_op_iwmmxt_unpacklb_M0_wRn(rd1);
1680            break;
1681        case 1:
1682            gen_op_iwmmxt_unpacklw_M0_wRn(rd1);
1683            break;
1684        case 2:
1685            gen_op_iwmmxt_unpackll_M0_wRn(rd1);
1686            break;
1687        case 3:
1688            return 1;
1689        }
1690        gen_op_iwmmxt_movq_wRn_M0(wrd);
1691        gen_op_iwmmxt_set_mup();
1692        gen_op_iwmmxt_set_cup();
1693        break;
1694    case 0x10c: case 0x50c: case 0x90c: case 0xd0c:     /* WUNPCKIH */
1695        wrd = (insn >> 12) & 0xf;
1696        rd0 = (insn >> 16) & 0xf;
1697        rd1 = (insn >> 0) & 0xf;
1698        gen_op_iwmmxt_movq_M0_wRn(rd0);
1699        switch ((insn >> 22) & 3) {
1700        case 0:
1701            gen_op_iwmmxt_unpackhb_M0_wRn(rd1);
1702            break;
1703        case 1:
1704            gen_op_iwmmxt_unpackhw_M0_wRn(rd1);
1705            break;
1706        case 2:
1707            gen_op_iwmmxt_unpackhl_M0_wRn(rd1);
1708            break;
1709        case 3:
1710            return 1;
1711        }
1712        gen_op_iwmmxt_movq_wRn_M0(wrd);
1713        gen_op_iwmmxt_set_mup();
1714        gen_op_iwmmxt_set_cup();
1715        break;
1716    case 0x012: case 0x112: case 0x412: case 0x512:     /* WSAD */
1717        wrd = (insn >> 12) & 0xf;
1718        rd0 = (insn >> 16) & 0xf;
1719        rd1 = (insn >> 0) & 0xf;
1720        gen_op_iwmmxt_movq_M0_wRn(rd0);
1721        if (insn & (1 << 22))
1722            gen_op_iwmmxt_sadw_M0_wRn(rd1);
1723        else
1724            gen_op_iwmmxt_sadb_M0_wRn(rd1);
1725        if (!(insn & (1 << 20)))
1726            gen_op_iwmmxt_addl_M0_wRn(wrd);
1727        gen_op_iwmmxt_movq_wRn_M0(wrd);
1728        gen_op_iwmmxt_set_mup();
1729        break;
1730    case 0x010: case 0x110: case 0x210: case 0x310:     /* WMUL */
1731        wrd = (insn >> 12) & 0xf;
1732        rd0 = (insn >> 16) & 0xf;
1733        rd1 = (insn >> 0) & 0xf;
1734        gen_op_iwmmxt_movq_M0_wRn(rd0);
1735        if (insn & (1 << 21)) {
1736            if (insn & (1 << 20))
1737                gen_op_iwmmxt_mulshw_M0_wRn(rd1);
1738            else
1739                gen_op_iwmmxt_mulslw_M0_wRn(rd1);
1740        } else {
1741            if (insn & (1 << 20))
1742                gen_op_iwmmxt_muluhw_M0_wRn(rd1);
1743            else
1744                gen_op_iwmmxt_mululw_M0_wRn(rd1);
1745        }
1746        gen_op_iwmmxt_movq_wRn_M0(wrd);
1747        gen_op_iwmmxt_set_mup();
1748        break;
1749    case 0x410: case 0x510: case 0x610: case 0x710:     /* WMAC */
1750        wrd = (insn >> 12) & 0xf;
1751        rd0 = (insn >> 16) & 0xf;
1752        rd1 = (insn >> 0) & 0xf;
1753        gen_op_iwmmxt_movq_M0_wRn(rd0);
1754        if (insn & (1 << 21))
1755            gen_op_iwmmxt_macsw_M0_wRn(rd1);
1756        else
1757            gen_op_iwmmxt_macuw_M0_wRn(rd1);
1758        if (!(insn & (1 << 20))) {
1759            iwmmxt_load_reg(cpu_V1, wrd);
1760            tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
1761        }
1762        gen_op_iwmmxt_movq_wRn_M0(wrd);
1763        gen_op_iwmmxt_set_mup();
1764        break;
1765    case 0x006: case 0x406: case 0x806: case 0xc06:     /* WCMPEQ */
1766        wrd = (insn >> 12) & 0xf;
1767        rd0 = (insn >> 16) & 0xf;
1768        rd1 = (insn >> 0) & 0xf;
1769        gen_op_iwmmxt_movq_M0_wRn(rd0);
1770        switch ((insn >> 22) & 3) {
1771        case 0:
1772            gen_op_iwmmxt_cmpeqb_M0_wRn(rd1);
1773            break;
1774        case 1:
1775            gen_op_iwmmxt_cmpeqw_M0_wRn(rd1);
1776            break;
1777        case 2:
1778            gen_op_iwmmxt_cmpeql_M0_wRn(rd1);
1779            break;
1780        case 3:
1781            return 1;
1782        }
1783        gen_op_iwmmxt_movq_wRn_M0(wrd);
1784        gen_op_iwmmxt_set_mup();
1785        gen_op_iwmmxt_set_cup();
1786        break;
1787    case 0x800: case 0x900: case 0xc00: case 0xd00:     /* WAVG2 */
1788        wrd = (insn >> 12) & 0xf;
1789        rd0 = (insn >> 16) & 0xf;
1790        rd1 = (insn >> 0) & 0xf;
1791        gen_op_iwmmxt_movq_M0_wRn(rd0);
1792        if (insn & (1 << 22)) {
1793            if (insn & (1 << 20))
1794                gen_op_iwmmxt_avgw1_M0_wRn(rd1);
1795            else
1796                gen_op_iwmmxt_avgw0_M0_wRn(rd1);
1797        } else {
1798            if (insn & (1 << 20))
1799                gen_op_iwmmxt_avgb1_M0_wRn(rd1);
1800            else
1801                gen_op_iwmmxt_avgb0_M0_wRn(rd1);
1802        }
1803        gen_op_iwmmxt_movq_wRn_M0(wrd);
1804        gen_op_iwmmxt_set_mup();
1805        gen_op_iwmmxt_set_cup();
1806        break;
1807    case 0x802: case 0x902: case 0xa02: case 0xb02:     /* WALIGNR */
1808        wrd = (insn >> 12) & 0xf;
1809        rd0 = (insn >> 16) & 0xf;
1810        rd1 = (insn >> 0) & 0xf;
1811        gen_op_iwmmxt_movq_M0_wRn(rd0);
1812        tmp = iwmmxt_load_creg(ARM_IWMMXT_wCGR0 + ((insn >> 20) & 3));
1813        tcg_gen_andi_i32(tmp, tmp, 7);
1814        iwmmxt_load_reg(cpu_V1, rd1);
1815        gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp);
1816        gen_op_iwmmxt_movq_wRn_M0(wrd);
1817        gen_op_iwmmxt_set_mup();
1818        break;
1819    case 0x601: case 0x605: case 0x609: case 0x60d:     /* TINSR */
1820        if (((insn >> 6) & 3) == 3)
1821            return 1;
1822        rd = (insn >> 12) & 0xf;
1823        wrd = (insn >> 16) & 0xf;
1824        tmp = load_reg(s, rd);
1825        gen_op_iwmmxt_movq_M0_wRn(wrd);
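            /*
             * tmp2 is the element mask and tmp3 the bit offset of the
             * selected byte/halfword/word within the 64-bit wRd register.
             */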
1826        switch ((insn >> 6) & 3) {
1827        case 0:
1828            tmp2 = tcg_constant_i32(0xff);
1829            tmp3 = tcg_constant_i32((insn & 7) << 3);
1830            break;
1831        case 1:
1832            tmp2 = tcg_constant_i32(0xffff);
1833            tmp3 = tcg_constant_i32((insn & 3) << 4);
1834            break;
1835        case 2:
1836            tmp2 = tcg_constant_i32(0xffffffff);
1837            tmp3 = tcg_constant_i32((insn & 1) << 5);
1838            break;
1839        default:
1840            g_assert_not_reached();
1841        }
1842        gen_helper_iwmmxt_insr(cpu_M0, cpu_M0, tmp, tmp2, tmp3);
1843        gen_op_iwmmxt_movq_wRn_M0(wrd);
1844        gen_op_iwmmxt_set_mup();
1845        break;
1846    case 0x107: case 0x507: case 0x907: case 0xd07:     /* TEXTRM */
1847        rd = (insn >> 12) & 0xf;
1848        wrd = (insn >> 16) & 0xf;
1849        if (rd == 15 || ((insn >> 22) & 3) == 3)
1850            return 1;
1851        gen_op_iwmmxt_movq_M0_wRn(wrd);
1852        tmp = tcg_temp_new_i32();
1853        switch ((insn >> 22) & 3) {
1854        case 0:
1855            tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 7) << 3);
1856            tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1857            if (insn & 8) {
1858                tcg_gen_ext8s_i32(tmp, tmp);
1859            } else {
1860                tcg_gen_andi_i32(tmp, tmp, 0xff);
1861            }
1862            break;
1863        case 1:
1864            tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 3) << 4);
1865            tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1866            if (insn & 8) {
1867                tcg_gen_ext16s_i32(tmp, tmp);
1868            } else {
1869                tcg_gen_andi_i32(tmp, tmp, 0xffff);
1870            }
1871            break;
1872        case 2:
1873            tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 1) << 5);
1874            tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1875            break;
1876        }
1877        store_reg(s, rd, tmp);
1878        break;
1879    case 0x117: case 0x517: case 0x917: case 0xd17:     /* TEXTRC */
1880        if ((insn & 0x000ff008) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1881            return 1;
1882        tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1883        switch ((insn >> 22) & 3) {
1884        case 0:
1885            tcg_gen_shri_i32(tmp, tmp, ((insn & 7) << 2) + 0);
1886            break;
1887        case 1:
1888            tcg_gen_shri_i32(tmp, tmp, ((insn & 3) << 3) + 4);
1889            break;
1890        case 2:
1891            tcg_gen_shri_i32(tmp, tmp, ((insn & 1) << 4) + 12);
1892            break;
1893        }
1894        tcg_gen_shli_i32(tmp, tmp, 28);
1895        gen_set_nzcv(tmp);
1896        break;
1897    case 0x401: case 0x405: case 0x409: case 0x40d:     /* TBCST */
1898        if (((insn >> 6) & 3) == 3)
1899            return 1;
1900        rd = (insn >> 12) & 0xf;
1901        wrd = (insn >> 16) & 0xf;
1902        tmp = load_reg(s, rd);
1903        switch ((insn >> 6) & 3) {
1904        case 0:
1905            gen_helper_iwmmxt_bcstb(cpu_M0, tmp);
1906            break;
1907        case 1:
1908            gen_helper_iwmmxt_bcstw(cpu_M0, tmp);
1909            break;
1910        case 2:
1911            gen_helper_iwmmxt_bcstl(cpu_M0, tmp);
1912            break;
1913        }
1914        gen_op_iwmmxt_movq_wRn_M0(wrd);
1915        gen_op_iwmmxt_set_mup();
1916        break;
1917    case 0x113: case 0x513: case 0x913: case 0xd13:     /* TANDC */
1918        if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1919            return 1;
1920        tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1921        tmp2 = tcg_temp_new_i32();
1922        tcg_gen_mov_i32(tmp2, tmp);
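            /*
             * Fold the per-element wCASF flag fields together with AND
             * (nibbles for bytes, bytes for halfwords, halfwords for words)
             * so the combined flags end up in the top nibble of tmp before
             * gen_set_nzcv() consumes it.
             */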
1923        switch ((insn >> 22) & 3) {
1924        case 0:
1925            for (i = 0; i < 7; i ++) {
1926                tcg_gen_shli_i32(tmp2, tmp2, 4);
1927                tcg_gen_and_i32(tmp, tmp, tmp2);
1928            }
1929            break;
1930        case 1:
1931            for (i = 0; i < 3; i ++) {
1932                tcg_gen_shli_i32(tmp2, tmp2, 8);
1933                tcg_gen_and_i32(tmp, tmp, tmp2);
1934            }
1935            break;
1936        case 2:
1937            tcg_gen_shli_i32(tmp2, tmp2, 16);
1938            tcg_gen_and_i32(tmp, tmp, tmp2);
1939            break;
1940        }
1941        gen_set_nzcv(tmp);
1942        break;
1943    case 0x01c: case 0x41c: case 0x81c: case 0xc1c:     /* WACC */
1944        wrd = (insn >> 12) & 0xf;
1945        rd0 = (insn >> 16) & 0xf;
1946        gen_op_iwmmxt_movq_M0_wRn(rd0);
1947        switch ((insn >> 22) & 3) {
1948        case 0:
1949            gen_helper_iwmmxt_addcb(cpu_M0, cpu_M0);
1950            break;
1951        case 1:
1952            gen_helper_iwmmxt_addcw(cpu_M0, cpu_M0);
1953            break;
1954        case 2:
1955            gen_helper_iwmmxt_addcl(cpu_M0, cpu_M0);
1956            break;
1957        case 3:
1958            return 1;
1959        }
1960        gen_op_iwmmxt_movq_wRn_M0(wrd);
1961        gen_op_iwmmxt_set_mup();
1962        break;
1963    case 0x115: case 0x515: case 0x915: case 0xd15:     /* TORC */
1964        if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1965            return 1;
1966        tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1967        tmp2 = tcg_temp_new_i32();
1968        tcg_gen_mov_i32(tmp2, tmp);
1969        switch ((insn >> 22) & 3) {
1970        case 0:
1971            for (i = 0; i < 7; i ++) {
1972                tcg_gen_shli_i32(tmp2, tmp2, 4);
1973                tcg_gen_or_i32(tmp, tmp, tmp2);
1974            }
1975            break;
1976        case 1:
1977            for (i = 0; i < 3; i ++) {
1978                tcg_gen_shli_i32(tmp2, tmp2, 8);
1979                tcg_gen_or_i32(tmp, tmp, tmp2);
1980            }
1981            break;
1982        case 2:
1983            tcg_gen_shli_i32(tmp2, tmp2, 16);
1984            tcg_gen_or_i32(tmp, tmp, tmp2);
1985            break;
1986        }
1987        gen_set_nzcv(tmp);
1988        break;
1989    case 0x103: case 0x503: case 0x903: case 0xd03:     /* TMOVMSK */
1990        rd = (insn >> 12) & 0xf;
1991        rd0 = (insn >> 16) & 0xf;
1992        if ((insn & 0xf) != 0 || ((insn >> 22) & 3) == 3)
1993            return 1;
1994        gen_op_iwmmxt_movq_M0_wRn(rd0);
1995        tmp = tcg_temp_new_i32();
1996        switch ((insn >> 22) & 3) {
1997        case 0:
1998            gen_helper_iwmmxt_msbb(tmp, cpu_M0);
1999            break;
2000        case 1:
2001            gen_helper_iwmmxt_msbw(tmp, cpu_M0);
2002            break;
2003        case 2:
2004            gen_helper_iwmmxt_msbl(tmp, cpu_M0);
2005            break;
2006        }
2007        store_reg(s, rd, tmp);
2008        break;
2009    case 0x106: case 0x306: case 0x506: case 0x706:     /* WCMPGT */
2010    case 0x906: case 0xb06: case 0xd06: case 0xf06:
2011        wrd = (insn >> 12) & 0xf;
2012        rd0 = (insn >> 16) & 0xf;
2013        rd1 = (insn >> 0) & 0xf;
2014        gen_op_iwmmxt_movq_M0_wRn(rd0);
2015        switch ((insn >> 22) & 3) {
2016        case 0:
2017            if (insn & (1 << 21))
2018                gen_op_iwmmxt_cmpgtsb_M0_wRn(rd1);
2019            else
2020                gen_op_iwmmxt_cmpgtub_M0_wRn(rd1);
2021            break;
2022        case 1:
2023            if (insn & (1 << 21))
2024                gen_op_iwmmxt_cmpgtsw_M0_wRn(rd1);
2025            else
2026                gen_op_iwmmxt_cmpgtuw_M0_wRn(rd1);
2027            break;
2028        case 2:
2029            if (insn & (1 << 21))
2030                gen_op_iwmmxt_cmpgtsl_M0_wRn(rd1);
2031            else
2032                gen_op_iwmmxt_cmpgtul_M0_wRn(rd1);
2033            break;
2034        case 3:
2035            return 1;
2036        }
2037        gen_op_iwmmxt_movq_wRn_M0(wrd);
2038        gen_op_iwmmxt_set_mup();
2039        gen_op_iwmmxt_set_cup();
2040        break;
2041    case 0x00e: case 0x20e: case 0x40e: case 0x60e:     /* WUNPCKEL */
2042    case 0x80e: case 0xa0e: case 0xc0e: case 0xe0e:
2043        wrd = (insn >> 12) & 0xf;
2044        rd0 = (insn >> 16) & 0xf;
2045        gen_op_iwmmxt_movq_M0_wRn(rd0);
2046        switch ((insn >> 22) & 3) {
2047        case 0:
2048            if (insn & (1 << 21))
2049                gen_op_iwmmxt_unpacklsb_M0();
2050            else
2051                gen_op_iwmmxt_unpacklub_M0();
2052            break;
2053        case 1:
2054            if (insn & (1 << 21))
2055                gen_op_iwmmxt_unpacklsw_M0();
2056            else
2057                gen_op_iwmmxt_unpackluw_M0();
2058            break;
2059        case 2:
2060            if (insn & (1 << 21))
2061                gen_op_iwmmxt_unpacklsl_M0();
2062            else
2063                gen_op_iwmmxt_unpacklul_M0();
2064            break;
2065        case 3:
2066            return 1;
2067        }
2068        gen_op_iwmmxt_movq_wRn_M0(wrd);
2069        gen_op_iwmmxt_set_mup();
2070        gen_op_iwmmxt_set_cup();
2071        break;
2072    case 0x00c: case 0x20c: case 0x40c: case 0x60c:     /* WUNPCKEH */
2073    case 0x80c: case 0xa0c: case 0xc0c: case 0xe0c:
2074        wrd = (insn >> 12) & 0xf;
2075        rd0 = (insn >> 16) & 0xf;
2076        gen_op_iwmmxt_movq_M0_wRn(rd0);
2077        switch ((insn >> 22) & 3) {
2078        case 0:
2079            if (insn & (1 << 21))
2080                gen_op_iwmmxt_unpackhsb_M0();
2081            else
2082                gen_op_iwmmxt_unpackhub_M0();
2083            break;
2084        case 1:
2085            if (insn & (1 << 21))
2086                gen_op_iwmmxt_unpackhsw_M0();
2087            else
2088                gen_op_iwmmxt_unpackhuw_M0();
2089            break;
2090        case 2:
2091            if (insn & (1 << 21))
2092                gen_op_iwmmxt_unpackhsl_M0();
2093            else
2094                gen_op_iwmmxt_unpackhul_M0();
2095            break;
2096        case 3:
2097            return 1;
2098        }
2099        gen_op_iwmmxt_movq_wRn_M0(wrd);
2100        gen_op_iwmmxt_set_mup();
2101        gen_op_iwmmxt_set_cup();
2102        break;
2103    case 0x204: case 0x604: case 0xa04: case 0xe04:     /* WSRL */
2104    case 0x214: case 0x614: case 0xa14: case 0xe14:
2105        if (((insn >> 22) & 3) == 0)
2106            return 1;
2107        wrd = (insn >> 12) & 0xf;
2108        rd0 = (insn >> 16) & 0xf;
2109        gen_op_iwmmxt_movq_M0_wRn(rd0);
2110        tmp = tcg_temp_new_i32();
2111        if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2112            return 1;
2113        }
2114        switch ((insn >> 22) & 3) {
2115        case 1:
2116            gen_helper_iwmmxt_srlw(cpu_M0, cpu_env, cpu_M0, tmp);
2117            break;
2118        case 2:
2119            gen_helper_iwmmxt_srll(cpu_M0, cpu_env, cpu_M0, tmp);
2120            break;
2121        case 3:
2122            gen_helper_iwmmxt_srlq(cpu_M0, cpu_env, cpu_M0, tmp);
2123            break;
2124        }
2125        gen_op_iwmmxt_movq_wRn_M0(wrd);
2126        gen_op_iwmmxt_set_mup();
2127        gen_op_iwmmxt_set_cup();
2128        break;
2129    case 0x004: case 0x404: case 0x804: case 0xc04:     /* WSRA */
2130    case 0x014: case 0x414: case 0x814: case 0xc14:
2131        if (((insn >> 22) & 3) == 0)
2132            return 1;
2133        wrd = (insn >> 12) & 0xf;
2134        rd0 = (insn >> 16) & 0xf;
2135        gen_op_iwmmxt_movq_M0_wRn(rd0);
2136        tmp = tcg_temp_new_i32();
2137        if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2138            return 1;
2139        }
2140        switch ((insn >> 22) & 3) {
2141        case 1:
2142            gen_helper_iwmmxt_sraw(cpu_M0, cpu_env, cpu_M0, tmp);
2143            break;
2144        case 2:
2145            gen_helper_iwmmxt_sral(cpu_M0, cpu_env, cpu_M0, tmp);
2146            break;
2147        case 3:
2148            gen_helper_iwmmxt_sraq(cpu_M0, cpu_env, cpu_M0, tmp);
2149            break;
2150        }
2151        gen_op_iwmmxt_movq_wRn_M0(wrd);
2152        gen_op_iwmmxt_set_mup();
2153        gen_op_iwmmxt_set_cup();
2154        break;
2155    case 0x104: case 0x504: case 0x904: case 0xd04:     /* WSLL */
2156    case 0x114: case 0x514: case 0x914: case 0xd14:
2157        if (((insn >> 22) & 3) == 0)
2158            return 1;
2159        wrd = (insn >> 12) & 0xf;
2160        rd0 = (insn >> 16) & 0xf;
2161        gen_op_iwmmxt_movq_M0_wRn(rd0);
2162        tmp = tcg_temp_new_i32();
2163        if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2164            return 1;
2165        }
2166        switch ((insn >> 22) & 3) {
2167        case 1:
2168            gen_helper_iwmmxt_sllw(cpu_M0, cpu_env, cpu_M0, tmp);
2169            break;
2170        case 2:
2171            gen_helper_iwmmxt_slll(cpu_M0, cpu_env, cpu_M0, tmp);
2172            break;
2173        case 3:
2174            gen_helper_iwmmxt_sllq(cpu_M0, cpu_env, cpu_M0, tmp);
2175            break;
2176        }
2177        gen_op_iwmmxt_movq_wRn_M0(wrd);
2178        gen_op_iwmmxt_set_mup();
2179        gen_op_iwmmxt_set_cup();
2180        break;
2181    case 0x304: case 0x704: case 0xb04: case 0xf04:     /* WROR */
2182    case 0x314: case 0x714: case 0xb14: case 0xf14:
2183        if (((insn >> 22) & 3) == 0)
2184            return 1;
2185        wrd = (insn >> 12) & 0xf;
2186        rd0 = (insn >> 16) & 0xf;
2187        gen_op_iwmmxt_movq_M0_wRn(rd0);
2188        tmp = tcg_temp_new_i32();
2189        switch ((insn >> 22) & 3) {
2190        case 1:
2191            if (gen_iwmmxt_shift(insn, 0xf, tmp)) {
2192                return 1;
2193            }
2194            gen_helper_iwmmxt_rorw(cpu_M0, cpu_env, cpu_M0, tmp);
2195            break;
2196        case 2:
2197            if (gen_iwmmxt_shift(insn, 0x1f, tmp)) {
2198                return 1;
2199            }
2200            gen_helper_iwmmxt_rorl(cpu_M0, cpu_env, cpu_M0, tmp);
2201            break;
2202        case 3:
2203            if (gen_iwmmxt_shift(insn, 0x3f, tmp)) {
2204                return 1;
2205            }
2206            gen_helper_iwmmxt_rorq(cpu_M0, cpu_env, cpu_M0, tmp);
2207            break;
2208        }
2209        gen_op_iwmmxt_movq_wRn_M0(wrd);
2210        gen_op_iwmmxt_set_mup();
2211        gen_op_iwmmxt_set_cup();
2212        break;
2213    case 0x116: case 0x316: case 0x516: case 0x716:     /* WMIN */
2214    case 0x916: case 0xb16: case 0xd16: case 0xf16:
2215        wrd = (insn >> 12) & 0xf;
2216        rd0 = (insn >> 16) & 0xf;
2217        rd1 = (insn >> 0) & 0xf;
2218        gen_op_iwmmxt_movq_M0_wRn(rd0);
2219        switch ((insn >> 22) & 3) {
2220        case 0:
2221            if (insn & (1 << 21))
2222                gen_op_iwmmxt_minsb_M0_wRn(rd1);
2223            else
2224                gen_op_iwmmxt_minub_M0_wRn(rd1);
2225            break;
2226        case 1:
2227            if (insn & (1 << 21))
2228                gen_op_iwmmxt_minsw_M0_wRn(rd1);
2229            else
2230                gen_op_iwmmxt_minuw_M0_wRn(rd1);
2231            break;
2232        case 2:
2233            if (insn & (1 << 21))
2234                gen_op_iwmmxt_minsl_M0_wRn(rd1);
2235            else
2236                gen_op_iwmmxt_minul_M0_wRn(rd1);
2237            break;
2238        case 3:
2239            return 1;
2240        }
2241        gen_op_iwmmxt_movq_wRn_M0(wrd);
2242        gen_op_iwmmxt_set_mup();
2243        break;
2244    case 0x016: case 0x216: case 0x416: case 0x616:     /* WMAX */
2245    case 0x816: case 0xa16: case 0xc16: case 0xe16:
2246        wrd = (insn >> 12) & 0xf;
2247        rd0 = (insn >> 16) & 0xf;
2248        rd1 = (insn >> 0) & 0xf;
2249        gen_op_iwmmxt_movq_M0_wRn(rd0);
2250        switch ((insn >> 22) & 3) {
2251        case 0:
2252            if (insn & (1 << 21))
2253                gen_op_iwmmxt_maxsb_M0_wRn(rd1);
2254            else
2255                gen_op_iwmmxt_maxub_M0_wRn(rd1);
2256            break;
2257        case 1:
2258            if (insn & (1 << 21))
2259                gen_op_iwmmxt_maxsw_M0_wRn(rd1);
2260            else
2261                gen_op_iwmmxt_maxuw_M0_wRn(rd1);
2262            break;
2263        case 2:
2264            if (insn & (1 << 21))
2265                gen_op_iwmmxt_maxsl_M0_wRn(rd1);
2266            else
2267                gen_op_iwmmxt_maxul_M0_wRn(rd1);
2268            break;
2269        case 3:
2270            return 1;
2271        }
2272        gen_op_iwmmxt_movq_wRn_M0(wrd);
2273        gen_op_iwmmxt_set_mup();
2274        break;
2275    case 0x002: case 0x102: case 0x202: case 0x302:     /* WALIGNI */
2276    case 0x402: case 0x502: case 0x602: case 0x702:
2277        wrd = (insn >> 12) & 0xf;
2278        rd0 = (insn >> 16) & 0xf;
2279        rd1 = (insn >> 0) & 0xf;
2280        gen_op_iwmmxt_movq_M0_wRn(rd0);
2281        iwmmxt_load_reg(cpu_V1, rd1);
2282        gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1,
2283                                tcg_constant_i32((insn >> 20) & 3));
2284        gen_op_iwmmxt_movq_wRn_M0(wrd);
2285        gen_op_iwmmxt_set_mup();
2286        break;
2287    case 0x01a: case 0x11a: case 0x21a: case 0x31a:     /* WSUB */
2288    case 0x41a: case 0x51a: case 0x61a: case 0x71a:
2289    case 0x81a: case 0x91a: case 0xa1a: case 0xb1a:
2290    case 0xc1a: case 0xd1a: case 0xe1a: case 0xf1a:
2291        wrd = (insn >> 12) & 0xf;
2292        rd0 = (insn >> 16) & 0xf;
2293        rd1 = (insn >> 0) & 0xf;
2294        gen_op_iwmmxt_movq_M0_wRn(rd0);
2295        switch ((insn >> 20) & 0xf) {
2296        case 0x0:
2297            gen_op_iwmmxt_subnb_M0_wRn(rd1);
2298            break;
2299        case 0x1:
2300            gen_op_iwmmxt_subub_M0_wRn(rd1);
2301            break;
2302        case 0x3:
2303            gen_op_iwmmxt_subsb_M0_wRn(rd1);
2304            break;
2305        case 0x4:
2306            gen_op_iwmmxt_subnw_M0_wRn(rd1);
2307            break;
2308        case 0x5:
2309            gen_op_iwmmxt_subuw_M0_wRn(rd1);
2310            break;
2311        case 0x7:
2312            gen_op_iwmmxt_subsw_M0_wRn(rd1);
2313            break;
2314        case 0x8:
2315            gen_op_iwmmxt_subnl_M0_wRn(rd1);
2316            break;
2317        case 0x9:
2318            gen_op_iwmmxt_subul_M0_wRn(rd1);
2319            break;
2320        case 0xb:
2321            gen_op_iwmmxt_subsl_M0_wRn(rd1);
2322            break;
2323        default:
2324            return 1;
2325        }
2326        gen_op_iwmmxt_movq_wRn_M0(wrd);
2327        gen_op_iwmmxt_set_mup();
2328        gen_op_iwmmxt_set_cup();
2329        break;
2330    case 0x01e: case 0x11e: case 0x21e: case 0x31e:     /* WSHUFH */
2331    case 0x41e: case 0x51e: case 0x61e: case 0x71e:
2332    case 0x81e: case 0x91e: case 0xa1e: case 0xb1e:
2333    case 0xc1e: case 0xd1e: case 0xe1e: case 0xf1e:
2334        wrd = (insn >> 12) & 0xf;
2335        rd0 = (insn >> 16) & 0xf;
2336        gen_op_iwmmxt_movq_M0_wRn(rd0);
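            /* The 8-bit shuffle control is insn[23:20]:insn[3:0]. */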
2337        tmp = tcg_constant_i32(((insn >> 16) & 0xf0) | (insn & 0x0f));
2338        gen_helper_iwmmxt_shufh(cpu_M0, cpu_env, cpu_M0, tmp);
2339        gen_op_iwmmxt_movq_wRn_M0(wrd);
2340        gen_op_iwmmxt_set_mup();
2341        gen_op_iwmmxt_set_cup();
2342        break;
2343    case 0x018: case 0x118: case 0x218: case 0x318:     /* WADD */
2344    case 0x418: case 0x518: case 0x618: case 0x718:
2345    case 0x818: case 0x918: case 0xa18: case 0xb18:
2346    case 0xc18: case 0xd18: case 0xe18: case 0xf18:
2347        wrd = (insn >> 12) & 0xf;
2348        rd0 = (insn >> 16) & 0xf;
2349        rd1 = (insn >> 0) & 0xf;
2350        gen_op_iwmmxt_movq_M0_wRn(rd0);
2351        switch ((insn >> 20) & 0xf) {
2352        case 0x0:
2353            gen_op_iwmmxt_addnb_M0_wRn(rd1);
2354            break;
2355        case 0x1:
2356            gen_op_iwmmxt_addub_M0_wRn(rd1);
2357            break;
2358        case 0x3:
2359            gen_op_iwmmxt_addsb_M0_wRn(rd1);
2360            break;
2361        case 0x4:
2362            gen_op_iwmmxt_addnw_M0_wRn(rd1);
2363            break;
2364        case 0x5:
2365            gen_op_iwmmxt_adduw_M0_wRn(rd1);
2366            break;
2367        case 0x7:
2368            gen_op_iwmmxt_addsw_M0_wRn(rd1);
2369            break;
2370        case 0x8:
2371            gen_op_iwmmxt_addnl_M0_wRn(rd1);
2372            break;
2373        case 0x9:
2374            gen_op_iwmmxt_addul_M0_wRn(rd1);
2375            break;
2376        case 0xb:
2377            gen_op_iwmmxt_addsl_M0_wRn(rd1);
2378            break;
2379        default:
2380            return 1;
2381        }
2382        gen_op_iwmmxt_movq_wRn_M0(wrd);
2383        gen_op_iwmmxt_set_mup();
2384        gen_op_iwmmxt_set_cup();
2385        break;
2386    case 0x008: case 0x108: case 0x208: case 0x308:     /* WPACK */
2387    case 0x408: case 0x508: case 0x608: case 0x708:
2388    case 0x808: case 0x908: case 0xa08: case 0xb08:
2389    case 0xc08: case 0xd08: case 0xe08: case 0xf08:
2390        if (!(insn & (1 << 20)) || ((insn >> 22) & 3) == 0)
2391            return 1;
2392        wrd = (insn >> 12) & 0xf;
2393        rd0 = (insn >> 16) & 0xf;
2394        rd1 = (insn >> 0) & 0xf;
2395        gen_op_iwmmxt_movq_M0_wRn(rd0);
2396        switch ((insn >> 22) & 3) {
2397        case 1:
2398            if (insn & (1 << 21))
2399                gen_op_iwmmxt_packsw_M0_wRn(rd1);
2400            else
2401                gen_op_iwmmxt_packuw_M0_wRn(rd1);
2402            break;
2403        case 2:
2404            if (insn & (1 << 21))
2405                gen_op_iwmmxt_packsl_M0_wRn(rd1);
2406            else
2407                gen_op_iwmmxt_packul_M0_wRn(rd1);
2408            break;
2409        case 3:
2410            if (insn & (1 << 21))
2411                gen_op_iwmmxt_packsq_M0_wRn(rd1);
2412            else
2413                gen_op_iwmmxt_packuq_M0_wRn(rd1);
2414            break;
2415        }
2416        gen_op_iwmmxt_movq_wRn_M0(wrd);
2417        gen_op_iwmmxt_set_mup();
2418        gen_op_iwmmxt_set_cup();
2419        break;
2420    case 0x201: case 0x203: case 0x205: case 0x207:
2421    case 0x209: case 0x20b: case 0x20d: case 0x20f:
2422    case 0x211: case 0x213: case 0x215: case 0x217:
2423    case 0x219: case 0x21b: case 0x21d: case 0x21f:
2424        wrd = (insn >> 5) & 0xf;
2425        rd0 = (insn >> 12) & 0xf;
2426        rd1 = (insn >> 0) & 0xf;
2427        if (rd0 == 0xf || rd1 == 0xf)
2428            return 1;
2429        gen_op_iwmmxt_movq_M0_wRn(wrd);
2430        tmp = load_reg(s, rd0);
2431        tmp2 = load_reg(s, rd1);
2432        switch ((insn >> 16) & 0xf) {
2433        case 0x0:                                       /* TMIA */
2434            gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2435            break;
2436        case 0x8:                                       /* TMIAPH */
2437            gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2438            break;
2439        case 0xc: case 0xd: case 0xe: case 0xf:                 /* TMIAxy */
2440            if (insn & (1 << 16))
2441                tcg_gen_shri_i32(tmp, tmp, 16);
2442            if (insn & (1 << 17))
2443                tcg_gen_shri_i32(tmp2, tmp2, 16);
2444            gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2445            break;
2446        default:
2447            return 1;
2448        }
2449        gen_op_iwmmxt_movq_wRn_M0(wrd);
2450        gen_op_iwmmxt_set_mup();
2451        break;
2452    default:
2453        return 1;
2454    }
2455
2456    return 0;
2457}
2458
2459/* Disassemble an XScale DSP instruction.  Returns nonzero if an error occurred
2460   (ie. an undefined instruction).  */
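    /* Only the acc0 accumulator is handled here: the multiply-accumulate
     * forms (MIA*) reuse the iwMMXt 64-bit multiply-add helpers, and
     * MAR/MRA move acc0 to and from a pair of core registers.
     */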
2461static int disas_dsp_insn(DisasContext *s, uint32_t insn)
2462{
2463    int acc, rd0, rd1, rdhi, rdlo;
2464    TCGv_i32 tmp, tmp2;
2465
2466    if ((insn & 0x0ff00f10) == 0x0e200010) {
2467        /* Multiply with Internal Accumulate Format */
2468        rd0 = (insn >> 12) & 0xf;
2469        rd1 = insn & 0xf;
2470        acc = (insn >> 5) & 7;
2471
2472        if (acc != 0)
2473            return 1;
2474
2475        tmp = load_reg(s, rd0);
2476        tmp2 = load_reg(s, rd1);
2477        switch ((insn >> 16) & 0xf) {
2478        case 0x0:                                       /* MIA */
2479            gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2480            break;
2481        case 0x8:                                       /* MIAPH */
2482            gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2483            break;
2484        case 0xc:                                       /* MIABB */
2485        case 0xd:                                       /* MIABT */
2486        case 0xe:                                       /* MIATB */
2487        case 0xf:                                       /* MIATT */
2488            if (insn & (1 << 16))
2489                tcg_gen_shri_i32(tmp, tmp, 16);
2490            if (insn & (1 << 17))
2491                tcg_gen_shri_i32(tmp2, tmp2, 16);
2492            gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2493            break;
2494        default:
2495            return 1;
2496        }
2497
2498        gen_op_iwmmxt_movq_wRn_M0(acc);
2499        return 0;
2500    }
2501
2502    if ((insn & 0x0fe00ff8) == 0x0c400000) {
2503        /* Internal Accumulator Access Format */
2504        rdhi = (insn >> 16) & 0xf;
2505        rdlo = (insn >> 12) & 0xf;
2506        acc = insn & 7;
2507
2508        if (acc != 0)
2509            return 1;
2510
2511        if (insn & ARM_CP_RW_BIT) {                     /* MRA */
2512            iwmmxt_load_reg(cpu_V0, acc);
2513            tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
2514            tcg_gen_extrh_i64_i32(cpu_R[rdhi], cpu_V0);
2515            tcg_gen_andi_i32(cpu_R[rdhi], cpu_R[rdhi], (1 << (40 - 32)) - 1);
2516        } else {                                        /* MAR */
2517            tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
2518            iwmmxt_store_reg(cpu_V0, acc);
2519        }
2520        return 0;
2521    }
2522
2523    return 1;
2524}
2525
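    /* Jump to a run-time-computed target: look up the TB matching the
     * current CPU state and chain to it, or exit to the main loop if
     * there is no match.
     */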
2526static void gen_goto_ptr(void)
2527{
2528    tcg_gen_lookup_and_goto_ptr();
2529}
2530
2531/* This will end the TB but doesn't guarantee we'll return to
2532 * cpu_loop_exec. Any live exit_requests will be processed as we
2533 * enter the next TB.
2534 */
2535static void gen_goto_tb(DisasContext *s, int n, target_long diff)
2536{
2537    if (translator_use_goto_tb(&s->base, s->pc_curr + diff)) {
2538        /*
2539         * For pcrel, the pc must always be up-to-date on entry to
2540         * the linked TB, so that it can use simple additions for all
2541         * further adjustments.  For !pcrel, the linked TB is compiled
2542         * to know its full virtual address, so we can delay the
2543         * update to pc to the unlinked path.  A long chain of links
2544         * can thus avoid many updates to the PC.
2545         */
2546        if (tb_cflags(s->base.tb) & CF_PCREL) {
2547            gen_update_pc(s, diff);
2548            tcg_gen_goto_tb(n);
2549        } else {
2550            tcg_gen_goto_tb(n);
2551            gen_update_pc(s, diff);
2552        }
2553        tcg_gen_exit_tb(s->base.tb, n);
2554    } else {
2555        gen_update_pc(s, diff);
2556        gen_goto_ptr();
2557    }
2558    s->base.is_jmp = DISAS_NORETURN;
2559}
2560
2561/* Jump, specifying which TB number to use if we gen_goto_tb() */
2562static void gen_jmp_tb(DisasContext *s, target_long diff, int tbno)
2563{
2564    if (unlikely(s->ss_active)) {
2565        /* An indirect jump so that we still trigger the debug exception.  */
2566        gen_update_pc(s, diff);
2567        s->base.is_jmp = DISAS_JUMP;
2568        return;
2569    }
2570    switch (s->base.is_jmp) {
2571    case DISAS_NEXT:
2572    case DISAS_TOO_MANY:
2573    case DISAS_NORETURN:
2574        /*
2575         * The normal case: just go to the destination TB.
2576         * NB: NORETURN happens if we generate code like
2577         *    gen_brcondi(l);
2578         *    gen_jmp();
2579         *    gen_set_label(l);
2580         *    gen_jmp();
2581         * on the second call to gen_jmp().
2582         */
2583        gen_goto_tb(s, tbno, diff);
2584        break;
2585    case DISAS_UPDATE_NOCHAIN:
2586    case DISAS_UPDATE_EXIT:
2587        /*
2588         * We already decided we're leaving the TB for some other reason.
2589         * Avoid using goto_tb so we really do exit back to the main loop
2590         * and don't chain to another TB.
2591         */
2592        gen_update_pc(s, diff);
2593        gen_goto_ptr();
2594        s->base.is_jmp = DISAS_NORETURN;
2595        break;
2596    default:
2597        /*
2598         * We shouldn't be emitting code for a jump and also have
2599         * is_jmp set to one of the special cases like DISAS_SWI.
2600         */
2601        g_assert_not_reached();
2602    }
2603}
2604
2605static inline void gen_jmp(DisasContext *s, target_long diff)
2606{
2607    gen_jmp_tb(s, diff, 0);
2608}
2609
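    /* 16x16->32 signed multiply used by the SMULxy-style operations:
     * x and y select the top (1) or bottom (0) halfword of t0 and t1
     * respectively before the multiply.
     */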
2610static inline void gen_mulxy(TCGv_i32 t0, TCGv_i32 t1, int x, int y)
2611{
2612    if (x)
2613        tcg_gen_sari_i32(t0, t0, 16);
2614    else
2615        gen_sxth(t0);
2616    if (y)
2617        tcg_gen_sari_i32(t1, t1, 16);
2618    else
2619        gen_sxth(t1);
2620    tcg_gen_mul_i32(t0, t0, t1);
2621}
2622
2623/* Return the mask of PSR bits set by a MSR instruction.  */
2624static uint32_t msr_mask(DisasContext *s, int flags, int spsr)
2625{
2626    uint32_t mask = 0;
2627
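        /* flags bits 0..3 correspond to the MSR field mask <c>, <x>, <s>, <f>,
         * selecting PSR[7:0], [15:8], [23:16] and [31:24] respectively.
         */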
2628    if (flags & (1 << 0)) {
2629        mask |= 0xff;
2630    }
2631    if (flags & (1 << 1)) {
2632        mask |= 0xff00;
2633    }
2634    if (flags & (1 << 2)) {
2635        mask |= 0xff0000;
2636    }
2637    if (flags & (1 << 3)) {
2638        mask |= 0xff000000;
2639    }
2640
2641    /* Mask out undefined and reserved bits.  */
2642    mask &= aarch32_cpsr_valid_mask(s->features, s->isar);
2643
2644    /* Mask out execution state.  */
2645    if (!spsr) {
2646        mask &= ~CPSR_EXEC;
2647    }
2648
2649    /* Mask out privileged bits.  */
2650    if (IS_USER(s)) {
2651        mask &= CPSR_USER;
2652    }
2653    return mask;
2654}
2655
2656/* Returns nonzero if access to the PSR is not permitted. Marks t0 as dead. */
2657static int gen_set_psr(DisasContext *s, uint32_t mask, int spsr, TCGv_i32 t0)
2658{
2659    TCGv_i32 tmp;
2660    if (spsr) {
2661        /* ??? This is also undefined in system mode.  */
2662        if (IS_USER(s))
2663            return 1;
2664
2665        tmp = load_cpu_field(spsr);
2666        tcg_gen_andi_i32(tmp, tmp, ~mask);
2667        tcg_gen_andi_i32(t0, t0, mask);
2668        tcg_gen_or_i32(tmp, tmp, t0);
2669        store_cpu_field(tmp, spsr);
2670    } else {
2671        gen_set_cpsr(t0, mask);
2672    }
2673    gen_lookup_tb(s);
2674    return 0;
2675}
2676
2677/* Returns nonzero if access to the PSR is not permitted.  */
2678static int gen_set_psr_im(DisasContext *s, uint32_t mask, int spsr, uint32_t val)
2679{
2680    TCGv_i32 tmp;
2681    tmp = tcg_temp_new_i32();
2682    tcg_gen_movi_i32(tmp, val);
2683    return gen_set_psr(s, mask, spsr, tmp);
2684}
2685
2686static bool msr_banked_access_decode(DisasContext *s, int r, int sysm, int rn,
2687                                     int *tgtmode, int *regno)
2688{
2689    /* Decode the r and sysm fields of MSR/MRS banked accesses into
2690     * the target mode and register number, and identify the various
2691     * unpredictable cases.
2692     * MSR (banked) and MRS (banked) are CONSTRAINED UNPREDICTABLE if:
2693     *  + executed in user mode
2694     *  + using R15 as the src/dest register
2695     *  + accessing an unimplemented register
2696     *  + accessing a register that's inaccessible at current PL/security state*
2697     *  + accessing a register that you could access with a different insn
2698     * We choose to UNDEF in all these cases.
2699     * Since we don't know which of the various AArch32 modes we are in
2700     * we have to defer some checks to runtime.
2701     * Accesses to Monitor mode registers from Secure EL1 (which implies
2702     * that EL3 is AArch64) must trap to EL3.
2703     *
2704     * If the access checks fail this function will emit code to take
2705     * an exception and return false. Otherwise it will return true,
2706     * and set *tgtmode and *regno appropriately.
2707     */
2708    /* These instructions are present only in ARMv8, or in ARMv7 with the
2709     * Virtualization Extensions.
2710     */
2711    if (!arm_dc_feature(s, ARM_FEATURE_V8) &&
2712        !arm_dc_feature(s, ARM_FEATURE_EL2)) {
2713        goto undef;
2714    }
2715
2716    if (IS_USER(s) || rn == 15) {
2717        goto undef;
2718    }
2719
2720    /* The table in the v8 ARM ARM section F5.2.3 describes the encoding
2721     * of registers into (r, sysm).
2722     */
2723    if (r) {
2724        /* SPSRs for other modes */
2725        switch (sysm) {
2726        case 0xe: /* SPSR_fiq */
2727            *tgtmode = ARM_CPU_MODE_FIQ;
2728            break;
2729        case 0x10: /* SPSR_irq */
2730            *tgtmode = ARM_CPU_MODE_IRQ;
2731            break;
2732        case 0x12: /* SPSR_svc */
2733            *tgtmode = ARM_CPU_MODE_SVC;
2734            break;
2735        case 0x14: /* SPSR_abt */
2736            *tgtmode = ARM_CPU_MODE_ABT;
2737            break;
2738        case 0x16: /* SPSR_und */
2739            *tgtmode = ARM_CPU_MODE_UND;
2740            break;
2741        case 0x1c: /* SPSR_mon */
2742            *tgtmode = ARM_CPU_MODE_MON;
2743            break;
2744        case 0x1e: /* SPSR_hyp */
2745            *tgtmode = ARM_CPU_MODE_HYP;
2746            break;
2747        default: /* unallocated */
2748            goto undef;
2749        }
2750        /* We arbitrarily assign SPSR a register number of 16. */
2751        *regno = 16;
2752    } else {
2753        /* general purpose registers for other modes */
2754        switch (sysm) {
2755        case 0x0 ... 0x6:   /* 0b00xxx : r8_usr ... r14_usr */
2756            *tgtmode = ARM_CPU_MODE_USR;
2757            *regno = sysm + 8;
2758            break;
2759        case 0x8 ... 0xe:   /* 0b01xxx : r8_fiq ... r14_fiq */
2760            *tgtmode = ARM_CPU_MODE_FIQ;
2761            *regno = sysm;
2762            break;
2763        case 0x10 ... 0x11: /* 0b1000x : r14_irq, r13_irq */
2764            *tgtmode = ARM_CPU_MODE_IRQ;
2765            *regno = sysm & 1 ? 13 : 14;
2766            break;
2767        case 0x12 ... 0x13: /* 0b1001x : r14_svc, r13_svc */
2768            *tgtmode = ARM_CPU_MODE_SVC;
2769            *regno = sysm & 1 ? 13 : 14;
2770            break;
2771        case 0x14 ... 0x15: /* 0b1010x : r14_abt, r13_abt */
2772            *tgtmode = ARM_CPU_MODE_ABT;
2773            *regno = sysm & 1 ? 13 : 14;
2774            break;
2775        case 0x16 ... 0x17: /* 0b1011x : r14_und, r13_und */
2776            *tgtmode = ARM_CPU_MODE_UND;
2777            *regno = sysm & 1 ? 13 : 14;
2778            break;
2779        case 0x1c ... 0x1d: /* 0b1110x : r14_mon, r13_mon */
2780            *tgtmode = ARM_CPU_MODE_MON;
2781            *regno = sysm & 1 ? 13 : 14;
2782            break;
2783        case 0x1e ... 0x1f: /* 0b1111x : elr_hyp, r13_hyp */
2784            *tgtmode = ARM_CPU_MODE_HYP;
2785            /* Arbitrarily pick 17 for ELR_Hyp (which is not a banked LR!) */
2786            *regno = sysm & 1 ? 13 : 17;
2787            break;
2788        default: /* unallocated */
2789            goto undef;
2790        }
2791    }
2792
2793    /* Catch the 'accessing inaccessible register' cases we can detect
2794     * at translate time.
2795     */
2796    switch (*tgtmode) {
2797    case ARM_CPU_MODE_MON:
2798        if (!arm_dc_feature(s, ARM_FEATURE_EL3) || s->ns) {
2799            goto undef;
2800        }
2801        if (s->current_el == 1) {
2802            /* If we're in Secure EL1 (which implies that EL3 is AArch64)
2803             * then accesses to Mon registers trap to Secure EL2, if it exists,
2804             * otherwise EL3.
2805             */
2806            TCGv_i32 tcg_el;
2807
2808            if (arm_dc_feature(s, ARM_FEATURE_AARCH64) &&
2809                dc_isar_feature(aa64_sel2, s)) {
2810                /* Target EL is EL<3 minus SCR_EL3.EEL2> */
2811                tcg_el = load_cpu_field_low32(cp15.scr_el3);
2812                tcg_gen_sextract_i32(tcg_el, tcg_el, ctz32(SCR_EEL2), 1);
2813                tcg_gen_addi_i32(tcg_el, tcg_el, 3);
2814            } else {
2815                tcg_el = tcg_constant_i32(3);
2816            }
2817
2818            gen_exception_insn_el_v(s, 0, EXCP_UDEF,
2819                                    syn_uncategorized(), tcg_el);
2820            return false;
2821        }
2822        break;
2823    case ARM_CPU_MODE_HYP:
2824        /*
2825         * SPSR_hyp and r13_hyp can only be accessed from Monitor mode
2826         * (and so we can forbid accesses from EL2 or below). elr_hyp
2827         * can be accessed also from Hyp mode, so forbid accesses from
2828         * EL0 or EL1.
2829         */
2830        if (!arm_dc_feature(s, ARM_FEATURE_EL2) || s->current_el < 2 ||
2831            (s->current_el < 3 && *regno != 17)) {
2832            goto undef;
2833        }
2834        break;
2835    default:
2836        break;
2837    }
2838
2839    return true;
2840
2841undef:
2842    /* If we get here then some access check did not pass */
2843    gen_exception_insn(s, 0, EXCP_UDEF, syn_uncategorized());
2844    return false;
2845}
2846
2847static void gen_msr_banked(DisasContext *s, int r, int sysm, int rn)
2848{
2849    TCGv_i32 tcg_reg;
2850    int tgtmode = 0, regno = 0;
2851
2852    if (!msr_banked_access_decode(s, r, sysm, rn, &tgtmode, &regno)) {
2853        return;
2854    }
2855
2856    /* Sync state because msr_banked() can raise exceptions */
2857    gen_set_condexec(s);
2858    gen_update_pc(s, 0);
2859    tcg_reg = load_reg(s, rn);
2860    gen_helper_msr_banked(cpu_env, tcg_reg,
2861                          tcg_constant_i32(tgtmode),
2862                          tcg_constant_i32(regno));
2863    s->base.is_jmp = DISAS_UPDATE_EXIT;
2864}
2865
2866static void gen_mrs_banked(DisasContext *s, int r, int sysm, int rn)
2867{
2868    TCGv_i32 tcg_reg;
2869    int tgtmode = 0, regno = 0;
2870
2871    if (!msr_banked_access_decode(s, r, sysm, rn, &tgtmode, &regno)) {
2872        return;
2873    }
2874
2875    /* Sync state because mrs_banked() can raise exceptions */
2876    gen_set_condexec(s);
2877    gen_update_pc(s, 0);
2878    tcg_reg = tcg_temp_new_i32();
2879    gen_helper_mrs_banked(tcg_reg, cpu_env,
2880                          tcg_constant_i32(tgtmode),
2881                          tcg_constant_i32(regno));
2882    store_reg(s, rn, tcg_reg);
2883    s->base.is_jmp = DISAS_UPDATE_EXIT;
2884}
2885
2886/* Store value to PC as for an exception return (ie don't
2887 * mask bits). The subsequent call to gen_helper_cpsr_write_eret()
2888 * will do the masking based on the new value of the Thumb bit.
2889 */
2890static void store_pc_exc_ret(DisasContext *s, TCGv_i32 pc)
2891{
2892    tcg_gen_mov_i32(cpu_R[15], pc);
2893}
2894
2895/* Generate a v6 exception return.  Marks both values as dead.  */
2896static void gen_rfe(DisasContext *s, TCGv_i32 pc, TCGv_i32 cpsr)
2897{
2898    store_pc_exc_ret(s, pc);
2899    /* The cpsr_write_eret helper will mask the low bits of PC
2900     * appropriately depending on the new Thumb bit, so it must
2901     * be called after storing the new PC.
2902     */
2903    translator_io_start(&s->base);
2904    gen_helper_cpsr_write_eret(cpu_env, cpsr);
2905    /* Must exit loop to check un-masked IRQs */
2906    s->base.is_jmp = DISAS_EXIT;
2907}
2908
2909/* Generate an old-style exception return. Marks pc as dead. */
2910static void gen_exception_return(DisasContext *s, TCGv_i32 pc)
2911{
2912    gen_rfe(s, pc, load_cpu_field(spsr));
2913}
2914
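    /* Expand a 3-operand gvec operation whose out-of-line helper also needs
     * a pointer to the FP QC (cumulative saturation) flag, vfp.qc.
     */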
2915static void gen_gvec_fn3_qc(uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs,
2916                            uint32_t opr_sz, uint32_t max_sz,
2917                            gen_helper_gvec_3_ptr *fn)
2918{
2919    TCGv_ptr qc_ptr = tcg_temp_new_ptr();
2920
2921    tcg_gen_addi_ptr(qc_ptr, cpu_env, offsetof(CPUARMState, vfp.qc));
2922    tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, qc_ptr,
2923                       opr_sz, max_sz, 0, fn);
2924}
2925
2926void gen_gvec_sqrdmlah_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
2927                          uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
2928{
2929    static gen_helper_gvec_3_ptr * const fns[2] = {
2930        gen_helper_gvec_qrdmlah_s16, gen_helper_gvec_qrdmlah_s32
2931    };
2932    tcg_debug_assert(vece >= 1 && vece <= 2);
2933    gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
2934}
2935
2936void gen_gvec_sqrdmlsh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
2937                          uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
2938{
2939    static gen_helper_gvec_3_ptr * const fns[2] = {
2940        gen_helper_gvec_qrdmlsh_s16, gen_helper_gvec_qrdmlsh_s32
2941    };
2942    tcg_debug_assert(vece >= 1 && vece <= 2);
2943    gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
2944}
2945
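    /*
     * Integer "compare against zero": each expansion below produces all-ones
     * in an element when the comparison is true and all-zeros otherwise
     * (setcond yields 0/1, which the negation turns into 0/-1).
     */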
2946#define GEN_CMP0(NAME, COND)                                            \
2947    static void gen_##NAME##0_i32(TCGv_i32 d, TCGv_i32 a)               \
2948    {                                                                   \
2949        tcg_gen_setcondi_i32(COND, d, a, 0);                            \
2950        tcg_gen_neg_i32(d, d);                                          \
2951    }                                                                   \
2952    static void gen_##NAME##0_i64(TCGv_i64 d, TCGv_i64 a)               \
2953    {                                                                   \
2954        tcg_gen_setcondi_i64(COND, d, a, 0);                            \
2955        tcg_gen_neg_i64(d, d);                                          \
2956    }                                                                   \
2957    static void gen_##NAME##0_vec(unsigned vece, TCGv_vec d, TCGv_vec a) \
2958    {                                                                   \
2959        TCGv_vec zero = tcg_constant_vec_matching(d, vece, 0);          \
2960        tcg_gen_cmp_vec(COND, vece, d, a, zero);                        \
2961    }                                                                   \
2962    void gen_gvec_##NAME##0(unsigned vece, uint32_t d, uint32_t m,      \
2963                            uint32_t opr_sz, uint32_t max_sz)           \
2964    {                                                                   \
2965        const GVecGen2 op[4] = {                                        \
2966            { .fno = gen_helper_gvec_##NAME##0_b,                       \
2967              .fniv = gen_##NAME##0_vec,                                \
2968              .opt_opc = vecop_list_cmp,                                \
2969              .vece = MO_8 },                                           \
2970            { .fno = gen_helper_gvec_##NAME##0_h,                       \
2971              .fniv = gen_##NAME##0_vec,                                \
2972              .opt_opc = vecop_list_cmp,                                \
2973              .vece = MO_16 },                                          \
2974            { .fni4 = gen_##NAME##0_i32,                                \
2975              .fniv = gen_##NAME##0_vec,                                \
2976              .opt_opc = vecop_list_cmp,                                \
2977              .vece = MO_32 },                                          \
2978            { .fni8 = gen_##NAME##0_i64,                                \
2979              .fniv = gen_##NAME##0_vec,                                \
2980              .opt_opc = vecop_list_cmp,                                \
2981              .prefer_i64 = TCG_TARGET_REG_BITS == 64,                  \
2982              .vece = MO_64 },                                          \
2983        };                                                              \
2984        tcg_gen_gvec_2(d, m, opr_sz, max_sz, &op[vece]);                \
2985    }
2986
2987static const TCGOpcode vecop_list_cmp[] = {
2988    INDEX_op_cmp_vec, 0
2989};
2990
2991GEN_CMP0(ceq, TCG_COND_EQ)
2992GEN_CMP0(cle, TCG_COND_LE)
2993GEN_CMP0(cge, TCG_COND_GE)
2994GEN_CMP0(clt, TCG_COND_LT)
2995GEN_CMP0(cgt, TCG_COND_GT)
2996
2997#undef GEN_CMP0
2998
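    /*
     * Signed shift-right-and-accumulate (SSRA): the element-wise expanders
     * below shift the source right arithmetically and add the result into
     * the destination; gen_gvec_ssra() selects one per element size.
     */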
2999static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3000{
3001    tcg_gen_vec_sar8i_i64(a, a, shift);
3002    tcg_gen_vec_add8_i64(d, d, a);
3003}
3004
3005static void gen_ssra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3006{
3007    tcg_gen_vec_sar16i_i64(a, a, shift);
3008    tcg_gen_vec_add16_i64(d, d, a);
3009}
3010
3011static void gen_ssra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3012{
3013    tcg_gen_sari_i32(a, a, shift);
3014    tcg_gen_add_i32(d, d, a);
3015}
3016
3017static void gen_ssra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3018{
3019    tcg_gen_sari_i64(a, a, shift);
3020    tcg_gen_add_i64(d, d, a);
3021}
3022
3023static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3024{
3025    tcg_gen_sari_vec(vece, a, a, sh);
3026    tcg_gen_add_vec(vece, d, d, a);
3027}
3028
3029void gen_gvec_ssra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3030                   int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3031{
3032    static const TCGOpcode vecop_list[] = {
3033        INDEX_op_sari_vec, INDEX_op_add_vec, 0
3034    };
3035    static const GVecGen2i ops[4] = {
3036        { .fni8 = gen_ssra8_i64,
3037          .fniv = gen_ssra_vec,
3038          .fno = gen_helper_gvec_ssra_b,
3039          .load_dest = true,
3040          .opt_opc = vecop_list,
3041          .vece = MO_8 },
3042        { .fni8 = gen_ssra16_i64,
3043          .fniv = gen_ssra_vec,
3044          .fno = gen_helper_gvec_ssra_h,
3045          .load_dest = true,
3046          .opt_opc = vecop_list,
3047          .vece = MO_16 },
3048        { .fni4 = gen_ssra32_i32,
3049          .fniv = gen_ssra_vec,
3050          .fno = gen_helper_gvec_ssra_s,
3051          .load_dest = true,
3052          .opt_opc = vecop_list,
3053          .vece = MO_32 },
3054        { .fni8 = gen_ssra64_i64,
3055          .fniv = gen_ssra_vec,
3056          .fno = gen_helper_gvec_ssra_d,
3057          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3058          .opt_opc = vecop_list,
3059          .load_dest = true,
3060          .vece = MO_64 },
3061    };
3062
3063    /* tszimm encoding produces immediates in the range [1..esize]. */
3064    tcg_debug_assert(shift > 0);
3065    tcg_debug_assert(shift <= (8 << vece));
3066
3067    /*
3068     * Shifts larger than the element size are architecturally valid.
3069     * Signed results in all sign bits.
3070     */
3071    shift = MIN(shift, (8 << vece) - 1);
3072    tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3073}
3074
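    /* Unsigned shift-right-and-accumulate (USRA): as for SSRA above, but
     * using logical rather than arithmetic right shifts.
     */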
3075static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3076{
3077    tcg_gen_vec_shr8i_i64(a, a, shift);
3078    tcg_gen_vec_add8_i64(d, d, a);
3079}
3080
3081static void gen_usra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3082{
3083    tcg_gen_vec_shr16i_i64(a, a, shift);
3084    tcg_gen_vec_add16_i64(d, d, a);
3085}
3086
3087static void gen_usra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3088{
3089    tcg_gen_shri_i32(a, a, shift);
3090    tcg_gen_add_i32(d, d, a);
3091}
3092
3093static void gen_usra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3094{
3095    tcg_gen_shri_i64(a, a, shift);
3096    tcg_gen_add_i64(d, d, a);
3097}
3098
3099static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3100{
3101    tcg_gen_shri_vec(vece, a, a, sh);
3102    tcg_gen_add_vec(vece, d, d, a);
3103}
3104
3105void gen_gvec_usra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3106                   int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3107{
3108    static const TCGOpcode vecop_list[] = {
3109        INDEX_op_shri_vec, INDEX_op_add_vec, 0
3110    };
3111    static const GVecGen2i ops[4] = {
3112        { .fni8 = gen_usra8_i64,
3113          .fniv = gen_usra_vec,
3114          .fno = gen_helper_gvec_usra_b,
3115          .load_dest = true,
3116          .opt_opc = vecop_list,
3117          .vece = MO_8, },
3118        { .fni8 = gen_usra16_i64,
3119          .fniv = gen_usra_vec,
3120          .fno = gen_helper_gvec_usra_h,
3121          .load_dest = true,
3122          .opt_opc = vecop_list,
3123          .vece = MO_16, },
3124        { .fni4 = gen_usra32_i32,
3125          .fniv = gen_usra_vec,
3126          .fno = gen_helper_gvec_usra_s,
3127          .load_dest = true,
3128          .opt_opc = vecop_list,
3129          .vece = MO_32, },
3130        { .fni8 = gen_usra64_i64,
3131          .fniv = gen_usra_vec,
3132          .fno = gen_helper_gvec_usra_d,
3133          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3134          .load_dest = true,
3135          .opt_opc = vecop_list,
3136          .vece = MO_64, },
3137    };
3138
3139    /* tszimm encoding produces immediates in the range [1..esize]. */
3140    tcg_debug_assert(shift > 0);
3141    tcg_debug_assert(shift <= (8 << vece));
3142
3143    /*
3144     * Shifts larger than the element size are architecturally valid.
3145     * Unsigned results in all zeros as input to accumulate: nop.
3146     */
3147    if (shift < (8 << vece)) {
3148        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3149    } else {
3150        /* Nop, but we do need to clear the tail. */
3151        tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
3152    }
3153}
3154
3155/*
3156 * Shift one less than the requested amount, and the low bit is
3157 * the rounding bit.  For the 8 and 16-bit operations, because we
3158 * mask the low bit, we can perform a normal integer shift instead
3159 * of a vector shift.
3160 */
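    /*
     * E.g. for sh == 3 and an element value of 22 (0b10110):
     * the rounding bit is (22 >> 2) & 1 = 1 and 22 >> 3 = 2, giving 3,
     * which matches the architectural (22 + 4) >> 3.
     */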
3161static void gen_srshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3162{
3163    TCGv_i64 t = tcg_temp_new_i64();
3164
3165    tcg_gen_shri_i64(t, a, sh - 1);
3166    tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
3167    tcg_gen_vec_sar8i_i64(d, a, sh);
3168    tcg_gen_vec_add8_i64(d, d, t);
3169}
3170
3171static void gen_srshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3172{
3173    TCGv_i64 t = tcg_temp_new_i64();
3174
3175    tcg_gen_shri_i64(t, a, sh - 1);
3176    tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
3177    tcg_gen_vec_sar16i_i64(d, a, sh);
3178    tcg_gen_vec_add16_i64(d, d, t);
3179}
3180
3181static void gen_srshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3182{
3183    TCGv_i32 t;
3184
3185    /* Handle shift by the input size for the benefit of trans_SRSHR_ri */
3186    if (sh == 32) {
3187        tcg_gen_movi_i32(d, 0);
3188        return;
3189    }
3190    t = tcg_temp_new_i32();
3191    tcg_gen_extract_i32(t, a, sh - 1, 1);
3192    tcg_gen_sari_i32(d, a, sh);
3193    tcg_gen_add_i32(d, d, t);
3194}
3195
3196static void gen_srshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3197{
3198    TCGv_i64 t = tcg_temp_new_i64();
3199
3200    tcg_gen_extract_i64(t, a, sh - 1, 1);
3201    tcg_gen_sari_i64(d, a, sh);
3202    tcg_gen_add_i64(d, d, t);
3203}
3204
3205static void gen_srshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3206{
3207    TCGv_vec t = tcg_temp_new_vec_matching(d);
3208    TCGv_vec ones = tcg_temp_new_vec_matching(d);
3209
3210    tcg_gen_shri_vec(vece, t, a, sh - 1);
3211    tcg_gen_dupi_vec(vece, ones, 1);
3212    tcg_gen_and_vec(vece, t, t, ones);
3213    tcg_gen_sari_vec(vece, d, a, sh);
3214    tcg_gen_add_vec(vece, d, d, t);
3215}
3216
3217void gen_gvec_srshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3218                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3219{
3220    static const TCGOpcode vecop_list[] = {
3221        INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
3222    };
3223    static const GVecGen2i ops[4] = {
3224        { .fni8 = gen_srshr8_i64,
3225          .fniv = gen_srshr_vec,
3226          .fno = gen_helper_gvec_srshr_b,
3227          .opt_opc = vecop_list,
3228          .vece = MO_8 },
3229        { .fni8 = gen_srshr16_i64,
3230          .fniv = gen_srshr_vec,
3231          .fno = gen_helper_gvec_srshr_h,
3232          .opt_opc = vecop_list,
3233          .vece = MO_16 },
3234        { .fni4 = gen_srshr32_i32,
3235          .fniv = gen_srshr_vec,
3236          .fno = gen_helper_gvec_srshr_s,
3237          .opt_opc = vecop_list,
3238          .vece = MO_32 },
3239        { .fni8 = gen_srshr64_i64,
3240          .fniv = gen_srshr_vec,
3241          .fno = gen_helper_gvec_srshr_d,
3242          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3243          .opt_opc = vecop_list,
3244          .vece = MO_64 },
3245    };
3246
3247    /* tszimm encoding produces immediates in the range [1..esize] */
3248    tcg_debug_assert(shift > 0);
3249    tcg_debug_assert(shift <= (8 << vece));
3250
3251    if (shift == (8 << vece)) {
3252        /*
3253         * Shifts larger than the element size are architecturally valid.
3254         * Signed results in all sign bits.  With rounding, this produces
3255         *   (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
3256         * I.e. always zero.
3257         */
3258        tcg_gen_gvec_dup_imm(vece, rd_ofs, opr_sz, max_sz, 0);
3259    } else {
3260        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3261    }
3262}
3263
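    /*
     * SRSRA: signed rounding shift right and accumulate.  Compute the
     * rounded shift into a temporary and add it into the destination.
     */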
3264static void gen_srsra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3265{
3266    TCGv_i64 t = tcg_temp_new_i64();
3267
3268    gen_srshr8_i64(t, a, sh);
3269    tcg_gen_vec_add8_i64(d, d, t);
3270}
3271
3272static void gen_srsra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3273{
3274    TCGv_i64 t = tcg_temp_new_i64();
3275
3276    gen_srshr16_i64(t, a, sh);
3277    tcg_gen_vec_add16_i64(d, d, t);
3278}
3279
3280static void gen_srsra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3281{
3282    TCGv_i32 t = tcg_temp_new_i32();
3283
3284    gen_srshr32_i32(t, a, sh);
3285    tcg_gen_add_i32(d, d, t);
3286}
3287
3288static void gen_srsra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3289{
3290    TCGv_i64 t = tcg_temp_new_i64();
3291
3292    gen_srshr64_i64(t, a, sh);
3293    tcg_gen_add_i64(d, d, t);
3294}
3295
3296static void gen_srsra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3297{
3298    TCGv_vec t = tcg_temp_new_vec_matching(d);
3299
3300    gen_srshr_vec(vece, t, a, sh);
3301    tcg_gen_add_vec(vece, d, d, t);
3302}
3303
3304void gen_gvec_srsra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3305                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3306{
3307    static const TCGOpcode vecop_list[] = {
3308        INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
3309    };
3310    static const GVecGen2i ops[4] = {
3311        { .fni8 = gen_srsra8_i64,
3312          .fniv = gen_srsra_vec,
3313          .fno = gen_helper_gvec_srsra_b,
3314          .opt_opc = vecop_list,
3315          .load_dest = true,
3316          .vece = MO_8 },
3317        { .fni8 = gen_srsra16_i64,
3318          .fniv = gen_srsra_vec,
3319          .fno = gen_helper_gvec_srsra_h,
3320          .opt_opc = vecop_list,
3321          .load_dest = true,
3322          .vece = MO_16 },
3323        { .fni4 = gen_srsra32_i32,
3324          .fniv = gen_srsra_vec,
3325          .fno = gen_helper_gvec_srsra_s,
3326          .opt_opc = vecop_list,
3327          .load_dest = true,
3328          .vece = MO_32 },
3329        { .fni8 = gen_srsra64_i64,
3330          .fniv = gen_srsra_vec,
3331          .fno = gen_helper_gvec_srsra_d,
3332          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3333          .opt_opc = vecop_list,
3334          .load_dest = true,
3335          .vece = MO_64 },
3336    };
3337
3338    /* tszimm encoding produces immediates in the range [1..esize] */
3339    tcg_debug_assert(shift > 0);
3340    tcg_debug_assert(shift <= (8 << vece));
3341
3342    /*
3343     * Shifts larger than the element size are architecturally valid.
3344     * Signed results in all sign bits.  With rounding, this produces
3345     *   (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
3346     * I.e. always zero.  With accumulation, this leaves D unchanged.
3347     */
3348    if (shift == (8 << vece)) {
3349        /* Nop, but we do need to clear the tail. */
3350        tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
3351    } else {
3352        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3353    }
3354}
3355
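    /*
     * URSHR: unsigned rounding shift right.  Same rounding-bit trick as
     * SRSHR above, but using a logical rather than an arithmetic shift.
     */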
3356static void gen_urshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3357{
3358    TCGv_i64 t = tcg_temp_new_i64();
3359
3360    tcg_gen_shri_i64(t, a, sh - 1);
3361    tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
3362    tcg_gen_vec_shr8i_i64(d, a, sh);
3363    tcg_gen_vec_add8_i64(d, d, t);
3364}
3365
3366static void gen_urshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3367{
3368    TCGv_i64 t = tcg_temp_new_i64();
3369
3370    tcg_gen_shri_i64(t, a, sh - 1);
3371    tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
3372    tcg_gen_vec_shr16i_i64(d, a, sh);
3373    tcg_gen_vec_add16_i64(d, d, t);
3374}
3375
3376static void gen_urshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3377{
3378    TCGv_i32 t;
3379
3380    /* Handle shift by the input size for the benefit of trans_URSHR_ri */
3381    if (sh == 32) {
3382        tcg_gen_extract_i32(d, a, sh - 1, 1);
3383        return;
3384    }
3385    t = tcg_temp_new_i32();
3386    tcg_gen_extract_i32(t, a, sh - 1, 1);
3387    tcg_gen_shri_i32(d, a, sh);
3388    tcg_gen_add_i32(d, d, t);
3389}
3390
3391static void gen_urshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3392{
3393    TCGv_i64 t = tcg_temp_new_i64();
3394
3395    tcg_gen_extract_i64(t, a, sh - 1, 1);
3396    tcg_gen_shri_i64(d, a, sh);
3397    tcg_gen_add_i64(d, d, t);
3398}
3399
3400static void gen_urshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t shift)
3401{
3402    TCGv_vec t = tcg_temp_new_vec_matching(d);
3403    TCGv_vec ones = tcg_temp_new_vec_matching(d);
3404
3405    tcg_gen_shri_vec(vece, t, a, shift - 1);
3406    tcg_gen_dupi_vec(vece, ones, 1);
3407    tcg_gen_and_vec(vece, t, t, ones);
3408    tcg_gen_shri_vec(vece, d, a, shift);
3409    tcg_gen_add_vec(vece, d, d, t);
3410}
3411
3412void gen_gvec_urshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3413                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3414{
3415    static const TCGOpcode vecop_list[] = {
3416        INDEX_op_shri_vec, INDEX_op_add_vec, 0
3417    };
3418    static const GVecGen2i ops[4] = {
3419        { .fni8 = gen_urshr8_i64,
3420          .fniv = gen_urshr_vec,
3421          .fno = gen_helper_gvec_urshr_b,
3422          .opt_opc = vecop_list,
3423          .vece = MO_8 },
3424        { .fni8 = gen_urshr16_i64,
3425          .fniv = gen_urshr_vec,
3426          .fno = gen_helper_gvec_urshr_h,
3427          .opt_opc = vecop_list,
3428          .vece = MO_16 },
3429        { .fni4 = gen_urshr32_i32,
3430          .fniv = gen_urshr_vec,
3431          .fno = gen_helper_gvec_urshr_s,
3432          .opt_opc = vecop_list,
3433          .vece = MO_32 },
3434        { .fni8 = gen_urshr64_i64,
3435          .fniv = gen_urshr_vec,
3436          .fno = gen_helper_gvec_urshr_d,
3437          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3438          .opt_opc = vecop_list,
3439          .vece = MO_64 },
3440    };
3441
3442    /* tszimm encoding produces immediates in the range [1..esize] */
3443    tcg_debug_assert(shift > 0);
3444    tcg_debug_assert(shift <= (8 << vece));
3445
3446    if (shift == (8 << vece)) {
3447        /*
3448         * Shifts larger than the element size are architecturally valid.
3449         * Unsigned results in zero.  With rounding, this produces a
3450         * copy of the most significant bit.
3451         */
3452        tcg_gen_gvec_shri(vece, rd_ofs, rm_ofs, shift - 1, opr_sz, max_sz);
3453    } else {
3454        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3455    }
3456}
3457
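    /*
     * URSRA: unsigned rounding shift right and accumulate.  A shift by
     * the full element size is special-cased: the rounded result is just
     * the element's most significant bit, i.e. a logical shift by
     * esize - 1, which is then accumulated as usual.
     */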
3458static void gen_ursra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3459{
3460    TCGv_i64 t = tcg_temp_new_i64();
3461
3462    if (sh == 8) {
3463        tcg_gen_vec_shr8i_i64(t, a, 7);
3464    } else {
3465        gen_urshr8_i64(t, a, sh);
3466    }
3467    tcg_gen_vec_add8_i64(d, d, t);
3468}
3469
3470static void gen_ursra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3471{
3472    TCGv_i64 t = tcg_temp_new_i64();
3473
3474    if (sh == 16) {
3475        tcg_gen_vec_shr16i_i64(t, a, 15);
3476    } else {
3477        gen_urshr16_i64(t, a, sh);
3478    }
3479    tcg_gen_vec_add16_i64(d, d, t);
3480}
3481
3482static void gen_ursra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3483{
3484    TCGv_i32 t = tcg_temp_new_i32();
3485
3486    if (sh == 32) {
3487        tcg_gen_shri_i32(t, a, 31);
3488    } else {
3489        gen_urshr32_i32(t, a, sh);
3490    }
3491    tcg_gen_add_i32(d, d, t);
3492}
3493
3494static void gen_ursra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3495{
3496    TCGv_i64 t = tcg_temp_new_i64();
3497
3498    if (sh == 64) {
3499        tcg_gen_shri_i64(t, a, 63);
3500    } else {
3501        gen_urshr64_i64(t, a, sh);
3502    }
3503    tcg_gen_add_i64(d, d, t);
3504}
3505
3506static void gen_ursra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3507{
3508    TCGv_vec t = tcg_temp_new_vec_matching(d);
3509
3510    if (sh == (8 << vece)) {
3511        tcg_gen_shri_vec(vece, t, a, sh - 1);
3512    } else {
3513        gen_urshr_vec(vece, t, a, sh);
3514    }
3515    tcg_gen_add_vec(vece, d, d, t);
3516}
3517
3518void gen_gvec_ursra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3519                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3520{
3521    static const TCGOpcode vecop_list[] = {
3522        INDEX_op_shri_vec, INDEX_op_add_vec, 0
3523    };
3524    static const GVecGen2i ops[4] = {
3525        { .fni8 = gen_ursra8_i64,
3526          .fniv = gen_ursra_vec,
3527          .fno = gen_helper_gvec_ursra_b,
3528          .opt_opc = vecop_list,
3529          .load_dest = true,
3530          .vece = MO_8 },
3531        { .fni8 = gen_ursra16_i64,
3532          .fniv = gen_ursra_vec,
3533          .fno = gen_helper_gvec_ursra_h,
3534          .opt_opc = vecop_list,
3535          .load_dest = true,
3536          .vece = MO_16 },
3537        { .fni4 = gen_ursra32_i32,
3538          .fniv = gen_ursra_vec,
3539          .fno = gen_helper_gvec_ursra_s,
3540          .opt_opc = vecop_list,
3541          .load_dest = true,
3542          .vece = MO_32 },
3543        { .fni8 = gen_ursra64_i64,
3544          .fniv = gen_ursra_vec,
3545          .fno = gen_helper_gvec_ursra_d,
3546          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3547          .opt_opc = vecop_list,
3548          .load_dest = true,
3549          .vece = MO_64 },
3550    };
3551
3552    /* tszimm encoding produces immediates in the range [1..esize] */
3553    tcg_debug_assert(shift > 0);
3554    tcg_debug_assert(shift <= (8 << vece));
3555
3556    tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3557}
3558
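    /*
     * SRI: shift right and insert.  Shift the source right and deposit
     * it into the destination, leaving the top 'shift' bits of each
     * destination element unchanged.
     */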
3559static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3560{
3561    uint64_t mask = dup_const(MO_8, 0xff >> shift);
3562    TCGv_i64 t = tcg_temp_new_i64();
3563
3564    tcg_gen_shri_i64(t, a, shift);
3565    tcg_gen_andi_i64(t, t, mask);
3566    tcg_gen_andi_i64(d, d, ~mask);
3567    tcg_gen_or_i64(d, d, t);
3568}
3569
3570static void gen_shr16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3571{
3572    uint64_t mask = dup_const(MO_16, 0xffff >> shift);
3573    TCGv_i64 t = tcg_temp_new_i64();
3574
3575    tcg_gen_shri_i64(t, a, shift);
3576    tcg_gen_andi_i64(t, t, mask);
3577    tcg_gen_andi_i64(d, d, ~mask);
3578    tcg_gen_or_i64(d, d, t);
3579}
3580
3581static void gen_shr32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3582{
3583    tcg_gen_shri_i32(a, a, shift);
3584    tcg_gen_deposit_i32(d, d, a, 0, 32 - shift);
3585}
3586
3587static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3588{
3589    tcg_gen_shri_i64(a, a, shift);
3590    tcg_gen_deposit_i64(d, d, a, 0, 64 - shift);
3591}
3592
3593static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3594{
3595    TCGv_vec t = tcg_temp_new_vec_matching(d);
3596    TCGv_vec m = tcg_temp_new_vec_matching(d);
3597
3598    tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh));
3599    tcg_gen_shri_vec(vece, t, a, sh);
3600    tcg_gen_and_vec(vece, d, d, m);
3601    tcg_gen_or_vec(vece, d, d, t);
3602}
3603
3604void gen_gvec_sri(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3605                  int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3606{
3607    static const TCGOpcode vecop_list[] = { INDEX_op_shri_vec, 0 };
3608    const GVecGen2i ops[4] = {
3609        { .fni8 = gen_shr8_ins_i64,
3610          .fniv = gen_shr_ins_vec,
3611          .fno = gen_helper_gvec_sri_b,
3612          .load_dest = true,
3613          .opt_opc = vecop_list,
3614          .vece = MO_8 },
3615        { .fni8 = gen_shr16_ins_i64,
3616          .fniv = gen_shr_ins_vec,
3617          .fno = gen_helper_gvec_sri_h,
3618          .load_dest = true,
3619          .opt_opc = vecop_list,
3620          .vece = MO_16 },
3621        { .fni4 = gen_shr32_ins_i32,
3622          .fniv = gen_shr_ins_vec,
3623          .fno = gen_helper_gvec_sri_s,
3624          .load_dest = true,
3625          .opt_opc = vecop_list,
3626          .vece = MO_32 },
3627        { .fni8 = gen_shr64_ins_i64,
3628          .fniv = gen_shr_ins_vec,
3629          .fno = gen_helper_gvec_sri_d,
3630          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3631          .load_dest = true,
3632          .opt_opc = vecop_list,
3633          .vece = MO_64 },
3634    };
3635
3636    /* tszimm encoding produces immediates in the range [1..esize]. */
3637    tcg_debug_assert(shift > 0);
3638    tcg_debug_assert(shift <= (8 << vece));
3639
3640    /* Shift of esize leaves destination unchanged. */
3641    if (shift < (8 << vece)) {
3642        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3643    } else {
3644        /* Nop, but we do need to clear the tail. */
3645        tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
3646    }
3647}
3648
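    /*
     * SLI: shift left and insert.  Shift the source left and deposit it
     * into the destination, leaving the low 'shift' bits of each
     * destination element unchanged.
     */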
3649static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3650{
3651    uint64_t mask = dup_const(MO_8, 0xff << shift);
3652    TCGv_i64 t = tcg_temp_new_i64();
3653
3654    tcg_gen_shli_i64(t, a, shift);
3655    tcg_gen_andi_i64(t, t, mask);
3656    tcg_gen_andi_i64(d, d, ~mask);
3657    tcg_gen_or_i64(d, d, t);
3658}
3659
3660static void gen_shl16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3661{
3662    uint64_t mask = dup_const(MO_16, 0xffff << shift);
3663    TCGv_i64 t = tcg_temp_new_i64();
3664
3665    tcg_gen_shli_i64(t, a, shift);
3666    tcg_gen_andi_i64(t, t, mask);
3667    tcg_gen_andi_i64(d, d, ~mask);
3668    tcg_gen_or_i64(d, d, t);
3669}
3670
3671static void gen_shl32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3672{
3673    tcg_gen_deposit_i32(d, d, a, shift, 32 - shift);
3674}
3675
3676static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3677{
3678    tcg_gen_deposit_i64(d, d, a, shift, 64 - shift);
3679}
3680
3681static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3682{
3683    TCGv_vec t = tcg_temp_new_vec_matching(d);
3684    TCGv_vec m = tcg_temp_new_vec_matching(d);
3685
3686    tcg_gen_shli_vec(vece, t, a, sh);
3687    tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK(0, sh));
3688    tcg_gen_and_vec(vece, d, d, m);
3689    tcg_gen_or_vec(vece, d, d, t);
3690}
3691
3692void gen_gvec_sli(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3693                  int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3694{
3695    static const TCGOpcode vecop_list[] = { INDEX_op_shli_vec, 0 };
3696    const GVecGen2i ops[4] = {
3697        { .fni8 = gen_shl8_ins_i64,
3698          .fniv = gen_shl_ins_vec,
3699          .fno = gen_helper_gvec_sli_b,
3700          .load_dest = true,
3701          .opt_opc = vecop_list,
3702          .vece = MO_8 },
3703        { .fni8 = gen_shl16_ins_i64,
3704          .fniv = gen_shl_ins_vec,
3705          .fno = gen_helper_gvec_sli_h,
3706          .load_dest = true,
3707          .opt_opc = vecop_list,
3708          .vece = MO_16 },
3709        { .fni4 = gen_shl32_ins_i32,
3710          .fniv = gen_shl_ins_vec,
3711          .fno = gen_helper_gvec_sli_s,
3712          .load_dest = true,
3713          .opt_opc = vecop_list,
3714          .vece = MO_32 },
3715        { .fni8 = gen_shl64_ins_i64,
3716          .fniv = gen_shl_ins_vec,
3717          .fno = gen_helper_gvec_sli_d,
3718          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3719          .load_dest = true,
3720          .opt_opc = vecop_list,
3721          .vece = MO_64 },
3722    };
3723
3724    /* tszimm encoding produces immediates in the range [0..esize-1]. */
3725    tcg_debug_assert(shift >= 0);
3726    tcg_debug_assert(shift < (8 << vece));
3727
3728    if (shift == 0) {
3729        tcg_gen_gvec_mov(vece, rd_ofs, rm_ofs, opr_sz, max_sz);
3730    } else {
3731        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3732    }
3733}
3734
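    /*
     * Multiply-accumulate and multiply-subtract expanders:
     * MLA computes d += a * b and MLS computes d -= a * b per element.
     */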
3735static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3736{
3737    gen_helper_neon_mul_u8(a, a, b);
3738    gen_helper_neon_add_u8(d, d, a);
3739}
3740
3741static void gen_mls8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3742{
3743    gen_helper_neon_mul_u8(a, a, b);
3744    gen_helper_neon_sub_u8(d, d, a);
3745}
3746
3747static void gen_mla16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3748{
3749    gen_helper_neon_mul_u16(a, a, b);
3750    gen_helper_neon_add_u16(d, d, a);
3751}
3752
3753static void gen_mls16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3754{
3755    gen_helper_neon_mul_u16(a, a, b);
3756    gen_helper_neon_sub_u16(d, d, a);
3757}
3758
3759static void gen_mla32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3760{
3761    tcg_gen_mul_i32(a, a, b);
3762    tcg_gen_add_i32(d, d, a);
3763}
3764
3765static void gen_mls32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3766{
3767    tcg_gen_mul_i32(a, a, b);
3768    tcg_gen_sub_i32(d, d, a);
3769}
3770
3771static void gen_mla64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
3772{
3773    tcg_gen_mul_i64(a, a, b);
3774    tcg_gen_add_i64(d, d, a);
3775}
3776
3777static void gen_mls64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
3778{
3779    tcg_gen_mul_i64(a, a, b);
3780    tcg_gen_sub_i64(d, d, a);
3781}
3782
3783static void gen_mla_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
3784{
3785    tcg_gen_mul_vec(vece, a, a, b);
3786    tcg_gen_add_vec(vece, d, d, a);
3787}
3788
3789static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
3790{
3791    tcg_gen_mul_vec(vece, a, a, b);
3792    tcg_gen_sub_vec(vece, d, d, a);
3793}
3794
3795/* Note that while NEON does not support VMLA and VMLS as 64-bit ops,
3796 * these tables are shared with AArch64 which does support them.
3797 */
3798void gen_gvec_mla(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3799                  uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3800{
3801    static const TCGOpcode vecop_list[] = {
3802        INDEX_op_mul_vec, INDEX_op_add_vec, 0
3803    };
3804    static const GVecGen3 ops[4] = {
3805        { .fni4 = gen_mla8_i32,
3806          .fniv = gen_mla_vec,
3807          .load_dest = true,
3808          .opt_opc = vecop_list,
3809          .vece = MO_8 },
3810        { .fni4 = gen_mla16_i32,
3811          .fniv = gen_mla_vec,
3812          .load_dest = true,
3813          .opt_opc = vecop_list,
3814          .vece = MO_16 },
3815        { .fni4 = gen_mla32_i32,
3816          .fniv = gen_mla_vec,
3817          .load_dest = true,
3818          .opt_opc = vecop_list,
3819          .vece = MO_32 },
3820        { .fni8 = gen_mla64_i64,
3821          .fniv = gen_mla_vec,
3822          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3823          .load_dest = true,
3824          .opt_opc = vecop_list,
3825          .vece = MO_64 },
3826    };
3827    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
3828}
3829
3830void gen_gvec_mls(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3831                  uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3832{
3833    static const TCGOpcode vecop_list[] = {
3834        INDEX_op_mul_vec, INDEX_op_sub_vec, 0
3835    };
3836    static const GVecGen3 ops[4] = {
3837        { .fni4 = gen_mls8_i32,
3838          .fniv = gen_mls_vec,
3839          .load_dest = true,
3840          .opt_opc = vecop_list,
3841          .vece = MO_8 },
3842        { .fni4 = gen_mls16_i32,
3843          .fniv = gen_mls_vec,
3844          .load_dest = true,
3845          .opt_opc = vecop_list,
3846          .vece = MO_16 },
3847        { .fni4 = gen_mls32_i32,
3848          .fniv = gen_mls_vec,
3849          .load_dest = true,
3850          .opt_opc = vecop_list,
3851          .vece = MO_32 },
3852        { .fni8 = gen_mls64_i64,
3853          .fniv = gen_mls_vec,
3854          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3855          .load_dest = true,
3856          .opt_opc = vecop_list,
3857          .vece = MO_64 },
3858    };
3859    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
3860}
3861
3862/* CMTST : test is "if (X & Y != 0)": all-ones when true, zero when false. */
3863static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3864{
3865    tcg_gen_and_i32(d, a, b);
3866    tcg_gen_setcondi_i32(TCG_COND_NE, d, d, 0);
3867    tcg_gen_neg_i32(d, d);
3868}
3869
3870void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
3871{
3872    tcg_gen_and_i64(d, a, b);
3873    tcg_gen_setcondi_i64(TCG_COND_NE, d, d, 0);
3874    tcg_gen_neg_i64(d, d);
3875}
3876
3877static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
3878{
3879    tcg_gen_and_vec(vece, d, a, b);
3880    tcg_gen_dupi_vec(vece, a, 0);
3881    tcg_gen_cmp_vec(TCG_COND_NE, vece, d, d, a);
3882}
3883
3884void gen_gvec_cmtst(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3885                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3886{
3887    static const TCGOpcode vecop_list[] = { INDEX_op_cmp_vec, 0 };
3888    static const GVecGen3 ops[4] = {
3889        { .fni4 = gen_helper_neon_tst_u8,
3890          .fniv = gen_cmtst_vec,
3891          .opt_opc = vecop_list,
3892          .vece = MO_8 },
3893        { .fni4 = gen_helper_neon_tst_u16,
3894          .fniv = gen_cmtst_vec,
3895          .opt_opc = vecop_list,
3896          .vece = MO_16 },
3897        { .fni4 = gen_cmtst_i32,
3898          .fniv = gen_cmtst_vec,
3899          .opt_opc = vecop_list,
3900          .vece = MO_32 },
3901        { .fni8 = gen_cmtst_i64,
3902          .fniv = gen_cmtst_vec,
3903          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3904          .opt_opc = vecop_list,
3905          .vece = MO_64 },
3906    };
3907    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
3908}
3909
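    /*
     * USHL: shift by a signed, per-element count taken from the low byte
     * of the shift operand.  Positive counts shift left, negative counts
     * shift right; counts whose magnitude is >= the element size yield 0.
     */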
3910void gen_ushl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
3911{
3912    TCGv_i32 lval = tcg_temp_new_i32();
3913    TCGv_i32 rval = tcg_temp_new_i32();
3914    TCGv_i32 lsh = tcg_temp_new_i32();
3915    TCGv_i32 rsh = tcg_temp_new_i32();
3916    TCGv_i32 zero = tcg_constant_i32(0);
3917    TCGv_i32 max = tcg_constant_i32(32);
3918
3919    /*
3920     * Rely on the TCG guarantee that out of range shifts produce
3921     * unspecified results, not undefined behaviour (i.e. no trap).
3922     * Discard out-of-range results after the fact.
3923     */
3924    tcg_gen_ext8s_i32(lsh, shift);
3925    tcg_gen_neg_i32(rsh, lsh);
3926    tcg_gen_shl_i32(lval, src, lsh);
3927    tcg_gen_shr_i32(rval, src, rsh);
3928    tcg_gen_movcond_i32(TCG_COND_LTU, dst, lsh, max, lval, zero);
3929    tcg_gen_movcond_i32(TCG_COND_LTU, dst, rsh, max, rval, dst);
3930}
3931
3932void gen_ushl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
3933{
3934    TCGv_i64 lval = tcg_temp_new_i64();
3935    TCGv_i64 rval = tcg_temp_new_i64();
3936    TCGv_i64 lsh = tcg_temp_new_i64();
3937    TCGv_i64 rsh = tcg_temp_new_i64();
3938    TCGv_i64 zero = tcg_constant_i64(0);
3939    TCGv_i64 max = tcg_constant_i64(64);
3940
3941    /*
3942     * Rely on the TCG guarantee that out of range shifts produce
3943     * unspecified results, not undefined behaviour (i.e. no trap).
3944     * Discard out-of-range results after the fact.
3945     */
3946    tcg_gen_ext8s_i64(lsh, shift);
3947    tcg_gen_neg_i64(rsh, lsh);
3948    tcg_gen_shl_i64(lval, src, lsh);
3949    tcg_gen_shr_i64(rval, src, rsh);
3950    tcg_gen_movcond_i64(TCG_COND_LTU, dst, lsh, max, lval, zero);
3951    tcg_gen_movcond_i64(TCG_COND_LTU, dst, rsh, max, rval, dst);
3952}
3953
3954static void gen_ushl_vec(unsigned vece, TCGv_vec dst,
3955                         TCGv_vec src, TCGv_vec shift)
3956{
3957    TCGv_vec lval = tcg_temp_new_vec_matching(dst);
3958    TCGv_vec rval = tcg_temp_new_vec_matching(dst);
3959    TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
3960    TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
3961    TCGv_vec msk, max;
3962
3963    tcg_gen_neg_vec(vece, rsh, shift);
3964    if (vece == MO_8) {
3965        tcg_gen_mov_vec(lsh, shift);
3966    } else {
3967        msk = tcg_temp_new_vec_matching(dst);
3968        tcg_gen_dupi_vec(vece, msk, 0xff);
3969        tcg_gen_and_vec(vece, lsh, shift, msk);
3970        tcg_gen_and_vec(vece, rsh, rsh, msk);
3971    }
3972
3973    /*
3974     * Rely on the TCG guarantee that out of range shifts produce
3975     * unspecified results, not undefined behaviour (i.e. no trap).
3976     * Discard out-of-range results after the fact.
3977     */
3978    tcg_gen_shlv_vec(vece, lval, src, lsh);
3979    tcg_gen_shrv_vec(vece, rval, src, rsh);
3980
3981    max = tcg_temp_new_vec_matching(dst);
3982    tcg_gen_dupi_vec(vece, max, 8 << vece);
3983
3984    /*
3985     * The choice of LT (signed) and GEU (unsigned) is biased toward
3986     * the instructions of the x86_64 host.  For MO_8, the whole byte
3987     * is significant so we must use an unsigned compare; otherwise we
3988     * have already masked to a byte and so a signed compare works.
3989     * Other tcg hosts have a full set of comparisons and do not care.
3990     */
3991    if (vece == MO_8) {
3992        tcg_gen_cmp_vec(TCG_COND_GEU, vece, lsh, lsh, max);
3993        tcg_gen_cmp_vec(TCG_COND_GEU, vece, rsh, rsh, max);
3994        tcg_gen_andc_vec(vece, lval, lval, lsh);
3995        tcg_gen_andc_vec(vece, rval, rval, rsh);
3996    } else {
3997        tcg_gen_cmp_vec(TCG_COND_LT, vece, lsh, lsh, max);
3998        tcg_gen_cmp_vec(TCG_COND_LT, vece, rsh, rsh, max);
3999        tcg_gen_and_vec(vece, lval, lval, lsh);
4000        tcg_gen_and_vec(vece, rval, rval, rsh);
4001    }
4002    tcg_gen_or_vec(vece, dst, lval, rval);
4003}
4004
4005void gen_gvec_ushl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4006                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4007{
4008    static const TCGOpcode vecop_list[] = {
4009        INDEX_op_neg_vec, INDEX_op_shlv_vec,
4010        INDEX_op_shrv_vec, INDEX_op_cmp_vec, 0
4011    };
4012    static const GVecGen3 ops[4] = {
4013        { .fniv = gen_ushl_vec,
4014          .fno = gen_helper_gvec_ushl_b,
4015          .opt_opc = vecop_list,
4016          .vece = MO_8 },
4017        { .fniv = gen_ushl_vec,
4018          .fno = gen_helper_gvec_ushl_h,
4019          .opt_opc = vecop_list,
4020          .vece = MO_16 },
4021        { .fni4 = gen_ushl_i32,
4022          .fniv = gen_ushl_vec,
4023          .opt_opc = vecop_list,
4024          .vece = MO_32 },
4025        { .fni8 = gen_ushl_i64,
4026          .fniv = gen_ushl_vec,
4027          .opt_opc = vecop_list,
4028          .vece = MO_64 },
4029    };
4030    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4031}
4032
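    /*
     * SSHL: as USHL, but a negative count is an arithmetic shift right.
     * Right-shift counts >= the element size are clamped to esize - 1 so
     * the result is the replicated sign bit; left-shift counts >= the
     * element size still produce zero.
     */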
4033void gen_sshl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
4034{
4035    TCGv_i32 lval = tcg_temp_new_i32();
4036    TCGv_i32 rval = tcg_temp_new_i32();
4037    TCGv_i32 lsh = tcg_temp_new_i32();
4038    TCGv_i32 rsh = tcg_temp_new_i32();
4039    TCGv_i32 zero = tcg_constant_i32(0);
4040    TCGv_i32 max = tcg_constant_i32(31);
4041
4042    /*
4043     * Rely on the TCG guarantee that out of range shifts produce
4044     * unspecified results, not undefined behaviour (i.e. no trap).
4045     * Discard out-of-range results after the fact.
4046     */
4047    tcg_gen_ext8s_i32(lsh, shift);
4048    tcg_gen_neg_i32(rsh, lsh);
4049    tcg_gen_shl_i32(lval, src, lsh);
4050    tcg_gen_umin_i32(rsh, rsh, max);
4051    tcg_gen_sar_i32(rval, src, rsh);
4052    tcg_gen_movcond_i32(TCG_COND_LEU, lval, lsh, max, lval, zero);
4053    tcg_gen_movcond_i32(TCG_COND_LT, dst, lsh, zero, rval, lval);
4054}
4055
4056void gen_sshl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
4057{
4058    TCGv_i64 lval = tcg_temp_new_i64();
4059    TCGv_i64 rval = tcg_temp_new_i64();
4060    TCGv_i64 lsh = tcg_temp_new_i64();
4061    TCGv_i64 rsh = tcg_temp_new_i64();
4062    TCGv_i64 zero = tcg_constant_i64(0);
4063    TCGv_i64 max = tcg_constant_i64(63);
4064
4065    /*
4066     * Rely on the TCG guarantee that out of range shifts produce
4067     * unspecified results, not undefined behaviour (i.e. no trap).
4068     * Discard out-of-range results after the fact.
4069     */
4070    tcg_gen_ext8s_i64(lsh, shift);
4071    tcg_gen_neg_i64(rsh, lsh);
4072    tcg_gen_shl_i64(lval, src, lsh);
4073    tcg_gen_umin_i64(rsh, rsh, max);
4074    tcg_gen_sar_i64(rval, src, rsh);
4075    tcg_gen_movcond_i64(TCG_COND_LEU, lval, lsh, max, lval, zero);
4076    tcg_gen_movcond_i64(TCG_COND_LT, dst, lsh, zero, rval, lval);
4077}
4078
4079static void gen_sshl_vec(unsigned vece, TCGv_vec dst,
4080                         TCGv_vec src, TCGv_vec shift)
4081{
4082    TCGv_vec lval = tcg_temp_new_vec_matching(dst);
4083    TCGv_vec rval = tcg_temp_new_vec_matching(dst);
4084    TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
4085    TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
4086    TCGv_vec tmp = tcg_temp_new_vec_matching(dst);
4087
4088    /*
4089     * Rely on the TCG guarantee that out of range shifts produce
4090     * unspecified results, not undefined behaviour (i.e. no trap).
4091     * Discard out-of-range results after the fact.
4092     */
4093    tcg_gen_neg_vec(vece, rsh, shift);
4094    if (vece == MO_8) {
4095        tcg_gen_mov_vec(lsh, shift);
4096    } else {
4097        tcg_gen_dupi_vec(vece, tmp, 0xff);
4098        tcg_gen_and_vec(vece, lsh, shift, tmp);
4099        tcg_gen_and_vec(vece, rsh, rsh, tmp);
4100    }
4101
4102    /* Bound rsh so out of bound right shift gets -1.  */
4103    tcg_gen_dupi_vec(vece, tmp, (8 << vece) - 1);
4104    tcg_gen_umin_vec(vece, rsh, rsh, tmp);
4105    tcg_gen_cmp_vec(TCG_COND_GT, vece, tmp, lsh, tmp);
4106
4107    tcg_gen_shlv_vec(vece, lval, src, lsh);
4108    tcg_gen_sarv_vec(vece, rval, src, rsh);
4109
4110    /* Select in-bound left shift.  */
4111    tcg_gen_andc_vec(vece, lval, lval, tmp);
4112
4113    /* Select between left and right shift.  */
4114    if (vece == MO_8) {
4115        tcg_gen_dupi_vec(vece, tmp, 0);
4116        tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, rval, lval);
4117    } else {
4118        tcg_gen_dupi_vec(vece, tmp, 0x80);
4119        tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, lval, rval);
4120    }
4121}
4122
4123void gen_gvec_sshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4124                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4125{
4126    static const TCGOpcode vecop_list[] = {
4127        INDEX_op_neg_vec, INDEX_op_umin_vec, INDEX_op_shlv_vec,
4128        INDEX_op_sarv_vec, INDEX_op_cmp_vec, INDEX_op_cmpsel_vec, 0
4129    };
4130    static const GVecGen3 ops[4] = {
4131        { .fniv = gen_sshl_vec,
4132          .fno = gen_helper_gvec_sshl_b,
4133          .opt_opc = vecop_list,
4134          .vece = MO_8 },
4135        { .fniv = gen_sshl_vec,
4136          .fno = gen_helper_gvec_sshl_h,
4137          .opt_opc = vecop_list,
4138          .vece = MO_16 },
4139        { .fni4 = gen_sshl_i32,
4140          .fniv = gen_sshl_vec,
4141          .opt_opc = vecop_list,
4142          .vece = MO_32 },
4143        { .fni8 = gen_sshl_i64,
4144          .fniv = gen_sshl_vec,
4145          .opt_opc = vecop_list,
4146          .vece = MO_64 },
4147    };
4148    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4149}
4150
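    /*
     * Saturating add/sub with saturation flag.  Compute both the wrapping
     * and the saturating result; elements where the two differ set bits
     * in the QC accumulator (vfp.qc), passed here as the 'sat' operand.
     */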
4151static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4152                          TCGv_vec a, TCGv_vec b)
4153{
4154    TCGv_vec x = tcg_temp_new_vec_matching(t);
4155    tcg_gen_add_vec(vece, x, a, b);
4156    tcg_gen_usadd_vec(vece, t, a, b);
4157    tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4158    tcg_gen_or_vec(vece, sat, sat, x);
4159}
4160
4161void gen_gvec_uqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4162                       uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4163{
4164    static const TCGOpcode vecop_list[] = {
4165        INDEX_op_usadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
4166    };
4167    static const GVecGen4 ops[4] = {
4168        { .fniv = gen_uqadd_vec,
4169          .fno = gen_helper_gvec_uqadd_b,
4170          .write_aofs = true,
4171          .opt_opc = vecop_list,
4172          .vece = MO_8 },
4173        { .fniv = gen_uqadd_vec,
4174          .fno = gen_helper_gvec_uqadd_h,
4175          .write_aofs = true,
4176          .opt_opc = vecop_list,
4177          .vece = MO_16 },
4178        { .fniv = gen_uqadd_vec,
4179          .fno = gen_helper_gvec_uqadd_s,
4180          .write_aofs = true,
4181          .opt_opc = vecop_list,
4182          .vece = MO_32 },
4183        { .fniv = gen_uqadd_vec,
4184          .fno = gen_helper_gvec_uqadd_d,
4185          .write_aofs = true,
4186          .opt_opc = vecop_list,
4187          .vece = MO_64 },
4188    };
4189    tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4190                   rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4191}
4192
4193static void gen_sqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4194                          TCGv_vec a, TCGv_vec b)
4195{
4196    TCGv_vec x = tcg_temp_new_vec_matching(t);
4197    tcg_gen_add_vec(vece, x, a, b);
4198    tcg_gen_ssadd_vec(vece, t, a, b);
4199    tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4200    tcg_gen_or_vec(vece, sat, sat, x);
4201}
4202
4203void gen_gvec_sqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4204                       uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4205{
4206    static const TCGOpcode vecop_list[] = {
4207        INDEX_op_ssadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
4208    };
4209    static const GVecGen4 ops[4] = {
4210        { .fniv = gen_sqadd_vec,
4211          .fno = gen_helper_gvec_sqadd_b,
4212          .opt_opc = vecop_list,
4213          .write_aofs = true,
4214          .vece = MO_8 },
4215        { .fniv = gen_sqadd_vec,
4216          .fno = gen_helper_gvec_sqadd_h,
4217          .opt_opc = vecop_list,
4218          .write_aofs = true,
4219          .vece = MO_16 },
4220        { .fniv = gen_sqadd_vec,
4221          .fno = gen_helper_gvec_sqadd_s,
4222          .opt_opc = vecop_list,
4223          .write_aofs = true,
4224          .vece = MO_32 },
4225        { .fniv = gen_sqadd_vec,
4226          .fno = gen_helper_gvec_sqadd_d,
4227          .opt_opc = vecop_list,
4228          .write_aofs = true,
4229          .vece = MO_64 },
4230    };
4231    tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4232                   rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4233}
4234
4235static void gen_uqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4236                          TCGv_vec a, TCGv_vec b)
4237{
4238    TCGv_vec x = tcg_temp_new_vec_matching(t);
4239    tcg_gen_sub_vec(vece, x, a, b);
4240    tcg_gen_ussub_vec(vece, t, a, b);
4241    tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4242    tcg_gen_or_vec(vece, sat, sat, x);
4243}
4244
4245void gen_gvec_uqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4246                       uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4247{
4248    static const TCGOpcode vecop_list[] = {
4249        INDEX_op_ussub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
4250    };
4251    static const GVecGen4 ops[4] = {
4252        { .fniv = gen_uqsub_vec,
4253          .fno = gen_helper_gvec_uqsub_b,
4254          .opt_opc = vecop_list,
4255          .write_aofs = true,
4256          .vece = MO_8 },
4257        { .fniv = gen_uqsub_vec,
4258          .fno = gen_helper_gvec_uqsub_h,
4259          .opt_opc = vecop_list,
4260          .write_aofs = true,
4261          .vece = MO_16 },
4262        { .fniv = gen_uqsub_vec,
4263          .fno = gen_helper_gvec_uqsub_s,
4264          .opt_opc = vecop_list,
4265          .write_aofs = true,
4266          .vece = MO_32 },
4267        { .fniv = gen_uqsub_vec,
4268          .fno = gen_helper_gvec_uqsub_d,
4269          .opt_opc = vecop_list,
4270          .write_aofs = true,
4271          .vece = MO_64 },
4272    };
4273    tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4274                   rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4275}
4276
4277static void gen_sqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4278                          TCGv_vec a, TCGv_vec b)
4279{
4280    TCGv_vec x = tcg_temp_new_vec_matching(t);
4281    tcg_gen_sub_vec(vece, x, a, b);
4282    tcg_gen_sssub_vec(vece, t, a, b);
4283    tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4284    tcg_gen_or_vec(vece, sat, sat, x);
4285}
4286
4287void gen_gvec_sqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4288                       uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4289{
4290    static const TCGOpcode vecop_list[] = {
4291        INDEX_op_sssub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
4292    };
4293    static const GVecGen4 ops[4] = {
4294        { .fniv = gen_sqsub_vec,
4295          .fno = gen_helper_gvec_sqsub_b,
4296          .opt_opc = vecop_list,
4297          .write_aofs = true,
4298          .vece = MO_8 },
4299        { .fniv = gen_sqsub_vec,
4300          .fno = gen_helper_gvec_sqsub_h,
4301          .opt_opc = vecop_list,
4302          .write_aofs = true,
4303          .vece = MO_16 },
4304        { .fniv = gen_sqsub_vec,
4305          .fno = gen_helper_gvec_sqsub_s,
4306          .opt_opc = vecop_list,
4307          .write_aofs = true,
4308          .vece = MO_32 },
4309        { .fniv = gen_sqsub_vec,
4310          .fno = gen_helper_gvec_sqsub_d,
4311          .opt_opc = vecop_list,
4312          .write_aofs = true,
4313          .vece = MO_64 },
4314    };
4315    tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4316                   rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4317}
4318
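    /*
     * SABD/UABD: absolute difference, |a - b|.  The vector form uses
     * max(a, b) - min(a, b); the scalar form computes both differences
     * and selects the non-negative one with a movcond.
     */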
4319static void gen_sabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4320{
4321    TCGv_i32 t = tcg_temp_new_i32();
4322
4323    tcg_gen_sub_i32(t, a, b);
4324    tcg_gen_sub_i32(d, b, a);
4325    tcg_gen_movcond_i32(TCG_COND_LT, d, a, b, d, t);
4326}
4327
4328static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4329{
4330    TCGv_i64 t = tcg_temp_new_i64();
4331
4332    tcg_gen_sub_i64(t, a, b);
4333    tcg_gen_sub_i64(d, b, a);
4334    tcg_gen_movcond_i64(TCG_COND_LT, d, a, b, d, t);
4335}
4336
4337static void gen_sabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4338{
4339    TCGv_vec t = tcg_temp_new_vec_matching(d);
4340
4341    tcg_gen_smin_vec(vece, t, a, b);
4342    tcg_gen_smax_vec(vece, d, a, b);
4343    tcg_gen_sub_vec(vece, d, d, t);
4344}
4345
4346void gen_gvec_sabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4347                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4348{
4349    static const TCGOpcode vecop_list[] = {
4350        INDEX_op_sub_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0
4351    };
4352    static const GVecGen3 ops[4] = {
4353        { .fniv = gen_sabd_vec,
4354          .fno = gen_helper_gvec_sabd_b,
4355          .opt_opc = vecop_list,
4356          .vece = MO_8 },
4357        { .fniv = gen_sabd_vec,
4358          .fno = gen_helper_gvec_sabd_h,
4359          .opt_opc = vecop_list,
4360          .vece = MO_16 },
4361        { .fni4 = gen_sabd_i32,
4362          .fniv = gen_sabd_vec,
4363          .fno = gen_helper_gvec_sabd_s,
4364          .opt_opc = vecop_list,
4365          .vece = MO_32 },
4366        { .fni8 = gen_sabd_i64,
4367          .fniv = gen_sabd_vec,
4368          .fno = gen_helper_gvec_sabd_d,
4369          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4370          .opt_opc = vecop_list,
4371          .vece = MO_64 },
4372    };
4373    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4374}
4375
4376static void gen_uabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4377{
4378    TCGv_i32 t = tcg_temp_new_i32();
4379
4380    tcg_gen_sub_i32(t, a, b);
4381    tcg_gen_sub_i32(d, b, a);
4382    tcg_gen_movcond_i32(TCG_COND_LTU, d, a, b, d, t);
4383}
4384
4385static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4386{
4387    TCGv_i64 t = tcg_temp_new_i64();
4388
4389    tcg_gen_sub_i64(t, a, b);
4390    tcg_gen_sub_i64(d, b, a);
4391    tcg_gen_movcond_i64(TCG_COND_LTU, d, a, b, d, t);
4392}
4393
4394static void gen_uabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4395{
4396    TCGv_vec t = tcg_temp_new_vec_matching(d);
4397
4398    tcg_gen_umin_vec(vece, t, a, b);
4399    tcg_gen_umax_vec(vece, d, a, b);
4400    tcg_gen_sub_vec(vece, d, d, t);
4401}
4402
4403void gen_gvec_uabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4404                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4405{
4406    static const TCGOpcode vecop_list[] = {
4407        INDEX_op_sub_vec, INDEX_op_umin_vec, INDEX_op_umax_vec, 0
4408    };
4409    static const GVecGen3 ops[4] = {
4410        { .fniv = gen_uabd_vec,
4411          .fno = gen_helper_gvec_uabd_b,
4412          .opt_opc = vecop_list,
4413          .vece = MO_8 },
4414        { .fniv = gen_uabd_vec,
4415          .fno = gen_helper_gvec_uabd_h,
4416          .opt_opc = vecop_list,
4417          .vece = MO_16 },
4418        { .fni4 = gen_uabd_i32,
4419          .fniv = gen_uabd_vec,
4420          .fno = gen_helper_gvec_uabd_s,
4421          .opt_opc = vecop_list,
4422          .vece = MO_32 },
4423        { .fni8 = gen_uabd_i64,
4424          .fniv = gen_uabd_vec,
4425          .fno = gen_helper_gvec_uabd_d,
4426          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4427          .opt_opc = vecop_list,
4428          .vece = MO_64 },
4429    };
4430    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4431}
4432
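    /*
     * SABA/UABA: absolute difference and accumulate, d += |a - b|.
     */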
4433static void gen_saba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4434{
4435    TCGv_i32 t = tcg_temp_new_i32();
4436    gen_sabd_i32(t, a, b);
4437    tcg_gen_add_i32(d, d, t);
4438}
4439
4440static void gen_saba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4441{
4442    TCGv_i64 t = tcg_temp_new_i64();
4443    gen_sabd_i64(t, a, b);
4444    tcg_gen_add_i64(d, d, t);
4445}
4446
4447static void gen_saba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4448{
4449    TCGv_vec t = tcg_temp_new_vec_matching(d);
4450    gen_sabd_vec(vece, t, a, b);
4451    tcg_gen_add_vec(vece, d, d, t);
4452}
4453
4454void gen_gvec_saba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4455                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4456{
4457    static const TCGOpcode vecop_list[] = {
4458        INDEX_op_sub_vec, INDEX_op_add_vec,
4459        INDEX_op_smin_vec, INDEX_op_smax_vec, 0
4460    };
4461    static const GVecGen3 ops[4] = {
4462        { .fniv = gen_saba_vec,
4463          .fno = gen_helper_gvec_saba_b,
4464          .opt_opc = vecop_list,
4465          .load_dest = true,
4466          .vece = MO_8 },
4467        { .fniv = gen_saba_vec,
4468          .fno = gen_helper_gvec_saba_h,
4469          .opt_opc = vecop_list,
4470          .load_dest = true,
4471          .vece = MO_16 },
4472        { .fni4 = gen_saba_i32,
4473          .fniv = gen_saba_vec,
4474          .fno = gen_helper_gvec_saba_s,
4475          .opt_opc = vecop_list,
4476          .load_dest = true,
4477          .vece = MO_32 },
4478        { .fni8 = gen_saba_i64,
4479          .fniv = gen_saba_vec,
4480          .fno = gen_helper_gvec_saba_d,
4481          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4482          .opt_opc = vecop_list,
4483          .load_dest = true,
4484          .vece = MO_64 },
4485    };
4486    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4487}
4488
4489static void gen_uaba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4490{
4491    TCGv_i32 t = tcg_temp_new_i32();
4492    gen_uabd_i32(t, a, b);
4493    tcg_gen_add_i32(d, d, t);
4494}
4495
4496static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4497{
4498    TCGv_i64 t = tcg_temp_new_i64();
4499    gen_uabd_i64(t, a, b);
4500    tcg_gen_add_i64(d, d, t);
4501}
4502
4503static void gen_uaba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4504{
4505    TCGv_vec t = tcg_temp_new_vec_matching(d);
4506    gen_uabd_vec(vece, t, a, b);
4507    tcg_gen_add_vec(vece, d, d, t);
4508}
4509
4510void gen_gvec_uaba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4511                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4512{
4513    static const TCGOpcode vecop_list[] = {
4514        INDEX_op_sub_vec, INDEX_op_add_vec,
4515        INDEX_op_umin_vec, INDEX_op_umax_vec, 0
4516    };
4517    static const GVecGen3 ops[4] = {
4518        { .fniv = gen_uaba_vec,
4519          .fno = gen_helper_gvec_uaba_b,
4520          .opt_opc = vecop_list,
4521          .load_dest = true,
4522          .vece = MO_8 },
4523        { .fniv = gen_uaba_vec,
4524          .fno = gen_helper_gvec_uaba_h,
4525          .opt_opc = vecop_list,
4526          .load_dest = true,
4527          .vece = MO_16 },
4528        { .fni4 = gen_uaba_i32,
4529          .fniv = gen_uaba_vec,
4530          .fno = gen_helper_gvec_uaba_s,
4531          .opt_opc = vecop_list,
4532          .load_dest = true,
4533          .vece = MO_32 },
4534        { .fni8 = gen_uaba_i64,
4535          .fniv = gen_uaba_vec,
4536          .fno = gen_helper_gvec_uaba_d,
4537          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4538          .opt_opc = vecop_list,
4539          .load_dest = true,
4540          .vece = MO_64 },
4541    };
4542    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4543}
4544
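    /*
     * Emit code for an AArch32 coprocessor register access (MRC/MCR or,
     * when is64 is set, MRRC/MCRR using the register pair rt:rt2),
     * including the permission checks and any traps the access may raise.
     */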
4545static void do_coproc_insn(DisasContext *s, int cpnum, int is64,
4546                           int opc1, int crn, int crm, int opc2,
4547                           bool isread, int rt, int rt2)
4548{
4549    uint32_t key = ENCODE_CP_REG(cpnum, is64, s->ns, crn, crm, opc1, opc2);
4550    const ARMCPRegInfo *ri = get_arm_cp_reginfo(s->cp_regs, key);
4551    TCGv_ptr tcg_ri = NULL;
4552    bool need_exit_tb = false;
4553    uint32_t syndrome;
4554
4555    /*
4556     * Note that since we are an implementation which takes an
4557     * exception on a trapped conditional instruction only if the
4558     * instruction passes its condition code check, we can take
4559     * advantage of the clause in the ARM ARM that allows us to set
4560     * the COND field in the instruction to 0xE in all cases.
4561     * We could fish the actual condition out of the insn (ARM)
4562     * or the condexec bits (Thumb) but it isn't necessary.
4563     */
4564    switch (cpnum) {
4565    case 14:
4566        if (is64) {
4567            syndrome = syn_cp14_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
4568                                         isread, false);
4569        } else {
4570            syndrome = syn_cp14_rt_trap(1, 0xe, opc1, opc2, crn, crm,
4571                                        rt, isread, false);
4572        }
4573        break;
4574    case 15:
4575        if (is64) {
4576            syndrome = syn_cp15_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
4577                                         isread, false);
4578        } else {
4579            syndrome = syn_cp15_rt_trap(1, 0xe, opc1, opc2, crn, crm,
4580                                        rt, isread, false);
4581        }
4582        break;
4583    default:
4584        /*
4585         * ARMv8 defines that only coprocessors 14 and 15 exist,
4586         * so this can only happen if this is an ARMv7 or earlier CPU,
4587         * in which case the syndrome information won't actually be
4588         * guest visible.
4589         */
4590        assert(!arm_dc_feature(s, ARM_FEATURE_V8));
4591        syndrome = syn_uncategorized();
4592        break;
4593    }
4594
4595    if (s->hstr_active && cpnum == 15 && s->current_el == 1) {
4596        /*
4597         * At EL1, check for a HSTR_EL2 trap, which must take precedence
4598         * over the UNDEF for "no such register" or the UNDEF for "access
4599         * permissions forbid this EL1 access". HSTR_EL2 traps from EL0
4600         * only happen if the cpreg doesn't UNDEF at EL0, so we do those in
4601         * access_check_cp_reg(), after the checks for whether the access
4602         * configurably trapped to EL1.
4603         */
4604        uint32_t maskbit = is64 ? crm : crn;
4605
4606        if (maskbit != 4 && maskbit != 14) {
4607            /* T4 and T14 are RES0 so never cause traps */
4608            TCGv_i32 t;
4609            DisasLabel over = gen_disas_label(s);
4610
4611            t = load_cpu_offset(offsetoflow32(CPUARMState, cp15.hstr_el2));
4612            tcg_gen_andi_i32(t, t, 1u << maskbit);
4613            tcg_gen_brcondi_i32(TCG_COND_EQ, t, 0, over.label);
4614
4615            gen_exception_insn(s, 0, EXCP_UDEF, syndrome);
4616            /*
4617             * gen_exception_insn() will set is_jmp to DISAS_NORETURN,
4618             * but since we're conditionally branching over it, we want
4619             * to assume continue-to-next-instruction.
4620             */
4621            s->base.is_jmp = DISAS_NEXT;
4622            set_disas_label(s, over);
4623        }
4624    }
4625
4626    if (!ri) {
4627        /*
4628         * Unknown register; this might be a guest error or a QEMU
4629         * unimplemented feature.
4630         */
4631        if (is64) {
4632            qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
4633                          "64 bit system register cp:%d opc1: %d crm:%d "
4634                          "(%s)\n",
4635                          isread ? "read" : "write", cpnum, opc1, crm,
4636                          s->ns ? "non-secure" : "secure");
4637        } else {
4638            qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
4639                          "system register cp:%d opc1:%d crn:%d crm:%d "
4640                          "opc2:%d (%s)\n",
4641                          isread ? "read" : "write", cpnum, opc1, crn,
4642                          crm, opc2, s->ns ? "non-secure" : "secure");
4643        }
4644        unallocated_encoding(s);
4645        return;
4646    }
4647
4648    /* Check access permissions */
4649    if (!cp_access_ok(s->current_el, ri, isread)) {
4650        unallocated_encoding(s);
4651        return;
4652    }
4653
4654    if ((s->hstr_active && s->current_el == 0) || ri->accessfn ||
4655        (ri->fgt && s->fgt_active) ||
4656        (arm_dc_feature(s, ARM_FEATURE_XSCALE) && cpnum < 14)) {
4657        /*
4658         * Emit code to perform further access permissions checks at
4659         * runtime; this may result in an exception.
4660         * Note that on XScale all cp0..c13 registers do an access check
4661         * call in order to handle c15_cpar.
4662         */
4663        gen_set_condexec(s);
4664        gen_update_pc(s, 0);
4665        tcg_ri = tcg_temp_new_ptr();
4666        gen_helper_access_check_cp_reg(tcg_ri, cpu_env,
4667                                       tcg_constant_i32(key),
4668                                       tcg_constant_i32(syndrome),
4669                                       tcg_constant_i32(isread));
4670    } else if (ri->type & ARM_CP_RAISES_EXC) {
4671        /*
4672         * The readfn or writefn might raise an exception;
4673         * synchronize the CPU state in case it does.
4674         */
4675        gen_set_condexec(s);
4676        gen_update_pc(s, 0);
4677    }
4678
4679    /* Handle special cases first */
4680    switch (ri->type & ARM_CP_SPECIAL_MASK) {
4681    case 0:
4682        break;
4683    case ARM_CP_NOP:
4684        return;
4685    case ARM_CP_WFI:
4686        if (isread) {
4687            unallocated_encoding(s);
4688        } else {
4689            gen_update_pc(s, curr_insn_len(s));
4690            s->base.is_jmp = DISAS_WFI;
4691        }
4692        return;
4693    default:
4694        g_assert_not_reached();
4695    }
4696
4697    if (ri->type & ARM_CP_IO) {
4698        /* I/O operations must end the TB here (whether read or write) */
4699        need_exit_tb = translator_io_start(&s->base);
4700    }
4701
4702    if (isread) {
4703        /* Read */
4704        if (is64) {
4705            TCGv_i64 tmp64;
4706            TCGv_i32 tmp;
4707            if (ri->type & ARM_CP_CONST) {
4708                tmp64 = tcg_constant_i64(ri->resetvalue);
4709            } else if (ri->readfn) {
4710                if (!tcg_ri) {
4711                    tcg_ri = gen_lookup_cp_reg(key);
4712                }
4713                tmp64 = tcg_temp_new_i64();
4714                gen_helper_get_cp_reg64(tmp64, cpu_env, tcg_ri);
4715            } else {
4716                tmp64 = tcg_temp_new_i64();
4717                tcg_gen_ld_i64(tmp64, cpu_env, ri->fieldoffset);
4718            }
4719            tmp = tcg_temp_new_i32();
4720            tcg_gen_extrl_i64_i32(tmp, tmp64);
4721            store_reg(s, rt, tmp);
4722            tmp = tcg_temp_new_i32();
4723            tcg_gen_extrh_i64_i32(tmp, tmp64);
4724            store_reg(s, rt2, tmp);
4725        } else {
4726            TCGv_i32 tmp;
4727            if (ri->type & ARM_CP_CONST) {
4728                tmp = tcg_constant_i32(ri->resetvalue);
4729            } else if (ri->readfn) {
4730                if (!tcg_ri) {
4731                    tcg_ri = gen_lookup_cp_reg(key);
4732                }
4733                tmp = tcg_temp_new_i32();
4734                gen_helper_get_cp_reg(tmp, cpu_env, tcg_ri);
4735            } else {
4736                tmp = load_cpu_offset(ri->fieldoffset);
4737            }
4738            if (rt == 15) {
4739                /* A destination register of r15 for a 32-bit load sets
4740                 * the condition codes from the high 4 bits of the value.
4741                 */
4742                gen_set_nzcv(tmp);
4743            } else {
4744                store_reg(s, rt, tmp);
4745            }
4746        }
4747    } else {
4748        /* Write */
4749        if (ri->type & ARM_CP_CONST) {
4750            /* If not forbidden by access permissions, treat as WI */
4751            return;
4752        }
4753
4754        if (is64) {
4755            TCGv_i32 tmplo, tmphi;
4756            TCGv_i64 tmp64 = tcg_temp_new_i64();
4757            tmplo = load_reg(s, rt);
4758            tmphi = load_reg(s, rt2);
4759            tcg_gen_concat_i32_i64(tmp64, tmplo, tmphi);
4760            if (ri->writefn) {
4761                if (!tcg_ri) {
4762                    tcg_ri = gen_lookup_cp_reg(key);
4763                }
4764                gen_helper_set_cp_reg64(cpu_env, tcg_ri, tmp64);
4765            } else {
4766                tcg_gen_st_i64(tmp64, cpu_env, ri->fieldoffset);
4767            }
4768        } else {
4769            TCGv_i32 tmp = load_reg(s, rt);
4770            if (ri->writefn) {
4771                if (!tcg_ri) {
4772                    tcg_ri = gen_lookup_cp_reg(key);
4773                }
4774                gen_helper_set_cp_reg(cpu_env, tcg_ri, tmp);
4775            } else {
4776                store_cpu_offset(tmp, ri->fieldoffset, 4);
4777            }
4778        }
4779    }
4780
4781    if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
4782        /*
4783         * A write to any coprocessor register that ends a TB
4784         * must rebuild the hflags for the next TB.
4785         */
4786        gen_rebuild_hflags(s, ri->type & ARM_CP_NEWEL);
4787        /*
4788         * We default to ending the TB on a coprocessor register write,
4789         * but allow this to be suppressed by the register definition
4790         * (usually only necessary to work around guest bugs).
4791         */
4792        need_exit_tb = true;
4793    }
4794    if (need_exit_tb) {
4795        gen_lookup_tb(s);
4796    }
4797}
4798
4799/* Decode XScale DSP or iWMMXt insn (in the copro space, cp=0 or 1) */
4800static void disas_xscale_insn(DisasContext *s, uint32_t insn)
4801{
4802    int cpnum = (insn >> 8) & 0xf;
4803
4804    if (extract32(s->c15_cpar, cpnum, 1) == 0) {
4805        unallocated_encoding(s);
4806    } else if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
4807        if (disas_iwmmxt_insn(s, insn)) {
4808            unallocated_encoding(s);
4809        }
4810    } else if (arm_dc_feature(s, ARM_FEATURE_XSCALE)) {
4811        if (disas_dsp_insn(s, insn)) {
4812            unallocated_encoding(s);
4813        }
4814    }
4815}
4816
4817/* Store a 64-bit value to a register pair.  Clobbers val.  */
4818static void gen_storeq_reg(DisasContext *s, int rlow, int rhigh, TCGv_i64 val)
4819{
4820    TCGv_i32 tmp;
4821    tmp = tcg_temp_new_i32();
4822    tcg_gen_extrl_i64_i32(tmp, val);
4823    store_reg(s, rlow, tmp);
4824    tmp = tcg_temp_new_i32();
4825    tcg_gen_extrh_i64_i32(tmp, val);
4826    store_reg(s, rhigh, tmp);
4827}
4828
4829/* Load a 64-bit value from a register pair and add it to val.  */
4830static void gen_addq(DisasContext *s, TCGv_i64 val, int rlow, int rhigh)
4831{
4832    TCGv_i64 tmp;
4833    TCGv_i32 tmpl;
4834    TCGv_i32 tmph;
4835
4836    /* Load the 64-bit value rhigh:rlow.  */
4837    tmpl = load_reg(s, rlow);
4838    tmph = load_reg(s, rhigh);
4839    tmp = tcg_temp_new_i64();
4840    tcg_gen_concat_i32_i64(tmp, tmpl, tmph);
4841    tcg_gen_add_i64(val, val, tmp);
4842}
4843
4844/* Set N and Z flags from hi|lo.  */
4845static void gen_logicq_cc(TCGv_i32 lo, TCGv_i32 hi)
4846{
4847    tcg_gen_mov_i32(cpu_NF, hi);
4848    tcg_gen_or_i32(cpu_ZF, lo, hi);
4849}
4850
4851/* Load/Store exclusive instructions are implemented by remembering
4852   the value/address loaded, and seeing if these are the same
4853   when the store is performed.  This should be sufficient to implement
4854   the architecturally mandated semantics, and avoids having to monitor
4855   regular stores.  The compare vs the remembered value is done during
4856   the cmpxchg operation, but we must compare the addresses manually.  */
4857static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
4858                               TCGv_i32 addr, int size)
4859{
4860    TCGv_i32 tmp = tcg_temp_new_i32();
4861    MemOp opc = size | MO_ALIGN | s->be_data;
4862
4863    s->is_ldex = true;
4864
4865    if (size == 3) {
4866        TCGv_i32 tmp2 = tcg_temp_new_i32();
4867        TCGv_i64 t64 = tcg_temp_new_i64();
4868
4869        /*
4870         * For AArch32, architecturally the 32-bit word at the lowest
4871         * address is always Rt and the one at addr+4 is Rt2, even if
4872         * the CPU is big-endian. That means we don't want to do a
4873         * gen_aa32_ld_i64(), which checks SCTLR_B as if for an
4874         * architecturally 64-bit access, but instead do a 64-bit access
4875         * using MO_BE if appropriate and then split the two halves.
4876         */
4877        TCGv taddr = gen_aa32_addr(s, addr, opc);
4878
4879        tcg_gen_qemu_ld_i64(t64, taddr, get_mem_index(s), opc);
4880        tcg_gen_mov_i64(cpu_exclusive_val, t64);
4881        if (s->be_data == MO_BE) {
4882            tcg_gen_extr_i64_i32(tmp2, tmp, t64);
4883        } else {
4884            tcg_gen_extr_i64_i32(tmp, tmp2, t64);
4885        }
4886        store_reg(s, rt2, tmp2);
4887    } else {
4888        gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), opc);
4889        tcg_gen_extu_i32_i64(cpu_exclusive_val, tmp);
4890    }
4891
4892    store_reg(s, rt, tmp);
4893    tcg_gen_extu_i32_i64(cpu_exclusive_addr, addr);
4894}
4895
4896static void gen_clrex(DisasContext *s)
4897{
4898    tcg_gen_movi_i64(cpu_exclusive_addr, -1);
4899}
4900
4901static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
4902                                TCGv_i32 addr, int size)
4903{
4904    TCGv_i32 t0, t1, t2;
4905    TCGv_i64 extaddr;
4906    TCGv taddr;
4907    TCGLabel *done_label;
4908    TCGLabel *fail_label;
4909    MemOp opc = size | MO_ALIGN | s->be_data;
4910
4911    /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]) {
4912         [addr] = {Rt};
4913         {Rd} = 0;
4914       } else {
4915         {Rd} = 1;
4916       } */
4917    fail_label = gen_new_label();
4918    done_label = gen_new_label();
4919    extaddr = tcg_temp_new_i64();
4920    tcg_gen_extu_i32_i64(extaddr, addr);
4921    tcg_gen_brcond_i64(TCG_COND_NE, extaddr, cpu_exclusive_addr, fail_label);
4922
4923    taddr = gen_aa32_addr(s, addr, opc);
4924    t0 = tcg_temp_new_i32();
4925    t1 = load_reg(s, rt);
4926    if (size == 3) {
4927        TCGv_i64 o64 = tcg_temp_new_i64();
4928        TCGv_i64 n64 = tcg_temp_new_i64();
4929
4930        t2 = load_reg(s, rt2);
4931
4932        /*
4933         * For AArch32, architecturally the 32-bit word at the lowest
4934         * address is always Rt and the one at addr+4 is Rt2, even if
4935         * the CPU is big-endian. Since we're going to treat this as a
4936         * single 64-bit BE store, we need to put the two halves in the
4937         * opposite order for BE to LE, so that they end up in the right
4938         * places.  We don't want gen_aa32_st_i64, because that checks
4939         * SCTLR_B as if for an architectural 64-bit access.
4940         */
4941        if (s->be_data == MO_BE) {
4942            tcg_gen_concat_i32_i64(n64, t2, t1);
4943        } else {
4944            tcg_gen_concat_i32_i64(n64, t1, t2);
4945        }
4946
4947        tcg_gen_atomic_cmpxchg_i64(o64, taddr, cpu_exclusive_val, n64,
4948                                   get_mem_index(s), opc);
4949
4950        tcg_gen_setcond_i64(TCG_COND_NE, o64, o64, cpu_exclusive_val);
4951        tcg_gen_extrl_i64_i32(t0, o64);
4952    } else {
4953        t2 = tcg_temp_new_i32();
4954        tcg_gen_extrl_i64_i32(t2, cpu_exclusive_val);
4955        tcg_gen_atomic_cmpxchg_i32(t0, taddr, t2, t1, get_mem_index(s), opc);
4956        tcg_gen_setcond_i32(TCG_COND_NE, t0, t0, t2);
4957    }
4958    tcg_gen_mov_i32(cpu_R[rd], t0);
4959    tcg_gen_br(done_label);
4960
4961    gen_set_label(fail_label);
4962    tcg_gen_movi_i32(cpu_R[rd], 1);
4963    gen_set_label(done_label);
4964    tcg_gen_movi_i64(cpu_exclusive_addr, -1);
4965}
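
/*
 * Guest code normally wraps the two helpers above in a retry loop; as a
 * rough sketch in the same pseudocode style as the comment above:
 *
 *   do {
 *       tmp = LDREX(addr);         // gen_load_exclusive: remember addr/value
 *       tmp = tmp + 1;
 *   } while (STREX(addr, tmp));    // gen_store_exclusive: Rd == 0 on success
 *
 * The store only succeeds if the cmpxchg still observes cpu_exclusive_val
 * at the remembered address, which is how intervening stores are detected
 * without monitoring every regular store.
 */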
4966
4967/* gen_srs:
4968 * @env: CPUARMState
4969 * @s: DisasContext
4970 * @mode: mode field from insn (which stack to store to)
4971 * @amode: addressing mode (DA/IA/DB/IB), encoded as per P,U bits in ARM insn
4972 * @writeback: true if writeback bit set
4973 *
4974 * Generate code for the SRS (Store Return State) insn.
4975 */
4976static void gen_srs(DisasContext *s,
4977                    uint32_t mode, uint32_t amode, bool writeback)
4978{
4979    int32_t offset;
4980    TCGv_i32 addr, tmp;
4981    bool undef = false;
4982
4983    /* SRS is:
4984     * - trapped to EL3 if EL3 is AArch64 and we are at Secure EL1
4985     *   and specified mode is monitor mode
4986     * - UNDEFINED in Hyp mode
4987     * - UNPREDICTABLE in User or System mode
4988     * - UNPREDICTABLE if the specified mode is:
4989     * -- not implemented
4990     * -- not a valid mode number
4991     * -- a mode that's at a higher exception level
4992     * -- Monitor, if we are Non-secure
4993     * For the UNPREDICTABLE cases we choose to UNDEF.
4994     */
4995    if (s->current_el == 1 && !s->ns && mode == ARM_CPU_MODE_MON) {
4996        gen_exception_insn_el(s, 0, EXCP_UDEF, syn_uncategorized(), 3);
4997        return;
4998    }
4999
5000    if (s->current_el == 0 || s->current_el == 2) {
5001        undef = true;
5002    }
5003
5004    switch (mode) {
5005    case ARM_CPU_MODE_USR:
5006    case ARM_CPU_MODE_FIQ:
5007    case ARM_CPU_MODE_IRQ:
5008    case ARM_CPU_MODE_SVC:
5009    case ARM_CPU_MODE_ABT:
5010    case ARM_CPU_MODE_UND:
5011    case ARM_CPU_MODE_SYS:
5012        break;
5013    case ARM_CPU_MODE_HYP:
5014        if (s->current_el == 1 || !arm_dc_feature(s, ARM_FEATURE_EL2)) {
5015            undef = true;
5016        }
5017        break;
5018    case ARM_CPU_MODE_MON:
5019        /* No need to check specifically for "are we non-secure" because
5020         * we've already made EL0 UNDEF and handled the trap for S-EL1;
5021         * so if this isn't EL3 then we must be non-secure.
5022         */
5023        if (s->current_el != 3) {
5024            undef = true;
5025        }
5026        break;
5027    default:
5028        undef = true;
5029    }
5030
5031    if (undef) {
5032        unallocated_encoding(s);
5033        return;
5034    }
5035
5036    addr = tcg_temp_new_i32();
5037    /* get_r13_banked() will raise an exception if called from System mode */
5038    gen_set_condexec(s);
5039    gen_update_pc(s, 0);
5040    gen_helper_get_r13_banked(addr, cpu_env, tcg_constant_i32(mode));
5041    switch (amode) {
5042    case 0: /* DA */
5043        offset = -4;
5044        break;
5045    case 1: /* IA */
5046        offset = 0;
5047        break;
5048    case 2: /* DB */
5049        offset = -8;
5050        break;
5051    case 3: /* IB */
5052        offset = 4;
5053        break;
5054    default:
5055        g_assert_not_reached();
5056    }
5057    tcg_gen_addi_i32(addr, addr, offset);
5058    tmp = load_reg(s, 14);
5059    gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN);
5060    tmp = load_cpu_field(spsr);
5061    tcg_gen_addi_i32(addr, addr, 4);
5062    gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN);
5063    if (writeback) {
5064        switch (amode) {
5065        case 0:
5066            offset = -8;
5067            break;
5068        case 1:
5069            offset = 4;
5070            break;
5071        case 2:
5072            offset = -4;
5073            break;
5074        case 3:
5075            offset = 0;
5076            break;
5077        default:
5078            g_assert_not_reached();
5079        }
5080        tcg_gen_addi_i32(addr, addr, offset);
5081        gen_helper_set_r13_banked(cpu_env, tcg_constant_i32(mode), addr);
5082    }
5083    s->base.is_jmp = DISAS_UPDATE_EXIT;
5084}
5085
5086/* Skip this instruction if the ARM condition is false */
5087static void arm_skip_unless(DisasContext *s, uint32_t cond)
5088{
5089    arm_gen_condlabel(s);
5090    arm_gen_test_cc(cond ^ 1, s->condlabel.label);
5091}
5092
5093
5094/*
5095 * Constant expanders used by T16/T32 decode
5096 */
5097
5098/* Return only the rotation part of T32ExpandImm.  */
5099static int t32_expandimm_rot(DisasContext *s, int x)
5100{
5101    return x & 0xc00 ? extract32(x, 7, 5) : 0;
5102}
5103
5104/* Return the unrotated immediate from T32ExpandImm.  */
5105static int t32_expandimm_imm(DisasContext *s, int x)
5106{
5107    int imm = extract32(x, 0, 8);
5108
5109    switch (extract32(x, 8, 4)) {
5110    case 0: /* XY */
5111        /* Nothing to do.  */
5112        break;
5113    case 1: /* 00XY00XY */
5114        imm *= 0x00010001;
5115        break;
5116    case 2: /* XY00XY00 */
5117        imm *= 0x01000100;
5118        break;
5119    case 3: /* XYXYXYXY */
5120        imm *= 0x01010101;
5121        break;
5122    default:
5123        /* Rotated constant.  */
5124        imm |= 0x80;
5125        break;
5126    }
5127    return imm;
5128}
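
/*
 * Worked example (illustrative): for imm12 == 0x1ab the top nibble is 1,
 * so t32_expandimm_imm() returns 0x00ab00ab and t32_expandimm_rot()
 * returns 0.  For imm12 == 0x4ff, bits [11:10] are non-zero, so this is a
 * rotated constant: the function returns 0xff (the low 8 bits with bit 7
 * forced on), the rotation is imm12[11:7] == 9, and op_s_rri_rot() below
 * computes ror32(0xff, 9) == 0x7f800000.
 */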
5129
5130static int t32_branch24(DisasContext *s, int x)
5131{
5132    /* Convert J1:J2 at x[22:21] to I2:I1, which involves I=J^~S.  */
5133    x ^= !(x < 0) * (3 << 21);
5134    /* Append the final zero.  */
5135    return x << 1;
5136}
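
/*
 * The trick above relies on I == NOT(J EOR S) == J EOR (NOT S): when the
 * sign bit S is 1 (x < 0) the J bits already equal the I bits, so nothing
 * needs to change, and when S is 0 both bits must be inverted, which is
 * what the conditional XOR with (3 << 21) does.
 */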
5137
5138static int t16_setflags(DisasContext *s)
5139{
5140    return s->condexec_mask == 0;
5141}
5142
5143static int t16_push_list(DisasContext *s, int x)
5144{
5145    return (x & 0xff) | (x & 0x100) << (14 - 8);
5146}
5147
5148static int t16_pop_list(DisasContext *s, int x)
5149{
5150    return (x & 0xff) | (x & 0x100) << (15 - 8);
5151}
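
/*
 * Example (illustrative): bit 8 of the encoded list is the extra 'M'/'P'
 * register, so a T16 PUSH with list 0x103 stores {r0, r1, lr} (0x4003)
 * while a POP with the same encoding loads {r0, r1, pc} (0x8003).
 */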
5152
5153/*
5154 * Include the generated decoders.
5155 */
5156
5157#include "decode-a32.c.inc"
5158#include "decode-a32-uncond.c.inc"
5159#include "decode-t32.c.inc"
5160#include "decode-t16.c.inc"
5161
5162static bool valid_cp(DisasContext *s, int cp)
5163{
5164    /*
5165     * Return true if this coprocessor field indicates something
5166     * that's really a possible coprocessor.
5167     * For v7 and earlier, coprocessors 8..15 were reserved for Arm use,
5168     * and of those only cp14 and cp15 were used for registers.
5169     * cp10 and cp11 were used for VFP and Neon, whose decode is
5170     * dealt with elsewhere. With the advent of fp16, cp9 is also
5171     * now part of VFP.
5172     * For v8A and later, the encoding has been tightened so that
5173     * only cp14 and cp15 are valid, and other values aren't considered
5174     * to be in the coprocessor-instruction space at all. v8M still
5175     * permits coprocessors 0..7.
5176     * For XScale, we must not decode the XScale cp0, cp1 space as
5177     * a standard coprocessor insn, because we want to fall through to
5178     * the legacy disas_xscale_insn() decoder after decodetree is done.
5179     */
5180    if (arm_dc_feature(s, ARM_FEATURE_XSCALE) && (cp == 0 || cp == 1)) {
5181        return false;
5182    }
5183
5184    if (arm_dc_feature(s, ARM_FEATURE_V8) &&
5185        !arm_dc_feature(s, ARM_FEATURE_M)) {
5186        return cp >= 14;
5187    }
5188    return cp < 8 || cp >= 14;
5189}
5190
5191static bool trans_MCR(DisasContext *s, arg_MCR *a)
5192{
5193    if (!valid_cp(s, a->cp)) {
5194        return false;
5195    }
5196    do_coproc_insn(s, a->cp, false, a->opc1, a->crn, a->crm, a->opc2,
5197                   false, a->rt, 0);
5198    return true;
5199}
5200
5201static bool trans_MRC(DisasContext *s, arg_MRC *a)
5202{
5203    if (!valid_cp(s, a->cp)) {
5204        return false;
5205    }
5206    do_coproc_insn(s, a->cp, false, a->opc1, a->crn, a->crm, a->opc2,
5207                   true, a->rt, 0);
5208    return true;
5209}
5210
5211static bool trans_MCRR(DisasContext *s, arg_MCRR *a)
5212{
5213    if (!valid_cp(s, a->cp)) {
5214        return false;
5215    }
5216    do_coproc_insn(s, a->cp, true, a->opc1, 0, a->crm, 0,
5217                   false, a->rt, a->rt2);
5218    return true;
5219}
5220
5221static bool trans_MRRC(DisasContext *s, arg_MRRC *a)
5222{
5223    if (!valid_cp(s, a->cp)) {
5224        return false;
5225    }
5226    do_coproc_insn(s, a->cp, true, a->opc1, 0, a->crm, 0,
5227                   true, a->rt, a->rt2);
5228    return true;
5229}
5230
5231/* Helpers to swap operands for reverse-subtract.  */
5232static void gen_rsb(TCGv_i32 dst, TCGv_i32 a, TCGv_i32 b)
5233{
5234    tcg_gen_sub_i32(dst, b, a);
5235}
5236
5237static void gen_rsb_CC(TCGv_i32 dst, TCGv_i32 a, TCGv_i32 b)
5238{
5239    gen_sub_CC(dst, b, a);
5240}
5241
5242static void gen_rsc(TCGv_i32 dest, TCGv_i32 a, TCGv_i32 b)
5243{
5244    gen_sub_carry(dest, b, a);
5245}
5246
5247static void gen_rsc_CC(TCGv_i32 dest, TCGv_i32 a, TCGv_i32 b)
5248{
5249    gen_sbc_CC(dest, b, a);
5250}
5251
5252/*
5253 * Helpers for the data processing routines.
5254 *
5255 * After the computation store the results back.
5256 * This may be suppressed altogether (STREG_NONE), require a runtime
5257 * check against the stack limits (STREG_SP_CHECK), generate an
5258 * exception return (STREG_EXC_RET), or store into a register (STREG_NORMAL).
5259 *
5260 * Always return true, indicating success for a trans_* function.
5261 */
5262typedef enum {
5263   STREG_NONE,
5264   STREG_NORMAL,
5265   STREG_SP_CHECK,
5266   STREG_EXC_RET,
5267} StoreRegKind;
5268
5269static bool store_reg_kind(DisasContext *s, int rd,
5270                            TCGv_i32 val, StoreRegKind kind)
5271{
5272    switch (kind) {
5273    case STREG_NONE:
5274        return true;
5275    case STREG_NORMAL:
5276        /* See ALUWritePC: Interworking only from a32 mode. */
5277        if (s->thumb) {
5278            store_reg(s, rd, val);
5279        } else {
5280            store_reg_bx(s, rd, val);
5281        }
5282        return true;
5283    case STREG_SP_CHECK:
5284        store_sp_checked(s, val);
5285        return true;
5286    case STREG_EXC_RET:
5287        gen_exception_return(s, val);
5288        return true;
5289    }
5290    g_assert_not_reached();
5291}
5292
5293/*
5294 * Data Processing (register)
5295 *
5296 * Operate on one register source and one immediate-shifted register source,
5297 * optionally setting the flags, and write the result via store_reg_kind().
5298 */
5299static bool op_s_rrr_shi(DisasContext *s, arg_s_rrr_shi *a,
5300                         void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
5301                         int logic_cc, StoreRegKind kind)
5302{
5303    TCGv_i32 tmp1, tmp2;
5304
5305    tmp2 = load_reg(s, a->rm);
5306    gen_arm_shift_im(tmp2, a->shty, a->shim, logic_cc);
5307    tmp1 = load_reg(s, a->rn);
5308
5309    gen(tmp1, tmp1, tmp2);
5310
5311    if (logic_cc) {
5312        gen_logic_CC(tmp1);
5313    }
5314    return store_reg_kind(s, a->rd, tmp1, kind);
5315}
5316
5317static bool op_s_rxr_shi(DisasContext *s, arg_s_rrr_shi *a,
5318                         void (*gen)(TCGv_i32, TCGv_i32),
5319                         int logic_cc, StoreRegKind kind)
5320{
5321    TCGv_i32 tmp;
5322
5323    tmp = load_reg(s, a->rm);
5324    gen_arm_shift_im(tmp, a->shty, a->shim, logic_cc);
5325
5326    gen(tmp, tmp);
5327    if (logic_cc) {
5328        gen_logic_CC(tmp);
5329    }
5330    return store_reg_kind(s, a->rd, tmp, kind);
5331}
5332
5333/*
5334 * Data-processing (register-shifted register)
5335 *
5336 * Operate on one register source and one register-shifted register source,
5337 * optionally setting the flags, and write the result via store_reg_kind().
5338 */
5339static bool op_s_rrr_shr(DisasContext *s, arg_s_rrr_shr *a,
5340                         void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
5341                         int logic_cc, StoreRegKind kind)
5342{
5343    TCGv_i32 tmp1, tmp2;
5344
5345    tmp1 = load_reg(s, a->rs);
5346    tmp2 = load_reg(s, a->rm);
5347    gen_arm_shift_reg(tmp2, a->shty, tmp1, logic_cc);
5348    tmp1 = load_reg(s, a->rn);
5349
5350    gen(tmp1, tmp1, tmp2);
5351
5352    if (logic_cc) {
5353        gen_logic_CC(tmp1);
5354    }
5355    return store_reg_kind(s, a->rd, tmp1, kind);
5356}
5357
5358static bool op_s_rxr_shr(DisasContext *s, arg_s_rrr_shr *a,
5359                         void (*gen)(TCGv_i32, TCGv_i32),
5360                         int logic_cc, StoreRegKind kind)
5361{
5362    TCGv_i32 tmp1, tmp2;
5363
5364    tmp1 = load_reg(s, a->rs);
5365    tmp2 = load_reg(s, a->rm);
5366    gen_arm_shift_reg(tmp2, a->shty, tmp1, logic_cc);
5367
5368    gen(tmp2, tmp2);
5369    if (logic_cc) {
5370        gen_logic_CC(tmp2);
5371    }
5372    return store_reg_kind(s, a->rd, tmp2, kind);
5373}
5374
5375/*
5376 * Data-processing (immediate)
5377 *
5378 * Operate on one register source and one rotated immediate,
5379 * optionally setting the flags, and write the result via store_reg_kind().
5380 *
5381 * Note that logic_cc && a->rot setting CF based on the msb of the
5382 * immediate is the reason why we must pass in the unrotated form
5383 * of the immediate.
5384 */
5385static bool op_s_rri_rot(DisasContext *s, arg_s_rri_rot *a,
5386                         void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
5387                         int logic_cc, StoreRegKind kind)
5388{
5389    TCGv_i32 tmp1;
5390    uint32_t imm;
5391
5392    imm = ror32(a->imm, a->rot);
5393    if (logic_cc && a->rot) {
5394        tcg_gen_movi_i32(cpu_CF, imm >> 31);
5395    }
5396    tmp1 = load_reg(s, a->rn);
5397
5398    gen(tmp1, tmp1, tcg_constant_i32(imm));
5399
5400    if (logic_cc) {
5401        gen_logic_CC(tmp1);
5402    }
5403    return store_reg_kind(s, a->rd, tmp1, kind);
5404}
5405
5406static bool op_s_rxi_rot(DisasContext *s, arg_s_rri_rot *a,
5407                         void (*gen)(TCGv_i32, TCGv_i32),
5408                         int logic_cc, StoreRegKind kind)
5409{
5410    TCGv_i32 tmp;
5411    uint32_t imm;
5412
5413    imm = ror32(a->imm, a->rot);
5414    if (logic_cc && a->rot) {
5415        tcg_gen_movi_i32(cpu_CF, imm >> 31);
5416    }
5417
5418    tmp = tcg_temp_new_i32();
5419    gen(tmp, tcg_constant_i32(imm));
5420
5421    if (logic_cc) {
5422        gen_logic_CC(tmp);
5423    }
5424    return store_reg_kind(s, a->rd, tmp, kind);
5425}
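
/*
 * Example of the CF note above (illustrative): A32 "MOVS r0, #0xff000000"
 * reaches these helpers with imm == 0xff and rot == 8, so ror32(0xff, 8)
 * == 0xff000000; because the rotation is non-zero, bit 31 of that value
 * (here 1) is written to CF before the NZ flags are set from the result.
 */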
5426
5427#define DO_ANY3(NAME, OP, L, K)                                         \
5428    static bool trans_##NAME##_rrri(DisasContext *s, arg_s_rrr_shi *a)  \
5429    { StoreRegKind k = (K); return op_s_rrr_shi(s, a, OP, L, k); }      \
5430    static bool trans_##NAME##_rrrr(DisasContext *s, arg_s_rrr_shr *a)  \
5431    { StoreRegKind k = (K); return op_s_rrr_shr(s, a, OP, L, k); }      \
5432    static bool trans_##NAME##_rri(DisasContext *s, arg_s_rri_rot *a)   \
5433    { StoreRegKind k = (K); return op_s_rri_rot(s, a, OP, L, k); }
5434
5435#define DO_ANY2(NAME, OP, L, K)                                         \
5436    static bool trans_##NAME##_rxri(DisasContext *s, arg_s_rrr_shi *a)  \
5437    { StoreRegKind k = (K); return op_s_rxr_shi(s, a, OP, L, k); }      \
5438    static bool trans_##NAME##_rxrr(DisasContext *s, arg_s_rrr_shr *a)  \
5439    { StoreRegKind k = (K); return op_s_rxr_shr(s, a, OP, L, k); }      \
5440    static bool trans_##NAME##_rxi(DisasContext *s, arg_s_rri_rot *a)   \
5441    { StoreRegKind k = (K); return op_s_rxi_rot(s, a, OP, L, k); }
5442
5443#define DO_CMP2(NAME, OP, L)                                            \
5444    static bool trans_##NAME##_xrri(DisasContext *s, arg_s_rrr_shi *a)  \
5445    { return op_s_rrr_shi(s, a, OP, L, STREG_NONE); }                   \
5446    static bool trans_##NAME##_xrrr(DisasContext *s, arg_s_rrr_shr *a)  \
5447    { return op_s_rrr_shr(s, a, OP, L, STREG_NONE); }                   \
5448    static bool trans_##NAME##_xri(DisasContext *s, arg_s_rri_rot *a)   \
5449    { return op_s_rri_rot(s, a, OP, L, STREG_NONE); }
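
/*
 * As a sketch of what these macros produce, DO_ANY3(AND, tcg_gen_and_i32,
 * a->s, STREG_NORMAL) below expands (among others) to:
 *
 *   static bool trans_AND_rrri(DisasContext *s, arg_s_rrr_shi *a)
 *   {
 *       StoreRegKind k = STREG_NORMAL;
 *       return op_s_rrr_shi(s, a, tcg_gen_and_i32, a->s, k);
 *   }
 *
 * i.e. one trans_* entry point per operand flavour, all funnelling into
 * the op_s_* helpers above.
 */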
5450
5451DO_ANY3(AND, tcg_gen_and_i32, a->s, STREG_NORMAL)
5452DO_ANY3(EOR, tcg_gen_xor_i32, a->s, STREG_NORMAL)
5453DO_ANY3(ORR, tcg_gen_or_i32, a->s, STREG_NORMAL)
5454DO_ANY3(BIC, tcg_gen_andc_i32, a->s, STREG_NORMAL)
5455
5456DO_ANY3(RSB, a->s ? gen_rsb_CC : gen_rsb, false, STREG_NORMAL)
5457DO_ANY3(ADC, a->s ? gen_adc_CC : gen_add_carry, false, STREG_NORMAL)
5458DO_ANY3(SBC, a->s ? gen_sbc_CC : gen_sub_carry, false, STREG_NORMAL)
5459DO_ANY3(RSC, a->s ? gen_rsc_CC : gen_rsc, false, STREG_NORMAL)
5460
5461DO_CMP2(TST, tcg_gen_and_i32, true)
5462DO_CMP2(TEQ, tcg_gen_xor_i32, true)
5463DO_CMP2(CMN, gen_add_CC, false)
5464DO_CMP2(CMP, gen_sub_CC, false)
5465
5466DO_ANY3(ADD, a->s ? gen_add_CC : tcg_gen_add_i32, false,
5467        a->rd == 13 && a->rn == 13 ? STREG_SP_CHECK : STREG_NORMAL)
5468
5469/*
5470 * Note that for the computation of StoreRegKind we may return out of
5471 * the middle of the functions expanded by DO_ANY3, and that we modify
5472 * a->s via that parameter before it is used by OP.
5473 */
5474DO_ANY3(SUB, a->s ? gen_sub_CC : tcg_gen_sub_i32, false,
5475        ({
5476            StoreRegKind ret = STREG_NORMAL;
5477            if (a->rd == 15 && a->s) {
5478                /*
5479                 * See ALUExceptionReturn:
5480                 * In User mode, UNPREDICTABLE; we choose UNDEF.
5481                 * In Hyp mode, UNDEFINED.
5482                 */
5483                if (IS_USER(s) || s->current_el == 2) {
5484                    unallocated_encoding(s);
5485                    return true;
5486                }
5487                /* There is no writeback of nzcv to PSTATE.  */
5488                a->s = 0;
5489                ret = STREG_EXC_RET;
5490            } else if (a->rd == 13 && a->rn == 13) {
5491                ret = STREG_SP_CHECK;
5492            }
5493            ret;
5494        }))
5495
5496DO_ANY2(MOV, tcg_gen_mov_i32, a->s,
5497        ({
5498            StoreRegKind ret = STREG_NORMAL;
5499            if (a->rd == 15 && a->s) {
5500                /*
5501                 * See ALUExceptionReturn:
5502                 * In User mode, UNPREDICTABLE; we choose UNDEF.
5503                 * In Hyp mode, UNDEFINED.
5504                 */
5505                if (IS_USER(s) || s->current_el == 2) {
5506                    unallocated_encoding(s);
5507                    return true;
5508                }
5509                /* There is no writeback of nzcv to PSTATE.  */
5510                a->s = 0;
5511                ret = STREG_EXC_RET;
5512            } else if (a->rd == 13) {
5513                ret = STREG_SP_CHECK;
5514            }
5515            ret;
5516        }))
5517
5518DO_ANY2(MVN, tcg_gen_not_i32, a->s, STREG_NORMAL)
5519
5520/*
5521 * ORN is only available with T32, so there is no register-shifted-register
5522 * form of the insn.  Using the DO_ANY3 macro would create an unused function.
5523 */
5524static bool trans_ORN_rrri(DisasContext *s, arg_s_rrr_shi *a)
5525{
5526    return op_s_rrr_shi(s, a, tcg_gen_orc_i32, a->s, STREG_NORMAL);
5527}
5528
5529static bool trans_ORN_rri(DisasContext *s, arg_s_rri_rot *a)
5530{
5531    return op_s_rri_rot(s, a, tcg_gen_orc_i32, a->s, STREG_NORMAL);
5532}
5533
5534#undef DO_ANY3
5535#undef DO_ANY2
5536#undef DO_CMP2
5537
5538static bool trans_ADR(DisasContext *s, arg_ri *a)
5539{
5540    store_reg_bx(s, a->rd, add_reg_for_lit(s, 15, a->imm));
5541    return true;
5542}
5543
5544static bool trans_MOVW(DisasContext *s, arg_MOVW *a)
5545{
5546    if (!ENABLE_ARCH_6T2) {
5547        return false;
5548    }
5549
5550    store_reg(s, a->rd, tcg_constant_i32(a->imm));
5551    return true;
5552}
5553
5554static bool trans_MOVT(DisasContext *s, arg_MOVW *a)
5555{
5556    TCGv_i32 tmp;
5557
5558    if (!ENABLE_ARCH_6T2) {
5559        return false;
5560    }
5561
5562    tmp = load_reg(s, a->rd);
5563    tcg_gen_ext16u_i32(tmp, tmp);
5564    tcg_gen_ori_i32(tmp, tmp, a->imm << 16);
5565    store_reg(s, a->rd, tmp);
5566    return true;
5567}
5568
5569/*
5570 * v8.1M MVE wide-shifts
5571 */
5572static bool do_mve_shl_ri(DisasContext *s, arg_mve_shl_ri *a,
5573                          WideShiftImmFn *fn)
5574{
5575    TCGv_i64 rda;
5576    TCGv_i32 rdalo, rdahi;
5577
5578    if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
5579        /* Decode falls through to ORR/MOV UNPREDICTABLE handling */
5580        return false;
5581    }
5582    if (a->rdahi == 15) {
5583        /* These are a different encoding (SQSHL/SRSHR/UQSHL/URSHR) */
5584        return false;
5585    }
5586    if (!dc_isar_feature(aa32_mve, s) ||
5587        !arm_dc_feature(s, ARM_FEATURE_M_MAIN) ||
5588        a->rdahi == 13) {
5589        /* RdaHi == 13 is UNPREDICTABLE; we choose to UNDEF */
5590        unallocated_encoding(s);
5591        return true;
5592    }
5593
5594    if (a->shim == 0) {
5595        a->shim = 32;
5596    }
5597
5598    rda = tcg_temp_new_i64();
5599    rdalo = load_reg(s, a->rdalo);
5600    rdahi = load_reg(s, a->rdahi);
5601    tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
5602
5603    fn(rda, rda, a->shim);
5604
5605    tcg_gen_extrl_i64_i32(rdalo, rda);
5606    tcg_gen_extrh_i64_i32(rdahi, rda);
5607    store_reg(s, a->rdalo, rdalo);
5608    store_reg(s, a->rdahi, rdahi);
5609
5610    return true;
5611}
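
/*
 * For example (illustrative): LSLL with rdahi:rdalo == 0x00000001:0x80000000
 * and shim == 4 operates on the 64-bit value 0x0000000180000000, giving
 * 0x0000001800000000, i.e. rdahi == 0x18 and rdalo == 0 on writeback.
 * An encoded immediate of 0 is treated as a shift by 32, per the quirk
 * handled above.
 */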
5612
5613static bool trans_ASRL_ri(DisasContext *s, arg_mve_shl_ri *a)
5614{
5615    return do_mve_shl_ri(s, a, tcg_gen_sari_i64);
5616}
5617
5618static bool trans_LSLL_ri(DisasContext *s, arg_mve_shl_ri *a)
5619{
5620    return do_mve_shl_ri(s, a, tcg_gen_shli_i64);
5621}
5622
5623static bool trans_LSRL_ri(DisasContext *s, arg_mve_shl_ri *a)
5624{
5625    return do_mve_shl_ri(s, a, tcg_gen_shri_i64);
5626}
5627
5628static void gen_mve_sqshll(TCGv_i64 r, TCGv_i64 n, int64_t shift)
5629{
5630    gen_helper_mve_sqshll(r, cpu_env, n, tcg_constant_i32(shift));
5631}
5632
5633static bool trans_SQSHLL_ri(DisasContext *s, arg_mve_shl_ri *a)
5634{
5635    return do_mve_shl_ri(s, a, gen_mve_sqshll);
5636}
5637
5638static void gen_mve_uqshll(TCGv_i64 r, TCGv_i64 n, int64_t shift)
5639{
5640    gen_helper_mve_uqshll(r, cpu_env, n, tcg_constant_i32(shift));
5641}
5642
5643static bool trans_UQSHLL_ri(DisasContext *s, arg_mve_shl_ri *a)
5644{
5645    return do_mve_shl_ri(s, a, gen_mve_uqshll);
5646}
5647
5648static bool trans_SRSHRL_ri(DisasContext *s, arg_mve_shl_ri *a)
5649{
5650    return do_mve_shl_ri(s, a, gen_srshr64_i64);
5651}
5652
5653static bool trans_URSHRL_ri(DisasContext *s, arg_mve_shl_ri *a)
5654{
5655    return do_mve_shl_ri(s, a, gen_urshr64_i64);
5656}
5657
5658static bool do_mve_shl_rr(DisasContext *s, arg_mve_shl_rr *a, WideShiftFn *fn)
5659{
5660    TCGv_i64 rda;
5661    TCGv_i32 rdalo, rdahi;
5662
5663    if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
5664        /* Decode falls through to ORR/MOV UNPREDICTABLE handling */
5665        return false;
5666    }
5667    if (a->rdahi == 15) {
5668        /* These are a different encoding (SQSHL/SRSHR/UQSHL/URSHR) */
5669        return false;
5670    }
5671    if (!dc_isar_feature(aa32_mve, s) ||
5672        !arm_dc_feature(s, ARM_FEATURE_M_MAIN) ||
5673        a->rdahi == 13 || a->rm == 13 || a->rm == 15 ||
5674        a->rm == a->rdahi || a->rm == a->rdalo) {
5675        /* These rdahi/rdalo/rm cases are UNPREDICTABLE; we choose to UNDEF */
5676        unallocated_encoding(s);
5677        return true;
5678    }
5679
5680    rda = tcg_temp_new_i64();
5681    rdalo = load_reg(s, a->rdalo);
5682    rdahi = load_reg(s, a->rdahi);
5683    tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
5684
5685    /* The helper takes care of the sign-extension of the low 8 bits of Rm */
5686    fn(rda, cpu_env, rda, cpu_R[a->rm]);
5687
5688    tcg_gen_extrl_i64_i32(rdalo, rda);
5689    tcg_gen_extrh_i64_i32(rdahi, rda);
5690    store_reg(s, a->rdalo, rdalo);
5691    store_reg(s, a->rdahi, rdahi);
5692
5693    return true;
5694}
5695
5696static bool trans_LSLL_rr(DisasContext *s, arg_mve_shl_rr *a)
5697{
5698    return do_mve_shl_rr(s, a, gen_helper_mve_ushll);
5699}
5700
5701static bool trans_ASRL_rr(DisasContext *s, arg_mve_shl_rr *a)
5702{
5703    return do_mve_shl_rr(s, a, gen_helper_mve_sshrl);
5704}
5705
5706static bool trans_UQRSHLL64_rr(DisasContext *s, arg_mve_shl_rr *a)
5707{
5708    return do_mve_shl_rr(s, a, gen_helper_mve_uqrshll);
5709}
5710
5711static bool trans_SQRSHRL64_rr(DisasContext *s, arg_mve_shl_rr *a)
5712{
5713    return do_mve_shl_rr(s, a, gen_helper_mve_sqrshrl);
5714}
5715
5716static bool trans_UQRSHLL48_rr(DisasContext *s, arg_mve_shl_rr *a)
5717{
5718    return do_mve_shl_rr(s, a, gen_helper_mve_uqrshll48);
5719}
5720
5721static bool trans_SQRSHRL48_rr(DisasContext *s, arg_mve_shl_rr *a)
5722{
5723    return do_mve_shl_rr(s, a, gen_helper_mve_sqrshrl48);
5724}
5725
5726static bool do_mve_sh_ri(DisasContext *s, arg_mve_sh_ri *a, ShiftImmFn *fn)
5727{
5728    if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
5729        /* Decode falls through to ORR/MOV UNPREDICTABLE handling */
5730        return false;
5731    }
5732    if (!dc_isar_feature(aa32_mve, s) ||
5733        !arm_dc_feature(s, ARM_FEATURE_M_MAIN) ||
5734        a->rda == 13 || a->rda == 15) {
5735        /* These rda cases are UNPREDICTABLE; we choose to UNDEF */
5736        unallocated_encoding(s);
5737        return true;
5738    }
5739
5740    if (a->shim == 0) {
5741        a->shim = 32;
5742    }
5743    fn(cpu_R[a->rda], cpu_R[a->rda], a->shim);
5744
5745    return true;
5746}
5747
5748static bool trans_URSHR_ri(DisasContext *s, arg_mve_sh_ri *a)
5749{
5750    return do_mve_sh_ri(s, a, gen_urshr32_i32);
5751}
5752
5753static bool trans_SRSHR_ri(DisasContext *s, arg_mve_sh_ri *a)
5754{
5755    return do_mve_sh_ri(s, a, gen_srshr32_i32);
5756}
5757
5758static void gen_mve_sqshl(TCGv_i32 r, TCGv_i32 n, int32_t shift)
5759{
5760    gen_helper_mve_sqshl(r, cpu_env, n, tcg_constant_i32(shift));
5761}
5762
5763static bool trans_SQSHL_ri(DisasContext *s, arg_mve_sh_ri *a)
5764{
5765    return do_mve_sh_ri(s, a, gen_mve_sqshl);
5766}
5767
5768static void gen_mve_uqshl(TCGv_i32 r, TCGv_i32 n, int32_t shift)
5769{
5770    gen_helper_mve_uqshl(r, cpu_env, n, tcg_constant_i32(shift));
5771}
5772
5773static bool trans_UQSHL_ri(DisasContext *s, arg_mve_sh_ri *a)
5774{
5775    return do_mve_sh_ri(s, a, gen_mve_uqshl);
5776}
5777
5778static bool do_mve_sh_rr(DisasContext *s, arg_mve_sh_rr *a, ShiftFn *fn)
5779{
5780    if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
5781        /* Decode falls through to ORR/MOV UNPREDICTABLE handling */
5782        return false;
5783    }
5784    if (!dc_isar_feature(aa32_mve, s) ||
5785        !arm_dc_feature(s, ARM_FEATURE_M_MAIN) ||
5786        a->rda == 13 || a->rda == 15 || a->rm == 13 || a->rm == 15 ||
5787        a->rm == a->rda) {
5788        /* These rda/rm cases are UNPREDICTABLE; we choose to UNDEF */
5789        unallocated_encoding(s);
5790        return true;
5791    }
5792
5793    /* The helper takes care of the sign-extension of the low 8 bits of Rm */
5794    fn(cpu_R[a->rda], cpu_env, cpu_R[a->rda], cpu_R[a->rm]);
5795    return true;
5796}
5797
5798static bool trans_SQRSHR_rr(DisasContext *s, arg_mve_sh_rr *a)
5799{
5800    return do_mve_sh_rr(s, a, gen_helper_mve_sqrshr);
5801}
5802
5803static bool trans_UQRSHL_rr(DisasContext *s, arg_mve_sh_rr *a)
5804{
5805    return do_mve_sh_rr(s, a, gen_helper_mve_uqrshl);
5806}
5807
5808/*
5809 * Multiply and multiply accumulate
5810 */
5811
5812static bool op_mla(DisasContext *s, arg_s_rrrr *a, bool add)
5813{
5814    TCGv_i32 t1, t2;
5815
5816    t1 = load_reg(s, a->rn);
5817    t2 = load_reg(s, a->rm);
5818    tcg_gen_mul_i32(t1, t1, t2);
5819    if (add) {
5820        t2 = load_reg(s, a->ra);
5821        tcg_gen_add_i32(t1, t1, t2);
5822    }
5823    if (a->s) {
5824        gen_logic_CC(t1);
5825    }
5826    store_reg(s, a->rd, t1);
5827    return true;
5828}
5829
5830static bool trans_MUL(DisasContext *s, arg_MUL *a)
5831{
5832    return op_mla(s, a, false);
5833}
5834
5835static bool trans_MLA(DisasContext *s, arg_MLA *a)
5836{
5837    return op_mla(s, a, true);
5838}
5839
5840static bool trans_MLS(DisasContext *s, arg_MLS *a)
5841{
5842    TCGv_i32 t1, t2;
5843
5844    if (!ENABLE_ARCH_6T2) {
5845        return false;
5846    }
5847    t1 = load_reg(s, a->rn);
5848    t2 = load_reg(s, a->rm);
5849    tcg_gen_mul_i32(t1, t1, t2);
5850    t2 = load_reg(s, a->ra);
5851    tcg_gen_sub_i32(t1, t2, t1);
5852    store_reg(s, a->rd, t1);
5853    return true;
5854}
5855
5856static bool op_mlal(DisasContext *s, arg_s_rrrr *a, bool uns, bool add)
5857{
5858    TCGv_i32 t0, t1, t2, t3;
5859
5860    t0 = load_reg(s, a->rm);
5861    t1 = load_reg(s, a->rn);
5862    if (uns) {
5863        tcg_gen_mulu2_i32(t0, t1, t0, t1);
5864    } else {
5865        tcg_gen_muls2_i32(t0, t1, t0, t1);
5866    }
5867    if (add) {
5868        t2 = load_reg(s, a->ra);
5869        t3 = load_reg(s, a->rd);
5870        tcg_gen_add2_i32(t0, t1, t0, t1, t2, t3);
5871    }
5872    if (a->s) {
5873        gen_logicq_cc(t0, t1);
5874    }
5875    store_reg(s, a->ra, t0);
5876    store_reg(s, a->rd, t1);
5877    return true;
5878}
5879
5880static bool trans_UMULL(DisasContext *s, arg_UMULL *a)
5881{
5882    return op_mlal(s, a, true, false);
5883}
5884
5885static bool trans_SMULL(DisasContext *s, arg_SMULL *a)
5886{
5887    return op_mlal(s, a, false, false);
5888}
5889
5890static bool trans_UMLAL(DisasContext *s, arg_UMLAL *a)
5891{
5892    return op_mlal(s, a, true, true);
5893}
5894
5895static bool trans_SMLAL(DisasContext *s, arg_SMLAL *a)
5896{
5897    return op_mlal(s, a, false, true);
5898}
5899
5900static bool trans_UMAAL(DisasContext *s, arg_UMAAL *a)
5901{
5902    TCGv_i32 t0, t1, t2, zero;
5903
5904    if (s->thumb
5905        ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
5906        : !ENABLE_ARCH_6) {
5907        return false;
5908    }
5909
5910    t0 = load_reg(s, a->rm);
5911    t1 = load_reg(s, a->rn);
5912    tcg_gen_mulu2_i32(t0, t1, t0, t1);
5913    zero = tcg_constant_i32(0);
5914    t2 = load_reg(s, a->ra);
5915    tcg_gen_add2_i32(t0, t1, t0, t1, t2, zero);
5916    t2 = load_reg(s, a->rd);
5917    tcg_gen_add2_i32(t0, t1, t0, t1, t2, zero);
5918    store_reg(s, a->ra, t0);
5919    store_reg(s, a->rd, t1);
5920    return true;
5921}
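
/*
 * Note on the arithmetic above: (2^32 - 1)^2 + 2 * (2^32 - 1) == 2^64 - 1,
 * so the 32x32 product plus both 32-bit accumulators always fits in the
 * 64-bit t1:t0 pair and the two chained add2 steps can never carry out.
 */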
5922
5923/*
5924 * Saturating addition and subtraction
5925 */
5926
5927static bool op_qaddsub(DisasContext *s, arg_rrr *a, bool add, bool doub)
5928{
5929    TCGv_i32 t0, t1;
5930
5931    if (s->thumb
5932        ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
5933        : !ENABLE_ARCH_5TE) {
5934        return false;
5935    }
5936
5937    t0 = load_reg(s, a->rm);
5938    t1 = load_reg(s, a->rn);
5939    if (doub) {
5940        gen_helper_add_saturate(t1, cpu_env, t1, t1);
5941    }
5942    if (add) {
5943        gen_helper_add_saturate(t0, cpu_env, t0, t1);
5944    } else {
5945        gen_helper_sub_saturate(t0, cpu_env, t0, t1);
5946    }
5947    store_reg(s, a->rd, t0);
5948    return true;
5949}
5950
5951#define DO_QADDSUB(NAME, ADD, DOUB) \
5952static bool trans_##NAME(DisasContext *s, arg_rrr *a)    \
5953{                                                        \
5954    return op_qaddsub(s, a, ADD, DOUB);                  \
5955}
5956
5957DO_QADDSUB(QADD, true, false)
5958DO_QADDSUB(QSUB, false, false)
5959DO_QADDSUB(QDADD, true, true)
5960DO_QADDSUB(QDSUB, false, true)
5961
5962#undef DO_QADDSUB
5963
5964/*
5965 * Halfword multiply and multiply accumulate
5966 */
5967
5968static bool op_smlaxxx(DisasContext *s, arg_rrrr *a,
5969                       int add_long, bool nt, bool mt)
5970{
5971    TCGv_i32 t0, t1, tl, th;
5972
5973    if (s->thumb
5974        ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
5975        : !ENABLE_ARCH_5TE) {
5976        return false;
5977    }
5978
5979    t0 = load_reg(s, a->rn);
5980    t1 = load_reg(s, a->rm);
5981    gen_mulxy(t0, t1, nt, mt);
5982
5983    switch (add_long) {
5984    case 0:
5985        store_reg(s, a->rd, t0);
5986        break;
5987    case 1:
5988        t1 = load_reg(s, a->ra);
5989        gen_helper_add_setq(t0, cpu_env, t0, t1);
5990        store_reg(s, a->rd, t0);
5991        break;
5992    case 2:
5993        tl = load_reg(s, a->ra);
5994        th = load_reg(s, a->rd);
5995        /* Sign-extend the 32-bit product to 64 bits.  */
5996        t1 = tcg_temp_new_i32();
5997        tcg_gen_sari_i32(t1, t0, 31);
5998        tcg_gen_add2_i32(tl, th, tl, th, t0, t1);
5999        store_reg(s, a->ra, tl);
6000        store_reg(s, a->rd, th);
6001        break;
6002    default:
6003        g_assert_not_reached();
6004    }
6005    return true;
6006}
6007
6008#define DO_SMLAX(NAME, add, nt, mt) \
6009static bool trans_##NAME(DisasContext *s, arg_rrrr *a)     \
6010{                                                          \
6011    return op_smlaxxx(s, a, add, nt, mt);                  \
6012}
6013
6014DO_SMLAX(SMULBB, 0, 0, 0)
6015DO_SMLAX(SMULBT, 0, 0, 1)
6016DO_SMLAX(SMULTB, 0, 1, 0)
6017DO_SMLAX(SMULTT, 0, 1, 1)
6018
6019DO_SMLAX(SMLABB, 1, 0, 0)
6020DO_SMLAX(SMLABT, 1, 0, 1)
6021DO_SMLAX(SMLATB, 1, 1, 0)
6022DO_SMLAX(SMLATT, 1, 1, 1)
6023
6024DO_SMLAX(SMLALBB, 2, 0, 0)
6025DO_SMLAX(SMLALBT, 2, 0, 1)
6026DO_SMLAX(SMLALTB, 2, 1, 0)
6027DO_SMLAX(SMLALTT, 2, 1, 1)
6028
6029#undef DO_SMLAX
6030
6031static bool op_smlawx(DisasContext *s, arg_rrrr *a, bool add, bool mt)
6032{
6033    TCGv_i32 t0, t1;
6034
6035    if (!ENABLE_ARCH_5TE) {
6036        return false;
6037    }
6038
6039    t0 = load_reg(s, a->rn);
6040    t1 = load_reg(s, a->rm);
6041    /*
6042     * Since the nominal result is product<47:16>, shift the 16-bit
6043     * input up by 16 bits, so that the result is at product<63:32>.
6044     */
6045    if (mt) {
6046        tcg_gen_andi_i32(t1, t1, 0xffff0000);
6047    } else {
6048        tcg_gen_shli_i32(t1, t1, 16);
6049    }
6050    tcg_gen_muls2_i32(t0, t1, t0, t1);
6051    if (add) {
6052        t0 = load_reg(s, a->ra);
6053        gen_helper_add_setq(t1, cpu_env, t1, t0);
6054    }
6055    store_reg(s, a->rd, t1);
6056    return true;
6057}
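
/*
 * Sketch of the arithmetic above: SMULW{B,T} wants bits <47:16> of the
 * 48-bit product of a 32-bit and a 16-bit operand.  Pre-shifting the
 * 16-bit operand into bits <31:16> scales the product by 2^16, so the
 * wanted bits land at <63:32>, which is exactly the high half that
 * tcg_gen_muls2_i32() leaves in t1.
 */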
6058
6059#define DO_SMLAWX(NAME, add, mt) \
6060static bool trans_##NAME(DisasContext *s, arg_rrrr *a)     \
6061{                                                          \
6062    return op_smlawx(s, a, add, mt);                       \
6063}
6064
6065DO_SMLAWX(SMULWB, 0, 0)
6066DO_SMLAWX(SMULWT, 0, 1)
6067DO_SMLAWX(SMLAWB, 1, 0)
6068DO_SMLAWX(SMLAWT, 1, 1)
6069
6070#undef DO_SMLAWX
6071
6072/*
6073 * MSR (immediate) and hints
6074 */
6075
6076static bool trans_YIELD(DisasContext *s, arg_YIELD *a)
6077{
6078    /*
6079     * When running single-threaded TCG code, use the helper to ensure that
6080     * the next round-robin scheduled vCPU gets a crack.  When running in
6081     * MTTCG we don't generate jumps to the helper as it won't affect the
6082     * scheduling of other vCPUs.
6083     */
6084    if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
6085        gen_update_pc(s, curr_insn_len(s));
6086        s->base.is_jmp = DISAS_YIELD;
6087    }
6088    return true;
6089}
6090
6091static bool trans_WFE(DisasContext *s, arg_WFE *a)
6092{
6093    /*
6094     * When running single-threaded TCG code, use the helper to ensure that
6095     * the next round-robin scheduled vCPU gets a crack.  In MTTCG mode we
6096     * just skip this instruction.  Currently the SEV/SEVL instructions,
6097     * which are *one* of many ways to wake the CPU from WFE, are not
6098     * implemented so we can't sleep like WFI does.
6099     */
6100    if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
6101        gen_update_pc(s, curr_insn_len(s));
6102        s->base.is_jmp = DISAS_WFE;
6103    }
6104    return true;
6105}
6106
6107static bool trans_WFI(DisasContext *s, arg_WFI *a)
6108{
6109    /* For WFI, halt the vCPU until an IRQ. */
6110    gen_update_pc(s, curr_insn_len(s));
6111    s->base.is_jmp = DISAS_WFI;
6112    return true;
6113}
6114
6115static bool trans_ESB(DisasContext *s, arg_ESB *a)
6116{
6117    /*
6118     * For M-profile, minimal-RAS ESB can be a NOP.
6119     * Without RAS, we must implement this as a NOP.
6120     */
6121    if (!arm_dc_feature(s, ARM_FEATURE_M) && dc_isar_feature(aa32_ras, s)) {
6122        /*
6123         * QEMU does not have a source of physical SErrors,
6124         * so we are only concerned with virtual SErrors.
6125         * The pseudocode in the Arm ARM for this case is
6126         *   if PSTATE.EL IN {EL0, EL1} && EL2Enabled() then
6127         *      AArch32.vESBOperation();
6128         * Most of the condition can be evaluated at translation time.
6129         * Test for EL2 present, and defer test for SEL2 to runtime.
6130         */
6131        if (s->current_el <= 1 && arm_dc_feature(s, ARM_FEATURE_EL2)) {
6132            gen_helper_vesb(cpu_env);
6133        }
6134    }
6135    return true;
6136}
6137
6138static bool trans_NOP(DisasContext *s, arg_NOP *a)
6139{
6140    return true;
6141}
6142
6143static bool trans_MSR_imm(DisasContext *s, arg_MSR_imm *a)
6144{
6145    uint32_t val = ror32(a->imm, a->rot * 2);
6146    uint32_t mask = msr_mask(s, a->mask, a->r);
6147
6148    if (gen_set_psr_im(s, mask, a->r, val)) {
6149        unallocated_encoding(s);
6150    }
6151    return true;
6152}
6153
6154/*
6155 * Cyclic Redundancy Check
6156 */
6157
6158static bool op_crc32(DisasContext *s, arg_rrr *a, bool c, MemOp sz)
6159{
6160    TCGv_i32 t1, t2, t3;
6161
6162    if (!dc_isar_feature(aa32_crc32, s)) {
6163        return false;
6164    }
6165
6166    t1 = load_reg(s, a->rn);
6167    t2 = load_reg(s, a->rm);
6168    switch (sz) {
6169    case MO_8:
6170        gen_uxtb(t2);
6171        break;
6172    case MO_16:
6173        gen_uxth(t2);
6174        break;
6175    case MO_32:
6176        break;
6177    default:
6178        g_assert_not_reached();
6179    }
6180    t3 = tcg_constant_i32(1 << sz);
6181    if (c) {
6182        gen_helper_crc32c(t1, t1, t2, t3);
6183    } else {
6184        gen_helper_crc32(t1, t1, t2, t3);
6185    }
6186    store_reg(s, a->rd, t1);
6187    return true;
6188}
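
/*
 * The third helper operand above is the number of input bytes:
 * 1 << MO_8 == 1, 1 << MO_16 == 2, 1 << MO_32 == 4, so e.g. CRC32B feeds a
 * single zero-extended byte of Rm into the accumulated CRC in Rn.
 */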
6189
6190#define DO_CRC32(NAME, c, sz) \
6191static bool trans_##NAME(DisasContext *s, arg_rrr *a)  \
6192    { return op_crc32(s, a, c, sz); }
6193
6194DO_CRC32(CRC32B, false, MO_8)
6195DO_CRC32(CRC32H, false, MO_16)
6196DO_CRC32(CRC32W, false, MO_32)
6197DO_CRC32(CRC32CB, true, MO_8)
6198DO_CRC32(CRC32CH, true, MO_16)
6199DO_CRC32(CRC32CW, true, MO_32)
6200
6201#undef DO_CRC32
6202
6203/*
6204 * Miscellaneous instructions
6205 */
6206
6207static bool trans_MRS_bank(DisasContext *s, arg_MRS_bank *a)
6208{
6209    if (arm_dc_feature(s, ARM_FEATURE_M)) {
6210        return false;
6211    }
6212    gen_mrs_banked(s, a->r, a->sysm, a->rd);
6213    return true;
6214}
6215
6216static bool trans_MSR_bank(DisasContext *s, arg_MSR_bank *a)
6217{
6218    if (arm_dc_feature(s, ARM_FEATURE_M)) {
6219        return false;
6220    }
6221    gen_msr_banked(s, a->r, a->sysm, a->rn);
6222    return true;
6223}
6224
6225static bool trans_MRS_reg(DisasContext *s, arg_MRS_reg *a)
6226{
6227    TCGv_i32 tmp;
6228
6229    if (arm_dc_feature(s, ARM_FEATURE_M)) {
6230        return false;
6231    }
6232    if (a->r) {
6233        if (IS_USER(s)) {
6234            unallocated_encoding(s);
6235            return true;
6236        }
6237        tmp = load_cpu_field(spsr);
6238    } else {
6239        tmp = tcg_temp_new_i32();
6240        gen_helper_cpsr_read(tmp, cpu_env);
6241    }
6242    store_reg(s, a->rd, tmp);
6243    return true;
6244}
6245
6246static bool trans_MSR_reg(DisasContext *s, arg_MSR_reg *a)
6247{
6248    TCGv_i32 tmp;
6249    uint32_t mask = msr_mask(s, a->mask, a->r);
6250
6251    if (arm_dc_feature(s, ARM_FEATURE_M)) {
6252        return false;
6253    }
6254    tmp = load_reg(s, a->rn);
6255    if (gen_set_psr(s, mask, a->r, tmp)) {
6256        unallocated_encoding(s);
6257    }
6258    return true;
6259}
6260
6261static bool trans_MRS_v7m(DisasContext *s, arg_MRS_v7m *a)
6262{
6263    TCGv_i32 tmp;
6264
6265    if (!arm_dc_feature(s, ARM_FEATURE_M)) {
6266        return false;
6267    }
6268    tmp = tcg_temp_new_i32();
6269    gen_helper_v7m_mrs(tmp, cpu_env, tcg_constant_i32(a->sysm));
6270    store_reg(s, a->rd, tmp);
6271    return true;
6272}
6273
6274static bool trans_MSR_v7m(DisasContext *s, arg_MSR_v7m *a)
6275{
6276    TCGv_i32 addr, reg;
6277
6278    if (!arm_dc_feature(s, ARM_FEATURE_M)) {
6279        return false;
6280    }
6281    addr = tcg_constant_i32((a->mask << 10) | a->sysm);
6282    reg = load_reg(s, a->rn);
6283    gen_helper_v7m_msr(cpu_env, addr, reg);
6284    /* If we wrote to CONTROL, the EL might have changed */
6285    gen_rebuild_hflags(s, true);
6286    gen_lookup_tb(s);
6287    return true;
6288}
6289
6290static bool trans_BX(DisasContext *s, arg_BX *a)
6291{
6292    if (!ENABLE_ARCH_4T) {
6293        return false;
6294    }
6295    gen_bx_excret(s, load_reg(s, a->rm));
6296    return true;
6297}
6298
6299static bool trans_BXJ(DisasContext *s, arg_BXJ *a)
6300{
6301    if (!ENABLE_ARCH_5J || arm_dc_feature(s, ARM_FEATURE_M)) {
6302        return false;
6303    }
6304    /*
6305     * v7A allows BXJ to be trapped via HSTR.TJDBX. We don't waste a
6306     * TBFLAGS bit on a basically-never-happens case, so call a helper
6307     * function to check for the trap and raise the exception if needed
6308     * (passing it the register number for the syndrome value).
6309     * v8A doesn't have this HSTR bit.
6310     */
6311    if (!arm_dc_feature(s, ARM_FEATURE_V8) &&
6312        arm_dc_feature(s, ARM_FEATURE_EL2) &&
6313        s->current_el < 2 && s->ns) {
6314        gen_helper_check_bxj_trap(cpu_env, tcg_constant_i32(a->rm));
6315    }
6316    /* Trivial implementation equivalent to bx.  */
6317    gen_bx(s, load_reg(s, a->rm));
6318    return true;
6319}
6320
6321static bool trans_BLX_r(DisasContext *s, arg_BLX_r *a)
6322{
6323    TCGv_i32 tmp;
6324
6325    if (!ENABLE_ARCH_5) {
6326        return false;
6327    }
6328    tmp = load_reg(s, a->rm);
6329    gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | s->thumb);
6330    gen_bx(s, tmp);
6331    return true;
6332}
6333
6334/*
6335 * BXNS/BLXNS: only exist for v8M with the security extensions,
6336 * and always UNDEF if NonSecure.  We don't implement these in
6337 * the user-only mode either (in theory you can use them from
6338 * Secure User mode but they are too tied in to system emulation).
6339 */
6340static bool trans_BXNS(DisasContext *s, arg_BXNS *a)
6341{
6342    if (!s->v8m_secure || IS_USER_ONLY) {
6343        unallocated_encoding(s);
6344    } else {
6345        gen_bxns(s, a->rm);
6346    }
6347    return true;
6348}
6349
6350static bool trans_BLXNS(DisasContext *s, arg_BLXNS *a)
6351{
6352    if (!s->v8m_secure || IS_USER_ONLY) {
6353        unallocated_encoding(s);
6354    } else {
6355        gen_blxns(s, a->rm);
6356    }
6357    return true;
6358}
6359
6360static bool trans_CLZ(DisasContext *s, arg_CLZ *a)
6361{
6362    TCGv_i32 tmp;
6363
6364    if (!ENABLE_ARCH_5) {
6365        return false;
6366    }
6367    tmp = load_reg(s, a->rm);
6368    tcg_gen_clzi_i32(tmp, tmp, 32);
6369    store_reg(s, a->rd, tmp);
6370    return true;
6371}
6372
6373static bool trans_ERET(DisasContext *s, arg_ERET *a)
6374{
6375    TCGv_i32 tmp;
6376
6377    if (!arm_dc_feature(s, ARM_FEATURE_V7VE)) {
6378        return false;
6379    }
6380    if (IS_USER(s)) {
6381        unallocated_encoding(s);
6382        return true;
6383    }
6384    if (s->current_el == 2) {
6385        /* ERET from Hyp uses ELR_Hyp, not LR */
6386        tmp = load_cpu_field_low32(elr_el[2]);
6387    } else {
6388        tmp = load_reg(s, 14);
6389    }
6390    gen_exception_return(s, tmp);
6391    return true;
6392}
6393
6394static bool trans_HLT(DisasContext *s, arg_HLT *a)
6395{
6396    gen_hlt(s, a->imm);
6397    return true;
6398}
6399
6400static bool trans_BKPT(DisasContext *s, arg_BKPT *a)
6401{
6402    if (!ENABLE_ARCH_5) {
6403        return false;
6404    }
6405    /* BKPT is OK with ECI set and leaves it untouched */
6406    s->eci_handled = true;
6407    if (arm_dc_feature(s, ARM_FEATURE_M) &&
6408        semihosting_enabled(s->current_el == 0) &&
6409        (a->imm == 0xab)) {
6410        gen_exception_internal_insn(s, EXCP_SEMIHOST);
6411    } else {
6412        gen_exception_bkpt_insn(s, syn_aa32_bkpt(a->imm, false));
6413    }
6414    return true;
6415}
6416
6417static bool trans_HVC(DisasContext *s, arg_HVC *a)
6418{
6419    if (!ENABLE_ARCH_7 || arm_dc_feature(s, ARM_FEATURE_M)) {
6420        return false;
6421    }
6422    if (IS_USER(s)) {
6423        unallocated_encoding(s);
6424    } else {
6425        gen_hvc(s, a->imm);
6426    }
6427    return true;
6428}
6429
6430static bool trans_SMC(DisasContext *s, arg_SMC *a)
6431{
6432    if (!ENABLE_ARCH_6K || arm_dc_feature(s, ARM_FEATURE_M)) {
6433        return false;
6434    }
6435    if (IS_USER(s)) {
6436        unallocated_encoding(s);
6437    } else {
6438        gen_smc(s);
6439    }
6440    return true;
6441}
6442
6443static bool trans_SG(DisasContext *s, arg_SG *a)
6444{
6445    if (!arm_dc_feature(s, ARM_FEATURE_M) ||
6446        !arm_dc_feature(s, ARM_FEATURE_V8)) {
6447        return false;
6448    }
6449    /*
6450     * SG (v8M only)
6451     * The bulk of the behaviour for this instruction is implemented
6452     * in v7m_handle_execute_nsc(), which deals with the insn when
6453     * it is executed by a CPU in non-secure state from memory
6454     * which is Secure & NonSecure-Callable.
6455     * Here we only need to handle the remaining cases:
6456     *  * in NS memory (including the "security extension not
6457     *    implemented" case) : NOP
6458     *  * in S memory but CPU already secure (clear IT bits)
6459     * We know that the attribute for the memory this insn is
6460     * in must match the current CPU state, because otherwise
6461     * get_phys_addr_pmsav8 would have generated an exception.
6462     */
6463    if (s->v8m_secure) {
6464        /* Like the IT insn, we don't need to generate any code */
6465        s->condexec_cond = 0;
6466        s->condexec_mask = 0;
6467    }
6468    return true;
6469}
6470
6471static bool trans_TT(DisasContext *s, arg_TT *a)
6472{
6473    TCGv_i32 addr, tmp;
6474
6475    if (!arm_dc_feature(s, ARM_FEATURE_M) ||
6476        !arm_dc_feature(s, ARM_FEATURE_V8)) {
6477        return false;
6478    }
6479    if (a->rd == 13 || a->rd == 15 || a->rn == 15) {
6480        /* We UNDEF for these UNPREDICTABLE cases */
6481        unallocated_encoding(s);
6482        return true;
6483    }
6484    if (a->A && !s->v8m_secure) {
6485        /* This case is UNDEFINED.  */
6486        unallocated_encoding(s);
6487        return true;
6488    }
6489
6490    addr = load_reg(s, a->rn);
6491    tmp = tcg_temp_new_i32();
6492    gen_helper_v7m_tt(tmp, cpu_env, addr, tcg_constant_i32((a->A << 1) | a->T));
6493    store_reg(s, a->rd, tmp);
6494    return true;
6495}
6496
6497/*
6498 * Load/store register index
6499 */
6500
6501static ISSInfo make_issinfo(DisasContext *s, int rd, bool p, bool w)
6502{
6503    ISSInfo ret;
6504
6505    /* ISS not valid if writeback */
6506    if (p && !w) {
6507        ret = rd;
6508        if (curr_insn_len(s) == 2) {
6509            ret |= ISSIs16Bit;
6510        }
6511    } else {
6512        ret = ISSInvalid;
6513    }
6514    return ret;
6515}
6516
6517static TCGv_i32 op_addr_rr_pre(DisasContext *s, arg_ldst_rr *a)
6518{
6519    TCGv_i32 addr = load_reg(s, a->rn);
6520
6521    if (s->v8m_stackcheck && a->rn == 13 && a->w) {
6522        gen_helper_v8m_stackcheck(cpu_env, addr);
6523    }
6524
6525    if (a->p) {
6526        TCGv_i32 ofs = load_reg(s, a->rm);
6527        gen_arm_shift_im(ofs, a->shtype, a->shimm, 0);
6528        if (a->u) {
6529            tcg_gen_add_i32(addr, addr, ofs);
6530        } else {
6531            tcg_gen_sub_i32(addr, addr, ofs);
6532        }
6533    }
6534    return addr;
6535}
6536
6537static void op_addr_rr_post(DisasContext *s, arg_ldst_rr *a,
6538                            TCGv_i32 addr, int address_offset)
6539{
6540    if (!a->p) {
6541        TCGv_i32 ofs = load_reg(s, a->rm);
6542        gen_arm_shift_im(ofs, a->shtype, a->shimm, 0);
6543        if (a->u) {
6544            tcg_gen_add_i32(addr, addr, ofs);
6545        } else {
6546            tcg_gen_sub_i32(addr, addr, ofs);
6547        }
6548    } else if (!a->w) {
6549        return;
6550    }
6551    tcg_gen_addi_i32(addr, addr, address_offset);
6552    store_reg(s, a->rn, addr);
6553}
6554
6555static bool op_load_rr(DisasContext *s, arg_ldst_rr *a,
6556                       MemOp mop, int mem_idx)
6557{
6558    ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w);
6559    TCGv_i32 addr, tmp;
6560
6561    addr = op_addr_rr_pre(s, a);
6562
6563    tmp = tcg_temp_new_i32();
6564    gen_aa32_ld_i32(s, tmp, addr, mem_idx, mop);
6565    disas_set_da_iss(s, mop, issinfo);
6566
6567    /*
6568     * Perform base writeback before the loaded value to
6569     * ensure correct behavior with overlapping index registers.
6570     */
6571    op_addr_rr_post(s, a, addr, 0);
6572    store_reg_from_load(s, a->rt, tmp);
6573    return true;
6574}
6575
6576static bool op_store_rr(DisasContext *s, arg_ldst_rr *a,
6577                        MemOp mop, int mem_idx)
6578{
6579    ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w) | ISSIsWrite;
6580    TCGv_i32 addr, tmp;
6581
6582    /*
6583     * In Thumb encodings of stores Rn=1111 is UNDEF; for Arm it
6584     * is either UNPREDICTABLE or has defined behaviour
6585     */
6586    if (s->thumb && a->rn == 15) {
6587        return false;
6588    }
6589
6590    addr = op_addr_rr_pre(s, a);
6591
6592    tmp = load_reg(s, a->rt);
6593    gen_aa32_st_i32(s, tmp, addr, mem_idx, mop);
6594    disas_set_da_iss(s, mop, issinfo);
6595
6596    op_addr_rr_post(s, a, addr, 0);
6597    return true;
6598}
6599
6600static bool trans_LDRD_rr(DisasContext *s, arg_ldst_rr *a)
6601{
6602    int mem_idx = get_mem_index(s);
6603    TCGv_i32 addr, tmp;
6604
6605    if (!ENABLE_ARCH_5TE) {
6606        return false;
6607    }
6608    if (a->rt & 1) {
6609        unallocated_encoding(s);
6610        return true;
6611    }
6612    addr = op_addr_rr_pre(s, a);
6613
6614    tmp = tcg_temp_new_i32();
6615    gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6616    store_reg(s, a->rt, tmp);
6617
6618    tcg_gen_addi_i32(addr, addr, 4);
6619
6620    tmp = tcg_temp_new_i32();
6621    gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6622    store_reg(s, a->rt + 1, tmp);
6623
6624    /* LDRD w/ base writeback is undefined if the registers overlap.  */
6625    op_addr_rr_post(s, a, addr, -4);
6626    return true;
6627}
6628
6629static bool trans_STRD_rr(DisasContext *s, arg_ldst_rr *a)
6630{
6631    int mem_idx = get_mem_index(s);
6632    TCGv_i32 addr, tmp;
6633
6634    if (!ENABLE_ARCH_5TE) {
6635        return false;
6636    }
6637    if (a->rt & 1) {
6638        unallocated_encoding(s);
6639        return true;
6640    }
6641    addr = op_addr_rr_pre(s, a);
6642
6643    tmp = load_reg(s, a->rt);
6644    gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6645
6646    tcg_gen_addi_i32(addr, addr, 4);
6647
6648    tmp = load_reg(s, a->rt + 1);
6649    gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6650
6651    op_addr_rr_post(s, a, addr, -4);
6652    return true;
6653}
6654
6655/*
6656 * Load/store immediate index
6657 */
6658
6659static TCGv_i32 op_addr_ri_pre(DisasContext *s, arg_ldst_ri *a)
6660{
6661    int ofs = a->imm;
6662
6663    if (!a->u) {
6664        ofs = -ofs;
6665    }
6666
6667    if (s->v8m_stackcheck && a->rn == 13 && a->w) {
6668        /*
6669         * Stackcheck. Here we know 'addr' is the current SP;
6670         * U is set if we're moving SP up, else down. It is
6671         * UNKNOWN whether the limit check triggers when SP starts
6672         * below the limit and ends up above it; we chose to do so.
6673         */
6674        if (!a->u) {
6675            TCGv_i32 newsp = tcg_temp_new_i32();
6676            tcg_gen_addi_i32(newsp, cpu_R[13], ofs);
6677            gen_helper_v8m_stackcheck(cpu_env, newsp);
6678        } else {
6679            gen_helper_v8m_stackcheck(cpu_env, cpu_R[13]);
6680        }
6681    }
6682
6683    return add_reg_for_lit(s, a->rn, a->p ? ofs : 0);
6684}
6685
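    /*
     * Finish an immediate-offset load/store: apply any post-index or
     * writeback update to the base register. address_offset is an extra
     * fixup applied on top (e.g. -4 to undo the second-word advance made
     * by the LDRD/STRD sequences).
     */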
6686static void op_addr_ri_post(DisasContext *s, arg_ldst_ri *a,
6687                            TCGv_i32 addr, int address_offset)
6688{
6689    if (!a->p) {
6690        if (a->u) {
6691            address_offset += a->imm;
6692        } else {
6693            address_offset -= a->imm;
6694        }
6695    } else if (!a->w) {
6696        return;
6697    }
6698    tcg_gen_addi_i32(addr, addr, address_offset);
6699    store_reg(s, a->rn, addr);
6700}
6701
6702static bool op_load_ri(DisasContext *s, arg_ldst_ri *a,
6703                       MemOp mop, int mem_idx)
6704{
6705    ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w);
6706    TCGv_i32 addr, tmp;
6707
6708    addr = op_addr_ri_pre(s, a);
6709
6710    tmp = tcg_temp_new_i32();
6711    gen_aa32_ld_i32(s, tmp, addr, mem_idx, mop);
6712    disas_set_da_iss(s, mop, issinfo);
6713
6714    /*
6715     * Perform base writeback before the loaded value to
6716     * ensure correct behavior with overlapping index registers.
6717     */
6718    op_addr_ri_post(s, a, addr, 0);
6719    store_reg_from_load(s, a->rt, tmp);
6720    return true;
6721}
6722
6723static bool op_store_ri(DisasContext *s, arg_ldst_ri *a,
6724                        MemOp mop, int mem_idx)
6725{
6726    ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w) | ISSIsWrite;
6727    TCGv_i32 addr, tmp;
6728
6729    /*
6730     * In Thumb encodings of stores Rn=1111 is UNDEF; for Arm it
6731     * is either UNPREDICTABLE or has defined behaviour
6732     */
6733    if (s->thumb && a->rn == 15) {
6734        return false;
6735    }
6736
6737    addr = op_addr_ri_pre(s, a);
6738
6739    tmp = load_reg(s, a->rt);
6740    gen_aa32_st_i32(s, tmp, addr, mem_idx, mop);
6741    disas_set_da_iss(s, mop, issinfo);
6742
6743    op_addr_ri_post(s, a, addr, 0);
6744    return true;
6745}
6746
6747static bool op_ldrd_ri(DisasContext *s, arg_ldst_ri *a, int rt2)
6748{
6749    int mem_idx = get_mem_index(s);
6750    TCGv_i32 addr, tmp;
6751
6752    addr = op_addr_ri_pre(s, a);
6753
6754    tmp = tcg_temp_new_i32();
6755    gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6756    store_reg(s, a->rt, tmp);
6757
6758    tcg_gen_addi_i32(addr, addr, 4);
6759
6760    tmp = tcg_temp_new_i32();
6761    gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6762    store_reg(s, rt2, tmp);
6763
6764    /* LDRD w/ base writeback is undefined if the registers overlap.  */
6765    op_addr_ri_post(s, a, addr, -4);
6766    return true;
6767}
6768
6769static bool trans_LDRD_ri_a32(DisasContext *s, arg_ldst_ri *a)
6770{
6771    if (!ENABLE_ARCH_5TE || (a->rt & 1)) {
6772        return false;
6773    }
6774    return op_ldrd_ri(s, a, a->rt + 1);
6775}
6776
6777static bool trans_LDRD_ri_t32(DisasContext *s, arg_ldst_ri2 *a)
6778{
6779    arg_ldst_ri b = {
6780        .u = a->u, .w = a->w, .p = a->p,
6781        .rn = a->rn, .rt = a->rt, .imm = a->imm
6782    };
6783    return op_ldrd_ri(s, &b, a->rt2);
6784}
6785
6786static bool op_strd_ri(DisasContext *s, arg_ldst_ri *a, int rt2)
6787{
6788    int mem_idx = get_mem_index(s);
6789    TCGv_i32 addr, tmp;
6790
6791    addr = op_addr_ri_pre(s, a);
6792
6793    tmp = load_reg(s, a->rt);
6794    gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6795
6796    tcg_gen_addi_i32(addr, addr, 4);
6797
6798    tmp = load_reg(s, rt2);
6799    gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6800
6801    op_addr_ri_post(s, a, addr, -4);
6802    return true;
6803}
6804
6805static bool trans_STRD_ri_a32(DisasContext *s, arg_ldst_ri *a)
6806{
6807    if (!ENABLE_ARCH_5TE || (a->rt & 1)) {
6808        return false;
6809    }
6810    return op_strd_ri(s, a, a->rt + 1);
6811}
6812
6813static bool trans_STRD_ri_t32(DisasContext *s, arg_ldst_ri2 *a)
6814{
6815    arg_ldst_ri b = {
6816        .u = a->u, .w = a->w, .p = a->p,
6817        .rn = a->rn, .rt = a->rt, .imm = a->imm
6818    };
6819    return op_strd_ri(s, &b, a->rt2);
6820}
6821
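    /*
     * Expand the four trans functions for one load/store insn: immediate
     * and register forms using the current mem index, plus the ...T forms
     * (LDRT/STRT etc) which use the unprivileged-access mem index.
     */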
6822#define DO_LDST(NAME, WHICH, MEMOP) \
6823static bool trans_##NAME##_ri(DisasContext *s, arg_ldst_ri *a)        \
6824{                                                                     \
6825    return op_##WHICH##_ri(s, a, MEMOP, get_mem_index(s));            \
6826}                                                                     \
6827static bool trans_##NAME##T_ri(DisasContext *s, arg_ldst_ri *a)       \
6828{                                                                     \
6829    return op_##WHICH##_ri(s, a, MEMOP, get_a32_user_mem_index(s));   \
6830}                                                                     \
6831static bool trans_##NAME##_rr(DisasContext *s, arg_ldst_rr *a)        \
6832{                                                                     \
6833    return op_##WHICH##_rr(s, a, MEMOP, get_mem_index(s));            \
6834}                                                                     \
6835static bool trans_##NAME##T_rr(DisasContext *s, arg_ldst_rr *a)       \
6836{                                                                     \
6837    return op_##WHICH##_rr(s, a, MEMOP, get_a32_user_mem_index(s));   \
6838}
6839
6840DO_LDST(LDR, load, MO_UL)
6841DO_LDST(LDRB, load, MO_UB)
6842DO_LDST(LDRH, load, MO_UW)
6843DO_LDST(LDRSB, load, MO_SB)
6844DO_LDST(LDRSH, load, MO_SW)
6845
6846DO_LDST(STR, store, MO_UL)
6847DO_LDST(STRB, store, MO_UB)
6848DO_LDST(STRH, store, MO_UW)
6849
6850#undef DO_LDST
6851
6852/*
6853 * Synchronization primitives
6854 */
6855
6856static bool op_swp(DisasContext *s, arg_SWP *a, MemOp opc)
6857{
6858    TCGv_i32 addr, tmp;
6859    TCGv taddr;
6860
6861    opc |= s->be_data;
6862    addr = load_reg(s, a->rn);
6863    taddr = gen_aa32_addr(s, addr, opc);
6864
6865    tmp = load_reg(s, a->rt2);
6866    tcg_gen_atomic_xchg_i32(tmp, taddr, tmp, get_mem_index(s), opc);
6867
6868    store_reg(s, a->rt, tmp);
6869    return true;
6870}
6871
6872static bool trans_SWP(DisasContext *s, arg_SWP *a)
6873{
6874    return op_swp(s, a, MO_UL | MO_ALIGN);
6875}
6876
6877static bool trans_SWPB(DisasContext *s, arg_SWP *a)
6878{
6879    return op_swp(s, a, MO_UB);
6880}
6881
6882/*
6883 * Load/Store Exclusive and Load-Acquire/Store-Release
6884 */
6885
6886static bool op_strex(DisasContext *s, arg_STREX *a, MemOp mop, bool rel)
6887{
6888    TCGv_i32 addr;
6889    /* Some cases stopped being UNPREDICTABLE in v8A (but not v8M) */
6890    bool v8a = ENABLE_ARCH_8 && !arm_dc_feature(s, ARM_FEATURE_M);
6891
6892    /* We UNDEF for these UNPREDICTABLE cases.  */
6893    if (a->rd == 15 || a->rn == 15 || a->rt == 15
6894        || a->rd == a->rn || a->rd == a->rt
6895        || (!v8a && s->thumb && (a->rd == 13 || a->rt == 13))
6896        || (mop == MO_64
6897            && (a->rt2 == 15
6898                || a->rd == a->rt2
6899                || (!v8a && s->thumb && a->rt2 == 13)))) {
6900        unallocated_encoding(s);
6901        return true;
6902    }
6903
6904    if (rel) {
6905        tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
6906    }
6907
6908    addr = tcg_temp_new_i32();
6909    load_reg_var(s, addr, a->rn);
6910    tcg_gen_addi_i32(addr, addr, a->imm);
6911
6912    gen_store_exclusive(s, a->rd, a->rt, a->rt2, addr, mop);
6913    return true;
6914}
6915
6916static bool trans_STREX(DisasContext *s, arg_STREX *a)
6917{
6918    if (!ENABLE_ARCH_6) {
6919        return false;
6920    }
6921    return op_strex(s, a, MO_32, false);
6922}
6923
6924static bool trans_STREXD_a32(DisasContext *s, arg_STREX *a)
6925{
6926    if (!ENABLE_ARCH_6K) {
6927        return false;
6928    }
6929    /* We UNDEF for these UNPREDICTABLE cases.  */
6930    if (a->rt & 1) {
6931        unallocated_encoding(s);
6932        return true;
6933    }
6934    a->rt2 = a->rt + 1;
6935    return op_strex(s, a, MO_64, false);
6936}
6937
6938static bool trans_STREXD_t32(DisasContext *s, arg_STREX *a)
6939{
6940    return op_strex(s, a, MO_64, false);
6941}
6942
6943static bool trans_STREXB(DisasContext *s, arg_STREX *a)
6944{
6945    if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
6946        return false;
6947    }
6948    return op_strex(s, a, MO_8, false);
6949}
6950
6951static bool trans_STREXH(DisasContext *s, arg_STREX *a)
6952{
6953    if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
6954        return false;
6955    }
6956    return op_strex(s, a, MO_16, false);
6957}
6958
6959static bool trans_STLEX(DisasContext *s, arg_STREX *a)
6960{
6961    if (!ENABLE_ARCH_8) {
6962        return false;
6963    }
6964    return op_strex(s, a, MO_32, true);
6965}
6966
6967static bool trans_STLEXD_a32(DisasContext *s, arg_STREX *a)
6968{
6969    if (!ENABLE_ARCH_8) {
6970        return false;
6971    }
6972    /* We UNDEF for these UNPREDICTABLE cases.  */
6973    if (a->rt & 1) {
6974        unallocated_encoding(s);
6975        return true;
6976    }
6977    a->rt2 = a->rt + 1;
6978    return op_strex(s, a, MO_64, true);
6979}
6980
6981static bool trans_STLEXD_t32(DisasContext *s, arg_STREX *a)
6982{
6983    if (!ENABLE_ARCH_8) {
6984        return false;
6985    }
6986    return op_strex(s, a, MO_64, true);
6987}
6988
6989static bool trans_STLEXB(DisasContext *s, arg_STREX *a)
6990{
6991    if (!ENABLE_ARCH_8) {
6992        return false;
6993    }
6994    return op_strex(s, a, MO_8, true);
6995}
6996
6997static bool trans_STLEXH(DisasContext *s, arg_STREX *a)
6998{
6999    if (!ENABLE_ARCH_8) {
7000        return false;
7001    }
7002    return op_strex(s, a, MO_16, true);
7003}
7004
7005static bool op_stl(DisasContext *s, arg_STL *a, MemOp mop)
7006{
7007    TCGv_i32 addr, tmp;
7008
7009    if (!ENABLE_ARCH_8) {
7010        return false;
7011    }
7012    /* We UNDEF for these UNPREDICTABLE cases.  */
7013    if (a->rn == 15 || a->rt == 15) {
7014        unallocated_encoding(s);
7015        return true;
7016    }
7017
7018    addr = load_reg(s, a->rn);
7019    tmp = load_reg(s, a->rt);
7020    tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
7021    gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), mop | MO_ALIGN);
7022    disas_set_da_iss(s, mop, a->rt | ISSIsAcqRel | ISSIsWrite);
7023
7024    return true;
7025}
7026
7027static bool trans_STL(DisasContext *s, arg_STL *a)
7028{
7029    return op_stl(s, a, MO_UL);
7030}
7031
7032static bool trans_STLB(DisasContext *s, arg_STL *a)
7033{
7034    return op_stl(s, a, MO_UB);
7035}
7036
7037static bool trans_STLH(DisasContext *s, arg_STL *a)
7038{
7039    return op_stl(s, a, MO_UW);
7040}
7041
7042static bool op_ldrex(DisasContext *s, arg_LDREX *a, MemOp mop, bool acq)
7043{
7044    TCGv_i32 addr;
7045    /* Some cases stopped being UNPREDICTABLE in v8A (but not v8M) */
7046    bool v8a = ENABLE_ARCH_8 && !arm_dc_feature(s, ARM_FEATURE_M);
7047
7048    /* We UNDEF for these UNPREDICTABLE cases.  */
7049    if (a->rn == 15 || a->rt == 15
7050        || (!v8a && s->thumb && a->rt == 13)
7051        || (mop == MO_64
7052            && (a->rt2 == 15 || a->rt == a->rt2
7053                || (!v8a && s->thumb && a->rt2 == 13)))) {
7054        unallocated_encoding(s);
7055        return true;
7056    }
7057
7058    addr = tcg_temp_new_i32();
7059    load_reg_var(s, addr, a->rn);
7060    tcg_gen_addi_i32(addr, addr, a->imm);
7061
7062    gen_load_exclusive(s, a->rt, a->rt2, addr, mop);
7063
7064    if (acq) {
7065        tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
7066    }
7067    return true;
7068}
7069
7070static bool trans_LDREX(DisasContext *s, arg_LDREX *a)
7071{
7072    if (!ENABLE_ARCH_6) {
7073        return false;
7074    }
7075    return op_ldrex(s, a, MO_32, false);
7076}
7077
7078static bool trans_LDREXD_a32(DisasContext *s, arg_LDREX *a)
7079{
7080    if (!ENABLE_ARCH_6K) {
7081        return false;
7082    }
7083    /* We UNDEF for these UNPREDICTABLE cases.  */
7084    if (a->rt & 1) {
7085        unallocated_encoding(s);
7086        return true;
7087    }
7088    a->rt2 = a->rt + 1;
7089    return op_ldrex(s, a, MO_64, false);
7090}
7091
7092static bool trans_LDREXD_t32(DisasContext *s, arg_LDREX *a)
7093{
7094    return op_ldrex(s, a, MO_64, false);
7095}
7096
7097static bool trans_LDREXB(DisasContext *s, arg_LDREX *a)
7098{
7099    if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
7100        return false;
7101    }
7102    return op_ldrex(s, a, MO_8, false);
7103}
7104
7105static bool trans_LDREXH(DisasContext *s, arg_LDREX *a)
7106{
7107    if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
7108        return false;
7109    }
7110    return op_ldrex(s, a, MO_16, false);
7111}
7112
7113static bool trans_LDAEX(DisasContext *s, arg_LDREX *a)
7114{
7115    if (!ENABLE_ARCH_8) {
7116        return false;
7117    }
7118    return op_ldrex(s, a, MO_32, true);
7119}
7120
7121static bool trans_LDAEXD_a32(DisasContext *s, arg_LDREX *a)
7122{
7123    if (!ENABLE_ARCH_8) {
7124        return false;
7125    }
7126    /* We UNDEF for these UNPREDICTABLE cases.  */
7127    if (a->rt & 1) {
7128        unallocated_encoding(s);
7129        return true;
7130    }
7131    a->rt2 = a->rt + 1;
7132    return op_ldrex(s, a, MO_64, true);
7133}
7134
7135static bool trans_LDAEXD_t32(DisasContext *s, arg_LDREX *a)
7136{
7137    if (!ENABLE_ARCH_8) {
7138        return false;
7139    }
7140    return op_ldrex(s, a, MO_64, true);
7141}
7142
7143static bool trans_LDAEXB(DisasContext *s, arg_LDREX *a)
7144{
7145    if (!ENABLE_ARCH_8) {
7146        return false;
7147    }
7148    return op_ldrex(s, a, MO_8, true);
7149}
7150
7151static bool trans_LDAEXH(DisasContext *s, arg_LDREX *a)
7152{
7153    if (!ENABLE_ARCH_8) {
7154        return false;
7155    }
7156    return op_ldrex(s, a, MO_16, true);
7157}
7158
7159static bool op_lda(DisasContext *s, arg_LDA *a, MemOp mop)
7160{
7161    TCGv_i32 addr, tmp;
7162
7163    if (!ENABLE_ARCH_8) {
7164        return false;
7165    }
7166    /* We UNDEF for these UNPREDICTABLE cases.  */
7167    if (a->rn == 15 || a->rt == 15) {
7168        unallocated_encoding(s);
7169        return true;
7170    }
7171
7172    addr = load_reg(s, a->rn);
7173    tmp = tcg_temp_new_i32();
7174    gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), mop | MO_ALIGN);
7175    disas_set_da_iss(s, mop, a->rt | ISSIsAcqRel);
7176
7177    store_reg(s, a->rt, tmp);
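        /* Load-acquire: order this load before any later loads and stores. */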
7178    tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
7179    return true;
7180}
7181
7182static bool trans_LDA(DisasContext *s, arg_LDA *a)
7183{
7184    return op_lda(s, a, MO_UL);
7185}
7186
7187static bool trans_LDAB(DisasContext *s, arg_LDA *a)
7188{
7189    return op_lda(s, a, MO_UB);
7190}
7191
7192static bool trans_LDAH(DisasContext *s, arg_LDA *a)
7193{
7194    return op_lda(s, a, MO_UW);
7195}
7196
7197/*
7198 * Media instructions
7199 */
7200
7201static bool trans_USADA8(DisasContext *s, arg_USADA8 *a)
7202{
7203    TCGv_i32 t1, t2;
7204
7205    if (!ENABLE_ARCH_6) {
7206        return false;
7207    }
7208
7209    t1 = load_reg(s, a->rn);
7210    t2 = load_reg(s, a->rm);
7211    gen_helper_usad8(t1, t1, t2);
7212    if (a->ra != 15) {
7213        t2 = load_reg(s, a->ra);
7214        tcg_gen_add_i32(t1, t1, t2);
7215    }
7216    store_reg(s, a->rd, t1);
7217    return true;
7218}
7219
7220static bool op_bfx(DisasContext *s, arg_UBFX *a, bool u)
7221{
7222    TCGv_i32 tmp;
7223    int width = a->widthm1 + 1;
7224    int shift = a->lsb;
7225
7226    if (!ENABLE_ARCH_6T2) {
7227        return false;
7228    }
7229    if (shift + width > 32) {
7230        /* UNPREDICTABLE; we choose to UNDEF */
7231        unallocated_encoding(s);
7232        return true;
7233    }
7234
7235    tmp = load_reg(s, a->rn);
7236    if (u) {
7237        tcg_gen_extract_i32(tmp, tmp, shift, width);
7238    } else {
7239        tcg_gen_sextract_i32(tmp, tmp, shift, width);
7240    }
7241    store_reg(s, a->rd, tmp);
7242    return true;
7243}
7244
7245static bool trans_SBFX(DisasContext *s, arg_SBFX *a)
7246{
7247    return op_bfx(s, a, false);
7248}
7249
7250static bool trans_UBFX(DisasContext *s, arg_UBFX *a)
7251{
7252    return op_bfx(s, a, true);
7253}
7254
7255static bool trans_BFCI(DisasContext *s, arg_BFCI *a)
7256{
7257    int msb = a->msb, lsb = a->lsb;
7258    TCGv_i32 t_in, t_rd;
7259    int width;
7260
7261    if (!ENABLE_ARCH_6T2) {
7262        return false;
7263    }
7264    if (msb < lsb) {
7265        /* UNPREDICTABLE; we choose to UNDEF */
7266        unallocated_encoding(s);
7267        return true;
7268    }
7269
7270    width = msb + 1 - lsb;
7271    if (a->rn == 15) {
7272        /* BFC */
7273        t_in = tcg_constant_i32(0);
7274    } else {
7275        /* BFI */
7276        t_in = load_reg(s, a->rn);
7277    }
7278    t_rd = load_reg(s, a->rd);
7279    tcg_gen_deposit_i32(t_rd, t_rd, t_in, lsb, width);
7280    store_reg(s, a->rd, t_rd);
7281    return true;
7282}
7283
7284static bool trans_UDF(DisasContext *s, arg_UDF *a)
7285{
7286    unallocated_encoding(s);
7287    return true;
7288}
7289
7290/*
7291 * Parallel addition and subtraction
7292 */
7293
7294static bool op_par_addsub(DisasContext *s, arg_rrr *a,
7295                          void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32))
7296{
7297    TCGv_i32 t0, t1;
7298
7299    if (s->thumb
7300        ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7301        : !ENABLE_ARCH_6) {
7302        return false;
7303    }
7304
7305    t0 = load_reg(s, a->rn);
7306    t1 = load_reg(s, a->rm);
7307
7308    gen(t0, t0, t1);
7309
7310    store_reg(s, a->rd, t0);
7311    return true;
7312}
7313
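    /*
     * As op_par_addsub(), but for the signed/unsigned add/sub helpers
     * that also set the GE flags: we pass them a pointer to env->GE.
     */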
7314static bool op_par_addsub_ge(DisasContext *s, arg_rrr *a,
7315                             void (*gen)(TCGv_i32, TCGv_i32,
7316                                         TCGv_i32, TCGv_ptr))
7317{
7318    TCGv_i32 t0, t1;
7319    TCGv_ptr ge;
7320
7321    if (s->thumb
7322        ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7323        : !ENABLE_ARCH_6) {
7324        return false;
7325    }
7326
7327    t0 = load_reg(s, a->rn);
7328    t1 = load_reg(s, a->rm);
7329
7330    ge = tcg_temp_new_ptr();
7331    tcg_gen_addi_ptr(ge, cpu_env, offsetof(CPUARMState, GE));
7332    gen(t0, t0, t1, ge);
7333
7334    store_reg(s, a->rd, t0);
7335    return true;
7336}
7337
7338#define DO_PAR_ADDSUB(NAME, helper) \
7339static bool trans_##NAME(DisasContext *s, arg_rrr *a)   \
7340{                                                       \
7341    return op_par_addsub(s, a, helper);                 \
7342}
7343
7344#define DO_PAR_ADDSUB_GE(NAME, helper) \
7345static bool trans_##NAME(DisasContext *s, arg_rrr *a)   \
7346{                                                       \
7347    return op_par_addsub_ge(s, a, helper);              \
7348}
7349
7350DO_PAR_ADDSUB_GE(SADD16, gen_helper_sadd16)
7351DO_PAR_ADDSUB_GE(SASX, gen_helper_saddsubx)
7352DO_PAR_ADDSUB_GE(SSAX, gen_helper_ssubaddx)
7353DO_PAR_ADDSUB_GE(SSUB16, gen_helper_ssub16)
7354DO_PAR_ADDSUB_GE(SADD8, gen_helper_sadd8)
7355DO_PAR_ADDSUB_GE(SSUB8, gen_helper_ssub8)
7356
7357DO_PAR_ADDSUB_GE(UADD16, gen_helper_uadd16)
7358DO_PAR_ADDSUB_GE(UASX, gen_helper_uaddsubx)
7359DO_PAR_ADDSUB_GE(USAX, gen_helper_usubaddx)
7360DO_PAR_ADDSUB_GE(USUB16, gen_helper_usub16)
7361DO_PAR_ADDSUB_GE(UADD8, gen_helper_uadd8)
7362DO_PAR_ADDSUB_GE(USUB8, gen_helper_usub8)
7363
7364DO_PAR_ADDSUB(QADD16, gen_helper_qadd16)
7365DO_PAR_ADDSUB(QASX, gen_helper_qaddsubx)
7366DO_PAR_ADDSUB(QSAX, gen_helper_qsubaddx)
7367DO_PAR_ADDSUB(QSUB16, gen_helper_qsub16)
7368DO_PAR_ADDSUB(QADD8, gen_helper_qadd8)
7369DO_PAR_ADDSUB(QSUB8, gen_helper_qsub8)
7370
7371DO_PAR_ADDSUB(UQADD16, gen_helper_uqadd16)
7372DO_PAR_ADDSUB(UQASX, gen_helper_uqaddsubx)
7373DO_PAR_ADDSUB(UQSAX, gen_helper_uqsubaddx)
7374DO_PAR_ADDSUB(UQSUB16, gen_helper_uqsub16)
7375DO_PAR_ADDSUB(UQADD8, gen_helper_uqadd8)
7376DO_PAR_ADDSUB(UQSUB8, gen_helper_uqsub8)
7377
7378DO_PAR_ADDSUB(SHADD16, gen_helper_shadd16)
7379DO_PAR_ADDSUB(SHASX, gen_helper_shaddsubx)
7380DO_PAR_ADDSUB(SHSAX, gen_helper_shsubaddx)
7381DO_PAR_ADDSUB(SHSUB16, gen_helper_shsub16)
7382DO_PAR_ADDSUB(SHADD8, gen_helper_shadd8)
7383DO_PAR_ADDSUB(SHSUB8, gen_helper_shsub8)
7384
7385DO_PAR_ADDSUB(UHADD16, gen_helper_uhadd16)
7386DO_PAR_ADDSUB(UHASX, gen_helper_uhaddsubx)
7387DO_PAR_ADDSUB(UHSAX, gen_helper_uhsubaddx)
7388DO_PAR_ADDSUB(UHSUB16, gen_helper_uhsub16)
7389DO_PAR_ADDSUB(UHADD8, gen_helper_uhadd8)
7390DO_PAR_ADDSUB(UHSUB8, gen_helper_uhsub8)
7391
7392#undef DO_PAR_ADDSUB
7393#undef DO_PAR_ADDSUB_GE
7394
7395/*
7396 * Packing, unpacking, saturation, and reversal
7397 */
7398
7399static bool trans_PKH(DisasContext *s, arg_PKH *a)
7400{
7401    TCGv_i32 tn, tm;
7402    int shift = a->imm;
7403
7404    if (s->thumb
7405        ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7406        : !ENABLE_ARCH_6) {
7407        return false;
7408    }
7409
7410    tn = load_reg(s, a->rn);
7411    tm = load_reg(s, a->rm);
7412    if (a->tb) {
7413        /* PKHTB */
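            /*
             * An immediate of 0 encodes ASR #32; ASR #31 yields the same
             * value (all sign bits), so use that instead.
             */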
7414        if (shift == 0) {
7415            shift = 31;
7416        }
7417        tcg_gen_sari_i32(tm, tm, shift);
7418        tcg_gen_deposit_i32(tn, tn, tm, 0, 16);
7419    } else {
7420        /* PKHBT */
7421        tcg_gen_shli_i32(tm, tm, shift);
7422        tcg_gen_deposit_i32(tn, tm, tn, 0, 16);
7423    }
7424    store_reg(s, a->rd, tn);
7425    return true;
7426}
7427
7428static bool op_sat(DisasContext *s, arg_sat *a,
7429                   void (*gen)(TCGv_i32, TCGv_env, TCGv_i32, TCGv_i32))
7430{
7431    TCGv_i32 tmp;
7432    int shift = a->imm;
7433
7434    if (!ENABLE_ARCH_6) {
7435        return false;
7436    }
7437
7438    tmp = load_reg(s, a->rn);
7439    if (a->sh) {
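            /* sh=1 selects ASR; an immediate of 0 encodes ASR #32, same as #31. */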
7440        tcg_gen_sari_i32(tmp, tmp, shift ? shift : 31);
7441    } else {
7442        tcg_gen_shli_i32(tmp, tmp, shift);
7443    }
7444
7445    gen(tmp, cpu_env, tmp, tcg_constant_i32(a->satimm));
7446
7447    store_reg(s, a->rd, tmp);
7448    return true;
7449}
7450
7451static bool trans_SSAT(DisasContext *s, arg_sat *a)
7452{
7453    return op_sat(s, a, gen_helper_ssat);
7454}
7455
7456static bool trans_USAT(DisasContext *s, arg_sat *a)
7457{
7458    return op_sat(s, a, gen_helper_usat);
7459}
7460
7461static bool trans_SSAT16(DisasContext *s, arg_sat *a)
7462{
7463    if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
7464        return false;
7465    }
7466    return op_sat(s, a, gen_helper_ssat16);
7467}
7468
7469static bool trans_USAT16(DisasContext *s, arg_sat *a)
7470{
7471    if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
7472        return false;
7473    }
7474    return op_sat(s, a, gen_helper_usat16);
7475}
7476
7477static bool op_xta(DisasContext *s, arg_rrr_rot *a,
7478                   void (*gen_extract)(TCGv_i32, TCGv_i32),
7479                   void (*gen_add)(TCGv_i32, TCGv_i32, TCGv_i32))
7480{
7481    TCGv_i32 tmp;
7482
7483    if (!ENABLE_ARCH_6) {
7484        return false;
7485    }
7486
7487    tmp = load_reg(s, a->rm);
7488    /*
7489     * TODO: In many cases we could do a shift instead of a rotate.
7490     * Combined with a simple extend, that becomes an extract.
7491     */
7492    tcg_gen_rotri_i32(tmp, tmp, a->rot * 8);
7493    gen_extract(tmp, tmp);
7494
7495    if (a->rn != 15) {
7496        TCGv_i32 tmp2 = load_reg(s, a->rn);
7497        gen_add(tmp, tmp, tmp2);
7498    }
7499    store_reg(s, a->rd, tmp);
7500    return true;
7501}
7502
7503static bool trans_SXTAB(DisasContext *s, arg_rrr_rot *a)
7504{
7505    return op_xta(s, a, tcg_gen_ext8s_i32, tcg_gen_add_i32);
7506}
7507
7508static bool trans_SXTAH(DisasContext *s, arg_rrr_rot *a)
7509{
7510    return op_xta(s, a, tcg_gen_ext16s_i32, tcg_gen_add_i32);
7511}
7512
7513static bool trans_SXTAB16(DisasContext *s, arg_rrr_rot *a)
7514{
7515    if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
7516        return false;
7517    }
7518    return op_xta(s, a, gen_helper_sxtb16, gen_add16);
7519}
7520
7521static bool trans_UXTAB(DisasContext *s, arg_rrr_rot *a)
7522{
7523    return op_xta(s, a, tcg_gen_ext8u_i32, tcg_gen_add_i32);
7524}
7525
7526static bool trans_UXTAH(DisasContext *s, arg_rrr_rot *a)
7527{
7528    return op_xta(s, a, tcg_gen_ext16u_i32, tcg_gen_add_i32);
7529}
7530
7531static bool trans_UXTAB16(DisasContext *s, arg_rrr_rot *a)
7532{
7533    if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
7534        return false;
7535    }
7536    return op_xta(s, a, gen_helper_uxtb16, gen_add16);
7537}
7538
7539static bool trans_SEL(DisasContext *s, arg_rrr *a)
7540{
7541    TCGv_i32 t1, t2, t3;
7542
7543    if (s->thumb
7544        ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7545        : !ENABLE_ARCH_6) {
7546        return false;
7547    }
7548
7549    t1 = load_reg(s, a->rn);
7550    t2 = load_reg(s, a->rm);
7551    t3 = tcg_temp_new_i32();
7552    tcg_gen_ld_i32(t3, cpu_env, offsetof(CPUARMState, GE));
7553    gen_helper_sel_flags(t1, t3, t1, t2);
7554    store_reg(s, a->rd, t1);
7555    return true;
7556}
7557
7558static bool op_rr(DisasContext *s, arg_rr *a,
7559                  void (*gen)(TCGv_i32, TCGv_i32))
7560{
7561    TCGv_i32 tmp;
7562
7563    tmp = load_reg(s, a->rm);
7564    gen(tmp, tmp);
7565    store_reg(s, a->rd, tmp);
7566    return true;
7567}
7568
7569static bool trans_REV(DisasContext *s, arg_rr *a)
7570{
7571    if (!ENABLE_ARCH_6) {
7572        return false;
7573    }
7574    return op_rr(s, a, tcg_gen_bswap32_i32);
7575}
7576
7577static bool trans_REV16(DisasContext *s, arg_rr *a)
7578{
7579    if (!ENABLE_ARCH_6) {
7580        return false;
7581    }
7582    return op_rr(s, a, gen_rev16);
7583}
7584
7585static bool trans_REVSH(DisasContext *s, arg_rr *a)
7586{
7587    if (!ENABLE_ARCH_6) {
7588        return false;
7589    }
7590    return op_rr(s, a, gen_revsh);
7591}
7592
7593static bool trans_RBIT(DisasContext *s, arg_rr *a)
7594{
7595    if (!ENABLE_ARCH_6T2) {
7596        return false;
7597    }
7598    return op_rr(s, a, gen_helper_rbit);
7599}
7600
7601/*
7602 * Signed multiply, signed and unsigned divide
7603 */
7604
7605static bool op_smlad(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub)
7606{
7607    TCGv_i32 t1, t2;
7608
7609    if (!ENABLE_ARCH_6) {
7610        return false;
7611    }
7612
7613    t1 = load_reg(s, a->rn);
7614    t2 = load_reg(s, a->rm);
7615    if (m_swap) {
7616        gen_swap_half(t2, t2);
7617    }
7618    gen_smul_dual(t1, t2);
7619
7620    if (sub) {
7621        /*
7622         * This subtraction cannot overflow, so we can do a simple
7623         * 32-bit subtraction and then a possible 32-bit saturating
7624         * addition of Ra.
7625         */
7626        tcg_gen_sub_i32(t1, t1, t2);
7627
7628        if (a->ra != 15) {
7629            t2 = load_reg(s, a->ra);
7630            gen_helper_add_setq(t1, cpu_env, t1, t2);
7631        }
7632    } else if (a->ra == 15) {
7633        /* Single saturation-checking addition */
7634        gen_helper_add_setq(t1, cpu_env, t1, t2);
7635    } else {
7636        /*
7637         * We need to add the products and Ra together and then
7638         * determine whether the final result overflowed. Doing
7639         * this as two separate add-and-check-overflow steps incorrectly
7640         * sets Q for cases like (-32768 * -32768) + (-32768 * -32768) + -1.
7641         * Do all the arithmetic at 64-bits and then check for overflow.
7642         */
7643        TCGv_i64 p64, q64;
7644        TCGv_i32 t3, qf, one;
7645
7646        p64 = tcg_temp_new_i64();
7647        q64 = tcg_temp_new_i64();
7648        tcg_gen_ext_i32_i64(p64, t1);
7649        tcg_gen_ext_i32_i64(q64, t2);
7650        tcg_gen_add_i64(p64, p64, q64);
7651        load_reg_var(s, t2, a->ra);
7652        tcg_gen_ext_i32_i64(q64, t2);
7653        tcg_gen_add_i64(p64, p64, q64);
7654
7655        tcg_gen_extr_i64_i32(t1, t2, p64);
7656        /*
7657         * t1 is the low half of the result which goes into Rd.
7658         * We have overflow and must set Q if the high half (t2)
7659         * is different from the sign-extension of t1.
7660         */
7661        t3 = tcg_temp_new_i32();
7662        tcg_gen_sari_i32(t3, t1, 31);
7663        qf = load_cpu_field(QF);
7664        one = tcg_constant_i32(1);
7665        tcg_gen_movcond_i32(TCG_COND_NE, qf, t2, t3, one, qf);
7666        store_cpu_field(qf, QF);
7667    }
7668    store_reg(s, a->rd, t1);
7669    return true;
7670}
7671
7672static bool trans_SMLAD(DisasContext *s, arg_rrrr *a)
7673{
7674    return op_smlad(s, a, false, false);
7675}
7676
7677static bool trans_SMLADX(DisasContext *s, arg_rrrr *a)
7678{
7679    return op_smlad(s, a, true, false);
7680}
7681
7682static bool trans_SMLSD(DisasContext *s, arg_rrrr *a)
7683{
7684    return op_smlad(s, a, false, true);
7685}
7686
7687static bool trans_SMLSDX(DisasContext *s, arg_rrrr *a)
7688{
7689    return op_smlad(s, a, true, true);
7690}
7691
7692static bool op_smlald(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub)
7693{
7694    TCGv_i32 t1, t2;
7695    TCGv_i64 l1, l2;
7696
7697    if (!ENABLE_ARCH_6) {
7698        return false;
7699    }
7700
7701    t1 = load_reg(s, a->rn);
7702    t2 = load_reg(s, a->rm);
7703    if (m_swap) {
7704        gen_swap_half(t2, t2);
7705    }
7706    gen_smul_dual(t1, t2);
7707
7708    l1 = tcg_temp_new_i64();
7709    l2 = tcg_temp_new_i64();
7710    tcg_gen_ext_i32_i64(l1, t1);
7711    tcg_gen_ext_i32_i64(l2, t2);
7712
7713    if (sub) {
7714        tcg_gen_sub_i64(l1, l1, l2);
7715    } else {
7716        tcg_gen_add_i64(l1, l1, l2);
7717    }
7718
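        /*
         * Accumulate into the 64-bit value held in the destination
         * register pair and write the result back.
         */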
7719    gen_addq(s, l1, a->ra, a->rd);
7720    gen_storeq_reg(s, a->ra, a->rd, l1);
7721    return true;
7722}
7723
7724static bool trans_SMLALD(DisasContext *s, arg_rrrr *a)
7725{
7726    return op_smlald(s, a, false, false);
7727}
7728
7729static bool trans_SMLALDX(DisasContext *s, arg_rrrr *a)
7730{
7731    return op_smlald(s, a, true, false);
7732}
7733
7734static bool trans_SMLSLD(DisasContext *s, arg_rrrr *a)
7735{
7736    return op_smlald(s, a, false, true);
7737}
7738
7739static bool trans_SMLSLDX(DisasContext *s, arg_rrrr *a)
7740{
7741    return op_smlald(s, a, true, true);
7742}
7743
7744static bool op_smmla(DisasContext *s, arg_rrrr *a, bool round, bool sub)
7745{
7746    TCGv_i32 t1, t2;
7747
7748    if (s->thumb
7749        ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7750        : !ENABLE_ARCH_6) {
7751        return false;
7752    }
7753
7754    t1 = load_reg(s, a->rn);
7755    t2 = load_reg(s, a->rm);
7756    tcg_gen_muls2_i32(t2, t1, t1, t2);
7757
7758    if (a->ra != 15) {
7759        TCGv_i32 t3 = load_reg(s, a->ra);
7760        if (sub) {
7761            /*
7762             * For SMMLS we need a full 64-bit subtract, so that a non-zero
7763             * product lowpart generates the correct borrow into the high word
7764             * and the result lowpart is correct for the rounding step.
7765             */
7766            tcg_gen_sub2_i32(t2, t1, tcg_constant_i32(0), t3, t2, t1);
7767        } else {
7768            tcg_gen_add_i32(t1, t1, t3);
7769        }
7770    }
7771    if (round) {
7772        /*
7773         * Adding 0x80000000 to the 64-bit quantity means that we have
7774         * carry in to the high word when the low word has the msb set.
7775         */
7776        tcg_gen_shri_i32(t2, t2, 31);
7777        tcg_gen_add_i32(t1, t1, t2);
7778    }
7779    store_reg(s, a->rd, t1);
7780    return true;
7781}
7782
7783static bool trans_SMMLA(DisasContext *s, arg_rrrr *a)
7784{
7785    return op_smmla(s, a, false, false);
7786}
7787
7788static bool trans_SMMLAR(DisasContext *s, arg_rrrr *a)
7789{
7790    return op_smmla(s, a, true, false);
7791}
7792
7793static bool trans_SMMLS(DisasContext *s, arg_rrrr *a)
7794{
7795    return op_smmla(s, a, false, true);
7796}
7797
7798static bool trans_SMMLSR(DisasContext *s, arg_rrrr *a)
7799{
7800    return op_smmla(s, a, true, true);
7801}
7802
7803static bool op_div(DisasContext *s, arg_rrr *a, bool u)
7804{
7805    TCGv_i32 t1, t2;
7806
7807    if (s->thumb
7808        ? !dc_isar_feature(aa32_thumb_div, s)
7809        : !dc_isar_feature(aa32_arm_div, s)) {
7810        return false;
7811    }
7812
7813    t1 = load_reg(s, a->rn);
7814    t2 = load_reg(s, a->rm);
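        /*
         * The helpers take cpu_env so they can implement the architected
         * divide-by-zero behaviour rather than taking a host trap.
         */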
7815    if (u) {
7816        gen_helper_udiv(t1, cpu_env, t1, t2);
7817    } else {
7818        gen_helper_sdiv(t1, cpu_env, t1, t2);
7819    }
7820    store_reg(s, a->rd, t1);
7821    return true;
7822}
7823
7824static bool trans_SDIV(DisasContext *s, arg_rrr *a)
7825{
7826    return op_div(s, a, false);
7827}
7828
7829static bool trans_UDIV(DisasContext *s, arg_rrr *a)
7830{
7831    return op_div(s, a, true);
7832}
7833
7834/*
7835 * Block data transfer
7836 */
7837
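    /*
     * Compute the lowest address used by a block transfer of n registers:
     * IB starts at Rn + 4, DB at Rn - 4n, DA at Rn - 4(n - 1), and IA uses
     * Rn unchanged. The per-register loops in op_stm()/do_ldm() then step
     * upwards by 4 from here.
     */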
7838static TCGv_i32 op_addr_block_pre(DisasContext *s, arg_ldst_block *a, int n)
7839{
7840    TCGv_i32 addr = load_reg(s, a->rn);
7841
7842    if (a->b) {
7843        if (a->i) {
7844            /* pre increment */
7845            tcg_gen_addi_i32(addr, addr, 4);
7846        } else {
7847            /* pre decrement */
7848            tcg_gen_addi_i32(addr, addr, -(n * 4));
7849        }
7850    } else if (!a->i && n != 1) {
7851        /* post decrement */
7852        tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
7853    }
7854
7855    if (s->v8m_stackcheck && a->rn == 13 && a->w) {
7856        /*
7857         * If the writeback is incrementing SP rather than
7858         * decrementing it, and the initial SP is below the
7859         * stack limit but the final written-back SP would
7860         * be above, then we must not perform any memory
7861         * accesses, but it is IMPDEF whether we generate
7862         * an exception. We choose to do so in this case.
7863         * At this point 'addr' is the lowest address, so
7864         * either the original SP (if incrementing) or our
7865         * final SP (if decrementing), so that's what we check.
7866         */
7867        gen_helper_v8m_stackcheck(cpu_env, addr);
7868    }
7869
7870    return addr;
7871}
7872
7873static void op_addr_block_post(DisasContext *s, arg_ldst_block *a,
7874                               TCGv_i32 addr, int n)
7875{
7876    if (a->w) {
7877        /* write back */
7878        if (!a->b) {
7879            if (a->i) {
7880                /* post increment */
7881                tcg_gen_addi_i32(addr, addr, 4);
7882            } else {
7883                /* post decrement */
7884                tcg_gen_addi_i32(addr, addr, -(n * 4));
7885            }
7886        } else if (!a->i && n != 1) {
7887            /* pre decrement */
7888            tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
7889        }
7890        store_reg(s, a->rn, addr);
7891    }
7892}
7893
7894static bool op_stm(DisasContext *s, arg_ldst_block *a, int min_n)
7895{
7896    int i, j, n, list, mem_idx;
7897    bool user = a->u;
7898    TCGv_i32 addr, tmp;
7899
7900    if (user) {
7901        /* STM (user) */
7902        if (IS_USER(s)) {
7903            /* Only usable in supervisor mode.  */
7904            unallocated_encoding(s);
7905            return true;
7906        }
7907    }
7908
7909    list = a->list;
7910    n = ctpop16(list);
7911    if (n < min_n || a->rn == 15) {
7912        unallocated_encoding(s);
7913        return true;
7914    }
7915
7916    s->eci_handled = true;
7917
7918    addr = op_addr_block_pre(s, a, n);
7919    mem_idx = get_mem_index(s);
7920
7921    for (i = j = 0; i < 16; i++) {
7922        if (!(list & (1 << i))) {
7923            continue;
7924        }
7925
7926        if (user && i != 15) {
7927            tmp = tcg_temp_new_i32();
7928            gen_helper_get_user_reg(tmp, cpu_env, tcg_constant_i32(i));
7929        } else {
7930            tmp = load_reg(s, i);
7931        }
7932        gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
7933
7934        /* No need to add after the last transfer.  */
7935        if (++j != n) {
7936            tcg_gen_addi_i32(addr, addr, 4);
7937        }
7938    }
7939
7940    op_addr_block_post(s, a, addr, n);
7941    clear_eci_state(s);
7942    return true;
7943}
7944
7945static bool trans_STM(DisasContext *s, arg_ldst_block *a)
7946{
7947    /* BitCount(list) < 1 is UNPREDICTABLE */
7948    return op_stm(s, a, 1);
7949}
7950
7951static bool trans_STM_t32(DisasContext *s, arg_ldst_block *a)
7952{
7953    /* Writeback register in register list is UNPREDICTABLE for T32.  */
7954    if (a->w && (a->list & (1 << a->rn))) {
7955        unallocated_encoding(s);
7956        return true;
7957    }
7958    /* BitCount(list) < 2 is UNPREDICTABLE */
7959    return op_stm(s, a, 2);
7960}
7961
7962static bool do_ldm(DisasContext *s, arg_ldst_block *a, int min_n)
7963{
7964    int i, j, n, list, mem_idx;
7965    bool loaded_base;
7966    bool user = a->u;
7967    bool exc_return = false;
7968    TCGv_i32 addr, tmp, loaded_var;
7969
7970    if (user) {
7971        /* LDM (user), LDM (exception return) */
7972        if (IS_USER(s)) {
7973            /* Only usable in supervisor mode.  */
7974            unallocated_encoding(s);
7975            return true;
7976        }
7977        if (extract32(a->list, 15, 1)) {
7978            exc_return = true;
7979            user = false;
7980        } else {
7981            /* LDM (user) does not allow writeback.  */
7982            if (a->w) {
7983                unallocated_encoding(s);
7984                return true;
7985            }
7986        }
7987    }
7988
7989    list = a->list;
7990    n = ctpop16(list);
7991    if (n < min_n || a->rn == 15) {
7992        unallocated_encoding(s);
7993        return true;
7994    }
7995
7996    s->eci_handled = true;
7997
7998    addr = op_addr_block_pre(s, a, n);
7999    mem_idx = get_mem_index(s);
8000    loaded_base = false;
8001    loaded_var = NULL;
8002
8003    for (i = j = 0; i < 16; i++) {
8004        if (!(list & (1 << i))) {
8005            continue;
8006        }
8007
8008        tmp = tcg_temp_new_i32();
8009        gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
8010        if (user) {
8011            gen_helper_set_user_reg(cpu_env, tcg_constant_i32(i), tmp);
8012        } else if (i == a->rn) {
8013            loaded_var = tmp;
8014            loaded_base = true;
8015        } else if (i == 15 && exc_return) {
8016            store_pc_exc_ret(s, tmp);
8017        } else {
8018            store_reg_from_load(s, i, tmp);
8019        }
8020
8021        /* No need to add after the last transfer.  */
8022        if (++j != n) {
8023            tcg_gen_addi_i32(addr, addr, 4);
8024        }
8025    }
8026
8027    op_addr_block_post(s, a, addr, n);
8028
8029    if (loaded_base) {
8030        /* Note that we reject base == pc above.  */
8031        store_reg(s, a->rn, loaded_var);
8032    }
8033
8034    if (exc_return) {
8035        /* Restore CPSR from SPSR.  */
8036        tmp = load_cpu_field(spsr);
8037        translator_io_start(&s->base);
8038        gen_helper_cpsr_write_eret(cpu_env, tmp);
8039        /* Must exit loop to check un-masked IRQs */
8040        s->base.is_jmp = DISAS_EXIT;
8041    }
8042    clear_eci_state(s);
8043    return true;
8044}
8045
8046static bool trans_LDM_a32(DisasContext *s, arg_ldst_block *a)
8047{
8048    /*
8049     * Writeback register in register list is UNPREDICTABLE
8050     * for ArchVersion() >= 7.  Prior to v7, A32 would write
8051     * an UNKNOWN value to the base register.
8052     */
8053    if (ENABLE_ARCH_7 && a->w && (a->list & (1 << a->rn))) {
8054        unallocated_encoding(s);
8055        return true;
8056    }
8057    /* BitCount(list) < 1 is UNPREDICTABLE */
8058    return do_ldm(s, a, 1);
8059}
8060
8061static bool trans_LDM_t32(DisasContext *s, arg_ldst_block *a)
8062{
8063    /* Writeback register in register list is UNPREDICTABLE for T32. */
8064    if (a->w && (a->list & (1 << a->rn))) {
8065        unallocated_encoding(s);
8066        return true;
8067    }
8068    /* BitCount(list) < 2 is UNPREDICTABLE */
8069    return do_ldm(s, a, 2);
8070}
8071
8072static bool trans_LDM_t16(DisasContext *s, arg_ldst_block *a)
8073{
8074    /* Writeback is conditional on the base register not being loaded.  */
8075    a->w = !(a->list & (1 << a->rn));
8076    /* BitCount(list) < 1 is UNPREDICTABLE */
8077    return do_ldm(s, a, 1);
8078}
8079
8080static bool trans_CLRM(DisasContext *s, arg_CLRM *a)
8081{
8082    int i;
8083    TCGv_i32 zero;
8084
8085    if (!dc_isar_feature(aa32_m_sec_state, s)) {
8086        return false;
8087    }
8088
8089    if (extract32(a->list, 13, 1)) {
8090        return false;
8091    }
8092
8093    if (!a->list) {
8094        /* UNPREDICTABLE; we choose to UNDEF */
8095        return false;
8096    }
8097
8098    s->eci_handled = true;
8099
8100    zero = tcg_constant_i32(0);
8101    for (i = 0; i < 15; i++) {
8102        if (extract32(a->list, i, 1)) {
8103            /* Clear R[i] */
8104            tcg_gen_mov_i32(cpu_R[i], zero);
8105        }
8106    }
8107    if (extract32(a->list, 15, 1)) {
8108        /*
8109         * Clear APSR (by calling the MSR helper with the same argument
8110         * as for "MSR APSR_nzcvqg, Rn": mask = 0b1100, SYSM=0)
8111         */
8112        gen_helper_v7m_msr(cpu_env, tcg_constant_i32(0xc00), zero);
8113    }
8114    clear_eci_state(s);
8115    return true;
8116}
8117
8118/*
8119 * Branch, branch with link
8120 */
8121
8122static bool trans_B(DisasContext *s, arg_i *a)
8123{
8124    gen_jmp(s, jmp_diff(s, a->imm));
8125    return true;
8126}
8127
8128static bool trans_B_cond_thumb(DisasContext *s, arg_ci *a)
8129{
8130    /* This has cond from encoding, required to be outside IT block.  */
8131    if (a->cond >= 0xe) {
8132        return false;
8133    }
8134    if (s->condexec_mask) {
8135        unallocated_encoding(s);
8136        return true;
8137    }
8138    arm_skip_unless(s, a->cond);
8139    gen_jmp(s, jmp_diff(s, a->imm));
8140    return true;
8141}
8142
8143static bool trans_BL(DisasContext *s, arg_i *a)
8144{
8145    gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | s->thumb);
8146    gen_jmp(s, jmp_diff(s, a->imm));
8147    return true;
8148}
8149
8150static bool trans_BLX_i(DisasContext *s, arg_BLX_i *a)
8151{
8152    /*
8153     * BLX <imm> would be useless on M-profile; the encoding space
8154     * is used for other insns from v8.1M onward, and UNDEFs before that.
8155     */
8156    if (arm_dc_feature(s, ARM_FEATURE_M)) {
8157        return false;
8158    }
8159
8160    /* For A32, ARM_FEATURE_V5 is checked near the start of the uncond block. */
8161    if (s->thumb && (a->imm & 2)) {
8162        return false;
8163    }
8164    gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | s->thumb);
8165    store_cpu_field_constant(!s->thumb, thumb);
8166    /* This jump is computed from an aligned PC: subtract off the low bits. */
8167    gen_jmp(s, jmp_diff(s, a->imm - (s->pc_curr & 3)));
8168    return true;
8169}
8170
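    /*
     * Thumb-1 BL/BLX is split across two 16-bit halves: the prefix adds
     * the upper offset bits to the PC and stashes the result in LR; the
     * suffix combines that with its own offset, sets LR to the return
     * address, and performs the jump.
     */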
8171static bool trans_BL_BLX_prefix(DisasContext *s, arg_BL_BLX_prefix *a)
8172{
8173    assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
8174    gen_pc_plus_diff(s, cpu_R[14], jmp_diff(s, a->imm << 12));
8175    return true;
8176}
8177
8178static bool trans_BL_suffix(DisasContext *s, arg_BL_suffix *a)
8179{
8180    TCGv_i32 tmp = tcg_temp_new_i32();
8181
8182    assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
8183    tcg_gen_addi_i32(tmp, cpu_R[14], (a->imm << 1) | 1);
8184    gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | 1);
8185    gen_bx(s, tmp);
8186    return true;
8187}
8188
8189static bool trans_BLX_suffix(DisasContext *s, arg_BLX_suffix *a)
8190{
8191    TCGv_i32 tmp;
8192
8193    assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
8194    if (!ENABLE_ARCH_5) {
8195        return false;
8196    }
8197    tmp = tcg_temp_new_i32();
8198    tcg_gen_addi_i32(tmp, cpu_R[14], a->imm << 1);
8199    tcg_gen_andi_i32(tmp, tmp, 0xfffffffc);
8200    gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | 1);
8201    gen_bx(s, tmp);
8202    return true;
8203}
8204
8205static bool trans_BF(DisasContext *s, arg_BF *a)
8206{
8207    /*
8208     * M-profile branch future insns. The architecture permits an
8209     * implementation to implement these as NOPs (equivalent to
8210     * discarding the LO_BRANCH_INFO cache immediately), and we
8211     * take that IMPDEF option because for QEMU a "real" implementation
8212     * would be complicated and wouldn't execute any faster.
8213     */
8214    if (!dc_isar_feature(aa32_lob, s)) {
8215        return false;
8216    }
8217    if (a->boff == 0) {
8218        /* SEE "Related encodings" (loop insns) */
8219        return false;
8220    }
8221    /* Handle as NOP */
8222    return true;
8223}
8224
8225static bool trans_DLS(DisasContext *s, arg_DLS *a)
8226{
8227    /* M-profile low-overhead loop start */
8228    TCGv_i32 tmp;
8229
8230    if (!dc_isar_feature(aa32_lob, s)) {
8231        return false;
8232    }
8233    if (a->rn == 13 || a->rn == 15) {
8234        /*
8235         * For DLSTP rn == 15 is a related encoding (LCTP); the
8236         * other cases caught by this condition are all
8237         * CONSTRAINED UNPREDICTABLE: we choose to UNDEF
8238         */
8239        return false;
8240    }
8241
8242    if (a->size != 4) {
8243        /* DLSTP */
8244        if (!dc_isar_feature(aa32_mve, s)) {
8245            return false;
8246        }
8247        if (!vfp_access_check(s)) {
8248            return true;
8249        }
8250    }
8251
8252    /* Not a while loop: set LR to the count, and set LTPSIZE for DLSTP */
8253    tmp = load_reg(s, a->rn);
8254    store_reg(s, 14, tmp);
8255    if (a->size != 4) {
8256        /* DLSTP: set FPSCR.LTPSIZE */
8257        store_cpu_field(tcg_constant_i32(a->size), v7m.ltpsize);
8258        s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
8259    }
8260    return true;
8261}
8262
8263static bool trans_WLS(DisasContext *s, arg_WLS *a)
8264{
8265    /* M-profile low-overhead while-loop start */
8266    TCGv_i32 tmp;
8267    DisasLabel nextlabel;
8268
8269    if (!dc_isar_feature(aa32_lob, s)) {
8270        return false;
8271    }
8272    if (a->rn == 13 || a->rn == 15) {
8273        /*
8274         * For WLSTP rn == 15 is a related encoding (LE); the
8275         * other cases caught by this condition are all
8276         * CONSTRAINED UNPREDICTABLE: we choose to UNDEF
8277         */
8278        return false;
8279    }
8280    if (s->condexec_mask) {
8281        /*
8282         * WLS in an IT block is CONSTRAINED UNPREDICTABLE;
8283         * we choose to UNDEF, because otherwise our use of
8284         * gen_goto_tb(1) would clash with the use of TB exit 1
8285         * in the dc->condjmp condition-failed codepath in
8286         * arm_tr_tb_stop() and we'd get an assertion.
8287         */
8288        return false;
8289    }
8290    if (a->size != 4) {
8291        /* WLSTP */
8292        if (!dc_isar_feature(aa32_mve, s)) {
8293            return false;
8294        }
8295        /*
8296         * We need to check that the FPU is enabled here, but mustn't
8297         * call vfp_access_check() to do that because we don't want to
8298         * do the lazy state preservation in the "loop count is zero" case.
8299         * Do the check-and-raise-exception by hand.
8300         */
8301        if (s->fp_excp_el) {
8302            gen_exception_insn_el(s, 0, EXCP_NOCP,
8303                                  syn_uncategorized(), s->fp_excp_el);
8304            return true;
8305        }
8306    }
8307
8308    nextlabel = gen_disas_label(s);
8309    tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_R[a->rn], 0, nextlabel.label);
8310    tmp = load_reg(s, a->rn);
8311    store_reg(s, 14, tmp);
8312    if (a->size != 4) {
8313        /*
8314         * WLSTP: set FPSCR.LTPSIZE. This requires that we do the
8315         * lazy state preservation, new FP context creation, etc,
8316         * that vfp_access_check() does. We know that the actual
8317         * access check will succeed (ie it won't generate code that
8318         * throws an exception) because we did that check by hand earlier.
8319         */
8320        bool ok = vfp_access_check(s);
8321        assert(ok);
8322        store_cpu_field(tcg_constant_i32(a->size), v7m.ltpsize);
8323        /*
8324         * LTPSIZE updated, but MVE_NO_PRED will always be the same thing (0)
8325         * when we take this upcoming exit from this TB, so gen_jmp_tb() is OK.
8326         */
8327    }
8328    gen_jmp_tb(s, curr_insn_len(s), 1);
8329
8330    set_disas_label(s, nextlabel);
8331    gen_jmp(s, jmp_diff(s, a->imm));
8332    return true;
8333}
8334
8335static bool trans_LE(DisasContext *s, arg_LE *a)
8336{
8337    /*
8338     * M-profile low-overhead loop end. The architecture permits an
8339     * implementation to discard the LO_BRANCH_INFO cache at any time,
8340     * and we take the IMPDEF option to never set it in the first place
8341     * (equivalent to always discarding it immediately), because for QEMU
8342     * a "real" implementation would be complicated and wouldn't execute
8343     * any faster.
8344     */
8345    TCGv_i32 tmp;
8346    DisasLabel loopend;
8347    bool fpu_active;
8348
8349    if (!dc_isar_feature(aa32_lob, s)) {
8350        return false;
8351    }
8352    if (a->f && a->tp) {
8353        return false;
8354    }
8355    if (s->condexec_mask) {
8356        /*
8357         * LE in an IT block is CONSTRAINED UNPREDICTABLE;
8358         * we choose to UNDEF, because otherwise our use of
8359         * gen_goto_tb(1) would clash with the use of TB exit 1
8360         * in the dc->condjmp condition-failed codepath in
8361         * arm_tr_tb_stop() and we'd get an assertion.
8362         */
8363        return false;
8364    }
8365    if (a->tp) {
8366        /* LETP */
8367        if (!dc_isar_feature(aa32_mve, s)) {
8368            return false;
8369        }
8370        if (!vfp_access_check(s)) {
8371            s->eci_handled = true;
8372            return true;
8373        }
8374    }
8375
8376    /* LE/LETP is OK with ECI set and leaves it untouched */
8377    s->eci_handled = true;
8378
8379    /*
8380     * With MVE, LTPSIZE might not be 4, and we must emit an INVSTATE
8381     * UsageFault exception for the LE insn in that case. Note that we
8382     * are not directly checking FPSCR.LTPSIZE but instead check the
8383     * pseudocode LTPSIZE() function, which returns 4 if the FPU is
8384     * not currently active (ie ActiveFPState() returns false). We
8385     * can identify not-active purely from our TB state flags, as the
8386     * FPU is active only if:
8387     *  the FPU is enabled
8388     *  AND lazy state preservation is not active
8389     *  AND we do not need a new fp context (this is the ASPEN/FPCA check)
8390     *
8391     * Usually we don't need to care about this distinction between
8392     * LTPSIZE and FPSCR.LTPSIZE, because the code in vfp_access_check()
8393     * will either take an exception or clear the conditions that make
8394     * the FPU not active. But LE is an unusual case of a non-FP insn
8395     * that looks at LTPSIZE.
8396     */
8397    fpu_active = !s->fp_excp_el && !s->v7m_lspact && !s->v7m_new_fp_ctxt_needed;
8398
8399    if (!a->tp && dc_isar_feature(aa32_mve, s) && fpu_active) {
8400        /* Need to do a runtime check for LTPSIZE != 4 */
8401        DisasLabel skipexc = gen_disas_label(s);
8402        tmp = load_cpu_field(v7m.ltpsize);
8403        tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 4, skipexc.label);
8404        gen_exception_insn(s, 0, EXCP_INVSTATE, syn_uncategorized());
8405        set_disas_label(s, skipexc);
8406    }
8407
8408    if (a->f) {
8409        /* Loop-forever: just jump back to the loop start */
8410        gen_jmp(s, jmp_diff(s, -a->imm));
8411        return true;
8412    }
8413
8414    /*
8415     * Not loop-forever. If LR <= loop-decrement-value this is the last loop.
8416     * For LE, we know at this point that LTPSIZE must be 4 and the
8417     * loop decrement value is 1. For LETP we need to calculate the decrement
8418     * value from LTPSIZE.
8419     */
8420    loopend = gen_disas_label(s);
8421    if (!a->tp) {
8422        tcg_gen_brcondi_i32(TCG_COND_LEU, cpu_R[14], 1, loopend.label);
8423        tcg_gen_addi_i32(cpu_R[14], cpu_R[14], -1);
8424    } else {
8425        /*
8426         * Decrement by 1 << (4 - LTPSIZE). We need to use a TCG local
8427         * so that decr stays live after the brcondi.
8428         */
8429        TCGv_i32 decr = tcg_temp_new_i32();
8430        TCGv_i32 ltpsize = load_cpu_field(v7m.ltpsize);
8431        tcg_gen_sub_i32(decr, tcg_constant_i32(4), ltpsize);
8432        tcg_gen_shl_i32(decr, tcg_constant_i32(1), decr);
8433
8434        tcg_gen_brcond_i32(TCG_COND_LEU, cpu_R[14], decr, loopend.label);
8435
8436        tcg_gen_sub_i32(cpu_R[14], cpu_R[14], decr);
8437    }
8438    /* Jump back to the loop start */
8439    gen_jmp(s, jmp_diff(s, -a->imm));
8440
8441    set_disas_label(s, loopend);
8442    if (a->tp) {
8443        /* Exits from tail-pred loops must reset LTPSIZE to 4 */
8444        store_cpu_field(tcg_constant_i32(4), v7m.ltpsize);
8445    }
8446    /* End TB, continuing to following insn */
8447    gen_jmp_tb(s, curr_insn_len(s), 1);
8448    return true;
8449}
8450
8451static bool trans_LCTP(DisasContext *s, arg_LCTP *a)
8452{
8453    /*
8454     * M-profile Loop Clear with Tail Predication. Since our implementation
8455     * doesn't cache branch information, all we need to do is reset
8456     * FPSCR.LTPSIZE to 4.
8457     */
8458
8459    if (!dc_isar_feature(aa32_lob, s) ||
8460        !dc_isar_feature(aa32_mve, s)) {
8461        return false;
8462    }
8463
8464    if (!vfp_access_check(s)) {
8465        return true;
8466    }
8467
8468    store_cpu_field_constant(4, v7m.ltpsize);
8469    return true;
8470}
8471
8472static bool trans_VCTP(DisasContext *s, arg_VCTP *a)
8473{
8474    /*
8475     * M-profile Create Vector Tail Predicate. This insn is itself
8476     * predicated and is subject to beatwise execution.
8477     */
8478    TCGv_i32 rn_shifted, masklen;
8479
8480    if (!dc_isar_feature(aa32_mve, s) || a->rn == 13 || a->rn == 15) {
8481        return false;
8482    }
8483
8484    if (!mve_eci_check(s) || !vfp_access_check(s)) {
8485        return true;
8486    }
8487
8488    /*
8489     * We pre-calculate the mask length here to avoid needing
8490     * multiple helpers specialized by element size.
8491     * We pass the helper "rn <= (1 << (4 - size)) ? (rn << size) : 16".
8492     */
8493    rn_shifted = tcg_temp_new_i32();
8494    masklen = load_reg(s, a->rn);
8495    tcg_gen_shli_i32(rn_shifted, masklen, a->size);
8496    tcg_gen_movcond_i32(TCG_COND_LEU, masklen,
8497                        masklen, tcg_constant_i32(1 << (4 - a->size)),
8498                        rn_shifted, tcg_constant_i32(16));
8499    gen_helper_mve_vctp(cpu_env, masklen);
8500    /* This insn updates predication bits */
8501    s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
8502    mve_update_eci(s);
8503    return true;
8504}
8505
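    /*
     * Table Branch (TBB/TBH): load an unsigned byte (TBB) or halfword
     * (TBH, with Rm scaled by 2) from [Rn + Rm], double the loaded
     * value and add it to the PC (this insn's address + 4) to form
     * the branch target.
     */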
8506static bool op_tbranch(DisasContext *s, arg_tbranch *a, bool half)
8507{
8508    TCGv_i32 addr, tmp;
8509
8510    tmp = load_reg(s, a->rm);
8511    if (half) {
8512        tcg_gen_add_i32(tmp, tmp, tmp);
8513    }
8514    addr = load_reg(s, a->rn);
8515    tcg_gen_add_i32(addr, addr, tmp);
8516
8517    gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), half ? MO_UW : MO_UB);
8518
8519    tcg_gen_add_i32(tmp, tmp, tmp);
8520    gen_pc_plus_diff(s, addr, jmp_diff(s, 0));
8521    tcg_gen_add_i32(tmp, tmp, addr);
8522    store_reg(s, 15, tmp);
8523    return true;
8524}
8525
8526static bool trans_TBB(DisasContext *s, arg_tbranch *a)
8527{
8528    return op_tbranch(s, a, false);
8529}
8530
8531static bool trans_TBH(DisasContext *s, arg_tbranch *a)
8532{
8533    return op_tbranch(s, a, true);
8534}
8535
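    /*
     * Compare and Branch on (Non-)Zero: branch forward by imm if Rn is
     * zero (CBZ) or non-zero (CBNZ). The condition flags are unaffected.
     */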
8536static bool trans_CBZ(DisasContext *s, arg_CBZ *a)
8537{
8538    TCGv_i32 tmp = load_reg(s, a->rn);
8539
8540    arm_gen_condlabel(s);
8541    tcg_gen_brcondi_i32(a->nz ? TCG_COND_EQ : TCG_COND_NE,
8542                        tmp, 0, s->condlabel.label);
8543    gen_jmp(s, jmp_diff(s, a->imm));
8544    return true;
8545}
8546
8547/*
8548 * Supervisor call - both T32 & A32 come here so we need to check
8549 * which mode we are in when checking for semihosting.
8550 */
8551
8552static bool trans_SVC(DisasContext *s, arg_SVC *a)
8553{
8554    const uint32_t semihost_imm = s->thumb ? 0xab : 0x123456;
8555
8556    if (!arm_dc_feature(s, ARM_FEATURE_M) &&
8557        semihosting_enabled(s->current_el == 0) &&
8558        (a->imm == semihost_imm)) {
8559        gen_exception_internal_insn(s, EXCP_SEMIHOST);
8560    } else {
8561        if (s->fgt_svc) {
8562            uint32_t syndrome = syn_aa32_svc(a->imm, s->thumb);
8563            gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2);
8564        } else {
8565            gen_update_pc(s, curr_insn_len(s));
8566            s->svc_imm = a->imm;
8567            s->base.is_jmp = DISAS_SWI;
8568        }
8569    }
8570    return true;
8571}
8572
8573/*
8574 * Unconditional system instructions
8575 */
8576
8577static bool trans_RFE(DisasContext *s, arg_RFE *a)
8578{
8579    static const int8_t pre_offset[4] = {
8580        /* DA */ -4, /* IA */ 0, /* DB */ -8, /* IB */ 4
8581    };
8582    static const int8_t post_offset[4] = {
8583        /* DA */ -8, /* IA */ 4, /* DB */ -4, /* IB */ 0
8584    };
8585    TCGv_i32 addr, t1, t2;
8586
8587    if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
8588        return false;
8589    }
8590    if (IS_USER(s)) {
8591        unallocated_encoding(s);
8592        return true;
8593    }
8594
8595    addr = load_reg(s, a->rn);
8596    tcg_gen_addi_i32(addr, addr, pre_offset[a->pu]);
8597
8598    /* Load PC into t1 and CPSR into t2.  */
8599    t1 = tcg_temp_new_i32();
8600    gen_aa32_ld_i32(s, t1, addr, get_mem_index(s), MO_UL | MO_ALIGN);
8601    tcg_gen_addi_i32(addr, addr, 4);
8602    t2 = tcg_temp_new_i32();
8603    gen_aa32_ld_i32(s, t2, addr, get_mem_index(s), MO_UL | MO_ALIGN);
8604
8605    if (a->w) {
8606        /* Base writeback.  */
8607        tcg_gen_addi_i32(addr, addr, post_offset[a->pu]);
8608        store_reg(s, a->rn, addr);
8609    }
8610    gen_rfe(s, t1, t2);
8611    return true;
8612}
8613
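    /*
     * SRS (Store Return State): apart from the architecture-level check
     * below, all the work is done in gen_srs().
     */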
8614static bool trans_SRS(DisasContext *s, arg_SRS *a)
8615{
8616    if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
8617        return false;
8618    }
8619    gen_srs(s, a->mode, a->pu, a->w);
8620    return true;
8621}
8622
8623static bool trans_CPS(DisasContext *s, arg_CPS *a)
8624{
8625    uint32_t mask, val;
8626
8627    if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
8628        return false;
8629    }
8630    if (IS_USER(s)) {
8631        /* Implemented as NOP in user mode.  */
8632        return true;
8633    }
8634    /* TODO: There are quite a lot of UNPREDICTABLE argument combinations. */
8635
8636    mask = val = 0;
8637    if (a->imod & 2) {
8638        if (a->A) {
8639            mask |= CPSR_A;
8640        }
8641        if (a->I) {
8642            mask |= CPSR_I;
8643        }
8644        if (a->F) {
8645            mask |= CPSR_F;
8646        }
8647        if (a->imod & 1) {
8648            val |= mask;
8649        }
8650    }
8651    if (a->M) {
8652        mask |= CPSR_M;
8653        val |= a->mode;
8654    }
8655    if (mask) {
8656        gen_set_psr_im(s, mask, 0, val);
8657    }
8658    return true;
8659}
8660
8661static bool trans_CPS_v7m(DisasContext *s, arg_CPS_v7m *a)
8662{
8663    TCGv_i32 tmp, addr;
8664
8665    if (!arm_dc_feature(s, ARM_FEATURE_M)) {
8666        return false;
8667    }
8668    if (IS_USER(s)) {
8669        /* Implemented as NOP in user mode.  */
8670        return true;
8671    }
8672
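        /*
         * The constants below are the SYSm register numbers passed to
         * the v7m_msr helper: 19 is FAULTMASK and 16 is PRIMASK.
         */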
8673    tmp = tcg_constant_i32(a->im);
8674    /* FAULTMASK */
8675    if (a->F) {
8676        addr = tcg_constant_i32(19);
8677        gen_helper_v7m_msr(cpu_env, addr, tmp);
8678    }
8679    /* PRIMASK */
8680    if (a->I) {
8681        addr = tcg_constant_i32(16);
8682        gen_helper_v7m_msr(cpu_env, addr, tmp);
8683    }
8684    gen_rebuild_hflags(s, false);
8685    gen_lookup_tb(s);
8686    return true;
8687}
8688
8689/*
8690 * Clear-Exclusive, Barriers
8691 */
8692
8693static bool trans_CLREX(DisasContext *s, arg_CLREX *a)
8694{
8695    if (s->thumb
8696        ? !ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)
8697        : !ENABLE_ARCH_6K) {
8698        return false;
8699    }
8700    gen_clrex(s);
8701    return true;
8702}
8703
8704static bool trans_DSB(DisasContext *s, arg_DSB *a)
8705{
8706    if (!ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)) {
8707        return false;
8708    }
8709    tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
8710    return true;
8711}
8712
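    /* We implement DMB identically to DSB: both emit a full TCG barrier. */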
8713static bool trans_DMB(DisasContext *s, arg_DMB *a)
8714{
8715    return trans_DSB(s, NULL);
8716}
8717
8718static bool trans_ISB(DisasContext *s, arg_ISB *a)
8719{
8720    if (!ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)) {
8721        return false;
8722    }
8723    /*
8724     * We need to break the TB after this insn to execute
8725     * self-modifying code correctly and also to take
8726     * any pending interrupts immediately.
8727     */
8728    s->base.is_jmp = DISAS_TOO_MANY;
8729    return true;
8730}
8731
8732static bool trans_SB(DisasContext *s, arg_SB *a)
8733{
8734    if (!dc_isar_feature(aa32_sb, s)) {
8735        return false;
8736    }
8737    /*
8738     * TODO: There is no speculation barrier opcode
8739     * for TCG; MB and end the TB instead.
8740     */
8741    tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
8742    s->base.is_jmp = DISAS_TOO_MANY;
8743    return true;
8744}
8745
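    /*
     * SETEND: only generate code when the requested endianness differs
     * from the current data endianness; otherwise this is a nop.
     */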
8746static bool trans_SETEND(DisasContext *s, arg_SETEND *a)
8747{
8748    if (!ENABLE_ARCH_6) {
8749        return false;
8750    }
8751    if (a->E != (s->be_data == MO_BE)) {
8752        gen_helper_setend(cpu_env);
8753        s->base.is_jmp = DISAS_UPDATE_EXIT;
8754    }
8755    return true;
8756}
8757
8758/*
8759 * Preload instructions
8760 * All are nops, contingent on the appropriate arch level.
8761 */
8762
8763static bool trans_PLD(DisasContext *s, arg_PLD *a)
8764{
8765    return ENABLE_ARCH_5TE;
8766}
8767
8768static bool trans_PLDW(DisasContext *s, arg_PLD *a)
8769{
8770    return arm_dc_feature(s, ARM_FEATURE_V7MP);
8771}
8772
8773static bool trans_PLI(DisasContext *s, arg_PLD *a)
8774{
8775    return ENABLE_ARCH_7;
8776}
8777
8778/*
8779 * If-then
8780 */
8781
8782static bool trans_IT(DisasContext *s, arg_IT *a)
8783{
8784    int cond_mask = a->cond_mask;
8785
8786    /*
8787     * No actual code generated for this insn, just setup state.
8788     *
8789     * Combinations of firstcond and mask which set up an 0b1111
8790     * condition are UNPREDICTABLE; we take the CONSTRAINED
8791     * UNPREDICTABLE choice to treat 0b1111 the same as 0b1110,
8792     * i.e. both meaning "execute always".
8793     */
8794    s->condexec_cond = (cond_mask >> 4) & 0xe;
8795    s->condexec_mask = cond_mask & 0x1f;
8796    return true;
8797}
8798
8799/* v8.1M CSEL/CSINC/CSNEG/CSINV */
8800static bool trans_CSEL(DisasContext *s, arg_CSEL *a)
8801{
8802    TCGv_i32 rn, rm;
8803    DisasCompare c;
8804
8805    if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
8806        return false;
8807    }
8808
8809    if (a->rm == 13) {
8810        /* SEE "Related encodings" (MVE shifts) */
8811        return false;
8812    }
8813
8814    if (a->rd == 13 || a->rd == 15 || a->rn == 13 || a->fcond >= 14) {
8815        /* CONSTRAINED UNPREDICTABLE: we choose to UNDEF */
8816        return false;
8817    }
8818
8819    /* In this insn input reg fields of 0b1111 mean "zero", not "PC" */
8820    rn = tcg_temp_new_i32();
8821    rm = tcg_temp_new_i32();
8822    if (a->rn == 15) {
8823        tcg_gen_movi_i32(rn, 0);
8824    } else {
8825        load_reg_var(s, rn, a->rn);
8826    }
8827    if (a->rm == 15) {
8828        tcg_gen_movi_i32(rm, 0);
8829    } else {
8830        load_reg_var(s, rm, a->rm);
8831    }
8832
8833    switch (a->op) {
8834    case 0: /* CSEL */
8835        break;
8836    case 1: /* CSINC */
8837        tcg_gen_addi_i32(rm, rm, 1);
8838        break;
8839    case 2: /* CSINV */
8840        tcg_gen_not_i32(rm, rm);
8841        break;
8842    case 3: /* CSNEG */
8843        tcg_gen_neg_i32(rm, rm);
8844        break;
8845    default:
8846        g_assert_not_reached();
8847    }
8848
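        /* Rd = condition passed ? Rn : the (modified) Rm computed above */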
8849    arm_test_cc(&c, a->fcond);
8850    tcg_gen_movcond_i32(c.cond, rn, c.value, tcg_constant_i32(0), rn, rm);
8851
8852    store_reg(s, a->rd, rn);
8853    return true;
8854}
8855
8856/*
8857 * Legacy decoder.
8858 */
8859
8860static void disas_arm_insn(DisasContext *s, unsigned int insn)
8861{
8862    unsigned int cond = insn >> 28;
8863
8864    /* M variants do not implement ARM mode; this must raise the INVSTATE
8865     * UsageFault exception.
8866     */
8867    if (arm_dc_feature(s, ARM_FEATURE_M)) {
8868        gen_exception_insn(s, 0, EXCP_INVSTATE, syn_uncategorized());
8869        return;
8870    }
8871
8872    if (s->pstate_il) {
8873        /*
8874         * Illegal execution state. This has priority over BTI
8875         * exceptions, but comes after instruction abort exceptions.
8876         */
8877        gen_exception_insn(s, 0, EXCP_UDEF, syn_illegalstate());
8878        return;
8879    }
8880
8881    if (cond == 0xf) {
8882        /* In ARMv3 and v4 the NV condition is UNPREDICTABLE; we
8883         * choose to UNDEF. In ARMv5 and above the space is used
8884         * for miscellaneous unconditional instructions.
8885         */
8886        if (!arm_dc_feature(s, ARM_FEATURE_V5)) {
8887            unallocated_encoding(s);
8888            return;
8889        }
8890
8891        /* Unconditional instructions.  */
8892        /* TODO: Perhaps merge these into one decodetree output file.  */
8893        if (disas_a32_uncond(s, insn) ||
8894            disas_vfp_uncond(s, insn) ||
8895            disas_neon_dp(s, insn) ||
8896            disas_neon_ls(s, insn) ||
8897            disas_neon_shared(s, insn)) {
8898            return;
8899        }
8900        /* fall back to legacy decoder */
8901
8902        if ((insn & 0x0e000f00) == 0x0c000100) {
8903            if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
8904                /* iWMMXt register transfer.  */
8905                if (extract32(s->c15_cpar, 1, 1)) {
8906                    if (!disas_iwmmxt_insn(s, insn)) {
8907                        return;
8908                    }
8909                }
8910            }
8911        }
8912        goto illegal_op;
8913    }
8914    if (cond != 0xe) {
8915        /* If the condition is not "always execute", generate a
8916           conditional jump to the next instruction.  */
8917        arm_skip_unless(s, cond);
8918    }
8919
8920    /* TODO: Perhaps merge these into one decodetree output file.  */
8921    if (disas_a32(s, insn) ||
8922        disas_vfp(s, insn)) {
8923        return;
8924    }
8925    /* fall back to legacy decoder */
8926    /* TODO: convert xscale/iwmmxt decoder to decodetree ?? */
8927    if (arm_dc_feature(s, ARM_FEATURE_XSCALE)) {
8928        if (((insn & 0x0c000e00) == 0x0c000000)
8929            && ((insn & 0x03000000) != 0x03000000)) {
8930            /* Coprocessor insn, coprocessor 0 or 1 */
8931            disas_xscale_insn(s, insn);
8932            return;
8933        }
8934    }
8935
8936illegal_op:
8937    unallocated_encoding(s);
8938}
8939
8940static bool thumb_insn_is_16bit(DisasContext *s, uint32_t pc, uint32_t insn)
8941{
8942    /*
8943     * Return true if this is a 16-bit instruction. We must be precise
8944     * about this (matching the decode).
8945     */
8946    if ((insn >> 11) < 0x1d) {
8947        /* Definitely a 16-bit instruction */
8948        return true;
8949    }
8950
8951    /* Top five bits 0b11101 / 0b11110 / 0b11111 : this is the
8952     * first half of a 32-bit Thumb insn. Thumb-1 cores might
8953     * end up actually treating this as two 16-bit insns, though,
8954     * if it's half of a bl/blx pair that might span a page boundary.
8955     */
8956    if (arm_dc_feature(s, ARM_FEATURE_THUMB2) ||
8957        arm_dc_feature(s, ARM_FEATURE_M)) {
8958        /* Thumb2 cores (including all M profile ones) always treat
8959         * 32-bit insns as 32-bit.
8960         */
8961        return false;
8962    }
8963
8964    if ((insn >> 11) == 0x1e && pc - s->page_start < TARGET_PAGE_SIZE - 3) {
8965        /* 0b1111_0xxx_xxxx_xxxx : BL/BLX prefix, and the suffix
8966         * is not on the next page; we merge this into a 32-bit
8967         * insn.
8968         */
8969        return false;
8970    }
8971    /* 0b1110_1xxx_xxxx_xxxx : BLX suffix (or UNDEF);
8972     * 0b1111_1xxx_xxxx_xxxx : BL suffix;
8973     * 0b1111_0xxx_xxxx_xxxx : BL/BLX prefix on the end of a page
8974     *  -- handle as single 16 bit insn
8975     */
8976    return true;
8977}
8978
8979/* Translate a 32-bit thumb instruction. */
8980static void disas_thumb2_insn(DisasContext *s, uint32_t insn)
8981{
8982    /*
8983     * ARMv6-M supports a limited subset of Thumb2 instructions.
8984     * Other Thumb1 architectures allow only the combined 32-bit
8985     * BL/BLX prefix and suffix encodings.
8986     */
8987    if (arm_dc_feature(s, ARM_FEATURE_M) &&
8988        !arm_dc_feature(s, ARM_FEATURE_V7)) {
8989        int i;
8990        bool found = false;
8991        static const uint32_t armv6m_insn[] = {0xf3808000 /* msr */,
8992                                               0xf3b08040 /* dsb */,
8993                                               0xf3b08050 /* dmb */,
8994                                               0xf3b08060 /* isb */,
8995                                               0xf3e08000 /* mrs */,
8996                                               0xf000d000 /* bl */};
8997        static const uint32_t armv6m_mask[] = {0xffe0d000,
8998                                               0xfff0d0f0,
8999                                               0xfff0d0f0,
9000                                               0xfff0d0f0,
9001                                               0xffe0d000,
9002                                               0xf800d000};
9003
9004        for (i = 0; i < ARRAY_SIZE(armv6m_insn); i++) {
9005            if ((insn & armv6m_mask[i]) == armv6m_insn[i]) {
9006                found = true;
9007                break;
9008            }
9009        }
9010        if (!found) {
9011            goto illegal_op;
9012        }
9013    } else if ((insn & 0xf800e800) != 0xf000e800) {
9014        if (!arm_dc_feature(s, ARM_FEATURE_THUMB2)) {
9015            unallocated_encoding(s);
9016            return;
9017        }
9018    }
9019
9020    if (arm_dc_feature(s, ARM_FEATURE_M)) {
9021        /*
9022         * NOCP takes precedence over any UNDEF for (almost) the
9023         * entire wide range of coprocessor-space encodings, so check
9024         * for it first before proceeding to actually decode eg VFP
9025         * insns. This decode also handles the few insns which are
9026         * in copro space but do not have NOCP checks (eg VLLDM, VLSTM).
9027         */
9028        if (disas_m_nocp(s, insn)) {
9029            return;
9030        }
9031    }
9032
9033    if ((insn & 0xef000000) == 0xef000000) {
9034        /*
9035         * T32 encodings 0b111p_1111_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq
9036         * transform into
9037         * A32 encodings 0b1111_001p_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq
9038         */
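            /*
             * Worked example (illustrative values): T32 0xef012345
             * (p = 0) becomes A32 0xf2012345, and T32 0xff012345
             * (p = 1) becomes A32 0xf3012345.
             */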
9039        uint32_t a32_insn = (insn & 0xe2ffffff) |
9040            ((insn & (1 << 28)) >> 4) | (1 << 28);
9041
9042        if (disas_neon_dp(s, a32_insn)) {
9043            return;
9044        }
9045    }
9046
9047    if ((insn & 0xff100000) == 0xf9000000) {
9048        /*
9049         * T32 encodings 0b1111_1001_ppp0_qqqq_qqqq_qqqq_qqqq_qqqq
9050         * transform into
9051         * A32 encodings 0b1111_0100_ppp0_qqqq_qqqq_qqqq_qqqq_qqqq
9052         */
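            /* i.e. keep bits [23:0] and replace the top byte 0xf9 with 0xf4 */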
9053        uint32_t a32_insn = (insn & 0x00ffffff) | 0xf4000000;
9054
9055        if (disas_neon_ls(s, a32_insn)) {
9056            return;
9057        }
9058    }
9059
9060    /*
9061     * TODO: Perhaps merge these into one decodetree output file.
9062     * Note disas_vfp is written for a32 with cond field in the
9063     * top nibble.  The t32 encoding requires 0xe in the top nibble.
9064     */
9065    if (disas_t32(s, insn) ||
9066        disas_vfp_uncond(s, insn) ||
9067        disas_neon_shared(s, insn) ||
9068        disas_mve(s, insn) ||
9069        ((insn >> 28) == 0xe && disas_vfp(s, insn))) {
9070        return;
9071    }
9072
9073illegal_op:
9074    unallocated_encoding(s);
9075}
9076
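    /* Translate a 16-bit Thumb instruction. */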
9077static void disas_thumb_insn(DisasContext *s, uint32_t insn)
9078{
9079    if (!disas_t16(s, insn)) {
9080        unallocated_encoding(s);
9081    }
9082}
9083
9084static bool insn_crosses_page(CPUARMState *env, DisasContext *s)
9085{
9086    /* Return true if the insn at dc->base.pc_next might cross a page boundary.
9087     * (False positives are OK, false negatives are not.)
9088     * We know this is a Thumb insn, and our caller ensures we are
9089     * only called if dc->base.pc_next is less than 4 bytes from the page
9090     * boundary, so we cross the page if the first 16 bits indicate
9091     * that this is a 32 bit insn.
9092     */
9093    uint16_t insn = arm_lduw_code(env, &s->base, s->base.pc_next, s->sctlr_b);
9094
9095    return !thumb_insn_is_16bit(s, s->base.pc_next, insn);
9096}
9097
9098static void arm_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
9099{
9100    DisasContext *dc = container_of(dcbase, DisasContext, base);
9101    CPUARMState *env = cs->env_ptr;
9102    ARMCPU *cpu = env_archcpu(env);
9103    CPUARMTBFlags tb_flags = arm_tbflags_from_tb(dc->base.tb);
9104    uint32_t condexec, core_mmu_idx;
9105
9106    dc->isar = &cpu->isar;
9107    dc->condjmp = 0;
9108    dc->pc_save = dc->base.pc_first;
9109    dc->aarch64 = false;
9110    dc->thumb = EX_TBFLAG_AM32(tb_flags, THUMB);
9111    dc->be_data = EX_TBFLAG_ANY(tb_flags, BE_DATA) ? MO_BE : MO_LE;
9112    condexec = EX_TBFLAG_AM32(tb_flags, CONDEXEC);
9113    /*
9114     * the CONDEXEC TB flags are CPSR bits [15:10][26:25]. On A-profile this
9115     * is always the IT bits. On M-profile, some of the reserved encodings
9116     * of IT are used instead to indicate either ICI or ECI, which
9117     * indicate partial progress of a restartable insn that was interrupted
9118     * partway through by an exception:
9119     *  * if CONDEXEC[3:0] != 0b0000 : CONDEXEC is IT bits
9120     *  * if CONDEXEC[3:0] == 0b0000 : CONDEXEC is ICI or ECI bits
9121     * In all cases CONDEXEC == 0 means "not in IT block or restartable
9122     * insn, behave normally".
9123     */
9124    dc->eci = dc->condexec_mask = dc->condexec_cond = 0;
9125    dc->eci_handled = false;
9126    if (condexec & 0xf) {
9127        dc->condexec_mask = (condexec & 0xf) << 1;
9128        dc->condexec_cond = condexec >> 4;
9129    } else {
9130        if (arm_feature(env, ARM_FEATURE_M)) {
9131            dc->eci = condexec >> 4;
9132        }
9133    }
9134
9135    core_mmu_idx = EX_TBFLAG_ANY(tb_flags, MMUIDX);
9136    dc->mmu_idx = core_to_arm_mmu_idx(env, core_mmu_idx);
9137    dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
9138#if !defined(CONFIG_USER_ONLY)
9139    dc->user = (dc->current_el == 0);
9140#endif
9141    dc->fp_excp_el = EX_TBFLAG_ANY(tb_flags, FPEXC_EL);
9142    dc->align_mem = EX_TBFLAG_ANY(tb_flags, ALIGN_MEM);
9143    dc->pstate_il = EX_TBFLAG_ANY(tb_flags, PSTATE__IL);
9144    dc->fgt_active = EX_TBFLAG_ANY(tb_flags, FGT_ACTIVE);
9145    dc->fgt_svc = EX_TBFLAG_ANY(tb_flags, FGT_SVC);
9146
9147    if (arm_feature(env, ARM_FEATURE_M)) {
9148        dc->vfp_enabled = 1;
9149        dc->be_data = MO_TE;
9150        dc->v7m_handler_mode = EX_TBFLAG_M32(tb_flags, HANDLER);
9151        dc->v8m_secure = EX_TBFLAG_M32(tb_flags, SECURE);
9152        dc->v8m_stackcheck = EX_TBFLAG_M32(tb_flags, STACKCHECK);
9153        dc->v8m_fpccr_s_wrong = EX_TBFLAG_M32(tb_flags, FPCCR_S_WRONG);
9154        dc->v7m_new_fp_ctxt_needed =
9155            EX_TBFLAG_M32(tb_flags, NEW_FP_CTXT_NEEDED);
9156        dc->v7m_lspact = EX_TBFLAG_M32(tb_flags, LSPACT);
9157        dc->mve_no_pred = EX_TBFLAG_M32(tb_flags, MVE_NO_PRED);
9158    } else {
9159        dc->sctlr_b = EX_TBFLAG_A32(tb_flags, SCTLR__B);
9160        dc->hstr_active = EX_TBFLAG_A32(tb_flags, HSTR_ACTIVE);
9161        dc->ns = EX_TBFLAG_A32(tb_flags, NS);
9162        dc->vfp_enabled = EX_TBFLAG_A32(tb_flags, VFPEN);
9163        if (arm_feature(env, ARM_FEATURE_XSCALE)) {
9164            dc->c15_cpar = EX_TBFLAG_A32(tb_flags, XSCALE_CPAR);
9165        } else {
9166            dc->vec_len = EX_TBFLAG_A32(tb_flags, VECLEN);
9167            dc->vec_stride = EX_TBFLAG_A32(tb_flags, VECSTRIDE);
9168        }
9169        dc->sme_trap_nonstreaming =
9170            EX_TBFLAG_A32(tb_flags, SME_TRAP_NONSTREAMING);
9171    }
9172    dc->lse2 = false; /* applies only to aarch64 */
9173    dc->cp_regs = cpu->cp_regs;
9174    dc->features = env->features;
9175
9176    /* Single step state. The code-generation logic here is:
9177     *  SS_ACTIVE == 0:
9178     *   generate code with no special handling for single-stepping (except
9179     *   that anything that can make us go to SS_ACTIVE == 1 must end the TB;
9180     *   this happens anyway because those changes are all system register or
9181     *   PSTATE writes).
9182     *  SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
9183     *   emit code for one insn
9184     *   emit code to clear PSTATE.SS
9185     *   emit code to generate software step exception for completed step
9186     *   end TB (as usual for having generated an exception)
9187     *  SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
9188     *   emit code to generate a software step exception
9189     *   end the TB
9190     */
9191    dc->ss_active = EX_TBFLAG_ANY(tb_flags, SS_ACTIVE);
9192    dc->pstate_ss = EX_TBFLAG_ANY(tb_flags, PSTATE__SS);
9193    dc->is_ldex = false;
9194
9195    dc->page_start = dc->base.pc_first & TARGET_PAGE_MASK;
9196
9197    /* If architectural single step active, limit to 1.  */
9198    if (dc->ss_active) {
9199        dc->base.max_insns = 1;
9200    }
9201
9202    /* ARM is a fixed-length ISA.  Bound the number of insns to execute
9203       to those left on the page.  */
9204    if (!dc->thumb) {
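            /*
             * -(pc | TARGET_PAGE_MASK) is the number of bytes left on
             * this page, so bound is the count of whole 4-byte insns
             * remaining before the page boundary.
             */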
9205        int bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;
9206        dc->base.max_insns = MIN(dc->base.max_insns, bound);
9207    }
9208
9209    cpu_V0 = tcg_temp_new_i64();
9210    cpu_V1 = tcg_temp_new_i64();
9211    cpu_M0 = tcg_temp_new_i64();
9212}
9213
9214static void arm_tr_tb_start(DisasContextBase *dcbase, CPUState *cpu)
9215{
9216    DisasContext *dc = container_of(dcbase, DisasContext, base);
9217
9218    /* A note on handling of the condexec (IT) bits:
9219     *
9220     * We want to avoid the overhead of having to write the updated condexec
9221     * bits back to the CPUARMState for every instruction in an IT block. So:
9222     * (1) if the condexec bits are not already zero then we write
9223     * zero back into the CPUARMState now. This avoids complications trying
9224     * to do it at the end of the block. (For example if we don't do this
9225     * it's hard to identify whether we can safely skip writing condexec
9226     * at the end of the TB, which we definitely want to do for the case
9227     * where a TB doesn't do anything with the IT state at all.)
9228     * (2) if we are going to leave the TB then we call gen_set_condexec()
9229     * which will write the correct value into CPUARMState if zero is wrong.
9230     * This is done both for leaving the TB at the end, and for leaving
9231     * it because of an exception we know will happen, which is done in
9232     * gen_exception_insn(). The latter is necessary because we need to
9233     * leave the TB with the PC/IT state just prior to execution of the
9234     * instruction which caused the exception.
9235     * (3) if we leave the TB unexpectedly (eg a data abort on a load)
9236     * then the CPUARMState will be wrong and we need to reset it.
9237     * This is handled in the same way as restoration of the
9238     * PC in these situations; we save the value of the condexec bits
9239     * for each PC via tcg_gen_insn_start(), and restore_state_to_opc()
9240     * then uses this to restore them after an exception.
9241     *
9242     * Note that there are no instructions which can read the condexec
9243     * bits, and none which can write non-static values to them, so
9244     * we don't need to care about whether CPUARMState is correct in the
9245     * middle of a TB.
9246     */
9247
9248    /* Reset the conditional execution bits immediately. This avoids
9249       complications trying to do it at the end of the block.  */
9250    if (dc->condexec_mask || dc->condexec_cond) {
9251        store_cpu_field_constant(0, condexec_bits);
9252    }
9253}
9254
9255static void arm_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
9256{
9257    DisasContext *dc = container_of(dcbase, DisasContext, base);
9258    /*
9259     * The ECI/ICI bits share PSR bits with the IT bits, so we
9260     * need to reconstitute the bits from the split-out DisasContext
9261     * fields here.
9262     */
9263    uint32_t condexec_bits;
9264    target_ulong pc_arg = dc->base.pc_next;
9265
9266    if (tb_cflags(dcbase->tb) & CF_PCREL) {
9267        pc_arg &= ~TARGET_PAGE_MASK;
9268    }
9269    if (dc->eci) {
9270        condexec_bits = dc->eci << 4;
9271    } else {
9272        condexec_bits = (dc->condexec_cond << 4) | (dc->condexec_mask >> 1);
9273    }
9274    tcg_gen_insn_start(pc_arg, condexec_bits, 0);
9275    dc->insn_start = tcg_last_op();
9276}
9277
9278static bool arm_check_kernelpage(DisasContext *dc)
9279{
9280#ifdef CONFIG_USER_ONLY
9281    /* Intercept jump to the magic kernel page.  */
9282    if (dc->base.pc_next >= 0xffff0000) {
9283        /* We always get here via a jump, so we know we are not in a
9284           conditional execution block.  */
9285        gen_exception_internal(EXCP_KERNEL_TRAP);
9286        dc->base.is_jmp = DISAS_NORETURN;
9287        return true;
9288    }
9289#endif
9290    return false;
9291}
9292
9293static bool arm_check_ss_active(DisasContext *dc)
9294{
9295    if (dc->ss_active && !dc->pstate_ss) {
9296        /* Singlestep state is Active-pending.
9297         * If we're in this state at the start of a TB then either
9298         *  a) we just took an exception to an EL which is being debugged
9299         *     and this is the first insn in the exception handler
9300         *  b) debug exceptions were masked and we just unmasked them
9301         *     without changing EL (eg by clearing PSTATE.D)
9302         * In either case we're going to take a swstep exception in the
9303         * "did not step an insn" case, and so the syndrome ISV and EX
9304         * bits should be zero.
9305         */
9306        assert(dc->base.num_insns == 1);
9307        gen_swstep_exception(dc, 0, 0);
9308        dc->base.is_jmp = DISAS_NORETURN;
9309        return true;
9310    }
9311
9312    return false;
9313}
9314
9315static void arm_post_translate_insn(DisasContext *dc)
9316{
9317    if (dc->condjmp && dc->base.is_jmp == DISAS_NEXT) {
9318        if (dc->pc_save != dc->condlabel.pc_save) {
9319            gen_update_pc(dc, dc->condlabel.pc_save - dc->pc_save);
9320        }
9321        gen_set_label(dc->condlabel.label);
9322        dc->condjmp = 0;
9323    }
9324}
9325
9326static void arm_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
9327{
9328    DisasContext *dc = container_of(dcbase, DisasContext, base);
9329    CPUARMState *env = cpu->env_ptr;
9330    uint32_t pc = dc->base.pc_next;
9331    unsigned int insn;
9332
9333    /* Singlestep exceptions have the highest priority. */
9334    if (arm_check_ss_active(dc)) {
9335        dc->base.pc_next = pc + 4;
9336        return;
9337    }
9338
9339    if (pc & 3) {
9340        /*
9341         * PC alignment fault.  This has priority over the instruction abort
9342         * that we would receive from a translation fault via arm_ldl_code
9343         * (or the execution of the kernelpage entrypoint). This should only
9344         * be possible after an indirect branch, at the start of the TB.
9345         */
9346        assert(dc->base.num_insns == 1);
9347        gen_helper_exception_pc_alignment(cpu_env, tcg_constant_tl(pc));
9348        dc->base.is_jmp = DISAS_NORETURN;
9349        dc->base.pc_next = QEMU_ALIGN_UP(pc, 4);
9350        return;
9351    }
9352
9353    if (arm_check_kernelpage(dc)) {
9354        dc->base.pc_next = pc + 4;
9355        return;
9356    }
9357
9358    dc->pc_curr = pc;
9359    insn = arm_ldl_code(env, &dc->base, pc, dc->sctlr_b);
9360    dc->insn = insn;
9361    dc->base.pc_next = pc + 4;
9362    disas_arm_insn(dc, insn);
9363
9364    arm_post_translate_insn(dc);
9365
9366    /* ARM is a fixed-length ISA.  We performed the cross-page check
9367       in init_disas_context by adjusting max_insns.  */
9368}
9369
9370static bool thumb_insn_is_unconditional(DisasContext *s, uint32_t insn)
9371{
9372    /* Return true if this Thumb insn is always unconditional,
9373     * even inside an IT block. This is true of only a very few
9374     * instructions: BKPT, HLT, and SG.
9375     *
9376     * A larger class of instructions are UNPREDICTABLE if used
9377     * inside an IT block; we do not need to detect those here, because
9378     * what we do by default (perform the cc check and update the IT
9379     * bits state machine) is a permitted CONSTRAINED UNPREDICTABLE
9380     * choice for those situations.
9381     *
9382     * insn is either a 16-bit or a 32-bit instruction; the two are
9383     * distinguishable because for the 16-bit case the top 16 bits
9384     * are zeroes, and that isn't a valid 32-bit encoding.
9385     */
9386    if ((insn & 0xffffff00) == 0xbe00) {
9387        /* BKPT */
9388        return true;
9389    }
9390
9391    if ((insn & 0xffffffc0) == 0xba80 && arm_dc_feature(s, ARM_FEATURE_V8) &&
9392        !arm_dc_feature(s, ARM_FEATURE_M)) {
9393        /* HLT: v8A only. This is unconditional even when it is going to
9394         * UNDEF; see the v8A ARM ARM DDI0487B.a H3.3.
9395         * For v7 cores this was a plain old undefined encoding and so
9396         * honours its cc check. (We might be using the encoding as
9397         * a semihosting trap, but we don't change the cc check behaviour
9398         * on that account, because a debugger connected to a real v7A
9399         * core and emulating semihosting traps by catching the UNDEF
9400         * exception would also only see cases where the cc check passed.
9401         * No guest code should be trying to do a HLT semihosting trap
9402         * in an IT block anyway.
9403         */
9404        return true;
9405    }
9406
9407    if (insn == 0xe97fe97f && arm_dc_feature(s, ARM_FEATURE_V8) &&
9408        arm_dc_feature(s, ARM_FEATURE_M)) {
9409        /* SG: v8M only */
9410        return true;
9411    }
9412
9413    return false;
9414}
9415
9416static void thumb_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
9417{
9418    DisasContext *dc = container_of(dcbase, DisasContext, base);
9419    CPUARMState *env = cpu->env_ptr;
9420    uint32_t pc = dc->base.pc_next;
9421    uint32_t insn;
9422    bool is_16bit;
9423    /* TCG op to rewind to if this turns out to be an invalid ECI state */
9424    TCGOp *insn_eci_rewind = NULL;
9425    target_ulong insn_eci_pc_save = -1;
9426
9427    /* Misaligned thumb PC is architecturally impossible. */
9428    assert((dc->base.pc_next & 1) == 0);
9429
9430    if (arm_check_ss_active(dc) || arm_check_kernelpage(dc)) {
9431        dc->base.pc_next = pc + 2;
9432        return;
9433    }
9434
9435    dc->pc_curr = pc;
9436    insn = arm_lduw_code(env, &dc->base, pc, dc->sctlr_b);
9437    is_16bit = thumb_insn_is_16bit(dc, dc->base.pc_next, insn);
9438    pc += 2;
9439    if (!is_16bit) {
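            /* The first halfword forms the high half of the 32-bit insn. */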
9440        uint32_t insn2 = arm_lduw_code(env, &dc->base, pc, dc->sctlr_b);
9441        insn = insn << 16 | insn2;
9442        pc += 2;
9443    }
9444    dc->base.pc_next = pc;
9445    dc->insn = insn;
9446
9447    if (dc->pstate_il) {
9448        /*
9449         * Illegal execution state. This has priority over BTI
9450         * exceptions, but comes after instruction abort exceptions.
9451         */
9452        gen_exception_insn(dc, 0, EXCP_UDEF, syn_illegalstate());
9453        return;
9454    }
9455
9456    if (dc->eci) {
9457        /*
9458         * For M-profile continuable instructions, ECI/ICI handling
9459         * falls into these cases:
9460         *  - interrupt-continuable instructions
9461         *     These are the various load/store multiple insns (both
9462         *     integer and fp). The ICI bits indicate the register
9463         *     where the load/store can resume. We make the IMPDEF
9464         *     choice to always do "instruction restart", ie ignore
9465         *     the ICI value and always execute the ldm/stm from the
9466         *     start. So all we need to do is zero PSR.ICI if the
9467         *     insn executes.
9468         *  - MVE instructions subject to beat-wise execution
9469         *     Here the ECI bits indicate which beats have already been
9470         *     executed, and we must honour this. Each insn of this
9471         *     type will handle it correctly. We will update PSR.ECI
9472         *     in the helper function for the insn (some ECI values
9473         *     mean that the following insn also has been partially
9474         *     executed).
9475         *  - Special cases which don't advance ECI
9476         *     The insns LE, LETP and BKPT leave the ECI/ICI state
9477         *     bits untouched.
9478         *  - all other insns (the common case)
9479         *     Non-zero ECI/ICI means an INVSTATE UsageFault.
9480         *     We place a rewind-marker here. Insns in the previous
9481         *     three categories will set a flag in the DisasContext.
9482         *     If the flag isn't set after we call disas_thumb_insn()
9483         *     or disas_thumb2_insn() then we know we have a "some other
9484         *     insn" case. We will rewind to the marker (ie throwing away
9485         *     all the generated code) and instead emit "take exception".
9486         */
9487        insn_eci_rewind = tcg_last_op();
9488        insn_eci_pc_save = dc->pc_save;
9489    }
9490
9491    if (dc->condexec_mask && !thumb_insn_is_unconditional(dc, insn)) {
9492        uint32_t cond = dc->condexec_cond;
9493
9494        /*
9495         * Conditionally skip the insn. Note that both 0xe and 0xf mean
9496         * "always"; 0xf is not "never".
9497         */
9498        if (cond < 0x0e) {
9499            arm_skip_unless(dc, cond);
9500        }
9501    }
9502
9503    if (is_16bit) {
9504        disas_thumb_insn(dc, insn);
9505    } else {
9506        disas_thumb2_insn(dc, insn);
9507    }
9508
9509    /* Advance the Thumb condexec condition.  */
9510    if (dc->condexec_mask) {
9511        dc->condexec_cond = ((dc->condexec_cond & 0xe) |
9512                             ((dc->condexec_mask >> 4) & 1));
9513        dc->condexec_mask = (dc->condexec_mask << 1) & 0x1f;
9514        if (dc->condexec_mask == 0) {
9515            dc->condexec_cond = 0;
9516        }
9517    }
9518
9519    if (dc->eci && !dc->eci_handled) {
9520        /*
9521         * Insn wasn't valid for ECI/ICI at all: undo what we
9522         * just generated and instead emit an exception
9523         */
9524        tcg_remove_ops_after(insn_eci_rewind);
9525        dc->pc_save = insn_eci_pc_save;
9526        dc->condjmp = 0;
9527        gen_exception_insn(dc, 0, EXCP_INVSTATE, syn_uncategorized());
9528    }
9529
9530    arm_post_translate_insn(dc);
9531
9532    /* Thumb is a variable-length ISA.  Stop translation when the next insn
9533     * will touch a new page.  This ensures that prefetch aborts occur at
9534     * the right place.
9535     *
9536     * We want to stop the TB if the next insn starts in a new page,
9537     * or if it spans between this page and the next. This means that
9538     * if we're looking at the last halfword in the page we need to
9539     * see if it's a 16-bit Thumb insn (which will fit in this TB)
9540     * or a 32-bit Thumb insn (which won't).
9541     * This is to avoid generating a silly TB with a single 16-bit insn
9542     * in it at the end of this page (which would execute correctly
9543     * but isn't very efficient).
9544     */
9545    if (dc->base.is_jmp == DISAS_NEXT
9546        && (dc->base.pc_next - dc->page_start >= TARGET_PAGE_SIZE
9547            || (dc->base.pc_next - dc->page_start >= TARGET_PAGE_SIZE - 3
9548                && insn_crosses_page(env, dc)))) {
9549        dc->base.is_jmp = DISAS_TOO_MANY;
9550    }
9551}
9552
9553static void arm_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
9554{
9555    DisasContext *dc = container_of(dcbase, DisasContext, base);
9556
9557    /* At this stage dc->condjmp will only be set when the skipped
9558       instruction was a conditional branch or trap, and the PC has
9559       already been written.  */
9560    gen_set_condexec(dc);
9561    if (dc->base.is_jmp == DISAS_BX_EXCRET) {
9562        /* Exception return branches need some special case code at the
9563         * end of the TB, which is complex enough that it has to
9564         * handle the single-step vs not and the condition-failed
9565         * insn codepath itself.
9566         */
9567        gen_bx_excret_final_code(dc);
9568    } else if (unlikely(dc->ss_active)) {
9569        /* Unconditional and "condition passed" instruction codepath. */
9570        switch (dc->base.is_jmp) {
9571        case DISAS_SWI:
9572            gen_ss_advance(dc);
9573            gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb));
9574            break;
9575        case DISAS_HVC:
9576            gen_ss_advance(dc);
9577            gen_exception_el(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2);
9578            break;
9579        case DISAS_SMC:
9580            gen_ss_advance(dc);
9581            gen_exception_el(EXCP_SMC, syn_aa32_smc(), 3);
9582            break;
9583        case DISAS_NEXT:
9584        case DISAS_TOO_MANY:
9585        case DISAS_UPDATE_EXIT:
9586        case DISAS_UPDATE_NOCHAIN:
9587            gen_update_pc(dc, curr_insn_len(dc));
9588            /* fall through */
9589        default:
9590            /* FIXME: Single stepping a WFI insn will not halt the CPU. */
9591            gen_singlestep_exception(dc);
9592            break;
9593        case DISAS_NORETURN:
9594            break;
9595        }
9596    } else {
9597        /* While branches must always occur at the end of an IT block,
9598           there are a few other things that can cause us to terminate
9599           the TB in the middle of an IT block:
9600            - Exception generating instructions (bkpt, swi, undefined).
9601            - Page boundaries.
9602            - Hardware watchpoints.
9603           Hardware breakpoints have already been handled and skip this code.
9604         */
9605        switch (dc->base.is_jmp) {
9606        case DISAS_NEXT:
9607        case DISAS_TOO_MANY:
9608            gen_goto_tb(dc, 1, curr_insn_len(dc));
9609            break;
9610        case DISAS_UPDATE_NOCHAIN:
9611            gen_update_pc(dc, curr_insn_len(dc));
9612            /* fall through */
9613        case DISAS_JUMP:
9614            gen_goto_ptr();
9615            break;
9616        case DISAS_UPDATE_EXIT:
9617            gen_update_pc(dc, curr_insn_len(dc));
9618            /* fall through */
9619        default:
9620            /* indicate that the hash table must be used to find the next TB */
9621            tcg_gen_exit_tb(NULL, 0);
9622            break;
9623        case DISAS_NORETURN:
9624            /* nothing more to generate */
9625            break;
9626        case DISAS_WFI:
9627            gen_helper_wfi(cpu_env, tcg_constant_i32(curr_insn_len(dc)));
9628            /*
9629             * The helper doesn't necessarily throw an exception, but we
9630             * must go back to the main loop to check for interrupts anyway.
9631             */
9632            tcg_gen_exit_tb(NULL, 0);
9633            break;
9634        case DISAS_WFE:
9635            gen_helper_wfe(cpu_env);
9636            break;
9637        case DISAS_YIELD:
9638            gen_helper_yield(cpu_env);
9639            break;
9640        case DISAS_SWI:
9641            gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb));
9642            break;
9643        case DISAS_HVC:
9644            gen_exception_el(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2);
9645            break;
9646        case DISAS_SMC:
9647            gen_exception_el(EXCP_SMC, syn_aa32_smc(), 3);
9648            break;
9649        }
9650    }
9651
9652    if (dc->condjmp) {
9653        /* "Condition failed" instruction codepath for the branch/trap insn */
9654        set_disas_label(dc, dc->condlabel);
9655        gen_set_condexec(dc);
9656        if (unlikely(dc->ss_active)) {
9657            gen_update_pc(dc, curr_insn_len(dc));
9658            gen_singlestep_exception(dc);
9659        } else {
9660            gen_goto_tb(dc, 1, curr_insn_len(dc));
9661        }
9662    }
9663}
9664
9665static void arm_tr_disas_log(const DisasContextBase *dcbase,
9666                             CPUState *cpu, FILE *logfile)
9667{
9668    DisasContext *dc = container_of(dcbase, DisasContext, base);
9669
9670    fprintf(logfile, "IN: %s\n", lookup_symbol(dc->base.pc_first));
9671    target_disas(logfile, cpu, dc->base.pc_first, dc->base.tb->size);
9672}
9673
9674static const TranslatorOps arm_translator_ops = {
9675    .init_disas_context = arm_tr_init_disas_context,
9676    .tb_start           = arm_tr_tb_start,
9677    .insn_start         = arm_tr_insn_start,
9678    .translate_insn     = arm_tr_translate_insn,
9679    .tb_stop            = arm_tr_tb_stop,
9680    .disas_log          = arm_tr_disas_log,
9681};
9682
9683static const TranslatorOps thumb_translator_ops = {
9684    .init_disas_context = arm_tr_init_disas_context,
9685    .tb_start           = arm_tr_tb_start,
9686    .insn_start         = arm_tr_insn_start,
9687    .translate_insn     = thumb_tr_translate_insn,
9688    .tb_stop            = arm_tr_tb_stop,
9689    .disas_log          = arm_tr_disas_log,
9690};
9691
9692/* generate intermediate code for basic block 'tb'.  */
9693void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int *max_insns,
9694                           target_ulong pc, void *host_pc)
9695{
9696    DisasContext dc = { };
9697    const TranslatorOps *ops = &arm_translator_ops;
9698    CPUARMTBFlags tb_flags = arm_tbflags_from_tb(tb);
9699
9700    if (EX_TBFLAG_AM32(tb_flags, THUMB)) {
9701        ops = &thumb_translator_ops;
9702    }
9703#ifdef TARGET_AARCH64
9704    if (EX_TBFLAG_ANY(tb_flags, AARCH64_STATE)) {
9705        ops = &aarch64_translator_ops;
9706    }
9707#endif
9708
9709    translator_loop(cpu, tb, max_insns, pc, host_pc, ops, &dc.base);
9710}
9711