qemu/target/arm/translate.c
   1/*
   2 *  ARM translation
   3 *
   4 *  Copyright (c) 2003 Fabrice Bellard
   5 *  Copyright (c) 2005-2007 CodeSourcery
   6 *  Copyright (c) 2007 OpenedHand, Ltd.
   7 *
   8 * This library is free software; you can redistribute it and/or
   9 * modify it under the terms of the GNU Lesser General Public
  10 * License as published by the Free Software Foundation; either
  11 * version 2.1 of the License, or (at your option) any later version.
  12 *
  13 * This library is distributed in the hope that it will be useful,
  14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  16 * Lesser General Public License for more details.
  17 *
  18 * You should have received a copy of the GNU Lesser General Public
  19 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  20 */
  21#include "qemu/osdep.h"
  22
  23#include "cpu.h"
  24#include "internals.h"
  25#include "disas/disas.h"
  26#include "exec/exec-all.h"
  27#include "tcg/tcg-op.h"
  28#include "tcg/tcg-op-gvec.h"
  29#include "qemu/log.h"
  30#include "qemu/bitops.h"
  31#include "arm_ldst.h"
  32#include "semihosting/semihost.h"
  33#include "exec/helper-proto.h"
  34#include "exec/helper-gen.h"
  35#include "exec/log.h"
  36#include "cpregs.h"
  37
  38
  39#define ENABLE_ARCH_4T    arm_dc_feature(s, ARM_FEATURE_V4T)
  40#define ENABLE_ARCH_5     arm_dc_feature(s, ARM_FEATURE_V5)
  41/* currently all emulated v5 cores are also v5TE, so don't bother */
  42#define ENABLE_ARCH_5TE   arm_dc_feature(s, ARM_FEATURE_V5)
  43#define ENABLE_ARCH_5J    dc_isar_feature(aa32_jazelle, s)
  44#define ENABLE_ARCH_6     arm_dc_feature(s, ARM_FEATURE_V6)
  45#define ENABLE_ARCH_6K    arm_dc_feature(s, ARM_FEATURE_V6K)
  46#define ENABLE_ARCH_6T2   arm_dc_feature(s, ARM_FEATURE_THUMB2)
  47#define ENABLE_ARCH_7     arm_dc_feature(s, ARM_FEATURE_V7)
  48#define ENABLE_ARCH_8     arm_dc_feature(s, ARM_FEATURE_V8)
  49
  50#include "translate.h"
  51#include "translate-a32.h"
  52
  53/* These are TCG temporaries used only by the legacy iwMMXt decoder */
  54static TCGv_i64 cpu_V0, cpu_V1, cpu_M0;
  55/* These are TCG globals which alias CPUARMState fields */
  56static TCGv_i32 cpu_R[16];
  57TCGv_i32 cpu_CF, cpu_NF, cpu_VF, cpu_ZF;
  58TCGv_i64 cpu_exclusive_addr;
  59TCGv_i64 cpu_exclusive_val;
  60
  61#include "exec/gen-icount.h"
  62
  63static const char * const regnames[] =
  64    { "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
  65      "r8", "r9", "r10", "r11", "r12", "r13", "r14", "pc" };
  66
  67
  68/* initialize TCG globals.  */
  69void arm_translate_init(void)
  70{
  71    int i;
  72
  73    for (i = 0; i < 16; i++) {
  74        cpu_R[i] = tcg_global_mem_new_i32(cpu_env,
  75                                          offsetof(CPUARMState, regs[i]),
  76                                          regnames[i]);
  77    }
  78    cpu_CF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, CF), "CF");
  79    cpu_NF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, NF), "NF");
  80    cpu_VF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, VF), "VF");
  81    cpu_ZF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, ZF), "ZF");
  82
  83    cpu_exclusive_addr = tcg_global_mem_new_i64(cpu_env,
  84        offsetof(CPUARMState, exclusive_addr), "exclusive_addr");
  85    cpu_exclusive_val = tcg_global_mem_new_i64(cpu_env,
  86        offsetof(CPUARMState, exclusive_val), "exclusive_val");
  87
  88    a64_translate_init();
  89}
  90
  91uint64_t asimd_imm_const(uint32_t imm, int cmode, int op)
  92{
  93    /* Expand the encoded constant as per AdvSIMDExpandImm pseudocode */
  94    switch (cmode) {
  95    case 0: case 1:
  96        /* no-op */
  97        break;
  98    case 2: case 3:
  99        imm <<= 8;
 100        break;
 101    case 4: case 5:
 102        imm <<= 16;
 103        break;
 104    case 6: case 7:
 105        imm <<= 24;
 106        break;
 107    case 8: case 9:
 108        imm |= imm << 16;
 109        break;
 110    case 10: case 11:
 111        imm = (imm << 8) | (imm << 24);
 112        break;
 113    case 12:
 114        imm = (imm << 8) | 0xff;
 115        break;
 116    case 13:
 117        imm = (imm << 16) | 0xffff;
 118        break;
 119    case 14:
 120        if (op) {
 121            /*
 122             * This and cmode == 15 op == 1 are the only cases where
 123             * the top and bottom 32 bits of the encoded constant differ.
 124             */
 125            uint64_t imm64 = 0;
 126            int n;
 127
 128            for (n = 0; n < 8; n++) {
 129                if (imm & (1 << n)) {
 130                    imm64 |= (0xffULL << (n * 8));
 131                }
 132            }
 133            return imm64;
 134        }
 135        imm |= (imm << 8) | (imm << 16) | (imm << 24);
 136        break;
 137    case 15:
 138        if (op) {
 139            /* Reserved encoding for AArch32; valid for AArch64 */
 140            uint64_t imm64 = (uint64_t)(imm & 0x3f) << 48;
 141            if (imm & 0x80) {
 142                imm64 |= 0x8000000000000000ULL;
 143            }
 144            if (imm & 0x40) {
 145                imm64 |= 0x3fc0000000000000ULL;
 146            } else {
 147                imm64 |= 0x4000000000000000ULL;
 148            }
 149            return imm64;
 150        }
 151        imm = ((imm & 0x80) << 24) | ((imm & 0x3f) << 19)
 152            | ((imm & 0x40) ? (0x1f << 25) : (1 << 30));
 153        break;
 154    }
 155    if (op) {
 156        imm = ~imm;
 157    }
 158    return dup_const(MO_32, imm);
 159}
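/*
 * Illustrative expansions of the logic above (example values, not taken
 * from any decode table):
 *   cmode=12, op=0, imm=0xab -> per-word 0x0000abff, i.e. 0x0000abff0000abff
 *   cmode=14, op=1, imm=0x81 -> bytes 0 and 7 all-ones, i.e. 0xff000000000000ff
 */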
 160
 161/* Generate a label used for skipping this instruction */
 162void arm_gen_condlabel(DisasContext *s)
 163{
 164    if (!s->condjmp) {
 165        s->condlabel = gen_new_label();
 166        s->condjmp = 1;
 167    }
 168}
 169
 170/* Flags for the disas_set_da_iss info argument:
 171 * lower bits hold the Rt register number, higher bits are flags.
 172 */
 173typedef enum ISSInfo {
 174    ISSNone = 0,
 175    ISSRegMask = 0x1f,
 176    ISSInvalid = (1 << 5),
 177    ISSIsAcqRel = (1 << 6),
 178    ISSIsWrite = (1 << 7),
 179    ISSIs16Bit = (1 << 8),
 180} ISSInfo;
 181
 182/*
 183 * Store var into env + offset to a member with size bytes.
 184 * Free var after use.
 185 */
 186void store_cpu_offset(TCGv_i32 var, int offset, int size)
 187{
 188    switch (size) {
 189    case 1:
 190        tcg_gen_st8_i32(var, cpu_env, offset);
 191        break;
 192    case 4:
 193        tcg_gen_st_i32(var, cpu_env, offset);
 194        break;
 195    default:
 196        g_assert_not_reached();
 197    }
 198    tcg_temp_free_i32(var);
 199}
 200
 201/* Save the syndrome information for a Data Abort */
 202static void disas_set_da_iss(DisasContext *s, MemOp memop, ISSInfo issinfo)
 203{
 204    uint32_t syn;
 205    int sas = memop & MO_SIZE;
 206    bool sse = memop & MO_SIGN;
 207    bool is_acqrel = issinfo & ISSIsAcqRel;
 208    bool is_write = issinfo & ISSIsWrite;
 209    bool is_16bit = issinfo & ISSIs16Bit;
 210    int srt = issinfo & ISSRegMask;
 211
 212    if (issinfo & ISSInvalid) {
 213        /* Some callsites want to conditionally provide ISS info,
 214         * eg "only if this was not a writeback"
 215         */
 216        return;
 217    }
 218
 219    if (srt == 15) {
 220        /* For AArch32, insns where the src/dest is R15 never generate
 221         * ISS information. Catching that here saves checking at all
 222         * the call sites.
 223         */
 224        return;
 225    }
 226
 227    syn = syn_data_abort_with_iss(0, sas, sse, srt, 0, is_acqrel,
 228                                  0, 0, 0, is_write, 0, is_16bit);
 229    disas_set_insn_syndrome(s, syn);
 230}
 231
 232static inline int get_a32_user_mem_index(DisasContext *s)
 233{
 234    /* Return the core mmu_idx to use for A32/T32 "unprivileged load/store"
 235     * insns:
 236     *  if PL2, UNPREDICTABLE (we choose to implement as if PL0)
 237     *  otherwise, access as if at PL0.
 238     */
 239    switch (s->mmu_idx) {
 240    case ARMMMUIdx_E2:        /* this one is UNPREDICTABLE */
 241    case ARMMMUIdx_E10_0:
 242    case ARMMMUIdx_E10_1:
 243    case ARMMMUIdx_E10_1_PAN:
 244        return arm_to_core_mmu_idx(ARMMMUIdx_E10_0);
 245    case ARMMMUIdx_SE3:
 246    case ARMMMUIdx_SE10_0:
 247    case ARMMMUIdx_SE10_1:
 248    case ARMMMUIdx_SE10_1_PAN:
 249        return arm_to_core_mmu_idx(ARMMMUIdx_SE10_0);
 250    case ARMMMUIdx_MUser:
 251    case ARMMMUIdx_MPriv:
 252        return arm_to_core_mmu_idx(ARMMMUIdx_MUser);
 253    case ARMMMUIdx_MUserNegPri:
 254    case ARMMMUIdx_MPrivNegPri:
 255        return arm_to_core_mmu_idx(ARMMMUIdx_MUserNegPri);
 256    case ARMMMUIdx_MSUser:
 257    case ARMMMUIdx_MSPriv:
 258        return arm_to_core_mmu_idx(ARMMMUIdx_MSUser);
 259    case ARMMMUIdx_MSUserNegPri:
 260    case ARMMMUIdx_MSPrivNegPri:
 261        return arm_to_core_mmu_idx(ARMMMUIdx_MSUserNegPri);
 262    default:
 263        g_assert_not_reached();
 264    }
 265}
 266
 267/* The architectural value of PC.  */
 268static uint32_t read_pc(DisasContext *s)
 269{
 270    return s->pc_curr + (s->thumb ? 4 : 8);
 271}
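/*
 * For example, an A32 insn at 0x8000 reads PC as 0x8008 and a T32 insn at
 * the same address reads 0x8004, reflecting the architectural pipeline
 * offset of 8 (ARM) or 4 (Thumb) bytes.
 */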
 272
 273/* Set a variable to the value of a CPU register.  */
 274void load_reg_var(DisasContext *s, TCGv_i32 var, int reg)
 275{
 276    if (reg == 15) {
 277        tcg_gen_movi_i32(var, read_pc(s));
 278    } else {
 279        tcg_gen_mov_i32(var, cpu_R[reg]);
 280    }
 281}
 282
 283/*
 284 * Create a new temp, REG + OFS, except PC is ALIGN(PC, 4).
 285 * This is used for load/store for which use of PC implies (literal),
 286 * or ADD that implies ADR.
 287 */
 288TCGv_i32 add_reg_for_lit(DisasContext *s, int reg, int ofs)
 289{
 290    TCGv_i32 tmp = tcg_temp_new_i32();
 291
 292    if (reg == 15) {
 293        tcg_gen_movi_i32(tmp, (read_pc(s) & ~3) + ofs);
 294    } else {
 295        tcg_gen_addi_i32(tmp, cpu_R[reg], ofs);
 296    }
 297    return tmp;
 298}
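/*
 * Example (illustrative values): a Thumb LDR (literal) at 0x1002 with
 * ofs=8 computes ((0x1002 + 4) & ~3) + 8 = 0x100c as its address.
 */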
 299
 300/* Set a CPU register.  The source must be a temporary and will be
 301   marked as dead.  */
 302void store_reg(DisasContext *s, int reg, TCGv_i32 var)
 303{
 304    if (reg == 15) {
 305        /* In Thumb mode, we must ignore bit 0.
 306         * In ARM mode, for ARMv4 and ARMv5, it is UNPREDICTABLE if bits [1:0]
 307         * are not 0b00, but for ARMv6 and above, we must ignore bits [1:0].
 308         * We choose to ignore [1:0] in ARM mode for all architecture versions.
 309         */
 310        tcg_gen_andi_i32(var, var, s->thumb ? ~1 : ~3);
 311        s->base.is_jmp = DISAS_JUMP;
 312    } else if (reg == 13 && arm_dc_feature(s, ARM_FEATURE_M)) {
 313        /* For M-profile SP bits [1:0] are always zero */
 314        tcg_gen_andi_i32(var, var, ~3);
 315    }
 316    tcg_gen_mov_i32(cpu_R[reg], var);
 317    tcg_temp_free_i32(var);
 318}
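/*
 * For example, writing 0x1003 to r15 stores 0x1002 in Thumb state and
 * 0x1000 in ARM state, and in both cases the TB ends via DISAS_JUMP.
 */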
 319
 320/*
 321 * Variant of store_reg which applies v8M stack-limit checks before updating
 322 * SP. If the check fails this will result in an exception being taken.
 323 * We disable the stack checks for CONFIG_USER_ONLY because we have
 324 * no idea what the stack limits should be in that case.
 325 * If stack checking is not being done this just acts like store_reg().
 326 */
 327static void store_sp_checked(DisasContext *s, TCGv_i32 var)
 328{
 329#ifndef CONFIG_USER_ONLY
 330    if (s->v8m_stackcheck) {
 331        gen_helper_v8m_stackcheck(cpu_env, var);
 332    }
 333#endif
 334    store_reg(s, 13, var);
 335}
 336
 337/* Value extensions.  */
 338#define gen_uxtb(var) tcg_gen_ext8u_i32(var, var)
 339#define gen_uxth(var) tcg_gen_ext16u_i32(var, var)
 340#define gen_sxtb(var) tcg_gen_ext8s_i32(var, var)
 341#define gen_sxth(var) tcg_gen_ext16s_i32(var, var)
 342
 343#define gen_sxtb16(var) gen_helper_sxtb16(var, var)
 344#define gen_uxtb16(var) gen_helper_uxtb16(var, var)
 345
 346void gen_set_cpsr(TCGv_i32 var, uint32_t mask)
 347{
 348    gen_helper_cpsr_write(cpu_env, var, tcg_constant_i32(mask));
 349}
 350
 351static void gen_rebuild_hflags(DisasContext *s, bool new_el)
 352{
 353    bool m_profile = arm_dc_feature(s, ARM_FEATURE_M);
 354
 355    if (new_el) {
 356        if (m_profile) {
 357            gen_helper_rebuild_hflags_m32_newel(cpu_env);
 358        } else {
 359            gen_helper_rebuild_hflags_a32_newel(cpu_env);
 360        }
 361    } else {
 362        TCGv_i32 tcg_el = tcg_constant_i32(s->current_el);
 363        if (m_profile) {
 364            gen_helper_rebuild_hflags_m32(cpu_env, tcg_el);
 365        } else {
 366            gen_helper_rebuild_hflags_a32(cpu_env, tcg_el);
 367        }
 368    }
 369}
 370
 371static void gen_exception_internal(int excp)
 372{
 373    assert(excp_is_internal(excp));
 374    gen_helper_exception_internal(cpu_env, tcg_constant_i32(excp));
 375}
 376
 377static void gen_singlestep_exception(DisasContext *s)
 378{
 379    /* We just completed step of an insn. Move from Active-not-pending
 380     * to Active-pending, and then also take the swstep exception.
 381     * This corresponds to making the (IMPDEF) choice to prioritize
 382     * swstep exceptions over asynchronous exceptions taken to an exception
 383     * level where debug is disabled. This choice has the advantage that
 384     * we do not need to maintain internal state corresponding to the
 385     * ISV/EX syndrome bits between completion of the step and generation
 386     * of the exception, and our syndrome information is always correct.
 387     */
 388    gen_ss_advance(s);
 389    gen_swstep_exception(s, 1, s->is_ldex);
 390    s->base.is_jmp = DISAS_NORETURN;
 391}
 392
 393void clear_eci_state(DisasContext *s)
 394{
 395    /*
 396     * Clear any ECI/ICI state: used when a load multiple/store
 397     * multiple insn executes.
 398     */
 399    if (s->eci) {
 400        store_cpu_field_constant(0, condexec_bits);
 401        s->eci = 0;
 402    }
 403}
 404
 405static void gen_smul_dual(TCGv_i32 a, TCGv_i32 b)
 406{
 407    TCGv_i32 tmp1 = tcg_temp_new_i32();
 408    TCGv_i32 tmp2 = tcg_temp_new_i32();
 409    tcg_gen_ext16s_i32(tmp1, a);
 410    tcg_gen_ext16s_i32(tmp2, b);
 411    tcg_gen_mul_i32(tmp1, tmp1, tmp2);
 412    tcg_temp_free_i32(tmp2);
 413    tcg_gen_sari_i32(a, a, 16);
 414    tcg_gen_sari_i32(b, b, 16);
 415    tcg_gen_mul_i32(b, b, a);
 416    tcg_gen_mov_i32(a, tmp1);
 417    tcg_temp_free_i32(tmp1);
 418}
 419
 420/* Byteswap each halfword.  */
 421void gen_rev16(TCGv_i32 dest, TCGv_i32 var)
 422{
 423    TCGv_i32 tmp = tcg_temp_new_i32();
 424    TCGv_i32 mask = tcg_constant_i32(0x00ff00ff);
 425    tcg_gen_shri_i32(tmp, var, 8);
 426    tcg_gen_and_i32(tmp, tmp, mask);
 427    tcg_gen_and_i32(var, var, mask);
 428    tcg_gen_shli_i32(var, var, 8);
 429    tcg_gen_or_i32(dest, var, tmp);
 430    tcg_temp_free_i32(tmp);
 431}
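/* Example: 0xaabbccdd -> 0xbbaaddcc (each 16-bit halfword byteswapped). */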
 432
 433/* Byteswap low halfword and sign extend.  */
 434static void gen_revsh(TCGv_i32 dest, TCGv_i32 var)
 435{
  436    tcg_gen_bswap16_i32(dest, var, TCG_BSWAP_OS);
 437}
 438
  439/* Dual 16-bit add.  Result placed in dest; t0 and t1 are used as scratch.
 440    tmp = (t0 ^ t1) & 0x8000;
 441    t0 &= ~0x8000;
 442    t1 &= ~0x8000;
  443    dest = (t0 + t1) ^ tmp;
 444 */
 445
 446static void gen_add16(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
 447{
 448    TCGv_i32 tmp = tcg_temp_new_i32();
 449    tcg_gen_xor_i32(tmp, t0, t1);
 450    tcg_gen_andi_i32(tmp, tmp, 0x8000);
 451    tcg_gen_andi_i32(t0, t0, ~0x8000);
 452    tcg_gen_andi_i32(t1, t1, ~0x8000);
 453    tcg_gen_add_i32(t0, t0, t1);
 454    tcg_gen_xor_i32(dest, t0, tmp);
 455    tcg_temp_free_i32(tmp);
 456}
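/*
 * Example: t0=0x00018000, t1=0x00008000 yields dest=0x00010000; the carry
 * out of the low halfword is discarded rather than propagated into bit 16.
 */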
 457
 458/* Set N and Z flags from var.  */
 459static inline void gen_logic_CC(TCGv_i32 var)
 460{
 461    tcg_gen_mov_i32(cpu_NF, var);
 462    tcg_gen_mov_i32(cpu_ZF, var);
 463}
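/*
 * A plain move suffices because of the flag representation used in this
 * file: bit 31 of NF is the N flag, ZF is zero iff the Z flag is set,
 * CF holds 0 or 1, and bit 31 of VF is the V flag.
 */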
 464
 465/* dest = T0 + T1 + CF. */
 466static void gen_add_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
 467{
 468    tcg_gen_add_i32(dest, t0, t1);
 469    tcg_gen_add_i32(dest, dest, cpu_CF);
 470}
 471
 472/* dest = T0 - T1 + CF - 1.  */
 473static void gen_sub_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
 474{
 475    tcg_gen_sub_i32(dest, t0, t1);
 476    tcg_gen_add_i32(dest, dest, cpu_CF);
 477    tcg_gen_subi_i32(dest, dest, 1);
 478}
 479
 480/* dest = T0 + T1. Compute C, N, V and Z flags */
 481static void gen_add_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
 482{
 483    TCGv_i32 tmp = tcg_temp_new_i32();
 484    tcg_gen_movi_i32(tmp, 0);
 485    tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, t1, tmp);
 486    tcg_gen_mov_i32(cpu_ZF, cpu_NF);
 487    tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
 488    tcg_gen_xor_i32(tmp, t0, t1);
 489    tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
 490    tcg_temp_free_i32(tmp);
 491    tcg_gen_mov_i32(dest, cpu_NF);
 492}
 493
 494/* dest = T0 + T1 + CF.  Compute C, N, V and Z flags */
 495static void gen_adc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
 496{
 497    TCGv_i32 tmp = tcg_temp_new_i32();
 498    if (TCG_TARGET_HAS_add2_i32) {
 499        tcg_gen_movi_i32(tmp, 0);
 500        tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, cpu_CF, tmp);
 501        tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1, tmp);
 502    } else {
 503        TCGv_i64 q0 = tcg_temp_new_i64();
 504        TCGv_i64 q1 = tcg_temp_new_i64();
 505        tcg_gen_extu_i32_i64(q0, t0);
 506        tcg_gen_extu_i32_i64(q1, t1);
 507        tcg_gen_add_i64(q0, q0, q1);
 508        tcg_gen_extu_i32_i64(q1, cpu_CF);
 509        tcg_gen_add_i64(q0, q0, q1);
 510        tcg_gen_extr_i64_i32(cpu_NF, cpu_CF, q0);
 511        tcg_temp_free_i64(q0);
 512        tcg_temp_free_i64(q1);
 513    }
 514    tcg_gen_mov_i32(cpu_ZF, cpu_NF);
 515    tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
 516    tcg_gen_xor_i32(tmp, t0, t1);
 517    tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
 518    tcg_temp_free_i32(tmp);
 519    tcg_gen_mov_i32(dest, cpu_NF);
 520}
 521
 522/* dest = T0 - T1. Compute C, N, V and Z flags */
 523static void gen_sub_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
 524{
 525    TCGv_i32 tmp;
 526    tcg_gen_sub_i32(cpu_NF, t0, t1);
 527    tcg_gen_mov_i32(cpu_ZF, cpu_NF);
 528    tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0, t1);
 529    tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
 530    tmp = tcg_temp_new_i32();
 531    tcg_gen_xor_i32(tmp, t0, t1);
 532    tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
 533    tcg_temp_free_i32(tmp);
 534    tcg_gen_mov_i32(dest, cpu_NF);
 535}
 536
 537/* dest = T0 + ~T1 + CF.  Compute C, N, V and Z flags */
 538static void gen_sbc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
 539{
 540    TCGv_i32 tmp = tcg_temp_new_i32();
 541    tcg_gen_not_i32(tmp, t1);
 542    gen_adc_CC(dest, t0, tmp);
 543    tcg_temp_free_i32(tmp);
 544}
 545
 546#define GEN_SHIFT(name)                                               \
 547static void gen_##name(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)       \
 548{                                                                     \
 549    TCGv_i32 tmpd = tcg_temp_new_i32();                               \
 550    TCGv_i32 tmp1 = tcg_temp_new_i32();                               \
 551    TCGv_i32 zero = tcg_constant_i32(0);                              \
 552    tcg_gen_andi_i32(tmp1, t1, 0x1f);                                 \
 553    tcg_gen_##name##_i32(tmpd, t0, tmp1);                             \
 554    tcg_gen_andi_i32(tmp1, t1, 0xe0);                                 \
 555    tcg_gen_movcond_i32(TCG_COND_NE, dest, tmp1, zero, zero, tmpd);   \
 556    tcg_temp_free_i32(tmpd);                                          \
 557    tcg_temp_free_i32(tmp1);                                          \
 558}
 559GEN_SHIFT(shl)
 560GEN_SHIFT(shr)
 561#undef GEN_SHIFT
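/*
 * For register-specified LSL/LSR only the bottom byte of the shift register
 * matters and any count of 32..255 must produce zero: the movcond above
 * selects the shifted value when (t1 & 0xe0) == 0 and zero otherwise, so
 * the actual TCG shift count stays in the defined 0..31 range.
 */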
 562
 563static void gen_sar(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
 564{
 565    TCGv_i32 tmp1 = tcg_temp_new_i32();
 566
 567    tcg_gen_andi_i32(tmp1, t1, 0xff);
 568    tcg_gen_umin_i32(tmp1, tmp1, tcg_constant_i32(31));
 569    tcg_gen_sar_i32(dest, t0, tmp1);
 570    tcg_temp_free_i32(tmp1);
 571}
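/*
 * For ASR a clamp to 31 is enough: any count of 32..255 gives the same
 * sign-fill result as a shift by 31, and TCG requires shift counts < 32.
 */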
 572
 573static void shifter_out_im(TCGv_i32 var, int shift)
 574{
 575    tcg_gen_extract_i32(cpu_CF, var, shift, 1);
 576}
 577
 578/* Shift by immediate.  Includes special handling for shift == 0.  */
 579static inline void gen_arm_shift_im(TCGv_i32 var, int shiftop,
 580                                    int shift, int flags)
 581{
 582    switch (shiftop) {
 583    case 0: /* LSL */
 584        if (shift != 0) {
 585            if (flags)
 586                shifter_out_im(var, 32 - shift);
 587            tcg_gen_shli_i32(var, var, shift);
 588        }
 589        break;
 590    case 1: /* LSR */
 591        if (shift == 0) {
 592            if (flags) {
 593                tcg_gen_shri_i32(cpu_CF, var, 31);
 594            }
 595            tcg_gen_movi_i32(var, 0);
 596        } else {
 597            if (flags)
 598                shifter_out_im(var, shift - 1);
 599            tcg_gen_shri_i32(var, var, shift);
 600        }
 601        break;
 602    case 2: /* ASR */
 603        if (shift == 0)
 604            shift = 32;
 605        if (flags)
 606            shifter_out_im(var, shift - 1);
 607        if (shift == 32)
 608          shift = 31;
 609        tcg_gen_sari_i32(var, var, shift);
 610        break;
 611    case 3: /* ROR/RRX */
 612        if (shift != 0) {
 613            if (flags)
 614                shifter_out_im(var, shift - 1);
 615            tcg_gen_rotri_i32(var, var, shift); break;
 616        } else {
 617            TCGv_i32 tmp = tcg_temp_new_i32();
 618            tcg_gen_shli_i32(tmp, cpu_CF, 31);
 619            if (flags)
 620                shifter_out_im(var, 0);
 621            tcg_gen_shri_i32(var, var, 1);
 622            tcg_gen_or_i32(var, var, tmp);
 623            tcg_temp_free_i32(tmp);
 624        }
 625    }
  626}
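/*
 * The shift == 0 special cases above follow the immediate-shift encoding:
 * LSR #0 and ASR #0 encode LSR #32 and ASR #32 respectively, and ROR #0
 * encodes RRX (rotate right by one through the carry flag).
 */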
 627
 628static inline void gen_arm_shift_reg(TCGv_i32 var, int shiftop,
 629                                     TCGv_i32 shift, int flags)
 630{
 631    if (flags) {
 632        switch (shiftop) {
 633        case 0: gen_helper_shl_cc(var, cpu_env, var, shift); break;
 634        case 1: gen_helper_shr_cc(var, cpu_env, var, shift); break;
 635        case 2: gen_helper_sar_cc(var, cpu_env, var, shift); break;
 636        case 3: gen_helper_ror_cc(var, cpu_env, var, shift); break;
 637        }
 638    } else {
 639        switch (shiftop) {
 640        case 0:
 641            gen_shl(var, var, shift);
 642            break;
 643        case 1:
 644            gen_shr(var, var, shift);
 645            break;
 646        case 2:
 647            gen_sar(var, var, shift);
 648            break;
 649        case 3: tcg_gen_andi_i32(shift, shift, 0x1f);
 650                tcg_gen_rotr_i32(var, var, shift); break;
 651        }
 652    }
 653    tcg_temp_free_i32(shift);
 654}
 655
 656/*
 657 * Generate a conditional based on ARM condition code cc.
  658 * This is common between ARM and AArch64 targets.
 659 */
 660void arm_test_cc(DisasCompare *cmp, int cc)
 661{
 662    TCGv_i32 value;
 663    TCGCond cond;
 664    bool global = true;
 665
 666    switch (cc) {
 667    case 0: /* eq: Z */
 668    case 1: /* ne: !Z */
 669        cond = TCG_COND_EQ;
 670        value = cpu_ZF;
 671        break;
 672
 673    case 2: /* cs: C */
 674    case 3: /* cc: !C */
 675        cond = TCG_COND_NE;
 676        value = cpu_CF;
 677        break;
 678
 679    case 4: /* mi: N */
 680    case 5: /* pl: !N */
 681        cond = TCG_COND_LT;
 682        value = cpu_NF;
 683        break;
 684
 685    case 6: /* vs: V */
 686    case 7: /* vc: !V */
 687        cond = TCG_COND_LT;
 688        value = cpu_VF;
 689        break;
 690
 691    case 8: /* hi: C && !Z */
 692    case 9: /* ls: !C || Z -> !(C && !Z) */
 693        cond = TCG_COND_NE;
 694        value = tcg_temp_new_i32();
 695        global = false;
 696        /* CF is 1 for C, so -CF is an all-bits-set mask for C;
 697           ZF is non-zero for !Z; so AND the two subexpressions.  */
 698        tcg_gen_neg_i32(value, cpu_CF);
 699        tcg_gen_and_i32(value, value, cpu_ZF);
 700        break;
 701
 702    case 10: /* ge: N == V -> N ^ V == 0 */
 703    case 11: /* lt: N != V -> N ^ V != 0 */
 704        /* Since we're only interested in the sign bit, == 0 is >= 0.  */
 705        cond = TCG_COND_GE;
 706        value = tcg_temp_new_i32();
 707        global = false;
 708        tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
 709        break;
 710
 711    case 12: /* gt: !Z && N == V */
 712    case 13: /* le: Z || N != V */
 713        cond = TCG_COND_NE;
 714        value = tcg_temp_new_i32();
 715        global = false;
 716        /* (N == V) is equal to the sign bit of ~(NF ^ VF).  Propagate
 717         * the sign bit then AND with ZF to yield the result.  */
 718        tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
 719        tcg_gen_sari_i32(value, value, 31);
 720        tcg_gen_andc_i32(value, cpu_ZF, value);
 721        break;
 722
 723    case 14: /* always */
 724    case 15: /* always */
 725        /* Use the ALWAYS condition, which will fold early.
 726         * It doesn't matter what we use for the value.  */
 727        cond = TCG_COND_ALWAYS;
 728        value = cpu_ZF;
 729        goto no_invert;
 730
 731    default:
 732        fprintf(stderr, "Bad condition code 0x%x\n", cc);
 733        abort();
 734    }
 735
 736    if (cc & 1) {
 737        cond = tcg_invert_cond(cond);
 738    }
 739
 740 no_invert:
 741    cmp->cond = cond;
 742    cmp->value = value;
 743    cmp->value_global = global;
 744}
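/*
 * Example: cc=0 (EQ) produces {TCG_COND_EQ, cpu_ZF}, i.e. "taken if
 * ZF == 0", which is exactly "Z flag set" in the representation used here;
 * cc=1 (NE) is the same comparison with the condition inverted.
 */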
 745
 746void arm_free_cc(DisasCompare *cmp)
 747{
 748    if (!cmp->value_global) {
 749        tcg_temp_free_i32(cmp->value);
 750    }
 751}
 752
 753void arm_jump_cc(DisasCompare *cmp, TCGLabel *label)
 754{
 755    tcg_gen_brcondi_i32(cmp->cond, cmp->value, 0, label);
 756}
 757
 758void arm_gen_test_cc(int cc, TCGLabel *label)
 759{
 760    DisasCompare cmp;
 761    arm_test_cc(&cmp, cc);
 762    arm_jump_cc(&cmp, label);
 763    arm_free_cc(&cmp);
 764}
 765
 766void gen_set_condexec(DisasContext *s)
 767{
 768    if (s->condexec_mask) {
 769        uint32_t val = (s->condexec_cond << 4) | (s->condexec_mask >> 1);
 770
 771        store_cpu_field_constant(val, condexec_bits);
 772    }
 773}
 774
 775void gen_set_pc_im(DisasContext *s, target_ulong val)
 776{
 777    tcg_gen_movi_i32(cpu_R[15], val);
 778}
 779
 780/* Set PC and Thumb state from var.  var is marked as dead.  */
 781static inline void gen_bx(DisasContext *s, TCGv_i32 var)
 782{
 783    s->base.is_jmp = DISAS_JUMP;
 784    tcg_gen_andi_i32(cpu_R[15], var, ~1);
 785    tcg_gen_andi_i32(var, var, 1);
 786    store_cpu_field(var, thumb);
 787}
 788
 789/*
 790 * Set PC and Thumb state from var. var is marked as dead.
 791 * For M-profile CPUs, include logic to detect exception-return
 792 * branches and handle them. This is needed for Thumb POP/LDM to PC, LDR to PC,
 793 * and BX reg, and no others, and happens only for code in Handler mode.
 794 * The Security Extension also requires us to check for the FNC_RETURN
 795 * which signals a function return from non-secure state; this can happen
 796 * in both Handler and Thread mode.
 797 * To avoid having to do multiple comparisons in inline generated code,
 798 * we make the check we do here loose, so it will match for EXC_RETURN
 799 * in Thread mode. For system emulation do_v7m_exception_exit() checks
 800 * for these spurious cases and returns without doing anything (giving
 801 * the same behaviour as for a branch to a non-magic address).
 802 *
 803 * In linux-user mode it is unclear what the right behaviour for an
 804 * attempted FNC_RETURN should be, because in real hardware this will go
 805 * directly to Secure code (ie not the Linux kernel) which will then treat
 806 * the error in any way it chooses. For QEMU we opt to make the FNC_RETURN
 807 * attempt behave the way it would on a CPU without the security extension,
 808 * which is to say "like a normal branch". That means we can simply treat
 809 * all branches as normal with no magic address behaviour.
 810 */
 811static inline void gen_bx_excret(DisasContext *s, TCGv_i32 var)
 812{
 813    /* Generate the same code here as for a simple bx, but flag via
 814     * s->base.is_jmp that we need to do the rest of the work later.
 815     */
 816    gen_bx(s, var);
 817#ifndef CONFIG_USER_ONLY
 818    if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY) ||
 819        (s->v7m_handler_mode && arm_dc_feature(s, ARM_FEATURE_M))) {
 820        s->base.is_jmp = DISAS_BX_EXCRET;
 821    }
 822#endif
 823}
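/*
 * For example, a Handler-mode "BX LR" with LR holding an EXC_RETURN value
 * such as 0xfffffff9 ends up here; the actual magic-range comparison is
 * deferred to gen_bx_excret_final_code() below.
 */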
 824
 825static inline void gen_bx_excret_final_code(DisasContext *s)
 826{
 827    /* Generate the code to finish possible exception return and end the TB */
 828    TCGLabel *excret_label = gen_new_label();
 829    uint32_t min_magic;
 830
 831    if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY)) {
 832        /* Covers FNC_RETURN and EXC_RETURN magic */
 833        min_magic = FNC_RETURN_MIN_MAGIC;
 834    } else {
 835        /* EXC_RETURN magic only */
 836        min_magic = EXC_RETURN_MIN_MAGIC;
 837    }
 838
 839    /* Is the new PC value in the magic range indicating exception return? */
 840    tcg_gen_brcondi_i32(TCG_COND_GEU, cpu_R[15], min_magic, excret_label);
 841    /* No: end the TB as we would for a DISAS_JMP */
 842    if (s->ss_active) {
 843        gen_singlestep_exception(s);
 844    } else {
 845        tcg_gen_exit_tb(NULL, 0);
 846    }
 847    gen_set_label(excret_label);
 848    /* Yes: this is an exception return.
 849     * At this point in runtime env->regs[15] and env->thumb will hold
 850     * the exception-return magic number, which do_v7m_exception_exit()
 851     * will read. Nothing else will be able to see those values because
 852     * the cpu-exec main loop guarantees that we will always go straight
 853     * from raising the exception to the exception-handling code.
 854     *
 855     * gen_ss_advance(s) does nothing on M profile currently but
 856     * calling it is conceptually the right thing as we have executed
 857     * this instruction (compare SWI, HVC, SMC handling).
 858     */
 859    gen_ss_advance(s);
 860    gen_exception_internal(EXCP_EXCEPTION_EXIT);
 861}
 862
 863static inline void gen_bxns(DisasContext *s, int rm)
 864{
 865    TCGv_i32 var = load_reg(s, rm);
 866
 867    /* The bxns helper may raise an EXCEPTION_EXIT exception, so in theory
 868     * we need to sync state before calling it, but:
 869     *  - we don't need to do gen_set_pc_im() because the bxns helper will
 870     *    always set the PC itself
 871     *  - we don't need to do gen_set_condexec() because BXNS is UNPREDICTABLE
 872     *    unless it's outside an IT block or the last insn in an IT block,
 873     *    so we know that condexec == 0 (already set at the top of the TB)
 874     *    is correct in the non-UNPREDICTABLE cases, and we can choose
 875     *    "zeroes the IT bits" as our UNPREDICTABLE behaviour otherwise.
 876     */
 877    gen_helper_v7m_bxns(cpu_env, var);
 878    tcg_temp_free_i32(var);
 879    s->base.is_jmp = DISAS_EXIT;
 880}
 881
 882static inline void gen_blxns(DisasContext *s, int rm)
 883{
 884    TCGv_i32 var = load_reg(s, rm);
 885
 886    /* We don't need to sync condexec state, for the same reason as bxns.
 887     * We do however need to set the PC, because the blxns helper reads it.
 888     * The blxns helper may throw an exception.
 889     */
 890    gen_set_pc_im(s, s->base.pc_next);
 891    gen_helper_v7m_blxns(cpu_env, var);
 892    tcg_temp_free_i32(var);
 893    s->base.is_jmp = DISAS_EXIT;
 894}
 895
 896/* Variant of store_reg which uses branch&exchange logic when storing
 897   to r15 in ARM architecture v7 and above. The source must be a temporary
 898   and will be marked as dead. */
 899static inline void store_reg_bx(DisasContext *s, int reg, TCGv_i32 var)
 900{
 901    if (reg == 15 && ENABLE_ARCH_7) {
 902        gen_bx(s, var);
 903    } else {
 904        store_reg(s, reg, var);
 905    }
 906}
 907
 908/* Variant of store_reg which uses branch&exchange logic when storing
 909 * to r15 in ARM architecture v5T and above. This is used for storing
 910 * the results of a LDR/LDM/POP into r15, and corresponds to the cases
 911 * in the ARM ARM which use the LoadWritePC() pseudocode function. */
 912static inline void store_reg_from_load(DisasContext *s, int reg, TCGv_i32 var)
 913{
 914    if (reg == 15 && ENABLE_ARCH_5) {
 915        gen_bx_excret(s, var);
 916    } else {
 917        store_reg(s, reg, var);
 918    }
 919}
 920
 921#ifdef CONFIG_USER_ONLY
 922#define IS_USER_ONLY 1
 923#else
 924#define IS_USER_ONLY 0
 925#endif
 926
 927MemOp pow2_align(unsigned i)
 928{
 929    static const MemOp mop_align[] = {
 930        0, MO_ALIGN_2, MO_ALIGN_4, MO_ALIGN_8, MO_ALIGN_16,
 931        /*
 932         * FIXME: TARGET_PAGE_BITS_MIN affects TLB_FLAGS_MASK such
 933         * that 256-bit alignment (MO_ALIGN_32) cannot be supported:
 934         * see get_alignment_bits(). Enforce only 128-bit alignment for now.
 935         */
 936        MO_ALIGN_16
 937    };
 938    g_assert(i < ARRAY_SIZE(mop_align));
 939    return mop_align[i];
 940}
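/* Example: pow2_align(2) is MO_ALIGN_4, while an index of 5 (a 256-bit
 * access) is capped at MO_ALIGN_16 as described in the FIXME above. */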
 941
 942/*
 943 * Abstractions of "generate code to do a guest load/store for
 944 * AArch32", where a vaddr is always 32 bits (and is zero
  945 * extended if we're a 64-bit core) and data is also
 946 * 32 bits unless specifically doing a 64 bit access.
 947 * These functions work like tcg_gen_qemu_{ld,st}* except
 948 * that the address argument is TCGv_i32 rather than TCGv.
 949 */
 950
 951static TCGv gen_aa32_addr(DisasContext *s, TCGv_i32 a32, MemOp op)
 952{
 953    TCGv addr = tcg_temp_new();
 954    tcg_gen_extu_i32_tl(addr, a32);
 955
 956    /* Not needed for user-mode BE32, where we use MO_BE instead.  */
 957    if (!IS_USER_ONLY && s->sctlr_b && (op & MO_SIZE) < MO_32) {
 958        tcg_gen_xori_tl(addr, addr, 4 - (1 << (op & MO_SIZE)));
 959    }
 960    return addr;
 961}
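/*
 * With SCTLR.B set (legacy BE32 layout), sub-word accesses have their
 * address XORed with 4 - (1 << size): 3 for byte and 2 for halfword
 * accesses, while word-sized and larger accesses are left unchanged.
 */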
 962
 963/*
 964 * Internal routines are used for NEON cases where the endianness
 965 * and/or alignment has already been taken into account and manipulated.
 966 */
 967void gen_aa32_ld_internal_i32(DisasContext *s, TCGv_i32 val,
 968                              TCGv_i32 a32, int index, MemOp opc)
 969{
 970    TCGv addr = gen_aa32_addr(s, a32, opc);
 971    tcg_gen_qemu_ld_i32(val, addr, index, opc);
 972    tcg_temp_free(addr);
 973}
 974
 975void gen_aa32_st_internal_i32(DisasContext *s, TCGv_i32 val,
 976                              TCGv_i32 a32, int index, MemOp opc)
 977{
 978    TCGv addr = gen_aa32_addr(s, a32, opc);
 979    tcg_gen_qemu_st_i32(val, addr, index, opc);
 980    tcg_temp_free(addr);
 981}
 982
 983void gen_aa32_ld_internal_i64(DisasContext *s, TCGv_i64 val,
 984                              TCGv_i32 a32, int index, MemOp opc)
 985{
 986    TCGv addr = gen_aa32_addr(s, a32, opc);
 987
 988    tcg_gen_qemu_ld_i64(val, addr, index, opc);
 989
 990    /* Not needed for user-mode BE32, where we use MO_BE instead.  */
 991    if (!IS_USER_ONLY && s->sctlr_b && (opc & MO_SIZE) == MO_64) {
 992        tcg_gen_rotri_i64(val, val, 32);
 993    }
 994    tcg_temp_free(addr);
 995}
 996
 997void gen_aa32_st_internal_i64(DisasContext *s, TCGv_i64 val,
 998                              TCGv_i32 a32, int index, MemOp opc)
 999{
1000    TCGv addr = gen_aa32_addr(s, a32, opc);
1001
1002    /* Not needed for user-mode BE32, where we use MO_BE instead.  */
1003    if (!IS_USER_ONLY && s->sctlr_b && (opc & MO_SIZE) == MO_64) {
1004        TCGv_i64 tmp = tcg_temp_new_i64();
1005        tcg_gen_rotri_i64(tmp, val, 32);
1006        tcg_gen_qemu_st_i64(tmp, addr, index, opc);
1007        tcg_temp_free_i64(tmp);
1008    } else {
1009        tcg_gen_qemu_st_i64(val, addr, index, opc);
1010    }
1011    tcg_temp_free(addr);
1012}
1013
1014void gen_aa32_ld_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
1015                     int index, MemOp opc)
1016{
1017    gen_aa32_ld_internal_i32(s, val, a32, index, finalize_memop(s, opc));
1018}
1019
1020void gen_aa32_st_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
1021                     int index, MemOp opc)
1022{
1023    gen_aa32_st_internal_i32(s, val, a32, index, finalize_memop(s, opc));
1024}
1025
1026void gen_aa32_ld_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
1027                     int index, MemOp opc)
1028{
1029    gen_aa32_ld_internal_i64(s, val, a32, index, finalize_memop(s, opc));
1030}
1031
1032void gen_aa32_st_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
1033                     int index, MemOp opc)
1034{
1035    gen_aa32_st_internal_i64(s, val, a32, index, finalize_memop(s, opc));
1036}
1037
1038#define DO_GEN_LD(SUFF, OPC)                                            \
1039    static inline void gen_aa32_ld##SUFF(DisasContext *s, TCGv_i32 val, \
1040                                         TCGv_i32 a32, int index)       \
1041    {                                                                   \
1042        gen_aa32_ld_i32(s, val, a32, index, OPC);                       \
1043    }
1044
1045#define DO_GEN_ST(SUFF, OPC)                                            \
1046    static inline void gen_aa32_st##SUFF(DisasContext *s, TCGv_i32 val, \
1047                                         TCGv_i32 a32, int index)       \
1048    {                                                                   \
1049        gen_aa32_st_i32(s, val, a32, index, OPC);                       \
1050    }
1051
1052static inline void gen_hvc(DisasContext *s, int imm16)
1053{
1054    /* The pre HVC helper handles cases when HVC gets trapped
1055     * as an undefined insn by runtime configuration (ie before
1056     * the insn really executes).
1057     */
1058    gen_set_pc_im(s, s->pc_curr);
1059    gen_helper_pre_hvc(cpu_env);
1060    /* Otherwise we will treat this as a real exception which
1061     * happens after execution of the insn. (The distinction matters
1062     * for the PC value reported to the exception handler and also
1063     * for single stepping.)
1064     */
1065    s->svc_imm = imm16;
1066    gen_set_pc_im(s, s->base.pc_next);
1067    s->base.is_jmp = DISAS_HVC;
1068}
1069
1070static inline void gen_smc(DisasContext *s)
1071{
1072    /* As with HVC, we may take an exception either before or after
1073     * the insn executes.
1074     */
1075    gen_set_pc_im(s, s->pc_curr);
1076    gen_helper_pre_smc(cpu_env, tcg_constant_i32(syn_aa32_smc()));
1077    gen_set_pc_im(s, s->base.pc_next);
1078    s->base.is_jmp = DISAS_SMC;
1079}
1080
1081static void gen_exception_internal_insn(DisasContext *s, uint32_t pc, int excp)
1082{
1083    gen_set_condexec(s);
1084    gen_set_pc_im(s, pc);
1085    gen_exception_internal(excp);
1086    s->base.is_jmp = DISAS_NORETURN;
1087}
1088
1089static void gen_exception_el_v(int excp, uint32_t syndrome, TCGv_i32 tcg_el)
1090{
1091    gen_helper_exception_with_syndrome_el(cpu_env, tcg_constant_i32(excp),
1092                                          tcg_constant_i32(syndrome), tcg_el);
1093}
1094
1095static void gen_exception_el(int excp, uint32_t syndrome, uint32_t target_el)
1096{
1097    gen_exception_el_v(excp, syndrome, tcg_constant_i32(target_el));
1098}
1099
1100static void gen_exception(int excp, uint32_t syndrome)
1101{
1102    gen_helper_exception_with_syndrome(cpu_env, tcg_constant_i32(excp),
1103                                       tcg_constant_i32(syndrome));
1104}
1105
1106static void gen_exception_insn_el_v(DisasContext *s, uint64_t pc, int excp,
1107                                    uint32_t syn, TCGv_i32 tcg_el)
1108{
1109    if (s->aarch64) {
1110        gen_a64_set_pc_im(pc);
1111    } else {
1112        gen_set_condexec(s);
1113        gen_set_pc_im(s, pc);
1114    }
1115    gen_exception_el_v(excp, syn, tcg_el);
1116    s->base.is_jmp = DISAS_NORETURN;
1117}
1118
1119void gen_exception_insn_el(DisasContext *s, uint64_t pc, int excp,
1120                           uint32_t syn, uint32_t target_el)
1121{
1122    gen_exception_insn_el_v(s, pc, excp, syn, tcg_constant_i32(target_el));
1123}
1124
1125void gen_exception_insn(DisasContext *s, uint64_t pc, int excp, uint32_t syn)
1126{
1127    if (s->aarch64) {
1128        gen_a64_set_pc_im(pc);
1129    } else {
1130        gen_set_condexec(s);
1131        gen_set_pc_im(s, pc);
1132    }
1133    gen_exception(excp, syn);
1134    s->base.is_jmp = DISAS_NORETURN;
1135}
1136
1137static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syn)
1138{
1139    gen_set_condexec(s);
1140    gen_set_pc_im(s, s->pc_curr);
1141    gen_helper_exception_bkpt_insn(cpu_env, tcg_constant_i32(syn));
1142    s->base.is_jmp = DISAS_NORETURN;
1143}
1144
1145void unallocated_encoding(DisasContext *s)
1146{
1147    /* Unallocated and reserved encodings are uncategorized */
1148    gen_exception_insn(s, s->pc_curr, EXCP_UDEF, syn_uncategorized());
1149}
1150
1151/* Force a TB lookup after an instruction that changes the CPU state.  */
1152void gen_lookup_tb(DisasContext *s)
1153{
1154    tcg_gen_movi_i32(cpu_R[15], s->base.pc_next);
1155    s->base.is_jmp = DISAS_EXIT;
1156}
1157
1158static inline void gen_hlt(DisasContext *s, int imm)
1159{
1160    /* HLT. This has two purposes.
1161     * Architecturally, it is an external halting debug instruction.
 1162     * Since QEMU doesn't implement external debug, we treat this as
 1163     * the architecture requires when halting debug is disabled: it will UNDEF.
1164     * Secondly, "HLT 0x3C" is a T32 semihosting trap instruction,
1165     * and "HLT 0xF000" is an A32 semihosting syscall. These traps
1166     * must trigger semihosting even for ARMv7 and earlier, where
1167     * HLT was an undefined encoding.
1168     * In system mode, we don't allow userspace access to
1169     * semihosting, to provide some semblance of security
 1170     * (and for consistency with our 64-bit semihosting).
1171     */
1172    if (semihosting_enabled() &&
1173#ifndef CONFIG_USER_ONLY
1174        s->current_el != 0 &&
1175#endif
1176        (imm == (s->thumb ? 0x3c : 0xf000))) {
1177        gen_exception_internal_insn(s, s->pc_curr, EXCP_SEMIHOST);
1178        return;
1179    }
1180
1181    unallocated_encoding(s);
1182}
1183
1184/*
1185 * Return the offset of a "full" NEON Dreg.
1186 */
1187long neon_full_reg_offset(unsigned reg)
1188{
1189    return offsetof(CPUARMState, vfp.zregs[reg >> 1].d[reg & 1]);
1190}
1191
1192/*
1193 * Return the offset of a 2**SIZE piece of a NEON register, at index ELE,
1194 * where 0 is the least significant end of the register.
1195 */
1196long neon_element_offset(int reg, int element, MemOp memop)
1197{
1198    int element_size = 1 << (memop & MO_SIZE);
1199    int ofs = element * element_size;
1200#if HOST_BIG_ENDIAN
1201    /*
1202     * Calculate the offset assuming fully little-endian,
1203     * then XOR to account for the order of the 8-byte units.
1204     */
1205    if (element_size < 8) {
1206        ofs ^= 8 - element_size;
1207    }
1208#endif
1209    return neon_full_reg_offset(reg) + ofs;
1210}
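/*
 * Example (little-endian host): reg=1, element=2, MO_16 addresses
 * zregs[0].d[1] plus 4 bytes; on a big-endian host the offset within the
 * 8-byte unit becomes 4 ^ (8 - 2) = 2 instead.
 */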
1211
1212/* Return the offset of a VFP Dreg (dp = true) or VFP Sreg (dp = false). */
1213long vfp_reg_offset(bool dp, unsigned reg)
1214{
1215    if (dp) {
1216        return neon_element_offset(reg, 0, MO_64);
1217    } else {
1218        return neon_element_offset(reg >> 1, reg & 1, MO_32);
1219    }
1220}
1221
1222void read_neon_element32(TCGv_i32 dest, int reg, int ele, MemOp memop)
1223{
1224    long off = neon_element_offset(reg, ele, memop);
1225
1226    switch (memop) {
1227    case MO_SB:
1228        tcg_gen_ld8s_i32(dest, cpu_env, off);
1229        break;
1230    case MO_UB:
1231        tcg_gen_ld8u_i32(dest, cpu_env, off);
1232        break;
1233    case MO_SW:
1234        tcg_gen_ld16s_i32(dest, cpu_env, off);
1235        break;
1236    case MO_UW:
1237        tcg_gen_ld16u_i32(dest, cpu_env, off);
1238        break;
1239    case MO_UL:
1240    case MO_SL:
1241        tcg_gen_ld_i32(dest, cpu_env, off);
1242        break;
1243    default:
1244        g_assert_not_reached();
1245    }
1246}
1247
1248void read_neon_element64(TCGv_i64 dest, int reg, int ele, MemOp memop)
1249{
1250    long off = neon_element_offset(reg, ele, memop);
1251
1252    switch (memop) {
1253    case MO_SL:
1254        tcg_gen_ld32s_i64(dest, cpu_env, off);
1255        break;
1256    case MO_UL:
1257        tcg_gen_ld32u_i64(dest, cpu_env, off);
1258        break;
1259    case MO_UQ:
1260        tcg_gen_ld_i64(dest, cpu_env, off);
1261        break;
1262    default:
1263        g_assert_not_reached();
1264    }
1265}
1266
1267void write_neon_element32(TCGv_i32 src, int reg, int ele, MemOp memop)
1268{
1269    long off = neon_element_offset(reg, ele, memop);
1270
1271    switch (memop) {
1272    case MO_8:
1273        tcg_gen_st8_i32(src, cpu_env, off);
1274        break;
1275    case MO_16:
1276        tcg_gen_st16_i32(src, cpu_env, off);
1277        break;
1278    case MO_32:
1279        tcg_gen_st_i32(src, cpu_env, off);
1280        break;
1281    default:
1282        g_assert_not_reached();
1283    }
1284}
1285
1286void write_neon_element64(TCGv_i64 src, int reg, int ele, MemOp memop)
1287{
1288    long off = neon_element_offset(reg, ele, memop);
1289
1290    switch (memop) {
1291    case MO_32:
1292        tcg_gen_st32_i64(src, cpu_env, off);
1293        break;
1294    case MO_64:
1295        tcg_gen_st_i64(src, cpu_env, off);
1296        break;
1297    default:
1298        g_assert_not_reached();
1299    }
1300}
1301
1302#define ARM_CP_RW_BIT   (1 << 20)
1303
1304static inline void iwmmxt_load_reg(TCGv_i64 var, int reg)
1305{
1306    tcg_gen_ld_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
1307}
1308
1309static inline void iwmmxt_store_reg(TCGv_i64 var, int reg)
1310{
1311    tcg_gen_st_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
1312}
1313
1314static inline TCGv_i32 iwmmxt_load_creg(int reg)
1315{
1316    TCGv_i32 var = tcg_temp_new_i32();
1317    tcg_gen_ld_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
1318    return var;
1319}
1320
1321static inline void iwmmxt_store_creg(int reg, TCGv_i32 var)
1322{
1323    tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
1324    tcg_temp_free_i32(var);
1325}
1326
1327static inline void gen_op_iwmmxt_movq_wRn_M0(int rn)
1328{
1329    iwmmxt_store_reg(cpu_M0, rn);
1330}
1331
1332static inline void gen_op_iwmmxt_movq_M0_wRn(int rn)
1333{
1334    iwmmxt_load_reg(cpu_M0, rn);
1335}
1336
1337static inline void gen_op_iwmmxt_orq_M0_wRn(int rn)
1338{
1339    iwmmxt_load_reg(cpu_V1, rn);
1340    tcg_gen_or_i64(cpu_M0, cpu_M0, cpu_V1);
1341}
1342
1343static inline void gen_op_iwmmxt_andq_M0_wRn(int rn)
1344{
1345    iwmmxt_load_reg(cpu_V1, rn);
1346    tcg_gen_and_i64(cpu_M0, cpu_M0, cpu_V1);
1347}
1348
1349static inline void gen_op_iwmmxt_xorq_M0_wRn(int rn)
1350{
1351    iwmmxt_load_reg(cpu_V1, rn);
1352    tcg_gen_xor_i64(cpu_M0, cpu_M0, cpu_V1);
1353}
1354
1355#define IWMMXT_OP(name) \
1356static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
1357{ \
1358    iwmmxt_load_reg(cpu_V1, rn); \
1359    gen_helper_iwmmxt_##name(cpu_M0, cpu_M0, cpu_V1); \
1360}
1361
1362#define IWMMXT_OP_ENV(name) \
1363static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
1364{ \
1365    iwmmxt_load_reg(cpu_V1, rn); \
1366    gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0, cpu_V1); \
1367}
1368
1369#define IWMMXT_OP_ENV_SIZE(name) \
1370IWMMXT_OP_ENV(name##b) \
1371IWMMXT_OP_ENV(name##w) \
1372IWMMXT_OP_ENV(name##l)
1373
1374#define IWMMXT_OP_ENV1(name) \
1375static inline void gen_op_iwmmxt_##name##_M0(void) \
1376{ \
1377    gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0); \
1378}
1379
1380IWMMXT_OP(maddsq)
1381IWMMXT_OP(madduq)
1382IWMMXT_OP(sadb)
1383IWMMXT_OP(sadw)
1384IWMMXT_OP(mulslw)
1385IWMMXT_OP(mulshw)
1386IWMMXT_OP(mululw)
1387IWMMXT_OP(muluhw)
1388IWMMXT_OP(macsw)
1389IWMMXT_OP(macuw)
1390
1391IWMMXT_OP_ENV_SIZE(unpackl)
1392IWMMXT_OP_ENV_SIZE(unpackh)
1393
1394IWMMXT_OP_ENV1(unpacklub)
1395IWMMXT_OP_ENV1(unpackluw)
1396IWMMXT_OP_ENV1(unpacklul)
1397IWMMXT_OP_ENV1(unpackhub)
1398IWMMXT_OP_ENV1(unpackhuw)
1399IWMMXT_OP_ENV1(unpackhul)
1400IWMMXT_OP_ENV1(unpacklsb)
1401IWMMXT_OP_ENV1(unpacklsw)
1402IWMMXT_OP_ENV1(unpacklsl)
1403IWMMXT_OP_ENV1(unpackhsb)
1404IWMMXT_OP_ENV1(unpackhsw)
1405IWMMXT_OP_ENV1(unpackhsl)
1406
1407IWMMXT_OP_ENV_SIZE(cmpeq)
1408IWMMXT_OP_ENV_SIZE(cmpgtu)
1409IWMMXT_OP_ENV_SIZE(cmpgts)
1410
1411IWMMXT_OP_ENV_SIZE(mins)
1412IWMMXT_OP_ENV_SIZE(minu)
1413IWMMXT_OP_ENV_SIZE(maxs)
1414IWMMXT_OP_ENV_SIZE(maxu)
1415
1416IWMMXT_OP_ENV_SIZE(subn)
1417IWMMXT_OP_ENV_SIZE(addn)
1418IWMMXT_OP_ENV_SIZE(subu)
1419IWMMXT_OP_ENV_SIZE(addu)
1420IWMMXT_OP_ENV_SIZE(subs)
1421IWMMXT_OP_ENV_SIZE(adds)
1422
1423IWMMXT_OP_ENV(avgb0)
1424IWMMXT_OP_ENV(avgb1)
1425IWMMXT_OP_ENV(avgw0)
1426IWMMXT_OP_ENV(avgw1)
1427
1428IWMMXT_OP_ENV(packuw)
1429IWMMXT_OP_ENV(packul)
1430IWMMXT_OP_ENV(packuq)
1431IWMMXT_OP_ENV(packsw)
1432IWMMXT_OP_ENV(packsl)
1433IWMMXT_OP_ENV(packsq)
1434
1435static void gen_op_iwmmxt_set_mup(void)
1436{
1437    TCGv_i32 tmp;
1438    tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
1439    tcg_gen_ori_i32(tmp, tmp, 2);
1440    store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
1441}
1442
1443static void gen_op_iwmmxt_set_cup(void)
1444{
1445    TCGv_i32 tmp;
1446    tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
1447    tcg_gen_ori_i32(tmp, tmp, 1);
1448    store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
1449}
1450
1451static void gen_op_iwmmxt_setpsr_nz(void)
1452{
1453    TCGv_i32 tmp = tcg_temp_new_i32();
1454    gen_helper_iwmmxt_setpsr_nz(tmp, cpu_M0);
1455    store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCASF]);
1456}
1457
1458static inline void gen_op_iwmmxt_addl_M0_wRn(int rn)
1459{
1460    iwmmxt_load_reg(cpu_V1, rn);
1461    tcg_gen_ext32u_i64(cpu_V1, cpu_V1);
1462    tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
1463}
1464
1465static inline int gen_iwmmxt_address(DisasContext *s, uint32_t insn,
1466                                     TCGv_i32 dest)
1467{
1468    int rd;
1469    uint32_t offset;
1470    TCGv_i32 tmp;
1471
1472    rd = (insn >> 16) & 0xf;
1473    tmp = load_reg(s, rd);
1474
1475    offset = (insn & 0xff) << ((insn >> 7) & 2);
1476    if (insn & (1 << 24)) {
1477        /* Pre indexed */
1478        if (insn & (1 << 23))
1479            tcg_gen_addi_i32(tmp, tmp, offset);
1480        else
1481            tcg_gen_addi_i32(tmp, tmp, -offset);
1482        tcg_gen_mov_i32(dest, tmp);
1483        if (insn & (1 << 21))
1484            store_reg(s, rd, tmp);
1485        else
1486            tcg_temp_free_i32(tmp);
1487    } else if (insn & (1 << 21)) {
1488        /* Post indexed */
1489        tcg_gen_mov_i32(dest, tmp);
1490        if (insn & (1 << 23))
1491            tcg_gen_addi_i32(tmp, tmp, offset);
1492        else
1493            tcg_gen_addi_i32(tmp, tmp, -offset);
1494        store_reg(s, rd, tmp);
1495    } else if (!(insn & (1 << 23)))
1496        return 1;
1497    return 0;
1498}
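/*
 * Note: the offset above is (insn & 0xff) << ((insn >> 7) & 2), so bit 8
 * of the insn selects between a plain byte offset and a word-scaled
 * (imm8 * 4) offset.
 */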
1499
1500static inline int gen_iwmmxt_shift(uint32_t insn, uint32_t mask, TCGv_i32 dest)
1501{
1502    int rd = (insn >> 0) & 0xf;
1503    TCGv_i32 tmp;
1504
1505    if (insn & (1 << 8)) {
1506        if (rd < ARM_IWMMXT_wCGR0 || rd > ARM_IWMMXT_wCGR3) {
1507            return 1;
1508        } else {
1509            tmp = iwmmxt_load_creg(rd);
1510        }
1511    } else {
1512        tmp = tcg_temp_new_i32();
1513        iwmmxt_load_reg(cpu_V0, rd);
1514        tcg_gen_extrl_i64_i32(tmp, cpu_V0);
1515    }
1516    tcg_gen_andi_i32(tmp, tmp, mask);
1517    tcg_gen_mov_i32(dest, tmp);
1518    tcg_temp_free_i32(tmp);
1519    return 0;
1520}
1521
1522/* Disassemble an iwMMXt instruction.  Returns nonzero if an error occurred
1523   (ie. an undefined instruction).  */
1524static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn)
1525{
1526    int rd, wrd;
1527    int rdhi, rdlo, rd0, rd1, i;
1528    TCGv_i32 addr;
1529    TCGv_i32 tmp, tmp2, tmp3;
1530
1531    if ((insn & 0x0e000e00) == 0x0c000000) {
1532        if ((insn & 0x0fe00ff0) == 0x0c400000) {
1533            wrd = insn & 0xf;
1534            rdlo = (insn >> 12) & 0xf;
1535            rdhi = (insn >> 16) & 0xf;
1536            if (insn & ARM_CP_RW_BIT) {                         /* TMRRC */
1537                iwmmxt_load_reg(cpu_V0, wrd);
1538                tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
1539                tcg_gen_extrh_i64_i32(cpu_R[rdhi], cpu_V0);
1540            } else {                                    /* TMCRR */
1541                tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
1542                iwmmxt_store_reg(cpu_V0, wrd);
1543                gen_op_iwmmxt_set_mup();
1544            }
1545            return 0;
1546        }
1547
1548        wrd = (insn >> 12) & 0xf;
1549        addr = tcg_temp_new_i32();
1550        if (gen_iwmmxt_address(s, insn, addr)) {
1551            tcg_temp_free_i32(addr);
1552            return 1;
1553        }
1554        if (insn & ARM_CP_RW_BIT) {
1555            if ((insn >> 28) == 0xf) {                  /* WLDRW wCx */
1556                tmp = tcg_temp_new_i32();
1557                gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
1558                iwmmxt_store_creg(wrd, tmp);
1559            } else {
1560                i = 1;
1561                if (insn & (1 << 8)) {
1562                    if (insn & (1 << 22)) {             /* WLDRD */
1563                        gen_aa32_ld64(s, cpu_M0, addr, get_mem_index(s));
1564                        i = 0;
1565                    } else {                            /* WLDRW wRd */
1566                        tmp = tcg_temp_new_i32();
1567                        gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
1568                    }
1569                } else {
1570                    tmp = tcg_temp_new_i32();
1571                    if (insn & (1 << 22)) {             /* WLDRH */
1572                        gen_aa32_ld16u(s, tmp, addr, get_mem_index(s));
1573                    } else {                            /* WLDRB */
1574                        gen_aa32_ld8u(s, tmp, addr, get_mem_index(s));
1575                    }
1576                }
1577                if (i) {
1578                    tcg_gen_extu_i32_i64(cpu_M0, tmp);
1579                    tcg_temp_free_i32(tmp);
1580                }
1581                gen_op_iwmmxt_movq_wRn_M0(wrd);
1582            }
1583        } else {
1584            if ((insn >> 28) == 0xf) {                  /* WSTRW wCx */
1585                tmp = iwmmxt_load_creg(wrd);
1586                gen_aa32_st32(s, tmp, addr, get_mem_index(s));
1587            } else {
1588                gen_op_iwmmxt_movq_M0_wRn(wrd);
1589                tmp = tcg_temp_new_i32();
1590                if (insn & (1 << 8)) {
1591                    if (insn & (1 << 22)) {             /* WSTRD */
1592                        gen_aa32_st64(s, cpu_M0, addr, get_mem_index(s));
1593                    } else {                            /* WSTRW wRd */
1594                        tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1595                        gen_aa32_st32(s, tmp, addr, get_mem_index(s));
1596                    }
1597                } else {
1598                    if (insn & (1 << 22)) {             /* WSTRH */
1599                        tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1600                        gen_aa32_st16(s, tmp, addr, get_mem_index(s));
1601                    } else {                            /* WSTRB */
1602                        tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1603                        gen_aa32_st8(s, tmp, addr, get_mem_index(s));
1604                    }
1605                }
1606            }
1607            tcg_temp_free_i32(tmp);
1608        }
1609        tcg_temp_free_i32(addr);
1610        return 0;
1611    }
1612
1613    if ((insn & 0x0f000000) != 0x0e000000)
1614        return 1;
1615
1616    switch (((insn >> 12) & 0xf00) | ((insn >> 4) & 0xff)) {
1617    case 0x000:                                                 /* WOR */
1618        wrd = (insn >> 12) & 0xf;
1619        rd0 = (insn >> 0) & 0xf;
1620        rd1 = (insn >> 16) & 0xf;
1621        gen_op_iwmmxt_movq_M0_wRn(rd0);
1622        gen_op_iwmmxt_orq_M0_wRn(rd1);
1623        gen_op_iwmmxt_setpsr_nz();
1624        gen_op_iwmmxt_movq_wRn_M0(wrd);
1625        gen_op_iwmmxt_set_mup();
1626        gen_op_iwmmxt_set_cup();
1627        break;
1628    case 0x011:                                                 /* TMCR */
1629        if (insn & 0xf)
1630            return 1;
1631        rd = (insn >> 12) & 0xf;
1632        wrd = (insn >> 16) & 0xf;
1633        switch (wrd) {
1634        case ARM_IWMMXT_wCID:
1635        case ARM_IWMMXT_wCASF:
1636            break;
1637        case ARM_IWMMXT_wCon:
1638            gen_op_iwmmxt_set_cup();
1639            /* Fall through.  */
1640        case ARM_IWMMXT_wCSSF:
1641            tmp = iwmmxt_load_creg(wrd);
1642            tmp2 = load_reg(s, rd);
1643            tcg_gen_andc_i32(tmp, tmp, tmp2);
1644            tcg_temp_free_i32(tmp2);
1645            iwmmxt_store_creg(wrd, tmp);
1646            break;
1647        case ARM_IWMMXT_wCGR0:
1648        case ARM_IWMMXT_wCGR1:
1649        case ARM_IWMMXT_wCGR2:
1650        case ARM_IWMMXT_wCGR3:
1651            gen_op_iwmmxt_set_cup();
1652            tmp = load_reg(s, rd);
1653            iwmmxt_store_creg(wrd, tmp);
1654            break;
1655        default:
1656            return 1;
1657        }
1658        break;
1659    case 0x100:                                                 /* WXOR */
1660        wrd = (insn >> 12) & 0xf;
1661        rd0 = (insn >> 0) & 0xf;
1662        rd1 = (insn >> 16) & 0xf;
1663        gen_op_iwmmxt_movq_M0_wRn(rd0);
1664        gen_op_iwmmxt_xorq_M0_wRn(rd1);
1665        gen_op_iwmmxt_setpsr_nz();
1666        gen_op_iwmmxt_movq_wRn_M0(wrd);
1667        gen_op_iwmmxt_set_mup();
1668        gen_op_iwmmxt_set_cup();
1669        break;
1670    case 0x111:                                                 /* TMRC */
1671        if (insn & 0xf)
1672            return 1;
1673        rd = (insn >> 12) & 0xf;
1674        wrd = (insn >> 16) & 0xf;
1675        tmp = iwmmxt_load_creg(wrd);
1676        store_reg(s, rd, tmp);
1677        break;
1678    case 0x300:                                                 /* WANDN */
1679        wrd = (insn >> 12) & 0xf;
1680        rd0 = (insn >> 0) & 0xf;
1681        rd1 = (insn >> 16) & 0xf;
1682        gen_op_iwmmxt_movq_M0_wRn(rd0);
1683        tcg_gen_not_i64(cpu_M0, cpu_M0);
1684        gen_op_iwmmxt_andq_M0_wRn(rd1);
1685        gen_op_iwmmxt_setpsr_nz();
1686        gen_op_iwmmxt_movq_wRn_M0(wrd);
1687        gen_op_iwmmxt_set_mup();
1688        gen_op_iwmmxt_set_cup();
1689        break;
1690    case 0x200:                                                 /* WAND */
1691        wrd = (insn >> 12) & 0xf;
1692        rd0 = (insn >> 0) & 0xf;
1693        rd1 = (insn >> 16) & 0xf;
1694        gen_op_iwmmxt_movq_M0_wRn(rd0);
1695        gen_op_iwmmxt_andq_M0_wRn(rd1);
1696        gen_op_iwmmxt_setpsr_nz();
1697        gen_op_iwmmxt_movq_wRn_M0(wrd);
1698        gen_op_iwmmxt_set_mup();
1699        gen_op_iwmmxt_set_cup();
1700        break;
1701    case 0x810: case 0xa10:                             /* WMADD */
1702        wrd = (insn >> 12) & 0xf;
1703        rd0 = (insn >> 0) & 0xf;
1704        rd1 = (insn >> 16) & 0xf;
1705        gen_op_iwmmxt_movq_M0_wRn(rd0);
1706        if (insn & (1 << 21))
1707            gen_op_iwmmxt_maddsq_M0_wRn(rd1);
1708        else
1709            gen_op_iwmmxt_madduq_M0_wRn(rd1);
1710        gen_op_iwmmxt_movq_wRn_M0(wrd);
1711        gen_op_iwmmxt_set_mup();
1712        break;
1713    case 0x10e: case 0x50e: case 0x90e: case 0xd0e:     /* WUNPCKIL */
1714        wrd = (insn >> 12) & 0xf;
1715        rd0 = (insn >> 16) & 0xf;
1716        rd1 = (insn >> 0) & 0xf;
1717        gen_op_iwmmxt_movq_M0_wRn(rd0);
1718        switch ((insn >> 22) & 3) {
1719        case 0:
1720            gen_op_iwmmxt_unpacklb_M0_wRn(rd1);
1721            break;
1722        case 1:
1723            gen_op_iwmmxt_unpacklw_M0_wRn(rd1);
1724            break;
1725        case 2:
1726            gen_op_iwmmxt_unpackll_M0_wRn(rd1);
1727            break;
1728        case 3:
1729            return 1;
1730        }
1731        gen_op_iwmmxt_movq_wRn_M0(wrd);
1732        gen_op_iwmmxt_set_mup();
1733        gen_op_iwmmxt_set_cup();
1734        break;
1735    case 0x10c: case 0x50c: case 0x90c: case 0xd0c:     /* WUNPCKIH */
1736        wrd = (insn >> 12) & 0xf;
1737        rd0 = (insn >> 16) & 0xf;
1738        rd1 = (insn >> 0) & 0xf;
1739        gen_op_iwmmxt_movq_M0_wRn(rd0);
1740        switch ((insn >> 22) & 3) {
1741        case 0:
1742            gen_op_iwmmxt_unpackhb_M0_wRn(rd1);
1743            break;
1744        case 1:
1745            gen_op_iwmmxt_unpackhw_M0_wRn(rd1);
1746            break;
1747        case 2:
1748            gen_op_iwmmxt_unpackhl_M0_wRn(rd1);
1749            break;
1750        case 3:
1751            return 1;
1752        }
1753        gen_op_iwmmxt_movq_wRn_M0(wrd);
1754        gen_op_iwmmxt_set_mup();
1755        gen_op_iwmmxt_set_cup();
1756        break;
1757    case 0x012: case 0x112: case 0x412: case 0x512:     /* WSAD */
1758        wrd = (insn >> 12) & 0xf;
1759        rd0 = (insn >> 16) & 0xf;
1760        rd1 = (insn >> 0) & 0xf;
1761        gen_op_iwmmxt_movq_M0_wRn(rd0);
1762        if (insn & (1 << 22))
1763            gen_op_iwmmxt_sadw_M0_wRn(rd1);
1764        else
1765            gen_op_iwmmxt_sadb_M0_wRn(rd1);
1766        if (!(insn & (1 << 20)))
1767            gen_op_iwmmxt_addl_M0_wRn(wrd);
1768        gen_op_iwmmxt_movq_wRn_M0(wrd);
1769        gen_op_iwmmxt_set_mup();
1770        break;
1771    case 0x010: case 0x110: case 0x210: case 0x310:     /* WMUL */
1772        wrd = (insn >> 12) & 0xf;
1773        rd0 = (insn >> 16) & 0xf;
1774        rd1 = (insn >> 0) & 0xf;
1775        gen_op_iwmmxt_movq_M0_wRn(rd0);
1776        if (insn & (1 << 21)) {
1777            if (insn & (1 << 20))
1778                gen_op_iwmmxt_mulshw_M0_wRn(rd1);
1779            else
1780                gen_op_iwmmxt_mulslw_M0_wRn(rd1);
1781        } else {
1782            if (insn & (1 << 20))
1783                gen_op_iwmmxt_muluhw_M0_wRn(rd1);
1784            else
1785                gen_op_iwmmxt_mululw_M0_wRn(rd1);
1786        }
1787        gen_op_iwmmxt_movq_wRn_M0(wrd);
1788        gen_op_iwmmxt_set_mup();
1789        break;
1790    case 0x410: case 0x510: case 0x610: case 0x710:     /* WMAC */
1791        wrd = (insn >> 12) & 0xf;
1792        rd0 = (insn >> 16) & 0xf;
1793        rd1 = (insn >> 0) & 0xf;
1794        gen_op_iwmmxt_movq_M0_wRn(rd0);
1795        if (insn & (1 << 21))
1796            gen_op_iwmmxt_macsw_M0_wRn(rd1);
1797        else
1798            gen_op_iwmmxt_macuw_M0_wRn(rd1);
1799        if (!(insn & (1 << 20))) {
1800            iwmmxt_load_reg(cpu_V1, wrd);
1801            tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
1802        }
1803        gen_op_iwmmxt_movq_wRn_M0(wrd);
1804        gen_op_iwmmxt_set_mup();
1805        break;
1806    case 0x006: case 0x406: case 0x806: case 0xc06:     /* WCMPEQ */
1807        wrd = (insn >> 12) & 0xf;
1808        rd0 = (insn >> 16) & 0xf;
1809        rd1 = (insn >> 0) & 0xf;
1810        gen_op_iwmmxt_movq_M0_wRn(rd0);
1811        switch ((insn >> 22) & 3) {
1812        case 0:
1813            gen_op_iwmmxt_cmpeqb_M0_wRn(rd1);
1814            break;
1815        case 1:
1816            gen_op_iwmmxt_cmpeqw_M0_wRn(rd1);
1817            break;
1818        case 2:
1819            gen_op_iwmmxt_cmpeql_M0_wRn(rd1);
1820            break;
1821        case 3:
1822            return 1;
1823        }
1824        gen_op_iwmmxt_movq_wRn_M0(wrd);
1825        gen_op_iwmmxt_set_mup();
1826        gen_op_iwmmxt_set_cup();
1827        break;
1828    case 0x800: case 0x900: case 0xc00: case 0xd00:     /* WAVG2 */
1829        wrd = (insn >> 12) & 0xf;
1830        rd0 = (insn >> 16) & 0xf;
1831        rd1 = (insn >> 0) & 0xf;
1832        gen_op_iwmmxt_movq_M0_wRn(rd0);
1833        if (insn & (1 << 22)) {
1834            if (insn & (1 << 20))
1835                gen_op_iwmmxt_avgw1_M0_wRn(rd1);
1836            else
1837                gen_op_iwmmxt_avgw0_M0_wRn(rd1);
1838        } else {
1839            if (insn & (1 << 20))
1840                gen_op_iwmmxt_avgb1_M0_wRn(rd1);
1841            else
1842                gen_op_iwmmxt_avgb0_M0_wRn(rd1);
1843        }
1844        gen_op_iwmmxt_movq_wRn_M0(wrd);
1845        gen_op_iwmmxt_set_mup();
1846        gen_op_iwmmxt_set_cup();
1847        break;
1848    case 0x802: case 0x902: case 0xa02: case 0xb02:     /* WALIGNR */
1849        wrd = (insn >> 12) & 0xf;
1850        rd0 = (insn >> 16) & 0xf;
1851        rd1 = (insn >> 0) & 0xf;
1852        gen_op_iwmmxt_movq_M0_wRn(rd0);
1853        tmp = iwmmxt_load_creg(ARM_IWMMXT_wCGR0 + ((insn >> 20) & 3));
1854        tcg_gen_andi_i32(tmp, tmp, 7);
1855        iwmmxt_load_reg(cpu_V1, rd1);
1856        gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp);
1857        tcg_temp_free_i32(tmp);
1858        gen_op_iwmmxt_movq_wRn_M0(wrd);
1859        gen_op_iwmmxt_set_mup();
1860        break;
1861    case 0x601: case 0x605: case 0x609: case 0x60d:     /* TINSR */
1862        if (((insn >> 6) & 3) == 3)
1863            return 1;
1864        rd = (insn >> 12) & 0xf;
1865        wrd = (insn >> 16) & 0xf;
1866        tmp = load_reg(s, rd);
1867        gen_op_iwmmxt_movq_M0_wRn(wrd);
1868        switch ((insn >> 6) & 3) {
1869        case 0:
1870            tmp2 = tcg_constant_i32(0xff);
1871            tmp3 = tcg_constant_i32((insn & 7) << 3);
1872            break;
1873        case 1:
1874            tmp2 = tcg_constant_i32(0xffff);
1875            tmp3 = tcg_constant_i32((insn & 3) << 4);
1876            break;
1877        case 2:
1878            tmp2 = tcg_constant_i32(0xffffffff);
1879            tmp3 = tcg_constant_i32((insn & 1) << 5);
1880            break;
1881        default:
1882            g_assert_not_reached();
1883        }
1884        gen_helper_iwmmxt_insr(cpu_M0, cpu_M0, tmp, tmp2, tmp3);
1885        tcg_temp_free_i32(tmp);
1886        gen_op_iwmmxt_movq_wRn_M0(wrd);
1887        gen_op_iwmmxt_set_mup();
1888        break;
1889    case 0x107: case 0x507: case 0x907: case 0xd07:     /* TEXTRM */
1890        rd = (insn >> 12) & 0xf;
1891        wrd = (insn >> 16) & 0xf;
1892        if (rd == 15 || ((insn >> 22) & 3) == 3)
1893            return 1;
1894        gen_op_iwmmxt_movq_M0_wRn(wrd);
1895        tmp = tcg_temp_new_i32();
1896        switch ((insn >> 22) & 3) {
1897        case 0:
1898            tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 7) << 3);
1899            tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1900            if (insn & 8) {
1901                tcg_gen_ext8s_i32(tmp, tmp);
1902            } else {
1903                tcg_gen_andi_i32(tmp, tmp, 0xff);
1904            }
1905            break;
1906        case 1:
1907            tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 3) << 4);
1908            tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1909            if (insn & 8) {
1910                tcg_gen_ext16s_i32(tmp, tmp);
1911            } else {
1912                tcg_gen_andi_i32(tmp, tmp, 0xffff);
1913            }
1914            break;
1915        case 2:
1916            tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 1) << 5);
1917            tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1918            break;
1919        }
1920        store_reg(s, rd, tmp);
1921        break;
1922    case 0x117: case 0x517: case 0x917: case 0xd17:     /* TEXTRC */
1923        if ((insn & 0x000ff008) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1924            return 1;
1925        tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1926        switch ((insn >> 22) & 3) {
1927        case 0:
1928            tcg_gen_shri_i32(tmp, tmp, ((insn & 7) << 2) + 0);
1929            break;
1930        case 1:
1931            tcg_gen_shri_i32(tmp, tmp, ((insn & 3) << 3) + 4);
1932            break;
1933        case 2:
1934            tcg_gen_shri_i32(tmp, tmp, ((insn & 1) << 4) + 12);
1935            break;
1936        }
1937        tcg_gen_shli_i32(tmp, tmp, 28);
1938        gen_set_nzcv(tmp);
1939        tcg_temp_free_i32(tmp);
1940        break;
1941    case 0x401: case 0x405: case 0x409: case 0x40d:     /* TBCST */
1942        if (((insn >> 6) & 3) == 3)
1943            return 1;
1944        rd = (insn >> 12) & 0xf;
1945        wrd = (insn >> 16) & 0xf;
1946        tmp = load_reg(s, rd);
1947        switch ((insn >> 6) & 3) {
1948        case 0:
1949            gen_helper_iwmmxt_bcstb(cpu_M0, tmp);
1950            break;
1951        case 1:
1952            gen_helper_iwmmxt_bcstw(cpu_M0, tmp);
1953            break;
1954        case 2:
1955            gen_helper_iwmmxt_bcstl(cpu_M0, tmp);
1956            break;
1957        }
1958        tcg_temp_free_i32(tmp);
1959        gen_op_iwmmxt_movq_wRn_M0(wrd);
1960        gen_op_iwmmxt_set_mup();
1961        break;
1962    case 0x113: case 0x513: case 0x913: case 0xd13:     /* TANDC */
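            /*
             * TANDC: AND together the per-element SIMD condition flags held
             * in wCASF and set the ARM NZCV flags from the result.
             */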
1963        if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1964            return 1;
1965        tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1966        tmp2 = tcg_temp_new_i32();
1967        tcg_gen_mov_i32(tmp2, tmp);
1968        switch ((insn >> 22) & 3) {
1969        case 0:
1970            for (i = 0; i < 7; i ++) {
1971                tcg_gen_shli_i32(tmp2, tmp2, 4);
1972                tcg_gen_and_i32(tmp, tmp, tmp2);
1973            }
1974            break;
1975        case 1:
1976            for (i = 0; i < 3; i ++) {
1977                tcg_gen_shli_i32(tmp2, tmp2, 8);
1978                tcg_gen_and_i32(tmp, tmp, tmp2);
1979            }
1980            break;
1981        case 2:
1982            tcg_gen_shli_i32(tmp2, tmp2, 16);
1983            tcg_gen_and_i32(tmp, tmp, tmp2);
1984            break;
1985        }
1986        gen_set_nzcv(tmp);
1987        tcg_temp_free_i32(tmp2);
1988        tcg_temp_free_i32(tmp);
1989        break;
1990    case 0x01c: case 0x41c: case 0x81c: case 0xc1c:     /* WACC */
1991        wrd = (insn >> 12) & 0xf;
1992        rd0 = (insn >> 16) & 0xf;
1993        gen_op_iwmmxt_movq_M0_wRn(rd0);
1994        switch ((insn >> 22) & 3) {
1995        case 0:
1996            gen_helper_iwmmxt_addcb(cpu_M0, cpu_M0);
1997            break;
1998        case 1:
1999            gen_helper_iwmmxt_addcw(cpu_M0, cpu_M0);
2000            break;
2001        case 2:
2002            gen_helper_iwmmxt_addcl(cpu_M0, cpu_M0);
2003            break;
2004        case 3:
2005            return 1;
2006        }
2007        gen_op_iwmmxt_movq_wRn_M0(wrd);
2008        gen_op_iwmmxt_set_mup();
2009        break;
2010    case 0x115: case 0x515: case 0x915: case 0xd15:     /* TORC */
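            /*
             * TORC: OR together the per-element SIMD condition flags held
             * in wCASF and set the ARM NZCV flags from the result.
             */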
2011        if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
2012            return 1;
2013        tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
2014        tmp2 = tcg_temp_new_i32();
2015        tcg_gen_mov_i32(tmp2, tmp);
2016        switch ((insn >> 22) & 3) {
2017        case 0:
2018            for (i = 0; i < 7; i ++) {
2019                tcg_gen_shli_i32(tmp2, tmp2, 4);
2020                tcg_gen_or_i32(tmp, tmp, tmp2);
2021            }
2022            break;
2023        case 1:
2024            for (i = 0; i < 3; i ++) {
2025                tcg_gen_shli_i32(tmp2, tmp2, 8);
2026                tcg_gen_or_i32(tmp, tmp, tmp2);
2027            }
2028            break;
2029        case 2:
2030            tcg_gen_shli_i32(tmp2, tmp2, 16);
2031            tcg_gen_or_i32(tmp, tmp, tmp2);
2032            break;
2033        }
2034        gen_set_nzcv(tmp);
2035        tcg_temp_free_i32(tmp2);
2036        tcg_temp_free_i32(tmp);
2037        break;
2038    case 0x103: case 0x503: case 0x903: case 0xd03:     /* TMOVMSK */
2039        rd = (insn >> 12) & 0xf;
2040        rd0 = (insn >> 16) & 0xf;
2041        if ((insn & 0xf) != 0 || ((insn >> 22) & 3) == 3)
2042            return 1;
2043        gen_op_iwmmxt_movq_M0_wRn(rd0);
2044        tmp = tcg_temp_new_i32();
2045        switch ((insn >> 22) & 3) {
2046        case 0:
2047            gen_helper_iwmmxt_msbb(tmp, cpu_M0);
2048            break;
2049        case 1:
2050            gen_helper_iwmmxt_msbw(tmp, cpu_M0);
2051            break;
2052        case 2:
2053            gen_helper_iwmmxt_msbl(tmp, cpu_M0);
2054            break;
2055        }
2056        store_reg(s, rd, tmp);
2057        break;
2058    case 0x106: case 0x306: case 0x506: case 0x706:     /* WCMPGT */
2059    case 0x906: case 0xb06: case 0xd06: case 0xf06:
2060        wrd = (insn >> 12) & 0xf;
2061        rd0 = (insn >> 16) & 0xf;
2062        rd1 = (insn >> 0) & 0xf;
2063        gen_op_iwmmxt_movq_M0_wRn(rd0);
2064        switch ((insn >> 22) & 3) {
2065        case 0:
2066            if (insn & (1 << 21))
2067                gen_op_iwmmxt_cmpgtsb_M0_wRn(rd1);
2068            else
2069                gen_op_iwmmxt_cmpgtub_M0_wRn(rd1);
2070            break;
2071        case 1:
2072            if (insn & (1 << 21))
2073                gen_op_iwmmxt_cmpgtsw_M0_wRn(rd1);
2074            else
2075                gen_op_iwmmxt_cmpgtuw_M0_wRn(rd1);
2076            break;
2077        case 2:
2078            if (insn & (1 << 21))
2079                gen_op_iwmmxt_cmpgtsl_M0_wRn(rd1);
2080            else
2081                gen_op_iwmmxt_cmpgtul_M0_wRn(rd1);
2082            break;
2083        case 3:
2084            return 1;
2085        }
2086        gen_op_iwmmxt_movq_wRn_M0(wrd);
2087        gen_op_iwmmxt_set_mup();
2088        gen_op_iwmmxt_set_cup();
2089        break;
2090    case 0x00e: case 0x20e: case 0x40e: case 0x60e:     /* WUNPCKEL */
2091    case 0x80e: case 0xa0e: case 0xc0e: case 0xe0e:
2092        wrd = (insn >> 12) & 0xf;
2093        rd0 = (insn >> 16) & 0xf;
2094        gen_op_iwmmxt_movq_M0_wRn(rd0);
2095        switch ((insn >> 22) & 3) {
2096        case 0:
2097            if (insn & (1 << 21))
2098                gen_op_iwmmxt_unpacklsb_M0();
2099            else
2100                gen_op_iwmmxt_unpacklub_M0();
2101            break;
2102        case 1:
2103            if (insn & (1 << 21))
2104                gen_op_iwmmxt_unpacklsw_M0();
2105            else
2106                gen_op_iwmmxt_unpackluw_M0();
2107            break;
2108        case 2:
2109            if (insn & (1 << 21))
2110                gen_op_iwmmxt_unpacklsl_M0();
2111            else
2112                gen_op_iwmmxt_unpacklul_M0();
2113            break;
2114        case 3:
2115            return 1;
2116        }
2117        gen_op_iwmmxt_movq_wRn_M0(wrd);
2118        gen_op_iwmmxt_set_mup();
2119        gen_op_iwmmxt_set_cup();
2120        break;
2121    case 0x00c: case 0x20c: case 0x40c: case 0x60c:     /* WUNPCKEH */
2122    case 0x80c: case 0xa0c: case 0xc0c: case 0xe0c:
2123        wrd = (insn >> 12) & 0xf;
2124        rd0 = (insn >> 16) & 0xf;
2125        gen_op_iwmmxt_movq_M0_wRn(rd0);
2126        switch ((insn >> 22) & 3) {
2127        case 0:
2128            if (insn & (1 << 21))
2129                gen_op_iwmmxt_unpackhsb_M0();
2130            else
2131                gen_op_iwmmxt_unpackhub_M0();
2132            break;
2133        case 1:
2134            if (insn & (1 << 21))
2135                gen_op_iwmmxt_unpackhsw_M0();
2136            else
2137                gen_op_iwmmxt_unpackhuw_M0();
2138            break;
2139        case 2:
2140            if (insn & (1 << 21))
2141                gen_op_iwmmxt_unpackhsl_M0();
2142            else
2143                gen_op_iwmmxt_unpackhul_M0();
2144            break;
2145        case 3:
2146            return 1;
2147        }
2148        gen_op_iwmmxt_movq_wRn_M0(wrd);
2149        gen_op_iwmmxt_set_mup();
2150        gen_op_iwmmxt_set_cup();
2151        break;
2152    case 0x204: case 0x604: case 0xa04: case 0xe04:     /* WSRL */
2153    case 0x214: case 0x614: case 0xa14: case 0xe14:
2154        if (((insn >> 22) & 3) == 0)
2155            return 1;
2156        wrd = (insn >> 12) & 0xf;
2157        rd0 = (insn >> 16) & 0xf;
2158        gen_op_iwmmxt_movq_M0_wRn(rd0);
2159        tmp = tcg_temp_new_i32();
2160        if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2161            tcg_temp_free_i32(tmp);
2162            return 1;
2163        }
2164        switch ((insn >> 22) & 3) {
2165        case 1:
2166            gen_helper_iwmmxt_srlw(cpu_M0, cpu_env, cpu_M0, tmp);
2167            break;
2168        case 2:
2169            gen_helper_iwmmxt_srll(cpu_M0, cpu_env, cpu_M0, tmp);
2170            break;
2171        case 3:
2172            gen_helper_iwmmxt_srlq(cpu_M0, cpu_env, cpu_M0, tmp);
2173            break;
2174        }
2175        tcg_temp_free_i32(tmp);
2176        gen_op_iwmmxt_movq_wRn_M0(wrd);
2177        gen_op_iwmmxt_set_mup();
2178        gen_op_iwmmxt_set_cup();
2179        break;
2180    case 0x004: case 0x404: case 0x804: case 0xc04:     /* WSRA */
2181    case 0x014: case 0x414: case 0x814: case 0xc14:
2182        if (((insn >> 22) & 3) == 0)
2183            return 1;
2184        wrd = (insn >> 12) & 0xf;
2185        rd0 = (insn >> 16) & 0xf;
2186        gen_op_iwmmxt_movq_M0_wRn(rd0);
2187        tmp = tcg_temp_new_i32();
2188        if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2189            tcg_temp_free_i32(tmp);
2190            return 1;
2191        }
2192        switch ((insn >> 22) & 3) {
2193        case 1:
2194            gen_helper_iwmmxt_sraw(cpu_M0, cpu_env, cpu_M0, tmp);
2195            break;
2196        case 2:
2197            gen_helper_iwmmxt_sral(cpu_M0, cpu_env, cpu_M0, tmp);
2198            break;
2199        case 3:
2200            gen_helper_iwmmxt_sraq(cpu_M0, cpu_env, cpu_M0, tmp);
2201            break;
2202        }
2203        tcg_temp_free_i32(tmp);
2204        gen_op_iwmmxt_movq_wRn_M0(wrd);
2205        gen_op_iwmmxt_set_mup();
2206        gen_op_iwmmxt_set_cup();
2207        break;
2208    case 0x104: case 0x504: case 0x904: case 0xd04:     /* WSLL */
2209    case 0x114: case 0x514: case 0x914: case 0xd14:
2210        if (((insn >> 22) & 3) == 0)
2211            return 1;
2212        wrd = (insn >> 12) & 0xf;
2213        rd0 = (insn >> 16) & 0xf;
2214        gen_op_iwmmxt_movq_M0_wRn(rd0);
2215        tmp = tcg_temp_new_i32();
2216        if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2217            tcg_temp_free_i32(tmp);
2218            return 1;
2219        }
2220        switch ((insn >> 22) & 3) {
2221        case 1:
2222            gen_helper_iwmmxt_sllw(cpu_M0, cpu_env, cpu_M0, tmp);
2223            break;
2224        case 2:
2225            gen_helper_iwmmxt_slll(cpu_M0, cpu_env, cpu_M0, tmp);
2226            break;
2227        case 3:
2228            gen_helper_iwmmxt_sllq(cpu_M0, cpu_env, cpu_M0, tmp);
2229            break;
2230        }
2231        tcg_temp_free_i32(tmp);
2232        gen_op_iwmmxt_movq_wRn_M0(wrd);
2233        gen_op_iwmmxt_set_mup();
2234        gen_op_iwmmxt_set_cup();
2235        break;
2236    case 0x304: case 0x704: case 0xb04: case 0xf04:     /* WROR */
2237    case 0x314: case 0x714: case 0xb14: case 0xf14:
2238        if (((insn >> 22) & 3) == 0)
2239            return 1;
2240        wrd = (insn >> 12) & 0xf;
2241        rd0 = (insn >> 16) & 0xf;
2242        gen_op_iwmmxt_movq_M0_wRn(rd0);
2243        tmp = tcg_temp_new_i32();
2244        switch ((insn >> 22) & 3) {
2245        case 1:
2246            if (gen_iwmmxt_shift(insn, 0xf, tmp)) {
2247                tcg_temp_free_i32(tmp);
2248                return 1;
2249            }
2250            gen_helper_iwmmxt_rorw(cpu_M0, cpu_env, cpu_M0, tmp);
2251            break;
2252        case 2:
2253            if (gen_iwmmxt_shift(insn, 0x1f, tmp)) {
2254                tcg_temp_free_i32(tmp);
2255                return 1;
2256            }
2257            gen_helper_iwmmxt_rorl(cpu_M0, cpu_env, cpu_M0, tmp);
2258            break;
2259        case 3:
2260            if (gen_iwmmxt_shift(insn, 0x3f, tmp)) {
2261                tcg_temp_free_i32(tmp);
2262                return 1;
2263            }
2264            gen_helper_iwmmxt_rorq(cpu_M0, cpu_env, cpu_M0, tmp);
2265            break;
2266        }
2267        tcg_temp_free_i32(tmp);
2268        gen_op_iwmmxt_movq_wRn_M0(wrd);
2269        gen_op_iwmmxt_set_mup();
2270        gen_op_iwmmxt_set_cup();
2271        break;
2272    case 0x116: case 0x316: case 0x516: case 0x716:     /* WMIN */
2273    case 0x916: case 0xb16: case 0xd16: case 0xf16:
2274        wrd = (insn >> 12) & 0xf;
2275        rd0 = (insn >> 16) & 0xf;
2276        rd1 = (insn >> 0) & 0xf;
2277        gen_op_iwmmxt_movq_M0_wRn(rd0);
2278        switch ((insn >> 22) & 3) {
2279        case 0:
2280            if (insn & (1 << 21))
2281                gen_op_iwmmxt_minsb_M0_wRn(rd1);
2282            else
2283                gen_op_iwmmxt_minub_M0_wRn(rd1);
2284            break;
2285        case 1:
2286            if (insn & (1 << 21))
2287                gen_op_iwmmxt_minsw_M0_wRn(rd1);
2288            else
2289                gen_op_iwmmxt_minuw_M0_wRn(rd1);
2290            break;
2291        case 2:
2292            if (insn & (1 << 21))
2293                gen_op_iwmmxt_minsl_M0_wRn(rd1);
2294            else
2295                gen_op_iwmmxt_minul_M0_wRn(rd1);
2296            break;
2297        case 3:
2298            return 1;
2299        }
2300        gen_op_iwmmxt_movq_wRn_M0(wrd);
2301        gen_op_iwmmxt_set_mup();
2302        break;
2303    case 0x016: case 0x216: case 0x416: case 0x616:     /* WMAX */
2304    case 0x816: case 0xa16: case 0xc16: case 0xe16:
2305        wrd = (insn >> 12) & 0xf;
2306        rd0 = (insn >> 16) & 0xf;
2307        rd1 = (insn >> 0) & 0xf;
2308        gen_op_iwmmxt_movq_M0_wRn(rd0);
2309        switch ((insn >> 22) & 3) {
2310        case 0:
2311            if (insn & (1 << 21))
2312                gen_op_iwmmxt_maxsb_M0_wRn(rd1);
2313            else
2314                gen_op_iwmmxt_maxub_M0_wRn(rd1);
2315            break;
2316        case 1:
2317            if (insn & (1 << 21))
2318                gen_op_iwmmxt_maxsw_M0_wRn(rd1);
2319            else
2320                gen_op_iwmmxt_maxuw_M0_wRn(rd1);
2321            break;
2322        case 2:
2323            if (insn & (1 << 21))
2324                gen_op_iwmmxt_maxsl_M0_wRn(rd1);
2325            else
2326                gen_op_iwmmxt_maxul_M0_wRn(rd1);
2327            break;
2328        case 3:
2329            return 1;
2330        }
2331        gen_op_iwmmxt_movq_wRn_M0(wrd);
2332        gen_op_iwmmxt_set_mup();
2333        break;
2334    case 0x002: case 0x102: case 0x202: case 0x302:     /* WALIGNI */
2335    case 0x402: case 0x502: case 0x602: case 0x702:
2336        wrd = (insn >> 12) & 0xf;
2337        rd0 = (insn >> 16) & 0xf;
2338        rd1 = (insn >> 0) & 0xf;
2339        gen_op_iwmmxt_movq_M0_wRn(rd0);
2340        iwmmxt_load_reg(cpu_V1, rd1);
2341        gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1,
2342                                tcg_constant_i32((insn >> 20) & 3));
2343        gen_op_iwmmxt_movq_wRn_M0(wrd);
2344        gen_op_iwmmxt_set_mup();
2345        break;
2346    case 0x01a: case 0x11a: case 0x21a: case 0x31a:     /* WSUB */
2347    case 0x41a: case 0x51a: case 0x61a: case 0x71a:
2348    case 0x81a: case 0x91a: case 0xa1a: case 0xb1a:
2349    case 0xc1a: case 0xd1a: case 0xe1a: case 0xf1a:
2350        wrd = (insn >> 12) & 0xf;
2351        rd0 = (insn >> 16) & 0xf;
2352        rd1 = (insn >> 0) & 0xf;
2353        gen_op_iwmmxt_movq_M0_wRn(rd0);
2354        switch ((insn >> 20) & 0xf) {
2355        case 0x0:
2356            gen_op_iwmmxt_subnb_M0_wRn(rd1);
2357            break;
2358        case 0x1:
2359            gen_op_iwmmxt_subub_M0_wRn(rd1);
2360            break;
2361        case 0x3:
2362            gen_op_iwmmxt_subsb_M0_wRn(rd1);
2363            break;
2364        case 0x4:
2365            gen_op_iwmmxt_subnw_M0_wRn(rd1);
2366            break;
2367        case 0x5:
2368            gen_op_iwmmxt_subuw_M0_wRn(rd1);
2369            break;
2370        case 0x7:
2371            gen_op_iwmmxt_subsw_M0_wRn(rd1);
2372            break;
2373        case 0x8:
2374            gen_op_iwmmxt_subnl_M0_wRn(rd1);
2375            break;
2376        case 0x9:
2377            gen_op_iwmmxt_subul_M0_wRn(rd1);
2378            break;
2379        case 0xb:
2380            gen_op_iwmmxt_subsl_M0_wRn(rd1);
2381            break;
2382        default:
2383            return 1;
2384        }
2385        gen_op_iwmmxt_movq_wRn_M0(wrd);
2386        gen_op_iwmmxt_set_mup();
2387        gen_op_iwmmxt_set_cup();
2388        break;
2389    case 0x01e: case 0x11e: case 0x21e: case 0x31e:     /* WSHUFH */
2390    case 0x41e: case 0x51e: case 0x61e: case 0x71e:
2391    case 0x81e: case 0x91e: case 0xa1e: case 0xb1e:
2392    case 0xc1e: case 0xd1e: case 0xe1e: case 0xf1e:
2393        wrd = (insn >> 12) & 0xf;
2394        rd0 = (insn >> 16) & 0xf;
2395        gen_op_iwmmxt_movq_M0_wRn(rd0);
2396        tmp = tcg_constant_i32(((insn >> 16) & 0xf0) | (insn & 0x0f));
2397        gen_helper_iwmmxt_shufh(cpu_M0, cpu_env, cpu_M0, tmp);
2398        gen_op_iwmmxt_movq_wRn_M0(wrd);
2399        gen_op_iwmmxt_set_mup();
2400        gen_op_iwmmxt_set_cup();
2401        break;
2402    case 0x018: case 0x118: case 0x218: case 0x318:     /* WADD */
2403    case 0x418: case 0x518: case 0x618: case 0x718:
2404    case 0x818: case 0x918: case 0xa18: case 0xb18:
2405    case 0xc18: case 0xd18: case 0xe18: case 0xf18:
2406        wrd = (insn >> 12) & 0xf;
2407        rd0 = (insn >> 16) & 0xf;
2408        rd1 = (insn >> 0) & 0xf;
2409        gen_op_iwmmxt_movq_M0_wRn(rd0);
2410        switch ((insn >> 20) & 0xf) {
2411        case 0x0:
2412            gen_op_iwmmxt_addnb_M0_wRn(rd1);
2413            break;
2414        case 0x1:
2415            gen_op_iwmmxt_addub_M0_wRn(rd1);
2416            break;
2417        case 0x3:
2418            gen_op_iwmmxt_addsb_M0_wRn(rd1);
2419            break;
2420        case 0x4:
2421            gen_op_iwmmxt_addnw_M0_wRn(rd1);
2422            break;
2423        case 0x5:
2424            gen_op_iwmmxt_adduw_M0_wRn(rd1);
2425            break;
2426        case 0x7:
2427            gen_op_iwmmxt_addsw_M0_wRn(rd1);
2428            break;
2429        case 0x8:
2430            gen_op_iwmmxt_addnl_M0_wRn(rd1);
2431            break;
2432        case 0x9:
2433            gen_op_iwmmxt_addul_M0_wRn(rd1);
2434            break;
2435        case 0xb:
2436            gen_op_iwmmxt_addsl_M0_wRn(rd1);
2437            break;
2438        default:
2439            return 1;
2440        }
2441        gen_op_iwmmxt_movq_wRn_M0(wrd);
2442        gen_op_iwmmxt_set_mup();
2443        gen_op_iwmmxt_set_cup();
2444        break;
2445    case 0x008: case 0x108: case 0x208: case 0x308:     /* WPACK */
2446    case 0x408: case 0x508: case 0x608: case 0x708:
2447    case 0x808: case 0x908: case 0xa08: case 0xb08:
2448    case 0xc08: case 0xd08: case 0xe08: case 0xf08:
2449        if (!(insn & (1 << 20)) || ((insn >> 22) & 3) == 0)
2450            return 1;
2451        wrd = (insn >> 12) & 0xf;
2452        rd0 = (insn >> 16) & 0xf;
2453        rd1 = (insn >> 0) & 0xf;
2454        gen_op_iwmmxt_movq_M0_wRn(rd0);
2455        switch ((insn >> 22) & 3) {
2456        case 1:
2457            if (insn & (1 << 21))
2458                gen_op_iwmmxt_packsw_M0_wRn(rd1);
2459            else
2460                gen_op_iwmmxt_packuw_M0_wRn(rd1);
2461            break;
2462        case 2:
2463            if (insn & (1 << 21))
2464                gen_op_iwmmxt_packsl_M0_wRn(rd1);
2465            else
2466                gen_op_iwmmxt_packul_M0_wRn(rd1);
2467            break;
2468        case 3:
2469            if (insn & (1 << 21))
2470                gen_op_iwmmxt_packsq_M0_wRn(rd1);
2471            else
2472                gen_op_iwmmxt_packuq_M0_wRn(rd1);
2473            break;
2474        }
2475        gen_op_iwmmxt_movq_wRn_M0(wrd);
2476        gen_op_iwmmxt_set_mup();
2477        gen_op_iwmmxt_set_cup();
2478        break;
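        /* TMIA, TMIAPH and TMIAxy: multiply and accumulate into wRd.  */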
2479    case 0x201: case 0x203: case 0x205: case 0x207:
2480    case 0x209: case 0x20b: case 0x20d: case 0x20f:
2481    case 0x211: case 0x213: case 0x215: case 0x217:
2482    case 0x219: case 0x21b: case 0x21d: case 0x21f:
2483        wrd = (insn >> 5) & 0xf;
2484        rd0 = (insn >> 12) & 0xf;
2485        rd1 = (insn >> 0) & 0xf;
2486        if (rd0 == 0xf || rd1 == 0xf)
2487            return 1;
2488        gen_op_iwmmxt_movq_M0_wRn(wrd);
2489        tmp = load_reg(s, rd0);
2490        tmp2 = load_reg(s, rd1);
2491        switch ((insn >> 16) & 0xf) {
2492        case 0x0:                                       /* TMIA */
2493            gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2494            break;
2495        case 0x8:                                       /* TMIAPH */
2496            gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2497            break;
2498        case 0xc: case 0xd: case 0xe: case 0xf:                 /* TMIAxy */
2499            if (insn & (1 << 16))
2500                tcg_gen_shri_i32(tmp, tmp, 16);
2501            if (insn & (1 << 17))
2502                tcg_gen_shri_i32(tmp2, tmp2, 16);
2503            gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2504            break;
2505        default:
2506            tcg_temp_free_i32(tmp2);
2507            tcg_temp_free_i32(tmp);
2508            return 1;
2509        }
2510        tcg_temp_free_i32(tmp2);
2511        tcg_temp_free_i32(tmp);
2512        gen_op_iwmmxt_movq_wRn_M0(wrd);
2513        gen_op_iwmmxt_set_mup();
2514        break;
2515    default:
2516        return 1;
2517    }
2518
2519    return 0;
2520}
2521
2522/* Disassemble an XScale DSP instruction.  Returns nonzero if an error occurred
2523   (i.e. an undefined instruction).  */
2524static int disas_dsp_insn(DisasContext *s, uint32_t insn)
2525{
2526    int acc, rd0, rd1, rdhi, rdlo;
2527    TCGv_i32 tmp, tmp2;
2528
2529    if ((insn & 0x0ff00f10) == 0x0e200010) {
2530        /* Multiply with Internal Accumulate Format */
2531        rd0 = (insn >> 12) & 0xf;
2532        rd1 = insn & 0xf;
2533        acc = (insn >> 5) & 7;
2534
2535        if (acc != 0)
2536            return 1;
2537
2538        tmp = load_reg(s, rd0);
2539        tmp2 = load_reg(s, rd1);
2540        switch ((insn >> 16) & 0xf) {
2541        case 0x0:                                       /* MIA */
2542            gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2543            break;
2544        case 0x8:                                       /* MIAPH */
2545            gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2546            break;
2547        case 0xc:                                       /* MIABB */
2548        case 0xd:                                       /* MIABT */
2549        case 0xe:                                       /* MIATB */
2550        case 0xf:                                       /* MIATT */
2551            if (insn & (1 << 16))
2552                tcg_gen_shri_i32(tmp, tmp, 16);
2553            if (insn & (1 << 17))
2554                tcg_gen_shri_i32(tmp2, tmp2, 16);
2555            gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2556            break;
2557        default:
2558            return 1;
2559        }
2560        tcg_temp_free_i32(tmp2);
2561        tcg_temp_free_i32(tmp);
2562
2563        gen_op_iwmmxt_movq_wRn_M0(acc);
2564        return 0;
2565    }
2566
2567    if ((insn & 0x0fe00ff8) == 0x0c400000) {
2568        /* Internal Accumulator Access Format */
2569        rdhi = (insn >> 16) & 0xf;
2570        rdlo = (insn >> 12) & 0xf;
2571        acc = insn & 7;
2572
2573        if (acc != 0)
2574            return 1;
2575
2576        if (insn & ARM_CP_RW_BIT) {                     /* MRA */
2577            iwmmxt_load_reg(cpu_V0, acc);
2578            tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
2579            tcg_gen_extrh_i64_i32(cpu_R[rdhi], cpu_V0);
2580            tcg_gen_andi_i32(cpu_R[rdhi], cpu_R[rdhi], (1 << (40 - 32)) - 1);
2581        } else {                                        /* MAR */
2582            tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
2583            iwmmxt_store_reg(cpu_V0, acc);
2584        }
2585        return 0;
2586    }
2587
2588    return 1;
2589}
2590
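/* Jump to the next TB via a runtime lookup of the target PC. */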
2591static void gen_goto_ptr(void)
2592{
2593    tcg_gen_lookup_and_goto_ptr();
2594}
2595
2596/* This will end the TB but doesn't guarantee we'll return to
2597 * cpu_loop_exec. Any live exit_requests will be processed as we
2598 * enter the next TB.
2599 */
2600static void gen_goto_tb(DisasContext *s, int n, target_ulong dest)
2601{
2602    if (translator_use_goto_tb(&s->base, dest)) {
2603        tcg_gen_goto_tb(n);
2604        gen_set_pc_im(s, dest);
2605        tcg_gen_exit_tb(s->base.tb, n);
2606    } else {
2607        gen_set_pc_im(s, dest);
2608        gen_goto_ptr();
2609    }
2610    s->base.is_jmp = DISAS_NORETURN;
2611}
2612
2613/* Jump, specifying which TB number to use if we gen_goto_tb() */
2614static inline void gen_jmp_tb(DisasContext *s, uint32_t dest, int tbno)
2615{
2616    if (unlikely(s->ss_active)) {
2617        /* An indirect jump so that we still trigger the debug exception.  */
2618        gen_set_pc_im(s, dest);
2619        s->base.is_jmp = DISAS_JUMP;
2620        return;
2621    }
2622    switch (s->base.is_jmp) {
2623    case DISAS_NEXT:
2624    case DISAS_TOO_MANY:
2625    case DISAS_NORETURN:
2626        /*
2627         * The normal case: just go to the destination TB.
2628         * NB: NORETURN happens if we generate code like
2629         *    gen_brcondi(l);
2630         *    gen_jmp();
2631         *    gen_set_label(l);
2632         *    gen_jmp();
2633         * on the second call to gen_jmp().
2634         */
2635        gen_goto_tb(s, tbno, dest);
2636        break;
2637    case DISAS_UPDATE_NOCHAIN:
2638    case DISAS_UPDATE_EXIT:
2639        /*
2640         * We already decided we're leaving the TB for some other reason.
2641         * Avoid using goto_tb so we really do exit back to the main loop
2642         * and don't chain to another TB.
2643         */
2644        gen_set_pc_im(s, dest);
2645        gen_goto_ptr();
2646        s->base.is_jmp = DISAS_NORETURN;
2647        break;
2648    default:
2649        /*
2650         * We shouldn't be emitting code for a jump and also have
2651         * is_jmp set to one of the special cases like DISAS_SWI.
2652         */
2653        g_assert_not_reached();
2654    }
2655}
2656
2657static inline void gen_jmp(DisasContext *s, uint32_t dest)
2658{
2659    gen_jmp_tb(s, dest, 0);
2660}
2661
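/*
 * Signed 16x16->32 multiply of selected halfwords: nonzero x/y select the
 * top halfword of t0/t1 respectively, zero selects the bottom halfword.
 */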
2662static inline void gen_mulxy(TCGv_i32 t0, TCGv_i32 t1, int x, int y)
2663{
2664    if (x)
2665        tcg_gen_sari_i32(t0, t0, 16);
2666    else
2667        gen_sxth(t0);
2668    if (y)
2669        tcg_gen_sari_i32(t1, t1, 16);
2670    else
2671        gen_sxth(t1);
2672    tcg_gen_mul_i32(t0, t0, t1);
2673}
2674
2675/* Return the mask of PSR bits set by a MSR instruction.  */
2676static uint32_t msr_mask(DisasContext *s, int flags, int spsr)
2677{
2678    uint32_t mask = 0;
2679
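        /*
         * flags is the MSR field mask: bit 0 = c (PSR[7:0]), bit 1 = x
         * (PSR[15:8]), bit 2 = s (PSR[23:16]), bit 3 = f (PSR[31:24]).
         */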
2680    if (flags & (1 << 0)) {
2681        mask |= 0xff;
2682    }
2683    if (flags & (1 << 1)) {
2684        mask |= 0xff00;
2685    }
2686    if (flags & (1 << 2)) {
2687        mask |= 0xff0000;
2688    }
2689    if (flags & (1 << 3)) {
2690        mask |= 0xff000000;
2691    }
2692
2693    /* Mask out undefined and reserved bits.  */
2694    mask &= aarch32_cpsr_valid_mask(s->features, s->isar);
2695
2696    /* Mask out execution state.  */
2697    if (!spsr) {
2698        mask &= ~CPSR_EXEC;
2699    }
2700
2701    /* Mask out privileged bits.  */
2702    if (IS_USER(s)) {
2703        mask &= CPSR_USER;
2704    }
2705    return mask;
2706}
2707
2708/* Returns nonzero if access to the PSR is not permitted. Marks t0 as dead. */
2709static int gen_set_psr(DisasContext *s, uint32_t mask, int spsr, TCGv_i32 t0)
2710{
2711    TCGv_i32 tmp;
2712    if (spsr) {
2713        /* ??? This is also undefined in system mode.  */
2714        if (IS_USER(s))
2715            return 1;
2716
2717        tmp = load_cpu_field(spsr);
2718        tcg_gen_andi_i32(tmp, tmp, ~mask);
2719        tcg_gen_andi_i32(t0, t0, mask);
2720        tcg_gen_or_i32(tmp, tmp, t0);
2721        store_cpu_field(tmp, spsr);
2722    } else {
2723        gen_set_cpsr(t0, mask);
2724    }
2725    tcg_temp_free_i32(t0);
2726    gen_lookup_tb(s);
2727    return 0;
2728}
2729
2730/* Returns nonzero if access to the PSR is not permitted.  */
2731static int gen_set_psr_im(DisasContext *s, uint32_t mask, int spsr, uint32_t val)
2732{
2733    TCGv_i32 tmp;
2734    tmp = tcg_temp_new_i32();
2735    tcg_gen_movi_i32(tmp, val);
2736    return gen_set_psr(s, mask, spsr, tmp);
2737}
2738
2739static bool msr_banked_access_decode(DisasContext *s, int r, int sysm, int rn,
2740                                     int *tgtmode, int *regno)
2741{
2742    /* Decode the r and sysm fields of MSR/MRS banked accesses into
2743     * the target mode and register number, and identify the various
2744     * unpredictable cases.
2745     * MSR (banked) and MRS (banked) are CONSTRAINED UNPREDICTABLE if:
2746     *  + executed in user mode
2747     *  + using R15 as the src/dest register
2748     *  + accessing an unimplemented register
2749     *  + accessing a register that's inaccessible at the current PL/security state
2750     *  + accessing a register that you could access with a different insn
2751     * We choose to UNDEF in all these cases.
2752     * Since we don't know which of the various AArch32 modes we are in
2753     * we have to defer some checks to runtime.
2754     * Accesses to Monitor mode registers from Secure EL1 (which implies
2755     * that EL3 is AArch64) must trap to EL3.
2756     *
2757     * If the access checks fail this function will emit code to take
2758     * an exception and return false. Otherwise it will return true,
2759     * and set *tgtmode and *regno appropriately.
2760     */
2761    /* These instructions are present only in ARMv8, or in ARMv7 with the
2762     * Virtualization Extensions.
2763     */
2764    if (!arm_dc_feature(s, ARM_FEATURE_V8) &&
2765        !arm_dc_feature(s, ARM_FEATURE_EL2)) {
2766        goto undef;
2767    }
2768
2769    if (IS_USER(s) || rn == 15) {
2770        goto undef;
2771    }
2772
2773    /* The table in the v8 ARM ARM section F5.2.3 describes the encoding
2774     * of registers into (r, sysm).
2775     */
2776    if (r) {
2777        /* SPSRs for other modes */
2778        switch (sysm) {
2779        case 0xe: /* SPSR_fiq */
2780            *tgtmode = ARM_CPU_MODE_FIQ;
2781            break;
2782        case 0x10: /* SPSR_irq */
2783            *tgtmode = ARM_CPU_MODE_IRQ;
2784            break;
2785        case 0x12: /* SPSR_svc */
2786            *tgtmode = ARM_CPU_MODE_SVC;
2787            break;
2788        case 0x14: /* SPSR_abt */
2789            *tgtmode = ARM_CPU_MODE_ABT;
2790            break;
2791        case 0x16: /* SPSR_und */
2792            *tgtmode = ARM_CPU_MODE_UND;
2793            break;
2794        case 0x1c: /* SPSR_mon */
2795            *tgtmode = ARM_CPU_MODE_MON;
2796            break;
2797        case 0x1e: /* SPSR_hyp */
2798            *tgtmode = ARM_CPU_MODE_HYP;
2799            break;
2800        default: /* unallocated */
2801            goto undef;
2802        }
2803        /* We arbitrarily assign SPSR a register number of 16. */
2804        *regno = 16;
2805    } else {
2806        /* general purpose registers for other modes */
2807        switch (sysm) {
2808        case 0x0 ... 0x6:   /* 0b00xxx : r8_usr ... r14_usr */
2809            *tgtmode = ARM_CPU_MODE_USR;
2810            *regno = sysm + 8;
2811            break;
2812        case 0x8 ... 0xe:   /* 0b01xxx : r8_fiq ... r14_fiq */
2813            *tgtmode = ARM_CPU_MODE_FIQ;
2814            *regno = sysm;
2815            break;
2816        case 0x10 ... 0x11: /* 0b1000x : r14_irq, r13_irq */
2817            *tgtmode = ARM_CPU_MODE_IRQ;
2818            *regno = sysm & 1 ? 13 : 14;
2819            break;
2820        case 0x12 ... 0x13: /* 0b1001x : r14_svc, r13_svc */
2821            *tgtmode = ARM_CPU_MODE_SVC;
2822            *regno = sysm & 1 ? 13 : 14;
2823            break;
2824        case 0x14 ... 0x15: /* 0b1010x : r14_abt, r13_abt */
2825            *tgtmode = ARM_CPU_MODE_ABT;
2826            *regno = sysm & 1 ? 13 : 14;
2827            break;
2828        case 0x16 ... 0x17: /* 0b1011x : r14_und, r13_und */
2829            *tgtmode = ARM_CPU_MODE_UND;
2830            *regno = sysm & 1 ? 13 : 14;
2831            break;
2832        case 0x1c ... 0x1d: /* 0b1110x : r14_mon, r13_mon */
2833            *tgtmode = ARM_CPU_MODE_MON;
2834            *regno = sysm & 1 ? 13 : 14;
2835            break;
2836        case 0x1e ... 0x1f: /* 0b1111x : elr_hyp, r13_hyp */
2837            *tgtmode = ARM_CPU_MODE_HYP;
2838            /* Arbitrarily pick 17 for ELR_Hyp (which is not a banked LR!) */
2839            *regno = sysm & 1 ? 13 : 17;
2840            break;
2841        default: /* unallocated */
2842            goto undef;
2843        }
2844    }
2845
2846    /* Catch the 'accessing inaccessible register' cases we can detect
2847     * at translate time.
2848     */
2849    switch (*tgtmode) {
2850    case ARM_CPU_MODE_MON:
2851        if (!arm_dc_feature(s, ARM_FEATURE_EL3) || s->ns) {
2852            goto undef;
2853        }
2854        if (s->current_el == 1) {
2855            /* If we're in Secure EL1 (which implies that EL3 is AArch64)
2856             * then accesses to Mon registers trap to Secure EL2, if it exists,
2857             * otherwise EL3.
2858             */
2859            TCGv_i32 tcg_el;
2860
2861            if (arm_dc_feature(s, ARM_FEATURE_AARCH64) &&
2862                dc_isar_feature(aa64_sel2, s)) {
2863                /* Target EL is EL<3 minus SCR_EL3.EEL2> */
2864                tcg_el = load_cpu_field(cp15.scr_el3);
2865                tcg_gen_sextract_i32(tcg_el, tcg_el, ctz32(SCR_EEL2), 1);
2866                tcg_gen_addi_i32(tcg_el, tcg_el, 3);
2867            } else {
2868                tcg_el = tcg_constant_i32(3);
2869            }
2870
2871            gen_exception_insn_el_v(s, s->pc_curr, EXCP_UDEF,
2872                                    syn_uncategorized(), tcg_el);
2873            tcg_temp_free_i32(tcg_el);
2874            return false;
2875        }
2876        break;
2877    case ARM_CPU_MODE_HYP:
2878        /*
2879         * SPSR_hyp and r13_hyp can only be accessed from Monitor mode
2880         * (and so we can forbid accesses from EL2 or below). elr_hyp
2881         * can be accessed also from Hyp mode, so forbid accesses from
2882         * EL0 or EL1.
2883         */
2884        if (!arm_dc_feature(s, ARM_FEATURE_EL2) || s->current_el < 2 ||
2885            (s->current_el < 3 && *regno != 17)) {
2886            goto undef;
2887        }
2888        break;
2889    default:
2890        break;
2891    }
2892
2893    return true;
2894
2895undef:
2896    /* If we get here then some access check did not pass */
2897    gen_exception_insn(s, s->pc_curr, EXCP_UDEF, syn_uncategorized());
2898    return false;
2899}
2900
2901static void gen_msr_banked(DisasContext *s, int r, int sysm, int rn)
2902{
2903    TCGv_i32 tcg_reg;
2904    int tgtmode = 0, regno = 0;
2905
2906    if (!msr_banked_access_decode(s, r, sysm, rn, &tgtmode, &regno)) {
2907        return;
2908    }
2909
2910    /* Sync state because msr_banked() can raise exceptions */
2911    gen_set_condexec(s);
2912    gen_set_pc_im(s, s->pc_curr);
2913    tcg_reg = load_reg(s, rn);
2914    gen_helper_msr_banked(cpu_env, tcg_reg,
2915                          tcg_constant_i32(tgtmode),
2916                          tcg_constant_i32(regno));
2917    tcg_temp_free_i32(tcg_reg);
2918    s->base.is_jmp = DISAS_UPDATE_EXIT;
2919}
2920
2921static void gen_mrs_banked(DisasContext *s, int r, int sysm, int rn)
2922{
2923    TCGv_i32 tcg_reg;
2924    int tgtmode = 0, regno = 0;
2925
2926    if (!msr_banked_access_decode(s, r, sysm, rn, &tgtmode, &regno)) {
2927        return;
2928    }
2929
2930    /* Sync state because mrs_banked() can raise exceptions */
2931    gen_set_condexec(s);
2932    gen_set_pc_im(s, s->pc_curr);
2933    tcg_reg = tcg_temp_new_i32();
2934    gen_helper_mrs_banked(tcg_reg, cpu_env,
2935                          tcg_constant_i32(tgtmode),
2936                          tcg_constant_i32(regno));
2937    store_reg(s, rn, tcg_reg);
2938    s->base.is_jmp = DISAS_UPDATE_EXIT;
2939}
2940
2941/* Store value to PC as for an exception return (i.e. don't
2942 * mask bits). The subsequent call to gen_helper_cpsr_write_eret()
2943 * will do the masking based on the new value of the Thumb bit.
2944 */
2945static void store_pc_exc_ret(DisasContext *s, TCGv_i32 pc)
2946{
2947    tcg_gen_mov_i32(cpu_R[15], pc);
2948    tcg_temp_free_i32(pc);
2949}
2950
2951/* Generate a v6 exception return.  Marks both values as dead.  */
2952static void gen_rfe(DisasContext *s, TCGv_i32 pc, TCGv_i32 cpsr)
2953{
2954    store_pc_exc_ret(s, pc);
2955    /* The cpsr_write_eret helper will mask the low bits of PC
2956     * appropriately depending on the new Thumb bit, so it must
2957     * be called after storing the new PC.
2958     */
2959    if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
2960        gen_io_start();
2961    }
2962    gen_helper_cpsr_write_eret(cpu_env, cpsr);
2963    tcg_temp_free_i32(cpsr);
2964    /* Must exit loop to check un-masked IRQs */
2965    s->base.is_jmp = DISAS_EXIT;
2966}
2967
2968/* Generate an old-style exception return. Marks pc as dead. */
2969static void gen_exception_return(DisasContext *s, TCGv_i32 pc)
2970{
2971    gen_rfe(s, pc, load_cpu_field(spsr));
2972}
2973
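/*
 * Expand a three-operand gvec operation whose out-of-line helper also
 * needs a pointer to the FP QC (cumulative saturation) flag.
 */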
2974static void gen_gvec_fn3_qc(uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs,
2975                            uint32_t opr_sz, uint32_t max_sz,
2976                            gen_helper_gvec_3_ptr *fn)
2977{
2978    TCGv_ptr qc_ptr = tcg_temp_new_ptr();
2979
2980    tcg_gen_addi_ptr(qc_ptr, cpu_env, offsetof(CPUARMState, vfp.qc));
2981    tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, qc_ptr,
2982                       opr_sz, max_sz, 0, fn);
2983    tcg_temp_free_ptr(qc_ptr);
2984}
2985
2986void gen_gvec_sqrdmlah_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
2987                          uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
2988{
2989    static gen_helper_gvec_3_ptr * const fns[2] = {
2990        gen_helper_gvec_qrdmlah_s16, gen_helper_gvec_qrdmlah_s32
2991    };
2992    tcg_debug_assert(vece >= 1 && vece <= 2);
2993    gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
2994}
2995
2996void gen_gvec_sqrdmlsh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
2997                          uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
2998{
2999    static gen_helper_gvec_3_ptr * const fns[2] = {
3000        gen_helper_gvec_qrdmlsh_s16, gen_helper_gvec_qrdmlsh_s32
3001    };
3002    tcg_debug_assert(vece >= 1 && vece <= 2);
3003    gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
3004}
3005
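/*
 * Compare each element against zero: elements for which the comparison
 * holds become all ones, the others become all zeros.
 */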
3006#define GEN_CMP0(NAME, COND)                                            \
3007    static void gen_##NAME##0_i32(TCGv_i32 d, TCGv_i32 a)               \
3008    {                                                                   \
3009        tcg_gen_setcondi_i32(COND, d, a, 0);                            \
3010        tcg_gen_neg_i32(d, d);                                          \
3011    }                                                                   \
3012    static void gen_##NAME##0_i64(TCGv_i64 d, TCGv_i64 a)               \
3013    {                                                                   \
3014        tcg_gen_setcondi_i64(COND, d, a, 0);                            \
3015        tcg_gen_neg_i64(d, d);                                          \
3016    }                                                                   \
3017    static void gen_##NAME##0_vec(unsigned vece, TCGv_vec d, TCGv_vec a) \
3018    {                                                                   \
3019        TCGv_vec zero = tcg_constant_vec_matching(d, vece, 0);          \
3020        tcg_gen_cmp_vec(COND, vece, d, a, zero);                        \
3021    }                                                                   \
3022    void gen_gvec_##NAME##0(unsigned vece, uint32_t d, uint32_t m,      \
3023                            uint32_t opr_sz, uint32_t max_sz)           \
3024    {                                                                   \
3025        const GVecGen2 op[4] = {                                        \
3026            { .fno = gen_helper_gvec_##NAME##0_b,                       \
3027              .fniv = gen_##NAME##0_vec,                                \
3028              .opt_opc = vecop_list_cmp,                                \
3029              .vece = MO_8 },                                           \
3030            { .fno = gen_helper_gvec_##NAME##0_h,                       \
3031              .fniv = gen_##NAME##0_vec,                                \
3032              .opt_opc = vecop_list_cmp,                                \
3033              .vece = MO_16 },                                          \
3034            { .fni4 = gen_##NAME##0_i32,                                \
3035              .fniv = gen_##NAME##0_vec,                                \
3036              .opt_opc = vecop_list_cmp,                                \
3037              .vece = MO_32 },                                          \
3038            { .fni8 = gen_##NAME##0_i64,                                \
3039              .fniv = gen_##NAME##0_vec,                                \
3040              .opt_opc = vecop_list_cmp,                                \
3041              .prefer_i64 = TCG_TARGET_REG_BITS == 64,                  \
3042              .vece = MO_64 },                                          \
3043        };                                                              \
3044        tcg_gen_gvec_2(d, m, opr_sz, max_sz, &op[vece]);                \
3045    }
3046
3047static const TCGOpcode vecop_list_cmp[] = {
3048    INDEX_op_cmp_vec, 0
3049};
3050
3051GEN_CMP0(ceq, TCG_COND_EQ)
3052GEN_CMP0(cle, TCG_COND_LE)
3053GEN_CMP0(cge, TCG_COND_GE)
3054GEN_CMP0(clt, TCG_COND_LT)
3055GEN_CMP0(cgt, TCG_COND_GT)
3056
3057#undef GEN_CMP0
3058
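/* Signed shift-right-and-accumulate: d += a >> shift, per element. */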
3059static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3060{
3061    tcg_gen_vec_sar8i_i64(a, a, shift);
3062    tcg_gen_vec_add8_i64(d, d, a);
3063}
3064
3065static void gen_ssra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3066{
3067    tcg_gen_vec_sar16i_i64(a, a, shift);
3068    tcg_gen_vec_add16_i64(d, d, a);
3069}
3070
3071static void gen_ssra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3072{
3073    tcg_gen_sari_i32(a, a, shift);
3074    tcg_gen_add_i32(d, d, a);
3075}
3076
3077static void gen_ssra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3078{
3079    tcg_gen_sari_i64(a, a, shift);
3080    tcg_gen_add_i64(d, d, a);
3081}
3082
3083static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3084{
3085    tcg_gen_sari_vec(vece, a, a, sh);
3086    tcg_gen_add_vec(vece, d, d, a);
3087}
3088
3089void gen_gvec_ssra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3090                   int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3091{
3092    static const TCGOpcode vecop_list[] = {
3093        INDEX_op_sari_vec, INDEX_op_add_vec, 0
3094    };
3095    static const GVecGen2i ops[4] = {
3096        { .fni8 = gen_ssra8_i64,
3097          .fniv = gen_ssra_vec,
3098          .fno = gen_helper_gvec_ssra_b,
3099          .load_dest = true,
3100          .opt_opc = vecop_list,
3101          .vece = MO_8 },
3102        { .fni8 = gen_ssra16_i64,
3103          .fniv = gen_ssra_vec,
3104          .fno = gen_helper_gvec_ssra_h,
3105          .load_dest = true,
3106          .opt_opc = vecop_list,
3107          .vece = MO_16 },
3108        { .fni4 = gen_ssra32_i32,
3109          .fniv = gen_ssra_vec,
3110          .fno = gen_helper_gvec_ssra_s,
3111          .load_dest = true,
3112          .opt_opc = vecop_list,
3113          .vece = MO_32 },
3114        { .fni8 = gen_ssra64_i64,
3115          .fniv = gen_ssra_vec,
3116          .fno = gen_helper_gvec_ssra_d,
3117          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3118          .opt_opc = vecop_list,
3119          .load_dest = true,
3120          .vece = MO_64 },
3121    };
3122
3123    /* tszimm encoding produces immediates in the range [1..esize]. */
3124    tcg_debug_assert(shift > 0);
3125    tcg_debug_assert(shift <= (8 << vece));
3126
3127    /*
3128     * Shifts larger than the element size are architecturally valid;
3129     * a signed shift by esize or more yields all sign bits, so we clamp.
3130     */
3131    shift = MIN(shift, (8 << vece) - 1);
3132    tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3133}
3134
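/* Unsigned shift-right-and-accumulate: d += a >> shift, per element. */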
3135static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3136{
3137    tcg_gen_vec_shr8i_i64(a, a, shift);
3138    tcg_gen_vec_add8_i64(d, d, a);
3139}
3140
3141static void gen_usra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3142{
3143    tcg_gen_vec_shr16i_i64(a, a, shift);
3144    tcg_gen_vec_add16_i64(d, d, a);
3145}
3146
3147static void gen_usra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3148{
3149    tcg_gen_shri_i32(a, a, shift);
3150    tcg_gen_add_i32(d, d, a);
3151}
3152
3153static void gen_usra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3154{
3155    tcg_gen_shri_i64(a, a, shift);
3156    tcg_gen_add_i64(d, d, a);
3157}
3158
3159static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3160{
3161    tcg_gen_shri_vec(vece, a, a, sh);
3162    tcg_gen_add_vec(vece, d, d, a);
3163}
3164
3165void gen_gvec_usra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3166                   int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3167{
3168    static const TCGOpcode vecop_list[] = {
3169        INDEX_op_shri_vec, INDEX_op_add_vec, 0
3170    };
3171    static const GVecGen2i ops[4] = {
3172        { .fni8 = gen_usra8_i64,
3173          .fniv = gen_usra_vec,
3174          .fno = gen_helper_gvec_usra_b,
3175          .load_dest = true,
3176          .opt_opc = vecop_list,
3177          .vece = MO_8, },
3178        { .fni8 = gen_usra16_i64,
3179          .fniv = gen_usra_vec,
3180          .fno = gen_helper_gvec_usra_h,
3181          .load_dest = true,
3182          .opt_opc = vecop_list,
3183          .vece = MO_16, },
3184        { .fni4 = gen_usra32_i32,
3185          .fniv = gen_usra_vec,
3186          .fno = gen_helper_gvec_usra_s,
3187          .load_dest = true,
3188          .opt_opc = vecop_list,
3189          .vece = MO_32, },
3190        { .fni8 = gen_usra64_i64,
3191          .fniv = gen_usra_vec,
3192          .fno = gen_helper_gvec_usra_d,
3193          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3194          .load_dest = true,
3195          .opt_opc = vecop_list,
3196          .vece = MO_64, },
3197    };
3198
3199    /* tszimm encoding produces immediates in the range [1..esize]. */
3200    tcg_debug_assert(shift > 0);
3201    tcg_debug_assert(shift <= (8 << vece));
3202
3203    /*
3204     * Shifts larger than the element size are architecturally valid.
3205     * An unsigned shift results in all zeros as input to accumulate: a nop.
3206     */
3207    if (shift < (8 << vece)) {
3208        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3209    } else {
3210        /* Nop, but we do need to clear the tail. */
3211        tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
3212    }
3213}
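
/*
 * Illustrative sketch (not from the original source): the same per-lane
 * model for USRA, MO_8; usra8_ref is a hypothetical name.  A shift equal
 * to the element size contributes zero, so only the accumulate (and the
 * tail clearing) remains.
 */
static inline uint8_t usra8_ref(uint8_t d, uint8_t a, unsigned shift)
{
    uint8_t addend = (shift >= 8) ? 0 : (uint8_t)(a >> shift);
    return (uint8_t)(d + addend);
}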
3214
3215/*
3216 * Shift one less than the requested amount, and the low bit is
3217 * the rounding bit.  For the 8 and 16-bit operations, because we
3218 * mask the low bit, we can perform a normal integer shift instead
3219 * of a vector shift.
3220 */
3221static void gen_srshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3222{
3223    TCGv_i64 t = tcg_temp_new_i64();
3224
3225    tcg_gen_shri_i64(t, a, sh - 1);
3226    tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
3227    tcg_gen_vec_sar8i_i64(d, a, sh);
3228    tcg_gen_vec_add8_i64(d, d, t);
3229    tcg_temp_free_i64(t);
3230}
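
/*
 * Illustrative sketch (not from the original source): the rounding
 * identity the expansion above relies on, written out for one signed
 * 8-bit lane; srshr8_ref is a hypothetical name.  The bit shifted out
 * last, (a >> (sh - 1)) & 1, is the rounding increment, so no wider
 * intermediate is needed.
 */
static inline int8_t srshr8_ref(int8_t a, unsigned sh)
{
    int round = (a >> (sh - 1)) & 1;      /* valid for sh in [1..8] */
    return (int8_t)((a >> sh) + round);
}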
3231
3232static void gen_srshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3233{
3234    TCGv_i64 t = tcg_temp_new_i64();
3235
3236    tcg_gen_shri_i64(t, a, sh - 1);
3237    tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
3238    tcg_gen_vec_sar16i_i64(d, a, sh);
3239    tcg_gen_vec_add16_i64(d, d, t);
3240    tcg_temp_free_i64(t);
3241}
3242
3243static void gen_srshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3244{
3245    TCGv_i32 t;
3246
3247    /* Handle shift by the input size for the benefit of trans_SRSHR_ri */
3248    if (sh == 32) {
3249        tcg_gen_movi_i32(d, 0);
3250        return;
3251    }
3252    t = tcg_temp_new_i32();
3253    tcg_gen_extract_i32(t, a, sh - 1, 1);
3254    tcg_gen_sari_i32(d, a, sh);
3255    tcg_gen_add_i32(d, d, t);
3256    tcg_temp_free_i32(t);
3257}
3258
3259static void gen_srshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3260{
3261    TCGv_i64 t = tcg_temp_new_i64();
3262
3263    tcg_gen_extract_i64(t, a, sh - 1, 1);
3264    tcg_gen_sari_i64(d, a, sh);
3265    tcg_gen_add_i64(d, d, t);
3266    tcg_temp_free_i64(t);
3267}
3268
3269static void gen_srshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3270{
3271    TCGv_vec t = tcg_temp_new_vec_matching(d);
3272    TCGv_vec ones = tcg_temp_new_vec_matching(d);
3273
3274    tcg_gen_shri_vec(vece, t, a, sh - 1);
3275    tcg_gen_dupi_vec(vece, ones, 1);
3276    tcg_gen_and_vec(vece, t, t, ones);
3277    tcg_gen_sari_vec(vece, d, a, sh);
3278    tcg_gen_add_vec(vece, d, d, t);
3279
3280    tcg_temp_free_vec(t);
3281    tcg_temp_free_vec(ones);
3282}
3283
3284void gen_gvec_srshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3285                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3286{
3287    static const TCGOpcode vecop_list[] = {
3288        INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
3289    };
3290    static const GVecGen2i ops[4] = {
3291        { .fni8 = gen_srshr8_i64,
3292          .fniv = gen_srshr_vec,
3293          .fno = gen_helper_gvec_srshr_b,
3294          .opt_opc = vecop_list,
3295          .vece = MO_8 },
3296        { .fni8 = gen_srshr16_i64,
3297          .fniv = gen_srshr_vec,
3298          .fno = gen_helper_gvec_srshr_h,
3299          .opt_opc = vecop_list,
3300          .vece = MO_16 },
3301        { .fni4 = gen_srshr32_i32,
3302          .fniv = gen_srshr_vec,
3303          .fno = gen_helper_gvec_srshr_s,
3304          .opt_opc = vecop_list,
3305          .vece = MO_32 },
3306        { .fni8 = gen_srshr64_i64,
3307          .fniv = gen_srshr_vec,
3308          .fno = gen_helper_gvec_srshr_d,
3309          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3310          .opt_opc = vecop_list,
3311          .vece = MO_64 },
3312    };
3313
3314    /* tszimm encoding produces immediates in the range [1..esize] */
3315    tcg_debug_assert(shift > 0);
3316    tcg_debug_assert(shift <= (8 << vece));
3317
3318    if (shift == (8 << vece)) {
3319        /*
3320         * Shifts larger than the element size are architecturally valid.
3321         * Signed results in all sign bits.  With rounding, this produces
3322         *   (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
3323         * I.e. always zero.
3324         */
3325        tcg_gen_gvec_dup_imm(vece, rd_ofs, opr_sz, max_sz, 0);
3326    } else {
3327        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3328    }
3329}
3330
3331static void gen_srsra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3332{
3333    TCGv_i64 t = tcg_temp_new_i64();
3334
3335    gen_srshr8_i64(t, a, sh);
3336    tcg_gen_vec_add8_i64(d, d, t);
3337    tcg_temp_free_i64(t);
3338}
3339
3340static void gen_srsra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3341{
3342    TCGv_i64 t = tcg_temp_new_i64();
3343
3344    gen_srshr16_i64(t, a, sh);
3345    tcg_gen_vec_add16_i64(d, d, t);
3346    tcg_temp_free_i64(t);
3347}
3348
3349static void gen_srsra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3350{
3351    TCGv_i32 t = tcg_temp_new_i32();
3352
3353    gen_srshr32_i32(t, a, sh);
3354    tcg_gen_add_i32(d, d, t);
3355    tcg_temp_free_i32(t);
3356}
3357
3358static void gen_srsra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3359{
3360    TCGv_i64 t = tcg_temp_new_i64();
3361
3362    gen_srshr64_i64(t, a, sh);
3363    tcg_gen_add_i64(d, d, t);
3364    tcg_temp_free_i64(t);
3365}
3366
3367static void gen_srsra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3368{
3369    TCGv_vec t = tcg_temp_new_vec_matching(d);
3370
3371    gen_srshr_vec(vece, t, a, sh);
3372    tcg_gen_add_vec(vece, d, d, t);
3373    tcg_temp_free_vec(t);
3374}
3375
3376void gen_gvec_srsra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3377                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3378{
3379    static const TCGOpcode vecop_list[] = {
3380        INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
3381    };
3382    static const GVecGen2i ops[4] = {
3383        { .fni8 = gen_srsra8_i64,
3384          .fniv = gen_srsra_vec,
3385          .fno = gen_helper_gvec_srsra_b,
3386          .opt_opc = vecop_list,
3387          .load_dest = true,
3388          .vece = MO_8 },
3389        { .fni8 = gen_srsra16_i64,
3390          .fniv = gen_srsra_vec,
3391          .fno = gen_helper_gvec_srsra_h,
3392          .opt_opc = vecop_list,
3393          .load_dest = true,
3394          .vece = MO_16 },
3395        { .fni4 = gen_srsra32_i32,
3396          .fniv = gen_srsra_vec,
3397          .fno = gen_helper_gvec_srsra_s,
3398          .opt_opc = vecop_list,
3399          .load_dest = true,
3400          .vece = MO_32 },
3401        { .fni8 = gen_srsra64_i64,
3402          .fniv = gen_srsra_vec,
3403          .fno = gen_helper_gvec_srsra_d,
3404          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3405          .opt_opc = vecop_list,
3406          .load_dest = true,
3407          .vece = MO_64 },
3408    };
3409
3410    /* tszimm encoding produces immediates in the range [1..esize] */
3411    tcg_debug_assert(shift > 0);
3412    tcg_debug_assert(shift <= (8 << vece));
3413
3414    /*
3415     * Shifts larger than the element size are architecturally valid.
3416     * Signed results in all sign bits.  With rounding, this produces
3417     *   (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
3418     * I.e. always zero.  With accumulation, this leaves D unchanged.
3419     */
3420    if (shift == (8 << vece)) {
3421        /* Nop, but we do need to clear the tail. */
3422        tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
3423    } else {
3424        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3425    }
3426}
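
/*
 * Illustrative sketch (not from the original source): per lane, SRSRA is
 * the rounded shift followed by an accumulate, reusing the hypothetical
 * srshr8_ref sketch above.  For sh == esize the rounded shift is zero
 * and d is left unchanged, which is why the expansion degenerates to a
 * tail-clearing move.
 */
static inline int8_t srsra8_ref(int8_t d, int8_t a, unsigned sh)
{
    return (int8_t)(d + srshr8_ref(a, sh));
}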
3427
3428static void gen_urshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3429{
3430    TCGv_i64 t = tcg_temp_new_i64();
3431
3432    tcg_gen_shri_i64(t, a, sh - 1);
3433    tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
3434    tcg_gen_vec_shr8i_i64(d, a, sh);
3435    tcg_gen_vec_add8_i64(d, d, t);
3436    tcg_temp_free_i64(t);
3437}
3438
3439static void gen_urshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3440{
3441    TCGv_i64 t = tcg_temp_new_i64();
3442
3443    tcg_gen_shri_i64(t, a, sh - 1);
3444    tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
3445    tcg_gen_vec_shr16i_i64(d, a, sh);
3446    tcg_gen_vec_add16_i64(d, d, t);
3447    tcg_temp_free_i64(t);
3448}
3449
3450static void gen_urshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3451{
3452    TCGv_i32 t;
3453
3454    /* Handle shift by the input size for the benefit of trans_URSHR_ri */
3455    if (sh == 32) {
3456        tcg_gen_extract_i32(d, a, sh - 1, 1);
3457        return;
3458    }
3459    t = tcg_temp_new_i32();
3460    tcg_gen_extract_i32(t, a, sh - 1, 1);
3461    tcg_gen_shri_i32(d, a, sh);
3462    tcg_gen_add_i32(d, d, t);
3463    tcg_temp_free_i32(t);
3464}
3465
3466static void gen_urshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3467{
3468    TCGv_i64 t = tcg_temp_new_i64();
3469
3470    tcg_gen_extract_i64(t, a, sh - 1, 1);
3471    tcg_gen_shri_i64(d, a, sh);
3472    tcg_gen_add_i64(d, d, t);
3473    tcg_temp_free_i64(t);
3474}
3475
3476static void gen_urshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t shift)
3477{
3478    TCGv_vec t = tcg_temp_new_vec_matching(d);
3479    TCGv_vec ones = tcg_temp_new_vec_matching(d);
3480
3481    tcg_gen_shri_vec(vece, t, a, shift - 1);
3482    tcg_gen_dupi_vec(vece, ones, 1);
3483    tcg_gen_and_vec(vece, t, t, ones);
3484    tcg_gen_shri_vec(vece, d, a, shift);
3485    tcg_gen_add_vec(vece, d, d, t);
3486
3487    tcg_temp_free_vec(t);
3488    tcg_temp_free_vec(ones);
3489}
3490
3491void gen_gvec_urshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3492                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3493{
3494    static const TCGOpcode vecop_list[] = {
3495        INDEX_op_shri_vec, INDEX_op_add_vec, 0
3496    };
3497    static const GVecGen2i ops[4] = {
3498        { .fni8 = gen_urshr8_i64,
3499          .fniv = gen_urshr_vec,
3500          .fno = gen_helper_gvec_urshr_b,
3501          .opt_opc = vecop_list,
3502          .vece = MO_8 },
3503        { .fni8 = gen_urshr16_i64,
3504          .fniv = gen_urshr_vec,
3505          .fno = gen_helper_gvec_urshr_h,
3506          .opt_opc = vecop_list,
3507          .vece = MO_16 },
3508        { .fni4 = gen_urshr32_i32,
3509          .fniv = gen_urshr_vec,
3510          .fno = gen_helper_gvec_urshr_s,
3511          .opt_opc = vecop_list,
3512          .vece = MO_32 },
3513        { .fni8 = gen_urshr64_i64,
3514          .fniv = gen_urshr_vec,
3515          .fno = gen_helper_gvec_urshr_d,
3516          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3517          .opt_opc = vecop_list,
3518          .vece = MO_64 },
3519    };
3520
3521    /* tszimm encoding produces immediates in the range [1..esize] */
3522    tcg_debug_assert(shift > 0);
3523    tcg_debug_assert(shift <= (8 << vece));
3524
3525    if (shift == (8 << vece)) {
3526        /*
3527         * Shifts larger than the element size are architecturally valid.
3528         * Unsigned results in zero.  With rounding, this produces a
3529         * copy of the most significant bit.
3530         */
3531        tcg_gen_gvec_shri(vece, rd_ofs, rm_ofs, shift - 1, opr_sz, max_sz);
3532    } else {
3533        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3534    }
3535}
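
/*
 * Illustrative sketch (not from the original source): URSHR for one
 * uint8_t lane; urshr8_ref is a hypothetical name.  For sh == 8 the
 * shifted value is zero but the rounding increment is the old MSB,
 * which is why the expansion above can use a plain shift by sh - 1.
 */
static inline uint8_t urshr8_ref(uint8_t a, unsigned sh)
{
    unsigned round = (a >> (sh - 1)) & 1; /* valid for sh in [1..8] */
    return (uint8_t)((a >> sh) + round);  /* a is zero-extended, so a >> 8 == 0 */
}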
3536
3537static void gen_ursra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3538{
3539    TCGv_i64 t = tcg_temp_new_i64();
3540
3541    if (sh == 8) {
3542        tcg_gen_vec_shr8i_i64(t, a, 7);
3543    } else {
3544        gen_urshr8_i64(t, a, sh);
3545    }
3546    tcg_gen_vec_add8_i64(d, d, t);
3547    tcg_temp_free_i64(t);
3548}
3549
3550static void gen_ursra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3551{
3552    TCGv_i64 t = tcg_temp_new_i64();
3553
3554    if (sh == 16) {
3555        tcg_gen_vec_shr16i_i64(t, a, 15);
3556    } else {
3557        gen_urshr16_i64(t, a, sh);
3558    }
3559    tcg_gen_vec_add16_i64(d, d, t);
3560    tcg_temp_free_i64(t);
3561}
3562
3563static void gen_ursra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3564{
3565    TCGv_i32 t = tcg_temp_new_i32();
3566
3567    if (sh == 32) {
3568        tcg_gen_shri_i32(t, a, 31);
3569    } else {
3570        gen_urshr32_i32(t, a, sh);
3571    }
3572    tcg_gen_add_i32(d, d, t);
3573    tcg_temp_free_i32(t);
3574}
3575
3576static void gen_ursra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3577{
3578    TCGv_i64 t = tcg_temp_new_i64();
3579
3580    if (sh == 64) {
3581        tcg_gen_shri_i64(t, a, 63);
3582    } else {
3583        gen_urshr64_i64(t, a, sh);
3584    }
3585    tcg_gen_add_i64(d, d, t);
3586    tcg_temp_free_i64(t);
3587}
3588
3589static void gen_ursra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3590{
3591    TCGv_vec t = tcg_temp_new_vec_matching(d);
3592
3593    if (sh == (8 << vece)) {
3594        tcg_gen_shri_vec(vece, t, a, sh - 1);
3595    } else {
3596        gen_urshr_vec(vece, t, a, sh);
3597    }
3598    tcg_gen_add_vec(vece, d, d, t);
3599    tcg_temp_free_vec(t);
3600}
3601
3602void gen_gvec_ursra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3603                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3604{
3605    static const TCGOpcode vecop_list[] = {
3606        INDEX_op_shri_vec, INDEX_op_add_vec, 0
3607    };
3608    static const GVecGen2i ops[4] = {
3609        { .fni8 = gen_ursra8_i64,
3610          .fniv = gen_ursra_vec,
3611          .fno = gen_helper_gvec_ursra_b,
3612          .opt_opc = vecop_list,
3613          .load_dest = true,
3614          .vece = MO_8 },
3615        { .fni8 = gen_ursra16_i64,
3616          .fniv = gen_ursra_vec,
3617          .fno = gen_helper_gvec_ursra_h,
3618          .opt_opc = vecop_list,
3619          .load_dest = true,
3620          .vece = MO_16 },
3621        { .fni4 = gen_ursra32_i32,
3622          .fniv = gen_ursra_vec,
3623          .fno = gen_helper_gvec_ursra_s,
3624          .opt_opc = vecop_list,
3625          .load_dest = true,
3626          .vece = MO_32 },
3627        { .fni8 = gen_ursra64_i64,
3628          .fniv = gen_ursra_vec,
3629          .fno = gen_helper_gvec_ursra_d,
3630          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3631          .opt_opc = vecop_list,
3632          .load_dest = true,
3633          .vece = MO_64 },
3634    };
3635
3636    /* tszimm encoding produces immediates in the range [1..esize] */
3637    tcg_debug_assert(shift > 0);
3638    tcg_debug_assert(shift <= (8 << vece));
3639
3640    tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3641}
3642
3643static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3644{
3645    uint64_t mask = dup_const(MO_8, 0xff >> shift);
3646    TCGv_i64 t = tcg_temp_new_i64();
3647
3648    tcg_gen_shri_i64(t, a, shift);
3649    tcg_gen_andi_i64(t, t, mask);
3650    tcg_gen_andi_i64(d, d, ~mask);
3651    tcg_gen_or_i64(d, d, t);
3652    tcg_temp_free_i64(t);
3653}
3654
3655static void gen_shr16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3656{
3657    uint64_t mask = dup_const(MO_16, 0xffff >> shift);
3658    TCGv_i64 t = tcg_temp_new_i64();
3659
3660    tcg_gen_shri_i64(t, a, shift);
3661    tcg_gen_andi_i64(t, t, mask);
3662    tcg_gen_andi_i64(d, d, ~mask);
3663    tcg_gen_or_i64(d, d, t);
3664    tcg_temp_free_i64(t);
3665}
3666
3667static void gen_shr32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3668{
3669    tcg_gen_shri_i32(a, a, shift);
3670    tcg_gen_deposit_i32(d, d, a, 0, 32 - shift);
3671}
3672
3673static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3674{
3675    tcg_gen_shri_i64(a, a, shift);
3676    tcg_gen_deposit_i64(d, d, a, 0, 64 - shift);
3677}
3678
3679static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3680{
3681    TCGv_vec t = tcg_temp_new_vec_matching(d);
3682    TCGv_vec m = tcg_temp_new_vec_matching(d);
3683
3684    tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh));
3685    tcg_gen_shri_vec(vece, t, a, sh);
3686    tcg_gen_and_vec(vece, d, d, m);
3687    tcg_gen_or_vec(vece, d, d, t);
3688
3689    tcg_temp_free_vec(t);
3690    tcg_temp_free_vec(m);
3691}
3692
3693void gen_gvec_sri(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3694                  int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3695{
3696    static const TCGOpcode vecop_list[] = { INDEX_op_shri_vec, 0 };
3697    const GVecGen2i ops[4] = {
3698        { .fni8 = gen_shr8_ins_i64,
3699          .fniv = gen_shr_ins_vec,
3700          .fno = gen_helper_gvec_sri_b,
3701          .load_dest = true,
3702          .opt_opc = vecop_list,
3703          .vece = MO_8 },
3704        { .fni8 = gen_shr16_ins_i64,
3705          .fniv = gen_shr_ins_vec,
3706          .fno = gen_helper_gvec_sri_h,
3707          .load_dest = true,
3708          .opt_opc = vecop_list,
3709          .vece = MO_16 },
3710        { .fni4 = gen_shr32_ins_i32,
3711          .fniv = gen_shr_ins_vec,
3712          .fno = gen_helper_gvec_sri_s,
3713          .load_dest = true,
3714          .opt_opc = vecop_list,
3715          .vece = MO_32 },
3716        { .fni8 = gen_shr64_ins_i64,
3717          .fniv = gen_shr_ins_vec,
3718          .fno = gen_helper_gvec_sri_d,
3719          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3720          .load_dest = true,
3721          .opt_opc = vecop_list,
3722          .vece = MO_64 },
3723    };
3724
3725    /* tszimm encoding produces immediates in the range [1..esize]. */
3726    tcg_debug_assert(shift > 0);
3727    tcg_debug_assert(shift <= (8 << vece));
3728
3729    /* Shift of esize leaves destination unchanged. */
3730    if (shift < (8 << vece)) {
3731        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3732    } else {
3733        /* Nop, but we do need to clear the tail. */
3734        tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
3735    }
3736}
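
/*
 * Illustrative sketch (not from the original source): SRI for one
 * uint8_t lane; sri8_ref is a hypothetical name.  The top 'shift' bits
 * of the destination are kept and the remaining bits are replaced by the
 * shifted source, so a shift of esize keeps the destination intact.
 */
static inline uint8_t sri8_ref(uint8_t d, uint8_t a, unsigned shift)
{
    uint8_t keep = (uint8_t)(0xff << (8 - shift));    /* top 'shift' bits of d */
    return (uint8_t)((d & keep) | ((a >> shift) & ~keep));
}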
3737
3738static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3739{
3740    uint64_t mask = dup_const(MO_8, 0xff << shift);
3741    TCGv_i64 t = tcg_temp_new_i64();
3742
3743    tcg_gen_shli_i64(t, a, shift);
3744    tcg_gen_andi_i64(t, t, mask);
3745    tcg_gen_andi_i64(d, d, ~mask);
3746    tcg_gen_or_i64(d, d, t);
3747    tcg_temp_free_i64(t);
3748}
3749
3750static void gen_shl16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3751{
3752    uint64_t mask = dup_const(MO_16, 0xffff << shift);
3753    TCGv_i64 t = tcg_temp_new_i64();
3754
3755    tcg_gen_shli_i64(t, a, shift);
3756    tcg_gen_andi_i64(t, t, mask);
3757    tcg_gen_andi_i64(d, d, ~mask);
3758    tcg_gen_or_i64(d, d, t);
3759    tcg_temp_free_i64(t);
3760}
3761
3762static void gen_shl32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3763{
3764    tcg_gen_deposit_i32(d, d, a, shift, 32 - shift);
3765}
3766
3767static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3768{
3769    tcg_gen_deposit_i64(d, d, a, shift, 64 - shift);
3770}
3771
3772static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3773{
3774    TCGv_vec t = tcg_temp_new_vec_matching(d);
3775    TCGv_vec m = tcg_temp_new_vec_matching(d);
3776
3777    tcg_gen_shli_vec(vece, t, a, sh);
3778    tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK(0, sh));
3779    tcg_gen_and_vec(vece, d, d, m);
3780    tcg_gen_or_vec(vece, d, d, t);
3781
3782    tcg_temp_free_vec(t);
3783    tcg_temp_free_vec(m);
3784}
3785
3786void gen_gvec_sli(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3787                  int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3788{
3789    static const TCGOpcode vecop_list[] = { INDEX_op_shli_vec, 0 };
3790    const GVecGen2i ops[4] = {
3791        { .fni8 = gen_shl8_ins_i64,
3792          .fniv = gen_shl_ins_vec,
3793          .fno = gen_helper_gvec_sli_b,
3794          .load_dest = true,
3795          .opt_opc = vecop_list,
3796          .vece = MO_8 },
3797        { .fni8 = gen_shl16_ins_i64,
3798          .fniv = gen_shl_ins_vec,
3799          .fno = gen_helper_gvec_sli_h,
3800          .load_dest = true,
3801          .opt_opc = vecop_list,
3802          .vece = MO_16 },
3803        { .fni4 = gen_shl32_ins_i32,
3804          .fniv = gen_shl_ins_vec,
3805          .fno = gen_helper_gvec_sli_s,
3806          .load_dest = true,
3807          .opt_opc = vecop_list,
3808          .vece = MO_32 },
3809        { .fni8 = gen_shl64_ins_i64,
3810          .fniv = gen_shl_ins_vec,
3811          .fno = gen_helper_gvec_sli_d,
3812          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3813          .load_dest = true,
3814          .opt_opc = vecop_list,
3815          .vece = MO_64 },
3816    };
3817
3818    /* tszimm encoding produces immediates in the range [0..esize-1]. */
3819    tcg_debug_assert(shift >= 0);
3820    tcg_debug_assert(shift < (8 << vece));
3821
3822    if (shift == 0) {
3823        tcg_gen_gvec_mov(vece, rd_ofs, rm_ofs, opr_sz, max_sz);
3824    } else {
3825        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3826    }
3827}
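
/*
 * Illustrative sketch (not from the original source): SLI for one
 * uint8_t lane; sli8_ref is a hypothetical name.  The low 'shift' bits
 * of the destination are kept, so a shift of 0 is a plain move, matching
 * the tcg_gen_gvec_mov() special case above.
 */
static inline uint8_t sli8_ref(uint8_t d, uint8_t a, unsigned shift)
{
    uint8_t keep = (uint8_t)((1u << shift) - 1);      /* low 'shift' bits of d */
    return (uint8_t)((d & keep) | (uint8_t)(a << shift));
}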
3828
3829static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3830{
3831    gen_helper_neon_mul_u8(a, a, b);
3832    gen_helper_neon_add_u8(d, d, a);
3833}
3834
3835static void gen_mls8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3836{
3837    gen_helper_neon_mul_u8(a, a, b);
3838    gen_helper_neon_sub_u8(d, d, a);
3839}
3840
3841static void gen_mla16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3842{
3843    gen_helper_neon_mul_u16(a, a, b);
3844    gen_helper_neon_add_u16(d, d, a);
3845}
3846
3847static void gen_mls16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3848{
3849    gen_helper_neon_mul_u16(a, a, b);
3850    gen_helper_neon_sub_u16(d, d, a);
3851}
3852
3853static void gen_mla32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3854{
3855    tcg_gen_mul_i32(a, a, b);
3856    tcg_gen_add_i32(d, d, a);
3857}
3858
3859static void gen_mls32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3860{
3861    tcg_gen_mul_i32(a, a, b);
3862    tcg_gen_sub_i32(d, d, a);
3863}
3864
3865static void gen_mla64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
3866{
3867    tcg_gen_mul_i64(a, a, b);
3868    tcg_gen_add_i64(d, d, a);
3869}
3870
3871static void gen_mls64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
3872{
3873    tcg_gen_mul_i64(a, a, b);
3874    tcg_gen_sub_i64(d, d, a);
3875}
3876
3877static void gen_mla_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
3878{
3879    tcg_gen_mul_vec(vece, a, a, b);
3880    tcg_gen_add_vec(vece, d, d, a);
3881}
3882
3883static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
3884{
3885    tcg_gen_mul_vec(vece, a, a, b);
3886    tcg_gen_sub_vec(vece, d, d, a);
3887}
3888
3889/* Note that while NEON does not support VMLA and VMLS as 64-bit ops,
3890 * these tables are shared with AArch64 which does support them.
3891 */
3892void gen_gvec_mla(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3893                  uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3894{
3895    static const TCGOpcode vecop_list[] = {
3896        INDEX_op_mul_vec, INDEX_op_add_vec, 0
3897    };
3898    static const GVecGen3 ops[4] = {
3899        { .fni4 = gen_mla8_i32,
3900          .fniv = gen_mla_vec,
3901          .load_dest = true,
3902          .opt_opc = vecop_list,
3903          .vece = MO_8 },
3904        { .fni4 = gen_mla16_i32,
3905          .fniv = gen_mla_vec,
3906          .load_dest = true,
3907          .opt_opc = vecop_list,
3908          .vece = MO_16 },
3909        { .fni4 = gen_mla32_i32,
3910          .fniv = gen_mla_vec,
3911          .load_dest = true,
3912          .opt_opc = vecop_list,
3913          .vece = MO_32 },
3914        { .fni8 = gen_mla64_i64,
3915          .fniv = gen_mla_vec,
3916          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3917          .load_dest = true,
3918          .opt_opc = vecop_list,
3919          .vece = MO_64 },
3920    };
3921    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
3922}
3923
3924void gen_gvec_mls(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3925                  uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3926{
3927    static const TCGOpcode vecop_list[] = {
3928        INDEX_op_mul_vec, INDEX_op_sub_vec, 0
3929    };
3930    static const GVecGen3 ops[4] = {
3931        { .fni4 = gen_mls8_i32,
3932          .fniv = gen_mls_vec,
3933          .load_dest = true,
3934          .opt_opc = vecop_list,
3935          .vece = MO_8 },
3936        { .fni4 = gen_mls16_i32,
3937          .fniv = gen_mls_vec,
3938          .load_dest = true,
3939          .opt_opc = vecop_list,
3940          .vece = MO_16 },
3941        { .fni4 = gen_mls32_i32,
3942          .fniv = gen_mls_vec,
3943          .load_dest = true,
3944          .opt_opc = vecop_list,
3945          .vece = MO_32 },
3946        { .fni8 = gen_mls64_i64,
3947          .fniv = gen_mls_vec,
3948          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3949          .load_dest = true,
3950          .opt_opc = vecop_list,
3951          .vece = MO_64 },
3952    };
3953    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
3954}
3955
3956/* CMTST: test is "if (X & Y != 0)". */
3957static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3958{
3959    tcg_gen_and_i32(d, a, b);
3960    tcg_gen_setcondi_i32(TCG_COND_NE, d, d, 0);
3961    tcg_gen_neg_i32(d, d);
3962}
3963
3964void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
3965{
3966    tcg_gen_and_i64(d, a, b);
3967    tcg_gen_setcondi_i64(TCG_COND_NE, d, d, 0);
3968    tcg_gen_neg_i64(d, d);
3969}
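
/*
 * Illustrative sketch (not from the original source): the CMTST result
 * for one 32-bit lane; cmtst32_ref is a hypothetical name.  The lane is
 * set to all ones when the operands share any set bit, otherwise to
 * zero, which is what the setcond + neg sequence above computes.
 */
static inline uint32_t cmtst32_ref(uint32_t a, uint32_t b)
{
    return (a & b) ? UINT32_MAX : 0;
}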
3970
3971static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
3972{
3973    tcg_gen_and_vec(vece, d, a, b);
3974    tcg_gen_dupi_vec(vece, a, 0);
3975    tcg_gen_cmp_vec(TCG_COND_NE, vece, d, d, a);
3976}
3977
3978void gen_gvec_cmtst(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3979                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3980{
3981    static const TCGOpcode vecop_list[] = { INDEX_op_cmp_vec, 0 };
3982    static const GVecGen3 ops[4] = {
3983        { .fni4 = gen_helper_neon_tst_u8,
3984          .fniv = gen_cmtst_vec,
3985          .opt_opc = vecop_list,
3986          .vece = MO_8 },
3987        { .fni4 = gen_helper_neon_tst_u16,
3988          .fniv = gen_cmtst_vec,
3989          .opt_opc = vecop_list,
3990          .vece = MO_16 },
3991        { .fni4 = gen_cmtst_i32,
3992          .fniv = gen_cmtst_vec,
3993          .opt_opc = vecop_list,
3994          .vece = MO_32 },
3995        { .fni8 = gen_cmtst_i64,
3996          .fniv = gen_cmtst_vec,
3997          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3998          .opt_opc = vecop_list,
3999          .vece = MO_64 },
4000    };
4001    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4002}
4003
4004void gen_ushl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
4005{
4006    TCGv_i32 lval = tcg_temp_new_i32();
4007    TCGv_i32 rval = tcg_temp_new_i32();
4008    TCGv_i32 lsh = tcg_temp_new_i32();
4009    TCGv_i32 rsh = tcg_temp_new_i32();
4010    TCGv_i32 zero = tcg_constant_i32(0);
4011    TCGv_i32 max = tcg_constant_i32(32);
4012
4013    /*
4014     * Rely on the TCG guarantee that out of range shifts produce
4015     * unspecified results, not undefined behaviour (i.e. no trap).
4016     * Discard out-of-range results after the fact.
4017     */
4018    tcg_gen_ext8s_i32(lsh, shift);
4019    tcg_gen_neg_i32(rsh, lsh);
4020    tcg_gen_shl_i32(lval, src, lsh);
4021    tcg_gen_shr_i32(rval, src, rsh);
4022    tcg_gen_movcond_i32(TCG_COND_LTU, dst, lsh, max, lval, zero);
4023    tcg_gen_movcond_i32(TCG_COND_LTU, dst, rsh, max, rval, dst);
4024
4025    tcg_temp_free_i32(lval);
4026    tcg_temp_free_i32(rval);
4027    tcg_temp_free_i32(lsh);
4028    tcg_temp_free_i32(rsh);
4029}
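
/*
 * Illustrative sketch (not from the original source): the USHL/VSHL.U
 * semantics the expansion above implements for one 32-bit lane;
 * ushl32_ref is a hypothetical name.  The shift operand is the
 * sign-extended low byte of the shift register element: positive values
 * shift left, negative values shift right, and magnitudes of 32 or more
 * produce zero.
 */
static inline uint32_t ushl32_ref(uint32_t src, int8_t shift)
{
    if (shift <= -32 || shift >= 32) {
        return 0;
    }
    return shift >= 0 ? src << shift : src >> -shift;
}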
4030
4031void gen_ushl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
4032{
4033    TCGv_i64 lval = tcg_temp_new_i64();
4034    TCGv_i64 rval = tcg_temp_new_i64();
4035    TCGv_i64 lsh = tcg_temp_new_i64();
4036    TCGv_i64 rsh = tcg_temp_new_i64();
4037    TCGv_i64 zero = tcg_constant_i64(0);
4038    TCGv_i64 max = tcg_constant_i64(64);
4039
4040    /*
4041     * Rely on the TCG guarantee that out of range shifts produce
4042     * unspecified results, not undefined behaviour (i.e. no trap).
4043     * Discard out-of-range results after the fact.
4044     */
4045    tcg_gen_ext8s_i64(lsh, shift);
4046    tcg_gen_neg_i64(rsh, lsh);
4047    tcg_gen_shl_i64(lval, src, lsh);
4048    tcg_gen_shr_i64(rval, src, rsh);
4049    tcg_gen_movcond_i64(TCG_COND_LTU, dst, lsh, max, lval, zero);
4050    tcg_gen_movcond_i64(TCG_COND_LTU, dst, rsh, max, rval, dst);
4051
4052    tcg_temp_free_i64(lval);
4053    tcg_temp_free_i64(rval);
4054    tcg_temp_free_i64(lsh);
4055    tcg_temp_free_i64(rsh);
4056}
4057
4058static void gen_ushl_vec(unsigned vece, TCGv_vec dst,
4059                         TCGv_vec src, TCGv_vec shift)
4060{
4061    TCGv_vec lval = tcg_temp_new_vec_matching(dst);
4062    TCGv_vec rval = tcg_temp_new_vec_matching(dst);
4063    TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
4064    TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
4065    TCGv_vec msk, max;
4066
4067    tcg_gen_neg_vec(vece, rsh, shift);
4068    if (vece == MO_8) {
4069        tcg_gen_mov_vec(lsh, shift);
4070    } else {
4071        msk = tcg_temp_new_vec_matching(dst);
4072        tcg_gen_dupi_vec(vece, msk, 0xff);
4073        tcg_gen_and_vec(vece, lsh, shift, msk);
4074        tcg_gen_and_vec(vece, rsh, rsh, msk);
4075        tcg_temp_free_vec(msk);
4076    }
4077
4078    /*
4079     * Rely on the TCG guarantee that out of range shifts produce
4080     * unspecified results, not undefined behaviour (i.e. no trap).
4081     * Discard out-of-range results after the fact.
4082     */
4083    tcg_gen_shlv_vec(vece, lval, src, lsh);
4084    tcg_gen_shrv_vec(vece, rval, src, rsh);
4085
4086    max = tcg_temp_new_vec_matching(dst);
4087    tcg_gen_dupi_vec(vece, max, 8 << vece);
4088
4089    /*
4090     * The choice of LT (signed) and GEU (unsigned) is biased toward
4091     * the instructions of the x86_64 host.  For MO_8, the whole byte
4092     * is significant so we must use an unsigned compare; otherwise we
4093     * have already masked to a byte and so a signed compare works.
4094     * Other tcg hosts have a full set of comparisons and do not care.
4095     */
4096    if (vece == MO_8) {
4097        tcg_gen_cmp_vec(TCG_COND_GEU, vece, lsh, lsh, max);
4098        tcg_gen_cmp_vec(TCG_COND_GEU, vece, rsh, rsh, max);
4099        tcg_gen_andc_vec(vece, lval, lval, lsh);
4100        tcg_gen_andc_vec(vece, rval, rval, rsh);
4101    } else {
4102        tcg_gen_cmp_vec(TCG_COND_LT, vece, lsh, lsh, max);
4103        tcg_gen_cmp_vec(TCG_COND_LT, vece, rsh, rsh, max);
4104        tcg_gen_and_vec(vece, lval, lval, lsh);
4105        tcg_gen_and_vec(vece, rval, rval, rsh);
4106    }
4107    tcg_gen_or_vec(vece, dst, lval, rval);
4108
4109    tcg_temp_free_vec(max);
4110    tcg_temp_free_vec(lval);
4111    tcg_temp_free_vec(rval);
4112    tcg_temp_free_vec(lsh);
4113    tcg_temp_free_vec(rsh);
4114}
4115
4116void gen_gvec_ushl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4117                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4118{
4119    static const TCGOpcode vecop_list[] = {
4120        INDEX_op_neg_vec, INDEX_op_shlv_vec,
4121        INDEX_op_shrv_vec, INDEX_op_cmp_vec, 0
4122    };
4123    static const GVecGen3 ops[4] = {
4124        { .fniv = gen_ushl_vec,
4125          .fno = gen_helper_gvec_ushl_b,
4126          .opt_opc = vecop_list,
4127          .vece = MO_8 },
4128        { .fniv = gen_ushl_vec,
4129          .fno = gen_helper_gvec_ushl_h,
4130          .opt_opc = vecop_list,
4131          .vece = MO_16 },
4132        { .fni4 = gen_ushl_i32,
4133          .fniv = gen_ushl_vec,
4134          .opt_opc = vecop_list,
4135          .vece = MO_32 },
4136        { .fni8 = gen_ushl_i64,
4137          .fniv = gen_ushl_vec,
4138          .opt_opc = vecop_list,
4139          .vece = MO_64 },
4140    };
4141    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4142}
4143
4144void gen_sshl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
4145{
4146    TCGv_i32 lval = tcg_temp_new_i32();
4147    TCGv_i32 rval = tcg_temp_new_i32();
4148    TCGv_i32 lsh = tcg_temp_new_i32();
4149    TCGv_i32 rsh = tcg_temp_new_i32();
4150    TCGv_i32 zero = tcg_constant_i32(0);
4151    TCGv_i32 max = tcg_constant_i32(31);
4152
4153    /*
4154     * Rely on the TCG guarantee that out of range shifts produce
4155     * unspecified results, not undefined behaviour (i.e. no trap).
4156     * Discard out-of-range results after the fact.
4157     */
4158    tcg_gen_ext8s_i32(lsh, shift);
4159    tcg_gen_neg_i32(rsh, lsh);
4160    tcg_gen_shl_i32(lval, src, lsh);
4161    tcg_gen_umin_i32(rsh, rsh, max);
4162    tcg_gen_sar_i32(rval, src, rsh);
4163    tcg_gen_movcond_i32(TCG_COND_LEU, lval, lsh, max, lval, zero);
4164    tcg_gen_movcond_i32(TCG_COND_LT, dst, lsh, zero, rval, lval);
4165
4166    tcg_temp_free_i32(lval);
4167    tcg_temp_free_i32(rval);
4168    tcg_temp_free_i32(lsh);
4169    tcg_temp_free_i32(rsh);
4170}
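
/*
 * Illustrative sketch (not from the original source): the SSHL/VSHL.S
 * semantics for one 32-bit lane; sshl32_ref is a hypothetical name.
 * Right shifts are arithmetic, and a right shift of 32 or more is
 * clamped to 31 so the result is all copies of the sign bit, matching
 * the umin bound used above.
 */
static inline int32_t sshl32_ref(int32_t src, int8_t shift)
{
    if (shift >= 32) {
        return 0;
    }
    if (shift >= 0) {
        return (int32_t)((uint32_t)src << shift); /* modulo 2^32, like the TCG op */
    }
    return src >> MIN(-shift, 31);                /* arithmetic right shift */
}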
4171
4172void gen_sshl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
4173{
4174    TCGv_i64 lval = tcg_temp_new_i64();
4175    TCGv_i64 rval = tcg_temp_new_i64();
4176    TCGv_i64 lsh = tcg_temp_new_i64();
4177    TCGv_i64 rsh = tcg_temp_new_i64();
4178    TCGv_i64 zero = tcg_constant_i64(0);
4179    TCGv_i64 max = tcg_constant_i64(63);
4180
4181    /*
4182     * Rely on the TCG guarantee that out of range shifts produce
4183     * unspecified results, not undefined behaviour (i.e. no trap).
4184     * Discard out-of-range results after the fact.
4185     */
4186    tcg_gen_ext8s_i64(lsh, shift);
4187    tcg_gen_neg_i64(rsh, lsh);
4188    tcg_gen_shl_i64(lval, src, lsh);
4189    tcg_gen_umin_i64(rsh, rsh, max);
4190    tcg_gen_sar_i64(rval, src, rsh);
4191    tcg_gen_movcond_i64(TCG_COND_LEU, lval, lsh, max, lval, zero);
4192    tcg_gen_movcond_i64(TCG_COND_LT, dst, lsh, zero, rval, lval);
4193
4194    tcg_temp_free_i64(lval);
4195    tcg_temp_free_i64(rval);
4196    tcg_temp_free_i64(lsh);
4197    tcg_temp_free_i64(rsh);
4198}
4199
4200static void gen_sshl_vec(unsigned vece, TCGv_vec dst,
4201                         TCGv_vec src, TCGv_vec shift)
4202{
4203    TCGv_vec lval = tcg_temp_new_vec_matching(dst);
4204    TCGv_vec rval = tcg_temp_new_vec_matching(dst);
4205    TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
4206    TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
4207    TCGv_vec tmp = tcg_temp_new_vec_matching(dst);
4208
4209    /*
4210     * Rely on the TCG guarantee that out of range shifts produce
4211     * unspecified results, not undefined behaviour (i.e. no trap).
4212     * Discard out-of-range results after the fact.
4213     */
4214    tcg_gen_neg_vec(vece, rsh, shift);
4215    if (vece == MO_8) {
4216        tcg_gen_mov_vec(lsh, shift);
4217    } else {
4218        tcg_gen_dupi_vec(vece, tmp, 0xff);
4219        tcg_gen_and_vec(vece, lsh, shift, tmp);
4220        tcg_gen_and_vec(vece, rsh, rsh, tmp);
4221    }
4222
4223    /* Bound rsh so out of bound right shift gets -1.  */
4224    tcg_gen_dupi_vec(vece, tmp, (8 << vece) - 1);
4225    tcg_gen_umin_vec(vece, rsh, rsh, tmp);
4226    tcg_gen_cmp_vec(TCG_COND_GT, vece, tmp, lsh, tmp);
4227
4228    tcg_gen_shlv_vec(vece, lval, src, lsh);
4229    tcg_gen_sarv_vec(vece, rval, src, rsh);
4230
4231    /* Select in-bound left shift.  */
4232    tcg_gen_andc_vec(vece, lval, lval, tmp);
4233
4234    /* Select between left and right shift.  */
4235    if (vece == MO_8) {
4236        tcg_gen_dupi_vec(vece, tmp, 0);
4237        tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, rval, lval);
4238    } else {
4239        tcg_gen_dupi_vec(vece, tmp, 0x80);
4240        tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, lval, rval);
4241    }
4242
4243    tcg_temp_free_vec(lval);
4244    tcg_temp_free_vec(rval);
4245    tcg_temp_free_vec(lsh);
4246    tcg_temp_free_vec(rsh);
4247    tcg_temp_free_vec(tmp);
4248}
4249
4250void gen_gvec_sshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4251                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4252{
4253    static const TCGOpcode vecop_list[] = {
4254        INDEX_op_neg_vec, INDEX_op_umin_vec, INDEX_op_shlv_vec,
4255        INDEX_op_sarv_vec, INDEX_op_cmp_vec, INDEX_op_cmpsel_vec, 0
4256    };
4257    static const GVecGen3 ops[4] = {
4258        { .fniv = gen_sshl_vec,
4259          .fno = gen_helper_gvec_sshl_b,
4260          .opt_opc = vecop_list,
4261          .vece = MO_8 },
4262        { .fniv = gen_sshl_vec,
4263          .fno = gen_helper_gvec_sshl_h,
4264          .opt_opc = vecop_list,
4265          .vece = MO_16 },
4266        { .fni4 = gen_sshl_i32,
4267          .fniv = gen_sshl_vec,
4268          .opt_opc = vecop_list,
4269          .vece = MO_32 },
4270        { .fni8 = gen_sshl_i64,
4271          .fniv = gen_sshl_vec,
4272          .opt_opc = vecop_list,
4273          .vece = MO_64 },
4274    };
4275    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4276}
4277
4278static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4279                          TCGv_vec a, TCGv_vec b)
4280{
4281    TCGv_vec x = tcg_temp_new_vec_matching(t);
4282    tcg_gen_add_vec(vece, x, a, b);
4283    tcg_gen_usadd_vec(vece, t, a, b);
4284    tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4285    tcg_gen_or_vec(vece, sat, sat, x);
4286    tcg_temp_free_vec(x);
4287}
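
/*
 * Illustrative sketch (not from the original source): the saturation
 * test used above, for one uint8_t lane; uqadd8_ref is a hypothetical
 * name.  The wrapping sum and the saturating sum are compared, and any
 * difference is accumulated into the sticky QC flag.
 */
static inline uint8_t uqadd8_ref(uint8_t a, uint8_t b, bool *qc)
{
    unsigned sum = a + b;
    unsigned sat = sum > 0xff ? 0xff : sum;
    *qc |= ((uint8_t)sum != sat);
    return (uint8_t)sat;
}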
4288
4289void gen_gvec_uqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4290                       uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4291{
4292    static const TCGOpcode vecop_list[] = {
4293        INDEX_op_usadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
4294    };
4295    static const GVecGen4 ops[4] = {
4296        { .fniv = gen_uqadd_vec,
4297          .fno = gen_helper_gvec_uqadd_b,
4298          .write_aofs = true,
4299          .opt_opc = vecop_list,
4300          .vece = MO_8 },
4301        { .fniv = gen_uqadd_vec,
4302          .fno = gen_helper_gvec_uqadd_h,
4303          .write_aofs = true,
4304          .opt_opc = vecop_list,
4305          .vece = MO_16 },
4306        { .fniv = gen_uqadd_vec,
4307          .fno = gen_helper_gvec_uqadd_s,
4308          .write_aofs = true,
4309          .opt_opc = vecop_list,
4310          .vece = MO_32 },
4311        { .fniv = gen_uqadd_vec,
4312          .fno = gen_helper_gvec_uqadd_d,
4313          .write_aofs = true,
4314          .opt_opc = vecop_list,
4315          .vece = MO_64 },
4316    };
4317    tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4318                   rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4319}
4320
4321static void gen_sqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4322                          TCGv_vec a, TCGv_vec b)
4323{
4324    TCGv_vec x = tcg_temp_new_vec_matching(t);
4325    tcg_gen_add_vec(vece, x, a, b);
4326    tcg_gen_ssadd_vec(vece, t, a, b);
4327    tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4328    tcg_gen_or_vec(vece, sat, sat, x);
4329    tcg_temp_free_vec(x);
4330}
4331
4332void gen_gvec_sqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4333                       uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4334{
4335    static const TCGOpcode vecop_list[] = {
4336        INDEX_op_ssadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
4337    };
4338    static const GVecGen4 ops[4] = {
4339        { .fniv = gen_sqadd_vec,
4340          .fno = gen_helper_gvec_sqadd_b,
4341          .opt_opc = vecop_list,
4342          .write_aofs = true,
4343          .vece = MO_8 },
4344        { .fniv = gen_sqadd_vec,
4345          .fno = gen_helper_gvec_sqadd_h,
4346          .opt_opc = vecop_list,
4347          .write_aofs = true,
4348          .vece = MO_16 },
4349        { .fniv = gen_sqadd_vec,
4350          .fno = gen_helper_gvec_sqadd_s,
4351          .opt_opc = vecop_list,
4352          .write_aofs = true,
4353          .vece = MO_32 },
4354        { .fniv = gen_sqadd_vec,
4355          .fno = gen_helper_gvec_sqadd_d,
4356          .opt_opc = vecop_list,
4357          .write_aofs = true,
4358          .vece = MO_64 },
4359    };
4360    tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4361                   rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4362}
4363
4364static void gen_uqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4365                          TCGv_vec a, TCGv_vec b)
4366{
4367    TCGv_vec x = tcg_temp_new_vec_matching(t);
4368    tcg_gen_sub_vec(vece, x, a, b);
4369    tcg_gen_ussub_vec(vece, t, a, b);
4370    tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4371    tcg_gen_or_vec(vece, sat, sat, x);
4372    tcg_temp_free_vec(x);
4373}
4374
4375void gen_gvec_uqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4376                       uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4377{
4378    static const TCGOpcode vecop_list[] = {
4379        INDEX_op_ussub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
4380    };
4381    static const GVecGen4 ops[4] = {
4382        { .fniv = gen_uqsub_vec,
4383          .fno = gen_helper_gvec_uqsub_b,
4384          .opt_opc = vecop_list,
4385          .write_aofs = true,
4386          .vece = MO_8 },
4387        { .fniv = gen_uqsub_vec,
4388          .fno = gen_helper_gvec_uqsub_h,
4389          .opt_opc = vecop_list,
4390          .write_aofs = true,
4391          .vece = MO_16 },
4392        { .fniv = gen_uqsub_vec,
4393          .fno = gen_helper_gvec_uqsub_s,
4394          .opt_opc = vecop_list,
4395          .write_aofs = true,
4396          .vece = MO_32 },
4397        { .fniv = gen_uqsub_vec,
4398          .fno = gen_helper_gvec_uqsub_d,
4399          .opt_opc = vecop_list,
4400          .write_aofs = true,
4401          .vece = MO_64 },
4402    };
4403    tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4404                   rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4405}
4406
4407static void gen_sqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4408                          TCGv_vec a, TCGv_vec b)
4409{
4410    TCGv_vec x = tcg_temp_new_vec_matching(t);
4411    tcg_gen_sub_vec(vece, x, a, b);
4412    tcg_gen_sssub_vec(vece, t, a, b);
4413    tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4414    tcg_gen_or_vec(vece, sat, sat, x);
4415    tcg_temp_free_vec(x);
4416}
4417
4418void gen_gvec_sqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4419                       uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4420{
4421    static const TCGOpcode vecop_list[] = {
4422        INDEX_op_sssub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
4423    };
4424    static const GVecGen4 ops[4] = {
4425        { .fniv = gen_sqsub_vec,
4426          .fno = gen_helper_gvec_sqsub_b,
4427          .opt_opc = vecop_list,
4428          .write_aofs = true,
4429          .vece = MO_8 },
4430        { .fniv = gen_sqsub_vec,
4431          .fno = gen_helper_gvec_sqsub_h,
4432          .opt_opc = vecop_list,
4433          .write_aofs = true,
4434          .vece = MO_16 },
4435        { .fniv = gen_sqsub_vec,
4436          .fno = gen_helper_gvec_sqsub_s,
4437          .opt_opc = vecop_list,
4438          .write_aofs = true,
4439          .vece = MO_32 },
4440        { .fniv = gen_sqsub_vec,
4441          .fno = gen_helper_gvec_sqsub_d,
4442          .opt_opc = vecop_list,
4443          .write_aofs = true,
4444          .vece = MO_64 },
4445    };
4446    tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4447                   rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4448}
4449
4450static void gen_sabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4451{
4452    TCGv_i32 t = tcg_temp_new_i32();
4453
4454    tcg_gen_sub_i32(t, a, b);
4455    tcg_gen_sub_i32(d, b, a);
4456    tcg_gen_movcond_i32(TCG_COND_LT, d, a, b, d, t);
4457    tcg_temp_free_i32(t);
4458}
4459
4460static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4461{
4462    TCGv_i64 t = tcg_temp_new_i64();
4463
4464    tcg_gen_sub_i64(t, a, b);
4465    tcg_gen_sub_i64(d, b, a);
4466    tcg_gen_movcond_i64(TCG_COND_LT, d, a, b, d, t);
4467    tcg_temp_free_i64(t);
4468}
4469
4470static void gen_sabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4471{
4472    TCGv_vec t = tcg_temp_new_vec_matching(d);
4473
4474    tcg_gen_smin_vec(vece, t, a, b);
4475    tcg_gen_smax_vec(vece, d, a, b);
4476    tcg_gen_sub_vec(vece, d, d, t);
4477    tcg_temp_free_vec(t);
4478}
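
/*
 * Illustrative sketch (not from the original source): the signed
 * absolute difference for one 32-bit lane; sabd32_ref is a hypothetical
 * name.  The subtraction is done in unsigned arithmetic so that extreme
 * operands wrap modulo 2^32, exactly as the TCG expansion does.
 */
static inline int32_t sabd32_ref(int32_t a, int32_t b)
{
    uint32_t diff = a < b ? (uint32_t)b - (uint32_t)a
                          : (uint32_t)a - (uint32_t)b;
    return (int32_t)diff;
}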
4479
4480void gen_gvec_sabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4481                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4482{
4483    static const TCGOpcode vecop_list[] = {
4484        INDEX_op_sub_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0
4485    };
4486    static const GVecGen3 ops[4] = {
4487        { .fniv = gen_sabd_vec,
4488          .fno = gen_helper_gvec_sabd_b,
4489          .opt_opc = vecop_list,
4490          .vece = MO_8 },
4491        { .fniv = gen_sabd_vec,
4492          .fno = gen_helper_gvec_sabd_h,
4493          .opt_opc = vecop_list,
4494          .vece = MO_16 },
4495        { .fni4 = gen_sabd_i32,
4496          .fniv = gen_sabd_vec,
4497          .fno = gen_helper_gvec_sabd_s,
4498          .opt_opc = vecop_list,
4499          .vece = MO_32 },
4500        { .fni8 = gen_sabd_i64,
4501          .fniv = gen_sabd_vec,
4502          .fno = gen_helper_gvec_sabd_d,
4503          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4504          .opt_opc = vecop_list,
4505          .vece = MO_64 },
4506    };
4507    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4508}
4509
4510static void gen_uabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4511{
4512    TCGv_i32 t = tcg_temp_new_i32();
4513
4514    tcg_gen_sub_i32(t, a, b);
4515    tcg_gen_sub_i32(d, b, a);
4516    tcg_gen_movcond_i32(TCG_COND_LTU, d, a, b, d, t);
4517    tcg_temp_free_i32(t);
4518}
4519
4520static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4521{
4522    TCGv_i64 t = tcg_temp_new_i64();
4523
4524    tcg_gen_sub_i64(t, a, b);
4525    tcg_gen_sub_i64(d, b, a);
4526    tcg_gen_movcond_i64(TCG_COND_LTU, d, a, b, d, t);
4527    tcg_temp_free_i64(t);
4528}
4529
4530static void gen_uabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4531{
4532    TCGv_vec t = tcg_temp_new_vec_matching(d);
4533
4534    tcg_gen_umin_vec(vece, t, a, b);
4535    tcg_gen_umax_vec(vece, d, a, b);
4536    tcg_gen_sub_vec(vece, d, d, t);
4537    tcg_temp_free_vec(t);
4538}
4539
4540void gen_gvec_uabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4541                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4542{
4543    static const TCGOpcode vecop_list[] = {
4544        INDEX_op_sub_vec, INDEX_op_umin_vec, INDEX_op_umax_vec, 0
4545    };
4546    static const GVecGen3 ops[4] = {
4547        { .fniv = gen_uabd_vec,
4548          .fno = gen_helper_gvec_uabd_b,
4549          .opt_opc = vecop_list,
4550          .vece = MO_8 },
4551        { .fniv = gen_uabd_vec,
4552          .fno = gen_helper_gvec_uabd_h,
4553          .opt_opc = vecop_list,
4554          .vece = MO_16 },
4555        { .fni4 = gen_uabd_i32,
4556          .fniv = gen_uabd_vec,
4557          .fno = gen_helper_gvec_uabd_s,
4558          .opt_opc = vecop_list,
4559          .vece = MO_32 },
4560        { .fni8 = gen_uabd_i64,
4561          .fniv = gen_uabd_vec,
4562          .fno = gen_helper_gvec_uabd_d,
4563          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4564          .opt_opc = vecop_list,
4565          .vece = MO_64 },
4566    };
4567    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4568}
4569
4570static void gen_saba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4571{
4572    TCGv_i32 t = tcg_temp_new_i32();
4573    gen_sabd_i32(t, a, b);
4574    tcg_gen_add_i32(d, d, t);
4575    tcg_temp_free_i32(t);
4576}
4577
4578static void gen_saba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4579{
4580    TCGv_i64 t = tcg_temp_new_i64();
4581    gen_sabd_i64(t, a, b);
4582    tcg_gen_add_i64(d, d, t);
4583    tcg_temp_free_i64(t);
4584}
4585
4586static void gen_saba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4587{
4588    TCGv_vec t = tcg_temp_new_vec_matching(d);
4589    gen_sabd_vec(vece, t, a, b);
4590    tcg_gen_add_vec(vece, d, d, t);
4591    tcg_temp_free_vec(t);
4592}
4593
4594void gen_gvec_saba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4595                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4596{
4597    static const TCGOpcode vecop_list[] = {
4598        INDEX_op_sub_vec, INDEX_op_add_vec,
4599        INDEX_op_smin_vec, INDEX_op_smax_vec, 0
4600    };
4601    static const GVecGen3 ops[4] = {
4602        { .fniv = gen_saba_vec,
4603          .fno = gen_helper_gvec_saba_b,
4604          .opt_opc = vecop_list,
4605          .load_dest = true,
4606          .vece = MO_8 },
4607        { .fniv = gen_saba_vec,
4608          .fno = gen_helper_gvec_saba_h,
4609          .opt_opc = vecop_list,
4610          .load_dest = true,
4611          .vece = MO_16 },
4612        { .fni4 = gen_saba_i32,
4613          .fniv = gen_saba_vec,
4614          .fno = gen_helper_gvec_saba_s,
4615          .opt_opc = vecop_list,
4616          .load_dest = true,
4617          .vece = MO_32 },
4618        { .fni8 = gen_saba_i64,
4619          .fniv = gen_saba_vec,
4620          .fno = gen_helper_gvec_saba_d,
4621          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4622          .opt_opc = vecop_list,
4623          .load_dest = true,
4624          .vece = MO_64 },
4625    };
4626    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4627}
4628
4629static void gen_uaba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4630{
4631    TCGv_i32 t = tcg_temp_new_i32();
4632    gen_uabd_i32(t, a, b);
4633    tcg_gen_add_i32(d, d, t);
4634    tcg_temp_free_i32(t);
4635}
4636
4637static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4638{
4639    TCGv_i64 t = tcg_temp_new_i64();
4640    gen_uabd_i64(t, a, b);
4641    tcg_gen_add_i64(d, d, t);
4642    tcg_temp_free_i64(t);
4643}
4644
4645static void gen_uaba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4646{
4647    TCGv_vec t = tcg_temp_new_vec_matching(d);
4648    gen_uabd_vec(vece, t, a, b);
4649    tcg_gen_add_vec(vece, d, d, t);
4650    tcg_temp_free_vec(t);
4651}
4652
4653void gen_gvec_uaba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4654                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4655{
4656    static const TCGOpcode vecop_list[] = {
4657        INDEX_op_sub_vec, INDEX_op_add_vec,
4658        INDEX_op_umin_vec, INDEX_op_umax_vec, 0
4659    };
4660    static const GVecGen3 ops[4] = {
4661        { .fniv = gen_uaba_vec,
4662          .fno = gen_helper_gvec_uaba_b,
4663          .opt_opc = vecop_list,
4664          .load_dest = true,
4665          .vece = MO_8 },
4666        { .fniv = gen_uaba_vec,
4667          .fno = gen_helper_gvec_uaba_h,
4668          .opt_opc = vecop_list,
4669          .load_dest = true,
4670          .vece = MO_16 },
4671        { .fni4 = gen_uaba_i32,
4672          .fniv = gen_uaba_vec,
4673          .fno = gen_helper_gvec_uaba_s,
4674          .opt_opc = vecop_list,
4675          .load_dest = true,
4676          .vece = MO_32 },
4677        { .fni8 = gen_uaba_i64,
4678          .fniv = gen_uaba_vec,
4679          .fno = gen_helper_gvec_uaba_d,
4680          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4681          .opt_opc = vecop_list,
4682          .load_dest = true,
4683          .vece = MO_64 },
4684    };
4685    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4686}
4687
4688static void do_coproc_insn(DisasContext *s, int cpnum, int is64,
4689                           int opc1, int crn, int crm, int opc2,
4690                           bool isread, int rt, int rt2)
4691{
4692    const ARMCPRegInfo *ri;
4693
4694    ri = get_arm_cp_reginfo(s->cp_regs,
4695            ENCODE_CP_REG(cpnum, is64, s->ns, crn, crm, opc1, opc2));
4696    if (ri) {
4697        bool need_exit_tb;
4698
4699        /* Check access permissions */
4700        if (!cp_access_ok(s->current_el, ri, isread)) {
4701            unallocated_encoding(s);
4702            return;
4703        }
4704
4705        if (s->hstr_active || ri->accessfn ||
4706            (arm_dc_feature(s, ARM_FEATURE_XSCALE) && cpnum < 14)) {
4707            /* Emit code to perform further access permissions checks at
4708             * runtime; this may result in an exception.
4709             * Note that on XScale all cp0..c13 registers do an access check
4710             * call in order to handle c15_cpar.
4711             */
4712            uint32_t syndrome;
4713
4714            /* Note that since we are an implementation which takes an
4715             * exception on a trapped conditional instruction only if the
4716             * instruction passes its condition code check, we can take
4717             * advantage of the clause in the ARM ARM that allows us to set
4718             * the COND field in the instruction to 0xE in all cases.
4719             * We could fish the actual condition out of the insn (ARM)
4720             * or the condexec bits (Thumb) but it isn't necessary.
4721             */
4722            switch (cpnum) {
4723            case 14:
4724                if (is64) {
4725                    syndrome = syn_cp14_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
4726                                                 isread, false);
4727                } else {
4728                    syndrome = syn_cp14_rt_trap(1, 0xe, opc1, opc2, crn, crm,
4729                                                rt, isread, false);
4730                }
4731                break;
4732            case 15:
4733                if (is64) {
4734                    syndrome = syn_cp15_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
4735                                                 isread, false);
4736                } else {
4737                    syndrome = syn_cp15_rt_trap(1, 0xe, opc1, opc2, crn, crm,
4738                                                rt, isread, false);
4739                }
4740                break;
4741            default:
4742                /* ARMv8 defines that only coprocessors 14 and 15 exist,
4743                 * so this can only happen if this is an ARMv7 or earlier CPU,
4744                 * in which case the syndrome information won't actually be
4745                 * guest visible.
4746                 */
4747                assert(!arm_dc_feature(s, ARM_FEATURE_V8));
4748                syndrome = syn_uncategorized();
4749                break;
4750            }
4751
4752            gen_set_condexec(s);
4753            gen_set_pc_im(s, s->pc_curr);
4754            gen_helper_access_check_cp_reg(cpu_env,
4755                                           tcg_constant_ptr(ri),
4756                                           tcg_constant_i32(syndrome),
4757                                           tcg_constant_i32(isread));
4758        } else if (ri->type & ARM_CP_RAISES_EXC) {
4759            /*
4760             * The readfn or writefn might raise an exception;
4761             * synchronize the CPU state in case it does.
4762             */
4763            gen_set_condexec(s);
4764            gen_set_pc_im(s, s->pc_curr);
4765        }
4766
4767        /* Handle special cases first */
4768        switch (ri->type & ARM_CP_SPECIAL_MASK) {
4769        case 0:
4770            break;
4771        case ARM_CP_NOP:
4772            return;
4773        case ARM_CP_WFI:
4774            if (isread) {
4775                unallocated_encoding(s);
4776                return;
4777            }
4778            gen_set_pc_im(s, s->base.pc_next);
4779            s->base.is_jmp = DISAS_WFI;
4780            return;
4781        default:
4782            g_assert_not_reached();
4783        }
4784
4785        if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
4786            gen_io_start();
4787        }
4788
4789        if (isread) {
4790            /* Read */
4791            if (is64) {
4792                TCGv_i64 tmp64;
4793                TCGv_i32 tmp;
4794                if (ri->type & ARM_CP_CONST) {
4795                    tmp64 = tcg_constant_i64(ri->resetvalue);
4796                } else if (ri->readfn) {
4797                    tmp64 = tcg_temp_new_i64();
4798                    gen_helper_get_cp_reg64(tmp64, cpu_env,
4799                                            tcg_constant_ptr(ri));
4800                } else {
4801                    tmp64 = tcg_temp_new_i64();
4802                    tcg_gen_ld_i64(tmp64, cpu_env, ri->fieldoffset);
4803                }
4804                tmp = tcg_temp_new_i32();
4805                tcg_gen_extrl_i64_i32(tmp, tmp64);
4806                store_reg(s, rt, tmp);
4807                tmp = tcg_temp_new_i32();
4808                tcg_gen_extrh_i64_i32(tmp, tmp64);
4809                tcg_temp_free_i64(tmp64);
4810                store_reg(s, rt2, tmp);
4811            } else {
4812                TCGv_i32 tmp;
4813                if (ri->type & ARM_CP_CONST) {
4814                    tmp = tcg_constant_i32(ri->resetvalue);
4815                } else if (ri->readfn) {
4816                    tmp = tcg_temp_new_i32();
4817                    gen_helper_get_cp_reg(tmp, cpu_env, tcg_constant_ptr(ri));
4818                } else {
4819                    tmp = load_cpu_offset(ri->fieldoffset);
4820                }
4821                if (rt == 15) {
4822                    /* A destination register of r15 for 32-bit loads sets
4823                     * the condition codes from the high 4 bits of the value.
4824                     */
4825                    gen_set_nzcv(tmp);
4826                    tcg_temp_free_i32(tmp);
4827                } else {
4828                    store_reg(s, rt, tmp);
4829                }
4830            }
4831        } else {
4832            /* Write */
4833            if (ri->type & ARM_CP_CONST) {
4834                /* If not forbidden by access permissions, treat as WI */
4835                return;
4836            }
4837
4838            if (is64) {
4839                TCGv_i32 tmplo, tmphi;
4840                TCGv_i64 tmp64 = tcg_temp_new_i64();
4841                tmplo = load_reg(s, rt);
4842                tmphi = load_reg(s, rt2);
4843                tcg_gen_concat_i32_i64(tmp64, tmplo, tmphi);
4844                tcg_temp_free_i32(tmplo);
4845                tcg_temp_free_i32(tmphi);
4846                if (ri->writefn) {
4847                    gen_helper_set_cp_reg64(cpu_env, tcg_constant_ptr(ri),
4848                                            tmp64);
4849                } else {
4850                    tcg_gen_st_i64(tmp64, cpu_env, ri->fieldoffset);
4851                }
4852                tcg_temp_free_i64(tmp64);
4853            } else {
4854                TCGv_i32 tmp = load_reg(s, rt);
4855                if (ri->writefn) {
4856                    gen_helper_set_cp_reg(cpu_env, tcg_constant_ptr(ri), tmp);
4857                    tcg_temp_free_i32(tmp);
4858                } else {
4859                    store_cpu_offset(tmp, ri->fieldoffset, 4);
4860                }
4861            }
4862        }
4863
4864        /* I/O operations must end the TB here (whether read or write) */
4865        need_exit_tb = ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) &&
4866                        (ri->type & ARM_CP_IO));
4867
4868        if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
4869            /*
4870             * A write to any coprocessor register that ends a TB
4871             * must rebuild the hflags for the next TB.
4872             */
4873            gen_rebuild_hflags(s, ri->type & ARM_CP_NEWEL);
4874            /*
4875             * We default to ending the TB on a coprocessor register write,
4876             * but allow this to be suppressed by the register definition
4877             * (usually only necessary to work around guest bugs).
4878             */
4879            need_exit_tb = true;
4880        }
4881        if (need_exit_tb) {
4882            gen_lookup_tb(s);
4883        }
4884
4885        return;
4886    }
4887
4888    /* Unknown register; this might be a guest error or a QEMU
4889     * unimplemented feature.
4890     */
4891    if (is64) {
4892        qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
4893                      "64 bit system register cp:%d opc1: %d crm:%d "
4894                      "(%s)\n",
4895                      isread ? "read" : "write", cpnum, opc1, crm,
4896                      s->ns ? "non-secure" : "secure");
4897    } else {
4898        qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
4899                      "system register cp:%d opc1:%d crn:%d crm:%d opc2:%d "
4900                      "(%s)\n",
4901                      isread ? "read" : "write", cpnum, opc1, crn, crm, opc2,
4902                      s->ns ? "non-secure" : "secure");
4903    }
4904
4905    unallocated_encoding(s);
4906    return;
4907}
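/*
 * do_coproc_insn() is reached from the trans_MCR/MRC handlers below for
 * the 32-bit forms and from trans_MCRR/MRRC for the 64-bit register-pair
 * forms; the latter pass is64 = true and supply crn and opc2 as 0, since
 * those fields do not exist in the MCRR/MRRC encodings.
 */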
4908
4909/* Decode XScale DSP or iwMMXt insn (in the copro space, cp=0 or 1) */
4910static void disas_xscale_insn(DisasContext *s, uint32_t insn)
4911{
4912    int cpnum = (insn >> 8) & 0xf;
4913
4914    if (extract32(s->c15_cpar, cpnum, 1) == 0) {
4915        unallocated_encoding(s);
4916    } else if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
4917        if (disas_iwmmxt_insn(s, insn)) {
4918            unallocated_encoding(s);
4919        }
4920    } else if (arm_dc_feature(s, ARM_FEATURE_XSCALE)) {
4921        if (disas_dsp_insn(s, insn)) {
4922            unallocated_encoding(s);
4923        }
4924    }
4925}
4926
4927/* Store a 64-bit value to a register pair.  Clobbers val.  */
4928static void gen_storeq_reg(DisasContext *s, int rlow, int rhigh, TCGv_i64 val)
4929{
4930    TCGv_i32 tmp;
4931    tmp = tcg_temp_new_i32();
4932    tcg_gen_extrl_i64_i32(tmp, val);
4933    store_reg(s, rlow, tmp);
4934    tmp = tcg_temp_new_i32();
4935    tcg_gen_extrh_i64_i32(tmp, val);
4936    store_reg(s, rhigh, tmp);
4937}
4938
4939/* Load a 64-bit value from a register pair and add it to val.  */
4940static void gen_addq(DisasContext *s, TCGv_i64 val, int rlow, int rhigh)
4941{
4942    TCGv_i64 tmp;
4943    TCGv_i32 tmpl;
4944    TCGv_i32 tmph;
4945
4946    /* Load the 64-bit value rhigh:rlow.  */
4947    tmpl = load_reg(s, rlow);
4948    tmph = load_reg(s, rhigh);
4949    tmp = tcg_temp_new_i64();
4950    tcg_gen_concat_i32_i64(tmp, tmpl, tmph);
4951    tcg_temp_free_i32(tmpl);
4952    tcg_temp_free_i32(tmph);
4953    tcg_gen_add_i64(val, val, tmp);
4954    tcg_temp_free_i64(tmp);
4955}
4956
4957/* Set N and Z flags from hi|lo.  */
4958static void gen_logicq_cc(TCGv_i32 lo, TCGv_i32 hi)
4959{
4960    tcg_gen_mov_i32(cpu_NF, hi);
4961    tcg_gen_or_i32(cpu_ZF, lo, hi);
4962}
4963
4964/* Load/Store exclusive instructions are implemented by remembering
4965   the value/address loaded, and seeing if these are the same
4966   when the store is performed.  This should be sufficient to implement
4967   the architecturally mandated semantics, and avoids having to monitor
4968   regular stores.  The compare vs the remembered value is done during
4969   the cmpxchg operation, but we must compare the addresses manually.  */
4970static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
4971                               TCGv_i32 addr, int size)
4972{
4973    TCGv_i32 tmp = tcg_temp_new_i32();
4974    MemOp opc = size | MO_ALIGN | s->be_data;
4975
4976    s->is_ldex = true;
4977
4978    if (size == 3) {
4979        TCGv_i32 tmp2 = tcg_temp_new_i32();
4980        TCGv_i64 t64 = tcg_temp_new_i64();
4981
4982        /*
4983         * For AArch32, architecturally the 32-bit word at the lowest
4984         * address is always Rt and the one at addr+4 is Rt2, even if
4985         * the CPU is big-endian. That means we don't want to do a
4986         * gen_aa32_ld_i64(), which checks SCTLR_B as if for an
4987         * architecturally 64-bit access, but instead do a 64-bit access
4988         * using MO_BE if appropriate and then split the two halves.
4989         */
4990        TCGv taddr = gen_aa32_addr(s, addr, opc);
4991
4992        tcg_gen_qemu_ld_i64(t64, taddr, get_mem_index(s), opc);
4993        tcg_temp_free(taddr);
4994        tcg_gen_mov_i64(cpu_exclusive_val, t64);
4995        if (s->be_data == MO_BE) {
4996            tcg_gen_extr_i64_i32(tmp2, tmp, t64);
4997        } else {
4998            tcg_gen_extr_i64_i32(tmp, tmp2, t64);
4999        }
5000        tcg_temp_free_i64(t64);
5001
5002        store_reg(s, rt2, tmp2);
5003    } else {
5004        gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), opc);
5005        tcg_gen_extu_i32_i64(cpu_exclusive_val, tmp);
5006    }
5007
5008    store_reg(s, rt, tmp);
5009    tcg_gen_extu_i32_i64(cpu_exclusive_addr, addr);
5010}
5011
5012static void gen_clrex(DisasContext *s)
5013{
5014    tcg_gen_movi_i64(cpu_exclusive_addr, -1);
5015}
5016
5017static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
5018                                TCGv_i32 addr, int size)
5019{
5020    TCGv_i32 t0, t1, t2;
5021    TCGv_i64 extaddr;
5022    TCGv taddr;
5023    TCGLabel *done_label;
5024    TCGLabel *fail_label;
5025    MemOp opc = size | MO_ALIGN | s->be_data;
5026
5027    /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]) {
5028         [addr] = {Rt};
5029         {Rd} = 0;
5030       } else {
5031         {Rd} = 1;
5032       } */
5033    fail_label = gen_new_label();
5034    done_label = gen_new_label();
5035    extaddr = tcg_temp_new_i64();
5036    tcg_gen_extu_i32_i64(extaddr, addr);
5037    tcg_gen_brcond_i64(TCG_COND_NE, extaddr, cpu_exclusive_addr, fail_label);
5038    tcg_temp_free_i64(extaddr);
5039
5040    taddr = gen_aa32_addr(s, addr, opc);
5041    t0 = tcg_temp_new_i32();
5042    t1 = load_reg(s, rt);
5043    if (size == 3) {
5044        TCGv_i64 o64 = tcg_temp_new_i64();
5045        TCGv_i64 n64 = tcg_temp_new_i64();
5046
5047        t2 = load_reg(s, rt2);
5048
5049        /*
5050         * For AArch32, architecturally the 32-bit word at the lowest
5051         * address is always Rt and the one at addr+4 is Rt2, even if
5052         * the CPU is big-endian. Since we're going to treat this as a
5053         * single 64-bit BE store, we need to put the two halves in the
5054         * opposite order for BE to LE, so that they end up in the right
5055         * places.  We don't want gen_aa32_st_i64, because that checks
5056         * SCTLR_B as if for an architectural 64-bit access.
5057         */
5058        if (s->be_data == MO_BE) {
5059            tcg_gen_concat_i32_i64(n64, t2, t1);
5060        } else {
5061            tcg_gen_concat_i32_i64(n64, t1, t2);
5062        }
5063        tcg_temp_free_i32(t2);
5064
5065        tcg_gen_atomic_cmpxchg_i64(o64, taddr, cpu_exclusive_val, n64,
5066                                   get_mem_index(s), opc);
5067        tcg_temp_free_i64(n64);
5068
5069        tcg_gen_setcond_i64(TCG_COND_NE, o64, o64, cpu_exclusive_val);
5070        tcg_gen_extrl_i64_i32(t0, o64);
5071
5072        tcg_temp_free_i64(o64);
5073    } else {
5074        t2 = tcg_temp_new_i32();
5075        tcg_gen_extrl_i64_i32(t2, cpu_exclusive_val);
5076        tcg_gen_atomic_cmpxchg_i32(t0, taddr, t2, t1, get_mem_index(s), opc);
5077        tcg_gen_setcond_i32(TCG_COND_NE, t0, t0, t2);
5078        tcg_temp_free_i32(t2);
5079    }
5080    tcg_temp_free_i32(t1);
5081    tcg_temp_free(taddr);
5082    tcg_gen_mov_i32(cpu_R[rd], t0);
5083    tcg_temp_free_i32(t0);
5084    tcg_gen_br(done_label);
5085
5086    gen_set_label(fail_label);
5087    tcg_gen_movi_i32(cpu_R[rd], 1);
5088    gen_set_label(done_label);
5089    tcg_gen_movi_i64(cpu_exclusive_addr, -1);
5090}
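/*
 * A typical guest retry loop exercising the two functions above looks
 * like this (illustrative sketch, not generated code):
 *
 *   retry:
 *     ldrex r1, [r0]        @ gen_load_exclusive records addr and value
 *     add   r1, r1, #1
 *     strex r2, r1, [r0]    @ gen_store_exclusive cmpxchgs against them
 *     cmp   r2, #0          @ r2 is 0 on success, 1 on failure
 *     bne   retry
 */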
5091
5092/* gen_srs:
5093 * @env: CPUARMState
5094 * @s: DisasContext
5095 * @mode: mode field from insn (which stack to store to)
5096 * @amode: addressing mode (DA/IA/DB/IB), encoded as per P,U bits in ARM insn
5097 * @writeback: true if writeback bit set
5098 *
5099 * Generate code for the SRS (Store Return State) insn.
5100 */
5101static void gen_srs(DisasContext *s,
5102                    uint32_t mode, uint32_t amode, bool writeback)
5103{
5104    int32_t offset;
5105    TCGv_i32 addr, tmp;
5106    bool undef = false;
5107
5108    /* SRS is:
5109     * - trapped to EL3 if EL3 is AArch64 and we are at Secure EL1
5110     *   and specified mode is monitor mode
5111     * - UNDEFINED in Hyp mode
5112     * - UNPREDICTABLE in User or System mode
5113     * - UNPREDICTABLE if the specified mode is:
5114     * -- not implemented
5115     * -- not a valid mode number
5116     * -- a mode that's at a higher exception level
5117     * -- Monitor, if we are Non-secure
5118     * For the UNPREDICTABLE cases we choose to UNDEF.
5119     */
5120    if (s->current_el == 1 && !s->ns && mode == ARM_CPU_MODE_MON) {
5121        gen_exception_insn_el(s, s->pc_curr, EXCP_UDEF,
5122                              syn_uncategorized(), 3);
5123        return;
5124    }
5125
5126    if (s->current_el == 0 || s->current_el == 2) {
5127        undef = true;
5128    }
5129
5130    switch (mode) {
5131    case ARM_CPU_MODE_USR:
5132    case ARM_CPU_MODE_FIQ:
5133    case ARM_CPU_MODE_IRQ:
5134    case ARM_CPU_MODE_SVC:
5135    case ARM_CPU_MODE_ABT:
5136    case ARM_CPU_MODE_UND:
5137    case ARM_CPU_MODE_SYS:
5138        break;
5139    case ARM_CPU_MODE_HYP:
5140        if (s->current_el == 1 || !arm_dc_feature(s, ARM_FEATURE_EL2)) {
5141            undef = true;
5142        }
5143        break;
5144    case ARM_CPU_MODE_MON:
5145        /* No need to check specifically for "are we non-secure" because
5146         * we've already made EL0 UNDEF and handled the trap for S-EL1;
5147         * so if this isn't EL3 then we must be non-secure.
5148         */
5149        if (s->current_el != 3) {
5150            undef = true;
5151        }
5152        break;
5153    default:
5154        undef = true;
5155    }
5156
5157    if (undef) {
5158        unallocated_encoding(s);
5159        return;
5160    }
5161
5162    addr = tcg_temp_new_i32();
5163    /* get_r13_banked() will raise an exception if called from System mode */
5164    gen_set_condexec(s);
5165    gen_set_pc_im(s, s->pc_curr);
5166    gen_helper_get_r13_banked(addr, cpu_env, tcg_constant_i32(mode));
5167    switch (amode) {
5168    case 0: /* DA */
5169        offset = -4;
5170        break;
5171    case 1: /* IA */
5172        offset = 0;
5173        break;
5174    case 2: /* DB */
5175        offset = -8;
5176        break;
5177    case 3: /* IB */
5178        offset = 4;
5179        break;
5180    default:
5181        g_assert_not_reached();
5182    }
5183    tcg_gen_addi_i32(addr, addr, offset);
5184    tmp = load_reg(s, 14);
5185    gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN);
5186    tcg_temp_free_i32(tmp);
5187    tmp = load_cpu_field(spsr);
5188    tcg_gen_addi_i32(addr, addr, 4);
5189    gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN);
5190    tcg_temp_free_i32(tmp);
5191    if (writeback) {
5192        switch (amode) {
5193        case 0:
5194            offset = -8;
5195            break;
5196        case 1:
5197            offset = 4;
5198            break;
5199        case 2:
5200            offset = -4;
5201            break;
5202        case 3:
5203            offset = 0;
5204            break;
5205        default:
5206            g_assert_not_reached();
5207        }
5208        tcg_gen_addi_i32(addr, addr, offset);
5209        gen_helper_set_r13_banked(cpu_env, tcg_constant_i32(mode), addr);
5210    }
5211    tcg_temp_free_i32(addr);
5212    s->base.is_jmp = DISAS_UPDATE_EXIT;
5213}
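/*
 * Worked example of the offsets above: for SRSDB (amode 2) the LR is
 * stored at [SP_mode - 8] and the SPSR at [SP_mode - 4]; with writeback
 * the banked SP is left decremented by 8, i.e. a full descending stack.
 */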
5214
5215/* Skip this instruction if the ARM condition is false */
5216static void arm_skip_unless(DisasContext *s, uint32_t cond)
5217{
5218    arm_gen_condlabel(s);
5219    arm_gen_test_cc(cond ^ 1, s->condlabel);
5220}
5221
5222
5223/*
5224 * Constant expanders used by T16/T32 decode
5225 */
5226
5227/* Return only the rotation part of T32ExpandImm.  */
5228static int t32_expandimm_rot(DisasContext *s, int x)
5229{
5230    return x & 0xc00 ? extract32(x, 7, 5) : 0;
5231}
5232
5233/* Return the unrotated immediate from T32ExpandImm.  */
5234static int t32_expandimm_imm(DisasContext *s, int x)
5235{
5236    int imm = extract32(x, 0, 8);
5237
5238    switch (extract32(x, 8, 4)) {
5239    case 0: /* XY */
5240        /* Nothing to do.  */
5241        break;
5242    case 1: /* 00XY00XY */
5243        imm *= 0x00010001;
5244        break;
5245    case 2: /* XY00XY00 */
5246        imm *= 0x01000100;
5247        break;
5248    case 3: /* XYXYXYXY */
5249        imm *= 0x01010101;
5250        break;
5251    default:
5252        /* Rotated constant.  */
5253        imm |= 0x80;
5254        break;
5255    }
5256    return imm;
5257}
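/*
 * Worked example for the two expanders above: for the 12-bit field
 * x = 0x2AB, bits [11:8] are 2, so t32_expandimm_imm() returns
 * 0xAB00AB00 and t32_expandimm_rot() returns 0 (bits [11:10] are clear).
 * For x = 0xC25 the rotated-constant case applies: the unrotated
 * immediate is 0x80 | 0x25 = 0xA5, the rotation is bits [11:7] = 24,
 * and the op_s_rri_rot()/op_s_rxi_rot() helpers below compute
 * ror32(0xA5, 24) = 0x0000A500.
 */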
5258
5259static int t32_branch24(DisasContext *s, int x)
5260{
5261    /* Convert J1:J2 at x[22:21] to I2:I1, which involves I=J^~S.  */
5262    x ^= !(x < 0) * (3 << 21);
5263    /* Append the final zero.  */
5264    return x << 1;
5265}
5266
5267static int t16_setflags(DisasContext *s)
5268{
5269    return s->condexec_mask == 0;
5270}
5271
5272static int t16_push_list(DisasContext *s, int x)
5273{
5274    return (x & 0xff) | (x & 0x100) << (14 - 8);
5275}
5276
5277static int t16_pop_list(DisasContext *s, int x)
5278{
5279    return (x & 0xff) | (x & 0x100) << (15 - 8);
5280}
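/*
 * In the T16 encodings, bit 8 of the register list adds LR (r14) for
 * PUSH and PC (r15) for POP, which is what the two shifts above encode.
 */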
5281
5282/*
5283 * Include the generated decoders.
5284 */
5285
5286#include "decode-a32.c.inc"
5287#include "decode-a32-uncond.c.inc"
5288#include "decode-t32.c.inc"
5289#include "decode-t16.c.inc"
5290
5291static bool valid_cp(DisasContext *s, int cp)
5292{
5293    /*
5294     * Return true if this coprocessor field indicates something
5295     * that's really a possible coprocessor.
5296     * For v7 and earlier, coprocessors 8..15 were reserved for Arm use,
5297     * and of those only cp14 and cp15 were used for registers.
5298     * cp10 and cp11 were used for VFP and Neon, whose decode is
5299     * dealt with elsewhere. With the advent of fp16, cp9 is also
5300     * now part of VFP.
5301     * For v8A and later, the encoding has been tightened so that
5302     * only cp14 and cp15 are valid, and other values aren't considered
5303     * to be in the coprocessor-instruction space at all. v8M still
5304     * permits coprocessors 0..7.
5305     * For XScale, we must not decode the XScale cp0, cp1 space as
5306     * a standard coprocessor insn, because we want to fall through to
5307     * the legacy disas_xscale_insn() decoder after decodetree is done.
5308     */
5309    if (arm_dc_feature(s, ARM_FEATURE_XSCALE) && (cp == 0 || cp == 1)) {
5310        return false;
5311    }
5312
5313    if (arm_dc_feature(s, ARM_FEATURE_V8) &&
5314        !arm_dc_feature(s, ARM_FEATURE_M)) {
5315        return cp >= 14;
5316    }
5317    return cp < 8 || cp >= 14;
5318}
5319
5320static bool trans_MCR(DisasContext *s, arg_MCR *a)
5321{
5322    if (!valid_cp(s, a->cp)) {
5323        return false;
5324    }
5325    do_coproc_insn(s, a->cp, false, a->opc1, a->crn, a->crm, a->opc2,
5326                   false, a->rt, 0);
5327    return true;
5328}
5329
5330static bool trans_MRC(DisasContext *s, arg_MRC *a)
5331{
5332    if (!valid_cp(s, a->cp)) {
5333        return false;
5334    }
5335    do_coproc_insn(s, a->cp, false, a->opc1, a->crn, a->crm, a->opc2,
5336                   true, a->rt, 0);
5337    return true;
5338}
5339
5340static bool trans_MCRR(DisasContext *s, arg_MCRR *a)
5341{
5342    if (!valid_cp(s, a->cp)) {
5343        return false;
5344    }
5345    do_coproc_insn(s, a->cp, true, a->opc1, 0, a->crm, 0,
5346                   false, a->rt, a->rt2);
5347    return true;
5348}
5349
5350static bool trans_MRRC(DisasContext *s, arg_MRRC *a)
5351{
5352    if (!valid_cp(s, a->cp)) {
5353        return false;
5354    }
5355    do_coproc_insn(s, a->cp, true, a->opc1, 0, a->crm, 0,
5356                   true, a->rt, a->rt2);
5357    return true;
5358}
5359
5360/* Helpers to swap operands for reverse-subtract.  */
5361static void gen_rsb(TCGv_i32 dst, TCGv_i32 a, TCGv_i32 b)
5362{
5363    tcg_gen_sub_i32(dst, b, a);
5364}
5365
5366static void gen_rsb_CC(TCGv_i32 dst, TCGv_i32 a, TCGv_i32 b)
5367{
5368    gen_sub_CC(dst, b, a);
5369}
5370
5371static void gen_rsc(TCGv_i32 dest, TCGv_i32 a, TCGv_i32 b)
5372{
5373    gen_sub_carry(dest, b, a);
5374}
5375
5376static void gen_rsc_CC(TCGv_i32 dest, TCGv_i32 a, TCGv_i32 b)
5377{
5378    gen_sbc_CC(dest, b, a);
5379}
5380
5381/*
5382 * Helpers for the data processing routines.
5383 *
5384 * After the computation, store the result back.  This store may be
5385 * suppressed altogether (STREG_NONE), done normally into a register
5386 * (STREG_NORMAL), subjected to a runtime check against the stack
5387 * limits (STREG_SP_CHECK), or turned into an exception return (STREG_EXC_RET).
5388 *
5389 * Always return true, indicating success for a trans_* function.
5390 */
5391typedef enum {
5392   STREG_NONE,
5393   STREG_NORMAL,
5394   STREG_SP_CHECK,
5395   STREG_EXC_RET,
5396} StoreRegKind;
5397
5398static bool store_reg_kind(DisasContext *s, int rd,
5399                            TCGv_i32 val, StoreRegKind kind)
5400{
5401    switch (kind) {
5402    case STREG_NONE:
5403        tcg_temp_free_i32(val);
5404        return true;
5405    case STREG_NORMAL:
5406        /* See ALUWritePC: Interworking only from a32 mode. */
5407        if (s->thumb) {
5408            store_reg(s, rd, val);
5409        } else {
5410            store_reg_bx(s, rd, val);
5411        }
5412        return true;
5413    case STREG_SP_CHECK:
5414        store_sp_checked(s, val);
5415        return true;
5416    case STREG_EXC_RET:
5417        gen_exception_return(s, val);
5418        return true;
5419    }
5420    g_assert_not_reached();
5421}
5422
5423/*
5424 * Data Processing (register)
5425 *
5426 * Operate, optionally setting flags, on one register source,
5427 * one immediate-shifted register source, and a destination.
5428 */
5429static bool op_s_rrr_shi(DisasContext *s, arg_s_rrr_shi *a,
5430                         void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
5431                         int logic_cc, StoreRegKind kind)
5432{
5433    TCGv_i32 tmp1, tmp2;
5434
5435    tmp2 = load_reg(s, a->rm);
5436    gen_arm_shift_im(tmp2, a->shty, a->shim, logic_cc);
5437    tmp1 = load_reg(s, a->rn);
5438
5439    gen(tmp1, tmp1, tmp2);
5440    tcg_temp_free_i32(tmp2);
5441
5442    if (logic_cc) {
5443        gen_logic_CC(tmp1);
5444    }
5445    return store_reg_kind(s, a->rd, tmp1, kind);
5446}
5447
5448static bool op_s_rxr_shi(DisasContext *s, arg_s_rrr_shi *a,
5449                         void (*gen)(TCGv_i32, TCGv_i32),
5450                         int logic_cc, StoreRegKind kind)
5451{
5452    TCGv_i32 tmp;
5453
5454    tmp = load_reg(s, a->rm);
5455    gen_arm_shift_im(tmp, a->shty, a->shim, logic_cc);
5456
5457    gen(tmp, tmp);
5458    if (logic_cc) {
5459        gen_logic_CC(tmp);
5460    }
5461    return store_reg_kind(s, a->rd, tmp, kind);
5462}
5463
5464/*
5465 * Data-processing (register-shifted register)
5466 *
5467 * Operate, optionally setting flags, on one register source,
5468 * one register-shifted register source, and a destination.
5469 */
5470static bool op_s_rrr_shr(DisasContext *s, arg_s_rrr_shr *a,
5471                         void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
5472                         int logic_cc, StoreRegKind kind)
5473{
5474    TCGv_i32 tmp1, tmp2;
5475
5476    tmp1 = load_reg(s, a->rs);
5477    tmp2 = load_reg(s, a->rm);
5478    gen_arm_shift_reg(tmp2, a->shty, tmp1, logic_cc);
5479    tmp1 = load_reg(s, a->rn);
5480
5481    gen(tmp1, tmp1, tmp2);
5482    tcg_temp_free_i32(tmp2);
5483
5484    if (logic_cc) {
5485        gen_logic_CC(tmp1);
5486    }
5487    return store_reg_kind(s, a->rd, tmp1, kind);
5488}
5489
5490static bool op_s_rxr_shr(DisasContext *s, arg_s_rrr_shr *a,
5491                         void (*gen)(TCGv_i32, TCGv_i32),
5492                         int logic_cc, StoreRegKind kind)
5493{
5494    TCGv_i32 tmp1, tmp2;
5495
5496    tmp1 = load_reg(s, a->rs);
5497    tmp2 = load_reg(s, a->rm);
5498    gen_arm_shift_reg(tmp2, a->shty, tmp1, logic_cc);
5499
5500    gen(tmp2, tmp2);
5501    if (logic_cc) {
5502        gen_logic_CC(tmp2);
5503    }
5504    return store_reg_kind(s, a->rd, tmp2, kind);
5505}
5506
5507/*
5508 * Data-processing (immediate)
5509 *
5510 * Operate, optionally setting flags, on one register source,
5511 * one rotated immediate, and a destination.
5512 *
5513 * Note that logic_cc && a->rot setting CF based on the msb of the
5514 * immediate is the reason why we must pass in the unrotated form
5515 * of the immediate.
5516 */
5517static bool op_s_rri_rot(DisasContext *s, arg_s_rri_rot *a,
5518                         void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
5519                         int logic_cc, StoreRegKind kind)
5520{
5521    TCGv_i32 tmp1;
5522    uint32_t imm;
5523
5524    imm = ror32(a->imm, a->rot);
5525    if (logic_cc && a->rot) {
5526        tcg_gen_movi_i32(cpu_CF, imm >> 31);
5527    }
5528    tmp1 = load_reg(s, a->rn);
5529
5530    gen(tmp1, tmp1, tcg_constant_i32(imm));
5531
5532    if (logic_cc) {
5533        gen_logic_CC(tmp1);
5534    }
5535    return store_reg_kind(s, a->rd, tmp1, kind);
5536}
5537
5538static bool op_s_rxi_rot(DisasContext *s, arg_s_rri_rot *a,
5539                         void (*gen)(TCGv_i32, TCGv_i32),
5540                         int logic_cc, StoreRegKind kind)
5541{
5542    TCGv_i32 tmp;
5543    uint32_t imm;
5544
5545    imm = ror32(a->imm, a->rot);
5546    if (logic_cc && a->rot) {
5547        tcg_gen_movi_i32(cpu_CF, imm >> 31);
5548    }
5549
5550    tmp = tcg_temp_new_i32();
5551    gen(tmp, tcg_constant_i32(imm));
5552
5553    if (logic_cc) {
5554        gen_logic_CC(tmp);
5555    }
5556    return store_reg_kind(s, a->rd, tmp, kind);
5557}
5558
5559#define DO_ANY3(NAME, OP, L, K)                                         \
5560    static bool trans_##NAME##_rrri(DisasContext *s, arg_s_rrr_shi *a)  \
5561    { StoreRegKind k = (K); return op_s_rrr_shi(s, a, OP, L, k); }      \
5562    static bool trans_##NAME##_rrrr(DisasContext *s, arg_s_rrr_shr *a)  \
5563    { StoreRegKind k = (K); return op_s_rrr_shr(s, a, OP, L, k); }      \
5564    static bool trans_##NAME##_rri(DisasContext *s, arg_s_rri_rot *a)   \
5565    { StoreRegKind k = (K); return op_s_rri_rot(s, a, OP, L, k); }
5566
5567#define DO_ANY2(NAME, OP, L, K)                                         \
5568    static bool trans_##NAME##_rxri(DisasContext *s, arg_s_rrr_shi *a)  \
5569    { StoreRegKind k = (K); return op_s_rxr_shi(s, a, OP, L, k); }      \
5570    static bool trans_##NAME##_rxrr(DisasContext *s, arg_s_rrr_shr *a)  \
5571    { StoreRegKind k = (K); return op_s_rxr_shr(s, a, OP, L, k); }      \
5572    static bool trans_##NAME##_rxi(DisasContext *s, arg_s_rri_rot *a)   \
5573    { StoreRegKind k = (K); return op_s_rxi_rot(s, a, OP, L, k); }
5574
5575#define DO_CMP2(NAME, OP, L)                                            \
5576    static bool trans_##NAME##_xrri(DisasContext *s, arg_s_rrr_shi *a)  \
5577    { return op_s_rrr_shi(s, a, OP, L, STREG_NONE); }                   \
5578    static bool trans_##NAME##_xrrr(DisasContext *s, arg_s_rrr_shr *a)  \
5579    { return op_s_rrr_shr(s, a, OP, L, STREG_NONE); }                   \
5580    static bool trans_##NAME##_xri(DisasContext *s, arg_s_rri_rot *a)   \
5581    { return op_s_rri_rot(s, a, OP, L, STREG_NONE); }
5582
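/*
 * As an example of the expansion below: DO_ANY3(AND, ...) generates
 * trans_AND_rrri, trans_AND_rrrr and trans_AND_rri, covering the
 * immediate-shifted-register, register-shifted-register and rotated
 * immediate forms respectively.
 */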
5583DO_ANY3(AND, tcg_gen_and_i32, a->s, STREG_NORMAL)
5584DO_ANY3(EOR, tcg_gen_xor_i32, a->s, STREG_NORMAL)
5585DO_ANY3(ORR, tcg_gen_or_i32, a->s, STREG_NORMAL)
5586DO_ANY3(BIC, tcg_gen_andc_i32, a->s, STREG_NORMAL)
5587
5588DO_ANY3(RSB, a->s ? gen_rsb_CC : gen_rsb, false, STREG_NORMAL)
5589DO_ANY3(ADC, a->s ? gen_adc_CC : gen_add_carry, false, STREG_NORMAL)
5590DO_ANY3(SBC, a->s ? gen_sbc_CC : gen_sub_carry, false, STREG_NORMAL)
5591DO_ANY3(RSC, a->s ? gen_rsc_CC : gen_rsc, false, STREG_NORMAL)
5592
5593DO_CMP2(TST, tcg_gen_and_i32, true)
5594DO_CMP2(TEQ, tcg_gen_xor_i32, true)
5595DO_CMP2(CMN, gen_add_CC, false)
5596DO_CMP2(CMP, gen_sub_CC, false)
5597
5598DO_ANY3(ADD, a->s ? gen_add_CC : tcg_gen_add_i32, false,
5599        a->rd == 13 && a->rn == 13 ? STREG_SP_CHECK : STREG_NORMAL)
5600
5601/*
5602 * Note that the StoreRegKind computations below may return out of the
5603 * middle of the functions expanded by DO_ANY3, and that they modify
5604 * a->s via that parameter before it is used by OP.
5605 */
5606DO_ANY3(SUB, a->s ? gen_sub_CC : tcg_gen_sub_i32, false,
5607        ({
5608            StoreRegKind ret = STREG_NORMAL;
5609            if (a->rd == 15 && a->s) {
5610                /*
5611                 * See ALUExceptionReturn:
5612                 * In User mode, UNPREDICTABLE; we choose UNDEF.
5613                 * In Hyp mode, UNDEFINED.
5614                 */
5615                if (IS_USER(s) || s->current_el == 2) {
5616                    unallocated_encoding(s);
5617                    return true;
5618                }
5619                /* There is no writeback of nzcv to PSTATE.  */
5620                a->s = 0;
5621                ret = STREG_EXC_RET;
5622            } else if (a->rd == 13 && a->rn == 13) {
5623                ret = STREG_SP_CHECK;
5624            }
5625            ret;
5626        }))
5627
5628DO_ANY2(MOV, tcg_gen_mov_i32, a->s,
5629        ({
5630            StoreRegKind ret = STREG_NORMAL;
5631            if (a->rd == 15 && a->s) {
5632                /*
5633                 * See ALUExceptionReturn:
5634                 * In User mode, UNPREDICTABLE; we choose UNDEF.
5635                 * In Hyp mode, UNDEFINED.
5636                 */
5637                if (IS_USER(s) || s->current_el == 2) {
5638                    unallocated_encoding(s);
5639                    return true;
5640                }
5641                /* There is no writeback of nzcv to PSTATE.  */
5642                a->s = 0;
5643                ret = STREG_EXC_RET;
5644            } else if (a->rd == 13) {
5645                ret = STREG_SP_CHECK;
5646            }
5647            ret;
5648        }))
5649
5650DO_ANY2(MVN, tcg_gen_not_i32, a->s, STREG_NORMAL)
5651
5652/*
5653 * ORN is only available with T32, so there is no register-shifted-register
5654 * form of the insn.  Using the DO_ANY3 macro would create an unused function.
5655 */
5656static bool trans_ORN_rrri(DisasContext *s, arg_s_rrr_shi *a)
5657{
5658    return op_s_rrr_shi(s, a, tcg_gen_orc_i32, a->s, STREG_NORMAL);
5659}
5660
5661static bool trans_ORN_rri(DisasContext *s, arg_s_rri_rot *a)
5662{
5663    return op_s_rri_rot(s, a, tcg_gen_orc_i32, a->s, STREG_NORMAL);
5664}
5665
5666#undef DO_ANY3
5667#undef DO_ANY2
5668#undef DO_CMP2
5669
5670static bool trans_ADR(DisasContext *s, arg_ri *a)
5671{
5672    store_reg_bx(s, a->rd, add_reg_for_lit(s, 15, a->imm));
5673    return true;
5674}
5675
5676static bool trans_MOVW(DisasContext *s, arg_MOVW *a)
5677{
5678    if (!ENABLE_ARCH_6T2) {
5679        return false;
5680    }
5681
5682    store_reg(s, a->rd, tcg_constant_i32(a->imm));
5683    return true;
5684}
5685
5686static bool trans_MOVT(DisasContext *s, arg_MOVW *a)
5687{
5688    TCGv_i32 tmp;
5689
5690    if (!ENABLE_ARCH_6T2) {
5691        return false;
5692    }
5693
5694    tmp = load_reg(s, a->rd);
5695    tcg_gen_ext16u_i32(tmp, tmp);
5696    tcg_gen_ori_i32(tmp, tmp, a->imm << 16);
5697    store_reg(s, a->rd, tmp);
5698    return true;
5699}
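/*
 * MOVW and MOVT are typically paired to build a full 32-bit constant,
 * e.g. "movw r0, #0x5678; movt r0, #0x1234" leaves r0 == 0x12345678.
 */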
5700
5701/*
5702 * v8.1M MVE wide-shifts
5703 */
5704static bool do_mve_shl_ri(DisasContext *s, arg_mve_shl_ri *a,
5705                          WideShiftImmFn *fn)
5706{
5707    TCGv_i64 rda;
5708    TCGv_i32 rdalo, rdahi;
5709
5710    if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
5711        /* Decode falls through to ORR/MOV UNPREDICTABLE handling */
5712        return false;
5713    }
5714    if (a->rdahi == 15) {
5715        /* These are a different encoding (SQSHL/SRSHR/UQSHL/URSHR) */
5716        return false;
5717    }
5718    if (!dc_isar_feature(aa32_mve, s) ||
5719        !arm_dc_feature(s, ARM_FEATURE_M_MAIN) ||
5720        a->rdahi == 13) {
5721        /* RdaHi == 13 is UNPREDICTABLE; we choose to UNDEF */
5722        unallocated_encoding(s);
5723        return true;
5724    }
5725
5726    if (a->shim == 0) {
5727        a->shim = 32;
5728    }
5729
5730    rda = tcg_temp_new_i64();
5731    rdalo = load_reg(s, a->rdalo);
5732    rdahi = load_reg(s, a->rdahi);
5733    tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
5734
5735    fn(rda, rda, a->shim);
5736
5737    tcg_gen_extrl_i64_i32(rdalo, rda);
5738    tcg_gen_extrh_i64_i32(rdahi, rda);
5739    store_reg(s, a->rdalo, rdalo);
5740    store_reg(s, a->rdahi, rdahi);
5741    tcg_temp_free_i64(rda);
5742
5743    return true;
5744}
5745
5746static bool trans_ASRL_ri(DisasContext *s, arg_mve_shl_ri *a)
5747{
5748    return do_mve_shl_ri(s, a, tcg_gen_sari_i64);
5749}
5750
5751static bool trans_LSLL_ri(DisasContext *s, arg_mve_shl_ri *a)
5752{
5753    return do_mve_shl_ri(s, a, tcg_gen_shli_i64);
5754}
5755
5756static bool trans_LSRL_ri(DisasContext *s, arg_mve_shl_ri *a)
5757{
5758    return do_mve_shl_ri(s, a, tcg_gen_shri_i64);
5759}
5760
5761static void gen_mve_sqshll(TCGv_i64 r, TCGv_i64 n, int64_t shift)
5762{
5763    gen_helper_mve_sqshll(r, cpu_env, n, tcg_constant_i32(shift));
5764}
5765
5766static bool trans_SQSHLL_ri(DisasContext *s, arg_mve_shl_ri *a)
5767{
5768    return do_mve_shl_ri(s, a, gen_mve_sqshll);
5769}
5770
5771static void gen_mve_uqshll(TCGv_i64 r, TCGv_i64 n, int64_t shift)
5772{
5773    gen_helper_mve_uqshll(r, cpu_env, n, tcg_constant_i32(shift));
5774}
5775
5776static bool trans_UQSHLL_ri(DisasContext *s, arg_mve_shl_ri *a)
5777{
5778    return do_mve_shl_ri(s, a, gen_mve_uqshll);
5779}
5780
5781static bool trans_SRSHRL_ri(DisasContext *s, arg_mve_shl_ri *a)
5782{
5783    return do_mve_shl_ri(s, a, gen_srshr64_i64);
5784}
5785
5786static bool trans_URSHRL_ri(DisasContext *s, arg_mve_shl_ri *a)
5787{
5788    return do_mve_shl_ri(s, a, gen_urshr64_i64);
5789}
5790
5791static bool do_mve_shl_rr(DisasContext *s, arg_mve_shl_rr *a, WideShiftFn *fn)
5792{
5793    TCGv_i64 rda;
5794    TCGv_i32 rdalo, rdahi;
5795
5796    if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
5797        /* Decode falls through to ORR/MOV UNPREDICTABLE handling */
5798        return false;
5799    }
5800    if (a->rdahi == 15) {
5801        /* These are a different encoding (SQSHL/SRSHR/UQSHL/URSHR) */
5802        return false;
5803    }
5804    if (!dc_isar_feature(aa32_mve, s) ||
5805        !arm_dc_feature(s, ARM_FEATURE_M_MAIN) ||
5806        a->rdahi == 13 || a->rm == 13 || a->rm == 15 ||
5807        a->rm == a->rdahi || a->rm == a->rdalo) {
5808        /* These rdahi/rdalo/rm cases are UNPREDICTABLE; we choose to UNDEF */
5809        unallocated_encoding(s);
5810        return true;
5811    }
5812
5813    rda = tcg_temp_new_i64();
5814    rdalo = load_reg(s, a->rdalo);
5815    rdahi = load_reg(s, a->rdahi);
5816    tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
5817
5818    /* The helper takes care of the sign-extension of the low 8 bits of Rm */
5819    fn(rda, cpu_env, rda, cpu_R[a->rm]);
5820
5821    tcg_gen_extrl_i64_i32(rdalo, rda);
5822    tcg_gen_extrh_i64_i32(rdahi, rda);
5823    store_reg(s, a->rdalo, rdalo);
5824    store_reg(s, a->rdahi, rdahi);
5825    tcg_temp_free_i64(rda);
5826
5827    return true;
5828}
5829
5830static bool trans_LSLL_rr(DisasContext *s, arg_mve_shl_rr *a)
5831{
5832    return do_mve_shl_rr(s, a, gen_helper_mve_ushll);
5833}
5834
5835static bool trans_ASRL_rr(DisasContext *s, arg_mve_shl_rr *a)
5836{
5837    return do_mve_shl_rr(s, a, gen_helper_mve_sshrl);
5838}
5839
5840static bool trans_UQRSHLL64_rr(DisasContext *s, arg_mve_shl_rr *a)
5841{
5842    return do_mve_shl_rr(s, a, gen_helper_mve_uqrshll);
5843}
5844
5845static bool trans_SQRSHRL64_rr(DisasContext *s, arg_mve_shl_rr *a)
5846{
5847    return do_mve_shl_rr(s, a, gen_helper_mve_sqrshrl);
5848}
5849
5850static bool trans_UQRSHLL48_rr(DisasContext *s, arg_mve_shl_rr *a)
5851{
5852    return do_mve_shl_rr(s, a, gen_helper_mve_uqrshll48);
5853}
5854
5855static bool trans_SQRSHRL48_rr(DisasContext *s, arg_mve_shl_rr *a)
5856{
5857    return do_mve_shl_rr(s, a, gen_helper_mve_sqrshrl48);
5858}
5859
5860static bool do_mve_sh_ri(DisasContext *s, arg_mve_sh_ri *a, ShiftImmFn *fn)
5861{
5862    if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
5863        /* Decode falls through to ORR/MOV UNPREDICTABLE handling */
5864        return false;
5865    }
5866    if (!dc_isar_feature(aa32_mve, s) ||
5867        !arm_dc_feature(s, ARM_FEATURE_M_MAIN) ||
5868        a->rda == 13 || a->rda == 15) {
5869        /* These rda cases are UNPREDICTABLE; we choose to UNDEF */
5870        unallocated_encoding(s);
5871        return true;
5872    }
5873
5874    if (a->shim == 0) {
5875        a->shim = 32;
5876    }
5877    fn(cpu_R[a->rda], cpu_R[a->rda], a->shim);
5878
5879    return true;
5880}
5881
5882static bool trans_URSHR_ri(DisasContext *s, arg_mve_sh_ri *a)
5883{
5884    return do_mve_sh_ri(s, a, gen_urshr32_i32);
5885}
5886
5887static bool trans_SRSHR_ri(DisasContext *s, arg_mve_sh_ri *a)
5888{
5889    return do_mve_sh_ri(s, a, gen_srshr32_i32);
5890}
5891
5892static void gen_mve_sqshl(TCGv_i32 r, TCGv_i32 n, int32_t shift)
5893{
5894    gen_helper_mve_sqshl(r, cpu_env, n, tcg_constant_i32(shift));
5895}
5896
5897static bool trans_SQSHL_ri(DisasContext *s, arg_mve_sh_ri *a)
5898{
5899    return do_mve_sh_ri(s, a, gen_mve_sqshl);
5900}
5901
5902static void gen_mve_uqshl(TCGv_i32 r, TCGv_i32 n, int32_t shift)
5903{
5904    gen_helper_mve_uqshl(r, cpu_env, n, tcg_constant_i32(shift));
5905}
5906
5907static bool trans_UQSHL_ri(DisasContext *s, arg_mve_sh_ri *a)
5908{
5909    return do_mve_sh_ri(s, a, gen_mve_uqshl);
5910}
5911
5912static bool do_mve_sh_rr(DisasContext *s, arg_mve_sh_rr *a, ShiftFn *fn)
5913{
5914    if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
5915        /* Decode falls through to ORR/MOV UNPREDICTABLE handling */
5916        return false;
5917    }
5918    if (!dc_isar_feature(aa32_mve, s) ||
5919        !arm_dc_feature(s, ARM_FEATURE_M_MAIN) ||
5920        a->rda == 13 || a->rda == 15 || a->rm == 13 || a->rm == 15 ||
5921        a->rm == a->rda) {
5922        /* These rda/rm cases are UNPREDICTABLE; we choose to UNDEF */
5923        unallocated_encoding(s);
5924        return true;
5925    }
5926
5927    /* The helper takes care of the sign-extension of the low 8 bits of Rm */
5928    fn(cpu_R[a->rda], cpu_env, cpu_R[a->rda], cpu_R[a->rm]);
5929    return true;
5930}
5931
5932static bool trans_SQRSHR_rr(DisasContext *s, arg_mve_sh_rr *a)
5933{
5934    return do_mve_sh_rr(s, a, gen_helper_mve_sqrshr);
5935}
5936
5937static bool trans_UQRSHL_rr(DisasContext *s, arg_mve_sh_rr *a)
5938{
5939    return do_mve_sh_rr(s, a, gen_helper_mve_uqrshl);
5940}
5941
5942/*
5943 * Multiply and multiply accumulate
5944 */
5945
5946static bool op_mla(DisasContext *s, arg_s_rrrr *a, bool add)
5947{
5948    TCGv_i32 t1, t2;
5949
5950    t1 = load_reg(s, a->rn);
5951    t2 = load_reg(s, a->rm);
5952    tcg_gen_mul_i32(t1, t1, t2);
5953    tcg_temp_free_i32(t2);
5954    if (add) {
5955        t2 = load_reg(s, a->ra);
5956        tcg_gen_add_i32(t1, t1, t2);
5957        tcg_temp_free_i32(t2);
5958    }
5959    if (a->s) {
5960        gen_logic_CC(t1);
5961    }
5962    store_reg(s, a->rd, t1);
5963    return true;
5964}
5965
5966static bool trans_MUL(DisasContext *s, arg_MUL *a)
5967{
5968    return op_mla(s, a, false);
5969}
5970
5971static bool trans_MLA(DisasContext *s, arg_MLA *a)
5972{
5973    return op_mla(s, a, true);
5974}
5975
5976static bool trans_MLS(DisasContext *s, arg_MLS *a)
5977{
5978    TCGv_i32 t1, t2;
5979
5980    if (!ENABLE_ARCH_6T2) {
5981        return false;
5982    }
5983    t1 = load_reg(s, a->rn);
5984    t2 = load_reg(s, a->rm);
5985    tcg_gen_mul_i32(t1, t1, t2);
5986    tcg_temp_free_i32(t2);
5987    t2 = load_reg(s, a->ra);
5988    tcg_gen_sub_i32(t1, t2, t1);
5989    tcg_temp_free_i32(t2);
5990    store_reg(s, a->rd, t1);
5991    return true;
5992}
5993
5994static bool op_mlal(DisasContext *s, arg_s_rrrr *a, bool uns, bool add)
5995{
5996    TCGv_i32 t0, t1, t2, t3;
5997
5998    t0 = load_reg(s, a->rm);
5999    t1 = load_reg(s, a->rn);
6000    if (uns) {
6001        tcg_gen_mulu2_i32(t0, t1, t0, t1);
6002    } else {
6003        tcg_gen_muls2_i32(t0, t1, t0, t1);
6004    }
6005    if (add) {
6006        t2 = load_reg(s, a->ra);
6007        t3 = load_reg(s, a->rd);
6008        tcg_gen_add2_i32(t0, t1, t0, t1, t2, t3);
6009        tcg_temp_free_i32(t2);
6010        tcg_temp_free_i32(t3);
6011    }
6012    if (a->s) {
6013        gen_logicq_cc(t0, t1);
6014    }
6015    store_reg(s, a->ra, t0);
6016    store_reg(s, a->rd, t1);
6017    return true;
6018}
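/*
 * In op_mlal() the low 32 bits of the result go to Ra and the high
 * 32 bits to Rd, i.e. Ra and Rd play the roles of RdLo and RdHi in the
 * UMULL/SMULL/UMLAL/SMLAL encodings.
 */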
6019
6020static bool trans_UMULL(DisasContext *s, arg_UMULL *a)
6021{
6022    return op_mlal(s, a, true, false);
6023}
6024
6025static bool trans_SMULL(DisasContext *s, arg_SMULL *a)
6026{
6027    return op_mlal(s, a, false, false);
6028}
6029
6030static bool trans_UMLAL(DisasContext *s, arg_UMLAL *a)
6031{
6032    return op_mlal(s, a, true, true);
6033}
6034
6035static bool trans_SMLAL(DisasContext *s, arg_SMLAL *a)
6036{
6037    return op_mlal(s, a, false, true);
6038}
6039
6040static bool trans_UMAAL(DisasContext *s, arg_UMAAL *a)
6041{
6042    TCGv_i32 t0, t1, t2, zero;
6043
6044    if (s->thumb
6045        ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
6046        : !ENABLE_ARCH_6) {
6047        return false;
6048    }
6049
6050    t0 = load_reg(s, a->rm);
6051    t1 = load_reg(s, a->rn);
6052    tcg_gen_mulu2_i32(t0, t1, t0, t1);
6053    zero = tcg_constant_i32(0);
6054    t2 = load_reg(s, a->ra);
6055    tcg_gen_add2_i32(t0, t1, t0, t1, t2, zero);
6056    tcg_temp_free_i32(t2);
6057    t2 = load_reg(s, a->rd);
6058    tcg_gen_add2_i32(t0, t1, t0, t1, t2, zero);
6059    tcg_temp_free_i32(t2);
6060    store_reg(s, a->ra, t0);
6061    store_reg(s, a->rd, t1);
6062    return true;
6063}
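/*
 * UMAAL computes Rn * Rm + Ra + Rd as an unsigned 64-bit value; since
 * (2^32 - 1)^2 + 2 * (2^32 - 1) == 2^64 - 1, the two extra additions
 * can never overflow, so the result always fits in the Ra:Rd pair.
 */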
6064
6065/*
6066 * Saturating addition and subtraction
6067 */
6068
6069static bool op_qaddsub(DisasContext *s, arg_rrr *a, bool add, bool doub)
6070{
6071    TCGv_i32 t0, t1;
6072
6073    if (s->thumb
6074        ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
6075        : !ENABLE_ARCH_5TE) {
6076        return false;
6077    }
6078
6079    t0 = load_reg(s, a->rm);
6080    t1 = load_reg(s, a->rn);
6081    if (doub) {
6082        gen_helper_add_saturate(t1, cpu_env, t1, t1);
6083    }
6084    if (add) {
6085        gen_helper_add_saturate(t0, cpu_env, t0, t1);
6086    } else {
6087        gen_helper_sub_saturate(t0, cpu_env, t0, t1);
6088    }
6089    tcg_temp_free_i32(t1);
6090    store_reg(s, a->rd, t0);
6091    return true;
6092}
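/*
 * With doub set, Rn is saturating-doubled before the final operation,
 * so e.g. QDADD produces sat(Rm + sat(2 * Rn)); either saturation can
 * set the Q flag via the helpers.
 */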
6093
6094#define DO_QADDSUB(NAME, ADD, DOUB) \
6095static bool trans_##NAME(DisasContext *s, arg_rrr *a)    \
6096{                                                        \
6097    return op_qaddsub(s, a, ADD, DOUB);                  \
6098}
6099
6100DO_QADDSUB(QADD, true, false)
6101DO_QADDSUB(QSUB, false, false)
6102DO_QADDSUB(QDADD, true, true)
6103DO_QADDSUB(QDSUB, false, true)
6104
6105#undef DO_QADDSUB
6106
6107/*
6108 * Halfword multiply and multiply accumulate
6109 */
6110
6111static bool op_smlaxxx(DisasContext *s, arg_rrrr *a,
6112                       int add_long, bool nt, bool mt)
6113{
6114    TCGv_i32 t0, t1, tl, th;
6115
6116    if (s->thumb
6117        ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
6118        : !ENABLE_ARCH_5TE) {
6119        return false;
6120    }
6121
6122    t0 = load_reg(s, a->rn);
6123    t1 = load_reg(s, a->rm);
6124    gen_mulxy(t0, t1, nt, mt);
6125    tcg_temp_free_i32(t1);
6126
6127    switch (add_long) {
6128    case 0:
6129        store_reg(s, a->rd, t0);
6130        break;
6131    case 1:
6132        t1 = load_reg(s, a->ra);
6133        gen_helper_add_setq(t0, cpu_env, t0, t1);
6134        tcg_temp_free_i32(t1);
6135        store_reg(s, a->rd, t0);
6136        break;
6137    case 2:
6138        tl = load_reg(s, a->ra);
6139        th = load_reg(s, a->rd);
6140        /* Sign-extend the 32-bit product to 64 bits.  */
6141        t1 = tcg_temp_new_i32();
6142        tcg_gen_sari_i32(t1, t0, 31);
6143        tcg_gen_add2_i32(tl, th, tl, th, t0, t1);
6144        tcg_temp_free_i32(t0);
6145        tcg_temp_free_i32(t1);
6146        store_reg(s, a->ra, tl);
6147        store_reg(s, a->rd, th);
6148        break;
6149    default:
6150        g_assert_not_reached();
6151    }
6152    return true;
6153}
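/*
 * add_long above selects the flavour: 0 stores the plain SMULxy product
 * to Rd, 1 adds the 32-bit Ra accumulator (setting Q on overflow) and
 * stores to Rd (SMLAxy), and 2 accumulates the sign-extended product
 * into the 64-bit value held in Ra (low half) and Rd (high half),
 * i.e. SMLALxy.
 */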
6154
6155#define DO_SMLAX(NAME, add, nt, mt) \
6156static bool trans_##NAME(DisasContext *s, arg_rrrr *a)     \
6157{                                                          \
6158    return op_smlaxxx(s, a, add, nt, mt);                  \
6159}
6160
6161DO_SMLAX(SMULBB, 0, 0, 0)
6162DO_SMLAX(SMULBT, 0, 0, 1)
6163DO_SMLAX(SMULTB, 0, 1, 0)
6164DO_SMLAX(SMULTT, 0, 1, 1)
6165
6166DO_SMLAX(SMLABB, 1, 0, 0)
6167DO_SMLAX(SMLABT, 1, 0, 1)
6168DO_SMLAX(SMLATB, 1, 1, 0)
6169DO_SMLAX(SMLATT, 1, 1, 1)
6170
6171DO_SMLAX(SMLALBB, 2, 0, 0)
6172DO_SMLAX(SMLALBT, 2, 0, 1)
6173DO_SMLAX(SMLALTB, 2, 1, 0)
6174DO_SMLAX(SMLALTT, 2, 1, 1)
6175
6176#undef DO_SMLAX
6177
6178static bool op_smlawx(DisasContext *s, arg_rrrr *a, bool add, bool mt)
6179{
6180    TCGv_i32 t0, t1;
6181
6182    if (!ENABLE_ARCH_5TE) {
6183        return false;
6184    }
6185
6186    t0 = load_reg(s, a->rn);
6187    t1 = load_reg(s, a->rm);
6188    /*
6189     * Since the nominal result is product<47:16>, shift the 16-bit
6190     * input up by 16 bits, so that the result is at product<63:32>.
6191     */
6192    if (mt) {
6193        tcg_gen_andi_i32(t1, t1, 0xffff0000);
6194    } else {
6195        tcg_gen_shli_i32(t1, t1, 16);
6196    }
6197    tcg_gen_muls2_i32(t0, t1, t0, t1);
6198    tcg_temp_free_i32(t0);
6199    if (add) {
6200        t0 = load_reg(s, a->ra);
6201        gen_helper_add_setq(t1, cpu_env, t1, t0);
6202        tcg_temp_free_i32(t0);
6203    }
6204    store_reg(s, a->rd, t1);
6205    return true;
6206}
6207
6208#define DO_SMLAWX(NAME, add, mt) \
6209static bool trans_##NAME(DisasContext *s, arg_rrrr *a)     \
6210{                                                          \
6211    return op_smlawx(s, a, add, mt);                       \
6212}
6213
6214DO_SMLAWX(SMULWB, 0, 0)
6215DO_SMLAWX(SMULWT, 0, 1)
6216DO_SMLAWX(SMLAWB, 1, 0)
6217DO_SMLAWX(SMLAWT, 1, 1)
6218
6219#undef DO_SMLAWX
6220
6221/*
6222 * MSR (immediate) and hints
6223 */
6224
6225static bool trans_YIELD(DisasContext *s, arg_YIELD *a)
6226{
6227    /*
6228     * When running single-threaded TCG code, use the helper to ensure that
6229     * the next round-robin scheduled vCPU gets a crack.  When running in
6230     * MTTCG we don't generate jumps to the helper as it won't affect the
6231     * scheduling of other vCPUs.
6232     */
6233    if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
6234        gen_set_pc_im(s, s->base.pc_next);
6235        s->base.is_jmp = DISAS_YIELD;
6236    }
6237    return true;
6238}
6239
6240static bool trans_WFE(DisasContext *s, arg_WFE *a)
6241{
6242    /*
6243     * When running single-threaded TCG code, use the helper to ensure that
6244     * the next round-robin scheduled vCPU gets a crack.  In MTTCG mode we
6245     * just skip this instruction.  Currently the SEV/SEVL instructions,
6246     * which are *one* of many ways to wake the CPU from WFE, are not
6247     * implemented so we can't sleep like WFI does.
6248     */
6249    if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
6250        gen_set_pc_im(s, s->base.pc_next);
6251        s->base.is_jmp = DISAS_WFE;
6252    }
6253    return true;
6254}
6255
6256static bool trans_WFI(DisasContext *s, arg_WFI *a)
6257{
6258    /* For WFI, halt the vCPU until an interrupt arrives. */
6259    gen_set_pc_im(s, s->base.pc_next);
6260    s->base.is_jmp = DISAS_WFI;
6261    return true;
6262}
6263
6264static bool trans_ESB(DisasContext *s, arg_ESB *a)
6265{
6266    /*
6267     * For M-profile, minimal-RAS ESB can be a NOP.
6268     * Without RAS, we must implement this as NOP.
6269     */
6270    if (!arm_dc_feature(s, ARM_FEATURE_M) && dc_isar_feature(aa32_ras, s)) {
6271        /*
6272         * QEMU does not have a source of physical SErrors,
6273         * so we are only concerned with virtual SErrors.
6274         * The pseudocode in the ARM for this case is
6275         *   if PSTATE.EL IN {EL0, EL1} && EL2Enabled() then
6276         *      AArch32.vESBOperation();
6277         * Most of the condition can be evaluated at translation time.
6278         * Test for EL2 present, and defer test for SEL2 to runtime.
6279         */
6280        if (s->current_el <= 1 && arm_dc_feature(s, ARM_FEATURE_EL2)) {
6281            gen_helper_vesb(cpu_env);
6282        }
6283    }
6284    return true;
6285}
6286
6287static bool trans_NOP(DisasContext *s, arg_NOP *a)
6288{
6289    return true;
6290}
6291
6292static bool trans_MSR_imm(DisasContext *s, arg_MSR_imm *a)
6293{
6294    uint32_t val = ror32(a->imm, a->rot * 2);
6295    uint32_t mask = msr_mask(s, a->mask, a->r);
6296
6297    if (gen_set_psr_im(s, mask, a->r, val)) {
6298        unallocated_encoding(s);
6299    }
6300    return true;
6301}
6302
6303/*
6304 * Cyclic Redundancy Check
6305 */
6306
6307static bool op_crc32(DisasContext *s, arg_rrr *a, bool c, MemOp sz)
6308{
6309    TCGv_i32 t1, t2, t3;
6310
6311    if (!dc_isar_feature(aa32_crc32, s)) {
6312        return false;
6313    }
6314
6315    t1 = load_reg(s, a->rn);
6316    t2 = load_reg(s, a->rm);
6317    switch (sz) {
6318    case MO_8:
6319        gen_uxtb(t2);
6320        break;
6321    case MO_16:
6322        gen_uxth(t2);
6323        break;
6324    case MO_32:
6325        break;
6326    default:
6327        g_assert_not_reached();
6328    }
6329    t3 = tcg_constant_i32(1 << sz);
6330    if (c) {
6331        gen_helper_crc32c(t1, t1, t2, t3);
6332    } else {
6333        gen_helper_crc32(t1, t1, t2, t3);
6334    }
6335    tcg_temp_free_i32(t2);
6336    store_reg(s, a->rd, t1);
6337    return true;
6338}
6339
6340#define DO_CRC32(NAME, c, sz) \
6341static bool trans_##NAME(DisasContext *s, arg_rrr *a)  \
6342    { return op_crc32(s, a, c, sz); }
6343
6344DO_CRC32(CRC32B, false, MO_8)
6345DO_CRC32(CRC32H, false, MO_16)
6346DO_CRC32(CRC32W, false, MO_32)
6347DO_CRC32(CRC32CB, true, MO_8)
6348DO_CRC32(CRC32CH, true, MO_16)
6349DO_CRC32(CRC32CW, true, MO_32)
6350
6351#undef DO_CRC32
6352
6353/*
6354 * Miscellaneous instructions
6355 */
6356
6357static bool trans_MRS_bank(DisasContext *s, arg_MRS_bank *a)
6358{
6359    if (arm_dc_feature(s, ARM_FEATURE_M)) {
6360        return false;
6361    }
6362    gen_mrs_banked(s, a->r, a->sysm, a->rd);
6363    return true;
6364}
6365
6366static bool trans_MSR_bank(DisasContext *s, arg_MSR_bank *a)
6367{
6368    if (arm_dc_feature(s, ARM_FEATURE_M)) {
6369        return false;
6370    }
6371    gen_msr_banked(s, a->r, a->sysm, a->rn);
6372    return true;
6373}
6374
6375static bool trans_MRS_reg(DisasContext *s, arg_MRS_reg *a)
6376{
6377    TCGv_i32 tmp;
6378
6379    if (arm_dc_feature(s, ARM_FEATURE_M)) {
6380        return false;
6381    }
6382    if (a->r) {
6383        if (IS_USER(s)) {
6384            unallocated_encoding(s);
6385            return true;
6386        }
6387        tmp = load_cpu_field(spsr);
6388    } else {
6389        tmp = tcg_temp_new_i32();
6390        gen_helper_cpsr_read(tmp, cpu_env);
6391    }
6392    store_reg(s, a->rd, tmp);
6393    return true;
6394}
6395
6396static bool trans_MSR_reg(DisasContext *s, arg_MSR_reg *a)
6397{
6398    TCGv_i32 tmp;
6399    uint32_t mask = msr_mask(s, a->mask, a->r);
6400
6401    if (arm_dc_feature(s, ARM_FEATURE_M)) {
6402        return false;
6403    }
6404    tmp = load_reg(s, a->rn);
6405    if (gen_set_psr(s, mask, a->r, tmp)) {
6406        unallocated_encoding(s);
6407    }
6408    return true;
6409}
6410
6411static bool trans_MRS_v7m(DisasContext *s, arg_MRS_v7m *a)
6412{
6413    TCGv_i32 tmp;
6414
6415    if (!arm_dc_feature(s, ARM_FEATURE_M)) {
6416        return false;
6417    }
6418    tmp = tcg_temp_new_i32();
6419    gen_helper_v7m_mrs(tmp, cpu_env, tcg_constant_i32(a->sysm));
6420    store_reg(s, a->rd, tmp);
6421    return true;
6422}
6423
6424static bool trans_MSR_v7m(DisasContext *s, arg_MSR_v7m *a)
6425{
6426    TCGv_i32 addr, reg;
6427
6428    if (!arm_dc_feature(s, ARM_FEATURE_M)) {
6429        return false;
6430    }
6431    addr = tcg_constant_i32((a->mask << 10) | a->sysm);
6432    reg = load_reg(s, a->rn);
6433    gen_helper_v7m_msr(cpu_env, addr, reg);
6434    tcg_temp_free_i32(reg);
6435    /* If we wrote to CONTROL, the EL might have changed */
6436    gen_rebuild_hflags(s, true);
6437    gen_lookup_tb(s);
6438    return true;
6439}
6440
6441static bool trans_BX(DisasContext *s, arg_BX *a)
6442{
6443    if (!ENABLE_ARCH_4T) {
6444        return false;
6445    }
6446    gen_bx_excret(s, load_reg(s, a->rm));
6447    return true;
6448}
6449
6450static bool trans_BXJ(DisasContext *s, arg_BXJ *a)
6451{
6452    if (!ENABLE_ARCH_5J || arm_dc_feature(s, ARM_FEATURE_M)) {
6453        return false;
6454    }
6455    /*
6456     * v7A allows BXJ to be trapped via HSTR.TJDBX. We don't waste a
6457     * TBFLAGS bit on a basically-never-happens case, so call a helper
6458     * function to check for the trap and raise the exception if needed
6459     * (passing it the register number for the syndrome value).
6460     * v8A doesn't have this HSTR bit.
6461     */
6462    if (!arm_dc_feature(s, ARM_FEATURE_V8) &&
6463        arm_dc_feature(s, ARM_FEATURE_EL2) &&
6464        s->current_el < 2 && s->ns) {
6465        gen_helper_check_bxj_trap(cpu_env, tcg_constant_i32(a->rm));
6466    }
6467    /* Trivial implementation equivalent to bx.  */
6468    gen_bx(s, load_reg(s, a->rm));
6469    return true;
6470}
6471
6472static bool trans_BLX_r(DisasContext *s, arg_BLX_r *a)
6473{
6474    TCGv_i32 tmp;
6475
6476    if (!ENABLE_ARCH_5) {
6477        return false;
6478    }
6479    tmp = load_reg(s, a->rm);
6480    tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | s->thumb);
6481    gen_bx(s, tmp);
6482    return true;
6483}
6484
6485/*
6486 * BXNS/BLXNS: only exist for v8M with the security extensions,
6487 * and always UNDEF if NonSecure.  We don't implement these in
6488 * the user-only mode either (in theory you can use them from
6489 * Secure User mode but they are too tied in to system emulation).
6490 */
6491static bool trans_BXNS(DisasContext *s, arg_BXNS *a)
6492{
6493    if (!s->v8m_secure || IS_USER_ONLY) {
6494        unallocated_encoding(s);
6495    } else {
6496        gen_bxns(s, a->rm);
6497    }
6498    return true;
6499}
6500
6501static bool trans_BLXNS(DisasContext *s, arg_BLXNS *a)
6502{
6503    if (!s->v8m_secure || IS_USER_ONLY) {
6504        unallocated_encoding(s);
6505    } else {
6506        gen_blxns(s, a->rm);
6507    }
6508    return true;
6509}
6510
6511static bool trans_CLZ(DisasContext *s, arg_CLZ *a)
6512{
6513    TCGv_i32 tmp;
6514
6515    if (!ENABLE_ARCH_5) {
6516        return false;
6517    }
6518    tmp = load_reg(s, a->rm);
6519    tcg_gen_clzi_i32(tmp, tmp, 32);
6520    store_reg(s, a->rd, tmp);
6521    return true;
6522}
6523
6524static bool trans_ERET(DisasContext *s, arg_ERET *a)
6525{
6526    TCGv_i32 tmp;
6527
6528    if (!arm_dc_feature(s, ARM_FEATURE_V7VE)) {
6529        return false;
6530    }
6531    if (IS_USER(s)) {
6532        unallocated_encoding(s);
6533        return true;
6534    }
6535    if (s->current_el == 2) {
6536        /* ERET from Hyp uses ELR_Hyp, not LR */
6537        tmp = load_cpu_field(elr_el[2]);
6538    } else {
6539        tmp = load_reg(s, 14);
6540    }
6541    gen_exception_return(s, tmp);
6542    return true;
6543}
6544
6545static bool trans_HLT(DisasContext *s, arg_HLT *a)
6546{
6547    gen_hlt(s, a->imm);
6548    return true;
6549}
6550
6551static bool trans_BKPT(DisasContext *s, arg_BKPT *a)
6552{
6553    if (!ENABLE_ARCH_5) {
6554        return false;
6555    }
6556    /* BKPT is OK with ECI set and leaves it untouched */
6557    s->eci_handled = true;
6558    if (arm_dc_feature(s, ARM_FEATURE_M) &&
6559        semihosting_enabled() &&
6560#ifndef CONFIG_USER_ONLY
6561        !IS_USER(s) &&
6562#endif
6563        (a->imm == 0xab)) {
6564        gen_exception_internal_insn(s, s->pc_curr, EXCP_SEMIHOST);
6565    } else {
6566        gen_exception_bkpt_insn(s, syn_aa32_bkpt(a->imm, false));
6567    }
6568    return true;
6569}
6570
6571static bool trans_HVC(DisasContext *s, arg_HVC *a)
6572{
6573    if (!ENABLE_ARCH_7 || arm_dc_feature(s, ARM_FEATURE_M)) {
6574        return false;
6575    }
6576    if (IS_USER(s)) {
6577        unallocated_encoding(s);
6578    } else {
6579        gen_hvc(s, a->imm);
6580    }
6581    return true;
6582}
6583
6584static bool trans_SMC(DisasContext *s, arg_SMC *a)
6585{
6586    if (!ENABLE_ARCH_6K || arm_dc_feature(s, ARM_FEATURE_M)) {
6587        return false;
6588    }
6589    if (IS_USER(s)) {
6590        unallocated_encoding(s);
6591    } else {
6592        gen_smc(s);
6593    }
6594    return true;
6595}
6596
6597static bool trans_SG(DisasContext *s, arg_SG *a)
6598{
6599    if (!arm_dc_feature(s, ARM_FEATURE_M) ||
6600        !arm_dc_feature(s, ARM_FEATURE_V8)) {
6601        return false;
6602    }
6603    /*
6604     * SG (v8M only)
6605     * The bulk of the behaviour for this instruction is implemented
6606     * in v7m_handle_execute_nsc(), which deals with the insn when
6607     * it is executed by a CPU in non-secure state from memory
6608     * which is Secure & NonSecure-Callable.
6609     * Here we only need to handle the remaining cases:
6610     *  * in NS memory (including the "security extension not
6611     *    implemented" case) : NOP
6612     *  * in S memory but CPU already secure (clear IT bits)
6613     * We know that the attribute for the memory this insn is
6614     * in must match the current CPU state, because otherwise
6615     * get_phys_addr_pmsav8 would have generated an exception.
6616     */
6617    if (s->v8m_secure) {
6618        /* Like the IT insn, we don't need to generate any code */
6619        s->condexec_cond = 0;
6620        s->condexec_mask = 0;
6621    }
6622    return true;
6623}
6624
6625static bool trans_TT(DisasContext *s, arg_TT *a)
6626{
6627    TCGv_i32 addr, tmp;
6628
6629    if (!arm_dc_feature(s, ARM_FEATURE_M) ||
6630        !arm_dc_feature(s, ARM_FEATURE_V8)) {
6631        return false;
6632    }
6633    if (a->rd == 13 || a->rd == 15 || a->rn == 15) {
6634        /* We UNDEF for these UNPREDICTABLE cases */
6635        unallocated_encoding(s);
6636        return true;
6637    }
6638    if (a->A && !s->v8m_secure) {
6639        /* This case is UNDEFINED.  */
6640        unallocated_encoding(s);
6641        return true;
6642    }
6643
6644    addr = load_reg(s, a->rn);
6645    tmp = tcg_temp_new_i32();
6646    gen_helper_v7m_tt(tmp, cpu_env, addr, tcg_constant_i32((a->A << 1) | a->T));
6647    tcg_temp_free_i32(addr);
6648    store_reg(s, a->rd, tmp);
6649    return true;
6650}
6651
6652/*
6653 * Load/store register index
6654 */
6655
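/*
 * Build the ISS (instruction syndrome) information for a load/store,
 * used if the access takes a data abort: invalid when there is base
 * writeback, and flagged as 16-bit when the encoding is only 2 bytes.
 */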
6656static ISSInfo make_issinfo(DisasContext *s, int rd, bool p, bool w)
6657{
6658    ISSInfo ret;
6659
6660    /* ISS not valid if writeback */
6661    if (p && !w) {
6662        ret = rd;
6663        if (s->base.pc_next - s->pc_curr == 2) {
6664            ret |= ISSIs16Bit;
6665        }
6666    } else {
6667        ret = ISSInvalid;
6668    }
6669    return ret;
6670}
6671
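/*
 * Compute the address for the register-offset forms: for P=1 the
 * (shifted) Rm offset is applied here; for P=0 (post-indexed) the base
 * is used unmodified and the offset is applied in op_addr_rr_post.
 */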
6672static TCGv_i32 op_addr_rr_pre(DisasContext *s, arg_ldst_rr *a)
6673{
6674    TCGv_i32 addr = load_reg(s, a->rn);
6675
6676    if (s->v8m_stackcheck && a->rn == 13 && a->w) {
6677        gen_helper_v8m_stackcheck(cpu_env, addr);
6678    }
6679
6680    if (a->p) {
6681        TCGv_i32 ofs = load_reg(s, a->rm);
6682        gen_arm_shift_im(ofs, a->shtype, a->shimm, 0);
6683        if (a->u) {
6684            tcg_gen_add_i32(addr, addr, ofs);
6685        } else {
6686            tcg_gen_sub_i32(addr, addr, ofs);
6687        }
6688        tcg_temp_free_i32(ofs);
6689    }
6690    return addr;
6691}
6692
6693static void op_addr_rr_post(DisasContext *s, arg_ldst_rr *a,
6694                            TCGv_i32 addr, int address_offset)
6695{
6696    if (!a->p) {
6697        TCGv_i32 ofs = load_reg(s, a->rm);
6698        gen_arm_shift_im(ofs, a->shtype, a->shimm, 0);
6699        if (a->u) {
6700            tcg_gen_add_i32(addr, addr, ofs);
6701        } else {
6702            tcg_gen_sub_i32(addr, addr, ofs);
6703        }
6704        tcg_temp_free_i32(ofs);
6705    } else if (!a->w) {
6706        tcg_temp_free_i32(addr);
6707        return;
6708    }
6709    tcg_gen_addi_i32(addr, addr, address_offset);
6710    store_reg(s, a->rn, addr);
6711}
6712
6713static bool op_load_rr(DisasContext *s, arg_ldst_rr *a,
6714                       MemOp mop, int mem_idx)
6715{
6716    ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w);
6717    TCGv_i32 addr, tmp;
6718
6719    addr = op_addr_rr_pre(s, a);
6720
6721    tmp = tcg_temp_new_i32();
6722    gen_aa32_ld_i32(s, tmp, addr, mem_idx, mop);
6723    disas_set_da_iss(s, mop, issinfo);
6724
6725    /*
6726     * Perform base writeback before the loaded value to
6727     * ensure correct behavior with overlapping index registers.
6728     */
6729    op_addr_rr_post(s, a, addr, 0);
6730    store_reg_from_load(s, a->rt, tmp);
6731    return true;
6732}
6733
6734static bool op_store_rr(DisasContext *s, arg_ldst_rr *a,
6735                        MemOp mop, int mem_idx)
6736{
6737    ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w) | ISSIsWrite;
6738    TCGv_i32 addr, tmp;
6739
6740    /*
6741     * In Thumb encodings of stores Rn=1111 is UNDEF; for Arm it
6742     * is either UNPREDICTABLE or has defined behaviour
6743     */
6744    if (s->thumb && a->rn == 15) {
6745        return false;
6746    }
6747
6748    addr = op_addr_rr_pre(s, a);
6749
6750    tmp = load_reg(s, a->rt);
6751    gen_aa32_st_i32(s, tmp, addr, mem_idx, mop);
6752    disas_set_da_iss(s, mop, issinfo);
6753    tcg_temp_free_i32(tmp);
6754
6755    op_addr_rr_post(s, a, addr, 0);
6756    return true;
6757}
6758
6759static bool trans_LDRD_rr(DisasContext *s, arg_ldst_rr *a)
6760{
6761    int mem_idx = get_mem_index(s);
6762    TCGv_i32 addr, tmp;
6763
6764    if (!ENABLE_ARCH_5TE) {
6765        return false;
6766    }
6767    if (a->rt & 1) {
6768        unallocated_encoding(s);
6769        return true;
6770    }
6771    addr = op_addr_rr_pre(s, a);
6772
6773    tmp = tcg_temp_new_i32();
6774    gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6775    store_reg(s, a->rt, tmp);
6776
6777    tcg_gen_addi_i32(addr, addr, 4);
6778
6779    tmp = tcg_temp_new_i32();
6780    gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6781    store_reg(s, a->rt + 1, tmp);
6782
6783    /* LDRD w/ base writeback is undefined if the registers overlap.  */
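    /*
     * addr still points at the second word; pass -4 so that any base
     * writeback uses the correct value.
     */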
6784    op_addr_rr_post(s, a, addr, -4);
6785    return true;
6786}
6787
6788static bool trans_STRD_rr(DisasContext *s, arg_ldst_rr *a)
6789{
6790    int mem_idx = get_mem_index(s);
6791    TCGv_i32 addr, tmp;
6792
6793    if (!ENABLE_ARCH_5TE) {
6794        return false;
6795    }
6796    if (a->rt & 1) {
6797        unallocated_encoding(s);
6798        return true;
6799    }
6800    addr = op_addr_rr_pre(s, a);
6801
6802    tmp = load_reg(s, a->rt);
6803    gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6804    tcg_temp_free_i32(tmp);
6805
6806    tcg_gen_addi_i32(addr, addr, 4);
6807
6808    tmp = load_reg(s, a->rt + 1);
6809    gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6810    tcg_temp_free_i32(tmp);
6811
6812    op_addr_rr_post(s, a, addr, -4);
6813    return true;
6814}
6815
6816/*
6817 * Load/store immediate index
6818 */
6819
6820static TCGv_i32 op_addr_ri_pre(DisasContext *s, arg_ldst_ri *a)
6821{
6822    int ofs = a->imm;
6823
6824    if (!a->u) {
6825        ofs = -ofs;
6826    }
6827
6828    if (s->v8m_stackcheck && a->rn == 13 && a->w) {
6829        /*
6830         * Stackcheck. Here we know 'addr' is the current SP;
6831         * U is set if we're moving SP up, else down. It is
6832         * UNKNOWN whether the limit check triggers when SP starts
6833         * below the limit and ends up above it; we choose to trigger it.
6834         */
6835        if (!a->u) {
6836            TCGv_i32 newsp = tcg_temp_new_i32();
6837            tcg_gen_addi_i32(newsp, cpu_R[13], ofs);
6838            gen_helper_v8m_stackcheck(cpu_env, newsp);
6839            tcg_temp_free_i32(newsp);
6840        } else {
6841            gen_helper_v8m_stackcheck(cpu_env, cpu_R[13]);
6842        }
6843    }
6844
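    /* add_reg_for_lit returns Rn, or the word-aligned PC when Rn is 15. */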
6845    return add_reg_for_lit(s, a->rn, a->p ? ofs : 0);
6846}
6847
6848static void op_addr_ri_post(DisasContext *s, arg_ldst_ri *a,
6849                            TCGv_i32 addr, int address_offset)
6850{
6851    if (!a->p) {
6852        if (a->u) {
6853            address_offset += a->imm;
6854        } else {
6855            address_offset -= a->imm;
6856        }
6857    } else if (!a->w) {
6858        tcg_temp_free_i32(addr);
6859        return;
6860    }
6861    tcg_gen_addi_i32(addr, addr, address_offset);
6862    store_reg(s, a->rn, addr);
6863}
6864
6865static bool op_load_ri(DisasContext *s, arg_ldst_ri *a,
6866                       MemOp mop, int mem_idx)
6867{
6868    ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w);
6869    TCGv_i32 addr, tmp;
6870
6871    addr = op_addr_ri_pre(s, a);
6872
6873    tmp = tcg_temp_new_i32();
6874    gen_aa32_ld_i32(s, tmp, addr, mem_idx, mop);
6875    disas_set_da_iss(s, mop, issinfo);
6876
6877    /*
6878     * Perform base writeback before the loaded value to
6879     * ensure correct behavior with overlapping index registers.
6880     */
6881    op_addr_ri_post(s, a, addr, 0);
6882    store_reg_from_load(s, a->rt, tmp);
6883    return true;
6884}
6885
6886static bool op_store_ri(DisasContext *s, arg_ldst_ri *a,
6887                        MemOp mop, int mem_idx)
6888{
6889    ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w) | ISSIsWrite;
6890    TCGv_i32 addr, tmp;
6891
6892    /*
6893     * In Thumb encodings of stores Rn=1111 is UNDEF; for Arm it
6894     * is either UNPREDICTABLE or has defined behaviour
6895     */
6896    if (s->thumb && a->rn == 15) {
6897        return false;
6898    }
6899
6900    addr = op_addr_ri_pre(s, a);
6901
6902    tmp = load_reg(s, a->rt);
6903    gen_aa32_st_i32(s, tmp, addr, mem_idx, mop);
6904    disas_set_da_iss(s, mop, issinfo);
6905    tcg_temp_free_i32(tmp);
6906
6907    op_addr_ri_post(s, a, addr, 0);
6908    return true;
6909}
6910
6911static bool op_ldrd_ri(DisasContext *s, arg_ldst_ri *a, int rt2)
6912{
6913    int mem_idx = get_mem_index(s);
6914    TCGv_i32 addr, tmp;
6915
6916    addr = op_addr_ri_pre(s, a);
6917
6918    tmp = tcg_temp_new_i32();
6919    gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6920    store_reg(s, a->rt, tmp);
6921
6922    tcg_gen_addi_i32(addr, addr, 4);
6923
6924    tmp = tcg_temp_new_i32();
6925    gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6926    store_reg(s, rt2, tmp);
6927
6928    /* LDRD w/ base writeback is undefined if the registers overlap.  */
6929    op_addr_ri_post(s, a, addr, -4);
6930    return true;
6931}
6932
6933static bool trans_LDRD_ri_a32(DisasContext *s, arg_ldst_ri *a)
6934{
6935    if (!ENABLE_ARCH_5TE || (a->rt & 1)) {
6936        return false;
6937    }
6938    return op_ldrd_ri(s, a, a->rt + 1);
6939}
6940
6941static bool trans_LDRD_ri_t32(DisasContext *s, arg_ldst_ri2 *a)
6942{
6943    arg_ldst_ri b = {
6944        .u = a->u, .w = a->w, .p = a->p,
6945        .rn = a->rn, .rt = a->rt, .imm = a->imm
6946    };
6947    return op_ldrd_ri(s, &b, a->rt2);
6948}
6949
6950static bool op_strd_ri(DisasContext *s, arg_ldst_ri *a, int rt2)
6951{
6952    int mem_idx = get_mem_index(s);
6953    TCGv_i32 addr, tmp;
6954
6955    addr = op_addr_ri_pre(s, a);
6956
6957    tmp = load_reg(s, a->rt);
6958    gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6959    tcg_temp_free_i32(tmp);
6960
6961    tcg_gen_addi_i32(addr, addr, 4);
6962
6963    tmp = load_reg(s, rt2);
6964    gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6965    tcg_temp_free_i32(tmp);
6966
6967    op_addr_ri_post(s, a, addr, -4);
6968    return true;
6969}
6970
6971static bool trans_STRD_ri_a32(DisasContext *s, arg_ldst_ri *a)
6972{
6973    if (!ENABLE_ARCH_5TE || (a->rt & 1)) {
6974        return false;
6975    }
6976    return op_strd_ri(s, a, a->rt + 1);
6977}
6978
6979static bool trans_STRD_ri_t32(DisasContext *s, arg_ldst_ri2 *a)
6980{
6981    arg_ldst_ri b = {
6982        .u = a->u, .w = a->w, .p = a->p,
6983        .rn = a->rn, .rt = a->rt, .imm = a->imm
6984    };
6985    return op_strd_ri(s, &b, a->rt2);
6986}
6987
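/*
 * Expand the trans_* functions for one load/store flavour: the normal
 * form and the unprivileged (xxxT) form, for both the immediate and
 * register offset encodings.
 */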
6988#define DO_LDST(NAME, WHICH, MEMOP) \
6989static bool trans_##NAME##_ri(DisasContext *s, arg_ldst_ri *a)        \
6990{                                                                     \
6991    return op_##WHICH##_ri(s, a, MEMOP, get_mem_index(s));            \
6992}                                                                     \
6993static bool trans_##NAME##T_ri(DisasContext *s, arg_ldst_ri *a)       \
6994{                                                                     \
6995    return op_##WHICH##_ri(s, a, MEMOP, get_a32_user_mem_index(s));   \
6996}                                                                     \
6997static bool trans_##NAME##_rr(DisasContext *s, arg_ldst_rr *a)        \
6998{                                                                     \
6999    return op_##WHICH##_rr(s, a, MEMOP, get_mem_index(s));            \
7000}                                                                     \
7001static bool trans_##NAME##T_rr(DisasContext *s, arg_ldst_rr *a)       \
7002{                                                                     \
7003    return op_##WHICH##_rr(s, a, MEMOP, get_a32_user_mem_index(s));   \
7004}
7005
7006DO_LDST(LDR, load, MO_UL)
7007DO_LDST(LDRB, load, MO_UB)
7008DO_LDST(LDRH, load, MO_UW)
7009DO_LDST(LDRSB, load, MO_SB)
7010DO_LDST(LDRSH, load, MO_SW)
7011
7012DO_LDST(STR, store, MO_UL)
7013DO_LDST(STRB, store, MO_UB)
7014DO_LDST(STRH, store, MO_UW)
7015
7016#undef DO_LDST
7017
7018/*
7019 * Synchronization primitives
7020 */
7021
7022static bool op_swp(DisasContext *s, arg_SWP *a, MemOp opc)
7023{
7024    TCGv_i32 addr, tmp;
7025    TCGv taddr;
7026
7027    opc |= s->be_data;
7028    addr = load_reg(s, a->rn);
7029    taddr = gen_aa32_addr(s, addr, opc);
7030    tcg_temp_free_i32(addr);
7031
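    /*
     * The swap itself is a single atomic exchange: Rt2's value goes to
     * memory and the old memory value is returned in Rt.
     */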
7032    tmp = load_reg(s, a->rt2);
7033    tcg_gen_atomic_xchg_i32(tmp, taddr, tmp, get_mem_index(s), opc);
7034    tcg_temp_free(taddr);
7035
7036    store_reg(s, a->rt, tmp);
7037    return true;
7038}
7039
7040static bool trans_SWP(DisasContext *s, arg_SWP *a)
7041{
7042    return op_swp(s, a, MO_UL | MO_ALIGN);
7043}
7044
7045static bool trans_SWPB(DisasContext *s, arg_SWP *a)
7046{
7047    return op_swp(s, a, MO_UB);
7048}
7049
7050/*
7051 * Load/Store Exclusive and Load-Acquire/Store-Release
7052 */
7053
7054static bool op_strex(DisasContext *s, arg_STREX *a, MemOp mop, bool rel)
7055{
7056    TCGv_i32 addr;
7057    /* Some cases stopped being UNPREDICTABLE in v8A (but not v8M) */
7058    bool v8a = ENABLE_ARCH_8 && !arm_dc_feature(s, ARM_FEATURE_M);
7059
7060    /* We UNDEF for these UNPREDICTABLE cases.  */
7061    if (a->rd == 15 || a->rn == 15 || a->rt == 15
7062        || a->rd == a->rn || a->rd == a->rt
7063        || (!v8a && s->thumb && (a->rd == 13 || a->rt == 13))
7064        || (mop == MO_64
7065            && (a->rt2 == 15
7066                || a->rd == a->rt2
7067                || (!v8a && s->thumb && a->rt2 == 13)))) {
7068        unallocated_encoding(s);
7069        return true;
7070    }
7071
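    /* For the store-release forms, the barrier comes before the store. */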
7072    if (rel) {
7073        tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
7074    }
7075
7076    addr = tcg_temp_local_new_i32();
7077    load_reg_var(s, addr, a->rn);
7078    tcg_gen_addi_i32(addr, addr, a->imm);
7079
7080    gen_store_exclusive(s, a->rd, a->rt, a->rt2, addr, mop);
7081    tcg_temp_free_i32(addr);
7082    return true;
7083}
7084
7085static bool trans_STREX(DisasContext *s, arg_STREX *a)
7086{
7087    if (!ENABLE_ARCH_6) {
7088        return false;
7089    }
7090    return op_strex(s, a, MO_32, false);
7091}
7092
7093static bool trans_STREXD_a32(DisasContext *s, arg_STREX *a)
7094{
7095    if (!ENABLE_ARCH_6K) {
7096        return false;
7097    }
7098    /* We UNDEF for these UNPREDICTABLE cases.  */
7099    if (a->rt & 1) {
7100        unallocated_encoding(s);
7101        return true;
7102    }
7103    a->rt2 = a->rt + 1;
7104    return op_strex(s, a, MO_64, false);
7105}
7106
7107static bool trans_STREXD_t32(DisasContext *s, arg_STREX *a)
7108{
7109    return op_strex(s, a, MO_64, false);
7110}
7111
7112static bool trans_STREXB(DisasContext *s, arg_STREX *a)
7113{
7114    if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
7115        return false;
7116    }
7117    return op_strex(s, a, MO_8, false);
7118}
7119
7120static bool trans_STREXH(DisasContext *s, arg_STREX *a)
7121{
7122    if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
7123        return false;
7124    }
7125    return op_strex(s, a, MO_16, false);
7126}
7127
7128static bool trans_STLEX(DisasContext *s, arg_STREX *a)
7129{
7130    if (!ENABLE_ARCH_8) {
7131        return false;
7132    }
7133    return op_strex(s, a, MO_32, true);
7134}
7135
7136static bool trans_STLEXD_a32(DisasContext *s, arg_STREX *a)
7137{
7138    if (!ENABLE_ARCH_8) {
7139        return false;
7140    }
7141    /* We UNDEF for these UNPREDICTABLE cases.  */
7142    if (a->rt & 1) {
7143        unallocated_encoding(s);
7144        return true;
7145    }
7146    a->rt2 = a->rt + 1;
7147    return op_strex(s, a, MO_64, true);
7148}
7149
7150static bool trans_STLEXD_t32(DisasContext *s, arg_STREX *a)
7151{
7152    if (!ENABLE_ARCH_8) {
7153        return false;
7154    }
7155    return op_strex(s, a, MO_64, true);
7156}
7157
7158static bool trans_STLEXB(DisasContext *s, arg_STREX *a)
7159{
7160    if (!ENABLE_ARCH_8) {
7161        return false;
7162    }
7163    return op_strex(s, a, MO_8, true);
7164}
7165
7166static bool trans_STLEXH(DisasContext *s, arg_STREX *a)
7167{
7168    if (!ENABLE_ARCH_8) {
7169        return false;
7170    }
7171    return op_strex(s, a, MO_16, true);
7172}
7173
7174static bool op_stl(DisasContext *s, arg_STL *a, MemOp mop)
7175{
7176    TCGv_i32 addr, tmp;
7177
7178    if (!ENABLE_ARCH_8) {
7179        return false;
7180    }
7181    /* We UNDEF for these UNPREDICTABLE cases.  */
7182    if (a->rn == 15 || a->rt == 15) {
7183        unallocated_encoding(s);
7184        return true;
7185    }
7186
7187    addr = load_reg(s, a->rn);
7188    tmp = load_reg(s, a->rt);
7189    tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
7190    gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), mop | MO_ALIGN);
7191    disas_set_da_iss(s, mop, a->rt | ISSIsAcqRel | ISSIsWrite);
7192
7193    tcg_temp_free_i32(tmp);
7194    tcg_temp_free_i32(addr);
7195    return true;
7196}
7197
7198static bool trans_STL(DisasContext *s, arg_STL *a)
7199{
7200    return op_stl(s, a, MO_UL);
7201}
7202
7203static bool trans_STLB(DisasContext *s, arg_STL *a)
7204{
7205    return op_stl(s, a, MO_UB);
7206}
7207
7208static bool trans_STLH(DisasContext *s, arg_STL *a)
7209{
7210    return op_stl(s, a, MO_UW);
7211}
7212
7213static bool op_ldrex(DisasContext *s, arg_LDREX *a, MemOp mop, bool acq)
7214{
7215    TCGv_i32 addr;
7216    /* Some cases stopped being UNPREDICTABLE in v8A (but not v8M) */
7217    bool v8a = ENABLE_ARCH_8 && !arm_dc_feature(s, ARM_FEATURE_M);
7218
7219    /* We UNDEF for these UNPREDICTABLE cases.  */
7220    if (a->rn == 15 || a->rt == 15
7221        || (!v8a && s->thumb && a->rt == 13)
7222        || (mop == MO_64
7223            && (a->rt2 == 15 || a->rt == a->rt2
7224                || (!v8a && s->thumb && a->rt2 == 13)))) {
7225        unallocated_encoding(s);
7226        return true;
7227    }
7228
7229    addr = tcg_temp_local_new_i32();
7230    load_reg_var(s, addr, a->rn);
7231    tcg_gen_addi_i32(addr, addr, a->imm);
7232
7233    gen_load_exclusive(s, a->rt, a->rt2, addr, mop);
7234    tcg_temp_free_i32(addr);
7235
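    /* For the load-acquire forms, the barrier follows the load. */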
7236    if (acq) {
7237        tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
7238    }
7239    return true;
7240}
7241
7242static bool trans_LDREX(DisasContext *s, arg_LDREX *a)
7243{
7244    if (!ENABLE_ARCH_6) {
7245        return false;
7246    }
7247    return op_ldrex(s, a, MO_32, false);
7248}
7249
7250static bool trans_LDREXD_a32(DisasContext *s, arg_LDREX *a)
7251{
7252    if (!ENABLE_ARCH_6K) {
7253        return false;
7254    }
7255    /* We UNDEF for these UNPREDICTABLE cases.  */
7256    if (a->rt & 1) {
7257        unallocated_encoding(s);
7258        return true;
7259    }
7260    a->rt2 = a->rt + 1;
7261    return op_ldrex(s, a, MO_64, false);
7262}
7263
7264static bool trans_LDREXD_t32(DisasContext *s, arg_LDREX *a)
7265{
7266    return op_ldrex(s, a, MO_64, false);
7267}
7268
7269static bool trans_LDREXB(DisasContext *s, arg_LDREX *a)
7270{
7271    if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
7272        return false;
7273    }
7274    return op_ldrex(s, a, MO_8, false);
7275}
7276
7277static bool trans_LDREXH(DisasContext *s, arg_LDREX *a)
7278{
7279    if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
7280        return false;
7281    }
7282    return op_ldrex(s, a, MO_16, false);
7283}
7284
7285static bool trans_LDAEX(DisasContext *s, arg_LDREX *a)
7286{
7287    if (!ENABLE_ARCH_8) {
7288        return false;
7289    }
7290    return op_ldrex(s, a, MO_32, true);
7291}
7292
7293static bool trans_LDAEXD_a32(DisasContext *s, arg_LDREX *a)
7294{
7295    if (!ENABLE_ARCH_8) {
7296        return false;
7297    }
7298    /* We UNDEF for these UNPREDICTABLE cases.  */
7299    if (a->rt & 1) {
7300        unallocated_encoding(s);
7301        return true;
7302    }
7303    a->rt2 = a->rt + 1;
7304    return op_ldrex(s, a, MO_64, true);
7305}
7306
7307static bool trans_LDAEXD_t32(DisasContext *s, arg_LDREX *a)
7308{
7309    if (!ENABLE_ARCH_8) {
7310        return false;
7311    }
7312    return op_ldrex(s, a, MO_64, true);
7313}
7314
7315static bool trans_LDAEXB(DisasContext *s, arg_LDREX *a)
7316{
7317    if (!ENABLE_ARCH_8) {
7318        return false;
7319    }
7320    return op_ldrex(s, a, MO_8, true);
7321}
7322
7323static bool trans_LDAEXH(DisasContext *s, arg_LDREX *a)
7324{
7325    if (!ENABLE_ARCH_8) {
7326        return false;
7327    }
7328    return op_ldrex(s, a, MO_16, true);
7329}
7330
7331static bool op_lda(DisasContext *s, arg_LDA *a, MemOp mop)
7332{
7333    TCGv_i32 addr, tmp;
7334
7335    if (!ENABLE_ARCH_8) {
7336        return false;
7337    }
7338    /* We UNDEF for these UNPREDICTABLE cases.  */
7339    if (a->rn == 15 || a->rt == 15) {
7340        unallocated_encoding(s);
7341        return true;
7342    }
7343
7344    addr = load_reg(s, a->rn);
7345    tmp = tcg_temp_new_i32();
7346    gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), mop | MO_ALIGN);
7347    disas_set_da_iss(s, mop, a->rt | ISSIsAcqRel);
7348    tcg_temp_free_i32(addr);
7349
7350    store_reg(s, a->rt, tmp);
7351    tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
7352    return true;
7353}
7354
7355static bool trans_LDA(DisasContext *s, arg_LDA *a)
7356{
7357    return op_lda(s, a, MO_UL);
7358}
7359
7360static bool trans_LDAB(DisasContext *s, arg_LDA *a)
7361{
7362    return op_lda(s, a, MO_UB);
7363}
7364
7365static bool trans_LDAH(DisasContext *s, arg_LDA *a)
7366{
7367    return op_lda(s, a, MO_UW);
7368}
7369
7370/*
7371 * Media instructions
7372 */
7373
7374static bool trans_USADA8(DisasContext *s, arg_USADA8 *a)
7375{
7376    TCGv_i32 t1, t2;
7377
7378    if (!ENABLE_ARCH_6) {
7379        return false;
7380    }
7381
7382    t1 = load_reg(s, a->rn);
7383    t2 = load_reg(s, a->rm);
7384    gen_helper_usad8(t1, t1, t2);
7385    tcg_temp_free_i32(t2);
7386    if (a->ra != 15) {
7387        t2 = load_reg(s, a->ra);
7388        tcg_gen_add_i32(t1, t1, t2);
7389        tcg_temp_free_i32(t2);
7390    }
7391    store_reg(s, a->rd, t1);
7392    return true;
7393}
7394
7395static bool op_bfx(DisasContext *s, arg_UBFX *a, bool u)
7396{
7397    TCGv_i32 tmp;
7398    int width = a->widthm1 + 1;
7399    int shift = a->lsb;
7400
7401    if (!ENABLE_ARCH_6T2) {
7402        return false;
7403    }
7404    if (shift + width > 32) {
7405        /* UNPREDICTABLE; we choose to UNDEF */
7406        unallocated_encoding(s);
7407        return true;
7408    }
7409
7410    tmp = load_reg(s, a->rn);
7411    if (u) {
7412        tcg_gen_extract_i32(tmp, tmp, shift, width);
7413    } else {
7414        tcg_gen_sextract_i32(tmp, tmp, shift, width);
7415    }
7416    store_reg(s, a->rd, tmp);
7417    return true;
7418}
7419
7420static bool trans_SBFX(DisasContext *s, arg_SBFX *a)
7421{
7422    return op_bfx(s, a, false);
7423}
7424
7425static bool trans_UBFX(DisasContext *s, arg_UBFX *a)
7426{
7427    return op_bfx(s, a, true);
7428}
7429
7430static bool trans_BFCI(DisasContext *s, arg_BFCI *a)
7431{
7432    TCGv_i32 tmp;
7433    int msb = a->msb, lsb = a->lsb;
7434    int width;
7435
7436    if (!ENABLE_ARCH_6T2) {
7437        return false;
7438    }
7439    if (msb < lsb) {
7440        /* UNPREDICTABLE; we choose to UNDEF */
7441        unallocated_encoding(s);
7442        return true;
7443    }
7444
7445    width = msb + 1 - lsb;
7446    if (a->rn == 15) {
7447        /* BFC */
7448        tmp = tcg_const_i32(0);
7449    } else {
7450        /* BFI */
7451        tmp = load_reg(s, a->rn);
7452    }
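    /*
     * A full-width field (lsb 0, msb 31) replaces Rd entirely, so no
     * deposit into the old Rd value is needed.
     */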
7453    if (width != 32) {
7454        TCGv_i32 tmp2 = load_reg(s, a->rd);
7455        tcg_gen_deposit_i32(tmp, tmp2, tmp, lsb, width);
7456        tcg_temp_free_i32(tmp2);
7457    }
7458    store_reg(s, a->rd, tmp);
7459    return true;
7460}
7461
7462static bool trans_UDF(DisasContext *s, arg_UDF *a)
7463{
7464    unallocated_encoding(s);
7465    return true;
7466}
7467
7468/*
7469 * Parallel addition and subtraction
7470 */
7471
7472static bool op_par_addsub(DisasContext *s, arg_rrr *a,
7473                          void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32))
7474{
7475    TCGv_i32 t0, t1;
7476
7477    if (s->thumb
7478        ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7479        : !ENABLE_ARCH_6) {
7480        return false;
7481    }
7482
7483    t0 = load_reg(s, a->rn);
7484    t1 = load_reg(s, a->rm);
7485
7486    gen(t0, t0, t1);
7487
7488    tcg_temp_free_i32(t1);
7489    store_reg(s, a->rd, t0);
7490    return true;
7491}
7492
7493static bool op_par_addsub_ge(DisasContext *s, arg_rrr *a,
7494                             void (*gen)(TCGv_i32, TCGv_i32,
7495                                         TCGv_i32, TCGv_ptr))
7496{
7497    TCGv_i32 t0, t1;
7498    TCGv_ptr ge;
7499
7500    if (s->thumb
7501        ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7502        : !ENABLE_ARCH_6) {
7503        return false;
7504    }
7505
7506    t0 = load_reg(s, a->rn);
7507    t1 = load_reg(s, a->rm);
7508
7509    ge = tcg_temp_new_ptr();
7510    tcg_gen_addi_ptr(ge, cpu_env, offsetof(CPUARMState, GE));
7511    gen(t0, t0, t1, ge);
7512
7513    tcg_temp_free_ptr(ge);
7514    tcg_temp_free_i32(t1);
7515    store_reg(s, a->rd, t0);
7516    return true;
7517}
7518
7519#define DO_PAR_ADDSUB(NAME, helper) \
7520static bool trans_##NAME(DisasContext *s, arg_rrr *a)   \
7521{                                                       \
7522    return op_par_addsub(s, a, helper);                 \
7523}
7524
7525#define DO_PAR_ADDSUB_GE(NAME, helper) \
7526static bool trans_##NAME(DisasContext *s, arg_rrr *a)   \
7527{                                                       \
7528    return op_par_addsub_ge(s, a, helper);              \
7529}
7530
7531DO_PAR_ADDSUB_GE(SADD16, gen_helper_sadd16)
7532DO_PAR_ADDSUB_GE(SASX, gen_helper_saddsubx)
7533DO_PAR_ADDSUB_GE(SSAX, gen_helper_ssubaddx)
7534DO_PAR_ADDSUB_GE(SSUB16, gen_helper_ssub16)
7535DO_PAR_ADDSUB_GE(SADD8, gen_helper_sadd8)
7536DO_PAR_ADDSUB_GE(SSUB8, gen_helper_ssub8)
7537
7538DO_PAR_ADDSUB_GE(UADD16, gen_helper_uadd16)
7539DO_PAR_ADDSUB_GE(UASX, gen_helper_uaddsubx)
7540DO_PAR_ADDSUB_GE(USAX, gen_helper_usubaddx)
7541DO_PAR_ADDSUB_GE(USUB16, gen_helper_usub16)
7542DO_PAR_ADDSUB_GE(UADD8, gen_helper_uadd8)
7543DO_PAR_ADDSUB_GE(USUB8, gen_helper_usub8)
7544
7545DO_PAR_ADDSUB(QADD16, gen_helper_qadd16)
7546DO_PAR_ADDSUB(QASX, gen_helper_qaddsubx)
7547DO_PAR_ADDSUB(QSAX, gen_helper_qsubaddx)
7548DO_PAR_ADDSUB(QSUB16, gen_helper_qsub16)
7549DO_PAR_ADDSUB(QADD8, gen_helper_qadd8)
7550DO_PAR_ADDSUB(QSUB8, gen_helper_qsub8)
7551
7552DO_PAR_ADDSUB(UQADD16, gen_helper_uqadd16)
7553DO_PAR_ADDSUB(UQASX, gen_helper_uqaddsubx)
7554DO_PAR_ADDSUB(UQSAX, gen_helper_uqsubaddx)
7555DO_PAR_ADDSUB(UQSUB16, gen_helper_uqsub16)
7556DO_PAR_ADDSUB(UQADD8, gen_helper_uqadd8)
7557DO_PAR_ADDSUB(UQSUB8, gen_helper_uqsub8)
7558
7559DO_PAR_ADDSUB(SHADD16, gen_helper_shadd16)
7560DO_PAR_ADDSUB(SHASX, gen_helper_shaddsubx)
7561DO_PAR_ADDSUB(SHSAX, gen_helper_shsubaddx)
7562DO_PAR_ADDSUB(SHSUB16, gen_helper_shsub16)
7563DO_PAR_ADDSUB(SHADD8, gen_helper_shadd8)
7564DO_PAR_ADDSUB(SHSUB8, gen_helper_shsub8)
7565
7566DO_PAR_ADDSUB(UHADD16, gen_helper_uhadd16)
7567DO_PAR_ADDSUB(UHASX, gen_helper_uhaddsubx)
7568DO_PAR_ADDSUB(UHSAX, gen_helper_uhsubaddx)
7569DO_PAR_ADDSUB(UHSUB16, gen_helper_uhsub16)
7570DO_PAR_ADDSUB(UHADD8, gen_helper_uhadd8)
7571DO_PAR_ADDSUB(UHSUB8, gen_helper_uhsub8)
7572
7573#undef DO_PAR_ADDSUB
7574#undef DO_PAR_ADDSUB_GE
7575
7576/*
7577 * Packing, unpacking, saturation, and reversal
7578 */
7579
7580static bool trans_PKH(DisasContext *s, arg_PKH *a)
7581{
7582    TCGv_i32 tn, tm;
7583    int shift = a->imm;
7584
7585    if (s->thumb
7586        ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7587        : !ENABLE_ARCH_6) {
7588        return false;
7589    }
7590
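    /*
     * PKHBT: Rd = (Rm LSL imm)[31:16] : Rn[15:0]
     * PKHTB: Rd = Rn[31:16] : (Rm ASR imm)[15:0], where imm == 0
     * encodes ASR #32 (ASR #31 gives the same all-sign-bits result).
     */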
7591    tn = load_reg(s, a->rn);
7592    tm = load_reg(s, a->rm);
7593    if (a->tb) {
7594        /* PKHTB */
7595        if (shift == 0) {
7596            shift = 31;
7597        }
7598        tcg_gen_sari_i32(tm, tm, shift);
7599        tcg_gen_deposit_i32(tn, tn, tm, 0, 16);
7600    } else {
7601        /* PKHBT */
7602        tcg_gen_shli_i32(tm, tm, shift);
7603        tcg_gen_deposit_i32(tn, tm, tn, 0, 16);
7604    }
7605    tcg_temp_free_i32(tm);
7606    store_reg(s, a->rd, tn);
7607    return true;
7608}
7609
7610static bool op_sat(DisasContext *s, arg_sat *a,
7611                   void (*gen)(TCGv_i32, TCGv_env, TCGv_i32, TCGv_i32))
7612{
7613    TCGv_i32 tmp;
7614    int shift = a->imm;
7615
7616    if (!ENABLE_ARCH_6) {
7617        return false;
7618    }
7619
7620    tmp = load_reg(s, a->rn);
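    /*
     * sh selects ASR vs LSL; an ASR amount of 0 encodes ASR #32, for
     * which ASR #31 gives the same result.
     */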
7621    if (a->sh) {
7622        tcg_gen_sari_i32(tmp, tmp, shift ? shift : 31);
7623    } else {
7624        tcg_gen_shli_i32(tmp, tmp, shift);
7625    }
7626
7627    gen(tmp, cpu_env, tmp, tcg_constant_i32(a->satimm));
7628
7629    store_reg(s, a->rd, tmp);
7630    return true;
7631}
7632
7633static bool trans_SSAT(DisasContext *s, arg_sat *a)
7634{
7635    return op_sat(s, a, gen_helper_ssat);
7636}
7637
7638static bool trans_USAT(DisasContext *s, arg_sat *a)
7639{
7640    return op_sat(s, a, gen_helper_usat);
7641}
7642
7643static bool trans_SSAT16(DisasContext *s, arg_sat *a)
7644{
7645    if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
7646        return false;
7647    }
7648    return op_sat(s, a, gen_helper_ssat16);
7649}
7650
7651static bool trans_USAT16(DisasContext *s, arg_sat *a)
7652{
7653    if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
7654        return false;
7655    }
7656    return op_sat(s, a, gen_helper_usat16);
7657}
7658
7659static bool op_xta(DisasContext *s, arg_rrr_rot *a,
7660                   void (*gen_extract)(TCGv_i32, TCGv_i32),
7661                   void (*gen_add)(TCGv_i32, TCGv_i32, TCGv_i32))
7662{
7663    TCGv_i32 tmp;
7664
7665    if (!ENABLE_ARCH_6) {
7666        return false;
7667    }
7668
7669    tmp = load_reg(s, a->rm);
7670    /*
7671     * TODO: In many cases we could do a shift instead of a rotate.
7672     * Combined with a simple extend, that becomes an extract.
7673     */
7674    tcg_gen_rotri_i32(tmp, tmp, a->rot * 8);
7675    gen_extract(tmp, tmp);
7676
7677    if (a->rn != 15) {
7678        TCGv_i32 tmp2 = load_reg(s, a->rn);
7679        gen_add(tmp, tmp, tmp2);
7680        tcg_temp_free_i32(tmp2);
7681    }
7682    store_reg(s, a->rd, tmp);
7683    return true;
7684}
7685
7686static bool trans_SXTAB(DisasContext *s, arg_rrr_rot *a)
7687{
7688    return op_xta(s, a, tcg_gen_ext8s_i32, tcg_gen_add_i32);
7689}
7690
7691static bool trans_SXTAH(DisasContext *s, arg_rrr_rot *a)
7692{
7693    return op_xta(s, a, tcg_gen_ext16s_i32, tcg_gen_add_i32);
7694}
7695
7696static bool trans_SXTAB16(DisasContext *s, arg_rrr_rot *a)
7697{
7698    if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
7699        return false;
7700    }
7701    return op_xta(s, a, gen_helper_sxtb16, gen_add16);
7702}
7703
7704static bool trans_UXTAB(DisasContext *s, arg_rrr_rot *a)
7705{
7706    return op_xta(s, a, tcg_gen_ext8u_i32, tcg_gen_add_i32);
7707}
7708
7709static bool trans_UXTAH(DisasContext *s, arg_rrr_rot *a)
7710{
7711    return op_xta(s, a, tcg_gen_ext16u_i32, tcg_gen_add_i32);
7712}
7713
7714static bool trans_UXTAB16(DisasContext *s, arg_rrr_rot *a)
7715{
7716    if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
7717        return false;
7718    }
7719    return op_xta(s, a, gen_helper_uxtb16, gen_add16);
7720}
7721
7722static bool trans_SEL(DisasContext *s, arg_rrr *a)
7723{
7724    TCGv_i32 t1, t2, t3;
7725
7726    if (s->thumb
7727        ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7728        : !ENABLE_ARCH_6) {
7729        return false;
7730    }
7731
7732    t1 = load_reg(s, a->rn);
7733    t2 = load_reg(s, a->rm);
7734    t3 = tcg_temp_new_i32();
7735    tcg_gen_ld_i32(t3, cpu_env, offsetof(CPUARMState, GE));
7736    gen_helper_sel_flags(t1, t3, t1, t2);
7737    tcg_temp_free_i32(t3);
7738    tcg_temp_free_i32(t2);
7739    store_reg(s, a->rd, t1);
7740    return true;
7741}
7742
7743static bool op_rr(DisasContext *s, arg_rr *a,
7744                  void (*gen)(TCGv_i32, TCGv_i32))
7745{
7746    TCGv_i32 tmp;
7747
7748    tmp = load_reg(s, a->rm);
7749    gen(tmp, tmp);
7750    store_reg(s, a->rd, tmp);
7751    return true;
7752}
7753
7754static bool trans_REV(DisasContext *s, arg_rr *a)
7755{
7756    if (!ENABLE_ARCH_6) {
7757        return false;
7758    }
7759    return op_rr(s, a, tcg_gen_bswap32_i32);
7760}
7761
7762static bool trans_REV16(DisasContext *s, arg_rr *a)
7763{
7764    if (!ENABLE_ARCH_6) {
7765        return false;
7766    }
7767    return op_rr(s, a, gen_rev16);
7768}
7769
7770static bool trans_REVSH(DisasContext *s, arg_rr *a)
7771{
7772    if (!ENABLE_ARCH_6) {
7773        return false;
7774    }
7775    return op_rr(s, a, gen_revsh);
7776}
7777
7778static bool trans_RBIT(DisasContext *s, arg_rr *a)
7779{
7780    if (!ENABLE_ARCH_6T2) {
7781        return false;
7782    }
7783    return op_rr(s, a, gen_helper_rbit);
7784}
7785
7786/*
7787 * Signed multiply, signed and unsigned divide
7788 */
7789
7790static bool op_smlad(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub)
7791{
7792    TCGv_i32 t1, t2;
7793
7794    if (!ENABLE_ARCH_6) {
7795        return false;
7796    }
7797
7798    t1 = load_reg(s, a->rn);
7799    t2 = load_reg(s, a->rm);
7800    if (m_swap) {
7801        gen_swap_half(t2, t2);
7802    }
7803    gen_smul_dual(t1, t2);
7804
7805    if (sub) {
7806        /*
7807         * This subtraction cannot overflow, so we can do a simple
7808         * 32-bit subtraction and then a possible 32-bit saturating
7809         * addition of Ra.
7810         */
7811        tcg_gen_sub_i32(t1, t1, t2);
7812        tcg_temp_free_i32(t2);
7813
7814        if (a->ra != 15) {
7815            t2 = load_reg(s, a->ra);
7816            gen_helper_add_setq(t1, cpu_env, t1, t2);
7817            tcg_temp_free_i32(t2);
7818        }
7819    } else if (a->ra == 15) {
7820        /* Single saturation-checking addition */
7821        gen_helper_add_setq(t1, cpu_env, t1, t2);
7822        tcg_temp_free_i32(t2);
7823    } else {
7824        /*
7825         * We need to add the products and Ra together and then
7826         * determine whether the final result overflowed. Doing
7827         * this as two separate add-and-check-overflow steps incorrectly
7828         * sets Q for cases like (-32768 * -32768) + (-32768 * -32768) + -1.
7829         * Do all the arithmetic at 64-bits and then check for overflow.
7830         */
7831        TCGv_i64 p64, q64;
7832        TCGv_i32 t3, qf, one;
7833
7834        p64 = tcg_temp_new_i64();
7835        q64 = tcg_temp_new_i64();
7836        tcg_gen_ext_i32_i64(p64, t1);
7837        tcg_gen_ext_i32_i64(q64, t2);
7838        tcg_gen_add_i64(p64, p64, q64);
7839        load_reg_var(s, t2, a->ra);
7840        tcg_gen_ext_i32_i64(q64, t2);
7841        tcg_gen_add_i64(p64, p64, q64);
7842        tcg_temp_free_i64(q64);
7843
7844        tcg_gen_extr_i64_i32(t1, t2, p64);
7845        tcg_temp_free_i64(p64);
7846        /*
7847         * t1 is the low half of the result which goes into Rd.
7848         * We have overflow and must set Q if the high half (t2)
7849         * is different from the sign-extension of t1.
7850         */
7851        t3 = tcg_temp_new_i32();
7852        tcg_gen_sari_i32(t3, t1, 31);
7853        qf = load_cpu_field(QF);
7854        one = tcg_constant_i32(1);
7855        tcg_gen_movcond_i32(TCG_COND_NE, qf, t2, t3, one, qf);
7856        store_cpu_field(qf, QF);
7857        tcg_temp_free_i32(t3);
7858        tcg_temp_free_i32(t2);
7859    }
7860    store_reg(s, a->rd, t1);
7861    return true;
7862}
7863
7864static bool trans_SMLAD(DisasContext *s, arg_rrrr *a)
7865{
7866    return op_smlad(s, a, false, false);
7867}
7868
7869static bool trans_SMLADX(DisasContext *s, arg_rrrr *a)
7870{
7871    return op_smlad(s, a, true, false);
7872}
7873
7874static bool trans_SMLSD(DisasContext *s, arg_rrrr *a)
7875{
7876    return op_smlad(s, a, false, true);
7877}
7878
7879static bool trans_SMLSDX(DisasContext *s, arg_rrrr *a)
7880{
7881    return op_smlad(s, a, true, true);
7882}
7883
7884static bool op_smlald(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub)
7885{
7886    TCGv_i32 t1, t2;
7887    TCGv_i64 l1, l2;
7888
7889    if (!ENABLE_ARCH_6) {
7890        return false;
7891    }
7892
7893    t1 = load_reg(s, a->rn);
7894    t2 = load_reg(s, a->rm);
7895    if (m_swap) {
7896        gen_swap_half(t2, t2);
7897    }
7898    gen_smul_dual(t1, t2);
7899
7900    l1 = tcg_temp_new_i64();
7901    l2 = tcg_temp_new_i64();
7902    tcg_gen_ext_i32_i64(l1, t1);
7903    tcg_gen_ext_i32_i64(l2, t2);
7904    tcg_temp_free_i32(t1);
7905    tcg_temp_free_i32(t2);
7906
7907    if (sub) {
7908        tcg_gen_sub_i64(l1, l1, l2);
7909    } else {
7910        tcg_gen_add_i64(l1, l1, l2);
7911    }
7912    tcg_temp_free_i64(l2);
7913
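    /*
     * Unlike SMLAD, these accumulate into a 64-bit register pair, so
     * there is no saturation (Q flag) handling here.
     */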
7914    gen_addq(s, l1, a->ra, a->rd);
7915    gen_storeq_reg(s, a->ra, a->rd, l1);
7916    tcg_temp_free_i64(l1);
7917    return true;
7918}
7919
7920static bool trans_SMLALD(DisasContext *s, arg_rrrr *a)
7921{
7922    return op_smlald(s, a, false, false);
7923}
7924
7925static bool trans_SMLALDX(DisasContext *s, arg_rrrr *a)
7926{
7927    return op_smlald(s, a, true, false);
7928}
7929
7930static bool trans_SMLSLD(DisasContext *s, arg_rrrr *a)
7931{
7932    return op_smlald(s, a, false, true);
7933}
7934
7935static bool trans_SMLSLDX(DisasContext *s, arg_rrrr *a)
7936{
7937    return op_smlald(s, a, true, true);
7938}
7939
7940static bool op_smmla(DisasContext *s, arg_rrrr *a, bool round, bool sub)
7941{
7942    TCGv_i32 t1, t2;
7943
7944    if (s->thumb
7945        ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7946        : !ENABLE_ARCH_6) {
7947        return false;
7948    }
7949
7950    t1 = load_reg(s, a->rn);
7951    t2 = load_reg(s, a->rm);
7952    tcg_gen_muls2_i32(t2, t1, t1, t2);
7953
7954    if (a->ra != 15) {
7955        TCGv_i32 t3 = load_reg(s, a->ra);
7956        if (sub) {
7957            /*
7958             * For SMMLS, we need a 64-bit subtract: it accounts for the
7959             * borrow caused by a non-zero multiplicand lowpart, and gives
7960             * the correct result lowpart for rounding.
7961             */
7962            tcg_gen_sub2_i32(t2, t1, tcg_constant_i32(0), t3, t2, t1);
7963        } else {
7964            tcg_gen_add_i32(t1, t1, t3);
7965        }
7966        tcg_temp_free_i32(t3);
7967    }
7968    if (round) {
7969        /*
7970         * Adding 0x80000000 to the 64-bit quantity means that we have
7971         * a carry into the high word when the low word has the msb set.
7972         */
7973        tcg_gen_shri_i32(t2, t2, 31);
7974        tcg_gen_add_i32(t1, t1, t2);
7975    }
7976    tcg_temp_free_i32(t2);
7977    store_reg(s, a->rd, t1);
7978    return true;
7979}
7980
7981static bool trans_SMMLA(DisasContext *s, arg_rrrr *a)
7982{
7983    return op_smmla(s, a, false, false);
7984}
7985
7986static bool trans_SMMLAR(DisasContext *s, arg_rrrr *a)
7987{
7988    return op_smmla(s, a, true, false);
7989}
7990
7991static bool trans_SMMLS(DisasContext *s, arg_rrrr *a)
7992{
7993    return op_smmla(s, a, false, true);
7994}
7995
7996static bool trans_SMMLSR(DisasContext *s, arg_rrrr *a)
7997{
7998    return op_smmla(s, a, true, true);
7999}
8000
8001static bool op_div(DisasContext *s, arg_rrr *a, bool u)
8002{
8003    TCGv_i32 t1, t2;
8004
8005    if (s->thumb
8006        ? !dc_isar_feature(aa32_thumb_div, s)
8007        : !dc_isar_feature(aa32_arm_div, s)) {
8008        return false;
8009    }
8010
8011    t1 = load_reg(s, a->rn);
8012    t2 = load_reg(s, a->rm);
8013    if (u) {
8014        gen_helper_udiv(t1, cpu_env, t1, t2);
8015    } else {
8016        gen_helper_sdiv(t1, cpu_env, t1, t2);
8017    }
8018    tcg_temp_free_i32(t2);
8019    store_reg(s, a->rd, t1);
8020    return true;
8021}
8022
8023static bool trans_SDIV(DisasContext *s, arg_rrr *a)
8024{
8025    return op_div(s, a, false);
8026}
8027
8028static bool trans_UDIV(DisasContext *s, arg_rrr *a)
8029{
8030    return op_div(s, a, true);
8031}
8032
8033/*
8034 * Block data transfer
8035 */
8036
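/*
 * Compute the lowest address the block transfer will touch: e.g.
 * Rn - n*4 for a decrement-before transfer of n registers, or Rn
 * itself for increment-after; the loop then walks upwards by 4.
 */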
8037static TCGv_i32 op_addr_block_pre(DisasContext *s, arg_ldst_block *a, int n)
8038{
8039    TCGv_i32 addr = load_reg(s, a->rn);
8040
8041    if (a->b) {
8042        if (a->i) {
8043            /* pre increment */
8044            tcg_gen_addi_i32(addr, addr, 4);
8045        } else {
8046            /* pre decrement */
8047            tcg_gen_addi_i32(addr, addr, -(n * 4));
8048        }
8049    } else if (!a->i && n != 1) {
8050        /* post decrement */
8051        tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
8052    }
8053
8054    if (s->v8m_stackcheck && a->rn == 13 && a->w) {
8055        /*
8056         * If the writeback is incrementing SP rather than
8057         * decrementing it, and the initial SP is below the
8058         * stack limit but the final written-back SP would
8059         * be above, then we must not perform any memory
8060         * accesses, but it is IMPDEF whether we generate
8061         * an exception. We choose to do so in this case.
8062         * At this point 'addr' is the lowest address, so
8063         * either the original SP (if incrementing) or our
8064         * final SP (if decrementing), so that's what we check.
8065         */
8066        gen_helper_v8m_stackcheck(cpu_env, addr);
8067    }
8068
8069    return addr;
8070}
8071
8072static void op_addr_block_post(DisasContext *s, arg_ldst_block *a,
8073                               TCGv_i32 addr, int n)
8074{
8075    if (a->w) {
8076        /* write back */
8077        if (!a->b) {
8078            if (a->i) {
8079                /* post increment */
8080                tcg_gen_addi_i32(addr, addr, 4);
8081            } else {
8082                /* post decrement */
8083                tcg_gen_addi_i32(addr, addr, -(n * 4));
8084            }
8085        } else if (!a->i && n != 1) {
8086            /* pre decrement */
8087            tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
8088        }
8089        store_reg(s, a->rn, addr);
8090    } else {
8091        tcg_temp_free_i32(addr);
8092    }
8093}
8094
8095static bool op_stm(DisasContext *s, arg_ldst_block *a, int min_n)
8096{
8097    int i, j, n, list, mem_idx;
8098    bool user = a->u;
8099    TCGv_i32 addr, tmp;
8100
8101    if (user) {
8102        /* STM (user) */
8103        if (IS_USER(s)) {
8104            /* Only usable in supervisor mode.  */
8105            unallocated_encoding(s);
8106            return true;
8107        }
8108    }
8109
8110    list = a->list;
8111    n = ctpop16(list);
8112    if (n < min_n || a->rn == 15) {
8113        unallocated_encoding(s);
8114        return true;
8115    }
8116
8117    s->eci_handled = true;
8118
8119    addr = op_addr_block_pre(s, a, n);
8120    mem_idx = get_mem_index(s);
8121
8122    for (i = j = 0; i < 16; i++) {
8123        if (!(list & (1 << i))) {
8124            continue;
8125        }
8126
8127        if (user && i != 15) {
8128            tmp = tcg_temp_new_i32();
8129            gen_helper_get_user_reg(tmp, cpu_env, tcg_constant_i32(i));
8130        } else {
8131            tmp = load_reg(s, i);
8132        }
8133        gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
8134        tcg_temp_free_i32(tmp);
8135
8136        /* No need to add after the last transfer.  */
8137        if (++j != n) {
8138            tcg_gen_addi_i32(addr, addr, 4);
8139        }
8140    }
8141
8142    op_addr_block_post(s, a, addr, n);
8143    clear_eci_state(s);
8144    return true;
8145}
8146
8147static bool trans_STM(DisasContext *s, arg_ldst_block *a)
8148{
8149    /* BitCount(list) < 1 is UNPREDICTABLE */
8150    return op_stm(s, a, 1);
8151}
8152
8153static bool trans_STM_t32(DisasContext *s, arg_ldst_block *a)
8154{
8155    /* Writeback register in register list is UNPREDICTABLE for T32.  */
8156    if (a->w && (a->list & (1 << a->rn))) {
8157        unallocated_encoding(s);
8158        return true;
8159    }
8160    /* BitCount(list) < 2 is UNPREDICTABLE */
8161    return op_stm(s, a, 2);
8162}
8163
8164static bool do_ldm(DisasContext *s, arg_ldst_block *a, int min_n)
8165{
8166    int i, j, n, list, mem_idx;
8167    bool loaded_base;
8168    bool user = a->u;
8169    bool exc_return = false;
8170    TCGv_i32 addr, tmp, loaded_var;
8171
8172    if (user) {
8173        /* LDM (user), LDM (exception return) */
8174        if (IS_USER(s)) {
8175            /* Only usable in supervisor mode.  */
8176            unallocated_encoding(s);
8177            return true;
8178        }
8179        if (extract32(a->list, 15, 1)) {
8180            exc_return = true;
8181            user = false;
8182        } else {
8183            /* LDM (user) does not allow writeback.  */
8184            if (a->w) {
8185                unallocated_encoding(s);
8186                return true;
8187            }
8188        }
8189    }
8190
8191    list = a->list;
8192    n = ctpop16(list);
8193    if (n < min_n || a->rn == 15) {
8194        unallocated_encoding(s);
8195        return true;
8196    }
8197
8198    s->eci_handled = true;
8199
8200    addr = op_addr_block_pre(s, a, n);
8201    mem_idx = get_mem_index(s);
8202    loaded_base = false;
8203    loaded_var = NULL;
8204
8205    for (i = j = 0; i < 16; i++) {
8206        if (!(list & (1 << i))) {
8207            continue;
8208        }
8209
8210        tmp = tcg_temp_new_i32();
8211        gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
8212        if (user) {
8213            gen_helper_set_user_reg(cpu_env, tcg_constant_i32(i), tmp);
8214            tcg_temp_free_i32(tmp);
8215        } else if (i == a->rn) {
8216            loaded_var = tmp;
8217            loaded_base = true;
8218        } else if (i == 15 && exc_return) {
8219            store_pc_exc_ret(s, tmp);
8220        } else {
8221            store_reg_from_load(s, i, tmp);
8222        }
8223
8224        /* No need to add after the last transfer.  */
8225        if (++j != n) {
8226            tcg_gen_addi_i32(addr, addr, 4);
8227        }
8228    }
8229
8230    op_addr_block_post(s, a, addr, n);
8231
8232    if (loaded_base) {
8233        /* Note that we reject base == pc above.  */
8234        store_reg(s, a->rn, loaded_var);
8235    }
8236
8237    if (exc_return) {
8238        /* Restore CPSR from SPSR.  */
8239        tmp = load_cpu_field(spsr);
8240        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
8241            gen_io_start();
8242        }
8243        gen_helper_cpsr_write_eret(cpu_env, tmp);
8244        tcg_temp_free_i32(tmp);
8245        /* Must exit loop to check un-masked IRQs */
8246        s->base.is_jmp = DISAS_EXIT;
8247    }
8248    clear_eci_state(s);
8249    return true;
8250}
8251
8252static bool trans_LDM_a32(DisasContext *s, arg_ldst_block *a)
8253{
8254    /*
8255     * Writeback register in register list is UNPREDICTABLE
8256     * for ArchVersion() >= 7.  Prior to v7, A32 would write
8257     * an UNKNOWN value to the base register.
8258     */
8259    if (ENABLE_ARCH_7 && a->w && (a->list & (1 << a->rn))) {
8260        unallocated_encoding(s);
8261        return true;
8262    }
8263    /* BitCount(list) < 1 is UNPREDICTABLE */
8264    return do_ldm(s, a, 1);
8265}
8266
8267static bool trans_LDM_t32(DisasContext *s, arg_ldst_block *a)
8268{
8269    /* Writeback register in register list is UNPREDICTABLE for T32. */
8270    if (a->w && (a->list & (1 << a->rn))) {
8271        unallocated_encoding(s);
8272        return true;
8273    }
8274    /* BitCount(list) < 2 is UNPREDICTABLE */
8275    return do_ldm(s, a, 2);
8276}
8277
8278static bool trans_LDM_t16(DisasContext *s, arg_ldst_block *a)
8279{
8280    /* Writeback is conditional on the base register not being loaded.  */
8281    a->w = !(a->list & (1 << a->rn));
8282    /* BitCount(list) < 1 is UNPREDICTABLE */
8283    return do_ldm(s, a, 1);
8284}
8285
8286static bool trans_CLRM(DisasContext *s, arg_CLRM *a)
8287{
8288    int i;
8289    TCGv_i32 zero;
8290
8291    if (!dc_isar_feature(aa32_m_sec_state, s)) {
8292        return false;
8293    }
8294
8295    if (extract32(a->list, 13, 1)) {
8296        return false;
8297    }
8298
8299    if (!a->list) {
8300        /* UNPREDICTABLE; we choose to UNDEF */
8301        return false;
8302    }
8303
8304    s->eci_handled = true;
8305
8306    zero = tcg_constant_i32(0);
8307    for (i = 0; i < 15; i++) {
8308        if (extract32(a->list, i, 1)) {
8309            /* Clear R[i] */
8310            tcg_gen_mov_i32(cpu_R[i], zero);
8311        }
8312    }
8313    if (extract32(a->list, 15, 1)) {
8314        /*
8315         * Clear APSR (by calling the MSR helper with the same argument
8316         * as for "MSR APSR_nzcvqg, Rn": mask = 0b1100, SYSM=0)
8317         */
8318        gen_helper_v7m_msr(cpu_env, tcg_constant_i32(0xc00), zero);
8319    }
8320    clear_eci_state(s);
8321    return true;
8322}
8323
8324/*
8325 * Branch, branch with link
8326 */
8327
8328static bool trans_B(DisasContext *s, arg_i *a)
8329{
8330    gen_jmp(s, read_pc(s) + a->imm);
8331    return true;
8332}
8333
8334static bool trans_B_cond_thumb(DisasContext *s, arg_ci *a)
8335{
8336    /* This has cond from encoding, required to be outside IT block.  */
8337    if (a->cond >= 0xe) {
8338        return false;
8339    }
8340    if (s->condexec_mask) {
8341        unallocated_encoding(s);
8342        return true;
8343    }
8344    arm_skip_unless(s, a->cond);
8345    gen_jmp(s, read_pc(s) + a->imm);
8346    return true;
8347}
8348
8349static bool trans_BL(DisasContext *s, arg_i *a)
8350{
8351    tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | s->thumb);
8352    gen_jmp(s, read_pc(s) + a->imm);
8353    return true;
8354}
8355
8356static bool trans_BLX_i(DisasContext *s, arg_BLX_i *a)
8357{
8358    /*
8359     * BLX <imm> would be useless on M-profile; the encoding space
8360     * is used for other insns from v8.1M onward, and UNDEFs before that.
8361     */
8362    if (arm_dc_feature(s, ARM_FEATURE_M)) {
8363        return false;
8364    }
8365
8366    /* For A32, ARM_FEATURE_V5 is checked near the start of the uncond block. */
8367    if (s->thumb && (a->imm & 2)) {
8368        return false;
8369    }
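    /*
     * BLX <imm> always switches instruction set: the target is formed
     * from the word-aligned PC and the Thumb bit is inverted.
     */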
8370    tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | s->thumb);
8371    store_cpu_field_constant(!s->thumb, thumb);
8372    gen_jmp(s, (read_pc(s) & ~3) + a->imm);
8373    return true;
8374}
8375
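/*
 * Pre-Thumb2 BL/BLX is split across two 16-bit insns: the prefix parks
 * the upper offset bits in LR, and the suffix combines them with the
 * lower bits and performs the branch-and-link.
 */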
8376static bool trans_BL_BLX_prefix(DisasContext *s, arg_BL_BLX_prefix *a)
8377{
8378    assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
8379    tcg_gen_movi_i32(cpu_R[14], read_pc(s) + (a->imm << 12));
8380    return true;
8381}
8382
8383static bool trans_BL_suffix(DisasContext *s, arg_BL_suffix *a)
8384{
8385    TCGv_i32 tmp = tcg_temp_new_i32();
8386
8387    assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
8388    tcg_gen_addi_i32(tmp, cpu_R[14], (a->imm << 1) | 1);
8389    tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | 1);
8390    gen_bx(s, tmp);
8391    return true;
8392}
8393
8394static bool trans_BLX_suffix(DisasContext *s, arg_BLX_suffix *a)
8395{
8396    TCGv_i32 tmp;
8397
8398    assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
8399    if (!ENABLE_ARCH_5) {
8400        return false;
8401    }
8402    tmp = tcg_temp_new_i32();
8403    tcg_gen_addi_i32(tmp, cpu_R[14], a->imm << 1);
8404    tcg_gen_andi_i32(tmp, tmp, 0xfffffffc);
8405    tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | 1);
8406    gen_bx(s, tmp);
8407    return true;
8408}
8409
8410static bool trans_BF(DisasContext *s, arg_BF *a)
8411{
8412    /*
8413     * M-profile branch future insns. The architecture permits an
8414     * implementation to implement these as NOPs (equivalent to
8415     * discarding the LO_BRANCH_INFO cache immediately), and we
8416     * take that IMPDEF option because for QEMU a "real" implementation
8417     * would be complicated and wouldn't execute any faster.
8418     */
8419    if (!dc_isar_feature(aa32_lob, s)) {
8420        return false;
8421    }
8422    if (a->boff == 0) {
8423        /* SEE "Related encodings" (loop insns) */
8424        return false;
8425    }
8426    /* Handle as NOP */
8427    return true;
8428}
8429
8430static bool trans_DLS(DisasContext *s, arg_DLS *a)
8431{
8432    /* M-profile low-overhead loop start */
8433    TCGv_i32 tmp;
8434
8435    if (!dc_isar_feature(aa32_lob, s)) {
8436        return false;
8437    }
8438    if (a->rn == 13 || a->rn == 15) {
8439        /*
8440         * For DLSTP rn == 15 is a related encoding (LCTP); the
8441         * other cases caught by this condition are all
8442         * CONSTRAINED UNPREDICTABLE: we choose to UNDEF
8443         */
8444        return false;
8445    }
8446
8447    if (a->size != 4) {
8448        /* DLSTP */
8449        if (!dc_isar_feature(aa32_mve, s)) {
8450            return false;
8451        }
8452        if (!vfp_access_check(s)) {
8453            return true;
8454        }
8455    }
8456
8457    /* Not a while loop: set LR to the count, and set LTPSIZE for DLSTP */
8458    tmp = load_reg(s, a->rn);
8459    store_reg(s, 14, tmp);
8460    if (a->size != 4) {
8461        /* DLSTP: set FPSCR.LTPSIZE */
8462        store_cpu_field(tcg_constant_i32(a->size), v7m.ltpsize);
8463        s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
8464    }
8465    return true;
8466}
8467
8468static bool trans_WLS(DisasContext *s, arg_WLS *a)
8469{
8470    /* M-profile low-overhead while-loop start */
8471    TCGv_i32 tmp;
8472    TCGLabel *nextlabel;
8473
8474    if (!dc_isar_feature(aa32_lob, s)) {
8475        return false;
8476    }
8477    if (a->rn == 13 || a->rn == 15) {
8478        /*
8479         * For WLSTP rn == 15 is a related encoding (LE); the
8480         * other cases caught by this condition are all
8481         * CONSTRAINED UNPREDICTABLE: we choose to UNDEF
8482         */
8483        return false;
8484    }
8485    if (s->condexec_mask) {
8486        /*
8487         * WLS in an IT block is CONSTRAINED UNPREDICTABLE;
8488         * we choose to UNDEF, because otherwise our use of
8489         * gen_goto_tb(1) would clash with the use of TB exit 1
8490         * in the dc->condjmp condition-failed codepath in
8491         * arm_tr_tb_stop() and we'd get an assertion.
8492         */
8493        return false;
8494    }
8495    if (a->size != 4) {
8496        /* WLSTP */
8497        if (!dc_isar_feature(aa32_mve, s)) {
8498            return false;
8499        }
8500        /*
8501         * We need to check that the FPU is enabled here, but mustn't
8502         * call vfp_access_check() to do that because we don't want to
8503         * do the lazy state preservation in the "loop count is zero" case.
8504         * Do the check-and-raise-exception by hand.
8505         */
8506        if (s->fp_excp_el) {
8507            gen_exception_insn_el(s, s->pc_curr, EXCP_NOCP,
8508                                  syn_uncategorized(), s->fp_excp_el);
8509            return true;
8510        }
8511    }
8512
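    /*
     * If the loop count in Rn is zero, skip the LR/LTPSIZE setup below
     * and branch to PC + imm, i.e. past the end of the loop body;
     * otherwise fall through into the loop.
     */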
8513    nextlabel = gen_new_label();
8514    tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_R[a->rn], 0, nextlabel);
8515    tmp = load_reg(s, a->rn);
8516    store_reg(s, 14, tmp);
8517    if (a->size != 4) {
8518        /*
8519         * WLSTP: set FPSCR.LTPSIZE. This requires that we do the
8520         * lazy state preservation, new FP context creation, etc,
8521         * that vfp_access_check() does. We know that the actual
8522         * access check will succeed (ie it won't generate code that
8523         * throws an exception) because we did that check by hand earlier.
8524         */
8525        bool ok = vfp_access_check(s);
8526        assert(ok);
8527        store_cpu_field(tcg_constant_i32(a->size), v7m.ltpsize);
8528        /*
8529         * LTPSIZE updated, but MVE_NO_PRED will always be the same thing (0)
8530         * when we take this upcoming exit from this TB, so gen_jmp_tb() is OK.
8531         */
8532    }
8533    gen_jmp_tb(s, s->base.pc_next, 1);
8534
8535    gen_set_label(nextlabel);
8536    gen_jmp(s, read_pc(s) + a->imm);
8537    return true;
8538}
8539
8540static bool trans_LE(DisasContext *s, arg_LE *a)
8541{
8542    /*
8543     * M-profile low-overhead loop end. The architecture permits an
8544     * implementation to discard the LO_BRANCH_INFO cache at any time,
8545     * and we take the IMPDEF option to never set it in the first place
8546     * (equivalent to always discarding it immediately), because for QEMU
8547     * a "real" implementation would be complicated and wouldn't execute
8548     * any faster.
8549     */
8550    TCGv_i32 tmp;
8551    TCGLabel *loopend;
8552    bool fpu_active;
8553
8554    if (!dc_isar_feature(aa32_lob, s)) {
8555        return false;
8556    }
8557    if (a->f && a->tp) {
8558        return false;
8559    }
8560    if (s->condexec_mask) {
8561        /*
8562         * LE in an IT block is CONSTRAINED UNPREDICTABLE;
8563         * we choose to UNDEF, because otherwise our use of
8564         * gen_goto_tb(1) would clash with the use of TB exit 1
8565         * in the dc->condjmp condition-failed codepath in
8566         * arm_tr_tb_stop() and we'd get an assertion.
8567         */
8568        return false;
8569    }
8570    if (a->tp) {
8571        /* LETP */
8572        if (!dc_isar_feature(aa32_mve, s)) {
8573            return false;
8574        }
8575        if (!vfp_access_check(s)) {
8576            s->eci_handled = true;
8577            return true;
8578        }
8579    }
8580
8581    /* LE/LETP is OK with ECI set and leaves it untouched */
8582    s->eci_handled = true;
8583
8584    /*
8585     * With MVE, LTPSIZE might not be 4, and we must emit an INVSTATE
8586     * UsageFault exception for the LE insn in that case. Note that we
8587     * are not directly checking FPSCR.LTPSIZE but instead check the
8588     * pseudocode LTPSIZE() function, which returns 4 if the FPU is
8589     * not currently active (ie ActiveFPState() returns false). We
8590     * can identify not-active purely from our TB state flags, as the
8591     * FPU is active only if:
8592     *  the FPU is enabled
8593     *  AND lazy state preservation is not active
8594     *  AND we do not need a new fp context (this is the ASPEN/FPCA check)
8595     *
8596     * Usually we don't need to care about this distinction between
8597     * LTPSIZE and FPSCR.LTPSIZE, because the code in vfp_access_check()
8598     * will either take an exception or clear the conditions that make
8599     * the FPU not active. But LE is an unusual case of a non-FP insn
8600     * that looks at LTPSIZE.
8601     */
8602    fpu_active = !s->fp_excp_el && !s->v7m_lspact && !s->v7m_new_fp_ctxt_needed;
8603
8604    if (!a->tp && dc_isar_feature(aa32_mve, s) && fpu_active) {
8605        /* Need to do a runtime check for LTPSIZE != 4 */
8606        TCGLabel *skipexc = gen_new_label();
8607        tmp = load_cpu_field(v7m.ltpsize);
8608        tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 4, skipexc);
8609        tcg_temp_free_i32(tmp);
8610        gen_exception_insn(s, s->pc_curr, EXCP_INVSTATE, syn_uncategorized());
8611        gen_set_label(skipexc);
8612    }
8613
8614    if (a->f) {
8615        /* Loop-forever: just jump back to the loop start */
8616        gen_jmp(s, read_pc(s) - a->imm);
8617        return true;
8618    }
8619
8620    /*
8621     * Not loop-forever. If LR <= loop-decrement-value this is the last loop.
8622     * For LE, we know at this point that LTPSIZE must be 4 and the
8623     * loop decrement value is 1. For LETP we need to calculate the decrement
8624     * value from LTPSIZE.
8625     */
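    /*
     * For LETP, LR counts remaining elements and each iteration consumes
     * one 16-byte vector's worth, i.e. 1 << (4 - LTPSIZE) elements; for
     * example LTPSIZE == 1 (halfword elements) gives a decrement of 8.
     */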
8626    loopend = gen_new_label();
8627    if (!a->tp) {
8628        tcg_gen_brcondi_i32(TCG_COND_LEU, cpu_R[14], 1, loopend);
8629        tcg_gen_addi_i32(cpu_R[14], cpu_R[14], -1);
8630    } else {
8631        /*
8632         * Decrement by 1 << (4 - LTPSIZE). We need to use a TCG local
8633         * so that decr stays live after the brcondi.
8634         */
8635        TCGv_i32 decr = tcg_temp_local_new_i32();
8636        TCGv_i32 ltpsize = load_cpu_field(v7m.ltpsize);
8637        tcg_gen_sub_i32(decr, tcg_constant_i32(4), ltpsize);
8638        tcg_gen_shl_i32(decr, tcg_constant_i32(1), decr);
8639        tcg_temp_free_i32(ltpsize);
8640
8641        tcg_gen_brcond_i32(TCG_COND_LEU, cpu_R[14], decr, loopend);
8642
8643        tcg_gen_sub_i32(cpu_R[14], cpu_R[14], decr);
8644        tcg_temp_free_i32(decr);
8645    }
8646    /* Jump back to the loop start */
8647    gen_jmp(s, read_pc(s) - a->imm);
8648
8649    gen_set_label(loopend);
8650    if (a->tp) {
8651        /* Exits from tail-pred loops must reset LTPSIZE to 4 */
8652        store_cpu_field(tcg_constant_i32(4), v7m.ltpsize);
8653    }
8654    /* End TB, continuing to following insn */
8655    gen_jmp_tb(s, s->base.pc_next, 1);
8656    return true;
8657}
8658
8659static bool trans_LCTP(DisasContext *s, arg_LCTP *a)
8660{
8661    /*
8662     * M-profile Loop Clear with Tail Predication. Since our implementation
8663     * doesn't cache branch information, all we need to do is reset
8664     * FPSCR.LTPSIZE to 4.
8665     */
8666
8667    if (!dc_isar_feature(aa32_lob, s) ||
8668        !dc_isar_feature(aa32_mve, s)) {
8669        return false;
8670    }
8671
8672    if (!vfp_access_check(s)) {
8673        return true;
8674    }
8675
8676    store_cpu_field_constant(4, v7m.ltpsize);
8677    return true;
8678}
8679
8680static bool trans_VCTP(DisasContext *s, arg_VCTP *a)
8681{
8682    /*
8683     * M-profile Create Vector Tail Predicate. This insn is itself
8684     * predicated and is subject to beatwise execution.
8685     */
8686    TCGv_i32 rn_shifted, masklen;
8687
8688    if (!dc_isar_feature(aa32_mve, s) || a->rn == 13 || a->rn == 15) {
8689        return false;
8690    }
8691
8692    if (!mve_eci_check(s) || !vfp_access_check(s)) {
8693        return true;
8694    }
8695
8696    /*
8697     * We pre-calculate the mask length here to avoid needing
8698     * multiple helpers specialized for each element size.
8699     * We pass the helper "rn <= (1 << (4 - size)) ? (rn << size) : 16".
8700     */
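    /*
     * For example a word-sized VCTP (size == 2) with Rn == 3 gives
     * masklen = 3 << 2 = 12, covering three of the four word elements;
     * any Rn of 4 or more yields the all-active value 16.
     */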
8701    rn_shifted = tcg_temp_new_i32();
8702    masklen = load_reg(s, a->rn);
8703    tcg_gen_shli_i32(rn_shifted, masklen, a->size);
8704    tcg_gen_movcond_i32(TCG_COND_LEU, masklen,
8705                        masklen, tcg_constant_i32(1 << (4 - a->size)),
8706                        rn_shifted, tcg_constant_i32(16));
8707    gen_helper_mve_vctp(cpu_env, masklen);
8708    tcg_temp_free_i32(masklen);
8709    tcg_temp_free_i32(rn_shifted);
8710    /* This insn updates predication bits */
8711    s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
8712    mve_update_eci(s);
8713    return true;
8714}
8715
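/*
 * Table Branch: load an unsigned byte (TBB) or halfword (TBH) entry
 * from the table at Rn, indexed by Rm (scaled by 2 for TBH), and
 * branch forward to PC + 2 * entry.
 */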
8716static bool op_tbranch(DisasContext *s, arg_tbranch *a, bool half)
8717{
8718    TCGv_i32 addr, tmp;
8719
8720    tmp = load_reg(s, a->rm);
8721    if (half) {
8722        tcg_gen_add_i32(tmp, tmp, tmp);
8723    }
8724    addr = load_reg(s, a->rn);
8725    tcg_gen_add_i32(addr, addr, tmp);
8726
8727    gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), half ? MO_UW : MO_UB);
8728    tcg_temp_free_i32(addr);
8729
8730    tcg_gen_add_i32(tmp, tmp, tmp);
8731    tcg_gen_addi_i32(tmp, tmp, read_pc(s));
8732    store_reg(s, 15, tmp);
8733    return true;
8734}
8735
8736static bool trans_TBB(DisasContext *s, arg_tbranch *a)
8737{
8738    return op_tbranch(s, a, false);
8739}
8740
8741static bool trans_TBH(DisasContext *s, arg_tbranch *a)
8742{
8743    return op_tbranch(s, a, true);
8744}
8745
8746static bool trans_CBZ(DisasContext *s, arg_CBZ *a)
8747{
8748    TCGv_i32 tmp = load_reg(s, a->rn);
8749
8750    arm_gen_condlabel(s);
8751    tcg_gen_brcondi_i32(a->nz ? TCG_COND_EQ : TCG_COND_NE,
8752                        tmp, 0, s->condlabel);
8753    tcg_temp_free_i32(tmp);
8754    gen_jmp(s, read_pc(s) + a->imm);
8755    return true;
8756}
8757
8758/*
8759 * Supervisor call - both T32 & A32 come here so we need to check
8760 * which mode we are in when checking for semihosting.
8761 */
8762
8763static bool trans_SVC(DisasContext *s, arg_SVC *a)
8764{
8765    const uint32_t semihost_imm = s->thumb ? 0xab : 0x123456;
8766
8767    if (!arm_dc_feature(s, ARM_FEATURE_M) && semihosting_enabled() &&
8768#ifndef CONFIG_USER_ONLY
8769        !IS_USER(s) &&
8770#endif
8771        (a->imm == semihost_imm)) {
8772        gen_exception_internal_insn(s, s->pc_curr, EXCP_SEMIHOST);
8773    } else {
8774        gen_set_pc_im(s, s->base.pc_next);
8775        s->svc_imm = a->imm;
8776        s->base.is_jmp = DISAS_SWI;
8777    }
8778    return true;
8779}
8780
8781/*
8782 * Unconditional system instructions
8783 */
8784
8785static bool trans_RFE(DisasContext *s, arg_RFE *a)
8786{
8787    static const int8_t pre_offset[4] = {
8788        /* DA */ -4, /* IA */ 0, /* DB */ -8, /* IB */ 4
8789    };
8790    static const int8_t post_offset[4] = {
8791        /* DA */ -8, /* IA */ 4, /* DB */ -4, /* IB */ 0
8792    };
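    /*
     * For example RFEIA (a->pu == 1): pre_offset is 0, so the return PC
     * is loaded from [Rn] and the saved CPSR from [Rn + 4]; with
     * writeback the post_offset of 4 leaves Rn incremented by 8.
     */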
8793    TCGv_i32 addr, t1, t2;
8794
8795    if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
8796        return false;
8797    }
8798    if (IS_USER(s)) {
8799        unallocated_encoding(s);
8800        return true;
8801    }
8802
8803    addr = load_reg(s, a->rn);
8804    tcg_gen_addi_i32(addr, addr, pre_offset[a->pu]);
8805
8806    /* Load PC into t1 and CPSR into t2.  */
8807    t1 = tcg_temp_new_i32();
8808    gen_aa32_ld_i32(s, t1, addr, get_mem_index(s), MO_UL | MO_ALIGN);
8809    tcg_gen_addi_i32(addr, addr, 4);
8810    t2 = tcg_temp_new_i32();
8811    gen_aa32_ld_i32(s, t2, addr, get_mem_index(s), MO_UL | MO_ALIGN);
8812
8813    if (a->w) {
8814        /* Base writeback.  */
8815        tcg_gen_addi_i32(addr, addr, post_offset[a->pu]);
8816        store_reg(s, a->rn, addr);
8817    } else {
8818        tcg_temp_free_i32(addr);
8819    }
8820    gen_rfe(s, t1, t2);
8821    return true;
8822}
8823
8824static bool trans_SRS(DisasContext *s, arg_SRS *a)
8825{
8826    if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
8827        return false;
8828    }
8829    gen_srs(s, a->mode, a->pu, a->w);
8830    return true;
8831}
8832
8833static bool trans_CPS(DisasContext *s, arg_CPS *a)
8834{
8835    uint32_t mask, val;
8836
8837    if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
8838        return false;
8839    }
8840    if (IS_USER(s)) {
8841        /* Implemented as NOP in user mode.  */
8842        return true;
8843    }
8844    /* TODO: There are quite a lot of UNPREDICTABLE argument combinations. */
8845
8846    mask = val = 0;
8847    if (a->imod & 2) {
8848        if (a->A) {
8849            mask |= CPSR_A;
8850        }
8851        if (a->I) {
8852            mask |= CPSR_I;
8853        }
8854        if (a->F) {
8855            mask |= CPSR_F;
8856        }
8857        if (a->imod & 1) {
8858            val |= mask;
8859        }
8860    }
8861    if (a->M) {
8862        mask |= CPSR_M;
8863        val |= a->mode;
8864    }
8865    if (mask) {
8866        gen_set_psr_im(s, mask, 0, val);
8867    }
8868    return true;
8869}
8870
8871static bool trans_CPS_v7m(DisasContext *s, arg_CPS_v7m *a)
8872{
8873    TCGv_i32 tmp, addr;
8874
8875    if (!arm_dc_feature(s, ARM_FEATURE_M)) {
8876        return false;
8877    }
8878    if (IS_USER(s)) {
8879        /* Implemented as NOP in user mode.  */
8880        return true;
8881    }
8882
8883    tmp = tcg_constant_i32(a->im);
8884    /* FAULTMASK */
8885    if (a->F) {
8886        addr = tcg_constant_i32(19);
8887        gen_helper_v7m_msr(cpu_env, addr, tmp);
8888    }
8889    /* PRIMASK */
8890    if (a->I) {
8891        addr = tcg_constant_i32(16);
8892        gen_helper_v7m_msr(cpu_env, addr, tmp);
8893    }
8894    gen_rebuild_hflags(s, false);
8895    gen_lookup_tb(s);
8896    return true;
8897}
8898
8899/*
8900 * Clear-Exclusive, Barriers
8901 */
8902
8903static bool trans_CLREX(DisasContext *s, arg_CLREX *a)
8904{
8905    if (s->thumb
8906        ? !ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)
8907        : !ENABLE_ARCH_6K) {
8908        return false;
8909    }
8910    gen_clrex(s);
8911    return true;
8912}
8913
8914static bool trans_DSB(DisasContext *s, arg_DSB *a)
8915{
8916    if (!ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)) {
8917        return false;
8918    }
8919    tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
8920    return true;
8921}
8922
8923static bool trans_DMB(DisasContext *s, arg_DMB *a)
8924{
8925    return trans_DSB(s, NULL);
8926}
8927
8928static bool trans_ISB(DisasContext *s, arg_ISB *a)
8929{
8930    if (!ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)) {
8931        return false;
8932    }
8933    /*
8934     * We need to break the TB after this insn to execute
8935     * self-modifying code correctly and also to take
8936     * any pending interrupts immediately.
8937     */
8938    s->base.is_jmp = DISAS_TOO_MANY;
8939    return true;
8940}
8941
8942static bool trans_SB(DisasContext *s, arg_SB *a)
8943{
8944    if (!dc_isar_feature(aa32_sb, s)) {
8945        return false;
8946    }
8947    /*
8948     * TODO: There is no speculation barrier opcode
8949     * for TCG; MB and end the TB instead.
8950     */
8951    tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
8952    s->base.is_jmp = DISAS_TOO_MANY;
8953    return true;
8954}
8955
8956static bool trans_SETEND(DisasContext *s, arg_SETEND *a)
8957{
8958    if (!ENABLE_ARCH_6) {
8959        return false;
8960    }
8961    if (a->E != (s->be_data == MO_BE)) {
8962        gen_helper_setend(cpu_env);
8963        s->base.is_jmp = DISAS_UPDATE_EXIT;
8964    }
8965    return true;
8966}
8967
8968/*
8969 * Preload instructions
8970 * All are nops, contingent on the appropriate arch level.
8971 */
8972
8973static bool trans_PLD(DisasContext *s, arg_PLD *a)
8974{
8975    return ENABLE_ARCH_5TE;
8976}
8977
8978static bool trans_PLDW(DisasContext *s, arg_PLD *a)
8979{
8980    return arm_dc_feature(s, ARM_FEATURE_V7MP);
8981}
8982
8983static bool trans_PLI(DisasContext *s, arg_PLD *a)
8984{
8985    return ENABLE_ARCH_7;
8986}
8987
8988/*
8989 * If-then
8990 */
8991
8992static bool trans_IT(DisasContext *s, arg_IT *a)
8993{
8994    int cond_mask = a->cond_mask;
8995
8996    /*
8997     * No actual code generated for this insn, just setup state.
8998     *
8999     * Combinations of firstcond and mask which set up an 0b1111
9000     * condition are UNPREDICTABLE; we take the CONSTRAINED
9001     * UNPREDICTABLE choice to treat 0b1111 the same as 0b1110,
9002     * i.e. both meaning "execute always".
9003     */
9004    s->condexec_cond = (cond_mask >> 4) & 0xe;
9005    s->condexec_mask = cond_mask & 0x1f;
9006    return true;
9007}
9008
9009/* v8.1M CSEL/CSINC/CSNEG/CSINV */
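/*
 * These compute Rd = Rn if the condition passes, and otherwise
 * Rm (CSEL), Rm + 1 (CSINC), ~Rm (CSINV) or -Rm (CSNEG).
 */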
9010static bool trans_CSEL(DisasContext *s, arg_CSEL *a)
9011{
9012    TCGv_i32 rn, rm, zero;
9013    DisasCompare c;
9014
9015    if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
9016        return false;
9017    }
9018
9019    if (a->rm == 13) {
9020        /* SEE "Related encodings" (MVE shifts) */
9021        return false;
9022    }
9023
9024    if (a->rd == 13 || a->rd == 15 || a->rn == 13 || a->fcond >= 14) {
9025        /* CONSTRAINED UNPREDICTABLE: we choose to UNDEF */
9026        return false;
9027    }
9028
9029    /* In this insn input reg fields of 0b1111 mean "zero", not "PC" */
9030    zero = tcg_constant_i32(0);
9031    if (a->rn == 15) {
9032        rn = zero;
9033    } else {
9034        rn = load_reg(s, a->rn);
9035    }
9036    if (a->rm == 15) {
9037        rm = zero;
9038    } else {
9039        rm = load_reg(s, a->rm);
9040    }
9041
9042    switch (a->op) {
9043    case 0: /* CSEL */
9044        break;
9045    case 1: /* CSINC */
9046        tcg_gen_addi_i32(rm, rm, 1);
9047        break;
9048    case 2: /* CSINV */
9049        tcg_gen_not_i32(rm, rm);
9050        break;
9051    case 3: /* CSNEG */
9052        tcg_gen_neg_i32(rm, rm);
9053        break;
9054    default:
9055        g_assert_not_reached();
9056    }
9057
9058    arm_test_cc(&c, a->fcond);
9059    tcg_gen_movcond_i32(c.cond, rn, c.value, zero, rn, rm);
9060    arm_free_cc(&c);
9061
9062    store_reg(s, a->rd, rn);
9063    tcg_temp_free_i32(rm);
9064
9065    return true;
9066}
9067
9068/*
9069 * Legacy decoder.
9070 */
9071
9072static void disas_arm_insn(DisasContext *s, unsigned int insn)
9073{
9074    unsigned int cond = insn >> 28;
9075
9076    /* M variants do not implement ARM mode; this must raise the INVSTATE
9077     * UsageFault exception.
9078     */
9079    if (arm_dc_feature(s, ARM_FEATURE_M)) {
9080        gen_exception_insn(s, s->pc_curr, EXCP_INVSTATE, syn_uncategorized());
9081        return;
9082    }
9083
9084    if (s->pstate_il) {
9085        /*
9086         * Illegal execution state. This has priority over BTI
9087         * exceptions, but comes after instruction abort exceptions.
9088         */
9089        gen_exception_insn(s, s->pc_curr, EXCP_UDEF, syn_illegalstate());
9090        return;
9091    }
9092
9093    if (cond == 0xf) {
9094        /* In ARMv3 and v4 the NV condition is UNPREDICTABLE; we
9095         * choose to UNDEF. In ARMv5 and above the space is used
9096         * for miscellaneous unconditional instructions.
9097         */
9098        if (!arm_dc_feature(s, ARM_FEATURE_V5)) {
9099            unallocated_encoding(s);
9100            return;
9101        }
9102
9103        /* Unconditional instructions.  */
9104        /* TODO: Perhaps merge these into one decodetree output file.  */
9105        if (disas_a32_uncond(s, insn) ||
9106            disas_vfp_uncond(s, insn) ||
9107            disas_neon_dp(s, insn) ||
9108            disas_neon_ls(s, insn) ||
9109            disas_neon_shared(s, insn)) {
9110            return;
9111        }
9112        /* fall back to legacy decoder */
9113
9114        if ((insn & 0x0e000f00) == 0x0c000100) {
9115            if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
9116                /* iWMMXt register transfer.  */
9117                if (extract32(s->c15_cpar, 1, 1)) {
9118                    if (!disas_iwmmxt_insn(s, insn)) {
9119                        return;
9120                    }
9121                }
9122            }
9123        }
9124        goto illegal_op;
9125    }
9126    if (cond != 0xe) {
9127        /* If the condition is not "always", generate a conditional jump
9128           to the next instruction.  */
9129        arm_skip_unless(s, cond);
9130    }
9131
9132    /* TODO: Perhaps merge these into one decodetree output file.  */
9133    if (disas_a32(s, insn) ||
9134        disas_vfp(s, insn)) {
9135        return;
9136    }
9137    /* fall back to legacy decoder */
9138    /* TODO: convert xscale/iwmmxt decoder to decodetree ?? */
9139    if (arm_dc_feature(s, ARM_FEATURE_XSCALE)) {
9140        if (((insn & 0x0c000e00) == 0x0c000000)
9141            && ((insn & 0x03000000) != 0x03000000)) {
9142            /* Coprocessor insn, coprocessor 0 or 1 */
9143            disas_xscale_insn(s, insn);
9144            return;
9145        }
9146    }
9147
9148illegal_op:
9149    unallocated_encoding(s);
9150}
9151
9152static bool thumb_insn_is_16bit(DisasContext *s, uint32_t pc, uint32_t insn)
9153{
9154    /*
9155     * Return true if this is a 16-bit instruction. We must be precise
9156     * about this (matching the decode).
9157     */
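    /*
     * For example 0x4770 (BX lr) has top five bits 0b01000 and so is
     * 16-bit, while 0xe92d, the first halfword of a T32 PUSH, has top
     * five bits 0b11101 and falls through to the checks below.
     */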
9158    if ((insn >> 11) < 0x1d) {
9159        /* Definitely a 16-bit instruction */
9160        return true;
9161    }
9162
9163    /* Top five bits 0b11101 / 0b11110 / 0b11111 : this is the
9164     * first half of a 32-bit Thumb insn. Thumb-1 cores might
9165     * end up actually treating this as two 16-bit insns, though,
9166     * if it's half of a bl/blx pair that might span a page boundary.
9167     */
9168    if (arm_dc_feature(s, ARM_FEATURE_THUMB2) ||
9169        arm_dc_feature(s, ARM_FEATURE_M)) {
9170        /* Thumb2 cores (including all M profile ones) always treat
9171         * 32-bit insns as 32-bit.
9172         */
9173        return false;
9174    }
9175
9176    if ((insn >> 11) == 0x1e && pc - s->page_start < TARGET_PAGE_SIZE - 3) {
9177        /* 0b1111_0xxx_xxxx_xxxx : BL/BLX prefix, and the suffix
9178         * is not on the next page; we merge this into a 32-bit
9179         * insn.
9180         */
9181        return false;
9182    }
9183    /* 0b1110_1xxx_xxxx_xxxx : BLX suffix (or UNDEF);
9184     * 0b1111_1xxx_xxxx_xxxx : BL suffix;
9185     * 0b1111_0xxx_xxxx_xxxx : BL/BLX prefix on the end of a page
9186     *  -- handle as single 16 bit insn
9187     */
9188    return true;
9189}
9190
9191/* Translate a 32-bit thumb instruction. */
9192static void disas_thumb2_insn(DisasContext *s, uint32_t insn)
9193{
9194    /*
9195     * ARMv6-M supports a limited subset of Thumb2 instructions.
9196     * Other Thumb1 architectures allow only 32-bit
9197     * combined BL/BLX prefix and suffix.
9198     */
9199    if (arm_dc_feature(s, ARM_FEATURE_M) &&
9200        !arm_dc_feature(s, ARM_FEATURE_V7)) {
9201        int i;
9202        bool found = false;
9203        static const uint32_t armv6m_insn[] = {0xf3808000 /* msr */,
9204                                               0xf3b08040 /* dsb */,
9205                                               0xf3b08050 /* dmb */,
9206                                               0xf3b08060 /* isb */,
9207                                               0xf3e08000 /* mrs */,
9208                                               0xf000d000 /* bl */};
9209        static const uint32_t armv6m_mask[] = {0xffe0d000,
9210                                               0xfff0d0f0,
9211                                               0xfff0d0f0,
9212                                               0xfff0d0f0,
9213                                               0xffe0d000,
9214                                               0xf800d000};
9215
9216        for (i = 0; i < ARRAY_SIZE(armv6m_insn); i++) {
9217            if ((insn & armv6m_mask[i]) == armv6m_insn[i]) {
9218                found = true;
9219                break;
9220            }
9221        }
9222        if (!found) {
9223            goto illegal_op;
9224        }
9225    } else if ((insn & 0xf800e800) != 0xf000e800)  {
9226        if (!arm_dc_feature(s, ARM_FEATURE_THUMB2)) {
9227            unallocated_encoding(s);
9228            return;
9229        }
9230    }
9231
9232    if (arm_dc_feature(s, ARM_FEATURE_M)) {
9233        /*
9234         * NOCP takes precedence over any UNDEF for (almost) the
9235         * entire wide range of coprocessor-space encodings, so check
9236         * for it first before proceeding to actually decode eg VFP
9237         * insns. This decode also handles the few insns which are
9238         * in copro space but do not have NOCP checks (eg VLLDM, VLSTM).
9239         */
9240        if (disas_m_nocp(s, insn)) {
9241            return;
9242        }
9243    }
9244
9245    if ((insn & 0xef000000) == 0xef000000) {
9246        /*
9247         * T32 encodings 0b111p_1111_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq
9248         * transform into
9249         * A32 encodings 0b1111_001p_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq
9250         */
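        /*
         * For example a T32 top byte of 0xef (p == 0) becomes the A32
         * top byte 0xf2, and 0xff (p == 1) becomes 0xf3.
         */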
9251        uint32_t a32_insn = (insn & 0xe2ffffff) |
9252            ((insn & (1 << 28)) >> 4) | (1 << 28);
9253
9254        if (disas_neon_dp(s, a32_insn)) {
9255            return;
9256        }
9257    }
9258
9259    if ((insn & 0xff100000) == 0xf9000000) {
9260        /*
9261         * T32 encodings 0b1111_1001_ppp0_qqqq_qqqq_qqqq_qqqq_qqqq
9262         * transform into
9263         * A32 encodings 0b1111_0100_ppp0_qqqq_qqqq_qqqq_qqqq_qqqq
9264         */
9265        uint32_t a32_insn = (insn & 0x00ffffff) | 0xf4000000;
9266
9267        if (disas_neon_ls(s, a32_insn)) {
9268            return;
9269        }
9270    }
9271
9272    /*
9273     * TODO: Perhaps merge these into one decodetree output file.
9274     * Note disas_vfp is written for a32 with cond field in the
9275     * top nibble.  The t32 encoding requires 0xe in the top nibble.
9276     */
9277    if (disas_t32(s, insn) ||
9278        disas_vfp_uncond(s, insn) ||
9279        disas_neon_shared(s, insn) ||
9280        disas_mve(s, insn) ||
9281        ((insn >> 28) == 0xe && disas_vfp(s, insn))) {
9282        return;
9283    }
9284
9285illegal_op:
9286    unallocated_encoding(s);
9287}
9288
9289static void disas_thumb_insn(DisasContext *s, uint32_t insn)
9290{
9291    if (!disas_t16(s, insn)) {
9292        unallocated_encoding(s);
9293    }
9294}
9295
9296static bool insn_crosses_page(CPUARMState *env, DisasContext *s)
9297{
9298    /* Return true if the insn at dc->base.pc_next might cross a page boundary.
9299     * (False positives are OK, false negatives are not.)
9300     * We know this is a Thumb insn, and our caller ensures we are
9301     * only called if dc->base.pc_next is less than 4 bytes from the page
9302     * boundary, so we cross the page if the first 16 bits indicate
9303     * that this is a 32 bit insn.
9304     */
9305    uint16_t insn = arm_lduw_code(env, &s->base, s->base.pc_next, s->sctlr_b);
9306
9307    return !thumb_insn_is_16bit(s, s->base.pc_next, insn);
9308}
9309
9310static void arm_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
9311{
9312    DisasContext *dc = container_of(dcbase, DisasContext, base);
9313    CPUARMState *env = cs->env_ptr;
9314    ARMCPU *cpu = env_archcpu(env);
9315    CPUARMTBFlags tb_flags = arm_tbflags_from_tb(dc->base.tb);
9316    uint32_t condexec, core_mmu_idx;
9317
9318    dc->isar = &cpu->isar;
9319    dc->condjmp = 0;
9320
9321    dc->aarch64 = false;
9322    dc->thumb = EX_TBFLAG_AM32(tb_flags, THUMB);
9323    dc->be_data = EX_TBFLAG_ANY(tb_flags, BE_DATA) ? MO_BE : MO_LE;
9324    condexec = EX_TBFLAG_AM32(tb_flags, CONDEXEC);
9325    /*
9326     * The CONDEXEC TB flags are CPSR bits [15:10][26:25]. On A-profile this
9327     * is always the IT bits. On M-profile, some of the reserved encodings
9328     * of IT are used instead to indicate either ICI or ECI, which
9329     * indicate partial progress of a restartable insn that was interrupted
9330     * partway through by an exception:
9331     *  * if CONDEXEC[3:0] != 0b0000 : CONDEXEC is IT bits
9332     *  * if CONDEXEC[3:0] == 0b0000 : CONDEXEC is ICI or ECI bits
9333     * In all cases CONDEXEC == 0 means "not in IT block or restartable
9334     * insn, behave normally".
9335     */
9336    dc->eci = dc->condexec_mask = dc->condexec_cond = 0;
9337    dc->eci_handled = false;
9338    dc->insn_eci_rewind = NULL;
9339    if (condexec & 0xf) {
9340        dc->condexec_mask = (condexec & 0xf) << 1;
9341        dc->condexec_cond = condexec >> 4;
9342    } else {
9343        if (arm_feature(env, ARM_FEATURE_M)) {
9344            dc->eci = condexec >> 4;
9345        }
9346    }
9347
9348    core_mmu_idx = EX_TBFLAG_ANY(tb_flags, MMUIDX);
9349    dc->mmu_idx = core_to_arm_mmu_idx(env, core_mmu_idx);
9350    dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
9351#if !defined(CONFIG_USER_ONLY)
9352    dc->user = (dc->current_el == 0);
9353#endif
9354    dc->fp_excp_el = EX_TBFLAG_ANY(tb_flags, FPEXC_EL);
9355    dc->align_mem = EX_TBFLAG_ANY(tb_flags, ALIGN_MEM);
9356    dc->pstate_il = EX_TBFLAG_ANY(tb_flags, PSTATE__IL);
9357
9358    if (arm_feature(env, ARM_FEATURE_M)) {
9359        dc->vfp_enabled = 1;
9360        dc->be_data = MO_TE;
9361        dc->v7m_handler_mode = EX_TBFLAG_M32(tb_flags, HANDLER);
9362        dc->v8m_secure = arm_feature(env, ARM_FEATURE_M_SECURITY) &&
9363            regime_is_secure(env, dc->mmu_idx);
9364        dc->v8m_stackcheck = EX_TBFLAG_M32(tb_flags, STACKCHECK);
9365        dc->v8m_fpccr_s_wrong = EX_TBFLAG_M32(tb_flags, FPCCR_S_WRONG);
9366        dc->v7m_new_fp_ctxt_needed =
9367            EX_TBFLAG_M32(tb_flags, NEW_FP_CTXT_NEEDED);
9368        dc->v7m_lspact = EX_TBFLAG_M32(tb_flags, LSPACT);
9369        dc->mve_no_pred = EX_TBFLAG_M32(tb_flags, MVE_NO_PRED);
9370    } else {
9371        dc->sctlr_b = EX_TBFLAG_A32(tb_flags, SCTLR__B);
9372        dc->hstr_active = EX_TBFLAG_A32(tb_flags, HSTR_ACTIVE);
9373        dc->ns = EX_TBFLAG_A32(tb_flags, NS);
9374        dc->vfp_enabled = EX_TBFLAG_A32(tb_flags, VFPEN);
9375        if (arm_feature(env, ARM_FEATURE_XSCALE)) {
9376            dc->c15_cpar = EX_TBFLAG_A32(tb_flags, XSCALE_CPAR);
9377        } else {
9378            dc->vec_len = EX_TBFLAG_A32(tb_flags, VECLEN);
9379            dc->vec_stride = EX_TBFLAG_A32(tb_flags, VECSTRIDE);
9380        }
9381        dc->sme_trap_nonstreaming =
9382            EX_TBFLAG_A32(tb_flags, SME_TRAP_NONSTREAMING);
9383    }
9384    dc->cp_regs = cpu->cp_regs;
9385    dc->features = env->features;
9386
9387    /* Single step state. The code-generation logic here is:
9388     *  SS_ACTIVE == 0:
9389     *   generate code with no special handling for single-stepping (except
9390     *   that anything that can make us go to SS_ACTIVE == 1 must end the TB;
9391     *   this happens anyway because those changes are all system register or
9392     *   PSTATE writes).
9393     *  SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
9394     *   emit code for one insn
9395     *   emit code to clear PSTATE.SS
9396     *   emit code to generate software step exception for completed step
9397     *   end TB (as usual for having generated an exception)
9398     *  SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
9399     *   emit code to generate a software step exception
9400     *   end the TB
9401     */
9402    dc->ss_active = EX_TBFLAG_ANY(tb_flags, SS_ACTIVE);
9403    dc->pstate_ss = EX_TBFLAG_ANY(tb_flags, PSTATE__SS);
9404    dc->is_ldex = false;
9405
9406    dc->page_start = dc->base.pc_first & TARGET_PAGE_MASK;
9407
9408    /* If architectural single step active, limit to 1.  */
9409    if (dc->ss_active) {
9410        dc->base.max_insns = 1;
9411    }
9412
9413    /* ARM is a fixed-length ISA.  Bound the number of insns to execute
9414       to those left on the page.  */
9415    if (!dc->thumb) {
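        /*
         * pc_first | TARGET_PAGE_MASK is minus the number of bytes left
         * on this page, so negating it and dividing by the 4-byte insn
         * size gives the number of insn slots remaining.
         */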
9416        int bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;
9417        dc->base.max_insns = MIN(dc->base.max_insns, bound);
9418    }
9419
9420    cpu_V0 = tcg_temp_new_i64();
9421    cpu_V1 = tcg_temp_new_i64();
9422    cpu_M0 = tcg_temp_new_i64();
9423}
9424
9425static void arm_tr_tb_start(DisasContextBase *dcbase, CPUState *cpu)
9426{
9427    DisasContext *dc = container_of(dcbase, DisasContext, base);
9428
9429    /* A note on handling of the condexec (IT) bits:
9430     *
9431     * We want to avoid the overhead of having to write the updated condexec
9432     * bits back to the CPUARMState for every instruction in an IT block. So:
9433     * (1) if the condexec bits are not already zero then we write
9434     * zero back into the CPUARMState now. This avoids complications trying
9435     * to do it at the end of the block. (For example if we don't do this
9436     * it's hard to identify whether we can safely skip writing condexec
9437     * at the end of the TB, which we definitely want to do for the case
9438     * where a TB doesn't do anything with the IT state at all.)
9439     * (2) if we are going to leave the TB then we call gen_set_condexec()
9440     * which will write the correct value into CPUARMState if zero is wrong.
9441     * This is done both for leaving the TB at the end, and for leaving
9442     * it because of an exception we know will happen, which is done in
9443     * gen_exception_insn(). The latter is necessary because we need to
9444     * leave the TB with the PC/IT state just prior to execution of the
9445     * instruction which caused the exception.
9446     * (3) if we leave the TB unexpectedly (eg a data abort on a load)
9447     * then the CPUARMState will be wrong and we need to reset it.
9448     * This is handled in the same way as restoration of the
9449     * PC in these situations; we save the value of the condexec bits
9450     * for each PC via tcg_gen_insn_start(), and restore_state_to_opc()
9451     * then uses this to restore them after an exception.
9452     *
9453     * Note that there are no instructions which can read the condexec
9454     * bits, and none which can write non-static values to them, so
9455     * we don't need to care about whether CPUARMState is correct in the
9456     * middle of a TB.
9457     */
9458
9459    /* Reset the conditional execution bits immediately. This avoids
9460       complications trying to do it at the end of the block.  */
9461    if (dc->condexec_mask || dc->condexec_cond) {
9462        store_cpu_field_constant(0, condexec_bits);
9463    }
9464}
9465
9466static void arm_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
9467{
9468    DisasContext *dc = container_of(dcbase, DisasContext, base);
9469    /*
9470     * The ECI/ICI bits share PSR bits with the IT bits, so we
9471     * need to reconstitute the bits from the split-out DisasContext
9472     * fields here.
9473     */
9474    uint32_t condexec_bits;
9475
9476    if (dc->eci) {
9477        condexec_bits = dc->eci << 4;
9478    } else {
9479        condexec_bits = (dc->condexec_cond << 4) | (dc->condexec_mask >> 1);
9480    }
9481    tcg_gen_insn_start(dc->base.pc_next, condexec_bits, 0);
9482    dc->insn_start = tcg_last_op();
9483}
9484
9485static bool arm_check_kernelpage(DisasContext *dc)
9486{
9487#ifdef CONFIG_USER_ONLY
9488    /* Intercept jump to the magic kernel page.  */
9489    if (dc->base.pc_next >= 0xffff0000) {
9490        /* We always get here via a jump, so we know we are not in a
9491           conditional execution block.  */
9492        gen_exception_internal(EXCP_KERNEL_TRAP);
9493        dc->base.is_jmp = DISAS_NORETURN;
9494        return true;
9495    }
9496#endif
9497    return false;
9498}
9499
9500static bool arm_check_ss_active(DisasContext *dc)
9501{
9502    if (dc->ss_active && !dc->pstate_ss) {
9503        /* Singlestep state is Active-pending.
9504         * If we're in this state at the start of a TB then either
9505         *  a) we just took an exception to an EL which is being debugged
9506         *     and this is the first insn in the exception handler
9507         *  b) debug exceptions were masked and we just unmasked them
9508         *     without changing EL (eg by clearing PSTATE.D)
9509         * In either case we're going to take a swstep exception in the
9510         * "did not step an insn" case, and so the syndrome ISV and EX
9511         * bits should be zero.
9512         */
9513        assert(dc->base.num_insns == 1);
9514        gen_swstep_exception(dc, 0, 0);
9515        dc->base.is_jmp = DISAS_NORETURN;
9516        return true;
9517    }
9518
9519    return false;
9520}
9521
9522static void arm_post_translate_insn(DisasContext *dc)
9523{
9524    if (dc->condjmp && !dc->base.is_jmp) {
9525        gen_set_label(dc->condlabel);
9526        dc->condjmp = 0;
9527    }
9528    translator_loop_temp_check(&dc->base);
9529}
9530
9531static void arm_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
9532{
9533    DisasContext *dc = container_of(dcbase, DisasContext, base);
9534    CPUARMState *env = cpu->env_ptr;
9535    uint32_t pc = dc->base.pc_next;
9536    unsigned int insn;
9537
9538    /* Singlestep exceptions have the highest priority. */
9539    if (arm_check_ss_active(dc)) {
9540        dc->base.pc_next = pc + 4;
9541        return;
9542    }
9543
9544    if (pc & 3) {
9545        /*
9546         * PC alignment fault.  This has priority over the instruction abort
9547         * that we would receive from a translation fault via arm_ldl_code
9548         * (or the execution of the kernelpage entrypoint). This should only
9549         * be possible after an indirect branch, at the start of the TB.
9550         */
9551        assert(dc->base.num_insns == 1);
9552        gen_helper_exception_pc_alignment(cpu_env, tcg_constant_tl(pc));
9553        dc->base.is_jmp = DISAS_NORETURN;
9554        dc->base.pc_next = QEMU_ALIGN_UP(pc, 4);
9555        return;
9556    }
9557
9558    if (arm_check_kernelpage(dc)) {
9559        dc->base.pc_next = pc + 4;
9560        return;
9561    }
9562
9563    dc->pc_curr = pc;
9564    insn = arm_ldl_code(env, &dc->base, pc, dc->sctlr_b);
9565    dc->insn = insn;
9566    dc->base.pc_next = pc + 4;
9567    disas_arm_insn(dc, insn);
9568
9569    arm_post_translate_insn(dc);
9570
9571    /* ARM is a fixed-length ISA.  We performed the cross-page check
9572       in init_disas_context by adjusting max_insns.  */
9573}
9574
9575static bool thumb_insn_is_unconditional(DisasContext *s, uint32_t insn)
9576{
9577    /* Return true if this Thumb insn is always unconditional,
9578     * even inside an IT block. This is true of only a very few
9579     * instructions: BKPT, HLT, and SG.
9580     *
9581     * A larger class of instructions are UNPREDICTABLE if used
9582     * inside an IT block; we do not need to detect those here, because
9583     * what we do by default (perform the cc check and update the IT
9584     * bits state machine) is a permitted CONSTRAINED UNPREDICTABLE
9585     * choice for those situations.
9586     *
9587     * insn is either a 16-bit or a 32-bit instruction; the two are
9588     * distinguishable because for the 16-bit case the top 16 bits
9589     * are zeroes, and that isn't a valid 32-bit encoding.
9590     */
9591    if ((insn & 0xffffff00) == 0xbe00) {
9592        /* BKPT */
9593        return true;
9594    }
9595
9596    if ((insn & 0xffffffc0) == 0xba80 && arm_dc_feature(s, ARM_FEATURE_V8) &&
9597        !arm_dc_feature(s, ARM_FEATURE_M)) {
9598        /* HLT: v8A only. This is unconditional even when it is going to
9599         * UNDEF; see the v8A ARM ARM DDI0487B.a H3.3.
9600         * For v7 cores this was a plain old undefined encoding and so
9601         * honours its cc check. (We might be using the encoding as
9602         * a semihosting trap, but we don't change the cc check behaviour
9603         * on that account, because a debugger connected to a real v7A
9604         * core and emulating semihosting traps by catching the UNDEF
9605         * exception would also only see cases where the cc check passed.
9606         * No guest code should be trying to do a HLT semihosting trap
9607         * in an IT block anyway.
9608         */
9609        return true;
9610    }
9611
9612    if (insn == 0xe97fe97f && arm_dc_feature(s, ARM_FEATURE_V8) &&
9613        arm_dc_feature(s, ARM_FEATURE_M)) {
9614        /* SG: v8M only */
9615        return true;
9616    }
9617
9618    return false;
9619}
9620
9621static void thumb_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
9622{
9623    DisasContext *dc = container_of(dcbase, DisasContext, base);
9624    CPUARMState *env = cpu->env_ptr;
9625    uint32_t pc = dc->base.pc_next;
9626    uint32_t insn;
9627    bool is_16bit;
9628
9629    /* Misaligned Thumb PC is architecturally impossible. */
9630    assert((dc->base.pc_next & 1) == 0);
9631
9632    if (arm_check_ss_active(dc) || arm_check_kernelpage(dc)) {
9633        dc->base.pc_next = pc + 2;
9634        return;
9635    }
9636
9637    dc->pc_curr = pc;
9638    insn = arm_lduw_code(env, &dc->base, pc, dc->sctlr_b);
9639    is_16bit = thumb_insn_is_16bit(dc, dc->base.pc_next, insn);
9640    pc += 2;
9641    if (!is_16bit) {
9642        uint32_t insn2 = arm_lduw_code(env, &dc->base, pc, dc->sctlr_b);
9643        insn = insn << 16 | insn2;
9644        pc += 2;
9645    }
9646    dc->base.pc_next = pc;
9647    dc->insn = insn;
9648
9649    if (dc->pstate_il) {
9650        /*
9651         * Illegal execution state. This has priority over BTI
9652         * exceptions, but comes after instruction abort exceptions.
9653         */
9654        gen_exception_insn(dc, dc->pc_curr, EXCP_UDEF, syn_illegalstate());
9655        return;
9656    }
9657
9658    if (dc->eci) {
9659        /*
9660         * For M-profile continuable instructions, ECI/ICI handling
9661         * falls into these cases:
9662         *  - interrupt-continuable instructions
9663         *     These are the various load/store multiple insns (both
9664         *     integer and fp). The ICI bits indicate the register
9665         *     where the load/store can resume. We make the IMPDEF
9666         *     choice to always do "instruction restart", ie ignore
9667         *     the ICI value and always execute the ldm/stm from the
9668         *     start. So all we need to do is zero PSR.ICI if the
9669         *     insn executes.
9670         *  - MVE instructions subject to beat-wise execution
9671         *     Here the ECI bits indicate which beats have already been
9672         *     executed, and we must honour this. Each insn of this
9673         *     type will handle it correctly. We will update PSR.ECI
9674         *     in the helper function for the insn (some ECI values
9675         *     mean that the following insn also has been partially
9676         *     executed).
9677         *  - Special cases which don't advance ECI
9678         *     The insns LE, LETP and BKPT leave the ECI/ICI state
9679         *     bits untouched.
9680         *  - all other insns (the common case)
9681         *     Non-zero ECI/ICI means an INVSTATE UsageFault.
9682         *     We place a rewind-marker here. Insns in the previous
9683         *     three categories will set a flag in the DisasContext.
9684         *     If the flag isn't set after we call disas_thumb_insn()
9685         *     or disas_thumb2_insn() then we know we have a "some other
9686         *     insn" case. We will rewind to the marker (ie throwing away
9687         *     all the generated code) and instead emit "take exception".
9688         */
9689        dc->insn_eci_rewind = tcg_last_op();
9690    }
9691
9692    if (dc->condexec_mask && !thumb_insn_is_unconditional(dc, insn)) {
9693        uint32_t cond = dc->condexec_cond;
9694
9695        /*
9696         * Conditionally skip the insn. Note that both 0xe and 0xf mean
9697         * "always"; 0xf is not "never".
9698         */
9699        if (cond < 0x0e) {
9700            arm_skip_unless(dc, cond);
9701        }
9702    }
9703
9704    if (is_16bit) {
9705        disas_thumb_insn(dc, insn);
9706    } else {
9707        disas_thumb2_insn(dc, insn);
9708    }
9709
9710    /* Advance the Thumb condexec condition.  */
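    /*
     * This mirrors the architectural ITAdvance(): the next insn's
     * condition low bit comes from the top bit of the remaining mask,
     * the mask shifts left one place, and when it reaches zero the
     * IT block is over and the condition is cleared.
     */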
9711    if (dc->condexec_mask) {
9712        dc->condexec_cond = ((dc->condexec_cond & 0xe) |
9713                             ((dc->condexec_mask >> 4) & 1));
9714        dc->condexec_mask = (dc->condexec_mask << 1) & 0x1f;
9715        if (dc->condexec_mask == 0) {
9716            dc->condexec_cond = 0;
9717        }
9718    }
9719
9720    if (dc->eci && !dc->eci_handled) {
9721        /*
9722         * Insn wasn't valid for ECI/ICI at all: undo what we
9723         * just generated and instead emit an exception
9724         */
9725        tcg_remove_ops_after(dc->insn_eci_rewind);
9726        dc->condjmp = 0;
9727        gen_exception_insn(dc, dc->pc_curr, EXCP_INVSTATE,
9728                           syn_uncategorized());
9729    }
9730
9731    arm_post_translate_insn(dc);
9732
9733    /* Thumb is a variable-length ISA.  Stop translation when the next insn
9734     * will touch a new page.  This ensures that prefetch aborts occur at
9735     * the right place.
9736     *
9737     * We want to stop the TB if the next insn starts in a new page,
9738     * or if it spans between this page and the next. This means that
9739     * if we're looking at the last halfword in the page we need to
9740     * see if it's a 16-bit Thumb insn (which will fit in this TB)
9741     * or a 32-bit Thumb insn (which won't).
9742     * This is to avoid generating a silly TB with a single 16-bit insn
9743     * in it at the end of this page (which would execute correctly
9744     * but isn't very efficient).
9745     */
9746    if (dc->base.is_jmp == DISAS_NEXT
9747        && (dc->base.pc_next - dc->page_start >= TARGET_PAGE_SIZE
9748            || (dc->base.pc_next - dc->page_start >= TARGET_PAGE_SIZE - 3
9749                && insn_crosses_page(env, dc)))) {
9750        dc->base.is_jmp = DISAS_TOO_MANY;
9751    }
9752}
9753
9754static void arm_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
9755{
9756    DisasContext *dc = container_of(dcbase, DisasContext, base);
9757
9758    /* At this stage dc->condjmp will only be set when the skipped
9759       instruction was a conditional branch or trap, and the PC has
9760       already been written.  */
9761    gen_set_condexec(dc);
9762    if (dc->base.is_jmp == DISAS_BX_EXCRET) {
9763        /* Exception return branches need some special case code at the
9764         * end of the TB, which is complex enough that it has to
9765         * handle the single-step vs not and the condition-failed
9766         * insn codepath itself.
9767         */
9768        gen_bx_excret_final_code(dc);
9769    } else if (unlikely(dc->ss_active)) {
9770        /* Unconditional and "condition passed" instruction codepath. */
9771        switch (dc->base.is_jmp) {
9772        case DISAS_SWI:
9773            gen_ss_advance(dc);
9774            gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb));
9775            break;
9776        case DISAS_HVC:
9777            gen_ss_advance(dc);
9778            gen_exception_el(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2);
9779            break;
9780        case DISAS_SMC:
9781            gen_ss_advance(dc);
9782            gen_exception_el(EXCP_SMC, syn_aa32_smc(), 3);
9783            break;
9784        case DISAS_NEXT:
9785        case DISAS_TOO_MANY:
9786        case DISAS_UPDATE_EXIT:
9787        case DISAS_UPDATE_NOCHAIN:
9788            gen_set_pc_im(dc, dc->base.pc_next);
9789            /* fall through */
9790        default:
9791            /* FIXME: Single stepping a WFI insn will not halt the CPU. */
9792            gen_singlestep_exception(dc);
9793            break;
9794        case DISAS_NORETURN:
9795            break;
9796        }
9797    } else {
9798        /* While branches must always occur at the end of an IT block,
9799           there are a few other things that can cause us to terminate
9800           the TB in the middle of an IT block:
9801            - Exception generating instructions (bkpt, swi, undefined).
9802            - Page boundaries.
9803            - Hardware watchpoints.
9804           Hardware breakpoints have already been handled and skip this code.
9805         */
9806        switch (dc->base.is_jmp) {
9807        case DISAS_NEXT:
9808        case DISAS_TOO_MANY:
9809            gen_goto_tb(dc, 1, dc->base.pc_next);
9810            break;
9811        case DISAS_UPDATE_NOCHAIN:
9812            gen_set_pc_im(dc, dc->base.pc_next);
9813            /* fall through */
9814        case DISAS_JUMP:
9815            gen_goto_ptr();
9816            break;
9817        case DISAS_UPDATE_EXIT:
9818            gen_set_pc_im(dc, dc->base.pc_next);
9819            /* fall through */
9820        default:
9821            /* indicate that the hash table must be used to find the next TB */
9822            tcg_gen_exit_tb(NULL, 0);
9823            break;
9824        case DISAS_NORETURN:
9825            /* nothing more to generate */
9826            break;
9827        case DISAS_WFI:
9828            gen_helper_wfi(cpu_env,
9829                           tcg_constant_i32(dc->base.pc_next - dc->pc_curr));
9830            /*
9831             * The helper doesn't necessarily throw an exception, but we
9832             * must go back to the main loop to check for interrupts anyway.
9833             */
9834            tcg_gen_exit_tb(NULL, 0);
9835            break;
9836        case DISAS_WFE:
9837            gen_helper_wfe(cpu_env);
9838            break;
9839        case DISAS_YIELD:
9840            gen_helper_yield(cpu_env);
9841            break;
9842        case DISAS_SWI:
9843            gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb));
9844            break;
9845        case DISAS_HVC:
9846            gen_exception_el(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2);
9847            break;
9848        case DISAS_SMC:
9849            gen_exception_el(EXCP_SMC, syn_aa32_smc(), 3);
9850            break;
9851        }
9852    }
9853
9854    if (dc->condjmp) {
9855        /* "Condition failed" instruction codepath for the branch/trap insn */
9856        gen_set_label(dc->condlabel);
9857        gen_set_condexec(dc);
9858        if (unlikely(dc->ss_active)) {
9859            gen_set_pc_im(dc, dc->base.pc_next);
9860            gen_singlestep_exception(dc);
9861        } else {
9862            gen_goto_tb(dc, 1, dc->base.pc_next);
9863        }
9864    }
9865}
9866
9867static void arm_tr_disas_log(const DisasContextBase *dcbase,
9868                             CPUState *cpu, FILE *logfile)
9869{
9870    DisasContext *dc = container_of(dcbase, DisasContext, base);
9871
9872    fprintf(logfile, "IN: %s\n", lookup_symbol(dc->base.pc_first));
9873    target_disas(logfile, cpu, dc->base.pc_first, dc->base.tb->size);
9874}
9875
9876static const TranslatorOps arm_translator_ops = {
9877    .init_disas_context = arm_tr_init_disas_context,
9878    .tb_start           = arm_tr_tb_start,
9879    .insn_start         = arm_tr_insn_start,
9880    .translate_insn     = arm_tr_translate_insn,
9881    .tb_stop            = arm_tr_tb_stop,
9882    .disas_log          = arm_tr_disas_log,
9883};
9884
9885static const TranslatorOps thumb_translator_ops = {
9886    .init_disas_context = arm_tr_init_disas_context,
9887    .tb_start           = arm_tr_tb_start,
9888    .insn_start         = arm_tr_insn_start,
9889    .translate_insn     = thumb_tr_translate_insn,
9890    .tb_stop            = arm_tr_tb_stop,
9891    .disas_log          = arm_tr_disas_log,
9892};
9893
9894/* generate intermediate code for basic block 'tb'.  */
9895void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns)
9896{
9897    DisasContext dc = { };
9898    const TranslatorOps *ops = &arm_translator_ops;
9899    CPUARMTBFlags tb_flags = arm_tbflags_from_tb(tb);
9900
9901    if (EX_TBFLAG_AM32(tb_flags, THUMB)) {
9902        ops = &thumb_translator_ops;
9903    }
9904#ifdef TARGET_AARCH64
9905    if (EX_TBFLAG_ANY(tb_flags, AARCH64_STATE)) {
9906        ops = &aarch64_translator_ops;
9907    }
9908#endif
9909
9910    translator_loop(ops, &dc.base, cpu, tb, max_insns);
9911}
9912
9913void restore_state_to_opc(CPUARMState *env, TranslationBlock *tb,
9914                          target_ulong *data)
9915{
9916    if (is_a64(env)) {
9917        env->pc = data[0];
9918        env->condexec_bits = 0;
9919        env->exception.syndrome = data[2] << ARM_INSN_START_WORD2_SHIFT;
9920    } else {
9921        env->regs[15] = data[0];
9922        env->condexec_bits = data[1];
9923        env->exception.syndrome = data[2] << ARM_INSN_START_WORD2_SHIFT;
9924    }
9925}
9926