qemu/target/arm/translate.c
   1/*
   2 *  ARM translation
   3 *
   4 *  Copyright (c) 2003 Fabrice Bellard
   5 *  Copyright (c) 2005-2007 CodeSourcery
   6 *  Copyright (c) 2007 OpenedHand, Ltd.
   7 *
   8 * This library is free software; you can redistribute it and/or
   9 * modify it under the terms of the GNU Lesser General Public
  10 * License as published by the Free Software Foundation; either
  11 * version 2.1 of the License, or (at your option) any later version.
  12 *
  13 * This library is distributed in the hope that it will be useful,
  14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  16 * Lesser General Public License for more details.
  17 *
  18 * You should have received a copy of the GNU Lesser General Public
  19 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  20 */
  21#include "qemu/osdep.h"
  22
  23#include "cpu.h"
  24#include "internals.h"
  25#include "disas/disas.h"
  26#include "exec/exec-all.h"
  27#include "tcg/tcg-op.h"
  28#include "tcg/tcg-op-gvec.h"
  29#include "qemu/log.h"
  30#include "qemu/bitops.h"
  31#include "arm_ldst.h"
  32#include "semihosting/semihost.h"
  33
  34#include "exec/helper-proto.h"
  35#include "exec/helper-gen.h"
  36
  37#include "exec/log.h"
  38
  39
  40#define ENABLE_ARCH_4T    arm_dc_feature(s, ARM_FEATURE_V4T)
  41#define ENABLE_ARCH_5     arm_dc_feature(s, ARM_FEATURE_V5)
  42/* currently all emulated v5 cores are also v5TE, so don't bother */
  43#define ENABLE_ARCH_5TE   arm_dc_feature(s, ARM_FEATURE_V5)
  44#define ENABLE_ARCH_5J    dc_isar_feature(aa32_jazelle, s)
  45#define ENABLE_ARCH_6     arm_dc_feature(s, ARM_FEATURE_V6)
  46#define ENABLE_ARCH_6K    arm_dc_feature(s, ARM_FEATURE_V6K)
  47#define ENABLE_ARCH_6T2   arm_dc_feature(s, ARM_FEATURE_THUMB2)
  48#define ENABLE_ARCH_7     arm_dc_feature(s, ARM_FEATURE_V7)
  49#define ENABLE_ARCH_8     arm_dc_feature(s, ARM_FEATURE_V8)
  50
  51#include "translate.h"
  52#include "translate-a32.h"
  53
  54/* These are TCG temporaries used only by the legacy iwMMXt decoder */
  55static TCGv_i64 cpu_V0, cpu_V1, cpu_M0;
  56/* These are TCG globals which alias CPUARMState fields */
  57static TCGv_i32 cpu_R[16];
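     /* Flag storage: bit 31 of NF is N, ZF == 0 means Z is set, CF holds 0
      * or 1, and bit 31 of VF is V. */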
  58TCGv_i32 cpu_CF, cpu_NF, cpu_VF, cpu_ZF;
  59TCGv_i64 cpu_exclusive_addr;
  60TCGv_i64 cpu_exclusive_val;
  61
  62#include "exec/gen-icount.h"
  63
  64static const char * const regnames[] =
  65    { "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
  66      "r8", "r9", "r10", "r11", "r12", "r13", "r14", "pc" };
  67
  68
  69/* initialize TCG globals.  */
  70void arm_translate_init(void)
  71{
  72    int i;
  73
  74    for (i = 0; i < 16; i++) {
  75        cpu_R[i] = tcg_global_mem_new_i32(cpu_env,
  76                                          offsetof(CPUARMState, regs[i]),
  77                                          regnames[i]);
  78    }
  79    cpu_CF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, CF), "CF");
  80    cpu_NF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, NF), "NF");
  81    cpu_VF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, VF), "VF");
  82    cpu_ZF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, ZF), "ZF");
  83
  84    cpu_exclusive_addr = tcg_global_mem_new_i64(cpu_env,
  85        offsetof(CPUARMState, exclusive_addr), "exclusive_addr");
  86    cpu_exclusive_val = tcg_global_mem_new_i64(cpu_env,
  87        offsetof(CPUARMState, exclusive_val), "exclusive_val");
  88
  89    a64_translate_init();
  90}
  91
  92uint64_t asimd_imm_const(uint32_t imm, int cmode, int op)
  93{
  94    /* Expand the encoded constant as per AdvSIMDExpandImm pseudocode */
  95    switch (cmode) {
  96    case 0: case 1:
  97        /* no-op */
  98        break;
  99    case 2: case 3:
 100        imm <<= 8;
 101        break;
 102    case 4: case 5:
 103        imm <<= 16;
 104        break;
 105    case 6: case 7:
 106        imm <<= 24;
 107        break;
 108    case 8: case 9:
 109        imm |= imm << 16;
 110        break;
 111    case 10: case 11:
 112        imm = (imm << 8) | (imm << 24);
 113        break;
 114    case 12:
 115        imm = (imm << 8) | 0xff;
 116        break;
 117    case 13:
 118        imm = (imm << 16) | 0xffff;
 119        break;
 120    case 14:
 121        if (op) {
 122            /*
 123             * This and cmode == 15 op == 1 are the only cases where
 124             * the top and bottom 32 bits of the encoded constant differ.
 125             */
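                 /* For example, imm8 = 0xA5 (bits 0, 2, 5 and 7 set) expands
                  * to 0xff00ff0000ff00ff. */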
 126            uint64_t imm64 = 0;
 127            int n;
 128
 129            for (n = 0; n < 8; n++) {
 130                if (imm & (1 << n)) {
 131                    imm64 |= (0xffULL << (n * 8));
 132                }
 133            }
 134            return imm64;
 135        }
 136        imm |= (imm << 8) | (imm << 16) | (imm << 24);
 137        break;
 138    case 15:
 139        if (op) {
 140            /* Reserved encoding for AArch32; valid for AArch64 */
 141            uint64_t imm64 = (uint64_t)(imm & 0x3f) << 48;
 142            if (imm & 0x80) {
 143                imm64 |= 0x8000000000000000ULL;
 144            }
 145            if (imm & 0x40) {
 146                imm64 |= 0x3fc0000000000000ULL;
 147            } else {
 148                imm64 |= 0x4000000000000000ULL;
 149            }
 150            return imm64;
 151        }
 152        imm = ((imm & 0x80) << 24) | ((imm & 0x3f) << 19)
 153            | ((imm & 0x40) ? (0x1f << 25) : (1 << 30));
 154        break;
 155    }
 156    if (op) {
 157        imm = ~imm;
 158    }
 159    return dup_const(MO_32, imm);
 160}
 161
 162/* Generate a label used for skipping this instruction */
 163void arm_gen_condlabel(DisasContext *s)
 164{
 165    if (!s->condjmp) {
 166        s->condlabel = gen_new_label();
 167        s->condjmp = 1;
 168    }
 169}
 170
 171/* Flags for the disas_set_da_iss info argument:
 172 * lower bits hold the Rt register number, higher bits are flags.
 173 */
 174typedef enum ISSInfo {
 175    ISSNone = 0,
 176    ISSRegMask = 0x1f,
 177    ISSInvalid = (1 << 5),
 178    ISSIsAcqRel = (1 << 6),
 179    ISSIsWrite = (1 << 7),
 180    ISSIs16Bit = (1 << 8),
 181} ISSInfo;
 182
 183/* Save the syndrome information for a Data Abort */
 184static void disas_set_da_iss(DisasContext *s, MemOp memop, ISSInfo issinfo)
 185{
 186    uint32_t syn;
 187    int sas = memop & MO_SIZE;
 188    bool sse = memop & MO_SIGN;
 189    bool is_acqrel = issinfo & ISSIsAcqRel;
 190    bool is_write = issinfo & ISSIsWrite;
 191    bool is_16bit = issinfo & ISSIs16Bit;
 192    int srt = issinfo & ISSRegMask;
 193
 194    if (issinfo & ISSInvalid) {
 195        /* Some callsites want to conditionally provide ISS info,
 196         * eg "only if this was not a writeback"
 197         */
 198        return;
 199    }
 200
 201    if (srt == 15) {
 202        /* For AArch32, insns where the src/dest is R15 never generate
 203         * ISS information. Catching that here saves checking at all
 204         * the call sites.
 205         */
 206        return;
 207    }
 208
 209    syn = syn_data_abort_with_iss(0, sas, sse, srt, 0, is_acqrel,
 210                                  0, 0, 0, is_write, 0, is_16bit);
 211    disas_set_insn_syndrome(s, syn);
 212}
 213
 214static inline int get_a32_user_mem_index(DisasContext *s)
 215{
 216    /* Return the core mmu_idx to use for A32/T32 "unprivileged load/store"
 217     * insns:
 218     *  if PL2, UNPREDICTABLE (we choose to implement as if PL0)
 219     *  otherwise, access as if at PL0.
 220     */
 221    switch (s->mmu_idx) {
 222    case ARMMMUIdx_E2:        /* this one is UNPREDICTABLE */
 223    case ARMMMUIdx_E10_0:
 224    case ARMMMUIdx_E10_1:
 225    case ARMMMUIdx_E10_1_PAN:
 226        return arm_to_core_mmu_idx(ARMMMUIdx_E10_0);
 227    case ARMMMUIdx_SE3:
 228    case ARMMMUIdx_SE10_0:
 229    case ARMMMUIdx_SE10_1:
 230    case ARMMMUIdx_SE10_1_PAN:
 231        return arm_to_core_mmu_idx(ARMMMUIdx_SE10_0);
 232    case ARMMMUIdx_MUser:
 233    case ARMMMUIdx_MPriv:
 234        return arm_to_core_mmu_idx(ARMMMUIdx_MUser);
 235    case ARMMMUIdx_MUserNegPri:
 236    case ARMMMUIdx_MPrivNegPri:
 237        return arm_to_core_mmu_idx(ARMMMUIdx_MUserNegPri);
 238    case ARMMMUIdx_MSUser:
 239    case ARMMMUIdx_MSPriv:
 240        return arm_to_core_mmu_idx(ARMMMUIdx_MSUser);
 241    case ARMMMUIdx_MSUserNegPri:
 242    case ARMMMUIdx_MSPrivNegPri:
 243        return arm_to_core_mmu_idx(ARMMMUIdx_MSUserNegPri);
 244    default:
 245        g_assert_not_reached();
 246    }
 247}
 248
 249/* The architectural value of PC.  */
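     /* e.g. an ARM-state insn at 0x1000 reads PC as 0x1008; the same insn
      * in Thumb state reads 0x1004. */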
 250static uint32_t read_pc(DisasContext *s)
 251{
 252    return s->pc_curr + (s->thumb ? 4 : 8);
 253}
 254
 255/* Set a variable to the value of a CPU register.  */
 256void load_reg_var(DisasContext *s, TCGv_i32 var, int reg)
 257{
 258    if (reg == 15) {
 259        tcg_gen_movi_i32(var, read_pc(s));
 260    } else {
 261        tcg_gen_mov_i32(var, cpu_R[reg]);
 262    }
 263}
 264
 265/*
 266 * Create a new temp, REG + OFS, except PC is ALIGN(PC, 4).
 267 * This is used for load/store for which use of PC implies (literal),
 268 * or ADD that implies ADR.
 269 */
 270TCGv_i32 add_reg_for_lit(DisasContext *s, int reg, int ofs)
 271{
 272    TCGv_i32 tmp = tcg_temp_new_i32();
 273
 274    if (reg == 15) {
 275        tcg_gen_movi_i32(tmp, (read_pc(s) & ~3) + ofs);
 276    } else {
 277        tcg_gen_addi_i32(tmp, cpu_R[reg], ofs);
 278    }
 279    return tmp;
 280}
 281
 282/* Set a CPU register.  The source must be a temporary and will be
 283   marked as dead.  */
 284void store_reg(DisasContext *s, int reg, TCGv_i32 var)
 285{
 286    if (reg == 15) {
 287        /* In Thumb mode, we must ignore bit 0.
 288         * In ARM mode, for ARMv4 and ARMv5, it is UNPREDICTABLE if bits [1:0]
 289         * are not 0b00, but for ARMv6 and above, we must ignore bits [1:0].
 290         * We choose to ignore [1:0] in ARM mode for all architecture versions.
 291         */
 292        tcg_gen_andi_i32(var, var, s->thumb ? ~1 : ~3);
 293        s->base.is_jmp = DISAS_JUMP;
 294    } else if (reg == 13 && arm_dc_feature(s, ARM_FEATURE_M)) {
 295        /* For M-profile SP bits [1:0] are always zero */
 296        tcg_gen_andi_i32(var, var, ~3);
 297    }
 298    tcg_gen_mov_i32(cpu_R[reg], var);
 299    tcg_temp_free_i32(var);
 300}
 301
 302/*
 303 * Variant of store_reg which applies v8M stack-limit checks before updating
 304 * SP. If the check fails this will result in an exception being taken.
 305 * We disable the stack checks for CONFIG_USER_ONLY because we have
 306 * no idea what the stack limits should be in that case.
 307 * If stack checking is not being done this just acts like store_reg().
 308 */
 309static void store_sp_checked(DisasContext *s, TCGv_i32 var)
 310{
 311#ifndef CONFIG_USER_ONLY
 312    if (s->v8m_stackcheck) {
 313        gen_helper_v8m_stackcheck(cpu_env, var);
 314    }
 315#endif
 316    store_reg(s, 13, var);
 317}
 318
 319/* Value extensions.  */
 320#define gen_uxtb(var) tcg_gen_ext8u_i32(var, var)
 321#define gen_uxth(var) tcg_gen_ext16u_i32(var, var)
 322#define gen_sxtb(var) tcg_gen_ext8s_i32(var, var)
 323#define gen_sxth(var) tcg_gen_ext16s_i32(var, var)
 324
 325#define gen_sxtb16(var) gen_helper_sxtb16(var, var)
 326#define gen_uxtb16(var) gen_helper_uxtb16(var, var)
 327
 328void gen_set_cpsr(TCGv_i32 var, uint32_t mask)
 329{
 330    TCGv_i32 tmp_mask = tcg_const_i32(mask);
 331    gen_helper_cpsr_write(cpu_env, var, tmp_mask);
 332    tcg_temp_free_i32(tmp_mask);
 333}
 334
 335static void gen_exception_internal(int excp)
 336{
 337    TCGv_i32 tcg_excp = tcg_const_i32(excp);
 338
 339    assert(excp_is_internal(excp));
 340    gen_helper_exception_internal(cpu_env, tcg_excp);
 341    tcg_temp_free_i32(tcg_excp);
 342}
 343
 344static void gen_singlestep_exception(DisasContext *s)
 345{
  346    /* We just completed a step of an insn. Move from Active-not-pending
 347     * to Active-pending, and then also take the swstep exception.
 348     * This corresponds to making the (IMPDEF) choice to prioritize
 349     * swstep exceptions over asynchronous exceptions taken to an exception
 350     * level where debug is disabled. This choice has the advantage that
 351     * we do not need to maintain internal state corresponding to the
 352     * ISV/EX syndrome bits between completion of the step and generation
 353     * of the exception, and our syndrome information is always correct.
 354     */
 355    gen_ss_advance(s);
 356    gen_swstep_exception(s, 1, s->is_ldex);
 357    s->base.is_jmp = DISAS_NORETURN;
 358}
 359
 360void clear_eci_state(DisasContext *s)
 361{
 362    /*
 363     * Clear any ECI/ICI state: used when a load multiple/store
 364     * multiple insn executes.
 365     */
 366    if (s->eci) {
 367        store_cpu_field_constant(0, condexec_bits);
 368        s->eci = 0;
 369    }
 370}
 371
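     /* Dual signed 16x16 multiply: leaves the product of the low halves in a
      * and the product of the high halves in b. */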
 372static void gen_smul_dual(TCGv_i32 a, TCGv_i32 b)
 373{
 374    TCGv_i32 tmp1 = tcg_temp_new_i32();
 375    TCGv_i32 tmp2 = tcg_temp_new_i32();
 376    tcg_gen_ext16s_i32(tmp1, a);
 377    tcg_gen_ext16s_i32(tmp2, b);
 378    tcg_gen_mul_i32(tmp1, tmp1, tmp2);
 379    tcg_temp_free_i32(tmp2);
 380    tcg_gen_sari_i32(a, a, 16);
 381    tcg_gen_sari_i32(b, b, 16);
 382    tcg_gen_mul_i32(b, b, a);
 383    tcg_gen_mov_i32(a, tmp1);
 384    tcg_temp_free_i32(tmp1);
 385}
 386
 387/* Byteswap each halfword.  */
 388void gen_rev16(TCGv_i32 dest, TCGv_i32 var)
 389{
 390    TCGv_i32 tmp = tcg_temp_new_i32();
 391    TCGv_i32 mask = tcg_constant_i32(0x00ff00ff);
 392    tcg_gen_shri_i32(tmp, var, 8);
 393    tcg_gen_and_i32(tmp, tmp, mask);
 394    tcg_gen_and_i32(var, var, mask);
 395    tcg_gen_shli_i32(var, var, 8);
 396    tcg_gen_or_i32(dest, var, tmp);
 397    tcg_temp_free_i32(tmp);
 398}
 399
 400/* Byteswap low halfword and sign extend.  */
 401static void gen_revsh(TCGv_i32 dest, TCGv_i32 var)
 402{
 403    tcg_gen_bswap16_i32(var, var, TCG_BSWAP_OS);
 404}
 405
  406/* Dual 16-bit add.  Result placed in dest; t0 and t1 are clobbered but not freed.
 407    tmp = (t0 ^ t1) & 0x8000;
 408    t0 &= ~0x8000;
 409    t1 &= ~0x8000;
 410    t0 = (t0 + t1) ^ tmp;
 411 */
 412
 413static void gen_add16(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
 414{
 415    TCGv_i32 tmp = tcg_temp_new_i32();
 416    tcg_gen_xor_i32(tmp, t0, t1);
 417    tcg_gen_andi_i32(tmp, tmp, 0x8000);
 418    tcg_gen_andi_i32(t0, t0, ~0x8000);
 419    tcg_gen_andi_i32(t1, t1, ~0x8000);
 420    tcg_gen_add_i32(t0, t0, t1);
 421    tcg_gen_xor_i32(dest, t0, tmp);
 422    tcg_temp_free_i32(tmp);
 423}
 424
 425/* Set N and Z flags from var.  */
 426static inline void gen_logic_CC(TCGv_i32 var)
 427{
 428    tcg_gen_mov_i32(cpu_NF, var);
 429    tcg_gen_mov_i32(cpu_ZF, var);
 430}
 431
 432/* dest = T0 + T1 + CF. */
 433static void gen_add_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
 434{
 435    tcg_gen_add_i32(dest, t0, t1);
 436    tcg_gen_add_i32(dest, dest, cpu_CF);
 437}
 438
 439/* dest = T0 - T1 + CF - 1.  */
 440static void gen_sub_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
 441{
 442    tcg_gen_sub_i32(dest, t0, t1);
 443    tcg_gen_add_i32(dest, dest, cpu_CF);
 444    tcg_gen_subi_i32(dest, dest, 1);
 445}
 446
 447/* dest = T0 + T1. Compute C, N, V and Z flags */
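     /* Signed overflow is (result ^ t0) & ~(t0 ^ t1): the operands had the
      * same sign and the result's sign differs. */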
 448static void gen_add_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
 449{
 450    TCGv_i32 tmp = tcg_temp_new_i32();
 451    tcg_gen_movi_i32(tmp, 0);
 452    tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, t1, tmp);
 453    tcg_gen_mov_i32(cpu_ZF, cpu_NF);
 454    tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
 455    tcg_gen_xor_i32(tmp, t0, t1);
 456    tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
 457    tcg_temp_free_i32(tmp);
 458    tcg_gen_mov_i32(dest, cpu_NF);
 459}
 460
 461/* dest = T0 + T1 + CF.  Compute C, N, V and Z flags */
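     /* Adds CF to t0 and then adds t1, using two add2 ops (accumulating the
      * carry-out in CF) when the backend has add2, else a 64-bit addition. */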
 462static void gen_adc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
 463{
 464    TCGv_i32 tmp = tcg_temp_new_i32();
 465    if (TCG_TARGET_HAS_add2_i32) {
 466        tcg_gen_movi_i32(tmp, 0);
 467        tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, cpu_CF, tmp);
 468        tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1, tmp);
 469    } else {
 470        TCGv_i64 q0 = tcg_temp_new_i64();
 471        TCGv_i64 q1 = tcg_temp_new_i64();
 472        tcg_gen_extu_i32_i64(q0, t0);
 473        tcg_gen_extu_i32_i64(q1, t1);
 474        tcg_gen_add_i64(q0, q0, q1);
 475        tcg_gen_extu_i32_i64(q1, cpu_CF);
 476        tcg_gen_add_i64(q0, q0, q1);
 477        tcg_gen_extr_i64_i32(cpu_NF, cpu_CF, q0);
 478        tcg_temp_free_i64(q0);
 479        tcg_temp_free_i64(q1);
 480    }
 481    tcg_gen_mov_i32(cpu_ZF, cpu_NF);
 482    tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
 483    tcg_gen_xor_i32(tmp, t0, t1);
 484    tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
 485    tcg_temp_free_i32(tmp);
 486    tcg_gen_mov_i32(dest, cpu_NF);
 487}
 488
 489/* dest = T0 - T1. Compute C, N, V and Z flags */
 490static void gen_sub_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
 491{
 492    TCGv_i32 tmp;
 493    tcg_gen_sub_i32(cpu_NF, t0, t1);
 494    tcg_gen_mov_i32(cpu_ZF, cpu_NF);
 495    tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0, t1);
 496    tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
 497    tmp = tcg_temp_new_i32();
 498    tcg_gen_xor_i32(tmp, t0, t1);
 499    tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
 500    tcg_temp_free_i32(tmp);
 501    tcg_gen_mov_i32(dest, cpu_NF);
 502}
 503
 504/* dest = T0 + ~T1 + CF.  Compute C, N, V and Z flags */
 505static void gen_sbc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
 506{
 507    TCGv_i32 tmp = tcg_temp_new_i32();
 508    tcg_gen_not_i32(tmp, t1);
 509    gen_adc_CC(dest, t0, tmp);
 510    tcg_temp_free_i32(tmp);
 511}
 512
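     /*
      * Variable-amount shifts take the count from the low byte of t1, as the
      * architecture requires: counts of 32-255 make LSL/LSR return 0 (the
      * movcond zeroes the shifted value), while gen_sar() below clamps ASR
      * counts to 31.
      */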
 513#define GEN_SHIFT(name)                                               \
 514static void gen_##name(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)       \
 515{                                                                     \
 516    TCGv_i32 tmp1, tmp2, tmp3;                                        \
 517    tmp1 = tcg_temp_new_i32();                                        \
 518    tcg_gen_andi_i32(tmp1, t1, 0xff);                                 \
 519    tmp2 = tcg_const_i32(0);                                          \
 520    tmp3 = tcg_const_i32(0x1f);                                       \
 521    tcg_gen_movcond_i32(TCG_COND_GTU, tmp2, tmp1, tmp3, tmp2, t0);    \
 522    tcg_temp_free_i32(tmp3);                                          \
 523    tcg_gen_andi_i32(tmp1, tmp1, 0x1f);                               \
 524    tcg_gen_##name##_i32(dest, tmp2, tmp1);                           \
 525    tcg_temp_free_i32(tmp2);                                          \
 526    tcg_temp_free_i32(tmp1);                                          \
 527}
 528GEN_SHIFT(shl)
 529GEN_SHIFT(shr)
 530#undef GEN_SHIFT
 531
 532static void gen_sar(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
 533{
 534    TCGv_i32 tmp1, tmp2;
 535    tmp1 = tcg_temp_new_i32();
 536    tcg_gen_andi_i32(tmp1, t1, 0xff);
 537    tmp2 = tcg_const_i32(0x1f);
 538    tcg_gen_movcond_i32(TCG_COND_GTU, tmp1, tmp1, tmp2, tmp2, tmp1);
 539    tcg_temp_free_i32(tmp2);
 540    tcg_gen_sar_i32(dest, t0, tmp1);
 541    tcg_temp_free_i32(tmp1);
 542}
 543
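     /* Set CF to bit 'shift' of var: used to capture the shifter carry-out. */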
 544static void shifter_out_im(TCGv_i32 var, int shift)
 545{
 546    tcg_gen_extract_i32(cpu_CF, var, shift, 1);
 547}
 548
 549/* Shift by immediate.  Includes special handling for shift == 0.  */
 550static inline void gen_arm_shift_im(TCGv_i32 var, int shiftop,
 551                                    int shift, int flags)
 552{
 553    switch (shiftop) {
 554    case 0: /* LSL */
 555        if (shift != 0) {
 556            if (flags)
 557                shifter_out_im(var, 32 - shift);
 558            tcg_gen_shli_i32(var, var, shift);
 559        }
 560        break;
 561    case 1: /* LSR */
 562        if (shift == 0) {
 563            if (flags) {
 564                tcg_gen_shri_i32(cpu_CF, var, 31);
 565            }
 566            tcg_gen_movi_i32(var, 0);
 567        } else {
 568            if (flags)
 569                shifter_out_im(var, shift - 1);
 570            tcg_gen_shri_i32(var, var, shift);
 571        }
 572        break;
 573    case 2: /* ASR */
 574        if (shift == 0)
 575            shift = 32;
 576        if (flags)
 577            shifter_out_im(var, shift - 1);
 578        if (shift == 32)
 579          shift = 31;
 580        tcg_gen_sari_i32(var, var, shift);
 581        break;
 582    case 3: /* ROR/RRX */
 583        if (shift != 0) {
 584            if (flags)
 585                shifter_out_im(var, shift - 1);
 586            tcg_gen_rotri_i32(var, var, shift); break;
 587        } else {
 588            TCGv_i32 tmp = tcg_temp_new_i32();
 589            tcg_gen_shli_i32(tmp, cpu_CF, 31);
 590            if (flags)
 591                shifter_out_im(var, 0);
 592            tcg_gen_shri_i32(var, var, 1);
 593            tcg_gen_or_i32(var, var, tmp);
 594            tcg_temp_free_i32(tmp);
 595        }
 596    }
  597}
 598
 599static inline void gen_arm_shift_reg(TCGv_i32 var, int shiftop,
 600                                     TCGv_i32 shift, int flags)
 601{
 602    if (flags) {
 603        switch (shiftop) {
 604        case 0: gen_helper_shl_cc(var, cpu_env, var, shift); break;
 605        case 1: gen_helper_shr_cc(var, cpu_env, var, shift); break;
 606        case 2: gen_helper_sar_cc(var, cpu_env, var, shift); break;
 607        case 3: gen_helper_ror_cc(var, cpu_env, var, shift); break;
 608        }
 609    } else {
 610        switch (shiftop) {
 611        case 0:
 612            gen_shl(var, var, shift);
 613            break;
 614        case 1:
 615            gen_shr(var, var, shift);
 616            break;
 617        case 2:
 618            gen_sar(var, var, shift);
 619            break;
 620        case 3: tcg_gen_andi_i32(shift, shift, 0x1f);
 621                tcg_gen_rotr_i32(var, var, shift); break;
 622        }
 623    }
 624    tcg_temp_free_i32(shift);
 625}
 626
 627/*
 628 * Generate a conditional based on ARM condition code cc.
  629 * This is common between ARM and AArch64 targets.
 630 */
 631void arm_test_cc(DisasCompare *cmp, int cc)
 632{
 633    TCGv_i32 value;
 634    TCGCond cond;
 635    bool global = true;
 636
 637    switch (cc) {
 638    case 0: /* eq: Z */
 639    case 1: /* ne: !Z */
 640        cond = TCG_COND_EQ;
 641        value = cpu_ZF;
 642        break;
 643
 644    case 2: /* cs: C */
 645    case 3: /* cc: !C */
 646        cond = TCG_COND_NE;
 647        value = cpu_CF;
 648        break;
 649
 650    case 4: /* mi: N */
 651    case 5: /* pl: !N */
 652        cond = TCG_COND_LT;
 653        value = cpu_NF;
 654        break;
 655
 656    case 6: /* vs: V */
 657    case 7: /* vc: !V */
 658        cond = TCG_COND_LT;
 659        value = cpu_VF;
 660        break;
 661
 662    case 8: /* hi: C && !Z */
 663    case 9: /* ls: !C || Z -> !(C && !Z) */
 664        cond = TCG_COND_NE;
 665        value = tcg_temp_new_i32();
 666        global = false;
 667        /* CF is 1 for C, so -CF is an all-bits-set mask for C;
 668           ZF is non-zero for !Z; so AND the two subexpressions.  */
 669        tcg_gen_neg_i32(value, cpu_CF);
 670        tcg_gen_and_i32(value, value, cpu_ZF);
 671        break;
 672
 673    case 10: /* ge: N == V -> N ^ V == 0 */
 674    case 11: /* lt: N != V -> N ^ V != 0 */
 675        /* Since we're only interested in the sign bit, == 0 is >= 0.  */
 676        cond = TCG_COND_GE;
 677        value = tcg_temp_new_i32();
 678        global = false;
 679        tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
 680        break;
 681
 682    case 12: /* gt: !Z && N == V */
 683    case 13: /* le: Z || N != V */
 684        cond = TCG_COND_NE;
 685        value = tcg_temp_new_i32();
 686        global = false;
 687        /* (N == V) is equal to the sign bit of ~(NF ^ VF).  Propagate
 688         * the sign bit then AND with ZF to yield the result.  */
 689        tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
 690        tcg_gen_sari_i32(value, value, 31);
 691        tcg_gen_andc_i32(value, cpu_ZF, value);
 692        break;
 693
 694    case 14: /* always */
 695    case 15: /* always */
 696        /* Use the ALWAYS condition, which will fold early.
 697         * It doesn't matter what we use for the value.  */
 698        cond = TCG_COND_ALWAYS;
 699        value = cpu_ZF;
 700        goto no_invert;
 701
 702    default:
 703        fprintf(stderr, "Bad condition code 0x%x\n", cc);
 704        abort();
 705    }
 706
 707    if (cc & 1) {
 708        cond = tcg_invert_cond(cond);
 709    }
 710
 711 no_invert:
 712    cmp->cond = cond;
 713    cmp->value = value;
 714    cmp->value_global = global;
 715}
 716
 717void arm_free_cc(DisasCompare *cmp)
 718{
 719    if (!cmp->value_global) {
 720        tcg_temp_free_i32(cmp->value);
 721    }
 722}
 723
 724void arm_jump_cc(DisasCompare *cmp, TCGLabel *label)
 725{
 726    tcg_gen_brcondi_i32(cmp->cond, cmp->value, 0, label);
 727}
 728
 729void arm_gen_test_cc(int cc, TCGLabel *label)
 730{
 731    DisasCompare cmp;
 732    arm_test_cc(&cmp, cc);
 733    arm_jump_cc(&cmp, label);
 734    arm_free_cc(&cmp);
 735}
 736
 737void gen_set_condexec(DisasContext *s)
 738{
 739    if (s->condexec_mask) {
 740        uint32_t val = (s->condexec_cond << 4) | (s->condexec_mask >> 1);
 741
 742        store_cpu_field_constant(val, condexec_bits);
 743    }
 744}
 745
 746void gen_set_pc_im(DisasContext *s, target_ulong val)
 747{
 748    tcg_gen_movi_i32(cpu_R[15], val);
 749}
 750
 751/* Set PC and Thumb state from var.  var is marked as dead.  */
 752static inline void gen_bx(DisasContext *s, TCGv_i32 var)
 753{
 754    s->base.is_jmp = DISAS_JUMP;
 755    tcg_gen_andi_i32(cpu_R[15], var, ~1);
 756    tcg_gen_andi_i32(var, var, 1);
 757    store_cpu_field(var, thumb);
 758}
 759
 760/*
 761 * Set PC and Thumb state from var. var is marked as dead.
 762 * For M-profile CPUs, include logic to detect exception-return
 763 * branches and handle them. This is needed for Thumb POP/LDM to PC, LDR to PC,
 764 * and BX reg, and no others, and happens only for code in Handler mode.
 765 * The Security Extension also requires us to check for the FNC_RETURN
 766 * which signals a function return from non-secure state; this can happen
 767 * in both Handler and Thread mode.
 768 * To avoid having to do multiple comparisons in inline generated code,
 769 * we make the check we do here loose, so it will match for EXC_RETURN
 770 * in Thread mode. For system emulation do_v7m_exception_exit() checks
 771 * for these spurious cases and returns without doing anything (giving
 772 * the same behaviour as for a branch to a non-magic address).
 773 *
 774 * In linux-user mode it is unclear what the right behaviour for an
 775 * attempted FNC_RETURN should be, because in real hardware this will go
 776 * directly to Secure code (ie not the Linux kernel) which will then treat
 777 * the error in any way it chooses. For QEMU we opt to make the FNC_RETURN
 778 * attempt behave the way it would on a CPU without the security extension,
 779 * which is to say "like a normal branch". That means we can simply treat
 780 * all branches as normal with no magic address behaviour.
 781 */
 782static inline void gen_bx_excret(DisasContext *s, TCGv_i32 var)
 783{
 784    /* Generate the same code here as for a simple bx, but flag via
 785     * s->base.is_jmp that we need to do the rest of the work later.
 786     */
 787    gen_bx(s, var);
 788#ifndef CONFIG_USER_ONLY
 789    if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY) ||
 790        (s->v7m_handler_mode && arm_dc_feature(s, ARM_FEATURE_M))) {
 791        s->base.is_jmp = DISAS_BX_EXCRET;
 792    }
 793#endif
 794}
 795
 796static inline void gen_bx_excret_final_code(DisasContext *s)
 797{
 798    /* Generate the code to finish possible exception return and end the TB */
 799    TCGLabel *excret_label = gen_new_label();
 800    uint32_t min_magic;
 801
 802    if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY)) {
 803        /* Covers FNC_RETURN and EXC_RETURN magic */
 804        min_magic = FNC_RETURN_MIN_MAGIC;
 805    } else {
 806        /* EXC_RETURN magic only */
 807        min_magic = EXC_RETURN_MIN_MAGIC;
 808    }
 809
 810    /* Is the new PC value in the magic range indicating exception return? */
 811    tcg_gen_brcondi_i32(TCG_COND_GEU, cpu_R[15], min_magic, excret_label);
 812    /* No: end the TB as we would for a DISAS_JMP */
 813    if (s->ss_active) {
 814        gen_singlestep_exception(s);
 815    } else {
 816        tcg_gen_exit_tb(NULL, 0);
 817    }
 818    gen_set_label(excret_label);
 819    /* Yes: this is an exception return.
 820     * At this point in runtime env->regs[15] and env->thumb will hold
 821     * the exception-return magic number, which do_v7m_exception_exit()
 822     * will read. Nothing else will be able to see those values because
 823     * the cpu-exec main loop guarantees that we will always go straight
 824     * from raising the exception to the exception-handling code.
 825     *
 826     * gen_ss_advance(s) does nothing on M profile currently but
 827     * calling it is conceptually the right thing as we have executed
 828     * this instruction (compare SWI, HVC, SMC handling).
 829     */
 830    gen_ss_advance(s);
 831    gen_exception_internal(EXCP_EXCEPTION_EXIT);
 832}
 833
 834static inline void gen_bxns(DisasContext *s, int rm)
 835{
 836    TCGv_i32 var = load_reg(s, rm);
 837
 838    /* The bxns helper may raise an EXCEPTION_EXIT exception, so in theory
 839     * we need to sync state before calling it, but:
 840     *  - we don't need to do gen_set_pc_im() because the bxns helper will
 841     *    always set the PC itself
 842     *  - we don't need to do gen_set_condexec() because BXNS is UNPREDICTABLE
 843     *    unless it's outside an IT block or the last insn in an IT block,
 844     *    so we know that condexec == 0 (already set at the top of the TB)
 845     *    is correct in the non-UNPREDICTABLE cases, and we can choose
 846     *    "zeroes the IT bits" as our UNPREDICTABLE behaviour otherwise.
 847     */
 848    gen_helper_v7m_bxns(cpu_env, var);
 849    tcg_temp_free_i32(var);
 850    s->base.is_jmp = DISAS_EXIT;
 851}
 852
 853static inline void gen_blxns(DisasContext *s, int rm)
 854{
 855    TCGv_i32 var = load_reg(s, rm);
 856
 857    /* We don't need to sync condexec state, for the same reason as bxns.
 858     * We do however need to set the PC, because the blxns helper reads it.
 859     * The blxns helper may throw an exception.
 860     */
 861    gen_set_pc_im(s, s->base.pc_next);
 862    gen_helper_v7m_blxns(cpu_env, var);
 863    tcg_temp_free_i32(var);
 864    s->base.is_jmp = DISAS_EXIT;
 865}
 866
 867/* Variant of store_reg which uses branch&exchange logic when storing
 868   to r15 in ARM architecture v7 and above. The source must be a temporary
 869   and will be marked as dead. */
 870static inline void store_reg_bx(DisasContext *s, int reg, TCGv_i32 var)
 871{
 872    if (reg == 15 && ENABLE_ARCH_7) {
 873        gen_bx(s, var);
 874    } else {
 875        store_reg(s, reg, var);
 876    }
 877}
 878
 879/* Variant of store_reg which uses branch&exchange logic when storing
 880 * to r15 in ARM architecture v5T and above. This is used for storing
 881 * the results of a LDR/LDM/POP into r15, and corresponds to the cases
 882 * in the ARM ARM which use the LoadWritePC() pseudocode function. */
 883static inline void store_reg_from_load(DisasContext *s, int reg, TCGv_i32 var)
 884{
 885    if (reg == 15 && ENABLE_ARCH_5) {
 886        gen_bx_excret(s, var);
 887    } else {
 888        store_reg(s, reg, var);
 889    }
 890}
 891
 892#ifdef CONFIG_USER_ONLY
 893#define IS_USER_ONLY 1
 894#else
 895#define IS_USER_ONLY 0
 896#endif
 897
 898MemOp pow2_align(unsigned i)
 899{
 900    static const MemOp mop_align[] = {
 901        0, MO_ALIGN_2, MO_ALIGN_4, MO_ALIGN_8, MO_ALIGN_16,
 902        /*
 903         * FIXME: TARGET_PAGE_BITS_MIN affects TLB_FLAGS_MASK such
 904         * that 256-bit alignment (MO_ALIGN_32) cannot be supported:
 905         * see get_alignment_bits(). Enforce only 128-bit alignment for now.
 906         */
 907        MO_ALIGN_16
 908    };
 909    g_assert(i < ARRAY_SIZE(mop_align));
 910    return mop_align[i];
 911}
 912
 913/*
 914 * Abstractions of "generate code to do a guest load/store for
 915 * AArch32", where a vaddr is always 32 bits (and is zero
 916 * extended if we're a 64 bit core) and  data is also
 917 * 32 bits unless specifically doing a 64 bit access.
 918 * These functions work like tcg_gen_qemu_{ld,st}* except
 919 * that the address argument is TCGv_i32 rather than TCGv.
 920 */
 921
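     /*
      * When SCTLR.B (legacy BE32) is set in system mode, the address of a
      * sub-word access is XORed with the low bits (A ^ 3 for bytes, A ^ 2 for
      * halfwords) to implement word-invariant big-endian ordering.
      */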
 922static TCGv gen_aa32_addr(DisasContext *s, TCGv_i32 a32, MemOp op)
 923{
 924    TCGv addr = tcg_temp_new();
 925    tcg_gen_extu_i32_tl(addr, a32);
 926
 927    /* Not needed for user-mode BE32, where we use MO_BE instead.  */
 928    if (!IS_USER_ONLY && s->sctlr_b && (op & MO_SIZE) < MO_32) {
 929        tcg_gen_xori_tl(addr, addr, 4 - (1 << (op & MO_SIZE)));
 930    }
 931    return addr;
 932}
 933
 934/*
 935 * Internal routines are used for NEON cases where the endianness
 936 * and/or alignment has already been taken into account and manipulated.
 937 */
 938void gen_aa32_ld_internal_i32(DisasContext *s, TCGv_i32 val,
 939                              TCGv_i32 a32, int index, MemOp opc)
 940{
 941    TCGv addr = gen_aa32_addr(s, a32, opc);
 942    tcg_gen_qemu_ld_i32(val, addr, index, opc);
 943    tcg_temp_free(addr);
 944}
 945
 946void gen_aa32_st_internal_i32(DisasContext *s, TCGv_i32 val,
 947                              TCGv_i32 a32, int index, MemOp opc)
 948{
 949    TCGv addr = gen_aa32_addr(s, a32, opc);
 950    tcg_gen_qemu_st_i32(val, addr, index, opc);
 951    tcg_temp_free(addr);
 952}
 953
 954void gen_aa32_ld_internal_i64(DisasContext *s, TCGv_i64 val,
 955                              TCGv_i32 a32, int index, MemOp opc)
 956{
 957    TCGv addr = gen_aa32_addr(s, a32, opc);
 958
 959    tcg_gen_qemu_ld_i64(val, addr, index, opc);
 960
 961    /* Not needed for user-mode BE32, where we use MO_BE instead.  */
 962    if (!IS_USER_ONLY && s->sctlr_b && (opc & MO_SIZE) == MO_64) {
 963        tcg_gen_rotri_i64(val, val, 32);
 964    }
 965    tcg_temp_free(addr);
 966}
 967
 968void gen_aa32_st_internal_i64(DisasContext *s, TCGv_i64 val,
 969                              TCGv_i32 a32, int index, MemOp opc)
 970{
 971    TCGv addr = gen_aa32_addr(s, a32, opc);
 972
 973    /* Not needed for user-mode BE32, where we use MO_BE instead.  */
 974    if (!IS_USER_ONLY && s->sctlr_b && (opc & MO_SIZE) == MO_64) {
 975        TCGv_i64 tmp = tcg_temp_new_i64();
 976        tcg_gen_rotri_i64(tmp, val, 32);
 977        tcg_gen_qemu_st_i64(tmp, addr, index, opc);
 978        tcg_temp_free_i64(tmp);
 979    } else {
 980        tcg_gen_qemu_st_i64(val, addr, index, opc);
 981    }
 982    tcg_temp_free(addr);
 983}
 984
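     /* The non-internal forms below apply finalize_memop(), which folds the
      * CPU's data endianness and any required alignment check into the MemOp
      * before performing the access. */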
 985void gen_aa32_ld_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
 986                     int index, MemOp opc)
 987{
 988    gen_aa32_ld_internal_i32(s, val, a32, index, finalize_memop(s, opc));
 989}
 990
 991void gen_aa32_st_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
 992                     int index, MemOp opc)
 993{
 994    gen_aa32_st_internal_i32(s, val, a32, index, finalize_memop(s, opc));
 995}
 996
 997void gen_aa32_ld_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
 998                     int index, MemOp opc)
 999{
1000    gen_aa32_ld_internal_i64(s, val, a32, index, finalize_memop(s, opc));
1001}
1002
1003void gen_aa32_st_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
1004                     int index, MemOp opc)
1005{
1006    gen_aa32_st_internal_i64(s, val, a32, index, finalize_memop(s, opc));
1007}
1008
1009#define DO_GEN_LD(SUFF, OPC)                                            \
1010    static inline void gen_aa32_ld##SUFF(DisasContext *s, TCGv_i32 val, \
1011                                         TCGv_i32 a32, int index)       \
1012    {                                                                   \
1013        gen_aa32_ld_i32(s, val, a32, index, OPC);                       \
1014    }
1015
1016#define DO_GEN_ST(SUFF, OPC)                                            \
1017    static inline void gen_aa32_st##SUFF(DisasContext *s, TCGv_i32 val, \
1018                                         TCGv_i32 a32, int index)       \
1019    {                                                                   \
1020        gen_aa32_st_i32(s, val, a32, index, OPC);                       \
1021    }
1022
1023static inline void gen_hvc(DisasContext *s, int imm16)
1024{
1025    /* The pre HVC helper handles cases when HVC gets trapped
1026     * as an undefined insn by runtime configuration (ie before
1027     * the insn really executes).
1028     */
1029    gen_set_pc_im(s, s->pc_curr);
1030    gen_helper_pre_hvc(cpu_env);
1031    /* Otherwise we will treat this as a real exception which
1032     * happens after execution of the insn. (The distinction matters
1033     * for the PC value reported to the exception handler and also
1034     * for single stepping.)
1035     */
1036    s->svc_imm = imm16;
1037    gen_set_pc_im(s, s->base.pc_next);
1038    s->base.is_jmp = DISAS_HVC;
1039}
1040
1041static inline void gen_smc(DisasContext *s)
1042{
1043    /* As with HVC, we may take an exception either before or after
1044     * the insn executes.
1045     */
1046    TCGv_i32 tmp;
1047
1048    gen_set_pc_im(s, s->pc_curr);
1049    tmp = tcg_const_i32(syn_aa32_smc());
1050    gen_helper_pre_smc(cpu_env, tmp);
1051    tcg_temp_free_i32(tmp);
1052    gen_set_pc_im(s, s->base.pc_next);
1053    s->base.is_jmp = DISAS_SMC;
1054}
1055
1056static void gen_exception_internal_insn(DisasContext *s, uint32_t pc, int excp)
1057{
1058    gen_set_condexec(s);
1059    gen_set_pc_im(s, pc);
1060    gen_exception_internal(excp);
1061    s->base.is_jmp = DISAS_NORETURN;
1062}
1063
1064void gen_exception_insn(DisasContext *s, uint64_t pc, int excp,
1065                        uint32_t syn, uint32_t target_el)
1066{
1067    if (s->aarch64) {
1068        gen_a64_set_pc_im(pc);
1069    } else {
1070        gen_set_condexec(s);
1071        gen_set_pc_im(s, pc);
1072    }
1073    gen_exception(excp, syn, target_el);
1074    s->base.is_jmp = DISAS_NORETURN;
1075}
1076
1077static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syn)
1078{
1079    TCGv_i32 tcg_syn;
1080
1081    gen_set_condexec(s);
1082    gen_set_pc_im(s, s->pc_curr);
1083    tcg_syn = tcg_const_i32(syn);
1084    gen_helper_exception_bkpt_insn(cpu_env, tcg_syn);
1085    tcg_temp_free_i32(tcg_syn);
1086    s->base.is_jmp = DISAS_NORETURN;
1087}
1088
1089void unallocated_encoding(DisasContext *s)
1090{
1091    /* Unallocated and reserved encodings are uncategorized */
1092    gen_exception_insn(s, s->pc_curr, EXCP_UDEF, syn_uncategorized(),
1093                       default_exception_el(s));
1094}
1095
1096static void gen_exception_el(DisasContext *s, int excp, uint32_t syn,
1097                             TCGv_i32 tcg_el)
1098{
1099    TCGv_i32 tcg_excp;
1100    TCGv_i32 tcg_syn;
1101
1102    gen_set_condexec(s);
1103    gen_set_pc_im(s, s->pc_curr);
1104    tcg_excp = tcg_const_i32(excp);
1105    tcg_syn = tcg_const_i32(syn);
1106    gen_helper_exception_with_syndrome(cpu_env, tcg_excp, tcg_syn, tcg_el);
1107    tcg_temp_free_i32(tcg_syn);
1108    tcg_temp_free_i32(tcg_excp);
1109    s->base.is_jmp = DISAS_NORETURN;
1110}
1111
1112/* Force a TB lookup after an instruction that changes the CPU state.  */
1113void gen_lookup_tb(DisasContext *s)
1114{
1115    tcg_gen_movi_i32(cpu_R[15], s->base.pc_next);
1116    s->base.is_jmp = DISAS_EXIT;
1117}
1118
1119static inline void gen_hlt(DisasContext *s, int imm)
1120{
1121    /* HLT. This has two purposes.
1122     * Architecturally, it is an external halting debug instruction.
 1123     * Since QEMU doesn't implement external debug, we treat this as the
 1124     * architecture requires when halting debug is disabled: it will UNDEF.
1125     * Secondly, "HLT 0x3C" is a T32 semihosting trap instruction,
1126     * and "HLT 0xF000" is an A32 semihosting syscall. These traps
1127     * must trigger semihosting even for ARMv7 and earlier, where
1128     * HLT was an undefined encoding.
1129     * In system mode, we don't allow userspace access to
1130     * semihosting, to provide some semblance of security
1131     * (and for consistency with our 32-bit semihosting).
1132     */
1133    if (semihosting_enabled() &&
1134#ifndef CONFIG_USER_ONLY
1135        s->current_el != 0 &&
1136#endif
1137        (imm == (s->thumb ? 0x3c : 0xf000))) {
1138        gen_exception_internal_insn(s, s->pc_curr, EXCP_SEMIHOST);
1139        return;
1140    }
1141
1142    unallocated_encoding(s);
1143}
1144
1145/*
1146 * Return the offset of a "full" NEON Dreg.
1147 */
1148long neon_full_reg_offset(unsigned reg)
1149{
1150    return offsetof(CPUARMState, vfp.zregs[reg >> 1].d[reg & 1]);
1151}
1152
1153/*
1154 * Return the offset of a 2**SIZE piece of a NEON register, at index ELE,
1155 * where 0 is the least significant end of the register.
1156 */
1157long neon_element_offset(int reg, int element, MemOp memop)
1158{
1159    int element_size = 1 << (memop & MO_SIZE);
1160    int ofs = element * element_size;
1161#ifdef HOST_WORDS_BIGENDIAN
1162    /*
1163     * Calculate the offset assuming fully little-endian,
1164     * then XOR to account for the order of the 8-byte units.
1165     */
1166    if (element_size < 8) {
1167        ofs ^= 8 - element_size;
1168    }
1169#endif
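         /* e.g. on a big-endian host, MO_16 element 1 (LE byte offset 2) is at
          * host byte offset 2 ^ 6 = 4 within its 8-byte unit. */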
1170    return neon_full_reg_offset(reg) + ofs;
1171}
1172
1173/* Return the offset of a VFP Dreg (dp = true) or VFP Sreg (dp = false). */
1174long vfp_reg_offset(bool dp, unsigned reg)
1175{
1176    if (dp) {
1177        return neon_element_offset(reg, 0, MO_64);
1178    } else {
1179        return neon_element_offset(reg >> 1, reg & 1, MO_32);
1180    }
1181}
1182
1183void read_neon_element32(TCGv_i32 dest, int reg, int ele, MemOp memop)
1184{
1185    long off = neon_element_offset(reg, ele, memop);
1186
1187    switch (memop) {
1188    case MO_SB:
1189        tcg_gen_ld8s_i32(dest, cpu_env, off);
1190        break;
1191    case MO_UB:
1192        tcg_gen_ld8u_i32(dest, cpu_env, off);
1193        break;
1194    case MO_SW:
1195        tcg_gen_ld16s_i32(dest, cpu_env, off);
1196        break;
1197    case MO_UW:
1198        tcg_gen_ld16u_i32(dest, cpu_env, off);
1199        break;
1200    case MO_UL:
1201    case MO_SL:
1202        tcg_gen_ld_i32(dest, cpu_env, off);
1203        break;
1204    default:
1205        g_assert_not_reached();
1206    }
1207}
1208
1209void read_neon_element64(TCGv_i64 dest, int reg, int ele, MemOp memop)
1210{
1211    long off = neon_element_offset(reg, ele, memop);
1212
1213    switch (memop) {
1214    case MO_SL:
1215        tcg_gen_ld32s_i64(dest, cpu_env, off);
1216        break;
1217    case MO_UL:
1218        tcg_gen_ld32u_i64(dest, cpu_env, off);
1219        break;
1220    case MO_Q:
1221        tcg_gen_ld_i64(dest, cpu_env, off);
1222        break;
1223    default:
1224        g_assert_not_reached();
1225    }
1226}
1227
1228void write_neon_element32(TCGv_i32 src, int reg, int ele, MemOp memop)
1229{
1230    long off = neon_element_offset(reg, ele, memop);
1231
1232    switch (memop) {
1233    case MO_8:
1234        tcg_gen_st8_i32(src, cpu_env, off);
1235        break;
1236    case MO_16:
1237        tcg_gen_st16_i32(src, cpu_env, off);
1238        break;
1239    case MO_32:
1240        tcg_gen_st_i32(src, cpu_env, off);
1241        break;
1242    default:
1243        g_assert_not_reached();
1244    }
1245}
1246
1247void write_neon_element64(TCGv_i64 src, int reg, int ele, MemOp memop)
1248{
1249    long off = neon_element_offset(reg, ele, memop);
1250
1251    switch (memop) {
1252    case MO_32:
1253        tcg_gen_st32_i64(src, cpu_env, off);
1254        break;
1255    case MO_64:
1256        tcg_gen_st_i64(src, cpu_env, off);
1257        break;
1258    default:
1259        g_assert_not_reached();
1260    }
1261}
1262
1263#define ARM_CP_RW_BIT   (1 << 20)
1264
1265static inline void iwmmxt_load_reg(TCGv_i64 var, int reg)
1266{
1267    tcg_gen_ld_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
1268}
1269
1270static inline void iwmmxt_store_reg(TCGv_i64 var, int reg)
1271{
1272    tcg_gen_st_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
1273}
1274
1275static inline TCGv_i32 iwmmxt_load_creg(int reg)
1276{
1277    TCGv_i32 var = tcg_temp_new_i32();
1278    tcg_gen_ld_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
1279    return var;
1280}
1281
1282static inline void iwmmxt_store_creg(int reg, TCGv_i32 var)
1283{
1284    tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
1285    tcg_temp_free_i32(var);
1286}
1287
1288static inline void gen_op_iwmmxt_movq_wRn_M0(int rn)
1289{
1290    iwmmxt_store_reg(cpu_M0, rn);
1291}
1292
1293static inline void gen_op_iwmmxt_movq_M0_wRn(int rn)
1294{
1295    iwmmxt_load_reg(cpu_M0, rn);
1296}
1297
1298static inline void gen_op_iwmmxt_orq_M0_wRn(int rn)
1299{
1300    iwmmxt_load_reg(cpu_V1, rn);
1301    tcg_gen_or_i64(cpu_M0, cpu_M0, cpu_V1);
1302}
1303
1304static inline void gen_op_iwmmxt_andq_M0_wRn(int rn)
1305{
1306    iwmmxt_load_reg(cpu_V1, rn);
1307    tcg_gen_and_i64(cpu_M0, cpu_M0, cpu_V1);
1308}
1309
1310static inline void gen_op_iwmmxt_xorq_M0_wRn(int rn)
1311{
1312    iwmmxt_load_reg(cpu_V1, rn);
1313    tcg_gen_xor_i64(cpu_M0, cpu_M0, cpu_V1);
1314}
1315
1316#define IWMMXT_OP(name) \
1317static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
1318{ \
1319    iwmmxt_load_reg(cpu_V1, rn); \
1320    gen_helper_iwmmxt_##name(cpu_M0, cpu_M0, cpu_V1); \
1321}
1322
1323#define IWMMXT_OP_ENV(name) \
1324static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
1325{ \
1326    iwmmxt_load_reg(cpu_V1, rn); \
1327    gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0, cpu_V1); \
1328}
1329
1330#define IWMMXT_OP_ENV_SIZE(name) \
1331IWMMXT_OP_ENV(name##b) \
1332IWMMXT_OP_ENV(name##w) \
1333IWMMXT_OP_ENV(name##l)
1334
1335#define IWMMXT_OP_ENV1(name) \
1336static inline void gen_op_iwmmxt_##name##_M0(void) \
1337{ \
1338    gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0); \
1339}
1340
1341IWMMXT_OP(maddsq)
1342IWMMXT_OP(madduq)
1343IWMMXT_OP(sadb)
1344IWMMXT_OP(sadw)
1345IWMMXT_OP(mulslw)
1346IWMMXT_OP(mulshw)
1347IWMMXT_OP(mululw)
1348IWMMXT_OP(muluhw)
1349IWMMXT_OP(macsw)
1350IWMMXT_OP(macuw)
1351
1352IWMMXT_OP_ENV_SIZE(unpackl)
1353IWMMXT_OP_ENV_SIZE(unpackh)
1354
1355IWMMXT_OP_ENV1(unpacklub)
1356IWMMXT_OP_ENV1(unpackluw)
1357IWMMXT_OP_ENV1(unpacklul)
1358IWMMXT_OP_ENV1(unpackhub)
1359IWMMXT_OP_ENV1(unpackhuw)
1360IWMMXT_OP_ENV1(unpackhul)
1361IWMMXT_OP_ENV1(unpacklsb)
1362IWMMXT_OP_ENV1(unpacklsw)
1363IWMMXT_OP_ENV1(unpacklsl)
1364IWMMXT_OP_ENV1(unpackhsb)
1365IWMMXT_OP_ENV1(unpackhsw)
1366IWMMXT_OP_ENV1(unpackhsl)
1367
1368IWMMXT_OP_ENV_SIZE(cmpeq)
1369IWMMXT_OP_ENV_SIZE(cmpgtu)
1370IWMMXT_OP_ENV_SIZE(cmpgts)
1371
1372IWMMXT_OP_ENV_SIZE(mins)
1373IWMMXT_OP_ENV_SIZE(minu)
1374IWMMXT_OP_ENV_SIZE(maxs)
1375IWMMXT_OP_ENV_SIZE(maxu)
1376
1377IWMMXT_OP_ENV_SIZE(subn)
1378IWMMXT_OP_ENV_SIZE(addn)
1379IWMMXT_OP_ENV_SIZE(subu)
1380IWMMXT_OP_ENV_SIZE(addu)
1381IWMMXT_OP_ENV_SIZE(subs)
1382IWMMXT_OP_ENV_SIZE(adds)
1383
1384IWMMXT_OP_ENV(avgb0)
1385IWMMXT_OP_ENV(avgb1)
1386IWMMXT_OP_ENV(avgw0)
1387IWMMXT_OP_ENV(avgw1)
1388
1389IWMMXT_OP_ENV(packuw)
1390IWMMXT_OP_ENV(packul)
1391IWMMXT_OP_ENV(packuq)
1392IWMMXT_OP_ENV(packsw)
1393IWMMXT_OP_ENV(packsl)
1394IWMMXT_OP_ENV(packsq)
1395
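     /* Record updates in wCon: set_mup() ORs in bit 1 (MUP), set_cup() ORs in
      * bit 0 (CUP). */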
1396static void gen_op_iwmmxt_set_mup(void)
1397{
1398    TCGv_i32 tmp;
1399    tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
1400    tcg_gen_ori_i32(tmp, tmp, 2);
1401    store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
1402}
1403
1404static void gen_op_iwmmxt_set_cup(void)
1405{
1406    TCGv_i32 tmp;
1407    tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
1408    tcg_gen_ori_i32(tmp, tmp, 1);
1409    store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
1410}
1411
1412static void gen_op_iwmmxt_setpsr_nz(void)
1413{
1414    TCGv_i32 tmp = tcg_temp_new_i32();
1415    gen_helper_iwmmxt_setpsr_nz(tmp, cpu_M0);
1416    store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCASF]);
1417}
1418
1419static inline void gen_op_iwmmxt_addl_M0_wRn(int rn)
1420{
1421    iwmmxt_load_reg(cpu_V1, rn);
1422    tcg_gen_ext32u_i64(cpu_V1, cpu_V1);
1423    tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
1424}
1425
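     /* Compute the transfer address for an iwMMXt load/store into dest,
      * handling the pre/post-indexed forms and base writeback; returns 1 if
      * the addressing mode is not a valid one. */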
1426static inline int gen_iwmmxt_address(DisasContext *s, uint32_t insn,
1427                                     TCGv_i32 dest)
1428{
1429    int rd;
1430    uint32_t offset;
1431    TCGv_i32 tmp;
1432
1433    rd = (insn >> 16) & 0xf;
1434    tmp = load_reg(s, rd);
1435
1436    offset = (insn & 0xff) << ((insn >> 7) & 2);
1437    if (insn & (1 << 24)) {
1438        /* Pre indexed */
1439        if (insn & (1 << 23))
1440            tcg_gen_addi_i32(tmp, tmp, offset);
1441        else
1442            tcg_gen_addi_i32(tmp, tmp, -offset);
1443        tcg_gen_mov_i32(dest, tmp);
1444        if (insn & (1 << 21))
1445            store_reg(s, rd, tmp);
1446        else
1447            tcg_temp_free_i32(tmp);
1448    } else if (insn & (1 << 21)) {
1449        /* Post indexed */
1450        tcg_gen_mov_i32(dest, tmp);
1451        if (insn & (1 << 23))
1452            tcg_gen_addi_i32(tmp, tmp, offset);
1453        else
1454            tcg_gen_addi_i32(tmp, tmp, -offset);
1455        store_reg(s, rd, tmp);
1456    } else if (!(insn & (1 << 23)))
1457        return 1;
1458    return 0;
1459}
1460
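     /* Fetch an iwMMXt shift amount into dest: from wCGR0-wCGR3 when bit 8 is
      * set (returning 1 for any other control register), otherwise from the
      * low 32 bits of wRd; the value is masked with 'mask'. */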
1461static inline int gen_iwmmxt_shift(uint32_t insn, uint32_t mask, TCGv_i32 dest)
1462{
1463    int rd = (insn >> 0) & 0xf;
1464    TCGv_i32 tmp;
1465
1466    if (insn & (1 << 8)) {
1467        if (rd < ARM_IWMMXT_wCGR0 || rd > ARM_IWMMXT_wCGR3) {
1468            return 1;
1469        } else {
1470            tmp = iwmmxt_load_creg(rd);
1471        }
1472    } else {
1473        tmp = tcg_temp_new_i32();
1474        iwmmxt_load_reg(cpu_V0, rd);
1475        tcg_gen_extrl_i64_i32(tmp, cpu_V0);
1476    }
1477    tcg_gen_andi_i32(tmp, tmp, mask);
1478    tcg_gen_mov_i32(dest, tmp);
1479    tcg_temp_free_i32(tmp);
1480    return 0;
1481}
1482
1483/* Disassemble an iwMMXt instruction.  Returns nonzero if an error occurred
1484   (ie. an undefined instruction).  */
1485static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn)
1486{
1487    int rd, wrd;
1488    int rdhi, rdlo, rd0, rd1, i;
1489    TCGv_i32 addr;
1490    TCGv_i32 tmp, tmp2, tmp3;
1491
1492    if ((insn & 0x0e000e00) == 0x0c000000) {
1493        if ((insn & 0x0fe00ff0) == 0x0c400000) {
1494            wrd = insn & 0xf;
1495            rdlo = (insn >> 12) & 0xf;
1496            rdhi = (insn >> 16) & 0xf;
1497            if (insn & ARM_CP_RW_BIT) {                         /* TMRRC */
1498                iwmmxt_load_reg(cpu_V0, wrd);
1499                tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
1500                tcg_gen_extrh_i64_i32(cpu_R[rdhi], cpu_V0);
1501            } else {                                    /* TMCRR */
1502                tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
1503                iwmmxt_store_reg(cpu_V0, wrd);
1504                gen_op_iwmmxt_set_mup();
1505            }
1506            return 0;
1507        }
1508
1509        wrd = (insn >> 12) & 0xf;
1510        addr = tcg_temp_new_i32();
1511        if (gen_iwmmxt_address(s, insn, addr)) {
1512            tcg_temp_free_i32(addr);
1513            return 1;
1514        }
1515        if (insn & ARM_CP_RW_BIT) {
1516            if ((insn >> 28) == 0xf) {                  /* WLDRW wCx */
1517                tmp = tcg_temp_new_i32();
1518                gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
1519                iwmmxt_store_creg(wrd, tmp);
1520            } else {
1521                i = 1;
1522                if (insn & (1 << 8)) {
1523                    if (insn & (1 << 22)) {             /* WLDRD */
1524                        gen_aa32_ld64(s, cpu_M0, addr, get_mem_index(s));
1525                        i = 0;
1526                    } else {                            /* WLDRW wRd */
1527                        tmp = tcg_temp_new_i32();
1528                        gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
1529                    }
1530                } else {
1531                    tmp = tcg_temp_new_i32();
1532                    if (insn & (1 << 22)) {             /* WLDRH */
1533                        gen_aa32_ld16u(s, tmp, addr, get_mem_index(s));
1534                    } else {                            /* WLDRB */
1535                        gen_aa32_ld8u(s, tmp, addr, get_mem_index(s));
1536                    }
1537                }
1538                if (i) {
1539                    tcg_gen_extu_i32_i64(cpu_M0, tmp);
1540                    tcg_temp_free_i32(tmp);
1541                }
1542                gen_op_iwmmxt_movq_wRn_M0(wrd);
1543            }
1544        } else {
1545            if ((insn >> 28) == 0xf) {                  /* WSTRW wCx */
1546                tmp = iwmmxt_load_creg(wrd);
1547                gen_aa32_st32(s, tmp, addr, get_mem_index(s));
1548            } else {
1549                gen_op_iwmmxt_movq_M0_wRn(wrd);
1550                tmp = tcg_temp_new_i32();
1551                if (insn & (1 << 8)) {
1552                    if (insn & (1 << 22)) {             /* WSTRD */
1553                        gen_aa32_st64(s, cpu_M0, addr, get_mem_index(s));
1554                    } else {                            /* WSTRW wRd */
1555                        tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1556                        gen_aa32_st32(s, tmp, addr, get_mem_index(s));
1557                    }
1558                } else {
1559                    if (insn & (1 << 22)) {             /* WSTRH */
1560                        tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1561                        gen_aa32_st16(s, tmp, addr, get_mem_index(s));
1562                    } else {                            /* WSTRB */
1563                        tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1564                        gen_aa32_st8(s, tmp, addr, get_mem_index(s));
1565                    }
1566                }
1567            }
1568            tcg_temp_free_i32(tmp);
1569        }
1570        tcg_temp_free_i32(addr);
1571        return 0;
1572    }
1573
1574    if ((insn & 0x0f000000) != 0x0e000000)
1575        return 1;
1576
1577    switch (((insn >> 12) & 0xf00) | ((insn >> 4) & 0xff)) {
1578    case 0x000:                                                 /* WOR */
1579        wrd = (insn >> 12) & 0xf;
1580        rd0 = (insn >> 0) & 0xf;
1581        rd1 = (insn >> 16) & 0xf;
1582        gen_op_iwmmxt_movq_M0_wRn(rd0);
1583        gen_op_iwmmxt_orq_M0_wRn(rd1);
1584        gen_op_iwmmxt_setpsr_nz();
1585        gen_op_iwmmxt_movq_wRn_M0(wrd);
1586        gen_op_iwmmxt_set_mup();
1587        gen_op_iwmmxt_set_cup();
1588        break;
1589    case 0x011:                                                 /* TMCR */
1590        if (insn & 0xf)
1591            return 1;
1592        rd = (insn >> 12) & 0xf;
1593        wrd = (insn >> 16) & 0xf;
1594        switch (wrd) {
1595        case ARM_IWMMXT_wCID:
1596        case ARM_IWMMXT_wCASF:
1597            break;
1598        case ARM_IWMMXT_wCon:
1599            gen_op_iwmmxt_set_cup();
1600            /* Fall through.  */
1601        case ARM_IWMMXT_wCSSF:
1602            tmp = iwmmxt_load_creg(wrd);
1603            tmp2 = load_reg(s, rd);
1604            tcg_gen_andc_i32(tmp, tmp, tmp2);
1605            tcg_temp_free_i32(tmp2);
1606            iwmmxt_store_creg(wrd, tmp);
1607            break;
1608        case ARM_IWMMXT_wCGR0:
1609        case ARM_IWMMXT_wCGR1:
1610        case ARM_IWMMXT_wCGR2:
1611        case ARM_IWMMXT_wCGR3:
1612            gen_op_iwmmxt_set_cup();
1613            tmp = load_reg(s, rd);
1614            iwmmxt_store_creg(wrd, tmp);
1615            break;
1616        default:
1617            return 1;
1618        }
1619        break;
1620    case 0x100:                                                 /* WXOR */
1621        wrd = (insn >> 12) & 0xf;
1622        rd0 = (insn >> 0) & 0xf;
1623        rd1 = (insn >> 16) & 0xf;
1624        gen_op_iwmmxt_movq_M0_wRn(rd0);
1625        gen_op_iwmmxt_xorq_M0_wRn(rd1);
1626        gen_op_iwmmxt_setpsr_nz();
1627        gen_op_iwmmxt_movq_wRn_M0(wrd);
1628        gen_op_iwmmxt_set_mup();
1629        gen_op_iwmmxt_set_cup();
1630        break;
1631    case 0x111:                                                 /* TMRC */
1632        if (insn & 0xf)
1633            return 1;
1634        rd = (insn >> 12) & 0xf;
1635        wrd = (insn >> 16) & 0xf;
1636        tmp = iwmmxt_load_creg(wrd);
1637        store_reg(s, rd, tmp);
1638        break;
1639    case 0x300:                                                 /* WANDN */
1640        wrd = (insn >> 12) & 0xf;
1641        rd0 = (insn >> 0) & 0xf;
1642        rd1 = (insn >> 16) & 0xf;
1643        gen_op_iwmmxt_movq_M0_wRn(rd0);
1644        tcg_gen_neg_i64(cpu_M0, cpu_M0);
1645        gen_op_iwmmxt_andq_M0_wRn(rd1);
1646        gen_op_iwmmxt_setpsr_nz();
1647        gen_op_iwmmxt_movq_wRn_M0(wrd);
1648        gen_op_iwmmxt_set_mup();
1649        gen_op_iwmmxt_set_cup();
1650        break;
1651    case 0x200:                                                 /* WAND */
1652        wrd = (insn >> 12) & 0xf;
1653        rd0 = (insn >> 0) & 0xf;
1654        rd1 = (insn >> 16) & 0xf;
1655        gen_op_iwmmxt_movq_M0_wRn(rd0);
1656        gen_op_iwmmxt_andq_M0_wRn(rd1);
1657        gen_op_iwmmxt_setpsr_nz();
1658        gen_op_iwmmxt_movq_wRn_M0(wrd);
1659        gen_op_iwmmxt_set_mup();
1660        gen_op_iwmmxt_set_cup();
1661        break;
1662    case 0x810: case 0xa10:                             /* WMADD */
1663        wrd = (insn >> 12) & 0xf;
1664        rd0 = (insn >> 0) & 0xf;
1665        rd1 = (insn >> 16) & 0xf;
1666        gen_op_iwmmxt_movq_M0_wRn(rd0);
1667        if (insn & (1 << 21))
1668            gen_op_iwmmxt_maddsq_M0_wRn(rd1);
1669        else
1670            gen_op_iwmmxt_madduq_M0_wRn(rd1);
1671        gen_op_iwmmxt_movq_wRn_M0(wrd);
1672        gen_op_iwmmxt_set_mup();
1673        break;
1674    case 0x10e: case 0x50e: case 0x90e: case 0xd0e:     /* WUNPCKIL */
1675        wrd = (insn >> 12) & 0xf;
1676        rd0 = (insn >> 16) & 0xf;
1677        rd1 = (insn >> 0) & 0xf;
1678        gen_op_iwmmxt_movq_M0_wRn(rd0);
1679        switch ((insn >> 22) & 3) {
1680        case 0:
1681            gen_op_iwmmxt_unpacklb_M0_wRn(rd1);
1682            break;
1683        case 1:
1684            gen_op_iwmmxt_unpacklw_M0_wRn(rd1);
1685            break;
1686        case 2:
1687            gen_op_iwmmxt_unpackll_M0_wRn(rd1);
1688            break;
1689        case 3:
1690            return 1;
1691        }
1692        gen_op_iwmmxt_movq_wRn_M0(wrd);
1693        gen_op_iwmmxt_set_mup();
1694        gen_op_iwmmxt_set_cup();
1695        break;
1696    case 0x10c: case 0x50c: case 0x90c: case 0xd0c:     /* WUNPCKIH */
1697        wrd = (insn >> 12) & 0xf;
1698        rd0 = (insn >> 16) & 0xf;
1699        rd1 = (insn >> 0) & 0xf;
1700        gen_op_iwmmxt_movq_M0_wRn(rd0);
1701        switch ((insn >> 22) & 3) {
1702        case 0:
1703            gen_op_iwmmxt_unpackhb_M0_wRn(rd1);
1704            break;
1705        case 1:
1706            gen_op_iwmmxt_unpackhw_M0_wRn(rd1);
1707            break;
1708        case 2:
1709            gen_op_iwmmxt_unpackhl_M0_wRn(rd1);
1710            break;
1711        case 3:
1712            return 1;
1713        }
1714        gen_op_iwmmxt_movq_wRn_M0(wrd);
1715        gen_op_iwmmxt_set_mup();
1716        gen_op_iwmmxt_set_cup();
1717        break;
1718    case 0x012: case 0x112: case 0x412: case 0x512:     /* WSAD */
1719        wrd = (insn >> 12) & 0xf;
1720        rd0 = (insn >> 16) & 0xf;
1721        rd1 = (insn >> 0) & 0xf;
1722        gen_op_iwmmxt_movq_M0_wRn(rd0);
1723        if (insn & (1 << 22))
1724            gen_op_iwmmxt_sadw_M0_wRn(rd1);
1725        else
1726            gen_op_iwmmxt_sadb_M0_wRn(rd1);
1727        if (!(insn & (1 << 20)))
1728            gen_op_iwmmxt_addl_M0_wRn(wrd);
1729        gen_op_iwmmxt_movq_wRn_M0(wrd);
1730        gen_op_iwmmxt_set_mup();
1731        break;
1732    case 0x010: case 0x110: case 0x210: case 0x310:     /* WMUL */
1733        wrd = (insn >> 12) & 0xf;
1734        rd0 = (insn >> 16) & 0xf;
1735        rd1 = (insn >> 0) & 0xf;
1736        gen_op_iwmmxt_movq_M0_wRn(rd0);
1737        if (insn & (1 << 21)) {
1738            if (insn & (1 << 20))
1739                gen_op_iwmmxt_mulshw_M0_wRn(rd1);
1740            else
1741                gen_op_iwmmxt_mulslw_M0_wRn(rd1);
1742        } else {
1743            if (insn & (1 << 20))
1744                gen_op_iwmmxt_muluhw_M0_wRn(rd1);
1745            else
1746                gen_op_iwmmxt_mululw_M0_wRn(rd1);
1747        }
1748        gen_op_iwmmxt_movq_wRn_M0(wrd);
1749        gen_op_iwmmxt_set_mup();
1750        break;
1751    case 0x410: case 0x510: case 0x610: case 0x710:     /* WMAC */
1752        wrd = (insn >> 12) & 0xf;
1753        rd0 = (insn >> 16) & 0xf;
1754        rd1 = (insn >> 0) & 0xf;
1755        gen_op_iwmmxt_movq_M0_wRn(rd0);
1756        if (insn & (1 << 21))
1757            gen_op_iwmmxt_macsw_M0_wRn(rd1);
1758        else
1759            gen_op_iwmmxt_macuw_M0_wRn(rd1);
1760        if (!(insn & (1 << 20))) {
1761            iwmmxt_load_reg(cpu_V1, wrd);
1762            tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
1763        }
1764        gen_op_iwmmxt_movq_wRn_M0(wrd);
1765        gen_op_iwmmxt_set_mup();
1766        break;
1767    case 0x006: case 0x406: case 0x806: case 0xc06:     /* WCMPEQ */
1768        wrd = (insn >> 12) & 0xf;
1769        rd0 = (insn >> 16) & 0xf;
1770        rd1 = (insn >> 0) & 0xf;
1771        gen_op_iwmmxt_movq_M0_wRn(rd0);
1772        switch ((insn >> 22) & 3) {
1773        case 0:
1774            gen_op_iwmmxt_cmpeqb_M0_wRn(rd1);
1775            break;
1776        case 1:
1777            gen_op_iwmmxt_cmpeqw_M0_wRn(rd1);
1778            break;
1779        case 2:
1780            gen_op_iwmmxt_cmpeql_M0_wRn(rd1);
1781            break;
1782        case 3:
1783            return 1;
1784        }
1785        gen_op_iwmmxt_movq_wRn_M0(wrd);
1786        gen_op_iwmmxt_set_mup();
1787        gen_op_iwmmxt_set_cup();
1788        break;
1789    case 0x800: case 0x900: case 0xc00: case 0xd00:     /* WAVG2 */
1790        wrd = (insn >> 12) & 0xf;
1791        rd0 = (insn >> 16) & 0xf;
1792        rd1 = (insn >> 0) & 0xf;
1793        gen_op_iwmmxt_movq_M0_wRn(rd0);
1794        if (insn & (1 << 22)) {
1795            if (insn & (1 << 20))
1796                gen_op_iwmmxt_avgw1_M0_wRn(rd1);
1797            else
1798                gen_op_iwmmxt_avgw0_M0_wRn(rd1);
1799        } else {
1800            if (insn & (1 << 20))
1801                gen_op_iwmmxt_avgb1_M0_wRn(rd1);
1802            else
1803                gen_op_iwmmxt_avgb0_M0_wRn(rd1);
1804        }
1805        gen_op_iwmmxt_movq_wRn_M0(wrd);
1806        gen_op_iwmmxt_set_mup();
1807        gen_op_iwmmxt_set_cup();
1808        break;
1809    case 0x802: case 0x902: case 0xa02: case 0xb02:     /* WALIGNR */
1810        wrd = (insn >> 12) & 0xf;
1811        rd0 = (insn >> 16) & 0xf;
1812        rd1 = (insn >> 0) & 0xf;
1813        gen_op_iwmmxt_movq_M0_wRn(rd0);
1814        tmp = iwmmxt_load_creg(ARM_IWMMXT_wCGR0 + ((insn >> 20) & 3));
1815        tcg_gen_andi_i32(tmp, tmp, 7);
1816        iwmmxt_load_reg(cpu_V1, rd1);
1817        gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp);
1818        tcg_temp_free_i32(tmp);
1819        gen_op_iwmmxt_movq_wRn_M0(wrd);
1820        gen_op_iwmmxt_set_mup();
1821        break;
1822    case 0x601: case 0x605: case 0x609: case 0x60d:     /* TINSR */
1823        if (((insn >> 6) & 3) == 3)
1824            return 1;
1825        rd = (insn >> 12) & 0xf;
1826        wrd = (insn >> 16) & 0xf;
1827        tmp = load_reg(s, rd);
1828        gen_op_iwmmxt_movq_M0_wRn(wrd);
1829        switch ((insn >> 6) & 3) {
1830        case 0:
1831            tmp2 = tcg_const_i32(0xff);
1832            tmp3 = tcg_const_i32((insn & 7) << 3);
1833            break;
1834        case 1:
1835            tmp2 = tcg_const_i32(0xffff);
1836            tmp3 = tcg_const_i32((insn & 3) << 4);
1837            break;
1838        case 2:
1839            tmp2 = tcg_const_i32(0xffffffff);
1840            tmp3 = tcg_const_i32((insn & 1) << 5);
1841            break;
1842        default:
1843            tmp2 = NULL;
1844            tmp3 = NULL;
1845        }
1846        gen_helper_iwmmxt_insr(cpu_M0, cpu_M0, tmp, tmp2, tmp3);
1847        tcg_temp_free_i32(tmp3);
1848        tcg_temp_free_i32(tmp2);
1849        tcg_temp_free_i32(tmp);
1850        gen_op_iwmmxt_movq_wRn_M0(wrd);
1851        gen_op_iwmmxt_set_mup();
1852        break;
1853    case 0x107: case 0x507: case 0x907: case 0xd07:     /* TEXTRM */
1854        rd = (insn >> 12) & 0xf;
1855        wrd = (insn >> 16) & 0xf;
1856        if (rd == 15 || ((insn >> 22) & 3) == 3)
1857            return 1;
1858        gen_op_iwmmxt_movq_M0_wRn(wrd);
1859        tmp = tcg_temp_new_i32();
1860        switch ((insn >> 22) & 3) {
1861        case 0:
1862            tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 7) << 3);
1863            tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1864            if (insn & 8) {
1865                tcg_gen_ext8s_i32(tmp, tmp);
1866            } else {
1867                tcg_gen_andi_i32(tmp, tmp, 0xff);
1868            }
1869            break;
1870        case 1:
1871            tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 3) << 4);
1872            tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1873            if (insn & 8) {
1874                tcg_gen_ext16s_i32(tmp, tmp);
1875            } else {
1876                tcg_gen_andi_i32(tmp, tmp, 0xffff);
1877            }
1878            break;
1879        case 2:
1880            tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 1) << 5);
1881            tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1882            break;
1883        }
1884        store_reg(s, rd, tmp);
1885        break;
1886    case 0x117: case 0x517: case 0x917: case 0xd17:     /* TEXTRC */
1887        if ((insn & 0x000ff008) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1888            return 1;
1889        tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1890        switch ((insn >> 22) & 3) {
1891        case 0:
1892            tcg_gen_shri_i32(tmp, tmp, ((insn & 7) << 2) + 0);
1893            break;
1894        case 1:
1895            tcg_gen_shri_i32(tmp, tmp, ((insn & 3) << 3) + 4);
1896            break;
1897        case 2:
1898            tcg_gen_shri_i32(tmp, tmp, ((insn & 1) << 4) + 12);
1899            break;
1900        }
1901        tcg_gen_shli_i32(tmp, tmp, 28);
1902        gen_set_nzcv(tmp);
1903        tcg_temp_free_i32(tmp);
1904        break;
1905    case 0x401: case 0x405: case 0x409: case 0x40d:     /* TBCST */
1906        if (((insn >> 6) & 3) == 3)
1907            return 1;
1908        rd = (insn >> 12) & 0xf;
1909        wrd = (insn >> 16) & 0xf;
1910        tmp = load_reg(s, rd);
1911        switch ((insn >> 6) & 3) {
1912        case 0:
1913            gen_helper_iwmmxt_bcstb(cpu_M0, tmp);
1914            break;
1915        case 1:
1916            gen_helper_iwmmxt_bcstw(cpu_M0, tmp);
1917            break;
1918        case 2:
1919            gen_helper_iwmmxt_bcstl(cpu_M0, tmp);
1920            break;
1921        }
1922        tcg_temp_free_i32(tmp);
1923        gen_op_iwmmxt_movq_wRn_M0(wrd);
1924        gen_op_iwmmxt_set_mup();
1925        break;
1926    case 0x113: case 0x513: case 0x913: case 0xd13:     /* TANDC */
1927        if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1928            return 1;
1929        tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1930        tmp2 = tcg_temp_new_i32();
1931        tcg_gen_mov_i32(tmp2, tmp);
1932        switch ((insn >> 22) & 3) {
1933        case 0:
1934            for (i = 0; i < 7; i ++) {
1935                tcg_gen_shli_i32(tmp2, tmp2, 4);
1936                tcg_gen_and_i32(tmp, tmp, tmp2);
1937            }
1938            break;
1939        case 1:
1940            for (i = 0; i < 3; i ++) {
1941                tcg_gen_shli_i32(tmp2, tmp2, 8);
1942                tcg_gen_and_i32(tmp, tmp, tmp2);
1943            }
1944            break;
1945        case 2:
1946            tcg_gen_shli_i32(tmp2, tmp2, 16);
1947            tcg_gen_and_i32(tmp, tmp, tmp2);
1948            break;
1949        }
1950        gen_set_nzcv(tmp);
1951        tcg_temp_free_i32(tmp2);
1952        tcg_temp_free_i32(tmp);
1953        break;
1954    case 0x01c: case 0x41c: case 0x81c: case 0xc1c:     /* WACC */
1955        wrd = (insn >> 12) & 0xf;
1956        rd0 = (insn >> 16) & 0xf;
1957        gen_op_iwmmxt_movq_M0_wRn(rd0);
1958        switch ((insn >> 22) & 3) {
1959        case 0:
1960            gen_helper_iwmmxt_addcb(cpu_M0, cpu_M0);
1961            break;
1962        case 1:
1963            gen_helper_iwmmxt_addcw(cpu_M0, cpu_M0);
1964            break;
1965        case 2:
1966            gen_helper_iwmmxt_addcl(cpu_M0, cpu_M0);
1967            break;
1968        case 3:
1969            return 1;
1970        }
1971        gen_op_iwmmxt_movq_wRn_M0(wrd);
1972        gen_op_iwmmxt_set_mup();
1973        break;
1974    case 0x115: case 0x515: case 0x915: case 0xd15:     /* TORC */
1975        if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1976            return 1;
1977        tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1978        tmp2 = tcg_temp_new_i32();
1979        tcg_gen_mov_i32(tmp2, tmp);
1980        switch ((insn >> 22) & 3) {
1981        case 0:
1982            for (i = 0; i < 7; i ++) {
1983                tcg_gen_shli_i32(tmp2, tmp2, 4);
1984                tcg_gen_or_i32(tmp, tmp, tmp2);
1985            }
1986            break;
1987        case 1:
1988            for (i = 0; i < 3; i ++) {
1989                tcg_gen_shli_i32(tmp2, tmp2, 8);
1990                tcg_gen_or_i32(tmp, tmp, tmp2);
1991            }
1992            break;
1993        case 2:
1994            tcg_gen_shli_i32(tmp2, tmp2, 16);
1995            tcg_gen_or_i32(tmp, tmp, tmp2);
1996            break;
1997        }
1998        gen_set_nzcv(tmp);
1999        tcg_temp_free_i32(tmp2);
2000        tcg_temp_free_i32(tmp);
2001        break;
2002    case 0x103: case 0x503: case 0x903: case 0xd03:     /* TMOVMSK */
2003        rd = (insn >> 12) & 0xf;
2004        rd0 = (insn >> 16) & 0xf;
2005        if ((insn & 0xf) != 0 || ((insn >> 22) & 3) == 3)
2006            return 1;
2007        gen_op_iwmmxt_movq_M0_wRn(rd0);
2008        tmp = tcg_temp_new_i32();
2009        switch ((insn >> 22) & 3) {
2010        case 0:
2011            gen_helper_iwmmxt_msbb(tmp, cpu_M0);
2012            break;
2013        case 1:
2014            gen_helper_iwmmxt_msbw(tmp, cpu_M0);
2015            break;
2016        case 2:
2017            gen_helper_iwmmxt_msbl(tmp, cpu_M0);
2018            break;
2019        }
2020        store_reg(s, rd, tmp);
2021        break;
2022    case 0x106: case 0x306: case 0x506: case 0x706:     /* WCMPGT */
2023    case 0x906: case 0xb06: case 0xd06: case 0xf06:
2024        wrd = (insn >> 12) & 0xf;
2025        rd0 = (insn >> 16) & 0xf;
2026        rd1 = (insn >> 0) & 0xf;
2027        gen_op_iwmmxt_movq_M0_wRn(rd0);
2028        switch ((insn >> 22) & 3) {
2029        case 0:
2030            if (insn & (1 << 21))
2031                gen_op_iwmmxt_cmpgtsb_M0_wRn(rd1);
2032            else
2033                gen_op_iwmmxt_cmpgtub_M0_wRn(rd1);
2034            break;
2035        case 1:
2036            if (insn & (1 << 21))
2037                gen_op_iwmmxt_cmpgtsw_M0_wRn(rd1);
2038            else
2039                gen_op_iwmmxt_cmpgtuw_M0_wRn(rd1);
2040            break;
2041        case 2:
2042            if (insn & (1 << 21))
2043                gen_op_iwmmxt_cmpgtsl_M0_wRn(rd1);
2044            else
2045                gen_op_iwmmxt_cmpgtul_M0_wRn(rd1);
2046            break;
2047        case 3:
2048            return 1;
2049        }
2050        gen_op_iwmmxt_movq_wRn_M0(wrd);
2051        gen_op_iwmmxt_set_mup();
2052        gen_op_iwmmxt_set_cup();
2053        break;
2054    case 0x00e: case 0x20e: case 0x40e: case 0x60e:     /* WUNPCKEL */
2055    case 0x80e: case 0xa0e: case 0xc0e: case 0xe0e:
2056        wrd = (insn >> 12) & 0xf;
2057        rd0 = (insn >> 16) & 0xf;
2058        gen_op_iwmmxt_movq_M0_wRn(rd0);
2059        switch ((insn >> 22) & 3) {
2060        case 0:
2061            if (insn & (1 << 21))
2062                gen_op_iwmmxt_unpacklsb_M0();
2063            else
2064                gen_op_iwmmxt_unpacklub_M0();
2065            break;
2066        case 1:
2067            if (insn & (1 << 21))
2068                gen_op_iwmmxt_unpacklsw_M0();
2069            else
2070                gen_op_iwmmxt_unpackluw_M0();
2071            break;
2072        case 2:
2073            if (insn & (1 << 21))
2074                gen_op_iwmmxt_unpacklsl_M0();
2075            else
2076                gen_op_iwmmxt_unpacklul_M0();
2077            break;
2078        case 3:
2079            return 1;
2080        }
2081        gen_op_iwmmxt_movq_wRn_M0(wrd);
2082        gen_op_iwmmxt_set_mup();
2083        gen_op_iwmmxt_set_cup();
2084        break;
2085    case 0x00c: case 0x20c: case 0x40c: case 0x60c:     /* WUNPCKEH */
2086    case 0x80c: case 0xa0c: case 0xc0c: case 0xe0c:
2087        wrd = (insn >> 12) & 0xf;
2088        rd0 = (insn >> 16) & 0xf;
2089        gen_op_iwmmxt_movq_M0_wRn(rd0);
2090        switch ((insn >> 22) & 3) {
2091        case 0:
2092            if (insn & (1 << 21))
2093                gen_op_iwmmxt_unpackhsb_M0();
2094            else
2095                gen_op_iwmmxt_unpackhub_M0();
2096            break;
2097        case 1:
2098            if (insn & (1 << 21))
2099                gen_op_iwmmxt_unpackhsw_M0();
2100            else
2101                gen_op_iwmmxt_unpackhuw_M0();
2102            break;
2103        case 2:
2104            if (insn & (1 << 21))
2105                gen_op_iwmmxt_unpackhsl_M0();
2106            else
2107                gen_op_iwmmxt_unpackhul_M0();
2108            break;
2109        case 3:
2110            return 1;
2111        }
2112        gen_op_iwmmxt_movq_wRn_M0(wrd);
2113        gen_op_iwmmxt_set_mup();
2114        gen_op_iwmmxt_set_cup();
2115        break;
2116    case 0x204: case 0x604: case 0xa04: case 0xe04:     /* WSRL */
2117    case 0x214: case 0x614: case 0xa14: case 0xe14:
2118        if (((insn >> 22) & 3) == 0)
2119            return 1;
2120        wrd = (insn >> 12) & 0xf;
2121        rd0 = (insn >> 16) & 0xf;
2122        gen_op_iwmmxt_movq_M0_wRn(rd0);
2123        tmp = tcg_temp_new_i32();
2124        if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2125            tcg_temp_free_i32(tmp);
2126            return 1;
2127        }
2128        switch ((insn >> 22) & 3) {
2129        case 1:
2130            gen_helper_iwmmxt_srlw(cpu_M0, cpu_env, cpu_M0, tmp);
2131            break;
2132        case 2:
2133            gen_helper_iwmmxt_srll(cpu_M0, cpu_env, cpu_M0, tmp);
2134            break;
2135        case 3:
2136            gen_helper_iwmmxt_srlq(cpu_M0, cpu_env, cpu_M0, tmp);
2137            break;
2138        }
2139        tcg_temp_free_i32(tmp);
2140        gen_op_iwmmxt_movq_wRn_M0(wrd);
2141        gen_op_iwmmxt_set_mup();
2142        gen_op_iwmmxt_set_cup();
2143        break;
2144    case 0x004: case 0x404: case 0x804: case 0xc04:     /* WSRA */
2145    case 0x014: case 0x414: case 0x814: case 0xc14:
2146        if (((insn >> 22) & 3) == 0)
2147            return 1;
2148        wrd = (insn >> 12) & 0xf;
2149        rd0 = (insn >> 16) & 0xf;
2150        gen_op_iwmmxt_movq_M0_wRn(rd0);
2151        tmp = tcg_temp_new_i32();
2152        if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2153            tcg_temp_free_i32(tmp);
2154            return 1;
2155        }
2156        switch ((insn >> 22) & 3) {
2157        case 1:
2158            gen_helper_iwmmxt_sraw(cpu_M0, cpu_env, cpu_M0, tmp);
2159            break;
2160        case 2:
2161            gen_helper_iwmmxt_sral(cpu_M0, cpu_env, cpu_M0, tmp);
2162            break;
2163        case 3:
2164            gen_helper_iwmmxt_sraq(cpu_M0, cpu_env, cpu_M0, tmp);
2165            break;
2166        }
2167        tcg_temp_free_i32(tmp);
2168        gen_op_iwmmxt_movq_wRn_M0(wrd);
2169        gen_op_iwmmxt_set_mup();
2170        gen_op_iwmmxt_set_cup();
2171        break;
2172    case 0x104: case 0x504: case 0x904: case 0xd04:     /* WSLL */
2173    case 0x114: case 0x514: case 0x914: case 0xd14:
2174        if (((insn >> 22) & 3) == 0)
2175            return 1;
2176        wrd = (insn >> 12) & 0xf;
2177        rd0 = (insn >> 16) & 0xf;
2178        gen_op_iwmmxt_movq_M0_wRn(rd0);
2179        tmp = tcg_temp_new_i32();
2180        if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2181            tcg_temp_free_i32(tmp);
2182            return 1;
2183        }
2184        switch ((insn >> 22) & 3) {
2185        case 1:
2186            gen_helper_iwmmxt_sllw(cpu_M0, cpu_env, cpu_M0, tmp);
2187            break;
2188        case 2:
2189            gen_helper_iwmmxt_slll(cpu_M0, cpu_env, cpu_M0, tmp);
2190            break;
2191        case 3:
2192            gen_helper_iwmmxt_sllq(cpu_M0, cpu_env, cpu_M0, tmp);
2193            break;
2194        }
2195        tcg_temp_free_i32(tmp);
2196        gen_op_iwmmxt_movq_wRn_M0(wrd);
2197        gen_op_iwmmxt_set_mup();
2198        gen_op_iwmmxt_set_cup();
2199        break;
2200    case 0x304: case 0x704: case 0xb04: case 0xf04:     /* WROR */
2201    case 0x314: case 0x714: case 0xb14: case 0xf14:
2202        if (((insn >> 22) & 3) == 0)
2203            return 1;
2204        wrd = (insn >> 12) & 0xf;
2205        rd0 = (insn >> 16) & 0xf;
2206        gen_op_iwmmxt_movq_M0_wRn(rd0);
2207        tmp = tcg_temp_new_i32();
2208        switch ((insn >> 22) & 3) {
2209        case 1:
2210            if (gen_iwmmxt_shift(insn, 0xf, tmp)) {
2211                tcg_temp_free_i32(tmp);
2212                return 1;
2213            }
2214            gen_helper_iwmmxt_rorw(cpu_M0, cpu_env, cpu_M0, tmp);
2215            break;
2216        case 2:
2217            if (gen_iwmmxt_shift(insn, 0x1f, tmp)) {
2218                tcg_temp_free_i32(tmp);
2219                return 1;
2220            }
2221            gen_helper_iwmmxt_rorl(cpu_M0, cpu_env, cpu_M0, tmp);
2222            break;
2223        case 3:
2224            if (gen_iwmmxt_shift(insn, 0x3f, tmp)) {
2225                tcg_temp_free_i32(tmp);
2226                return 1;
2227            }
2228            gen_helper_iwmmxt_rorq(cpu_M0, cpu_env, cpu_M0, tmp);
2229            break;
2230        }
2231        tcg_temp_free_i32(tmp);
2232        gen_op_iwmmxt_movq_wRn_M0(wrd);
2233        gen_op_iwmmxt_set_mup();
2234        gen_op_iwmmxt_set_cup();
2235        break;
2236    case 0x116: case 0x316: case 0x516: case 0x716:     /* WMIN */
2237    case 0x916: case 0xb16: case 0xd16: case 0xf16:
2238        wrd = (insn >> 12) & 0xf;
2239        rd0 = (insn >> 16) & 0xf;
2240        rd1 = (insn >> 0) & 0xf;
2241        gen_op_iwmmxt_movq_M0_wRn(rd0);
2242        switch ((insn >> 22) & 3) {
2243        case 0:
2244            if (insn & (1 << 21))
2245                gen_op_iwmmxt_minsb_M0_wRn(rd1);
2246            else
2247                gen_op_iwmmxt_minub_M0_wRn(rd1);
2248            break;
2249        case 1:
2250            if (insn & (1 << 21))
2251                gen_op_iwmmxt_minsw_M0_wRn(rd1);
2252            else
2253                gen_op_iwmmxt_minuw_M0_wRn(rd1);
2254            break;
2255        case 2:
2256            if (insn & (1 << 21))
2257                gen_op_iwmmxt_minsl_M0_wRn(rd1);
2258            else
2259                gen_op_iwmmxt_minul_M0_wRn(rd1);
2260            break;
2261        case 3:
2262            return 1;
2263        }
2264        gen_op_iwmmxt_movq_wRn_M0(wrd);
2265        gen_op_iwmmxt_set_mup();
2266        break;
2267    case 0x016: case 0x216: case 0x416: case 0x616:     /* WMAX */
2268    case 0x816: case 0xa16: case 0xc16: case 0xe16:
2269        wrd = (insn >> 12) & 0xf;
2270        rd0 = (insn >> 16) & 0xf;
2271        rd1 = (insn >> 0) & 0xf;
2272        gen_op_iwmmxt_movq_M0_wRn(rd0);
2273        switch ((insn >> 22) & 3) {
2274        case 0:
2275            if (insn & (1 << 21))
2276                gen_op_iwmmxt_maxsb_M0_wRn(rd1);
2277            else
2278                gen_op_iwmmxt_maxub_M0_wRn(rd1);
2279            break;
2280        case 1:
2281            if (insn & (1 << 21))
2282                gen_op_iwmmxt_maxsw_M0_wRn(rd1);
2283            else
2284                gen_op_iwmmxt_maxuw_M0_wRn(rd1);
2285            break;
2286        case 2:
2287            if (insn & (1 << 21))
2288                gen_op_iwmmxt_maxsl_M0_wRn(rd1);
2289            else
2290                gen_op_iwmmxt_maxul_M0_wRn(rd1);
2291            break;
2292        case 3:
2293            return 1;
2294        }
2295        gen_op_iwmmxt_movq_wRn_M0(wrd);
2296        gen_op_iwmmxt_set_mup();
2297        break;
2298    case 0x002: case 0x102: case 0x202: case 0x302:     /* WALIGNI */
2299    case 0x402: case 0x502: case 0x602: case 0x702:
2300        wrd = (insn >> 12) & 0xf;
2301        rd0 = (insn >> 16) & 0xf;
2302        rd1 = (insn >> 0) & 0xf;
2303        gen_op_iwmmxt_movq_M0_wRn(rd0);
2304        tmp = tcg_const_i32((insn >> 20) & 3);
2305        iwmmxt_load_reg(cpu_V1, rd1);
2306        gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp);
2307        tcg_temp_free_i32(tmp);
2308        gen_op_iwmmxt_movq_wRn_M0(wrd);
2309        gen_op_iwmmxt_set_mup();
2310        break;
2311    case 0x01a: case 0x11a: case 0x21a: case 0x31a:     /* WSUB */
2312    case 0x41a: case 0x51a: case 0x61a: case 0x71a:
2313    case 0x81a: case 0x91a: case 0xa1a: case 0xb1a:
2314    case 0xc1a: case 0xd1a: case 0xe1a: case 0xf1a:
2315        wrd = (insn >> 12) & 0xf;
2316        rd0 = (insn >> 16) & 0xf;
2317        rd1 = (insn >> 0) & 0xf;
2318        gen_op_iwmmxt_movq_M0_wRn(rd0);
2319        switch ((insn >> 20) & 0xf) {
2320        case 0x0:
2321            gen_op_iwmmxt_subnb_M0_wRn(rd1);
2322            break;
2323        case 0x1:
2324            gen_op_iwmmxt_subub_M0_wRn(rd1);
2325            break;
2326        case 0x3:
2327            gen_op_iwmmxt_subsb_M0_wRn(rd1);
2328            break;
2329        case 0x4:
2330            gen_op_iwmmxt_subnw_M0_wRn(rd1);
2331            break;
2332        case 0x5:
2333            gen_op_iwmmxt_subuw_M0_wRn(rd1);
2334            break;
2335        case 0x7:
2336            gen_op_iwmmxt_subsw_M0_wRn(rd1);
2337            break;
2338        case 0x8:
2339            gen_op_iwmmxt_subnl_M0_wRn(rd1);
2340            break;
2341        case 0x9:
2342            gen_op_iwmmxt_subul_M0_wRn(rd1);
2343            break;
2344        case 0xb:
2345            gen_op_iwmmxt_subsl_M0_wRn(rd1);
2346            break;
2347        default:
2348            return 1;
2349        }
2350        gen_op_iwmmxt_movq_wRn_M0(wrd);
2351        gen_op_iwmmxt_set_mup();
2352        gen_op_iwmmxt_set_cup();
2353        break;
2354    case 0x01e: case 0x11e: case 0x21e: case 0x31e:     /* WSHUFH */
2355    case 0x41e: case 0x51e: case 0x61e: case 0x71e:
2356    case 0x81e: case 0x91e: case 0xa1e: case 0xb1e:
2357    case 0xc1e: case 0xd1e: case 0xe1e: case 0xf1e:
2358        wrd = (insn >> 12) & 0xf;
2359        rd0 = (insn >> 16) & 0xf;
2360        gen_op_iwmmxt_movq_M0_wRn(rd0);
2361        tmp = tcg_const_i32(((insn >> 16) & 0xf0) | (insn & 0x0f));
2362        gen_helper_iwmmxt_shufh(cpu_M0, cpu_env, cpu_M0, tmp);
2363        tcg_temp_free_i32(tmp);
2364        gen_op_iwmmxt_movq_wRn_M0(wrd);
2365        gen_op_iwmmxt_set_mup();
2366        gen_op_iwmmxt_set_cup();
2367        break;
2368    case 0x018: case 0x118: case 0x218: case 0x318:     /* WADD */
2369    case 0x418: case 0x518: case 0x618: case 0x718:
2370    case 0x818: case 0x918: case 0xa18: case 0xb18:
2371    case 0xc18: case 0xd18: case 0xe18: case 0xf18:
2372        wrd = (insn >> 12) & 0xf;
2373        rd0 = (insn >> 16) & 0xf;
2374        rd1 = (insn >> 0) & 0xf;
2375        gen_op_iwmmxt_movq_M0_wRn(rd0);
2376        switch ((insn >> 20) & 0xf) {
2377        case 0x0:
2378            gen_op_iwmmxt_addnb_M0_wRn(rd1);
2379            break;
2380        case 0x1:
2381            gen_op_iwmmxt_addub_M0_wRn(rd1);
2382            break;
2383        case 0x3:
2384            gen_op_iwmmxt_addsb_M0_wRn(rd1);
2385            break;
2386        case 0x4:
2387            gen_op_iwmmxt_addnw_M0_wRn(rd1);
2388            break;
2389        case 0x5:
2390            gen_op_iwmmxt_adduw_M0_wRn(rd1);
2391            break;
2392        case 0x7:
2393            gen_op_iwmmxt_addsw_M0_wRn(rd1);
2394            break;
2395        case 0x8:
2396            gen_op_iwmmxt_addnl_M0_wRn(rd1);
2397            break;
2398        case 0x9:
2399            gen_op_iwmmxt_addul_M0_wRn(rd1);
2400            break;
2401        case 0xb:
2402            gen_op_iwmmxt_addsl_M0_wRn(rd1);
2403            break;
2404        default:
2405            return 1;
2406        }
2407        gen_op_iwmmxt_movq_wRn_M0(wrd);
2408        gen_op_iwmmxt_set_mup();
2409        gen_op_iwmmxt_set_cup();
2410        break;
2411    case 0x008: case 0x108: case 0x208: case 0x308:     /* WPACK */
2412    case 0x408: case 0x508: case 0x608: case 0x708:
2413    case 0x808: case 0x908: case 0xa08: case 0xb08:
2414    case 0xc08: case 0xd08: case 0xe08: case 0xf08:
2415        if (!(insn & (1 << 20)) || ((insn >> 22) & 3) == 0)
2416            return 1;
2417        wrd = (insn >> 12) & 0xf;
2418        rd0 = (insn >> 16) & 0xf;
2419        rd1 = (insn >> 0) & 0xf;
2420        gen_op_iwmmxt_movq_M0_wRn(rd0);
2421        switch ((insn >> 22) & 3) {
2422        case 1:
2423            if (insn & (1 << 21))
2424                gen_op_iwmmxt_packsw_M0_wRn(rd1);
2425            else
2426                gen_op_iwmmxt_packuw_M0_wRn(rd1);
2427            break;
2428        case 2:
2429            if (insn & (1 << 21))
2430                gen_op_iwmmxt_packsl_M0_wRn(rd1);
2431            else
2432                gen_op_iwmmxt_packul_M0_wRn(rd1);
2433            break;
2434        case 3:
2435            if (insn & (1 << 21))
2436                gen_op_iwmmxt_packsq_M0_wRn(rd1);
2437            else
2438                gen_op_iwmmxt_packuq_M0_wRn(rd1);
2439            break;
2440        }
2441        gen_op_iwmmxt_movq_wRn_M0(wrd);
2442        gen_op_iwmmxt_set_mup();
2443        gen_op_iwmmxt_set_cup();
2444        break;
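    /*
     * TMIA, TMIAPH, TMIAxy: multiply two core registers and accumulate
     * the result into wRd.
     */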
2445    case 0x201: case 0x203: case 0x205: case 0x207:
2446    case 0x209: case 0x20b: case 0x20d: case 0x20f:
2447    case 0x211: case 0x213: case 0x215: case 0x217:
2448    case 0x219: case 0x21b: case 0x21d: case 0x21f:
2449        wrd = (insn >> 5) & 0xf;
2450        rd0 = (insn >> 12) & 0xf;
2451        rd1 = (insn >> 0) & 0xf;
2452        if (rd0 == 0xf || rd1 == 0xf)
2453            return 1;
2454        gen_op_iwmmxt_movq_M0_wRn(wrd);
2455        tmp = load_reg(s, rd0);
2456        tmp2 = load_reg(s, rd1);
2457        switch ((insn >> 16) & 0xf) {
2458        case 0x0:                                       /* TMIA */
2459            gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2460            break;
2461        case 0x8:                                       /* TMIAPH */
2462            gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2463            break;
2464        case 0xc: case 0xd: case 0xe: case 0xf:                 /* TMIAxy */
2465            if (insn & (1 << 16))
2466                tcg_gen_shri_i32(tmp, tmp, 16);
2467            if (insn & (1 << 17))
2468                tcg_gen_shri_i32(tmp2, tmp2, 16);
2469            gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2470            break;
2471        default:
2472            tcg_temp_free_i32(tmp2);
2473            tcg_temp_free_i32(tmp);
2474            return 1;
2475        }
2476        tcg_temp_free_i32(tmp2);
2477        tcg_temp_free_i32(tmp);
2478        gen_op_iwmmxt_movq_wRn_M0(wrd);
2479        gen_op_iwmmxt_set_mup();
2480        break;
2481    default:
2482        return 1;
2483    }
2484
2485    return 0;
2486}
2487
2488/* Disassemble an XScale DSP instruction.  Returns nonzero if an error occurred
2489   (i.e. an undefined instruction).  */
2490static int disas_dsp_insn(DisasContext *s, uint32_t insn)
2491{
2492    int acc, rd0, rd1, rdhi, rdlo;
2493    TCGv_i32 tmp, tmp2;
2494
2495    if ((insn & 0x0ff00f10) == 0x0e200010) {
2496        /* Multiply with Internal Accumulate Format */
2497        rd0 = (insn >> 12) & 0xf;
2498        rd1 = insn & 0xf;
2499        acc = (insn >> 5) & 7;
2500
2501        if (acc != 0)
2502            return 1;
2503
2504        tmp = load_reg(s, rd0);
2505        tmp2 = load_reg(s, rd1);
2506        switch ((insn >> 16) & 0xf) {
2507        case 0x0:                                       /* MIA */
2508            gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2509            break;
2510        case 0x8:                                       /* MIAPH */
2511            gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2512            break;
2513        case 0xc:                                       /* MIABB */
2514        case 0xd:                                       /* MIABT */
2515        case 0xe:                                       /* MIATB */
2516        case 0xf:                                       /* MIATT */
2517            if (insn & (1 << 16))
2518                tcg_gen_shri_i32(tmp, tmp, 16);
2519            if (insn & (1 << 17))
2520                tcg_gen_shri_i32(tmp2, tmp2, 16);
2521            gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2522            break;
2523        default:
2524            return 1;
2525        }
2526        tcg_temp_free_i32(tmp2);
2527        tcg_temp_free_i32(tmp);
2528
2529        gen_op_iwmmxt_movq_wRn_M0(acc);
2530        return 0;
2531    }
2532
2533    if ((insn & 0x0fe00ff8) == 0x0c400000) {
2534        /* Internal Accumulator Access Format */
2535        rdhi = (insn >> 16) & 0xf;
2536        rdlo = (insn >> 12) & 0xf;
2537        acc = insn & 7;
2538
2539        if (acc != 0)
2540            return 1;
2541
2542        if (insn & ARM_CP_RW_BIT) {                     /* MRA */
2543            iwmmxt_load_reg(cpu_V0, acc);
2544            tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
2545            tcg_gen_extrh_i64_i32(cpu_R[rdhi], cpu_V0);
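            /* The accumulator is 40 bits; keep only bits [39:32] in rdhi. */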
2546            tcg_gen_andi_i32(cpu_R[rdhi], cpu_R[rdhi], (1 << (40 - 32)) - 1);
2547        } else {                                        /* MAR */
2548            tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
2549            iwmmxt_store_reg(cpu_V0, acc);
2550        }
2551        return 0;
2552    }
2553
2554    return 1;
2555}
2556
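/*
 * Look up the next TB for the current CPU state and jump straight to it
 * if it exists, otherwise return to the main loop.
 */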
2557static void gen_goto_ptr(void)
2558{
2559    tcg_gen_lookup_and_goto_ptr();
2560}
2561
2562/* This will end the TB but doesn't guarantee we'll return to
2563 * cpu_loop_exec. Any live exit_requests will be processed as we
2564 * enter the next TB.
2565 */
2566static void gen_goto_tb(DisasContext *s, int n, target_ulong dest)
2567{
2568    if (translator_use_goto_tb(&s->base, dest)) {
2569        tcg_gen_goto_tb(n);
2570        gen_set_pc_im(s, dest);
2571        tcg_gen_exit_tb(s->base.tb, n);
2572    } else {
2573        gen_set_pc_im(s, dest);
2574        gen_goto_ptr();
2575    }
2576    s->base.is_jmp = DISAS_NORETURN;
2577}
2578
2579/* Jump, specifying which TB number to use if we gen_goto_tb() */
2580static inline void gen_jmp_tb(DisasContext *s, uint32_t dest, int tbno)
2581{
2582    if (unlikely(s->ss_active)) {
2583        /* An indirect jump so that we still trigger the debug exception.  */
2584        gen_set_pc_im(s, dest);
2585        s->base.is_jmp = DISAS_JUMP;
2586        return;
2587    }
2588    switch (s->base.is_jmp) {
2589    case DISAS_NEXT:
2590    case DISAS_TOO_MANY:
2591    case DISAS_NORETURN:
2592        /*
2593         * The normal case: just go to the destination TB.
2594         * NB: NORETURN happens if we generate code like
2595         *    gen_brcondi(l);
2596         *    gen_jmp();
2597         *    gen_set_label(l);
2598         *    gen_jmp();
2599         * on the second call to gen_jmp().
2600         */
2601        gen_goto_tb(s, tbno, dest);
2602        break;
2603    case DISAS_UPDATE_NOCHAIN:
2604    case DISAS_UPDATE_EXIT:
2605        /*
2606         * We already decided we're leaving the TB for some other reason.
2607         * Avoid using goto_tb so we really do exit back to the main loop
2608         * and don't chain to another TB.
2609         */
2610        gen_set_pc_im(s, dest);
2611        gen_goto_ptr();
2612        s->base.is_jmp = DISAS_NORETURN;
2613        break;
2614    default:
2615        /*
2616         * We shouldn't be emitting code for a jump and also have
2617         * is_jmp set to one of the special cases like DISAS_SWI.
2618         */
2619        g_assert_not_reached();
2620    }
2621}
2622
2623static inline void gen_jmp(DisasContext *s, uint32_t dest)
2624{
2625    gen_jmp_tb(s, dest, 0);
2626}
2627
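/*
 * Signed 16x16->32 multiply: x and y select the top (1) or bottom (0)
 * halfword of t0 and t1 respectively; the product is left in t0.
 */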
2628static inline void gen_mulxy(TCGv_i32 t0, TCGv_i32 t1, int x, int y)
2629{
2630    if (x)
2631        tcg_gen_sari_i32(t0, t0, 16);
2632    else
2633        gen_sxth(t0);
2634    if (y)
2635        tcg_gen_sari_i32(t1, t1, 16);
2636    else
2637        gen_sxth(t1);
2638    tcg_gen_mul_i32(t0, t0, t1);
2639}
2640
2641/* Return the mask of PSR bits set by a MSR instruction.  */
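/*
 * For example, flags == 0b1001 (the c and f fields) gives a mask of
 * 0xff0000ff before the validity and privilege masking below.
 */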
2642static uint32_t msr_mask(DisasContext *s, int flags, int spsr)
2643{
2644    uint32_t mask = 0;
2645
2646    if (flags & (1 << 0)) {
2647        mask |= 0xff;
2648    }
2649    if (flags & (1 << 1)) {
2650        mask |= 0xff00;
2651    }
2652    if (flags & (1 << 2)) {
2653        mask |= 0xff0000;
2654    }
2655    if (flags & (1 << 3)) {
2656        mask |= 0xff000000;
2657    }
2658
2659    /* Mask out undefined and reserved bits.  */
2660    mask &= aarch32_cpsr_valid_mask(s->features, s->isar);
2661
2662    /* Mask out execution state.  */
2663    if (!spsr) {
2664        mask &= ~CPSR_EXEC;
2665    }
2666
2667    /* Mask out privileged bits.  */
2668    if (IS_USER(s)) {
2669        mask &= CPSR_USER;
2670    }
2671    return mask;
2672}
2673
2674/* Returns nonzero if access to the PSR is not permitted. Marks t0 as dead. */
2675static int gen_set_psr(DisasContext *s, uint32_t mask, int spsr, TCGv_i32 t0)
2676{
2677    TCGv_i32 tmp;
2678    if (spsr) {
2679        /* ??? This is also undefined in system mode.  */
2680        if (IS_USER(s))
2681            return 1;
2682
2683        tmp = load_cpu_field(spsr);
2684        tcg_gen_andi_i32(tmp, tmp, ~mask);
2685        tcg_gen_andi_i32(t0, t0, mask);
2686        tcg_gen_or_i32(tmp, tmp, t0);
2687        store_cpu_field(tmp, spsr);
2688    } else {
2689        gen_set_cpsr(t0, mask);
2690    }
2691    tcg_temp_free_i32(t0);
2692    gen_lookup_tb(s);
2693    return 0;
2694}
2695
2696/* Returns nonzero if access to the PSR is not permitted.  */
2697static int gen_set_psr_im(DisasContext *s, uint32_t mask, int spsr, uint32_t val)
2698{
2699    TCGv_i32 tmp;
2700    tmp = tcg_temp_new_i32();
2701    tcg_gen_movi_i32(tmp, val);
2702    return gen_set_psr(s, mask, spsr, tmp);
2703}
2704
2705static bool msr_banked_access_decode(DisasContext *s, int r, int sysm, int rn,
2706                                     int *tgtmode, int *regno)
2707{
2708    /* Decode the r and sysm fields of MSR/MRS banked accesses into
2709     * the target mode and register number, and identify the various
2710     * unpredictable cases.
2711     * MSR (banked) and MRS (banked) are CONSTRAINED UNPREDICTABLE if:
2712     *  + executed in user mode
2713     *  + using R15 as the src/dest register
2714     *  + accessing an unimplemented register
2715     *  + accessing a register that's inaccessible at current PL/security state*
2716     *  + accessing a register that you could access with a different insn
2717     * We choose to UNDEF in all these cases.
2718     * Since we don't know which of the various AArch32 modes we are in
2719     * we have to defer some checks to runtime.
2720     * Accesses to Monitor mode registers from Secure EL1 (which implies
2721     * that EL3 is AArch64) must trap to EL3.
2722     *
2723     * If the access checks fail this function will emit code to take
2724     * an exception and return false. Otherwise it will return true,
2725     * and set *tgtmode and *regno appropriately.
2726     */
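    /*
     * For example, r == 1, sysm == 0xe (SPSR_fiq) decodes to
     * tgtmode ARM_CPU_MODE_FIQ and regno 16.
     */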
2727    int exc_target = default_exception_el(s);
2728
2729    /* These instructions are present only in ARMv8, or in ARMv7 with the
2730     * Virtualization Extensions.
2731     */
2732    if (!arm_dc_feature(s, ARM_FEATURE_V8) &&
2733        !arm_dc_feature(s, ARM_FEATURE_EL2)) {
2734        goto undef;
2735    }
2736
2737    if (IS_USER(s) || rn == 15) {
2738        goto undef;
2739    }
2740
2741    /* The table in the v8 ARM ARM section F5.2.3 describes the encoding
2742     * of registers into (r, sysm).
2743     */
2744    if (r) {
2745        /* SPSRs for other modes */
2746        switch (sysm) {
2747        case 0xe: /* SPSR_fiq */
2748            *tgtmode = ARM_CPU_MODE_FIQ;
2749            break;
2750        case 0x10: /* SPSR_irq */
2751            *tgtmode = ARM_CPU_MODE_IRQ;
2752            break;
2753        case 0x12: /* SPSR_svc */
2754            *tgtmode = ARM_CPU_MODE_SVC;
2755            break;
2756        case 0x14: /* SPSR_abt */
2757            *tgtmode = ARM_CPU_MODE_ABT;
2758            break;
2759        case 0x16: /* SPSR_und */
2760            *tgtmode = ARM_CPU_MODE_UND;
2761            break;
2762        case 0x1c: /* SPSR_mon */
2763            *tgtmode = ARM_CPU_MODE_MON;
2764            break;
2765        case 0x1e: /* SPSR_hyp */
2766            *tgtmode = ARM_CPU_MODE_HYP;
2767            break;
2768        default: /* unallocated */
2769            goto undef;
2770        }
2771        /* We arbitrarily assign SPSR a register number of 16. */
2772        *regno = 16;
2773    } else {
2774        /* general purpose registers for other modes */
2775        switch (sysm) {
2776        case 0x0 ... 0x6:   /* 0b00xxx : r8_usr ... r14_usr */
2777            *tgtmode = ARM_CPU_MODE_USR;
2778            *regno = sysm + 8;
2779            break;
2780        case 0x8 ... 0xe:   /* 0b01xxx : r8_fiq ... r14_fiq */
2781            *tgtmode = ARM_CPU_MODE_FIQ;
2782            *regno = sysm;
2783            break;
2784        case 0x10 ... 0x11: /* 0b1000x : r14_irq, r13_irq */
2785            *tgtmode = ARM_CPU_MODE_IRQ;
2786            *regno = sysm & 1 ? 13 : 14;
2787            break;
2788        case 0x12 ... 0x13: /* 0b1001x : r14_svc, r13_svc */
2789            *tgtmode = ARM_CPU_MODE_SVC;
2790            *regno = sysm & 1 ? 13 : 14;
2791            break;
2792        case 0x14 ... 0x15: /* 0b1010x : r14_abt, r13_abt */
2793            *tgtmode = ARM_CPU_MODE_ABT;
2794            *regno = sysm & 1 ? 13 : 14;
2795            break;
2796        case 0x16 ... 0x17: /* 0b1011x : r14_und, r13_und */
2797            *tgtmode = ARM_CPU_MODE_UND;
2798            *regno = sysm & 1 ? 13 : 14;
2799            break;
2800        case 0x1c ... 0x1d: /* 0b1110x : r14_mon, r13_mon */
2801            *tgtmode = ARM_CPU_MODE_MON;
2802            *regno = sysm & 1 ? 13 : 14;
2803            break;
2804        case 0x1e ... 0x1f: /* 0b1111x : elr_hyp, r13_hyp */
2805            *tgtmode = ARM_CPU_MODE_HYP;
2806            /* Arbitrarily pick 17 for ELR_Hyp (which is not a banked LR!) */
2807            *regno = sysm & 1 ? 13 : 17;
2808            break;
2809        default: /* unallocated */
2810            goto undef;
2811        }
2812    }
2813
2814    /* Catch the 'accessing inaccessible register' cases we can detect
2815     * at translate time.
2816     */
2817    switch (*tgtmode) {
2818    case ARM_CPU_MODE_MON:
2819        if (!arm_dc_feature(s, ARM_FEATURE_EL3) || s->ns) {
2820            goto undef;
2821        }
2822        if (s->current_el == 1) {
2823            /* If we're in Secure EL1 (which implies that EL3 is AArch64)
2824             * then accesses to Mon registers trap to Secure EL2, if it exists,
2825             * otherwise EL3.
2826             */
2827            TCGv_i32 tcg_el;
2828
2829            if (arm_dc_feature(s, ARM_FEATURE_AARCH64) &&
2830                dc_isar_feature(aa64_sel2, s)) {
2831                /* Target EL is EL<3 minus SCR_EL3.EEL2> */
2832                tcg_el = load_cpu_field(cp15.scr_el3);
2833                tcg_gen_sextract_i32(tcg_el, tcg_el, ctz32(SCR_EEL2), 1);
2834                tcg_gen_addi_i32(tcg_el, tcg_el, 3);
2835            } else {
2836                tcg_el = tcg_const_i32(3);
2837            }
2838
2839            gen_exception_el(s, EXCP_UDEF, syn_uncategorized(), tcg_el);
2840            tcg_temp_free_i32(tcg_el);
2841            return false;
2842        }
2843        break;
2844    case ARM_CPU_MODE_HYP:
2845        /*
2846         * SPSR_hyp and r13_hyp can only be accessed from Monitor mode
2847         * (and so we can forbid accesses from EL2 or below). elr_hyp
2848         * can be accessed also from Hyp mode, so forbid accesses from
2849         * EL0 or EL1.
2850         */
2851        if (!arm_dc_feature(s, ARM_FEATURE_EL2) || s->current_el < 2 ||
2852            (s->current_el < 3 && *regno != 17)) {
2853            goto undef;
2854        }
2855        break;
2856    default:
2857        break;
2858    }
2859
2860    return true;
2861
2862undef:
2863    /* If we get here then some access check did not pass */
2864    gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
2865                       syn_uncategorized(), exc_target);
2866    return false;
2867}
2868
2869static void gen_msr_banked(DisasContext *s, int r, int sysm, int rn)
2870{
2871    TCGv_i32 tcg_reg, tcg_tgtmode, tcg_regno;
2872    int tgtmode = 0, regno = 0;
2873
2874    if (!msr_banked_access_decode(s, r, sysm, rn, &tgtmode, &regno)) {
2875        return;
2876    }
2877
2878    /* Sync state because msr_banked() can raise exceptions */
2879    gen_set_condexec(s);
2880    gen_set_pc_im(s, s->pc_curr);
2881    tcg_reg = load_reg(s, rn);
2882    tcg_tgtmode = tcg_const_i32(tgtmode);
2883    tcg_regno = tcg_const_i32(regno);
2884    gen_helper_msr_banked(cpu_env, tcg_reg, tcg_tgtmode, tcg_regno);
2885    tcg_temp_free_i32(tcg_tgtmode);
2886    tcg_temp_free_i32(tcg_regno);
2887    tcg_temp_free_i32(tcg_reg);
2888    s->base.is_jmp = DISAS_UPDATE_EXIT;
2889}
2890
2891static void gen_mrs_banked(DisasContext *s, int r, int sysm, int rn)
2892{
2893    TCGv_i32 tcg_reg, tcg_tgtmode, tcg_regno;
2894    int tgtmode = 0, regno = 0;
2895
2896    if (!msr_banked_access_decode(s, r, sysm, rn, &tgtmode, &regno)) {
2897        return;
2898    }
2899
2900    /* Sync state because mrs_banked() can raise exceptions */
2901    gen_set_condexec(s);
2902    gen_set_pc_im(s, s->pc_curr);
2903    tcg_reg = tcg_temp_new_i32();
2904    tcg_tgtmode = tcg_const_i32(tgtmode);
2905    tcg_regno = tcg_const_i32(regno);
2906    gen_helper_mrs_banked(tcg_reg, cpu_env, tcg_tgtmode, tcg_regno);
2907    tcg_temp_free_i32(tcg_tgtmode);
2908    tcg_temp_free_i32(tcg_regno);
2909    store_reg(s, rn, tcg_reg);
2910    s->base.is_jmp = DISAS_UPDATE_EXIT;
2911}
2912
2913/* Store value to PC as for an exception return (i.e. don't
2914 * mask bits). The subsequent call to gen_helper_cpsr_write_eret()
2915 * will do the masking based on the new value of the Thumb bit.
2916 */
2917static void store_pc_exc_ret(DisasContext *s, TCGv_i32 pc)
2918{
2919    tcg_gen_mov_i32(cpu_R[15], pc);
2920    tcg_temp_free_i32(pc);
2921}
2922
2923/* Generate a v6 exception return.  Marks both values as dead.  */
2924static void gen_rfe(DisasContext *s, TCGv_i32 pc, TCGv_i32 cpsr)
2925{
2926    store_pc_exc_ret(s, pc);
2927    /* The cpsr_write_eret helper will mask the low bits of PC
2928     * appropriately depending on the new Thumb bit, so it must
2929     * be called after storing the new PC.
2930     */
2931    if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
2932        gen_io_start();
2933    }
2934    gen_helper_cpsr_write_eret(cpu_env, cpsr);
2935    tcg_temp_free_i32(cpsr);
2936    /* Must exit loop to check un-masked IRQs */
2937    s->base.is_jmp = DISAS_EXIT;
2938}
2939
2940/* Generate an old-style exception return. Marks pc as dead. */
2941static void gen_exception_return(DisasContext *s, TCGv_i32 pc)
2942{
2943    gen_rfe(s, pc, load_cpu_field(spsr));
2944}
2945
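/*
 * Expand a three-operand gvec operation whose helper also takes a
 * pointer to the QC (saturation) flag in the CPU state.
 */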
2946static void gen_gvec_fn3_qc(uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs,
2947                            uint32_t opr_sz, uint32_t max_sz,
2948                            gen_helper_gvec_3_ptr *fn)
2949{
2950    TCGv_ptr qc_ptr = tcg_temp_new_ptr();
2951
2952    tcg_gen_addi_ptr(qc_ptr, cpu_env, offsetof(CPUARMState, vfp.qc));
2953    tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, qc_ptr,
2954                       opr_sz, max_sz, 0, fn);
2955    tcg_temp_free_ptr(qc_ptr);
2956}
2957
2958void gen_gvec_sqrdmlah_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
2959                          uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
2960{
2961    static gen_helper_gvec_3_ptr * const fns[2] = {
2962        gen_helper_gvec_qrdmlah_s16, gen_helper_gvec_qrdmlah_s32
2963    };
2964    tcg_debug_assert(vece >= 1 && vece <= 2);
2965    gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
2966}
2967
2968void gen_gvec_sqrdmlsh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
2969                          uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
2970{
2971    static gen_helper_gvec_3_ptr * const fns[2] = {
2972        gen_helper_gvec_qrdmlsh_s16, gen_helper_gvec_qrdmlsh_s32
2973    };
2974    tcg_debug_assert(vece >= 1 && vece <= 2);
2975    gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
2976}
2977
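/*
 * Compare-against-zero expanders (ceq0, cge0, etc.): each element of d is
 * set to all ones if the comparison of the corresponding element of a with
 * zero holds, and to all zeros otherwise.
 */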
2978#define GEN_CMP0(NAME, COND)                                            \
2979    static void gen_##NAME##0_i32(TCGv_i32 d, TCGv_i32 a)               \
2980    {                                                                   \
2981        tcg_gen_setcondi_i32(COND, d, a, 0);                            \
2982        tcg_gen_neg_i32(d, d);                                          \
2983    }                                                                   \
2984    static void gen_##NAME##0_i64(TCGv_i64 d, TCGv_i64 a)               \
2985    {                                                                   \
2986        tcg_gen_setcondi_i64(COND, d, a, 0);                            \
2987        tcg_gen_neg_i64(d, d);                                          \
2988    }                                                                   \
2989    static void gen_##NAME##0_vec(unsigned vece, TCGv_vec d, TCGv_vec a) \
2990    {                                                                   \
2991        TCGv_vec zero = tcg_const_zeros_vec_matching(d);                \
2992        tcg_gen_cmp_vec(COND, vece, d, a, zero);                        \
2993        tcg_temp_free_vec(zero);                                        \
2994    }                                                                   \
2995    void gen_gvec_##NAME##0(unsigned vece, uint32_t d, uint32_t m,      \
2996                            uint32_t opr_sz, uint32_t max_sz)           \
2997    {                                                                   \
2998        const GVecGen2 op[4] = {                                        \
2999            { .fno = gen_helper_gvec_##NAME##0_b,                       \
3000              .fniv = gen_##NAME##0_vec,                                \
3001              .opt_opc = vecop_list_cmp,                                \
3002              .vece = MO_8 },                                           \
3003            { .fno = gen_helper_gvec_##NAME##0_h,                       \
3004              .fniv = gen_##NAME##0_vec,                                \
3005              .opt_opc = vecop_list_cmp,                                \
3006              .vece = MO_16 },                                          \
3007            { .fni4 = gen_##NAME##0_i32,                                \
3008              .fniv = gen_##NAME##0_vec,                                \
3009              .opt_opc = vecop_list_cmp,                                \
3010              .vece = MO_32 },                                          \
3011            { .fni8 = gen_##NAME##0_i64,                                \
3012              .fniv = gen_##NAME##0_vec,                                \
3013              .opt_opc = vecop_list_cmp,                                \
3014              .prefer_i64 = TCG_TARGET_REG_BITS == 64,                  \
3015              .vece = MO_64 },                                          \
3016        };                                                              \
3017        tcg_gen_gvec_2(d, m, opr_sz, max_sz, &op[vece]);                \
3018    }
3019
3020static const TCGOpcode vecop_list_cmp[] = {
3021    INDEX_op_cmp_vec, 0
3022};
3023
3024GEN_CMP0(ceq, TCG_COND_EQ)
3025GEN_CMP0(cle, TCG_COND_LE)
3026GEN_CMP0(cge, TCG_COND_GE)
3027GEN_CMP0(clt, TCG_COND_LT)
3028GEN_CMP0(cgt, TCG_COND_GT)
3029
3030#undef GEN_CMP0
3031
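/*
 * Signed shift-right-and-accumulate (SSRA) expanders:
 * d += a >> shift (arithmetic shift), per element.
 */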
3032static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3033{
3034    tcg_gen_vec_sar8i_i64(a, a, shift);
3035    tcg_gen_vec_add8_i64(d, d, a);
3036}
3037
3038static void gen_ssra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3039{
3040    tcg_gen_vec_sar16i_i64(a, a, shift);
3041    tcg_gen_vec_add16_i64(d, d, a);
3042}
3043
3044static void gen_ssra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3045{
3046    tcg_gen_sari_i32(a, a, shift);
3047    tcg_gen_add_i32(d, d, a);
3048}
3049
3050static void gen_ssra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3051{
3052    tcg_gen_sari_i64(a, a, shift);
3053    tcg_gen_add_i64(d, d, a);
3054}
3055
3056static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3057{
3058    tcg_gen_sari_vec(vece, a, a, sh);
3059    tcg_gen_add_vec(vece, d, d, a);
3060}
3061
3062void gen_gvec_ssra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3063                   int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3064{
3065    static const TCGOpcode vecop_list[] = {
3066        INDEX_op_sari_vec, INDEX_op_add_vec, 0
3067    };
3068    static const GVecGen2i ops[4] = {
3069        { .fni8 = gen_ssra8_i64,
3070          .fniv = gen_ssra_vec,
3071          .fno = gen_helper_gvec_ssra_b,
3072          .load_dest = true,
3073          .opt_opc = vecop_list,
3074          .vece = MO_8 },
3075        { .fni8 = gen_ssra16_i64,
3076          .fniv = gen_ssra_vec,
3077          .fno = gen_helper_gvec_ssra_h,
3078          .load_dest = true,
3079          .opt_opc = vecop_list,
3080          .vece = MO_16 },
3081        { .fni4 = gen_ssra32_i32,
3082          .fniv = gen_ssra_vec,
3083          .fno = gen_helper_gvec_ssra_s,
3084          .load_dest = true,
3085          .opt_opc = vecop_list,
3086          .vece = MO_32 },
3087        { .fni8 = gen_ssra64_i64,
3088          .fniv = gen_ssra_vec,
3089          .fno = gen_helper_gvec_ssra_d,
3090          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3091          .opt_opc = vecop_list,
3092          .load_dest = true,
3093          .vece = MO_64 },
3094    };
3095
3096    /* tszimm encoding produces immediates in the range [1..esize]. */
3097    tcg_debug_assert(shift > 0);
3098    tcg_debug_assert(shift <= (8 << vece));
3099
3100    /*
3101     * Shifts larger than the element size are architecturally valid.
3102     * For a signed shift the result is all copies of the sign bit.
3103     */
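    /*
     * For example, a shift of 8 on byte elements is clamped to 7, which
     * still replicates the sign bit into every bit of the result.
     */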
3104    shift = MIN(shift, (8 << vece) - 1);
3105    tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3106}
3107
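/*
 * Unsigned shift-right-and-accumulate (USRA) expanders:
 * d += a >> shift (logical shift), per element.
 */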
3108static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3109{
3110    tcg_gen_vec_shr8i_i64(a, a, shift);
3111    tcg_gen_vec_add8_i64(d, d, a);
3112}
3113
3114static void gen_usra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3115{
3116    tcg_gen_vec_shr16i_i64(a, a, shift);
3117    tcg_gen_vec_add16_i64(d, d, a);
3118}
3119
3120static void gen_usra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3121{
3122    tcg_gen_shri_i32(a, a, shift);
3123    tcg_gen_add_i32(d, d, a);
3124}
3125
3126static void gen_usra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3127{
3128    tcg_gen_shri_i64(a, a, shift);
3129    tcg_gen_add_i64(d, d, a);
3130}
3131
3132static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3133{
3134    tcg_gen_shri_vec(vece, a, a, sh);
3135    tcg_gen_add_vec(vece, d, d, a);
3136}
3137
3138void gen_gvec_usra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3139                   int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3140{
3141    static const TCGOpcode vecop_list[] = {
3142        INDEX_op_shri_vec, INDEX_op_add_vec, 0
3143    };
3144    static const GVecGen2i ops[4] = {
3145        { .fni8 = gen_usra8_i64,
3146          .fniv = gen_usra_vec,
3147          .fno = gen_helper_gvec_usra_b,
3148          .load_dest = true,
3149          .opt_opc = vecop_list,
3150          .vece = MO_8, },
3151        { .fni8 = gen_usra16_i64,
3152          .fniv = gen_usra_vec,
3153          .fno = gen_helper_gvec_usra_h,
3154          .load_dest = true,
3155          .opt_opc = vecop_list,
3156          .vece = MO_16, },
3157        { .fni4 = gen_usra32_i32,
3158          .fniv = gen_usra_vec,
3159          .fno = gen_helper_gvec_usra_s,
3160          .load_dest = true,
3161          .opt_opc = vecop_list,
3162          .vece = MO_32, },
3163        { .fni8 = gen_usra64_i64,
3164          .fniv = gen_usra_vec,
3165          .fno = gen_helper_gvec_usra_d,
3166          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3167          .load_dest = true,
3168          .opt_opc = vecop_list,
3169          .vece = MO_64, },
3170    };
3171
3172    /* tszimm encoding produces immediates in the range [1..esize]. */
3173    tcg_debug_assert(shift > 0);
3174    tcg_debug_assert(shift <= (8 << vece));
3175
3176    /*
3177     * Shifts larger than the element size are architecturally valid.
3178     * Unsigned results in all zeros as input to accumulate: nop.
3179     */
3180    if (shift < (8 << vece)) {
3181        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3182    } else {
3183        /* Nop, but we do need to clear the tail. */
3184        tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
3185    }
3186}
3187
3188/*
3189 * Shift one less than the requested amount, and the low bit is
3190 * the rounding bit.  For the 8 and 16-bit operations, because we
3191 * mask the low bit, we can perform a normal integer shift instead
3192 * of a vector shift.
3193 */
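/*
 * For example, SRSHR #2 applied to the value 7 extracts
 * (7 >> 1) & 1 = 1 as the rounding bit and adds it to 7 >> 2 = 1,
 * giving 2, i.e. 7/4 rounded to nearest.
 */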
3194static void gen_srshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3195{
3196    TCGv_i64 t = tcg_temp_new_i64();
3197
3198    tcg_gen_shri_i64(t, a, sh - 1);
3199    tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
3200    tcg_gen_vec_sar8i_i64(d, a, sh);
3201    tcg_gen_vec_add8_i64(d, d, t);
3202    tcg_temp_free_i64(t);
3203}
3204
3205static void gen_srshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3206{
3207    TCGv_i64 t = tcg_temp_new_i64();
3208
3209    tcg_gen_shri_i64(t, a, sh - 1);
3210    tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
3211    tcg_gen_vec_sar16i_i64(d, a, sh);
3212    tcg_gen_vec_add16_i64(d, d, t);
3213    tcg_temp_free_i64(t);
3214}
3215
3216static void gen_srshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3217{
3218    TCGv_i32 t;
3219
3220    /* Handle shift by the input size for the benefit of trans_SRSHR_ri */
3221    if (sh == 32) {
3222        tcg_gen_movi_i32(d, 0);
3223        return;
3224    }
3225    t = tcg_temp_new_i32();
3226    tcg_gen_extract_i32(t, a, sh - 1, 1);
3227    tcg_gen_sari_i32(d, a, sh);
3228    tcg_gen_add_i32(d, d, t);
3229    tcg_temp_free_i32(t);
3230}
3231
3232static void gen_srshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3233{
3234    TCGv_i64 t = tcg_temp_new_i64();
3235
3236    tcg_gen_extract_i64(t, a, sh - 1, 1);
3237    tcg_gen_sari_i64(d, a, sh);
3238    tcg_gen_add_i64(d, d, t);
3239    tcg_temp_free_i64(t);
3240}
3241
3242static void gen_srshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3243{
3244    TCGv_vec t = tcg_temp_new_vec_matching(d);
3245    TCGv_vec ones = tcg_temp_new_vec_matching(d);
3246
3247    tcg_gen_shri_vec(vece, t, a, sh - 1);
3248    tcg_gen_dupi_vec(vece, ones, 1);
3249    tcg_gen_and_vec(vece, t, t, ones);
3250    tcg_gen_sari_vec(vece, d, a, sh);
3251    tcg_gen_add_vec(vece, d, d, t);
3252
3253    tcg_temp_free_vec(t);
3254    tcg_temp_free_vec(ones);
3255}
3256
3257void gen_gvec_srshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3258                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3259{
3260    static const TCGOpcode vecop_list[] = {
3261        INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
3262    };
3263    static const GVecGen2i ops[4] = {
3264        { .fni8 = gen_srshr8_i64,
3265          .fniv = gen_srshr_vec,
3266          .fno = gen_helper_gvec_srshr_b,
3267          .opt_opc = vecop_list,
3268          .vece = MO_8 },
3269        { .fni8 = gen_srshr16_i64,
3270          .fniv = gen_srshr_vec,
3271          .fno = gen_helper_gvec_srshr_h,
3272          .opt_opc = vecop_list,
3273          .vece = MO_16 },
3274        { .fni4 = gen_srshr32_i32,
3275          .fniv = gen_srshr_vec,
3276          .fno = gen_helper_gvec_srshr_s,
3277          .opt_opc = vecop_list,
3278          .vece = MO_32 },
3279        { .fni8 = gen_srshr64_i64,
3280          .fniv = gen_srshr_vec,
3281          .fno = gen_helper_gvec_srshr_d,
3282          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3283          .opt_opc = vecop_list,
3284          .vece = MO_64 },
3285    };
3286
3287    /* tszimm encoding produces immediates in the range [1..esize] */
3288    tcg_debug_assert(shift > 0);
3289    tcg_debug_assert(shift <= (8 << vece));
3290
3291    if (shift == (8 << vece)) {
3292        /*
3293         * Shifts larger than the element size are architecturally valid.
3294         * Signed results in all sign bits.  With rounding, this produces
3295         *   (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
3296         * I.e. always zero.
3297         */
3298        tcg_gen_gvec_dup_imm(vece, rd_ofs, opr_sz, max_sz, 0);
3299    } else {
3300        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3301    }
3302}
3303
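/*
 * SRSRA: signed rounding shift right and accumulate.  Reuse the
 * SRSHR expanders above and add the rounded result into d.
 */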
3304static void gen_srsra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3305{
3306    TCGv_i64 t = tcg_temp_new_i64();
3307
3308    gen_srshr8_i64(t, a, sh);
3309    tcg_gen_vec_add8_i64(d, d, t);
3310    tcg_temp_free_i64(t);
3311}
3312
3313static void gen_srsra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3314{
3315    TCGv_i64 t = tcg_temp_new_i64();
3316
3317    gen_srshr16_i64(t, a, sh);
3318    tcg_gen_vec_add16_i64(d, d, t);
3319    tcg_temp_free_i64(t);
3320}
3321
3322static void gen_srsra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3323{
3324    TCGv_i32 t = tcg_temp_new_i32();
3325
3326    gen_srshr32_i32(t, a, sh);
3327    tcg_gen_add_i32(d, d, t);
3328    tcg_temp_free_i32(t);
3329}
3330
3331static void gen_srsra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3332{
3333    TCGv_i64 t = tcg_temp_new_i64();
3334
3335    gen_srshr64_i64(t, a, sh);
3336    tcg_gen_add_i64(d, d, t);
3337    tcg_temp_free_i64(t);
3338}
3339
3340static void gen_srsra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3341{
3342    TCGv_vec t = tcg_temp_new_vec_matching(d);
3343
3344    gen_srshr_vec(vece, t, a, sh);
3345    tcg_gen_add_vec(vece, d, d, t);
3346    tcg_temp_free_vec(t);
3347}
3348
3349void gen_gvec_srsra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3350                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3351{
3352    static const TCGOpcode vecop_list[] = {
3353        INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
3354    };
3355    static const GVecGen2i ops[4] = {
3356        { .fni8 = gen_srsra8_i64,
3357          .fniv = gen_srsra_vec,
3358          .fno = gen_helper_gvec_srsra_b,
3359          .opt_opc = vecop_list,
3360          .load_dest = true,
3361          .vece = MO_8 },
3362        { .fni8 = gen_srsra16_i64,
3363          .fniv = gen_srsra_vec,
3364          .fno = gen_helper_gvec_srsra_h,
3365          .opt_opc = vecop_list,
3366          .load_dest = true,
3367          .vece = MO_16 },
3368        { .fni4 = gen_srsra32_i32,
3369          .fniv = gen_srsra_vec,
3370          .fno = gen_helper_gvec_srsra_s,
3371          .opt_opc = vecop_list,
3372          .load_dest = true,
3373          .vece = MO_32 },
3374        { .fni8 = gen_srsra64_i64,
3375          .fniv = gen_srsra_vec,
3376          .fno = gen_helper_gvec_srsra_d,
3377          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3378          .opt_opc = vecop_list,
3379          .load_dest = true,
3380          .vece = MO_64 },
3381    };
3382
3383    /* tszimm encoding produces immediates in the range [1..esize] */
3384    tcg_debug_assert(shift > 0);
3385    tcg_debug_assert(shift <= (8 << vece));
3386
3387    /*
3388     * Shifts larger than the element size are architecturally valid.
3389     * Signed results in all sign bits.  With rounding, this produces
3390     *   (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
3391     * I.e. always zero.  With accumulation, this leaves D unchanged.
3392     */
3393    if (shift == (8 << vece)) {
3394        /* Nop, but we do need to clear the tail. */
3395        tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
3396    } else {
3397        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3398    }
3399}
3400
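/*
 * URSHR: unsigned rounding shift right, using the same rounding-bit
 * trick as SRSHR but with logical instead of arithmetic shifts.
 */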
3401static void gen_urshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3402{
3403    TCGv_i64 t = tcg_temp_new_i64();
3404
3405    tcg_gen_shri_i64(t, a, sh - 1);
3406    tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
3407    tcg_gen_vec_shr8i_i64(d, a, sh);
3408    tcg_gen_vec_add8_i64(d, d, t);
3409    tcg_temp_free_i64(t);
3410}
3411
3412static void gen_urshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3413{
3414    TCGv_i64 t = tcg_temp_new_i64();
3415
3416    tcg_gen_shri_i64(t, a, sh - 1);
3417    tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
3418    tcg_gen_vec_shr16i_i64(d, a, sh);
3419    tcg_gen_vec_add16_i64(d, d, t);
3420    tcg_temp_free_i64(t);
3421}
3422
3423static void gen_urshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3424{
3425    TCGv_i32 t;
3426
3427    /* Handle shift by the input size for the benefit of trans_URSHR_ri */
3428    if (sh == 32) {
3429        tcg_gen_extract_i32(d, a, sh - 1, 1);
3430        return;
3431    }
3432    t = tcg_temp_new_i32();
3433    tcg_gen_extract_i32(t, a, sh - 1, 1);
3434    tcg_gen_shri_i32(d, a, sh);
3435    tcg_gen_add_i32(d, d, t);
3436    tcg_temp_free_i32(t);
3437}
3438
3439static void gen_urshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3440{
3441    TCGv_i64 t = tcg_temp_new_i64();
3442
3443    tcg_gen_extract_i64(t, a, sh - 1, 1);
3444    tcg_gen_shri_i64(d, a, sh);
3445    tcg_gen_add_i64(d, d, t);
3446    tcg_temp_free_i64(t);
3447}
3448
3449static void gen_urshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t shift)
3450{
3451    TCGv_vec t = tcg_temp_new_vec_matching(d);
3452    TCGv_vec ones = tcg_temp_new_vec_matching(d);
3453
3454    tcg_gen_shri_vec(vece, t, a, shift - 1);
3455    tcg_gen_dupi_vec(vece, ones, 1);
3456    tcg_gen_and_vec(vece, t, t, ones);
3457    tcg_gen_shri_vec(vece, d, a, shift);
3458    tcg_gen_add_vec(vece, d, d, t);
3459
3460    tcg_temp_free_vec(t);
3461    tcg_temp_free_vec(ones);
3462}
3463
3464void gen_gvec_urshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3465                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3466{
3467    static const TCGOpcode vecop_list[] = {
3468        INDEX_op_shri_vec, INDEX_op_add_vec, 0
3469    };
3470    static const GVecGen2i ops[4] = {
3471        { .fni8 = gen_urshr8_i64,
3472          .fniv = gen_urshr_vec,
3473          .fno = gen_helper_gvec_urshr_b,
3474          .opt_opc = vecop_list,
3475          .vece = MO_8 },
3476        { .fni8 = gen_urshr16_i64,
3477          .fniv = gen_urshr_vec,
3478          .fno = gen_helper_gvec_urshr_h,
3479          .opt_opc = vecop_list,
3480          .vece = MO_16 },
3481        { .fni4 = gen_urshr32_i32,
3482          .fniv = gen_urshr_vec,
3483          .fno = gen_helper_gvec_urshr_s,
3484          .opt_opc = vecop_list,
3485          .vece = MO_32 },
3486        { .fni8 = gen_urshr64_i64,
3487          .fniv = gen_urshr_vec,
3488          .fno = gen_helper_gvec_urshr_d,
3489          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3490          .opt_opc = vecop_list,
3491          .vece = MO_64 },
3492    };
3493
3494    /* tszimm encoding produces immediates in the range [1..esize] */
3495    tcg_debug_assert(shift > 0);
3496    tcg_debug_assert(shift <= (8 << vece));
3497
3498    if (shift == (8 << vece)) {
3499        /*
3500         * Shifts larger than the element size are architecturally valid.
3501         * Unsigned results in zero.  With rounding, this produces a
3502         * copy of the most significant bit.
3503         */
3504        tcg_gen_gvec_shri(vece, rd_ofs, rm_ofs, shift - 1, opr_sz, max_sz);
3505    } else {
3506        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3507    }
3508}
3509
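/*
 * URSRA: unsigned rounding shift right and accumulate.  For a shift
 * by the full element size only the rounding bit (the old msb)
 * survives, so that case is open-coded rather than calling URSHR.
 */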
3510static void gen_ursra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3511{
3512    TCGv_i64 t = tcg_temp_new_i64();
3513
3514    if (sh == 8) {
3515        tcg_gen_vec_shr8i_i64(t, a, 7);
3516    } else {
3517        gen_urshr8_i64(t, a, sh);
3518    }
3519    tcg_gen_vec_add8_i64(d, d, t);
3520    tcg_temp_free_i64(t);
3521}
3522
3523static void gen_ursra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3524{
3525    TCGv_i64 t = tcg_temp_new_i64();
3526
3527    if (sh == 16) {
3528        tcg_gen_vec_shr16i_i64(t, a, 15);
3529    } else {
3530        gen_urshr16_i64(t, a, sh);
3531    }
3532    tcg_gen_vec_add16_i64(d, d, t);
3533    tcg_temp_free_i64(t);
3534}
3535
3536static void gen_ursra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3537{
3538    TCGv_i32 t = tcg_temp_new_i32();
3539
3540    if (sh == 32) {
3541        tcg_gen_shri_i32(t, a, 31);
3542    } else {
3543        gen_urshr32_i32(t, a, sh);
3544    }
3545    tcg_gen_add_i32(d, d, t);
3546    tcg_temp_free_i32(t);
3547}
3548
3549static void gen_ursra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3550{
3551    TCGv_i64 t = tcg_temp_new_i64();
3552
3553    if (sh == 64) {
3554        tcg_gen_shri_i64(t, a, 63);
3555    } else {
3556        gen_urshr64_i64(t, a, sh);
3557    }
3558    tcg_gen_add_i64(d, d, t);
3559    tcg_temp_free_i64(t);
3560}
3561
3562static void gen_ursra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3563{
3564    TCGv_vec t = tcg_temp_new_vec_matching(d);
3565
3566    if (sh == (8 << vece)) {
3567        tcg_gen_shri_vec(vece, t, a, sh - 1);
3568    } else {
3569        gen_urshr_vec(vece, t, a, sh);
3570    }
3571    tcg_gen_add_vec(vece, d, d, t);
3572    tcg_temp_free_vec(t);
3573}
3574
3575void gen_gvec_ursra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3576                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3577{
3578    static const TCGOpcode vecop_list[] = {
3579        INDEX_op_shri_vec, INDEX_op_add_vec, 0
3580    };
3581    static const GVecGen2i ops[4] = {
3582        { .fni8 = gen_ursra8_i64,
3583          .fniv = gen_ursra_vec,
3584          .fno = gen_helper_gvec_ursra_b,
3585          .opt_opc = vecop_list,
3586          .load_dest = true,
3587          .vece = MO_8 },
3588        { .fni8 = gen_ursra16_i64,
3589          .fniv = gen_ursra_vec,
3590          .fno = gen_helper_gvec_ursra_h,
3591          .opt_opc = vecop_list,
3592          .load_dest = true,
3593          .vece = MO_16 },
3594        { .fni4 = gen_ursra32_i32,
3595          .fniv = gen_ursra_vec,
3596          .fno = gen_helper_gvec_ursra_s,
3597          .opt_opc = vecop_list,
3598          .load_dest = true,
3599          .vece = MO_32 },
3600        { .fni8 = gen_ursra64_i64,
3601          .fniv = gen_ursra_vec,
3602          .fno = gen_helper_gvec_ursra_d,
3603          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3604          .opt_opc = vecop_list,
3605          .load_dest = true,
3606          .vece = MO_64 },
3607    };
3608
3609    /* tszimm encoding produces immediates in the range [1..esize] */
3610    tcg_debug_assert(shift > 0);
3611    tcg_debug_assert(shift <= (8 << vece));
3612
3613    tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3614}
3615
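/*
 * SRI: shift right and insert.  a >> shift is written into the low
 * esize - shift bits of each element of d; the top shift bits of d
 * are preserved.
 */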
3616static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3617{
3618    uint64_t mask = dup_const(MO_8, 0xff >> shift);
3619    TCGv_i64 t = tcg_temp_new_i64();
3620
3621    tcg_gen_shri_i64(t, a, shift);
3622    tcg_gen_andi_i64(t, t, mask);
3623    tcg_gen_andi_i64(d, d, ~mask);
3624    tcg_gen_or_i64(d, d, t);
3625    tcg_temp_free_i64(t);
3626}
3627
3628static void gen_shr16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3629{
3630    uint64_t mask = dup_const(MO_16, 0xffff >> shift);
3631    TCGv_i64 t = tcg_temp_new_i64();
3632
3633    tcg_gen_shri_i64(t, a, shift);
3634    tcg_gen_andi_i64(t, t, mask);
3635    tcg_gen_andi_i64(d, d, ~mask);
3636    tcg_gen_or_i64(d, d, t);
3637    tcg_temp_free_i64(t);
3638}
3639
3640static void gen_shr32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3641{
3642    tcg_gen_shri_i32(a, a, shift);
3643    tcg_gen_deposit_i32(d, d, a, 0, 32 - shift);
3644}
3645
3646static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3647{
3648    tcg_gen_shri_i64(a, a, shift);
3649    tcg_gen_deposit_i64(d, d, a, 0, 64 - shift);
3650}
3651
3652static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3653{
3654    TCGv_vec t = tcg_temp_new_vec_matching(d);
3655    TCGv_vec m = tcg_temp_new_vec_matching(d);
3656
3657    tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh));
3658    tcg_gen_shri_vec(vece, t, a, sh);
3659    tcg_gen_and_vec(vece, d, d, m);
3660    tcg_gen_or_vec(vece, d, d, t);
3661
3662    tcg_temp_free_vec(t);
3663    tcg_temp_free_vec(m);
3664}
3665
3666void gen_gvec_sri(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3667                  int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3668{
3669    static const TCGOpcode vecop_list[] = { INDEX_op_shri_vec, 0 };
3670    const GVecGen2i ops[4] = {
3671        { .fni8 = gen_shr8_ins_i64,
3672          .fniv = gen_shr_ins_vec,
3673          .fno = gen_helper_gvec_sri_b,
3674          .load_dest = true,
3675          .opt_opc = vecop_list,
3676          .vece = MO_8 },
3677        { .fni8 = gen_shr16_ins_i64,
3678          .fniv = gen_shr_ins_vec,
3679          .fno = gen_helper_gvec_sri_h,
3680          .load_dest = true,
3681          .opt_opc = vecop_list,
3682          .vece = MO_16 },
3683        { .fni4 = gen_shr32_ins_i32,
3684          .fniv = gen_shr_ins_vec,
3685          .fno = gen_helper_gvec_sri_s,
3686          .load_dest = true,
3687          .opt_opc = vecop_list,
3688          .vece = MO_32 },
3689        { .fni8 = gen_shr64_ins_i64,
3690          .fniv = gen_shr_ins_vec,
3691          .fno = gen_helper_gvec_sri_d,
3692          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3693          .load_dest = true,
3694          .opt_opc = vecop_list,
3695          .vece = MO_64 },
3696    };
3697
3698    /* tszimm encoding produces immediates in the range [1..esize]. */
3699    tcg_debug_assert(shift > 0);
3700    tcg_debug_assert(shift <= (8 << vece));
3701
3702    /* Shift of esize leaves destination unchanged. */
3703    if (shift < (8 << vece)) {
3704        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3705    } else {
3706        /* Nop, but we do need to clear the tail. */
3707        tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
3708    }
3709}
3710
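/*
 * SLI: shift left and insert.  a << shift is written into the high
 * esize - shift bits of each element of d; the low shift bits of d
 * are preserved.
 */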
3711static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3712{
3713    uint64_t mask = dup_const(MO_8, 0xff << shift);
3714    TCGv_i64 t = tcg_temp_new_i64();
3715
3716    tcg_gen_shli_i64(t, a, shift);
3717    tcg_gen_andi_i64(t, t, mask);
3718    tcg_gen_andi_i64(d, d, ~mask);
3719    tcg_gen_or_i64(d, d, t);
3720    tcg_temp_free_i64(t);
3721}
3722
3723static void gen_shl16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3724{
3725    uint64_t mask = dup_const(MO_16, 0xffff << shift);
3726    TCGv_i64 t = tcg_temp_new_i64();
3727
3728    tcg_gen_shli_i64(t, a, shift);
3729    tcg_gen_andi_i64(t, t, mask);
3730    tcg_gen_andi_i64(d, d, ~mask);
3731    tcg_gen_or_i64(d, d, t);
3732    tcg_temp_free_i64(t);
3733}
3734
3735static void gen_shl32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3736{
3737    tcg_gen_deposit_i32(d, d, a, shift, 32 - shift);
3738}
3739
3740static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3741{
3742    tcg_gen_deposit_i64(d, d, a, shift, 64 - shift);
3743}
3744
3745static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3746{
3747    TCGv_vec t = tcg_temp_new_vec_matching(d);
3748    TCGv_vec m = tcg_temp_new_vec_matching(d);
3749
3750    tcg_gen_shli_vec(vece, t, a, sh);
3751    tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK(0, sh));
3752    tcg_gen_and_vec(vece, d, d, m);
3753    tcg_gen_or_vec(vece, d, d, t);
3754
3755    tcg_temp_free_vec(t);
3756    tcg_temp_free_vec(m);
3757}
3758
3759void gen_gvec_sli(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3760                  int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3761{
3762    static const TCGOpcode vecop_list[] = { INDEX_op_shli_vec, 0 };
3763    const GVecGen2i ops[4] = {
3764        { .fni8 = gen_shl8_ins_i64,
3765          .fniv = gen_shl_ins_vec,
3766          .fno = gen_helper_gvec_sli_b,
3767          .load_dest = true,
3768          .opt_opc = vecop_list,
3769          .vece = MO_8 },
3770        { .fni8 = gen_shl16_ins_i64,
3771          .fniv = gen_shl_ins_vec,
3772          .fno = gen_helper_gvec_sli_h,
3773          .load_dest = true,
3774          .opt_opc = vecop_list,
3775          .vece = MO_16 },
3776        { .fni4 = gen_shl32_ins_i32,
3777          .fniv = gen_shl_ins_vec,
3778          .fno = gen_helper_gvec_sli_s,
3779          .load_dest = true,
3780          .opt_opc = vecop_list,
3781          .vece = MO_32 },
3782        { .fni8 = gen_shl64_ins_i64,
3783          .fniv = gen_shl_ins_vec,
3784          .fno = gen_helper_gvec_sli_d,
3785          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3786          .load_dest = true,
3787          .opt_opc = vecop_list,
3788          .vece = MO_64 },
3789    };
3790
3791    /* tszimm encoding produces immediates in the range [0..esize-1]. */
3792    tcg_debug_assert(shift >= 0);
3793    tcg_debug_assert(shift < (8 << vece));
3794
3795    if (shift == 0) {
3796        tcg_gen_gvec_mov(vece, rd_ofs, rm_ofs, opr_sz, max_sz);
3797    } else {
3798        tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3799    }
3800}
3801
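/*
 * Integer multiply-accumulate and multiply-subtract expanders:
 * d += a * b and d -= a * b, used for VMLA and VMLS.
 */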
3802static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3803{
3804    gen_helper_neon_mul_u8(a, a, b);
3805    gen_helper_neon_add_u8(d, d, a);
3806}
3807
3808static void gen_mls8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3809{
3810    gen_helper_neon_mul_u8(a, a, b);
3811    gen_helper_neon_sub_u8(d, d, a);
3812}
3813
3814static void gen_mla16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3815{
3816    gen_helper_neon_mul_u16(a, a, b);
3817    gen_helper_neon_add_u16(d, d, a);
3818}
3819
3820static void gen_mls16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3821{
3822    gen_helper_neon_mul_u16(a, a, b);
3823    gen_helper_neon_sub_u16(d, d, a);
3824}
3825
3826static void gen_mla32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3827{
3828    tcg_gen_mul_i32(a, a, b);
3829    tcg_gen_add_i32(d, d, a);
3830}
3831
3832static void gen_mls32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3833{
3834    tcg_gen_mul_i32(a, a, b);
3835    tcg_gen_sub_i32(d, d, a);
3836}
3837
3838static void gen_mla64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
3839{
3840    tcg_gen_mul_i64(a, a, b);
3841    tcg_gen_add_i64(d, d, a);
3842}
3843
3844static void gen_mls64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
3845{
3846    tcg_gen_mul_i64(a, a, b);
3847    tcg_gen_sub_i64(d, d, a);
3848}
3849
3850static void gen_mla_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
3851{
3852    tcg_gen_mul_vec(vece, a, a, b);
3853    tcg_gen_add_vec(vece, d, d, a);
3854}
3855
3856static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
3857{
3858    tcg_gen_mul_vec(vece, a, a, b);
3859    tcg_gen_sub_vec(vece, d, d, a);
3860}
3861
3862/* Note that while NEON does not support VMLA and VMLS as 64-bit ops,
3863 * these tables are shared with AArch64 which does support them.
3864 */
3865void gen_gvec_mla(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3866                  uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3867{
3868    static const TCGOpcode vecop_list[] = {
3869        INDEX_op_mul_vec, INDEX_op_add_vec, 0
3870    };
3871    static const GVecGen3 ops[4] = {
3872        { .fni4 = gen_mla8_i32,
3873          .fniv = gen_mla_vec,
3874          .load_dest = true,
3875          .opt_opc = vecop_list,
3876          .vece = MO_8 },
3877        { .fni4 = gen_mla16_i32,
3878          .fniv = gen_mla_vec,
3879          .load_dest = true,
3880          .opt_opc = vecop_list,
3881          .vece = MO_16 },
3882        { .fni4 = gen_mla32_i32,
3883          .fniv = gen_mla_vec,
3884          .load_dest = true,
3885          .opt_opc = vecop_list,
3886          .vece = MO_32 },
3887        { .fni8 = gen_mla64_i64,
3888          .fniv = gen_mla_vec,
3889          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3890          .load_dest = true,
3891          .opt_opc = vecop_list,
3892          .vece = MO_64 },
3893    };
3894    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
3895}
3896
3897void gen_gvec_mls(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3898                  uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3899{
3900    static const TCGOpcode vecop_list[] = {
3901        INDEX_op_mul_vec, INDEX_op_sub_vec, 0
3902    };
3903    static const GVecGen3 ops[4] = {
3904        { .fni4 = gen_mls8_i32,
3905          .fniv = gen_mls_vec,
3906          .load_dest = true,
3907          .opt_opc = vecop_list,
3908          .vece = MO_8 },
3909        { .fni4 = gen_mls16_i32,
3910          .fniv = gen_mls_vec,
3911          .load_dest = true,
3912          .opt_opc = vecop_list,
3913          .vece = MO_16 },
3914        { .fni4 = gen_mls32_i32,
3915          .fniv = gen_mls_vec,
3916          .load_dest = true,
3917          .opt_opc = vecop_list,
3918          .vece = MO_32 },
3919        { .fni8 = gen_mls64_i64,
3920          .fniv = gen_mls_vec,
3921          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3922          .load_dest = true,
3923          .opt_opc = vecop_list,
3924          .vece = MO_64 },
3925    };
3926    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
3927}
3928
3929/* CMTST : test is "if (X & Y != 0)". */
3930static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3931{
3932    tcg_gen_and_i32(d, a, b);
3933    tcg_gen_setcondi_i32(TCG_COND_NE, d, d, 0);
3934    tcg_gen_neg_i32(d, d);
3935}
3936
3937void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
3938{
3939    tcg_gen_and_i64(d, a, b);
3940    tcg_gen_setcondi_i64(TCG_COND_NE, d, d, 0);
3941    tcg_gen_neg_i64(d, d);
3942}
3943
3944static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
3945{
3946    tcg_gen_and_vec(vece, d, a, b);
3947    tcg_gen_dupi_vec(vece, a, 0);
3948    tcg_gen_cmp_vec(TCG_COND_NE, vece, d, d, a);
3949}
3950
3951void gen_gvec_cmtst(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3952                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3953{
3954    static const TCGOpcode vecop_list[] = { INDEX_op_cmp_vec, 0 };
3955    static const GVecGen3 ops[4] = {
3956        { .fni4 = gen_helper_neon_tst_u8,
3957          .fniv = gen_cmtst_vec,
3958          .opt_opc = vecop_list,
3959          .vece = MO_8 },
3960        { .fni4 = gen_helper_neon_tst_u16,
3961          .fniv = gen_cmtst_vec,
3962          .opt_opc = vecop_list,
3963          .vece = MO_16 },
3964        { .fni4 = gen_cmtst_i32,
3965          .fniv = gen_cmtst_vec,
3966          .opt_opc = vecop_list,
3967          .vece = MO_32 },
3968        { .fni8 = gen_cmtst_i64,
3969          .fniv = gen_cmtst_vec,
3970          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3971          .opt_opc = vecop_list,
3972          .vece = MO_64 },
3973    };
3974    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
3975}
3976
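/*
 * USHL: shift by a signed, per-element count taken from the low byte
 * of the second operand.  Negative counts shift right; counts of
 * esize or more in either direction produce zero.
 */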
3977void gen_ushl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
3978{
3979    TCGv_i32 lval = tcg_temp_new_i32();
3980    TCGv_i32 rval = tcg_temp_new_i32();
3981    TCGv_i32 lsh = tcg_temp_new_i32();
3982    TCGv_i32 rsh = tcg_temp_new_i32();
3983    TCGv_i32 zero = tcg_const_i32(0);
3984    TCGv_i32 max = tcg_const_i32(32);
3985
3986    /*
3987     * Rely on the TCG guarantee that out of range shifts produce
3988     * unspecified results, not undefined behaviour (i.e. no trap).
3989     * Discard out-of-range results after the fact.
3990     */
3991    tcg_gen_ext8s_i32(lsh, shift);
3992    tcg_gen_neg_i32(rsh, lsh);
3993    tcg_gen_shl_i32(lval, src, lsh);
3994    tcg_gen_shr_i32(rval, src, rsh);
3995    tcg_gen_movcond_i32(TCG_COND_LTU, dst, lsh, max, lval, zero);
3996    tcg_gen_movcond_i32(TCG_COND_LTU, dst, rsh, max, rval, dst);
3997
3998    tcg_temp_free_i32(lval);
3999    tcg_temp_free_i32(rval);
4000    tcg_temp_free_i32(lsh);
4001    tcg_temp_free_i32(rsh);
4002    tcg_temp_free_i32(zero);
4003    tcg_temp_free_i32(max);
4004}
4005
4006void gen_ushl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
4007{
4008    TCGv_i64 lval = tcg_temp_new_i64();
4009    TCGv_i64 rval = tcg_temp_new_i64();
4010    TCGv_i64 lsh = tcg_temp_new_i64();
4011    TCGv_i64 rsh = tcg_temp_new_i64();
4012    TCGv_i64 zero = tcg_const_i64(0);
4013    TCGv_i64 max = tcg_const_i64(64);
4014
4015    /*
4016     * Rely on the TCG guarantee that out of range shifts produce
4017     * unspecified results, not undefined behaviour (i.e. no trap).
4018     * Discard out-of-range results after the fact.
4019     */
4020    tcg_gen_ext8s_i64(lsh, shift);
4021    tcg_gen_neg_i64(rsh, lsh);
4022    tcg_gen_shl_i64(lval, src, lsh);
4023    tcg_gen_shr_i64(rval, src, rsh);
4024    tcg_gen_movcond_i64(TCG_COND_LTU, dst, lsh, max, lval, zero);
4025    tcg_gen_movcond_i64(TCG_COND_LTU, dst, rsh, max, rval, dst);
4026
4027    tcg_temp_free_i64(lval);
4028    tcg_temp_free_i64(rval);
4029    tcg_temp_free_i64(lsh);
4030    tcg_temp_free_i64(rsh);
4031    tcg_temp_free_i64(zero);
4032    tcg_temp_free_i64(max);
4033}
4034
4035static void gen_ushl_vec(unsigned vece, TCGv_vec dst,
4036                         TCGv_vec src, TCGv_vec shift)
4037{
4038    TCGv_vec lval = tcg_temp_new_vec_matching(dst);
4039    TCGv_vec rval = tcg_temp_new_vec_matching(dst);
4040    TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
4041    TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
4042    TCGv_vec msk, max;
4043
4044    tcg_gen_neg_vec(vece, rsh, shift);
4045    if (vece == MO_8) {
4046        tcg_gen_mov_vec(lsh, shift);
4047    } else {
4048        msk = tcg_temp_new_vec_matching(dst);
4049        tcg_gen_dupi_vec(vece, msk, 0xff);
4050        tcg_gen_and_vec(vece, lsh, shift, msk);
4051        tcg_gen_and_vec(vece, rsh, rsh, msk);
4052        tcg_temp_free_vec(msk);
4053    }
4054
4055    /*
4056     * Rely on the TCG guarantee that out of range shifts produce
4057     * unspecified results, not undefined behaviour (i.e. no trap).
4058     * Discard out-of-range results after the fact.
4059     */
4060    tcg_gen_shlv_vec(vece, lval, src, lsh);
4061    tcg_gen_shrv_vec(vece, rval, src, rsh);
4062
4063    max = tcg_temp_new_vec_matching(dst);
4064    tcg_gen_dupi_vec(vece, max, 8 << vece);
4065
4066    /*
4067     * The choice of LT (signed) and GEU (unsigned) are biased toward
4068     * the instructions of the x86_64 host.  For MO_8, the whole byte
4069     * is significant so we must use an unsigned compare; otherwise we
4070     * have already masked to a byte and so a signed compare works.
4071     * Other tcg hosts have a full set of comparisons and do not care.
4072     */
4073    if (vece == MO_8) {
4074        tcg_gen_cmp_vec(TCG_COND_GEU, vece, lsh, lsh, max);
4075        tcg_gen_cmp_vec(TCG_COND_GEU, vece, rsh, rsh, max);
4076        tcg_gen_andc_vec(vece, lval, lval, lsh);
4077        tcg_gen_andc_vec(vece, rval, rval, rsh);
4078    } else {
4079        tcg_gen_cmp_vec(TCG_COND_LT, vece, lsh, lsh, max);
4080        tcg_gen_cmp_vec(TCG_COND_LT, vece, rsh, rsh, max);
4081        tcg_gen_and_vec(vece, lval, lval, lsh);
4082        tcg_gen_and_vec(vece, rval, rval, rsh);
4083    }
4084    tcg_gen_or_vec(vece, dst, lval, rval);
4085
4086    tcg_temp_free_vec(max);
4087    tcg_temp_free_vec(lval);
4088    tcg_temp_free_vec(rval);
4089    tcg_temp_free_vec(lsh);
4090    tcg_temp_free_vec(rsh);
4091}
4092
4093void gen_gvec_ushl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4094                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4095{
4096    static const TCGOpcode vecop_list[] = {
4097        INDEX_op_neg_vec, INDEX_op_shlv_vec,
4098        INDEX_op_shrv_vec, INDEX_op_cmp_vec, 0
4099    };
4100    static const GVecGen3 ops[4] = {
4101        { .fniv = gen_ushl_vec,
4102          .fno = gen_helper_gvec_ushl_b,
4103          .opt_opc = vecop_list,
4104          .vece = MO_8 },
4105        { .fniv = gen_ushl_vec,
4106          .fno = gen_helper_gvec_ushl_h,
4107          .opt_opc = vecop_list,
4108          .vece = MO_16 },
4109        { .fni4 = gen_ushl_i32,
4110          .fniv = gen_ushl_vec,
4111          .opt_opc = vecop_list,
4112          .vece = MO_32 },
4113        { .fni8 = gen_ushl_i64,
4114          .fniv = gen_ushl_vec,
4115          .opt_opc = vecop_list,
4116          .vece = MO_64 },
4117    };
4118    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4119}
4120
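/*
 * SSHL: as USHL, but negative counts perform an arithmetic shift
 * right.  Left shifts of esize or more still yield zero, while right
 * shifts of esize or more replicate the sign bit across the element.
 */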
4121void gen_sshl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
4122{
4123    TCGv_i32 lval = tcg_temp_new_i32();
4124    TCGv_i32 rval = tcg_temp_new_i32();
4125    TCGv_i32 lsh = tcg_temp_new_i32();
4126    TCGv_i32 rsh = tcg_temp_new_i32();
4127    TCGv_i32 zero = tcg_const_i32(0);
4128    TCGv_i32 max = tcg_const_i32(31);
4129
4130    /*
4131     * Rely on the TCG guarantee that out of range shifts produce
4132     * unspecified results, not undefined behaviour (i.e. no trap).
4133     * Discard out-of-range results after the fact.
4134     */
4135    tcg_gen_ext8s_i32(lsh, shift);
4136    tcg_gen_neg_i32(rsh, lsh);
4137    tcg_gen_shl_i32(lval, src, lsh);
4138    tcg_gen_umin_i32(rsh, rsh, max);
4139    tcg_gen_sar_i32(rval, src, rsh);
4140    tcg_gen_movcond_i32(TCG_COND_LEU, lval, lsh, max, lval, zero);
4141    tcg_gen_movcond_i32(TCG_COND_LT, dst, lsh, zero, rval, lval);
4142
4143    tcg_temp_free_i32(lval);
4144    tcg_temp_free_i32(rval);
4145    tcg_temp_free_i32(lsh);
4146    tcg_temp_free_i32(rsh);
4147    tcg_temp_free_i32(zero);
4148    tcg_temp_free_i32(max);
4149}
4150
4151void gen_sshl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
4152{
4153    TCGv_i64 lval = tcg_temp_new_i64();
4154    TCGv_i64 rval = tcg_temp_new_i64();
4155    TCGv_i64 lsh = tcg_temp_new_i64();
4156    TCGv_i64 rsh = tcg_temp_new_i64();
4157    TCGv_i64 zero = tcg_const_i64(0);
4158    TCGv_i64 max = tcg_const_i64(63);
4159
4160    /*
4161     * Rely on the TCG guarantee that out of range shifts produce
4162     * unspecified results, not undefined behaviour (i.e. no trap).
4163     * Discard out-of-range results after the fact.
4164     */
4165    tcg_gen_ext8s_i64(lsh, shift);
4166    tcg_gen_neg_i64(rsh, lsh);
4167    tcg_gen_shl_i64(lval, src, lsh);
4168    tcg_gen_umin_i64(rsh, rsh, max);
4169    tcg_gen_sar_i64(rval, src, rsh);
4170    tcg_gen_movcond_i64(TCG_COND_LEU, lval, lsh, max, lval, zero);
4171    tcg_gen_movcond_i64(TCG_COND_LT, dst, lsh, zero, rval, lval);
4172
4173    tcg_temp_free_i64(lval);
4174    tcg_temp_free_i64(rval);
4175    tcg_temp_free_i64(lsh);
4176    tcg_temp_free_i64(rsh);
4177    tcg_temp_free_i64(zero);
4178    tcg_temp_free_i64(max);
4179}
4180
4181static void gen_sshl_vec(unsigned vece, TCGv_vec dst,
4182                         TCGv_vec src, TCGv_vec shift)
4183{
4184    TCGv_vec lval = tcg_temp_new_vec_matching(dst);
4185    TCGv_vec rval = tcg_temp_new_vec_matching(dst);
4186    TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
4187    TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
4188    TCGv_vec tmp = tcg_temp_new_vec_matching(dst);
4189
4190    /*
4191     * Rely on the TCG guarantee that out of range shifts produce
4192     * unspecified results, not undefined behaviour (i.e. no trap).
4193     * Discard out-of-range results after the fact.
4194     */
4195    tcg_gen_neg_vec(vece, rsh, shift);
4196    if (vece == MO_8) {
4197        tcg_gen_mov_vec(lsh, shift);
4198    } else {
4199        tcg_gen_dupi_vec(vece, tmp, 0xff);
4200        tcg_gen_and_vec(vece, lsh, shift, tmp);
4201        tcg_gen_and_vec(vece, rsh, rsh, tmp);
4202    }
4203
4204    /* Bound rsh so out of bound right shift gets -1.  */
4205    tcg_gen_dupi_vec(vece, tmp, (8 << vece) - 1);
4206    tcg_gen_umin_vec(vece, rsh, rsh, tmp);
4207    tcg_gen_cmp_vec(TCG_COND_GT, vece, tmp, lsh, tmp);
4208
4209    tcg_gen_shlv_vec(vece, lval, src, lsh);
4210    tcg_gen_sarv_vec(vece, rval, src, rsh);
4211
4212    /* Select in-bound left shift.  */
4213    tcg_gen_andc_vec(vece, lval, lval, tmp);
4214
4215    /* Select between left and right shift.  */
4216    if (vece == MO_8) {
4217        tcg_gen_dupi_vec(vece, tmp, 0);
4218        tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, rval, lval);
4219    } else {
4220        tcg_gen_dupi_vec(vece, tmp, 0x80);
4221        tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, lval, rval);
4222    }
4223
4224    tcg_temp_free_vec(lval);
4225    tcg_temp_free_vec(rval);
4226    tcg_temp_free_vec(lsh);
4227    tcg_temp_free_vec(rsh);
4228    tcg_temp_free_vec(tmp);
4229}
4230
4231void gen_gvec_sshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4232                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4233{
4234    static const TCGOpcode vecop_list[] = {
4235        INDEX_op_neg_vec, INDEX_op_umin_vec, INDEX_op_shlv_vec,
4236        INDEX_op_sarv_vec, INDEX_op_cmp_vec, INDEX_op_cmpsel_vec, 0
4237    };
4238    static const GVecGen3 ops[4] = {
4239        { .fniv = gen_sshl_vec,
4240          .fno = gen_helper_gvec_sshl_b,
4241          .opt_opc = vecop_list,
4242          .vece = MO_8 },
4243        { .fniv = gen_sshl_vec,
4244          .fno = gen_helper_gvec_sshl_h,
4245          .opt_opc = vecop_list,
4246          .vece = MO_16 },
4247        { .fni4 = gen_sshl_i32,
4248          .fniv = gen_sshl_vec,
4249          .opt_opc = vecop_list,
4250          .vece = MO_32 },
4251        { .fni8 = gen_sshl_i64,
4252          .fniv = gen_sshl_vec,
4253          .opt_opc = vecop_list,
4254          .vece = MO_64 },
4255    };
4256    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4257}
4258
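/*
 * Saturating add/sub expanders that also update QC: the vector
 * expansion compares the saturated result with the wrapping result
 * and ORs any difference into the cumulative saturation flag held
 * in vfp.qc.
 */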
4259static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4260                          TCGv_vec a, TCGv_vec b)
4261{
4262    TCGv_vec x = tcg_temp_new_vec_matching(t);
4263    tcg_gen_add_vec(vece, x, a, b);
4264    tcg_gen_usadd_vec(vece, t, a, b);
4265    tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4266    tcg_gen_or_vec(vece, sat, sat, x);
4267    tcg_temp_free_vec(x);
4268}
4269
4270void gen_gvec_uqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4271                       uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4272{
4273    static const TCGOpcode vecop_list[] = {
4274        INDEX_op_usadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
4275    };
4276    static const GVecGen4 ops[4] = {
4277        { .fniv = gen_uqadd_vec,
4278          .fno = gen_helper_gvec_uqadd_b,
4279          .write_aofs = true,
4280          .opt_opc = vecop_list,
4281          .vece = MO_8 },
4282        { .fniv = gen_uqadd_vec,
4283          .fno = gen_helper_gvec_uqadd_h,
4284          .write_aofs = true,
4285          .opt_opc = vecop_list,
4286          .vece = MO_16 },
4287        { .fniv = gen_uqadd_vec,
4288          .fno = gen_helper_gvec_uqadd_s,
4289          .write_aofs = true,
4290          .opt_opc = vecop_list,
4291          .vece = MO_32 },
4292        { .fniv = gen_uqadd_vec,
4293          .fno = gen_helper_gvec_uqadd_d,
4294          .write_aofs = true,
4295          .opt_opc = vecop_list,
4296          .vece = MO_64 },
4297    };
4298    tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4299                   rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4300}
4301
4302static void gen_sqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4303                          TCGv_vec a, TCGv_vec b)
4304{
4305    TCGv_vec x = tcg_temp_new_vec_matching(t);
4306    tcg_gen_add_vec(vece, x, a, b);
4307    tcg_gen_ssadd_vec(vece, t, a, b);
4308    tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4309    tcg_gen_or_vec(vece, sat, sat, x);
4310    tcg_temp_free_vec(x);
4311}
4312
4313void gen_gvec_sqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4314                       uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4315{
4316    static const TCGOpcode vecop_list[] = {
4317        INDEX_op_ssadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
4318    };
4319    static const GVecGen4 ops[4] = {
4320        { .fniv = gen_sqadd_vec,
4321          .fno = gen_helper_gvec_sqadd_b,
4322          .opt_opc = vecop_list,
4323          .write_aofs = true,
4324          .vece = MO_8 },
4325        { .fniv = gen_sqadd_vec,
4326          .fno = gen_helper_gvec_sqadd_h,
4327          .opt_opc = vecop_list,
4328          .write_aofs = true,
4329          .vece = MO_16 },
4330        { .fniv = gen_sqadd_vec,
4331          .fno = gen_helper_gvec_sqadd_s,
4332          .opt_opc = vecop_list,
4333          .write_aofs = true,
4334          .vece = MO_32 },
4335        { .fniv = gen_sqadd_vec,
4336          .fno = gen_helper_gvec_sqadd_d,
4337          .opt_opc = vecop_list,
4338          .write_aofs = true,
4339          .vece = MO_64 },
4340    };
4341    tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4342                   rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4343}
4344
4345static void gen_uqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4346                          TCGv_vec a, TCGv_vec b)
4347{
4348    TCGv_vec x = tcg_temp_new_vec_matching(t);
4349    tcg_gen_sub_vec(vece, x, a, b);
4350    tcg_gen_ussub_vec(vece, t, a, b);
4351    tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4352    tcg_gen_or_vec(vece, sat, sat, x);
4353    tcg_temp_free_vec(x);
4354}
4355
4356void gen_gvec_uqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4357                       uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4358{
4359    static const TCGOpcode vecop_list[] = {
4360        INDEX_op_ussub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
4361    };
4362    static const GVecGen4 ops[4] = {
4363        { .fniv = gen_uqsub_vec,
4364          .fno = gen_helper_gvec_uqsub_b,
4365          .opt_opc = vecop_list,
4366          .write_aofs = true,
4367          .vece = MO_8 },
4368        { .fniv = gen_uqsub_vec,
4369          .fno = gen_helper_gvec_uqsub_h,
4370          .opt_opc = vecop_list,
4371          .write_aofs = true,
4372          .vece = MO_16 },
4373        { .fniv = gen_uqsub_vec,
4374          .fno = gen_helper_gvec_uqsub_s,
4375          .opt_opc = vecop_list,
4376          .write_aofs = true,
4377          .vece = MO_32 },
4378        { .fniv = gen_uqsub_vec,
4379          .fno = gen_helper_gvec_uqsub_d,
4380          .opt_opc = vecop_list,
4381          .write_aofs = true,
4382          .vece = MO_64 },
4383    };
4384    tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4385                   rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4386}
4387
4388static void gen_sqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4389                          TCGv_vec a, TCGv_vec b)
4390{
4391    TCGv_vec x = tcg_temp_new_vec_matching(t);
4392    tcg_gen_sub_vec(vece, x, a, b);
4393    tcg_gen_sssub_vec(vece, t, a, b);
4394    tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4395    tcg_gen_or_vec(vece, sat, sat, x);
4396    tcg_temp_free_vec(x);
4397}
4398
4399void gen_gvec_sqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4400                       uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4401{
4402    static const TCGOpcode vecop_list[] = {
4403        INDEX_op_sssub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
4404    };
4405    static const GVecGen4 ops[4] = {
4406        { .fniv = gen_sqsub_vec,
4407          .fno = gen_helper_gvec_sqsub_b,
4408          .opt_opc = vecop_list,
4409          .write_aofs = true,
4410          .vece = MO_8 },
4411        { .fniv = gen_sqsub_vec,
4412          .fno = gen_helper_gvec_sqsub_h,
4413          .opt_opc = vecop_list,
4414          .write_aofs = true,
4415          .vece = MO_16 },
4416        { .fniv = gen_sqsub_vec,
4417          .fno = gen_helper_gvec_sqsub_s,
4418          .opt_opc = vecop_list,
4419          .write_aofs = true,
4420          .vece = MO_32 },
4421        { .fniv = gen_sqsub_vec,
4422          .fno = gen_helper_gvec_sqsub_d,
4423          .opt_opc = vecop_list,
4424          .write_aofs = true,
4425          .vece = MO_64 },
4426    };
4427    tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4428                   rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4429}
4430
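/*
 * SABD/UABD: signed and unsigned absolute difference, d = |a - b|,
 * computed for vectors as max(a, b) - min(a, b).
 */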
4431static void gen_sabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4432{
4433    TCGv_i32 t = tcg_temp_new_i32();
4434
4435    tcg_gen_sub_i32(t, a, b);
4436    tcg_gen_sub_i32(d, b, a);
4437    tcg_gen_movcond_i32(TCG_COND_LT, d, a, b, d, t);
4438    tcg_temp_free_i32(t);
4439}
4440
4441static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4442{
4443    TCGv_i64 t = tcg_temp_new_i64();
4444
4445    tcg_gen_sub_i64(t, a, b);
4446    tcg_gen_sub_i64(d, b, a);
4447    tcg_gen_movcond_i64(TCG_COND_LT, d, a, b, d, t);
4448    tcg_temp_free_i64(t);
4449}
4450
4451static void gen_sabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4452{
4453    TCGv_vec t = tcg_temp_new_vec_matching(d);
4454
4455    tcg_gen_smin_vec(vece, t, a, b);
4456    tcg_gen_smax_vec(vece, d, a, b);
4457    tcg_gen_sub_vec(vece, d, d, t);
4458    tcg_temp_free_vec(t);
4459}
4460
4461void gen_gvec_sabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4462                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4463{
4464    static const TCGOpcode vecop_list[] = {
4465        INDEX_op_sub_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0
4466    };
4467    static const GVecGen3 ops[4] = {
4468        { .fniv = gen_sabd_vec,
4469          .fno = gen_helper_gvec_sabd_b,
4470          .opt_opc = vecop_list,
4471          .vece = MO_8 },
4472        { .fniv = gen_sabd_vec,
4473          .fno = gen_helper_gvec_sabd_h,
4474          .opt_opc = vecop_list,
4475          .vece = MO_16 },
4476        { .fni4 = gen_sabd_i32,
4477          .fniv = gen_sabd_vec,
4478          .fno = gen_helper_gvec_sabd_s,
4479          .opt_opc = vecop_list,
4480          .vece = MO_32 },
4481        { .fni8 = gen_sabd_i64,
4482          .fniv = gen_sabd_vec,
4483          .fno = gen_helper_gvec_sabd_d,
4484          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4485          .opt_opc = vecop_list,
4486          .vece = MO_64 },
4487    };
4488    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4489}
4490
4491static void gen_uabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4492{
4493    TCGv_i32 t = tcg_temp_new_i32();
4494
4495    tcg_gen_sub_i32(t, a, b);
4496    tcg_gen_sub_i32(d, b, a);
4497    tcg_gen_movcond_i32(TCG_COND_LTU, d, a, b, d, t);
4498    tcg_temp_free_i32(t);
4499}
4500
4501static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4502{
4503    TCGv_i64 t = tcg_temp_new_i64();
4504
4505    tcg_gen_sub_i64(t, a, b);
4506    tcg_gen_sub_i64(d, b, a);
4507    tcg_gen_movcond_i64(TCG_COND_LTU, d, a, b, d, t);
4508    tcg_temp_free_i64(t);
4509}
4510
4511static void gen_uabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4512{
4513    TCGv_vec t = tcg_temp_new_vec_matching(d);
4514
4515    tcg_gen_umin_vec(vece, t, a, b);
4516    tcg_gen_umax_vec(vece, d, a, b);
4517    tcg_gen_sub_vec(vece, d, d, t);
4518    tcg_temp_free_vec(t);
4519}
4520
4521void gen_gvec_uabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4522                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4523{
4524    static const TCGOpcode vecop_list[] = {
4525        INDEX_op_sub_vec, INDEX_op_umin_vec, INDEX_op_umax_vec, 0
4526    };
4527    static const GVecGen3 ops[4] = {
4528        { .fniv = gen_uabd_vec,
4529          .fno = gen_helper_gvec_uabd_b,
4530          .opt_opc = vecop_list,
4531          .vece = MO_8 },
4532        { .fniv = gen_uabd_vec,
4533          .fno = gen_helper_gvec_uabd_h,
4534          .opt_opc = vecop_list,
4535          .vece = MO_16 },
4536        { .fni4 = gen_uabd_i32,
4537          .fniv = gen_uabd_vec,
4538          .fno = gen_helper_gvec_uabd_s,
4539          .opt_opc = vecop_list,
4540          .vece = MO_32 },
4541        { .fni8 = gen_uabd_i64,
4542          .fniv = gen_uabd_vec,
4543          .fno = gen_helper_gvec_uabd_d,
4544          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4545          .opt_opc = vecop_list,
4546          .vece = MO_64 },
4547    };
4548    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4549}
4550
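/*
 * SABA/UABA: absolute difference and accumulate, d += |a - b|,
 * built on the SABD/UABD expanders above.
 */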
4551static void gen_saba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4552{
4553    TCGv_i32 t = tcg_temp_new_i32();
4554    gen_sabd_i32(t, a, b);
4555    tcg_gen_add_i32(d, d, t);
4556    tcg_temp_free_i32(t);
4557}
4558
4559static void gen_saba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4560{
4561    TCGv_i64 t = tcg_temp_new_i64();
4562    gen_sabd_i64(t, a, b);
4563    tcg_gen_add_i64(d, d, t);
4564    tcg_temp_free_i64(t);
4565}
4566
4567static void gen_saba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4568{
4569    TCGv_vec t = tcg_temp_new_vec_matching(d);
4570    gen_sabd_vec(vece, t, a, b);
4571    tcg_gen_add_vec(vece, d, d, t);
4572    tcg_temp_free_vec(t);
4573}
4574
4575void gen_gvec_saba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4576                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4577{
4578    static const TCGOpcode vecop_list[] = {
4579        INDEX_op_sub_vec, INDEX_op_add_vec,
4580        INDEX_op_smin_vec, INDEX_op_smax_vec, 0
4581    };
4582    static const GVecGen3 ops[4] = {
4583        { .fniv = gen_saba_vec,
4584          .fno = gen_helper_gvec_saba_b,
4585          .opt_opc = vecop_list,
4586          .load_dest = true,
4587          .vece = MO_8 },
4588        { .fniv = gen_saba_vec,
4589          .fno = gen_helper_gvec_saba_h,
4590          .opt_opc = vecop_list,
4591          .load_dest = true,
4592          .vece = MO_16 },
4593        { .fni4 = gen_saba_i32,
4594          .fniv = gen_saba_vec,
4595          .fno = gen_helper_gvec_saba_s,
4596          .opt_opc = vecop_list,
4597          .load_dest = true,
4598          .vece = MO_32 },
4599        { .fni8 = gen_saba_i64,
4600          .fniv = gen_saba_vec,
4601          .fno = gen_helper_gvec_saba_d,
4602          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4603          .opt_opc = vecop_list,
4604          .load_dest = true,
4605          .vece = MO_64 },
4606    };
4607    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4608}
4609
4610static void gen_uaba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4611{
4612    TCGv_i32 t = tcg_temp_new_i32();
4613    gen_uabd_i32(t, a, b);
4614    tcg_gen_add_i32(d, d, t);
4615    tcg_temp_free_i32(t);
4616}
4617
4618static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4619{
4620    TCGv_i64 t = tcg_temp_new_i64();
4621    gen_uabd_i64(t, a, b);
4622    tcg_gen_add_i64(d, d, t);
4623    tcg_temp_free_i64(t);
4624}
4625
4626static void gen_uaba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4627{
4628    TCGv_vec t = tcg_temp_new_vec_matching(d);
4629    gen_uabd_vec(vece, t, a, b);
4630    tcg_gen_add_vec(vece, d, d, t);
4631    tcg_temp_free_vec(t);
4632}
4633
4634void gen_gvec_uaba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4635                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4636{
4637    static const TCGOpcode vecop_list[] = {
4638        INDEX_op_sub_vec, INDEX_op_add_vec,
4639        INDEX_op_umin_vec, INDEX_op_umax_vec, 0
4640    };
4641    static const GVecGen3 ops[4] = {
4642        { .fniv = gen_uaba_vec,
4643          .fno = gen_helper_gvec_uaba_b,
4644          .opt_opc = vecop_list,
4645          .load_dest = true,
4646          .vece = MO_8 },
4647        { .fniv = gen_uaba_vec,
4648          .fno = gen_helper_gvec_uaba_h,
4649          .opt_opc = vecop_list,
4650          .load_dest = true,
4651          .vece = MO_16 },
4652        { .fni4 = gen_uaba_i32,
4653          .fniv = gen_uaba_vec,
4654          .fno = gen_helper_gvec_uaba_s,
4655          .opt_opc = vecop_list,
4656          .load_dest = true,
4657          .vece = MO_32 },
4658        { .fni8 = gen_uaba_i64,
4659          .fniv = gen_uaba_vec,
4660          .fno = gen_helper_gvec_uaba_d,
4661          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4662          .opt_opc = vecop_list,
4663          .load_dest = true,
4664          .vece = MO_64 },
4665    };
4666    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4667}
4668
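/*
 * Emit code for a coprocessor (system) register access.  The register
 * is looked up in the cp_regs hashtable; we then check static access
 * permissions, emit any runtime access-check and special-case
 * handling, and finally perform the read or write either directly via
 * the register's fieldoffset or through its readfn/writefn helper.
 */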
4669static void do_coproc_insn(DisasContext *s, int cpnum, int is64,
4670                           int opc1, int crn, int crm, int opc2,
4671                           bool isread, int rt, int rt2)
4672{
4673    const ARMCPRegInfo *ri;
4674
4675    ri = get_arm_cp_reginfo(s->cp_regs,
4676            ENCODE_CP_REG(cpnum, is64, s->ns, crn, crm, opc1, opc2));
4677    if (ri) {
4678        bool need_exit_tb;
4679
4680        /* Check access permissions */
4681        if (!cp_access_ok(s->current_el, ri, isread)) {
4682            unallocated_encoding(s);
4683            return;
4684        }
4685
4686        if (s->hstr_active || ri->accessfn ||
4687            (arm_dc_feature(s, ARM_FEATURE_XSCALE) && cpnum < 14)) {
4688            /* Emit code to perform further access permissions checks at
4689             * runtime; this may result in an exception.
4690             * Note that on XScale all cp0..c13 registers do an access check
4691             * call in order to handle c15_cpar.
4692             */
4693            TCGv_ptr tmpptr;
4694            TCGv_i32 tcg_syn, tcg_isread;
4695            uint32_t syndrome;
4696
4697            /* Note that since we are an implementation which takes an
4698             * exception on a trapped conditional instruction only if the
4699             * instruction passes its condition code check, we can take
4700             * advantage of the clause in the ARM ARM that allows us to set
4701             * the COND field in the instruction to 0xE in all cases.
4702             * We could fish the actual condition out of the insn (ARM)
4703             * or the condexec bits (Thumb) but it isn't necessary.
4704             */
4705            switch (cpnum) {
4706            case 14:
4707                if (is64) {
4708                    syndrome = syn_cp14_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
4709                                                 isread, false);
4710                } else {
4711                    syndrome = syn_cp14_rt_trap(1, 0xe, opc1, opc2, crn, crm,
4712                                                rt, isread, false);
4713                }
4714                break;
4715            case 15:
4716                if (is64) {
4717                    syndrome = syn_cp15_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
4718                                                 isread, false);
4719                } else {
4720                    syndrome = syn_cp15_rt_trap(1, 0xe, opc1, opc2, crn, crm,
4721                                                rt, isread, false);
4722                }
4723                break;
4724            default:
4725                /* ARMv8 defines that only coprocessors 14 and 15 exist,
4726                 * so this can only happen if this is an ARMv7 or earlier CPU,
4727                 * in which case the syndrome information won't actually be
4728                 * guest visible.
4729                 */
4730                assert(!arm_dc_feature(s, ARM_FEATURE_V8));
4731                syndrome = syn_uncategorized();
4732                break;
4733            }
4734
4735            gen_set_condexec(s);
4736            gen_set_pc_im(s, s->pc_curr);
4737            tmpptr = tcg_const_ptr(ri);
4738            tcg_syn = tcg_const_i32(syndrome);
4739            tcg_isread = tcg_const_i32(isread);
4740            gen_helper_access_check_cp_reg(cpu_env, tmpptr, tcg_syn,
4741                                           tcg_isread);
4742            tcg_temp_free_ptr(tmpptr);
4743            tcg_temp_free_i32(tcg_syn);
4744            tcg_temp_free_i32(tcg_isread);
4745        } else if (ri->type & ARM_CP_RAISES_EXC) {
4746            /*
4747             * The readfn or writefn might raise an exception;
4748             * synchronize the CPU state in case it does.
4749             */
4750            gen_set_condexec(s);
4751            gen_set_pc_im(s, s->pc_curr);
4752        }
4753
4754        /* Handle special cases first */
4755        switch (ri->type & ~(ARM_CP_FLAG_MASK & ~ARM_CP_SPECIAL)) {
4756        case ARM_CP_NOP:
4757            return;
4758        case ARM_CP_WFI:
4759            if (isread) {
4760                unallocated_encoding(s);
4761                return;
4762            }
4763            gen_set_pc_im(s, s->base.pc_next);
4764            s->base.is_jmp = DISAS_WFI;
4765            return;
4766        default:
4767            break;
4768        }
4769
4770        if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
4771            gen_io_start();
4772        }
4773
4774        if (isread) {
4775            /* Read */
4776            if (is64) {
4777                TCGv_i64 tmp64;
4778                TCGv_i32 tmp;
4779                if (ri->type & ARM_CP_CONST) {
4780                    tmp64 = tcg_const_i64(ri->resetvalue);
4781                } else if (ri->readfn) {
4782                    TCGv_ptr tmpptr;
4783                    tmp64 = tcg_temp_new_i64();
4784                    tmpptr = tcg_const_ptr(ri);
4785                    gen_helper_get_cp_reg64(tmp64, cpu_env, tmpptr);
4786                    tcg_temp_free_ptr(tmpptr);
4787                } else {
4788                    tmp64 = tcg_temp_new_i64();
4789                    tcg_gen_ld_i64(tmp64, cpu_env, ri->fieldoffset);
4790                }
4791                tmp = tcg_temp_new_i32();
4792                tcg_gen_extrl_i64_i32(tmp, tmp64);
4793                store_reg(s, rt, tmp);
4794                tmp = tcg_temp_new_i32();
4795                tcg_gen_extrh_i64_i32(tmp, tmp64);
4796                tcg_temp_free_i64(tmp64);
4797                store_reg(s, rt2, tmp);
4798            } else {
4799                TCGv_i32 tmp;
4800                if (ri->type & ARM_CP_CONST) {
4801                    tmp = tcg_const_i32(ri->resetvalue);
4802                } else if (ri->readfn) {
4803                    TCGv_ptr tmpptr;
4804                    tmp = tcg_temp_new_i32();
4805                    tmpptr = tcg_const_ptr(ri);
4806                    gen_helper_get_cp_reg(tmp, cpu_env, tmpptr);
4807                    tcg_temp_free_ptr(tmpptr);
4808                } else {
4809                    tmp = load_cpu_offset(ri->fieldoffset);
4810                }
4811                if (rt == 15) {
4812                    /* A destination register of r15 for 32-bit loads sets
4813                     * the condition codes from the high 4 bits of the value.
4814                     */
4815                    gen_set_nzcv(tmp);
4816                    tcg_temp_free_i32(tmp);
4817                } else {
4818                    store_reg(s, rt, tmp);
4819                }
4820            }
4821        } else {
4822            /* Write */
4823            if (ri->type & ARM_CP_CONST) {
4824                /* If not forbidden by access permissions, treat as WI */
4825                return;
4826            }
4827
4828            if (is64) {
4829                TCGv_i32 tmplo, tmphi;
4830                TCGv_i64 tmp64 = tcg_temp_new_i64();
4831                tmplo = load_reg(s, rt);
4832                tmphi = load_reg(s, rt2);
4833                tcg_gen_concat_i32_i64(tmp64, tmplo, tmphi);
4834                tcg_temp_free_i32(tmplo);
4835                tcg_temp_free_i32(tmphi);
4836                if (ri->writefn) {
4837                    TCGv_ptr tmpptr = tcg_const_ptr(ri);
4838                    gen_helper_set_cp_reg64(cpu_env, tmpptr, tmp64);
4839                    tcg_temp_free_ptr(tmpptr);
4840                } else {
4841                    tcg_gen_st_i64(tmp64, cpu_env, ri->fieldoffset);
4842                }
4843                tcg_temp_free_i64(tmp64);
4844            } else {
4845                if (ri->writefn) {
4846                    TCGv_i32 tmp;
4847                    TCGv_ptr tmpptr;
4848                    tmp = load_reg(s, rt);
4849                    tmpptr = tcg_const_ptr(ri);
4850                    gen_helper_set_cp_reg(cpu_env, tmpptr, tmp);
4851                    tcg_temp_free_ptr(tmpptr);
4852                    tcg_temp_free_i32(tmp);
4853                } else {
4854                    TCGv_i32 tmp = load_reg(s, rt);
4855                    store_cpu_offset(tmp, ri->fieldoffset);
4856                }
4857            }
4858        }
4859
4860        /* I/O operations must end the TB here (whether read or write) */
4861        need_exit_tb = ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) &&
4862                        (ri->type & ARM_CP_IO));
4863
4864        if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
4865            /*
4866             * A write to any coprocessor register that ends a TB
4867             * must rebuild the hflags for the next TB.
4868             */
4869            TCGv_i32 tcg_el = tcg_const_i32(s->current_el);
4870            if (arm_dc_feature(s, ARM_FEATURE_M)) {
4871                gen_helper_rebuild_hflags_m32(cpu_env, tcg_el);
4872            } else {
4873                if (ri->type & ARM_CP_NEWEL) {
4874                    gen_helper_rebuild_hflags_a32_newel(cpu_env);
4875                } else {
4876                    gen_helper_rebuild_hflags_a32(cpu_env, tcg_el);
4877                }
4878            }
4879            tcg_temp_free_i32(tcg_el);
4880            /*
4881             * We default to ending the TB on a coprocessor register write,
4882             * but allow this to be suppressed by the register definition
4883             * (usually only necessary to work around guest bugs).
4884             */
4885            need_exit_tb = true;
4886        }
4887        if (need_exit_tb) {
4888            gen_lookup_tb(s);
4889        }
4890
4891        return;
4892    }
4893
4894    /* Unknown register; this might be a guest error or a QEMU
4895     * unimplemented feature.
4896     */
4897    if (is64) {
4898        qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
4899                      "64 bit system register cp:%d opc1: %d crm:%d "
4900                      "(%s)\n",
4901                      isread ? "read" : "write", cpnum, opc1, crm,
4902                      s->ns ? "non-secure" : "secure");
4903    } else {
4904        qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
4905                      "system register cp:%d opc1:%d crn:%d crm:%d opc2:%d "
4906                      "(%s)\n",
4907                      isread ? "read" : "write", cpnum, opc1, crn, crm, opc2,
4908                      s->ns ? "non-secure" : "secure");
4909    }
4910
4911    unallocated_encoding(s);
4912    return;
4913}
4914
4915/* Decode XScale DSP or iWMMXt insn (in the copro space, cp=0 or 1) */
4916static void disas_xscale_insn(DisasContext *s, uint32_t insn)
4917{
4918    int cpnum = (insn >> 8) & 0xf;
4919
4920    if (extract32(s->c15_cpar, cpnum, 1) == 0) {
4921        unallocated_encoding(s);
4922    } else if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
4923        if (disas_iwmmxt_insn(s, insn)) {
4924            unallocated_encoding(s);
4925        }
4926    } else if (arm_dc_feature(s, ARM_FEATURE_XSCALE)) {
4927        if (disas_dsp_insn(s, insn)) {
4928            unallocated_encoding(s);
4929        }
4930    }
4931}
4932
4933/* Store a 64-bit value to a register pair.  Clobbers val.  */
4934static void gen_storeq_reg(DisasContext *s, int rlow, int rhigh, TCGv_i64 val)
4935{
4936    TCGv_i32 tmp;
4937    tmp = tcg_temp_new_i32();
4938    tcg_gen_extrl_i64_i32(tmp, val);
4939    store_reg(s, rlow, tmp);
4940    tmp = tcg_temp_new_i32();
4941    tcg_gen_extrh_i64_i32(tmp, val);
4942    store_reg(s, rhigh, tmp);
4943}
4944
4945/* Load a 64-bit value from a register pair and add it to val.  */
4946static void gen_addq(DisasContext *s, TCGv_i64 val, int rlow, int rhigh)
4947{
4948    TCGv_i64 tmp;
4949    TCGv_i32 tmpl;
4950    TCGv_i32 tmph;
4951
4952    /* Load the 64-bit value rhigh:rlow.  */
4953    tmpl = load_reg(s, rlow);
4954    tmph = load_reg(s, rhigh);
4955    tmp = tcg_temp_new_i64();
4956    tcg_gen_concat_i32_i64(tmp, tmpl, tmph);
4957    tcg_temp_free_i32(tmpl);
4958    tcg_temp_free_i32(tmph);
4959    tcg_gen_add_i64(val, val, tmp);
4960    tcg_temp_free_i64(tmp);
4961}
4962
4963/* Set N and Z flags from hi|lo.  */
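    /*
     * A brief note on the flag representation assumed here: cpu_NF holds a
     * value whose bit 31 is the N flag, and cpu_ZF is zero exactly when Z
     * is set, so OR-ing the two halves gives Z for the 64-bit result and
     * the high word alone supplies N.
     */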
4964static void gen_logicq_cc(TCGv_i32 lo, TCGv_i32 hi)
4965{
4966    tcg_gen_mov_i32(cpu_NF, hi);
4967    tcg_gen_or_i32(cpu_ZF, lo, hi);
4968}
4969
4970/* Load/Store exclusive instructions are implemented by remembering
4971   the value/address loaded, and seeing if these are the same
4972   when the store is performed.  This should be sufficient to implement
4973   the architecturally mandated semantics, and avoids having to monitor
4974   regular stores.  The compare vs the remembered value is done during
4975   the cmpxchg operation, but we must compare the addresses manually.  */
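    /*
     * As a rough illustration (not generated code), a guest sequence such as
     *     ldrex r0, [r1]
     *     ...
     *     strex r2, r3, [r1]
     * is handled by gen_load_exclusive() latching the address and loaded
     * value into cpu_exclusive_addr/cpu_exclusive_val, and then by
     * gen_store_exclusive() doing the compare-and-swap and writing the
     * 0/1 status into r2.
     */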
4976static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
4977                               TCGv_i32 addr, int size)
4978{
4979    TCGv_i32 tmp = tcg_temp_new_i32();
4980    MemOp opc = size | MO_ALIGN | s->be_data;
4981
4982    s->is_ldex = true;
4983
4984    if (size == 3) {
4985        TCGv_i32 tmp2 = tcg_temp_new_i32();
4986        TCGv_i64 t64 = tcg_temp_new_i64();
4987
4988        /*
4989         * For AArch32, architecturally the 32-bit word at the lowest
4990         * address is always Rt and the one at addr+4 is Rt2, even if
4991         * the CPU is big-endian. That means we don't want to do a
4992         * gen_aa32_ld_i64(), which checks SCTLR_B as if for an
4993         * architecturally 64-bit access, but instead do a 64-bit access
4994         * using MO_BE if appropriate and then split the two halves.
4995         */
4996        TCGv taddr = gen_aa32_addr(s, addr, opc);
4997
4998        tcg_gen_qemu_ld_i64(t64, taddr, get_mem_index(s), opc);
4999        tcg_temp_free(taddr);
5000        tcg_gen_mov_i64(cpu_exclusive_val, t64);
5001        if (s->be_data == MO_BE) {
5002            tcg_gen_extr_i64_i32(tmp2, tmp, t64);
5003        } else {
5004            tcg_gen_extr_i64_i32(tmp, tmp2, t64);
5005        }
5006        tcg_temp_free_i64(t64);
5007
5008        store_reg(s, rt2, tmp2);
5009    } else {
5010        gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), opc);
5011        tcg_gen_extu_i32_i64(cpu_exclusive_val, tmp);
5012    }
5013
5014    store_reg(s, rt, tmp);
5015    tcg_gen_extu_i32_i64(cpu_exclusive_addr, addr);
5016}
5017
5018static void gen_clrex(DisasContext *s)
5019{
5020    tcg_gen_movi_i64(cpu_exclusive_addr, -1);
5021}
5022
5023static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
5024                                TCGv_i32 addr, int size)
5025{
5026    TCGv_i32 t0, t1, t2;
5027    TCGv_i64 extaddr;
5028    TCGv taddr;
5029    TCGLabel *done_label;
5030    TCGLabel *fail_label;
5031    MemOp opc = size | MO_ALIGN | s->be_data;
5032
5033    /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]) {
5034         [addr] = {Rt};
5035         {Rd} = 0;
5036       } else {
5037         {Rd} = 1;
5038       } */
5039    fail_label = gen_new_label();
5040    done_label = gen_new_label();
5041    extaddr = tcg_temp_new_i64();
5042    tcg_gen_extu_i32_i64(extaddr, addr);
5043    tcg_gen_brcond_i64(TCG_COND_NE, extaddr, cpu_exclusive_addr, fail_label);
5044    tcg_temp_free_i64(extaddr);
5045
5046    taddr = gen_aa32_addr(s, addr, opc);
5047    t0 = tcg_temp_new_i32();
5048    t1 = load_reg(s, rt);
5049    if (size == 3) {
5050        TCGv_i64 o64 = tcg_temp_new_i64();
5051        TCGv_i64 n64 = tcg_temp_new_i64();
5052
5053        t2 = load_reg(s, rt2);
5054
5055        /*
5056         * For AArch32, architecturally the 32-bit word at the lowest
5057         * address is always Rt and the one at addr+4 is Rt2, even if
5058         * the CPU is big-endian. Since we're going to treat this as a
5059         * single 64-bit BE store, we need to put the two halves in the
5060         * opposite order for BE to LE, so that they end up in the right
5061         * places.  We don't want gen_aa32_st_i64, because that checks
5062         * SCTLR_B as if for an architectural 64-bit access.
5063         */
5064        if (s->be_data == MO_BE) {
5065            tcg_gen_concat_i32_i64(n64, t2, t1);
5066        } else {
5067            tcg_gen_concat_i32_i64(n64, t1, t2);
5068        }
5069        tcg_temp_free_i32(t2);
5070
5071        tcg_gen_atomic_cmpxchg_i64(o64, taddr, cpu_exclusive_val, n64,
5072                                   get_mem_index(s), opc);
5073        tcg_temp_free_i64(n64);
5074
5075        tcg_gen_setcond_i64(TCG_COND_NE, o64, o64, cpu_exclusive_val);
5076        tcg_gen_extrl_i64_i32(t0, o64);
5077
5078        tcg_temp_free_i64(o64);
5079    } else {
5080        t2 = tcg_temp_new_i32();
5081        tcg_gen_extrl_i64_i32(t2, cpu_exclusive_val);
5082        tcg_gen_atomic_cmpxchg_i32(t0, taddr, t2, t1, get_mem_index(s), opc);
5083        tcg_gen_setcond_i32(TCG_COND_NE, t0, t0, t2);
5084        tcg_temp_free_i32(t2);
5085    }
5086    tcg_temp_free_i32(t1);
5087    tcg_temp_free(taddr);
5088    tcg_gen_mov_i32(cpu_R[rd], t0);
5089    tcg_temp_free_i32(t0);
5090    tcg_gen_br(done_label);
5091
5092    gen_set_label(fail_label);
5093    tcg_gen_movi_i32(cpu_R[rd], 1);
5094    gen_set_label(done_label);
5095    tcg_gen_movi_i64(cpu_exclusive_addr, -1);
5096}
5097
5098/* gen_srs:
5099 * @env: CPUARMState
5100 * @s: DisasContext
5101 * @mode: mode field from insn (which stack to store to)
5102 * @amode: addressing mode (DA/IA/DB/IB), encoded as per P,U bits in ARM insn
5103 * @writeback: true if writeback bit set
5104 *
5105 * Generate code for the SRS (Store Return State) insn.
5106 */
5107static void gen_srs(DisasContext *s,
5108                    uint32_t mode, uint32_t amode, bool writeback)
5109{
5110    int32_t offset;
5111    TCGv_i32 addr, tmp;
5112    bool undef = false;
5113
5114    /* SRS is:
5115     * - trapped to EL3 if EL3 is AArch64 and we are at Secure EL1
5116     *   and specified mode is monitor mode
5117     * - UNDEFINED in Hyp mode
5118     * - UNPREDICTABLE in User or System mode
5119     * - UNPREDICTABLE if the specified mode is:
5120     * -- not implemented
5121     * -- not a valid mode number
5122     * -- a mode that's at a higher exception level
5123     * -- Monitor, if we are Non-secure
5124     * For the UNPREDICTABLE cases we choose to UNDEF.
5125     */
5126    if (s->current_el == 1 && !s->ns && mode == ARM_CPU_MODE_MON) {
5127        gen_exception_insn(s, s->pc_curr, EXCP_UDEF, syn_uncategorized(), 3);
5128        return;
5129    }
5130
5131    if (s->current_el == 0 || s->current_el == 2) {
5132        undef = true;
5133    }
5134
5135    switch (mode) {
5136    case ARM_CPU_MODE_USR:
5137    case ARM_CPU_MODE_FIQ:
5138    case ARM_CPU_MODE_IRQ:
5139    case ARM_CPU_MODE_SVC:
5140    case ARM_CPU_MODE_ABT:
5141    case ARM_CPU_MODE_UND:
5142    case ARM_CPU_MODE_SYS:
5143        break;
5144    case ARM_CPU_MODE_HYP:
5145        if (s->current_el == 1 || !arm_dc_feature(s, ARM_FEATURE_EL2)) {
5146            undef = true;
5147        }
5148        break;
5149    case ARM_CPU_MODE_MON:
5150        /* No need to check specifically for "are we non-secure" because
5151         * we've already made EL0 UNDEF and handled the trap for S-EL1;
5152         * so if this isn't EL3 then we must be non-secure.
5153         */
5154        if (s->current_el != 3) {
5155            undef = true;
5156        }
5157        break;
5158    default:
5159        undef = true;
5160    }
5161
5162    if (undef) {
5163        unallocated_encoding(s);
5164        return;
5165    }
5166
5167    addr = tcg_temp_new_i32();
5168    tmp = tcg_const_i32(mode);
5169    /* get_r13_banked() will raise an exception if called from System mode */
5170    gen_set_condexec(s);
5171    gen_set_pc_im(s, s->pc_curr);
5172    gen_helper_get_r13_banked(addr, cpu_env, tmp);
5173    tcg_temp_free_i32(tmp);
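        /*
         * Two words (LR, then SPSR) are stored at consecutive addresses; the
         * pre-adjustment below positions addr at the lower word for each
         * addressing mode (e.g. DB stores at SP-8 and SP-4, IB at SP+4 and
         * SP+8).
         */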
5174    switch (amode) {
5175    case 0: /* DA */
5176        offset = -4;
5177        break;
5178    case 1: /* IA */
5179        offset = 0;
5180        break;
5181    case 2: /* DB */
5182        offset = -8;
5183        break;
5184    case 3: /* IB */
5185        offset = 4;
5186        break;
5187    default:
5188        abort();
5189    }
5190    tcg_gen_addi_i32(addr, addr, offset);
5191    tmp = load_reg(s, 14);
5192    gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN);
5193    tcg_temp_free_i32(tmp);
5194    tmp = load_cpu_field(spsr);
5195    tcg_gen_addi_i32(addr, addr, 4);
5196    gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN);
5197    tcg_temp_free_i32(tmp);
5198    if (writeback) {
5199        switch (amode) {
5200        case 0:
5201            offset = -8;
5202            break;
5203        case 1:
5204            offset = 4;
5205            break;
5206        case 2:
5207            offset = -4;
5208            break;
5209        case 3:
5210            offset = 0;
5211            break;
5212        default:
5213            abort();
5214        }
5215        tcg_gen_addi_i32(addr, addr, offset);
5216        tmp = tcg_const_i32(mode);
5217        gen_helper_set_r13_banked(cpu_env, tmp, addr);
5218        tcg_temp_free_i32(tmp);
5219    }
5220    tcg_temp_free_i32(addr);
5221    s->base.is_jmp = DISAS_UPDATE_EXIT;
5222}
5223
5224/* Skip this instruction if the ARM condition is false */
5225static void arm_skip_unless(DisasContext *s, uint32_t cond)
5226{
5227    arm_gen_condlabel(s);
5228    arm_gen_test_cc(cond ^ 1, s->condlabel);
5229}
5230
5231
5232/*
5233 * Constant expanders used by T16/T32 decode
5234 */
5235
5236/* Return only the rotation part of T32ExpandImm.  */
5237static int t32_expandimm_rot(DisasContext *s, int x)
5238{
5239    return x & 0xc00 ? extract32(x, 7, 5) : 0;
5240}
5241
5242/* Return the unrotated immediate from T32ExpandImm.  */
5243static int t32_expandimm_imm(DisasContext *s, int x)
5244{
5245    int imm = extract32(x, 0, 8);
5246
5247    switch (extract32(x, 8, 4)) {
5248    case 0: /* XY */
5249        /* Nothing to do.  */
5250        break;
5251    case 1: /* 00XY00XY */
5252        imm *= 0x00010001;
5253        break;
5254    case 2: /* XY00XY00 */
5255        imm *= 0x01000100;
5256        break;
5257    case 3: /* XYXYXYXY */
5258        imm *= 0x01010101;
5259        break;
5260    default:
5261        /* Rotated constant.  */
5262        imm |= 0x80;
5263        break;
5264    }
5265    return imm;
5266}
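    /*
     * Worked example (illustrative only): for imm12 = 0x2AB the selector
     * nibble is 2, so the expanders above give imm = 0xAB00AB00 and rot = 0;
     * for imm12 = 0xC45 the constant is a rotated one, giving the unrotated
     * value 0xC5 with rot = 24, which the op_s_*_rot() helpers later turn
     * into ror32(0xC5, 24) = 0x0000C500, matching T32ExpandImm.
     */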
5267
5268static int t32_branch24(DisasContext *s, int x)
5269{
5270    /* Convert J1:J2 at x[22:21] to I2:I1, which involves I=J^~S.  */
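        /*
         * Illustration: the decode value is sign-extended, so S is its sign
         * bit.  When S is 1, I = J ^ ~S = J and no change is needed; when S
         * is 0 both J bits must be inverted, which is what the XOR with
         * (3 << 21) below does for non-negative x.
         */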
5271    x ^= !(x < 0) * (3 << 21);
5272    /* Append the final zero.  */
5273    return x << 1;
5274}
5275
5276static int t16_setflags(DisasContext *s)
5277{
5278    return s->condexec_mask == 0;
5279}
5280
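    /*
     * In the T16 PUSH/POP encodings, bit 8 of the register list selects the
     * extra register (LR for PUSH, PC for POP); these expanders move it to
     * bit 14 or 15 so the rest of the decode sees a normal register mask.
     * For example (illustrative), a PUSH list of 0x10f becomes 0x400f,
     * i.e. r0-r3 plus LR.
     */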
5281static int t16_push_list(DisasContext *s, int x)
5282{
5283    return (x & 0xff) | (x & 0x100) << (14 - 8);
5284}
5285
5286static int t16_pop_list(DisasContext *s, int x)
5287{
5288    return (x & 0xff) | (x & 0x100) << (15 - 8);
5289}
5290
5291/*
5292 * Include the generated decoders.
5293 */
5294
5295#include "decode-a32.c.inc"
5296#include "decode-a32-uncond.c.inc"
5297#include "decode-t32.c.inc"
5298#include "decode-t16.c.inc"
5299
5300static bool valid_cp(DisasContext *s, int cp)
5301{
5302    /*
5303     * Return true if this coprocessor field indicates something
5304     * that's really a possible coprocessor.
5305     * For v7 and earlier, coprocessors 8..15 were reserved for Arm use,
5306     * and of those only cp14 and cp15 were used for registers.
5307     * cp10 and cp11 were used for VFP and Neon, whose decode is
5308     * dealt with elsewhere. With the advent of fp16, cp9 is also
5309     * now part of VFP.
5310     * For v8A and later, the encoding has been tightened so that
5311     * only cp14 and cp15 are valid, and other values aren't considered
5312     * to be in the coprocessor-instruction space at all. v8M still
5313     * permits coprocessors 0..7.
5314     * For XScale, we must not decode the XScale cp0, cp1 space as
5315     * a standard coprocessor insn, because we want to fall through to
5316     * the legacy disas_xscale_insn() decoder after decodetree is done.
5317     */
5318    if (arm_dc_feature(s, ARM_FEATURE_XSCALE) && (cp == 0 || cp == 1)) {
5319        return false;
5320    }
5321
5322    if (arm_dc_feature(s, ARM_FEATURE_V8) &&
5323        !arm_dc_feature(s, ARM_FEATURE_M)) {
5324        return cp >= 14;
5325    }
5326    return cp < 8 || cp >= 14;
5327}
5328
5329static bool trans_MCR(DisasContext *s, arg_MCR *a)
5330{
5331    if (!valid_cp(s, a->cp)) {
5332        return false;
5333    }
5334    do_coproc_insn(s, a->cp, false, a->opc1, a->crn, a->crm, a->opc2,
5335                   false, a->rt, 0);
5336    return true;
5337}
5338
5339static bool trans_MRC(DisasContext *s, arg_MRC *a)
5340{
5341    if (!valid_cp(s, a->cp)) {
5342        return false;
5343    }
5344    do_coproc_insn(s, a->cp, false, a->opc1, a->crn, a->crm, a->opc2,
5345                   true, a->rt, 0);
5346    return true;
5347}
5348
5349static bool trans_MCRR(DisasContext *s, arg_MCRR *a)
5350{
5351    if (!valid_cp(s, a->cp)) {
5352        return false;
5353    }
5354    do_coproc_insn(s, a->cp, true, a->opc1, 0, a->crm, 0,
5355                   false, a->rt, a->rt2);
5356    return true;
5357}
5358
5359static bool trans_MRRC(DisasContext *s, arg_MRRC *a)
5360{
5361    if (!valid_cp(s, a->cp)) {
5362        return false;
5363    }
5364    do_coproc_insn(s, a->cp, true, a->opc1, 0, a->crm, 0,
5365                   true, a->rt, a->rt2);
5366    return true;
5367}
5368
5369/* Helpers to swap operands for reverse-subtract.  */
5370static void gen_rsb(TCGv_i32 dst, TCGv_i32 a, TCGv_i32 b)
5371{
5372    tcg_gen_sub_i32(dst, b, a);
5373}
5374
5375static void gen_rsb_CC(TCGv_i32 dst, TCGv_i32 a, TCGv_i32 b)
5376{
5377    gen_sub_CC(dst, b, a);
5378}
5379
5380static void gen_rsc(TCGv_i32 dest, TCGv_i32 a, TCGv_i32 b)
5381{
5382    gen_sub_carry(dest, b, a);
5383}
5384
5385static void gen_rsc_CC(TCGv_i32 dest, TCGv_i32 a, TCGv_i32 b)
5386{
5387    gen_sbc_CC(dest, b, a);
5388}
5389
5390/*
5391 * Helpers for the data processing routines.
5392 *
5393 * After the computation store the results back.
5394 * This may be suppressed altogether (STREG_NONE), require a runtime
5395 * check against the stack limits (STREG_SP_CHECK), generate an exception
5396 * return (STREG_EXC_RET), or simply store into a register (STREG_NORMAL).
5397 *
5398 * Always return true, indicating success for a trans_* function.
5399 */
5400typedef enum {
5401   STREG_NONE,
5402   STREG_NORMAL,
5403   STREG_SP_CHECK,
5404   STREG_EXC_RET,
5405} StoreRegKind;
5406
5407static bool store_reg_kind(DisasContext *s, int rd,
5408                            TCGv_i32 val, StoreRegKind kind)
5409{
5410    switch (kind) {
5411    case STREG_NONE:
5412        tcg_temp_free_i32(val);
5413        return true;
5414    case STREG_NORMAL:
5415        /* See ALUWritePC: Interworking only from a32 mode. */
5416        if (s->thumb) {
5417            store_reg(s, rd, val);
5418        } else {
5419            store_reg_bx(s, rd, val);
5420        }
5421        return true;
5422    case STREG_SP_CHECK:
5423        store_sp_checked(s, val);
5424        return true;
5425    case STREG_EXC_RET:
5426        gen_exception_return(s, val);
5427        return true;
5428    }
5429    g_assert_not_reached();
5430}
5431
5432/*
5433 * Data Processing (register)
5434 *
5435 * Operate, with set flags, one register source,
5436 * one immediate shifted register source, and a destination.
5437 */
5438static bool op_s_rrr_shi(DisasContext *s, arg_s_rrr_shi *a,
5439                         void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
5440                         int logic_cc, StoreRegKind kind)
5441{
5442    TCGv_i32 tmp1, tmp2;
5443
5444    tmp2 = load_reg(s, a->rm);
5445    gen_arm_shift_im(tmp2, a->shty, a->shim, logic_cc);
5446    tmp1 = load_reg(s, a->rn);
5447
5448    gen(tmp1, tmp1, tmp2);
5449    tcg_temp_free_i32(tmp2);
5450
5451    if (logic_cc) {
5452        gen_logic_CC(tmp1);
5453    }
5454    return store_reg_kind(s, a->rd, tmp1, kind);
5455}
5456
5457static bool op_s_rxr_shi(DisasContext *s, arg_s_rrr_shi *a,
5458                         void (*gen)(TCGv_i32, TCGv_i32),
5459                         int logic_cc, StoreRegKind kind)
5460{
5461    TCGv_i32 tmp;
5462
5463    tmp = load_reg(s, a->rm);
5464    gen_arm_shift_im(tmp, a->shty, a->shim, logic_cc);
5465
5466    gen(tmp, tmp);
5467    if (logic_cc) {
5468        gen_logic_CC(tmp);
5469    }
5470    return store_reg_kind(s, a->rd, tmp, kind);
5471}
5472
5473/*
5474 * Data-processing (register-shifted register)
5475 *
5476 * Operate, with set flags, one register source,
5477 * one register shifted register source, and a destination.
5478 */
5479static bool op_s_rrr_shr(DisasContext *s, arg_s_rrr_shr *a,
5480                         void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
5481                         int logic_cc, StoreRegKind kind)
5482{
5483    TCGv_i32 tmp1, tmp2;
5484
5485    tmp1 = load_reg(s, a->rs);
5486    tmp2 = load_reg(s, a->rm);
5487    gen_arm_shift_reg(tmp2, a->shty, tmp1, logic_cc);
5488    tmp1 = load_reg(s, a->rn);
5489
5490    gen(tmp1, tmp1, tmp2);
5491    tcg_temp_free_i32(tmp2);
5492
5493    if (logic_cc) {
5494        gen_logic_CC(tmp1);
5495    }
5496    return store_reg_kind(s, a->rd, tmp1, kind);
5497}
5498
5499static bool op_s_rxr_shr(DisasContext *s, arg_s_rrr_shr *a,
5500                         void (*gen)(TCGv_i32, TCGv_i32),
5501                         int logic_cc, StoreRegKind kind)
5502{
5503    TCGv_i32 tmp1, tmp2;
5504
5505    tmp1 = load_reg(s, a->rs);
5506    tmp2 = load_reg(s, a->rm);
5507    gen_arm_shift_reg(tmp2, a->shty, tmp1, logic_cc);
5508
5509    gen(tmp2, tmp2);
5510    if (logic_cc) {
5511        gen_logic_CC(tmp2);
5512    }
5513    return store_reg_kind(s, a->rd, tmp2, kind);
5514}
5515
5516/*
5517 * Data-processing (immediate)
5518 *
5519 * Operate, with set flags, one register source,
5520 * one rotated immediate, and a destination.
5521 *
5522 * Note that when logic_cc && a->rot, CF is set from the msb of the
5523 * rotated immediate; this is why the decoder must pass in the unrotated
5524 * form of the immediate, so that the rotation (and CF) happen here.
5525 */
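    /*
     * For example (illustrative): ANDS r0, r1, #0xAB000000 is encoded with
     * an unrotated immediate of 0xAB and rot = 8; after the ror32() below
     * the value is 0xAB000000 and, as this is a logical op with a nonzero
     * rotation, CF is set from its bit 31, i.e. to 1.
     */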
5526static bool op_s_rri_rot(DisasContext *s, arg_s_rri_rot *a,
5527                         void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
5528                         int logic_cc, StoreRegKind kind)
5529{
5530    TCGv_i32 tmp1, tmp2;
5531    uint32_t imm;
5532
5533    imm = ror32(a->imm, a->rot);
5534    if (logic_cc && a->rot) {
5535        tcg_gen_movi_i32(cpu_CF, imm >> 31);
5536    }
5537    tmp2 = tcg_const_i32(imm);
5538    tmp1 = load_reg(s, a->rn);
5539
5540    gen(tmp1, tmp1, tmp2);
5541    tcg_temp_free_i32(tmp2);
5542
5543    if (logic_cc) {
5544        gen_logic_CC(tmp1);
5545    }
5546    return store_reg_kind(s, a->rd, tmp1, kind);
5547}
5548
5549static bool op_s_rxi_rot(DisasContext *s, arg_s_rri_rot *a,
5550                         void (*gen)(TCGv_i32, TCGv_i32),
5551                         int logic_cc, StoreRegKind kind)
5552{
5553    TCGv_i32 tmp;
5554    uint32_t imm;
5555
5556    imm = ror32(a->imm, a->rot);
5557    if (logic_cc && a->rot) {
5558        tcg_gen_movi_i32(cpu_CF, imm >> 31);
5559    }
5560    tmp = tcg_const_i32(imm);
5561
5562    gen(tmp, tmp);
5563    if (logic_cc) {
5564        gen_logic_CC(tmp);
5565    }
5566    return store_reg_kind(s, a->rd, tmp, kind);
5567}
5568
5569#define DO_ANY3(NAME, OP, L, K)                                         \
5570    static bool trans_##NAME##_rrri(DisasContext *s, arg_s_rrr_shi *a)  \
5571    { StoreRegKind k = (K); return op_s_rrr_shi(s, a, OP, L, k); }      \
5572    static bool trans_##NAME##_rrrr(DisasContext *s, arg_s_rrr_shr *a)  \
5573    { StoreRegKind k = (K); return op_s_rrr_shr(s, a, OP, L, k); }      \
5574    static bool trans_##NAME##_rri(DisasContext *s, arg_s_rri_rot *a)   \
5575    { StoreRegKind k = (K); return op_s_rri_rot(s, a, OP, L, k); }
5576
5577#define DO_ANY2(NAME, OP, L, K)                                         \
5578    static bool trans_##NAME##_rxri(DisasContext *s, arg_s_rrr_shi *a)  \
5579    { StoreRegKind k = (K); return op_s_rxr_shi(s, a, OP, L, k); }      \
5580    static bool trans_##NAME##_rxrr(DisasContext *s, arg_s_rrr_shr *a)  \
5581    { StoreRegKind k = (K); return op_s_rxr_shr(s, a, OP, L, k); }      \
5582    static bool trans_##NAME##_rxi(DisasContext *s, arg_s_rri_rot *a)   \
5583    { StoreRegKind k = (K); return op_s_rxi_rot(s, a, OP, L, k); }
5584
5585#define DO_CMP2(NAME, OP, L)                                            \
5586    static bool trans_##NAME##_xrri(DisasContext *s, arg_s_rrr_shi *a)  \
5587    { return op_s_rrr_shi(s, a, OP, L, STREG_NONE); }                   \
5588    static bool trans_##NAME##_xrrr(DisasContext *s, arg_s_rrr_shr *a)  \
5589    { return op_s_rrr_shr(s, a, OP, L, STREG_NONE); }                   \
5590    static bool trans_##NAME##_xri(DisasContext *s, arg_s_rri_rot *a)   \
5591    { return op_s_rri_rot(s, a, OP, L, STREG_NONE); }
5592
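    /*
     * As a rough illustration, DO_ANY3(AND, tcg_gen_and_i32, a->s, ...)
     * below expands into trans_AND_rrri(), trans_AND_rrrr() and
     * trans_AND_rri(), each of which forwards to the matching op_s_*
     * helper with tcg_gen_and_i32 as the operation and a->s choosing
     * whether the flags are set.
     */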
5593DO_ANY3(AND, tcg_gen_and_i32, a->s, STREG_NORMAL)
5594DO_ANY3(EOR, tcg_gen_xor_i32, a->s, STREG_NORMAL)
5595DO_ANY3(ORR, tcg_gen_or_i32, a->s, STREG_NORMAL)
5596DO_ANY3(BIC, tcg_gen_andc_i32, a->s, STREG_NORMAL)
5597
5598DO_ANY3(RSB, a->s ? gen_rsb_CC : gen_rsb, false, STREG_NORMAL)
5599DO_ANY3(ADC, a->s ? gen_adc_CC : gen_add_carry, false, STREG_NORMAL)
5600DO_ANY3(SBC, a->s ? gen_sbc_CC : gen_sub_carry, false, STREG_NORMAL)
5601DO_ANY3(RSC, a->s ? gen_rsc_CC : gen_rsc, false, STREG_NORMAL)
5602
5603DO_CMP2(TST, tcg_gen_and_i32, true)
5604DO_CMP2(TEQ, tcg_gen_xor_i32, true)
5605DO_CMP2(CMN, gen_add_CC, false)
5606DO_CMP2(CMP, gen_sub_CC, false)
5607
5608DO_ANY3(ADD, a->s ? gen_add_CC : tcg_gen_add_i32, false,
5609        a->rd == 13 && a->rn == 13 ? STREG_SP_CHECK : STREG_NORMAL)
5610
5611/*
5612 * Note for the computation of StoreRegKind we return out of the
5613 * middle of the functions that are expanded by DO_ANY3, and that
5614 * we modify a->s via that parameter before it is used by OP.
5615 */
5616DO_ANY3(SUB, a->s ? gen_sub_CC : tcg_gen_sub_i32, false,
5617        ({
5618            StoreRegKind ret = STREG_NORMAL;
5619            if (a->rd == 15 && a->s) {
5620                /*
5621                 * See ALUExceptionReturn:
5622                 * In User mode, UNPREDICTABLE; we choose UNDEF.
5623                 * In Hyp mode, UNDEFINED.
5624                 */
5625                if (IS_USER(s) || s->current_el == 2) {
5626                    unallocated_encoding(s);
5627                    return true;
5628                }
5629                /* There is no writeback of nzcv to PSTATE.  */
5630                a->s = 0;
5631                ret = STREG_EXC_RET;
5632            } else if (a->rd == 13 && a->rn == 13) {
5633                ret = STREG_SP_CHECK;
5634            }
5635            ret;
5636        }))
5637
5638DO_ANY2(MOV, tcg_gen_mov_i32, a->s,
5639        ({
5640            StoreRegKind ret = STREG_NORMAL;
5641            if (a->rd == 15 && a->s) {
5642                /*
5643                 * See ALUExceptionReturn:
5644                 * In User mode, UNPREDICTABLE; we choose UNDEF.
5645                 * In Hyp mode, UNDEFINED.
5646                 */
5647                if (IS_USER(s) || s->current_el == 2) {
5648                    unallocated_encoding(s);
5649                    return true;
5650                }
5651                /* There is no writeback of nzcv to PSTATE.  */
5652                a->s = 0;
5653                ret = STREG_EXC_RET;
5654            } else if (a->rd == 13) {
5655                ret = STREG_SP_CHECK;
5656            }
5657            ret;
5658        }))
5659
5660DO_ANY2(MVN, tcg_gen_not_i32, a->s, STREG_NORMAL)
5661
5662/*
5663 * ORN is only available with T32, so there is no register-shifted-register
5664 * form of the insn.  Using the DO_ANY3 macro would create an unused function.
5665 */
5666static bool trans_ORN_rrri(DisasContext *s, arg_s_rrr_shi *a)
5667{
5668    return op_s_rrr_shi(s, a, tcg_gen_orc_i32, a->s, STREG_NORMAL);
5669}
5670
5671static bool trans_ORN_rri(DisasContext *s, arg_s_rri_rot *a)
5672{
5673    return op_s_rri_rot(s, a, tcg_gen_orc_i32, a->s, STREG_NORMAL);
5674}
5675
5676#undef DO_ANY3
5677#undef DO_ANY2
5678#undef DO_CMP2
5679
5680static bool trans_ADR(DisasContext *s, arg_ri *a)
5681{
5682    store_reg_bx(s, a->rd, add_reg_for_lit(s, 15, a->imm));
5683    return true;
5684}
5685
5686static bool trans_MOVW(DisasContext *s, arg_MOVW *a)
5687{
5688    TCGv_i32 tmp;
5689
5690    if (!ENABLE_ARCH_6T2) {
5691        return false;
5692    }
5693
5694    tmp = tcg_const_i32(a->imm);
5695    store_reg(s, a->rd, tmp);
5696    return true;
5697}
5698
5699static bool trans_MOVT(DisasContext *s, arg_MOVW *a)
5700{
5701    TCGv_i32 tmp;
5702
5703    if (!ENABLE_ARCH_6T2) {
5704        return false;
5705    }
5706
5707    tmp = load_reg(s, a->rd);
5708    tcg_gen_ext16u_i32(tmp, tmp);
5709    tcg_gen_ori_i32(tmp, tmp, a->imm << 16);
5710    store_reg(s, a->rd, tmp);
5711    return true;
5712}
5713
5714/*
5715 * v8.1M MVE wide-shifts
5716 */
5717static bool do_mve_shl_ri(DisasContext *s, arg_mve_shl_ri *a,
5718                          WideShiftImmFn *fn)
5719{
5720    TCGv_i64 rda;
5721    TCGv_i32 rdalo, rdahi;
5722
5723    if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
5724        /* Decode falls through to ORR/MOV UNPREDICTABLE handling */
5725        return false;
5726    }
5727    if (a->rdahi == 15) {
5728        /* These are a different encoding (SQSHL/SRSHR/UQSHL/URSHR) */
5729        return false;
5730    }
5731    if (!dc_isar_feature(aa32_mve, s) ||
5732        !arm_dc_feature(s, ARM_FEATURE_M_MAIN) ||
5733        a->rdahi == 13) {
5734        /* RdaHi == 13 is UNPREDICTABLE; we choose to UNDEF */
5735        unallocated_encoding(s);
5736        return true;
5737    }
5738
5739    if (a->shim == 0) {
5740        a->shim = 32;
5741    }
5742
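        /*
         * Sketch of the remaining flow (illustrative): the 64-bit operand
         * Rda lives in the RdaLo/RdaHi register pair, so it is assembled
         * into a single i64, shifted, and then split back into the two
         * registers.
         */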
5743    rda = tcg_temp_new_i64();
5744    rdalo = load_reg(s, a->rdalo);
5745    rdahi = load_reg(s, a->rdahi);
5746    tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
5747
5748    fn(rda, rda, a->shim);
5749
5750    tcg_gen_extrl_i64_i32(rdalo, rda);
5751    tcg_gen_extrh_i64_i32(rdahi, rda);
5752    store_reg(s, a->rdalo, rdalo);
5753    store_reg(s, a->rdahi, rdahi);
5754    tcg_temp_free_i64(rda);
5755
5756    return true;
5757}
5758
5759static bool trans_ASRL_ri(DisasContext *s, arg_mve_shl_ri *a)
5760{
5761    return do_mve_shl_ri(s, a, tcg_gen_sari_i64);
5762}
5763
5764static bool trans_LSLL_ri(DisasContext *s, arg_mve_shl_ri *a)
5765{
5766    return do_mve_shl_ri(s, a, tcg_gen_shli_i64);
5767}
5768
5769static bool trans_LSRL_ri(DisasContext *s, arg_mve_shl_ri *a)
5770{
5771    return do_mve_shl_ri(s, a, tcg_gen_shri_i64);
5772}
5773
5774static void gen_mve_sqshll(TCGv_i64 r, TCGv_i64 n, int64_t shift)
5775{
5776    gen_helper_mve_sqshll(r, cpu_env, n, tcg_constant_i32(shift));
5777}
5778
5779static bool trans_SQSHLL_ri(DisasContext *s, arg_mve_shl_ri *a)
5780{
5781    return do_mve_shl_ri(s, a, gen_mve_sqshll);
5782}
5783
5784static void gen_mve_uqshll(TCGv_i64 r, TCGv_i64 n, int64_t shift)
5785{
5786    gen_helper_mve_uqshll(r, cpu_env, n, tcg_constant_i32(shift));
5787}
5788
5789static bool trans_UQSHLL_ri(DisasContext *s, arg_mve_shl_ri *a)
5790{
5791    return do_mve_shl_ri(s, a, gen_mve_uqshll);
5792}
5793
5794static bool trans_SRSHRL_ri(DisasContext *s, arg_mve_shl_ri *a)
5795{
5796    return do_mve_shl_ri(s, a, gen_srshr64_i64);
5797}
5798
5799static bool trans_URSHRL_ri(DisasContext *s, arg_mve_shl_ri *a)
5800{
5801    return do_mve_shl_ri(s, a, gen_urshr64_i64);
5802}
5803
5804static bool do_mve_shl_rr(DisasContext *s, arg_mve_shl_rr *a, WideShiftFn *fn)
5805{
5806    TCGv_i64 rda;
5807    TCGv_i32 rdalo, rdahi;
5808
5809    if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
5810        /* Decode falls through to ORR/MOV UNPREDICTABLE handling */
5811        return false;
5812    }
5813    if (a->rdahi == 15) {
5814        /* These are a different encoding (SQSHL/SRSHR/UQSHL/URSHR) */
5815        return false;
5816    }
5817    if (!dc_isar_feature(aa32_mve, s) ||
5818        !arm_dc_feature(s, ARM_FEATURE_M_MAIN) ||
5819        a->rdahi == 13 || a->rm == 13 || a->rm == 15 ||
5820        a->rm == a->rdahi || a->rm == a->rdalo) {
5821        /* These rdahi/rdalo/rm cases are UNPREDICTABLE; we choose to UNDEF */
5822        unallocated_encoding(s);
5823        return true;
5824    }
5825
5826    rda = tcg_temp_new_i64();
5827    rdalo = load_reg(s, a->rdalo);
5828    rdahi = load_reg(s, a->rdahi);
5829    tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
5830
5831    /* The helper takes care of the sign-extension of the low 8 bits of Rm */
5832    fn(rda, cpu_env, rda, cpu_R[a->rm]);
5833
5834    tcg_gen_extrl_i64_i32(rdalo, rda);
5835    tcg_gen_extrh_i64_i32(rdahi, rda);
5836    store_reg(s, a->rdalo, rdalo);
5837    store_reg(s, a->rdahi, rdahi);
5838    tcg_temp_free_i64(rda);
5839
5840    return true;
5841}
5842
5843static bool trans_LSLL_rr(DisasContext *s, arg_mve_shl_rr *a)
5844{
5845    return do_mve_shl_rr(s, a, gen_helper_mve_ushll);
5846}
5847
5848static bool trans_ASRL_rr(DisasContext *s, arg_mve_shl_rr *a)
5849{
5850    return do_mve_shl_rr(s, a, gen_helper_mve_sshrl);
5851}
5852
5853static bool trans_UQRSHLL64_rr(DisasContext *s, arg_mve_shl_rr *a)
5854{
5855    return do_mve_shl_rr(s, a, gen_helper_mve_uqrshll);
5856}
5857
5858static bool trans_SQRSHRL64_rr(DisasContext *s, arg_mve_shl_rr *a)
5859{
5860    return do_mve_shl_rr(s, a, gen_helper_mve_sqrshrl);
5861}
5862
5863static bool trans_UQRSHLL48_rr(DisasContext *s, arg_mve_shl_rr *a)
5864{
5865    return do_mve_shl_rr(s, a, gen_helper_mve_uqrshll48);
5866}
5867
5868static bool trans_SQRSHRL48_rr(DisasContext *s, arg_mve_shl_rr *a)
5869{
5870    return do_mve_shl_rr(s, a, gen_helper_mve_sqrshrl48);
5871}
5872
5873static bool do_mve_sh_ri(DisasContext *s, arg_mve_sh_ri *a, ShiftImmFn *fn)
5874{
5875    if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
5876        /* Decode falls through to ORR/MOV UNPREDICTABLE handling */
5877        return false;
5878    }
5879    if (!dc_isar_feature(aa32_mve, s) ||
5880        !arm_dc_feature(s, ARM_FEATURE_M_MAIN) ||
5881        a->rda == 13 || a->rda == 15) {
5882        /* These rda cases are UNPREDICTABLE; we choose to UNDEF */
5883        unallocated_encoding(s);
5884        return true;
5885    }
5886
5887    if (a->shim == 0) {
5888        a->shim = 32;
5889    }
5890    fn(cpu_R[a->rda], cpu_R[a->rda], a->shim);
5891
5892    return true;
5893}
5894
5895static bool trans_URSHR_ri(DisasContext *s, arg_mve_sh_ri *a)
5896{
5897    return do_mve_sh_ri(s, a, gen_urshr32_i32);
5898}
5899
5900static bool trans_SRSHR_ri(DisasContext *s, arg_mve_sh_ri *a)
5901{
5902    return do_mve_sh_ri(s, a, gen_srshr32_i32);
5903}
5904
5905static void gen_mve_sqshl(TCGv_i32 r, TCGv_i32 n, int32_t shift)
5906{
5907    gen_helper_mve_sqshl(r, cpu_env, n, tcg_constant_i32(shift));
5908}
5909
5910static bool trans_SQSHL_ri(DisasContext *s, arg_mve_sh_ri *a)
5911{
5912    return do_mve_sh_ri(s, a, gen_mve_sqshl);
5913}
5914
5915static void gen_mve_uqshl(TCGv_i32 r, TCGv_i32 n, int32_t shift)
5916{
5917    gen_helper_mve_uqshl(r, cpu_env, n, tcg_constant_i32(shift));
5918}
5919
5920static bool trans_UQSHL_ri(DisasContext *s, arg_mve_sh_ri *a)
5921{
5922    return do_mve_sh_ri(s, a, gen_mve_uqshl);
5923}
5924
5925static bool do_mve_sh_rr(DisasContext *s, arg_mve_sh_rr *a, ShiftFn *fn)
5926{
5927    if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
5928        /* Decode falls through to ORR/MOV UNPREDICTABLE handling */
5929        return false;
5930    }
5931    if (!dc_isar_feature(aa32_mve, s) ||
5932        !arm_dc_feature(s, ARM_FEATURE_M_MAIN) ||
5933        a->rda == 13 || a->rda == 15 || a->rm == 13 || a->rm == 15 ||
5934        a->rm == a->rda) {
5935        /* These rda/rm cases are UNPREDICTABLE; we choose to UNDEF */
5936        unallocated_encoding(s);
5937        return true;
5938    }
5939
5940    /* The helper takes care of the sign-extension of the low 8 bits of Rm */
5941    fn(cpu_R[a->rda], cpu_env, cpu_R[a->rda], cpu_R[a->rm]);
5942    return true;
5943}
5944
5945static bool trans_SQRSHR_rr(DisasContext *s, arg_mve_sh_rr *a)
5946{
5947    return do_mve_sh_rr(s, a, gen_helper_mve_sqrshr);
5948}
5949
5950static bool trans_UQRSHL_rr(DisasContext *s, arg_mve_sh_rr *a)
5951{
5952    return do_mve_sh_rr(s, a, gen_helper_mve_uqrshl);
5953}
5954
5955/*
5956 * Multiply and multiply accumulate
5957 */
5958
5959static bool op_mla(DisasContext *s, arg_s_rrrr *a, bool add)
5960{
5961    TCGv_i32 t1, t2;
5962
5963    t1 = load_reg(s, a->rn);
5964    t2 = load_reg(s, a->rm);
5965    tcg_gen_mul_i32(t1, t1, t2);
5966    tcg_temp_free_i32(t2);
5967    if (add) {
5968        t2 = load_reg(s, a->ra);
5969        tcg_gen_add_i32(t1, t1, t2);
5970        tcg_temp_free_i32(t2);
5971    }
5972    if (a->s) {
5973        gen_logic_CC(t1);
5974    }
5975    store_reg(s, a->rd, t1);
5976    return true;
5977}
5978
5979static bool trans_MUL(DisasContext *s, arg_MUL *a)
5980{
5981    return op_mla(s, a, false);
5982}
5983
5984static bool trans_MLA(DisasContext *s, arg_MLA *a)
5985{
5986    return op_mla(s, a, true);
5987}
5988
5989static bool trans_MLS(DisasContext *s, arg_MLS *a)
5990{
5991    TCGv_i32 t1, t2;
5992
5993    if (!ENABLE_ARCH_6T2) {
5994        return false;
5995    }
5996    t1 = load_reg(s, a->rn);
5997    t2 = load_reg(s, a->rm);
5998    tcg_gen_mul_i32(t1, t1, t2);
5999    tcg_temp_free_i32(t2);
6000    t2 = load_reg(s, a->ra);
6001    tcg_gen_sub_i32(t1, t2, t1);
6002    tcg_temp_free_i32(t2);
6003    store_reg(s, a->rd, t1);
6004    return true;
6005}
6006
6007static bool op_mlal(DisasContext *s, arg_s_rrrr *a, bool uns, bool add)
6008{
6009    TCGv_i32 t0, t1, t2, t3;
6010
6011    t0 = load_reg(s, a->rm);
6012    t1 = load_reg(s, a->rn);
6013    if (uns) {
6014        tcg_gen_mulu2_i32(t0, t1, t0, t1);
6015    } else {
6016        tcg_gen_muls2_i32(t0, t1, t0, t1);
6017    }
6018    if (add) {
6019        t2 = load_reg(s, a->ra);
6020        t3 = load_reg(s, a->rd);
6021        tcg_gen_add2_i32(t0, t1, t0, t1, t2, t3);
6022        tcg_temp_free_i32(t2);
6023        tcg_temp_free_i32(t3);
6024    }
6025    if (a->s) {
6026        gen_logicq_cc(t0, t1);
6027    }
6028    store_reg(s, a->ra, t0);
6029    store_reg(s, a->rd, t1);
6030    return true;
6031}
6032
6033static bool trans_UMULL(DisasContext *s, arg_UMULL *a)
6034{
6035    return op_mlal(s, a, true, false);
6036}
6037
6038static bool trans_SMULL(DisasContext *s, arg_SMULL *a)
6039{
6040    return op_mlal(s, a, false, false);
6041}
6042
6043static bool trans_UMLAL(DisasContext *s, arg_UMLAL *a)
6044{
6045    return op_mlal(s, a, true, true);
6046}
6047
6048static bool trans_SMLAL(DisasContext *s, arg_SMLAL *a)
6049{
6050    return op_mlal(s, a, false, true);
6051}
6052
6053static bool trans_UMAAL(DisasContext *s, arg_UMAAL *a)
6054{
6055    TCGv_i32 t0, t1, t2, zero;
6056
6057    if (s->thumb
6058        ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
6059        : !ENABLE_ARCH_6) {
6060        return false;
6061    }
6062
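        /*
         * UMAAL computes RdHi:RdLo = Rn * Rm + RdHi + RdLo.  A sketch of why
         * the two add2 steps below cannot overflow: the maximum result is
         * 0xffffffff * 0xffffffff + 2 * 0xffffffff = 0xffffffffffffffff,
         * which still fits in 64 bits.
         */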
6063    t0 = load_reg(s, a->rm);
6064    t1 = load_reg(s, a->rn);
6065    tcg_gen_mulu2_i32(t0, t1, t0, t1);
6066    zero = tcg_const_i32(0);
6067    t2 = load_reg(s, a->ra);
6068    tcg_gen_add2_i32(t0, t1, t0, t1, t2, zero);
6069    tcg_temp_free_i32(t2);
6070    t2 = load_reg(s, a->rd);
6071    tcg_gen_add2_i32(t0, t1, t0, t1, t2, zero);
6072    tcg_temp_free_i32(t2);
6073    tcg_temp_free_i32(zero);
6074    store_reg(s, a->ra, t0);
6075    store_reg(s, a->rd, t1);
6076    return true;
6077}
6078
6079/*
6080 * Saturating addition and subtraction
6081 */
6082
6083static bool op_qaddsub(DisasContext *s, arg_rrr *a, bool add, bool doub)
6084{
6085    TCGv_i32 t0, t1;
6086
6087    if (s->thumb
6088        ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
6089        : !ENABLE_ARCH_5TE) {
6090        return false;
6091    }
6092
6093    t0 = load_reg(s, a->rm);
6094    t1 = load_reg(s, a->rn);
6095    if (doub) {
6096        gen_helper_add_saturate(t1, cpu_env, t1, t1);
6097    }
6098    if (add) {
6099        gen_helper_add_saturate(t0, cpu_env, t0, t1);
6100    } else {
6101        gen_helper_sub_saturate(t0, cpu_env, t0, t1);
6102    }
6103    tcg_temp_free_i32(t1);
6104    store_reg(s, a->rd, t0);
6105    return true;
6106}
6107
6108#define DO_QADDSUB(NAME, ADD, DOUB) \
6109static bool trans_##NAME(DisasContext *s, arg_rrr *a)    \
6110{                                                        \
6111    return op_qaddsub(s, a, ADD, DOUB);                  \
6112}
6113
6114DO_QADDSUB(QADD, true, false)
6115DO_QADDSUB(QSUB, false, false)
6116DO_QADDSUB(QDADD, true, true)
6117DO_QADDSUB(QDSUB, false, true)
6118
6119#undef DO_QADDSUB
6120
6121/*
6122 * Halfword multiply and multiply accumulate
6123 */
6124
6125static bool op_smlaxxx(DisasContext *s, arg_rrrr *a,
6126                       int add_long, bool nt, bool mt)
6127{
6128    TCGv_i32 t0, t1, tl, th;
6129
6130    if (s->thumb
6131        ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
6132        : !ENABLE_ARCH_5TE) {
6133        return false;
6134    }
6135
6136    t0 = load_reg(s, a->rn);
6137    t1 = load_reg(s, a->rm);
6138    gen_mulxy(t0, t1, nt, mt);
6139    tcg_temp_free_i32(t1);
6140
6141    switch (add_long) {
6142    case 0:
6143        store_reg(s, a->rd, t0);
6144        break;
6145    case 1:
6146        t1 = load_reg(s, a->ra);
6147        gen_helper_add_setq(t0, cpu_env, t0, t1);
6148        tcg_temp_free_i32(t1);
6149        store_reg(s, a->rd, t0);
6150        break;
6151    case 2:
6152        tl = load_reg(s, a->ra);
6153        th = load_reg(s, a->rd);
6154        /* Sign-extend the 32-bit product to 64 bits.  */
6155        t1 = tcg_temp_new_i32();
6156        tcg_gen_sari_i32(t1, t0, 31);
6157        tcg_gen_add2_i32(tl, th, tl, th, t0, t1);
6158        tcg_temp_free_i32(t0);
6159        tcg_temp_free_i32(t1);
6160        store_reg(s, a->ra, tl);
6161        store_reg(s, a->rd, th);
6162        break;
6163    default:
6164        g_assert_not_reached();
6165    }
6166    return true;
6167}
6168
6169#define DO_SMLAX(NAME, add, nt, mt) \
6170static bool trans_##NAME(DisasContext *s, arg_rrrr *a)     \
6171{                                                          \
6172    return op_smlaxxx(s, a, add, nt, mt);                  \
6173}
6174
6175DO_SMLAX(SMULBB, 0, 0, 0)
6176DO_SMLAX(SMULBT, 0, 0, 1)
6177DO_SMLAX(SMULTB, 0, 1, 0)
6178DO_SMLAX(SMULTT, 0, 1, 1)
6179
6180DO_SMLAX(SMLABB, 1, 0, 0)
6181DO_SMLAX(SMLABT, 1, 0, 1)
6182DO_SMLAX(SMLATB, 1, 1, 0)
6183DO_SMLAX(SMLATT, 1, 1, 1)
6184
6185DO_SMLAX(SMLALBB, 2, 0, 0)
6186DO_SMLAX(SMLALBT, 2, 0, 1)
6187DO_SMLAX(SMLALTB, 2, 1, 0)
6188DO_SMLAX(SMLALTT, 2, 1, 1)
6189
6190#undef DO_SMLAX
6191
6192static bool op_smlawx(DisasContext *s, arg_rrrr *a, bool add, bool mt)
6193{
6194    TCGv_i32 t0, t1;
6195
6196    if (!ENABLE_ARCH_5TE) {
6197        return false;
6198    }
6199
6200    t0 = load_reg(s, a->rn);
6201    t1 = load_reg(s, a->rm);
6202    /*
6203     * Since the nominal result is product<47:16>, shift the 16-bit
6204     * input up by 16 bits, so that the result is at product<63:32>.
6205     */
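        /*
         * E.g. for SMULWB (illustrative): Rd = (Rn * SInt(Rm<15:0>)) >> 16.
         * With the halfword pre-shifted into bits [31:16], the high word
         * produced by tcg_gen_muls2_i32() below is exactly that result, so
         * no further shift is needed.
         */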
6206    if (mt) {
6207        tcg_gen_andi_i32(t1, t1, 0xffff0000);
6208    } else {
6209        tcg_gen_shli_i32(t1, t1, 16);
6210    }
6211    tcg_gen_muls2_i32(t0, t1, t0, t1);
6212    tcg_temp_free_i32(t0);
6213    if (add) {
6214        t0 = load_reg(s, a->ra);
6215        gen_helper_add_setq(t1, cpu_env, t1, t0);
6216        tcg_temp_free_i32(t0);
6217    }
6218    store_reg(s, a->rd, t1);
6219    return true;
6220}
6221
6222#define DO_SMLAWX(NAME, add, mt) \
6223static bool trans_##NAME(DisasContext *s, arg_rrrr *a)     \
6224{                                                          \
6225    return op_smlawx(s, a, add, mt);                       \
6226}
6227
6228DO_SMLAWX(SMULWB, 0, 0)
6229DO_SMLAWX(SMULWT, 0, 1)
6230DO_SMLAWX(SMLAWB, 1, 0)
6231DO_SMLAWX(SMLAWT, 1, 1)
6232
6233#undef DO_SMLAWX
6234
6235/*
6236 * MSR (immediate) and hints
6237 */
6238
6239static bool trans_YIELD(DisasContext *s, arg_YIELD *a)
6240{
6241    /*
6242     * When running single-threaded TCG code, use the helper to ensure that
6243     * the next round-robin scheduled vCPU gets a crack.  When running in
6244     * MTTCG we don't generate jumps to the helper as it won't affect the
6245     * scheduling of other vCPUs.
6246     */
6247    if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
6248        gen_set_pc_im(s, s->base.pc_next);
6249        s->base.is_jmp = DISAS_YIELD;
6250    }
6251    return true;
6252}
6253
6254static bool trans_WFE(DisasContext *s, arg_WFE *a)
6255{
6256    /*
6257     * When running single-threaded TCG code, use the helper to ensure that
6258     * the next round-robin scheduled vCPU gets a crack.  In MTTCG mode we
6259     * just skip this instruction.  Currently the SEV/SEVL instructions,
6260     * which are *one* of many ways to wake the CPU from WFE, are not
6261     * implemented so we can't sleep like WFI does.
6262     */
6263    if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
6264        gen_set_pc_im(s, s->base.pc_next);
6265        s->base.is_jmp = DISAS_WFE;
6266    }
6267    return true;
6268}
6269
6270static bool trans_WFI(DisasContext *s, arg_WFI *a)
6271{
6272    /* For WFI, halt the vCPU until an IRQ. */
6273    gen_set_pc_im(s, s->base.pc_next);
6274    s->base.is_jmp = DISAS_WFI;
6275    return true;
6276}
6277
6278static bool trans_NOP(DisasContext *s, arg_NOP *a)
6279{
6280    return true;
6281}
6282
6283static bool trans_MSR_imm(DisasContext *s, arg_MSR_imm *a)
6284{
6285    uint32_t val = ror32(a->imm, a->rot * 2);
6286    uint32_t mask = msr_mask(s, a->mask, a->r);
6287
6288    if (gen_set_psr_im(s, mask, a->r, val)) {
6289        unallocated_encoding(s);
6290    }
6291    return true;
6292}
6293
6294/*
6295 * Cyclic Redundancy Check
6296 */
6297
6298static bool op_crc32(DisasContext *s, arg_rrr *a, bool c, MemOp sz)
6299{
6300    TCGv_i32 t1, t2, t3;
6301
6302    if (!dc_isar_feature(aa32_crc32, s)) {
6303        return false;
6304    }
6305
6306    t1 = load_reg(s, a->rn);
6307    t2 = load_reg(s, a->rm);
6308    switch (sz) {
6309    case MO_8:
6310        gen_uxtb(t2);
6311        break;
6312    case MO_16:
6313        gen_uxth(t2);
6314        break;
6315    case MO_32:
6316        break;
6317    default:
6318        g_assert_not_reached();
6319    }
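        /*
         * The third operand of the crc32 helpers below is assumed to be the
         * operand width in bytes (1, 2 or 4), hence the 1 << sz.
         */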
6320    t3 = tcg_const_i32(1 << sz);
6321    if (c) {
6322        gen_helper_crc32c(t1, t1, t2, t3);
6323    } else {
6324        gen_helper_crc32(t1, t1, t2, t3);
6325    }
6326    tcg_temp_free_i32(t2);
6327    tcg_temp_free_i32(t3);
6328    store_reg(s, a->rd, t1);
6329    return true;
6330}
6331
6332#define DO_CRC32(NAME, c, sz) \
6333static bool trans_##NAME(DisasContext *s, arg_rrr *a)  \
6334    { return op_crc32(s, a, c, sz); }
6335
6336DO_CRC32(CRC32B, false, MO_8)
6337DO_CRC32(CRC32H, false, MO_16)
6338DO_CRC32(CRC32W, false, MO_32)
6339DO_CRC32(CRC32CB, true, MO_8)
6340DO_CRC32(CRC32CH, true, MO_16)
6341DO_CRC32(CRC32CW, true, MO_32)
6342
6343#undef DO_CRC32
6344
6345/*
6346 * Miscellaneous instructions
6347 */
6348
6349static bool trans_MRS_bank(DisasContext *s, arg_MRS_bank *a)
6350{
6351    if (arm_dc_feature(s, ARM_FEATURE_M)) {
6352        return false;
6353    }
6354    gen_mrs_banked(s, a->r, a->sysm, a->rd);
6355    return true;
6356}
6357
6358static bool trans_MSR_bank(DisasContext *s, arg_MSR_bank *a)
6359{
6360    if (arm_dc_feature(s, ARM_FEATURE_M)) {
6361        return false;
6362    }
6363    gen_msr_banked(s, a->r, a->sysm, a->rn);
6364    return true;
6365}
6366
6367static bool trans_MRS_reg(DisasContext *s, arg_MRS_reg *a)
6368{
6369    TCGv_i32 tmp;
6370
6371    if (arm_dc_feature(s, ARM_FEATURE_M)) {
6372        return false;
6373    }
6374    if (a->r) {
6375        if (IS_USER(s)) {
6376            unallocated_encoding(s);
6377            return true;
6378        }
6379        tmp = load_cpu_field(spsr);
6380    } else {
6381        tmp = tcg_temp_new_i32();
6382        gen_helper_cpsr_read(tmp, cpu_env);
6383    }
6384    store_reg(s, a->rd, tmp);
6385    return true;
6386}
6387
6388static bool trans_MSR_reg(DisasContext *s, arg_MSR_reg *a)
6389{
6390    TCGv_i32 tmp;
6391    uint32_t mask = msr_mask(s, a->mask, a->r);
6392
6393    if (arm_dc_feature(s, ARM_FEATURE_M)) {
6394        return false;
6395    }
6396    tmp = load_reg(s, a->rn);
6397    if (gen_set_psr(s, mask, a->r, tmp)) {
6398        unallocated_encoding(s);
6399    }
6400    return true;
6401}
6402
6403static bool trans_MRS_v7m(DisasContext *s, arg_MRS_v7m *a)
6404{
6405    TCGv_i32 tmp;
6406
6407    if (!arm_dc_feature(s, ARM_FEATURE_M)) {
6408        return false;
6409    }
6410    tmp = tcg_const_i32(a->sysm);
6411    gen_helper_v7m_mrs(tmp, cpu_env, tmp);
6412    store_reg(s, a->rd, tmp);
6413    return true;
6414}
6415
6416static bool trans_MSR_v7m(DisasContext *s, arg_MSR_v7m *a)
6417{
6418    TCGv_i32 addr, reg;
6419
6420    if (!arm_dc_feature(s, ARM_FEATURE_M)) {
6421        return false;
6422    }
6423    addr = tcg_const_i32((a->mask << 10) | a->sysm);
6424    reg = load_reg(s, a->rn);
6425    gen_helper_v7m_msr(cpu_env, addr, reg);
6426    tcg_temp_free_i32(addr);
6427    tcg_temp_free_i32(reg);
6428    /* If we wrote to CONTROL, the EL might have changed */
6429    gen_helper_rebuild_hflags_m32_newel(cpu_env);
6430    gen_lookup_tb(s);
6431    return true;
6432}
6433
6434static bool trans_BX(DisasContext *s, arg_BX *a)
6435{
6436    if (!ENABLE_ARCH_4T) {
6437        return false;
6438    }
6439    gen_bx_excret(s, load_reg(s, a->rm));
6440    return true;
6441}
6442
6443static bool trans_BXJ(DisasContext *s, arg_BXJ *a)
6444{
6445    if (!ENABLE_ARCH_5J || arm_dc_feature(s, ARM_FEATURE_M)) {
6446        return false;
6447    }
6448    /*
6449     * v7A allows BXJ to be trapped via HSTR.TJDBX. We don't waste a
6450     * TBFLAGS bit on a basically-never-happens case, so call a helper
6451     * function to check for the trap and raise the exception if needed
6452     * (passing it the register number for the syndrome value).
6453     * v8A doesn't have this HSTR bit.
6454     */
6455    if (!arm_dc_feature(s, ARM_FEATURE_V8) &&
6456        arm_dc_feature(s, ARM_FEATURE_EL2) &&
6457        s->current_el < 2 && s->ns) {
6458        gen_helper_check_bxj_trap(cpu_env, tcg_constant_i32(a->rm));
6459    }
6460    /* Trivial implementation equivalent to bx.  */
6461    gen_bx(s, load_reg(s, a->rm));
6462    return true;
6463}
6464
6465static bool trans_BLX_r(DisasContext *s, arg_BLX_r *a)
6466{
6467    TCGv_i32 tmp;
6468
6469    if (!ENABLE_ARCH_5) {
6470        return false;
6471    }
6472    tmp = load_reg(s, a->rm);
6473    tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | s->thumb);
6474    gen_bx(s, tmp);
6475    return true;
6476}
6477
6478/*
6479 * BXNS/BLXNS: only exist for v8M with the security extensions,
6480 * and always UNDEF if NonSecure.  We don't implement these in
6481 * the user-only mode either (in theory you can use them from
6482 * Secure User mode but they are too tied in to system emulation).
6483 */
6484static bool trans_BXNS(DisasContext *s, arg_BXNS *a)
6485{
6486    if (!s->v8m_secure || IS_USER_ONLY) {
6487        unallocated_encoding(s);
6488    } else {
6489        gen_bxns(s, a->rm);
6490    }
6491    return true;
6492}
6493
6494static bool trans_BLXNS(DisasContext *s, arg_BLXNS *a)
6495{
6496    if (!s->v8m_secure || IS_USER_ONLY) {
6497        unallocated_encoding(s);
6498    } else {
6499        gen_blxns(s, a->rm);
6500    }
6501    return true;
6502}
6503
6504static bool trans_CLZ(DisasContext *s, arg_CLZ *a)
6505{
6506    TCGv_i32 tmp;
6507
6508    if (!ENABLE_ARCH_5) {
6509        return false;
6510    }
6511    tmp = load_reg(s, a->rm);
6512    tcg_gen_clzi_i32(tmp, tmp, 32);
6513    store_reg(s, a->rd, tmp);
6514    return true;
6515}
6516
6517static bool trans_ERET(DisasContext *s, arg_ERET *a)
6518{
6519    TCGv_i32 tmp;
6520
6521    if (!arm_dc_feature(s, ARM_FEATURE_V7VE)) {
6522        return false;
6523    }
6524    if (IS_USER(s)) {
6525        unallocated_encoding(s);
6526        return true;
6527    }
6528    if (s->current_el == 2) {
6529        /* ERET from Hyp uses ELR_Hyp, not LR */
6530        tmp = load_cpu_field(elr_el[2]);
6531    } else {
6532        tmp = load_reg(s, 14);
6533    }
6534    gen_exception_return(s, tmp);
6535    return true;
6536}
6537
6538static bool trans_HLT(DisasContext *s, arg_HLT *a)
6539{
6540    gen_hlt(s, a->imm);
6541    return true;
6542}
6543
6544static bool trans_BKPT(DisasContext *s, arg_BKPT *a)
6545{
6546    if (!ENABLE_ARCH_5) {
6547        return false;
6548    }
6549    /* BKPT is OK with ECI set and leaves it untouched */
6550    s->eci_handled = true;
6551    if (arm_dc_feature(s, ARM_FEATURE_M) &&
6552        semihosting_enabled() &&
6553#ifndef CONFIG_USER_ONLY
6554        !IS_USER(s) &&
6555#endif
6556        (a->imm == 0xab)) {
6557        gen_exception_internal_insn(s, s->pc_curr, EXCP_SEMIHOST);
6558    } else {
6559        gen_exception_bkpt_insn(s, syn_aa32_bkpt(a->imm, false));
6560    }
6561    return true;
6562}
6563
6564static bool trans_HVC(DisasContext *s, arg_HVC *a)
6565{
6566    if (!ENABLE_ARCH_7 || arm_dc_feature(s, ARM_FEATURE_M)) {
6567        return false;
6568    }
6569    if (IS_USER(s)) {
6570        unallocated_encoding(s);
6571    } else {
6572        gen_hvc(s, a->imm);
6573    }
6574    return true;
6575}
6576
6577static bool trans_SMC(DisasContext *s, arg_SMC *a)
6578{
6579    if (!ENABLE_ARCH_6K || arm_dc_feature(s, ARM_FEATURE_M)) {
6580        return false;
6581    }
6582    if (IS_USER(s)) {
6583        unallocated_encoding(s);
6584    } else {
6585        gen_smc(s);
6586    }
6587    return true;
6588}
6589
6590static bool trans_SG(DisasContext *s, arg_SG *a)
6591{
6592    if (!arm_dc_feature(s, ARM_FEATURE_M) ||
6593        !arm_dc_feature(s, ARM_FEATURE_V8)) {
6594        return false;
6595    }
6596    /*
6597     * SG (v8M only)
6598     * The bulk of the behaviour for this instruction is implemented
6599     * in v7m_handle_execute_nsc(), which deals with the insn when
6600     * it is executed by a CPU in non-secure state from memory
6601     * which is Secure & NonSecure-Callable.
6602     * Here we only need to handle the remaining cases:
6603     *  * in NS memory (including the "security extension not
6604     *    implemented" case) : NOP
6605     *  * in S memory but CPU already secure (clear IT bits)
6606     * We know that the attribute for the memory this insn is
6607     * in must match the current CPU state, because otherwise
6608     * get_phys_addr_pmsav8 would have generated an exception.
6609     */
6610    if (s->v8m_secure) {
6611        /* Like the IT insn, we don't need to generate any code */
6612        s->condexec_cond = 0;
6613        s->condexec_mask = 0;
6614    }
6615    return true;
6616}
6617
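    /*
     * TT/TTT/TTA/TTAT (v8M): query the MPU region and security attributes
     * of the address in Rn and write the result word to Rd.  The value
     * (A << 1) | T passed to the v7m_tt helper selects the variant: A for
     * the TTA forms that query the other security state, T for the
     * unprivileged TTT forms.
     */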
6618static bool trans_TT(DisasContext *s, arg_TT *a)
6619{
6620    TCGv_i32 addr, tmp;
6621
6622    if (!arm_dc_feature(s, ARM_FEATURE_M) ||
6623        !arm_dc_feature(s, ARM_FEATURE_V8)) {
6624        return false;
6625    }
6626    if (a->rd == 13 || a->rd == 15 || a->rn == 15) {
6627        /* We UNDEF for these UNPREDICTABLE cases */
6628        unallocated_encoding(s);
6629        return true;
6630    }
6631    if (a->A && !s->v8m_secure) {
6632        /* This case is UNDEFINED.  */
6633        unallocated_encoding(s);
6634        return true;
6635    }
6636
6637    addr = load_reg(s, a->rn);
6638    tmp = tcg_const_i32((a->A << 1) | a->T);
6639    gen_helper_v7m_tt(tmp, cpu_env, addr, tmp);
6640    tcg_temp_free_i32(addr);
6641    store_reg(s, a->rd, tmp);
6642    return true;
6643}
6644
6645/*
6646 * Load/store register index
6647 */
6648
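    /*
     * Build the ISS information reported in the data abort syndrome (ISV)
     * so a hypervisor can emulate the access from the syndrome alone: the
     * transfer register plus whether this was a 16-bit encoding.  Forms
     * with writeback cannot be described this way and report ISSInvalid.
     */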
6649static ISSInfo make_issinfo(DisasContext *s, int rd, bool p, bool w)
6650{
6651    ISSInfo ret;
6652
6653    /* ISS not valid if writeback */
6654    if (p && !w) {
6655        ret = rd;
6656        if (s->base.pc_next - s->pc_curr == 2) {
6657            ret |= ISSIs16Bit;
6658        }
6659    } else {
6660        ret = ISSInvalid;
6661    }
6662    return ret;
6663}
6664
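    /*
     * Address arithmetic for the register-offset forms: the offset is Rm,
     * optionally shifted by an immediate.  P selects pre-indexed (offset
     * applied here, before the access) vs post-indexed (offset applied in
     * op_addr_rr_post), U selects add vs subtract, and W asks for base
     * register writeback.
     */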
6665static TCGv_i32 op_addr_rr_pre(DisasContext *s, arg_ldst_rr *a)
6666{
6667    TCGv_i32 addr = load_reg(s, a->rn);
6668
6669    if (s->v8m_stackcheck && a->rn == 13 && a->w) {
6670        gen_helper_v8m_stackcheck(cpu_env, addr);
6671    }
6672
6673    if (a->p) {
6674        TCGv_i32 ofs = load_reg(s, a->rm);
6675        gen_arm_shift_im(ofs, a->shtype, a->shimm, 0);
6676        if (a->u) {
6677            tcg_gen_add_i32(addr, addr, ofs);
6678        } else {
6679            tcg_gen_sub_i32(addr, addr, ofs);
6680        }
6681        tcg_temp_free_i32(ofs);
6682    }
6683    return addr;
6684}
6685
6686static void op_addr_rr_post(DisasContext *s, arg_ldst_rr *a,
6687                            TCGv_i32 addr, int address_offset)
6688{
6689    if (!a->p) {
6690        TCGv_i32 ofs = load_reg(s, a->rm);
6691        gen_arm_shift_im(ofs, a->shtype, a->shimm, 0);
6692        if (a->u) {
6693            tcg_gen_add_i32(addr, addr, ofs);
6694        } else {
6695            tcg_gen_sub_i32(addr, addr, ofs);
6696        }
6697        tcg_temp_free_i32(ofs);
6698    } else if (!a->w) {
6699        tcg_temp_free_i32(addr);
6700        return;
6701    }
6702    tcg_gen_addi_i32(addr, addr, address_offset);
6703    store_reg(s, a->rn, addr);
6704}
6705
6706static bool op_load_rr(DisasContext *s, arg_ldst_rr *a,
6707                       MemOp mop, int mem_idx)
6708{
6709    ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w);
6710    TCGv_i32 addr, tmp;
6711
6712    addr = op_addr_rr_pre(s, a);
6713
6714    tmp = tcg_temp_new_i32();
6715    gen_aa32_ld_i32(s, tmp, addr, mem_idx, mop);
6716    disas_set_da_iss(s, mop, issinfo);
6717
6718    /*
6719     * Perform base writeback before the loaded value to
6720     * ensure correct behavior with overlapping index registers.
6721     */
6722    op_addr_rr_post(s, a, addr, 0);
6723    store_reg_from_load(s, a->rt, tmp);
6724    return true;
6725}
6726
6727static bool op_store_rr(DisasContext *s, arg_ldst_rr *a,
6728                        MemOp mop, int mem_idx)
6729{
6730    ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w) | ISSIsWrite;
6731    TCGv_i32 addr, tmp;
6732
6733    /*
6734     * In Thumb encodings of stores Rn=1111 is UNDEF; for Arm it
6735     * is either UNPREDICTABLE or has defined behaviour
6736     */
6737    if (s->thumb && a->rn == 15) {
6738        return false;
6739    }
6740
6741    addr = op_addr_rr_pre(s, a);
6742
6743    tmp = load_reg(s, a->rt);
6744    gen_aa32_st_i32(s, tmp, addr, mem_idx, mop);
6745    disas_set_da_iss(s, mop, issinfo);
6746    tcg_temp_free_i32(tmp);
6747
6748    op_addr_rr_post(s, a, addr, 0);
6749    return true;
6750}
6751
6752static bool trans_LDRD_rr(DisasContext *s, arg_ldst_rr *a)
6753{
6754    int mem_idx = get_mem_index(s);
6755    TCGv_i32 addr, tmp;
6756
6757    if (!ENABLE_ARCH_5TE) {
6758        return false;
6759    }
6760    if (a->rt & 1) {
6761        unallocated_encoding(s);
6762        return true;
6763    }
6764    addr = op_addr_rr_pre(s, a);
6765
6766    tmp = tcg_temp_new_i32();
6767    gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6768    store_reg(s, a->rt, tmp);
6769
6770    tcg_gen_addi_i32(addr, addr, 4);
6771
6772    tmp = tcg_temp_new_i32();
6773    gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6774    store_reg(s, a->rt + 1, tmp);
6775
6776    /* LDRD w/ base writeback is undefined if the registers overlap.  */
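        /*
         * addr was advanced to the second word above; pass -4 so the
         * post-index/writeback value is computed relative to the first
         * word's address.
         */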
6777    op_addr_rr_post(s, a, addr, -4);
6778    return true;
6779}
6780
6781static bool trans_STRD_rr(DisasContext *s, arg_ldst_rr *a)
6782{
6783    int mem_idx = get_mem_index(s);
6784    TCGv_i32 addr, tmp;
6785
6786    if (!ENABLE_ARCH_5TE) {
6787        return false;
6788    }
6789    if (a->rt & 1) {
6790        unallocated_encoding(s);
6791        return true;
6792    }
6793    addr = op_addr_rr_pre(s, a);
6794
6795    tmp = load_reg(s, a->rt);
6796    gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6797    tcg_temp_free_i32(tmp);
6798
6799    tcg_gen_addi_i32(addr, addr, 4);
6800
6801    tmp = load_reg(s, a->rt + 1);
6802    gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6803    tcg_temp_free_i32(tmp);
6804
6805    op_addr_rr_post(s, a, addr, -4);
6806    return true;
6807}
6808
6809/*
6810 * Load/store immediate index
6811 */
6812
6813static TCGv_i32 op_addr_ri_pre(DisasContext *s, arg_ldst_ri *a)
6814{
6815    int ofs = a->imm;
6816
6817    if (!a->u) {
6818        ofs = -ofs;
6819    }
6820
6821    if (s->v8m_stackcheck && a->rn == 13 && a->w) {
6822        /*
6823         * Stackcheck. Here we know 'addr' is the current SP;
6824         * U is set if we're moving SP up, else down. It is
6825         * UNKNOWN whether the limit check triggers when SP starts
6826         * below the limit and ends up above it; we choose to trigger it.
6827         */
6828        if (!a->u) {
6829            TCGv_i32 newsp = tcg_temp_new_i32();
6830            tcg_gen_addi_i32(newsp, cpu_R[13], ofs);
6831            gen_helper_v8m_stackcheck(cpu_env, newsp);
6832            tcg_temp_free_i32(newsp);
6833        } else {
6834            gen_helper_v8m_stackcheck(cpu_env, cpu_R[13]);
6835        }
6836    }
6837
6838    return add_reg_for_lit(s, a->rn, a->p ? ofs : 0);
6839}
6840
6841static void op_addr_ri_post(DisasContext *s, arg_ldst_ri *a,
6842                            TCGv_i32 addr, int address_offset)
6843{
6844    if (!a->p) {
6845        if (a->u) {
6846            address_offset += a->imm;
6847        } else {
6848            address_offset -= a->imm;
6849        }
6850    } else if (!a->w) {
6851        tcg_temp_free_i32(addr);
6852        return;
6853    }
6854    tcg_gen_addi_i32(addr, addr, address_offset);
6855    store_reg(s, a->rn, addr);
6856}
6857
6858static bool op_load_ri(DisasContext *s, arg_ldst_ri *a,
6859                       MemOp mop, int mem_idx)
6860{
6861    ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w);
6862    TCGv_i32 addr, tmp;
6863
6864    addr = op_addr_ri_pre(s, a);
6865
6866    tmp = tcg_temp_new_i32();
6867    gen_aa32_ld_i32(s, tmp, addr, mem_idx, mop);
6868    disas_set_da_iss(s, mop, issinfo);
6869
6870    /*
6871     * Perform base writeback before the loaded value to
6872     * ensure correct behavior with overlapping index registers.
6873     */
6874    op_addr_ri_post(s, a, addr, 0);
6875    store_reg_from_load(s, a->rt, tmp);
6876    return true;
6877}
6878
6879static bool op_store_ri(DisasContext *s, arg_ldst_ri *a,
6880                        MemOp mop, int mem_idx)
6881{
6882    ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w) | ISSIsWrite;
6883    TCGv_i32 addr, tmp;
6884
6885    /*
6886     * In Thumb encodings of stores Rn=1111 is UNDEF; for Arm it
6887     * is either UNPREDICTABLE or has defined behaviour
6888     */
6889    if (s->thumb && a->rn == 15) {
6890        return false;
6891    }
6892
6893    addr = op_addr_ri_pre(s, a);
6894
6895    tmp = load_reg(s, a->rt);
6896    gen_aa32_st_i32(s, tmp, addr, mem_idx, mop);
6897    disas_set_da_iss(s, mop, issinfo);
6898    tcg_temp_free_i32(tmp);
6899
6900    op_addr_ri_post(s, a, addr, 0);
6901    return true;
6902}
6903
6904static bool op_ldrd_ri(DisasContext *s, arg_ldst_ri *a, int rt2)
6905{
6906    int mem_idx = get_mem_index(s);
6907    TCGv_i32 addr, tmp;
6908
6909    addr = op_addr_ri_pre(s, a);
6910
6911    tmp = tcg_temp_new_i32();
6912    gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6913    store_reg(s, a->rt, tmp);
6914
6915    tcg_gen_addi_i32(addr, addr, 4);
6916
6917    tmp = tcg_temp_new_i32();
6918    gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6919    store_reg(s, rt2, tmp);
6920
6921    /* LDRD w/ base writeback is undefined if the registers overlap.  */
6922    op_addr_ri_post(s, a, addr, -4);
6923    return true;
6924}
6925
6926static bool trans_LDRD_ri_a32(DisasContext *s, arg_ldst_ri *a)
6927{
6928    if (!ENABLE_ARCH_5TE || (a->rt & 1)) {
6929        return false;
6930    }
6931    return op_ldrd_ri(s, a, a->rt + 1);
6932}
6933
6934static bool trans_LDRD_ri_t32(DisasContext *s, arg_ldst_ri2 *a)
6935{
6936    arg_ldst_ri b = {
6937        .u = a->u, .w = a->w, .p = a->p,
6938        .rn = a->rn, .rt = a->rt, .imm = a->imm
6939    };
6940    return op_ldrd_ri(s, &b, a->rt2);
6941}
6942
6943static bool op_strd_ri(DisasContext *s, arg_ldst_ri *a, int rt2)
6944{
6945    int mem_idx = get_mem_index(s);
6946    TCGv_i32 addr, tmp;
6947
6948    addr = op_addr_ri_pre(s, a);
6949
6950    tmp = load_reg(s, a->rt);
6951    gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6952    tcg_temp_free_i32(tmp);
6953
6954    tcg_gen_addi_i32(addr, addr, 4);
6955
6956    tmp = load_reg(s, rt2);
6957    gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6958    tcg_temp_free_i32(tmp);
6959
6960    op_addr_ri_post(s, a, addr, -4);
6961    return true;
6962}
6963
6964static bool trans_STRD_ri_a32(DisasContext *s, arg_ldst_ri *a)
6965{
6966    if (!ENABLE_ARCH_5TE || (a->rt & 1)) {
6967        return false;
6968    }
6969    return op_strd_ri(s, a, a->rt + 1);
6970}
6971
6972static bool trans_STRD_ri_t32(DisasContext *s, arg_ldst_ri2 *a)
6973{
6974    arg_ldst_ri b = {
6975        .u = a->u, .w = a->w, .p = a->p,
6976        .rn = a->rn, .rt = a->rt, .imm = a->imm
6977    };
6978    return op_strd_ri(s, &b, a->rt2);
6979}
6980
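    /*
     * Expand the four trans functions for one load/store insn: the _ri
     * and _rr forms (immediate and register offset) plus the unprivileged
     * T forms (LDRT/STRT etc.), which differ only in using the user
     * memory index.
     */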
6981#define DO_LDST(NAME, WHICH, MEMOP) \
6982static bool trans_##NAME##_ri(DisasContext *s, arg_ldst_ri *a)        \
6983{                                                                     \
6984    return op_##WHICH##_ri(s, a, MEMOP, get_mem_index(s));            \
6985}                                                                     \
6986static bool trans_##NAME##T_ri(DisasContext *s, arg_ldst_ri *a)       \
6987{                                                                     \
6988    return op_##WHICH##_ri(s, a, MEMOP, get_a32_user_mem_index(s));   \
6989}                                                                     \
6990static bool trans_##NAME##_rr(DisasContext *s, arg_ldst_rr *a)        \
6991{                                                                     \
6992    return op_##WHICH##_rr(s, a, MEMOP, get_mem_index(s));            \
6993}                                                                     \
6994static bool trans_##NAME##T_rr(DisasContext *s, arg_ldst_rr *a)       \
6995{                                                                     \
6996    return op_##WHICH##_rr(s, a, MEMOP, get_a32_user_mem_index(s));   \
6997}
6998
6999DO_LDST(LDR, load, MO_UL)
7000DO_LDST(LDRB, load, MO_UB)
7001DO_LDST(LDRH, load, MO_UW)
7002DO_LDST(LDRSB, load, MO_SB)
7003DO_LDST(LDRSH, load, MO_SW)
7004
7005DO_LDST(STR, store, MO_UL)
7006DO_LDST(STRB, store, MO_UB)
7007DO_LDST(STRH, store, MO_UW)
7008
7009#undef DO_LDST
7010
7011/*
7012 * Synchronization primitives
7013 */
7014
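    /* SWP/SWPB: swap a register with memory as a single atomic exchange. */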
7015static bool op_swp(DisasContext *s, arg_SWP *a, MemOp opc)
7016{
7017    TCGv_i32 addr, tmp;
7018    TCGv taddr;
7019
7020    opc |= s->be_data;
7021    addr = load_reg(s, a->rn);
7022    taddr = gen_aa32_addr(s, addr, opc);
7023    tcg_temp_free_i32(addr);
7024
7025    tmp = load_reg(s, a->rt2);
7026    tcg_gen_atomic_xchg_i32(tmp, taddr, tmp, get_mem_index(s), opc);
7027    tcg_temp_free(taddr);
7028
7029    store_reg(s, a->rt, tmp);
7030    return true;
7031}
7032
7033static bool trans_SWP(DisasContext *s, arg_SWP *a)
7034{
7035    return op_swp(s, a, MO_UL | MO_ALIGN);
7036}
7037
7038static bool trans_SWPB(DisasContext *s, arg_SWP *a)
7039{
7040    return op_swp(s, a, MO_UB);
7041}
7042
7043/*
7044 * Load/Store Exclusive and Load-Acquire/Store-Release
7045 */
7046
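    /*
     * op_strex/op_ldrex are shared by all the exclusive and acquire/release
     * exclusive forms: mop gives the access size (MO_64 for the paired
     * STREXD/LDREXD variants, with rt2 naming the second register), and
     * rel/acq add the release/acquire barrier around the monitor operation.
     */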
7047static bool op_strex(DisasContext *s, arg_STREX *a, MemOp mop, bool rel)
7048{
7049    TCGv_i32 addr;
7050    /* Some cases stopped being UNPREDICTABLE in v8A (but not v8M) */
7051    bool v8a = ENABLE_ARCH_8 && !arm_dc_feature(s, ARM_FEATURE_M);
7052
7053    /* We UNDEF for these UNPREDICTABLE cases.  */
7054    if (a->rd == 15 || a->rn == 15 || a->rt == 15
7055        || a->rd == a->rn || a->rd == a->rt
7056        || (!v8a && s->thumb && (a->rd == 13 || a->rt == 13))
7057        || (mop == MO_64
7058            && (a->rt2 == 15
7059                || a->rd == a->rt2
7060                || (!v8a && s->thumb && a->rt2 == 13)))) {
7061        unallocated_encoding(s);
7062        return true;
7063    }
7064
7065    if (rel) {
7066        tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
7067    }
7068
7069    addr = tcg_temp_local_new_i32();
7070    load_reg_var(s, addr, a->rn);
7071    tcg_gen_addi_i32(addr, addr, a->imm);
7072
7073    gen_store_exclusive(s, a->rd, a->rt, a->rt2, addr, mop);
7074    tcg_temp_free_i32(addr);
7075    return true;
7076}
7077
7078static bool trans_STREX(DisasContext *s, arg_STREX *a)
7079{
7080    if (!ENABLE_ARCH_6) {
7081        return false;
7082    }
7083    return op_strex(s, a, MO_32, false);
7084}
7085
7086static bool trans_STREXD_a32(DisasContext *s, arg_STREX *a)
7087{
7088    if (!ENABLE_ARCH_6K) {
7089        return false;
7090    }
7091    /* We UNDEF for these UNPREDICTABLE cases.  */
7092    if (a->rt & 1) {
7093        unallocated_encoding(s);
7094        return true;
7095    }
7096    a->rt2 = a->rt + 1;
7097    return op_strex(s, a, MO_64, false);
7098}
7099
7100static bool trans_STREXD_t32(DisasContext *s, arg_STREX *a)
7101{
7102    return op_strex(s, a, MO_64, false);
7103}
7104
7105static bool trans_STREXB(DisasContext *s, arg_STREX *a)
7106{
7107    if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
7108        return false;
7109    }
7110    return op_strex(s, a, MO_8, false);
7111}
7112
7113static bool trans_STREXH(DisasContext *s, arg_STREX *a)
7114{
7115    if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
7116        return false;
7117    }
7118    return op_strex(s, a, MO_16, false);
7119}
7120
7121static bool trans_STLEX(DisasContext *s, arg_STREX *a)
7122{
7123    if (!ENABLE_ARCH_8) {
7124        return false;
7125    }
7126    return op_strex(s, a, MO_32, true);
7127}
7128
7129static bool trans_STLEXD_a32(DisasContext *s, arg_STREX *a)
7130{
7131    if (!ENABLE_ARCH_8) {
7132        return false;
7133    }
7134    /* We UNDEF for these UNPREDICTABLE cases.  */
7135    if (a->rt & 1) {
7136        unallocated_encoding(s);
7137        return true;
7138    }
7139    a->rt2 = a->rt + 1;
7140    return op_strex(s, a, MO_64, true);
7141}
7142
7143static bool trans_STLEXD_t32(DisasContext *s, arg_STREX *a)
7144{
7145    if (!ENABLE_ARCH_8) {
7146        return false;
7147    }
7148    return op_strex(s, a, MO_64, true);
7149}
7150
7151static bool trans_STLEXB(DisasContext *s, arg_STREX *a)
7152{
7153    if (!ENABLE_ARCH_8) {
7154        return false;
7155    }
7156    return op_strex(s, a, MO_8, true);
7157}
7158
7159static bool trans_STLEXH(DisasContext *s, arg_STREX *a)
7160{
7161    if (!ENABLE_ARCH_8) {
7162        return false;
7163    }
7164    return op_strex(s, a, MO_16, true);
7165}
7166
7167static bool op_stl(DisasContext *s, arg_STL *a, MemOp mop)
7168{
7169    TCGv_i32 addr, tmp;
7170
7171    if (!ENABLE_ARCH_8) {
7172        return false;
7173    }
7174    /* We UNDEF for these UNPREDICTABLE cases.  */
7175    if (a->rn == 15 || a->rt == 15) {
7176        unallocated_encoding(s);
7177        return true;
7178    }
7179
7180    addr = load_reg(s, a->rn);
7181    tmp = load_reg(s, a->rt);
7182    tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
7183    gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), mop | MO_ALIGN);
7184    disas_set_da_iss(s, mop, a->rt | ISSIsAcqRel | ISSIsWrite);
7185
7186    tcg_temp_free_i32(tmp);
7187    tcg_temp_free_i32(addr);
7188    return true;
7189}
7190
7191static bool trans_STL(DisasContext *s, arg_STL *a)
7192{
7193    return op_stl(s, a, MO_UL);
7194}
7195
7196static bool trans_STLB(DisasContext *s, arg_STL *a)
7197{
7198    return op_stl(s, a, MO_UB);
7199}
7200
7201static bool trans_STLH(DisasContext *s, arg_STL *a)
7202{
7203    return op_stl(s, a, MO_UW);
7204}
7205
7206static bool op_ldrex(DisasContext *s, arg_LDREX *a, MemOp mop, bool acq)
7207{
7208    TCGv_i32 addr;
7209    /* Some cases stopped being UNPREDICTABLE in v8A (but not v8M) */
7210    bool v8a = ENABLE_ARCH_8 && !arm_dc_feature(s, ARM_FEATURE_M);
7211
7212    /* We UNDEF for these UNPREDICTABLE cases.  */
7213    if (a->rn == 15 || a->rt == 15
7214        || (!v8a && s->thumb && a->rt == 13)
7215        || (mop == MO_64
7216            && (a->rt2 == 15 || a->rt == a->rt2
7217                || (!v8a && s->thumb && a->rt2 == 13)))) {
7218        unallocated_encoding(s);
7219        return true;
7220    }
7221
7222    addr = tcg_temp_local_new_i32();
7223    load_reg_var(s, addr, a->rn);
7224    tcg_gen_addi_i32(addr, addr, a->imm);
7225
7226    gen_load_exclusive(s, a->rt, a->rt2, addr, mop);
7227    tcg_temp_free_i32(addr);
7228
7229    if (acq) {
7230        tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
7231    }
7232    return true;
7233}
7234
7235static bool trans_LDREX(DisasContext *s, arg_LDREX *a)
7236{
7237    if (!ENABLE_ARCH_6) {
7238        return false;
7239    }
7240    return op_ldrex(s, a, MO_32, false);
7241}
7242
7243static bool trans_LDREXD_a32(DisasContext *s, arg_LDREX *a)
7244{
7245    if (!ENABLE_ARCH_6K) {
7246        return false;
7247    }
7248    /* We UNDEF for these UNPREDICTABLE cases.  */
7249    if (a->rt & 1) {
7250        unallocated_encoding(s);
7251        return true;
7252    }
7253    a->rt2 = a->rt + 1;
7254    return op_ldrex(s, a, MO_64, false);
7255}
7256
7257static bool trans_LDREXD_t32(DisasContext *s, arg_LDREX *a)
7258{
7259    return op_ldrex(s, a, MO_64, false);
7260}
7261
7262static bool trans_LDREXB(DisasContext *s, arg_LDREX *a)
7263{
7264    if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
7265        return false;
7266    }
7267    return op_ldrex(s, a, MO_8, false);
7268}
7269
7270static bool trans_LDREXH(DisasContext *s, arg_LDREX *a)
7271{
7272    if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
7273        return false;
7274    }
7275    return op_ldrex(s, a, MO_16, false);
7276}
7277
7278static bool trans_LDAEX(DisasContext *s, arg_LDREX *a)
7279{
7280    if (!ENABLE_ARCH_8) {
7281        return false;
7282    }
7283    return op_ldrex(s, a, MO_32, true);
7284}
7285
7286static bool trans_LDAEXD_a32(DisasContext *s, arg_LDREX *a)
7287{
7288    if (!ENABLE_ARCH_8) {
7289        return false;
7290    }
7291    /* We UNDEF for these UNPREDICTABLE cases.  */
7292    if (a->rt & 1) {
7293        unallocated_encoding(s);
7294        return true;
7295    }
7296    a->rt2 = a->rt + 1;
7297    return op_ldrex(s, a, MO_64, true);
7298}
7299
7300static bool trans_LDAEXD_t32(DisasContext *s, arg_LDREX *a)
7301{
7302    if (!ENABLE_ARCH_8) {
7303        return false;
7304    }
7305    return op_ldrex(s, a, MO_64, true);
7306}
7307
7308static bool trans_LDAEXB(DisasContext *s, arg_LDREX *a)
7309{
7310    if (!ENABLE_ARCH_8) {
7311        return false;
7312    }
7313    return op_ldrex(s, a, MO_8, true);
7314}
7315
7316static bool trans_LDAEXH(DisasContext *s, arg_LDREX *a)
7317{
7318    if (!ENABLE_ARCH_8) {
7319        return false;
7320    }
7321    return op_ldrex(s, a, MO_16, true);
7322}
7323
7324static bool op_lda(DisasContext *s, arg_LDA *a, MemOp mop)
7325{
7326    TCGv_i32 addr, tmp;
7327
7328    if (!ENABLE_ARCH_8) {
7329        return false;
7330    }
7331    /* We UNDEF for these UNPREDICTABLE cases.  */
7332    if (a->rn == 15 || a->rt == 15) {
7333        unallocated_encoding(s);
7334        return true;
7335    }
7336
7337    addr = load_reg(s, a->rn);
7338    tmp = tcg_temp_new_i32();
7339    gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), mop | MO_ALIGN);
7340    disas_set_da_iss(s, mop, a->rt | ISSIsAcqRel);
7341    tcg_temp_free_i32(addr);
7342
7343    store_reg(s, a->rt, tmp);
7344    tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
7345    return true;
7346}
7347
7348static bool trans_LDA(DisasContext *s, arg_LDA *a)
7349{
7350    return op_lda(s, a, MO_UL);
7351}
7352
7353static bool trans_LDAB(DisasContext *s, arg_LDA *a)
7354{
7355    return op_lda(s, a, MO_UB);
7356}
7357
7358static bool trans_LDAH(DisasContext *s, arg_LDA *a)
7359{
7360    return op_lda(s, a, MO_UW);
7361}
7362
7363/*
7364 * Media instructions
7365 */
7366
7367static bool trans_USADA8(DisasContext *s, arg_USADA8 *a)
7368{
7369    TCGv_i32 t1, t2;
7370
7371    if (!ENABLE_ARCH_6) {
7372        return false;
7373    }
7374
7375    t1 = load_reg(s, a->rn);
7376    t2 = load_reg(s, a->rm);
7377    gen_helper_usad8(t1, t1, t2);
7378    tcg_temp_free_i32(t2);
7379    if (a->ra != 15) {
7380        t2 = load_reg(s, a->ra);
7381        tcg_gen_add_i32(t1, t1, t2);
7382        tcg_temp_free_i32(t2);
7383    }
7384    store_reg(s, a->rd, t1);
7385    return true;
7386}
7387
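    /*
     * UBFX/SBFX bitfield extract: conceptually
     *   Rd = (Rn >> lsb) & (2^width - 1)
     * zero-extended for UBFX and sign-extended for SBFX, which maps
     * directly onto the TCG extract/sextract ops below.
     */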
7388static bool op_bfx(DisasContext *s, arg_UBFX *a, bool u)
7389{
7390    TCGv_i32 tmp;
7391    int width = a->widthm1 + 1;
7392    int shift = a->lsb;
7393
7394    if (!ENABLE_ARCH_6T2) {
7395        return false;
7396    }
7397    if (shift + width > 32) {
7398        /* UNPREDICTABLE; we choose to UNDEF */
7399        unallocated_encoding(s);
7400        return true;
7401    }
7402
7403    tmp = load_reg(s, a->rn);
7404    if (u) {
7405        tcg_gen_extract_i32(tmp, tmp, shift, width);
7406    } else {
7407        tcg_gen_sextract_i32(tmp, tmp, shift, width);
7408    }
7409    store_reg(s, a->rd, tmp);
7410    return true;
7411}
7412
7413static bool trans_SBFX(DisasContext *s, arg_SBFX *a)
7414{
7415    return op_bfx(s, a, false);
7416}
7417
7418static bool trans_UBFX(DisasContext *s, arg_UBFX *a)
7419{
7420    return op_bfx(s, a, true);
7421}
7422
7423static bool trans_BFCI(DisasContext *s, arg_BFCI *a)
7424{
7425    TCGv_i32 tmp;
7426    int msb = a->msb, lsb = a->lsb;
7427    int width;
7428
7429    if (!ENABLE_ARCH_6T2) {
7430        return false;
7431    }
7432    if (msb < lsb) {
7433        /* UNPREDICTABLE; we choose to UNDEF */
7434        unallocated_encoding(s);
7435        return true;
7436    }
7437
7438    width = msb + 1 - lsb;
7439    if (a->rn == 15) {
7440        /* BFC */
7441        tmp = tcg_const_i32(0);
7442    } else {
7443        /* BFI */
7444        tmp = load_reg(s, a->rn);
7445    }
7446    if (width != 32) {
7447        TCGv_i32 tmp2 = load_reg(s, a->rd);
7448        tcg_gen_deposit_i32(tmp, tmp2, tmp, lsb, width);
7449        tcg_temp_free_i32(tmp2);
7450    }
7451    store_reg(s, a->rd, tmp);
7452    return true;
7453}
7454
7455static bool trans_UDF(DisasContext *s, arg_UDF *a)
7456{
7457    unallocated_encoding(s);
7458    return true;
7459}
7460
7461/*
7462 * Parallel addition and subtraction
7463 */
7464
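    /*
     * Two helper shapes: the _ge variant passes a pointer to env->GE so
     * the plain signed/unsigned add/sub helpers can set the GE flags
     * consumed by SEL; the saturating (Q*/UQ*) and halving (SH*/UH*)
     * forms do not affect GE and use the simpler shape.
     */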
7465static bool op_par_addsub(DisasContext *s, arg_rrr *a,
7466                          void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32))
7467{
7468    TCGv_i32 t0, t1;
7469
7470    if (s->thumb
7471        ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7472        : !ENABLE_ARCH_6) {
7473        return false;
7474    }
7475
7476    t0 = load_reg(s, a->rn);
7477    t1 = load_reg(s, a->rm);
7478
7479    gen(t0, t0, t1);
7480
7481    tcg_temp_free_i32(t1);
7482    store_reg(s, a->rd, t0);
7483    return true;
7484}
7485
7486static bool op_par_addsub_ge(DisasContext *s, arg_rrr *a,
7487                             void (*gen)(TCGv_i32, TCGv_i32,
7488                                         TCGv_i32, TCGv_ptr))
7489{
7490    TCGv_i32 t0, t1;
7491    TCGv_ptr ge;
7492
7493    if (s->thumb
7494        ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7495        : !ENABLE_ARCH_6) {
7496        return false;
7497    }
7498
7499    t0 = load_reg(s, a->rn);
7500    t1 = load_reg(s, a->rm);
7501
7502    ge = tcg_temp_new_ptr();
7503    tcg_gen_addi_ptr(ge, cpu_env, offsetof(CPUARMState, GE));
7504    gen(t0, t0, t1, ge);
7505
7506    tcg_temp_free_ptr(ge);
7507    tcg_temp_free_i32(t1);
7508    store_reg(s, a->rd, t0);
7509    return true;
7510}
7511
7512#define DO_PAR_ADDSUB(NAME, helper) \
7513static bool trans_##NAME(DisasContext *s, arg_rrr *a)   \
7514{                                                       \
7515    return op_par_addsub(s, a, helper);                 \
7516}
7517
7518#define DO_PAR_ADDSUB_GE(NAME, helper) \
7519static bool trans_##NAME(DisasContext *s, arg_rrr *a)   \
7520{                                                       \
7521    return op_par_addsub_ge(s, a, helper);              \
7522}
7523
7524DO_PAR_ADDSUB_GE(SADD16, gen_helper_sadd16)
7525DO_PAR_ADDSUB_GE(SASX, gen_helper_saddsubx)
7526DO_PAR_ADDSUB_GE(SSAX, gen_helper_ssubaddx)
7527DO_PAR_ADDSUB_GE(SSUB16, gen_helper_ssub16)
7528DO_PAR_ADDSUB_GE(SADD8, gen_helper_sadd8)
7529DO_PAR_ADDSUB_GE(SSUB8, gen_helper_ssub8)
7530
7531DO_PAR_ADDSUB_GE(UADD16, gen_helper_uadd16)
7532DO_PAR_ADDSUB_GE(UASX, gen_helper_uaddsubx)
7533DO_PAR_ADDSUB_GE(USAX, gen_helper_usubaddx)
7534DO_PAR_ADDSUB_GE(USUB16, gen_helper_usub16)
7535DO_PAR_ADDSUB_GE(UADD8, gen_helper_uadd8)
7536DO_PAR_ADDSUB_GE(USUB8, gen_helper_usub8)
7537
7538DO_PAR_ADDSUB(QADD16, gen_helper_qadd16)
7539DO_PAR_ADDSUB(QASX, gen_helper_qaddsubx)
7540DO_PAR_ADDSUB(QSAX, gen_helper_qsubaddx)
7541DO_PAR_ADDSUB(QSUB16, gen_helper_qsub16)
7542DO_PAR_ADDSUB(QADD8, gen_helper_qadd8)
7543DO_PAR_ADDSUB(QSUB8, gen_helper_qsub8)
7544
7545DO_PAR_ADDSUB(UQADD16, gen_helper_uqadd16)
7546DO_PAR_ADDSUB(UQASX, gen_helper_uqaddsubx)
7547DO_PAR_ADDSUB(UQSAX, gen_helper_uqsubaddx)
7548DO_PAR_ADDSUB(UQSUB16, gen_helper_uqsub16)
7549DO_PAR_ADDSUB(UQADD8, gen_helper_uqadd8)
7550DO_PAR_ADDSUB(UQSUB8, gen_helper_uqsub8)
7551
7552DO_PAR_ADDSUB(SHADD16, gen_helper_shadd16)
7553DO_PAR_ADDSUB(SHASX, gen_helper_shaddsubx)
7554DO_PAR_ADDSUB(SHSAX, gen_helper_shsubaddx)
7555DO_PAR_ADDSUB(SHSUB16, gen_helper_shsub16)
7556DO_PAR_ADDSUB(SHADD8, gen_helper_shadd8)
7557DO_PAR_ADDSUB(SHSUB8, gen_helper_shsub8)
7558
7559DO_PAR_ADDSUB(UHADD16, gen_helper_uhadd16)
7560DO_PAR_ADDSUB(UHASX, gen_helper_uhaddsubx)
7561DO_PAR_ADDSUB(UHSAX, gen_helper_uhsubaddx)
7562DO_PAR_ADDSUB(UHSUB16, gen_helper_uhsub16)
7563DO_PAR_ADDSUB(UHADD8, gen_helper_uhadd8)
7564DO_PAR_ADDSUB(UHSUB8, gen_helper_uhsub8)
7565
7566#undef DO_PAR_ADDSUB
7567#undef DO_PAR_ADDSUB_GE
7568
7569/*
7570 * Packing, unpacking, saturation, and reversal
7571 */
7572
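    /*
     * PKHBT: Rd = Rn[15:0] | (Rm LSL shift)[31:16]
     * PKHTB: Rd = (Rm ASR shift)[15:0] | Rn[31:16]
     * For PKHTB a shift of 0 encodes ASR #32; shifting by 31 below gives
     * the identical low halfword.
     */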
7573static bool trans_PKH(DisasContext *s, arg_PKH *a)
7574{
7575    TCGv_i32 tn, tm;
7576    int shift = a->imm;
7577
7578    if (s->thumb
7579        ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7580        : !ENABLE_ARCH_6) {
7581        return false;
7582    }
7583
7584    tn = load_reg(s, a->rn);
7585    tm = load_reg(s, a->rm);
7586    if (a->tb) {
7587        /* PKHTB */
7588        if (shift == 0) {
7589            shift = 31;
7590        }
7591        tcg_gen_sari_i32(tm, tm, shift);
7592        tcg_gen_deposit_i32(tn, tn, tm, 0, 16);
7593    } else {
7594        /* PKHBT */
7595        tcg_gen_shli_i32(tm, tm, shift);
7596        tcg_gen_deposit_i32(tn, tm, tn, 0, 16);
7597    }
7598    tcg_temp_free_i32(tm);
7599    store_reg(s, a->rd, tn);
7600    return true;
7601}
7602
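    /*
     * SSAT/USAT: the input is first shifted (sh=1: arithmetic right,
     * sh=0: left), then saturated to the signed/unsigned range selected
     * by the immediate; the helpers set QF when saturation occurs.
     */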
7603static bool op_sat(DisasContext *s, arg_sat *a,
7604                   void (*gen)(TCGv_i32, TCGv_env, TCGv_i32, TCGv_i32))
7605{
7606    TCGv_i32 tmp, satimm;
7607    int shift = a->imm;
7608
7609    if (!ENABLE_ARCH_6) {
7610        return false;
7611    }
7612
7613    tmp = load_reg(s, a->rn);
7614    if (a->sh) {
7615        tcg_gen_sari_i32(tmp, tmp, shift ? shift : 31);
7616    } else {
7617        tcg_gen_shli_i32(tmp, tmp, shift);
7618    }
7619
7620    satimm = tcg_const_i32(a->satimm);
7621    gen(tmp, cpu_env, tmp, satimm);
7622    tcg_temp_free_i32(satimm);
7623
7624    store_reg(s, a->rd, tmp);
7625    return true;
7626}
7627
7628static bool trans_SSAT(DisasContext *s, arg_sat *a)
7629{
7630    return op_sat(s, a, gen_helper_ssat);
7631}
7632
7633static bool trans_USAT(DisasContext *s, arg_sat *a)
7634{
7635    return op_sat(s, a, gen_helper_usat);
7636}
7637
7638static bool trans_SSAT16(DisasContext *s, arg_sat *a)
7639{
7640    if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
7641        return false;
7642    }
7643    return op_sat(s, a, gen_helper_ssat16);
7644}
7645
7646static bool trans_USAT16(DisasContext *s, arg_sat *a)
7647{
7648    if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
7649        return false;
7650    }
7651    return op_sat(s, a, gen_helper_usat16);
7652}
7653
7654static bool op_xta(DisasContext *s, arg_rrr_rot *a,
7655                   void (*gen_extract)(TCGv_i32, TCGv_i32),
7656                   void (*gen_add)(TCGv_i32, TCGv_i32, TCGv_i32))
7657{
7658    TCGv_i32 tmp;
7659
7660    if (!ENABLE_ARCH_6) {
7661        return false;
7662    }
7663
7664    tmp = load_reg(s, a->rm);
7665    /*
7666     * TODO: In many cases we could do a shift instead of a rotate.
7667     * Combined with a simple extend, that becomes an extract.
7668     */
7669    tcg_gen_rotri_i32(tmp, tmp, a->rot * 8);
7670    gen_extract(tmp, tmp);
7671
7672    if (a->rn != 15) {
7673        TCGv_i32 tmp2 = load_reg(s, a->rn);
7674        gen_add(tmp, tmp, tmp2);
7675        tcg_temp_free_i32(tmp2);
7676    }
7677    store_reg(s, a->rd, tmp);
7678    return true;
7679}
7680
7681static bool trans_SXTAB(DisasContext *s, arg_rrr_rot *a)
7682{
7683    return op_xta(s, a, tcg_gen_ext8s_i32, tcg_gen_add_i32);
7684}
7685
7686static bool trans_SXTAH(DisasContext *s, arg_rrr_rot *a)
7687{
7688    return op_xta(s, a, tcg_gen_ext16s_i32, tcg_gen_add_i32);
7689}
7690
7691static bool trans_SXTAB16(DisasContext *s, arg_rrr_rot *a)
7692{
7693    if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
7694        return false;
7695    }
7696    return op_xta(s, a, gen_helper_sxtb16, gen_add16);
7697}
7698
7699static bool trans_UXTAB(DisasContext *s, arg_rrr_rot *a)
7700{
7701    return op_xta(s, a, tcg_gen_ext8u_i32, tcg_gen_add_i32);
7702}
7703
7704static bool trans_UXTAH(DisasContext *s, arg_rrr_rot *a)
7705{
7706    return op_xta(s, a, tcg_gen_ext16u_i32, tcg_gen_add_i32);
7707}
7708
7709static bool trans_UXTAB16(DisasContext *s, arg_rrr_rot *a)
7710{
7711    if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
7712        return false;
7713    }
7714    return op_xta(s, a, gen_helper_uxtb16, gen_add16);
7715}
7716
7717static bool trans_SEL(DisasContext *s, arg_rrr *a)
7718{
7719    TCGv_i32 t1, t2, t3;
7720
7721    if (s->thumb
7722        ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7723        : !ENABLE_ARCH_6) {
7724        return false;
7725    }
7726
7727    t1 = load_reg(s, a->rn);
7728    t2 = load_reg(s, a->rm);
7729    t3 = tcg_temp_new_i32();
7730    tcg_gen_ld_i32(t3, cpu_env, offsetof(CPUARMState, GE));
7731    gen_helper_sel_flags(t1, t3, t1, t2);
7732    tcg_temp_free_i32(t3);
7733    tcg_temp_free_i32(t2);
7734    store_reg(s, a->rd, t1);
7735    return true;
7736}
7737
7738static bool op_rr(DisasContext *s, arg_rr *a,
7739                  void (*gen)(TCGv_i32, TCGv_i32))
7740{
7741    TCGv_i32 tmp;
7742
7743    tmp = load_reg(s, a->rm);
7744    gen(tmp, tmp);
7745    store_reg(s, a->rd, tmp);
7746    return true;
7747}
7748
7749static bool trans_REV(DisasContext *s, arg_rr *a)
7750{
7751    if (!ENABLE_ARCH_6) {
7752        return false;
7753    }
7754    return op_rr(s, a, tcg_gen_bswap32_i32);
7755}
7756
7757static bool trans_REV16(DisasContext *s, arg_rr *a)
7758{
7759    if (!ENABLE_ARCH_6) {
7760        return false;
7761    }
7762    return op_rr(s, a, gen_rev16);
7763}
7764
7765static bool trans_REVSH(DisasContext *s, arg_rr *a)
7766{
7767    if (!ENABLE_ARCH_6) {
7768        return false;
7769    }
7770    return op_rr(s, a, gen_revsh);
7771}
7772
7773static bool trans_RBIT(DisasContext *s, arg_rr *a)
7774{
7775    if (!ENABLE_ARCH_6T2) {
7776        return false;
7777    }
7778    return op_rr(s, a, gen_helper_rbit);
7779}
7780
7781/*
7782 * Signed multiply, signed and unsigned divide
7783 */
7784
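    /*
     * SMLAD/SMLSD (and SMUAD/SMUSD, the Ra == 15 forms): gen_smul_dual
     * computes the two signed 16x16 products, which are then added or
     * subtracted (sub) and accumulated into Ra; additions that overflow
     * set Q.  m_swap implements the X variants, which swap the halfwords
     * of Rm before multiplying.
     */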
7785static bool op_smlad(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub)
7786{
7787    TCGv_i32 t1, t2;
7788
7789    if (!ENABLE_ARCH_6) {
7790        return false;
7791    }
7792
7793    t1 = load_reg(s, a->rn);
7794    t2 = load_reg(s, a->rm);
7795    if (m_swap) {
7796        gen_swap_half(t2, t2);
7797    }
7798    gen_smul_dual(t1, t2);
7799
7800    if (sub) {
7801        /*
7802         * This subtraction cannot overflow, so we can do a simple
7803         * 32-bit subtraction and then a possible 32-bit saturating
7804         * addition of Ra.
7805         */
7806        tcg_gen_sub_i32(t1, t1, t2);
7807        tcg_temp_free_i32(t2);
7808
7809        if (a->ra != 15) {
7810            t2 = load_reg(s, a->ra);
7811            gen_helper_add_setq(t1, cpu_env, t1, t2);
7812            tcg_temp_free_i32(t2);
7813        }
7814    } else if (a->ra == 15) {
7815        /* Single saturation-checking addition */
7816        gen_helper_add_setq(t1, cpu_env, t1, t2);
7817        tcg_temp_free_i32(t2);
7818    } else {
7819        /*
7820         * We need to add the products and Ra together and then
7821         * determine whether the final result overflowed. Doing
7822         * this as two separate add-and-check-overflow steps incorrectly
7823         * sets Q for cases like (-32768 * -32768) + (-32768 * -32768) + -1.
7824         * Do all the arithmetic at 64-bits and then check for overflow.
7825         */
7826        TCGv_i64 p64, q64;
7827        TCGv_i32 t3, qf, one;
7828
7829        p64 = tcg_temp_new_i64();
7830        q64 = tcg_temp_new_i64();
7831        tcg_gen_ext_i32_i64(p64, t1);
7832        tcg_gen_ext_i32_i64(q64, t2);
7833        tcg_gen_add_i64(p64, p64, q64);
7834        load_reg_var(s, t2, a->ra);
7835        tcg_gen_ext_i32_i64(q64, t2);
7836        tcg_gen_add_i64(p64, p64, q64);
7837        tcg_temp_free_i64(q64);
7838
7839        tcg_gen_extr_i64_i32(t1, t2, p64);
7840        tcg_temp_free_i64(p64);
7841        /*
7842         * t1 is the low half of the result which goes into Rd.
7843         * We have overflow and must set Q if the high half (t2)
7844         * is different from the sign-extension of t1.
7845         */
7846        t3 = tcg_temp_new_i32();
7847        tcg_gen_sari_i32(t3, t1, 31);
7848        qf = load_cpu_field(QF);
7849        one = tcg_constant_i32(1);
7850        tcg_gen_movcond_i32(TCG_COND_NE, qf, t2, t3, one, qf);
7851        store_cpu_field(qf, QF);
7852        tcg_temp_free_i32(t3);
7853        tcg_temp_free_i32(t2);
7854    }
7855    store_reg(s, a->rd, t1);
7856    return true;
7857}
7858
7859static bool trans_SMLAD(DisasContext *s, arg_rrrr *a)
7860{
7861    return op_smlad(s, a, false, false);
7862}
7863
7864static bool trans_SMLADX(DisasContext *s, arg_rrrr *a)
7865{
7866    return op_smlad(s, a, true, false);
7867}
7868
7869static bool trans_SMLSD(DisasContext *s, arg_rrrr *a)
7870{
7871    return op_smlad(s, a, false, true);
7872}
7873
7874static bool trans_SMLSDX(DisasContext *s, arg_rrrr *a)
7875{
7876    return op_smlad(s, a, true, true);
7877}
7878
7879static bool op_smlald(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub)
7880{
7881    TCGv_i32 t1, t2;
7882    TCGv_i64 l1, l2;
7883
7884    if (!ENABLE_ARCH_6) {
7885        return false;
7886    }
7887
7888    t1 = load_reg(s, a->rn);
7889    t2 = load_reg(s, a->rm);
7890    if (m_swap) {
7891        gen_swap_half(t2, t2);
7892    }
7893    gen_smul_dual(t1, t2);
7894
7895    l1 = tcg_temp_new_i64();
7896    l2 = tcg_temp_new_i64();
7897    tcg_gen_ext_i32_i64(l1, t1);
7898    tcg_gen_ext_i32_i64(l2, t2);
7899    tcg_temp_free_i32(t1);
7900    tcg_temp_free_i32(t2);
7901
7902    if (sub) {
7903        tcg_gen_sub_i64(l1, l1, l2);
7904    } else {
7905        tcg_gen_add_i64(l1, l1, l2);
7906    }
7907    tcg_temp_free_i64(l2);
7908
7909    gen_addq(s, l1, a->ra, a->rd);
7910    gen_storeq_reg(s, a->ra, a->rd, l1);
7911    tcg_temp_free_i64(l1);
7912    return true;
7913}
7914
7915static bool trans_SMLALD(DisasContext *s, arg_rrrr *a)
7916{
7917    return op_smlald(s, a, false, false);
7918}
7919
7920static bool trans_SMLALDX(DisasContext *s, arg_rrrr *a)
7921{
7922    return op_smlald(s, a, true, false);
7923}
7924
7925static bool trans_SMLSLD(DisasContext *s, arg_rrrr *a)
7926{
7927    return op_smlald(s, a, false, true);
7928}
7929
7930static bool trans_SMLSLDX(DisasContext *s, arg_rrrr *a)
7931{
7932    return op_smlald(s, a, true, true);
7933}
7934
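    /*
     * SMMUL/SMMLA/SMMLS: signed 32x32 -> 64 multiply keeping only the
     * most significant word, with optional accumulate/subtract of Ra and
     * optional rounding (the R forms add 0x80000000 before taking the
     * high word).  muls2 below leaves the low half in t2, the high in t1.
     */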
7935static bool op_smmla(DisasContext *s, arg_rrrr *a, bool round, bool sub)
7936{
7937    TCGv_i32 t1, t2;
7938
7939    if (s->thumb
7940        ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7941        : !ENABLE_ARCH_6) {
7942        return false;
7943    }
7944
7945    t1 = load_reg(s, a->rn);
7946    t2 = load_reg(s, a->rm);
7947    tcg_gen_muls2_i32(t2, t1, t1, t2);
7948
7949    if (a->ra != 15) {
7950        TCGv_i32 t3 = load_reg(s, a->ra);
7951        if (sub) {
7952            /*
7953             * For SMMLS we need a real 64-bit subtract: the borrow out of the
7954             * low half (from a non-zero multiplicand lowpart) must reach the
7955             * high half, and we need the correct result lowpart for rounding.
7956             */
7957            TCGv_i32 zero = tcg_const_i32(0);
7958            tcg_gen_sub2_i32(t2, t1, zero, t3, t2, t1);
7959            tcg_temp_free_i32(zero);
7960        } else {
7961            tcg_gen_add_i32(t1, t1, t3);
7962        }
7963        tcg_temp_free_i32(t3);
7964    }
7965    if (round) {
7966        /*
7967         * Adding 0x80000000 to the 64-bit quantity means that we have
7968         * carry in to the high word when the low word has the msb set.
7969         */
7970        tcg_gen_shri_i32(t2, t2, 31);
7971        tcg_gen_add_i32(t1, t1, t2);
7972    }
7973    tcg_temp_free_i32(t2);
7974    store_reg(s, a->rd, t1);
7975    return true;
7976}
7977
7978static bool trans_SMMLA(DisasContext *s, arg_rrrr *a)
7979{
7980    return op_smmla(s, a, false, false);
7981}
7982
7983static bool trans_SMMLAR(DisasContext *s, arg_rrrr *a)
7984{
7985    return op_smmla(s, a, true, false);
7986}
7987
7988static bool trans_SMMLS(DisasContext *s, arg_rrrr *a)
7989{
7990    return op_smmla(s, a, false, true);
7991}
7992
7993static bool trans_SMMLSR(DisasContext *s, arg_rrrr *a)
7994{
7995    return op_smmla(s, a, true, true);
7996}
7997
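    /*
     * SDIV/UDIV: the A32 and T32 encodings are separate optional features,
     * hence the distinct aa32_arm_div/aa32_thumb_div checks.  The helpers
     * take cpu_env so division by zero can be handled there (returning
     * zero, or trapping where the configuration requires it).
     */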
7998static bool op_div(DisasContext *s, arg_rrr *a, bool u)
7999{
8000    TCGv_i32 t1, t2;
8001
8002    if (s->thumb
8003        ? !dc_isar_feature(aa32_thumb_div, s)
8004        : !dc_isar_feature(aa32_arm_div, s)) {
8005        return false;
8006    }
8007
8008    t1 = load_reg(s, a->rn);
8009    t2 = load_reg(s, a->rm);
8010    if (u) {
8011        gen_helper_udiv(t1, cpu_env, t1, t2);
8012    } else {
8013        gen_helper_sdiv(t1, cpu_env, t1, t2);
8014    }
8015    tcg_temp_free_i32(t2);
8016    store_reg(s, a->rd, t1);
8017    return true;
8018}
8019
8020static bool trans_SDIV(DisasContext *s, arg_rrr *a)
8021{
8022    return op_div(s, a, false);
8023}
8024
8025static bool trans_UDIV(DisasContext *s, arg_rrr *a)
8026{
8027    return op_div(s, a, true);
8028}
8029
8030/*
8031 * Block data transfer
8032 */
8033
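    /*
     * LDM/STM addressing: a->i selects increment vs decrement, a->b
     * selects before vs after.  op_addr_block_pre folds the four
     * IA/IB/DA/DB modes into the lowest address of the block so the
     * transfer loops can always walk upwards in steps of 4;
     * op_addr_block_post then applies any base register writeback.
     */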
8034static TCGv_i32 op_addr_block_pre(DisasContext *s, arg_ldst_block *a, int n)
8035{
8036    TCGv_i32 addr = load_reg(s, a->rn);
8037
8038    if (a->b) {
8039        if (a->i) {
8040            /* pre increment */
8041            tcg_gen_addi_i32(addr, addr, 4);
8042        } else {
8043            /* pre decrement */
8044            tcg_gen_addi_i32(addr, addr, -(n * 4));
8045        }
8046    } else if (!a->i && n != 1) {
8047        /* post decrement */
8048        tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
8049    }
8050
8051    if (s->v8m_stackcheck && a->rn == 13 && a->w) {
8052        /*
8053         * If the writeback is incrementing SP rather than
8054         * decrementing it, and the initial SP is below the
8055         * stack limit but the final written-back SP would
8056         * be above, then we must not perform any memory
8057         * accesses, but it is IMPDEF whether we generate
8058         * an exception. We choose to do so in this case.
8059         * At this point 'addr' is the lowest address, so
8060         * either the original SP (if incrementing) or our
8061         * final SP (if decrementing), so that's what we check.
8062         */
8063        gen_helper_v8m_stackcheck(cpu_env, addr);
8064    }
8065
8066    return addr;
8067}
8068
8069static void op_addr_block_post(DisasContext *s, arg_ldst_block *a,
8070                               TCGv_i32 addr, int n)
8071{
8072    if (a->w) {
8073        /* write back */
8074        if (!a->b) {
8075            if (a->i) {
8076                /* post increment */
8077                tcg_gen_addi_i32(addr, addr, 4);
8078            } else {
8079                /* post decrement */
8080                tcg_gen_addi_i32(addr, addr, -(n * 4));
8081            }
8082        } else if (!a->i && n != 1) {
8083            /* pre decrement */
8084            tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
8085        }
8086        store_reg(s, a->rn, addr);
8087    } else {
8088        tcg_temp_free_i32(addr);
8089    }
8090}
8091
8092static bool op_stm(DisasContext *s, arg_ldst_block *a, int min_n)
8093{
8094    int i, j, n, list, mem_idx;
8095    bool user = a->u;
8096    TCGv_i32 addr, tmp, tmp2;
8097
8098    if (user) {
8099        /* STM (user) */
8100        if (IS_USER(s)) {
8101            /* Only usable in supervisor mode.  */
8102            unallocated_encoding(s);
8103            return true;
8104        }
8105    }
8106
8107    list = a->list;
8108    n = ctpop16(list);
8109    if (n < min_n || a->rn == 15) {
8110        unallocated_encoding(s);
8111        return true;
8112    }
8113
8114    s->eci_handled = true;
8115
8116    addr = op_addr_block_pre(s, a, n);
8117    mem_idx = get_mem_index(s);
8118
8119    for (i = j = 0; i < 16; i++) {
8120        if (!(list & (1 << i))) {
8121            continue;
8122        }
8123
8124        if (user && i != 15) {
8125            tmp = tcg_temp_new_i32();
8126            tmp2 = tcg_const_i32(i);
8127            gen_helper_get_user_reg(tmp, cpu_env, tmp2);
8128            tcg_temp_free_i32(tmp2);
8129        } else {
8130            tmp = load_reg(s, i);
8131        }
8132        gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
8133        tcg_temp_free_i32(tmp);
8134
8135        /* No need to add after the last transfer.  */
8136        if (++j != n) {
8137            tcg_gen_addi_i32(addr, addr, 4);
8138        }
8139    }
8140
8141    op_addr_block_post(s, a, addr, n);
8142    clear_eci_state(s);
8143    return true;
8144}
8145
8146static bool trans_STM(DisasContext *s, arg_ldst_block *a)
8147{
8148    /* BitCount(list) < 1 is UNPREDICTABLE */
8149    return op_stm(s, a, 1);
8150}
8151
8152static bool trans_STM_t32(DisasContext *s, arg_ldst_block *a)
8153{
8154    /* Writeback register in register list is UNPREDICTABLE for T32.  */
8155    if (a->w && (a->list & (1 << a->rn))) {
8156        unallocated_encoding(s);
8157        return true;
8158    }
8159    /* BitCount(list) < 2 is UNPREDICTABLE */
8160    return op_stm(s, a, 2);
8161}
8162
8163static bool do_ldm(DisasContext *s, arg_ldst_block *a, int min_n)
8164{
8165    int i, j, n, list, mem_idx;
8166    bool loaded_base;
8167    bool user = a->u;
8168    bool exc_return = false;
8169    TCGv_i32 addr, tmp, tmp2, loaded_var;
8170
8171    if (user) {
8172        /* LDM (user), LDM (exception return) */
8173        if (IS_USER(s)) {
8174            /* Only usable in supervisor mode.  */
8175            unallocated_encoding(s);
8176            return true;
8177        }
8178        if (extract32(a->list, 15, 1)) {
8179            exc_return = true;
8180            user = false;
8181        } else {
8182            /* LDM (user) does not allow writeback.  */
8183            if (a->w) {
8184                unallocated_encoding(s);
8185                return true;
8186            }
8187        }
8188    }
8189
8190    list = a->list;
8191    n = ctpop16(list);
8192    if (n < min_n || a->rn == 15) {
8193        unallocated_encoding(s);
8194        return true;
8195    }
8196
8197    s->eci_handled = true;
8198
8199    addr = op_addr_block_pre(s, a, n);
8200    mem_idx = get_mem_index(s);
8201    loaded_base = false;
8202    loaded_var = NULL;
8203
8204    for (i = j = 0; i < 16; i++) {
8205        if (!(list & (1 << i))) {
8206            continue;
8207        }
8208
8209        tmp = tcg_temp_new_i32();
8210        gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
8211        if (user) {
8212            tmp2 = tcg_const_i32(i);
8213            gen_helper_set_user_reg(cpu_env, tmp2, tmp);
8214            tcg_temp_free_i32(tmp2);
8215            tcg_temp_free_i32(tmp);
8216        } else if (i == a->rn) {
8217            loaded_var = tmp;
8218            loaded_base = true;
8219        } else if (i == 15 && exc_return) {
8220            store_pc_exc_ret(s, tmp);
8221        } else {
8222            store_reg_from_load(s, i, tmp);
8223        }
8224
8225        /* No need to add after the last transfer.  */
8226        if (++j != n) {
8227            tcg_gen_addi_i32(addr, addr, 4);
8228        }
8229    }
8230
8231    op_addr_block_post(s, a, addr, n);
8232
8233    if (loaded_base) {
8234        /* Note that we reject base == pc above.  */
8235        store_reg(s, a->rn, loaded_var);
8236    }
8237
8238    if (exc_return) {
8239        /* Restore CPSR from SPSR.  */
8240        tmp = load_cpu_field(spsr);
8241        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
8242            gen_io_start();
8243        }
8244        gen_helper_cpsr_write_eret(cpu_env, tmp);
8245        tcg_temp_free_i32(tmp);
8246        /* Must exit loop to check un-masked IRQs */
8247        s->base.is_jmp = DISAS_EXIT;
8248    }
8249    clear_eci_state(s);
8250    return true;
8251}
8252
8253static bool trans_LDM_a32(DisasContext *s, arg_ldst_block *a)
8254{
8255    /*
8256     * Writeback register in register list is UNPREDICTABLE
8257     * for ArchVersion() >= 7.  Prior to v7, A32 would write
8258     * an UNKNOWN value to the base register.
8259     */
8260    if (ENABLE_ARCH_7 && a->w && (a->list & (1 << a->rn))) {
8261        unallocated_encoding(s);
8262        return true;
8263    }
8264    /* BitCount(list) < 1 is UNPREDICTABLE */
8265    return do_ldm(s, a, 1);
8266}
8267
8268static bool trans_LDM_t32(DisasContext *s, arg_ldst_block *a)
8269{
8270    /* Writeback register in register list is UNPREDICTABLE for T32. */
8271    if (a->w && (a->list & (1 << a->rn))) {
8272        unallocated_encoding(s);
8273        return true;
8274    }
8275    /* BitCount(list) < 2 is UNPREDICTABLE */
8276    return do_ldm(s, a, 2);
8277}
8278
8279static bool trans_LDM_t16(DisasContext *s, arg_ldst_block *a)
8280{
8281    /* Writeback is conditional on the base register not being loaded.  */
8282    a->w = !(a->list & (1 << a->rn));
8283    /* BitCount(list) < 1 is UNPREDICTABLE */
8284    return do_ldm(s, a, 1);
8285}
8286
8287static bool trans_CLRM(DisasContext *s, arg_CLRM *a)
8288{
8289    int i;
8290    TCGv_i32 zero;
8291
8292    if (!dc_isar_feature(aa32_m_sec_state, s)) {
8293        return false;
8294    }
8295
8296    if (extract32(a->list, 13, 1)) {
8297        return false;
8298    }
8299
8300    if (!a->list) {
8301        /* UNPREDICTABLE; we choose to UNDEF */
8302        return false;
8303    }
8304
8305    s->eci_handled = true;
8306
8307    zero = tcg_const_i32(0);
8308    for (i = 0; i < 15; i++) {
8309        if (extract32(a->list, i, 1)) {
8310            /* Clear R[i] */
8311            tcg_gen_mov_i32(cpu_R[i], zero);
8312        }
8313    }
8314    if (extract32(a->list, 15, 1)) {
8315        /*
8316         * Clear APSR (by calling the MSR helper with the same argument
8317         * as for "MSR APSR_nzcvqg, Rn": mask = 0b1100, SYSM=0)
8318         */
8319        TCGv_i32 maskreg = tcg_const_i32(0xc << 8);
8320        gen_helper_v7m_msr(cpu_env, maskreg, zero);
8321        tcg_temp_free_i32(maskreg);
8322    }
8323    tcg_temp_free_i32(zero);
8324    clear_eci_state(s);
8325    return true;
8326}
8327
8328/*
8329 * Branch, branch with link
8330 */
8331
8332static bool trans_B(DisasContext *s, arg_i *a)
8333{
8334    gen_jmp(s, read_pc(s) + a->imm);
8335    return true;
8336}
8337
8338static bool trans_B_cond_thumb(DisasContext *s, arg_ci *a)
8339{
8340    /* This has cond from encoding, required to be outside IT block.  */
8341    if (a->cond >= 0xe) {
8342        return false;
8343    }
8344    if (s->condexec_mask) {
8345        unallocated_encoding(s);
8346        return true;
8347    }
8348    arm_skip_unless(s, a->cond);
8349    gen_jmp(s, read_pc(s) + a->imm);
8350    return true;
8351}
8352
8353static bool trans_BL(DisasContext *s, arg_i *a)
8354{
8355    tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | s->thumb);
8356    gen_jmp(s, read_pc(s) + a->imm);
8357    return true;
8358}
8359
8360static bool trans_BLX_i(DisasContext *s, arg_BLX_i *a)
8361{
8362    /*
8363     * BLX <imm> would be useless on M-profile; the encoding space
8364     * is used for other insns from v8.1M onward, and UNDEFs before that.
8365     */
8366    if (arm_dc_feature(s, ARM_FEATURE_M)) {
8367        return false;
8368    }
8369
8370    /* For A32, ARM_FEATURE_V5 is checked near the start of the uncond block. */
8371    if (s->thumb && (a->imm & 2)) {
8372        return false;
8373    }
8374    tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | s->thumb);
8375    store_cpu_field_constant(!s->thumb, thumb);
8376    gen_jmp(s, (read_pc(s) & ~3) + a->imm);
8377    return true;
8378}
8379
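    /*
     * Pre-Thumb-2 cores execute the 32-bit BL/BLX pair as two separate
     * 16-bit insns (hence the asserts): the prefix deposits the upper
     * offset bits in LR, and the suffix combines LR with its own offset
     * to form the target while writing the real return address to LR.
     */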
8380static bool trans_BL_BLX_prefix(DisasContext *s, arg_BL_BLX_prefix *a)
8381{
8382    assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
8383    tcg_gen_movi_i32(cpu_R[14], read_pc(s) + (a->imm << 12));
8384    return true;
8385}
8386
8387static bool trans_BL_suffix(DisasContext *s, arg_BL_suffix *a)
8388{
8389    TCGv_i32 tmp = tcg_temp_new_i32();
8390
8391    assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
8392    tcg_gen_addi_i32(tmp, cpu_R[14], (a->imm << 1) | 1);
8393    tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | 1);
8394    gen_bx(s, tmp);
8395    return true;
8396}
8397
8398static bool trans_BLX_suffix(DisasContext *s, arg_BLX_suffix *a)
8399{
8400    TCGv_i32 tmp;
8401
8402    assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
8403    if (!ENABLE_ARCH_5) {
8404        return false;
8405    }
8406    tmp = tcg_temp_new_i32();
8407    tcg_gen_addi_i32(tmp, cpu_R[14], a->imm << 1);
8408    tcg_gen_andi_i32(tmp, tmp, 0xfffffffc);
8409    tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | 1);
8410    gen_bx(s, tmp);
8411    return true;
8412}
8413
8414static bool trans_BF(DisasContext *s, arg_BF *a)
8415{
8416    /*
8417     * M-profile branch future insns. The architecture permits an
8418     * implementation to implement these as NOPs (equivalent to
8419     * discarding the LO_BRANCH_INFO cache immediately), and we
8420     * take that IMPDEF option because for QEMU a "real" implementation
8421     * would be complicated and wouldn't execute any faster.
8422     */
8423    if (!dc_isar_feature(aa32_lob, s)) {
8424        return false;
8425    }
8426    if (a->boff == 0) {
8427        /* SEE "Related encodings" (loop insns) */
8428        return false;
8429    }
8430    /* Handle as NOP */
8431    return true;
8432}
8433
8434static bool trans_DLS(DisasContext *s, arg_DLS *a)
8435{
8436    /* M-profile low-overhead loop start */
8437    TCGv_i32 tmp;
8438
8439    if (!dc_isar_feature(aa32_lob, s)) {
8440        return false;
8441    }
8442    if (a->rn == 13 || a->rn == 15) {
8443        /*
8444         * For DLSTP rn == 15 is a related encoding (LCTP); the
8445         * other cases caught by this condition are all
8446         * CONSTRAINED UNPREDICTABLE: we choose to UNDEF
8447         */
8448        return false;
8449    }
8450
8451    if (a->size != 4) {
8452        /* DLSTP */
8453        if (!dc_isar_feature(aa32_mve, s)) {
8454            return false;
8455        }
8456        if (!vfp_access_check(s)) {
8457            return true;
8458        }
8459    }
8460
8461    /* Not a while loop: set LR to the count, and set LTPSIZE for DLSTP */
8462    tmp = load_reg(s, a->rn);
8463    store_reg(s, 14, tmp);
8464    if (a->size != 4) {
8465        /* DLSTP: set FPSCR.LTPSIZE */
8466        tmp = tcg_const_i32(a->size);
8467        store_cpu_field(tmp, v7m.ltpsize);
8468        s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
8469    }
8470    return true;
8471}
8472
8473static bool trans_WLS(DisasContext *s, arg_WLS *a)
8474{
8475    /* M-profile low-overhead while-loop start */
8476    TCGv_i32 tmp;
8477    TCGLabel *nextlabel;
8478
8479    if (!dc_isar_feature(aa32_lob, s)) {
8480        return false;
8481    }
8482    if (a->rn == 13 || a->rn == 15) {
8483        /*
8484         * For WLSTP rn == 15 is a related encoding (LE); the
8485         * other cases caught by this condition are all
8486         * CONSTRAINED UNPREDICTABLE: we choose to UNDEF
8487         */
8488        return false;
8489    }
8490    if (s->condexec_mask) {
8491        /*
8492         * WLS in an IT block is CONSTRAINED UNPREDICTABLE;
8493         * we choose to UNDEF, because otherwise our use of
8494         * gen_goto_tb(1) would clash with the use of TB exit 1
8495         * in the dc->condjmp condition-failed codepath in
8496         * arm_tr_tb_stop() and we'd get an assertion.
8497         */
8498        return false;
8499    }
8500    if (a->size != 4) {
8501        /* WLSTP */
8502        if (!dc_isar_feature(aa32_mve, s)) {
8503            return false;
8504        }
8505        /*
8506         * We need to check that the FPU is enabled here, but mustn't
8507         * call vfp_access_check() to do that because we don't want to
8508         * do the lazy state preservation in the "loop count is zero" case.
8509         * Do the check-and-raise-exception by hand.
8510         */
8511        if (s->fp_excp_el) {
8512            gen_exception_insn(s, s->pc_curr, EXCP_NOCP,
8513                               syn_uncategorized(), s->fp_excp_el);
8514            return true;
8515        }
8516    }
8517
8518    nextlabel = gen_new_label();
8519    tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_R[a->rn], 0, nextlabel);
8520    tmp = load_reg(s, a->rn);
8521    store_reg(s, 14, tmp);
8522    if (a->size != 4) {
8523        /*
8524         * WLSTP: set FPSCR.LTPSIZE. This requires that we do the
8525         * lazy state preservation, new FP context creation, etc,
8526         * that vfp_access_check() does. We know that the actual
8527         * access check will succeed (ie it won't generate code that
8528         * throws an exception) because we did that check by hand earlier.
8529         */
8530        bool ok = vfp_access_check(s);
8531        assert(ok);
8532        tmp = tcg_const_i32(a->size);
8533        store_cpu_field(tmp, v7m.ltpsize);
8534        /*
8535         * LTPSIZE updated, but MVE_NO_PRED will always be the same thing (0)
8536         * when we take this upcoming exit from this TB, so gen_jmp_tb() is OK.
8537         */
8538    }
8539    gen_jmp_tb(s, s->base.pc_next, 1);
8540
8541    gen_set_label(nextlabel);
8542    gen_jmp(s, read_pc(s) + a->imm);
8543    return true;
8544}
8545
8546static bool trans_LE(DisasContext *s, arg_LE *a)
8547{
8548    /*
8549     * M-profile low-overhead loop end. The architecture permits an
8550     * implementation to discard the LO_BRANCH_INFO cache at any time,
8551     * and we take the IMPDEF option to never set it in the first place
8552     * (equivalent to always discarding it immediately), because for QEMU
8553     * a "real" implementation would be complicated and wouldn't execute
8554     * any faster.
8555     */
8556    TCGv_i32 tmp;
8557    TCGLabel *loopend;
8558    bool fpu_active;
8559
8560    if (!dc_isar_feature(aa32_lob, s)) {
8561        return false;
8562    }
8563    if (a->f && a->tp) {
8564        return false;
8565    }
8566    if (s->condexec_mask) {
8567        /*
8568         * LE in an IT block is CONSTRAINED UNPREDICTABLE;
8569         * we choose to UNDEF, because otherwise our use of
8570         * gen_goto_tb(1) would clash with the use of TB exit 1
8571         * in the dc->condjmp condition-failed codepath in
8572         * arm_tr_tb_stop() and we'd get an assertion.
8573         */
8574        return false;
8575    }
8576    if (a->tp) {
8577        /* LETP */
8578        if (!dc_isar_feature(aa32_mve, s)) {
8579            return false;
8580        }
8581        if (!vfp_access_check(s)) {
8582            s->eci_handled = true;
8583            return true;
8584        }
8585    }
8586
8587    /* LE/LETP is OK with ECI set and leaves it untouched */
8588    s->eci_handled = true;
8589
8590    /*
8591     * With MVE, LTPSIZE might not be 4, and we must emit an INVSTATE
8592     * UsageFault exception for the LE insn in that case. Note that we
8593     * are not directly checking FPSCR.LTPSIZE but instead check the
8594     * pseudocode LTPSIZE() function, which returns 4 if the FPU is
8595     * not currently active (ie ActiveFPState() returns false). We
8596     * can identify not-active purely from our TB state flags, as the
8597     * FPU is active only if:
8598     *  the FPU is enabled
8599     *  AND lazy state preservation is not active
8600     *  AND we do not need a new fp context (this is the ASPEN/FPCA check)
8601     *
8602     * Usually we don't need to care about this distinction between
8603     * LTPSIZE and FPSCR.LTPSIZE, because the code in vfp_access_check()
8604     * will either take an exception or clear the conditions that make
8605     * the FPU not active. But LE is an unusual case of a non-FP insn
8606     * that looks at LTPSIZE.
8607     */
8608    fpu_active = !s->fp_excp_el && !s->v7m_lspact && !s->v7m_new_fp_ctxt_needed;
8609
8610    if (!a->tp && dc_isar_feature(aa32_mve, s) && fpu_active) {
8611        /* Need to do a runtime check for LTPSIZE != 4 */
8612        TCGLabel *skipexc = gen_new_label();
8613        tmp = load_cpu_field(v7m.ltpsize);
8614        tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 4, skipexc);
8615        tcg_temp_free_i32(tmp);
8616        gen_exception_insn(s, s->pc_curr, EXCP_INVSTATE, syn_uncategorized(),
8617                           default_exception_el(s));
8618        gen_set_label(skipexc);
8619    }
8620
8621    if (a->f) {
8622        /* Loop-forever: just jump back to the loop start */
8623        gen_jmp(s, read_pc(s) - a->imm);
8624        return true;
8625    }
8626
8627    /*
8628     * Not loop-forever. If LR <= loop-decrement-value this is the last loop.
8629     * For LE, we know at this point that LTPSIZE must be 4 and the
8630     * loop decrement value is 1. For LETP we need to calculate the decrement
8631     * value from LTPSIZE.
8632     */
8633    loopend = gen_new_label();
8634    if (!a->tp) {
8635        tcg_gen_brcondi_i32(TCG_COND_LEU, cpu_R[14], 1, loopend);
8636        tcg_gen_addi_i32(cpu_R[14], cpu_R[14], -1);
8637    } else {
8638        /*
8639         * Decrement by 1 << (4 - LTPSIZE). We need to use a TCG local
8640         * so that decr stays live after the brcondi.
8641         */
8642        TCGv_i32 decr = tcg_temp_local_new_i32();
8643        TCGv_i32 ltpsize = load_cpu_field(v7m.ltpsize);
8644        tcg_gen_sub_i32(decr, tcg_constant_i32(4), ltpsize);
8645        tcg_gen_shl_i32(decr, tcg_constant_i32(1), decr);
8646        tcg_temp_free_i32(ltpsize);
8647
8648        tcg_gen_brcond_i32(TCG_COND_LEU, cpu_R[14], decr, loopend);
8649
8650        tcg_gen_sub_i32(cpu_R[14], cpu_R[14], decr);
8651        tcg_temp_free_i32(decr);
8652    }
8653    /* Jump back to the loop start */
8654    gen_jmp(s, read_pc(s) - a->imm);
8655
8656    gen_set_label(loopend);
8657    if (a->tp) {
8658        /* Exits from tail-pred loops must reset LTPSIZE to 4 */
8659        tmp = tcg_const_i32(4);
8660        store_cpu_field(tmp, v7m.ltpsize);
8661    }
8662    /* End TB, continuing to following insn */
8663    gen_jmp_tb(s, s->base.pc_next, 1);
8664    return true;
8665}
8666
8667static bool trans_LCTP(DisasContext *s, arg_LCTP *a)
8668{
8669    /*
8670     * M-profile Loop Clear with Tail Predication. Since our implementation
8671     * doesn't cache branch information, all we need to do is reset
8672     * FPSCR.LTPSIZE to 4.
8673     */
8674
8675    if (!dc_isar_feature(aa32_lob, s) ||
8676        !dc_isar_feature(aa32_mve, s)) {
8677        return false;
8678    }
8679
8680    if (!vfp_access_check(s)) {
8681        return true;
8682    }
8683
8684    store_cpu_field_constant(4, v7m.ltpsize);
8685    return true;
8686}
8687
8688static bool trans_VCTP(DisasContext *s, arg_VCTP *a)
8689{
8690    /*
8691     * M-profile Create Vector Tail Predicate. This insn is itself
8692     * predicated and is subject to beatwise execution.
8693     */
8694    TCGv_i32 rn_shifted, masklen;
8695
8696    if (!dc_isar_feature(aa32_mve, s) || a->rn == 13 || a->rn == 15) {
8697        return false;
8698    }
8699
8700    if (!mve_eci_check(s) || !vfp_access_check(s)) {
8701        return true;
8702    }
8703
8704    /*
8705     * We pre-calculate the mask length here so that we do not need
8706     * multiple helpers specialized by element size.
8707     * We pass the helper "rn <= (1 << (4 - size)) ? (rn << size) : 16".
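         * As a worked example (illustrative values): for size == 2 the
         * 128-bit vector holds 1 << (4 - 2) == 4 elements, so Rn == 3
         * gives a mask length of 3 << 2 == 12, while any Rn above 4
         * saturates to 16.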
8708     */
8709    rn_shifted = tcg_temp_new_i32();
8710    masklen = load_reg(s, a->rn);
8711    tcg_gen_shli_i32(rn_shifted, masklen, a->size);
8712    tcg_gen_movcond_i32(TCG_COND_LEU, masklen,
8713                        masklen, tcg_constant_i32(1 << (4 - a->size)),
8714                        rn_shifted, tcg_constant_i32(16));
8715    gen_helper_mve_vctp(cpu_env, masklen);
8716    tcg_temp_free_i32(masklen);
8717    tcg_temp_free_i32(rn_shifted);
8718    /* This insn updates predication bits */
8719    s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
8720    mve_update_eci(s);
8721    return true;
8722}
8723
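    /*
     * Table Branch: load an unsigned byte (TBB) or halfword (TBH) from a
     * table at Rn + Rm (Rn + 2 * Rm for TBH) and branch to PC plus twice
     * the loaded value. Shared by the TBB and TBH trans functions below.
     */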
8724static bool op_tbranch(DisasContext *s, arg_tbranch *a, bool half)
8725{
8726    TCGv_i32 addr, tmp;
8727
8728    tmp = load_reg(s, a->rm);
8729    if (half) {
8730        tcg_gen_add_i32(tmp, tmp, tmp);
8731    }
8732    addr = load_reg(s, a->rn);
8733    tcg_gen_add_i32(addr, addr, tmp);
8734
8735    gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), half ? MO_UW : MO_UB);
8736    tcg_temp_free_i32(addr);
8737
8738    tcg_gen_add_i32(tmp, tmp, tmp);
8739    tcg_gen_addi_i32(tmp, tmp, read_pc(s));
8740    store_reg(s, 15, tmp);
8741    return true;
8742}
8743
8744static bool trans_TBB(DisasContext *s, arg_tbranch *a)
8745{
8746    return op_tbranch(s, a, false);
8747}
8748
8749static bool trans_TBH(DisasContext *s, arg_tbranch *a)
8750{
8751    return op_tbranch(s, a, true);
8752}
8753
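    /* CBZ, CBNZ: Compare and Branch (forwards only) on Zero / Non-Zero */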
8754static bool trans_CBZ(DisasContext *s, arg_CBZ *a)
8755{
8756    TCGv_i32 tmp = load_reg(s, a->rn);
8757
8758    arm_gen_condlabel(s);
8759    tcg_gen_brcondi_i32(a->nz ? TCG_COND_EQ : TCG_COND_NE,
8760                        tmp, 0, s->condlabel);
8761    tcg_temp_free_i32(tmp);
8762    gen_jmp(s, read_pc(s) + a->imm);
8763    return true;
8764}
8765
8766/*
8767 * Supervisor call - both T32 & A32 come here so we need to check
8768 * which mode we are in when checking for semihosting.
8769 */
8770
8771static bool trans_SVC(DisasContext *s, arg_SVC *a)
8772{
8773    const uint32_t semihost_imm = s->thumb ? 0xab : 0x123456;
8774
8775    if (!arm_dc_feature(s, ARM_FEATURE_M) && semihosting_enabled() &&
8776#ifndef CONFIG_USER_ONLY
8777        !IS_USER(s) &&
8778#endif
8779        (a->imm == semihost_imm)) {
8780        gen_exception_internal_insn(s, s->pc_curr, EXCP_SEMIHOST);
8781    } else {
8782        gen_set_pc_im(s, s->base.pc_next);
8783        s->svc_imm = a->imm;
8784        s->base.is_jmp = DISAS_SWI;
8785    }
8786    return true;
8787}
8788
8789/*
8790 * Unconditional system instructions
8791 */
8792
8793static bool trans_RFE(DisasContext *s, arg_RFE *a)
8794{
8795    static const int8_t pre_offset[4] = {
8796        /* DA */ -4, /* IA */ 0, /* DB */ -8, /* IB */ 4
8797    };
8798    static const int8_t post_offset[4] = {
8799        /* DA */ -8, /* IA */ 4, /* DB */ -4, /* IB */ 0
8800    };
8801    TCGv_i32 addr, t1, t2;
8802
8803    if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
8804        return false;
8805    }
8806    if (IS_USER(s)) {
8807        unallocated_encoding(s);
8808        return true;
8809    }
8810
8811    addr = load_reg(s, a->rn);
8812    tcg_gen_addi_i32(addr, addr, pre_offset[a->pu]);
8813
8814    /* Load PC into t1 and CPSR into t2.  */
8815    t1 = tcg_temp_new_i32();
8816    gen_aa32_ld_i32(s, t1, addr, get_mem_index(s), MO_UL | MO_ALIGN);
8817    tcg_gen_addi_i32(addr, addr, 4);
8818    t2 = tcg_temp_new_i32();
8819    gen_aa32_ld_i32(s, t2, addr, get_mem_index(s), MO_UL | MO_ALIGN);
8820
8821    if (a->w) {
8822        /* Base writeback.  */
8823        tcg_gen_addi_i32(addr, addr, post_offset[a->pu]);
8824        store_reg(s, a->rn, addr);
8825    } else {
8826        tcg_temp_free_i32(addr);
8827    }
8828    gen_rfe(s, t1, t2);
8829    return true;
8830}
8831
8832static bool trans_SRS(DisasContext *s, arg_SRS *a)
8833{
8834    if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
8835        return false;
8836    }
8837    gen_srs(s, a->mode, a->pu, a->w);
8838    return true;
8839}
8840
8841static bool trans_CPS(DisasContext *s, arg_CPS *a)
8842{
8843    uint32_t mask, val;
8844
8845    if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
8846        return false;
8847    }
8848    if (IS_USER(s)) {
8849        /* Implemented as NOP in user mode.  */
8850        return true;
8851    }
8852    /* TODO: There are quite a lot of UNPREDICTABLE argument combinations. */
8853
8854    mask = val = 0;
8855    if (a->imod & 2) {
8856        if (a->A) {
8857            mask |= CPSR_A;
8858        }
8859        if (a->I) {
8860            mask |= CPSR_I;
8861        }
8862        if (a->F) {
8863            mask |= CPSR_F;
8864        }
8865        if (a->imod & 1) {
8866            val |= mask;
8867        }
8868    }
8869    if (a->M) {
8870        mask |= CPSR_M;
8871        val |= a->mode;
8872    }
8873    if (mask) {
8874        gen_set_psr_im(s, mask, 0, val);
8875    }
8876    return true;
8877}
8878
8879static bool trans_CPS_v7m(DisasContext *s, arg_CPS_v7m *a)
8880{
8881    TCGv_i32 tmp, addr, el;
8882
8883    if (!arm_dc_feature(s, ARM_FEATURE_M)) {
8884        return false;
8885    }
8886    if (IS_USER(s)) {
8887        /* Implemented as NOP in user mode.  */
8888        return true;
8889    }
8890
8891    tmp = tcg_const_i32(a->im);
8892    /* FAULTMASK */
8893    if (a->F) {
8894        addr = tcg_const_i32(19);
8895        gen_helper_v7m_msr(cpu_env, addr, tmp);
8896        tcg_temp_free_i32(addr);
8897    }
8898    /* PRIMASK */
8899    if (a->I) {
8900        addr = tcg_const_i32(16);
8901        gen_helper_v7m_msr(cpu_env, addr, tmp);
8902        tcg_temp_free_i32(addr);
8903    }
8904    el = tcg_const_i32(s->current_el);
8905    gen_helper_rebuild_hflags_m32(cpu_env, el);
8906    tcg_temp_free_i32(el);
8907    tcg_temp_free_i32(tmp);
8908    gen_lookup_tb(s);
8909    return true;
8910}
8911
8912/*
8913 * Clear-Exclusive, Barriers
8914 */
8915
8916static bool trans_CLREX(DisasContext *s, arg_CLREX *a)
8917{
8918    if (s->thumb
8919        ? !ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)
8920        : !ENABLE_ARCH_6K) {
8921        return false;
8922    }
8923    gen_clrex(s);
8924    return true;
8925}
8926
8927static bool trans_DSB(DisasContext *s, arg_DSB *a)
8928{
8929    if (!ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)) {
8930        return false;
8931    }
8932    tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
8933    return true;
8934}
8935
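    /* QEMU implements DMB identically to DSB: both emit a full barrier. */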
8936static bool trans_DMB(DisasContext *s, arg_DMB *a)
8937{
8938    return trans_DSB(s, NULL);
8939}
8940
8941static bool trans_ISB(DisasContext *s, arg_ISB *a)
8942{
8943    if (!ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)) {
8944        return false;
8945    }
8946    /*
8947     * We need to break the TB after this insn to execute
8948     * self-modifying code correctly and also to take
8949     * any pending interrupts immediately.
8950     */
8951    s->base.is_jmp = DISAS_TOO_MANY;
8952    return true;
8953}
8954
8955static bool trans_SB(DisasContext *s, arg_SB *a)
8956{
8957    if (!dc_isar_feature(aa32_sb, s)) {
8958        return false;
8959    }
8960    /*
8961     * TODO: There is no speculation barrier opcode
8962     * for TCG; MB and end the TB instead.
8963     */
8964    tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
8965    s->base.is_jmp = DISAS_TOO_MANY;
8966    return true;
8967}
8968
8969static bool trans_SETEND(DisasContext *s, arg_SETEND *a)
8970{
8971    if (!ENABLE_ARCH_6) {
8972        return false;
8973    }
8974    if (a->E != (s->be_data == MO_BE)) {
8975        gen_helper_setend(cpu_env);
8976        s->base.is_jmp = DISAS_UPDATE_EXIT;
8977    }
8978    return true;
8979}
8980
8981/*
8982 * Preload instructions
8983 * All are nops, contingent on the appropriate arch level.
8984 */
8985
8986static bool trans_PLD(DisasContext *s, arg_PLD *a)
8987{
8988    return ENABLE_ARCH_5TE;
8989}
8990
8991static bool trans_PLDW(DisasContext *s, arg_PLD *a)
8992{
8993    return arm_dc_feature(s, ARM_FEATURE_V7MP);
8994}
8995
8996static bool trans_PLI(DisasContext *s, arg_PLD *a)
8997{
8998    return ENABLE_ARCH_7;
8999}
9000
9001/*
9002 * If-then
9003 */
9004
9005static bool trans_IT(DisasContext *s, arg_IT *a)
9006{
9007    int cond_mask = a->cond_mask;
9008
9009    /*
9010     * No actual code generated for this insn, just setup state.
9011     *
9012     * Combinations of firstcond and mask which set up a 0b1111
9013     * condition are UNPREDICTABLE; we take the CONSTRAINED
9014     * UNPREDICTABLE choice to treat 0b1111 the same as 0b1110,
9015     * i.e. both meaning "execute always".
9016     */
9017    s->condexec_cond = (cond_mask >> 4) & 0xe;
9018    s->condexec_mask = cond_mask & 0x1f;
9019    return true;
9020}
9021
9022/* v8.1M CSEL/CSINC/CSNEG/CSINV */
9023static bool trans_CSEL(DisasContext *s, arg_CSEL *a)
9024{
9025    TCGv_i32 rn, rm, zero;
9026    DisasCompare c;
9027
9028    if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
9029        return false;
9030    }
9031
9032    if (a->rm == 13) {
9033        /* SEE "Related encodings" (MVE shifts) */
9034        return false;
9035    }
9036
9037    if (a->rd == 13 || a->rd == 15 || a->rn == 13 || a->fcond >= 14) {
9038        /* CONSTRAINED UNPREDICTABLE: we choose to UNDEF */
9039        return false;
9040    }
9041
9042    /* In this insn input reg fields of 0b1111 mean "zero", not "PC" */
9043    if (a->rn == 15) {
9044        rn = tcg_const_i32(0);
9045    } else {
9046        rn = load_reg(s, a->rn);
9047    }
9048    if (a->rm == 15) {
9049        rm = tcg_const_i32(0);
9050    } else {
9051        rm = load_reg(s, a->rm);
9052    }
9053
9054    switch (a->op) {
9055    case 0: /* CSEL */
9056        break;
9057    case 1: /* CSINC */
9058        tcg_gen_addi_i32(rm, rm, 1);
9059        break;
9060    case 2: /* CSINV */
9061        tcg_gen_not_i32(rm, rm);
9062        break;
9063    case 3: /* CSNEG */
9064        tcg_gen_neg_i32(rm, rm);
9065        break;
9066    default:
9067        g_assert_not_reached();
9068    }
9069
9070    arm_test_cc(&c, a->fcond);
9071    zero = tcg_const_i32(0);
9072    tcg_gen_movcond_i32(c.cond, rn, c.value, zero, rn, rm);
9073    arm_free_cc(&c);
9074    tcg_temp_free_i32(zero);
9075
9076    store_reg(s, a->rd, rn);
9077    tcg_temp_free_i32(rm);
9078
9079    return true;
9080}
9081
9082/*
9083 * Legacy decoder.
9084 */
9085
9086static void disas_arm_insn(DisasContext *s, unsigned int insn)
9087{
9088    unsigned int cond = insn >> 28;
9089
9090    /* M variants do not implement ARM mode; this must raise the INVSTATE
9091     * UsageFault exception.
9092     */
9093    if (arm_dc_feature(s, ARM_FEATURE_M)) {
9094        gen_exception_insn(s, s->pc_curr, EXCP_INVSTATE, syn_uncategorized(),
9095                           default_exception_el(s));
9096        return;
9097    }
9098
9099    if (s->pstate_il) {
9100        /*
9101         * Illegal execution state. This has priority over BTI
9102         * exceptions, but comes after instruction abort exceptions.
9103         */
9104        gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
9105                           syn_illegalstate(), default_exception_el(s));
9106        return;
9107    }
9108
9109    if (cond == 0xf) {
9110        /* In ARMv3 and v4 the NV condition is UNPREDICTABLE; we
9111         * choose to UNDEF. In ARMv5 and above the space is used
9112         * for miscellaneous unconditional instructions.
9113         */
9114        if (!arm_dc_feature(s, ARM_FEATURE_V5)) {
9115            unallocated_encoding(s);
9116            return;
9117        }
9118
9119        /* Unconditional instructions.  */
9120        /* TODO: Perhaps merge these into one decodetree output file.  */
9121        if (disas_a32_uncond(s, insn) ||
9122            disas_vfp_uncond(s, insn) ||
9123            disas_neon_dp(s, insn) ||
9124            disas_neon_ls(s, insn) ||
9125            disas_neon_shared(s, insn)) {
9126            return;
9127        }
9128        /* fall back to legacy decoder */
9129
9130        if ((insn & 0x0e000f00) == 0x0c000100) {
9131            if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
9132                /* iWMMXt register transfer.  */
9133                if (extract32(s->c15_cpar, 1, 1)) {
9134                    if (!disas_iwmmxt_insn(s, insn)) {
9135                        return;
9136                    }
9137                }
9138            }
9139        }
9140        goto illegal_op;
9141    }
9142    if (cond != 0xe) {
9143        /* If the condition is not "always", generate a conditional
9144           jump to the next instruction. */
9145        arm_skip_unless(s, cond);
9146    }
9147
9148    /* TODO: Perhaps merge these into one decodetree output file.  */
9149    if (disas_a32(s, insn) ||
9150        disas_vfp(s, insn)) {
9151        return;
9152    }
9153    /* fall back to legacy decoder */
9154    /* TODO: convert xscale/iwmmxt decoder to decodetree ?? */
9155    if (arm_dc_feature(s, ARM_FEATURE_XSCALE)) {
9156        if (((insn & 0x0c000e00) == 0x0c000000)
9157            && ((insn & 0x03000000) != 0x03000000)) {
9158            /* Coprocessor insn, coprocessor 0 or 1 */
9159            disas_xscale_insn(s, insn);
9160            return;
9161        }
9162    }
9163
9164illegal_op:
9165    unallocated_encoding(s);
9166}
9167
9168static bool thumb_insn_is_16bit(DisasContext *s, uint32_t pc, uint32_t insn)
9169{
9170    /*
9171     * Return true if this is a 16-bit instruction. We must be precise
9172     * about this (matching the decode).
9173     */
9174    if ((insn >> 11) < 0x1d) {
9175        /* Definitely a 16-bit instruction */
9176        return true;
9177    }
9178
9179    /* Top five bits 0b11101 / 0b11110 / 0b11111 : this is the
9180     * first half of a 32-bit Thumb insn. Thumb-1 cores might
9181     * end up actually treating this as two 16-bit insns, though,
9182     * if it's half of a bl/blx pair that might span a page boundary.
9183     */
9184    if (arm_dc_feature(s, ARM_FEATURE_THUMB2) ||
9185        arm_dc_feature(s, ARM_FEATURE_M)) {
9186        /* Thumb2 cores (including all M profile ones) always treat
9187         * 32-bit insns as 32-bit.
9188         */
9189        return false;
9190    }
9191
9192    if ((insn >> 11) == 0x1e && pc - s->page_start < TARGET_PAGE_SIZE - 3) {
9193        /* 0b1111_0xxx_xxxx_xxxx : BL/BLX prefix, and the suffix
9194         * is not on the next page; we merge this into a 32-bit
9195         * insn.
9196         */
9197        return false;
9198    }
9199    /* 0b1110_1xxx_xxxx_xxxx : BLX suffix (or UNDEF);
9200     * 0b1111_1xxx_xxxx_xxxx : BL suffix;
9201     * 0b1111_0xxx_xxxx_xxxx : BL/BLX prefix on the end of a page
9202     *  -- handle as single 16 bit insn
9203     */
9204    return true;
9205}
9206
9207/* Translate a 32-bit thumb instruction. */
9208static void disas_thumb2_insn(DisasContext *s, uint32_t insn)
9209{
9210    /*
9211     * ARMv6-M supports a limited subset of Thumb2 instructions.
9212     * On other Thumb-1 architectures the only 32-bit encodings
9213     * are the combined BL/BLX prefix and suffix.
9214     */
9215    if (arm_dc_feature(s, ARM_FEATURE_M) &&
9216        !arm_dc_feature(s, ARM_FEATURE_V7)) {
9217        int i;
9218        bool found = false;
9219        static const uint32_t armv6m_insn[] = {0xf3808000 /* msr */,
9220                                               0xf3b08040 /* dsb */,
9221                                               0xf3b08050 /* dmb */,
9222                                               0xf3b08060 /* isb */,
9223                                               0xf3e08000 /* mrs */,
9224                                               0xf000d000 /* bl */};
9225        static const uint32_t armv6m_mask[] = {0xffe0d000,
9226                                               0xfff0d0f0,
9227                                               0xfff0d0f0,
9228                                               0xfff0d0f0,
9229                                               0xffe0d000,
9230                                               0xf800d000};
9231
9232        for (i = 0; i < ARRAY_SIZE(armv6m_insn); i++) {
9233            if ((insn & armv6m_mask[i]) == armv6m_insn[i]) {
9234                found = true;
9235                break;
9236            }
9237        }
9238        if (!found) {
9239            goto illegal_op;
9240        }
9241    } else if ((insn & 0xf800e800) != 0xf000e800)  {
9242        if (!arm_dc_feature(s, ARM_FEATURE_THUMB2)) {
9243            unallocated_encoding(s);
9244            return;
9245        }
9246    }
9247
9248    if (arm_dc_feature(s, ARM_FEATURE_M)) {
9249        /*
9250         * NOCP takes precedence over any UNDEF for (almost) the
9251         * entire wide range of coprocessor-space encodings, so check
9252         * for it first before proceeding to actually decode eg VFP
9253         * insns. This decode also handles the few insns which are
9254         * in copro space but do not have NOCP checks (eg VLLDM, VLSTM).
9255         */
9256        if (disas_m_nocp(s, insn)) {
9257            return;
9258        }
9259    }
9260
9261    if ((insn & 0xef000000) == 0xef000000) {
9262        /*
9263         * T32 encodings 0b111p_1111_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq
9264         * transform into
9265         * A32 encodings 0b1111_001p_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq
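             * For illustration (arbitrary bit patterns, not necessarily
             * valid encodings): 0xef012345 becomes 0xf2012345, and
             * 0xff012345 becomes 0xf3012345.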
9266         */
9267        uint32_t a32_insn = (insn & 0xe2ffffff) |
9268            ((insn & (1 << 28)) >> 4) | (1 << 28);
9269
9270        if (disas_neon_dp(s, a32_insn)) {
9271            return;
9272        }
9273    }
9274
9275    if ((insn & 0xff100000) == 0xf9000000) {
9276        /*
9277         * T32 encodings 0b1111_1001_ppp0_qqqq_qqqq_qqqq_qqqq_qqqq
9278         * transform into
9279         * A32 encodings 0b1111_0100_ppp0_qqqq_qqqq_qqqq_qqqq_qqqq
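             * For illustration (arbitrary bit pattern): 0xf9012345
             * becomes 0xf4012345.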
9280         */
9281        uint32_t a32_insn = (insn & 0x00ffffff) | 0xf4000000;
9282
9283        if (disas_neon_ls(s, a32_insn)) {
9284            return;
9285        }
9286    }
9287
9288    /*
9289     * TODO: Perhaps merge these into one decodetree output file.
9290     * Note disas_vfp is written for a32 with cond field in the
9291     * top nibble.  The t32 encoding requires 0xe in the top nibble.
9292     */
9293    if (disas_t32(s, insn) ||
9294        disas_vfp_uncond(s, insn) ||
9295        disas_neon_shared(s, insn) ||
9296        disas_mve(s, insn) ||
9297        ((insn >> 28) == 0xe && disas_vfp(s, insn))) {
9298        return;
9299    }
9300
9301illegal_op:
9302    unallocated_encoding(s);
9303}
9304
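    /* Translate a 16-bit thumb instruction. */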
9305static void disas_thumb_insn(DisasContext *s, uint32_t insn)
9306{
9307    if (!disas_t16(s, insn)) {
9308        unallocated_encoding(s);
9309    }
9310}
9311
9312static bool insn_crosses_page(CPUARMState *env, DisasContext *s)
9313{
9314    /* Return true if the insn at dc->base.pc_next might cross a page boundary.
9315     * (False positives are OK, false negatives are not.)
9316     * We know this is a Thumb insn, and our caller ensures we are
9317     * only called if dc->base.pc_next is less than 4 bytes from the page
9318     * boundary, so we cross the page if the first 16 bits indicate
9319     * that this is a 32 bit insn.
9320     */
9321    uint16_t insn = arm_lduw_code(env, &s->base, s->base.pc_next, s->sctlr_b);
9322
9323    return !thumb_insn_is_16bit(s, s->base.pc_next, insn);
9324}
9325
9326static void arm_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
9327{
9328    DisasContext *dc = container_of(dcbase, DisasContext, base);
9329    CPUARMState *env = cs->env_ptr;
9330    ARMCPU *cpu = env_archcpu(env);
9331    CPUARMTBFlags tb_flags = arm_tbflags_from_tb(dc->base.tb);
9332    uint32_t condexec, core_mmu_idx;
9333
9334    dc->isar = &cpu->isar;
9335    dc->condjmp = 0;
9336
9337    dc->aarch64 = 0;
9338    /* If we are coming from secure EL0 in a system with a 32-bit EL3, then
9339     * there is no secure EL1, so we route exceptions to EL3.
9340     */
9341    dc->secure_routed_to_el3 = arm_feature(env, ARM_FEATURE_EL3) &&
9342                               !arm_el_is_aa64(env, 3);
9343    dc->thumb = EX_TBFLAG_AM32(tb_flags, THUMB);
9344    dc->be_data = EX_TBFLAG_ANY(tb_flags, BE_DATA) ? MO_BE : MO_LE;
9345    condexec = EX_TBFLAG_AM32(tb_flags, CONDEXEC);
9346    /*
9347     * the CONDEXEC TB flags are CPSR bits [15:10][26:25]. On A-profile this
9348     * is always the IT bits. On M-profile, some of the reserved encodings
9349     * of IT are used instead to indicate either ICI or ECI, which
9350     * indicate partial progress of a restartable insn that was interrupted
9351     * partway through by an exception:
9352     *  * if CONDEXEC[3:0] != 0b0000 : CONDEXEC is IT bits
9353     *  * if CONDEXEC[3:0] == 0b0000 : CONDEXEC is ICI or ECI bits
9354     * In all cases CONDEXEC == 0 means "not in IT block or restartable
9355     * insn, behave normally".
9356     */
9357    dc->eci = dc->condexec_mask = dc->condexec_cond = 0;
9358    dc->eci_handled = false;
9359    dc->insn_eci_rewind = NULL;
9360    if (condexec & 0xf) {
9361        dc->condexec_mask = (condexec & 0xf) << 1;
9362        dc->condexec_cond = condexec >> 4;
9363    } else {
9364        if (arm_feature(env, ARM_FEATURE_M)) {
9365            dc->eci = condexec >> 4;
9366        }
9367    }
9368
9369    core_mmu_idx = EX_TBFLAG_ANY(tb_flags, MMUIDX);
9370    dc->mmu_idx = core_to_arm_mmu_idx(env, core_mmu_idx);
9371    dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
9372#if !defined(CONFIG_USER_ONLY)
9373    dc->user = (dc->current_el == 0);
9374#endif
9375    dc->fp_excp_el = EX_TBFLAG_ANY(tb_flags, FPEXC_EL);
9376    dc->align_mem = EX_TBFLAG_ANY(tb_flags, ALIGN_MEM);
9377    dc->pstate_il = EX_TBFLAG_ANY(tb_flags, PSTATE__IL);
9378
9379    if (arm_feature(env, ARM_FEATURE_M)) {
9380        dc->vfp_enabled = 1;
9381        dc->be_data = MO_TE;
9382        dc->v7m_handler_mode = EX_TBFLAG_M32(tb_flags, HANDLER);
9383        dc->v8m_secure = arm_feature(env, ARM_FEATURE_M_SECURITY) &&
9384            regime_is_secure(env, dc->mmu_idx);
9385        dc->v8m_stackcheck = EX_TBFLAG_M32(tb_flags, STACKCHECK);
9386        dc->v8m_fpccr_s_wrong = EX_TBFLAG_M32(tb_flags, FPCCR_S_WRONG);
9387        dc->v7m_new_fp_ctxt_needed =
9388            EX_TBFLAG_M32(tb_flags, NEW_FP_CTXT_NEEDED);
9389        dc->v7m_lspact = EX_TBFLAG_M32(tb_flags, LSPACT);
9390        dc->mve_no_pred = EX_TBFLAG_M32(tb_flags, MVE_NO_PRED);
9391    } else {
9392        dc->debug_target_el = EX_TBFLAG_ANY(tb_flags, DEBUG_TARGET_EL);
9393        dc->sctlr_b = EX_TBFLAG_A32(tb_flags, SCTLR__B);
9394        dc->hstr_active = EX_TBFLAG_A32(tb_flags, HSTR_ACTIVE);
9395        dc->ns = EX_TBFLAG_A32(tb_flags, NS);
9396        dc->vfp_enabled = EX_TBFLAG_A32(tb_flags, VFPEN);
9397        if (arm_feature(env, ARM_FEATURE_XSCALE)) {
9398            dc->c15_cpar = EX_TBFLAG_A32(tb_flags, XSCALE_CPAR);
9399        } else {
9400            dc->vec_len = EX_TBFLAG_A32(tb_flags, VECLEN);
9401            dc->vec_stride = EX_TBFLAG_A32(tb_flags, VECSTRIDE);
9402        }
9403    }
9404    dc->cp_regs = cpu->cp_regs;
9405    dc->features = env->features;
9406
9407    /* Single step state. The code-generation logic here is:
9408     *  SS_ACTIVE == 0:
9409     *   generate code with no special handling for single-stepping (except
9410     *   that anything that can make us go to SS_ACTIVE == 1 must end the TB;
9411     *   this happens anyway because those changes are all system register or
9412     *   PSTATE writes).
9413     *  SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
9414     *   emit code for one insn
9415     *   emit code to clear PSTATE.SS
9416     *   emit code to generate software step exception for completed step
9417     *   end TB (as usual for having generated an exception)
9418     *  SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
9419     *   emit code to generate a software step exception
9420     *   end the TB
9421     */
9422    dc->ss_active = EX_TBFLAG_ANY(tb_flags, SS_ACTIVE);
9423    dc->pstate_ss = EX_TBFLAG_ANY(tb_flags, PSTATE__SS);
9424    dc->is_ldex = false;
9425
9426    dc->page_start = dc->base.pc_first & TARGET_PAGE_MASK;
9427
9428    /* If architectural single step active, limit to 1.  */
9429    if (dc->ss_active) {
9430        dc->base.max_insns = 1;
9431    }
9432
9433    /* ARM is a fixed-length ISA.  Bound the number of insns to execute
9434       to those left on the page.  */
9435    if (!dc->thumb) {
9436        int bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;
9437        dc->base.max_insns = MIN(dc->base.max_insns, bound);
9438    }
9439
9440    cpu_V0 = tcg_temp_new_i64();
9441    cpu_V1 = tcg_temp_new_i64();
9442    cpu_M0 = tcg_temp_new_i64();
9443}
9444
9445static void arm_tr_tb_start(DisasContextBase *dcbase, CPUState *cpu)
9446{
9447    DisasContext *dc = container_of(dcbase, DisasContext, base);
9448
9449    /* A note on handling of the condexec (IT) bits:
9450     *
9451     * We want to avoid the overhead of having to write the updated condexec
9452     * bits back to the CPUARMState for every instruction in an IT block. So:
9453     * (1) if the condexec bits are not already zero then we write
9454     * zero back into the CPUARMState now. This avoids complications trying
9455     * to do it at the end of the block. (For example if we don't do this
9456     * it's hard to identify whether we can safely skip writing condexec
9457     * at the end of the TB, which we definitely want to do for the case
9458     * where a TB doesn't do anything with the IT state at all.)
9459     * (2) if we are going to leave the TB then we call gen_set_condexec()
9460     * which will write the correct value into CPUARMState if zero is wrong.
9461     * This is done both for leaving the TB at the end, and for leaving
9462     * it because of an exception we know will happen, which is done in
9463     * gen_exception_insn(). The latter is necessary because we need to
9464     * leave the TB with the PC/IT state just prior to execution of the
9465     * instruction which caused the exception.
9466     * (3) if we leave the TB unexpectedly (eg a data abort on a load)
9467     * then the CPUARMState will be wrong and we need to reset it.
9468     * This is handled in the same way as restoration of the
9469     * PC in these situations; we save the value of the condexec bits
9470     * for each PC via tcg_gen_insn_start(), and restore_state_to_opc()
9471     * then uses this to restore them after an exception.
9472     *
9473     * Note that there are no instructions which can read the condexec
9474     * bits, and none which can write non-static values to them, so
9475     * we don't need to care about whether CPUARMState is correct in the
9476     * middle of a TB.
9477     */
9478
9479    /* Reset the conditional execution bits immediately. This avoids
9480       complications trying to do it at the end of the block.  */
9481    if (dc->condexec_mask || dc->condexec_cond) {
9482        store_cpu_field_constant(0, condexec_bits);
9483    }
9484}
9485
9486static void arm_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
9487{
9488    DisasContext *dc = container_of(dcbase, DisasContext, base);
9489    /*
9490     * The ECI/ICI bits share PSR bits with the IT bits, so we
9491     * need to reconstitute the bits from the split-out DisasContext
9492     * fields here.
9493     */
9494    uint32_t condexec_bits;
9495
9496    if (dc->eci) {
9497        condexec_bits = dc->eci << 4;
9498    } else {
9499        condexec_bits = (dc->condexec_cond << 4) | (dc->condexec_mask >> 1);
9500    }
9501    tcg_gen_insn_start(dc->base.pc_next, condexec_bits, 0);
9502    dc->insn_start = tcg_last_op();
9503}
9504
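    /*
     * Per-insn work done before translating: trap jumps to the magic
     * kernel page (user-mode only) and raise any pending architectural
     * single-step exception. Returns true if the caller should skip
     * translating this insn and simply advance pc_next.
     */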
9505static bool arm_pre_translate_insn(DisasContext *dc)
9506{
9507#ifdef CONFIG_USER_ONLY
9508    /* Intercept jump to the magic kernel page.  */
9509    if (dc->base.pc_next >= 0xffff0000) {
9510        /* We always get here via a jump, so we know we are not in a
9511           conditional execution block.  */
9512        gen_exception_internal(EXCP_KERNEL_TRAP);
9513        dc->base.is_jmp = DISAS_NORETURN;
9514        return true;
9515    }
9516#endif
9517
9518    if (dc->ss_active && !dc->pstate_ss) {
9519        /* Singlestep state is Active-pending.
9520         * If we're in this state at the start of a TB then either
9521         *  a) we just took an exception to an EL which is being debugged
9522         *     and this is the first insn in the exception handler
9523         *  b) debug exceptions were masked and we just unmasked them
9524         *     without changing EL (eg by clearing PSTATE.D)
9525         * In either case we're going to take a swstep exception in the
9526         * "did not step an insn" case, and so the syndrome ISV and EX
9527         * bits should be zero.
9528         */
9529        assert(dc->base.num_insns == 1);
9530        gen_swstep_exception(dc, 0, 0);
9531        dc->base.is_jmp = DISAS_NORETURN;
9532        return true;
9533    }
9534
9535    return false;
9536}
9537
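    /*
     * Per-insn work done after translating: close a still-pending
     * condition-failed label and run the translator's per-insn
     * TCG temporary leak check.
     */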
9538static void arm_post_translate_insn(DisasContext *dc)
9539{
9540    if (dc->condjmp && !dc->base.is_jmp) {
9541        gen_set_label(dc->condlabel);
9542        dc->condjmp = 0;
9543    }
9544    translator_loop_temp_check(&dc->base);
9545}
9546
9547static void arm_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
9548{
9549    DisasContext *dc = container_of(dcbase, DisasContext, base);
9550    CPUARMState *env = cpu->env_ptr;
9551    unsigned int insn;
9552
9553    if (arm_pre_translate_insn(dc)) {
9554        dc->base.pc_next += 4;
9555        return;
9556    }
9557
9558    dc->pc_curr = dc->base.pc_next;
9559    insn = arm_ldl_code(env, &dc->base, dc->base.pc_next, dc->sctlr_b);
9560    dc->insn = insn;
9561    dc->base.pc_next += 4;
9562    disas_arm_insn(dc, insn);
9563
9564    arm_post_translate_insn(dc);
9565
9566    /* ARM is a fixed-length ISA.  We performed the cross-page check
9567       in init_disas_context by adjusting max_insns.  */
9568}
9569
9570static bool thumb_insn_is_unconditional(DisasContext *s, uint32_t insn)
9571{
9572    /* Return true if this Thumb insn is always unconditional,
9573     * even inside an IT block. This is true of only a very few
9574     * instructions: BKPT, HLT, and SG.
9575     *
9576     * A larger class of instructions are UNPREDICTABLE if used
9577     * inside an IT block; we do not need to detect those here, because
9578     * what we do by default (perform the cc check and update the IT
9579     * bits state machine) is a permitted CONSTRAINED UNPREDICTABLE
9580     * choice for those situations.
9581     *
9582     * insn is either a 16-bit or a 32-bit instruction; the two are
9583     * distinguishable because for the 16-bit case the top 16 bits
9584     * are zeroes, and that isn't a valid 32-bit encoding.
9585     */
9586    if ((insn & 0xffffff00) == 0xbe00) {
9587        /* BKPT */
9588        return true;
9589    }
9590
9591    if ((insn & 0xffffffc0) == 0xba80 && arm_dc_feature(s, ARM_FEATURE_V8) &&
9592        !arm_dc_feature(s, ARM_FEATURE_M)) {
9593        /* HLT: v8A only. This is unconditional even when it is going to
9594         * UNDEF; see the v8A ARM ARM DDI0487B.a H3.3.
9595         * For v7 cores this was a plain old undefined encoding and so
9596         * honours its cc check. (We might be using the encoding as
9597         * a semihosting trap, but we don't change the cc check behaviour
9598         * on that account, because a debugger connected to a real v7A
9599         * core and emulating semihosting traps by catching the UNDEF
9600         * exception would also only see cases where the cc check passed.
9601         * No guest code should be trying to do a HLT semihosting trap
9602         * in an IT block anyway.
9603         */
9604        return true;
9605    }
9606
9607    if (insn == 0xe97fe97f && arm_dc_feature(s, ARM_FEATURE_V8) &&
9608        arm_dc_feature(s, ARM_FEATURE_M)) {
9609        /* SG: v8M only */
9610        return true;
9611    }
9612
9613    return false;
9614}
9615
9616static void thumb_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
9617{
9618    DisasContext *dc = container_of(dcbase, DisasContext, base);
9619    CPUARMState *env = cpu->env_ptr;
9620    uint32_t insn;
9621    bool is_16bit;
9622
9623    if (arm_pre_translate_insn(dc)) {
9624        dc->base.pc_next += 2;
9625        return;
9626    }
9627
9628    dc->pc_curr = dc->base.pc_next;
9629    insn = arm_lduw_code(env, &dc->base, dc->base.pc_next, dc->sctlr_b);
9630    is_16bit = thumb_insn_is_16bit(dc, dc->base.pc_next, insn);
9631    dc->base.pc_next += 2;
9632    if (!is_16bit) {
9633        uint32_t insn2 = arm_lduw_code(env, &dc->base, dc->base.pc_next,
9634                                       dc->sctlr_b);
9635
9636        insn = insn << 16 | insn2;
9637        dc->base.pc_next += 2;
9638    }
9639    dc->insn = insn;
9640
9641    if (dc->pstate_il) {
9642        /*
9643         * Illegal execution state. This has priority over BTI
9644         * exceptions, but comes after instruction abort exceptions.
9645         */
9646        gen_exception_insn(dc, dc->pc_curr, EXCP_UDEF,
9647                           syn_illegalstate(), default_exception_el(dc));
9648        return;
9649    }
9650
9651    if (dc->eci) {
9652        /*
9653         * For M-profile continuable instructions, ECI/ICI handling
9654         * falls into these cases:
9655         *  - interrupt-continuable instructions
9656         *     These are the various load/store multiple insns (both
9657         *     integer and fp). The ICI bits indicate the register
9658         *     where the load/store can resume. We make the IMPDEF
9659         *     choice to always do "instruction restart", ie ignore
9660         *     the ICI value and always execute the ldm/stm from the
9661         *     start. So all we need to do is zero PSR.ICI if the
9662         *     insn executes.
9663         *  - MVE instructions subject to beat-wise execution
9664         *     Here the ECI bits indicate which beats have already been
9665         *     executed, and we must honour this. Each insn of this
9666         *     type will handle it correctly. We will update PSR.ECI
9667         *     in the helper function for the insn (some ECI values
9668         *     mean that the following insn also has been partially
9669         *     executed).
9670         *  - Special cases which don't advance ECI
9671         *     The insns LE, LETP and BKPT leave the ECI/ICI state
9672         *     bits untouched.
9673         *  - all other insns (the common case)
9674         *     Non-zero ECI/ICI means an INVSTATE UsageFault.
9675         *     We place a rewind-marker here. Insns in the previous
9676         *     three categories will set a flag in the DisasContext.
9677         *     If the flag isn't set after we call disas_thumb_insn()
9678         *     or disas_thumb2_insn() then we know we have a "some other
9679         *     insn" case. We will rewind to the marker (ie throwing away
9680         *     all the generated code) and instead emit "take exception".
9681         */
9682        dc->insn_eci_rewind = tcg_last_op();
9683    }
9684
9685    if (dc->condexec_mask && !thumb_insn_is_unconditional(dc, insn)) {
9686        uint32_t cond = dc->condexec_cond;
9687
9688        /*
9689         * Conditionally skip the insn. Note that both 0xe and 0xf mean
9690         * "always"; 0xf is not "never".
9691         */
9692        if (cond < 0x0e) {
9693            arm_skip_unless(dc, cond);
9694        }
9695    }
9696
9697    if (is_16bit) {
9698        disas_thumb_insn(dc, insn);
9699    } else {
9700        disas_thumb2_insn(dc, insn);
9701    }
9702
9703    /* Advance the Thumb condexec condition.  */
9704    if (dc->condexec_mask) {
9705        dc->condexec_cond = ((dc->condexec_cond & 0xe) |
9706                             ((dc->condexec_mask >> 4) & 1));
9707        dc->condexec_mask = (dc->condexec_mask << 1) & 0x1f;
9708        if (dc->condexec_mask == 0) {
9709            dc->condexec_cond = 0;
9710        }
9711    }
9712
9713    if (dc->eci && !dc->eci_handled) {
9714        /*
9715         * Insn wasn't valid for ECI/ICI at all: undo what we
9716         * just generated and instead emit an exception
9717         */
9718        tcg_remove_ops_after(dc->insn_eci_rewind);
9719        dc->condjmp = 0;
9720        gen_exception_insn(dc, dc->pc_curr, EXCP_INVSTATE, syn_uncategorized(),
9721                           default_exception_el(dc));
9722    }
9723
9724    arm_post_translate_insn(dc);
9725
9726    /* Thumb is a variable-length ISA.  Stop translation when the next insn
9727     * will touch a new page.  This ensures that prefetch aborts occur at
9728     * the right place.
9729     *
9730     * We want to stop the TB if the next insn starts in a new page,
9731     * or if it spans between this page and the next. This means that
9732     * if we're looking at the last halfword in the page we need to
9733     * see if it's a 16-bit Thumb insn (which will fit in this TB)
9734     * or a 32-bit Thumb insn (which won't).
9735     * This is to avoid generating a silly TB with a single 16-bit insn
9736     * in it at the end of this page (which would execute correctly
9737     * but isn't very efficient).
9738     */
9739    if (dc->base.is_jmp == DISAS_NEXT
9740        && (dc->base.pc_next - dc->page_start >= TARGET_PAGE_SIZE
9741            || (dc->base.pc_next - dc->page_start >= TARGET_PAGE_SIZE - 3
9742                && insn_crosses_page(env, dc)))) {
9743        dc->base.is_jmp = DISAS_TOO_MANY;
9744    }
9745}
9746
9747static void arm_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
9748{
9749    DisasContext *dc = container_of(dcbase, DisasContext, base);
9750
9751    /* At this stage dc->condjmp will only be set when the skipped
9752       instruction was a conditional branch or trap, and the PC has
9753       already been written.  */
9754    gen_set_condexec(dc);
9755    if (dc->base.is_jmp == DISAS_BX_EXCRET) {
9756        /* Exception return branches need some special case code at the
9757         * end of the TB, which is complex enough that it has to
9758         * handle the single-step vs not and the condition-failed
9759         * insn codepath itself.
9760         */
9761        gen_bx_excret_final_code(dc);
9762    } else if (unlikely(dc->ss_active)) {
9763        /* Unconditional and "condition passed" instruction codepath. */
9764        switch (dc->base.is_jmp) {
9765        case DISAS_SWI:
9766            gen_ss_advance(dc);
9767            gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb),
9768                          default_exception_el(dc));
9769            break;
9770        case DISAS_HVC:
9771            gen_ss_advance(dc);
9772            gen_exception(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2);
9773            break;
9774        case DISAS_SMC:
9775            gen_ss_advance(dc);
9776            gen_exception(EXCP_SMC, syn_aa32_smc(), 3);
9777            break;
9778        case DISAS_NEXT:
9779        case DISAS_TOO_MANY:
9780        case DISAS_UPDATE_EXIT:
9781        case DISAS_UPDATE_NOCHAIN:
9782            gen_set_pc_im(dc, dc->base.pc_next);
9783            /* fall through */
9784        default:
9785            /* FIXME: Single stepping a WFI insn will not halt the CPU. */
9786            gen_singlestep_exception(dc);
9787            break;
9788        case DISAS_NORETURN:
9789            break;
9790        }
9791    } else {
9792        /* While branches must always occur at the end of an IT block,
9793           there are a few other things that can cause us to terminate
9794           the TB in the middle of an IT block:
9795            - Exception generating instructions (bkpt, swi, undefined).
9796            - Page boundaries.
9797            - Hardware watchpoints.
9798           Hardware breakpoints have already been handled and skip this code.
9799         */
9800        switch (dc->base.is_jmp) {
9801        case DISAS_NEXT:
9802        case DISAS_TOO_MANY:
9803            gen_goto_tb(dc, 1, dc->base.pc_next);
9804            break;
9805        case DISAS_UPDATE_NOCHAIN:
9806            gen_set_pc_im(dc, dc->base.pc_next);
9807            /* fall through */
9808        case DISAS_JUMP:
9809            gen_goto_ptr();
9810            break;
9811        case DISAS_UPDATE_EXIT:
9812            gen_set_pc_im(dc, dc->base.pc_next);
9813            /* fall through */
9814        default:
9815            /* indicate that the hash table must be used to find the next TB */
9816            tcg_gen_exit_tb(NULL, 0);
9817            break;
9818        case DISAS_NORETURN:
9819            /* nothing more to generate */
9820            break;
9821        case DISAS_WFI:
9822        {
9823            TCGv_i32 tmp = tcg_const_i32((dc->thumb &&
9824                                          !(dc->insn & (1U << 31))) ? 2 : 4);
9825
9826            gen_helper_wfi(cpu_env, tmp);
9827            tcg_temp_free_i32(tmp);
9828            /* The helper doesn't necessarily throw an exception, but we
9829             * must go back to the main loop to check for interrupts anyway.
9830             */
9831            tcg_gen_exit_tb(NULL, 0);
9832            break;
9833        }
9834        case DISAS_WFE:
9835            gen_helper_wfe(cpu_env);
9836            break;
9837        case DISAS_YIELD:
9838            gen_helper_yield(cpu_env);
9839            break;
9840        case DISAS_SWI:
9841            gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb),
9842                          default_exception_el(dc));
9843            break;
9844        case DISAS_HVC:
9845            gen_exception(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2);
9846            break;
9847        case DISAS_SMC:
9848            gen_exception(EXCP_SMC, syn_aa32_smc(), 3);
9849            break;
9850        }
9851    }
9852
9853    if (dc->condjmp) {
9854        /* "Condition failed" instruction codepath for the branch/trap insn */
9855        gen_set_label(dc->condlabel);
9856        gen_set_condexec(dc);
9857        if (unlikely(dc->ss_active)) {
9858            gen_set_pc_im(dc, dc->base.pc_next);
9859            gen_singlestep_exception(dc);
9860        } else {
9861            gen_goto_tb(dc, 1, dc->base.pc_next);
9862        }
9863    }
9864}
9865
9866static void arm_tr_disas_log(const DisasContextBase *dcbase, CPUState *cpu)
9867{
9868    DisasContext *dc = container_of(dcbase, DisasContext, base);
9869
9870    qemu_log("IN: %s\n", lookup_symbol(dc->base.pc_first));
9871    log_target_disas(cpu, dc->base.pc_first, dc->base.tb->size);
9872}
9873
9874static const TranslatorOps arm_translator_ops = {
9875    .init_disas_context = arm_tr_init_disas_context,
9876    .tb_start           = arm_tr_tb_start,
9877    .insn_start         = arm_tr_insn_start,
9878    .translate_insn     = arm_tr_translate_insn,
9879    .tb_stop            = arm_tr_tb_stop,
9880    .disas_log          = arm_tr_disas_log,
9881};
9882
9883static const TranslatorOps thumb_translator_ops = {
9884    .init_disas_context = arm_tr_init_disas_context,
9885    .tb_start           = arm_tr_tb_start,
9886    .insn_start         = arm_tr_insn_start,
9887    .translate_insn     = thumb_tr_translate_insn,
9888    .tb_stop            = arm_tr_tb_stop,
9889    .disas_log          = arm_tr_disas_log,
9890};
9891
9892/* generate intermediate code for basic block 'tb'.  */
9893void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns)
9894{
9895    DisasContext dc = { };
9896    const TranslatorOps *ops = &arm_translator_ops;
9897    CPUARMTBFlags tb_flags = arm_tbflags_from_tb(tb);
9898
9899    if (EX_TBFLAG_AM32(tb_flags, THUMB)) {
9900        ops = &thumb_translator_ops;
9901    }
9902#ifdef TARGET_AARCH64
9903    if (EX_TBFLAG_ANY(tb_flags, AARCH64_STATE)) {
9904        ops = &aarch64_translator_ops;
9905    }
9906#endif
9907
9908    translator_loop(ops, &dc.base, cpu, tb, max_insns);
9909}
9910
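    /*
     * Counterpart of the tcg_gen_insn_start() calls above: data[0] is
     * the PC, data[1] the condexec (IT/ECI) bits, and data[2] the
     * syndrome information recorded for the insn.
     */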
9911void restore_state_to_opc(CPUARMState *env, TranslationBlock *tb,
9912                          target_ulong *data)
9913{
9914    if (is_a64(env)) {
9915        env->pc = data[0];
9916        env->condexec_bits = 0;
9917        env->exception.syndrome = data[2] << ARM_INSN_START_WORD2_SHIFT;
9918    } else {
9919        env->regs[15] = data[0];
9920        env->condexec_bits = data[1];
9921        env->exception.syndrome = data[2] << ARM_INSN_START_WORD2_SHIFT;
9922    }
9923}
9924