qemu/target/arm/translate.c
   1/*
   2 *  ARM translation
   3 *
   4 *  Copyright (c) 2003 Fabrice Bellard
   5 *  Copyright (c) 2005-2007 CodeSourcery
   6 *  Copyright (c) 2007 OpenedHand, Ltd.
   7 *
   8 * This library is free software; you can redistribute it and/or
   9 * modify it under the terms of the GNU Lesser General Public
  10 * License as published by the Free Software Foundation; either
  11 * version 2 of the License, or (at your option) any later version.
  12 *
  13 * This library is distributed in the hope that it will be useful,
  14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  16 * Lesser General Public License for more details.
  17 *
  18 * You should have received a copy of the GNU Lesser General Public
  19 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  20 */
  21#include "qemu/osdep.h"
  22
  23#include "cpu.h"
  24#include "internals.h"
  25#include "disas/disas.h"
  26#include "exec/exec-all.h"
  27#include "tcg-op.h"
  28#include "tcg-op-gvec.h"
  29#include "qemu/log.h"
  30#include "qemu/bitops.h"
  31#include "arm_ldst.h"
  32#include "exec/semihost.h"
  33
  34#include "exec/helper-proto.h"
  35#include "exec/helper-gen.h"
  36
  37#include "trace-tcg.h"
  38#include "exec/log.h"
  39
  40
  41#define ENABLE_ARCH_4T    arm_dc_feature(s, ARM_FEATURE_V4T)
  42#define ENABLE_ARCH_5     arm_dc_feature(s, ARM_FEATURE_V5)
  43/* currently all emulated v5 cores are also v5TE, so don't bother */
  44#define ENABLE_ARCH_5TE   arm_dc_feature(s, ARM_FEATURE_V5)
  45#define ENABLE_ARCH_5J    dc_isar_feature(jazelle, s)
  46#define ENABLE_ARCH_6     arm_dc_feature(s, ARM_FEATURE_V6)
  47#define ENABLE_ARCH_6K    arm_dc_feature(s, ARM_FEATURE_V6K)
  48#define ENABLE_ARCH_6T2   arm_dc_feature(s, ARM_FEATURE_THUMB2)
  49#define ENABLE_ARCH_7     arm_dc_feature(s, ARM_FEATURE_V7)
  50#define ENABLE_ARCH_8     arm_dc_feature(s, ARM_FEATURE_V8)
  51
  52#define ARCH(x) do { if (!ENABLE_ARCH_##x) goto illegal_op; } while(0)
  53
  54#include "translate.h"
  55
  56#if defined(CONFIG_USER_ONLY)
  57#define IS_USER(s) 1
  58#else
  59#define IS_USER(s) (s->user)
  60#endif
  61
  62/* We reuse the same 64-bit temporaries for efficiency.  */
  63static TCGv_i64 cpu_V0, cpu_V1, cpu_M0;
  64static TCGv_i32 cpu_R[16];
  65TCGv_i32 cpu_CF, cpu_NF, cpu_VF, cpu_ZF;
  66TCGv_i64 cpu_exclusive_addr;
  67TCGv_i64 cpu_exclusive_val;
  68
  69/* FIXME:  These should be removed.  */
  70static TCGv_i32 cpu_F0s, cpu_F1s;
  71static TCGv_i64 cpu_F0d, cpu_F1d;
  72
  73#include "exec/gen-icount.h"
  74
  75static const char * const regnames[] =
  76    { "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
  77      "r8", "r9", "r10", "r11", "r12", "r13", "r14", "pc" };
  78
  79/* Function prototypes for gen_ functions calling Neon helpers.  */
  80typedef void NeonGenThreeOpEnvFn(TCGv_i32, TCGv_env, TCGv_i32,
  81                                 TCGv_i32, TCGv_i32);
  82
  83/* initialize TCG globals.  */
  84void arm_translate_init(void)
  85{
  86    int i;
  87
  88    for (i = 0; i < 16; i++) {
  89        cpu_R[i] = tcg_global_mem_new_i32(cpu_env,
  90                                          offsetof(CPUARMState, regs[i]),
  91                                          regnames[i]);
  92    }
  93    cpu_CF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, CF), "CF");
  94    cpu_NF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, NF), "NF");
  95    cpu_VF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, VF), "VF");
  96    cpu_ZF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, ZF), "ZF");
  97
  98    cpu_exclusive_addr = tcg_global_mem_new_i64(cpu_env,
  99        offsetof(CPUARMState, exclusive_addr), "exclusive_addr");
 100    cpu_exclusive_val = tcg_global_mem_new_i64(cpu_env,
 101        offsetof(CPUARMState, exclusive_val), "exclusive_val");
 102
 103    a64_translate_init();
 104}
 105
 106/* Flags for the disas_set_da_iss info argument:
 107 * lower bits hold the Rt register number, higher bits are flags.
 108 */
 109typedef enum ISSInfo {
 110    ISSNone = 0,
 111    ISSRegMask = 0x1f,
 112    ISSInvalid = (1 << 5),
 113    ISSIsAcqRel = (1 << 6),
 114    ISSIsWrite = (1 << 7),
 115    ISSIs16Bit = (1 << 8),
 116} ISSInfo;
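
/* For example, an issinfo value of (ISSIsWrite | 5) describes a store whose
 * transfer register Rt is r5; the access size and sign-extension information
 * come separately from the memop argument of disas_set_da_iss() below.
 */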
 117
 118/* Save the syndrome information for a Data Abort */
 119static void disas_set_da_iss(DisasContext *s, TCGMemOp memop, ISSInfo issinfo)
 120{
 121    uint32_t syn;
 122    int sas = memop & MO_SIZE;
 123    bool sse = memop & MO_SIGN;
 124    bool is_acqrel = issinfo & ISSIsAcqRel;
 125    bool is_write = issinfo & ISSIsWrite;
 126    bool is_16bit = issinfo & ISSIs16Bit;
 127    int srt = issinfo & ISSRegMask;
 128
 129    if (issinfo & ISSInvalid) {
 130        /* Some callsites want to conditionally provide ISS info,
 131         * eg "only if this was not a writeback"
 132         */
 133        return;
 134    }
 135
 136    if (srt == 15) {
 137        /* For AArch32, insns where the src/dest is R15 never generate
 138         * ISS information. Catching that here saves checking at all
 139         * the call sites.
 140         */
 141        return;
 142    }
 143
 144    syn = syn_data_abort_with_iss(0, sas, sse, srt, 0, is_acqrel,
 145                                  0, 0, 0, is_write, 0, is_16bit);
 146    disas_set_insn_syndrome(s, syn);
 147}
 148
 149static inline int get_a32_user_mem_index(DisasContext *s)
 150{
 151    /* Return the core mmu_idx to use for A32/T32 "unprivileged load/store"
 152     * insns:
 153     *  if PL2, UNPREDICTABLE (we choose to implement as if PL0)
 154     *  otherwise, access as if at PL0.
 155     */
 156    switch (s->mmu_idx) {
 157    case ARMMMUIdx_S1E2:        /* this one is UNPREDICTABLE */
 158    case ARMMMUIdx_S12NSE0:
 159    case ARMMMUIdx_S12NSE1:
 160        return arm_to_core_mmu_idx(ARMMMUIdx_S12NSE0);
 161    case ARMMMUIdx_S1E3:
 162    case ARMMMUIdx_S1SE0:
 163    case ARMMMUIdx_S1SE1:
 164        return arm_to_core_mmu_idx(ARMMMUIdx_S1SE0);
 165    case ARMMMUIdx_MUser:
 166    case ARMMMUIdx_MPriv:
 167        return arm_to_core_mmu_idx(ARMMMUIdx_MUser);
 168    case ARMMMUIdx_MUserNegPri:
 169    case ARMMMUIdx_MPrivNegPri:
 170        return arm_to_core_mmu_idx(ARMMMUIdx_MUserNegPri);
 171    case ARMMMUIdx_MSUser:
 172    case ARMMMUIdx_MSPriv:
 173        return arm_to_core_mmu_idx(ARMMMUIdx_MSUser);
 174    case ARMMMUIdx_MSUserNegPri:
 175    case ARMMMUIdx_MSPrivNegPri:
 176        return arm_to_core_mmu_idx(ARMMMUIdx_MSUserNegPri);
 177    case ARMMMUIdx_S2NS:
 178    default:
 179        g_assert_not_reached();
 180    }
 181}
 182
 183static inline TCGv_i32 load_cpu_offset(int offset)
 184{
 185    TCGv_i32 tmp = tcg_temp_new_i32();
 186    tcg_gen_ld_i32(tmp, cpu_env, offset);
 187    return tmp;
 188}
 189
 190#define load_cpu_field(name) load_cpu_offset(offsetof(CPUARMState, name))
 191
 192static inline void store_cpu_offset(TCGv_i32 var, int offset)
 193{
 194    tcg_gen_st_i32(var, cpu_env, offset);
 195    tcg_temp_free_i32(var);
 196}
 197
 198#define store_cpu_field(var, name) \
 199    store_cpu_offset(var, offsetof(CPUARMState, name))
 200
 201/* Set a variable to the value of a CPU register.  */
 202static void load_reg_var(DisasContext *s, TCGv_i32 var, int reg)
 203{
 204    if (reg == 15) {
 205        uint32_t addr;
  206        /* normally s->pc has already been advanced past this insn, so we need only add one more insn length */
 207        if (s->thumb)
 208            addr = (long)s->pc + 2;
 209        else
 210            addr = (long)s->pc + 4;
 211        tcg_gen_movi_i32(var, addr);
 212    } else {
 213        tcg_gen_mov_i32(var, cpu_R[reg]);
 214    }
 215}
 216
 217/* Create a new temporary and set it to the value of a CPU register.  */
 218static inline TCGv_i32 load_reg(DisasContext *s, int reg)
 219{
 220    TCGv_i32 tmp = tcg_temp_new_i32();
 221    load_reg_var(s, tmp, reg);
 222    return tmp;
 223}
 224
 225/* Set a CPU register.  The source must be a temporary and will be
 226   marked as dead.  */
 227static void store_reg(DisasContext *s, int reg, TCGv_i32 var)
 228{
 229    if (reg == 15) {
 230        /* In Thumb mode, we must ignore bit 0.
 231         * In ARM mode, for ARMv4 and ARMv5, it is UNPREDICTABLE if bits [1:0]
 232         * are not 0b00, but for ARMv6 and above, we must ignore bits [1:0].
 233         * We choose to ignore [1:0] in ARM mode for all architecture versions.
 234         */
 235        tcg_gen_andi_i32(var, var, s->thumb ? ~1 : ~3);
 236        s->base.is_jmp = DISAS_JUMP;
 237    }
 238    tcg_gen_mov_i32(cpu_R[reg], var);
 239    tcg_temp_free_i32(var);
 240}
 241
 242/*
 243 * Variant of store_reg which applies v8M stack-limit checks before updating
 244 * SP. If the check fails this will result in an exception being taken.
 245 * We disable the stack checks for CONFIG_USER_ONLY because we have
 246 * no idea what the stack limits should be in that case.
 247 * If stack checking is not being done this just acts like store_reg().
 248 */
 249static void store_sp_checked(DisasContext *s, TCGv_i32 var)
 250{
 251#ifndef CONFIG_USER_ONLY
 252    if (s->v8m_stackcheck) {
 253        gen_helper_v8m_stackcheck(cpu_env, var);
 254    }
 255#endif
 256    store_reg(s, 13, var);
 257}
 258
 259/* Value extensions.  */
 260#define gen_uxtb(var) tcg_gen_ext8u_i32(var, var)
 261#define gen_uxth(var) tcg_gen_ext16u_i32(var, var)
 262#define gen_sxtb(var) tcg_gen_ext8s_i32(var, var)
 263#define gen_sxth(var) tcg_gen_ext16s_i32(var, var)
 264
 265#define gen_sxtb16(var) gen_helper_sxtb16(var, var)
 266#define gen_uxtb16(var) gen_helper_uxtb16(var, var)
 267
 268
 269static inline void gen_set_cpsr(TCGv_i32 var, uint32_t mask)
 270{
 271    TCGv_i32 tmp_mask = tcg_const_i32(mask);
 272    gen_helper_cpsr_write(cpu_env, var, tmp_mask);
 273    tcg_temp_free_i32(tmp_mask);
 274}
 275/* Set NZCV flags from the high 4 bits of var.  */
 276#define gen_set_nzcv(var) gen_set_cpsr(var, CPSR_NZCV)
 277
 278static void gen_exception_internal(int excp)
 279{
 280    TCGv_i32 tcg_excp = tcg_const_i32(excp);
 281
 282    assert(excp_is_internal(excp));
 283    gen_helper_exception_internal(cpu_env, tcg_excp);
 284    tcg_temp_free_i32(tcg_excp);
 285}
 286
 287static void gen_exception(int excp, uint32_t syndrome, uint32_t target_el)
 288{
 289    TCGv_i32 tcg_excp = tcg_const_i32(excp);
 290    TCGv_i32 tcg_syn = tcg_const_i32(syndrome);
 291    TCGv_i32 tcg_el = tcg_const_i32(target_el);
 292
 293    gen_helper_exception_with_syndrome(cpu_env, tcg_excp,
 294                                       tcg_syn, tcg_el);
 295
 296    tcg_temp_free_i32(tcg_el);
 297    tcg_temp_free_i32(tcg_syn);
 298    tcg_temp_free_i32(tcg_excp);
 299}
 300
 301static void gen_ss_advance(DisasContext *s)
 302{
 303    /* If the singlestep state is Active-not-pending, advance to
 304     * Active-pending.
 305     */
 306    if (s->ss_active) {
 307        s->pstate_ss = 0;
 308        gen_helper_clear_pstate_ss(cpu_env);
 309    }
 310}
 311
 312static void gen_step_complete_exception(DisasContext *s)
 313{
  314    /* We have just completed a step of an insn. Move from Active-not-pending
 315     * to Active-pending, and then also take the swstep exception.
 316     * This corresponds to making the (IMPDEF) choice to prioritize
 317     * swstep exceptions over asynchronous exceptions taken to an exception
 318     * level where debug is disabled. This choice has the advantage that
 319     * we do not need to maintain internal state corresponding to the
 320     * ISV/EX syndrome bits between completion of the step and generation
 321     * of the exception, and our syndrome information is always correct.
 322     */
 323    gen_ss_advance(s);
 324    gen_exception(EXCP_UDEF, syn_swstep(s->ss_same_el, 1, s->is_ldex),
 325                  default_exception_el(s));
 326    s->base.is_jmp = DISAS_NORETURN;
 327}
 328
 329static void gen_singlestep_exception(DisasContext *s)
 330{
 331    /* Generate the right kind of exception for singlestep, which is
 332     * either the architectural singlestep or EXCP_DEBUG for QEMU's
 333     * gdb singlestepping.
 334     */
 335    if (s->ss_active) {
 336        gen_step_complete_exception(s);
 337    } else {
 338        gen_exception_internal(EXCP_DEBUG);
 339    }
 340}
 341
 342static inline bool is_singlestepping(DisasContext *s)
 343{
 344    /* Return true if we are singlestepping either because of
 345     * architectural singlestep or QEMU gdbstub singlestep. This does
 346     * not include the command line '-singlestep' mode which is rather
 347     * misnamed as it only means "one instruction per TB" and doesn't
 348     * affect the code we generate.
 349     */
 350    return s->base.singlestep_enabled || s->ss_active;
 351}
 352
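/* Signed dual 16x16->32 multiply: on return, a holds the product of the low
 * halfwords of the two inputs and b holds the product of the high halfwords.
 */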
 353static void gen_smul_dual(TCGv_i32 a, TCGv_i32 b)
 354{
 355    TCGv_i32 tmp1 = tcg_temp_new_i32();
 356    TCGv_i32 tmp2 = tcg_temp_new_i32();
 357    tcg_gen_ext16s_i32(tmp1, a);
 358    tcg_gen_ext16s_i32(tmp2, b);
 359    tcg_gen_mul_i32(tmp1, tmp1, tmp2);
 360    tcg_temp_free_i32(tmp2);
 361    tcg_gen_sari_i32(a, a, 16);
 362    tcg_gen_sari_i32(b, b, 16);
 363    tcg_gen_mul_i32(b, b, a);
 364    tcg_gen_mov_i32(a, tmp1);
 365    tcg_temp_free_i32(tmp1);
 366}
 367
 368/* Byteswap each halfword.  */
 369static void gen_rev16(TCGv_i32 var)
 370{
 371    TCGv_i32 tmp = tcg_temp_new_i32();
 372    TCGv_i32 mask = tcg_const_i32(0x00ff00ff);
 373    tcg_gen_shri_i32(tmp, var, 8);
 374    tcg_gen_and_i32(tmp, tmp, mask);
 375    tcg_gen_and_i32(var, var, mask);
 376    tcg_gen_shli_i32(var, var, 8);
 377    tcg_gen_or_i32(var, var, tmp);
 378    tcg_temp_free_i32(mask);
 379    tcg_temp_free_i32(tmp);
 380}
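
/* Worked example for gen_rev16(): 0xaabbccdd becomes 0xbbaaddcc: the bytes
 * within each 16-bit halfword are swapped while the halfwords stay in place.
 */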
 381
 382/* Byteswap low halfword and sign extend.  */
 383static void gen_revsh(TCGv_i32 var)
 384{
 385    tcg_gen_ext16u_i32(var, var);
 386    tcg_gen_bswap16_i32(var, var);
 387    tcg_gen_ext16s_i32(var, var);
 388}
 389
 390/* Return (b << 32) + a. Mark inputs as dead */
 391static TCGv_i64 gen_addq_msw(TCGv_i64 a, TCGv_i32 b)
 392{
 393    TCGv_i64 tmp64 = tcg_temp_new_i64();
 394
 395    tcg_gen_extu_i32_i64(tmp64, b);
 396    tcg_temp_free_i32(b);
 397    tcg_gen_shli_i64(tmp64, tmp64, 32);
 398    tcg_gen_add_i64(a, tmp64, a);
 399
 400    tcg_temp_free_i64(tmp64);
 401    return a;
 402}
 403
 404/* Return (b << 32) - a. Mark inputs as dead. */
 405static TCGv_i64 gen_subq_msw(TCGv_i64 a, TCGv_i32 b)
 406{
 407    TCGv_i64 tmp64 = tcg_temp_new_i64();
 408
 409    tcg_gen_extu_i32_i64(tmp64, b);
 410    tcg_temp_free_i32(b);
 411    tcg_gen_shli_i64(tmp64, tmp64, 32);
 412    tcg_gen_sub_i64(a, tmp64, a);
 413
 414    tcg_temp_free_i64(tmp64);
 415    return a;
 416}
 417
 418/* 32x32->64 multiply.  Marks inputs as dead.  */
 419static TCGv_i64 gen_mulu_i64_i32(TCGv_i32 a, TCGv_i32 b)
 420{
 421    TCGv_i32 lo = tcg_temp_new_i32();
 422    TCGv_i32 hi = tcg_temp_new_i32();
 423    TCGv_i64 ret;
 424
 425    tcg_gen_mulu2_i32(lo, hi, a, b);
 426    tcg_temp_free_i32(a);
 427    tcg_temp_free_i32(b);
 428
 429    ret = tcg_temp_new_i64();
 430    tcg_gen_concat_i32_i64(ret, lo, hi);
 431    tcg_temp_free_i32(lo);
 432    tcg_temp_free_i32(hi);
 433
 434    return ret;
 435}
 436
 437static TCGv_i64 gen_muls_i64_i32(TCGv_i32 a, TCGv_i32 b)
 438{
 439    TCGv_i32 lo = tcg_temp_new_i32();
 440    TCGv_i32 hi = tcg_temp_new_i32();
 441    TCGv_i64 ret;
 442
 443    tcg_gen_muls2_i32(lo, hi, a, b);
 444    tcg_temp_free_i32(a);
 445    tcg_temp_free_i32(b);
 446
 447    ret = tcg_temp_new_i64();
 448    tcg_gen_concat_i32_i64(ret, lo, hi);
 449    tcg_temp_free_i32(lo);
 450    tcg_temp_free_i32(hi);
 451
 452    return ret;
 453}
 454
 455/* Swap low and high halfwords.  */
 456static void gen_swap_half(TCGv_i32 var)
 457{
 458    TCGv_i32 tmp = tcg_temp_new_i32();
 459    tcg_gen_shri_i32(tmp, var, 16);
 460    tcg_gen_shli_i32(var, var, 16);
 461    tcg_gen_or_i32(var, var, tmp);
 462    tcg_temp_free_i32(tmp);
 463}
 464
 465/* Dual 16-bit add.  Result placed in t0 and t1 is marked as dead.
 466    tmp = (t0 ^ t1) & 0x8000;
 467    t0 &= ~0x8000;
 468    t1 &= ~0x8000;
 469    t0 = (t0 + t1) ^ tmp;
 470 */
 471
 472static void gen_add16(TCGv_i32 t0, TCGv_i32 t1)
 473{
 474    TCGv_i32 tmp = tcg_temp_new_i32();
 475    tcg_gen_xor_i32(tmp, t0, t1);
 476    tcg_gen_andi_i32(tmp, tmp, 0x8000);
 477    tcg_gen_andi_i32(t0, t0, ~0x8000);
 478    tcg_gen_andi_i32(t1, t1, ~0x8000);
 479    tcg_gen_add_i32(t0, t0, t1);
 480    tcg_gen_xor_i32(t0, t0, tmp);
 481    tcg_temp_free_i32(tmp);
 482    tcg_temp_free_i32(t1);
 483}
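
/* Clearing bit 15 of both addends before the add means the low halfwords can
 * sum to at most 0x7fff + 0x7fff = 0xfffe, so no carry can leak into the high
 * halfword; the XOR with tmp then restores the correct value of bit 15.
 * E.g. t0 = 0x8000, t1 = 0x8000: the masked add gives 0, tmp is 0, and the
 * low halfword result is 0x0000 with no spurious carry into bit 16.
 */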
 484
 485/* Set CF to the top bit of var.  */
 486static void gen_set_CF_bit31(TCGv_i32 var)
 487{
 488    tcg_gen_shri_i32(cpu_CF, var, 31);
 489}
 490
 491/* Set N and Z flags from var.  */
 492static inline void gen_logic_CC(TCGv_i32 var)
 493{
 494    tcg_gen_mov_i32(cpu_NF, var);
 495    tcg_gen_mov_i32(cpu_ZF, var);
 496}
 497
 498/* T0 += T1 + CF.  */
 499static void gen_adc(TCGv_i32 t0, TCGv_i32 t1)
 500{
 501    tcg_gen_add_i32(t0, t0, t1);
 502    tcg_gen_add_i32(t0, t0, cpu_CF);
 503}
 504
 505/* dest = T0 + T1 + CF. */
 506static void gen_add_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
 507{
 508    tcg_gen_add_i32(dest, t0, t1);
 509    tcg_gen_add_i32(dest, dest, cpu_CF);
 510}
 511
 512/* dest = T0 - T1 + CF - 1.  */
 513static void gen_sub_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
 514{
 515    tcg_gen_sub_i32(dest, t0, t1);
 516    tcg_gen_add_i32(dest, dest, cpu_CF);
 517    tcg_gen_subi_i32(dest, dest, 1);
 518}
 519
 520/* dest = T0 + T1. Compute C, N, V and Z flags */
 521static void gen_add_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
 522{
 523    TCGv_i32 tmp = tcg_temp_new_i32();
 524    tcg_gen_movi_i32(tmp, 0);
 525    tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, t1, tmp);
 526    tcg_gen_mov_i32(cpu_ZF, cpu_NF);
 527    tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
 528    tcg_gen_xor_i32(tmp, t0, t1);
 529    tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
 530    tcg_temp_free_i32(tmp);
 531    tcg_gen_mov_i32(dest, cpu_NF);
 532}
 533
 534/* dest = T0 + T1 + CF.  Compute C, N, V and Z flags */
 535static void gen_adc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
 536{
 537    TCGv_i32 tmp = tcg_temp_new_i32();
 538    if (TCG_TARGET_HAS_add2_i32) {
 539        tcg_gen_movi_i32(tmp, 0);
 540        tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, cpu_CF, tmp);
 541        tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1, tmp);
 542    } else {
 543        TCGv_i64 q0 = tcg_temp_new_i64();
 544        TCGv_i64 q1 = tcg_temp_new_i64();
 545        tcg_gen_extu_i32_i64(q0, t0);
 546        tcg_gen_extu_i32_i64(q1, t1);
 547        tcg_gen_add_i64(q0, q0, q1);
 548        tcg_gen_extu_i32_i64(q1, cpu_CF);
 549        tcg_gen_add_i64(q0, q0, q1);
 550        tcg_gen_extr_i64_i32(cpu_NF, cpu_CF, q0);
 551        tcg_temp_free_i64(q0);
 552        tcg_temp_free_i64(q1);
 553    }
 554    tcg_gen_mov_i32(cpu_ZF, cpu_NF);
 555    tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
 556    tcg_gen_xor_i32(tmp, t0, t1);
 557    tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
 558    tcg_temp_free_i32(tmp);
 559    tcg_gen_mov_i32(dest, cpu_NF);
 560}
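
/* In the add2 path above the carry out of the 32-bit addition accumulates in
 * cpu_CF: the first tcg_gen_add2_i32 computes t0 + CF, the second adds t1,
 * so cpu_NF ends up with the 32-bit sum and cpu_CF with the final carry.
 */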
 561
 562/* dest = T0 - T1. Compute C, N, V and Z flags */
 563static void gen_sub_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
 564{
 565    TCGv_i32 tmp;
 566    tcg_gen_sub_i32(cpu_NF, t0, t1);
 567    tcg_gen_mov_i32(cpu_ZF, cpu_NF);
 568    tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0, t1);
 569    tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
 570    tmp = tcg_temp_new_i32();
 571    tcg_gen_xor_i32(tmp, t0, t1);
 572    tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
 573    tcg_temp_free_i32(tmp);
 574    tcg_gen_mov_i32(dest, cpu_NF);
 575}
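
/* Note that for subtraction the ARM C flag is an inverted borrow: C is 1 when
 * t0 >= t1 (unsigned), which is exactly what the TCG_COND_GEU setcond above
 * computes.
 */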
 576
 577/* dest = T0 + ~T1 + CF.  Compute C, N, V and Z flags */
 578static void gen_sbc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
 579{
 580    TCGv_i32 tmp = tcg_temp_new_i32();
 581    tcg_gen_not_i32(tmp, t1);
 582    gen_adc_CC(dest, t0, tmp);
 583    tcg_temp_free_i32(tmp);
 584}
 585
 586#define GEN_SHIFT(name)                                               \
 587static void gen_##name(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)       \
 588{                                                                     \
 589    TCGv_i32 tmp1, tmp2, tmp3;                                        \
 590    tmp1 = tcg_temp_new_i32();                                        \
 591    tcg_gen_andi_i32(tmp1, t1, 0xff);                                 \
 592    tmp2 = tcg_const_i32(0);                                          \
 593    tmp3 = tcg_const_i32(0x1f);                                       \
 594    tcg_gen_movcond_i32(TCG_COND_GTU, tmp2, tmp1, tmp3, tmp2, t0);    \
 595    tcg_temp_free_i32(tmp3);                                          \
 596    tcg_gen_andi_i32(tmp1, tmp1, 0x1f);                               \
 597    tcg_gen_##name##_i32(dest, tmp2, tmp1);                           \
 598    tcg_temp_free_i32(tmp2);                                          \
 599    tcg_temp_free_i32(tmp1);                                          \
 600}
 601GEN_SHIFT(shl)
 602GEN_SHIFT(shr)
 603#undef GEN_SHIFT
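
/* The movcond in GEN_SHIFT implements the ARM semantics of register-specified
 * LSL/LSR: only the bottom byte of the shift register is used, and any shift
 * amount greater than 31 yields a zero result (the source operand is replaced
 * by zero before the 5-bit masked shift is applied).
 */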
 604
 605static void gen_sar(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
 606{
 607    TCGv_i32 tmp1, tmp2;
 608    tmp1 = tcg_temp_new_i32();
 609    tcg_gen_andi_i32(tmp1, t1, 0xff);
 610    tmp2 = tcg_const_i32(0x1f);
 611    tcg_gen_movcond_i32(TCG_COND_GTU, tmp1, tmp1, tmp2, tmp2, tmp1);
 612    tcg_temp_free_i32(tmp2);
 613    tcg_gen_sar_i32(dest, t0, tmp1);
 614    tcg_temp_free_i32(tmp1);
 615}
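
/* For ASR the shift amount is instead clamped to 31, since an arithmetic
 * shift right by 32 or more fills the result with copies of the sign bit,
 * which is the same as shifting by 31.
 */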
 616
 617static void tcg_gen_abs_i32(TCGv_i32 dest, TCGv_i32 src)
 618{
 619    TCGv_i32 c0 = tcg_const_i32(0);
 620    TCGv_i32 tmp = tcg_temp_new_i32();
 621    tcg_gen_neg_i32(tmp, src);
 622    tcg_gen_movcond_i32(TCG_COND_GT, dest, src, c0, src, tmp);
 623    tcg_temp_free_i32(c0);
 624    tcg_temp_free_i32(tmp);
 625}
 626
 627static void shifter_out_im(TCGv_i32 var, int shift)
 628{
 629    if (shift == 0) {
 630        tcg_gen_andi_i32(cpu_CF, var, 1);
 631    } else {
 632        tcg_gen_shri_i32(cpu_CF, var, shift);
 633        if (shift != 31) {
 634            tcg_gen_andi_i32(cpu_CF, cpu_CF, 1);
 635        }
 636    }
 637}
 638
 639/* Shift by immediate.  Includes special handling for shift == 0.  */
 640static inline void gen_arm_shift_im(TCGv_i32 var, int shiftop,
 641                                    int shift, int flags)
 642{
 643    switch (shiftop) {
 644    case 0: /* LSL */
 645        if (shift != 0) {
 646            if (flags)
 647                shifter_out_im(var, 32 - shift);
 648            tcg_gen_shli_i32(var, var, shift);
 649        }
 650        break;
 651    case 1: /* LSR */
 652        if (shift == 0) {
 653            if (flags) {
 654                tcg_gen_shri_i32(cpu_CF, var, 31);
 655            }
 656            tcg_gen_movi_i32(var, 0);
 657        } else {
 658            if (flags)
 659                shifter_out_im(var, shift - 1);
 660            tcg_gen_shri_i32(var, var, shift);
 661        }
 662        break;
 663    case 2: /* ASR */
 664        if (shift == 0)
 665            shift = 32;
 666        if (flags)
 667            shifter_out_im(var, shift - 1);
 668        if (shift == 32)
 669          shift = 31;
 670        tcg_gen_sari_i32(var, var, shift);
 671        break;
 672    case 3: /* ROR/RRX */
 673        if (shift != 0) {
 674            if (flags)
 675                shifter_out_im(var, shift - 1);
 676            tcg_gen_rotri_i32(var, var, shift); break;
 677        } else {
 678            TCGv_i32 tmp = tcg_temp_new_i32();
 679            tcg_gen_shli_i32(tmp, cpu_CF, 31);
 680            if (flags)
 681                shifter_out_im(var, 0);
 682            tcg_gen_shri_i32(var, var, 1);
 683            tcg_gen_or_i32(var, var, tmp);
 684            tcg_temp_free_i32(tmp);
 685        }
 686    }
  687}
 688
 689static inline void gen_arm_shift_reg(TCGv_i32 var, int shiftop,
 690                                     TCGv_i32 shift, int flags)
 691{
 692    if (flags) {
 693        switch (shiftop) {
 694        case 0: gen_helper_shl_cc(var, cpu_env, var, shift); break;
 695        case 1: gen_helper_shr_cc(var, cpu_env, var, shift); break;
 696        case 2: gen_helper_sar_cc(var, cpu_env, var, shift); break;
 697        case 3: gen_helper_ror_cc(var, cpu_env, var, shift); break;
 698        }
 699    } else {
 700        switch (shiftop) {
 701        case 0:
 702            gen_shl(var, var, shift);
 703            break;
 704        case 1:
 705            gen_shr(var, var, shift);
 706            break;
 707        case 2:
 708            gen_sar(var, var, shift);
 709            break;
 710        case 3: tcg_gen_andi_i32(shift, shift, 0x1f);
 711                tcg_gen_rotr_i32(var, var, shift); break;
 712        }
 713    }
 714    tcg_temp_free_i32(shift);
 715}
 716
 717#define PAS_OP(pfx) \
 718    switch (op2) {  \
 719    case 0: gen_pas_helper(glue(pfx,add16)); break; \
 720    case 1: gen_pas_helper(glue(pfx,addsubx)); break; \
 721    case 2: gen_pas_helper(glue(pfx,subaddx)); break; \
 722    case 3: gen_pas_helper(glue(pfx,sub16)); break; \
 723    case 4: gen_pas_helper(glue(pfx,add8)); break; \
 724    case 7: gen_pas_helper(glue(pfx,sub8)); break; \
 725    }
 726static void gen_arm_parallel_addsub(int op1, int op2, TCGv_i32 a, TCGv_i32 b)
 727{
 728    TCGv_ptr tmp;
 729
 730    switch (op1) {
 731#define gen_pas_helper(name) glue(gen_helper_,name)(a, a, b, tmp)
 732    case 1:
 733        tmp = tcg_temp_new_ptr();
 734        tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUARMState, GE));
 735        PAS_OP(s)
 736        tcg_temp_free_ptr(tmp);
 737        break;
 738    case 5:
 739        tmp = tcg_temp_new_ptr();
 740        tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUARMState, GE));
 741        PAS_OP(u)
 742        tcg_temp_free_ptr(tmp);
 743        break;
 744#undef gen_pas_helper
 745#define gen_pas_helper(name) glue(gen_helper_,name)(a, a, b)
 746    case 2:
 747        PAS_OP(q);
 748        break;
 749    case 3:
 750        PAS_OP(sh);
 751        break;
 752    case 6:
 753        PAS_OP(uq);
 754        break;
 755    case 7:
 756        PAS_OP(uh);
 757        break;
 758#undef gen_pas_helper
 759    }
 760}
 761#undef PAS_OP
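
/* The op1 values above select the helper prefix: 1 = s (signed, sets GE),
 * 2 = q (signed saturating), 3 = sh (signed halving), 5 = u (unsigned, sets
 * GE), 6 = uq (unsigned saturating), 7 = uh (unsigned halving).  Only the
 * GE-setting variants take the pointer to the GE flags in CPUARMState.
 */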
 762
 763/* For unknown reasons Arm and Thumb-2 use arbitrarily different encodings.  */
 764#define PAS_OP(pfx) \
 765    switch (op1) {  \
 766    case 0: gen_pas_helper(glue(pfx,add8)); break; \
 767    case 1: gen_pas_helper(glue(pfx,add16)); break; \
 768    case 2: gen_pas_helper(glue(pfx,addsubx)); break; \
 769    case 4: gen_pas_helper(glue(pfx,sub8)); break; \
 770    case 5: gen_pas_helper(glue(pfx,sub16)); break; \
 771    case 6: gen_pas_helper(glue(pfx,subaddx)); break; \
 772    }
 773static void gen_thumb2_parallel_addsub(int op1, int op2, TCGv_i32 a, TCGv_i32 b)
 774{
 775    TCGv_ptr tmp;
 776
 777    switch (op2) {
 778#define gen_pas_helper(name) glue(gen_helper_,name)(a, a, b, tmp)
 779    case 0:
 780        tmp = tcg_temp_new_ptr();
 781        tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUARMState, GE));
 782        PAS_OP(s)
 783        tcg_temp_free_ptr(tmp);
 784        break;
 785    case 4:
 786        tmp = tcg_temp_new_ptr();
 787        tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUARMState, GE));
 788        PAS_OP(u)
 789        tcg_temp_free_ptr(tmp);
 790        break;
 791#undef gen_pas_helper
 792#define gen_pas_helper(name) glue(gen_helper_,name)(a, a, b)
 793    case 1:
 794        PAS_OP(q);
 795        break;
 796    case 2:
 797        PAS_OP(sh);
 798        break;
 799    case 5:
 800        PAS_OP(uq);
 801        break;
 802    case 6:
 803        PAS_OP(uh);
 804        break;
 805#undef gen_pas_helper
 806    }
 807}
 808#undef PAS_OP
 809
 810/*
 811 * Generate a conditional based on ARM condition code cc.
  812 * This is common between ARM and AArch64 targets.
 813 */
 814void arm_test_cc(DisasCompare *cmp, int cc)
 815{
 816    TCGv_i32 value;
 817    TCGCond cond;
 818    bool global = true;
 819
 820    switch (cc) {
 821    case 0: /* eq: Z */
 822    case 1: /* ne: !Z */
 823        cond = TCG_COND_EQ;
 824        value = cpu_ZF;
 825        break;
 826
 827    case 2: /* cs: C */
 828    case 3: /* cc: !C */
 829        cond = TCG_COND_NE;
 830        value = cpu_CF;
 831        break;
 832
 833    case 4: /* mi: N */
 834    case 5: /* pl: !N */
 835        cond = TCG_COND_LT;
 836        value = cpu_NF;
 837        break;
 838
 839    case 6: /* vs: V */
 840    case 7: /* vc: !V */
 841        cond = TCG_COND_LT;
 842        value = cpu_VF;
 843        break;
 844
 845    case 8: /* hi: C && !Z */
 846    case 9: /* ls: !C || Z -> !(C && !Z) */
 847        cond = TCG_COND_NE;
 848        value = tcg_temp_new_i32();
 849        global = false;
 850        /* CF is 1 for C, so -CF is an all-bits-set mask for C;
 851           ZF is non-zero for !Z; so AND the two subexpressions.  */
 852        tcg_gen_neg_i32(value, cpu_CF);
 853        tcg_gen_and_i32(value, value, cpu_ZF);
 854        break;
 855
 856    case 10: /* ge: N == V -> N ^ V == 0 */
 857    case 11: /* lt: N != V -> N ^ V != 0 */
 858        /* Since we're only interested in the sign bit, == 0 is >= 0.  */
 859        cond = TCG_COND_GE;
 860        value = tcg_temp_new_i32();
 861        global = false;
 862        tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
 863        break;
 864
 865    case 12: /* gt: !Z && N == V */
 866    case 13: /* le: Z || N != V */
 867        cond = TCG_COND_NE;
 868        value = tcg_temp_new_i32();
 869        global = false;
 870        /* (N == V) is equal to the sign bit of ~(NF ^ VF).  Propagate
 871         * the sign bit then AND with ZF to yield the result.  */
 872        tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
 873        tcg_gen_sari_i32(value, value, 31);
 874        tcg_gen_andc_i32(value, cpu_ZF, value);
 875        break;
 876
 877    case 14: /* always */
 878    case 15: /* always */
 879        /* Use the ALWAYS condition, which will fold early.
 880         * It doesn't matter what we use for the value.  */
 881        cond = TCG_COND_ALWAYS;
 882        value = cpu_ZF;
 883        goto no_invert;
 884
 885    default:
 886        fprintf(stderr, "Bad condition code 0x%x\n", cc);
 887        abort();
 888    }
 889
 890    if (cc & 1) {
 891        cond = tcg_invert_cond(cond);
 892    }
 893
 894 no_invert:
 895    cmp->cond = cond;
 896    cmp->value = value;
 897    cmp->value_global = global;
 898}
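
/* Worked example for the "hi"/"ls" cases in arm_test_cc(): with C set and Z
 * clear, cpu_CF is 1 and cpu_ZF is non-zero, so -CF is all ones and the AND
 * leaves a non-zero value, making TCG_COND_NE hold for "hi" (cc = 8).  For
 * "ls" (cc = 9) the final cc & 1 check inverts the condition to TCG_COND_EQ,
 * which holds when either C is clear or Z is set.
 */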
 899
 900void arm_free_cc(DisasCompare *cmp)
 901{
 902    if (!cmp->value_global) {
 903        tcg_temp_free_i32(cmp->value);
 904    }
 905}
 906
 907void arm_jump_cc(DisasCompare *cmp, TCGLabel *label)
 908{
 909    tcg_gen_brcondi_i32(cmp->cond, cmp->value, 0, label);
 910}
 911
 912void arm_gen_test_cc(int cc, TCGLabel *label)
 913{
 914    DisasCompare cmp;
 915    arm_test_cc(&cmp, cc);
 916    arm_jump_cc(&cmp, label);
 917    arm_free_cc(&cmp);
 918}
 919
 920static const uint8_t table_logic_cc[16] = {
 921    1, /* and */
 922    1, /* xor */
 923    0, /* sub */
 924    0, /* rsb */
 925    0, /* add */
 926    0, /* adc */
 927    0, /* sbc */
 928    0, /* rsc */
 929    1, /* andl */
 930    1, /* xorl */
 931    0, /* cmp */
 932    0, /* cmn */
 933    1, /* orr */
 934    1, /* mov */
 935    1, /* bic */
 936    1, /* mvn */
 937};
 938
 939static inline void gen_set_condexec(DisasContext *s)
 940{
 941    if (s->condexec_mask) {
 942        uint32_t val = (s->condexec_cond << 4) | (s->condexec_mask >> 1);
 943        TCGv_i32 tmp = tcg_temp_new_i32();
 944        tcg_gen_movi_i32(tmp, val);
 945        store_cpu_field(tmp, condexec_bits);
 946    }
 947}
 948
 949static inline void gen_set_pc_im(DisasContext *s, target_ulong val)
 950{
 951    tcg_gen_movi_i32(cpu_R[15], val);
 952}
 953
 954/* Set PC and Thumb state from an immediate address.  */
 955static inline void gen_bx_im(DisasContext *s, uint32_t addr)
 956{
 957    TCGv_i32 tmp;
 958
 959    s->base.is_jmp = DISAS_JUMP;
 960    if (s->thumb != (addr & 1)) {
 961        tmp = tcg_temp_new_i32();
 962        tcg_gen_movi_i32(tmp, addr & 1);
 963        tcg_gen_st_i32(tmp, cpu_env, offsetof(CPUARMState, thumb));
 964        tcg_temp_free_i32(tmp);
 965    }
 966    tcg_gen_movi_i32(cpu_R[15], addr & ~1);
 967}
 968
 969/* Set PC and Thumb state from var.  var is marked as dead.  */
 970static inline void gen_bx(DisasContext *s, TCGv_i32 var)
 971{
 972    s->base.is_jmp = DISAS_JUMP;
 973    tcg_gen_andi_i32(cpu_R[15], var, ~1);
 974    tcg_gen_andi_i32(var, var, 1);
 975    store_cpu_field(var, thumb);
 976}
 977
 978/* Set PC and Thumb state from var. var is marked as dead.
 979 * For M-profile CPUs, include logic to detect exception-return
 980 * branches and handle them. This is needed for Thumb POP/LDM to PC, LDR to PC,
 981 * and BX reg, and no others, and happens only for code in Handler mode.
 982 */
 983static inline void gen_bx_excret(DisasContext *s, TCGv_i32 var)
 984{
 985    /* Generate the same code here as for a simple bx, but flag via
 986     * s->base.is_jmp that we need to do the rest of the work later.
 987     */
 988    gen_bx(s, var);
 989    if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY) ||
 990        (s->v7m_handler_mode && arm_dc_feature(s, ARM_FEATURE_M))) {
 991        s->base.is_jmp = DISAS_BX_EXCRET;
 992    }
 993}
 994
 995static inline void gen_bx_excret_final_code(DisasContext *s)
 996{
 997    /* Generate the code to finish possible exception return and end the TB */
 998    TCGLabel *excret_label = gen_new_label();
 999    uint32_t min_magic;
1000
1001    if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY)) {
1002        /* Covers FNC_RETURN and EXC_RETURN magic */
1003        min_magic = FNC_RETURN_MIN_MAGIC;
1004    } else {
1005        /* EXC_RETURN magic only */
1006        min_magic = EXC_RETURN_MIN_MAGIC;
1007    }
1008
1009    /* Is the new PC value in the magic range indicating exception return? */
1010    tcg_gen_brcondi_i32(TCG_COND_GEU, cpu_R[15], min_magic, excret_label);
1011    /* No: end the TB as we would for a DISAS_JMP */
1012    if (is_singlestepping(s)) {
1013        gen_singlestep_exception(s);
1014    } else {
1015        tcg_gen_exit_tb(NULL, 0);
1016    }
1017    gen_set_label(excret_label);
1018    /* Yes: this is an exception return.
1019     * At this point in runtime env->regs[15] and env->thumb will hold
1020     * the exception-return magic number, which do_v7m_exception_exit()
1021     * will read. Nothing else will be able to see those values because
1022     * the cpu-exec main loop guarantees that we will always go straight
1023     * from raising the exception to the exception-handling code.
1024     *
1025     * gen_ss_advance(s) does nothing on M profile currently but
1026     * calling it is conceptually the right thing as we have executed
1027     * this instruction (compare SWI, HVC, SMC handling).
1028     */
1029    gen_ss_advance(s);
1030    gen_exception_internal(EXCP_EXCEPTION_EXIT);
1031}
1032
1033static inline void gen_bxns(DisasContext *s, int rm)
1034{
1035    TCGv_i32 var = load_reg(s, rm);
1036
1037    /* The bxns helper may raise an EXCEPTION_EXIT exception, so in theory
1038     * we need to sync state before calling it, but:
1039     *  - we don't need to do gen_set_pc_im() because the bxns helper will
1040     *    always set the PC itself
1041     *  - we don't need to do gen_set_condexec() because BXNS is UNPREDICTABLE
1042     *    unless it's outside an IT block or the last insn in an IT block,
1043     *    so we know that condexec == 0 (already set at the top of the TB)
1044     *    is correct in the non-UNPREDICTABLE cases, and we can choose
1045     *    "zeroes the IT bits" as our UNPREDICTABLE behaviour otherwise.
1046     */
1047    gen_helper_v7m_bxns(cpu_env, var);
1048    tcg_temp_free_i32(var);
1049    s->base.is_jmp = DISAS_EXIT;
1050}
1051
1052static inline void gen_blxns(DisasContext *s, int rm)
1053{
1054    TCGv_i32 var = load_reg(s, rm);
1055
1056    /* We don't need to sync condexec state, for the same reason as bxns.
1057     * We do however need to set the PC, because the blxns helper reads it.
1058     * The blxns helper may throw an exception.
1059     */
1060    gen_set_pc_im(s, s->pc);
1061    gen_helper_v7m_blxns(cpu_env, var);
1062    tcg_temp_free_i32(var);
1063    s->base.is_jmp = DISAS_EXIT;
1064}
1065
1066/* Variant of store_reg which uses branch&exchange logic when storing
1067   to r15 in ARM architecture v7 and above. The source must be a temporary
1068   and will be marked as dead. */
1069static inline void store_reg_bx(DisasContext *s, int reg, TCGv_i32 var)
1070{
1071    if (reg == 15 && ENABLE_ARCH_7) {
1072        gen_bx(s, var);
1073    } else {
1074        store_reg(s, reg, var);
1075    }
1076}
1077
1078/* Variant of store_reg which uses branch&exchange logic when storing
1079 * to r15 in ARM architecture v5T and above. This is used for storing
1080 * the results of a LDR/LDM/POP into r15, and corresponds to the cases
1081 * in the ARM ARM which use the LoadWritePC() pseudocode function. */
1082static inline void store_reg_from_load(DisasContext *s, int reg, TCGv_i32 var)
1083{
1084    if (reg == 15 && ENABLE_ARCH_5) {
1085        gen_bx_excret(s, var);
1086    } else {
1087        store_reg(s, reg, var);
1088    }
1089}
1090
1091#ifdef CONFIG_USER_ONLY
1092#define IS_USER_ONLY 1
1093#else
1094#define IS_USER_ONLY 0
1095#endif
1096
1097/* Abstractions of "generate code to do a guest load/store for
1098 * AArch32", where a vaddr is always 32 bits (and is zero
1099 * extended if we're a 64 bit core) and  data is also
1100 * 32 bits unless specifically doing a 64 bit access.
1101 * These functions work like tcg_gen_qemu_{ld,st}* except
1102 * that the address argument is TCGv_i32 rather than TCGv.
1103 */
1104
1105static inline TCGv gen_aa32_addr(DisasContext *s, TCGv_i32 a32, TCGMemOp op)
1106{
1107    TCGv addr = tcg_temp_new();
1108    tcg_gen_extu_i32_tl(addr, a32);
1109
1110    /* Not needed for user-mode BE32, where we use MO_BE instead.  */
1111    if (!IS_USER_ONLY && s->sctlr_b && (op & MO_SIZE) < MO_32) {
1112        tcg_gen_xori_tl(addr, addr, 4 - (1 << (op & MO_SIZE)));
1113    }
1114    return addr;
1115}
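
/* Example of the BE32 address adjustment above: with SCTLR.B set, a byte
 * load from address 0x1003 is performed at 0x1003 ^ 3 = 0x1000 and a halfword
 * load from 0x1002 at 0x1002 ^ 2 = 0x1000, so sub-word accesses address the
 * correct bytes within each little-endian 32-bit word.
 */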
1116
1117static void gen_aa32_ld_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
1118                            int index, TCGMemOp opc)
1119{
1120    TCGv addr;
1121
1122    if (arm_dc_feature(s, ARM_FEATURE_M) &&
1123        !arm_dc_feature(s, ARM_FEATURE_M_MAIN)) {
1124        opc |= MO_ALIGN;
1125    }
1126
1127    addr = gen_aa32_addr(s, a32, opc);
1128    tcg_gen_qemu_ld_i32(val, addr, index, opc);
1129    tcg_temp_free(addr);
1130}
1131
1132static void gen_aa32_st_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
1133                            int index, TCGMemOp opc)
1134{
1135    TCGv addr;
1136
1137    if (arm_dc_feature(s, ARM_FEATURE_M) &&
1138        !arm_dc_feature(s, ARM_FEATURE_M_MAIN)) {
1139        opc |= MO_ALIGN;
1140    }
1141
1142    addr = gen_aa32_addr(s, a32, opc);
1143    tcg_gen_qemu_st_i32(val, addr, index, opc);
1144    tcg_temp_free(addr);
1145}
1146
1147#define DO_GEN_LD(SUFF, OPC)                                             \
1148static inline void gen_aa32_ld##SUFF(DisasContext *s, TCGv_i32 val,      \
1149                                     TCGv_i32 a32, int index)            \
1150{                                                                        \
1151    gen_aa32_ld_i32(s, val, a32, index, OPC | s->be_data);               \
1152}                                                                        \
1153static inline void gen_aa32_ld##SUFF##_iss(DisasContext *s,              \
1154                                           TCGv_i32 val,                 \
1155                                           TCGv_i32 a32, int index,      \
1156                                           ISSInfo issinfo)              \
1157{                                                                        \
1158    gen_aa32_ld##SUFF(s, val, a32, index);                               \
1159    disas_set_da_iss(s, OPC, issinfo);                                   \
1160}
1161
1162#define DO_GEN_ST(SUFF, OPC)                                             \
1163static inline void gen_aa32_st##SUFF(DisasContext *s, TCGv_i32 val,      \
1164                                     TCGv_i32 a32, int index)            \
1165{                                                                        \
1166    gen_aa32_st_i32(s, val, a32, index, OPC | s->be_data);               \
1167}                                                                        \
1168static inline void gen_aa32_st##SUFF##_iss(DisasContext *s,              \
1169                                           TCGv_i32 val,                 \
1170                                           TCGv_i32 a32, int index,      \
1171                                           ISSInfo issinfo)              \
1172{                                                                        \
1173    gen_aa32_st##SUFF(s, val, a32, index);                               \
1174    disas_set_da_iss(s, OPC, issinfo | ISSIsWrite);                      \
1175}
1176
1177static inline void gen_aa32_frob64(DisasContext *s, TCGv_i64 val)
1178{
1179    /* Not needed for user-mode BE32, where we use MO_BE instead.  */
1180    if (!IS_USER_ONLY && s->sctlr_b) {
1181        tcg_gen_rotri_i64(val, val, 32);
1182    }
1183}
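
/* The 32-bit rotate above simply swaps the two words of the loaded 64-bit
 * value, which is how the word order of a doubleword access is fixed up for
 * BE32 (SCTLR.B) system-mode guests.
 */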
1184
1185static void gen_aa32_ld_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
1186                            int index, TCGMemOp opc)
1187{
1188    TCGv addr = gen_aa32_addr(s, a32, opc);
1189    tcg_gen_qemu_ld_i64(val, addr, index, opc);
1190    gen_aa32_frob64(s, val);
1191    tcg_temp_free(addr);
1192}
1193
1194static inline void gen_aa32_ld64(DisasContext *s, TCGv_i64 val,
1195                                 TCGv_i32 a32, int index)
1196{
1197    gen_aa32_ld_i64(s, val, a32, index, MO_Q | s->be_data);
1198}
1199
1200static void gen_aa32_st_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
1201                            int index, TCGMemOp opc)
1202{
1203    TCGv addr = gen_aa32_addr(s, a32, opc);
1204
1205    /* Not needed for user-mode BE32, where we use MO_BE instead.  */
1206    if (!IS_USER_ONLY && s->sctlr_b) {
1207        TCGv_i64 tmp = tcg_temp_new_i64();
1208        tcg_gen_rotri_i64(tmp, val, 32);
1209        tcg_gen_qemu_st_i64(tmp, addr, index, opc);
1210        tcg_temp_free_i64(tmp);
1211    } else {
1212        tcg_gen_qemu_st_i64(val, addr, index, opc);
1213    }
1214    tcg_temp_free(addr);
1215}
1216
1217static inline void gen_aa32_st64(DisasContext *s, TCGv_i64 val,
1218                                 TCGv_i32 a32, int index)
1219{
1220    gen_aa32_st_i64(s, val, a32, index, MO_Q | s->be_data);
1221}
1222
1223DO_GEN_LD(8s, MO_SB)
1224DO_GEN_LD(8u, MO_UB)
1225DO_GEN_LD(16s, MO_SW)
1226DO_GEN_LD(16u, MO_UW)
1227DO_GEN_LD(32u, MO_UL)
1228DO_GEN_ST(8, MO_UB)
1229DO_GEN_ST(16, MO_UW)
1230DO_GEN_ST(32, MO_UL)
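
/* The expansions above provide gen_aa32_ld8s/ld8u/ld16s/ld16u/ld32u and
 * gen_aa32_st8/st16/st32, plus _iss variants that additionally record the
 * instruction syndrome for a possible data abort via disas_set_da_iss().
 */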
1231
1232static inline void gen_hvc(DisasContext *s, int imm16)
1233{
1234    /* The pre HVC helper handles cases when HVC gets trapped
1235     * as an undefined insn by runtime configuration (ie before
1236     * the insn really executes).
1237     */
1238    gen_set_pc_im(s, s->pc - 4);
1239    gen_helper_pre_hvc(cpu_env);
1240    /* Otherwise we will treat this as a real exception which
1241     * happens after execution of the insn. (The distinction matters
1242     * for the PC value reported to the exception handler and also
1243     * for single stepping.)
1244     */
1245    s->svc_imm = imm16;
1246    gen_set_pc_im(s, s->pc);
1247    s->base.is_jmp = DISAS_HVC;
1248}
1249
1250static inline void gen_smc(DisasContext *s)
1251{
1252    /* As with HVC, we may take an exception either before or after
1253     * the insn executes.
1254     */
1255    TCGv_i32 tmp;
1256
1257    gen_set_pc_im(s, s->pc - 4);
1258    tmp = tcg_const_i32(syn_aa32_smc());
1259    gen_helper_pre_smc(cpu_env, tmp);
1260    tcg_temp_free_i32(tmp);
1261    gen_set_pc_im(s, s->pc);
1262    s->base.is_jmp = DISAS_SMC;
1263}
1264
1265static void gen_exception_internal_insn(DisasContext *s, int offset, int excp)
1266{
1267    gen_set_condexec(s);
1268    gen_set_pc_im(s, s->pc - offset);
1269    gen_exception_internal(excp);
1270    s->base.is_jmp = DISAS_NORETURN;
1271}
1272
1273static void gen_exception_insn(DisasContext *s, int offset, int excp,
1274                               int syn, uint32_t target_el)
1275{
1276    gen_set_condexec(s);
1277    gen_set_pc_im(s, s->pc - offset);
1278    gen_exception(excp, syn, target_el);
1279    s->base.is_jmp = DISAS_NORETURN;
1280}
1281
1282static void gen_exception_bkpt_insn(DisasContext *s, int offset, uint32_t syn)
1283{
1284    TCGv_i32 tcg_syn;
1285
1286    gen_set_condexec(s);
1287    gen_set_pc_im(s, s->pc - offset);
1288    tcg_syn = tcg_const_i32(syn);
1289    gen_helper_exception_bkpt_insn(cpu_env, tcg_syn);
1290    tcg_temp_free_i32(tcg_syn);
1291    s->base.is_jmp = DISAS_NORETURN;
1292}
1293
1294/* Force a TB lookup after an instruction that changes the CPU state.  */
1295static inline void gen_lookup_tb(DisasContext *s)
1296{
1297    tcg_gen_movi_i32(cpu_R[15], s->pc & ~1);
1298    s->base.is_jmp = DISAS_EXIT;
1299}
1300
1301static inline void gen_hlt(DisasContext *s, int imm)
1302{
1303    /* HLT. This has two purposes.
1304     * Architecturally, it is an external halting debug instruction.
 1305     * Since QEMU doesn't implement external debug, we treat this as
 1306     * the architecture requires when halting debug is disabled: it will UNDEF.
1307     * Secondly, "HLT 0x3C" is a T32 semihosting trap instruction,
1308     * and "HLT 0xF000" is an A32 semihosting syscall. These traps
1309     * must trigger semihosting even for ARMv7 and earlier, where
1310     * HLT was an undefined encoding.
1311     * In system mode, we don't allow userspace access to
1312     * semihosting, to provide some semblance of security
1313     * (and for consistency with our 32-bit semihosting).
1314     */
1315    if (semihosting_enabled() &&
1316#ifndef CONFIG_USER_ONLY
1317        s->current_el != 0 &&
1318#endif
1319        (imm == (s->thumb ? 0x3c : 0xf000))) {
1320        gen_exception_internal_insn(s, 0, EXCP_SEMIHOST);
1321        return;
1322    }
1323
1324    gen_exception_insn(s, s->thumb ? 2 : 4, EXCP_UDEF, syn_uncategorized(),
1325                       default_exception_el(s));
1326}
1327
1328static inline void gen_add_data_offset(DisasContext *s, unsigned int insn,
1329                                       TCGv_i32 var)
1330{
1331    int val, rm, shift, shiftop;
1332    TCGv_i32 offset;
1333
1334    if (!(insn & (1 << 25))) {
1335        /* immediate */
1336        val = insn & 0xfff;
1337        if (!(insn & (1 << 23)))
1338            val = -val;
1339        if (val != 0)
1340            tcg_gen_addi_i32(var, var, val);
1341    } else {
1342        /* shift/register */
1343        rm = (insn) & 0xf;
1344        shift = (insn >> 7) & 0x1f;
1345        shiftop = (insn >> 5) & 3;
1346        offset = load_reg(s, rm);
1347        gen_arm_shift_im(offset, shiftop, shift, 0);
1348        if (!(insn & (1 << 23)))
1349            tcg_gen_sub_i32(var, var, offset);
1350        else
1351            tcg_gen_add_i32(var, var, offset);
1352        tcg_temp_free_i32(offset);
1353    }
1354}
1355
1356static inline void gen_add_datah_offset(DisasContext *s, unsigned int insn,
1357                                        int extra, TCGv_i32 var)
1358{
1359    int val, rm;
1360    TCGv_i32 offset;
1361
1362    if (insn & (1 << 22)) {
1363        /* immediate */
1364        val = (insn & 0xf) | ((insn >> 4) & 0xf0);
1365        if (!(insn & (1 << 23)))
1366            val = -val;
1367        val += extra;
1368        if (val != 0)
1369            tcg_gen_addi_i32(var, var, val);
1370    } else {
1371        /* register */
1372        if (extra)
1373            tcg_gen_addi_i32(var, var, extra);
1374        rm = (insn) & 0xf;
1375        offset = load_reg(s, rm);
1376        if (!(insn & (1 << 23)))
1377            tcg_gen_sub_i32(var, var, offset);
1378        else
1379            tcg_gen_add_i32(var, var, offset);
1380        tcg_temp_free_i32(offset);
1381    }
1382}
1383
1384static TCGv_ptr get_fpstatus_ptr(int neon)
1385{
1386    TCGv_ptr statusptr = tcg_temp_new_ptr();
1387    int offset;
1388    if (neon) {
1389        offset = offsetof(CPUARMState, vfp.standard_fp_status);
1390    } else {
1391        offset = offsetof(CPUARMState, vfp.fp_status);
1392    }
1393    tcg_gen_addi_ptr(statusptr, cpu_env, offset);
1394    return statusptr;
1395}
1396
1397#define VFP_OP2(name)                                                 \
1398static inline void gen_vfp_##name(int dp)                             \
1399{                                                                     \
1400    TCGv_ptr fpst = get_fpstatus_ptr(0);                              \
1401    if (dp) {                                                         \
1402        gen_helper_vfp_##name##d(cpu_F0d, cpu_F0d, cpu_F1d, fpst);    \
1403    } else {                                                          \
1404        gen_helper_vfp_##name##s(cpu_F0s, cpu_F0s, cpu_F1s, fpst);    \
1405    }                                                                 \
1406    tcg_temp_free_ptr(fpst);                                          \
1407}
1408
1409VFP_OP2(add)
1410VFP_OP2(sub)
1411VFP_OP2(mul)
1412VFP_OP2(div)
1413
1414#undef VFP_OP2
1415
1416static inline void gen_vfp_F1_mul(int dp)
1417{
1418    /* Like gen_vfp_mul() but put result in F1 */
1419    TCGv_ptr fpst = get_fpstatus_ptr(0);
1420    if (dp) {
1421        gen_helper_vfp_muld(cpu_F1d, cpu_F0d, cpu_F1d, fpst);
1422    } else {
1423        gen_helper_vfp_muls(cpu_F1s, cpu_F0s, cpu_F1s, fpst);
1424    }
1425    tcg_temp_free_ptr(fpst);
1426}
1427
1428static inline void gen_vfp_F1_neg(int dp)
1429{
1430    /* Like gen_vfp_neg() but put result in F1 */
1431    if (dp) {
1432        gen_helper_vfp_negd(cpu_F1d, cpu_F0d);
1433    } else {
1434        gen_helper_vfp_negs(cpu_F1s, cpu_F0s);
1435    }
1436}
1437
1438static inline void gen_vfp_abs(int dp)
1439{
1440    if (dp)
1441        gen_helper_vfp_absd(cpu_F0d, cpu_F0d);
1442    else
1443        gen_helper_vfp_abss(cpu_F0s, cpu_F0s);
1444}
1445
1446static inline void gen_vfp_neg(int dp)
1447{
1448    if (dp)
1449        gen_helper_vfp_negd(cpu_F0d, cpu_F0d);
1450    else
1451        gen_helper_vfp_negs(cpu_F0s, cpu_F0s);
1452}
1453
1454static inline void gen_vfp_sqrt(int dp)
1455{
1456    if (dp)
1457        gen_helper_vfp_sqrtd(cpu_F0d, cpu_F0d, cpu_env);
1458    else
1459        gen_helper_vfp_sqrts(cpu_F0s, cpu_F0s, cpu_env);
1460}
1461
1462static inline void gen_vfp_cmp(int dp)
1463{
1464    if (dp)
1465        gen_helper_vfp_cmpd(cpu_F0d, cpu_F1d, cpu_env);
1466    else
1467        gen_helper_vfp_cmps(cpu_F0s, cpu_F1s, cpu_env);
1468}
1469
1470static inline void gen_vfp_cmpe(int dp)
1471{
1472    if (dp)
1473        gen_helper_vfp_cmped(cpu_F0d, cpu_F1d, cpu_env);
1474    else
1475        gen_helper_vfp_cmpes(cpu_F0s, cpu_F1s, cpu_env);
1476}
1477
1478static inline void gen_vfp_F1_ld0(int dp)
1479{
1480    if (dp)
1481        tcg_gen_movi_i64(cpu_F1d, 0);
1482    else
1483        tcg_gen_movi_i32(cpu_F1s, 0);
1484}
1485
1486#define VFP_GEN_ITOF(name) \
1487static inline void gen_vfp_##name(int dp, int neon) \
1488{ \
1489    TCGv_ptr statusptr = get_fpstatus_ptr(neon); \
1490    if (dp) { \
1491        gen_helper_vfp_##name##d(cpu_F0d, cpu_F0s, statusptr); \
1492    } else { \
1493        gen_helper_vfp_##name##s(cpu_F0s, cpu_F0s, statusptr); \
1494    } \
1495    tcg_temp_free_ptr(statusptr); \
1496}
1497
1498VFP_GEN_ITOF(uito)
1499VFP_GEN_ITOF(sito)
1500#undef VFP_GEN_ITOF
1501
1502#define VFP_GEN_FTOI(name) \
1503static inline void gen_vfp_##name(int dp, int neon) \
1504{ \
1505    TCGv_ptr statusptr = get_fpstatus_ptr(neon); \
1506    if (dp) { \
1507        gen_helper_vfp_##name##d(cpu_F0s, cpu_F0d, statusptr); \
1508    } else { \
1509        gen_helper_vfp_##name##s(cpu_F0s, cpu_F0s, statusptr); \
1510    } \
1511    tcg_temp_free_ptr(statusptr); \
1512}
1513
1514VFP_GEN_FTOI(toui)
1515VFP_GEN_FTOI(touiz)
1516VFP_GEN_FTOI(tosi)
1517VFP_GEN_FTOI(tosiz)
1518#undef VFP_GEN_FTOI
1519
1520#define VFP_GEN_FIX(name, round) \
1521static inline void gen_vfp_##name(int dp, int shift, int neon) \
1522{ \
1523    TCGv_i32 tmp_shift = tcg_const_i32(shift); \
1524    TCGv_ptr statusptr = get_fpstatus_ptr(neon); \
1525    if (dp) { \
1526        gen_helper_vfp_##name##d##round(cpu_F0d, cpu_F0d, tmp_shift, \
1527                                        statusptr); \
1528    } else { \
1529        gen_helper_vfp_##name##s##round(cpu_F0s, cpu_F0s, tmp_shift, \
1530                                        statusptr); \
1531    } \
1532    tcg_temp_free_i32(tmp_shift); \
1533    tcg_temp_free_ptr(statusptr); \
1534}
1535VFP_GEN_FIX(tosh, _round_to_zero)
1536VFP_GEN_FIX(tosl, _round_to_zero)
1537VFP_GEN_FIX(touh, _round_to_zero)
1538VFP_GEN_FIX(toul, _round_to_zero)
1539VFP_GEN_FIX(shto, )
1540VFP_GEN_FIX(slto, )
1541VFP_GEN_FIX(uhto, )
1542VFP_GEN_FIX(ulto, )
1543#undef VFP_GEN_FIX
1544
1545static inline void gen_vfp_ld(DisasContext *s, int dp, TCGv_i32 addr)
1546{
1547    if (dp) {
1548        gen_aa32_ld64(s, cpu_F0d, addr, get_mem_index(s));
1549    } else {
1550        gen_aa32_ld32u(s, cpu_F0s, addr, get_mem_index(s));
1551    }
1552}
1553
1554static inline void gen_vfp_st(DisasContext *s, int dp, TCGv_i32 addr)
1555{
1556    if (dp) {
1557        gen_aa32_st64(s, cpu_F0d, addr, get_mem_index(s));
1558    } else {
1559        gen_aa32_st32(s, cpu_F0s, addr, get_mem_index(s));
1560    }
1561}
1562
1563static inline long vfp_reg_offset(bool dp, unsigned reg)
1564{
1565    if (dp) {
1566        return offsetof(CPUARMState, vfp.zregs[reg >> 1].d[reg & 1]);
1567    } else {
1568        long ofs = offsetof(CPUARMState, vfp.zregs[reg >> 2].d[(reg >> 1) & 1]);
1569        if (reg & 1) {
1570            ofs += offsetof(CPU_DoubleU, l.upper);
1571        } else {
1572            ofs += offsetof(CPU_DoubleU, l.lower);
1573        }
1574        return ofs;
1575    }
1576}
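
/* vfp_reg_offset() maps the AArch32 VFP registers onto the zregs array:
 * double register D<n> is element (n & 1) of zregs[n >> 1], and single
 * register S<n> is the low (n even) or high (n odd) half of D(n >> 1).
 * For example S5 is the upper 32 bits of zregs[1].d[0], i.e. of D2.
 */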
1577
1578/* Return the offset of a 32-bit piece of a NEON register.
1579   zero is the least significant end of the register.  */
1580static inline long
1581neon_reg_offset (int reg, int n)
1582{
1583    int sreg;
1584    sreg = reg * 2 + n;
1585    return vfp_reg_offset(0, sreg);
1586}
1587
1588/* Return the offset of a 2**SIZE piece of a NEON register, at index ELE,
1589 * where 0 is the least significant end of the register.
1590 */
1591static inline long
1592neon_element_offset(int reg, int element, TCGMemOp size)
1593{
1594    int element_size = 1 << size;
1595    int ofs = element * element_size;
1596#ifdef HOST_WORDS_BIGENDIAN
1597    /* Calculate the offset assuming fully little-endian,
1598     * then XOR to account for the order of the 8-byte units.
1599     */
1600    if (element_size < 8) {
1601        ofs ^= 8 - element_size;
1602    }
1603#endif
1604    return neon_reg_offset(reg, 0) + ofs;
1605}
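
/* Example of the big-endian adjustment above: on a big-endian host a 16-bit
 * element with index 0 lives at byte offset 0 ^ (8 - 2) = 6 within its 8-byte
 * unit, because each unit is stored as a host-order 64-bit value.
 */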
1606
1607static TCGv_i32 neon_load_reg(int reg, int pass)
1608{
1609    TCGv_i32 tmp = tcg_temp_new_i32();
1610    tcg_gen_ld_i32(tmp, cpu_env, neon_reg_offset(reg, pass));
1611    return tmp;
1612}
1613
1614static void neon_load_element(TCGv_i32 var, int reg, int ele, TCGMemOp mop)
1615{
1616    long offset = neon_element_offset(reg, ele, mop & MO_SIZE);
1617
1618    switch (mop) {
1619    case MO_UB:
1620        tcg_gen_ld8u_i32(var, cpu_env, offset);
1621        break;
1622    case MO_UW:
1623        tcg_gen_ld16u_i32(var, cpu_env, offset);
1624        break;
1625    case MO_UL:
1626        tcg_gen_ld_i32(var, cpu_env, offset);
1627        break;
1628    default:
1629        g_assert_not_reached();
1630    }
1631}
1632
1633static void neon_load_element64(TCGv_i64 var, int reg, int ele, TCGMemOp mop)
1634{
1635    long offset = neon_element_offset(reg, ele, mop & MO_SIZE);
1636
1637    switch (mop) {
1638    case MO_UB:
1639        tcg_gen_ld8u_i64(var, cpu_env, offset);
1640        break;
1641    case MO_UW:
1642        tcg_gen_ld16u_i64(var, cpu_env, offset);
1643        break;
1644    case MO_UL:
1645        tcg_gen_ld32u_i64(var, cpu_env, offset);
1646        break;
1647    case MO_Q:
1648        tcg_gen_ld_i64(var, cpu_env, offset);
1649        break;
1650    default:
1651        g_assert_not_reached();
1652    }
1653}
1654
1655static void neon_store_reg(int reg, int pass, TCGv_i32 var)
1656{
1657    tcg_gen_st_i32(var, cpu_env, neon_reg_offset(reg, pass));
1658    tcg_temp_free_i32(var);
1659}
1660
1661static void neon_store_element(int reg, int ele, TCGMemOp size, TCGv_i32 var)
1662{
1663    long offset = neon_element_offset(reg, ele, size);
1664
1665    switch (size) {
1666    case MO_8:
1667        tcg_gen_st8_i32(var, cpu_env, offset);
1668        break;
1669    case MO_16:
1670        tcg_gen_st16_i32(var, cpu_env, offset);
1671        break;
1672    case MO_32:
1673        tcg_gen_st_i32(var, cpu_env, offset);
1674        break;
1675    default:
1676        g_assert_not_reached();
1677    }
1678}
1679
1680static void neon_store_element64(int reg, int ele, TCGMemOp size, TCGv_i64 var)
1681{
1682    long offset = neon_element_offset(reg, ele, size);
1683
1684    switch (size) {
1685    case MO_8:
1686        tcg_gen_st8_i64(var, cpu_env, offset);
1687        break;
1688    case MO_16:
1689        tcg_gen_st16_i64(var, cpu_env, offset);
1690        break;
1691    case MO_32:
1692        tcg_gen_st32_i64(var, cpu_env, offset);
1693        break;
1694    case MO_64:
1695        tcg_gen_st_i64(var, cpu_env, offset);
1696        break;
1697    default:
1698        g_assert_not_reached();
1699    }
1700}
1701
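/* Load or store an entire 64-bit D register.  */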
1702static inline void neon_load_reg64(TCGv_i64 var, int reg)
1703{
1704    tcg_gen_ld_i64(var, cpu_env, vfp_reg_offset(1, reg));
1705}
1706
1707static inline void neon_store_reg64(TCGv_i64 var, int reg)
1708{
1709    tcg_gen_st_i64(var, cpu_env, vfp_reg_offset(1, reg));
1710}
1711
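/* Return a pointer temp addressing VFP register REG within CPUARMState.  */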
1712static TCGv_ptr vfp_reg_ptr(bool dp, int reg)
1713{
1714    TCGv_ptr ret = tcg_temp_new_ptr();
1715    tcg_gen_addi_ptr(ret, cpu_env, vfp_reg_offset(dp, reg));
1716    return ret;
1717}
1718
1719#define tcg_gen_ld_f32 tcg_gen_ld_i32
1720#define tcg_gen_ld_f64 tcg_gen_ld_i64
1721#define tcg_gen_st_f32 tcg_gen_st_i32
1722#define tcg_gen_st_f64 tcg_gen_st_i64
1723
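/* Copy VFP register REG to or from the F0/F1 working registers.  */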
1724static inline void gen_mov_F0_vreg(int dp, int reg)
1725{
1726    if (dp)
1727        tcg_gen_ld_f64(cpu_F0d, cpu_env, vfp_reg_offset(dp, reg));
1728    else
1729        tcg_gen_ld_f32(cpu_F0s, cpu_env, vfp_reg_offset(dp, reg));
1730}
1731
1732static inline void gen_mov_F1_vreg(int dp, int reg)
1733{
1734    if (dp)
1735        tcg_gen_ld_f64(cpu_F1d, cpu_env, vfp_reg_offset(dp, reg));
1736    else
1737        tcg_gen_ld_f32(cpu_F1s, cpu_env, vfp_reg_offset(dp, reg));
1738}
1739
1740static inline void gen_mov_vreg_F0(int dp, int reg)
1741{
1742    if (dp)
1743        tcg_gen_st_f64(cpu_F0d, cpu_env, vfp_reg_offset(dp, reg));
1744    else
1745        tcg_gen_st_f32(cpu_F0s, cpu_env, vfp_reg_offset(dp, reg));
1746}
1747
1748#define ARM_CP_RW_BIT   (1 << 20)
1749
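/* Access the 64-bit iwMMXt data registers (wRn) in CPUARMState.  */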
1750static inline void iwmmxt_load_reg(TCGv_i64 var, int reg)
1751{
1752    tcg_gen_ld_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
1753}
1754
1755static inline void iwmmxt_store_reg(TCGv_i64 var, int reg)
1756{
1757    tcg_gen_st_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
1758}
1759
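/* Access the 32-bit iwMMXt control registers (wCx): the load returns a new
   temp, the store consumes VAR.  */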
1760static inline TCGv_i32 iwmmxt_load_creg(int reg)
1761{
1762    TCGv_i32 var = tcg_temp_new_i32();
1763    tcg_gen_ld_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
1764    return var;
1765}
1766
1767static inline void iwmmxt_store_creg(int reg, TCGv_i32 var)
1768{
1769    tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
1770    tcg_temp_free_i32(var);
1771}
1772
1773static inline void gen_op_iwmmxt_movq_wRn_M0(int rn)
1774{
1775    iwmmxt_store_reg(cpu_M0, rn);
1776}
1777
1778static inline void gen_op_iwmmxt_movq_M0_wRn(int rn)
1779{
1780    iwmmxt_load_reg(cpu_M0, rn);
1781}
1782
1783static inline void gen_op_iwmmxt_orq_M0_wRn(int rn)
1784{
1785    iwmmxt_load_reg(cpu_V1, rn);
1786    tcg_gen_or_i64(cpu_M0, cpu_M0, cpu_V1);
1787}
1788
1789static inline void gen_op_iwmmxt_andq_M0_wRn(int rn)
1790{
1791    iwmmxt_load_reg(cpu_V1, rn);
1792    tcg_gen_and_i64(cpu_M0, cpu_M0, cpu_V1);
1793}
1794
1795static inline void gen_op_iwmmxt_xorq_M0_wRn(int rn)
1796{
1797    iwmmxt_load_reg(cpu_V1, rn);
1798    tcg_gen_xor_i64(cpu_M0, cpu_M0, cpu_V1);
1799}
1800
1801#define IWMMXT_OP(name) \
1802static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
1803{ \
1804    iwmmxt_load_reg(cpu_V1, rn); \
1805    gen_helper_iwmmxt_##name(cpu_M0, cpu_M0, cpu_V1); \
1806}
1807
1808#define IWMMXT_OP_ENV(name) \
1809static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
1810{ \
1811    iwmmxt_load_reg(cpu_V1, rn); \
1812    gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0, cpu_V1); \
1813}
1814
1815#define IWMMXT_OP_ENV_SIZE(name) \
1816IWMMXT_OP_ENV(name##b) \
1817IWMMXT_OP_ENV(name##w) \
1818IWMMXT_OP_ENV(name##l)
1819
1820#define IWMMXT_OP_ENV1(name) \
1821static inline void gen_op_iwmmxt_##name##_M0(void) \
1822{ \
1823    gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0); \
1824}
1825
1826IWMMXT_OP(maddsq)
1827IWMMXT_OP(madduq)
1828IWMMXT_OP(sadb)
1829IWMMXT_OP(sadw)
1830IWMMXT_OP(mulslw)
1831IWMMXT_OP(mulshw)
1832IWMMXT_OP(mululw)
1833IWMMXT_OP(muluhw)
1834IWMMXT_OP(macsw)
1835IWMMXT_OP(macuw)
1836
1837IWMMXT_OP_ENV_SIZE(unpackl)
1838IWMMXT_OP_ENV_SIZE(unpackh)
1839
1840IWMMXT_OP_ENV1(unpacklub)
1841IWMMXT_OP_ENV1(unpackluw)
1842IWMMXT_OP_ENV1(unpacklul)
1843IWMMXT_OP_ENV1(unpackhub)
1844IWMMXT_OP_ENV1(unpackhuw)
1845IWMMXT_OP_ENV1(unpackhul)
1846IWMMXT_OP_ENV1(unpacklsb)
1847IWMMXT_OP_ENV1(unpacklsw)
1848IWMMXT_OP_ENV1(unpacklsl)
1849IWMMXT_OP_ENV1(unpackhsb)
1850IWMMXT_OP_ENV1(unpackhsw)
1851IWMMXT_OP_ENV1(unpackhsl)
1852
1853IWMMXT_OP_ENV_SIZE(cmpeq)
1854IWMMXT_OP_ENV_SIZE(cmpgtu)
1855IWMMXT_OP_ENV_SIZE(cmpgts)
1856
1857IWMMXT_OP_ENV_SIZE(mins)
1858IWMMXT_OP_ENV_SIZE(minu)
1859IWMMXT_OP_ENV_SIZE(maxs)
1860IWMMXT_OP_ENV_SIZE(maxu)
1861
1862IWMMXT_OP_ENV_SIZE(subn)
1863IWMMXT_OP_ENV_SIZE(addn)
1864IWMMXT_OP_ENV_SIZE(subu)
1865IWMMXT_OP_ENV_SIZE(addu)
1866IWMMXT_OP_ENV_SIZE(subs)
1867IWMMXT_OP_ENV_SIZE(adds)
1868
1869IWMMXT_OP_ENV(avgb0)
1870IWMMXT_OP_ENV(avgb1)
1871IWMMXT_OP_ENV(avgw0)
1872IWMMXT_OP_ENV(avgw1)
1873
1874IWMMXT_OP_ENV(packuw)
1875IWMMXT_OP_ENV(packul)
1876IWMMXT_OP_ENV(packuq)
1877IWMMXT_OP_ENV(packsw)
1878IWMMXT_OP_ENV(packsl)
1879IWMMXT_OP_ENV(packsq)
1880
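/* The wCon MUP (bit 1) and CUP (bit 0) flags record that the iwMMXt data
   and control registers, respectively, have been updated.  */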
1881static void gen_op_iwmmxt_set_mup(void)
1882{
1883    TCGv_i32 tmp;
1884    tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
1885    tcg_gen_ori_i32(tmp, tmp, 2);
1886    store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
1887}
1888
1889static void gen_op_iwmmxt_set_cup(void)
1890{
1891    TCGv_i32 tmp;
1892    tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
1893    tcg_gen_ori_i32(tmp, tmp, 1);
1894    store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
1895}
1896
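/* Update the N and Z flags in wCASF from the value in M0.  */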
1897static void gen_op_iwmmxt_setpsr_nz(void)
1898{
1899    TCGv_i32 tmp = tcg_temp_new_i32();
1900    gen_helper_iwmmxt_setpsr_nz(tmp, cpu_M0);
1901    store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCASF]);
1902}
1903
1904static inline void gen_op_iwmmxt_addl_M0_wRn(int rn)
1905{
1906    iwmmxt_load_reg(cpu_V1, rn);
1907    tcg_gen_ext32u_i64(cpu_V1, cpu_V1);
1908    tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
1909}
1910
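/* Compute the transfer address for an iwMMXt load/store into DEST, handling
   the pre-indexed, post-indexed and offset addressing modes with optional
   base register writeback.  Returns nonzero if the addressing mode is
   invalid.  */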
1911static inline int gen_iwmmxt_address(DisasContext *s, uint32_t insn,
1912                                     TCGv_i32 dest)
1913{
1914    int rd;
1915    uint32_t offset;
1916    TCGv_i32 tmp;
1917
1918    rd = (insn >> 16) & 0xf;
1919    tmp = load_reg(s, rd);
1920
1921    offset = (insn & 0xff) << ((insn >> 7) & 2);
1922    if (insn & (1 << 24)) {
1923        /* Pre-indexed */
1924        if (insn & (1 << 23))
1925            tcg_gen_addi_i32(tmp, tmp, offset);
1926        else
1927            tcg_gen_addi_i32(tmp, tmp, -offset);
1928        tcg_gen_mov_i32(dest, tmp);
1929        if (insn & (1 << 21))
1930            store_reg(s, rd, tmp);
1931        else
1932            tcg_temp_free_i32(tmp);
1933    } else if (insn & (1 << 21)) {
1934        /* Post-indexed */
1935        tcg_gen_mov_i32(dest, tmp);
1936        if (insn & (1 << 23))
1937            tcg_gen_addi_i32(tmp, tmp, offset);
1938        else
1939            tcg_gen_addi_i32(tmp, tmp, -offset);
1940        store_reg(s, rd, tmp);
1941    } else if (!(insn & (1 << 23)))
1942        return 1;
1943    return 0;
1944}
1945
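/* Fetch the shift amount for an iwMMXt shift/rotate into DEST, either from
   a wCGR control register or from the low bits of wRn, masked with MASK.
   Returns nonzero for an invalid encoding.  */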
1946static inline int gen_iwmmxt_shift(uint32_t insn, uint32_t mask, TCGv_i32 dest)
1947{
1948    int rd = (insn >> 0) & 0xf;
1949    TCGv_i32 tmp;
1950
1951    if (insn & (1 << 8)) {
1952        if (rd < ARM_IWMMXT_wCGR0 || rd > ARM_IWMMXT_wCGR3) {
1953            return 1;
1954        } else {
1955            tmp = iwmmxt_load_creg(rd);
1956        }
1957    } else {
1958        tmp = tcg_temp_new_i32();
1959        iwmmxt_load_reg(cpu_V0, rd);
1960        tcg_gen_extrl_i64_i32(tmp, cpu_V0);
1961    }
1962    tcg_gen_andi_i32(tmp, tmp, mask);
1963    tcg_gen_mov_i32(dest, tmp);
1964    tcg_temp_free_i32(tmp);
1965    return 0;
1966}
1967
1968/* Disassemble an iwMMXt instruction.  Returns nonzero if an error occurred
1969   (i.e. an undefined instruction).  */
1970static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn)
1971{
1972    int rd, wrd;
1973    int rdhi, rdlo, rd0, rd1, i;
1974    TCGv_i32 addr;
1975    TCGv_i32 tmp, tmp2, tmp3;
1976
1977    if ((insn & 0x0e000e00) == 0x0c000000) {
1978        if ((insn & 0x0fe00ff0) == 0x0c400000) {
1979            wrd = insn & 0xf;
1980            rdlo = (insn >> 12) & 0xf;
1981            rdhi = (insn >> 16) & 0xf;
1982            if (insn & ARM_CP_RW_BIT) {                         /* TMRRC */
1983                iwmmxt_load_reg(cpu_V0, wrd);
1984                tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
1985                tcg_gen_shri_i64(cpu_V0, cpu_V0, 32);
1986                tcg_gen_extrl_i64_i32(cpu_R[rdhi], cpu_V0);
1987            } else {                                    /* TMCRR */
1988                tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
1989                iwmmxt_store_reg(cpu_V0, wrd);
1990                gen_op_iwmmxt_set_mup();
1991            }
1992            return 0;
1993        }
1994
1995        wrd = (insn >> 12) & 0xf;
1996        addr = tcg_temp_new_i32();
1997        if (gen_iwmmxt_address(s, insn, addr)) {
1998            tcg_temp_free_i32(addr);
1999            return 1;
2000        }
2001        if (insn & ARM_CP_RW_BIT) {
2002            if ((insn >> 28) == 0xf) {                  /* WLDRW wCx */
2003                tmp = tcg_temp_new_i32();
2004                gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
2005                iwmmxt_store_creg(wrd, tmp);
2006            } else {
2007                i = 1;
2008                if (insn & (1 << 8)) {
2009                    if (insn & (1 << 22)) {             /* WLDRD */
2010                        gen_aa32_ld64(s, cpu_M0, addr, get_mem_index(s));
2011                        i = 0;
2012                    } else {                            /* WLDRW wRd */
2013                        tmp = tcg_temp_new_i32();
2014                        gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
2015                    }
2016                } else {
2017                    tmp = tcg_temp_new_i32();
2018                    if (insn & (1 << 22)) {             /* WLDRH */
2019                        gen_aa32_ld16u(s, tmp, addr, get_mem_index(s));
2020                    } else {                            /* WLDRB */
2021                        gen_aa32_ld8u(s, tmp, addr, get_mem_index(s));
2022                    }
2023                }
2024                if (i) {
2025                    tcg_gen_extu_i32_i64(cpu_M0, tmp);
2026                    tcg_temp_free_i32(tmp);
2027                }
2028                gen_op_iwmmxt_movq_wRn_M0(wrd);
2029            }
2030        } else {
2031            if ((insn >> 28) == 0xf) {                  /* WSTRW wCx */
2032                tmp = iwmmxt_load_creg(wrd);
2033                gen_aa32_st32(s, tmp, addr, get_mem_index(s));
2034            } else {
2035                gen_op_iwmmxt_movq_M0_wRn(wrd);
2036                tmp = tcg_temp_new_i32();
2037                if (insn & (1 << 8)) {
2038                    if (insn & (1 << 22)) {             /* WSTRD */
2039                        gen_aa32_st64(s, cpu_M0, addr, get_mem_index(s));
2040                    } else {                            /* WSTRW wRd */
2041                        tcg_gen_extrl_i64_i32(tmp, cpu_M0);
2042                        gen_aa32_st32(s, tmp, addr, get_mem_index(s));
2043                    }
2044                } else {
2045                    if (insn & (1 << 22)) {             /* WSTRH */
2046                        tcg_gen_extrl_i64_i32(tmp, cpu_M0);
2047                        gen_aa32_st16(s, tmp, addr, get_mem_index(s));
2048                    } else {                            /* WSTRB */
2049                        tcg_gen_extrl_i64_i32(tmp, cpu_M0);
2050                        gen_aa32_st8(s, tmp, addr, get_mem_index(s));
2051                    }
2052                }
2053            }
2054            tcg_temp_free_i32(tmp);
2055        }
2056        tcg_temp_free_i32(addr);
2057        return 0;
2058    }
2059
2060    if ((insn & 0x0f000000) != 0x0e000000)
2061        return 1;
2062
2063    switch (((insn >> 12) & 0xf00) | ((insn >> 4) & 0xff)) {
2064    case 0x000:                                                 /* WOR */
2065        wrd = (insn >> 12) & 0xf;
2066        rd0 = (insn >> 0) & 0xf;
2067        rd1 = (insn >> 16) & 0xf;
2068        gen_op_iwmmxt_movq_M0_wRn(rd0);
2069        gen_op_iwmmxt_orq_M0_wRn(rd1);
2070        gen_op_iwmmxt_setpsr_nz();
2071        gen_op_iwmmxt_movq_wRn_M0(wrd);
2072        gen_op_iwmmxt_set_mup();
2073        gen_op_iwmmxt_set_cup();
2074        break;
2075    case 0x011:                                                 /* TMCR */
2076        if (insn & 0xf)
2077            return 1;
2078        rd = (insn >> 12) & 0xf;
2079        wrd = (insn >> 16) & 0xf;
2080        switch (wrd) {
2081        case ARM_IWMMXT_wCID:
2082        case ARM_IWMMXT_wCASF:
2083            break;
2084        case ARM_IWMMXT_wCon:
2085            gen_op_iwmmxt_set_cup();
2086            /* Fall through.  */
2087        case ARM_IWMMXT_wCSSF:
2088            tmp = iwmmxt_load_creg(wrd);
2089            tmp2 = load_reg(s, rd);
2090            tcg_gen_andc_i32(tmp, tmp, tmp2);
2091            tcg_temp_free_i32(tmp2);
2092            iwmmxt_store_creg(wrd, tmp);
2093            break;
2094        case ARM_IWMMXT_wCGR0:
2095        case ARM_IWMMXT_wCGR1:
2096        case ARM_IWMMXT_wCGR2:
2097        case ARM_IWMMXT_wCGR3:
2098            gen_op_iwmmxt_set_cup();
2099            tmp = load_reg(s, rd);
2100            iwmmxt_store_creg(wrd, tmp);
2101            break;
2102        default:
2103            return 1;
2104        }
2105        break;
2106    case 0x100:                                                 /* WXOR */
2107        wrd = (insn >> 12) & 0xf;
2108        rd0 = (insn >> 0) & 0xf;
2109        rd1 = (insn >> 16) & 0xf;
2110        gen_op_iwmmxt_movq_M0_wRn(rd0);
2111        gen_op_iwmmxt_xorq_M0_wRn(rd1);
2112        gen_op_iwmmxt_setpsr_nz();
2113        gen_op_iwmmxt_movq_wRn_M0(wrd);
2114        gen_op_iwmmxt_set_mup();
2115        gen_op_iwmmxt_set_cup();
2116        break;
2117    case 0x111:                                                 /* TMRC */
2118        if (insn & 0xf)
2119            return 1;
2120        rd = (insn >> 12) & 0xf;
2121        wrd = (insn >> 16) & 0xf;
2122        tmp = iwmmxt_load_creg(wrd);
2123        store_reg(s, rd, tmp);
2124        break;
2125    case 0x300:                                                 /* WANDN */
2126        wrd = (insn >> 12) & 0xf;
2127        rd0 = (insn >> 0) & 0xf;
2128        rd1 = (insn >> 16) & 0xf;
2129        gen_op_iwmmxt_movq_M0_wRn(rd0);
2130        tcg_gen_not_i64(cpu_M0, cpu_M0);        /* ~wRm: WANDN is an AND-NOT */
2131        gen_op_iwmmxt_andq_M0_wRn(rd1);
2132        gen_op_iwmmxt_setpsr_nz();
2133        gen_op_iwmmxt_movq_wRn_M0(wrd);
2134        gen_op_iwmmxt_set_mup();
2135        gen_op_iwmmxt_set_cup();
2136        break;
2137    case 0x200:                                                 /* WAND */
2138        wrd = (insn >> 12) & 0xf;
2139        rd0 = (insn >> 0) & 0xf;
2140        rd1 = (insn >> 16) & 0xf;
2141        gen_op_iwmmxt_movq_M0_wRn(rd0);
2142        gen_op_iwmmxt_andq_M0_wRn(rd1);
2143        gen_op_iwmmxt_setpsr_nz();
2144        gen_op_iwmmxt_movq_wRn_M0(wrd);
2145        gen_op_iwmmxt_set_mup();
2146        gen_op_iwmmxt_set_cup();
2147        break;
2148    case 0x810: case 0xa10:                             /* WMADD */
2149        wrd = (insn >> 12) & 0xf;
2150        rd0 = (insn >> 0) & 0xf;
2151        rd1 = (insn >> 16) & 0xf;
2152        gen_op_iwmmxt_movq_M0_wRn(rd0);
2153        if (insn & (1 << 21))
2154            gen_op_iwmmxt_maddsq_M0_wRn(rd1);
2155        else
2156            gen_op_iwmmxt_madduq_M0_wRn(rd1);
2157        gen_op_iwmmxt_movq_wRn_M0(wrd);
2158        gen_op_iwmmxt_set_mup();
2159        break;
2160    case 0x10e: case 0x50e: case 0x90e: case 0xd0e:     /* WUNPCKIL */
2161        wrd = (insn >> 12) & 0xf;
2162        rd0 = (insn >> 16) & 0xf;
2163        rd1 = (insn >> 0) & 0xf;
2164        gen_op_iwmmxt_movq_M0_wRn(rd0);
2165        switch ((insn >> 22) & 3) {
2166        case 0:
2167            gen_op_iwmmxt_unpacklb_M0_wRn(rd1);
2168            break;
2169        case 1:
2170            gen_op_iwmmxt_unpacklw_M0_wRn(rd1);
2171            break;
2172        case 2:
2173            gen_op_iwmmxt_unpackll_M0_wRn(rd1);
2174            break;
2175        case 3:
2176            return 1;
2177        }
2178        gen_op_iwmmxt_movq_wRn_M0(wrd);
2179        gen_op_iwmmxt_set_mup();
2180        gen_op_iwmmxt_set_cup();
2181        break;
2182    case 0x10c: case 0x50c: case 0x90c: case 0xd0c:     /* WUNPCKIH */
2183        wrd = (insn >> 12) & 0xf;
2184        rd0 = (insn >> 16) & 0xf;
2185        rd1 = (insn >> 0) & 0xf;
2186        gen_op_iwmmxt_movq_M0_wRn(rd0);
2187        switch ((insn >> 22) & 3) {
2188        case 0:
2189            gen_op_iwmmxt_unpackhb_M0_wRn(rd1);
2190            break;
2191        case 1:
2192            gen_op_iwmmxt_unpackhw_M0_wRn(rd1);
2193            break;
2194        case 2:
2195            gen_op_iwmmxt_unpackhl_M0_wRn(rd1);
2196            break;
2197        case 3:
2198            return 1;
2199        }
2200        gen_op_iwmmxt_movq_wRn_M0(wrd);
2201        gen_op_iwmmxt_set_mup();
2202        gen_op_iwmmxt_set_cup();
2203        break;
2204    case 0x012: case 0x112: case 0x412: case 0x512:     /* WSAD */
2205        wrd = (insn >> 12) & 0xf;
2206        rd0 = (insn >> 16) & 0xf;
2207        rd1 = (insn >> 0) & 0xf;
2208        gen_op_iwmmxt_movq_M0_wRn(rd0);
2209        if (insn & (1 << 22))
2210            gen_op_iwmmxt_sadw_M0_wRn(rd1);
2211        else
2212            gen_op_iwmmxt_sadb_M0_wRn(rd1);
2213        if (!(insn & (1 << 20)))
2214            gen_op_iwmmxt_addl_M0_wRn(wrd);
2215        gen_op_iwmmxt_movq_wRn_M0(wrd);
2216        gen_op_iwmmxt_set_mup();
2217        break;
2218    case 0x010: case 0x110: case 0x210: case 0x310:     /* WMUL */
2219        wrd = (insn >> 12) & 0xf;
2220        rd0 = (insn >> 16) & 0xf;
2221        rd1 = (insn >> 0) & 0xf;
2222        gen_op_iwmmxt_movq_M0_wRn(rd0);
2223        if (insn & (1 << 21)) {
2224            if (insn & (1 << 20))
2225                gen_op_iwmmxt_mulshw_M0_wRn(rd1);
2226            else
2227                gen_op_iwmmxt_mulslw_M0_wRn(rd1);
2228        } else {
2229            if (insn & (1 << 20))
2230                gen_op_iwmmxt_muluhw_M0_wRn(rd1);
2231            else
2232                gen_op_iwmmxt_mululw_M0_wRn(rd1);
2233        }
2234        gen_op_iwmmxt_movq_wRn_M0(wrd);
2235        gen_op_iwmmxt_set_mup();
2236        break;
2237    case 0x410: case 0x510: case 0x610: case 0x710:     /* WMAC */
2238        wrd = (insn >> 12) & 0xf;
2239        rd0 = (insn >> 16) & 0xf;
2240        rd1 = (insn >> 0) & 0xf;
2241        gen_op_iwmmxt_movq_M0_wRn(rd0);
2242        if (insn & (1 << 21))
2243            gen_op_iwmmxt_macsw_M0_wRn(rd1);
2244        else
2245            gen_op_iwmmxt_macuw_M0_wRn(rd1);
2246        if (!(insn & (1 << 20))) {
2247            iwmmxt_load_reg(cpu_V1, wrd);
2248            tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
2249        }
2250        gen_op_iwmmxt_movq_wRn_M0(wrd);
2251        gen_op_iwmmxt_set_mup();
2252        break;
2253    case 0x006: case 0x406: case 0x806: case 0xc06:     /* WCMPEQ */
2254        wrd = (insn >> 12) & 0xf;
2255        rd0 = (insn >> 16) & 0xf;
2256        rd1 = (insn >> 0) & 0xf;
2257        gen_op_iwmmxt_movq_M0_wRn(rd0);
2258        switch ((insn >> 22) & 3) {
2259        case 0:
2260            gen_op_iwmmxt_cmpeqb_M0_wRn(rd1);
2261            break;
2262        case 1:
2263            gen_op_iwmmxt_cmpeqw_M0_wRn(rd1);
2264            break;
2265        case 2:
2266            gen_op_iwmmxt_cmpeql_M0_wRn(rd1);
2267            break;
2268        case 3:
2269            return 1;
2270        }
2271        gen_op_iwmmxt_movq_wRn_M0(wrd);
2272        gen_op_iwmmxt_set_mup();
2273        gen_op_iwmmxt_set_cup();
2274        break;
2275    case 0x800: case 0x900: case 0xc00: case 0xd00:     /* WAVG2 */
2276        wrd = (insn >> 12) & 0xf;
2277        rd0 = (insn >> 16) & 0xf;
2278        rd1 = (insn >> 0) & 0xf;
2279        gen_op_iwmmxt_movq_M0_wRn(rd0);
2280        if (insn & (1 << 22)) {
2281            if (insn & (1 << 20))
2282                gen_op_iwmmxt_avgw1_M0_wRn(rd1);
2283            else
2284                gen_op_iwmmxt_avgw0_M0_wRn(rd1);
2285        } else {
2286            if (insn & (1 << 20))
2287                gen_op_iwmmxt_avgb1_M0_wRn(rd1);
2288            else
2289                gen_op_iwmmxt_avgb0_M0_wRn(rd1);
2290        }
2291        gen_op_iwmmxt_movq_wRn_M0(wrd);
2292        gen_op_iwmmxt_set_mup();
2293        gen_op_iwmmxt_set_cup();
2294        break;
2295    case 0x802: case 0x902: case 0xa02: case 0xb02:     /* WALIGNR */
2296        wrd = (insn >> 12) & 0xf;
2297        rd0 = (insn >> 16) & 0xf;
2298        rd1 = (insn >> 0) & 0xf;
2299        gen_op_iwmmxt_movq_M0_wRn(rd0);
2300        tmp = iwmmxt_load_creg(ARM_IWMMXT_wCGR0 + ((insn >> 20) & 3));
2301        tcg_gen_andi_i32(tmp, tmp, 7);
2302        iwmmxt_load_reg(cpu_V1, rd1);
2303        gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp);
2304        tcg_temp_free_i32(tmp);
2305        gen_op_iwmmxt_movq_wRn_M0(wrd);
2306        gen_op_iwmmxt_set_mup();
2307        break;
2308    case 0x601: case 0x605: case 0x609: case 0x60d:     /* TINSR */
2309        if (((insn >> 6) & 3) == 3)
2310            return 1;
2311        rd = (insn >> 12) & 0xf;
2312        wrd = (insn >> 16) & 0xf;
2313        tmp = load_reg(s, rd);
2314        gen_op_iwmmxt_movq_M0_wRn(wrd);
2315        switch ((insn >> 6) & 3) {
2316        case 0:
2317            tmp2 = tcg_const_i32(0xff);
2318            tmp3 = tcg_const_i32((insn & 7) << 3);
2319            break;
2320        case 1:
2321            tmp2 = tcg_const_i32(0xffff);
2322            tmp3 = tcg_const_i32((insn & 3) << 4);
2323            break;
2324        case 2:
2325            tmp2 = tcg_const_i32(0xffffffff);
2326            tmp3 = tcg_const_i32((insn & 1) << 5);
2327            break;
2328        default:
2329            tmp2 = NULL;
2330            tmp3 = NULL;
2331        }
2332        gen_helper_iwmmxt_insr(cpu_M0, cpu_M0, tmp, tmp2, tmp3);
2333        tcg_temp_free_i32(tmp3);
2334        tcg_temp_free_i32(tmp2);
2335        tcg_temp_free_i32(tmp);
2336        gen_op_iwmmxt_movq_wRn_M0(wrd);
2337        gen_op_iwmmxt_set_mup();
2338        break;
2339    case 0x107: case 0x507: case 0x907: case 0xd07:     /* TEXTRM */
2340        rd = (insn >> 12) & 0xf;
2341        wrd = (insn >> 16) & 0xf;
2342        if (rd == 15 || ((insn >> 22) & 3) == 3)
2343            return 1;
2344        gen_op_iwmmxt_movq_M0_wRn(wrd);
2345        tmp = tcg_temp_new_i32();
2346        switch ((insn >> 22) & 3) {
2347        case 0:
2348            tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 7) << 3);
2349            tcg_gen_extrl_i64_i32(tmp, cpu_M0);
2350            if (insn & 8) {
2351                tcg_gen_ext8s_i32(tmp, tmp);
2352            } else {
2353                tcg_gen_andi_i32(tmp, tmp, 0xff);
2354            }
2355            break;
2356        case 1:
2357            tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 3) << 4);
2358            tcg_gen_extrl_i64_i32(tmp, cpu_M0);
2359            if (insn & 8) {
2360                tcg_gen_ext16s_i32(tmp, tmp);
2361            } else {
2362                tcg_gen_andi_i32(tmp, tmp, 0xffff);
2363            }
2364            break;
2365        case 2:
2366            tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 1) << 5);
2367            tcg_gen_extrl_i64_i32(tmp, cpu_M0);
2368            break;
2369        }
2370        store_reg(s, rd, tmp);
2371        break;
2372    case 0x117: case 0x517: case 0x917: case 0xd17:     /* TEXTRC */
2373        if ((insn & 0x000ff008) != 0x0003f000 || ((insn >> 22) & 3) == 3)
2374            return 1;
2375        tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
2376        switch ((insn >> 22) & 3) {
2377        case 0:
2378            tcg_gen_shri_i32(tmp, tmp, ((insn & 7) << 2) + 0);
2379            break;
2380        case 1:
2381            tcg_gen_shri_i32(tmp, tmp, ((insn & 3) << 3) + 4);
2382            break;
2383        case 2:
2384            tcg_gen_shri_i32(tmp, tmp, ((insn & 1) << 4) + 12);
2385            break;
2386        }
2387        tcg_gen_shli_i32(tmp, tmp, 28);
2388        gen_set_nzcv(tmp);
2389        tcg_temp_free_i32(tmp);
2390        break;
2391    case 0x401: case 0x405: case 0x409: case 0x40d:     /* TBCST */
2392        if (((insn >> 6) & 3) == 3)
2393            return 1;
2394        rd = (insn >> 12) & 0xf;
2395        wrd = (insn >> 16) & 0xf;
2396        tmp = load_reg(s, rd);
2397        switch ((insn >> 6) & 3) {
2398        case 0:
2399            gen_helper_iwmmxt_bcstb(cpu_M0, tmp);
2400            break;
2401        case 1:
2402            gen_helper_iwmmxt_bcstw(cpu_M0, tmp);
2403            break;
2404        case 2:
2405            gen_helper_iwmmxt_bcstl(cpu_M0, tmp);
2406            break;
2407        }
2408        tcg_temp_free_i32(tmp);
2409        gen_op_iwmmxt_movq_wRn_M0(wrd);
2410        gen_op_iwmmxt_set_mup();
2411        break;
2412    case 0x113: case 0x513: case 0x913: case 0xd13:     /* TANDC */
2413        if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
2414            return 1;
2415        tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
2416        tmp2 = tcg_temp_new_i32();
2417        tcg_gen_mov_i32(tmp2, tmp);
2418        switch ((insn >> 22) & 3) {
2419        case 0:
2420            for (i = 0; i < 7; i ++) {
2421                tcg_gen_shli_i32(tmp2, tmp2, 4);
2422                tcg_gen_and_i32(tmp, tmp, tmp2);
2423            }
2424            break;
2425        case 1:
2426            for (i = 0; i < 3; i ++) {
2427                tcg_gen_shli_i32(tmp2, tmp2, 8);
2428                tcg_gen_and_i32(tmp, tmp, tmp2);
2429            }
2430            break;
2431        case 2:
2432            tcg_gen_shli_i32(tmp2, tmp2, 16);
2433            tcg_gen_and_i32(tmp, tmp, tmp2);
2434            break;
2435        }
2436        gen_set_nzcv(tmp);
2437        tcg_temp_free_i32(tmp2);
2438        tcg_temp_free_i32(tmp);
2439        break;
2440    case 0x01c: case 0x41c: case 0x81c: case 0xc1c:     /* WACC */
2441        wrd = (insn >> 12) & 0xf;
2442        rd0 = (insn >> 16) & 0xf;
2443        gen_op_iwmmxt_movq_M0_wRn(rd0);
2444        switch ((insn >> 22) & 3) {
2445        case 0:
2446            gen_helper_iwmmxt_addcb(cpu_M0, cpu_M0);
2447            break;
2448        case 1:
2449            gen_helper_iwmmxt_addcw(cpu_M0, cpu_M0);
2450            break;
2451        case 2:
2452            gen_helper_iwmmxt_addcl(cpu_M0, cpu_M0);
2453            break;
2454        case 3:
2455            return 1;
2456        }
2457        gen_op_iwmmxt_movq_wRn_M0(wrd);
2458        gen_op_iwmmxt_set_mup();
2459        break;
2460    case 0x115: case 0x515: case 0x915: case 0xd15:     /* TORC */
2461        if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
2462            return 1;
2463        tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
2464        tmp2 = tcg_temp_new_i32();
2465        tcg_gen_mov_i32(tmp2, tmp);
2466        switch ((insn >> 22) & 3) {
2467        case 0:
2468            for (i = 0; i < 7; i ++) {
2469                tcg_gen_shli_i32(tmp2, tmp2, 4);
2470                tcg_gen_or_i32(tmp, tmp, tmp2);
2471            }
2472            break;
2473        case 1:
2474            for (i = 0; i < 3; i ++) {
2475                tcg_gen_shli_i32(tmp2, tmp2, 8);
2476                tcg_gen_or_i32(tmp, tmp, tmp2);
2477            }
2478            break;
2479        case 2:
2480            tcg_gen_shli_i32(tmp2, tmp2, 16);
2481            tcg_gen_or_i32(tmp, tmp, tmp2);
2482            break;
2483        }
2484        gen_set_nzcv(tmp);
2485        tcg_temp_free_i32(tmp2);
2486        tcg_temp_free_i32(tmp);
2487        break;
2488    case 0x103: case 0x503: case 0x903: case 0xd03:     /* TMOVMSK */
2489        rd = (insn >> 12) & 0xf;
2490        rd0 = (insn >> 16) & 0xf;
2491        if ((insn & 0xf) != 0 || ((insn >> 22) & 3) == 3)
2492            return 1;
2493        gen_op_iwmmxt_movq_M0_wRn(rd0);
2494        tmp = tcg_temp_new_i32();
2495        switch ((insn >> 22) & 3) {
2496        case 0:
2497            gen_helper_iwmmxt_msbb(tmp, cpu_M0);
2498            break;
2499        case 1:
2500            gen_helper_iwmmxt_msbw(tmp, cpu_M0);
2501            break;
2502        case 2:
2503            gen_helper_iwmmxt_msbl(tmp, cpu_M0);
2504            break;
2505        }
2506        store_reg(s, rd, tmp);
2507        break;
2508    case 0x106: case 0x306: case 0x506: case 0x706:     /* WCMPGT */
2509    case 0x906: case 0xb06: case 0xd06: case 0xf06:
2510        wrd = (insn >> 12) & 0xf;
2511        rd0 = (insn >> 16) & 0xf;
2512        rd1 = (insn >> 0) & 0xf;
2513        gen_op_iwmmxt_movq_M0_wRn(rd0);
2514        switch ((insn >> 22) & 3) {
2515        case 0:
2516            if (insn & (1 << 21))
2517                gen_op_iwmmxt_cmpgtsb_M0_wRn(rd1);
2518            else
2519                gen_op_iwmmxt_cmpgtub_M0_wRn(rd1);
2520            break;
2521        case 1:
2522            if (insn & (1 << 21))
2523                gen_op_iwmmxt_cmpgtsw_M0_wRn(rd1);
2524            else
2525                gen_op_iwmmxt_cmpgtuw_M0_wRn(rd1);
2526            break;
2527        case 2:
2528            if (insn & (1 << 21))
2529                gen_op_iwmmxt_cmpgtsl_M0_wRn(rd1);
2530            else
2531                gen_op_iwmmxt_cmpgtul_M0_wRn(rd1);
2532            break;
2533        case 3:
2534            return 1;
2535        }
2536        gen_op_iwmmxt_movq_wRn_M0(wrd);
2537        gen_op_iwmmxt_set_mup();
2538        gen_op_iwmmxt_set_cup();
2539        break;
2540    case 0x00e: case 0x20e: case 0x40e: case 0x60e:     /* WUNPCKEL */
2541    case 0x80e: case 0xa0e: case 0xc0e: case 0xe0e:
2542        wrd = (insn >> 12) & 0xf;
2543        rd0 = (insn >> 16) & 0xf;
2544        gen_op_iwmmxt_movq_M0_wRn(rd0);
2545        switch ((insn >> 22) & 3) {
2546        case 0:
2547            if (insn & (1 << 21))
2548                gen_op_iwmmxt_unpacklsb_M0();
2549            else
2550                gen_op_iwmmxt_unpacklub_M0();
2551            break;
2552        case 1:
2553            if (insn & (1 << 21))
2554                gen_op_iwmmxt_unpacklsw_M0();
2555            else
2556                gen_op_iwmmxt_unpackluw_M0();
2557            break;
2558        case 2:
2559            if (insn & (1 << 21))
2560                gen_op_iwmmxt_unpacklsl_M0();
2561            else
2562                gen_op_iwmmxt_unpacklul_M0();
2563            break;
2564        case 3:
2565            return 1;
2566        }
2567        gen_op_iwmmxt_movq_wRn_M0(wrd);
2568        gen_op_iwmmxt_set_mup();
2569        gen_op_iwmmxt_set_cup();
2570        break;
2571    case 0x00c: case 0x20c: case 0x40c: case 0x60c:     /* WUNPCKEH */
2572    case 0x80c: case 0xa0c: case 0xc0c: case 0xe0c:
2573        wrd = (insn >> 12) & 0xf;
2574        rd0 = (insn >> 16) & 0xf;
2575        gen_op_iwmmxt_movq_M0_wRn(rd0);
2576        switch ((insn >> 22) & 3) {
2577        case 0:
2578            if (insn & (1 << 21))
2579                gen_op_iwmmxt_unpackhsb_M0();
2580            else
2581                gen_op_iwmmxt_unpackhub_M0();
2582            break;
2583        case 1:
2584            if (insn & (1 << 21))
2585                gen_op_iwmmxt_unpackhsw_M0();
2586            else
2587                gen_op_iwmmxt_unpackhuw_M0();
2588            break;
2589        case 2:
2590            if (insn & (1 << 21))
2591                gen_op_iwmmxt_unpackhsl_M0();
2592            else
2593                gen_op_iwmmxt_unpackhul_M0();
2594            break;
2595        case 3:
2596            return 1;
2597        }
2598        gen_op_iwmmxt_movq_wRn_M0(wrd);
2599        gen_op_iwmmxt_set_mup();
2600        gen_op_iwmmxt_set_cup();
2601        break;
2602    case 0x204: case 0x604: case 0xa04: case 0xe04:     /* WSRL */
2603    case 0x214: case 0x614: case 0xa14: case 0xe14:
2604        if (((insn >> 22) & 3) == 0)
2605            return 1;
2606        wrd = (insn >> 12) & 0xf;
2607        rd0 = (insn >> 16) & 0xf;
2608        gen_op_iwmmxt_movq_M0_wRn(rd0);
2609        tmp = tcg_temp_new_i32();
2610        if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2611            tcg_temp_free_i32(tmp);
2612            return 1;
2613        }
2614        switch ((insn >> 22) & 3) {
2615        case 1:
2616            gen_helper_iwmmxt_srlw(cpu_M0, cpu_env, cpu_M0, tmp);
2617            break;
2618        case 2:
2619            gen_helper_iwmmxt_srll(cpu_M0, cpu_env, cpu_M0, tmp);
2620            break;
2621        case 3:
2622            gen_helper_iwmmxt_srlq(cpu_M0, cpu_env, cpu_M0, tmp);
2623            break;
2624        }
2625        tcg_temp_free_i32(tmp);
2626        gen_op_iwmmxt_movq_wRn_M0(wrd);
2627        gen_op_iwmmxt_set_mup();
2628        gen_op_iwmmxt_set_cup();
2629        break;
2630    case 0x004: case 0x404: case 0x804: case 0xc04:     /* WSRA */
2631    case 0x014: case 0x414: case 0x814: case 0xc14:
2632        if (((insn >> 22) & 3) == 0)
2633            return 1;
2634        wrd = (insn >> 12) & 0xf;
2635        rd0 = (insn >> 16) & 0xf;
2636        gen_op_iwmmxt_movq_M0_wRn(rd0);
2637        tmp = tcg_temp_new_i32();
2638        if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2639            tcg_temp_free_i32(tmp);
2640            return 1;
2641        }
2642        switch ((insn >> 22) & 3) {
2643        case 1:
2644            gen_helper_iwmmxt_sraw(cpu_M0, cpu_env, cpu_M0, tmp);
2645            break;
2646        case 2:
2647            gen_helper_iwmmxt_sral(cpu_M0, cpu_env, cpu_M0, tmp);
2648            break;
2649        case 3:
2650            gen_helper_iwmmxt_sraq(cpu_M0, cpu_env, cpu_M0, tmp);
2651            break;
2652        }
2653        tcg_temp_free_i32(tmp);
2654        gen_op_iwmmxt_movq_wRn_M0(wrd);
2655        gen_op_iwmmxt_set_mup();
2656        gen_op_iwmmxt_set_cup();
2657        break;
2658    case 0x104: case 0x504: case 0x904: case 0xd04:     /* WSLL */
2659    case 0x114: case 0x514: case 0x914: case 0xd14:
2660        if (((insn >> 22) & 3) == 0)
2661            return 1;
2662        wrd = (insn >> 12) & 0xf;
2663        rd0 = (insn >> 16) & 0xf;
2664        gen_op_iwmmxt_movq_M0_wRn(rd0);
2665        tmp = tcg_temp_new_i32();
2666        if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2667            tcg_temp_free_i32(tmp);
2668            return 1;
2669        }
2670        switch ((insn >> 22) & 3) {
2671        case 1:
2672            gen_helper_iwmmxt_sllw(cpu_M0, cpu_env, cpu_M0, tmp);
2673            break;
2674        case 2:
2675            gen_helper_iwmmxt_slll(cpu_M0, cpu_env, cpu_M0, tmp);
2676            break;
2677        case 3:
2678            gen_helper_iwmmxt_sllq(cpu_M0, cpu_env, cpu_M0, tmp);
2679            break;
2680        }
2681        tcg_temp_free_i32(tmp);
2682        gen_op_iwmmxt_movq_wRn_M0(wrd);
2683        gen_op_iwmmxt_set_mup();
2684        gen_op_iwmmxt_set_cup();
2685        break;
2686    case 0x304: case 0x704: case 0xb04: case 0xf04:     /* WROR */
2687    case 0x314: case 0x714: case 0xb14: case 0xf14:
2688        if (((insn >> 22) & 3) == 0)
2689            return 1;
2690        wrd = (insn >> 12) & 0xf;
2691        rd0 = (insn >> 16) & 0xf;
2692        gen_op_iwmmxt_movq_M0_wRn(rd0);
2693        tmp = tcg_temp_new_i32();
2694        switch ((insn >> 22) & 3) {
2695        case 1:
2696            if (gen_iwmmxt_shift(insn, 0xf, tmp)) {
2697                tcg_temp_free_i32(tmp);
2698                return 1;
2699            }
2700            gen_helper_iwmmxt_rorw(cpu_M0, cpu_env, cpu_M0, tmp);
2701            break;
2702        case 2:
2703            if (gen_iwmmxt_shift(insn, 0x1f, tmp)) {
2704                tcg_temp_free_i32(tmp);
2705                return 1;
2706            }
2707            gen_helper_iwmmxt_rorl(cpu_M0, cpu_env, cpu_M0, tmp);
2708            break;
2709        case 3:
2710            if (gen_iwmmxt_shift(insn, 0x3f, tmp)) {
2711                tcg_temp_free_i32(tmp);
2712                return 1;
2713            }
2714            gen_helper_iwmmxt_rorq(cpu_M0, cpu_env, cpu_M0, tmp);
2715            break;
2716        }
2717        tcg_temp_free_i32(tmp);
2718        gen_op_iwmmxt_movq_wRn_M0(wrd);
2719        gen_op_iwmmxt_set_mup();
2720        gen_op_iwmmxt_set_cup();
2721        break;
2722    case 0x116: case 0x316: case 0x516: case 0x716:     /* WMIN */
2723    case 0x916: case 0xb16: case 0xd16: case 0xf16:
2724        wrd = (insn >> 12) & 0xf;
2725        rd0 = (insn >> 16) & 0xf;
2726        rd1 = (insn >> 0) & 0xf;
2727        gen_op_iwmmxt_movq_M0_wRn(rd0);
2728        switch ((insn >> 22) & 3) {
2729        case 0:
2730            if (insn & (1 << 21))
2731                gen_op_iwmmxt_minsb_M0_wRn(rd1);
2732            else
2733                gen_op_iwmmxt_minub_M0_wRn(rd1);
2734            break;
2735        case 1:
2736            if (insn & (1 << 21))
2737                gen_op_iwmmxt_minsw_M0_wRn(rd1);
2738            else
2739                gen_op_iwmmxt_minuw_M0_wRn(rd1);
2740            break;
2741        case 2:
2742            if (insn & (1 << 21))
2743                gen_op_iwmmxt_minsl_M0_wRn(rd1);
2744            else
2745                gen_op_iwmmxt_minul_M0_wRn(rd1);
2746            break;
2747        case 3:
2748            return 1;
2749        }
2750        gen_op_iwmmxt_movq_wRn_M0(wrd);
2751        gen_op_iwmmxt_set_mup();
2752        break;
2753    case 0x016: case 0x216: case 0x416: case 0x616:     /* WMAX */
2754    case 0x816: case 0xa16: case 0xc16: case 0xe16:
2755        wrd = (insn >> 12) & 0xf;
2756        rd0 = (insn >> 16) & 0xf;
2757        rd1 = (insn >> 0) & 0xf;
2758        gen_op_iwmmxt_movq_M0_wRn(rd0);
2759        switch ((insn >> 22) & 3) {
2760        case 0:
2761            if (insn & (1 << 21))
2762                gen_op_iwmmxt_maxsb_M0_wRn(rd1);
2763            else
2764                gen_op_iwmmxt_maxub_M0_wRn(rd1);
2765            break;
2766        case 1:
2767            if (insn & (1 << 21))
2768                gen_op_iwmmxt_maxsw_M0_wRn(rd1);
2769            else
2770                gen_op_iwmmxt_maxuw_M0_wRn(rd1);
2771            break;
2772        case 2:
2773            if (insn & (1 << 21))
2774                gen_op_iwmmxt_maxsl_M0_wRn(rd1);
2775            else
2776                gen_op_iwmmxt_maxul_M0_wRn(rd1);
2777            break;
2778        case 3:
2779            return 1;
2780        }
2781        gen_op_iwmmxt_movq_wRn_M0(wrd);
2782        gen_op_iwmmxt_set_mup();
2783        break;
2784    case 0x002: case 0x102: case 0x202: case 0x302:     /* WALIGNI */
2785    case 0x402: case 0x502: case 0x602: case 0x702:
2786        wrd = (insn >> 12) & 0xf;
2787        rd0 = (insn >> 16) & 0xf;
2788        rd1 = (insn >> 0) & 0xf;
2789        gen_op_iwmmxt_movq_M0_wRn(rd0);
2790        tmp = tcg_const_i32((insn >> 20) & 3);
2791        iwmmxt_load_reg(cpu_V1, rd1);
2792        gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp);
2793        tcg_temp_free_i32(tmp);
2794        gen_op_iwmmxt_movq_wRn_M0(wrd);
2795        gen_op_iwmmxt_set_mup();
2796        break;
2797    case 0x01a: case 0x11a: case 0x21a: case 0x31a:     /* WSUB */
2798    case 0x41a: case 0x51a: case 0x61a: case 0x71a:
2799    case 0x81a: case 0x91a: case 0xa1a: case 0xb1a:
2800    case 0xc1a: case 0xd1a: case 0xe1a: case 0xf1a:
2801        wrd = (insn >> 12) & 0xf;
2802        rd0 = (insn >> 16) & 0xf;
2803        rd1 = (insn >> 0) & 0xf;
2804        gen_op_iwmmxt_movq_M0_wRn(rd0);
2805        switch ((insn >> 20) & 0xf) {
2806        case 0x0:
2807            gen_op_iwmmxt_subnb_M0_wRn(rd1);
2808            break;
2809        case 0x1:
2810            gen_op_iwmmxt_subub_M0_wRn(rd1);
2811            break;
2812        case 0x3:
2813            gen_op_iwmmxt_subsb_M0_wRn(rd1);
2814            break;
2815        case 0x4:
2816            gen_op_iwmmxt_subnw_M0_wRn(rd1);
2817            break;
2818        case 0x5:
2819            gen_op_iwmmxt_subuw_M0_wRn(rd1);
2820            break;
2821        case 0x7:
2822            gen_op_iwmmxt_subsw_M0_wRn(rd1);
2823            break;
2824        case 0x8:
2825            gen_op_iwmmxt_subnl_M0_wRn(rd1);
2826            break;
2827        case 0x9:
2828            gen_op_iwmmxt_subul_M0_wRn(rd1);
2829            break;
2830        case 0xb:
2831            gen_op_iwmmxt_subsl_M0_wRn(rd1);
2832            break;
2833        default:
2834            return 1;
2835        }
2836        gen_op_iwmmxt_movq_wRn_M0(wrd);
2837        gen_op_iwmmxt_set_mup();
2838        gen_op_iwmmxt_set_cup();
2839        break;
2840    case 0x01e: case 0x11e: case 0x21e: case 0x31e:     /* WSHUFH */
2841    case 0x41e: case 0x51e: case 0x61e: case 0x71e:
2842    case 0x81e: case 0x91e: case 0xa1e: case 0xb1e:
2843    case 0xc1e: case 0xd1e: case 0xe1e: case 0xf1e:
2844        wrd = (insn >> 12) & 0xf;
2845        rd0 = (insn >> 16) & 0xf;
2846        gen_op_iwmmxt_movq_M0_wRn(rd0);
2847        tmp = tcg_const_i32(((insn >> 16) & 0xf0) | (insn & 0x0f));
2848        gen_helper_iwmmxt_shufh(cpu_M0, cpu_env, cpu_M0, tmp);
2849        tcg_temp_free_i32(tmp);
2850        gen_op_iwmmxt_movq_wRn_M0(wrd);
2851        gen_op_iwmmxt_set_mup();
2852        gen_op_iwmmxt_set_cup();
2853        break;
2854    case 0x018: case 0x118: case 0x218: case 0x318:     /* WADD */
2855    case 0x418: case 0x518: case 0x618: case 0x718:
2856    case 0x818: case 0x918: case 0xa18: case 0xb18:
2857    case 0xc18: case 0xd18: case 0xe18: case 0xf18:
2858        wrd = (insn >> 12) & 0xf;
2859        rd0 = (insn >> 16) & 0xf;
2860        rd1 = (insn >> 0) & 0xf;
2861        gen_op_iwmmxt_movq_M0_wRn(rd0);
2862        switch ((insn >> 20) & 0xf) {
2863        case 0x0:
2864            gen_op_iwmmxt_addnb_M0_wRn(rd1);
2865            break;
2866        case 0x1:
2867            gen_op_iwmmxt_addub_M0_wRn(rd1);
2868            break;
2869        case 0x3:
2870            gen_op_iwmmxt_addsb_M0_wRn(rd1);
2871            break;
2872        case 0x4:
2873            gen_op_iwmmxt_addnw_M0_wRn(rd1);
2874            break;
2875        case 0x5:
2876            gen_op_iwmmxt_adduw_M0_wRn(rd1);
2877            break;
2878        case 0x7:
2879            gen_op_iwmmxt_addsw_M0_wRn(rd1);
2880            break;
2881        case 0x8:
2882            gen_op_iwmmxt_addnl_M0_wRn(rd1);
2883            break;
2884        case 0x9:
2885            gen_op_iwmmxt_addul_M0_wRn(rd1);
2886            break;
2887        case 0xb:
2888            gen_op_iwmmxt_addsl_M0_wRn(rd1);
2889            break;
2890        default:
2891            return 1;
2892        }
2893        gen_op_iwmmxt_movq_wRn_M0(wrd);
2894        gen_op_iwmmxt_set_mup();
2895        gen_op_iwmmxt_set_cup();
2896        break;
2897    case 0x008: case 0x108: case 0x208: case 0x308:     /* WPACK */
2898    case 0x408: case 0x508: case 0x608: case 0x708:
2899    case 0x808: case 0x908: case 0xa08: case 0xb08:
2900    case 0xc08: case 0xd08: case 0xe08: case 0xf08:
2901        if (!(insn & (1 << 20)) || ((insn >> 22) & 3) == 0)
2902            return 1;
2903        wrd = (insn >> 12) & 0xf;
2904        rd0 = (insn >> 16) & 0xf;
2905        rd1 = (insn >> 0) & 0xf;
2906        gen_op_iwmmxt_movq_M0_wRn(rd0);
2907        switch ((insn >> 22) & 3) {
2908        case 1:
2909            if (insn & (1 << 21))
2910                gen_op_iwmmxt_packsw_M0_wRn(rd1);
2911            else
2912                gen_op_iwmmxt_packuw_M0_wRn(rd1);
2913            break;
2914        case 2:
2915            if (insn & (1 << 21))
2916                gen_op_iwmmxt_packsl_M0_wRn(rd1);
2917            else
2918                gen_op_iwmmxt_packul_M0_wRn(rd1);
2919            break;
2920        case 3:
2921            if (insn & (1 << 21))
2922                gen_op_iwmmxt_packsq_M0_wRn(rd1);
2923            else
2924                gen_op_iwmmxt_packuq_M0_wRn(rd1);
2925            break;
2926        }
2927        gen_op_iwmmxt_movq_wRn_M0(wrd);
2928        gen_op_iwmmxt_set_mup();
2929        gen_op_iwmmxt_set_cup();
2930        break;
2931    case 0x201: case 0x203: case 0x205: case 0x207:
2932    case 0x209: case 0x20b: case 0x20d: case 0x20f:
2933    case 0x211: case 0x213: case 0x215: case 0x217:
2934    case 0x219: case 0x21b: case 0x21d: case 0x21f:
2935        wrd = (insn >> 5) & 0xf;
2936        rd0 = (insn >> 12) & 0xf;
2937        rd1 = (insn >> 0) & 0xf;
2938        if (rd0 == 0xf || rd1 == 0xf)
2939            return 1;
2940        gen_op_iwmmxt_movq_M0_wRn(wrd);
2941        tmp = load_reg(s, rd0);
2942        tmp2 = load_reg(s, rd1);
2943        switch ((insn >> 16) & 0xf) {
2944        case 0x0:                                       /* TMIA */
2945            gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2946            break;
2947        case 0x8:                                       /* TMIAPH */
2948            gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2949            break;
2950        case 0xc: case 0xd: case 0xe: case 0xf:                 /* TMIAxy */
2951            if (insn & (1 << 16))
2952                tcg_gen_shri_i32(tmp, tmp, 16);
2953            if (insn & (1 << 17))
2954                tcg_gen_shri_i32(tmp2, tmp2, 16);
2955            gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2956            break;
2957        default:
2958            tcg_temp_free_i32(tmp2);
2959            tcg_temp_free_i32(tmp);
2960            return 1;
2961        }
2962        tcg_temp_free_i32(tmp2);
2963        tcg_temp_free_i32(tmp);
2964        gen_op_iwmmxt_movq_wRn_M0(wrd);
2965        gen_op_iwmmxt_set_mup();
2966        break;
2967    default:
2968        return 1;
2969    }
2970
2971    return 0;
2972}
2973
2974/* Disassemble an XScale DSP instruction.  Returns nonzero if an error occurred
2975   (i.e. an undefined instruction).  */
2976static int disas_dsp_insn(DisasContext *s, uint32_t insn)
2977{
2978    int acc, rd0, rd1, rdhi, rdlo;
2979    TCGv_i32 tmp, tmp2;
2980
2981    if ((insn & 0x0ff00f10) == 0x0e200010) {
2982        /* Multiply with Internal Accumulate Format */
2983        rd0 = (insn >> 12) & 0xf;
2984        rd1 = insn & 0xf;
2985        acc = (insn >> 5) & 7;
2986
2987        if (acc != 0)
2988            return 1;
2989
2990        tmp = load_reg(s, rd0);
2991        tmp2 = load_reg(s, rd1);
2992        switch ((insn >> 16) & 0xf) {
2993        case 0x0:                                       /* MIA */
2994            gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2995            break;
2996        case 0x8:                                       /* MIAPH */
2997            gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2998            break;
2999        case 0xc:                                       /* MIABB */
3000        case 0xd:                                       /* MIABT */
3001        case 0xe:                                       /* MIATB */
3002        case 0xf:                                       /* MIATT */
3003            if (insn & (1 << 16))
3004                tcg_gen_shri_i32(tmp, tmp, 16);
3005            if (insn & (1 << 17))
3006                tcg_gen_shri_i32(tmp2, tmp2, 16);
3007            gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
3008            break;
3009        default:
3010            return 1;
3011        }
3012        tcg_temp_free_i32(tmp2);
3013        tcg_temp_free_i32(tmp);
3014
3015        gen_op_iwmmxt_movq_wRn_M0(acc);
3016        return 0;
3017    }
3018
3019    if ((insn & 0x0fe00ff8) == 0x0c400000) {
3020        /* Internal Accumulator Access Format */
3021        rdhi = (insn >> 16) & 0xf;
3022        rdlo = (insn >> 12) & 0xf;
3023        acc = insn & 7;
3024
3025        if (acc != 0)
3026            return 1;
3027
3028        if (insn & ARM_CP_RW_BIT) {                     /* MRA */
3029            iwmmxt_load_reg(cpu_V0, acc);
3030            tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
3031            tcg_gen_shri_i64(cpu_V0, cpu_V0, 32);
3032            tcg_gen_extrl_i64_i32(cpu_R[rdhi], cpu_V0);
3033            tcg_gen_andi_i32(cpu_R[rdhi], cpu_R[rdhi], (1 << (40 - 32)) - 1);
3034        } else {                                        /* MAR */
3035            tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
3036            iwmmxt_store_reg(cpu_V0, acc);
3037        }
3038        return 0;
3039    }
3040
3041    return 1;
3042}
3043
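/* Extract the single/double register numbers encoded in a VFP instruction.
   On a core without VFP3 there are only 16 D registers, so VFP_DREG makes
   the enclosing decoder return 1 (UNDEF) when the extra register bit is
   set.  */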
3044#define VFP_REG_SHR(x, n) (((n) > 0) ? (x) >> (n) : (x) << -(n))
3045#define VFP_SREG(insn, bigbit, smallbit) \
3046  ((VFP_REG_SHR(insn, bigbit - 1) & 0x1e) | (((insn) >> (smallbit)) & 1))
3047#define VFP_DREG(reg, insn, bigbit, smallbit) do { \
3048    if (arm_dc_feature(s, ARM_FEATURE_VFP3)) { \
3049        reg = (((insn) >> (bigbit)) & 0x0f) \
3050              | (((insn) >> ((smallbit) - 4)) & 0x10); \
3051    } else { \
3052        if (insn & (1 << (smallbit))) \
3053            return 1; \
3054        reg = ((insn) >> (bigbit)) & 0x0f; \
3055    }} while (0)
3056
3057#define VFP_SREG_D(insn) VFP_SREG(insn, 12, 22)
3058#define VFP_DREG_D(reg, insn) VFP_DREG(reg, insn, 12, 22)
3059#define VFP_SREG_N(insn) VFP_SREG(insn, 16,  7)
3060#define VFP_DREG_N(reg, insn) VFP_DREG(reg, insn, 16,  7)
3061#define VFP_SREG_M(insn) VFP_SREG(insn,  0,  5)
3062#define VFP_DREG_M(reg, insn) VFP_DREG(reg, insn,  0,  5)
3063
3064/* Move between integer and VFP cores.  */
3065static TCGv_i32 gen_vfp_mrs(void)
3066{
3067    TCGv_i32 tmp = tcg_temp_new_i32();
3068    tcg_gen_mov_i32(tmp, cpu_F0s);
3069    return tmp;
3070}
3071
3072static void gen_vfp_msr(TCGv_i32 tmp)
3073{
3074    tcg_gen_mov_i32(cpu_F0s, tmp);
3075    tcg_temp_free_i32(tmp);
3076}
3077
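/* Duplicate the low (respectively high) 16 bits of VAR into both halves of
   the 32-bit value.  */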
3078static void gen_neon_dup_low16(TCGv_i32 var)
3079{
3080    TCGv_i32 tmp = tcg_temp_new_i32();
3081    tcg_gen_ext16u_i32(var, var);
3082    tcg_gen_shli_i32(tmp, var, 16);
3083    tcg_gen_or_i32(var, var, tmp);
3084    tcg_temp_free_i32(tmp);
3085}
3086
3087static void gen_neon_dup_high16(TCGv_i32 var)
3088{
3089    TCGv_i32 tmp = tcg_temp_new_i32();
3090    tcg_gen_andi_i32(var, var, 0xffff0000);
3091    tcg_gen_shri_i32(tmp, var, 16);
3092    tcg_gen_or_i32(var, var, tmp);
3093    tcg_temp_free_i32(tmp);
3094}
3095
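/* Handle the VSEL instruction: write Vn or Vm to Vd depending on the
   condition encoded in bits [21:20], evaluated against the cached NZCV
   flags.  */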
3096static int handle_vsel(uint32_t insn, uint32_t rd, uint32_t rn, uint32_t rm,
3097                       uint32_t dp)
3098{
3099    uint32_t cc = extract32(insn, 20, 2);
3100
3101    if (dp) {
3102        TCGv_i64 frn, frm, dest;
3103        TCGv_i64 tmp, zero, zf, nf, vf;
3104
3105        zero = tcg_const_i64(0);
3106
3107        frn = tcg_temp_new_i64();
3108        frm = tcg_temp_new_i64();
3109        dest = tcg_temp_new_i64();
3110
3111        zf = tcg_temp_new_i64();
3112        nf = tcg_temp_new_i64();
3113        vf = tcg_temp_new_i64();
3114
3115        tcg_gen_extu_i32_i64(zf, cpu_ZF);
3116        tcg_gen_ext_i32_i64(nf, cpu_NF);
3117        tcg_gen_ext_i32_i64(vf, cpu_VF);
3118
3119        tcg_gen_ld_f64(frn, cpu_env, vfp_reg_offset(dp, rn));
3120        tcg_gen_ld_f64(frm, cpu_env, vfp_reg_offset(dp, rm));
3121        switch (cc) {
3122        case 0: /* eq: Z */
3123            tcg_gen_movcond_i64(TCG_COND_EQ, dest, zf, zero,
3124                                frn, frm);
3125            break;
3126        case 1: /* vs: V */
3127            tcg_gen_movcond_i64(TCG_COND_LT, dest, vf, zero,
3128                                frn, frm);
3129            break;
3130        case 2: /* ge: N == V -> N ^ V == 0 */
3131            tmp = tcg_temp_new_i64();
3132            tcg_gen_xor_i64(tmp, vf, nf);
3133            tcg_gen_movcond_i64(TCG_COND_GE, dest, tmp, zero,
3134                                frn, frm);
3135            tcg_temp_free_i64(tmp);
3136            break;
3137        case 3: /* gt: !Z && N == V */
3138            tcg_gen_movcond_i64(TCG_COND_NE, dest, zf, zero,
3139                                frn, frm);
3140            tmp = tcg_temp_new_i64();
3141            tcg_gen_xor_i64(tmp, vf, nf);
3142            tcg_gen_movcond_i64(TCG_COND_GE, dest, tmp, zero,
3143                                dest, frm);
3144            tcg_temp_free_i64(tmp);
3145            break;
3146        }
3147        tcg_gen_st_f64(dest, cpu_env, vfp_reg_offset(dp, rd));
3148        tcg_temp_free_i64(frn);
3149        tcg_temp_free_i64(frm);
3150        tcg_temp_free_i64(dest);
3151
3152        tcg_temp_free_i64(zf);
3153        tcg_temp_free_i64(nf);
3154        tcg_temp_free_i64(vf);
3155
3156        tcg_temp_free_i64(zero);
3157    } else {
3158        TCGv_i32 frn, frm, dest;
3159        TCGv_i32 tmp, zero;
3160
3161        zero = tcg_const_i32(0);
3162
3163        frn = tcg_temp_new_i32();
3164        frm = tcg_temp_new_i32();
3165        dest = tcg_temp_new_i32();
3166        tcg_gen_ld_f32(frn, cpu_env, vfp_reg_offset(dp, rn));
3167        tcg_gen_ld_f32(frm, cpu_env, vfp_reg_offset(dp, rm));
3168        switch (cc) {
3169        case 0: /* eq: Z */
3170            tcg_gen_movcond_i32(TCG_COND_EQ, dest, cpu_ZF, zero,
3171                                frn, frm);
3172            break;
3173        case 1: /* vs: V */
3174            tcg_gen_movcond_i32(TCG_COND_LT, dest, cpu_VF, zero,
3175                                frn, frm);
3176            break;
3177        case 2: /* ge: N == V -> N ^ V == 0 */
3178            tmp = tcg_temp_new_i32();
3179            tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
3180            tcg_gen_movcond_i32(TCG_COND_GE, dest, tmp, zero,
3181                                frn, frm);
3182            tcg_temp_free_i32(tmp);
3183            break;
3184        case 3: /* gt: !Z && N == V */
3185            tcg_gen_movcond_i32(TCG_COND_NE, dest, cpu_ZF, zero,
3186                                frn, frm);
3187            tmp = tcg_temp_new_i32();
3188            tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
3189            tcg_gen_movcond_i32(TCG_COND_GE, dest, tmp, zero,
3190                                dest, frm);
3191            tcg_temp_free_i32(tmp);
3192            break;
3193        }
3194        tcg_gen_st_f32(dest, cpu_env, vfp_reg_offset(dp, rd));
3195        tcg_temp_free_i32(frn);
3196        tcg_temp_free_i32(frm);
3197        tcg_temp_free_i32(dest);
3198
3199        tcg_temp_free_i32(zero);
3200    }
3201
3202    return 0;
3203}
3204
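/* Handle VMAXNM/VMINNM: IEEE 754-2008 maxNum/minNum of Vn and Vm.  */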
3205static int handle_vminmaxnm(uint32_t insn, uint32_t rd, uint32_t rn,
3206                            uint32_t rm, uint32_t dp)
3207{
3208    uint32_t vmin = extract32(insn, 6, 1);
3209    TCGv_ptr fpst = get_fpstatus_ptr(0);
3210
3211    if (dp) {
3212        TCGv_i64 frn, frm, dest;
3213
3214        frn = tcg_temp_new_i64();
3215        frm = tcg_temp_new_i64();
3216        dest = tcg_temp_new_i64();
3217
3218        tcg_gen_ld_f64(frn, cpu_env, vfp_reg_offset(dp, rn));
3219        tcg_gen_ld_f64(frm, cpu_env, vfp_reg_offset(dp, rm));
3220        if (vmin) {
3221            gen_helper_vfp_minnumd(dest, frn, frm, fpst);
3222        } else {
3223            gen_helper_vfp_maxnumd(dest, frn, frm, fpst);
3224        }
3225        tcg_gen_st_f64(dest, cpu_env, vfp_reg_offset(dp, rd));
3226        tcg_temp_free_i64(frn);
3227        tcg_temp_free_i64(frm);
3228        tcg_temp_free_i64(dest);
3229    } else {
3230        TCGv_i32 frn, frm, dest;
3231
3232        frn = tcg_temp_new_i32();
3233        frm = tcg_temp_new_i32();
3234        dest = tcg_temp_new_i32();
3235
3236        tcg_gen_ld_f32(frn, cpu_env, vfp_reg_offset(dp, rn));
3237        tcg_gen_ld_f32(frm, cpu_env, vfp_reg_offset(dp, rm));
3238        if (vmin) {
3239            gen_helper_vfp_minnums(dest, frn, frm, fpst);
3240        } else {
3241            gen_helper_vfp_maxnums(dest, frn, frm, fpst);
3242        }
3243        tcg_gen_st_f32(dest, cpu_env, vfp_reg_offset(dp, rd));
3244        tcg_temp_free_i32(frn);
3245        tcg_temp_free_i32(frm);
3246        tcg_temp_free_i32(dest);
3247    }
3248
3249    tcg_temp_free_ptr(fpst);
3250    return 0;
3251}
3252
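/* VRINTA/VRINTN/VRINTP/VRINTM: round to integral in floating-point, using
 * the rounding mode encoded in the instruction rather than the FPSCR one;
 * the previous rounding mode is restored by the second set_rmode call.
 */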
3253static int handle_vrint(uint32_t insn, uint32_t rd, uint32_t rm, uint32_t dp,
3254                        int rounding)
3255{
3256    TCGv_ptr fpst = get_fpstatus_ptr(0);
3257    TCGv_i32 tcg_rmode;
3258
3259    tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rounding));
3260    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
3261
3262    if (dp) {
3263        TCGv_i64 tcg_op;
3264        TCGv_i64 tcg_res;
3265        tcg_op = tcg_temp_new_i64();
3266        tcg_res = tcg_temp_new_i64();
3267        tcg_gen_ld_f64(tcg_op, cpu_env, vfp_reg_offset(dp, rm));
3268        gen_helper_rintd(tcg_res, tcg_op, fpst);
3269        tcg_gen_st_f64(tcg_res, cpu_env, vfp_reg_offset(dp, rd));
3270        tcg_temp_free_i64(tcg_op);
3271        tcg_temp_free_i64(tcg_res);
3272    } else {
3273        TCGv_i32 tcg_op;
3274        TCGv_i32 tcg_res;
3275        tcg_op = tcg_temp_new_i32();
3276        tcg_res = tcg_temp_new_i32();
3277        tcg_gen_ld_f32(tcg_op, cpu_env, vfp_reg_offset(dp, rm));
3278        gen_helper_rints(tcg_res, tcg_op, fpst);
3279        tcg_gen_st_f32(tcg_res, cpu_env, vfp_reg_offset(dp, rd));
3280        tcg_temp_free_i32(tcg_op);
3281        tcg_temp_free_i32(tcg_res);
3282    }
3283
3284    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
3285    tcg_temp_free_i32(tcg_rmode);
3286
3287    tcg_temp_free_ptr(fpst);
3288    return 0;
3289}
3290
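/* VCVTA/VCVTN/VCVTP/VCVTM: float to integer conversion with the rounding
 * mode taken from the instruction; bit 7 selects a signed result.
 */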
3291static int handle_vcvt(uint32_t insn, uint32_t rd, uint32_t rm, uint32_t dp,
3292                       int rounding)
3293{
3294    bool is_signed = extract32(insn, 7, 1);
3295    TCGv_ptr fpst = get_fpstatus_ptr(0);
3296    TCGv_i32 tcg_rmode, tcg_shift;
3297
3298    tcg_shift = tcg_const_i32(0);
3299
3300    tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rounding));
3301    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
3302
3303    if (dp) {
3304        TCGv_i64 tcg_double, tcg_res;
3305        TCGv_i32 tcg_tmp;
3306        /* Rd is encoded as a single precision register even when the source
3307         * is double precision.
3308         */
3309        rd = ((rd << 1) & 0x1e) | ((rd >> 4) & 0x1);
3310        tcg_double = tcg_temp_new_i64();
3311        tcg_res = tcg_temp_new_i64();
3312        tcg_tmp = tcg_temp_new_i32();
3313        tcg_gen_ld_f64(tcg_double, cpu_env, vfp_reg_offset(1, rm));
3314        if (is_signed) {
3315            gen_helper_vfp_tosld(tcg_res, tcg_double, tcg_shift, fpst);
3316        } else {
3317            gen_helper_vfp_tould(tcg_res, tcg_double, tcg_shift, fpst);
3318        }
3319        tcg_gen_extrl_i64_i32(tcg_tmp, tcg_res);
3320        tcg_gen_st_f32(tcg_tmp, cpu_env, vfp_reg_offset(0, rd));
3321        tcg_temp_free_i32(tcg_tmp);
3322        tcg_temp_free_i64(tcg_res);
3323        tcg_temp_free_i64(tcg_double);
3324    } else {
3325        TCGv_i32 tcg_single, tcg_res;
3326        tcg_single = tcg_temp_new_i32();
3327        tcg_res = tcg_temp_new_i32();
3328        tcg_gen_ld_f32(tcg_single, cpu_env, vfp_reg_offset(0, rm));
3329        if (is_signed) {
3330            gen_helper_vfp_tosls(tcg_res, tcg_single, tcg_shift, fpst);
3331        } else {
3332            gen_helper_vfp_touls(tcg_res, tcg_single, tcg_shift, fpst);
3333        }
3334        tcg_gen_st_f32(tcg_res, cpu_env, vfp_reg_offset(0, rd));
3335        tcg_temp_free_i32(tcg_res);
3336        tcg_temp_free_i32(tcg_single);
3337    }
3338
3339    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
3340    tcg_temp_free_i32(tcg_rmode);
3341
3342    tcg_temp_free_i32(tcg_shift);
3343
3344    tcg_temp_free_ptr(fpst);
3345
3346    return 0;
3347}
3348
3349/* Table for converting the most common AArch32 encoding of
3350 * rounding mode to arm_fprounding order (which matches the
3351 * common AArch64 order); see ARM ARM pseudocode FPDecodeRM().
3352 */
3353static const uint8_t fp_decode_rm[] = {
3354    FPROUNDING_TIEAWAY,
3355    FPROUNDING_TIEEVEN,
3356    FPROUNDING_POSINF,
3357    FPROUNDING_NEGINF,
3358};
3359
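/* Decode the VFP instructions new in v8: VSEL, VMAXNM/VMINNM, VRINT* and
 * VCVT* with explicit rounding mode.  Returns nonzero (ie UNDEF) for
 * anything else.
 */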
3360static int disas_vfp_v8_insn(DisasContext *s, uint32_t insn)
3361{
3362    uint32_t rd, rn, rm, dp = extract32(insn, 8, 1);
3363
3364    if (!arm_dc_feature(s, ARM_FEATURE_V8)) {
3365        return 1;
3366    }
3367
3368    if (dp) {
3369        VFP_DREG_D(rd, insn);
3370        VFP_DREG_N(rn, insn);
3371        VFP_DREG_M(rm, insn);
3372    } else {
3373        rd = VFP_SREG_D(insn);
3374        rn = VFP_SREG_N(insn);
3375        rm = VFP_SREG_M(insn);
3376    }
3377
3378    if ((insn & 0x0f800e50) == 0x0e000a00) {
3379        return handle_vsel(insn, rd, rn, rm, dp);
3380    } else if ((insn & 0x0fb00e10) == 0x0e800a00) {
3381        return handle_vminmaxnm(insn, rd, rn, rm, dp);
3382    } else if ((insn & 0x0fbc0ed0) == 0x0eb80a40) {
3383        /* VRINTA, VRINTN, VRINTP, VRINTM */
3384        int rounding = fp_decode_rm[extract32(insn, 16, 2)];
3385        return handle_vrint(insn, rd, rm, dp, rounding);
3386    } else if ((insn & 0x0fbc0e50) == 0x0ebc0a40) {
3387        /* VCVTA, VCVTN, VCVTP, VCVTM */
3388        int rounding = fp_decode_rm[extract32(insn, 16, 2)];
3389        return handle_vcvt(insn, rd, rm, dp, rounding);
3390    }
3391    return 1;
3392}
3393
3394/* Disassemble a VFP instruction.  Returns nonzero if an error occurred
3395   (i.e. an undefined instruction).  */
3396static int disas_vfp_insn(DisasContext *s, uint32_t insn)
3397{
3398    uint32_t rd, rn, rm, op, i, n, offset, delta_d, delta_m, bank_mask;
3399    int dp, veclen;
3400    TCGv_i32 addr;
3401    TCGv_i32 tmp;
3402    TCGv_i32 tmp2;
3403
3404    if (!arm_dc_feature(s, ARM_FEATURE_VFP)) {
3405        return 1;
3406    }
3407
3408    /* FIXME: this access check should not take precedence over UNDEF
3409     * for invalid encodings; we will generate incorrect syndrome information
3410     * for attempts to execute invalid vfp/neon encodings with FP disabled.
3411     */
3412    if (s->fp_excp_el) {
3413        gen_exception_insn(s, 4, EXCP_UDEF,
3414                           syn_fp_access_trap(1, 0xe, false), s->fp_excp_el);
3415        return 0;
3416    }
3417
3418    if (!s->vfp_enabled) {
3419        /* VFP disabled.  Only allow fmxr/fmrx to/from some control regs.  */
3420        if ((insn & 0x0fe00fff) != 0x0ee00a10)
3421            return 1;
3422        rn = (insn >> 16) & 0xf;
3423        if (rn != ARM_VFP_FPSID && rn != ARM_VFP_FPEXC && rn != ARM_VFP_MVFR2
3424            && rn != ARM_VFP_MVFR1 && rn != ARM_VFP_MVFR0) {
3425            return 1;
3426        }
3427    }
3428
3429    if (extract32(insn, 28, 4) == 0xf) {
3430        /* Encodings with T=1 (Thumb) or unconditional (ARM):
3431         * only used in v8 and above.
3432         */
3433        return disas_vfp_v8_insn(s, insn);
3434    }
3435
3436    dp = ((insn & 0xf00) == 0xb00);
3437    switch ((insn >> 24) & 0xf) {
3438    case 0xe:
3439        if (insn & (1 << 4)) {
3440            /* single register transfer */
3441            rd = (insn >> 12) & 0xf;
3442            if (dp) {
3443                int size;
3444                int pass;
3445
3446                VFP_DREG_N(rn, insn);
3447                if (insn & 0xf)
3448                    return 1;
3449                if (insn & 0x00c00060
3450                    && !arm_dc_feature(s, ARM_FEATURE_NEON)) {
3451                    return 1;
3452                }
3453
3454                pass = (insn >> 21) & 1;
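                /* Decode the scalar element size and its bit offset within
                 * the 32-bit lane from insn bits 22 and 6:5.
                 */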
3455                if (insn & (1 << 22)) {
3456                    size = 0;
3457                    offset = ((insn >> 5) & 3) * 8;
3458                } else if (insn & (1 << 5)) {
3459                    size = 1;
3460                    offset = (insn & (1 << 6)) ? 16 : 0;
3461                } else {
3462                    size = 2;
3463                    offset = 0;
3464                }
3465                if (insn & ARM_CP_RW_BIT) {
3466                    /* vfp->arm */
3467                    tmp = neon_load_reg(rn, pass);
3468                    switch (size) {
3469                    case 0:
3470                        if (offset)
3471                            tcg_gen_shri_i32(tmp, tmp, offset);
3472                        if (insn & (1 << 23))
3473                            gen_uxtb(tmp);
3474                        else
3475                            gen_sxtb(tmp);
3476                        break;
3477                    case 1:
3478                        if (insn & (1 << 23)) {
3479                            if (offset) {
3480                                tcg_gen_shri_i32(tmp, tmp, 16);
3481                            } else {
3482                                gen_uxth(tmp);
3483                            }
3484                        } else {
3485                            if (offset) {
3486                                tcg_gen_sari_i32(tmp, tmp, 16);
3487                            } else {
3488                                gen_sxth(tmp);
3489                            }
3490                        }
3491                        break;
3492                    case 2:
3493                        break;
3494                    }
3495                    store_reg(s, rd, tmp);
3496                } else {
3497                    /* arm->vfp */
3498                    tmp = load_reg(s, rd);
3499                    if (insn & (1 << 23)) {
3500                        /* VDUP */
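                        /* Duplicate the core register across a whole D
                         * register, or a whole Q register if the Q bit
                         * (decoded above as 'pass') is set.
                         */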
3501                        int vec_size = pass ? 16 : 8;
3502                        tcg_gen_gvec_dup_i32(size, neon_reg_offset(rn, 0),
3503                                             vec_size, vec_size, tmp);
3504                        tcg_temp_free_i32(tmp);
3505                    } else {
3506                        /* VMOV */
3507                        switch (size) {
3508                        case 0:
3509                            tmp2 = neon_load_reg(rn, pass);
3510                            tcg_gen_deposit_i32(tmp, tmp2, tmp, offset, 8);
3511                            tcg_temp_free_i32(tmp2);
3512                            break;
3513                        case 1:
3514                            tmp2 = neon_load_reg(rn, pass);
3515                            tcg_gen_deposit_i32(tmp, tmp2, tmp, offset, 16);
3516                            tcg_temp_free_i32(tmp2);
3517                            break;
3518                        case 2:
3519                            break;
3520                        }
3521                        neon_store_reg(rn, pass, tmp);
3522                    }
3523                }
3524            } else { /* !dp */
3525                if ((insn & 0x6f) != 0x00)
3526                    return 1;
3527                rn = VFP_SREG_N(insn);
3528                if (insn & ARM_CP_RW_BIT) {
3529                    /* vfp->arm */
3530                    if (insn & (1 << 21)) {
3531                        /* system register */
3532                        rn >>= 1;
3533
3534                        switch (rn) {
3535                        case ARM_VFP_FPSID:
3536                            /* VFP2 allows access to FPSID from userspace.
3537                               VFP3 restricts all id registers to privileged
3538                               accesses.  */
3539                            if (IS_USER(s)
3540                                && arm_dc_feature(s, ARM_FEATURE_VFP3)) {
3541                                return 1;
3542                            }
3543                            tmp = load_cpu_field(vfp.xregs[rn]);
3544                            break;
3545                        case ARM_VFP_FPEXC:
3546                            if (IS_USER(s))
3547                                return 1;
3548                            tmp = load_cpu_field(vfp.xregs[rn]);
3549                            break;
3550                        case ARM_VFP_FPINST:
3551                        case ARM_VFP_FPINST2:
3552                            /* Not present in VFP3.  */
3553                            if (IS_USER(s)
3554                                || arm_dc_feature(s, ARM_FEATURE_VFP3)) {
3555                                return 1;
3556                            }
3557                            tmp = load_cpu_field(vfp.xregs[rn]);
3558                            break;
3559                        case ARM_VFP_FPSCR:
3560                            if (rd == 15) {
3561                                tmp = load_cpu_field(vfp.xregs[ARM_VFP_FPSCR]);
3562                                tcg_gen_andi_i32(tmp, tmp, 0xf0000000);
3563                            } else {
3564                                tmp = tcg_temp_new_i32();
3565                                gen_helper_vfp_get_fpscr(tmp, cpu_env);
3566                            }
3567                            break;
3568                        case ARM_VFP_MVFR2:
3569                            if (!arm_dc_feature(s, ARM_FEATURE_V8)) {
3570                                return 1;
3571                            }
3572                            /* fall through */
3573                        case ARM_VFP_MVFR0:
3574                        case ARM_VFP_MVFR1:
3575                            if (IS_USER(s)
3576                                || !arm_dc_feature(s, ARM_FEATURE_MVFR)) {
3577                                return 1;
3578                            }
3579                            tmp = load_cpu_field(vfp.xregs[rn]);
3580                            break;
3581                        default:
3582                            return 1;
3583                        }
3584                    } else {
3585                        gen_mov_F0_vreg(0, rn);
3586                        tmp = gen_vfp_mrs();
3587                    }
3588                    if (rd == 15) {
3589                        /* Set the 4 flag bits in the CPSR.  */
3590                        gen_set_nzcv(tmp);
3591                        tcg_temp_free_i32(tmp);
3592                    } else {
3593                        store_reg(s, rd, tmp);
3594                    }
3595                } else {
3596                    /* arm->vfp */
3597                    if (insn & (1 << 21)) {
3598                        rn >>= 1;
3599                        /* system register */
3600                        switch (rn) {
3601                        case ARM_VFP_FPSID:
3602                        case ARM_VFP_MVFR0:
3603                        case ARM_VFP_MVFR1:
3604                            /* Writes are ignored.  */
3605                            break;
3606                        case ARM_VFP_FPSCR:
3607                            tmp = load_reg(s, rd);
3608                            gen_helper_vfp_set_fpscr(cpu_env, tmp);
3609                            tcg_temp_free_i32(tmp);
3610                            gen_lookup_tb(s);
3611                            break;
3612                        case ARM_VFP_FPEXC:
3613                            if (IS_USER(s))
3614                                return 1;
3615                            /* TODO: VFP subarchitecture support.
3616                             * For now, keep only the EN bit */
3617                            tmp = load_reg(s, rd);
3618                            tcg_gen_andi_i32(tmp, tmp, 1 << 30);
3619                            store_cpu_field(tmp, vfp.xregs[rn]);
3620                            gen_lookup_tb(s);
3621                            break;
3622                        case ARM_VFP_FPINST:
3623                        case ARM_VFP_FPINST2:
3624                            if (IS_USER(s)) {
3625                                return 1;
3626                            }
3627                            tmp = load_reg(s, rd);
3628                            store_cpu_field(tmp, vfp.xregs[rn]);
3629                            break;
3630                        default:
3631                            return 1;
3632                        }
3633                    } else {
3634                        tmp = load_reg(s, rd);
3635                        gen_vfp_msr(tmp);
3636                        gen_mov_vreg_F0(0, rn);
3637                    }
3638                }
3639            }
3640        } else {
3641            /* data processing */
3642            /* The opcode is in bits 23, 21, 20 and 6.  */
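            /* i.e. op = insn[23]:insn[21]:insn[20]:insn[6] */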
3643            op = ((insn >> 20) & 8) | ((insn >> 19) & 6) | ((insn >> 6) & 1);
3644            if (dp) {
3645                if (op == 15) {
3646                    /* rn is opcode */
3647                    rn = ((insn >> 15) & 0x1e) | ((insn >> 7) & 1);
3648                } else {
3649                    /* rn is register number */
3650                    VFP_DREG_N(rn, insn);
3651                }
3652
3653                if (op == 15 && (rn == 15 || ((rn & 0x1c) == 0x18) ||
3654                                 ((rn & 0x1e) == 0x6))) {
3655                    /* Integer or single/half precision destination.  */
3656                    rd = VFP_SREG_D(insn);
3657                } else {
3658                    VFP_DREG_D(rd, insn);
3659                }
3660                if (op == 15 &&
3661                    (((rn & 0x1c) == 0x10) || ((rn & 0x14) == 0x14) ||
3662                     ((rn & 0x1e) == 0x4))) {
3663                    /* VCVT from int or half precision is always from S reg
3664                     * regardless of dp bit. VCVT with immediate frac_bits
3665                     * has same format as SREG_M.
3666                     */
3667                    rm = VFP_SREG_M(insn);
3668                } else {
3669                    VFP_DREG_M(rm, insn);
3670                }
3671            } else {
3672                rn = VFP_SREG_N(insn);
3673                if (op == 15 && rn == 15) {
3674                    /* Double precision destination.  */
3675                    VFP_DREG_D(rd, insn);
3676                } else {
3677                    rd = VFP_SREG_D(insn);
3678                }
3679                /* NB that we implicitly rely on the encoding for the frac_bits
3680                 * in VCVT of fixed to float being the same as that of an SREG_M
3681                 */
3682                rm = VFP_SREG_M(insn);
3683            }
3684
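            /* VFP short-vector handling: the FPSCR LEN/STRIDE fields (cached
             * as vec_len/vec_stride in the DisasContext) make data-processing
             * ops iterate over several registers; operands whose register is
             * in bank 0 are treated as scalars.
             */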
3685            veclen = s->vec_len;
3686            if (op == 15 && rn > 3)
3687                veclen = 0;
3688
3689            /* Shut up compiler warnings.  */
3690            delta_m = 0;
3691            delta_d = 0;
3692            bank_mask = 0;
3693
3694            if (veclen > 0) {
3695                if (dp)
3696                    bank_mask = 0xc;
3697                else
3698                    bank_mask = 0x18;
3699
3700                /* Figure out what type of vector operation this is.  */
3701                if ((rd & bank_mask) == 0) {
3702                    /* scalar */
3703                    veclen = 0;
3704                } else {
3705                    if (dp)
3706                        delta_d = (s->vec_stride >> 1) + 1;
3707                    else
3708                        delta_d = s->vec_stride + 1;
3709
3710                    if ((rm & bank_mask) == 0) {
3711                        /* mixed scalar/vector */
3712                        delta_m = 0;
3713                    } else {
3714                        /* vector */
3715                        delta_m = delta_d;
3716                    }
3717                }
3718            }
3719
3720            /* Load the initial operands.  */
3721            if (op == 15) {
3722                switch (rn) {
3723                case 16:
3724                case 17:
3725                    /* Integer source */
3726                    gen_mov_F0_vreg(0, rm);
3727                    break;
3728                case 8:
3729                case 9:
3730                    /* Compare */
3731                    gen_mov_F0_vreg(dp, rd);
3732                    gen_mov_F1_vreg(dp, rm);
3733                    break;
3734                case 10:
3735                case 11:
3736                    /* Compare with zero */
3737                    gen_mov_F0_vreg(dp, rd);
3738                    gen_vfp_F1_ld0(dp);
3739                    break;
3740                case 20:
3741                case 21:
3742                case 22:
3743                case 23:
3744                case 28:
3745                case 29:
3746                case 30:
3747                case 31:
3748                    /* Source and destination the same.  */
3749                    gen_mov_F0_vreg(dp, rd);
3750                    break;
3751                case 4:
3752                case 5:
3753                case 6:
3754                case 7:
3755                    /* VCVTB, VCVTT: only present with the halfprec extension;
3756                     * UNPREDICTABLE if bit 8 is set prior to ARMv8
3757                     * (we choose to UNDEF)
3758                     */
3759                    if ((dp && !arm_dc_feature(s, ARM_FEATURE_V8)) ||
3760                        !arm_dc_feature(s, ARM_FEATURE_VFP_FP16)) {
3761                        return 1;
3762                    }
3763                    if (!extract32(rn, 1, 1)) {
3764                        /* Half precision source.  */
3765                        gen_mov_F0_vreg(0, rm);
3766                        break;
3767                    }
3768                    /* Otherwise fall through */
3769                default:
3770                    /* One source operand.  */
3771                    gen_mov_F0_vreg(dp, rm);
3772                    break;
3773                }
3774            } else {
3775                /* Two source operands.  */
3776                gen_mov_F0_vreg(dp, rn);
3777                gen_mov_F1_vreg(dp, rm);
3778            }
3779
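            /* Loop over the short-vector elements (one pass for scalars). */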
3780            for (;;) {
3781                /* Perform the calculation.  */
3782                switch (op) {
3783                case 0: /* VMLA: fd + (fn * fm) */
3784                    /* Note that order of inputs to the add matters for NaNs */
3785                    gen_vfp_F1_mul(dp);
3786                    gen_mov_F0_vreg(dp, rd);
3787                    gen_vfp_add(dp);
3788                    break;
3789                case 1: /* VMLS: fd + -(fn * fm) */
3790                    gen_vfp_mul(dp);
3791                    gen_vfp_F1_neg(dp);
3792                    gen_mov_F0_vreg(dp, rd);
3793                    gen_vfp_add(dp);
3794                    break;
3795                case 2: /* VNMLS: -fd + (fn * fm) */
3796                    /* Note that it isn't valid to replace (-A + B) with (B - A)
3797                     * or similar plausible looking simplifications
3798                     * because this will give wrong results for NaNs.
3799                     */
3800                    gen_vfp_F1_mul(dp);
3801                    gen_mov_F0_vreg(dp, rd);
3802                    gen_vfp_neg(dp);
3803                    gen_vfp_add(dp);
3804                    break;
3805                case 3: /* VNMLA: -fd + -(fn * fm) */
3806                    gen_vfp_mul(dp);
3807                    gen_vfp_F1_neg(dp);
3808                    gen_mov_F0_vreg(dp, rd);
3809                    gen_vfp_neg(dp);
3810                    gen_vfp_add(dp);
3811                    break;
3812                case 4: /* mul: fn * fm */
3813                    gen_vfp_mul(dp);
3814                    break;
3815                case 5: /* nmul: -(fn * fm) */
3816                    gen_vfp_mul(dp);
3817                    gen_vfp_neg(dp);
3818                    break;
3819                case 6: /* add: fn + fm */
3820                    gen_vfp_add(dp);
3821                    break;
3822                case 7: /* sub: fn - fm */
3823                    gen_vfp_sub(dp);
3824                    break;
3825                case 8: /* div: fn / fm */
3826                    gen_vfp_div(dp);
3827                    break;
3828                case 10: /* VFNMA : fd = muladd(-fd,  fn, fm) */
3829                case 11: /* VFNMS : fd = muladd(-fd, -fn, fm) */
3830                case 12: /* VFMA  : fd = muladd( fd,  fn, fm) */
3831                case 13: /* VFMS  : fd = muladd( fd, -fn, fm) */
3832                    /* These are fused multiply-add, and must be done as one
3833                     * floating point operation with no rounding between the
3834                     * multiplication and addition steps.
3835                     * NB that doing the negations here as separate steps is
3836                     * correct: an input NaN should come out with its sign bit
3837                     * flipped if it is a negated input.
3838                     */
3839                    if (!arm_dc_feature(s, ARM_FEATURE_VFP4)) {
3840                        return 1;
3841                    }
3842                    if (dp) {
3843                        TCGv_ptr fpst;
3844                        TCGv_i64 frd;
3845                        if (op & 1) {
3846                            /* VFNMS, VFMS */
3847                            gen_helper_vfp_negd(cpu_F0d, cpu_F0d);
3848                        }
3849                        frd = tcg_temp_new_i64();
3850                        tcg_gen_ld_f64(frd, cpu_env, vfp_reg_offset(dp, rd));
3851                        if (op & 2) {
3852                            /* VFNMA, VFNMS */
3853                            gen_helper_vfp_negd(frd, frd);
3854                        }
3855                        fpst = get_fpstatus_ptr(0);
3856                        gen_helper_vfp_muladdd(cpu_F0d, cpu_F0d,
3857                                               cpu_F1d, frd, fpst);
3858                        tcg_temp_free_ptr(fpst);
3859                        tcg_temp_free_i64(frd);
3860                    } else {
3861                        TCGv_ptr fpst;
3862                        TCGv_i32 frd;
3863                        if (op & 1) {
3864                            /* VFNMS, VFMS */
3865                            gen_helper_vfp_negs(cpu_F0s, cpu_F0s);
3866                        }
3867                        frd = tcg_temp_new_i32();
3868                        tcg_gen_ld_f32(frd, cpu_env, vfp_reg_offset(dp, rd));
3869                        if (op & 2) {
3870                            gen_helper_vfp_negs(frd, frd);
3871                        }
3872                        fpst = get_fpstatus_ptr(0);
3873                        gen_helper_vfp_muladds(cpu_F0s, cpu_F0s,
3874                                               cpu_F1s, frd, fpst);
3875                        tcg_temp_free_ptr(fpst);
3876                        tcg_temp_free_i32(frd);
3877                    }
3878                    break;
3879                case 14: /* fconst */
3880                    if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
3881                        return 1;
3882                    }
3883
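                    /* Expand the 8-bit VFP immediate (imm4H:imm4L) into a
                     * single or double precision constant: the top bit gives
                     * the sign, bit 6 selects the exponent pattern, and the
                     * remaining bits fill out the exponent and fraction
                     * (VFPExpandImm in the ARM ARM).
                     */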
3884                    n = (insn << 12) & 0x80000000;
3885                    i = ((insn >> 12) & 0x70) | (insn & 0xf);
3886                    if (dp) {
3887                        if (i & 0x40)
3888                            i |= 0x3f80;
3889                        else
3890                            i |= 0x4000;
3891                        n |= i << 16;
3892                        tcg_gen_movi_i64(cpu_F0d, ((uint64_t)n) << 32);
3893                    } else {
3894                        if (i & 0x40)
3895                            i |= 0x780;
3896                        else
3897                            i |= 0x800;
3898                        n |= i << 19;
3899                        tcg_gen_movi_i32(cpu_F0s, n);
3900                    }
3901                    break;
3902                case 15: /* extension space */
3903                    switch (rn) {
3904                    case 0: /* cpy */
3905                        /* no-op */
3906                        break;
3907                    case 1: /* abs */
3908                        gen_vfp_abs(dp);
3909                        break;
3910                    case 2: /* neg */
3911                        gen_vfp_neg(dp);
3912                        break;
3913                    case 3: /* sqrt */
3914                        gen_vfp_sqrt(dp);
3915                        break;
3916                    case 4: /* vcvtb.f32.f16, vcvtb.f64.f16 */
3917                    {
3918                        TCGv_ptr fpst = get_fpstatus_ptr(false);
3919                        TCGv_i32 ahp_mode = get_ahp_flag();
3920                        tmp = gen_vfp_mrs();
3921                        tcg_gen_ext16u_i32(tmp, tmp);
3922                        if (dp) {
3923                            gen_helper_vfp_fcvt_f16_to_f64(cpu_F0d, tmp,
3924                                                           fpst, ahp_mode);
3925                        } else {
3926                            gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp,
3927                                                           fpst, ahp_mode);
3928                        }
3929                        tcg_temp_free_i32(ahp_mode);
3930                        tcg_temp_free_ptr(fpst);
3931                        tcg_temp_free_i32(tmp);
3932                        break;
3933                    }
3934                    case 5: /* vcvtt.f32.f16, vcvtt.f64.f16 */
3935                    {
3936                        TCGv_ptr fpst = get_fpstatus_ptr(false);
3937                        TCGv_i32 ahp = get_ahp_flag();
3938                        tmp = gen_vfp_mrs();
3939                        tcg_gen_shri_i32(tmp, tmp, 16);
3940                        if (dp) {
3941                            gen_helper_vfp_fcvt_f16_to_f64(cpu_F0d, tmp,
3942                                                           fpst, ahp);
3943                        } else {
3944                            gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp,
3945                                                           fpst, ahp);
3946                        }
3947                        tcg_temp_free_i32(tmp);
3948                        tcg_temp_free_i32(ahp);
3949                        tcg_temp_free_ptr(fpst);
3950                        break;
3951                    }
3952                    case 6: /* vcvtb.f16.f32, vcvtb.f16.f64 */
3953                    {
3954                        TCGv_ptr fpst = get_fpstatus_ptr(false);
3955                        TCGv_i32 ahp = get_ahp_flag();
3956                        tmp = tcg_temp_new_i32();
3957
3958                        if (dp) {
3959                            gen_helper_vfp_fcvt_f64_to_f16(tmp, cpu_F0d,
3960                                                           fpst, ahp);
3961                        } else {
3962                            gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s,
3963                                                           fpst, ahp);
3964                        }
3965                        tcg_temp_free_i32(ahp);
3966                        tcg_temp_free_ptr(fpst);
3967                        gen_mov_F0_vreg(0, rd);
3968                        tmp2 = gen_vfp_mrs();
3969                        tcg_gen_andi_i32(tmp2, tmp2, 0xffff0000);
3970                        tcg_gen_or_i32(tmp, tmp, tmp2);
3971                        tcg_temp_free_i32(tmp2);
3972                        gen_vfp_msr(tmp);
3973                        break;
3974                    }
3975                    case 7: /* vcvtt.f16.f32, vcvtt.f16.f64 */
3976                    {
3977                        TCGv_ptr fpst = get_fpstatus_ptr(false);
3978                        TCGv_i32 ahp = get_ahp_flag();
3979                        tmp = tcg_temp_new_i32();
3980                        if (dp) {
3981                            gen_helper_vfp_fcvt_f64_to_f16(tmp, cpu_F0d,
3982                                                           fpst, ahp);
3983                        } else {
3984                            gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s,
3985                                                           fpst, ahp);
3986                        }
3987                        tcg_temp_free_i32(ahp);
3988                        tcg_temp_free_ptr(fpst);
3989                        tcg_gen_shli_i32(tmp, tmp, 16);
3990                        gen_mov_F0_vreg(0, rd);
3991                        tmp2 = gen_vfp_mrs();
3992                        tcg_gen_ext16u_i32(tmp2, tmp2);
3993                        tcg_gen_or_i32(tmp, tmp, tmp2);
3994                        tcg_temp_free_i32(tmp2);
3995                        gen_vfp_msr(tmp);
3996                        break;
3997                    }
3998                    case 8: /* cmp */
3999                        gen_vfp_cmp(dp);
4000                        break;
4001                    case 9: /* cmpe */
4002                        gen_vfp_cmpe(dp);
4003                        break;
4004                    case 10: /* cmpz */
4005                        gen_vfp_cmp(dp);
4006                        break;
4007                    case 11: /* cmpez */
4008                        gen_vfp_F1_ld0(dp);
4009                        gen_vfp_cmpe(dp);
4010                        break;
4011                    case 12: /* vrintr */
4012                    {
4013                        TCGv_ptr fpst = get_fpstatus_ptr(0);
4014                        if (dp) {
4015                            gen_helper_rintd(cpu_F0d, cpu_F0d, fpst);
4016                        } else {
4017                            gen_helper_rints(cpu_F0s, cpu_F0s, fpst);
4018                        }
4019                        tcg_temp_free_ptr(fpst);
4020                        break;
4021                    }
4022                    case 13: /* vrintz */
4023                    {
4024                        TCGv_ptr fpst = get_fpstatus_ptr(0);
4025                        TCGv_i32 tcg_rmode;
4026                        tcg_rmode = tcg_const_i32(float_round_to_zero);
4027                        gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
4028                        if (dp) {
4029                            gen_helper_rintd(cpu_F0d, cpu_F0d, fpst);
4030                        } else {
4031                            gen_helper_rints(cpu_F0s, cpu_F0s, fpst);
4032                        }
4033                        gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
4034                        tcg_temp_free_i32(tcg_rmode);
4035                        tcg_temp_free_ptr(fpst);
4036                        break;
4037                    }
4038                    case 14: /* vrintx */
4039                    {
4040                        TCGv_ptr fpst = get_fpstatus_ptr(0);
4041                        if (dp) {
4042                            gen_helper_rintd_exact(cpu_F0d, cpu_F0d, fpst);
4043                        } else {
4044                            gen_helper_rints_exact(cpu_F0s, cpu_F0s, fpst);
4045                        }
4046                        tcg_temp_free_ptr(fpst);
4047                        break;
4048                    }
4049                    case 15: /* single<->double conversion */
4050                        if (dp)
4051                            gen_helper_vfp_fcvtsd(cpu_F0s, cpu_F0d, cpu_env);
4052                        else
4053                            gen_helper_vfp_fcvtds(cpu_F0d, cpu_F0s, cpu_env);
4054                        break;
4055                    case 16: /* fuito */
4056                        gen_vfp_uito(dp, 0);
4057                        break;
4058                    case 17: /* fsito */
4059                        gen_vfp_sito(dp, 0);
4060                        break;
4061                    case 20: /* fshto */
4062                        if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
4063                            return 1;
4064                        }
4065                        gen_vfp_shto(dp, 16 - rm, 0);
4066                        break;
4067                    case 21: /* fslto */
4068                        if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
4069                            return 1;
4070                        }
4071                        gen_vfp_slto(dp, 32 - rm, 0);
4072                        break;
4073                    case 22: /* fuhto */
4074                        if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
4075                            return 1;
4076                        }
4077                        gen_vfp_uhto(dp, 16 - rm, 0);
4078                        break;
4079                    case 23: /* fulto */
4080                        if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
4081                            return 1;
4082                        }
4083                        gen_vfp_ulto(dp, 32 - rm, 0);
4084                        break;
4085                    case 24: /* ftoui */
4086                        gen_vfp_toui(dp, 0);
4087                        break;
4088                    case 25: /* ftouiz */
4089                        gen_vfp_touiz(dp, 0);
4090                        break;
4091                    case 26: /* ftosi */
4092                        gen_vfp_tosi(dp, 0);
4093                        break;
4094                    case 27: /* ftosiz */
4095                        gen_vfp_tosiz(dp, 0);
4096                        break;
4097                    case 28: /* ftosh */
4098                        if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
4099                            return 1;
4100                        }
4101                        gen_vfp_tosh(dp, 16 - rm, 0);
4102                        break;
4103                    case 29: /* ftosl */
4104                        if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
4105                            return 1;
4106                        }
4107                        gen_vfp_tosl(dp, 32 - rm, 0);
4108                        break;
4109                    case 30: /* ftouh */
4110                        if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
4111                            return 1;
4112                        }
4113                        gen_vfp_touh(dp, 16 - rm, 0);
4114                        break;
4115                    case 31: /* ftoul */
4116                        if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
4117                            return 1;
4118                        }
4119                        gen_vfp_toul(dp, 32 - rm, 0);
4120                        break;
4121                    default: /* undefined */
4122                        return 1;
4123                    }
4124                    break;
4125                default: /* undefined */
4126                    return 1;
4127                }
4128
4129                /* Write back the result.  */
4130                if (op == 15 && (rn >= 8 && rn <= 11)) {
4131                    /* Comparison, do nothing.  */
4132                } else if (op == 15 && dp && ((rn & 0x1c) == 0x18 ||
4133                                              (rn & 0x1e) == 0x6)) {
4134                    /* VCVT double to int: always integer result.
4135                     * VCVT double to half precision is always a single
4136                     * precision result.
4137                     */
4138                    gen_mov_vreg_F0(0, rd);
4139                } else if (op == 15 && rn == 15) {
4140                    /* conversion */
4141                    gen_mov_vreg_F0(!dp, rd);
4142                } else {
4143                    gen_mov_vreg_F0(dp, rd);
4144                }
4145
4146                /* break out of the loop if we have finished  */
4147                if (veclen == 0)
4148                    break;
4149
4150                if (op == 15 && delta_m == 0) {
4151                    /* single source one-many */
4152                    while (veclen--) {
4153                        rd = ((rd + delta_d) & (bank_mask - 1))
4154                             | (rd & bank_mask);
4155                        gen_mov_vreg_F0(dp, rd);
4156                    }
4157                    break;
4158                }
4159                /* Setup the next operands.  */
4160                veclen--;
4161                rd = ((rd + delta_d) & (bank_mask - 1))
4162                     | (rd & bank_mask);
4163
4164                if (op == 15) {
4165                    /* One source operand.  */
4166                    rm = ((rm + delta_m) & (bank_mask - 1))
4167                         | (rm & bank_mask);
4168                    gen_mov_F0_vreg(dp, rm);
4169                } else {
4170                    /* Two source operands.  */
4171                    rn = ((rn + delta_d) & (bank_mask - 1))
4172                         | (rn & bank_mask);
4173                    gen_mov_F0_vreg(dp, rn);
4174                    if (delta_m) {
4175                        rm = ((rm + delta_m) & (bank_mask - 1))
4176                             | (rm & bank_mask);
4177                        gen_mov_F1_vreg(dp, rm);
4178                    }
4179                }
4180            }
4181        }
4182        break;
4183    case 0xc:
4184    case 0xd:
4185        if ((insn & 0x03e00000) == 0x00400000) {
4186            /* two-register transfer */
4187            rn = (insn >> 16) & 0xf;
4188            rd = (insn >> 12) & 0xf;
4189            if (dp) {
4190                VFP_DREG_M(rm, insn);
4191            } else {
4192                rm = VFP_SREG_M(insn);
4193            }
4194
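            /* VMOV between two core registers and either one D register or
             * a pair of consecutive S registers.
             */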
4195            if (insn & ARM_CP_RW_BIT) {
4196                /* vfp->arm */
4197                if (dp) {
4198                    gen_mov_F0_vreg(0, rm * 2);
4199                    tmp = gen_vfp_mrs();
4200                    store_reg(s, rd, tmp);
4201                    gen_mov_F0_vreg(0, rm * 2 + 1);
4202                    tmp = gen_vfp_mrs();
4203                    store_reg(s, rn, tmp);
4204                } else {
4205                    gen_mov_F0_vreg(0, rm);
4206                    tmp = gen_vfp_mrs();
4207                    store_reg(s, rd, tmp);
4208                    gen_mov_F0_vreg(0, rm + 1);
4209                    tmp = gen_vfp_mrs();
4210                    store_reg(s, rn, tmp);
4211                }
4212            } else {
4213                /* arm->vfp */
4214                if (dp) {
4215                    tmp = load_reg(s, rd);
4216                    gen_vfp_msr(tmp);
4217                    gen_mov_vreg_F0(0, rm * 2);
4218                    tmp = load_reg(s, rn);
4219                    gen_vfp_msr(tmp);
4220                    gen_mov_vreg_F0(0, rm * 2 + 1);
4221                } else {
4222                    tmp = load_reg(s, rd);
4223                    gen_vfp_msr(tmp);
4224                    gen_mov_vreg_F0(0, rm);
4225                    tmp = load_reg(s, rn);
4226                    gen_vfp_msr(tmp);
4227                    gen_mov_vreg_F0(0, rm + 1);
4228                }
4229            }
4230        } else {
4231            /* Load/store */
4232            rn = (insn >> 16) & 0xf;
4233            if (dp)
4234                VFP_DREG_D(rd, insn);
4235            else
4236                rd = VFP_SREG_D(insn);
4237            if ((insn & 0x01200000) == 0x01000000) {
4238                /* Single load/store */
4239                offset = (insn & 0xff) << 2;
4240                if ((insn & (1 << 23)) == 0)
4241                    offset = -offset;
4242                if (s->thumb && rn == 15) {
4243                    /* This is actually UNPREDICTABLE */
4244                    addr = tcg_temp_new_i32();
4245                    tcg_gen_movi_i32(addr, s->pc & ~2);
4246                } else {
4247                    addr = load_reg(s, rn);
4248                }
4249                tcg_gen_addi_i32(addr, addr, offset);
4250                if (insn & (1 << 20)) {
4251                    gen_vfp_ld(s, dp, addr);
4252                    gen_mov_vreg_F0(dp, rd);
4253                } else {
4254                    gen_mov_F0_vreg(dp, rd);
4255                    gen_vfp_st(s, dp, addr);
4256                }
4257                tcg_temp_free_i32(addr);
4258            } else {
4259                /* load/store multiple */
4260                int w = insn & (1 << 21);
4261                if (dp)
4262                    n = (insn >> 1) & 0x7f;
4263                else
4264                    n = insn & 0xff;
4265
4266                if (w && !(((insn >> 23) ^ (insn >> 24)) & 1)) {
4267                    /* P == U, W == 1 => UNDEF */
4268                    return 1;
4269                }
4270                if (n == 0 || (rd + n) > 32 || (dp && n > 16)) {
4271                    /* UNPREDICTABLE cases for bad immediates: we choose to
4272                     * UNDEF to avoid generating huge numbers of TCG ops
4273                     */
4274                    return 1;
4275                }
4276                if (rn == 15 && w) {
4277                    /* writeback to PC is UNPREDICTABLE, we choose to UNDEF */
4278                    return 1;
4279                }
4280
4281                if (s->thumb && rn == 15) {
4282                    /* This is actually UNPREDICTABLE */
4283                    addr = tcg_temp_new_i32();
4284                    tcg_gen_movi_i32(addr, s->pc & ~2);
4285                } else {
4286                    addr = load_reg(s, rn);
4287                }
4288                if (insn & (1 << 24)) /* pre-decrement */
4289                    tcg_gen_addi_i32(addr, addr, -((insn & 0xff) << 2));
4290
4291                if (s->v8m_stackcheck && rn == 13 && w) {
4292                    /*
4293                     * Here 'addr' is the lowest address we will store to,
4294                     * and is either the old SP (if post-increment) or
4295                     * the new SP (if pre-decrement). For post-increment
4296                     * where the old value is below the limit and the new
4297                     * value is above, it is UNKNOWN whether the limit check
4298                     * triggers; we choose to trigger.
4299                     */
4300                    gen_helper_v8m_stackcheck(cpu_env, addr);
4301                }
4302
4303                if (dp)
4304                    offset = 8;
4305                else
4306                    offset = 4;
4307                for (i = 0; i < n; i++) {
4308                    if (insn & ARM_CP_RW_BIT) {
4309                        /* load */
4310                        gen_vfp_ld(s, dp, addr);
4311                        gen_mov_vreg_F0(dp, rd + i);
4312                    } else {
4313                        /* store */
4314                        gen_mov_F0_vreg(dp, rd + i);
4315                        gen_vfp_st(s, dp, addr);
4316                    }
4317                    tcg_gen_addi_i32(addr, addr, offset);
4318                }
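                /* Writeback: for decrement addressing move the base back
                 * down past the transferred data; the FLDMX/FSTMX forms
                 * (dp with an odd imm8) account for one extra word.
                 */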
4319                if (w) {
4320                    /* writeback */
4321                    if (insn & (1 << 24))
4322                        offset = -offset * n;
4323                    else if (dp && (insn & 1))
4324                        offset = 4;
4325                    else
4326                        offset = 0;
4327
4328                    if (offset != 0)
4329                        tcg_gen_addi_i32(addr, addr, offset);
4330                    store_reg(s, rn, addr);
4331                } else {
4332                    tcg_temp_free_i32(addr);
4333                }
4334            }
4335        }
4336        break;
4337    default:
4338        /* Should never happen.  */
4339        return 1;
4340    }
4341    return 0;
4342}
4343
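/* Direct block chaining (goto_tb) is only used when the destination is on
 * the same guest page as the TB being translated (its start or the current
 * instruction), so that invalidating a page is enough to break any direct
 * links into it.
 */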
4344static inline bool use_goto_tb(DisasContext *s, target_ulong dest)
4345{
4346#ifndef CONFIG_USER_ONLY
4347    return (s->base.tb->pc & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK) ||
4348           ((s->pc - 1) & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK);
4349#else
4350    return true;
4351#endif
4352}
4353
4354static void gen_goto_ptr(void)
4355{
4356    tcg_gen_lookup_and_goto_ptr();
4357}
4358
4359/* This will end the TB but doesn't guarantee we'll return to
4360 * cpu_loop_exec. Any live exit_requests will be processed as we
4361 * enter the next TB.
4362 */
4363static void gen_goto_tb(DisasContext *s, int n, target_ulong dest)
4364{
4365    if (use_goto_tb(s, dest)) {
4366        tcg_gen_goto_tb(n);
4367        gen_set_pc_im(s, dest);
4368        tcg_gen_exit_tb(s->base.tb, n);
4369    } else {
4370        gen_set_pc_im(s, dest);
4371        gen_goto_ptr();
4372    }
4373    s->base.is_jmp = DISAS_NORETURN;
4374}
4375
4376static inline void gen_jmp (DisasContext *s, uint32_t dest)
4377{
4378    if (unlikely(is_singlestepping(s))) {
4379        /* An indirect jump so that we still trigger the debug exception.  */
4380        if (s->thumb)
4381            dest |= 1;
4382        gen_bx_im(s, dest);
4383    } else {
4384        gen_goto_tb(s, 0, dest);
4385    }
4386}
4387
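/* 16x16->32 signed multiply of the halves of t0 and t1 selected by x and y
 * (top half if set, else the sign-extended bottom half), as used by the
 * SMUL<x><y> / SMLA<x><y> family.
 */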
4388static inline void gen_mulxy(TCGv_i32 t0, TCGv_i32 t1, int x, int y)
4389{
4390    if (x)
4391        tcg_gen_sari_i32(t0, t0, 16);
4392    else
4393        gen_sxth(t0);
4394    if (y)
4395        tcg_gen_sari_i32(t1, t1, 16);
4396    else
4397        gen_sxth(t1);
4398    tcg_gen_mul_i32(t0, t0, t1);
4399}
4400
4401/* Return the mask of PSR bits set by a MSR instruction.  */
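/* 'flags' is the instruction's field mask: bit 0 = c (PSR[7:0]),
 * bit 1 = x (PSR[15:8]), bit 2 = s (PSR[23:16]), bit 3 = f (PSR[31:24]).
 */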
4402static uint32_t msr_mask(DisasContext *s, int flags, int spsr)
4403{
4404    uint32_t mask;
4405
4406    mask = 0;
4407    if (flags & (1 << 0))
4408        mask |= 0xff;
4409    if (flags & (1 << 1))
4410        mask |= 0xff00;
4411    if (flags & (1 << 2))
4412        mask |= 0xff0000;
4413    if (flags & (1 << 3))
4414        mask |= 0xff000000;
4415
4416    /* Mask out undefined bits.  */
4417    mask &= ~CPSR_RESERVED;
4418    if (!arm_dc_feature(s, ARM_FEATURE_V4T)) {
4419        mask &= ~CPSR_T;
4420    }
4421    if (!arm_dc_feature(s, ARM_FEATURE_V5)) {
4422        mask &= ~CPSR_Q; /* V5TE in reality */
4423    }
4424    if (!arm_dc_feature(s, ARM_FEATURE_V6)) {
4425        mask &= ~(CPSR_E | CPSR_GE);
4426    }
4427    if (!arm_dc_feature(s, ARM_FEATURE_THUMB2)) {
4428        mask &= ~CPSR_IT;
4429    }
4430    /* Mask out execution state and reserved bits.  */
4431    if (!spsr) {
4432        mask &= ~(CPSR_EXEC | CPSR_RESERVED);
4433    }
4434    /* Mask out privileged bits.  */
4435    if (IS_USER(s))
4436        mask &= CPSR_USER;
4437    return mask;
4438}
4439
4440/* Returns nonzero if access to the PSR is not permitted. Marks t0 as dead. */
4441static int gen_set_psr(DisasContext *s, uint32_t mask, int spsr, TCGv_i32 t0)
4442{
4443    TCGv_i32 tmp;
4444    if (spsr) {
4445        /* ??? This is also undefined in system mode.  */
4446        if (IS_USER(s))
4447            return 1;
4448
4449        tmp = load_cpu_field(spsr);
4450        tcg_gen_andi_i32(tmp, tmp, ~mask);
4451        tcg_gen_andi_i32(t0, t0, mask);
4452        tcg_gen_or_i32(tmp, tmp, t0);
4453        store_cpu_field(tmp, spsr);
4454    } else {
4455        gen_set_cpsr(t0, mask);
4456    }
4457    tcg_temp_free_i32(t0);
4458    gen_lookup_tb(s);
4459    return 0;
4460}
4461
4462/* Returns nonzero if access to the PSR is not permitted.  */
4463static int gen_set_psr_im(DisasContext *s, uint32_t mask, int spsr, uint32_t val)
4464{
4465    TCGv_i32 tmp;
4466    tmp = tcg_temp_new_i32();
4467    tcg_gen_movi_i32(tmp, val);
4468    return gen_set_psr(s, mask, spsr, tmp);
4469}
4470
4471static bool msr_banked_access_decode(DisasContext *s, int r, int sysm, int rn,
4472                                     int *tgtmode, int *regno)
4473{
4474    /* Decode the r and sysm fields of MSR/MRS banked accesses into
4475     * the target mode and register number, and identify the various
4476     * unpredictable cases.
4477     * MSR (banked) and MRS (banked) are CONSTRAINED UNPREDICTABLE if:
4478     *  + executed in user mode
4479     *  + using R15 as the src/dest register
4480     *  + accessing an unimplemented register
4481     *  + accessing a register that's inaccessible at current PL/security state*
4482     *  + accessing a register that you could access with a different insn
4483     * We choose to UNDEF in all these cases.
4484     * Since we don't know which of the various AArch32 modes we are in
4485     * we have to defer some checks to runtime.
4486     * Accesses to Monitor mode registers from Secure EL1 (which implies
4487     * that EL3 is AArch64) must trap to EL3.
4488     *
4489     * If the access checks fail this function will emit code to take
4490     * an exception and return false. Otherwise it will return true,
4491     * and set *tgtmode and *regno appropriately.
4492     */
4493    int exc_target = default_exception_el(s);
4494
4495    /* These instructions are present only in ARMv8, or in ARMv7 with the
4496     * Virtualization Extensions.
4497     */
4498    if (!arm_dc_feature(s, ARM_FEATURE_V8) &&
4499        !arm_dc_feature(s, ARM_FEATURE_EL2)) {
4500        goto undef;
4501    }
4502
4503    if (IS_USER(s) || rn == 15) {
4504        goto undef;
4505    }
4506
4507    /* The table in the v8 ARM ARM section F5.2.3 describes the encoding
4508     * of registers into (r, sysm).
4509     */
4510    if (r) {
4511        /* SPSRs for other modes */
4512        switch (sysm) {
4513        case 0xe: /* SPSR_fiq */
4514            *tgtmode = ARM_CPU_MODE_FIQ;
4515            break;
4516        case 0x10: /* SPSR_irq */
4517            *tgtmode = ARM_CPU_MODE_IRQ;
4518            break;
4519        case 0x12: /* SPSR_svc */
4520            *tgtmode = ARM_CPU_MODE_SVC;
4521            break;
4522        case 0x14: /* SPSR_abt */
4523            *tgtmode = ARM_CPU_MODE_ABT;
4524            break;
4525        case 0x16: /* SPSR_und */
4526            *tgtmode = ARM_CPU_MODE_UND;
4527            break;
4528        case 0x1c: /* SPSR_mon */
4529            *tgtmode = ARM_CPU_MODE_MON;
4530            break;
4531        case 0x1e: /* SPSR_hyp */
4532            *tgtmode = ARM_CPU_MODE_HYP;
4533            break;
4534        default: /* unallocated */
4535            goto undef;
4536        }
4537        /* We arbitrarily assign SPSR a register number of 16. */
4538        *regno = 16;
4539    } else {
4540        /* general purpose registers for other modes */
4541        switch (sysm) {
4542        case 0x0 ... 0x6:   /* 0b00xxx : r8_usr ... r14_usr */
4543            *tgtmode = ARM_CPU_MODE_USR;
4544            *regno = sysm + 8;
4545            break;
4546        case 0x8 ... 0xe:   /* 0b01xxx : r8_fiq ... r14_fiq */
4547            *tgtmode = ARM_CPU_MODE_FIQ;
4548            *regno = sysm;
4549            break;
4550        case 0x10 ... 0x11: /* 0b1000x : r14_irq, r13_irq */
4551            *tgtmode = ARM_CPU_MODE_IRQ;
4552            *regno = sysm & 1 ? 13 : 14;
4553            break;
4554        case 0x12 ... 0x13: /* 0b1001x : r14_svc, r13_svc */
4555            *tgtmode = ARM_CPU_MODE_SVC;
4556            *regno = sysm & 1 ? 13 : 14;
4557            break;
4558        case 0x14 ... 0x15: /* 0b1010x : r14_abt, r13_abt */
4559            *tgtmode = ARM_CPU_MODE_ABT;
4560            *regno = sysm & 1 ? 13 : 14;
4561            break;
4562        case 0x16 ... 0x17: /* 0b1011x : r14_und, r13_und */
4563            *tgtmode = ARM_CPU_MODE_UND;
4564            *regno = sysm & 1 ? 13 : 14;
4565            break;
4566        case 0x1c ... 0x1d: /* 0b1110x : r14_mon, r13_mon */
4567            *tgtmode = ARM_CPU_MODE_MON;
4568            *regno = sysm & 1 ? 13 : 14;
4569            break;
4570        case 0x1e ... 0x1f: /* 0b1111x : elr_hyp, r13_hyp */
4571            *tgtmode = ARM_CPU_MODE_HYP;
4572            /* Arbitrarily pick 17 for ELR_Hyp (which is not a banked LR!) */
4573            *regno = sysm & 1 ? 13 : 17;
4574            break;
4575        default: /* unallocated */
4576            goto undef;
4577        }
4578    }
4579
4580    /* Catch the 'accessing inaccessible register' cases we can detect
4581     * at translate time.
4582     */
4583    switch (*tgtmode) {
4584    case ARM_CPU_MODE_MON:
4585        if (!arm_dc_feature(s, ARM_FEATURE_EL3) || s->ns) {
4586            goto undef;
4587        }
4588        if (s->current_el == 1) {
4589            /* If we're in Secure EL1 (which implies that EL3 is AArch64)
4590             * then accesses to Mon registers trap to EL3
4591             */
4592            exc_target = 3;
4593            goto undef;
4594        }
4595        break;
4596    case ARM_CPU_MODE_HYP:
4597        /*
4598         * SPSR_hyp and r13_hyp can only be accessed from Monitor mode
4599         * (and so we can forbid accesses from EL2 or below). elr_hyp
4600         * can be accessed also from Hyp mode, so forbid accesses from
4601         * EL0 or EL1.
4602         */
4603        if (!arm_dc_feature(s, ARM_FEATURE_EL2) || s->current_el < 2 ||
4604            (s->current_el < 3 && *regno != 17)) {
4605            goto undef;
4606        }
4607        break;
4608    default:
4609        break;
4610    }
4611
4612    return true;
4613
4614undef:
4615    /* If we get here then some access check did not pass */
4616    gen_exception_insn(s, 4, EXCP_UDEF, syn_uncategorized(), exc_target);
4617    return false;
4618}
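    /*
     * Worked example of the decode above (illustrative): with r == 0 and
     * sysm == 0x12, the 0x12 ... 0x13 case selects ARM_CPU_MODE_SVC and,
     * bit 0 being clear, regno == 14, i.e. the banked r14_svc. With r == 1
     * the SPSR of the target mode is selected and regno is always 16.
     */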
4619
4620static void gen_msr_banked(DisasContext *s, int r, int sysm, int rn)
4621{
4622    TCGv_i32 tcg_reg, tcg_tgtmode, tcg_regno;
4623    int tgtmode = 0, regno = 0;
4624
4625    if (!msr_banked_access_decode(s, r, sysm, rn, &tgtmode, &regno)) {
4626        return;
4627    }
4628
4629    /* Sync state because msr_banked() can raise exceptions */
4630    gen_set_condexec(s);
4631    gen_set_pc_im(s, s->pc - 4);
4632    tcg_reg = load_reg(s, rn);
4633    tcg_tgtmode = tcg_const_i32(tgtmode);
4634    tcg_regno = tcg_const_i32(regno);
4635    gen_helper_msr_banked(cpu_env, tcg_reg, tcg_tgtmode, tcg_regno);
4636    tcg_temp_free_i32(tcg_tgtmode);
4637    tcg_temp_free_i32(tcg_regno);
4638    tcg_temp_free_i32(tcg_reg);
4639    s->base.is_jmp = DISAS_UPDATE;
4640}
4641
4642static void gen_mrs_banked(DisasContext *s, int r, int sysm, int rn)
4643{
4644    TCGv_i32 tcg_reg, tcg_tgtmode, tcg_regno;
4645    int tgtmode = 0, regno = 0;
4646
4647    if (!msr_banked_access_decode(s, r, sysm, rn, &tgtmode, &regno)) {
4648        return;
4649    }
4650
4651    /* Sync state because mrs_banked() can raise exceptions */
4652    gen_set_condexec(s);
4653    gen_set_pc_im(s, s->pc - 4);
4654    tcg_reg = tcg_temp_new_i32();
4655    tcg_tgtmode = tcg_const_i32(tgtmode);
4656    tcg_regno = tcg_const_i32(regno);
4657    gen_helper_mrs_banked(tcg_reg, cpu_env, tcg_tgtmode, tcg_regno);
4658    tcg_temp_free_i32(tcg_tgtmode);
4659    tcg_temp_free_i32(tcg_regno);
4660    store_reg(s, rn, tcg_reg);
4661    s->base.is_jmp = DISAS_UPDATE;
4662}
4663
4664/* Store a value to the PC as for an exception return (i.e. don't
4665 * mask the low bits). The subsequent call to gen_helper_cpsr_write_eret()
4666 * will do the masking based on the new value of the Thumb bit.
4667 */
4668static void store_pc_exc_ret(DisasContext *s, TCGv_i32 pc)
4669{
4670    tcg_gen_mov_i32(cpu_R[15], pc);
4671    tcg_temp_free_i32(pc);
4672}
4673
4674/* Generate a v6 exception return.  Marks both values as dead.  */
4675static void gen_rfe(DisasContext *s, TCGv_i32 pc, TCGv_i32 cpsr)
4676{
4677    store_pc_exc_ret(s, pc);
4678    /* The cpsr_write_eret helper will mask the low bits of PC
4679     * appropriately depending on the new Thumb bit, so it must
4680     * be called after storing the new PC.
4681     */
4682    if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
4683        gen_io_start();
4684    }
4685    gen_helper_cpsr_write_eret(cpu_env, cpsr);
4686    if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
4687        gen_io_end();
4688    }
4689    tcg_temp_free_i32(cpsr);
4690    /* Must exit loop to check unmasked IRQs */
4691    s->base.is_jmp = DISAS_EXIT;
4692}
4693
4694/* Generate an old-style exception return. Marks pc as dead. */
4695static void gen_exception_return(DisasContext *s, TCGv_i32 pc)
4696{
4697    gen_rfe(s, pc, load_cpu_field(spsr));
4698}
4699
4700/*
4701 * For WFI we will halt the vCPU until an IRQ. For WFE and YIELD we
4702 * only call the helper when running single-threaded TCG code, to ensure
4703 * the next round-robin scheduled vCPU gets a crack. In MTTCG mode we
4704 * just skip this instruction. Currently the SEV/SEVL instructions,
4705 * which are *one* of many ways to wake the CPU from WFE, are not
4706 * implemented, so we can't sleep like WFI does.
4707 */
4708static void gen_nop_hint(DisasContext *s, int val)
4709{
4710    switch (val) {
4711        /* When running in MTTCG we don't generate jumps to the yield and
4712         * WFE helpers, as they won't affect the scheduling of other vCPUs.
4713         * If we wanted to model WFE/SEV more completely, so that we don't
4714         * busy-spin unnecessarily, we would need to do something more involved.
4715         */
4716    case 1: /* yield */
4717        if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
4718            gen_set_pc_im(s, s->pc);
4719            s->base.is_jmp = DISAS_YIELD;
4720        }
4721        break;
4722    case 3: /* wfi */
4723        gen_set_pc_im(s, s->pc);
4724        s->base.is_jmp = DISAS_WFI;
4725        break;
4726    case 2: /* wfe */
4727        if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
4728            gen_set_pc_im(s, s->pc);
4729            s->base.is_jmp = DISAS_WFE;
4730        }
4731        break;
4732    case 4: /* sev */
4733    case 5: /* sevl */
4734        /* TODO: Implement SEV, SEVL and WFE.  May help SMP performance.  */
4735    default: /* nop */
4736        break;
4737    }
4738}
4739
4740#define CPU_V001 cpu_V0, cpu_V0, cpu_V1
4741
4742static inline void gen_neon_add(int size, TCGv_i32 t0, TCGv_i32 t1)
4743{
4744    switch (size) {
4745    case 0: gen_helper_neon_add_u8(t0, t0, t1); break;
4746    case 1: gen_helper_neon_add_u16(t0, t0, t1); break;
4747    case 2: tcg_gen_add_i32(t0, t0, t1); break;
4748    default: abort();
4749    }
4750}
4751
4752static inline void gen_neon_rsb(int size, TCGv_i32 t0, TCGv_i32 t1)
4753{
4754    switch (size) {
4755    case 0: gen_helper_neon_sub_u8(t0, t1, t0); break;
4756    case 1: gen_helper_neon_sub_u16(t0, t1, t0); break;
4757    case 2: tcg_gen_sub_i32(t0, t1, t0); break;
4758    default: return;
4759    }
4760}
4761
4762/* 32-bit pairwise ops end up the same as the elementwise versions.  */
4763#define gen_helper_neon_pmax_s32  gen_helper_neon_max_s32
4764#define gen_helper_neon_pmax_u32  gen_helper_neon_max_u32
4765#define gen_helper_neon_pmin_s32  gen_helper_neon_min_s32
4766#define gen_helper_neon_pmin_u32  gen_helper_neon_min_u32
4767
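    /*
     * The GEN_NEON_INTEGER_OP{,_ENV} macros below dispatch on the index
     * (size << 1) | u to the signed/unsigned 8/16/32-bit helper for 'name':
     * e.g. size == 1 with u == 0 gives case 2 and calls the _s16 helper.
     * Unhandled combinations hit the default case and make the enclosing
     * function return 1, flagging the encoding as invalid.
     */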
4768#define GEN_NEON_INTEGER_OP_ENV(name) do { \
4769    switch ((size << 1) | u) { \
4770    case 0: \
4771        gen_helper_neon_##name##_s8(tmp, cpu_env, tmp, tmp2); \
4772        break; \
4773    case 1: \
4774        gen_helper_neon_##name##_u8(tmp, cpu_env, tmp, tmp2); \
4775        break; \
4776    case 2: \
4777        gen_helper_neon_##name##_s16(tmp, cpu_env, tmp, tmp2); \
4778        break; \
4779    case 3: \
4780        gen_helper_neon_##name##_u16(tmp, cpu_env, tmp, tmp2); \
4781        break; \
4782    case 4: \
4783        gen_helper_neon_##name##_s32(tmp, cpu_env, tmp, tmp2); \
4784        break; \
4785    case 5: \
4786        gen_helper_neon_##name##_u32(tmp, cpu_env, tmp, tmp2); \
4787        break; \
4788    default: return 1; \
4789    }} while (0)
4790
4791#define GEN_NEON_INTEGER_OP(name) do { \
4792    switch ((size << 1) | u) { \
4793    case 0: \
4794        gen_helper_neon_##name##_s8(tmp, tmp, tmp2); \
4795        break; \
4796    case 1: \
4797        gen_helper_neon_##name##_u8(tmp, tmp, tmp2); \
4798        break; \
4799    case 2: \
4800        gen_helper_neon_##name##_s16(tmp, tmp, tmp2); \
4801        break; \
4802    case 3: \
4803        gen_helper_neon_##name##_u16(tmp, tmp, tmp2); \
4804        break; \
4805    case 4: \
4806        gen_helper_neon_##name##_s32(tmp, tmp, tmp2); \
4807        break; \
4808    case 5: \
4809        gen_helper_neon_##name##_u32(tmp, tmp, tmp2); \
4810        break; \
4811    default: return 1; \
4812    }} while (0)
4813
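    /*
     * Shuffle 32-bit values through the vfp.scratch[] slots in CPUARMState.
     * Note that neon_store_scratch() consumes (frees) the temporary it is
     * given.
     */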
4814static TCGv_i32 neon_load_scratch(int scratch)
4815{
4816    TCGv_i32 tmp = tcg_temp_new_i32();
4817    tcg_gen_ld_i32(tmp, cpu_env, offsetof(CPUARMState, vfp.scratch[scratch]));
4818    return tmp;
4819}
4820
4821static void neon_store_scratch(int scratch, TCGv_i32 var)
4822{
4823    tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, vfp.scratch[scratch]));
4824    tcg_temp_free_i32(var);
4825}
4826
4827static inline TCGv_i32 neon_get_scalar(int size, int reg)
4828{
4829    TCGv_i32 tmp;
4830    if (size == 1) {
4831        tmp = neon_load_reg(reg & 7, reg >> 4);
4832        if (reg & 8) {
4833            gen_neon_dup_high16(tmp);
4834        } else {
4835            gen_neon_dup_low16(tmp);
4836        }
4837    } else {
4838        tmp = neon_load_reg(reg & 15, reg >> 4);
4839    }
4840    return tmp;
4841}
4842
4843static int gen_neon_unzip(int rd, int rm, int size, int q)
4844{
4845    TCGv_ptr pd, pm;
4846
4847    if (!q && size == 2) {
4848        return 1;
4849    }
4850    pd = vfp_reg_ptr(true, rd);
4851    pm = vfp_reg_ptr(true, rm);
4852    if (q) {
4853        switch (size) {
4854        case 0:
4855            gen_helper_neon_qunzip8(pd, pm);
4856            break;
4857        case 1:
4858            gen_helper_neon_qunzip16(pd, pm);
4859            break;
4860        case 2:
4861            gen_helper_neon_qunzip32(pd, pm);
4862            break;
4863        default:
4864            abort();
4865        }
4866    } else {
4867        switch (size) {
4868        case 0:
4869            gen_helper_neon_unzip8(pd, pm);
4870            break;
4871        case 1:
4872            gen_helper_neon_unzip16(pd, pm);
4873            break;
4874        default:
4875            abort();
4876        }
4877    }
4878    tcg_temp_free_ptr(pd);
4879    tcg_temp_free_ptr(pm);
4880    return 0;
4881}
4882
4883static int gen_neon_zip(int rd, int rm, int size, int q)
4884{
4885    TCGv_ptr pd, pm;
4886
4887    if (!q && size == 2) {
4888        return 1;
4889    }
4890    pd = vfp_reg_ptr(true, rd);
4891    pm = vfp_reg_ptr(true, rm);
4892    if (q) {
4893        switch (size) {
4894        case 0:
4895            gen_helper_neon_qzip8(pd, pm);
4896            break;
4897        case 1:
4898            gen_helper_neon_qzip16(pd, pm);
4899            break;
4900        case 2:
4901            gen_helper_neon_qzip32(pd, pm);
4902            break;
4903        default:
4904            abort();
4905        }
4906    } else {
4907        switch (size) {
4908        case 0:
4909            gen_helper_neon_zip8(pd, pm);
4910            break;
4911        case 1:
4912            gen_helper_neon_zip16(pd, pm);
4913            break;
4914        default:
4915            abort();
4916        }
4917    }
4918    tcg_temp_free_ptr(pd);
4919    tcg_temp_free_ptr(pm);
4920    return 0;
4921}
4922
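    /*
     * VTRN helpers operating on 32-bit chunks: each adjacent pair of 8-bit
     * (u8 variant) or 16-bit (u16 variant) elements of {t0, t1} is treated
     * as a 2x2 matrix and transposed in place (operand order is set up by
     * the caller in the 2-reg-misc decode).
     */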
4923static void gen_neon_trn_u8(TCGv_i32 t0, TCGv_i32 t1)
4924{
4925    TCGv_i32 rd, tmp;
4926
4927    rd = tcg_temp_new_i32();
4928    tmp = tcg_temp_new_i32();
4929
4930    tcg_gen_shli_i32(rd, t0, 8);
4931    tcg_gen_andi_i32(rd, rd, 0xff00ff00);
4932    tcg_gen_andi_i32(tmp, t1, 0x00ff00ff);
4933    tcg_gen_or_i32(rd, rd, tmp);
4934
4935    tcg_gen_shri_i32(t1, t1, 8);
4936    tcg_gen_andi_i32(t1, t1, 0x00ff00ff);
4937    tcg_gen_andi_i32(tmp, t0, 0xff00ff00);
4938    tcg_gen_or_i32(t1, t1, tmp);
4939    tcg_gen_mov_i32(t0, rd);
4940
4941    tcg_temp_free_i32(tmp);
4942    tcg_temp_free_i32(rd);
4943}
4944
4945static void gen_neon_trn_u16(TCGv_i32 t0, TCGv_i32 t1)
4946{
4947    TCGv_i32 rd, tmp;
4948
4949    rd = tcg_temp_new_i32();
4950    tmp = tcg_temp_new_i32();
4951
4952    tcg_gen_shli_i32(rd, t0, 16);
4953    tcg_gen_andi_i32(tmp, t1, 0xffff);
4954    tcg_gen_or_i32(rd, rd, tmp);
4955    tcg_gen_shri_i32(t1, t1, 16);
4956    tcg_gen_andi_i32(tmp, t0, 0xffff0000);
4957    tcg_gen_or_i32(t1, t1, tmp);
4958    tcg_gen_mov_i32(t0, rd);
4959
4960    tcg_temp_free_i32(tmp);
4961    tcg_temp_free_i32(rd);
4962}
4963
4964
4965static struct {
4966    int nregs;
4967    int interleave;
4968    int spacing;
4969} const neon_ls_element_type[11] = {
4970    {1, 4, 1},
4971    {1, 4, 2},
4972    {4, 1, 1},
4973    {2, 2, 2},
4974    {1, 3, 1},
4975    {1, 3, 2},
4976    {3, 1, 1},
4977    {1, 1, 1},
4978    {1, 2, 1},
4979    {1, 2, 2},
4980    {2, 1, 1}
4981};
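    /*
     * The table above is indexed by the 'type' field (insn bits [11:8]) of a
     * VLDn/VSTn multiple-structures instruction. Illustrative reading: entry
     * 7 = {1, 1, 1} is a plain one-register VLD1/VST1, while entry 0 =
     * {1, 4, 1} describes a four-way interleaved structure (VLD4/VST4) in
     * consecutive registers.
     */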
4982
4983/* Translate a NEON load/store element instruction.  Return nonzero if the
4984   instruction is invalid.  */
4985static int disas_neon_ls_insn(DisasContext *s, uint32_t insn)
4986{
4987    int rd, rn, rm;
4988    int op;
4989    int nregs;
4990    int interleave;
4991    int spacing;
4992    int stride;
4993    int size;
4994    int reg;
4995    int load;
4996    int n;
4997    int vec_size;
4998    int mmu_idx;
4999    TCGMemOp endian;
5000    TCGv_i32 addr;
5001    TCGv_i32 tmp;
5002    TCGv_i32 tmp2;
5003    TCGv_i64 tmp64;
5004
5005    /* FIXME: this access check should not take precedence over UNDEF
5006     * for invalid encodings; we will generate incorrect syndrome information
5007     * for attempts to execute invalid vfp/neon encodings with FP disabled.
5008     */
5009    if (s->fp_excp_el) {
5010        gen_exception_insn(s, 4, EXCP_UDEF,
5011                           syn_simd_access_trap(1, 0xe, false), s->fp_excp_el);
5012        return 0;
5013    }
5014
5015    if (!s->vfp_enabled)
5016      return 1;
5017    VFP_DREG_D(rd, insn);
5018    rn = (insn >> 16) & 0xf;
5019    rm = insn & 0xf;
5020    load = (insn & (1 << 21)) != 0;
5021    endian = s->be_data;
5022    mmu_idx = get_mem_index(s);
5023    if ((insn & (1 << 23)) == 0) {
5024        /* Load store all elements.  */
5025        op = (insn >> 8) & 0xf;
5026        size = (insn >> 6) & 3;
5027        if (op > 10)
5028            return 1;
5029        /* Catch UNDEF cases for bad values of align field */
5030        switch (op & 0xc) {
5031        case 4:
5032            if (((insn >> 5) & 1) == 1) {
5033                return 1;
5034            }
5035            break;
5036        case 8:
5037            if (((insn >> 4) & 3) == 3) {
5038                return 1;
5039            }
5040            break;
5041        default:
5042            break;
5043        }
5044        nregs = neon_ls_element_type[op].nregs;
5045        interleave = neon_ls_element_type[op].interleave;
5046        spacing = neon_ls_element_type[op].spacing;
5047        if (size == 3 && (interleave | spacing) != 1) {
5048            return 1;
5049        }
5050        /* For our purposes, bytes are always little-endian.  */
5051        if (size == 0) {
5052            endian = MO_LE;
5053        }
5054        /* Consecutive little-endian elements from a single register
5055         * can be promoted to a larger little-endian operation.
5056         */
5057        if (interleave == 1 && endian == MO_LE) {
5058            size = 3;
5059        }
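            /* E.g. (illustrative) a VLD1.8 of whole D registers on a
             * little-endian guest is performed as 64-bit loads below,
             * since the bytes are already in memory order.
             */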
5060        tmp64 = tcg_temp_new_i64();
5061        addr = tcg_temp_new_i32();
5062        tmp2 = tcg_const_i32(1 << size);
5063        load_reg_var(s, addr, rn);
5064        for (reg = 0; reg < nregs; reg++) {
5065            for (n = 0; n < 8 >> size; n++) {
5066                int xs;
5067                for (xs = 0; xs < interleave; xs++) {
5068                    int tt = rd + reg + spacing * xs;
5069
5070                    if (load) {
5071                        gen_aa32_ld_i64(s, tmp64, addr, mmu_idx, endian | size);
5072                        neon_store_element64(tt, n, size, tmp64);
5073                    } else {
5074                        neon_load_element64(tmp64, tt, n, size);
5075                        gen_aa32_st_i64(s, tmp64, addr, mmu_idx, endian | size);
5076                    }
5077                    tcg_gen_add_i32(addr, addr, tmp2);
5078                }
5079            }
5080        }
5081        tcg_temp_free_i32(addr);
5082        tcg_temp_free_i32(tmp2);
5083        tcg_temp_free_i64(tmp64);
5084        stride = nregs * interleave * 8;
5085    } else {
5086        size = (insn >> 10) & 3;
5087        if (size == 3) {
5088            /* Load single element to all lanes.  */
5089            int a = (insn >> 4) & 1;
5090            if (!load) {
5091                return 1;
5092            }
5093            size = (insn >> 6) & 3;
5094            nregs = ((insn >> 8) & 3) + 1;
5095
5096            if (size == 3) {
5097                if (nregs != 4 || a == 0) {
5098                    return 1;
5099                }
5100                /* For VLD4 size == 3, a == 1 means 32 bits at 16-byte alignment */
5101                size = 2;
5102            }
5103            if (nregs == 1 && a == 1 && size == 0) {
5104                return 1;
5105            }
5106            if (nregs == 3 && a == 1) {
5107                return 1;
5108            }
5109            addr = tcg_temp_new_i32();
5110            load_reg_var(s, addr, rn);
5111
5112            /* VLD1 to all lanes: bit 5 indicates how many Dregs to write.
5113             * VLD2/3/4 to all lanes: bit 5 indicates register stride.
5114             */
5115            stride = (insn & (1 << 5)) ? 2 : 1;
5116            vec_size = nregs == 1 ? stride * 8 : 8;
5117
5118            tmp = tcg_temp_new_i32();
5119            for (reg = 0; reg < nregs; reg++) {
5120                gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s),
5121                                s->be_data | size);
5122                if ((rd & 1) && vec_size == 16) {
5123                    /* We cannot write 16 bytes at once because the
5124                     * destination is unaligned.
5125                     */
5126                    tcg_gen_gvec_dup_i32(size, neon_reg_offset(rd, 0),
5127                                         8, 8, tmp);
5128                    tcg_gen_gvec_mov(0, neon_reg_offset(rd + 1, 0),
5129                                     neon_reg_offset(rd, 0), 8, 8);
5130                } else {
5131                    tcg_gen_gvec_dup_i32(size, neon_reg_offset(rd, 0),
5132                                         vec_size, vec_size, tmp);
5133                }
5134                tcg_gen_addi_i32(addr, addr, 1 << size);
5135                rd += stride;
5136            }
5137            tcg_temp_free_i32(tmp);
5138            tcg_temp_free_i32(addr);
5139            stride = (1 << size) * nregs;
5140        } else {
5141            /* Single element.  */
5142            int idx = (insn >> 4) & 0xf;
5143            int reg_idx;
5144            switch (size) {
5145            case 0:
5146                reg_idx = (insn >> 5) & 7;
5147                stride = 1;
5148                break;
5149            case 1:
5150                reg_idx = (insn >> 6) & 3;
5151                stride = (insn & (1 << 5)) ? 2 : 1;
5152                break;
5153            case 2:
5154                reg_idx = (insn >> 7) & 1;
5155                stride = (insn & (1 << 6)) ? 2 : 1;
5156                break;
5157            default:
5158                abort();
5159            }
5160            nregs = ((insn >> 8) & 3) + 1;
5161            /* Catch the UNDEF cases. This is unavoidably a bit messy. */
5162            switch (nregs) {
5163            case 1:
5164                if (((idx & (1 << size)) != 0) ||
5165                    (size == 2 && ((idx & 3) == 1 || (idx & 3) == 2))) {
5166                    return 1;
5167                }
5168                break;
5169            case 3:
5170                if ((idx & 1) != 0) {
5171                    return 1;
5172                }
5173                /* fall through */
5174            case 2:
5175                if (size == 2 && (idx & 2) != 0) {
5176                    return 1;
5177                }
5178                break;
5179            case 4:
5180                if ((size == 2) && ((idx & 3) == 3)) {
5181                    return 1;
5182                }
5183                break;
5184            default:
5185                abort();
5186            }
5187            if ((rd + stride * (nregs - 1)) > 31) {
5188                /* Attempts to write off the end of the register file
5189                 * are UNPREDICTABLE; we choose to UNDEF because otherwise
5190                 * the neon_load_reg() would write off the end of the array.
5191                 */
5192                return 1;
5193            }
5194            tmp = tcg_temp_new_i32();
5195            addr = tcg_temp_new_i32();
5196            load_reg_var(s, addr, rn);
5197            for (reg = 0; reg < nregs; reg++) {
5198                if (load) {
5199                    gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s),
5200                                    s->be_data | size);
5201                    neon_store_element(rd, reg_idx, size, tmp);
5202                } else { /* Store */
5203                    neon_load_element(tmp, rd, reg_idx, size);
5204                    gen_aa32_st_i32(s, tmp, addr, get_mem_index(s),
5205                                    s->be_data | size);
5206                }
5207                rd += stride;
5208                tcg_gen_addi_i32(addr, addr, 1 << size);
5209            }
5210            tcg_temp_free_i32(addr);
5211            tcg_temp_free_i32(tmp);
5212            stride = nregs * (1 << size);
5213        }
5214    }
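        /* Post-indexing: rm == 15 means no writeback; rm == 13 means
         * writeback of the number of bytes transferred; any other rm
         * post-indexes rn by that register (as per the ARM ARM VLDn/VSTn
         * encodings).
         */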
5215    if (rm != 15) {
5216        TCGv_i32 base;
5217
5218        base = load_reg(s, rn);
5219        if (rm == 13) {
5220            tcg_gen_addi_i32(base, base, stride);
5221        } else {
5222            TCGv_i32 index;
5223            index = load_reg(s, rm);
5224            tcg_gen_add_i32(base, base, index);
5225            tcg_temp_free_i32(index);
5226        }
5227        store_reg(s, rn, base);
5228    }
5229    return 0;
5230}
5231
5232static inline void gen_neon_narrow(int size, TCGv_i32 dest, TCGv_i64 src)
5233{
5234    switch (size) {
5235    case 0: gen_helper_neon_narrow_u8(dest, src); break;
5236    case 1: gen_helper_neon_narrow_u16(dest, src); break;
5237    case 2: tcg_gen_extrl_i64_i32(dest, src); break;
5238    default: abort();
5239    }
5240}
5241
5242static inline void gen_neon_narrow_sats(int size, TCGv_i32 dest, TCGv_i64 src)
5243{
5244    switch (size) {
5245    case 0: gen_helper_neon_narrow_sat_s8(dest, cpu_env, src); break;
5246    case 1: gen_helper_neon_narrow_sat_s16(dest, cpu_env, src); break;
5247    case 2: gen_helper_neon_narrow_sat_s32(dest, cpu_env, src); break;
5248    default: abort();
5249    }
5250}
5251
5252static inline void gen_neon_narrow_satu(int size, TCGv_i32 dest, TCGv_i64 src)
5253{
5254    switch (size) {
5255    case 0: gen_helper_neon_narrow_sat_u8(dest, cpu_env, src); break;
5256    case 1: gen_helper_neon_narrow_sat_u16(dest, cpu_env, src); break;
5257    case 2: gen_helper_neon_narrow_sat_u32(dest, cpu_env, src); break;
5258    default: abort();
5259    }
5260}
5261
5262static inline void gen_neon_unarrow_sats(int size, TCGv_i32 dest, TCGv_i64 src)
5263{
5264    switch (size) {
5265    case 0: gen_helper_neon_unarrow_sat8(dest, cpu_env, src); break;
5266    case 1: gen_helper_neon_unarrow_sat16(dest, cpu_env, src); break;
5267    case 2: gen_helper_neon_unarrow_sat32(dest, cpu_env, src); break;
5268    default: abort();
5269    }
5270}
5271
5272static inline void gen_neon_shift_narrow(int size, TCGv_i32 var, TCGv_i32 shift,
5273                                         int q, int u)
5274{
5275    if (q) {
5276        if (u) {
5277            switch (size) {
5278            case 1: gen_helper_neon_rshl_u16(var, var, shift); break;
5279            case 2: gen_helper_neon_rshl_u32(var, var, shift); break;
5280            default: abort();
5281            }
5282        } else {
5283            switch (size) {
5284            case 1: gen_helper_neon_rshl_s16(var, var, shift); break;
5285            case 2: gen_helper_neon_rshl_s32(var, var, shift); break;
5286            default: abort();
5287            }
5288        }
5289    } else {
5290        if (u) {
5291            switch (size) {
5292            case 1: gen_helper_neon_shl_u16(var, var, shift); break;
5293            case 2: gen_helper_neon_shl_u32(var, var, shift); break;
5294            default: abort();
5295            }
5296        } else {
5297            switch (size) {
5298            case 1: gen_helper_neon_shl_s16(var, var, shift); break;
5299            case 2: gen_helper_neon_shl_s32(var, var, shift); break;
5300            default: abort();
5301            }
5302        }
5303    }
5304}
5305
5306static inline void gen_neon_widen(TCGv_i64 dest, TCGv_i32 src, int size, int u)
5307{
5308    if (u) {
5309        switch (size) {
5310        case 0: gen_helper_neon_widen_u8(dest, src); break;
5311        case 1: gen_helper_neon_widen_u16(dest, src); break;
5312        case 2: tcg_gen_extu_i32_i64(dest, src); break;
5313        default: abort();
5314        }
5315    } else {
5316        switch (size) {
5317        case 0: gen_helper_neon_widen_s8(dest, src); break;
5318        case 1: gen_helper_neon_widen_s16(dest, src); break;
5319        case 2: tcg_gen_ext_i32_i64(dest, src); break;
5320        default: abort();
5321        }
5322    }
5323    tcg_temp_free_i32(src);
5324}
5325
5326static inline void gen_neon_addl(int size)
5327{
5328    switch (size) {
5329    case 0: gen_helper_neon_addl_u16(CPU_V001); break;
5330    case 1: gen_helper_neon_addl_u32(CPU_V001); break;
5331    case 2: tcg_gen_add_i64(CPU_V001); break;
5332    default: abort();
5333    }
5334}
5335
5336static inline void gen_neon_subl(int size)
5337{
5338    switch (size) {
5339    case 0: gen_helper_neon_subl_u16(CPU_V001); break;
5340    case 1: gen_helper_neon_subl_u32(CPU_V001); break;
5341    case 2: tcg_gen_sub_i64(CPU_V001); break;
5342    default: abort();
5343    }
5344}
5345
5346static inline void gen_neon_negl(TCGv_i64 var, int size)
5347{
5348    switch (size) {
5349    case 0: gen_helper_neon_negl_u16(var, var); break;
5350    case 1: gen_helper_neon_negl_u32(var, var); break;
5351    case 2:
5352        tcg_gen_neg_i64(var, var);
5353        break;
5354    default: abort();
5355    }
5356}
5357
5358static inline void gen_neon_addl_saturate(TCGv_i64 op0, TCGv_i64 op1, int size)
5359{
5360    switch (size) {
5361    case 1: gen_helper_neon_addl_saturate_s32(op0, cpu_env, op0, op1); break;
5362    case 2: gen_helper_neon_addl_saturate_s64(op0, cpu_env, op0, op1); break;
5363    default: abort();
5364    }
5365}
5366
5367static inline void gen_neon_mull(TCGv_i64 dest, TCGv_i32 a, TCGv_i32 b,
5368                                 int size, int u)
5369{
5370    TCGv_i64 tmp;
5371
5372    switch ((size << 1) | u) {
5373    case 0: gen_helper_neon_mull_s8(dest, a, b); break;
5374    case 1: gen_helper_neon_mull_u8(dest, a, b); break;
5375    case 2: gen_helper_neon_mull_s16(dest, a, b); break;
5376    case 3: gen_helper_neon_mull_u16(dest, a, b); break;
5377    case 4:
5378        tmp = gen_muls_i64_i32(a, b);
5379        tcg_gen_mov_i64(dest, tmp);
5380        tcg_temp_free_i64(tmp);
5381        break;
5382    case 5:
5383        tmp = gen_mulu_i64_i32(a, b);
5384        tcg_gen_mov_i64(dest, tmp);
5385        tcg_temp_free_i64(tmp);
5386        break;
5387    default: abort();
5388    }
5389
5390    /* gen_helper_neon_mull_[su]{8|16} do not free their parameters.
5391       Don't forget to free them now.  */
5392    if (size < 2) {
5393        tcg_temp_free_i32(a);
5394        tcg_temp_free_i32(b);
5395    }
5396}
5397
5398static void gen_neon_narrow_op(int op, int u, int size,
5399                               TCGv_i32 dest, TCGv_i64 src)
5400{
5401    if (op) {
5402        if (u) {
5403            gen_neon_unarrow_sats(size, dest, src);
5404        } else {
5405            gen_neon_narrow(size, dest, src);
5406        }
5407    } else {
5408        if (u) {
5409            gen_neon_narrow_satu(size, dest, src);
5410        } else {
5411            gen_neon_narrow_sats(size, dest, src);
5412        }
5413    }
5414}
5415
5416/* Symbolic constants for op fields for Neon 3-register same-length.
5417 * The values correspond to bits [11:8,4]; see the ARM ARM DDI0406B
5418 * table A7-9.
5419 */
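    /*
     * For instance (illustrative), NEON_3R_VMUL == 19 == 0b1001,1: insn
     * bits [11:8] are 0b1001 and bit [4] is set.
     */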
5420#define NEON_3R_VHADD 0
5421#define NEON_3R_VQADD 1
5422#define NEON_3R_VRHADD 2
5423#define NEON_3R_LOGIC 3 /* VAND,VBIC,VORR,VMOV,VORN,VEOR,VBIF,VBIT,VBSL */
5424#define NEON_3R_VHSUB 4
5425#define NEON_3R_VQSUB 5
5426#define NEON_3R_VCGT 6
5427#define NEON_3R_VCGE 7
5428#define NEON_3R_VSHL 8
5429#define NEON_3R_VQSHL 9
5430#define NEON_3R_VRSHL 10
5431#define NEON_3R_VQRSHL 11
5432#define NEON_3R_VMAX 12
5433#define NEON_3R_VMIN 13
5434#define NEON_3R_VABD 14
5435#define NEON_3R_VABA 15
5436#define NEON_3R_VADD_VSUB 16
5437#define NEON_3R_VTST_VCEQ 17
5438#define NEON_3R_VML 18 /* VMLA, VMLS */
5439#define NEON_3R_VMUL 19
5440#define NEON_3R_VPMAX 20
5441#define NEON_3R_VPMIN 21
5442#define NEON_3R_VQDMULH_VQRDMULH 22
5443#define NEON_3R_VPADD_VQRDMLAH 23
5444#define NEON_3R_SHA 24 /* SHA1C,SHA1P,SHA1M,SHA1SU0,SHA256H{2},SHA256SU1 */
5445#define NEON_3R_VFM_VQRDMLSH 25 /* VFMA, VFMS, VQRDMLSH */
5446#define NEON_3R_FLOAT_ARITH 26 /* float VADD, VSUB, VPADD, VABD */
5447#define NEON_3R_FLOAT_MULTIPLY 27 /* float VMLA, VMLS, VMUL */
5448#define NEON_3R_FLOAT_CMP 28 /* float VCEQ, VCGE, VCGT */
5449#define NEON_3R_FLOAT_ACMP 29 /* float VACGE, VACGT, VACLE, VACLT */
5450#define NEON_3R_FLOAT_MINMAX 30 /* float VMIN, VMAX */
5451#define NEON_3R_FLOAT_MISC 31 /* float VRECPS, VRSQRTS, VMAXNM/MINNM */
5452
5453static const uint8_t neon_3r_sizes[] = {
5454    [NEON_3R_VHADD] = 0x7,
5455    [NEON_3R_VQADD] = 0xf,
5456    [NEON_3R_VRHADD] = 0x7,
5457    [NEON_3R_LOGIC] = 0xf, /* size field encodes op type */
5458    [NEON_3R_VHSUB] = 0x7,
5459    [NEON_3R_VQSUB] = 0xf,
5460    [NEON_3R_VCGT] = 0x7,
5461    [NEON_3R_VCGE] = 0x7,
5462    [NEON_3R_VSHL] = 0xf,
5463    [NEON_3R_VQSHL] = 0xf,
5464    [NEON_3R_VRSHL] = 0xf,
5465    [NEON_3R_VQRSHL] = 0xf,
5466    [NEON_3R_VMAX] = 0x7,
5467    [NEON_3R_VMIN] = 0x7,
5468    [NEON_3R_VABD] = 0x7,
5469    [NEON_3R_VABA] = 0x7,
5470    [NEON_3R_VADD_VSUB] = 0xf,
5471    [NEON_3R_VTST_VCEQ] = 0x7,
5472    [NEON_3R_VML] = 0x7,
5473    [NEON_3R_VMUL] = 0x7,
5474    [NEON_3R_VPMAX] = 0x7,
5475    [NEON_3R_VPMIN] = 0x7,
5476    [NEON_3R_VQDMULH_VQRDMULH] = 0x6,
5477    [NEON_3R_VPADD_VQRDMLAH] = 0x7,
5478    [NEON_3R_SHA] = 0xf, /* size field encodes op type */
5479    [NEON_3R_VFM_VQRDMLSH] = 0x7, /* For VFM, size bit 1 encodes op */
5480    [NEON_3R_FLOAT_ARITH] = 0x5, /* size bit 1 encodes op */
5481    [NEON_3R_FLOAT_MULTIPLY] = 0x5, /* size bit 1 encodes op */
5482    [NEON_3R_FLOAT_CMP] = 0x5, /* size bit 1 encodes op */
5483    [NEON_3R_FLOAT_ACMP] = 0x5, /* size bit 1 encodes op */
5484    [NEON_3R_FLOAT_MINMAX] = 0x5, /* size bit 1 encodes op */
5485    [NEON_3R_FLOAT_MISC] = 0x5, /* size bit 1 encodes op */
5486};
5487
5488/* Symbolic constants for op fields for Neon 2-register miscellaneous.
5489 * The values correspond to bits [17:16,10:7]; see the ARM ARM DDI0406B
5490 * table A7-13.
5491 */
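    /*
     * For instance (illustrative), NEON_2RM_VZIP == 35 == 0b10,0011: insn
     * bits [17:16] are 0b10 and bits [10:7] are 0b0011.
     */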
5492#define NEON_2RM_VREV64 0
5493#define NEON_2RM_VREV32 1
5494#define NEON_2RM_VREV16 2
5495#define NEON_2RM_VPADDL 4
5496#define NEON_2RM_VPADDL_U 5
5497#define NEON_2RM_AESE 6 /* Includes AESD */
5498#define NEON_2RM_AESMC 7 /* Includes AESIMC */
5499#define NEON_2RM_VCLS 8
5500#define NEON_2RM_VCLZ 9
5501#define NEON_2RM_VCNT 10
5502#define NEON_2RM_VMVN 11
5503#define NEON_2RM_VPADAL 12
5504#define NEON_2RM_VPADAL_U 13
5505#define NEON_2RM_VQABS 14
5506#define NEON_2RM_VQNEG 15
5507#define NEON_2RM_VCGT0 16
5508#define NEON_2RM_VCGE0 17
5509#define NEON_2RM_VCEQ0 18
5510#define NEON_2RM_VCLE0 19
5511#define NEON_2RM_VCLT0 20
5512#define NEON_2RM_SHA1H 21
5513#define NEON_2RM_VABS 22
5514#define NEON_2RM_VNEG 23
5515#define NEON_2RM_VCGT0_F 24
5516#define NEON_2RM_VCGE0_F 25
5517#define NEON_2RM_VCEQ0_F 26
5518#define NEON_2RM_VCLE0_F 27
5519#define NEON_2RM_VCLT0_F 28
5520#define NEON_2RM_VABS_F 30
5521#define NEON_2RM_VNEG_F 31
5522#define NEON_2RM_VSWP 32
5523#define NEON_2RM_VTRN 33
5524#define NEON_2RM_VUZP 34
5525#define NEON_2RM_VZIP 35
5526#define NEON_2RM_VMOVN 36 /* Includes VQMOVN, VQMOVUN */
5527#define NEON_2RM_VQMOVN 37 /* Includes VQMOVUN */
5528#define NEON_2RM_VSHLL 38
5529#define NEON_2RM_SHA1SU1 39 /* Includes SHA256SU0 */
5530#define NEON_2RM_VRINTN 40
5531#define NEON_2RM_VRINTX 41
5532#define NEON_2RM_VRINTA 42
5533#define NEON_2RM_VRINTZ 43
5534#define NEON_2RM_VCVT_F16_F32 44
5535#define NEON_2RM_VRINTM 45
5536#define NEON_2RM_VCVT_F32_F16 46
5537#define NEON_2RM_VRINTP 47
5538#define NEON_2RM_VCVTAU 48
5539#define NEON_2RM_VCVTAS 49
5540#define NEON_2RM_VCVTNU 50
5541#define NEON_2RM_VCVTNS 51
5542#define NEON_2RM_VCVTPU 52
5543#define NEON_2RM_VCVTPS 53
5544#define NEON_2RM_VCVTMU 54
5545#define NEON_2RM_VCVTMS 55
5546#define NEON_2RM_VRECPE 56
5547#define NEON_2RM_VRSQRTE 57
5548#define NEON_2RM_VRECPE_F 58
5549#define NEON_2RM_VRSQRTE_F 59
5550#define NEON_2RM_VCVT_FS 60
5551#define NEON_2RM_VCVT_FU 61
5552#define NEON_2RM_VCVT_SF 62
5553#define NEON_2RM_VCVT_UF 63
5554
5555static int neon_2rm_is_float_op(int op)
5556{
5557    /* Return true if this neon 2reg-misc op is float-to-float */
5558    return (op == NEON_2RM_VABS_F || op == NEON_2RM_VNEG_F ||
5559            (op >= NEON_2RM_VRINTN && op <= NEON_2RM_VRINTZ) ||
5560            op == NEON_2RM_VRINTM ||
5561            (op >= NEON_2RM_VRINTP && op <= NEON_2RM_VCVTMS) ||
5562            op >= NEON_2RM_VRECPE_F);
5563}
5564
5565static bool neon_2rm_is_v8_op(int op)
5566{
5567    /* Return true if this neon 2reg-misc op is ARMv8 and up */
5568    switch (op) {
5569    case NEON_2RM_VRINTN:
5570    case NEON_2RM_VRINTA:
5571    case NEON_2RM_VRINTM:
5572    case NEON_2RM_VRINTP:
5573    case NEON_2RM_VRINTZ:
5574    case NEON_2RM_VRINTX:
5575    case NEON_2RM_VCVTAU:
5576    case NEON_2RM_VCVTAS:
5577    case NEON_2RM_VCVTNU:
5578    case NEON_2RM_VCVTNS:
5579    case NEON_2RM_VCVTPU:
5580    case NEON_2RM_VCVTPS:
5581    case NEON_2RM_VCVTMU:
5582    case NEON_2RM_VCVTMS:
5583        return true;
5584    default:
5585        return false;
5586    }
5587}
5588
5589/* Each entry in this array has bit n set if the insn allows
5590 * size value n (otherwise it will UNDEF). Since unallocated
5591 * op values will have no bits set, so they always UNDEF.
5592 */
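    /*
     * E.g. [NEON_2RM_VREV32] = 0x3 below permits only size 0 and size 1
     * (8-bit and 16-bit elements); a 32-bit VREV32 would be a no-op and is
     * not an allocated encoding.
     */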
5593static const uint8_t neon_2rm_sizes[] = {
5594    [NEON_2RM_VREV64] = 0x7,
5595    [NEON_2RM_VREV32] = 0x3,
5596    [NEON_2RM_VREV16] = 0x1,
5597    [NEON_2RM_VPADDL] = 0x7,
5598    [NEON_2RM_VPADDL_U] = 0x7,
5599    [NEON_2RM_AESE] = 0x1,
5600    [NEON_2RM_AESMC] = 0x1,
5601    [NEON_2RM_VCLS] = 0x7,
5602    [NEON_2RM_VCLZ] = 0x7,
5603    [NEON_2RM_VCNT] = 0x1,
5604    [NEON_2RM_VMVN] = 0x1,
5605    [NEON_2RM_VPADAL] = 0x7,
5606    [NEON_2RM_VPADAL_U] = 0x7,
5607    [NEON_2RM_VQABS] = 0x7,
5608    [NEON_2RM_VQNEG] = 0x7,
5609    [NEON_2RM_VCGT0] = 0x7,
5610    [NEON_2RM_VCGE0] = 0x7,
5611    [NEON_2RM_VCEQ0] = 0x7,
5612    [NEON_2RM_VCLE0] = 0x7,
5613    [NEON_2RM_VCLT0] = 0x7,
5614    [NEON_2RM_SHA1H] = 0x4,
5615    [NEON_2RM_VABS] = 0x7,
5616    [NEON_2RM_VNEG] = 0x7,
5617    [NEON_2RM_VCGT0_F] = 0x4,
5618    [NEON_2RM_VCGE0_F] = 0x4,
5619    [NEON_2RM_VCEQ0_F] = 0x4,
5620    [NEON_2RM_VCLE0_F] = 0x4,
5621    [NEON_2RM_VCLT0_F] = 0x4,
5622    [NEON_2RM_VABS_F] = 0x4,
5623    [NEON_2RM_VNEG_F] = 0x4,
5624    [NEON_2RM_VSWP] = 0x1,
5625    [NEON_2RM_VTRN] = 0x7,
5626    [NEON_2RM_VUZP] = 0x7,
5627    [NEON_2RM_VZIP] = 0x7,
5628    [NEON_2RM_VMOVN] = 0x7,
5629    [NEON_2RM_VQMOVN] = 0x7,
5630    [NEON_2RM_VSHLL] = 0x7,
5631    [NEON_2RM_SHA1SU1] = 0x4,
5632    [NEON_2RM_VRINTN] = 0x4,
5633    [NEON_2RM_VRINTX] = 0x4,
5634    [NEON_2RM_VRINTA] = 0x4,
5635    [NEON_2RM_VRINTZ] = 0x4,
5636    [NEON_2RM_VCVT_F16_F32] = 0x2,
5637    [NEON_2RM_VRINTM] = 0x4,
5638    [NEON_2RM_VCVT_F32_F16] = 0x2,
5639    [NEON_2RM_VRINTP] = 0x4,
5640    [NEON_2RM_VCVTAU] = 0x4,
5641    [NEON_2RM_VCVTAS] = 0x4,
5642    [NEON_2RM_VCVTNU] = 0x4,
5643    [NEON_2RM_VCVTNS] = 0x4,
5644    [NEON_2RM_VCVTPU] = 0x4,
5645    [NEON_2RM_VCVTPS] = 0x4,
5646    [NEON_2RM_VCVTMU] = 0x4,
5647    [NEON_2RM_VCVTMS] = 0x4,
5648    [NEON_2RM_VRECPE] = 0x4,
5649    [NEON_2RM_VRSQRTE] = 0x4,
5650    [NEON_2RM_VRECPE_F] = 0x4,
5651    [NEON_2RM_VRSQRTE_F] = 0x4,
5652    [NEON_2RM_VCVT_FS] = 0x4,
5653    [NEON_2RM_VCVT_FU] = 0x4,
5654    [NEON_2RM_VCVT_SF] = 0x4,
5655    [NEON_2RM_VCVT_UF] = 0x4,
5656};
5657
5658
5659/* Expand a v8.1 SIMD helper.  */
5660static int do_v81_helper(DisasContext *s, gen_helper_gvec_3_ptr *fn,
5661                         int q, int rd, int rn, int rm)
5662{
5663    if (dc_isar_feature(aa32_rdm, s)) {
5664        int opr_sz = (1 + q) * 8;
5665        tcg_gen_gvec_3_ptr(vfp_reg_offset(1, rd),
5666                           vfp_reg_offset(1, rn),
5667                           vfp_reg_offset(1, rm), cpu_env,
5668                           opr_sz, opr_sz, 0, fn);
5669        return 0;
5670    }
5671    return 1;
5672}
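    /*
     * Typical use (illustrative): the VQRDMLAH/VQRDMLSH paths in the Neon
     * 3-reg-same decode call this with the matching gvec helper, e.g.
     * gen_helper_gvec_qrdmlah_s16, and treat a nonzero return as an
     * invalid encoding.
     */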
5673
5674/*
5675 * Expanders for the vector bitwise select operations VBIF, VBIT, VBSL.
5676 */
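    /*
     * These expanders use the identity
     *   (x & sel) | (y & ~sel) == y ^ ((x ^ y) & sel)
     * so each select needs only three logical operations; which operand
     * plays the role of 'sel' differs between BSL, BIT and BIF.
     */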
5677static void gen_bsl_i64(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
5678{
5679    tcg_gen_xor_i64(rn, rn, rm);
5680    tcg_gen_and_i64(rn, rn, rd);
5681    tcg_gen_xor_i64(rd, rm, rn);
5682}
5683
5684static void gen_bit_i64(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
5685{
5686    tcg_gen_xor_i64(rn, rn, rd);
5687    tcg_gen_and_i64(rn, rn, rm);
5688    tcg_gen_xor_i64(rd, rd, rn);
5689}
5690
5691static void gen_bif_i64(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
5692{
5693    tcg_gen_xor_i64(rn, rn, rd);
5694    tcg_gen_andc_i64(rn, rn, rm);
5695    tcg_gen_xor_i64(rd, rd, rn);
5696}
5697
5698static void gen_bsl_vec(unsigned vece, TCGv_vec rd, TCGv_vec rn, TCGv_vec rm)
5699{
5700    tcg_gen_xor_vec(vece, rn, rn, rm);
5701    tcg_gen_and_vec(vece, rn, rn, rd);
5702    tcg_gen_xor_vec(vece, rd, rm, rn);
5703}
5704
5705static void gen_bit_vec(unsigned vece, TCGv_vec rd, TCGv_vec rn, TCGv_vec rm)
5706{
5707    tcg_gen_xor_vec(vece, rn, rn, rd);
5708    tcg_gen_and_vec(vece, rn, rn, rm);
5709    tcg_gen_xor_vec(vece, rd, rd, rn);
5710}
5711
5712static void gen_bif_vec(unsigned vece, TCGv_vec rd, TCGv_vec rn, TCGv_vec rm)
5713{
5714    tcg_gen_xor_vec(vece, rn, rn, rd);
5715    tcg_gen_andc_vec(vece, rn, rn, rm);
5716    tcg_gen_xor_vec(vece, rd, rd, rn);
5717}
5718
5719const GVecGen3 bsl_op = {
5720    .fni8 = gen_bsl_i64,
5721    .fniv = gen_bsl_vec,
5722    .prefer_i64 = TCG_TARGET_REG_BITS == 64,
5723    .load_dest = true
5724};
5725
5726const GVecGen3 bit_op = {
5727    .