qemu/target/arm/translate.c
   1/*
   2 *  ARM translation
   3 *
   4 *  Copyright (c) 2003 Fabrice Bellard
   5 *  Copyright (c) 2005-2007 CodeSourcery
   6 *  Copyright (c) 2007 OpenedHand, Ltd.
   7 *
   8 * This library is free software; you can redistribute it and/or
   9 * modify it under the terms of the GNU Lesser General Public
  10 * License as published by the Free Software Foundation; either
  11 * version 2 of the License, or (at your option) any later version.
  12 *
  13 * This library is distributed in the hope that it will be useful,
  14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  16 * Lesser General Public License for more details.
  17 *
  18 * You should have received a copy of the GNU Lesser General Public
  19 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  20 */
  21#include "qemu/osdep.h"
  22
  23#include "cpu.h"
  24#include "internals.h"
  25#include "disas/disas.h"
  26#include "exec/exec-all.h"
  27#include "tcg-op.h"
  28#include "tcg-op-gvec.h"
  29#include "qemu/log.h"
  30#include "qemu/bitops.h"
  31#include "arm_ldst.h"
  32#include "hw/semihosting/semihost.h"
  33
  34#include "exec/helper-proto.h"
  35#include "exec/helper-gen.h"
  36
  37#include "trace-tcg.h"
  38#include "exec/log.h"
  39
  40
  41#define ENABLE_ARCH_4T    arm_dc_feature(s, ARM_FEATURE_V4T)
  42#define ENABLE_ARCH_5     arm_dc_feature(s, ARM_FEATURE_V5)
  43/* currently all emulated v5 cores are also v5TE, so don't bother */
  44#define ENABLE_ARCH_5TE   arm_dc_feature(s, ARM_FEATURE_V5)
  45#define ENABLE_ARCH_5J    dc_isar_feature(jazelle, s)
  46#define ENABLE_ARCH_6     arm_dc_feature(s, ARM_FEATURE_V6)
  47#define ENABLE_ARCH_6K    arm_dc_feature(s, ARM_FEATURE_V6K)
  48#define ENABLE_ARCH_6T2   arm_dc_feature(s, ARM_FEATURE_THUMB2)
  49#define ENABLE_ARCH_7     arm_dc_feature(s, ARM_FEATURE_V7)
  50#define ENABLE_ARCH_8     arm_dc_feature(s, ARM_FEATURE_V8)
  51
  52#define ARCH(x) do { if (!ENABLE_ARCH_##x) goto illegal_op; } while(0)
  53
  54#include "translate.h"
  55
  56#if defined(CONFIG_USER_ONLY)
  57#define IS_USER(s) 1
  58#else
  59#define IS_USER(s) (s->user)
  60#endif
  61
  62/* We reuse the same 64-bit temporaries for efficiency.  */
  63static TCGv_i64 cpu_V0, cpu_V1, cpu_M0;
  64static TCGv_i32 cpu_R[16];
  65TCGv_i32 cpu_CF, cpu_NF, cpu_VF, cpu_ZF;
  66TCGv_i64 cpu_exclusive_addr;
  67TCGv_i64 cpu_exclusive_val;
  68
  69#include "exec/gen-icount.h"
  70
  71static const char * const regnames[] =
  72    { "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
  73      "r8", "r9", "r10", "r11", "r12", "r13", "r14", "pc" };
  74
  75/* Function prototypes for gen_ functions calling Neon helpers.  */
  76typedef void NeonGenThreeOpEnvFn(TCGv_i32, TCGv_env, TCGv_i32,
  77                                 TCGv_i32, TCGv_i32);
   78/* Function prototypes for gen_ functions for fixed-point conversions */
  79typedef void VFPGenFixPointFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
  80
  81/* initialize TCG globals.  */
  82void arm_translate_init(void)
  83{
  84    int i;
  85
  86    for (i = 0; i < 16; i++) {
  87        cpu_R[i] = tcg_global_mem_new_i32(cpu_env,
  88                                          offsetof(CPUARMState, regs[i]),
  89                                          regnames[i]);
  90    }
  91    cpu_CF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, CF), "CF");
  92    cpu_NF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, NF), "NF");
  93    cpu_VF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, VF), "VF");
  94    cpu_ZF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, ZF), "ZF");
  95
  96    cpu_exclusive_addr = tcg_global_mem_new_i64(cpu_env,
  97        offsetof(CPUARMState, exclusive_addr), "exclusive_addr");
  98    cpu_exclusive_val = tcg_global_mem_new_i64(cpu_env,
  99        offsetof(CPUARMState, exclusive_val), "exclusive_val");
 100
 101    a64_translate_init();
 102}
 103
 104/* Flags for the disas_set_da_iss info argument:
 105 * lower bits hold the Rt register number, higher bits are flags.
 106 */
 107typedef enum ISSInfo {
 108    ISSNone = 0,
 109    ISSRegMask = 0x1f,
 110    ISSInvalid = (1 << 5),
 111    ISSIsAcqRel = (1 << 6),
 112    ISSIsWrite = (1 << 7),
 113    ISSIs16Bit = (1 << 8),
 114} ISSInfo;
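/* Callers typically build the issinfo argument by OR'ing the transfer
 * register number with the flags above, e.g. (rt | ISSIsWrite | ISSIs16Bit)
 * for a 16-bit store; disas_set_da_iss() below unpacks these fields again.
 */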
 115
 116/* Save the syndrome information for a Data Abort */
 117static void disas_set_da_iss(DisasContext *s, MemOp memop, ISSInfo issinfo)
 118{
 119    uint32_t syn;
 120    int sas = memop & MO_SIZE;
 121    bool sse = memop & MO_SIGN;
 122    bool is_acqrel = issinfo & ISSIsAcqRel;
 123    bool is_write = issinfo & ISSIsWrite;
 124    bool is_16bit = issinfo & ISSIs16Bit;
 125    int srt = issinfo & ISSRegMask;
 126
 127    if (issinfo & ISSInvalid) {
 128        /* Some callsites want to conditionally provide ISS info,
 129         * eg "only if this was not a writeback"
 130         */
 131        return;
 132    }
 133
 134    if (srt == 15) {
 135        /* For AArch32, insns where the src/dest is R15 never generate
 136         * ISS information. Catching that here saves checking at all
 137         * the call sites.
 138         */
 139        return;
 140    }
 141
 142    syn = syn_data_abort_with_iss(0, sas, sse, srt, 0, is_acqrel,
 143                                  0, 0, 0, is_write, 0, is_16bit);
 144    disas_set_insn_syndrome(s, syn);
 145}
 146
 147static inline int get_a32_user_mem_index(DisasContext *s)
 148{
 149    /* Return the core mmu_idx to use for A32/T32 "unprivileged load/store"
 150     * insns:
 151     *  if PL2, UNPREDICTABLE (we choose to implement as if PL0)
 152     *  otherwise, access as if at PL0.
 153     */
 154    switch (s->mmu_idx) {
 155    case ARMMMUIdx_S1E2:        /* this one is UNPREDICTABLE */
 156    case ARMMMUIdx_S12NSE0:
 157    case ARMMMUIdx_S12NSE1:
 158        return arm_to_core_mmu_idx(ARMMMUIdx_S12NSE0);
 159    case ARMMMUIdx_S1E3:
 160    case ARMMMUIdx_S1SE0:
 161    case ARMMMUIdx_S1SE1:
 162        return arm_to_core_mmu_idx(ARMMMUIdx_S1SE0);
 163    case ARMMMUIdx_MUser:
 164    case ARMMMUIdx_MPriv:
 165        return arm_to_core_mmu_idx(ARMMMUIdx_MUser);
 166    case ARMMMUIdx_MUserNegPri:
 167    case ARMMMUIdx_MPrivNegPri:
 168        return arm_to_core_mmu_idx(ARMMMUIdx_MUserNegPri);
 169    case ARMMMUIdx_MSUser:
 170    case ARMMMUIdx_MSPriv:
 171        return arm_to_core_mmu_idx(ARMMMUIdx_MSUser);
 172    case ARMMMUIdx_MSUserNegPri:
 173    case ARMMMUIdx_MSPrivNegPri:
 174        return arm_to_core_mmu_idx(ARMMMUIdx_MSUserNegPri);
 175    case ARMMMUIdx_S2NS:
 176    default:
 177        g_assert_not_reached();
 178    }
 179}
 180
 181static inline TCGv_i32 load_cpu_offset(int offset)
 182{
 183    TCGv_i32 tmp = tcg_temp_new_i32();
 184    tcg_gen_ld_i32(tmp, cpu_env, offset);
 185    return tmp;
 186}
 187
 188#define load_cpu_field(name) load_cpu_offset(offsetof(CPUARMState, name))
 189
 190static inline void store_cpu_offset(TCGv_i32 var, int offset)
 191{
 192    tcg_gen_st_i32(var, cpu_env, offset);
 193    tcg_temp_free_i32(var);
 194}
 195
 196#define store_cpu_field(var, name) \
 197    store_cpu_offset(var, offsetof(CPUARMState, name))
 198
 199/* The architectural value of PC.  */
 200static uint32_t read_pc(DisasContext *s)
 201{
 202    return s->pc_curr + (s->thumb ? 4 : 8);
 203}
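/* For example, for an A32 insn at 0x1000 a read of r15 yields 0x1008,
 * and for a T32 insn at 0x1000 it yields 0x1004, matching the architectural
 * "PC reads as the instruction address plus 8 (ARM) or 4 (Thumb)" rule.
 */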
 204
 205/* Set a variable to the value of a CPU register.  */
 206static void load_reg_var(DisasContext *s, TCGv_i32 var, int reg)
 207{
 208    if (reg == 15) {
 209        tcg_gen_movi_i32(var, read_pc(s));
 210    } else {
 211        tcg_gen_mov_i32(var, cpu_R[reg]);
 212    }
 213}
 214
 215/* Create a new temporary and set it to the value of a CPU register.  */
 216static inline TCGv_i32 load_reg(DisasContext *s, int reg)
 217{
 218    TCGv_i32 tmp = tcg_temp_new_i32();
 219    load_reg_var(s, tmp, reg);
 220    return tmp;
 221}
 222
 223/*
 224 * Create a new temp, REG + OFS, except PC is ALIGN(PC, 4).
 225 * This is used for load/store for which use of PC implies (literal),
 226 * or ADD that implies ADR.
 227 */
 228static TCGv_i32 add_reg_for_lit(DisasContext *s, int reg, int ofs)
 229{
 230    TCGv_i32 tmp = tcg_temp_new_i32();
 231
 232    if (reg == 15) {
 233        tcg_gen_movi_i32(tmp, (read_pc(s) & ~3) + ofs);
 234    } else {
 235        tcg_gen_addi_i32(tmp, cpu_R[reg], ofs);
 236    }
 237    return tmp;
 238}
 239
 240/* Set a CPU register.  The source must be a temporary and will be
 241   marked as dead.  */
 242static void store_reg(DisasContext *s, int reg, TCGv_i32 var)
 243{
 244    if (reg == 15) {
 245        /* In Thumb mode, we must ignore bit 0.
 246         * In ARM mode, for ARMv4 and ARMv5, it is UNPREDICTABLE if bits [1:0]
 247         * are not 0b00, but for ARMv6 and above, we must ignore bits [1:0].
 248         * We choose to ignore [1:0] in ARM mode for all architecture versions.
 249         */
 250        tcg_gen_andi_i32(var, var, s->thumb ? ~1 : ~3);
 251        s->base.is_jmp = DISAS_JUMP;
 252    }
 253    tcg_gen_mov_i32(cpu_R[reg], var);
 254    tcg_temp_free_i32(var);
 255}
 256
 257/*
 258 * Variant of store_reg which applies v8M stack-limit checks before updating
 259 * SP. If the check fails this will result in an exception being taken.
 260 * We disable the stack checks for CONFIG_USER_ONLY because we have
 261 * no idea what the stack limits should be in that case.
 262 * If stack checking is not being done this just acts like store_reg().
 263 */
 264static void store_sp_checked(DisasContext *s, TCGv_i32 var)
 265{
 266#ifndef CONFIG_USER_ONLY
 267    if (s->v8m_stackcheck) {
 268        gen_helper_v8m_stackcheck(cpu_env, var);
 269    }
 270#endif
 271    store_reg(s, 13, var);
 272}
 273
 274/* Value extensions.  */
 275#define gen_uxtb(var) tcg_gen_ext8u_i32(var, var)
 276#define gen_uxth(var) tcg_gen_ext16u_i32(var, var)
 277#define gen_sxtb(var) tcg_gen_ext8s_i32(var, var)
 278#define gen_sxth(var) tcg_gen_ext16s_i32(var, var)
 279
 280#define gen_sxtb16(var) gen_helper_sxtb16(var, var)
 281#define gen_uxtb16(var) gen_helper_uxtb16(var, var)
 282
 283
 284static inline void gen_set_cpsr(TCGv_i32 var, uint32_t mask)
 285{
 286    TCGv_i32 tmp_mask = tcg_const_i32(mask);
 287    gen_helper_cpsr_write(cpu_env, var, tmp_mask);
 288    tcg_temp_free_i32(tmp_mask);
 289}
 290/* Set NZCV flags from the high 4 bits of var.  */
 291#define gen_set_nzcv(var) gen_set_cpsr(var, CPSR_NZCV)
 292
 293static void gen_exception_internal(int excp)
 294{
 295    TCGv_i32 tcg_excp = tcg_const_i32(excp);
 296
 297    assert(excp_is_internal(excp));
 298    gen_helper_exception_internal(cpu_env, tcg_excp);
 299    tcg_temp_free_i32(tcg_excp);
 300}
 301
 302static void gen_step_complete_exception(DisasContext *s)
 303{
  304    /* We have just completed a step of an insn. Move from Active-not-pending
 305     * to Active-pending, and then also take the swstep exception.
 306     * This corresponds to making the (IMPDEF) choice to prioritize
 307     * swstep exceptions over asynchronous exceptions taken to an exception
 308     * level where debug is disabled. This choice has the advantage that
 309     * we do not need to maintain internal state corresponding to the
 310     * ISV/EX syndrome bits between completion of the step and generation
 311     * of the exception, and our syndrome information is always correct.
 312     */
 313    gen_ss_advance(s);
 314    gen_swstep_exception(s, 1, s->is_ldex);
 315    s->base.is_jmp = DISAS_NORETURN;
 316}
 317
 318static void gen_singlestep_exception(DisasContext *s)
 319{
 320    /* Generate the right kind of exception for singlestep, which is
 321     * either the architectural singlestep or EXCP_DEBUG for QEMU's
 322     * gdb singlestepping.
 323     */
 324    if (s->ss_active) {
 325        gen_step_complete_exception(s);
 326    } else {
 327        gen_exception_internal(EXCP_DEBUG);
 328    }
 329}
 330
 331static inline bool is_singlestepping(DisasContext *s)
 332{
 333    /* Return true if we are singlestepping either because of
 334     * architectural singlestep or QEMU gdbstub singlestep. This does
 335     * not include the command line '-singlestep' mode which is rather
 336     * misnamed as it only means "one instruction per TB" and doesn't
 337     * affect the code we generate.
 338     */
 339    return s->base.singlestep_enabled || s->ss_active;
 340}
 341
 342static void gen_smul_dual(TCGv_i32 a, TCGv_i32 b)
 343{
 344    TCGv_i32 tmp1 = tcg_temp_new_i32();
 345    TCGv_i32 tmp2 = tcg_temp_new_i32();
 346    tcg_gen_ext16s_i32(tmp1, a);
 347    tcg_gen_ext16s_i32(tmp2, b);
 348    tcg_gen_mul_i32(tmp1, tmp1, tmp2);
 349    tcg_temp_free_i32(tmp2);
 350    tcg_gen_sari_i32(a, a, 16);
 351    tcg_gen_sari_i32(b, b, 16);
 352    tcg_gen_mul_i32(b, b, a);
 353    tcg_gen_mov_i32(a, tmp1);
 354    tcg_temp_free_i32(tmp1);
 355}
 356
 357/* Byteswap each halfword.  */
 358static void gen_rev16(TCGv_i32 dest, TCGv_i32 var)
 359{
 360    TCGv_i32 tmp = tcg_temp_new_i32();
 361    TCGv_i32 mask = tcg_const_i32(0x00ff00ff);
 362    tcg_gen_shri_i32(tmp, var, 8);
 363    tcg_gen_and_i32(tmp, tmp, mask);
 364    tcg_gen_and_i32(var, var, mask);
 365    tcg_gen_shli_i32(var, var, 8);
 366    tcg_gen_or_i32(dest, var, tmp);
 367    tcg_temp_free_i32(mask);
 368    tcg_temp_free_i32(tmp);
 369}
 370
 371/* Byteswap low halfword and sign extend.  */
 372static void gen_revsh(TCGv_i32 dest, TCGv_i32 var)
 373{
 374    tcg_gen_ext16u_i32(var, var);
 375    tcg_gen_bswap16_i32(var, var);
 376    tcg_gen_ext16s_i32(dest, var);
 377}
 378
 379/* 32x32->64 multiply.  Marks inputs as dead.  */
 380static TCGv_i64 gen_mulu_i64_i32(TCGv_i32 a, TCGv_i32 b)
 381{
 382    TCGv_i32 lo = tcg_temp_new_i32();
 383    TCGv_i32 hi = tcg_temp_new_i32();
 384    TCGv_i64 ret;
 385
 386    tcg_gen_mulu2_i32(lo, hi, a, b);
 387    tcg_temp_free_i32(a);
 388    tcg_temp_free_i32(b);
 389
 390    ret = tcg_temp_new_i64();
 391    tcg_gen_concat_i32_i64(ret, lo, hi);
 392    tcg_temp_free_i32(lo);
 393    tcg_temp_free_i32(hi);
 394
 395    return ret;
 396}
 397
 398static TCGv_i64 gen_muls_i64_i32(TCGv_i32 a, TCGv_i32 b)
 399{
 400    TCGv_i32 lo = tcg_temp_new_i32();
 401    TCGv_i32 hi = tcg_temp_new_i32();
 402    TCGv_i64 ret;
 403
 404    tcg_gen_muls2_i32(lo, hi, a, b);
 405    tcg_temp_free_i32(a);
 406    tcg_temp_free_i32(b);
 407
 408    ret = tcg_temp_new_i64();
 409    tcg_gen_concat_i32_i64(ret, lo, hi);
 410    tcg_temp_free_i32(lo);
 411    tcg_temp_free_i32(hi);
 412
 413    return ret;
 414}
 415
 416/* Swap low and high halfwords.  */
 417static void gen_swap_half(TCGv_i32 var)
 418{
 419    tcg_gen_rotri_i32(var, var, 16);
 420}
 421
 422/* Dual 16-bit add.  Result placed in t0 and t1 is marked as dead.
 423    tmp = (t0 ^ t1) & 0x8000;
 424    t0 &= ~0x8000;
 425    t1 &= ~0x8000;
 426    t0 = (t0 + t1) ^ tmp;
 427 */
 428
 429static void gen_add16(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
 430{
 431    TCGv_i32 tmp = tcg_temp_new_i32();
 432    tcg_gen_xor_i32(tmp, t0, t1);
 433    tcg_gen_andi_i32(tmp, tmp, 0x8000);
 434    tcg_gen_andi_i32(t0, t0, ~0x8000);
 435    tcg_gen_andi_i32(t1, t1, ~0x8000);
 436    tcg_gen_add_i32(t0, t0, t1);
 437    tcg_gen_xor_i32(dest, t0, tmp);
 438    tcg_temp_free_i32(tmp);
 439}
 440
 441/* Set N and Z flags from var.  */
 442static inline void gen_logic_CC(TCGv_i32 var)
 443{
 444    tcg_gen_mov_i32(cpu_NF, var);
 445    tcg_gen_mov_i32(cpu_ZF, var);
 446}
 447
 448/* dest = T0 + T1 + CF. */
 449static void gen_add_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
 450{
 451    tcg_gen_add_i32(dest, t0, t1);
 452    tcg_gen_add_i32(dest, dest, cpu_CF);
 453}
 454
 455/* dest = T0 - T1 + CF - 1.  */
 456static void gen_sub_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
 457{
 458    tcg_gen_sub_i32(dest, t0, t1);
 459    tcg_gen_add_i32(dest, dest, cpu_CF);
 460    tcg_gen_subi_i32(dest, dest, 1);
 461}
 462
 463/* dest = T0 + T1. Compute C, N, V and Z flags */
 464static void gen_add_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
 465{
 466    TCGv_i32 tmp = tcg_temp_new_i32();
 467    tcg_gen_movi_i32(tmp, 0);
 468    tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, t1, tmp);
 469    tcg_gen_mov_i32(cpu_ZF, cpu_NF);
 470    tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
 471    tcg_gen_xor_i32(tmp, t0, t1);
 472    tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
 473    tcg_temp_free_i32(tmp);
 474    tcg_gen_mov_i32(dest, cpu_NF);
 475}
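/* In gen_add_CC above (and gen_adc_CC below), V is computed as
 * (NF ^ t0) & ~(t0 ^ t1): overflow occurs exactly when the operands have
 * the same sign but the result's sign differs.
 */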
 476
 477/* dest = T0 + T1 + CF.  Compute C, N, V and Z flags */
 478static void gen_adc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
 479{
 480    TCGv_i32 tmp = tcg_temp_new_i32();
 481    if (TCG_TARGET_HAS_add2_i32) {
 482        tcg_gen_movi_i32(tmp, 0);
 483        tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, cpu_CF, tmp);
 484        tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1, tmp);
 485    } else {
 486        TCGv_i64 q0 = tcg_temp_new_i64();
 487        TCGv_i64 q1 = tcg_temp_new_i64();
 488        tcg_gen_extu_i32_i64(q0, t0);
 489        tcg_gen_extu_i32_i64(q1, t1);
 490        tcg_gen_add_i64(q0, q0, q1);
 491        tcg_gen_extu_i32_i64(q1, cpu_CF);
 492        tcg_gen_add_i64(q0, q0, q1);
 493        tcg_gen_extr_i64_i32(cpu_NF, cpu_CF, q0);
 494        tcg_temp_free_i64(q0);
 495        tcg_temp_free_i64(q1);
 496    }
 497    tcg_gen_mov_i32(cpu_ZF, cpu_NF);
 498    tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
 499    tcg_gen_xor_i32(tmp, t0, t1);
 500    tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
 501    tcg_temp_free_i32(tmp);
 502    tcg_gen_mov_i32(dest, cpu_NF);
 503}
 504
 505/* dest = T0 - T1. Compute C, N, V and Z flags */
 506static void gen_sub_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
 507{
 508    TCGv_i32 tmp;
 509    tcg_gen_sub_i32(cpu_NF, t0, t1);
 510    tcg_gen_mov_i32(cpu_ZF, cpu_NF);
 511    tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0, t1);
 512    tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
 513    tmp = tcg_temp_new_i32();
 514    tcg_gen_xor_i32(tmp, t0, t1);
 515    tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
 516    tcg_temp_free_i32(tmp);
 517    tcg_gen_mov_i32(dest, cpu_NF);
 518}
 519
 520/* dest = T0 + ~T1 + CF.  Compute C, N, V and Z flags */
 521static void gen_sbc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
 522{
 523    TCGv_i32 tmp = tcg_temp_new_i32();
 524    tcg_gen_not_i32(tmp, t1);
 525    gen_adc_CC(dest, t0, tmp);
 526    tcg_temp_free_i32(tmp);
 527}
 528
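/* Register-specified shifts use only the bottom byte of the shift register,
 * and LSL/LSR amounts of 32 or more must produce 0.  The movcond in the
 * macro below implements this by substituting a zero operand whenever the
 * masked shift count exceeds 0x1f.
 */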
 529#define GEN_SHIFT(name)                                               \
 530static void gen_##name(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)       \
 531{                                                                     \
 532    TCGv_i32 tmp1, tmp2, tmp3;                                        \
 533    tmp1 = tcg_temp_new_i32();                                        \
 534    tcg_gen_andi_i32(tmp1, t1, 0xff);                                 \
 535    tmp2 = tcg_const_i32(0);                                          \
 536    tmp3 = tcg_const_i32(0x1f);                                       \
 537    tcg_gen_movcond_i32(TCG_COND_GTU, tmp2, tmp1, tmp3, tmp2, t0);    \
 538    tcg_temp_free_i32(tmp3);                                          \
 539    tcg_gen_andi_i32(tmp1, tmp1, 0x1f);                               \
 540    tcg_gen_##name##_i32(dest, tmp2, tmp1);                           \
 541    tcg_temp_free_i32(tmp2);                                          \
 542    tcg_temp_free_i32(tmp1);                                          \
 543}
 544GEN_SHIFT(shl)
 545GEN_SHIFT(shr)
 546#undef GEN_SHIFT
 547
 548static void gen_sar(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
 549{
 550    TCGv_i32 tmp1, tmp2;
 551    tmp1 = tcg_temp_new_i32();
 552    tcg_gen_andi_i32(tmp1, t1, 0xff);
 553    tmp2 = tcg_const_i32(0x1f);
 554    tcg_gen_movcond_i32(TCG_COND_GTU, tmp1, tmp1, tmp2, tmp2, tmp1);
 555    tcg_temp_free_i32(tmp2);
 556    tcg_gen_sar_i32(dest, t0, tmp1);
 557    tcg_temp_free_i32(tmp1);
 558}
 559
 560static void shifter_out_im(TCGv_i32 var, int shift)
 561{
 562    tcg_gen_extract_i32(cpu_CF, var, shift, 1);
 563}
 564
 565/* Shift by immediate.  Includes special handling for shift == 0.  */
 566static inline void gen_arm_shift_im(TCGv_i32 var, int shiftop,
 567                                    int shift, int flags)
 568{
 569    switch (shiftop) {
 570    case 0: /* LSL */
 571        if (shift != 0) {
 572            if (flags)
 573                shifter_out_im(var, 32 - shift);
 574            tcg_gen_shli_i32(var, var, shift);
 575        }
 576        break;
 577    case 1: /* LSR */
 578        if (shift == 0) {
 579            if (flags) {
 580                tcg_gen_shri_i32(cpu_CF, var, 31);
 581            }
 582            tcg_gen_movi_i32(var, 0);
 583        } else {
 584            if (flags)
 585                shifter_out_im(var, shift - 1);
 586            tcg_gen_shri_i32(var, var, shift);
 587        }
 588        break;
 589    case 2: /* ASR */
 590        if (shift == 0)
 591            shift = 32;
 592        if (flags)
 593            shifter_out_im(var, shift - 1);
 594        if (shift == 32)
 595          shift = 31;
 596        tcg_gen_sari_i32(var, var, shift);
 597        break;
 598    case 3: /* ROR/RRX */
 599        if (shift != 0) {
 600            if (flags)
 601                shifter_out_im(var, shift - 1);
 602            tcg_gen_rotri_i32(var, var, shift); break;
 603        } else {
 604            TCGv_i32 tmp = tcg_temp_new_i32();
 605            tcg_gen_shli_i32(tmp, cpu_CF, 31);
 606            if (flags)
 607                shifter_out_im(var, 0);
 608            tcg_gen_shri_i32(var, var, 1);
 609            tcg_gen_or_i32(var, var, tmp);
 610            tcg_temp_free_i32(tmp);
 611        }
 612    }
  613}
 614
 615static inline void gen_arm_shift_reg(TCGv_i32 var, int shiftop,
 616                                     TCGv_i32 shift, int flags)
 617{
 618    if (flags) {
 619        switch (shiftop) {
 620        case 0: gen_helper_shl_cc(var, cpu_env, var, shift); break;
 621        case 1: gen_helper_shr_cc(var, cpu_env, var, shift); break;
 622        case 2: gen_helper_sar_cc(var, cpu_env, var, shift); break;
 623        case 3: gen_helper_ror_cc(var, cpu_env, var, shift); break;
 624        }
 625    } else {
 626        switch (shiftop) {
 627        case 0:
 628            gen_shl(var, var, shift);
 629            break;
 630        case 1:
 631            gen_shr(var, var, shift);
 632            break;
 633        case 2:
 634            gen_sar(var, var, shift);
 635            break;
 636        case 3: tcg_gen_andi_i32(shift, shift, 0x1f);
 637                tcg_gen_rotr_i32(var, var, shift); break;
 638        }
 639    }
 640    tcg_temp_free_i32(shift);
 641}
 642
 643/*
 644 * Generate a conditional based on ARM condition code cc.
  645 * This is common between ARM and AArch64 targets.
 646 */
 647void arm_test_cc(DisasCompare *cmp, int cc)
 648{
 649    TCGv_i32 value;
 650    TCGCond cond;
 651    bool global = true;
 652
 653    switch (cc) {
 654    case 0: /* eq: Z */
 655    case 1: /* ne: !Z */
 656        cond = TCG_COND_EQ;
 657        value = cpu_ZF;
 658        break;
 659
 660    case 2: /* cs: C */
 661    case 3: /* cc: !C */
 662        cond = TCG_COND_NE;
 663        value = cpu_CF;
 664        break;
 665
 666    case 4: /* mi: N */
 667    case 5: /* pl: !N */
 668        cond = TCG_COND_LT;
 669        value = cpu_NF;
 670        break;
 671
 672    case 6: /* vs: V */
 673    case 7: /* vc: !V */
 674        cond = TCG_COND_LT;
 675        value = cpu_VF;
 676        break;
 677
 678    case 8: /* hi: C && !Z */
 679    case 9: /* ls: !C || Z -> !(C && !Z) */
 680        cond = TCG_COND_NE;
 681        value = tcg_temp_new_i32();
 682        global = false;
 683        /* CF is 1 for C, so -CF is an all-bits-set mask for C;
 684           ZF is non-zero for !Z; so AND the two subexpressions.  */
 685        tcg_gen_neg_i32(value, cpu_CF);
 686        tcg_gen_and_i32(value, value, cpu_ZF);
 687        break;
 688
 689    case 10: /* ge: N == V -> N ^ V == 0 */
 690    case 11: /* lt: N != V -> N ^ V != 0 */
 691        /* Since we're only interested in the sign bit, == 0 is >= 0.  */
 692        cond = TCG_COND_GE;
 693        value = tcg_temp_new_i32();
 694        global = false;
 695        tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
 696        break;
 697
 698    case 12: /* gt: !Z && N == V */
 699    case 13: /* le: Z || N != V */
 700        cond = TCG_COND_NE;
 701        value = tcg_temp_new_i32();
 702        global = false;
 703        /* (N == V) is equal to the sign bit of ~(NF ^ VF).  Propagate
 704         * the sign bit then AND with ZF to yield the result.  */
 705        tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
 706        tcg_gen_sari_i32(value, value, 31);
 707        tcg_gen_andc_i32(value, cpu_ZF, value);
 708        break;
 709
 710    case 14: /* always */
 711    case 15: /* always */
 712        /* Use the ALWAYS condition, which will fold early.
 713         * It doesn't matter what we use for the value.  */
 714        cond = TCG_COND_ALWAYS;
 715        value = cpu_ZF;
 716        goto no_invert;
 717
 718    default:
 719        fprintf(stderr, "Bad condition code 0x%x\n", cc);
 720        abort();
 721    }
 722
 723    if (cc & 1) {
 724        cond = tcg_invert_cond(cond);
 725    }
 726
 727 no_invert:
 728    cmp->cond = cond;
 729    cmp->value = value;
 730    cmp->value_global = global;
 731}
 732
 733void arm_free_cc(DisasCompare *cmp)
 734{
 735    if (!cmp->value_global) {
 736        tcg_temp_free_i32(cmp->value);
 737    }
 738}
 739
 740void arm_jump_cc(DisasCompare *cmp, TCGLabel *label)
 741{
 742    tcg_gen_brcondi_i32(cmp->cond, cmp->value, 0, label);
 743}
 744
 745void arm_gen_test_cc(int cc, TCGLabel *label)
 746{
 747    DisasCompare cmp;
 748    arm_test_cc(&cmp, cc);
 749    arm_jump_cc(&cmp, label);
 750    arm_free_cc(&cmp);
 751}
 752
 753static inline void gen_set_condexec(DisasContext *s)
 754{
 755    if (s->condexec_mask) {
 756        uint32_t val = (s->condexec_cond << 4) | (s->condexec_mask >> 1);
 757        TCGv_i32 tmp = tcg_temp_new_i32();
 758        tcg_gen_movi_i32(tmp, val);
 759        store_cpu_field(tmp, condexec_bits);
 760    }
 761}
 762
 763static inline void gen_set_pc_im(DisasContext *s, target_ulong val)
 764{
 765    tcg_gen_movi_i32(cpu_R[15], val);
 766}
 767
 768/* Set PC and Thumb state from var.  var is marked as dead.  */
 769static inline void gen_bx(DisasContext *s, TCGv_i32 var)
 770{
 771    s->base.is_jmp = DISAS_JUMP;
 772    tcg_gen_andi_i32(cpu_R[15], var, ~1);
 773    tcg_gen_andi_i32(var, var, 1);
 774    store_cpu_field(var, thumb);
 775}
 776
 777/*
 778 * Set PC and Thumb state from var. var is marked as dead.
 779 * For M-profile CPUs, include logic to detect exception-return
 780 * branches and handle them. This is needed for Thumb POP/LDM to PC, LDR to PC,
 781 * and BX reg, and no others, and happens only for code in Handler mode.
 782 * The Security Extension also requires us to check for the FNC_RETURN
 783 * which signals a function return from non-secure state; this can happen
 784 * in both Handler and Thread mode.
 785 * To avoid having to do multiple comparisons in inline generated code,
 786 * we make the check we do here loose, so it will match for EXC_RETURN
 787 * in Thread mode. For system emulation do_v7m_exception_exit() checks
 788 * for these spurious cases and returns without doing anything (giving
 789 * the same behaviour as for a branch to a non-magic address).
 790 *
 791 * In linux-user mode it is unclear what the right behaviour for an
 792 * attempted FNC_RETURN should be, because in real hardware this will go
 793 * directly to Secure code (ie not the Linux kernel) which will then treat
 794 * the error in any way it chooses. For QEMU we opt to make the FNC_RETURN
 795 * attempt behave the way it would on a CPU without the security extension,
 796 * which is to say "like a normal branch". That means we can simply treat
 797 * all branches as normal with no magic address behaviour.
 798 */
 799static inline void gen_bx_excret(DisasContext *s, TCGv_i32 var)
 800{
 801    /* Generate the same code here as for a simple bx, but flag via
 802     * s->base.is_jmp that we need to do the rest of the work later.
 803     */
 804    gen_bx(s, var);
 805#ifndef CONFIG_USER_ONLY
 806    if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY) ||
 807        (s->v7m_handler_mode && arm_dc_feature(s, ARM_FEATURE_M))) {
 808        s->base.is_jmp = DISAS_BX_EXCRET;
 809    }
 810#endif
 811}
 812
 813static inline void gen_bx_excret_final_code(DisasContext *s)
 814{
 815    /* Generate the code to finish possible exception return and end the TB */
 816    TCGLabel *excret_label = gen_new_label();
 817    uint32_t min_magic;
 818
 819    if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY)) {
 820        /* Covers FNC_RETURN and EXC_RETURN magic */
 821        min_magic = FNC_RETURN_MIN_MAGIC;
 822    } else {
 823        /* EXC_RETURN magic only */
 824        min_magic = EXC_RETURN_MIN_MAGIC;
 825    }
 826
 827    /* Is the new PC value in the magic range indicating exception return? */
 828    tcg_gen_brcondi_i32(TCG_COND_GEU, cpu_R[15], min_magic, excret_label);
 829    /* No: end the TB as we would for a DISAS_JMP */
 830    if (is_singlestepping(s)) {
 831        gen_singlestep_exception(s);
 832    } else {
 833        tcg_gen_exit_tb(NULL, 0);
 834    }
 835    gen_set_label(excret_label);
 836    /* Yes: this is an exception return.
 837     * At this point in runtime env->regs[15] and env->thumb will hold
 838     * the exception-return magic number, which do_v7m_exception_exit()
 839     * will read. Nothing else will be able to see those values because
 840     * the cpu-exec main loop guarantees that we will always go straight
 841     * from raising the exception to the exception-handling code.
 842     *
 843     * gen_ss_advance(s) does nothing on M profile currently but
 844     * calling it is conceptually the right thing as we have executed
 845     * this instruction (compare SWI, HVC, SMC handling).
 846     */
 847    gen_ss_advance(s);
 848    gen_exception_internal(EXCP_EXCEPTION_EXIT);
 849}
 850
 851static inline void gen_bxns(DisasContext *s, int rm)
 852{
 853    TCGv_i32 var = load_reg(s, rm);
 854
 855    /* The bxns helper may raise an EXCEPTION_EXIT exception, so in theory
 856     * we need to sync state before calling it, but:
 857     *  - we don't need to do gen_set_pc_im() because the bxns helper will
 858     *    always set the PC itself
 859     *  - we don't need to do gen_set_condexec() because BXNS is UNPREDICTABLE
 860     *    unless it's outside an IT block or the last insn in an IT block,
 861     *    so we know that condexec == 0 (already set at the top of the TB)
 862     *    is correct in the non-UNPREDICTABLE cases, and we can choose
 863     *    "zeroes the IT bits" as our UNPREDICTABLE behaviour otherwise.
 864     */
 865    gen_helper_v7m_bxns(cpu_env, var);
 866    tcg_temp_free_i32(var);
 867    s->base.is_jmp = DISAS_EXIT;
 868}
 869
 870static inline void gen_blxns(DisasContext *s, int rm)
 871{
 872    TCGv_i32 var = load_reg(s, rm);
 873
 874    /* We don't need to sync condexec state, for the same reason as bxns.
 875     * We do however need to set the PC, because the blxns helper reads it.
 876     * The blxns helper may throw an exception.
 877     */
 878    gen_set_pc_im(s, s->base.pc_next);
 879    gen_helper_v7m_blxns(cpu_env, var);
 880    tcg_temp_free_i32(var);
 881    s->base.is_jmp = DISAS_EXIT;
 882}
 883
 884/* Variant of store_reg which uses branch&exchange logic when storing
 885   to r15 in ARM architecture v7 and above. The source must be a temporary
 886   and will be marked as dead. */
 887static inline void store_reg_bx(DisasContext *s, int reg, TCGv_i32 var)
 888{
 889    if (reg == 15 && ENABLE_ARCH_7) {
 890        gen_bx(s, var);
 891    } else {
 892        store_reg(s, reg, var);
 893    }
 894}
 895
 896/* Variant of store_reg which uses branch&exchange logic when storing
 897 * to r15 in ARM architecture v5T and above. This is used for storing
 898 * the results of a LDR/LDM/POP into r15, and corresponds to the cases
 899 * in the ARM ARM which use the LoadWritePC() pseudocode function. */
 900static inline void store_reg_from_load(DisasContext *s, int reg, TCGv_i32 var)
 901{
 902    if (reg == 15 && ENABLE_ARCH_5) {
 903        gen_bx_excret(s, var);
 904    } else {
 905        store_reg(s, reg, var);
 906    }
 907}
 908
 909#ifdef CONFIG_USER_ONLY
 910#define IS_USER_ONLY 1
 911#else
 912#define IS_USER_ONLY 0
 913#endif
 914
 915/* Abstractions of "generate code to do a guest load/store for
 916 * AArch32", where a vaddr is always 32 bits (and is zero
  917 * extended if we're a 64-bit core) and data is also
  918 * 32 bits unless specifically doing a 64-bit access.
 919 * These functions work like tcg_gen_qemu_{ld,st}* except
 920 * that the address argument is TCGv_i32 rather than TCGv.
 921 */
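/* A typical caller looks like the WLDRW handling later in this file:
 *     gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
 * where addr is a TCGv_i32 holding the AArch32 virtual address.
 */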
 922
 923static inline TCGv gen_aa32_addr(DisasContext *s, TCGv_i32 a32, MemOp op)
 924{
 925    TCGv addr = tcg_temp_new();
 926    tcg_gen_extu_i32_tl(addr, a32);
 927
 928    /* Not needed for user-mode BE32, where we use MO_BE instead.  */
 929    if (!IS_USER_ONLY && s->sctlr_b && (op & MO_SIZE) < MO_32) {
 930        tcg_gen_xori_tl(addr, addr, 4 - (1 << (op & MO_SIZE)));
 931    }
 932    return addr;
 933}
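/* For system-mode BE32 (SCTLR.B set), sub-word accesses address the opposite
 * end of the 32-bit word; the XOR of the low address bits above implements
 * that without byte-swapping the data itself.
 */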
 934
 935static void gen_aa32_ld_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
 936                            int index, MemOp opc)
 937{
 938    TCGv addr;
 939
 940    if (arm_dc_feature(s, ARM_FEATURE_M) &&
 941        !arm_dc_feature(s, ARM_FEATURE_M_MAIN)) {
 942        opc |= MO_ALIGN;
 943    }
 944
 945    addr = gen_aa32_addr(s, a32, opc);
 946    tcg_gen_qemu_ld_i32(val, addr, index, opc);
 947    tcg_temp_free(addr);
 948}
 949
 950static void gen_aa32_st_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
 951                            int index, MemOp opc)
 952{
 953    TCGv addr;
 954
 955    if (arm_dc_feature(s, ARM_FEATURE_M) &&
 956        !arm_dc_feature(s, ARM_FEATURE_M_MAIN)) {
 957        opc |= MO_ALIGN;
 958    }
 959
 960    addr = gen_aa32_addr(s, a32, opc);
 961    tcg_gen_qemu_st_i32(val, addr, index, opc);
 962    tcg_temp_free(addr);
 963}
 964
 965#define DO_GEN_LD(SUFF, OPC)                                             \
 966static inline void gen_aa32_ld##SUFF(DisasContext *s, TCGv_i32 val,      \
 967                                     TCGv_i32 a32, int index)            \
 968{                                                                        \
 969    gen_aa32_ld_i32(s, val, a32, index, OPC | s->be_data);               \
 970}
 971
 972#define DO_GEN_ST(SUFF, OPC)                                             \
 973static inline void gen_aa32_st##SUFF(DisasContext *s, TCGv_i32 val,      \
 974                                     TCGv_i32 a32, int index)            \
 975{                                                                        \
 976    gen_aa32_st_i32(s, val, a32, index, OPC | s->be_data);               \
 977}
 978
 979static inline void gen_aa32_frob64(DisasContext *s, TCGv_i64 val)
 980{
 981    /* Not needed for user-mode BE32, where we use MO_BE instead.  */
 982    if (!IS_USER_ONLY && s->sctlr_b) {
 983        tcg_gen_rotri_i64(val, val, 32);
 984    }
 985}
 986
 987static void gen_aa32_ld_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
 988                            int index, MemOp opc)
 989{
 990    TCGv addr = gen_aa32_addr(s, a32, opc);
 991    tcg_gen_qemu_ld_i64(val, addr, index, opc);
 992    gen_aa32_frob64(s, val);
 993    tcg_temp_free(addr);
 994}
 995
 996static inline void gen_aa32_ld64(DisasContext *s, TCGv_i64 val,
 997                                 TCGv_i32 a32, int index)
 998{
 999    gen_aa32_ld_i64(s, val, a32, index, MO_Q | s->be_data);
1000}
1001
1002static void gen_aa32_st_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
1003                            int index, MemOp opc)
1004{
1005    TCGv addr = gen_aa32_addr(s, a32, opc);
1006
1007    /* Not needed for user-mode BE32, where we use MO_BE instead.  */
1008    if (!IS_USER_ONLY && s->sctlr_b) {
1009        TCGv_i64 tmp = tcg_temp_new_i64();
1010        tcg_gen_rotri_i64(tmp, val, 32);
1011        tcg_gen_qemu_st_i64(tmp, addr, index, opc);
1012        tcg_temp_free_i64(tmp);
1013    } else {
1014        tcg_gen_qemu_st_i64(val, addr, index, opc);
1015    }
1016    tcg_temp_free(addr);
1017}
1018
1019static inline void gen_aa32_st64(DisasContext *s, TCGv_i64 val,
1020                                 TCGv_i32 a32, int index)
1021{
1022    gen_aa32_st_i64(s, val, a32, index, MO_Q | s->be_data);
1023}
1024
1025DO_GEN_LD(8u, MO_UB)
1026DO_GEN_LD(16u, MO_UW)
1027DO_GEN_LD(32u, MO_UL)
1028DO_GEN_ST(8, MO_UB)
1029DO_GEN_ST(16, MO_UW)
1030DO_GEN_ST(32, MO_UL)
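/* The DO_GEN_LD/DO_GEN_ST expansions above provide gen_aa32_ld8u/ld16u/ld32u
 * and gen_aa32_st8/st16/st32, each taking (DisasContext *, value, address,
 * mmu index) and folding in the per-CPU data endianness via s->be_data.
 */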
1031
1032static inline void gen_hvc(DisasContext *s, int imm16)
1033{
1034    /* The pre HVC helper handles cases when HVC gets trapped
1035     * as an undefined insn by runtime configuration (ie before
1036     * the insn really executes).
1037     */
1038    gen_set_pc_im(s, s->pc_curr);
1039    gen_helper_pre_hvc(cpu_env);
1040    /* Otherwise we will treat this as a real exception which
1041     * happens after execution of the insn. (The distinction matters
1042     * for the PC value reported to the exception handler and also
1043     * for single stepping.)
1044     */
1045    s->svc_imm = imm16;
1046    gen_set_pc_im(s, s->base.pc_next);
1047    s->base.is_jmp = DISAS_HVC;
1048}
1049
1050static inline void gen_smc(DisasContext *s)
1051{
1052    /* As with HVC, we may take an exception either before or after
1053     * the insn executes.
1054     */
1055    TCGv_i32 tmp;
1056
1057    gen_set_pc_im(s, s->pc_curr);
1058    tmp = tcg_const_i32(syn_aa32_smc());
1059    gen_helper_pre_smc(cpu_env, tmp);
1060    tcg_temp_free_i32(tmp);
1061    gen_set_pc_im(s, s->base.pc_next);
1062    s->base.is_jmp = DISAS_SMC;
1063}
1064
1065static void gen_exception_internal_insn(DisasContext *s, uint32_t pc, int excp)
1066{
1067    gen_set_condexec(s);
1068    gen_set_pc_im(s, pc);
1069    gen_exception_internal(excp);
1070    s->base.is_jmp = DISAS_NORETURN;
1071}
1072
1073static void gen_exception_insn(DisasContext *s, uint32_t pc, int excp,
1074                               int syn, uint32_t target_el)
1075{
1076    gen_set_condexec(s);
1077    gen_set_pc_im(s, pc);
1078    gen_exception(excp, syn, target_el);
1079    s->base.is_jmp = DISAS_NORETURN;
1080}
1081
1082static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syn)
1083{
1084    TCGv_i32 tcg_syn;
1085
1086    gen_set_condexec(s);
1087    gen_set_pc_im(s, s->pc_curr);
1088    tcg_syn = tcg_const_i32(syn);
1089    gen_helper_exception_bkpt_insn(cpu_env, tcg_syn);
1090    tcg_temp_free_i32(tcg_syn);
1091    s->base.is_jmp = DISAS_NORETURN;
1092}
1093
1094static void unallocated_encoding(DisasContext *s)
1095{
1096    /* Unallocated and reserved encodings are uncategorized */
1097    gen_exception_insn(s, s->pc_curr, EXCP_UDEF, syn_uncategorized(),
1098                       default_exception_el(s));
1099}
1100
1101/* Force a TB lookup after an instruction that changes the CPU state.  */
1102static inline void gen_lookup_tb(DisasContext *s)
1103{
1104    tcg_gen_movi_i32(cpu_R[15], s->base.pc_next);
1105    s->base.is_jmp = DISAS_EXIT;
1106}
1107
1108static inline void gen_hlt(DisasContext *s, int imm)
1109{
1110    /* HLT. This has two purposes.
1111     * Architecturally, it is an external halting debug instruction.
 1112     * Since QEMU doesn't implement external debug, we treat this as
 1113     * the architecture requires when halting debug is disabled: it will UNDEF.
1114     * Secondly, "HLT 0x3C" is a T32 semihosting trap instruction,
1115     * and "HLT 0xF000" is an A32 semihosting syscall. These traps
1116     * must trigger semihosting even for ARMv7 and earlier, where
1117     * HLT was an undefined encoding.
1118     * In system mode, we don't allow userspace access to
1119     * semihosting, to provide some semblance of security
 1120     * (and for consistency with our 64-bit semihosting).
1121     */
1122    if (semihosting_enabled() &&
1123#ifndef CONFIG_USER_ONLY
1124        s->current_el != 0 &&
1125#endif
1126        (imm == (s->thumb ? 0x3c : 0xf000))) {
1127        gen_exception_internal_insn(s, s->base.pc_next, EXCP_SEMIHOST);
1128        return;
1129    }
1130
1131    unallocated_encoding(s);
1132}
1133
1134static TCGv_ptr get_fpstatus_ptr(int neon)
1135{
1136    TCGv_ptr statusptr = tcg_temp_new_ptr();
1137    int offset;
1138    if (neon) {
1139        offset = offsetof(CPUARMState, vfp.standard_fp_status);
1140    } else {
1141        offset = offsetof(CPUARMState, vfp.fp_status);
1142    }
1143    tcg_gen_addi_ptr(statusptr, cpu_env, offset);
1144    return statusptr;
1145}
1146
1147static inline long vfp_reg_offset(bool dp, unsigned reg)
1148{
1149    if (dp) {
1150        return offsetof(CPUARMState, vfp.zregs[reg >> 1].d[reg & 1]);
1151    } else {
1152        long ofs = offsetof(CPUARMState, vfp.zregs[reg >> 2].d[(reg >> 1) & 1]);
1153        if (reg & 1) {
1154            ofs += offsetof(CPU_DoubleU, l.upper);
1155        } else {
1156            ofs += offsetof(CPU_DoubleU, l.lower);
1157        }
1158        return ofs;
1159    }
1160}
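/* Dn lives in zregs[n >> 1].d[n & 1]; Sn occupies one 32-bit half of the
 * corresponding doubleword, with the l.lower/l.upper selection above giving
 * the host-endian-correct half.
 */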
1161
1162/* Return the offset of a 32-bit piece of a NEON register.
1163   zero is the least significant end of the register.  */
1164static inline long
1165neon_reg_offset (int reg, int n)
1166{
1167    int sreg;
1168    sreg = reg * 2 + n;
1169    return vfp_reg_offset(0, sreg);
1170}
1171
1172/* Return the offset of a 2**SIZE piece of a NEON register, at index ELE,
1173 * where 0 is the least significant end of the register.
1174 */
1175static inline long
1176neon_element_offset(int reg, int element, MemOp size)
1177{
1178    int element_size = 1 << size;
1179    int ofs = element * element_size;
1180#ifdef HOST_WORDS_BIGENDIAN
1181    /* Calculate the offset assuming fully little-endian,
1182     * then XOR to account for the order of the 8-byte units.
1183     */
1184    if (element_size < 8) {
1185        ofs ^= 8 - element_size;
1186    }
1187#endif
1188    return neon_reg_offset(reg, 0) + ofs;
1189}
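/* For example, on a little-endian host element 3 of size MO_16 in a
 * D register is at neon_reg_offset(reg, 0) + 6; on a big-endian host the
 * XOR above adjusts the offset within each 8-byte unit instead.
 */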
1190
1191static TCGv_i32 neon_load_reg(int reg, int pass)
1192{
1193    TCGv_i32 tmp = tcg_temp_new_i32();
1194    tcg_gen_ld_i32(tmp, cpu_env, neon_reg_offset(reg, pass));
1195    return tmp;
1196}
1197
1198static void neon_load_element(TCGv_i32 var, int reg, int ele, MemOp mop)
1199{
1200    long offset = neon_element_offset(reg, ele, mop & MO_SIZE);
1201
1202    switch (mop) {
1203    case MO_UB:
1204        tcg_gen_ld8u_i32(var, cpu_env, offset);
1205        break;
1206    case MO_UW:
1207        tcg_gen_ld16u_i32(var, cpu_env, offset);
1208        break;
1209    case MO_UL:
1210        tcg_gen_ld_i32(var, cpu_env, offset);
1211        break;
1212    default:
1213        g_assert_not_reached();
1214    }
1215}
1216
1217static void neon_load_element64(TCGv_i64 var, int reg, int ele, MemOp mop)
1218{
1219    long offset = neon_element_offset(reg, ele, mop & MO_SIZE);
1220
1221    switch (mop) {
1222    case MO_UB:
1223        tcg_gen_ld8u_i64(var, cpu_env, offset);
1224        break;
1225    case MO_UW:
1226        tcg_gen_ld16u_i64(var, cpu_env, offset);
1227        break;
1228    case MO_UL:
1229        tcg_gen_ld32u_i64(var, cpu_env, offset);
1230        break;
1231    case MO_Q:
1232        tcg_gen_ld_i64(var, cpu_env, offset);
1233        break;
1234    default:
1235        g_assert_not_reached();
1236    }
1237}
1238
1239static void neon_store_reg(int reg, int pass, TCGv_i32 var)
1240{
1241    tcg_gen_st_i32(var, cpu_env, neon_reg_offset(reg, pass));
1242    tcg_temp_free_i32(var);
1243}
1244
1245static void neon_store_element(int reg, int ele, MemOp size, TCGv_i32 var)
1246{
1247    long offset = neon_element_offset(reg, ele, size);
1248
1249    switch (size) {
1250    case MO_8:
1251        tcg_gen_st8_i32(var, cpu_env, offset);
1252        break;
1253    case MO_16:
1254        tcg_gen_st16_i32(var, cpu_env, offset);
1255        break;
1256    case MO_32:
1257        tcg_gen_st_i32(var, cpu_env, offset);
1258        break;
1259    default:
1260        g_assert_not_reached();
1261    }
1262}
1263
1264static void neon_store_element64(int reg, int ele, MemOp size, TCGv_i64 var)
1265{
1266    long offset = neon_element_offset(reg, ele, size);
1267
1268    switch (size) {
1269    case MO_8:
1270        tcg_gen_st8_i64(var, cpu_env, offset);
1271        break;
1272    case MO_16:
1273        tcg_gen_st16_i64(var, cpu_env, offset);
1274        break;
1275    case MO_32:
1276        tcg_gen_st32_i64(var, cpu_env, offset);
1277        break;
1278    case MO_64:
1279        tcg_gen_st_i64(var, cpu_env, offset);
1280        break;
1281    default:
1282        g_assert_not_reached();
1283    }
1284}
1285
1286static inline void neon_load_reg64(TCGv_i64 var, int reg)
1287{
1288    tcg_gen_ld_i64(var, cpu_env, vfp_reg_offset(1, reg));
1289}
1290
1291static inline void neon_store_reg64(TCGv_i64 var, int reg)
1292{
1293    tcg_gen_st_i64(var, cpu_env, vfp_reg_offset(1, reg));
1294}
1295
1296static inline void neon_load_reg32(TCGv_i32 var, int reg)
1297{
1298    tcg_gen_ld_i32(var, cpu_env, vfp_reg_offset(false, reg));
1299}
1300
1301static inline void neon_store_reg32(TCGv_i32 var, int reg)
1302{
1303    tcg_gen_st_i32(var, cpu_env, vfp_reg_offset(false, reg));
1304}
1305
1306static TCGv_ptr vfp_reg_ptr(bool dp, int reg)
1307{
1308    TCGv_ptr ret = tcg_temp_new_ptr();
1309    tcg_gen_addi_ptr(ret, cpu_env, vfp_reg_offset(dp, reg));
1310    return ret;
1311}
1312
1313#define ARM_CP_RW_BIT   (1 << 20)
1314
1315/* Include the VFP decoder */
1316#include "translate-vfp.inc.c"
1317
1318static inline void iwmmxt_load_reg(TCGv_i64 var, int reg)
1319{
1320    tcg_gen_ld_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
1321}
1322
1323static inline void iwmmxt_store_reg(TCGv_i64 var, int reg)
1324{
1325    tcg_gen_st_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
1326}
1327
1328static inline TCGv_i32 iwmmxt_load_creg(int reg)
1329{
1330    TCGv_i32 var = tcg_temp_new_i32();
1331    tcg_gen_ld_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
1332    return var;
1333}
1334
1335static inline void iwmmxt_store_creg(int reg, TCGv_i32 var)
1336{
1337    tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
1338    tcg_temp_free_i32(var);
1339}
1340
1341static inline void gen_op_iwmmxt_movq_wRn_M0(int rn)
1342{
1343    iwmmxt_store_reg(cpu_M0, rn);
1344}
1345
1346static inline void gen_op_iwmmxt_movq_M0_wRn(int rn)
1347{
1348    iwmmxt_load_reg(cpu_M0, rn);
1349}
1350
1351static inline void gen_op_iwmmxt_orq_M0_wRn(int rn)
1352{
1353    iwmmxt_load_reg(cpu_V1, rn);
1354    tcg_gen_or_i64(cpu_M0, cpu_M0, cpu_V1);
1355}
1356
1357static inline void gen_op_iwmmxt_andq_M0_wRn(int rn)
1358{
1359    iwmmxt_load_reg(cpu_V1, rn);
1360    tcg_gen_and_i64(cpu_M0, cpu_M0, cpu_V1);
1361}
1362
1363static inline void gen_op_iwmmxt_xorq_M0_wRn(int rn)
1364{
1365    iwmmxt_load_reg(cpu_V1, rn);
1366    tcg_gen_xor_i64(cpu_M0, cpu_M0, cpu_V1);
1367}
1368
1369#define IWMMXT_OP(name) \
1370static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
1371{ \
1372    iwmmxt_load_reg(cpu_V1, rn); \
1373    gen_helper_iwmmxt_##name(cpu_M0, cpu_M0, cpu_V1); \
1374}
1375
1376#define IWMMXT_OP_ENV(name) \
1377static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
1378{ \
1379    iwmmxt_load_reg(cpu_V1, rn); \
1380    gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0, cpu_V1); \
1381}
1382
1383#define IWMMXT_OP_ENV_SIZE(name) \
1384IWMMXT_OP_ENV(name##b) \
1385IWMMXT_OP_ENV(name##w) \
1386IWMMXT_OP_ENV(name##l)
1387
1388#define IWMMXT_OP_ENV1(name) \
1389static inline void gen_op_iwmmxt_##name##_M0(void) \
1390{ \
1391    gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0); \
1392}
1393
1394IWMMXT_OP(maddsq)
1395IWMMXT_OP(madduq)
1396IWMMXT_OP(sadb)
1397IWMMXT_OP(sadw)
1398IWMMXT_OP(mulslw)
1399IWMMXT_OP(mulshw)
1400IWMMXT_OP(mululw)
1401IWMMXT_OP(muluhw)
1402IWMMXT_OP(macsw)
1403IWMMXT_OP(macuw)
1404
1405IWMMXT_OP_ENV_SIZE(unpackl)
1406IWMMXT_OP_ENV_SIZE(unpackh)
1407
1408IWMMXT_OP_ENV1(unpacklub)
1409IWMMXT_OP_ENV1(unpackluw)
1410IWMMXT_OP_ENV1(unpacklul)
1411IWMMXT_OP_ENV1(unpackhub)
1412IWMMXT_OP_ENV1(unpackhuw)
1413IWMMXT_OP_ENV1(unpackhul)
1414IWMMXT_OP_ENV1(unpacklsb)
1415IWMMXT_OP_ENV1(unpacklsw)
1416IWMMXT_OP_ENV1(unpacklsl)
1417IWMMXT_OP_ENV1(unpackhsb)
1418IWMMXT_OP_ENV1(unpackhsw)
1419IWMMXT_OP_ENV1(unpackhsl)
1420
1421IWMMXT_OP_ENV_SIZE(cmpeq)
1422IWMMXT_OP_ENV_SIZE(cmpgtu)
1423IWMMXT_OP_ENV_SIZE(cmpgts)
1424
1425IWMMXT_OP_ENV_SIZE(mins)
1426IWMMXT_OP_ENV_SIZE(minu)
1427IWMMXT_OP_ENV_SIZE(maxs)
1428IWMMXT_OP_ENV_SIZE(maxu)
1429
1430IWMMXT_OP_ENV_SIZE(subn)
1431IWMMXT_OP_ENV_SIZE(addn)
1432IWMMXT_OP_ENV_SIZE(subu)
1433IWMMXT_OP_ENV_SIZE(addu)
1434IWMMXT_OP_ENV_SIZE(subs)
1435IWMMXT_OP_ENV_SIZE(adds)
1436
1437IWMMXT_OP_ENV(avgb0)
1438IWMMXT_OP_ENV(avgb1)
1439IWMMXT_OP_ENV(avgw0)
1440IWMMXT_OP_ENV(avgw1)
1441
1442IWMMXT_OP_ENV(packuw)
1443IWMMXT_OP_ENV(packul)
1444IWMMXT_OP_ENV(packuq)
1445IWMMXT_OP_ENV(packsw)
1446IWMMXT_OP_ENV(packsl)
1447IWMMXT_OP_ENV(packsq)
1448
1449static void gen_op_iwmmxt_set_mup(void)
1450{
1451    TCGv_i32 tmp;
1452    tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
1453    tcg_gen_ori_i32(tmp, tmp, 2);
1454    store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
1455}
1456
1457static void gen_op_iwmmxt_set_cup(void)
1458{
1459    TCGv_i32 tmp;
1460    tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
1461    tcg_gen_ori_i32(tmp, tmp, 1);
1462    store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
1463}
1464
1465static void gen_op_iwmmxt_setpsr_nz(void)
1466{
1467    TCGv_i32 tmp = tcg_temp_new_i32();
1468    gen_helper_iwmmxt_setpsr_nz(tmp, cpu_M0);
1469    store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCASF]);
1470}
1471
1472static inline void gen_op_iwmmxt_addl_M0_wRn(int rn)
1473{
1474    iwmmxt_load_reg(cpu_V1, rn);
1475    tcg_gen_ext32u_i64(cpu_V1, cpu_V1);
1476    tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
1477}
1478
1479static inline int gen_iwmmxt_address(DisasContext *s, uint32_t insn,
1480                                     TCGv_i32 dest)
1481{
1482    int rd;
1483    uint32_t offset;
1484    TCGv_i32 tmp;
1485
1486    rd = (insn >> 16) & 0xf;
1487    tmp = load_reg(s, rd);
1488
1489    offset = (insn & 0xff) << ((insn >> 7) & 2);
1490    if (insn & (1 << 24)) {
1491        /* Pre indexed */
1492        if (insn & (1 << 23))
1493            tcg_gen_addi_i32(tmp, tmp, offset);
1494        else
1495            tcg_gen_addi_i32(tmp, tmp, -offset);
1496        tcg_gen_mov_i32(dest, tmp);
1497        if (insn & (1 << 21))
1498            store_reg(s, rd, tmp);
1499        else
1500            tcg_temp_free_i32(tmp);
1501    } else if (insn & (1 << 21)) {
1502        /* Post indexed */
1503        tcg_gen_mov_i32(dest, tmp);
1504        if (insn & (1 << 23))
1505            tcg_gen_addi_i32(tmp, tmp, offset);
1506        else
1507            tcg_gen_addi_i32(tmp, tmp, -offset);
1508        store_reg(s, rd, tmp);
1509    } else if (!(insn & (1 << 23)))
1510        return 1;
1511    return 0;
1512}
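/* The helper above decodes the iwMMXt addressing modes: bit 24 selects
 * pre-indexed (with writeback if bit 21 is set), bit 21 alone selects
 * post-indexed, bit 23 gives the offset sign, and the remaining
 * subtract-without-writeback form is rejected by returning 1.
 */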
1513
1514static inline int gen_iwmmxt_shift(uint32_t insn, uint32_t mask, TCGv_i32 dest)
1515{
1516    int rd = (insn >> 0) & 0xf;
1517    TCGv_i32 tmp;
1518
1519    if (insn & (1 << 8)) {
1520        if (rd < ARM_IWMMXT_wCGR0 || rd > ARM_IWMMXT_wCGR3) {
1521            return 1;
1522        } else {
1523            tmp = iwmmxt_load_creg(rd);
1524        }
1525    } else {
1526        tmp = tcg_temp_new_i32();
1527        iwmmxt_load_reg(cpu_V0, rd);
1528        tcg_gen_extrl_i64_i32(tmp, cpu_V0);
1529    }
1530    tcg_gen_andi_i32(tmp, tmp, mask);
1531    tcg_gen_mov_i32(dest, tmp);
1532    tcg_temp_free_i32(tmp);
1533    return 0;
1534}
1535
1536/* Disassemble an iwMMXt instruction.  Returns nonzero if an error occurred
 1537   (i.e. an undefined instruction).  */
1538static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn)
1539{
1540    int rd, wrd;
1541    int rdhi, rdlo, rd0, rd1, i;
1542    TCGv_i32 addr;
1543    TCGv_i32 tmp, tmp2, tmp3;
1544
1545    if ((insn & 0x0e000e00) == 0x0c000000) {
1546        if ((insn & 0x0fe00ff0) == 0x0c400000) {
1547            wrd = insn & 0xf;
1548            rdlo = (insn >> 12) & 0xf;
1549            rdhi = (insn >> 16) & 0xf;
1550            if (insn & ARM_CP_RW_BIT) {                         /* TMRRC */
1551                iwmmxt_load_reg(cpu_V0, wrd);
1552                tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
1553                tcg_gen_extrh_i64_i32(cpu_R[rdhi], cpu_V0);
1554            } else {                                    /* TMCRR */
1555                tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
1556                iwmmxt_store_reg(cpu_V0, wrd);
1557                gen_op_iwmmxt_set_mup();
1558            }
1559            return 0;
1560        }
1561
1562        wrd = (insn >> 12) & 0xf;
1563        addr = tcg_temp_new_i32();
1564        if (gen_iwmmxt_address(s, insn, addr)) {
1565            tcg_temp_free_i32(addr);
1566            return 1;
1567        }
1568        if (insn & ARM_CP_RW_BIT) {
1569            if ((insn >> 28) == 0xf) {                  /* WLDRW wCx */
1570                tmp = tcg_temp_new_i32();
1571                gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
1572                iwmmxt_store_creg(wrd, tmp);
1573            } else {
1574                i = 1;
1575                if (insn & (1 << 8)) {
1576                    if (insn & (1 << 22)) {             /* WLDRD */
1577                        gen_aa32_ld64(s, cpu_M0, addr, get_mem_index(s));
1578                        i = 0;
1579                    } else {                            /* WLDRW wRd */
1580                        tmp = tcg_temp_new_i32();
1581                        gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
1582                    }
1583                } else {
1584                    tmp = tcg_temp_new_i32();
1585                    if (insn & (1 << 22)) {             /* WLDRH */
1586                        gen_aa32_ld16u(s, tmp, addr, get_mem_index(s));
1587                    } else {                            /* WLDRB */
1588                        gen_aa32_ld8u(s, tmp, addr, get_mem_index(s));
1589                    }
1590                }
1591                if (i) {
1592                    tcg_gen_extu_i32_i64(cpu_M0, tmp);
1593                    tcg_temp_free_i32(tmp);
1594                }
1595                gen_op_iwmmxt_movq_wRn_M0(wrd);
1596            }
1597        } else {
1598            if ((insn >> 28) == 0xf) {                  /* WSTRW wCx */
1599                tmp = iwmmxt_load_creg(wrd);
1600                gen_aa32_st32(s, tmp, addr, get_mem_index(s));
1601            } else {
1602                gen_op_iwmmxt_movq_M0_wRn(wrd);
1603                tmp = tcg_temp_new_i32();
1604                if (insn & (1 << 8)) {
1605                    if (insn & (1 << 22)) {             /* WSTRD */
1606                        gen_aa32_st64(s, cpu_M0, addr, get_mem_index(s));
1607                    } else {                            /* WSTRW wRd */
1608                        tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1609                        gen_aa32_st32(s, tmp, addr, get_mem_index(s));
1610                    }
1611                } else {
1612                    if (insn & (1 << 22)) {             /* WSTRH */
1613                        tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1614                        gen_aa32_st16(s, tmp, addr, get_mem_index(s));
1615                    } else {                            /* WSTRB */
1616                        tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1617                        gen_aa32_st8(s, tmp, addr, get_mem_index(s));
1618                    }
1619                }
1620            }
1621            tcg_temp_free_i32(tmp);
1622        }
1623        tcg_temp_free_i32(addr);
1624        return 0;
1625    }
1626
1627    if ((insn & 0x0f000000) != 0x0e000000)
1628        return 1;
1629
1630    switch (((insn >> 12) & 0xf00) | ((insn >> 4) & 0xff)) {
1631    case 0x000:                                                 /* WOR */
1632        wrd = (insn >> 12) & 0xf;
1633        rd0 = (insn >> 0) & 0xf;
1634        rd1 = (insn >> 16) & 0xf;
1635        gen_op_iwmmxt_movq_M0_wRn(rd0);
1636        gen_op_iwmmxt_orq_M0_wRn(rd1);
1637        gen_op_iwmmxt_setpsr_nz();
1638        gen_op_iwmmxt_movq_wRn_M0(wrd);
1639        gen_op_iwmmxt_set_mup();
1640        gen_op_iwmmxt_set_cup();
1641        break;
1642    case 0x011:                                                 /* TMCR */
1643        if (insn & 0xf)
1644            return 1;
1645        rd = (insn >> 12) & 0xf;
1646        wrd = (insn >> 16) & 0xf;
1647        switch (wrd) {
1648        case ARM_IWMMXT_wCID:
1649        case ARM_IWMMXT_wCASF:
1650            break;
1651        case ARM_IWMMXT_wCon:
1652            gen_op_iwmmxt_set_cup();
1653            /* Fall through.  */
1654        case ARM_IWMMXT_wCSSF:
1655            tmp = iwmmxt_load_creg(wrd);
1656            tmp2 = load_reg(s, rd);
1657            tcg_gen_andc_i32(tmp, tmp, tmp2);
1658            tcg_temp_free_i32(tmp2);
1659            iwmmxt_store_creg(wrd, tmp);
1660            break;
1661        case ARM_IWMMXT_wCGR0:
1662        case ARM_IWMMXT_wCGR1:
1663        case ARM_IWMMXT_wCGR2:
1664        case ARM_IWMMXT_wCGR3:
1665            gen_op_iwmmxt_set_cup();
1666            tmp = load_reg(s, rd);
1667            iwmmxt_store_creg(wrd, tmp);
1668            break;
1669        default:
1670            return 1;
1671        }
1672        break;
1673    case 0x100:                                                 /* WXOR */
1674        wrd = (insn >> 12) & 0xf;
1675        rd0 = (insn >> 0) & 0xf;
1676        rd1 = (insn >> 16) & 0xf;
1677        gen_op_iwmmxt_movq_M0_wRn(rd0);
1678        gen_op_iwmmxt_xorq_M0_wRn(rd1);
1679        gen_op_iwmmxt_setpsr_nz();
1680        gen_op_iwmmxt_movq_wRn_M0(wrd);
1681        gen_op_iwmmxt_set_mup();
1682        gen_op_iwmmxt_set_cup();
1683        break;
1684    case 0x111:                                                 /* TMRC */
1685        if (insn & 0xf)
1686            return 1;
1687        rd = (insn >> 12) & 0xf;
1688        wrd = (insn >> 16) & 0xf;
1689        tmp = iwmmxt_load_creg(wrd);
1690        store_reg(s, rd, tmp);
1691        break;
1692    case 0x300:                                                 /* WANDN */
1693        wrd = (insn >> 12) & 0xf;
1694        rd0 = (insn >> 0) & 0xf;
1695        rd1 = (insn >> 16) & 0xf;
1696        gen_op_iwmmxt_movq_M0_wRn(rd0);
1697        tcg_gen_not_i64(cpu_M0, cpu_M0);        /* WANDN is AND-with-complement: bitwise NOT, not negate */
1698        gen_op_iwmmxt_andq_M0_wRn(rd1);
1699        gen_op_iwmmxt_setpsr_nz();
1700        gen_op_iwmmxt_movq_wRn_M0(wrd);
1701        gen_op_iwmmxt_set_mup();
1702        gen_op_iwmmxt_set_cup();
1703        break;
1704    case 0x200:                                                 /* WAND */
1705        wrd = (insn >> 12) & 0xf;
1706        rd0 = (insn >> 0) & 0xf;
1707        rd1 = (insn >> 16) & 0xf;
1708        gen_op_iwmmxt_movq_M0_wRn(rd0);
1709        gen_op_iwmmxt_andq_M0_wRn(rd1);
1710        gen_op_iwmmxt_setpsr_nz();
1711        gen_op_iwmmxt_movq_wRn_M0(wrd);
1712        gen_op_iwmmxt_set_mup();
1713        gen_op_iwmmxt_set_cup();
1714        break;
1715    case 0x810: case 0xa10:                             /* WMADD */
1716        wrd = (insn >> 12) & 0xf;
1717        rd0 = (insn >> 0) & 0xf;
1718        rd1 = (insn >> 16) & 0xf;
1719        gen_op_iwmmxt_movq_M0_wRn(rd0);
1720        if (insn & (1 << 21))
1721            gen_op_iwmmxt_maddsq_M0_wRn(rd1);
1722        else
1723            gen_op_iwmmxt_madduq_M0_wRn(rd1);
1724        gen_op_iwmmxt_movq_wRn_M0(wrd);
1725        gen_op_iwmmxt_set_mup();
1726        break;
1727    case 0x10e: case 0x50e: case 0x90e: case 0xd0e:     /* WUNPCKIL */
1728        wrd = (insn >> 12) & 0xf;
1729        rd0 = (insn >> 16) & 0xf;
1730        rd1 = (insn >> 0) & 0xf;
1731        gen_op_iwmmxt_movq_M0_wRn(rd0);
1732        switch ((insn >> 22) & 3) {
1733        case 0:
1734            gen_op_iwmmxt_unpacklb_M0_wRn(rd1);
1735            break;
1736        case 1:
1737            gen_op_iwmmxt_unpacklw_M0_wRn(rd1);
1738            break;
1739        case 2:
1740            gen_op_iwmmxt_unpackll_M0_wRn(rd1);
1741            break;
1742        case 3:
1743            return 1;
1744        }
1745        gen_op_iwmmxt_movq_wRn_M0(wrd);
1746        gen_op_iwmmxt_set_mup();
1747        gen_op_iwmmxt_set_cup();
1748        break;
1749    case 0x10c: case 0x50c: case 0x90c: case 0xd0c:     /* WUNPCKIH */
1750        wrd = (insn >> 12) & 0xf;
1751        rd0 = (insn >> 16) & 0xf;
1752        rd1 = (insn >> 0) & 0xf;
1753        gen_op_iwmmxt_movq_M0_wRn(rd0);
1754        switch ((insn >> 22) & 3) {
1755        case 0:
1756            gen_op_iwmmxt_unpackhb_M0_wRn(rd1);
1757            break;
1758        case 1:
1759            gen_op_iwmmxt_unpackhw_M0_wRn(rd1);
1760            break;
1761        case 2:
1762            gen_op_iwmmxt_unpackhl_M0_wRn(rd1);
1763            break;
1764        case 3:
1765            return 1;
1766        }
1767        gen_op_iwmmxt_movq_wRn_M0(wrd);
1768        gen_op_iwmmxt_set_mup();
1769        gen_op_iwmmxt_set_cup();
1770        break;
1771    case 0x012: case 0x112: case 0x412: case 0x512:     /* WSAD */
1772        wrd = (insn >> 12) & 0xf;
1773        rd0 = (insn >> 16) & 0xf;
1774        rd1 = (insn >> 0) & 0xf;
1775        gen_op_iwmmxt_movq_M0_wRn(rd0);
1776        if (insn & (1 << 22))
1777            gen_op_iwmmxt_sadw_M0_wRn(rd1);
1778        else
1779            gen_op_iwmmxt_sadb_M0_wRn(rd1);
1780        if (!(insn & (1 << 20)))
1781            gen_op_iwmmxt_addl_M0_wRn(wrd);
1782        gen_op_iwmmxt_movq_wRn_M0(wrd);
1783        gen_op_iwmmxt_set_mup();
1784        break;
1785    case 0x010: case 0x110: case 0x210: case 0x310:     /* WMUL */
1786        wrd = (insn >> 12) & 0xf;
1787        rd0 = (insn >> 16) & 0xf;
1788        rd1 = (insn >> 0) & 0xf;
1789        gen_op_iwmmxt_movq_M0_wRn(rd0);
1790        if (insn & (1 << 21)) {
1791            if (insn & (1 << 20))
1792                gen_op_iwmmxt_mulshw_M0_wRn(rd1);
1793            else
1794                gen_op_iwmmxt_mulslw_M0_wRn(rd1);
1795        } else {
1796            if (insn & (1 << 20))
1797                gen_op_iwmmxt_muluhw_M0_wRn(rd1);
1798            else
1799                gen_op_iwmmxt_mululw_M0_wRn(rd1);
1800        }
1801        gen_op_iwmmxt_movq_wRn_M0(wrd);
1802        gen_op_iwmmxt_set_mup();
1803        break;
1804    case 0x410: case 0x510: case 0x610: case 0x710:     /* WMAC */
1805        wrd = (insn >> 12) & 0xf;
1806        rd0 = (insn >> 16) & 0xf;
1807        rd1 = (insn >> 0) & 0xf;
1808        gen_op_iwmmxt_movq_M0_wRn(rd0);
1809        if (insn & (1 << 21))
1810            gen_op_iwmmxt_macsw_M0_wRn(rd1);
1811        else
1812            gen_op_iwmmxt_macuw_M0_wRn(rd1);
1813        if (!(insn & (1 << 20))) {
1814            iwmmxt_load_reg(cpu_V1, wrd);
1815            tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
1816        }
1817        gen_op_iwmmxt_movq_wRn_M0(wrd);
1818        gen_op_iwmmxt_set_mup();
1819        break;
1820    case 0x006: case 0x406: case 0x806: case 0xc06:     /* WCMPEQ */
1821        wrd = (insn >> 12) & 0xf;
1822        rd0 = (insn >> 16) & 0xf;
1823        rd1 = (insn >> 0) & 0xf;
1824        gen_op_iwmmxt_movq_M0_wRn(rd0);
1825        switch ((insn >> 22) & 3) {
1826        case 0:
1827            gen_op_iwmmxt_cmpeqb_M0_wRn(rd1);
1828            break;
1829        case 1:
1830            gen_op_iwmmxt_cmpeqw_M0_wRn(rd1);
1831            break;
1832        case 2:
1833            gen_op_iwmmxt_cmpeql_M0_wRn(rd1);
1834            break;
1835        case 3:
1836            return 1;
1837        }
1838        gen_op_iwmmxt_movq_wRn_M0(wrd);
1839        gen_op_iwmmxt_set_mup();
1840        gen_op_iwmmxt_set_cup();
1841        break;
1842    case 0x800: case 0x900: case 0xc00: case 0xd00:     /* WAVG2 */
1843        wrd = (insn >> 12) & 0xf;
1844        rd0 = (insn >> 16) & 0xf;
1845        rd1 = (insn >> 0) & 0xf;
1846        gen_op_iwmmxt_movq_M0_wRn(rd0);
1847        if (insn & (1 << 22)) {
1848            if (insn & (1 << 20))
1849                gen_op_iwmmxt_avgw1_M0_wRn(rd1);
1850            else
1851                gen_op_iwmmxt_avgw0_M0_wRn(rd1);
1852        } else {
1853            if (insn & (1 << 20))
1854                gen_op_iwmmxt_avgb1_M0_wRn(rd1);
1855            else
1856                gen_op_iwmmxt_avgb0_M0_wRn(rd1);
1857        }
1858        gen_op_iwmmxt_movq_wRn_M0(wrd);
1859        gen_op_iwmmxt_set_mup();
1860        gen_op_iwmmxt_set_cup();
1861        break;
1862    case 0x802: case 0x902: case 0xa02: case 0xb02:     /* WALIGNR */
1863        wrd = (insn >> 12) & 0xf;
1864        rd0 = (insn >> 16) & 0xf;
1865        rd1 = (insn >> 0) & 0xf;
1866        gen_op_iwmmxt_movq_M0_wRn(rd0);
1867        tmp = iwmmxt_load_creg(ARM_IWMMXT_wCGR0 + ((insn >> 20) & 3));
1868        tcg_gen_andi_i32(tmp, tmp, 7);
1869        iwmmxt_load_reg(cpu_V1, rd1);
1870        gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp);
1871        tcg_temp_free_i32(tmp);
1872        gen_op_iwmmxt_movq_wRn_M0(wrd);
1873        gen_op_iwmmxt_set_mup();
1874        break;
1875    case 0x601: case 0x605: case 0x609: case 0x60d:     /* TINSR */
1876        if (((insn >> 6) & 3) == 3)
1877            return 1;
1878        rd = (insn >> 12) & 0xf;
1879        wrd = (insn >> 16) & 0xf;
1880        tmp = load_reg(s, rd);
1881        gen_op_iwmmxt_movq_M0_wRn(wrd);
1882        switch ((insn >> 6) & 3) {
1883        case 0:
1884            tmp2 = tcg_const_i32(0xff);
1885            tmp3 = tcg_const_i32((insn & 7) << 3);
1886            break;
1887        case 1:
1888            tmp2 = tcg_const_i32(0xffff);
1889            tmp3 = tcg_const_i32((insn & 3) << 4);
1890            break;
1891        case 2:
1892            tmp2 = tcg_const_i32(0xffffffff);
1893            tmp3 = tcg_const_i32((insn & 1) << 5);
1894            break;
1895        default:
1896            tmp2 = NULL;
1897            tmp3 = NULL;
1898        }
1899        gen_helper_iwmmxt_insr(cpu_M0, cpu_M0, tmp, tmp2, tmp3);
1900        tcg_temp_free_i32(tmp3);
1901        tcg_temp_free_i32(tmp2);
1902        tcg_temp_free_i32(tmp);
1903        gen_op_iwmmxt_movq_wRn_M0(wrd);
1904        gen_op_iwmmxt_set_mup();
1905        break;
1906    case 0x107: case 0x507: case 0x907: case 0xd07:     /* TEXTRM */
1907        rd = (insn >> 12) & 0xf;
1908        wrd = (insn >> 16) & 0xf;
1909        if (rd == 15 || ((insn >> 22) & 3) == 3)
1910            return 1;
1911        gen_op_iwmmxt_movq_M0_wRn(wrd);
1912        tmp = tcg_temp_new_i32();
1913        switch ((insn >> 22) & 3) {
1914        case 0:
1915            tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 7) << 3);
1916            tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1917            if (insn & 8) {
1918                tcg_gen_ext8s_i32(tmp, tmp);
1919            } else {
1920                tcg_gen_andi_i32(tmp, tmp, 0xff);
1921            }
1922            break;
1923        case 1:
1924            tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 3) << 4);
1925            tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1926            if (insn & 8) {
1927                tcg_gen_ext16s_i32(tmp, tmp);
1928            } else {
1929                tcg_gen_andi_i32(tmp, tmp, 0xffff);
1930            }
1931            break;
1932        case 2:
1933            tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 1) << 5);
1934            tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1935            break;
1936        }
1937        store_reg(s, rd, tmp);
1938        break;
1939    case 0x117: case 0x517: case 0x917: case 0xd17:     /* TEXTRC */
1940        if ((insn & 0x000ff008) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1941            return 1;
1942        tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1943        switch ((insn >> 22) & 3) {
1944        case 0:
1945            tcg_gen_shri_i32(tmp, tmp, ((insn & 7) << 2) + 0);
1946            break;
1947        case 1:
1948            tcg_gen_shri_i32(tmp, tmp, ((insn & 3) << 3) + 4);
1949            break;
1950        case 2:
1951            tcg_gen_shri_i32(tmp, tmp, ((insn & 1) << 4) + 12);
1952            break;
1953        }
1954        tcg_gen_shli_i32(tmp, tmp, 28);
1955        gen_set_nzcv(tmp);
1956        tcg_temp_free_i32(tmp);
1957        break;
1958    case 0x401: case 0x405: case 0x409: case 0x40d:     /* TBCST */
1959        if (((insn >> 6) & 3) == 3)
1960            return 1;
1961        rd = (insn >> 12) & 0xf;
1962        wrd = (insn >> 16) & 0xf;
1963        tmp = load_reg(s, rd);
1964        switch ((insn >> 6) & 3) {
1965        case 0:
1966            gen_helper_iwmmxt_bcstb(cpu_M0, tmp);
1967            break;
1968        case 1:
1969            gen_helper_iwmmxt_bcstw(cpu_M0, tmp);
1970            break;
1971        case 2:
1972            gen_helper_iwmmxt_bcstl(cpu_M0, tmp);
1973            break;
1974        }
1975        tcg_temp_free_i32(tmp);
1976        gen_op_iwmmxt_movq_wRn_M0(wrd);
1977        gen_op_iwmmxt_set_mup();
1978        break;
1979    case 0x113: case 0x513: case 0x913: case 0xd13:     /* TANDC */
1980        if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1981            return 1;
1982        tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1983        tmp2 = tcg_temp_new_i32();
1984        tcg_gen_mov_i32(tmp2, tmp);
1985        switch ((insn >> 22) & 3) {
1986        case 0:
1987            for (i = 0; i < 7; i ++) {
1988                tcg_gen_shli_i32(tmp2, tmp2, 4);
1989                tcg_gen_and_i32(tmp, tmp, tmp2);
1990            }
1991            break;
1992        case 1:
1993            for (i = 0; i < 3; i ++) {
1994                tcg_gen_shli_i32(tmp2, tmp2, 8);
1995                tcg_gen_and_i32(tmp, tmp, tmp2);
1996            }
1997            break;
1998        case 2:
1999            tcg_gen_shli_i32(tmp2, tmp2, 16);
2000            tcg_gen_and_i32(tmp, tmp, tmp2);
2001            break;
2002        }
2003        gen_set_nzcv(tmp);
2004        tcg_temp_free_i32(tmp2);
2005        tcg_temp_free_i32(tmp);
2006        break;
2007    case 0x01c: case 0x41c: case 0x81c: case 0xc1c:     /* WACC */
2008        wrd = (insn >> 12) & 0xf;
2009        rd0 = (insn >> 16) & 0xf;
2010        gen_op_iwmmxt_movq_M0_wRn(rd0);
2011        switch ((insn >> 22) & 3) {
2012        case 0:
2013            gen_helper_iwmmxt_addcb(cpu_M0, cpu_M0);
2014            break;
2015        case 1:
2016            gen_helper_iwmmxt_addcw(cpu_M0, cpu_M0);
2017            break;
2018        case 2:
2019            gen_helper_iwmmxt_addcl(cpu_M0, cpu_M0);
2020            break;
2021        case 3:
2022            return 1;
2023        }
2024        gen_op_iwmmxt_movq_wRn_M0(wrd);
2025        gen_op_iwmmxt_set_mup();
2026        break;
2027    case 0x115: case 0x515: case 0x915: case 0xd15:     /* TORC */
2028        if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
2029            return 1;
2030        tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
2031        tmp2 = tcg_temp_new_i32();
2032        tcg_gen_mov_i32(tmp2, tmp);
2033        switch ((insn >> 22) & 3) {
2034        case 0:
2035            for (i = 0; i < 7; i ++) {
2036                tcg_gen_shli_i32(tmp2, tmp2, 4);
2037                tcg_gen_or_i32(tmp, tmp, tmp2);
2038            }
2039            break;
2040        case 1:
2041            for (i = 0; i < 3; i ++) {
2042                tcg_gen_shli_i32(tmp2, tmp2, 8);
2043                tcg_gen_or_i32(tmp, tmp, tmp2);
2044            }
2045            break;
2046        case 2:
2047            tcg_gen_shli_i32(tmp2, tmp2, 16);
2048            tcg_gen_or_i32(tmp, tmp, tmp2);
2049            break;
2050        }
2051        gen_set_nzcv(tmp);
2052        tcg_temp_free_i32(tmp2);
2053        tcg_temp_free_i32(tmp);
2054        break;
2055    case 0x103: case 0x503: case 0x903: case 0xd03:     /* TMOVMSK */
2056        rd = (insn >> 12) & 0xf;
2057        rd0 = (insn >> 16) & 0xf;
2058        if ((insn & 0xf) != 0 || ((insn >> 22) & 3) == 3)
2059            return 1;
2060        gen_op_iwmmxt_movq_M0_wRn(rd0);
2061        tmp = tcg_temp_new_i32();
2062        switch ((insn >> 22) & 3) {
2063        case 0:
2064            gen_helper_iwmmxt_msbb(tmp, cpu_M0);
2065            break;
2066        case 1:
2067            gen_helper_iwmmxt_msbw(tmp, cpu_M0);
2068            break;
2069        case 2:
2070            gen_helper_iwmmxt_msbl(tmp, cpu_M0);
2071            break;
2072        }
2073        store_reg(s, rd, tmp);
2074        break;
2075    case 0x106: case 0x306: case 0x506: case 0x706:     /* WCMPGT */
2076    case 0x906: case 0xb06: case 0xd06: case 0xf06:
2077        wrd = (insn >> 12) & 0xf;
2078        rd0 = (insn >> 16) & 0xf;
2079        rd1 = (insn >> 0) & 0xf;
2080        gen_op_iwmmxt_movq_M0_wRn(rd0);
2081        switch ((insn >> 22) & 3) {
2082        case 0:
2083            if (insn & (1 << 21))
2084                gen_op_iwmmxt_cmpgtsb_M0_wRn(rd1);
2085            else
2086                gen_op_iwmmxt_cmpgtub_M0_wRn(rd1);
2087            break;
2088        case 1:
2089            if (insn & (1 << 21))
2090                gen_op_iwmmxt_cmpgtsw_M0_wRn(rd1);
2091            else
2092                gen_op_iwmmxt_cmpgtuw_M0_wRn(rd1);
2093            break;
2094        case 2:
2095            if (insn & (1 << 21))
2096                gen_op_iwmmxt_cmpgtsl_M0_wRn(rd1);
2097            else
2098                gen_op_iwmmxt_cmpgtul_M0_wRn(rd1);
2099            break;
2100        case 3:
2101            return 1;
2102        }
2103        gen_op_iwmmxt_movq_wRn_M0(wrd);
2104        gen_op_iwmmxt_set_mup();
2105        gen_op_iwmmxt_set_cup();
2106        break;
2107    case 0x00e: case 0x20e: case 0x40e: case 0x60e:     /* WUNPCKEL */
2108    case 0x80e: case 0xa0e: case 0xc0e: case 0xe0e:
2109        wrd = (insn >> 12) & 0xf;
2110        rd0 = (insn >> 16) & 0xf;
2111        gen_op_iwmmxt_movq_M0_wRn(rd0);
2112        switch ((insn >> 22) & 3) {
2113        case 0:
2114            if (insn & (1 << 21))
2115                gen_op_iwmmxt_unpacklsb_M0();
2116            else
2117                gen_op_iwmmxt_unpacklub_M0();
2118            break;
2119        case 1:
2120            if (insn & (1 << 21))
2121                gen_op_iwmmxt_unpacklsw_M0();
2122            else
2123                gen_op_iwmmxt_unpackluw_M0();
2124            break;
2125        case 2:
2126            if (insn & (1 << 21))
2127                gen_op_iwmmxt_unpacklsl_M0();
2128            else
2129                gen_op_iwmmxt_unpacklul_M0();
2130            break;
2131        case 3:
2132            return 1;
2133        }
2134        gen_op_iwmmxt_movq_wRn_M0(wrd);
2135        gen_op_iwmmxt_set_mup();
2136        gen_op_iwmmxt_set_cup();
2137        break;
2138    case 0x00c: case 0x20c: case 0x40c: case 0x60c:     /* WUNPCKEH */
2139    case 0x80c: case 0xa0c: case 0xc0c: case 0xe0c:
2140        wrd = (insn >> 12) & 0xf;
2141        rd0 = (insn >> 16) & 0xf;
2142        gen_op_iwmmxt_movq_M0_wRn(rd0);
2143        switch ((insn >> 22) & 3) {
2144        case 0:
2145            if (insn & (1 << 21))
2146                gen_op_iwmmxt_unpackhsb_M0();
2147            else
2148                gen_op_iwmmxt_unpackhub_M0();
2149            break;
2150        case 1:
2151            if (insn & (1 << 21))
2152                gen_op_iwmmxt_unpackhsw_M0();
2153            else
2154                gen_op_iwmmxt_unpackhuw_M0();
2155            break;
2156        case 2:
2157            if (insn & (1 << 21))
2158                gen_op_iwmmxt_unpackhsl_M0();
2159            else
2160                gen_op_iwmmxt_unpackhul_M0();
2161            break;
2162        case 3:
2163            return 1;
2164        }
2165        gen_op_iwmmxt_movq_wRn_M0(wrd);
2166        gen_op_iwmmxt_set_mup();
2167        gen_op_iwmmxt_set_cup();
2168        break;
2169    case 0x204: case 0x604: case 0xa04: case 0xe04:     /* WSRL */
2170    case 0x214: case 0x614: case 0xa14: case 0xe14:
2171        if (((insn >> 22) & 3) == 0)
2172            return 1;
2173        wrd = (insn >> 12) & 0xf;
2174        rd0 = (insn >> 16) & 0xf;
2175        gen_op_iwmmxt_movq_M0_wRn(rd0);
2176        tmp = tcg_temp_new_i32();
2177        if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2178            tcg_temp_free_i32(tmp);
2179            return 1;
2180        }
2181        switch ((insn >> 22) & 3) {
2182        case 1:
2183            gen_helper_iwmmxt_srlw(cpu_M0, cpu_env, cpu_M0, tmp);
2184            break;
2185        case 2:
2186            gen_helper_iwmmxt_srll(cpu_M0, cpu_env, cpu_M0, tmp);
2187            break;
2188        case 3:
2189            gen_helper_iwmmxt_srlq(cpu_M0, cpu_env, cpu_M0, tmp);
2190            break;
2191        }
2192        tcg_temp_free_i32(tmp);
2193        gen_op_iwmmxt_movq_wRn_M0(wrd);
2194        gen_op_iwmmxt_set_mup();
2195        gen_op_iwmmxt_set_cup();
2196        break;
2197    case 0x004: case 0x404: case 0x804: case 0xc04:     /* WSRA */
2198    case 0x014: case 0x414: case 0x814: case 0xc14:
2199        if (((insn >> 22) & 3) == 0)
2200            return 1;
2201        wrd = (insn >> 12) & 0xf;
2202        rd0 = (insn >> 16) & 0xf;
2203        gen_op_iwmmxt_movq_M0_wRn(rd0);
2204        tmp = tcg_temp_new_i32();
2205        if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2206            tcg_temp_free_i32(tmp);
2207            return 1;
2208        }
2209        switch ((insn >> 22) & 3) {
2210        case 1:
2211            gen_helper_iwmmxt_sraw(cpu_M0, cpu_env, cpu_M0, tmp);
2212            break;
2213        case 2:
2214            gen_helper_iwmmxt_sral(cpu_M0, cpu_env, cpu_M0, tmp);
2215            break;
2216        case 3:
2217            gen_helper_iwmmxt_sraq(cpu_M0, cpu_env, cpu_M0, tmp);
2218            break;
2219        }
2220        tcg_temp_free_i32(tmp);
2221        gen_op_iwmmxt_movq_wRn_M0(wrd);
2222        gen_op_iwmmxt_set_mup();
2223        gen_op_iwmmxt_set_cup();
2224        break;
2225    case 0x104: case 0x504: case 0x904: case 0xd04:     /* WSLL */
2226    case 0x114: case 0x514: case 0x914: case 0xd14:
2227        if (((insn >> 22) & 3) == 0)
2228            return 1;
2229        wrd = (insn >> 12) & 0xf;
2230        rd0 = (insn >> 16) & 0xf;
2231        gen_op_iwmmxt_movq_M0_wRn(rd0);
2232        tmp = tcg_temp_new_i32();
2233        if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2234            tcg_temp_free_i32(tmp);
2235            return 1;
2236        }
2237        switch ((insn >> 22) & 3) {
2238        case 1:
2239            gen_helper_iwmmxt_sllw(cpu_M0, cpu_env, cpu_M0, tmp);
2240            break;
2241        case 2:
2242            gen_helper_iwmmxt_slll(cpu_M0, cpu_env, cpu_M0, tmp);
2243            break;
2244        case 3:
2245            gen_helper_iwmmxt_sllq(cpu_M0, cpu_env, cpu_M0, tmp);
2246            break;
2247        }
2248        tcg_temp_free_i32(tmp);
2249        gen_op_iwmmxt_movq_wRn_M0(wrd);
2250        gen_op_iwmmxt_set_mup();
2251        gen_op_iwmmxt_set_cup();
2252        break;
2253    case 0x304: case 0x704: case 0xb04: case 0xf04:     /* WROR */
2254    case 0x314: case 0x714: case 0xb14: case 0xf14:
2255        if (((insn >> 22) & 3) == 0)
2256            return 1;
2257        wrd = (insn >> 12) & 0xf;
2258        rd0 = (insn >> 16) & 0xf;
2259        gen_op_iwmmxt_movq_M0_wRn(rd0);
2260        tmp = tcg_temp_new_i32();
2261        switch ((insn >> 22) & 3) {
2262        case 1:
2263            if (gen_iwmmxt_shift(insn, 0xf, tmp)) {
2264                tcg_temp_free_i32(tmp);
2265                return 1;
2266            }
2267            gen_helper_iwmmxt_rorw(cpu_M0, cpu_env, cpu_M0, tmp);
2268            break;
2269        case 2:
2270            if (gen_iwmmxt_shift(insn, 0x1f, tmp)) {
2271                tcg_temp_free_i32(tmp);
2272                return 1;
2273            }
2274            gen_helper_iwmmxt_rorl(cpu_M0, cpu_env, cpu_M0, tmp);
2275            break;
2276        case 3:
2277            if (gen_iwmmxt_shift(insn, 0x3f, tmp)) {
2278                tcg_temp_free_i32(tmp);
2279                return 1;
2280            }
2281            gen_helper_iwmmxt_rorq(cpu_M0, cpu_env, cpu_M0, tmp);
2282            break;
2283        }
2284        tcg_temp_free_i32(tmp);
2285        gen_op_iwmmxt_movq_wRn_M0(wrd);
2286        gen_op_iwmmxt_set_mup();
2287        gen_op_iwmmxt_set_cup();
2288        break;
2289    case 0x116: case 0x316: case 0x516: case 0x716:     /* WMIN */
2290    case 0x916: case 0xb16: case 0xd16: case 0xf16:
2291        wrd = (insn >> 12) & 0xf;
2292        rd0 = (insn >> 16) & 0xf;
2293        rd1 = (insn >> 0) & 0xf;
2294        gen_op_iwmmxt_movq_M0_wRn(rd0);
2295        switch ((insn >> 22) & 3) {
2296        case 0:
2297            if (insn & (1 << 21))
2298                gen_op_iwmmxt_minsb_M0_wRn(rd1);
2299            else
2300                gen_op_iwmmxt_minub_M0_wRn(rd1);
2301            break;
2302        case 1:
2303            if (insn & (1 << 21))
2304                gen_op_iwmmxt_minsw_M0_wRn(rd1);
2305            else
2306                gen_op_iwmmxt_minuw_M0_wRn(rd1);
2307            break;
2308        case 2:
2309            if (insn & (1 << 21))
2310                gen_op_iwmmxt_minsl_M0_wRn(rd1);
2311            else
2312                gen_op_iwmmxt_minul_M0_wRn(rd1);
2313            break;
2314        case 3:
2315            return 1;
2316        }
2317        gen_op_iwmmxt_movq_wRn_M0(wrd);
2318        gen_op_iwmmxt_set_mup();
2319        break;
2320    case 0x016: case 0x216: case 0x416: case 0x616:     /* WMAX */
2321    case 0x816: case 0xa16: case 0xc16: case 0xe16:
2322        wrd = (insn >> 12) & 0xf;
2323        rd0 = (insn >> 16) & 0xf;
2324        rd1 = (insn >> 0) & 0xf;
2325        gen_op_iwmmxt_movq_M0_wRn(rd0);
2326        switch ((insn >> 22) & 3) {
2327        case 0:
2328            if (insn & (1 << 21))
2329                gen_op_iwmmxt_maxsb_M0_wRn(rd1);
2330            else
2331                gen_op_iwmmxt_maxub_M0_wRn(rd1);
2332            break;
2333        case 1:
2334            if (insn & (1 << 21))
2335                gen_op_iwmmxt_maxsw_M0_wRn(rd1);
2336            else
2337                gen_op_iwmmxt_maxuw_M0_wRn(rd1);
2338            break;
2339        case 2:
2340            if (insn & (1 << 21))
2341                gen_op_iwmmxt_maxsl_M0_wRn(rd1);
2342            else
2343                gen_op_iwmmxt_maxul_M0_wRn(rd1);
2344            break;
2345        case 3:
2346            return 1;
2347        }
2348        gen_op_iwmmxt_movq_wRn_M0(wrd);
2349        gen_op_iwmmxt_set_mup();
2350        break;
2351    case 0x002: case 0x102: case 0x202: case 0x302:     /* WALIGNI */
2352    case 0x402: case 0x502: case 0x602: case 0x702:
2353        wrd = (insn >> 12) & 0xf;
2354        rd0 = (insn >> 16) & 0xf;
2355        rd1 = (insn >> 0) & 0xf;
2356        gen_op_iwmmxt_movq_M0_wRn(rd0);
2357        tmp = tcg_const_i32((insn >> 20) & 3);
2358        iwmmxt_load_reg(cpu_V1, rd1);
2359        gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp);
2360        tcg_temp_free_i32(tmp);
2361        gen_op_iwmmxt_movq_wRn_M0(wrd);
2362        gen_op_iwmmxt_set_mup();
2363        break;
2364    case 0x01a: case 0x11a: case 0x21a: case 0x31a:     /* WSUB */
2365    case 0x41a: case 0x51a: case 0x61a: case 0x71a:
2366    case 0x81a: case 0x91a: case 0xa1a: case 0xb1a:
2367    case 0xc1a: case 0xd1a: case 0xe1a: case 0xf1a:
2368        wrd = (insn >> 12) & 0xf;
2369        rd0 = (insn >> 16) & 0xf;
2370        rd1 = (insn >> 0) & 0xf;
2371        gen_op_iwmmxt_movq_M0_wRn(rd0);
2372        switch ((insn >> 20) & 0xf) {
2373        case 0x0:
2374            gen_op_iwmmxt_subnb_M0_wRn(rd1);
2375            break;
2376        case 0x1:
2377            gen_op_iwmmxt_subub_M0_wRn(rd1);
2378            break;
2379        case 0x3:
2380            gen_op_iwmmxt_subsb_M0_wRn(rd1);
2381            break;
2382        case 0x4:
2383            gen_op_iwmmxt_subnw_M0_wRn(rd1);
2384            break;
2385        case 0x5:
2386            gen_op_iwmmxt_subuw_M0_wRn(rd1);
2387            break;
2388        case 0x7:
2389            gen_op_iwmmxt_subsw_M0_wRn(rd1);
2390            break;
2391        case 0x8:
2392            gen_op_iwmmxt_subnl_M0_wRn(rd1);
2393            break;
2394        case 0x9:
2395            gen_op_iwmmxt_subul_M0_wRn(rd1);
2396            break;
2397        case 0xb:
2398            gen_op_iwmmxt_subsl_M0_wRn(rd1);
2399            break;
2400        default:
2401            return 1;
2402        }
2403        gen_op_iwmmxt_movq_wRn_M0(wrd);
2404        gen_op_iwmmxt_set_mup();
2405        gen_op_iwmmxt_set_cup();
2406        break;
2407    case 0x01e: case 0x11e: case 0x21e: case 0x31e:     /* WSHUFH */
2408    case 0x41e: case 0x51e: case 0x61e: case 0x71e:
2409    case 0x81e: case 0x91e: case 0xa1e: case 0xb1e:
2410    case 0xc1e: case 0xd1e: case 0xe1e: case 0xf1e:
2411        wrd = (insn >> 12) & 0xf;
2412        rd0 = (insn >> 16) & 0xf;
2413        gen_op_iwmmxt_movq_M0_wRn(rd0);
2414        tmp = tcg_const_i32(((insn >> 16) & 0xf0) | (insn & 0x0f));
2415        gen_helper_iwmmxt_shufh(cpu_M0, cpu_env, cpu_M0, tmp);
2416        tcg_temp_free_i32(tmp);
2417        gen_op_iwmmxt_movq_wRn_M0(wrd);
2418        gen_op_iwmmxt_set_mup();
2419        gen_op_iwmmxt_set_cup();
2420        break;
2421    case 0x018: case 0x118: case 0x218: case 0x318:     /* WADD */
2422    case 0x418: case 0x518: case 0x618: case 0x718:
2423    case 0x818: case 0x918: case 0xa18: case 0xb18:
2424    case 0xc18: case 0xd18: case 0xe18: case 0xf18:
2425        wrd = (insn >> 12) & 0xf;
2426        rd0 = (insn >> 16) & 0xf;
2427        rd1 = (insn >> 0) & 0xf;
2428        gen_op_iwmmxt_movq_M0_wRn(rd0);
2429        switch ((insn >> 20) & 0xf) {
2430        case 0x0:
2431            gen_op_iwmmxt_addnb_M0_wRn(rd1);
2432            break;
2433        case 0x1:
2434            gen_op_iwmmxt_addub_M0_wRn(rd1);
2435            break;
2436        case 0x3:
2437            gen_op_iwmmxt_addsb_M0_wRn(rd1);
2438            break;
2439        case 0x4:
2440            gen_op_iwmmxt_addnw_M0_wRn(rd1);
2441            break;
2442        case 0x5:
2443            gen_op_iwmmxt_adduw_M0_wRn(rd1);
2444            break;
2445        case 0x7:
2446            gen_op_iwmmxt_addsw_M0_wRn(rd1);
2447            break;
2448        case 0x8:
2449            gen_op_iwmmxt_addnl_M0_wRn(rd1);
2450            break;
2451        case 0x9:
2452            gen_op_iwmmxt_addul_M0_wRn(rd1);
2453            break;
2454        case 0xb:
2455            gen_op_iwmmxt_addsl_M0_wRn(rd1);
2456            break;
2457        default:
2458            return 1;
2459        }
2460        gen_op_iwmmxt_movq_wRn_M0(wrd);
2461        gen_op_iwmmxt_set_mup();
2462        gen_op_iwmmxt_set_cup();
2463        break;
2464    case 0x008: case 0x108: case 0x208: case 0x308:     /* WPACK */
2465    case 0x408: case 0x508: case 0x608: case 0x708:
2466    case 0x808: case 0x908: case 0xa08: case 0xb08:
2467    case 0xc08: case 0xd08: case 0xe08: case 0xf08:
2468        if (!(insn & (1 << 20)) || ((insn >> 22) & 3) == 0)
2469            return 1;
2470        wrd = (insn >> 12) & 0xf;
2471        rd0 = (insn >> 16) & 0xf;
2472        rd1 = (insn >> 0) & 0xf;
2473        gen_op_iwmmxt_movq_M0_wRn(rd0);
2474        switch ((insn >> 22) & 3) {
2475        case 1:
2476            if (insn & (1 << 21))
2477                gen_op_iwmmxt_packsw_M0_wRn(rd1);
2478            else
2479                gen_op_iwmmxt_packuw_M0_wRn(rd1);
2480            break;
2481        case 2:
2482            if (insn & (1 << 21))
2483                gen_op_iwmmxt_packsl_M0_wRn(rd1);
2484            else
2485                gen_op_iwmmxt_packul_M0_wRn(rd1);
2486            break;
2487        case 3:
2488            if (insn & (1 << 21))
2489                gen_op_iwmmxt_packsq_M0_wRn(rd1);
2490            else
2491                gen_op_iwmmxt_packuq_M0_wRn(rd1);
2492            break;
2493        }
2494        gen_op_iwmmxt_movq_wRn_M0(wrd);
2495        gen_op_iwmmxt_set_mup();
2496        gen_op_iwmmxt_set_cup();
2497        break;
2498    case 0x201: case 0x203: case 0x205: case 0x207:
2499    case 0x209: case 0x20b: case 0x20d: case 0x20f:
2500    case 0x211: case 0x213: case 0x215: case 0x217:
2501    case 0x219: case 0x21b: case 0x21d: case 0x21f:
2502        wrd = (insn >> 5) & 0xf;
2503        rd0 = (insn >> 12) & 0xf;
2504        rd1 = (insn >> 0) & 0xf;
2505        if (rd0 == 0xf || rd1 == 0xf)
2506            return 1;
2507        gen_op_iwmmxt_movq_M0_wRn(wrd);
2508        tmp = load_reg(s, rd0);
2509        tmp2 = load_reg(s, rd1);
2510        switch ((insn >> 16) & 0xf) {
2511        case 0x0:                                       /* TMIA */
2512            gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2513            break;
2514        case 0x8:                                       /* TMIAPH */
2515            gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2516            break;
2517        case 0xc: case 0xd: case 0xe: case 0xf:                 /* TMIAxy */
2518            if (insn & (1 << 16))
2519                tcg_gen_shri_i32(tmp, tmp, 16);
2520            if (insn & (1 << 17))
2521                tcg_gen_shri_i32(tmp2, tmp2, 16);
2522            gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2523            break;
2524        default:
2525            tcg_temp_free_i32(tmp2);
2526            tcg_temp_free_i32(tmp);
2527            return 1;
2528        }
2529        tcg_temp_free_i32(tmp2);
2530        tcg_temp_free_i32(tmp);
2531        gen_op_iwmmxt_movq_wRn_M0(wrd);
2532        gen_op_iwmmxt_set_mup();
2533        break;
2534    default:
2535        return 1;
2536    }
2537
2538    return 0;
2539}
2540
2541/* Disassemble an XScale DSP instruction.  Returns nonzero if an error occurred
2542   (i.e. an undefined instruction).  */
2543static int disas_dsp_insn(DisasContext *s, uint32_t insn)
2544{
2545    int acc, rd0, rd1, rdhi, rdlo;
2546    TCGv_i32 tmp, tmp2;
2547
2548    if ((insn & 0x0ff00f10) == 0x0e200010) {
2549        /* Multiply with Internal Accumulate Format */
2550        rd0 = (insn >> 12) & 0xf;
2551        rd1 = insn & 0xf;
2552        acc = (insn >> 5) & 7;
2553
2554        if (acc != 0)
2555            return 1;
2556
2557        tmp = load_reg(s, rd0);
2558        tmp2 = load_reg(s, rd1);
2559        switch ((insn >> 16) & 0xf) {
2560        case 0x0:                                       /* MIA */
2561            gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2562            break;
2563        case 0x8:                                       /* MIAPH */
2564            gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2565            break;
2566        case 0xc:                                       /* MIABB */
2567        case 0xd:                                       /* MIABT */
2568        case 0xe:                                       /* MIATB */
2569        case 0xf:                                       /* MIATT */
2570            if (insn & (1 << 16))
2571                tcg_gen_shri_i32(tmp, tmp, 16);
2572            if (insn & (1 << 17))
2573                tcg_gen_shri_i32(tmp2, tmp2, 16);
2574            gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2575            break;
2576        default:
2577            return 1;
2578        }
2579        tcg_temp_free_i32(tmp2);
2580        tcg_temp_free_i32(tmp);
2581
2582        gen_op_iwmmxt_movq_wRn_M0(acc);
2583        return 0;
2584    }
2585
2586    if ((insn & 0x0fe00ff8) == 0x0c400000) {
2587        /* Internal Accumulator Access Format */
2588        rdhi = (insn >> 16) & 0xf;
2589        rdlo = (insn >> 12) & 0xf;
2590        acc = insn & 7;
2591
2592        if (acc != 0)
2593            return 1;
2594
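            /*
             * Illustrative note (editorial, not in the original source):
             * acc0 is a 40-bit accumulator.  MRA returns its low 32 bits
             * in Rdlo and bits [39:32] in Rdhi, hence the masking of Rdhi
             * with (1 << (40 - 32)) - 1 below; MAR stores the {Rdhi:Rdlo}
             * concatenation back, and any bits above bit 39 are simply
             * dropped on the next MRA read-back.
             */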
2595        if (insn & ARM_CP_RW_BIT) {                     /* MRA */
2596            iwmmxt_load_reg(cpu_V0, acc);
2597            tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
2598            tcg_gen_extrh_i64_i32(cpu_R[rdhi], cpu_V0);
2599            tcg_gen_andi_i32(cpu_R[rdhi], cpu_R[rdhi], (1 << (40 - 32)) - 1);
2600        } else {                                        /* MAR */
2601            tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
2602            iwmmxt_store_reg(cpu_V0, acc);
2603        }
2604        return 0;
2605    }
2606
2607    return 1;
2608}
2609
2610#define VFP_REG_SHR(x, n) (((n) > 0) ? (x) >> (n) : (x) << -(n))
2611#define VFP_SREG(insn, bigbit, smallbit) \
2612  ((VFP_REG_SHR(insn, bigbit - 1) & 0x1e) | (((insn) >> (smallbit)) & 1))
2613#define VFP_DREG(reg, insn, bigbit, smallbit) do { \
2614    if (arm_dc_feature(s, ARM_FEATURE_VFP3)) { \
2615        reg = (((insn) >> (bigbit)) & 0x0f) \
2616              | (((insn) >> ((smallbit) - 4)) & 0x10); \
2617    } else { \
2618        if (insn & (1 << (smallbit))) \
2619            return 1; \
2620        reg = ((insn) >> (bigbit)) & 0x0f; \
2621    }} while (0)
2622
2623#define VFP_SREG_D(insn) VFP_SREG(insn, 12, 22)
2624#define VFP_DREG_D(reg, insn) VFP_DREG(reg, insn, 12, 22)
2625#define VFP_SREG_N(insn) VFP_SREG(insn, 16,  7)
2626#define VFP_DREG_N(reg, insn) VFP_DREG(reg, insn, 16,  7)
2627#define VFP_SREG_M(insn) VFP_SREG(insn,  0,  5)
2628#define VFP_DREG_M(reg, insn) VFP_DREG(reg, insn,  0,  5)
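
    /*
     * Worked example (illustrative, not part of the original source): for
     * an instruction word with Vd = bits [15:12] = 0xa and D = bit 22 = 1,
     *
     *     VFP_SREG_D(insn)      == (0xa << 1) | 1  == 21  ->  s21
     *     VFP_DREG_D(rd, insn)  sets rd = 0xa | 0x10 == 26  ->  d26 (VFP3+)
     *
     * i.e. single-precision registers are numbered {Vd:D} while
     * double-precision registers are numbered {D:Vd}.
     */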
2629
2630static void gen_neon_dup_low16(TCGv_i32 var)
2631{
2632    TCGv_i32 tmp = tcg_temp_new_i32();
2633    tcg_gen_ext16u_i32(var, var);
2634    tcg_gen_shli_i32(tmp, var, 16);
2635    tcg_gen_or_i32(var, var, tmp);
2636    tcg_temp_free_i32(tmp);
2637}
2638
2639static void gen_neon_dup_high16(TCGv_i32 var)
2640{
2641    TCGv_i32 tmp = tcg_temp_new_i32();
2642    tcg_gen_andi_i32(var, var, 0xffff0000);
2643    tcg_gen_shri_i32(tmp, var, 16);
2644    tcg_gen_or_i32(var, var, tmp);
2645    tcg_temp_free_i32(tmp);
2646}
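
    /*
     * Example (illustrative): with var = 0x1234abcd,
     *   gen_neon_dup_low16()  leaves var = 0xabcdabcd
     *   gen_neon_dup_high16() leaves var = 0x12341234
     * i.e. the selected 16-bit half is duplicated into both halves.
     */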
2647
2648/*
2649 * Disassemble a VFP instruction.  Returns nonzero if an error occurred
2650 * (i.e. an undefined instruction).
2651 */
2652static int disas_vfp_insn(DisasContext *s, uint32_t insn)
2653{
2654    if (!arm_dc_feature(s, ARM_FEATURE_VFP)) {
2655        return 1;
2656    }
2657
2658    /*
2659     * If the decodetree decoder handles this insn it will always
2660     * emit code to either execute the insn or generate an appropriate
2661     * exception; so we don't need to ever return non-zero to tell
2662     * the calling code to emit an UNDEF exception.
2663     */
2664    if (extract32(insn, 28, 4) == 0xf) {
2665        if (disas_vfp_uncond(s, insn)) {
2666            return 0;
2667        }
2668    } else {
2669        if (disas_vfp(s, insn)) {
2670            return 0;
2671        }
2672    }
2673    /* If the decodetree decoder didn't handle this insn, it must be UNDEF */
2674    return 1;
2675}
2676
2677static inline bool use_goto_tb(DisasContext *s, target_ulong dest)
2678{
2679#ifndef CONFIG_USER_ONLY
2680    return (s->base.tb->pc & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK) ||
2681           ((s->base.pc_next - 1) & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK);
2682#else
2683    return true;
2684#endif
2685}
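
    /*
     * Editorial note: direct TB chaining is attempted only when the
     * destination shares a guest page with either the start of this TB or
     * the instruction just translated, so that page protection changes and
     * TB invalidation still take effect; otherwise gen_goto_tb() below
     * falls back to the slower lookup-and-goto-ptr path.
     */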
2686
2687static void gen_goto_ptr(void)
2688{
2689    tcg_gen_lookup_and_goto_ptr();
2690}
2691
2692/* This will end the TB but doesn't guarantee we'll return to
2693 * cpu_loop_exec. Any live exit_requests will be processed as we
2694 * enter the next TB.
2695 */
2696static void gen_goto_tb(DisasContext *s, int n, target_ulong dest)
2697{
2698    if (use_goto_tb(s, dest)) {
2699        tcg_gen_goto_tb(n);
2700        gen_set_pc_im(s, dest);
2701        tcg_gen_exit_tb(s->base.tb, n);
2702    } else {
2703        gen_set_pc_im(s, dest);
2704        gen_goto_ptr();
2705    }
2706    s->base.is_jmp = DISAS_NORETURN;
2707}
2708
2709static inline void gen_jmp (DisasContext *s, uint32_t dest)
2710{
2711    if (unlikely(is_singlestepping(s))) {
2712        /* An indirect jump so that we still trigger the debug exception.  */
2713        gen_set_pc_im(s, dest);
2714        s->base.is_jmp = DISAS_JUMP;
2715    } else {
2716        gen_goto_tb(s, 0, dest);
2717    }
2718}
2719
2720static inline void gen_mulxy(TCGv_i32 t0, TCGv_i32 t1, int x, int y)
2721{
2722    if (x)
2723        tcg_gen_sari_i32(t0, t0, 16);
2724    else
2725        gen_sxth(t0);
2726    if (y)
2727        tcg_gen_sari_i32(t1, t1, 16);
2728    else
2729        gen_sxth(t1);
2730    tcg_gen_mul_i32(t0, t0, t1);
2731}
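
    /*
     * Descriptive note (editorial): gen_mulxy implements the signed
     * 16x16 -> 32 multiply used by the SMUL<x><y>/SMLA<x><y> family;
     * x and y select the top (1) or bottom (0) halfword of t0 and t1
     * respectively before the multiply.
     */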
2732
2733/* Return the mask of PSR bits set by a MSR instruction.  */
2734static uint32_t msr_mask(DisasContext *s, int flags, int spsr)
2735{
2736    uint32_t mask;
2737
2738    mask = 0;
2739    if (flags & (1 << 0))
2740        mask |= 0xff;
2741    if (flags & (1 << 1))
2742        mask |= 0xff00;
2743    if (flags & (1 << 2))
2744        mask |= 0xff0000;
2745    if (flags & (1 << 3))
2746        mask |= 0xff000000;
2747
2748    /* Mask out undefined bits.  */
2749    mask &= ~CPSR_RESERVED;
2750    if (!arm_dc_feature(s, ARM_FEATURE_V4T)) {
2751        mask &= ~CPSR_T;
2752    }
2753    if (!arm_dc_feature(s, ARM_FEATURE_V5)) {
2754        mask &= ~CPSR_Q; /* V5TE in reality */
2755    }
2756    if (!arm_dc_feature(s, ARM_FEATURE_V6)) {
2757        mask &= ~(CPSR_E | CPSR_GE);
2758    }
2759    if (!arm_dc_feature(s, ARM_FEATURE_THUMB2)) {
2760        mask &= ~CPSR_IT;
2761    }
2762    /* Mask out execution state and reserved bits.  */
2763    if (!spsr) {
2764        mask &= ~(CPSR_EXEC | CPSR_RESERVED);
2765    }
2766    /* Mask out privileged bits.  */
2767    if (IS_USER(s))
2768        mask &= CPSR_USER;
2769    return mask;
2770}
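
    /*
     * Worked example (illustrative): MSR CPSR_fc, Rm encodes flags = 0b1001
     * (the f and c fields), giving an initial mask of 0xff000000 | 0xff ==
     * 0xff0000ff, which the checks above then narrow according to the
     * implemented architecture features and the current privilege level.
     */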
2771
2772/* Returns nonzero if access to the PSR is not permitted. Marks t0 as dead. */
2773static int gen_set_psr(DisasContext *s, uint32_t mask, int spsr, TCGv_i32 t0)
2774{
2775    TCGv_i32 tmp;
2776    if (spsr) {
2777        /* ??? This is also undefined in system mode.  */
2778        if (IS_USER(s))
2779            return 1;
2780
2781        tmp = load_cpu_field(spsr);
2782        tcg_gen_andi_i32(tmp, tmp, ~mask);
2783        tcg_gen_andi_i32(t0, t0, mask);
2784        tcg_gen_or_i32(tmp, tmp, t0);
2785        store_cpu_field(tmp, spsr);
2786    } else {
2787        gen_set_cpsr(t0, mask);
2788    }
2789    tcg_temp_free_i32(t0);
2790    gen_lookup_tb(s);
2791    return 0;
2792}
2793
2794/* Returns nonzero if access to the PSR is not permitted.  */
2795static int gen_set_psr_im(DisasContext *s, uint32_t mask, int spsr, uint32_t val)
2796{
2797    TCGv_i32 tmp;
2798    tmp = tcg_temp_new_i32();
2799    tcg_gen_movi_i32(tmp, val);
2800    return gen_set_psr(s, mask, spsr, tmp);
2801}
2802
2803static bool msr_banked_access_decode(DisasContext *s, int r, int sysm, int rn,
2804                                     int *tgtmode, int *regno)
2805{
2806    /* Decode the r and sysm fields of MSR/MRS banked accesses into
2807     * the target mode and register number, and identify the various
2808     * unpredictable cases.
2809     * MSR (banked) and MRS (banked) are CONSTRAINED UNPREDICTABLE if:
2810     *  + executed in user mode
2811     *  + using R15 as the src/dest register
2812     *  + accessing an unimplemented register
2813     *  + accessing a register that's inaccessible at current PL/security state*
2814     *  + accessing a register that you could access with a different insn
2815     * We choose to UNDEF in all these cases.
2816     * Since we don't know which of the various AArch32 modes we are in
2817     * we have to defer some checks to runtime.
2818     * Accesses to Monitor mode registers from Secure EL1 (which implies
2819     * that EL3 is AArch64) must trap to EL3.
2820     *
2821     * If the access checks fail this function will emit code to take
2822     * an exception and return false. Otherwise it will return true,
2823     * and set *tgtmode and *regno appropriately.
2824     */
2825    int exc_target = default_exception_el(s);
2826
2827    /* These instructions are present only in ARMv8, or in ARMv7 with the
2828     * Virtualization Extensions.
2829     */
2830    if (!arm_dc_feature(s, ARM_FEATURE_V8) &&
2831        !arm_dc_feature(s, ARM_FEATURE_EL2)) {
2832        goto undef;
2833    }
2834
2835    if (IS_USER(s) || rn == 15) {
2836        goto undef;
2837    }
2838
2839    /* The table in the v8 ARM ARM section F5.2.3 describes the encoding
2840     * of registers into (r, sysm).
2841     */
2842    if (r) {
2843        /* SPSRs for other modes */
2844        switch (sysm) {
2845        case 0xe: /* SPSR_fiq */
2846            *tgtmode = ARM_CPU_MODE_FIQ;
2847            break;
2848        case 0x10: /* SPSR_irq */
2849            *tgtmode = ARM_CPU_MODE_IRQ;
2850            break;
2851        case 0x12: /* SPSR_svc */
2852            *tgtmode = ARM_CPU_MODE_SVC;
2853            break;
2854        case 0x14: /* SPSR_abt */
2855            *tgtmode = ARM_CPU_MODE_ABT;
2856            break;
2857        case 0x16: /* SPSR_und */
2858            *tgtmode = ARM_CPU_MODE_UND;
2859            break;
2860        case 0x1c: /* SPSR_mon */
2861            *tgtmode = ARM_CPU_MODE_MON;
2862            break;
2863        case 0x1e: /* SPSR_hyp */
2864            *tgtmode = ARM_CPU_MODE_HYP;
2865            break;
2866        default: /* unallocated */
2867            goto undef;
2868        }
2869        /* We arbitrarily assign SPSR a register number of 16. */
2870        *regno = 16;
2871    } else {
2872        /* general purpose registers for other modes */
2873        switch (sysm) {
2874        case 0x0 ... 0x6:   /* 0b00xxx : r8_usr ... r14_usr */
2875            *tgtmode = ARM_CPU_MODE_USR;
2876            *regno = sysm + 8;
2877            break;
2878        case 0x8 ... 0xe:   /* 0b01xxx : r8_fiq ... r14_fiq */
2879            *tgtmode = ARM_CPU_MODE_FIQ;
2880            *regno = sysm;
2881            break;
2882        case 0x10 ... 0x11: /* 0b1000x : r14_irq, r13_irq */
2883            *tgtmode = ARM_CPU_MODE_IRQ;
2884            *regno = sysm & 1 ? 13 : 14;
2885            break;
2886        case 0x12 ... 0x13: /* 0b1001x : r14_svc, r13_svc */
2887            *tgtmode = ARM_CPU_MODE_SVC;
2888            *regno = sysm & 1 ? 13 : 14;
2889            break;
2890        case 0x14 ... 0x15: /* 0b1010x : r14_abt, r13_abt */
2891            *tgtmode = ARM_CPU_MODE_ABT;
2892            *regno = sysm & 1 ? 13 : 14;
2893            break;
2894        case 0x16 ... 0x17: /* 0b1011x : r14_und, r13_und */
2895            *tgtmode = ARM_CPU_MODE_UND;
2896            *regno = sysm & 1 ? 13 : 14;
2897            break;
2898        case 0x1c ... 0x1d: /* 0b1110x : r14_mon, r13_mon */
2899            *tgtmode = ARM_CPU_MODE_MON;
2900            *regno = sysm & 1 ? 13 : 14;
2901            break;
2902        case 0x1e ... 0x1f: /* 0b1111x : elr_hyp, r13_hyp */
2903            *tgtmode = ARM_CPU_MODE_HYP;
2904            /* Arbitrarily pick 17 for ELR_Hyp (which is not a banked LR!) */
2905            *regno = sysm & 1 ? 13 : 17;
2906            break;
2907        default: /* unallocated */
2908            goto undef;
2909        }
2910    }
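
        /*
         * Illustrative examples (editorial): MRS r2, SPSR_fiq encodes
         * r = 1, sysm = 0xe, giving tgtmode = ARM_CPU_MODE_FIQ, regno = 16;
         * MRS r2, r13_irq encodes r = 0, sysm = 0x11, giving
         * tgtmode = ARM_CPU_MODE_IRQ, regno = 13.
         */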
2911
2912    /* Catch the 'accessing inaccessible register' cases we can detect
2913     * at translate time.
2914     */
2915    switch (*tgtmode) {
2916    case ARM_CPU_MODE_MON:
2917        if (!arm_dc_feature(s, ARM_FEATURE_EL3) || s->ns) {
2918            goto undef;
2919        }
2920        if (s->current_el == 1) {
2921            /* If we're in Secure EL1 (which implies that EL3 is AArch64)
2922             * then accesses to Mon registers trap to EL3
2923             */
2924            exc_target = 3;
2925            goto undef;
2926        }
2927        break;
2928    case ARM_CPU_MODE_HYP:
2929        /*
2930         * SPSR_hyp and r13_hyp can only be accessed from Monitor mode
2931         * (and so we can forbid accesses from EL2 or below). elr_hyp
2932         * can be accessed also from Hyp mode, so forbid accesses from
2933         * EL0 or EL1.
2934         */
2935        if (!arm_dc_feature(s, ARM_FEATURE_EL2) || s->current_el < 2 ||
2936            (s->current_el < 3 && *regno != 17)) {
2937            goto undef;
2938        }
2939        break;
2940    default:
2941        break;
2942    }
2943
2944    return true;
2945
2946undef:
2947    /* If we get here then some access check did not pass */
2948    gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
2949                       syn_uncategorized(), exc_target);
2950    return false;
2951}
2952
2953static void gen_msr_banked(DisasContext *s, int r, int sysm, int rn)
2954{
2955    TCGv_i32 tcg_reg, tcg_tgtmode, tcg_regno;
2956    int tgtmode = 0, regno = 0;
2957
2958    if (!msr_banked_access_decode(s, r, sysm, rn, &tgtmode, &regno)) {
2959        return;
2960    }
2961
2962    /* Sync state because msr_banked() can raise exceptions */
2963    gen_set_condexec(s);
2964    gen_set_pc_im(s, s->pc_curr);
2965    tcg_reg = load_reg(s, rn);
2966    tcg_tgtmode = tcg_const_i32(tgtmode);
2967    tcg_regno = tcg_const_i32(regno);
2968    gen_helper_msr_banked(cpu_env, tcg_reg, tcg_tgtmode, tcg_regno);
2969    tcg_temp_free_i32(tcg_tgtmode);
2970    tcg_temp_free_i32(tcg_regno);
2971    tcg_temp_free_i32(tcg_reg);
2972    s->base.is_jmp = DISAS_UPDATE;
2973}
2974
2975static void gen_mrs_banked(DisasContext *s, int r, int sysm, int rn)
2976{
2977    TCGv_i32 tcg_reg, tcg_tgtmode, tcg_regno;
2978    int tgtmode = 0, regno = 0;
2979
2980    if (!msr_banked_access_decode(s, r, sysm, rn, &tgtmode, &regno)) {
2981        return;
2982    }
2983
2984    /* Sync state because mrs_banked() can raise exceptions */
2985    gen_set_condexec(s);
2986    gen_set_pc_im(s, s->pc_curr);
2987    tcg_reg = tcg_temp_new_i32();
2988    tcg_tgtmode = tcg_const_i32(tgtmode);
2989    tcg_regno = tcg_const_i32(regno);
2990    gen_helper_mrs_banked(tcg_reg, cpu_env, tcg_tgtmode, tcg_regno);
2991    tcg_temp_free_i32(tcg_tgtmode);
2992    tcg_temp_free_i32(tcg_regno);
2993    store_reg(s, rn, tcg_reg);
2994    s->base.is_jmp = DISAS_UPDATE;
2995}
2996
2997/* Store value to PC as for an exception return (ie don't
2998 * mask bits). The subsequent call to gen_helper_cpsr_write_eret()
2999 * will do the masking based on the new value of the Thumb bit.
3000 */
3001static void store_pc_exc_ret(DisasContext *s, TCGv_i32 pc)
3002{
3003    tcg_gen_mov_i32(cpu_R[15], pc);
3004    tcg_temp_free_i32(pc);
3005}
3006
3007/* Generate a v6 exception return.  Marks both values as dead.  */
3008static void gen_rfe(DisasContext *s, TCGv_i32 pc, TCGv_i32 cpsr)
3009{
3010    store_pc_exc_ret(s, pc);
3011    /* The cpsr_write_eret helper will mask the low bits of PC
3012     * appropriately depending on the new Thumb bit, so it must
3013     * be called after storing the new PC.
3014     */
3015    if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
3016        gen_io_start();
3017    }
3018    gen_helper_cpsr_write_eret(cpu_env, cpsr);
3019    tcg_temp_free_i32(cpsr);
3020    /* Must exit loop to check un-masked IRQs */
3021    s->base.is_jmp = DISAS_EXIT;
3022}
3023
3024/* Generate an old-style exception return. Marks pc as dead. */
3025static void gen_exception_return(DisasContext *s, TCGv_i32 pc)
3026{
3027    gen_rfe(s, pc, load_cpu_field(spsr));
3028}
3029
3030#define CPU_V001 cpu_V0, cpu_V0, cpu_V1
3031
3032static inline void gen_neon_add(int size, TCGv_i32 t0, TCGv_i32 t1)
3033{
3034    switch (size) {
3035    case 0: gen_helper_neon_add_u8(t0, t0, t1); break;
3036    case 1: gen_helper_neon_add_u16(t0, t0, t1); break;
3037    case 2: tcg_gen_add_i32(t0, t0, t1); break;
3038    default: abort();
3039    }
3040}
3041
3042static inline void gen_neon_rsb(int size, TCGv_i32 t0, TCGv_i32 t1)
3043{
3044    switch (size) {
3045    case 0: gen_helper_neon_sub_u8(t0, t1, t0); break;
3046    case 1: gen_helper_neon_sub_u16(t0, t1, t0); break;
3047    case 2: tcg_gen_sub_i32(t0, t1, t0); break;
3048    default: return;
3049    }
3050}
3051
3052/* 32-bit pairwise ops end up the same as the elementwise versions.  */
3053#define gen_helper_neon_pmax_s32  tcg_gen_smax_i32
3054#define gen_helper_neon_pmax_u32  tcg_gen_umax_i32
3055#define gen_helper_neon_pmin_s32  tcg_gen_smin_i32
3056#define gen_helper_neon_pmin_u32  tcg_gen_umin_i32
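
    /*
     * (Editorial note: a pairwise max/min over 32-bit elements only ever
     * combines two 32-bit inputs, which is exactly what the elementwise
     * helper does; the operand pairing is arranged by the callers, so the
     * aliases above suffice.)
     */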
3057
3058#define GEN_NEON_INTEGER_OP_ENV(name) do { \
3059    switch ((size << 1) | u) { \
3060    case 0: \
3061        gen_helper_neon_##name##_s8(tmp, cpu_env, tmp, tmp2); \
3062        break; \
3063    case 1: \
3064        gen_helper_neon_##name##_u8(tmp, cpu_env, tmp, tmp2); \
3065        break; \
3066    case 2: \
3067        gen_helper_neon_##name##_s16(tmp, cpu_env, tmp, tmp2); \
3068        break; \
3069    case 3: \
3070        gen_helper_neon_##name##_u16(tmp, cpu_env, tmp, tmp2); \
3071        break; \
3072    case 4: \
3073        gen_helper_neon_##name##_s32(tmp, cpu_env, tmp, tmp2); \
3074        break; \
3075    case 5: \
3076        gen_helper_neon_##name##_u32(tmp, cpu_env, tmp, tmp2); \
3077        break; \
3078    default: return 1; \
3079    }} while (0)
3080
3081#define GEN_NEON_INTEGER_OP(name) do { \
3082    switch ((size << 1) | u) { \
3083    case 0: \
3084        gen_helper_neon_##name##_s8(tmp, tmp, tmp2); \
3085        break; \
3086    case 1: \
3087        gen_helper_neon_##name##_u8(tmp, tmp, tmp2); \
3088        break; \
3089    case 2: \
3090        gen_helper_neon_##name##_s16(tmp, tmp, tmp2); \
3091        break; \
3092    case 3: \
3093        gen_helper_neon_##name##_u16(tmp, tmp, tmp2); \
3094        break; \
3095    case 4: \
3096        gen_helper_neon_##name##_s32(tmp, tmp, tmp2); \
3097        break; \
3098    case 5: \
3099        gen_helper_neon_##name##_u32(tmp, tmp, tmp2); \
3100        break; \
3101    default: return 1; \
3102    }} while (0)
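
    /*
     * (Editorial note: in both macros above the switch key is
     * (size << 1) | u, so cases 0..5 pick the s8/u8/s16/u16/s32/u32
     * helper variants and any other size/u combination makes the
     * translator return 1, i.e. UNDEF.)
     */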
3103
3104static TCGv_i32 neon_load_scratch(int scratch)
3105{
3106    TCGv_i32 tmp = tcg_temp_new_i32();
3107    tcg_gen_ld_i32(tmp, cpu_env, offsetof(CPUARMState, vfp.scratch[scratch]));
3108    return tmp;
3109}
3110
3111static void neon_store_scratch(int scratch, TCGv_i32 var)
3112{
3113    tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, vfp.scratch[scratch]));
3114    tcg_temp_free_i32(var);
3115}
3116
3117static inline TCGv_i32 neon_get_scalar(int size, int reg)
3118{
3119    TCGv_i32 tmp;
3120    if (size == 1) {
3121        tmp = neon_load_reg(reg & 7, reg >> 4);
3122        if (reg & 8) {
3123            gen_neon_dup_high16(tmp);
3124        } else {
3125            gen_neon_dup_low16(tmp);
3126        }
3127    } else {
3128        tmp = neon_load_reg(reg & 15, reg >> 4);
3129    }
3130    return tmp;
3131}
3132
3133static int gen_neon_unzip(int rd, int rm, int size, int q)
3134{
3135    TCGv_ptr pd, pm;
3136
3137    if (!q && size == 2) {
3138        return 1;
3139    }
3140    pd = vfp_reg_ptr(true, rd);
3141    pm = vfp_reg_ptr(true, rm);
3142    if (q) {
3143        switch (size) {
3144        case 0:
3145            gen_helper_neon_qunzip8(pd, pm);
3146            break;
3147        case 1:
3148            gen_helper_neon_qunzip16(pd, pm);
3149            break;
3150        case 2:
3151            gen_helper_neon_qunzip32(pd, pm);
3152            break;
3153        default:
3154            abort();
3155        }
3156    } else {
3157        switch (size) {
3158        case 0:
3159            gen_helper_neon_unzip8(pd, pm);
3160            break;
3161        case 1:
3162            gen_helper_neon_unzip16(pd, pm);
3163            break;
3164        default:
3165            abort();
3166        }
3167    }
3168    tcg_temp_free_ptr(pd);
3169    tcg_temp_free_ptr(pm);
3170    return 0;
3171}
3172
3173static int gen_neon_zip(int rd, int rm, int size, int q)
3174{
3175    TCGv_ptr pd, pm;
3176
3177    if (!q && size == 2) {
3178        return 1;
3179    }
3180    pd = vfp_reg_ptr(true, rd);
3181    pm = vfp_reg_ptr(true, rm);
3182    if (q) {
3183        switch (size) {
3184        case 0:
3185            gen_helper_neon_qzip8(pd, pm);
3186            break;
3187        case 1:
3188            gen_helper_neon_qzip16(pd, pm);
3189            break;
3190        case 2:
3191            gen_helper_neon_qzip32(pd, pm);
3192            break;
3193        default:
3194            abort();
3195        }
3196    } else {
3197        switch (size) {
3198        case 0:
3199            gen_helper_neon_zip8(pd, pm);
3200            break;
3201        case 1:
3202            gen_helper_neon_zip16(pd, pm);
3203            break;
3204        default:
3205            abort();
3206        }
3207    }
3208    tcg_temp_free_ptr(pd);
3209    tcg_temp_free_ptr(pm);
3210    return 0;
3211}
3212
3213static void gen_neon_trn_u8(TCGv_i32 t0, TCGv_i32 t1)
3214{
3215    TCGv_i32 rd, tmp;
3216
3217    rd = tcg_temp_new_i32();
3218    tmp = tcg_temp_new_i32();
3219
3220    tcg_gen_shli_i32(rd, t0, 8);
3221    tcg_gen_andi_i32(rd, rd, 0xff00ff00);
3222    tcg_gen_andi_i32(tmp, t1, 0x00ff00ff);
3223    tcg_gen_or_i32(rd, rd, tmp);
3224
3225    tcg_gen_shri_i32(t1, t1, 8);
3226    tcg_gen_andi_i32(t1, t1, 0x00ff00ff);
3227    tcg_gen_andi_i32(tmp, t0, 0xff00ff00);
3228    tcg_gen_or_i32(t1, t1, tmp);
3229    tcg_gen_mov_i32(t0, rd);
3230
3231    tcg_temp_free_i32(tmp);
3232    tcg_temp_free_i32(rd);
3233}
3234
3235static void gen_neon_trn_u16(TCGv_i32 t0, TCGv_i32 t1)
3236{
3237    TCGv_i32 rd, tmp;
3238
3239    rd = tcg_temp_new_i32();
3240    tmp = tcg_temp_new_i32();
3241
3242    tcg_gen_shli_i32(rd, t0, 16);
3243    tcg_gen_andi_i32(tmp, t1, 0xffff);
3244    tcg_gen_or_i32(rd, rd, tmp);
3245    tcg_gen_shri_i32(t1, t1, 16);
3246    tcg_gen_andi_i32(tmp, t0, 0xffff0000);
3247    tcg_gen_or_i32(t1, t1, tmp);
3248    tcg_gen_mov_i32(t0, rd);
3249
3250    tcg_temp_free_i32(tmp);
3251    tcg_temp_free_i32(rd);
3252}
3253
3254
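    /* Decode table for the VLD/VST "multiple structures" forms, indexed by
     * the type field in bits [11:8]: nregs passes are made, each touching
     * 'interleave' registers spaced 'spacing' apart.
     */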
3255static struct {
3256    int nregs;
3257    int interleave;
3258    int spacing;
3259} const neon_ls_element_type[11] = {
3260    {1, 4, 1},
3261    {1, 4, 2},
3262    {4, 1, 1},
3263    {2, 2, 2},
3264    {1, 3, 1},
3265    {1, 3, 2},
3266    {3, 1, 1},
3267    {1, 1, 1},
3268    {1, 2, 1},
3269    {1, 2, 2},
3270    {2, 1, 1}
3271};
3272
3273/* Translate a NEON load/store element instruction.  Return nonzero if the
3274   instruction is invalid.  */
3275static int disas_neon_ls_insn(DisasContext *s, uint32_t insn)
3276{
3277    int rd, rn, rm;
3278    int op;
3279    int nregs;
3280    int interleave;
3281    int spacing;
3282    int stride;
3283    int size;
3284    int reg;
3285    int load;
3286    int n;
3287    int vec_size;
3288    int mmu_idx;
3289    MemOp endian;
3290    TCGv_i32 addr;
3291    TCGv_i32 tmp;
3292    TCGv_i32 tmp2;
3293    TCGv_i64 tmp64;
3294
3295    /* FIXME: this access check should not take precedence over UNDEF
3296     * for invalid encodings; we will generate incorrect syndrome information
3297     * for attempts to execute invalid vfp/neon encodings with FP disabled.
3298     */
3299    if (s->fp_excp_el) {
3300        gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
3301                           syn_simd_access_trap(1, 0xe, false), s->fp_excp_el);
3302        return 0;
3303    }
3304
3305    if (!s->vfp_enabled) {
3306        return 1;
        }
3307    VFP_DREG_D(rd, insn);
3308    rn = (insn >> 16) & 0xf;
3309    rm = insn & 0xf;
3310    load = (insn & (1 << 21)) != 0;
3311    endian = s->be_data;
3312    mmu_idx = get_mem_index(s);
3313    if ((insn & (1 << 23)) == 0) {
3314        /* Load store all elements.  */
3315        op = (insn >> 8) & 0xf;
3316        size = (insn >> 6) & 3;
3317        if (op > 10)
3318            return 1;
3319        /* Catch UNDEF cases for bad values of align field */
3320        switch (op & 0xc) {
3321        case 4:
3322            if (((insn >> 5) & 1) == 1) {
3323                return 1;
3324            }
3325            break;
3326        case 8:
3327            if (((insn >> 4) & 3) == 3) {
3328                return 1;
3329            }
3330            break;
3331        default:
3332            break;
3333        }
3334        nregs = neon_ls_element_type[op].nregs;
3335        interleave = neon_ls_element_type[op].interleave;
3336        spacing = neon_ls_element_type[op].spacing;
3337        if (size == 3 && (interleave | spacing) != 1) {
3338            return 1;
3339        }
3340        /* For our purposes, bytes are always little-endian.  */
3341        if (size == 0) {
3342            endian = MO_LE;
3343        }
3344        /* Consecutive little-endian elements from a single register
3345         * can be promoted to a larger little-endian operation.
3346         */
3347        if (interleave == 1 && endian == MO_LE) {
3348            size = 3;
3349        }
3350        tmp64 = tcg_temp_new_i64();
3351        addr = tcg_temp_new_i32();
3352        tmp2 = tcg_const_i32(1 << size);
3353        load_reg_var(s, addr, rn);
3354        for (reg = 0; reg < nregs; reg++) {
3355            for (n = 0; n < 8 >> size; n++) {
3356                int xs;
3357                for (xs = 0; xs < interleave; xs++) {
3358                    int tt = rd + reg + spacing * xs;
3359
3360                    if (load) {
3361                        gen_aa32_ld_i64(s, tmp64, addr, mmu_idx, endian | size);
3362                        neon_store_element64(tt, n, size, tmp64);
3363                    } else {
3364                        neon_load_element64(tmp64, tt, n, size);
3365                        gen_aa32_st_i64(s, tmp64, addr, mmu_idx, endian | size);
3366                    }
3367                    tcg_gen_add_i32(addr, addr, tmp2);
3368                }
3369            }
3370        }
3371        tcg_temp_free_i32(addr);
3372        tcg_temp_free_i32(tmp2);
3373        tcg_temp_free_i64(tmp64);
3374        stride = nregs * interleave * 8;
3375    } else {
3376        size = (insn >> 10) & 3;
3377        if (size == 3) {
3378            /* Load single element to all lanes.  */
3379            int a = (insn >> 4) & 1;
3380            if (!load) {
3381                return 1;
3382            }
3383            size = (insn >> 6) & 3;
3384            nregs = ((insn >> 8) & 3) + 1;
3385
3386            if (size == 3) {
3387                if (nregs != 4 || a == 0) {
3388                    return 1;
3389                }
3390                /* For VLD4 size == 3, a == 1 means 32 bits at 16 byte alignment */
3391                size = 2;
3392            }
3393            if (nregs == 1 && a == 1 && size == 0) {
3394                return 1;
3395            }
3396            if (nregs == 3 && a == 1) {
3397                return 1;
3398            }
3399            addr = tcg_temp_new_i32();
3400            load_reg_var(s, addr, rn);
3401
3402            /* VLD1 to all lanes: bit 5 indicates how many Dregs to write.
3403             * VLD2/3/4 to all lanes: bit 5 indicates register stride.
3404             */
3405            stride = (insn & (1 << 5)) ? 2 : 1;
3406            vec_size = nregs == 1 ? stride * 8 : 8;
3407
3408            tmp = tcg_temp_new_i32();
3409            for (reg = 0; reg < nregs; reg++) {
3410                gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s),
3411                                s->be_data | size);
3412                if ((rd & 1) && vec_size == 16) {
3413                    /* We cannot write 16 bytes at once because the
3414                     * destination is unaligned.
3415                     */
3416                    tcg_gen_gvec_dup_i32(size, neon_reg_offset(rd, 0),
3417                                         8, 8, tmp);
3418                    tcg_gen_gvec_mov(0, neon_reg_offset(rd + 1, 0),
3419                                     neon_reg_offset(rd, 0), 8, 8);
3420                } else {
3421                    tcg_gen_gvec_dup_i32(size, neon_reg_offset(rd, 0),
3422                                         vec_size, vec_size, tmp);
3423                }
3424                tcg_gen_addi_i32(addr, addr, 1 << size);
3425                rd += stride;
3426            }
3427            tcg_temp_free_i32(tmp);
3428            tcg_temp_free_i32(addr);
3429            stride = (1 << size) * nregs;
3430        } else {
3431            /* Single element.  */
3432            int idx = (insn >> 4) & 0xf;
3433            int reg_idx;
3434            switch (size) {
3435            case 0:
3436                reg_idx = (insn >> 5) & 7;
3437                stride = 1;
3438                break;
3439            case 1:
3440                reg_idx = (insn >> 6) & 3;
3441                stride = (insn & (1 << 5)) ? 2 : 1;
3442                break;
3443            case 2:
3444                reg_idx = (insn >> 7) & 1;
3445                stride = (insn & (1 << 6)) ? 2 : 1;
3446                break;
3447            default:
3448                abort();
3449            }
3450            nregs = ((insn >> 8) & 3) + 1;
3451            /* Catch the UNDEF cases. This is unavoidably a bit messy. */
3452            switch (nregs) {
3453            case 1:
3454                if (((idx & (1 << size)) != 0) ||
3455                    (size == 2 && ((idx & 3) == 1 || (idx & 3) == 2))) {
3456                    return 1;
3457                }
3458                break;
3459            case 3:
3460                if ((idx & 1) != 0) {
3461                    return 1;
3462                }
3463                /* fall through */
3464            case 2:
3465                if (size == 2 && (idx & 2) != 0) {
3466                    return 1;
3467                }
3468                break;
3469            case 4:
3470                if ((size == 2) && ((idx & 3) == 3)) {
3471                    return 1;
3472                }
3473                break;
3474            default:
3475                abort();
3476            }
3477            if ((rd + stride * (nregs - 1)) > 31) {
3478                /* Attempts to write off the end of the register file
3479                 * are UNPREDICTABLE; we choose to UNDEF because otherwise
3480                 * the element load/store would run off the end of the array.
3481                 */
3482                return 1;
3483            }
3484            tmp = tcg_temp_new_i32();
3485            addr = tcg_temp_new_i32();
3486            load_reg_var(s, addr, rn);
3487            for (reg = 0; reg < nregs; reg++) {
3488                if (load) {
3489                    gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s),
3490                                    s->be_data | size);
3491                    neon_store_element(rd, reg_idx, size, tmp);
3492                } else { /* Store */
3493                    neon_load_element(tmp, rd, reg_idx, size);
3494                    gen_aa32_st_i32(s, tmp, addr, get_mem_index(s),
3495                                    s->be_data | size);
3496                }
3497                rd += stride;
3498                tcg_gen_addi_i32(addr, addr, 1 << size);
3499            }
3500            tcg_temp_free_i32(addr);
3501            tcg_temp_free_i32(tmp);
3502            stride = nregs * (1 << size);
3503        }
3504    }
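        /* Post-index writeback: rm == 15 means no writeback, rm == 13 means
         * increment rn by the transfer size, anything else adds register rm.
         */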
3505    if (rm != 15) {
3506        TCGv_i32 base;
3507
3508        base = load_reg(s, rn);
3509        if (rm == 13) {
3510            tcg_gen_addi_i32(base, base, stride);
3511        } else {
3512            TCGv_i32 index;
3513            index = load_reg(s, rm);
3514            tcg_gen_add_i32(base, base, index);
3515            tcg_temp_free_i32(index);
3516        }
3517        store_reg(s, rn, base);
3518    }
3519    return 0;
3520}
3521
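    /* Narrowing helpers for VMOVN/VQMOVN/VQMOVUN: plain truncation,
     * signed/unsigned saturating, and signed-to-unsigned saturating
     * variants respectively.
     */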
3522static inline void gen_neon_narrow(int size, TCGv_i32 dest, TCGv_i64 src)
3523{
3524    switch (size) {
3525    case 0: gen_helper_neon_narrow_u8(dest, src); break;
3526    case 1: gen_helper_neon_narrow_u16(dest, src); break;
3527    case 2: tcg_gen_extrl_i64_i32(dest, src); break;
3528    default: abort();
3529    }
3530}
3531
3532static inline void gen_neon_narrow_sats(int size, TCGv_i32 dest, TCGv_i64 src)
3533{
3534    switch (size) {
3535    case 0: gen_helper_neon_narrow_sat_s8(dest, cpu_env, src); break;
3536    case 1: gen_helper_neon_narrow_sat_s16(dest, cpu_env, src); break;
3537    case 2: gen_helper_neon_narrow_sat_s32(dest, cpu_env, src); break;
3538    default: abort();
3539    }
3540}
3541
3542static inline void gen_neon_narrow_satu(int size, TCGv_i32 dest, TCGv_i64 src)
3543{
3544    switch (size) {
3545    case 0: gen_helper_neon_narrow_sat_u8(dest, cpu_env, src); break;
3546    case 1: gen_helper_neon_narrow_sat_u16(dest, cpu_env, src); break;
3547    case 2: gen_helper_neon_narrow_sat_u32(dest, cpu_env, src); break;
3548    default: abort();
3549    }
3550}
3551
3552static inline void gen_neon_unarrow_sats(int size, TCGv_i32 dest, TCGv_i64 src)
3553{
3554    switch (size) {
3555    case 0: gen_helper_neon_unarrow_sat8(dest, cpu_env, src); break;
3556    case 1: gen_helper_neon_unarrow_sat16(dest, cpu_env, src); break;
3557    case 2: gen_helper_neon_unarrow_sat32(dest, cpu_env, src); break;
3558    default: abort();
3559    }
3560}
3561
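    /* Per-element shift by a register amount, used by the narrowing shift
     * insns: 'q' selects the rounding forms and 'u' the unsigned forms.
     */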
3562static inline void gen_neon_shift_narrow(int size, TCGv_i32 var, TCGv_i32 shift,
3563                                         int q, int u)
3564{
3565    if (q) {
3566        if (u) {
3567            switch (size) {
3568            case 1: gen_helper_neon_rshl_u16(var, var, shift); break;
3569            case 2: gen_helper_neon_rshl_u32(var, var, shift); break;
3570            default: abort();
3571            }
3572        } else {
3573            switch (size) {
3574            case 1: gen_helper_neon_rshl_s16(var, var, shift); break;
3575            case 2: gen_helper_neon_rshl_s32(var, var, shift); break;
3576            default: abort();
3577            }
3578        }
3579    } else {
3580        if (u) {
3581            switch (size) {
3582            case 1: gen_helper_neon_shl_u16(var, var, shift); break;
3583            case 2: gen_helper_neon_shl_u32(var, var, shift); break;
3584            default: abort();
3585            }
3586        } else {
3587            switch (size) {
3588            case 1: gen_helper_neon_shl_s16(var, var, shift); break;
3589            case 2: gen_helper_neon_shl_s32(var, var, shift); break;
3590            default: abort();
3591            }
3592        }
3593    }
3594}
3595
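    /* Widen each element of 'src' to double width, sign- or zero-extending
     * according to 'u'; the source temporary is freed.
     */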
3596static inline void gen_neon_widen(TCGv_i64 dest, TCGv_i32 src, int size, int u)
3597{
3598    if (u) {
3599        switch (size) {
3600        case 0: gen_helper_neon_widen_u8(dest, src); break;
3601        case 1: gen_helper_neon_widen_u16(dest, src); break;
3602        case 2: tcg_gen_extu_i32_i64(dest, src); break;
3603        default: abort();
3604        }
3605    } else {
3606        switch (size) {
3607        case 0: gen_helper_neon_widen_s8(dest, src); break;
3608        case 1: gen_helper_neon_widen_s16(dest, src); break;
3609        case 2: tcg_gen_ext_i32_i64(dest, src); break;
3610        default: abort();
3611        }
3612    }
3613    tcg_temp_free_i32(src);
3614}
3615
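    /* Long (double-width) add/sub/negate/saturating-add helpers; the
     * CPU_V001 forms operate in place on the cpu_V0/cpu_V1 scratch temps.
     */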
3616static inline void gen_neon_addl(int size)
3617{
3618    switch (size) {
3619    case 0: gen_helper_neon_addl_u16(CPU_V001); break;
3620    case 1: gen_helper_neon_addl_u32(CPU_V001); break;
3621    case 2: tcg_gen_add_i64(CPU_V001); break;
3622    default: abort();
3623    }
3624}
3625
3626static inline void gen_neon_subl(int size)
3627{
3628    switch (size) {
3629    case 0: gen_helper_neon_subl_u16(CPU_V001); break;
3630    case 1: gen_helper_neon_subl_u32(CPU_V001); break;
3631    case 2: tcg_gen_sub_i64(CPU_V001); break;
3632    default: abort();
3633    }
3634}
3635
3636static inline void gen_neon_negl(TCGv_i64 var, int size)
3637{
3638    switch (size) {
3639    case 0: gen_helper_neon_negl_u16(var, var); break;
3640    case 1: gen_helper_neon_negl_u32(var, var); break;
3641    case 2:
3642        tcg_gen_neg_i64(var, var);
3643        break;
3644    default: abort();
3645    }
3646}
3647
3648static inline void gen_neon_addl_saturate(TCGv_i64 op0, TCGv_i64 op1, int size)
3649{
3650    switch (size) {
3651    case 1: gen_helper_neon_addl_saturate_s32(op0, cpu_env, op0, op1); break;
3652    case 2: gen_helper_neon_addl_saturate_s64(op0, cpu_env, op0, op1); break;
3653    default: abort();
3654    }
3655}
3656
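    /* Widening multiply: dest = a * b with each element widened to double
     * size, signed or unsigned according to 'u'.
     */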
3657static inline void gen_neon_mull(TCGv_i64 dest, TCGv_i32 a, TCGv_i32 b,
3658                                 int size, int u)
3659{
3660    TCGv_i64 tmp;
3661
3662    switch ((size << 1) | u) {
3663    case 0: gen_helper_neon_mull_s8(dest, a, b); break;
3664    case 1: gen_helper_neon_mull_u8(dest, a, b); break;
3665    case 2: gen_helper_neon_mull_s16(dest, a, b); break;
3666    case 3: gen_helper_neon_mull_u16(dest, a, b); break;
3667    case 4:
3668        tmp = gen_muls_i64_i32(a, b);
3669        tcg_gen_mov_i64(dest, tmp);
3670        tcg_temp_free_i64(tmp);
3671        break;
3672    case 5:
3673        tmp = gen_mulu_i64_i32(a, b);
3674        tcg_gen_mov_i64(dest, tmp);
3675        tcg_temp_free_i64(tmp);
3676        break;
3677    default: abort();
3678    }
3679
3680    /* gen_helper_neon_mull_[su]{8|16} do not free their parameters,
3681       so free them here.  */
3682    if (size < 2) {
3683        tcg_temp_free_i32(a);
3684        tcg_temp_free_i32(b);
3685    }
3686}
3687
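    /* Dispatch the narrowing operation selected by op/u: VMOVN, VQMOVN or
     * VQMOVUN as appropriate.
     */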
3688static void gen_neon_narrow_op(int op, int u, int size,
3689                               TCGv_i32 dest, TCGv_i64 src)
3690{
3691    if (op) {
3692        if (u) {
3693            gen_neon_unarrow_sats(size, dest, src);
3694        } else {
3695            gen_neon_narrow(size, dest, src);
3696        }
3697    } else {
3698        if (u) {
3699            gen_neon_narrow_satu(size, dest, src);
3700        } else {
3701            gen_neon_narrow_sats(size, dest, src);
3702        }
3703    }
3704}
3705
3706/* Symbolic constants for op fields for Neon 3-register same-length.
3707 * The values correspond to bits [11:8,4]; see the ARM ARM DDI0406B
3708 * table A7-9.
3709 */
3710#define NEON_3R_VHADD 0
3711#define NEON_3R_VQADD 1
3712#define NEON_3R_VRHADD 2
3713#define NEON_3R_LOGIC 3 /* VAND,VBIC,VORR,VMOV,VORN,VEOR,VBIF,VBIT,VBSL */
3714#define NEON_3R_VHSUB 4
3715#define NEON_3R_VQSUB 5
3716#define NEON_3R_VCGT 6
3717#define NEON_3R_VCGE 7
3718#define NEON_3R_VSHL 8
3719#define NEON_3R_VQSHL 9
3720#define NEON_3R_VRSHL 10
3721#define NEON_3R_VQRSHL 11
3722#define NEON_3R_VMAX 12
3723#define NEON_3R_VMIN 13
3724#define NEON_3R_VABD 14
3725#define NEON_3R_VABA 15
3726#define NEON_3R_VADD_VSUB 16
3727#define NEON_3R_VTST_VCEQ 17
3728#define NEON_3R_VML 18 /* VMLA, VMLS */
3729#define NEON_3R_VMUL 19
3730#define NEON_3R_VPMAX 20
3731#define NEON_3R_VPMIN 21
3732#define NEON_3R_VQDMULH_VQRDMULH 22
3733#define NEON_3R_VPADD_VQRDMLAH 23
3734#define NEON_3R_SHA 24 /* SHA1C,SHA1P,SHA1M,SHA1SU0,SHA256H{2},SHA256SU1 */
3735#define NEON_3R_VFM_VQRDMLSH 25 /* VFMA, VFMS, VQRDMLSH */
3736#define NEON_3R_FLOAT_ARITH 26 /* float VADD, VSUB, VPADD, VABD */
3737#define NEON_3R_FLOAT_MULTIPLY 27 /* float VMLA, VMLS, VMUL */
3738#define NEON_3R_FLOAT_CMP 28 /* float VCEQ, VCGE, VCGT */
3739#define NEON_3R_FLOAT_ACMP 29 /* float VACGE, VACGT, VACLE, VACLT */
3740#define NEON_3R_FLOAT_MINMAX 30 /* float VMIN, VMAX */
3741#define NEON_3R_FLOAT_MISC 31 /* float VRECPS, VRSQRTS, VMAXNM/MINNM */
3742
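    /* Each entry has bit n set if the 3-reg-same insn allows element size n
     * (otherwise it will UNDEF); entries where the size field instead
     * encodes the op type are commented inline.
     */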
3743static const uint8_t neon_3r_sizes[] = {
3744    [NEON_3R_VHADD] = 0x7,
3745    [NEON_3R_VQADD] = 0xf,
3746    [NEON_3R_VRHADD] = 0x7,
3747    [NEON_3R_LOGIC] = 0xf, /* size field encodes op type */
3748    [NEON_3R_VHSUB] = 0x7,
3749    [NEON_3R_VQSUB] = 0xf,
3750    [NEON_3R_VCGT] = 0x7,
3751    [NEON_3R_VCGE] = 0x7,
3752    [NEON_3R_VSHL] = 0xf,
3753    [NEON_3R_VQSHL] = 0xf,
3754    [NEON_3R_VRSHL] = 0xf,
3755    [NEON_3R_VQRSHL] = 0xf,
3756    [NEON_3R_VMAX] = 0x7,
3757    [NEON_3R_VMIN] = 0x7,
3758    [NEON_3R_VABD] = 0x7,
3759    [NEON_3R_VABA] = 0x7,
3760    [NEON_3R_VADD_VSUB] = 0xf,
3761    [NEON_3R_VTST_VCEQ] = 0x7,
3762    [NEON_3R_VML] = 0x7,
3763    [NEON_3R_VMUL] = 0x7,
3764    [NEON_3R_VPMAX] = 0x7,
3765    [NEON_3R_VPMIN] = 0x7,
3766    [NEON_3R_VQDMULH_VQRDMULH] = 0x6,
3767    [NEON_3R_VPADD_VQRDMLAH] = 0x7,
3768    [NEON_3R_SHA] = 0xf, /* size field encodes op type */
3769    [NEON_3R_VFM_VQRDMLSH] = 0x7, /* For VFM, size bit 1 encodes op */
3770    [NEON_3R_FLOAT_ARITH] = 0x5, /* size bit 1 encodes op */
3771    [NEON_3R_FLOAT_MULTIPLY] = 0x5, /* size bit 1 encodes op */
3772    [NEON_3R_FLOAT_CMP] = 0x5, /* size bit 1 encodes op */
3773    [NEON_3R_FLOAT_ACMP] = 0x5, /* size bit 1 encodes op */
3774    [NEON_3R_FLOAT_MINMAX] = 0x5, /* size bit 1 encodes op */
3775    [NEON_3R_FLOAT_MISC] = 0x5, /* size bit 1 encodes op */
3776};
3777
3778/* Symbolic constants for op fields for Neon 2-register miscellaneous.
3779 * The values correspond to bits [17:16,10:7]; see the ARM ARM DDI0406B
3780 * table A7-13.
3781 */
3782#define NEON_2RM_VREV64 0
3783#define NEON_2RM_VREV32 1
3784#define NEON_2RM_VREV16 2
3785#define NEON_2RM_VPADDL 4
3786#define NEON_2RM_VPADDL_U 5
3787#define NEON_2RM_AESE 6 /* Includes AESD */
3788#define NEON_2RM_AESMC 7 /* Includes AESIMC */
3789#define NEON_2RM_VCLS 8
3790#define NEON_2RM_VCLZ 9
3791#define NEON_2RM_VCNT 10
3792#define NEON_2RM_VMVN 11
3793#define NEON_2RM_VPADAL 12
3794#define NEON_2RM_VPADAL_U 13
3795#define NEON_2RM_VQABS 14
3796#define NEON_2RM_VQNEG 15
3797#define NEON_2RM_VCGT0 16
3798#define NEON_2RM_VCGE0 17
3799#define NEON_2RM_VCEQ0 18
3800#define NEON_2RM_VCLE0 19
3801#define NEON_2RM_VCLT0 20
3802#define NEON_2RM_SHA1H 21
3803#define NEON_2RM_VABS 22
3804#define NEON_2RM_VNEG 23
3805#define NEON_2RM_VCGT0_F 24
3806#define NEON_2RM_VCGE0_F 25
3807#define NEON_2RM_VCEQ0_F 26
3808#define NEON_2RM_VCLE0_F 27
3809#define NEON_2RM_VCLT0_F 28
3810#define NEON_2RM_VABS_F 30
3811#define NEON_2RM_VNEG_F 31
3812#define NEON_2RM_VSWP 32
3813#define NEON_2RM_VTRN 33
3814#define NEON_2RM_VUZP 34
3815#define NEON_2RM_VZIP 35
3816#define NEON_2RM_VMOVN 36 /* Includes VQMOVN, VQMOVUN */
3817#define NEON_2RM_VQMOVN 37 /* Includes VQMOVUN */
3818#define NEON_2RM_VSHLL 38
3819#define NEON_2RM_SHA1SU1 39 /* Includes SHA256SU0 */
3820#define NEON_2RM_VRINTN 40
3821#define NEON_2RM_VRINTX 41
3822#define NEON_2RM_VRINTA 42
3823#define NEON_2RM_VRINTZ 43
3824#define NEON_2RM_VCVT_F16_F32 44
3825#define NEON_2RM_VRINTM 45
3826#define NEON_2RM_VCVT_F32_F16 46
3827#define NEON_2RM_VRINTP 47
3828#define NEON_2RM_VCVTAU 48
3829#define NEON_2RM_VCVTAS 49
3830#define NEON_2RM_VCVTNU 50
3831#define NEON_2RM_VCVTNS 51
3832#define NEON_2RM_VCVTPU 52
3833#define NEON_2RM_VCVTPS 53
3834#define NEON_2RM_VCVTMU 54
3835#define NEON_2RM_VCVTMS 55
3836#define NEON_2RM_VRECPE 56
3837#define NEON_2RM_VRSQRTE 57
3838#define NEON_2RM_VRECPE_F 58
3839#define NEON_2RM_VRSQRTE_F 59
3840#define NEON_2RM_VCVT_FS 60
3841#define NEON_2RM_VCVT_FU 61
3842#define NEON_2RM_VCVT_SF 62
3843#define NEON_2RM_VCVT_UF 63
3844
3845static bool neon_2rm_is_v8_op(int op)
3846{
3847    /* Return true if this neon 2reg-misc op is ARMv8 and up */
3848    switch (op) {
3849    case NEON_2RM_VRINTN:
3850    case NEON_2RM_VRINTA:
3851    case NEON_2RM_VRINTM:
3852    case NEON_2RM_VRINTP:
3853    case NEON_2RM_VRINTZ:
3854    case NEON_2RM_VRINTX:
3855    case NEON_2RM_VCVTAU:
3856    case NEON_2RM_VCVTAS:
3857    case NEON_2RM_VCVTNU:
3858    case NEON_2RM_VCVTNS:
3859    case NEON_2RM_VCVTPU:
3860    case NEON_2RM_VCVTPS:
3861    case NEON_2RM_VCVTMU:
3862    case NEON_2RM_VCVTMS:
3863        return true;
3864    default:
3865        return false;
3866    }
3867}
3868
3869/* Each entry in this array has bit n set if the insn allows
3870 * size value n (otherwise it will UNDEF). Since unallocated
3871 * op values will have no bits set they always UNDEF.
3872 */
3873static const uint8_t neon_2rm_sizes[] = {
3874    [NEON_2RM_VREV64] = 0x7,
3875    [NEON_2RM_VREV32] = 0x3,
3876    [NEON_2RM_VREV16] = 0x1,
3877    [NEON_2RM_VPADDL] = 0x7,
3878    [NEON_2RM_VPADDL_U] = 0x7,
3879    [NEON_2RM_AESE] = 0x1,
3880    [NEON_2RM_AESMC] = 0x1,
3881    [NEON_2RM_VCLS] = 0x7,
3882    [NEON_2RM_VCLZ] = 0x7,
3883    [NEON_2RM_VCNT] = 0x1,
3884    [NEON_2RM_VMVN] = 0x1,
3885    [NEON_2RM_VPADAL] = 0x7,
3886    [NEON_2RM_VPADAL_U] = 0x7,
3887    [NEON_2RM_VQABS] = 0x7,
3888    [NEON_2RM_VQNEG] = 0x7,
3889    [NEON_2RM_VCGT0] = 0x7,
3890    [NEON_2RM_VCGE0] = 0x7,
3891    [NEON_2RM_VCEQ0] = 0x7,
3892    [NEON_2RM_VCLE0] = 0x7,
3893    [NEON_2RM_VCLT0] = 0x7,
3894    [NEON_2RM_SHA1H] = 0x4,
3895    [NEON_2RM_VABS] = 0x7,
3896    [NEON_2RM_VNEG] = 0x7,
3897    [NEON_2RM_VCGT0_F] = 0x4,
3898    [NEON_2RM_VCGE0_F] = 0x4,
3899    [NEON_2RM_VCEQ0_F] = 0x4,
3900    [NEON_2RM_VCLE0_F] = 0x4,
3901    [NEON_2RM_VCLT0_F] = 0x4,
3902    [NEON_2RM_VABS_F] = 0x4,
3903    [NEON_2RM_VNEG_F] = 0x4,
3904    [NEON_2RM_VSWP] = 0x1,
3905    [NEON_2RM_VTRN] = 0x7,
3906    [NEON_2RM_VUZP] = 0x7,
3907    [NEON_2RM_VZIP] = 0x7,
3908    [NEON_2RM_VMOVN] = 0x7,
3909    [NEON_2RM_VQMOVN] = 0x7,
3910    [NEON_2RM_VSHLL] = 0x7,
3911    [NEON_2RM_SHA1SU1] = 0x4,
3912    [NEON_2RM_VRINTN] = 0x4,
3913    [NEON_2RM_VRINTX] = 0x4,
3914    [NEON_2RM_VRINTA] = 0x4,
3915    [NEON_2RM_VRINTZ] = 0x4,
3916    [NEON_2RM_VCVT_F16_F32] = 0x2,
3917    [NEON_2RM_VRINTM] = 0x4,
3918    [NEON_2RM_VCVT_F32_F16] = 0x2,
3919    [NEON_2RM_VRINTP] = 0x4,
3920    [NEON_2RM_VCVTAU] = 0x4,
3921    [NEON_2RM_VCVTAS] = 0x4,
3922    [NEON_2RM_VCVTNU] = 0x4,
3923    [NEON_2RM_VCVTNS] = 0x4,
3924    [NEON_2RM_VCVTPU] = 0x4,
3925    [NEON_2RM_VCVTPS] = 0x4,
3926    [NEON_2RM_VCVTMU] = 0x4,
3927    [NEON_2RM_VCVTMS] = 0x4,
3928    [NEON_2RM_VRECPE] = 0x4,
3929    [NEON_2RM_VRSQRTE] = 0x4,
3930    [NEON_2RM_VRECPE_F] = 0x4,
3931    [NEON_2RM_VRSQRTE_F] = 0x4,
3932    [NEON_2RM_VCVT_FS] = 0x4,
3933    [NEON_2RM_VCVT_FU] = 0x4,
3934    [NEON_2RM_VCVT_SF] = 0x4,
3935    [NEON_2RM_VCVT_UF] = 0x4,
3936};
3937
3938
3939/* Expand v8.1 simd helper.  */
3940static int do_v81_helper(DisasContext *s, gen_helper_gvec_3_ptr *fn,
3941                         int q, int rd, int rn, int rm)
3942{
3943    if (dc_isar_feature(aa32_rdm, s)) {
3944        int opr_sz = (1 + q) * 8;
3945        tcg_gen_gvec_3_ptr(vfp_reg_offset(1, rd),
3946                           vfp_reg_offset(1, rn),
3947                           vfp_reg_offset(1, rm), cpu_env,
3948                           opr_sz, opr_sz, 0, fn);
3949        return 0;
3950    }
3951    return 1;
3952}
3953
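    /* Expanders for VSRA (signed shift right and accumulate), per element
     * size.
     */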
3954static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3955{
3956    tcg_gen_vec_sar8i_i64(a, a, shift);
3957    tcg_gen_vec_add8_i64(d, d, a);
3958}
3959
3960static void gen_ssra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3961{
3962    tcg_gen_vec_sar16i_i64(a, a, shift);
3963    tcg_gen_vec_add16_i64(d, d, a);
3964}
3965
3966static void gen_ssra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3967{
3968    tcg_gen_sari_i32(a, a, shift);
3969    tcg_gen_add_i32(d, d, a);
3970}
3971
3972static void gen_ssra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3973{
3974    tcg_gen_sari_i64(a, a, shift);
3975    tcg_gen_add_i64(d, d, a);
3976}
3977
3978static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3979{
3980    tcg_gen_sari_vec(vece, a, a, sh);
3981    tcg_gen_add_vec(vece, d, d, a);
3982}
3983
3984static const TCGOpcode vecop_list_ssra[] = {
3985    INDEX_op_sari_vec, INDEX_op_add_vec, 0
3986};
3987
3988const GVecGen2i ssra_op[4] = {
3989    { .fni8 = gen_ssra8_i64,
3990      .fniv = gen_ssra_vec,
3991      .load_dest = true,
3992      .opt_opc = vecop_list_ssra,
3993      .vece = MO_8 },
3994    { .fni8 = gen_ssra16_i64,
3995      .fniv = gen_ssra_vec,
3996      .load_dest = true,
3997      .opt_opc = vecop_list_ssra,
3998      .vece = MO_16 },
3999    { .fni4 = gen_ssra32_i32,
4000      .fniv = gen_ssra_vec,
4001      .load_dest = true,
4002      .opt_opc = vecop_list_ssra,
4003      .vece = MO_32 },
4004    { .fni8 = gen_ssra64_i64,
4005      .fniv = gen_ssra_vec,
4006      .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4007      .opt_opc = vecop_list_ssra,
4008      .load_dest = true,
4009      .vece = MO_64 },
4010};
4011
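    /* Expanders for USRA, the unsigned shift-right-and-accumulate form. */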
4012static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4013{
4014    tcg_gen_vec_shr8i_i64(a, a, shift);
4015    tcg_gen_vec_add8_i64(d, d, a);
4016}
4017
4018static void gen_usra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4019{
4020    tcg_gen_vec_shr16i_i64(a, a, shift);
4021    tcg_gen_vec_add16_i64(d, d, a);
4022}
4023
4024static void gen_usra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
4025{
4026    tcg_gen_shri_i32(a, a, shift);
4027    tcg_gen_add_i32(d, d, a);
4028}
4029
4030static void gen_usra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4031{
4032    tcg_gen_shri_i64(a, a, shift);
4033    tcg_gen_add_i64(d, d, a);
4034}
4035
4036static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
4037{
4038    tcg_gen_shri_vec(vece, a, a, sh);
4039    tcg_gen_add_vec(vece, d, d, a);
4040}
4041
4042static const TCGOpcode vecop_list_usra[] = {
4043    INDEX_op_shri_vec, INDEX_op_add_vec, 0
4044};
4045
4046const GVecGen2i usra_op[4] = {
4047    { .fni8 = gen_usra8_i64,
4048      .fniv = gen_usra_vec,
4049      .load_dest = true,
4050      .opt_opc = vecop_list_usra,
4051      .vece = MO_8, },
4052    { .fni8 = gen_usra16_i64,
4053      .fniv = gen_usra_vec,
4054      .load_dest = true,
4055      .opt_opc = vecop_list_usra,
4056      .vece = MO_16, },
4057    { .fni4 = gen_usra32_i32,
4058      .fniv = gen_usra_vec,
4059      .load_dest = true,
4060      .opt_opc = vecop_list_usra,
4061      .vece = MO_32, },
4062    { .fni8 = gen_usra64_i64,
4063      .fniv = gen_usra_vec,
4064      .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4065      .load_dest = true,
4066      .opt_opc = vecop_list_usra,
4067      .vece = MO_64, },
4068};
4069
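    /* Expanders for VSRI (shift right and insert): the top 'shift' bits of
     * each destination element are preserved.
     */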
4070static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4071{
4072    uint64_t mask = dup_const(MO_8, 0xff >> shift);
4073    TCGv_i64 t = tcg_temp_new_i64();
4074
4075    tcg_gen_shri_i64(t, a, shift);
4076    tcg_gen_andi_i64(t, t, mask);
4077    tcg_gen_andi_i64(d, d, ~mask);
4078    tcg_gen_or_i64(d, d, t);
4079    tcg_temp_free_i64(t);
4080}
4081
4082static void gen_shr16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4083{
4084    uint64_t mask = dup_const(MO_16, 0xffff >> shift);
4085    TCGv_i64 t = tcg_temp_new_i64();
4086
4087    tcg_gen_shri_i64(t, a, shift);
4088    tcg_gen_andi_i64(t, t, mask);
4089    tcg_gen_andi_i64(d, d, ~mask);
4090    tcg_gen_or_i64(d, d, t);
4091    tcg_temp_free_i64(t);
4092}
4093
4094static void gen_shr32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
4095{
4096    tcg_gen_shri_i32(a, a, shift);
4097    tcg_gen_deposit_i32(d, d, a, 0, 32 - shift);
4098}
4099
4100static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4101{
4102    tcg_gen_shri_i64(a, a, shift);
4103    tcg_gen_deposit_i64(d, d, a, 0, 64 - shift);
4104}
4105
4106static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
4107{
4108    if (sh == 0) {
4109        tcg_gen_mov_vec(d, a);
4110    } else {
4111        TCGv_vec t = tcg_temp_new_vec_matching(d);
4112        TCGv_vec m = tcg_temp_new_vec_matching(d);
4113
4114        tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh));
4115        tcg_gen_shri_vec(vece, t, a, sh);
4116        tcg_gen_and_vec(vece, d, d, m);
4117        tcg_gen_or_vec(vece, d, d, t);
4118
4119        tcg_temp_free_vec(t);
4120        tcg_temp_free_vec(m);
4121    }
4122}
4123
4124static const TCGOpcode vecop_list_sri[] = { INDEX_op_shri_vec, 0 };
4125
4126const GVecGen2i sri_op[4] = {
4127    { .fni8 = gen_shr8_ins_i64,
4128      .fniv = gen_shr_ins_vec,
4129      .load_dest = true,
4130      .opt_opc = vecop_list_sri,
4131      .vece = MO_8 },
4132    { .fni8 = gen_shr16_ins_i64,
4133      .fniv = gen_shr_ins_vec,
4134      .load_dest = true,
4135      .opt_opc = vecop_list_sri,
4136      .vece = MO_16 },
4137    { .fni4 = gen_shr32_ins_i32,
4138      .fniv = gen_shr_ins_vec,
4139      .load_dest = true,
4140      .opt_opc = vecop_list_sri,
4141      .vece = MO_32 },
4142    { .fni8 = gen_shr64_ins_i64,
4143      .fniv = gen_shr_ins_vec,
4144      .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4145      .load_dest = true,
4146      .opt_opc = vecop_list_sri,
4147      .vece = MO_64 },
4148};
4149
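    /* Expanders for VSLI (shift left and insert): the low 'shift' bits of
     * each destination element are preserved.
     */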
4150static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4151{
4152    uint64_t mask = dup_const(MO_8, 0xff << shift);
4153    TCGv_i64 t = tcg_temp_new_i64();
4154
4155    tcg_gen_shli_i64(t, a, shift);
4156    tcg_gen_andi_i64(t, t, mask);
4157    tcg_gen_andi_i64(d, d, ~mask);
4158    tcg_gen_or_i64(d, d, t);
4159    tcg_temp_free_i64(t);
4160}
4161
4162static void gen_shl16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4163{
4164    uint64_t mask = dup_const(MO_16, 0xffff << shift);
4165    TCGv_i64 t = tcg_temp_new_i64();
4166
4167    tcg_gen_shli_i64(t, a, shift);
4168    tcg_gen_andi_i64(t, t, mask);
4169    tcg_gen_andi_i64(d, d, ~mask);
4170    tcg_gen_or_i64(d, d, t);
4171    tcg_temp_free_i64(t);
4172}
4173
4174static void gen_shl32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
4175{
4176    tcg_gen_deposit_i32(d, d, a, shift, 32 - shift);
4177}
4178
4179static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4180{
4181    tcg_gen_deposit_i64(d, d, a, shift, 64 - shift);
4182}
4183
4184static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
4185{
4186    if (sh == 0) {
4187        tcg_gen_mov_vec(d, a);
4188    } else {
4189        TCGv_vec t = tcg_temp_new_vec_matching(d);
4190        TCGv_vec m = tcg_temp_new_vec_matching(d);
4191
4192        tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK(0, sh));
4193        tcg_gen_shli_vec(vece, t, a, sh);
4194        tcg_gen_and_vec(vece, d, d, m);
4195        tcg_gen_or_vec(vece, d, d, t);
4196
4197        tcg_temp_free_vec(t);
4198        tcg_temp_free_vec(m);
4199    }
4200}
4201
4202static const TCGOpcode vecop_list_sli[] = { INDEX_op_shli_vec, 0 };
4203
4204const GVecGen2i sli_op[4] = {
4205    { .fni8 = gen_shl8_ins_i64,
4206      .fniv = gen_shl_ins_vec,
4207      .load_dest = true,
4208      .opt_opc = vecop_list_sli,
4209      .vece = MO_8 },
4210    { .fni8 = gen_shl16_ins_i64,
4211      .fniv = gen_shl_ins_vec,
4212      .load_dest = true,
4213      .opt_opc = vecop_list_sli,
4214      .vece = MO_16 },
4215    { .fni4 = gen_shl32_ins_i32,
4216      .fniv = gen_shl_ins_vec,
4217      .load_dest = true,
4218      .opt_opc = vecop_list_sli,
4219      .vece = MO_32 },
4220    { .fni8 = gen_shl64_ins_i64,
4221      .fniv = gen_shl_ins_vec,
4222      .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4223      .load_dest = true,
4224      .opt_opc = vecop_list_sli,
4225      .vece = MO_64 },
4226};
4227
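    /* Expanders for VMLA/VMLS (multiply accumulate / multiply subtract). */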
4228static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4229{
4230    gen_helper_neon_mul_u8(a, a, b);
4231    gen_helper_neon_add_u8(d, d, a);
4232}
4233
4234static void gen_mls8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4235{
4236    gen_helper_neon_mul_u8(a, a, b);
4237    gen_helper_neon_sub_u8(d, d, a);
4238}
4239
4240static void gen_mla16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4241{
4242    gen_helper_neon_mul_u16(a, a, b);
4243    gen_helper_neon_add_u16(d, d, a);
4244}
4245
4246static void gen_mls16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4247{
4248    gen_helper_neon_mul_u16(a, a, b);
4249    gen_helper_neon_sub_u16(d, d, a);
4250}
4251
4252static void gen_mla32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4253{
4254    tcg_gen_mul_i32(a, a, b);
4255    tcg_gen_add_i32(d, d, a);
4256}
4257
4258static void gen_mls32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4259{
4260    tcg_gen_mul_i32(a, a, b);
4261    tcg_gen_sub_i32(d, d, a);
4262}
4263
4264static void gen_mla64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4265{
4266    tcg_gen_mul_i64(a, a, b);
4267    tcg_gen_add_i64(d, d, a);
4268}
4269
4270static void gen_mls64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4271{
4272    tcg_gen_mul_i64(a, a, b);
4273    tcg_gen_sub_i64(d, d, a);
4274}
4275
4276static void gen_mla_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4277{
4278    tcg_gen_mul_vec(vece, a, a, b);
4279    tcg_gen_add_vec(vece, d, d, a);
4280}
4281
4282static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4283{
4284    tcg_gen_mul_vec(vece, a, a, b);
4285    tcg_gen_sub_vec(vece, d, d, a);
4286}
4287
4288/* Note that while NEON does not support VMLA and VMLS as 64-bit ops,
4289 * these tables are shared with AArch64 which does support them.
4290 */
4291
4292static const TCGOpcode vecop_list_mla[] = {
4293    INDEX_op_mul_vec, INDEX_op_add_vec, 0
4294};
4295
4296static const TCGOpcode vecop_list_mls[] = {
4297    INDEX_op_mul_vec, INDEX_op_sub_vec, 0
4298};
4299
4300const GVecGen3 mla_op[4] = {
4301    { .fni4 = gen_mla8_i32,
4302      .fniv = gen_mla_vec,
4303      .load_dest = true,
4304      .opt_opc = vecop_list_mla,
4305      .vece = MO_8 },
4306    { .fni4 = gen_mla16_i32,
4307      .fniv = gen_mla_vec,
4308      .load_dest = true,
4309      .opt_opc = vecop_list_mla,
4310      .vece = MO_16 },
4311    { .fni4 = gen_mla32_i32,
4312      .fniv = gen_mla_vec,
4313      .load_dest = true,
4314      .opt_opc = vecop_list_mla,
4315      .vece = MO_32 },
4316    { .fni8 = gen_mla64_i64,
4317      .fniv = gen_mla_vec,
4318      .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4319      .load_dest = true,
4320      .opt_opc = vecop_list_mla,
4321      .vece = MO_64 },
4322};
4323
4324const GVecGen3 mls_op[4] = {
4325    { .fni4 = gen_mls8_i32,
4326      .fniv = gen_mls_vec,
4327      .load_dest = true,
4328      .opt_opc = vecop_list_mls,
4329      .vece = MO_8 },
4330    { .fni4 = gen_mls16_i32,
4331      .fniv = gen_mls_vec,
4332      .load_dest = true,
4333      .opt_opc = vecop_list_mls,
4334      .vece = MO_16 },
4335    { .fni4 = gen_mls32_i32,
4336      .fniv = gen_mls_vec,
4337      .load_dest = true,
4338      .opt_opc = vecop_list_mls,
4339      .vece = MO_32 },
4340    { .fni8 = gen_mls64_i64,
4341      .fniv = gen_mls_vec,
4342      .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4343      .load_dest = true,
4344      .opt_opc = vecop_list_mls,
4345      .vece = MO_64 },
4346};
4347
4348/* CMTST : test is "if ((X & Y) != 0)". */
4349static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4350{
4351    tcg_gen_and_i32(d, a, b);
4352    tcg_gen_setcondi_i32(TCG_COND_NE, d, d, 0);
4353    tcg_gen_neg_i32(d, d);
4354}
4355
4356void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4357{
4358    tcg_gen_and_i64(d, a, b);
4359    tcg_gen_setcondi_i64(TCG_COND_NE, d, d, 0);
4360    tcg_gen_neg_i64(d, d);
4361}
4362
4363static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4364{
4365    tcg_gen_and_vec(vece, d, a, b);
4366    tcg_gen_dupi_vec(vece, a, 0);
4367    tcg_gen_cmp_vec(TCG_COND_NE, vece, d, d, a);
4368}
4369
4370static const TCGOpcode vecop_list_cmtst[] = { INDEX_op_cmp_vec, 0 };
4371
4372const GVecGen3 cmtst_op[4] = {
4373    { .fni4 = gen_helper_neon_tst_u8,
4374      .fniv = gen_cmtst_vec,
4375      .opt_opc = vecop_list_cmtst,
4376      .vece = MO_8 },
4377    { .fni4 = gen_helper_neon_tst_u16,
4378      .fniv = gen_cmtst_vec,
4379      .opt_opc = vecop_list_cmtst,
4380      .vece = MO_16 },
4381    { .fni4 = gen_cmtst_i32,
4382      .fniv = gen_cmtst_vec,
4383      .opt_opc = vecop_list_cmtst,
4384      .vece = MO_32 },
4385    { .fni8 = gen_cmtst_i64,
4386      .fniv = gen_cmtst_vec,
4387      .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4388      .opt_opc = vecop_list_cmtst,
4389      .vece = MO_64 },
4390};
4391
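    /* Saturating add/sub expanders: each computes both the saturated and
     * the wrapped result and ORs any mismatch into the 'sat' operand,
     * which the callers point at the QC saturation flags (vfp.qc).
     */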
4392static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4393                          TCGv_vec a, TCGv_vec b)
4394{
4395    TCGv_vec x = tcg_temp_new_vec_matching(t);
4396    tcg_gen_add_vec(vece, x, a, b);
4397    tcg_gen_usadd_vec(vece, t, a, b);
4398    tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4399    tcg_gen_or_vec(vece, sat, sat, x);
4400    tcg_temp_free_vec(x);
4401}
4402
4403static const TCGOpcode vecop_list_uqadd[] = {
4404    INDEX_op_usadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
4405};
4406
4407const GVecGen4 uqadd_op[4] = {
4408    { .fniv = gen_uqadd_vec,
4409      .fno = gen_helper_gvec_uqadd_b,
4410      .write_aofs = true,
4411      .opt_opc = vecop_list_uqadd,
4412      .vece = MO_8 },
4413    { .fniv = gen_uqadd_vec,
4414      .fno = gen_helper_gvec_uqadd_h,
4415      .write_aofs = true,
4416      .opt_opc = vecop_list_uqadd,
4417      .vece = MO_16 },
4418    { .fniv = gen_uqadd_vec,
4419      .fno = gen_helper_gvec_uqadd_s,
4420      .write_aofs = true,
4421      .opt_opc = vecop_list_uqadd,
4422      .vece = MO_32 },
4423    { .fniv = gen_uqadd_vec,
4424      .fno = gen_helper_gvec_uqadd_d,
4425      .write_aofs = true,
4426      .opt_opc = vecop_list_uqadd,
4427      .vece = MO_64 },
4428};
4429
4430static void gen_sqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4431                          TCGv_vec a, TCGv_vec b)
4432{
4433    TCGv_vec x = tcg_temp_new_vec_matching(t);
4434    tcg_gen_add_vec(vece, x, a, b);
4435    tcg_gen_ssadd_vec(vece, t, a, b);
4436    tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4437    tcg_gen_or_vec(vece, sat, sat, x);
4438    tcg_temp_free_vec(x);
4439}
4440
4441static const TCGOpcode vecop_list_sqadd[] = {
4442    INDEX_op_ssadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
4443};
4444
4445const GVecGen4 sqadd_op[4] = {
4446    { .fniv = gen_sqadd_vec,
4447      .fno = gen_helper_gvec_sqadd_b,
4448      .opt_opc = vecop_list_sqadd,
4449      .write_aofs = true,
4450      .vece = MO_8 },
4451    { .fniv = gen_sqadd_vec,
4452      .fno = gen_helper_gvec_sqadd_h,
4453      .opt_opc = vecop_list_sqadd,
4454      .write_aofs = true,
4455      .vece = MO_16 },
4456    { .fniv = gen_sqadd_vec,
4457      .fno = gen_helper_gvec_sqadd_s,
4458      .opt_opc = vecop_list_sqadd,
4459      .write_aofs = true,
4460      .vece = MO_32 },
4461    { .fniv = gen_sqadd_vec,
4462      .fno = gen_helper_gvec_sqadd_d,
4463      .opt_opc = vecop_list_sqadd,
4464      .write_aofs = true,
4465      .vece = MO_64 },
4466};
4467
4468static void gen_uqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4469                          TCGv_vec a, TCGv_vec b)
4470{
4471    TCGv_vec x = tcg_temp_new_vec_matching(t);
4472    tcg_gen_sub_vec(vece, x, a, b);
4473    tcg_gen_ussub_vec(vece, t, a, b);
4474    tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4475    tcg_gen_or_vec(vece, sat, sat, x);
4476    tcg_temp_free_vec(x);
4477}
4478
4479static const TCGOpcode vecop_list_uqsub[] = {
4480    INDEX_op_ussub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
4481};
4482
4483const GVecGen4 uqsub_op[4] = {
4484    { .fniv = gen_uqsub_vec,
4485      .fno = gen_helper_gvec_uqsub_b,
4486      .opt_opc = vecop_list_uqsub,
4487      .write_aofs = true,
4488      .vece = MO_8 },
4489    { .fniv = gen_uqsub_vec,
4490      .fno = gen_helper_gvec_uqsub_h,
4491      .opt_opc = vecop_list_uqsub,
4492      .write_aofs = true,
4493      .vece = MO_16 },
4494    { .fniv = gen_uqsub_vec,
4495      .fno = gen_helper_gvec_uqsub_s,
4496      .opt_opc = vecop_list_uqsub,
4497      .write_aofs = true,
4498      .vece = MO_32 },
4499    { .fniv = gen_uqsub_vec,
4500      .fno = gen_helper_gvec_uqsub_d,
4501      .opt_opc = vecop_list_uqsub,
4502      .write_aofs = true,
4503      .vece = MO_64 },
4504};
4505
4506static void gen_sqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4507                          TCGv_vec a, TCGv_vec b)
4508{
4509    TCGv_vec x = tcg_temp_new_vec_matching(t);
4510    tcg_gen_sub_vec(vece, x, a, b);
4511    tcg_gen_sssub_vec(vece, t, a, b);
4512    tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4513    tcg_gen_or_vec(vece, sat, sat, x);
4514    tcg_temp_free_vec(x);
4515}
4516
4517static const TCGOpcode vecop_list_sqsub[] = {
4518    INDEX_op_sssub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
4519};
4520
4521const GVecGen4 sqsub_op[4] = {
4522    { .fniv = gen_sqsub_vec,
4523      .fno = gen_helper_gvec_sqsub_b,
4524      .opt_opc = vecop_list_sqsub,
4525      .write_aofs = true,
4526      .vece = MO_8 },
4527    { .fniv = gen_sqsub_vec,
4528      .fno = gen_helper_gvec_sqsub_h,
4529      .opt_opc = vecop_list_sqsub,
4530      .write_aofs = true,
4531      .vece = MO_16 },
4532    { .fniv = gen_sqsub_vec,
4533      .fno = gen_helper_gvec_sqsub_s,
4534      .opt_opc = vecop_list_sqsub,
4535      .write_aofs = true,
4536      .vece = MO_32 },
4537    { .fniv = gen_sqsub_vec,
4538      .fno = gen_helper_gvec_sqsub_d,
4539      .opt_opc = vecop_list_sqsub,
4540      .write_aofs = true,
4541      .vece = MO_64 },
4542};
4543
4544/* Translate a NEON data processing instruction.  Return nonzero if the
4545   instruction is invalid.
4546   We process data in a mixture of 32-bit and 64-bit chunks.
4547   Mostly we use 32-bit chunks so we can use normal scalar instructions.  */
4548
4549static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
4550{
4551    int op;
4552    int q;
4553    int rd, rn, rm, rd_ofs, rn_ofs, rm_ofs;
4554    int size;
4555    int shift;
4556    int pass;
4557    int count;
4558    int pairwise;
4559    int u;
4560    int vec_size;
4561    uint32_t imm;
4562    TCGv_i32 tmp, tmp2, tmp3, tmp4, tmp5;
4563    TCGv_ptr ptr1, ptr2, ptr3;
4564    TCGv_i64 tmp64;
4565
4566    /* FIXME: this access check should not take precedence over UNDEF
4567     * for invalid encodings; we will generate incorrect syndrome information
4568     * for attempts to execute invalid vfp/neon encodings with FP disabled.
4569     */
4570    if (s->fp_excp_el) {
4571        gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
4572                           syn_simd_access_trap(1, 0xe, false), s->fp_excp_el);
4573        return 0;
4574    }
4575
4576    if (!s->vfp_enabled) {
4577        return 1;
        }
4578    q = (insn & (1 << 6)) != 0;
4579    u = (insn >> 24) & 1;
4580    VFP_DREG_D(rd, insn);
4581    VFP_DREG_N(rn, insn);
4582    VFP_DREG_M(rm, insn);
4583    size = (insn >> 20) & 3;
4584    vec_size = q ? 16 : 8;
4585    rd_ofs = neon_reg_offset(rd, 0);
4586    rn_ofs = neon_reg_offset(rn, 0);
4587    rm_ofs = neon_reg_offset(rm, 0);
4588
4589    if ((insn & (1 << 23)) == 0) {
4590        /* Three register same length.  */
4591        op = ((insn >> 7) & 0x1e) | ((insn >> 4) & 1);
4592        /* Catch invalid op and bad size combinations: UNDEF */
4593        if ((neon_3r_sizes[op] & (1 << size)) == 0) {
4594            return 1;
4595        }
4596        /* All insns of this form UNDEF for either this condition or the
4597         * superset of cases "Q==1"; we catch the latter later.
4598         */
4599        if (q && ((rd | rn | rm) & 1)) {
4600            return 1;
4601        }
4602        switch (op) {
4603        case NEON_3R_SHA:
4604            /* The SHA-1/SHA-256 3-register instructions require special
4605             * treatment here, as their size field is overloaded as an
4606             * op type selector, and they all consume their input in a
4607             * single pass.
4608             */
4609            if (!q) {
4610                return 1;
4611            }
4612            if (!u) { /* SHA-1 */
4613                if (!dc_isar_feature(aa32_sha1, s)) {
4614                    return 1;
4615                }
4616                ptr1 = vfp_reg_ptr(true, rd);
4617                ptr2 = vfp_reg_ptr(true, rn);
4618                ptr3 = vfp_reg_ptr(true, rm);
4619                tmp4 = tcg_const_i32(size);
4620                gen_helper_crypto_sha1_3reg(ptr1, ptr2, ptr3, tmp4);
4621                tcg_temp_free_i32(tmp4);
4622            } else { /* SHA-256 */
4623                if (!dc_isar_feature(aa32_sha2, s) || size == 3) {
4624                    return 1;
4625                }
4626                ptr1 = vfp_reg_ptr(true, rd);
4627                ptr2 = vfp_reg_ptr(true, rn);
4628                ptr3 = vfp_reg_ptr(true, rm);
4629                switch (size) {
4630                case 0:
4631                    gen_helper_crypto_sha256h(ptr1, ptr2, ptr3);
4632                    break;
4633                case 1:
4634                    gen_helper_crypto_sha256h2(ptr1, ptr2, ptr3);
4635                    break;
4636                case 2:
4637                    gen_helper_crypto_sha256su1(ptr1, ptr2, ptr3);
4638                    break;
4639                }
4640            }
4641            tcg_temp_free_ptr(ptr1);
4642            tcg_temp_free_ptr(ptr2);
4643            tcg_temp_free_ptr(ptr3);
4644            return 0;
4645
4646        case NEON_3R_VPADD_VQRDMLAH:
4647            if (!u) {
4648                break;  /* VPADD */
4649            }
4650            /* VQRDMLAH */
4651            switch (size) {
4652            case 1:
4653                return do_v81_helper(s, gen_helper_gvec_qrdmlah_s16,
4654                                     q, rd, rn, rm);
4655            case 2:
4656                return do_v81_helper(s, gen_helper_gvec_qrdmlah_s32,
4657                                     q, rd, rn, rm);
4658            }
4659            return 1;
4660
4661        case NEON_3R_VFM_VQRDMLSH:
4662            if (!u) {
4663                /* VFM, VFMS */
4664                if (size == 1) {
4665                    return 1;
4666                }
4667                break;
4668            }
4669            /* VQRDMLSH */
4670            switch (size) {
4671            case 1:
4672                return do_v81_helper(s, gen_helper_gvec_qrdmlsh_s16,
4673                                     q, rd, rn, rm);
4674            case 2:
4675                return do_v81_helper(s, gen_helper_gvec_qrdmlsh_s32,
4676                                     q, rd, rn, rm);
4677            }
4678            return 1;
4679
4680        case NEON_3R_LOGIC: /* Logic ops.  */
4681            switch ((u << 2) | size) {
4682            case 0: /* VAND */
4683                tcg_gen_gvec_and(0, rd_ofs, rn_ofs, rm_ofs,
4684                                 vec_size, vec_size);
4685                break;
4686            case 1: /* VBIC */
4687                tcg_gen_gvec_andc(0, rd_ofs, rn_ofs, rm_ofs,
4688                                  vec_size, vec_size);
4689                break;
4690            case 2: /* VORR */
4691                tcg_gen_gvec_or(0, rd_ofs, rn_ofs, rm_ofs,
4692                                vec_size, vec_size);
4693                break;
4694            case 3: /* VORN */
4695                tcg_gen_gvec_orc(0, rd_ofs, rn_ofs, rm_ofs,
4696                                 vec_size, vec_size);
4697                break;
4698            case 4: /* VEOR */
4699                tcg_gen_gvec_xor(0, rd_ofs, rn_ofs, rm_ofs,
4700                                 vec_size, vec_size);
4701                break;
4702            case 5: /* VBSL */
4703                tcg_gen_gvec_bitsel(MO_8, rd_ofs, rd_ofs, rn_ofs, rm_ofs,
4704                                    vec_size, vec_size);
4705                break;
4706            case 6: /* VBIT */
4707                tcg_gen_gvec_bitsel(MO_8, rd_ofs, rm_ofs, rn_ofs, rd_ofs,
4708                                    vec_size, vec_size);
4709                break;
4710            case 7: /* VBIF */
4711                tcg_gen_gvec_bitsel(MO_8, rd_ofs, rm_ofs, rd_ofs, rn_ofs,
4712                                    vec_size, vec_size);
4713                break;
4714            }
4715            return 0;
4716
4717        case NEON_3R_VADD_VSUB:
4718            if (u) {
4719                tcg_gen_gvec_sub(size, rd_ofs, rn_ofs, rm_ofs,
4720                                 vec_size, vec_size);
4721            } else {
4722                tcg_gen_gvec_add(size, rd_ofs, rn_ofs, rm_ofs,
4723                                 vec_size, vec_size);
4724            }
4725            return 0;
4726
4727        case NEON_3R_VQADD:
4728            tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4729                           rn_ofs, rm_ofs, vec_size, vec_size,
4730                           (u ? uqadd_op : sqadd_op) + size);
4731            return 0;
4732
4733        case NEON_3R_VQSUB:
4734            tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4735                           rn_ofs, rm_ofs, vec_size, vec_size,
4736                           (u ? uqsub_op : sqsub_op) + size);
4737            return 0;
4738
4739        case NEON_3R_VMUL: /* VMUL */
4740            if (u) {
4741                /* Polynomial case allows only P8 and is handled below.  */
4742                if (size != 0) {
4743                    return 1;
4744                }
4745            } else {
4746                tcg_gen_gvec_mul(size, rd_ofs, rn_ofs, rm_ofs,
4747                                 vec_size, vec_size);
4748                return 0;
4749            }
4750            break;
4751
4752        case NEON_3R_VML: /* VMLA, VMLS */
4753            tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size,
4754                           u ? &mls_op[size] : &mla_op[size]);
4755            return 0;
4756
4757        case NEON_3R_VTST_VCEQ:
4758            if (u) { /* VCEQ */
4759                tcg_gen_gvec_cmp(TCG_COND_EQ, size, rd_ofs, rn_ofs, rm_ofs,
4760                                 vec_size, vec_size);
4761            } else { /* VTST */
4762                tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs,
4763                               vec_size, vec_size, &cmtst_op[size]);
4764            }
4765            return 0;
4766
4767        case NEON_3R_VCGT:
4768            tcg_gen_gvec_cmp(u ? TCG_COND_GTU : TCG_COND_GT, size,
4769                             rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size);
4770            return 0;
4771
4772        case NEON_3R_VCGE:
4773            tcg_gen_gvec_cmp(u ? TCG_COND_GEU : TCG_COND_GE, size,
4774                             rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size);
4775            return 0;
4776
4777        case NEON_3R_VMAX:
4778            if (u) {
4779                tcg_gen_gvec_umax(size, rd_ofs, rn_ofs, rm_ofs,
4780                                  vec_size, vec_size);
4781            } else {
4782                tcg_gen_gvec_smax(size, rd_ofs, rn_ofs, rm_ofs,
4783                                  vec_size, vec_size);
4784            }
4785            return 0;
4786        case NEON_3R_VMIN:
4787            if (u) {
4788                tcg_gen_gvec_umin(size, rd_ofs, rn_ofs, rm_ofs,
4789                                  vec_size, vec_size);
4790            } else {
4791                tcg_gen_gvec_smin(size, rd_ofs, rn_ofs, rm_ofs,
4792                                  vec_size, vec_size);
4793            }
4794            return 0;
4795        }
4796
4797        if (size == 3) {
4798            /* 64-bit element instructions. */
4799            for (pass = 0; pass < (q ? 2 : 1); pass++) {
4800                neon_load_reg64(cpu_V0, rn + pass);
4801                neon_load_reg64(cpu_V1, rm + pass);
4802                switch (op) {
4803                case NEON_3R_VSHL:
4804                    if (u) {
4805                        gen_helper_neon_shl_u64(cpu_V0, cpu_V1, cpu_V0);
4806                    } else {
4807                        gen_helper_neon_shl_s64(cpu_V0, cpu_V1, cpu_V0);
4808                    }
4809                    break;
4810                case NEON_3R_VQSHL:
4811                    if (u) {
4812                        gen_helper_neon_qshl_u64(cpu_V0, cpu_env,
4813                                                 cpu_V1, cpu_V0);
4814                    } else {
4815                        gen_helper_neon_qshl_s64(cpu_V0, cpu_env,
4816                                                 cpu_V1, cpu_V0);
4817                    }
4818                    break;
4819                case NEON_3R_VRSHL:
4820                    if (u) {
4821                        gen_helper_neon_rshl_u64(cpu_V0, cpu_V1, cpu_V0);
4822                    } else {
4823                        gen_helper_neon_rshl_s64(cpu_V0, cpu_V1, cpu_V0);
4824                    }
4825                    break;
4826                case NEON_3R_VQRSHL:
4827                    if (u) {
4828                        gen_helper_neon_qrshl_u64(cpu_V0, cpu_env,
4829                                                  cpu_V1, cpu_V0);
4830                    } else {
4831                        gen_helper_neon_qrshl_s64(cpu_V0, cpu_env,
4832                                                  cpu_V1, cpu_V0);
4833                    }
4834                    break;
4835                default:
4836                    abort();
4837                }
4838                neon_store_reg64(cpu_V0, rd + pass);
4839            }
4840            return 0;
4841        }
4842        pairwise = 0;
4843        switch (op) {
4844        case NEON_3R_VSHL:
4845        case NEON_3R_VQSHL:
4846        case NEON_3R_VRSHL:
4847        case NEON_3R_VQRSHL:
4848            {
4849                int rtmp;
4850                /* Shift instruction operands are reversed.  */
4851                rtmp = rn;
4852                rn = rm;
4853                rm = rtmp;
4854            }
4855            break;
4856        case NEON_3R_VPADD_VQRDMLAH:
4857        case NEON_3R_VPMAX:
4858        case NEON_3R_VPMIN:
4859            pairwise = 1;
4860            break;
4861        case NEON_3R_FLOAT_ARITH:
4862            pairwise = (u && size < 2); /* if VPADD (float) */
4863            break;
4864        case NEON_3R_FLOAT_MINMAX:
4865            pairwise = u; /* if VPMIN/VPMAX (float) */
4866            break;
4867        case NEON_3R_FLOAT_CMP:
4868            if (!u && size) {
4869                /* no encoding for U=0 C=1x */
4870                return 1;
4871            }
4872            break;
4873        case NEON_3R_FLOAT_ACMP:
4874            if (!u) {
4875                return 1;
4876            }
4877            break;
4878        case NEON_3R_FLOAT_MISC:
4879            /* VMAXNM/VMINNM in ARMv8 */
4880            if (u && !arm_dc_feature(s, ARM_FEATURE_V8)) {
4881                return 1;
4882            }
4883            break;
4884        case NEON_3R_VFM_VQRDMLSH:
4885            if (!arm_dc_feature(s, ARM_FEATURE_VFP4)) {
4886                return 1;
4887            }
4888            break;
4889        default:
4890            break;
4891        }
4892
4893        if (pairwise && q) {
4894            /* All the pairwise insns UNDEF if Q is set */
4895            return 1;
4896        }
4897
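            /* The remaining ops are processed 32 bits at a time, one pass
             * per 32-bit chunk of the operand registers.
             */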
4898        for (pass = 0; pass < (q ? 4 : 2); pass++) {
4899
4900        if (pairwise) {
4901            /* Pairwise.  */
4902            if (pass < 1) {
4903                tmp = neon_load_reg(rn, 0);
4904                tmp2 = neon_load_reg(rn, 1);
4905            } else {
4906                tmp = neon_load_reg(rm, 0);
4907                tmp2 = neon_load_reg(rm, 1);
4908            }
4909        } else {
4910            /* Elementwise.  */
4911            tmp = neon_load_reg(rn, pass);
4912            tmp2 = neon_load_reg(rm, pass);
4913        }
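            /* GEN_NEON_INTEGER_OP() and its _ENV variant expand to the
             * signed or unsigned 8/16/32-bit helper selected by the
             * current size and the U bit.
             */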
4914        switch (op) {
4915        case NEON_3R_VHADD:
4916            GEN_NEON_INTEGER_OP(hadd);
4917            break;
4918        case NEON_3R_VRHADD:
4919            GEN_NEON_INTEGER_OP(rhadd);
4920            break;
4921        case NEON_3R_VHSUB:
4922            GEN_NEON_INTEGER_OP(hsub);
4923            break;
4924        case NEON_3R_VSHL:
4925            GEN_NEON_INTEGER_OP(shl);
4926            break;
4927        case NEON_3R_VQSHL:
4928            GEN_NEON_INTEGER_OP_ENV(qshl);
4929            break;
4930        case NEON_3R_VRSHL:
4931            GEN_NEON_INTEGER_OP(rshl);
4932            break;
4933        case NEON_3R_VQRSHL:
4934            GEN_NEON_INTEGER_OP_ENV(qrshl);
4935            break;
4936        case NEON_3R_VABD:
4937            GEN_NEON_INTEGER_OP(abd);
4938            break;
4939        case NEON_3R_VABA:
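                /* VABA: absolute difference, then accumulate into rd.  */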
4940            GEN_NEON_INTEGER_OP(abd);
4941            tcg_temp_free_i32(tmp2);
4942            tmp2 = neon_load_reg(rd, pass);
4943            gen_neon_add(size, tmp, tmp2);
4944            break;
4945        case NEON_3R_VMUL:
4946            /* VMUL.P8; other cases already eliminated.  */
4947            gen_helper_neon_mul_p8(tmp, tmp, tmp2);
4948            break;
4949        case NEON_3R_VPMAX:
4950            GEN_NEON_INTEGER_OP(pmax);
4951            break;
4952        case NEON_3R_VPMIN:
4953            GEN_NEON_INTEGER_OP(pmin);
4954            break;
4955        case NEON_3R_VQDMULH_VQRDMULH: /* Multiply high.  */
4956            if (!u) { /* VQDMULH */
4957                switch (size) {
4958                case 1:
4959                    gen_helper_neon_qdmulh_s16(tmp, cpu_env, tmp, tmp2);
4960                    break;
4961                case 2:
4962                    gen_helper_neon_qdmulh_s32(tmp, cpu_env, tmp, tmp2);
4963                    break;
4964                default: abort();
4965                }
4966            } else { /* VQRDMULH */
4967                switch (size) {
4968                case 1:
4969                    gen_helper_neon_qrdmulh_s16(tmp, cpu_env, tmp, tmp2);
4970                    break;
4971                case 2:
4972                    gen_helper_neon_qrdmulh_s32(tmp, cpu_env, tmp, tmp2);
4973                    break;
4974                default: abort();
4975                }
4976            }
4977            break;
4978        case NEON_3R_VPADD_VQRDMLAH:
4979            switch (size) {
4980            case 0: gen_helper_neon_padd_u8(tmp, tmp, tmp2); break;
4981            case 1: gen_helper_neon_padd_u16(tmp, tmp, tmp2); break;
4982            case 2: tcg_gen_add_i32(tmp, tmp, tmp2); break;
4983            default: abort();
4984            }
4985            break;
4986        case NEON_3R_FLOAT_ARITH: /* Floating point arithmetic. */
4987        {
4988            TCGv_ptr fpstatus = get_fpstatus_ptr(1);
4989            switch ((u << 2) | size) {
4990            case 0: /* VADD */
4991            case 4: /* VPADD */
4992                gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus);
4993                break;
4994            case 2: /* VSUB */
4995                gen_helper_vfp_subs(tmp, tmp, tmp2, fpstatus);
4996                break;
4997            case 6: /* VABD */
4998                gen_helper_neon_abd_f32(tmp, tmp, tmp2, fpstatus);
4999                break;
5000            default:
5001                abort();
5002            }
5003            tcg_temp_free_ptr(fpstatus);
5004            break;
5005        }
5006        case NEON_3R_FLOAT_MULTIPLY:
5007        {
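                /* With U set this is plain VMUL.F32; with U clear it is
                 * VMLA.F32 or VMLS.F32 (selected by the size field), with
                 * the accumulate or subtract folded in below.
                 */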
5008            TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5009            gen_helper_vfp_muls(tmp, tmp, tmp2, fpstatus);
5010            if (!u) {
5011                tcg_temp_free_i32(tmp2);
5012                tmp2 = neon_load_reg(rd, pass);
5013                if (size == 0) {
5014                    gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus);
5015                } else {
5016                    gen_helper_vfp_subs(tmp, tmp2, tmp, fpstatus);
5017                }
5018            }
5019            tcg_temp_free_ptr(fpstatus);
5020            break;
5021        }
5022        case NEON_3R_FLOAT_CMP:
5023        {
5024            TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5025            if (!u) {
5026                gen_helper_neon_ceq_f32(tmp, tmp, tmp2, fpstatus);
5027            } else {
5028                if (size == 0) {
5029                    gen_helper_neon_cge_f32(tmp, tmp, tmp2, fpstatus);
5030                } else {
5031                    gen_helper_neon_cgt_f32(tmp, tmp, tmp2, fpstatus);
5032                }
5033            }
5034            tcg_temp_free_ptr(fpstatus);
5035            break;
5036        }
5037        case NEON_3R_FLOAT_ACMP:
5038        {
5039            TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5040            if (size == 0) {
5041                gen_helper_neon_acge_f32(tmp, tmp, tmp2, fpstatus);
5042            } else {
5043                gen_helper_neon_acgt_f32(tmp, tmp, tmp2, fpstatus);
5044            }
5045            tcg_temp_free_ptr(fpstatus);
5046            break;
5047        }
5048        case NEON_3R_FLOAT_MINMAX:
5049        {
5050            TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5051            if (size == 0) {
5052                gen_helper_vfp_maxs(tmp, tmp, tmp2, fpstatus);
5053            } else {
5054                gen_helper_vfp_mins(tmp, tmp, tmp2, fpstatus);
5055            }
5056            tcg_temp_free_ptr(fpstatus);
5057            break;
5058        }
5059        case NEON_3R_FLOAT_MISC:
5060            if (u) {
5061                /* VMAXNM/VMINNM */
5062                TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5063                if (size == 0) {
5064                    gen_helper_vfp_maxnums(tmp, tmp, tmp2, fpstatus);
5065                } else {
5066                    gen_helper_vfp_minnums(tmp, tmp, tmp2, fpstatus);
5067                }
5068                tcg_temp_free_ptr(fpstatus);
5069            } else {
5070                if (size == 0) {
5071                    gen_helper_recps_f32(tmp, tmp, tmp2, cpu_env);
5072                } else {
5073                    gen_helper_rsqrts_f32(tmp, tmp, tmp2, cpu_env);
5074                }
5075            }
5076            break;
5077        case NEON_3R_VFM_VQRDMLSH:
5078        {
5079            /* VFMA, VFMS: fused multiply-add */
5080            TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5081            TCGv_i32 tmp3 = neon_load_reg(rd, pass);
5082            if (size) {
5083                /* VFMS */
5084                gen_helper_vfp_negs(tmp, tmp);
5085            }
5086            gen_helper_vfp_muladds(tmp, tmp, tmp2, tmp3, fpstatus);
5087            tcg_temp_free_i32(tmp3);
5088            tcg_temp_free_ptr(fpstatus);
5089            break;
5090        }
5091        default:
5092            abort();
5093        }
5094        tcg_temp_free_i32(tmp2);
5095
5096        /* Save the result.  For elementwise operations we can put it
5097           straight into the destination register.  For pairwise operations
5098           we have to be careful to avoid clobbering the source operands.  */
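            /* (With rd == rm, storing pass 0's result directly would
             * clobber the rm elements that pass 1 still needs, hence the
             * scratch buffer.)
             */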
5099        if (pairwise && rd == rm) {
5100            neon_store_scratch(pass, tmp);
5101        } else {
5102            neon_store_reg(rd, pass, tmp);
5103        }
5104
5105        } /* for pass */
5106        if (pairwise && rd == rm) {
5107            for (pass = 0; pass < (q ? 4 : 2); pass++) {
5108                tmp = neon_load_scratch(pass);
5109                neon_store_reg(rd, pass, tmp);
5110            }
5111        }
5112        /* End of 3 register same size operations.  */
5113    } else if (insn & (1 << 4)) {
5114        if ((insn & 0x00380080) != 0) {
5115            /* Two registers and shift.  */
5116            op = (insn >> 8) & 0xf;
5117            if (insn & (1 << 7)) {
5118                /* 64-bit shift. */
5119                if (op > 7) {
5120                    return 1;
5121                }
5122                size = 3;
5123            } else {
5124                size = 2;
5125                while ((insn & (1 << (size + 19))) == 0)
5126                    size--;
5127            }
5128            shift = (insn >> 16) & ((1 << (3 + size)) - 1);
5129            if (op < 8) {
5130                /* Shift by immediate:
5131                   VSHR, VSRA, VRSHR, VRSRA, VSRI, VSHL, VQSHL, VQSHLU.  */
5132                if (q && ((rd | rm) & 1)) {
5133                    return 1;
5134                }
5135                if (!u && (op == 4 || op == 6)) {
5136                    return 1;
5137                }
5138                /* Right shifts are encoded as N - shift, where N is the
5139                   element size in bits.  */
5140                if (op <= 4) {
5141                    shift = shift - (1 << (size + 3));
5142                }
5143
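                    /* For example, an 8-bit VSHR #3 is encoded with
                     * imm6 = 16 - 3 = 13; after the size bit is masked off,
                     * shift is 5 - 8 = -3 at this point, and the
                     * VSHR/VSRA/VSRI cases below negate it back to 3.
                     */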
5144                switch (op) {
5145                case 0:  /* VSHR */
5146                    /* The right shift count arrives here negated.  */
5147                    shift = -shift;
5148                    /* Shifts larger than the element size are architecturally
5149                     * valid.  An unsigned shift produces all zeros; a signed
5150                     * shift produces all sign bits.
5151                     */
5152                    if (!u) {
5153                        tcg_gen_gvec_sari(size, rd_ofs, rm_ofs,
5154                                          MIN(shift, (8 << size) - 1),
5155                                          vec_size, vec_size);
5156                    } else if (shift >= 8 << size) {
5157                        tcg_gen_gvec_dup8i(rd_ofs, vec_size, vec_size, 0);
5158                    } else {
5159                        tcg_gen_gvec_shri(size, rd_ofs, rm_ofs, shift,
5160                                          vec_size, vec_size);
5161                    }
5162                    return 0;
5163
5164                case 1:  /* VSRA */
5165                    /* The right shift count arrives here negated.  */
5166                    shift = -shift;
5167                    /* Shifts larger than the element size are architecturally
5168                     * valid.  An unsigned shift produces all zeros; a signed
5169                     * shift produces all sign bits.
5170                     */
5171                    if (!u) {
5172                        tcg_gen_gvec_2i(rd_ofs, rm_ofs, vec_size, vec_size,
5173                                        MIN(shift, (8 << size) - 1),
5174                                        &ssra_op[size]);
5175                    } else if (shift >= 8 << size) {
5176                        /* rd += 0 */
5177                    } else {
5178                        tcg_gen_gvec_2i(rd_ofs, rm_ofs, vec_size, vec_size,
5179                                        shift, &usra_op[size]);
5180                    }
5181                    return 0;
5182
5183                case 4: /* VSRI */
5184                    if (!u) {
5185                        return 1;
5186                    }
5187                    /* The right shift count arrives here negated.  */
5188                    shift = -shift;
5189                    /* Shift out of range leaves destination unchanged.  */
5190                    if (shift < 8 << size) {
5191                        tcg_gen_gvec_2i(rd_ofs, rm_ofs, vec_size, vec_size,
5192                                        shift, &sri_op[size]);
5193                    }
5194                    return 0;
5195
5196                case 5: /* VSHL, VSLI */
5197                    if (u) { /* VSLI */
5198                        /* Shift out of range leaves destination unchanged.  */
5199                        if (shift < 8 << size) {
5200                            tcg_gen_gvec_2i(rd_ofs, rm_ofs, vec_size,
5201                                            vec_size, shift, &sli_op[size]);
5202                        }
5203                    } else { /* VSHL */
5204                        /* Shifts larger than the element size are
5205                         * architecturally valid and produce zero.
5206                         */
5207                        if (shift >= 8 << size) {
5208                            tcg_gen_gvec_dup8i(rd_ofs, vec_size, vec_size, 0);
5209                        } else {
5210                            tcg_gen_gvec_shli(size, rd_ofs, rm_ofs, shift,
5211                                              vec_size, vec_size);
5212                        }
5213                    }
5214                    return 0;
5215                }
5216
5217                if (size == 3) {
5218                    count = q + 1;
5219                } else {
5220                    count = q ? 4 : 2;
5221                }
5222
5223                /* To avoid excessive duplication of ops we implement shift
5224                 * by immediate using the variable shift operations.
5225                 */
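                    /* dup_const() replicates the (for right shifts, negative)
                     * shift count into every element lane; e.g.
                     * dup_const(MO_8, -3) is 0xfdfdfdfdfdfdfdfdull.
                     */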
5226                imm = dup_const(size, shift);
5227
5228                for (pass = 0; pass < count; pass++) {
5229                    if (size == 3) {
5230                        neon_load_reg64(cpu_V0, rm + pass);
5231                        tcg_gen_movi_i64(cpu_V1, imm);
5232                        switch (op) {
5233                        case 2: /* VRSHR */
5234                        case 3: /* VRSRA */
5235                            if (u)
5236                                gen_helper_neon_rshl_u64(cpu_V0, cpu_V0, cpu_V1);
5237                            else
5238                                gen_helper_neon_rshl_s64(cpu_V0, cpu_V0, cpu_V1);
5239                            break;
5240                        case 6: /* VQSHLU */
5241                            gen_helper_neon_qshlu_s64(cpu_V0, cpu_env,
5242                                                      cpu_V0, cpu_V1);
5243                            break;
5244                        case 7: /* VQSHL */
5245                            if (u) {
5246                                gen_helper_neon_qshl_u64(cpu_V0, cpu_env,
5247                                                         cpu_V0, cpu_V1);
5248                            } else {
5249                                gen_helper_neon_qshl_s64(cpu_V0, cpu_env,
5250                                                         cpu_V0, cpu_V1);
5251                            }
5252                            break;
5253                        default:
5254                            g_assert_not_reached();
5255                        }
5256                        if (op == 3) {
5257                            /* Accumulate.  */
5258                            neon_load_reg64(cpu_V1, rd + pass);
5259                            tcg_gen_add_i64(cpu_V0, cpu_V0, cpu_V1);
5260                        }
5261                        neon_store_reg64(cpu_V0, rd + pass);
5262                    } else { /* size < 3 */
5263                        /* Operands in tmp (element) and tmp2 (shift count).  */
5264                        tmp = neon_load_reg(rm, pass);
5265                        tmp2 = tcg_temp_new_i32();
5266                        tcg_gen_movi_i32(tmp2, imm);
5267                        switch (op) {
5268                        case 2: /* VRSHR */
5269                        case 3: /* VRSRA */
5270                            GEN_NEON_INTEGER_OP(rshl);
5271                            break;
5272                        case 6: /* VQSHLU */
5273                            switch (size) {
5274                            case 0:
5275                                gen_helper_neon_qshlu_s8(tmp, cpu_env,
5276                                                         tmp, tmp2);
5277                                break;
5278                            case 1:
5279                                gen_helper_neon_qshlu_s16(tmp, cpu_env,
5280                                                          tmp, tmp2);
5281                                break;
5282                            case 2:
5283                                gen_helper_neon_qshlu_s32(tmp, cpu_env,
5284                                                          tmp, tmp2);
5285                                break;
5286                            default:
5287                                abort();
5288                            }
5289                            break;
5290                        case 7: /* VQSHL */
5291                            GEN_NEON_INTEGER_OP_ENV(qshl);
5292                            break;
5293                        default:
5294                            g_assert_not_reached();
5295                        }
5296                        tcg_temp_free_i32(tmp2);
5297
5298                        if (op == 3) {
5299                            /* Accumulate.  */
5300                            tmp2 = neon_load_reg(rd, pass);
5301                            gen_neon_add(size, tmp, tmp2);
5302                            tcg_temp_free_i32(tmp2);
5303                        }
5304                        neon_store_reg(rd, pass, tmp);
5305                    }
5306                } /* for pass */
5307            } else if (op < 10) {
5308                /* Shift by immediate and narrow:
5309                   VSHRN, VRSHRN, VQSHRN, VQRSHRN.  */
5310                int input_unsigned = (op == 8) ? !u : u;
5311                if (rm & 1) {
5312                    return 1;
5313                }
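                    /* Right shift counts are encoded as 2*N - imm6, N being
                     * the narrow element width, so this leaves shift holding
                     * minus the real count; size is then bumped to describe
                     * the double-width input elements.
                     */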
5314                shift = shift - (1 << (size + 3));
5315                size++;
5316                if (size == 3) {
5317                    tmp64 = tcg_const_i64(shift);
5318                    neon_load_reg64(cpu_V0, rm);
5319                    neon_load_reg64(cpu_V1, rm + 1);
5320                    for (pass = 0; pass < 2; pass++) {
5321                        TCGv_i64 in;
5322                        if (pass == 0) {
5323                            in = cpu_V0;
5324                        } else {
5325                            in = cpu_V1;
5326                        }
5327                        if (q) {
5328                            if (input_unsigned) {
5329                                gen_helper_neon_rshl_u64(cpu_V0, in, tmp64);
5330                            } else {
5331                                gen_helper_neon_rshl_s64(cpu_V0, in, tmp64);
5332                            }
5333                        } else {
5334                            if (input_unsigned) {
5335                                gen_helper_neon_shl_u64(cpu_V0, in, tmp64);
5336                            } else {
5337                                gen_helper_neon_shl_s64(cpu_V0, in, tmp64);
5338                            }
5339                        }
5340                        tmp = tcg_temp_new_i32();
5341                        gen_neon_narrow_op(op == 8, u, size - 1, tmp, cpu_V0);
5342                        neon_store_reg(rd, pass, tmp);
5343                    } /* for pass */
5344                    tcg_temp_free_i64(tmp64);
5345                } else {
5346                    if (size == 1) {
5347                        imm = (uint16_t)shift;
5348                        imm |= imm << 16;
5349                    } else {
5350                        /* size == 2 */
5351                        imm = (uint32_t)shift;
5352                    }
5353                    tmp2 = tcg_const_i32(imm);
5354                    tmp4 = neon_load_reg(rm + 1, 0);
5355                    tmp5 = neon_load_reg(rm + 1, 1);
5356                    for (pass = 0; pass < 2; pass++) {
5357                        if (pass == 0) {
5358                            tmp = neon_load_reg(rm, 0);
5359                        } else {
5360                            tmp = tmp4;
5361                        }
5362                        gen_neon_shift_narrow(size, tmp, tmp2, q,
5363                                              input_unsigned);
5364                        if (pass == 0) {
5365                            tmp3 = neon_load_reg(rm, 1);
5366                        } else {
5367                            tmp3 = tmp5;
5368                        }
5369                        gen_neon_shift_narrow(size, tmp3, tmp2, q,
5370                                              input_unsigned);
5371                        tcg_gen_concat_i32_i64(cpu_V0, tmp, tmp3);
5372                        tcg_temp_free_i32(tmp);
5373                        tcg_temp_free_i32(tmp3);
5374                        tmp = tcg_temp_new_i32();
5375                        gen_neon_narrow_op(op == 8, u, size - 1, tmp, cpu_V0);
5376                        neon_store_reg(rd, pass, tmp);
5377                    } /* for pass */
5378                    tcg_temp_free_i32(tmp2);
5379                }
5380            } else if (op == 10) {
5381                /* VSHLL, VMOVL */
5382                if (q || (rd & 1)) {
5383                    return 1;
5384                }
5385                tmp = neon_load_reg(rm, 0);
5386                tmp2 = neon_load_reg(rm, 1);
5387                for (pass = 0; pass < 2; pass++) {
5388                    if (pass == 1)
5389                        tmp = tmp2;
5390
5391                    gen_neon_widen(cpu_V0, tmp, size, u);
5392
5393                    if (shift != 0) {
5394                        /* The shift is less than the width of the source
5395                           type, so we can just shift the whole register.  */
5396                        tcg_gen_shli_i64(cpu_V0, cpu_V0, shift);
5397                        /* Widen the result of shift: we need to clear
5398                         * the potential overflow bits resulting from
5399                         * left bits of the narrow input appearing as
5400                         * right bits of the left neighbour narrow
5401                         * input.  */
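                            /* e.g. for size == 0 and shift == 3 the mask is
                             * 0x0007000700070007, clearing the low three
                             * bits of each widened 16-bit lane.
                             */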
5402                        if (size < 2 || !u) {
5403                            uint64_t imm64;
5404                            if (size == 0) {
5405                                imm = (0xffu >> (8 - shift));
5406                                imm |= imm << 16;
5407                            } else if (size == 1) {
5408                                imm = 0xffff >> (16 - shift);
5409                            } else {
5410                                /* size == 2 */
5411                                imm = 0xffffffff >> (32 - shift);
5412                            }
5413                            if (size < 2) {
5414                                imm64 = imm | (((uint64_t)imm) << 32);
5415                            } else {
5416                                imm64 = imm;
5417                            }
5418                            tcg_gen_andi_i64(cpu_V0, cpu_V0, ~imm64);
5419                        }
5420                    }
5421                    neon_store_reg64(cpu_V0, rd + pass);
5422                }
5423            } else if (op >= 14) {
5424                /* VCVT fixed-point.  */
5425                TCGv_ptr fpst;
5426                TCGv_i32 shiftv;
5427                VFPGenFixPointFn *fn;
5428
5429                if (!(insn & (1 << 21)) || (q && ((rd | rm) & 1))) {
5430                    return 1;
5431                }
5432
5433                if (!(op & 1)) {
5434                    if (u) {
5435                        fn = gen_helper_vfp_ultos;
5436                    } else {
5437                        fn = gen_helper_vfp_sltos;
5438                    }
5439                } else {
5440                    if (u) {
5441                        fn = gen_helper_vfp_touls_round_to_zero;
5442                    } else {
5443                        fn = gen_helper_vfp_tosls_round_to_zero;
5444                    }
5445                }
5446
5447                /* We have already masked out the must-be-1 top bit of imm6,
5448                 * hence the 32 - shift here where the ARM ARM has 64 - imm6.
5449                 */
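                    /* e.g. imm6 = 58 requests 64 - 58 = 6 fraction bits;
                     * shift arrives here as 58 - 32 = 26, and 32 - 26
                     * recovers the 6.
                     */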
5450                shift = 32 - shift;
5451                fpst = get_fpstatus_ptr(1);
5452                shiftv = tcg_const_i32(shift);
5453                for (pass = 0; pass < (q ? 4 : 2); pass++) {
5454                    TCGv_i32 tmpf = neon_load_reg(rm, pass);
5455                    fn(tmpf, tmpf, shiftv, fpst);
5456                    neon_store_reg(rd, pass, tmpf);
5457                }
5458                tcg_temp_free_ptr(fpst);
5459                tcg_temp_free_i32(shiftv);
5460            } else {
5461                return 1;
5462            }
5463        } else { /* (insn & 0x00380080) == 0 */
5464            int invert, reg_ofs, vec_size;
5465
5466            if (q && (rd & 1)) {
5467                return 1;
5468            }
5469
5470            op = (insn >> 8) & 0xf;
5471            /* One register and immediate.  */
5472            imm = (u << 7) | ((insn >> 12) & 0x70) | (insn & 0xf);
5473            invert = (insn & (1 << 5)) != 0;
5474            /* Note that op = 2,3,4,5,6,7,10,11,12,13 imm=0 is UNPREDICTABLE.
5475             * We choose to not special-case this and will behave as if a
5476             * valid constant encoding of 0 had been given.
5477             */
5478            switch (op) {
5479            case 0: case 1:
5480                /* no-op */
5481                break;
5482            case 2: case 3:
5483                imm <<= 8;
5484                break;
5485            case 4: case 5:
5486                imm <<= 16;
5487                break;
5488            case 6: case 7:
5489                imm <<= 24;
5490                break;
5491            case 8: case 9:
5492                imm |= imm << 16;
5493                break;
5494            case 10: case 11:
5495                imm = (imm << 8) | (imm << 24);
5496                break;
5497            case 12:
5498                imm = (imm << 8) | 0xff;
5499                break;
5500            case 13:
5501                imm = (imm << 16) | 0xffff;
5502                break;
5503            case 14:
5504                imm |= (imm << 8) | (imm << 16) | (imm << 24);
5505                if (invert) {
5506                    imm = ~imm;
5507                }
5508                break;
5509            case 15:
5510                if (invert) {
5511                    return 1;
5512                }
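                    /* FP immediate a:NOT(b):bbbbb:cdefgh:Zeros(19); e.g.
                     * imm8 = 0x70 expands to 0x3f800000 (1.0f).
                     */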
5513                imm = ((imm & 0x80) << 24) | ((imm & 0x3f) << 19)
5514                      | ((imm & 0x40) ? (0x1f << 25) : (1 << 30));
5515                break;
5516            }
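                /* e.g. cmode 0b1100 (op == 12) with imm8 = 0xab gives
                 * 0x0000abff in each 32-bit lane, before the optional
                 * inversion below.
                 */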
5517            if (invert) {
5518                imm = ~imm;
5519            }
5520
5521            reg_ofs = neon_reg_offset(rd, 0);
5522            vec_size = q ? 16 : 8;
5523
5524            if (op & 1 && op < 12) {
5525                if (invert) {
5526                    /* The immediate value has already been inverted,
5527                     * so BIC becomes AND.
5528                     */
5529                    tcg_gen_gvec_andi(MO_32, reg_ofs, reg_ofs, imm,
5530                                      vec_size, vec_size);
5531                } else {
5532                    tcg_gen_gvec_ori(MO_32, reg_ofs, reg_ofs, imm,
5533                                     vec_size, vec_size);
5534                }
5535            } else {
5536                /* VMOV, VMVN.  */
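                    /* For cmode 0b1110 with the instruction's op bit set,
                     * each bit of imm8 selects an all-ones or all-zeros
                     * byte of the 64-bit immediate; the two inversions
                     * above cancel, so imm still holds the original bit
                     * pattern here.
                     */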
5537                if (op == 14 && invert) {
5538                    TCGv_i64 t64 = tcg_temp_new_i64();
5539
5540                    for (pass = 0; pass <= q; ++pass) {
5541                        uint64_t val = 0;
5542                        int n;
5543
5544                        for (n = 0; n < 8; n++) {
5545                            if (imm & (1 << (n + pass * 8))) {
5546                                val |= 0xffull << (n * 8);
5547                            }
5548                        }
5549                        tcg_gen_movi_i64(t64, val);
5550                        neon_store_reg64(t64, rd + pass);
5551                    }
5552                    tcg_temp_free_i64(t64);
5553                } else {
5554                    tcg_gen_gvec_dup32i(reg_ofs, vec_size, vec_size, imm);
5555                }
5556            }
5557        }
5558    } else { /* (insn & 0x00800010 == 0x00800000) */
5559        if (size != 3) {
5560            op = (insn >> 8) & 0xf;
5561            if ((insn & (1 << 6)) == 0) {
5562                /* Three registers of different lengths.  */
5563                int src1_wide;
5564                int src2_wide;
5565                int prewiden;
5566                /* undefreq: bit 0 : UNDEF if size == 0
5567                 *           bit 1 : UNDEF if size == 1
5568                 *           bit 2 : UNDEF if size == 2
5569                 *           bit 3 : UNDEF if U == 1
5570                 * Note that [2:0] set implies 'always UNDEF'
5571                 */
5572                int undefreq;
5573                /* prewiden, src1_wide, src2_wide, undefreq */
5574                static const int neon_3reg_wide[16][4] = {
5575                    {1, 0, 0, 0}, /* VADDL */
5576                    {1, 1, 0, 0}, /* VADDW */
5577                    {1, 0, 0, 0}, /* VSUBL */
5578                    {1, 1, 0, 0}, /* VSUBW */
5579                    {0, 1, 1, 0}, /* VADDHN */
5580                    {0, 0, 0, 0}, /* VABAL */
5581                    {0, 1, 1, 0}, /* VSUBHN */
5582                    {0, 0, 0, 0}, /* VABDL */
5583                    {0, 0, 0, 0}, /* VMLAL */
5584                    {0, 0, 0, 9}, /* VQDMLAL */
5585                    {0, 0, 0, 0}, /* VMLSL */
5586                    {0, 0, 0, 9}, /* VQDMLSL */
5587                    {0, 0, 0, 0}, /* Integer VMULL */
5588                    {0, 0, 0, 1}, /* VQDMULL */
5589                    {0, 0, 0, 0xa}, /* Polynomial VMULL */
5590                    {0, 0, 0, 7}, /* Reserved: always UNDEF */
5591                };
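                    /* e.g. VQDMULL's undefreq of 1 means UNDEF for
                     * size == 0; the polynomial VMULL value 0xa means
                     * UNDEF for size == 1 or U == 1.
                     */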
5592
5593                prewiden = neon_3reg_wide[op][0];
5594                src1_wide = neon_3reg_wide[op][1];
5595                src2_wide = neon_3reg_wide[op][2];
5596                undefreq = neon_3reg_wide[op][3];
5597
5598                if ((undefreq & (1 << size)) ||
5599                    ((undefreq & 8) && u)) {
5600                    return 1;
5601                }
5602                if ((src1_wide && (rn & 1)) ||
5603                    (src2_wide && (rm & 1)) ||
5604                    (!src2_wide && (rd & 1))) {
5605                    return 1;
5606                }
5607
5608                /* Handle VMULL.P64 (Polynomial 64x64 to 128 bit multiply)
5609                 * outside the loop below as it only performs a single pass.
5610                 */
5611                if (op == 14 && size == 2) {
5612                    TCGv_i64 tcg_rn, tcg_rm, tcg_rd;
5613
5614                    if (!dc_isar_feature(aa32_pmull, s)) {
5615                        return 1;
5616                    }
5617                    tcg_rn = tcg_temp_new_i64();
5618                    tcg_rm = tcg_temp_new_i64();
5619                    tcg_rd = tcg_temp_new_i64();
5620                    neon_load_reg64(tcg_rn, rn);
5621                    neon_load_reg64(tcg_rm, rm);
5622                    gen_helper_neon_pmull_64_lo(tcg_rd, tcg_rn, tcg_rm);
5623                    neon_store_reg64(tcg_rd, rd);
5624                    gen_helper_neon_pmull_64_hi(tcg_rd, tcg_rn, tcg_rm);
5625                    neon_store_reg64(tcg_rd, rd + 1);
5626                    tcg_temp_free_i64(tcg_rn);
5627                    tcg_temp_free_i64(tcg_rm);
5628                    tcg_temp_free_i64(tcg_rd);
5629                    return 0;
5630                }
5631
5632                /* Avoid overlapping operands.  Wide source operands are
5633                   always aligned so will never overlap with wide
5634                   destinations in problematic ways.  */
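                    /* Only the high 32-bit half of the overlapping narrow
                     * source needs saving: pass 0 reads its low half before
                     * overwriting the whole D register with a 64-bit
                     * result.
                     */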
5635                if (rd == rm && !src2_wide) {
5636                    tmp = neon_load_reg(rm, 1);
5637                    neon_store_scratch(2, tmp);
5638                } else if (rd == rn && !src1_wide) {
5639                    tmp = neon_load_reg(rn, 1);
5640                    neon_store_scratch(2, tmp);
5641                }
5642                tmp3 = NULL;
5643                for (pass = 0; pass < 2; pass++) {
5644                    if (src1_wide) {
5645                        neon_load_reg64(cpu_V0, rn + pass);
5646                        tmp = NULL;
5647                    } else {
5648                        if (pass == 1 && rd == rn) {
5649                            tmp = neon_load_scratch(2);
5650                        } else {
5651                            tmp = neon_load_reg(rn, pass);
5652                        }
5653                        if (prewiden) {
5654                            gen_neon_widen(cpu_V0, tmp, size, u);
5655                        }
5656                    }
5657                    if (src2_wide) {
5658                        neon_load_reg64(cpu_V1, rm + pass);
5659                        tmp2 = NULL;
5660                    } else {
5661                        if (pass == 1 && rd == rm) {
5662                            tmp2 = neon_load_scratch(2);
5663                        } else {
5664                            tmp2 = neon_load_reg(rm, pass);
5665                        }
5666                        if (prewiden) {
5667                            gen_neon_widen(cpu_V1, tmp2, size, u);
5668                        }
5669                    }
5670                    switch (op) {
5671                    case 0: case 1: case 4: /* VADDL, VADDW, VADDHN, VRADDHN */
5672                        gen_neon_addl(size);
5673                        break;
5674                    case 2: case 3: case 6: /* VSUBL, VSUBW, VSUBHN, VRSUBHN */
5675                        gen_neon_subl(size);
5676                        break;
5677                    case 5: case 7: /* VABAL, VABDL */
5678                        switch ((size << 1) | u) {
5679                        case 0:
5680                            gen_helper_neon_abdl_s16(cpu_V0, tmp, tmp2);
5681                            break;
5682                        case 1:
5683                            gen_helper_neon_abdl_u16(cpu_V0, tmp, tmp2);
5684                            break;
5685                        case 2:
5686                            gen_helper_neon_abdl_s32(cpu_V0, tmp, tmp2);
5687                            break;
5688                        case 3:
5689                            gen_helper_neon_abdl_u32(cpu_V0, tmp, tmp2);
5690                            break;
5691                        case 4:
5692                            gen_helper_neon_abdl_s64(cpu_V0, tmp, tmp2);
5693                            break;
5694                        case 5:
5695                            gen_helper_neon_abdl_u64(cpu_V0, tmp, tmp2);
5696                            break;
5697                        default: abort();
5698                        }
5699                        tcg_temp_free_i32(tmp2);
5700                        tcg_temp_free_i32(tmp);
5701                        break;
5702                    case 8: case 9: case 10: case 11: case 12: case 13:
5703                        /* VMLAL, VQDMLAL, VMLSL, VQDMLSL, VMULL, VQDMULL */