qemu/target/arm/tcg/translate-neon.c
<<
>>
Prefs
   1/*
   2 *  ARM translation: AArch32 Neon instructions
   3 *
   4 *  Copyright (c) 2003 Fabrice Bellard
   5 *  Copyright (c) 2005-2007 CodeSourcery
   6 *  Copyright (c) 2007 OpenedHand, Ltd.
   7 *  Copyright (c) 2020 Linaro, Ltd.
   8 *
   9 * This library is free software; you can redistribute it and/or
  10 * modify it under the terms of the GNU Lesser General Public
  11 * License as published by the Free Software Foundation; either
  12 * version 2.1 of the License, or (at your option) any later version.
  13 *
  14 * This library is distributed in the hope that it will be useful,
  15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17 * Lesser General Public License for more details.
  18 *
  19 * You should have received a copy of the GNU Lesser General Public
  20 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  21 */
  22
  23#include "qemu/osdep.h"
  24#include "translate.h"
  25#include "translate-a32.h"
  26
  27/* Include the generated Neon decoder */
  28#include "decode-neon-dp.c.inc"
  29#include "decode-neon-ls.c.inc"
  30#include "decode-neon-shared.c.inc"
  31
  32static TCGv_ptr vfp_reg_ptr(bool dp, int reg)
  33{
  34    TCGv_ptr ret = tcg_temp_new_ptr();
  35    tcg_gen_addi_ptr(ret, cpu_env, vfp_reg_offset(dp, reg));
  36    return ret;
  37}
  38
  39static void neon_load_element(TCGv_i32 var, int reg, int ele, MemOp mop)
  40{
  41    long offset = neon_element_offset(reg, ele, mop & MO_SIZE);
  42
  43    switch (mop) {
  44    case MO_UB:
  45        tcg_gen_ld8u_i32(var, cpu_env, offset);
  46        break;
  47    case MO_UW:
  48        tcg_gen_ld16u_i32(var, cpu_env, offset);
  49        break;
  50    case MO_UL:
  51        tcg_gen_ld_i32(var, cpu_env, offset);
  52        break;
  53    default:
  54        g_assert_not_reached();
  55    }
  56}
  57
  58static void neon_load_element64(TCGv_i64 var, int reg, int ele, MemOp mop)
  59{
  60    long offset = neon_element_offset(reg, ele, mop & MO_SIZE);
  61
  62    switch (mop) {
  63    case MO_UB:
  64        tcg_gen_ld8u_i64(var, cpu_env, offset);
  65        break;
  66    case MO_UW:
  67        tcg_gen_ld16u_i64(var, cpu_env, offset);
  68        break;
  69    case MO_UL:
  70        tcg_gen_ld32u_i64(var, cpu_env, offset);
  71        break;
  72    case MO_UQ:
  73        tcg_gen_ld_i64(var, cpu_env, offset);
  74        break;
  75    default:
  76        g_assert_not_reached();
  77    }
  78}
  79
  80static void neon_store_element(int reg, int ele, MemOp size, TCGv_i32 var)
  81{
  82    long offset = neon_element_offset(reg, ele, size);
  83
  84    switch (size) {
  85    case MO_8:
  86        tcg_gen_st8_i32(var, cpu_env, offset);
  87        break;
  88    case MO_16:
  89        tcg_gen_st16_i32(var, cpu_env, offset);
  90        break;
  91    case MO_32:
  92        tcg_gen_st_i32(var, cpu_env, offset);
  93        break;
  94    default:
  95        g_assert_not_reached();
  96    }
  97}
  98
  99static void neon_store_element64(int reg, int ele, MemOp size, TCGv_i64 var)
 100{
 101    long offset = neon_element_offset(reg, ele, size);
 102
 103    switch (size) {
 104    case MO_8:
 105        tcg_gen_st8_i64(var, cpu_env, offset);
 106        break;
 107    case MO_16:
 108        tcg_gen_st16_i64(var, cpu_env, offset);
 109        break;
 110    case MO_32:
 111        tcg_gen_st32_i64(var, cpu_env, offset);
 112        break;
 113    case MO_64:
 114        tcg_gen_st_i64(var, cpu_env, offset);
 115        break;
 116    default:
 117        g_assert_not_reached();
 118    }
 119}
 120
 121static bool do_neon_ddda(DisasContext *s, int q, int vd, int vn, int vm,
 122                         int data, gen_helper_gvec_4 *fn_gvec)
 123{
 124    /* UNDEF accesses to D16-D31 if they don't exist. */
 125    if (((vd | vn | vm) & 0x10) && !dc_isar_feature(aa32_simd_r32, s)) {
 126        return false;
 127    }
 128
 129    /*
 130     * UNDEF accesses to odd registers for each bit of Q.
 131     * Q will be 0b111 for all Q-reg instructions, otherwise
 132     * when we have mixed Q- and D-reg inputs.
 133     */
 134    if (((vd & 1) * 4 | (vn & 1) * 2 | (vm & 1)) & q) {
 135        return false;
 136    }
 137
 138    if (!vfp_access_check(s)) {
 139        return true;
 140    }
 141
 142    int opr_sz = q ? 16 : 8;
 143    tcg_gen_gvec_4_ool(vfp_reg_offset(1, vd),
 144                       vfp_reg_offset(1, vn),
 145                       vfp_reg_offset(1, vm),
 146                       vfp_reg_offset(1, vd),
 147                       opr_sz, opr_sz, data, fn_gvec);
 148    return true;
 149}
 150
 151static bool do_neon_ddda_fpst(DisasContext *s, int q, int vd, int vn, int vm,
 152                              int data, ARMFPStatusFlavour fp_flavour,
 153                              gen_helper_gvec_4_ptr *fn_gvec_ptr)
 154{
 155    /* UNDEF accesses to D16-D31 if they don't exist. */
 156    if (((vd | vn | vm) & 0x10) && !dc_isar_feature(aa32_simd_r32, s)) {
 157        return false;
 158    }
 159
 160    /*
 161     * UNDEF accesses to odd registers for each bit of Q.
 162     * Q will be 0b111 for all Q-reg instructions, otherwise
 163     * when we have mixed Q- and D-reg inputs.
 164     */
 165    if (((vd & 1) * 4 | (vn & 1) * 2 | (vm & 1)) & q) {
 166        return false;
 167    }
 168
 169    if (!vfp_access_check(s)) {
 170        return true;
 171    }
 172
 173    int opr_sz = q ? 16 : 8;
 174    TCGv_ptr fpst = fpstatus_ptr(fp_flavour);
 175
 176    tcg_gen_gvec_4_ptr(vfp_reg_offset(1, vd),
 177                       vfp_reg_offset(1, vn),
 178                       vfp_reg_offset(1, vm),
 179                       vfp_reg_offset(1, vd),
 180                       fpst, opr_sz, opr_sz, data, fn_gvec_ptr);
 181    return true;
 182}
 183
 184static bool trans_VCMLA(DisasContext *s, arg_VCMLA *a)
 185{
 186    if (!dc_isar_feature(aa32_vcma, s)) {
 187        return false;
 188    }
 189    if (a->size == MO_16) {
 190        if (!dc_isar_feature(aa32_fp16_arith, s)) {
 191            return false;
 192        }
 193        return do_neon_ddda_fpst(s, a->q * 7, a->vd, a->vn, a->vm, a->rot,
 194                                 FPST_STD_F16, gen_helper_gvec_fcmlah);
 195    }
 196    return do_neon_ddda_fpst(s, a->q * 7, a->vd, a->vn, a->vm, a->rot,
 197                             FPST_STD, gen_helper_gvec_fcmlas);
 198}
 199
 200static bool trans_VCADD(DisasContext *s, arg_VCADD *a)
 201{
 202    int opr_sz;
 203    TCGv_ptr fpst;
 204    gen_helper_gvec_3_ptr *fn_gvec_ptr;
 205
 206    if (!dc_isar_feature(aa32_vcma, s)
 207        || (a->size == MO_16 && !dc_isar_feature(aa32_fp16_arith, s))) {
 208        return false;
 209    }
 210
 211    /* UNDEF accesses to D16-D31 if they don't exist. */
 212    if (!dc_isar_feature(aa32_simd_r32, s) &&
 213        ((a->vd | a->vn | a->vm) & 0x10)) {
 214        return false;
 215    }
 216
 217    if ((a->vn | a->vm | a->vd) & a->q) {
 218        return false;
 219    }
 220
 221    if (!vfp_access_check(s)) {
 222        return true;
 223    }
 224
 225    opr_sz = (1 + a->q) * 8;
 226    fpst = fpstatus_ptr(a->size == MO_16 ? FPST_STD_F16 : FPST_STD);
 227    fn_gvec_ptr = (a->size == MO_16) ?
 228        gen_helper_gvec_fcaddh : gen_helper_gvec_fcadds;
 229    tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
 230                       vfp_reg_offset(1, a->vn),
 231                       vfp_reg_offset(1, a->vm),
 232                       fpst, opr_sz, opr_sz, a->rot,
 233                       fn_gvec_ptr);
 234    return true;
 235}
 236
 237static bool trans_VSDOT(DisasContext *s, arg_VSDOT *a)
 238{
 239    if (!dc_isar_feature(aa32_dp, s)) {
 240        return false;
 241    }
 242    return do_neon_ddda(s, a->q * 7, a->vd, a->vn, a->vm, 0,
 243                        gen_helper_gvec_sdot_b);
 244}
 245
 246static bool trans_VUDOT(DisasContext *s, arg_VUDOT *a)
 247{
 248    if (!dc_isar_feature(aa32_dp, s)) {
 249        return false;
 250    }
 251    return do_neon_ddda(s, a->q * 7, a->vd, a->vn, a->vm, 0,
 252                        gen_helper_gvec_udot_b);
 253}
 254
 255static bool trans_VUSDOT(DisasContext *s, arg_VUSDOT *a)
 256{
 257    if (!dc_isar_feature(aa32_i8mm, s)) {
 258        return false;
 259    }
 260    return do_neon_ddda(s, a->q * 7, a->vd, a->vn, a->vm, 0,
 261                        gen_helper_gvec_usdot_b);
 262}
 263
 264static bool trans_VDOT_b16(DisasContext *s, arg_VDOT_b16 *a)
 265{
 266    if (!dc_isar_feature(aa32_bf16, s)) {
 267        return false;
 268    }
 269    return do_neon_ddda(s, a->q * 7, a->vd, a->vn, a->vm, 0,
 270                        gen_helper_gvec_bfdot);
 271}
 272
 273static bool trans_VFML(DisasContext *s, arg_VFML *a)
 274{
 275    int opr_sz;
 276
 277    if (!dc_isar_feature(aa32_fhm, s)) {
 278        return false;
 279    }
 280
 281    /* UNDEF accesses to D16-D31 if they don't exist. */
 282    if (!dc_isar_feature(aa32_simd_r32, s) &&
 283        (a->vd & 0x10)) {
 284        return false;
 285    }
 286
 287    if (a->vd & a->q) {
 288        return false;
 289    }
 290
 291    if (!vfp_access_check(s)) {
 292        return true;
 293    }
 294
 295    opr_sz = (1 + a->q) * 8;
 296    tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
 297                       vfp_reg_offset(a->q, a->vn),
 298                       vfp_reg_offset(a->q, a->vm),
 299                       cpu_env, opr_sz, opr_sz, a->s, /* is_2 == 0 */
 300                       gen_helper_gvec_fmlal_a32);
 301    return true;
 302}
 303
 304static bool trans_VCMLA_scalar(DisasContext *s, arg_VCMLA_scalar *a)
 305{
 306    int data = (a->index << 2) | a->rot;
 307
 308    if (!dc_isar_feature(aa32_vcma, s)) {
 309        return false;
 310    }
 311    if (a->size == MO_16) {
 312        if (!dc_isar_feature(aa32_fp16_arith, s)) {
 313            return false;
 314        }
 315        return do_neon_ddda_fpst(s, a->q * 6, a->vd, a->vn, a->vm, data,
 316                                 FPST_STD_F16, gen_helper_gvec_fcmlah_idx);
 317    }
 318    return do_neon_ddda_fpst(s, a->q * 6, a->vd, a->vn, a->vm, data,
 319                             FPST_STD, gen_helper_gvec_fcmlas_idx);
 320}
 321
 322static bool trans_VSDOT_scalar(DisasContext *s, arg_VSDOT_scalar *a)
 323{
 324    if (!dc_isar_feature(aa32_dp, s)) {
 325        return false;
 326    }
 327    return do_neon_ddda(s, a->q * 6, a->vd, a->vn, a->vm, a->index,
 328                        gen_helper_gvec_sdot_idx_b);
 329}
 330
 331static bool trans_VUDOT_scalar(DisasContext *s, arg_VUDOT_scalar *a)
 332{
 333    if (!dc_isar_feature(aa32_dp, s)) {
 334        return false;
 335    }
 336    return do_neon_ddda(s, a->q * 6, a->vd, a->vn, a->vm, a->index,
 337                        gen_helper_gvec_udot_idx_b);
 338}
 339
 340static bool trans_VUSDOT_scalar(DisasContext *s, arg_VUSDOT_scalar *a)
 341{
 342    if (!dc_isar_feature(aa32_i8mm, s)) {
 343        return false;
 344    }
 345    return do_neon_ddda(s, a->q * 6, a->vd, a->vn, a->vm, a->index,
 346                        gen_helper_gvec_usdot_idx_b);
 347}
 348
 349static bool trans_VSUDOT_scalar(DisasContext *s, arg_VSUDOT_scalar *a)
 350{
 351    if (!dc_isar_feature(aa32_i8mm, s)) {
 352        return false;
 353    }
 354    return do_neon_ddda(s, a->q * 6, a->vd, a->vn, a->vm, a->index,
 355                        gen_helper_gvec_sudot_idx_b);
 356}
 357
 358static bool trans_VDOT_b16_scal(DisasContext *s, arg_VDOT_b16_scal *a)
 359{
 360    if (!dc_isar_feature(aa32_bf16, s)) {
 361        return false;
 362    }
 363    return do_neon_ddda(s, a->q * 6, a->vd, a->vn, a->vm, a->index,
 364                        gen_helper_gvec_bfdot_idx);
 365}
 366
 367static bool trans_VFML_scalar(DisasContext *s, arg_VFML_scalar *a)
 368{
 369    int opr_sz;
 370
 371    if (!dc_isar_feature(aa32_fhm, s)) {
 372        return false;
 373    }
 374
 375    /* UNDEF accesses to D16-D31 if they don't exist. */
 376    if (!dc_isar_feature(aa32_simd_r32, s) &&
 377        ((a->vd & 0x10) || (a->q && (a->vn & 0x10)))) {
 378        return false;
 379    }
 380
 381    if (a->vd & a->q) {
 382        return false;
 383    }
 384
 385    if (!vfp_access_check(s)) {
 386        return true;
 387    }
 388
 389    opr_sz = (1 + a->q) * 8;
 390    tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
 391                       vfp_reg_offset(a->q, a->vn),
 392                       vfp_reg_offset(a->q, a->rm),
 393                       cpu_env, opr_sz, opr_sz,
 394                       (a->index << 2) | a->s, /* is_2 == 0 */
 395                       gen_helper_gvec_fmlal_idx_a32);
 396    return true;
 397}
 398
 399static struct {
 400    int nregs;
 401    int interleave;
 402    int spacing;
 403} const neon_ls_element_type[11] = {
 404    {1, 4, 1},
 405    {1, 4, 2},
 406    {4, 1, 1},
 407    {2, 2, 2},
 408    {1, 3, 1},
 409    {1, 3, 2},
 410    {3, 1, 1},
 411    {1, 1, 1},
 412    {1, 2, 1},
 413    {1, 2, 2},
 414    {2, 1, 1}
 415};
 416
 417static void gen_neon_ldst_base_update(DisasContext *s, int rm, int rn,
 418                                      int stride)
 419{
 420    if (rm != 15) {
 421        TCGv_i32 base;
 422
 423        base = load_reg(s, rn);
 424        if (rm == 13) {
 425            tcg_gen_addi_i32(base, base, stride);
 426        } else {
 427            TCGv_i32 index;
 428            index = load_reg(s, rm);
 429            tcg_gen_add_i32(base, base, index);
 430        }
 431        store_reg(s, rn, base);
 432    }
 433}
 434
 435static bool trans_VLDST_multiple(DisasContext *s, arg_VLDST_multiple *a)
 436{
 437    /* Neon load/store multiple structures */
 438    int nregs, interleave, spacing, reg, n;
 439    MemOp mop, align, endian;
 440    int mmu_idx = get_mem_index(s);
 441    int size = a->size;
 442    TCGv_i64 tmp64;
 443    TCGv_i32 addr;
 444
 445    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
 446        return false;
 447    }
 448
 449    /* UNDEF accesses to D16-D31 if they don't exist */
 450    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
 451        return false;
 452    }
 453    if (a->itype > 10) {
 454        return false;
 455    }
 456    /* Catch UNDEF cases for bad values of align field */
 457    switch (a->itype & 0xc) {
 458    case 4:
 459        if (a->align >= 2) {
 460            return false;
 461        }
 462        break;
 463    case 8:
 464        if (a->align == 3) {
 465            return false;
 466        }
 467        break;
 468    default:
 469        break;
 470    }
 471    nregs = neon_ls_element_type[a->itype].nregs;
 472    interleave = neon_ls_element_type[a->itype].interleave;
 473    spacing = neon_ls_element_type[a->itype].spacing;
 474    if (size == 3 && (interleave | spacing) != 1) {
 475        return false;
 476    }
 477
 478    if (!vfp_access_check(s)) {
 479        return true;
 480    }
 481
 482    /* For our purposes, bytes are always little-endian.  */
 483    endian = s->be_data;
 484    if (size == 0) {
 485        endian = MO_LE;
 486    }
 487
 488    /* Enforce alignment requested by the instruction */
 489    if (a->align) {
 490        align = pow2_align(a->align + 2); /* 4 ** a->align */
 491    } else {
 492        align = s->align_mem ? MO_ALIGN : 0;
 493    }
 494
 495    /*
 496     * Consecutive little-endian elements from a single register
 497     * can be promoted to a larger little-endian operation.
 498     */
 499    if (interleave == 1 && endian == MO_LE) {
 500        /* Retain any natural alignment. */
 501        if (align == MO_ALIGN) {
 502            align = pow2_align(size);
 503        }
 504        size = 3;
 505    }
 506
 507    tmp64 = tcg_temp_new_i64();
 508    addr = tcg_temp_new_i32();
 509    load_reg_var(s, addr, a->rn);
 510
 511    mop = endian | size | align;
 512    for (reg = 0; reg < nregs; reg++) {
 513        for (n = 0; n < 8 >> size; n++) {
 514            int xs;
 515            for (xs = 0; xs < interleave; xs++) {
 516                int tt = a->vd + reg + spacing * xs;
 517
 518                if (a->l) {
 519                    gen_aa32_ld_internal_i64(s, tmp64, addr, mmu_idx, mop);
 520                    neon_store_element64(tt, n, size, tmp64);
 521                } else {
 522                    neon_load_element64(tmp64, tt, n, size);
 523                    gen_aa32_st_internal_i64(s, tmp64, addr, mmu_idx, mop);
 524                }
 525                tcg_gen_addi_i32(addr, addr, 1 << size);
 526
 527                /* Subsequent memory operations inherit alignment */
 528                mop &= ~MO_AMASK;
 529            }
 530        }
 531    }
 532
 533    gen_neon_ldst_base_update(s, a->rm, a->rn, nregs * interleave * 8);
 534    return true;
 535}
 536
 537static bool trans_VLD_all_lanes(DisasContext *s, arg_VLD_all_lanes *a)
 538{
 539    /* Neon load single structure to all lanes */
 540    int reg, stride, vec_size;
 541    int vd = a->vd;
 542    int size = a->size;
 543    int nregs = a->n + 1;
 544    TCGv_i32 addr, tmp;
 545    MemOp mop, align;
 546
 547    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
 548        return false;
 549    }
 550
 551    /* UNDEF accesses to D16-D31 if they don't exist */
 552    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
 553        return false;
 554    }
 555
 556    align = 0;
 557    if (size == 3) {
 558        if (nregs != 4 || a->a == 0) {
 559            return false;
 560        }
 561        /* For VLD4 size == 3 a == 1 means 32 bits at 16 byte alignment */
 562        size = MO_32;
 563        align = MO_ALIGN_16;
 564    } else if (a->a) {
 565        switch (nregs) {
 566        case 1:
 567            if (size == 0) {
 568                return false;
 569            }
 570            align = MO_ALIGN;
 571            break;
 572        case 2:
 573            align = pow2_align(size + 1);
 574            break;
 575        case 3:
 576            return false;
 577        case 4:
 578            if (size == 2) {
 579                align = pow2_align(3);
 580            } else {
 581                align = pow2_align(size + 2);
 582            }
 583            break;
 584        default:
 585            g_assert_not_reached();
 586        }
 587    }
 588
 589    if (!vfp_access_check(s)) {
 590        return true;
 591    }
 592
 593    /*
 594     * VLD1 to all lanes: T bit indicates how many Dregs to write.
 595     * VLD2/3/4 to all lanes: T bit indicates register stride.
 596     */
 597    stride = a->t ? 2 : 1;
 598    vec_size = nregs == 1 ? stride * 8 : 8;
 599    mop = size | align;
 600    tmp = tcg_temp_new_i32();
 601    addr = tcg_temp_new_i32();
 602    load_reg_var(s, addr, a->rn);
 603    for (reg = 0; reg < nregs; reg++) {
 604        gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), mop);
 605        if ((vd & 1) && vec_size == 16) {
 606            /*
 607             * We cannot write 16 bytes at once because the
 608             * destination is unaligned.
 609             */
 610            tcg_gen_gvec_dup_i32(size, neon_full_reg_offset(vd),
 611                                 8, 8, tmp);
 612            tcg_gen_gvec_mov(0, neon_full_reg_offset(vd + 1),
 613                             neon_full_reg_offset(vd), 8, 8);
 614        } else {
 615            tcg_gen_gvec_dup_i32(size, neon_full_reg_offset(vd),
 616                                 vec_size, vec_size, tmp);
 617        }
 618        tcg_gen_addi_i32(addr, addr, 1 << size);
 619        vd += stride;
 620
 621        /* Subsequent memory operations inherit alignment */
 622        mop &= ~MO_AMASK;
 623    }
 624
 625    gen_neon_ldst_base_update(s, a->rm, a->rn, (1 << size) * nregs);
 626
 627    return true;
 628}
 629
 630static bool trans_VLDST_single(DisasContext *s, arg_VLDST_single *a)
 631{
 632    /* Neon load/store single structure to one lane */
 633    int reg;
 634    int nregs = a->n + 1;
 635    int vd = a->vd;
 636    TCGv_i32 addr, tmp;
 637    MemOp mop;
 638
 639    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
 640        return false;
 641    }
 642
 643    /* UNDEF accesses to D16-D31 if they don't exist */
 644    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
 645        return false;
 646    }
 647
 648    /* Catch the UNDEF cases. This is unavoidably a bit messy. */
 649    switch (nregs) {
 650    case 1:
 651        if (a->stride != 1) {
 652            return false;
 653        }
 654        if (((a->align & (1 << a->size)) != 0) ||
 655            (a->size == 2 && (a->align == 1 || a->align == 2))) {
 656            return false;
 657        }
 658        break;
 659    case 2:
 660        if (a->size == 2 && (a->align & 2) != 0) {
 661            return false;
 662        }
 663        break;
 664    case 3:
 665        if (a->align != 0) {
 666            return false;
 667        }
 668        break;
 669    case 4:
 670        if (a->size == 2 && a->align == 3) {
 671            return false;
 672        }
 673        break;
 674    default:
 675        g_assert_not_reached();
 676    }
 677    if ((vd + a->stride * (nregs - 1)) > 31) {
 678        /*
 679         * Attempts to write off the end of the register file are
 680         * UNPREDICTABLE; we choose to UNDEF because otherwise we would
 681         * access off the end of the array that holds the register data.
 682         */
 683        return false;
 684    }
 685
 686    if (!vfp_access_check(s)) {
 687        return true;
 688    }
 689
 690    /* Pick up SCTLR settings */
 691    mop = finalize_memop(s, a->size);
 692
 693    if (a->align) {
 694        MemOp align_op;
 695
 696        switch (nregs) {
 697        case 1:
 698            /* For VLD1, use natural alignment. */
 699            align_op = MO_ALIGN;
 700            break;
 701        case 2:
 702            /* For VLD2, use double alignment. */
 703            align_op = pow2_align(a->size + 1);
 704            break;
 705        case 4:
 706            if (a->size == MO_32) {
 707                /*
 708                 * For VLD4.32, align = 1 is double alignment, align = 2 is
 709                 * quad alignment; align = 3 is rejected above.
 710                 */
 711                align_op = pow2_align(a->size + a->align);
 712            } else {
 713                /* For VLD4.8 and VLD.16, we want quad alignment. */
 714                align_op = pow2_align(a->size + 2);
 715            }
 716            break;
 717        default:
 718            /* For VLD3, the alignment field is zero and rejected above. */
 719            g_assert_not_reached();
 720        }
 721
 722        mop = (mop & ~MO_AMASK) | align_op;
 723    }
 724
 725    tmp = tcg_temp_new_i32();
 726    addr = tcg_temp_new_i32();
 727    load_reg_var(s, addr, a->rn);
 728
 729    for (reg = 0; reg < nregs; reg++) {
 730        if (a->l) {
 731            gen_aa32_ld_internal_i32(s, tmp, addr, get_mem_index(s), mop);
 732            neon_store_element(vd, a->reg_idx, a->size, tmp);
 733        } else { /* Store */
 734            neon_load_element(tmp, vd, a->reg_idx, a->size);
 735            gen_aa32_st_internal_i32(s, tmp, addr, get_mem_index(s), mop);
 736        }
 737        vd += a->stride;
 738        tcg_gen_addi_i32(addr, addr, 1 << a->size);
 739
 740        /* Subsequent memory operations inherit alignment */
 741        mop &= ~MO_AMASK;
 742    }
 743
 744    gen_neon_ldst_base_update(s, a->rm, a->rn, (1 << a->size) * nregs);
 745
 746    return true;
 747}
 748
 749static bool do_3same(DisasContext *s, arg_3same *a, GVecGen3Fn fn)
 750{
 751    int vec_size = a->q ? 16 : 8;
 752    int rd_ofs = neon_full_reg_offset(a->vd);
 753    int rn_ofs = neon_full_reg_offset(a->vn);
 754    int rm_ofs = neon_full_reg_offset(a->vm);
 755
 756    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
 757        return false;
 758    }
 759
 760    /* UNDEF accesses to D16-D31 if they don't exist. */
 761    if (!dc_isar_feature(aa32_simd_r32, s) &&
 762        ((a->vd | a->vn | a->vm) & 0x10)) {
 763        return false;
 764    }
 765
 766    if ((a->vn | a->vm | a->vd) & a->q) {
 767        return false;
 768    }
 769
 770    if (!vfp_access_check(s)) {
 771        return true;
 772    }
 773
 774    fn(a->size, rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size);
 775    return true;
 776}
 777
 778#define DO_3SAME(INSN, FUNC)                                            \
 779    static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a)        \
 780    {                                                                   \
 781        return do_3same(s, a, FUNC);                                    \
 782    }
 783
 784DO_3SAME(VADD, tcg_gen_gvec_add)
 785DO_3SAME(VSUB, tcg_gen_gvec_sub)
 786DO_3SAME(VAND, tcg_gen_gvec_and)
 787DO_3SAME(VBIC, tcg_gen_gvec_andc)
 788DO_3SAME(VORR, tcg_gen_gvec_or)
 789DO_3SAME(VORN, tcg_gen_gvec_orc)
 790DO_3SAME(VEOR, tcg_gen_gvec_xor)
 791DO_3SAME(VSHL_S, gen_gvec_sshl)
 792DO_3SAME(VSHL_U, gen_gvec_ushl)
 793DO_3SAME(VQADD_S, gen_gvec_sqadd_qc)
 794DO_3SAME(VQADD_U, gen_gvec_uqadd_qc)
 795DO_3SAME(VQSUB_S, gen_gvec_sqsub_qc)
 796DO_3SAME(VQSUB_U, gen_gvec_uqsub_qc)
 797
 798/* These insns are all gvec_bitsel but with the inputs in various orders. */
 799#define DO_3SAME_BITSEL(INSN, O1, O2, O3)                               \
 800    static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs,         \
 801                                uint32_t rn_ofs, uint32_t rm_ofs,       \
 802                                uint32_t oprsz, uint32_t maxsz)         \
 803    {                                                                   \
 804        tcg_gen_gvec_bitsel(vece, rd_ofs, O1, O2, O3, oprsz, maxsz);    \
 805    }                                                                   \
 806    DO_3SAME(INSN, gen_##INSN##_3s)
 807
 808DO_3SAME_BITSEL(VBSL, rd_ofs, rn_ofs, rm_ofs)
 809DO_3SAME_BITSEL(VBIT, rm_ofs, rn_ofs, rd_ofs)
 810DO_3SAME_BITSEL(VBIF, rm_ofs, rd_ofs, rn_ofs)
 811
 812#define DO_3SAME_NO_SZ_3(INSN, FUNC)                                    \
 813    static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a)        \
 814    {                                                                   \
 815        if (a->size == 3) {                                             \
 816            return false;                                               \
 817        }                                                               \
 818        return do_3same(s, a, FUNC);                                    \
 819    }
 820
 821DO_3SAME_NO_SZ_3(VMAX_S, tcg_gen_gvec_smax)
 822DO_3SAME_NO_SZ_3(VMAX_U, tcg_gen_gvec_umax)
 823DO_3SAME_NO_SZ_3(VMIN_S, tcg_gen_gvec_smin)
 824DO_3SAME_NO_SZ_3(VMIN_U, tcg_gen_gvec_umin)
 825DO_3SAME_NO_SZ_3(VMUL, tcg_gen_gvec_mul)
 826DO_3SAME_NO_SZ_3(VMLA, gen_gvec_mla)
 827DO_3SAME_NO_SZ_3(VMLS, gen_gvec_mls)
 828DO_3SAME_NO_SZ_3(VTST, gen_gvec_cmtst)
 829DO_3SAME_NO_SZ_3(VABD_S, gen_gvec_sabd)
 830DO_3SAME_NO_SZ_3(VABA_S, gen_gvec_saba)
 831DO_3SAME_NO_SZ_3(VABD_U, gen_gvec_uabd)
 832DO_3SAME_NO_SZ_3(VABA_U, gen_gvec_uaba)
 833
 834#define DO_3SAME_CMP(INSN, COND)                                        \
 835    static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs,         \
 836                                uint32_t rn_ofs, uint32_t rm_ofs,       \
 837                                uint32_t oprsz, uint32_t maxsz)         \
 838    {                                                                   \
 839        tcg_gen_gvec_cmp(COND, vece, rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz); \
 840    }                                                                   \
 841    DO_3SAME_NO_SZ_3(INSN, gen_##INSN##_3s)
 842
 843DO_3SAME_CMP(VCGT_S, TCG_COND_GT)
 844DO_3SAME_CMP(VCGT_U, TCG_COND_GTU)
 845DO_3SAME_CMP(VCGE_S, TCG_COND_GE)
 846DO_3SAME_CMP(VCGE_U, TCG_COND_GEU)
 847DO_3SAME_CMP(VCEQ, TCG_COND_EQ)
 848
 849#define WRAP_OOL_FN(WRAPNAME, FUNC)                                        \
 850    static void WRAPNAME(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,  \
 851                         uint32_t rm_ofs, uint32_t oprsz, uint32_t maxsz)  \
 852    {                                                                      \
 853        tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, 0, FUNC); \
 854    }
 855
 856WRAP_OOL_FN(gen_VMUL_p_3s, gen_helper_gvec_pmul_b)
 857
 858static bool trans_VMUL_p_3s(DisasContext *s, arg_3same *a)
 859{
 860    if (a->size != 0) {
 861        return false;
 862    }
 863    return do_3same(s, a, gen_VMUL_p_3s);
 864}
 865
 866#define DO_VQRDMLAH(INSN, FUNC)                                         \
 867    static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a)        \
 868    {                                                                   \
 869        if (!dc_isar_feature(aa32_rdm, s)) {                            \
 870            return false;                                               \
 871        }                                                               \
 872        if (a->size != 1 && a->size != 2) {                             \
 873            return false;                                               \
 874        }                                                               \
 875        return do_3same(s, a, FUNC);                                    \
 876    }
 877
 878DO_VQRDMLAH(VQRDMLAH, gen_gvec_sqrdmlah_qc)
 879DO_VQRDMLAH(VQRDMLSH, gen_gvec_sqrdmlsh_qc)
 880
 881#define DO_SHA1(NAME, FUNC)                                             \
 882    WRAP_OOL_FN(gen_##NAME##_3s, FUNC)                                  \
 883    static bool trans_##NAME##_3s(DisasContext *s, arg_3same *a)        \
 884    {                                                                   \
 885        if (!dc_isar_feature(aa32_sha1, s)) {                           \
 886            return false;                                               \
 887        }                                                               \
 888        return do_3same(s, a, gen_##NAME##_3s);                         \
 889    }
 890
 891DO_SHA1(SHA1C, gen_helper_crypto_sha1c)
 892DO_SHA1(SHA1P, gen_helper_crypto_sha1p)
 893DO_SHA1(SHA1M, gen_helper_crypto_sha1m)
 894DO_SHA1(SHA1SU0, gen_helper_crypto_sha1su0)
 895
 896#define DO_SHA2(NAME, FUNC)                                             \
 897    WRAP_OOL_FN(gen_##NAME##_3s, FUNC)                                  \
 898    static bool trans_##NAME##_3s(DisasContext *s, arg_3same *a)        \
 899    {                                                                   \
 900        if (!dc_isar_feature(aa32_sha2, s)) {                           \
 901            return false;                                               \
 902        }                                                               \
 903        return do_3same(s, a, gen_##NAME##_3s);                         \
 904    }
 905
 906DO_SHA2(SHA256H, gen_helper_crypto_sha256h)
 907DO_SHA2(SHA256H2, gen_helper_crypto_sha256h2)
 908DO_SHA2(SHA256SU1, gen_helper_crypto_sha256su1)
 909
 910#define DO_3SAME_64(INSN, FUNC)                                         \
 911    static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs,         \
 912                                uint32_t rn_ofs, uint32_t rm_ofs,       \
 913                                uint32_t oprsz, uint32_t maxsz)         \
 914    {                                                                   \
 915        static const GVecGen3 op = { .fni8 = FUNC };                    \
 916        tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &op);      \
 917    }                                                                   \
 918    DO_3SAME(INSN, gen_##INSN##_3s)
 919
 920#define DO_3SAME_64_ENV(INSN, FUNC)                                     \
 921    static void gen_##INSN##_elt(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)    \
 922    {                                                                   \
 923        FUNC(d, cpu_env, n, m);                                         \
 924    }                                                                   \
 925    DO_3SAME_64(INSN, gen_##INSN##_elt)
 926
 927DO_3SAME_64(VRSHL_S64, gen_helper_neon_rshl_s64)
 928DO_3SAME_64(VRSHL_U64, gen_helper_neon_rshl_u64)
 929DO_3SAME_64_ENV(VQSHL_S64, gen_helper_neon_qshl_s64)
 930DO_3SAME_64_ENV(VQSHL_U64, gen_helper_neon_qshl_u64)
 931DO_3SAME_64_ENV(VQRSHL_S64, gen_helper_neon_qrshl_s64)
 932DO_3SAME_64_ENV(VQRSHL_U64, gen_helper_neon_qrshl_u64)
 933
 934#define DO_3SAME_32(INSN, FUNC)                                         \
 935    static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs,         \
 936                                uint32_t rn_ofs, uint32_t rm_ofs,       \
 937                                uint32_t oprsz, uint32_t maxsz)         \
 938    {                                                                   \
 939        static const GVecGen3 ops[4] = {                                \
 940            { .fni4 = gen_helper_neon_##FUNC##8 },                      \
 941            { .fni4 = gen_helper_neon_##FUNC##16 },                     \
 942            { .fni4 = gen_helper_neon_##FUNC##32 },                     \
 943            { 0 },                                                      \
 944        };                                                              \
 945        tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops[vece]); \
 946    }                                                                   \
 947    static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a)        \
 948    {                                                                   \
 949        if (a->size > 2) {                                              \
 950            return false;                                               \
 951        }                                                               \
 952        return do_3same(s, a, gen_##INSN##_3s);                         \
 953    }
 954
 955/*
 956 * Some helper functions need to be passed the cpu_env. In order
 957 * to use those with the gvec APIs like tcg_gen_gvec_3() we need
 958 * to create wrapper functions whose prototype is a NeonGenTwoOpFn()
 959 * and which call a NeonGenTwoOpEnvFn().
 960 */
 961#define WRAP_ENV_FN(WRAPNAME, FUNC)                                     \
 962    static void WRAPNAME(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m)            \
 963    {                                                                   \
 964        FUNC(d, cpu_env, n, m);                                         \
 965    }
 966
 967#define DO_3SAME_32_ENV(INSN, FUNC)                                     \
 968    WRAP_ENV_FN(gen_##INSN##_tramp8, gen_helper_neon_##FUNC##8);        \
 969    WRAP_ENV_FN(gen_##INSN##_tramp16, gen_helper_neon_##FUNC##16);      \
 970    WRAP_ENV_FN(gen_##INSN##_tramp32, gen_helper_neon_##FUNC##32);      \
 971    static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs,         \
 972                                uint32_t rn_ofs, uint32_t rm_ofs,       \
 973                                uint32_t oprsz, uint32_t maxsz)         \
 974    {                                                                   \
 975        static const GVecGen3 ops[4] = {                                \
 976            { .fni4 = gen_##INSN##_tramp8 },                            \
 977            { .fni4 = gen_##INSN##_tramp16 },                           \
 978            { .fni4 = gen_##INSN##_tramp32 },                           \
 979            { 0 },                                                      \
 980        };                                                              \
 981        tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops[vece]); \
 982    }                                                                   \
 983    static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a)        \
 984    {                                                                   \
 985        if (a->size > 2) {                                              \
 986            return false;                                               \
 987        }                                                               \
 988        return do_3same(s, a, gen_##INSN##_3s);                         \
 989    }
 990
 991DO_3SAME_32(VHADD_S, hadd_s)
 992DO_3SAME_32(VHADD_U, hadd_u)
 993DO_3SAME_32(VHSUB_S, hsub_s)
 994DO_3SAME_32(VHSUB_U, hsub_u)
 995DO_3SAME_32(VRHADD_S, rhadd_s)
 996DO_3SAME_32(VRHADD_U, rhadd_u)
 997DO_3SAME_32(VRSHL_S, rshl_s)
 998DO_3SAME_32(VRSHL_U, rshl_u)
 999
1000DO_3SAME_32_ENV(VQSHL_S, qshl_s)
1001DO_3SAME_32_ENV(VQSHL_U, qshl_u)
1002DO_3SAME_32_ENV(VQRSHL_S, qrshl_s)
1003DO_3SAME_32_ENV(VQRSHL_U, qrshl_u)
1004
1005static bool do_3same_pair(DisasContext *s, arg_3same *a, NeonGenTwoOpFn *fn)
1006{
1007    /* Operations handled pairwise 32 bits at a time */
1008    TCGv_i32 tmp, tmp2, tmp3;
1009
1010    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
1011        return false;
1012    }
1013
1014    /* UNDEF accesses to D16-D31 if they don't exist. */
1015    if (!dc_isar_feature(aa32_simd_r32, s) &&
1016        ((a->vd | a->vn | a->vm) & 0x10)) {
1017        return false;
1018    }
1019
1020    if (a->size == 3) {
1021        return false;
1022    }
1023
1024    if (!vfp_access_check(s)) {
1025        return true;
1026    }
1027
1028    assert(a->q == 0); /* enforced by decode patterns */
1029
1030    /*
1031     * Note that we have to be careful not to clobber the source operands
1032     * in the "vm == vd" case by storing the result of the first pass too
1033     * early. Since Q is 0 there are always just two passes, so instead
1034     * of a complicated loop over each pass we just unroll.
1035     */
1036    tmp = tcg_temp_new_i32();
1037    tmp2 = tcg_temp_new_i32();
1038    tmp3 = tcg_temp_new_i32();
1039
1040    read_neon_element32(tmp, a->vn, 0, MO_32);
1041    read_neon_element32(tmp2, a->vn, 1, MO_32);
1042    fn(tmp, tmp, tmp2);
1043
1044    read_neon_element32(tmp3, a->vm, 0, MO_32);
1045    read_neon_element32(tmp2, a->vm, 1, MO_32);
1046    fn(tmp3, tmp3, tmp2);
1047
1048    write_neon_element32(tmp, a->vd, 0, MO_32);
1049    write_neon_element32(tmp3, a->vd, 1, MO_32);
1050
1051    return true;
1052}
1053
1054#define DO_3SAME_PAIR(INSN, func)                                       \
1055    static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a)        \
1056    {                                                                   \
1057        static NeonGenTwoOpFn * const fns[] = {                         \
1058            gen_helper_neon_##func##8,                                  \
1059            gen_helper_neon_##func##16,                                 \
1060            gen_helper_neon_##func##32,                                 \
1061        };                                                              \
1062        if (a->size > 2) {                                              \
1063            return false;                                               \
1064        }                                                               \
1065        return do_3same_pair(s, a, fns[a->size]);                       \
1066    }
1067
1068/* 32-bit pairwise ops end up the same as the elementwise versions.  */
1069#define gen_helper_neon_pmax_s32  tcg_gen_smax_i32
1070#define gen_helper_neon_pmax_u32  tcg_gen_umax_i32
1071#define gen_helper_neon_pmin_s32  tcg_gen_smin_i32
1072#define gen_helper_neon_pmin_u32  tcg_gen_umin_i32
1073#define gen_helper_neon_padd_u32  tcg_gen_add_i32
1074
1075DO_3SAME_PAIR(VPMAX_S, pmax_s)
1076DO_3SAME_PAIR(VPMIN_S, pmin_s)
1077DO_3SAME_PAIR(VPMAX_U, pmax_u)
1078DO_3SAME_PAIR(VPMIN_U, pmin_u)
1079DO_3SAME_PAIR(VPADD, padd_u)
1080
1081#define DO_3SAME_VQDMULH(INSN, FUNC)                                    \
1082    WRAP_ENV_FN(gen_##INSN##_tramp16, gen_helper_neon_##FUNC##_s16);    \
1083    WRAP_ENV_FN(gen_##INSN##_tramp32, gen_helper_neon_##FUNC##_s32);    \
1084    static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs,         \
1085                                uint32_t rn_ofs, uint32_t rm_ofs,       \
1086                                uint32_t oprsz, uint32_t maxsz)         \
1087    {                                                                   \
1088        static const GVecGen3 ops[2] = {                                \
1089            { .fni4 = gen_##INSN##_tramp16 },                           \
1090            { .fni4 = gen_##INSN##_tramp32 },                           \
1091        };                                                              \
1092        tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops[vece - 1]); \
1093    }                                                                   \
1094    static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a)        \
1095    {                                                                   \
1096        if (a->size != 1 && a->size != 2) {                             \
1097            return false;                                               \
1098        }                                                               \
1099        return do_3same(s, a, gen_##INSN##_3s);                         \
1100    }
1101
1102DO_3SAME_VQDMULH(VQDMULH, qdmulh)
1103DO_3SAME_VQDMULH(VQRDMULH, qrdmulh)
1104
1105#define WRAP_FP_GVEC(WRAPNAME, FPST, FUNC)                              \
1106    static void WRAPNAME(unsigned vece, uint32_t rd_ofs,                \
1107                         uint32_t rn_ofs, uint32_t rm_ofs,              \
1108                         uint32_t oprsz, uint32_t maxsz)                \
1109    {                                                                   \
1110        TCGv_ptr fpst = fpstatus_ptr(FPST);                             \
1111        tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, fpst,                \
1112                           oprsz, maxsz, 0, FUNC);                      \
1113    }
1114
1115#define DO_3S_FP_GVEC(INSN,SFUNC,HFUNC)                                 \
1116    WRAP_FP_GVEC(gen_##INSN##_fp32_3s, FPST_STD, SFUNC)                 \
1117    WRAP_FP_GVEC(gen_##INSN##_fp16_3s, FPST_STD_F16, HFUNC)             \
1118    static bool trans_##INSN##_fp_3s(DisasContext *s, arg_3same *a)     \
1119    {                                                                   \
1120        if (a->size == MO_16) {                                         \
1121            if (!dc_isar_feature(aa32_fp16_arith, s)) {                 \
1122                return false;                                           \
1123            }                                                           \
1124            return do_3same(s, a, gen_##INSN##_fp16_3s);                \
1125        }                                                               \
1126        return do_3same(s, a, gen_##INSN##_fp32_3s);                    \
1127    }
1128
1129
1130DO_3S_FP_GVEC(VADD, gen_helper_gvec_fadd_s, gen_helper_gvec_fadd_h)
1131DO_3S_FP_GVEC(VSUB, gen_helper_gvec_fsub_s, gen_helper_gvec_fsub_h)
1132DO_3S_FP_GVEC(VABD, gen_helper_gvec_fabd_s, gen_helper_gvec_fabd_h)
1133DO_3S_FP_GVEC(VMUL, gen_helper_gvec_fmul_s, gen_helper_gvec_fmul_h)
1134DO_3S_FP_GVEC(VCEQ, gen_helper_gvec_fceq_s, gen_helper_gvec_fceq_h)
1135DO_3S_FP_GVEC(VCGE, gen_helper_gvec_fcge_s, gen_helper_gvec_fcge_h)
1136DO_3S_FP_GVEC(VCGT, gen_helper_gvec_fcgt_s, gen_helper_gvec_fcgt_h)
1137DO_3S_FP_GVEC(VACGE, gen_helper_gvec_facge_s, gen_helper_gvec_facge_h)
1138DO_3S_FP_GVEC(VACGT, gen_helper_gvec_facgt_s, gen_helper_gvec_facgt_h)
1139DO_3S_FP_GVEC(VMAX, gen_helper_gvec_fmax_s, gen_helper_gvec_fmax_h)
1140DO_3S_FP_GVEC(VMIN, gen_helper_gvec_fmin_s, gen_helper_gvec_fmin_h)
1141DO_3S_FP_GVEC(VMLA, gen_helper_gvec_fmla_s, gen_helper_gvec_fmla_h)
1142DO_3S_FP_GVEC(VMLS, gen_helper_gvec_fmls_s, gen_helper_gvec_fmls_h)
1143DO_3S_FP_GVEC(VFMA, gen_helper_gvec_vfma_s, gen_helper_gvec_vfma_h)
1144DO_3S_FP_GVEC(VFMS, gen_helper_gvec_vfms_s, gen_helper_gvec_vfms_h)
1145DO_3S_FP_GVEC(VRECPS, gen_helper_gvec_recps_nf_s, gen_helper_gvec_recps_nf_h)
1146DO_3S_FP_GVEC(VRSQRTS, gen_helper_gvec_rsqrts_nf_s, gen_helper_gvec_rsqrts_nf_h)
1147
1148WRAP_FP_GVEC(gen_VMAXNM_fp32_3s, FPST_STD, gen_helper_gvec_fmaxnum_s)
1149WRAP_FP_GVEC(gen_VMAXNM_fp16_3s, FPST_STD_F16, gen_helper_gvec_fmaxnum_h)
1150WRAP_FP_GVEC(gen_VMINNM_fp32_3s, FPST_STD, gen_helper_gvec_fminnum_s)
1151WRAP_FP_GVEC(gen_VMINNM_fp16_3s, FPST_STD_F16, gen_helper_gvec_fminnum_h)
1152
1153static bool trans_VMAXNM_fp_3s(DisasContext *s, arg_3same *a)
1154{
1155    if (!arm_dc_feature(s, ARM_FEATURE_V8)) {
1156        return false;
1157    }
1158
1159    if (a->size == MO_16) {
1160        if (!dc_isar_feature(aa32_fp16_arith, s)) {
1161            return false;
1162        }
1163        return do_3same(s, a, gen_VMAXNM_fp16_3s);
1164    }
1165    return do_3same(s, a, gen_VMAXNM_fp32_3s);
1166}
1167
1168static bool trans_VMINNM_fp_3s(DisasContext *s, arg_3same *a)
1169{
1170    if (!arm_dc_feature(s, ARM_FEATURE_V8)) {
1171        return false;
1172    }
1173
1174    if (a->size == MO_16) {
1175        if (!dc_isar_feature(aa32_fp16_arith, s)) {
1176            return false;
1177        }
1178        return do_3same(s, a, gen_VMINNM_fp16_3s);
1179    }
1180    return do_3same(s, a, gen_VMINNM_fp32_3s);
1181}
1182
1183static bool do_3same_fp_pair(DisasContext *s, arg_3same *a,
1184                             gen_helper_gvec_3_ptr *fn)
1185{
1186    /* FP pairwise operations */
1187    TCGv_ptr fpstatus;
1188
1189    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
1190        return false;
1191    }
1192
1193    /* UNDEF accesses to D16-D31 if they don't exist. */
1194    if (!dc_isar_feature(aa32_simd_r32, s) &&
1195        ((a->vd | a->vn | a->vm) & 0x10)) {
1196        return false;
1197    }
1198
1199    if (!vfp_access_check(s)) {
1200        return true;
1201    }
1202
1203    assert(a->q == 0); /* enforced by decode patterns */
1204
1205
1206    fpstatus = fpstatus_ptr(a->size == MO_16 ? FPST_STD_F16 : FPST_STD);
1207    tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
1208                       vfp_reg_offset(1, a->vn),
1209                       vfp_reg_offset(1, a->vm),
1210                       fpstatus, 8, 8, 0, fn);
1211
1212    return true;
1213}
1214
1215/*
1216 * For all the functions using this macro, size == 1 means fp16,
1217 * which is an architecture extension we don't implement yet.
1218 */
1219#define DO_3S_FP_PAIR(INSN,FUNC)                                    \
1220    static bool trans_##INSN##_fp_3s(DisasContext *s, arg_3same *a) \
1221    {                                                               \
1222        if (a->size == MO_16) {                                     \
1223            if (!dc_isar_feature(aa32_fp16_arith, s)) {             \
1224                return false;                                       \
1225            }                                                       \
1226            return do_3same_fp_pair(s, a, FUNC##h);                 \
1227        }                                                           \
1228        return do_3same_fp_pair(s, a, FUNC##s);                     \
1229    }
1230
1231DO_3S_FP_PAIR(VPADD, gen_helper_neon_padd)
1232DO_3S_FP_PAIR(VPMAX, gen_helper_neon_pmax)
1233DO_3S_FP_PAIR(VPMIN, gen_helper_neon_pmin)
1234
1235static bool do_vector_2sh(DisasContext *s, arg_2reg_shift *a, GVecGen2iFn *fn)
1236{
1237    /* Handle a 2-reg-shift insn which can be vectorized. */
1238    int vec_size = a->q ? 16 : 8;
1239    int rd_ofs = neon_full_reg_offset(a->vd);
1240    int rm_ofs = neon_full_reg_offset(a->vm);
1241
1242    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
1243        return false;
1244    }
1245
1246    /* UNDEF accesses to D16-D31 if they don't exist. */
1247    if (!dc_isar_feature(aa32_simd_r32, s) &&
1248        ((a->vd | a->vm) & 0x10)) {
1249        return false;
1250    }
1251
1252    if ((a->vm | a->vd) & a->q) {
1253        return false;
1254    }
1255
1256    if (!vfp_access_check(s)) {
1257        return true;
1258    }
1259
1260    fn(a->size, rd_ofs, rm_ofs, a->shift, vec_size, vec_size);
1261    return true;
1262}
1263
1264#define DO_2SH(INSN, FUNC)                                              \
1265    static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a)  \
1266    {                                                                   \
1267        return do_vector_2sh(s, a, FUNC);                               \
1268    }                                                                   \
1269
1270DO_2SH(VSHL, tcg_gen_gvec_shli)
1271DO_2SH(VSLI, gen_gvec_sli)
1272DO_2SH(VSRI, gen_gvec_sri)
1273DO_2SH(VSRA_S, gen_gvec_ssra)
1274DO_2SH(VSRA_U, gen_gvec_usra)
1275DO_2SH(VRSHR_S, gen_gvec_srshr)
1276DO_2SH(VRSHR_U, gen_gvec_urshr)
1277DO_2SH(VRSRA_S, gen_gvec_srsra)
1278DO_2SH(VRSRA_U, gen_gvec_ursra)
1279
1280static bool trans_VSHR_S_2sh(DisasContext *s, arg_2reg_shift *a)
1281{
1282    /* Signed shift out of range results in all-sign-bits */
1283    a->shift = MIN(a->shift, (8 << a->size) - 1);
1284    return do_vector_2sh(s, a, tcg_gen_gvec_sari);
1285}
1286
1287static void gen_zero_rd_2sh(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
1288                            int64_t shift, uint32_t oprsz, uint32_t maxsz)
1289{
1290    tcg_gen_gvec_dup_imm(vece, rd_ofs, oprsz, maxsz, 0);
1291}
1292
1293static bool trans_VSHR_U_2sh(DisasContext *s, arg_2reg_shift *a)
1294{
1295    /* Shift out of range is architecturally valid and results in zero. */
1296    if (a->shift >= (8 << a->size)) {
1297        return do_vector_2sh(s, a, gen_zero_rd_2sh);
1298    } else {
1299        return do_vector_2sh(s, a, tcg_gen_gvec_shri);
1300    }
1301}
1302
1303static bool do_2shift_env_64(DisasContext *s, arg_2reg_shift *a,
1304                             NeonGenTwo64OpEnvFn *fn)
1305{
1306    /*
1307     * 2-reg-and-shift operations, size == 3 case, where the
1308     * function needs to be passed cpu_env.
1309     */
1310    TCGv_i64 constimm;
1311    int pass;
1312
1313    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
1314        return false;
1315    }
1316
1317    /* UNDEF accesses to D16-D31 if they don't exist. */
1318    if (!dc_isar_feature(aa32_simd_r32, s) &&
1319        ((a->vd | a->vm) & 0x10)) {
1320        return false;
1321    }
1322
1323    if ((a->vm | a->vd) & a->q) {
1324        return false;
1325    }
1326
1327    if (!vfp_access_check(s)) {
1328        return true;
1329    }
1330
1331    /*
1332     * To avoid excessive duplication of ops we implement shift
1333     * by immediate using the variable shift operations.
1334     */
1335    constimm = tcg_constant_i64(dup_const(a->size, a->shift));
1336
1337    for (pass = 0; pass < a->q + 1; pass++) {
1338        TCGv_i64 tmp = tcg_temp_new_i64();
1339
1340        read_neon_element64(tmp, a->vm, pass, MO_64);
1341        fn(tmp, cpu_env, tmp, constimm);
1342        write_neon_element64(tmp, a->vd, pass, MO_64);
1343    }
1344    return true;
1345}
1346
1347static bool do_2shift_env_32(DisasContext *s, arg_2reg_shift *a,
1348                             NeonGenTwoOpEnvFn *fn)
1349{
1350    /*
1351     * 2-reg-and-shift operations, size < 3 case, where the
1352     * helper needs to be passed cpu_env.
1353     */
1354    TCGv_i32 constimm, tmp;
1355    int pass;
1356
1357    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
1358        return false;
1359    }
1360
1361    /* UNDEF accesses to D16-D31 if they don't exist. */
1362    if (!dc_isar_feature(aa32_simd_r32, s) &&
1363        ((a->vd | a->vm) & 0x10)) {
1364        return false;
1365    }
1366
1367    if ((a->vm | a->vd) & a->q) {
1368        return false;
1369    }
1370
1371    if (!vfp_access_check(s)) {
1372        return true;
1373    }
1374
1375    /*
1376     * To avoid excessive duplication of ops we implement shift
1377     * by immediate using the variable shift operations.
1378     */
1379    constimm = tcg_constant_i32(dup_const(a->size, a->shift));
1380    tmp = tcg_temp_new_i32();
1381
1382    for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
1383        read_neon_element32(tmp, a->vm, pass, MO_32);
1384        fn(tmp, cpu_env, tmp, constimm);
1385        write_neon_element32(tmp, a->vd, pass, MO_32);
1386    }
1387    return true;
1388}
1389
1390#define DO_2SHIFT_ENV(INSN, FUNC)                                       \
1391    static bool trans_##INSN##_64_2sh(DisasContext *s, arg_2reg_shift *a) \
1392    {                                                                   \
1393        return do_2shift_env_64(s, a, gen_helper_neon_##FUNC##64);      \
1394    }                                                                   \
1395    static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a)  \
1396    {                                                                   \
1397        static NeonGenTwoOpEnvFn * const fns[] = {                      \
1398            gen_helper_neon_##FUNC##8,                                  \
1399            gen_helper_neon_##FUNC##16,                                 \
1400            gen_helper_neon_##FUNC##32,                                 \
1401        };                                                              \
1402        assert(a->size < ARRAY_SIZE(fns));                              \
1403        return do_2shift_env_32(s, a, fns[a->size]);                    \
1404    }
1405
1406DO_2SHIFT_ENV(VQSHLU, qshlu_s)
1407DO_2SHIFT_ENV(VQSHL_U, qshl_u)
1408DO_2SHIFT_ENV(VQSHL_S, qshl_s)
1409
1410static bool do_2shift_narrow_64(DisasContext *s, arg_2reg_shift *a,
1411                                NeonGenTwo64OpFn *shiftfn,
1412                                NeonGenNarrowEnvFn *narrowfn)
1413{
1414    /* 2-reg-and-shift narrowing-shift operations, size == 3 case */
1415    TCGv_i64 constimm, rm1, rm2;
1416    TCGv_i32 rd;
1417
1418    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
1419        return false;
1420    }
1421
1422    /* UNDEF accesses to D16-D31 if they don't exist. */
1423    if (!dc_isar_feature(aa32_simd_r32, s) &&
1424        ((a->vd | a->vm) & 0x10)) {
1425        return false;
1426    }
1427
1428    if (a->vm & 1) {
1429        return false;
1430    }
1431
1432    if (!vfp_access_check(s)) {
1433        return true;
1434    }
1435
1436    /*
1437     * This is always a right shift, and the shiftfn is always a
1438     * left-shift helper, which thus needs the negated shift count.
1439     */
1440    constimm = tcg_constant_i64(-a->shift);
1441    rm1 = tcg_temp_new_i64();
1442    rm2 = tcg_temp_new_i64();
1443    rd = tcg_temp_new_i32();
1444
1445    /* Load both inputs first to avoid potential overwrite if rm == rd */
1446    read_neon_element64(rm1, a->vm, 0, MO_64);
1447    read_neon_element64(rm2, a->vm, 1, MO_64);
1448
1449    shiftfn(rm1, rm1, constimm);
1450    narrowfn(rd, cpu_env, rm1);
1451    write_neon_element32(rd, a->vd, 0, MO_32);
1452
1453    shiftfn(rm2, rm2, constimm);
1454    narrowfn(rd, cpu_env, rm2);
1455    write_neon_element32(rd, a->vd, 1, MO_32);
1456
1457    return true;
1458}
1459
1460static bool do_2shift_narrow_32(DisasContext *s, arg_2reg_shift *a,
1461                                NeonGenTwoOpFn *shiftfn,
1462                                NeonGenNarrowEnvFn *narrowfn)
1463{
1464    /* 2-reg-and-shift narrowing-shift operations, size < 3 case */
1465    TCGv_i32 constimm, rm1, rm2, rm3, rm4;
1466    TCGv_i64 rtmp;
1467    uint32_t imm;
1468
1469    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
1470        return false;
1471    }
1472
1473    /* UNDEF accesses to D16-D31 if they don't exist. */
1474    if (!dc_isar_feature(aa32_simd_r32, s) &&
1475        ((a->vd | a->vm) & 0x10)) {
1476        return false;
1477    }
1478
1479    if (a->vm & 1) {
1480        return false;
1481    }
1482
1483    if (!vfp_access_check(s)) {
1484        return true;
1485    }
1486
1487    /*
1488     * This is always a right shift, and the shiftfn is always a
1489     * left-shift helper, which thus needs the negated shift count
1490     * duplicated into each lane of the immediate value.
1491     */
1492    if (a->size == 1) {
1493        imm = (uint16_t)(-a->shift);
1494        imm |= imm << 16;
1495    } else {
1496        /* size == 2 */
1497        imm = -a->shift;
1498    }
1499    constimm = tcg_constant_i32(imm);
1500
1501    /* Load all inputs first to avoid potential overwrite */
1502    rm1 = tcg_temp_new_i32();
1503    rm2 = tcg_temp_new_i32();
1504    rm3 = tcg_temp_new_i32();
1505    rm4 = tcg_temp_new_i32();
1506    read_neon_element32(rm1, a->vm, 0, MO_32);
1507    read_neon_element32(rm2, a->vm, 1, MO_32);
1508    read_neon_element32(rm3, a->vm, 2, MO_32);
1509    read_neon_element32(rm4, a->vm, 3, MO_32);
1510    rtmp = tcg_temp_new_i64();
1511
1512    shiftfn(rm1, rm1, constimm);
1513    shiftfn(rm2, rm2, constimm);
1514
1515    tcg_gen_concat_i32_i64(rtmp, rm1, rm2);
1516
1517    narrowfn(rm1, cpu_env, rtmp);
1518    write_neon_element32(rm1, a->vd, 0, MO_32);
1519
1520    shiftfn(rm3, rm3, constimm);
1521    shiftfn(rm4, rm4, constimm);
1522
1523    tcg_gen_concat_i32_i64(rtmp, rm3, rm4);
1524
1525    narrowfn(rm3, cpu_env, rtmp);
1526    write_neon_element32(rm3, a->vd, 1, MO_32);
1527    return true;
1528}
1529
1530#define DO_2SN_64(INSN, FUNC, NARROWFUNC)                               \
1531    static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a)  \
1532    {                                                                   \
1533        return do_2shift_narrow_64(s, a, FUNC, NARROWFUNC);             \
1534    }
1535#define DO_2SN_32(INSN, FUNC, NARROWFUNC)                               \
1536    static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a)  \
1537    {                                                                   \
1538        return do_2shift_narrow_32(s, a, FUNC, NARROWFUNC);             \
1539    }
1540
1541static void gen_neon_narrow_u32(TCGv_i32 dest, TCGv_ptr env, TCGv_i64 src)
1542{
1543    tcg_gen_extrl_i64_i32(dest, src);
1544}
1545
1546static void gen_neon_narrow_u16(TCGv_i32 dest, TCGv_ptr env, TCGv_i64 src)
1547{
1548    gen_helper_neon_narrow_u16(dest, src);
1549}
1550
1551static void gen_neon_narrow_u8(TCGv_i32 dest, TCGv_ptr env, TCGv_i64 src)
1552{
1553    gen_helper_neon_narrow_u8(dest, src);
1554}
1555
1556DO_2SN_64(VSHRN_64, gen_ushl_i64, gen_neon_narrow_u32)
1557DO_2SN_32(VSHRN_32, gen_ushl_i32, gen_neon_narrow_u16)
1558DO_2SN_32(VSHRN_16, gen_helper_neon_shl_u16, gen_neon_narrow_u8)
1559
1560DO_2SN_64(VRSHRN_64, gen_helper_neon_rshl_u64, gen_neon_narrow_u32)
1561DO_2SN_32(VRSHRN_32, gen_helper_neon_rshl_u32, gen_neon_narrow_u16)
1562DO_2SN_32(VRSHRN_16, gen_helper_neon_rshl_u16, gen_neon_narrow_u8)
1563
1564DO_2SN_64(VQSHRUN_64, gen_sshl_i64, gen_helper_neon_unarrow_sat32)
1565DO_2SN_32(VQSHRUN_32, gen_sshl_i32, gen_helper_neon_unarrow_sat16)
1566DO_2SN_32(VQSHRUN_16, gen_helper_neon_shl_s16, gen_helper_neon_unarrow_sat8)
1567
1568DO_2SN_64(VQRSHRUN_64, gen_helper_neon_rshl_s64, gen_helper_neon_unarrow_sat32)
1569DO_2SN_32(VQRSHRUN_32, gen_helper_neon_rshl_s32, gen_helper_neon_unarrow_sat16)
1570DO_2SN_32(VQRSHRUN_16, gen_helper_neon_rshl_s16, gen_helper_neon_unarrow_sat8)
1571DO_2SN_64(VQSHRN_S64, gen_sshl_i64, gen_helper_neon_narrow_sat_s32)
1572DO_2SN_32(VQSHRN_S32, gen_sshl_i32, gen_helper_neon_narrow_sat_s16)
1573DO_2SN_32(VQSHRN_S16, gen_helper_neon_shl_s16, gen_helper_neon_narrow_sat_s8)
1574
1575DO_2SN_64(VQRSHRN_S64, gen_helper_neon_rshl_s64, gen_helper_neon_narrow_sat_s32)
1576DO_2SN_32(VQRSHRN_S32, gen_helper_neon_rshl_s32, gen_helper_neon_narrow_sat_s16)
1577DO_2SN_32(VQRSHRN_S16, gen_helper_neon_rshl_s16, gen_helper_neon_narrow_sat_s8)
1578
1579DO_2SN_64(VQSHRN_U64, gen_ushl_i64, gen_helper_neon_narrow_sat_u32)
1580DO_2SN_32(VQSHRN_U32, gen_ushl_i32, gen_helper_neon_narrow_sat_u16)
1581DO_2SN_32(VQSHRN_U16, gen_helper_neon_shl_u16, gen_helper_neon_narrow_sat_u8)
1582
1583DO_2SN_64(VQRSHRN_U64, gen_helper_neon_rshl_u64, gen_helper_neon_narrow_sat_u32)
1584DO_2SN_32(VQRSHRN_U32, gen_helper_neon_rshl_u32, gen_helper_neon_narrow_sat_u16)
1585DO_2SN_32(VQRSHRN_U16, gen_helper_neon_rshl_u16, gen_helper_neon_narrow_sat_u8)
1586
1587static bool do_vshll_2sh(DisasContext *s, arg_2reg_shift *a,
1588                         NeonGenWidenFn *widenfn, bool u)
1589{
1590    TCGv_i64 tmp;
1591    TCGv_i32 rm0, rm1;
1592    uint64_t widen_mask = 0;
1593
1594    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
1595        return false;
1596    }
1597
1598    /* UNDEF accesses to D16-D31 if they don't exist. */
1599    if (!dc_isar_feature(aa32_simd_r32, s) &&
1600        ((a->vd | a->vm) & 0x10)) {
1601        return false;
1602    }
1603
1604    if (a->vd & 1) {
1605        return false;
1606    }
1607
1608    if (!vfp_access_check(s)) {
1609        return true;
1610    }
1611
1612    /*
1613     * This is a widen-and-shift operation. The shift is always less
1614     * than the width of the source type, so after widening the input
1615     * vector we can simply shift the whole 64-bit widened register,
1616     * and then clear the potential overflow bits resulting from left
1617     * bits of the narrow input appearing as right bits of the left
1618     * neighbour narrow input. Calculate a mask of bits to clear.
1619     */
1620    if ((a->shift != 0) && (a->size < 2 || u)) {
1621        int esize = 8 << a->size;
1622        widen_mask = MAKE_64BIT_MASK(0, esize);
1623        widen_mask >>= esize - a->shift;
1624        widen_mask = dup_const(a->size + 1, widen_mask);
1625    }
1626
1627    rm0 = tcg_temp_new_i32();
1628    rm1 = tcg_temp_new_i32();
1629    read_neon_element32(rm0, a->vm, 0, MO_32);
1630    read_neon_element32(rm1, a->vm, 1, MO_32);
1631    tmp = tcg_temp_new_i64();
1632
1633    widenfn(tmp, rm0);
1634    if (a->shift != 0) {
1635        tcg_gen_shli_i64(tmp, tmp, a->shift);
1636        tcg_gen_andi_i64(tmp, tmp, ~widen_mask);
1637    }
1638    write_neon_element64(tmp, a->vd, 0, MO_64);
1639
1640    widenfn(tmp, rm1);
1641    if (a->shift != 0) {
1642        tcg_gen_shli_i64(tmp, tmp, a->shift);
1643        tcg_gen_andi_i64(tmp, tmp, ~widen_mask);
1644    }
1645    write_neon_element64(tmp, a->vd, 1, MO_64);
1646    return true;
1647}
1648
1649static bool trans_VSHLL_S_2sh(DisasContext *s, arg_2reg_shift *a)
1650{
1651    static NeonGenWidenFn * const widenfn[] = {
1652        gen_helper_neon_widen_s8,
1653        gen_helper_neon_widen_s16,
1654        tcg_gen_ext_i32_i64,
1655    };
1656    return do_vshll_2sh(s, a, widenfn[a->size], false);
1657}
1658
1659static bool trans_VSHLL_U_2sh(DisasContext *s, arg_2reg_shift *a)
1660{
1661    static NeonGenWidenFn * const widenfn[] = {
1662        gen_helper_neon_widen_u8,
1663        gen_helper_neon_widen_u16,
1664        tcg_gen_extu_i32_i64,
1665    };
1666    return do_vshll_2sh(s, a, widenfn[a->size], true);
1667}
1668
1669static bool do_fp_2sh(DisasContext *s, arg_2reg_shift *a,
1670                      gen_helper_gvec_2_ptr *fn)
1671{
1672    /* FP operations in 2-reg-and-shift group */
1673    int vec_size = a->q ? 16 : 8;
1674    int rd_ofs = neon_full_reg_offset(a->vd);
1675    int rm_ofs = neon_full_reg_offset(a->vm);
1676    TCGv_ptr fpst;
1677
1678    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
1679        return false;
1680    }
1681
1682    if (a->size == MO_16) {
1683        if (!dc_isar_feature(aa32_fp16_arith, s)) {
1684            return false;
1685        }
1686    }
1687
1688    /* UNDEF accesses to D16-D31 if they don't exist. */
1689    if (!dc_isar_feature(aa32_simd_r32, s) &&
1690        ((a->vd | a->vm) & 0x10)) {
1691        return false;
1692    }
1693
1694    if ((a->vm | a->vd) & a->q) {
1695        return false;
1696    }
1697
1698    if (!vfp_access_check(s)) {
1699        return true;
1700    }
1701
1702    fpst = fpstatus_ptr(a->size == MO_16 ? FPST_STD_F16 : FPST_STD);
1703    tcg_gen_gvec_2_ptr(rd_ofs, rm_ofs, fpst, vec_size, vec_size, a->shift, fn);
1704    return true;
1705}
1706
1707#define DO_FP_2SH(INSN, FUNC)                                           \
1708    static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a)  \
1709    {                                                                   \
1710        return do_fp_2sh(s, a, FUNC);                                   \
1711    }
1712
1713DO_FP_2SH(VCVT_SF, gen_helper_gvec_vcvt_sf)
1714DO_FP_2SH(VCVT_UF, gen_helper_gvec_vcvt_uf)
1715DO_FP_2SH(VCVT_FS, gen_helper_gvec_vcvt_fs)
1716DO_FP_2SH(VCVT_FU, gen_helper_gvec_vcvt_fu)
1717
1718DO_FP_2SH(VCVT_SH, gen_helper_gvec_vcvt_sh)
1719DO_FP_2SH(VCVT_UH, gen_helper_gvec_vcvt_uh)
1720DO_FP_2SH(VCVT_HS, gen_helper_gvec_vcvt_hs)
1721DO_FP_2SH(VCVT_HU, gen_helper_gvec_vcvt_hu)
1722
1723static bool do_1reg_imm(DisasContext *s, arg_1reg_imm *a,
1724                        GVecGen2iFn *fn)
1725{
1726    uint64_t imm;
1727    int reg_ofs, vec_size;
1728
1729    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
1730        return false;
1731    }
1732
1733    /* UNDEF accesses to D16-D31 if they don't exist. */
1734    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
1735        return false;
1736    }
1737
1738    if (a->vd & a->q) {
1739        return false;
1740    }
1741
1742    if (!vfp_access_check(s)) {
1743        return true;
1744    }
1745
1746    reg_ofs = neon_full_reg_offset(a->vd);
1747    vec_size = a->q ? 16 : 8;
1748    imm = asimd_imm_const(a->imm, a->cmode, a->op);
1749
1750    fn(MO_64, reg_ofs, reg_ofs, imm, vec_size, vec_size);
1751    return true;
1752}
1753
1754static void gen_VMOV_1r(unsigned vece, uint32_t dofs, uint32_t aofs,
1755                        int64_t c, uint32_t oprsz, uint32_t maxsz)
1756{
1757    tcg_gen_gvec_dup_imm(MO_64, dofs, oprsz, maxsz, c);
1758}
1759
1760static bool trans_Vimm_1r(DisasContext *s, arg_1reg_imm *a)
1761{
1762    /* Handle decode of cmode/op here between VORR/VBIC/VMOV */
1763    GVecGen2iFn *fn;
1764
1765    if ((a->cmode & 1) && a->cmode < 12) {
1766        /* for op=1, the imm will be inverted, so BIC becomes AND. */
1767        fn = a->op ? tcg_gen_gvec_andi : tcg_gen_gvec_ori;
1768    } else {
1769        /* There is one unallocated cmode/op combination in this space */
1770        if (a->cmode == 15 && a->op == 1) {
1771            return false;
1772        }
1773        fn = gen_VMOV_1r;
1774    }
1775    return do_1reg_imm(s, a, fn);
1776}
1777
1778static bool do_prewiden_3d(DisasContext *s, arg_3diff *a,
1779                           NeonGenWidenFn *widenfn,
1780                           NeonGenTwo64OpFn *opfn,
1781                           int src1_mop, int src2_mop)
1782{
1783    /* 3-regs different lengths, prewidening case (VADDL/VSUBL/VAADW/VSUBW) */
1784    TCGv_i64 rn0_64, rn1_64, rm_64;
1785
1786    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
1787        return false;
1788    }
1789
1790    /* UNDEF accesses to D16-D31 if they don't exist. */
1791    if (!dc_isar_feature(aa32_simd_r32, s) &&
1792        ((a->vd | a->vn | a->vm) & 0x10)) {
1793        return false;
1794    }
1795
1796    if (!opfn) {
1797        /* size == 3 case, which is an entirely different insn group */
1798        return false;
1799    }
1800
1801    if ((a->vd & 1) || (src1_mop == MO_UQ && (a->vn & 1))) {
1802        return false;
1803    }
1804
1805    if (!vfp_access_check(s)) {
1806        return true;
1807    }
1808
1809    rn0_64 = tcg_temp_new_i64();
1810    rn1_64 = tcg_temp_new_i64();
1811    rm_64 = tcg_temp_new_i64();
1812
1813    if (src1_mop >= 0) {
1814        read_neon_element64(rn0_64, a->vn, 0, src1_mop);
1815    } else {
1816        TCGv_i32 tmp = tcg_temp_new_i32();
1817        read_neon_element32(tmp, a->vn, 0, MO_32);
1818        widenfn(rn0_64, tmp);
1819    }
1820    if (src2_mop >= 0) {
1821        read_neon_element64(rm_64, a->vm, 0, src2_mop);
1822    } else {
1823        TCGv_i32 tmp = tcg_temp_new_i32();
1824        read_neon_element32(tmp, a->vm, 0, MO_32);
1825        widenfn(rm_64, tmp);
1826    }
1827
1828    opfn(rn0_64, rn0_64, rm_64);
1829
1830    /*
1831     * Load second pass inputs before storing the first pass result, to
1832     * avoid incorrect results if a narrow input overlaps with the result.
1833     */
1834    if (src1_mop >= 0) {
1835        read_neon_element64(rn1_64, a->vn, 1, src1_mop);
1836    } else {
1837        TCGv_i32 tmp = tcg_temp_new_i32();
1838        read_neon_element32(tmp, a->vn, 1, MO_32);
1839        widenfn(rn1_64, tmp);
1840    }
1841    if (src2_mop >= 0) {
1842        read_neon_element64(rm_64, a->vm, 1, src2_mop);
1843    } else {
1844        TCGv_i32 tmp = tcg_temp_new_i32();
1845        read_neon_element32(tmp, a->vm, 1, MO_32);
1846        widenfn(rm_64, tmp);
1847    }
1848
1849    write_neon_element64(rn0_64, a->vd, 0, MO_64);
1850
1851    opfn(rn1_64, rn1_64, rm_64);
1852    write_neon_element64(rn1_64, a->vd, 1, MO_64);
1853
1854    return true;
1855}
1856
1857#define DO_PREWIDEN(INSN, S, OP, SRC1WIDE, SIGN)                        \
1858    static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a)        \
1859    {                                                                   \
1860        static NeonGenWidenFn * const widenfn[] = {                     \
1861            gen_helper_neon_widen_##S##8,                               \
1862            gen_helper_neon_widen_##S##16,                              \
1863            NULL, NULL,                                                 \
1864        };                                                              \
1865        static NeonGenTwo64OpFn * const addfn[] = {                     \
1866            gen_helper_neon_##OP##l_u16,                                \
1867            gen_helper_neon_##OP##l_u32,                                \
1868            tcg_gen_##OP##_i64,                                         \
1869            NULL,                                                       \
1870        };                                                              \
1871        int narrow_mop = a->size == MO_32 ? MO_32 | SIGN : -1;          \
1872        return do_prewiden_3d(s, a, widenfn[a->size], addfn[a->size],   \
1873                              SRC1WIDE ? MO_UQ : narrow_mop,             \
1874                              narrow_mop);                              \
1875    }
1876
1877DO_PREWIDEN(VADDL_S, s, add, false, MO_SIGN)
1878DO_PREWIDEN(VADDL_U, u, add, false, 0)
1879DO_PREWIDEN(VSUBL_S, s, sub, false, MO_SIGN)
1880DO_PREWIDEN(VSUBL_U, u, sub, false, 0)
1881DO_PREWIDEN(VADDW_S, s, add, true, MO_SIGN)
1882DO_PREWIDEN(VADDW_U, u, add, true, 0)
1883DO_PREWIDEN(VSUBW_S, s, sub, true, MO_SIGN)
1884DO_PREWIDEN(VSUBW_U, u, sub, true, 0)
1885
1886static bool do_narrow_3d(DisasContext *s, arg_3diff *a,
1887                         NeonGenTwo64OpFn *opfn, NeonGenNarrowFn *narrowfn)
1888{
1889    /* 3-regs different lengths, narrowing (VADDHN/VSUBHN/VRADDHN/VRSUBHN) */
1890    TCGv_i64 rn_64, rm_64;
1891    TCGv_i32 rd0, rd1;
1892
1893    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
1894        return false;
1895    }
1896
1897    /* UNDEF accesses to D16-D31 if they don't exist. */
1898    if (!dc_isar_feature(aa32_simd_r32, s) &&
1899        ((a->vd | a->vn | a->vm) & 0x10)) {
1900        return false;
1901    }
1902
1903    if (!opfn || !narrowfn) {
1904        /* size == 3 case, which is an entirely different insn group */
1905        return false;
1906    }
1907
1908    if ((a->vn | a->vm) & 1) {
1909        return false;
1910    }
1911
1912    if (!vfp_access_check(s)) {
1913        return true;
1914    }
1915
1916    rn_64 = tcg_temp_new_i64();
1917    rm_64 = tcg_temp_new_i64();
1918    rd0 = tcg_temp_new_i32();
1919    rd1 = tcg_temp_new_i32();
1920
1921    read_neon_element64(rn_64, a->vn, 0, MO_64);
1922    read_neon_element64(rm_64, a->vm, 0, MO_64);
1923
1924    opfn(rn_64, rn_64, rm_64);
1925
1926    narrowfn(rd0, rn_64);
1927
1928    read_neon_element64(rn_64, a->vn, 1, MO_64);
1929    read_neon_element64(rm_64, a->vm, 1, MO_64);
1930
1931    opfn(rn_64, rn_64, rm_64);
1932
1933    narrowfn(rd1, rn_64);
1934
1935    write_neon_element32(rd0, a->vd, 0, MO_32);
1936    write_neon_element32(rd1, a->vd, 1, MO_32);
1937
1938    return true;
1939}
1940
1941#define DO_NARROW_3D(INSN, OP, NARROWTYPE, EXTOP)                       \
1942    static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a)        \
1943    {                                                                   \
1944        static NeonGenTwo64OpFn * const addfn[] = {                     \
1945            gen_helper_neon_##OP##l_u16,                                \
1946            gen_helper_neon_##OP##l_u32,                                \
1947            tcg_gen_##OP##_i64,                                         \
1948            NULL,                                                       \
1949        };                                                              \
1950        static NeonGenNarrowFn * const narrowfn[] = {                   \
1951            gen_helper_neon_##NARROWTYPE##_high_u8,                     \
1952            gen_helper_neon_##NARROWTYPE##_high_u16,                    \
1953            EXTOP,                                                      \
1954            NULL,                                                       \
1955        };                                                              \
1956        return do_narrow_3d(s, a, addfn[a->size], narrowfn[a->size]);   \
1957    }
1958
1959static void gen_narrow_round_high_u32(TCGv_i32 rd, TCGv_i64 rn)
1960{
1961    tcg_gen_addi_i64(rn, rn, 1u << 31);
1962    tcg_gen_extrh_i64_i32(rd, rn);
1963}
1964
1965DO_NARROW_3D(VADDHN, add, narrow, tcg_gen_extrh_i64_i32)
1966DO_NARROW_3D(VSUBHN, sub, narrow, tcg_gen_extrh_i64_i32)
1967DO_NARROW_3D(VRADDHN, add, narrow_round, gen_narrow_round_high_u32)
1968DO_NARROW_3D(VRSUBHN, sub, narrow_round, gen_narrow_round_high_u32)
1969
1970static bool do_long_3d(DisasContext *s, arg_3diff *a,
1971                       NeonGenTwoOpWidenFn *opfn,
1972                       NeonGenTwo64OpFn *accfn)
1973{
1974    /*
1975     * 3-regs different lengths, long operations.
1976     * These perform an operation on two inputs that returns a double-width
1977     * result, and then possibly perform an accumulation operation of
1978     * that result into the double-width destination.
1979     */
1980    TCGv_i64 rd0, rd1, tmp;
1981    TCGv_i32 rn, rm;
1982
1983    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
1984        return false;
1985    }
1986
1987    /* UNDEF accesses to D16-D31 if they don't exist. */
1988    if (!dc_isar_feature(aa32_simd_r32, s) &&
1989        ((a->vd | a->vn | a->vm) & 0x10)) {
1990        return false;
1991    }
1992
1993    if (!opfn) {
1994        /* size == 3 case, which is an entirely different insn group */
1995        return false;
1996    }
1997
1998    if (a->vd & 1) {
1999        return false;
2000    }
2001
2002    if (!vfp_access_check(s)) {
2003        return true;
2004    }
2005
2006    rd0 = tcg_temp_new_i64();
2007    rd1 = tcg_temp_new_i64();
2008
2009    rn = tcg_temp_new_i32();
2010    rm = tcg_temp_new_i32();
2011    read_neon_element32(rn, a->vn, 0, MO_32);
2012    read_neon_element32(rm, a->vm, 0, MO_32);
2013    opfn(rd0, rn, rm);
2014
2015    read_neon_element32(rn, a->vn, 1, MO_32);
2016    read_neon_element32(rm, a->vm, 1, MO_32);
2017    opfn(rd1, rn, rm);
2018
2019    /* Don't store results until after all loads: they might overlap */
2020    if (accfn) {
2021        tmp = tcg_temp_new_i64();
2022        read_neon_element64(tmp, a->vd, 0, MO_64);
2023        accfn(rd0, tmp, rd0);
2024        read_neon_element64(tmp, a->vd, 1, MO_64);
2025        accfn(rd1, tmp, rd1);
2026    }
2027
2028    write_neon_element64(rd0, a->vd, 0, MO_64);
2029    write_neon_element64(rd1, a->vd, 1, MO_64);
2030
2031    return true;
2032}
2033
2034static bool trans_VABDL_S_3d(DisasContext *s, arg_3diff *a)
2035{
2036    static NeonGenTwoOpWidenFn * const opfn[] = {
2037        gen_helper_neon_abdl_s16,
2038        gen_helper_neon_abdl_s32,
2039        gen_helper_neon_abdl_s64,
2040        NULL,
2041    };
2042
2043    return do_long_3d(s, a, opfn[a->size], NULL);
2044}
2045
2046static bool trans_VABDL_U_3d(DisasContext *s, arg_3diff *a)
2047{
2048    static NeonGenTwoOpWidenFn * const opfn[] = {
2049        gen_helper_neon_abdl_u16,
2050        gen_helper_neon_abdl_u32,
2051        gen_helper_neon_abdl_u64,
2052        NULL,
2053    };
2054
2055    return do_long_3d(s, a, opfn[a->size], NULL);
2056}
2057
2058static bool trans_VABAL_S_3d(DisasContext *s, arg_3diff *a)
2059{
2060    static NeonGenTwoOpWidenFn * const opfn[] = {
2061        gen_helper_neon_abdl_s16,
2062        gen_helper_neon_abdl_s32,
2063        gen_helper_neon_abdl_s64,
2064        NULL,
2065    };
2066    static NeonGenTwo64OpFn * const addfn[] = {
2067        gen_helper_neon_addl_u16,
2068        gen_helper_neon_addl_u32,
2069        tcg_gen_add_i64,
2070        NULL,
2071    };
2072
2073    return do_long_3d(s, a, opfn[a->size], addfn[a->size]);
2074}
2075
2076static bool trans_VABAL_U_3d(DisasContext *s, arg_3diff *a)
2077{
2078    static NeonGenTwoOpWidenFn * const opfn[] = {
2079        gen_helper_neon_abdl_u16,
2080        gen_helper_neon_abdl_u32,
2081        gen_helper_neon_abdl_u64,
2082        NULL,
2083    };
2084    static NeonGenTwo64OpFn * const addfn[] = {
2085        gen_helper_neon_addl_u16,
2086        gen_helper_neon_addl_u32,
2087        tcg_gen_add_i64,
2088        NULL,
2089    };
2090
2091    return do_long_3d(s, a, opfn[a->size], addfn[a->size]);
2092}
2093
2094static void gen_mull_s32(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm)
2095{
2096    TCGv_i32 lo = tcg_temp_new_i32();
2097    TCGv_i32 hi = tcg_temp_new_i32();
2098
2099    tcg_gen_muls2_i32(lo, hi, rn, rm);
2100    tcg_gen_concat_i32_i64(rd, lo, hi);
2101}
2102
2103static void gen_mull_u32(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm)
2104{
2105    TCGv_i32 lo = tcg_temp_new_i32();
2106    TCGv_i32 hi = tcg_temp_new_i32();
2107
2108    tcg_gen_mulu2_i32(lo, hi, rn, rm);
2109    tcg_gen_concat_i32_i64(rd, lo, hi);
2110}
2111
2112static bool trans_VMULL_S_3d(DisasContext *s, arg_3diff *a)
2113{
2114    static NeonGenTwoOpWidenFn * const opfn[] = {
2115        gen_helper_neon_mull_s8,
2116        gen_helper_neon_mull_s16,
2117        gen_mull_s32,
2118        NULL,
2119    };
2120
2121    return do_long_3d(s, a, opfn[a->size], NULL);
2122}
2123
2124static bool trans_VMULL_U_3d(DisasContext *s, arg_3diff *a)
2125{
2126    static NeonGenTwoOpWidenFn * const opfn[] = {
2127        gen_helper_neon_mull_u8,
2128        gen_helper_neon_mull_u16,
2129        gen_mull_u32,
2130        NULL,
2131    };
2132
2133    return do_long_3d(s, a, opfn[a->size], NULL);
2134}
2135
2136#define DO_VMLAL(INSN,MULL,ACC)                                         \
2137    static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a)        \
2138    {                                                                   \
2139        static NeonGenTwoOpWidenFn * const opfn[] = {                   \
2140            gen_helper_neon_##MULL##8,                                  \
2141            gen_helper_neon_##MULL##16,                                 \
2142            gen_##MULL##32,                                             \
2143            NULL,                                                       \
2144        };                                                              \
2145        static NeonGenTwo64OpFn * const accfn[] = {                     \
2146            gen_helper_neon_##ACC##l_u16,                               \
2147            gen_helper_neon_##ACC##l_u32,                               \
2148            tcg_gen_##ACC##_i64,                                        \
2149            NULL,                                                       \
2150        };                                                              \
2151        return do_long_3d(s, a, opfn[a->size], accfn[a->size]);         \
2152    }
2153
2154DO_VMLAL(VMLAL_S,mull_s,add)
2155DO_VMLAL(VMLAL_U,mull_u,add)
2156DO_VMLAL(VMLSL_S,mull_s,sub)
2157DO_VMLAL(VMLSL_U,mull_u,sub)
2158
2159static void gen_VQDMULL_16(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm)
2160{
2161    gen_helper_neon_mull_s16(rd, rn, rm);
2162    gen_helper_neon_addl_saturate_s32(rd, cpu_env, rd, rd);
2163}
2164
2165static void gen_VQDMULL_32(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm)
2166{
2167    gen_mull_s32(rd, rn, rm);
2168    gen_helper_neon_addl_saturate_s64(rd, cpu_env, rd, rd);
2169}
2170
2171static bool trans_VQDMULL_3d(DisasContext *s, arg_3diff *a)
2172{
2173    static NeonGenTwoOpWidenFn * const opfn[] = {
2174        NULL,
2175        gen_VQDMULL_16,
2176        gen_VQDMULL_32,
2177        NULL,
2178    };
2179
2180    return do_long_3d(s, a, opfn[a->size], NULL);
2181}
2182
2183static void gen_VQDMLAL_acc_16(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
2184{
2185    gen_helper_neon_addl_saturate_s32(rd, cpu_env, rn, rm);
2186}
2187
2188static void gen_VQDMLAL_acc_32(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
2189{
2190    gen_helper_neon_addl_saturate_s64(rd, cpu_env, rn, rm);
2191}
2192
2193static bool trans_VQDMLAL_3d(DisasContext *s, arg_3diff *a)
2194{
2195    static NeonGenTwoOpWidenFn * const opfn[] = {
2196        NULL,
2197        gen_VQDMULL_16,
2198        gen_VQDMULL_32,
2199        NULL,
2200    };
2201    static NeonGenTwo64OpFn * const accfn[] = {
2202        NULL,
2203        gen_VQDMLAL_acc_16,
2204        gen_VQDMLAL_acc_32,
2205        NULL,
2206    };
2207
2208    return do_long_3d(s, a, opfn[a->size], accfn[a->size]);
2209}
2210
2211static void gen_VQDMLSL_acc_16(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
2212{
2213    gen_helper_neon_negl_u32(rm, rm);
2214    gen_helper_neon_addl_saturate_s32(rd, cpu_env, rn, rm);
2215}
2216
2217static void gen_VQDMLSL_acc_32(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
2218{
2219    tcg_gen_neg_i64(rm, rm);
2220    gen_helper_neon_addl_saturate_s64(rd, cpu_env, rn, rm);
2221}
2222
2223static bool trans_VQDMLSL_3d(DisasContext *s, arg_3diff *a)
2224{
2225    static NeonGenTwoOpWidenFn * const opfn[] = {
2226        NULL,
2227        gen_VQDMULL_16,
2228        gen_VQDMULL_32,
2229        NULL,
2230    };
2231    static NeonGenTwo64OpFn * const accfn[] = {
2232        NULL,
2233        gen_VQDMLSL_acc_16,
2234        gen_VQDMLSL_acc_32,
2235        NULL,
2236    };
2237
2238    return do_long_3d(s, a, opfn[a->size], accfn[a->size]);
2239}
2240
2241static bool trans_VMULL_P_3d(DisasContext *s, arg_3diff *a)
2242{
2243    gen_helper_gvec_3 *fn_gvec;
2244
2245    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
2246        return false;
2247    }
2248
2249    /* UNDEF accesses to D16-D31 if they don't exist. */
2250    if (!dc_isar_feature(aa32_simd_r32, s) &&
2251        ((a->vd | a->vn | a->vm) & 0x10)) {
2252        return false;
2253    }
2254
2255    if (a->vd & 1) {
2256        return false;
2257    }
2258
2259    switch (a->size) {
2260    case 0:
2261        fn_gvec = gen_helper_neon_pmull_h;
2262        break;
2263    case 2:
2264        if (!dc_isar_feature(aa32_pmull, s)) {
2265            return false;
2266        }
2267        fn_gvec = gen_helper_gvec_pmull_q;
2268        break;
2269    default:
2270        return false;
2271    }
2272
2273    if (!vfp_access_check(s)) {
2274        return true;
2275    }
2276
2277    tcg_gen_gvec_3_ool(neon_full_reg_offset(a->vd),
2278                       neon_full_reg_offset(a->vn),
2279                       neon_full_reg_offset(a->vm),
2280                       16, 16, 0, fn_gvec);
2281    return true;
2282}
2283
2284static void gen_neon_dup_low16(TCGv_i32 var)
2285{
2286    TCGv_i32 tmp = tcg_temp_new_i32();
2287    tcg_gen_ext16u_i32(var, var);
2288    tcg_gen_shli_i32(tmp, var, 16);
2289    tcg_gen_or_i32(var, var, tmp);
2290}
2291
2292static void gen_neon_dup_high16(TCGv_i32 var)
2293{
2294    TCGv_i32 tmp = tcg_temp_new_i32();
2295    tcg_gen_andi_i32(var, var, 0xffff0000);
2296    tcg_gen_shri_i32(tmp, var, 16);
2297    tcg_gen_or_i32(var, var, tmp);
2298}
2299
2300static inline TCGv_i32 neon_get_scalar(int size, int reg)
2301{
2302    TCGv_i32 tmp = tcg_temp_new_i32();
2303    if (size == MO_16) {
2304        read_neon_element32(tmp, reg & 7, reg >> 4, MO_32);
2305        if (reg & 8) {
2306            gen_neon_dup_high16(tmp);
2307        } else {
2308            gen_neon_dup_low16(tmp);
2309        }
2310    } else {
2311        read_neon_element32(tmp, reg & 15, reg >> 4, MO_32);
2312    }
2313    return tmp;
2314}
2315
2316static bool do_2scalar(DisasContext *s, arg_2scalar *a,
2317                       NeonGenTwoOpFn *opfn, NeonGenTwoOpFn *accfn)
2318{
2319    /*
2320     * Two registers and a scalar: perform an operation between
2321     * the input elements and the scalar, and then possibly
2322     * perform an accumulation operation of that result into the
2323     * destination.
2324     */
2325    TCGv_i32 scalar, tmp;
2326    int pass;
2327
2328    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
2329        return false;
2330    }
2331
2332    /* UNDEF accesses to D16-D31 if they don't exist. */
2333    if (!dc_isar_feature(aa32_simd_r32, s) &&
2334        ((a->vd | a->vn | a->vm) & 0x10)) {
2335        return false;
2336    }
2337
2338    if (!opfn) {
2339        /* Bad size (including size == 3, which is a different insn group) */
2340        return false;
2341    }
2342
2343    if (a->q && ((a->vd | a->vn) & 1)) {
2344        return false;
2345    }
2346
2347    if (!vfp_access_check(s)) {
2348        return true;
2349    }
2350
2351    scalar = neon_get_scalar(a->size, a->vm);
2352    tmp = tcg_temp_new_i32();
2353
2354    for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
2355        read_neon_element32(tmp, a->vn, pass, MO_32);
2356        opfn(tmp, tmp, scalar);
2357        if (accfn) {
2358            TCGv_i32 rd = tcg_temp_new_i32();
2359            read_neon_element32(rd, a->vd, pass, MO_32);
2360            accfn(tmp, rd, tmp);
2361        }
2362        write_neon_element32(tmp, a->vd, pass, MO_32);
2363    }
2364    return true;
2365}
2366
2367static bool trans_VMUL_2sc(DisasContext *s, arg_2scalar *a)
2368{
2369    static NeonGenTwoOpFn * const opfn[] = {
2370        NULL,
2371        gen_helper_neon_mul_u16,
2372        tcg_gen_mul_i32,
2373        NULL,
2374    };
2375
2376    return do_2scalar(s, a, opfn[a->size], NULL);
2377}
2378
2379static bool trans_VMLA_2sc(DisasContext *s, arg_2scalar *a)
2380{
2381    static NeonGenTwoOpFn * const opfn[] = {
2382        NULL,
2383        gen_helper_neon_mul_u16,
2384        tcg_gen_mul_i32,
2385        NULL,
2386    };
2387    static NeonGenTwoOpFn * const accfn[] = {
2388        NULL,
2389        gen_helper_neon_add_u16,
2390        tcg_gen_add_i32,
2391        NULL,
2392    };
2393
2394    return do_2scalar(s, a, opfn[a->size], accfn[a->size]);
2395}
2396
2397static bool trans_VMLS_2sc(DisasContext *s, arg_2scalar *a)
2398{
2399    static NeonGenTwoOpFn * const opfn[] = {
2400        NULL,
2401        gen_helper_neon_mul_u16,
2402        tcg_gen_mul_i32,
2403        NULL,
2404    };
2405    static NeonGenTwoOpFn * const accfn[] = {
2406        NULL,
2407        gen_helper_neon_sub_u16,
2408        tcg_gen_sub_i32,
2409        NULL,
2410    };
2411
2412    return do_2scalar(s, a, opfn[a->size], accfn[a->size]);
2413}
2414
2415static bool do_2scalar_fp_vec(DisasContext *s, arg_2scalar *a,
2416                              gen_helper_gvec_3_ptr *fn)
2417{
2418    /* Two registers and a scalar, using gvec */
2419    int vec_size = a->q ? 16 : 8;
2420    int rd_ofs = neon_full_reg_offset(a->vd);
2421    int rn_ofs = neon_full_reg_offset(a->vn);
2422    int rm_ofs;
2423    int idx;
2424    TCGv_ptr fpstatus;
2425
2426    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
2427        return false;
2428    }
2429
2430    /* UNDEF accesses to D16-D31 if they don't exist. */
2431    if (!dc_isar_feature(aa32_simd_r32, s) &&
2432        ((a->vd | a->vn | a->vm) & 0x10)) {
2433        return false;
2434    }
2435
2436    if (!fn) {
2437        /* Bad size (including size == 3, which is a different insn group) */
2438        return false;
2439    }
2440
2441    if (a->q && ((a->vd | a->vn) & 1)) {
2442        return false;
2443    }
2444
2445    if (!vfp_access_check(s)) {
2446        return true;
2447    }
2448
2449    /* a->vm is M:Vm, which encodes both register and index */
2450    idx = extract32(a->vm, a->size + 2, 2);
2451    a->vm = extract32(a->vm, 0, a->size + 2);
2452    rm_ofs = neon_full_reg_offset(a->vm);
2453
2454    fpstatus = fpstatus_ptr(a->size == 1 ? FPST_STD_F16 : FPST_STD);
2455    tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, fpstatus,
2456                       vec_size, vec_size, idx, fn);
2457    return true;
2458}
2459
2460#define DO_VMUL_F_2sc(NAME, FUNC)                                       \
2461    static bool trans_##NAME##_F_2sc(DisasContext *s, arg_2scalar *a)   \
2462    {                                                                   \
2463        static gen_helper_gvec_3_ptr * const opfn[] = {                 \
2464            NULL,                                                       \
2465            gen_helper_##FUNC##_h,                                      \
2466            gen_helper_##FUNC##_s,                                      \
2467            NULL,                                                       \
2468        };                                                              \
2469        if (a->size == MO_16 && !dc_isar_feature(aa32_fp16_arith, s)) { \
2470            return false;                                               \
2471        }                                                               \
2472        return do_2scalar_fp_vec(s, a, opfn[a->size]);                  \
2473    }
2474
2475DO_VMUL_F_2sc(VMUL, gvec_fmul_idx)
2476DO_VMUL_F_2sc(VMLA, gvec_fmla_nf_idx)
2477DO_VMUL_F_2sc(VMLS, gvec_fmls_nf_idx)
2478
2479WRAP_ENV_FN(gen_VQDMULH_16, gen_helper_neon_qdmulh_s16)
2480WRAP_ENV_FN(gen_VQDMULH_32, gen_helper_neon_qdmulh_s32)
2481WRAP_ENV_FN(gen_VQRDMULH_16, gen_helper_neon_qrdmulh_s16)
2482WRAP_ENV_FN(gen_VQRDMULH_32, gen_helper_neon_qrdmulh_s32)
2483
2484static bool trans_VQDMULH_2sc(DisasContext *s, arg_2scalar *a)
2485{
2486    static NeonGenTwoOpFn * const opfn[] = {
2487        NULL,
2488        gen_VQDMULH_16,
2489        gen_VQDMULH_32,
2490        NULL,
2491    };
2492
2493    return do_2scalar(s, a, opfn[a->size], NULL);
2494}
2495
2496static bool trans_VQRDMULH_2sc(DisasContext *s, arg_2scalar *a)
2497{
2498    static NeonGenTwoOpFn * const opfn[] = {
2499        NULL,
2500        gen_VQRDMULH_16,
2501        gen_VQRDMULH_32,
2502        NULL,
2503    };
2504
2505    return do_2scalar(s, a, opfn[a->size], NULL);
2506}
2507
2508static bool do_vqrdmlah_2sc(DisasContext *s, arg_2scalar *a,
2509                            NeonGenThreeOpEnvFn *opfn)
2510{
2511    /*
2512     * VQRDMLAH/VQRDMLSH: this is like do_2scalar, but the opfn
2513     * performs a kind of fused op-then-accumulate using a helper
2514     * function that takes all of rd, rn and the scalar at once.
2515     */
2516    TCGv_i32 scalar, rn, rd;
2517    int pass;
2518
2519    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
2520        return false;
2521    }
2522
2523    if (!dc_isar_feature(aa32_rdm, s)) {
2524        return false;
2525    }
2526
2527    /* UNDEF accesses to D16-D31 if they don't exist. */
2528    if (!dc_isar_feature(aa32_simd_r32, s) &&
2529        ((a->vd | a->vn | a->vm) & 0x10)) {
2530        return false;
2531    }
2532
2533    if (!opfn) {
2534        /* Bad size (including size == 3, which is a different insn group) */
2535        return false;
2536    }
2537
2538    if (a->q && ((a->vd | a->vn) & 1)) {
2539        return false;
2540    }
2541
2542    if (!vfp_access_check(s)) {
2543        return true;
2544    }
2545
2546    scalar = neon_get_scalar(a->size, a->vm);
2547    rn = tcg_temp_new_i32();
2548    rd = tcg_temp_new_i32();
2549
2550    for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
2551        read_neon_element32(rn, a->vn, pass, MO_32);
2552        read_neon_element32(rd, a->vd, pass, MO_32);
2553        opfn(rd, cpu_env, rn, scalar, rd);
2554        write_neon_element32(rd, a->vd, pass, MO_32);
2555    }
2556    return true;
2557}
2558
2559static bool trans_VQRDMLAH_2sc(DisasContext *s, arg_2scalar *a)
2560{
2561    static NeonGenThreeOpEnvFn *opfn[] = {
2562        NULL,
2563        gen_helper_neon_qrdmlah_s16,
2564        gen_helper_neon_qrdmlah_s32,
2565        NULL,
2566    };
2567    return do_vqrdmlah_2sc(s, a, opfn[a->size]);
2568}
2569
2570static bool trans_VQRDMLSH_2sc(DisasContext *s, arg_2scalar *a)
2571{
2572    static NeonGenThreeOpEnvFn *opfn[] = {
2573        NULL,
2574        gen_helper_neon_qrdmlsh_s16,
2575        gen_helper_neon_qrdmlsh_s32,
2576        NULL,
2577    };
2578    return do_vqrdmlah_2sc(s, a, opfn[a->size]);
2579}
2580
2581static bool do_2scalar_long(DisasContext *s, arg_2scalar *a,
2582                            NeonGenTwoOpWidenFn *opfn,
2583                            NeonGenTwo64OpFn *accfn)
2584{
2585    /*
2586     * Two registers and a scalar, long operations: perform an
2587     * operation on the input elements and the scalar which produces
2588     * a double-width result, and then possibly perform an accumulation
2589     * operation of that result into the destination.
2590     */
2591    TCGv_i32 scalar, rn;
2592    TCGv_i64 rn0_64, rn1_64;
2593
2594    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
2595        return false;
2596    }
2597
2598    /* UNDEF accesses to D16-D31 if they don't exist. */
2599    if (!dc_isar_feature(aa32_simd_r32, s) &&
2600        ((a->vd | a->vn | a->vm) & 0x10)) {
2601        return false;
2602    }
2603
2604    if (!opfn) {
2605        /* Bad size (including size == 3, which is a different insn group) */
2606        return false;
2607    }
2608
2609    if (a->vd & 1) {
2610        return false;
2611    }
2612
2613    if (!vfp_access_check(s)) {
2614        return true;
2615    }
2616
2617    scalar = neon_get_scalar(a->size, a->vm);
2618
2619    /* Load all inputs before writing any outputs, in case of overlap */
2620    rn = tcg_temp_new_i32();
2621    read_neon_element32(rn, a->vn, 0, MO_32);
2622    rn0_64 = tcg_temp_new_i64();
2623    opfn(rn0_64, rn, scalar);
2624
2625    read_neon_element32(rn, a->vn, 1, MO_32);
2626    rn1_64 = tcg_temp_new_i64();
2627    opfn(rn1_64, rn, scalar);
2628
2629    if (accfn) {
2630        TCGv_i64 t64 = tcg_temp_new_i64();
2631        read_neon_element64(t64, a->vd, 0, MO_64);
2632        accfn(rn0_64, t64, rn0_64);
2633        read_neon_element64(t64, a->vd, 1, MO_64);
2634        accfn(rn1_64, t64, rn1_64);
2635    }
2636
2637    write_neon_element64(rn0_64, a->vd, 0, MO_64);
2638    write_neon_element64(rn1_64, a->vd, 1, MO_64);
2639    return true;
2640}
2641
2642static bool trans_VMULL_S_2sc(DisasContext *s, arg_2scalar *a)
2643{
2644    static NeonGenTwoOpWidenFn * const opfn[] = {
2645        NULL,
2646        gen_helper_neon_mull_s16,
2647        gen_mull_s32,
2648        NULL,
2649    };
2650
2651    return do_2scalar_long(s, a, opfn[a->size], NULL);
2652}
2653
2654static bool trans_VMULL_U_2sc(DisasContext *s, arg_2scalar *a)
2655{
2656    static NeonGenTwoOpWidenFn * const opfn[] = {
2657        NULL,
2658        gen_helper_neon_mull_u16,
2659        gen_mull_u32,
2660        NULL,
2661    };
2662
2663    return do_2scalar_long(s, a, opfn[a->size], NULL);
2664}
2665
2666#define DO_VMLAL_2SC(INSN, MULL, ACC)                                   \
2667    static bool trans_##INSN##_2sc(DisasContext *s, arg_2scalar *a)     \
2668    {                                                                   \
2669        static NeonGenTwoOpWidenFn * const opfn[] = {                   \
2670            NULL,                                                       \
2671            gen_helper_neon_##MULL##16,                                 \
2672            gen_##MULL##32,                                             \
2673            NULL,                                                       \
2674        };                                                              \
2675        static NeonGenTwo64OpFn * const accfn[] = {                     \
2676            NULL,                                                       \
2677            gen_helper_neon_##ACC##l_u32,                               \
2678            tcg_gen_##ACC##_i64,                                        \
2679            NULL,                                                       \
2680        };                                                              \
2681        return do_2scalar_long(s, a, opfn[a->size], accfn[a->size]);    \
2682    }
2683
2684DO_VMLAL_2SC(VMLAL_S, mull_s, add)
2685DO_VMLAL_2SC(VMLAL_U, mull_u, add)
2686DO_VMLAL_2SC(VMLSL_S, mull_s, sub)
2687DO_VMLAL_2SC(VMLSL_U, mull_u, sub)
2688
2689static bool trans_VQDMULL_2sc(DisasContext *s, arg_2scalar *a)
2690{
2691    static NeonGenTwoOpWidenFn * const opfn[] = {
2692        NULL,
2693        gen_VQDMULL_16,
2694        gen_VQDMULL_32,
2695        NULL,
2696    };
2697
2698    return do_2scalar_long(s, a, opfn[a->size], NULL);
2699}
2700
2701static bool trans_VQDMLAL_2sc(DisasContext *s, arg_2scalar *a)
2702{
2703    static NeonGenTwoOpWidenFn * const opfn[] = {
2704        NULL,
2705        gen_VQDMULL_16,
2706        gen_VQDMULL_32,
2707        NULL,
2708    };
2709    static NeonGenTwo64OpFn * const accfn[] = {
2710        NULL,
2711        gen_VQDMLAL_acc_16,
2712        gen_VQDMLAL_acc_32,
2713        NULL,
2714    };
2715
2716    return do_2scalar_long(s, a, opfn[a->size], accfn[a->size]);
2717}
2718
2719static bool trans_VQDMLSL_2sc(DisasContext *s, arg_2scalar *a)
2720{
2721    static NeonGenTwoOpWidenFn * const opfn[] = {
2722        NULL,
2723        gen_VQDMULL_16,
2724        gen_VQDMULL_32,
2725        NULL,
2726    };
2727    static NeonGenTwo64OpFn * const accfn[] = {
2728        NULL,
2729        gen_VQDMLSL_acc_16,
2730        gen_VQDMLSL_acc_32,
2731        NULL,
2732    };
2733
2734    return do_2scalar_long(s, a, opfn[a->size], accfn[a->size]);
2735}
2736
2737static bool trans_VEXT(DisasContext *s, arg_VEXT *a)
2738{
2739    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
2740        return false;
2741    }
2742
2743    /* UNDEF accesses to D16-D31 if they don't exist. */
2744    if (!dc_isar_feature(aa32_simd_r32, s) &&
2745        ((a->vd | a->vn | a->vm) & 0x10)) {
2746        return false;
2747    }
2748
2749    if ((a->vn | a->vm | a->vd) & a->q) {
2750        return false;
2751    }
2752
2753    if (a->imm > 7 && !a->q) {
2754        return false;
2755    }
2756
2757    if (!vfp_access_check(s)) {
2758        return true;
2759    }
2760
2761    if (!a->q) {
2762        /* Extract 64 bits from <Vm:Vn> */
2763        TCGv_i64 left, right, dest;
2764
2765        left = tcg_temp_new_i64();
2766        right = tcg_temp_new_i64();
2767        dest = tcg_temp_new_i64();
2768
2769        read_neon_element64(right, a->vn, 0, MO_64);
2770        read_neon_element64(left, a->vm, 0, MO_64);
2771        tcg_gen_extract2_i64(dest, right, left, a->imm * 8);
2772        write_neon_element64(dest, a->vd, 0, MO_64);
2773    } else {
2774        /* Extract 128 bits from <Vm+1:Vm:Vn+1:Vn> */
2775        TCGv_i64 left, middle, right, destleft, destright;
2776
2777        left = tcg_temp_new_i64();
2778        middle = tcg_temp_new_i64();
2779        right = tcg_temp_new_i64();
2780        destleft = tcg_temp_new_i64();
2781        destright = tcg_temp_new_i64();
2782
2783        if (a->imm < 8) {
2784            read_neon_element64(right, a->vn, 0, MO_64);
2785            read_neon_element64(middle, a->vn, 1, MO_64);
2786            tcg_gen_extract2_i64(destright, right, middle, a->imm * 8);
2787            read_neon_element64(left, a->vm, 0, MO_64);
2788            tcg_gen_extract2_i64(destleft, middle, left, a->imm * 8);
2789        } else {
2790            read_neon_element64(right, a->vn, 1, MO_64);
2791            read_neon_element64(middle, a->vm, 0, MO_64);
2792            tcg_gen_extract2_i64(destright, right, middle, (a->imm - 8) * 8);
2793            read_neon_element64(left, a->vm, 1, MO_64);
2794            tcg_gen_extract2_i64(destleft, middle, left, (a->imm - 8) * 8);
2795        }
2796
2797        write_neon_element64(destright, a->vd, 0, MO_64);
2798        write_neon_element64(destleft, a->vd, 1, MO_64);
2799    }
2800    return true;
2801}
2802
2803static bool trans_VTBL(DisasContext *s, arg_VTBL *a)
2804{
2805    TCGv_i64 val, def;
2806    TCGv_i32 desc;
2807
2808    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
2809        return false;
2810    }
2811
2812    /* UNDEF accesses to D16-D31 if they don't exist. */
2813    if (!dc_isar_feature(aa32_simd_r32, s) &&
2814        ((a->vd | a->vn | a->vm) & 0x10)) {
2815        return false;
2816    }
2817
2818    if ((a->vn + a->len + 1) > 32) {
2819        /*
2820         * This is UNPREDICTABLE; we choose to UNDEF to avoid the
2821         * helper function running off the end of the register file.
2822         */
2823        return false;
2824    }
2825
2826    if (!vfp_access_check(s)) {
2827        return true;
2828    }
2829
2830    desc = tcg_constant_i32((a->vn << 2) | a->len);
2831    def = tcg_temp_new_i64();
2832    if (a->op) {
2833        read_neon_element64(def, a->vd, 0, MO_64);
2834    } else {
2835        tcg_gen_movi_i64(def, 0);
2836    }
2837    val = tcg_temp_new_i64();
2838    read_neon_element64(val, a->vm, 0, MO_64);
2839
2840    gen_helper_neon_tbl(val, cpu_env, desc, val, def);
2841    write_neon_element64(val, a->vd, 0, MO_64);
2842    return true;
2843}
2844
2845static bool trans_VDUP_scalar(DisasContext *s, arg_VDUP_scalar *a)
2846{
2847    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
2848        return false;
2849    }
2850
2851    /* UNDEF accesses to D16-D31 if they don't exist. */
2852    if (!dc_isar_feature(aa32_simd_r32, s) &&
2853        ((a->vd | a->vm) & 0x10)) {
2854        return false;
2855    }
2856
2857    if (a->vd & a->q) {
2858        return false;
2859    }
2860
2861    if (!vfp_access_check(s)) {
2862        return true;
2863    }
2864
2865    tcg_gen_gvec_dup_mem(a->size, neon_full_reg_offset(a->vd),
2866                         neon_element_offset(a->vm, a->index, a->size),
2867                         a->q ? 16 : 8, a->q ? 16 : 8);
2868    return true;
2869}
2870
2871static bool trans_VREV64(DisasContext *s, arg_VREV64 *a)
2872{
2873    int pass, half;
2874    TCGv_i32 tmp[2];
2875
2876    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
2877        return false;
2878    }
2879
2880    /* UNDEF accesses to D16-D31 if they don't exist. */
2881    if (!dc_isar_feature(aa32_simd_r32, s) &&
2882        ((a->vd | a->vm) & 0x10)) {
2883        return false;
2884    }
2885
2886    if ((a->vd | a->vm) & a->q) {
2887        return false;
2888    }
2889
2890    if (a->size == 3) {
2891        return false;
2892    }
2893
2894    if (!vfp_access_check(s)) {
2895        return true;
2896    }
2897
2898    tmp[0] = tcg_temp_new_i32();
2899    tmp[1] = tcg_temp_new_i32();
2900
2901    for (pass = 0; pass < (a->q ? 2 : 1); pass++) {
2902        for (half = 0; half < 2; half++) {
2903            read_neon_element32(tmp[half], a->vm, pass * 2 + half, MO_32);
2904            switch (a->size) {
2905            case 0:
2906                tcg_gen_bswap32_i32(tmp[half], tmp[half]);
2907                break;
2908            case 1:
2909                gen_swap_half(tmp[half], tmp[half]);
2910                break;
2911            case 2:
2912                break;
2913            default:
2914                g_assert_not_reached();
2915            }
2916        }
2917        write_neon_element32(tmp[1], a->vd, pass * 2, MO_32);
2918        write_neon_element32(tmp[0], a->vd, pass * 2 + 1, MO_32);
2919    }
2920    return true;
2921}
2922
2923static bool do_2misc_pairwise(DisasContext *s, arg_2misc *a,
2924                              NeonGenWidenFn *widenfn,
2925                              NeonGenTwo64OpFn *opfn,
2926                              NeonGenTwo64OpFn *accfn)
2927{
2928    /*
2929     * Pairwise long operations: widen both halves of the pair,
2930     * combine the pairs with the opfn, and then possibly accumulate
2931     * into the destination with the accfn.
2932     */
2933    int pass;
2934
2935    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
2936        return false;
2937    }
2938
2939    /* UNDEF accesses to D16-D31 if they don't exist. */
2940    if (!dc_isar_feature(aa32_simd_r32, s) &&
2941        ((a->vd | a->vm) & 0x10)) {
2942        return false;
2943    }
2944
2945    if ((a->vd | a->vm) & a->q) {
2946        return false;
2947    }
2948
2949    if (!widenfn) {
2950        return false;
2951    }
2952
2953    if (!vfp_access_check(s)) {
2954        return true;
2955    }
2956
2957    for (pass = 0; pass < a->q + 1; pass++) {
2958        TCGv_i32 tmp;
2959        TCGv_i64 rm0_64, rm1_64, rd_64;
2960
2961        rm0_64 = tcg_temp_new_i64();
2962        rm1_64 = tcg_temp_new_i64();
2963        rd_64 = tcg_temp_new_i64();
2964
2965        tmp = tcg_temp_new_i32();
2966        read_neon_element32(tmp, a->vm, pass * 2, MO_32);
2967        widenfn(rm0_64, tmp);
2968        read_neon_element32(tmp, a->vm, pass * 2 + 1, MO_32);
2969        widenfn(rm1_64, tmp);
2970
2971        opfn(rd_64, rm0_64, rm1_64);
2972
2973        if (accfn) {
2974            TCGv_i64 tmp64 = tcg_temp_new_i64();
2975            read_neon_element64(tmp64, a->vd, pass, MO_64);
2976            accfn(rd_64, tmp64, rd_64);
2977        }
2978        write_neon_element64(rd_64, a->vd, pass, MO_64);
2979    }
2980    return true;
2981}
2982
2983static bool trans_VPADDL_S(DisasContext *s, arg_2misc *a)
2984{
2985    static NeonGenWidenFn * const widenfn[] = {
2986        gen_helper_neon_widen_s8,
2987        gen_helper_neon_widen_s16,
2988        tcg_gen_ext_i32_i64,
2989        NULL,
2990    };
2991    static NeonGenTwo64OpFn * const opfn[] = {
2992        gen_helper_neon_paddl_u16,
2993        gen_helper_neon_paddl_u32,
2994        tcg_gen_add_i64,
2995        NULL,
2996    };
2997
2998    return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size], NULL);
2999}
3000
3001static bool trans_VPADDL_U(DisasContext *s, arg_2misc *a)
3002{
3003    static NeonGenWidenFn * const widenfn[] = {
3004        gen_helper_neon_widen_u8,
3005        gen_helper_neon_widen_u16,
3006        tcg_gen_extu_i32_i64,
3007        NULL,
3008    };
3009    static NeonGenTwo64OpFn * const opfn[] = {
3010        gen_helper_neon_paddl_u16,
3011        gen_helper_neon_paddl_u32,
3012        tcg_gen_add_i64,
3013        NULL,
3014    };
3015
3016    return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size], NULL);
3017}
3018
3019static bool trans_VPADAL_S(DisasContext *s, arg_2misc *a)
3020{
3021    static NeonGenWidenFn * const widenfn[] = {
3022        gen_helper_neon_widen_s8,
3023        gen_helper_neon_widen_s16,
3024        tcg_gen_ext_i32_i64,
3025        NULL,
3026    };
3027    static NeonGenTwo64OpFn * const opfn[] = {
3028        gen_helper_neon_paddl_u16,
3029        gen_helper_neon_paddl_u32,
3030        tcg_gen_add_i64,
3031        NULL,
3032    };
3033    static NeonGenTwo64OpFn * const accfn[] = {
3034        gen_helper_neon_addl_u16,
3035        gen_helper_neon_addl_u32,
3036        tcg_gen_add_i64,
3037        NULL,
3038    };
3039
3040    return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size],
3041                             accfn[a->size]);
3042}
3043
3044static bool trans_VPADAL_U(DisasContext *s, arg_2misc *a)
3045{
3046    static NeonGenWidenFn * const widenfn[] = {
3047        gen_helper_neon_widen_u8,
3048        gen_helper_neon_widen_u16,
3049        tcg_gen_extu_i32_i64,
3050        NULL,
3051    };
3052    static NeonGenTwo64OpFn * const opfn[] = {
3053        gen_helper_neon_paddl_u16,
3054        gen_helper_neon_paddl_u32,
3055        tcg_gen_add_i64,
3056        NULL,
3057    };
3058    static NeonGenTwo64OpFn * const accfn[] = {
3059        gen_helper_neon_addl_u16,
3060        gen_helper_neon_addl_u32,
3061        tcg_gen_add_i64,
3062        NULL,
3063    };
3064
3065    return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size],
3066                             accfn[a->size]);
3067}
3068
3069typedef void ZipFn(TCGv_ptr, TCGv_ptr);
3070
3071static bool do_zip_uzp(DisasContext *s, arg_2misc *a,
3072                       ZipFn *fn)
3073{
3074    TCGv_ptr pd, pm;
3075
3076    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
3077        return false;
3078    }
3079
3080    /* UNDEF accesses to D16-D31 if they don't exist. */
3081    if (!dc_isar_feature(aa32_simd_r32, s) &&
3082        ((a->vd | a->vm) & 0x10)) {
3083        return false;
3084    }
3085
3086    if ((a->vd | a->vm) & a->q) {
3087        return false;
3088    }
3089
3090    if (!fn) {
3091        /* Bad size or size/q combination */
3092        return false;
3093    }
3094
3095    if (!vfp_access_check(s)) {
3096        return true;
3097    }
3098
3099    pd = vfp_reg_ptr(true, a->vd);
3100    pm = vfp_reg_ptr(true, a->vm);
3101    fn(pd, pm);
3102    return true;
3103}
3104
3105static bool trans_VUZP(DisasContext *s, arg_2misc *a)
3106{
3107    static ZipFn * const fn[2][4] = {
3108        {
3109            gen_helper_neon_unzip8,
3110            gen_helper_neon_unzip16,
3111            NULL,
3112            NULL,
3113        }, {
3114            gen_helper_neon_qunzip8,
3115            gen_helper_neon_qunzip16,
3116            gen_helper_neon_qunzip32,
3117            NULL,
3118        }
3119    };
3120    return do_zip_uzp(s, a, fn[a->q][a->size]);
3121}
3122
3123static bool trans_VZIP(DisasContext *s, arg_2misc *a)
3124{
3125    static ZipFn * const fn[2][4] = {
3126        {
3127            gen_helper_neon_zip8,
3128            gen_helper_neon_zip16,
3129            NULL,
3130            NULL,
3131        }, {
3132            gen_helper_neon_qzip8,
3133            gen_helper_neon_qzip16,
3134            gen_helper_neon_qzip32,
3135            NULL,
3136        }
3137    };
3138    return do_zip_uzp(s, a, fn[a->q][a->size]);
3139}
3140
3141static bool do_vmovn(DisasContext *s, arg_2misc *a,
3142                     NeonGenNarrowEnvFn *narrowfn)
3143{
3144    TCGv_i64 rm;
3145    TCGv_i32 rd0, rd1;
3146
3147    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
3148        return false;
3149    }
3150
3151    /* UNDEF accesses to D16-D31 if they don't exist. */
3152    if (!dc_isar_feature(aa32_simd_r32, s) &&
3153        ((a->vd | a->vm) & 0x10)) {
3154        return false;
3155    }
3156
3157    if (a->vm & 1) {
3158        return false;
3159    }
3160
3161    if (!narrowfn) {
3162        return false;
3163    }
3164
3165    if (!vfp_access_check(s)) {
3166        return true;
3167    }
3168
3169    rm = tcg_temp_new_i64();
3170    rd0 = tcg_temp_new_i32();
3171    rd1 = tcg_temp_new_i32();
3172
3173    read_neon_element64(rm, a->vm, 0, MO_64);
3174    narrowfn(rd0, cpu_env, rm);
3175    read_neon_element64(rm, a->vm, 1, MO_64);
3176    narrowfn(rd1, cpu_env, rm);
3177    write_neon_element32(rd0, a->vd, 0, MO_32);
3178    write_neon_element32(rd1, a->vd, 1, MO_32);
3179    return true;
3180}
3181
3182#define DO_VMOVN(INSN, FUNC)                                    \
3183    static bool trans_##INSN(DisasContext *s, arg_2misc *a)     \
3184    {                                                           \
3185        static NeonGenNarrowEnvFn * const narrowfn[] = {        \
3186            FUNC##8,                                            \
3187            FUNC##16,                                           \
3188            FUNC##32,                                           \
3189            NULL,                                               \
3190        };                                                      \
3191        return do_vmovn(s, a, narrowfn[a->size]);               \
3192    }
3193
3194DO_VMOVN(VMOVN, gen_neon_narrow_u)
3195DO_VMOVN(VQMOVUN, gen_helper_neon_unarrow_sat)
3196DO_VMOVN(VQMOVN_S, gen_helper_neon_narrow_sat_s)
3197DO_VMOVN(VQMOVN_U, gen_helper_neon_narrow_sat_u)
3198
3199static bool trans_VSHLL(DisasContext *s, arg_2misc *a)
3200{
3201    TCGv_i32 rm0, rm1;
3202    TCGv_i64 rd;
3203    static NeonGenWidenFn * const widenfns[] = {
3204        gen_helper_neon_widen_u8,
3205        gen_helper_neon_widen_u16,
3206        tcg_gen_extu_i32_i64,
3207        NULL,
3208    };
3209    NeonGenWidenFn *widenfn = widenfns[a->size];
3210
3211    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
3212        return false;
3213    }
3214
3215    /* UNDEF accesses to D16-D31 if they don't exist. */
3216    if (!dc_isar_feature(aa32_simd_r32, s) &&
3217        ((a->vd | a->vm) & 0x10)) {
3218        return false;
3219    }
3220
3221    if (a->vd & 1) {
3222        return false;
3223    }
3224
3225    if (!widenfn) {
3226        return false;
3227    }
3228
3229    if (!vfp_access_check(s)) {
3230        return true;
3231    }
3232
3233    rd = tcg_temp_new_i64();
3234    rm0 = tcg_temp_new_i32();
3235    rm1 = tcg_temp_new_i32();
3236
3237    read_neon_element32(rm0, a->vm, 0, MO_32);
3238    read_neon_element32(rm1, a->vm, 1, MO_32);
3239
3240    widenfn(rd, rm0);
3241    tcg_gen_shli_i64(rd, rd, 8 << a->size);
3242    write_neon_element64(rd, a->vd, 0, MO_64);
3243    widenfn(rd, rm1);
3244    tcg_gen_shli_i64(rd, rd, 8 << a->size);
3245    write_neon_element64(rd, a->vd, 1, MO_64);
3246    return true;
3247}
3248
3249static bool trans_VCVT_B16_F32(DisasContext *s, arg_2misc *a)
3250{
3251    TCGv_ptr fpst;
3252    TCGv_i64 tmp;
3253    TCGv_i32 dst0, dst1;
3254
3255    if (!dc_isar_feature(aa32_bf16, s)) {
3256        return false;
3257    }
3258
3259    /* UNDEF accesses to D16-D31 if they don't exist. */
3260    if (!dc_isar_feature(aa32_simd_r32, s) &&
3261        ((a->vd | a->vm) & 0x10)) {
3262        return false;
3263    }
3264
3265    if ((a->vm & 1) || (a->size != 1)) {
3266        return false;
3267    }
3268
3269    if (!vfp_access_check(s)) {
3270        return true;
3271    }
3272
3273    fpst = fpstatus_ptr(FPST_STD);
3274    tmp = tcg_temp_new_i64();
3275    dst0 = tcg_temp_new_i32();
3276    dst1 = tcg_temp_new_i32();
3277
3278    read_neon_element64(tmp, a->vm, 0, MO_64);
3279    gen_helper_bfcvt_pair(dst0, tmp, fpst);
3280
3281    read_neon_element64(tmp, a->vm, 1, MO_64);
3282    gen_helper_bfcvt_pair(dst1, tmp, fpst);
3283
3284    write_neon_element32(dst0, a->vd, 0, MO_32);
3285    write_neon_element32(dst1, a->vd, 1, MO_32);
3286    return true;
3287}
3288
3289static bool trans_VCVT_F16_F32(DisasContext *s, arg_2misc *a)
3290{
3291    TCGv_ptr fpst;
3292    TCGv_i32 ahp, tmp, tmp2, tmp3;
3293
3294    if (!arm_dc_feature(s, ARM_FEATURE_NEON) ||
3295        !dc_isar_feature(aa32_fp16_spconv, s)) {
3296        return false;
3297    }
3298
3299    /* UNDEF accesses to D16-D31 if they don't exist. */
3300    if (!dc_isar_feature(aa32_simd_r32, s) &&
3301        ((a->vd | a->vm) & 0x10)) {
3302        return false;
3303    }
3304
3305    if ((a->vm & 1) || (a->size != 1)) {
3306        return false;
3307    }
3308
3309    if (!vfp_access_check(s)) {
3310        return true;
3311    }
3312
3313    fpst = fpstatus_ptr(FPST_STD);
3314    ahp = get_ahp_flag();
3315    tmp = tcg_temp_new_i32();
3316    read_neon_element32(tmp, a->vm, 0, MO_32);
3317    gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
3318    tmp2 = tcg_temp_new_i32();
3319    read_neon_element32(tmp2, a->vm, 1, MO_32);
3320    gen_helper_vfp_fcvt_f32_to_f16(tmp2, tmp2, fpst, ahp);
3321    tcg_gen_shli_i32(tmp2, tmp2, 16);
3322    tcg_gen_or_i32(tmp2, tmp2, tmp);
3323    read_neon_element32(tmp, a->vm, 2, MO_32);
3324    gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
3325    tmp3 = tcg_temp_new_i32();
3326    read_neon_element32(tmp3, a->vm, 3, MO_32);
3327    write_neon_element32(tmp2, a->vd, 0, MO_32);
3328    gen_helper_vfp_fcvt_f32_to_f16(tmp3, tmp3, fpst, ahp);
3329    tcg_gen_shli_i32(tmp3, tmp3, 16);
3330    tcg_gen_or_i32(tmp3, tmp3, tmp);
3331    write_neon_element32(tmp3, a->vd, 1, MO_32);
3332    return true;
3333}
3334
3335static bool trans_VCVT_F32_F16(DisasContext *s, arg_2misc *a)
3336{
3337    TCGv_ptr fpst;
3338    TCGv_i32 ahp, tmp, tmp2, tmp3;
3339
3340    if (!arm_dc_feature(s, ARM_FEATURE_NEON) ||
3341        !dc_isar_feature(aa32_fp16_spconv, s)) {
3342        return false;
3343    }
3344
3345    /* UNDEF accesses to D16-D31 if they don't exist. */
3346    if (!dc_isar_feature(aa32_simd_r32, s) &&
3347        ((a->vd | a->vm) & 0x10)) {
3348        return false;
3349    }
3350
3351    if ((a->vd & 1) || (a->size != 1)) {
3352        return false;
3353    }
3354
3355    if (!vfp_access_check(s)) {
3356        return true;
3357    }
3358
3359    fpst = fpstatus_ptr(FPST_STD);
3360    ahp = get_ahp_flag();
3361    tmp3 = tcg_temp_new_i32();
3362    tmp2 = tcg_temp_new_i32();
3363    tmp = tcg_temp_new_i32();
3364    read_neon_element32(tmp, a->vm, 0, MO_32);
3365    read_neon_element32(tmp2, a->vm, 1, MO_32);
3366    tcg_gen_ext16u_i32(tmp3, tmp);
3367    gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp);
3368    write_neon_element32(tmp3, a->vd, 0, MO_32);
3369    tcg_gen_shri_i32(tmp, tmp, 16);
3370    gen_helper_vfp_fcvt_f16_to_f32(tmp, tmp, fpst, ahp);
3371    write_neon_element32(tmp, a->vd, 1, MO_32);
3372    tcg_gen_ext16u_i32(tmp3, tmp2);
3373    gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp);
3374    write_neon_element32(tmp3, a->vd, 2, MO_32);
3375    tcg_gen_shri_i32(tmp2, tmp2, 16);
3376    gen_helper_vfp_fcvt_f16_to_f32(tmp2, tmp2, fpst, ahp);
3377    write_neon_element32(tmp2, a->vd, 3, MO_32);
3378    return true;
3379}
3380
3381static bool do_2misc_vec(DisasContext *s, arg_2misc *a, GVecGen2Fn *fn)
3382{
3383    int vec_size = a->q ? 16 : 8;
3384    int rd_ofs = neon_full_reg_offset(a->vd);
3385    int rm_ofs = neon_full_reg_offset(a->vm);
3386
3387    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
3388        return false;
3389    }
3390
3391    /* UNDEF accesses to D16-D31 if they don't exist. */
3392    if (!dc_isar_feature(aa32_simd_r32, s) &&
3393        ((a->vd | a->vm) & 0x10)) {
3394        return false;
3395    }
3396
3397    if (a->size == 3) {
3398        return false;
3399    }
3400
3401    if ((a->vd | a->vm) & a->q) {
3402        return false;
3403    }
3404
3405    if (!vfp_access_check(s)) {
3406        return true;
3407    }
3408
3409    fn(a->size, rd_ofs, rm_ofs, vec_size, vec_size);
3410
3411    return true;
3412}
3413
3414#define DO_2MISC_VEC(INSN, FN)                                  \
3415    static bool trans_##INSN(DisasContext *s, arg_2misc *a)     \
3416    {                                                           \
3417        return do_2misc_vec(s, a, FN);                          \
3418    }
3419
3420DO_2MISC_VEC(VNEG, tcg_gen_gvec_neg)
3421DO_2MISC_VEC(VABS, tcg_gen_gvec_abs)
3422DO_2MISC_VEC(VCEQ0, gen_gvec_ceq0)
3423DO_2MISC_VEC(VCGT0, gen_gvec_cgt0)
3424DO_2MISC_VEC(VCLE0, gen_gvec_cle0)
3425DO_2MISC_VEC(VCGE0, gen_gvec_cge0)
3426DO_2MISC_VEC(VCLT0, gen_gvec_clt0)
3427
3428static bool trans_VMVN(DisasContext *s, arg_2misc *a)
3429{
3430    if (a->size != 0) {
3431        return false;
3432    }
3433    return do_2misc_vec(s, a, tcg_gen_gvec_not);
3434}
3435
3436#define WRAP_2M_3_OOL_FN(WRAPNAME, FUNC, DATA)                          \
3437    static void WRAPNAME(unsigned vece, uint32_t rd_ofs,                \
3438                         uint32_t rm_ofs, uint32_t oprsz,               \
3439                         uint32_t maxsz)                                \
3440    {                                                                   \
3441        tcg_gen_gvec_3_ool(rd_ofs, rd_ofs, rm_ofs, oprsz, maxsz,        \
3442                           DATA, FUNC);                                 \
3443    }
3444
3445#define WRAP_2M_2_OOL_FN(WRAPNAME, FUNC, DATA)                          \
3446    static void WRAPNAME(unsigned vece, uint32_t rd_ofs,                \
3447                         uint32_t rm_ofs, uint32_t oprsz,               \
3448                         uint32_t maxsz)                                \
3449    {                                                                   \
3450        tcg_gen_gvec_2_ool(rd_ofs, rm_ofs, oprsz, maxsz, DATA, FUNC);   \
3451    }
3452
3453WRAP_2M_3_OOL_FN(gen_AESE, gen_helper_crypto_aese, 0)
3454WRAP_2M_3_OOL_FN(gen_AESD, gen_helper_crypto_aesd, 0)
3455WRAP_2M_2_OOL_FN(gen_AESMC, gen_helper_crypto_aesmc, 0)
3456WRAP_2M_2_OOL_FN(gen_AESIMC, gen_helper_crypto_aesimc, 0)
3457WRAP_2M_2_OOL_FN(gen_SHA1H, gen_helper_crypto_sha1h, 0)
3458WRAP_2M_2_OOL_FN(gen_SHA1SU1, gen_helper_crypto_sha1su1, 0)
3459WRAP_2M_2_OOL_FN(gen_SHA256SU0, gen_helper_crypto_sha256su0, 0)
3460
3461#define DO_2M_CRYPTO(INSN, FEATURE, SIZE)                       \
3462    static bool trans_##INSN(DisasContext *s, arg_2misc *a)     \
3463    {                                                           \
3464        if (!dc_isar_feature(FEATURE, s) || a->size != SIZE) {  \
3465            return false;                                       \
3466        }                                                       \
3467        return do_2misc_vec(s, a, gen_##INSN);                  \
3468    }
3469
3470DO_2M_CRYPTO(AESE, aa32_aes, 0)
3471DO_2M_CRYPTO(AESD, aa32_aes, 0)
3472DO_2M_CRYPTO(AESMC, aa32_aes, 0)
3473DO_2M_CRYPTO(AESIMC, aa32_aes, 0)
3474DO_2M_CRYPTO(SHA1H, aa32_sha1, 2)
3475DO_2M_CRYPTO(SHA1SU1, aa32_sha1, 2)
3476DO_2M_CRYPTO(SHA256SU0, aa32_sha2, 2)
3477
3478static bool do_2misc(DisasContext *s, arg_2misc *a, NeonGenOneOpFn *fn)
3479{
3480    TCGv_i32 tmp;
3481    int pass;
3482
3483    /* Handle a 2-reg-misc operation by iterating 32 bits at a time */
3484    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
3485        return false;
3486    }
3487
3488    /* UNDEF accesses to D16-D31 if they don't exist. */
3489    if (!dc_isar_feature(aa32_simd_r32, s) &&
3490        ((a->vd | a->vm) & 0x10)) {
3491        return false;
3492    }
3493
3494    if (!fn) {
3495        return false;
3496    }
3497
3498    if ((a->vd | a->vm) & a->q) {
3499        return false;
3500    }
3501
3502    if (!vfp_access_check(s)) {
3503        return true;
3504    }
3505
3506    tmp = tcg_temp_new_i32();
3507    for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
3508        read_neon_element32(tmp, a->vm, pass, MO_32);
3509        fn(tmp, tmp);
3510        write_neon_element32(tmp, a->vd, pass, MO_32);
3511    }
3512    return true;
3513}
3514
3515static bool trans_VREV32(DisasContext *s, arg_2misc *a)
3516{
3517    static NeonGenOneOpFn * const fn[] = {
3518        tcg_gen_bswap32_i32,
3519        gen_swap_half,
3520        NULL,
3521        NULL,
3522    };
3523    return do_2misc(s, a, fn[a->size]);
3524}
3525
3526static bool trans_VREV16(DisasContext *s, arg_2misc *a)
3527{
3528    if (a->size != 0) {
3529        return false;
3530    }
3531    return do_2misc(s, a, gen_rev16);
3532}
3533
3534static bool trans_VCLS(DisasContext *s, arg_2misc *a)
3535{
3536    static NeonGenOneOpFn * const fn[] = {
3537        gen_helper_neon_cls_s8,
3538        gen_helper_neon_cls_s16,
3539        gen_helper_neon_cls_s32,
3540        NULL,
3541    };
3542    return do_2misc(s, a, fn[a->size]);
3543}
3544
3545static void do_VCLZ_32(TCGv_i32 rd, TCGv_i32 rm)
3546{
3547    tcg_gen_clzi_i32(rd, rm, 32);
3548}
3549
3550static bool trans_VCLZ(DisasContext *s, arg_2misc *a)
3551{
3552    static NeonGenOneOpFn * const fn[] = {
3553        gen_helper_neon_clz_u8,
3554        gen_helper_neon_clz_u16,
3555        do_VCLZ_32,
3556        NULL,
3557    };
3558    return do_2misc(s, a, fn[a->size]);
3559}
3560
3561static bool trans_VCNT(DisasContext *s, arg_2misc *a)
3562{
3563    if (a->size != 0) {
3564        return false;
3565    }
3566    return do_2misc(s, a, gen_helper_neon_cnt_u8);
3567}
3568
3569static void gen_VABS_F(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3570                       uint32_t oprsz, uint32_t maxsz)
3571{
3572    tcg_gen_gvec_andi(vece, rd_ofs, rm_ofs,
3573                      vece == MO_16 ? 0x7fff : 0x7fffffff,
3574                      oprsz, maxsz);
3575}
3576
3577static bool trans_VABS_F(DisasContext *s, arg_2misc *a)
3578{
3579    if (a->size == MO_16) {
3580        if (!dc_isar_feature(aa32_fp16_arith, s)) {
3581            return false;
3582        }
3583    } else if (a->size != MO_32) {
3584        return false;
3585    }
3586    return do_2misc_vec(s, a, gen_VABS_F);
3587}
3588
3589static void gen_VNEG_F(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3590                       uint32_t oprsz, uint32_t maxsz)
3591{
3592    tcg_gen_gvec_xori(vece, rd_ofs, rm_ofs,
3593                      vece == MO_16 ? 0x8000 : 0x80000000,
3594                      oprsz, maxsz);
3595}
3596
3597static bool trans_VNEG_F(DisasContext *s, arg_2misc *a)
3598{
3599    if (a->size == MO_16) {
3600        if (!dc_isar_feature(aa32_fp16_arith, s)) {
3601            return false;
3602        }
3603    } else if (a->size != MO_32) {
3604        return false;
3605    }
3606    return do_2misc_vec(s, a, gen_VNEG_F);
3607}
3608
3609static bool trans_VRECPE(DisasContext *s, arg_2misc *a)
3610{
3611    if (a->size != 2) {
3612        return false;
3613    }
3614    return do_2misc(s, a, gen_helper_recpe_u32);
3615}
3616
3617static bool trans_VRSQRTE(DisasContext *s, arg_2misc *a)
3618{
3619    if (a->size != 2) {
3620        return false;
3621    }
3622    return do_2misc(s, a, gen_helper_rsqrte_u32);
3623}
3624
3625#define WRAP_1OP_ENV_FN(WRAPNAME, FUNC) \
3626    static void WRAPNAME(TCGv_i32 d, TCGv_i32 m)        \
3627    {                                                   \
3628        FUNC(d, cpu_env, m);                            \
3629    }
3630
3631WRAP_1OP_ENV_FN(gen_VQABS_s8, gen_helper_neon_qabs_s8)
3632WRAP_1OP_ENV_FN(gen_VQABS_s16, gen_helper_neon_qabs_s16)
3633WRAP_1OP_ENV_FN(gen_VQABS_s32, gen_helper_neon_qabs_s32)
3634WRAP_1OP_ENV_FN(gen_VQNEG_s8, gen_helper_neon_qneg_s8)
3635WRAP_1OP_ENV_FN(gen_VQNEG_s16, gen_helper_neon_qneg_s16)
3636WRAP_1OP_ENV_FN(gen_VQNEG_s32, gen_helper_neon_qneg_s32)
3637
3638static bool trans_VQABS(DisasContext *s, arg_2misc *a)
3639{
3640    static NeonGenOneOpFn * const fn[] = {
3641        gen_VQABS_s8,
3642        gen_VQABS_s16,
3643        gen_VQABS_s32,
3644        NULL,
3645    };
3646    return do_2misc(s, a, fn[a->size]);
3647}
3648
3649static bool trans_VQNEG(DisasContext *s, arg_2misc *a)
3650{
3651    static NeonGenOneOpFn * const fn[] = {
3652        gen_VQNEG_s8,
3653        gen_VQNEG_s16,
3654        gen_VQNEG_s32,
3655        NULL,
3656    };
3657    return do_2misc(s, a, fn[a->size]);
3658}
3659
3660#define DO_2MISC_FP_VEC(INSN, HFUNC, SFUNC)                             \
3661    static void gen_##INSN(unsigned vece, uint32_t rd_ofs,              \
3662                           uint32_t rm_ofs,                             \
3663                           uint32_t oprsz, uint32_t maxsz)              \
3664    {                                                                   \
3665        static gen_helper_gvec_2_ptr * const fns[4] = {                 \
3666            NULL, HFUNC, SFUNC, NULL,                                   \
3667        };                                                              \
3668        TCGv_ptr fpst;                                                  \
3669        fpst = fpstatus_ptr(vece == MO_16 ? FPST_STD_F16 : FPST_STD);   \
3670        tcg_gen_gvec_2_ptr(rd_ofs, rm_ofs, fpst, oprsz, maxsz, 0,       \
3671                           fns[vece]);                                  \
3672    }                                                                   \
3673    static bool trans_##INSN(DisasContext *s, arg_2misc *a)             \
3674    {                                                                   \
3675        if (a->size == MO_16) {                                         \
3676            if (!dc_isar_feature(aa32_fp16_arith, s)) {                 \
3677                return false;                                           \
3678            }                                                           \
3679        } else if (a->size != MO_32) {                                  \
3680            return false;                                               \
3681        }                                                               \
3682        return do_2misc_vec(s, a, gen_##INSN);                          \
3683    }
3684
3685DO_2MISC_FP_VEC(VRECPE_F, gen_helper_gvec_frecpe_h, gen_helper_gvec_frecpe_s)
3686DO_2MISC_FP_VEC(VRSQRTE_F, gen_helper_gvec_frsqrte_h, gen_helper_gvec_frsqrte_s)
3687DO_2MISC_FP_VEC(VCGT0_F, gen_helper_gvec_fcgt0_h, gen_helper_gvec_fcgt0_s)
3688DO_2MISC_FP_VEC(VCGE0_F, gen_helper_gvec_fcge0_h, gen_helper_gvec_fcge0_s)
3689DO_2MISC_FP_VEC(VCEQ0_F, gen_helper_gvec_fceq0_h, gen_helper_gvec_fceq0_s)
3690DO_2MISC_FP_VEC(VCLT0_F, gen_helper_gvec_fclt0_h, gen_helper_gvec_fclt0_s)
3691DO_2MISC_FP_VEC(VCLE0_F, gen_helper_gvec_fcle0_h, gen_helper_gvec_fcle0_s)
3692DO_2MISC_FP_VEC(VCVT_FS, gen_helper_gvec_sstoh, gen_helper_gvec_sitos)
3693DO_2MISC_FP_VEC(VCVT_FU, gen_helper_gvec_ustoh, gen_helper_gvec_uitos)
3694DO_2MISC_FP_VEC(VCVT_SF, gen_helper_gvec_tosszh, gen_helper_gvec_tosizs)
3695DO_2MISC_FP_VEC(VCVT_UF, gen_helper_gvec_touszh, gen_helper_gvec_touizs)
3696
3697DO_2MISC_FP_VEC(VRINTX_impl, gen_helper_gvec_vrintx_h, gen_helper_gvec_vrintx_s)
3698
3699static bool trans_VRINTX(DisasContext *s, arg_2misc *a)
3700{
3701    if (!arm_dc_feature(s, ARM_FEATURE_V8)) {
3702        return false;
3703    }
3704    return trans_VRINTX_impl(s, a);
3705}
3706
3707#define DO_VEC_RMODE(INSN, RMODE, OP)                                   \
3708    static void gen_##INSN(unsigned vece, uint32_t rd_ofs,              \
3709                           uint32_t rm_ofs,                             \
3710                           uint32_t oprsz, uint32_t maxsz)              \
3711    {                                                                   \
3712        static gen_helper_gvec_2_ptr * const fns[4] = {                 \
3713            NULL,                                                       \
3714            gen_helper_gvec_##OP##h,                                    \
3715            gen_helper_gvec_##OP##s,                                    \
3716            NULL,                                                       \
3717        };                                                              \
3718        TCGv_ptr fpst;                                                  \
3719        fpst = fpstatus_ptr(vece == 1 ? FPST_STD_F16 : FPST_STD);       \
3720        tcg_gen_gvec_2_ptr(rd_ofs, rm_ofs, fpst, oprsz, maxsz,          \
3721                           arm_rmode_to_sf(RMODE), fns[vece]);          \
3722    }                                                                   \
3723    static bool trans_##INSN(DisasContext *s, arg_2misc *a)             \
3724    {                                                                   \
3725        if (!arm_dc_feature(s, ARM_FEATURE_V8)) {                       \
3726            return false;                                               \
3727        }                                                               \
3728        if (a->size == MO_16) {                                         \
3729            if (!dc_isar_feature(aa32_fp16_arith, s)) {                 \
3730                return false;                                           \
3731            }                                                           \
3732        } else if (a->size != MO_32) {                                  \
3733            return false;                                               \
3734        }                                                               \
3735        return do_2misc_vec(s, a, gen_##INSN);                          \
3736    }
3737
3738DO_VEC_RMODE(VCVTAU, FPROUNDING_TIEAWAY, vcvt_rm_u)
3739DO_VEC_RMODE(VCVTAS, FPROUNDING_TIEAWAY, vcvt_rm_s)
3740DO_VEC_RMODE(VCVTNU, FPROUNDING_TIEEVEN, vcvt_rm_u)
3741DO_VEC_RMODE(VCVTNS, FPROUNDING_TIEEVEN, vcvt_rm_s)
3742DO_VEC_RMODE(VCVTPU, FPROUNDING_POSINF, vcvt_rm_u)
3743DO_VEC_RMODE(VCVTPS, FPROUNDING_POSINF, vcvt_rm_s)
3744DO_VEC_RMODE(VCVTMU, FPROUNDING_NEGINF, vcvt_rm_u)
3745DO_VEC_RMODE(VCVTMS, FPROUNDING_NEGINF, vcvt_rm_s)
3746
3747DO_VEC_RMODE(VRINTN, FPROUNDING_TIEEVEN, vrint_rm_)
3748DO_VEC_RMODE(VRINTA, FPROUNDING_TIEAWAY, vrint_rm_)
3749DO_VEC_RMODE(VRINTZ, FPROUNDING_ZERO, vrint_rm_)
3750DO_VEC_RMODE(VRINTM, FPROUNDING_NEGINF, vrint_rm_)
3751DO_VEC_RMODE(VRINTP, FPROUNDING_POSINF, vrint_rm_)
3752
3753static bool trans_VSWP(DisasContext *s, arg_2misc *a)
3754{
3755    TCGv_i64 rm, rd;
3756    int pass;
3757
3758    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
3759        return false;
3760    }
3761
3762    /* UNDEF accesses to D16-D31 if they don't exist. */
3763    if (!dc_isar_feature(aa32_simd_r32, s) &&
3764        ((a->vd | a->vm) & 0x10)) {
3765        return false;
3766    }
3767
3768    if (a->size != 0) {
3769        return false;
3770    }
3771
3772    if ((a->vd | a->vm) & a->q) {
3773        return false;
3774    }
3775
3776    if (!vfp_access_check(s)) {
3777        return true;
3778    }
3779
3780    rm = tcg_temp_new_i64();
3781    rd = tcg_temp_new_i64();
3782    for (pass = 0; pass < (a->q ? 2 : 1); pass++) {
3783        read_neon_element64(rm, a->vm, pass, MO_64);
3784        read_neon_element64(rd, a->vd, pass, MO_64);
3785        write_neon_element64(rm, a->vd, pass, MO_64);
3786        write_neon_element64(rd, a->vm, pass, MO_64);
3787    }
3788    return true;
3789}
3790
3791static void gen_neon_trn_u8(TCGv_i32 t0, TCGv_i32 t1)
3792{
3793    TCGv_i32 rd, tmp;
3794
3795    rd = tcg_temp_new_i32();
3796    tmp = tcg_temp_new_i32();
3797
3798    tcg_gen_shli_i32(rd, t0, 8);
3799    tcg_gen_andi_i32(rd, rd, 0xff00ff00);
3800    tcg_gen_andi_i32(tmp, t1, 0x00ff00ff);
3801    tcg_gen_or_i32(rd, rd, tmp);
3802
3803    tcg_gen_shri_i32(t1, t1, 8);
3804    tcg_gen_andi_i32(t1, t1, 0x00ff00ff);
3805    tcg_gen_andi_i32(tmp, t0, 0xff00ff00);
3806    tcg_gen_or_i32(t1, t1, tmp);
3807    tcg_gen_mov_i32(t0, rd);
3808}
3809
3810static void gen_neon_trn_u16(TCGv_i32 t0, TCGv_i32 t1)
3811{
3812    TCGv_i32 rd, tmp;
3813
3814    rd = tcg_temp_new_i32();
3815    tmp = tcg_temp_new_i32();
3816
3817    tcg_gen_shli_i32(rd, t0, 16);
3818    tcg_gen_andi_i32(tmp, t1, 0xffff);
3819    tcg_gen_or_i32(rd, rd, tmp);
3820    tcg_gen_shri_i32(t1, t1, 16);
3821    tcg_gen_andi_i32(tmp, t0, 0xffff0000);
3822    tcg_gen_or_i32(t1, t1, tmp);
3823    tcg_gen_mov_i32(t0, rd);
3824}
3825
3826static bool trans_VTRN(DisasContext *s, arg_2misc *a)
3827{
3828    TCGv_i32 tmp, tmp2;
3829    int pass;
3830
3831    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
3832        return false;
3833    }
3834
3835    /* UNDEF accesses to D16-D31 if they don't exist. */
3836    if (!dc_isar_feature(aa32_simd_r32, s) &&
3837        ((a->vd | a->vm) & 0x10)) {
3838        return false;
3839    }
3840
3841    if ((a->vd | a->vm) & a->q) {
3842        return false;
3843    }
3844
3845    if (a->size == 3) {
3846        return false;
3847    }
3848
3849    if (!vfp_access_check(s)) {
3850        return true;
3851    }
3852
3853    tmp = tcg_temp_new_i32();
3854    tmp2 = tcg_temp_new_i32();
3855    if (a->size == MO_32) {
3856        for (pass = 0; pass < (a->q ? 4 : 2); pass += 2) {
3857            read_neon_element32(tmp, a->vm, pass, MO_32);
3858            read_neon_element32(tmp2, a->vd, pass + 1, MO_32);
3859            write_neon_element32(tmp2, a->vm, pass, MO_32);
3860            write_neon_element32(tmp, a->vd, pass + 1, MO_32);
3861        }
3862    } else {
3863        for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
3864            read_neon_element32(tmp, a->vm, pass, MO_32);
3865            read_neon_element32(tmp2, a->vd, pass, MO_32);
3866            if (a->size == MO_8) {
3867                gen_neon_trn_u8(tmp, tmp2);
3868            } else {
3869                gen_neon_trn_u16(tmp, tmp2);
3870            }
3871            write_neon_element32(tmp2, a->vm, pass, MO_32);
3872            write_neon_element32(tmp, a->vd, pass, MO_32);
3873        }
3874    }
3875    return true;
3876}
3877
3878static bool trans_VSMMLA(DisasContext *s, arg_VSMMLA *a)
3879{
3880    if (!dc_isar_feature(aa32_i8mm, s)) {
3881        return false;
3882    }
3883    return do_neon_ddda(s, 7, a->vd, a->vn, a->vm, 0,
3884                        gen_helper_gvec_smmla_b);
3885}
3886
3887static bool trans_VUMMLA(DisasContext *s, arg_VUMMLA *a)
3888{
3889    if (!dc_isar_feature(aa32_i8mm, s)) {
3890        return false;
3891    }
3892    return do_neon_ddda(s, 7, a->vd, a->vn, a->vm, 0,
3893                        gen_helper_gvec_ummla_b);
3894}
3895
3896static bool trans_VUSMMLA(DisasContext *s, arg_VUSMMLA *a)
3897{
3898    if (!dc_isar_feature(aa32_i8mm, s)) {
3899        return false;
3900    }
3901    return do_neon_ddda(s, 7, a->vd, a->vn, a->vm, 0,
3902                        gen_helper_gvec_usmmla_b);
3903}
3904
3905static bool trans_VMMLA_b16(DisasContext *s, arg_VMMLA_b16 *a)
3906{
3907    if (!dc_isar_feature(aa32_bf16, s)) {
3908        return false;
3909    }
3910    return do_neon_ddda(s, 7, a->vd, a->vn, a->vm, 0,
3911                        gen_helper_gvec_bfmmla);
3912}
3913
3914static bool trans_VFMA_b16(DisasContext *s, arg_VFMA_b16 *a)
3915{
3916    if (!dc_isar_feature(aa32_bf16, s)) {
3917        return false;
3918    }
3919    return do_neon_ddda_fpst(s, 7, a->vd, a->vn, a->vm, a->q, FPST_STD,
3920                             gen_helper_gvec_bfmlal);
3921}
3922
3923static bool trans_VFMA_b16_scal(DisasContext *s, arg_VFMA_b16_scal *a)
3924{
3925    if (!dc_isar_feature(aa32_bf16, s)) {
3926        return false;
3927    }
3928    return do_neon_ddda_fpst(s, 6, a->vd, a->vn, a->vm,
3929                             (a->index << 1) | a->q, FPST_STD,
3930                             gen_helper_gvec_bfmlal_idx);
3931}
3932