1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23#include "qemu/osdep.h"
24#include "tcg/tcg-op.h"
25#include "tcg/tcg-op-gvec.h"
26#include "exec/exec-all.h"
27#include "exec/gen-icount.h"
28#include "translate.h"
29#include "translate-a32.h"
30
31
32#include "decode-neon-dp.c.inc"
33#include "decode-neon-ls.c.inc"
34#include "decode-neon-shared.c.inc"
35
/* Return a TCGv_ptr pointing at the given VFP register within cpu_env. */
static TCGv_ptr vfp_reg_ptr(bool dp, int reg)
{
    TCGv_ptr ret = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(ret, cpu_env, vfp_reg_offset(dp, reg));
    return ret;
}
42
/*
 * Load one element of a Neon register into a 32-bit temp, zero-extending
 * narrow elements. Only 8/16/32-bit unsigned element loads are valid here.
 */
static void neon_load_element(TCGv_i32 var, int reg, int ele, MemOp mop)
{
    long offset = neon_element_offset(reg, ele, mop & MO_SIZE);

    switch (mop) {
    case MO_UB:
        tcg_gen_ld8u_i32(var, cpu_env, offset);
        break;
    case MO_UW:
        tcg_gen_ld16u_i32(var, cpu_env, offset);
        break;
    case MO_UL:
        tcg_gen_ld_i32(var, cpu_env, offset);
        break;
    default:
        g_assert_not_reached();
    }
}
61
/*
 * Load one element of a Neon register into a 64-bit temp, zero-extending
 * narrow elements; MO_Q loads a whole double-word.
 */
static void neon_load_element64(TCGv_i64 var, int reg, int ele, MemOp mop)
{
    long offset = neon_element_offset(reg, ele, mop & MO_SIZE);

    switch (mop) {
    case MO_UB:
        tcg_gen_ld8u_i64(var, cpu_env, offset);
        break;
    case MO_UW:
        tcg_gen_ld16u_i64(var, cpu_env, offset);
        break;
    case MO_UL:
        tcg_gen_ld32u_i64(var, cpu_env, offset);
        break;
    case MO_Q:
        tcg_gen_ld_i64(var, cpu_env, offset);
        break;
    default:
        g_assert_not_reached();
    }
}
83
84static void neon_store_element(int reg, int ele, MemOp size, TCGv_i32 var)
85{
86 long offset = neon_element_offset(reg, ele, size);
87
88 switch (size) {
89 case MO_8:
90 tcg_gen_st8_i32(var, cpu_env, offset);
91 break;
92 case MO_16:
93 tcg_gen_st16_i32(var, cpu_env, offset);
94 break;
95 case MO_32:
96 tcg_gen_st_i32(var, cpu_env, offset);
97 break;
98 default:
99 g_assert_not_reached();
100 }
101}
102
/*
 * Store a 64-bit temp into one element of a Neon register,
 * truncating to the element size; MO_64 stores the whole double-word.
 */
static void neon_store_element64(int reg, int ele, MemOp size, TCGv_i64 var)
{
    long offset = neon_element_offset(reg, ele, size);

    switch (size) {
    case MO_8:
        tcg_gen_st8_i64(var, cpu_env, offset);
        break;
    case MO_16:
        tcg_gen_st16_i64(var, cpu_env, offset);
        break;
    case MO_32:
        tcg_gen_st32_i64(var, cpu_env, offset);
        break;
    case MO_64:
        tcg_gen_st_i64(var, cpu_env, offset);
        break;
    default:
        g_assert_not_reached();
    }
}
124
/*
 * Common expansion for insns of the form Vd = op(Vd, Vn, Vm) that map
 * to an out-of-line gvec helper. 'q' is a 3-bit mask of which of
 * vd/vn/vm (bits 2/1/0) must be even (i.e. valid Q registers); callers
 * pass q * 7 for all-Q operands, or q * 6 when vm is a scalar index
 * that may name an odd D register.
 */
static bool do_neon_ddda(DisasContext *s, int q, int vd, int vn, int vm,
                         int data, gen_helper_gvec_4 *fn_gvec)
{
    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (((vd | vn | vm) & 0x10) && !dc_isar_feature(aa32_simd_r32, s)) {
        return false;
    }

    /* UNDEF accesses to odd registers for each selected bit of q. */
    if (((vd & 1) * 4 | (vn & 1) * 2 | (vm & 1)) & q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    int opr_sz = q ? 16 : 8;
    tcg_gen_gvec_4_ool(vfp_reg_offset(1, vd),
                       vfp_reg_offset(1, vn),
                       vfp_reg_offset(1, vm),
                       vfp_reg_offset(1, vd),
                       opr_sz, opr_sz, data, fn_gvec);
    return true;
}
154
/*
 * As do_neon_ddda, but for helpers that additionally take a pointer
 * to the float_status of the requested flavour.
 */
static bool do_neon_ddda_fpst(DisasContext *s, int q, int vd, int vn, int vm,
                              int data, ARMFPStatusFlavour fp_flavour,
                              gen_helper_gvec_4_ptr *fn_gvec_ptr)
{
    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (((vd | vn | vm) & 0x10) && !dc_isar_feature(aa32_simd_r32, s)) {
        return false;
    }

    /* UNDEF accesses to odd registers for each selected bit of q. */
    if (((vd & 1) * 4 | (vn & 1) * 2 | (vm & 1)) & q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    int opr_sz = q ? 16 : 8;
    TCGv_ptr fpst = fpstatus_ptr(fp_flavour);

    tcg_gen_gvec_4_ptr(vfp_reg_offset(1, vd),
                       vfp_reg_offset(1, vn),
                       vfp_reg_offset(1, vm),
                       vfp_reg_offset(1, vd),
                       fpst, opr_sz, opr_sz, data, fn_gvec_ptr);
    tcg_temp_free_ptr(fpst);
    return true;
}
188
/* VCMLA: vector complex multiply-accumulate; fp16 form needs FP16 arith. */
static bool trans_VCMLA(DisasContext *s, arg_VCMLA *a)
{
    if (!dc_isar_feature(aa32_vcma, s)) {
        return false;
    }
    if (a->size == MO_16) {
        if (!dc_isar_feature(aa32_fp16_arith, s)) {
            return false;
        }
        return do_neon_ddda_fpst(s, a->q * 7, a->vd, a->vn, a->vm, a->rot,
                                 FPST_STD_F16, gen_helper_gvec_fcmlah);
    }
    return do_neon_ddda_fpst(s, a->q * 7, a->vd, a->vn, a->vm, a->rot,
                             FPST_STD, gen_helper_gvec_fcmlas);
}
204
/* VCADD: vector complex add with rotate. */
static bool trans_VCADD(DisasContext *s, arg_VCADD *a)
{
    int opr_sz;
    TCGv_ptr fpst;
    gen_helper_gvec_3_ptr *fn_gvec_ptr;

    if (!dc_isar_feature(aa32_vcma, s)
        || (a->size == MO_16 && !dc_isar_feature(aa32_fp16_arith, s))) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    /* For Q insns all registers must be even (valid Q register numbers). */
    if ((a->vn | a->vm | a->vd) & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    opr_sz = (1 + a->q) * 8;
    fpst = fpstatus_ptr(a->size == MO_16 ? FPST_STD_F16 : FPST_STD);
    fn_gvec_ptr = (a->size == MO_16) ?
        gen_helper_gvec_fcaddh : gen_helper_gvec_fcadds;
    tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
                       vfp_reg_offset(1, a->vn),
                       vfp_reg_offset(1, a->vm),
                       fpst, opr_sz, opr_sz, a->rot,
                       fn_gvec_ptr);
    tcg_temp_free_ptr(fpst);
    return true;
}
242
243static bool trans_VSDOT(DisasContext *s, arg_VSDOT *a)
244{
245 if (!dc_isar_feature(aa32_dp, s)) {
246 return false;
247 }
248 return do_neon_ddda(s, a->q * 7, a->vd, a->vn, a->vm, 0,
249 gen_helper_gvec_sdot_b);
250}
251
252static bool trans_VUDOT(DisasContext *s, arg_VUDOT *a)
253{
254 if (!dc_isar_feature(aa32_dp, s)) {
255 return false;
256 }
257 return do_neon_ddda(s, a->q * 7, a->vd, a->vn, a->vm, 0,
258 gen_helper_gvec_udot_b);
259}
260
261static bool trans_VUSDOT(DisasContext *s, arg_VUSDOT *a)
262{
263 if (!dc_isar_feature(aa32_i8mm, s)) {
264 return false;
265 }
266 return do_neon_ddda(s, a->q * 7, a->vd, a->vn, a->vm, 0,
267 gen_helper_gvec_usdot_b);
268}
269
270static bool trans_VDOT_b16(DisasContext *s, arg_VDOT_b16 *a)
271{
272 if (!dc_isar_feature(aa32_bf16, s)) {
273 return false;
274 }
275 return do_neon_ddda(s, a->q * 7, a->vd, a->vn, a->vm, 0,
276 gen_helper_gvec_bfdot);
277}
278
/*
 * VFML: fp16 multiply, widening to single-precision accumulate.
 * Vd is always a Q-operand-sized destination; Vn/Vm are D or Q
 * according to the Q bit (hence vfp_reg_offset(a->q, ...)).
 */
static bool trans_VFML(DisasContext *s, arg_VFML *a)
{
    int opr_sz;

    if (!dc_isar_feature(aa32_fhm, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        (a->vd & 0x10)) {
        return false;
    }

    /* Q form requires an even vd. */
    if (a->vd & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    opr_sz = (1 + a->q) * 8;
    tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
                       vfp_reg_offset(a->q, a->vn),
                       vfp_reg_offset(a->q, a->vm),
                       cpu_env, opr_sz, opr_sz, a->s, /* is_2 == 0 */
                       gen_helper_gvec_fmlal_a32);
    return true;
}
309
/*
 * VCMLA (by element): q * 6 because vm names a scalar and may be any
 * D register; only vd/vn must be even for the Q form.
 */
static bool trans_VCMLA_scalar(DisasContext *s, arg_VCMLA_scalar *a)
{
    int data = (a->index << 2) | a->rot;

    if (!dc_isar_feature(aa32_vcma, s)) {
        return false;
    }
    if (a->size == MO_16) {
        if (!dc_isar_feature(aa32_fp16_arith, s)) {
            return false;
        }
        return do_neon_ddda_fpst(s, a->q * 6, a->vd, a->vn, a->vm, data,
                                 FPST_STD_F16, gen_helper_gvec_fcmlah_idx);
    }
    return do_neon_ddda_fpst(s, a->q * 6, a->vd, a->vn, a->vm, data,
                             FPST_STD, gen_helper_gvec_fcmlas_idx);
}
327
328static bool trans_VSDOT_scalar(DisasContext *s, arg_VSDOT_scalar *a)
329{
330 if (!dc_isar_feature(aa32_dp, s)) {
331 return false;
332 }
333 return do_neon_ddda(s, a->q * 6, a->vd, a->vn, a->vm, a->index,
334 gen_helper_gvec_sdot_idx_b);
335}
336
337static bool trans_VUDOT_scalar(DisasContext *s, arg_VUDOT_scalar *a)
338{
339 if (!dc_isar_feature(aa32_dp, s)) {
340 return false;
341 }
342 return do_neon_ddda(s, a->q * 6, a->vd, a->vn, a->vm, a->index,
343 gen_helper_gvec_udot_idx_b);
344}
345
346static bool trans_VUSDOT_scalar(DisasContext *s, arg_VUSDOT_scalar *a)
347{
348 if (!dc_isar_feature(aa32_i8mm, s)) {
349 return false;
350 }
351 return do_neon_ddda(s, a->q * 6, a->vd, a->vn, a->vm, a->index,
352 gen_helper_gvec_usdot_idx_b);
353}
354
355static bool trans_VSUDOT_scalar(DisasContext *s, arg_VSUDOT_scalar *a)
356{
357 if (!dc_isar_feature(aa32_i8mm, s)) {
358 return false;
359 }
360 return do_neon_ddda(s, a->q * 6, a->vd, a->vn, a->vm, a->index,
361 gen_helper_gvec_sudot_idx_b);
362}
363
364static bool trans_VDOT_b16_scal(DisasContext *s, arg_VDOT_b16_scal *a)
365{
366 if (!dc_isar_feature(aa32_bf16, s)) {
367 return false;
368 }
369 return do_neon_ddda(s, a->q * 6, a->vd, a->vn, a->vm, a->index,
370 gen_helper_gvec_bfdot_idx);
371}
372
/*
 * VFML (by element): fp16 multiply-by-scalar, widening accumulate to
 * single precision. The scalar index and the S bit are packed into the
 * helper's 'data' argument.
 */
static bool trans_VFML_scalar(DisasContext *s, arg_VFML_scalar *a)
{
    int opr_sz;

    if (!dc_isar_feature(aa32_fhm, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd & 0x10) || (a->q && (a->vn & 0x10)))) {
        return false;
    }

    /* Q form requires an even vd. */
    if (a->vd & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    opr_sz = (1 + a->q) * 8;
    tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
                       vfp_reg_offset(a->q, a->vn),
                       vfp_reg_offset(a->q, a->rm),
                       cpu_env, opr_sz, opr_sz,
                       (a->index << 2) | a->s, /* is_2 == 0 */
                       gen_helper_gvec_fmlal_idx_a32);
    return true;
}
404
/*
 * Per-itype {nregs, interleave, spacing} for VLD/VST "multiple
 * structures" insns, indexed by a->itype. itype > 10 is UNDEF and is
 * rejected by the caller before this table is consulted.
 */
static struct {
    int nregs;
    int interleave;
    int spacing;
} const neon_ls_element_type[11] = {
    {1, 4, 1},
    {1, 4, 2},
    {4, 1, 1},
    {2, 2, 2},
    {1, 3, 1},
    {1, 3, 2},
    {3, 1, 1},
    {1, 1, 1},
    {1, 2, 1},
    {1, 2, 2},
    {2, 1, 1}
};
422
/*
 * Base-register writeback after a Neon load/store:
 *   rm == 15: no writeback;
 *   rm == 13: writeback by the fixed transfer size 'stride';
 *   otherwise: post-indexed writeback by register rm.
 */
static void gen_neon_ldst_base_update(DisasContext *s, int rm, int rn,
                                      int stride)
{
    if (rm != 15) {
        TCGv_i32 base;

        base = load_reg(s, rn);
        if (rm == 13) {
            tcg_gen_addi_i32(base, base, stride);
        } else {
            TCGv_i32 index;
            index = load_reg(s, rm);
            tcg_gen_add_i32(base, base, index);
            tcg_temp_free_i32(index);
        }
        store_reg(s, rn, base);
    }
}
441
static bool trans_VLDST_multiple(DisasContext *s, arg_VLDST_multiple *a)
{
    /* Neon load/store multiple structures */
    int nregs, interleave, spacing, reg, n;
    MemOp mop, align, endian;
    int mmu_idx = get_mem_index(s);
    int size = a->size;
    TCGv_i64 tmp64;
    TCGv_i32 addr, tmp;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
        return false;
    }
    if (a->itype > 10) {
        return false;
    }
    /* Catch UNDEF cases for bad values of align field */
    switch (a->itype & 0xc) {
    case 4:
        if (a->align >= 2) {
            return false;
        }
        break;
    case 8:
        if (a->align == 3) {
            return false;
        }
        break;
    default:
        break;
    }
    nregs = neon_ls_element_type[a->itype].nregs;
    interleave = neon_ls_element_type[a->itype].interleave;
    spacing = neon_ls_element_type[a->itype].spacing;
    /* 64-bit elements are only valid for the non-interleaved forms. */
    if (size == 3 && (interleave | spacing) != 1) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /* For our purposes, bytes are always little-endian. */
    endian = s->be_data;
    if (size == 0) {
        endian = MO_LE;
    }

    /* Enforce alignment requested by the instruction */
    if (a->align) {
        align = pow2_align(a->align + 2);
    } else {
        align = s->align_mem ? MO_ALIGN : 0;
    }

    /*
     * Consecutive little-endian elements from a single register
     * can be promoted to a larger little-endian operation.
     */
    if (interleave == 1 && endian == MO_LE) {
        /* Retain any natural alignment. */
        if (align == MO_ALIGN) {
            align = pow2_align(size);
        }
        size = 3;
    }

    tmp64 = tcg_temp_new_i64();
    addr = tcg_temp_new_i32();
    tmp = tcg_const_i32(1 << size);
    load_reg_var(s, addr, a->rn);

    mop = endian | size | align;
    for (reg = 0; reg < nregs; reg++) {
        for (n = 0; n < 8 >> size; n++) {
            int xs;
            for (xs = 0; xs < interleave; xs++) {
                int tt = a->vd + reg + spacing * xs;

                if (a->l) {
                    gen_aa32_ld_internal_i64(s, tmp64, addr, mmu_idx, mop);
                    neon_store_element64(tt, n, size, tmp64);
                } else {
                    neon_load_element64(tmp64, tt, n, size);
                    gen_aa32_st_internal_i64(s, tmp64, addr, mmu_idx, mop);
                }
                tcg_gen_add_i32(addr, addr, tmp);

                /* Subsequent memory operations inherit alignment */
                mop &= ~MO_AMASK;
            }
        }
    }
    tcg_temp_free_i32(addr);
    tcg_temp_free_i32(tmp);
    tcg_temp_free_i64(tmp64);

    gen_neon_ldst_base_update(s, a->rm, a->rn, nregs * interleave * 8);
    return true;
}
547
static bool trans_VLD_all_lanes(DisasContext *s, arg_VLD_all_lanes *a)
{
    /* Neon load single structure to all lanes */
    int reg, stride, vec_size;
    int vd = a->vd;
    int size = a->size;
    int nregs = a->n + 1;
    TCGv_i32 addr, tmp;
    MemOp mop, align;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
        return false;
    }

    align = 0;
    if (size == 3) {
        if (nregs != 4 || a->a == 0) {
            return false;
        }
        /* For VLD4 size == 3, a == 1 means 32 bits at 16 byte alignment. */
        size = MO_32;
        align = MO_ALIGN_16;
    } else if (a->a) {
        switch (nregs) {
        case 1:
            if (size == 0) {
                return false;
            }
            align = MO_ALIGN;
            break;
        case 2:
            align = pow2_align(size + 1);
            break;
        case 3:
            return false;
        case 4:
            align = pow2_align(size + 2);
            break;
        default:
            g_assert_not_reached();
        }
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /*
     * VLD1 to all lanes: T bit indicates how many Dregs to write.
     * VLD2/3/4 to all lanes: T bit indicates register stride.
     */
    stride = a->t ? 2 : 1;
    vec_size = nregs == 1 ? stride * 8 : 8;
    mop = size | align;
    tmp = tcg_temp_new_i32();
    addr = tcg_temp_new_i32();
    load_reg_var(s, addr, a->rn);
    for (reg = 0; reg < nregs; reg++) {
        gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), mop);
        if ((vd & 1) && vec_size == 16) {
            /*
             * We cannot write 16 bytes at once because the destination
             * is unaligned; write the two Dregs separately instead.
             */
            tcg_gen_gvec_dup_i32(size, neon_full_reg_offset(vd),
                                 8, 8, tmp);
            tcg_gen_gvec_mov(0, neon_full_reg_offset(vd + 1),
                             neon_full_reg_offset(vd), 8, 8);
        } else {
            tcg_gen_gvec_dup_i32(size, neon_full_reg_offset(vd),
                                 vec_size, vec_size, tmp);
        }
        tcg_gen_addi_i32(addr, addr, 1 << size);
        vd += stride;

        /* Subsequent memory operations inherit alignment */
        mop &= ~MO_AMASK;
    }
    tcg_temp_free_i32(tmp);
    tcg_temp_free_i32(addr);

    gen_neon_ldst_base_update(s, a->rm, a->rn, (1 << size) * nregs);

    return true;
}
638
639static bool trans_VLDST_single(DisasContext *s, arg_VLDST_single *a)
640{
641
642 int reg;
643 int nregs = a->n + 1;
644 int vd = a->vd;
645 TCGv_i32 addr, tmp;
646 MemOp mop;
647
648 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
649 return false;
650 }
651
652
653 if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
654 return false;
655 }
656
657
658 switch (nregs) {
659 case 1:
660 if (((a->align & (1 << a->size)) != 0) ||
661 (a->size == 2 && (a->align == 1 || a->align == 2))) {
662 return false;
663 }
664 break;
665 case 3:
666 if ((a->align & 1) != 0) {
667 return false;
668 }
669
670 case 2:
671 if (a->size == 2 && (a->align & 2) != 0) {
672 return false;
673 }
674 break;
675 case 4:
676 if (a->size == 2 && a->align == 3) {
677 return false;
678 }
679 break;
680 default:
681 abort();
682 }
683 if ((vd + a->stride * (nregs - 1)) > 31) {
684
685
686
687
688
689 return false;
690 }
691
692 if (!vfp_access_check(s)) {
693 return true;
694 }
695
696
697 mop = finalize_memop(s, a->size);
698
699 if (a->align) {
700 MemOp align_op;
701
702 switch (nregs) {
703 case 1:
704
705 align_op = MO_ALIGN;
706 break;
707 case 2:
708
709 align_op = pow2_align(a->size + 1);
710 break;
711 case 4:
712 if (a->size == MO_32) {
713
714
715
716
717 align_op = pow2_align(a->size + a->align);
718 } else {
719
720 align_op = pow2_align(a->size + 2);
721 }
722 break;
723 default:
724
725 g_assert_not_reached();
726 }
727
728 mop = (mop & ~MO_AMASK) | align_op;
729 }
730
731 tmp = tcg_temp_new_i32();
732 addr = tcg_temp_new_i32();
733 load_reg_var(s, addr, a->rn);
734
735 for (reg = 0; reg < nregs; reg++) {
736 if (a->l) {
737 gen_aa32_ld_internal_i32(s, tmp, addr, get_mem_index(s), mop);
738 neon_store_element(vd, a->reg_idx, a->size, tmp);
739 } else {
740 neon_load_element(tmp, vd, a->reg_idx, a->size);
741 gen_aa32_st_internal_i32(s, tmp, addr, get_mem_index(s), mop);
742 }
743 vd += a->stride;
744 tcg_gen_addi_i32(addr, addr, 1 << a->size);
745
746
747 mop &= ~MO_AMASK;
748 }
749 tcg_temp_free_i32(addr);
750 tcg_temp_free_i32(tmp);
751
752 gen_neon_ldst_base_update(s, a->rm, a->rn, (1 << a->size) * nregs);
753
754 return true;
755}
756
/*
 * Common UNDEF checks and expansion for the "3 registers same length"
 * groups: check feature bits and register validity, then expand via
 * the given gvec-style generator.
 */
static bool do_3same(DisasContext *s, arg_3same *a, GVecGen3Fn fn)
{
    int vec_size = a->q ? 16 : 8;
    int rd_ofs = neon_full_reg_offset(a->vd);
    int rn_ofs = neon_full_reg_offset(a->vn);
    int rm_ofs = neon_full_reg_offset(a->vm);

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    /* For Q insns all registers must be even (valid Q register numbers). */
    if ((a->vn | a->vm | a->vd) & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fn(a->size, rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size);
    return true;
}
785
/* Define a trans function whose expansion is a single gvec generator. */
#define DO_3SAME(INSN, FUNC)                                            \
    static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a)        \
    {                                                                   \
        return do_3same(s, a, FUNC);                                    \
    }

DO_3SAME(VADD, tcg_gen_gvec_add)
DO_3SAME(VSUB, tcg_gen_gvec_sub)
DO_3SAME(VAND, tcg_gen_gvec_and)
DO_3SAME(VBIC, tcg_gen_gvec_andc)
DO_3SAME(VORR, tcg_gen_gvec_or)
DO_3SAME(VORN, tcg_gen_gvec_orc)
DO_3SAME(VEOR, tcg_gen_gvec_xor)
DO_3SAME(VSHL_S, gen_gvec_sshl)
DO_3SAME(VSHL_U, gen_gvec_ushl)
DO_3SAME(VQADD_S, gen_gvec_sqadd_qc)
DO_3SAME(VQADD_U, gen_gvec_uqadd_qc)
DO_3SAME(VQSUB_S, gen_gvec_sqsub_qc)
DO_3SAME(VQSUB_U, gen_gvec_uqsub_qc)
805
806
/* These insns are all gvec_bitsel with the three inputs in various orders. */
#define DO_3SAME_BITSEL(INSN, O1, O2, O3)                               \
    static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs,         \
                                uint32_t rn_ofs, uint32_t rm_ofs,       \
                                uint32_t oprsz, uint32_t maxsz)         \
    {                                                                   \
        tcg_gen_gvec_bitsel(vece, rd_ofs, O1, O2, O3, oprsz, maxsz);    \
    }                                                                   \
    DO_3SAME(INSN, gen_##INSN##_3s)

DO_3SAME_BITSEL(VBSL, rd_ofs, rn_ofs, rm_ofs)
DO_3SAME_BITSEL(VBIT, rm_ofs, rn_ofs, rd_ofs)
DO_3SAME_BITSEL(VBIF, rm_ofs, rd_ofs, rn_ofs)
819
/* As DO_3SAME, but for insns that are UNDEF for size == 3 (64-bit elts). */
#define DO_3SAME_NO_SZ_3(INSN, FUNC)                                    \
    static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a)        \
    {                                                                   \
        if (a->size == 3) {                                             \
            return false;                                               \
        }                                                               \
        return do_3same(s, a, FUNC);                                    \
    }

DO_3SAME_NO_SZ_3(VMAX_S, tcg_gen_gvec_smax)
DO_3SAME_NO_SZ_3(VMAX_U, tcg_gen_gvec_umax)
DO_3SAME_NO_SZ_3(VMIN_S, tcg_gen_gvec_smin)
DO_3SAME_NO_SZ_3(VMIN_U, tcg_gen_gvec_umin)
DO_3SAME_NO_SZ_3(VMUL, tcg_gen_gvec_mul)
DO_3SAME_NO_SZ_3(VMLA, gen_gvec_mla)
DO_3SAME_NO_SZ_3(VMLS, gen_gvec_mls)
DO_3SAME_NO_SZ_3(VTST, gen_gvec_cmtst)
DO_3SAME_NO_SZ_3(VABD_S, gen_gvec_sabd)
DO_3SAME_NO_SZ_3(VABA_S, gen_gvec_saba)
DO_3SAME_NO_SZ_3(VABD_U, gen_gvec_uabd)
DO_3SAME_NO_SZ_3(VABA_U, gen_gvec_uaba)
841
/* Integer element compares, expanded via the generic gvec compare. */
#define DO_3SAME_CMP(INSN, COND)                                        \
    static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs,         \
                                uint32_t rn_ofs, uint32_t rm_ofs,       \
                                uint32_t oprsz, uint32_t maxsz)         \
    {                                                                   \
        tcg_gen_gvec_cmp(COND, vece, rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz); \
    }                                                                   \
    DO_3SAME_NO_SZ_3(INSN, gen_##INSN##_3s)

DO_3SAME_CMP(VCGT_S, TCG_COND_GT)
DO_3SAME_CMP(VCGT_U, TCG_COND_GTU)
DO_3SAME_CMP(VCGE_S, TCG_COND_GE)
DO_3SAME_CMP(VCGE_U, TCG_COND_GEU)
DO_3SAME_CMP(VCEQ, TCG_COND_EQ)
856
/* Wrap an out-of-line helper in a GVecGen3Fn-shaped function. */
#define WRAP_OOL_FN(WRAPNAME, FUNC)                                        \
    static void WRAPNAME(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,  \
                         uint32_t rm_ofs, uint32_t oprsz, uint32_t maxsz)  \
    {                                                                      \
        tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, 0, FUNC); \
    }

WRAP_OOL_FN(gen_VMUL_p_3s, gen_helper_gvec_pmul_b)
865
866static bool trans_VMUL_p_3s(DisasContext *s, arg_3same *a)
867{
868 if (a->size != 0) {
869 return false;
870 }
871 return do_3same(s, a, gen_VMUL_p_3s);
872}
873
/* VQRDMLAH/VQRDMLSH: require the RDM feature; only 16/32-bit elements. */
#define DO_VQRDMLAH(INSN, FUNC)                                         \
    static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a)        \
    {                                                                   \
        if (!dc_isar_feature(aa32_rdm, s)) {                            \
            return false;                                               \
        }                                                               \
        if (a->size != 1 && a->size != 2) {                             \
            return false;                                               \
        }                                                               \
        return do_3same(s, a, FUNC);                                    \
    }

DO_VQRDMLAH(VQRDMLAH, gen_gvec_sqrdmlah_qc)
DO_VQRDMLAH(VQRDMLSH, gen_gvec_sqrdmlsh_qc)
888
/* SHA-1 crypto insns: gated on the SHA1 feature. */
#define DO_SHA1(NAME, FUNC)                                             \
    WRAP_OOL_FN(gen_##NAME##_3s, FUNC)                                  \
    static bool trans_##NAME##_3s(DisasContext *s, arg_3same *a)        \
    {                                                                   \
        if (!dc_isar_feature(aa32_sha1, s)) {                           \
            return false;                                               \
        }                                                               \
        return do_3same(s, a, gen_##NAME##_3s);                         \
    }

DO_SHA1(SHA1C, gen_helper_crypto_sha1c)
DO_SHA1(SHA1P, gen_helper_crypto_sha1p)
DO_SHA1(SHA1M, gen_helper_crypto_sha1m)
DO_SHA1(SHA1SU0, gen_helper_crypto_sha1su0)
903
/* SHA-256 crypto insns: gated on the SHA2 feature. */
#define DO_SHA2(NAME, FUNC)                                             \
    WRAP_OOL_FN(gen_##NAME##_3s, FUNC)                                  \
    static bool trans_##NAME##_3s(DisasContext *s, arg_3same *a)        \
    {                                                                   \
        if (!dc_isar_feature(aa32_sha2, s)) {                           \
            return false;                                               \
        }                                                               \
        return do_3same(s, a, gen_##NAME##_3s);                         \
    }

DO_SHA2(SHA256H, gen_helper_crypto_sha256h)
DO_SHA2(SHA256H2, gen_helper_crypto_sha256h2)
DO_SHA2(SHA256SU1, gen_helper_crypto_sha256su1)
917
/* 64-bit element ops, expanded per-element via a .fni8 generator. */
#define DO_3SAME_64(INSN, FUNC)                                         \
    static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs,         \
                                uint32_t rn_ofs, uint32_t rm_ofs,       \
                                uint32_t oprsz, uint32_t maxsz)         \
    {                                                                   \
        static const GVecGen3 op = { .fni8 = FUNC };                    \
        tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &op);      \
    }                                                                   \
    DO_3SAME_64(INSN, gen_##INSN##_3s)

/* As DO_3SAME_64, but for helpers that also need cpu_env. */
#define DO_3SAME_64_ENV(INSN, FUNC)                                     \
    static void gen_##INSN##_elt(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)    \
    {                                                                   \
        FUNC(d, cpu_env, n, m);                                         \
    }                                                                   \
    DO_3SAME_64(INSN, gen_##INSN##_elt)

DO_3SAME_64(VRSHL_S64, gen_helper_neon_rshl_s64)
DO_3SAME_64(VRSHL_U64, gen_helper_neon_rshl_u64)
DO_3SAME_64_ENV(VQSHL_S64, gen_helper_neon_qshl_s64)
DO_3SAME_64_ENV(VQSHL_U64, gen_helper_neon_qshl_u64)
DO_3SAME_64_ENV(VQRSHL_S64, gen_helper_neon_qrshl_s64)
DO_3SAME_64_ENV(VQRSHL_U64, gen_helper_neon_qrshl_u64)
941
/*
 * 8/16/32-bit element ops expanded per-element via helpers with the
 * NeonGenTwoOpFn shape, one per element size.
 */
#define DO_3SAME_32(INSN, FUNC)                                         \
    static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs,         \
                                uint32_t rn_ofs, uint32_t rm_ofs,       \
                                uint32_t oprsz, uint32_t maxsz)         \
    {                                                                   \
        static const GVecGen3 ops[4] = {                                \
            { .fni4 = gen_helper_neon_##FUNC##8 },                      \
            { .fni4 = gen_helper_neon_##FUNC##16 },                     \
            { .fni4 = gen_helper_neon_##FUNC##32 },                     \
            { 0 },                                                      \
        };                                                              \
        tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops[vece]); \
    }                                                                   \
    static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a)        \
    {                                                                   \
        if (a->size > 2) {                                              \
            return false;                                               \
        }                                                               \
        return do_3same(s, a, gen_##INSN##_3s);                         \
    }
962
963
964
965
966
967
968
/*
 * Some helpers need cpu_env. To use them with the gvec APIs we wrap them
 * in functions with the NeonGenTwoOpFn prototype that pass cpu_env along.
 */
#define WRAP_ENV_FN(WRAPNAME, FUNC)                                     \
    static void WRAPNAME(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m)            \
    {                                                                   \
        FUNC(d, cpu_env, n, m);                                         \
    }
974
/* As DO_3SAME_32, but for helpers that also need cpu_env. */
#define DO_3SAME_32_ENV(INSN, FUNC)                                     \
    WRAP_ENV_FN(gen_##INSN##_tramp8, gen_helper_neon_##FUNC##8);        \
    WRAP_ENV_FN(gen_##INSN##_tramp16, gen_helper_neon_##FUNC##16);      \
    WRAP_ENV_FN(gen_##INSN##_tramp32, gen_helper_neon_##FUNC##32);      \
    static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs,         \
                                uint32_t rn_ofs, uint32_t rm_ofs,       \
                                uint32_t oprsz, uint32_t maxsz)         \
    {                                                                   \
        static const GVecGen3 ops[4] = {                                \
            { .fni4 = gen_##INSN##_tramp8 },                            \
            { .fni4 = gen_##INSN##_tramp16 },                           \
            { .fni4 = gen_##INSN##_tramp32 },                           \
            { 0 },                                                      \
        };                                                              \
        tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops[vece]); \
    }                                                                   \
    static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a)        \
    {                                                                   \
        if (a->size > 2) {                                              \
            return false;                                               \
        }                                                               \
        return do_3same(s, a, gen_##INSN##_3s);                         \
    }

DO_3SAME_32(VHADD_S, hadd_s)
DO_3SAME_32(VHADD_U, hadd_u)
DO_3SAME_32(VHSUB_S, hsub_s)
DO_3SAME_32(VHSUB_U, hsub_u)
DO_3SAME_32(VRHADD_S, rhadd_s)
DO_3SAME_32(VRHADD_U, rhadd_u)
DO_3SAME_32(VRSHL_S, rshl_s)
DO_3SAME_32(VRSHL_U, rshl_u)

DO_3SAME_32_ENV(VQSHL_S, qshl_s)
DO_3SAME_32_ENV(VQSHL_U, qshl_u)
DO_3SAME_32_ENV(VQRSHL_S, qrshl_s)
DO_3SAME_32_ENV(VQRSHL_U, qrshl_u)
1012
static bool do_3same_pair(DisasContext *s, arg_3same *a, NeonGenTwoOpFn *fn)
{
    /* Operations handled pairwise 32 bits at a time */
    TCGv_i32 tmp, tmp2, tmp3;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if (a->size == 3) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    assert(a->q == 0); /* enforced by decode patterns */

    /*
     * Note that we have to be careful not to clobber the source operands
     * in the "vm == vd" case by storing the result of the first pass too
     * early. Since Q == 0 there are always exactly two passes, so instead
     * of a complicated loop over each pass we just unroll.
     */
    tmp = tcg_temp_new_i32();
    tmp2 = tcg_temp_new_i32();
    tmp3 = tcg_temp_new_i32();

    read_neon_element32(tmp, a->vn, 0, MO_32);
    read_neon_element32(tmp2, a->vn, 1, MO_32);
    fn(tmp, tmp, tmp2);

    read_neon_element32(tmp3, a->vm, 0, MO_32);
    read_neon_element32(tmp2, a->vm, 1, MO_32);
    fn(tmp3, tmp3, tmp2);

    write_neon_element32(tmp, a->vd, 0, MO_32);
    write_neon_element32(tmp3, a->vd, 1, MO_32);

    tcg_temp_free_i32(tmp);
    tcg_temp_free_i32(tmp2);
    tcg_temp_free_i32(tmp3);
    return true;
}
1064
/* Pairwise integer ops, dispatched by element size. */
#define DO_3SAME_PAIR(INSN, func)                                       \
    static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a)        \
    {                                                                   \
        static NeonGenTwoOpFn * const fns[] = {                         \
            gen_helper_neon_##func##8,                                  \
            gen_helper_neon_##func##16,                                 \
            gen_helper_neon_##func##32,                                 \
        };                                                              \
        if (a->size > 2) {                                              \
            return false;                                               \
        }                                                               \
        return do_3same_pair(s, a, fns[a->size]);                       \
    }

/* 32-bit pairwise ops end up the same as the elementwise versions. */
#define gen_helper_neon_pmax_s32  tcg_gen_smax_i32
#define gen_helper_neon_pmax_u32  tcg_gen_umax_i32
#define gen_helper_neon_pmin_s32  tcg_gen_smin_i32
#define gen_helper_neon_pmin_u32  tcg_gen_umin_i32
#define gen_helper_neon_padd_u32  tcg_gen_add_i32

DO_3SAME_PAIR(VPMAX_S, pmax_s)
DO_3SAME_PAIR(VPMIN_S, pmin_s)
DO_3SAME_PAIR(VPMAX_U, pmax_u)
DO_3SAME_PAIR(VPMIN_U, pmin_u)
DO_3SAME_PAIR(VPADD, padd_u)
1091
/*
 * VQDMULH/VQRDMULH: only 16- and 32-bit elements are valid; the ops
 * table is indexed by vece - 1 accordingly.
 */
#define DO_3SAME_VQDMULH(INSN, FUNC)                                    \
    WRAP_ENV_FN(gen_##INSN##_tramp16, gen_helper_neon_##FUNC##_s16);    \
    WRAP_ENV_FN(gen_##INSN##_tramp32, gen_helper_neon_##FUNC##_s32);    \
    static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs,         \
                                uint32_t rn_ofs, uint32_t rm_ofs,       \
                                uint32_t oprsz, uint32_t maxsz)         \
    {                                                                   \
        static const GVecGen3 ops[2] = {                                \
            { .fni4 = gen_##INSN##_tramp16 },                           \
            { .fni4 = gen_##INSN##_tramp32 },                           \
        };                                                              \
        tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops[vece - 1]); \
    }                                                                   \
    static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a)        \
    {                                                                   \
        if (a->size != 1 && a->size != 2) {                             \
            return false;                                               \
        }                                                               \
        return do_3same(s, a, gen_##INSN##_3s);                         \
    }

DO_3SAME_VQDMULH(VQDMULH, qdmulh)
DO_3SAME_VQDMULH(VQRDMULH, qrdmulh)
1115
/*
 * Wrap an FP gvec helper in a GVecGen3Fn-shaped function that creates
 * (and frees) the float_status pointer of the requested flavour.
 */
#define WRAP_FP_GVEC(WRAPNAME, FPST, FUNC)                              \
    static void WRAPNAME(unsigned vece, uint32_t rd_ofs,                \
                         uint32_t rn_ofs, uint32_t rm_ofs,              \
                         uint32_t oprsz, uint32_t maxsz)                \
    {                                                                   \
        TCGv_ptr fpst = fpstatus_ptr(FPST);                             \
        tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, fpst,                \
                           oprsz, maxsz, 0, FUNC);                      \
        tcg_temp_free_ptr(fpst);                                        \
    }
1126
/*
 * 3-same FP insns with fp32 and fp16 variants; the fp16 form requires
 * the half-precision arithmetic feature.
 */
#define DO_3S_FP_GVEC(INSN,SFUNC,HFUNC)                                 \
    WRAP_FP_GVEC(gen_##INSN##_fp32_3s, FPST_STD, SFUNC)                 \
    WRAP_FP_GVEC(gen_##INSN##_fp16_3s, FPST_STD_F16, HFUNC)             \
    static bool trans_##INSN##_fp_3s(DisasContext *s, arg_3same *a)     \
    {                                                                   \
        if (a->size == MO_16) {                                         \
            if (!dc_isar_feature(aa32_fp16_arith, s)) {                 \
                return false;                                           \
            }                                                           \
            return do_3same(s, a, gen_##INSN##_fp16_3s);                \
        }                                                               \
        return do_3same(s, a, gen_##INSN##_fp32_3s);                    \
    }

DO_3S_FP_GVEC(VADD, gen_helper_gvec_fadd_s, gen_helper_gvec_fadd_h)
DO_3S_FP_GVEC(VSUB, gen_helper_gvec_fsub_s, gen_helper_gvec_fsub_h)
DO_3S_FP_GVEC(VABD, gen_helper_gvec_fabd_s, gen_helper_gvec_fabd_h)
DO_3S_FP_GVEC(VMUL, gen_helper_gvec_fmul_s, gen_helper_gvec_fmul_h)
DO_3S_FP_GVEC(VCEQ, gen_helper_gvec_fceq_s, gen_helper_gvec_fceq_h)
DO_3S_FP_GVEC(VCGE, gen_helper_gvec_fcge_s, gen_helper_gvec_fcge_h)
DO_3S_FP_GVEC(VCGT, gen_helper_gvec_fcgt_s, gen_helper_gvec_fcgt_h)
DO_3S_FP_GVEC(VACGE, gen_helper_gvec_facge_s, gen_helper_gvec_facge_h)
DO_3S_FP_GVEC(VACGT, gen_helper_gvec_facgt_s, gen_helper_gvec_facgt_h)
DO_3S_FP_GVEC(VMAX, gen_helper_gvec_fmax_s, gen_helper_gvec_fmax_h)
DO_3S_FP_GVEC(VMIN, gen_helper_gvec_fmin_s, gen_helper_gvec_fmin_h)
DO_3S_FP_GVEC(VMLA, gen_helper_gvec_fmla_s, gen_helper_gvec_fmla_h)
DO_3S_FP_GVEC(VMLS, gen_helper_gvec_fmls_s, gen_helper_gvec_fmls_h)
DO_3S_FP_GVEC(VFMA, gen_helper_gvec_vfma_s, gen_helper_gvec_vfma_h)
DO_3S_FP_GVEC(VFMS, gen_helper_gvec_vfms_s, gen_helper_gvec_vfms_h)
DO_3S_FP_GVEC(VRECPS, gen_helper_gvec_recps_nf_s, gen_helper_gvec_recps_nf_h)
DO_3S_FP_GVEC(VRSQRTS, gen_helper_gvec_rsqrts_nf_s, gen_helper_gvec_rsqrts_nf_h)
1159
WRAP_FP_GVEC(gen_VMAXNM_fp32_3s, FPST_STD, gen_helper_gvec_fmaxnum_s)
WRAP_FP_GVEC(gen_VMAXNM_fp16_3s, FPST_STD_F16, gen_helper_gvec_fmaxnum_h)
WRAP_FP_GVEC(gen_VMINNM_fp32_3s, FPST_STD, gen_helper_gvec_fminnum_s)
WRAP_FP_GVEC(gen_VMINNM_fp16_3s, FPST_STD_F16, gen_helper_gvec_fminnum_h)

/* VMAXNM: v8-only; fp16 form additionally requires FP16 arithmetic. */
static bool trans_VMAXNM_fp_3s(DisasContext *s, arg_3same *a)
{
    if (!arm_dc_feature(s, ARM_FEATURE_V8)) {
        return false;
    }

    if (a->size == MO_16) {
        if (!dc_isar_feature(aa32_fp16_arith, s)) {
            return false;
        }
        return do_3same(s, a, gen_VMAXNM_fp16_3s);
    }
    return do_3same(s, a, gen_VMAXNM_fp32_3s);
}

/* VMINNM: v8-only; fp16 form additionally requires FP16 arithmetic. */
static bool trans_VMINNM_fp_3s(DisasContext *s, arg_3same *a)
{
    if (!arm_dc_feature(s, ARM_FEATURE_V8)) {
        return false;
    }

    if (a->size == MO_16) {
        if (!dc_isar_feature(aa32_fp16_arith, s)) {
            return false;
        }
        return do_3same(s, a, gen_VMINNM_fp16_3s);
    }
    return do_3same(s, a, gen_VMINNM_fp32_3s);
}
1194
static bool do_3same_fp_pair(DisasContext *s, arg_3same *a,
                             gen_helper_gvec_3_ptr *fn)
{
    /* FP pairwise operations */
    TCGv_ptr fpstatus;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    assert(a->q == 0); /* enforced by decode patterns */

    fpstatus = fpstatus_ptr(a->size == MO_16 ? FPST_STD_F16 : FPST_STD);
    tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
                       vfp_reg_offset(1, a->vn),
                       vfp_reg_offset(1, a->vm),
                       fpstatus, 8, 8, 0, fn);
    tcg_temp_free_ptr(fpstatus);

    return true;
}
1227
1228
1229
1230
1231
/* FP pairwise ops; fp16 variant requires the half-precision feature. */
#define DO_3S_FP_PAIR(INSN,FUNC)                                        \
    static bool trans_##INSN##_fp_3s(DisasContext *s, arg_3same *a)     \
    {                                                                   \
        if (a->size == MO_16) {                                         \
            if (!dc_isar_feature(aa32_fp16_arith, s)) {                 \
                return false;                                           \
            }                                                           \
            return do_3same_fp_pair(s, a, FUNC##h);                     \
        }                                                               \
        return do_3same_fp_pair(s, a, FUNC##s);                         \
    }

DO_3S_FP_PAIR(VPADD, gen_helper_neon_padd)
DO_3S_FP_PAIR(VPMAX, gen_helper_neon_pmax)
DO_3S_FP_PAIR(VPMIN, gen_helper_neon_pmin)
1247
static bool do_vector_2sh(DisasContext *s, arg_2reg_shift *a, GVecGen2iFn *fn)
{
    /* Handle a 2-reg-and-shift insn which can be vectorized. */
    int vec_size = a->q ? 16 : 8;
    int rd_ofs = neon_full_reg_offset(a->vd);
    int rm_ofs = neon_full_reg_offset(a->vm);

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    /* For Q insns both registers must be even (valid Q register numbers). */
    if ((a->vm | a->vd) & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fn(a->size, rd_ofs, rm_ofs, a->shift, vec_size, vec_size);
    return true;
}
1276
/* Define a trans function for a vectorizable 2-reg-and-shift insn. */
#define DO_2SH(INSN, FUNC)                                              \
    static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a)  \
    {                                                                   \
        return do_vector_2sh(s, a, FUNC);                               \
    }                                                                   \

DO_2SH(VSHL, tcg_gen_gvec_shli)
DO_2SH(VSLI, gen_gvec_sli)
DO_2SH(VSRI, gen_gvec_sri)
DO_2SH(VSRA_S, gen_gvec_ssra)
DO_2SH(VSRA_U, gen_gvec_usra)
DO_2SH(VRSHR_S, gen_gvec_srshr)
DO_2SH(VRSHR_U, gen_gvec_urshr)
DO_2SH(VRSRA_S, gen_gvec_srsra)
DO_2SH(VRSRA_U, gen_gvec_ursra)
1292
1293static bool trans_VSHR_S_2sh(DisasContext *s, arg_2reg_shift *a)
1294{
1295
1296 a->shift = MIN(a->shift, (8 << a->size) - 1);
1297 return do_vector_2sh(s, a, tcg_gen_gvec_sari);
1298}
1299
/*
 * gvec-style callback used for the VSHR_U case where the shift count
 * equals the element width: the result is architecturally zero, so
 * just fill Vd with zeroes (rm_ofs and shift are intentionally unused).
 */
static void gen_zero_rd_2sh(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                            int64_t shift, uint32_t oprsz, uint32_t maxsz)
{
    tcg_gen_gvec_dup_imm(vece, rd_ofs, oprsz, maxsz, 0);
}
1305
1306static bool trans_VSHR_U_2sh(DisasContext *s, arg_2reg_shift *a)
1307{
1308
1309 if (a->shift >= (8 << a->size)) {
1310 return do_vector_2sh(s, a, gen_zero_rd_2sh);
1311 } else {
1312 return do_vector_2sh(s, a, tcg_gen_gvec_shri);
1313 }
1314}
1315
static bool do_2shift_env_64(DisasContext *s, arg_2reg_shift *a,
                             NeonGenTwo64OpEnvFn *fn)
{
    /*
     * 2-reg-and-shift operations, size == 3 case, where the
     * helper needs to be passed cpu_env (saturating shifts).
     */
    TCGv_i64 constimm;
    int pass;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    /* Q-sized ops require even register numbers */
    if ((a->vm | a->vd) & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /*
     * To avoid excessive duplication of ops we implement shift by
     * immediate using the variable-shift helpers, with the shift
     * count duplicated into each lane of the immediate value.
     */
    constimm = tcg_const_i64(dup_const(a->size, a->shift));

    /* One 64-bit pass for a D reg, two for a Q reg */
    for (pass = 0; pass < a->q + 1; pass++) {
        TCGv_i64 tmp = tcg_temp_new_i64();

        read_neon_element64(tmp, a->vm, pass, MO_64);
        fn(tmp, cpu_env, tmp, constimm);
        write_neon_element64(tmp, a->vd, pass, MO_64);
        tcg_temp_free_i64(tmp);
    }
    tcg_temp_free_i64(constimm);
    return true;
}
1361
static bool do_2shift_env_32(DisasContext *s, arg_2reg_shift *a,
                             NeonGenTwoOpEnvFn *fn)
{
    /*
     * 2-reg-and-shift operations, size < 3 case, where the
     * helper needs to be passed cpu_env (saturating shifts).
     */
    TCGv_i32 constimm, tmp;
    int pass;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    /* Q-sized ops require even register numbers */
    if ((a->vm | a->vd) & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /*
     * To avoid excessive duplication of ops we implement shift by
     * immediate using the variable-shift helpers, with the shift
     * count duplicated into each lane of the immediate value.
     */
    constimm = tcg_const_i32(dup_const(a->size, a->shift));
    tmp = tcg_temp_new_i32();

    for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
        read_neon_element32(tmp, a->vm, pass, MO_32);
        fn(tmp, cpu_env, tmp, constimm);
        write_neon_element32(tmp, a->vd, pass, MO_32);
    }
    tcg_temp_free_i32(tmp);
    tcg_temp_free_i32(constimm);
    return true;
}
1406
/*
 * Saturating shift-by-immediate: one trans function for the 64-bit
 * element case and one dispatching the 8/16/32-bit helpers on a->size.
 */
#define DO_2SHIFT_ENV(INSN, FUNC)                                       \
    static bool trans_##INSN##_64_2sh(DisasContext *s, arg_2reg_shift *a) \
    {                                                                   \
        return do_2shift_env_64(s, a, gen_helper_neon_##FUNC##64);      \
    }                                                                   \
    static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a)  \
    {                                                                   \
        static NeonGenTwoOpEnvFn * const fns[] = {                      \
            gen_helper_neon_##FUNC##8,                                  \
            gen_helper_neon_##FUNC##16,                                 \
            gen_helper_neon_##FUNC##32,                                 \
        };                                                              \
        assert(a->size < ARRAY_SIZE(fns));                              \
        return do_2shift_env_32(s, a, fns[a->size]);                    \
    }

DO_2SHIFT_ENV(VQSHLU, qshlu_s)
DO_2SHIFT_ENV(VQSHL_U, qshl_u)
DO_2SHIFT_ENV(VQSHL_S, qshl_s)
1426
static bool do_2shift_narrow_64(DisasContext *s, arg_2reg_shift *a,
                                NeonGenTwo64OpFn *shiftfn,
                                NeonGenNarrowEnvFn *narrowfn)
{
    /* 2-reg-and-shift narrowing-shift operations, size == 3 case */
    TCGv_i64 constimm, rm1, rm2;
    TCGv_i32 rd;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    /* The source is a Q register, so must be even */
    if (a->vm & 1) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /*
     * This is always a right shift, and the shiftfn is always a
     * left-shift helper, which thus needs the negated shift count.
     */
    constimm = tcg_const_i64(-a->shift);
    rm1 = tcg_temp_new_i64();
    rm2 = tcg_temp_new_i64();
    rd = tcg_temp_new_i32();

    /* Load both inputs first to avoid potential overwrite by the stores */
    read_neon_element64(rm1, a->vm, 0, MO_64);
    read_neon_element64(rm2, a->vm, 1, MO_64);

    shiftfn(rm1, rm1, constimm);
    narrowfn(rd, cpu_env, rm1);
    write_neon_element32(rd, a->vd, 0, MO_32);

    shiftfn(rm2, rm2, constimm);
    narrowfn(rd, cpu_env, rm2);
    write_neon_element32(rd, a->vd, 1, MO_32);

    tcg_temp_free_i32(rd);
    tcg_temp_free_i64(rm1);
    tcg_temp_free_i64(rm2);
    tcg_temp_free_i64(constimm);

    return true;
}
1481
static bool do_2shift_narrow_32(DisasContext *s, arg_2reg_shift *a,
                                NeonGenTwoOpFn *shiftfn,
                                NeonGenNarrowEnvFn *narrowfn)
{
    /* 2-reg-and-shift narrowing-shift operations, size < 3 case */
    TCGv_i32 constimm, rm1, rm2, rm3, rm4;
    TCGv_i64 rtmp;
    uint32_t imm;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    /* The source is a Q register, so must be even */
    if (a->vm & 1) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /*
     * This is always a right shift, and the shiftfn is always a
     * left-shift helper, which thus needs the negated shift count
     * duplicated into each lane of the immediate value.
     */
    if (a->size == 1) {
        imm = (uint16_t)(-a->shift);
        imm |= imm << 16;
    } else {
        /* size == 2 */
        imm = -a->shift;
    }
    constimm = tcg_const_i32(imm);

    /* Load all inputs first to avoid potential overwrite by the stores */
    rm1 = tcg_temp_new_i32();
    rm2 = tcg_temp_new_i32();
    rm3 = tcg_temp_new_i32();
    rm4 = tcg_temp_new_i32();
    read_neon_element32(rm1, a->vm, 0, MO_32);
    read_neon_element32(rm2, a->vm, 1, MO_32);
    read_neon_element32(rm3, a->vm, 2, MO_32);
    read_neon_element32(rm4, a->vm, 3, MO_32);
    rtmp = tcg_temp_new_i64();

    shiftfn(rm1, rm1, constimm);
    shiftfn(rm2, rm2, constimm);

    /* Reassemble a 64-bit value so the 64->32 narrowing helper can be used */
    tcg_gen_concat_i32_i64(rtmp, rm1, rm2);
    tcg_temp_free_i32(rm2);

    narrowfn(rm1, cpu_env, rtmp);
    write_neon_element32(rm1, a->vd, 0, MO_32);
    tcg_temp_free_i32(rm1);

    shiftfn(rm3, rm3, constimm);
    shiftfn(rm4, rm4, constimm);
    tcg_temp_free_i32(constimm);

    tcg_gen_concat_i32_i64(rtmp, rm3, rm4);
    tcg_temp_free_i32(rm4);

    narrowfn(rm3, cpu_env, rtmp);
    tcg_temp_free_i64(rtmp);
    write_neon_element32(rm3, a->vd, 1, MO_32);
    tcg_temp_free_i32(rm3);
    return true;
}
1557
/* Wrap a (shift, narrow) helper pair as a shift-and-narrow trans function */
#define DO_2SN_64(INSN, FUNC, NARROWFUNC)                               \
    static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a)  \
    {                                                                   \
        return do_2shift_narrow_64(s, a, FUNC, NARROWFUNC);             \
    }
#define DO_2SN_32(INSN, FUNC, NARROWFUNC)                               \
    static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a)  \
    {                                                                   \
        return do_2shift_narrow_32(s, a, FUNC, NARROWFUNC);             \
    }

/*
 * Adapters giving the plain (non-env) narrowing ops the
 * NeonGenNarrowEnvFn signature; the env argument is unused.
 */
static void gen_neon_narrow_u32(TCGv_i32 dest, TCGv_ptr env, TCGv_i64 src)
{
    tcg_gen_extrl_i64_i32(dest, src);
}

static void gen_neon_narrow_u16(TCGv_i32 dest, TCGv_ptr env, TCGv_i64 src)
{
    gen_helper_neon_narrow_u16(dest, src);
}

static void gen_neon_narrow_u8(TCGv_i32 dest, TCGv_ptr env, TCGv_i64 src)
{
    gen_helper_neon_narrow_u8(dest, src);
}
1583
/* Plain shift-and-narrow */
DO_2SN_64(VSHRN_64, gen_ushl_i64, gen_neon_narrow_u32)
DO_2SN_32(VSHRN_32, gen_ushl_i32, gen_neon_narrow_u16)
DO_2SN_32(VSHRN_16, gen_helper_neon_shl_u16, gen_neon_narrow_u8)

/* Rounding shift-and-narrow */
DO_2SN_64(VRSHRN_64, gen_helper_neon_rshl_u64, gen_neon_narrow_u32)
DO_2SN_32(VRSHRN_32, gen_helper_neon_rshl_u32, gen_neon_narrow_u16)
DO_2SN_32(VRSHRN_16, gen_helper_neon_rshl_u16, gen_neon_narrow_u8)

/* Saturating shift-and-narrow, signed input to unsigned result */
DO_2SN_64(VQSHRUN_64, gen_sshl_i64, gen_helper_neon_unarrow_sat32)
DO_2SN_32(VQSHRUN_32, gen_sshl_i32, gen_helper_neon_unarrow_sat16)
DO_2SN_32(VQSHRUN_16, gen_helper_neon_shl_s16, gen_helper_neon_unarrow_sat8)

/* Rounding saturating variants, signed and unsigned */
DO_2SN_64(VQRSHRUN_64, gen_helper_neon_rshl_s64, gen_helper_neon_unarrow_sat32)
DO_2SN_32(VQRSHRUN_32, gen_helper_neon_rshl_s32, gen_helper_neon_unarrow_sat16)
DO_2SN_32(VQRSHRUN_16, gen_helper_neon_rshl_s16, gen_helper_neon_unarrow_sat8)
DO_2SN_64(VQSHRN_S64, gen_sshl_i64, gen_helper_neon_narrow_sat_s32)
DO_2SN_32(VQSHRN_S32, gen_sshl_i32, gen_helper_neon_narrow_sat_s16)
DO_2SN_32(VQSHRN_S16, gen_helper_neon_shl_s16, gen_helper_neon_narrow_sat_s8)

DO_2SN_64(VQRSHRN_S64, gen_helper_neon_rshl_s64, gen_helper_neon_narrow_sat_s32)
DO_2SN_32(VQRSHRN_S32, gen_helper_neon_rshl_s32, gen_helper_neon_narrow_sat_s16)
DO_2SN_32(VQRSHRN_S16, gen_helper_neon_rshl_s16, gen_helper_neon_narrow_sat_s8)

DO_2SN_64(VQSHRN_U64, gen_ushl_i64, gen_helper_neon_narrow_sat_u32)
DO_2SN_32(VQSHRN_U32, gen_ushl_i32, gen_helper_neon_narrow_sat_u16)
DO_2SN_32(VQSHRN_U16, gen_helper_neon_shl_u16, gen_helper_neon_narrow_sat_u8)

DO_2SN_64(VQRSHRN_U64, gen_helper_neon_rshl_u64, gen_helper_neon_narrow_sat_u32)
DO_2SN_32(VQRSHRN_U32, gen_helper_neon_rshl_u32, gen_helper_neon_narrow_sat_u16)
DO_2SN_32(VQRSHRN_U16, gen_helper_neon_rshl_u16, gen_helper_neon_narrow_sat_u8)
1614
/* Widen-and-left-shift: VSHLL, and VMOVL when the shift is zero */
static bool do_vshll_2sh(DisasContext *s, arg_2reg_shift *a,
                         NeonGenWidenFn *widenfn, bool u)
{
    TCGv_i64 tmp;
    TCGv_i32 rm0, rm1;
    uint64_t widen_mask = 0;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    /* The destination is a Q register, so must be even */
    if (a->vd & 1) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /*
     * The widened elements are packed side by side in a 64-bit value,
     * and the left shift below is performed on the whole 64 bits at
     * once, so bits shifted out of one element would land in the low
     * bits of the element above it.  Precompute a mask of those bit
     * positions (the low a->shift bits of each widened element) so
     * they can be cleared after the shift.  For the signed 32->64
     * case there is only one element per 64-bit value, so no mask is
     * needed; for the unsigned 32->64 case the mask is a no-op.
     */
    if ((a->shift != 0) && (a->size < 2 || u)) {
        int esize = 8 << a->size;
        widen_mask = MAKE_64BIT_MASK(0, esize);
        widen_mask >>= esize - a->shift;
        widen_mask = dup_const(a->size + 1, widen_mask);
    }

    rm0 = tcg_temp_new_i32();
    rm1 = tcg_temp_new_i32();
    read_neon_element32(rm0, a->vm, 0, MO_32);
    read_neon_element32(rm1, a->vm, 1, MO_32);
    tmp = tcg_temp_new_i64();

    widenfn(tmp, rm0);
    tcg_temp_free_i32(rm0);
    if (a->shift != 0) {
        tcg_gen_shli_i64(tmp, tmp, a->shift);
        tcg_gen_andi_i64(tmp, tmp, ~widen_mask);
    }
    write_neon_element64(tmp, a->vd, 0, MO_64);

    widenfn(tmp, rm1);
    tcg_temp_free_i32(rm1);
    if (a->shift != 0) {
        tcg_gen_shli_i64(tmp, tmp, a->shift);
        tcg_gen_andi_i64(tmp, tmp, ~widen_mask);
    }
    write_neon_element64(tmp, a->vd, 1, MO_64);
    tcg_temp_free_i64(tmp);
    return true;
}
1679
static bool trans_VSHLL_S_2sh(DisasContext *s, arg_2reg_shift *a)
{
    /* Signed widening functions, indexed by a->size (0/1/2 only) */
    static NeonGenWidenFn * const widenfn[] = {
        gen_helper_neon_widen_s8,
        gen_helper_neon_widen_s16,
        tcg_gen_ext_i32_i64,
    };
    return do_vshll_2sh(s, a, widenfn[a->size], false);
}

static bool trans_VSHLL_U_2sh(DisasContext *s, arg_2reg_shift *a)
{
    /* Unsigned widening functions, indexed by a->size (0/1/2 only) */
    static NeonGenWidenFn * const widenfn[] = {
        gen_helper_neon_widen_u8,
        gen_helper_neon_widen_u16,
        tcg_gen_extu_i32_i64,
    };
    return do_vshll_2sh(s, a, widenfn[a->size], true);
}
1699
static bool do_fp_2sh(DisasContext *s, arg_2reg_shift *a,
                      gen_helper_gvec_2_ptr *fn)
{
    /* FP operations in 2-reg-and-shift group (fixed <-> float conversions) */
    int vec_size = a->q ? 16 : 8;
    int rd_ofs = neon_full_reg_offset(a->vd);
    int rm_ofs = neon_full_reg_offset(a->vm);
    TCGv_ptr fpst;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    if (a->size == MO_16) {
        /* Half-precision forms need the FP16 arithmetic extension */
        if (!dc_isar_feature(aa32_fp16_arith, s)) {
            return false;
        }
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    /* Q-sized ops require even register numbers */
    if ((a->vm | a->vd) & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /* Neon FP always uses the "standard" FPSCR value */
    fpst = fpstatus_ptr(a->size == MO_16 ? FPST_STD_F16 : FPST_STD);
    tcg_gen_gvec_2_ptr(rd_ofs, rm_ofs, fpst, vec_size, vec_size, a->shift, fn);
    tcg_temp_free_ptr(fpst);
    return true;
}
1738
/* Fixed-point <-> floating-point conversions with an immediate shift */
#define DO_FP_2SH(INSN, FUNC)                                           \
    static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a)  \
    {                                                                   \
        return do_fp_2sh(s, a, FUNC);                                   \
    }

DO_FP_2SH(VCVT_SF, gen_helper_gvec_vcvt_sf)
DO_FP_2SH(VCVT_UF, gen_helper_gvec_vcvt_uf)
DO_FP_2SH(VCVT_FS, gen_helper_gvec_vcvt_fs)
DO_FP_2SH(VCVT_FU, gen_helper_gvec_vcvt_fu)

DO_FP_2SH(VCVT_SH, gen_helper_gvec_vcvt_sh)
DO_FP_2SH(VCVT_UH, gen_helper_gvec_vcvt_uh)
DO_FP_2SH(VCVT_HS, gen_helper_gvec_vcvt_hs)
DO_FP_2SH(VCVT_HU, gen_helper_gvec_vcvt_hu)
1754
/* One register and an immediate: expand the decoded imm and emit fn */
static bool do_1reg_imm(DisasContext *s, arg_1reg_imm *a,
                        GVecGen2iFn *fn)
{
    uint64_t imm;
    int reg_ofs, vec_size;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
        return false;
    }

    /* Q-sized ops require an even register number */
    if (a->vd & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    reg_ofs = neon_full_reg_offset(a->vd);
    vec_size = a->q ? 16 : 8;
    /* Expand the 8-bit imm + cmode/op encoding to the full 64-bit pattern */
    imm = asimd_imm_const(a->imm, a->cmode, a->op);

    fn(MO_64, reg_ofs, reg_ofs, imm, vec_size, vec_size);
    return true;
}
1785
/*
 * gvec-style callback for VMOV (immediate): duplicate the 64-bit
 * immediate c into Vd; aofs and vece are intentionally unused.
 */
static void gen_VMOV_1r(unsigned vece, uint32_t dofs, uint32_t aofs,
                        int64_t c, uint32_t oprsz, uint32_t maxsz)
{
    tcg_gen_gvec_dup_imm(MO_64, dofs, oprsz, maxsz, c);
}
1791
1792static bool trans_Vimm_1r(DisasContext *s, arg_1reg_imm *a)
1793{
1794
1795 GVecGen2iFn *fn;
1796
1797 if ((a->cmode & 1) && a->cmode < 12) {
1798
1799 fn = a->op ? tcg_gen_gvec_andi : tcg_gen_gvec_ori;
1800 } else {
1801
1802 if (a->cmode == 15 && a->op == 1) {
1803 return false;
1804 }
1805 fn = gen_VMOV_1r;
1806 }
1807 return do_1reg_imm(s, a, fn);
1808}
1809
static bool do_prewiden_3d(DisasContext *s, arg_3diff *a,
                           NeonGenWidenFn *widenfn,
                           NeonGenTwo64OpFn *opfn,
                           int src1_mop, int src2_mop)
{
    /* 3-regs different lengths, prewidening case (VADDL/VSUBL/VADDW/VSUBW) */
    TCGv_i64 rn0_64, rn1_64, rm_64;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if (!opfn) {
        /* size == 3 case, which is an entirely different insn group */
        return false;
    }

    /* Vd (and wide Vn) are Q registers, so must be even */
    if ((a->vd & 1) || (src1_mop == MO_Q && (a->vn & 1))) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    rn0_64 = tcg_temp_new_i64();
    rn1_64 = tcg_temp_new_i64();
    rm_64 = tcg_temp_new_i64();

    /* src1_mop/src2_mop >= 0 means "load already-wide element directly" */
    if (src1_mop >= 0) {
        read_neon_element64(rn0_64, a->vn, 0, src1_mop);
    } else {
        TCGv_i32 tmp = tcg_temp_new_i32();
        read_neon_element32(tmp, a->vn, 0, MO_32);
        widenfn(rn0_64, tmp);
        tcg_temp_free_i32(tmp);
    }
    if (src2_mop >= 0) {
        read_neon_element64(rm_64, a->vm, 0, src2_mop);
    } else {
        TCGv_i32 tmp = tcg_temp_new_i32();
        read_neon_element32(tmp, a->vm, 0, MO_32);
        widenfn(rm_64, tmp);
        tcg_temp_free_i32(tmp);
    }

    opfn(rn0_64, rn0_64, rm_64);

    /*
     * Load the second-pass inputs before storing the first-pass result,
     * to avoid incorrect results if a narrow input overlaps with the
     * destination.
     */
    if (src1_mop >= 0) {
        read_neon_element64(rn1_64, a->vn, 1, src1_mop);
    } else {
        TCGv_i32 tmp = tcg_temp_new_i32();
        read_neon_element32(tmp, a->vn, 1, MO_32);
        widenfn(rn1_64, tmp);
        tcg_temp_free_i32(tmp);
    }
    if (src2_mop >= 0) {
        read_neon_element64(rm_64, a->vm, 1, src2_mop);
    } else {
        TCGv_i32 tmp = tcg_temp_new_i32();
        read_neon_element32(tmp, a->vm, 1, MO_32);
        widenfn(rm_64, tmp);
        tcg_temp_free_i32(tmp);
    }

    write_neon_element64(rn0_64, a->vd, 0, MO_64);

    opfn(rn1_64, rn1_64, rm_64);
    write_neon_element64(rn1_64, a->vd, 1, MO_64);

    tcg_temp_free_i64(rn0_64);
    tcg_temp_free_i64(rn1_64);
    tcg_temp_free_i64(rm_64);

    return true;
}
1896
/*
 * Prewidening add/sub: SRC1WIDE selects the VADDW/VSUBW forms where the
 * first source is already double-width; SIGN selects signed widening.
 */
#define DO_PREWIDEN(INSN, S, OP, SRC1WIDE, SIGN)                        \
    static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a)        \
    {                                                                   \
        static NeonGenWidenFn * const widenfn[] = {                     \
            gen_helper_neon_widen_##S##8,                               \
            gen_helper_neon_widen_##S##16,                              \
            NULL, NULL,                                                 \
        };                                                              \
        static NeonGenTwo64OpFn * const addfn[] = {                     \
            gen_helper_neon_##OP##l_u16,                                \
            gen_helper_neon_##OP##l_u32,                                \
            tcg_gen_##OP##_i64,                                         \
            NULL,                                                       \
        };                                                              \
        int narrow_mop = a->size == MO_32 ? MO_32 | SIGN : -1;          \
        return do_prewiden_3d(s, a, widenfn[a->size], addfn[a->size],   \
                              SRC1WIDE ? MO_Q : narrow_mop,             \
                              narrow_mop);                              \
    }

DO_PREWIDEN(VADDL_S, s, add, false, MO_SIGN)
DO_PREWIDEN(VADDL_U, u, add, false, 0)
DO_PREWIDEN(VSUBL_S, s, sub, false, MO_SIGN)
DO_PREWIDEN(VSUBL_U, u, sub, false, 0)
DO_PREWIDEN(VADDW_S, s, add, true, MO_SIGN)
DO_PREWIDEN(VADDW_U, u, add, true, 0)
DO_PREWIDEN(VSUBW_S, s, sub, true, MO_SIGN)
DO_PREWIDEN(VSUBW_U, u, sub, true, 0)
1925
static bool do_narrow_3d(DisasContext *s, arg_3diff *a,
                         NeonGenTwo64OpFn *opfn, NeonGenNarrowFn *narrowfn)
{
    /* 3-regs different lengths, narrowing (VADDHN/VSUBHN/VRADDHN/VRSUBHN) */
    TCGv_i64 rn_64, rm_64;
    TCGv_i32 rd0, rd1;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if (!opfn || !narrowfn) {
        /* size == 3 case, which is an entirely different insn group */
        return false;
    }

    /* Both sources are Q registers, so must be even */
    if ((a->vn | a->vm) & 1) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    rn_64 = tcg_temp_new_i64();
    rm_64 = tcg_temp_new_i64();
    rd0 = tcg_temp_new_i32();
    rd1 = tcg_temp_new_i32();

    read_neon_element64(rn_64, a->vn, 0, MO_64);
    read_neon_element64(rm_64, a->vm, 0, MO_64);

    opfn(rn_64, rn_64, rm_64);

    narrowfn(rd0, rn_64);

    read_neon_element64(rn_64, a->vn, 1, MO_64);
    read_neon_element64(rm_64, a->vm, 1, MO_64);

    opfn(rn_64, rn_64, rm_64);

    narrowfn(rd1, rn_64);

    /* Don't store results until after all loads: they might overlap */
    write_neon_element32(rd0, a->vd, 0, MO_32);
    write_neon_element32(rd1, a->vd, 1, MO_32);

    tcg_temp_free_i32(rd0);
    tcg_temp_free_i32(rd1);
    tcg_temp_free_i64(rn_64);
    tcg_temp_free_i64(rm_64);

    return true;
}
1985
/*
 * Narrowing 3-reg-different ops: double-width add/sub, then narrow by
 * taking the high half of each result element, optionally with rounding
 * (EXTOP is the 32-bit narrowing function for the size == 2 case).
 */
#define DO_NARROW_3D(INSN, OP, NARROWTYPE, EXTOP)                       \
    static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a)        \
    {                                                                   \
        static NeonGenTwo64OpFn * const addfn[] = {                     \
            gen_helper_neon_##OP##l_u16,                                \
            gen_helper_neon_##OP##l_u32,                                \
            tcg_gen_##OP##_i64,                                         \
            NULL,                                                       \
        };                                                              \
        static NeonGenNarrowFn * const narrowfn[] = {                   \
            gen_helper_neon_##NARROWTYPE##_high_u8,                     \
            gen_helper_neon_##NARROWTYPE##_high_u16,                    \
            EXTOP,                                                      \
            NULL,                                                       \
        };                                                              \
        return do_narrow_3d(s, a, addfn[a->size], narrowfn[a->size]);   \
    }

/* Rounding high-half narrow: add half (1 << 31), then take bits [63:32].
 * Note that this clobbers rn. */
static void gen_narrow_round_high_u32(TCGv_i32 rd, TCGv_i64 rn)
{
    tcg_gen_addi_i64(rn, rn, 1u << 31);
    tcg_gen_extrh_i64_i32(rd, rn);
}

DO_NARROW_3D(VADDHN, add, narrow, tcg_gen_extrh_i64_i32)
DO_NARROW_3D(VSUBHN, sub, narrow, tcg_gen_extrh_i64_i32)
DO_NARROW_3D(VRADDHN, add, narrow_round, gen_narrow_round_high_u32)
DO_NARROW_3D(VRSUBHN, sub, narrow_round, gen_narrow_round_high_u32)
2014
static bool do_long_3d(DisasContext *s, arg_3diff *a,
                       NeonGenTwoOpWidenFn *opfn,
                       NeonGenTwo64OpFn *accfn)
{
    /*
     * 3-regs different lengths, long operations.
     * These perform an operation on two inputs that returns a
     * double-width result, and then possibly perform an accumulation
     * operation of that result into the double-width destination.
     */
    TCGv_i64 rd0, rd1, tmp;
    TCGv_i32 rn, rm;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if (!opfn) {
        /* size == 3 case, which is an entirely different insn group */
        return false;
    }

    /* The destination is a Q register, so must be even */
    if (a->vd & 1) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    rd0 = tcg_temp_new_i64();
    rd1 = tcg_temp_new_i64();

    rn = tcg_temp_new_i32();
    rm = tcg_temp_new_i32();
    read_neon_element32(rn, a->vn, 0, MO_32);
    read_neon_element32(rm, a->vm, 0, MO_32);
    opfn(rd0, rn, rm);

    read_neon_element32(rn, a->vn, 1, MO_32);
    read_neon_element32(rm, a->vm, 1, MO_32);
    opfn(rd1, rn, rm);
    tcg_temp_free_i32(rn);
    tcg_temp_free_i32(rm);

    /* Don't store results until after all loads: they might overlap */
    if (accfn) {
        tmp = tcg_temp_new_i64();
        read_neon_element64(tmp, a->vd, 0, MO_64);
        accfn(rd0, tmp, rd0);
        read_neon_element64(tmp, a->vd, 1, MO_64);
        accfn(rd1, tmp, rd1);
        tcg_temp_free_i64(tmp);
    }

    write_neon_element64(rd0, a->vd, 0, MO_64);
    write_neon_element64(rd1, a->vd, 1, MO_64);
    tcg_temp_free_i64(rd0);
    tcg_temp_free_i64(rd1);

    return true;
}
2083
static bool trans_VABDL_S_3d(DisasContext *s, arg_3diff *a)
{
    /* Signed absolute-difference long; size 3 is invalid (NULL entry) */
    static NeonGenTwoOpWidenFn * const opfn[] = {
        gen_helper_neon_abdl_s16,
        gen_helper_neon_abdl_s32,
        gen_helper_neon_abdl_s64,
        NULL,
    };

    return do_long_3d(s, a, opfn[a->size], NULL);
}
2095
2096static bool trans_VABDL_U_3d(DisasContext *s, arg_3diff *a)
2097{
2098 static NeonGenTwoOpWidenFn * const opfn[] = {
2099 gen_helper_neon_abdl_u16,
2100 gen_helper_neon_abdl_u32,
2101 gen_helper_neon_abdl_u64,
2102 NULL,
2103 };
2104
2105 return do_long_3d(s, a, opfn[a->size], NULL);
2106}
2107
static bool trans_VABAL_S_3d(DisasContext *s, arg_3diff *a)
{
    /* Signed absolute-difference-and-accumulate long */
    static NeonGenTwoOpWidenFn * const opfn[] = {
        gen_helper_neon_abdl_s16,
        gen_helper_neon_abdl_s32,
        gen_helper_neon_abdl_s64,
        NULL,
    };
    /* Accumulation is a plain double-width add */
    static NeonGenTwo64OpFn * const addfn[] = {
        gen_helper_neon_addl_u16,
        gen_helper_neon_addl_u32,
        tcg_gen_add_i64,
        NULL,
    };

    return do_long_3d(s, a, opfn[a->size], addfn[a->size]);
}

static bool trans_VABAL_U_3d(DisasContext *s, arg_3diff *a)
{
    /* Unsigned absolute-difference-and-accumulate long */
    static NeonGenTwoOpWidenFn * const opfn[] = {
        gen_helper_neon_abdl_u16,
        gen_helper_neon_abdl_u32,
        gen_helper_neon_abdl_u64,
        NULL,
    };
    /* Accumulation is a plain double-width add */
    static NeonGenTwo64OpFn * const addfn[] = {
        gen_helper_neon_addl_u16,
        gen_helper_neon_addl_u32,
        tcg_gen_add_i64,
        NULL,
    };

    return do_long_3d(s, a, opfn[a->size], addfn[a->size]);
}
2143
/* 32x32->64 signed multiply: full product via muls2, halves concatenated */
static void gen_mull_s32(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm)
{
    TCGv_i32 lo = tcg_temp_new_i32();
    TCGv_i32 hi = tcg_temp_new_i32();

    tcg_gen_muls2_i32(lo, hi, rn, rm);
    tcg_gen_concat_i32_i64(rd, lo, hi);

    tcg_temp_free_i32(lo);
    tcg_temp_free_i32(hi);
}

/* 32x32->64 unsigned multiply: full product via mulu2, halves concatenated */
static void gen_mull_u32(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm)
{
    TCGv_i32 lo = tcg_temp_new_i32();
    TCGv_i32 hi = tcg_temp_new_i32();

    tcg_gen_mulu2_i32(lo, hi, rn, rm);
    tcg_gen_concat_i32_i64(rd, lo, hi);

    tcg_temp_free_i32(lo);
    tcg_temp_free_i32(hi);
}
2167
static bool trans_VMULL_S_3d(DisasContext *s, arg_3diff *a)
{
    /* Signed multiply long; size 3 is invalid here (NULL entry) */
    static NeonGenTwoOpWidenFn * const opfn[] = {
        gen_helper_neon_mull_s8,
        gen_helper_neon_mull_s16,
        gen_mull_s32,
        NULL,
    };

    return do_long_3d(s, a, opfn[a->size], NULL);
}
2179
2180static bool trans_VMULL_U_3d(DisasContext *s, arg_3diff *a)
2181{
2182 static NeonGenTwoOpWidenFn * const opfn[] = {
2183 gen_helper_neon_mull_u8,
2184 gen_helper_neon_mull_u16,
2185 gen_mull_u32,
2186 NULL,
2187 };
2188
2189 return do_long_3d(s, a, opfn[a->size], NULL);
2190}
2191
/* Multiply long then accumulate (add or subtract) into the wide Vd */
#define DO_VMLAL(INSN,MULL,ACC)                                         \
    static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a)        \
    {                                                                   \
        static NeonGenTwoOpWidenFn * const opfn[] = {                   \
            gen_helper_neon_##MULL##8,                                  \
            gen_helper_neon_##MULL##16,                                 \
            gen_##MULL##32,                                             \
            NULL,                                                       \
        };                                                              \
        static NeonGenTwo64OpFn * const accfn[] = {                     \
            gen_helper_neon_##ACC##l_u16,                               \
            gen_helper_neon_##ACC##l_u32,                               \
            tcg_gen_##ACC##_i64,                                        \
            NULL,                                                       \
        };                                                              \
        return do_long_3d(s, a, opfn[a->size], accfn[a->size]);         \
    }

DO_VMLAL(VMLAL_S,mull_s,add)
DO_VMLAL(VMLAL_U,mull_u,add)
DO_VMLAL(VMLSL_S,mull_s,sub)
DO_VMLAL(VMLSL_U,mull_u,sub)
2214
/* Saturating doubling multiply long: mull, then saturating add rd to itself */
static void gen_VQDMULL_16(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm)
{
    gen_helper_neon_mull_s16(rd, rn, rm);
    gen_helper_neon_addl_saturate_s32(rd, cpu_env, rd, rd);
}

static void gen_VQDMULL_32(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm)
{
    gen_mull_s32(rd, rn, rm);
    gen_helper_neon_addl_saturate_s64(rd, cpu_env, rd, rd);
}

static bool trans_VQDMULL_3d(DisasContext *s, arg_3diff *a)
{
    /* Only 16-bit and 32-bit elements are valid for VQDMULL */
    static NeonGenTwoOpWidenFn * const opfn[] = {
        NULL,
        gen_VQDMULL_16,
        gen_VQDMULL_32,
        NULL,
    };

    return do_long_3d(s, a, opfn[a->size], NULL);
}
2238
/* VQDMLAL accumulation step: saturating add of the product into Vd */
static void gen_VQDMLAL_acc_16(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
{
    gen_helper_neon_addl_saturate_s32(rd, cpu_env, rn, rm);
}

static void gen_VQDMLAL_acc_32(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
{
    gen_helper_neon_addl_saturate_s64(rd, cpu_env, rn, rm);
}

static bool trans_VQDMLAL_3d(DisasContext *s, arg_3diff *a)
{
    /* Only 16-bit and 32-bit elements are valid for VQDMLAL */
    static NeonGenTwoOpWidenFn * const opfn[] = {
        NULL,
        gen_VQDMULL_16,
        gen_VQDMULL_32,
        NULL,
    };
    static NeonGenTwo64OpFn * const accfn[] = {
        NULL,
        gen_VQDMLAL_acc_16,
        gen_VQDMLAL_acc_32,
        NULL,
    };

    return do_long_3d(s, a, opfn[a->size], accfn[a->size]);
}
2266
/*
 * VQDMLSL accumulation step: negate the product (clobbering rm), then
 * do a saturating add into Vd — i.e. a saturating subtract.
 */
static void gen_VQDMLSL_acc_16(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
{
    gen_helper_neon_negl_u32(rm, rm);
    gen_helper_neon_addl_saturate_s32(rd, cpu_env, rn, rm);
}

static void gen_VQDMLSL_acc_32(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
{
    tcg_gen_neg_i64(rm, rm);
    gen_helper_neon_addl_saturate_s64(rd, cpu_env, rn, rm);
}

static bool trans_VQDMLSL_3d(DisasContext *s, arg_3diff *a)
{
    /* Only 16-bit and 32-bit elements are valid for VQDMLSL */
    static NeonGenTwoOpWidenFn * const opfn[] = {
        NULL,
        gen_VQDMULL_16,
        gen_VQDMULL_32,
        NULL,
    };
    static NeonGenTwo64OpFn * const accfn[] = {
        NULL,
        gen_VQDMLSL_acc_16,
        gen_VQDMLSL_acc_32,
        NULL,
    };

    return do_long_3d(s, a, opfn[a->size], accfn[a->size]);
}
2296
/* Polynomial VMULL (carry-less multiply long) */
static bool trans_VMULL_P_3d(DisasContext *s, arg_3diff *a)
{
    gen_helper_gvec_3 *fn_gvec;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    /* The destination is a Q register, so must be even */
    if (a->vd & 1) {
        return false;
    }

    switch (a->size) {
    case 0:
        /* 8x8->16 polynomial multiply */
        fn_gvec = gen_helper_neon_pmull_h;
        break;
    case 2:
        /* 64x64->128 polynomial multiply: optional extension */
        if (!dc_isar_feature(aa32_pmull, s)) {
            return false;
        }
        fn_gvec = gen_helper_gvec_pmull_q;
        break;
    default:
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /* Always a 128-bit-wide operation on the full Q destination */
    tcg_gen_gvec_3_ool(neon_full_reg_offset(a->vd),
                       neon_full_reg_offset(a->vn),
                       neon_full_reg_offset(a->vm),
                       16, 16, 0, fn_gvec);
    return true;
}
2339
/* Duplicate the low 16 bits of var into both halves of the 32-bit value */
static void gen_neon_dup_low16(TCGv_i32 var)
{
    TCGv_i32 tmp = tcg_temp_new_i32();
    tcg_gen_ext16u_i32(var, var);
    tcg_gen_shli_i32(tmp, var, 16);
    tcg_gen_or_i32(var, var, tmp);
    tcg_temp_free_i32(tmp);
}

/* Duplicate the high 16 bits of var into both halves of the 32-bit value */
static void gen_neon_dup_high16(TCGv_i32 var)
{
    TCGv_i32 tmp = tcg_temp_new_i32();
    tcg_gen_andi_i32(var, var, 0xffff0000);
    tcg_gen_shri_i32(tmp, var, 16);
    tcg_gen_or_i32(var, var, tmp);
    tcg_temp_free_i32(tmp);
}
2357
/*
 * Return a new TCGv_i32 holding the scalar operand for a 2-reg-scalar
 * insn.  The reg value encodes both the register number (low bits) and
 * the element index (remaining bits).  For MO_16 the selected 16-bit
 * element is duplicated into both halves of the 32-bit result, since
 * the 16-bit helpers operate on both lanes at once.
 */
static inline TCGv_i32 neon_get_scalar(int size, int reg)
{
    TCGv_i32 tmp = tcg_temp_new_i32();
    if (size == MO_16) {
        read_neon_element32(tmp, reg & 7, reg >> 4, MO_32);
        if (reg & 8) {
            gen_neon_dup_high16(tmp);
        } else {
            gen_neon_dup_low16(tmp);
        }
    } else {
        read_neon_element32(tmp, reg & 15, reg >> 4, MO_32);
    }
    return tmp;
}
2373
static bool do_2scalar(DisasContext *s, arg_2scalar *a,
                       NeonGenTwoOpFn *opfn, NeonGenTwoOpFn *accfn)
{
    /*
     * Two registers and a scalar: perform an operation between vector
     * and scalar, and then possibly perform an accumulation operation
     * of that result into the destination.  Works in 32-bit chunks;
     * for MO_16 the opfn operates on two 16-bit elements at once.
     */
    TCGv_i32 scalar, tmp;
    int pass;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if (!opfn) {
        /* Bad size (including size == 3, which is a different insn group) */
        return false;
    }

    /* Q-sized Vd/Vn must be even register numbers */
    if (a->q && ((a->vd | a->vn) & 1)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    scalar = neon_get_scalar(a->size, a->vm);
    tmp = tcg_temp_new_i32();

    for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
        read_neon_element32(tmp, a->vn, pass, MO_32);
        opfn(tmp, tmp, scalar);
        if (accfn) {
            TCGv_i32 rd = tcg_temp_new_i32();
            read_neon_element32(rd, a->vd, pass, MO_32);
            accfn(tmp, rd, tmp);
            tcg_temp_free_i32(rd);
        }
        write_neon_element32(tmp, a->vd, pass, MO_32);
    }
    tcg_temp_free_i32(tmp);
    tcg_temp_free_i32(scalar);
    return true;
}
2427
static bool trans_VMUL_2sc(DisasContext *s, arg_2scalar *a)
{
    /* Multiply by scalar: only 16-bit and 32-bit elements are valid */
    static NeonGenTwoOpFn * const opfn[] = {
        NULL,
        gen_helper_neon_mul_u16,
        tcg_gen_mul_i32,
        NULL,
    };

    return do_2scalar(s, a, opfn[a->size], NULL);
}

static bool trans_VMLA_2sc(DisasContext *s, arg_2scalar *a)
{
    /* Multiply-accumulate by scalar: mul, then add into Vd */
    static NeonGenTwoOpFn * const opfn[] = {
        NULL,
        gen_helper_neon_mul_u16,
        tcg_gen_mul_i32,
        NULL,
    };
    static NeonGenTwoOpFn * const accfn[] = {
        NULL,
        gen_helper_neon_add_u16,
        tcg_gen_add_i32,
        NULL,
    };

    return do_2scalar(s, a, opfn[a->size], accfn[a->size]);
}

static bool trans_VMLS_2sc(DisasContext *s, arg_2scalar *a)
{
    /* Multiply-subtract by scalar: mul, then subtract from Vd */
    static NeonGenTwoOpFn * const opfn[] = {
        NULL,
        gen_helper_neon_mul_u16,
        tcg_gen_mul_i32,
        NULL,
    };
    static NeonGenTwoOpFn * const accfn[] = {
        NULL,
        gen_helper_neon_sub_u16,
        tcg_gen_sub_i32,
        NULL,
    };

    return do_2scalar(s, a, opfn[a->size], accfn[a->size]);
}
2475
static bool do_2scalar_fp_vec(DisasContext *s, arg_2scalar *a,
                              gen_helper_gvec_3_ptr *fn)
{
    /* Two registers and a scalar, using gvec */
    int vec_size = a->q ? 16 : 8;
    int rd_ofs = neon_full_reg_offset(a->vd);
    int rn_ofs = neon_full_reg_offset(a->vn);
    int rm_ofs;
    int idx;
    TCGv_ptr fpstatus;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if (!fn) {
        /* Bad size (including size == 3, which is a different insn group) */
        return false;
    }

    /* Q-sized Vd/Vn must be even register numbers */
    if (a->q && ((a->vd | a->vn) & 1)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /* a->vm encodes both the register number and the element index */
    idx = extract32(a->vm, a->size + 2, 2);
    a->vm = extract32(a->vm, 0, a->size + 2);
    rm_ofs = neon_full_reg_offset(a->vm);

    /* Neon FP always uses the "standard" FPSCR value */
    fpstatus = fpstatus_ptr(a->size == 1 ? FPST_STD_F16 : FPST_STD);
    tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, fpstatus,
                       vec_size, vec_size, idx, fn);
    tcg_temp_free_ptr(fpstatus);
    return true;
}
2521
/*
 * FP multiply (and multiply-accumulate) by scalar: half-precision needs
 * the FP16 arithmetic extension; size 0/3 are invalid (NULL entries).
 */
#define DO_VMUL_F_2sc(NAME, FUNC)                                       \
    static bool trans_##NAME##_F_2sc(DisasContext *s, arg_2scalar *a)   \
    {                                                                   \
        static gen_helper_gvec_3_ptr * const opfn[] = {                 \
            NULL,                                                       \
            gen_helper_##FUNC##_h,                                      \
            gen_helper_##FUNC##_s,                                      \
            NULL,                                                       \
        };                                                              \
        if (a->size == MO_16 && !dc_isar_feature(aa32_fp16_arith, s)) { \
            return false;                                               \
        }                                                               \
        return do_2scalar_fp_vec(s, a, opfn[a->size]);                  \
    }

DO_VMUL_F_2sc(VMUL, gvec_fmul_idx)
DO_VMUL_F_2sc(VMLA, gvec_fmla_nf_idx)
DO_VMUL_F_2sc(VMLS, gvec_fmls_nf_idx)

/* Wrap the env-taking saturating-multiply helpers as NeonGenTwoOpFn */
WRAP_ENV_FN(gen_VQDMULH_16, gen_helper_neon_qdmulh_s16)
WRAP_ENV_FN(gen_VQDMULH_32, gen_helper_neon_qdmulh_s32)
WRAP_ENV_FN(gen_VQRDMULH_16, gen_helper_neon_qrdmulh_s16)
WRAP_ENV_FN(gen_VQRDMULH_32, gen_helper_neon_qrdmulh_s32)
2545
2546static bool trans_VQDMULH_2sc(DisasContext *s, arg_2scalar *a)
2547{
2548 static NeonGenTwoOpFn * const opfn[] = {
2549 NULL,
2550 gen_VQDMULH_16,
2551 gen_VQDMULH_32,
2552 NULL,
2553 };
2554
2555 return do_2scalar(s, a, opfn[a->size], NULL);
2556}
2557
2558static bool trans_VQRDMULH_2sc(DisasContext *s, arg_2scalar *a)
2559{
2560 static NeonGenTwoOpFn * const opfn[] = {
2561 NULL,
2562 gen_VQRDMULH_16,
2563 gen_VQRDMULH_32,
2564 NULL,
2565 };
2566
2567 return do_2scalar(s, a, opfn[a->size], NULL);
2568}
2569
static bool do_vqrdmlah_2sc(DisasContext *s, arg_2scalar *a,
                            NeonGenThreeOpEnvFn *opfn)
{
    /*
     * VQRDMLAH/VQRDMLSH (by scalar): like do_2scalar, except the
     * helper needs the accumulator too, so rd is passed both as the
     * accumulator input and as the destination.  Requires FEAT_RDM.
     */
    TCGv_i32 scalar, rn, rd;
    int pass;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    if (!dc_isar_feature(aa32_rdm, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if (!opfn) {
        /* Bad size: no helper for this element width. */
        return false;
    }

    /* Q-sized operands must be even-numbered register pairs. */
    if (a->q && ((a->vd | a->vn) & 1)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    scalar = neon_get_scalar(a->size, a->vm);
    rn = tcg_temp_new_i32();
    rd = tcg_temp_new_i32();

    /* 2 passes for a D reg, 4 for a Q reg, 32 bits per pass. */
    for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
        read_neon_element32(rn, a->vn, pass, MO_32);
        read_neon_element32(rd, a->vd, pass, MO_32);
        opfn(rd, cpu_env, rn, scalar, rd);
        write_neon_element32(rd, a->vd, pass, MO_32);
    }
    tcg_temp_free_i32(rn);
    tcg_temp_free_i32(rd);
    tcg_temp_free_i32(scalar);

    return true;
}
2624
2625static bool trans_VQRDMLAH_2sc(DisasContext *s, arg_2scalar *a)
2626{
2627 static NeonGenThreeOpEnvFn *opfn[] = {
2628 NULL,
2629 gen_helper_neon_qrdmlah_s16,
2630 gen_helper_neon_qrdmlah_s32,
2631 NULL,
2632 };
2633 return do_vqrdmlah_2sc(s, a, opfn[a->size]);
2634}
2635
2636static bool trans_VQRDMLSH_2sc(DisasContext *s, arg_2scalar *a)
2637{
2638 static NeonGenThreeOpEnvFn *opfn[] = {
2639 NULL,
2640 gen_helper_neon_qrdmlsh_s16,
2641 gen_helper_neon_qrdmlsh_s32,
2642 NULL,
2643 };
2644 return do_vqrdmlah_2sc(s, a, opfn[a->size]);
2645}
2646
static bool do_2scalar_long(DisasContext *s, arg_2scalar *a,
                            NeonGenTwoOpWidenFn *opfn,
                            NeonGenTwo64OpFn *accfn)
{
    /*
     * Two registers and a scalar, long operation: widen each of the
     * two 32-bit inputs from Vn against the scalar using opfn, then
     * optionally combine the 64-bit results with the existing Vd
     * contents using accfn, and write a full 128-bit result.
     */
    TCGv_i32 scalar, rn;
    TCGv_i64 rn0_64, rn1_64;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if (!opfn) {
        /* Bad size: no widening helper for this element width. */
        return false;
    }

    /* The Q-register destination must be even-numbered. */
    if (a->vd & 1) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    scalar = neon_get_scalar(a->size, a->vm);

    /* All source elements are read before any result is written. */
    rn = tcg_temp_new_i32();
    read_neon_element32(rn, a->vn, 0, MO_32);
    rn0_64 = tcg_temp_new_i64();
    opfn(rn0_64, rn, scalar);

    read_neon_element32(rn, a->vn, 1, MO_32);
    rn1_64 = tcg_temp_new_i64();
    opfn(rn1_64, rn, scalar);
    tcg_temp_free_i32(rn);
    tcg_temp_free_i32(scalar);

    if (accfn) {
        /* Accumulate: fold the old Vd halves into the new results. */
        TCGv_i64 t64 = tcg_temp_new_i64();
        read_neon_element64(t64, a->vd, 0, MO_64);
        accfn(rn0_64, t64, rn0_64);
        read_neon_element64(t64, a->vd, 1, MO_64);
        accfn(rn1_64, t64, rn1_64);
        tcg_temp_free_i64(t64);
    }

    write_neon_element64(rn0_64, a->vd, 0, MO_64);
    write_neon_element64(rn1_64, a->vd, 1, MO_64);
    tcg_temp_free_i64(rn0_64);
    tcg_temp_free_i64(rn1_64);
    return true;
}
2712
2713static bool trans_VMULL_S_2sc(DisasContext *s, arg_2scalar *a)
2714{
2715 static NeonGenTwoOpWidenFn * const opfn[] = {
2716 NULL,
2717 gen_helper_neon_mull_s16,
2718 gen_mull_s32,
2719 NULL,
2720 };
2721
2722 return do_2scalar_long(s, a, opfn[a->size], NULL);
2723}
2724
2725static bool trans_VMULL_U_2sc(DisasContext *s, arg_2scalar *a)
2726{
2727 static NeonGenTwoOpWidenFn * const opfn[] = {
2728 NULL,
2729 gen_helper_neon_mull_u16,
2730 gen_mull_u32,
2731 NULL,
2732 };
2733
2734 return do_2scalar_long(s, a, opfn[a->size], NULL);
2735}
2736
/*
 * Expand VMLAL/VMLSL (by scalar): widening multiply via the per-size
 * MULL helper, followed by a 64-bit add or subtract accumulate step.
 * Only 16-bit and 32-bit element sizes are valid.
 */
#define DO_VMLAL_2SC(INSN, MULL, ACC)                                   \
    static bool trans_##INSN##_2sc(DisasContext *s, arg_2scalar *a)     \
    {                                                                   \
        static NeonGenTwoOpWidenFn * const opfn[] = {                   \
            NULL,                                                       \
            gen_helper_neon_##MULL##16,                                 \
            gen_##MULL##32,                                             \
            NULL,                                                       \
        };                                                              \
        static NeonGenTwo64OpFn * const accfn[] = {                     \
            NULL,                                                       \
            gen_helper_neon_##ACC##l_u32,                               \
            tcg_gen_##ACC##_i64,                                        \
            NULL,                                                       \
        };                                                              \
        return do_2scalar_long(s, a, opfn[a->size], accfn[a->size]);    \
    }

/* Signed/unsigned multiply-accumulate-long and multiply-subtract-long. */
DO_VMLAL_2SC(VMLAL_S, mull_s, add)
DO_VMLAL_2SC(VMLAL_U, mull_u, add)
DO_VMLAL_2SC(VMLSL_S, mull_s, sub)
DO_VMLAL_2SC(VMLSL_U, mull_u, sub)
2759
2760static bool trans_VQDMULL_2sc(DisasContext *s, arg_2scalar *a)
2761{
2762 static NeonGenTwoOpWidenFn * const opfn[] = {
2763 NULL,
2764 gen_VQDMULL_16,
2765 gen_VQDMULL_32,
2766 NULL,
2767 };
2768
2769 return do_2scalar_long(s, a, opfn[a->size], NULL);
2770}
2771
2772static bool trans_VQDMLAL_2sc(DisasContext *s, arg_2scalar *a)
2773{
2774 static NeonGenTwoOpWidenFn * const opfn[] = {
2775 NULL,
2776 gen_VQDMULL_16,
2777 gen_VQDMULL_32,
2778 NULL,
2779 };
2780 static NeonGenTwo64OpFn * const accfn[] = {
2781 NULL,
2782 gen_VQDMLAL_acc_16,
2783 gen_VQDMLAL_acc_32,
2784 NULL,
2785 };
2786
2787 return do_2scalar_long(s, a, opfn[a->size], accfn[a->size]);
2788}
2789
2790static bool trans_VQDMLSL_2sc(DisasContext *s, arg_2scalar *a)
2791{
2792 static NeonGenTwoOpWidenFn * const opfn[] = {
2793 NULL,
2794 gen_VQDMULL_16,
2795 gen_VQDMULL_32,
2796 NULL,
2797 };
2798 static NeonGenTwo64OpFn * const accfn[] = {
2799 NULL,
2800 gen_VQDMLSL_acc_16,
2801 gen_VQDMLSL_acc_32,
2802 NULL,
2803 };
2804
2805 return do_2scalar_long(s, a, opfn[a->size], accfn[a->size]);
2806}
2807
static bool trans_VEXT(DisasContext *s, arg_VEXT *a)
{
    /*
     * VEXT: extract a byte-aligned span from the concatenation of
     * Vm:Vn, starting imm bytes into Vn.
     */
    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    /* Q-register operands must be even-numbered (bit 0 clear). */
    if ((a->vn | a->vm | a->vd) & a->q) {
        return false;
    }

    /* For the D-register form the byte offset must be 0..7. */
    if (a->imm > 7 && !a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    if (!a->q) {
        /* Extract 64 bits from <Vm:Vn>. */
        TCGv_i64 left, right, dest;

        left = tcg_temp_new_i64();
        right = tcg_temp_new_i64();
        dest = tcg_temp_new_i64();

        read_neon_element64(right, a->vn, 0, MO_64);
        read_neon_element64(left, a->vm, 0, MO_64);
        tcg_gen_extract2_i64(dest, right, left, a->imm * 8);
        write_neon_element64(dest, a->vd, 0, MO_64);

        tcg_temp_free_i64(left);
        tcg_temp_free_i64(right);
        tcg_temp_free_i64(dest);
    } else {
        /*
         * Extract 128 bits from <Vm+1:Vm:Vn+1:Vn>: done as two 64-bit
         * extract2 ops, choosing the three source doublewords that
         * straddle the offset.  All reads complete before the writes,
         * so Vd may overlap the sources.
         */
        TCGv_i64 left, middle, right, destleft, destright;

        left = tcg_temp_new_i64();
        middle = tcg_temp_new_i64();
        right = tcg_temp_new_i64();
        destleft = tcg_temp_new_i64();
        destright = tcg_temp_new_i64();

        if (a->imm < 8) {
            read_neon_element64(right, a->vn, 0, MO_64);
            read_neon_element64(middle, a->vn, 1, MO_64);
            tcg_gen_extract2_i64(destright, right, middle, a->imm * 8);
            read_neon_element64(left, a->vm, 0, MO_64);
            tcg_gen_extract2_i64(destleft, middle, left, a->imm * 8);
        } else {
            read_neon_element64(right, a->vn, 1, MO_64);
            read_neon_element64(middle, a->vm, 0, MO_64);
            tcg_gen_extract2_i64(destright, right, middle, (a->imm - 8) * 8);
            read_neon_element64(left, a->vm, 1, MO_64);
            tcg_gen_extract2_i64(destleft, middle, left, (a->imm - 8) * 8);
        }

        write_neon_element64(destright, a->vd, 0, MO_64);
        write_neon_element64(destleft, a->vd, 1, MO_64);

        tcg_temp_free_i64(destright);
        tcg_temp_free_i64(destleft);
        tcg_temp_free_i64(right);
        tcg_temp_free_i64(middle);
        tcg_temp_free_i64(left);
    }
    return true;
}
2883
static bool trans_VTBL(DisasContext *s, arg_VTBL *a)
{
    /* VTBL/VTBX: byte table lookup from a list of D registers. */
    TCGv_i64 val, def;
    TCGv_i32 desc;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if ((a->vn + a->len + 1) > 32) {
        /*
         * The table occupies (len + 1) consecutive D registers
         * starting at Vn; reject lists that would run off the end of
         * the 32-entry D register file.
         */
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /* Pack the table base register and length for the helper. */
    desc = tcg_const_i32((a->vn << 2) | a->len);
    def = tcg_temp_new_i64();
    if (a->op) {
        /* VTBX: the default result bytes come from the old Vd. */
        read_neon_element64(def, a->vd, 0, MO_64);
    } else {
        /* VTBL: the default result bytes are zero. */
        tcg_gen_movi_i64(def, 0);
    }
    val = tcg_temp_new_i64();
    read_neon_element64(val, a->vm, 0, MO_64);

    gen_helper_neon_tbl(val, cpu_env, desc, val, def);
    write_neon_element64(val, a->vd, 0, MO_64);

    tcg_temp_free_i64(def);
    tcg_temp_free_i64(val);
    tcg_temp_free_i32(desc);
    return true;
}
2929
static bool trans_VDUP_scalar(DisasContext *s, arg_VDUP_scalar *a)
{
    /* VDUP (scalar): replicate one element of Vm across all of Vd. */
    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    /* A Q-register destination must be even-numbered. */
    if (a->vd & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /* gvec can dup directly from the source element's memory slot. */
    tcg_gen_gvec_dup_mem(a->size, neon_full_reg_offset(a->vd),
                         neon_element_offset(a->vm, a->index, a->size),
                         a->q ? 16 : 8, a->q ? 16 : 8);
    return true;
}
2955
static bool trans_VREV64(DisasContext *s, arg_2misc *a)
{
    /*
     * VREV64: reverse the element order within each 64-bit doubleword.
     * Implemented as: swap the two 32-bit halves (by writing tmp[1]
     * to the low slot and tmp[0] to the high slot), after byte- or
     * halfword-swapping within each 32-bit chunk as the size requires.
     */
    int pass, half;
    TCGv_i32 tmp[2];

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    /* Q-register operands must be even-numbered. */
    if ((a->vd | a->vm) & a->q) {
        return false;
    }

    /* 64-bit elements would be a no-op reversal; size 3 is invalid. */
    if (a->size == 3) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp[0] = tcg_temp_new_i32();
    tmp[1] = tcg_temp_new_i32();

    for (pass = 0; pass < (a->q ? 2 : 1); pass++) {
        for (half = 0; half < 2; half++) {
            read_neon_element32(tmp[half], a->vm, pass * 2 + half, MO_32);
            switch (a->size) {
            case 0:
                /* 8-bit elements: full byte swap within the word. */
                tcg_gen_bswap32_i32(tmp[half], tmp[half]);
                break;
            case 1:
                /* 16-bit elements: swap the two halfwords. */
                gen_swap_half(tmp[half], tmp[half]);
                break;
            case 2:
                /* 32-bit elements: word contents unchanged. */
                break;
            default:
                g_assert_not_reached();
            }
        }
        write_neon_element32(tmp[1], a->vd, pass * 2, MO_32);
        write_neon_element32(tmp[0], a->vd, pass * 2 + 1, MO_32);
    }

    tcg_temp_free_i32(tmp[0]);
    tcg_temp_free_i32(tmp[1]);
    return true;
}
3010
static bool do_2misc_pairwise(DisasContext *s, arg_2misc *a,
                              NeonGenWidenFn *widenfn,
                              NeonGenTwo64OpFn *opfn,
                              NeonGenTwo64OpFn *accfn)
{
    /*
     * Widening pairwise 2-register misc op (VPADDL/VPADAL): per
     * 64-bit pass, widen each 32-bit chunk of Vm with widenfn,
     * combine the two with opfn, optionally fold in the old Vd
     * doubleword with accfn, and write 64 bits back.
     */
    int pass;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    /* Q-register operands must be even-numbered. */
    if ((a->vd | a->vm) & a->q) {
        return false;
    }

    if (!widenfn) {
        /* Bad size: no widening helper for this element width. */
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    for (pass = 0; pass < a->q + 1; pass++) {
        TCGv_i32 tmp;
        TCGv_i64 rm0_64, rm1_64, rd_64;

        rm0_64 = tcg_temp_new_i64();
        rm1_64 = tcg_temp_new_i64();
        rd_64 = tcg_temp_new_i64();

        tmp = tcg_temp_new_i32();
        read_neon_element32(tmp, a->vm, pass * 2, MO_32);
        widenfn(rm0_64, tmp);
        read_neon_element32(tmp, a->vm, pass * 2 + 1, MO_32);
        widenfn(rm1_64, tmp);
        tcg_temp_free_i32(tmp);

        opfn(rd_64, rm0_64, rm1_64);
        tcg_temp_free_i64(rm0_64);
        tcg_temp_free_i64(rm1_64);

        if (accfn) {
            /* VPADAL: accumulate into the existing Vd doubleword. */
            TCGv_i64 tmp64 = tcg_temp_new_i64();
            read_neon_element64(tmp64, a->vd, pass, MO_64);
            accfn(rd_64, tmp64, rd_64);
            tcg_temp_free_i64(tmp64);
        }
        write_neon_element64(rd_64, a->vd, pass, MO_64);
        tcg_temp_free_i64(rd_64);
    }
    return true;
}
3075
3076static bool trans_VPADDL_S(DisasContext *s, arg_2misc *a)
3077{
3078 static NeonGenWidenFn * const widenfn[] = {
3079 gen_helper_neon_widen_s8,
3080 gen_helper_neon_widen_s16,
3081 tcg_gen_ext_i32_i64,
3082 NULL,
3083 };
3084 static NeonGenTwo64OpFn * const opfn[] = {
3085 gen_helper_neon_paddl_u16,
3086 gen_helper_neon_paddl_u32,
3087 tcg_gen_add_i64,
3088 NULL,
3089 };
3090
3091 return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size], NULL);
3092}
3093
3094static bool trans_VPADDL_U(DisasContext *s, arg_2misc *a)
3095{
3096 static NeonGenWidenFn * const widenfn[] = {
3097 gen_helper_neon_widen_u8,
3098 gen_helper_neon_widen_u16,
3099 tcg_gen_extu_i32_i64,
3100 NULL,
3101 };
3102 static NeonGenTwo64OpFn * const opfn[] = {
3103 gen_helper_neon_paddl_u16,
3104 gen_helper_neon_paddl_u32,
3105 tcg_gen_add_i64,
3106 NULL,
3107 };
3108
3109 return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size], NULL);
3110}
3111
3112static bool trans_VPADAL_S(DisasContext *s, arg_2misc *a)
3113{
3114 static NeonGenWidenFn * const widenfn[] = {
3115 gen_helper_neon_widen_s8,
3116 gen_helper_neon_widen_s16,
3117 tcg_gen_ext_i32_i64,
3118 NULL,
3119 };
3120 static NeonGenTwo64OpFn * const opfn[] = {
3121 gen_helper_neon_paddl_u16,
3122 gen_helper_neon_paddl_u32,
3123 tcg_gen_add_i64,
3124 NULL,
3125 };
3126 static NeonGenTwo64OpFn * const accfn[] = {
3127 gen_helper_neon_addl_u16,
3128 gen_helper_neon_addl_u32,
3129 tcg_gen_add_i64,
3130 NULL,
3131 };
3132
3133 return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size],
3134 accfn[a->size]);
3135}
3136
3137static bool trans_VPADAL_U(DisasContext *s, arg_2misc *a)
3138{
3139 static NeonGenWidenFn * const widenfn[] = {
3140 gen_helper_neon_widen_u8,
3141 gen_helper_neon_widen_u16,
3142 tcg_gen_extu_i32_i64,
3143 NULL,
3144 };
3145 static NeonGenTwo64OpFn * const opfn[] = {
3146 gen_helper_neon_paddl_u16,
3147 gen_helper_neon_paddl_u32,
3148 tcg_gen_add_i64,
3149 NULL,
3150 };
3151 static NeonGenTwo64OpFn * const accfn[] = {
3152 gen_helper_neon_addl_u16,
3153 gen_helper_neon_addl_u32,
3154 tcg_gen_add_i64,
3155 NULL,
3156 };
3157
3158 return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size],
3159 accfn[a->size]);
3160}
3161
/* Zip/unzip helpers operate in place on whole registers via pointers. */
typedef void ZipFn(TCGv_ptr, TCGv_ptr);

static bool do_zip_uzp(DisasContext *s, arg_2misc *a,
                       ZipFn *fn)
{
    /* Common decode checks and expansion for VZIP/VUZP. */
    TCGv_ptr pd, pm;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    /* Q-register operands must be even-numbered. */
    if ((a->vd | a->vm) & a->q) {
        return false;
    }

    if (!fn) {
        /* Bad size or invalid size/q combination. */
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    pd = vfp_reg_ptr(true, a->vd);
    pm = vfp_reg_ptr(true, a->vm);
    fn(pd, pm);
    tcg_temp_free_ptr(pd);
    tcg_temp_free_ptr(pm);
    return true;
}
3199
3200static bool trans_VUZP(DisasContext *s, arg_2misc *a)
3201{
3202 static ZipFn * const fn[2][4] = {
3203 {
3204 gen_helper_neon_unzip8,
3205 gen_helper_neon_unzip16,
3206 NULL,
3207 NULL,
3208 }, {
3209 gen_helper_neon_qunzip8,
3210 gen_helper_neon_qunzip16,
3211 gen_helper_neon_qunzip32,
3212 NULL,
3213 }
3214 };
3215 return do_zip_uzp(s, a, fn[a->q][a->size]);
3216}
3217
3218static bool trans_VZIP(DisasContext *s, arg_2misc *a)
3219{
3220 static ZipFn * const fn[2][4] = {
3221 {
3222 gen_helper_neon_zip8,
3223 gen_helper_neon_zip16,
3224 NULL,
3225 NULL,
3226 }, {
3227 gen_helper_neon_qzip8,
3228 gen_helper_neon_qzip16,
3229 gen_helper_neon_qzip32,
3230 NULL,
3231 }
3232 };
3233 return do_zip_uzp(s, a, fn[a->q][a->size]);
3234}
3235
static bool do_vmovn(DisasContext *s, arg_2misc *a,
                     NeonGenNarrowEnvFn *narrowfn)
{
    /*
     * Narrowing moves (VMOVN/VQMOVN/VQMOVUN): narrow each 64-bit half
     * of the Q-register Vm into a 32-bit result.  narrowfn takes
     * cpu_env because the saturating variants need CPU state.
     */
    TCGv_i64 rm;
    TCGv_i32 rd0, rd1;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    /* The Q-register source must be even-numbered. */
    if (a->vm & 1) {
        return false;
    }

    if (!narrowfn) {
        /* Bad size: no narrowing helper for this element width. */
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    rm = tcg_temp_new_i64();
    rd0 = tcg_temp_new_i32();
    rd1 = tcg_temp_new_i32();

    /* Both halves are narrowed before Vd is written (Vd may alias Vm). */
    read_neon_element64(rm, a->vm, 0, MO_64);
    narrowfn(rd0, cpu_env, rm);
    read_neon_element64(rm, a->vm, 1, MO_64);
    narrowfn(rd1, cpu_env, rm);
    write_neon_element32(rd0, a->vd, 0, MO_32);
    write_neon_element32(rd1, a->vd, 1, MO_32);
    tcg_temp_free_i32(rd0);
    tcg_temp_free_i32(rd1);
    tcg_temp_free_i64(rm);
    return true;
}
3279
/*
 * Expand the narrowing-move family via a size-indexed narrow-helper
 * table (64-bit source elements down to 8/16/32 bits); size 3 is
 * invalid, hence NULL.
 */
#define DO_VMOVN(INSN, FUNC)                                    \
    static bool trans_##INSN(DisasContext *s, arg_2misc *a)     \
    {                                                           \
        static NeonGenNarrowEnvFn * const narrowfn[] = {        \
            FUNC##8,                                            \
            FUNC##16,                                           \
            FUNC##32,                                           \
            NULL,                                               \
        };                                                      \
        return do_vmovn(s, a, narrowfn[a->size]);               \
    }

/* Plain narrow, and the signed/unsigned saturating narrow variants. */
DO_VMOVN(VMOVN, gen_neon_narrow_u)
DO_VMOVN(VQMOVUN, gen_helper_neon_unarrow_sat)
DO_VMOVN(VQMOVN_S, gen_helper_neon_narrow_sat_s)
DO_VMOVN(VQMOVN_U, gen_helper_neon_narrow_sat_u)
3296
static bool trans_VSHLL(DisasContext *s, arg_2misc *a)
{
    /*
     * VSHLL (2-reg-misc form): widen each 32-bit half of the
     * D-register Vm and shift left by the element width in bits
     * (8 << size), producing a Q-register result.
     */
    TCGv_i32 rm0, rm1;
    TCGv_i64 rd;
    static NeonGenWidenFn * const widenfns[] = {
        gen_helper_neon_widen_u8,
        gen_helper_neon_widen_u16,
        tcg_gen_extu_i32_i64,
        NULL,
    };
    NeonGenWidenFn *widenfn = widenfns[a->size];

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    /* The Q-register destination must be even-numbered. */
    if (a->vd & 1) {
        return false;
    }

    if (!widenfn) {
        /* Bad size: 64-bit elements are invalid here. */
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    rd = tcg_temp_new_i64();
    rm0 = tcg_temp_new_i32();
    rm1 = tcg_temp_new_i32();

    /* Read both source halves before writing (Vd may overlap Vm). */
    read_neon_element32(rm0, a->vm, 0, MO_32);
    read_neon_element32(rm1, a->vm, 1, MO_32);

    widenfn(rd, rm0);
    tcg_gen_shli_i64(rd, rd, 8 << a->size);
    write_neon_element64(rd, a->vd, 0, MO_64);
    widenfn(rd, rm1);
    tcg_gen_shli_i64(rd, rd, 8 << a->size);
    write_neon_element64(rd, a->vd, 1, MO_64);

    tcg_temp_free_i64(rd);
    tcg_temp_free_i32(rm0);
    tcg_temp_free_i32(rm1);
    return true;
}
3350
static bool trans_VCVT_B16_F32(DisasContext *s, arg_2misc *a)
{
    /*
     * VCVT between bfloat16 and f32 (narrowing direction): convert
     * each 64-bit half (a pair of f32 values) of the Q-register Vm
     * to a pair of bf16 values.  Requires FEAT_BF16.
     */
    TCGv_ptr fpst;
    TCGv_i64 tmp;
    TCGv_i32 dst0, dst1;

    if (!dc_isar_feature(aa32_bf16, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    /* Q-register source must be even-numbered; size field must be 1. */
    if ((a->vm & 1) || (a->size != 1)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = fpstatus_ptr(FPST_STD);
    tmp = tcg_temp_new_i64();
    dst0 = tcg_temp_new_i32();
    dst1 = tcg_temp_new_i32();

    /* Both source halves are converted before Vd is written. */
    read_neon_element64(tmp, a->vm, 0, MO_64);
    gen_helper_bfcvt_pair(dst0, tmp, fpst);

    read_neon_element64(tmp, a->vm, 1, MO_64);
    gen_helper_bfcvt_pair(dst1, tmp, fpst);

    write_neon_element32(dst0, a->vd, 0, MO_32);
    write_neon_element32(dst1, a->vd, 1, MO_32);

    tcg_temp_free_i64(tmp);
    tcg_temp_free_i32(dst0);
    tcg_temp_free_i32(dst1);
    tcg_temp_free_ptr(fpst);
    return true;
}
3395
static bool trans_VCVT_F16_F32(DisasContext *s, arg_2misc *a)
{
    /*
     * VCVT f32 -> f16 (narrowing): four f32 elements from the
     * Q-register Vm are converted and packed pairwise into the
     * D-register Vd.  All of Vm is read before Vd is written, since
     * Vd may overlap Vm.
     */
    TCGv_ptr fpst;
    TCGv_i32 ahp, tmp, tmp2, tmp3;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON) ||
        !dc_isar_feature(aa32_fp16_spconv, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    /* Q-register source must be even-numbered; size field must be 1. */
    if ((a->vm & 1) || (a->size != 1)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = fpstatus_ptr(FPST_STD);
    ahp = get_ahp_flag();   /* alternative half-precision control */
    tmp = tcg_temp_new_i32();
    read_neon_element32(tmp, a->vm, 0, MO_32);
    gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
    tmp2 = tcg_temp_new_i32();
    read_neon_element32(tmp2, a->vm, 1, MO_32);
    gen_helper_vfp_fcvt_f32_to_f16(tmp2, tmp2, fpst, ahp);
    /* Pack elements 0 and 1 into the low 32 bits of the result. */
    tcg_gen_shli_i32(tmp2, tmp2, 16);
    tcg_gen_or_i32(tmp2, tmp2, tmp)
;
    read_neon_element32(tmp, a->vm, 2, MO_32);
    gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
    tmp3 = tcg_temp_new_i32();
    read_neon_element32(tmp3, a->vm, 3, MO_32);
    write_neon_element32(tmp2, a->vd, 0, MO_32);
    tcg_temp_free_i32(tmp2);
    gen_helper_vfp_fcvt_f32_to_f16(tmp3, tmp3, fpst, ahp);
    /* Pack elements 2 and 3 into the high 32 bits of the result. */
    tcg_gen_shli_i32(tmp3, tmp3, 16);
    tcg_gen_or_i32(tmp3, tmp3, tmp);
    write_neon_element32(tmp3, a->vd, 1, MO_32);
    tcg_temp_free_i32(tmp3);
    tcg_temp_free_i32(tmp);
    tcg_temp_free_i32(ahp);
    tcg_temp_free_ptr(fpst);

    return true;
}
3447
static bool trans_VCVT_F32_F16(DisasContext *s, arg_2misc *a)
{
    /*
     * VCVT f16 -> f32 (widening): four f16 elements packed in the
     * D-register Vm are unpacked and converted into the Q-register
     * Vd.  Both source words are read before any write, since Vd may
     * overlap Vm.
     */
    TCGv_ptr fpst;
    TCGv_i32 ahp, tmp, tmp2, tmp3;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON) ||
        !dc_isar_feature(aa32_fp16_spconv, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    /* Q-register destination must be even-numbered; size must be 1. */
    if ((a->vd & 1) || (a->size != 1)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = fpstatus_ptr(FPST_STD);
    ahp = get_ahp_flag();   /* alternative half-precision control */
    tmp3 = tcg_temp_new_i32();
    tmp2 = tcg_temp_new_i32();
    tmp = tcg_temp_new_i32();
    read_neon_element32(tmp, a->vm, 0, MO_32);
    read_neon_element32(tmp2, a->vm, 1, MO_32);
    /* Low halfword of source word 0 -> Vd element 0. */
    tcg_gen_ext16u_i32(tmp3, tmp);
    gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp);
    write_neon_element32(tmp3, a->vd, 0, MO_32);
    /* High halfword of source word 0 -> Vd element 1. */
    tcg_gen_shri_i32(tmp, tmp, 16);
    gen_helper_vfp_fcvt_f16_to_f32(tmp, tmp, fpst, ahp);
    write_neon_element32(tmp, a->vd, 1, MO_32);
    tcg_temp_free_i32(tmp);
    /* Low halfword of source word 1 -> Vd element 2. */
    tcg_gen_ext16u_i32(tmp3, tmp2);
    gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp);
    write_neon_element32(tmp3, a->vd, 2, MO_32);
    tcg_temp_free_i32(tmp3);
    /* High halfword of source word 1 -> Vd element 3. */
    tcg_gen_shri_i32(tmp2, tmp2, 16);
    gen_helper_vfp_fcvt_f16_to_f32(tmp2, tmp2, fpst, ahp);
    write_neon_element32(tmp2, a->vd, 3, MO_32);
    tcg_temp_free_i32(tmp2);
    tcg_temp_free_i32(ahp);
    tcg_temp_free_ptr(fpst);

    return true;
}
3499
static bool do_2misc_vec(DisasContext *s, arg_2misc *a, GVecGen2Fn *fn)
{
    /*
     * Common decode checks for a 2-register misc op expanded as a
     * single whole-vector gvec operation.
     */
    int vec_size = a->q ? 16 : 8;
    int rd_ofs = neon_full_reg_offset(a->vd);
    int rm_ofs = neon_full_reg_offset(a->vm);

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    /* 64-bit elements are not valid for this group. */
    if (a->size == 3) {
        return false;
    }

    /* Q-register operands must be even-numbered. */
    if ((a->vd | a->vm) & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fn(a->size, rd_ofs, rm_ofs, vec_size, vec_size);

    return true;
}
3532
/* Trivial trans function for a 2-reg-misc op that is one gvec call. */
#define DO_2MISC_VEC(INSN, FN)                                  \
    static bool trans_##INSN(DisasContext *s, arg_2misc *a)     \
    {                                                           \
        return do_2misc_vec(s, a, FN);                          \
    }

/* Integer negate/abs and the compare-against-zero family. */
DO_2MISC_VEC(VNEG, tcg_gen_gvec_neg)
DO_2MISC_VEC(VABS, tcg_gen_gvec_abs)
DO_2MISC_VEC(VCEQ0, gen_gvec_ceq0)
DO_2MISC_VEC(VCGT0, gen_gvec_cgt0)
DO_2MISC_VEC(VCLE0, gen_gvec_cle0)
DO_2MISC_VEC(VCGE0, gen_gvec_cge0)
DO_2MISC_VEC(VCLT0, gen_gvec_clt0)
3546
3547static bool trans_VMVN(DisasContext *s, arg_2misc *a)
3548{
3549 if (a->size != 0) {
3550 return false;
3551 }
3552 return do_2misc_vec(s, a, tcg_gen_gvec_not);
3553}
3554
/*
 * Adapt an out-of-line 3-operand gvec helper (which also reads rd)
 * to the 2-operand GVecGen2Fn shape, baking DATA into the call.
 */
#define WRAP_2M_3_OOL_FN(WRAPNAME, FUNC, DATA)                          \
    static void WRAPNAME(unsigned vece, uint32_t rd_ofs,                \
                         uint32_t rm_ofs, uint32_t oprsz,               \
                         uint32_t maxsz)                                \
    {                                                                   \
        tcg_gen_gvec_3_ool(rd_ofs, rd_ofs, rm_ofs, oprsz, maxsz,        \
                           DATA, FUNC);                                 \
    }

/* Same, for a plain 2-operand out-of-line gvec helper. */
#define WRAP_2M_2_OOL_FN(WRAPNAME, FUNC, DATA)                          \
    static void WRAPNAME(unsigned vece, uint32_t rd_ofs,                \
                         uint32_t rm_ofs, uint32_t oprsz,               \
                         uint32_t maxsz)                                \
    {                                                                   \
        tcg_gen_gvec_2_ool(rd_ofs, rm_ofs, oprsz, maxsz, DATA, FUNC);   \
    }

/* Crypto 2-reg-misc wrappers; DATA selects encrypt/decrypt direction. */
WRAP_2M_3_OOL_FN(gen_AESE, gen_helper_crypto_aese, 0)
WRAP_2M_3_OOL_FN(gen_AESD, gen_helper_crypto_aese, 1)
WRAP_2M_2_OOL_FN(gen_AESMC, gen_helper_crypto_aesmc, 0)
WRAP_2M_2_OOL_FN(gen_AESIMC, gen_helper_crypto_aesmc, 1)
WRAP_2M_2_OOL_FN(gen_SHA1H, gen_helper_crypto_sha1h, 0)
WRAP_2M_2_OOL_FN(gen_SHA1SU1, gen_helper_crypto_sha1su1, 0)
WRAP_2M_2_OOL_FN(gen_SHA256SU0, gen_helper_crypto_sha256su0, 0)
3579
/*
 * Crypto 2-reg-misc trans function: gated on the relevant ISA feature
 * and on the single architecturally valid size for the insn.
 */
#define DO_2M_CRYPTO(INSN, FEATURE, SIZE)                       \
    static bool trans_##INSN(DisasContext *s, arg_2misc *a)     \
    {                                                           \
        if (!dc_isar_feature(FEATURE, s) || a->size != SIZE) {  \
            return false;                                       \
        }                                                       \
        return do_2misc_vec(s, a, gen_##INSN);                  \
    }

/* AES ops use size 0; SHA ops use size 2. */
DO_2M_CRYPTO(AESE, aa32_aes, 0)
DO_2M_CRYPTO(AESD, aa32_aes, 0)
DO_2M_CRYPTO(AESMC, aa32_aes, 0)
DO_2M_CRYPTO(AESIMC, aa32_aes, 0)
DO_2M_CRYPTO(SHA1H, aa32_sha1, 2)
DO_2M_CRYPTO(SHA1SU1, aa32_sha1, 2)
DO_2M_CRYPTO(SHA256SU0, aa32_sha2, 2)
3596
static bool do_2misc(DisasContext *s, arg_2misc *a, NeonGenOneOpFn *fn)
{
    /*
     * Generic 2-register misc op: apply fn to each 32-bit chunk of
     * the source register (2 passes for a D reg, 4 for a Q reg).
     */
    TCGv_i32 tmp;
    int pass;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if (!fn) {
        /* Bad size: no helper for this element width. */
        return false;
    }

    /* Q-register operands must be even-numbered. */
    if ((a->vd | a->vm) & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i32();
    for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
        read_neon_element32(tmp, a->vm, pass, MO_32);
        fn(tmp, tmp);
        write_neon_element32(tmp, a->vd, pass, MO_32);
    }
    tcg_temp_free_i32(tmp);

    return true;
}
3635
3636static bool trans_VREV32(DisasContext *s, arg_2misc *a)
3637{
3638 static NeonGenOneOpFn * const fn[] = {
3639 tcg_gen_bswap32_i32,
3640 gen_swap_half,
3641 NULL,
3642 NULL,
3643 };
3644 return do_2misc(s, a, fn[a->size]);
3645}
3646
3647static bool trans_VREV16(DisasContext *s, arg_2misc *a)
3648{
3649 if (a->size != 0) {
3650 return false;
3651 }
3652 return do_2misc(s, a, gen_rev16);
3653}
3654
3655static bool trans_VCLS(DisasContext *s, arg_2misc *a)
3656{
3657 static NeonGenOneOpFn * const fn[] = {
3658 gen_helper_neon_cls_s8,
3659 gen_helper_neon_cls_s16,
3660 gen_helper_neon_cls_s32,
3661 NULL,
3662 };
3663 return do_2misc(s, a, fn[a->size]);
3664}
3665
/* 32-bit count-leading-zeros; 32 is the result for a zero input. */
static void do_VCLZ_32(TCGv_i32 rd, TCGv_i32 rm)
{
    tcg_gen_clzi_i32(rd, rm, 32);
}
3670
3671static bool trans_VCLZ(DisasContext *s, arg_2misc *a)
3672{
3673 static NeonGenOneOpFn * const fn[] = {
3674 gen_helper_neon_clz_u8,
3675 gen_helper_neon_clz_u16,
3676 do_VCLZ_32,
3677 NULL,
3678 };
3679 return do_2misc(s, a, fn[a->size]);
3680}
3681
3682static bool trans_VCNT(DisasContext *s, arg_2misc *a)
3683{
3684 if (a->size != 0) {
3685 return false;
3686 }
3687 return do_2misc(s, a, gen_helper_neon_cnt_u8);
3688}
3689
/*
 * fp VABS: clear the sign bit of every element with a vector AND
 * (mask 0x7fff for f16 lanes, 0x7fffffff for f32).
 */
static void gen_VABS_F(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                       uint32_t oprsz, uint32_t maxsz)
{
    tcg_gen_gvec_andi(vece, rd_ofs, rm_ofs,
                      vece == MO_16 ? 0x7fff : 0x7fffffff,
                      oprsz, maxsz);
}
3697
3698static bool trans_VABS_F(DisasContext *s, arg_2misc *a)
3699{
3700 if (a->size == MO_16) {
3701 if (!dc_isar_feature(aa32_fp16_arith, s)) {
3702 return false;
3703 }
3704 } else if (a->size != MO_32) {
3705 return false;
3706 }
3707 return do_2misc_vec(s, a, gen_VABS_F);
3708}
3709
/*
 * fp VNEG: flip the sign bit of every element with a vector XOR
 * (mask 0x8000 for f16 lanes, 0x80000000 for f32).
 */
static void gen_VNEG_F(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                       uint32_t oprsz, uint32_t maxsz)
{
    tcg_gen_gvec_xori(vece, rd_ofs, rm_ofs,
                      vece == MO_16 ? 0x8000 : 0x80000000,
                      oprsz, maxsz);
}
3717
3718static bool trans_VNEG_F(DisasContext *s, arg_2misc *a)
3719{
3720 if (a->size == MO_16) {
3721 if (!dc_isar_feature(aa32_fp16_arith, s)) {
3722 return false;
3723 }
3724 } else if (a->size != MO_32) {
3725 return false;
3726 }
3727 return do_2misc_vec(s, a, gen_VNEG_F);
3728}
3729
3730static bool trans_VRECPE(DisasContext *s, arg_2misc *a)
3731{
3732 if (a->size != 2) {
3733 return false;
3734 }
3735 return do_2misc(s, a, gen_helper_recpe_u32);
3736}
3737
3738static bool trans_VRSQRTE(DisasContext *s, arg_2misc *a)
3739{
3740 if (a->size != 2) {
3741 return false;
3742 }
3743 return do_2misc(s, a, gen_helper_rsqrte_u32);
3744}
3745
/*
 * Adapt a one-operand helper that needs cpu_env (for saturation
 * state) to the plain NeonGenOneOpFn shape used by do_2misc().
 */
#define WRAP_1OP_ENV_FN(WRAPNAME, FUNC)                 \
    static void WRAPNAME(TCGv_i32 d, TCGv_i32 m)        \
    {                                                   \
        FUNC(d, cpu_env, m);                            \
    }

/* Saturating absolute value and negate, per element size. */
WRAP_1OP_ENV_FN(gen_VQABS_s8, gen_helper_neon_qabs_s8)
WRAP_1OP_ENV_FN(gen_VQABS_s16, gen_helper_neon_qabs_s16)
WRAP_1OP_ENV_FN(gen_VQABS_s32, gen_helper_neon_qabs_s32)
WRAP_1OP_ENV_FN(gen_VQNEG_s8, gen_helper_neon_qneg_s8)
WRAP_1OP_ENV_FN(gen_VQNEG_s16, gen_helper_neon_qneg_s16)
WRAP_1OP_ENV_FN(gen_VQNEG_s32, gen_helper_neon_qneg_s32)
3758
3759static bool trans_VQABS(DisasContext *s, arg_2misc *a)
3760{
3761 static NeonGenOneOpFn * const fn[] = {
3762 gen_VQABS_s8,
3763 gen_VQABS_s16,
3764 gen_VQABS_s32,
3765 NULL,
3766 };
3767 return do_2misc(s, a, fn[a->size]);
3768}
3769
3770static bool trans_VQNEG(DisasContext *s, arg_2misc *a)
3771{
3772 static NeonGenOneOpFn * const fn[] = {
3773 gen_VQNEG_s8,
3774 gen_VQNEG_s16,
3775 gen_VQNEG_s32,
3776 NULL,
3777 };
3778 return do_2misc(s, a, fn[a->size]);
3779}
3780
/*
 * Expand a fp 2-reg-misc op via a size-indexed gvec helper, creating
 * and freeing the Neon standard fpstatus pointer around the call.
 * The fp16 form is gated on FEAT_FP16 arithmetic; only fp16/fp32
 * element sizes exist.
 */
#define DO_2MISC_FP_VEC(INSN, HFUNC, SFUNC)                             \
    static void gen_##INSN(unsigned vece, uint32_t rd_ofs,              \
                           uint32_t rm_ofs,                             \
                           uint32_t oprsz, uint32_t maxsz)              \
    {                                                                   \
        static gen_helper_gvec_2_ptr * const fns[4] = {                 \
            NULL, HFUNC, SFUNC, NULL,                                   \
        };                                                              \
        TCGv_ptr fpst;                                                  \
        fpst = fpstatus_ptr(vece == MO_16 ? FPST_STD_F16 : FPST_STD);   \
        tcg_gen_gvec_2_ptr(rd_ofs, rm_ofs, fpst, oprsz, maxsz, 0,       \
                           fns[vece]);                                  \
        tcg_temp_free_ptr(fpst);                                        \
    }                                                                   \
    static bool trans_##INSN(DisasContext *s, arg_2misc *a)             \
    {                                                                   \
        if (a->size == MO_16) {                                         \
            if (!dc_isar_feature(aa32_fp16_arith, s)) {                 \
                return false;                                           \
            }                                                           \
        } else if (a->size != MO_32) {                                  \
            return false;                                               \
        }                                                               \
        return do_2misc_vec(s, a, gen_##INSN);                          \
    }

/* fp estimates, compares-against-zero, and int<->fp conversions. */
DO_2MISC_FP_VEC(VRECPE_F, gen_helper_gvec_frecpe_h, gen_helper_gvec_frecpe_s)
DO_2MISC_FP_VEC(VRSQRTE_F, gen_helper_gvec_frsqrte_h, gen_helper_gvec_frsqrte_s)
DO_2MISC_FP_VEC(VCGT0_F, gen_helper_gvec_fcgt0_h, gen_helper_gvec_fcgt0_s)
DO_2MISC_FP_VEC(VCGE0_F, gen_helper_gvec_fcge0_h, gen_helper_gvec_fcge0_s)
DO_2MISC_FP_VEC(VCEQ0_F, gen_helper_gvec_fceq0_h, gen_helper_gvec_fceq0_s)
DO_2MISC_FP_VEC(VCLT0_F, gen_helper_gvec_fclt0_h, gen_helper_gvec_fclt0_s)
DO_2MISC_FP_VEC(VCLE0_F, gen_helper_gvec_fcle0_h, gen_helper_gvec_fcle0_s)
DO_2MISC_FP_VEC(VCVT_FS, gen_helper_gvec_sstoh, gen_helper_gvec_sitos)
DO_2MISC_FP_VEC(VCVT_FU, gen_helper_gvec_ustoh, gen_helper_gvec_uitos)
DO_2MISC_FP_VEC(VCVT_SF, gen_helper_gvec_tosszh, gen_helper_gvec_tosizs)
DO_2MISC_FP_VEC(VCVT_UF, gen_helper_gvec_touszh, gen_helper_gvec_touizs)

/* Wrapped by trans_VRINTX below to add the ARMv8 feature check. */
DO_2MISC_FP_VEC(VRINTX_impl, gen_helper_gvec_vrintx_h, gen_helper_gvec_vrintx_s)
3820
3821static bool trans_VRINTX(DisasContext *s, arg_2misc *a)
3822{
3823 if (!arm_dc_feature(s, ARM_FEATURE_V8)) {
3824 return false;
3825 }
3826 return trans_VRINTX_impl(s, a);
3827}
3828
/*
 * Expand a 2-reg-misc FP operation that takes an explicit rounding mode.
 * gen_##INSN() builds the gvec expansion: it creates the standard FP
 * status pointer (the FP16 flavour when vece == 1), and passes
 * arm_rmode_to_sf(RMODE) as the helper's immediate data so the helper
 * knows which rounding mode to apply.  trans_##INSN() is the decode
 * hook: it UNDEFs unless ARMv8, accepts only MO_16 (and then only with
 * fp16 arithmetic support) or MO_32 element sizes, and hands off to
 * do_2misc_vec().  fns[] is indexed by vece; the MO_8/MO_64 slots are
 * NULL since those sizes are rejected before expansion.
 */
#define DO_VEC_RMODE(INSN, RMODE, OP)                                   \
    static void gen_##INSN(unsigned vece, uint32_t rd_ofs,              \
                           uint32_t rm_ofs,                             \
                           uint32_t oprsz, uint32_t maxsz)              \
    {                                                                   \
        static gen_helper_gvec_2_ptr * const fns[4] = {                 \
            NULL,                                                       \
            gen_helper_gvec_##OP##h,                                    \
            gen_helper_gvec_##OP##s,                                    \
            NULL,                                                       \
        };                                                              \
        TCGv_ptr fpst;                                                  \
        fpst = fpstatus_ptr(vece == 1 ? FPST_STD_F16 : FPST_STD);       \
        tcg_gen_gvec_2_ptr(rd_ofs, rm_ofs, fpst, oprsz, maxsz,          \
                           arm_rmode_to_sf(RMODE), fns[vece]);          \
        tcg_temp_free_ptr(fpst);                                        \
    }                                                                   \
    static bool trans_##INSN(DisasContext *s, arg_2misc *a)             \
    {                                                                   \
        if (!arm_dc_feature(s, ARM_FEATURE_V8)) {                       \
            return false;                                               \
        }                                                               \
        if (a->size == MO_16) {                                         \
            if (!dc_isar_feature(aa32_fp16_arith, s)) {                 \
                return false;                                           \
            }                                                           \
        } else if (a->size != MO_32) {                                  \
            return false;                                               \
        }                                                               \
        return do_2misc_vec(s, a, gen_##INSN);                          \
    }
3860
/*
 * VCVT{A,N,P,M}{U,S}: float-to-integer conversions with an explicit
 * rounding mode — tie-away, tie-to-even, towards +inf, towards -inf —
 * in unsigned and signed variants.
 */
DO_VEC_RMODE(VCVTAU, FPROUNDING_TIEAWAY, vcvt_rm_u)
DO_VEC_RMODE(VCVTAS, FPROUNDING_TIEAWAY, vcvt_rm_s)
DO_VEC_RMODE(VCVTNU, FPROUNDING_TIEEVEN, vcvt_rm_u)
DO_VEC_RMODE(VCVTNS, FPROUNDING_TIEEVEN, vcvt_rm_s)
DO_VEC_RMODE(VCVTPU, FPROUNDING_POSINF, vcvt_rm_u)
DO_VEC_RMODE(VCVTPS, FPROUNDING_POSINF, vcvt_rm_s)
DO_VEC_RMODE(VCVTMU, FPROUNDING_NEGINF, vcvt_rm_u)
DO_VEC_RMODE(VCVTMS, FPROUNDING_NEGINF, vcvt_rm_s)

/* VRINT{N,A,Z,M,P}: round-to-integral with an explicit rounding mode. */
DO_VEC_RMODE(VRINTN, FPROUNDING_TIEEVEN, vrint_rm_)
DO_VEC_RMODE(VRINTA, FPROUNDING_TIEAWAY, vrint_rm_)
DO_VEC_RMODE(VRINTZ, FPROUNDING_ZERO, vrint_rm_)
DO_VEC_RMODE(VRINTM, FPROUNDING_NEGINF, vrint_rm_)
DO_VEC_RMODE(VRINTP, FPROUNDING_POSINF, vrint_rm_)
3875
3876static bool trans_VSWP(DisasContext *s, arg_2misc *a)
3877{
3878 TCGv_i64 rm, rd;
3879 int pass;
3880
3881 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
3882 return false;
3883 }
3884
3885
3886 if (!dc_isar_feature(aa32_simd_r32, s) &&
3887 ((a->vd | a->vm) & 0x10)) {
3888 return false;
3889 }
3890
3891 if (a->size != 0) {
3892 return false;
3893 }
3894
3895 if ((a->vd | a->vm) & a->q) {
3896 return false;
3897 }
3898
3899 if (!vfp_access_check(s)) {
3900 return true;
3901 }
3902
3903 rm = tcg_temp_new_i64();
3904 rd = tcg_temp_new_i64();
3905 for (pass = 0; pass < (a->q ? 2 : 1); pass++) {
3906 read_neon_element64(rm, a->vm, pass, MO_64);
3907 read_neon_element64(rd, a->vd, pass, MO_64);
3908 write_neon_element64(rm, a->vd, pass, MO_64);
3909 write_neon_element64(rd, a->vm, pass, MO_64);
3910 }
3911 tcg_temp_free_i64(rm);
3912 tcg_temp_free_i64(rd);
3913
3914 return true;
3915}
3916static void gen_neon_trn_u8(TCGv_i32 t0, TCGv_i32 t1)
3917{
3918 TCGv_i32 rd, tmp;
3919
3920 rd = tcg_temp_new_i32();
3921 tmp = tcg_temp_new_i32();
3922
3923 tcg_gen_shli_i32(rd, t0, 8);
3924 tcg_gen_andi_i32(rd, rd, 0xff00ff00);
3925 tcg_gen_andi_i32(tmp, t1, 0x00ff00ff);
3926 tcg_gen_or_i32(rd, rd, tmp);
3927
3928 tcg_gen_shri_i32(t1, t1, 8);
3929 tcg_gen_andi_i32(t1, t1, 0x00ff00ff);
3930 tcg_gen_andi_i32(tmp, t0, 0xff00ff00);
3931 tcg_gen_or_i32(t1, t1, tmp);
3932 tcg_gen_mov_i32(t0, rd);
3933
3934 tcg_temp_free_i32(tmp);
3935 tcg_temp_free_i32(rd);
3936}
3937
3938static void gen_neon_trn_u16(TCGv_i32 t0, TCGv_i32 t1)
3939{
3940 TCGv_i32 rd, tmp;
3941
3942 rd = tcg_temp_new_i32();
3943 tmp = tcg_temp_new_i32();
3944
3945 tcg_gen_shli_i32(rd, t0, 16);
3946 tcg_gen_andi_i32(tmp, t1, 0xffff);
3947 tcg_gen_or_i32(rd, rd, tmp);
3948 tcg_gen_shri_i32(t1, t1, 16);
3949 tcg_gen_andi_i32(tmp, t0, 0xffff0000);
3950 tcg_gen_or_i32(t1, t1, tmp);
3951 tcg_gen_mov_i32(t0, rd);
3952
3953 tcg_temp_free_i32(tmp);
3954 tcg_temp_free_i32(rd);
3955}
3956
3957static bool trans_VTRN(DisasContext *s, arg_2misc *a)
3958{
3959 TCGv_i32 tmp, tmp2;
3960 int pass;
3961
3962 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
3963 return false;
3964 }
3965
3966
3967 if (!dc_isar_feature(aa32_simd_r32, s) &&
3968 ((a->vd | a->vm) & 0x10)) {
3969 return false;
3970 }
3971
3972 if ((a->vd | a->vm) & a->q) {
3973 return false;
3974 }
3975
3976 if (a->size == 3) {
3977 return false;
3978 }
3979
3980 if (!vfp_access_check(s)) {
3981 return true;
3982 }
3983
3984 tmp = tcg_temp_new_i32();
3985 tmp2 = tcg_temp_new_i32();
3986 if (a->size == MO_32) {
3987 for (pass = 0; pass < (a->q ? 4 : 2); pass += 2) {
3988 read_neon_element32(tmp, a->vm, pass, MO_32);
3989 read_neon_element32(tmp2, a->vd, pass + 1, MO_32);
3990 write_neon_element32(tmp2, a->vm, pass, MO_32);
3991 write_neon_element32(tmp, a->vd, pass + 1, MO_32);
3992 }
3993 } else {
3994 for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
3995 read_neon_element32(tmp, a->vm, pass, MO_32);
3996 read_neon_element32(tmp2, a->vd, pass, MO_32);
3997 if (a->size == MO_8) {
3998 gen_neon_trn_u8(tmp, tmp2);
3999 } else {
4000 gen_neon_trn_u16(tmp, tmp2);
4001 }
4002 write_neon_element32(tmp2, a->vm, pass, MO_32);
4003 write_neon_element32(tmp, a->vd, pass, MO_32);
4004 }
4005 }
4006 tcg_temp_free_i32(tmp);
4007 tcg_temp_free_i32(tmp2);
4008 return true;
4009}
4010
4011static bool trans_VSMMLA(DisasContext *s, arg_VSMMLA *a)
4012{
4013 if (!dc_isar_feature(aa32_i8mm, s)) {
4014 return false;
4015 }
4016 return do_neon_ddda(s, 7, a->vd, a->vn, a->vm, 0,
4017 gen_helper_gvec_smmla_b);
4018}
4019
4020static bool trans_VUMMLA(DisasContext *s, arg_VUMMLA *a)
4021{
4022 if (!dc_isar_feature(aa32_i8mm, s)) {
4023 return false;
4024 }
4025 return do_neon_ddda(s, 7, a->vd, a->vn, a->vm, 0,
4026 gen_helper_gvec_ummla_b);
4027}
4028
4029static bool trans_VUSMMLA(DisasContext *s, arg_VUSMMLA *a)
4030{
4031 if (!dc_isar_feature(aa32_i8mm, s)) {
4032 return false;
4033 }
4034 return do_neon_ddda(s, 7, a->vd, a->vn, a->vm, 0,
4035 gen_helper_gvec_usmmla_b);
4036}
4037
4038static bool trans_VMMLA_b16(DisasContext *s, arg_VMMLA_b16 *a)
4039{
4040 if (!dc_isar_feature(aa32_bf16, s)) {
4041 return false;
4042 }
4043 return do_neon_ddda(s, 7, a->vd, a->vn, a->vm, 0,
4044 gen_helper_gvec_bfmmla);
4045}
4046
4047static bool trans_VFMA_b16(DisasContext *s, arg_VFMA_b16 *a)
4048{
4049 if (!dc_isar_feature(aa32_bf16, s)) {
4050 return false;
4051 }
4052 return do_neon_ddda_fpst(s, 7, a->vd, a->vn, a->vm, a->q, FPST_STD,
4053 gen_helper_gvec_bfmlal);
4054}
4055
4056static bool trans_VFMA_b16_scal(DisasContext *s, arg_VFMA_b16_scal *a)
4057{
4058 if (!dc_isar_feature(aa32_bf16, s)) {
4059 return false;
4060 }
4061 return do_neon_ddda_fpst(s, 6, a->vd, a->vn, a->vm,
4062 (a->index << 1) | a->q, FPST_STD,
4063 gen_helper_gvec_bfmlal_idx);
4064}
4065