1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19#include "qemu/osdep.h"
20#include "cpu.h"
21#include "exec/exec-all.h"
22#include "qemu/host-utils.h"
23#include "exec/helper-proto.h"
24#include "crypto/aes.h"
25
26#include "helper_regs.h"
27
28
29
30target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
31 uint32_t oe)
32{
33 uint64_t rt = 0;
34 int overflow = 0;
35
36 uint64_t dividend = (uint64_t)ra << 32;
37 uint64_t divisor = (uint32_t)rb;
38
39 if (unlikely(divisor == 0)) {
40 overflow = 1;
41 } else {
42 rt = dividend / divisor;
43 overflow = rt > UINT32_MAX;
44 }
45
46 if (unlikely(overflow)) {
47 rt = 0;
48 }
49
50 if (oe) {
51 if (unlikely(overflow)) {
52 env->so = env->ov = 1;
53 } else {
54 env->ov = 0;
55 }
56 }
57
58 return (target_ulong)rt;
59}
60
61target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb,
62 uint32_t oe)
63{
64 int64_t rt = 0;
65 int overflow = 0;
66
67 int64_t dividend = (int64_t)ra << 32;
68 int64_t divisor = (int64_t)((int32_t)rb);
69
70 if (unlikely((divisor == 0) ||
71 ((divisor == -1ull) && (dividend == INT64_MIN)))) {
72 overflow = 1;
73 } else {
74 rt = dividend / divisor;
75 overflow = rt != (int32_t)rt;
76 }
77
78 if (unlikely(overflow)) {
79 rt = 0;
80 }
81
82 if (oe) {
83 if (unlikely(overflow)) {
84 env->so = env->ov = 1;
85 } else {
86 env->ov = 0;
87 }
88 }
89
90 return (target_ulong)rt;
91}
92
93#if defined(TARGET_PPC64)
94
/*
 * divdeu: Divide Doubleword Extended Unsigned.
 * Divides the 128-bit value (RA || 64 zero bits) by RB via divu128()
 * (see qemu/host-utils.h), which reports overflow — presumably when the
 * quotient does not fit in 64 bits or RB is zero; confirm against the
 * divu128() contract.  On overflow the result is forced to zero and,
 * with OE set, OV is raised and SO is made sticky.
 */
uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
{
    uint64_t rt = 0;
    int overflow = 0;

    overflow = divu128(&rt, &ra, rb);

    if (unlikely(overflow)) {
        rt = 0;
    }

    if (oe) {
        if (unlikely(overflow)) {
            env->so = env->ov = 1;
        } else {
            env->ov = 0;
        }
    }

    return rt;
}
116
/*
 * divde: Divide Doubleword Extended (signed).
 * Signed 128-bit divide of (RA || 64 zero bits) by RB via divs128()
 * (see qemu/host-utils.h); overflow is reported by the helper.  On
 * overflow the result is forced to zero and, with OE set, OV is raised
 * and SO is made sticky.
 */
uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
{
    int64_t rt = 0;
    int64_t ra = (int64_t)rau;
    int64_t rb = (int64_t)rbu;
    int overflow = divs128(&rt, &ra, rb);

    if (unlikely(overflow)) {
        rt = 0;
    }

    if (oe) {

        if (unlikely(overflow)) {
            env->so = env->ov = 1;
        } else {
            env->ov = 0;
        }
    }

    return rt;
}
139
140#endif
141
142
/*
 * cntlzw: count leading zeros in the low 32 bits of the operand.
 * Assumes clz32() returns 32 for a zero input, matching the Power ISA
 * cntlzw definition — see qemu/host-utils.h to confirm.
 */
target_ulong helper_cntlzw(target_ulong t)
{
    return clz32(t);
}
147
148#if defined(TARGET_PPC64)
/*
 * cntlzd: count leading zeros in the full 64-bit operand.
 * Assumes clz64() returns 64 for a zero input — see qemu/host-utils.h.
 */
target_ulong helper_cntlzd(target_ulong t)
{
    return clz64(t);
}
153#endif
154
155#if defined(TARGET_PPC64)
156
/*
 * bpermd: Bit Permute Doubleword.
 * Each byte i of RS (i = 0..7, least-significant byte first) is a bit
 * index into RB, using big-endian bit numbering (index 0 selects the
 * most-significant bit).  If the index is < 64 and the selected bit of
 * RB is set, bit i of the result is set; all other result bits are 0.
 */
uint64_t helper_bpermd(uint64_t rs, uint64_t rb)
{
    uint64_t ra = 0;
    int byte;

    for (byte = 0; byte < 8; byte++) {
        unsigned idx = (rs >> (byte * 8)) & 0xFF;

        if (idx < 64 && (rb & (1ull << (63 - idx)))) {
            ra |= 1u << byte;
        }
    }
    return ra;
}
172
173#endif
174
175target_ulong helper_cmpb(target_ulong rs, target_ulong rb)
176{
177 target_ulong mask = 0xff;
178 target_ulong ra = 0;
179 int i;
180
181 for (i = 0; i < sizeof(target_ulong); i++) {
182 if ((rs & mask) == (rb & mask)) {
183 ra |= mask;
184 }
185 mask <<= 8;
186 }
187 return ra;
188}
189
190
191target_ulong helper_sraw(CPUPPCState *env, target_ulong value,
192 target_ulong shift)
193{
194 int32_t ret;
195
196 if (likely(!(shift & 0x20))) {
197 if (likely((uint32_t)shift != 0)) {
198 shift &= 0x1f;
199 ret = (int32_t)value >> shift;
200 if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) {
201 env->ca = 0;
202 } else {
203 env->ca = 1;
204 }
205 } else {
206 ret = (int32_t)value;
207 env->ca = 0;
208 }
209 } else {
210 ret = (int32_t)value >> 31;
211 env->ca = (ret != 0);
212 }
213 return (target_long)ret;
214}
215
216#if defined(TARGET_PPC64)
217target_ulong helper_srad(CPUPPCState *env, target_ulong value,
218 target_ulong shift)
219{
220 int64_t ret;
221
222 if (likely(!(shift & 0x40))) {
223 if (likely((uint64_t)shift != 0)) {
224 shift &= 0x3f;
225 ret = (int64_t)value >> shift;
226 if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) {
227 env->ca = 0;
228 } else {
229 env->ca = 1;
230 }
231 } else {
232 ret = (int64_t)value;
233 env->ca = 0;
234 }
235 } else {
236 ret = (int64_t)value >> 63;
237 env->ca = (ret != 0);
238 }
239 return ret;
240}
241#endif
242
243#if defined(TARGET_PPC64)
/*
 * popcntb (64-bit build): parallel per-byte population count.
 * Classic SWAR bit-counting: after the three folding steps each byte of
 * the result holds the number of set bits in the corresponding source
 * byte (0..8).
 */
target_ulong helper_popcntb(target_ulong val)
{
    val = (val & 0x5555555555555555ULL) + ((val >> 1) &
                                          0x5555555555555555ULL);
    val = (val & 0x3333333333333333ULL) + ((val >> 2) &
                                          0x3333333333333333ULL);
    val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
                                          0x0f0f0f0f0f0f0f0fULL);
    return val;
}
254
/*
 * popcntw (64-bit build): per-word population count.
 * Two more SWAR folding steps than popcntb, so each 32-bit half of the
 * result holds the number of set bits in the corresponding source word.
 */
target_ulong helper_popcntw(target_ulong val)
{
    val = (val & 0x5555555555555555ULL) + ((val >> 1) &
                                          0x5555555555555555ULL);
    val = (val & 0x3333333333333333ULL) + ((val >> 2) &
                                          0x3333333333333333ULL);
    val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
                                          0x0f0f0f0f0f0f0f0fULL);
    val = (val & 0x00ff00ff00ff00ffULL) + ((val >> 8) &
                                          0x00ff00ff00ff00ffULL);
    val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) &
                                          0x0000ffff0000ffffULL);
    return val;
}
269
/* popcntd: population count of the whole 64-bit register via ctpop64(). */
target_ulong helper_popcntd(target_ulong val)
{
    return ctpop64(val);
}
274#else
/*
 * popcntb (32-bit build): parallel per-byte population count, 32-bit
 * SWAR variant of the 64-bit implementation above.
 */
target_ulong helper_popcntb(target_ulong val)
{
    val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
    val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
    val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
    return val;
}
282
/*
 * popcntw (32-bit build): full folding, so the result is the total
 * number of set bits in the 32-bit operand.
 */
target_ulong helper_popcntw(target_ulong val)
{
    val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
    val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
    val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
    val = (val & 0x00ff00ff) + ((val >> 8) & 0x00ff00ff);
    val = (val & 0x0000ffff) + ((val >> 16) & 0x0000ffff);
    return val;
}
292#endif
293
294
295
/*
 * div: legacy POWER divide using the MQ register.
 * The 64-bit dividend is (RA || MQ); the quotient is returned and the
 * remainder left in MQ.  INT32_MIN / -1 and divide-by-zero return
 * INT32_MIN with MQ cleared.
 * NOTE(review): the remainder uses an unsigned 64-bit modulo
 * (tmp % arg2) while the quotient divides unsigned tmp by the
 * sign-converted divisor — mixed-signedness that looks deliberate for
 * legacy POWER semantics, but worth confirming against the POWER
 * architecture manual.
 */
target_ulong helper_div(CPUPPCState *env, target_ulong arg1, target_ulong arg2)
{
    uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];

    if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->spr[SPR_MQ] = tmp % arg2;
        return tmp / (int32_t)arg2;
    }
}
309
/*
 * divo: legacy POWER divide with overflow reporting.
 * Same dividend (RA || MQ) / divisor computation as helper_div, but
 * additionally sets OV (and sticky SO) when the operation overflows,
 * including when the quotient does not fit in 32 bits.
 * NOTE(review): shares the mixed signed/unsigned arithmetic of
 * helper_div — verify against the POWER architecture manual.
 */
target_ulong helper_divo(CPUPPCState *env, target_ulong arg1,
                         target_ulong arg2)
{
    uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];

    if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->so = env->ov = 1;
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->spr[SPR_MQ] = tmp % arg2;
        tmp /= (int32_t)arg2;
        if ((int32_t)tmp != tmp) {
            env->so = env->ov = 1;
        } else {
            env->ov = 0;
        }
        return tmp;
    }
}
331
332target_ulong helper_divs(CPUPPCState *env, target_ulong arg1,
333 target_ulong arg2)
334{
335 if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
336 (int32_t)arg2 == 0) {
337 env->spr[SPR_MQ] = 0;
338 return INT32_MIN;
339 } else {
340 env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
341 return (int32_t)arg1 / (int32_t)arg2;
342 }
343}
344
345target_ulong helper_divso(CPUPPCState *env, target_ulong arg1,
346 target_ulong arg2)
347{
348 if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
349 (int32_t)arg2 == 0) {
350 env->so = env->ov = 1;
351 env->spr[SPR_MQ] = 0;
352 return INT32_MIN;
353 } else {
354 env->ov = 0;
355 env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
356 return (int32_t)arg1 / (int32_t)arg2;
357 }
358}
359
360
361
362
363
364
365
366
367
368#if !defined(CONFIG_USER_ONLY)
/*
 * 602 mfrom: table-driven helper for the PowerPC 602 "mfrom"
 * instruction.  Valid inputs (< 602) index a precomputed ROM table;
 * the #include keeps the generated table local to this function.
 * Out-of-range inputs return 0.
 */
target_ulong helper_602_mfrom(target_ulong arg)
{
    if (likely(arg < 602)) {
#include "mfrom_table.c"
        return mfrom_ROM_table[arg];
    } else {
        return 0;
    }
}
378#endif
379
380
381
382#if defined(HOST_WORDS_BIGENDIAN)
383#define HI_IDX 0
384#define LO_IDX 1
385#define AVRB(i) u8[i]
386#define AVRW(i) u32[i]
387#else
388#define HI_IDX 1
389#define LO_IDX 0
390#define AVRB(i) u8[15-(i)]
391#define AVRW(i) u32[3-(i)]
392#endif
393
394#if defined(HOST_WORDS_BIGENDIAN)
395#define VECTOR_FOR_INORDER_I(index, element) \
396 for (index = 0; index < ARRAY_SIZE(r->element); index++)
397#else
398#define VECTOR_FOR_INORDER_I(index, element) \
399 for (index = ARRAY_SIZE(r->element)-1; index >= 0; index--)
400#endif
401
402
/*
 * Saturating narrowing converters cvt<from><to>(): clamp x to
 * [min, max] of the destination type and set *sat when clamping
 * occurred (*sat is never cleared, so it accumulates across calls).
 * SATCVT handles signed sources (checks both bounds); SATCVTU handles
 * unsigned sources (upper bound only).
 */
#define SATCVT(from, to, from_type, to_type, min, max)          \
    static inline to_type cvt##from##to(from_type x, int *sat)  \
    {                                                           \
        to_type r;                                              \
                                                                \
        if (x < (from_type)min) {                               \
            r = min;                                            \
            *sat = 1;                                           \
        } else if (x > (from_type)max) {                        \
            r = max;                                            \
            *sat = 1;                                           \
        } else {                                                \
            r = x;                                              \
        }                                                       \
        return r;                                               \
    }
#define SATCVTU(from, to, from_type, to_type, min, max)         \
    static inline to_type cvt##from##to(from_type x, int *sat)  \
    {                                                           \
        to_type r;                                              \
                                                                \
        if (x > (from_type)max) {                               \
            r = max;                                            \
            *sat = 1;                                           \
        } else {                                                \
            r = x;                                              \
        }                                                       \
        return r;                                               \
    }
SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX)
SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX)
SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX)

SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX)
SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX)
SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX)
SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX)
SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX)
SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX)
#undef SATCVT
#undef SATCVTU
444
/*
 * lvsl: Load Vector for Shift Left.
 * Fills the result, in architectural element order (the inorder macro
 * compensates for host endianness), with the byte sequence
 * sh, sh+1, ..., sh+15 where sh is the low 4 bits of the address.
 */
void helper_lvsl(ppc_avr_t *r, target_ulong sh)
{
    int i, j = (sh & 0xf);

    VECTOR_FOR_INORDER_I(i, u8) {
        r->u8[i] = j++;
    }
}
453
/*
 * lvsr: Load Vector for Shift Right.
 * Like lvsl, but the byte sequence starts at 0x10 - sh, i.e.
 * 16-sh, 17-sh, ..., 31-sh.
 */
void helper_lvsr(ppc_avr_t *r, target_ulong sh)
{
    int i, j = 0x10 - (sh & 0xf);

    VECTOR_FOR_INORDER_I(i, u8) {
        r->u8[i] = j++;
    }
}
462
/*
 * mtvscr: copy the least-significant architectural word of the source
 * AVR into VSCR.  The u32 index differs by host endianness so the same
 * architectural word is selected either way.
 */
void helper_mtvscr(CPUPPCState *env, ppc_avr_t *r)
{
#if defined(HOST_WORDS_BIGENDIAN)
    env->vscr = r->u32[3];
#else
    env->vscr = r->u32[0];
#endif
    /* Mirror VSCR[NJ] into softfloat's flush-to-zero mode for
       subsequent vector FP operations. */
    set_flush_to_zero(vscr_nj, &env->vec_status);
}
472
473void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
474{
475 int i;
476
477 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
478 r->u32[i] = ~a->u32[i] < b->u32[i];
479 }
480}
481
/*
 * Element-wise modulo (non-saturating) vector arithmetic: VARITH_DO
 * expands to one helper that applies 'op' lane by lane; VARITH
 * instantiates the add/sub pair for one element width.
 */
#define VARITH_DO(name, op, element)                                    \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            r->element[i] = a->element[i] op b->element[i];             \
        }                                                               \
    }
#define VARITH(suffix, element)                         \
    VARITH_DO(add##suffix, +, element)                  \
    VARITH_DO(sub##suffix, -, element)
VARITH(ubm, u8)
VARITH(uhm, u16)
VARITH(uwm, u32)
VARITH(udm, u64)
VARITH_DO(muluwm, *, u32)
#undef VARITH_DO
#undef VARITH
501
/*
 * Element-wise float32 vector arithmetic routed through softfloat
 * (env->vec_status carries rounding mode and exception state).
 */
#define VARITHFP(suffix, func)                                          \
    void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
                          ppc_avr_t *b)                                 \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
            r->f[i] = func(a->f[i], b->f[i], &env->vec_status);         \
        }                                                               \
    }
VARITHFP(addfp, float32_add)
VARITHFP(subfp, float32_sub)
VARITHFP(minfp, float32_min)
VARITHFP(maxfp, float32_max)
#undef VARITHFP
517
/*
 * Fused multiply-add vector FP helpers: computes a*c + b per element
 * (note the operand order — c is the multiplicand, b the addend).
 * 'type' carries float_muladd_* negation flags; nmsubfp negates both
 * the addend and the final result.
 */
#define VARITHFPFMA(suffix, type)                                       \
    void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
                          ppc_avr_t *b, ppc_avr_t *c)                   \
    {                                                                   \
        int i;                                                          \
        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
            r->f[i] = float32_muladd(a->f[i], c->f[i], b->f[i],         \
                                     type, &env->vec_status);           \
        }                                                               \
    }
VARITHFPFMA(maddfp, 0);
VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c);
#undef VARITHFPFMA
531
532#define VARITHSAT_CASE(type, op, cvt, element) \
533 { \
534 type result = (type)a->element[i] op (type)b->element[i]; \
535 r->element[i] = cvt(result, &sat); \
536 }
537
538#define VARITHSAT_DO(name, op, optype, cvt, element) \
539 void helper_v##name(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
540 ppc_avr_t *b) \
541 { \
542 int sat = 0; \
543 int i; \
544 \
545 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
546 switch (sizeof(r->element[0])) { \
547 case 1: \
548 VARITHSAT_CASE(optype, op, cvt, element); \
549 break; \
550 case 2: \
551 VARITHSAT_CASE(optype, op, cvt, element); \
552 break; \
553 case 4: \
554 VARITHSAT_CASE(optype, op, cvt, element); \
555 break; \
556 } \
557 } \
558 if (sat) { \
559 env->vscr |= (1 << VSCR_SAT); \
560 } \
561 }
562#define VARITHSAT_SIGNED(suffix, element, optype, cvt) \
563 VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element) \
564 VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element)
565#define VARITHSAT_UNSIGNED(suffix, element, optype, cvt) \
566 VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element) \
567 VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element)
568VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb)
569VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh)
570VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw)
571VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub)
572VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh)
573VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw)
574#undef VARITHSAT_CASE
575#undef VARITHSAT_DO
576#undef VARITHSAT_SIGNED
577#undef VARITHSAT_UNSIGNED
578
/*
 * Vector average with round-up: each lane computes (a + b + 1) >> 1 in
 * a wider type ('etype') so the intermediate sum cannot overflow.
 */
#define VAVG_DO(name, element, etype)                                   \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            etype x = (etype)a->element[i] + (etype)b->element[i] + 1;  \
            r->element[i] = x >> 1;                                     \
        }                                                               \
    }

#define VAVG(type, signed_element, signed_type, unsigned_element,       \
             unsigned_type)                                             \
    VAVG_DO(avgs##type, signed_element, signed_type)                    \
    VAVG_DO(avgu##type, unsigned_element, unsigned_type)
VAVG(b, s8, int16_t, u8, uint16_t)
VAVG(h, s16, int32_t, u16, uint32_t)
VAVG(w, s32, int64_t, u32, uint64_t)
#undef VAVG_DO
#undef VAVG
599
/*
 * vcfux/vcfsx: convert each (un)signed 32-bit integer element to
 * float32, then divide by 2^uim via float32_scalbn.
 */
#define VCF(suffix, cvt, element)                                       \
    void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r,             \
                            ppc_avr_t *b, uint32_t uim)                 \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
            float32 t = cvt(b->element[i], &env->vec_status);           \
            r->f[i] = float32_scalbn(t, -uim, &env->vec_status);        \
        }                                                               \
    }
VCF(ux, uint32_to_float32, u32)
VCF(sx, int32_to_float32, s32)
#undef VCF
614
/*
 * Integer vector compares: each lane becomes all-ones if the comparison
 * holds, else all-zeros.  The switch writes through the element array
 * matching the lane width.  When 'record' is set, CR6 gets bit 3 if
 * every lane matched and bit 1 if no lane matched.
 */
#define VCMP_DO(suffix, compare, element, record)                       \
    void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r,            \
                             ppc_avr_t *a, ppc_avr_t *b)                \
    {                                                                   \
        uint64_t ones = (uint64_t)-1;                                   \
        uint64_t all = ones;                                            \
        uint64_t none = 0;                                              \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            uint64_t result = (a->element[i] compare b->element[i] ?    \
                               ones : 0x0);                             \
            switch (sizeof(a->element[0])) {                            \
            case 8:                                                     \
                r->u64[i] = result;                                     \
                break;                                                  \
            case 4:                                                     \
                r->u32[i] = result;                                     \
                break;                                                  \
            case 2:                                                     \
                r->u16[i] = result;                                     \
                break;                                                  \
            case 1:                                                     \
                r->u8[i] = result;                                      \
                break;                                                  \
            }                                                           \
            all &= result;                                              \
            none |= result;                                             \
        }                                                               \
        if (record) {                                                   \
            env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1);       \
        }                                                               \
    }
#define VCMP(suffix, compare, element)                  \
    VCMP_DO(suffix, compare, element, 0)                \
    VCMP_DO(suffix##_dot, compare, element, 1)
VCMP(equb, ==, u8)
VCMP(equh, ==, u16)
VCMP(equw, ==, u32)
VCMP(equd, ==, u64)
VCMP(gtub, >, u8)
VCMP(gtuh, >, u16)
VCMP(gtuw, >, u32)
VCMP(gtud, >, u64)
VCMP(gtsb, >, s8)
VCMP(gtsh, >, s16)
VCMP(gtsw, >, s32)
VCMP(gtsd, >, s64)
#undef VCMP_DO
#undef VCMP
665
/*
 * Float vector compares via float32_compare_quiet.  An unordered
 * comparison (NaN operand) always yields false; otherwise the lane is
 * all-ones when "rel compare order" holds (e.g. gefp passes
 * rel != float_relation_less, i.e. equal-or-greater).  'record' updates
 * CR6 as in the integer compares.
 */
#define VCMPFP_DO(suffix, compare, order, record)                       \
    void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r,            \
                             ppc_avr_t *a, ppc_avr_t *b)                \
    {                                                                   \
        uint32_t ones = (uint32_t)-1;                                   \
        uint32_t all = ones;                                            \
        uint32_t none = 0;                                              \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
            uint32_t result;                                            \
            int rel = float32_compare_quiet(a->f[i], b->f[i],           \
                                            &env->vec_status);          \
            if (rel == float_relation_unordered) {                      \
                result = 0;                                             \
            } else if (rel compare order) {                             \
                result = ones;                                          \
            } else {                                                    \
                result = 0;                                             \
            }                                                           \
            r->u32[i] = result;                                         \
            all &= result;                                              \
            none |= result;                                             \
        }                                                               \
        if (record) {                                                   \
            env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1);       \
        }                                                               \
    }
#define VCMPFP(suffix, compare, order)                  \
    VCMPFP_DO(suffix, compare, order, 0)                \
    VCMPFP_DO(suffix##_dot, compare, order, 1)
VCMPFP(eqfp, ==, float_relation_equal)
VCMPFP(gefp, !=, float_relation_less)
VCMPFP(gtfp, ==, float_relation_greater)
#undef VCMPFP_DO
#undef VCMPFP
702
/*
 * vcmpbfp core: bounds compare.  For each lane, bit 31 of the result is
 * set when a > b (not within upper bound) and bit 30 when a < -b (not
 * within lower bound); NaNs set both.  With 'record', CR6 bit 1 is set
 * only when every lane was fully in bounds.
 */
static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r,
                                    ppc_avr_t *a, ppc_avr_t *b, int record)
{
    int i;
    int all_in = 0;

    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
        int le_rel = float32_compare_quiet(a->f[i], b->f[i], &env->vec_status);
        if (le_rel == float_relation_unordered) {
            /* NaN involved: out of bounds on both sides. */
            r->u32[i] = 0xc0000000;
            all_in = 1;
        } else {
            float32 bneg = float32_chs(b->f[i]);
            int ge_rel = float32_compare_quiet(a->f[i], bneg, &env->vec_status);
            int le = le_rel != float_relation_greater;
            int ge = ge_rel != float_relation_less;

            r->u32[i] = ((!le) << 31) | ((!ge) << 30);
            all_in |= (!le | !ge);
        }
    }
    if (record) {
        env->crf[6] = (all_in == 0) << 1;
    }
}
728
/* vcmpbfp without CR6 recording. */
void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    vcmpbfp_internal(env, r, a, b, 0);
}
733
/* vcmpbfp. (record form): also updates CR6. */
void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                        ppc_avr_t *b)
{
    vcmpbfp_internal(env, r, a, b, 1);
}
739
/*
 * vctuxs/vctsxs: float32 to saturating (un)signed 32-bit integer with
 * scale 2^uim.  Conversion goes through float64 with round-to-zero
 * (a local copy of vec_status so the mode change does not leak), NaNs
 * convert to 0, and any saturation sets VSCR[SAT].
 */
#define VCT(suffix, satcvt, element)                                    \
    void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r,             \
                            ppc_avr_t *b, uint32_t uim)                 \
    {                                                                   \
        int i;                                                          \
        int sat = 0;                                                    \
        float_status s = env->vec_status;                               \
                                                                        \
        set_float_rounding_mode(float_round_to_zero, &s);               \
        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
            if (float32_is_any_nan(b->f[i])) {                          \
                r->element[i] = 0;                                      \
            } else {                                                    \
                float64 t = float32_to_float64(b->f[i], &s);            \
                int64_t j;                                              \
                                                                        \
                t = float64_scalbn(t, uim, &s);                         \
                j = float64_to_int64(t, &s);                            \
                r->element[i] = satcvt(j, &sat);                        \
            }                                                           \
        }                                                               \
        if (sat) {                                                      \
            env->vscr |= (1 << VSCR_SAT);                               \
        }                                                               \
    }
VCT(uxs, cvtsduw, u32)
VCT(sxs, cvtsdsw, s32)
#undef VCT
768
/*
 * vmhaddshs: multiply-high, add, saturate.  Per halfword:
 * saturate16((a*b >> 15) + c), setting VSCR[SAT] on saturation.
 * NOTE(review): prod >> 15 on a negative product relies on
 * implementation-defined arithmetic right shift (universal on QEMU
 * hosts, but worth noting).
 */
void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                      ppc_avr_t *b, ppc_avr_t *c)
{
    int sat = 0;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        int32_t prod = a->s16[i] * b->s16[i];
        int32_t t = (int32_t)c->s16[i] + (prod >> 15);

        r->s16[i] = cvtswsh(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}
786
/*
 * vmhraddshs: like vmhaddshs but with rounding — 0x4000 is added to the
 * product before the >> 15 so the high half is rounded to nearest.
 */
void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                       ppc_avr_t *b, ppc_avr_t *c)
{
    int sat = 0;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        int32_t prod = a->s16[i] * b->s16[i] + 0x00004000;
        int32_t t = (int32_t)c->s16[i] + (prod >> 15);
        r->s16[i] = cvtswsh(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}
803
/*
 * Element-wise vector min/max.  VMINMAX_DO picks b when
 * "a compare b" holds, so min uses '>' and max uses '<'.
 */
#define VMINMAX_DO(name, compare, element)                              \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            if (a->element[i] compare b->element[i]) {                  \
                r->element[i] = b->element[i];                          \
            } else {                                                    \
                r->element[i] = a->element[i];                          \
            }                                                           \
        }                                                               \
    }
#define VMINMAX(suffix, element)                \
    VMINMAX_DO(min##suffix, >, element)         \
    VMINMAX_DO(max##suffix, <, element)
VMINMAX(sb, s8)
VMINMAX(sh, s16)
VMINMAX(sw, s32)
VMINMAX(sd, s64)
VMINMAX(ub, u8)
VMINMAX(uh, u16)
VMINMAX(uw, u32)
VMINMAX(ud, u64)
#undef VMINMAX_DO
#undef VMINMAX
830
831void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
832{
833 int i;
834
835 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
836 int32_t prod = a->s16[i] * b->s16[i];
837 r->s16[i] = (int16_t) (prod + c->s16[i]);
838 }
839}
840
/*
 * Vector merge high/low.  A temporary 'result' is built so the helpers
 * remain correct when r aliases a or b.  The 'highp' branch interleaves
 * from the start of the inputs, the other branch from the end; MRGHI /
 * MRGLO are swapped between host endiannesses, and NOTE(review) the
 * mrgl->MRGHI / mrgh->MRGLO pairing below looks inverted on purpose to
 * compensate for the element numbering — verify against the ISA before
 * changing.
 */
#define VMRG_DO(name, element, highp)                                   \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        ppc_avr_t result;                                               \
        int i;                                                          \
        size_t n_elems = ARRAY_SIZE(r->element);                        \
                                                                        \
        for (i = 0; i < n_elems / 2; i++) {                             \
            if (highp) {                                                \
                result.element[i*2+HI_IDX] = a->element[i];             \
                result.element[i*2+LO_IDX] = b->element[i];             \
            } else {                                                    \
                result.element[n_elems - i * 2 - (1 + HI_IDX)] =        \
                    b->element[n_elems - i - 1];                        \
                result.element[n_elems - i * 2 - (1 + LO_IDX)] =        \
                    a->element[n_elems - i - 1];                        \
            }                                                           \
        }                                                               \
        *r = result;                                                    \
    }
#if defined(HOST_WORDS_BIGENDIAN)
#define MRGHI 0
#define MRGLO 1
#else
#define MRGHI 1
#define MRGLO 0
#endif
#define VMRG(suffix, element)                   \
    VMRG_DO(mrgl##suffix, element, MRGHI)       \
    VMRG_DO(mrgh##suffix, element, MRGLO)
VMRG(b, u8)
VMRG(h, u16)
VMRG(w, u32)
#undef VMRG_DO
#undef VMRG
#undef MRGHI
#undef MRGLO
878
/*
 * vmsummbm: multiply-sum mixed-sign bytes, modulo.  Each signed byte of
 * a is multiplied by the corresponding unsigned byte of b; each group
 * of four products is summed with the matching word of c.
 */
void helper_vmsummbm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    int32_t prod[16];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s8); i++) {
        prod[i] = (int32_t)a->s8[i] * b->u8[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] +
            prod[4 * i + 2] + prod[4 * i + 3];
    }
}
894
/*
 * vmsumshm: multiply-sum signed halfwords, modulo.  Pairs of signed
 * 16x16 products are summed with the matching word of c; all arithmetic
 * wraps modulo 2^32.
 */
void helper_vmsumshm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    int32_t prod[8];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        prod[i] = a->s16[i] * b->s16[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1];
    }
}
909
/*
 * vmsumshs: saturating variant of vmsumshm.  The per-word accumulation
 * is done in 64 bits, then narrowed with the saturating converter;
 * saturation sets VSCR[SAT].
 */
void helper_vmsumshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    int32_t prod[8];
    int i;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        prod[i] = (int32_t)a->s16[i] * b->s16[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1];

        r->u32[i] = cvtsdsw(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}
931
/*
 * vmsumubm: multiply-sum unsigned bytes, modulo.  Four unsigned byte
 * products per word are summed with the matching word of c.
 */
void helper_vmsumubm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    uint16_t prod[16];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        prod[i] = a->u8[i] * b->u8[i];
    }

    VECTOR_FOR_INORDER_I(i, u32) {
        r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] +
                    prod[4 * i + 2] + prod[4 * i + 3];
    }
}
947
/*
 * vmsumuhm: multiply-sum unsigned halfwords, modulo.  Pairs of unsigned
 * 16x16 products per word are summed with the matching word of c,
 * wrapping modulo 2^32.
 */
void helper_vmsumuhm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    uint32_t prod[8];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
        prod[i] = a->u16[i] * b->u16[i];
    }

    VECTOR_FOR_INORDER_I(i, u32) {
        r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1];
    }
}
962
/*
 * vmsumuhs: saturating variant of vmsumuhm.  Per-word accumulation is
 * done in 64 bits and narrowed with the unsigned saturating converter;
 * saturation sets VSCR[SAT].  (The loop iterates over s32 — same lane
 * count and width as u32, so the indexing is equivalent.)
 */
void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    uint32_t prod[8];
    int i;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
        prod[i] = a->u16[i] * b->u16[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1];

        r->u32[i] = cvtuduw(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}
984
/*
 * Vector multiply even/odd: widening multiply of the even ('evenp')
 * or odd numbered source elements into double-width product lanes.
 * HI_IDX/LO_IDX select which half of each product-lane pair is the
 * even element in host element order.
 */
#define VMUL_DO(name, mul_element, prod_element, cast, evenp)           \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        VECTOR_FOR_INORDER_I(i, prod_element) {                         \
            if (evenp) {                                                \
                r->prod_element[i] =                                    \
                    (cast)a->mul_element[i * 2 + HI_IDX] *              \
                    (cast)b->mul_element[i * 2 + HI_IDX];               \
            } else {                                                    \
                r->prod_element[i] =                                    \
                    (cast)a->mul_element[i * 2 + LO_IDX] *              \
                    (cast)b->mul_element[i * 2 + LO_IDX];               \
            }                                                           \
        }                                                               \
    }
#define VMUL(suffix, mul_element, prod_element, cast)                   \
    VMUL_DO(mule##suffix, mul_element, prod_element, cast, 1)           \
    VMUL_DO(mulo##suffix, mul_element, prod_element, cast, 0)
VMUL(sb, s8, s16, int16_t)
VMUL(sh, s16, s32, int32_t)
VMUL(sw, s32, s64, int64_t)
VMUL(ub, u8, u16, uint16_t)
VMUL(uh, u16, u32, uint32_t)
VMUL(uw, u32, u64, uint64_t)
#undef VMUL_DO
#undef VMUL
1013
/*
 * vperm: byte permute.  Each control byte of c selects one byte from
 * the 32-byte concatenation of a and b: bit 0x10 chooses b over a, the
 * low 4 bits are the byte index (flipped on little-endian hosts to
 * match architectural numbering).  A temporary holds the result so r
 * may alias a, b or c.
 */
void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
                  ppc_avr_t *c)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u8) {
        int s = c->u8[i] & 0x1f;
#if defined(HOST_WORDS_BIGENDIAN)
        int index = s & 0xf;
#else
        int index = 15 - (s & 0xf);
#endif

        if (s & 0x10) {
            result.u8[i] = b->u8[index];
        } else {
            result.u8[i] = a->u8[index];
        }
    }
    *r = result;
}
1036
1037#if defined(HOST_WORDS_BIGENDIAN)
1038#define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)])
1039#define VBPERMQ_DW(index) (((index) & 0x40) != 0)
1040#else
1041#define VBPERMQ_INDEX(avr, i) ((avr)->u8[15-(i)])
1042#define VBPERMQ_DW(index) (((index) & 0x40) == 0)
1043#endif
1044
/*
 * vbpermq: Vector Bit Permute Quadword.  Each control byte of b is a
 * bit index (0..127, big-endian numbering) into the 128-bit value in a;
 * selected bits are gathered into the top 16 bits of the high
 * doubleword of the result, one bit per control byte.  Indices >= 128
 * contribute 0.  The low doubleword is cleared.
 */
void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;
    uint64_t perm = 0;

    VECTOR_FOR_INORDER_I(i, u8) {
        int index = VBPERMQ_INDEX(b, i);

        if (index < 128) {
            uint64_t mask = (1ull << (63-(index & 0x3F)));
            if (a->u64[VBPERMQ_DW(index)] & mask) {
                perm |= (0x8000 >> i);
            }
        }
    }

    r->u64[HI_IDX] = perm;
    r->u64[LO_IDX] = 0;
}
1064
1065#undef VBPERMQ_INDEX
1066#undef VBPERMQ_DW
1067
1068static const uint64_t VGBBD_MASKS[256] = {
1069 0x0000000000000000ull,
1070 0x0000000000000080ull,
1071 0x0000000000008000ull,
1072 0x0000000000008080ull,
1073 0x0000000000800000ull,
1074 0x0000000000800080ull,
1075 0x0000000000808000ull,
1076 0x0000000000808080ull,
1077 0x0000000080000000ull,
1078 0x0000000080000080ull,
1079 0x0000000080008000ull,
1080 0x0000000080008080ull,
1081 0x0000000080800000ull,
1082 0x0000000080800080ull,
1083 0x0000000080808000ull,
1084 0x0000000080808080ull,
1085 0x0000008000000000ull,
1086 0x0000008000000080ull,
1087 0x0000008000008000ull,
1088 0x0000008000008080ull,
1089 0x0000008000800000ull,
1090 0x0000008000800080ull,
1091 0x0000008000808000ull,
1092 0x0000008000808080ull,
1093 0x0000008080000000ull,
1094 0x0000008080000080ull,
1095 0x0000008080008000ull,
1096 0x0000008080008080ull,
1097 0x0000008080800000ull,
1098 0x0000008080800080ull,
1099 0x0000008080808000ull,
1100 0x0000008080808080ull,
1101 0x0000800000000000ull,
1102 0x0000800000000080ull,
1103 0x0000800000008000ull,
1104 0x0000800000008080ull,
1105 0x0000800000800000ull,
1106 0x0000800000800080ull,
1107 0x0000800000808000ull,
1108 0x0000800000808080ull,
1109 0x0000800080000000ull,
1110 0x0000800080000080ull,
1111 0x0000800080008000ull,
1112 0x0000800080008080ull,
1113 0x0000800080800000ull,
1114 0x0000800080800080ull,
1115 0x0000800080808000ull,
1116 0x0000800080808080ull,
1117 0x0000808000000000ull,
1118 0x0000808000000080ull,
1119 0x0000808000008000ull,
1120 0x0000808000008080ull,
1121 0x0000808000800000ull,
1122 0x0000808000800080ull,
1123 0x0000808000808000ull,
1124 0x0000808000808080ull,
1125 0x0000808080000000ull,
1126 0x0000808080000080ull,
1127 0x0000808080008000ull,
1128 0x0000808080008080ull,
1129 0x0000808080800000ull,
1130 0x0000808080800080ull,
1131 0x0000808080808000ull,
1132 0x0000808080808080ull,
1133 0x0080000000000000ull,
1134 0x0080000000000080ull,
1135 0x0080000000008000ull,
1136 0x0080000000008080ull,
1137 0x0080000000800000ull,
1138 0x0080000000800080ull,
1139 0x0080000000808000ull,
1140 0x0080000000808080ull,
1141 0x0080000080000000ull,
1142 0x0080000080000080ull,
1143 0x0080000080008000ull,
1144 0x0080000080008080ull,
1145 0x0080000080800000ull,
1146 0x0080000080800080ull,
1147 0x0080000080808000ull,
1148 0x0080000080808080ull,
1149 0x0080008000000000ull,
1150 0x0080008000000080ull,
1151 0x0080008000008000ull,
1152 0x0080008000008080ull,
1153 0x0080008000800000ull,
1154 0x0080008000800080ull,
1155 0x0080008000808000ull,
1156 0x0080008000808080ull,
1157 0x0080008080000000ull,
1158 0x0080008080000080ull,
1159 0x0080008080008000ull,
1160 0x0080008080008080ull,
1161 0x0080008080800000ull,
1162 0x0080008080800080ull,
1163 0x0080008080808000ull,
1164 0x0080008080808080ull,
1165 0x0080800000000000ull,
1166 0x0080800000000080ull,
1167 0x0080800000008000ull,
1168 0x0080800000008080ull,
1169 0x0080800000800000ull,
1170 0x0080800000800080ull,
1171 0x0080800000808000ull,
1172 0x0080800000808080ull,
1173 0x0080800080000000ull,
1174 0x0080800080000080ull,
1175 0x0080800080008000ull,
1176 0x0080800080008080ull,
1177 0x0080800080800000ull,
1178 0x0080800080800080ull,
1179 0x0080800080808000ull,
1180 0x0080800080808080ull,
1181 0x0080808000000000ull,
1182 0x0080808000000080ull,
1183 0x0080808000008000ull,
1184 0x0080808000008080ull,
1185 0x0080808000800000ull,
1186 0x0080808000800080ull,
1187 0x0080808000808000ull,
1188 0x0080808000808080ull,
1189 0x0080808080000000ull,
1190 0x0080808080000080ull,
1191 0x0080808080008000ull,
1192 0x0080808080008080ull,
1193 0x0080808080800000ull,
1194 0x0080808080800080ull,
1195 0x0080808080808000ull,
1196 0x0080808080808080ull,
1197 0x8000000000000000ull,
1198 0x8000000000000080ull,
1199 0x8000000000008000ull,
1200 0x8000000000008080ull,
1201 0x8000000000800000ull,
1202 0x8000000000800080ull,
1203 0x8000000000808000ull,
1204 0x8000000000808080ull,
1205 0x8000000080000000ull,
1206 0x8000000080000080ull,
1207 0x8000000080008000ull,
1208 0x8000000080008080ull,
1209 0x8000000080800000ull,
1210 0x8000000080800080ull,
1211 0x8000000080808000ull,
1212 0x8000000080808080ull,
1213 0x8000008000000000ull,
1214 0x8000008000000080ull,
1215 0x8000008000008000ull,
1216 0x8000008000008080ull,
1217 0x8000008000800000ull,
1218 0x8000008000800080ull,
1219 0x8000008000808000ull,
1220 0x8000008000808080ull,
1221 0x8000008080000000ull,
1222 0x8000008080000080ull,
1223 0x8000008080008000ull,
1224 0x8000008080008080ull,
1225 0x8000008080800000ull,
1226 0x8000008080800080ull,
1227 0x8000008080808000ull,
1228 0x8000008080808080ull,
1229 0x8000800000000000ull,
1230 0x8000800000000080ull,
1231 0x8000800000008000ull,
1232 0x8000800000008080ull,
1233 0x8000800000800000ull,
1234 0x8000800000800080ull,
1235 0x8000800000808000ull,
1236 0x8000800000808080ull,
1237 0x8000800080000000ull,
1238 0x8000800080000080ull,
1239 0x8000800080008000ull,
1240 0x8000800080008080ull,
1241 0x8000800080800000ull,
1242 0x8000800080800080ull,
1243 0x8000800080808000ull,
1244 0x8000800080808080ull,
1245 0x8000808000000000ull,
1246 0x8000808000000080ull,
1247 0x8000808000008000ull,
1248 0x8000808000008080ull,
1249 0x8000808000800000ull,
1250 0x8000808000800080ull,
1251 0x8000808000808000ull,
1252 0x8000808000808080ull,
1253 0x8000808080000000ull,
1254 0x8000808080000080ull,
1255 0x8000808080008000ull,
1256 0x8000808080008080ull,
1257 0x8000808080800000ull,
1258 0x8000808080800080ull,
1259 0x8000808080808000ull,
1260 0x8000808080808080ull,
1261 0x8080000000000000ull,
1262 0x8080000000000080ull,
1263 0x8080000000008000ull,
1264 0x8080000000008080ull,
1265 0x8080000000800000ull,
1266 0x8080000000800080ull,
1267 0x8080000000808000ull,
1268 0x8080000000808080ull,
1269 0x8080000080000000ull,
1270 0x8080000080000080ull,
1271 0x8080000080008000ull,
1272 0x8080000080008080ull,
1273 0x8080000080800000ull,
1274 0x8080000080800080ull,
1275 0x8080000080808000ull,
1276 0x8080000080808080ull,
1277 0x8080008000000000ull,
1278 0x8080008000000080ull,
1279 0x8080008000008000ull,
1280 0x8080008000008080ull,
1281 0x8080008000800000ull,
1282 0x8080008000800080ull,
1283 0x8080008000808000ull,
1284 0x8080008000808080ull,
1285 0x8080008080000000ull,
1286 0x8080008080000080ull,
1287 0x8080008080008000ull,
1288 0x8080008080008080ull,
1289 0x8080008080800000ull,
1290 0x8080008080800080ull,
1291 0x8080008080808000ull,
1292 0x8080008080808080ull,
1293 0x8080800000000000ull,
1294 0x8080800000000080ull,
1295 0x8080800000008000ull,
1296 0x8080800000008080ull,
1297 0x8080800000800000ull,
1298 0x8080800000800080ull,
1299 0x8080800000808000ull,
1300 0x8080800000808080ull,
1301 0x8080800080000000ull,
1302 0x8080800080000080ull,
1303 0x8080800080008000ull,
1304 0x8080800080008080ull,
1305 0x8080800080800000ull,
1306 0x8080800080800080ull,
1307 0x8080800080808000ull,
1308 0x8080800080808080ull,
1309 0x8080808000000000ull,
1310 0x8080808000000080ull,
1311 0x8080808000008000ull,
1312 0x8080808000008080ull,
1313 0x8080808000800000ull,
1314 0x8080808000800080ull,
1315 0x8080808000808000ull,
1316 0x8080808000808080ull,
1317 0x8080808080000000ull,
1318 0x8080808080000080ull,
1319 0x8080808080008000ull,
1320 0x8080808080008080ull,
1321 0x8080808080800000ull,
1322 0x8080808080800080ull,
1323 0x8080808080808000ull,
1324 0x8080808080808080ull,
1325};
1326
/*
 * vgbbd: Vector Gather Bits by Bytes by Doubleword.  For each source
 * doubleword, bit j of output byte i is taken from bit i of source
 * byte j.  VGBBD_MASKS (table above) holds, for every possible source
 * byte value, its bits pre-spread to the top bit of each byte column;
 * the per-byte right shift then moves them into the column matching
 * the byte's position within the doubleword.
 */
void helper_vgbbd(ppc_avr_t *r, ppc_avr_t *b)
{
    int i;
    uint64_t t[2] = { 0, 0 };

    VECTOR_FOR_INORDER_I(i, u8) {
#if defined(HOST_WORDS_BIGENDIAN)
        t[i>>3] |= VGBBD_MASKS[b->u8[i]] >> (i & 7);
#else
        t[i>>3] |= VGBBD_MASKS[b->u8[i]] >> (7-(i & 7));
#endif
    }

    r->u64[0] = t[0];
    r->u64[1] = t[1];
}
1343
/*
 * Polynomial (carry-less) multiply-sum: each source element of a is
 * multiplied carry-lessly by the matching element of b (partial
 * products are XORed rather than added) into a double-width product,
 * and the two products of each adjacent pair are XORed into one
 * target element.  Generates vpmsumb/vpmsumh/vpmsumw below.
 */
#define PMSUM(name, srcfld, trgfld, trgtyp)                   \
void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)  \
{                                                             \
    int i, j;                                                 \
    trgtyp prod[sizeof(ppc_avr_t)/sizeof(a->srcfld[0])];      \
                                                              \
    VECTOR_FOR_INORDER_I(i, srcfld) {                         \
        prod[i] = 0;                                          \
        for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) {      \
            if (a->srcfld[i] & (1ull<<j)) {                   \
                prod[i] ^= ((trgtyp)b->srcfld[i] << j);       \
            }                                                 \
        }                                                     \
    }                                                         \
                                                              \
    VECTOR_FOR_INORDER_I(i, trgfld) {                         \
        r->trgfld[i] = prod[2*i] ^ prod[2*i+1];               \
    }                                                         \
}

PMSUM(vpmsumb, u8, u16, uint16_t)
PMSUM(vpmsumh, u16, u32, uint32_t)
PMSUM(vpmsumw, u32, u64, uint64_t)
1367
/*
 * vpmsumd: carry-less multiply of the two 64-bit lanes of a and b,
 * with the two 128-bit partial products XORed into a single 128-bit
 * result.  With native __int128 support the products are accumulated
 * directly; otherwise the 128-bit shift of b is built from two 64-bit
 * halves, special-casing j == 0 so the carry half never performs a
 * shift by 64 (undefined behaviour in C).
 */
void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{

#ifdef CONFIG_INT128
    int i, j;
    __uint128_t prod[2];

    VECTOR_FOR_INORDER_I(i, u64) {
        prod[i] = 0;
        for (j = 0; j < 64; j++) {
            if (a->u64[i] & (1ull<<j)) {
                prod[i] ^= (((__uint128_t)b->u64[i]) << j);
            }
        }
    }

    r->u128 = prod[0] ^ prod[1];

#else
    int i, j;
    ppc_avr_t prod[2];

    VECTOR_FOR_INORDER_I(i, u64) {
        prod[i].u64[LO_IDX] = prod[i].u64[HI_IDX] = 0;
        for (j = 0; j < 64; j++) {
            if (a->u64[i] & (1ull<<j)) {
                ppc_avr_t bshift;
                if (j == 0) {
                    /* avoid the undefined "b >> 64" for the carry half */
                    bshift.u64[HI_IDX] = 0;
                    bshift.u64[LO_IDX] = b->u64[i];
                } else {
                    bshift.u64[HI_IDX] = b->u64[i] >> (64-j);
                    bshift.u64[LO_IDX] = b->u64[i] << j;
                }
                prod[i].u64[LO_IDX] ^= bshift.u64[LO_IDX];
                prod[i].u64[HI_IDX] ^= bshift.u64[HI_IDX];
            }
        }
    }

    r->u64[LO_IDX] = prod[0].u64[LO_IDX] ^ prod[1].u64[LO_IDX];
    r->u64[HI_IDX] = prod[0].u64[HI_IDX] ^ prod[1].u64[HI_IDX];
#endif
}
1412
1413
/* 1 when host element order matches guest (big-endian) order. */
#if defined(HOST_WORDS_BIGENDIAN)
#define PKBIG 1
#else
#define PKBIG 0
#endif
/*
 * vpkpx: pack the eight 32-bit pixels of vA:vB into eight 16-bit
 * pixels by selecting bits from each byte lane (1:5:5:5 style
 * layout).  The source operand order is swapped on little-endian
 * hosts so in-register element numbering matches the guest's.
 */
void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    ppc_avr_t result;
#if defined(HOST_WORDS_BIGENDIAN)
    const ppc_avr_t *x[2] = { a, b };
#else
    const ppc_avr_t *x[2] = { b, a };
#endif

    VECTOR_FOR_INORDER_I(i, u64) {
        VECTOR_FOR_INORDER_I(j, u32) {
            uint32_t e = x[i]->u32[j];

            result.u16[4*i+j] = (((e >> 9) & 0xfc00) |
                                 ((e >> 6) & 0x3e0) |
                                 ((e >> 3) & 0x1f));
        }
    }
    *r = result;
}
1440
/*
 * Vector pack: narrow every source element of vA and vB through the
 * conversion callback 'cvt' and concatenate the two halves (in
 * guest element order via PKBIG).  'dosat' selects the saturating
 * forms, which set VSCR[SAT] when any element saturated.
 */
#define VPK(suffix, from, to, cvt, dosat)                               \
    void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r,             \
                            ppc_avr_t *a, ppc_avr_t *b)                 \
    {                                                                   \
        int i;                                                          \
        int sat = 0;                                                    \
        ppc_avr_t result;                                               \
        ppc_avr_t *a0 = PKBIG ? a : b;                                  \
        ppc_avr_t *a1 = PKBIG ? b : a;                                  \
                                                                        \
        VECTOR_FOR_INORDER_I(i, from) {                                 \
            result.to[i] = cvt(a0->from[i], &sat);                      \
            result.to[i+ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat);  \
        }                                                               \
        *r = result;                                                    \
        if (dosat && sat) {                                             \
            env->vscr |= (1 << VSCR_SAT);                               \
        }                                                               \
    }
/* identity conversion for the modulo (non-saturating) packs */
#define I(x, y) (x)
VPK(shss, s16, s8, cvtshsb, 1)
VPK(shus, s16, u8, cvtshub, 1)
VPK(swss, s32, s16, cvtswsh, 1)
VPK(swus, s32, u16, cvtswuh, 1)
VPK(sdss, s64, s32, cvtsdsw, 1)
VPK(sdus, s64, u32, cvtsduw, 1)
VPK(uhus, u16, u8, cvtuhub, 1)
VPK(uwus, u32, u16, cvtuwuh, 1)
VPK(udus, u64, u32, cvtuduw, 1)
VPK(uhum, u16, u8, I, 0)
VPK(uwum, u32, u16, I, 0)
VPK(udum, u64, u32, I, 0)
#undef I
#undef VPK
#undef PKBIG
1476
1477void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1478{
1479 int i;
1480
1481 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1482 r->f[i] = float32_div(float32_one, b->f[i], &env->vec_status);
1483 }
1484}
1485
/*
 * vrfin/vrfim/vrfip/vrfiz: round every float element to an integral
 * value with the given rounding mode.  The rounding mode is forced on
 * a local copy of vec_status, so neither the mode change nor the
 * flags raised here propagate back to env.
 */
#define VRFI(suffix, rounding)                                  \
    void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r,    \
                             ppc_avr_t *b)                      \
    {                                                           \
        int i;                                                  \
        float_status s = env->vec_status;                       \
                                                                \
        set_float_rounding_mode(rounding, &s);                  \
        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                \
            r->f[i] = float32_round_to_int (b->f[i], &s);       \
        }                                                       \
    }
VRFI(n, float_round_nearest_even)
VRFI(m, float_round_down)
VRFI(p, float_round_up)
VRFI(z, float_round_to_zero)
#undef VRFI
1503
1504#define VROTATE(suffix, element, mask) \
1505 void helper_vrl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1506 { \
1507 int i; \
1508 \
1509 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1510 unsigned int shift = b->element[i] & mask; \
1511 r->element[i] = (a->element[i] << shift) | \
1512 (a->element[i] >> (sizeof(a->element[0]) * 8 - shift)); \
1513 } \
1514 }
1515VROTATE(b, u8, 0x7)
1516VROTATE(h, u16, 0xF)
1517VROTATE(w, u32, 0x1F)
1518VROTATE(d, u64, 0x3F)
1519#undef VROTATE
1520
1521void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1522{
1523 int i;
1524
1525 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1526 float32 t = float32_sqrt(b->f[i], &env->vec_status);
1527
1528 r->f[i] = float32_div(float32_one, t, &env->vec_status);
1529 }
1530}
1531
1532void helper_vsel(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1533 ppc_avr_t *c)
1534{
1535 r->u64[0] = (a->u64[0] & ~c->u64[0]) | (b->u64[0] & c->u64[0]);
1536 r->u64[1] = (a->u64[1] & ~c->u64[1]) | (b->u64[1] & c->u64[1]);
1537}
1538
1539void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1540{
1541 int i;
1542
1543 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1544 r->f[i] = float32_exp2(b->f[i], &env->vec_status);
1545 }
1546}
1547
1548void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1549{
1550 int i;
1551
1552 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1553 r->f[i] = float32_log2(b->f[i], &env->vec_status);
1554 }
1555}
1556
1557
1558
1559
/*
 * vsl/vsr: shift the whole 128-bit register left/right by the bit
 * count in the low 3 bits of vB.  Per the ISA the result is undefined
 * unless every byte of vB carries the same count, so the shift is only
 * performed when all bytes agree ('doit').  shift == 0 is special-
 * cased both for speed and to avoid forming the carry with a 64-bit
 * shift by 64 (undefined behaviour).
 */
#define VSHIFT(suffix, leftp)                                           \
    void helper_vs##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)    \
    {                                                                   \
        int shift = b->u8[LO_IDX*15] & 0x7;                             \
        int doit = 1;                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->u8); i++) {                       \
            doit = doit && ((b->u8[i] & 0x7) == shift);                 \
        }                                                               \
        if (doit) {                                                     \
            if (shift == 0) {                                           \
                *r = *a;                                                \
            } else if (leftp) {                                         \
                uint64_t carry = a->u64[LO_IDX] >> (64 - shift);        \
                                                                        \
                r->u64[HI_IDX] = (a->u64[HI_IDX] << shift) | carry;     \
                r->u64[LO_IDX] = a->u64[LO_IDX] << shift;               \
            } else {                                                    \
                uint64_t carry = a->u64[HI_IDX] << (64 - shift);        \
                                                                        \
                r->u64[LO_IDX] = (a->u64[LO_IDX] >> shift) | carry;     \
                r->u64[HI_IDX] = a->u64[HI_IDX] >> shift;               \
            }                                                           \
        }                                                               \
    }
VSHIFT(l, 1)
VSHIFT(r, 0)
#undef VSHIFT
1589
/*
 * vslb/vslh/vslw/vsld: element-wise logical shift left.  The count is
 * the low log2(width) bits of the matching element of vB, so it is
 * always strictly less than the element width.
 */
#define VSL(suffix, element, mask)                                      \
    void helper_vsl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)   \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            unsigned int shift = b->element[i] & mask;                  \
                                                                        \
            r->element[i] = a->element[i] << shift;                     \
        }                                                               \
    }
VSL(b, u8, 0x7)
VSL(h, u16, 0x0F)
VSL(w, u32, 0x1F)
VSL(d, u64, 0x3F)
#undef VSL
1606
/*
 * vsldoi: treat vA:vB as a 32-byte value and extract the 16 bytes
 * starting at byte offset 'shift' (0..15).  On little-endian hosts
 * the stored element order is reversed, so the indices (and the roles
 * of a and b) are mirrored.
 */
void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift)
{
    int sh = shift & 0xf;
    int i;
    ppc_avr_t result;

#if defined(HOST_WORDS_BIGENDIAN)
    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        int index = sh + i;
        if (index > 0xf) {
            result.u8[i] = b->u8[index - 0x10];
        } else {
            result.u8[i] = a->u8[index];
        }
    }
#else
    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        int index = (16 - sh) + i;
        if (index > 0xf) {
            result.u8[i] = a->u8[index - 0x10];
        } else {
            result.u8[i] = b->u8[index];
        }
    }
#endif
    *r = result;
}
1634
/*
 * vslo: shift left by octet.  The byte count comes from vB's lowest-
 * order byte; vacated bytes are zeroed.  The two arms perform the same
 * guest-visible shift for either host element order.
 */
void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int sh = (b->u8[LO_IDX*0xf] >> 3) & 0xf;

#if defined(HOST_WORDS_BIGENDIAN)
    memmove(&r->u8[0], &a->u8[sh], 16 - sh);
    memset(&r->u8[16-sh], 0, sh);
#else
    memmove(&r->u8[sh], &a->u8[0], 16 - sh);
    memset(&r->u8[0], 0, sh);
#endif
}
1647
1648
/* Mask the splat immediate to a valid element index. */
#define _SPLAT_MASKED(element) (splat & (ARRAY_SIZE(r->element) - 1))
/* Convert the guest element index to the host storage index. */
#if defined(HOST_WORDS_BIGENDIAN)
#define SPLAT_ELEMENT(element) _SPLAT_MASKED(element)
#else
#define SPLAT_ELEMENT(element) \
    (ARRAY_SIZE(r->element) - 1 - _SPLAT_MASKED(element))
#endif
/*
 * vspltb/vsplth/vspltw: replicate the source element selected by the
 * instruction's immediate field into every element of the result.
 */
#define VSPLT(suffix, element)                                          \
    void helper_vsplt##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t splat) \
    {                                                                   \
        uint32_t s = b->element[SPLAT_ELEMENT(element)];                \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            r->element[i] = s;                                          \
        }                                                               \
    }
VSPLT(b, u8)
VSPLT(h, u16)
VSPLT(w, u32)
#undef VSPLT
#undef SPLAT_ELEMENT
#undef _SPLAT_MASKED
1672
/*
 * vspltisb/vspltish/vspltisw: splat a sign-extended 5-bit immediate
 * into every element.  The shift pair through int8_t sign-extends bit
 * 4 of 'splat' across the element.
 */
#define VSPLTI(suffix, element, splat_type)                     \
    void helper_vspltis##suffix(ppc_avr_t *r, uint32_t splat)   \
    {                                                           \
        splat_type x = (int8_t)(splat << 3) >> 3;               \
        int i;                                                  \
                                                                \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {          \
            r->element[i] = x;                                  \
        }                                                       \
    }
VSPLTI(b, s8, int8_t)
VSPLTI(h, s16, int16_t)
VSPLTI(w, s32, int32_t)
#undef VSPLTI
1687
/*
 * Element-wise right shift.  The unsigned element types give the
 * logical forms (vsrb/vsrh/vsrw/vsrd), the signed ones the algebraic
 * forms (vsrab/...).  Right-shifting a negative signed value is
 * implementation-defined in C but is arithmetic on supported hosts.
 */
#define VSR(suffix, element, mask)                                      \
    void helper_vsr##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)   \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            unsigned int shift = b->element[i] & mask;                  \
            r->element[i] = a->element[i] >> shift;                     \
        }                                                               \
    }
VSR(ab, s8, 0x7)
VSR(ah, s16, 0xF)
VSR(aw, s32, 0x1F)
VSR(ad, s64, 0x3F)
VSR(b, u8, 0x7)
VSR(h, u16, 0xF)
VSR(w, u32, 0x1F)
VSR(d, u64, 0x3F)
#undef VSR
1707
/*
 * vsro: shift right by octet; the mirror image of vslo.  Byte count
 * from vB's lowest-order byte, vacated bytes zeroed.
 */
void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int sh = (b->u8[LO_IDX * 0xf] >> 3) & 0xf;

#if defined(HOST_WORDS_BIGENDIAN)
    memmove(&r->u8[sh], &a->u8[0], 16 - sh);
    memset(&r->u8[0], 0, sh);
#else
    memmove(&r->u8[0], &a->u8[sh], 16 - sh);
    memset(&r->u8[16 - sh], 0, sh);
#endif
}
1720
1721void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1722{
1723 int i;
1724
1725 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
1726 r->u32[i] = a->u32[i] >= b->u32[i];
1727 }
1728}
1729
/*
 * vsumsws: sum the four signed words of vA plus the last word of vB,
 * saturating the total into the last word of the result; all other
 * words are zeroed.  VSCR[SAT] is set if the sum saturated.  'upper'
 * is the host index of the guest's last word.
 */
void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int64_t t;
    int i, upper;
    ppc_avr_t result;
    int sat = 0;

#if defined(HOST_WORDS_BIGENDIAN)
    upper = ARRAY_SIZE(r->s32)-1;
#else
    upper = 0;
#endif
    t = (int64_t)b->s32[upper];
    for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
        t += a->s32[i];
        result.s32[i] = 0;
    }
    result.s32[upper] = cvtsdsw(t, &sat);
    *r = result;

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}
1754
/*
 * vsum2sws: for each doubleword, sum its two signed words from vA plus
 * the matching word of vB, saturating into that doubleword's low word
 * (the other word is zeroed).  Note the inner loop bound
 * ARRAY_SIZE(r->u64) == 2 is also exactly the number of words per
 * doubleword.  VSCR[SAT] is set on saturation.
 */
void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j, upper;
    ppc_avr_t result;
    int sat = 0;

#if defined(HOST_WORDS_BIGENDIAN)
    upper = 1;
#else
    upper = 0;
#endif
    for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
        int64_t t = (int64_t)b->s32[upper + i * 2];

        result.u64[i] = 0;
        for (j = 0; j < ARRAY_SIZE(r->u64); j++) {
            t += a->s32[2 * i + j];
        }
        result.s32[upper + i * 2] = cvtsdsw(t, &sat);
    }

    *r = result;
    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}
1781
/*
 * vsum4sbs: for each word, sum its four signed bytes from vA plus the
 * word of vB, saturating to signed 32 bits.  (ARRAY_SIZE(r->s32) == 4
 * doubles as the bytes-per-word count in the inner loop.)  VSCR[SAT]
 * is set on saturation.
 */
void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
        int64_t t = (int64_t)b->s32[i];

        for (j = 0; j < ARRAY_SIZE(r->s32); j++) {
            t += a->s8[4 * i + j];
        }
        r->s32[i] = cvtsdsw(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}
1800
1801void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1802{
1803 int sat = 0;
1804 int i;
1805
1806 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1807 int64_t t = (int64_t)b->s32[i];
1808
1809 t += a->s16[2 * i] + a->s16[2 * i + 1];
1810 r->s32[i] = cvtsdsw(t, &sat);
1811 }
1812
1813 if (sat) {
1814 env->vscr |= (1 << VSCR_SAT);
1815 }
1816}
1817
/*
 * vsum4ubs: unsigned counterpart of vsum4sbs — for each word, sum its
 * four unsigned bytes from vA plus the word of vB, saturating to
 * unsigned 32 bits; VSCR[SAT] is set on saturation.
 */
void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        uint64_t t = (uint64_t)b->u32[i];

        for (j = 0; j < ARRAY_SIZE(r->u32); j++) {
            t += a->u8[4 * i + j];
        }
        r->u32[i] = cvtuduw(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}
1836
/* Which half of the packed source is "high"/"low" in host order. */
#if defined(HOST_WORDS_BIGENDIAN)
#define UPKHI 1
#define UPKLO 0
#else
#define UPKHI 0
#define UPKLO 1
#endif
/*
 * vupkhpx/vupklpx: expand 16-bit 1:5:5:5 pixels to 32-bit 8:8:8:8.
 * The single alpha bit is stretched to a full byte; the 5-bit channels
 * are zero-extended.  The macro locals a/r/g/b are scoped to the loop
 * body, so the final '*r = result' still names the output operand.
 */
#define VUPKPX(suffix, hi)                                      \
    void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b)        \
    {                                                           \
        int i;                                                  \
        ppc_avr_t result;                                       \
                                                                \
        for (i = 0; i < ARRAY_SIZE(r->u32); i++) {              \
            uint16_t e = b->u16[hi ? i : i+4];                  \
            uint8_t a = (e >> 15) ? 0xff : 0;                   \
            uint8_t r = (e >> 10) & 0x1f;                       \
            uint8_t g = (e >> 5) & 0x1f;                        \
            uint8_t b = e & 0x1f;                               \
                                                                \
            result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b; \
        }                                                       \
        *r = result;                                            \
    }
VUPKPX(lpx, UPKLO)
VUPKPX(hpx, UPKHI)
#undef VUPKPX
1864
/*
 * vupkhsb etc.: sign-extend the high or low half of the packed source
 * into double-width result elements.  The hi/lo selection is already
 * adjusted for host element order via UPKHI/UPKLO above.
 */
#define VUPK(suffix, unpacked, packee, hi)                              \
    void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b)                \
    {                                                                   \
        int i;                                                          \
        ppc_avr_t result;                                               \
                                                                        \
        if (hi) {                                                       \
            for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) {             \
                result.unpacked[i] = b->packee[i];                      \
            }                                                           \
        } else {                                                        \
            for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \
                 i++) {                                                 \
                result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \
            }                                                           \
        }                                                               \
        *r = result;                                                    \
    }
VUPK(hsb, s16, s8, UPKHI)
VUPK(hsh, s32, s16, UPKHI)
VUPK(hsw, s64, s32, UPKHI)
VUPK(lsb, s16, s8, UPKLO)
VUPK(lsh, s32, s16, UPKLO)
VUPK(lsw, s64, s32, UPKLO)
#undef VUPK
#undef UPKHI
#undef UPKLO
1892
/*
 * Apply a scalar unary op element-wise.  Used below to generate the
 * count-leading-zeros and population-count helper families.
 */
#define VGENERIC_DO(name, element)                                      \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *b)                     \
    {                                                                   \
        int i;                                                          \
                                                                        \
        VECTOR_FOR_INORDER_I(i, element) {                              \
            r->element[i] = name(b->element[i]);                        \
        }                                                               \
    }

/* Byte/halfword clz: shift the value to the top of a 32-bit word so
 * clz32 can be reused; zero inputs return the element width. */
#define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8)
#define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16)
#define clzw(v) clz32((v))
#define clzd(v) clz64((v))

VGENERIC_DO(clzb, u8)
VGENERIC_DO(clzh, u16)
VGENERIC_DO(clzw, u32)
VGENERIC_DO(clzd, u64)

#undef clzb
#undef clzh
#undef clzw
#undef clzd

/* Population count per element width. */
#define popcntb(v) ctpop8(v)
#define popcnth(v) ctpop16(v)
#define popcntw(v) ctpop32(v)
#define popcntd(v) ctpop64(v)

VGENERIC_DO(popcntb, u8)
VGENERIC_DO(popcnth, u16)
VGENERIC_DO(popcntw, u32)
VGENERIC_DO(popcntd, u64)

#undef popcntb
#undef popcnth
#undef popcntw
#undef popcntd

#undef VGENERIC_DO
1934
/* The 128-bit constant 1 in host element order. */
#if defined(HOST_WORDS_BIGENDIAN)
#define QW_ONE { .u64 = { 0, 1 } }
#else
#define QW_ONE { .u64 = { 1, 0 } }
#endif
1940
#ifndef CONFIG_INT128

/* Fallback 128-bit quadword helpers for hosts without __int128. */

/* Bitwise complement of a 128-bit value. */
static inline void avr_qw_not(ppc_avr_t *t, ppc_avr_t a)
{
    t->u64[0] = ~a.u64[0];
    t->u64[1] = ~a.u64[1];
}

/* Unsigned 128-bit compare: -1, 0 or 1 as a <, ==, > b. */
static int avr_qw_cmpu(ppc_avr_t a, ppc_avr_t b)
{
    if (a.u64[HI_IDX] < b.u64[HI_IDX]) {
        return -1;
    } else if (a.u64[HI_IDX] > b.u64[HI_IDX]) {
        return 1;
    } else if (a.u64[LO_IDX] < b.u64[LO_IDX]) {
        return -1;
    } else if (a.u64[LO_IDX] > b.u64[LO_IDX]) {
        return 1;
    } else {
        return 0;
    }
}

/* 128-bit add; '~a.lo < b.lo' tests whether a.lo + b.lo carries out. */
static void avr_qw_add(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
{
    t->u64[LO_IDX] = a.u64[LO_IDX] + b.u64[LO_IDX];
    t->u64[HI_IDX] = a.u64[HI_IDX] + b.u64[HI_IDX] +
                         (~a.u64[LO_IDX] < b.u64[LO_IDX]);
}

/* 128-bit add returning the carry-out: a + b overflows iff b > ~a. */
static int avr_qw_addc(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
{
    ppc_avr_t not_a;
    t->u64[LO_IDX] = a.u64[LO_IDX] + b.u64[LO_IDX];
    t->u64[HI_IDX] = a.u64[HI_IDX] + b.u64[HI_IDX] +
                         (~a.u64[LO_IDX] < b.u64[LO_IDX]);
    avr_qw_not(&not_a, a);
    return avr_qw_cmpu(not_a, b) < 0;
}

#endif
1982
/* vadduqm: 128-bit modulo (wrapping) add. */
void helper_vadduqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
#ifdef CONFIG_INT128
    r->u128 = a->u128 + b->u128;
#else
    avr_qw_add(r, *a, *b);
#endif
}
1991
/*
 * vaddeuqm: 128-bit extended add — a + b + carry-in, where the
 * carry-in is bit 0 of vC.
 */
void helper_vaddeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
#ifdef CONFIG_INT128
    r->u128 = a->u128 + b->u128 + (c->u128 & 1);
#else

    if (c->u64[LO_IDX] & 1) {
        ppc_avr_t tmp;

        /* fold the carry-in into a first, then add b */
        tmp.u64[HI_IDX] = 0;
        tmp.u64[LO_IDX] = c->u64[LO_IDX] & 1;
        avr_qw_add(&tmp, *a, tmp);
        avr_qw_add(r, tmp, *b);
    } else {
        avr_qw_add(r, *a, *b);
    }
#endif
}
2010
/*
 * vaddcuq: carry-out of the 128-bit add a + b (1 iff the sum wraps,
 * i.e. b > ~a).
 */
void helper_vaddcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
#ifdef CONFIG_INT128
    r->u128 = (~a->u128 < b->u128);
#else
    ppc_avr_t not_a;

    avr_qw_not(&not_a, *a);

    r->u64[HI_IDX] = 0;
    r->u64[LO_IDX] = (avr_qw_cmpu(not_a, *b) < 0);
#endif
}
2024
/*
 * vaddecuq: carry-out of the 128-bit extended add a + b + carry-in
 * (carry-in is bit 0 of vC).  When a + b alone does not carry, the
 * carry-in can still produce one only when a + b == all-ones, which
 * the INT128 path checks as "a + b + 1 == 0 with a or b nonzero".
 */
void helper_vaddecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
#ifdef CONFIG_INT128
    int carry_out = (~a->u128 < b->u128);
    if (!carry_out && (c->u128 & 1)) {
        carry_out = ((a->u128 + b->u128 + 1) == 0) &&
                    ((a->u128 != 0) || (b->u128 != 0));
    }
    r->u128 = carry_out;
#else

    int carry_in = c->u64[LO_IDX] & 1;
    int carry_out = 0;
    ppc_avr_t tmp;

    carry_out = avr_qw_addc(&tmp, *a, *b);

    if (!carry_out && carry_in) {
        ppc_avr_t one = QW_ONE;
        carry_out = avr_qw_addc(&tmp, tmp, one);
    }
    r->u64[HI_IDX] = 0;
    r->u64[LO_IDX] = carry_out;
#endif
}
2050
/* vsubuqm: 128-bit modulo subtract, computed as a + ~b + 1. */
void helper_vsubuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
#ifdef CONFIG_INT128
    r->u128 = a->u128 - b->u128;
#else
    ppc_avr_t tmp;
    ppc_avr_t one = QW_ONE;

    avr_qw_not(&tmp, *b);
    avr_qw_add(&tmp, *a, tmp);
    avr_qw_add(r, tmp, one);
#endif
}
2064
/*
 * vsubeuqm: 128-bit extended subtract — a + ~b + carry-in, where the
 * carry-in is bit 0 of vC.
 */
void helper_vsubeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
#ifdef CONFIG_INT128
    r->u128 = a->u128 + ~b->u128 + (c->u128 & 1);
#else
    ppc_avr_t tmp, sum;

    avr_qw_not(&tmp, *b);
    avr_qw_add(&sum, *a, tmp);

    tmp.u64[HI_IDX] = 0;
    tmp.u64[LO_IDX] = c->u64[LO_IDX] & 1;
    avr_qw_add(r, sum, tmp);
#endif
}
2080
2081void helper_vsubcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2082{
2083#ifdef CONFIG_INT128
2084 r->u128 = (~a->u128 < ~b->u128) ||
2085 (a->u128 + ~b->u128 == (__uint128_t)-1);
2086#else
2087 int carry = (avr_qw_cmpu(*a, *b) > 0);
2088 if (!carry) {
2089 ppc_avr_t tmp;
2090 avr_qw_not(&tmp, *b);
2091 avr_qw_add(&tmp, *a, tmp);
2092 carry = ((tmp.s64[HI_IDX] == -1ull) && (tmp.s64[LO_IDX] == -1ull));
2093 }
2094 r->u64[HI_IDX] = 0;
2095 r->u64[LO_IDX] = carry;
2096#endif
2097}
2098
/*
 * vsubecuq: carry-out of the 128-bit extended subtract a + ~b +
 * carry-in (carry-in is bit 0 of vC).  With no carry-in, the carry
 * exists only when a > b; with a carry-in, a == b (a + ~b all-ones)
 * also produces one.
 */
void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
#ifdef CONFIG_INT128
    r->u128 =
        (~a->u128 < ~b->u128) ||
            ((c->u128 & 1) && (a->u128 + ~b->u128 == (__uint128_t)-1));
#else
    int carry_in = c->u64[LO_IDX] & 1;
    int carry_out = (avr_qw_cmpu(*a, *b) > 0);
    if (!carry_out && carry_in) {
        ppc_avr_t tmp;
        avr_qw_not(&tmp, *b);
        avr_qw_add(&tmp, *a, tmp);
        carry_out = ((tmp.u64[HI_IDX] == -1ull) && (tmp.u64[LO_IDX] == -1ull));
    }

    r->u64[HI_IDX] = 0;
    r->u64[LO_IDX] = carry_out;
#endif
}
2119
/* Signed-BCD sign nibble codes: C/F/A/E mean plus, D/B mean minus. */
#define BCD_PLUS_PREF_1 0xC
#define BCD_PLUS_PREF_2 0xF
#define BCD_PLUS_ALT_1  0xA
#define BCD_NEG_PREF    0xD
#define BCD_NEG_ALT     0xB
#define BCD_PLUS_ALT_2  0xE

/* Host byte index holding BCD digit n (digit 0 is the sign nibble). */
#if defined(HOST_WORDS_BIGENDIAN)
#define BCD_DIG_BYTE(n) (15 - (n/2))
#else
#define BCD_DIG_BYTE(n) (n/2)
#endif
2132
2133static int bcd_get_sgn(ppc_avr_t *bcd)
2134{
2135 switch (bcd->u8[BCD_DIG_BYTE(0)] & 0xF) {
2136 case BCD_PLUS_PREF_1:
2137 case BCD_PLUS_PREF_2:
2138 case BCD_PLUS_ALT_1:
2139 case BCD_PLUS_ALT_2:
2140 {
2141 return 1;
2142 }
2143
2144 case BCD_NEG_PREF:
2145 case BCD_NEG_ALT:
2146 {
2147 return -1;
2148 }
2149
2150 default:
2151 {
2152 return 0;
2153 }
2154 }
2155}
2156
2157static int bcd_preferred_sgn(int sgn, int ps)
2158{
2159 if (sgn >= 0) {
2160 return (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2;
2161 } else {
2162 return BCD_NEG_PREF;
2163 }
2164}
2165
2166static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid)
2167{
2168 uint8_t result;
2169 if (n & 1) {
2170 result = bcd->u8[BCD_DIG_BYTE(n)] >> 4;
2171 } else {
2172 result = bcd->u8[BCD_DIG_BYTE(n)] & 0xF;
2173 }
2174
2175 if (unlikely(result > 9)) {
2176 *invalid = true;
2177 }
2178 return result;
2179}
2180
2181static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n)
2182{
2183 if (n & 1) {
2184 bcd->u8[BCD_DIG_BYTE(n)] &= 0x0F;
2185 bcd->u8[BCD_DIG_BYTE(n)] |= (digit<<4);
2186 } else {
2187 bcd->u8[BCD_DIG_BYTE(n)] &= 0xF0;
2188 bcd->u8[BCD_DIG_BYTE(n)] |= digit;
2189 }
2190}
2191
2192static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b)
2193{
2194 int i;
2195 int invalid = 0;
2196 for (i = 31; i > 0; i--) {
2197 uint8_t dig_a = bcd_get_digit(a, i, &invalid);
2198 uint8_t dig_b = bcd_get_digit(b, i, &invalid);
2199 if (unlikely(invalid)) {
2200 return 0;
2201 } else if (dig_a > dig_b) {
2202 return 1;
2203 } else if (dig_a < dig_b) {
2204 return -1;
2205 }
2206 }
2207
2208 return 0;
2209}
2210
/*
 * Add the magnitudes of a and b into t (digits 1..31; digit 0 is the
 * sign nibble and is not touched here).  Returns 1 when the result is
 * all zero, 0 otherwise, or -1 on an invalid input digit; *overflow
 * receives the final carry.
 */
static int bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
                       int *overflow)
{
    int carry = 0;
    int i;
    int is_zero = 1;
    for (i = 1; i <= 31; i++) {
        uint8_t digit = bcd_get_digit(a, i, invalid) +
                        bcd_get_digit(b, i, invalid) + carry;
        is_zero &= (digit == 0);
        if (digit > 9) {
            carry = 1;
            digit -= 10;
        } else {
            carry = 0;
        }

        bcd_put_digit(t, digit, i);

        if (unlikely(*invalid)) {
            return -1;
        }
    }

    *overflow = carry;
    return is_zero;
}
2238
/*
 * Subtract the magnitude of b from a into t (digits 1..31).  A borrow
 * is detected via the 0x80 bit of the uint8_t digit wrapping negative.
 * Returns 1 when the result is all zero, 0 otherwise, or -1 on an
 * invalid input digit; *overflow receives the final borrow (0 or -1).
 */
static int bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
                       int *overflow)
{
    int carry = 0;
    int i;
    int is_zero = 1;
    for (i = 1; i <= 31; i++) {
        uint8_t digit = bcd_get_digit(a, i, invalid) -
                        bcd_get_digit(b, i, invalid) + carry;
        is_zero &= (digit == 0);
        if (digit & 0x80) {
            carry = -1;
            digit += 10;
        } else {
            carry = 0;
        }

        bcd_put_digit(t, digit, i);

        if (unlikely(*invalid)) {
            return -1;
        }
    }

    *overflow = carry;
    return is_zero;
}
2266
/*
 * bcdadd.: signed BCD add of a and b with preferred-sign selection ps.
 * Same signs add magnitudes; differing signs subtract the smaller
 * magnitude from the larger, the result taking the larger operand's
 * sign.  Returns the condition-field bits: GT/LT reflect the result
 * sign, EQ a zero result, SO invalid operands or overflow.  Invalid
 * inputs produce an all-ones result value.
 */
uint32_t helper_bcdadd(ppc_avr_t *r,  ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{

    int sgna = bcd_get_sgn(a);
    int sgnb = bcd_get_sgn(b);
    int invalid = (sgna == 0) || (sgnb == 0);
    int overflow = 0;
    int zero = 0;
    uint32_t cr = 0;
    ppc_avr_t result = { .u64 = { 0, 0 } };

    if (!invalid) {
        if (sgna == sgnb) {
            result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps);
            zero = bcd_add_mag(&result, a, b, &invalid, &overflow);
            cr = (sgna > 0) ? 1 << CRF_GT : 1 << CRF_LT;
        } else if (bcd_cmp_mag(a, b) > 0) {
            result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps);
            zero = bcd_sub_mag(&result, a, b, &invalid, &overflow);
            cr = (sgna > 0) ? 1 << CRF_GT : 1 << CRF_LT;
        } else {
            result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgnb, ps);
            zero = bcd_sub_mag(&result, b, a, &invalid, &overflow);
            cr = (sgnb > 0) ? 1 << CRF_GT : 1 << CRF_LT;
        }
    }

    if (unlikely(invalid)) {
        result.u64[HI_IDX] = result.u64[LO_IDX] = -1;
        cr = 1 << CRF_SO;
    } else if (overflow) {
        cr |= 1 << CRF_SO;
    } else if (zero) {
        cr = 1 << CRF_EQ;
    }

    *r = result;

    return cr;
}
2307
2308uint32_t helper_bcdsub(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2309{
2310 ppc_avr_t bcopy = *b;
2311 int sgnb = bcd_get_sgn(b);
2312 if (sgnb < 0) {
2313 bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0);
2314 } else if (sgnb > 0) {
2315 bcd_put_digit(&bcopy, BCD_NEG_PREF, 0);
2316 }
2317
2318
2319 return helper_bcdadd(r, a, &bcopy, ps);
2320}
2321
/* vsbox: apply the forward AES S-box to every byte (SubBytes step). */
void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a)
{
    int i;
    VECTOR_FOR_INORDER_I(i, u8) {
        r->u8[i] = AES_sbox[a->u8[i]];
    }
}
2329
/*
 * vcipher: one full AES encryption round.  The AES_Te* lookup tables
 * combine SubBytes, ShiftRows and MixColumns; the round key in vB is
 * folded in by the outer XOR.
 */
void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u32) {
        result.AVRW(i) = b->AVRW(i) ^
            (AES_Te0[a->AVRB(AES_shifts[4*i + 0])] ^
             AES_Te1[a->AVRB(AES_shifts[4*i + 1])] ^
             AES_Te2[a->AVRB(AES_shifts[4*i + 2])] ^
             AES_Te3[a->AVRB(AES_shifts[4*i + 3])]);
    }
    *r = result;
}
2344
/*
 * vcipherlast: the final AES encryption round — SubBytes and ShiftRows
 * (no MixColumns) plus the round key in vB.
 */
void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u8) {
        result.AVRB(i) = b->AVRB(i) ^ (AES_sbox[a->AVRB(AES_shifts[i])]);
    }
    *r = result;
}
2355
/*
 * vncipher: one AES decryption round.  tmp holds InvShiftRows +
 * InvSubBytes of vA XORed with the round key in vB; the AES_imc table
 * then applies InvMixColumns byte-column by byte-column.
 */
void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{


    int i;
    ppc_avr_t tmp;

    VECTOR_FOR_INORDER_I(i, u8) {
        tmp.AVRB(i) = b->AVRB(i) ^ AES_isbox[a->AVRB(AES_ishifts[i])];
    }

    VECTOR_FOR_INORDER_I(i, u32) {
        r->AVRW(i) =
            AES_imc[tmp.AVRB(4*i + 0)][0] ^
            AES_imc[tmp.AVRB(4*i + 1)][1] ^
            AES_imc[tmp.AVRB(4*i + 2)][2] ^
            AES_imc[tmp.AVRB(4*i + 3)][3];
    }
}
2375
/*
 * vncipherlast: the final AES decryption round — InvShiftRows and
 * InvSubBytes (no InvMixColumns) plus the round key in vB.
 */
void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u8) {
        result.AVRB(i) = b->AVRB(i) ^ (AES_isbox[a->AVRB(AES_ishifts[i])]);
    }
    *r = result;
}
2386
/*
 * Rotate a 32-bit value right by n bits (callers use 1 <= n <= 31).
 * Both macro parameters are fully parenthesized so that expression
 * arguments (e.g. a computed rotate count) expand correctly.
 */
#define ROTRu32(v, n) (((v) >> (n)) | ((v) << (32 - (n))))
/* Host storage index of guest word element i. */
#if defined(HOST_WORDS_BIGENDIAN)
#define EL_IDX(i) (i)
#else
#define EL_IDX(i) (3 - (i))
#endif
2393
/*
 * vshasigmaw: SHA-256 sigma functions on each word.  st selects the
 * upper-case Sigma (compression) variants; for word i, bit (0x8 >> i)
 * of 'six' selects sigma1/Sigma1 over sigma0/Sigma0.  The rotate and
 * shift constants follow FIPS 180-4.
 */
void helper_vshasigmaw(ppc_avr_t *r,  ppc_avr_t *a, uint32_t st_six)
{
    int st = (st_six & 0x10) != 0;
    int six = st_six & 0xF;
    int i;

    VECTOR_FOR_INORDER_I(i, u32) {
        if (st == 0) {
            if ((six & (0x8 >> i)) == 0) {
                r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 7) ^
                                    ROTRu32(a->u32[EL_IDX(i)], 18) ^
                                    (a->u32[EL_IDX(i)] >> 3);
            } else {
                r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 17) ^
                                    ROTRu32(a->u32[EL_IDX(i)], 19) ^
                                    (a->u32[EL_IDX(i)] >> 10);
            }
        } else {
            if ((six & (0x8 >> i)) == 0) {
                r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 2) ^
                                    ROTRu32(a->u32[EL_IDX(i)], 13) ^
                                    ROTRu32(a->u32[EL_IDX(i)], 22);
            } else {
                r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 6) ^
                                    ROTRu32(a->u32[EL_IDX(i)], 11) ^
                                    ROTRu32(a->u32[EL_IDX(i)], 25);
            }
        }
    }
}

#undef ROTRu32
#undef EL_IDX
2427
/*
 * Rotate a 64-bit value right by n bits (callers use 1 <= n <= 63).
 * Both macro parameters are fully parenthesized so that expression
 * arguments expand correctly.
 */
#define ROTRu64(v, n) (((v) >> (n)) | ((v) << (64 - (n))))
/* Host storage index of guest doubleword element i. */
#if defined(HOST_WORDS_BIGENDIAN)
#define EL_IDX(i) (i)
#else
#define EL_IDX(i) (1 - (i))
#endif
2434
/*
 * vshasigmad: SHA-512 sigma functions on each doubleword.  st selects
 * the upper-case Sigma variants; for doubleword i, bit (0x8 >> 2*i) of
 * 'six' selects sigma1/Sigma1 over sigma0/Sigma0.  Rotate and shift
 * constants follow FIPS 180-4.
 */
void helper_vshasigmad(ppc_avr_t *r,  ppc_avr_t *a, uint32_t st_six)
{
    int st = (st_six & 0x10) != 0;
    int six = st_six & 0xF;
    int i;

    VECTOR_FOR_INORDER_I(i, u64) {
        if (st == 0) {
            if ((six & (0x8 >> (2*i))) == 0) {
                r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 1) ^
                                    ROTRu64(a->u64[EL_IDX(i)], 8) ^
                                    (a->u64[EL_IDX(i)] >> 7);
            } else {
                r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 19) ^
                                    ROTRu64(a->u64[EL_IDX(i)], 61) ^
                                    (a->u64[EL_IDX(i)] >> 6);
            }
        } else {
            if ((six & (0x8 >> (2*i))) == 0) {
                r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 28) ^
                                    ROTRu64(a->u64[EL_IDX(i)], 34) ^
                                    ROTRu64(a->u64[EL_IDX(i)], 39);
            } else {
                r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 14) ^
                                    ROTRu64(a->u64[EL_IDX(i)], 18) ^
                                    ROTRu64(a->u64[EL_IDX(i)], 41);
            }
        }
    }
}

#undef ROTRu64
#undef EL_IDX
2468
/*
 * vpermxor: for each byte, the high and low nibbles of vC select a
 * byte of vA and of vB respectively, and the selected bytes are XORed.
 * Little-endian hosts mirror the indices to match guest element
 * numbering.
 */
void helper_vpermxor(ppc_avr_t *r,  ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u8) {
        int indexA = c->u8[i] >> 4;
        int indexB = c->u8[i] & 0xF;
#if defined(HOST_WORDS_BIGENDIAN)
        result.u8[i] = a->u8[indexA] ^ b->u8[indexB];
#else
        result.u8[i] = a->u8[15-indexA] ^ b->u8[15-indexB];
#endif
    }
    *r = result;
}
2485
2486#undef VECTOR_FOR_INORDER_I
2487#undef HI_IDX
2488#undef LO_IDX
2489
2490
2491
2492
/* Nibble bit-reversal lookup table: hbrev[n] is n with its 4 bits reversed. */
static const uint8_t hbrev[16] = {
    0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
    0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF,
};

/* Reverse the bit order of one byte via the nibble table. */
static inline uint8_t byte_reverse(uint8_t val)
{
    return hbrev[val >> 4] | (hbrev[val & 0xF] << 4);
}

/*
 * Reverse the bit order of a 32-bit word.  The uint32_t casts keep the
 * high-byte shifts unsigned: without them, byte_reverse's uint8_t
 * result promotes to (signed) int, and shifting a set top bit into bit
 * 31 is signed-overflow undefined behaviour.
 */
static inline uint32_t word_reverse(uint32_t val)
{
    return byte_reverse(val >> 24) |
           ((uint32_t)byte_reverse(val >> 16) << 8) |
           ((uint32_t)byte_reverse(val >> 8) << 16) |
           ((uint32_t)byte_reverse(val) << 24);
}
2508
2509#define MASKBITS 16
2510target_ulong helper_brinc(target_ulong arg1, target_ulong arg2)
2511{
2512 uint32_t a, b, d, mask;
2513
2514 mask = UINT32_MAX >> (32 - MASKBITS);
2515 a = arg1 & mask;
2516 b = arg2 & mask;
2517 d = word_reverse(1 + word_reverse(a | ~b));
2518 return (arg1 & ~mask) | (d & b);
2519}
2520
/* Count leading sign bits of a 32-bit value (invert negatives first). */
uint32_t helper_cntlsw32(uint32_t val)
{
    return clz32((val & 0x80000000) ? ~val : val);
}
2529
/* Count leading zeros of a 32-bit value. */
uint32_t helper_cntlzw32(uint32_t val)
{
    return clz32(val);
}
2534
2535
/*
 * dlmzb: Determine Leftmost Zero Byte in the 8-byte value high:low.
 * Returns the 1-based byte position of the first zero byte (scanning
 * high then low), or 8 when there is none; the count is also written
 * to the low 7 bits of XER.  When update_Rc is set, CR0 records where
 * the zero was found (0x4 in high, 0x8 in low, 0x2 none) plus SO.
 */
target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
                          target_ulong low, uint32_t update_Rc)
{
    target_ulong mask;
    int i;

    i = 1;
    for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
        if ((high & mask) == 0) {
            if (update_Rc) {
                env->crf[0] = 0x4;
            }
            goto done;
        }
        i++;
    }
    for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
        if ((low & mask) == 0) {
            if (update_Rc) {
                env->crf[0] = 0x8;
            }
            goto done;
        }
        i++;
    }
    i = 8;  /* no zero byte found */
    if (update_Rc) {
        env->crf[0] = 0x2;
    }
 done:
    env->xer = (env->xer & ~0x7F) | i;
    if (update_Rc) {
        env->crf[0] |= xer_so;
    }
    return i;
}
2572