/*
 *  PowerPC integer and vector emulation helpers for QEMU.
 *
 *  Copyright (c) 2003-2007 Jocelyn Mayer
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"
#include "cpu.h"
#include "qemu/host-utils.h"
#include "exec/helper-proto.h"
#include "crypto/aes.h"

#include "helper_regs.h"

/*****************************************************************************/
/* Fixed point operations helpers */
/* Divide Word Extended Unsigned: (RA || 0x00000000) / RB. */
target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
                           uint32_t oe)
{
    uint64_t rt = 0;
    int overflow = 0;

    uint64_t dividend = (uint64_t)ra << 32;
    uint64_t divisor = (uint32_t)rb;

    if (unlikely(divisor == 0)) {
        overflow = 1;
    } else {
        rt = dividend / divisor;
        overflow = rt > UINT32_MAX;
    }

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        if (unlikely(overflow)) {
            env->so = env->ov = 1;
        } else {
            env->ov = 0;
        }
    }

    return (target_ulong)rt;
}

/* Divide Word Extended: signed counterpart of divweu. */
target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb,
                          uint32_t oe)
{
    int64_t rt = 0;
    int overflow = 0;

    int64_t dividend = (int64_t)ra << 32;
    int64_t divisor = (int64_t)((int32_t)rb);

    /* Division by zero and INT64_MIN / -1 both overflow */
    if (unlikely((divisor == 0) ||
                 ((divisor == -1) && (dividend == INT64_MIN)))) {
        overflow = 1;
    } else {
        rt = dividend / divisor;
        overflow = rt != (int32_t)rt;
    }

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        if (unlikely(overflow)) {
            env->so = env->ov = 1;
        } else {
            env->ov = 0;
        }
    }

    return (target_ulong)rt;
}

#if defined(TARGET_PPC64)

/* Divide Doubleword Extended Unsigned: (RA || 64 zero bits) / RB,
 * done with the 128/64-bit division helper. */
uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
{
    uint64_t rt = 0;
    int overflow = 0;

    overflow = divu128(&rt, &ra, rb);

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        if (unlikely(overflow)) {
            env->so = env->ov = 1;
        } else {
            env->ov = 0;
        }
    }

    return rt;
}

uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
{
    int64_t rt = 0;
    int64_t ra = (int64_t)rau;
    int64_t rb = (int64_t)rbu;
    int overflow = divs128(&rt, &ra, rb);

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        if (unlikely(overflow)) {
            env->so = env->ov = 1;
        } else {
            env->ov = 0;
        }
    }

    return rt;
}

#endif

target_ulong helper_cntlzw(target_ulong t)
{
    return clz32(t);
}

#if defined(TARGET_PPC64)
target_ulong helper_cntlzd(target_ulong t)
{
    return clz64(t);
}
#endif

#if defined(TARGET_PPC64)

/* Bit Permute Doubleword: each of the low 8 bytes of RS is a bit index
 * into RB (0 = most significant bit); the selected bits are gathered
 * into the low byte of the result. */
uint64_t helper_bpermd(uint64_t rs, uint64_t rb)
{
    int i;
    uint64_t ra = 0;

    for (i = 0; i < 8; i++) {
        int index = (rs >> (i * 8)) & 0xFF;
        if (index < 64) {
            if (rb & (1ull << (63 - index))) {
                ra |= 1 << i;
            }
        }
    }
    return ra;
}

#endif

/* Compare Bytes: set each byte of RA to 0xff where the corresponding
 * bytes of RS and RB are equal, 0x00 otherwise. */
target_ulong helper_cmpb(target_ulong rs, target_ulong rb)
{
    target_ulong mask = 0xff;
    target_ulong ra = 0;
    int i;

    for (i = 0; i < sizeof(target_ulong); i++) {
        if ((rs & mask) == (rb & mask)) {
            ra |= mask;
        }
        mask <<= 8;
    }
    return ra;
}

/* Shift right algebraic word; CA records whether a negative value lost
 * any 1-bits to the shift. */
target_ulong helper_sraw(CPUPPCState *env, target_ulong value,
                         target_ulong shift)
{
    int32_t ret;

    if (likely(!(shift & 0x20))) {
        if (likely((uint32_t)shift != 0)) {
            shift &= 0x1f;
            ret = (int32_t)value >> shift;
            if (likely(ret >= 0 || (value & ((1U << shift) - 1)) == 0)) {
                env->ca = 0;
            } else {
                env->ca = 1;
            }
        } else {
            ret = (int32_t)value;
            env->ca = 0;
        }
    } else {
        ret = (int32_t)value >> 31;
        env->ca = (ret != 0);
    }
    return (target_long)ret;
}

#if defined(TARGET_PPC64)
target_ulong helper_srad(CPUPPCState *env, target_ulong value,
                         target_ulong shift)
{
    int64_t ret;

    if (likely(!(shift & 0x40))) {
        if (likely((uint64_t)shift != 0)) {
            shift &= 0x3f;
            ret = (int64_t)value >> shift;
            if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) {
                env->ca = 0;
            } else {
                env->ca = 1;
            }
        } else {
            ret = (int64_t)value;
            env->ca = 0;
        }
    } else {
        ret = (int64_t)value >> 63;
        env->ca = (ret != 0);
    }
    return ret;
}
#endif

#if defined(TARGET_PPC64)
/* SWAR popcount: each step folds adjacent bit-fields together, so the
 * final value carries an independent population count in every byte
 * (popcntb), word (popcntw) or doubleword (popcntd). */
target_ulong helper_popcntb(target_ulong val)
{
    val = (val & 0x5555555555555555ULL) + ((val >> 1) &
                                           0x5555555555555555ULL);
    val = (val & 0x3333333333333333ULL) + ((val >> 2) &
                                           0x3333333333333333ULL);
    val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
                                           0x0f0f0f0f0f0f0f0fULL);
    return val;
}

target_ulong helper_popcntw(target_ulong val)
{
    val = (val & 0x5555555555555555ULL) + ((val >> 1) &
                                           0x5555555555555555ULL);
    val = (val & 0x3333333333333333ULL) + ((val >> 2) &
                                           0x3333333333333333ULL);
    val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
                                           0x0f0f0f0f0f0f0f0fULL);
    val = (val & 0x00ff00ff00ff00ffULL) + ((val >> 8) &
                                           0x00ff00ff00ff00ffULL);
    val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) &
                                           0x0000ffff0000ffffULL);
    return val;
}

target_ulong helper_popcntd(target_ulong val)
{
    return ctpop64(val);
}
#else
target_ulong helper_popcntb(target_ulong val)
{
    val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
    val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
    val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
    return val;
}

target_ulong helper_popcntw(target_ulong val)
{
    val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
    val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
    val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
    val = (val & 0x00ff00ff) + ((val >> 8) & 0x00ff00ff);
    val = (val & 0x0000ffff) + ((val >> 16) & 0x0000ffff);
    return val;
}
#endif

/*****************************************************************************/
/* PowerPC 601 specific instructions (POWER bridge) */
target_ulong helper_div(CPUPPCState *env, target_ulong arg1, target_ulong arg2)
{
    uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];

    if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->spr[SPR_MQ] = tmp % arg2;
        return tmp / (int32_t)arg2;
    }
}

target_ulong helper_divo(CPUPPCState *env, target_ulong arg1,
                         target_ulong arg2)
{
    uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];

    if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->so = env->ov = 1;
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->spr[SPR_MQ] = tmp % arg2;
        tmp /= (int32_t)arg2;
        if ((int32_t)tmp != tmp) {
            env->so = env->ov = 1;
        } else {
            env->ov = 0;
        }
        return tmp;
    }
}

target_ulong helper_divs(CPUPPCState *env, target_ulong arg1,
                         target_ulong arg2)
{
    if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
        return (int32_t)arg1 / (int32_t)arg2;
    }
}

target_ulong helper_divso(CPUPPCState *env, target_ulong arg1,
                          target_ulong arg2)
{
    if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->so = env->ov = 1;
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->ov = 0;
        env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
        return (int32_t)arg1 / (int32_t)arg2;
    }
}

/*****************************************************************************/
/* 602 specific instructions */
/* mfrom is the most crazy instruction ever seen, imho ! */
/* Real implementation uses a ROM table. Do the same */
/*
 * Extremely decomposed:
 *                      -arg / 256
 * return 256 * log10(10           + 1.0) + 0.5
 */
#if !defined(CONFIG_USER_ONLY)
target_ulong helper_602_mfrom(target_ulong arg)
{
    if (likely(arg < 602)) {
#include "mfrom_table.c"
        return mfrom_ROM_table[arg];
    } else {
        return 0;
    }
}
#endif

/*****************************************************************************/
/* Altivec extension helpers */
#if defined(HOST_WORDS_BIGENDIAN)
#define HI_IDX 0
#define LO_IDX 1
#define AVRB(i) u8[i]
#define AVRW(i) u32[i]
#else
#define HI_IDX 1
#define LO_IDX 0
#define AVRB(i) u8[15 - (i)]
#define AVRW(i) u32[3 - (i)]
#endif

#if defined(HOST_WORDS_BIGENDIAN)
#define VECTOR_FOR_INORDER_I(index, element)                    \
    for (index = 0; index < ARRAY_SIZE(r->element); index++)
#else
#define VECTOR_FOR_INORDER_I(index, element)                    \
    for (index = ARRAY_SIZE(r->element) - 1; index >= 0; index--)
#endif

/* Saturating arithmetic helpers.  */
#define SATCVT(from, to, from_type, to_type, min, max)          \
    static inline to_type cvt##from##to(from_type x, int *sat)  \
    {                                                           \
        to_type r;                                              \
                                                                \
        if (x < (from_type)min) {                               \
            r = min;                                            \
            *sat = 1;                                           \
        } else if (x > (from_type)max) {                        \
            r = max;                                            \
            *sat = 1;                                           \
        } else {                                                \
            r = x;                                              \
        }                                                       \
        return r;                                               \
    }
#define SATCVTU(from, to, from_type, to_type, min, max)         \
    static inline to_type cvt##from##to(from_type x, int *sat)  \
    {                                                           \
        to_type r;                                              \
                                                                \
        if (x > (from_type)max) {                               \
            r = max;                                            \
            *sat = 1;                                           \
        } else {                                                \
            r = x;                                              \
        }                                                       \
        return r;                                               \
    }
SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX)
SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX)
SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX)

SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX)
SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX)
SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX)
SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX)
SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX)
SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX)
#undef SATCVT
#undef SATCVTU

444void helper_lvsl(ppc_avr_t *r, target_ulong sh)
445{
446 int i, j = (sh & 0xf);
447
448 VECTOR_FOR_INORDER_I(i, u8) {
449 r->u8[i] = j++;
450 }
451}
452
453void helper_lvsr(ppc_avr_t *r, target_ulong sh)
454{
455 int i, j = 0x10 - (sh & 0xf);
456
457 VECTOR_FOR_INORDER_I(i, u8) {
458 r->u8[i] = j++;
459 }
460}
461
462void helper_mtvscr(CPUPPCState *env, ppc_avr_t *r)
463{
464#if defined(HOST_WORDS_BIGENDIAN)
465 env->vscr = r->u32[3];
466#else
467 env->vscr = r->u32[0];
468#endif
469 set_flush_to_zero(vscr_nj, &env->vec_status);
470}
471
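/*
 * Unsigned carry-out trick used below: a + b carries out of a 32-bit
 * lane exactly when b > ~a, i.e. when b > 0xFFFFFFFF - a.  For example
 * a = 0xFFFFFFFE, b = 3: ~a = 1 < 3, so vaddcuw writes 1 to that lane.
 */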
472void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
473{
474 int i;
475
476 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
477 r->u32[i] = ~a->u32[i] < b->u32[i];
478 }
479}
480
481#define VARITH_DO(name, op, element) \
482 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
483 { \
484 int i; \
485 \
486 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
487 r->element[i] = a->element[i] op b->element[i]; \
488 } \
489 }
490#define VARITH(suffix, element) \
491 VARITH_DO(add##suffix, +, element) \
492 VARITH_DO(sub##suffix, -, element)
493VARITH(ubm, u8)
494VARITH(uhm, u16)
495VARITH(uwm, u32)
496VARITH(udm, u64)
497VARITH_DO(muluwm, *, u32)
498#undef VARITH_DO
499#undef VARITH
500
501#define VARITHFP(suffix, func) \
502 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
503 ppc_avr_t *b) \
504 { \
505 int i; \
506 \
507 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
508 r->f[i] = func(a->f[i], b->f[i], &env->vec_status); \
509 } \
510 }
511VARITHFP(addfp, float32_add)
512VARITHFP(subfp, float32_sub)
513VARITHFP(minfp, float32_min)
514VARITHFP(maxfp, float32_max)
515#undef VARITHFP
516
517#define VARITHFPFMA(suffix, type) \
518 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
519 ppc_avr_t *b, ppc_avr_t *c) \
520 { \
521 int i; \
522 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
523 r->f[i] = float32_muladd(a->f[i], c->f[i], b->f[i], \
524 type, &env->vec_status); \
525 } \
526 }
527VARITHFPFMA(maddfp, 0);
528VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c);
529#undef VARITHFPFMA
530
531#define VARITHSAT_CASE(type, op, cvt, element) \
532 { \
533 type result = (type)a->element[i] op (type)b->element[i]; \
534 r->element[i] = cvt(result, &sat); \
535 }
536
537#define VARITHSAT_DO(name, op, optype, cvt, element) \
538 void helper_v##name(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
539 ppc_avr_t *b) \
540 { \
541 int sat = 0; \
542 int i; \
543 \
544 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
545 switch (sizeof(r->element[0])) { \
546 case 1: \
547 VARITHSAT_CASE(optype, op, cvt, element); \
548 break; \
549 case 2: \
550 VARITHSAT_CASE(optype, op, cvt, element); \
551 break; \
552 case 4: \
553 VARITHSAT_CASE(optype, op, cvt, element); \
554 break; \
555 } \
556 } \
557 if (sat) { \
558 env->vscr |= (1 << VSCR_SAT); \
559 } \
560 }
561#define VARITHSAT_SIGNED(suffix, element, optype, cvt) \
562 VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element) \
563 VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element)
564#define VARITHSAT_UNSIGNED(suffix, element, optype, cvt) \
565 VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element) \
566 VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element)
567VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb)
568VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh)
569VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw)
570VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub)
571VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh)
572VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw)
573#undef VARITHSAT_CASE
574#undef VARITHSAT_DO
575#undef VARITHSAT_SIGNED
576#undef VARITHSAT_UNSIGNED
577
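/*
 * Vector averages: adding 1 before the shift rounds halves upward, e.g.
 * vavgsb of 1 and 2 computes (1 + 2 + 1) >> 1 = 2.  The wider etype
 * keeps the intermediate sum from overflowing the element type.
 */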
578#define VAVG_DO(name, element, etype) \
579 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
580 { \
581 int i; \
582 \
583 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
584 etype x = (etype)a->element[i] + (etype)b->element[i] + 1; \
585 r->element[i] = x >> 1; \
586 } \
587 }
588
589#define VAVG(type, signed_element, signed_type, unsigned_element, \
590 unsigned_type) \
591 VAVG_DO(avgs##type, signed_element, signed_type) \
592 VAVG_DO(avgu##type, unsigned_element, unsigned_type)
593VAVG(b, s8, int16_t, u8, uint16_t)
594VAVG(h, s16, int32_t, u16, uint32_t)
595VAVG(w, s32, int64_t, u32, uint64_t)
596#undef VAVG_DO
597#undef VAVG
598
599#define VCF(suffix, cvt, element) \
600 void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r, \
601 ppc_avr_t *b, uint32_t uim) \
602 { \
603 int i; \
604 \
605 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
606 float32 t = cvt(b->element[i], &env->vec_status); \
607 r->f[i] = float32_scalbn(t, -uim, &env->vec_status); \
608 } \
609 }
610VCF(ux, uint32_to_float32, u32)
611VCF(sx, int32_to_float32, s32)
612#undef VCF
613
614#define VCMP_DO(suffix, compare, element, record) \
615 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \
616 ppc_avr_t *a, ppc_avr_t *b) \
617 { \
618 uint64_t ones = (uint64_t)-1; \
619 uint64_t all = ones; \
620 uint64_t none = 0; \
621 int i; \
622 \
623 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
624 uint64_t result = (a->element[i] compare b->element[i] ? \
625 ones : 0x0); \
626 switch (sizeof(a->element[0])) { \
627 case 8: \
628 r->u64[i] = result; \
629 break; \
630 case 4: \
631 r->u32[i] = result; \
632 break; \
633 case 2: \
634 r->u16[i] = result; \
635 break; \
636 case 1: \
637 r->u8[i] = result; \
638 break; \
639 } \
640 all &= result; \
641 none |= result; \
642 } \
643 if (record) { \
644 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
645 } \
646 }
647#define VCMP(suffix, compare, element) \
648 VCMP_DO(suffix, compare, element, 0) \
649 VCMP_DO(suffix##_dot, compare, element, 1)
650VCMP(equb, ==, u8)
651VCMP(equh, ==, u16)
652VCMP(equw, ==, u32)
653VCMP(equd, ==, u64)
654VCMP(gtub, >, u8)
655VCMP(gtuh, >, u16)
656VCMP(gtuw, >, u32)
657VCMP(gtud, >, u64)
658VCMP(gtsb, >, s8)
659VCMP(gtsh, >, s16)
660VCMP(gtsw, >, s32)
661VCMP(gtsd, >, s64)
662#undef VCMP_DO
663#undef VCMP
664
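/*
 * Floating-point compares follow the same pattern, except that an
 * unordered comparison (a NaN operand) always yields an all-zero lane.
 * The recorded CR6 field again holds "all lanes true" in bit 3 and
 * "no lane true" in bit 1.
 */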
665#define VCMPFP_DO(suffix, compare, order, record) \
666 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \
667 ppc_avr_t *a, ppc_avr_t *b) \
668 { \
669 uint32_t ones = (uint32_t)-1; \
670 uint32_t all = ones; \
671 uint32_t none = 0; \
672 int i; \
673 \
674 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
675 uint32_t result; \
676 int rel = float32_compare_quiet(a->f[i], b->f[i], \
677 &env->vec_status); \
678 if (rel == float_relation_unordered) { \
679 result = 0; \
680 } else if (rel compare order) { \
681 result = ones; \
682 } else { \
683 result = 0; \
684 } \
685 r->u32[i] = result; \
686 all &= result; \
687 none |= result; \
688 } \
689 if (record) { \
690 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
691 } \
692 }
693#define VCMPFP(suffix, compare, order) \
694 VCMPFP_DO(suffix, compare, order, 0) \
695 VCMPFP_DO(suffix##_dot, compare, order, 1)
696VCMPFP(eqfp, ==, float_relation_equal)
697VCMPFP(gefp, !=, float_relation_less)
698VCMPFP(gtfp, ==, float_relation_greater)
699#undef VCMPFP_DO
700#undef VCMPFP
701
702static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r,
703 ppc_avr_t *a, ppc_avr_t *b, int record)
704{
705 int i;
706 int all_in = 0;
707
708 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
709 int le_rel = float32_compare_quiet(a->f[i], b->f[i], &env->vec_status);
710 if (le_rel == float_relation_unordered) {
711 r->u32[i] = 0xc0000000;
712 all_in = 1;
713 } else {
714 float32 bneg = float32_chs(b->f[i]);
715 int ge_rel = float32_compare_quiet(a->f[i], bneg, &env->vec_status);
716 int le = le_rel != float_relation_greater;
717 int ge = ge_rel != float_relation_less;
718
719 r->u32[i] = ((!le) << 31) | ((!ge) << 30);
720 all_in |= (!le | !ge);
721 }
722 }
723 if (record) {
724 env->crf[6] = (all_in == 0) << 1;
725 }
726}
727
728void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
729{
730 vcmpbfp_internal(env, r, a, b, 0);
731}
732
733void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
734 ppc_avr_t *b)
735{
736 vcmpbfp_internal(env, r, a, b, 1);
737}
738
739#define VCT(suffix, satcvt, element) \
740 void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r, \
741 ppc_avr_t *b, uint32_t uim) \
742 { \
743 int i; \
744 int sat = 0; \
745 float_status s = env->vec_status; \
746 \
747 set_float_rounding_mode(float_round_to_zero, &s); \
748 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
749 if (float32_is_any_nan(b->f[i])) { \
750 r->element[i] = 0; \
751 } else { \
752 float64 t = float32_to_float64(b->f[i], &s); \
753 int64_t j; \
754 \
755 t = float64_scalbn(t, uim, &s); \
756 j = float64_to_int64(t, &s); \
757 r->element[i] = satcvt(j, &sat); \
758 } \
759 } \
760 if (sat) { \
761 env->vscr |= (1 << VSCR_SAT); \
762 } \
763 }
764VCT(uxs, cvtsduw, u32)
765VCT(sxs, cvtsdsw, s32)
766#undef VCT
767
768void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
769 ppc_avr_t *b, ppc_avr_t *c)
770{
771 int sat = 0;
772 int i;
773
774 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
775 int32_t prod = a->s16[i] * b->s16[i];
776 int32_t t = (int32_t)c->s16[i] + (prod >> 15);
777
778 r->s16[i] = cvtswsh(t, &sat);
779 }
780
781 if (sat) {
782 env->vscr |= (1 << VSCR_SAT);
783 }
784}
785
786void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
787 ppc_avr_t *b, ppc_avr_t *c)
788{
789 int sat = 0;
790 int i;
791
792 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
793 int32_t prod = a->s16[i] * b->s16[i] + 0x00004000;
794 int32_t t = (int32_t)c->s16[i] + (prod >> 15);
795 r->s16[i] = cvtswsh(t, &sat);
796 }
797
798 if (sat) {
799 env->vscr |= (1 << VSCR_SAT);
800 }
801}
802
803#define VMINMAX_DO(name, compare, element) \
804 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
805 { \
806 int i; \
807 \
808 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
809 if (a->element[i] compare b->element[i]) { \
810 r->element[i] = b->element[i]; \
811 } else { \
812 r->element[i] = a->element[i]; \
813 } \
814 } \
815 }
816#define VMINMAX(suffix, element) \
817 VMINMAX_DO(min##suffix, >, element) \
818 VMINMAX_DO(max##suffix, <, element)
819VMINMAX(sb, s8)
820VMINMAX(sh, s16)
821VMINMAX(sw, s32)
822VMINMAX(sd, s64)
823VMINMAX(ub, u8)
824VMINMAX(uh, u16)
825VMINMAX(uw, u32)
826VMINMAX(ud, u64)
827#undef VMINMAX_DO
828#undef VMINMAX
829
830void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
831{
832 int i;
833
834 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
835 int32_t prod = a->s16[i] * b->s16[i];
836 r->s16[i] = (int16_t) (prod + c->s16[i]);
837 }
838}
839
840#define VMRG_DO(name, element, highp) \
841 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
842 { \
843 ppc_avr_t result; \
844 int i; \
845 size_t n_elems = ARRAY_SIZE(r->element); \
846 \
847 for (i = 0; i < n_elems / 2; i++) { \
848 if (highp) { \
849 result.element[i*2+HI_IDX] = a->element[i]; \
850 result.element[i*2+LO_IDX] = b->element[i]; \
851 } else { \
852 result.element[n_elems - i * 2 - (1 + HI_IDX)] = \
853 b->element[n_elems - i - 1]; \
854 result.element[n_elems - i * 2 - (1 + LO_IDX)] = \
855 a->element[n_elems - i - 1]; \
856 } \
857 } \
858 *r = result; \
859 }
860#if defined(HOST_WORDS_BIGENDIAN)
861#define MRGHI 0
862#define MRGLO 1
863#else
864#define MRGHI 1
865#define MRGLO 0
866#endif
867#define VMRG(suffix, element) \
868 VMRG_DO(mrgl##suffix, element, MRGHI) \
869 VMRG_DO(mrgh##suffix, element, MRGLO)
870VMRG(b, u8)
871VMRG(h, u16)
872VMRG(w, u32)
873#undef VMRG_DO
874#undef VMRG
875#undef MRGHI
876#undef MRGLO
877
878void helper_vmsummbm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
879 ppc_avr_t *b, ppc_avr_t *c)
880{
881 int32_t prod[16];
882 int i;
883
884 for (i = 0; i < ARRAY_SIZE(r->s8); i++) {
885 prod[i] = (int32_t)a->s8[i] * b->u8[i];
886 }
887
888 VECTOR_FOR_INORDER_I(i, s32) {
889 r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] +
890 prod[4 * i + 2] + prod[4 * i + 3];
891 }
892}
893
894void helper_vmsumshm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
895 ppc_avr_t *b, ppc_avr_t *c)
896{
897 int32_t prod[8];
898 int i;
899
900 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
901 prod[i] = a->s16[i] * b->s16[i];
902 }
903
904 VECTOR_FOR_INORDER_I(i, s32) {
905 r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1];
906 }
907}
908
909void helper_vmsumshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
910 ppc_avr_t *b, ppc_avr_t *c)
911{
912 int32_t prod[8];
913 int i;
914 int sat = 0;
915
916 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
917 prod[i] = (int32_t)a->s16[i] * b->s16[i];
918 }
919
920 VECTOR_FOR_INORDER_I(i, s32) {
921 int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1];
922
923 r->u32[i] = cvtsdsw(t, &sat);
924 }
925
926 if (sat) {
927 env->vscr |= (1 << VSCR_SAT);
928 }
929}
930
931void helper_vmsumubm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
932 ppc_avr_t *b, ppc_avr_t *c)
933{
934 uint16_t prod[16];
935 int i;
936
937 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
938 prod[i] = a->u8[i] * b->u8[i];
939 }
940
941 VECTOR_FOR_INORDER_I(i, u32) {
942 r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] +
943 prod[4 * i + 2] + prod[4 * i + 3];
944 }
945}
946
947void helper_vmsumuhm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
948 ppc_avr_t *b, ppc_avr_t *c)
949{
950 uint32_t prod[8];
951 int i;
952
953 for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
954 prod[i] = a->u16[i] * b->u16[i];
955 }
956
957 VECTOR_FOR_INORDER_I(i, u32) {
958 r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1];
959 }
960}
961
962void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
963 ppc_avr_t *b, ppc_avr_t *c)
964{
965 uint32_t prod[8];
966 int i;
967 int sat = 0;
968
969 for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
970 prod[i] = a->u16[i] * b->u16[i];
971 }
972
973 VECTOR_FOR_INORDER_I(i, s32) {
974 uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1];
975
976 r->u32[i] = cvtuduw(t, &sat);
977 }
978
979 if (sat) {
980 env->vscr |= (1 << VSCR_SAT);
981 }
982}
983
984#define VMUL_DO(name, mul_element, prod_element, cast, evenp) \
985 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
986 { \
987 int i; \
988 \
989 VECTOR_FOR_INORDER_I(i, prod_element) { \
990 if (evenp) { \
991 r->prod_element[i] = \
992 (cast)a->mul_element[i * 2 + HI_IDX] * \
993 (cast)b->mul_element[i * 2 + HI_IDX]; \
994 } else { \
995 r->prod_element[i] = \
996 (cast)a->mul_element[i * 2 + LO_IDX] * \
997 (cast)b->mul_element[i * 2 + LO_IDX]; \
998 } \
999 } \
1000 }
1001#define VMUL(suffix, mul_element, prod_element, cast) \
1002 VMUL_DO(mule##suffix, mul_element, prod_element, cast, 1) \
1003 VMUL_DO(mulo##suffix, mul_element, prod_element, cast, 0)
1004VMUL(sb, s8, s16, int16_t)
1005VMUL(sh, s16, s32, int32_t)
1006VMUL(sw, s32, s64, int64_t)
1007VMUL(ub, u8, u16, uint16_t)
1008VMUL(uh, u16, u32, uint32_t)
1009VMUL(uw, u32, u64, uint64_t)
1010#undef VMUL_DO
1011#undef VMUL
1012
1013void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1014 ppc_avr_t *c)
1015{
1016 ppc_avr_t result;
1017 int i;
1018
1019 VECTOR_FOR_INORDER_I(i, u8) {
1020 int s = c->u8[i] & 0x1f;
1021#if defined(HOST_WORDS_BIGENDIAN)
1022 int index = s & 0xf;
1023#else
1024 int index = 15 - (s & 0xf);
1025#endif
1026
1027 if (s & 0x10) {
1028 result.u8[i] = b->u8[index];
1029 } else {
1030 result.u8[i] = a->u8[index];
1031 }
1032 }
1033 *r = result;
1034}
1035
1036#if defined(HOST_WORDS_BIGENDIAN)
1037#define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)])
1038#define VBPERMQ_DW(index) (((index) & 0x40) != 0)
1039#else
1040#define VBPERMQ_INDEX(avr, i) ((avr)->u8[15-(i)])
1041#define VBPERMQ_DW(index) (((index) & 0x40) == 0)
1042#endif
1043
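/*
 * vbpermq: each of the 16 index bytes of b selects one bit of the
 * 128-bit value in a (index 0 = most significant bit); the 16 gathered
 * bits are left in the least-significant end of the high doubleword.
 */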
1044void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1045{
1046 int i;
1047 uint64_t perm = 0;
1048
1049 VECTOR_FOR_INORDER_I(i, u8) {
1050 int index = VBPERMQ_INDEX(b, i);
1051
1052 if (index < 128) {
1053 uint64_t mask = (1ull << (63-(index & 0x3F)));
1054 if (a->u64[VBPERMQ_DW(index)] & mask) {
1055 perm |= (0x8000 >> i);
1056 }
1057 }
1058 }
1059
1060 r->u64[HI_IDX] = perm;
1061 r->u64[LO_IDX] = 0;
1062}
1063
1064#undef VBPERMQ_INDEX
1065#undef VBPERMQ_DW
1066
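/*
 * Lookup table for vgbbd (Gather Bits by Bytes by Doubleword): entry n
 * spreads the bits of byte n across the most-significant bit of each of
 * the eight result bytes, i.e. bit k of n maps to 0x80ull << (8 * k).
 * For example, entry 0x03 is 0x0000000000008080ull.
 */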
1067static const uint64_t VGBBD_MASKS[256] = {
1068 0x0000000000000000ull,
1069 0x0000000000000080ull,
1070 0x0000000000008000ull,
1071 0x0000000000008080ull,
1072 0x0000000000800000ull,
1073 0x0000000000800080ull,
1074 0x0000000000808000ull,
1075 0x0000000000808080ull,
1076 0x0000000080000000ull,
1077 0x0000000080000080ull,
1078 0x0000000080008000ull,
1079 0x0000000080008080ull,
1080 0x0000000080800000ull,
1081 0x0000000080800080ull,
1082 0x0000000080808000ull,
1083 0x0000000080808080ull,
1084 0x0000008000000000ull,
1085 0x0000008000000080ull,
1086 0x0000008000008000ull,
1087 0x0000008000008080ull,
1088 0x0000008000800000ull,
1089 0x0000008000800080ull,
1090 0x0000008000808000ull,
1091 0x0000008000808080ull,
1092 0x0000008080000000ull,
1093 0x0000008080000080ull,
1094 0x0000008080008000ull,
1095 0x0000008080008080ull,
1096 0x0000008080800000ull,
1097 0x0000008080800080ull,
1098 0x0000008080808000ull,
1099 0x0000008080808080ull,
1100 0x0000800000000000ull,
1101 0x0000800000000080ull,
1102 0x0000800000008000ull,
1103 0x0000800000008080ull,
1104 0x0000800000800000ull,
1105 0x0000800000800080ull,
1106 0x0000800000808000ull,
1107 0x0000800000808080ull,
1108 0x0000800080000000ull,
1109 0x0000800080000080ull,
1110 0x0000800080008000ull,
1111 0x0000800080008080ull,
1112 0x0000800080800000ull,
1113 0x0000800080800080ull,
1114 0x0000800080808000ull,
1115 0x0000800080808080ull,
1116 0x0000808000000000ull,
1117 0x0000808000000080ull,
1118 0x0000808000008000ull,
1119 0x0000808000008080ull,
1120 0x0000808000800000ull,
1121 0x0000808000800080ull,
1122 0x0000808000808000ull,
1123 0x0000808000808080ull,
1124 0x0000808080000000ull,
1125 0x0000808080000080ull,
1126 0x0000808080008000ull,
1127 0x0000808080008080ull,
1128 0x0000808080800000ull,
1129 0x0000808080800080ull,
1130 0x0000808080808000ull,
1131 0x0000808080808080ull,
1132 0x0080000000000000ull,
1133 0x0080000000000080ull,
1134 0x0080000000008000ull,
1135 0x0080000000008080ull,
1136 0x0080000000800000ull,
1137 0x0080000000800080ull,
1138 0x0080000000808000ull,
1139 0x0080000000808080ull,
1140 0x0080000080000000ull,
1141 0x0080000080000080ull,
1142 0x0080000080008000ull,
1143 0x0080000080008080ull,
1144 0x0080000080800000ull,
1145 0x0080000080800080ull,
1146 0x0080000080808000ull,
1147 0x0080000080808080ull,
1148 0x0080008000000000ull,
1149 0x0080008000000080ull,
1150 0x0080008000008000ull,
1151 0x0080008000008080ull,
1152 0x0080008000800000ull,
1153 0x0080008000800080ull,
1154 0x0080008000808000ull,
1155 0x0080008000808080ull,
1156 0x0080008080000000ull,
1157 0x0080008080000080ull,
1158 0x0080008080008000ull,
1159 0x0080008080008080ull,
1160 0x0080008080800000ull,
1161 0x0080008080800080ull,
1162 0x0080008080808000ull,
1163 0x0080008080808080ull,
1164 0x0080800000000000ull,
1165 0x0080800000000080ull,
1166 0x0080800000008000ull,
1167 0x0080800000008080ull,
1168 0x0080800000800000ull,
1169 0x0080800000800080ull,
1170 0x0080800000808000ull,
1171 0x0080800000808080ull,
1172 0x0080800080000000ull,
1173 0x0080800080000080ull,
1174 0x0080800080008000ull,
1175 0x0080800080008080ull,
1176 0x0080800080800000ull,
1177 0x0080800080800080ull,
1178 0x0080800080808000ull,
1179 0x0080800080808080ull,
1180 0x0080808000000000ull,
1181 0x0080808000000080ull,
1182 0x0080808000008000ull,
1183 0x0080808000008080ull,
1184 0x0080808000800000ull,
1185 0x0080808000800080ull,
1186 0x0080808000808000ull,
1187 0x0080808000808080ull,
1188 0x0080808080000000ull,
1189 0x0080808080000080ull,
1190 0x0080808080008000ull,
1191 0x0080808080008080ull,
1192 0x0080808080800000ull,
1193 0x0080808080800080ull,
1194 0x0080808080808000ull,
1195 0x0080808080808080ull,
1196 0x8000000000000000ull,
1197 0x8000000000000080ull,
1198 0x8000000000008000ull,
1199 0x8000000000008080ull,
1200 0x8000000000800000ull,
1201 0x8000000000800080ull,
1202 0x8000000000808000ull,
1203 0x8000000000808080ull,
1204 0x8000000080000000ull,
1205 0x8000000080000080ull,
1206 0x8000000080008000ull,
1207 0x8000000080008080ull,
1208 0x8000000080800000ull,
1209 0x8000000080800080ull,
1210 0x8000000080808000ull,
1211 0x8000000080808080ull,
1212 0x8000008000000000ull,
1213 0x8000008000000080ull,
1214 0x8000008000008000ull,
1215 0x8000008000008080ull,
1216 0x8000008000800000ull,
1217 0x8000008000800080ull,
1218 0x8000008000808000ull,
1219 0x8000008000808080ull,
1220 0x8000008080000000ull,
1221 0x8000008080000080ull,
1222 0x8000008080008000ull,
1223 0x8000008080008080ull,
1224 0x8000008080800000ull,
1225 0x8000008080800080ull,
1226 0x8000008080808000ull,
1227 0x8000008080808080ull,
1228 0x8000800000000000ull,
1229 0x8000800000000080ull,
1230 0x8000800000008000ull,
1231 0x8000800000008080ull,
1232 0x8000800000800000ull,
1233 0x8000800000800080ull,
1234 0x8000800000808000ull,
1235 0x8000800000808080ull,
1236 0x8000800080000000ull,
1237 0x8000800080000080ull,
1238 0x8000800080008000ull,
1239 0x8000800080008080ull,
1240 0x8000800080800000ull,
1241 0x8000800080800080ull,
1242 0x8000800080808000ull,
1243 0x8000800080808080ull,
1244 0x8000808000000000ull,
1245 0x8000808000000080ull,
1246 0x8000808000008000ull,
1247 0x8000808000008080ull,
1248 0x8000808000800000ull,
1249 0x8000808000800080ull,
1250 0x8000808000808000ull,
1251 0x8000808000808080ull,
1252 0x8000808080000000ull,
1253 0x8000808080000080ull,
1254 0x8000808080008000ull,
1255 0x8000808080008080ull,
1256 0x8000808080800000ull,
1257 0x8000808080800080ull,
1258 0x8000808080808000ull,
1259 0x8000808080808080ull,
1260 0x8080000000000000ull,
1261 0x8080000000000080ull,
1262 0x8080000000008000ull,
1263 0x8080000000008080ull,
1264 0x8080000000800000ull,
1265 0x8080000000800080ull,
1266 0x8080000000808000ull,
1267 0x8080000000808080ull,
1268 0x8080000080000000ull,
1269 0x8080000080000080ull,
1270 0x8080000080008000ull,
1271 0x8080000080008080ull,
1272 0x8080000080800000ull,
1273 0x8080000080800080ull,
1274 0x8080000080808000ull,
1275 0x8080000080808080ull,
1276 0x8080008000000000ull,
1277 0x8080008000000080ull,
1278 0x8080008000008000ull,
1279 0x8080008000008080ull,
1280 0x8080008000800000ull,
1281 0x8080008000800080ull,
1282 0x8080008000808000ull,
1283 0x8080008000808080ull,
1284 0x8080008080000000ull,
1285 0x8080008080000080ull,
1286 0x8080008080008000ull,
1287 0x8080008080008080ull,
1288 0x8080008080800000ull,
1289 0x8080008080800080ull,
1290 0x8080008080808000ull,
1291 0x8080008080808080ull,
1292 0x8080800000000000ull,
1293 0x8080800000000080ull,
1294 0x8080800000008000ull,
1295 0x8080800000008080ull,
1296 0x8080800000800000ull,
1297 0x8080800000800080ull,
1298 0x8080800000808000ull,
1299 0x8080800000808080ull,
1300 0x8080800080000000ull,
1301 0x8080800080000080ull,
1302 0x8080800080008000ull,
1303 0x8080800080008080ull,
1304 0x8080800080800000ull,
1305 0x8080800080800080ull,
1306 0x8080800080808000ull,
1307 0x8080800080808080ull,
1308 0x8080808000000000ull,
1309 0x8080808000000080ull,
1310 0x8080808000008000ull,
1311 0x8080808000008080ull,
1312 0x8080808000800000ull,
1313 0x8080808000800080ull,
1314 0x8080808000808000ull,
1315 0x8080808000808080ull,
1316 0x8080808080000000ull,
1317 0x8080808080000080ull,
1318 0x8080808080008000ull,
1319 0x8080808080008080ull,
1320 0x8080808080800000ull,
1321 0x8080808080800080ull,
1322 0x8080808080808000ull,
1323 0x8080808080808080ull,
1324};
1325
1326void helper_vgbbd(ppc_avr_t *r, ppc_avr_t *b)
1327{
1328 int i;
1329 uint64_t t[2] = { 0, 0 };
1330
1331 VECTOR_FOR_INORDER_I(i, u8) {
1332#if defined(HOST_WORDS_BIGENDIAN)
1333 t[i>>3] |= VGBBD_MASKS[b->u8[i]] >> (i & 7);
1334#else
1335 t[i>>3] |= VGBBD_MASKS[b->u8[i]] >> (7-(i & 7));
1336#endif
1337 }
1338
1339 r->u64[0] = t[0];
1340 r->u64[1] = t[1];
1341}
1342
1343#define PMSUM(name, srcfld, trgfld, trgtyp) \
1344void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1345{ \
1346 int i, j; \
1347 trgtyp prod[sizeof(ppc_avr_t)/sizeof(a->srcfld[0])]; \
1348 \
1349 VECTOR_FOR_INORDER_I(i, srcfld) { \
1350 prod[i] = 0; \
1351 for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) { \
1352 if (a->srcfld[i] & (1ull<<j)) { \
1353 prod[i] ^= ((trgtyp)b->srcfld[i] << j); \
1354 } \
1355 } \
1356 } \
1357 \
1358 VECTOR_FOR_INORDER_I(i, trgfld) { \
1359 r->trgfld[i] = prod[2*i] ^ prod[2*i+1]; \
1360 } \
1361}
1362
1363PMSUM(vpmsumb, u8, u16, uint16_t)
1364PMSUM(vpmsumh, u16, u32, uint32_t)
1365PMSUM(vpmsumw, u32, u64, uint64_t)
1366
1367void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1368{
1369
1370#ifdef CONFIG_INT128
1371 int i, j;
1372 __uint128_t prod[2];
1373
1374 VECTOR_FOR_INORDER_I(i, u64) {
1375 prod[i] = 0;
1376 for (j = 0; j < 64; j++) {
1377 if (a->u64[i] & (1ull<<j)) {
1378 prod[i] ^= (((__uint128_t)b->u64[i]) << j);
1379 }
1380 }
1381 }
1382
1383 r->u128 = prod[0] ^ prod[1];
1384
1385#else
1386 int i, j;
1387 ppc_avr_t prod[2];
1388
1389 VECTOR_FOR_INORDER_I(i, u64) {
1390 prod[i].u64[LO_IDX] = prod[i].u64[HI_IDX] = 0;
1391 for (j = 0; j < 64; j++) {
1392 if (a->u64[i] & (1ull<<j)) {
1393 ppc_avr_t bshift;
1394 if (j == 0) {
1395 bshift.u64[HI_IDX] = 0;
1396 bshift.u64[LO_IDX] = b->u64[i];
1397 } else {
1398 bshift.u64[HI_IDX] = b->u64[i] >> (64-j);
1399 bshift.u64[LO_IDX] = b->u64[i] << j;
1400 }
1401 prod[i].u64[LO_IDX] ^= bshift.u64[LO_IDX];
1402 prod[i].u64[HI_IDX] ^= bshift.u64[HI_IDX];
1403 }
1404 }
1405 }
1406
1407 r->u64[LO_IDX] = prod[0].u64[LO_IDX] ^ prod[1].u64[LO_IDX];
1408 r->u64[HI_IDX] = prod[0].u64[HI_IDX] ^ prod[1].u64[HI_IDX];
1409#endif
1410}
1411
1412
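/*
 * Vector pack helpers.  vpkpx narrows eight 32-bit 8:8:8:8 pixels to
 * 16-bit 1:5:5:5 by keeping the high bits of each channel; the VPK
 * variants narrow each integer element, either by modulo truncation or
 * through the saturating converters defined above.
 */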
1413#if defined(HOST_WORDS_BIGENDIAN)
1414#define PKBIG 1
1415#else
1416#define PKBIG 0
1417#endif
1418void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1419{
1420 int i, j;
1421 ppc_avr_t result;
1422#if defined(HOST_WORDS_BIGENDIAN)
1423 const ppc_avr_t *x[2] = { a, b };
1424#else
1425 const ppc_avr_t *x[2] = { b, a };
1426#endif
1427
1428 VECTOR_FOR_INORDER_I(i, u64) {
1429 VECTOR_FOR_INORDER_I(j, u32) {
1430 uint32_t e = x[i]->u32[j];
1431
1432 result.u16[4*i+j] = (((e >> 9) & 0xfc00) |
1433 ((e >> 6) & 0x3e0) |
1434 ((e >> 3) & 0x1f));
1435 }
1436 }
1437 *r = result;
1438}
1439
1440#define VPK(suffix, from, to, cvt, dosat) \
1441 void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r, \
1442 ppc_avr_t *a, ppc_avr_t *b) \
1443 { \
1444 int i; \
1445 int sat = 0; \
1446 ppc_avr_t result; \
1447 ppc_avr_t *a0 = PKBIG ? a : b; \
1448 ppc_avr_t *a1 = PKBIG ? b : a; \
1449 \
1450 VECTOR_FOR_INORDER_I(i, from) { \
1451 result.to[i] = cvt(a0->from[i], &sat); \
1452 result.to[i+ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat); \
1453 } \
1454 *r = result; \
1455 if (dosat && sat) { \
1456 env->vscr |= (1 << VSCR_SAT); \
1457 } \
1458 }
1459#define I(x, y) (x)
1460VPK(shss, s16, s8, cvtshsb, 1)
1461VPK(shus, s16, u8, cvtshub, 1)
1462VPK(swss, s32, s16, cvtswsh, 1)
1463VPK(swus, s32, u16, cvtswuh, 1)
1464VPK(sdss, s64, s32, cvtsdsw, 1)
1465VPK(sdus, s64, u32, cvtsduw, 1)
1466VPK(uhus, u16, u8, cvtuhub, 1)
1467VPK(uwus, u32, u16, cvtuwuh, 1)
1468VPK(udus, u64, u32, cvtuduw, 1)
1469VPK(uhum, u16, u8, I, 0)
1470VPK(uwum, u32, u16, I, 0)
1471VPK(udum, u64, u32, I, 0)
1472#undef I
1473#undef VPK
1474#undef PKBIG
1475
1476void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1477{
1478 int i;
1479
1480 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1481 r->f[i] = float32_div(float32_one, b->f[i], &env->vec_status);
1482 }
1483}
1484
1485#define VRFI(suffix, rounding) \
1486 void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r, \
1487 ppc_avr_t *b) \
1488 { \
1489 int i; \
1490 float_status s = env->vec_status; \
1491 \
1492 set_float_rounding_mode(rounding, &s); \
1493 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
1494 r->f[i] = float32_round_to_int (b->f[i], &s); \
1495 } \
1496 }
1497VRFI(n, float_round_nearest_even)
1498VRFI(m, float_round_down)
1499VRFI(p, float_round_up)
1500VRFI(z, float_round_to_zero)
1501#undef VRFI
1502
1503#define VROTATE(suffix, element, mask) \
1504 void helper_vrl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1505 { \
1506 int i; \
1507 \
1508 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1509 unsigned int shift = b->element[i] & mask; \
1510 r->element[i] = (a->element[i] << shift) | \
1511 (a->element[i] >> (sizeof(a->element[0]) * 8 - shift)); \
1512 } \
1513 }
1514VROTATE(b, u8, 0x7)
1515VROTATE(h, u16, 0xF)
1516VROTATE(w, u32, 0x1F)
1517VROTATE(d, u64, 0x3F)
1518#undef VROTATE
1519
1520void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1521{
1522 int i;
1523
1524 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1525 float32 t = float32_sqrt(b->f[i], &env->vec_status);
1526
1527 r->f[i] = float32_div(float32_one, t, &env->vec_status);
1528 }
1529}
1530
1531void helper_vsel(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1532 ppc_avr_t *c)
1533{
1534 r->u64[0] = (a->u64[0] & ~c->u64[0]) | (b->u64[0] & c->u64[0]);
1535 r->u64[1] = (a->u64[1] & ~c->u64[1]) | (b->u64[1] & c->u64[1]);
1536}
1537
1538void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1539{
1540 int i;
1541
1542 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1543 r->f[i] = float32_exp2(b->f[i], &env->vec_status);
1544 }
1545}
1546
1547void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1548{
1549 int i;
1550
1551 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1552 r->f[i] = float32_log2(b->f[i], &env->vec_status);
1553 }
1554}
1555
1556
1557
1558
1559#define VSHIFT(suffix, leftp) \
1560 void helper_vs##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1561 { \
1562 int shift = b->u8[LO_IDX*15] & 0x7; \
1563 int doit = 1; \
1564 int i; \
1565 \
1566 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { \
1567 doit = doit && ((b->u8[i] & 0x7) == shift); \
1568 } \
1569 if (doit) { \
1570 if (shift == 0) { \
1571 *r = *a; \
1572 } else if (leftp) { \
1573 uint64_t carry = a->u64[LO_IDX] >> (64 - shift); \
1574 \
1575 r->u64[HI_IDX] = (a->u64[HI_IDX] << shift) | carry; \
1576 r->u64[LO_IDX] = a->u64[LO_IDX] << shift; \
1577 } else { \
1578 uint64_t carry = a->u64[HI_IDX] << (64 - shift); \
1579 \
1580 r->u64[LO_IDX] = (a->u64[LO_IDX] >> shift) | carry; \
1581 r->u64[HI_IDX] = a->u64[HI_IDX] >> shift; \
1582 } \
1583 } \
1584 }
1585VSHIFT(l, 1)
1586VSHIFT(r, 0)
1587#undef VSHIFT
1588
1589#define VSL(suffix, element, mask) \
1590 void helper_vsl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1591 { \
1592 int i; \
1593 \
1594 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1595 unsigned int shift = b->element[i] & mask; \
1596 \
1597 r->element[i] = a->element[i] << shift; \
1598 } \
1599 }
1600VSL(b, u8, 0x7)
1601VSL(h, u16, 0x0F)
1602VSL(w, u32, 0x1F)
1603VSL(d, u64, 0x3F)
1604#undef VSL
1605
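/*
 * vsldoi concatenates a and b and takes 16 bytes starting at byte
 * offset sh: with sh = 3 the result is bytes 3..15 of a followed by
 * bytes 0..2 of b.  The little-endian branch below mirrors the indices
 * so that the in-memory layout matches.
 */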
1606void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift)
1607{
1608 int sh = shift & 0xf;
1609 int i;
1610 ppc_avr_t result;
1611
1612#if defined(HOST_WORDS_BIGENDIAN)
1613 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1614 int index = sh + i;
1615 if (index > 0xf) {
1616 result.u8[i] = b->u8[index - 0x10];
1617 } else {
1618 result.u8[i] = a->u8[index];
1619 }
1620 }
1621#else
1622 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1623 int index = (16 - sh) + i;
1624 if (index > 0xf) {
1625 result.u8[i] = a->u8[index - 0x10];
1626 } else {
1627 result.u8[i] = b->u8[index];
1628 }
1629 }
1630#endif
1631 *r = result;
1632}
1633
1634void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1635{
1636 int sh = (b->u8[LO_IDX*0xf] >> 3) & 0xf;
1637
1638#if defined(HOST_WORDS_BIGENDIAN)
1639 memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1640 memset(&r->u8[16-sh], 0, sh);
1641#else
1642 memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1643 memset(&r->u8[0], 0, sh);
1644#endif
1645}
1646
1647
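/*
 * Splat helpers: replicate the element selected by the immediate across
 * every lane.  SPLAT_ELEMENT converts the big-endian element number used
 * by the ISA into a host-order array index.
 */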
1648#define _SPLAT_MASKED(element) (splat & (ARRAY_SIZE(r->element) - 1))
1649#if defined(HOST_WORDS_BIGENDIAN)
1650#define SPLAT_ELEMENT(element) _SPLAT_MASKED(element)
1651#else
1652#define SPLAT_ELEMENT(element) \
1653 (ARRAY_SIZE(r->element) - 1 - _SPLAT_MASKED(element))
1654#endif
1655#define VSPLT(suffix, element) \
1656 void helper_vsplt##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t splat) \
1657 { \
1658 uint32_t s = b->element[SPLAT_ELEMENT(element)]; \
1659 int i; \
1660 \
1661 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1662 r->element[i] = s; \
1663 } \
1664 }
1665VSPLT(b, u8)
1666VSPLT(h, u16)
1667VSPLT(w, u32)
1668#undef VSPLT
1669#undef SPLAT_ELEMENT
1670#undef _SPLAT_MASKED
1671
1672#define VSPLTI(suffix, element, splat_type) \
1673 void helper_vspltis##suffix(ppc_avr_t *r, uint32_t splat) \
1674 { \
1675 splat_type x = (int8_t)(splat << 3) >> 3; \
1676 int i; \
1677 \
1678 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1679 r->element[i] = x; \
1680 } \
1681 }
1682VSPLTI(b, s8, int8_t)
1683VSPLTI(h, s16, int16_t)
1684VSPLTI(w, s32, int32_t)
1685#undef VSPLTI
1686
1687#define VSR(suffix, element, mask) \
1688 void helper_vsr##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1689 { \
1690 int i; \
1691 \
1692 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1693 unsigned int shift = b->element[i] & mask; \
1694 r->element[i] = a->element[i] >> shift; \
1695 } \
1696 }
1697VSR(ab, s8, 0x7)
1698VSR(ah, s16, 0xF)
1699VSR(aw, s32, 0x1F)
1700VSR(ad, s64, 0x3F)
1701VSR(b, u8, 0x7)
1702VSR(h, u16, 0xF)
1703VSR(w, u32, 0x1F)
1704VSR(d, u64, 0x3F)
1705#undef VSR
1706
1707void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1708{
1709 int sh = (b->u8[LO_IDX * 0xf] >> 3) & 0xf;
1710
1711#if defined(HOST_WORDS_BIGENDIAN)
1712 memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1713 memset(&r->u8[0], 0, sh);
1714#else
1715 memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1716 memset(&r->u8[16 - sh], 0, sh);
1717#endif
1718}
1719
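/*
 * vsubcuw stores the carry-out of a->u32[i] + ~b->u32[i] + 1, which is
 * 1 exactly when the subtraction does not borrow, i.e. when a >= b.
 */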
1720void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1721{
1722 int i;
1723
1724 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
1725 r->u32[i] = a->u32[i] >= b->u32[i];
1726 }
1727}
1728
1729void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1730{
1731 int64_t t;
1732 int i, upper;
1733 ppc_avr_t result;
1734 int sat = 0;
1735
1736#if defined(HOST_WORDS_BIGENDIAN)
1737 upper = ARRAY_SIZE(r->s32)-1;
1738#else
1739 upper = 0;
1740#endif
1741 t = (int64_t)b->s32[upper];
1742 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1743 t += a->s32[i];
1744 result.s32[i] = 0;
1745 }
1746 result.s32[upper] = cvtsdsw(t, &sat);
1747 *r = result;
1748
1749 if (sat) {
1750 env->vscr |= (1 << VSCR_SAT);
1751 }
1752}
1753
1754void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1755{
1756 int i, j, upper;
1757 ppc_avr_t result;
1758 int sat = 0;
1759
1760#if defined(HOST_WORDS_BIGENDIAN)
1761 upper = 1;
1762#else
1763 upper = 0;
1764#endif
1765 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
1766 int64_t t = (int64_t)b->s32[upper + i * 2];
1767
1768 result.u64[i] = 0;
1769 for (j = 0; j < ARRAY_SIZE(r->u64); j++) {
1770 t += a->s32[2 * i + j];
1771 }
1772 result.s32[upper + i * 2] = cvtsdsw(t, &sat);
1773 }
1774
1775 *r = result;
1776 if (sat) {
1777 env->vscr |= (1 << VSCR_SAT);
1778 }
1779}
1780
1781void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1782{
1783 int i, j;
1784 int sat = 0;
1785
1786 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1787 int64_t t = (int64_t)b->s32[i];
1788
1789 for (j = 0; j < ARRAY_SIZE(r->s32); j++) {
1790 t += a->s8[4 * i + j];
1791 }
1792 r->s32[i] = cvtsdsw(t, &sat);
1793 }
1794
1795 if (sat) {
1796 env->vscr |= (1 << VSCR_SAT);
1797 }
1798}
1799
1800void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1801{
1802 int sat = 0;
1803 int i;
1804
1805 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1806 int64_t t = (int64_t)b->s32[i];
1807
1808 t += a->s16[2 * i] + a->s16[2 * i + 1];
1809 r->s32[i] = cvtsdsw(t, &sat);
1810 }
1811
1812 if (sat) {
1813 env->vscr |= (1 << VSCR_SAT);
1814 }
1815}
1816
1817void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1818{
1819 int i, j;
1820 int sat = 0;
1821
1822 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
1823 uint64_t t = (uint64_t)b->u32[i];
1824
1825 for (j = 0; j < ARRAY_SIZE(r->u32); j++) {
1826 t += a->u8[4 * i + j];
1827 }
1828 r->u32[i] = cvtuduw(t, &sat);
1829 }
1830
1831 if (sat) {
1832 env->vscr |= (1 << VSCR_SAT);
1833 }
1834}
1835
1836#if defined(HOST_WORDS_BIGENDIAN)
1837#define UPKHI 1
1838#define UPKLO 0
1839#else
1840#define UPKHI 0
1841#define UPKLO 1
1842#endif
1843#define VUPKPX(suffix, hi) \
1844 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
1845 { \
1846 int i; \
1847 ppc_avr_t result; \
1848 \
1849 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { \
1850 uint16_t e = b->u16[hi ? i : i+4]; \
1851 uint8_t a = (e >> 15) ? 0xff : 0; \
1852 uint8_t r = (e >> 10) & 0x1f; \
1853 uint8_t g = (e >> 5) & 0x1f; \
1854 uint8_t b = e & 0x1f; \
1855 \
1856 result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b; \
1857 } \
1858 *r = result; \
1859 }
1860VUPKPX(lpx, UPKLO)
1861VUPKPX(hpx, UPKHI)
1862#undef VUPKPX
1863
1864#define VUPK(suffix, unpacked, packee, hi) \
1865 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
1866 { \
1867 int i; \
1868 ppc_avr_t result; \
1869 \
1870 if (hi) { \
1871 for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) { \
1872 result.unpacked[i] = b->packee[i]; \
1873 } \
1874 } else { \
1875 for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \
1876 i++) { \
1877 result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \
1878 } \
1879 } \
1880 *r = result; \
1881 }
1882VUPK(hsb, s16, s8, UPKHI)
1883VUPK(hsh, s32, s16, UPKHI)
1884VUPK(hsw, s64, s32, UPKHI)
1885VUPK(lsb, s16, s8, UPKLO)
1886VUPK(lsh, s32, s16, UPKLO)
1887VUPK(lsw, s64, s32, UPKLO)
1888#undef VUPK
1889#undef UPKHI
1890#undef UPKLO
1891
1892#define VGENERIC_DO(name, element) \
1893 void helper_v##name(ppc_avr_t *r, ppc_avr_t *b) \
1894 { \
1895 int i; \
1896 \
1897 VECTOR_FOR_INORDER_I(i, element) { \
1898 r->element[i] = name(b->element[i]); \
1899 } \
1900 }
1901
1902#define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8)
1903#define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16)
1904#define clzw(v) clz32((v))
1905#define clzd(v) clz64((v))
1906
1907VGENERIC_DO(clzb, u8)
1908VGENERIC_DO(clzh, u16)
1909VGENERIC_DO(clzw, u32)
1910VGENERIC_DO(clzd, u64)
1911
1912#undef clzb
1913#undef clzh
1914#undef clzw
1915#undef clzd
1916
1917#define popcntb(v) ctpop8(v)
1918#define popcnth(v) ctpop16(v)
1919#define popcntw(v) ctpop32(v)
1920#define popcntd(v) ctpop64(v)
1921
1922VGENERIC_DO(popcntb, u8)
1923VGENERIC_DO(popcnth, u16)
1924VGENERIC_DO(popcntw, u32)
1925VGENERIC_DO(popcntd, u64)
1926
1927#undef popcntb
1928#undef popcnth
1929#undef popcntw
1930#undef popcntd
1931
1932#undef VGENERIC_DO
1933
1934#if defined(HOST_WORDS_BIGENDIAN)
1935#define QW_ONE { .u64 = { 0, 1 } }
1936#else
1937#define QW_ONE { .u64 = { 1, 0 } }
1938#endif
1939
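/*
 * Quadword arithmetic: with CONFIG_INT128 the compiler's __uint128_t is
 * used directly; otherwise the avr_qw_* helpers below emulate it with
 * two 64-bit halves, detecting a low-half carry via (~a.lo < b.lo).
 */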
1940#ifndef CONFIG_INT128
1941
1942static inline void avr_qw_not(ppc_avr_t *t, ppc_avr_t a)
1943{
1944 t->u64[0] = ~a.u64[0];
1945 t->u64[1] = ~a.u64[1];
1946}
1947
1948static int avr_qw_cmpu(ppc_avr_t a, ppc_avr_t b)
1949{
1950 if (a.u64[HI_IDX] < b.u64[HI_IDX]) {
1951 return -1;
1952 } else if (a.u64[HI_IDX] > b.u64[HI_IDX]) {
1953 return 1;
1954 } else if (a.u64[LO_IDX] < b.u64[LO_IDX]) {
1955 return -1;
1956 } else if (a.u64[LO_IDX] > b.u64[LO_IDX]) {
1957 return 1;
1958 } else {
1959 return 0;
1960 }
1961}
1962
1963static void avr_qw_add(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
1964{
1965 t->u64[LO_IDX] = a.u64[LO_IDX] + b.u64[LO_IDX];
1966 t->u64[HI_IDX] = a.u64[HI_IDX] + b.u64[HI_IDX] +
1967 (~a.u64[LO_IDX] < b.u64[LO_IDX]);
1968}
1969
1970static int avr_qw_addc(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
1971{
1972 ppc_avr_t not_a;
1973 t->u64[LO_IDX] = a.u64[LO_IDX] + b.u64[LO_IDX];
1974 t->u64[HI_IDX] = a.u64[HI_IDX] + b.u64[HI_IDX] +
1975 (~a.u64[LO_IDX] < b.u64[LO_IDX]);
1976 avr_qw_not(¬_a, a);
1977 return avr_qw_cmpu(not_a, b) < 0;
1978}
1979
1980#endif
1981
1982void helper_vadduqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1983{
1984#ifdef CONFIG_INT128
1985 r->u128 = a->u128 + b->u128;
1986#else
1987 avr_qw_add(r, *a, *b);
1988#endif
1989}
1990
1991void helper_vaddeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
1992{
1993#ifdef CONFIG_INT128
1994 r->u128 = a->u128 + b->u128 + (c->u128 & 1);
1995#else
1996
1997 if (c->u64[LO_IDX] & 1) {
1998 ppc_avr_t tmp;
1999
2000 tmp.u64[HI_IDX] = 0;
2001 tmp.u64[LO_IDX] = c->u64[LO_IDX] & 1;
2002 avr_qw_add(&tmp, *a, tmp);
2003 avr_qw_add(r, tmp, *b);
2004 } else {
2005 avr_qw_add(r, *a, *b);
2006 }
2007#endif
2008}
2009
2010void helper_vaddcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2011{
2012#ifdef CONFIG_INT128
2013 r->u128 = (~a->u128 < b->u128);
2014#else
2015 ppc_avr_t not_a;
2016
2017 avr_qw_not(¬_a, *a);
2018
2019 r->u64[HI_IDX] = 0;
2020 r->u64[LO_IDX] = (avr_qw_cmpu(not_a, *b) < 0);
2021#endif
2022}
2023
2024void helper_vaddecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2025{
2026#ifdef CONFIG_INT128
2027 int carry_out = (~a->u128 < b->u128);
2028 if (!carry_out && (c->u128 & 1)) {
2029 carry_out = ((a->u128 + b->u128 + 1) == 0) &&
2030 ((a->u128 != 0) || (b->u128 != 0));
2031 }
2032 r->u128 = carry_out;
2033#else
2034
2035 int carry_in = c->u64[LO_IDX] & 1;
2036 int carry_out = 0;
2037 ppc_avr_t tmp;
2038
2039 carry_out = avr_qw_addc(&tmp, *a, *b);
2040
2041 if (!carry_out && carry_in) {
2042 ppc_avr_t one = QW_ONE;
2043 carry_out = avr_qw_addc(&tmp, tmp, one);
2044 }
2045 r->u64[HI_IDX] = 0;
2046 r->u64[LO_IDX] = carry_out;
2047#endif
2048}
2049
2050void helper_vsubuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2051{
2052#ifdef CONFIG_INT128
2053 r->u128 = a->u128 - b->u128;
2054#else
2055 ppc_avr_t tmp;
2056 ppc_avr_t one = QW_ONE;
2057
2058 avr_qw_not(&tmp, *b);
2059 avr_qw_add(&tmp, *a, tmp);
2060 avr_qw_add(r, tmp, one);
2061#endif
2062}
2063
2064void helper_vsubeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2065{
2066#ifdef CONFIG_INT128
2067 r->u128 = a->u128 + ~b->u128 + (c->u128 & 1);
2068#else
2069 ppc_avr_t tmp, sum;
2070
2071 avr_qw_not(&tmp, *b);
2072 avr_qw_add(&sum, *a, tmp);
2073
2074 tmp.u64[HI_IDX] = 0;
2075 tmp.u64[LO_IDX] = c->u64[LO_IDX] & 1;
2076 avr_qw_add(r, sum, tmp);
2077#endif
2078}
2079
void helper_vsubcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
#ifdef CONFIG_INT128
    r->u128 = (~a->u128 < ~b->u128) ||
              (a->u128 + ~b->u128 == (__uint128_t)-1);
#else
    int carry = (avr_qw_cmpu(*a, *b) > 0);
    if (!carry) {
        ppc_avr_t tmp;
        avr_qw_not(&tmp, *b);
        avr_qw_add(&tmp, *a, tmp);
        carry = ((tmp.u64[HI_IDX] == -1ull) && (tmp.u64[LO_IDX] == -1ull));
    }
    r->u64[HI_IDX] = 0;
    r->u64[LO_IDX] = carry;
#endif
}
2097
2098void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2099{
2100#ifdef CONFIG_INT128
2101 r->u128 =
2102 (~a->u128 < ~b->u128) ||
2103 ((c->u128 & 1) && (a->u128 + ~b->u128 == (__uint128_t)-1));
2104#else
2105 int carry_in = c->u64[LO_IDX] & 1;
2106 int carry_out = (avr_qw_cmpu(*a, *b) > 0);
2107 if (!carry_out && carry_in) {
2108 ppc_avr_t tmp;
2109 avr_qw_not(&tmp, *b);
2110 avr_qw_add(&tmp, *a, tmp);
2111 carry_out = ((tmp.u64[HI_IDX] == -1ull) && (tmp.u64[LO_IDX] == -1ull));
2112 }
2113
2114 r->u64[HI_IDX] = 0;
2115 r->u64[LO_IDX] = carry_out;
2116#endif
2117}
2118
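/*
 * BCD helpers operate on 31 packed decimal digits plus a sign nibble in
 * digit position 0 (byte 15 on big-endian hosts).  0xA/0xC/0xE/0xF
 * encode plus and 0xB/0xD encode minus; any other sign code is invalid.
 */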
2119#define BCD_PLUS_PREF_1 0xC
2120#define BCD_PLUS_PREF_2 0xF
2121#define BCD_PLUS_ALT_1 0xA
2122#define BCD_NEG_PREF 0xD
2123#define BCD_NEG_ALT 0xB
2124#define BCD_PLUS_ALT_2 0xE
2125
2126#if defined(HOST_WORDS_BIGENDIAN)
2127#define BCD_DIG_BYTE(n) (15 - (n/2))
2128#else
2129#define BCD_DIG_BYTE(n) (n/2)
2130#endif
2131
2132static int bcd_get_sgn(ppc_avr_t *bcd)
2133{
2134 switch (bcd->u8[BCD_DIG_BYTE(0)] & 0xF) {
2135 case BCD_PLUS_PREF_1:
2136 case BCD_PLUS_PREF_2:
2137 case BCD_PLUS_ALT_1:
2138 case BCD_PLUS_ALT_2:
2139 {
2140 return 1;
2141 }
2142
2143 case BCD_NEG_PREF:
2144 case BCD_NEG_ALT:
2145 {
2146 return -1;
2147 }
2148
2149 default:
2150 {
2151 return 0;
2152 }
2153 }
2154}
2155
2156static int bcd_preferred_sgn(int sgn, int ps)
2157{
2158 if (sgn >= 0) {
2159 return (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2;
2160 } else {
2161 return BCD_NEG_PREF;
2162 }
2163}
2164
2165static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid)
2166{
2167 uint8_t result;
2168 if (n & 1) {
2169 result = bcd->u8[BCD_DIG_BYTE(n)] >> 4;
2170 } else {
2171 result = bcd->u8[BCD_DIG_BYTE(n)] & 0xF;
2172 }
2173
2174 if (unlikely(result > 9)) {
2175 *invalid = true;
2176 }
2177 return result;
2178}
2179
2180static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n)
2181{
2182 if (n & 1) {
2183 bcd->u8[BCD_DIG_BYTE(n)] &= 0x0F;
2184 bcd->u8[BCD_DIG_BYTE(n)] |= (digit<<4);
2185 } else {
2186 bcd->u8[BCD_DIG_BYTE(n)] &= 0xF0;
2187 bcd->u8[BCD_DIG_BYTE(n)] |= digit;
2188 }
2189}
2190
2191static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b)
2192{
2193 int i;
2194 int invalid = 0;
2195 for (i = 31; i > 0; i--) {
2196 uint8_t dig_a = bcd_get_digit(a, i, &invalid);
2197 uint8_t dig_b = bcd_get_digit(b, i, &invalid);
2198 if (unlikely(invalid)) {
2199 return 0;
2200 } else if (dig_a > dig_b) {
2201 return 1;
2202 } else if (dig_a < dig_b) {
2203 return -1;
2204 }
2205 }
2206
2207 return 0;
2208}
2209
2210static int bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2211 int *overflow)
2212{
2213 int carry = 0;
2214 int i;
2215 int is_zero = 1;
2216 for (i = 1; i <= 31; i++) {
2217 uint8_t digit = bcd_get_digit(a, i, invalid) +
2218 bcd_get_digit(b, i, invalid) + carry;
2219 is_zero &= (digit == 0);
2220 if (digit > 9) {
2221 carry = 1;
2222 digit -= 10;
2223 } else {
2224 carry = 0;
2225 }
2226
2227 bcd_put_digit(t, digit, i);
2228
2229 if (unlikely(*invalid)) {
2230 return -1;
2231 }
2232 }
2233
2234 *overflow = carry;
2235 return is_zero;
2236}
2237
2238static int bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2239 int *overflow)
2240{
2241 int carry = 0;
2242 int i;
2243 int is_zero = 1;
2244 for (i = 1; i <= 31; i++) {
2245 uint8_t digit = bcd_get_digit(a, i, invalid) -
2246 bcd_get_digit(b, i, invalid) + carry;
2247 is_zero &= (digit == 0);
2248 if (digit & 0x80) {
2249 carry = -1;
2250 digit += 10;
2251 } else {
2252 carry = 0;
2253 }
2254
2255 bcd_put_digit(t, digit, i);
2256
2257 if (unlikely(*invalid)) {
2258 return -1;
2259 }
2260 }
2261
2262 *overflow = carry;
2263 return is_zero;
2264}
2265
2266uint32_t helper_bcdadd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2267{
2268
2269 int sgna = bcd_get_sgn(a);
2270 int sgnb = bcd_get_sgn(b);
2271 int invalid = (sgna == 0) || (sgnb == 0);
2272 int overflow = 0;
2273 int zero = 0;
2274 uint32_t cr = 0;
2275 ppc_avr_t result = { .u64 = { 0, 0 } };
2276
2277 if (!invalid) {
2278 if (sgna == sgnb) {
2279 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps);
2280 zero = bcd_add_mag(&result, a, b, &invalid, &overflow);
2281 cr = (sgna > 0) ? 1 << CRF_GT : 1 << CRF_LT;
2282 } else if (bcd_cmp_mag(a, b) > 0) {
2283 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps);
2284 zero = bcd_sub_mag(&result, a, b, &invalid, &overflow);
2285 cr = (sgna > 0) ? 1 << CRF_GT : 1 << CRF_LT;
2286 } else {
2287 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgnb, ps);
2288 zero = bcd_sub_mag(&result, b, a, &invalid, &overflow);
2289 cr = (sgnb > 0) ? 1 << CRF_GT : 1 << CRF_LT;
2290 }
2291 }
2292
2293 if (unlikely(invalid)) {
2294 result.u64[HI_IDX] = result.u64[LO_IDX] = -1;
2295 cr = 1 << CRF_SO;
2296 } else if (overflow) {
2297 cr |= 1 << CRF_SO;
2298 } else if (zero) {
2299 cr = 1 << CRF_EQ;
2300 }
2301
2302 *r = result;
2303
2304 return cr;
2305}
2306
2307uint32_t helper_bcdsub(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2308{
2309 ppc_avr_t bcopy = *b;
2310 int sgnb = bcd_get_sgn(b);
2311 if (sgnb < 0) {
2312 bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0);
2313 } else if (sgnb > 0) {
2314 bcd_put_digit(&bcopy, BCD_NEG_PREF, 0);
2315 }
2316
2317
2318 return helper_bcdadd(r, a, &bcopy, ps);
2319}
2320
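/*
 * AES acceleration helpers: vsbox applies the AES SubBytes step on its
 * own, while vcipher performs a full forward round (SubBytes, ShiftRows,
 * MixColumns, AddRoundKey) using the combined AES_Te* lookup tables from
 * crypto/aes.h.
 */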
2321void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a)
2322{
2323 int i;
2324 VECTOR_FOR_INORDER_I(i, u8) {
2325 r->u8[i] = AES_sbox[a->u8[i]];
2326 }
2327}
2328
2329void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2330{
2331 ppc_avr_t result;
2332 int i;
2333
2334 VECTOR_FOR_INORDER_I(i, u32) {
2335 result.AVRW(i) = b->AVRW(i) ^
2336 (AES_Te0[a->AVRB(AES_shifts[4*i + 0])] ^
2337 AES_Te1[a->AVRB(AES_shifts[4*i + 1])] ^
2338 AES_Te2[a->AVRB(AES_shifts[4*i + 2])] ^
2339 AES_Te3[a->AVRB(AES_shifts[4*i + 3])]);
2340 }
2341 *r = result;
2342}
2343
2344void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2345{
2346 ppc_avr_t result;
2347 int i;
2348
2349 VECTOR_FOR_INORDER_I(i, u8) {
2350 result.AVRB(i) = b->AVRB(i) ^ (AES_sbox[a->AVRB(AES_shifts[i])]);
2351 }
2352 *r = result;
2353}
2354
void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    /* This differs from what is written in ISA V2.07.  The RTL is */
    /* incorrect and will be fixed in V2.07B.  */
    int i;
    ppc_avr_t tmp;

    VECTOR_FOR_INORDER_I(i, u8) {
        tmp.AVRB(i) = b->AVRB(i) ^ AES_isbox[a->AVRB(AES_ishifts[i])];
    }

    VECTOR_FOR_INORDER_I(i, u32) {
        r->AVRW(i) =
            AES_imc[tmp.AVRB(4 * i + 0)][0] ^
            AES_imc[tmp.AVRB(4 * i + 1)][1] ^
            AES_imc[tmp.AVRB(4 * i + 2)][2] ^
            AES_imc[tmp.AVRB(4 * i + 3)][3];
    }
}
2374
2375void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2376{
2377 ppc_avr_t result;
2378 int i;
2379
2380 VECTOR_FOR_INORDER_I(i, u8) {
2381 result.AVRB(i) = b->AVRB(i) ^ (AES_isbox[a->AVRB(AES_ishifts[i])]);
2382 }
2383 *r = result;
2384}
2385
2386#define ROTRu32(v, n) (((v) >> (n)) | ((v) << (32-n)))
2387#if defined(HOST_WORDS_BIGENDIAN)
2388#define EL_IDX(i) (i)
2389#else
2390#define EL_IDX(i) (3 - (i))
2391#endif
2392
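/*
 * vshasigmaw computes the four SHA-256 sigma functions: st selects
 * uppercase (1) or lowercase (0) sigma, and each bit of "six" picks
 * sigma0 or sigma1 for the corresponding word, e.g. lowercase
 * sigma0(x) = ROTR(x, 7) ^ ROTR(x, 18) ^ (x >> 3).
 */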
2393void helper_vshasigmaw(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
2394{
2395 int st = (st_six & 0x10) != 0;
2396 int six = st_six & 0xF;
2397 int i;
2398
2399 VECTOR_FOR_INORDER_I(i, u32) {
2400 if (st == 0) {
2401 if ((six & (0x8 >> i)) == 0) {
2402 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 7) ^
2403 ROTRu32(a->u32[EL_IDX(i)], 18) ^
2404 (a->u32[EL_IDX(i)] >> 3);
2405 } else {
2406 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 17) ^
2407 ROTRu32(a->u32[EL_IDX(i)], 19) ^
2408 (a->u32[EL_IDX(i)] >> 10);
2409 }
2410 } else {
2411 if ((six & (0x8 >> i)) == 0) {
2412 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 2) ^
2413 ROTRu32(a->u32[EL_IDX(i)], 13) ^
2414 ROTRu32(a->u32[EL_IDX(i)], 22);
2415 } else {
2416 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 6) ^
2417 ROTRu32(a->u32[EL_IDX(i)], 11) ^
2418 ROTRu32(a->u32[EL_IDX(i)], 25);
2419 }
2420 }
2421 }
2422}
2423
2424#undef ROTRu32
2425#undef EL_IDX
2426
2427#define ROTRu64(v, n) (((v) >> (n)) | ((v) << (64-n)))
2428#if defined(HOST_WORDS_BIGENDIAN)
2429#define EL_IDX(i) (i)
2430#else
2431#define EL_IDX(i) (1 - (i))
2432#endif
2433
2434void helper_vshasigmad(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
2435{
2436 int st = (st_six & 0x10) != 0;
2437 int six = st_six & 0xF;
2438 int i;
2439
2440 VECTOR_FOR_INORDER_I(i, u64) {
2441 if (st == 0) {
2442 if ((six & (0x8 >> (2*i))) == 0) {
2443 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 1) ^
2444 ROTRu64(a->u64[EL_IDX(i)], 8) ^
2445 (a->u64[EL_IDX(i)] >> 7);
2446 } else {
2447 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 19) ^
2448 ROTRu64(a->u64[EL_IDX(i)], 61) ^
2449 (a->u64[EL_IDX(i)] >> 6);
2450 }
2451 } else {
2452 if ((six & (0x8 >> (2*i))) == 0) {
2453 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 28) ^
2454 ROTRu64(a->u64[EL_IDX(i)], 34) ^
2455 ROTRu64(a->u64[EL_IDX(i)], 39);
2456 } else {
2457 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 14) ^
2458 ROTRu64(a->u64[EL_IDX(i)], 18) ^
2459 ROTRu64(a->u64[EL_IDX(i)], 41);
2460 }
2461 }
2462 }
2463}
2464
2465#undef ROTRu64
2466#undef EL_IDX
2467
2468void helper_vpermxor(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2469{
2470 ppc_avr_t result;
2471 int i;
2472
2473 VECTOR_FOR_INORDER_I(i, u8) {
2474 int indexA = c->u8[i] >> 4;
2475 int indexB = c->u8[i] & 0xF;
2476#if defined(HOST_WORDS_BIGENDIAN)
2477 result.u8[i] = a->u8[indexA] ^ b->u8[indexB];
2478#else
2479 result.u8[i] = a->u8[15-indexA] ^ b->u8[15-indexB];
2480#endif
2481 }
2482 *r = result;
2483}
2484
2485#undef VECTOR_FOR_INORDER_I
2486#undef HI_IDX
2487#undef LO_IDX
2488
/*****************************************************************************/
/* SPE extension helpers */
/* Use a table to make this quicker */
static const uint8_t hbrev[16] = {
    0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
    0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF,
};

static inline uint8_t byte_reverse(uint8_t val)
{
    return hbrev[val >> 4] | (hbrev[val & 0xF] << 4);
}

static inline uint32_t word_reverse(uint32_t val)
{
    return byte_reverse(val >> 24) | (byte_reverse(val >> 16) << 8) |
        (byte_reverse(val >> 8) << 16) | (byte_reverse(val) << 24);
}

#define MASKBITS 16 /* Random value - to be fixed (implementation dependent) */
target_ulong helper_brinc(target_ulong arg1, target_ulong arg2)
{
    uint32_t a, b, d, mask;

    mask = UINT32_MAX >> (32 - MASKBITS);
    a = arg1 & mask;
    b = arg2 & mask;
    d = word_reverse(1 + word_reverse(a | ~b));
    return (arg1 & ~mask) | (d & b);
}

uint32_t helper_cntlsw32(uint32_t val)
{
    if (val & 0x80000000) {
        return clz32(~val);
    } else {
        return clz32(val);
    }
}

uint32_t helper_cntlzw32(uint32_t val)
{
    return clz32(val);
}

/*****************************************************************************/
/* 440 specific */
/* Determine Leftmost Zero Byte: scan high:low and report in XER the
 * 1-based count of bytes before the first zero byte (8 if none). */
target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
                          target_ulong low, uint32_t update_Rc)
{
    target_ulong mask;
    int i;

    i = 1;
    for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
        if ((high & mask) == 0) {
            if (update_Rc) {
                env->crf[0] = 0x4;
            }
            goto done;
        }
        i++;
    }
    for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
        if ((low & mask) == 0) {
            if (update_Rc) {
                env->crf[0] = 0x8;
            }
            goto done;
        }
        i++;
    }
    i = 8;
    if (update_Rc) {
        env->crf[0] = 0x2;
    }
 done:
    env->xer = (env->xer & ~0x7F) | i;
    if (update_Rc) {
        env->crf[0] |= xer_so;
    }
    return i;
}