1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20#include "qemu/osdep.h"
21#include "cpu.h"
22#include "exec/helper-proto.h"
23
24
25
26
/*
 * One 64-bit operand viewed as packed lanes.  Every member aliases the
 * same eight bytes, so which bits a given array index refers to depends
 * on the host byte order.
 */
typedef union {
    uint8_t ub[8];      /* eight unsigned byte lanes */
    int8_t sb[8];       /* eight signed byte lanes */
    uint16_t uh[4];     /* four unsigned halfword lanes */
    int16_t sh[4];      /* four signed halfword lanes */
    uint32_t uw[2];     /* two unsigned word lanes */
    int32_t sw[2];      /* two signed word lanes */
    uint64_t d;         /* the whole doubleword */
} LMIValue;
36
37
/*
 * Lane-index correction.  BYTE_ORDER_XOR(N) expands to N when
 * HOST_WORDS_BIGENDIAN is defined and to 0 otherwise; helpers XOR the
 * result into LMIValue array indices when the lane position matters.
 */
#ifdef HOST_WORDS_BIGENDIAN
# define BYTE_ORDER_XOR(N) N
#else
# define BYTE_ORDER_XOR(N) 0
#endif
43
/*
 * Saturation helpers: clamp a wider intermediate value into the range of
 * the named lane type.  The argument is evaluated more than once, so pass
 * only side-effect-free expressions.
 *
 * SATUB and SATUH also clamp negative intermediates to 0, which is what
 * unsigned-saturating subtraction and signed-to-unsigned packing need.
 */
#define SATSB(x)  ((x) < -0x80 ? -0x80 : (x) > 0x7f ? 0x7f : (x))
#define SATUB(x)  ((x) < 0 ? 0 : (x) > 0xff ? 0xff : (x))

#define SATSH(x)  ((x) < -0x8000 ? -0x8000 : (x) > 0x7fff ? 0x7fff : (x))
#define SATUH(x)  ((x) < 0 ? 0 : (x) > 0xffff ? 0xffff : (x))

#define SATSW(x) \
    ((x) < -0x80000000ll ? -0x80000000ll : (x) > 0x7fffffff ? 0x7fffffff : (x))
/* Upper bound only: the comparison against an unsigned constant is unsigned. */
#define SATUW(x)  ((x) > 0xffffffffull ? 0xffffffffull : (x))
53
54uint64_t helper_paddsb(uint64_t fs, uint64_t ft)
55{
56 LMIValue vs, vt;
57 unsigned int i;
58
59 vs.d = fs;
60 vt.d = ft;
61 for (i = 0; i < 8; ++i) {
62 int r = vs.sb[i] + vt.sb[i];
63 vs.sb[i] = SATSB(r);
64 }
65 return vs.d;
66}
67
68uint64_t helper_paddusb(uint64_t fs, uint64_t ft)
69{
70 LMIValue vs, vt;
71 unsigned int i;
72
73 vs.d = fs;
74 vt.d = ft;
75 for (i = 0; i < 8; ++i) {
76 int r = vs.ub[i] + vt.ub[i];
77 vs.ub[i] = SATUB(r);
78 }
79 return vs.d;
80}
81
82uint64_t helper_paddsh(uint64_t fs, uint64_t ft)
83{
84 LMIValue vs, vt;
85 unsigned int i;
86
87 vs.d = fs;
88 vt.d = ft;
89 for (i = 0; i < 4; ++i) {
90 int r = vs.sh[i] + vt.sh[i];
91 vs.sh[i] = SATSH(r);
92 }
93 return vs.d;
94}
95
96uint64_t helper_paddush(uint64_t fs, uint64_t ft)
97{
98 LMIValue vs, vt;
99 unsigned int i;
100
101 vs.d = fs;
102 vt.d = ft;
103 for (i = 0; i < 4; ++i) {
104 int r = vs.uh[i] + vt.uh[i];
105 vs.uh[i] = SATUH(r);
106 }
107 return vs.d;
108}
109
110uint64_t helper_paddb(uint64_t fs, uint64_t ft)
111{
112 LMIValue vs, vt;
113 unsigned int i;
114
115 vs.d = fs;
116 vt.d = ft;
117 for (i = 0; i < 8; ++i) {
118 vs.ub[i] += vt.ub[i];
119 }
120 return vs.d;
121}
122
123uint64_t helper_paddh(uint64_t fs, uint64_t ft)
124{
125 LMIValue vs, vt;
126 unsigned int i;
127
128 vs.d = fs;
129 vt.d = ft;
130 for (i = 0; i < 4; ++i) {
131 vs.uh[i] += vt.uh[i];
132 }
133 return vs.d;
134}
135
136uint64_t helper_paddw(uint64_t fs, uint64_t ft)
137{
138 LMIValue vs, vt;
139 unsigned int i;
140
141 vs.d = fs;
142 vt.d = ft;
143 for (i = 0; i < 2; ++i) {
144 vs.uw[i] += vt.uw[i];
145 }
146 return vs.d;
147}
148
149uint64_t helper_psubsb(uint64_t fs, uint64_t ft)
150{
151 LMIValue vs, vt;
152 unsigned int i;
153
154 vs.d = fs;
155 vt.d = ft;
156 for (i = 0; i < 8; ++i) {
157 int r = vs.sb[i] - vt.sb[i];
158 vs.sb[i] = SATSB(r);
159 }
160 return vs.d;
161}
162
/*
 * Lane-wise unsigned saturating subtract (fs - ft) over the eight bytes.
 * A lane whose subtrahend exceeds its minuend yields 0 instead of
 * wrapping around.
 *
 * Lanes are extracted with shifts, so the result does not depend on host
 * byte order.
 */
uint64_t helper_psubusb(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int i;

    for (i = 0; i < 8; ++i) {
        unsigned int shift = i * 8;
        uint8_t a = fs >> shift;
        uint8_t b = ft >> shift;
        /* Saturate at the lower bound: a negative difference becomes 0. */
        uint8_t r = a > b ? a - b : 0;

        fd |= (uint64_t)r << shift;
    }
    return fd;
}
176
177uint64_t helper_psubsh(uint64_t fs, uint64_t ft)
178{
179 LMIValue vs, vt;
180 unsigned int i;
181
182 vs.d = fs;
183 vt.d = ft;
184 for (i = 0; i < 4; ++i) {
185 int r = vs.sh[i] - vt.sh[i];
186 vs.sh[i] = SATSH(r);
187 }
188 return vs.d;
189}
190
/*
 * Lane-wise unsigned saturating subtract (fs - ft) over the four
 * halfwords.  A lane whose subtrahend exceeds its minuend yields 0
 * instead of wrapping around.
 *
 * Lanes are extracted with shifts, so the result does not depend on host
 * byte order.
 */
uint64_t helper_psubush(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int i;

    for (i = 0; i < 4; ++i) {
        unsigned int shift = i * 16;
        uint16_t a = fs >> shift;
        uint16_t b = ft >> shift;
        /* Saturate at the lower bound: a negative difference becomes 0. */
        uint16_t r = a > b ? a - b : 0;

        fd |= (uint64_t)r << shift;
    }
    return fd;
}
204
205uint64_t helper_psubb(uint64_t fs, uint64_t ft)
206{
207 LMIValue vs, vt;
208 unsigned int i;
209
210 vs.d = fs;
211 vt.d = ft;
212 for (i = 0; i < 8; ++i) {
213 vs.ub[i] -= vt.ub[i];
214 }
215 return vs.d;
216}
217
218uint64_t helper_psubh(uint64_t fs, uint64_t ft)
219{
220 LMIValue vs, vt;
221 unsigned int i;
222
223 vs.d = fs;
224 vt.d = ft;
225 for (i = 0; i < 4; ++i) {
226 vs.uh[i] -= vt.uh[i];
227 }
228 return vs.d;
229}
230
231uint64_t helper_psubw(uint64_t fs, uint64_t ft)
232{
233 LMIValue vs, vt;
234 unsigned int i;
235
236 vs.d = fs;
237 vt.d = ft;
238 for (i = 0; i < 2; ++i) {
239 vs.uw[i] -= vt.uw[i];
240 }
241 return vs.d;
242}
243
244uint64_t helper_pshufh(uint64_t fs, uint64_t ft)
245{
246 unsigned host = BYTE_ORDER_XOR(3);
247 LMIValue vd, vs;
248 unsigned i;
249
250 vs.d = fs;
251 vd.d = 0;
252 for (i = 0; i < 4; i++, ft >>= 2) {
253 vd.uh[i ^ host] = vs.uh[(ft & 3) ^ host];
254 }
255 return vd.d;
256}
257
/*
 * Pack four signed 32-bit words into four signed-saturated halfwords.
 * Result halfwords 0 and 1 come from the low and high words of fs,
 * halfwords 2 and 3 from the low and high words of ft.
 */
uint64_t helper_packsswh(uint64_t fs, uint64_t ft)
{
    const uint64_t src[2] = { fs, ft };
    uint64_t fd = 0;
    unsigned int i;

    for (i = 0; i < 4; ++i) {
        int32_t w = (int32_t)(src[i / 2] >> ((i % 2) * 32));
        int32_t s = w < -0x8000 ? -0x8000 : w > 0x7fff ? 0x7fff : w;

        /*
         * Shift as unsigned 64-bit: shifting a signed value into bit 63
         * would be undefined behaviour.
         */
        fd |= (uint64_t)(uint16_t)s << (i * 16);
    }
    return fd;
}
281
/*
 * Pack eight signed halfwords into eight bytes, each clamped through
 * SATSB.  The four halfwords of fs fill result bytes 0-3 and those of ft
 * fill bytes 4-7.
 */
uint64_t helper_packsshb(uint64_t fs, uint64_t ft)
{
    uint64_t packed = 0;

    for (unsigned n = 0; n < 8; n++) {
        /* n < 4 reads from fs, n >= 4 from ft; n & 3 is the halfword index. */
        uint64_t src = (n < 4) ? fs : ft;
        int16_t half = src >> ((n & 3) * 16);

        half = SATSB(half);
        packed |= (uint64_t)(half & 0xff) << (n * 8);
    }

    return packed;
}
300
/*
 * Pack eight signed halfwords into eight unsigned-saturated bytes.
 * Negative halfwords become 0 and values above 0xff become 0xff.  The
 * four halfwords of fs fill result bytes 0-3 and those of ft fill
 * bytes 4-7.
 */
uint64_t helper_packushb(uint64_t fs, uint64_t ft)
{
    const uint64_t src[2] = { fs, ft };
    uint64_t fd = 0;
    unsigned int i;

    for (i = 0; i < 8; ++i) {
        int16_t tmp = src[i / 4] >> ((i % 4) * 16);
        /* Clamp at both ends; the lower bound handles negative inputs. */
        uint8_t b = tmp < 0 ? 0 : tmp > 0xff ? 0xff : tmp;

        fd |= (uint64_t)b << (i * 8);
    }

    return fd;
}
319
/*
 * Combine the low words: the result's low 32 bits are the low word of fs
 * and its high 32 bits are the low word of ft.
 */
uint64_t helper_punpcklwd(uint64_t fs, uint64_t ft)
{
    uint64_t low = fs & 0xffffffff;
    uint64_t high = ft << 32;

    return high | low;
}
324
/*
 * Combine the high words: the result's low 32 bits are the high word of
 * fs and its high 32 bits are the high word of ft.
 */
uint64_t helper_punpckhwd(uint64_t fs, uint64_t ft)
{
    uint64_t low = fs >> 32;
    uint64_t high = ft & ~0xffffffffull;

    return high | low;
}
329
330uint64_t helper_punpcklhw(uint64_t fs, uint64_t ft)
331{
332 unsigned host = BYTE_ORDER_XOR(3);
333 LMIValue vd, vs, vt;
334
335 vs.d = fs;
336 vt.d = ft;
337 vd.uh[0 ^ host] = vs.uh[0 ^ host];
338 vd.uh[1 ^ host] = vt.uh[0 ^ host];
339 vd.uh[2 ^ host] = vs.uh[1 ^ host];
340 vd.uh[3 ^ host] = vt.uh[1 ^ host];
341
342 return vd.d;
343}
344
345uint64_t helper_punpckhhw(uint64_t fs, uint64_t ft)
346{
347 unsigned host = BYTE_ORDER_XOR(3);
348 LMIValue vd, vs, vt;
349
350 vs.d = fs;
351 vt.d = ft;
352 vd.uh[0 ^ host] = vs.uh[2 ^ host];
353 vd.uh[1 ^ host] = vt.uh[2 ^ host];
354 vd.uh[2 ^ host] = vs.uh[3 ^ host];
355 vd.uh[3 ^ host] = vt.uh[3 ^ host];
356
357 return vd.d;
358}
359
360uint64_t helper_punpcklbh(uint64_t fs, uint64_t ft)
361{
362 unsigned host = BYTE_ORDER_XOR(7);
363 LMIValue vd, vs, vt;
364
365 vs.d = fs;
366 vt.d = ft;
367 vd.ub[0 ^ host] = vs.ub[0 ^ host];
368 vd.ub[1 ^ host] = vt.ub[0 ^ host];
369 vd.ub[2 ^ host] = vs.ub[1 ^ host];
370 vd.ub[3 ^ host] = vt.ub[1 ^ host];
371 vd.ub[4 ^ host] = vs.ub[2 ^ host];
372 vd.ub[5 ^ host] = vt.ub[2 ^ host];
373 vd.ub[6 ^ host] = vs.ub[3 ^ host];
374 vd.ub[7 ^ host] = vt.ub[3 ^ host];
375
376 return vd.d;
377}
378
379uint64_t helper_punpckhbh(uint64_t fs, uint64_t ft)
380{
381 unsigned host = BYTE_ORDER_XOR(7);
382 LMIValue vd, vs, vt;
383
384 vs.d = fs;
385 vt.d = ft;
386 vd.ub[0 ^ host] = vs.ub[4 ^ host];
387 vd.ub[1 ^ host] = vt.ub[4 ^ host];
388 vd.ub[2 ^ host] = vs.ub[5 ^ host];
389 vd.ub[3 ^ host] = vt.ub[5 ^ host];
390 vd.ub[4 ^ host] = vs.ub[6 ^ host];
391 vd.ub[5 ^ host] = vt.ub[6 ^ host];
392 vd.ub[6 ^ host] = vs.ub[7 ^ host];
393 vd.ub[7 ^ host] = vt.ub[7 ^ host];
394
395 return vd.d;
396}
397
398uint64_t helper_pavgh(uint64_t fs, uint64_t ft)
399{
400 LMIValue vs, vt;
401 unsigned i;
402
403 vs.d = fs;
404 vt.d = ft;
405 for (i = 0; i < 4; i++) {
406 vs.uh[i] = (vs.uh[i] + vt.uh[i] + 1) >> 1;
407 }
408 return vs.d;
409}
410
411uint64_t helper_pavgb(uint64_t fs, uint64_t ft)
412{
413 LMIValue vs, vt;
414 unsigned i;
415
416 vs.d = fs;
417 vt.d = ft;
418 for (i = 0; i < 8; i++) {
419 vs.ub[i] = (vs.ub[i] + vt.ub[i] + 1) >> 1;
420 }
421 return vs.d;
422}
423
424uint64_t helper_pmaxsh(uint64_t fs, uint64_t ft)
425{
426 LMIValue vs, vt;
427 unsigned i;
428
429 vs.d = fs;
430 vt.d = ft;
431 for (i = 0; i < 4; i++) {
432 vs.sh[i] = (vs.sh[i] >= vt.sh[i] ? vs.sh[i] : vt.sh[i]);
433 }
434 return vs.d;
435}
436
437uint64_t helper_pminsh(uint64_t fs, uint64_t ft)
438{
439 LMIValue vs, vt;
440 unsigned i;
441
442 vs.d = fs;
443 vt.d = ft;
444 for (i = 0; i < 4; i++) {
445 vs.sh[i] = (vs.sh[i] <= vt.sh[i] ? vs.sh[i] : vt.sh[i]);
446 }
447 return vs.d;
448}
449
/*
 * Lane-wise maximum of the eight unsigned bytes of fs and ft.
 *
 * All eight lanes are processed.  Lanes are extracted with shifts, so the
 * result does not depend on host byte order.
 */
uint64_t helper_pmaxub(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned i;

    for (i = 0; i < 8; i++) {
        unsigned shift = i * 8;
        uint8_t a = fs >> shift;
        uint8_t b = ft >> shift;

        fd |= (uint64_t)(a >= b ? a : b) << shift;
    }
    return fd;
}
462
/*
 * Lane-wise minimum of the eight unsigned bytes of fs and ft.
 *
 * All eight lanes are processed.  Lanes are extracted with shifts, so the
 * result does not depend on host byte order.
 */
uint64_t helper_pminub(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned i;

    for (i = 0; i < 8; i++) {
        unsigned shift = i * 8;
        uint8_t a = fs >> shift;
        uint8_t b = ft >> shift;

        fd |= (uint64_t)(a <= b ? a : b) << shift;
    }
    return fd;
}
475
476uint64_t helper_pcmpeqw(uint64_t fs, uint64_t ft)
477{
478 LMIValue vs, vt;
479 unsigned i;
480
481 vs.d = fs;
482 vt.d = ft;
483 for (i = 0; i < 2; i++) {
484 vs.uw[i] = -(vs.uw[i] == vt.uw[i]);
485 }
486 return vs.d;
487}
488
489uint64_t helper_pcmpgtw(uint64_t fs, uint64_t ft)
490{
491 LMIValue vs, vt;
492 unsigned i;
493
494 vs.d = fs;
495 vt.d = ft;
496 for (i = 0; i < 2; i++) {
497 vs.uw[i] = -(vs.uw[i] > vt.uw[i]);
498 }
499 return vs.d;
500}
501
502uint64_t helper_pcmpeqh(uint64_t fs, uint64_t ft)
503{
504 LMIValue vs, vt;
505 unsigned i;
506
507 vs.d = fs;
508 vt.d = ft;
509 for (i = 0; i < 4; i++) {
510 vs.uh[i] = -(vs.uh[i] == vt.uh[i]);
511 }
512 return vs.d;
513}
514
515uint64_t helper_pcmpgth(uint64_t fs, uint64_t ft)
516{
517 LMIValue vs, vt;
518 unsigned i;
519
520 vs.d = fs;
521 vt.d = ft;
522 for (i = 0; i < 4; i++) {
523 vs.uh[i] = -(vs.uh[i] > vt.uh[i]);
524 }
525 return vs.d;
526}
527
528uint64_t helper_pcmpeqb(uint64_t fs, uint64_t ft)
529{
530 LMIValue vs, vt;
531 unsigned i;
532
533 vs.d = fs;
534 vt.d = ft;
535 for (i = 0; i < 8; i++) {
536 vs.ub[i] = -(vs.ub[i] == vt.ub[i]);
537 }
538 return vs.d;
539}
540
541uint64_t helper_pcmpgtb(uint64_t fs, uint64_t ft)
542{
543 LMIValue vs, vt;
544 unsigned i;
545
546 vs.d = fs;
547 vt.d = ft;
548 for (i = 0; i < 8; i++) {
549 vs.ub[i] = -(vs.ub[i] > vt.ub[i]);
550 }
551 return vs.d;
552}
553
554uint64_t helper_psllw(uint64_t fs, uint64_t ft)
555{
556 LMIValue vs;
557 unsigned i;
558
559 ft &= 0x7f;
560 if (ft > 31) {
561 return 0;
562 }
563 vs.d = fs;
564 for (i = 0; i < 2; ++i) {
565 vs.uw[i] <<= ft;
566 }
567 return vs.d;
568}
569
570uint64_t helper_psrlw(uint64_t fs, uint64_t ft)
571{
572 LMIValue vs;
573 unsigned i;
574
575 ft &= 0x7f;
576 if (ft > 31) {
577 return 0;
578 }
579 vs.d = fs;
580 for (i = 0; i < 2; ++i) {
581 vs.uw[i] >>= ft;
582 }
583 return vs.d;
584}
585
586uint64_t helper_psraw(uint64_t fs, uint64_t ft)
587{
588 LMIValue vs;
589 unsigned i;
590
591 ft &= 0x7f;
592 if (ft > 31) {
593 ft = 31;
594 }
595 vs.d = fs;
596 for (i = 0; i < 2; ++i) {
597 vs.sw[i] >>= ft;
598 }
599 return vs.d;
600}
601
602uint64_t helper_psllh(uint64_t fs, uint64_t ft)
603{
604 LMIValue vs;
605 unsigned i;
606
607 ft &= 0x7f;
608 if (ft > 15) {
609 return 0;
610 }
611 vs.d = fs;
612 for (i = 0; i < 4; ++i) {
613 vs.uh[i] <<= ft;
614 }
615 return vs.d;
616}
617
618uint64_t helper_psrlh(uint64_t fs, uint64_t ft)
619{
620 LMIValue vs;
621 unsigned i;
622
623 ft &= 0x7f;
624 if (ft > 15) {
625 return 0;
626 }
627 vs.d = fs;
628 for (i = 0; i < 4; ++i) {
629 vs.uh[i] >>= ft;
630 }
631 return vs.d;
632}
633
634uint64_t helper_psrah(uint64_t fs, uint64_t ft)
635{
636 LMIValue vs;
637 unsigned i;
638
639 ft &= 0x7f;
640 if (ft > 15) {
641 ft = 15;
642 }
643 vs.d = fs;
644 for (i = 0; i < 4; ++i) {
645 vs.sh[i] >>= ft;
646 }
647 return vs.d;
648}
649
650uint64_t helper_pmullh(uint64_t fs, uint64_t ft)
651{
652 LMIValue vs, vt;
653 unsigned i;
654
655 vs.d = fs;
656 vt.d = ft;
657 for (i = 0; i < 4; ++i) {
658 vs.sh[i] *= vt.sh[i];
659 }
660 return vs.d;
661}
662
663uint64_t helper_pmulhh(uint64_t fs, uint64_t ft)
664{
665 LMIValue vs, vt;
666 unsigned i;
667
668 vs.d = fs;
669 vt.d = ft;
670 for (i = 0; i < 4; ++i) {
671 int32_t r = vs.sh[i] * vt.sh[i];
672 vs.sh[i] = r >> 16;
673 }
674 return vs.d;
675}
676
/*
 * Lane-wise multiply of the four unsigned halfwords, keeping the high
 * 16 bits of each 32-bit product.
 *
 * Lanes are extracted with shifts, so the result does not depend on host
 * byte order.
 */
uint64_t helper_pmulhuh(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned i;

    for (i = 0; i < 4; ++i) {
        unsigned shift = i * 16;
        uint16_t a = fs >> shift;
        uint16_t b = ft >> shift;
        /*
         * Multiply as uint32_t: uint16_t operands would otherwise promote
         * to int, and 0xffff * 0xffff overflows a 32-bit signed int.
         */
        uint32_t r = (uint32_t)a * b;

        fd |= (uint64_t)(r >> 16) << shift;
    }
    return fd;
}
690
691uint64_t helper_pmaddhw(uint64_t fs, uint64_t ft)
692{
693 unsigned host = BYTE_ORDER_XOR(3);
694 LMIValue vs, vt;
695 uint32_t p0, p1;
696
697 vs.d = fs;
698 vt.d = ft;
699 p0 = vs.sh[0 ^ host] * vt.sh[0 ^ host];
700 p0 += vs.sh[1 ^ host] * vt.sh[1 ^ host];
701 p1 = vs.sh[2 ^ host] * vt.sh[2 ^ host];
702 p1 += vs.sh[3 ^ host] * vt.sh[3 ^ host];
703
704 return ((uint64_t)p1 << 32) | p0;
705}
706
707uint64_t helper_pasubub(uint64_t fs, uint64_t ft)
708{
709 LMIValue vs, vt;
710 unsigned i;
711
712 vs.d = fs;
713 vt.d = ft;
714 for (i = 0; i < 8; ++i) {
715 int r = vs.ub[i] - vt.ub[i];
716 vs.ub[i] = (r < 0 ? -r : r);
717 }
718 return vs.d;
719}
720
/*
 * Sum the eight bytes of fs and return the total masked to 16 bits.
 */
uint64_t helper_biadd(uint64_t fs)
{
    unsigned total = 0;

    /* Consume the low byte on each pass until all eight are added. */
    for (unsigned n = 0; n < 8; n++) {
        total += fs & 0xff;
        fs >>= 8;
    }
    return total & 0xffff;
}
730
/*
 * Gather the top bit of each of the eight bytes of fs into an 8-bit mask:
 * bit i of the result is bit 7 of byte i.
 */
uint64_t helper_pmovmskb(uint64_t fs)
{
    unsigned mask = 0;

    for (unsigned byte = 0; byte < 8; byte++) {
        /* Bit 7 of byte N sits at overall bit position 8*N + 7. */
        unsigned top = (fs >> (byte * 8 + 7)) & 1;

        mask |= top << byte;
    }

    return mask & 0xff;
}
746