1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25#include "tcg-pool.inc.c"
26
#ifdef CONFIG_DEBUG_TCG
/* Register names for debug dumps, indexed by TCG target register number.
   The first eight entries change spelling with the host word size.  */
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
#if TCG_TARGET_REG_BITS == 64
    "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
#else
    "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",
#endif
    "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
    "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7",
#if TCG_TARGET_REG_BITS == 64
    "%xmm8", "%xmm9", "%xmm10", "%xmm11",
    "%xmm12", "%xmm13", "%xmm14", "%xmm15",
#endif
};
#endif
42
/* Register allocation preference order.  Call-saved registers come first
   so that values live across calls avoid being spilled; %eax/%rax is
   listed last since many encodings implicitly clobber it.  */
static const int tcg_target_reg_alloc_order[] = {
#if TCG_TARGET_REG_BITS == 64
    TCG_REG_RBP,
    TCG_REG_RBX,
    TCG_REG_R12,
    TCG_REG_R13,
    TCG_REG_R14,
    TCG_REG_R15,
    TCG_REG_R10,
    TCG_REG_R11,
    TCG_REG_R9,
    TCG_REG_R8,
    TCG_REG_RCX,
    TCG_REG_RDX,
    TCG_REG_RSI,
    TCG_REG_RDI,
    TCG_REG_RAX,
#else
    TCG_REG_EBX,
    TCG_REG_ESI,
    TCG_REG_EDI,
    TCG_REG_EBP,
    TCG_REG_ECX,
    TCG_REG_EDX,
    TCG_REG_EAX,
#endif
    TCG_REG_XMM0,
    TCG_REG_XMM1,
    TCG_REG_XMM2,
    TCG_REG_XMM3,
    TCG_REG_XMM4,
    TCG_REG_XMM5,
#ifndef _WIN64
    /* The Win64 ABI requires xmm6-xmm15 to be preserved across calls,
       and we do not save any of them; therefore only xmm0-xmm5 may be
       allocated on that platform.  */
    TCG_REG_XMM6,
    TCG_REG_XMM7,
#if TCG_TARGET_REG_BITS == 64
    TCG_REG_XMM8,
    TCG_REG_XMM9,
    TCG_REG_XMM10,
    TCG_REG_XMM11,
    TCG_REG_XMM12,
    TCG_REG_XMM13,
    TCG_REG_XMM14,
    TCG_REG_XMM15,
#endif
#endif
};
92
/* Registers used to pass integer call arguments, in ABI order
   (Win64 vs. SysV differ in both count and ordering).  */
static const int tcg_target_call_iarg_regs[] = {
#if TCG_TARGET_REG_BITS == 64
#if defined(_WIN64)
    TCG_REG_RCX,
    TCG_REG_RDX,
#else
    TCG_REG_RDI,
    TCG_REG_RSI,
    TCG_REG_RDX,
    TCG_REG_RCX,
#endif
    TCG_REG_R8,
    TCG_REG_R9,
#else
    /* 32-bit mode: all arguments are passed on the stack.  */
#endif
};

/* Registers used to return integer call results; on a 32-bit host EDX
   carries the high half of a 64-bit result.  */
static const int tcg_target_call_oarg_regs[] = {
    TCG_REG_EAX,
#if TCG_TARGET_REG_BITS == 32
    TCG_REG_EDX
#endif
};
117
118
/* Additional constant-operand constraint flags, chosen not to clash
   with the generic TCG_CT_* bits.  */
#define TCG_CT_CONST_S32 0x100
#define TCG_CT_CONST_U32 0x200
#define TCG_CT_CONST_I32 0x400
#define TCG_CT_CONST_WSZ 0x800

/* Scratch registers reserved from the 'L' constraint below; on 64-bit
   hosts these coincide with the first two call-argument registers.  */
#if TCG_TARGET_REG_BITS == 64
# define TCG_REG_L0 tcg_target_call_iarg_regs[0]
# define TCG_REG_L1 tcg_target_call_iarg_regs[1]
#else
# define TCG_REG_L0 TCG_REG_EAX
# define TCG_REG_L1 TCG_REG_EDX
#endif

/* cpuid.h is available only when the build detected compiler support.  */
#if defined(CONFIG_CPUID_H)
#include "qemu/cpuid.h"
#endif

/* CMOV is architecturally guaranteed in 64-bit mode; otherwise it must
   be probed at runtime (via cpuid) or assumed absent.  */
#if TCG_TARGET_REG_BITS == 64
# define have_cmov 1
#elif defined(CONFIG_CPUID_H)
static bool have_cmov;
#else
# define have_cmov 0
#endif

/* Host feature flags with external linkage (read elsewhere in TCG).  */
bool have_bmi1;
bool have_popcnt;
bool have_avx1;
bool have_avx2;

/* Features used only within this backend; constant-false when there is
   no cpuid support to detect them.  */
#ifdef CONFIG_CPUID_H
static bool have_movbe;
static bool have_bmi2;
static bool have_lzcnt;
#else
# define have_movbe 0
# define have_bmi2 0
# define have_lzcnt 0
#endif

/* Return address of the epilogue; NOTE(review): assignment is not
   visible in this chunk — presumably set in the prologue emitter.  */
static tcg_insn_unit *tb_ret_addr;
169
/* Apply relocation TYPE at CODE_PTR so that it refers to VALUE+ADDEND.
   PC-relative forms abort if the displacement does not fit.  */
static void patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend)
{
    value += addend;
    switch(type) {
    case R_386_PC32:
        value -= (uintptr_t)code_ptr;
        if (value != (int32_t)value) {
            tcg_abort();
        }
        /* FALLTHRU: after converting to a pc-relative displacement,
           the 32-bit store is shared with R_386_32.  */
    case R_386_32:
        tcg_patch32(code_ptr, value);
        break;
    case R_386_PC8:
        value -= (uintptr_t)code_ptr;
        if (value != (int8_t)value) {
            tcg_abort();
        }
        tcg_patch8(code_ptr, value);
        break;
    default:
        tcg_abort();
    }
}
195
#if TCG_TARGET_REG_BITS == 64
#define ALL_GENERAL_REGS 0x0000ffffu
#define ALL_VECTOR_REGS 0xffff0000u
#else
#define ALL_GENERAL_REGS 0x000000ffu
#define ALL_VECTOR_REGS 0x00ff0000u
#endif

/* Parse one target-specific operand constraint letter from CT_STR,
   filling in CT.  Returns the advanced string, or NULL if the letter
   is not recognized.  */
static const char *target_parse_constraint(TCGArgConstraint *ct,
                                           const char *ct_str, TCGType type)
{
    switch(*ct_str++) {
    case 'a':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EAX);
        break;
    case 'b':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EBX);
        break;
    case 'c':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_ECX);
        break;
    case 'd':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EDX);
        break;
    case 'S':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_ESI);
        break;
    case 'D':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EDI);
        break;
    case 'q':
        /* A register usable as a byte operand: any register in 64-bit
           mode, only %eax-%edx in 32-bit mode.  */
        ct->ct |= TCG_CT_REG;
        ct->u.regs = TCG_TARGET_REG_BITS == 64 ? 0xffff : 0xf;
        break;
    case 'Q':
        /* A register with an addressable second byte (%ah etc.).  */
        ct->ct |= TCG_CT_REG;
        ct->u.regs = 0xf;
        break;
    case 'r':
        /* Any general register.  */
        ct->ct |= TCG_CT_REG;
        ct->u.regs |= ALL_GENERAL_REGS;
        break;
    case 'W':
        /* A constant equal to the operation's word size (32 or 64).  */
        ct->ct |= TCG_CT_CONST_WSZ;
        break;
    case 'x':
        /* Any vector register.  */
        ct->ct |= TCG_CT_REG;
        ct->u.regs |= ALL_VECTOR_REGS;
        break;

    /* qemu_ld/st operand: a general register minus the scratch
       registers L0/L1 used by the slow path.  */
    case 'L':
        ct->ct |= TCG_CT_REG;
        ct->u.regs = TCG_TARGET_REG_BITS == 64 ? 0xffff : 0xff;
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_L0);
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_L1);
        break;

    case 'e':
        /* Sign-extended 32-bit immediate (any value for 32-bit types).  */
        ct->ct |= (type == TCG_TYPE_I32 ? TCG_CT_CONST : TCG_CT_CONST_S32);
        break;
    case 'Z':
        /* Zero-extended 32-bit immediate.  */
        ct->ct |= (type == TCG_TYPE_I32 ? TCG_CT_CONST : TCG_CT_CONST_U32);
        break;
    case 'I':
        /* Immediate whose inversion fits in a signed 32-bit field.  */
        ct->ct |= (type == TCG_TYPE_I32 ? TCG_CT_CONST : TCG_CT_CONST_I32);
        break;

    default:
        return NULL;
    }
    return ct_str;
}
281
282
283static inline int tcg_target_const_match(tcg_target_long val, TCGType type,
284 const TCGArgConstraint *arg_ct)
285{
286 int ct = arg_ct->ct;
287 if (ct & TCG_CT_CONST) {
288 return 1;
289 }
290 if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) {
291 return 1;
292 }
293 if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) {
294 return 1;
295 }
296 if ((ct & TCG_CT_CONST_I32) && ~val == (int32_t)~val) {
297 return 1;
298 }
299 if ((ct & TCG_CT_CONST_WSZ) && val == (type == TCG_TYPE_I32 ? 32 : 64)) {
300 return 1;
301 }
302 return 0;
303}
304
305#if TCG_TARGET_REG_BITS == 64
306# define LOWREGMASK(x) ((x) & 7)
307#else
308# define LOWREGMASK(x) (x)
309#endif
310
311#define P_EXT 0x100
312#define P_EXT38 0x200
313#define P_DATA16 0x400
314#if TCG_TARGET_REG_BITS == 64
315# define P_ADDR32 0x800
316# define P_REXW 0x1000
317# define P_REXB_R 0x2000
318# define P_REXB_RM 0x4000
319# define P_GS 0x8000
320#else
321# define P_ADDR32 0
322# define P_REXW 0
323# define P_REXB_R 0
324# define P_REXB_RM 0
325# define P_GS 0
326#endif
327#define P_EXT3A 0x10000
328#define P_SIMDF3 0x20000
329#define P_SIMDF2 0x40000
330#define P_VEXL 0x80000
331
332#define OPC_ARITH_EvIz (0x81)
333#define OPC_ARITH_EvIb (0x83)
334#define OPC_ARITH_GvEv (0x03)
335#define OPC_ANDN (0xf2 | P_EXT38)
336#define OPC_ADD_GvEv (OPC_ARITH_GvEv | (ARITH_ADD << 3))
337#define OPC_BLENDPS (0x0c | P_EXT3A | P_DATA16)
338#define OPC_BSF (0xbc | P_EXT)
339#define OPC_BSR (0xbd | P_EXT)
340#define OPC_BSWAP (0xc8 | P_EXT)
341#define OPC_CALL_Jz (0xe8)
342#define OPC_CMOVCC (0x40 | P_EXT)
343#define OPC_CMP_GvEv (OPC_ARITH_GvEv | (ARITH_CMP << 3))
344#define OPC_DEC_r32 (0x48)
345#define OPC_IMUL_GvEv (0xaf | P_EXT)
346#define OPC_IMUL_GvEvIb (0x6b)
347#define OPC_IMUL_GvEvIz (0x69)
348#define OPC_INC_r32 (0x40)
349#define OPC_JCC_long (0x80 | P_EXT)
350#define OPC_JCC_short (0x70)
351#define OPC_JMP_long (0xe9)
352#define OPC_JMP_short (0xeb)
353#define OPC_LEA (0x8d)
354#define OPC_LZCNT (0xbd | P_EXT | P_SIMDF3)
355#define OPC_MOVB_EvGv (0x88)
356#define OPC_MOVL_EvGv (0x89)
357#define OPC_MOVL_GvEv (0x8b)
358#define OPC_MOVB_EvIz (0xc6)
359#define OPC_MOVL_EvIz (0xc7)
360#define OPC_MOVL_Iv (0xb8)
361#define OPC_MOVBE_GyMy (0xf0 | P_EXT38)
362#define OPC_MOVBE_MyGy (0xf1 | P_EXT38)
363#define OPC_MOVD_VyEy (0x6e | P_EXT | P_DATA16)
364#define OPC_MOVD_EyVy (0x7e | P_EXT | P_DATA16)
365#define OPC_MOVDDUP (0x12 | P_EXT | P_SIMDF2)
366#define OPC_MOVDQA_VxWx (0x6f | P_EXT | P_DATA16)
367#define OPC_MOVDQA_WxVx (0x7f | P_EXT | P_DATA16)
368#define OPC_MOVDQU_VxWx (0x6f | P_EXT | P_SIMDF3)
369#define OPC_MOVDQU_WxVx (0x7f | P_EXT | P_SIMDF3)
370#define OPC_MOVQ_VqWq (0x7e | P_EXT | P_SIMDF3)
371#define OPC_MOVQ_WqVq (0xd6 | P_EXT | P_DATA16)
372#define OPC_MOVSBL (0xbe | P_EXT)
373#define OPC_MOVSWL (0xbf | P_EXT)
374#define OPC_MOVSLQ (0x63 | P_REXW)
375#define OPC_MOVZBL (0xb6 | P_EXT)
376#define OPC_MOVZWL (0xb7 | P_EXT)
377#define OPC_PACKSSDW (0x6b | P_EXT | P_DATA16)
378#define OPC_PACKSSWB (0x63 | P_EXT | P_DATA16)
379#define OPC_PACKUSDW (0x2b | P_EXT38 | P_DATA16)
380#define OPC_PACKUSWB (0x67 | P_EXT | P_DATA16)
381#define OPC_PADDB (0xfc | P_EXT | P_DATA16)
382#define OPC_PADDW (0xfd | P_EXT | P_DATA16)
383#define OPC_PADDD (0xfe | P_EXT | P_DATA16)
384#define OPC_PADDQ (0xd4 | P_EXT | P_DATA16)
385#define OPC_PAND (0xdb | P_EXT | P_DATA16)
386#define OPC_PANDN (0xdf | P_EXT | P_DATA16)
387#define OPC_PBLENDW (0x0e | P_EXT3A | P_DATA16)
388#define OPC_PCMPEQB (0x74 | P_EXT | P_DATA16)
389#define OPC_PCMPEQW (0x75 | P_EXT | P_DATA16)
390#define OPC_PCMPEQD (0x76 | P_EXT | P_DATA16)
391#define OPC_PCMPEQQ (0x29 | P_EXT38 | P_DATA16)
392#define OPC_PCMPGTB (0x64 | P_EXT | P_DATA16)
393#define OPC_PCMPGTW (0x65 | P_EXT | P_DATA16)
394#define OPC_PCMPGTD (0x66 | P_EXT | P_DATA16)
395#define OPC_PCMPGTQ (0x37 | P_EXT38 | P_DATA16)
396#define OPC_PMOVSXBW (0x20 | P_EXT38 | P_DATA16)
397#define OPC_PMOVSXWD (0x23 | P_EXT38 | P_DATA16)
398#define OPC_PMOVSXDQ (0x25 | P_EXT38 | P_DATA16)
399#define OPC_PMOVZXBW (0x30 | P_EXT38 | P_DATA16)
400#define OPC_PMOVZXWD (0x33 | P_EXT38 | P_DATA16)
401#define OPC_PMOVZXDQ (0x35 | P_EXT38 | P_DATA16)
402#define OPC_PMULLW (0xd5 | P_EXT | P_DATA16)
403#define OPC_PMULLD (0x40 | P_EXT38 | P_DATA16)
404#define OPC_POR (0xeb | P_EXT | P_DATA16)
405#define OPC_PSHUFB (0x00 | P_EXT38 | P_DATA16)
406#define OPC_PSHUFD (0x70 | P_EXT | P_DATA16)
407#define OPC_PSHUFLW (0x70 | P_EXT | P_SIMDF2)
408#define OPC_PSHUFHW (0x70 | P_EXT | P_SIMDF3)
409#define OPC_PSHIFTW_Ib (0x71 | P_EXT | P_DATA16)
410#define OPC_PSHIFTD_Ib (0x72 | P_EXT | P_DATA16)
411#define OPC_PSHIFTQ_Ib (0x73 | P_EXT | P_DATA16)
412#define OPC_PSUBB (0xf8 | P_EXT | P_DATA16)
413#define OPC_PSUBW (0xf9 | P_EXT | P_DATA16)
414#define OPC_PSUBD (0xfa | P_EXT | P_DATA16)
415#define OPC_PSUBQ (0xfb | P_EXT | P_DATA16)
416#define OPC_PUNPCKLBW (0x60 | P_EXT | P_DATA16)
417#define OPC_PUNPCKLWD (0x61 | P_EXT | P_DATA16)
418#define OPC_PUNPCKLDQ (0x62 | P_EXT | P_DATA16)
419#define OPC_PUNPCKLQDQ (0x6c | P_EXT | P_DATA16)
420#define OPC_PUNPCKHBW (0x68 | P_EXT | P_DATA16)
421#define OPC_PUNPCKHWD (0x69 | P_EXT | P_DATA16)
422#define OPC_PUNPCKHDQ (0x6a | P_EXT | P_DATA16)
423#define OPC_PUNPCKHQDQ (0x6d | P_EXT | P_DATA16)
424#define OPC_PXOR (0xef | P_EXT | P_DATA16)
425#define OPC_POP_r32 (0x58)
426#define OPC_POPCNT (0xb8 | P_EXT | P_SIMDF3)
427#define OPC_PUSH_r32 (0x50)
428#define OPC_PUSH_Iv (0x68)
429#define OPC_PUSH_Ib (0x6a)
430#define OPC_RET (0xc3)
431#define OPC_SETCC (0x90 | P_EXT | P_REXB_RM)
432#define OPC_SHIFT_1 (0xd1)
433#define OPC_SHIFT_Ib (0xc1)
434#define OPC_SHIFT_cl (0xd3)
435#define OPC_SARX (0xf7 | P_EXT38 | P_SIMDF3)
436#define OPC_SHUFPS (0xc6 | P_EXT)
437#define OPC_SHLX (0xf7 | P_EXT38 | P_DATA16)
438#define OPC_SHRX (0xf7 | P_EXT38 | P_SIMDF2)
439#define OPC_TESTL (0x85)
440#define OPC_TZCNT (0xbc | P_EXT | P_SIMDF3)
441#define OPC_UD2 (0x0b | P_EXT)
442#define OPC_VPBLENDD (0x02 | P_EXT3A | P_DATA16)
443#define OPC_VPBLENDVB (0x4c | P_EXT3A | P_DATA16)
444#define OPC_VPBROADCASTB (0x78 | P_EXT38 | P_DATA16)
445#define OPC_VPBROADCASTW (0x79 | P_EXT38 | P_DATA16)
446#define OPC_VPBROADCASTD (0x58 | P_EXT38 | P_DATA16)
447#define OPC_VPBROADCASTQ (0x59 | P_EXT38 | P_DATA16)
448#define OPC_VPERMQ (0x00 | P_EXT3A | P_DATA16 | P_REXW)
449#define OPC_VPERM2I128 (0x46 | P_EXT3A | P_DATA16 | P_VEXL)
450#define OPC_VZEROUPPER (0x77 | P_EXT)
451#define OPC_XCHG_ax_r32 (0x90)
452
453#define OPC_GRP3_Ev (0xf7)
454#define OPC_GRP5 (0xff)
455#define OPC_GRP14 (0x73 | P_EXT | P_DATA16)
456
457
458
459#define ARITH_ADD 0
460#define ARITH_OR 1
461#define ARITH_ADC 2
462#define ARITH_SBB 3
463#define ARITH_AND 4
464#define ARITH_SUB 5
465#define ARITH_XOR 6
466#define ARITH_CMP 7
467
468
469#define SHIFT_ROL 0
470#define SHIFT_ROR 1
471#define SHIFT_SHL 4
472#define SHIFT_SHR 5
473#define SHIFT_SAR 7
474
475
476#define EXT3_NOT 2
477#define EXT3_NEG 3
478#define EXT3_MUL 4
479#define EXT3_IMUL 5
480#define EXT3_DIV 6
481#define EXT3_IDIV 7
482
483
484#define EXT5_INC_Ev 0
485#define EXT5_DEC_Ev 1
486#define EXT5_CALLN_Ev 2
487#define EXT5_JMPN_Ev 4
488
489
490#define JCC_JMP (-1)
491#define JCC_JO 0x0
492#define JCC_JNO 0x1
493#define JCC_JB 0x2
494#define JCC_JAE 0x3
495#define JCC_JE 0x4
496#define JCC_JNE 0x5
497#define JCC_JBE 0x6
498#define JCC_JA 0x7
499#define JCC_JS 0x8
500#define JCC_JNS 0x9
501#define JCC_JP 0xa
502#define JCC_JNP 0xb
503#define JCC_JL 0xc
504#define JCC_JGE 0xd
505#define JCC_JLE 0xe
506#define JCC_JG 0xf
507
/* Map TCG comparison conditions to the x86 jcc condition-code field.  */
static const uint8_t tcg_cond_to_jcc[] = {
    [TCG_COND_EQ] = JCC_JE,
    [TCG_COND_NE] = JCC_JNE,
    [TCG_COND_LT] = JCC_JL,
    [TCG_COND_GE] = JCC_JGE,
    [TCG_COND_LE] = JCC_JLE,
    [TCG_COND_GT] = JCC_JG,
    [TCG_COND_LTU] = JCC_JB,
    [TCG_COND_GEU] = JCC_JAE,
    [TCG_COND_LEU] = JCC_JBE,
    [TCG_COND_GTU] = JCC_JA,
};
520
521#if TCG_TARGET_REG_BITS == 64
/* Emit the legacy prefixes, REX byte (if needed) and opcode bytes for
   OPC.  R, RM, X are the ModRM reg, r/m, and SIB index register numbers,
   consulted only for their high (REX) bits here.  */
static void tcg_out_opc(TCGContext *s, int opc, int r, int rm, int x)
{
    int rex;

    if (opc & P_GS) {
        tcg_out8(s, 0x65);          /* %gs segment override */
    }
    if (opc & P_DATA16) {
        /* We should never be asking for both 16-bit operation and
           REX.W function.  */
        tcg_debug_assert((opc & P_REXW) == 0);
        tcg_out8(s, 0x66);          /* operand-size override */
    }
    if (opc & P_ADDR32) {
        tcg_out8(s, 0x67);          /* address-size override */
    }
    if (opc & P_SIMDF3) {
        tcg_out8(s, 0xf3);          /* SSE mandatory prefix */
    } else if (opc & P_SIMDF2) {
        tcg_out8(s, 0xf2);          /* SSE mandatory prefix */
    }

    rex = 0;
    rex |= (opc & P_REXW) ? 0x8 : 0x0;  /* REX.W */
    rex |= (r & 8) >> 1;                /* REX.R */
    rex |= (x & 8) >> 2;                /* REX.X */
    rex |= (rm & 8) >> 3;               /* REX.B */

    /* P_REXB_{R,RM} indicates that the given register is the low byte.
       For %[abcd]l we need no REX prefix, but for %{si,di,bp,sp}l we do,
       as otherwise the encoding indicates %[abcd]h.  The extra bits ORed
       in here merely force the REX byte to be emitted; they are masked
       away by the (uint8_t) cast below.  */
    rex |= opc & (r >= 4 ? P_REXB_R : 0);
    rex |= opc & (rm >= 4 ? P_REXB_RM : 0);

    if (rex) {
        tcg_out8(s, (uint8_t)(rex | 0x40));
    }

    /* Opcode-map escape bytes: 0x0f, optionally followed by 0x38/0x3a.  */
    if (opc & (P_EXT | P_EXT38 | P_EXT3A)) {
        tcg_out8(s, 0x0f);
        if (opc & P_EXT38) {
            tcg_out8(s, 0x38);
        } else if (opc & P_EXT3A) {
            tcg_out8(s, 0x3a);
        }
    }

    tcg_out8(s, opc);
}
#else
/* 32-bit variant: there is no REX prefix, so only the legacy prefixes
   and opcode-map escape bytes are emitted.  */
static void tcg_out_opc(TCGContext *s, int opc)
{
    if (opc & P_DATA16) {
        tcg_out8(s, 0x66);
    }
    if (opc & P_SIMDF3) {
        tcg_out8(s, 0xf3);
    } else if (opc & P_SIMDF2) {
        tcg_out8(s, 0xf2);
    }
    if (opc & (P_EXT | P_EXT38 | P_EXT3A)) {
        tcg_out8(s, 0x0f);
        if (opc & P_EXT38) {
            tcg_out8(s, 0x38);
        } else if (opc & P_EXT3A) {
            tcg_out8(s, 0x3a);
        }
    }
    tcg_out8(s, opc);
}

/* Discard the register arguments accepted by the 64-bit variant so
   that callers can always use the 5-operand form.  */
#define tcg_out_opc(s, opc, r, rm, x) (tcg_out_opc)(s, opc)
#endif
598
/* Emit OPC with a register-direct ModRM byte (mod == 11).  */
static void tcg_out_modrm(TCGContext *s, int opc, int r, int rm)
{
    tcg_out_opc(s, opc, r, rm, 0);
    tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
}
604
/* Emit the VEX prefix and opcode byte for OPC.  V is the extra source
   register encoded in VEX.vvvv; R, RM, INDEX supply the inverted
   REX-equivalent bits.  */
static void tcg_out_vex_opc(TCGContext *s, int opc, int r, int v,
                            int rm, int index)
{
    int tmp;

    /* Use the two-byte form if possible; it cannot encode VEX.W,
       VEX.X, VEX.B, or an opcode map other than 0x0f (P_EXT).  */
    if ((opc & (P_EXT | P_EXT38 | P_EXT3A | P_REXW)) == P_EXT
        && ((rm | index) & 8) == 0) {
        /* Two-byte VEX prefix.  */
        tcg_out8(s, 0xc5);

        tmp = (r & 8 ? 0 : 0x80);              /* VEX.R (inverted) */
    } else {
        /* Three-byte VEX prefix.  */
        tcg_out8(s, 0xc4);

        /* VEX.m-mmmm selects the opcode map.  */
        if (opc & P_EXT3A) {
            tmp = 3;
        } else if (opc & P_EXT38) {
            tmp = 2;
        } else if (opc & P_EXT) {
            tmp = 1;
        } else {
            g_assert_not_reached();
        }
        tmp |= (r & 8 ? 0 : 0x80);             /* VEX.R (inverted) */
        tmp |= (index & 8 ? 0 : 0x40);         /* VEX.X (inverted) */
        tmp |= (rm & 8 ? 0 : 0x20);            /* VEX.B (inverted) */
        tcg_out8(s, tmp);

        tmp = (opc & P_REXW ? 0x80 : 0);       /* VEX.W */
    }

    tmp |= (opc & P_VEXL ? 0x04 : 0);          /* VEX.L: 256-bit */
    /* VEX.pp encodes the mandatory SIMD prefix.  */
    if (opc & P_DATA16) {
        tmp |= 1;                              /* 0x66 */
    } else if (opc & P_SIMDF3) {
        tmp |= 2;                              /* 0xf3 */
    } else if (opc & P_SIMDF2) {
        tmp |= 3;                              /* 0xf2 */
    }
    tmp |= (~v & 15) << 3;                     /* VEX.vvvv (inverted) */
    tcg_out8(s, tmp);
    tcg_out8(s, opc);
}
653
/* Emit a VEX-encoded OPC with a register-direct ModRM byte.  */
static void tcg_out_vex_modrm(TCGContext *s, int opc, int r, int v, int rm)
{
    tcg_out_vex_opc(s, opc, r, v, rm, 0);
    tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
}
659
660
661
662
663
664
/* Output the ModRM/SIB/displacement bytes for a full
   "rm + (index << shift) + offset" address mode.  A negative RM or
   INDEX means that component is absent.  In 64-bit mode, for absolute
   addresses, ~RM is the size of the immediate operand that will follow
   the instruction (used to locate the end of the insn for rip-relative
   displacement computation).  */
static void tcg_out_sib_offset(TCGContext *s, int r, int rm, int index,
                               int shift, intptr_t offset)
{
    int mod, len;

    if (index < 0 && rm < 0) {
        if (TCG_TARGET_REG_BITS == 64) {
            /* Try for a rip-relative addressing mode.  This has replaced
               the 32-bit-mode absolute-address encoding.  */
            intptr_t pc = (intptr_t)s->code_ptr + 5 + ~rm;
            intptr_t disp = offset - pc;
            if (disp == (int32_t)disp) {
                tcg_out8(s, (LOWREGMASK(r) << 3) | 5);
                tcg_out32(s, disp);
                return;
            }

            /* Try for an absolute address encoding.  This requires the
               use of the MODRM+SIB encoding and is therefore larger
               than rip-relative addressing.  */
            if (offset == (int32_t)offset) {
                tcg_out8(s, (LOWREGMASK(r) << 3) | 4);
                tcg_out8(s, (4 << 3) | 5);
                tcg_out32(s, offset);
                return;
            }

            /* ??? The memory isn't directly addressable.  */
            g_assert_not_reached();
        } else {
            /* Absolute address.  */
            tcg_out8(s, (r << 3) | 5);
            tcg_out32(s, offset);
            return;
        }
    }

    /* Find the length of the immediate addend.  Note that the encoding
       that would be used for (%ebp) indicates absolute addressing, so a
       displacement byte is forced for that register.  */
    if (rm < 0) {
        mod = 0, len = 4, rm = 5;
    } else if (offset == 0 && LOWREGMASK(rm) != TCG_REG_EBP) {
        mod = 0, len = 0;
    } else if (offset == (int8_t)offset) {
        mod = 0x40, len = 1;
    } else {
        mod = 0x80, len = 4;
    }

    /* Use a single-byte MODRM format if possible.  Note that the encoding
       that would be used for %esp is the escape to the two-byte form.  */
    if (index < 0 && LOWREGMASK(rm) != TCG_REG_ESP) {
        /* Single-byte MODRM format.  */
        tcg_out8(s, mod | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
    } else {
        /* Two-byte MODRM+SIB format.  */

        /* Note that the encoding that would place %esp into the index
           field indicates no index register; %esp therefore cannot be
           an index.  */
        if (index < 0) {
            index = 4;
        } else {
            tcg_debug_assert(index != TCG_REG_ESP);
        }

        tcg_out8(s, mod | (LOWREGMASK(r) << 3) | 4);
        tcg_out8(s, (shift << 6) | (LOWREGMASK(index) << 3) | LOWREGMASK(rm));
    }

    if (len == 1) {
        tcg_out8(s, offset);
    } else if (len == 4) {
        tcg_out32(s, offset);
    }
}
741
/* Emit OPC with a full base+index*scale+offset memory operand.  */
static void tcg_out_modrm_sib_offset(TCGContext *s, int opc, int r, int rm,
                                     int index, int shift, intptr_t offset)
{
    tcg_out_opc(s, opc, r, rm < 0 ? 0 : rm, index < 0 ? 0 : index);
    tcg_out_sib_offset(s, r, rm, index, shift, offset);
}

/* As above, for a VEX-encoded instruction with extra source V.  */
static void tcg_out_vex_modrm_sib_offset(TCGContext *s, int opc, int r, int v,
                                         int rm, int index, int shift,
                                         intptr_t offset)
{
    tcg_out_vex_opc(s, opc, r, v, rm < 0 ? 0 : rm, index < 0 ? 0 : index);
    tcg_out_sib_offset(s, r, rm, index, shift, offset);
}
756
757
/* Emit OPC with a simple base+offset memory operand (no index).  */
static inline void tcg_out_modrm_offset(TCGContext *s, int opc, int r,
                                        int rm, intptr_t offset)
{
    tcg_out_modrm_sib_offset(s, opc, r, rm, -1, 0, offset);
}

/* As above, for a VEX-encoded instruction with extra source V.  */
static inline void tcg_out_vex_modrm_offset(TCGContext *s, int opc, int r,
                                            int v, int rm, intptr_t offset)
{
    tcg_out_vex_modrm_sib_offset(s, opc, r, v, rm, -1, 0, offset);
}
769
770
/* Output an opcode with an expected reference to the constant pool:
   a rip-relative operand with a zero displacement, to be patched once
   the pool entry's address is known.  */
static inline void tcg_out_modrm_pool(TCGContext *s, int opc, int r)
{
    tcg_out_opc(s, opc, r, 0, 0);
    /* ModRM mod=00, rm=101: rip-relative (64-bit) / absolute (32-bit).  */
    tcg_out8(s, LOWREGMASK(r) << 3 | 5);
    tcg_out32(s, 0);
}

/* VEX-encoded variant of tcg_out_modrm_pool.  */
static inline void tcg_out_vex_modrm_pool(TCGContext *s, int opc, int r)
{
    tcg_out_vex_opc(s, opc, r, 0, 0, 0);
    tcg_out8(s, LOWREGMASK(r) << 3 | 5);
    tcg_out32(s, 0);
}
787
788
/* Emit a register-register ALU operation (add/or/and/...).  */
static inline void tgen_arithr(TCGContext *s, int subop, int dest, int src)
{
    /* Propagate an opcode prefix, such as P_REXW, carried in SUBOP.  */
    int ext = subop & ~0x7;
    subop &= 0x7;

    tcg_out_modrm(s, OPC_ARITH_GvEv + (subop << 3) + ext, dest, src);
}
797
/* Emit a register-to-register move of the given TYPE, handling all
   combinations of general (< 16) and xmm (>= 16) registers.  */
static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
{
    int rexw = 0;

    if (arg == ret) {
        return;
    }
    switch (type) {
    case TCG_TYPE_I64:
        rexw = P_REXW;
        /* FALLTHRU */
    case TCG_TYPE_I32:
        if (ret < 16) {
            if (arg < 16) {
                tcg_out_modrm(s, OPC_MOVL_GvEv + rexw, ret, arg);
            } else {
                /* xmm -> general: movd/movq with operands swapped.  */
                tcg_out_vex_modrm(s, OPC_MOVD_EyVy + rexw, arg, 0, ret);
            }
        } else {
            if (arg < 16) {
                /* general -> xmm.  */
                tcg_out_vex_modrm(s, OPC_MOVD_VyEy + rexw, ret, 0, arg);
            } else {
                /* xmm -> xmm: movq moves the low 64 bits.  */
                tcg_out_vex_modrm(s, OPC_MOVQ_VqWq, ret, 0, arg);
            }
        }
        break;

    case TCG_TYPE_V64:
        tcg_debug_assert(ret >= 16 && arg >= 16);
        tcg_out_vex_modrm(s, OPC_MOVQ_VqWq, ret, 0, arg);
        break;
    case TCG_TYPE_V128:
        tcg_debug_assert(ret >= 16 && arg >= 16);
        tcg_out_vex_modrm(s, OPC_MOVDQA_VxWx, ret, 0, arg);
        break;
    case TCG_TYPE_V256:
        tcg_debug_assert(ret >= 16 && arg >= 16);
        tcg_out_vex_modrm(s, OPC_MOVDQA_VxWx | P_VEXL, ret, 0, arg);
        break;

    default:
        g_assert_not_reached();
    }
}
842
/* Broadcast the VECE-sized low element of vector register A into all
   lanes of R, using vpbroadcast* when AVX2 is present and interleave /
   shuffle sequences otherwise.  */
static void tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg r, TCGReg a)
{
    if (have_avx2) {
        static const int dup_insn[4] = {
            OPC_VPBROADCASTB, OPC_VPBROADCASTW,
            OPC_VPBROADCASTD, OPC_VPBROADCASTQ,
        };
        int vex_l = (type == TCG_TYPE_V256 ? P_VEXL : 0);
        tcg_out_vex_modrm(s, dup_insn[vece] + vex_l, r, 0, a);
    } else {
        switch (vece) {
        case MO_8:
            /* Widen bytes to words, then keep widening below.  */
            tcg_out_vex_modrm(s, OPC_PUNPCKLBW, r, a, a);
            a = r;
            /* FALLTHRU */
        case MO_16:
            tcg_out_vex_modrm(s, OPC_PUNPCKLWD, r, a, a);
            a = r;
            /* FALLTHRU */
        case MO_32:
            tcg_out_vex_modrm(s, OPC_PSHUFD, r, 0, a);
            /* imm8 0: replicate dword lane 0 into all four lanes.  */
            tcg_out8(s, 0);
            break;
        case MO_64:
            tcg_out_vex_modrm(s, OPC_PUNPCKLQDQ, r, a, a);
            break;
        default:
            g_assert_not_reached();
        }
    }
}
877
/* Load immediate ARG broadcast across vector register RET.  0 and -1
   are synthesized without memory; other values come from the constant
   pool.  */
static void tcg_out_dupi_vec(TCGContext *s, TCGType type,
                             TCGReg ret, tcg_target_long arg)
{
    int vex_l = (type == TCG_TYPE_V256 ? P_VEXL : 0);

    if (arg == 0) {
        /* xor reg,reg yields all-zeros.  */
        tcg_out_vex_modrm(s, OPC_PXOR, ret, ret, ret);
        return;
    }
    if (arg == -1) {
        /* pcmpeq reg,reg yields all-ones.  */
        tcg_out_vex_modrm(s, OPC_PCMPEQB + vex_l, ret, ret, ret);
        return;
    }

    if (TCG_TARGET_REG_BITS == 64) {
        /* 64-bit host: pool entry holds a 64-bit value, loaded
           rip-relative.  */
        if (type == TCG_TYPE_V64) {
            tcg_out_vex_modrm_pool(s, OPC_MOVQ_VqWq, ret);
        } else if (have_avx2) {
            tcg_out_vex_modrm_pool(s, OPC_VPBROADCASTQ + vex_l, ret);
        } else {
            tcg_out_vex_modrm_pool(s, OPC_MOVDDUP, ret);
        }
        new_pool_label(s, arg, R_386_PC32, s->code_ptr - 4, -4);
    } else if (have_avx2) {
        tcg_out_vex_modrm_pool(s, OPC_VPBROADCASTD + vex_l, ret);
        new_pool_label(s, arg, R_386_32, s->code_ptr - 4, 0);
    } else {
        /* No broadcast insn: load a dword, then dup it manually.  */
        tcg_out_vex_modrm_pool(s, OPC_MOVD_VyEy, ret);
        new_pool_label(s, arg, R_386_32, s->code_ptr - 4, 0);
        tcg_out_dup_vec(s, type, MO_32, ret, ret);
    }
}
910
/* Load immediate ARG into register RET, choosing the shortest encoding:
   xor for 0, 5-byte movl when the value zero-extends, 7-byte movq for
   sign-extending values, 7-byte pc-relative lea when in range, and the
   full 10-byte movq otherwise.  Vector destinations dispatch to
   tcg_out_dupi_vec.  */
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg)
{
    tcg_target_long diff;

    switch (type) {
    case TCG_TYPE_I32:
#if TCG_TARGET_REG_BITS == 64
    case TCG_TYPE_I64:
#endif
        if (ret < 16) {
            break;
        }
        /* FALLTHRU: integer type but xmm destination.  */
    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
    case TCG_TYPE_V256:
        tcg_debug_assert(ret >= 16);
        tcg_out_dupi_vec(s, type, ret, arg);
        return;
    default:
        g_assert_not_reached();
    }

    if (arg == 0) {
        tgen_arithr(s, ARITH_XOR, ret, ret);
        return;
    }
    if (arg == (uint32_t)arg || type == TCG_TYPE_I32) {
        /* 32-bit mov zero-extends into the full 64-bit register.  */
        tcg_out_opc(s, OPC_MOVL_Iv + LOWREGMASK(ret), 0, ret, 0);
        tcg_out32(s, arg);
        return;
    }
    if (arg == (int32_t)arg) {
        /* Sign-extending 32-bit immediate move.  */
        tcg_out_modrm(s, OPC_MOVL_EvIz + P_REXW, 0, ret);
        tcg_out32(s, arg);
        return;
    }

    /* Try a 7-byte pc-relative lea before the 10-byte movq.  */
    diff = arg - ((uintptr_t)s->code_ptr + 7);
    if (diff == (int32_t)diff) {
        tcg_out_opc(s, OPC_LEA | P_REXW, ret, 0, 0);
        tcg_out8(s, (LOWREGMASK(ret) << 3) | 5);
        tcg_out32(s, diff);
        return;
    }

    tcg_out_opc(s, OPC_MOVL_Iv + P_REXW + LOWREGMASK(ret), 0, ret, 0);
    tcg_out64(s, arg);
}
962
/* Push immediate VAL; aborts if it does not fit a sign-extended
   32-bit push.  */
static inline void tcg_out_pushi(TCGContext *s, tcg_target_long val)
{
    if (val == (int8_t)val) {
        tcg_out_opc(s, OPC_PUSH_Ib, 0, 0, 0);
        tcg_out8(s, val);
    } else if (val == (int32_t)val) {
        tcg_out_opc(s, OPC_PUSH_Iv, 0, 0, 0);
        tcg_out32(s, val);
    } else {
        tcg_abort();
    }
}

/* Emit a memory barrier for the ordering bits in A0.  Given the
   strength of x86 memory ordering, only store-load needs an actual
   fence; "lock orl $0,0(%esp)" is used in place of mfence (reportedly
   as strong and typically faster).  */
static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
{
    if (a0 & TCG_MO_ST_LD) {
        tcg_out8(s, 0xf0);              /* lock prefix */
        tcg_out_modrm_offset(s, OPC_ARITH_EvIb, ARITH_OR, TCG_REG_ESP, 0);
        tcg_out8(s, 0);                 /* or $0 */
    }
}

/* Push general register REG.  */
static inline void tcg_out_push(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_PUSH_r32 + LOWREGMASK(reg), 0, reg, 0);
}

/* Pop general register REG.  */
static inline void tcg_out_pop(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_POP_r32 + LOWREGMASK(reg), 0, reg, 0);
}
997
/* Load a value of TYPE from arg1+arg2 into register RET.  Vector loads
   use the unaligned movdqu forms since alignment is not guaranteed.  */
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
                       TCGReg arg1, intptr_t arg2)
{
    switch (type) {
    case TCG_TYPE_I32:
        if (ret < 16) {
            tcg_out_modrm_offset(s, OPC_MOVL_GvEv, ret, arg1, arg2);
        } else {
            tcg_out_vex_modrm_offset(s, OPC_MOVD_VyEy, ret, 0, arg1, arg2);
        }
        break;
    case TCG_TYPE_I64:
        if (ret < 16) {
            tcg_out_modrm_offset(s, OPC_MOVL_GvEv | P_REXW, ret, arg1, arg2);
            break;
        }
        /* FALLTHRU: 64-bit load into an xmm register.  */
    case TCG_TYPE_V64:
        tcg_debug_assert(ret >= 16);
        tcg_out_vex_modrm_offset(s, OPC_MOVQ_VqWq, ret, 0, arg1, arg2);
        break;
    case TCG_TYPE_V128:
        tcg_debug_assert(ret >= 16);
        tcg_out_vex_modrm_offset(s, OPC_MOVDQU_VxWx, ret, 0, arg1, arg2);
        break;
    case TCG_TYPE_V256:
        tcg_debug_assert(ret >= 16);
        tcg_out_vex_modrm_offset(s, OPC_MOVDQU_VxWx | P_VEXL,
                                 ret, 0, arg1, arg2);
        break;
    default:
        g_assert_not_reached();
    }
}
1032
/* Store register ARG of TYPE to arg1+arg2; mirror of tcg_out_ld.  */
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
                       TCGReg arg1, intptr_t arg2)
{
    switch (type) {
    case TCG_TYPE_I32:
        if (arg < 16) {
            tcg_out_modrm_offset(s, OPC_MOVL_EvGv, arg, arg1, arg2);
        } else {
            tcg_out_vex_modrm_offset(s, OPC_MOVD_EyVy, arg, 0, arg1, arg2);
        }
        break;
    case TCG_TYPE_I64:
        if (arg < 16) {
            tcg_out_modrm_offset(s, OPC_MOVL_EvGv | P_REXW, arg, arg1, arg2);
            break;
        }
        /* FALLTHRU: 64-bit store from an xmm register.  */
    case TCG_TYPE_V64:
        tcg_debug_assert(arg >= 16);
        tcg_out_vex_modrm_offset(s, OPC_MOVQ_WqVq, arg, 0, arg1, arg2);
        break;
    case TCG_TYPE_V128:
        tcg_debug_assert(arg >= 16);
        tcg_out_vex_modrm_offset(s, OPC_MOVDQU_WxVx, arg, 0, arg1, arg2);
        break;
    case TCG_TYPE_V256:
        tcg_debug_assert(arg >= 16);
        tcg_out_vex_modrm_offset(s, OPC_MOVDQU_WxVx | P_VEXL,
                                 arg, 0, arg1, arg2);
        break;
    default:
        g_assert_not_reached();
    }
}
1067
/* Store immediate VAL of TYPE to base+ofs.  Returns false when the
   value cannot be expressed as a (sign-extended) 32-bit store
   immediate, so the caller must materialize it in a register.  */
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs)
{
    int rexw = 0;
    if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I64) {
        if (val != (int32_t)val) {
            return false;
        }
        rexw = P_REXW;
    } else if (type != TCG_TYPE_I32) {
        return false;
    }
    tcg_out_modrm_offset(s, OPC_MOVL_EvIz | rexw, 0, base, ofs);
    tcg_out32(s, val);
    return true;
}
1084
/* Shift REG by constant COUNT; SUBOPC selects the shift kind (SHIFT_*)
   and may carry a prefix such as P_REXW or P_DATA16.  */
static void tcg_out_shifti(TCGContext *s, int subopc, int reg, int count)
{
    /* Propagate an opcode prefix, such as P_REXW.  */
    int ext = subopc & ~0x7;
    subopc &= 0x7;

    if (count == 1) {
        /* One-byte-shorter shift-by-one form.  */
        tcg_out_modrm(s, OPC_SHIFT_1 + ext, subopc, reg);
    } else {
        tcg_out_modrm(s, OPC_SHIFT_Ib + ext, subopc, reg);
        tcg_out8(s, count);
    }
}

/* bswap of the low 32 bits of REG.  */
static inline void tcg_out_bswap32(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_BSWAP + LOWREGMASK(reg), 0, reg, 0);
}

/* Swap the two bytes of the low 16 bits of REG (rolw $8).  */
static inline void tcg_out_rolw_8(TCGContext *s, int reg)
{
    tcg_out_shifti(s, SHIFT_ROL + P_DATA16, reg, 8);
}

/* movzbl: zero-extend the low byte of SRC into DEST.  */
static inline void tcg_out_ext8u(TCGContext *s, int dest, int src)
{
    /* In 32-bit mode only %eax-%edx have addressable low bytes.  */
    tcg_debug_assert(src < 4 || TCG_TARGET_REG_BITS == 64);
    tcg_out_modrm(s, OPC_MOVZBL + P_REXB_RM, dest, src);
}

/* movsbl/movsbq: sign-extend the low byte of SRC into DEST.  */
static void tcg_out_ext8s(TCGContext *s, int dest, int src, int rexw)
{
    /* In 32-bit mode only %eax-%edx have addressable low bytes.  */
    tcg_debug_assert(src < 4 || TCG_TARGET_REG_BITS == 64);
    tcg_out_modrm(s, OPC_MOVSBL + P_REXB_RM + rexw, dest, src);
}

/* movzwl: zero-extend the low word of SRC into DEST.  */
static inline void tcg_out_ext16u(TCGContext *s, int dest, int src)
{
    tcg_out_modrm(s, OPC_MOVZWL, dest, src);
}

/* movswl/movswq: sign-extend the low word of SRC into DEST.  */
static inline void tcg_out_ext16s(TCGContext *s, int dest, int src, int rexw)
{
    tcg_out_modrm(s, OPC_MOVSWL + rexw, dest, src);
}

/* 32-bit mov: zero-extends into the full 64-bit DEST.  */
static inline void tcg_out_ext32u(TCGContext *s, int dest, int src)
{
    tcg_out_modrm(s, OPC_MOVL_GvEv, dest, src);
}

/* movslq: sign-extend 32 bits of SRC into 64-bit DEST.  */
static inline void tcg_out_ext32s(TCGContext *s, int dest, int src)
{
    tcg_out_modrm(s, OPC_MOVSLQ, dest, src);
}

/* bswap of the full 64-bit REG.  */
static inline void tcg_out_bswap64(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_BSWAP + P_REXW + LOWREGMASK(reg), 0, reg, 0);
}
1150
/* Emit ALU operation C with immediate VAL on register R0.  CF set means
   the carry/flags result is needed, disallowing transforms that do not
   produce the same flags.  */
static void tgen_arithi(TCGContext *s, int c, int r0,
                        tcg_target_long val, int cf)
{
    int rexw = 0;

    if (TCG_TARGET_REG_BITS == 64) {
        /* Split a possible P_REXW prefix out of the sub-opcode.  */
        rexw = c & -8;
        c &= 7;
    }

    /* add/sub of +/-1 can use the shorter inc/dec forms, but only when
       the flags are not consumed (inc/dec do not update carry).  */
    if (!cf && (c == ARITH_ADD || c == ARITH_SUB) && (val == 1 || val == -1)) {
        int is_inc = (c == ARITH_ADD) ^ (val < 0);
        if (TCG_TARGET_REG_BITS == 64) {
            /* The single-byte inc/dec encodings are REX prefixes in
               64-bit mode; use the ModRM group-5 form instead.  */
            tcg_out_modrm(s, OPC_GRP5 + rexw,
                          (is_inc ? EXT5_INC_Ev : EXT5_DEC_Ev), r0);
        } else {
            tcg_out8(s, (is_inc ? OPC_INC_r32 : OPC_DEC_r32) + r0);
        }
        return;
    }

    if (c == ARITH_AND) {
        if (TCG_TARGET_REG_BITS == 64) {
            if (val == 0xffffffffu) {
                /* 32-bit mov performs the zero-extension.  */
                tcg_out_ext32u(s, r0, r0);
                return;
            }
            if (val == (uint32_t)val) {
                /* AND with a zero-extended immediate needs no REX.W:
                   the 32-bit operation clears the high bits anyway.  */
                rexw = 0;
            }
        }
        if (val == 0xffu && (r0 < 4 || TCG_TARGET_REG_BITS == 64)) {
            tcg_out_ext8u(s, r0, r0);
            return;
        }
        if (val == 0xffffu) {
            tcg_out_ext16u(s, r0, r0);
            return;
        }
    }

    if (val == (int8_t)val) {
        tcg_out_modrm(s, OPC_ARITH_EvIb + rexw, c, r0);
        tcg_out8(s, val);
        return;
    }
    if (rexw == 0 || val == (int32_t)val) {
        tcg_out_modrm(s, OPC_ARITH_EvIz + rexw, c, r0);
        tcg_out32(s, val);
        return;
    }

    /* 64-bit immediates that do not sign-extend cannot be encoded.  */
    tcg_abort();
}
1211
/* Add immediate VAL to REG at full host-word width (P_REXW is 0 on a
   32-bit host); no-op when VAL is zero.  */
static void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val)
{
    if (val != 0) {
        tgen_arithi(s, ARITH_ADD + P_REXW, reg, val, 0);
    }
}
1218
1219
/* Emit an unconditional jump (OPC == JCC_JMP, i.e. -1) or conditional
   branch OPC to label L.  With SMALL set the branch must use the
   one-byte displacement form; aborts if a known target is out of
   range for it.  */
static void tcg_out_jxx(TCGContext *s, int opc, TCGLabel *l, int small)
{
    int32_t val, val1;

    if (l->has_value) {
        val = tcg_pcrel_diff(s, l->u.value_ptr);
        val1 = val - 2;     /* displacement relative to end of 2-byte insn */
        if ((int8_t)val1 == val1) {
            if (opc == -1) {
                tcg_out8(s, OPC_JMP_short);
            } else {
                tcg_out8(s, OPC_JCC_short + opc);
            }
            tcg_out8(s, val1);
        } else {
            if (small) {
                tcg_abort();
            }
            if (opc == -1) {
                tcg_out8(s, OPC_JMP_long);
                tcg_out32(s, val - 5);      /* jmp rel32 is 5 bytes */
            } else {
                tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
                tcg_out32(s, val - 6);      /* 0f 8x rel32 is 6 bytes */
            }
        }
    } else if (small) {
        /* Forward reference: emit a reloc for the 8-bit displacement.  */
        if (opc == -1) {
            tcg_out8(s, OPC_JMP_short);
        } else {
            tcg_out8(s, OPC_JCC_short + opc);
        }
        tcg_out_reloc(s, s->code_ptr, R_386_PC8, l, -1);
        s->code_ptr += 1;
    } else {
        /* Forward reference: emit a reloc for the 32-bit displacement.  */
        if (opc == -1) {
            tcg_out8(s, OPC_JMP_long);
        } else {
            tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
        }
        tcg_out_reloc(s, s->code_ptr, R_386_PC32, l, -4);
        s->code_ptr += 4;
    }
}
1264
/* Emit a flags-setting comparison of ARG1 against ARG2 (a register, or
   an immediate when CONST_ARG2 is set).  */
static void tcg_out_cmp(TCGContext *s, TCGArg arg1, TCGArg arg2,
                        int const_arg2, int rexw)
{
    if (const_arg2) {
        if (arg2 == 0) {
            /* test r, r: shorter than cmp $0, r.  */
            tcg_out_modrm(s, OPC_TESTL + rexw, arg1, arg1);
        } else {
            tgen_arithi(s, ARITH_CMP + rexw, arg1, arg2, 0);
        }
    } else {
        tgen_arithr(s, ARITH_CMP + rexw, arg1, arg2);
    }
}
1279
/* 32-bit compare-and-branch: cmp then jcc to LABEL.  */
static void tcg_out_brcond32(TCGContext *s, TCGCond cond,
                             TCGArg arg1, TCGArg arg2, int const_arg2,
                             TCGLabel *label, int small)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, 0);
    tcg_out_jxx(s, tcg_cond_to_jcc[cond], label, small);
}
1287
1288#if TCG_TARGET_REG_BITS == 64
/* 64-bit compare-and-branch: as above with a REX.W comparison.  */
static void tcg_out_brcond64(TCGContext *s, TCGCond cond,
                             TCGArg arg1, TCGArg arg2, int const_arg2,
                             TCGLabel *label, int small)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
    tcg_out_jxx(s, tcg_cond_to_jcc[cond], label, small);
}
1296#else
1297
1298
/* Emit a branch on a 64-bit comparison using 32-bit host registers.
   args[0]/args[1] hold the low/high halves of the first operand,
   args[2]/args[3] the low/high halves of the second (const_args[2..3]
   mark immediates), args[4] is the TCGCond, args[5] the target label.
   EQ/NE decompose into two half-comparisons; the ordered conditions
   branch on the high halves first, and fall through to an unsigned
   comparison of the low halves only when the high halves are equal
   (label_next skips the low-half test otherwise).  */
static void tcg_out_brcond2(TCGContext *s, const TCGArg *args,
                            const int *const_args, int small)
{
    TCGLabel *label_next = gen_new_label();
    TCGLabel *label_this = arg_label(args[5]);

    switch(args[4]) {
    case TCG_COND_EQ:
        /* Equal only if both halves match; skip ahead when the low
           halves already differ.  */
        tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2],
                         label_next, 1);
        tcg_out_brcond32(s, TCG_COND_EQ, args[1], args[3], const_args[3],
                         label_this, small);
        break;
    case TCG_COND_NE:
        /* Not-equal if either half differs.  */
        tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2],
                         label_this, small);
        tcg_out_brcond32(s, TCG_COND_NE, args[1], args[3], const_args[3],
                         label_this, small);
        break;
    case TCG_COND_LT:
        /* Signed compare on the high halves decides unless they are
           equal, in which case the low halves compare unsigned.  */
        tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3],
                         label_this, small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2],
                         label_this, small);
        break;
    case TCG_COND_LE:
        tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3],
                         label_this, small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2],
                         label_this, small);
        break;
    case TCG_COND_GT:
        tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3],
                         label_this, small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2],
                         label_this, small);
        break;
    case TCG_COND_GE:
        tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3],
                         label_this, small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2],
                         label_this, small);
        break;
    case TCG_COND_LTU:
        /* Unsigned variants: same structure, unsigned high-half test.  */
        tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3],
                         label_this, small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2],
                         label_this, small);
        break;
    case TCG_COND_LEU:
        tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3],
                         label_this, small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2],
                         label_this, small);
        break;
    case TCG_COND_GTU:
        tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3],
                         label_this, small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2],
                         label_this, small);
        break;
    case TCG_COND_GEU:
        tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3],
                         label_this, small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2],
                         label_this, small);
        break;
    default:
        tcg_abort();
    }
    tcg_out_label(s, label_next, s->code_ptr);
}
1379#endif
1380
/* Emit a 32-bit setcond: DEST = (ARG1 COND ARG2) ? 1 : 0.
   SETcc only writes the low byte, so zero-extend it afterwards.  */
static void tcg_out_setcond32(TCGContext *s, TCGCond cond, TCGArg dest,
                              TCGArg arg1, TCGArg arg2, int const_arg2)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, 0);
    tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
    tcg_out_ext8u(s, dest, dest);
}
1388
1389#if TCG_TARGET_REG_BITS == 64
/* Emit a 64-bit setcond (x86-64 only): DEST = (ARG1 COND ARG2) ? 1 : 0.
   The zero extension of the SETcc result also clears the high 32 bits.  */
static void tcg_out_setcond64(TCGContext *s, TCGCond cond, TCGArg dest,
                              TCGArg arg1, TCGArg arg2, int const_arg2)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
    tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
    tcg_out_ext8u(s, dest, dest);
}
1397#else
/* Emit a setcond over a 64-bit comparison on a 32-bit host.
   args[0] is the destination; args[1..4] are the operand halves and
   args[5] the condition, which are forwarded to tcg_out_brcond2 via
   new_args (shifted down by one, with the label slot appended).  */
static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
                             const int *const_args)
{
    TCGArg new_args[6];
    TCGLabel *label_true, *label_over;

    /* Forward the comparison operands/condition; slot 5 gets a label.  */
    memcpy(new_args, args+1, 5*sizeof(TCGArg));

    if (args[0] == args[1] || args[0] == args[2]
        || (!const_args[3] && args[0] == args[3])
        || (!const_args[4] && args[0] == args[4])) {
        /* The destination overlaps one of the comparison inputs, so it
           cannot be written before the branch: branch to label_true on
           the condition, store 0 or 1 accordingly.  */
        label_true = gen_new_label();
        label_over = gen_new_label();

        new_args[5] = label_arg(label_true);
        tcg_out_brcond2(s, new_args, const_args+1, 1);

        tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);
        tcg_out_jxx(s, JCC_JMP, label_over, 1);
        tcg_out_label(s, label_true, s->code_ptr);

        tcg_out_movi(s, TCG_TYPE_I32, args[0], 1);
        tcg_out_label(s, label_over, s->code_ptr);
    } else {
        /* No overlap: pre-clear the destination, branch past the
           increment on the inverted condition, and add 1 when the
           original condition holds.  */
        tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);

        label_over = gen_new_label();
        new_args[4] = tcg_invert_cond(new_args[4]);
        new_args[5] = label_arg(label_over);
        tcg_out_brcond2(s, new_args, const_args+1, 1);

        tgen_arithi(s, ARITH_ADD, args[0], 1, 0);
        tcg_out_label(s, label_over, s->code_ptr);
    }
}
1439#endif
1440
/* Emit DEST = V1 if COND holds (flags already set by a prior compare).
   Uses CMOVcc when available; otherwise branches around a plain move
   on the inverted condition.  */
static void tcg_out_cmov(TCGContext *s, TCGCond cond, int rexw,
                         TCGReg dest, TCGReg v1)
{
    if (have_cmov) {
        tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond] | rexw, dest, v1);
    } else {
        TCGLabel *over = gen_new_label();
        tcg_out_jxx(s, tcg_cond_to_jcc[tcg_invert_cond(cond)], over, 1);
        tcg_out_mov(s, TCG_TYPE_I32, dest, v1);
        tcg_out_label(s, over, s->code_ptr);
    }
}
1453
/* Emit a 32-bit movcond: DEST = V1 if (C1 COND C2), else unchanged.  */
static void tcg_out_movcond32(TCGContext *s, TCGCond cond, TCGReg dest,
                              TCGReg c1, TCGArg c2, int const_c2,
                              TCGReg v1)
{
    tcg_out_cmp(s, c1, c2, const_c2, 0);
    tcg_out_cmov(s, cond, 0, dest, v1);
}
1461
1462#if TCG_TARGET_REG_BITS == 64
/* Emit a 64-bit movcond (x86-64 only): DEST = V1 if (C1 COND C2).  */
static void tcg_out_movcond64(TCGContext *s, TCGCond cond, TCGReg dest,
                              TCGReg c1, TCGArg c2, int const_c2,
                              TCGReg v1)
{
    tcg_out_cmp(s, c1, c2, const_c2, P_REXW);
    tcg_out_cmov(s, cond, P_REXW, dest, v1);
}
1470#endif
1471
/* Emit count-trailing-zeros: DEST = ctz(ARG1), or ARG2 when ARG1 == 0.
   With BMI1, TZCNT already yields the operand width for a zero input,
   so the constant-ARG2 case needs no fixup (asserted below); otherwise
   fall back to BSF plus a CMOV on the zero-input case.  */
static void tcg_out_ctz(TCGContext *s, int rexw, TCGReg dest, TCGReg arg1,
                        TCGArg arg2, bool const_a2)
{
    if (have_bmi1) {
        tcg_out_modrm(s, OPC_TZCNT + rexw, dest, arg1);
        if (const_a2) {
            /* A constant fallback must equal the width TZCNT produces.  */
            tcg_debug_assert(arg2 == (rexw ? 64 : 32));
        } else {
            tcg_debug_assert(dest != arg2);
            /* TZCNT sets CF on zero input; LTU here keys off that.
               NOTE(review): relies on TZCNT's flag behavior — confirm.  */
            tcg_out_cmov(s, TCG_COND_LTU, rexw, dest, arg2);
        }
    } else {
        tcg_debug_assert(dest != arg2);
        /* BSF leaves DEST undefined and sets ZF when ARG1 == 0;
           substitute ARG2 in that case.  */
        tcg_out_modrm(s, OPC_BSF + rexw, dest, arg1);
        tcg_out_cmov(s, TCG_COND_EQ, rexw, dest, arg2);
    }
}
1489
/* Emit count-leading-zeros: DEST = clz(ARG1), or ARG2 when ARG1 == 0.
   With LZCNT the structure mirrors tcg_out_ctz.  Without it, BSR gives
   the index of the highest set bit, which XOR 31 (or 63) converts to a
   leading-zero count; an explicit compare handles the zero input.  */
static void tcg_out_clz(TCGContext *s, int rexw, TCGReg dest, TCGReg arg1,
                        TCGArg arg2, bool const_a2)
{
    if (have_lzcnt) {
        tcg_out_modrm(s, OPC_LZCNT + rexw, dest, arg1);
        if (const_a2) {
            /* A constant fallback must equal the width LZCNT produces.  */
            tcg_debug_assert(arg2 == (rexw ? 64 : 32));
        } else {
            tcg_debug_assert(dest != arg2);
            /* LZCNT sets CF on zero input; LTU here keys off that.
               NOTE(review): relies on LZCNT's flag behavior — confirm.  */
            tcg_out_cmov(s, TCG_COND_LTU, rexw, dest, arg2);
        }
    } else {
        /* The BSR path cannot accept a constant fallback, and DEST must
           not alias either input since ARG1 is re-tested afterwards.  */
        tcg_debug_assert(!const_a2);
        tcg_debug_assert(dest != arg1);
        tcg_debug_assert(dest != arg2);

        /* bsr + xor converts bit-index to leading-zero count.  */
        tcg_out_modrm(s, OPC_BSR + rexw, dest, arg1);
        tgen_arithi(s, ARITH_XOR + rexw, dest, rexw ? 63 : 31, 0);

        /* Since BSR left DEST undefined for zero input, re-test ARG1
           and substitute ARG2.  */
        tcg_out_cmp(s, arg1, 0, 1, rexw);
        tcg_out_cmov(s, TCG_COND_EQ, rexw, dest, arg2);
    }
}
1515
/* Emit a direct call (CALL=1) or jump (CALL=0) to DEST.
   The 5-byte rel32 form is used when the displacement fits; otherwise
   (possible only on x86-64, where code may span > ±2GB) an indirect
   call/jump through a RIP-relative constant-pool slot is emitted.  */
static void tcg_out_branch(TCGContext *s, int call, tcg_insn_unit *dest)
{
    /* -5 accounts for the length of the call/jmp rel32 instruction.  */
    intptr_t disp = tcg_pcrel_diff(s, dest) - 5;

    if (disp == (int32_t)disp) {
        tcg_out_opc(s, call ? OPC_CALL_Jz : OPC_JMP_long, 0, 0, 0);
        tcg_out32(s, disp);
    } else {
        /* GRP5 /2 (call) or /4 (jmp) with mod=00 rm=101: RIP-relative
           memory operand; the pool entry holds DEST and is fixed up via
           the R_386_PC32 relocation.  */
        tcg_out_opc(s, OPC_GRP5, 0, 0, 0);
        tcg_out8(s, (call ? EXT5_CALLN_Ev : EXT5_JMPN_Ev) << 3 | 5);
        new_pool_label(s, (uintptr_t)dest, R_386_PC32, s->code_ptr, -4);
        tcg_out32(s, 0);
    }
}
1534
/* Emit a call to DEST.  */
static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *dest)
{
    tcg_out_branch(s, 1, dest);
}
1539
/* Emit a jump to DEST.  */
static void tcg_out_jmp(TCGContext *s, tcg_insn_unit *dest)
{
    tcg_out_branch(s, 0, dest);
}
1544
1545static void tcg_out_nopn(TCGContext *s, int n)
1546{
1547 int i;
1548
1549
1550
1551
1552
1553 tcg_debug_assert(n >= 1);
1554 for (i = 1; i < n; ++i) {
1555 tcg_out8(s, 0x66);
1556 }
1557 tcg_out8(s, 0x90);
1558}
1559
1560#if defined(CONFIG_SOFTMMU)
1561#include "tcg-ldst.inc.c"
1562
1563
1564
1565
/* Softmmu load helpers, indexed by (memop & (MO_BSWAP | MO_SIZE));
   see the call site in tcg_out_qemu_ld_slow_path.  Unlisted slots
   (e.g. signed variants) remain NULL.  */
static void * const qemu_ld_helpers[16] = {
    [MO_UB]   = helper_ret_ldub_mmu,
    [MO_LEUW] = helper_le_lduw_mmu,
    [MO_LEUL] = helper_le_ldul_mmu,
    [MO_LEQ]  = helper_le_ldq_mmu,
    [MO_BEUW] = helper_be_lduw_mmu,
    [MO_BEUL] = helper_be_ldul_mmu,
    [MO_BEQ]  = helper_be_ldq_mmu,
};
1575
1576
1577
1578
/* Softmmu store helpers, indexed by (memop & (MO_BSWAP | MO_SIZE));
   see the tail-jump in tcg_out_qemu_st_slow_path.  */
static void * const qemu_st_helpers[16] = {
    [MO_UB]   = helper_ret_stb_mmu,
    [MO_LEUW] = helper_le_stw_mmu,
    [MO_LEUL] = helper_le_stl_mmu,
    [MO_LEQ]  = helper_le_stq_mmu,
    [MO_BEUW] = helper_be_stw_mmu,
    [MO_BEUL] = helper_be_stl_mmu,
    [MO_BEQ]  = helper_be_stq_mmu,
};
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
/* Perform the TLB lookup and compare for a softmmu memory access.

   Inputs:  ADDRLO/ADDRHI hold the low/high parts of the guest address
   (ADDRHI only used when TARGET_LONG_BITS > TCG_TARGET_REG_BITS);
   MEM_INDEX and OPC are the memory context and memory operation;
   WHICH is the byte offset within CPUTLBEntry of the comparator field
   (addr_read or addr_write).

   Outputs: LABEL_PTR[0] (and [1] for split addresses) receive the
   positions of the 32-bit displacements of forward jumps taken to the
   slow path on a TLB miss.  On the hit path, TCG_REG_L1 ends up holding
   the guest address plus the TLB addend (i.e. a host address, per the
   'addend' field); TCG_REG_L0 is clobbered.  */
static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
                                    int mem_index, TCGMemOp opc,
                                    tcg_insn_unit **label_ptr, int which)
{
    const TCGReg r0 = TCG_REG_L0;
    const TCGReg r1 = TCG_REG_L1;
    TCGType ttype = TCG_TYPE_I32;
    TCGType tlbtype = TCG_TYPE_I32;
    int trexw = 0, hrexw = 0, tlbrexw = 0;
    unsigned a_bits = get_alignment_bits(opc);
    unsigned s_bits = opc & MO_SIZE;
    unsigned a_mask = (1 << a_bits) - 1;
    unsigned s_mask = (1 << s_bits) - 1;
    target_ulong tlb_mask;

    /* Select 64-bit operand sizes (REX.W) for the address arithmetic,
       the host-pointer arithmetic, and the TLB index, as applicable.  */
    if (TCG_TARGET_REG_BITS == 64) {
        if (TARGET_LONG_BITS == 64) {
            ttype = TCG_TYPE_I64;
            trexw = P_REXW;
        }
        if (TCG_TYPE_PTR == TCG_TYPE_I64) {
            hrexw = P_REXW;
            if (TARGET_PAGE_BITS + CPU_TLB_BITS > 32) {
                tlbtype = TCG_TYPE_I64;
                tlbrexw = P_REXW;
            }
        }
    }

    tcg_out_mov(s, tlbtype, r0, addrlo);
    /* If the required alignment covers the access size, the address can
       be compared as-is.  Otherwise add s_mask - a_mask first, so that
       an access crossing the page also fails the masked compare.  */
    if (a_bits >= s_bits) {
        tcg_out_mov(s, ttype, r1, addrlo);
    } else {
        tcg_out_modrm_offset(s, OPC_LEA + trexw, r1, addrlo, s_mask - a_mask);
    }
    /* Page bits plus the low alignment bits must all match.  */
    tlb_mask = (target_ulong)TARGET_PAGE_MASK | a_mask;

    /* r0 = TLB index, scaled to the entry size.  */
    tcg_out_shifti(s, SHIFT_SHR + tlbrexw, r0,
                   TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);

    tgen_arithi(s, ARITH_AND + trexw, r1, tlb_mask, 0);
    tgen_arithi(s, ARITH_AND + tlbrexw, r0,
                (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS, 0);

    /* r0 = &env->tlb_table[mem_index][index].{addr_read,addr_write}  */
    tcg_out_modrm_sib_offset(s, OPC_LEA + hrexw, r0, TCG_AREG0, r0, 0,
                             offsetof(CPUArchState, tlb_table[mem_index][0])
                             + which);

    /* cmp 0(r0), r1 */
    tcg_out_modrm_offset(s, OPC_CMP_GvEv + trexw, r1, r0, 0);

    /* Prepare for the slow path: reload the (unmasked) address into r1
       before the flags-preserving branch below.  */
    tcg_out_mov(s, ttype, r1, addrlo);

    /* jne slow_path */
    tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
    label_ptr[0] = s->code_ptr;
    s->code_ptr += 4;

    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
        /* cmp 4(r0), addrhi — compare the high half of the address.  */
        tcg_out_modrm_offset(s, OPC_CMP_GvEv, addrhi, r0, 4);

        /* jne slow_path */
        tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
        label_ptr[1] = s->code_ptr;
        s->code_ptr += 4;
    }

    /* TLB hit: add addend(r0) into r1 to form the host address.
       The (- which) undoes the comparator offset folded into r0.  */
    tcg_out_modrm_offset(s, OPC_ADD_GvEv + hrexw, r1, r0,
                         offsetof(CPUTLBEntry, addend) - which);
}
1693
1694
1695
1696
1697
/* Record the context of a call to the qemu_ld/st helpers, so that the
   out-of-line slow path can be generated later.  LABEL_PTR points at
   the branch displacement(s) written by tcg_out_tlb_load; RADDR is the
   return address back into the fast path.  */
static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
                                TCGReg datalo, TCGReg datahi,
                                TCGReg addrlo, TCGReg addrhi,
                                tcg_insn_unit *raddr,
                                tcg_insn_unit **label_ptr)
{
    TCGLabelQemuLdst *label = new_ldst_label(s);

    label->is_ld = is_ld;
    label->oi = oi;
    label->datalo_reg = datalo;
    label->datahi_reg = datahi;
    label->addrlo_reg = addrlo;
    label->addrhi_reg = addrhi;
    label->raddr = raddr;
    label->label_ptr[0] = label_ptr[0];
    /* A second miss branch exists only for split guest addresses.  */
    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
        label->label_ptr[1] = label_ptr[1];
    }
}
1718
1719
1720
1721
/* Generate code for the slow path of a guest load: patch the TLB-miss
   branches to land here, call the appropriate load helper, move/extend
   the result into the destination register(s), and jump back to the
   fast-path continuation at l->raddr.  */
static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
{
    TCGMemOpIdx oi = l->oi;
    TCGMemOp opc = get_memop(oi);
    TCGReg data_reg;
    tcg_insn_unit **label_ptr = &l->label_ptr[0];

    /* Resolve the TLB-miss jump displacements to point here.  */
    tcg_patch32(label_ptr[0], s->code_ptr - label_ptr[0] - 4);
    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
        tcg_patch32(label_ptr[1], s->code_ptr - label_ptr[1] - 4);
    }

    if (TCG_TARGET_REG_BITS == 32) {
        /* i386: helper arguments are passed on the stack, in order:
           env, addrlo[, addrhi], oi, return address.  */
        int ofs = 0;

        tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs);
        ofs += 4;

        tcg_out_st(s, TCG_TYPE_I32, l->addrlo_reg, TCG_REG_ESP, ofs);
        ofs += 4;

        if (TARGET_LONG_BITS == 64) {
            tcg_out_st(s, TCG_TYPE_I32, l->addrhi_reg, TCG_REG_ESP, ofs);
            ofs += 4;
        }

        tcg_out_sti(s, TCG_TYPE_I32, oi, TCG_REG_ESP, ofs);
        ofs += 4;

        tcg_out_sti(s, TCG_TYPE_PTR, (uintptr_t)l->raddr, TCG_REG_ESP, ofs);
    } else {
        /* x86-64: env in arg0, oi in arg2, return address in arg3.
           Arg1 is not written here — presumably the address is already
           in the second argument register (see tcg_out_tlb_load, which
           leaves the guest address in TCG_REG_L1) — TODO confirm.  */
        tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);

        tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2], oi);
        tcg_out_movi(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[3],
                     (uintptr_t)l->raddr);
    }

    tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);

    /* Move the helper result (in EAX/RAX, plus EDX for 32-bit 64-bit
       loads) into the destination, sign/zero-extending as needed.  */
    data_reg = l->datalo_reg;
    switch (opc & MO_SSIZE) {
    case MO_SB:
        tcg_out_ext8s(s, data_reg, TCG_REG_EAX, P_REXW);
        break;
    case MO_SW:
        tcg_out_ext16s(s, data_reg, TCG_REG_EAX, P_REXW);
        break;
#if TCG_TARGET_REG_BITS == 64
    case MO_SL:
        tcg_out_ext32s(s, data_reg, TCG_REG_EAX);
        break;
#endif
    case MO_UB:
    case MO_UW:
        /* The helper result is already zero-extended; a plain move
           suffices for the unsigned cases.  */
    case MO_UL:
        tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
        break;
    case MO_Q:
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_RAX);
        } else if (data_reg == TCG_REG_EDX) {
            /* datalo wants EDX, which holds the high half: xchg the
               halves first so neither move clobbers its source.  */
            tcg_out_opc(s, OPC_XCHG_ax_r32 + TCG_REG_EDX, 0, 0, 0);
            tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EAX);
        } else {
            tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
            tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EDX);
        }
        break;
    default:
        tcg_abort();
    }

    /* Jump back to the TB code following the fast-path access.  */
    tcg_out_jmp(s, l->raddr);
}
1801
1802
1803
1804
/* Generate code for the slow path of a guest store: patch the TLB-miss
   branches to land here, marshal the helper arguments, then push the
   fast-path return address and tail-jump to the store helper — the
   helper's own return thus resumes at l->raddr.  */
static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
{
    TCGMemOpIdx oi = l->oi;
    TCGMemOp opc = get_memop(oi);
    TCGMemOp s_bits = opc & MO_SIZE;
    tcg_insn_unit **label_ptr = &l->label_ptr[0];
    TCGReg retaddr;

    /* Resolve the TLB-miss jump displacements to point here.  */
    tcg_patch32(label_ptr[0], s->code_ptr - label_ptr[0] - 4);
    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
        tcg_patch32(label_ptr[1], s->code_ptr - label_ptr[1] - 4);
    }

    if (TCG_TARGET_REG_BITS == 32) {
        /* i386: stack arguments, in order:
           env, addrlo[, addrhi], datalo[, datahi], oi, return address.  */
        int ofs = 0;

        tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs);
        ofs += 4;

        tcg_out_st(s, TCG_TYPE_I32, l->addrlo_reg, TCG_REG_ESP, ofs);
        ofs += 4;

        if (TARGET_LONG_BITS == 64) {
            tcg_out_st(s, TCG_TYPE_I32, l->addrhi_reg, TCG_REG_ESP, ofs);
            ofs += 4;
        }

        tcg_out_st(s, TCG_TYPE_I32, l->datalo_reg, TCG_REG_ESP, ofs);
        ofs += 4;

        if (s_bits == MO_64) {
            tcg_out_st(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_ESP, ofs);
            ofs += 4;
        }

        tcg_out_sti(s, TCG_TYPE_I32, oi, TCG_REG_ESP, ofs);
        ofs += 4;

        retaddr = TCG_REG_EAX;
        tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
        tcg_out_st(s, TCG_TYPE_PTR, retaddr, TCG_REG_ESP, ofs);
    } else {
        /* x86-64: env in arg0, data in arg2, oi in arg3.  Arg1 is not
           written here — presumably the address is already in the second
           argument register (from tcg_out_tlb_load) — TODO confirm.  */
        tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);

        tcg_out_mov(s, (s_bits == MO_64 ? TCG_TYPE_I64 : TCG_TYPE_I32),
                    tcg_target_call_iarg_regs[2], l->datalo_reg);
        tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3], oi);

        if (ARRAY_SIZE(tcg_target_call_iarg_regs) > 4) {
            /* Enough register arguments: pass raddr in arg4.  */
            retaddr = tcg_target_call_iarg_regs[4];
            tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
        } else {
            /* Win64-style ABI: spill raddr to the outgoing stack slot.  */
            retaddr = TCG_REG_RAX;
            tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
            tcg_out_st(s, TCG_TYPE_PTR, retaddr, TCG_REG_ESP,
                       TCG_TARGET_CALL_STACK_OFFSET);
        }
    }

    /* "Tail call" to the helper, with the return address back inline.  */
    tcg_out_push(s, retaddr);
    tcg_out_jmp(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
}
1869#elif defined(__x86_64__) && defined(__linux__)
1870# include <asm/prctl.h>
1871# include <sys/prctl.h>
1872
1873int arch_prctl(int code, unsigned long addr);
1874
/* Opcode prefix flag (P_GS) applied to guest accesses once the guest
   base has been installed in %gs; zero until setup succeeds.  */
static int guest_base_flags;
/* x86-64 Linux user-only: point %gs at guest_base via arch_prctl so
   that guest memory accesses can use a segment override instead of an
   explicit base-register addition.  */
static inline void setup_guest_base_seg(void)
{
    if (arch_prctl(ARCH_SET_GS, guest_base) == 0) {
        guest_base_flags = P_GS;
    }
}
1882#else
1883# define guest_base_flags 0
1884static inline void setup_guest_base_seg(void) { }
1885#endif
1886
/* Emit the actual guest load from host address BASE+INDEX*1+OFS (with
   optional segment-override flag SEG) into DATALO/DATAHI, honoring the
   size, signedness, and byte-swap requested by MEMOP.  When MOVBE is
   available it replaces the separate load+bswap sequence.  */
static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
                                   TCGReg base, int index, intptr_t ofs,
                                   int seg, TCGMemOp memop)
{
    const TCGMemOp real_bswap = memop & MO_BSWAP;
    TCGMemOp bswap = real_bswap;
    int movop = OPC_MOVL_GvEv;

    /* MOVBE folds the byte swap into the load itself.  */
    if (have_movbe && real_bswap) {
        bswap = 0;
        movop = OPC_MOVBE_GyMy;
    }

    switch (memop & MO_SSIZE) {
    case MO_UB:
        tcg_out_modrm_sib_offset(s, OPC_MOVZBL + seg, datalo,
                                 base, index, 0, ofs);
        break;
    case MO_SB:
        tcg_out_modrm_sib_offset(s, OPC_MOVSBL + P_REXW + seg, datalo,
                                 base, index, 0, ofs);
        break;
    case MO_UW:
        tcg_out_modrm_sib_offset(s, OPC_MOVZWL + seg, datalo,
                                 base, index, 0, ofs);
        /* Single-byte swap of a 16-bit value via rolw $8.  */
        if (real_bswap) {
            tcg_out_rolw_8(s, datalo);
        }
        break;
    case MO_SW:
        if (real_bswap) {
            if (have_movbe) {
                /* 16-bit MOVBE load, then sign-extend in place.  */
                tcg_out_modrm_sib_offset(s, OPC_MOVBE_GyMy + P_DATA16 + seg,
                                         datalo, base, index, 0, ofs);
            } else {
                tcg_out_modrm_sib_offset(s, OPC_MOVZWL + seg, datalo,
                                         base, index, 0, ofs);
                tcg_out_rolw_8(s, datalo);
            }
            tcg_out_modrm(s, OPC_MOVSWL + P_REXW, datalo, datalo);
        } else {
            tcg_out_modrm_sib_offset(s, OPC_MOVSWL + P_REXW + seg,
                                     datalo, base, index, 0, ofs);
        }
        break;
    case MO_UL:
        tcg_out_modrm_sib_offset(s, movop + seg, datalo, base, index, 0, ofs);
        if (bswap) {
            tcg_out_bswap32(s, datalo);
        }
        break;
#if TCG_TARGET_REG_BITS == 64
    case MO_SL:
        if (real_bswap) {
            /* Load (possibly with MOVBE or explicit bswap), then
               sign-extend the 32-bit result to 64 bits.  */
            tcg_out_modrm_sib_offset(s, movop + seg, datalo,
                                     base, index, 0, ofs);
            if (bswap) {
                tcg_out_bswap32(s, datalo);
            }
            tcg_out_ext32s(s, datalo, datalo);
        } else {
            tcg_out_modrm_sib_offset(s, OPC_MOVSLQ + seg, datalo,
                                     base, index, 0, ofs);
        }
        break;
#endif
    case MO_Q:
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_out_modrm_sib_offset(s, movop + P_REXW + seg, datalo,
                                     base, index, 0, ofs);
            if (bswap) {
                tcg_out_bswap64(s, datalo);
            }
        } else {
            /* 32-bit host: load as two words.  For a byte-swapped load
               the halves swap roles, so exchange datalo/datahi first.  */
            if (real_bswap) {
                int t = datalo;
                datalo = datahi;
                datahi = t;
            }
            /* Load the word that overwrites BASE last, so the second
               address computation still sees the original base.  */
            if (base != datalo) {
                tcg_out_modrm_sib_offset(s, movop + seg, datalo,
                                         base, index, 0, ofs);
                tcg_out_modrm_sib_offset(s, movop + seg, datahi,
                                         base, index, 0, ofs + 4);
            } else {
                tcg_out_modrm_sib_offset(s, movop + seg, datahi,
                                         base, index, 0, ofs + 4);
                tcg_out_modrm_sib_offset(s, movop + seg, datalo,
                                         base, index, 0, ofs);
            }
            if (bswap) {
                tcg_out_bswap32(s, datalo);
                tcg_out_bswap32(s, datahi);
            }
        }
        break;
    default:
        tcg_abort();
    }
}
1987
1988
1989
1990
/* Emit a qemu_ld opcode: decode the TCG operands, then either perform
   the softmmu TLB lookup (fast path + recorded slow path) or, for
   user-only builds, address guest memory directly relative to
   guest_base.  IS64 selects a 64-bit data value.  */
static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
{
    TCGReg datalo, datahi, addrlo;
    TCGReg addrhi __attribute__((unused));
    TCGMemOpIdx oi;
    TCGMemOp opc;
#if defined(CONFIG_SOFTMMU)
    int mem_index;
    tcg_insn_unit *label_ptr[2];
#endif

    /* Operand layout: datalo[, datahi], addrlo[, addrhi], oi —
       the high halves exist only when values are split across
       32-bit host registers.  */
    datalo = *args++;
    datahi = (TCG_TARGET_REG_BITS == 32 && is64 ? *args++ : 0);
    addrlo = *args++;
    addrhi = (TARGET_LONG_BITS > TCG_TARGET_REG_BITS ? *args++ : 0);
    oi = *args++;
    opc = get_memop(oi);

#if defined(CONFIG_SOFTMMU)
    mem_index = get_mmuidx(oi);

    tcg_out_tlb_load(s, addrlo, addrhi, mem_index, opc,
                     label_ptr, offsetof(CPUTLBEntry, addr_read));

    /* TLB hit: TCG_REG_L1 holds the host address.  */
    tcg_out_qemu_ld_direct(s, datalo, datahi, TCG_REG_L1, -1, 0, 0, opc);

    /* Record the slow path, to be emitted out of line later.  */
    add_qemu_ldst_label(s, true, oi, datalo, datahi, addrlo, addrhi,
                        s->code_ptr, label_ptr);
#else
    {
        int32_t offset = guest_base;
        TCGReg base = addrlo;
        int index = -1;
        int seg = 0;

        /* With guest_base == 0, or with the base folded into a segment
           register, address guest memory directly; the ADDR32 prefix
           truncates to 32 bits when the guest is narrower than the
           host.  NOTE(review): this appears to assume the address
           register is already zero-extended — confirm.  */
        if (guest_base == 0 || guest_base_flags) {
            seg = guest_base_flags;
            offset = 0;
            if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
                seg |= P_ADDR32;
            }
        } else if (TCG_TARGET_REG_BITS == 64) {
            /* Zero-extend a 32-bit guest address explicitly, and put a
               guest_base that does not fit in a disp32 into an index
               register instead.  */
            if (TARGET_LONG_BITS == 32) {
                tcg_out_ext32u(s, TCG_REG_L0, base);
                base = TCG_REG_L0;
            }
            if (offset != guest_base) {
                tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, guest_base);
                index = TCG_REG_L1;
                offset = 0;
            }
        }

        tcg_out_qemu_ld_direct(s, datalo, datahi,
                               base, index, offset, seg, opc);
    }
#endif
}
2055
/* Emit the actual guest store of DATALO/DATAHI to host address BASE+OFS
   (with optional segment-override flag SEG), honoring the size and
   byte-swap requested by MEMOP.  TCG_REG_L0 is used as a scratch for
   byte swaps and for byte stores from registers without a low-byte
   encoding; NOTE(review): this presumes L0 is free at every call site
   (register constraints elsewhere) — confirm.  */
static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
                                   TCGReg base, intptr_t ofs, int seg,
                                   TCGMemOp memop)
{
    const TCGReg scratch = TCG_REG_L0;
    const TCGMemOp real_bswap = memop & MO_BSWAP;
    TCGMemOp bswap = real_bswap;
    int movop = OPC_MOVL_EvGv;

    /* MOVBE folds the byte swap into the store itself.  */
    if (have_movbe && real_bswap) {
        bswap = 0;
        movop = OPC_MOVBE_MyGy;
    }

    switch (memop & MO_SIZE) {
    case MO_8:
        /* On i386, registers >= 4 (ESP..EDI) have no byte encoding;
           bounce the value through the scratch register.  */
        if (TCG_TARGET_REG_BITS == 32 && datalo >= 4) {
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
            datalo = scratch;
        }
        tcg_out_modrm_offset(s, OPC_MOVB_EvGv + P_REXB_R + seg,
                             datalo, base, ofs);
        break;
    case MO_16:
        /* Swap in the scratch so the source register is preserved.  */
        if (bswap) {
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
            tcg_out_rolw_8(s, scratch);
            datalo = scratch;
        }
        tcg_out_modrm_offset(s, movop + P_DATA16 + seg, datalo, base, ofs);
        break;
    case MO_32:
        if (bswap) {
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
            tcg_out_bswap32(s, scratch);
            datalo = scratch;
        }
        tcg_out_modrm_offset(s, movop + seg, datalo, base, ofs);
        break;
    case MO_64:
        if (TCG_TARGET_REG_BITS == 64) {
            if (bswap) {
                tcg_out_mov(s, TCG_TYPE_I64, scratch, datalo);
                tcg_out_bswap64(s, scratch);
                datalo = scratch;
            }
            tcg_out_modrm_offset(s, movop + P_REXW + seg, datalo, base, ofs);
        } else if (bswap) {
            /* 32-bit host, explicit swap: store high half first at ofs,
               then low half at ofs+4, swapping each in the scratch.  */
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datahi);
            tcg_out_bswap32(s, scratch);
            tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs);
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
            tcg_out_bswap32(s, scratch);
            tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs+4);
        } else {
            /* No explicit swap needed (little-endian or MOVBE); for a
               byte-swapped store the halves simply trade places.  */
            if (real_bswap) {
                int t = datalo;
                datalo = datahi;
                datahi = t;
            }
            tcg_out_modrm_offset(s, movop + seg, datalo, base, ofs);
            tcg_out_modrm_offset(s, movop + seg, datahi, base, ofs+4);
        }
        break;
    default:
        tcg_abort();
    }
}
2130
/* Emit a qemu_st opcode: decode the TCG operands, then either perform
   the softmmu TLB lookup (fast path + recorded slow path) or, for
   user-only builds, address guest memory directly relative to
   guest_base.  IS64 selects a 64-bit data value.  */
static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
{
    TCGReg datalo, datahi, addrlo;
    TCGReg addrhi __attribute__((unused));
    TCGMemOpIdx oi;
    TCGMemOp opc;
#if defined(CONFIG_SOFTMMU)
    int mem_index;
    tcg_insn_unit *label_ptr[2];
#endif

    /* Operand layout: datalo[, datahi], addrlo[, addrhi], oi.  */
    datalo = *args++;
    datahi = (TCG_TARGET_REG_BITS == 32 && is64 ? *args++ : 0);
    addrlo = *args++;
    addrhi = (TARGET_LONG_BITS > TCG_TARGET_REG_BITS ? *args++ : 0);
    oi = *args++;
    opc = get_memop(oi);

#if defined(CONFIG_SOFTMMU)
    mem_index = get_mmuidx(oi);

    tcg_out_tlb_load(s, addrlo, addrhi, mem_index, opc,
                     label_ptr, offsetof(CPUTLBEntry, addr_write));

    /* TLB hit: TCG_REG_L1 holds the host address.  */
    tcg_out_qemu_st_direct(s, datalo, datahi, TCG_REG_L1, 0, 0, opc);

    /* Record the slow path, to be emitted out of line later.  */
    add_qemu_ldst_label(s, false, oi, datalo, datahi, addrlo, addrhi,
                        s->code_ptr, label_ptr);
#else
    {
        int32_t offset = guest_base;
        TCGReg base = addrlo;
        int seg = 0;

        /* See the matching logic in tcg_out_qemu_ld; note that unlike
           the load path there is no index register here, so an
           out-of-range guest_base is added into TCG_REG_L1 instead.  */
        if (guest_base == 0 || guest_base_flags) {
            seg = guest_base_flags;
            offset = 0;
            if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
                seg |= P_ADDR32;
            }
        } else if (TCG_TARGET_REG_BITS == 64) {
            if (offset != guest_base) {
                /* Zero-extend a 32-bit guest address, then materialize
                   guest_base and add the address into it.  */
                if (TARGET_LONG_BITS == 32) {
                    tcg_out_ext32u(s, TCG_REG_L0, base);
                    base = TCG_REG_L0;
                }
                tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, guest_base);
                tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L1, base);
                base = TCG_REG_L1;
                offset = 0;
            } else if (TARGET_LONG_BITS == 32) {
                tcg_out_ext32u(s, TCG_REG_L1, base);
                base = TCG_REG_L1;
            }
        }

        tcg_out_qemu_st_direct(s, datalo, datahi, base, offset, seg, opc);
    }
#endif
}
2196
2197static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
2198 const TCGArg *args, const int *const_args)
2199{
2200 TCGArg a0, a1, a2;
2201 int c, const_a2, vexop, rexw = 0;
2202
2203#if TCG_TARGET_REG_BITS == 64
2204# define OP_32_64(x) \
2205 case glue(glue(INDEX_op_, x), _i64): \
2206 rexw = P_REXW; \
2207 case glue(glue(INDEX_op_, x), _i32)
2208#else
2209# define OP_32_64(x) \
2210 case glue(glue(INDEX_op_, x), _i32)
2211#endif
2212
2213
2214 a0 = args[0];
2215 a1 = args[1];
2216 a2 = args[2];
2217 const_a2 = const_args[2];
2218
2219 switch (opc) {
2220 case INDEX_op_exit_tb:
2221
2222 if (a0 == 0) {
2223 tcg_out_jmp(s, s->code_gen_epilogue);
2224 } else {
2225 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_EAX, a0);
2226 tcg_out_jmp(s, tb_ret_addr);
2227 }
2228 break;
2229 case INDEX_op_goto_tb:
2230 if (s->tb_jmp_insn_offset) {
2231
2232 int gap;
2233
2234
2235
2236 gap = tcg_pcrel_diff(s, QEMU_ALIGN_PTR_UP(s->code_ptr + 1, 4));
2237 if (gap != 1) {
2238 tcg_out_nopn(s, gap - 1);
2239 }
2240 tcg_out8(s, OPC_JMP_long);
2241 s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
2242 tcg_out32(s, 0);
2243 } else {
2244
2245 tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, -1,
2246 (intptr_t)(s->tb_jmp_target_addr + a0));
2247 }
2248 set_jmp_reset_offset(s, a0);
2249 break;
2250 case INDEX_op_goto_ptr:
2251
2252 tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, a0);
2253 break;
2254 case INDEX_op_br:
2255 tcg_out_jxx(s, JCC_JMP, arg_label(a0), 0);
2256 break;
2257 OP_32_64(ld8u):
2258
2259 tcg_out_modrm_offset(s, OPC_MOVZBL, a0, a1, a2);
2260 break;
2261 OP_32_64(ld8s):
2262 tcg_out_modrm_offset(s, OPC_MOVSBL + rexw, a0, a1, a2);
2263 break;
2264 OP_32_64(ld16u):
2265
2266 tcg_out_modrm_offset(s, OPC_MOVZWL, a0, a1, a2);
2267 break;
2268 OP_32_64(ld16s):
2269 tcg_out_modrm_offset(s, OPC_MOVSWL + rexw, a0, a1, a2);
2270 break;
2271#if TCG_TARGET_REG_BITS == 64
2272 case INDEX_op_ld32u_i64:
2273#endif
2274 case INDEX_op_ld_i32:
2275 tcg_out_ld(s, TCG_TYPE_I32, a0, a1, a2);
2276 break;
2277
2278 OP_32_64(st8):
2279 if (const_args[0]) {
2280 tcg_out_modrm_offset(s, OPC_MOVB_EvIz, 0, a1, a2);
2281 tcg_out8(s, a0);
2282 } else {
2283 tcg_out_modrm_offset(s, OPC_MOVB_EvGv | P_REXB_R, a0, a1, a2);
2284 }
2285 break;
2286 OP_32_64(st16):
2287 if (const_args[0]) {
2288 tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_DATA16, 0, a1, a2);
2289 tcg_out16(s, a0);
2290 } else {
2291 tcg_out_modrm_offset(s, OPC_MOVL_EvGv | P_DATA16, a0, a1, a2);
2292 }
2293 break;
2294#if TCG_TARGET_REG_BITS == 64
2295 case INDEX_op_st32_i64:
2296#endif
2297 case INDEX_op_st_i32:
2298 if (const_args[0]) {
2299 tcg_out_modrm_offset(s, OPC_MOVL_EvIz, 0, a1, a2);
2300 tcg_out32(s, a0);
2301 } else {
2302 tcg_out_st(s, TCG_TYPE_I32, a0, a1, a2);
2303 }
2304 break;
2305
2306 OP_32_64(add):
2307
2308 if (a0 != a1) {
2309 TCGArg c3 = 0;
2310 if (const_a2) {
2311 c3 = a2, a2 = -1;
2312 } else if (a0 == a2) {
2313
2314
2315 tgen_arithr(s, ARITH_ADD + rexw, a0, a1);
2316 break;
2317 }
2318
2319 tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, a1, a2, 0, c3);
2320 break;
2321 }
2322 c = ARITH_ADD;
2323 goto gen_arith;
2324 OP_32_64(sub):
2325 c = ARITH_SUB;
2326 goto gen_arith;
2327 OP_32_64(and):
2328 c = ARITH_AND;
2329 goto gen_arith;
2330 OP_32_64(or):
2331 c = ARITH_OR;
2332 goto gen_arith;
2333 OP_32_64(xor):
2334 c = ARITH_XOR;
2335 goto gen_arith;
2336 gen_arith:
2337 if (const_a2) {
2338 tgen_arithi(s, c + rexw, a0, a2, 0);
2339 } else {
2340 tgen_arithr(s, c + rexw, a0, a2);
2341 }
2342 break;
2343
2344 OP_32_64(andc):
2345 if (const_a2) {
2346 tcg_out_mov(s, rexw ? TCG_TYPE_I64 : TCG_TYPE_I32, a0, a1);
2347 tgen_arithi(s, ARITH_AND + rexw, a0, ~a2, 0);
2348 } else {
2349 tcg_out_vex_modrm(s, OPC_ANDN + rexw, a0, a2, a1);
2350 }
2351 break;
2352
2353 OP_32_64(mul):
2354 if (const_a2) {
2355 int32_t val;
2356 val = a2;
2357 if (val == (int8_t)val) {
2358 tcg_out_modrm(s, OPC_IMUL_GvEvIb + rexw, a0, a0);
2359 tcg_out8(s, val);
2360 } else {
2361 tcg_out_modrm(s, OPC_IMUL_GvEvIz + rexw, a0, a0);
2362 tcg_out32(s, val);
2363 }
2364 } else {
2365 tcg_out_modrm(s, OPC_IMUL_GvEv + rexw, a0, a2);
2366 }
2367 break;
2368
2369 OP_32_64(div2):
2370 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IDIV, args[4]);
2371 break;
2372 OP_32_64(divu2):
2373 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_DIV, args[4]);
2374 break;
2375
2376 OP_32_64(shl):
2377
2378 if (const_a2 && a0 != a1 && (a2 - 1) < 3) {
2379 if (a2 - 1 == 0) {
2380
2381 tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, a1, a1, 0, 0);
2382 } else {
2383
2384 tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, -1, a1, a2, 0);
2385 }
2386 break;
2387 }
2388 c = SHIFT_SHL;
2389 vexop = OPC_SHLX;
2390 goto gen_shift_maybe_vex;
2391 OP_32_64(shr):
2392 c = SHIFT_SHR;
2393 vexop = OPC_SHRX;
2394 goto gen_shift_maybe_vex;
2395 OP_32_64(sar):
2396 c = SHIFT_SAR;
2397 vexop = OPC_SARX;
2398 goto gen_shift_maybe_vex;
2399 OP_32_64(rotl):
2400 c = SHIFT_ROL;
2401 goto gen_shift;
2402 OP_32_64(rotr):
2403 c = SHIFT_ROR;
2404 goto gen_shift;
2405 gen_shift_maybe_vex:
2406 if (have_bmi2) {
2407 if (!const_a2) {
2408 tcg_out_vex_modrm(s, vexop + rexw, a0, a2, a1);
2409 break;
2410 }
2411 tcg_out_mov(s, rexw ? TCG_TYPE_I64 : TCG_TYPE_I32, a0, a1);
2412 }
2413
2414 gen_shift:
2415 if (const_a2) {
2416 tcg_out_shifti(s, c + rexw, a0, a2);
2417 } else {
2418 tcg_out_modrm(s, OPC_SHIFT_cl + rexw, c, a0);
2419 }
2420 break;
2421
2422 OP_32_64(ctz):
2423 tcg_out_ctz(s, rexw, args[0], args[1], args[2], const_args[2]);
2424 break;
2425 OP_32_64(clz):
2426 tcg_out_clz(s, rexw, args[0], args[1], args[2], const_args[2]);
2427 break;
2428 OP_32_64(ctpop):
2429 tcg_out_modrm(s, OPC_POPCNT + rexw, a0, a1);
2430 break;
2431
2432 case INDEX_op_brcond_i32:
2433 tcg_out_brcond32(s, a2, a0, a1, const_args[1], arg_label(args[3]), 0);
2434 break;
2435 case INDEX_op_setcond_i32:
2436 tcg_out_setcond32(s, args[3], a0, a1, a2, const_a2);
2437 break;
2438 case INDEX_op_movcond_i32:
2439 tcg_out_movcond32(s, args[5], a0, a1, a2, const_a2, args[3]);
2440 break;
2441
2442 OP_32_64(bswap16):
2443 tcg_out_rolw_8(s, a0);
2444 break;
2445 OP_32_64(bswap32):
2446 tcg_out_bswap32(s, a0);
2447 break;
2448
2449 OP_32_64(neg):
2450 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NEG, a0);
2451 break;
2452 OP_32_64(not):
2453 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NOT, a0);
2454 break;
2455
2456 OP_32_64(ext8s):
2457 tcg_out_ext8s(s, a0, a1, rexw);
2458 break;
2459 OP_32_64(ext16s):
2460 tcg_out_ext16s(s, a0, a1, rexw);
2461 break;
2462 OP_32_64(ext8u):
2463 tcg_out_ext8u(s, a0, a1);
2464 break;
2465 OP_32_64(ext16u):
2466 tcg_out_ext16u(s, a0, a1);
2467 break;
2468
2469 case INDEX_op_qemu_ld_i32:
2470 tcg_out_qemu_ld(s, args, 0);
2471 break;
2472 case INDEX_op_qemu_ld_i64:
2473 tcg_out_qemu_ld(s, args, 1);
2474 break;
2475 case INDEX_op_qemu_st_i32:
2476 tcg_out_qemu_st(s, args, 0);
2477 break;
2478 case INDEX_op_qemu_st_i64:
2479 tcg_out_qemu_st(s, args, 1);
2480 break;
2481
2482 OP_32_64(mulu2):
2483 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_MUL, args[3]);
2484 break;
2485 OP_32_64(muls2):
2486 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IMUL, args[3]);
2487 break;
2488 OP_32_64(add2):
2489 if (const_args[4]) {
2490 tgen_arithi(s, ARITH_ADD + rexw, a0, args[4], 1);
2491 } else {
2492 tgen_arithr(s, ARITH_ADD + rexw, a0, args[4]);
2493 }
2494 if (const_args[5]) {
2495 tgen_arithi(s, ARITH_ADC + rexw, a1, args[5], 1);
2496 } else {
2497 tgen_arithr(s, ARITH_ADC + rexw, a1, args[5]);
2498 }
2499 break;
2500 OP_32_64(sub2):
2501 if (const_args[4]) {
2502 tgen_arithi(s, ARITH_SUB + rexw, a0, args[4], 1);
2503 } else {
2504 tgen_arithr(s, ARITH_SUB + rexw, a0, args[4]);
2505 }
2506 if (const_args[5]) {
2507 tgen_arithi(s, ARITH_SBB + rexw, a1, args[5], 1);
2508 } else {
2509 tgen_arithr(s, ARITH_SBB + rexw, a1, args[5]);
2510 }
2511 break;
2512
2513#if TCG_TARGET_REG_BITS == 32
2514 case INDEX_op_brcond2_i32:
2515 tcg_out_brcond2(s, args, const_args, 0);
2516 break;
2517 case INDEX_op_setcond2_i32:
2518 tcg_out_setcond2(s, args, const_args);
2519 break;
2520#else
2521 case INDEX_op_ld32s_i64:
2522 tcg_out_modrm_offset(s, OPC_MOVSLQ, a0, a1, a2);
2523 break;
2524 case INDEX_op_ld_i64:
2525 tcg_out_ld(s, TCG_TYPE_I64, a0, a1, a2);
2526 break;
2527 case INDEX_op_st_i64:
2528 if (const_args[0]) {
2529 tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_REXW, 0, a1, a2);
2530 tcg_out32(s, a0);
2531 } else {
2532 tcg_out_st(s, TCG_TYPE_I64, a0, a1, a2);
2533 }
2534 break;
2535
2536 case INDEX_op_brcond_i64:
2537 tcg_out_brcond64(s, a2, a0, a1, const_args[1], arg_label(args[3]), 0);
2538 break;
2539 case INDEX_op_setcond_i64:
2540 tcg_out_setcond64(s, args[3], a0, a1, a2, const_a2);
2541 break;
2542 case INDEX_op_movcond_i64:
2543 tcg_out_movcond64(s, args[5], a0, a1, a2, const_a2, args[3]);
2544 break;
2545
2546 case INDEX_op_bswap64_i64:
2547 tcg_out_bswap64(s, a0);
2548 break;
2549 case INDEX_op_extu_i32_i64:
2550 case INDEX_op_ext32u_i64:
2551 tcg_out_ext32u(s, a0, a1);
2552 break;
2553 case INDEX_op_ext_i32_i64:
2554 case INDEX_op_ext32s_i64:
2555 tcg_out_ext32s(s, a0, a1);
2556 break;
2557#endif
2558
2559 OP_32_64(deposit):
2560 if (args[3] == 0 && args[4] == 8) {
2561
2562 tcg_out_modrm(s, OPC_MOVB_EvGv | P_REXB_R | P_REXB_RM, a2, a0);
2563 } else if (args[3] == 8 && args[4] == 8) {
2564
2565 tcg_out_modrm(s, OPC_MOVB_EvGv, a2, a0 + 4);
2566 } else if (args[3] == 0 && args[4] == 16) {
2567
2568 tcg_out_modrm(s, OPC_MOVL_EvGv | P_DATA16, a2, a0);
2569 } else {
2570 tcg_abort();
2571 }
2572 break;
2573
2574 case INDEX_op_extract_i64:
2575 if (a2 + args[3] == 32) {
2576
2577 tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
2578 tcg_out_shifti(s, SHIFT_SHR, a0, a2);
2579 break;
2580 }
2581
2582 case INDEX_op_extract_i32:
2583
2584
2585
2586 tcg_debug_assert(a2 == 8 && args[3] == 8);
2587 if (a1 < 4 && a0 < 8) {
2588 tcg_out_modrm(s, OPC_MOVZBL, a0, a1 + 4);
2589 } else {
2590 tcg_out_ext16u(s, a0, a1);
2591 tcg_out_shifti(s, SHIFT_SHR, a0, 8);
2592 }
2593 break;
2594
2595 case INDEX_op_sextract_i32:
2596
2597
2598
2599 tcg_debug_assert(a2 == 8 && args[3] == 8);
2600 if (a1 < 4 && a0 < 8) {
2601 tcg_out_modrm(s, OPC_MOVSBL, a0, a1 + 4);
2602 } else {
2603 tcg_out_ext16s(s, a0, a1, 0);
2604 tcg_out_shifti(s, SHIFT_SAR, a0, 8);
2605 }
2606 break;
2607
2608 case INDEX_op_mb:
2609 tcg_out_mb(s, a0);
2610 break;
2611 case INDEX_op_mov_i32:
2612 case INDEX_op_mov_i64:
2613 case INDEX_op_mov_vec:
2614 case INDEX_op_movi_i32:
2615 case INDEX_op_movi_i64:
2616 case INDEX_op_dupi_vec:
2617 case INDEX_op_call:
2618 default:
2619 tcg_abort();
2620 }
2621
2622#undef OP_32_64
2623}
2624
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg *args, const int *const_args)
{
    /* Emit one host SSE/AVX instruction for a TCG vector opcode.
       The tables below are indexed by element size (vece: MO_8..MO_64);
       OPC_UD2 marks element sizes for which no x86 encoding exists —
       the assert at gen_simd verifies such cases were filtered out
       earlier (see tcg_can_emit_vec_op / expansion).  */
    static int const add_insn[4] = {
        OPC_PADDB, OPC_PADDW, OPC_PADDD, OPC_PADDQ
    };
    static int const sub_insn[4] = {
        OPC_PSUBB, OPC_PSUBW, OPC_PSUBD, OPC_PSUBQ
    };
    static int const mul_insn[4] = {
        OPC_UD2, OPC_PMULLW, OPC_PMULLD, OPC_UD2
    };
    static int const shift_imm_insn[4] = {
        OPC_UD2, OPC_PSHIFTW_Ib, OPC_PSHIFTD_Ib, OPC_PSHIFTQ_Ib
    };
    static int const cmpeq_insn[4] = {
        OPC_PCMPEQB, OPC_PCMPEQW, OPC_PCMPEQD, OPC_PCMPEQQ
    };
    static int const cmpgt_insn[4] = {
        OPC_PCMPGTB, OPC_PCMPGTW, OPC_PCMPGTD, OPC_PCMPGTQ
    };
    static int const punpckl_insn[4] = {
        OPC_PUNPCKLBW, OPC_PUNPCKLWD, OPC_PUNPCKLDQ, OPC_PUNPCKLQDQ
    };
    static int const punpckh_insn[4] = {
        OPC_PUNPCKHBW, OPC_PUNPCKHWD, OPC_PUNPCKHDQ, OPC_PUNPCKHQDQ
    };
    static int const packss_insn[4] = {
        OPC_PACKSSWB, OPC_PACKSSDW, OPC_UD2, OPC_UD2
    };
    static int const packus_insn[4] = {
        OPC_PACKUSWB, OPC_PACKUSDW, OPC_UD2, OPC_UD2
    };

    TCGType type = vecl + TCG_TYPE_V64;
    int insn, sub;
    TCGArg a0, a1, a2;

    a0 = args[0];
    a1 = args[1];
    a2 = args[2];

    switch (opc) {
    case INDEX_op_add_vec:
        insn = add_insn[vece];
        goto gen_simd;
    case INDEX_op_sub_vec:
        insn = sub_insn[vece];
        goto gen_simd;
    case INDEX_op_mul_vec:
        insn = mul_insn[vece];
        goto gen_simd;
    case INDEX_op_and_vec:
        insn = OPC_PAND;
        goto gen_simd;
    case INDEX_op_or_vec:
        insn = OPC_POR;
        goto gen_simd;
    case INDEX_op_xor_vec:
        insn = OPC_PXOR;
        goto gen_simd;
    case INDEX_op_x86_punpckl_vec:
        insn = punpckl_insn[vece];
        goto gen_simd;
    case INDEX_op_x86_punpckh_vec:
        insn = punpckh_insn[vece];
        goto gen_simd;
    case INDEX_op_x86_packss_vec:
        insn = packss_insn[vece];
        goto gen_simd;
    case INDEX_op_x86_packus_vec:
        insn = packus_insn[vece];
        goto gen_simd;
#if TCG_TARGET_REG_BITS == 32
    case INDEX_op_dup2_vec:
        /* Constraints have placed both 32-bit inputs in xmm registers;
           interleaving the low dwords builds the 64-bit element.  */
        insn = OPC_PUNPCKLDQ;
        goto gen_simd;
#endif
    gen_simd:
        tcg_debug_assert(insn != OPC_UD2);
        if (type == TCG_TYPE_V256) {
            insn |= P_VEXL;          /* 256-bit form requires VEX.L = 1 */
        }
        tcg_out_vex_modrm(s, insn, a0, a1, a2);
        break;

    case INDEX_op_cmp_vec:
        /* Only EQ and signed GT exist in hardware; all other conditions
           were rewritten in terms of these by tcg_expand_vec_op.  */
        sub = args[3];
        if (sub == TCG_COND_EQ) {
            insn = cmpeq_insn[vece];
        } else if (sub == TCG_COND_GT) {
            insn = cmpgt_insn[vece];
        } else {
            g_assert_not_reached();
        }
        goto gen_simd;

    case INDEX_op_andc_vec:
        /* PANDN computes ~src1 & src2, so pass a2 as the first source
           to get a0 = a1 & ~a2.  */
        insn = OPC_PANDN;
        if (type == TCG_TYPE_V256) {
            insn |= P_VEXL;
        }
        tcg_out_vex_modrm(s, insn, a0, a2, a1);
        break;

    case INDEX_op_shli_vec:
        sub = 6;                 /* /6 selects PSLL in the shift group */
        goto gen_shift;
    case INDEX_op_shri_vec:
        sub = 2;                 /* /2 selects PSRL */
        goto gen_shift;
    case INDEX_op_sari_vec:
        tcg_debug_assert(vece != MO_64);  /* no PSRAQ before AVX-512 */
        sub = 4;                 /* /4 selects PSRA */
    gen_shift:
        tcg_debug_assert(vece != MO_8);   /* byte shifts are expanded */
        insn = shift_imm_insn[vece];
        if (type == TCG_TYPE_V256) {
            insn |= P_VEXL;
        }
        tcg_out_vex_modrm(s, insn, sub, a0, a1);
        tcg_out8(s, a2);         /* immediate shift count */
        break;

    case INDEX_op_ld_vec:
        tcg_out_ld(s, type, a0, a1, a2);
        break;
    case INDEX_op_st_vec:
        tcg_out_st(s, type, a0, a1, a2);
        break;
    case INDEX_op_dup_vec:
        tcg_out_dup_vec(s, type, vece, a0, a1);
        break;

    case INDEX_op_x86_shufps_vec:
        insn = OPC_SHUFPS;
        sub = args[3];
        goto gen_simd_imm8;
    case INDEX_op_x86_blend_vec:
        if (vece == MO_16) {
            insn = OPC_PBLENDW;
        } else if (vece == MO_32) {
            insn = (have_avx2 ? OPC_VPBLENDD : OPC_BLENDPS);
        } else {
            g_assert_not_reached();
        }
        sub = args[3];
        goto gen_simd_imm8;
    case INDEX_op_x86_vperm2i128_vec:
        insn = OPC_VPERM2I128;
        sub = args[3];
        goto gen_simd_imm8;
    gen_simd_imm8:
        if (type == TCG_TYPE_V256) {
            insn |= P_VEXL;
        }
        tcg_out_vex_modrm(s, insn, a0, a1, a2);
        tcg_out8(s, sub);        /* trailing imm8 operand */
        break;

    case INDEX_op_x86_vpblendvb_vec:
        insn = OPC_VPBLENDVB;
        if (type == TCG_TYPE_V256) {
            insn |= P_VEXL;
        }
        tcg_out_vex_modrm(s, insn, a0, a1, a2);
        /* The mask register is encoded in the high nibble of the imm8.  */
        tcg_out8(s, args[3] << 4);
        break;

    case INDEX_op_x86_psrldq_vec:
        /* Group-14 /3 is PSRLDQ (whole-register byte shift right).  */
        tcg_out_vex_modrm(s, OPC_GRP14, 3, a0, a1);
        tcg_out8(s, a2);
        break;

    default:
        g_assert_not_reached();
    }
}
2805
/* Return the operand-constraint descriptor for OP, or NULL if the opcode
   is not handled by this backend.  The constraint letters in args_ct_str
   are decoded by the backend's constraint parser elsewhere in this file
   (e.g. "r" = general register, "x" = vector register, "0"/"1" = alias of
   an earlier output, "L" = register usable by qemu_ld/st helpers);
   see the target_parse_constraint implementation for the full set.  */
static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
{
    /* Shared descriptors for the common constraint shapes; statics so
       the returned pointers remain valid after the function returns.  */
    static const TCGTargetOpDef r = { .args_ct_str = { "r" } };
    static const TCGTargetOpDef ri_r = { .args_ct_str = { "ri", "r" } };
    static const TCGTargetOpDef re_r = { .args_ct_str = { "re", "r" } };
    static const TCGTargetOpDef qi_r = { .args_ct_str = { "qi", "r" } };
    static const TCGTargetOpDef r_r = { .args_ct_str = { "r", "r" } };
    static const TCGTargetOpDef r_q = { .args_ct_str = { "r", "q" } };
    static const TCGTargetOpDef r_re = { .args_ct_str = { "r", "re" } };
    static const TCGTargetOpDef r_0 = { .args_ct_str = { "r", "0" } };
    static const TCGTargetOpDef r_r_ri = { .args_ct_str = { "r", "r", "ri" } };
    static const TCGTargetOpDef r_r_re = { .args_ct_str = { "r", "r", "re" } };
    static const TCGTargetOpDef r_0_re = { .args_ct_str = { "r", "0", "re" } };
    static const TCGTargetOpDef r_0_ci = { .args_ct_str = { "r", "0", "ci" } };
    static const TCGTargetOpDef r_L = { .args_ct_str = { "r", "L" } };
    static const TCGTargetOpDef L_L = { .args_ct_str = { "L", "L" } };
    static const TCGTargetOpDef r_L_L = { .args_ct_str = { "r", "L", "L" } };
    static const TCGTargetOpDef r_r_L = { .args_ct_str = { "r", "r", "L" } };
    static const TCGTargetOpDef L_L_L = { .args_ct_str = { "L", "L", "L" } };
    static const TCGTargetOpDef r_r_L_L
        = { .args_ct_str = { "r", "r", "L", "L" } };
    static const TCGTargetOpDef L_L_L_L
        = { .args_ct_str = { "L", "L", "L", "L" } };
    static const TCGTargetOpDef x_x = { .args_ct_str = { "x", "x" } };
    static const TCGTargetOpDef x_x_x = { .args_ct_str = { "x", "x", "x" } };
    static const TCGTargetOpDef x_x_x_x
        = { .args_ct_str = { "x", "x", "x", "x" } };
    static const TCGTargetOpDef x_r = { .args_ct_str = { "x", "r" } };

    switch (op) {
    case INDEX_op_goto_ptr:
        return &r;

    /* Loads: destination register, base register.  */
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld_i32:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_ld_i64:
        return &r_r;

    /* Stores: value (register or immediate), base register.  */
    case INDEX_op_st8_i32:
    case INDEX_op_st8_i64:
        return &qi_r;
    case INDEX_op_st16_i32:
    case INDEX_op_st16_i64:
    case INDEX_op_st_i32:
    case INDEX_op_st32_i64:
        return &ri_r;
    case INDEX_op_st_i64:
        return &re_r;

    case INDEX_op_add_i32:
    case INDEX_op_add_i64:
        return &r_r_re;
    /* Two-operand x86 arithmetic: output aliases the first input ("0").  */
    case INDEX_op_sub_i32:
    case INDEX_op_sub_i64:
    case INDEX_op_mul_i32:
    case INDEX_op_mul_i64:
    case INDEX_op_or_i32:
    case INDEX_op_or_i64:
    case INDEX_op_xor_i32:
    case INDEX_op_xor_i64:
        return &r_0_re;

    case INDEX_op_and_i32:
    case INDEX_op_and_i64:
        {
            static const TCGTargetOpDef and
                = { .args_ct_str = { "r", "0", "reZ" } };
            return &and;
        }
        break;
    case INDEX_op_andc_i32:
    case INDEX_op_andc_i64:
        {
            static const TCGTargetOpDef andc
                = { .args_ct_str = { "r", "r", "rI" } };
            return &andc;
        }
        break;

    /* With BMI2 (SHLX etc.) the count may be any register;
       otherwise the classic shifts need the count in %cl ("c").  */
    case INDEX_op_shl_i32:
    case INDEX_op_shl_i64:
    case INDEX_op_shr_i32:
    case INDEX_op_shr_i64:
    case INDEX_op_sar_i32:
    case INDEX_op_sar_i64:
        return have_bmi2 ? &r_r_ri : &r_0_ci;
    case INDEX_op_rotl_i32:
    case INDEX_op_rotl_i64:
    case INDEX_op_rotr_i32:
    case INDEX_op_rotr_i64:
        return &r_0_ci;

    case INDEX_op_brcond_i32:
    case INDEX_op_brcond_i64:
        return &r_re;

    /* In-place unary ops.  */
    case INDEX_op_bswap16_i32:
    case INDEX_op_bswap16_i64:
    case INDEX_op_bswap32_i32:
    case INDEX_op_bswap32_i64:
    case INDEX_op_bswap64_i64:
    case INDEX_op_neg_i32:
    case INDEX_op_neg_i64:
    case INDEX_op_not_i32:
    case INDEX_op_not_i64:
        return &r_0;

    /* 8-bit extensions need a byte-addressable source register.  */
    case INDEX_op_ext8s_i32:
    case INDEX_op_ext8s_i64:
    case INDEX_op_ext8u_i32:
    case INDEX_op_ext8u_i64:
        return &r_q;
    case INDEX_op_ext16s_i32:
    case INDEX_op_ext16s_i64:
    case INDEX_op_ext16u_i32:
    case INDEX_op_ext16u_i64:
    case INDEX_op_ext32s_i64:
    case INDEX_op_ext32u_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
    case INDEX_op_extract_i32:
    case INDEX_op_extract_i64:
    case INDEX_op_sextract_i32:
    case INDEX_op_ctpop_i32:
    case INDEX_op_ctpop_i64:
        return &r_r;

    case INDEX_op_deposit_i32:
    case INDEX_op_deposit_i64:
        {
            static const TCGTargetOpDef dep
                = { .args_ct_str = { "Q", "0", "Q" } };
            return &dep;
        }
    case INDEX_op_setcond_i32:
    case INDEX_op_setcond_i64:
        {
            static const TCGTargetOpDef setc
                = { .args_ct_str = { "q", "r", "re" } };
            return &setc;
        }
    case INDEX_op_movcond_i32:
    case INDEX_op_movcond_i64:
        {
            static const TCGTargetOpDef movc
                = { .args_ct_str = { "r", "r", "re", "r", "0" } };
            return &movc;
        }
    /* DIV/IDIV implicitly use %eax:%edx ("a", "d").  */
    case INDEX_op_div2_i32:
    case INDEX_op_div2_i64:
    case INDEX_op_divu2_i32:
    case INDEX_op_divu2_i64:
        {
            static const TCGTargetOpDef div2
                = { .args_ct_str = { "a", "d", "0", "1", "r" } };
            return &div2;
        }
    case INDEX_op_mulu2_i32:
    case INDEX_op_mulu2_i64:
    case INDEX_op_muls2_i32:
    case INDEX_op_muls2_i64:
        {
            static const TCGTargetOpDef mul2
                = { .args_ct_str = { "a", "d", "a", "r" } };
            return &mul2;
        }
    case INDEX_op_add2_i32:
    case INDEX_op_add2_i64:
    case INDEX_op_sub2_i32:
    case INDEX_op_sub2_i64:
        {
            static const TCGTargetOpDef arith2
                = { .args_ct_str = { "r", "r", "0", "1", "re", "re" } };
            return &arith2;
        }
    /* Without BMI1/LZCNT the fallback sequence needs stricter operands,
       hence the two-entry tables indexed by the feature flag.  */
    case INDEX_op_ctz_i32:
    case INDEX_op_ctz_i64:
        {
            static const TCGTargetOpDef ctz[2] = {
                { .args_ct_str = { "&r", "r", "r" } },
                { .args_ct_str = { "&r", "r", "rW" } },
            };
            return &ctz[have_bmi1];
        }
    case INDEX_op_clz_i32:
    case INDEX_op_clz_i64:
        {
            static const TCGTargetOpDef clz[2] = {
                { .args_ct_str = { "&r", "r", "r" } },
                { .args_ct_str = { "&r", "r", "rW" } },
            };
            return &clz[have_lzcnt];
        }

    /* Guest memory ops: extra address word when the guest address is
       wider than a host register.  */
    case INDEX_op_qemu_ld_i32:
        return TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? &r_L : &r_L_L;
    case INDEX_op_qemu_st_i32:
        return TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? &L_L : &L_L_L;
    case INDEX_op_qemu_ld_i64:
        return (TCG_TARGET_REG_BITS == 64 ? &r_L
                : TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? &r_r_L
                : &r_r_L_L);
    case INDEX_op_qemu_st_i64:
        return (TCG_TARGET_REG_BITS == 64 ? &L_L
                : TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? &L_L_L
                : &L_L_L_L);

    case INDEX_op_brcond2_i32:
        {
            static const TCGTargetOpDef b2
                = { .args_ct_str = { "r", "r", "ri", "ri" } };
            return &b2;
        }
    case INDEX_op_setcond2_i32:
        {
            static const TCGTargetOpDef s2
                = { .args_ct_str = { "r", "r", "r", "ri", "ri" } };
            return &s2;
        }

    case INDEX_op_ld_vec:
    case INDEX_op_st_vec:
        return &x_r;

    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_mul_vec:
    case INDEX_op_and_vec:
    case INDEX_op_or_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_andc_vec:
    case INDEX_op_cmp_vec:
    case INDEX_op_x86_shufps_vec:
    case INDEX_op_x86_blend_vec:
    case INDEX_op_x86_packss_vec:
    case INDEX_op_x86_packus_vec:
    case INDEX_op_x86_vperm2i128_vec:
    case INDEX_op_x86_punpckl_vec:
    case INDEX_op_x86_punpckh_vec:
#if TCG_TARGET_REG_BITS == 32
    case INDEX_op_dup2_vec:
#endif
        return &x_x_x;
    case INDEX_op_dup_vec:
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
    case INDEX_op_sari_vec:
    case INDEX_op_x86_psrldq_vec:
        return &x_x;
    case INDEX_op_x86_vpblendvb_vec:
        return &x_x_x_x;

    default:
        break;
    }
    return NULL;
}
3072
3073int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
3074{
3075 switch (opc) {
3076 case INDEX_op_add_vec:
3077 case INDEX_op_sub_vec:
3078 case INDEX_op_and_vec:
3079 case INDEX_op_or_vec:
3080 case INDEX_op_xor_vec:
3081 case INDEX_op_andc_vec:
3082 return 1;
3083 case INDEX_op_cmp_vec:
3084 return -1;
3085
3086 case INDEX_op_shli_vec:
3087 case INDEX_op_shri_vec:
3088
3089 return vece == MO_8 ? -1 : 1;
3090
3091 case INDEX_op_sari_vec:
3092
3093 if (vece == MO_8) {
3094 return -1;
3095 }
3096
3097
3098 if (vece == MO_64) {
3099 return type >= TCG_TYPE_V256 ? -1 : 0;
3100 }
3101 return 1;
3102
3103 case INDEX_op_mul_vec:
3104 if (vece == MO_8) {
3105
3106 return -1;
3107 }
3108 if (vece == MO_64) {
3109 return 0;
3110 }
3111 return 1;
3112
3113 default:
3114 return 0;
3115 }
3116}
3117
/* Expand a vector opcode that tcg_can_emit_vec_op reported as -1 into
   a sequence of directly-supported vector operations.  The variadic
   arguments mirror those of the original opcode.  */
void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
                       TCGArg a0, ...)
{
    va_list va;
    TCGArg a1, a2;
    TCGv_vec v0, t1, t2, t3, t4;

    va_start(va, a0);
    v0 = temp_tcgv_vec(arg_temp(a0));

    switch (opc) {
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
        tcg_debug_assert(vece == MO_8);
        a1 = va_arg(va, TCGArg);
        a2 = va_arg(va, TCGArg);
        /* Unpack to 16-bit lanes, shift, and repack.  Tricky bits:
           (1) punpck{l,h}bw x,x duplicates each byte into both halves
               of its 16-bit lane.
           (2) For a right shift, shifting by a2 + 8 discards the low
               (duplicate) byte and brings zeros into the high half.
               For a left shift, shift up by a2 + 8 and then back down
               by 8 so the high byte of each lane ends up zero.
           (3) Because the high byte of every lane is zero, PACKUSWB
               (pack with unsigned saturation) copies the low bytes
               through unmodified.  */
        t1 = tcg_temp_new_vec(type);
        t2 = tcg_temp_new_vec(type);
        vec_gen_3(INDEX_op_x86_punpckl_vec, type, MO_8,
                  tcgv_vec_arg(t1), a1, a1);
        vec_gen_3(INDEX_op_x86_punpckh_vec, type, MO_8,
                  tcgv_vec_arg(t2), a1, a1);
        if (opc == INDEX_op_shri_vec) {
            vec_gen_3(INDEX_op_shri_vec, type, MO_16,
                      tcgv_vec_arg(t1), tcgv_vec_arg(t1), a2 + 8);
            vec_gen_3(INDEX_op_shri_vec, type, MO_16,
                      tcgv_vec_arg(t2), tcgv_vec_arg(t2), a2 + 8);
        } else {
            vec_gen_3(INDEX_op_shli_vec, type, MO_16,
                      tcgv_vec_arg(t1), tcgv_vec_arg(t1), a2 + 8);
            vec_gen_3(INDEX_op_shli_vec, type, MO_16,
                      tcgv_vec_arg(t2), tcgv_vec_arg(t2), a2 + 8);
            vec_gen_3(INDEX_op_shri_vec, type, MO_16,
                      tcgv_vec_arg(t1), tcgv_vec_arg(t1), 8);
            vec_gen_3(INDEX_op_shri_vec, type, MO_16,
                      tcgv_vec_arg(t2), tcgv_vec_arg(t2), 8);
        }
        vec_gen_3(INDEX_op_x86_packus_vec, type, MO_8,
                  a0, tcgv_vec_arg(t1), tcgv_vec_arg(t2));
        tcg_temp_free_vec(t1);
        tcg_temp_free_vec(t2);
        break;

    case INDEX_op_sari_vec:
        a1 = va_arg(va, TCGArg);
        a2 = va_arg(va, TCGArg);
        if (vece == MO_8) {
            /* As above: unpack to 16 bits, arithmetic shift by a2 + 8
               (the result is properly sign-extended), then repack with
               signed saturation, which is exact here.  */
            t1 = tcg_temp_new_vec(type);
            t2 = tcg_temp_new_vec(type);
            vec_gen_3(INDEX_op_x86_punpckl_vec, type, MO_8,
                      tcgv_vec_arg(t1), a1, a1);
            vec_gen_3(INDEX_op_x86_punpckh_vec, type, MO_8,
                      tcgv_vec_arg(t2), a1, a1);
            vec_gen_3(INDEX_op_sari_vec, type, MO_16,
                      tcgv_vec_arg(t1), tcgv_vec_arg(t1), a2 + 8);
            vec_gen_3(INDEX_op_sari_vec, type, MO_16,
                      tcgv_vec_arg(t2), tcgv_vec_arg(t2), a2 + 8);
            vec_gen_3(INDEX_op_x86_packss_vec, type, MO_8,
                      a0, tcgv_vec_arg(t1), tcgv_vec_arg(t2));
            tcg_temp_free_vec(t1);
            tcg_temp_free_vec(t2);
            break;
        }
        tcg_debug_assert(vece == MO_64);
        /* MO_64 has no hardware arithmetic shift.  If the shift count
           is <= 32, do a 32-bit arithmetic shift of the high halves
           (producing the sign extension), a 64-bit logical shift, and
           blend the correct high halves back in (mask 0xaa selects the
           odd 32-bit elements).  */
        if (a2 <= 32) {
            t1 = tcg_temp_new_vec(type);
            vec_gen_3(INDEX_op_sari_vec, type, MO_32, tcgv_vec_arg(t1), a1, a2);
            vec_gen_3(INDEX_op_shri_vec, type, MO_64, a0, a1, a2);
            vec_gen_4(INDEX_op_x86_blend_vec, type, MO_32,
                      a0, a0, tcgv_vec_arg(t1), 0xaa);
            tcg_temp_free_vec(t1);
            break;
        }
        /* Otherwise build the sign mask with a compare against zero
           (0 > a1 yields all-ones for negative elements), shift it
           into position, and OR it over the logically-shifted value.  */
        t1 = tcg_temp_new_vec(type);
        t2 = tcg_const_zeros_vec(type);
        vec_gen_4(INDEX_op_cmp_vec, type, MO_64,
                  tcgv_vec_arg(t1), tcgv_vec_arg(t2), a1, TCG_COND_GT);
        tcg_temp_free_vec(t2);
        vec_gen_3(INDEX_op_shri_vec, type, MO_64, a0, a1, a2);
        vec_gen_3(INDEX_op_shli_vec, type, MO_64,
                  tcgv_vec_arg(t1), tcgv_vec_arg(t1), 64 - a2);
        vec_gen_3(INDEX_op_or_vec, type, MO_64, a0, a0, tcgv_vec_arg(t1));
        tcg_temp_free_vec(t1);
        break;

    case INDEX_op_mul_vec:
        tcg_debug_assert(vece == MO_8);
        a1 = va_arg(va, TCGArg);
        a2 = va_arg(va, TCGArg);
        switch (type) {
        case TCG_TYPE_V64:
            /* Widen each operand to 16-bit lanes — a1 bytes in the low
               half (x | 0 after the byte swap below gives 0 | x vs
               y | 0 placements), multiply, shift the product into the
               low byte, and pack back to bytes.  */
            t1 = tcg_temp_new_vec(TCG_TYPE_V128);
            t2 = tcg_temp_new_vec(TCG_TYPE_V128);
            tcg_gen_dup16i_vec(t2, 0);
            vec_gen_3(INDEX_op_x86_punpckl_vec, TCG_TYPE_V128, MO_8,
                      tcgv_vec_arg(t1), a1, tcgv_vec_arg(t2));
            vec_gen_3(INDEX_op_x86_punpckl_vec, TCG_TYPE_V128, MO_8,
                      tcgv_vec_arg(t2), tcgv_vec_arg(t2), a2);
            tcg_gen_mul_vec(MO_16, t1, t1, t2);
            tcg_gen_shri_vec(MO_16, t1, t1, 8);
            vec_gen_3(INDEX_op_x86_packus_vec, TCG_TYPE_V128, MO_8,
                      a0, tcgv_vec_arg(t1), tcgv_vec_arg(t1));
            tcg_temp_free_vec(t1);
            tcg_temp_free_vec(t2);
            break;

        case TCG_TYPE_V128:
            /* As for V64, but both the low (t1*t2) and high (t3*t4)
               byte halves must be widened, multiplied, and repacked.  */
            t1 = tcg_temp_new_vec(TCG_TYPE_V128);
            t2 = tcg_temp_new_vec(TCG_TYPE_V128);
            t3 = tcg_temp_new_vec(TCG_TYPE_V128);
            t4 = tcg_temp_new_vec(TCG_TYPE_V128);
            tcg_gen_dup16i_vec(t4, 0);
            vec_gen_3(INDEX_op_x86_punpckl_vec, TCG_TYPE_V128, MO_8,
                      tcgv_vec_arg(t1), a1, tcgv_vec_arg(t4));
            vec_gen_3(INDEX_op_x86_punpckl_vec, TCG_TYPE_V128, MO_8,
                      tcgv_vec_arg(t2), tcgv_vec_arg(t4), a2);
            vec_gen_3(INDEX_op_x86_punpckh_vec, TCG_TYPE_V128, MO_8,
                      tcgv_vec_arg(t3), a1, tcgv_vec_arg(t4));
            vec_gen_3(INDEX_op_x86_punpckh_vec, TCG_TYPE_V128, MO_8,
                      tcgv_vec_arg(t4), tcgv_vec_arg(t4), a2);
            tcg_gen_mul_vec(MO_16, t1, t1, t2);
            tcg_gen_mul_vec(MO_16, t3, t3, t4);
            tcg_gen_shri_vec(MO_16, t1, t1, 8);
            tcg_gen_shri_vec(MO_16, t3, t3, 8);
            vec_gen_3(INDEX_op_x86_packus_vec, TCG_TYPE_V128, MO_8,
                      a0, tcgv_vec_arg(t1), tcgv_vec_arg(t3));
            tcg_temp_free_vec(t1);
            tcg_temp_free_vec(t2);
            tcg_temp_free_vec(t3);
            tcg_temp_free_vec(t4);
            break;

        case TCG_TYPE_V256:
            t1 = tcg_temp_new_vec(TCG_TYPE_V256);
            t2 = tcg_temp_new_vec(TCG_TYPE_V256);
            t3 = tcg_temp_new_vec(TCG_TYPE_V256);
            t4 = tcg_temp_new_vec(TCG_TYPE_V256);
            tcg_gen_dup16i_vec(t4, 0);
            /* Same scheme as V128.  The AVX2 unpack and pack insns
               operate independently within each 128-bit lane, but the
               final pack applies the inverse of the lane-local
               interleave performed by the unpacks, so every byte
               returns to its original position.  */
            vec_gen_3(INDEX_op_x86_punpckl_vec, TCG_TYPE_V256, MO_8,
                      tcgv_vec_arg(t1), a1, tcgv_vec_arg(t4));
            vec_gen_3(INDEX_op_x86_punpckl_vec, TCG_TYPE_V256, MO_8,
                      tcgv_vec_arg(t2), tcgv_vec_arg(t4), a2);
            vec_gen_3(INDEX_op_x86_punpckh_vec, TCG_TYPE_V256, MO_8,
                      tcgv_vec_arg(t3), a1, tcgv_vec_arg(t4));
            vec_gen_3(INDEX_op_x86_punpckh_vec, TCG_TYPE_V256, MO_8,
                      tcgv_vec_arg(t4), tcgv_vec_arg(t4), a2);
            tcg_gen_mul_vec(MO_16, t1, t1, t2);
            tcg_gen_mul_vec(MO_16, t3, t3, t4);
            tcg_gen_shri_vec(MO_16, t1, t1, 8);
            tcg_gen_shri_vec(MO_16, t3, t3, 8);
            vec_gen_3(INDEX_op_x86_packus_vec, TCG_TYPE_V256, MO_8,
                      a0, tcgv_vec_arg(t1), tcgv_vec_arg(t3));
            tcg_temp_free_vec(t1);
            tcg_temp_free_vec(t2);
            tcg_temp_free_vec(t3);
            tcg_temp_free_vec(t4);
            break;

        default:
            g_assert_not_reached();
        }
        break;

    case INDEX_op_cmp_vec:
        {
            /* x86 only provides EQ and signed GT; rewrite every other
               condition by inverting the result, swapping the operands,
               and/or biasing both operands into signed range.  */
            enum {
                NEED_SWAP = 1,
                NEED_INV = 2,
                NEED_BIAS = 4
            };
            static const uint8_t fixups[16] = {
                [0 ... 15] = -1,        /* unhandled conditions => 0xff */
                [TCG_COND_EQ] = 0,
                [TCG_COND_NE] = NEED_INV,
                [TCG_COND_GT] = 0,
                [TCG_COND_LT] = NEED_SWAP,
                [TCG_COND_LE] = NEED_INV,
                [TCG_COND_GE] = NEED_SWAP | NEED_INV,
                [TCG_COND_GTU] = NEED_BIAS,
                [TCG_COND_LTU] = NEED_BIAS | NEED_SWAP,
                [TCG_COND_LEU] = NEED_BIAS | NEED_INV,
                [TCG_COND_GEU] = NEED_BIAS | NEED_SWAP | NEED_INV,
            };

            TCGCond cond;
            uint8_t fixup;

            a1 = va_arg(va, TCGArg);
            a2 = va_arg(va, TCGArg);
            cond = va_arg(va, TCGArg);
            fixup = fixups[cond & 15];
            tcg_debug_assert(fixup != 0xff);

            if (fixup & NEED_INV) {
                cond = tcg_invert_cond(cond);
            }
            if (fixup & NEED_SWAP) {
                TCGArg t;
                t = a1, a1 = a2, a2 = t;
                cond = tcg_swap_cond(cond);
            }

            t1 = t2 = NULL;
            if (fixup & NEED_BIAS) {
                /* Subtracting the sign bit (e.g. 0x80 per byte) maps an
                   unsigned comparison onto the equivalent signed one.  */
                t1 = tcg_temp_new_vec(type);
                t2 = tcg_temp_new_vec(type);
                tcg_gen_dupi_vec(vece, t2, 1ull << ((8 << vece) - 1));
                tcg_gen_sub_vec(vece, t1, temp_tcgv_vec(arg_temp(a1)), t2);
                tcg_gen_sub_vec(vece, t2, temp_tcgv_vec(arg_temp(a2)), t2);
                a1 = tcgv_vec_arg(t1);
                a2 = tcgv_vec_arg(t2);
                cond = tcg_signed_cond(cond);
            }

            tcg_debug_assert(cond == TCG_COND_EQ || cond == TCG_COND_GT);
            vec_gen_4(INDEX_op_cmp_vec, type, vece, a0, a1, a2, cond);

            if (fixup & NEED_BIAS) {
                tcg_temp_free_vec(t1);
                tcg_temp_free_vec(t2);
            }
            if (fixup & NEED_INV) {
                tcg_gen_not_vec(vece, v0, v0);
            }
        }
        break;

    default:
        break;
    }

    va_end(va);
}
3373
/* Registers saved in the prologue / restored in the epilogue.
   NOTE(review): the order appears to be mirrored by the DW_CFA_offset
   entries in debug_frame below — keep the two in sync.  */
static const int tcg_target_callee_save_regs[] = {
#if TCG_TARGET_REG_BITS == 64
    TCG_REG_RBP,
    TCG_REG_RBX,
#if defined(_WIN64)
    /* RDI and RSI are callee-saved only in the Win64 ABI.  */
    TCG_REG_RDI,
    TCG_REG_RSI,
#endif
    TCG_REG_R12,
    TCG_REG_R13,
    TCG_REG_R14,
    TCG_REG_R15,
#else
    TCG_REG_EBP,
    TCG_REG_EBX,
    TCG_REG_ESI,
    TCG_REG_EDI,
#endif
};
3393
3394
3395
3396
/* Bytes pushed on entry: the return address plus every register in
   tcg_target_callee_save_regs.  */
#define PUSH_SIZE \
    ((1 + ARRAY_SIZE(tcg_target_callee_save_regs)) \
     * (TCG_TARGET_REG_BITS / 8))

/* Total stack frame: pushes, outgoing call-argument area, and the TCG
   temporary buffer, rounded up to the required stack alignment.  */
#define FRAME_SIZE \
    ((PUSH_SIZE \
      + TCG_STATIC_CALL_ARGS_SIZE \
      + CPU_TEMP_BUF_NLONGS * sizeof(long) \
      + TCG_TARGET_STACK_ALIGN - 1) \
     & ~(TCG_TARGET_STACK_ALIGN - 1))
3407
3408
/* Generate the global prologue (entry into generated code) and the
   shared epilogue (return back to the caller of the prologue).  */
static void tcg_target_qemu_prologue(TCGContext *s)
{
    int i, stack_addend;

    /* TB prologue.  */

    /* Reserve some stack space, also used for TCG temps; the pushes
       below account for the PUSH_SIZE part of FRAME_SIZE.  */
    stack_addend = FRAME_SIZE - PUSH_SIZE;
    tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE,
                  CPU_TEMP_BUF_NLONGS * sizeof(long));

    /* Save all callee-saved registers.  */
    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
        tcg_out_push(s, tcg_target_callee_save_regs[i]);
    }

#if TCG_TARGET_REG_BITS == 32
    /* 32-bit: both arguments arrive on the stack; offsets are counted
       from ESP after the pushes above (+1 slot for the return address,
       +2 before the second argument once stack_addend was subtracted).  */
    tcg_out_ld(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP,
               (ARRAY_SIZE(tcg_target_callee_save_regs) + 1) * 4);
    tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
    /* jmp *tb (second stack argument).  */
    tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, TCG_REG_ESP,
                         (ARRAY_SIZE(tcg_target_callee_save_regs) + 2) * 4
                         + stack_addend);
#else
    /* 64-bit: env in the first argument register, tb in the second.  */
    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
    tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
    /* jmp *tb.  */
    tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, tcg_target_call_iarg_regs[1]);
#endif

    /*
     * Return path for goto_ptr.  Set return value to 0, a-la exit_tb,
     * and fall through to the rest of the epilogue.
     */
    s->code_gen_epilogue = s->code_ptr;
    tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_EAX, 0);

    /* TB epilogue.  */
    tb_ret_addr = s->code_ptr;

    tcg_out_addi(s, TCG_REG_CALL_STACK, stack_addend);

    if (have_avx2) {
        /* Avoid the AVX->SSE transition penalty before returning.  */
        tcg_out_vex_opc(s, OPC_VZEROUPPER, 0, 0, 0, 0);
    }
    for (i = ARRAY_SIZE(tcg_target_callee_save_regs) - 1; i >= 0; i--) {
        tcg_out_pop(s, tcg_target_callee_save_regs[i]);
    }
    tcg_out_opc(s, OPC_RET, 0, 0, 0);

#if !defined(CONFIG_SOFTMMU)
    /* Try to set up a segment register to point to guest_base.  */
    if (guest_base) {
        setup_guest_base_seg();
    }
#endif
}
3467
3468static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
3469{
3470 memset(p, 0x90, count);
3471}
3472
3473static void tcg_target_init(TCGContext *s)
3474{
3475#ifdef CONFIG_CPUID_H
3476 unsigned a, b, c, d, b7 = 0;
3477 int max = __get_cpuid_max(0, 0);
3478
3479 if (max >= 7) {
3480
3481 __cpuid_count(7, 0, a, b7, c, d);
3482 have_bmi1 = (b7 & bit_BMI) != 0;
3483 have_bmi2 = (b7 & bit_BMI2) != 0;
3484 }
3485
3486 if (max >= 1) {
3487 __cpuid(1, a, b, c, d);
3488#ifndef have_cmov
3489
3490
3491
3492 have_cmov = (d & bit_CMOV) != 0;
3493#endif
3494
3495
3496
3497 have_movbe = (c & bit_MOVBE) != 0;
3498 have_popcnt = (c & bit_POPCNT) != 0;
3499
3500
3501
3502 if (c & bit_OSXSAVE) {
3503 unsigned xcrl, xcrh;
3504
3505
3506
3507 asm(".byte 0x0f, 0x01, 0xd0" : "=a" (xcrl), "=d" (xcrh) : "c" (0));
3508 if ((xcrl & 6) == 6) {
3509 have_avx1 = (c & bit_AVX) != 0;
3510 have_avx2 = (b7 & bit_AVX2) != 0;
3511 }
3512 }
3513 }
3514
3515 max = __get_cpuid_max(0x8000000, 0);
3516 if (max >= 1) {
3517 __cpuid(0x80000001, a, b, c, d);
3518
3519 have_lzcnt = (c & bit_LZCNT) != 0;
3520 }
3521#endif
3522
3523 tcg_target_available_regs[TCG_TYPE_I32] = ALL_GENERAL_REGS;
3524 if (TCG_TARGET_REG_BITS == 64) {
3525 tcg_target_available_regs[TCG_TYPE_I64] = ALL_GENERAL_REGS;
3526 }
3527 if (have_avx1) {
3528 tcg_target_available_regs[TCG_TYPE_V64] = ALL_VECTOR_REGS;
3529 tcg_target_available_regs[TCG_TYPE_V128] = ALL_VECTOR_REGS;
3530 }
3531 if (have_avx2) {
3532 tcg_target_available_regs[TCG_TYPE_V256] = ALL_VECTOR_REGS;
3533 }
3534
3535 tcg_target_call_clobber_regs = ALL_VECTOR_REGS;
3536 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EAX);
3537 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EDX);
3538 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_ECX);
3539 if (TCG_TARGET_REG_BITS == 64) {
3540#if !defined(_WIN64)
3541 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RDI);
3542 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RSI);
3543#endif
3544 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8);
3545 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9);
3546 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10);
3547 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11);
3548 }
3549
3550 s->reserved_regs = 0;
3551 tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
3552}
3553
/* DWARF CFI blob handed to tcg_register_jit_int so debuggers can
   unwind through generated code.  */
typedef struct {
    DebugFrameHeader h;
    uint8_t fde_def_cfa[4];
    uint8_t fde_reg_ofs[14];
} DebugFrame;

/* We're expecting a 2 byte uleb128 encoded value.  */
QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
3562
#if !defined(__ELF__)
    /* Host machine without ELF: no unwind info is registered.  */
#elif TCG_TARGET_REG_BITS == 64
#define ELF_HOST_MACHINE EM_X86_64
static const DebugFrame debug_frame = {
    .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
    .h.cie.id = -1,
    .h.cie.version = 1,
    .h.cie.code_align = 1,
    .h.cie.data_align = 0x78,             /* sleb128 -8 */
    .h.cie.return_column = 16,            /* DWARF x86-64: %rip */

    /* Total FDE size does not include the "len" member.  */
    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),

    .fde_def_cfa = {
        12, 7,                          /* DW_CFA_def_cfa %rsp, ...  */
        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
        (FRAME_SIZE >> 7)
    },
    .fde_reg_ofs = {
        0x90, 1,                        /* DW_CFA_offset, %rip, -8 */
        /* This sequence must match tcg_target_callee_save_regs.  */
        0x86, 2,                        /* DW_CFA_offset, %rbp, -16 */
        0x83, 3,                        /* DW_CFA_offset, %rbx, -24 */
        0x8c, 4,                        /* DW_CFA_offset, %r12, -32 */
        0x8d, 5,                        /* DW_CFA_offset, %r13, -40 */
        0x8e, 6,                        /* DW_CFA_offset, %r14, -48 */
        0x8f, 7,                        /* DW_CFA_offset, %r15, -56 */
    }
};
#else
#define ELF_HOST_MACHINE EM_386
static const DebugFrame debug_frame = {
    .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
    .h.cie.id = -1,
    .h.cie.version = 1,
    .h.cie.code_align = 1,
    .h.cie.data_align = 0x7c,             /* sleb128 -4 */
    .h.cie.return_column = 8,             /* DWARF i386: %eip */

    /* Total FDE size does not include the "len" member.  */
    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),

    .fde_def_cfa = {
        12, 4,                          /* DW_CFA_def_cfa %esp, ...  */
        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
        (FRAME_SIZE >> 7)
    },
    .fde_reg_ofs = {
        0x88, 1,                        /* DW_CFA_offset, %eip, -4 */
        /* This sequence must match tcg_target_callee_save_regs.  */
        0x85, 2,                        /* DW_CFA_offset, %ebp, -8 */
        0x83, 3,                        /* DW_CFA_offset, %ebx, -12 */
        0x86, 4,                        /* DW_CFA_offset, %esi, -16 */
        0x87, 5,                        /* DW_CFA_offset, %edi, -20 */
    }
};
#endif
3622
#if defined(ELF_HOST_MACHINE)
/* Register the code-generation buffer with the in-process GDB JIT
   interface, attaching the DWARF unwind data built above.  */
void tcg_register_jit(void *buf, size_t buf_size)
{
    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
}
#endif
3629