1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25#include "tcg-pool.inc.c"
26
#ifdef CONFIG_DEBUG_TCG
/* Register names for debug dumps, indexed by TCG register number.  */
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
#if TCG_TARGET_REG_BITS == 64
    "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
#else
    "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",
#endif
    "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
    "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7",
#if TCG_TARGET_REG_BITS == 64
    "%xmm8", "%xmm9", "%xmm10", "%xmm11",
    "%xmm12", "%xmm13", "%xmm14", "%xmm15",
#endif
};
#endif
42
/* Register allocation preference order.  Call-saved registers come
   first so that values live across calls prefer them.  */
static const int tcg_target_reg_alloc_order[] = {
#if TCG_TARGET_REG_BITS == 64
    TCG_REG_RBP,
    TCG_REG_RBX,
    TCG_REG_R12,
    TCG_REG_R13,
    TCG_REG_R14,
    TCG_REG_R15,
    TCG_REG_R10,
    TCG_REG_R11,
    TCG_REG_R9,
    TCG_REG_R8,
    TCG_REG_RCX,
    TCG_REG_RDX,
    TCG_REG_RSI,
    TCG_REG_RDI,
    TCG_REG_RAX,
#else
    TCG_REG_EBX,
    TCG_REG_ESI,
    TCG_REG_EDI,
    TCG_REG_EBP,
    TCG_REG_ECX,
    TCG_REG_EDX,
    TCG_REG_EAX,
#endif
    TCG_REG_XMM0,
    TCG_REG_XMM1,
    TCG_REG_XMM2,
    TCG_REG_XMM3,
    TCG_REG_XMM4,
    TCG_REG_XMM5,
#ifndef _WIN64
    /* The Win64 ABI has xmm6-xmm15 as caller-saves, and we do not save
       any of them.  Therefore only allow xmm0-xmm5 to be allocated.  */
    TCG_REG_XMM6,
    TCG_REG_XMM7,
#if TCG_TARGET_REG_BITS == 64
    TCG_REG_XMM8,
    TCG_REG_XMM9,
    TCG_REG_XMM10,
    TCG_REG_XMM11,
    TCG_REG_XMM12,
    TCG_REG_XMM13,
    TCG_REG_XMM14,
    TCG_REG_XMM15,
#endif
#endif
};
92
/* Registers used to pass integer function arguments, in ABI order.  */
static const int tcg_target_call_iarg_regs[] = {
#if TCG_TARGET_REG_BITS == 64
#if defined(_WIN64)
    TCG_REG_RCX,
    TCG_REG_RDX,
#else
    TCG_REG_RDI,
    TCG_REG_RSI,
    TCG_REG_RDX,
    TCG_REG_RCX,
#endif
    TCG_REG_R8,
    TCG_REG_R9,
#else
    /* 32-bit mode: all arguments are passed on the stack.  */
#endif
};
110
/* Registers used to return integer function results.  */
static const int tcg_target_call_oarg_regs[] = {
    TCG_REG_EAX,
#if TCG_TARGET_REG_BITS == 32
    /* High half of a 64-bit result on a 32-bit host.  */
    TCG_REG_EDX
#endif
};
117
118
119#define TCG_CT_CONST_S32 0x100
120#define TCG_CT_CONST_U32 0x200
121#define TCG_CT_CONST_I32 0x400
122#define TCG_CT_CONST_WSZ 0x800
123
124
125
126
127#if TCG_TARGET_REG_BITS == 64
128# define TCG_REG_L0 tcg_target_call_iarg_regs[0]
129# define TCG_REG_L1 tcg_target_call_iarg_regs[1]
130#else
131# define TCG_REG_L0 TCG_REG_EAX
132# define TCG_REG_L1 TCG_REG_EDX
133#endif
134
135
136
137
138#if defined(CONFIG_CPUID_H)
139#include "qemu/cpuid.h"
140#endif
141
142
143#if TCG_TARGET_REG_BITS == 64
144# define have_cmov 1
145#elif defined(CONFIG_CPUID_H)
146static bool have_cmov;
147#else
148# define have_cmov 0
149#endif
150
151
152
153bool have_bmi1;
154bool have_popcnt;
155bool have_avx1;
156bool have_avx2;
157
158#ifdef CONFIG_CPUID_H
159static bool have_movbe;
160static bool have_bmi2;
161static bool have_lzcnt;
162#else
163# define have_movbe 0
164# define have_bmi2 0
165# define have_lzcnt 0
166#endif
167
168static tcg_insn_unit *tb_ret_addr;
169
/* Patch a relocation of kind TYPE at CODE_PTR to refer to VALUE+ADDEND.
   Returns false if the displacement does not fit in the field.  */
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend)
{
    value += addend;
    switch(type) {
    case R_386_PC32:
        value -= (uintptr_t)code_ptr;
        if (value != (int32_t)value) {
            return false;
        }
        /* FALLTHRU -- after converting to pc-relative, store 32 bits.  */
    case R_386_32:
        tcg_patch32(code_ptr, value);
        break;
    case R_386_PC8:
        value -= (uintptr_t)code_ptr;
        if (value != (int8_t)value) {
            return false;
        }
        tcg_patch8(code_ptr, value);
        break;
    default:
        tcg_abort();
    }
    return true;
}
196
197#if TCG_TARGET_REG_BITS == 64
198#define ALL_GENERAL_REGS 0x0000ffffu
199#define ALL_VECTOR_REGS 0xffff0000u
200#else
201#define ALL_GENERAL_REGS 0x000000ffu
202#define ALL_VECTOR_REGS 0x00ff0000u
203#endif
204
205
/* Parse one operand-constraint letter from CT_STR into CT.
   Returns the advanced string pointer, or NULL for an unknown letter.  */
static const char *target_parse_constraint(TCGArgConstraint *ct,
                                           const char *ct_str, TCGType type)
{
    switch(*ct_str++) {
    case 'a':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EAX);
        break;
    case 'b':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EBX);
        break;
    case 'c':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_ECX);
        break;
    case 'd':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EDX);
        break;
    case 'S':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_ESI);
        break;
    case 'D':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EDI);
        break;
    case 'q':
        /* A register that can be used as a byte operand.  */
        ct->ct |= TCG_CT_REG;
        ct->u.regs = TCG_TARGET_REG_BITS == 64 ? 0xffff : 0xf;
        break;
    case 'Q':
        /* A register with an addressable second byte (e.g. %ah).  */
        ct->ct |= TCG_CT_REG;
        ct->u.regs = 0xf;
        break;
    case 'r':
        /* A general register.  */
        ct->ct |= TCG_CT_REG;
        ct->u.regs |= ALL_GENERAL_REGS;
        break;
    case 'W':
        /* A constant equal to the operand word size (32 or 64).  */
        ct->ct |= TCG_CT_CONST_WSZ;
        break;
    case 'x':
        /* A vector register.  */
        ct->ct |= TCG_CT_REG;
        ct->u.regs |= ALL_VECTOR_REGS;
        break;

    /* qemu_ld/st address constraint: any general register except
       the two scratch registers L0/L1 used by the slow path.  */
    case 'L':
        ct->ct |= TCG_CT_REG;
        ct->u.regs = TCG_TARGET_REG_BITS == 64 ? 0xffff : 0xff;
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_L0);
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_L1);
        break;

    case 'e':
        ct->ct |= (type == TCG_TYPE_I32 ? TCG_CT_CONST : TCG_CT_CONST_S32);
        break;
    case 'Z':
        ct->ct |= (type == TCG_TYPE_I32 ? TCG_CT_CONST : TCG_CT_CONST_U32);
        break;
    case 'I':
        ct->ct |= (type == TCG_TYPE_I32 ? TCG_CT_CONST : TCG_CT_CONST_I32);
        break;

    default:
        return NULL;
    }
    return ct_str;
}
282
283
284static inline int tcg_target_const_match(tcg_target_long val, TCGType type,
285 const TCGArgConstraint *arg_ct)
286{
287 int ct = arg_ct->ct;
288 if (ct & TCG_CT_CONST) {
289 return 1;
290 }
291 if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) {
292 return 1;
293 }
294 if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) {
295 return 1;
296 }
297 if ((ct & TCG_CT_CONST_I32) && ~val == (int32_t)~val) {
298 return 1;
299 }
300 if ((ct & TCG_CT_CONST_WSZ) && val == (type == TCG_TYPE_I32 ? 32 : 64)) {
301 return 1;
302 }
303 return 0;
304}
305
306# define LOWREGMASK(x) ((x) & 7)
307
308#define P_EXT 0x100
309#define P_EXT38 0x200
310#define P_DATA16 0x400
311#if TCG_TARGET_REG_BITS == 64
312# define P_REXW 0x1000
313# define P_REXB_R 0x2000
314# define P_REXB_RM 0x4000
315# define P_GS 0x8000
316#else
317# define P_REXW 0
318# define P_REXB_R 0
319# define P_REXB_RM 0
320# define P_GS 0
321#endif
322#define P_EXT3A 0x10000
323#define P_SIMDF3 0x20000
324#define P_SIMDF2 0x40000
325#define P_VEXL 0x80000
326
327#define OPC_ARITH_EvIz (0x81)
328#define OPC_ARITH_EvIb (0x83)
329#define OPC_ARITH_GvEv (0x03)
330#define OPC_ANDN (0xf2 | P_EXT38)
331#define OPC_ADD_GvEv (OPC_ARITH_GvEv | (ARITH_ADD << 3))
332#define OPC_AND_GvEv (OPC_ARITH_GvEv | (ARITH_AND << 3))
333#define OPC_BLENDPS (0x0c | P_EXT3A | P_DATA16)
334#define OPC_BSF (0xbc | P_EXT)
335#define OPC_BSR (0xbd | P_EXT)
336#define OPC_BSWAP (0xc8 | P_EXT)
337#define OPC_CALL_Jz (0xe8)
338#define OPC_CMOVCC (0x40 | P_EXT)
339#define OPC_CMP_GvEv (OPC_ARITH_GvEv | (ARITH_CMP << 3))
340#define OPC_DEC_r32 (0x48)
341#define OPC_IMUL_GvEv (0xaf | P_EXT)
342#define OPC_IMUL_GvEvIb (0x6b)
343#define OPC_IMUL_GvEvIz (0x69)
344#define OPC_INC_r32 (0x40)
345#define OPC_JCC_long (0x80 | P_EXT)
346#define OPC_JCC_short (0x70)
347#define OPC_JMP_long (0xe9)
348#define OPC_JMP_short (0xeb)
349#define OPC_LEA (0x8d)
350#define OPC_LZCNT (0xbd | P_EXT | P_SIMDF3)
351#define OPC_MOVB_EvGv (0x88)
352#define OPC_MOVL_EvGv (0x89)
353#define OPC_MOVL_GvEv (0x8b)
354#define OPC_MOVB_EvIz (0xc6)
355#define OPC_MOVL_EvIz (0xc7)
356#define OPC_MOVL_Iv (0xb8)
357#define OPC_MOVBE_GyMy (0xf0 | P_EXT38)
358#define OPC_MOVBE_MyGy (0xf1 | P_EXT38)
359#define OPC_MOVD_VyEy (0x6e | P_EXT | P_DATA16)
360#define OPC_MOVD_EyVy (0x7e | P_EXT | P_DATA16)
361#define OPC_MOVDDUP (0x12 | P_EXT | P_SIMDF2)
362#define OPC_MOVDQA_VxWx (0x6f | P_EXT | P_DATA16)
363#define OPC_MOVDQA_WxVx (0x7f | P_EXT | P_DATA16)
364#define OPC_MOVDQU_VxWx (0x6f | P_EXT | P_SIMDF3)
365#define OPC_MOVDQU_WxVx (0x7f | P_EXT | P_SIMDF3)
366#define OPC_MOVQ_VqWq (0x7e | P_EXT | P_SIMDF3)
367#define OPC_MOVQ_WqVq (0xd6 | P_EXT | P_DATA16)
368#define OPC_MOVSBL (0xbe | P_EXT)
369#define OPC_MOVSWL (0xbf | P_EXT)
370#define OPC_MOVSLQ (0x63 | P_REXW)
371#define OPC_MOVZBL (0xb6 | P_EXT)
372#define OPC_MOVZWL (0xb7 | P_EXT)
373#define OPC_PABSB (0x1c | P_EXT38 | P_DATA16)
374#define OPC_PABSW (0x1d | P_EXT38 | P_DATA16)
375#define OPC_PABSD (0x1e | P_EXT38 | P_DATA16)
376#define OPC_PACKSSDW (0x6b | P_EXT | P_DATA16)
377#define OPC_PACKSSWB (0x63 | P_EXT | P_DATA16)
378#define OPC_PACKUSDW (0x2b | P_EXT38 | P_DATA16)
379#define OPC_PACKUSWB (0x67 | P_EXT | P_DATA16)
380#define OPC_PADDB (0xfc | P_EXT | P_DATA16)
381#define OPC_PADDW (0xfd | P_EXT | P_DATA16)
382#define OPC_PADDD (0xfe | P_EXT | P_DATA16)
383#define OPC_PADDQ (0xd4 | P_EXT | P_DATA16)
384#define OPC_PADDSB (0xec | P_EXT | P_DATA16)
385#define OPC_PADDSW (0xed | P_EXT | P_DATA16)
386#define OPC_PADDUB (0xdc | P_EXT | P_DATA16)
387#define OPC_PADDUW (0xdd | P_EXT | P_DATA16)
388#define OPC_PAND (0xdb | P_EXT | P_DATA16)
389#define OPC_PANDN (0xdf | P_EXT | P_DATA16)
390#define OPC_PBLENDW (0x0e | P_EXT3A | P_DATA16)
391#define OPC_PCMPEQB (0x74 | P_EXT | P_DATA16)
392#define OPC_PCMPEQW (0x75 | P_EXT | P_DATA16)
393#define OPC_PCMPEQD (0x76 | P_EXT | P_DATA16)
394#define OPC_PCMPEQQ (0x29 | P_EXT38 | P_DATA16)
395#define OPC_PCMPGTB (0x64 | P_EXT | P_DATA16)
396#define OPC_PCMPGTW (0x65 | P_EXT | P_DATA16)
397#define OPC_PCMPGTD (0x66 | P_EXT | P_DATA16)
398#define OPC_PCMPGTQ (0x37 | P_EXT38 | P_DATA16)
399#define OPC_PMAXSB (0x3c | P_EXT38 | P_DATA16)
400#define OPC_PMAXSW (0xee | P_EXT | P_DATA16)
401#define OPC_PMAXSD (0x3d | P_EXT38 | P_DATA16)
402#define OPC_PMAXUB (0xde | P_EXT | P_DATA16)
403#define OPC_PMAXUW (0x3e | P_EXT38 | P_DATA16)
404#define OPC_PMAXUD (0x3f | P_EXT38 | P_DATA16)
405#define OPC_PMINSB (0x38 | P_EXT38 | P_DATA16)
406#define OPC_PMINSW (0xea | P_EXT | P_DATA16)
407#define OPC_PMINSD (0x39 | P_EXT38 | P_DATA16)
408#define OPC_PMINUB (0xda | P_EXT | P_DATA16)
409#define OPC_PMINUW (0x3a | P_EXT38 | P_DATA16)
410#define OPC_PMINUD (0x3b | P_EXT38 | P_DATA16)
411#define OPC_PMOVSXBW (0x20 | P_EXT38 | P_DATA16)
412#define OPC_PMOVSXWD (0x23 | P_EXT38 | P_DATA16)
413#define OPC_PMOVSXDQ (0x25 | P_EXT38 | P_DATA16)
414#define OPC_PMOVZXBW (0x30 | P_EXT38 | P_DATA16)
415#define OPC_PMOVZXWD (0x33 | P_EXT38 | P_DATA16)
416#define OPC_PMOVZXDQ (0x35 | P_EXT38 | P_DATA16)
417#define OPC_PMULLW (0xd5 | P_EXT | P_DATA16)
418#define OPC_PMULLD (0x40 | P_EXT38 | P_DATA16)
419#define OPC_POR (0xeb | P_EXT | P_DATA16)
420#define OPC_PSHUFB (0x00 | P_EXT38 | P_DATA16)
421#define OPC_PSHUFD (0x70 | P_EXT | P_DATA16)
422#define OPC_PSHUFLW (0x70 | P_EXT | P_SIMDF2)
423#define OPC_PSHUFHW (0x70 | P_EXT | P_SIMDF3)
424#define OPC_PSHIFTW_Ib (0x71 | P_EXT | P_DATA16)
425#define OPC_PSHIFTD_Ib (0x72 | P_EXT | P_DATA16)
426#define OPC_PSHIFTQ_Ib (0x73 | P_EXT | P_DATA16)
427#define OPC_PSLLW (0xf1 | P_EXT | P_DATA16)
428#define OPC_PSLLD (0xf2 | P_EXT | P_DATA16)
429#define OPC_PSLLQ (0xf3 | P_EXT | P_DATA16)
430#define OPC_PSRAW (0xe1 | P_EXT | P_DATA16)
431#define OPC_PSRAD (0xe2 | P_EXT | P_DATA16)
432#define OPC_PSRLW (0xd1 | P_EXT | P_DATA16)
433#define OPC_PSRLD (0xd2 | P_EXT | P_DATA16)
434#define OPC_PSRLQ (0xd3 | P_EXT | P_DATA16)
435#define OPC_PSUBB (0xf8 | P_EXT | P_DATA16)
436#define OPC_PSUBW (0xf9 | P_EXT | P_DATA16)
437#define OPC_PSUBD (0xfa | P_EXT | P_DATA16)
438#define OPC_PSUBQ (0xfb | P_EXT | P_DATA16)
439#define OPC_PSUBSB (0xe8 | P_EXT | P_DATA16)
440#define OPC_PSUBSW (0xe9 | P_EXT | P_DATA16)
441#define OPC_PSUBUB (0xd8 | P_EXT | P_DATA16)
442#define OPC_PSUBUW (0xd9 | P_EXT | P_DATA16)
443#define OPC_PUNPCKLBW (0x60 | P_EXT | P_DATA16)
444#define OPC_PUNPCKLWD (0x61 | P_EXT | P_DATA16)
445#define OPC_PUNPCKLDQ (0x62 | P_EXT | P_DATA16)
446#define OPC_PUNPCKLQDQ (0x6c | P_EXT | P_DATA16)
447#define OPC_PUNPCKHBW (0x68 | P_EXT | P_DATA16)
448#define OPC_PUNPCKHWD (0x69 | P_EXT | P_DATA16)
449#define OPC_PUNPCKHDQ (0x6a | P_EXT | P_DATA16)
450#define OPC_PUNPCKHQDQ (0x6d | P_EXT | P_DATA16)
451#define OPC_PXOR (0xef | P_EXT | P_DATA16)
452#define OPC_POP_r32 (0x58)
453#define OPC_POPCNT (0xb8 | P_EXT | P_SIMDF3)
454#define OPC_PUSH_r32 (0x50)
455#define OPC_PUSH_Iv (0x68)
456#define OPC_PUSH_Ib (0x6a)
457#define OPC_RET (0xc3)
458#define OPC_SETCC (0x90 | P_EXT | P_REXB_RM)
459#define OPC_SHIFT_1 (0xd1)
460#define OPC_SHIFT_Ib (0xc1)
461#define OPC_SHIFT_cl (0xd3)
462#define OPC_SARX (0xf7 | P_EXT38 | P_SIMDF3)
463#define OPC_SHUFPS (0xc6 | P_EXT)
464#define OPC_SHLX (0xf7 | P_EXT38 | P_DATA16)
465#define OPC_SHRX (0xf7 | P_EXT38 | P_SIMDF2)
466#define OPC_SHRD_Ib (0xac | P_EXT)
467#define OPC_TESTL (0x85)
468#define OPC_TZCNT (0xbc | P_EXT | P_SIMDF3)
469#define OPC_UD2 (0x0b | P_EXT)
470#define OPC_VPBLENDD (0x02 | P_EXT3A | P_DATA16)
471#define OPC_VPBLENDVB (0x4c | P_EXT3A | P_DATA16)
472#define OPC_VPINSRB (0x20 | P_EXT3A | P_DATA16)
473#define OPC_VPINSRW (0xc4 | P_EXT | P_DATA16)
474#define OPC_VBROADCASTSS (0x18 | P_EXT38 | P_DATA16)
475#define OPC_VBROADCASTSD (0x19 | P_EXT38 | P_DATA16)
476#define OPC_VPBROADCASTB (0x78 | P_EXT38 | P_DATA16)
477#define OPC_VPBROADCASTW (0x79 | P_EXT38 | P_DATA16)
478#define OPC_VPBROADCASTD (0x58 | P_EXT38 | P_DATA16)
479#define OPC_VPBROADCASTQ (0x59 | P_EXT38 | P_DATA16)
480#define OPC_VPERMQ (0x00 | P_EXT3A | P_DATA16 | P_REXW)
481#define OPC_VPERM2I128 (0x46 | P_EXT3A | P_DATA16 | P_VEXL)
482#define OPC_VPSLLVD (0x47 | P_EXT38 | P_DATA16)
483#define OPC_VPSLLVQ (0x47 | P_EXT38 | P_DATA16 | P_REXW)
484#define OPC_VPSRAVD (0x46 | P_EXT38 | P_DATA16)
485#define OPC_VPSRLVD (0x45 | P_EXT38 | P_DATA16)
486#define OPC_VPSRLVQ (0x45 | P_EXT38 | P_DATA16 | P_REXW)
487#define OPC_VZEROUPPER (0x77 | P_EXT)
488#define OPC_XCHG_ax_r32 (0x90)
489
490#define OPC_GRP3_Ev (0xf7)
491#define OPC_GRP5 (0xff)
492#define OPC_GRP14 (0x73 | P_EXT | P_DATA16)
493
494
495
496#define ARITH_ADD 0
497#define ARITH_OR 1
498#define ARITH_ADC 2
499#define ARITH_SBB 3
500#define ARITH_AND 4
501#define ARITH_SUB 5
502#define ARITH_XOR 6
503#define ARITH_CMP 7
504
505
506#define SHIFT_ROL 0
507#define SHIFT_ROR 1
508#define SHIFT_SHL 4
509#define SHIFT_SHR 5
510#define SHIFT_SAR 7
511
512
513#define EXT3_NOT 2
514#define EXT3_NEG 3
515#define EXT3_MUL 4
516#define EXT3_IMUL 5
517#define EXT3_DIV 6
518#define EXT3_IDIV 7
519
520
521#define EXT5_INC_Ev 0
522#define EXT5_DEC_Ev 1
523#define EXT5_CALLN_Ev 2
524#define EXT5_JMPN_Ev 4
525
526
527#define JCC_JMP (-1)
528#define JCC_JO 0x0
529#define JCC_JNO 0x1
530#define JCC_JB 0x2
531#define JCC_JAE 0x3
532#define JCC_JE 0x4
533#define JCC_JNE 0x5
534#define JCC_JBE 0x6
535#define JCC_JA 0x7
536#define JCC_JS 0x8
537#define JCC_JNS 0x9
538#define JCC_JP 0xa
539#define JCC_JNP 0xb
540#define JCC_JL 0xc
541#define JCC_JGE 0xd
542#define JCC_JLE 0xe
543#define JCC_JG 0xf
544
/* Map TCG comparison conditions to x86 condition codes.  */
static const uint8_t tcg_cond_to_jcc[] = {
    [TCG_COND_EQ] = JCC_JE,
    [TCG_COND_NE] = JCC_JNE,
    [TCG_COND_LT] = JCC_JL,
    [TCG_COND_GE] = JCC_JGE,
    [TCG_COND_LE] = JCC_JLE,
    [TCG_COND_GT] = JCC_JG,
    [TCG_COND_LTU] = JCC_JB,
    [TCG_COND_GEU] = JCC_JAE,
    [TCG_COND_LEU] = JCC_JBE,
    [TCG_COND_GTU] = JCC_JA,
};
557
#if TCG_TARGET_REG_BITS == 64
/* Emit the prefix bytes and opcode byte(s) for OPC.  R, RM and X are
   the registers that will go into the reg, r/m and index fields; only
   their high bits are needed here, to compute the REX prefix.  */
static void tcg_out_opc(TCGContext *s, int opc, int r, int rm, int x)
{
    int rex;

    if (opc & P_GS) {
        tcg_out8(s, 0x65);   /* %gs segment override */
    }
    if (opc & P_DATA16) {
        /* 0x66 operand-size prefix; cannot combine with REX.W.  */
        tcg_debug_assert((opc & P_REXW) == 0);
        tcg_out8(s, 0x66);
    }
    if (opc & P_SIMDF3) {
        tcg_out8(s, 0xf3);
    } else if (opc & P_SIMDF2) {
        tcg_out8(s, 0xf2);
    }

    rex = 0;
    rex |= (opc & P_REXW) ? 0x8 : 0x0;  /* REX.W */
    rex |= (r & 8) >> 1;                /* REX.R */
    rex |= (x & 8) >> 2;                /* REX.X */
    rex |= (rm & 8) >> 3;               /* REX.B */

    /* P_REXB_{R,RM} indicate that the register is a byte operand.
       For %[abcd]l no REX prefix is needed, but for %{si,di,bp,sp}l
       one must be present, else the encoding means %[abcd]h.  The
       flag bits only force the REX byte to be emitted; they are
       discarded by the uint8_t cast below.  */
    rex |= opc & (r >= 4 ? P_REXB_R : 0);
    rex |= opc & (rm >= 4 ? P_REXB_RM : 0);

    if (rex) {
        tcg_out8(s, (uint8_t)(rex | 0x40));
    }

    /* Two- and three-byte opcode escapes.  */
    if (opc & (P_EXT | P_EXT38 | P_EXT3A)) {
        tcg_out8(s, 0x0f);
        if (opc & P_EXT38) {
            tcg_out8(s, 0x38);
        } else if (opc & P_EXT3A) {
            tcg_out8(s, 0x3a);
        }
    }

    tcg_out8(s, opc);
}
#else
/* 32-bit variant: there are no REX prefixes, so the register
   arguments are unnecessary.  */
static void tcg_out_opc(TCGContext *s, int opc)
{
    if (opc & P_DATA16) {
        tcg_out8(s, 0x66);
    }
    if (opc & P_SIMDF3) {
        tcg_out8(s, 0xf3);
    } else if (opc & P_SIMDF2) {
        tcg_out8(s, 0xf2);
    }
    if (opc & (P_EXT | P_EXT38 | P_EXT3A)) {
        tcg_out8(s, 0x0f);
        if (opc & P_EXT38) {
            tcg_out8(s, 0x38);
        } else if (opc & P_EXT3A) {
            tcg_out8(s, 0x3a);
        }
    }
    tcg_out8(s, opc);
}

/* Discard the register arguments so that both host widths share the
   same five-argument call sites.  */
#define tcg_out_opc(s, opc, r, rm, x) (tcg_out_opc)(s, opc)
#endif
632
/* Emit OPC with a register-direct ModRM byte (mod = 11).  */
static void tcg_out_modrm(TCGContext *s, int opc, int r, int rm)
{
    tcg_out_opc(s, opc, r, rm, 0);
    tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
}
638
/* Emit a VEX prefix plus the opcode byte for OPC.  R goes in the reg
   field, V is the second (VEX.vvvv) source, RM/INDEX are the memory
   operand registers (only their high bits matter here).  */
static void tcg_out_vex_opc(TCGContext *s, int opc, int r, int v,
                            int rm, int index)
{
    int tmp;

    /* Use the two-byte VEX form if possible; it cannot encode VEX.W,
       VEX.X, VEX.B or the 0F38/0F3A opcode map selectors.  */
    if ((opc & (P_EXT | P_EXT38 | P_EXT3A | P_REXW)) == P_EXT
        && ((rm | index) & 8) == 0) {
        /* Two-byte VEX prefix.  */
        tcg_out8(s, 0xc5);

        tmp = (r & 8 ? 0 : 0x80);              /* VEX.R */
    } else {
        /* Three-byte VEX prefix.  */
        tcg_out8(s, 0xc4);

        /* VEX.m-mmmm: select the opcode map.  */
        if (opc & P_EXT3A) {
            tmp = 3;
        } else if (opc & P_EXT38) {
            tmp = 2;
        } else if (opc & P_EXT) {
            tmp = 1;
        } else {
            g_assert_not_reached();
        }
        tmp |= (r & 8 ? 0 : 0x80);             /* VEX.R */
        tmp |= (index & 8 ? 0 : 0x40);         /* VEX.X */
        tmp |= (rm & 8 ? 0 : 0x20);            /* VEX.B */
        tcg_out8(s, tmp);

        tmp = (opc & P_REXW ? 0x80 : 0);       /* VEX.W */
    }

    tmp |= (opc & P_VEXL ? 0x04 : 0);          /* VEX.L: 256-bit */
    /* VEX.pp: implied SIMD prefix.  */
    if (opc & P_DATA16) {
        tmp |= 1;
    } else if (opc & P_SIMDF3) {
        tmp |= 2;
    } else if (opc & P_SIMDF2) {
        tmp |= 3;
    }
    tmp |= (~v & 15) << 3;                     /* VEX.vvvv, inverted */
    tcg_out8(s, tmp);
    tcg_out8(s, opc);
}
687
/* Emit a VEX-prefixed OPC with a register-direct ModRM byte.  */
static void tcg_out_vex_modrm(TCGContext *s, int opc, int r, int v, int rm)
{
    tcg_out_vex_opc(s, opc, r, v, rm, 0);
    tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
}
693
/* Output the ModRM/SIB/displacement bytes for an address of the form
   rm + (index << shift) + offset.  A missing RM or INDEX is indicated
   by a negative value.  In 64-bit mode for absolute addresses, ~RM is
   presumably the number of immediate bytes following the instruction
   (it is added into the %rip adjustment below) -- TODO confirm with
   callers.  */
static void tcg_out_sib_offset(TCGContext *s, int r, int rm, int index,
                               int shift, intptr_t offset)
{
    int mod, len;

    if (index < 0 && rm < 0) {
        if (TCG_TARGET_REG_BITS == 64) {
            /* Try for a %rip-relative addressing mode.  This has
               replaced the 32-bit absolute mode in the mod=00,rm=101
               encoding.  */
            intptr_t pc = (intptr_t)s->code_ptr + 5 + ~rm;
            intptr_t disp = offset - pc;
            if (disp == (int32_t)disp) {
                tcg_out8(s, (LOWREGMASK(r) << 3) | 5);
                tcg_out32(s, disp);
                return;
            }

            /* Try for an absolute address encoding, which requires a
               SIB byte (base = none, index = none) in 64-bit mode.  */
            if (offset == (int32_t)offset) {
                tcg_out8(s, (LOWREGMASK(r) << 3) | 4);
                tcg_out8(s, (4 << 3) | 5);
                tcg_out32(s, offset);
                return;
            }

            /* Absolute 64-bit addresses are unsupported here.  */
            g_assert_not_reached();
        } else {
            /* 32-bit mode: plain absolute address.  */
            tcg_out8(s, (r << 3) | 5);
            tcg_out32(s, offset);
            return;
        }
    }

    /* Choose the mod field and the displacement size.  Note that
       mod=00,rm=101 would mean "absolute", so %ebp always needs a
       displacement; a missing base also uses rm=5 with disp32.  */
    if (rm < 0) {
        mod = 0, len = 4, rm = 5;
    } else if (offset == 0 && LOWREGMASK(rm) != TCG_REG_EBP) {
        mod = 0, len = 0;
    } else if (offset == (int8_t)offset) {
        mod = 0x40, len = 1;
    } else {
        mod = 0x80, len = 4;
    }

    /* Use a single byte MODRM format if possible.  rm=4 in the ModRM
       byte means "SIB follows", so %esp as base always needs a SIB.  */
    if (index < 0 && LOWREGMASK(rm) != TCG_REG_ESP) {
        /* Single-byte ModRM format.  */
        tcg_out8(s, mod | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
    } else {
        /* Two-byte ModRM+SIB format.  */

        /* index == 4 in the SIB byte means "no index"; %esp cannot be
           used as an index register.  */
        if (index < 0) {
            index = 4;
        } else {
            tcg_debug_assert(index != TCG_REG_ESP);
        }

        tcg_out8(s, mod | (LOWREGMASK(r) << 3) | 4);
        tcg_out8(s, (shift << 6) | (LOWREGMASK(index) << 3) | LOWREGMASK(rm));
    }

    if (len == 1) {
        tcg_out8(s, offset);
    } else if (len == 4) {
        tcg_out32(s, offset);
    }
}
775
/* Emit OPC with a full base+index*scale+offset memory operand.  */
static void tcg_out_modrm_sib_offset(TCGContext *s, int opc, int r, int rm,
                                     int index, int shift, intptr_t offset)
{
    tcg_out_opc(s, opc, r, rm < 0 ? 0 : rm, index < 0 ? 0 : index);
    tcg_out_sib_offset(s, r, rm, index, shift, offset);
}
782
/* As tcg_out_modrm_sib_offset, but with a VEX prefix and V operand.  */
static void tcg_out_vex_modrm_sib_offset(TCGContext *s, int opc, int r, int v,
                                         int rm, int index, int shift,
                                         intptr_t offset)
{
    tcg_out_vex_opc(s, opc, r, v, rm < 0 ? 0 : rm, index < 0 ? 0 : index);
    tcg_out_sib_offset(s, r, rm, index, shift, offset);
}
790
791
/* Emit OPC with a base+offset memory operand (no index register).  */
static inline void tcg_out_modrm_offset(TCGContext *s, int opc, int r,
                                        int rm, intptr_t offset)
{
    tcg_out_modrm_sib_offset(s, opc, r, rm, -1, 0, offset);
}
797
/* VEX-prefixed version of tcg_out_modrm_offset.  */
static inline void tcg_out_vex_modrm_offset(TCGContext *s, int opc, int r,
                                            int v, int rm, intptr_t offset)
{
    tcg_out_vex_modrm_sib_offset(s, opc, r, v, rm, -1, 0, offset);
}
803
804
/* Emit OPC with a %rip-relative operand whose 32-bit displacement is
   zero for now; the caller attaches a constant-pool relocation.  */
static inline void tcg_out_modrm_pool(TCGContext *s, int opc, int r)
{
    tcg_out_opc(s, opc, r, 0, 0);
    /* mod=00, rm=101: %rip-relative (64-bit) / absolute (32-bit).  */
    tcg_out8(s, LOWREGMASK(r) << 3 | 5);
    tcg_out32(s, 0);
}
812
813
/* VEX-prefixed version of tcg_out_modrm_pool.  */
static inline void tcg_out_vex_modrm_pool(TCGContext *s, int opc, int r)
{
    tcg_out_vex_opc(s, opc, r, 0, 0, 0);
    /* mod=00, rm=101: %rip-relative (64-bit) / absolute (32-bit).  */
    tcg_out8(s, LOWREGMASK(r) << 3 | 5);
    tcg_out32(s, 0);
}
821
822
/* Emit a register-to-register ALU operation.  SUBOP is one of the
   ARITH_* codes, optionally ORed with prefix flags such as P_REXW.  */
static inline void tgen_arithr(TCGContext *s, int subop, int dest, int src)
{
    /* Propagate any prefix flags carried above the low three bits.  */
    int ext = subop & ~0x7;
    subop &= 0x7;

    tcg_out_modrm(s, OPC_ARITH_GvEv + (subop << 3) + ext, dest, src);
}
831
/* Copy ARG to RET, handling all four GPR/XMM source/destination
   combinations for integer types, plus vector-to-vector moves.
   Registers >= 16 are the xmm registers.  Always succeeds.  */
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
{
    int rexw = 0;

    if (arg == ret) {
        return true;
    }
    switch (type) {
    case TCG_TYPE_I64:
        rexw = P_REXW;
        /* FALLTHRU */
    case TCG_TYPE_I32:
        if (ret < 16) {
            if (arg < 16) {
                /* GPR -> GPR */
                tcg_out_modrm(s, OPC_MOVL_GvEv + rexw, ret, arg);
            } else {
                /* XMM -> GPR: movd/movq with operands swapped */
                tcg_out_vex_modrm(s, OPC_MOVD_EyVy + rexw, arg, 0, ret);
            }
        } else {
            if (arg < 16) {
                /* GPR -> XMM */
                tcg_out_vex_modrm(s, OPC_MOVD_VyEy + rexw, ret, 0, arg);
            } else {
                /* XMM -> XMM: low 64 bits */
                tcg_out_vex_modrm(s, OPC_MOVQ_VqWq, ret, 0, arg);
            }
        }
        break;

    case TCG_TYPE_V64:
        tcg_debug_assert(ret >= 16 && arg >= 16);
        tcg_out_vex_modrm(s, OPC_MOVQ_VqWq, ret, 0, arg);
        break;
    case TCG_TYPE_V128:
        tcg_debug_assert(ret >= 16 && arg >= 16);
        tcg_out_vex_modrm(s, OPC_MOVDQA_VxWx, ret, 0, arg);
        break;
    case TCG_TYPE_V256:
        tcg_debug_assert(ret >= 16 && arg >= 16);
        tcg_out_vex_modrm(s, OPC_MOVDQA_VxWx | P_VEXL, ret, 0, arg);
        break;

    default:
        g_assert_not_reached();
    }
    return true;
}
877
/* AVX2 broadcast opcodes indexed by element size (MO_8..MO_64).  */
static const int avx2_dup_insn[4] = {
    OPC_VPBROADCASTB, OPC_VPBROADCASTW,
    OPC_VPBROADCASTD, OPC_VPBROADCASTQ,
};
882
/* Duplicate the VECE-sized low element of register A into all lanes
   of vector register R.  Always succeeds.  */
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg r, TCGReg a)
{
    if (have_avx2) {
        int vex_l = (type == TCG_TYPE_V256 ? P_VEXL : 0);
        tcg_out_vex_modrm(s, avx2_dup_insn[vece] + vex_l, r, 0, a);
    } else {
        /* Without AVX2, widen the element by interleaving with itself
           until a 32-bit shuffle can finish the job.  */
        switch (vece) {
        case MO_8:
            tcg_out_vex_modrm(s, OPC_PUNPCKLBW, r, a, a);
            a = r;
            /* FALLTHRU */
        case MO_16:
            tcg_out_vex_modrm(s, OPC_PUNPCKLWD, r, a, a);
            a = r;
            /* FALLTHRU */
        case MO_32:
            tcg_out_vex_modrm(s, OPC_PSHUFD, r, 0, a);
            /* imm8 0: replicate lane 0 into all four dword lanes.  */
            tcg_out8(s, 0);
            break;
        case MO_64:
            tcg_out_vex_modrm(s, OPC_PUNPCKLQDQ, r, a, a);
            break;
        default:
            g_assert_not_reached();
        }
    }
    return true;
}
914
/* Duplicate a VECE-sized element loaded from BASE+OFFSET into all
   lanes of vector register R.  Always succeeds.  */
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg r, TCGReg base, intptr_t offset)
{
    if (have_avx2) {
        int vex_l = (type == TCG_TYPE_V256 ? P_VEXL : 0);
        tcg_out_vex_modrm_offset(s, avx2_dup_insn[vece] + vex_l,
                                 r, 0, base, offset);
    } else {
        switch (vece) {
        case MO_64:
            tcg_out_vex_modrm_offset(s, OPC_MOVDDUP, r, 0, base, offset);
            break;
        case MO_32:
            tcg_out_vex_modrm_offset(s, OPC_VBROADCASTSS, r, 0, base, offset);
            break;
        case MO_16:
            /* Insert into lane 0, then broadcast from the register.  */
            tcg_out_vex_modrm_offset(s, OPC_VPINSRW, r, r, base, offset);
            tcg_out8(s, 0); /* imm8: lane 0 */
            tcg_out_dup_vec(s, type, vece, r, r);
            break;
        case MO_8:
            tcg_out_vex_modrm_offset(s, OPC_VPINSRB, r, r, base, offset);
            tcg_out8(s, 0); /* imm8: lane 0 */
            tcg_out_dup_vec(s, type, vece, r, r);
            break;
        default:
            g_assert_not_reached();
        }
    }
    return true;
}
946
947static void tcg_out_dupi_vec(TCGContext *s, TCGType type,
948 TCGReg ret, tcg_target_long arg)
949{
950 int vex_l = (type == TCG_TYPE_V256 ? P_VEXL : 0);
951
952 if (arg == 0) {
953 tcg_out_vex_modrm(s, OPC_PXOR, ret, ret, ret);
954 return;
955 }
956 if (arg == -1) {
957 tcg_out_vex_modrm(s, OPC_PCMPEQB + vex_l, ret, ret, ret);
958 return;
959 }
960
961 if (TCG_TARGET_REG_BITS == 64) {
962 if (type == TCG_TYPE_V64) {
963 tcg_out_vex_modrm_pool(s, OPC_MOVQ_VqWq, ret);
964 } else if (have_avx2) {
965 tcg_out_vex_modrm_pool(s, OPC_VPBROADCASTQ + vex_l, ret);
966 } else {
967 tcg_out_vex_modrm_pool(s, OPC_MOVDDUP, ret);
968 }
969 new_pool_label(s, arg, R_386_PC32, s->code_ptr - 4, -4);
970 } else {
971 if (have_avx2) {
972 tcg_out_vex_modrm_pool(s, OPC_VPBROADCASTW + vex_l, ret);
973 } else {
974 tcg_out_vex_modrm_pool(s, OPC_VBROADCASTSS, ret);
975 }
976 new_pool_label(s, arg, R_386_32, s->code_ptr - 4, 0);
977 }
978}
979
/* Load the constant ARG into register RET, choosing the shortest
   usable encoding (xor, mov imm32, sign-extended imm32, pc-relative
   lea, or mov imm64).  Vector destinations go via tcg_out_dupi_vec.  */
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg)
{
    tcg_target_long diff;

    switch (type) {
    case TCG_TYPE_I32:
#if TCG_TARGET_REG_BITS == 64
    case TCG_TYPE_I64:
#endif
        if (ret < 16) {
            break;
        }
        /* FALLTHRU -- integer type but xmm destination.  */
    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
    case TCG_TYPE_V256:
        tcg_debug_assert(ret >= 16);
        tcg_out_dupi_vec(s, type, ret, arg);
        return;
    default:
        g_assert_not_reached();
    }

    if (arg == 0) {
        /* xor r,r is shorter than mov $0,r.  */
        tgen_arithr(s, ARITH_XOR, ret, ret);
        return;
    }
    if (arg == (uint32_t)arg || type == TCG_TYPE_I32) {
        /* Zero-extending 32-bit mov immediate.  */
        tcg_out_opc(s, OPC_MOVL_Iv + LOWREGMASK(ret), 0, ret, 0);
        tcg_out32(s, arg);
        return;
    }
    if (arg == (int32_t)arg) {
        /* Sign-extending 32-bit mov immediate.  */
        tcg_out_modrm(s, OPC_MOVL_EvIz + P_REXW, 0, ret);
        tcg_out32(s, arg);
        return;
    }

    /* Try a 7-byte pc-relative lea before the 10-byte movq;
       the lea is 7 bytes, hence the +7 in the pc below.  */
    diff = arg - ((uintptr_t)s->code_ptr + 7);
    if (diff == (int32_t)diff) {
        tcg_out_opc(s, OPC_LEA | P_REXW, ret, 0, 0);
        tcg_out8(s, (LOWREGMASK(ret) << 3) | 5);
        tcg_out32(s, diff);
        return;
    }

    /* Full 64-bit mov immediate.  */
    tcg_out_opc(s, OPC_MOVL_Iv + P_REXW + LOWREGMASK(ret), 0, ret, 0);
    tcg_out64(s, arg);
}
1031
/* Push the immediate VAL, using the 8-bit form when it fits.
   Aborts if VAL does not fit in a sign-extended 32-bit immediate.  */
static inline void tcg_out_pushi(TCGContext *s, tcg_target_long val)
{
    if (val == (int8_t)val) {
        tcg_out_opc(s, OPC_PUSH_Ib, 0, 0, 0);
        tcg_out8(s, val);
    } else if (val == (int32_t)val) {
        tcg_out_opc(s, OPC_PUSH_Iv, 0, 0, 0);
        tcg_out32(s, val);
    } else {
        tcg_abort();
    }
}
1044
/* Emit a host memory barrier for the TCG_MO_* bits in A0.  Only a
   store-load barrier needs an instruction here; other orderings are
   presumably already guaranteed by the host memory model -- NOTE
   (review): confirm against the target's barrier requirements.  */
static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
{
    if (a0 & TCG_MO_ST_LD) {
        /* lock orl $0, 0(%esp) -- a serializing locked RMW used as a
           full barrier in place of mfence.  */
        tcg_out8(s, 0xf0);   /* LOCK prefix */
        tcg_out_modrm_offset(s, OPC_ARITH_EvIb, ARITH_OR, TCG_REG_ESP, 0);
        tcg_out8(s, 0);
    }
}
1056
/* Push register REG onto the stack.  */
static inline void tcg_out_push(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_PUSH_r32 + LOWREGMASK(reg), 0, reg, 0);
}
1061
/* Pop the top of stack into register REG.  */
static inline void tcg_out_pop(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_POP_r32 + LOWREGMASK(reg), 0, reg, 0);
}
1066
/* Load a value of TYPE from ARG1+ARG2 into register RET, selecting
   the GPR or xmm form from the destination register number.  */
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
                       TCGReg arg1, intptr_t arg2)
{
    switch (type) {
    case TCG_TYPE_I32:
        if (ret < 16) {
            tcg_out_modrm_offset(s, OPC_MOVL_GvEv, ret, arg1, arg2);
        } else {
            tcg_out_vex_modrm_offset(s, OPC_MOVD_VyEy, ret, 0, arg1, arg2);
        }
        break;
    case TCG_TYPE_I64:
        if (ret < 16) {
            tcg_out_modrm_offset(s, OPC_MOVL_GvEv | P_REXW, ret, arg1, arg2);
            break;
        }
        /* FALLTHRU -- 64-bit load into an xmm register.  */
    case TCG_TYPE_V64:
        tcg_debug_assert(ret >= 16);
        tcg_out_vex_modrm_offset(s, OPC_MOVQ_VqWq, ret, 0, arg1, arg2);
        break;
    case TCG_TYPE_V128:
        /* movdqa requires a 16-byte aligned operand; relies on v128
           slots being 16-byte aligned -- an unaligned pointer would
           fault here rather than load silently.  */
        tcg_debug_assert(ret >= 16);
        tcg_out_vex_modrm_offset(s, OPC_MOVDQA_VxWx, ret, 0, arg1, arg2);
        break;
    case TCG_TYPE_V256:
        /* 32-byte alignment is not guaranteed for v256, so use the
           unaligned movdqu form.  */
        tcg_debug_assert(ret >= 16);
        tcg_out_vex_modrm_offset(s, OPC_MOVDQU_VxWx | P_VEXL,
                                 ret, 0, arg1, arg2);
        break;
    default:
        g_assert_not_reached();
    }
}
1111
/* Store register ARG of TYPE to ARG1+ARG2, selecting the GPR or xmm
   form from the source register number.  Mirror of tcg_out_ld.  */
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
                       TCGReg arg1, intptr_t arg2)
{
    switch (type) {
    case TCG_TYPE_I32:
        if (arg < 16) {
            tcg_out_modrm_offset(s, OPC_MOVL_EvGv, arg, arg1, arg2);
        } else {
            tcg_out_vex_modrm_offset(s, OPC_MOVD_EyVy, arg, 0, arg1, arg2);
        }
        break;
    case TCG_TYPE_I64:
        if (arg < 16) {
            tcg_out_modrm_offset(s, OPC_MOVL_EvGv | P_REXW, arg, arg1, arg2);
            break;
        }
        /* FALLTHRU -- 64-bit store from an xmm register.  */
    case TCG_TYPE_V64:
        tcg_debug_assert(arg >= 16);
        tcg_out_vex_modrm_offset(s, OPC_MOVQ_WqVq, arg, 0, arg1, arg2);
        break;
    case TCG_TYPE_V128:
        /* movdqa requires a 16-byte aligned operand; relies on v128
           slots being 16-byte aligned -- an unaligned pointer would
           fault here rather than store silently.  */
        tcg_debug_assert(arg >= 16);
        tcg_out_vex_modrm_offset(s, OPC_MOVDQA_WxVx, arg, 0, arg1, arg2);
        break;
    case TCG_TYPE_V256:
        /* 32-byte alignment is not guaranteed for v256, so use the
           unaligned movdqu form.  */
        tcg_debug_assert(arg >= 16);
        tcg_out_vex_modrm_offset(s, OPC_MOVDQU_WxVx | P_VEXL,
                                 arg, 0, arg1, arg2);
        break;
    default:
        g_assert_not_reached();
    }
}
1156
/* Store the immediate VAL to BASE+OFS.  Returns false when the value
   cannot be encoded as a (sign-extended) 32-bit store immediate.  */
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs)
{
    int rexw = 0;
    if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I64) {
        if (val != (int32_t)val) {
            return false;
        }
        rexw = P_REXW;
    } else if (type != TCG_TYPE_I32) {
        return false;
    }
    tcg_out_modrm_offset(s, OPC_MOVL_EvIz | rexw, 0, base, ofs);
    tcg_out32(s, val);
    return true;
}
1173
/* Emit a shift/rotate of REG by the immediate COUNT.  SUBOPC is one
   of the SHIFT_* codes, optionally ORed with prefix flags.  */
static void tcg_out_shifti(TCGContext *s, int subopc, int reg, int count)
{
    /* Propagate any prefix flags carried above the low three bits.  */
    int ext = subopc & ~0x7;
    subopc &= 0x7;

    if (count == 1) {
        /* Shift-by-one has a shorter dedicated encoding.  */
        tcg_out_modrm(s, OPC_SHIFT_1 + ext, subopc, reg);
    } else {
        tcg_out_modrm(s, OPC_SHIFT_Ib + ext, subopc, reg);
        tcg_out8(s, count);
    }
}
1187
/* Byte-swap the 32-bit register REG in place.  */
static inline void tcg_out_bswap32(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_BSWAP + LOWREGMASK(reg), 0, reg, 0);
}
1192
/* Swap the two bytes of the 16-bit value in REG (rolw $8, reg).  */
static inline void tcg_out_rolw_8(TCGContext *s, int reg)
{
    tcg_out_shifti(s, SHIFT_ROL + P_DATA16, reg, 8);
}
1197
/* Zero-extend the low byte of SRC into DEST (movzbl).  */
static inline void tcg_out_ext8u(TCGContext *s, int dest, int src)
{
    /* On 32-bit hosts only %eax..%ebx have byte encodings.  */
    tcg_debug_assert(src < 4 || TCG_TARGET_REG_BITS == 64);
    tcg_out_modrm(s, OPC_MOVZBL + P_REXB_RM, dest, src);
}
1204
/* Sign-extend the low byte of SRC into DEST (movsbl/movsbq).  */
static void tcg_out_ext8s(TCGContext *s, int dest, int src, int rexw)
{
    /* On 32-bit hosts only %eax..%ebx have byte encodings.  */
    tcg_debug_assert(src < 4 || TCG_TARGET_REG_BITS == 64);
    tcg_out_modrm(s, OPC_MOVSBL + P_REXB_RM + rexw, dest, src);
}
1211
/* Zero-extend the low 16 bits of SRC into DEST (movzwl).  */
static inline void tcg_out_ext16u(TCGContext *s, int dest, int src)
{
    tcg_out_modrm(s, OPC_MOVZWL, dest, src);
}
1217
/* Sign-extend the low 16 bits of SRC into DEST (movswl/movswq).  */
static inline void tcg_out_ext16s(TCGContext *s, int dest, int src, int rexw)
{
    tcg_out_modrm(s, OPC_MOVSWL + rexw, dest, src);
}
1223
/* Zero-extend the low 32 bits of SRC into DEST: a 32-bit mov, which
   clears the high bits of a 64-bit destination.  */
static inline void tcg_out_ext32u(TCGContext *s, int dest, int src)
{
    tcg_out_modrm(s, OPC_MOVL_GvEv, dest, src);
}
1229
/* Sign-extend the low 32 bits of SRC into DEST (movslq).  */
static inline void tcg_out_ext32s(TCGContext *s, int dest, int src)
{
    tcg_out_modrm(s, OPC_MOVSLQ, dest, src);
}
1234
/* Byte-swap the 64-bit register REG in place.  */
static inline void tcg_out_bswap64(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_BSWAP + P_REXW + LOWREGMASK(reg), 0, reg, 0);
}
1239
/* Emit an ALU operation C with the immediate VAL on register R0.
   If CF is set, the operation must set the carry flag correctly, so
   the INC/DEC shortcut may not be used.  Aborts when VAL cannot be
   encoded as an immediate.  */
static void tgen_arithi(TCGContext *s, int c, int r0,
                        tcg_target_long val, int cf)
{
    int rexw = 0;

    if (TCG_TARGET_REG_BITS == 64) {
        /* Split any P_REXW flag off the arith sub-opcode.  */
        rexw = c & -8;
        c &= 7;
    }

    /* add/sub of +/-1 can be done with inc/dec (which do not touch
       the carry flag, hence the !cf guard).  */
    if (!cf && (c == ARITH_ADD || c == ARITH_SUB) && (val == 1 || val == -1)) {
        int is_inc = (c == ARITH_ADD) ^ (val < 0);
        if (TCG_TARGET_REG_BITS == 64) {
            /* The single-byte inc/dec encodings are re-tasked as REX
               prefixes in 64-bit mode, so use the ModRM form.  */
            tcg_out_modrm(s, OPC_GRP5 + rexw,
                          (is_inc ? EXT5_INC_Ev : EXT5_DEC_Ev), r0);
        } else {
            tcg_out8(s, (is_inc ? OPC_INC_r32 : OPC_DEC_r32) + r0);
        }
        return;
    }

    if (c == ARITH_AND) {
        if (TCG_TARGET_REG_BITS == 64) {
            if (val == 0xffffffffu) {
                tcg_out_ext32u(s, r0, r0);
                return;
            }
            if (val == (uint32_t)val) {
                /* AND with no high bits set can use a 32-bit op.  */
                rexw = 0;
            }
        }
        if (val == 0xffu && (r0 < 4 || TCG_TARGET_REG_BITS == 64)) {
            tcg_out_ext8u(s, r0, r0);
            return;
        }
        if (val == 0xffffu) {
            tcg_out_ext16u(s, r0, r0);
            return;
        }
    }

    if (val == (int8_t)val) {
        tcg_out_modrm(s, OPC_ARITH_EvIb + rexw, c, r0);
        tcg_out8(s, val);
        return;
    }
    if (rexw == 0 || val == (int32_t)val) {
        tcg_out_modrm(s, OPC_ARITH_EvIz + rexw, c, r0);
        tcg_out32(s, val);
        return;
    }

    tcg_abort();
}
1300
1301static void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val)
1302{
1303 if (val != 0) {
1304 tgen_arithi(s, ARITH_ADD + P_REXW, reg, val, 0);
1305 }
1306}
1307
1308
/* Emit a jump to label L.  OPC == -1 selects an unconditional JMP;
   otherwise OPC is a JCC condition code.  SMALL forces the 1-byte
   displacement form (and aborts if it does not fit).  */
static void tcg_out_jxx(TCGContext *s, int opc, TCGLabel *l, int small)
{
    int32_t val, val1;

    if (l->has_value) {
        /* Backward jump: the target is known, pick the shortest form.  */
        val = tcg_pcrel_diff(s, l->u.value_ptr);
        val1 = val - 2;                 /* account for the 2-byte short insn */
        if ((int8_t)val1 == val1) {
            if (opc == -1) {
                tcg_out8(s, OPC_JMP_short);
            } else {
                tcg_out8(s, OPC_JCC_short + opc);
            }
            tcg_out8(s, val1);
        } else {
            if (small) {
                tcg_abort();
            }
            /* Long forms: 5-byte jmp rel32 or 6-byte 0f 8x rel32.  */
            if (opc == -1) {
                tcg_out8(s, OPC_JMP_long);
                tcg_out32(s, val - 5);
            } else {
                tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
                tcg_out32(s, val - 6);
            }
        }
    } else if (small) {
        /* Forward jump, short form: leave a 1-byte relocation to patch.  */
        if (opc == -1) {
            tcg_out8(s, OPC_JMP_short);
        } else {
            tcg_out8(s, OPC_JCC_short + opc);
        }
        tcg_out_reloc(s, s->code_ptr, R_386_PC8, l, -1);
        s->code_ptr += 1;
    } else {
        /* Forward jump, long form: leave a 4-byte relocation to patch.  */
        if (opc == -1) {
            tcg_out8(s, OPC_JMP_long);
        } else {
            tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
        }
        tcg_out_reloc(s, s->code_ptr, R_386_PC32, l, -4);
        s->code_ptr += 4;
    }
}
1353
/* Emit a comparison of ARG1 against ARG2 (a constant if CONST_ARG2),
   setting the flags for a following JCC/SETCC/CMOVCC.  REXW selects
   the 64-bit form.  */
static void tcg_out_cmp(TCGContext *s, TCGArg arg1, TCGArg arg2,
                        int const_arg2, int rexw)
{
    if (const_arg2) {
        if (arg2 == 0) {
            /* test r, r is shorter than cmp $0, r and sets the same
               flags for equality/sign tests.  */
            tcg_out_modrm(s, OPC_TESTL + rexw, arg1, arg1);
        } else {
            tgen_arithi(s, ARITH_CMP + rexw, arg1, arg2, 0);
        }
    } else {
        tgen_arithr(s, ARITH_CMP + rexw, arg1, arg2);
    }
}
1368
/* 32-bit compare-and-branch: cmp then jcc to LABEL.  */
static void tcg_out_brcond32(TCGContext *s, TCGCond cond,
                             TCGArg arg1, TCGArg arg2, int const_arg2,
                             TCGLabel *label, int small)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, 0);
    tcg_out_jxx(s, tcg_cond_to_jcc[cond], label, small);
}
1376
1377#if TCG_TARGET_REG_BITS == 64
/* 64-bit compare-and-branch: cmp then jcc to LABEL.  */
static void tcg_out_brcond64(TCGContext *s, TCGCond cond,
                             TCGArg arg1, TCGArg arg2, int const_arg2,
                             TCGLabel *label, int small)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
    tcg_out_jxx(s, tcg_cond_to_jcc[cond], label, small);
}
1385#else
1386
1387
/* Double-word (64-bit on a 32-bit host) compare-and-branch.
   args[0],args[1] = low/high of operand A; args[2],args[3] = low/high of
   operand B; args[4] = condition; args[5] = target label.  For ordered
   conditions: branch on the high parts, fall through to compare the low
   parts (unsigned) when the high parts are equal.  */
static void tcg_out_brcond2(TCGContext *s, const TCGArg *args,
                            const int *const_args, int small)
{
    TCGLabel *label_next = gen_new_label();
    TCGLabel *label_this = arg_label(args[5]);

    switch(args[4]) {
    case TCG_COND_EQ:
        /* Equal only if both halves are equal.  */
        tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2],
                         label_next, 1);
        tcg_out_brcond32(s, TCG_COND_EQ, args[1], args[3], const_args[3],
                         label_this, small);
        break;
    case TCG_COND_NE:
        /* Not-equal if either half differs.  */
        tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2],
                         label_this, small);
        tcg_out_brcond32(s, TCG_COND_NE, args[1], args[3], const_args[3],
                         label_this, small);
        break;
    case TCG_COND_LT:
        tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3],
                         label_this, small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2],
                         label_this, small);
        break;
    case TCG_COND_LE:
        tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3],
                         label_this, small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2],
                         label_this, small);
        break;
    case TCG_COND_GT:
        tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3],
                         label_this, small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2],
                         label_this, small);
        break;
    case TCG_COND_GE:
        tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3],
                         label_this, small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2],
                         label_this, small);
        break;
    case TCG_COND_LTU:
        tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3],
                         label_this, small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2],
                         label_this, small);
        break;
    case TCG_COND_LEU:
        tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3],
                         label_this, small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2],
                         label_this, small);
        break;
    case TCG_COND_GTU:
        tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3],
                         label_this, small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2],
                         label_this, small);
        break;
    case TCG_COND_GEU:
        tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3],
                         label_this, small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2],
                         label_this, small);
        break;
    default:
        tcg_abort();
    }
    tcg_out_label(s, label_next, s->code_ptr);
}
1468#endif
1469
/* Set DEST to 0/1 according to a 32-bit comparison: cmp; setcc %destb;
   then zero-extend the byte result.  */
static void tcg_out_setcond32(TCGContext *s, TCGCond cond, TCGArg dest,
                              TCGArg arg1, TCGArg arg2, int const_arg2)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, 0);
    tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
    tcg_out_ext8u(s, dest, dest);
}
1477
1478#if TCG_TARGET_REG_BITS == 64
/* Set DEST to 0/1 according to a 64-bit comparison.  The final movzbl
   zero-extends to the full 64-bit register.  */
static void tcg_out_setcond64(TCGContext *s, TCGCond cond, TCGArg dest,
                              TCGArg arg1, TCGArg arg2, int const_arg2)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
    tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
    tcg_out_ext8u(s, dest, dest);
}
1486#else
/* Double-word setcond on a 32-bit host.  args[0] = dest, args[1..4] =
   low/high pairs of the two operands, args[5] = condition.  Implemented
   on top of tcg_out_brcond2.  */
static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
                             const int *const_args)
{
    TCGArg new_args[6];
    TCGLabel *label_true, *label_over;

    /* Build a brcond2 argument vector from args[1..5].  */
    memcpy(new_args, args+1, 5*sizeof(TCGArg));

    if (args[0] == args[1] || args[0] == args[2]
        || (!const_args[3] && args[0] == args[3])
        || (!const_args[4] && args[0] == args[4])) {
        /* The destination overlaps one of the source registers, so we
           cannot pre-clear it: branch to set 1 or 0 explicitly.  */
        label_true = gen_new_label();
        label_over = gen_new_label();

        new_args[5] = label_arg(label_true);
        tcg_out_brcond2(s, new_args, const_args+1, 1);

        tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);
        tcg_out_jxx(s, JCC_JMP, label_over, 1);
        tcg_out_label(s, label_true, s->code_ptr);

        tcg_out_movi(s, TCG_TYPE_I32, args[0], 1);
        tcg_out_label(s, label_over, s->code_ptr);
    } else {
        /* The destination does not overlap the sources: clear the
           destination first, jump over the increment if the inverted
           condition holds, and otherwise add 1.  */
        tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);

        label_over = gen_new_label();
        new_args[4] = tcg_invert_cond(new_args[4]);
        new_args[5] = label_arg(label_over);
        tcg_out_brcond2(s, new_args, const_args+1, 1);

        tgen_arithi(s, ARITH_ADD, args[0], 1, 0);
        tcg_out_label(s, label_over, s->code_ptr);
    }
}
1528#endif
1529
1530static void tcg_out_cmov(TCGContext *s, TCGCond cond, int rexw,
1531 TCGReg dest, TCGReg v1)
1532{
1533 if (have_cmov) {
1534 tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond] | rexw, dest, v1);
1535 } else {
1536 TCGLabel *over = gen_new_label();
1537 tcg_out_jxx(s, tcg_cond_to_jcc[tcg_invert_cond(cond)], over, 1);
1538 tcg_out_mov(s, TCG_TYPE_I32, dest, v1);
1539 tcg_out_label(s, over, s->code_ptr);
1540 }
1541}
1542
/* 32-bit movcond: compare C1 against C2, then cmov V1 into DEST.  */
static void tcg_out_movcond32(TCGContext *s, TCGCond cond, TCGReg dest,
                              TCGReg c1, TCGArg c2, int const_c2,
                              TCGReg v1)
{
    tcg_out_cmp(s, c1, c2, const_c2, 0);
    tcg_out_cmov(s, cond, 0, dest, v1);
}
1550
1551#if TCG_TARGET_REG_BITS == 64
/* 64-bit movcond: compare C1 against C2, then cmov V1 into DEST.  */
static void tcg_out_movcond64(TCGContext *s, TCGCond cond, TCGReg dest,
                              TCGReg c1, TCGArg c2, int const_c2,
                              TCGReg v1)
{
    tcg_out_cmp(s, c1, c2, const_c2, P_REXW);
    tcg_out_cmov(s, cond, P_REXW, dest, v1);
}
1559#endif
1560
/* Count trailing zeros of ARG1 into DEST; ARG2 supplies the result for a
   zero input (a register, or the operand width when CONST_A2).  */
static void tcg_out_ctz(TCGContext *s, int rexw, TCGReg dest, TCGReg arg1,
                        TCGArg arg2, bool const_a2)
{
    if (have_bmi1) {
        /* TZCNT already returns the operand width for a zero input,
           so a constant arg2 must equal that width.  */
        tcg_out_modrm(s, OPC_TZCNT + rexw, dest, arg1);
        if (const_a2) {
            tcg_debug_assert(arg2 == (rexw ? 64 : 32));
        } else {
            /* Otherwise select arg2 on zero input via the carry flag,
               which TZCNT sets when the source is zero.  */
            tcg_debug_assert(dest != arg2);
            tcg_out_cmov(s, TCG_COND_LTU, rexw, dest, arg2);
        }
    } else {
        /* BSF leaves DEST undefined for a zero input; ZF is set in that
           case, so patch in arg2 with cmove.  */
        tcg_debug_assert(dest != arg2);
        tcg_out_modrm(s, OPC_BSF + rexw, dest, arg1);
        tcg_out_cmov(s, TCG_COND_EQ, rexw, dest, arg2);
    }
}
1578
/* Count leading zeros of ARG1 into DEST; ARG2 supplies the result for a
   zero input (a register, or the operand width when CONST_A2).  */
static void tcg_out_clz(TCGContext *s, int rexw, TCGReg dest, TCGReg arg1,
                        TCGArg arg2, bool const_a2)
{
    if (have_lzcnt) {
        /* LZCNT already returns the operand width for a zero input.  */
        tcg_out_modrm(s, OPC_LZCNT + rexw, dest, arg1);
        if (const_a2) {
            tcg_debug_assert(arg2 == (rexw ? 64 : 32));
        } else {
            /* LZCNT sets the carry flag when the source is zero; use it
               to substitute arg2.  */
            tcg_debug_assert(dest != arg2);
            tcg_out_cmov(s, TCG_COND_LTU, rexw, dest, arg2);
        }
    } else {
        tcg_debug_assert(!const_a2);
        tcg_debug_assert(dest != arg1);
        tcg_debug_assert(dest != arg2);

        /* BSR yields the bit index of the highest set bit; XOR with
           width-1 converts that to a leading-zero count.  */
        tcg_out_modrm(s, OPC_BSR + rexw, dest, arg1);
        tgen_arithi(s, ARITH_XOR + rexw, dest, rexw ? 63 : 31, 0);

        /* BSR leaves DEST undefined for a zero input; re-test arg1 and
           substitute arg2 in that case.  */
        tcg_out_cmp(s, arg1, 0, 1, rexw);
        tcg_out_cmov(s, TCG_COND_EQ, rexw, dest, arg2);
    }
}
1604
/* Emit a direct call (CALL != 0) or jump to DEST.  Uses the 5-byte
   rel32 form when the displacement fits, otherwise an indirect form
   through a 32-bit constant-pool slot.  */
static void tcg_out_branch(TCGContext *s, int call, tcg_insn_unit *dest)
{
    intptr_t disp = tcg_pcrel_diff(s, dest) - 5;

    if (disp == (int32_t)disp) {
        tcg_out_opc(s, call ? OPC_CALL_Jz : OPC_JMP_long, 0, 0, 0);
        tcg_out32(s, disp);
    } else {
        /* The displacement does not fit in 32 bits (only possible on
           x86-64).  Emit an indirect call/jmp with a rip-relative
           memory operand whose 4-byte displacement is resolved through
           the constant pool (ModRM mod=00, rm=101 selects rip-rel).
           NOTE(review): assumes a pool slot is within +-2GB of code —
           confirmed by the pool being emitted with the TB.  */
        tcg_out_opc(s, OPC_GRP5, 0, 0, 0);
        tcg_out8(s, (call ? EXT5_CALLN_Ev : EXT5_JMPN_Ev) << 3 | 5);
        new_pool_label(s, (uintptr_t)dest, R_386_PC32, s->code_ptr, -4);
        tcg_out32(s, 0);
    }
}
1623
/* Emit a call to DEST.  */
static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *dest)
{
    tcg_out_branch(s, 1, dest);
}
1628
/* Emit a jump to DEST.  */
static void tcg_out_jmp(TCGContext *s, tcg_insn_unit *dest)
{
    tcg_out_branch(s, 0, dest);
}
1633
1634static void tcg_out_nopn(TCGContext *s, int n)
1635{
1636 int i;
1637
1638
1639
1640
1641
1642 tcg_debug_assert(n >= 1);
1643 for (i = 1; i < n; ++i) {
1644 tcg_out8(s, 0x66);
1645 }
1646 tcg_out8(s, 0x90);
1647}
1648
1649#if defined(CONFIG_SOFTMMU)
1650#include "tcg-ldst.inc.c"
1651
1652
1653
1654
/* Softmmu slow-path load helpers, indexed by (MO_BSWAP | MO_SIZE).
   NOTE(review): the helpers appear to take (env, addr, oi, retaddr) —
   confirm against tcg/tcg-op.h declarations.  */
static void * const qemu_ld_helpers[16] = {
    [MO_UB]   = helper_ret_ldub_mmu,
    [MO_LEUW] = helper_le_lduw_mmu,
    [MO_LEUL] = helper_le_ldul_mmu,
    [MO_LEQ]  = helper_le_ldq_mmu,
    [MO_BEUW] = helper_be_lduw_mmu,
    [MO_BEUL] = helper_be_ldul_mmu,
    [MO_BEQ]  = helper_be_ldq_mmu,
};
1664
1665
1666
1667
/* Softmmu slow-path store helpers, indexed by (MO_BSWAP | MO_SIZE).
   NOTE(review): the helpers appear to take (env, addr, value, oi,
   retaddr) — confirm against tcg/tcg-op.h declarations.  */
static void * const qemu_st_helpers[16] = {
    [MO_UB]   = helper_ret_stb_mmu,
    [MO_LEUW] = helper_le_stw_mmu,
    [MO_LEUL] = helper_le_stl_mmu,
    [MO_LEQ]  = helper_le_stq_mmu,
    [MO_BEUW] = helper_be_stw_mmu,
    [MO_BEUL] = helper_be_stl_mmu,
    [MO_BEQ]  = helper_be_stq_mmu,
};
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
/* Perform the TLB load and compare.

   Inputs:
   ADDRLO and ADDRHI contain the low and high part of the address.

   MEM_INDEX and OPC are the memory context and memop of the access.

   WHICH is the offset into the CPUTLBEntry structure of the slot to read.
   This should be offsetof addr_read or addr_write.

   Outputs:
   LABEL_PTR is filled with 1 (32-bit addresses) or 2 (64-bit addresses)
   positions of the displacements of forward jumps to the TLB miss case.

   Second argument register (TCG_REG_L1) is loaded with the low part of
   the address.  In the TLB hit case it has been adjusted by the TLB
   addend and so is a host address.  In the TLB miss case it still holds
   the guest address.

   First argument register (TCG_REG_L0) is clobbered.  */
static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
                                    int mem_index, TCGMemOp opc,
                                    tcg_insn_unit **label_ptr, int which)
{
    const TCGReg r0 = TCG_REG_L0;
    const TCGReg r1 = TCG_REG_L1;
    TCGType ttype = TCG_TYPE_I32;
    TCGType tlbtype = TCG_TYPE_I32;
    int trexw = 0, hrexw = 0, tlbrexw = 0;
    unsigned a_bits = get_alignment_bits(opc);
    unsigned s_bits = opc & MO_SIZE;
    unsigned a_mask = (1 << a_bits) - 1;
    unsigned s_mask = (1 << s_bits) - 1;
    target_ulong tlb_mask;

    if (TCG_TARGET_REG_BITS == 64) {
        if (TARGET_LONG_BITS == 64) {
            ttype = TCG_TYPE_I64;
            trexw = P_REXW;
        }
        if (TCG_TYPE_PTR == TCG_TYPE_I64) {
            hrexw = P_REXW;
            if (TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS > 32) {
                tlbtype = TCG_TYPE_I64;
                tlbrexw = P_REXW;
            }
        }
    }

    /* r0 = (addr >> (page_bits - entry_bits)); then mask it with the
       per-mmu-index fast mask and add the table base, yielding a pointer
       to the CPUTLBEntry for this address.  */
    tcg_out_mov(s, tlbtype, r0, addrlo);
    tcg_out_shifti(s, SHIFT_SHR + tlbrexw, r0,
                   TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);

    tcg_out_modrm_offset(s, OPC_AND_GvEv + trexw, r0, TCG_AREG0,
                         TLB_MASK_TABLE_OFS(mem_index) +
                         offsetof(CPUTLBDescFast, mask));

    tcg_out_modrm_offset(s, OPC_ADD_GvEv + hrexw, r0, TCG_AREG0,
                         TLB_MASK_TABLE_OFS(mem_index) +
                         offsetof(CPUTLBDescFast, table));

    /* If the required alignment is at least as large as the access,
       simply copy the address and mask.  For lesser alignments, check
       that we don't cross pages for the complete access.  */
    if (a_bits >= s_bits) {
        tcg_out_mov(s, ttype, r1, addrlo);
    } else {
        tcg_out_modrm_offset(s, OPC_LEA + trexw, r1, addrlo, s_mask - a_mask);
    }
    tlb_mask = (target_ulong)TARGET_PAGE_MASK | a_mask;
    tgen_arithi(s, ARITH_AND + trexw, r1, tlb_mask, 0);

    /* cmp which(r0), r1 -- compare the masked page against the entry.  */
    tcg_out_modrm_offset(s, OPC_CMP_GvEv + trexw, r1, r0, which);

    /* Prepare for both the fast-path add of the tlb addend, and the
       slow-path function argument setup.  */
    tcg_out_mov(s, ttype, r1, addrlo);

    /* jne slow_path */
    tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
    label_ptr[0] = s->code_ptr;
    s->code_ptr += 4;

    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
        /* cmp (which+4)(r0), addrhi -- check the high half too.  */
        tcg_out_modrm_offset(s, OPC_CMP_GvEv, addrhi, r0, which + 4);

        /* jne slow_path */
        tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
        label_ptr[1] = s->code_ptr;
        s->code_ptr += 4;
    }

    /* TLB hit: add the host-address addend to the guest address.  */
    tcg_out_modrm_offset(s, OPC_ADD_GvEv + hrexw, r1, r0,
                         offsetof(CPUTLBEntry, addend));
}
1779
1780
1781
1782
1783
/* Record the context of a call to the out-of-line slow path for a
   qemu_ld/st opcode, so the slow path can be generated later.  */
static void add_qemu_ldst_label(TCGContext *s, bool is_ld, bool is_64,
                                TCGMemOpIdx oi,
                                TCGReg datalo, TCGReg datahi,
                                TCGReg addrlo, TCGReg addrhi,
                                tcg_insn_unit *raddr,
                                tcg_insn_unit **label_ptr)
{
    TCGLabelQemuLdst *label = new_ldst_label(s);

    label->is_ld = is_ld;
    label->oi = oi;
    label->type = is_64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
    label->datalo_reg = datalo;
    label->datahi_reg = datahi;
    label->addrlo_reg = addrlo;
    label->addrhi_reg = addrhi;
    label->raddr = raddr;
    /* Displacement(s) of the jne emitted by tcg_out_tlb_load.  */
    label->label_ptr[0] = label_ptr[0];
    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
        label->label_ptr[1] = label_ptr[1];
    }
}
1806
1807
1808
1809
/* Generate code for the slow path of a qemu_ld: call the softmmu load
   helper, move its result into the destination register(s), and jump
   back to the fast path.  */
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
{
    TCGMemOpIdx oi = l->oi;
    TCGMemOp opc = get_memop(oi);
    TCGReg data_reg;
    tcg_insn_unit **label_ptr = &l->label_ptr[0];
    int rexw = (l->type == TCG_TYPE_I64 ? P_REXW : 0);

    /* Resolve the TLB-miss jump(s) to point here.  */
    tcg_patch32(label_ptr[0], s->code_ptr - label_ptr[0] - 4);
    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
        tcg_patch32(label_ptr[1], s->code_ptr - label_ptr[1] - 4);
    }

    if (TCG_TARGET_REG_BITS == 32) {
        /* 32-bit host: pass all helper arguments on the stack.  */
        int ofs = 0;

        tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs);
        ofs += 4;

        tcg_out_st(s, TCG_TYPE_I32, l->addrlo_reg, TCG_REG_ESP, ofs);
        ofs += 4;

        if (TARGET_LONG_BITS == 64) {
            tcg_out_st(s, TCG_TYPE_I32, l->addrhi_reg, TCG_REG_ESP, ofs);
            ofs += 4;
        }

        tcg_out_sti(s, TCG_TYPE_I32, oi, TCG_REG_ESP, ofs);
        ofs += 4;

        tcg_out_sti(s, TCG_TYPE_PTR, (uintptr_t)l->raddr, TCG_REG_ESP, ofs);
    } else {
        tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
        /* The second argument is already loaded with addrlo by
           tcg_out_tlb_load (TCG_REG_L1 == iarg_regs[1]).  */
        tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2], oi);
        tcg_out_movi(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[3],
                     (uintptr_t)l->raddr);
    }

    tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);

    /* Move the helper return value into the destination, applying the
       required extension.  */
    data_reg = l->datalo_reg;
    switch (opc & MO_SSIZE) {
    case MO_SB:
        tcg_out_ext8s(s, data_reg, TCG_REG_EAX, rexw);
        break;
    case MO_SW:
        tcg_out_ext16s(s, data_reg, TCG_REG_EAX, rexw);
        break;
#if TCG_TARGET_REG_BITS == 64
    case MO_SL:
        tcg_out_ext32s(s, data_reg, TCG_REG_EAX);
        break;
#endif
    case MO_UB:
    case MO_UW:
        /* Note that the helpers have zero-extended to tcg_target_long.  */
    case MO_UL:
        tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
        break;
    case MO_Q:
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_RAX);
        } else if (data_reg == TCG_REG_EDX) {
            /* xchg %edx, %eax -- avoid clobbering half of the result.  */
            tcg_out_opc(s, OPC_XCHG_ax_r32 + TCG_REG_EDX, 0, 0, 0);
            tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EAX);
        } else {
            tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
            tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EDX);
        }
        break;
    default:
        tcg_abort();
    }

    /* Jump back to the code following the qemu_ld fast path.  */
    tcg_out_jmp(s, l->raddr);
    return true;
}
1891
1892
1893
1894
/* Generate code for the slow path of a qemu_st: set up the softmmu
   store-helper arguments and tail-call the helper, which returns
   directly to the fast path via the pushed return address.  */
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
{
    TCGMemOpIdx oi = l->oi;
    TCGMemOp opc = get_memop(oi);
    TCGMemOp s_bits = opc & MO_SIZE;
    tcg_insn_unit **label_ptr = &l->label_ptr[0];
    TCGReg retaddr;

    /* Resolve the TLB-miss jump(s) to point here.  */
    tcg_patch32(label_ptr[0], s->code_ptr - label_ptr[0] - 4);
    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
        tcg_patch32(label_ptr[1], s->code_ptr - label_ptr[1] - 4);
    }

    if (TCG_TARGET_REG_BITS == 32) {
        /* 32-bit host: pass all helper arguments on the stack.  */
        int ofs = 0;

        tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs);
        ofs += 4;

        tcg_out_st(s, TCG_TYPE_I32, l->addrlo_reg, TCG_REG_ESP, ofs);
        ofs += 4;

        if (TARGET_LONG_BITS == 64) {
            tcg_out_st(s, TCG_TYPE_I32, l->addrhi_reg, TCG_REG_ESP, ofs);
            ofs += 4;
        }

        tcg_out_st(s, TCG_TYPE_I32, l->datalo_reg, TCG_REG_ESP, ofs);
        ofs += 4;

        if (s_bits == MO_64) {
            tcg_out_st(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_ESP, ofs);
            ofs += 4;
        }

        tcg_out_sti(s, TCG_TYPE_I32, oi, TCG_REG_ESP, ofs);
        ofs += 4;

        retaddr = TCG_REG_EAX;
        tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
        tcg_out_st(s, TCG_TYPE_PTR, retaddr, TCG_REG_ESP, ofs);
    } else {
        tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
        /* The second argument is already loaded with addrlo by
           tcg_out_tlb_load (TCG_REG_L1 == iarg_regs[1]).  */
        tcg_out_mov(s, (s_bits == MO_64 ? TCG_TYPE_I64 : TCG_TYPE_I32),
                    tcg_target_call_iarg_regs[2], l->datalo_reg);
        tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3], oi);

        /* The fifth argument (return address) goes in a register when
           the ABI provides one, else in the stack argument slot.  */
        if (ARRAY_SIZE(tcg_target_call_iarg_regs) > 4) {
            retaddr = tcg_target_call_iarg_regs[4];
            tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
        } else {
            retaddr = TCG_REG_RAX;
            tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
            tcg_out_st(s, TCG_TYPE_PTR, retaddr, TCG_REG_ESP,
                       TCG_TARGET_CALL_STACK_OFFSET);
        }
    }

    /* "Tail call" to the helper, with the return address back inline:
       the helper's ret resumes the fast path at l->raddr.  */
    tcg_out_push(s, retaddr);
    tcg_out_jmp(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
    return true;
}
1960#elif TCG_TARGET_REG_BITS == 32
1961# define x86_guest_base_seg 0
1962# define x86_guest_base_index -1
1963# define x86_guest_base_offset guest_base
1964#else
/* User-mode, 64-bit host: how to apply guest_base to guest addresses --
   via a segment-override prefix, an index register, or a constant
   displacement.  NOTE(review): presumably filled in during prologue
   generation (not visible in this chunk) -- confirm.  */
static int x86_guest_base_seg;
static int x86_guest_base_index = -1;
static int32_t x86_guest_base_offset;
1968# if defined(__x86_64__) && defined(__linux__)
1969# include <asm/prctl.h>
1970# include <sys/prctl.h>
/* arch_prctl(2) historically lacks a libc prototype; declare it here.  */
int arch_prctl(int code, unsigned long addr);
/* Point %gs at guest_base so guest accesses can use a GS segment
   override; returns the P_GS prefix flag on success, 0 on failure.  */
static inline int setup_guest_base_seg(void)
{
    if (arch_prctl(ARCH_SET_GS, guest_base) == 0) {
        return P_GS;
    }
    return 0;
}
1979# elif defined (__FreeBSD__) || defined (__FreeBSD_kernel__)
1980# include <machine/sysarch.h>
/* FreeBSD variant: set the GS base via sysarch(); returns P_GS on
   success, 0 on failure.  */
static inline int setup_guest_base_seg(void)
{
    if (sysarch(AMD64_SET_GSBASE, &guest_base) == 0) {
        return P_GS;
    }
    return 0;
}
1988# else
/* Fallback: no way to program a segment base on this OS.  */
static inline int setup_guest_base_seg(void)
{
    return 0;
}
1993# endif
1994#endif
1995
/* Emit the actual memory load for a qemu_ld, from the host address
   formed by BASE + INDEX + OFS with segment override SEG, honoring the
   size, sign and byte-swap requirements of MEMOP.  On a 32-bit host a
   64-bit result is split across DATALO/DATAHI.  */
static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
                                   TCGReg base, int index, intptr_t ofs,
                                   int seg, bool is64, TCGMemOp memop)
{
    const TCGMemOp real_bswap = memop & MO_BSWAP;
    TCGMemOp bswap = real_bswap;
    int rexw = is64 * P_REXW;
    int movop = OPC_MOVL_GvEv;

    /* With MOVBE the swap is folded into the load itself.  */
    if (have_movbe && real_bswap) {
        bswap = 0;
        movop = OPC_MOVBE_GyMy;
    }

    switch (memop & MO_SSIZE) {
    case MO_UB:
        tcg_out_modrm_sib_offset(s, OPC_MOVZBL + seg, datalo,
                                 base, index, 0, ofs);
        break;
    case MO_SB:
        tcg_out_modrm_sib_offset(s, OPC_MOVSBL + rexw + seg, datalo,
                                 base, index, 0, ofs);
        break;
    case MO_UW:
        tcg_out_modrm_sib_offset(s, OPC_MOVZWL + seg, datalo,
                                 base, index, 0, ofs);
        if (real_bswap) {
            /* Swap the two bytes of a 16-bit value with rol $8.  */
            tcg_out_rolw_8(s, datalo);
        }
        break;
    case MO_SW:
        if (real_bswap) {
            if (have_movbe) {
                /* movbe 16-bit load, then sign-extend in place.  */
                tcg_out_modrm_sib_offset(s, OPC_MOVBE_GyMy + P_DATA16 + seg,
                                         datalo, base, index, 0, ofs);
            } else {
                tcg_out_modrm_sib_offset(s, OPC_MOVZWL + seg, datalo,
                                         base, index, 0, ofs);
                tcg_out_rolw_8(s, datalo);
            }
            tcg_out_modrm(s, OPC_MOVSWL + rexw, datalo, datalo);
        } else {
            tcg_out_modrm_sib_offset(s, OPC_MOVSWL + rexw + seg,
                                     datalo, base, index, 0, ofs);
        }
        break;
    case MO_UL:
        tcg_out_modrm_sib_offset(s, movop + seg, datalo, base, index, 0, ofs);
        if (bswap) {
            tcg_out_bswap32(s, datalo);
        }
        break;
#if TCG_TARGET_REG_BITS == 64
    case MO_SL:
        if (real_bswap) {
            /* Load (+ swap) as 32 bits, then sign-extend.  */
            tcg_out_modrm_sib_offset(s, movop + seg, datalo,
                                     base, index, 0, ofs);
            if (bswap) {
                tcg_out_bswap32(s, datalo);
            }
            tcg_out_ext32s(s, datalo, datalo);
        } else {
            tcg_out_modrm_sib_offset(s, OPC_MOVSLQ + seg, datalo,
                                     base, index, 0, ofs);
        }
        break;
#endif
    case MO_Q:
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_out_modrm_sib_offset(s, movop + P_REXW + seg, datalo,
                                     base, index, 0, ofs);
            if (bswap) {
                tcg_out_bswap64(s, datalo);
            }
        } else {
            /* 32-bit host: load the two halves separately.  A swapped
               access swaps the roles of the half registers as well.  */
            if (real_bswap) {
                int t = datalo;
                datalo = datahi;
                datahi = t;
            }
            /* Load the half that overlaps BASE last, so the address is
               not clobbered before the second load.  */
            if (base != datalo) {
                tcg_out_modrm_sib_offset(s, movop + seg, datalo,
                                         base, index, 0, ofs);
                tcg_out_modrm_sib_offset(s, movop + seg, datahi,
                                         base, index, 0, ofs + 4);
            } else {
                tcg_out_modrm_sib_offset(s, movop + seg, datahi,
                                         base, index, 0, ofs + 4);
                tcg_out_modrm_sib_offset(s, movop + seg, datalo,
                                         base, index, 0, ofs);
            }
            if (bswap) {
                tcg_out_bswap32(s, datalo);
                tcg_out_bswap32(s, datahi);
            }
        }
        break;
    default:
        tcg_abort();
    }
}
2097
2098
2099
2100
/* Generate the fast path of a qemu_ld opcode: TLB lookup (softmmu) or a
   direct guest_base-relative access (user mode), then the load itself.
   ARGS holds datalo[,datahi],addrlo[,addrhi],oi as produced by the
   opcode layout.  */
static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
{
    TCGReg datalo, datahi, addrlo;
    TCGReg addrhi __attribute__((unused));
    TCGMemOpIdx oi;
    TCGMemOp opc;
#if defined(CONFIG_SOFTMMU)
    int mem_index;
    tcg_insn_unit *label_ptr[2];
#endif

    datalo = *args++;
    datahi = (TCG_TARGET_REG_BITS == 32 && is64 ? *args++ : 0);
    addrlo = *args++;
    addrhi = (TARGET_LONG_BITS > TCG_TARGET_REG_BITS ? *args++ : 0);
    oi = *args++;
    opc = get_memop(oi);

#if defined(CONFIG_SOFTMMU)
    mem_index = get_mmuidx(oi);

    tcg_out_tlb_load(s, addrlo, addrhi, mem_index, opc,
                     label_ptr, offsetof(CPUTLBEntry, addr_read));

    /* TLB hit: TCG_REG_L1 now holds the host address.  */
    tcg_out_qemu_ld_direct(s, datalo, datahi, TCG_REG_L1, -1, 0, 0, is64, opc);

    /* Record the current context of the load for the slow path.  */
    add_qemu_ldst_label(s, true, is64, oi, datalo, datahi, addrlo, addrhi,
                        s->code_ptr, label_ptr);
#else
    tcg_out_qemu_ld_direct(s, datalo, datahi, addrlo, x86_guest_base_index,
                           x86_guest_base_offset, x86_guest_base_seg,
                           is64, opc);
#endif
}
2137
/* Emit the actual memory store for a qemu_st, to the host address
   formed by BASE + INDEX + OFS with segment override SEG, honoring the
   size and byte-swap requirements of MEMOP.  */
static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
                                   TCGReg base, int index, intptr_t ofs,
                                   int seg, TCGMemOp memop)
{
    /* ??? Ideally we wouldn't need a scratch register.  As it is, the
       register constraints guarantee TCG_REG_L0 is free here, and it is
       used to hold byte-swapped copies without clobbering the data
       register.  */
    const TCGReg scratch = TCG_REG_L0;
    const TCGMemOp real_bswap = memop & MO_BSWAP;
    TCGMemOp bswap = real_bswap;
    int movop = OPC_MOVL_EvGv;

    /* With MOVBE the swap is folded into the store itself.  */
    if (have_movbe && real_bswap) {
        bswap = 0;
        movop = OPC_MOVBE_MyGy;
    }

    switch (memop & MO_SIZE) {
    case MO_8:
        /* On 32-bit x86 only regs 0-3 have byte-addressable low halves;
           bounce other registers through the scratch.  */
        if (TCG_TARGET_REG_BITS == 32 && datalo >= 4) {
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
            datalo = scratch;
        }
        tcg_out_modrm_sib_offset(s, OPC_MOVB_EvGv + P_REXB_R + seg,
                                 datalo, base, index, 0, ofs);
        break;
    case MO_16:
        if (bswap) {
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
            tcg_out_rolw_8(s, scratch);
            datalo = scratch;
        }
        tcg_out_modrm_sib_offset(s, movop + P_DATA16 + seg, datalo,
                                 base, index, 0, ofs);
        break;
    case MO_32:
        if (bswap) {
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
            tcg_out_bswap32(s, scratch);
            datalo = scratch;
        }
        tcg_out_modrm_sib_offset(s, movop + seg, datalo, base, index, 0, ofs);
        break;
    case MO_64:
        if (TCG_TARGET_REG_BITS == 64) {
            if (bswap) {
                tcg_out_mov(s, TCG_TYPE_I64, scratch, datalo);
                tcg_out_bswap64(s, scratch);
                datalo = scratch;
            }
            tcg_out_modrm_sib_offset(s, movop + P_REXW + seg, datalo,
                                     base, index, 0, ofs);
        } else if (bswap) {
            /* 32-bit host, swapped: store each half swapped, with the
               high half at the low offset.  */
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datahi);
            tcg_out_bswap32(s, scratch);
            tcg_out_modrm_sib_offset(s, OPC_MOVL_EvGv + seg, scratch,
                                     base, index, 0, ofs);
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
            tcg_out_bswap32(s, scratch);
            tcg_out_modrm_sib_offset(s, OPC_MOVL_EvGv + seg, scratch,
                                     base, index, 0, ofs + 4);
        } else {
            /* 32-bit host: store the two halves separately; MOVBE (or a
               plain big-endian layout) swaps the roles of the halves.  */
            if (real_bswap) {
                int t = datalo;
                datalo = datahi;
                datahi = t;
            }
            tcg_out_modrm_sib_offset(s, movop + seg, datalo,
                                     base, index, 0, ofs);
            tcg_out_modrm_sib_offset(s, movop + seg, datahi,
                                     base, index, 0, ofs + 4);
        }
        break;
    default:
        tcg_abort();
    }
}
2218
/* Generate the fast path of a qemu_st opcode: TLB lookup (softmmu) or a
   direct guest_base-relative access (user mode), then the store itself.
   ARGS holds datalo[,datahi],addrlo[,addrhi],oi as produced by the
   opcode layout.  */
static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
{
    TCGReg datalo, datahi, addrlo;
    TCGReg addrhi __attribute__((unused));
    TCGMemOpIdx oi;
    TCGMemOp opc;
#if defined(CONFIG_SOFTMMU)
    int mem_index;
    tcg_insn_unit *label_ptr[2];
#endif

    datalo = *args++;
    datahi = (TCG_TARGET_REG_BITS == 32 && is64 ? *args++ : 0);
    addrlo = *args++;
    addrhi = (TARGET_LONG_BITS > TCG_TARGET_REG_BITS ? *args++ : 0);
    oi = *args++;
    opc = get_memop(oi);

#if defined(CONFIG_SOFTMMU)
    mem_index = get_mmuidx(oi);

    tcg_out_tlb_load(s, addrlo, addrhi, mem_index, opc,
                     label_ptr, offsetof(CPUTLBEntry, addr_write));

    /* TLB hit: TCG_REG_L1 now holds the host address.  */
    tcg_out_qemu_st_direct(s, datalo, datahi, TCG_REG_L1, -1, 0, 0, opc);

    /* Record the current context of the store for the slow path.  */
    add_qemu_ldst_label(s, false, is64, oi, datalo, datahi, addrlo, addrhi,
                        s->code_ptr, label_ptr);
#else
    tcg_out_qemu_st_direct(s, datalo, datahi, addrlo, x86_guest_base_index,
                           x86_guest_base_offset, x86_guest_base_seg, opc);
#endif
}
2254
2255static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
2256 const TCGArg *args, const int *const_args)
2257{
2258 TCGArg a0, a1, a2;
2259 int c, const_a2, vexop, rexw = 0;
2260
2261#if TCG_TARGET_REG_BITS == 64
2262# define OP_32_64(x) \
2263 case glue(glue(INDEX_op_, x), _i64): \
2264 rexw = P_REXW; \
2265 case glue(glue(INDEX_op_, x), _i32)
2266#else
2267# define OP_32_64(x) \
2268 case glue(glue(INDEX_op_, x), _i32)
2269#endif
2270
2271
2272 a0 = args[0];
2273 a1 = args[1];
2274 a2 = args[2];
2275 const_a2 = const_args[2];
2276
2277 switch (opc) {
2278 case INDEX_op_exit_tb:
2279
2280 if (a0 == 0) {
2281 tcg_out_jmp(s, s->code_gen_epilogue);
2282 } else {
2283 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_EAX, a0);
2284 tcg_out_jmp(s, tb_ret_addr);
2285 }
2286 break;
2287 case INDEX_op_goto_tb:
2288 if (s->tb_jmp_insn_offset) {
2289
2290 int gap;
2291
2292
2293
2294 gap = tcg_pcrel_diff(s, QEMU_ALIGN_PTR_UP(s->code_ptr + 1, 4));
2295 if (gap != 1) {
2296 tcg_out_nopn(s, gap - 1);
2297 }
2298 tcg_out8(s, OPC_JMP_long);
2299 s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
2300 tcg_out32(s, 0);
2301 } else {
2302
2303 tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, -1,
2304 (intptr_t)(s->tb_jmp_target_addr + a0));
2305 }
2306 set_jmp_reset_offset(s, a0);
2307 break;
2308 case INDEX_op_goto_ptr:
2309
2310 tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, a0);
2311 break;
2312 case INDEX_op_br:
2313 tcg_out_jxx(s, JCC_JMP, arg_label(a0), 0);
2314 break;
2315 OP_32_64(ld8u):
2316
2317 tcg_out_modrm_offset(s, OPC_MOVZBL, a0, a1, a2);
2318 break;
2319 OP_32_64(ld8s):
2320 tcg_out_modrm_offset(s, OPC_MOVSBL + rexw, a0, a1, a2);
2321 break;
2322 OP_32_64(ld16u):
2323
2324 tcg_out_modrm_offset(s, OPC_MOVZWL, a0, a1, a2);
2325 break;
2326 OP_32_64(ld16s):
2327 tcg_out_modrm_offset(s, OPC_MOVSWL + rexw, a0, a1, a2);
2328 break;
2329#if TCG_TARGET_REG_BITS == 64
2330 case INDEX_op_ld32u_i64:
2331#endif
2332 case INDEX_op_ld_i32:
2333 tcg_out_ld(s, TCG_TYPE_I32, a0, a1, a2);
2334 break;
2335
2336 OP_32_64(st8):
2337 if (const_args[0]) {
2338 tcg_out_modrm_offset(s, OPC_MOVB_EvIz, 0, a1, a2);
2339 tcg_out8(s, a0);
2340 } else {
2341 tcg_out_modrm_offset(s, OPC_MOVB_EvGv | P_REXB_R, a0, a1, a2);
2342 }
2343 break;
2344 OP_32_64(st16):
2345 if (const_args[0]) {
2346 tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_DATA16, 0, a1, a2);
2347 tcg_out16(s, a0);
2348 } else {
2349 tcg_out_modrm_offset(s, OPC_MOVL_EvGv | P_DATA16, a0, a1, a2);
2350 }
2351 break;
2352#if TCG_TARGET_REG_BITS == 64
2353 case INDEX_op_st32_i64:
2354#endif
2355 case INDEX_op_st_i32:
2356 if (const_args[0]) {
2357 tcg_out_modrm_offset(s, OPC_MOVL_EvIz, 0, a1, a2);
2358 tcg_out32(s, a0);
2359 } else {
2360 tcg_out_st(s, TCG_TYPE_I32, a0, a1, a2);
2361 }
2362 break;
2363
2364 OP_32_64(add):
2365
2366 if (a0 != a1) {
2367 TCGArg c3 = 0;
2368 if (const_a2) {
2369 c3 = a2, a2 = -1;
2370 } else if (a0 == a2) {
2371
2372
2373 tgen_arithr(s, ARITH_ADD + rexw, a0, a1);
2374 break;
2375 }
2376
2377 tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, a1, a2, 0, c3);
2378 break;
2379 }
2380 c = ARITH_ADD;
2381 goto gen_arith;
2382 OP_32_64(sub):
2383 c = ARITH_SUB;
2384 goto gen_arith;
2385 OP_32_64(and):
2386 c = ARITH_AND;
2387 goto gen_arith;
2388 OP_32_64(or):
2389 c = ARITH_OR;
2390 goto gen_arith;
2391 OP_32_64(xor):
2392 c = ARITH_XOR;
2393 goto gen_arith;
2394 gen_arith:
2395 if (const_a2) {
2396 tgen_arithi(s, c + rexw, a0, a2, 0);
2397 } else {
2398 tgen_arithr(s, c + rexw, a0, a2);
2399 }
2400 break;
2401
2402 OP_32_64(andc):
2403 if (const_a2) {
2404 tcg_out_mov(s, rexw ? TCG_TYPE_I64 : TCG_TYPE_I32, a0, a1);
2405 tgen_arithi(s, ARITH_AND + rexw, a0, ~a2, 0);
2406 } else {
2407 tcg_out_vex_modrm(s, OPC_ANDN + rexw, a0, a2, a1);
2408 }
2409 break;
2410
2411 OP_32_64(mul):
2412 if (const_a2) {
2413 int32_t val;
2414 val = a2;
2415 if (val == (int8_t)val) {
2416 tcg_out_modrm(s, OPC_IMUL_GvEvIb + rexw, a0, a0);
2417 tcg_out8(s, val);
2418 } else {
2419 tcg_out_modrm(s, OPC_IMUL_GvEvIz + rexw, a0, a0);
2420 tcg_out32(s, val);
2421 }
2422 } else {
2423 tcg_out_modrm(s, OPC_IMUL_GvEv + rexw, a0, a2);
2424 }
2425 break;
2426
2427 OP_32_64(div2):
2428 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IDIV, args[4]);
2429 break;
2430 OP_32_64(divu2):
2431 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_DIV, args[4]);
2432 break;
2433
2434 OP_32_64(shl):
2435
2436 if (const_a2 && a0 != a1 && (a2 - 1) < 3) {
2437 if (a2 - 1 == 0) {
2438
2439 tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, a1, a1, 0, 0);
2440 } else {
2441
2442 tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, -1, a1, a2, 0);
2443 }
2444 break;
2445 }
2446 c = SHIFT_SHL;
2447 vexop = OPC_SHLX;
2448 goto gen_shift_maybe_vex;
2449 OP_32_64(shr):
2450 c = SHIFT_SHR;
2451 vexop = OPC_SHRX;
2452 goto gen_shift_maybe_vex;
2453 OP_32_64(sar):
2454 c = SHIFT_SAR;
2455 vexop = OPC_SARX;
2456 goto gen_shift_maybe_vex;
2457 OP_32_64(rotl):
2458 c = SHIFT_ROL;
2459 goto gen_shift;
2460 OP_32_64(rotr):
2461 c = SHIFT_ROR;
2462 goto gen_shift;
2463 gen_shift_maybe_vex:
2464 if (have_bmi2) {
2465 if (!const_a2) {
2466 tcg_out_vex_modrm(s, vexop + rexw, a0, a2, a1);
2467 break;
2468 }
2469 tcg_out_mov(s, rexw ? TCG_TYPE_I64 : TCG_TYPE_I32, a0, a1);
2470 }
2471
2472 gen_shift:
2473 if (const_a2) {
2474 tcg_out_shifti(s, c + rexw, a0, a2);
2475 } else {
2476 tcg_out_modrm(s, OPC_SHIFT_cl + rexw, c, a0);
2477 }
2478 break;
2479
2480 OP_32_64(ctz):
2481 tcg_out_ctz(s, rexw, args[0], args[1], args[2], const_args[2]);
2482 break;
2483 OP_32_64(clz):
2484 tcg_out_clz(s, rexw, args[0], args[1], args[2], const_args[2]);
2485 break;
2486 OP_32_64(ctpop):
2487 tcg_out_modrm(s, OPC_POPCNT + rexw, a0, a1);
2488 break;
2489
2490 case INDEX_op_brcond_i32:
2491 tcg_out_brcond32(s, a2, a0, a1, const_args[1], arg_label(args[3]), 0);
2492 break;
2493 case INDEX_op_setcond_i32:
2494 tcg_out_setcond32(s, args[3], a0, a1, a2, const_a2);
2495 break;
2496 case INDEX_op_movcond_i32:
2497 tcg_out_movcond32(s, args[5], a0, a1, a2, const_a2, args[3]);
2498 break;
2499
2500 OP_32_64(bswap16):
2501 tcg_out_rolw_8(s, a0);
2502 break;
2503 OP_32_64(bswap32):
2504 tcg_out_bswap32(s, a0);
2505 break;
2506
2507 OP_32_64(neg):
2508 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NEG, a0);
2509 break;
2510 OP_32_64(not):
2511 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NOT, a0);
2512 break;
2513
2514 OP_32_64(ext8s):
2515 tcg_out_ext8s(s, a0, a1, rexw);
2516 break;
2517 OP_32_64(ext16s):
2518 tcg_out_ext16s(s, a0, a1, rexw);
2519 break;
2520 OP_32_64(ext8u):
2521 tcg_out_ext8u(s, a0, a1);
2522 break;
2523 OP_32_64(ext16u):
2524 tcg_out_ext16u(s, a0, a1);
2525 break;
2526
2527 case INDEX_op_qemu_ld_i32:
2528 tcg_out_qemu_ld(s, args, 0);
2529 break;
2530 case INDEX_op_qemu_ld_i64:
2531 tcg_out_qemu_ld(s, args, 1);
2532 break;
2533 case INDEX_op_qemu_st_i32:
2534 tcg_out_qemu_st(s, args, 0);
2535 break;
2536 case INDEX_op_qemu_st_i64:
2537 tcg_out_qemu_st(s, args, 1);
2538 break;
2539
2540 OP_32_64(mulu2):
2541 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_MUL, args[3]);
2542 break;
2543 OP_32_64(muls2):
2544 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IMUL, args[3]);
2545 break;
2546 OP_32_64(add2):
2547 if (const_args[4]) {
2548 tgen_arithi(s, ARITH_ADD + rexw, a0, args[4], 1);
2549 } else {
2550 tgen_arithr(s, ARITH_ADD + rexw, a0, args[4]);
2551 }
2552 if (const_args[5]) {
2553 tgen_arithi(s, ARITH_ADC + rexw, a1, args[5], 1);
2554 } else {
2555 tgen_arithr(s, ARITH_ADC + rexw, a1, args[5]);
2556 }
2557 break;
2558 OP_32_64(sub2):
2559 if (const_args[4]) {
2560 tgen_arithi(s, ARITH_SUB + rexw, a0, args[4], 1);
2561 } else {
2562 tgen_arithr(s, ARITH_SUB + rexw, a0, args[4]);
2563 }
2564 if (const_args[5]) {
2565 tgen_arithi(s, ARITH_SBB + rexw, a1, args[5], 1);
2566 } else {
2567 tgen_arithr(s, ARITH_SBB + rexw, a1, args[5]);
2568 }
2569 break;
2570
2571#if TCG_TARGET_REG_BITS == 32
2572 case INDEX_op_brcond2_i32:
2573 tcg_out_brcond2(s, args, const_args, 0);
2574 break;
2575 case INDEX_op_setcond2_i32:
2576 tcg_out_setcond2(s, args, const_args);
2577 break;
2578#else
2579 case INDEX_op_ld32s_i64:
2580 tcg_out_modrm_offset(s, OPC_MOVSLQ, a0, a1, a2);
2581 break;
2582 case INDEX_op_ld_i64:
2583 tcg_out_ld(s, TCG_TYPE_I64, a0, a1, a2);
2584 break;
2585 case INDEX_op_st_i64:
2586 if (const_args[0]) {
2587 tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_REXW, 0, a1, a2);
2588 tcg_out32(s, a0);
2589 } else {
2590 tcg_out_st(s, TCG_TYPE_I64, a0, a1, a2);
2591 }
2592 break;
2593
2594 case INDEX_op_brcond_i64:
2595 tcg_out_brcond64(s, a2, a0, a1, const_args[1], arg_label(args[3]), 0);
2596 break;
2597 case INDEX_op_setcond_i64:
2598 tcg_out_setcond64(s, args[3], a0, a1, a2, const_a2);
2599 break;
2600 case INDEX_op_movcond_i64:
2601 tcg_out_movcond64(s, args[5], a0, a1, a2, const_a2, args[3]);
2602 break;
2603
2604 case INDEX_op_bswap64_i64:
2605 tcg_out_bswap64(s, a0);
2606 break;
2607 case INDEX_op_extu_i32_i64:
2608 case INDEX_op_ext32u_i64:
2609 case INDEX_op_extrl_i64_i32:
2610 tcg_out_ext32u(s, a0, a1);
2611 break;
2612 case INDEX_op_ext_i32_i64:
2613 case INDEX_op_ext32s_i64:
2614 tcg_out_ext32s(s, a0, a1);
2615 break;
2616 case INDEX_op_extrh_i64_i32:
2617 tcg_out_shifti(s, SHIFT_SHR + P_REXW, a0, 32);
2618 break;
2619#endif
2620
2621 OP_32_64(deposit):
2622 if (args[3] == 0 && args[4] == 8) {
2623
2624 tcg_out_modrm(s, OPC_MOVB_EvGv | P_REXB_R | P_REXB_RM, a2, a0);
2625 } else if (args[3] == 8 && args[4] == 8) {
2626
2627 tcg_out_modrm(s, OPC_MOVB_EvGv, a2, a0 + 4);
2628 } else if (args[3] == 0 && args[4] == 16) {
2629
2630 tcg_out_modrm(s, OPC_MOVL_EvGv | P_DATA16, a2, a0);
2631 } else {
2632 tcg_abort();
2633 }
2634 break;
2635
2636 case INDEX_op_extract_i64:
2637 if (a2 + args[3] == 32) {
2638
2639 tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
2640 tcg_out_shifti(s, SHIFT_SHR, a0, a2);
2641 break;
2642 }
2643
2644 case INDEX_op_extract_i32:
2645
2646
2647
2648 tcg_debug_assert(a2 == 8 && args[3] == 8);
2649 if (a1 < 4 && a0 < 8) {
2650 tcg_out_modrm(s, OPC_MOVZBL, a0, a1 + 4);
2651 } else {
2652 tcg_out_ext16u(s, a0, a1);
2653 tcg_out_shifti(s, SHIFT_SHR, a0, 8);
2654 }
2655 break;
2656
2657 case INDEX_op_sextract_i32:
2658
2659
2660
2661 tcg_debug_assert(a2 == 8 && args[3] == 8);
2662 if (a1 < 4 && a0 < 8) {
2663 tcg_out_modrm(s, OPC_MOVSBL, a0, a1 + 4);
2664 } else {
2665 tcg_out_ext16s(s, a0, a1, 0);
2666 tcg_out_shifti(s, SHIFT_SAR, a0, 8);
2667 }
2668 break;
2669
2670 OP_32_64(extract2):
2671
2672 tcg_out_modrm(s, OPC_SHRD_Ib + rexw, a2, a0);
2673 tcg_out8(s, args[3]);
2674 break;
2675
2676 case INDEX_op_mb:
2677 tcg_out_mb(s, a0);
2678 break;
2679 case INDEX_op_mov_i32:
2680 case INDEX_op_mov_i64:
2681 case INDEX_op_movi_i32:
2682 case INDEX_op_movi_i64:
2683 case INDEX_op_call:
2684 default:
2685 tcg_abort();
2686 }
2687
2688#undef OP_32_64
2689}
2690
/*
 * Emit one host SIMD instruction for vector opcode OPC.
 * VECL encodes the vector length (0 -> V64, 1 -> V128, 2 -> V256,
 * per "vecl + TCG_TYPE_V64" below) and VECE the element size
 * (MO_8 .. MO_64).  ARGS/CONST_ARGS are the TCG operands.
 */
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg *args, const int *const_args)
{
    /* Host opcode tables indexed by element size (MO_8 .. MO_64).
       OPC_UD2 marks element sizes with no single-instruction encoding;
       the assertion at gen_simd guards against emitting those.  */
    static int const add_insn[4] = {
        OPC_PADDB, OPC_PADDW, OPC_PADDD, OPC_PADDQ
    };
    static int const ssadd_insn[4] = {
        OPC_PADDSB, OPC_PADDSW, OPC_UD2, OPC_UD2
    };
    static int const usadd_insn[4] = {
        OPC_PADDUB, OPC_PADDUW, OPC_UD2, OPC_UD2
    };
    static int const sub_insn[4] = {
        OPC_PSUBB, OPC_PSUBW, OPC_PSUBD, OPC_PSUBQ
    };
    static int const sssub_insn[4] = {
        OPC_PSUBSB, OPC_PSUBSW, OPC_UD2, OPC_UD2
    };
    static int const ussub_insn[4] = {
        OPC_PSUBUB, OPC_PSUBUW, OPC_UD2, OPC_UD2
    };
    static int const mul_insn[4] = {
        OPC_UD2, OPC_PMULLW, OPC_PMULLD, OPC_UD2
    };
    static int const shift_imm_insn[4] = {
        OPC_UD2, OPC_PSHIFTW_Ib, OPC_PSHIFTD_Ib, OPC_PSHIFTQ_Ib
    };
    static int const cmpeq_insn[4] = {
        OPC_PCMPEQB, OPC_PCMPEQW, OPC_PCMPEQD, OPC_PCMPEQQ
    };
    static int const cmpgt_insn[4] = {
        OPC_PCMPGTB, OPC_PCMPGTW, OPC_PCMPGTD, OPC_PCMPGTQ
    };
    static int const punpckl_insn[4] = {
        OPC_PUNPCKLBW, OPC_PUNPCKLWD, OPC_PUNPCKLDQ, OPC_PUNPCKLQDQ
    };
    static int const punpckh_insn[4] = {
        OPC_PUNPCKHBW, OPC_PUNPCKHWD, OPC_PUNPCKHDQ, OPC_PUNPCKHQDQ
    };
    static int const packss_insn[4] = {
        OPC_PACKSSWB, OPC_PACKSSDW, OPC_UD2, OPC_UD2
    };
    static int const packus_insn[4] = {
        OPC_PACKUSWB, OPC_PACKUSDW, OPC_UD2, OPC_UD2
    };
    static int const smin_insn[4] = {
        OPC_PMINSB, OPC_PMINSW, OPC_PMINSD, OPC_UD2
    };
    static int const smax_insn[4] = {
        OPC_PMAXSB, OPC_PMAXSW, OPC_PMAXSD, OPC_UD2
    };
    static int const umin_insn[4] = {
        OPC_PMINUB, OPC_PMINUW, OPC_PMINUD, OPC_UD2
    };
    static int const umax_insn[4] = {
        OPC_PMAXUB, OPC_PMAXUW, OPC_PMAXUD, OPC_UD2
    };
    static int const shlv_insn[4] = {
        /* No variable shifts below MO_32 in this table.  */
        OPC_UD2, OPC_UD2, OPC_VPSLLVD, OPC_VPSLLVQ
    };
    static int const shrv_insn[4] = {
        /* No variable shifts below MO_32 in this table.  */
        OPC_UD2, OPC_UD2, OPC_VPSRLVD, OPC_VPSRLVQ
    };
    static int const sarv_insn[4] = {
        /* Variable arithmetic shift only exists for MO_32 here.  */
        OPC_UD2, OPC_UD2, OPC_VPSRAVD, OPC_UD2
    };
    static int const shls_insn[4] = {
        OPC_UD2, OPC_PSLLW, OPC_PSLLD, OPC_PSLLQ
    };
    static int const shrs_insn[4] = {
        OPC_UD2, OPC_PSRLW, OPC_PSRLD, OPC_PSRLQ
    };
    static int const sars_insn[4] = {
        OPC_UD2, OPC_PSRAW, OPC_PSRAD, OPC_UD2
    };
    static int const abs_insn[4] = {
        /* No PABSQ in this table; MO_64 abs is unsupported.  */
        OPC_PABSB, OPC_PABSW, OPC_PABSD, OPC_UD2
    };

    TCGType type = vecl + TCG_TYPE_V64;
    int insn, sub;
    TCGArg a0, a1, a2;

    a0 = args[0];
    a1 = args[1];
    a2 = args[2];

    switch (opc) {
    case INDEX_op_add_vec:
        insn = add_insn[vece];
        goto gen_simd;
    case INDEX_op_ssadd_vec:
        insn = ssadd_insn[vece];
        goto gen_simd;
    case INDEX_op_usadd_vec:
        insn = usadd_insn[vece];
        goto gen_simd;
    case INDEX_op_sub_vec:
        insn = sub_insn[vece];
        goto gen_simd;
    case INDEX_op_sssub_vec:
        insn = sssub_insn[vece];
        goto gen_simd;
    case INDEX_op_ussub_vec:
        insn = ussub_insn[vece];
        goto gen_simd;
    case INDEX_op_mul_vec:
        insn = mul_insn[vece];
        goto gen_simd;
    case INDEX_op_and_vec:
        insn = OPC_PAND;
        goto gen_simd;
    case INDEX_op_or_vec:
        insn = OPC_POR;
        goto gen_simd;
    case INDEX_op_xor_vec:
        insn = OPC_PXOR;
        goto gen_simd;
    case INDEX_op_smin_vec:
        insn = smin_insn[vece];
        goto gen_simd;
    case INDEX_op_umin_vec:
        insn = umin_insn[vece];
        goto gen_simd;
    case INDEX_op_smax_vec:
        insn = smax_insn[vece];
        goto gen_simd;
    case INDEX_op_umax_vec:
        insn = umax_insn[vece];
        goto gen_simd;
    case INDEX_op_shlv_vec:
        insn = shlv_insn[vece];
        goto gen_simd;
    case INDEX_op_shrv_vec:
        insn = shrv_insn[vece];
        goto gen_simd;
    case INDEX_op_sarv_vec:
        insn = sarv_insn[vece];
        goto gen_simd;
    case INDEX_op_shls_vec:
        insn = shls_insn[vece];
        goto gen_simd;
    case INDEX_op_shrs_vec:
        insn = shrs_insn[vece];
        goto gen_simd;
    case INDEX_op_sars_vec:
        insn = sars_insn[vece];
        goto gen_simd;
    case INDEX_op_x86_punpckl_vec:
        insn = punpckl_insn[vece];
        goto gen_simd;
    case INDEX_op_x86_punpckh_vec:
        insn = punpckh_insn[vece];
        goto gen_simd;
    case INDEX_op_x86_packss_vec:
        insn = packss_insn[vece];
        goto gen_simd;
    case INDEX_op_x86_packus_vec:
        insn = packus_insn[vece];
        goto gen_simd;
#if TCG_TARGET_REG_BITS == 32
    case INDEX_op_dup2_vec:
        /* 32-bit host: build the 64-bit element by interleaving the
           two 32-bit inputs with PUNPCKLDQ.  */
        insn = OPC_PUNPCKLDQ;
        goto gen_simd;
#endif
    case INDEX_op_abs_vec:
        /* PABS* is two-operand: the source goes in the r/m field and
           the VEX.vvvv slot must be unused (0).  */
        insn = abs_insn[vece];
        a2 = a1;
        a1 = 0;
        goto gen_simd;
    gen_simd:
        /* Any OPC_UD2 here means the opcode/vece pair should have been
           rejected by tcg_can_emit_vec_op or expanded earlier.  */
        tcg_debug_assert(insn != OPC_UD2);
        if (type == TCG_TYPE_V256) {
            insn |= P_VEXL;    /* select 256-bit VEX.L encoding */
        }
        tcg_out_vex_modrm(s, insn, a0, a1, a2);
        break;

    case INDEX_op_cmp_vec:
        /* Only EQ and GT reach the backend; other conditions are
           rewritten by expand_vec_cmp_noinv.  */
        sub = args[3];
        if (sub == TCG_COND_EQ) {
            insn = cmpeq_insn[vece];
        } else if (sub == TCG_COND_GT) {
            insn = cmpgt_insn[vece];
        } else {
            g_assert_not_reached();
        }
        goto gen_simd;

    case INDEX_op_andc_vec:
        /* PANDN computes ~first-source & second-source, so pass a2 as
           the inverted (vvvv) operand: a0 = a1 & ~a2.  */
        insn = OPC_PANDN;
        if (type == TCG_TYPE_V256) {
            insn |= P_VEXL;
        }
        tcg_out_vex_modrm(s, insn, a0, a2, a1);
        break;

    case INDEX_op_shli_vec:
        sub = 6;               /* /6 = psll* in the shift group */
        goto gen_shift;
    case INDEX_op_shri_vec:
        sub = 2;               /* /2 = psrl* */
        goto gen_shift;
    case INDEX_op_sari_vec:
        /* No psraq immediate form in this encoding set.  */
        tcg_debug_assert(vece != MO_64);
        sub = 4;               /* /4 = psra* */
        gen_shift:
        /* MO_8 immediate shifts are expanded in expand_vec_shi.  */
        tcg_debug_assert(vece != MO_8);
        insn = shift_imm_insn[vece];
        if (type == TCG_TYPE_V256) {
            insn |= P_VEXL;
        }
        tcg_out_vex_modrm(s, insn, sub, a0, a1);
        tcg_out8(s, a2);       /* immediate shift count */
        break;

    case INDEX_op_ld_vec:
        tcg_out_ld(s, type, a0, a1, a2);
        break;
    case INDEX_op_st_vec:
        tcg_out_st(s, type, a0, a1, a2);
        break;
    case INDEX_op_dupm_vec:
        tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
        break;

    case INDEX_op_x86_shufps_vec:
        insn = OPC_SHUFPS;
        sub = args[3];         /* shuffle control immediate */
        goto gen_simd_imm8;
    case INDEX_op_x86_blend_vec:
        if (vece == MO_16) {
            insn = OPC_PBLENDW;
        } else if (vece == MO_32) {
            insn = (have_avx2 ? OPC_VPBLENDD : OPC_BLENDPS);
        } else {
            g_assert_not_reached();
        }
        sub = args[3];         /* blend mask immediate */
        goto gen_simd_imm8;
    case INDEX_op_x86_vperm2i128_vec:
        insn = OPC_VPERM2I128;
        sub = args[3];
        goto gen_simd_imm8;
    gen_simd_imm8:
        if (type == TCG_TYPE_V256) {
            insn |= P_VEXL;
        }
        tcg_out_vex_modrm(s, insn, a0, a1, a2);
        tcg_out8(s, sub);
        break;

    case INDEX_op_x86_vpblendvb_vec:
        insn = OPC_VPBLENDVB;
        if (type == TCG_TYPE_V256) {
            insn |= P_VEXL;
        }
        tcg_out_vex_modrm(s, insn, a0, a1, a2);
        /* The fourth (mask) register is encoded in imm8[7:4].  */
        tcg_out8(s, args[3] << 4);
        break;

    case INDEX_op_x86_psrldq_vec:
        /* Group 14 /3 = psrldq (whole-register byte shift right).  */
        tcg_out_vex_modrm(s, OPC_GRP14, 3, a0, a1);
        tcg_out8(s, a2);
        break;

    case INDEX_op_mov_vec:  /* Always emitted via tcg_out_mov.  */
    case INDEX_op_dupi_vec: /* Always emitted via tcg_out_movi.  */
    case INDEX_op_dup_vec:  /* Always emitted via tcg_out_dup_vec.  */
    default:
        g_assert_not_reached();
    }
}
2970
2971static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
2972{
2973 static const TCGTargetOpDef r = { .args_ct_str = { "r" } };
2974 static const TCGTargetOpDef ri_r = { .args_ct_str = { "ri", "r" } };
2975 static const TCGTargetOpDef re_r = { .args_ct_str = { "re", "r" } };
2976 static const TCGTargetOpDef qi_r = { .args_ct_str = { "qi", "r" } };
2977 static const TCGTargetOpDef r_r = { .args_ct_str = { "r", "r" } };
2978 static const TCGTargetOpDef r_q = { .args_ct_str = { "r", "q" } };
2979 static const TCGTargetOpDef r_re = { .args_ct_str = { "r", "re" } };
2980 static const TCGTargetOpDef r_0 = { .args_ct_str = { "r", "0" } };
2981 static const TCGTargetOpDef r_r_ri = { .args_ct_str = { "r", "r", "ri" } };
2982 static const TCGTargetOpDef r_r_re = { .args_ct_str = { "r", "r", "re" } };
2983 static const TCGTargetOpDef r_0_r = { .args_ct_str = { "r", "0", "r" } };
2984 static const TCGTargetOpDef r_0_re = { .args_ct_str = { "r", "0", "re" } };
2985 static const TCGTargetOpDef r_0_ci = { .args_ct_str = { "r", "0", "ci" } };
2986 static const TCGTargetOpDef r_L = { .args_ct_str = { "r", "L" } };
2987 static const TCGTargetOpDef L_L = { .args_ct_str = { "L", "L" } };
2988 static const TCGTargetOpDef r_L_L = { .args_ct_str = { "r", "L", "L" } };
2989 static const TCGTargetOpDef r_r_L = { .args_ct_str = { "r", "r", "L" } };
2990 static const TCGTargetOpDef L_L_L = { .args_ct_str = { "L", "L", "L" } };
2991 static const TCGTargetOpDef r_r_L_L
2992 = { .args_ct_str = { "r", "r", "L", "L" } };
2993 static const TCGTargetOpDef L_L_L_L
2994 = { .args_ct_str = { "L", "L", "L", "L" } };
2995 static const TCGTargetOpDef x_x = { .args_ct_str = { "x", "x" } };
2996 static const TCGTargetOpDef x_x_x = { .args_ct_str = { "x", "x", "x" } };
2997 static const TCGTargetOpDef x_x_x_x
2998 = { .args_ct_str = { "x", "x", "x", "x" } };
2999 static const TCGTargetOpDef x_r = { .args_ct_str = { "x", "r" } };
3000
3001 switch (op) {
3002 case INDEX_op_goto_ptr:
3003 return &r;
3004
3005 case INDEX_op_ld8u_i32:
3006 case INDEX_op_ld8u_i64:
3007 case INDEX_op_ld8s_i32:
3008 case INDEX_op_ld8s_i64:
3009 case INDEX_op_ld16u_i32:
3010 case INDEX_op_ld16u_i64:
3011 case INDEX_op_ld16s_i32:
3012 case INDEX_op_ld16s_i64:
3013 case INDEX_op_ld_i32:
3014 case INDEX_op_ld32u_i64:
3015 case INDEX_op_ld32s_i64:
3016 case INDEX_op_ld_i64:
3017 return &r_r;
3018
3019 case INDEX_op_st8_i32:
3020 case INDEX_op_st8_i64:
3021 return &qi_r;
3022 case INDEX_op_st16_i32:
3023 case INDEX_op_st16_i64:
3024 case INDEX_op_st_i32:
3025 case INDEX_op_st32_i64:
3026 return &ri_r;
3027 case INDEX_op_st_i64:
3028 return &re_r;
3029
3030 case INDEX_op_add_i32:
3031 case INDEX_op_add_i64:
3032 return &r_r_re;
3033 case INDEX_op_sub_i32:
3034 case INDEX_op_sub_i64:
3035 case INDEX_op_mul_i32:
3036 case INDEX_op_mul_i64:
3037 case INDEX_op_or_i32:
3038 case INDEX_op_or_i64:
3039 case INDEX_op_xor_i32:
3040 case INDEX_op_xor_i64:
3041 return &r_0_re;
3042
3043 case INDEX_op_and_i32:
3044 case INDEX_op_and_i64:
3045 {
3046 static const TCGTargetOpDef and
3047 = { .args_ct_str = { "r", "0", "reZ" } };
3048 return ∧
3049 }
3050 break;
3051 case INDEX_op_andc_i32:
3052 case INDEX_op_andc_i64:
3053 {
3054 static const TCGTargetOpDef andc
3055 = { .args_ct_str = { "r", "r", "rI" } };
3056 return &andc;
3057 }
3058 break;
3059
3060 case INDEX_op_shl_i32:
3061 case INDEX_op_shl_i64:
3062 case INDEX_op_shr_i32:
3063 case INDEX_op_shr_i64:
3064 case INDEX_op_sar_i32:
3065 case INDEX_op_sar_i64:
3066 return have_bmi2 ? &r_r_ri : &r_0_ci;
3067 case INDEX_op_rotl_i32:
3068 case INDEX_op_rotl_i64:
3069 case INDEX_op_rotr_i32:
3070 case INDEX_op_rotr_i64:
3071 return &r_0_ci;
3072
3073 case INDEX_op_brcond_i32:
3074 case INDEX_op_brcond_i64:
3075 return &r_re;
3076
3077 case INDEX_op_bswap16_i32:
3078 case INDEX_op_bswap16_i64:
3079 case INDEX_op_bswap32_i32:
3080 case INDEX_op_bswap32_i64:
3081 case INDEX_op_bswap64_i64:
3082 case INDEX_op_neg_i32:
3083 case INDEX_op_neg_i64:
3084 case INDEX_op_not_i32:
3085 case INDEX_op_not_i64:
3086 case INDEX_op_extrh_i64_i32:
3087 return &r_0;
3088
3089 case INDEX_op_ext8s_i32:
3090 case INDEX_op_ext8s_i64:
3091 case INDEX_op_ext8u_i32:
3092 case INDEX_op_ext8u_i64:
3093 return &r_q;
3094 case INDEX_op_ext16s_i32:
3095 case INDEX_op_ext16s_i64:
3096 case INDEX_op_ext16u_i32:
3097 case INDEX_op_ext16u_i64:
3098 case INDEX_op_ext32s_i64:
3099 case INDEX_op_ext32u_i64:
3100 case INDEX_op_ext_i32_i64:
3101 case INDEX_op_extu_i32_i64:
3102 case INDEX_op_extrl_i64_i32:
3103 case INDEX_op_extract_i32:
3104 case INDEX_op_extract_i64:
3105 case INDEX_op_sextract_i32:
3106 case INDEX_op_ctpop_i32:
3107 case INDEX_op_ctpop_i64:
3108 return &r_r;
3109 case INDEX_op_extract2_i32:
3110 case INDEX_op_extract2_i64:
3111 return &r_0_r;
3112
3113 case INDEX_op_deposit_i32:
3114 case INDEX_op_deposit_i64:
3115 {
3116 static const TCGTargetOpDef dep
3117 = { .args_ct_str = { "Q", "0", "Q" } };
3118 return &dep;
3119 }
3120 case INDEX_op_setcond_i32:
3121 case INDEX_op_setcond_i64:
3122 {
3123 static const TCGTargetOpDef setc
3124 = { .args_ct_str = { "q", "r", "re" } };
3125 return &setc;
3126 }
3127 case INDEX_op_movcond_i32:
3128 case INDEX_op_movcond_i64:
3129 {
3130 static const TCGTargetOpDef movc
3131 = { .args_ct_str = { "r", "r", "re", "r", "0" } };
3132 return &movc;
3133 }
3134 case INDEX_op_div2_i32:
3135 case INDEX_op_div2_i64:
3136 case INDEX_op_divu2_i32:
3137 case INDEX_op_divu2_i64:
3138 {
3139 static const TCGTargetOpDef div2
3140 = { .args_ct_str = { "a", "d", "0", "1", "r" } };
3141 return &div2;
3142 }
3143 case INDEX_op_mulu2_i32:
3144 case INDEX_op_mulu2_i64:
3145 case INDEX_op_muls2_i32:
3146 case INDEX_op_muls2_i64:
3147 {
3148 static const TCGTargetOpDef mul2
3149 = { .args_ct_str = { "a", "d", "a", "r" } };
3150 return &mul2;
3151 }
3152 case INDEX_op_add2_i32:
3153 case INDEX_op_add2_i64:
3154 case INDEX_op_sub2_i32:
3155 case INDEX_op_sub2_i64:
3156 {
3157 static const TCGTargetOpDef arith2
3158 = { .args_ct_str = { "r", "r", "0", "1", "re", "re" } };
3159 return &arith2;
3160 }
3161 case INDEX_op_ctz_i32:
3162 case INDEX_op_ctz_i64:
3163 {
3164 static const TCGTargetOpDef ctz[2] = {
3165 { .args_ct_str = { "&r", "r", "r" } },
3166 { .args_ct_str = { "&r", "r", "rW" } },
3167 };
3168 return &ctz[have_bmi1];
3169 }
3170 case INDEX_op_clz_i32:
3171 case INDEX_op_clz_i64:
3172 {
3173 static const TCGTargetOpDef clz[2] = {
3174 { .args_ct_str = { "&r", "r", "r" } },
3175 { .args_ct_str = { "&r", "r", "rW" } },
3176 };
3177 return &clz[have_lzcnt];
3178 }
3179
3180 case INDEX_op_qemu_ld_i32:
3181 return TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? &r_L : &r_L_L;
3182 case INDEX_op_qemu_st_i32:
3183 return TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? &L_L : &L_L_L;
3184 case INDEX_op_qemu_ld_i64:
3185 return (TCG_TARGET_REG_BITS == 64 ? &r_L
3186 : TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? &r_r_L
3187 : &r_r_L_L);
3188 case INDEX_op_qemu_st_i64:
3189 return (TCG_TARGET_REG_BITS == 64 ? &L_L
3190 : TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? &L_L_L
3191 : &L_L_L_L);
3192
3193 case INDEX_op_brcond2_i32:
3194 {
3195 static const TCGTargetOpDef b2
3196 = { .args_ct_str = { "r", "r", "ri", "ri" } };
3197 return &b2;
3198 }
3199 case INDEX_op_setcond2_i32:
3200 {
3201 static const TCGTargetOpDef s2
3202 = { .args_ct_str = { "r", "r", "r", "ri", "ri" } };
3203 return &s2;
3204 }
3205
3206 case INDEX_op_ld_vec:
3207 case INDEX_op_st_vec:
3208 case INDEX_op_dupm_vec:
3209 return &x_r;
3210
3211 case INDEX_op_add_vec:
3212 case INDEX_op_sub_vec:
3213 case INDEX_op_mul_vec:
3214 case INDEX_op_and_vec:
3215 case INDEX_op_or_vec:
3216 case INDEX_op_xor_vec:
3217 case INDEX_op_andc_vec:
3218 case INDEX_op_ssadd_vec:
3219 case INDEX_op_usadd_vec:
3220 case INDEX_op_sssub_vec:
3221 case INDEX_op_ussub_vec:
3222 case INDEX_op_smin_vec:
3223 case INDEX_op_umin_vec:
3224 case INDEX_op_smax_vec:
3225 case INDEX_op_umax_vec:
3226 case INDEX_op_shlv_vec:
3227 case INDEX_op_shrv_vec:
3228 case INDEX_op_sarv_vec:
3229 case INDEX_op_shls_vec:
3230 case INDEX_op_shrs_vec:
3231 case INDEX_op_sars_vec:
3232 case INDEX_op_cmp_vec:
3233 case INDEX_op_x86_shufps_vec:
3234 case INDEX_op_x86_blend_vec:
3235 case INDEX_op_x86_packss_vec:
3236 case INDEX_op_x86_packus_vec:
3237 case INDEX_op_x86_vperm2i128_vec:
3238 case INDEX_op_x86_punpckl_vec:
3239 case INDEX_op_x86_punpckh_vec:
3240#if TCG_TARGET_REG_BITS == 32
3241 case INDEX_op_dup2_vec:
3242#endif
3243 return &x_x_x;
3244 case INDEX_op_abs_vec:
3245 case INDEX_op_dup_vec:
3246 case INDEX_op_shli_vec:
3247 case INDEX_op_shri_vec:
3248 case INDEX_op_sari_vec:
3249 case INDEX_op_x86_psrldq_vec:
3250 return &x_x;
3251 case INDEX_op_x86_vpblendvb_vec:
3252 return &x_x_x_x;
3253
3254 default:
3255 break;
3256 }
3257 return NULL;
3258}
3259
3260int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
3261{
3262 switch (opc) {
3263 case INDEX_op_add_vec:
3264 case INDEX_op_sub_vec:
3265 case INDEX_op_and_vec:
3266 case INDEX_op_or_vec:
3267 case INDEX_op_xor_vec:
3268 case INDEX_op_andc_vec:
3269 return 1;
3270 case INDEX_op_cmp_vec:
3271 case INDEX_op_cmpsel_vec:
3272 return -1;
3273
3274 case INDEX_op_shli_vec:
3275 case INDEX_op_shri_vec:
3276
3277 return vece == MO_8 ? -1 : 1;
3278
3279 case INDEX_op_sari_vec:
3280
3281 if (vece == MO_8) {
3282 return -1;
3283 }
3284
3285
3286 if (vece == MO_64) {
3287 return type >= TCG_TYPE_V256 ? -1 : 0;
3288 }
3289 return 1;
3290
3291 case INDEX_op_shls_vec:
3292 case INDEX_op_shrs_vec:
3293 return vece >= MO_16;
3294 case INDEX_op_sars_vec:
3295 return vece >= MO_16 && vece <= MO_32;
3296
3297 case INDEX_op_shlv_vec:
3298 case INDEX_op_shrv_vec:
3299 return have_avx2 && vece >= MO_32;
3300 case INDEX_op_sarv_vec:
3301 return have_avx2 && vece == MO_32;
3302
3303 case INDEX_op_mul_vec:
3304 if (vece == MO_8) {
3305
3306 return -1;
3307 }
3308 if (vece == MO_64) {
3309 return 0;
3310 }
3311 return 1;
3312
3313 case INDEX_op_ssadd_vec:
3314 case INDEX_op_usadd_vec:
3315 case INDEX_op_sssub_vec:
3316 case INDEX_op_ussub_vec:
3317 return vece <= MO_16;
3318 case INDEX_op_smin_vec:
3319 case INDEX_op_smax_vec:
3320 case INDEX_op_umin_vec:
3321 case INDEX_op_umax_vec:
3322 case INDEX_op_abs_vec:
3323 return vece <= MO_32;
3324
3325 default:
3326 return 0;
3327 }
3328}
3329
3330static void expand_vec_shi(TCGType type, unsigned vece, bool shr,
3331 TCGv_vec v0, TCGv_vec v1, TCGArg imm)
3332{
3333 TCGv_vec t1, t2;
3334
3335 tcg_debug_assert(vece == MO_8);
3336
3337 t1 = tcg_temp_new_vec(type);
3338 t2 = tcg_temp_new_vec(type);
3339
3340
3341
3342
3343
3344
3345
3346
3347
3348
3349 vec_gen_3(INDEX_op_x86_punpckl_vec, type, MO_8,
3350 tcgv_vec_arg(t1), tcgv_vec_arg(v1), tcgv_vec_arg(v1));
3351 vec_gen_3(INDEX_op_x86_punpckh_vec, type, MO_8,
3352 tcgv_vec_arg(t2), tcgv_vec_arg(v1), tcgv_vec_arg(v1));
3353
3354 if (shr) {
3355 tcg_gen_shri_vec(MO_16, t1, t1, imm + 8);
3356 tcg_gen_shri_vec(MO_16, t2, t2, imm + 8);
3357 } else {
3358 tcg_gen_shli_vec(MO_16, t1, t1, imm + 8);
3359 tcg_gen_shli_vec(MO_16, t2, t2, imm + 8);
3360 tcg_gen_shri_vec(MO_16, t1, t1, 8);
3361 tcg_gen_shri_vec(MO_16, t2, t2, 8);
3362 }
3363
3364 vec_gen_3(INDEX_op_x86_packus_vec, type, MO_8,
3365 tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(t2));
3366 tcg_temp_free_vec(t1);
3367 tcg_temp_free_vec(t2);
3368}
3369
/*
 * Expand an arithmetic right shift by immediate for the element
 * sizes that have no direct encoding: MO_8 always, and MO_64.
 */
static void expand_vec_sari(TCGType type, unsigned vece,
                            TCGv_vec v0, TCGv_vec v1, TCGArg imm)
{
    TCGv_vec t1, t2;

    switch (vece) {
    case MO_8:
        /* Unpack to W, shift, and repack, as in expand_vec_shi.  */
        t1 = tcg_temp_new_vec(type);
        t2 = tcg_temp_new_vec(type);
        vec_gen_3(INDEX_op_x86_punpckl_vec, type, MO_8,
                  tcgv_vec_arg(t1), tcgv_vec_arg(v1), tcgv_vec_arg(v1));
        vec_gen_3(INDEX_op_x86_punpckh_vec, type, MO_8,
                  tcgv_vec_arg(t2), tcgv_vec_arg(v1), tcgv_vec_arg(v1));
        /* Each word lane holds the byte duplicated, so imm + 8 shifts
           the result into the low byte with correct sign bits.  */
        tcg_gen_sari_vec(MO_16, t1, t1, imm + 8);
        tcg_gen_sari_vec(MO_16, t2, t2, imm + 8);
        /* Results fit in 8 bits, so the signed pack does not saturate.  */
        vec_gen_3(INDEX_op_x86_packss_vec, type, MO_8,
                  tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(t2));
        tcg_temp_free_vec(t1);
        tcg_temp_free_vec(t2);
        break;

    case MO_64:
        if (imm <= 32) {
            /*
             * We can emulate a small sign extend by performing an
             * arithmetic 32-bit shift and overwriting the high half
             * of a 64-bit logical shift (note that the ISA says a
             * shift count of 32 is valid for the 32-bit shift).
             */
            t1 = tcg_temp_new_vec(type);
            tcg_gen_sari_vec(MO_32, t1, v1, imm);
            tcg_gen_shri_vec(MO_64, v0, v1, imm);
            /* Mask 0xaa selects the odd (upper) 32-bit elements, i.e.
               the sign-correct high halves from t1.  */
            vec_gen_4(INDEX_op_x86_blend_vec, type, MO_32,
                      tcgv_vec_arg(v0), tcgv_vec_arg(v0),
                      tcgv_vec_arg(t1), 0xaa);
            tcg_temp_free_vec(t1);
        } else {
            /*
             * Otherwise compute a full sign mask (0 > v1), shift the
             * value logically, and merge in the sign bits shifted
             * into the vacated high positions.
             */
            t1 = tcg_const_zeros_vec(type);
            tcg_gen_cmp_vec(TCG_COND_GT, MO_64, t1, t1, v1);
            tcg_gen_shri_vec(MO_64, v0, v1, imm);
            tcg_gen_shli_vec(MO_64, t1, t1, 64 - imm);
            tcg_gen_or_vec(MO_64, v0, v0, t1);
            tcg_temp_free_vec(t1);
        }
        break;

    default:
        g_assert_not_reached();
    }
}
3422
/*
 * Expand an MO_8 vector multiply, which x86 lacks, via 16-bit
 * multiplies:
 *
 *   Unpack v1 bytes to words:  0 | x
 *   Unpack v2 bytes to words:  y | 0
 *   The word product then holds x * y with 8 bits of right padding;
 *   shift logical right by 8 to clear the high byte of each lane
 *   before repacking with an unsigned saturating pack.
 *
 * The V64 vs V128/V256 split below only changes how the expansion is
 * distributed across temporaries.
 */
static void expand_vec_mul(TCGType type, unsigned vece,
                           TCGv_vec v0, TCGv_vec v1, TCGv_vec v2)
{
    TCGv_vec t1, t2, t3, t4;

    tcg_debug_assert(vece == MO_8);

    switch (type) {
    case TCG_TYPE_V64:
        /* A V64 operand fills only the low half of an xmm register,
           so one low-half unpack covers all the input bytes.  */
        t1 = tcg_temp_new_vec(TCG_TYPE_V128);
        t2 = tcg_temp_new_vec(TCG_TYPE_V128);
        tcg_gen_dup16i_vec(t2, 0);
        vec_gen_3(INDEX_op_x86_punpckl_vec, TCG_TYPE_V128, MO_8,
                  tcgv_vec_arg(t1), tcgv_vec_arg(v1), tcgv_vec_arg(t2));
        vec_gen_3(INDEX_op_x86_punpckl_vec, TCG_TYPE_V128, MO_8,
                  tcgv_vec_arg(t2), tcgv_vec_arg(t2), tcgv_vec_arg(v2));
        tcg_gen_mul_vec(MO_16, t1, t1, t2);
        tcg_gen_shri_vec(MO_16, t1, t1, 8);
        vec_gen_3(INDEX_op_x86_packus_vec, TCG_TYPE_V128, MO_8,
                  tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(t1));
        tcg_temp_free_vec(t1);
        tcg_temp_free_vec(t2);
        break;

    case TCG_TYPE_V128:
    case TCG_TYPE_V256:
        /* Full-width inputs need both low and high unpacks; t4 starts
           as the zero vector and is consumed by the last unpack.  */
        t1 = tcg_temp_new_vec(type);
        t2 = tcg_temp_new_vec(type);
        t3 = tcg_temp_new_vec(type);
        t4 = tcg_temp_new_vec(type);
        tcg_gen_dup16i_vec(t4, 0);
        vec_gen_3(INDEX_op_x86_punpckl_vec, type, MO_8,
                  tcgv_vec_arg(t1), tcgv_vec_arg(v1), tcgv_vec_arg(t4));
        vec_gen_3(INDEX_op_x86_punpckl_vec, type, MO_8,
                  tcgv_vec_arg(t2), tcgv_vec_arg(t4), tcgv_vec_arg(v2));
        vec_gen_3(INDEX_op_x86_punpckh_vec, type, MO_8,
                  tcgv_vec_arg(t3), tcgv_vec_arg(v1), tcgv_vec_arg(t4));
        vec_gen_3(INDEX_op_x86_punpckh_vec, type, MO_8,
                  tcgv_vec_arg(t4), tcgv_vec_arg(t4), tcgv_vec_arg(v2));
        tcg_gen_mul_vec(MO_16, t1, t1, t2);
        tcg_gen_mul_vec(MO_16, t3, t3, t4);
        tcg_gen_shri_vec(MO_16, t1, t1, 8);
        tcg_gen_shri_vec(MO_16, t3, t3, 8);
        vec_gen_3(INDEX_op_x86_packus_vec, type, MO_8,
                  tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(t3));
        tcg_temp_free_vec(t1);
        tcg_temp_free_vec(t2);
        tcg_temp_free_vec(t3);
        tcg_temp_free_vec(t4);
        break;

    default:
        g_assert_not_reached();
    }
}
3488
/*
 * Reduce a vector comparison to the EQ/GT forms that x86 integer
 * SIMD compares provide.  Emits the compare into V0 and returns
 * true when V0 holds the INVERSE of the requested condition, in
 * which case the caller must apply a bitwise NOT.
 */
static bool expand_vec_cmp_noinv(TCGType type, unsigned vece, TCGv_vec v0,
                                 TCGv_vec v1, TCGv_vec v2, TCGCond cond)
{
    enum {
        NEED_INV  = 1,      /* caller must invert the result */
        NEED_SWAP = 2,      /* swap the two operands */
        NEED_BIAS = 4,      /* flip sign bits: unsigned -> signed compare */
        NEED_UMIN = 8,      /* reduce via unsigned min + EQ */
        NEED_UMAX = 16,     /* reduce via unsigned max + EQ */
    };
    TCGv_vec t1, t2;
    uint8_t fixup;

    /* For unsigned conditions: MO_8..MO_32 use PMINU/PMAXU plus EQ
       (x <= y iff umin(x, y) == x); MO_64 has no unsigned min/max,
       so bias both operands by the sign bit and compare signed.  */
    switch (cond) {
    case TCG_COND_EQ:
    case TCG_COND_GT:
        fixup = 0;
        break;
    case TCG_COND_NE:
    case TCG_COND_LE:
        fixup = NEED_INV;
        break;
    case TCG_COND_LT:
        fixup = NEED_SWAP;
        break;
    case TCG_COND_GE:
        fixup = NEED_SWAP | NEED_INV;
        break;
    case TCG_COND_LEU:
        if (vece <= MO_32) {
            fixup = NEED_UMIN;
        } else {
            fixup = NEED_BIAS | NEED_INV;
        }
        break;
    case TCG_COND_GTU:
        if (vece <= MO_32) {
            fixup = NEED_UMIN | NEED_INV;
        } else {
            fixup = NEED_BIAS;
        }
        break;
    case TCG_COND_GEU:
        if (vece <= MO_32) {
            fixup = NEED_UMAX;
        } else {
            fixup = NEED_BIAS | NEED_SWAP | NEED_INV;
        }
        break;
    case TCG_COND_LTU:
        if (vece <= MO_32) {
            fixup = NEED_UMAX | NEED_INV;
        } else {
            fixup = NEED_BIAS | NEED_SWAP;
        }
        break;
    default:
        g_assert_not_reached();
    }

    if (fixup & NEED_INV) {
        cond = tcg_invert_cond(cond);
    }
    if (fixup & NEED_SWAP) {
        t1 = v1, v1 = v2, v2 = t1;
        cond = tcg_swap_cond(cond);
    }

    t1 = t2 = NULL;
    if (fixup & (NEED_UMIN | NEED_UMAX)) {
        /* v1 cond v2 becomes v1 == umin/umax(v1, v2).  */
        t1 = tcg_temp_new_vec(type);
        if (fixup & NEED_UMIN) {
            tcg_gen_umin_vec(vece, t1, v1, v2);
        } else {
            tcg_gen_umax_vec(vece, t1, v1, v2);
        }
        v2 = t1;
        cond = TCG_COND_EQ;
    } else if (fixup & NEED_BIAS) {
        /* Subtract the element's sign bit from both operands, turning
           the unsigned comparison into the equivalent signed one.  */
        t1 = tcg_temp_new_vec(type);
        t2 = tcg_temp_new_vec(type);
        tcg_gen_dupi_vec(vece, t2, 1ull << ((8 << vece) - 1));
        tcg_gen_sub_vec(vece, t1, v1, t2);
        tcg_gen_sub_vec(vece, t2, v2, t2);
        v1 = t1;
        v2 = t2;
        cond = tcg_signed_cond(cond);
    }

    /* Only the hardware-supported conditions may remain.  */
    tcg_debug_assert(cond == TCG_COND_EQ || cond == TCG_COND_GT);

    vec_gen_4(INDEX_op_cmp_vec, type, vece,
              tcgv_vec_arg(v0), tcgv_vec_arg(v1), tcgv_vec_arg(v2), cond);

    if (t1) {
        tcg_temp_free_vec(t1);
        if (t2) {
            tcg_temp_free_vec(t2);
        }
    }
    return fixup & NEED_INV;
}
3591
3592static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0,
3593 TCGv_vec v1, TCGv_vec v2, TCGCond cond)
3594{
3595 if (expand_vec_cmp_noinv(type, vece, v0, v1, v2, cond)) {
3596 tcg_gen_not_vec(vece, v0, v0);
3597 }
3598}
3599
3600static void expand_vec_cmpsel(TCGType type, unsigned vece, TCGv_vec v0,
3601 TCGv_vec c1, TCGv_vec c2,
3602 TCGv_vec v3, TCGv_vec v4, TCGCond cond)
3603{
3604 TCGv_vec t = tcg_temp_new_vec(type);
3605
3606 if (expand_vec_cmp_noinv(type, vece, t, c1, c2, cond)) {
3607
3608 TCGv_vec x;
3609 x = v3, v3 = v4, v4 = x;
3610 }
3611 vec_gen_4(INDEX_op_x86_vpblendvb_vec, type, vece,
3612 tcgv_vec_arg(v0), tcgv_vec_arg(v4),
3613 tcgv_vec_arg(v3), tcgv_vec_arg(t));
3614 tcg_temp_free_vec(t);
3615}
3616
/*
 * Expand a vector opcode that has no direct x86 encoding into a
 * sequence of supported operations.  A0 is the output operand; the
 * remaining operands arrive as varargs whose count and meaning depend
 * on OPC, so the va_arg() calls below must consume them in exactly the
 * order the front end supplied them.
 */
void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
                       TCGArg a0, ...)
{
    va_list va;
    TCGArg a2;
    TCGv_vec v0, v1, v2, v3, v4;

    va_start(va, a0);
    v0 = temp_tcgv_vec(arg_temp(a0));
    v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
    /* Second input: an immediate for the shift ops, a temp otherwise.  */
    a2 = va_arg(va, TCGArg);

    switch (opc) {
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
        /* Immediate shift: a2 is the shift count.  */
        expand_vec_shi(type, vece, opc == INDEX_op_shri_vec, v0, v1, a2);
        break;

    case INDEX_op_sari_vec:
        expand_vec_sari(type, vece, v0, v1, a2);
        break;

    case INDEX_op_mul_vec:
        v2 = temp_tcgv_vec(arg_temp(a2));
        expand_vec_mul(type, vece, v0, v1, v2);
        break;

    case INDEX_op_cmp_vec:
        /* Compare: a2 is the second operand, followed by the condition.  */
        v2 = temp_tcgv_vec(arg_temp(a2));
        expand_vec_cmp(type, vece, v0, v1, v2, va_arg(va, TCGArg));
        break;

    case INDEX_op_cmpsel_vec:
        /* cmpsel: compare v1 vs v2, then select between v3 and v4.  */
        v2 = temp_tcgv_vec(arg_temp(a2));
        v3 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
        v4 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
        expand_vec_cmpsel(type, vece, v0, v1, v2, v3, v4, va_arg(va, TCGArg));
        break;

    default:
        break;
    }

    va_end(va);
}
3662
/*
 * Registers the host ABI requires the callee to preserve.  The prologue
 * pushes these in order and the epilogue pops them in reverse; the
 * table's size also feeds the PUSH_SIZE/FRAME_SIZE computation below.
 */
static const int tcg_target_callee_save_regs[] = {
#if TCG_TARGET_REG_BITS == 64
    TCG_REG_RBP,
    TCG_REG_RBX,
#if defined(_WIN64)
    /* Win64 additionally treats RDI and RSI as callee-saved.  */
    TCG_REG_RDI,
    TCG_REG_RSI,
#endif
    TCG_REG_R12,
    TCG_REG_R13,
    TCG_REG_R14,
    TCG_REG_R15,
#else
    TCG_REG_EBP,
    TCG_REG_EBX,
    TCG_REG_ESI,
    TCG_REG_EDI,
#endif
};
3682
3683
3684
3685
/*
 * Bytes already on the stack when the register saves are done: one word
 * for the return address plus one per callee-saved register pushed.
 */
#define PUSH_SIZE \
    ((1 + ARRAY_SIZE(tcg_target_callee_save_regs)) \
     * (TCG_TARGET_REG_BITS / 8))

/*
 * Total frame: pushes + outgoing call-argument area + TCG temp buffer,
 * rounded up to the required stack alignment.
 */
#define FRAME_SIZE \
    ((PUSH_SIZE \
      + TCG_STATIC_CALL_ARGS_SIZE \
      + CPU_TEMP_BUF_NLONGS * sizeof(long) \
      + TCG_TARGET_STACK_ALIGN - 1) \
     & ~(TCG_TARGET_STACK_ALIGN - 1))
3696
3697
3698static void tcg_target_qemu_prologue(TCGContext *s)
3699{
3700 int i, stack_addend;
3701
3702
3703
3704
3705 stack_addend = FRAME_SIZE - PUSH_SIZE;
3706 tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE,
3707 CPU_TEMP_BUF_NLONGS * sizeof(long));
3708
3709
3710 for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
3711 tcg_out_push(s, tcg_target_callee_save_regs[i]);
3712 }
3713
3714#if TCG_TARGET_REG_BITS == 32
3715 tcg_out_ld(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP,
3716 (ARRAY_SIZE(tcg_target_callee_save_regs) + 1) * 4);
3717 tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
3718
3719 tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, TCG_REG_ESP,
3720 (ARRAY_SIZE(tcg_target_callee_save_regs) + 2) * 4
3721 + stack_addend);
3722#else
3723# if !defined(CONFIG_SOFTMMU) && TCG_TARGET_REG_BITS == 64
3724 if (guest_base) {
3725 int seg = setup_guest_base_seg();
3726 if (seg != 0) {
3727 x86_guest_base_seg = seg;
3728 } else if (guest_base == (int32_t)guest_base) {
3729 x86_guest_base_offset = guest_base;
3730 } else {
3731
3732 x86_guest_base_index = TCG_REG_R12;
3733 tcg_out_mov(s, TCG_TYPE_PTR, x86_guest_base_index, guest_base);
3734 tcg_regset_set_reg(s->reserved_regs, x86_guest_base_index);
3735 }
3736 }
3737# endif
3738 tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
3739 tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
3740
3741 tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, tcg_target_call_iarg_regs[1]);
3742#endif
3743
3744
3745
3746
3747
3748 s->code_gen_epilogue = s->code_ptr;
3749 tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_EAX, 0);
3750
3751
3752 tb_ret_addr = s->code_ptr;
3753
3754 tcg_out_addi(s, TCG_REG_CALL_STACK, stack_addend);
3755
3756 if (have_avx2) {
3757 tcg_out_vex_opc(s, OPC_VZEROUPPER, 0, 0, 0, 0);
3758 }
3759 for (i = ARRAY_SIZE(tcg_target_callee_save_regs) - 1; i >= 0; i--) {
3760 tcg_out_pop(s, tcg_target_callee_save_regs[i]);
3761 }
3762 tcg_out_opc(s, OPC_RET, 0, 0, 0);
3763}
3764
3765static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
3766{
3767 memset(p, 0x90, count);
3768}
3769
3770static void tcg_target_init(TCGContext *s)
3771{
3772#ifdef CONFIG_CPUID_H
3773 unsigned a, b, c, d, b7 = 0;
3774 int max = __get_cpuid_max(0, 0);
3775
3776 if (max >= 7) {
3777
3778 __cpuid_count(7, 0, a, b7, c, d);
3779 have_bmi1 = (b7 & bit_BMI) != 0;
3780 have_bmi2 = (b7 & bit_BMI2) != 0;
3781 }
3782
3783 if (max >= 1) {
3784 __cpuid(1, a, b, c, d);
3785#ifndef have_cmov
3786
3787
3788
3789 have_cmov = (d & bit_CMOV) != 0;
3790#endif
3791
3792
3793
3794 have_movbe = (c & bit_MOVBE) != 0;
3795 have_popcnt = (c & bit_POPCNT) != 0;
3796
3797
3798
3799 if (c & bit_OSXSAVE) {
3800 unsigned xcrl, xcrh;
3801
3802
3803
3804 asm(".byte 0x0f, 0x01, 0xd0" : "=a" (xcrl), "=d" (xcrh) : "c" (0));
3805 if ((xcrl & 6) == 6) {
3806 have_avx1 = (c & bit_AVX) != 0;
3807 have_avx2 = (b7 & bit_AVX2) != 0;
3808 }
3809 }
3810 }
3811
3812 max = __get_cpuid_max(0x8000000, 0);
3813 if (max >= 1) {
3814 __cpuid(0x80000001, a, b, c, d);
3815
3816 have_lzcnt = (c & bit_LZCNT) != 0;
3817 }
3818#endif
3819
3820 tcg_target_available_regs[TCG_TYPE_I32] = ALL_GENERAL_REGS;
3821 if (TCG_TARGET_REG_BITS == 64) {
3822 tcg_target_available_regs[TCG_TYPE_I64] = ALL_GENERAL_REGS;
3823 }
3824 if (have_avx1) {
3825 tcg_target_available_regs[TCG_TYPE_V64] = ALL_VECTOR_REGS;
3826 tcg_target_available_regs[TCG_TYPE_V128] = ALL_VECTOR_REGS;
3827 }
3828 if (have_avx2) {
3829 tcg_target_available_regs[TCG_TYPE_V256] = ALL_VECTOR_REGS;
3830 }
3831
3832 tcg_target_call_clobber_regs = ALL_VECTOR_REGS;
3833 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EAX);
3834 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EDX);
3835 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_ECX);
3836 if (TCG_TARGET_REG_BITS == 64) {
3837#if !defined(_WIN64)
3838 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RDI);
3839 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RSI);
3840#endif
3841 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8);
3842 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9);
3843 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10);
3844 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11);
3845 }
3846
3847 s->reserved_regs = 0;
3848 tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
3849}
3850
/* Layout of the debug-frame record registered with the JIT interface:
   the common header followed by hand-assembled DWARF CFA opcodes.  */
typedef struct {
    DebugFrameHeader h;
    uint8_t fde_def_cfa[4];     /* DW_CFA_def_cfa + uleb128 operands */
    uint8_t fde_reg_ofs[14];    /* DW_CFA_offset entries, two bytes each */
} DebugFrame;

/* fde_def_cfa encodes FRAME_SIZE as a 2-byte uleb128, so it must fit.  */
QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
3859
#if !defined(__ELF__)
    /* Host machine without ELF: no debug frame to register.  */
#elif TCG_TARGET_REG_BITS == 64
#define ELF_HOST_MACHINE EM_X86_64
static const DebugFrame debug_frame = {
    .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
    .h.cie.id = -1,
    .h.cie.version = 1,
    .h.cie.code_align = 1,
    .h.cie.data_align = 0x78,             /* sleb128 -8 */
    .h.cie.return_column = 16,

    /* Total FDE size does not include the "len" member.  */
    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),

    .fde_def_cfa = {
        12, 7,                          /* DW_CFA_def_cfa %rsp, ... */
        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
        (FRAME_SIZE >> 7)
    },
    .fde_reg_ofs = {
        0x90, 1,                        /* DW_CFA_offset, %rip, -8 */
        /* This ordering must match tcg_target_callee_save_regs.  */
        0x86, 2,                        /* DW_CFA_offset, %rbp, -16 */
        0x83, 3,                        /* DW_CFA_offset, %rbx, -24 */
        0x8c, 4,                        /* DW_CFA_offset, %r12, -32 */
        0x8d, 5,                        /* DW_CFA_offset, %r13, -40 */
        0x8e, 6,                        /* DW_CFA_offset, %r14, -48 */
        0x8f, 7,                        /* DW_CFA_offset, %r15, -56 */
    }
};
#else
#define ELF_HOST_MACHINE EM_386
static const DebugFrame debug_frame = {
    .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
    .h.cie.id = -1,
    .h.cie.version = 1,
    .h.cie.code_align = 1,
    .h.cie.data_align = 0x7c,             /* sleb128 -4 */
    .h.cie.return_column = 8,

    /* Total FDE size does not include the "len" member.  */
    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),

    .fde_def_cfa = {
        12, 4,                          /* DW_CFA_def_cfa %esp, ... */
        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
        (FRAME_SIZE >> 7)
    },
    .fde_reg_ofs = {
        0x88, 1,                        /* DW_CFA_offset, %eip, -4 */
        /* This ordering must match tcg_target_callee_save_regs.  */
        0x85, 2,                        /* DW_CFA_offset, %ebp, -8 */
        0x83, 3,                        /* DW_CFA_offset, %ebx, -12 */
        0x86, 4,                        /* DW_CFA_offset, %esi, -16 */
        0x87, 5,                        /* DW_CFA_offset, %edi, -20 */
    }
};
#endif
3919
#if defined(ELF_HOST_MACHINE)
/* Register the generated-code buffer and its debug frame with the
   in-process JIT debugging interface.  */
void tcg_register_jit(void *buf, size_t buf_size)
{
    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
}
#endif
3926