1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25#include "tcg-pool.inc.c"
26
#ifdef CONFIG_DEBUG_TCG
/* Register names for debug dumps.  The low 8 GP registers take 32-bit
   spellings when the target register width is 32 bits.  */
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
#if TCG_TARGET_REG_BITS == 64
    "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
#else
    "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",
#endif
    "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
    "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7",
#if TCG_TARGET_REG_BITS == 64
    "%xmm8", "%xmm9", "%xmm10", "%xmm11",
    "%xmm12", "%xmm13", "%xmm14", "%xmm15",
#endif
};
#endif
42
/* Preferred register allocation order.  Call-preserved registers come
   first so that values live across calls avoid spills; call-clobbered
   registers (and EAX/EDX, used as softmmu scratch on 32-bit) come last.  */
static const int tcg_target_reg_alloc_order[] = {
#if TCG_TARGET_REG_BITS == 64
    TCG_REG_RBP,
    TCG_REG_RBX,
    TCG_REG_R12,
    TCG_REG_R13,
    TCG_REG_R14,
    TCG_REG_R15,
    TCG_REG_R10,
    TCG_REG_R11,
    TCG_REG_R9,
    TCG_REG_R8,
    TCG_REG_RCX,
    TCG_REG_RDX,
    TCG_REG_RSI,
    TCG_REG_RDI,
    TCG_REG_RAX,
#else
    TCG_REG_EBX,
    TCG_REG_ESI,
    TCG_REG_EDI,
    TCG_REG_EBP,
    TCG_REG_ECX,
    TCG_REG_EDX,
    TCG_REG_EAX,
#endif
    TCG_REG_XMM0,
    TCG_REG_XMM1,
    TCG_REG_XMM2,
    TCG_REG_XMM3,
    TCG_REG_XMM4,
    TCG_REG_XMM5,
#ifndef _WIN64
    /* The Win64 ABI makes xmm6-xmm15 callee-saved, and TCG never saves
       them, so they are only available for allocation elsewhere.  */
    TCG_REG_XMM6,
    TCG_REG_XMM7,
#if TCG_TARGET_REG_BITS == 64
    TCG_REG_XMM8,
    TCG_REG_XMM9,
    TCG_REG_XMM10,
    TCG_REG_XMM11,
    TCG_REG_XMM12,
    TCG_REG_XMM13,
    TCG_REG_XMM14,
    TCG_REG_XMM15,
#endif
#endif
};
92
/* Registers used to pass integer call arguments, in ABI order
   (Win64 vs SysV on 64-bit).  */
static const int tcg_target_call_iarg_regs[] = {
#if TCG_TARGET_REG_BITS == 64
#if defined(_WIN64)
    TCG_REG_RCX,
    TCG_REG_RDX,
#else
    TCG_REG_RDI,
    TCG_REG_RSI,
    TCG_REG_RDX,
    TCG_REG_RCX,
#endif
    TCG_REG_R8,
    TCG_REG_R9,
#else
    /* 32-bit mode passes all arguments on the stack.  */
#endif
};
110
/* Registers holding integer call results; EDX carries the high half of
   a 64-bit result on 32-bit hosts.  */
static const int tcg_target_call_oarg_regs[] = {
    TCG_REG_EAX,
#if TCG_TARGET_REG_BITS == 32
    TCG_REG_EDX
#endif
};
117
118
/* Extra constant-operand constraints: value fits in a sign-extended
   32-bit immediate, a zero-extended one, its bitwise inverse fits in
   32 bits, or it equals the operand width (32/64).  */
#define TCG_CT_CONST_S32 0x100
#define TCG_CT_CONST_U32 0x200
#define TCG_CT_CONST_I32 0x400
#define TCG_CT_CONST_WSZ 0x800

/* Scratch registers reserved by the 'L' constraint below; presumably
   used by the qemu_ld/st TLB lookup — confirm against the full file.
   On 64-bit they alias the first two call-argument registers.  */
#if TCG_TARGET_REG_BITS == 64
# define TCG_REG_L0 tcg_target_call_iarg_regs[0]
# define TCG_REG_L1 tcg_target_call_iarg_regs[1]
#else
# define TCG_REG_L0 TCG_REG_EAX
# define TCG_REG_L1 TCG_REG_EDX
#endif
134
135
136
137
#if defined(CONFIG_CPUID_H)
#include "qemu/cpuid.h"
#endif

/* CMOV is guaranteed on x86-64; otherwise it must be probed via cpuid,
   or assumed absent when cpuid.h is unavailable.  */
#if TCG_TARGET_REG_BITS == 64
# define have_cmov 1
#elif defined(CONFIG_CPUID_H)
static bool have_cmov;
#else
# define have_cmov 0
#endif

/* ISA-extension availability flags; the non-static ones are shared with
   other parts of the backend.  Without cpuid.h the optional features
   are compiled out entirely.  */
bool have_bmi1;
bool have_popcnt;
bool have_avx1;
bool have_avx2;

#ifdef CONFIG_CPUID_H
static bool have_movbe;
static bool have_bmi2;
static bool have_lzcnt;
#else
# define have_movbe 0
# define have_bmi2 0
# define have_lzcnt 0
#endif

/* Address of the epilogue that exit_tb jumps back to.  */
static tcg_insn_unit *tb_ret_addr;
169
/* Apply relocation TYPE at CODE_PTR so that it refers to VALUE+ADDEND.
   PC-relative relocations are range-checked; out-of-range is fatal.  */
static void patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend)
{
    value += addend;
    switch(type) {
    case R_386_PC32:
        value -= (uintptr_t)code_ptr;
        if (value != (int32_t)value) {
            tcg_abort();
        }
        /* FALLTHRU - the adjusted displacement is stored like an
           absolute 32-bit value.  */
    case R_386_32:
        tcg_patch32(code_ptr, value);
        break;
    case R_386_PC8:
        value -= (uintptr_t)code_ptr;
        if (value != (int8_t)value) {
            tcg_abort();
        }
        tcg_patch8(code_ptr, value);
        break;
    default:
        tcg_abort();
    }
}
195
/* Register-set masks: GP registers occupy the low bits, XMM registers
   start at bit 16 (only 8 of each are encodable in 32-bit mode).  */
#if TCG_TARGET_REG_BITS == 64
#define ALL_GENERAL_REGS   0x0000ffffu
#define ALL_VECTOR_REGS    0xffff0000u
#else
#define ALL_GENERAL_REGS   0x000000ffu
#define ALL_VECTOR_REGS    0x00ff0000u
#endif
203
204
/* Parse one target-specific operand-constraint letter from CT_STR into CT.
   Returns the advanced string pointer, or NULL for an unknown letter.  */
static const char *target_parse_constraint(TCGArgConstraint *ct,
                                           const char *ct_str, TCGType type)
{
    switch(*ct_str++) {
    case 'a':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EAX);
        break;
    case 'b':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EBX);
        break;
    case 'c':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_ECX);
        break;
    case 'd':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EDX);
        break;
    case 'S':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_ESI);
        break;
    case 'D':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EDI);
        break;
    case 'q':
        /* A register usable as a byte operand: any register in 64-bit
           mode, only EAX-EDX in 32-bit mode (no REX prefix there).  */
        ct->ct |= TCG_CT_REG;
        ct->u.regs = TCG_TARGET_REG_BITS == 64 ? 0xffff : 0xf;
        break;
    case 'Q':
        /* A register with an addressable second byte (AH etc.).  */
        ct->ct |= TCG_CT_REG;
        ct->u.regs = 0xf;
        break;
    case 'r':
        /* Any general-purpose register.  */
        ct->ct |= TCG_CT_REG;
        ct->u.regs |= ALL_GENERAL_REGS;
        break;
    case 'W':
        /* Constant equal to the operand word size.  */
        ct->ct |= TCG_CT_CONST_WSZ;
        break;
    case 'x':
        /* Any vector register.  */
        ct->ct |= TCG_CT_REG;
        ct->u.regs |= ALL_VECTOR_REGS;
        break;

    /* qemu_ld/st address operand: any GP register except the scratch
       registers reserved for the TLB lookup.  */
    case 'L':
        ct->ct |= TCG_CT_REG;
        ct->u.regs = TCG_TARGET_REG_BITS == 64 ? 0xffff : 0xff;
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_L0);
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_L1);
        break;

    case 'e':
        ct->ct |= (type == TCG_TYPE_I32 ? TCG_CT_CONST : TCG_CT_CONST_S32);
        break;
    case 'Z':
        ct->ct |= (type == TCG_TYPE_I32 ? TCG_CT_CONST : TCG_CT_CONST_U32);
        break;
    case 'I':
        ct->ct |= (type == TCG_TYPE_I32 ? TCG_CT_CONST : TCG_CT_CONST_I32);
        break;

    default:
        return NULL;
    }
    return ct_str;
}
281
282
283static inline int tcg_target_const_match(tcg_target_long val, TCGType type,
284 const TCGArgConstraint *arg_ct)
285{
286 int ct = arg_ct->ct;
287 if (ct & TCG_CT_CONST) {
288 return 1;
289 }
290 if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) {
291 return 1;
292 }
293 if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) {
294 return 1;
295 }
296 if ((ct & TCG_CT_CONST_I32) && ~val == (int32_t)~val) {
297 return 1;
298 }
299 if ((ct & TCG_CT_CONST_WSZ) && val == (type == TCG_TYPE_I32 ? 32 : 64)) {
300 return 1;
301 }
302 return 0;
303}
304
/* Low three bits of a register number, as encoded in ModRM/SIB.  */
# define LOWREGMASK(x)	((x) & 7)

/* Opcode modifier bits OR'ed into the opcode value passed to
   tcg_out_opc and friends: 0x0f escape bytes, mandatory prefixes,
   REX pieces (64-bit only), and the VEX.L bit.  */
#define P_EXT		0x100		/* 0x0f opcode prefix */
#define P_EXT38		0x200		/* 0x0f 0x38 opcode prefix */
#define P_DATA16	0x400		/* 0x66 opcode prefix */
#if TCG_TARGET_REG_BITS == 64
# define P_ADDR32	0x800		/* 0x67 opcode prefix */
# define P_REXW		0x1000		/* Set REX.W = 1 */
# define P_REXB_R	0x2000		/* REG field as byte register */
# define P_REXB_RM	0x4000		/* R/M field as byte register */
# define P_GS		0x8000		/* gs segment override */
#else
# define P_ADDR32	0
# define P_REXW		0
# define P_REXB_R	0
# define P_REXB_RM	0
# define P_GS		0
#endif
#define P_EXT3A		0x10000		/* 0x0f 0x3a opcode prefix */
#define P_SIMDF3	0x20000		/* 0xf3 opcode prefix */
#define P_SIMDF2	0x40000		/* 0xf2 opcode prefix */
#define P_VEXL		0x80000		/* Set VEX.L = 1 */
327
/* Opcode values, combined with the P_* prefix flags above.  The suffix
   names the Intel operand encoding (Ev, Gv, Iz, Vx, Wx, ...).  */
#define OPC_ARITH_EvIz	(0x81)
#define OPC_ARITH_EvIb	(0x83)
#define OPC_ARITH_GvEv	(0x03)		/* ... plus (ARITH_FOO << 3) */
#define OPC_ANDN        (0xf2 | P_EXT38)
#define OPC_ADD_GvEv	(OPC_ARITH_GvEv | (ARITH_ADD << 3))
#define OPC_BLENDPS     (0x0c | P_EXT3A | P_DATA16)
#define OPC_BSF         (0xbc | P_EXT)
#define OPC_BSR         (0xbd | P_EXT)
#define OPC_BSWAP	(0xc8 | P_EXT)
#define OPC_CALL_Jz	(0xe8)
#define OPC_CMOVCC      (0x40 | P_EXT)  /* ... plus condition code */
#define OPC_CMP_GvEv	(OPC_ARITH_GvEv | (ARITH_CMP << 3))
#define OPC_DEC_r32	(0x48)
#define OPC_IMUL_GvEv	(0xaf | P_EXT)
#define OPC_IMUL_GvEvIb	(0x6b)
#define OPC_IMUL_GvEvIz	(0x69)
#define OPC_INC_r32	(0x40)
#define OPC_JCC_long	(0x80 | P_EXT)	/* ... plus condition code */
#define OPC_JCC_short	(0x70)		/* ... plus condition code */
#define OPC_JMP_long	(0xe9)
#define OPC_JMP_short	(0xeb)
#define OPC_LEA         (0x8d)
#define OPC_LZCNT       (0xbd | P_EXT | P_SIMDF3)
#define OPC_MOVB_EvGv	(0x88)		/* stores, more or less */
#define OPC_MOVL_EvGv	(0x89)		/* stores, more or less */
#define OPC_MOVL_GvEv	(0x8b)		/* loads, more or less */
#define OPC_MOVB_EvIz   (0xc6)
#define OPC_MOVL_EvIz	(0xc7)
#define OPC_MOVL_Iv     (0xb8)
#define OPC_MOVBE_GyMy  (0xf0 | P_EXT38)
#define OPC_MOVBE_MyGy  (0xf1 | P_EXT38)
#define OPC_MOVD_VyEy   (0x6e | P_EXT | P_DATA16)
#define OPC_MOVD_EyVy   (0x7e | P_EXT | P_DATA16)
#define OPC_MOVDDUP     (0x12 | P_EXT | P_SIMDF2)
#define OPC_MOVDQA_VxWx (0x6f | P_EXT | P_DATA16)
#define OPC_MOVDQA_WxVx (0x7f | P_EXT | P_DATA16)
#define OPC_MOVDQU_VxWx (0x6f | P_EXT | P_SIMDF3)
#define OPC_MOVDQU_WxVx (0x7f | P_EXT | P_SIMDF3)
#define OPC_MOVQ_VqWq   (0x7e | P_EXT | P_SIMDF3)
#define OPC_MOVQ_WqVq   (0xd6 | P_EXT | P_DATA16)
#define OPC_MOVSBL	(0xbe | P_EXT)
#define OPC_MOVSWL	(0xbf | P_EXT)
#define OPC_MOVSLQ	(0x63 | P_REXW)
#define OPC_MOVZBL	(0xb6 | P_EXT)
#define OPC_MOVZWL	(0xb7 | P_EXT)
#define OPC_PACKSSDW    (0x6b | P_EXT | P_DATA16)
#define OPC_PACKSSWB    (0x63 | P_EXT | P_DATA16)
#define OPC_PACKUSDW    (0x2b | P_EXT38 | P_DATA16)
#define OPC_PACKUSWB    (0x67 | P_EXT | P_DATA16)
#define OPC_PADDB       (0xfc | P_EXT | P_DATA16)
#define OPC_PADDW       (0xfd | P_EXT | P_DATA16)
#define OPC_PADDD       (0xfe | P_EXT | P_DATA16)
#define OPC_PADDQ       (0xd4 | P_EXT | P_DATA16)
#define OPC_PAND        (0xdb | P_EXT | P_DATA16)
#define OPC_PANDN       (0xdf | P_EXT | P_DATA16)
#define OPC_PBLENDW     (0x0e | P_EXT3A | P_DATA16)
#define OPC_PCMPEQB     (0x74 | P_EXT | P_DATA16)
#define OPC_PCMPEQW     (0x75 | P_EXT | P_DATA16)
#define OPC_PCMPEQD     (0x76 | P_EXT | P_DATA16)
#define OPC_PCMPEQQ     (0x29 | P_EXT38 | P_DATA16)
#define OPC_PCMPGTB     (0x64 | P_EXT | P_DATA16)
#define OPC_PCMPGTW     (0x65 | P_EXT | P_DATA16)
#define OPC_PCMPGTD     (0x66 | P_EXT | P_DATA16)
#define OPC_PCMPGTQ     (0x37 | P_EXT38 | P_DATA16)
#define OPC_PMOVSXBW    (0x20 | P_EXT38 | P_DATA16)
#define OPC_PMOVSXWD    (0x23 | P_EXT38 | P_DATA16)
#define OPC_PMOVSXDQ    (0x25 | P_EXT38 | P_DATA16)
#define OPC_PMOVZXBW    (0x30 | P_EXT38 | P_DATA16)
#define OPC_PMOVZXWD    (0x33 | P_EXT38 | P_DATA16)
#define OPC_PMOVZXDQ    (0x35 | P_EXT38 | P_DATA16)
#define OPC_PMULLW      (0xd5 | P_EXT | P_DATA16)
#define OPC_PMULLD      (0x40 | P_EXT38 | P_DATA16)
#define OPC_POR         (0xeb | P_EXT | P_DATA16)
#define OPC_PSHUFB      (0x00 | P_EXT38 | P_DATA16)
#define OPC_PSHUFD      (0x70 | P_EXT | P_DATA16)
#define OPC_PSHUFLW     (0x70 | P_EXT | P_SIMDF2)
#define OPC_PSHUFHW     (0x70 | P_EXT | P_SIMDF3)
#define OPC_PSHIFTW_Ib  (0x71 | P_EXT | P_DATA16) /* /2 /6 /4 */
#define OPC_PSHIFTD_Ib  (0x72 | P_EXT | P_DATA16) /* /2 /6 /4 */
#define OPC_PSHIFTQ_Ib  (0x73 | P_EXT | P_DATA16) /* /2 /6 /4 */
#define OPC_PSUBB       (0xf8 | P_EXT | P_DATA16)
#define OPC_PSUBW       (0xf9 | P_EXT | P_DATA16)
#define OPC_PSUBD       (0xfa | P_EXT | P_DATA16)
#define OPC_PSUBQ       (0xfb | P_EXT | P_DATA16)
#define OPC_PUNPCKLBW   (0x60 | P_EXT | P_DATA16)
#define OPC_PUNPCKLWD   (0x61 | P_EXT | P_DATA16)
#define OPC_PUNPCKLDQ   (0x62 | P_EXT | P_DATA16)
#define OPC_PUNPCKLQDQ  (0x6c | P_EXT | P_DATA16)
#define OPC_PUNPCKHBW   (0x68 | P_EXT | P_DATA16)
#define OPC_PUNPCKHWD   (0x69 | P_EXT | P_DATA16)
#define OPC_PUNPCKHDQ   (0x6a | P_EXT | P_DATA16)
#define OPC_PUNPCKHQDQ  (0x6d | P_EXT | P_DATA16)
#define OPC_PXOR        (0xef | P_EXT | P_DATA16)
#define OPC_POP_r32	(0x58)
#define OPC_POPCNT      (0xb8 | P_EXT | P_SIMDF3)
#define OPC_PUSH_r32	(0x50)
#define OPC_PUSH_Iv	(0x68)
#define OPC_PUSH_Ib	(0x6a)
#define OPC_RET		(0xc3)
#define OPC_SETCC	(0x90 | P_EXT | P_REXB_RM) /* ... plus cc */
#define OPC_SHIFT_1	(0xd1)
#define OPC_SHIFT_Ib	(0xc1)
#define OPC_SHIFT_cl	(0xd3)
#define OPC_SARX        (0xf7 | P_EXT38 | P_SIMDF3)
#define OPC_SHUFPS      (0xc6 | P_EXT)
#define OPC_SHLX        (0xf7 | P_EXT38 | P_DATA16)
#define OPC_SHRX        (0xf7 | P_EXT38 | P_SIMDF2)
#define OPC_TESTL	(0x85)
#define OPC_TZCNT       (0xbc | P_EXT | P_SIMDF3)
#define OPC_UD2         (0x0b | P_EXT)
#define OPC_VPBLENDD    (0x02 | P_EXT3A | P_DATA16)
#define OPC_VPBLENDVB   (0x4c | P_EXT3A | P_DATA16)
#define OPC_VPBROADCASTB (0x78 | P_EXT38 | P_DATA16)
#define OPC_VPBROADCASTW (0x79 | P_EXT38 | P_DATA16)
#define OPC_VPBROADCASTD (0x58 | P_EXT38 | P_DATA16)
#define OPC_VPBROADCASTQ (0x59 | P_EXT38 | P_DATA16)
#define OPC_VPERMQ      (0x00 | P_EXT3A | P_DATA16 | P_REXW)
#define OPC_VPERM2I128  (0x46 | P_EXT3A | P_DATA16 | P_VEXL)
#define OPC_VZEROUPPER  (0x77 | P_EXT)
#define OPC_XCHG_ax_r32	(0x90)

#define OPC_GRP3_Ev	(0xf7)
#define OPC_GRP5	(0xff)
#define OPC_GRP14       (0x73 | P_EXT | P_DATA16)
452
453
454
/* Group 1 opcode extensions for ARITH_{EvIz,EvIb} (the /r field).  */
#define ARITH_ADD 0
#define ARITH_OR  1
#define ARITH_ADC 2
#define ARITH_SBB 3
#define ARITH_AND 4
#define ARITH_SUB 5
#define ARITH_XOR 6
#define ARITH_CMP 7

/* Group 2 opcode extensions for shift insns.  */
#define SHIFT_ROL 0
#define SHIFT_ROR 1
#define SHIFT_SHL 4
#define SHIFT_SHR 5
#define SHIFT_SAR 7

/* Group 3 opcode extensions for OPC_GRP3_Ev.  */
#define EXT3_NOT   2
#define EXT3_NEG   3
#define EXT3_MUL   4
#define EXT3_IMUL  5
#define EXT3_DIV   6
#define EXT3_IDIV  7

/* Group 5 opcode extensions for OPC_GRP5.  */
#define EXT5_INC_Ev	0
#define EXT5_DEC_Ev	1
#define EXT5_CALLN_Ev	2
#define EXT5_JMPN_Ev	4

/* Condition codes to be added to OPC_JCC_{long,short}.  */
#define JCC_JMP (-1)
#define JCC_JO  0x0
#define JCC_JNO 0x1
#define JCC_JB  0x2
#define JCC_JAE 0x3
#define JCC_JE  0x4
#define JCC_JNE 0x5
#define JCC_JBE 0x6
#define JCC_JA  0x7
#define JCC_JS  0x8
#define JCC_JNS 0x9
#define JCC_JP  0xa
#define JCC_JNP 0xb
#define JCC_JL  0xc
#define JCC_JGE 0xd
#define JCC_JLE 0xe
#define JCC_JG  0xf
503
/* Map TCG comparison conditions to x86 condition codes.  */
static const uint8_t tcg_cond_to_jcc[] = {
    [TCG_COND_EQ] = JCC_JE,
    [TCG_COND_NE] = JCC_JNE,
    [TCG_COND_LT] = JCC_JL,
    [TCG_COND_GE] = JCC_JGE,
    [TCG_COND_LE] = JCC_JLE,
    [TCG_COND_GT] = JCC_JG,
    [TCG_COND_LTU] = JCC_JB,
    [TCG_COND_GEU] = JCC_JAE,
    [TCG_COND_LEU] = JCC_JBE,
    [TCG_COND_GTU] = JCC_JA,
};
516
#if TCG_TARGET_REG_BITS == 64
/* Emit the legacy prefixes, REX byte (if needed) and opcode byte(s) for
   OPC.  R, RM and X are the full (0-15) register numbers destined for
   the ModRM reg, r/m and SIB index fields; their high bits go into REX.  */
static void tcg_out_opc(TCGContext *s, int opc, int r, int rm, int x)
{
    int rex;

    if (opc & P_GS) {
        tcg_out8(s, 0x65);
    }
    if (opc & P_DATA16) {
        /* We should never be asking for both 16-bit operation and
           64-bit operation in the same insn.  */
        tcg_debug_assert((opc & P_REXW) == 0);
        tcg_out8(s, 0x66);
    }
    if (opc & P_ADDR32) {
        tcg_out8(s, 0x67);
    }
    if (opc & P_SIMDF3) {
        tcg_out8(s, 0xf3);
    } else if (opc & P_SIMDF2) {
        tcg_out8(s, 0xf2);
    }

    rex = 0;
    rex |= (opc & P_REXW) ? 0x8 : 0x0;  /* REX.W */
    rex |= (r & 8) >> 1;                /* REX.R */
    rex |= (x & 8) >> 2;                /* REX.X */
    rex |= (rm & 8) >> 3;               /* REX.B */

    /* P_REXB_{R,RM} indicates that the given register is the low byte.
       For %[abcd]l we need no REX prefix, but for %{si,di,bp,sp}l we do,
       as otherwise the encoding indicates %[abcd]h.  Note that the values
       that are ORed in merely indicate that the REX byte must be present;
       those bits of the REX byte are otherwise zero.  */
    rex |= opc & (r >= 4 ? P_REXB_R : 0);
    rex |= opc & (rm >= 4 ? P_REXB_RM : 0);

    if (rex) {
        tcg_out8(s, (uint8_t)(rex | 0x40));
    }

    if (opc & (P_EXT | P_EXT38 | P_EXT3A)) {
        tcg_out8(s, 0x0f);
        if (opc & P_EXT38) {
            tcg_out8(s, 0x38);
        } else if (opc & P_EXT3A) {
            tcg_out8(s, 0x3a);
        }
    }

    tcg_out8(s, opc);
}
#else
/* 32-bit variant: no REX prefix exists, only prefixes and escapes.  */
static void tcg_out_opc(TCGContext *s, int opc)
{
    if (opc & P_DATA16) {
        tcg_out8(s, 0x66);
    }
    if (opc & P_SIMDF3) {
        tcg_out8(s, 0xf3);
    } else if (opc & P_SIMDF2) {
        tcg_out8(s, 0xf2);
    }
    if (opc & (P_EXT | P_EXT38 | P_EXT3A)) {
        tcg_out8(s, 0x0f);
        if (opc & P_EXT38) {
            tcg_out8(s, 0x38);
        } else if (opc & P_EXT3A) {
            tcg_out8(s, 0x3a);
        }
    }
    tcg_out8(s, opc);
}
/* Discard the register arguments on 32-bit hosts so that callers can
   use the 5-argument form unconditionally.  */
#define tcg_out_opc(s, opc, r, rm, x) (tcg_out_opc)(s, opc)
#endif
594
/* Emit OPC with a register-direct ModRM byte (mod = 11).  */
static void tcg_out_modrm(TCGContext *s, int opc, int r, int rm)
{
    tcg_out_opc(s, opc, r, rm, 0);
    tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
}
600
/* Emit a VEX prefix (2- or 3-byte form) plus the opcode byte for OPC.
   V is the extra VEX.vvvv source register operand.  */
static void tcg_out_vex_opc(TCGContext *s, int opc, int r, int v,
                            int rm, int index)
{
    int tmp;

    /* Use the two byte form if possible, which cannot encode
       VEX.W, VEX.B, VEX.X, or an m-mmmm field other than P_EXT.  */
    if ((opc & (P_EXT | P_EXT38 | P_EXT3A | P_REXW)) == P_EXT
        && ((rm | index) & 8) == 0) {
        /* Two byte VEX prefix.  */
        tcg_out8(s, 0xc5);

        tmp = (r & 8 ? 0 : 0x80);              /* VEX.R */
    } else {
        /* Three byte VEX prefix.  */
        tcg_out8(s, 0xc4);

        /* VEX.m-mmmm */
        if (opc & P_EXT3A) {
            tmp = 3;
        } else if (opc & P_EXT38) {
            tmp = 2;
        } else if (opc & P_EXT) {
            tmp = 1;
        } else {
            g_assert_not_reached();
        }
        tmp |= (r & 8 ? 0 : 0x80);             /* VEX.R */
        tmp |= (index & 8 ? 0 : 0x40);         /* VEX.X */
        tmp |= (rm & 8 ? 0 : 0x20);            /* VEX.B */
        tcg_out8(s, tmp);

        tmp = (opc & P_REXW ? 0x80 : 0);       /* VEX.W */
    }

    tmp |= (opc & P_VEXL ? 0x04 : 0);          /* VEX.L */
    /* VEX.pp */
    if (opc & P_DATA16) {
        tmp |= 1;                              /* 0x66 */
    } else if (opc & P_SIMDF3) {
        tmp |= 2;                              /* 0xf3 */
    } else if (opc & P_SIMDF2) {
        tmp |= 3;                              /* 0xf2 */
    }
    tmp |= (~v & 15) << 3;                     /* VEX.vvvv */
    tcg_out8(s, tmp);
    tcg_out8(s, opc);
}
649
/* Emit a VEX-encoded insn with a register-direct ModRM byte.  */
static void tcg_out_vex_modrm(TCGContext *s, int opc, int r, int v, int rm)
{
    tcg_out_vex_opc(s, opc, r, v, rm, 0);
    tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
}
655
656
657
658
659
660
/* Output an opcode's address mode: ModRM byte, optional SIB byte and
   displacement for [RM + INDEX*2^SHIFT + OFFSET].  A negative RM or
   INDEX means "no register".  The opcode itself must already have been
   emitted, since the REX/VEX prefix needed the register numbers.  */
static void tcg_out_sib_offset(TCGContext *s, int r, int rm, int index,
                               int shift, intptr_t offset)
{
    int mod, len;

    if (index < 0 && rm < 0) {
        if (TCG_TARGET_REG_BITS == 64) {
            /* Try for a rip-relative addressing mode.  This has replaced
               the 32-bit-mode absolute addressing encoding.  The "+ ~rm"
               term presumably accounts for extra opcode bytes signalled
               via rm < -1 — TODO confirm against callers.  */
            intptr_t pc = (intptr_t)s->code_ptr + 5 + ~rm;
            intptr_t disp = offset - pc;
            if (disp == (int32_t)disp) {
                tcg_out8(s, (LOWREGMASK(r) << 3) | 5);
                tcg_out32(s, disp);
                return;
            }

            /* Try for an absolute address encoding.  This requires the
               use of the MODRM+SIB encoding and is therefore larger than
               rip-relative addressing.  */
            if (offset == (int32_t)offset) {
                tcg_out8(s, (LOWREGMASK(r) << 3) | 4);
                tcg_out8(s, (4 << 3) | 5);
                tcg_out32(s, offset);
                return;
            }

            /* ??? The memory isn't directly addressable.  */
            g_assert_not_reached();
        } else {
            /* Absolute address.  */
            tcg_out8(s, (r << 3) | 5);
            tcg_out32(s, offset);
            return;
        }
    }

    /* Find the length of the immediate addend.  Note that the encoding
       that would be used for (%ebp) indicates absolute addressing.  */
    if (rm < 0) {
        mod = 0, len = 4, rm = 5;
    } else if (offset == 0 && LOWREGMASK(rm) != TCG_REG_EBP) {
        mod = 0, len = 0;
    } else if (offset == (int8_t)offset) {
        mod = 0x40, len = 1;
    } else {
        mod = 0x80, len = 4;
    }

    /* Use a single byte MODRM format if possible.  Note that the encoding
       that would be used for %esp is the escape to the two byte form.  */
    if (index < 0 && LOWREGMASK(rm) != TCG_REG_ESP) {
        /* Single byte MODRM format.  */
        tcg_out8(s, mod | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
    } else {
        /* Two byte MODRM+SIB format.  */

        /* Note that the encoding that would place %esp into the index
           field indicates no index register.  In 64-bit mode, the REX.X
           bit counts, so %r12 can be used as the index.  */
        if (index < 0) {
            index = 4;
        } else {
            tcg_debug_assert(index != TCG_REG_ESP);
        }

        tcg_out8(s, mod | (LOWREGMASK(r) << 3) | 4);
        tcg_out8(s, (shift << 6) | (LOWREGMASK(index) << 3) | LOWREGMASK(rm));
    }

    if (len == 1) {
        tcg_out8(s, offset);
    } else if (len == 4) {
        tcg_out32(s, offset);
    }
}
737
/* Emit opcode + full SIB address mode.  */
static void tcg_out_modrm_sib_offset(TCGContext *s, int opc, int r, int rm,
                                     int index, int shift, intptr_t offset)
{
    tcg_out_opc(s, opc, r, rm < 0 ? 0 : rm, index < 0 ? 0 : index);
    tcg_out_sib_offset(s, r, rm, index, shift, offset);
}
744
/* VEX-prefixed counterpart of tcg_out_modrm_sib_offset.  */
static void tcg_out_vex_modrm_sib_offset(TCGContext *s, int opc, int r, int v,
                                         int rm, int index, int shift,
                                         intptr_t offset)
{
    tcg_out_vex_opc(s, opc, r, v, rm < 0 ? 0 : rm, index < 0 ? 0 : index);
    tcg_out_sib_offset(s, r, rm, index, shift, offset);
}
752
753
/* A simplification of the above with no index or shift.  */
static inline void tcg_out_modrm_offset(TCGContext *s, int opc, int r,
                                        int rm, intptr_t offset)
{
    tcg_out_modrm_sib_offset(s, opc, r, rm, -1, 0, offset);
}
759
/* VEX-prefixed address mode with no index or shift.  */
static inline void tcg_out_vex_modrm_offset(TCGContext *s, int opc, int r,
                                            int v, int rm, intptr_t offset)
{
    tcg_out_vex_modrm_sib_offset(s, opc, r, v, rm, -1, 0, offset);
}
765
766
/* Output an opcode with an expected reference to the constant pool:
   a rip-relative (absolute on 32-bit) access whose displacement is
   patched later via new_pool_label.  */
static inline void tcg_out_modrm_pool(TCGContext *s, int opc, int r)
{
    tcg_out_opc(s, opc, r, 0, 0);
    /* Absolute for 32-bit, pc-relative for 64-bit.  */
    tcg_out8(s, LOWREGMASK(r) << 3 | 5);
    tcg_out32(s, 0);
}
774
775
/* VEX-prefixed counterpart of tcg_out_modrm_pool.  */
static inline void tcg_out_vex_modrm_pool(TCGContext *s, int opc, int r)
{
    tcg_out_vex_opc(s, opc, r, 0, 0, 0);
    /* Absolute for 32-bit, pc-relative for 64-bit.  */
    tcg_out8(s, LOWREGMASK(r) << 3 | 5);
    tcg_out32(s, 0);
}
783
784
/* Emit a register-register ALU operation; SUBOP is ARITH_FOO possibly
   OR'ed with P_* flags (e.g. P_REXW).  */
static inline void tgen_arithr(TCGContext *s, int subop, int dest, int src)
{
    /* Propagate an opcode prefix, such as P_REXW.  */
    int ext = subop & ~0x7;
    subop &= 0x7;

    tcg_out_modrm(s, OPC_ARITH_GvEv + (subop << 3) + ext, dest, src);
}
793
/* Register-to-register move of TYPE, handling all combinations of GP
   (reg < 16) and XMM (reg >= 16) source and destination.  */
static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
{
    int rexw = 0;

    if (arg == ret) {
        return;
    }
    switch (type) {
    case TCG_TYPE_I64:
        rexw = P_REXW;
        /* FALLTHRU */
    case TCG_TYPE_I32:
        if (ret < 16) {
            if (arg < 16) {
                tcg_out_modrm(s, OPC_MOVL_GvEv + rexw, ret, arg);
            } else {
                tcg_out_vex_modrm(s, OPC_MOVD_EyVy + rexw, arg, 0, ret);
            }
        } else {
            if (arg < 16) {
                tcg_out_vex_modrm(s, OPC_MOVD_VyEy + rexw, ret, 0, arg);
            } else {
                tcg_out_vex_modrm(s, OPC_MOVQ_VqWq, ret, 0, arg);
            }
        }
        break;

    case TCG_TYPE_V64:
        tcg_debug_assert(ret >= 16 && arg >= 16);
        tcg_out_vex_modrm(s, OPC_MOVQ_VqWq, ret, 0, arg);
        break;
    case TCG_TYPE_V128:
        tcg_debug_assert(ret >= 16 && arg >= 16);
        tcg_out_vex_modrm(s, OPC_MOVDQA_VxWx, ret, 0, arg);
        break;
    case TCG_TYPE_V256:
        tcg_debug_assert(ret >= 16 && arg >= 16);
        tcg_out_vex_modrm(s, OPC_MOVDQA_VxWx | P_VEXL, ret, 0, arg);
        break;

    default:
        g_assert_not_reached();
    }
}
838
/* Broadcast element of size VECE from vector register A into all
   elements of R, using AVX2 vpbroadcast when available, otherwise a
   widening unpack/shuffle sequence.  */
static void tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg r, TCGReg a)
{
    if (have_avx2) {
        static const int dup_insn[4] = {
            OPC_VPBROADCASTB, OPC_VPBROADCASTW,
            OPC_VPBROADCASTD, OPC_VPBROADCASTQ,
        };
        int vex_l = (type == TCG_TYPE_V256 ? P_VEXL : 0);
        tcg_out_vex_modrm(s, dup_insn[vece] + vex_l, r, 0, a);
    } else {
        switch (vece) {
        case MO_8:
            /* ??? With zero in a register, use PSHUFB.  */
            tcg_out_vex_modrm(s, OPC_PUNPCKLBW, r, a, a);
            a = r;
            /* FALLTHRU - widen bytes to words, then keep going.  */
        case MO_16:
            tcg_out_vex_modrm(s, OPC_PUNPCKLWD, r, a, a);
            a = r;
            /* FALLTHRU - widen words to dwords, then shuffle.  */
        case MO_32:
            tcg_out_vex_modrm(s, OPC_PSHUFD, r, 0, a);
            /* imm8 operand: all output lanes selected from input lane 0.  */
            tcg_out8(s, 0);
            break;
        case MO_64:
            tcg_out_vex_modrm(s, OPC_PUNPCKLQDQ, r, a, a);
            break;
        default:
            g_assert_not_reached();
        }
    }
}
873
/* Load immediate ARG broadcast across vector register RET.  0 and -1
   get the classic xor/pcmpeq idioms; everything else comes from the
   constant pool.  */
static void tcg_out_dupi_vec(TCGContext *s, TCGType type,
                             TCGReg ret, tcg_target_long arg)
{
    int vex_l = (type == TCG_TYPE_V256 ? P_VEXL : 0);

    if (arg == 0) {
        tcg_out_vex_modrm(s, OPC_PXOR, ret, ret, ret);
        return;
    }
    if (arg == -1) {
        tcg_out_vex_modrm(s, OPC_PCMPEQB + vex_l, ret, ret, ret);
        return;
    }

    if (TCG_TARGET_REG_BITS == 64) {
        if (type == TCG_TYPE_V64) {
            tcg_out_vex_modrm_pool(s, OPC_MOVQ_VqWq, ret);
        } else if (have_avx2) {
            tcg_out_vex_modrm_pool(s, OPC_VPBROADCASTQ + vex_l, ret);
        } else {
            tcg_out_vex_modrm_pool(s, OPC_MOVDDUP, ret);
        }
        new_pool_label(s, arg, R_386_PC32, s->code_ptr - 4, -4);
    } else if (have_avx2) {
        tcg_out_vex_modrm_pool(s, OPC_VPBROADCASTD + vex_l, ret);
        new_pool_label(s, arg, R_386_32, s->code_ptr - 4, 0);
    } else {
        tcg_out_vex_modrm_pool(s, OPC_MOVD_VyEy, ret);
        new_pool_label(s, arg, R_386_32, s->code_ptr - 4, 0);
        tcg_out_dup_vec(s, type, MO_32, ret, ret);
    }
}
906
/* Load immediate ARG into RET, choosing the shortest encoding:
   xor for zero, 32-bit movl, sign-extended movq, rip-relative lea,
   and finally the full 10-byte movabs.  */
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg)
{
    tcg_target_long diff;

    switch (type) {
    case TCG_TYPE_I32:
#if TCG_TARGET_REG_BITS == 64
    case TCG_TYPE_I64:
#endif
        if (ret < 16) {
            break;
        }
        /* FALLTHRU - integer type but vector destination.  */
    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
    case TCG_TYPE_V256:
        tcg_debug_assert(ret >= 16);
        tcg_out_dupi_vec(s, type, ret, arg);
        return;
    default:
        g_assert_not_reached();
    }

    if (arg == 0) {
        tgen_arithr(s, ARITH_XOR, ret, ret);
        return;
    }
    if (arg == (uint32_t)arg || type == TCG_TYPE_I32) {
        tcg_out_opc(s, OPC_MOVL_Iv + LOWREGMASK(ret), 0, ret, 0);
        tcg_out32(s, arg);
        return;
    }
    if (arg == (int32_t)arg) {
        tcg_out_modrm(s, OPC_MOVL_EvIz + P_REXW, 0, ret);
        tcg_out32(s, arg);
        return;
    }

    /* Try a 7 byte pc-relative lea before the 10 byte movq.  */
    diff = arg - ((uintptr_t)s->code_ptr + 7);
    if (diff == (int32_t)diff) {
        tcg_out_opc(s, OPC_LEA | P_REXW, ret, 0, 0);
        tcg_out8(s, (LOWREGMASK(ret) << 3) | 5);
        tcg_out32(s, diff);
        return;
    }

    tcg_out_opc(s, OPC_MOVL_Iv + P_REXW + LOWREGMASK(ret), 0, ret, 0);
    tcg_out64(s, arg);
}
958
/* Push immediate VAL, using the byte form when it fits.  */
static inline void tcg_out_pushi(TCGContext *s, tcg_target_long val)
{
    if (val == (int8_t)val) {
        tcg_out_opc(s, OPC_PUSH_Ib, 0, 0, 0);
        tcg_out8(s, val);
    } else if (val == (int32_t)val) {
        tcg_out_opc(s, OPC_PUSH_Iv, 0, 0, 0);
        tcg_out32(s, val);
    } else {
        tcg_abort();
    }
}
971
/* Emit a memory barrier.  Only a store-load ordering needs an actual
   instruction on x86 (TSO); "lock orl $0,(%esp)" is used as a cheaper
   mfence equivalent.  Everything else is a compile-time no-op.  */
static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
{
    if (a0 & TCG_MO_ST_LD) {
        /* lock prefix */
        tcg_out8(s, 0xf0);
        tcg_out_modrm_offset(s, OPC_ARITH_EvIb, ARITH_OR, TCG_REG_ESP, 0);
        tcg_out8(s, 0);
    }
}
983
/* Push register REG on the stack.  */
static inline void tcg_out_push(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_PUSH_r32 + LOWREGMASK(reg), 0, reg, 0);
}
988
/* Pop the stack top into register REG.  */
static inline void tcg_out_pop(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_POP_r32 + LOWREGMASK(reg), 0, reg, 0);
}
993
/* Load TYPE from memory [ARG1 + ARG2] into register RET; handles GP
   and XMM destinations.  Vector loads are unaligned (movdqu).  */
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
                       TCGReg arg1, intptr_t arg2)
{
    switch (type) {
    case TCG_TYPE_I32:
        if (ret < 16) {
            tcg_out_modrm_offset(s, OPC_MOVL_GvEv, ret, arg1, arg2);
        } else {
            tcg_out_vex_modrm_offset(s, OPC_MOVD_VyEy, ret, 0, arg1, arg2);
        }
        break;
    case TCG_TYPE_I64:
        if (ret < 16) {
            tcg_out_modrm_offset(s, OPC_MOVL_GvEv | P_REXW, ret, arg1, arg2);
            break;
        }
        /* FALLTHRU - 64-bit load into a vector register.  */
    case TCG_TYPE_V64:
        tcg_debug_assert(ret >= 16);
        tcg_out_vex_modrm_offset(s, OPC_MOVQ_VqWq, ret, 0, arg1, arg2);
        break;
    case TCG_TYPE_V128:
        tcg_debug_assert(ret >= 16);
        tcg_out_vex_modrm_offset(s, OPC_MOVDQU_VxWx, ret, 0, arg1, arg2);
        break;
    case TCG_TYPE_V256:
        tcg_debug_assert(ret >= 16);
        tcg_out_vex_modrm_offset(s, OPC_MOVDQU_VxWx | P_VEXL,
                                 ret, 0, arg1, arg2);
        break;
    default:
        g_assert_not_reached();
    }
}
1028
/* Store register ARG of TYPE to memory [ARG1 + ARG2]; mirror image of
   tcg_out_ld.  */
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
                       TCGReg arg1, intptr_t arg2)
{
    switch (type) {
    case TCG_TYPE_I32:
        if (arg < 16) {
            tcg_out_modrm_offset(s, OPC_MOVL_EvGv, arg, arg1, arg2);
        } else {
            tcg_out_vex_modrm_offset(s, OPC_MOVD_EyVy, arg, 0, arg1, arg2);
        }
        break;
    case TCG_TYPE_I64:
        if (arg < 16) {
            tcg_out_modrm_offset(s, OPC_MOVL_EvGv | P_REXW, arg, arg1, arg2);
            break;
        }
        /* FALLTHRU - 64-bit store from a vector register.  */
    case TCG_TYPE_V64:
        tcg_debug_assert(arg >= 16);
        tcg_out_vex_modrm_offset(s, OPC_MOVQ_WqVq, arg, 0, arg1, arg2);
        break;
    case TCG_TYPE_V128:
        tcg_debug_assert(arg >= 16);
        tcg_out_vex_modrm_offset(s, OPC_MOVDQU_WxVx, arg, 0, arg1, arg2);
        break;
    case TCG_TYPE_V256:
        tcg_debug_assert(arg >= 16);
        tcg_out_vex_modrm_offset(s, OPC_MOVDQU_WxVx | P_VEXL,
                                 arg, 0, arg1, arg2);
        break;
    default:
        g_assert_not_reached();
    }
}
1063
/* Store immediate VAL of TYPE to [BASE + OFS].  Returns false when the
   value cannot be encoded as a (sign-extended) 32-bit immediate, in
   which case the caller must materialise it in a register.  */
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs)
{
    int rexw = 0;
    if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I64) {
        if (val != (int32_t)val) {
            return false;
        }
        rexw = P_REXW;
    } else if (type != TCG_TYPE_I32) {
        return false;
    }
    tcg_out_modrm_offset(s, OPC_MOVL_EvIz | rexw, 0, base, ofs);
    tcg_out32(s, val);
    return true;
}
1080
1081static void tcg_out_shifti(TCGContext *s, int subopc, int reg, int count)
1082{
1083
1084 int ext = subopc & ~0x7;
1085 subopc &= 0x7;
1086
1087 if (count == 1) {
1088 tcg_out_modrm(s, OPC_SHIFT_1 + ext, subopc, reg);
1089 } else {
1090 tcg_out_modrm(s, OPC_SHIFT_Ib + ext, subopc, reg);
1091 tcg_out8(s, count);
1092 }
1093}
1094
/* Byte-swap the 32-bit value in REG.  */
static inline void tcg_out_bswap32(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_BSWAP + LOWREGMASK(reg), 0, reg, 0);
}
1099
/* Rotate the low 16 bits of REG by 8, i.e. a 16-bit byte swap.  */
static inline void tcg_out_rolw_8(TCGContext *s, int reg)
{
    tcg_out_shifti(s, SHIFT_ROL + P_DATA16, reg, 8);
}
1104
/* Zero-extend the low byte of SRC into DEST (movzbl).  */
static inline void tcg_out_ext8u(TCGContext *s, int dest, int src)
{
    /* In 32-bit mode only %eax..%edx have an addressable low byte.  */
    tcg_debug_assert(src < 4 || TCG_TARGET_REG_BITS == 64);
    tcg_out_modrm(s, OPC_MOVZBL + P_REXB_RM, dest, src);
}
1111
/* Sign-extend the low byte of SRC into DEST (movsbl/movsbq).  */
static void tcg_out_ext8s(TCGContext *s, int dest, int src, int rexw)
{
    /* In 32-bit mode only %eax..%edx have an addressable low byte.  */
    tcg_debug_assert(src < 4 || TCG_TARGET_REG_BITS == 64);
    tcg_out_modrm(s, OPC_MOVSBL + P_REXB_RM + rexw, dest, src);
}
1118
/* Zero-extend the low 16 bits of SRC into DEST (movzwl).  */
static inline void tcg_out_ext16u(TCGContext *s, int dest, int src)
{
    tcg_out_modrm(s, OPC_MOVZWL, dest, src);
}
1124
/* Sign-extend the low 16 bits of SRC into DEST (movswl/movswq).  */
static inline void tcg_out_ext16s(TCGContext *s, int dest, int src, int rexw)
{
    tcg_out_modrm(s, OPC_MOVSWL + rexw, dest, src);
}
1130
/* Zero-extend SRC into DEST: a plain 32-bit mov clears the high half
   in 64-bit mode.  */
static inline void tcg_out_ext32u(TCGContext *s, int dest, int src)
{
    tcg_out_modrm(s, OPC_MOVL_GvEv, dest, src);
}
1136
/* Sign-extend 32-bit SRC into 64-bit DEST (movslq).  */
static inline void tcg_out_ext32s(TCGContext *s, int dest, int src)
{
    tcg_out_modrm(s, OPC_MOVSLQ, dest, src);
}
1141
/* Byte-swap the 64-bit value in REG.  */
static inline void tcg_out_bswap64(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_BSWAP + P_REXW + LOWREGMASK(reg), 0, reg, 0);
}
1146
/* Emit ALU operation C (ARITH_FOO, possibly with P_REXW folded into the
   upper bits) with immediate VAL applied to R0.  CF nonzero means the
   carry flag must be produced exactly, forbidding inc/dec and the
   zero-extension shortcuts.  */
static void tgen_arithi(TCGContext *s, int c, int r0,
                        tcg_target_long val, int cf)
{
    int rexw = 0;

    if (TCG_TARGET_REG_BITS == 64) {
        rexw = c & -8;
        c &= 7;
    }

    /* ??? While INC is 2 bytes shorter than ADDL $1, they also induce
       partial flags update stalls on Pentium4 and are not recommended
       by current Intel optimization manuals.  */
    if (!cf && (c == ARITH_ADD || c == ARITH_SUB) && (val == 1 || val == -1)) {
        int is_inc = (c == ARITH_ADD) ^ (val < 0);
        if (TCG_TARGET_REG_BITS == 64) {
            /* The single-byte increment encodings are re-tasked as the
               REX prefixes.  Use the MODRM encoding.  */
            tcg_out_modrm(s, OPC_GRP5 + rexw,
                          (is_inc ? EXT5_INC_Ev : EXT5_DEC_Ev), r0);
        } else {
            tcg_out8(s, (is_inc ? OPC_INC_r32 : OPC_DEC_r32) + r0);
        }
        return;
    }

    if (c == ARITH_AND) {
        if (TCG_TARGET_REG_BITS == 64) {
            if (val == 0xffffffffu) {
                tcg_out_ext32u(s, r0, r0);
                return;
            }
            if (val == (uint32_t)val) {
                /* AND with no high bits set can use a 32-bit operation.  */
                rexw = 0;
            }
        }
        if (val == 0xffu && (r0 < 4 || TCG_TARGET_REG_BITS == 64)) {
            tcg_out_ext8u(s, r0, r0);
            return;
        }
        if (val == 0xffffu) {
            tcg_out_ext16u(s, r0, r0);
            return;
        }
    }

    if (val == (int8_t)val) {
        tcg_out_modrm(s, OPC_ARITH_EvIb + rexw, c, r0);
        tcg_out8(s, val);
        return;
    }
    if (rexw == 0 || val == (int32_t)val) {
        tcg_out_modrm(s, OPC_ARITH_EvIz + rexw, c, r0);
        tcg_out32(s, val);
        return;
    }

    tcg_abort();
}
1207
/* Add constant VAL to REG (full register width); no-op for zero.  */
static void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val)
{
    if (val != 0) {
        tgen_arithi(s, ARITH_ADD + P_REXW, reg, val, 0);
    }
}
1214
1215
/* Emit a jump (OPC == -1) or conditional jump to label L, short form
   when SMALL or when a resolved target fits in rel8; unresolved
   targets leave a relocation to be patched later.  */
static void tcg_out_jxx(TCGContext *s, int opc, TCGLabel *l, int small)
{
    int32_t val, val1;

    if (l->has_value) {
        val = tcg_pcrel_diff(s, l->u.value_ptr);
        val1 = val - 2;   /* account for the 2-byte short encoding */
        if ((int8_t)val1 == val1) {
            if (opc == -1) {
                tcg_out8(s, OPC_JMP_short);
            } else {
                tcg_out8(s, OPC_JCC_short + opc);
            }
            tcg_out8(s, val1);
        } else {
            if (small) {
                tcg_abort();
            }
            if (opc == -1) {
                tcg_out8(s, OPC_JMP_long);
                tcg_out32(s, val - 5);
            } else {
                tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
                tcg_out32(s, val - 6);
            }
        }
    } else if (small) {
        if (opc == -1) {
            tcg_out8(s, OPC_JMP_short);
        } else {
            tcg_out8(s, OPC_JCC_short + opc);
        }
        tcg_out_reloc(s, s->code_ptr, R_386_PC8, l, -1);
        s->code_ptr += 1;
    } else {
        if (opc == -1) {
            tcg_out8(s, OPC_JMP_long);
        } else {
            tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
        }
        tcg_out_reloc(s, s->code_ptr, R_386_PC32, l, -4);
        s->code_ptr += 4;
    }
}
1260
1261static void tcg_out_cmp(TCGContext *s, TCGArg arg1, TCGArg arg2,
1262 int const_arg2, int rexw)
1263{
1264 if (const_arg2) {
1265 if (arg2 == 0) {
1266
1267 tcg_out_modrm(s, OPC_TESTL + rexw, arg1, arg1);
1268 } else {
1269 tgen_arithi(s, ARITH_CMP + rexw, arg1, arg2, 0);
1270 }
1271 } else {
1272 tgen_arithr(s, ARITH_CMP + rexw, arg1, arg2);
1273 }
1274}
1275
/* Compare-and-branch on 32-bit operands.  */
static void tcg_out_brcond32(TCGContext *s, TCGCond cond,
                             TCGArg arg1, TCGArg arg2, int const_arg2,
                             TCGLabel *label, int small)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, 0);
    tcg_out_jxx(s, tcg_cond_to_jcc[cond], label, small);
}
1283
1284#if TCG_TARGET_REG_BITS == 64
/* Compare-and-branch on 64-bit operands.  */
static void tcg_out_brcond64(TCGContext *s, TCGCond cond,
                             TCGArg arg1, TCGArg arg2, int const_arg2,
                             TCGLabel *label, int small)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
    tcg_out_jxx(s, tcg_cond_to_jcc[cond], label, small);
}
1292#else
1293
1294
/* Emit a double-word conditional branch for a 32-bit host:
   args[0]/args[1] are the low/high halves of operand A,
   args[2]/args[3] the low/high halves of operand B (const_args flags
   the immediates), args[4] the condition, args[5] the target label.
   Implemented as two 32-bit compares; label_next is a local join point
   used to fall through when the result is already decided.  */
static void tcg_out_brcond2(TCGContext *s, const TCGArg *args,
                            const int *const_args, int small)
{
    TCGLabel *label_next = gen_new_label();
    TCGLabel *label_this = arg_label(args[5]);

    switch(args[4]) {
    case TCG_COND_EQ:
        /* Equal requires both halves equal: bail out to label_next as
           soon as the low halves differ.  */
        tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2],
                         label_next, 1);
        tcg_out_brcond32(s, TCG_COND_EQ, args[1], args[3], const_args[3],
                         label_this, small);
        break;
    case TCG_COND_NE:
        /* Not-equal is taken if either half differs.  */
        tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2],
                         label_this, small);
        tcg_out_brcond32(s, TCG_COND_NE, args[1], args[3], const_args[3],
                         label_this, small);
        break;
    /* For the ordered conditions: first compare the high halves with the
       (possibly signed) condition; if the high halves are unequal the
       JNE below (which reuses the flags from that same compare) skips to
       label_next was NOT taken -- i.e. equal high halves fall through to
       an unsigned compare of the low halves.  */
    case TCG_COND_LT:
        tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3],
                         label_this, small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2],
                         label_this, small);
        break;
    case TCG_COND_LE:
        tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3],
                         label_this, small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2],
                         label_this, small);
        break;
    case TCG_COND_GT:
        tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3],
                         label_this, small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2],
                         label_this, small);
        break;
    case TCG_COND_GE:
        tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3],
                         label_this, small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2],
                         label_this, small);
        break;
    case TCG_COND_LTU:
        tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3],
                         label_this, small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2],
                         label_this, small);
        break;
    case TCG_COND_LEU:
        tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3],
                         label_this, small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2],
                         label_this, small);
        break;
    case TCG_COND_GTU:
        tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3],
                         label_this, small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2],
                         label_this, small);
        break;
    case TCG_COND_GEU:
        tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3],
                         label_this, small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2],
                         label_this, small);
        break;
    default:
        tcg_abort();
    }
    tcg_out_label(s, label_next, s->code_ptr);
}
1375#endif
1376
1377static void tcg_out_setcond32(TCGContext *s, TCGCond cond, TCGArg dest,
1378 TCGArg arg1, TCGArg arg2, int const_arg2)
1379{
1380 tcg_out_cmp(s, arg1, arg2, const_arg2, 0);
1381 tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
1382 tcg_out_ext8u(s, dest, dest);
1383}
1384
1385#if TCG_TARGET_REG_BITS == 64
1386static void tcg_out_setcond64(TCGContext *s, TCGCond cond, TCGArg dest,
1387 TCGArg arg1, TCGArg arg2, int const_arg2)
1388{
1389 tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
1390 tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
1391 tcg_out_ext8u(s, dest, dest);
1392}
1393#else
/* Emit setcond2 for a 32-bit host: args[0] = (args[1,2] COND args[3,4])
   where the operands are 64-bit values split into 32-bit halves.
   Built on top of tcg_out_brcond2.  */
static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
                             const int *const_args)
{
    TCGArg new_args[6];
    TCGLabel *label_true, *label_over;

    /* Shift args/const_args by one so new_args[0..4] are the brcond2
       operands and condition; new_args[5] receives the label below.  */
    memcpy(new_args, args+1, 5*sizeof(TCGArg));

    if (args[0] == args[1] || args[0] == args[2]
        || (!const_args[3] && args[0] == args[3])
        || (!const_args[4] && args[0] == args[4])) {
        /* The destination overlaps one of the comparison operands, so we
           cannot pre-clear it; use an explicit branch to store 0 or 1.  */
        label_true = gen_new_label();
        label_over = gen_new_label();

        new_args[5] = label_arg(label_true);
        tcg_out_brcond2(s, new_args, const_args+1, 1);

        tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);
        tcg_out_jxx(s, JCC_JMP, label_over, 1);
        tcg_out_label(s, label_true, s->code_ptr);

        tcg_out_movi(s, TCG_TYPE_I32, args[0], 1);
        tcg_out_label(s, label_over, s->code_ptr);
    } else {
        /* The destination is free of the operands: clear it up front,
           branch over on the inverted condition, and bump it to 1 in the
           taken (original-condition-true) case.  */
        tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);

        label_over = gen_new_label();
        new_args[4] = tcg_invert_cond(new_args[4]);
        new_args[5] = label_arg(label_over);
        tcg_out_brcond2(s, new_args, const_args+1, 1);

        tgen_arithi(s, ARITH_ADD, args[0], 1, 0);
        tcg_out_label(s, label_over, s->code_ptr);
    }
}
1435#endif
1436
1437static void tcg_out_cmov(TCGContext *s, TCGCond cond, int rexw,
1438 TCGReg dest, TCGReg v1)
1439{
1440 if (have_cmov) {
1441 tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond] | rexw, dest, v1);
1442 } else {
1443 TCGLabel *over = gen_new_label();
1444 tcg_out_jxx(s, tcg_cond_to_jcc[tcg_invert_cond(cond)], over, 1);
1445 tcg_out_mov(s, TCG_TYPE_I32, dest, v1);
1446 tcg_out_label(s, over, s->code_ptr);
1447 }
1448}
1449
1450static void tcg_out_movcond32(TCGContext *s, TCGCond cond, TCGReg dest,
1451 TCGReg c1, TCGArg c2, int const_c2,
1452 TCGReg v1)
1453{
1454 tcg_out_cmp(s, c1, c2, const_c2, 0);
1455 tcg_out_cmov(s, cond, 0, dest, v1);
1456}
1457
1458#if TCG_TARGET_REG_BITS == 64
1459static void tcg_out_movcond64(TCGContext *s, TCGCond cond, TCGReg dest,
1460 TCGReg c1, TCGArg c2, int const_c2,
1461 TCGReg v1)
1462{
1463 tcg_out_cmp(s, c1, c2, const_c2, P_REXW);
1464 tcg_out_cmov(s, cond, P_REXW, dest, v1);
1465}
1466#endif
1467
/* Emit ctz: dest = arg1 ? ctz(arg1) : arg2.
   With BMI1, TZCNT returns the operand width for a zero input and sets
   CF in that case, so a CMOVB (LTU) substitutes arg2 -- unless arg2 is
   the constant width, in which case TZCNT's own result already matches.
   Without BMI1, BSF leaves dest undefined for a zero input but sets ZF,
   so a CMOVE substitutes arg2 (a constant arg2 is not supported here).  */
static void tcg_out_ctz(TCGContext *s, int rexw, TCGReg dest, TCGReg arg1,
                        TCGArg arg2, bool const_a2)
{
    if (have_bmi1) {
        tcg_out_modrm(s, OPC_TZCNT + rexw, dest, arg1);
        if (const_a2) {
            tcg_debug_assert(arg2 == (rexw ? 64 : 32));
        } else {
            /* The cmov would be corrupted if dest aliased arg2.  */
            tcg_debug_assert(dest != arg2);
            tcg_out_cmov(s, TCG_COND_LTU, rexw, dest, arg2);
        }
    } else {
        tcg_debug_assert(dest != arg2);
        tcg_out_modrm(s, OPC_BSF + rexw, dest, arg1);
        tcg_out_cmov(s, TCG_COND_EQ, rexw, dest, arg2);
    }
}
1485
/* Emit clz: dest = arg1 ? clz(arg1) : arg2.
   With LZCNT, the zero-input case (result == width, CF set) is handled
   the same way as for TZCNT above.  Without LZCNT, BSR yields the bit
   index of the most significant set bit; XOR with width-1 converts that
   to a leading-zero count, and an explicit compare of arg1 against zero
   selects arg2 for the zero input.  */
static void tcg_out_clz(TCGContext *s, int rexw, TCGReg dest, TCGReg arg1,
                        TCGArg arg2, bool const_a2)
{
    if (have_lzcnt) {
        tcg_out_modrm(s, OPC_LZCNT + rexw, dest, arg1);
        if (const_a2) {
            tcg_debug_assert(arg2 == (rexw ? 64 : 32));
        } else {
            tcg_debug_assert(dest != arg2);
            tcg_out_cmov(s, TCG_COND_LTU, rexw, dest, arg2);
        }
    } else {
        /* The BSR fallback cannot produce the "width" result for a zero
           input, and dest must not alias either source.  */
        tcg_debug_assert(!const_a2);
        tcg_debug_assert(dest != arg1);
        tcg_debug_assert(dest != arg2);

        /* Recall that the output of BSR is the index not the count.  */
        tcg_out_modrm(s, OPC_BSR + rexw, dest, arg1);
        tgen_arithi(s, ARITH_XOR + rexw, dest, rexw ? 63 : 31, 0);

        /* Since we have destroyed the flags from BSR, we have to re-test.  */
        tcg_out_cmp(s, arg1, 0, 1, rexw);
        tcg_out_cmov(s, TCG_COND_EQ, rexw, dest, arg2);
    }
}
1511
/* Emit a direct transfer to DEST: a call when CALL is nonzero, a jump
   otherwise.  Uses the 5-byte rel32 call/jmp form when the displacement
   fits in 32 bits; otherwise emits an indirect call/jmp through a
   pointer-sized constant placed in the constant pool and referenced
   pc-relatively (modrm rm=101 form, patched via R_386_PC32).  */
static void tcg_out_branch(TCGContext *s, int call, tcg_insn_unit *dest)
{
    /* -5 accounts for the length of the rel32 call/jmp instruction,
       since the displacement is relative to the next instruction.  */
    intptr_t disp = tcg_pcrel_diff(s, dest) - 5;

    if (disp == (int32_t)disp) {
        tcg_out_opc(s, call ? OPC_CALL_Jz : OPC_JMP_long, 0, 0, 0);
        tcg_out32(s, disp);
    } else {
        /* The displacement doesn't fit: go through memory.  The target
           address is stored in the constant pool; the zero emitted here
           is the placeholder displacement that new_pool_label arranges
           to have patched.  */
        tcg_out_opc(s, OPC_GRP5, 0, 0, 0);
        tcg_out8(s, (call ? EXT5_CALLN_Ev : EXT5_JMPN_Ev) << 3 | 5);
        new_pool_label(s, (uintptr_t)dest, R_386_PC32, s->code_ptr, -4);
        tcg_out32(s, 0);
    }
}
1530
1531static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *dest)
1532{
1533 tcg_out_branch(s, 1, dest);
1534}
1535
1536static void tcg_out_jmp(TCGContext *s, tcg_insn_unit *dest)
1537{
1538 tcg_out_branch(s, 0, dest);
1539}
1540
1541static void tcg_out_nopn(TCGContext *s, int n)
1542{
1543 int i;
1544
1545
1546
1547
1548
1549 tcg_debug_assert(n >= 1);
1550 for (i = 1; i < n; ++i) {
1551 tcg_out8(s, 0x66);
1552 }
1553 tcg_out8(s, 0x90);
1554}
1555
1556#if defined(CONFIG_SOFTMMU)
1557#include "tcg-ldst.inc.c"
1558
1559
1560
1561
/* Slow-path guest-load helpers, indexed by (MO_BSWAP | MO_SIZE) bits of
   the memory op.  Only unsigned variants are listed; signed loads use
   the same helpers and sign-extend the result inline (see
   tcg_out_qemu_ld_slow_path).  Unlisted indexes are NULL.  */
static void * const qemu_ld_helpers[16] = {
    [MO_UB] = helper_ret_ldub_mmu,
    [MO_LEUW] = helper_le_lduw_mmu,
    [MO_LEUL] = helper_le_ldul_mmu,
    [MO_LEQ] = helper_le_ldq_mmu,
    [MO_BEUW] = helper_be_lduw_mmu,
    [MO_BEUL] = helper_be_ldul_mmu,
    [MO_BEQ] = helper_be_ldq_mmu,
};
1571
1572
1573
1574
/* Slow-path guest-store helpers, indexed by (MO_BSWAP | MO_SIZE) bits
   of the memory op.  Unlisted indexes are NULL.  */
static void * const qemu_st_helpers[16] = {
    [MO_UB] = helper_ret_stb_mmu,
    [MO_LEUW] = helper_le_stw_mmu,
    [MO_LEUL] = helper_le_stl_mmu,
    [MO_LEQ] = helper_le_stq_mmu,
    [MO_BEUW] = helper_be_stw_mmu,
    [MO_BEUL] = helper_be_stl_mmu,
    [MO_BEQ] = helper_be_stq_mmu,
};
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
/* Emit the TLB lookup for guest address (addrlo,addrhi) and compare it
   against the TLB entry field selected by WHICH (addr_read/addr_write),
   recording the forward JNE displacement(s) in label_ptr[] so the slow
   path can be patched in later.  On the fast (hit) path:
     r0 (TCG_REG_L0) holds the address of the matching TLB entry field,
     r1 (TCG_REG_L1) holds the host address (guest address + addend).  */
static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
                                    int mem_index, TCGMemOp opc,
                                    tcg_insn_unit **label_ptr, int which)
{
    const TCGReg r0 = TCG_REG_L0;
    const TCGReg r1 = TCG_REG_L1;
    TCGType ttype = TCG_TYPE_I32;
    TCGType tlbtype = TCG_TYPE_I32;
    int trexw = 0, hrexw = 0, tlbrexw = 0;
    unsigned a_bits = get_alignment_bits(opc);
    unsigned s_bits = opc & MO_SIZE;
    unsigned a_mask = (1 << a_bits) - 1;
    unsigned s_mask = (1 << s_bits) - 1;
    target_ulong tlb_mask;

    if (TCG_TARGET_REG_BITS == 64) {
        /* Widen the address arithmetic to 64 bits for 64-bit guests,
           and the host pointer arithmetic on 64-bit-pointer hosts.  */
        if (TARGET_LONG_BITS == 64) {
            ttype = TCG_TYPE_I64;
            trexw = P_REXW;
        }
        if (TCG_TYPE_PTR == TCG_TYPE_I64) {
            hrexw = P_REXW;
            if (TARGET_PAGE_BITS + CPU_TLB_BITS > 32) {
                tlbtype = TCG_TYPE_I64;
                tlbrexw = P_REXW;
            }
        }
    }

    tcg_out_mov(s, tlbtype, r0, addrlo);
    /* If the required alignment covers the access size, compare the
       address as-is; otherwise bias it by s_mask - a_mask so that an
       access crossing a page boundary also fails the compare below.
       (NOTE(review): inferred from the mask arithmetic -- confirm
       against the softmmu alignment handling.)  */
    if (a_bits >= s_bits) {
        tcg_out_mov(s, ttype, r1, addrlo);
    } else {
        tcg_out_modrm_offset(s, OPC_LEA + trexw, r1, addrlo, s_mask - a_mask);
    }
    tlb_mask = (target_ulong)TARGET_PAGE_MASK | a_mask;

    /* r0 = TLB index scaled to an entry offset.  */
    tcg_out_shifti(s, SHIFT_SHR + tlbrexw, r0,
                   TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);

    /* r1 = page part of the address (plus low alignment bits).  */
    tgen_arithi(s, ARITH_AND + trexw, r1, tlb_mask, 0);
    tgen_arithi(s, ARITH_AND + tlbrexw, r0,
                (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS, 0);

    /* r0 = &env->tlb_table[mem_index][index] + which  */
    tcg_out_modrm_sib_offset(s, OPC_LEA + hrexw, r0, TCG_AREG0, r0, 0,
                             offsetof(CPUArchState, tlb_table[mem_index][0])
                             + which);

    /* cmp 0(r0), r1 */
    tcg_out_modrm_offset(s, OPC_CMP_GvEv + trexw, r1, r0, 0);

    /* Copy the full guest address back into r1 before branching, so it
       is available both to the fast path (which adds the addend below)
       and to the slow path.  */
    tcg_out_mov(s, ttype, r1, addrlo);

    /* jne slow_path; displacement patched later via label_ptr[0].  */
    tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
    label_ptr[0] = s->code_ptr;
    s->code_ptr += 4;

    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
        /* cmp 4(r0), addrhi -- the high half of the comparator.  */
        tcg_out_modrm_offset(s, OPC_CMP_GvEv, addrhi, r0, 4);

        /* jne slow_path; displacement patched via label_ptr[1].  */
        tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
        label_ptr[1] = s->code_ptr;
        s->code_ptr += 4;
    }

    /* TLB hit.  */

    /* add addend(r0), r1 -- r1 becomes the host address.  */
    tcg_out_modrm_offset(s, OPC_ADD_GvEv + hrexw, r1, r0,
                         offsetof(CPUTLBEntry, addend) - which);
}
1689
1690
1691
1692
1693
1694static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
1695 TCGReg datalo, TCGReg datahi,
1696 TCGReg addrlo, TCGReg addrhi,
1697 tcg_insn_unit *raddr,
1698 tcg_insn_unit **label_ptr)
1699{
1700 TCGLabelQemuLdst *label = new_ldst_label(s);
1701
1702 label->is_ld = is_ld;
1703 label->oi = oi;
1704 label->datalo_reg = datalo;
1705 label->datahi_reg = datahi;
1706 label->addrlo_reg = addrlo;
1707 label->addrhi_reg = addrhi;
1708 label->raddr = raddr;
1709 label->label_ptr[0] = label_ptr[0];
1710 if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
1711 label->label_ptr[1] = label_ptr[1];
1712 }
1713}
1714
1715
1716
1717
/* Generate the slow path for a guest load: reached via the TLB-miss
   branches emitted by tcg_out_tlb_load.  Marshals the helper arguments,
   calls the qemu_ld_helpers[] routine, moves (and sign-extends where
   needed) the result into the destination register(s), and jumps back
   to the code following the fast path.  */
static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
{
    TCGMemOpIdx oi = l->oi;
    TCGMemOp opc = get_memop(oi);
    TCGReg data_reg;
    tcg_insn_unit **label_ptr = &l->label_ptr[0];

    /* Resolve the TLB-miss branches: patch their rel32 displacements to
       point here.  */
    tcg_patch32(label_ptr[0], s->code_ptr - label_ptr[0] - 4);
    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
        tcg_patch32(label_ptr[1], s->code_ptr - label_ptr[1] - 4);
    }

    if (TCG_TARGET_REG_BITS == 32) {
        /* 32-bit host: all helper arguments go on the stack.  */
        int ofs = 0;

        tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs);
        ofs += 4;

        tcg_out_st(s, TCG_TYPE_I32, l->addrlo_reg, TCG_REG_ESP, ofs);
        ofs += 4;

        if (TARGET_LONG_BITS == 64) {
            tcg_out_st(s, TCG_TYPE_I32, l->addrhi_reg, TCG_REG_ESP, ofs);
            ofs += 4;
        }

        tcg_out_sti(s, TCG_TYPE_I32, oi, TCG_REG_ESP, ofs);
        ofs += 4;

        tcg_out_sti(s, TCG_TYPE_PTR, (uintptr_t)l->raddr, TCG_REG_ESP, ofs);
    } else {
        /* 64-bit host: arguments in registers.  */
        tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
        /* The second argument register already holds addrlo (loaded by
           tcg_out_tlb_load into TCG_REG_L1).  */
        tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2], oi);
        tcg_out_movi(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[3],
                     (uintptr_t)l->raddr);
    }

    tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);

    /* The helper's return value is in EAX/RAX (and EDX for 64-bit data
       on a 32-bit host); move it to the destination, sign-extending the
       signed sizes.  */
    data_reg = l->datalo_reg;
    switch (opc & MO_SSIZE) {
    case MO_SB:
        tcg_out_ext8s(s, data_reg, TCG_REG_EAX, P_REXW);
        break;
    case MO_SW:
        tcg_out_ext16s(s, data_reg, TCG_REG_EAX, P_REXW);
        break;
#if TCG_TARGET_REG_BITS == 64
    case MO_SL:
        tcg_out_ext32s(s, data_reg, TCG_REG_EAX);
        break;
#endif
    case MO_UB:
    case MO_UW:
        /* The unsigned helpers' results need no extension here --
           presumably already zero-extended by the helper; a 32-bit move
           suffices.  TODO(review): confirm against the helper ABI.  */
    case MO_UL:
        tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
        break;
    case MO_Q:
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_RAX);
        } else if (data_reg == TCG_REG_EDX) {
            /* datalo wants EDX, which holds the high half: swap the
               halves with xchg %edx, %eax, then move the high half.  */
            tcg_out_opc(s, OPC_XCHG_ax_r32 + TCG_REG_EDX, 0, 0, 0);
            tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EAX);
        } else {
            tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
            tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EDX);
        }
        break;
    default:
        tcg_abort();
    }

    /* Jump back to the code that follows the fast-path access.  */
    tcg_out_jmp(s, l->raddr);
}
1797
1798
1799
1800
/* Generate the slow path for a guest store: reached via the TLB-miss
   branches emitted by tcg_out_tlb_load.  Marshals the helper arguments
   and tail-calls the qemu_st_helpers[] routine, pushing the fast-path
   return address so the helper returns straight into the TB.  */
static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
{
    TCGMemOpIdx oi = l->oi;
    TCGMemOp opc = get_memop(oi);
    TCGMemOp s_bits = opc & MO_SIZE;
    tcg_insn_unit **label_ptr = &l->label_ptr[0];
    TCGReg retaddr;

    /* Resolve the TLB-miss branches: patch their rel32 displacements to
       point here.  */
    tcg_patch32(label_ptr[0], s->code_ptr - label_ptr[0] - 4);
    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
        tcg_patch32(label_ptr[1], s->code_ptr - label_ptr[1] - 4);
    }

    if (TCG_TARGET_REG_BITS == 32) {
        /* 32-bit host: all helper arguments go on the stack.  */
        int ofs = 0;

        tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs);
        ofs += 4;

        tcg_out_st(s, TCG_TYPE_I32, l->addrlo_reg, TCG_REG_ESP, ofs);
        ofs += 4;

        if (TARGET_LONG_BITS == 64) {
            tcg_out_st(s, TCG_TYPE_I32, l->addrhi_reg, TCG_REG_ESP, ofs);
            ofs += 4;
        }

        tcg_out_st(s, TCG_TYPE_I32, l->datalo_reg, TCG_REG_ESP, ofs);
        ofs += 4;

        if (s_bits == MO_64) {
            tcg_out_st(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_ESP, ofs);
            ofs += 4;
        }

        tcg_out_sti(s, TCG_TYPE_I32, oi, TCG_REG_ESP, ofs);
        ofs += 4;

        retaddr = TCG_REG_EAX;
        tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
        tcg_out_st(s, TCG_TYPE_PTR, retaddr, TCG_REG_ESP, ofs);
    } else {
        /* 64-bit host: arguments in registers.  The second argument
           register already holds addrlo (loaded by tcg_out_tlb_load).  */
        tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);

        tcg_out_mov(s, (s_bits == MO_64 ? TCG_TYPE_I64 : TCG_TYPE_I32),
                    tcg_target_call_iarg_regs[2], l->datalo_reg);
        tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3], oi);

        /* The fifth argument (return address) goes in a register when
           the ABI has one (SysV), otherwise on the stack (Win64 has only
           four argument registers -- see tcg_target_call_iarg_regs).  */
        if (ARRAY_SIZE(tcg_target_call_iarg_regs) > 4) {
            retaddr = tcg_target_call_iarg_regs[4];
            tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
        } else {
            retaddr = TCG_REG_RAX;
            tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
            tcg_out_st(s, TCG_TYPE_PTR, retaddr, TCG_REG_ESP,
                       TCG_TARGET_CALL_STACK_OFFSET);
        }
    }

    /* "Tail call" to the helper, with the return address back inline.  */
    tcg_out_push(s, retaddr);
    tcg_out_jmp(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
}
1865#elif defined(__x86_64__) && defined(__linux__)
1866# include <asm/prctl.h>
1867# include <sys/prctl.h>
1868
1869int arch_prctl(int code, unsigned long addr);
1870
/* User-only emulation on x86-64 Linux: try to install guest_base as the
   %gs segment base, so guest memory can be accessed with a %gs segment
   override (P_GS) instead of explicit address arithmetic.  On failure
   guest_base_flags stays 0 and the fallback arithmetic path is used.  */
static int guest_base_flags;
static inline void setup_guest_base_seg(void)
{
    if (arch_prctl(ARCH_SET_GS, guest_base) == 0) {
        guest_base_flags = P_GS;
    }
}
1878#else
/* No segment-base support on this host: guest_base is always applied
   with explicit arithmetic.  */
# define guest_base_flags 0
static inline void setup_guest_base_seg(void) { }
1881#endif
1882
/* Emit the actual load of guest data from host address
   base + index*1 + ofs (index == -1 means no index register), with an
   optional segment-override/prefix in SEG, honoring the size, sign and
   byte-swap requirements of MEMOP.  With MOVBE available, byte-swapped
   loads use it directly instead of a separate bswap.  */
static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
                                   TCGReg base, int index, intptr_t ofs,
                                   int seg, TCGMemOp memop)
{
    const TCGMemOp real_bswap = memop & MO_BSWAP;
    TCGMemOp bswap = real_bswap;
    int movop = OPC_MOVL_GvEv;

    if (have_movbe && real_bswap) {
        bswap = 0;
        movop = OPC_MOVBE_GyMy;
    }

    switch (memop & MO_SSIZE) {
    case MO_UB:
        tcg_out_modrm_sib_offset(s, OPC_MOVZBL + seg, datalo,
                                 base, index, 0, ofs);
        break;
    case MO_SB:
        tcg_out_modrm_sib_offset(s, OPC_MOVSBL + P_REXW + seg, datalo,
                                 base, index, 0, ofs);
        break;
    case MO_UW:
        tcg_out_modrm_sib_offset(s, OPC_MOVZWL + seg, datalo,
                                 base, index, 0, ofs);
        if (real_bswap) {
            /* Swap the two bytes with a 16-bit rotate.  */
            tcg_out_rolw_8(s, datalo);
        }
        break;
    case MO_SW:
        if (real_bswap) {
            if (have_movbe) {
                /* 16-bit movbe load, then sign-extend below.  */
                tcg_out_modrm_sib_offset(s, OPC_MOVBE_GyMy + P_DATA16 + seg,
                                         datalo, base, index, 0, ofs);
            } else {
                tcg_out_modrm_sib_offset(s, OPC_MOVZWL + seg, datalo,
                                         base, index, 0, ofs);
                tcg_out_rolw_8(s, datalo);
            }
            tcg_out_modrm(s, OPC_MOVSWL + P_REXW, datalo, datalo);
        } else {
            tcg_out_modrm_sib_offset(s, OPC_MOVSWL + P_REXW + seg,
                                     datalo, base, index, 0, ofs);
        }
        break;
    case MO_UL:
        tcg_out_modrm_sib_offset(s, movop + seg, datalo, base, index, 0, ofs);
        if (bswap) {
            tcg_out_bswap32(s, datalo);
        }
        break;
#if TCG_TARGET_REG_BITS == 64
    case MO_SL:
        if (real_bswap) {
            /* Load (with movbe or load+bswap), then sign-extend.  */
            tcg_out_modrm_sib_offset(s, movop + seg, datalo,
                                     base, index, 0, ofs);
            if (bswap) {
                tcg_out_bswap32(s, datalo);
            }
            tcg_out_ext32s(s, datalo, datalo);
        } else {
            tcg_out_modrm_sib_offset(s, OPC_MOVSLQ + seg, datalo,
                                     base, index, 0, ofs);
        }
        break;
#endif
    case MO_Q:
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_out_modrm_sib_offset(s, movop + P_REXW + seg, datalo,
                                     base, index, 0, ofs);
            if (bswap) {
                tcg_out_bswap64(s, datalo);
            }
        } else {
            /* 32-bit host: load the 64-bit value as two halves.  For a
               byte-swapped load the halves trade places.  */
            if (real_bswap) {
                int t = datalo;
                datalo = datahi;
                datahi = t;
            }
            /* Load the half that aliases the base register last, so the
               address is not clobbered before the second load.  */
            if (base != datalo) {
                tcg_out_modrm_sib_offset(s, movop + seg, datalo,
                                         base, index, 0, ofs);
                tcg_out_modrm_sib_offset(s, movop + seg, datahi,
                                         base, index, 0, ofs + 4);
            } else {
                tcg_out_modrm_sib_offset(s, movop + seg, datahi,
                                         base, index, 0, ofs + 4);
                tcg_out_modrm_sib_offset(s, movop + seg, datalo,
                                         base, index, 0, ofs);
            }
            if (bswap) {
                tcg_out_bswap32(s, datalo);
                tcg_out_bswap32(s, datahi);
            }
        }
        break;
    default:
        tcg_abort();
    }
}
1983
1984
1985
1986
/* Emit code for qemu_ld_i32/qemu_ld_i64.  With softmmu, this is the TLB
   lookup, the fast-path load, and registration of the slow path; for
   user-only emulation, a direct load relative to guest_base.  ARGS
   carries data reg(s), address reg(s) and the TCGMemOpIdx, in that
   order, with the double-register forms present only when the data or
   address is wider than the host register.  */
static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
{
    TCGReg datalo, datahi, addrlo;
    TCGReg addrhi __attribute__((unused));
    TCGMemOpIdx oi;
    TCGMemOp opc;
#if defined(CONFIG_SOFTMMU)
    int mem_index;
    tcg_insn_unit *label_ptr[2];
#endif

    datalo = *args++;
    datahi = (TCG_TARGET_REG_BITS == 32 && is64 ? *args++ : 0);
    addrlo = *args++;
    addrhi = (TARGET_LONG_BITS > TCG_TARGET_REG_BITS ? *args++ : 0);
    oi = *args++;
    opc = get_memop(oi);

#if defined(CONFIG_SOFTMMU)
    mem_index = get_mmuidx(oi);

    tcg_out_tlb_load(s, addrlo, addrhi, mem_index, opc,
                     label_ptr, offsetof(CPUTLBEntry, addr_read));

    /* TLB hit: TCG_REG_L1 holds the host address.  */
    tcg_out_qemu_ld_direct(s, datalo, datahi, TCG_REG_L1, -1, 0, 0, opc);

    /* Record the context of this load so the slow path can be emitted
       at the end of the TB.  */
    add_qemu_ldst_label(s, true, oi, datalo, datahi, addrlo, addrhi,
                        s->code_ptr, label_ptr);
#else
    {
        int32_t offset = guest_base;
        TCGReg base = addrlo;
        int index = -1;
        int seg = 0;

        /* When there is no guest_base displacement (zero, or folded
           into a segment base), a narrower-than-host guest address can
           be handled with the ADDR32 prefix; otherwise it must be
           zero-extended explicitly.  */
        if (guest_base == 0 || guest_base_flags) {
            seg = guest_base_flags;
            offset = 0;
            if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
                seg |= P_ADDR32;
            }
        } else if (TCG_TARGET_REG_BITS == 64) {
            if (TARGET_LONG_BITS == 32) {
                tcg_out_ext32u(s, TCG_REG_L0, base);
                base = TCG_REG_L0;
            }
            /* guest_base does not fit in a 32-bit displacement: load it
               into a register and use it as the SIB index.  */
            if (offset != guest_base) {
                tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, guest_base);
                index = TCG_REG_L1;
                offset = 0;
            }
        }

        tcg_out_qemu_ld_direct(s, datalo, datahi,
                               base, index, offset, seg, opc);
    }
#endif
}
2051
/* Emit the actual store of guest data to host address base + ofs, with
   an optional segment-override/prefix in SEG, honoring the size and
   byte-swap requirements of MEMOP.  TCG_REG_L0 is used as a scratch for
   byte-swapping and for byte stores from non-byte-addressable registers.
   With MOVBE available, byte-swapped stores avoid the scratch bswap.  */
static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
                                   TCGReg base, intptr_t ofs, int seg,
                                   TCGMemOp memop)
{
    /* NOTE(review): the constraints presumably guarantee TCG_REG_L0 is
       free here -- confirm against the backend's 'L' constraint.  */
    const TCGReg scratch = TCG_REG_L0;
    const TCGMemOp real_bswap = memop & MO_BSWAP;
    TCGMemOp bswap = real_bswap;
    int movop = OPC_MOVL_EvGv;

    if (have_movbe && real_bswap) {
        bswap = 0;
        movop = OPC_MOVBE_MyGy;
    }

    switch (memop & MO_SIZE) {
    case MO_8:
        /* On i386, only registers 0..3 (EAX..EBX) have byte-addressable
           low halves; bounce anything else through the scratch.  */
        if (TCG_TARGET_REG_BITS == 32 && datalo >= 4) {
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
            datalo = scratch;
        }
        tcg_out_modrm_offset(s, OPC_MOVB_EvGv + P_REXB_R + seg,
                             datalo, base, ofs);
        break;
    case MO_16:
        if (bswap) {
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
            tcg_out_rolw_8(s, scratch);
            datalo = scratch;
        }
        tcg_out_modrm_offset(s, movop + P_DATA16 + seg, datalo, base, ofs);
        break;
    case MO_32:
        if (bswap) {
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
            tcg_out_bswap32(s, scratch);
            datalo = scratch;
        }
        tcg_out_modrm_offset(s, movop + seg, datalo, base, ofs);
        break;
    case MO_64:
        if (TCG_TARGET_REG_BITS == 64) {
            if (bswap) {
                tcg_out_mov(s, TCG_TYPE_I64, scratch, datalo);
                tcg_out_bswap64(s, scratch);
                datalo = scratch;
            }
            tcg_out_modrm_offset(s, movop + P_REXW + seg, datalo, base, ofs);
        } else if (bswap) {
            /* 32-bit host, manual byte-swap: store each half swapped,
               with the halves exchanged.  */
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datahi);
            tcg_out_bswap32(s, scratch);
            tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs);
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
            tcg_out_bswap32(s, scratch);
            tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs+4);
        } else {
            /* 32-bit host: store as two halves; movbe (real_bswap with
               bswap cleared) still needs the halves exchanged.  */
            if (real_bswap) {
                int t = datalo;
                datalo = datahi;
                datahi = t;
            }
            tcg_out_modrm_offset(s, movop + seg, datalo, base, ofs);
            tcg_out_modrm_offset(s, movop + seg, datahi, base, ofs+4);
        }
        break;
    default:
        tcg_abort();
    }
}
2126
/* Emit code for qemu_st_i32/qemu_st_i64.  With softmmu, this is the TLB
   lookup, the fast-path store, and registration of the slow path; for
   user-only emulation, a direct store relative to guest_base.  ARGS is
   laid out as for tcg_out_qemu_ld.  */
static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
{
    TCGReg datalo, datahi, addrlo;
    TCGReg addrhi __attribute__((unused));
    TCGMemOpIdx oi;
    TCGMemOp opc;
#if defined(CONFIG_SOFTMMU)
    int mem_index;
    tcg_insn_unit *label_ptr[2];
#endif

    datalo = *args++;
    datahi = (TCG_TARGET_REG_BITS == 32 && is64 ? *args++ : 0);
    addrlo = *args++;
    addrhi = (TARGET_LONG_BITS > TCG_TARGET_REG_BITS ? *args++ : 0);
    oi = *args++;
    opc = get_memop(oi);

#if defined(CONFIG_SOFTMMU)
    mem_index = get_mmuidx(oi);

    tcg_out_tlb_load(s, addrlo, addrhi, mem_index, opc,
                     label_ptr, offsetof(CPUTLBEntry, addr_write));

    /* TLB hit: TCG_REG_L1 holds the host address.  */
    tcg_out_qemu_st_direct(s, datalo, datahi, TCG_REG_L1, 0, 0, opc);

    /* Record the context of this store so the slow path can be emitted
       at the end of the TB.  */
    add_qemu_ldst_label(s, false, oi, datalo, datahi, addrlo, addrhi,
                        s->code_ptr, label_ptr);
#else
    {
        int32_t offset = guest_base;
        TCGReg base = addrlo;
        int seg = 0;

        /* See the corresponding comment in tcg_out_qemu_ld.  */
        if (guest_base == 0 || guest_base_flags) {
            seg = guest_base_flags;
            offset = 0;
            if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
                seg |= P_ADDR32;
            }
        } else if (TCG_TARGET_REG_BITS == 64) {
            /* Unlike the load path, no SIB index is used here: the
               guest_base is added into TCG_REG_L1, keeping TCG_REG_L0
               free for tcg_out_qemu_st_direct's scratch.  */
            if (offset != guest_base) {
                if (TARGET_LONG_BITS == 32) {
                    tcg_out_ext32u(s, TCG_REG_L0, base);
                    base = TCG_REG_L0;
                }
                tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, guest_base);
                tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L1, base);
                base = TCG_REG_L1;
                offset = 0;
            } else if (TARGET_LONG_BITS == 32) {
                tcg_out_ext32u(s, TCG_REG_L1, base);
                base = TCG_REG_L1;
            }
        }

        tcg_out_qemu_st_direct(s, datalo, datahi, base, offset, seg, opc);
    }
#endif
}
2192
2193static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
2194 const TCGArg *args, const int *const_args)
2195{
2196 TCGArg a0, a1, a2;
2197 int c, const_a2, vexop, rexw = 0;
2198
2199#if TCG_TARGET_REG_BITS == 64
2200# define OP_32_64(x) \
2201 case glue(glue(INDEX_op_, x), _i64): \
2202 rexw = P_REXW; \
2203 case glue(glue(INDEX_op_, x), _i32)
2204#else
2205# define OP_32_64(x) \
2206 case glue(glue(INDEX_op_, x), _i32)
2207#endif
2208
2209
2210 a0 = args[0];
2211 a1 = args[1];
2212 a2 = args[2];
2213 const_a2 = const_args[2];
2214
2215 switch (opc) {
2216 case INDEX_op_exit_tb:
2217
2218 if (a0 == 0) {
2219 tcg_out_jmp(s, s->code_gen_epilogue);
2220 } else {
2221 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_EAX, a0);
2222 tcg_out_jmp(s, tb_ret_addr);
2223 }
2224 break;
2225 case INDEX_op_goto_tb:
2226 if (s->tb_jmp_insn_offset) {
2227
2228 int gap;
2229
2230
2231
2232 gap = tcg_pcrel_diff(s, QEMU_ALIGN_PTR_UP(s->code_ptr + 1, 4));
2233 if (gap != 1) {
2234 tcg_out_nopn(s, gap - 1);
2235 }
2236 tcg_out8(s, OPC_JMP_long);
2237 s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
2238 tcg_out32(s, 0);
2239 } else {
2240
2241 tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, -1,
2242 (intptr_t)(s->tb_jmp_target_addr + a0));
2243 }
2244 set_jmp_reset_offset(s, a0);
2245 break;
2246 case INDEX_op_goto_ptr:
2247
2248 tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, a0);
2249 break;
2250 case INDEX_op_br:
2251 tcg_out_jxx(s, JCC_JMP, arg_label(a0), 0);
2252 break;
2253 OP_32_64(ld8u):
2254
2255 tcg_out_modrm_offset(s, OPC_MOVZBL, a0, a1, a2);
2256 break;
2257 OP_32_64(ld8s):
2258 tcg_out_modrm_offset(s, OPC_MOVSBL + rexw, a0, a1, a2);
2259 break;
2260 OP_32_64(ld16u):
2261
2262 tcg_out_modrm_offset(s, OPC_MOVZWL, a0, a1, a2);
2263 break;
2264 OP_32_64(ld16s):
2265 tcg_out_modrm_offset(s, OPC_MOVSWL + rexw, a0, a1, a2);
2266 break;
2267#if TCG_TARGET_REG_BITS == 64
2268 case INDEX_op_ld32u_i64:
2269#endif
2270 case INDEX_op_ld_i32:
2271 tcg_out_ld(s, TCG_TYPE_I32, a0, a1, a2);
2272 break;
2273
2274 OP_32_64(st8):
2275 if (const_args[0]) {
2276 tcg_out_modrm_offset(s, OPC_MOVB_EvIz, 0, a1, a2);
2277 tcg_out8(s, a0);
2278 } else {
2279 tcg_out_modrm_offset(s, OPC_MOVB_EvGv | P_REXB_R, a0, a1, a2);
2280 }
2281 break;
2282 OP_32_64(st16):
2283 if (const_args[0]) {
2284 tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_DATA16, 0, a1, a2);
2285 tcg_out16(s, a0);
2286 } else {
2287 tcg_out_modrm_offset(s, OPC_MOVL_EvGv | P_DATA16, a0, a1, a2);
2288 }
2289 break;
2290#if TCG_TARGET_REG_BITS == 64
2291 case INDEX_op_st32_i64:
2292#endif
2293 case INDEX_op_st_i32:
2294 if (const_args[0]) {
2295 tcg_out_modrm_offset(s, OPC_MOVL_EvIz, 0, a1, a2);
2296 tcg_out32(s, a0);
2297 } else {
2298 tcg_out_st(s, TCG_TYPE_I32, a0, a1, a2);
2299 }
2300 break;
2301
2302 OP_32_64(add):
2303
2304 if (a0 != a1) {
2305 TCGArg c3 = 0;
2306 if (const_a2) {
2307 c3 = a2, a2 = -1;
2308 } else if (a0 == a2) {
2309
2310
2311 tgen_arithr(s, ARITH_ADD + rexw, a0, a1);
2312 break;
2313 }
2314
2315 tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, a1, a2, 0, c3);
2316 break;
2317 }
2318 c = ARITH_ADD;
2319 goto gen_arith;
2320 OP_32_64(sub):
2321 c = ARITH_SUB;
2322 goto gen_arith;
2323 OP_32_64(and):
2324 c = ARITH_AND;
2325 goto gen_arith;
2326 OP_32_64(or):
2327 c = ARITH_OR;
2328 goto gen_arith;
2329 OP_32_64(xor):
2330 c = ARITH_XOR;
2331 goto gen_arith;
2332 gen_arith:
2333 if (const_a2) {
2334 tgen_arithi(s, c + rexw, a0, a2, 0);
2335 } else {
2336 tgen_arithr(s, c + rexw, a0, a2);
2337 }
2338 break;
2339
2340 OP_32_64(andc):
2341 if (const_a2) {
2342 tcg_out_mov(s, rexw ? TCG_TYPE_I64 : TCG_TYPE_I32, a0, a1);
2343 tgen_arithi(s, ARITH_AND + rexw, a0, ~a2, 0);
2344 } else {
2345 tcg_out_vex_modrm(s, OPC_ANDN + rexw, a0, a2, a1);
2346 }
2347 break;
2348
2349 OP_32_64(mul):
2350 if (const_a2) {
2351 int32_t val;
2352 val = a2;
2353 if (val == (int8_t)val) {
2354 tcg_out_modrm(s, OPC_IMUL_GvEvIb + rexw, a0, a0);
2355 tcg_out8(s, val);
2356 } else {
2357 tcg_out_modrm(s, OPC_IMUL_GvEvIz + rexw, a0, a0);
2358 tcg_out32(s, val);
2359 }
2360 } else {
2361 tcg_out_modrm(s, OPC_IMUL_GvEv + rexw, a0, a2);
2362 }
2363 break;
2364
2365 OP_32_64(div2):
2366 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IDIV, args[4]);
2367 break;
2368 OP_32_64(divu2):
2369 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_DIV, args[4]);
2370 break;
2371
2372 OP_32_64(shl):
2373
2374 if (const_a2 && a0 != a1 && (a2 - 1) < 3) {
2375 if (a2 - 1 == 0) {
2376
2377 tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, a1, a1, 0, 0);
2378 } else {
2379
2380 tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, -1, a1, a2, 0);
2381 }
2382 break;
2383 }
2384 c = SHIFT_SHL;
2385 vexop = OPC_SHLX;
2386 goto gen_shift_maybe_vex;
2387 OP_32_64(shr):
2388 c = SHIFT_SHR;
2389 vexop = OPC_SHRX;
2390 goto gen_shift_maybe_vex;
2391 OP_32_64(sar):
2392 c = SHIFT_SAR;
2393 vexop = OPC_SARX;
2394 goto gen_shift_maybe_vex;
2395 OP_32_64(rotl):
2396 c = SHIFT_ROL;
2397 goto gen_shift;
2398 OP_32_64(rotr):
2399 c = SHIFT_ROR;
2400 goto gen_shift;
2401 gen_shift_maybe_vex:
2402 if (have_bmi2) {
2403 if (!const_a2) {
2404 tcg_out_vex_modrm(s, vexop + rexw, a0, a2, a1);
2405 break;
2406 }
2407 tcg_out_mov(s, rexw ? TCG_TYPE_I64 : TCG_TYPE_I32, a0, a1);
2408 }
2409
2410 gen_shift:
2411 if (const_a2) {
2412 tcg_out_shifti(s, c + rexw, a0, a2);
2413 } else {
2414 tcg_out_modrm(s, OPC_SHIFT_cl + rexw, c, a0);
2415 }
2416 break;
2417
2418 OP_32_64(ctz):
2419 tcg_out_ctz(s, rexw, args[0], args[1], args[2], const_args[2]);
2420 break;
2421 OP_32_64(clz):
2422 tcg_out_clz(s, rexw, args[0], args[1], args[2], const_args[2]);
2423 break;
2424 OP_32_64(ctpop):
2425 tcg_out_modrm(s, OPC_POPCNT + rexw, a0, a1);
2426 break;
2427
2428 case INDEX_op_brcond_i32:
2429 tcg_out_brcond32(s, a2, a0, a1, const_args[1], arg_label(args[3]), 0);
2430 break;
2431 case INDEX_op_setcond_i32:
2432 tcg_out_setcond32(s, args[3], a0, a1, a2, const_a2);
2433 break;
2434 case INDEX_op_movcond_i32:
2435 tcg_out_movcond32(s, args[5], a0, a1, a2, const_a2, args[3]);
2436 break;
2437
2438 OP_32_64(bswap16):
2439 tcg_out_rolw_8(s, a0);
2440 break;
2441 OP_32_64(bswap32):
2442 tcg_out_bswap32(s, a0);
2443 break;
2444
2445 OP_32_64(neg):
2446 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NEG, a0);
2447 break;
2448 OP_32_64(not):
2449 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NOT, a0);
2450 break;
2451
2452 OP_32_64(ext8s):
2453 tcg_out_ext8s(s, a0, a1, rexw);
2454 break;
2455 OP_32_64(ext16s):
2456 tcg_out_ext16s(s, a0, a1, rexw);
2457 break;
2458 OP_32_64(ext8u):
2459 tcg_out_ext8u(s, a0, a1);
2460 break;
2461 OP_32_64(ext16u):
2462 tcg_out_ext16u(s, a0, a1);
2463 break;
2464
2465 case INDEX_op_qemu_ld_i32:
2466 tcg_out_qemu_ld(s, args, 0);
2467 break;
2468 case INDEX_op_qemu_ld_i64:
2469 tcg_out_qemu_ld(s, args, 1);
2470 break;
2471 case INDEX_op_qemu_st_i32:
2472 tcg_out_qemu_st(s, args, 0);
2473 break;
2474 case INDEX_op_qemu_st_i64:
2475 tcg_out_qemu_st(s, args, 1);
2476 break;
2477
2478 OP_32_64(mulu2):
2479 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_MUL, args[3]);
2480 break;
2481 OP_32_64(muls2):
2482 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IMUL, args[3]);
2483 break;
2484 OP_32_64(add2):
2485 if (const_args[4]) {
2486 tgen_arithi(s, ARITH_ADD + rexw, a0, args[4], 1);
2487 } else {
2488 tgen_arithr(s, ARITH_ADD + rexw, a0, args[4]);
2489 }
2490 if (const_args[5]) {
2491 tgen_arithi(s, ARITH_ADC + rexw, a1, args[5], 1);
2492 } else {
2493 tgen_arithr(s, ARITH_ADC + rexw, a1, args[5]);
2494 }
2495 break;
2496 OP_32_64(sub2):
2497 if (const_args[4]) {
2498 tgen_arithi(s, ARITH_SUB + rexw, a0, args[4], 1);
2499 } else {
2500 tgen_arithr(s, ARITH_SUB + rexw, a0, args[4]);
2501 }
2502 if (const_args[5]) {
2503 tgen_arithi(s, ARITH_SBB + rexw, a1, args[5], 1);
2504 } else {
2505 tgen_arithr(s, ARITH_SBB + rexw, a1, args[5]);
2506 }
2507 break;
2508
2509#if TCG_TARGET_REG_BITS == 32
2510 case INDEX_op_brcond2_i32:
2511 tcg_out_brcond2(s, args, const_args, 0);
2512 break;
2513 case INDEX_op_setcond2_i32:
2514 tcg_out_setcond2(s, args, const_args);
2515 break;
2516#else
2517 case INDEX_op_ld32s_i64:
2518 tcg_out_modrm_offset(s, OPC_MOVSLQ, a0, a1, a2);
2519 break;
2520 case INDEX_op_ld_i64:
2521 tcg_out_ld(s, TCG_TYPE_I64, a0, a1, a2);
2522 break;
2523 case INDEX_op_st_i64:
2524 if (const_args[0]) {
2525 tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_REXW, 0, a1, a2);
2526 tcg_out32(s, a0);
2527 } else {
2528 tcg_out_st(s, TCG_TYPE_I64, a0, a1, a2);
2529 }
2530 break;
2531
2532 case INDEX_op_brcond_i64:
2533 tcg_out_brcond64(s, a2, a0, a1, const_args[1], arg_label(args[3]), 0);
2534 break;
2535 case INDEX_op_setcond_i64:
2536 tcg_out_setcond64(s, args[3], a0, a1, a2, const_a2);
2537 break;
2538 case INDEX_op_movcond_i64:
2539 tcg_out_movcond64(s, args[5], a0, a1, a2, const_a2, args[3]);
2540 break;
2541
2542 case INDEX_op_bswap64_i64:
2543 tcg_out_bswap64(s, a0);
2544 break;
2545 case INDEX_op_extu_i32_i64:
2546 case INDEX_op_ext32u_i64:
2547 tcg_out_ext32u(s, a0, a1);
2548 break;
2549 case INDEX_op_ext_i32_i64:
2550 case INDEX_op_ext32s_i64:
2551 tcg_out_ext32s(s, a0, a1);
2552 break;
2553#endif
2554
2555 OP_32_64(deposit):
2556 if (args[3] == 0 && args[4] == 8) {
2557
2558 tcg_out_modrm(s, OPC_MOVB_EvGv | P_REXB_R | P_REXB_RM, a2, a0);
2559 } else if (args[3] == 8 && args[4] == 8) {
2560
2561 tcg_out_modrm(s, OPC_MOVB_EvGv, a2, a0 + 4);
2562 } else if (args[3] == 0 && args[4] == 16) {
2563
2564 tcg_out_modrm(s, OPC_MOVL_EvGv | P_DATA16, a2, a0);
2565 } else {
2566 tcg_abort();
2567 }
2568 break;
2569
2570 case INDEX_op_extract_i64:
2571 if (a2 + args[3] == 32) {
2572
2573 tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
2574 tcg_out_shifti(s, SHIFT_SHR, a0, a2);
2575 break;
2576 }
2577
2578 case INDEX_op_extract_i32:
2579
2580
2581
2582 tcg_debug_assert(a2 == 8 && args[3] == 8);
2583 if (a1 < 4 && a0 < 8) {
2584 tcg_out_modrm(s, OPC_MOVZBL, a0, a1 + 4);
2585 } else {
2586 tcg_out_ext16u(s, a0, a1);
2587 tcg_out_shifti(s, SHIFT_SHR, a0, 8);
2588 }
2589 break;
2590
2591 case INDEX_op_sextract_i32:
2592
2593
2594
2595 tcg_debug_assert(a2 == 8 && args[3] == 8);
2596 if (a1 < 4 && a0 < 8) {
2597 tcg_out_modrm(s, OPC_MOVSBL, a0, a1 + 4);
2598 } else {
2599 tcg_out_ext16s(s, a0, a1, 0);
2600 tcg_out_shifti(s, SHIFT_SAR, a0, 8);
2601 }
2602 break;
2603
2604 case INDEX_op_mb:
2605 tcg_out_mb(s, a0);
2606 break;
2607 case INDEX_op_mov_i32:
2608 case INDEX_op_mov_i64:
2609 case INDEX_op_mov_vec:
2610 case INDEX_op_movi_i32:
2611 case INDEX_op_movi_i64:
2612 case INDEX_op_dupi_vec:
2613 case INDEX_op_call:
2614 default:
2615 tcg_abort();
2616 }
2617
2618#undef OP_32_64
2619}
2620
/*
 * Emit one host SSE/AVX instruction for a TCG vector opcode.
 *
 * The opcode tables below are indexed by vector element size
 * (MO_8 .. MO_64).  OPC_UD2 marks element sizes that have no direct
 * x86 encoding; the assertion under the gen_simd label guards
 * against selecting one of those entries.
 */
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg *args, const int *const_args)
{
    static int const add_insn[4] = {
        OPC_PADDB, OPC_PADDW, OPC_PADDD, OPC_PADDQ
    };
    static int const sub_insn[4] = {
        OPC_PSUBB, OPC_PSUBW, OPC_PSUBD, OPC_PSUBQ
    };
    /* No byte or quad multiply exists; those sizes are expanded.  */
    static int const mul_insn[4] = {
        OPC_UD2, OPC_PMULLW, OPC_PMULLD, OPC_UD2
    };
    /* Immediate shifts: no byte form.  */
    static int const shift_imm_insn[4] = {
        OPC_UD2, OPC_PSHIFTW_Ib, OPC_PSHIFTD_Ib, OPC_PSHIFTQ_Ib
    };
    static int const cmpeq_insn[4] = {
        OPC_PCMPEQB, OPC_PCMPEQW, OPC_PCMPEQD, OPC_PCMPEQQ
    };
    static int const cmpgt_insn[4] = {
        OPC_PCMPGTB, OPC_PCMPGTW, OPC_PCMPGTD, OPC_PCMPGTQ
    };
    static int const punpckl_insn[4] = {
        OPC_PUNPCKLBW, OPC_PUNPCKLWD, OPC_PUNPCKLDQ, OPC_PUNPCKLQDQ
    };
    static int const punpckh_insn[4] = {
        OPC_PUNPCKHBW, OPC_PUNPCKHWD, OPC_PUNPCKHDQ, OPC_PUNPCKHQDQ
    };
    /* Saturating packs narrow from element size vece+1 down to vece;
       only word and dword sources exist in the ISA.  */
    static int const packss_insn[4] = {
        OPC_PACKSSWB, OPC_PACKSSDW, OPC_UD2, OPC_UD2
    };
    static int const packus_insn[4] = {
        OPC_PACKUSWB, OPC_PACKUSDW, OPC_UD2, OPC_UD2
    };

    TCGType type = vecl + TCG_TYPE_V64;
    int insn, sub;
    TCGArg a0, a1, a2;

    a0 = args[0];
    a1 = args[1];
    a2 = args[2];

    switch (opc) {
    case INDEX_op_add_vec:
        insn = add_insn[vece];
        goto gen_simd;
    case INDEX_op_sub_vec:
        insn = sub_insn[vece];
        goto gen_simd;
    case INDEX_op_mul_vec:
        insn = mul_insn[vece];
        goto gen_simd;
    case INDEX_op_and_vec:
        insn = OPC_PAND;
        goto gen_simd;
    case INDEX_op_or_vec:
        insn = OPC_POR;
        goto gen_simd;
    case INDEX_op_xor_vec:
        insn = OPC_PXOR;
        goto gen_simd;
    case INDEX_op_x86_punpckl_vec:
        insn = punpckl_insn[vece];
        goto gen_simd;
    case INDEX_op_x86_punpckh_vec:
        insn = punpckh_insn[vece];
        goto gen_simd;
    case INDEX_op_x86_packss_vec:
        insn = packss_insn[vece];
        goto gen_simd;
    case INDEX_op_x86_packus_vec:
        insn = packus_insn[vece];
        goto gen_simd;
#if TCG_TARGET_REG_BITS == 32
    case INDEX_op_dup2_vec:
        /* Interleave the two 32-bit inputs to form the 64-bit pair.  */
        insn = OPC_PUNPCKLDQ;
        goto gen_simd;
#endif
    gen_simd:
        /* Common exit: emit "insn a0, a1, a2" in VEX form.  */
        tcg_debug_assert(insn != OPC_UD2);
        if (type == TCG_TYPE_V256) {
            insn |= P_VEXL;
        }
        tcg_out_vex_modrm(s, insn, a0, a1, a2);
        break;

    case INDEX_op_cmp_vec:
        /* Only EQ and signed GT are emitted directly; all other
           conditions must already have been lowered to these two
           (see tcg_expand_vec_op).  */
        sub = args[3];
        if (sub == TCG_COND_EQ) {
            insn = cmpeq_insn[vece];
        } else if (sub == TCG_COND_GT) {
            insn = cmpgt_insn[vece];
        } else {
            g_assert_not_reached();
        }
        goto gen_simd;

    case INDEX_op_andc_vec:
        /* vpandn computes ~operand1 & operand2, so the operands are
           swapped here to produce a1 & ~a2.  */
        insn = OPC_PANDN;
        if (type == TCG_TYPE_V256) {
            insn |= P_VEXL;
        }
        tcg_out_vex_modrm(s, insn, a0, a2, a1);
        break;

    case INDEX_op_shli_vec:
        sub = 6;                        /* shift-group /6 = psll */
        goto gen_shift;
    case INDEX_op_shri_vec:
        sub = 2;                        /* shift-group /2 = psrl */
        goto gen_shift;
    case INDEX_op_sari_vec:
        tcg_debug_assert(vece != MO_64);   /* there is no psraq */
        sub = 4;                        /* shift-group /4 = psra */
    gen_shift:
        tcg_debug_assert(vece != MO_8);
        insn = shift_imm_insn[vece];
        if (type == TCG_TYPE_V256) {
            insn |= P_VEXL;
        }
        tcg_out_vex_modrm(s, insn, sub, a0, a1);
        tcg_out8(s, a2);                /* immediate shift count */
        break;

    case INDEX_op_ld_vec:
        tcg_out_ld(s, type, a0, a1, a2);
        break;
    case INDEX_op_st_vec:
        tcg_out_st(s, type, a0, a1, a2);
        break;
    case INDEX_op_dup_vec:
        tcg_out_dup_vec(s, type, vece, a0, a1);
        break;

    case INDEX_op_x86_shufps_vec:
        insn = OPC_SHUFPS;
        sub = args[3];
        goto gen_simd_imm8;
    case INDEX_op_x86_blend_vec:
        if (vece == MO_16) {
            insn = OPC_PBLENDW;
        } else if (vece == MO_32) {
            /* The dword-granular blend is AVX2-only; fall back to the
               float form, which has the same bit behavior.  */
            insn = (have_avx2 ? OPC_VPBLENDD : OPC_BLENDPS);
        } else {
            g_assert_not_reached();
        }
        sub = args[3];
        goto gen_simd_imm8;
    case INDEX_op_x86_vperm2i128_vec:
        insn = OPC_VPERM2I128;
        sub = args[3];
        goto gen_simd_imm8;
    gen_simd_imm8:
        /* As gen_simd, plus a trailing immediate selector byte.  */
        if (type == TCG_TYPE_V256) {
            insn |= P_VEXL;
        }
        tcg_out_vex_modrm(s, insn, a0, a1, a2);
        tcg_out8(s, sub);
        break;

    case INDEX_op_x86_vpblendvb_vec:
        insn = OPC_VPBLENDVB;
        if (type == TCG_TYPE_V256) {
            insn |= P_VEXL;
        }
        tcg_out_vex_modrm(s, insn, a0, a1, a2);
        /* The mask register is encoded in bits 7:4 of the immediate.  */
        tcg_out8(s, args[3] << 4);
        break;

    case INDEX_op_x86_psrldq_vec:
        /* Group-14 /3 = psrldq: byte-wise shift of the full vector.  */
        tcg_out_vex_modrm(s, OPC_GRP14, 3, a0, a1);
        tcg_out8(s, a2);
        break;

    default:
        g_assert_not_reached();
    }
}
2801
/*
 * Return the operand constraint set for OP, or NULL if the opcode is
 * not implemented by this backend.  The returned structures are
 * static and shared, so callers must treat them as read-only.
 * NOTE(review): the constraint letters ("r", "q", "L", "x", "e",
 * "c", "W", "Z", ...) are interpreted by this backend's constraint
 * parser, which is outside this block; confirm letter meanings there.
 */
static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
{
    /* Frequently shared constraint sets, named after their letters.  */
    static const TCGTargetOpDef r = { .args_ct_str = { "r" } };
    static const TCGTargetOpDef ri_r = { .args_ct_str = { "ri", "r" } };
    static const TCGTargetOpDef re_r = { .args_ct_str = { "re", "r" } };
    static const TCGTargetOpDef qi_r = { .args_ct_str = { "qi", "r" } };
    static const TCGTargetOpDef r_r = { .args_ct_str = { "r", "r" } };
    static const TCGTargetOpDef r_q = { .args_ct_str = { "r", "q" } };
    static const TCGTargetOpDef r_re = { .args_ct_str = { "r", "re" } };
    static const TCGTargetOpDef r_0 = { .args_ct_str = { "r", "0" } };
    static const TCGTargetOpDef r_r_ri = { .args_ct_str = { "r", "r", "ri" } };
    static const TCGTargetOpDef r_r_re = { .args_ct_str = { "r", "r", "re" } };
    static const TCGTargetOpDef r_0_re = { .args_ct_str = { "r", "0", "re" } };
    static const TCGTargetOpDef r_0_ci = { .args_ct_str = { "r", "0", "ci" } };
    static const TCGTargetOpDef r_L = { .args_ct_str = { "r", "L" } };
    static const TCGTargetOpDef L_L = { .args_ct_str = { "L", "L" } };
    static const TCGTargetOpDef r_L_L = { .args_ct_str = { "r", "L", "L" } };
    static const TCGTargetOpDef r_r_L = { .args_ct_str = { "r", "r", "L" } };
    static const TCGTargetOpDef L_L_L = { .args_ct_str = { "L", "L", "L" } };
    static const TCGTargetOpDef r_r_L_L
        = { .args_ct_str = { "r", "r", "L", "L" } };
    static const TCGTargetOpDef L_L_L_L
        = { .args_ct_str = { "L", "L", "L", "L" } };
    static const TCGTargetOpDef x_x = { .args_ct_str = { "x", "x" } };
    static const TCGTargetOpDef x_x_x = { .args_ct_str = { "x", "x", "x" } };
    static const TCGTargetOpDef x_x_x_x
        = { .args_ct_str = { "x", "x", "x", "x" } };
    static const TCGTargetOpDef x_r = { .args_ct_str = { "x", "r" } };

    switch (op) {
    case INDEX_op_goto_ptr:
        return &r;

    /* Integer loads: any output register, any base register.  */
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld_i32:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_ld_i64:
        return &r_r;

    /* Integer stores: first operand is the value, second the base.  */
    case INDEX_op_st8_i32:
    case INDEX_op_st8_i64:
        return &qi_r;
    case INDEX_op_st16_i32:
    case INDEX_op_st16_i64:
    case INDEX_op_st_i32:
    case INDEX_op_st32_i64:
        return &ri_r;
    case INDEX_op_st_i64:
        return &re_r;

    case INDEX_op_add_i32:
    case INDEX_op_add_i64:
        return &r_r_re;
    /* Two-address arithmetic: output must match the first input.  */
    case INDEX_op_sub_i32:
    case INDEX_op_sub_i64:
    case INDEX_op_mul_i32:
    case INDEX_op_mul_i64:
    case INDEX_op_or_i32:
    case INDEX_op_or_i64:
    case INDEX_op_xor_i32:
    case INDEX_op_xor_i64:
        return &r_0_re;

    case INDEX_op_and_i32:
    case INDEX_op_and_i64:
        {
            static const TCGTargetOpDef and
                = { .args_ct_str = { "r", "0", "reZ" } };
            return &and;
        }
        break;
    case INDEX_op_andc_i32:
    case INDEX_op_andc_i64:
        {
            static const TCGTargetOpDef andc
                = { .args_ct_str = { "r", "r", "rI" } };
            return &andc;
        }
        break;

    /* Shifts: three-operand form requires BMI2, otherwise the count
       must be an immediate or in %cl and the output matches input.  */
    case INDEX_op_shl_i32:
    case INDEX_op_shl_i64:
    case INDEX_op_shr_i32:
    case INDEX_op_shr_i64:
    case INDEX_op_sar_i32:
    case INDEX_op_sar_i64:
        return have_bmi2 ? &r_r_ri : &r_0_ci;
    case INDEX_op_rotl_i32:
    case INDEX_op_rotl_i64:
    case INDEX_op_rotr_i32:
    case INDEX_op_rotr_i64:
        return &r_0_ci;

    case INDEX_op_brcond_i32:
    case INDEX_op_brcond_i64:
        return &r_re;

    /* In-place unary operations.  */
    case INDEX_op_bswap16_i32:
    case INDEX_op_bswap16_i64:
    case INDEX_op_bswap32_i32:
    case INDEX_op_bswap32_i64:
    case INDEX_op_bswap64_i64:
    case INDEX_op_neg_i32:
    case INDEX_op_neg_i64:
    case INDEX_op_not_i32:
    case INDEX_op_not_i64:
        return &r_0;

    /* Byte extensions need a byte-accessible source register.  */
    case INDEX_op_ext8s_i32:
    case INDEX_op_ext8s_i64:
    case INDEX_op_ext8u_i32:
    case INDEX_op_ext8u_i64:
        return &r_q;
    case INDEX_op_ext16s_i32:
    case INDEX_op_ext16s_i64:
    case INDEX_op_ext16u_i32:
    case INDEX_op_ext16u_i64:
    case INDEX_op_ext32s_i64:
    case INDEX_op_ext32u_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
    case INDEX_op_extract_i32:
    case INDEX_op_extract_i64:
    case INDEX_op_sextract_i32:
    case INDEX_op_ctpop_i32:
    case INDEX_op_ctpop_i64:
        return &r_r;

    case INDEX_op_deposit_i32:
    case INDEX_op_deposit_i64:
        {
            static const TCGTargetOpDef dep
                = { .args_ct_str = { "Q", "0", "Q" } };
            return &dep;
        }
    case INDEX_op_setcond_i32:
    case INDEX_op_setcond_i64:
        {
            static const TCGTargetOpDef setc
                = { .args_ct_str = { "q", "r", "re" } };
            return &setc;
        }
    case INDEX_op_movcond_i32:
    case INDEX_op_movcond_i64:
        {
            static const TCGTargetOpDef movc
                = { .args_ct_str = { "r", "r", "re", "r", "0" } };
            return &movc;
        }
    /* div/mul pin their operands to %eax/%edx as the ISA demands.  */
    case INDEX_op_div2_i32:
    case INDEX_op_div2_i64:
    case INDEX_op_divu2_i32:
    case INDEX_op_divu2_i64:
        {
            static const TCGTargetOpDef div2
                = { .args_ct_str = { "a", "d", "0", "1", "r" } };
            return &div2;
        }
    case INDEX_op_mulu2_i32:
    case INDEX_op_mulu2_i64:
    case INDEX_op_muls2_i32:
    case INDEX_op_muls2_i64:
        {
            static const TCGTargetOpDef mul2
                = { .args_ct_str = { "a", "d", "a", "r" } };
            return &mul2;
        }
    case INDEX_op_add2_i32:
    case INDEX_op_add2_i64:
    case INDEX_op_sub2_i32:
    case INDEX_op_sub2_i64:
        {
            static const TCGTargetOpDef arith2
                = { .args_ct_str = { "r", "r", "0", "1", "re", "re" } };
            return &arith2;
        }
    /* ctz/clz: constraints depend on whether the hardware gives a
       well-defined result for a zero input (tzcnt/lzcnt).  */
    case INDEX_op_ctz_i32:
    case INDEX_op_ctz_i64:
        {
            static const TCGTargetOpDef ctz[2] = {
                { .args_ct_str = { "&r", "r", "r" } },
                { .args_ct_str = { "&r", "r", "rW" } },
            };
            return &ctz[have_bmi1];
        }
    case INDEX_op_clz_i32:
    case INDEX_op_clz_i64:
        {
            static const TCGTargetOpDef clz[2] = {
                { .args_ct_str = { "&r", "r", "r" } },
                { .args_ct_str = { "&r", "r", "rW" } },
            };
            return &clz[have_lzcnt];
        }

    /* Guest memory ops: extra operands when the guest address or the
       data does not fit in one host register.  */
    case INDEX_op_qemu_ld_i32:
        return TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? &r_L : &r_L_L;
    case INDEX_op_qemu_st_i32:
        return TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? &L_L : &L_L_L;
    case INDEX_op_qemu_ld_i64:
        return (TCG_TARGET_REG_BITS == 64 ? &r_L
                : TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? &r_r_L
                : &r_r_L_L);
    case INDEX_op_qemu_st_i64:
        return (TCG_TARGET_REG_BITS == 64 ? &L_L
                : TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? &L_L_L
                : &L_L_L_L);

    case INDEX_op_brcond2_i32:
        {
            static const TCGTargetOpDef b2
                = { .args_ct_str = { "r", "r", "ri", "ri" } };
            return &b2;
        }
    case INDEX_op_setcond2_i32:
        {
            static const TCGTargetOpDef s2
                = { .args_ct_str = { "r", "r", "r", "ri", "ri" } };
            return &s2;
        }

    /* Vector operations.  */
    case INDEX_op_ld_vec:
    case INDEX_op_st_vec:
        return &x_r;

    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_mul_vec:
    case INDEX_op_and_vec:
    case INDEX_op_or_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_andc_vec:
    case INDEX_op_cmp_vec:
    case INDEX_op_x86_shufps_vec:
    case INDEX_op_x86_blend_vec:
    case INDEX_op_x86_packss_vec:
    case INDEX_op_x86_packus_vec:
    case INDEX_op_x86_vperm2i128_vec:
    case INDEX_op_x86_punpckl_vec:
    case INDEX_op_x86_punpckh_vec:
#if TCG_TARGET_REG_BITS == 32
    case INDEX_op_dup2_vec:
#endif
        return &x_x_x;
    case INDEX_op_dup_vec:
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
    case INDEX_op_sari_vec:
    case INDEX_op_x86_psrldq_vec:
        return &x_x;
    case INDEX_op_x86_vpblendvb_vec:
        return &x_x_x_x;

    default:
        break;
    }
    return NULL;
}
3068
3069int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
3070{
3071 switch (opc) {
3072 case INDEX_op_add_vec:
3073 case INDEX_op_sub_vec:
3074 case INDEX_op_and_vec:
3075 case INDEX_op_or_vec:
3076 case INDEX_op_xor_vec:
3077 case INDEX_op_andc_vec:
3078 return 1;
3079 case INDEX_op_cmp_vec:
3080 return -1;
3081
3082 case INDEX_op_shli_vec:
3083 case INDEX_op_shri_vec:
3084
3085 return vece == MO_8 ? -1 : 1;
3086
3087 case INDEX_op_sari_vec:
3088
3089 if (vece == MO_8) {
3090 return -1;
3091 }
3092
3093
3094 if (vece == MO_64) {
3095 return type >= TCG_TYPE_V256 ? -1 : 0;
3096 }
3097 return 1;
3098
3099 case INDEX_op_mul_vec:
3100 if (vece == MO_8) {
3101
3102 return -1;
3103 }
3104 if (vece == MO_64) {
3105 return 0;
3106 }
3107 return 1;
3108
3109 default:
3110 return 0;
3111 }
3112}
3113
/*
 * Expand a vector opcode that tcg_can_emit_vec_op reported as
 * expandable (-1) into a sequence of operations this backend does
 * support.  Extra operands arrive through the varargs list.
 */
void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
                       TCGArg a0, ...)
{
    va_list va;
    TCGArg a1, a2;
    TCGv_vec v0, t1, t2, t3, t4;

    va_start(va, a0);
    v0 = temp_tcgv_vec(arg_temp(a0));

    switch (opc) {
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
        tcg_debug_assert(vece == MO_8);
        a1 = va_arg(va, TCGArg);
        a2 = va_arg(va, TCGArg);
        /*
         * There are no byte shifts, so emulate with 16-bit shifts:
         * interleave the input with itself so each byte B becomes the
         * word (B << 8) | B, shift at word size, then repack.
         *
         * For a right shift the wanted bits sit in the high byte of
         * each word, so shift right by a2 + 8, leaving B >> a2 in the
         * low byte and zeros above.  For a left shift, shifting left
         * by a2 + 8 discards the overflow at word size, and the
         * follow-up right shift by 8 places (B << a2) & 0xff in the
         * low byte.  Either way each word ends up in [0, 255], so the
         * final unsigned-saturating pack is exact.
         */
        t1 = tcg_temp_new_vec(type);
        t2 = tcg_temp_new_vec(type);
        vec_gen_3(INDEX_op_x86_punpckl_vec, type, MO_8,
                  tcgv_vec_arg(t1), a1, a1);
        vec_gen_3(INDEX_op_x86_punpckh_vec, type, MO_8,
                  tcgv_vec_arg(t2), a1, a1);
        if (opc == INDEX_op_shri_vec) {
            vec_gen_3(INDEX_op_shri_vec, type, MO_16,
                      tcgv_vec_arg(t1), tcgv_vec_arg(t1), a2 + 8);
            vec_gen_3(INDEX_op_shri_vec, type, MO_16,
                      tcgv_vec_arg(t2), tcgv_vec_arg(t2), a2 + 8);
        } else {
            vec_gen_3(INDEX_op_shli_vec, type, MO_16,
                      tcgv_vec_arg(t1), tcgv_vec_arg(t1), a2 + 8);
            vec_gen_3(INDEX_op_shli_vec, type, MO_16,
                      tcgv_vec_arg(t2), tcgv_vec_arg(t2), a2 + 8);
            vec_gen_3(INDEX_op_shri_vec, type, MO_16,
                      tcgv_vec_arg(t1), tcgv_vec_arg(t1), 8);
            vec_gen_3(INDEX_op_shri_vec, type, MO_16,
                      tcgv_vec_arg(t2), tcgv_vec_arg(t2), 8);
        }
        vec_gen_3(INDEX_op_x86_packus_vec, type, MO_8,
                  a0, tcgv_vec_arg(t1), tcgv_vec_arg(t2));
        tcg_temp_free_vec(t1);
        tcg_temp_free_vec(t2);
        break;

    case INDEX_op_sari_vec:
        a1 = va_arg(va, TCGArg);
        a2 = va_arg(va, TCGArg);
        if (vece == MO_8) {
            /* As above: unpack to words, arithmetic shift by a2 + 8
               (each word now holds a sign-extended byte value), and
               repack with signed saturation, which is exact here.  */
            t1 = tcg_temp_new_vec(type);
            t2 = tcg_temp_new_vec(type);
            vec_gen_3(INDEX_op_x86_punpckl_vec, type, MO_8,
                      tcgv_vec_arg(t1), a1, a1);
            vec_gen_3(INDEX_op_x86_punpckh_vec, type, MO_8,
                      tcgv_vec_arg(t2), a1, a1);
            vec_gen_3(INDEX_op_sari_vec, type, MO_16,
                      tcgv_vec_arg(t1), tcgv_vec_arg(t1), a2 + 8);
            vec_gen_3(INDEX_op_sari_vec, type, MO_16,
                      tcgv_vec_arg(t2), tcgv_vec_arg(t2), a2 + 8);
            vec_gen_3(INDEX_op_x86_packss_vec, type, MO_8,
                      a0, tcgv_vec_arg(t1), tcgv_vec_arg(t2));
            tcg_temp_free_vec(t1);
            tcg_temp_free_vec(t2);
            break;
        }
        tcg_debug_assert(vece == MO_64);

        /* There is no 64-bit arithmetic shift.  If the count is at
           most 32, produce the high dwords with a 32-bit arithmetic
           shift and the low dwords with a 64-bit logical shift, then
           merge with a dword blend (mask 0xaa selects the odd, i.e.
           high, dwords from t1).  */
        if (a2 <= 32) {
            t1 = tcg_temp_new_vec(type);
            vec_gen_3(INDEX_op_sari_vec, type, MO_32, tcgv_vec_arg(t1), a1, a2);
            vec_gen_3(INDEX_op_shri_vec, type, MO_64, a0, a1, a2);
            vec_gen_4(INDEX_op_x86_blend_vec, type, MO_32,
                      a0, a0, tcgv_vec_arg(t1), 0xaa);
            tcg_temp_free_vec(t1);
            break;
        }

        /* Otherwise build a sign mask (all-ones where a1 is negative)
           via 0 > a1, do the logical shift, then shift the mask left
           by 64 - a2 so it covers the vacated high bits and OR it in.  */
        t1 = tcg_temp_new_vec(type);
        t2 = tcg_const_zeros_vec(type);
        vec_gen_4(INDEX_op_cmp_vec, type, MO_64,
                  tcgv_vec_arg(t1), tcgv_vec_arg(t2), a1, TCG_COND_GT);
        tcg_temp_free_vec(t2);
        vec_gen_3(INDEX_op_shri_vec, type, MO_64, a0, a1, a2);
        vec_gen_3(INDEX_op_shli_vec, type, MO_64,
                  tcgv_vec_arg(t1), tcgv_vec_arg(t1), 64 - a2);
        vec_gen_3(INDEX_op_or_vec, type, MO_64, a0, a0, tcgv_vec_arg(t1));
        tcg_temp_free_vec(t1);
        break;

    case INDEX_op_mul_vec:
        tcg_debug_assert(vece == MO_8);
        a1 = va_arg(va, TCGArg);
        a2 = va_arg(va, TCGArg);
        /*
         * Byte multiply via 16-bit multiplies: interleave one operand
         * with zero (zero-extending each byte into a word) and the
         * other so its bytes land in the high byte of each word.  The
         * word product is then (x * y) << 8, so a logical right shift
         * by 8 recovers the low 8 bits of x * y exactly, and the
         * unsigned-saturating pack is lossless.
         */
        switch (type) {
        case TCG_TYPE_V64:
            /* Only 8 results needed: one low-half unpack suffices.  */
            t1 = tcg_temp_new_vec(TCG_TYPE_V128);
            t2 = tcg_temp_new_vec(TCG_TYPE_V128);
            tcg_gen_dup16i_vec(t2, 0);
            vec_gen_3(INDEX_op_x86_punpckl_vec, TCG_TYPE_V128, MO_8,
                      tcgv_vec_arg(t1), a1, tcgv_vec_arg(t2));
            vec_gen_3(INDEX_op_x86_punpckl_vec, TCG_TYPE_V128, MO_8,
                      tcgv_vec_arg(t2), tcgv_vec_arg(t2), a2);
            tcg_gen_mul_vec(MO_16, t1, t1, t2);
            tcg_gen_shri_vec(MO_16, t1, t1, 8);
            vec_gen_3(INDEX_op_x86_packus_vec, TCG_TYPE_V128, MO_8,
                      a0, tcgv_vec_arg(t1), tcgv_vec_arg(t1));
            tcg_temp_free_vec(t1);
            tcg_temp_free_vec(t2);
            break;

        case TCG_TYPE_V128:
            /* Process low and high halves separately; t4 holds the
               zero vector shared by all four unpacks.  */
            t1 = tcg_temp_new_vec(TCG_TYPE_V128);
            t2 = tcg_temp_new_vec(TCG_TYPE_V128);
            t3 = tcg_temp_new_vec(TCG_TYPE_V128);
            t4 = tcg_temp_new_vec(TCG_TYPE_V128);
            tcg_gen_dup16i_vec(t4, 0);
            vec_gen_3(INDEX_op_x86_punpckl_vec, TCG_TYPE_V128, MO_8,
                      tcgv_vec_arg(t1), a1, tcgv_vec_arg(t4));
            vec_gen_3(INDEX_op_x86_punpckl_vec, TCG_TYPE_V128, MO_8,
                      tcgv_vec_arg(t2), tcgv_vec_arg(t4), a2);
            vec_gen_3(INDEX_op_x86_punpckh_vec, TCG_TYPE_V128, MO_8,
                      tcgv_vec_arg(t3), a1, tcgv_vec_arg(t4));
            vec_gen_3(INDEX_op_x86_punpckh_vec, TCG_TYPE_V128, MO_8,
                      tcgv_vec_arg(t4), tcgv_vec_arg(t4), a2);
            tcg_gen_mul_vec(MO_16, t1, t1, t2);
            tcg_gen_mul_vec(MO_16, t3, t3, t4);
            tcg_gen_shri_vec(MO_16, t1, t1, 8);
            tcg_gen_shri_vec(MO_16, t3, t3, 8);
            vec_gen_3(INDEX_op_x86_packus_vec, TCG_TYPE_V128, MO_8,
                      a0, tcgv_vec_arg(t1), tcgv_vec_arg(t3));
            tcg_temp_free_vec(t1);
            tcg_temp_free_vec(t2);
            tcg_temp_free_vec(t3);
            tcg_temp_free_vec(t4);
            break;

        case TCG_TYPE_V256:
            t1 = tcg_temp_new_vec(TCG_TYPE_V256);
            t2 = tcg_temp_new_vec(TCG_TYPE_V256);
            t3 = tcg_temp_new_vec(TCG_TYPE_V256);
            t4 = tcg_temp_new_vec(TCG_TYPE_V256);
            tcg_gen_dup16i_vec(t4, 0);
            /*
             * NOTE(review): AVX2 punpck and pack instructions operate
             * within 128-bit lanes.  Since the final pack uses the
             * same within-lane ordering as the unpacks above, the
             * bytes come back in their original positions.
             */
            vec_gen_3(INDEX_op_x86_punpckl_vec, TCG_TYPE_V256, MO_8,
                      tcgv_vec_arg(t1), a1, tcgv_vec_arg(t4));
            vec_gen_3(INDEX_op_x86_punpckl_vec, TCG_TYPE_V256, MO_8,
                      tcgv_vec_arg(t2), tcgv_vec_arg(t4), a2);
            vec_gen_3(INDEX_op_x86_punpckh_vec, TCG_TYPE_V256, MO_8,
                      tcgv_vec_arg(t3), a1, tcgv_vec_arg(t4));
            vec_gen_3(INDEX_op_x86_punpckh_vec, TCG_TYPE_V256, MO_8,
                      tcgv_vec_arg(t4), tcgv_vec_arg(t4), a2);

            tcg_gen_mul_vec(MO_16, t1, t1, t2);
            tcg_gen_mul_vec(MO_16, t3, t3, t4);
            tcg_gen_shri_vec(MO_16, t1, t1, 8);
            tcg_gen_shri_vec(MO_16, t3, t3, 8);

            vec_gen_3(INDEX_op_x86_packus_vec, TCG_TYPE_V256, MO_8,
                      a0, tcgv_vec_arg(t1), tcgv_vec_arg(t3));
            tcg_temp_free_vec(t1);
            tcg_temp_free_vec(t2);
            tcg_temp_free_vec(t3);
            tcg_temp_free_vec(t4);
            break;

        default:
            g_assert_not_reached();
        }
        break;

    case INDEX_op_cmp_vec:
        {
            enum {
                NEED_SWAP = 1,
                NEED_INV = 2,
                NEED_BIAS = 4
            };
            /* Lowering recipe for each condition onto the two native
               comparisons (EQ and signed GT): optionally swap the
               operands, invert the result, and/or bias unsigned
               operands into signed range.  Entries left at -1 (0xff
               after the uint8_t conversion) are invalid conditions.  */
            static const uint8_t fixups[16] = {
                [0 ... 15] = -1,
                [TCG_COND_EQ] = 0,
                [TCG_COND_NE] = NEED_INV,
                [TCG_COND_GT] = 0,
                [TCG_COND_LT] = NEED_SWAP,
                [TCG_COND_LE] = NEED_INV,
                [TCG_COND_GE] = NEED_SWAP | NEED_INV,
                [TCG_COND_GTU] = NEED_BIAS,
                [TCG_COND_LTU] = NEED_BIAS | NEED_SWAP,
                [TCG_COND_LEU] = NEED_BIAS | NEED_INV,
                [TCG_COND_GEU] = NEED_BIAS | NEED_SWAP | NEED_INV,
            };

            TCGCond cond;
            uint8_t fixup;

            a1 = va_arg(va, TCGArg);
            a2 = va_arg(va, TCGArg);
            cond = va_arg(va, TCGArg);
            fixup = fixups[cond & 15];
            tcg_debug_assert(fixup != 0xff);

            if (fixup & NEED_INV) {
                cond = tcg_invert_cond(cond);
            }
            if (fixup & NEED_SWAP) {
                TCGArg t;
                t = a1, a1 = a2, a2 = t;
                cond = tcg_swap_cond(cond);
            }

            t1 = t2 = NULL;
            if (fixup & NEED_BIAS) {
                /* Subtracting the element's sign bit from both
                   operands turns the unsigned comparison into the
                   equivalent signed one.  */
                t1 = tcg_temp_new_vec(type);
                t2 = tcg_temp_new_vec(type);
                tcg_gen_dupi_vec(vece, t2, 1ull << ((8 << vece) - 1));
                tcg_gen_sub_vec(vece, t1, temp_tcgv_vec(arg_temp(a1)), t2);
                tcg_gen_sub_vec(vece, t2, temp_tcgv_vec(arg_temp(a2)), t2);
                a1 = tcgv_vec_arg(t1);
                a2 = tcgv_vec_arg(t2);
                cond = tcg_signed_cond(cond);
            }

            /* Only the two native conditions may remain.  */
            tcg_debug_assert(cond == TCG_COND_EQ || cond == TCG_COND_GT);
            vec_gen_4(INDEX_op_cmp_vec, type, vece, a0, a1, a2, cond);

            if (fixup & NEED_BIAS) {
                tcg_temp_free_vec(t1);
                tcg_temp_free_vec(t2);
            }
            if (fixup & NEED_INV) {
                tcg_gen_not_vec(vece, v0, v0);
            }
        }
        break;

    default:
        break;
    }

    va_end(va);
}
3369
/* Registers that must be preserved across calls, listed in the exact
   order the prologue pushes them (the epilogue pops in reverse, and
   the unwind info in debug_frame below must match this order).  */
static const int tcg_target_callee_save_regs[] = {
#if TCG_TARGET_REG_BITS == 64
    TCG_REG_RBP,
    TCG_REG_RBX,
#if defined(_WIN64)
    /* The Win64 ABI additionally treats %rdi and %rsi as callee-saved.  */
    TCG_REG_RDI,
    TCG_REG_RSI,
#endif
    TCG_REG_R12,
    TCG_REG_R13,
    TCG_REG_R14,
    TCG_REG_R15,
#else
    /* 32-bit: %ebp, %ebx, %esi and %edi are callee-saved.  */
    TCG_REG_EBP,
    TCG_REG_EBX,
    TCG_REG_ESI,
    TCG_REG_EDI,
#endif
};
3389
3390
3391
3392
/* Stack space consumed by the prologue's pushes: the return address
   plus each callee-saved register.  */
#define PUSH_SIZE \
    ((1 + ARRAY_SIZE(tcg_target_callee_save_regs)) \
     * (TCG_TARGET_REG_BITS / 8))

/* Total frame size: push area, static call-argument area and the TCG
   temp buffer, rounded up to the target stack alignment.  Shared by
   the prologue and the unwind info below.  */
#define FRAME_SIZE \
    ((PUSH_SIZE \
      + TCG_STATIC_CALL_ARGS_SIZE \
      + CPU_TEMP_BUF_NLONGS * sizeof(long) \
      + TCG_TARGET_STACK_ALIGN - 1) \
     & ~(TCG_TARGET_STACK_ALIGN - 1))
3403
3404
/* Generate the global prologue/epilogue code that enters and leaves
   translated blocks.  */
static void tcg_target_qemu_prologue(TCGContext *s)
{
    int i, stack_addend;

    /* TB prologue.  The pushes below account for PUSH_SIZE bytes of
       the frame; only the remainder is subtracted explicitly.  */
    stack_addend = FRAME_SIZE - PUSH_SIZE;
    tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE,
                  CPU_TEMP_BUF_NLONGS * sizeof(long));

    /* Save all callee-saved registers.  */
    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
        tcg_out_push(s, tcg_target_callee_save_regs[i]);
    }

#if TCG_TARGET_REG_BITS == 32
    /* The first argument (env) arrives on the stack, just above the
       pushed registers and the return address.  */
    tcg_out_ld(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP,
               (ARRAY_SIZE(tcg_target_callee_save_regs) + 1) * 4);
    tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
    /* jmp *tb: the second stack argument; its offset compensates for
       the frame adjustment just performed.  */
    tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, TCG_REG_ESP,
                         (ARRAY_SIZE(tcg_target_callee_save_regs) + 2) * 4
                         + stack_addend);
#else
    /* env arrives in the first integer argument register.  */
    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
    tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
    /* jmp *tb (the second argument register).  */
    tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, tcg_target_call_iarg_regs[1]);
#endif

    /* Return path for goto_ptr.  Set the return value to 0, then
       fall through into the TB epilogue below.  */
    s->code_gen_epilogue = s->code_ptr;
    tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_EAX, 0);

    /* TB epilogue: undo the frame, restore registers, return.  */
    tb_ret_addr = s->code_ptr;

    tcg_out_addi(s, TCG_REG_CALL_STACK, stack_addend);

    if (have_avx2) {
        /* Leave the AVX upper state before returning to the caller.  */
        tcg_out_vex_opc(s, OPC_VZEROUPPER, 0, 0, 0, 0);
    }
    for (i = ARRAY_SIZE(tcg_target_callee_save_regs) - 1; i >= 0; i--) {
        tcg_out_pop(s, tcg_target_callee_save_regs[i]);
    }
    tcg_out_opc(s, OPC_RET, 0, 0, 0);

#if !defined(CONFIG_SOFTMMU)
    /* NOTE(review): presumably installs guest_base into a segment
       register for user-mode guest accesses — confirm against
       setup_guest_base_seg's definition elsewhere in this file.  */
    if (guest_base) {
        setup_guest_base_seg();
    }
#endif
}
3463
/* Pad a code-buffer gap with single-byte NOP instructions (0x90).  */
static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
{
    memset(p, 0x90, count);
}
3468
3469static void tcg_target_init(TCGContext *s)
3470{
3471#ifdef CONFIG_CPUID_H
3472 unsigned a, b, c, d, b7 = 0;
3473 int max = __get_cpuid_max(0, 0);
3474
3475 if (max >= 7) {
3476
3477 __cpuid_count(7, 0, a, b7, c, d);
3478 have_bmi1 = (b7 & bit_BMI) != 0;
3479 have_bmi2 = (b7 & bit_BMI2) != 0;
3480 }
3481
3482 if (max >= 1) {
3483 __cpuid(1, a, b, c, d);
3484#ifndef have_cmov
3485
3486
3487
3488 have_cmov = (d & bit_CMOV) != 0;
3489#endif
3490
3491
3492
3493 have_movbe = (c & bit_MOVBE) != 0;
3494 have_popcnt = (c & bit_POPCNT) != 0;
3495
3496
3497
3498 if (c & bit_OSXSAVE) {
3499 unsigned xcrl, xcrh;
3500
3501
3502
3503 asm(".byte 0x0f, 0x01, 0xd0" : "=a" (xcrl), "=d" (xcrh) : "c" (0));
3504 if ((xcrl & 6) == 6) {
3505 have_avx1 = (c & bit_AVX) != 0;
3506 have_avx2 = (b7 & bit_AVX2) != 0;
3507 }
3508 }
3509 }
3510
3511 max = __get_cpuid_max(0x8000000, 0);
3512 if (max >= 1) {
3513 __cpuid(0x80000001, a, b, c, d);
3514
3515 have_lzcnt = (c & bit_LZCNT) != 0;
3516 }
3517#endif
3518
3519 tcg_target_available_regs[TCG_TYPE_I32] = ALL_GENERAL_REGS;
3520 if (TCG_TARGET_REG_BITS == 64) {
3521 tcg_target_available_regs[TCG_TYPE_I64] = ALL_GENERAL_REGS;
3522 }
3523 if (have_avx1) {
3524 tcg_target_available_regs[TCG_TYPE_V64] = ALL_VECTOR_REGS;
3525 tcg_target_available_regs[TCG_TYPE_V128] = ALL_VECTOR_REGS;
3526 }
3527 if (have_avx2) {
3528 tcg_target_available_regs[TCG_TYPE_V256] = ALL_VECTOR_REGS;
3529 }
3530
3531 tcg_target_call_clobber_regs = ALL_VECTOR_REGS;
3532 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EAX);
3533 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EDX);
3534 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_ECX);
3535 if (TCG_TARGET_REG_BITS == 64) {
3536#if !defined(_WIN64)
3537 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RDI);
3538 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RSI);
3539#endif
3540 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8);
3541 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9);
3542 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10);
3543 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11);
3544 }
3545
3546 s->reserved_regs = 0;
3547 tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
3548}
3549
/* Unwind info handed to tcg_register_jit below: the common header
   followed by raw DWARF CFA opcode bytes.  */
typedef struct {
    DebugFrameHeader h;
    uint8_t fde_def_cfa[4];     /* DW_CFA_def_cfa rule for the frame */
    uint8_t fde_reg_ofs[14];    /* DW_CFA_offset entries for saved regs */
} DebugFrame;

/* The fde_def_cfa encoding below assumes FRAME_SIZE fits in a
   two-byte uleb128.  */
QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
3558
#if !defined(__ELF__)
    /* Host machine without ELF: no unwind info is registered.  */
#elif TCG_TARGET_REG_BITS == 64
#define ELF_HOST_MACHINE EM_X86_64
static const DebugFrame debug_frame = {
    .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
    .h.cie.id = -1,
    .h.cie.version = 1,
    .h.cie.code_align = 1,
    .h.cie.data_align = 0x78,             /* sleb128 -8 */
    .h.cie.return_column = 16,            /* %rip */

    /* Total FDE size does not include the "len" member.  */
    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),

    .fde_def_cfa = {
        12, 7,                          /* DW_CFA_def_cfa %rsp, ...  */
        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
        (FRAME_SIZE >> 7)
    },
    .fde_reg_ofs = {
        0x90, 1,                        /* DW_CFA_offset, %rip, -8 */
        /* This ordering must match tcg_target_callee_save_regs.  */
        0x86, 2,                        /* DW_CFA_offset, %rbp, -16 */
        0x83, 3,                        /* DW_CFA_offset, %rbx, -24 */
        0x8c, 4,                        /* DW_CFA_offset, %r12, -32 */
        0x8d, 5,                        /* DW_CFA_offset, %r13, -40 */
        0x8e, 6,                        /* DW_CFA_offset, %r14, -48 */
        0x8f, 7,                        /* DW_CFA_offset, %r15, -56 */
    }
};
#else
#define ELF_HOST_MACHINE EM_386
static const DebugFrame debug_frame = {
    .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
    .h.cie.id = -1,
    .h.cie.version = 1,
    .h.cie.code_align = 1,
    .h.cie.data_align = 0x7c,             /* sleb128 -4 */
    .h.cie.return_column = 8,             /* %eip */

    /* Total FDE size does not include the "len" member.  */
    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),

    .fde_def_cfa = {
        12, 4,                          /* DW_CFA_def_cfa %esp, ...  */
        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
        (FRAME_SIZE >> 7)
    },
    .fde_reg_ofs = {
        0x88, 1,                        /* DW_CFA_offset, %eip, -4 */
        /* This ordering must match tcg_target_callee_save_regs.  */
        0x85, 2,                        /* DW_CFA_offset, %ebp, -8 */
        0x83, 3,                        /* DW_CFA_offset, %ebx, -12 */
        0x86, 4,                        /* DW_CFA_offset, %esi, -16 */
        0x87, 5,                        /* DW_CFA_offset, %edi, -20 */
    }
};
#endif
3618
#if defined(ELF_HOST_MACHINE)
/* Register the generated-code buffer and its unwind info (the
   debug_frame above) with the JIT debug interface.  */
void tcg_register_jit(void *buf, size_t buf_size)
{
    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
}
#endif
3625