1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49#include "qemu/osdep.h"
50#include "qemu-common.h"
51#include "qemu/error-report.h"
52
53#include "sysemu/hvf.h"
54#include "sysemu/hvf_int.h"
55#include "sysemu/runstate.h"
56#include "hvf-i386.h"
57#include "vmcs.h"
58#include "vmx.h"
59#include "x86.h"
60#include "x86_descr.h"
61#include "x86_mmu.h"
62#include "x86_decode.h"
63#include "x86_emu.h"
64#include "x86_task.h"
65#include "x86hvf.h"
66
67#include <Hypervisor/hv.h>
68#include <Hypervisor/hv_vmx.h>
69#include <sys/sysctl.h>
70
71#include "hw/i386/apic_internal.h"
72#include "qemu/main-loop.h"
73#include "qemu/accel.h"
74#include "target/i386/cpu.h"
75
76void vmx_update_tpr(CPUState *cpu)
77{
78
79 X86CPU *x86_cpu = X86_CPU(cpu);
80 int tpr = cpu_get_apic_tpr(x86_cpu->apic_state) << 4;
81 int irr = apic_get_highest_priority_irr(x86_cpu->apic_state);
82
83 wreg(cpu->hvf->fd, HV_X86_TPR, tpr);
84 if (irr == -1) {
85 wvmcs(cpu->hvf->fd, VMCS_TPR_THRESHOLD, 0);
86 } else {
87 wvmcs(cpu->hvf->fd, VMCS_TPR_THRESHOLD, (irr > tpr) ? tpr >> 4 :
88 irr >> 4);
89 }
90}
91
92static void update_apic_tpr(CPUState *cpu)
93{
94 X86CPU *x86_cpu = X86_CPU(cpu);
95 int tpr = rreg(cpu->hvf->fd, HV_X86_TPR) >> 4;
96 cpu_set_apic_tpr(x86_cpu->apic_state, tpr);
97}
98
/*
 * Mask for the low 8 bits of the IDT-vectoring information value; applied
 * in the task-switch exit handler to obtain the vector that is passed to
 * vmx_handle_task_switch().
 */
#define VECTORING_INFO_VECTOR_MASK 0xff
100
101void hvf_handle_io(CPUArchState *env, uint16_t port, void *buffer,
102 int direction, int size, int count)
103{
104 int i;
105 uint8_t *ptr = buffer;
106
107 for (i = 0; i < count; i++) {
108 address_space_rw(&address_space_io, port, MEMTXATTRS_UNSPECIFIED,
109 ptr, size,
110 direction);
111 ptr += size;
112 }
113}
114
115static bool ept_emulation_fault(hvf_slot *slot, uint64_t gpa, uint64_t ept_qual)
116{
117 int read, write;
118
119
120 if (ept_qual & EPT_VIOLATION_INST_FETCH) {
121 return false;
122 }
123
124
125 read = ept_qual & EPT_VIOLATION_DATA_READ ? 1 : 0;
126 write = ept_qual & EPT_VIOLATION_DATA_WRITE ? 1 : 0;
127 if ((read | write) == 0) {
128 return false;
129 }
130
131 if (write && slot) {
132 if (slot->flags & HVF_SLOT_LOG) {
133 memory_region_set_dirty(slot->region, gpa - slot->start, 1);
134 hv_vm_protect((hv_gpaddr_t)slot->start, (size_t)slot->size,
135 HV_MEMORY_READ | HV_MEMORY_WRITE);
136 }
137 }
138
139
140
141
142
143
144 if ((ept_qual & EPT_VIOLATION_GLA_VALID) == 0 ||
145 (ept_qual & EPT_VIOLATION_XLAT_VALID) == 0) {
146 return false;
147 }
148
149 if (!slot) {
150 return true;
151 }
152 if (!memory_region_is_ram(slot->region) &&
153 !(read && memory_region_is_romd(slot->region))) {
154 return true;
155 }
156 return false;
157}
158
159void hvf_arch_vcpu_destroy(CPUState *cpu)
160{
161 X86CPU *x86_cpu = X86_CPU(cpu);
162 CPUX86State *env = &x86_cpu->env;
163
164 g_free(env->hvf_mmio_buf);
165}
166
167static void init_tsc_freq(CPUX86State *env)
168{
169 size_t length;
170 uint64_t tsc_freq;
171
172 if (env->tsc_khz != 0) {
173 return;
174 }
175
176 length = sizeof(uint64_t);
177 if (sysctlbyname("machdep.tsc.frequency", &tsc_freq, &length, NULL, 0)) {
178 return;
179 }
180 env->tsc_khz = tsc_freq / 1000;
181}
182
183static void init_apic_bus_freq(CPUX86State *env)
184{
185 size_t length;
186 uint64_t bus_freq;
187
188 if (env->apic_bus_freq != 0) {
189 return;
190 }
191
192 length = sizeof(uint64_t);
193 if (sysctlbyname("hw.busfrequency", &bus_freq, &length, NULL, 0)) {
194 return;
195 }
196 env->apic_bus_freq = bus_freq;
197}
198
199static inline bool tsc_is_known(CPUX86State *env)
200{
201 return env->tsc_khz != 0;
202}
203
204static inline bool apic_bus_freq_is_known(CPUX86State *env)
205{
206 return env->apic_bus_freq != 0;
207}
208
209int hvf_arch_init_vcpu(CPUState *cpu)
210{
211 X86CPU *x86cpu = X86_CPU(cpu);
212 CPUX86State *env = &x86cpu->env;
213
214 init_emu();
215 init_decoder();
216
217 hvf_state->hvf_caps = g_new0(struct hvf_vcpu_caps, 1);
218 env->hvf_mmio_buf = g_new(char, 4096);
219
220 if (x86cpu->vmware_cpuid_freq) {
221 init_tsc_freq(env);
222 init_apic_bus_freq(env);
223
224 if (!tsc_is_known(env) || !apic_bus_freq_is_known(env)) {
225 error_report("vmware-cpuid-freq: feature couldn't be enabled");
226 }
227 }
228
229 if (hv_vmx_read_capability(HV_VMX_CAP_PINBASED,
230 &hvf_state->hvf_caps->vmx_cap_pinbased)) {
231 abort();
232 }
233 if (hv_vmx_read_capability(HV_VMX_CAP_PROCBASED,
234 &hvf_state->hvf_caps->vmx_cap_procbased)) {
235 abort();
236 }
237 if (hv_vmx_read_capability(HV_VMX_CAP_PROCBASED2,
238 &hvf_state->hvf_caps->vmx_cap_procbased2)) {
239 abort();
240 }
241 if (hv_vmx_read_capability(HV_VMX_CAP_ENTRY,
242 &hvf_state->hvf_caps->vmx_cap_entry)) {
243 abort();
244 }
245
246
247 wvmcs(cpu->hvf->fd, VMCS_PIN_BASED_CTLS,
248 cap2ctrl(hvf_state->hvf_caps->vmx_cap_pinbased,
249 VMCS_PIN_BASED_CTLS_EXTINT |
250 VMCS_PIN_BASED_CTLS_NMI |
251 VMCS_PIN_BASED_CTLS_VNMI));
252 wvmcs(cpu->hvf->fd, VMCS_PRI_PROC_BASED_CTLS,
253 cap2ctrl(hvf_state->hvf_caps->vmx_cap_procbased,
254 VMCS_PRI_PROC_BASED_CTLS_HLT |
255 VMCS_PRI_PROC_BASED_CTLS_MWAIT |
256 VMCS_PRI_PROC_BASED_CTLS_TSC_OFFSET |
257 VMCS_PRI_PROC_BASED_CTLS_TPR_SHADOW) |
258 VMCS_PRI_PROC_BASED_CTLS_SEC_CONTROL);
259 wvmcs(cpu->hvf->fd, VMCS_SEC_PROC_BASED_CTLS,
260 cap2ctrl(hvf_state->hvf_caps->vmx_cap_procbased2,
261 VMCS_PRI_PROC_BASED2_CTLS_APIC_ACCESSES));
262
263 wvmcs(cpu->hvf->fd, VMCS_ENTRY_CTLS, cap2ctrl(hvf_state->hvf_caps->vmx_cap_entry,
264 0));
265 wvmcs(cpu->hvf->fd, VMCS_EXCEPTION_BITMAP, 0);
266
267 wvmcs(cpu->hvf->fd, VMCS_TPR_THRESHOLD, 0);
268
269 x86cpu = X86_CPU(cpu);
270 x86cpu->env.xsave_buf_len = 4096;
271 x86cpu->env.xsave_buf = qemu_memalign(4096, x86cpu->env.xsave_buf_len);
272
273
274
275
276
277 assert(hvf_get_supported_cpuid(0xd, 0, R_ECX) <= x86cpu->env.xsave_buf_len);
278
279 hv_vcpu_enable_native_msr(cpu->hvf->fd, MSR_STAR, 1);
280 hv_vcpu_enable_native_msr(cpu->hvf->fd, MSR_LSTAR, 1);
281 hv_vcpu_enable_native_msr(cpu->hvf->fd, MSR_CSTAR, 1);
282 hv_vcpu_enable_native_msr(cpu->hvf->fd, MSR_FMASK, 1);
283 hv_vcpu_enable_native_msr(cpu->hvf->fd, MSR_FSBASE, 1);
284 hv_vcpu_enable_native_msr(cpu->hvf->fd, MSR_GSBASE, 1);
285 hv_vcpu_enable_native_msr(cpu->hvf->fd, MSR_KERNELGSBASE, 1);
286 hv_vcpu_enable_native_msr(cpu->hvf->fd, MSR_TSC_AUX, 1);
287 hv_vcpu_enable_native_msr(cpu->hvf->fd, MSR_IA32_TSC, 1);
288 hv_vcpu_enable_native_msr(cpu->hvf->fd, MSR_IA32_SYSENTER_CS, 1);
289 hv_vcpu_enable_native_msr(cpu->hvf->fd, MSR_IA32_SYSENTER_EIP, 1);
290 hv_vcpu_enable_native_msr(cpu->hvf->fd, MSR_IA32_SYSENTER_ESP, 1);
291
292 return 0;
293}
294
295static void hvf_store_events(CPUState *cpu, uint32_t ins_len, uint64_t idtvec_info)
296{
297 X86CPU *x86_cpu = X86_CPU(cpu);
298 CPUX86State *env = &x86_cpu->env;
299
300 env->exception_nr = -1;
301 env->exception_pending = 0;
302 env->exception_injected = 0;
303 env->interrupt_injected = -1;
304 env->nmi_injected = false;
305 env->ins_len = 0;
306 env->has_error_code = false;
307 if (idtvec_info & VMCS_IDT_VEC_VALID) {
308 switch (idtvec_info & VMCS_IDT_VEC_TYPE) {
309 case VMCS_IDT_VEC_HWINTR:
310 case VMCS_IDT_VEC_SWINTR:
311 env->interrupt_injected = idtvec_info & VMCS_IDT_VEC_VECNUM;
312 break;
313 case VMCS_IDT_VEC_NMI:
314 env->nmi_injected = true;
315 break;
316 case VMCS_IDT_VEC_HWEXCEPTION:
317 case VMCS_IDT_VEC_SWEXCEPTION:
318 env->exception_nr = idtvec_info & VMCS_IDT_VEC_VECNUM;
319 env->exception_injected = 1;
320 break;
321 case VMCS_IDT_VEC_PRIV_SWEXCEPTION:
322 default:
323 abort();
324 }
325 if ((idtvec_info & VMCS_IDT_VEC_TYPE) == VMCS_IDT_VEC_SWEXCEPTION ||
326 (idtvec_info & VMCS_IDT_VEC_TYPE) == VMCS_IDT_VEC_SWINTR) {
327 env->ins_len = ins_len;
328 }
329 if (idtvec_info & VMCS_IDT_VEC_ERRCODE_VALID) {
330 env->has_error_code = true;
331 env->error_code = rvmcs(cpu->hvf->fd, VMCS_IDT_VECTORING_ERROR);
332 }
333 }
334 if ((rvmcs(cpu->hvf->fd, VMCS_GUEST_INTERRUPTIBILITY) &
335 VMCS_INTERRUPTIBILITY_NMI_BLOCKING)) {
336 env->hflags2 |= HF2_NMI_MASK;
337 } else {
338 env->hflags2 &= ~HF2_NMI_MASK;
339 }
340 if (rvmcs(cpu->hvf->fd, VMCS_GUEST_INTERRUPTIBILITY) &
341 (VMCS_INTERRUPTIBILITY_STI_BLOCKING |
342 VMCS_INTERRUPTIBILITY_MOVSS_BLOCKING)) {
343 env->hflags |= HF_INHIBIT_IRQ_MASK;
344 } else {
345 env->hflags &= ~HF_INHIBIT_IRQ_MASK;
346 }
347}
348
349static void hvf_cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
350 uint32_t *eax, uint32_t *ebx,
351 uint32_t *ecx, uint32_t *edx)
352{
353
354
355
356
357
358
359
360
361
362 if (index < 0x40000000 || index > 0x40000010 ||
363 !tsc_is_known(env) || !apic_bus_freq_is_known(env)) {
364
365 cpu_x86_cpuid(env, index, count, eax, ebx, ecx, edx);
366 return;
367 }
368
369 switch (index) {
370 case 0x40000000:
371 *eax = 0x40000010;
372 *ebx = 0;
373 *ecx = 0;
374 *edx = 0;
375 break;
376 case 0x40000010:
377 *eax = env->tsc_khz;
378 *ebx = env->apic_bus_freq / 1000;
379 *ecx = 0;
380 *edx = 0;
381 break;
382 default:
383 *eax = 0;
384 *ebx = 0;
385 *ecx = 0;
386 *edx = 0;
387 break;
388 }
389}
390
391int hvf_vcpu_exec(CPUState *cpu)
392{
393 X86CPU *x86_cpu = X86_CPU(cpu);
394 CPUX86State *env = &x86_cpu->env;
395 int ret = 0;
396 uint64_t rip = 0;
397
398 if (hvf_process_events(cpu)) {
399 return EXCP_HLT;
400 }
401
402 do {
403 if (cpu->vcpu_dirty) {
404 hvf_put_registers(cpu);
405 cpu->vcpu_dirty = false;
406 }
407
408 if (hvf_inject_interrupts(cpu)) {
409 return EXCP_INTERRUPT;
410 }
411 vmx_update_tpr(cpu);
412
413 qemu_mutex_unlock_iothread();
414 if (!cpu_is_bsp(X86_CPU(cpu)) && cpu->halted) {
415 qemu_mutex_lock_iothread();
416 return EXCP_HLT;
417 }
418
419 hv_return_t r = hv_vcpu_run(cpu->hvf->fd);
420 assert_hvf_ok(r);
421
422
423 uint64_t exit_reason = rvmcs(cpu->hvf->fd, VMCS_EXIT_REASON);
424 uint64_t exit_qual = rvmcs(cpu->hvf->fd, VMCS_EXIT_QUALIFICATION);
425 uint32_t ins_len = (uint32_t)rvmcs(cpu->hvf->fd,
426 VMCS_EXIT_INSTRUCTION_LENGTH);
427
428 uint64_t idtvec_info = rvmcs(cpu->hvf->fd, VMCS_IDT_VECTORING_INFO);
429
430 hvf_store_events(cpu, ins_len, idtvec_info);
431 rip = rreg(cpu->hvf->fd, HV_X86_RIP);
432 env->eflags = rreg(cpu->hvf->fd, HV_X86_RFLAGS);
433
434 qemu_mutex_lock_iothread();
435
436 update_apic_tpr(cpu);
437 current_cpu = cpu;
438
439 ret = 0;
440 switch (exit_reason) {
441 case EXIT_REASON_HLT: {
442 macvm_set_rip(cpu, rip + ins_len);
443 if (!((cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
444 (env->eflags & IF_MASK))
445 && !(cpu->interrupt_request & CPU_INTERRUPT_NMI) &&
446 !(idtvec_info & VMCS_IDT_VEC_VALID)) {
447 cpu->halted = 1;
448 ret = EXCP_HLT;
449 break;
450 }
451 ret = EXCP_INTERRUPT;
452 break;
453 }
454 case EXIT_REASON_MWAIT: {
455 ret = EXCP_INTERRUPT;
456 break;
457 }
458
459 case EXIT_REASON_EPT_FAULT:
460 {
461 hvf_slot *slot;
462 uint64_t gpa = rvmcs(cpu->hvf->fd, VMCS_GUEST_PHYSICAL_ADDRESS);
463
464 if (((idtvec_info & VMCS_IDT_VEC_VALID) == 0) &&
465 ((exit_qual & EXIT_QUAL_NMIUDTI) != 0)) {
466 vmx_set_nmi_blocking(cpu);
467 }
468
469 slot = hvf_find_overlap_slot(gpa, 1);
470
471 if (ept_emulation_fault(slot, gpa, exit_qual)) {
472 struct x86_decode decode;
473
474 load_regs(cpu);
475 decode_instruction(env, &decode);
476 exec_instruction(env, &decode);
477 store_regs(cpu);
478 break;
479 }
480 break;
481 }
482 case EXIT_REASON_INOUT:
483 {
484 uint32_t in = (exit_qual & 8) != 0;
485 uint32_t size = (exit_qual & 7) + 1;
486 uint32_t string = (exit_qual & 16) != 0;
487 uint32_t port = exit_qual >> 16;
488
489
490 if (!string && in) {
491 uint64_t val = 0;
492 load_regs(cpu);
493 hvf_handle_io(env, port, &val, 0, size, 1);
494 if (size == 1) {
495 AL(env) = val;
496 } else if (size == 2) {
497 AX(env) = val;
498 } else if (size == 4) {
499 RAX(env) = (uint32_t)val;
500 } else {
501 RAX(env) = (uint64_t)val;
502 }
503 env->eip += ins_len;
504 store_regs(cpu);
505 break;
506 } else if (!string && !in) {
507 RAX(env) = rreg(cpu->hvf->fd, HV_X86_RAX);
508 hvf_handle_io(env, port, &RAX(env), 1, size, 1);
509 macvm_set_rip(cpu, rip + ins_len);
510 break;
511 }
512 struct x86_decode decode;
513
514 load_regs(cpu);
515 decode_instruction(env, &decode);
516 assert(ins_len == decode.len);
517 exec_instruction(env, &decode);
518 store_regs(cpu);
519
520 break;
521 }
522 case EXIT_REASON_CPUID: {
523 uint32_t rax = (uint32_t)rreg(cpu->hvf->fd, HV_X86_RAX);
524 uint32_t rbx = (uint32_t)rreg(cpu->hvf->fd, HV_X86_RBX);
525 uint32_t rcx = (uint32_t)rreg(cpu->hvf->fd, HV_X86_RCX);
526 uint32_t rdx = (uint32_t)rreg(cpu->hvf->fd, HV_X86_RDX);
527
528 if (rax == 1) {
529
530 env->cr[4] = rvmcs(cpu->hvf->fd, VMCS_GUEST_CR4);
531 }
532 hvf_cpu_x86_cpuid(env, rax, rcx, &rax, &rbx, &rcx, &rdx);
533
534 wreg(cpu->hvf->fd, HV_X86_RAX, rax);
535 wreg(cpu->hvf->fd, HV_X86_RBX, rbx);
536 wreg(cpu->hvf->fd, HV_X86_RCX, rcx);
537 wreg(cpu->hvf->fd, HV_X86_RDX, rdx);
538
539 macvm_set_rip(cpu, rip + ins_len);
540 break;
541 }
542 case EXIT_REASON_XSETBV: {
543 X86CPU *x86_cpu = X86_CPU(cpu);
544 CPUX86State *env = &x86_cpu->env;
545 uint32_t eax = (uint32_t)rreg(cpu->hvf->fd, HV_X86_RAX);
546 uint32_t ecx = (uint32_t)rreg(cpu->hvf->fd, HV_X86_RCX);
547 uint32_t edx = (uint32_t)rreg(cpu->hvf->fd, HV_X86_RDX);
548
549 if (ecx) {
550 macvm_set_rip(cpu, rip + ins_len);
551 break;
552 }
553 env->xcr0 = ((uint64_t)edx << 32) | eax;
554 wreg(cpu->hvf->fd, HV_X86_XCR0, env->xcr0 | 1);
555 macvm_set_rip(cpu, rip + ins_len);
556 break;
557 }
558 case EXIT_REASON_INTR_WINDOW:
559 vmx_clear_int_window_exiting(cpu);
560 ret = EXCP_INTERRUPT;
561 break;
562 case EXIT_REASON_NMI_WINDOW:
563 vmx_clear_nmi_window_exiting(cpu);
564 ret = EXCP_INTERRUPT;
565 break;
566 case EXIT_REASON_EXT_INTR:
567
568 ret = EXCP_INTERRUPT;
569 break;
570 case EXIT_REASON_RDMSR:
571 case EXIT_REASON_WRMSR:
572 {
573 load_regs(cpu);
574 if (exit_reason == EXIT_REASON_RDMSR) {
575 simulate_rdmsr(cpu);
576 } else {
577 simulate_wrmsr(cpu);
578 }
579 env->eip += ins_len;
580 store_regs(cpu);
581 break;
582 }
583 case EXIT_REASON_CR_ACCESS: {
584 int cr;
585 int reg;
586
587 load_regs(cpu);
588 cr = exit_qual & 15;
589 reg = (exit_qual >> 8) & 15;
590
591 switch (cr) {
592 case 0x0: {
593 macvm_set_cr0(cpu->hvf->fd, RRX(env, reg));
594 break;
595 }
596 case 4: {
597 macvm_set_cr4(cpu->hvf->fd, RRX(env, reg));
598 break;
599 }
600 case 8: {
601 X86CPU *x86_cpu = X86_CPU(cpu);
602 if (exit_qual & 0x10) {
603 RRX(env, reg) = cpu_get_apic_tpr(x86_cpu->apic_state);
604 } else {
605 int tpr = RRX(env, reg);
606 cpu_set_apic_tpr(x86_cpu->apic_state, tpr);
607 ret = EXCP_INTERRUPT;
608 }
609 break;
610 }
611 default:
612 error_report("Unrecognized CR %d", cr);
613 abort();
614 }
615 env->eip += ins_len;
616 store_regs(cpu);
617 break;
618 }
619 case EXIT_REASON_APIC_ACCESS: {
620 struct x86_decode decode;
621
622 load_regs(cpu);
623 decode_instruction(env, &decode);
624 exec_instruction(env, &decode);
625 store_regs(cpu);
626 break;
627 }
628 case EXIT_REASON_TPR: {
629 ret = 1;
630 break;
631 }
632 case EXIT_REASON_TASK_SWITCH: {
633 uint64_t vinfo = rvmcs(cpu->hvf->fd, VMCS_IDT_VECTORING_INFO);
634 x68_segment_selector sel = {.sel = exit_qual & 0xffff};
635 vmx_handle_task_switch(cpu, sel, (exit_qual >> 30) & 0x3,
636 vinfo & VMCS_INTR_VALID, vinfo & VECTORING_INFO_VECTOR_MASK, vinfo
637 & VMCS_INTR_T_MASK);
638 break;
639 }
640 case EXIT_REASON_TRIPLE_FAULT: {
641 qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
642 ret = EXCP_INTERRUPT;
643 break;
644 }
645 case EXIT_REASON_RDPMC:
646 wreg(cpu->hvf->fd, HV_X86_RAX, 0);
647 wreg(cpu->hvf->fd, HV_X86_RDX, 0);
648 macvm_set_rip(cpu, rip + ins_len);
649 break;
650 case VMX_REASON_VMCALL:
651 env->exception_nr = EXCP0D_GPF;
652 env->exception_injected = 1;
653 env->has_error_code = true;
654 env->error_code = 0;
655 break;
656 default:
657 error_report("%llx: unhandled exit %llx", rip, exit_reason);
658 }
659 } while (ret == 0);
660
661 return ret;
662}
663