1
2
3
4
5
6
7
8
9
10#include "qemu/osdep.h"
11#include "cpu.h"
12#include "exec/address-spaces.h"
13#include "exec/ioport.h"
14#include "qemu/accel.h"
15#include "sysemu/nvmm.h"
16#include "sysemu/cpus.h"
17#include "sysemu/runstate.h"
18#include "qemu/main-loop.h"
19#include "qemu/error-report.h"
20#include "qapi/error.h"
21#include "qemu/queue.h"
22#include "migration/blocker.h"
23#include "strings.h"
24
25#include "nvmm-accel-ops.h"
26
27#include <nvmm.h>
28
29struct qemu_vcpu {
30 struct nvmm_vcpu vcpu;
31 uint8_t tpr;
32 bool stop;
33
34
35 bool int_window_exit;
36 bool nmi_window_exit;
37
38
39 bool int_shadow;
40};
41
42struct qemu_machine {
43 struct nvmm_capability cap;
44 struct nvmm_machine mach;
45};
46
47
48
49static bool nvmm_allowed;
50static struct qemu_machine qemu_mach;
51
52static struct qemu_vcpu *
53get_qemu_vcpu(CPUState *cpu)
54{
55 return (struct qemu_vcpu *)cpu->hax_vcpu;
56}
57
58static struct nvmm_machine *
59get_nvmm_mach(void)
60{
61 return &qemu_mach.mach;
62}
63
64
65
66static void
67nvmm_set_segment(struct nvmm_x64_state_seg *nseg, const SegmentCache *qseg)
68{
69 uint32_t attrib = qseg->flags;
70
71 nseg->selector = qseg->selector;
72 nseg->limit = qseg->limit;
73 nseg->base = qseg->base;
74 nseg->attrib.type = __SHIFTOUT(attrib, DESC_TYPE_MASK);
75 nseg->attrib.s = __SHIFTOUT(attrib, DESC_S_MASK);
76 nseg->attrib.dpl = __SHIFTOUT(attrib, DESC_DPL_MASK);
77 nseg->attrib.p = __SHIFTOUT(attrib, DESC_P_MASK);
78 nseg->attrib.avl = __SHIFTOUT(attrib, DESC_AVL_MASK);
79 nseg->attrib.l = __SHIFTOUT(attrib, DESC_L_MASK);
80 nseg->attrib.def = __SHIFTOUT(attrib, DESC_B_MASK);
81 nseg->attrib.g = __SHIFTOUT(attrib, DESC_G_MASK);
82}
83
84static void
85nvmm_set_registers(CPUState *cpu)
86{
87 CPUX86State *env = cpu->env_ptr;
88 struct nvmm_machine *mach = get_nvmm_mach();
89 struct qemu_vcpu *qcpu = get_qemu_vcpu(cpu);
90 struct nvmm_vcpu *vcpu = &qcpu->vcpu;
91 struct nvmm_x64_state *state = vcpu->state;
92 uint64_t bitmap;
93 size_t i;
94 int ret;
95
96 assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu));
97
98
99 state->gprs[NVMM_X64_GPR_RAX] = env->regs[R_EAX];
100 state->gprs[NVMM_X64_GPR_RCX] = env->regs[R_ECX];
101 state->gprs[NVMM_X64_GPR_RDX] = env->regs[R_EDX];
102 state->gprs[NVMM_X64_GPR_RBX] = env->regs[R_EBX];
103 state->gprs[NVMM_X64_GPR_RSP] = env->regs[R_ESP];
104 state->gprs[NVMM_X64_GPR_RBP] = env->regs[R_EBP];
105 state->gprs[NVMM_X64_GPR_RSI] = env->regs[R_ESI];
106 state->gprs[NVMM_X64_GPR_RDI] = env->regs[R_EDI];
107#ifdef TARGET_X86_64
108 state->gprs[NVMM_X64_GPR_R8] = env->regs[R_R8];
109 state->gprs[NVMM_X64_GPR_R9] = env->regs[R_R9];
110 state->gprs[NVMM_X64_GPR_R10] = env->regs[R_R10];
111 state->gprs[NVMM_X64_GPR_R11] = env->regs[R_R11];
112 state->gprs[NVMM_X64_GPR_R12] = env->regs[R_R12];
113 state->gprs[NVMM_X64_GPR_R13] = env->regs[R_R13];
114 state->gprs[NVMM_X64_GPR_R14] = env->regs[R_R14];
115 state->gprs[NVMM_X64_GPR_R15] = env->regs[R_R15];
116#endif
117
118
119 state->gprs[NVMM_X64_GPR_RIP] = env->eip;
120 state->gprs[NVMM_X64_GPR_RFLAGS] = env->eflags;
121
122
123 nvmm_set_segment(&state->segs[NVMM_X64_SEG_CS], &env->segs[R_CS]);
124 nvmm_set_segment(&state->segs[NVMM_X64_SEG_DS], &env->segs[R_DS]);
125 nvmm_set_segment(&state->segs[NVMM_X64_SEG_ES], &env->segs[R_ES]);
126 nvmm_set_segment(&state->segs[NVMM_X64_SEG_FS], &env->segs[R_FS]);
127 nvmm_set_segment(&state->segs[NVMM_X64_SEG_GS], &env->segs[R_GS]);
128 nvmm_set_segment(&state->segs[NVMM_X64_SEG_SS], &env->segs[R_SS]);
129
130
131 nvmm_set_segment(&state->segs[NVMM_X64_SEG_GDT], &env->gdt);
132 nvmm_set_segment(&state->segs[NVMM_X64_SEG_LDT], &env->ldt);
133 nvmm_set_segment(&state->segs[NVMM_X64_SEG_TR], &env->tr);
134 nvmm_set_segment(&state->segs[NVMM_X64_SEG_IDT], &env->idt);
135
136
137 state->crs[NVMM_X64_CR_CR0] = env->cr[0];
138 state->crs[NVMM_X64_CR_CR2] = env->cr[2];
139 state->crs[NVMM_X64_CR_CR3] = env->cr[3];
140 state->crs[NVMM_X64_CR_CR4] = env->cr[4];
141 state->crs[NVMM_X64_CR_CR8] = qcpu->tpr;
142 state->crs[NVMM_X64_CR_XCR0] = env->xcr0;
143
144
145 state->drs[NVMM_X64_DR_DR0] = env->dr[0];
146 state->drs[NVMM_X64_DR_DR1] = env->dr[1];
147 state->drs[NVMM_X64_DR_DR2] = env->dr[2];
148 state->drs[NVMM_X64_DR_DR3] = env->dr[3];
149 state->drs[NVMM_X64_DR_DR6] = env->dr[6];
150 state->drs[NVMM_X64_DR_DR7] = env->dr[7];
151
152
153 state->fpu.fx_cw = env->fpuc;
154 state->fpu.fx_sw = (env->fpus & ~0x3800) | ((env->fpstt & 0x7) << 11);
155 state->fpu.fx_tw = 0;
156 for (i = 0; i < 8; i++) {
157 state->fpu.fx_tw |= (!env->fptags[i]) << i;
158 }
159 state->fpu.fx_opcode = env->fpop;
160 state->fpu.fx_ip.fa_64 = env->fpip;
161 state->fpu.fx_dp.fa_64 = env->fpdp;
162 state->fpu.fx_mxcsr = env->mxcsr;
163 state->fpu.fx_mxcsr_mask = 0x0000FFFF;
164 assert(sizeof(state->fpu.fx_87_ac) == sizeof(env->fpregs));
165 memcpy(state->fpu.fx_87_ac, env->fpregs, sizeof(env->fpregs));
166 for (i = 0; i < CPU_NB_REGS; i++) {
167 memcpy(&state->fpu.fx_xmm[i].xmm_bytes[0],
168 &env->xmm_regs[i].ZMM_Q(0), 8);
169 memcpy(&state->fpu.fx_xmm[i].xmm_bytes[8],
170 &env->xmm_regs[i].ZMM_Q(1), 8);
171 }
172
173
174 state->msrs[NVMM_X64_MSR_EFER] = env->efer;
175 state->msrs[NVMM_X64_MSR_STAR] = env->star;
176#ifdef TARGET_X86_64
177 state->msrs[NVMM_X64_MSR_LSTAR] = env->lstar;
178 state->msrs[NVMM_X64_MSR_CSTAR] = env->cstar;
179 state->msrs[NVMM_X64_MSR_SFMASK] = env->fmask;
180 state->msrs[NVMM_X64_MSR_KERNELGSBASE] = env->kernelgsbase;
181#endif
182 state->msrs[NVMM_X64_MSR_SYSENTER_CS] = env->sysenter_cs;
183 state->msrs[NVMM_X64_MSR_SYSENTER_ESP] = env->sysenter_esp;
184 state->msrs[NVMM_X64_MSR_SYSENTER_EIP] = env->sysenter_eip;
185 state->msrs[NVMM_X64_MSR_PAT] = env->pat;
186 state->msrs[NVMM_X64_MSR_TSC] = env->tsc;
187
188 bitmap =
189 NVMM_X64_STATE_SEGS |
190 NVMM_X64_STATE_GPRS |
191 NVMM_X64_STATE_CRS |
192 NVMM_X64_STATE_DRS |
193 NVMM_X64_STATE_MSRS |
194 NVMM_X64_STATE_FPU;
195
196 ret = nvmm_vcpu_setstate(mach, vcpu, bitmap);
197 if (ret == -1) {
198 error_report("NVMM: Failed to set virtual processor context,"
199 " error=%d", errno);
200 }
201}
202
203static void
204nvmm_get_segment(SegmentCache *qseg, const struct nvmm_x64_state_seg *nseg)
205{
206 qseg->selector = nseg->selector;
207 qseg->limit = nseg->limit;
208 qseg->base = nseg->base;
209
210 qseg->flags =
211 __SHIFTIN((uint32_t)nseg->attrib.type, DESC_TYPE_MASK) |
212 __SHIFTIN((uint32_t)nseg->attrib.s, DESC_S_MASK) |
213 __SHIFTIN((uint32_t)nseg->attrib.dpl, DESC_DPL_MASK) |
214 __SHIFTIN((uint32_t)nseg->attrib.p, DESC_P_MASK) |
215 __SHIFTIN((uint32_t)nseg->attrib.avl, DESC_AVL_MASK) |
216 __SHIFTIN((uint32_t)nseg->attrib.l, DESC_L_MASK) |
217 __SHIFTIN((uint32_t)nseg->attrib.def, DESC_B_MASK) |
218 __SHIFTIN((uint32_t)nseg->attrib.g, DESC_G_MASK);
219}
220
221static void
222nvmm_get_registers(CPUState *cpu)
223{
224 CPUX86State *env = cpu->env_ptr;
225 struct nvmm_machine *mach = get_nvmm_mach();
226 struct qemu_vcpu *qcpu = get_qemu_vcpu(cpu);
227 struct nvmm_vcpu *vcpu = &qcpu->vcpu;
228 X86CPU *x86_cpu = X86_CPU(cpu);
229 struct nvmm_x64_state *state = vcpu->state;
230 uint64_t bitmap, tpr;
231 size_t i;
232 int ret;
233
234 assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu));
235
236 bitmap =
237 NVMM_X64_STATE_SEGS |
238 NVMM_X64_STATE_GPRS |
239 NVMM_X64_STATE_CRS |
240 NVMM_X64_STATE_DRS |
241 NVMM_X64_STATE_MSRS |
242 NVMM_X64_STATE_FPU;
243
244 ret = nvmm_vcpu_getstate(mach, vcpu, bitmap);
245 if (ret == -1) {
246 error_report("NVMM: Failed to get virtual processor context,"
247 " error=%d", errno);
248 }
249
250
251 env->regs[R_EAX] = state->gprs[NVMM_X64_GPR_RAX];
252 env->regs[R_ECX] = state->gprs[NVMM_X64_GPR_RCX];
253 env->regs[R_EDX] = state->gprs[NVMM_X64_GPR_RDX];
254 env->regs[R_EBX] = state->gprs[NVMM_X64_GPR_RBX];
255 env->regs[R_ESP] = state->gprs[NVMM_X64_GPR_RSP];
256 env->regs[R_EBP] = state->gprs[NVMM_X64_GPR_RBP];
257 env->regs[R_ESI] = state->gprs[NVMM_X64_GPR_RSI];
258 env->regs[R_EDI] = state->gprs[NVMM_X64_GPR_RDI];
259#ifdef TARGET_X86_64
260 env->regs[R_R8] = state->gprs[NVMM_X64_GPR_R8];
261 env->regs[R_R9] = state->gprs[NVMM_X64_GPR_R9];
262 env->regs[R_R10] = state->gprs[NVMM_X64_GPR_R10];
263 env->regs[R_R11] = state->gprs[NVMM_X64_GPR_R11];
264 env->regs[R_R12] = state->gprs[NVMM_X64_GPR_R12];
265 env->regs[R_R13] = state->gprs[NVMM_X64_GPR_R13];
266 env->regs[R_R14] = state->gprs[NVMM_X64_GPR_R14];
267 env->regs[R_R15] = state->gprs[NVMM_X64_GPR_R15];
268#endif
269
270
271 env->eip = state->gprs[NVMM_X64_GPR_RIP];
272 env->eflags = state->gprs[NVMM_X64_GPR_RFLAGS];
273
274
275 nvmm_get_segment(&env->segs[R_ES], &state->segs[NVMM_X64_SEG_ES]);
276 nvmm_get_segment(&env->segs[R_CS], &state->segs[NVMM_X64_SEG_CS]);
277 nvmm_get_segment(&env->segs[R_SS], &state->segs[NVMM_X64_SEG_SS]);
278 nvmm_get_segment(&env->segs[R_DS], &state->segs[NVMM_X64_SEG_DS]);
279 nvmm_get_segment(&env->segs[R_FS], &state->segs[NVMM_X64_SEG_FS]);
280 nvmm_get_segment(&env->segs[R_GS], &state->segs[NVMM_X64_SEG_GS]);
281
282
283 nvmm_get_segment(&env->gdt, &state->segs[NVMM_X64_SEG_GDT]);
284 nvmm_get_segment(&env->ldt, &state->segs[NVMM_X64_SEG_LDT]);
285 nvmm_get_segment(&env->tr, &state->segs[NVMM_X64_SEG_TR]);
286 nvmm_get_segment(&env->idt, &state->segs[NVMM_X64_SEG_IDT]);
287
288
289 env->cr[0] = state->crs[NVMM_X64_CR_CR0];
290 env->cr[2] = state->crs[NVMM_X64_CR_CR2];
291 env->cr[3] = state->crs[NVMM_X64_CR_CR3];
292 env->cr[4] = state->crs[NVMM_X64_CR_CR4];
293 tpr = state->crs[NVMM_X64_CR_CR8];
294 if (tpr != qcpu->tpr) {
295 qcpu->tpr = tpr;
296 cpu_set_apic_tpr(x86_cpu->apic_state, tpr);
297 }
298 env->xcr0 = state->crs[NVMM_X64_CR_XCR0];
299
300
301 env->dr[0] = state->drs[NVMM_X64_DR_DR0];
302 env->dr[1] = state->drs[NVMM_X64_DR_DR1];
303 env->dr[2] = state->drs[NVMM_X64_DR_DR2];
304 env->dr[3] = state->drs[NVMM_X64_DR_DR3];
305 env->dr[6] = state->drs[NVMM_X64_DR_DR6];
306 env->dr[7] = state->drs[NVMM_X64_DR_DR7];
307
308
309 env->fpuc = state->fpu.fx_cw;
310 env->fpstt = (state->fpu.fx_sw >> 11) & 0x7;
311 env->fpus = state->fpu.fx_sw & ~0x3800;
312 for (i = 0; i < 8; i++) {
313 env->fptags[i] = !((state->fpu.fx_tw >> i) & 1);
314 }
315 env->fpop = state->fpu.fx_opcode;
316 env->fpip = state->fpu.fx_ip.fa_64;
317 env->fpdp = state->fpu.fx_dp.fa_64;
318 env->mxcsr = state->fpu.fx_mxcsr;
319 assert(sizeof(state->fpu.fx_87_ac) == sizeof(env->fpregs));
320 memcpy(env->fpregs, state->fpu.fx_87_ac, sizeof(env->fpregs));
321 for (i = 0; i < CPU_NB_REGS; i++) {
322 memcpy(&env->xmm_regs[i].ZMM_Q(0),
323 &state->fpu.fx_xmm[i].xmm_bytes[0], 8);
324 memcpy(&env->xmm_regs[i].ZMM_Q(1),
325 &state->fpu.fx_xmm[i].xmm_bytes[8], 8);
326 }
327
328
329 env->efer = state->msrs[NVMM_X64_MSR_EFER];
330 env->star = state->msrs[NVMM_X64_MSR_STAR];
331#ifdef TARGET_X86_64
332 env->lstar = state->msrs[NVMM_X64_MSR_LSTAR];
333 env->cstar = state->msrs[NVMM_X64_MSR_CSTAR];
334 env->fmask = state->msrs[NVMM_X64_MSR_SFMASK];
335 env->kernelgsbase = state->msrs[NVMM_X64_MSR_KERNELGSBASE];
336#endif
337 env->sysenter_cs = state->msrs[NVMM_X64_MSR_SYSENTER_CS];
338 env->sysenter_esp = state->msrs[NVMM_X64_MSR_SYSENTER_ESP];
339 env->sysenter_eip = state->msrs[NVMM_X64_MSR_SYSENTER_EIP];
340 env->pat = state->msrs[NVMM_X64_MSR_PAT];
341 env->tsc = state->msrs[NVMM_X64_MSR_TSC];
342
343 x86_update_hflags(env);
344}
345
346static bool
347nvmm_can_take_int(CPUState *cpu)
348{
349 CPUX86State *env = cpu->env_ptr;
350 struct qemu_vcpu *qcpu = get_qemu_vcpu(cpu);
351 struct nvmm_vcpu *vcpu = &qcpu->vcpu;
352 struct nvmm_machine *mach = get_nvmm_mach();
353
354 if (qcpu->int_window_exit) {
355 return false;
356 }
357
358 if (qcpu->int_shadow || !(env->eflags & IF_MASK)) {
359 struct nvmm_x64_state *state = vcpu->state;
360
361
362 nvmm_vcpu_getstate(mach, vcpu, NVMM_X64_STATE_INTR);
363 state->intr.int_window_exiting = 1;
364 nvmm_vcpu_setstate(mach, vcpu, NVMM_X64_STATE_INTR);
365
366 return false;
367 }
368
369 return true;
370}
371
372static bool
373nvmm_can_take_nmi(CPUState *cpu)
374{
375 struct qemu_vcpu *qcpu = get_qemu_vcpu(cpu);
376
377
378
379
380
381
382 if (qcpu->nmi_window_exit) {
383 return false;
384 }
385
386 return true;
387}
388
389
390
391
392
393static void
394nvmm_vcpu_pre_run(CPUState *cpu)
395{
396 CPUX86State *env = cpu->env_ptr;
397 struct nvmm_machine *mach = get_nvmm_mach();
398 struct qemu_vcpu *qcpu = get_qemu_vcpu(cpu);
399 struct nvmm_vcpu *vcpu = &qcpu->vcpu;
400 X86CPU *x86_cpu = X86_CPU(cpu);
401 struct nvmm_x64_state *state = vcpu->state;
402 struct nvmm_vcpu_event *event = vcpu->event;
403 bool has_event = false;
404 bool sync_tpr = false;
405 uint8_t tpr;
406 int ret;
407
408 qemu_mutex_lock_iothread();
409
410 tpr = cpu_get_apic_tpr(x86_cpu->apic_state);
411 if (tpr != qcpu->tpr) {
412 qcpu->tpr = tpr;
413 sync_tpr = true;
414 }
415
416
417
418
419
420 if (cpu->interrupt_request & (CPU_INTERRUPT_INIT | CPU_INTERRUPT_TPR)) {
421 cpu->exit_request = 1;
422 }
423
424 if (!has_event && (cpu->interrupt_request & CPU_INTERRUPT_NMI)) {
425 if (nvmm_can_take_nmi(cpu)) {
426 cpu->interrupt_request &= ~CPU_INTERRUPT_NMI;
427 event->type = NVMM_VCPU_EVENT_INTR;
428 event->vector = 2;
429 has_event = true;
430 }
431 }
432
433 if (!has_event && (cpu->interrupt_request & CPU_INTERRUPT_HARD)) {
434 if (nvmm_can_take_int(cpu)) {
435 cpu->interrupt_request &= ~CPU_INTERRUPT_HARD;
436 event->type = NVMM_VCPU_EVENT_INTR;
437 event->vector = cpu_get_pic_interrupt(env);
438 has_event = true;
439 }
440 }
441
442
443 if (cpu->interrupt_request & CPU_INTERRUPT_SMI) {
444 cpu->interrupt_request &= ~CPU_INTERRUPT_SMI;
445 }
446
447 if (sync_tpr) {
448 ret = nvmm_vcpu_getstate(mach, vcpu, NVMM_X64_STATE_CRS);
449 if (ret == -1) {
450 error_report("NVMM: Failed to get CPU state,"
451 " error=%d", errno);
452 }
453
454 state->crs[NVMM_X64_CR_CR8] = qcpu->tpr;
455
456 ret = nvmm_vcpu_setstate(mach, vcpu, NVMM_X64_STATE_CRS);
457 if (ret == -1) {
458 error_report("NVMM: Failed to set CPU state,"
459 " error=%d", errno);
460 }
461 }
462
463 if (has_event) {
464 ret = nvmm_vcpu_inject(mach, vcpu);
465 if (ret == -1) {
466 error_report("NVMM: Failed to inject event,"
467 " error=%d", errno);
468 }
469 }
470
471 qemu_mutex_unlock_iothread();
472}
473
474
475
476
477
478static void
479nvmm_vcpu_post_run(CPUState *cpu, struct nvmm_vcpu_exit *exit)
480{
481 struct qemu_vcpu *qcpu = get_qemu_vcpu(cpu);
482 CPUX86State *env = cpu->env_ptr;
483 X86CPU *x86_cpu = X86_CPU(cpu);
484 uint64_t tpr;
485
486 env->eflags = exit->exitstate.rflags;
487 qcpu->int_shadow = exit->exitstate.int_shadow;
488 qcpu->int_window_exit = exit->exitstate.int_window_exiting;
489 qcpu->nmi_window_exit = exit->exitstate.nmi_window_exiting;
490
491 tpr = exit->exitstate.cr8;
492 if (qcpu->tpr != tpr) {
493 qcpu->tpr = tpr;
494 qemu_mutex_lock_iothread();
495 cpu_set_apic_tpr(x86_cpu->apic_state, qcpu->tpr);
496 qemu_mutex_unlock_iothread();
497 }
498}
499
500
501
502static void
503nvmm_io_callback(struct nvmm_io *io)
504{
505 MemTxAttrs attrs = { 0 };
506 int ret;
507
508 ret = address_space_rw(&address_space_io, io->port, attrs, io->data,
509 io->size, !io->in);
510 if (ret != MEMTX_OK) {
511 error_report("NVMM: I/O Transaction Failed "
512 "[%s, port=%u, size=%zu]", (io->in ? "in" : "out"),
513 io->port, io->size);
514 }
515
516
517 current_cpu->vcpu_dirty = false;
518}
519
520static void
521nvmm_mem_callback(struct nvmm_mem *mem)
522{
523 cpu_physical_memory_rw(mem->gpa, mem->data, mem->size, mem->write);
524
525
526 current_cpu->vcpu_dirty = false;
527}
528
529static struct nvmm_assist_callbacks nvmm_callbacks = {
530 .io = nvmm_io_callback,
531 .mem = nvmm_mem_callback
532};
533
534
535
536static int
537nvmm_handle_mem(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu)
538{
539 int ret;
540
541 ret = nvmm_assist_mem(mach, vcpu);
542 if (ret == -1) {
543 error_report("NVMM: Mem Assist Failed [gpa=%p]",
544 (void *)vcpu->exit->u.mem.gpa);
545 }
546
547 return ret;
548}
549
550static int
551nvmm_handle_io(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu)
552{
553 int ret;
554
555 ret = nvmm_assist_io(mach, vcpu);
556 if (ret == -1) {
557 error_report("NVMM: I/O Assist Failed [port=%d]",
558 (int)vcpu->exit->u.io.port);
559 }
560
561 return ret;
562}
563
564static int
565nvmm_handle_rdmsr(struct nvmm_machine *mach, CPUState *cpu,
566 struct nvmm_vcpu_exit *exit)
567{
568 struct qemu_vcpu *qcpu = get_qemu_vcpu(cpu);
569 struct nvmm_vcpu *vcpu = &qcpu->vcpu;
570 X86CPU *x86_cpu = X86_CPU(cpu);
571 struct nvmm_x64_state *state = vcpu->state;
572 uint64_t val;
573 int ret;
574
575 switch (exit->u.rdmsr.msr) {
576 case MSR_IA32_APICBASE:
577 val = cpu_get_apic_base(x86_cpu->apic_state);
578 break;
579 case MSR_MTRRcap:
580 case MSR_MTRRdefType:
581 case MSR_MCG_CAP:
582 case MSR_MCG_STATUS:
583 val = 0;
584 break;
585 default:
586 val = 0;
587 error_report("NVMM: Unexpected RDMSR 0x%x, ignored",
588 exit->u.rdmsr.msr);
589 break;
590 }
591
592 ret = nvmm_vcpu_getstate(mach, vcpu, NVMM_X64_STATE_GPRS);
593 if (ret == -1) {
594 return -1;
595 }
596
597 state->gprs[NVMM_X64_GPR_RAX] = (val & 0xFFFFFFFF);
598 state->gprs[NVMM_X64_GPR_RDX] = (val >> 32);
599 state->gprs[NVMM_X64_GPR_RIP] = exit->u.rdmsr.npc;
600
601 ret = nvmm_vcpu_setstate(mach, vcpu, NVMM_X64_STATE_GPRS);
602 if (ret == -1) {
603 return -1;
604 }
605
606 return 0;
607}
608
609static int
610nvmm_handle_wrmsr(struct nvmm_machine *mach, CPUState *cpu,
611 struct nvmm_vcpu_exit *exit)
612{
613 struct qemu_vcpu *qcpu = get_qemu_vcpu(cpu);
614 struct nvmm_vcpu *vcpu = &qcpu->vcpu;
615 X86CPU *x86_cpu = X86_CPU(cpu);
616 struct nvmm_x64_state *state = vcpu->state;
617 uint64_t val;
618 int ret;
619
620 val = exit->u.wrmsr.val;
621
622 switch (exit->u.wrmsr.msr) {
623 case MSR_IA32_APICBASE:
624 cpu_set_apic_base(x86_cpu->apic_state, val);
625 break;
626 case MSR_MTRRdefType:
627 case MSR_MCG_STATUS:
628 break;
629 default:
630 error_report("NVMM: Unexpected WRMSR 0x%x [val=0x%lx], ignored",
631 exit->u.wrmsr.msr, val);
632 break;
633 }
634
635 ret = nvmm_vcpu_getstate(mach, vcpu, NVMM_X64_STATE_GPRS);
636 if (ret == -1) {
637 return -1;
638 }
639
640 state->gprs[NVMM_X64_GPR_RIP] = exit->u.wrmsr.npc;
641
642 ret = nvmm_vcpu_setstate(mach, vcpu, NVMM_X64_STATE_GPRS);
643 if (ret == -1) {
644 return -1;
645 }
646
647 return 0;
648}
649
650static int
651nvmm_handle_halted(struct nvmm_machine *mach, CPUState *cpu,
652 struct nvmm_vcpu_exit *exit)
653{
654 CPUX86State *env = cpu->env_ptr;
655 int ret = 0;
656
657 qemu_mutex_lock_iothread();
658
659 if (!((cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
660 (env->eflags & IF_MASK)) &&
661 !(cpu->interrupt_request & CPU_INTERRUPT_NMI)) {
662 cpu->exception_index = EXCP_HLT;
663 cpu->halted = true;
664 ret = 1;
665 }
666
667 qemu_mutex_unlock_iothread();
668
669 return ret;
670}
671
672static int
673nvmm_inject_ud(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu)
674{
675 struct nvmm_vcpu_event *event = vcpu->event;
676
677 event->type = NVMM_VCPU_EVENT_EXCP;
678 event->vector = 6;
679 event->u.excp.error = 0;
680
681 return nvmm_vcpu_inject(mach, vcpu);
682}
683
684static int
685nvmm_vcpu_loop(CPUState *cpu)
686{
687 CPUX86State *env = cpu->env_ptr;
688 struct nvmm_machine *mach = get_nvmm_mach();
689 struct qemu_vcpu *qcpu = get_qemu_vcpu(cpu);
690 struct nvmm_vcpu *vcpu = &qcpu->vcpu;
691 X86CPU *x86_cpu = X86_CPU(cpu);
692 struct nvmm_vcpu_exit *exit = vcpu->exit;
693 int ret;
694
695
696
697
698
699 if (cpu->interrupt_request & CPU_INTERRUPT_INIT) {
700 nvmm_cpu_synchronize_state(cpu);
701 do_cpu_init(x86_cpu);
702
703 }
704 if (cpu->interrupt_request & CPU_INTERRUPT_POLL) {
705 cpu->interrupt_request &= ~CPU_INTERRUPT_POLL;
706 apic_poll_irq(x86_cpu->apic_state);
707 }
708 if (((cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
709 (env->eflags & IF_MASK)) ||
710 (cpu->interrupt_request & CPU_INTERRUPT_NMI)) {
711 cpu->halted = false;
712 }
713 if (cpu->interrupt_request & CPU_INTERRUPT_SIPI) {
714 nvmm_cpu_synchronize_state(cpu);
715 do_cpu_sipi(x86_cpu);
716 }
717 if (cpu->interrupt_request & CPU_INTERRUPT_TPR) {
718 cpu->interrupt_request &= ~CPU_INTERRUPT_TPR;
719 nvmm_cpu_synchronize_state(cpu);
720 apic_handle_tpr_access_report(x86_cpu->apic_state, env->eip,
721 env->tpr_access_type);
722 }
723
724 if (cpu->halted) {
725 cpu->exception_index = EXCP_HLT;
726 qatomic_set(&cpu->exit_request, false);
727 return 0;
728 }
729
730 qemu_mutex_unlock_iothread();
731 cpu_exec_start(cpu);
732
733
734
735
736 do {
737 if (cpu->vcpu_dirty) {
738 nvmm_set_registers(cpu);
739 cpu->vcpu_dirty = false;
740 }
741
742 if (qcpu->stop) {
743 cpu->exception_index = EXCP_INTERRUPT;
744 qcpu->stop = false;
745 ret = 1;
746 break;
747 }
748
749 nvmm_vcpu_pre_run(cpu);
750
751 if (qatomic_read(&cpu->exit_request)) {
752#if NVMM_USER_VERSION >= 2
753 nvmm_vcpu_stop(vcpu);
754#else
755 qemu_cpu_kick_self();
756#endif
757 }
758
759
760 smp_rmb();
761 ret = nvmm_vcpu_run(mach, vcpu);
762 if (ret == -1) {
763 error_report("NVMM: Failed to exec a virtual processor,"
764 " error=%d", errno);
765 break;
766 }
767
768 nvmm_vcpu_post_run(cpu, exit);
769
770 switch (exit->reason) {
771 case NVMM_VCPU_EXIT_NONE:
772 break;
773#if NVMM_USER_VERSION >= 2
774 case NVMM_VCPU_EXIT_STOPPED:
775
776
777
778
779 smp_wmb();
780 qcpu->stop = true;
781 break;
782#endif
783 case NVMM_VCPU_EXIT_MEMORY:
784 ret = nvmm_handle_mem(mach, vcpu);
785 break;
786 case NVMM_VCPU_EXIT_IO:
787 ret = nvmm_handle_io(mach, vcpu);
788 break;
789 case NVMM_VCPU_EXIT_INT_READY:
790 case NVMM_VCPU_EXIT_NMI_READY:
791 case NVMM_VCPU_EXIT_TPR_CHANGED:
792 break;
793 case NVMM_VCPU_EXIT_HALTED:
794 ret = nvmm_handle_halted(mach, cpu, exit);
795 break;
796 case NVMM_VCPU_EXIT_SHUTDOWN:
797 qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
798 cpu->exception_index = EXCP_INTERRUPT;
799 ret = 1;
800 break;
801 case NVMM_VCPU_EXIT_RDMSR:
802 ret = nvmm_handle_rdmsr(mach, cpu, exit);
803 break;
804 case NVMM_VCPU_EXIT_WRMSR:
805 ret = nvmm_handle_wrmsr(mach, cpu, exit);
806 break;
807 case NVMM_VCPU_EXIT_MONITOR:
808 case NVMM_VCPU_EXIT_MWAIT:
809 ret = nvmm_inject_ud(mach, vcpu);
810 break;
811 default:
812 error_report("NVMM: Unexpected VM exit code 0x%lx [hw=0x%lx]",
813 exit->reason, exit->u.inv.hwcode);
814 nvmm_get_registers(cpu);
815 qemu_mutex_lock_iothread();
816 qemu_system_guest_panicked(cpu_get_crash_info(cpu));
817 qemu_mutex_unlock_iothread();
818 ret = -1;
819 break;
820 }
821 } while (ret == 0);
822
823 cpu_exec_end(cpu);
824 qemu_mutex_lock_iothread();
825
826 qatomic_set(&cpu->exit_request, false);
827
828 return ret < 0;
829}
830
831
832
833static void
834do_nvmm_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg)
835{
836 nvmm_get_registers(cpu);
837 cpu->vcpu_dirty = true;
838}
839
840static void
841do_nvmm_cpu_synchronize_post_reset(CPUState *cpu, run_on_cpu_data arg)
842{
843 nvmm_set_registers(cpu);
844 cpu->vcpu_dirty = false;
845}
846
847static void
848do_nvmm_cpu_synchronize_post_init(CPUState *cpu, run_on_cpu_data arg)
849{
850 nvmm_set_registers(cpu);
851 cpu->vcpu_dirty = false;
852}
853
854static void
855do_nvmm_cpu_synchronize_pre_loadvm(CPUState *cpu, run_on_cpu_data arg)
856{
857 cpu->vcpu_dirty = true;
858}
859
860void nvmm_cpu_synchronize_state(CPUState *cpu)
861{
862 if (!cpu->vcpu_dirty) {
863 run_on_cpu(cpu, do_nvmm_cpu_synchronize_state, RUN_ON_CPU_NULL);
864 }
865}
866
867void nvmm_cpu_synchronize_post_reset(CPUState *cpu)
868{
869 run_on_cpu(cpu, do_nvmm_cpu_synchronize_post_reset, RUN_ON_CPU_NULL);
870}
871
872void nvmm_cpu_synchronize_post_init(CPUState *cpu)
873{
874 run_on_cpu(cpu, do_nvmm_cpu_synchronize_post_init, RUN_ON_CPU_NULL);
875}
876
877void nvmm_cpu_synchronize_pre_loadvm(CPUState *cpu)
878{
879 run_on_cpu(cpu, do_nvmm_cpu_synchronize_pre_loadvm, RUN_ON_CPU_NULL);
880}
881
882
883
884static Error *nvmm_migration_blocker;
885
886
887
888
889
890
891static void
892nvmm_ipi_signal(int sigcpu)
893{
894 if (current_cpu) {
895 struct qemu_vcpu *qcpu = get_qemu_vcpu(current_cpu);
896#if NVMM_USER_VERSION >= 2
897 struct nvmm_vcpu *vcpu = &qcpu->vcpu;
898 nvmm_vcpu_stop(vcpu);
899#else
900 qcpu->stop = true;
901#endif
902 }
903}
904
905static void
906nvmm_init_cpu_signals(void)
907{
908 struct sigaction sigact;
909 sigset_t set;
910
911
912 memset(&sigact, 0, sizeof(sigact));
913 sigact.sa_handler = nvmm_ipi_signal;
914 sigaction(SIG_IPI, &sigact, NULL);
915
916
917 sigprocmask(SIG_BLOCK, NULL, &set);
918 sigdelset(&set, SIG_IPI);
919 pthread_sigmask(SIG_SETMASK, &set, NULL);
920}
921
922int
923nvmm_init_vcpu(CPUState *cpu)
924{
925 struct nvmm_machine *mach = get_nvmm_mach();
926 struct nvmm_vcpu_conf_cpuid cpuid;
927 struct nvmm_vcpu_conf_tpr tpr;
928 Error *local_error = NULL;
929 struct qemu_vcpu *qcpu;
930 int ret, err;
931
932 nvmm_init_cpu_signals();
933
934 if (nvmm_migration_blocker == NULL) {
935 error_setg(&nvmm_migration_blocker,
936 "NVMM: Migration not supported");
937
938 if (migrate_add_blocker(nvmm_migration_blocker, &local_error) < 0) {
939 error_report_err(local_error);
940 error_free(nvmm_migration_blocker);
941 return -EINVAL;
942 }
943 }
944
945 qcpu = g_malloc0(sizeof(*qcpu));
946 if (qcpu == NULL) {
947 error_report("NVMM: Failed to allocate VCPU context.");
948 return -ENOMEM;
949 }
950
951 ret = nvmm_vcpu_create(mach, cpu->cpu_index, &qcpu->vcpu);
952 if (ret == -1) {
953 err = errno;
954 error_report("NVMM: Failed to create a virtual processor,"
955 " error=%d", err);
956 g_free(qcpu);
957 return -err;
958 }
959
960 memset(&cpuid, 0, sizeof(cpuid));
961 cpuid.mask = 1;
962 cpuid.leaf = 0x00000001;
963 cpuid.u.mask.set.edx = CPUID_MCE | CPUID_MCA | CPUID_MTRR;
964 ret = nvmm_vcpu_configure(mach, &qcpu->vcpu, NVMM_VCPU_CONF_CPUID,
965 &cpuid);
966 if (ret == -1) {
967 err = errno;
968 error_report("NVMM: Failed to configure a virtual processor,"
969 " error=%d", err);
970 g_free(qcpu);
971 return -err;
972 }
973
974 ret = nvmm_vcpu_configure(mach, &qcpu->vcpu, NVMM_VCPU_CONF_CALLBACKS,
975 &nvmm_callbacks);
976 if (ret == -1) {
977 err = errno;
978 error_report("NVMM: Failed to configure a virtual processor,"
979 " error=%d", err);
980 g_free(qcpu);
981 return -err;
982 }
983
984 if (qemu_mach.cap.arch.vcpu_conf_support & NVMM_CAP_ARCH_VCPU_CONF_TPR) {
985 memset(&tpr, 0, sizeof(tpr));
986 tpr.exit_changed = 1;
987 ret = nvmm_vcpu_configure(mach, &qcpu->vcpu, NVMM_VCPU_CONF_TPR, &tpr);
988 if (ret == -1) {
989 err = errno;
990 error_report("NVMM: Failed to configure a virtual processor,"
991 " error=%d", err);
992 g_free(qcpu);
993 return -err;
994 }
995 }
996
997 cpu->vcpu_dirty = true;
998 cpu->hax_vcpu = (struct hax_vcpu_state *)qcpu;
999
1000 return 0;
1001}
1002
1003int
1004nvmm_vcpu_exec(CPUState *cpu)
1005{
1006 int ret, fatal;
1007
1008 while (1) {
1009 if (cpu->exception_index >= EXCP_INTERRUPT) {
1010 ret = cpu->exception_index;
1011 cpu->exception_index = -1;
1012 break;
1013 }
1014
1015 fatal = nvmm_vcpu_loop(cpu);
1016
1017 if (fatal) {
1018 error_report("NVMM: Failed to execute a VCPU.");
1019 abort();
1020 }
1021 }
1022
1023 return ret;
1024}
1025
1026void
1027nvmm_destroy_vcpu(CPUState *cpu)
1028{
1029 struct nvmm_machine *mach = get_nvmm_mach();
1030 struct qemu_vcpu *qcpu = get_qemu_vcpu(cpu);
1031
1032 nvmm_vcpu_destroy(mach, &qcpu->vcpu);
1033 g_free(cpu->hax_vcpu);
1034}
1035
1036
1037
1038static void
1039nvmm_update_mapping(hwaddr start_pa, ram_addr_t size, uintptr_t hva,
1040 bool add, bool rom, const char *name)
1041{
1042 struct nvmm_machine *mach = get_nvmm_mach();
1043 int ret, prot;
1044
1045 if (add) {
1046 prot = PROT_READ | PROT_EXEC;
1047 if (!rom) {
1048 prot |= PROT_WRITE;
1049 }
1050 ret = nvmm_gpa_map(mach, hva, start_pa, size, prot);
1051 } else {
1052 ret = nvmm_gpa_unmap(mach, hva, start_pa, size);
1053 }
1054
1055 if (ret == -1) {
1056 error_report("NVMM: Failed to %s GPA range '%s' PA:%p, "
1057 "Size:%p bytes, HostVA:%p, error=%d",
1058 (add ? "map" : "unmap"), name, (void *)(uintptr_t)start_pa,
1059 (void *)size, (void *)hva, errno);
1060 }
1061}
1062
1063static void
1064nvmm_process_section(MemoryRegionSection *section, int add)
1065{
1066 MemoryRegion *mr = section->mr;
1067 hwaddr start_pa = section->offset_within_address_space;
1068 ram_addr_t size = int128_get64(section->size);
1069 unsigned int delta;
1070 uintptr_t hva;
1071
1072 if (!memory_region_is_ram(mr)) {
1073 return;
1074 }
1075
1076
1077 delta = qemu_real_host_page_size() - (start_pa & ~qemu_real_host_page_mask());
1078 delta &= ~qemu_real_host_page_mask();
1079 if (delta > size) {
1080 return;
1081 }
1082 start_pa += delta;
1083 size -= delta;
1084 size &= qemu_real_host_page_mask();
1085 if (!size || (start_pa & ~qemu_real_host_page_mask())) {
1086 return;
1087 }
1088
1089 hva = (uintptr_t)memory_region_get_ram_ptr(mr) +
1090 section->offset_within_region + delta;
1091
1092 nvmm_update_mapping(start_pa, size, hva, add,
1093 memory_region_is_rom(mr), mr->name);
1094}
1095
1096static void
1097nvmm_region_add(MemoryListener *listener, MemoryRegionSection *section)
1098{
1099 memory_region_ref(section->mr);
1100 nvmm_process_section(section, 1);
1101}
1102
1103static void
1104nvmm_region_del(MemoryListener *listener, MemoryRegionSection *section)
1105{
1106 nvmm_process_section(section, 0);
1107 memory_region_unref(section->mr);
1108}
1109
1110static void
1111nvmm_transaction_begin(MemoryListener *listener)
1112{
1113
1114}
1115
1116static void
1117nvmm_transaction_commit(MemoryListener *listener)
1118{
1119
1120}
1121
1122static void
1123nvmm_log_sync(MemoryListener *listener, MemoryRegionSection *section)
1124{
1125 MemoryRegion *mr = section->mr;
1126
1127 if (!memory_region_is_ram(mr)) {
1128 return;
1129 }
1130
1131 memory_region_set_dirty(mr, 0, int128_get64(section->size));
1132}
1133
1134static MemoryListener nvmm_memory_listener = {
1135 .name = "nvmm",
1136 .begin = nvmm_transaction_begin,
1137 .commit = nvmm_transaction_commit,
1138 .region_add = nvmm_region_add,
1139 .region_del = nvmm_region_del,
1140 .log_sync = nvmm_log_sync,
1141 .priority = 10,
1142};
1143
1144static void
1145nvmm_ram_block_added(RAMBlockNotifier *n, void *host, size_t size,
1146 size_t max_size)
1147{
1148 struct nvmm_machine *mach = get_nvmm_mach();
1149 uintptr_t hva = (uintptr_t)host;
1150 int ret;
1151
1152 ret = nvmm_hva_map(mach, hva, max_size);
1153
1154 if (ret == -1) {
1155 error_report("NVMM: Failed to map HVA, HostVA:%p "
1156 "Size:%p bytes, error=%d",
1157 (void *)hva, (void *)size, errno);
1158 }
1159}
1160
1161static struct RAMBlockNotifier nvmm_ram_notifier = {
1162 .ram_block_added = nvmm_ram_block_added
1163};
1164
1165
1166
1167static int
1168nvmm_accel_init(MachineState *ms)
1169{
1170 int ret, err;
1171
1172 ret = nvmm_init();
1173 if (ret == -1) {
1174 err = errno;
1175 error_report("NVMM: Initialization failed, error=%d", errno);
1176 return -err;
1177 }
1178
1179 ret = nvmm_capability(&qemu_mach.cap);
1180 if (ret == -1) {
1181 err = errno;
1182 error_report("NVMM: Unable to fetch capability, error=%d", errno);
1183 return -err;
1184 }
1185 if (qemu_mach.cap.version < NVMM_KERN_VERSION) {
1186 error_report("NVMM: Unsupported version %u", qemu_mach.cap.version);
1187 return -EPROGMISMATCH;
1188 }
1189 if (qemu_mach.cap.state_size != sizeof(struct nvmm_x64_state)) {
1190 error_report("NVMM: Wrong state size %u", qemu_mach.cap.state_size);
1191 return -EPROGMISMATCH;
1192 }
1193
1194 ret = nvmm_machine_create(&qemu_mach.mach);
1195 if (ret == -1) {
1196 err = errno;
1197 error_report("NVMM: Machine creation failed, error=%d", errno);
1198 return -err;
1199 }
1200
1201 memory_listener_register(&nvmm_memory_listener, &address_space_memory);
1202 ram_block_notifier_add(&nvmm_ram_notifier);
1203
1204 printf("NetBSD Virtual Machine Monitor accelerator is operational\n");
1205 return 0;
1206}
1207
1208int
1209nvmm_enabled(void)
1210{
1211 return nvmm_allowed;
1212}
1213
1214static void
1215nvmm_accel_class_init(ObjectClass *oc, void *data)
1216{
1217 AccelClass *ac = ACCEL_CLASS(oc);
1218 ac->name = "NVMM";
1219 ac->init_machine = nvmm_accel_init;
1220 ac->allowed = &nvmm_allowed;
1221}
1222
1223static const TypeInfo nvmm_accel_type = {
1224 .name = ACCEL_CLASS_NAME("nvmm"),
1225 .parent = TYPE_ACCEL,
1226 .class_init = nvmm_accel_class_init,
1227};
1228
1229static void
1230nvmm_type_init(void)
1231{
1232 type_register_static(&nvmm_accel_type);
1233}
1234
1235type_init(nvmm_type_init);
1236