// SPDX-License-Identifier: GPL-2.0
#include <linux/objtool.h>
#include <linux/percpu.h>

#include <asm/debugreg.h>
#include <asm/mmu_context.h>

#include "cpuid.h"
#include "hyperv.h"
#include "mmu.h"
#include "nested.h"
#include "pmu.h"
#include "sgx.h"
#include "trace.h"
#include "vmx.h"
#include "x86.h"

static bool __read_mostly enable_shadow_vmcs = 1;
module_param_named(enable_shadow_vmcs, enable_shadow_vmcs, bool, S_IRUGO);

static bool __read_mostly nested_early_check = 0;
module_param(nested_early_check, bool, S_IRUGO);

#define CC KVM_NESTED_VMENTER_CONSISTENCY_CHECK

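/*
 * Hyper-V requires all of these, so mark them as supported even though
 * they are just treated the same as all-context.
 */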
#define VMX_VPID_EXTENT_SUPPORTED_MASK		\
	(VMX_VPID_EXTENT_INDIVIDUAL_ADDR_BIT |	\
	VMX_VPID_EXTENT_SINGLE_CONTEXT_BIT |	\
	VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT |	\
	VMX_VPID_EXTENT_SINGLE_NON_GLOBAL_BIT)

#define VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE 5

enum {
	VMX_VMREAD_BITMAP,
	VMX_VMWRITE_BITMAP,
	VMX_BITMAP_NR
};
static unsigned long *vmx_bitmap[VMX_BITMAP_NR];

#define vmx_vmread_bitmap	(vmx_bitmap[VMX_VMREAD_BITMAP])
#define vmx_vmwrite_bitmap	(vmx_bitmap[VMX_VMWRITE_BITMAP])

struct shadow_vmcs_field {
	u16	encoding;
	u16	offset;
};
static struct shadow_vmcs_field shadow_read_only_fields[] = {
#define SHADOW_FIELD_RO(x, y) { x, offsetof(struct vmcs12, y) },
#include "vmcs_shadow_fields.h"
};
static int max_shadow_read_only_fields =
	ARRAY_SIZE(shadow_read_only_fields);

static struct shadow_vmcs_field shadow_read_write_fields[] = {
#define SHADOW_FIELD_RW(x, y) { x, offsetof(struct vmcs12, y) },
#include "vmcs_shadow_fields.h"
};
static int max_shadow_read_write_fields =
	ARRAY_SIZE(shadow_read_write_fields);

static void init_vmcs_shadow_fields(void)
{
	int i, j;

	memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE);
	memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE);

	for (i = j = 0; i < max_shadow_read_only_fields; i++) {
		struct shadow_vmcs_field entry = shadow_read_only_fields[i];
		u16 field = entry.encoding;

		if (vmcs_field_width(field) == VMCS_FIELD_WIDTH_U64 &&
		    (i + 1 == max_shadow_read_only_fields ||
		     shadow_read_only_fields[i + 1].encoding != field + 1))
			pr_err("Missing field from shadow_read_only_field %x\n",
			       field + 1);

		clear_bit(field, vmx_vmread_bitmap);
		if (field & 1)
#ifdef CONFIG_X86_64
			continue;
#else
			entry.offset += sizeof(u32);
#endif
		shadow_read_only_fields[j++] = entry;
	}
	max_shadow_read_only_fields = j;

	for (i = j = 0; i < max_shadow_read_write_fields; i++) {
		struct shadow_vmcs_field entry = shadow_read_write_fields[i];
		u16 field = entry.encoding;

		if (vmcs_field_width(field) == VMCS_FIELD_WIDTH_U64 &&
		    (i + 1 == max_shadow_read_write_fields ||
		     shadow_read_write_fields[i + 1].encoding != field + 1))
			pr_err("Missing field from shadow_read_write_field %x\n",
			       field + 1);

		WARN_ONCE(field >= GUEST_ES_AR_BYTES &&
			  field <= GUEST_TR_AR_BYTES,
			  "Update vmcs12_write_any() to drop reserved bits from AR_BYTES");

		/*
		 * PML and the preemption timer can be emulated, but the
		 * processor cannot vmwrite to fields that don't exist
		 * in the vmcs12.
		 */
		switch (field) {
		case GUEST_PML_INDEX:
			if (!cpu_has_vmx_pml())
				continue;
			break;
		case VMX_PREEMPTION_TIMER_VALUE:
			if (!cpu_has_vmx_preemption_timer())
				continue;
			break;
		case GUEST_INTR_STATUS:
			if (!cpu_has_vmx_apicv())
				continue;
			break;
		default:
			break;
		}

		clear_bit(field, vmx_vmwrite_bitmap);
		clear_bit(field, vmx_vmread_bitmap);
		if (field & 1)
#ifdef CONFIG_X86_64
			continue;
#else
			entry.offset += sizeof(u32);
#endif
		shadow_read_write_fields[j++] = entry;
	}
	max_shadow_read_write_fields = j;
}

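/*
 * The following 3 functions, nested_vmx_succeed()/failValid()/failInvalid(),
 * set the success or failure of the emulated VMX instruction (as specified
 * by Vol 2B, VMX Instruction Reference, "Conventions"), and skip the emulated
 * instruction.
 */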
static int nested_vmx_succeed(struct kvm_vcpu *vcpu)
{
	vmx_set_rflags(vcpu, vmx_get_rflags(vcpu)
			& ~(X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
			    X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_OF));
	return kvm_skip_emulated_instruction(vcpu);
}

static int nested_vmx_failInvalid(struct kvm_vcpu *vcpu)
{
	vmx_set_rflags(vcpu, (vmx_get_rflags(vcpu)
			& ~(X86_EFLAGS_PF | X86_EFLAGS_AF | X86_EFLAGS_ZF |
			    X86_EFLAGS_SF | X86_EFLAGS_OF))
			| X86_EFLAGS_CF);
	return kvm_skip_emulated_instruction(vcpu);
}

static int nested_vmx_failValid(struct kvm_vcpu *vcpu,
				u32 vm_instruction_error)
{
	vmx_set_rflags(vcpu, (vmx_get_rflags(vcpu)
			& ~(X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
			    X86_EFLAGS_SF | X86_EFLAGS_OF))
			| X86_EFLAGS_ZF);
	get_vmcs12(vcpu)->vm_instruction_error = vm_instruction_error;

	/*
	 * We don't need to force sync to shadow VMCS because
	 * VM_INSTRUCTION_ERROR is not shadowed. Enlightened VMCS 'shadows'
	 * all fields and thus must be synced.
	 */
	if (to_vmx(vcpu)->nested.hv_evmcs_vmptr != EVMPTR_INVALID)
		to_vmx(vcpu)->nested.need_vmcs12_to_shadow_sync = true;

	return kvm_skip_emulated_instruction(vcpu);
}

static int nested_vmx_fail(struct kvm_vcpu *vcpu, u32 vm_instruction_error)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);

	/*
	 * failValid writes the error number to the current VMCS, which
	 * can't be done if there isn't a current VMCS.
	 */
	if (vmx->nested.current_vmptr == -1ull &&
	    !evmptr_is_valid(vmx->nested.hv_evmcs_vmptr))
		return nested_vmx_failInvalid(vcpu);

	return nested_vmx_failValid(vcpu, vm_instruction_error);
}

static void nested_vmx_abort(struct kvm_vcpu *vcpu, u32 indicator)
{
	/* TODO: not to reset guest simply here. */
	kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
	pr_debug_ratelimited("kvm: nested vmx abort, indicator %d\n", indicator);
}

static inline bool vmx_control_verify(u32 control, u32 low, u32 high)
{
	return fixed_bits_valid(control, low, high);
}

static inline u64 vmx_control_msr(u32 low, u32 high)
{
	return low | ((u64)high << 32);
}

static void vmx_disable_shadow_vmcs(struct vcpu_vmx *vmx)
{
	secondary_exec_controls_clearbit(vmx, SECONDARY_EXEC_SHADOW_VMCS);
	vmcs_write64(VMCS_LINK_POINTER, -1ull);
	vmx->nested.need_vmcs12_to_shadow_sync = false;
}

static inline void nested_release_evmcs(struct kvm_vcpu *vcpu)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);

	if (evmptr_is_valid(vmx->nested.hv_evmcs_vmptr)) {
		kvm_vcpu_unmap(vcpu, &vmx->nested.hv_evmcs_map, true);
		vmx->nested.hv_evmcs = NULL;
	}

	vmx->nested.hv_evmcs_vmptr = EVMPTR_INVALID;
}

static void vmx_sync_vmcs_host_state(struct vcpu_vmx *vmx,
				     struct loaded_vmcs *prev)
{
	struct vmcs_host_state *dest, *src;

	if (unlikely(!vmx->guest_state_loaded))
		return;

	src = &prev->host_state;
	dest = &vmx->loaded_vmcs->host_state;

	vmx_set_host_fs_gs(dest, src->fs_sel, src->gs_sel, src->fs_base, src->gs_base);
	dest->ldt_sel = src->ldt_sel;
#ifdef CONFIG_X86_64
	dest->ds_sel = src->ds_sel;
	dest->es_sel = src->es_sel;
#endif
}

static void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	struct loaded_vmcs *prev;
	int cpu;

	if (WARN_ON_ONCE(vmx->loaded_vmcs == vmcs))
		return;

	cpu = get_cpu();
	prev = vmx->loaded_vmcs;
	vmx->loaded_vmcs = vmcs;
	vmx_vcpu_load_vmcs(vcpu, cpu, prev);
	vmx_sync_vmcs_host_state(vmx, prev);
	put_cpu();

	vmx_register_cache_reset(vcpu);
}

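/*
 * Free whatever needs to be freed from vmx->nested when L1 goes down, or
 * just stops using VMX.
 */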
static void free_nested(struct kvm_vcpu *vcpu)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);

	if (WARN_ON_ONCE(vmx->loaded_vmcs != &vmx->vmcs01))
		vmx_switch_vmcs(vcpu, &vmx->vmcs01);

	if (!vmx->nested.vmxon && !vmx->nested.smm.vmxon)
		return;

	kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);

	vmx->nested.vmxon = false;
	vmx->nested.smm.vmxon = false;
	free_vpid(vmx->nested.vpid02);
	vmx->nested.posted_intr_nv = -1;
	vmx->nested.current_vmptr = -1ull;
	if (enable_shadow_vmcs) {
		vmx_disable_shadow_vmcs(vmx);
		vmcs_clear(vmx->vmcs01.shadow_vmcs);
		free_vmcs(vmx->vmcs01.shadow_vmcs);
		vmx->vmcs01.shadow_vmcs = NULL;
	}
	kfree(vmx->nested.cached_vmcs12);
	vmx->nested.cached_vmcs12 = NULL;
	kfree(vmx->nested.cached_shadow_vmcs12);
	vmx->nested.cached_shadow_vmcs12 = NULL;
	/* Unpin physical memory we referred to in the vmcs02 */
	if (vmx->nested.apic_access_page) {
		kvm_release_page_clean(vmx->nested.apic_access_page);
		vmx->nested.apic_access_page = NULL;
	}
	kvm_vcpu_unmap(vcpu, &vmx->nested.virtual_apic_map, true);
	kvm_vcpu_unmap(vcpu, &vmx->nested.pi_desc_map, true);
	vmx->nested.pi_desc = NULL;

	kvm_mmu_free_roots(vcpu, &vcpu->arch.guest_mmu, KVM_MMU_ROOTS_ALL);

	nested_release_evmcs(vcpu);

	free_loaded_vmcs(&vmx->nested.vmcs02);
}

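/*
 * Ensure that the current vmcs of the logical processor is the
 * vmcs01 of the vcpu before calling free_nested().
 */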
void nested_vmx_free_vcpu(struct kvm_vcpu *vcpu)
{
	vcpu_load(vcpu);
	vmx_leave_nested(vcpu);
	vcpu_put(vcpu);
}

#define EPTP_PA_MASK	GENMASK_ULL(51, 12)

static bool nested_ept_root_matches(hpa_t root_hpa, u64 root_eptp, u64 eptp)
{
	return VALID_PAGE(root_hpa) &&
	       ((root_eptp & EPTP_PA_MASK) == (eptp & EPTP_PA_MASK));
}

static void nested_ept_invalidate_addr(struct kvm_vcpu *vcpu, gpa_t eptp,
				       gpa_t addr)
{
	uint i;
	struct kvm_mmu_root_info *cached_root;

	WARN_ON_ONCE(!mmu_is_nested(vcpu));

	for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) {
		cached_root = &vcpu->arch.mmu->prev_roots[i];

		if (nested_ept_root_matches(cached_root->hpa, cached_root->pgd,
					    eptp))
			vcpu->arch.mmu->invlpg(vcpu, addr, cached_root->hpa);
	}
}

static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu,
					 struct x86_exception *fault)
{
	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	u32 vm_exit_reason;
	unsigned long exit_qualification = vcpu->arch.exit_qualification;

	if (vmx->nested.pml_full) {
		vm_exit_reason = EXIT_REASON_PML_FULL;
		vmx->nested.pml_full = false;
		exit_qualification &= INTR_INFO_UNBLOCK_NMI;
	} else {
		if (fault->error_code & PFERR_RSVD_MASK)
			vm_exit_reason = EXIT_REASON_EPT_MISCONFIG;
		else
			vm_exit_reason = EXIT_REASON_EPT_VIOLATION;

		/*
		 * Although the caller (kvm_inject_emulated_page_fault) would
		 * have already synced the faulting address in the shadow EPT
		 * tables for the current EPTP12, we also need to sync it for
		 * any other cached EPTP02s based on the same EP4TA, since the
		 * TLB associates mappings to the EP4TA rather than the full
		 * EPTP.
		 */
		nested_ept_invalidate_addr(vcpu, vmcs12->ept_pointer,
					   fault->address);
	}

	nested_vmx_vmexit(vcpu, vm_exit_reason, 0, exit_qualification);
	vmcs12->guest_physical_address = fault->address;
}

static void nested_ept_new_eptp(struct kvm_vcpu *vcpu)
{
	kvm_init_shadow_ept_mmu(vcpu,
				to_vmx(vcpu)->nested.msrs.ept_caps &
				VMX_EPT_EXECUTE_ONLY_BIT,
				nested_ept_ad_enabled(vcpu),
				nested_ept_get_eptp(vcpu));
}

static void nested_ept_init_mmu_context(struct kvm_vcpu *vcpu)
{
	WARN_ON(mmu_is_nested(vcpu));

	vcpu->arch.mmu = &vcpu->arch.guest_mmu;
	nested_ept_new_eptp(vcpu);
	vcpu->arch.mmu->get_guest_pgd = nested_ept_get_eptp;
	vcpu->arch.mmu->inject_page_fault = nested_ept_inject_page_fault;
	vcpu->arch.mmu->get_pdptr = kvm_pdptr_read;

	vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu;
}

static void nested_ept_uninit_mmu_context(struct kvm_vcpu *vcpu)
{
	vcpu->arch.mmu = &vcpu->arch.root_mmu;
	vcpu->arch.walk_mmu = &vcpu->arch.root_mmu;
}

static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12,
					    u16 error_code)
{
	bool inequality, bit;

	bit = (vmcs12->exception_bitmap & (1u << PF_VECTOR)) != 0;
	inequality =
		(error_code & vmcs12->page_fault_error_code_mask) !=
		 vmcs12->page_fault_error_code_match;
	return inequality ^ bit;
}

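/*
 * KVM wants to inject page-faults which it got to the guest. This function
 * checks whether in a nested guest, we need to inject them to L1 or L2.
 */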
static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned long *exit_qual)
{
	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
	unsigned int nr = vcpu->arch.exception.nr;
	bool has_payload = vcpu->arch.exception.has_payload;
	unsigned long payload = vcpu->arch.exception.payload;

	if (nr == PF_VECTOR) {
		if (vcpu->arch.exception.nested_apf) {
			*exit_qual = vcpu->arch.apf.nested_apf_token;
			return 1;
		}
		if (nested_vmx_is_page_fault_vmexit(vmcs12,
						    vcpu->arch.exception.error_code)) {
			*exit_qual = has_payload ? payload : vcpu->arch.cr2;
			return 1;
		}
	} else if (vmcs12->exception_bitmap & (1u << nr)) {
		if (nr == DB_VECTOR) {
			if (!has_payload) {
				payload = vcpu->arch.dr6;
				payload &= ~DR6_BT;
				payload ^= DR6_ACTIVE_LOW;
			}
			*exit_qual = payload;
		} else
			*exit_qual = 0;
		return 1;
	}

	return 0;
}

static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu,
					 struct x86_exception *fault)
{
	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);

	WARN_ON(!is_guest_mode(vcpu));

	if (nested_vmx_is_page_fault_vmexit(vmcs12, fault->error_code) &&
	    !to_vmx(vcpu)->nested.nested_run_pending) {
		vmcs12->vm_exit_intr_error_code = fault->error_code;
		nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
				  PF_VECTOR | INTR_TYPE_HARD_EXCEPTION |
				  INTR_INFO_DELIVER_CODE_MASK | INTR_INFO_VALID_MASK,
				  fault->address);
	} else {
		kvm_inject_page_fault(vcpu, fault);
	}
}

static int nested_vmx_check_io_bitmap_controls(struct kvm_vcpu *vcpu,
					       struct vmcs12 *vmcs12)
{
	if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS))
		return 0;

	if (CC(!page_address_valid(vcpu, vmcs12->io_bitmap_a)) ||
	    CC(!page_address_valid(vcpu, vmcs12->io_bitmap_b)))
		return -EINVAL;

	return 0;
}

static int nested_vmx_check_msr_bitmap_controls(struct kvm_vcpu *vcpu,
						struct vmcs12 *vmcs12)
{
	if (!nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS))
		return 0;

	if (CC(!page_address_valid(vcpu, vmcs12->msr_bitmap)))
		return -EINVAL;

	return 0;
}

static int nested_vmx_check_tpr_shadow_controls(struct kvm_vcpu *vcpu,
						struct vmcs12 *vmcs12)
{
	if (!nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW))
		return 0;

	if (CC(!page_address_valid(vcpu, vmcs12->virtual_apic_page_addr)))
		return -EINVAL;

	return 0;
}

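/*
 * Check if MSR is intercepted for L01 MSR bitmap.
 */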
static bool msr_write_intercepted_l01(struct kvm_vcpu *vcpu, u32 msr)
{
	unsigned long *msr_bitmap;
	int f = sizeof(unsigned long);

	if (!cpu_has_vmx_msr_bitmap())
		return true;

	msr_bitmap = to_vmx(vcpu)->vmcs01.msr_bitmap;

	if (msr <= 0x1fff) {
		return !!test_bit(msr, msr_bitmap + 0x800 / f);
	} else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
		msr &= 0x1fff;
		return !!test_bit(msr, msr_bitmap + 0xc00 / f);
	}

	return true;
}

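/*
 * If a msr is allowed by L0, we should check whether it is allowed by L1.
 * The corresponding bit will be cleared unless both of L0 and L1 allow it.
 */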
static void nested_vmx_disable_intercept_for_msr(unsigned long *msr_bitmap_l1,
						 unsigned long *msr_bitmap_nested,
						 u32 msr, int type)
{
	int f = sizeof(unsigned long);

	/*
	 * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
	 * have the write-low and read-high bitmap offsets the wrong way
	 * round. We can control MSRs 0x00000000-0x00001fff and
	 * 0xc0000000-0xc0001fff.
	 */
	if (msr <= 0x1fff) {
		if (type & MSR_TYPE_R &&
		    !test_bit(msr, msr_bitmap_l1 + 0x000 / f))
			/* read-low */
			__clear_bit(msr, msr_bitmap_nested + 0x000 / f);

		if (type & MSR_TYPE_W &&
		    !test_bit(msr, msr_bitmap_l1 + 0x800 / f))
			/* write-low */
			__clear_bit(msr, msr_bitmap_nested + 0x800 / f);

	} else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
		msr &= 0x1fff;
		if (type & MSR_TYPE_R &&
		    !test_bit(msr, msr_bitmap_l1 + 0x400 / f))
			/* read-high */
			__clear_bit(msr, msr_bitmap_nested + 0x400 / f);

		if (type & MSR_TYPE_W &&
		    !test_bit(msr, msr_bitmap_l1 + 0xc00 / f))
			/* write-high */
			__clear_bit(msr, msr_bitmap_nested + 0xc00 / f);

	}
}

static inline void enable_x2apic_msr_intercepts(unsigned long *msr_bitmap)
{
	int msr;

	for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) {
		unsigned word = msr / BITS_PER_LONG;

		msr_bitmap[word] = ~0;
		msr_bitmap[word + (0x800 / sizeof(long))] = ~0;
	}
}

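/*
 * Merge L0's and L1's MSR bitmap, return false to indicate that
 * we do not use the hardware.
 */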
static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
						 struct vmcs12 *vmcs12)
{
	int msr;
	unsigned long *msr_bitmap_l1;
	unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.vmcs02.msr_bitmap;
	struct kvm_host_map *map = &to_vmx(vcpu)->nested.msr_bitmap_map;

	/* Nothing to do if the MSR bitmap is not in use. */
	if (!cpu_has_vmx_msr_bitmap() ||
	    !nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS))
		return false;

	if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->msr_bitmap), map))
		return false;

	msr_bitmap_l1 = (unsigned long *)map->hva;

	/*
	 * To keep the control flow simple, pay eight 8-byte writes (sixteen
	 * 4-byte writes on 32-bit systems) up front to enable intercepts for
	 * the x2APIC MSR range and selectively clear them below.
	 */
	enable_x2apic_msr_intercepts(msr_bitmap_l0);

	if (nested_cpu_has_virt_x2apic_mode(vmcs12)) {
		if (nested_cpu_has_apic_reg_virt(vmcs12)) {
			/*
			 * L0 need not intercept reads for MSRs between 0x800
			 * and 0x8ff, it just lets the processor take the value
			 * from the virtual-APIC page; take those 256 bits
			 * directly from the L1 bitmap.
			 */
			for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) {
				unsigned word = msr / BITS_PER_LONG;

				msr_bitmap_l0[word] = msr_bitmap_l1[word];
			}
		}

		nested_vmx_disable_intercept_for_msr(
			msr_bitmap_l1, msr_bitmap_l0,
			X2APIC_MSR(APIC_TASKPRI),
			MSR_TYPE_R | MSR_TYPE_W);

		if (nested_cpu_has_vid(vmcs12)) {
			nested_vmx_disable_intercept_for_msr(
				msr_bitmap_l1, msr_bitmap_l0,
				X2APIC_MSR(APIC_EOI),
				MSR_TYPE_W);
			nested_vmx_disable_intercept_for_msr(
				msr_bitmap_l1, msr_bitmap_l0,
				X2APIC_MSR(APIC_SELF_IPI),
				MSR_TYPE_W);
		}
	}

	/* KVM unconditionally exposes the FS/GS base MSRs to L1. */
#ifdef CONFIG_X86_64
	nested_vmx_disable_intercept_for_msr(msr_bitmap_l1, msr_bitmap_l0,
					     MSR_FS_BASE, MSR_TYPE_RW);

	nested_vmx_disable_intercept_for_msr(msr_bitmap_l1, msr_bitmap_l0,
					     MSR_GS_BASE, MSR_TYPE_RW);

	nested_vmx_disable_intercept_for_msr(msr_bitmap_l1, msr_bitmap_l0,
					     MSR_KERNEL_GS_BASE, MSR_TYPE_RW);
#endif

	/*
	 * Checking the L0->L1 bitmap is trying to verify two things:
	 *
	 * 1. L0 gave a permission to L1 to actually passthrough the MSR. This
	 *    ensures that we do not accidentally generate an L02 MSR bitmap
	 *    from the L12 MSR bitmap that is too permissive.
	 * 2. That L1 or L2s have actually used the MSR. This avoids
	 *    unnecessarily merging of the bitmap if the MSR is unused. This
	 *    works properly because we only update the L01 MSR bitmap lazily.
	 *    So even if L0 should pass L1 these MSRs, the L01 bitmap is only
	 *    updated to reflect this when L1 (or its L2s) actually write to
	 *    the MSR.
	 */
	if (!msr_write_intercepted_l01(vcpu, MSR_IA32_SPEC_CTRL))
		nested_vmx_disable_intercept_for_msr(
					msr_bitmap_l1, msr_bitmap_l0,
					MSR_IA32_SPEC_CTRL,
					MSR_TYPE_R | MSR_TYPE_W);

	if (!msr_write_intercepted_l01(vcpu, MSR_IA32_PRED_CMD))
		nested_vmx_disable_intercept_for_msr(
					msr_bitmap_l1, msr_bitmap_l0,
					MSR_IA32_PRED_CMD,
					MSR_TYPE_W);

	kvm_vcpu_unmap(vcpu, &to_vmx(vcpu)->nested.msr_bitmap_map, false);

	return true;
}

static void nested_cache_shadow_vmcs12(struct kvm_vcpu *vcpu,
				       struct vmcs12 *vmcs12)
{
	struct kvm_host_map map;
	struct vmcs12 *shadow;

	if (!nested_cpu_has_shadow_vmcs(vmcs12) ||
	    vmcs12->vmcs_link_pointer == -1ull)
		return;

	shadow = get_shadow_vmcs12(vcpu);

	if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->vmcs_link_pointer), &map))
		return;

	memcpy(shadow, map.hva, VMCS12_SIZE);
	kvm_vcpu_unmap(vcpu, &map, false);
}

static void nested_flush_cached_shadow_vmcs12(struct kvm_vcpu *vcpu,
					      struct vmcs12 *vmcs12)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);

	if (!nested_cpu_has_shadow_vmcs(vmcs12) ||
	    vmcs12->vmcs_link_pointer == -1ull)
		return;

	kvm_write_guest(vmx->vcpu.kvm, vmcs12->vmcs_link_pointer,
			get_shadow_vmcs12(vcpu), VMCS12_SIZE);
}

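/*
 * In nested virtualization, check if L1 has set
 * VM_EXIT_ACK_INTR_ON_EXIT
 */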
static bool nested_exit_intr_ack_set(struct kvm_vcpu *vcpu)
{
	return get_vmcs12(vcpu)->vm_exit_controls &
		VM_EXIT_ACK_INTR_ON_EXIT;
}

static int nested_vmx_check_apic_access_controls(struct kvm_vcpu *vcpu,
						 struct vmcs12 *vmcs12)
{
	if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES) &&
	    CC(!page_address_valid(vcpu, vmcs12->apic_access_addr)))
		return -EINVAL;
	else
		return 0;
}

static int nested_vmx_check_apicv_controls(struct kvm_vcpu *vcpu,
					   struct vmcs12 *vmcs12)
{
	if (!nested_cpu_has_virt_x2apic_mode(vmcs12) &&
	    !nested_cpu_has_apic_reg_virt(vmcs12) &&
	    !nested_cpu_has_vid(vmcs12) &&
	    !nested_cpu_has_posted_intr(vmcs12))
		return 0;

	/*
	 * If virtualize x2apic mode is enabled,
	 * virtualize apic access must be disabled.
	 */
	if (CC(nested_cpu_has_virt_x2apic_mode(vmcs12) &&
	       nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)))
		return -EINVAL;

	/*
	 * If virtual interrupt delivery is enabled,
	 * we must exit on external interrupts.
	 */
	if (CC(nested_cpu_has_vid(vmcs12) && !nested_exit_on_intr(vcpu)))
		return -EINVAL;

	/*
	 * bits 15:8 should be zero in posted_intr_nv,
	 * the descriptor address has been already checked
	 * in nested_get_vmcs12_pages.
	 *
	 * bits 5:0 of posted_intr_desc_addr should be zero.
	 */
	if (nested_cpu_has_posted_intr(vmcs12) &&
	    (CC(!nested_cpu_has_vid(vmcs12)) ||
	     CC(!nested_exit_intr_ack_set(vcpu)) ||
	     CC((vmcs12->posted_intr_nv & 0xff00)) ||
	     CC(!kvm_vcpu_is_legal_aligned_gpa(vcpu, vmcs12->posted_intr_desc_addr, 64))))
		return -EINVAL;

	/* tpr shadow is needed by all apicv features. */
	if (CC(!nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)))
		return -EINVAL;

	return 0;
}

static int nested_vmx_check_msr_switch(struct kvm_vcpu *vcpu,
				       u32 count, u64 addr)
{
	if (count == 0)
		return 0;

	if (!kvm_vcpu_is_legal_aligned_gpa(vcpu, addr, 16) ||
	    !kvm_vcpu_is_legal_gpa(vcpu, (addr + count * sizeof(struct vmx_msr_entry) - 1)))
		return -EINVAL;

	return 0;
}

static int nested_vmx_check_exit_msr_switch_controls(struct kvm_vcpu *vcpu,
						     struct vmcs12 *vmcs12)
{
	if (CC(nested_vmx_check_msr_switch(vcpu,
					   vmcs12->vm_exit_msr_load_count,
					   vmcs12->vm_exit_msr_load_addr)) ||
	    CC(nested_vmx_check_msr_switch(vcpu,
					   vmcs12->vm_exit_msr_store_count,
					   vmcs12->vm_exit_msr_store_addr)))
		return -EINVAL;

	return 0;
}

static int nested_vmx_check_entry_msr_switch_controls(struct kvm_vcpu *vcpu,
						      struct vmcs12 *vmcs12)
{
	if (CC(nested_vmx_check_msr_switch(vcpu,
					   vmcs12->vm_entry_msr_load_count,
					   vmcs12->vm_entry_msr_load_addr)))
		return -EINVAL;

	return 0;
}

static int nested_vmx_check_pml_controls(struct kvm_vcpu *vcpu,
					 struct vmcs12 *vmcs12)
{
	if (!nested_cpu_has_pml(vmcs12))
		return 0;

	if (CC(!nested_cpu_has_ept(vmcs12)) ||
	    CC(!page_address_valid(vcpu, vmcs12->pml_address)))
		return -EINVAL;

	return 0;
}

static int nested_vmx_check_unrestricted_guest_controls(struct kvm_vcpu *vcpu,
							struct vmcs12 *vmcs12)
{
	if (CC(nested_cpu_has2(vmcs12, SECONDARY_EXEC_UNRESTRICTED_GUEST) &&
	       !nested_cpu_has_ept(vmcs12)))
		return -EINVAL;
	return 0;
}

static int nested_vmx_check_mode_based_ept_exec_controls(struct kvm_vcpu *vcpu,
							 struct vmcs12 *vmcs12)
{
	if (CC(nested_cpu_has2(vmcs12, SECONDARY_EXEC_MODE_BASED_EPT_EXEC) &&
	       !nested_cpu_has_ept(vmcs12)))
		return -EINVAL;
	return 0;
}

static int nested_vmx_check_shadow_vmcs_controls(struct kvm_vcpu *vcpu,
						 struct vmcs12 *vmcs12)
{
	if (!nested_cpu_has_shadow_vmcs(vmcs12))
		return 0;

	if (CC(!page_address_valid(vcpu, vmcs12->vmread_bitmap)) ||
	    CC(!page_address_valid(vcpu, vmcs12->vmwrite_bitmap)))
		return -EINVAL;

	return 0;
}

static int nested_vmx_msr_check_common(struct kvm_vcpu *vcpu,
				       struct vmx_msr_entry *e)
{
	/* x2APIC MSR accesses are not allowed */
	if (CC(vcpu->arch.apic_base & X2APIC_ENABLE && e->index >> 8 == 0x8))
		return -EINVAL;
	if (CC(e->index == MSR_IA32_UCODE_WRITE) ||
	    CC(e->index == MSR_IA32_UCODE_REV))
		return -EINVAL;
	if (CC(e->reserved != 0))
		return -EINVAL;
	return 0;
}

static int nested_vmx_load_msr_check(struct kvm_vcpu *vcpu,
				     struct vmx_msr_entry *e)
{
	if (CC(e->index == MSR_FS_BASE) ||
	    CC(e->index == MSR_GS_BASE) ||
	    CC(e->index == MSR_IA32_SMM_MONITOR_CTL) ||
	    nested_vmx_msr_check_common(vcpu, e))
		return -EINVAL;
	return 0;
}

static int nested_vmx_store_msr_check(struct kvm_vcpu *vcpu,
				      struct vmx_msr_entry *e)
{
	if (CC(e->index == MSR_IA32_SMBASE) ||
	    nested_vmx_msr_check_common(vcpu, e))
		return -EINVAL;
	return 0;
}

static u32 nested_vmx_max_atomic_switch_msrs(struct kvm_vcpu *vcpu)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	u64 vmx_misc = vmx_control_msr(vmx->nested.msrs.misc_low,
				       vmx->nested.msrs.misc_high);

	return (vmx_misc_max_msr(vmx_misc) + 1) * VMX_MISC_MSR_LIST_MULTIPLIER;
}

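/*
 * Load guest's/host's msr at nested entry/exit.
 * return 0 for success, entry index for failure.
 *
 * One of the failure modes for MSR load/store is when a list exceeds the
 * virtual hardware's capacity. To maintain compatibility with hardware inasmuch
 * as possible, process all valid entries before failing rather than precluding
 * future changes to hardware capacity.
 */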
static u32 nested_vmx_load_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
{
	u32 i;
	struct vmx_msr_entry e;
	u32 max_msr_list_size = nested_vmx_max_atomic_switch_msrs(vcpu);

	for (i = 0; i < count; i++) {
		if (unlikely(i >= max_msr_list_size))
			goto fail;

		if (kvm_vcpu_read_guest(vcpu, gpa + i * sizeof(e),
					&e, sizeof(e))) {
			pr_debug_ratelimited(
				"%s cannot read MSR entry (%u, 0x%08llx)\n",
				__func__, i, gpa + i * sizeof(e));
			goto fail;
		}
		if (nested_vmx_load_msr_check(vcpu, &e)) {
			pr_debug_ratelimited(
				"%s check failed (%u, 0x%x, 0x%x)\n",
				__func__, i, e.index, e.reserved);
			goto fail;
		}
		if (kvm_set_msr(vcpu, e.index, e.value)) {
			pr_debug_ratelimited(
				"%s cannot write MSR (%u, 0x%x, 0x%llx)\n",
				__func__, i, e.index, e.value);
			goto fail;
		}
	}
	return 0;
fail:
	/* Note, max_msr_list_size is at most 4096, i.e. this can't wrap. */
	return i + 1;
}

static bool nested_vmx_get_vmexit_msr_value(struct kvm_vcpu *vcpu,
					    u32 msr_index,
					    u64 *data)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);

	/*
	 * If the L0 hypervisor stored a more accurate value for the TSC that
	 * does not include the time taken for emulation of the L2->L1
	 * VM-exit in L0, use the more accurate value.
	 */
	if (msr_index == MSR_IA32_TSC) {
		int i = vmx_find_loadstore_msr_slot(&vmx->msr_autostore.guest,
						    MSR_IA32_TSC);

		if (i >= 0) {
			u64 val = vmx->msr_autostore.guest.val[i].value;

			*data = kvm_read_l1_tsc(vcpu, val);
			return true;
		}
	}

	if (kvm_get_msr(vcpu, msr_index, data)) {
		pr_debug_ratelimited("%s cannot read MSR (0x%x)\n", __func__,
				     msr_index);
		return false;
	}
	return true;
}

static bool read_and_check_msr_entry(struct kvm_vcpu *vcpu, u64 gpa, int i,
				     struct vmx_msr_entry *e)
{
	if (kvm_vcpu_read_guest(vcpu,
				gpa + i * sizeof(*e),
				e, 2 * sizeof(u32))) {
		pr_debug_ratelimited(
			"%s cannot read MSR entry (%u, 0x%08llx)\n",
			__func__, i, gpa + i * sizeof(*e));
		return false;
	}
	if (nested_vmx_store_msr_check(vcpu, e)) {
		pr_debug_ratelimited(
			"%s check failed (%u, 0x%x, 0x%x)\n",
			__func__, i, e->index, e->reserved);
		return false;
	}
	return true;
}

static int nested_vmx_store_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
{
	u64 data;
	u32 i;
	struct vmx_msr_entry e;
	u32 max_msr_list_size = nested_vmx_max_atomic_switch_msrs(vcpu);

	for (i = 0; i < count; i++) {
		if (unlikely(i >= max_msr_list_size))
			return -EINVAL;

		if (!read_and_check_msr_entry(vcpu, gpa, i, &e))
			return -EINVAL;

		if (!nested_vmx_get_vmexit_msr_value(vcpu, e.index, &data))
			return -EINVAL;

		if (kvm_vcpu_write_guest(vcpu,
					 gpa + i * sizeof(e) +
					 offsetof(struct vmx_msr_entry, value),
					 &data, sizeof(data))) {
			pr_debug_ratelimited(
				"%s cannot write MSR (%u, 0x%x, 0x%llx)\n",
				__func__, i, e.index, data);
			return -EINVAL;
		}
	}
	return 0;
}

static bool nested_msr_store_list_has_msr(struct kvm_vcpu *vcpu, u32 msr_index)
{
	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
	u32 count = vmcs12->vm_exit_msr_store_count;
	u64 gpa = vmcs12->vm_exit_msr_store_addr;
	struct vmx_msr_entry e;
	u32 i;

	for (i = 0; i < count; i++) {
		if (!read_and_check_msr_entry(vcpu, gpa, i, &e))
			return false;

		if (e.index == msr_index)
			return true;
	}
	return false;
}

static void prepare_vmx_msr_autostore_list(struct kvm_vcpu *vcpu,
					   u32 msr_index)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	struct vmx_msrs *autostore = &vmx->msr_autostore.guest;
	bool in_vmcs12_store_list;
	int msr_autostore_slot;
	bool in_autostore_list;
	int last;

	msr_autostore_slot = vmx_find_loadstore_msr_slot(autostore, msr_index);
	in_autostore_list = msr_autostore_slot >= 0;
	in_vmcs12_store_list = nested_msr_store_list_has_msr(vcpu, msr_index);

	if (in_vmcs12_store_list && !in_autostore_list) {
		if (autostore->nr == MAX_NR_LOADSTORE_MSRS) {
			/*
			 * Emulated VMEntry does not fail here. Instead a less
			 * accurate value will be returned by
			 * nested_vmx_get_vmexit_msr_value() using kvm_get_msr()
			 * instead of reading the value from the vmcs02 VMExit
			 * MSR-store area.
			 */
			pr_warn_ratelimited(
				"Not enough msr entries in msr_autostore. Can't add msr %x\n",
				msr_index);
			return;
		}
		last = autostore->nr++;
		autostore->val[last].index = msr_index;
	} else if (!in_vmcs12_store_list && in_autostore_list) {
		last = --autostore->nr;
		autostore->val[msr_autostore_slot] = autostore->val[last];
	}
}

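/*
 * Load guest's/host's cr3 at nested entry/exit.  @nested_ept is true if we are
 * emulating VM-Entry into a guest with EPT enabled.  On failure, the expected
 * Exit Qualification (for a VM-Entry consistency check VM-Exit) is assigned to
 * @entry_failure_code.
 */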
static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3,
			       bool nested_ept, bool reload_pdptrs,
			       enum vm_entry_failure_code *entry_failure_code)
{
	if (CC(kvm_vcpu_is_illegal_gpa(vcpu, cr3))) {
		*entry_failure_code = ENTRY_FAIL_DEFAULT;
		return -EINVAL;
	}

	/*
	 * If PAE paging and EPT are both on, CR3 is not used by the CPU and
	 * must not be dereferenced.
	 */
	if (reload_pdptrs && !nested_ept && is_pae_paging(vcpu) &&
	    CC(!load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))) {
		*entry_failure_code = ENTRY_FAIL_PDPTE;
		return -EINVAL;
	}

	if (!nested_ept)
		kvm_mmu_new_pgd(vcpu, cr3);

	vcpu->arch.cr3 = cr3;
	kvm_register_mark_available(vcpu, VCPU_EXREG_CR3);

	/* Re-initialize the MMU, e.g. to pick up CR4 MMU role changes. */
	kvm_init_mmu(vcpu);

	return 0;
}

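/*
 * Returns if KVM is able to config CPU to tag TLB entries
 * populated by L2 differently than TLB entries populated
 * by L1.
 *
 * If L0 uses EPT, L1 and L2 run with different EPTP because
 * guest_mode is part of kvm_mmu_page_role. Thus, TLB entries
 * are tagged with different EPTP.
 *
 * If L1 uses VPID and we allocated a vpid02, TLB entries are tagged
 * with different VPID (L1 entries are tagged with vmx->vpid
 * while L2 entries are tagged with vmx->nested.vpid02).
 */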
static bool nested_has_guest_tlb_tag(struct kvm_vcpu *vcpu)
{
	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);

	return enable_ept ||
	       (nested_cpu_has_vpid(vmcs12) && to_vmx(vcpu)->nested.vpid02);
}

static void nested_vmx_transition_tlb_flush(struct kvm_vcpu *vcpu,
					    struct vmcs12 *vmcs12,
					    bool is_vmenter)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);

	/*
	 * If vmcs12 doesn't use VPID, L1 expects linear and combined mappings
	 * for *all* contexts to be flushed on VM-Enter/VM-Exit, i.e. it's a
	 * full TLB flush from the guest's perspective.  This is required even
	 * if VPID is disabled in the host as KVM may need to synchronize the
	 * MMU in response to the guest TLB flush.
	 *
	 * Note, using TLB_FLUSH_GUEST is correct even if nested EPT is in
	 * use, as guest-physical mappings aren't flushed on VPID
	 * invalidations, including VM-Enter or VM-Exit with VPID disabled.
	 */
	if (!nested_cpu_has_vpid(vmcs12)) {
		kvm_make_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu);
		return;
	}

	/* L2 should never have a VPID if VPID is disabled. */
	WARN_ON(!enable_vpid);

	/*
	 * If VPID is enabled and used by vmcs12, but L2 does not have a
	 * unique TLB tag (ASID), i.e. EPT is disabled and KVM was unable to
	 * allocate a VPID for L2, flush the current context as the effective
	 * ASID is common to both L1 and L2.
	 *
	 * Otherwise, if vpid12 is changing on VM-Enter, the new "virtual"
	 * VPID (vpid12) will reuse the same "real" VPID (vpid02), and so the
	 * latter needs to be flushed; there is no direct mapping between
	 * vpid02 and vpid12, vpid02 is per-vCPU and reused for all nested
	 * vCPUs.
	 */
	if (!nested_has_guest_tlb_tag(vcpu)) {
		kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
	} else if (is_vmenter &&
		   vmcs12->virtual_processor_id != vmx->nested.last_vpid) {
		vmx->nested.last_vpid = vmcs12->virtual_processor_id;
		vpid_sync_context(nested_get_vpid02(vcpu));
	}
}

static bool is_bitwise_subset(u64 superset, u64 subset, u64 mask)
{
	superset &= mask;
	subset &= mask;

	return (superset | subset) == superset;
}

static int vmx_restore_vmx_basic(struct vcpu_vmx *vmx, u64 data)
{
	const u64 feature_and_reserved =
		/* feature (except bit 48; see below) */
		BIT_ULL(49) | BIT_ULL(54) | BIT_ULL(55) |
		/* reserved */
		BIT_ULL(31) | GENMASK_ULL(47, 45) | GENMASK_ULL(63, 56);
	u64 vmx_basic = vmx->nested.msrs.basic;

	if (!is_bitwise_subset(vmx_basic, data, feature_and_reserved))
		return -EINVAL;

	/*
	 * KVM does not emulate a version of VMX that constrains physical
	 * addresses of VMX structures (e.g. VMCS) to 32-bits.
	 */
	if (data & BIT_ULL(48))
		return -EINVAL;

	if (vmx_basic_vmcs_revision_id(vmx_basic) !=
	    vmx_basic_vmcs_revision_id(data))
		return -EINVAL;

	if (vmx_basic_vmcs_size(vmx_basic) > vmx_basic_vmcs_size(data))
		return -EINVAL;

	vmx->nested.msrs.basic = data;
	return 0;
}

static int
vmx_restore_control_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data)
{
	u64 supported;
	u32 *lowp, *highp;

	switch (msr_index) {
	case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
		lowp = &vmx->nested.msrs.pinbased_ctls_low;
		highp = &vmx->nested.msrs.pinbased_ctls_high;
		break;
	case MSR_IA32_VMX_TRUE_PROCBASED_CTLS:
		lowp = &vmx->nested.msrs.procbased_ctls_low;
		highp = &vmx->nested.msrs.procbased_ctls_high;
		break;
	case MSR_IA32_VMX_TRUE_EXIT_CTLS:
		lowp = &vmx->nested.msrs.exit_ctls_low;
		highp = &vmx->nested.msrs.exit_ctls_high;
		break;
	case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
		lowp = &vmx->nested.msrs.entry_ctls_low;
		highp = &vmx->nested.msrs.entry_ctls_high;
		break;
	case MSR_IA32_VMX_PROCBASED_CTLS2:
		lowp = &vmx->nested.msrs.secondary_ctls_low;
		highp = &vmx->nested.msrs.secondary_ctls_high;
		break;
	default:
		BUG();
	}

	supported = vmx_control_msr(*lowp, *highp);

	/* Check must-be-1 bits are still 1. */
	if (!is_bitwise_subset(data, supported, GENMASK_ULL(31, 0)))
		return -EINVAL;

	/* Check must-be-0 bits are still 0. */
	if (!is_bitwise_subset(supported, data, GENMASK_ULL(63, 32)))
		return -EINVAL;

	*lowp = data;
	*highp = data >> 32;
	return 0;
}

static int vmx_restore_vmx_misc(struct vcpu_vmx *vmx, u64 data)
{
	const u64 feature_and_reserved_bits =
		/* feature */
		BIT_ULL(5) | GENMASK_ULL(8, 6) | BIT_ULL(14) | BIT_ULL(15) |
		BIT_ULL(28) | BIT_ULL(29) | BIT_ULL(30) |
		/* reserved */
		GENMASK_ULL(13, 9) | BIT_ULL(31);
	u64 vmx_misc;

	vmx_misc = vmx_control_msr(vmx->nested.msrs.misc_low,
				   vmx->nested.msrs.misc_high);

	if (!is_bitwise_subset(vmx_misc, data, feature_and_reserved_bits))
		return -EINVAL;

	if ((vmx->nested.msrs.pinbased_ctls_high &
	     PIN_BASED_VMX_PREEMPTION_TIMER) &&
	    vmx_misc_preemption_timer_rate(data) !=
	    vmx_misc_preemption_timer_rate(vmx_misc))
		return -EINVAL;

	if (vmx_misc_cr3_count(data) > vmx_misc_cr3_count(vmx_misc))
		return -EINVAL;

	if (vmx_misc_max_msr(data) > vmx_misc_max_msr(vmx_misc))
		return -EINVAL;

	if (vmx_misc_mseg_revid(data) != vmx_misc_mseg_revid(vmx_misc))
		return -EINVAL;

	vmx->nested.msrs.misc_low = data;
	vmx->nested.msrs.misc_high = data >> 32;

	return 0;
}

static int vmx_restore_vmx_ept_vpid_cap(struct vcpu_vmx *vmx, u64 data)
{
	u64 vmx_ept_vpid_cap;

	vmx_ept_vpid_cap = vmx_control_msr(vmx->nested.msrs.ept_caps,
					   vmx->nested.msrs.vpid_caps);

	/* Every bit is either reserved or a feature bit. */
	if (!is_bitwise_subset(vmx_ept_vpid_cap, data, -1ULL))
		return -EINVAL;

	vmx->nested.msrs.ept_caps = data;
	vmx->nested.msrs.vpid_caps = data >> 32;
	return 0;
}

static int vmx_restore_fixed0_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data)
{
	u64 *msr;

	switch (msr_index) {
	case MSR_IA32_VMX_CR0_FIXED0:
		msr = &vmx->nested.msrs.cr0_fixed0;
		break;
	case MSR_IA32_VMX_CR4_FIXED0:
		msr = &vmx->nested.msrs.cr4_fixed0;
		break;
	default:
		BUG();
	}

	/*
	 * 1 bits (which indicates bits which "must-be-1" during VMX operation)
	 * must be 1 in the restored value.
	 */
	if (!is_bitwise_subset(data, *msr, -1ULL))
		return -EINVAL;

	*msr = data;
	return 0;
}

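/*
 * Called when userspace is restoring VMX MSRs.
 *
 * Returns 0 on success, non-0 otherwise.
 */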
int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);

	/*
	 * Don't allow changes to the VMX capability MSRs while the vCPU
	 * is in VMX operation.
	 */
	if (vmx->nested.vmxon)
		return -EBUSY;

	switch (msr_index) {
	case MSR_IA32_VMX_BASIC:
		return vmx_restore_vmx_basic(vmx, data);
	case MSR_IA32_VMX_PINBASED_CTLS:
	case MSR_IA32_VMX_PROCBASED_CTLS:
	case MSR_IA32_VMX_EXIT_CTLS:
	case MSR_IA32_VMX_ENTRY_CTLS:
		/*
		 * The "non-true" VMX capability MSRs are generated from the
		 * true MSRs, so we do not support restoring them directly.
		 *
		 * If userspace wants to emulate VMX_BASIC[55]=0, userspace
		 * should restore the non-true MSRs of the nested VMX model
		 * instead of the true MSRs.
		 */
		return -EINVAL;
	case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
	case MSR_IA32_VMX_TRUE_PROCBASED_CTLS:
	case MSR_IA32_VMX_TRUE_EXIT_CTLS:
	case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
	case MSR_IA32_VMX_PROCBASED_CTLS2:
		return vmx_restore_control_msr(vmx, msr_index, data);
	case MSR_IA32_VMX_MISC:
		return vmx_restore_vmx_misc(vmx, data);
	case MSR_IA32_VMX_CR0_FIXED0:
	case MSR_IA32_VMX_CR4_FIXED0:
		return vmx_restore_fixed0_msr(vmx, msr_index, data);
	case MSR_IA32_VMX_CR0_FIXED1:
	case MSR_IA32_VMX_CR4_FIXED1:
		/*
		 * These MSRs are generated based on the vCPU's CPUID, so we
		 * do not support restoring them directly.
		 */
		return -EINVAL;
	case MSR_IA32_VMX_EPT_VPID_CAP:
		return vmx_restore_vmx_ept_vpid_cap(vmx, data);
	case MSR_IA32_VMX_VMCS_ENUM:
		vmx->nested.msrs.vmcs_enum = data;
		return 0;
	case MSR_IA32_VMX_VMFUNC:
		if (data & ~vmx->nested.msrs.vmfunc_controls)
			return -EINVAL;
		vmx->nested.msrs.vmfunc_controls = data;
		return 0;
	default:
		/*
		 * The rest of the VMX capability MSRs do not support restore.
		 */
		return -EINVAL;
	}
}

/* Returns 0 on success, non-0 otherwise. */
int vmx_get_vmx_msr(struct nested_vmx_msrs *msrs, u32 msr_index, u64 *pdata)
{
	switch (msr_index) {
	case MSR_IA32_VMX_BASIC:
		*pdata = msrs->basic;
		break;
	case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
	case MSR_IA32_VMX_PINBASED_CTLS:
		*pdata = vmx_control_msr(
			msrs->pinbased_ctls_low,
			msrs->pinbased_ctls_high);
		if (msr_index == MSR_IA32_VMX_PINBASED_CTLS)
			*pdata |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
		break;
	case MSR_IA32_VMX_TRUE_PROCBASED_CTLS:
	case MSR_IA32_VMX_PROCBASED_CTLS:
		*pdata = vmx_control_msr(
			msrs->procbased_ctls_low,
			msrs->procbased_ctls_high);
		if (msr_index == MSR_IA32_VMX_PROCBASED_CTLS)
			*pdata |= CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
		break;
	case MSR_IA32_VMX_TRUE_EXIT_CTLS:
	case MSR_IA32_VMX_EXIT_CTLS:
		*pdata = vmx_control_msr(
			msrs->exit_ctls_low,
			msrs->exit_ctls_high);
		if (msr_index == MSR_IA32_VMX_EXIT_CTLS)
			*pdata |= VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR;
		break;
	case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
	case MSR_IA32_VMX_ENTRY_CTLS:
		*pdata = vmx_control_msr(
			msrs->entry_ctls_low,
			msrs->entry_ctls_high);
		if (msr_index == MSR_IA32_VMX_ENTRY_CTLS)
			*pdata |= VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR;
		break;
	case MSR_IA32_VMX_MISC:
		*pdata = vmx_control_msr(
			msrs->misc_low,
			msrs->misc_high);
		break;
	case MSR_IA32_VMX_CR0_FIXED0:
		*pdata = msrs->cr0_fixed0;
		break;
	case MSR_IA32_VMX_CR0_FIXED1:
		*pdata = msrs->cr0_fixed1;
		break;
	case MSR_IA32_VMX_CR4_FIXED0:
		*pdata = msrs->cr4_fixed0;
		break;
	case MSR_IA32_VMX_CR4_FIXED1:
		*pdata = msrs->cr4_fixed1;
		break;
	case MSR_IA32_VMX_VMCS_ENUM:
		*pdata = msrs->vmcs_enum;
		break;
	case MSR_IA32_VMX_PROCBASED_CTLS2:
		*pdata = vmx_control_msr(
			msrs->secondary_ctls_low,
			msrs->secondary_ctls_high);
		break;
	case MSR_IA32_VMX_EPT_VPID_CAP:
		*pdata = msrs->ept_caps |
			((u64)msrs->vpid_caps << 32);
		break;
	case MSR_IA32_VMX_VMFUNC:
		*pdata = msrs->vmfunc_controls;
		break;
	default:
		return 1;
	}

	return 0;
}

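/*
 * Copy the writable VMCS shadow fields back to the VMCS12, in case they have
 * been modified by the L1 guest.  Note, "writable" in this context means
 * "writable by the guest", i.e. tagged SHADOW_FIELD_RW; the set of fields
 * tagged SHADOW_FIELD_RO may or may not align with the "read-only" VM-exit
 * information fields.
 */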
static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx)
{
	struct vmcs *shadow_vmcs = vmx->vmcs01.shadow_vmcs;
	struct vmcs12 *vmcs12 = get_vmcs12(&vmx->vcpu);
	struct shadow_vmcs_field field;
	unsigned long val;
	int i;

	if (WARN_ON(!shadow_vmcs))
		return;

	preempt_disable();

	vmcs_load(shadow_vmcs);

	for (i = 0; i < max_shadow_read_write_fields; i++) {
		field = shadow_read_write_fields[i];
		val = __vmcs_readl(field.encoding);
		vmcs12_write_any(vmcs12, field.encoding, field.offset, val);
	}

	vmcs_clear(shadow_vmcs);
	vmcs_load(vmx->loaded_vmcs->vmcs);

	preempt_enable();
}

static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx)
{
	const struct shadow_vmcs_field *fields[] = {
		shadow_read_write_fields,
		shadow_read_only_fields
	};
	const int max_fields[] = {
		max_shadow_read_write_fields,
		max_shadow_read_only_fields
	};
	struct vmcs *shadow_vmcs = vmx->vmcs01.shadow_vmcs;
	struct vmcs12 *vmcs12 = get_vmcs12(&vmx->vcpu);
	struct shadow_vmcs_field field;
	unsigned long val;
	int i, q;

	if (WARN_ON(!shadow_vmcs))
		return;

	vmcs_load(shadow_vmcs);

	for (q = 0; q < ARRAY_SIZE(fields); q++) {
		for (i = 0; i < max_fields[q]; i++) {
			field = fields[q][i];
			val = vmcs12_read_any(vmcs12, field.encoding,
					      field.offset);
			__vmcs_writel(field.encoding, val);
		}
	}

	vmcs_clear(shadow_vmcs);
	vmcs_load(vmx->loaded_vmcs->vmcs);
}

static void copy_enlightened_to_vmcs12(struct vcpu_vmx *vmx, u32 hv_clean_fields)
{
	struct vmcs12 *vmcs12 = vmx->nested.cached_vmcs12;
	struct hv_enlightened_vmcs *evmcs = vmx->nested.hv_evmcs;

	/* HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE */
	vmcs12->tpr_threshold = evmcs->tpr_threshold;
	vmcs12->guest_rip = evmcs->guest_rip;

	if (unlikely(!(hv_clean_fields &
		       HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC))) {
		vmcs12->guest_rsp = evmcs->guest_rsp;
		vmcs12->guest_rflags = evmcs->guest_rflags;
		vmcs12->guest_interruptibility_info =
			evmcs->guest_interruptibility_info;
	}

	if (unlikely(!(hv_clean_fields &
		       HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_PROC))) {
		vmcs12->cpu_based_vm_exec_control =
			evmcs->cpu_based_vm_exec_control;
	}

	if (unlikely(!(hv_clean_fields &
		       HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EXCPN))) {
		vmcs12->exception_bitmap = evmcs->exception_bitmap;
	}

	if (unlikely(!(hv_clean_fields &
		       HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_ENTRY))) {
		vmcs12->vm_entry_controls = evmcs->vm_entry_controls;
	}

	if (unlikely(!(hv_clean_fields &
		       HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT))) {
		vmcs12->vm_entry_intr_info_field =
			evmcs->vm_entry_intr_info_field;
		vmcs12->vm_entry_exception_error_code =
			evmcs->vm_entry_exception_error_code;
		vmcs12->vm_entry_instruction_len =
			evmcs->vm_entry_instruction_len;
	}

	if (unlikely(!(hv_clean_fields &
		       HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1))) {
		vmcs12->host_ia32_pat = evmcs->host_ia32_pat;
		vmcs12->host_ia32_efer = evmcs->host_ia32_efer;
		vmcs12->host_cr0 = evmcs->host_cr0;
		vmcs12->host_cr3 = evmcs->host_cr3;
		vmcs12->host_cr4 = evmcs->host_cr4;
		vmcs12->host_ia32_sysenter_esp = evmcs->host_ia32_sysenter_esp;
		vmcs12->host_ia32_sysenter_eip = evmcs->host_ia32_sysenter_eip;
		vmcs12->host_rip = evmcs->host_rip;
		vmcs12->host_ia32_sysenter_cs = evmcs->host_ia32_sysenter_cs;
		vmcs12->host_es_selector = evmcs->host_es_selector;
		vmcs12->host_cs_selector = evmcs->host_cs_selector;
		vmcs12->host_ss_selector = evmcs->host_ss_selector;
		vmcs12->host_ds_selector = evmcs->host_ds_selector;
		vmcs12->host_fs_selector = evmcs->host_fs_selector;
		vmcs12->host_gs_selector = evmcs->host_gs_selector;
		vmcs12->host_tr_selector = evmcs->host_tr_selector;
	}

	if (unlikely(!(hv_clean_fields &
		       HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1))) {
		vmcs12->pin_based_vm_exec_control =
			evmcs->pin_based_vm_exec_control;
		vmcs12->vm_exit_controls = evmcs->vm_exit_controls;
		vmcs12->secondary_vm_exec_control =
			evmcs->secondary_vm_exec_control;
	}

	if (unlikely(!(hv_clean_fields &
		       HV_VMX_ENLIGHTENED_CLEAN_FIELD_IO_BITMAP))) {
		vmcs12->io_bitmap_a = evmcs->io_bitmap_a;
		vmcs12->io_bitmap_b = evmcs->io_bitmap_b;
	}

	if (unlikely(!(hv_clean_fields &
		       HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP))) {
		vmcs12->msr_bitmap = evmcs->msr_bitmap;
	}

	if (unlikely(!(hv_clean_fields &
		       HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2))) {
		vmcs12->guest_es_base = evmcs->guest_es_base;
		vmcs12->guest_cs_base = evmcs->guest_cs_base;
		vmcs12->guest_ss_base = evmcs->guest_ss_base;
		vmcs12->guest_ds_base = evmcs->guest_ds_base;
		vmcs12->guest_fs_base = evmcs->guest_fs_base;
		vmcs12->guest_gs_base = evmcs->guest_gs_base;
		vmcs12->guest_ldtr_base = evmcs->guest_ldtr_base;
		vmcs12->guest_tr_base = evmcs->guest_tr_base;
		vmcs12->guest_gdtr_base = evmcs->guest_gdtr_base;
		vmcs12->guest_idtr_base = evmcs->guest_idtr_base;
		vmcs12->guest_es_limit = evmcs->guest_es_limit;
		vmcs12->guest_cs_limit = evmcs->guest_cs_limit;
		vmcs12->guest_ss_limit = evmcs->guest_ss_limit;
		vmcs12->guest_ds_limit = evmcs->guest_ds_limit;
		vmcs12->guest_fs_limit = evmcs->guest_fs_limit;
		vmcs12->guest_gs_limit = evmcs->guest_gs_limit;
		vmcs12->guest_ldtr_limit = evmcs->guest_ldtr_limit;
		vmcs12->guest_tr_limit = evmcs->guest_tr_limit;
		vmcs12->guest_gdtr_limit = evmcs->guest_gdtr_limit;
		vmcs12->guest_idtr_limit = evmcs->guest_idtr_limit;
		vmcs12->guest_es_ar_bytes = evmcs->guest_es_ar_bytes;
		vmcs12->guest_cs_ar_bytes = evmcs->guest_cs_ar_bytes;
		vmcs12->guest_ss_ar_bytes = evmcs->guest_ss_ar_bytes;
		vmcs12->guest_ds_ar_bytes = evmcs->guest_ds_ar_bytes;
		vmcs12->guest_fs_ar_bytes = evmcs->guest_fs_ar_bytes;
		vmcs12->guest_gs_ar_bytes = evmcs->guest_gs_ar_bytes;
		vmcs12->guest_ldtr_ar_bytes = evmcs->guest_ldtr_ar_bytes;
		vmcs12->guest_tr_ar_bytes = evmcs->guest_tr_ar_bytes;
		vmcs12->guest_es_selector = evmcs->guest_es_selector;
		vmcs12->guest_cs_selector = evmcs->guest_cs_selector;
		vmcs12->guest_ss_selector = evmcs->guest_ss_selector;
		vmcs12->guest_ds_selector = evmcs->guest_ds_selector;
		vmcs12->guest_fs_selector = evmcs->guest_fs_selector;
		vmcs12->guest_gs_selector = evmcs->guest_gs_selector;
		vmcs12->guest_ldtr_selector = evmcs->guest_ldtr_selector;
		vmcs12->guest_tr_selector = evmcs->guest_tr_selector;
	}

	if (unlikely(!(hv_clean_fields &
		       HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2))) {
		vmcs12->tsc_offset = evmcs->tsc_offset;
		vmcs12->virtual_apic_page_addr = evmcs->virtual_apic_page_addr;
		vmcs12->xss_exit_bitmap = evmcs->xss_exit_bitmap;
	}

	if (unlikely(!(hv_clean_fields &
		       HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR))) {
		vmcs12->cr0_guest_host_mask = evmcs->cr0_guest_host_mask;
		vmcs12->cr4_guest_host_mask = evmcs->cr4_guest_host_mask;
		vmcs12->cr0_read_shadow = evmcs->cr0_read_shadow;
		vmcs12->cr4_read_shadow = evmcs->cr4_read_shadow;
		vmcs12->guest_cr0 = evmcs->guest_cr0;
		vmcs12->guest_cr3 = evmcs->guest_cr3;
		vmcs12->guest_cr4 = evmcs->guest_cr4;
		vmcs12->guest_dr7 = evmcs->guest_dr7;
	}

	if (unlikely(!(hv_clean_fields &
		       HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER))) {
		vmcs12->host_fs_base = evmcs->host_fs_base;
		vmcs12->host_gs_base = evmcs->host_gs_base;
		vmcs12->host_tr_base = evmcs->host_tr_base;
		vmcs12->host_gdtr_base = evmcs->host_gdtr_base;
		vmcs12->host_idtr_base = evmcs->host_idtr_base;
		vmcs12->host_rsp = evmcs->host_rsp;
	}

	if (unlikely(!(hv_clean_fields &
		       HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_XLAT))) {
		vmcs12->ept_pointer = evmcs->ept_pointer;
		vmcs12->virtual_processor_id = evmcs->virtual_processor_id;
	}

	if (unlikely(!(hv_clean_fields &
		       HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1))) {
		vmcs12->vmcs_link_pointer = evmcs->vmcs_link_pointer;
		vmcs12->guest_ia32_debugctl = evmcs->guest_ia32_debugctl;
		vmcs12->guest_ia32_pat = evmcs->guest_ia32_pat;
		vmcs12->guest_ia32_efer = evmcs->guest_ia32_efer;
		vmcs12->guest_pdptr0 = evmcs->guest_pdptr0;
		vmcs12->guest_pdptr1 = evmcs->guest_pdptr1;
		vmcs12->guest_pdptr2 = evmcs->guest_pdptr2;
		vmcs12->guest_pdptr3 = evmcs->guest_pdptr3;
		vmcs12->guest_pending_dbg_exceptions =
			evmcs->guest_pending_dbg_exceptions;
		vmcs12->guest_sysenter_esp = evmcs->guest_sysenter_esp;
		vmcs12->guest_sysenter_eip = evmcs->guest_sysenter_eip;
		vmcs12->guest_bndcfgs = evmcs->guest_bndcfgs;
		vmcs12->guest_activity_state = evmcs->guest_activity_state;
		vmcs12->guest_sysenter_cs = evmcs->guest_sysenter_cs;
	}

	/*
	 * Fields that have no counterpart in struct hv_enlightened_vmcs
	 * (e.g. the VM-exit MSR-store/load addresses and counts, and the
	 * I/O instruction exit information) are intentionally not copied
	 * here.
	 */
	return;
}

static void copy_vmcs12_to_enlightened(struct vcpu_vmx *vmx)
{
	struct vmcs12 *vmcs12 = vmx->nested.cached_vmcs12;
	struct hv_enlightened_vmcs *evmcs = vmx->nested.hv_evmcs;

	/*
	 * Host state and control fields owned by L1, as well as fields with
	 * no evmcs counterpart, should not be changed by KVM and are
	 * deliberately not written back here; only guest state and VM-exit
	 * information fields are synced.
	 */
	evmcs->guest_es_selector = vmcs12->guest_es_selector;
	evmcs->guest_cs_selector = vmcs12->guest_cs_selector;
	evmcs->guest_ss_selector = vmcs12->guest_ss_selector;
	evmcs->guest_ds_selector = vmcs12->guest_ds_selector;
	evmcs->guest_fs_selector = vmcs12->guest_fs_selector;
	evmcs->guest_gs_selector = vmcs12->guest_gs_selector;
	evmcs->guest_ldtr_selector = vmcs12->guest_ldtr_selector;
	evmcs->guest_tr_selector = vmcs12->guest_tr_selector;

	evmcs->guest_es_limit = vmcs12->guest_es_limit;
	evmcs->guest_cs_limit = vmcs12->guest_cs_limit;
	evmcs->guest_ss_limit = vmcs12->guest_ss_limit;
	evmcs->guest_ds_limit = vmcs12->guest_ds_limit;
	evmcs->guest_fs_limit = vmcs12->guest_fs_limit;
	evmcs->guest_gs_limit = vmcs12->guest_gs_limit;
	evmcs->guest_ldtr_limit = vmcs12->guest_ldtr_limit;
	evmcs->guest_tr_limit = vmcs12->guest_tr_limit;
	evmcs->guest_gdtr_limit = vmcs12->guest_gdtr_limit;
	evmcs->guest_idtr_limit = vmcs12->guest_idtr_limit;

	evmcs->guest_es_ar_bytes = vmcs12->guest_es_ar_bytes;
	evmcs->guest_cs_ar_bytes = vmcs12->guest_cs_ar_bytes;
	evmcs->guest_ss_ar_bytes = vmcs12->guest_ss_ar_bytes;
	evmcs->guest_ds_ar_bytes = vmcs12->guest_ds_ar_bytes;
	evmcs->guest_fs_ar_bytes = vmcs12->guest_fs_ar_bytes;
	evmcs->guest_gs_ar_bytes = vmcs12->guest_gs_ar_bytes;
	evmcs->guest_ldtr_ar_bytes = vmcs12->guest_ldtr_ar_bytes;
	evmcs->guest_tr_ar_bytes = vmcs12->guest_tr_ar_bytes;

	evmcs->guest_es_base = vmcs12->guest_es_base;
	evmcs->guest_cs_base = vmcs12->guest_cs_base;
	evmcs->guest_ss_base = vmcs12->guest_ss_base;
	evmcs->guest_ds_base = vmcs12->guest_ds_base;
	evmcs->guest_fs_base = vmcs12->guest_fs_base;
	evmcs->guest_gs_base = vmcs12->guest_gs_base;
	evmcs->guest_ldtr_base = vmcs12->guest_ldtr_base;
	evmcs->guest_tr_base = vmcs12->guest_tr_base;
	evmcs->guest_gdtr_base = vmcs12->guest_gdtr_base;
	evmcs->guest_idtr_base = vmcs12->guest_idtr_base;

	evmcs->guest_ia32_pat = vmcs12->guest_ia32_pat;
	evmcs->guest_ia32_efer = vmcs12->guest_ia32_efer;

	evmcs->guest_pdptr0 = vmcs12->guest_pdptr0;
	evmcs->guest_pdptr1 = vmcs12->guest_pdptr1;
	evmcs->guest_pdptr2 = vmcs12->guest_pdptr2;
	evmcs->guest_pdptr3 = vmcs12->guest_pdptr3;

	evmcs->guest_pending_dbg_exceptions =
		vmcs12->guest_pending_dbg_exceptions;
	evmcs->guest_sysenter_esp = vmcs12->guest_sysenter_esp;
	evmcs->guest_sysenter_eip = vmcs12->guest_sysenter_eip;

	evmcs->guest_activity_state = vmcs12->guest_activity_state;
	evmcs->guest_sysenter_cs = vmcs12->guest_sysenter_cs;

	evmcs->guest_cr0 = vmcs12->guest_cr0;
	evmcs->guest_cr3 = vmcs12->guest_cr3;
	evmcs->guest_cr4 = vmcs12->guest_cr4;
	evmcs->guest_dr7 = vmcs12->guest_dr7;

	evmcs->guest_physical_address = vmcs12->guest_physical_address;

	evmcs->vm_instruction_error = vmcs12->vm_instruction_error;
	evmcs->vm_exit_reason = vmcs12->vm_exit_reason;
	evmcs->vm_exit_intr_info = vmcs12->vm_exit_intr_info;
	evmcs->vm_exit_intr_error_code = vmcs12->vm_exit_intr_error_code;
	evmcs->idt_vectoring_info_field = vmcs12->idt_vectoring_info_field;
	evmcs->idt_vectoring_error_code = vmcs12->idt_vectoring_error_code;
	evmcs->vm_exit_instruction_len = vmcs12->vm_exit_instruction_len;
	evmcs->vmx_instruction_info = vmcs12->vmx_instruction_info;

	evmcs->exit_qualification = vmcs12->exit_qualification;

	evmcs->guest_linear_address = vmcs12->guest_linear_address;
	evmcs->guest_rsp = vmcs12->guest_rsp;
	evmcs->guest_rflags = vmcs12->guest_rflags;

	evmcs->guest_interruptibility_info =
		vmcs12->guest_interruptibility_info;
	evmcs->cpu_based_vm_exec_control = vmcs12->cpu_based_vm_exec_control;
	evmcs->vm_entry_controls = vmcs12->vm_entry_controls;
	evmcs->vm_entry_intr_info_field = vmcs12->vm_entry_intr_info_field;
	evmcs->vm_entry_exception_error_code =
		vmcs12->vm_entry_exception_error_code;
	evmcs->vm_entry_instruction_len = vmcs12->vm_entry_instruction_len;

	evmcs->guest_rip = vmcs12->guest_rip;

	evmcs->guest_bndcfgs = vmcs12->guest_bndcfgs;

	return;
}

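/*
 * This is an equivalent of the nested hypervisor executing the vmptrld
 * instruction.
 */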
1981static enum nested_evmptrld_status nested_vmx_handle_enlightened_vmptrld(
1982 struct kvm_vcpu *vcpu, bool from_launch)
1983{
1984 struct vcpu_vmx *vmx = to_vmx(vcpu);
1985 bool evmcs_gpa_changed = false;
1986 u64 evmcs_gpa;
1987
1988 if (likely(!vmx->nested.enlightened_vmcs_enabled))
1989 return EVMPTRLD_DISABLED;
1990
1991 if (!nested_enlightened_vmentry(vcpu, &evmcs_gpa)) {
1992 nested_release_evmcs(vcpu);
1993 return EVMPTRLD_DISABLED;
1994 }
1995
1996 if (unlikely(evmcs_gpa != vmx->nested.hv_evmcs_vmptr)) {
1997 vmx->nested.current_vmptr = -1ull;
1998
1999 nested_release_evmcs(vcpu);
2000
2001 if (kvm_vcpu_map(vcpu, gpa_to_gfn(evmcs_gpa),
2002 &vmx->nested.hv_evmcs_map))
2003 return EVMPTRLD_ERROR;
2004
2005 vmx->nested.hv_evmcs = vmx->nested.hv_evmcs_map.hva;
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029 if ((vmx->nested.hv_evmcs->revision_id != KVM_EVMCS_VERSION) &&
2030 (vmx->nested.hv_evmcs->revision_id != VMCS12_REVISION)) {
2031 nested_release_evmcs(vcpu);
2032 return EVMPTRLD_VMFAIL;
2033 }
2034
2035 vmx->nested.hv_evmcs_vmptr = evmcs_gpa;
2036
2037 evmcs_gpa_changed = true;
2038
2039
2040
2041
2042
2043
2044 if (from_launch) {
2045 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
2046 memset(vmcs12, 0, sizeof(*vmcs12));
2047 vmcs12->hdr.revision_id = VMCS12_REVISION;
2048 }
2049
2050 }
2051
2052
2053
2054
2055
2056 if (from_launch || evmcs_gpa_changed)
2057 vmx->nested.hv_evmcs->hv_clean_fields &=
2058 ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
2059
2060 return EVMPTRLD_SUCCEEDED;
2061}
2062
2063void nested_sync_vmcs12_to_shadow(struct kvm_vcpu *vcpu)
2064{
2065 struct vcpu_vmx *vmx = to_vmx(vcpu);
2066
2067 if (evmptr_is_valid(vmx->nested.hv_evmcs_vmptr))
2068 copy_vmcs12_to_enlightened(vmx);
2069 else
2070 copy_vmcs12_to_shadow(vmx);
2071
2072 vmx->nested.need_vmcs12_to_shadow_sync = false;
2073}
2074
2075static enum hrtimer_restart vmx_preemption_timer_fn(struct hrtimer *timer)
2076{
2077 struct vcpu_vmx *vmx =
2078 container_of(timer, struct vcpu_vmx, nested.preemption_timer);
2079
2080 vmx->nested.preemption_timer_expired = true;
2081 kvm_make_request(KVM_REQ_EVENT, &vmx->vcpu);
2082 kvm_vcpu_kick(&vmx->vcpu);
2083
2084 return HRTIMER_NORESTART;
2085}
2086
2087static u64 vmx_calc_preemption_timer_value(struct kvm_vcpu *vcpu)
2088{
2089 struct vcpu_vmx *vmx = to_vmx(vcpu);
2090 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
2091
2092 u64 l1_scaled_tsc = kvm_read_l1_tsc(vcpu, rdtsc()) >>
2093 VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE;
2094
2095 if (!vmx->nested.has_preemption_timer_deadline) {
2096 vmx->nested.preemption_timer_deadline =
2097 vmcs12->vmx_preemption_timer_value + l1_scaled_tsc;
2098 vmx->nested.has_preemption_timer_deadline = true;
2099 }
2100 return vmx->nested.preemption_timer_deadline - l1_scaled_tsc;
2101}
2102
2103static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu,
2104 u64 preemption_timeout)
2105{
2106 struct vcpu_vmx *vmx = to_vmx(vcpu);
2107
2108
2109
2110
2111
2112 if (preemption_timeout == 0) {
2113 vmx_preemption_timer_fn(&vmx->nested.preemption_timer);
2114 return;
2115 }
2116
2117 if (vcpu->arch.virtual_tsc_khz == 0)
2118 return;
2119
2120 preemption_timeout <<= VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE;
2121 preemption_timeout *= 1000000;
2122 do_div(preemption_timeout, vcpu->arch.virtual_tsc_khz);
2123 hrtimer_start(&vmx->nested.preemption_timer,
2124 ktime_add_ns(ktime_get(), preemption_timeout),
2125 HRTIMER_MODE_ABS_PINNED);
2126}
2127
2128static u64 nested_vmx_calc_efer(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
2129{
2130 if (vmx->nested.nested_run_pending &&
2131 (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER))
2132 return vmcs12->guest_ia32_efer;
2133 else if (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE)
2134 return vmx->vcpu.arch.efer | (EFER_LMA | EFER_LME);
2135 else
2136 return vmx->vcpu.arch.efer & ~(EFER_LMA | EFER_LME);
2137}
2138
static void prepare_vmcs02_constant_state(struct vcpu_vmx *vmx)
{
	/*
	 * If vmcs02 hasn't been initialized, set the constant vmcs02 state
	 * according to L0's settings (vmcs12 is irrelevant here).  Host
	 * fields that come from L0 and are not constant, e.g. HOST_CR3,
	 * will be set as needed prior to VMLAUNCH/VMRESUME; there is no
	 * need to set them here.
	 */
	if (vmx->nested.vmcs02_initialized)
		return;
	vmx->nested.vmcs02_initialized = true;

	/*
	 * We don't care what the EPTP value is, we just need to guarantee
	 * it's valid so we don't get a false positive when doing early
	 * consistency checks.
	 */
	if (enable_ept && nested_early_check)
		vmcs_write64(EPT_POINTER,
			     construct_eptp(&vmx->vcpu, 0, PT64_ROOT_4LEVEL));

	/* All VMFUNCs are currently emulated through L0 vmexits.  */
	if (cpu_has_vmx_vmfunc())
		vmcs_write64(VM_FUNCTION_CONTROL, 0);

	if (cpu_has_vmx_posted_intr())
		vmcs_write16(POSTED_INTR_NV, POSTED_INTR_NESTED_VECTOR);

	if (cpu_has_vmx_msr_bitmap())
		vmcs_write64(MSR_BITMAP, __pa(vmx->nested.vmcs02.msr_bitmap));

	/*
	 * PML is emulated for L2, but never enabled in hardware as the MMU
	 * handles A/D emulation.  Disabling PML for L2 also avoids having to
	 * deal with filtering out L2 GPAs from the buffer.
	 */
	if (enable_pml) {
		vmcs_write64(PML_ADDRESS, 0);
		vmcs_write16(GUEST_PML_INDEX, -1);
	}

	if (cpu_has_vmx_encls_vmexit())
		vmcs_write64(ENCLS_EXITING_BITMAP, -1ull);

	/*
	 * Set the MSR load/store lists to match L0's settings.  Only the
	 * addresses are constant (for vmcs02), the counts can change based
	 * on L2's behavior, e.g. switching to/from long mode.
	 */
	vmcs_write64(VM_EXIT_MSR_STORE_ADDR, __pa(vmx->msr_autostore.guest.val));
	vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host.val));
	vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest.val));

	vmx_set_constant_host_state(vmx);
}

static void prepare_vmcs02_early_rare(struct vcpu_vmx *vmx,
				      struct vmcs12 *vmcs12)
{
	prepare_vmcs02_constant_state(vmx);

	vmcs_write64(VMCS_LINK_POINTER, -1ull);

	if (enable_vpid) {
		if (nested_cpu_has_vpid(vmcs12) && vmx->nested.vpid02)
			vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->nested.vpid02);
		else
			vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
	}
}

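/*
 * Merge the control fields of vmcs02 that must be up-to-date before the
 * early consistency checks can run.  Each class of controls starts from
 * L0's (vmcs01) configuration and layers vmcs12's requests on top, while
 * stripping out anything KVM emulates instead of delegating to hardware.
 */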
static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
{
	u32 exec_control;
	u64 guest_efer = nested_vmx_calc_efer(vmx, vmcs12);

	if (vmx->nested.dirty_vmcs12 || evmptr_is_valid(vmx->nested.hv_evmcs_vmptr))
		prepare_vmcs02_early_rare(vmx, vmcs12);

	/*
	 * PIN CONTROLS
	 */
	exec_control = vmx_pin_based_exec_ctrl(vmx);
	exec_control |= (vmcs12->pin_based_vm_exec_control &
			 ~PIN_BASED_VMX_PREEMPTION_TIMER);

	/* Posted interrupts setting is only taken from vmcs12.  */
	if (nested_cpu_has_posted_intr(vmcs12)) {
		vmx->nested.posted_intr_nv = vmcs12->posted_intr_nv;
		vmx->nested.pi_pending = false;
	} else {
		exec_control &= ~PIN_BASED_POSTED_INTR;
	}
	pin_controls_set(vmx, exec_control);

	/*
	 * EXEC CONTROLS
	 */
	exec_control = vmx_exec_control(vmx);
	exec_control &= ~CPU_BASED_INTR_WINDOW_EXITING;
	exec_control &= ~CPU_BASED_NMI_WINDOW_EXITING;
	exec_control &= ~CPU_BASED_TPR_SHADOW;
	exec_control |= vmcs12->cpu_based_vm_exec_control;

	vmx->nested.l1_tpr_threshold = -1;
	if (exec_control & CPU_BASED_TPR_SHADOW)
		vmcs_write32(TPR_THRESHOLD, vmcs12->tpr_threshold);
#ifdef CONFIG_X86_64
	else
		exec_control |= CPU_BASED_CR8_LOAD_EXITING |
				CPU_BASED_CR8_STORE_EXITING;
#endif

	/*
	 * A vmexit (to either L1 hypervisor or L0 userspace) is always needed
	 * for I/O port accesses.
	 */
	exec_control |= CPU_BASED_UNCOND_IO_EXITING;
	exec_control &= ~CPU_BASED_USE_IO_BITMAPS;

	/*
	 * This bit will be computed in nested_get_vmcs12_pages, because
	 * we do not have access to L1's MSR bitmap yet.  For now, keep
	 * the same bit as before, hoping to avoid multiple VMWRITEs that
	 * only set/clear the bit.
	 */
	exec_control &= ~CPU_BASED_USE_MSR_BITMAPS;
	exec_control |= exec_controls_get(vmx) & CPU_BASED_USE_MSR_BITMAPS;

	exec_controls_set(vmx, exec_control);

	/*
	 * SECONDARY EXEC CONTROLS
	 */
	if (cpu_has_secondary_exec_ctrls()) {
		exec_control = vmx->secondary_exec_control;

		/* Take the following fields only from vmcs12 */
		exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
				  SECONDARY_EXEC_ENABLE_INVPCID |
				  SECONDARY_EXEC_ENABLE_RDTSCP |
				  SECONDARY_EXEC_XSAVES |
				  SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE |
				  SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
				  SECONDARY_EXEC_APIC_REGISTER_VIRT |
				  SECONDARY_EXEC_ENABLE_VMFUNC |
				  SECONDARY_EXEC_TSC_SCALING);
		if (nested_cpu_has(vmcs12,
				   CPU_BASED_ACTIVATE_SECONDARY_CONTROLS))
			exec_control |= vmcs12->secondary_vm_exec_control;

		/* PML is emulated and never enabled in hardware for L2. */
		exec_control &= ~SECONDARY_EXEC_ENABLE_PML;

		/* VMCS shadowing for L2 is emulated for now */
		exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS;

		/*
		 * Preset *DT exiting when emulating UMIP, so that vmx_set_cr4()
		 * will not have to rewrite the controls just for this bit.
		 */
		if (!boot_cpu_has(X86_FEATURE_UMIP) && vmx_umip_emulated() &&
		    (vmcs12->guest_cr4 & X86_CR4_UMIP))
			exec_control |= SECONDARY_EXEC_DESC;

		if (exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY)
			vmcs_write16(GUEST_INTR_STATUS,
				vmcs12->guest_intr_status);

		if (!nested_cpu_has2(vmcs12, SECONDARY_EXEC_UNRESTRICTED_GUEST))
			exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST;

		if (exec_control & SECONDARY_EXEC_ENCLS_EXITING)
			vmx_write_encls_bitmap(&vmx->vcpu, vmcs12);

		secondary_exec_controls_set(vmx, exec_control);
	}

	/*
	 * ENTRY CONTROLS
	 *
	 * vmcs12's VM_{ENTRY,EXIT}_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE
	 * are emulated by vmx_set_efer() in prepare_vmcs02(), but speculate
	 * on the related bits (if supported by the CPU) in the hope that
	 * we can avoid VMWrites during vmx_set_efer().
	 */
	exec_control = (vmcs12->vm_entry_controls | vmx_vmentry_ctrl()) &
			~VM_ENTRY_IA32E_MODE & ~VM_ENTRY_LOAD_IA32_EFER;
	if (cpu_has_load_ia32_efer()) {
		if (guest_efer & EFER_LMA)
			exec_control |= VM_ENTRY_IA32E_MODE;
		if (guest_efer != host_efer)
			exec_control |= VM_ENTRY_LOAD_IA32_EFER;
	}
	vm_entry_controls_set(vmx, exec_control);

	/*
	 * EXIT CONTROLS
	 *
	 * L2->L1 exit controls are emulated - the hardware exit is to L0 so
	 * we should use its exit controls. Note that VM_EXIT_LOAD_IA32_EFER
	 * bits may be modified by vmx_set_efer() in prepare_vmcs02().
	 */
	exec_control = vmx_vmexit_ctrl();
	if (cpu_has_load_ia32_efer() && guest_efer != host_efer)
		exec_control |= VM_EXIT_LOAD_IA32_EFER;
	vm_exit_controls_set(vmx, exec_control);

	/*
	 * Interrupt/Exception Fields
	 */
	if (vmx->nested.nested_run_pending) {
		vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
			     vmcs12->vm_entry_intr_info_field);
		vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE,
			     vmcs12->vm_entry_exception_error_code);
		vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
			     vmcs12->vm_entry_instruction_len);
		vmcs_write32(GUEST_INTERRUPTIBILITY_INFO,
			     vmcs12->guest_interruptibility_info);
		vmx->loaded_vmcs->nmi_known_unmasked =
			!(vmcs12->guest_interruptibility_info & GUEST_INTR_STATE_NMI);
	} else {
		vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0);
	}
}

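/*
 * Copy the "rare" guest-state fields from vmcs12 into vmcs02.  These fields
 * are skipped entirely when an enlightened VMCS is in use and its clean
 * fields bits say the corresponding group hasn't changed, which is the main
 * reason they are segregated from prepare_vmcs02().
 */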
static void prepare_vmcs02_rare(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
{
	struct hv_enlightened_vmcs *hv_evmcs = vmx->nested.hv_evmcs;

	if (!hv_evmcs || !(hv_evmcs->hv_clean_fields &
			   HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2)) {
		vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector);
		vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector);
		vmcs_write16(GUEST_SS_SELECTOR, vmcs12->guest_ss_selector);
		vmcs_write16(GUEST_DS_SELECTOR, vmcs12->guest_ds_selector);
		vmcs_write16(GUEST_FS_SELECTOR, vmcs12->guest_fs_selector);
		vmcs_write16(GUEST_GS_SELECTOR, vmcs12->guest_gs_selector);
		vmcs_write16(GUEST_LDTR_SELECTOR, vmcs12->guest_ldtr_selector);
		vmcs_write16(GUEST_TR_SELECTOR, vmcs12->guest_tr_selector);
		vmcs_write32(GUEST_ES_LIMIT, vmcs12->guest_es_limit);
		vmcs_write32(GUEST_CS_LIMIT, vmcs12->guest_cs_limit);
		vmcs_write32(GUEST_SS_LIMIT, vmcs12->guest_ss_limit);
		vmcs_write32(GUEST_DS_LIMIT, vmcs12->guest_ds_limit);
		vmcs_write32(GUEST_FS_LIMIT, vmcs12->guest_fs_limit);
		vmcs_write32(GUEST_GS_LIMIT, vmcs12->guest_gs_limit);
		vmcs_write32(GUEST_LDTR_LIMIT, vmcs12->guest_ldtr_limit);
		vmcs_write32(GUEST_TR_LIMIT, vmcs12->guest_tr_limit);
		vmcs_write32(GUEST_GDTR_LIMIT, vmcs12->guest_gdtr_limit);
		vmcs_write32(GUEST_IDTR_LIMIT, vmcs12->guest_idtr_limit);
		vmcs_write32(GUEST_CS_AR_BYTES, vmcs12->guest_cs_ar_bytes);
		vmcs_write32(GUEST_SS_AR_BYTES, vmcs12->guest_ss_ar_bytes);
		vmcs_write32(GUEST_ES_AR_BYTES, vmcs12->guest_es_ar_bytes);
		vmcs_write32(GUEST_DS_AR_BYTES, vmcs12->guest_ds_ar_bytes);
		vmcs_write32(GUEST_FS_AR_BYTES, vmcs12->guest_fs_ar_bytes);
		vmcs_write32(GUEST_GS_AR_BYTES, vmcs12->guest_gs_ar_bytes);
		vmcs_write32(GUEST_LDTR_AR_BYTES, vmcs12->guest_ldtr_ar_bytes);
		vmcs_write32(GUEST_TR_AR_BYTES, vmcs12->guest_tr_ar_bytes);
		vmcs_writel(GUEST_ES_BASE, vmcs12->guest_es_base);
		vmcs_writel(GUEST_CS_BASE, vmcs12->guest_cs_base);
		vmcs_writel(GUEST_SS_BASE, vmcs12->guest_ss_base);
		vmcs_writel(GUEST_DS_BASE, vmcs12->guest_ds_base);
		vmcs_writel(GUEST_FS_BASE, vmcs12->guest_fs_base);
		vmcs_writel(GUEST_GS_BASE, vmcs12->guest_gs_base);
		vmcs_writel(GUEST_LDTR_BASE, vmcs12->guest_ldtr_base);
		vmcs_writel(GUEST_TR_BASE, vmcs12->guest_tr_base);
		vmcs_writel(GUEST_GDTR_BASE, vmcs12->guest_gdtr_base);
		vmcs_writel(GUEST_IDTR_BASE, vmcs12->guest_idtr_base);

		vmx->segment_cache.bitmask = 0;
	}

	if (!hv_evmcs || !(hv_evmcs->hv_clean_fields &
			   HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1)) {
		vmcs_write32(GUEST_SYSENTER_CS, vmcs12->guest_sysenter_cs);
		vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS,
			    vmcs12->guest_pending_dbg_exceptions);
		vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->guest_sysenter_esp);
		vmcs_writel(GUEST_SYSENTER_EIP, vmcs12->guest_sysenter_eip);

		/*
		 * L1's PDPTRs are only consumed by hardware when EPT is
		 * enabled for L2 with PAE paging; when EPT is disabled the
		 * "real" PDPTRs are loaded via nested_vmx_load_cr3() instead.
		 */
		if (enable_ept) {
			vmcs_write64(GUEST_PDPTR0, vmcs12->guest_pdptr0);
			vmcs_write64(GUEST_PDPTR1, vmcs12->guest_pdptr1);
			vmcs_write64(GUEST_PDPTR2, vmcs12->guest_pdptr2);
			vmcs_write64(GUEST_PDPTR3, vmcs12->guest_pdptr3);
		}

		if (kvm_mpx_supported() && vmx->nested.nested_run_pending &&
		    (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS))
			vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs);
	}

	if (nested_cpu_has_xsaves(vmcs12))
		vmcs_write64(XSS_EXIT_BITMAP, vmcs12->xss_exit_bitmap);

	/*
	 * Whether page-faults are trapped is determined by a combination of
	 * 3 settings: PFEC_MASK, PFEC_MATCH and EXCEPTION_BITMAP.PF.  If L0
	 * doesn't care about page faults then we should set all of these to
	 * L1's desires. However, if L0 does care about (some) page faults, it
	 * is not easy (if at all possible?) to merge L0 and L1's desires, we
	 * simply ask to exit on each and every L2 page fault. This is done by
	 * setting MASK=MATCH=0 and (see below) EB.PF=1.
	 * Note that below we don't need special code to set EB.PF beyond the
	 * "or"ing of the EB of vmcs01 and vmcs12, because when enable_ept,
	 * vmcs01's EB.PF is 0 so the "or" will take vmcs12's value, and when
	 * !enable_ept, EB.PF is 1, so the "or" will always be 1.
	 */
	if (vmx_need_pf_intercept(&vmx->vcpu)) {
		/*
		 * TODO: if both L0 and L1 need the same MASK and MATCH,
		 * go ahead and use it?
		 */
		vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, 0);
		vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, 0);
	} else {
		vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, vmcs12->page_fault_error_code_mask);
		vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, vmcs12->page_fault_error_code_match);
	}

	if (cpu_has_vmx_apicv()) {
		vmcs_write64(EOI_EXIT_BITMAP0, vmcs12->eoi_exit_bitmap0);
		vmcs_write64(EOI_EXIT_BITMAP1, vmcs12->eoi_exit_bitmap1);
		vmcs_write64(EOI_EXIT_BITMAP2, vmcs12->eoi_exit_bitmap2);
		vmcs_write64(EOI_EXIT_BITMAP3, vmcs12->eoi_exit_bitmap3);
	}

	/*
	 * Make sure the msr_autostore list is up to date before we set the
	 * count in the vmcs02.
	 */
	prepare_vmx_msr_autostore_list(&vmx->vcpu, MSR_IA32_TSC);

	vmcs_write32(VM_EXIT_MSR_STORE_COUNT, vmx->msr_autostore.guest.nr);
	vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr);
	vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr);

	set_cr4_guest_host_mask(vmx);
}

/*
 * prepare_vmcs02 is called when the L1 guest hypervisor runs its nested
 * L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it
 * with L0's requirements for its guest (a.k.a. vmcs01), so we can run the L2
 * guest in a way that will both be appropriate to L1's requests, and our
 * needs. In addition to modifying the active vmcs (which is vmcs02), this
 * function also has additional necessary side-effects, like setting various
 * vcpu->arch fields.
 * Returns 0 on success, -EINVAL on failure, with *entry_failure_code set to
 * the VM-entry failure code on failure.
 */
static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
			  bool from_vmentry,
			  enum vm_entry_failure_code *entry_failure_code)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	bool load_guest_pdptrs_vmcs12 = false;

	if (vmx->nested.dirty_vmcs12 || evmptr_is_valid(vmx->nested.hv_evmcs_vmptr)) {
		prepare_vmcs02_rare(vmx, vmcs12);
		vmx->nested.dirty_vmcs12 = false;

		load_guest_pdptrs_vmcs12 = !evmptr_is_valid(vmx->nested.hv_evmcs_vmptr) ||
			!(vmx->nested.hv_evmcs->hv_clean_fields &
			  HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1);
	}

	if (vmx->nested.nested_run_pending &&
	    (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) {
		kvm_set_dr(vcpu, 7, vmcs12->guest_dr7);
		vmcs_write64(GUEST_IA32_DEBUGCTL, vmcs12->guest_ia32_debugctl);
	} else {
		kvm_set_dr(vcpu, 7, vcpu->arch.dr7);
		vmcs_write64(GUEST_IA32_DEBUGCTL, vmx->nested.vmcs01_debugctl);
	}
	if (kvm_mpx_supported() && (!vmx->nested.nested_run_pending ||
	    !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS)))
		vmcs_write64(GUEST_BNDCFGS, vmx->nested.vmcs01_guest_bndcfgs);
	vmx_set_rflags(vcpu, vmcs12->guest_rflags);

	/*
	 * EXCEPTION_BITMAP and CR0_GUEST_HOST_MASK should basically be the
	 * same as in vmcs01 (if it was running L1), except for the parts
	 * that depend on L2's CR0; recompute them against vmcs12.
	 */
	vmx_update_exception_bitmap(vcpu);
	vcpu->arch.cr0_guest_owned_bits &= ~vmcs12->cr0_guest_host_mask;
	vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits);

	if (vmx->nested.nested_run_pending &&
	    (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT)) {
		vmcs_write64(GUEST_IA32_PAT, vmcs12->guest_ia32_pat);
		vcpu->arch.pat = vmcs12->guest_ia32_pat;
	} else if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
		vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat);
	}

	vcpu->arch.tsc_offset = kvm_calc_nested_tsc_offset(
			vcpu->arch.l1_tsc_offset,
			vmx_get_l2_tsc_offset(vcpu),
			vmx_get_l2_tsc_multiplier(vcpu));

	vcpu->arch.tsc_scaling_ratio = kvm_calc_nested_tsc_multiplier(
			vcpu->arch.l1_tsc_scaling_ratio,
			vmx_get_l2_tsc_multiplier(vcpu));

	vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset);
	if (kvm_has_tsc_control)
		vmcs_write64(TSC_MULTIPLIER, vcpu->arch.tsc_scaling_ratio);

	nested_vmx_transition_tlb_flush(vcpu, vmcs12, true);

	if (nested_cpu_has_ept(vmcs12))
		nested_ept_init_mmu_context(vcpu);

	/*
	 * This sets GUEST_CR0 to vmcs12->guest_cr0, possibly modifying those
	 * bits which we consider mandatory enabled.
	 * The CR0_READ_SHADOW is what L2 should have expected to read given
	 * the specifications by L1; It's not enough to take
	 * vmcs12->cr0_read_shadow because on our cr0_guest_host_mask we
	 * have more bits than L1 expected.
	 */
	vmx_set_cr0(vcpu, vmcs12->guest_cr0);
	vmcs_writel(CR0_READ_SHADOW, nested_read_cr0(vmcs12));

	vmx_set_cr4(vcpu, vmcs12->guest_cr4);
	vmcs_writel(CR4_READ_SHADOW, nested_read_cr4(vmcs12));

	vcpu->arch.efer = nested_vmx_calc_efer(vmx, vmcs12);
	/* Note: may modify VM_ENTRY/EXIT_CONTROLS and GUEST/HOST_IA32_EFER */
	vmx_set_efer(vcpu, vcpu->arch.efer);

	/*
	 * Guest state is invalid and unrestricted guest is disabled,
	 * which means L1 attempted VMEntry to L2 with invalid state.
	 * Fail the VMEntry.
	 */
	if (CC(!vmx_guest_state_valid(vcpu))) {
		*entry_failure_code = ENTRY_FAIL_DEFAULT;
		return -EINVAL;
	}

	/* Shadow page tables on either EPT or shadow page tables. */
	if (nested_vmx_load_cr3(vcpu, vmcs12->guest_cr3, nested_cpu_has_ept(vmcs12),
				from_vmentry, entry_failure_code))
		return -EINVAL;

	/*
	 * Immediately write vmcs02.GUEST_CR3.  It will be propagated to vmcs12
	 * on nested VM-Exit, which can occur without actually running L2 and
	 * thus without hitting vmx_load_mmu_pgd(), e.g. if L1 is entering L2
	 * with vmcs12.GUEST_ACTIVITY_STATE=HLT, in which case KVM will
	 * intercept the transition to HLT instead of running L2.
	 */
	if (enable_ept)
		vmcs_writel(GUEST_CR3, vmcs12->guest_cr3);

	/* Late preparation of GUEST_PDPTRs now that EFER and CRs are set. */
	if (load_guest_pdptrs_vmcs12 && nested_cpu_has_ept(vmcs12) &&
	    is_pae_paging(vcpu)) {
		vmcs_write64(GUEST_PDPTR0, vmcs12->guest_pdptr0);
		vmcs_write64(GUEST_PDPTR1, vmcs12->guest_pdptr1);
		vmcs_write64(GUEST_PDPTR2, vmcs12->guest_pdptr2);
		vmcs_write64(GUEST_PDPTR3, vmcs12->guest_pdptr3);
	}

	if (!enable_ept)
		vcpu->arch.walk_mmu->inject_page_fault = vmx_inject_page_fault_nested;

	if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) &&
	    WARN_ON_ONCE(kvm_set_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL,
				     vmcs12->guest_ia32_perf_global_ctrl)))
		return -EINVAL;

	kvm_rsp_write(vcpu, vmcs12->guest_rsp);
	kvm_rip_write(vcpu, vmcs12->guest_rip);

	/*
	 * It was observed that genuine Hyper-V running in L1 doesn't reset
	 * 'hv_clean_fields' by itself, it only sets the corresponding dirty
	 * bits when it changes something in the eVMCS.  Mark all fields as
	 * clean here so that only the fields L1 actually dirties are
	 * re-copied on the next nested VM-Enter.
	 */
	if (evmptr_is_valid(vmx->nested.hv_evmcs_vmptr))
		vmx->nested.hv_evmcs->hv_clean_fields |=
			HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;

	return 0;
}

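/*
 * Per the SDM, "NMI exiting" must be set if "virtual NMIs" is set, and
 * "virtual NMIs" must be set if "NMI-window exiting" is set.  Enforce both
 * consistency checks on vmcs12's pin-based and primary exec controls.
 */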
static int nested_vmx_check_nmi_controls(struct vmcs12 *vmcs12)
{
	if (CC(!nested_cpu_has_nmi_exiting(vmcs12) &&
	       nested_cpu_has_virtual_nmis(vmcs12)))
		return -EINVAL;

	if (CC(!nested_cpu_has_virtual_nmis(vmcs12) &&
	       nested_cpu_has(vmcs12, CPU_BASED_NMI_WINDOW_EXITING)))
		return -EINVAL;

	return 0;
}

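/*
 * Validate an EPTP value provided by L1 against the capabilities KVM
 * advertised in the IA32_VMX_EPT_VPID_CAP MSR: memory type, page-walk
 * length, reserved bits, and the accessed/dirty enable bit.
 */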
static bool nested_vmx_check_eptp(struct kvm_vcpu *vcpu, u64 new_eptp)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);

	/* Check for memory type validity */
	switch (new_eptp & VMX_EPTP_MT_MASK) {
	case VMX_EPTP_MT_UC:
		if (CC(!(vmx->nested.msrs.ept_caps & VMX_EPTP_UC_BIT)))
			return false;
		break;
	case VMX_EPTP_MT_WB:
		if (CC(!(vmx->nested.msrs.ept_caps & VMX_EPTP_WB_BIT)))
			return false;
		break;
	default:
		return false;
	}

	/* Page-walk levels validity. */
	switch (new_eptp & VMX_EPTP_PWL_MASK) {
	case VMX_EPTP_PWL_5:
		if (CC(!(vmx->nested.msrs.ept_caps & VMX_EPT_PAGE_WALK_5_BIT)))
			return false;
		break;
	case VMX_EPTP_PWL_4:
		if (CC(!(vmx->nested.msrs.ept_caps & VMX_EPT_PAGE_WALK_4_BIT)))
			return false;
		break;
	default:
		return false;
	}

	/* Reserved bits should not be set */
	if (CC(kvm_vcpu_is_illegal_gpa(vcpu, new_eptp) || ((new_eptp >> 7) & 0x1f)))
		return false;

	/* AD, if set, should be supported */
	if (new_eptp & VMX_EPTP_AD_ENABLE_BIT) {
		if (CC(!(vmx->nested.msrs.ept_caps & VMX_EPT_AD_BIT)))
			return false;
	}

	return true;
}

/*
 * Checks related to VM-Execution Control Fields
 */
static int nested_check_vm_execution_controls(struct kvm_vcpu *vcpu,
					      struct vmcs12 *vmcs12)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);

	if (CC(!vmx_control_verify(vmcs12->pin_based_vm_exec_control,
				   vmx->nested.msrs.pinbased_ctls_low,
				   vmx->nested.msrs.pinbased_ctls_high)) ||
	    CC(!vmx_control_verify(vmcs12->cpu_based_vm_exec_control,
				   vmx->nested.msrs.procbased_ctls_low,
				   vmx->nested.msrs.procbased_ctls_high)))
		return -EINVAL;

	if (nested_cpu_has(vmcs12, CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) &&
	    CC(!vmx_control_verify(vmcs12->secondary_vm_exec_control,
				   vmx->nested.msrs.secondary_ctls_low,
				   vmx->nested.msrs.secondary_ctls_high)))
		return -EINVAL;

	if (CC(vmcs12->cr3_target_count > nested_cpu_vmx_misc_cr3_count(vcpu)) ||
	    nested_vmx_check_io_bitmap_controls(vcpu, vmcs12) ||
	    nested_vmx_check_msr_bitmap_controls(vcpu, vmcs12) ||
	    nested_vmx_check_tpr_shadow_controls(vcpu, vmcs12) ||
	    nested_vmx_check_apic_access_controls(vcpu, vmcs12) ||
	    nested_vmx_check_apicv_controls(vcpu, vmcs12) ||
	    nested_vmx_check_nmi_controls(vmcs12) ||
	    nested_vmx_check_pml_controls(vcpu, vmcs12) ||
	    nested_vmx_check_unrestricted_guest_controls(vcpu, vmcs12) ||
	    nested_vmx_check_mode_based_ept_exec_controls(vcpu, vmcs12) ||
	    nested_vmx_check_shadow_vmcs_controls(vcpu, vmcs12) ||
	    CC(nested_cpu_has_vpid(vmcs12) && !vmcs12->virtual_processor_id))
		return -EINVAL;

	if (!nested_cpu_has_preemption_timer(vmcs12) &&
	    nested_cpu_has_save_preemption_timer(vmcs12))
		return -EINVAL;

	if (nested_cpu_has_ept(vmcs12) &&
	    CC(!nested_vmx_check_eptp(vcpu, vmcs12->ept_pointer)))
		return -EINVAL;

	if (nested_cpu_has_vmfunc(vmcs12)) {
		if (CC(vmcs12->vm_function_control &
		       ~vmx->nested.msrs.vmfunc_controls))
			return -EINVAL;

		if (nested_cpu_has_eptp_switching(vmcs12)) {
			if (CC(!nested_cpu_has_ept(vmcs12)) ||
			    CC(!page_address_valid(vcpu, vmcs12->eptp_list_address)))
				return -EINVAL;
		}
	}

	return 0;
}

/*
 * Checks related to VM-Exit Control Fields
 */
static int nested_check_vm_exit_controls(struct kvm_vcpu *vcpu,
					 struct vmcs12 *vmcs12)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);

	if (CC(!vmx_control_verify(vmcs12->vm_exit_controls,
				   vmx->nested.msrs.exit_ctls_low,
				   vmx->nested.msrs.exit_ctls_high)) ||
	    CC(nested_vmx_check_exit_msr_switch_controls(vcpu, vmcs12)))
		return -EINVAL;

	return 0;
}

/*
 * Checks related to VM-Entry Control Fields
 */
static int nested_check_vm_entry_controls(struct kvm_vcpu *vcpu,
					  struct vmcs12 *vmcs12)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);

	if (CC(!vmx_control_verify(vmcs12->vm_entry_controls,
				   vmx->nested.msrs.entry_ctls_low,
				   vmx->nested.msrs.entry_ctls_high)))
		return -EINVAL;

	/*
	 * From the Intel SDM, volume 3:
	 * Fields relevant to VM-entry event injection must be set properly.
	 * These fields are the VM-entry interruption-information field, the
	 * VM-entry exception error code, and the VM-entry instruction length.
	 */
	if (vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK) {
		u32 intr_info = vmcs12->vm_entry_intr_info_field;
		u8 vector = intr_info & INTR_INFO_VECTOR_MASK;
		u32 intr_type = intr_info & INTR_INFO_INTR_TYPE_MASK;
		bool has_error_code = intr_info & INTR_INFO_DELIVER_CODE_MASK;
		bool should_have_error_code;
		bool urg = nested_cpu_has2(vmcs12,
					   SECONDARY_EXEC_UNRESTRICTED_GUEST);
		bool prot_mode = !urg || vmcs12->guest_cr0 & X86_CR0_PE;

		/* VM-entry interruption-info field: interruption type */
		if (CC(intr_type == INTR_TYPE_RESERVED) ||
		    CC(intr_type == INTR_TYPE_OTHER_EVENT &&
		       !nested_cpu_supports_monitor_trap_flag(vcpu)))
			return -EINVAL;

		/* VM-entry interruption-info field: vector */
		if (CC(intr_type == INTR_TYPE_NMI_INTR && vector != NMI_VECTOR) ||
		    CC(intr_type == INTR_TYPE_HARD_EXCEPTION && vector > 31) ||
		    CC(intr_type == INTR_TYPE_OTHER_EVENT && vector != 0))
			return -EINVAL;

		/* VM-entry interruption-info field: deliver error code */
		should_have_error_code =
			intr_type == INTR_TYPE_HARD_EXCEPTION && prot_mode &&
			x86_exception_has_error_code(vector);
		if (CC(has_error_code != should_have_error_code))
			return -EINVAL;

		/* VM-entry exception error code */
		if (CC(has_error_code &&
		       vmcs12->vm_entry_exception_error_code & GENMASK(31, 16)))
			return -EINVAL;

		/* VM-entry interruption-info field: reserved bits */
		if (CC(intr_info & INTR_INFO_RESVD_BITS_MASK))
			return -EINVAL;

		/* VM-entry instruction length */
		switch (intr_type) {
		case INTR_TYPE_SOFT_EXCEPTION:
		case INTR_TYPE_SOFT_INTR:
		case INTR_TYPE_PRIV_SW_EXCEPTION:
			if (CC(vmcs12->vm_entry_instruction_len > 15) ||
			    CC(vmcs12->vm_entry_instruction_len == 0 &&
			       CC(!nested_cpu_has_zero_length_injection(vcpu))))
				return -EINVAL;
		}
	}

	if (nested_vmx_check_entry_msr_switch_controls(vcpu, vmcs12))
		return -EINVAL;

	return 0;
}

static int nested_vmx_check_controls(struct kvm_vcpu *vcpu,
				     struct vmcs12 *vmcs12)
{
	if (nested_check_vm_execution_controls(vcpu, vmcs12) ||
	    nested_check_vm_exit_controls(vcpu, vmcs12) ||
	    nested_check_vm_entry_controls(vcpu, vmcs12))
		return -EINVAL;

	if (to_vmx(vcpu)->nested.enlightened_vmcs_enabled)
		return nested_evmcs_check_controls(vmcs12);

	return 0;
}

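/*
 * Checks on the host-state area of vmcs12, per the SDM's "Checks on VMX
 * Controls and Host-State Area".  Failures here result in VMFail, not a
 * failed VM-Entry.
 */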
static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu,
				       struct vmcs12 *vmcs12)
{
	bool ia32e;

	if (CC(!nested_host_cr0_valid(vcpu, vmcs12->host_cr0)) ||
	    CC(!nested_host_cr4_valid(vcpu, vmcs12->host_cr4)) ||
	    CC(kvm_vcpu_is_illegal_gpa(vcpu, vmcs12->host_cr3)))
		return -EINVAL;

	if (CC(is_noncanonical_address(vmcs12->host_ia32_sysenter_esp, vcpu)) ||
	    CC(is_noncanonical_address(vmcs12->host_ia32_sysenter_eip, vcpu)))
		return -EINVAL;

	if ((vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PAT) &&
	    CC(!kvm_pat_valid(vmcs12->host_ia32_pat)))
		return -EINVAL;

	if ((vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) &&
	    CC(!kvm_valid_perf_global_ctrl(vcpu_to_pmu(vcpu),
					   vmcs12->host_ia32_perf_global_ctrl)))
		return -EINVAL;

#ifdef CONFIG_X86_64
	ia32e = !!(vcpu->arch.efer & EFER_LMA);
#else
	ia32e = false;
#endif

	if (ia32e) {
		if (CC(!(vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE)) ||
		    CC(!(vmcs12->host_cr4 & X86_CR4_PAE)))
			return -EINVAL;
	} else {
		if (CC(vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE) ||
		    CC(vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) ||
		    CC(vmcs12->host_cr4 & X86_CR4_PCIDE) ||
		    CC((vmcs12->host_rip) >> 32))
			return -EINVAL;
	}

	if (CC(vmcs12->host_cs_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) ||
	    CC(vmcs12->host_ss_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) ||
	    CC(vmcs12->host_ds_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) ||
	    CC(vmcs12->host_es_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) ||
	    CC(vmcs12->host_fs_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) ||
	    CC(vmcs12->host_gs_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) ||
	    CC(vmcs12->host_tr_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) ||
	    CC(vmcs12->host_cs_selector == 0) ||
	    CC(vmcs12->host_tr_selector == 0) ||
	    CC(vmcs12->host_ss_selector == 0 && !ia32e))
		return -EINVAL;

	if (CC(is_noncanonical_address(vmcs12->host_fs_base, vcpu)) ||
	    CC(is_noncanonical_address(vmcs12->host_gs_base, vcpu)) ||
	    CC(is_noncanonical_address(vmcs12->host_gdtr_base, vcpu)) ||
	    CC(is_noncanonical_address(vmcs12->host_idtr_base, vcpu)) ||
	    CC(is_noncanonical_address(vmcs12->host_tr_base, vcpu)) ||
	    CC(is_noncanonical_address(vmcs12->host_rip, vcpu)))
		return -EINVAL;

	/*
	 * If the load IA32_EFER VM-exit control is 1, bits reserved in the
	 * IA32_EFER MSR must be 0 in the field for that register. In addition,
	 * the values of the LMA and LME bits in the field must each be that of
	 * the host address-space size VM-exit control.
	 */
	if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER) {
		if (CC(!kvm_valid_efer(vcpu, vmcs12->host_ia32_efer)) ||
		    CC(ia32e != !!(vmcs12->host_ia32_efer & EFER_LMA)) ||
		    CC(ia32e != !!(vmcs12->host_ia32_efer & EFER_LME)))
			return -EINVAL;
	}

	return 0;
}

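/*
 * Validate the VMCS link pointer, which must either be -1 or point at a
 * valid, correctly-versioned shadow VMCS whose shadow flag matches whether
 * vmcs12 enables VMCS shadowing.
 */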
static int nested_vmx_check_vmcs_link_ptr(struct kvm_vcpu *vcpu,
					  struct vmcs12 *vmcs12)
{
	int r = 0;
	struct vmcs12 *shadow;
	struct kvm_host_map map;

	if (vmcs12->vmcs_link_pointer == -1ull)
		return 0;

	if (CC(!page_address_valid(vcpu, vmcs12->vmcs_link_pointer)))
		return -EINVAL;

	if (CC(kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->vmcs_link_pointer), &map)))
		return -EINVAL;

	shadow = map.hva;

	if (CC(shadow->hdr.revision_id != VMCS12_REVISION) ||
	    CC(shadow->hdr.shadow_vmcs != nested_cpu_has_shadow_vmcs(vmcs12)))
		r = -EINVAL;

	kvm_vcpu_unmap(vcpu, &map, false);
	return r;
}

/*
 * Checks related to Guest Non-register State
 */
static int nested_check_guest_non_reg_state(struct vmcs12 *vmcs12)
{
	if (CC(vmcs12->guest_activity_state != GUEST_ACTIVITY_ACTIVE &&
	       vmcs12->guest_activity_state != GUEST_ACTIVITY_HLT &&
	       vmcs12->guest_activity_state != GUEST_ACTIVITY_WAIT_SIPI))
		return -EINVAL;

	return 0;
}

static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu,
					struct vmcs12 *vmcs12,
					enum vm_entry_failure_code *entry_failure_code)
{
	bool ia32e;

	*entry_failure_code = ENTRY_FAIL_DEFAULT;

	if (CC(!nested_guest_cr0_valid(vcpu, vmcs12->guest_cr0)) ||
	    CC(!nested_guest_cr4_valid(vcpu, vmcs12->guest_cr4)))
		return -EINVAL;

	if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS) &&
	    CC(!kvm_dr7_valid(vmcs12->guest_dr7)))
		return -EINVAL;

	if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT) &&
	    CC(!kvm_pat_valid(vmcs12->guest_ia32_pat)))
		return -EINVAL;

	if (nested_vmx_check_vmcs_link_ptr(vcpu, vmcs12)) {
		*entry_failure_code = ENTRY_FAIL_VMCS_LINK_PTR;
		return -EINVAL;
	}

	if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) &&
	    CC(!kvm_valid_perf_global_ctrl(vcpu_to_pmu(vcpu),
					   vmcs12->guest_ia32_perf_global_ctrl)))
		return -EINVAL;

	/*
	 * If the load IA32_EFER VM-entry control is 1, the following checks
	 * are performed on the field for the IA32_EFER MSR:
	 * - Bits reserved in the IA32_EFER MSR must be 0.
	 * - Bit 10 (corresponding to IA32_EFER.LMA) must equal the value of
	 *   the IA-32e mode guest VM-entry control. It must also be identical
	 *   to bit 8 (LME) if bit 31 in the CR0 field (corresponding to
	 *   CR0.PG) is 1.
	 */
	if (to_vmx(vcpu)->nested.nested_run_pending &&
	    (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER)) {
		ia32e = (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) != 0;
		if (CC(!kvm_valid_efer(vcpu, vmcs12->guest_ia32_efer)) ||
		    CC(ia32e != !!(vmcs12->guest_ia32_efer & EFER_LMA)) ||
		    CC(((vmcs12->guest_cr0 & X86_CR0_PG) &&
			ia32e != !!(vmcs12->guest_ia32_efer & EFER_LME))))
			return -EINVAL;
	}

	if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS) &&
	    (CC(is_noncanonical_address(vmcs12->guest_bndcfgs & PAGE_MASK, vcpu)) ||
	     CC((vmcs12->guest_bndcfgs & MSR_IA32_BNDCFGS_RSVD))))
		return -EINVAL;

	if (nested_check_guest_non_reg_state(vmcs12))
		return -EINVAL;

	return 0;
}

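/*
 * Perform an "early" hardware consistency check of vmcs02 when
 * nested_early_check is enabled: do a real VMLAUNCH/VMRESUME with
 * GUEST_RFLAGS deliberately invalid so the CPU either VMFails (bad control
 * fields, reported to L1 as VMFail) or takes a failed-VMEntry VM-Exit
 * without ever executing any L2 code.
 */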
static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	unsigned long cr3, cr4;
	bool vm_fail;

	if (!nested_early_check)
		return 0;

	if (vmx->msr_autoload.host.nr)
		vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0);
	if (vmx->msr_autoload.guest.nr)
		vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0);

	preempt_disable();

	vmx_prepare_switch_to_guest(vcpu);

	/*
	 * Induce a consistency check VMExit by clearing bit 1 in GUEST_RFLAGS,
	 * which is reserved to '1' by hardware.  GUEST_RFLAGS is guaranteed to
	 * be written (by prepare_vmcs02()) before the "real" VMEnter, i.e.
	 * there's no need to preserve other bits or save/restore the field.
	 */
	vmcs_writel(GUEST_RFLAGS, 0);

	cr3 = __get_current_cr3_fast();
	if (unlikely(cr3 != vmx->loaded_vmcs->host_state.cr3)) {
		vmcs_writel(HOST_CR3, cr3);
		vmx->loaded_vmcs->host_state.cr3 = cr3;
	}

	cr4 = cr4_read_shadow();
	if (unlikely(cr4 != vmx->loaded_vmcs->host_state.cr4)) {
		vmcs_writel(HOST_CR4, cr4);
		vmx->loaded_vmcs->host_state.cr4 = cr4;
	}

	vm_fail = __vmx_vcpu_run(vmx, (unsigned long *)&vcpu->arch.regs,
				 vmx->loaded_vmcs->launched);

	if (vmx->msr_autoload.host.nr)
		vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr);
	if (vmx->msr_autoload.guest.nr)
		vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr);

	if (vm_fail) {
		u32 error = vmcs_read32(VM_INSTRUCTION_ERROR);

		preempt_enable();

		trace_kvm_nested_vmenter_failed(
			"early hardware check VM-instruction error: ", error);
		WARN_ON_ONCE(error != VMXERR_ENTRY_INVALID_CONTROL_FIELD);
		return 1;
	}

	/*
	 * VMExit clears RFLAGS.IF and DR7, even on a consistency check.
	 */
	if (hw_breakpoint_active())
		set_debugreg(__this_cpu_read(cpu_dr7), 7);
	local_irq_enable();
	preempt_enable();

	/*
	 * A non-failing VMEntry means we somehow entered guest mode with
	 * an illegal RIP, and that's just the tip of the iceberg.  There
	 * is no telling what memory has been modified or what state has
	 * been exposed to unknown code.  Hitting this all but guarantees
	 * a (very critical) hardware issue.
	 */
	WARN_ON(!(vmcs_read32(VM_EXIT_REASON) &
		VMX_EXIT_REASONS_FAILED_VMENTRY));

	return 0;
}

static bool nested_get_evmcs_page(struct kvm_vcpu *vcpu)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);

	/*
	 * hv_evmcs may end up being not mapped after migration (when
	 * L2 was running), map it here to make sure vmcs12 changes are
	 * properly reflected.
	 */
	if (vmx->nested.enlightened_vmcs_enabled &&
	    vmx->nested.hv_evmcs_vmptr == EVMPTR_MAP_PENDING) {
		enum nested_evmptrld_status evmptrld_status =
			nested_vmx_handle_enlightened_vmptrld(vcpu, false);

		if (evmptrld_status == EVMPTRLD_VMFAIL ||
		    evmptrld_status == EVMPTRLD_ERROR)
			return false;

		/*
		 * Post migration VMCS12 always provides the most actual
		 * information, copy it to eVMCS upon entry.
		 */
		vmx->nested.need_vmcs12_to_shadow_sync = true;
	}

	return true;
}

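/*
 * Map (or translate) the guest pages that vmcs12 points at: the APIC-access
 * page, the virtual-APIC page, the posted-interrupt descriptor and the MSR
 * bitmap.  Returns false, signalling an internal error to userspace where
 * appropriate, if a required page cannot be obtained.
 */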
static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu)
{
	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	struct kvm_host_map *map;
	struct page *page;
	u64 hpa;

	if (!vcpu->arch.pdptrs_from_userspace &&
	    !nested_cpu_has_ept(vmcs12) && is_pae_paging(vcpu)) {
		/*
		 * Reload the guest's PDPTRs since after a migration
		 * the guest CR3 might be restored prior to setting the nested
		 * state which can lead to a load of wrong PDPTRs.
		 */
		if (CC(!load_pdptrs(vcpu, vcpu->arch.walk_mmu, vcpu->arch.cr3)))
			return false;
	}

	if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
		/*
		 * Translate L1 physical address to host physical
		 * address for vmcs02. Keep the page pinned, so this
		 * physical address remains valid. We keep a reference
		 * to it so we can release it later.
		 */
		if (vmx->nested.apic_access_page) { /* shouldn't happen */
			kvm_release_page_clean(vmx->nested.apic_access_page);
			vmx->nested.apic_access_page = NULL;
		}
		page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->apic_access_addr);
		if (!is_error_page(page)) {
			vmx->nested.apic_access_page = page;
			hpa = page_to_phys(vmx->nested.apic_access_page);
			vmcs_write64(APIC_ACCESS_ADDR, hpa);
		} else {
			pr_debug_ratelimited("%s: no backing 'struct page' for APIC-access address in vmcs12\n",
					     __func__);
			vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
			vcpu->run->internal.suberror =
				KVM_INTERNAL_ERROR_EMULATION;
			vcpu->run->internal.ndata = 0;
			return false;
		}
	}

	if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) {
		map = &vmx->nested.virtual_apic_map;

		if (!kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->virtual_apic_page_addr), map)) {
			vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, pfn_to_hpa(map->pfn));
		} else if (nested_cpu_has(vmcs12, CPU_BASED_CR8_LOAD_EXITING) &&
			   nested_cpu_has(vmcs12, CPU_BASED_CR8_STORE_EXITING) &&
			   !nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
			/*
			 * The processor will never use the TPR shadow, simply
			 * clear the bit from the execution control.  Such a
			 * configuration is useless, but it happens in tests.
			 * For any other configuration, failing the vm entry is
			 * _not_ what the processor does but it's basically the
			 * only possibility we have.
			 */
			exec_controls_clearbit(vmx, CPU_BASED_TPR_SHADOW);
		} else {
			/*
			 * Write an illegal value to VIRTUAL_APIC_PAGE_ADDR to
			 * force VM-Entry to fail.
			 */
			vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, -1ull);
		}
	}

	if (nested_cpu_has_posted_intr(vmcs12)) {
		map = &vmx->nested.pi_desc_map;

		if (!kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->posted_intr_desc_addr), map)) {
			vmx->nested.pi_desc =
				(struct pi_desc *)(((void *)map->hva) +
				offset_in_page(vmcs12->posted_intr_desc_addr));
			vmcs_write64(POSTED_INTR_DESC_ADDR,
				     pfn_to_hpa(map->pfn) + offset_in_page(vmcs12->posted_intr_desc_addr));
		} else {
			/*
			 * Defer the KVM_INTERNAL_EXIT until KVM tries to
			 * access the contents of the VMCS12 posted interrupt
			 * descriptor. (Note that KVM may do this when it
			 * should not, per the architectural specification.)
			 */
			vmx->nested.pi_desc = NULL;
			pin_controls_clearbit(vmx, PIN_BASED_POSTED_INTR);
		}
	}
	if (nested_vmx_prepare_msr_bitmap(vcpu, vmcs12))
		exec_controls_setbit(vmx, CPU_BASED_USE_MSR_BITMAPS);
	else
		exec_controls_clearbit(vmx, CPU_BASED_USE_MSR_BITMAPS);

	return true;
}

static bool vmx_get_nested_state_pages(struct kvm_vcpu *vcpu)
{
	if (!nested_get_evmcs_page(vcpu)) {
		pr_debug_ratelimited("%s: enlightened vmptrld failed\n",
				     __func__);
		vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
		vcpu->run->internal.suberror =
			KVM_INTERNAL_ERROR_EMULATION;
		vcpu->run->internal.ndata = 0;

		return false;
	}

	if (is_guest_mode(vcpu) && !nested_get_vmcs12_pages(vcpu))
		return false;

	return true;
}

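/*
 * Emulate a PML log write on behalf of L2: store the given GPA into L1's
 * PML buffer at the current vmcs12 index and decrement the index as
 * hardware would.  Returns 1 to signal that a PML-full VM-Exit to L1 is
 * needed when L1's buffer is exhausted.
 */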
static int nested_vmx_write_pml_buffer(struct kvm_vcpu *vcpu, gpa_t gpa)
{
	struct vmcs12 *vmcs12;
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	gpa_t dst;

	if (WARN_ON_ONCE(!is_guest_mode(vcpu)))
		return 0;

	if (WARN_ON_ONCE(vmx->nested.pml_full))
		return 1;

	/*
	 * Check if PML is enabled for the nested guest. Whether eptp bit 6 is
	 * set is already checked as part of A/D emulation.
	 */
	vmcs12 = get_vmcs12(vcpu);
	if (!nested_cpu_has_pml(vmcs12))
		return 0;

	if (vmcs12->guest_pml_index >= PML_ENTITY_NUM) {
		vmx->nested.pml_full = true;
		return 1;
	}

	gpa &= ~0xFFFull;
	dst = vmcs12->pml_address + sizeof(u64) * vmcs12->guest_pml_index;

	if (kvm_write_guest_page(vcpu->kvm, gpa_to_gfn(dst), &gpa,
				 offset_in_page(dst), sizeof(gpa)))
		return 0;

	vmcs12->guest_pml_index--;

	return 0;
}

/*
 * Intel's VMX Instruction Reference specifies a common set of prerequisites
 * for running any VMX instruction other than VMXON: the vCPU must be in VMX
 * operation and CPL must be 0.  Violations result in #UD and #GP
 * respectively.  Returns 1 if the checks pass, 0 if an exception was queued.
 */
static int nested_vmx_check_permission(struct kvm_vcpu *vcpu)
{
	if (!to_vmx(vcpu)->nested.vmxon) {
		kvm_queue_exception(vcpu, UD_VECTOR);
		return 0;
	}

	if (vmx_get_cpl(vcpu)) {
		kvm_inject_gp(vcpu, 0);
		return 0;
	}

	return 1;
}

static bool vmx_has_apicv_interrupt(struct kvm_vcpu *vcpu)
{
	u8 rvi = vmx_get_rvi();
	u8 vppr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_PROCPRI);

	return ((rvi & 0xf0) > (vppr & 0xf0));
}

static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
				   struct vmcs12 *vmcs12);

/*
 * If from_vmentry is false, this is being called from state restore (either
 * RSM or KVM_SET_NESTED_STATE).  Otherwise it's called from vmlaunch/vmresume.
 *
 * Returns:
 *	NVMX_VMENTRY_SUCCESS: Entered VMX non-root mode
 *	NVMX_VMENTRY_VMFAIL:  Consistency check VMFail
 *	NVMX_VMENTRY_VMEXIT:  Consistency check VMExit
 *	NVMX_VMENTRY_KVM_INTERNAL_ERROR: KVM internal error
 */
enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
							bool from_vmentry)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
	enum vm_entry_failure_code entry_failure_code;
	bool evaluate_pending_interrupts;
	union vmx_exit_reason exit_reason = {
		.basic = EXIT_REASON_INVALID_STATE,
		.failed_vmentry = 1,
	};
	u32 failed_index;

	if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu))
		kvm_vcpu_flush_tlb_current(vcpu);

	evaluate_pending_interrupts = exec_controls_get(vmx) &
		(CPU_BASED_INTR_WINDOW_EXITING | CPU_BASED_NMI_WINDOW_EXITING);
	if (likely(!evaluate_pending_interrupts) && kvm_vcpu_apicv_active(vcpu))
		evaluate_pending_interrupts |= vmx_has_apicv_interrupt(vcpu);

	if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS))
		vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
	if (kvm_mpx_supported() &&
	    !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS))
		vmx->nested.vmcs01_guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS);

	/*
	 * Overwrite vmcs01.GUEST_CR3 with L1's CR3 if EPT is disabled *and*
	 * nested early checks are disabled.  In the event of a "late" VM-Fail,
	 * i.e. a VM-Fail detected by hardware but not KVM, KVM must unwind its
	 * software model to the pre-VMEntry host state.  When EPT is disabled,
	 * GUEST_CR3 holds KVM's shadow CR3, not L1's "real" CR3, which causes
	 * nested_vmx_restore_host_state() to corrupt vcpu->arch.cr3.  Stuffing
	 * vmcs01.GUEST_CR3 results in the unwind naturally setting arch.cr3 to
	 * the correct value.  Smashing vmcs01.GUEST_CR3 is safe because nested
	 * VM-Exits, and the unwind, reset KVM's MMU, i.e. vmcs01.GUEST_CR3 is
	 * guaranteed to be overwritten with a shadow CR3 prior to re-entering
	 * L1.  Don't stuff vmcs01.GUEST_CR3 when using nested early checks as
	 * KVM modifies vcpu->arch.cr3 if and only if the early hardware checks
	 * pass, and early VM-Fails do not reset KVM's MMU, i.e. the VM-Fail
	 * path would need to manually save/restore vmcs01.GUEST_CR3.
	 */
	if (!enable_ept && !nested_early_check)
		vmcs_writel(GUEST_CR3, vcpu->arch.cr3);

	vmx_switch_vmcs(vcpu, &vmx->nested.vmcs02);

	prepare_vmcs02_early(vmx, vmcs12);

	if (from_vmentry) {
		if (unlikely(!nested_get_vmcs12_pages(vcpu))) {
			vmx_switch_vmcs(vcpu, &vmx->vmcs01);
			return NVMX_VMENTRY_KVM_INTERNAL_ERROR;
		}

		if (nested_vmx_check_vmentry_hw(vcpu)) {
			vmx_switch_vmcs(vcpu, &vmx->vmcs01);
			return NVMX_VMENTRY_VMFAIL;
		}

		if (nested_vmx_check_guest_state(vcpu, vmcs12,
						 &entry_failure_code)) {
			exit_reason.basic = EXIT_REASON_INVALID_STATE;
			vmcs12->exit_qualification = entry_failure_code;
			goto vmentry_fail_vmexit;
		}
	}

	enter_guest_mode(vcpu);

	if (prepare_vmcs02(vcpu, vmcs12, from_vmentry, &entry_failure_code)) {
		exit_reason.basic = EXIT_REASON_INVALID_STATE;
		vmcs12->exit_qualification = entry_failure_code;
		goto vmentry_fail_vmexit_guest_mode;
	}

	if (from_vmentry) {
		failed_index = nested_vmx_load_msr(vcpu,
						   vmcs12->vm_entry_msr_load_addr,
						   vmcs12->vm_entry_msr_load_count);
		if (failed_index) {
			exit_reason.basic = EXIT_REASON_MSR_LOAD_FAIL;
			vmcs12->exit_qualification = failed_index;
			goto vmentry_fail_vmexit_guest_mode;
		}
	} else {
		/*
		 * The MMU is not initialized to point at the right entities yet and
		 * "get pages" would need to read data from the guest (i.e. we will
		 * need to perform gpa to hpa translation). Request a call
		 * to nested_get_vmcs12_pages before the next VM-entry.  The MSRs
		 * have already been set at vmentry time and should not be reset.
		 */
		kvm_make_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
	}

	/*
	 * If L1 had a pending IRQ/NMI until it executed
	 * VMLAUNCH/VMRESUME which wasn't delivered because it was
	 * disallowed (e.g. interrupts disabled), L0 needs to
	 * evaluate if this pending event should cause an exit from L2
	 * to L1 or delivered directly to L2 (e.g. in case L1 doesn't
	 * intercept EXTERNAL_INTERRUPT).
	 *
	 * Usually this would be handled by the processor noticing an
	 * IRQ/NMI window request, or checking RVI during evaluation of
	 * pending virtual interrupts.  However, this setting was done
	 * on VMCS01 and now VMCS02 is active instead. Thus, we force L0
	 * to perform pending event evaluation by requesting a KVM_REQ_EVENT.
	 */
	if (unlikely(evaluate_pending_interrupts))
		kvm_make_request(KVM_REQ_EVENT, vcpu);

	/*
	 * Do not start the preemption timer hrtimer until after we know
	 * we are successful, so that only nested_vmx_vmexit needs to cancel
	 * the timer.
	 */
	vmx->nested.preemption_timer_expired = false;
	if (nested_cpu_has_preemption_timer(vmcs12)) {
		u64 timer_value = vmx_calc_preemption_timer_value(vcpu);
		vmx_start_preemption_timer(vcpu, timer_value);
	}

	/*
	 * Note no nested_vmx_succeed or nested_vmx_fail here. At this point
	 * we are no longer running L1, and VMLAUNCH/VMRESUME has not yet
	 * returned as far as L1 is concerned. It will only return (and set
	 * the success flag) when L2 exits (see nested_vmx_vmexit()).
	 */
	return NVMX_VMENTRY_SUCCESS;

	/*
	 * A failed consistency check that leads to a VMExit during L1's
	 * VMEnter to L2 is a variation of a normal VMexit, as explained in
	 * 26.7 "VM-entry failures during or after loading guest state".
	 */
vmentry_fail_vmexit_guest_mode:
	if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETTING)
		vcpu->arch.tsc_offset -= vmcs12->tsc_offset;
	leave_guest_mode(vcpu);

vmentry_fail_vmexit:
	vmx_switch_vmcs(vcpu, &vmx->vmcs01);

	if (!from_vmentry)
		return NVMX_VMENTRY_VMEXIT;

	load_vmcs12_host_state(vcpu, vmcs12);
	vmcs12->vm_exit_reason = exit_reason.full;
	if (enable_shadow_vmcs || evmptr_is_valid(vmx->nested.hv_evmcs_vmptr))
		vmx->nested.need_vmcs12_to_shadow_sync = true;
	return NVMX_VMENTRY_VMEXIT;
}

/*
 * nested_vmx_run() handles a nested entry, i.e., a VMLAUNCH or VMRESUME on L1
 * for running an L2 nested guest.
 */
static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
{
	struct vmcs12 *vmcs12;
	enum nvmx_vmentry_status status;
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	u32 interrupt_shadow = vmx_get_interrupt_shadow(vcpu);
	enum nested_evmptrld_status evmptrld_status;

	if (!nested_vmx_check_permission(vcpu))
		return 1;

	evmptrld_status = nested_vmx_handle_enlightened_vmptrld(vcpu, launch);
	if (evmptrld_status == EVMPTRLD_ERROR) {
		kvm_queue_exception(vcpu, UD_VECTOR);
		return 1;
	} else if (CC(evmptrld_status == EVMPTRLD_VMFAIL)) {
		return nested_vmx_failInvalid(vcpu);
	}

	if (CC(!evmptr_is_valid(vmx->nested.hv_evmcs_vmptr) &&
	       vmx->nested.current_vmptr == -1ull))
		return nested_vmx_failInvalid(vcpu);

	vmcs12 = get_vmcs12(vcpu);

	/*
	 * Can't VMLAUNCH or VMRESUME a shadow VMCS. Despite the fact
	 * that there *is* a valid VMCS pointer, RFLAGS.CF is set
	 * rather than RFLAGS.ZF, and no error number is stored to the
	 * VM-instruction error field.
	 */
	if (CC(vmcs12->hdr.shadow_vmcs))
		return nested_vmx_failInvalid(vcpu);

	if (evmptr_is_valid(vmx->nested.hv_evmcs_vmptr)) {
		copy_enlightened_to_vmcs12(vmx, vmx->nested.hv_evmcs->hv_clean_fields);
		/* Enlightened VMCS doesn't have launch state */
		vmcs12->launch_state = !launch;
	} else if (enable_shadow_vmcs) {
		copy_shadow_to_vmcs12(vmx);
	}

	/*
	 * The nested entry process starts with enforcing various prerequisites
	 * on vmcs12 as required by the Intel SDM, and acts appropriately when
	 * they fail: as the SDM explains, some conditions should cause the
	 * instruction to fail, while others will cause the instruction to seem
	 * to succeed, but return an EXIT_REASON_INVALID_STATE.
	 * To speed up the normal (success) code path, we should avoid checking
	 * for misconfigurations which will anyway be caught by the processor
	 * when using the merged vmcs02.
	 */
	if (CC(interrupt_shadow & KVM_X86_SHADOW_INT_MOV_SS))
		return nested_vmx_fail(vcpu, VMXERR_ENTRY_EVENTS_BLOCKED_BY_MOV_SS);

	if (CC(vmcs12->launch_state == launch))
		return nested_vmx_fail(vcpu,
			launch ? VMXERR_VMLAUNCH_NONCLEAR_VMCS
			       : VMXERR_VMRESUME_NONLAUNCHED_VMCS);

	if (nested_vmx_check_controls(vcpu, vmcs12))
		return nested_vmx_fail(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);

	if (nested_vmx_check_host_state(vcpu, vmcs12))
		return nested_vmx_fail(vcpu, VMXERR_ENTRY_INVALID_HOST_STATE_FIELD);

	/*
	 * We're finally done with prerequisite checking, and can start with
	 * the nested entry.
	 */
	vmx->nested.nested_run_pending = 1;
	vmx->nested.has_preemption_timer_deadline = false;
	status = nested_vmx_enter_non_root_mode(vcpu, true);
	if (unlikely(status != NVMX_VMENTRY_SUCCESS))
		goto vmentry_failed;

	/* Emulate processing of posted interrupts on VM-Enter. */
	if (nested_cpu_has_posted_intr(vmcs12) &&
	    kvm_apic_has_interrupt(vcpu) == vmx->nested.posted_intr_nv) {
		vmx->nested.pi_pending = true;
		kvm_make_request(KVM_REQ_EVENT, vcpu);
		kvm_apic_clear_irr(vcpu, vmx->nested.posted_intr_nv);
	}

	/* Hide L1D cache contents from the nested guest.  */
	vmx->vcpu.arch.l1tf_flush_l1d = true;

	/*
	 * Must happen outside of nested_vmx_enter_non_root_mode() as it will
	 * also be used as part of restoring nVMX state for
	 * snapshot restore (migration).
	 *
	 * In this flow, it is assumed that vmcs12 cache was
	 * transferred as part of captured nVMX state and should
	 * therefore not be read from guest memory (which may not
	 * exist on destination host yet).
	 */
	nested_cache_shadow_vmcs12(vcpu, vmcs12);

	switch (vmcs12->guest_activity_state) {
	case GUEST_ACTIVITY_HLT:
		/*
		 * If we're entering a halted L2 vcpu and the L2 vcpu won't be
		 * awakened by event injection or by an NMI-window VM-exit or
		 * by an interrupt-window VM-exit, halt the vcpu.
		 */
		if (!(vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK) &&
		    !nested_cpu_has(vmcs12, CPU_BASED_NMI_WINDOW_EXITING) &&
		    !(nested_cpu_has(vmcs12, CPU_BASED_INTR_WINDOW_EXITING) &&
		      (vmcs12->guest_rflags & X86_EFLAGS_IF))) {
			vmx->nested.nested_run_pending = 0;
			return kvm_vcpu_halt(vcpu);
		}
		break;
	case GUEST_ACTIVITY_WAIT_SIPI:
		vmx->nested.nested_run_pending = 0;
		vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
		break;
	default:
		break;
	}

	return 1;

vmentry_failed:
	vmx->nested.nested_run_pending = 0;
	if (status == NVMX_VMENTRY_KVM_INTERNAL_ERROR)
		return 0;
	if (status == NVMX_VMENTRY_VMEXIT)
		return 1;
	WARN_ON_ONCE(status != NVMX_VMENTRY_VMFAIL);
	return nested_vmx_fail(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
}

/*
 * On a nested exit from L2 to L1, vmcs12.guest_cr0 might not be up-to-date
 * because L2 may have changed some cr0 bits directly (CR0_GUEST_HOST_MASK).
 * This function returns the new value we should put in vmcs12.guest_cr0.
 * It's not enough to just return the vmcs02 GUEST_CR0:
 *  1. Bits owned by L0 (vcpu->arch.cr0_guest_owned_bits) may have been
 *     changed freely by L2, so take them from the hardware GUEST_CR0.
 *  2. Bits that L1 asked to trap (vmcs12.cr0_guest_host_mask) cannot have
 *     been changed by L2 behind L1's back, so keep vmcs12.guest_cr0.
 *  3. All remaining bits are trapped by L0 but were exposed to L1 via the
 *     read shadow, so take them from vmcs02's CR0_READ_SHADOW, which L0
 *     keeps up to date with the value L1 expects L2 to see.
 * The same logic applies to CR4 in vmcs12_guest_cr4() below.
 */
static inline unsigned long
vmcs12_guest_cr0(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
{
	return
	/*1*/	(vmcs_readl(GUEST_CR0) & vcpu->arch.cr0_guest_owned_bits) |
	/*2*/	(vmcs12->guest_cr0 & vmcs12->cr0_guest_host_mask) |
	/*3*/	(vmcs_readl(CR0_READ_SHADOW) & ~(vmcs12->cr0_guest_host_mask |
			vcpu->arch.cr0_guest_owned_bits));
}

static inline unsigned long
vmcs12_guest_cr4(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
{
	return
	/*1*/	(vmcs_readl(GUEST_CR4) & vcpu->arch.cr4_guest_owned_bits) |
	/*2*/	(vmcs12->guest_cr4 & vmcs12->cr4_guest_host_mask) |
	/*3*/	(vmcs_readl(CR4_READ_SHADOW) & ~(vmcs12->cr4_guest_host_mask |
			vcpu->arch.cr4_guest_owned_bits));
}

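/*
 * Transfer the event that L0 or L1 may have wanted to inject into L2 to
 * vmcs12's IDT-vectoring info field so that L1 can re-inject it.  Only one
 * of an exception, an NMI or an interrupt can be pending for (re-)injection
 * at any time, hence the if/else-if cascade.
 */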
static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu,
				      struct vmcs12 *vmcs12)
{
	u32 idt_vectoring;
	unsigned int nr;

	if (vcpu->arch.exception.injected) {
		nr = vcpu->arch.exception.nr;
		idt_vectoring = nr | VECTORING_INFO_VALID_MASK;

		if (kvm_exception_is_soft(nr)) {
			vmcs12->vm_exit_instruction_len =
				vcpu->arch.event_exit_inst_len;
			idt_vectoring |= INTR_TYPE_SOFT_EXCEPTION;
		} else
			idt_vectoring |= INTR_TYPE_HARD_EXCEPTION;

		if (vcpu->arch.exception.has_error_code) {
			idt_vectoring |= VECTORING_INFO_DELIVER_CODE_MASK;
			vmcs12->idt_vectoring_error_code =
				vcpu->arch.exception.error_code;
		}

		vmcs12->idt_vectoring_info_field = idt_vectoring;
	} else if (vcpu->arch.nmi_injected) {
		vmcs12->idt_vectoring_info_field =
			INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR;
	} else if (vcpu->arch.interrupt.injected) {
		nr = vcpu->arch.interrupt.nr;
		idt_vectoring = nr | VECTORING_INFO_VALID_MASK;

		if (vcpu->arch.interrupt.soft) {
			idt_vectoring |= INTR_TYPE_SOFT_INTR;
			vmcs12->vm_entry_instruction_len =
				vcpu->arch.event_exit_inst_len;
		} else
			idt_vectoring |= INTR_TYPE_EXT_INTR;

		vmcs12->idt_vectoring_info_field = idt_vectoring;
	}
}

void nested_mark_vmcs12_pages_dirty(struct kvm_vcpu *vcpu)
{
	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
	gfn_t gfn;

	/*
	 * Don't need to mark the APIC access page dirty; it is never
	 * written to by the CPU during APIC virtualization.
	 */

	if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) {
		gfn = vmcs12->virtual_apic_page_addr >> PAGE_SHIFT;
		kvm_vcpu_mark_page_dirty(vcpu, gfn);
	}

	if (nested_cpu_has_posted_intr(vmcs12)) {
		gfn = vmcs12->posted_intr_desc_addr >> PAGE_SHIFT;
		kvm_vcpu_mark_page_dirty(vcpu, gfn);
	}
}

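/*
 * Process a posted interrupt that arrived while L2 was running but wasn't
 * delivered by hardware: sync the posted-interrupt descriptor's PIR into
 * the virtual-APIC page and update the guest interrupt status, mirroring
 * what the CPU's posted-interrupt processing would have done.
 */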
static int vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	int max_irr;
	void *vapic_page;
	u16 status;

	if (!vmx->nested.pi_pending)
		return 0;

	if (!vmx->nested.pi_desc)
		goto mmio_needed;

	vmx->nested.pi_pending = false;

	if (!pi_test_and_clear_on(vmx->nested.pi_desc))
		return 0;

	max_irr = find_last_bit((unsigned long *)vmx->nested.pi_desc->pir, 256);
	if (max_irr != 256) {
		vapic_page = vmx->nested.virtual_apic_map.hva;
		if (!vapic_page)
			goto mmio_needed;

		__kvm_apic_update_irr(vmx->nested.pi_desc->pir,
				      vapic_page, &max_irr);
		status = vmcs_read16(GUEST_INTR_STATUS);
		if ((u8)max_irr > ((u8)status & 0xff)) {
			status &= ~0xff;
			status |= (u8)max_irr;
			vmcs_write16(GUEST_INTR_STATUS, status);
		}
	}

	nested_mark_vmcs12_pages_dirty(vcpu);
	return 0;

mmio_needed:
	kvm_handle_memory_failure(vcpu, X86EMUL_IO_NEEDED, NULL);
	return -ENXIO;
}

static void nested_vmx_inject_exception_vmexit(struct kvm_vcpu *vcpu,
					       unsigned long exit_qual)
{
	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
	unsigned int nr = vcpu->arch.exception.nr;
	u32 intr_info = nr | INTR_INFO_VALID_MASK;

	if (vcpu->arch.exception.has_error_code) {
		vmcs12->vm_exit_intr_error_code = vcpu->arch.exception.error_code;
		intr_info |= INTR_INFO_DELIVER_CODE_MASK;
	}

	if (kvm_exception_is_soft(nr))
		intr_info |= INTR_TYPE_SOFT_EXCEPTION;
	else
		intr_info |= INTR_TYPE_HARD_EXCEPTION;

	if (!(vmcs12->idt_vectoring_info_field & VECTORING_INFO_VALID_MASK) &&
	    vmx_get_nmi_mask(vcpu))
		intr_info |= INTR_INFO_UNBLOCK_NMI;

	nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI, intr_info, exit_qual);
}

/*
 * Returns true if a debug trap is pending delivery.
 *
 * In KVM, debug traps bear an exception payload (the to-be-merged DR6
 * bits).  Infer the class of a #DB from that payload: trap-like #DBs
 * (data breakpoints, single-step) always carry a non-zero payload,
 * whereas fault-like #DBs do not.
 */
static inline bool vmx_pending_dbg_trap(struct kvm_vcpu *vcpu)
{
	return vcpu->arch.exception.pending &&
	       vcpu->arch.exception.nr == DB_VECTOR &&
	       vcpu->arch.exception.payload;
}

/*
 * Certain VM-exits set the 'pending debug exceptions' field to indicate a
 * recognized #DB (data or single-step) that has yet to be delivered. Since
 * KVM tracks such a #DB via the exception payload, propagate the payload
 * into GUEST_PENDING_DBG_EXCEPTIONS before performing such an exit.
 */
static void nested_vmx_update_pending_dbg(struct kvm_vcpu *vcpu)
{
	if (vmx_pending_dbg_trap(vcpu))
		vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS,
			    vcpu->arch.exception.payload);
}

static bool nested_vmx_preemption_timer_pending(struct kvm_vcpu *vcpu)
{
	return nested_cpu_has_preemption_timer(get_vmcs12(vcpu)) &&
	       to_vmx(vcpu)->nested.preemption_timer_expired;
}

static int vmx_check_nested_events(struct kvm_vcpu *vcpu)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	unsigned long exit_qual;
	bool block_nested_events =
		vmx->nested.nested_run_pending || kvm_event_needs_reinjection(vcpu);
	bool mtf_pending = vmx->nested.mtf_pending;
	struct kvm_lapic *apic = vcpu->arch.apic;

	/*
	 * Clear the MTF state.  If a higher priority VM-exit is delivered
	 * first, this state is discarded.
	 */
	if (!block_nested_events)
		vmx->nested.mtf_pending = false;

	if (lapic_in_kernel(vcpu) &&
	    test_bit(KVM_APIC_INIT, &apic->pending_events)) {
		if (block_nested_events)
			return -EBUSY;
		nested_vmx_update_pending_dbg(vcpu);
		clear_bit(KVM_APIC_INIT, &apic->pending_events);
		if (vcpu->arch.mp_state != KVM_MP_STATE_INIT_RECEIVED)
			nested_vmx_vmexit(vcpu, EXIT_REASON_INIT_SIGNAL, 0, 0);
		return 0;
	}

	if (lapic_in_kernel(vcpu) &&
	    test_bit(KVM_APIC_SIPI, &apic->pending_events)) {
		if (block_nested_events)
			return -EBUSY;

		clear_bit(KVM_APIC_SIPI, &apic->pending_events);
		if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED)
			nested_vmx_vmexit(vcpu, EXIT_REASON_SIPI_SIGNAL, 0,
					  apic->sipi_vector & 0xFFUL);
		return 0;
	}

	/*
	 * Process any exceptions that are not debug traps before MTF.
	 *
	 * Note that only a pending nested run can block a pending exception.
	 * Otherwise an injected NMI/interrupt should either be
	 * lost or delivered to the nested hypervisor in the IDT_VECTORING_INFO,
	 * while delivering the pending exception.
	 */
	if (vcpu->arch.exception.pending && !vmx_pending_dbg_trap(vcpu)) {
		if (vmx->nested.nested_run_pending)
			return -EBUSY;
		if (!nested_vmx_check_exception(vcpu, &exit_qual))
			goto no_vmexit;
		nested_vmx_inject_exception_vmexit(vcpu, exit_qual);
		return 0;
	}

	if (mtf_pending) {
		if (block_nested_events)
			return -EBUSY;
		nested_vmx_update_pending_dbg(vcpu);
		nested_vmx_vmexit(vcpu, EXIT_REASON_MONITOR_TRAP_FLAG, 0, 0);
		return 0;
	}

	if (vcpu->arch.exception.pending) {
		if (vmx->nested.nested_run_pending)
			return -EBUSY;
		if (!nested_vmx_check_exception(vcpu, &exit_qual))
			goto no_vmexit;
		nested_vmx_inject_exception_vmexit(vcpu, exit_qual);
		return 0;
	}

	if (nested_vmx_preemption_timer_pending(vcpu)) {
		if (block_nested_events)
			return -EBUSY;
		nested_vmx_vmexit(vcpu, EXIT_REASON_PREEMPTION_TIMER, 0, 0);
		return 0;
	}

	if (vcpu->arch.smi_pending && !is_smm(vcpu)) {
		if (block_nested_events)
			return -EBUSY;
		goto no_vmexit;
	}

	if (vcpu->arch.nmi_pending && !vmx_nmi_blocked(vcpu)) {
		if (block_nested_events)
			return -EBUSY;
		if (!nested_exit_on_nmi(vcpu))
			goto no_vmexit;

		nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
				  NMI_VECTOR | INTR_TYPE_NMI_INTR |
				  INTR_INFO_VALID_MASK, 0);
		/*
		 * The NMI-triggered VM exit counts as injection:
		 * clear this one and block further NMIs.
		 */
		vcpu->arch.nmi_pending = 0;
		vmx_set_nmi_mask(vcpu, true);
		return 0;
	}

	if (kvm_cpu_has_interrupt(vcpu) && !vmx_interrupt_blocked(vcpu)) {
		if (block_nested_events)
			return -EBUSY;
		if (!nested_exit_on_intr(vcpu))
			goto no_vmexit;
		nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT, 0, 0);
		return 0;
	}

no_vmexit:
	return vmx_complete_nested_posted_interrupt(vcpu);
}

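/*
 * Compute the VMX-preemption timer value to save back into vmcs12 on
 * VM-Exit: convert the hrtimer's remaining time back into units of the
 * emulated timer rate.
 */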
static u32 vmx_get_preemption_timer_value(struct kvm_vcpu *vcpu)
{
	ktime_t remaining =
		hrtimer_get_remaining(&to_vmx(vcpu)->nested.preemption_timer);
	u64 value;

	if (ktime_to_ns(remaining) <= 0)
		return 0;

	value = ktime_to_ns(remaining) * vcpu->arch.virtual_tsc_khz;
	do_div(value, 1000000);
	return value >> VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE;
}

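/*
 * The fields below are the "rare" guest state that is not read out of
 * vmcs02 on every exit; see sync_vmcs02_to_vmcs12_rare().  The sync is
 * deferred until L1 can actually observe the values, e.g. via VMREAD.
 */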
static bool is_vmcs12_ext_field(unsigned long field)
{
	switch (field) {
	case GUEST_ES_SELECTOR:
	case GUEST_CS_SELECTOR:
	case GUEST_SS_SELECTOR:
	case GUEST_DS_SELECTOR:
	case GUEST_FS_SELECTOR:
	case GUEST_GS_SELECTOR:
	case GUEST_LDTR_SELECTOR:
	case GUEST_TR_SELECTOR:
	case GUEST_ES_LIMIT:
	case GUEST_CS_LIMIT:
	case GUEST_SS_LIMIT:
	case GUEST_DS_LIMIT:
	case GUEST_FS_LIMIT:
	case GUEST_GS_LIMIT:
	case GUEST_LDTR_LIMIT:
	case GUEST_TR_LIMIT:
	case GUEST_GDTR_LIMIT:
	case GUEST_IDTR_LIMIT:
	case GUEST_ES_AR_BYTES:
	case GUEST_DS_AR_BYTES:
	case GUEST_FS_AR_BYTES:
	case GUEST_GS_AR_BYTES:
	case GUEST_LDTR_AR_BYTES:
	case GUEST_TR_AR_BYTES:
	case GUEST_ES_BASE:
	case GUEST_CS_BASE:
	case GUEST_SS_BASE:
	case GUEST_DS_BASE:
	case GUEST_FS_BASE:
	case GUEST_GS_BASE:
	case GUEST_LDTR_BASE:
	case GUEST_TR_BASE:
	case GUEST_GDTR_BASE:
	case GUEST_IDTR_BASE:
	case GUEST_PENDING_DBG_EXCEPTIONS:
	case GUEST_BNDCFGS:
		return true;
	default:
		break;
	}

	return false;
}

static void sync_vmcs02_to_vmcs12_rare(struct kvm_vcpu *vcpu,
				       struct vmcs12 *vmcs12)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);

	vmcs12->guest_es_selector = vmcs_read16(GUEST_ES_SELECTOR);
	vmcs12->guest_cs_selector = vmcs_read16(GUEST_CS_SELECTOR);
	vmcs12->guest_ss_selector = vmcs_read16(GUEST_SS_SELECTOR);
	vmcs12->guest_ds_selector = vmcs_read16(GUEST_DS_SELECTOR);
	vmcs12->guest_fs_selector = vmcs_read16(GUEST_FS_SELECTOR);
	vmcs12->guest_gs_selector = vmcs_read16(GUEST_GS_SELECTOR);
	vmcs12->guest_ldtr_selector = vmcs_read16(GUEST_LDTR_SELECTOR);
	vmcs12->guest_tr_selector = vmcs_read16(GUEST_TR_SELECTOR);
	vmcs12->guest_es_limit = vmcs_read32(GUEST_ES_LIMIT);
	vmcs12->guest_cs_limit = vmcs_read32(GUEST_CS_LIMIT);
	vmcs12->guest_ss_limit = vmcs_read32(GUEST_SS_LIMIT);
	vmcs12->guest_ds_limit = vmcs_read32(GUEST_DS_LIMIT);
	vmcs12->guest_fs_limit = vmcs_read32(GUEST_FS_LIMIT);
	vmcs12->guest_gs_limit = vmcs_read32(GUEST_GS_LIMIT);
	vmcs12->guest_ldtr_limit = vmcs_read32(GUEST_LDTR_LIMIT);
	vmcs12->guest_tr_limit = vmcs_read32(GUEST_TR_LIMIT);
	vmcs12->guest_gdtr_limit = vmcs_read32(GUEST_GDTR_LIMIT);
	vmcs12->guest_idtr_limit = vmcs_read32(GUEST_IDTR_LIMIT);
	vmcs12->guest_es_ar_bytes = vmcs_read32(GUEST_ES_AR_BYTES);
	vmcs12->guest_ds_ar_bytes = vmcs_read32(GUEST_DS_AR_BYTES);
	vmcs12->guest_fs_ar_bytes = vmcs_read32(GUEST_FS_AR_BYTES);
	vmcs12->guest_gs_ar_bytes = vmcs_read32(GUEST_GS_AR_BYTES);
	vmcs12->guest_ldtr_ar_bytes = vmcs_read32(GUEST_LDTR_AR_BYTES);
	vmcs12->guest_tr_ar_bytes = vmcs_read32(GUEST_TR_AR_BYTES);
	vmcs12->guest_es_base = vmcs_readl(GUEST_ES_BASE);
	vmcs12->guest_cs_base = vmcs_readl(GUEST_CS_BASE);
	vmcs12->guest_ss_base = vmcs_readl(GUEST_SS_BASE);
	vmcs12->guest_ds_base = vmcs_readl(GUEST_DS_BASE);
	vmcs12->guest_fs_base = vmcs_readl(GUEST_FS_BASE);
	vmcs12->guest_gs_base = vmcs_readl(GUEST_GS_BASE);
	vmcs12->guest_ldtr_base = vmcs_readl(GUEST_LDTR_BASE);
	vmcs12->guest_tr_base = vmcs_readl(GUEST_TR_BASE);
	vmcs12->guest_gdtr_base = vmcs_readl(GUEST_GDTR_BASE);
	vmcs12->guest_idtr_base = vmcs_readl(GUEST_IDTR_BASE);
	vmcs12->guest_pending_dbg_exceptions =
		vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS);
	if (kvm_mpx_supported())
		vmcs12->guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS);

	vmx->nested.need_sync_vmcs02_to_vmcs12_rare = false;
}

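/*
 * Sync the rare guest state from vmcs02 while vmcs01 is the loaded VMCS:
 * temporarily switch the hardware VMCS back to vmcs02 so the fields can be
 * VMREAD, then restore vmcs01.
 */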
static void copy_vmcs02_to_vmcs12_rare(struct kvm_vcpu *vcpu,
				       struct vmcs12 *vmcs12)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	int cpu;

	if (!vmx->nested.need_sync_vmcs02_to_vmcs12_rare)
		return;

	/* vmcs01 must be the currently loaded VMCS when this is called. */
	WARN_ON_ONCE(vmx->loaded_vmcs != &vmx->vmcs01);

	cpu = get_cpu();
	vmx->loaded_vmcs = &vmx->nested.vmcs02;
	vmx_vcpu_load_vmcs(vcpu, cpu, &vmx->vmcs01);

	sync_vmcs02_to_vmcs12_rare(vcpu, vmcs12);

	vmx->loaded_vmcs = &vmx->vmcs01;
	vmx_vcpu_load_vmcs(vcpu, cpu, &vmx->nested.vmcs02);
	put_cpu();
}

/*
 * Update the guest state fields of vmcs12 to reflect changes that
 * occurred while L2 was running. (The "IA-32e mode guest" bit of the
 * VM-entry controls is also updated, since this is really a guest
 * state bit.)
 */
static void sync_vmcs02_to_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);

	if (evmptr_is_valid(vmx->nested.hv_evmcs_vmptr))
		sync_vmcs02_to_vmcs12_rare(vcpu, vmcs12);

	vmx->nested.need_sync_vmcs02_to_vmcs12_rare =
		!evmptr_is_valid(vmx->nested.hv_evmcs_vmptr);

	vmcs12->guest_cr0 = vmcs12_guest_cr0(vcpu, vmcs12);
	vmcs12->guest_cr4 = vmcs12_guest_cr4(vcpu, vmcs12);

	vmcs12->guest_rsp = kvm_rsp_read(vcpu);
	vmcs12->guest_rip = kvm_rip_read(vcpu);
	vmcs12->guest_rflags = vmcs_readl(GUEST_RFLAGS);

	vmcs12->guest_cs_ar_bytes = vmcs_read32(GUEST_CS_AR_BYTES);
	vmcs12->guest_ss_ar_bytes = vmcs_read32(GUEST_SS_AR_BYTES);

	vmcs12->guest_interruptibility_info =
		vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);

	if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED)
		vmcs12->guest_activity_state = GUEST_ACTIVITY_HLT;
	else if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED)
		vmcs12->guest_activity_state = GUEST_ACTIVITY_WAIT_SIPI;
	else
		vmcs12->guest_activity_state = GUEST_ACTIVITY_ACTIVE;

	if (nested_cpu_has_preemption_timer(vmcs12) &&
	    vmcs12->vm_exit_controls & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER &&
	    !vmx->nested.nested_run_pending)
		vmcs12->vmx_preemption_timer_value =
			vmx_get_preemption_timer_value(vcpu);

	/*
	 * In some cases (usually, nested EPT), L2 is allowed to change its
	 * own CR3 without exiting. If it has changed it, we must keep it.
	 * Of course, if L0 is using shadow page tables, GUEST_CR3 was defined
	 * by L0, not L1 or L2, so we mustn't unconditionally copy it to vmcs12.
	 *
	 * Additionally, restore L2's PDPTR to vmcs12.
	 */
	if (enable_ept) {
		vmcs12->guest_cr3 = vmcs_readl(GUEST_CR3);
		if (nested_cpu_has_ept(vmcs12) && is_pae_paging(vcpu)) {
			vmcs12->guest_pdptr0 = vmcs_read64(GUEST_PDPTR0);
			vmcs12->guest_pdptr1 = vmcs_read64(GUEST_PDPTR1);
			vmcs12->guest_pdptr2 = vmcs_read64(GUEST_PDPTR2);
			vmcs12->guest_pdptr3 = vmcs_read64(GUEST_PDPTR3);
		}
	}

	vmcs12->guest_linear_address = vmcs_readl(GUEST_LINEAR_ADDRESS);

	if (nested_cpu_has_vid(vmcs12))
		vmcs12->guest_intr_status = vmcs_read16(GUEST_INTR_STATUS);

	vmcs12->vm_entry_controls =
		(vmcs12->vm_entry_controls & ~VM_ENTRY_IA32E_MODE) |
		(vm_entry_controls_get(to_vmx(vcpu)) & VM_ENTRY_IA32E_MODE);

	if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_DEBUG_CONTROLS)
		kvm_get_dr(vcpu, 7, (unsigned long *)&vmcs12->guest_dr7);

	if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_EFER)
		vmcs12->guest_ia32_efer = vcpu->arch.efer;
}

/*
 * prepare_vmcs12 is part of what we need to do when L2 exits to L1: it
 * updates vmcs12 to reflect the changes to the guest state while L2 was
 * running (and perhaps made some exits which were handled directly by L0
 * without going back to L1), and to reflect the exit reason.
 * Note that we do not have to copy here all VMCS fields, just those that
 * could have changed by the L2 guest or the exit - i.e., the guest-state and
 * exit-information fields only. Other fields are modified by L1 with VMWRITE,
 * which already writes to vmcs12 directly.
 */
static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
			   u32 vm_exit_reason, u32 exit_intr_info,
			   unsigned long exit_qualification)
{
	/* update exit information fields: */
	vmcs12->vm_exit_reason = vm_exit_reason;
	if (to_vmx(vcpu)->exit_reason.enclave_mode)
		vmcs12->vm_exit_reason |= VMX_EXIT_REASONS_SGX_ENCLAVE_MODE;
	vmcs12->exit_qualification = exit_qualification;
	vmcs12->vm_exit_intr_info = exit_intr_info;

	vmcs12->idt_vectoring_info_field = 0;
	vmcs12->vm_exit_instruction_len = vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
	vmcs12->vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);

	if (!(vmcs12->vm_exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY)) {
		vmcs12->launch_state = 1;

		/* vm_entry_intr_info_field is cleared on exit. Emulate this
		 * instead of reading the real value. */
		vmcs12->vm_entry_intr_info_field &= ~INTR_INFO_VALID_MASK;

		/*
		 * Transfer the event that L0 or L1 may have wanted to inject
		 * into L2 to IDT_VECTORING_INFO_FIELD.
		 */
		vmcs12_save_pending_event(vcpu, vmcs12);

		/*
		 * According to spec, there's no need to store the guest's
		 * MSRs if the exit is due to a VM-entry failure that occurs
		 * during or after loading the guest state. Since this exit
		 * does not fall in that category, we need to save the MSRs.
		 */
		if (nested_vmx_store_msr(vcpu,
					 vmcs12->vm_exit_msr_store_addr,
					 vmcs12->vm_exit_msr_store_count))
			nested_vmx_abort(vcpu,
					 VMX_ABORT_SAVE_GUEST_MSR_FAIL);
	}

	/*
	 * Drop what we picked up for L2 via vmx_complete_interrupts. It is
	 * preserved above and would only end up incorrectly in L1.
	 */
	vcpu->arch.nmi_injected = false;
	kvm_clear_exception_queue(vcpu);
	kvm_clear_interrupt_queue(vcpu);
}

/*
 * A part of what we need to do when the nested L2 guest exits and we want to
 * run its L1 parent, is to reset L1's guest state to the host state specified
 * in vmcs12.
 * This function is to be called not only on normal nested exit, but also on
 * a nested entry failure, as explained in 'Error Reporting During VM-Entry &
 * Failed VM-Entry' in the Intel SDM: the host state from vmcs12 is loaded as
 * if the exit had actually occurred.
 */
4228static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
4229 struct vmcs12 *vmcs12)
4230{
4231 enum vm_entry_failure_code ignored;
4232 struct kvm_segment seg;
4233
4234 if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER)
4235 vcpu->arch.efer = vmcs12->host_ia32_efer;
4236 else if (vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE)
4237 vcpu->arch.efer |= (EFER_LMA | EFER_LME);
4238 else
4239 vcpu->arch.efer &= ~(EFER_LMA | EFER_LME);
4240 vmx_set_efer(vcpu, vcpu->arch.efer);
4241
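	/*
	 * Per the SDM, on VM-exit RFLAGS is loaded with its fixed reserved
	 * value (only bit 1 set) and any interrupt/MOV-SS shadow is dropped.
	 */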
4242 kvm_rsp_write(vcpu, vmcs12->host_rsp);
4243 kvm_rip_write(vcpu, vmcs12->host_rip);
4244 vmx_set_rflags(vcpu, X86_EFLAGS_FIXED);
4245 vmx_set_interrupt_shadow(vcpu, 0);
4246
	/*
	 * Note that calling vmx_set_cr0 is important, even if cr0 hasn't
	 * actually changed, because vmx_set_cr0 refers to efer set above.
	 *
	 * CR0_GUEST_HOST_MASK is already set in the original vmcs01
	 * (KVM doesn't change it); only KVM_POSSIBLE_CR0_GUEST_BITS may be
	 * owned by the guest.
	 */
4254 vcpu->arch.cr0_guest_owned_bits = KVM_POSSIBLE_CR0_GUEST_BITS;
4255 vmx_set_cr0(vcpu, vmcs12->host_cr0);
4256
	/* Same as above - no reason to call set_cr4_guest_host_mask().  */
4258 vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK);
4259 vmx_set_cr4(vcpu, vmcs12->host_cr4);
4260
4261 nested_ept_uninit_mmu_context(vcpu);

	/*
	 * Only PDPTE load can fail as the value of cr3 was checked on entry
	 * and couldn't have changed.
	 */
4267 if (nested_vmx_load_cr3(vcpu, vmcs12->host_cr3, false, true, &ignored))
4268 nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_PDPTE_FAIL);
4269
4270 nested_vmx_transition_tlb_flush(vcpu, vmcs12, false);
4271
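	/*
	 * Load the host's SYSENTER and descriptor-table state into vmcs01's
	 * guest fields.  Per the SDM, the IDTR and GDTR limits are set to
	 * 0xFFFF on every VM-exit regardless of the host-state area.
	 */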
4272 vmcs_write32(GUEST_SYSENTER_CS, vmcs12->host_ia32_sysenter_cs);
4273 vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->host_ia32_sysenter_esp);
4274 vmcs_writel(GUEST_SYSENTER_EIP, vmcs12->host_ia32_sysenter_eip);
4275 vmcs_writel(GUEST_IDTR_BASE, vmcs12->host_idtr_base);
4276 vmcs_writel(GUEST_GDTR_BASE, vmcs12->host_gdtr_base);
4277 vmcs_write32(GUEST_IDTR_LIMIT, 0xFFFF);
4278 vmcs_write32(GUEST_GDTR_LIMIT, 0xFFFF);
4279
	/* If not VM_EXIT_CLEAR_BNDCFGS, the L2 value propagates to L1.  */
4281 if (vmcs12->vm_exit_controls & VM_EXIT_CLEAR_BNDCFGS)
4282 vmcs_write64(GUEST_BNDCFGS, 0);
4283
4284 if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PAT) {
4285 vmcs_write64(GUEST_IA32_PAT, vmcs12->host_ia32_pat);
4286 vcpu->arch.pat = vmcs12->host_ia32_pat;
4287 }
4288 if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL)
4289 WARN_ON_ONCE(kvm_set_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL,
4290 vmcs12->host_ia32_perf_global_ctrl));
4291
	/* Set L1 segment info according to Intel SDM
	   27.5.2 Loading Host Segment and Descriptor-Table Registers */
4294 seg = (struct kvm_segment) {
4295 .base = 0,
4296 .limit = 0xFFFFFFFF,
4297 .selector = vmcs12->host_cs_selector,
4298 .type = 11,
4299 .present = 1,
4300 .s = 1,
4301 .g = 1
4302 };
4303 if (vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE)
4304 seg.l = 1;
4305 else
4306 seg.db = 1;
4307 vmx_set_segment(vcpu, &seg, VCPU_SREG_CS);
4308 seg = (struct kvm_segment) {
4309 .base = 0,
4310 .limit = 0xFFFFFFFF,
4311 .type = 3,
4312 .present = 1,
4313 .s = 1,
4314 .db = 1,
4315 .g = 1
4316 };
4317 seg.selector = vmcs12->host_ds_selector;
4318 vmx_set_segment(vcpu, &seg, VCPU_SREG_DS);
4319 seg.selector = vmcs12->host_es_selector;
4320 vmx_set_segment(vcpu, &seg, VCPU_SREG_ES);
4321 seg.selector = vmcs12->host_ss_selector;
4322 vmx_set_segment(vcpu, &seg, VCPU_SREG_SS);
4323 seg.selector = vmcs12->host_fs_selector;
4324 seg.base = vmcs12->host_fs_base;
4325 vmx_set_segment(vcpu, &seg, VCPU_SREG_FS);
4326 seg.selector = vmcs12->host_gs_selector;
4327 seg.base = vmcs12->host_gs_base;
4328 vmx_set_segment(vcpu, &seg, VCPU_SREG_GS);
4329 seg = (struct kvm_segment) {
4330 .base = vmcs12->host_tr_base,
4331 .limit = 0x67,
4332 .selector = vmcs12->host_tr_selector,
4333 .type = 11,
4334 .present = 1
4335 };
4336 vmx_set_segment(vcpu, &seg, VCPU_SREG_TR);
4337
4338 kvm_set_dr(vcpu, 7, 0x400);
4339 vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
4340
4341 if (cpu_has_vmx_msr_bitmap())
4342 vmx_update_msr_bitmap(vcpu);
4343
4344 if (nested_vmx_load_msr(vcpu, vmcs12->vm_exit_msr_load_addr,
4345 vmcs12->vm_exit_msr_load_count))
4346 nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_MSR_FAIL);
4347}
4348
4349static inline u64 nested_vmx_get_vmcs01_guest_efer(struct vcpu_vmx *vmx)
4350{
4351 struct vmx_uret_msr *efer_msr;
4352 unsigned int i;
4353
4354 if (vm_entry_controls_get(vmx) & VM_ENTRY_LOAD_IA32_EFER)
4355 return vmcs_read64(GUEST_IA32_EFER);
4356
4357 if (cpu_has_load_ia32_efer())
4358 return host_efer;
4359
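	/*
	 * Neither the dedicated GUEST_IA32_EFER field nor the atomic load
	 * control covers EFER here; fall back to the MSR autoload list, then
	 * to the user-return MSR cache, before assuming the host's EFER.
	 */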
4360 for (i = 0; i < vmx->msr_autoload.guest.nr; ++i) {
4361 if (vmx->msr_autoload.guest.val[i].index == MSR_EFER)
4362 return vmx->msr_autoload.guest.val[i].value;
4363 }
4364
4365 efer_msr = vmx_find_uret_msr(vmx, MSR_EFER);
4366 if (efer_msr)
4367 return efer_msr->data;
4368
4369 return host_efer;
4370}
4371
4372static void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu)
4373{
4374 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
4375 struct vcpu_vmx *vmx = to_vmx(vcpu);
4376 struct vmx_msr_entry g, h;
4377 gpa_t gpa;
4378 u32 i, j;
4379
4380 vcpu->arch.pat = vmcs_read64(GUEST_IA32_PAT);
4381
4382 if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS) {
		/*
		 * vmcs02's GUEST_DR7 holds the value L2 was running with.  If
		 * userspace is debugging the guest with hardware breakpoints,
		 * DR7 belongs to userspace/KVM and is reset to its fixed
		 * value; otherwise reload the guest's DR7.
		 */
4389 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
4390 kvm_set_dr(vcpu, 7, DR7_FIXED_1);
4391 else
4392 WARN_ON(kvm_set_dr(vcpu, 7, vmcs_readl(GUEST_DR7)));
4393 }
4394
	/*
	 * Note that calling vmx_set_{efer,cr0,cr4} is important as they
	 * handle a variety of side effects to KVM's software model.
	 */
4399 vmx_set_efer(vcpu, nested_vmx_get_vmcs01_guest_efer(vmx));
4400
4401 vcpu->arch.cr0_guest_owned_bits = KVM_POSSIBLE_CR0_GUEST_BITS;
4402 vmx_set_cr0(vcpu, vmcs_readl(CR0_READ_SHADOW));
4403
4404 vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK);
4405 vmx_set_cr4(vcpu, vmcs_readl(CR4_READ_SHADOW));
4406
4407 nested_ept_uninit_mmu_context(vcpu);
4408 vcpu->arch.cr3 = vmcs_readl(GUEST_CR3);
4409 kvm_register_mark_available(vcpu, VCPU_EXREG_CR3);

	/*
	 * Use ept_save_pdptrs(vcpu) to load the MMU's cached PDPTRs
	 * from vmcs01 (if necessary).  The PDPTRs are not loaded on
	 * VMFail, like everything else we just need to ensure our
	 * software model is up-to-date.
	 */
4417 if (enable_ept && is_pae_paging(vcpu))
4418 ept_save_pdptrs(vcpu);
4419
4420 kvm_mmu_reset_context(vcpu);
4421
4422 if (cpu_has_vmx_msr_bitmap())
4423 vmx_update_msr_bitmap(vcpu);
4424
	/*
	 * This nasty bit of open coding is a compromise between blindly
	 * loading L1's MSRs using the exit load lists (incorrect emulation
	 * of VMFail), leaving the nested VM's MSRs in the software model
	 * (incorrect behavior) and snapshotting the modified MSRs (too
	 * expensive since the lists are unbound by hardware).  For each
	 * MSR that was (prematurely) loaded from the nested VMEntry load
	 * list, reload it from the exit load list if it exists and differs
	 * from the guest value.  The intent is to stuff host state as
	 * silently as possible, not to fully process the exit load list.
	 */
4436 for (i = 0; i < vmcs12->vm_entry_msr_load_count; i++) {
4437 gpa = vmcs12->vm_entry_msr_load_addr + (i * sizeof(g));
4438 if (kvm_vcpu_read_guest(vcpu, gpa, &g, sizeof(g))) {
4439 pr_debug_ratelimited(
4440 "%s read MSR index failed (%u, 0x%08llx)\n",
4441 __func__, i, gpa);
4442 goto vmabort;
4443 }
4444
4445 for (j = 0; j < vmcs12->vm_exit_msr_load_count; j++) {
4446 gpa = vmcs12->vm_exit_msr_load_addr + (j * sizeof(h));
4447 if (kvm_vcpu_read_guest(vcpu, gpa, &h, sizeof(h))) {
4448 pr_debug_ratelimited(
4449 "%s read MSR failed (%u, 0x%08llx)\n",
4450 __func__, j, gpa);
4451 goto vmabort;
4452 }
4453 if (h.index != g.index)
4454 continue;
4455 if (h.value == g.value)
4456 break;
4457
4458 if (nested_vmx_load_msr_check(vcpu, &h)) {
4459 pr_debug_ratelimited(
4460 "%s check failed (%u, 0x%x, 0x%x)\n",
4461 __func__, j, h.index, h.reserved);
4462 goto vmabort;
4463 }
4464
4465 if (kvm_set_msr(vcpu, h.index, h.value)) {
4466 pr_debug_ratelimited(
4467 "%s WRMSR failed (%u, 0x%x, 0x%llx)\n",
4468 __func__, j, h.index, h.value);
4469 goto vmabort;
4470 }
4471 }
4472 }
4473
4474 return;
4475
4476vmabort:
4477 nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_MSR_FAIL);
4478}
4479
/*
 * Emulate an exit from nested guest (L2) to L1, i.e., prepare to run L1
 * and modify vmcs12 to make it see what it would expect to see there if
 * L2 was its real guest. Must only be called when in L2 (is_guest_mode()).
 */
4485void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
4486 u32 exit_intr_info, unsigned long exit_qualification)
4487{
4488 struct vcpu_vmx *vmx = to_vmx(vcpu);
4489 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
4490
	/* Trying to cancel vmlaunch/vmresume is a bug */
4492 WARN_ON_ONCE(vmx->nested.nested_run_pending);
4493
	/* Similarly, triple faults in L2 should never escape. */
4495 WARN_ON_ONCE(kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu));
4496
4497 if (kvm_check_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu)) {
		/*
		 * KVM_REQ_GET_NESTED_STATE_PAGES is also used to map
		 * Enlightened VMCS after migration and we still need to
		 * do that when something is forcing L2->L1 exit prior to
		 * the first L2 run.
		 */
4504 (void)nested_get_evmcs_page(vcpu);
4505 }
4506
	/* Service the TLB flush request for L2 before switching to L1. */
4508 if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu))
4509 kvm_vcpu_flush_tlb_current(vcpu);
4510
	/*
	 * VCPU_EXREG_PDPTR will be clobbered in arch/x86/kvm/vmx/vmx.h
	 * between now and the new vmentry.  Ensure that the VMCS02 PDPTR
	 * fields are up-to-date before switching to L1.
	 */
4516 if (enable_ept && is_pae_paging(vcpu))
4517 vmx_ept_load_pdptrs(vcpu);
4518
4519 leave_guest_mode(vcpu);
4520
4521 if (nested_cpu_has_preemption_timer(vmcs12))
4522 hrtimer_cancel(&to_vmx(vcpu)->nested.preemption_timer);
4523
4524 if (nested_cpu_has(vmcs12, CPU_BASED_USE_TSC_OFFSETTING)) {
4525 vcpu->arch.tsc_offset = vcpu->arch.l1_tsc_offset;
4526 if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_TSC_SCALING))
4527 vcpu->arch.tsc_scaling_ratio = vcpu->arch.l1_tsc_scaling_ratio;
4528 }
4529
4530 if (likely(!vmx->fail)) {
4531 sync_vmcs02_to_vmcs12(vcpu, vmcs12);
4532
4533 if (vm_exit_reason != -1)
4534 prepare_vmcs12(vcpu, vmcs12, vm_exit_reason,
4535 exit_intr_info, exit_qualification);
4536
		/*
		 * Must happen outside of sync_vmcs02_to_vmcs12() as it will
		 * also be used to capture vmcs12 cache as part of
		 * capturing nVMX state for snapshot (migration).
		 *
		 * Otherwise, this flush will dirty guest memory at a
		 * point it is already assumed by user-space to be
		 * immutable.
		 */
4546 nested_flush_cached_shadow_vmcs12(vcpu, vmcs12);
4547 } else {
		/*
		 * The only expected VM-instruction error is "VM entry with
		 * invalid control field(s)." Anything else indicates a
		 * problem with L0.  And we should never get here with a
		 * VMFail of any type if early consistency checks are enabled.
		 */
4554 WARN_ON_ONCE(vmcs_read32(VM_INSTRUCTION_ERROR) !=
4555 VMXERR_ENTRY_INVALID_CONTROL_FIELD);
4556 WARN_ON_ONCE(nested_early_check);
4557 }
4558
4559 vmx_switch_vmcs(vcpu, &vmx->vmcs01);
4560
	/* Update any VMCS fields that might have changed while L2 ran */
4562 vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr);
4563 vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr);
4564 vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset);
4565 if (kvm_has_tsc_control)
4566 vmcs_write64(TSC_MULTIPLIER, vcpu->arch.tsc_scaling_ratio);
4567
4568 if (vmx->nested.l1_tpr_threshold != -1)
4569 vmcs_write32(TPR_THRESHOLD, vmx->nested.l1_tpr_threshold);
4570
4571 if (vmx->nested.change_vmcs01_virtual_apic_mode) {
4572 vmx->nested.change_vmcs01_virtual_apic_mode = false;
4573 vmx_set_virtual_apic_mode(vcpu);
4574 }
4575
4576 if (vmx->nested.update_vmcs01_cpu_dirty_logging) {
4577 vmx->nested.update_vmcs01_cpu_dirty_logging = false;
4578 vmx_update_cpu_dirty_logging(vcpu);
4579 }
4580
	/* Unpin physical memory we referred to in vmcs02 */
4582 if (vmx->nested.apic_access_page) {
4583 kvm_release_page_clean(vmx->nested.apic_access_page);
4584 vmx->nested.apic_access_page = NULL;
4585 }
4586 kvm_vcpu_unmap(vcpu, &vmx->nested.virtual_apic_map, true);
4587 kvm_vcpu_unmap(vcpu, &vmx->nested.pi_desc_map, true);
4588 vmx->nested.pi_desc = NULL;
4589
4590 if (vmx->nested.reload_vmcs01_apic_access_page) {
4591 vmx->nested.reload_vmcs01_apic_access_page = false;
4592 kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu);
4593 }
4594
4595 if ((vm_exit_reason != -1) &&
4596 (enable_shadow_vmcs || evmptr_is_valid(vmx->nested.hv_evmcs_vmptr)))
4597 vmx->nested.need_vmcs12_to_shadow_sync = true;
4598
	/* In case we halted in L2 */
4600 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
4601
4602 if (likely(!vmx->fail)) {
4603 if ((u16)vm_exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT &&
4604 nested_exit_intr_ack_set(vcpu)) {
4605 int irq = kvm_cpu_get_interrupt(vcpu);
4606 WARN_ON(irq < 0);
4607 vmcs12->vm_exit_intr_info = irq |
4608 INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR;
4609 }
4610
4611 if (vm_exit_reason != -1)
4612 trace_kvm_nested_vmexit_inject(vmcs12->vm_exit_reason,
4613 vmcs12->exit_qualification,
4614 vmcs12->idt_vectoring_info_field,
4615 vmcs12->vm_exit_intr_info,
4616 vmcs12->vm_exit_intr_error_code,
4617 KVM_ISA_VMX);
4618
4619 load_vmcs12_host_state(vcpu, vmcs12);
4620
4621 return;
4622 }
4623
	/*
	 * After an early L2 VM-entry failure, we're now back
	 * in L1 which thinks it just finished a VMLAUNCH or
	 * VMRESUME instruction, so we need to set the failure
	 * flag and the VM-instruction error field of the VMCS
	 * accordingly, and skip the emulated instruction.
	 */
4631 (void)nested_vmx_fail(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
4632
	/*
	 * Restore L1's host state to KVM's software model.  We're here
	 * because a consistency check was caught by hardware, which
	 * means some amount of guest state has been propagated to KVM's
	 * model and needs to be unwound to the host's state.
	 */
4639 nested_vmx_restore_host_state(vcpu);
4640
4641 vmx->fail = 0;
4642}
4643
4644static void nested_vmx_triple_fault(struct kvm_vcpu *vcpu)
4645{
4646 nested_vmx_vmexit(vcpu, EXIT_REASON_TRIPLE_FAULT, 0, 0);
4647}
4648
/*
 * Decode the memory-address operand of a vmx instruction, as recorded on an
 * exit caused by such an instruction (run by a guest hypervisor).
 * On success, returns 0. When the operand is invalid, returns 1 and throws
 * #UD, #GP, or #SS.
 */
4655int get_vmx_mem_address(struct kvm_vcpu *vcpu, unsigned long exit_qualification,
4656 u32 vmx_instruction_info, bool wr, int len, gva_t *ret)
4657{
4658 gva_t off;
4659 bool exn;
4660 struct kvm_segment s;
4661
	/*
	 * According to Vol. 3B, "Information for VM Exits Due to Instruction
	 * Execution", on an exit, vmx_instruction_info holds most of the
	 * addressing components of the operand. Only the displacement part
	 * is put in exit_qualification (see 3B, "Basic VM-Exit Information").
	 * For how an actual address is calculated from all these components,
	 * refer to Vol. 1, "Operand Addressing".
	 */
4670 int scaling = vmx_instruction_info & 3;
4671 int addr_size = (vmx_instruction_info >> 7) & 7;
4672 bool is_reg = vmx_instruction_info & (1u << 10);
4673 int seg_reg = (vmx_instruction_info >> 15) & 7;
4674 int index_reg = (vmx_instruction_info >> 18) & 0xf;
4675 bool index_is_valid = !(vmx_instruction_info & (1u << 22));
4676 int base_reg = (vmx_instruction_info >> 23) & 0xf;
4677 bool base_is_valid = !(vmx_instruction_info & (1u << 27));
4678
4679 if (is_reg) {
4680 kvm_queue_exception(vcpu, UD_VECTOR);
4681 return 1;
4682 }
4683
	/* Addr = segment_base + offset */
	/* offset = base + [index * scale] + displacement */
	off = exit_qualification; /* holds the displacement */
4687 if (addr_size == 1)
4688 off = (gva_t)sign_extend64(off, 31);
4689 else if (addr_size == 0)
4690 off = (gva_t)sign_extend64(off, 15);
4691 if (base_is_valid)
4692 off += kvm_register_read(vcpu, base_reg);
4693 if (index_is_valid)
4694 off += kvm_register_read(vcpu, index_reg) << scaling;
4695 vmx_get_segment(vcpu, &s, seg_reg);
4696
	/*
	 * The effective address, i.e. @off, of a memory operand is truncated
	 * based on the address size of the instruction.  Note that this is
	 * the *effective address*, i.e. the address prior to accounting for
	 * the segment's base.
	 */
	if (addr_size == 1) /* 32 bit */
		off &= 0xffffffff;
	else if (addr_size == 0) /* 16 bit */
		off &= 0xffff;
4707
	/* Checks for #GP/#SS exceptions. */
4709 exn = false;
4710 if (is_long_mode(vcpu)) {
		/*
		 * The virtual/linear address is never truncated in 64-bit
		 * mode, e.g. a 32-bit address size can yield a 64-bit virtual
		 * address when using FS/GS with a non-zero base.
		 */
4716 if (seg_reg == VCPU_SREG_FS || seg_reg == VCPU_SREG_GS)
4717 *ret = s.base + off;
4718 else
4719 *ret = off;
4720
		/* Long mode: #GP(0)/#SS(0) if the memory address is in a
		 * non-canonical form. This is the only check on the memory
		 * destination for long mode!
		 */
4725 exn = is_noncanonical_address(*ret, vcpu);
4726 } else {
		/*
		 * When not in long mode, the virtual/linear address is
		 * unconditionally truncated to 32 bits regardless of the
		 * address size.
		 */
4732 *ret = (s.base + off) & 0xffffffff;
4733
		/* Protected mode: apply checks for segment validity in the
		 * following order:
		 * - segment type check (#GP(0) may be thrown)
		 * - usability check (#GP(0)/#SS(0))
		 * - limit check (#GP(0)/#SS(0))
		 */
4740 if (wr)
			/* #GP(0) if the destination operand is located in a
			 * read-only data segment or any code segment.
			 */
4744 exn = ((s.type & 0xa) == 0 || (s.type & 8));
4745 else
			/* #GP(0) if the source operand is located in an
			 * execute-only code segment.
			 */
4749 exn = ((s.type & 0xa) == 8);
4750 if (exn) {
4751 kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
4752 return 1;
4753 }
4754
		/* Protected mode: #GP(0)/#SS(0) if the segment is unusable. */
4756 exn = (s.unusable != 0);
4757
		/*
		 * Protected mode: #GP(0)/#SS(0) if the memory operand is
		 * outside the segment limit.  All CPUs that support VMX ignore
		 * limit checks for flat segments, i.e. segments with base==0,
		 * limit==0xffffffff and of type expand-up data or code.
		 */
4764 if (!(s.base == 0 && s.limit == 0xffffffff &&
4765 ((s.type & 8) || !(s.type & 4))))
4766 exn = exn || ((u64)off + len - 1 > s.limit);
4767 }
4768 if (exn) {
4769 kvm_queue_exception_e(vcpu,
4770 seg_reg == VCPU_SREG_SS ?
4771 SS_VECTOR : GP_VECTOR,
4772 0);
4773 return 1;
4774 }
4775
4776 return 0;
4777}
4778
4779void nested_vmx_pmu_entry_exit_ctls_update(struct kvm_vcpu *vcpu)
4780{
4781 struct vcpu_vmx *vmx;
4782
4783 if (!nested_vmx_allowed(vcpu))
4784 return;
4785
4786 vmx = to_vmx(vcpu);
4787 if (kvm_x86_ops.pmu_ops->is_valid_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL)) {
4788 vmx->nested.msrs.entry_ctls_high |=
4789 VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL;
4790 vmx->nested.msrs.exit_ctls_high |=
4791 VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL;
4792 } else {
4793 vmx->nested.msrs.entry_ctls_high &=
4794 ~VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL;
4795 vmx->nested.msrs.exit_ctls_high &=
4796 ~VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL;
4797 }
4798}
4799
4800static int nested_vmx_get_vmptr(struct kvm_vcpu *vcpu, gpa_t *vmpointer,
4801 int *ret)
4802{
4803 gva_t gva;
4804 struct x86_exception e;
4805 int r;
4806
4807 if (get_vmx_mem_address(vcpu, vmx_get_exit_qual(vcpu),
4808 vmcs_read32(VMX_INSTRUCTION_INFO), false,
4809 sizeof(*vmpointer), &gva)) {
4810 *ret = 1;
4811 return -EINVAL;
4812 }
4813
4814 r = kvm_read_guest_virt(vcpu, gva, vmpointer, sizeof(*vmpointer), &e);
4815 if (r != X86EMUL_CONTINUE) {
4816 *ret = kvm_handle_memory_failure(vcpu, r, &e);
4817 return -EINVAL;
4818 }
4819
4820 return 0;
4821}
4822
/*
 * Allocate a shadow VMCS and associate it with the currently loaded
 * VMCS, unless such a shadow VMCS already exists. The newly allocated
 * VMCS is also VMCLEARed, so that it is ready for use.
 */
4828static struct vmcs *alloc_shadow_vmcs(struct kvm_vcpu *vcpu)
4829{
4830 struct vcpu_vmx *vmx = to_vmx(vcpu);
4831 struct loaded_vmcs *loaded_vmcs = vmx->loaded_vmcs;
4832
	/*
	 * We should allocate a shadow vmcs for vmcs01 only when L1
	 * executes VMXON and free it when L1 executes VMXOFF.
	 * As it is invalid to execute VMXON twice, we shouldn't reach
	 * here when vmcs01 already has an allocated shadow vmcs.
	 */
4839 WARN_ON(loaded_vmcs == &vmx->vmcs01 && loaded_vmcs->shadow_vmcs);
4840
4841 if (!loaded_vmcs->shadow_vmcs) {
4842 loaded_vmcs->shadow_vmcs = alloc_vmcs(true);
4843 if (loaded_vmcs->shadow_vmcs)
4844 vmcs_clear(loaded_vmcs->shadow_vmcs);
4845 }
4846 return loaded_vmcs->shadow_vmcs;
4847}
4848
4849static int enter_vmx_operation(struct kvm_vcpu *vcpu)
4850{
4851 struct vcpu_vmx *vmx = to_vmx(vcpu);
4852 int r;
4853
4854 r = alloc_loaded_vmcs(&vmx->nested.vmcs02);
4855 if (r < 0)
4856 goto out_vmcs02;
4857
4858 vmx->nested.cached_vmcs12 = kzalloc(VMCS12_SIZE, GFP_KERNEL_ACCOUNT);
4859 if (!vmx->nested.cached_vmcs12)
4860 goto out_cached_vmcs12;
4861
4862 vmx->nested.cached_shadow_vmcs12 = kzalloc(VMCS12_SIZE, GFP_KERNEL_ACCOUNT);
4863 if (!vmx->nested.cached_shadow_vmcs12)
4864 goto out_cached_shadow_vmcs12;
4865
4866 if (enable_shadow_vmcs && !alloc_shadow_vmcs(vcpu))
4867 goto out_shadow_vmcs;
4868
4869 hrtimer_init(&vmx->nested.preemption_timer, CLOCK_MONOTONIC,
4870 HRTIMER_MODE_ABS_PINNED);
4871 vmx->nested.preemption_timer.function = vmx_preemption_timer_fn;
4872
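	/*
	 * vpid02 is the TLB tag used while running L2.  A failed allocation
	 * yields vpid 0, in which case nested_get_vpid02() falls back to
	 * L1's VPID and KVM flushes on nested transitions as needed.
	 */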
4873 vmx->nested.vpid02 = allocate_vpid();
4874
4875 vmx->nested.vmcs02_initialized = false;
4876 vmx->nested.vmxon = true;
4877
4878 if (vmx_pt_mode_is_host_guest()) {
4879 vmx->pt_desc.guest.ctl = 0;
4880 pt_update_intercept_for_msr(vcpu);
4881 }
4882
4883 return 0;
4884
4885out_shadow_vmcs:
4886 kfree(vmx->nested.cached_shadow_vmcs12);
4887
4888out_cached_shadow_vmcs12:
4889 kfree(vmx->nested.cached_vmcs12);
4890
4891out_cached_vmcs12:
4892 free_loaded_vmcs(&vmx->nested.vmcs02);
4893
4894out_vmcs02:
4895 return -ENOMEM;
4896}
4897
/*
 * Emulate the VMXON instruction.
 *
 * Per the SDM, VMXON faults or VM-fails when CR4.VMXE is clear, CPL is
 * non-zero, VMX is already on, IA32_FEATURE_CONTROL is not properly
 * configured, or the VMXON pointer is invalid; the checks below follow
 * that ordering.
 */
4906static int handle_vmon(struct kvm_vcpu *vcpu)
4907{
4908 int ret;
4909 gpa_t vmptr;
4910 uint32_t revision;
4911 struct vcpu_vmx *vmx = to_vmx(vcpu);
4912 const u64 VMXON_NEEDED_FEATURES = FEAT_CTL_LOCKED
4913 | FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX;
4914
	/*
	 * The Intel VMX Instruction Reference lists a bunch of bits that are
	 * prerequisite to running VMXON, most notably cr4.VMXE must be set to
	 * 1 (see vmx_is_valid_cr4() for when we allow the guest to set this).
	 * Otherwise, we should fail with #UD.  But most faulting conditions
	 * have already been checked by hardware, prior to the VM-exit for
	 * VMXON.  We do test guest cr4.VMXE because processor CR4 always has
	 * that bit set to 1 in non-root mode when the vCPU is running.
	 */
4924 if (!kvm_read_cr4_bits(vcpu, X86_CR4_VMXE)) {
4925 kvm_queue_exception(vcpu, UD_VECTOR);
4926 return 1;
4927 }
4928
	/* CPL=0 must be checked manually. */
4930 if (vmx_get_cpl(vcpu)) {
4931 kvm_inject_gp(vcpu, 0);
4932 return 1;
4933 }
4934
4935 if (vmx->nested.vmxon)
4936 return nested_vmx_fail(vcpu, VMXERR_VMXON_IN_VMX_ROOT_OPERATION);
4937
4938 if ((vmx->msr_ia32_feature_control & VMXON_NEEDED_FEATURES)
4939 != VMXON_NEEDED_FEATURES) {
4940 kvm_inject_gp(vcpu, 0);
4941 return 1;
4942 }
4943
4944 if (nested_vmx_get_vmptr(vcpu, &vmptr, &ret))
4945 return ret;
4946
	/*
	 * SDM 3: 24.11.5
	 * The first 4 bytes of VMXON region contain the supported
	 * VMCS revision identifier
	 *
	 * Note - IA32_VMX_BASIC[48] will never be 1 for the nested case;
	 * which replaces physical address width with 32
	 */
4955 if (!page_address_valid(vcpu, vmptr))
4956 return nested_vmx_failInvalid(vcpu);
4957
4958 if (kvm_read_guest(vcpu->kvm, vmptr, &revision, sizeof(revision)) ||
4959 revision != VMCS12_REVISION)
4960 return nested_vmx_failInvalid(vcpu);
4961
4962 vmx->nested.vmxon_ptr = vmptr;
4963 ret = enter_vmx_operation(vcpu);
4964 if (ret)
4965 return ret;
4966
4967 return nested_vmx_succeed(vcpu);
4968}
4969
4970static inline void nested_release_vmcs12(struct kvm_vcpu *vcpu)
4971{
4972 struct vcpu_vmx *vmx = to_vmx(vcpu);
4973
4974 if (vmx->nested.current_vmptr == -1ull)
4975 return;
4976
4977 copy_vmcs02_to_vmcs12_rare(vcpu, get_vmcs12(vcpu));
4978
4979 if (enable_shadow_vmcs) {
		/* copy to memory all shadowed fields in case
		   they were modified */
4982 copy_shadow_to_vmcs12(vmx);
4983 vmx_disable_shadow_vmcs(vmx);
4984 }
4985 vmx->nested.posted_intr_nv = -1;
4986
	/* Flush VMCS12 to guest memory */
4988 kvm_vcpu_write_guest_page(vcpu,
4989 vmx->nested.current_vmptr >> PAGE_SHIFT,
4990 vmx->nested.cached_vmcs12, 0, VMCS12_SIZE);
4991
4992 kvm_mmu_free_roots(vcpu, &vcpu->arch.guest_mmu, KVM_MMU_ROOTS_ALL);
4993
4994 vmx->nested.current_vmptr = -1ull;
4995}
4996
/* Emulate the VMXOFF instruction */
4998static int handle_vmoff(struct kvm_vcpu *vcpu)
4999{
5000 if (!nested_vmx_check_permission(vcpu))
5001 return 1;
5002
5003 free_nested(vcpu);
5004
	/* Process a latched INIT during time CPU was in VMX operation */
5006 kvm_make_request(KVM_REQ_EVENT, vcpu);
5007
5008 return nested_vmx_succeed(vcpu);
5009}
5010
/* Emulate the VMCLEAR instruction */
5012static int handle_vmclear(struct kvm_vcpu *vcpu)
5013{
5014 struct vcpu_vmx *vmx = to_vmx(vcpu);
5015 u32 zero = 0;
5016 gpa_t vmptr;
5017 u64 evmcs_gpa;
5018 int r;
5019
5020 if (!nested_vmx_check_permission(vcpu))
5021 return 1;
5022
5023 if (nested_vmx_get_vmptr(vcpu, &vmptr, &r))
5024 return r;
5025
5026 if (!page_address_valid(vcpu, vmptr))
5027 return nested_vmx_fail(vcpu, VMXERR_VMCLEAR_INVALID_ADDRESS);
5028
5029 if (vmptr == vmx->nested.vmxon_ptr)
5030 return nested_vmx_fail(vcpu, VMXERR_VMCLEAR_VMXON_POINTER);
5031
	/*
	 * When Enlightened VMEntry is enabled on the calling CPU we treat
	 * memory area pointed to by vmptr as Enlightened VMCS (as there's no
	 * good way to distinguish it from VMCS12) and we must not corrupt it
	 * by writing to the non-existent 'launch_state' field. The area
	 * doesn't have to be the currently active EVMCS on the calling CPU and
	 * there's nothing KVM has to do to transition it from 'active' to
	 * 'non-active' state. It's possible that the area will stay mapped as
	 * vmx->nested.hv_evmcs but this shouldn't be a problem.
	 */
5042 if (likely(!vmx->nested.enlightened_vmcs_enabled ||
5043 !nested_enlightened_vmentry(vcpu, &evmcs_gpa))) {
5044 if (vmptr == vmx->nested.current_vmptr)
5045 nested_release_vmcs12(vcpu);
5046
5047 kvm_vcpu_write_guest(vcpu,
5048 vmptr + offsetof(struct vmcs12,
5049 launch_state),
5050 &zero, sizeof(zero));
5051 } else if (vmx->nested.hv_evmcs && vmptr == vmx->nested.hv_evmcs_vmptr) {
5052 nested_release_evmcs(vcpu);
5053 }
5054
5055 return nested_vmx_succeed(vcpu);
5056}
5057
/* Emulate the VMLAUNCH instruction */
5059static int handle_vmlaunch(struct kvm_vcpu *vcpu)
5060{
5061 return nested_vmx_run(vcpu, true);
5062}
5063
/* Emulate the VMRESUME instruction */
5065static int handle_vmresume(struct kvm_vcpu *vcpu)
5066{
5067
5068 return nested_vmx_run(vcpu, false);
5069}
5070
5071static int handle_vmread(struct kvm_vcpu *vcpu)
5072{
5073 struct vmcs12 *vmcs12 = is_guest_mode(vcpu) ? get_shadow_vmcs12(vcpu)
5074 : get_vmcs12(vcpu);
5075 unsigned long exit_qualification = vmx_get_exit_qual(vcpu);
5076 u32 instr_info = vmcs_read32(VMX_INSTRUCTION_INFO);
5077 struct vcpu_vmx *vmx = to_vmx(vcpu);
5078 struct x86_exception e;
5079 unsigned long field;
5080 u64 value;
5081 gva_t gva = 0;
5082 short offset;
5083 int len, r;
5084
5085 if (!nested_vmx_check_permission(vcpu))
5086 return 1;
5087
	/*
	 * In VMX non-root operation, when the VMCS-link pointer is -1ull,
	 * any VMREAD sets the ALU flags for VMfailInvalid.
	 */
5092 if (vmx->nested.current_vmptr == -1ull ||
5093 (is_guest_mode(vcpu) &&
5094 get_vmcs12(vcpu)->vmcs_link_pointer == -1ull))
5095 return nested_vmx_failInvalid(vcpu);
5096
	/* Decode instruction info and find the field to read */
5098 field = kvm_register_read(vcpu, (((instr_info) >> 28) & 0xf));
5099
5100 offset = vmcs_field_to_offset(field);
5101 if (offset < 0)
5102 return nested_vmx_fail(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
5103
5104 if (!is_guest_mode(vcpu) && is_vmcs12_ext_field(field))
5105 copy_vmcs02_to_vmcs12_rare(vcpu, vmcs12);
5106
	/* Read the field, zero-extended to a u64 value */
5108 value = vmcs12_read_any(vmcs12, field, offset);
5109
	/*
	 * Now copy part of this value to register or memory, as requested.
	 * Note that the number of bits actually copied is 32 or 64 depending
	 * on the instruction's mode, and not on the given field's length.
	 */
5115 if (instr_info & BIT(10)) {
5116 kvm_register_write(vcpu, (((instr_info) >> 3) & 0xf), value);
5117 } else {
5118 len = is_64_bit_mode(vcpu) ? 8 : 4;
5119 if (get_vmx_mem_address(vcpu, exit_qualification,
5120 instr_info, true, len, &gva))
5121 return 1;
5122
5123 r = kvm_write_guest_virt_system(vcpu, gva, &value, len, &e);
5124 if (r != X86EMUL_CONTINUE)
5125 return kvm_handle_memory_failure(vcpu, r, &e);
5126 }
5127
5128 return nested_vmx_succeed(vcpu);
5129}
5130
5131static bool is_shadow_field_rw(unsigned long field)
5132{
5133 switch (field) {
5134#define SHADOW_FIELD_RW(x, y) case x:
5135#include "vmcs_shadow_fields.h"
5136 return true;
5137 default:
5138 break;
5139 }
5140 return false;
5141}
5142
5143static bool is_shadow_field_ro(unsigned long field)
5144{
5145 switch (field) {
5146#define SHADOW_FIELD_RO(x, y) case x:
5147#include "vmcs_shadow_fields.h"
5148 return true;
5149 default:
5150 break;
5151 }
5152 return false;
5153}
5154
5155static int handle_vmwrite(struct kvm_vcpu *vcpu)
5156{
5157 struct vmcs12 *vmcs12 = is_guest_mode(vcpu) ? get_shadow_vmcs12(vcpu)
5158 : get_vmcs12(vcpu);
5159 unsigned long exit_qualification = vmx_get_exit_qual(vcpu);
5160 u32 instr_info = vmcs_read32(VMX_INSTRUCTION_INFO);
5161 struct vcpu_vmx *vmx = to_vmx(vcpu);
5162 struct x86_exception e;
5163 unsigned long field;
5164 short offset;
5165 gva_t gva;
5166 int len, r;
5167
	/*
	 * The value to write might be 32 or 64 bits, depending on L1's long
	 * mode, and eventually we need to write that into a field of several
	 * possible lengths. The code below first zero-extends the value to 64
	 * bit (value), and then copies only the appropriate number of
	 * bits into the vmcs12 field.
	 */
5175 u64 value = 0;
5176
5177 if (!nested_vmx_check_permission(vcpu))
5178 return 1;
5179
	/*
	 * In VMX non-root operation, when the VMCS-link pointer is -1ull,
	 * any VMWRITE sets the ALU flags for VMfailInvalid.
	 */
5184 if (vmx->nested.current_vmptr == -1ull ||
5185 (is_guest_mode(vcpu) &&
5186 get_vmcs12(vcpu)->vmcs_link_pointer == -1ull))
5187 return nested_vmx_failInvalid(vcpu);
5188
5189 if (instr_info & BIT(10))
5190 value = kvm_register_read(vcpu, (((instr_info) >> 3) & 0xf));
5191 else {
5192 len = is_64_bit_mode(vcpu) ? 8 : 4;
5193 if (get_vmx_mem_address(vcpu, exit_qualification,
5194 instr_info, false, len, &gva))
5195 return 1;
5196 r = kvm_read_guest_virt(vcpu, gva, &value, len, &e);
5197 if (r != X86EMUL_CONTINUE)
5198 return kvm_handle_memory_failure(vcpu, r, &e);
5199 }
5200
5201 field = kvm_register_read(vcpu, (((instr_info) >> 28) & 0xf));
5202
5203 offset = vmcs_field_to_offset(field);
5204 if (offset < 0)
5205 return nested_vmx_fail(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
5206

	/*
	 * If the vCPU supports "VMWRITE to any supported field in the
	 * VMCS," then the "read-only" fields are actually read/write.
	 */
5211 if (vmcs_field_readonly(field) &&
5212 !nested_cpu_has_vmwrite_any_field(vcpu))
5213 return nested_vmx_fail(vcpu, VMXERR_VMWRITE_READ_ONLY_VMCS_COMPONENT);
5214
	/*
	 * Ensure vmcs12 is up-to-date before any VMWRITE that dirties
	 * vmcs12, else we may crush a field or consume a stale value.
	 */
5219 if (!is_guest_mode(vcpu) && !is_shadow_field_rw(field))
5220 copy_vmcs02_to_vmcs12_rare(vcpu, vmcs12);
5221
	/*
	 * Some Intel CPUs intentionally drop the reserved bits of the AR byte
	 * fields on VMWRITE.  Emulate this behavior to ensure consistent KVM
	 * behavior regardless of the underlying hardware, e.g. if an AR_BYTE
	 * field is intercepted for VMWRITE but not VMREAD (in L1), then VMREAD
	 * from L1 will return a different value than VMREAD from L2 (L1 sees
	 * the stripped down value, L2 sees the full value as stored by KVM).
	 */
5230 if (field >= GUEST_ES_AR_BYTES && field <= GUEST_TR_AR_BYTES)
5231 value &= 0x1f0ff;
5232
5233 vmcs12_write_any(vmcs12, field, offset, value);
5234
	/*
	 * Do not track vmcs12 dirty-state if in guest-mode as we actually
	 * dirty shadow vmcs12 instead of vmcs12.  Fields that can be updated
	 * by L1 without a vmexit are always updated in the vmcs02, i.e. don't
	 * "dirty" vmcs12, all the dirty tracking applies only to vmcs01.
	 */
5241 if (!is_guest_mode(vcpu) && !is_shadow_field_rw(field)) {
		/*
		 * L1 can read these fields without exiting, ensure the
		 * shadow VMCS is up-to-date.
		 */
5246 if (enable_shadow_vmcs && is_shadow_field_ro(field)) {
5247 preempt_disable();
5248 vmcs_load(vmx->vmcs01.shadow_vmcs);
5249
5250 __vmcs_writel(field, value);
5251
5252 vmcs_clear(vmx->vmcs01.shadow_vmcs);
5253 vmcs_load(vmx->loaded_vmcs->vmcs);
5254 preempt_enable();
5255 }
5256 vmx->nested.dirty_vmcs12 = true;
5257 }
5258
5259 return nested_vmx_succeed(vcpu);
5260}
5261
5262static void set_current_vmptr(struct vcpu_vmx *vmx, gpa_t vmptr)
5263{
5264 vmx->nested.current_vmptr = vmptr;
5265 if (enable_shadow_vmcs) {
5266 secondary_exec_controls_setbit(vmx, SECONDARY_EXEC_SHADOW_VMCS);
5267 vmcs_write64(VMCS_LINK_POINTER,
5268 __pa(vmx->vmcs01.shadow_vmcs));
5269 vmx->nested.need_vmcs12_to_shadow_sync = true;
5270 }
5271 vmx->nested.dirty_vmcs12 = true;
5272}
5273
/* Emulate the VMPTRLD instruction */
5275static int handle_vmptrld(struct kvm_vcpu *vcpu)
5276{
5277 struct vcpu_vmx *vmx = to_vmx(vcpu);
5278 gpa_t vmptr;
5279 int r;
5280
5281 if (!nested_vmx_check_permission(vcpu))
5282 return 1;
5283
5284 if (nested_vmx_get_vmptr(vcpu, &vmptr, &r))
5285 return r;
5286
5287 if (!page_address_valid(vcpu, vmptr))
5288 return nested_vmx_fail(vcpu, VMXERR_VMPTRLD_INVALID_ADDRESS);
5289
5290 if (vmptr == vmx->nested.vmxon_ptr)
5291 return nested_vmx_fail(vcpu, VMXERR_VMPTRLD_VMXON_POINTER);
5292
	/* Forbid normal VMPTRLD if Enlightened version was used */
5294 if (evmptr_is_valid(vmx->nested.hv_evmcs_vmptr))
5295 return 1;
5296
5297 if (vmx->nested.current_vmptr != vmptr) {
5298 struct kvm_host_map map;
5299 struct vmcs12 *new_vmcs12;
5300
5301 if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmptr), &map)) {
			/*
			 * Reads from an unbacked page return all 1s,
			 * which means that the 32 bits located at the
			 * given physical address won't match the required
			 * VMCS12_REVISION identifier.
			 */
5308 return nested_vmx_fail(vcpu,
5309 VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID);
5310 }
5311
5312 new_vmcs12 = map.hva;
5313
5314 if (new_vmcs12->hdr.revision_id != VMCS12_REVISION ||
5315 (new_vmcs12->hdr.shadow_vmcs &&
5316 !nested_cpu_has_vmx_shadow_vmcs(vcpu))) {
5317 kvm_vcpu_unmap(vcpu, &map, false);
5318 return nested_vmx_fail(vcpu,
5319 VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID);
5320 }
5321
5322 nested_release_vmcs12(vcpu);
5323
		/*
		 * Load VMCS12 from guest memory since it is not already
		 * cached.
		 */
5328 memcpy(vmx->nested.cached_vmcs12, new_vmcs12, VMCS12_SIZE);
5329 kvm_vcpu_unmap(vcpu, &map, false);
5330
5331 set_current_vmptr(vmx, vmptr);
5332 }
5333
5334 return nested_vmx_succeed(vcpu);
5335}
5336
/* Emulate the VMPTRST instruction */
5338static int handle_vmptrst(struct kvm_vcpu *vcpu)
5339{
5340 unsigned long exit_qual = vmx_get_exit_qual(vcpu);
5341 u32 instr_info = vmcs_read32(VMX_INSTRUCTION_INFO);
5342 gpa_t current_vmptr = to_vmx(vcpu)->nested.current_vmptr;
5343 struct x86_exception e;
5344 gva_t gva;
5345 int r;
5346
5347 if (!nested_vmx_check_permission(vcpu))
5348 return 1;
5349
5350 if (unlikely(evmptr_is_valid(to_vmx(vcpu)->nested.hv_evmcs_vmptr)))
5351 return 1;
5352
5353 if (get_vmx_mem_address(vcpu, exit_qual, instr_info,
5354 true, sizeof(gpa_t), &gva))
5355 return 1;
5356
	r = kvm_write_guest_virt_system(vcpu, gva, (void *)&current_vmptr,
5358 sizeof(gpa_t), &e);
5359 if (r != X86EMUL_CONTINUE)
5360 return kvm_handle_memory_failure(vcpu, r, &e);
5361
5362 return nested_vmx_succeed(vcpu);
5363}
5364
/* Emulate the INVEPT instruction */
5366static int handle_invept(struct kvm_vcpu *vcpu)
5367{
5368 struct vcpu_vmx *vmx = to_vmx(vcpu);
5369 u32 vmx_instruction_info, types;
5370 unsigned long type, roots_to_free;
5371 struct kvm_mmu *mmu;
5372 gva_t gva;
5373 struct x86_exception e;
5374 struct {
5375 u64 eptp, gpa;
5376 } operand;
5377 int i, r;
5378
5379 if (!(vmx->nested.msrs.secondary_ctls_high &
5380 SECONDARY_EXEC_ENABLE_EPT) ||
5381 !(vmx->nested.msrs.ept_caps & VMX_EPT_INVEPT_BIT)) {
5382 kvm_queue_exception(vcpu, UD_VECTOR);
5383 return 1;
5384 }
5385
5386 if (!nested_vmx_check_permission(vcpu))
5387 return 1;
5388
5389 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
5390 type = kvm_register_read(vcpu, (vmx_instruction_info >> 28) & 0xf);
5391
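	/*
	 * Mask in the INVEPT types advertised to L1: after the shift, bit 1
	 * is single-context and bit 2 is global invalidation.
	 */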
5392 types = (vmx->nested.msrs.ept_caps >> VMX_EPT_EXTENT_SHIFT) & 6;
5393
5394 if (type >= 32 || !(types & (1 << type)))
5395 return nested_vmx_fail(vcpu, VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
5396
	/* According to the Intel VMX instruction reference, the memory
	 * operand is read even if it isn't needed (e.g., for type==global)
	 */
5400 if (get_vmx_mem_address(vcpu, vmx_get_exit_qual(vcpu),
5401 vmx_instruction_info, false, sizeof(operand), &gva))
5402 return 1;
5403 r = kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e);
5404 if (r != X86EMUL_CONTINUE)
5405 return kvm_handle_memory_failure(vcpu, r, &e);
5406
	/*
	 * Nested EPT roots are always held through guest_mmu,
	 * not root_mmu.
	 */
5411 mmu = &vcpu->arch.guest_mmu;
5412
5413 switch (type) {
5414 case VMX_EPT_EXTENT_CONTEXT:
5415 if (!nested_vmx_check_eptp(vcpu, operand.eptp))
5416 return nested_vmx_fail(vcpu,
5417 VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
5418
5419 roots_to_free = 0;
5420 if (nested_ept_root_matches(mmu->root_hpa, mmu->root_pgd,
5421 operand.eptp))
5422 roots_to_free |= KVM_MMU_ROOT_CURRENT;
5423
5424 for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) {
5425 if (nested_ept_root_matches(mmu->prev_roots[i].hpa,
5426 mmu->prev_roots[i].pgd,
5427 operand.eptp))
5428 roots_to_free |= KVM_MMU_ROOT_PREVIOUS(i);
5429 }
5430 break;
5431 case VMX_EPT_EXTENT_GLOBAL:
5432 roots_to_free = KVM_MMU_ROOTS_ALL;
5433 break;
5434 default:
5435 BUG();
5436 break;
5437 }
5438
5439 if (roots_to_free)
5440 kvm_mmu_free_roots(vcpu, mmu, roots_to_free);
5441
5442 return nested_vmx_succeed(vcpu);
5443}
5444
5445static int handle_invvpid(struct kvm_vcpu *vcpu)
5446{
5447 struct vcpu_vmx *vmx = to_vmx(vcpu);
5448 u32 vmx_instruction_info;
5449 unsigned long type, types;
5450 gva_t gva;
5451 struct x86_exception e;
5452 struct {
5453 u64 vpid;
5454 u64 gla;
5455 } operand;
5456 u16 vpid02;
5457 int r;
5458
5459 if (!(vmx->nested.msrs.secondary_ctls_high &
5460 SECONDARY_EXEC_ENABLE_VPID) ||
5461 !(vmx->nested.msrs.vpid_caps & VMX_VPID_INVVPID_BIT)) {
5462 kvm_queue_exception(vcpu, UD_VECTOR);
5463 return 1;
5464 }
5465
5466 if (!nested_vmx_check_permission(vcpu))
5467 return 1;
5468
5469 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
5470 type = kvm_register_read(vcpu, (vmx_instruction_info >> 28) & 0xf);
5471
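	/*
	 * vpid_caps holds the upper half of the EPT/VPID capability MSR, so
	 * the extent-supported bits sit at bits 11:8; shifting by 8 yields a
	 * bitmask indexed by INVVPID type.
	 */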
5472 types = (vmx->nested.msrs.vpid_caps &
5473 VMX_VPID_EXTENT_SUPPORTED_MASK) >> 8;
5474
5475 if (type >= 32 || !(types & (1 << type)))
5476 return nested_vmx_fail(vcpu,
5477 VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
5478
	/* According to the Intel VMX instruction reference, the memory
	 * operand is read even if it isn't needed (e.g., for type==global)
	 */
5482 if (get_vmx_mem_address(vcpu, vmx_get_exit_qual(vcpu),
5483 vmx_instruction_info, false, sizeof(operand), &gva))
5484 return 1;
5485 r = kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e);
5486 if (r != X86EMUL_CONTINUE)
5487 return kvm_handle_memory_failure(vcpu, r, &e);
5488
5489 if (operand.vpid >> 16)
5490 return nested_vmx_fail(vcpu,
5491 VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
5492
5493 vpid02 = nested_get_vpid02(vcpu);
5494 switch (type) {
5495 case VMX_VPID_EXTENT_INDIVIDUAL_ADDR:
5496 if (!operand.vpid ||
5497 is_noncanonical_address(operand.gla, vcpu))
5498 return nested_vmx_fail(vcpu,
5499 VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
5500 vpid_sync_vcpu_addr(vpid02, operand.gla);
5501 break;
5502 case VMX_VPID_EXTENT_SINGLE_CONTEXT:
5503 case VMX_VPID_EXTENT_SINGLE_NON_GLOBAL:
5504 if (!operand.vpid)
5505 return nested_vmx_fail(vcpu,
5506 VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
5507 vpid_sync_context(vpid02);
5508 break;
5509 case VMX_VPID_EXTENT_ALL_CONTEXT:
5510 vpid_sync_context(vpid02);
5511 break;
5512 default:
5513 WARN_ON_ONCE(1);
5514 return kvm_skip_emulated_instruction(vcpu);
5515 }
5516
	/*
	 * Sync the shadow page tables if EPT is disabled, L1 is invalidating
	 * linear mappings for L2 (tagged with vpid02).  Free all guest-mode
	 * roots as VPIDs are not tracked in the MMU role.
	 *
	 * Note, this operates on root_mmu, not guest_mmu, as L1 and L2 share
	 * an MMU when EPT is disabled.
	 *
	 * TODO: sync only the affected SPTEs for INVDIVIDUAL_ADDR.
	 */
5527 if (!enable_ept)
5528 kvm_mmu_free_guest_mode_roots(vcpu, &vcpu->arch.root_mmu);
5529
5530 return nested_vmx_succeed(vcpu);
5531}
5532
5533static int nested_vmx_eptp_switching(struct kvm_vcpu *vcpu,
5534 struct vmcs12 *vmcs12)
5535{
5536 u32 index = kvm_rcx_read(vcpu);
5537 u64 new_eptp;
5538
5539 if (WARN_ON_ONCE(!nested_cpu_has_ept(vmcs12)))
5540 return 1;
5541 if (index >= VMFUNC_EPTP_ENTRIES)
5542 return 1;
5543
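	/* Each EPTP list entry is a u64; read entry 'index' from the page. */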
5544 if (kvm_vcpu_read_guest_page(vcpu, vmcs12->eptp_list_address >> PAGE_SHIFT,
5545 &new_eptp, index * 8, 8))
5546 return 1;
5547
	/*
	 * If the (L2) guest does a vmfunc to the currently
	 * active ept pointer, we don't have to do anything else
	 */
5552 if (vmcs12->ept_pointer != new_eptp) {
5553 if (!nested_vmx_check_eptp(vcpu, new_eptp))
5554 return 1;
5555
5556 vmcs12->ept_pointer = new_eptp;
5557 nested_ept_new_eptp(vcpu);
5558
5559 if (!nested_cpu_has_vpid(vmcs12))
5560 kvm_make_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu);
5561 }
5562
5563 return 0;
5564}
5565
5566static int handle_vmfunc(struct kvm_vcpu *vcpu)
5567{
5568 struct vcpu_vmx *vmx = to_vmx(vcpu);
5569 struct vmcs12 *vmcs12;
5570 u32 function = kvm_rax_read(vcpu);
5571
	/*
	 * VMFUNC is only supported for nested guests; the secondary control
	 * is always enabled for simplicity.  For non-nested mode, pretend the
	 * instruction doesn't exist and inject #UD.
	 */
5577 if (!is_guest_mode(vcpu)) {
5578 kvm_queue_exception(vcpu, UD_VECTOR);
5579 return 1;
5580 }
5581
5582 vmcs12 = get_vmcs12(vcpu);
5583
	/*
	 * #UD on out-of-bounds function has priority over VM-Exit, and VMFUNC
	 * is enabled in vmcs02 if and only if it's enabled in vmcs12.
	 */
5588 if (WARN_ON_ONCE((function > 63) || !nested_cpu_has_vmfunc(vmcs12))) {
5589 kvm_queue_exception(vcpu, UD_VECTOR);
5590 return 1;
5591 }
5592
5593 if (!(vmcs12->vm_function_control & BIT_ULL(function)))
5594 goto fail;
5595
5596 switch (function) {
5597 case 0:
5598 if (nested_vmx_eptp_switching(vcpu, vmcs12))
5599 goto fail;
5600 break;
5601 default:
5602 goto fail;
5603 }
5604 return kvm_skip_emulated_instruction(vcpu);
5605
5606fail:
	/*
	 * This is effectively a reflected VM-Exit, as opposed to a synthesized
	 * nested VM-Exit.  Pass the original exit reason, i.e. don't hardcode
	 * EXIT_REASON_VMFUNC as the exit reason.
	 */
5612 nested_vmx_vmexit(vcpu, vmx->exit_reason.full,
5613 vmx_get_intr_info(vcpu),
5614 vmx_get_exit_qual(vcpu));
5615 return 1;
5616}
5617
/*
 * Return true if an IO instruction with the specified port and size should
 * cause a VM-exit into L1.
 */
5622bool nested_vmx_check_io_bitmaps(struct kvm_vcpu *vcpu, unsigned int port,
5623 int size)
5624{
5625 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
5626 gpa_t bitmap, last_bitmap;
5627 u8 b;
5628
5629 last_bitmap = (gpa_t)-1;
5630 b = -1;
5631
5632 while (size > 0) {
5633 if (port < 0x8000)
5634 bitmap = vmcs12->io_bitmap_a;
5635 else if (port < 0x10000)
5636 bitmap = vmcs12->io_bitmap_b;
5637 else
5638 return true;
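		/* Each bitmap byte covers eight ports. */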
5639 bitmap += (port & 0x7fff) / 8;
5640
5641 if (last_bitmap != bitmap)
5642 if (kvm_vcpu_read_guest(vcpu, bitmap, &b, 1))
5643 return true;
5644 if (b & (1 << (port & 7)))
5645 return true;
5646
5647 port++;
5648 size--;
5649 last_bitmap = bitmap;
5650 }
5651
5652 return false;
5653}
5654
5655static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu,
5656 struct vmcs12 *vmcs12)
5657{
5658 unsigned long exit_qualification;
5659 unsigned short port;
5660 int size;
5661
5662 if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS))
5663 return nested_cpu_has(vmcs12, CPU_BASED_UNCOND_IO_EXITING);
5664
5665 exit_qualification = vmx_get_exit_qual(vcpu);
5666
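	/*
	 * Exit qualification for I/O: bits 2:0 hold the access size minus
	 * one, bits 31:16 the port number.
	 */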
5667 port = exit_qualification >> 16;
5668 size = (exit_qualification & 7) + 1;
5669
5670 return nested_vmx_check_io_bitmaps(vcpu, port, size);
5671}
5672
/*
 * Return true if we should exit from L2 to L1 to handle an MSR access,
 * rather than handle it ourselves in L0. I.e., check whether L1 expressed
 * disinterest in the current event (read or write a specific MSR) by using an
 * MSR bitmap. This may be the case even when L0 doesn't use MSR bitmaps.
 */
5679static bool nested_vmx_exit_handled_msr(struct kvm_vcpu *vcpu,
5680 struct vmcs12 *vmcs12,
5681 union vmx_exit_reason exit_reason)
5682{
5683 u32 msr_index = kvm_rcx_read(vcpu);
5684 gpa_t bitmap;
5685
5686 if (!nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS))
5687 return true;
5688
	/*
	 * The MSR_BITMAP page is divided into four 1024-byte bitmaps,
	 * for the four combinations of read/write and low/high MSR numbers.
	 * First we need to figure out which of the four to use:
	 */
5694 bitmap = vmcs12->msr_bitmap;
5695 if (exit_reason.basic == EXIT_REASON_MSR_WRITE)
5696 bitmap += 2048;
5697 if (msr_index >= 0xc0000000) {
5698 msr_index -= 0xc0000000;
5699 bitmap += 1024;
5700 }
5701
	/* Then read the msr_index'th bit from this bitmap: */
5703 if (msr_index < 1024*8) {
5704 unsigned char b;
5705 if (kvm_vcpu_read_guest(vcpu, bitmap + msr_index/8, &b, 1))
5706 return true;
5707 return 1 & (b >> (msr_index & 7));
5708 } else
5709 return true;
5710}
5711
/*
 * Return true if we should exit from L2 to L1 to handle a CR access exit,
 * rather than handle it ourselves in L0. I.e., check if L1 wanted to
 * intercept (via guest_host_mask etc.) the current event.
 */
5717static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu,
5718 struct vmcs12 *vmcs12)
5719{
5720 unsigned long exit_qualification = vmx_get_exit_qual(vcpu);
5721 int cr = exit_qualification & 15;
5722 int reg;
5723 unsigned long val;
5724
5725 switch ((exit_qualification >> 4) & 3) {
	case 0: /* mov to cr */
5727 reg = (exit_qualification >> 8) & 15;
5728 val = kvm_register_read(vcpu, reg);
5729 switch (cr) {
5730 case 0:
5731 if (vmcs12->cr0_guest_host_mask &
5732 (val ^ vmcs12->cr0_read_shadow))
5733 return true;
5734 break;
5735 case 3:
5736 if (nested_cpu_has(vmcs12, CPU_BASED_CR3_LOAD_EXITING))
5737 return true;
5738 break;
5739 case 4:
5740 if (vmcs12->cr4_guest_host_mask &
5741 (vmcs12->cr4_read_shadow ^ val))
5742 return true;
5743 break;
5744 case 8:
5745 if (nested_cpu_has(vmcs12, CPU_BASED_CR8_LOAD_EXITING))
5746 return true;
5747 break;
5748 }
5749 break;
	case 2: /* clts */
5751 if ((vmcs12->cr0_guest_host_mask & X86_CR0_TS) &&
5752 (vmcs12->cr0_read_shadow & X86_CR0_TS))
5753 return true;
5754 break;
	case 1: /* mov from cr */
5756 switch (cr) {
5757 case 3:
5758 if (vmcs12->cpu_based_vm_exec_control &
5759 CPU_BASED_CR3_STORE_EXITING)
5760 return true;
5761 break;
5762 case 8:
5763 if (vmcs12->cpu_based_vm_exec_control &
5764 CPU_BASED_CR8_STORE_EXITING)
5765 return true;
5766 break;
5767 }
5768 break;
	case 3: /* lmsw */
		/*
		 * lmsw can change bits 1..3 of cr0, and only set bit 0 of
		 * cr0. Other attempted changes are ignored, with no exit.
		 */
5774 val = (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f;
5775 if (vmcs12->cr0_guest_host_mask & 0xe &
5776 (val ^ vmcs12->cr0_read_shadow))
5777 return true;
5778 if ((vmcs12->cr0_guest_host_mask & 0x1) &&
5779 !(vmcs12->cr0_read_shadow & 0x1) &&
5780 (val & 0x1))
5781 return true;
5782 break;
5783 }
5784 return false;
5785}
5786
5787static bool nested_vmx_exit_handled_encls(struct kvm_vcpu *vcpu,
5788 struct vmcs12 *vmcs12)
5789{
5790 u32 encls_leaf;
5791
5792 if (!guest_cpuid_has(vcpu, X86_FEATURE_SGX) ||
5793 !nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENCLS_EXITING))
5794 return false;
5795
5796 encls_leaf = kvm_rax_read(vcpu);
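	/* The bitmap is 64 bits wide; leaves above 62 all map to bit 63. */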
5797 if (encls_leaf > 62)
5798 encls_leaf = 63;
5799 return vmcs12->encls_exiting_bitmap & BIT_ULL(encls_leaf);
5800}
5801
5802static bool nested_vmx_exit_handled_vmcs_access(struct kvm_vcpu *vcpu,
5803 struct vmcs12 *vmcs12, gpa_t bitmap)
5804{
5805 u32 vmx_instruction_info;
5806 unsigned long field;
5807 u8 b;
5808
5809 if (!nested_cpu_has_shadow_vmcs(vmcs12))
5810 return true;
5811
	/* Decode instruction info and find the field to access */
5813 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
5814 field = kvm_register_read(vcpu, (((vmx_instruction_info) >> 28) & 0xf));
5815
	/* Out-of-range fields always cause a VM exit from L2 to L1 */
5817 if (field >> 15)
5818 return true;
5819
5820 if (kvm_vcpu_read_guest(vcpu, bitmap + field/8, &b, 1))
5821 return true;
5822
5823 return 1 & (b >> (field & 7));
5824}
5825
5826static bool nested_vmx_exit_handled_mtf(struct vmcs12 *vmcs12)
5827{
5828 u32 entry_intr_info = vmcs12->vm_entry_intr_info_field;
5829
5830 if (nested_cpu_has_mtf(vmcs12))
5831 return true;
5832
	/*
	 * An MTF VM-exit may be injected into the guest by setting the
	 * interruption-type to 7 (other event) and the vector field to 0. Such
	 * is the case regardless of the 'monitor trap flag' VM-execution
	 * control.
	 */
5839 return entry_intr_info == (INTR_INFO_VALID_MASK
5840 | INTR_TYPE_OTHER_EVENT);
5841}
5842
/*
 * Return true if L0 wants to handle an exit from L2 regardless of whether or
 * not L1 wants the exit.  Only call this when in is_guest_mode (L2).
 */
5847static bool nested_vmx_l0_wants_exit(struct kvm_vcpu *vcpu,
5848 union vmx_exit_reason exit_reason)
5849{
5850 u32 intr_info;
5851
5852 switch ((u16)exit_reason.basic) {
5853 case EXIT_REASON_EXCEPTION_NMI:
5854 intr_info = vmx_get_intr_info(vcpu);
5855 if (is_nmi(intr_info))
5856 return true;
5857 else if (is_page_fault(intr_info))
5858 return vcpu->arch.apf.host_apf_flags ||
5859 vmx_need_pf_intercept(vcpu);
5860 else if (is_debug(intr_info) &&
5861 vcpu->guest_debug &
5862 (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
5863 return true;
5864 else if (is_breakpoint(intr_info) &&
5865 vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
5866 return true;
5867 else if (is_alignment_check(intr_info) &&
5868 !vmx_guest_inject_ac(vcpu))
5869 return true;
5870 return false;
5871 case EXIT_REASON_EXTERNAL_INTERRUPT:
5872 return true;
5873 case EXIT_REASON_MCE_DURING_VMENTRY:
5874 return true;
5875 case EXIT_REASON_EPT_VIOLATION:
		/*
		 * L0 always deals with the EPT violation. If nested EPT is
		 * used, and the nested mmu code discovers that the address is
		 * missing in the guest EPT table (EPT12), the EPT violation
		 * will be injected with nested_ept_inject_page_fault()
		 */
5882 return true;
5883 case EXIT_REASON_EPT_MISCONFIG:
		/*
		 * L2 never uses directly L1's EPT, but rather L0's own EPT
		 * table (shadow on EPT) or a merged EPT table that L0 built
		 * (EPT on EPT). So any problems with the structure of the
		 * table is L0's fault.
		 */
5890 return true;
5891 case EXIT_REASON_PREEMPTION_TIMER:
5892 return true;
5893 case EXIT_REASON_PML_FULL:
		/*
		 * PML is emulated for an L1 VMM and should never be enabled in
		 * vmcs02, always "handle" PML_FULL by exiting to userspace.
		 */
5898 return true;
5899 case EXIT_REASON_VMFUNC:
		/* VM functions are emulated through L2->L0 vmexits. */
5901 return true;
5902 default:
5903 break;
5904 }
5905 return false;
5906}
5907
/*
 * Return true if L1 wants to intercept an exit from L2.  Only call this when
 * in is_guest_mode (L2).
 */
5912static bool nested_vmx_l1_wants_exit(struct kvm_vcpu *vcpu,
5913 union vmx_exit_reason exit_reason)
5914{
5915 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
5916 u32 intr_info;
5917
5918 switch ((u16)exit_reason.basic) {
5919 case EXIT_REASON_EXCEPTION_NMI:
5920 intr_info = vmx_get_intr_info(vcpu);
5921 if (is_nmi(intr_info))
5922 return true;
5923 else if (is_page_fault(intr_info))
5924 return true;
5925 return vmcs12->exception_bitmap &
5926 (1u << (intr_info & INTR_INFO_VECTOR_MASK));
5927 case EXIT_REASON_EXTERNAL_INTERRUPT:
5928 return nested_exit_on_intr(vcpu);
5929 case EXIT_REASON_TRIPLE_FAULT:
5930 return true;
5931 case EXIT_REASON_INTERRUPT_WINDOW:
5932 return nested_cpu_has(vmcs12, CPU_BASED_INTR_WINDOW_EXITING);
5933 case EXIT_REASON_NMI_WINDOW:
5934 return nested_cpu_has(vmcs12, CPU_BASED_NMI_WINDOW_EXITING);
5935 case EXIT_REASON_TASK_SWITCH:
5936 return true;
5937 case EXIT_REASON_CPUID:
5938 return true;
5939 case EXIT_REASON_HLT:
5940 return nested_cpu_has(vmcs12, CPU_BASED_HLT_EXITING);
5941 case EXIT_REASON_INVD:
5942 return true;
5943 case EXIT_REASON_INVLPG:
5944 return nested_cpu_has(vmcs12, CPU_BASED_INVLPG_EXITING);
5945 case EXIT_REASON_RDPMC:
5946 return nested_cpu_has(vmcs12, CPU_BASED_RDPMC_EXITING);
5947 case EXIT_REASON_RDRAND:
5948 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDRAND_EXITING);
5949 case EXIT_REASON_RDSEED:
5950 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDSEED_EXITING);
5951 case EXIT_REASON_RDTSC: case EXIT_REASON_RDTSCP:
5952 return nested_cpu_has(vmcs12, CPU_BASED_RDTSC_EXITING);
5953 case EXIT_REASON_VMREAD:
5954 return nested_vmx_exit_handled_vmcs_access(vcpu, vmcs12,
5955 vmcs12->vmread_bitmap);
5956 case EXIT_REASON_VMWRITE:
5957 return nested_vmx_exit_handled_vmcs_access(vcpu, vmcs12,
5958 vmcs12->vmwrite_bitmap);
5959 case EXIT_REASON_VMCALL: case EXIT_REASON_VMCLEAR:
5960 case EXIT_REASON_VMLAUNCH: case EXIT_REASON_VMPTRLD:
5961 case EXIT_REASON_VMPTRST: case EXIT_REASON_VMRESUME:
5962 case EXIT_REASON_VMOFF: case EXIT_REASON_VMON:
5963 case EXIT_REASON_INVEPT: case EXIT_REASON_INVVPID:
		/*
		 * VMX instructions trap unconditionally. This allows L1 to
		 * emulate them for its L2 guest, i.e., allows 3-level nesting!
		 */
5968 return true;
5969 case EXIT_REASON_CR_ACCESS:
5970 return nested_vmx_exit_handled_cr(vcpu, vmcs12);
5971 case EXIT_REASON_DR_ACCESS:
5972 return nested_cpu_has(vmcs12, CPU_BASED_MOV_DR_EXITING);
5973 case EXIT_REASON_IO_INSTRUCTION:
5974 return nested_vmx_exit_handled_io(vcpu, vmcs12);
5975 case EXIT_REASON_GDTR_IDTR: case EXIT_REASON_LDTR_TR:
5976 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_DESC);
5977 case EXIT_REASON_MSR_READ:
5978 case EXIT_REASON_MSR_WRITE:
5979 return nested_vmx_exit_handled_msr(vcpu, vmcs12, exit_reason);
5980 case EXIT_REASON_INVALID_STATE:
5981 return true;
5982 case EXIT_REASON_MWAIT_INSTRUCTION:
5983 return nested_cpu_has(vmcs12, CPU_BASED_MWAIT_EXITING);
5984 case EXIT_REASON_MONITOR_TRAP_FLAG:
5985 return nested_vmx_exit_handled_mtf(vmcs12);
5986 case EXIT_REASON_MONITOR_INSTRUCTION:
5987 return nested_cpu_has(vmcs12, CPU_BASED_MONITOR_EXITING);
5988 case EXIT_REASON_PAUSE_INSTRUCTION:
5989 return nested_cpu_has(vmcs12, CPU_BASED_PAUSE_EXITING) ||
5990 nested_cpu_has2(vmcs12,
5991 SECONDARY_EXEC_PAUSE_LOOP_EXITING);
5992 case EXIT_REASON_MCE_DURING_VMENTRY:
5993 return true;
5994 case EXIT_REASON_TPR_BELOW_THRESHOLD:
5995 return nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW);
5996 case EXIT_REASON_APIC_ACCESS:
5997 case EXIT_REASON_APIC_WRITE:
5998 case EXIT_REASON_EOI_INDUCED:
		/*
		 * The controls for "virtualize APIC accesses," "APIC-
		 * register virtualization," and "virtual-interrupt
		 * delivery" only come from vmcs12.
		 */
6004 return true;
6005 case EXIT_REASON_INVPCID:
6006 return
6007 nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_INVPCID) &&
6008 nested_cpu_has(vmcs12, CPU_BASED_INVLPG_EXITING);
6009 case EXIT_REASON_WBINVD:
6010 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_WBINVD_EXITING);
6011 case EXIT_REASON_XSETBV:
6012 return true;
6013 case EXIT_REASON_XSAVES: case EXIT_REASON_XRSTORS:
		/*
		 * This should never happen, since it is not possible to
		 * set XSS to a non-zero value---neither in L1 nor in L2.
		 * If it were, XSS would have to be checked against
		 * the XSS exit bitmap in vmcs12.
		 */
6020 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES);
6021 case EXIT_REASON_UMWAIT:
6022 case EXIT_REASON_TPAUSE:
6023 return nested_cpu_has2(vmcs12,
6024 SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE);
6025 case EXIT_REASON_ENCLS:
6026 return nested_vmx_exit_handled_encls(vcpu, vmcs12);
6027 default:
6028 return true;
6029 }
6030}
6031
/*
 * Conditionally reflect a VM-Exit into L1.  Returns %true if the VM-Exit was
 * reflected into L1.
 */
6036bool nested_vmx_reflect_vmexit(struct kvm_vcpu *vcpu)
6037{
6038 struct vcpu_vmx *vmx = to_vmx(vcpu);
6039 union vmx_exit_reason exit_reason = vmx->exit_reason;
6040 unsigned long exit_qual;
6041 u32 exit_intr_info;
6042
6043 WARN_ON_ONCE(vmx->nested.nested_run_pending);
6044
	/*
	 * Late nested VM-Fail shares the same flow as nested VM-Exit since
	 * KVM has already loaded L2's state.
	 */
6049 if (unlikely(vmx->fail)) {
6050 trace_kvm_nested_vmenter_failed(
6051 "hardware VM-instruction error: ",
6052 vmcs_read32(VM_INSTRUCTION_ERROR));
6053 exit_intr_info = 0;
6054 exit_qual = 0;
6055 goto reflect_vmexit;
6056 }
6057
6058 trace_kvm_nested_vmexit(exit_reason.full, vcpu, KVM_ISA_VMX);
6059
	/* If L0 (KVM) wants the exit, it trumps L1's desires. */
6061 if (nested_vmx_l0_wants_exit(vcpu, exit_reason))
6062 return false;
6063
	/* If L1 doesn't want the exit, handle it in L0. */
6065 if (!nested_vmx_l1_wants_exit(vcpu, exit_reason))
6066 return false;
6067
	/*
	 * vmcs.VM_EXIT_INTR_INFO is only valid for EXCEPTION_NMI exits.  For
	 * EXTERNAL_INTERRUPT, the value for vmcs12->vm_exit_intr_info would
	 * need to be synthesized by querying the in-kernel LAPIC, but external
	 * interrupts are never reflected to L1 so it's a non-issue.
	 */
6074 exit_intr_info = vmx_get_intr_info(vcpu);
6075 if (is_exception_with_error_code(exit_intr_info)) {
6076 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
6077
6078 vmcs12->vm_exit_intr_error_code =
6079 vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
6080 }
6081 exit_qual = vmx_get_exit_qual(vcpu);
6082
6083reflect_vmexit:
6084 nested_vmx_vmexit(vcpu, exit_reason.full, exit_intr_info, exit_qual);
6085 return true;
6086}
6087
6088static int vmx_get_nested_state(struct kvm_vcpu *vcpu,
6089 struct kvm_nested_state __user *user_kvm_nested_state,
6090 u32 user_data_size)
6091{
6092 struct vcpu_vmx *vmx;
6093 struct vmcs12 *vmcs12;
6094 struct kvm_nested_state kvm_state = {
6095 .flags = 0,
6096 .format = KVM_STATE_NESTED_FORMAT_VMX,
6097 .size = sizeof(kvm_state),
6098 .hdr.vmx.flags = 0,
6099 .hdr.vmx.vmxon_pa = -1ull,
6100 .hdr.vmx.vmcs12_pa = -1ull,
6101 .hdr.vmx.preemption_timer_deadline = 0,
6102 };
6103 struct kvm_vmx_nested_state_data __user *user_vmx_nested_state =
6104 &user_kvm_nested_state->data.vmx[0];
6105
6106 if (!vcpu)
6107 return kvm_state.size + sizeof(*user_vmx_nested_state);
6108
6109 vmx = to_vmx(vcpu);
6110 vmcs12 = get_vmcs12(vcpu);
6111
6112 if (nested_vmx_allowed(vcpu) &&
6113 (vmx->nested.vmxon || vmx->nested.smm.vmxon)) {
6114 kvm_state.hdr.vmx.vmxon_pa = vmx->nested.vmxon_ptr;
6115 kvm_state.hdr.vmx.vmcs12_pa = vmx->nested.current_vmptr;
6116
6117 if (vmx_has_valid_vmcs12(vcpu)) {
6118 kvm_state.size += sizeof(user_vmx_nested_state->vmcs12);
6119
			/* 'hv_evmcs_vmptr' can also be EVMPTR_MAP_PENDING here */
6121 if (vmx->nested.hv_evmcs_vmptr != EVMPTR_INVALID)
6122 kvm_state.flags |= KVM_STATE_NESTED_EVMCS;
6123
6124 if (is_guest_mode(vcpu) &&
6125 nested_cpu_has_shadow_vmcs(vmcs12) &&
6126 vmcs12->vmcs_link_pointer != -1ull)
6127 kvm_state.size += sizeof(user_vmx_nested_state->shadow_vmcs12);
6128 }
6129
6130 if (vmx->nested.smm.vmxon)
6131 kvm_state.hdr.vmx.smm.flags |= KVM_STATE_NESTED_SMM_VMXON;
6132
6133 if (vmx->nested.smm.guest_mode)
6134 kvm_state.hdr.vmx.smm.flags |= KVM_STATE_NESTED_SMM_GUEST_MODE;
6135
6136 if (is_guest_mode(vcpu)) {
6137 kvm_state.flags |= KVM_STATE_NESTED_GUEST_MODE;
6138
6139 if (vmx->nested.nested_run_pending)
6140 kvm_state.flags |= KVM_STATE_NESTED_RUN_PENDING;
6141
6142 if (vmx->nested.mtf_pending)
6143 kvm_state.flags |= KVM_STATE_NESTED_MTF_PENDING;
6144
6145 if (nested_cpu_has_preemption_timer(vmcs12) &&
6146 vmx->nested.has_preemption_timer_deadline) {
6147 kvm_state.hdr.vmx.flags |=
6148 KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE;
6149 kvm_state.hdr.vmx.preemption_timer_deadline =
6150 vmx->nested.preemption_timer_deadline;
6151 }
6152 }
6153 }
6154
6155 if (user_data_size < kvm_state.size)
6156 goto out;
6157
6158 if (copy_to_user(user_kvm_nested_state, &kvm_state, sizeof(kvm_state)))
6159 return -EFAULT;
6160
6161 if (!vmx_has_valid_vmcs12(vcpu))
6162 goto out;
6163
	/*
	 * When running L2, the authoritative vmcs12 state is in the
	 * vmcs02. When running L1, the authoritative vmcs12 state is
	 * in the shadow or enlightened vmcs linked to vmcs01, unless
	 * need_vmcs12_to_shadow_sync is set, in which case, the
	 * authoritative vmcs12 state is in the vmcs12 already.
	 */
6171 if (is_guest_mode(vcpu)) {
6172 sync_vmcs02_to_vmcs12(vcpu, vmcs12);
6173 sync_vmcs02_to_vmcs12_rare(vcpu, vmcs12);
6174 } else {
6175 copy_vmcs02_to_vmcs12_rare(vcpu, get_vmcs12(vcpu));
6176 if (!vmx->nested.need_vmcs12_to_shadow_sync) {
6177 if (evmptr_is_valid(vmx->nested.hv_evmcs_vmptr))
				/*
				 * L1 hypervisor is not obliged to keep eVMCS
				 * clean fields data always up-to-date while
				 * not in guest mode, 'hv_clean_fields' is only
				 * supposed to be actual upon vmentry so we
				 * need to ignore it here and do full copy.
				 */
6185 copy_enlightened_to_vmcs12(vmx, 0);
6186 else if (enable_shadow_vmcs)
6187 copy_shadow_to_vmcs12(vmx);
6188 }
6189 }
6190
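	/*
	 * The copies below write a full VMCS12_SIZE, so the userspace struct
	 * fields must be at least that large.
	 */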
6191 BUILD_BUG_ON(sizeof(user_vmx_nested_state->vmcs12) < VMCS12_SIZE);
6192 BUILD_BUG_ON(sizeof(user_vmx_nested_state->shadow_vmcs12) < VMCS12_SIZE);
6193
	/*
	 * Copy over the full allocated size of vmcs12 rather than just the
	 * size of the struct.
	 */
6198 if (copy_to_user(user_vmx_nested_state->vmcs12, vmcs12, VMCS12_SIZE))
6199 return -EFAULT;
6200
6201 if (nested_cpu_has_shadow_vmcs(vmcs12) &&
6202 vmcs12->vmcs_link_pointer != -1ull) {
6203 if (copy_to_user(user_vmx_nested_state->shadow_vmcs12,
6204 get_shadow_vmcs12(vcpu), VMCS12_SIZE))
6205 return -EFAULT;
6206 }
6207out:
6208 return kvm_state.size;
6209}
6210
/*
 * Forcibly leave nested mode in order to be able to reset the VCPU later on.
 */
6214void vmx_leave_nested(struct kvm_vcpu *vcpu)
6215{
6216 if (is_guest_mode(vcpu)) {
6217 to_vmx(vcpu)->nested.nested_run_pending = 0;
6218 nested_vmx_vmexit(vcpu, -1, 0, 0);
6219 }
6220 free_nested(vcpu);
6221}
6222
6223static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
6224 struct kvm_nested_state __user *user_kvm_nested_state,
6225 struct kvm_nested_state *kvm_state)
6226{
6227 struct vcpu_vmx *vmx = to_vmx(vcpu);
6228 struct vmcs12 *vmcs12;
6229 enum vm_entry_failure_code ignored;
6230 struct kvm_vmx_nested_state_data __user *user_vmx_nested_state =
6231 &user_kvm_nested_state->data.vmx[0];
6232 int ret;
6233
6234 if (kvm_state->format != KVM_STATE_NESTED_FORMAT_VMX)
6235 return -EINVAL;
6236
6237 if (kvm_state->hdr.vmx.vmxon_pa == -1ull) {
6238 if (kvm_state->hdr.vmx.smm.flags)
6239 return -EINVAL;
6240
6241 if (kvm_state->hdr.vmx.vmcs12_pa != -1ull)
6242 return -EINVAL;
6243
		/*
		 * Without a valid VMXON pointer there is no nested state to
		 * restore; the only flag that may legitimately remain set is
		 * KVM_STATE_NESTED_EVMCS, anything else means the nested
		 * state is malformed.
		 */
6253 if (kvm_state->flags & ~KVM_STATE_NESTED_EVMCS)
6254 return -EINVAL;
6255 } else {
6256 if (!nested_vmx_allowed(vcpu))
6257 return -EINVAL;
6258
6259 if (!page_address_valid(vcpu, kvm_state->hdr.vmx.vmxon_pa))
6260 return -EINVAL;
6261 }
6262
6263 if ((kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE) &&
6264 (kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE))
6265 return -EINVAL;
6266
6267 if (kvm_state->hdr.vmx.smm.flags &
6268 ~(KVM_STATE_NESTED_SMM_GUEST_MODE | KVM_STATE_NESTED_SMM_VMXON))
6269 return -EINVAL;
6270
6271 if (kvm_state->hdr.vmx.flags & ~KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE)
6272 return -EINVAL;
6273
	/*
	 * SMM temporarily disables VMX, so we cannot be in guest mode,
	 * nor can VMLAUNCH/VMRESUME be pending.  Outside SMM, SMM flags
	 * must be zero.
	 */
6279 if (is_smm(vcpu) ?
6280 (kvm_state->flags &
6281 (KVM_STATE_NESTED_GUEST_MODE | KVM_STATE_NESTED_RUN_PENDING))
6282 : kvm_state->hdr.vmx.smm.flags)
6283 return -EINVAL;
6284
6285 if ((kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE) &&
6286 !(kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_VMXON))
6287 return -EINVAL;
6288
6289 if ((kvm_state->flags & KVM_STATE_NESTED_EVMCS) &&
6290 (!nested_vmx_allowed(vcpu) || !vmx->nested.enlightened_vmcs_enabled))
6291 return -EINVAL;
6292
6293 vmx_leave_nested(vcpu);
6294
6295 if (kvm_state->hdr.vmx.vmxon_pa == -1ull)
6296 return 0;
6297
6298 vmx->nested.vmxon_ptr = kvm_state->hdr.vmx.vmxon_pa;
6299 ret = enter_vmx_operation(vcpu);
6300 if (ret)
6301 return ret;
6302
	/* Empty 'VMXON' state is permitted if no VMCS loaded */
6304 if (kvm_state->size < sizeof(*kvm_state) + sizeof(*vmcs12)) {
		/* See vmx_has_valid_vmcs12.  */
6306 if ((kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE) ||
6307 (kvm_state->flags & KVM_STATE_NESTED_EVMCS) ||
6308 (kvm_state->hdr.vmx.vmcs12_pa != -1ull))
6309 return -EINVAL;
6310 else
6311 return 0;
6312 }
6313
6314 if (kvm_state->hdr.vmx.vmcs12_pa != -1ull) {
6315 if (kvm_state->hdr.vmx.vmcs12_pa == kvm_state->hdr.vmx.vmxon_pa ||
6316 !page_address_valid(vcpu, kvm_state->hdr.vmx.vmcs12_pa))
6317 return -EINVAL;
6318
6319 set_current_vmptr(vmx, kvm_state->hdr.vmx.vmcs12_pa);
6320 } else if (kvm_state->flags & KVM_STATE_NESTED_EVMCS) {
		/*
		 * nested_vmx_handle_enlightened_vmptrld() cannot be called
		 * directly from here as HV_X64_MSR_VP_ASSIST_PAGE may not be
		 * restored yet. EVMCS will be mapped from
		 * nested_get_vmcs12_pages().
		 */
6327 vmx->nested.hv_evmcs_vmptr = EVMPTR_MAP_PENDING;
6328 kvm_make_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
6329 } else {
6330 return -EINVAL;
6331 }
6332
6333 if (kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_VMXON) {
6334 vmx->nested.smm.vmxon = true;
6335 vmx->nested.vmxon = false;
6336
6337 if (kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE)
6338 vmx->nested.smm.guest_mode = true;
6339 }
6340
6341 vmcs12 = get_vmcs12(vcpu);
6342 if (copy_from_user(vmcs12, user_vmx_nested_state->vmcs12, sizeof(*vmcs12)))
6343 return -EFAULT;
6344
6345 if (vmcs12->hdr.revision_id != VMCS12_REVISION)
6346 return -EINVAL;
6347
6348 if (!(kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE))
6349 return 0;
6350
6351 vmx->nested.nested_run_pending =
6352 !!(kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING);
6353
6354 vmx->nested.mtf_pending =
6355 !!(kvm_state->flags & KVM_STATE_NESTED_MTF_PENDING);
6356
6357 ret = -EINVAL;
6358 if (nested_cpu_has_shadow_vmcs(vmcs12) &&
6359 vmcs12->vmcs_link_pointer != -1ull) {
6360 struct vmcs12 *shadow_vmcs12 = get_shadow_vmcs12(vcpu);
6361
6362 if (kvm_state->size <
6363 sizeof(*kvm_state) +
6364 sizeof(user_vmx_nested_state->vmcs12) + sizeof(*shadow_vmcs12))
6365 goto error_guest_mode;
6366
6367 if (copy_from_user(shadow_vmcs12,
6368 user_vmx_nested_state->shadow_vmcs12,
6369 sizeof(*shadow_vmcs12))) {
6370 ret = -EFAULT;
6371 goto error_guest_mode;
6372 }
6373
6374 if (shadow_vmcs12->hdr.revision_id != VMCS12_REVISION ||
6375 !shadow_vmcs12->hdr.shadow_vmcs)
6376 goto error_guest_mode;
6377 }
6378
6379 vmx->nested.has_preemption_timer_deadline = false;
6380 if (kvm_state->hdr.vmx.flags & KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE) {
6381 vmx->nested.has_preemption_timer_deadline = true;
6382 vmx->nested.preemption_timer_deadline =
6383 kvm_state->hdr.vmx.preemption_timer_deadline;
6384 }
6385
6386 if (nested_vmx_check_controls(vcpu, vmcs12) ||
6387 nested_vmx_check_host_state(vcpu, vmcs12) ||
6388 nested_vmx_check_guest_state(vcpu, vmcs12, &ignored))
6389 goto error_guest_mode;
6390
6391 vmx->nested.dirty_vmcs12 = true;
6392 ret = nested_vmx_enter_non_root_mode(vcpu, false);
6393 if (ret)
6394 goto error_guest_mode;
6395
6396 return 0;
6397
6398error_guest_mode:
6399 vmx->nested.nested_run_pending = 0;
6400 return ret;
6401}
6402
6403void nested_vmx_set_vmcs_shadowing_bitmap(void)
6404{
6405 if (enable_shadow_vmcs) {
6406 vmcs_write64(VMREAD_BITMAP, __pa(vmx_vmread_bitmap));
6407 vmcs_write64(VMWRITE_BITMAP, __pa(vmx_vmwrite_bitmap));
6408 }
6409}
6410
/*
 * Indexing into the vmcs12 uses the VMCS encoding rotated left by 6.  Undo
 * that madness to get the encoding for comparison.
 */
6415#define VMCS12_IDX_TO_ENC(idx) ((u16)(((u16)(idx) >> 6) | ((u16)(idx) << 10)))
6416
6417static u64 nested_vmx_calc_vmcs_enum_msr(void)
6418{
	/*
	 * Note these are the so called "index" of the VMCS field encoding,
	 * not the index into vmcs12.
	 */
6423 unsigned int max_idx, idx;
6424 int i;
6425
	/*
	 * For better or worse, KVM allows VMREAD/VMWRITE to all fields in
	 * vmcs12, regardless of whether or not the associated feature is
	 * exposed to L1.  Simply find the field with the highest index.
	 */
6431 max_idx = 0;
6432 for (i = 0; i < nr_vmcs12_fields; i++) {
		/* The vmcs12 table is very, very sparsely populated. */
6434 if (!vmcs_field_to_offset_table[i])
6435 continue;
6436
6437 idx = vmcs_field_index(VMCS12_IDX_TO_ENC(i));
6438 if (idx > max_idx)
6439 max_idx = idx;
6440 }
6441
6442 return (u64)max_idx << VMCS_FIELD_INDEX_SHIFT;
6443}
6444
/*
 * nested_vmx_setup_ctls_msrs() sets up variables containing the values to be
 * returned for the various VMX controls MSRs when nested VMX is enabled.
 * The same values should also be used to verify that vmcs12 control fields
 * are consistent with global values.
 * Each of these control msrs has a low and high 32-bit half: A low bit is on
 * if the corresponding bit in the high half is supported.
 */
6455void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps)
6456{
	/*
	 * Note that as a general rule, the high half of the MSRs (bits in
	 * the control fields which may be 1) should be initialized by the
	 * intersection of the underlying hardware's MSR (i.e., features which
	 * can be supported) and the list of features we want to expose -
	 * because they are known to be properly supported in our code.
	 * Also, usually, the low half of the MSRs (bits which must be 1) can
	 * be set to 0, meaning that L1 may turn off any of these bits. The
	 * reason is that if we haven't enabled these features for L1, turning
	 * them off makes it a lot easier to analyze what's happening in L0.
	 * These rules have exceptions below.
	 */

	/* pin-based controls */
6473 rdmsr(MSR_IA32_VMX_PINBASED_CTLS,
6474 msrs->pinbased_ctls_low,
6475 msrs->pinbased_ctls_high);
6476 msrs->pinbased_ctls_low |=
6477 PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
6478 msrs->pinbased_ctls_high &=
6479 PIN_BASED_EXT_INTR_MASK |
6480 PIN_BASED_NMI_EXITING |
6481 PIN_BASED_VIRTUAL_NMIS |
6482 (enable_apicv ? PIN_BASED_POSTED_INTR : 0);
6483 msrs->pinbased_ctls_high |=
6484 PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR |
6485 PIN_BASED_VMX_PREEMPTION_TIMER;
6486
	/* exit controls */
6488 rdmsr(MSR_IA32_VMX_EXIT_CTLS,
6489 msrs->exit_ctls_low,
6490 msrs->exit_ctls_high);
6491 msrs->exit_ctls_low =
6492 VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR;
6493
6494 msrs->exit_ctls_high &=
6495#ifdef CONFIG_X86_64
6496 VM_EXIT_HOST_ADDR_SPACE_SIZE |
6497#endif
6498 VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT |
6499 VM_EXIT_CLEAR_BNDCFGS | VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL;
6500 msrs->exit_ctls_high |=
6501 VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR |
6502 VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER |
6503 VM_EXIT_SAVE_VMX_PREEMPTION_TIMER | VM_EXIT_ACK_INTR_ON_EXIT;
6504
	/* We support free control of debug control saving. */
6506 msrs->exit_ctls_low &= ~VM_EXIT_SAVE_DEBUG_CONTROLS;
6507
	/* entry controls */
6509 rdmsr(MSR_IA32_VMX_ENTRY_CTLS,
6510 msrs->entry_ctls_low,
6511 msrs->entry_ctls_high);
6512 msrs->entry_ctls_low =
6513 VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR;
6514 msrs->entry_ctls_high &=
6515#ifdef CONFIG_X86_64
6516 VM_ENTRY_IA32E_MODE |
6517#endif
6518 VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_LOAD_BNDCFGS |
6519 VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL;
6520 msrs->entry_ctls_high |=
6521 (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR | VM_ENTRY_LOAD_IA32_EFER);
6522
	/* We support free control of debug control loading. */
6524 msrs->entry_ctls_low &= ~VM_ENTRY_LOAD_DEBUG_CONTROLS;
6525
	/* cpu-based controls */
6527 rdmsr(MSR_IA32_VMX_PROCBASED_CTLS,
6528 msrs->procbased_ctls_low,
6529 msrs->procbased_ctls_high);
6530 msrs->procbased_ctls_low =
6531 CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
6532 msrs->procbased_ctls_high &=
6533 CPU_BASED_INTR_WINDOW_EXITING |
6534 CPU_BASED_NMI_WINDOW_EXITING | CPU_BASED_USE_TSC_OFFSETTING |
6535 CPU_BASED_HLT_EXITING | CPU_BASED_INVLPG_EXITING |
6536 CPU_BASED_MWAIT_EXITING | CPU_BASED_CR3_LOAD_EXITING |
6537 CPU_BASED_CR3_STORE_EXITING |
6538#ifdef CONFIG_X86_64
6539 CPU_BASED_CR8_LOAD_EXITING | CPU_BASED_CR8_STORE_EXITING |
6540#endif
6541 CPU_BASED_MOV_DR_EXITING | CPU_BASED_UNCOND_IO_EXITING |
6542 CPU_BASED_USE_IO_BITMAPS | CPU_BASED_MONITOR_TRAP_FLAG |
6543 CPU_BASED_MONITOR_EXITING | CPU_BASED_RDPMC_EXITING |
6544 CPU_BASED_RDTSC_EXITING | CPU_BASED_PAUSE_EXITING |
6545 CPU_BASED_TPR_SHADOW | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
6546
6547
6548
6549
6550
6551
6552 msrs->procbased_ctls_high |=
6553 CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR |
6554 CPU_BASED_USE_MSR_BITMAPS;
6555
6556
6557 msrs->procbased_ctls_low &=
6558 ~(CPU_BASED_CR3_LOAD_EXITING | CPU_BASED_CR3_STORE_EXITING);

	/*
	 * secondary cpu-based controls.  Do not include those that
	 * depend on CPUID bits, they are added later by
	 * vmx_vcpu_after_set_cpuid.
	 */
	if (msrs->procbased_ctls_high & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)
		rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2,
		      msrs->secondary_ctls_low,
		      msrs->secondary_ctls_high);

	msrs->secondary_ctls_low = 0;
	msrs->secondary_ctls_high &=
		SECONDARY_EXEC_DESC |
		SECONDARY_EXEC_ENABLE_RDTSCP |
		SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
		SECONDARY_EXEC_WBINVD_EXITING |
		SECONDARY_EXEC_APIC_REGISTER_VIRT |
		SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
		SECONDARY_EXEC_RDRAND_EXITING |
		SECONDARY_EXEC_ENABLE_INVPCID |
		SECONDARY_EXEC_RDSEED_EXITING |
		SECONDARY_EXEC_XSAVES |
		SECONDARY_EXEC_TSC_SCALING;

	/*
	 * We can emulate "VMCS shadowing," even if the hardware
	 * doesn't support it.
	 */
	msrs->secondary_ctls_high |=
		SECONDARY_EXEC_SHADOW_VMCS;

	if (enable_ept) {
		/* nested EPT: emulate EPT also to L1 */
		msrs->secondary_ctls_high |=
			SECONDARY_EXEC_ENABLE_EPT;
		msrs->ept_caps =
			VMX_EPT_PAGE_WALK_4_BIT |
			VMX_EPT_PAGE_WALK_5_BIT |
			VMX_EPTP_WB_BIT |
			VMX_EPT_INVEPT_BIT |
			VMX_EPT_EXECUTE_ONLY_BIT;

		/* INVEPT is fully emulated, so both global and context
		 * invalidation can be advertised regardless of host support.
		 */
		msrs->ept_caps &= ept_caps;
		msrs->ept_caps |= VMX_EPT_EXTENT_GLOBAL_BIT |
			VMX_EPT_EXTENT_CONTEXT_BIT | VMX_EPT_2MB_PAGE_BIT |
			VMX_EPT_1GB_PAGE_BIT;
		if (enable_ept_ad_bits) {
			msrs->secondary_ctls_high |=
				SECONDARY_EXEC_ENABLE_PML;
			msrs->ept_caps |= VMX_EPT_AD_BIT;
		}
	}

	if (cpu_has_vmx_vmfunc()) {
		msrs->secondary_ctls_high |=
			SECONDARY_EXEC_ENABLE_VMFUNC;
		/*
		 * Advertise EPTP switching unconditionally
		 * since we emulate it.
		 */
		if (enable_ept)
			msrs->vmfunc_controls =
				VMX_VMFUNC_EPTP_SWITCHING;
	}

	/*
	 * Old versions of KVM use the single-context version of INVVPID
	 * without checking for support, so declare that it is supported even
	 * though it is treated as global context.  The alternative is not
	 * failing the single-context invvpid, and it is worse.
	 */
	if (enable_vpid) {
		msrs->secondary_ctls_high |=
			SECONDARY_EXEC_ENABLE_VPID;
		msrs->vpid_caps = VMX_VPID_INVVPID_BIT |
			VMX_VPID_EXTENT_SUPPORTED_MASK;
	}

	if (enable_unrestricted_guest)
		msrs->secondary_ctls_high |=
			SECONDARY_EXEC_UNRESTRICTED_GUEST;

	if (flexpriority_enabled)
		msrs->secondary_ctls_high |=
			SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;

	if (enable_sgx)
		msrs->secondary_ctls_high |= SECONDARY_EXEC_ENCLS_EXITING;

	/* miscellaneous data */
	rdmsr(MSR_IA32_VMX_MISC,
	      msrs->misc_low,
	      msrs->misc_high);
	msrs->misc_low &= VMX_MISC_SAVE_EFER_LMA;
	msrs->misc_low |=
		MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS |
		VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE |
		VMX_MISC_ACTIVITY_HLT |
		VMX_MISC_ACTIVITY_WAIT_SIPI;
	msrs->misc_high = 0;
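
	/*
	 * For reference (per the SDM, not a comment from the original
	 * source): bits 4:0 of IA32_VMX_MISC give the ratio between the
	 * VMX-preemption timer and the TSC, so the emulated rate of 5
	 * advertised above means the timer counts down once every
	 * 2^5 = 32 TSC cycles.
	 */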

	/*
	 * This MSR reports some information about VMX support. We
	 * should return information about the VMX we emulate for the
	 * guest, and the VMCS structure we give it - not about the
	 * VMX support of the underlying hardware.
	 */
	msrs->basic =
		VMCS12_REVISION |
		VMX_BASIC_TRUE_CTLS |
		((u64)VMCS12_SIZE << VMX_BASIC_VMCS_SIZE_SHIFT) |
		(VMX_BASIC_MEM_TYPE_WB << VMX_BASIC_MEM_TYPE_SHIFT);

	if (cpu_has_vmx_basic_inout())
		msrs->basic |= VMX_BASIC_INOUT;
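
	/*
	 * Sketch of the IA32_VMX_BASIC layout being built here (per the SDM,
	 * not a comment from the original source): bits 30:0 hold the VMCS
	 * revision identifier, bits 44:32 the VMCS region size (hence the
	 * shift by 32), bits 53:50 the VMCS memory type (WB = 6), bit 54 the
	 * INS/OUTS exit-information flag and bit 55 the "true" controls flag.
	 */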

	/*
	 * These MSRs specify bits which the guest must keep fixed on
	 * while L1 is in VMXON mode (in L1's root mode, or running an L2).
	 * We picked the standard core2 setting.
	 */
#define VMXON_CR0_ALWAYSON	(X86_CR0_PE | X86_CR0_PG | X86_CR0_NE)
#define VMXON_CR4_ALWAYSON	X86_CR4_VMXE
	msrs->cr0_fixed0 = VMXON_CR0_ALWAYSON;
	msrs->cr4_fixed0 = VMXON_CR4_ALWAYSON;

	/* These MSRs specify bits which the guest must keep fixed off. */
	rdmsrl(MSR_IA32_VMX_CR0_FIXED1, msrs->cr0_fixed1);
	rdmsrl(MSR_IA32_VMX_CR4_FIXED1, msrs->cr4_fixed1);

	msrs->vmcs_enum = nested_vmx_calc_vmcs_enum_msr();
}

void nested_vmx_hardware_unsetup(void)
{
	int i;

	if (enable_shadow_vmcs) {
		for (i = 0; i < VMX_BITMAP_NR; i++)
			free_page((unsigned long)vmx_bitmap[i]);
	}
}

__init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *))
{
	int i;

	if (!cpu_has_vmx_shadow_vmcs())
		enable_shadow_vmcs = 0;
	if (enable_shadow_vmcs) {
		for (i = 0; i < VMX_BITMAP_NR; i++) {
			/*
			 * The vmx_bitmap is not tied to a VM and so should
			 * not be charged to a memcg.
			 */
			vmx_bitmap[i] = (unsigned long *)
				__get_free_page(GFP_KERNEL);
			if (!vmx_bitmap[i]) {
				nested_vmx_hardware_unsetup();
				return -ENOMEM;
			}
		}

		init_vmcs_shadow_fields();
	}

	exit_handlers[EXIT_REASON_VMCLEAR]	= handle_vmclear;
	exit_handlers[EXIT_REASON_VMLAUNCH]	= handle_vmlaunch;
	exit_handlers[EXIT_REASON_VMPTRLD]	= handle_vmptrld;
	exit_handlers[EXIT_REASON_VMPTRST]	= handle_vmptrst;
	exit_handlers[EXIT_REASON_VMREAD]	= handle_vmread;
	exit_handlers[EXIT_REASON_VMRESUME]	= handle_vmresume;
	exit_handlers[EXIT_REASON_VMWRITE]	= handle_vmwrite;
	exit_handlers[EXIT_REASON_VMOFF]	= handle_vmoff;
	exit_handlers[EXIT_REASON_VMON]		= handle_vmon;
	exit_handlers[EXIT_REASON_INVEPT]	= handle_invept;
	exit_handlers[EXIT_REASON_INVVPID]	= handle_invvpid;
	exit_handlers[EXIT_REASON_VMFUNC]	= handle_vmfunc;

	return 0;
}

struct kvm_x86_nested_ops vmx_nested_ops = {
	.check_events = vmx_check_nested_events,
	.hv_timer_pending = nested_vmx_preemption_timer_pending,
	.triple_fault = nested_vmx_triple_fault,
	.get_state = vmx_get_nested_state,
	.set_state = vmx_set_nested_state,
	.get_nested_state_pages = vmx_get_nested_state_pages,
	.write_log_dirty = nested_vmx_write_pml_buffer,
	.enable_evmcs = nested_enable_evmcs,
	.get_evmcs_version = nested_get_evmcs_version,
};