// SPDX-License-Identifier: GPL-2.0

3#include <linux/objtool.h>
4#include <linux/percpu.h>
5
6#include <asm/debugreg.h>
7#include <asm/mmu_context.h>
8
9#include "cpuid.h"
10#include "hyperv.h"
11#include "mmu.h"
12#include "nested.h"
13#include "pmu.h"
14#include "sgx.h"
15#include "trace.h"
16#include "vmx.h"
17#include "x86.h"
18
19static bool __read_mostly enable_shadow_vmcs = 1;
20module_param_named(enable_shadow_vmcs, enable_shadow_vmcs, bool, S_IRUGO);
21
22static bool __read_mostly nested_early_check = 0;
23module_param(nested_early_check, bool, S_IRUGO);
24
25#define CC KVM_NESTED_VMENTER_CONSISTENCY_CHECK
26
/*
 * Hyper-V requires all of these, so mark them as supported even though
 * they are just treated the same as all-context.
 */
31#define VMX_VPID_EXTENT_SUPPORTED_MASK \
32 (VMX_VPID_EXTENT_INDIVIDUAL_ADDR_BIT | \
33 VMX_VPID_EXTENT_SINGLE_CONTEXT_BIT | \
34 VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT | \
35 VMX_VPID_EXTENT_SINGLE_NON_GLOBAL_BIT)
36
37#define VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE 5
38
39enum {
40 VMX_VMREAD_BITMAP,
41 VMX_VMWRITE_BITMAP,
42 VMX_BITMAP_NR
43};
44static unsigned long *vmx_bitmap[VMX_BITMAP_NR];
45
46#define vmx_vmread_bitmap (vmx_bitmap[VMX_VMREAD_BITMAP])
47#define vmx_vmwrite_bitmap (vmx_bitmap[VMX_VMWRITE_BITMAP])
48
49struct shadow_vmcs_field {
50 u16 encoding;
51 u16 offset;
52};
53static struct shadow_vmcs_field shadow_read_only_fields[] = {
54#define SHADOW_FIELD_RO(x, y) { x, offsetof(struct vmcs12, y) },
55#include "vmcs_shadow_fields.h"
56};
57static int max_shadow_read_only_fields =
58 ARRAY_SIZE(shadow_read_only_fields);
59
60static struct shadow_vmcs_field shadow_read_write_fields[] = {
61#define SHADOW_FIELD_RW(x, y) { x, offsetof(struct vmcs12, y) },
62#include "vmcs_shadow_fields.h"
63};
64static int max_shadow_read_write_fields =
65 ARRAY_SIZE(shadow_read_write_fields);
66
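/*
 * Build the VMREAD/VMWRITE shadow bitmaps and prune the shadow field tables:
 * intercept everything by default, then open up only the fields this setup
 * can actually shadow, dropping high halves of 64-bit fields on 64-bit hosts
 * and fields whose underlying feature (PML, preemption timer, APICv) is not
 * supported.
 */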
67static void init_vmcs_shadow_fields(void)
68{
69 int i, j;
70
71 memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE);
72 memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE);
73
74 for (i = j = 0; i < max_shadow_read_only_fields; i++) {
75 struct shadow_vmcs_field entry = shadow_read_only_fields[i];
76 u16 field = entry.encoding;
77
78 if (vmcs_field_width(field) == VMCS_FIELD_WIDTH_U64 &&
79 (i + 1 == max_shadow_read_only_fields ||
80 shadow_read_only_fields[i + 1].encoding != field + 1))
81 pr_err("Missing field from shadow_read_only_field %x\n",
82 field + 1);
83
84 clear_bit(field, vmx_vmread_bitmap);
85 if (field & 1)
86#ifdef CONFIG_X86_64
87 continue;
88#else
89 entry.offset += sizeof(u32);
90#endif
91 shadow_read_only_fields[j++] = entry;
92 }
93 max_shadow_read_only_fields = j;
94
95 for (i = j = 0; i < max_shadow_read_write_fields; i++) {
96 struct shadow_vmcs_field entry = shadow_read_write_fields[i];
97 u16 field = entry.encoding;
98
99 if (vmcs_field_width(field) == VMCS_FIELD_WIDTH_U64 &&
100 (i + 1 == max_shadow_read_write_fields ||
101 shadow_read_write_fields[i + 1].encoding != field + 1))
102 pr_err("Missing field from shadow_read_write_field %x\n",
103 field + 1);
104
105 WARN_ONCE(field >= GUEST_ES_AR_BYTES &&
106 field <= GUEST_TR_AR_BYTES,
107 "Update vmcs12_write_any() to drop reserved bits from AR_BYTES");
108
		/*
		 * PML and the preemption timer can be emulated, but the
		 * processor cannot vmwrite to fields that don't exist
		 * in vmcs01.
		 */
114 switch (field) {
115 case GUEST_PML_INDEX:
116 if (!cpu_has_vmx_pml())
117 continue;
118 break;
119 case VMX_PREEMPTION_TIMER_VALUE:
120 if (!cpu_has_vmx_preemption_timer())
121 continue;
122 break;
123 case GUEST_INTR_STATUS:
124 if (!cpu_has_vmx_apicv())
125 continue;
126 break;
127 default:
128 break;
129 }
130
131 clear_bit(field, vmx_vmwrite_bitmap);
132 clear_bit(field, vmx_vmread_bitmap);
133 if (field & 1)
134#ifdef CONFIG_X86_64
135 continue;
136#else
137 entry.offset += sizeof(u32);
138#endif
139 shadow_read_write_fields[j++] = entry;
140 }
141 max_shadow_read_write_fields = j;
142}
143
/*
 * The following 3 functions, nested_vmx_succeed()/failValid()/failInvalid(),
 * set the success or error code of an emulated VMX instruction (as specified
 * by Vol 2B, VMX Instruction Reference, "Conventions"), and skip the emulated
 * instruction.
 */
150static int nested_vmx_succeed(struct kvm_vcpu *vcpu)
151{
152 vmx_set_rflags(vcpu, vmx_get_rflags(vcpu)
153 & ~(X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
154 X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_OF));
155 return kvm_skip_emulated_instruction(vcpu);
156}
157
158static int nested_vmx_failInvalid(struct kvm_vcpu *vcpu)
159{
160 vmx_set_rflags(vcpu, (vmx_get_rflags(vcpu)
161 & ~(X86_EFLAGS_PF | X86_EFLAGS_AF | X86_EFLAGS_ZF |
162 X86_EFLAGS_SF | X86_EFLAGS_OF))
163 | X86_EFLAGS_CF);
164 return kvm_skip_emulated_instruction(vcpu);
165}
166
167static int nested_vmx_failValid(struct kvm_vcpu *vcpu,
168 u32 vm_instruction_error)
169{
170 vmx_set_rflags(vcpu, (vmx_get_rflags(vcpu)
171 & ~(X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
172 X86_EFLAGS_SF | X86_EFLAGS_OF))
173 | X86_EFLAGS_ZF);
174 get_vmcs12(vcpu)->vm_instruction_error = vm_instruction_error;
175
	/*
	 * A shadow VMCS sync isn't needed here because VM_INSTRUCTION_ERROR
	 * is not shadowed, but the enlightened VMCS 'shadows' all fields and
	 * thus must be synced back to L1.
	 */
180 if (to_vmx(vcpu)->nested.hv_evmcs_vmptr != EVMPTR_INVALID)
181 to_vmx(vcpu)->nested.need_vmcs12_to_shadow_sync = true;
182
183 return kvm_skip_emulated_instruction(vcpu);
184}
185
186static int nested_vmx_fail(struct kvm_vcpu *vcpu, u32 vm_instruction_error)
187{
188 struct vcpu_vmx *vmx = to_vmx(vcpu);
189
	/*
	 * failValid writes the error number to the current VMCS, which
	 * can't be done if there isn't a current VMCS.
	 */
194 if (vmx->nested.current_vmptr == -1ull &&
195 !evmptr_is_valid(vmx->nested.hv_evmcs_vmptr))
196 return nested_vmx_failInvalid(vcpu);
197
198 return nested_vmx_failValid(vcpu, vm_instruction_error);
199}
200
201static void nested_vmx_abort(struct kvm_vcpu *vcpu, u32 indicator)
202{
	/* TODO: do something smarter than resetting the guest outright. */
204 kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
205 pr_debug_ratelimited("kvm: nested vmx abort, indicator %d\n", indicator);
206}
207
208static inline bool vmx_control_verify(u32 control, u32 low, u32 high)
209{
210 return fixed_bits_valid(control, low, high);
211}
212
213static inline u64 vmx_control_msr(u32 low, u32 high)
214{
215 return low | ((u64)high << 32);
216}
217
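/*
 * Stop using the shadow VMCS for this vCPU: clear the SHADOW_VMCS execution
 * control, invalidate the VMCS link pointer and drop any pending
 * vmcs12-to-shadow sync.
 */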
218static void vmx_disable_shadow_vmcs(struct vcpu_vmx *vmx)
219{
220 secondary_exec_controls_clearbit(vmx, SECONDARY_EXEC_SHADOW_VMCS);
221 vmcs_write64(VMCS_LINK_POINTER, -1ull);
222 vmx->nested.need_vmcs12_to_shadow_sync = false;
223}
224
225static inline void nested_release_evmcs(struct kvm_vcpu *vcpu)
226{
227 struct vcpu_vmx *vmx = to_vmx(vcpu);
228
229 if (evmptr_is_valid(vmx->nested.hv_evmcs_vmptr)) {
230 kvm_vcpu_unmap(vcpu, &vmx->nested.hv_evmcs_map, true);
231 vmx->nested.hv_evmcs = NULL;
232 }
233
234 vmx->nested.hv_evmcs_vmptr = EVMPTR_INVALID;
235}
236
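/*
 * When switching loaded VMCSs with guest state already loaded, carry the
 * cached host segment state over from the previous loaded_vmcs (and update
 * HOST_FS/GS in the new VMCS) so the new VMCS matches what is currently
 * live in hardware.
 */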
237static void vmx_sync_vmcs_host_state(struct vcpu_vmx *vmx,
238 struct loaded_vmcs *prev)
239{
240 struct vmcs_host_state *dest, *src;
241
242 if (unlikely(!vmx->guest_state_loaded))
243 return;
244
245 src = &prev->host_state;
246 dest = &vmx->loaded_vmcs->host_state;
247
248 vmx_set_host_fs_gs(dest, src->fs_sel, src->gs_sel, src->fs_base, src->gs_base);
249 dest->ldt_sel = src->ldt_sel;
250#ifdef CONFIG_X86_64
251 dest->ds_sel = src->ds_sel;
252 dest->es_sel = src->es_sel;
253#endif
254}
255
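/*
 * Switch the vCPU between vmcs01 and vmcs02 (or vice versa): load the new
 * VMCS on the current physical CPU, carry over the cached host state and
 * invalidate the register caches.
 */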
256static void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs)
257{
258 struct vcpu_vmx *vmx = to_vmx(vcpu);
259 struct loaded_vmcs *prev;
260 int cpu;
261
262 if (WARN_ON_ONCE(vmx->loaded_vmcs == vmcs))
263 return;
264
265 cpu = get_cpu();
266 prev = vmx->loaded_vmcs;
267 vmx->loaded_vmcs = vmcs;
268 vmx_vcpu_load_vmcs(vcpu, cpu, prev);
269 vmx_sync_vmcs_host_state(vmx, prev);
270 put_cpu();
271
272 vmx_register_cache_reset(vcpu);
273}
274
/*
 * Free whatever needs to be freed from vmx->nested when L1 goes down, or
 * just stops using VMX.
 */
279static void free_nested(struct kvm_vcpu *vcpu)
280{
281 struct vcpu_vmx *vmx = to_vmx(vcpu);
282
283 if (WARN_ON_ONCE(vmx->loaded_vmcs != &vmx->vmcs01))
284 vmx_switch_vmcs(vcpu, &vmx->vmcs01);
285
286 if (!vmx->nested.vmxon && !vmx->nested.smm.vmxon)
287 return;
288
289 kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
290
291 vmx->nested.vmxon = false;
292 vmx->nested.smm.vmxon = false;
293 free_vpid(vmx->nested.vpid02);
294 vmx->nested.posted_intr_nv = -1;
295 vmx->nested.current_vmptr = -1ull;
296 if (enable_shadow_vmcs) {
297 vmx_disable_shadow_vmcs(vmx);
298 vmcs_clear(vmx->vmcs01.shadow_vmcs);
299 free_vmcs(vmx->vmcs01.shadow_vmcs);
300 vmx->vmcs01.shadow_vmcs = NULL;
301 }
302 kfree(vmx->nested.cached_vmcs12);
303 vmx->nested.cached_vmcs12 = NULL;
304 kfree(vmx->nested.cached_shadow_vmcs12);
305 vmx->nested.cached_shadow_vmcs12 = NULL;
306
307 if (vmx->nested.apic_access_page) {
308 kvm_release_page_clean(vmx->nested.apic_access_page);
309 vmx->nested.apic_access_page = NULL;
310 }
311 kvm_vcpu_unmap(vcpu, &vmx->nested.virtual_apic_map, true);
312 kvm_vcpu_unmap(vcpu, &vmx->nested.pi_desc_map, true);
313 vmx->nested.pi_desc = NULL;
314
315 kvm_mmu_free_roots(vcpu, &vcpu->arch.guest_mmu, KVM_MMU_ROOTS_ALL);
316
317 nested_release_evmcs(vcpu);
318
319 free_loaded_vmcs(&vmx->nested.vmcs02);
320}
321
/*
 * Ensure that the current vmcs of the logical processor is the
 * vmcs01 of the vcpu before calling free_nested().
 */
326void nested_vmx_free_vcpu(struct kvm_vcpu *vcpu)
327{
328 vcpu_load(vcpu);
329 vmx_leave_nested(vcpu);
330 vcpu_put(vcpu);
331}
332
333#define EPTP_PA_MASK GENMASK_ULL(51, 12)
334
335static bool nested_ept_root_matches(hpa_t root_hpa, u64 root_eptp, u64 eptp)
336{
337 return VALID_PAGE(root_hpa) &&
338 ((root_eptp & EPTP_PA_MASK) == (eptp & EPTP_PA_MASK));
339}
340
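/*
 * Invalidate @addr in every cached shadow EPT root whose EPTP shares the
 * same EP4TA (EPT page-table address) as @eptp.
 */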
341static void nested_ept_invalidate_addr(struct kvm_vcpu *vcpu, gpa_t eptp,
342 gpa_t addr)
343{
344 uint i;
345 struct kvm_mmu_root_info *cached_root;
346
347 WARN_ON_ONCE(!mmu_is_nested(vcpu));
348
349 for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) {
350 cached_root = &vcpu->arch.mmu->prev_roots[i];
351
352 if (nested_ept_root_matches(cached_root->hpa, cached_root->pgd,
353 eptp))
354 vcpu->arch.mmu->invlpg(vcpu, addr, cached_root->hpa);
355 }
356}
357
358static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu,
359 struct x86_exception *fault)
360{
361 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
362 struct vcpu_vmx *vmx = to_vmx(vcpu);
363 u32 vm_exit_reason;
364 unsigned long exit_qualification = vcpu->arch.exit_qualification;
365
366 if (vmx->nested.pml_full) {
367 vm_exit_reason = EXIT_REASON_PML_FULL;
368 vmx->nested.pml_full = false;
369 exit_qualification &= INTR_INFO_UNBLOCK_NMI;
370 } else {
371 if (fault->error_code & PFERR_RSVD_MASK)
372 vm_exit_reason = EXIT_REASON_EPT_MISCONFIG;
373 else
374 vm_exit_reason = EXIT_REASON_EPT_VIOLATION;
375
		/*
		 * Although the caller (kvm_inject_emulated_page_fault) would
		 * have already synced the faulting address in the shadow EPT
		 * tables for the current EPTP12, we also need to sync it for
		 * any other cached EPTP02s based on the same EP4TA, since the
		 * TLB associates mappings to the EP4TA rather than the full EPTP.
		 */
383 nested_ept_invalidate_addr(vcpu, vmcs12->ept_pointer,
384 fault->address);
385 }
386
387 nested_vmx_vmexit(vcpu, vm_exit_reason, 0, exit_qualification);
388 vmcs12->guest_physical_address = fault->address;
389}
390
391static void nested_ept_new_eptp(struct kvm_vcpu *vcpu)
392{
393 kvm_init_shadow_ept_mmu(vcpu,
394 to_vmx(vcpu)->nested.msrs.ept_caps &
395 VMX_EPT_EXECUTE_ONLY_BIT,
396 nested_ept_ad_enabled(vcpu),
397 nested_ept_get_eptp(vcpu));
398}
399
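/*
 * Switch the vCPU to the nested EPT MMU: guest_mmu becomes the shadow EPT
 * MMU rooted at vmcs12's EPTP, while nested_mmu is used to walk L2's own
 * paging structures.
 */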
400static void nested_ept_init_mmu_context(struct kvm_vcpu *vcpu)
401{
402 WARN_ON(mmu_is_nested(vcpu));
403
404 vcpu->arch.mmu = &vcpu->arch.guest_mmu;
405 nested_ept_new_eptp(vcpu);
406 vcpu->arch.mmu->get_guest_pgd = nested_ept_get_eptp;
407 vcpu->arch.mmu->inject_page_fault = nested_ept_inject_page_fault;
408 vcpu->arch.mmu->get_pdptr = kvm_pdptr_read;
409
410 vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu;
411}
412
413static void nested_ept_uninit_mmu_context(struct kvm_vcpu *vcpu)
414{
415 vcpu->arch.mmu = &vcpu->arch.root_mmu;
416 vcpu->arch.walk_mmu = &vcpu->arch.root_mmu;
417}
418
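/*
 * Return true if, per vmcs12's exception bitmap and page-fault error-code
 * mask/match, a page fault with @error_code should cause a VM-exit to L1.
 */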
419static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12,
420 u16 error_code)
421{
422 bool inequality, bit;
423
424 bit = (vmcs12->exception_bitmap & (1u << PF_VECTOR)) != 0;
425 inequality =
426 (error_code & vmcs12->page_fault_error_code_mask) !=
427 vmcs12->page_fault_error_code_match;
428 return inequality ^ bit;
429}
430
/*
 * KVM wants to inject page-faults which it got to the guest. This function
 * checks whether in a nested guest, we need to inject them to L1 or L2.
 * Returns 1 (and fills in *exit_qual) if the exception should cause a
 * VM-exit to L1, 0 if it should be delivered to L2.
 */
436static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned long *exit_qual)
437{
438 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
439 unsigned int nr = vcpu->arch.exception.nr;
440 bool has_payload = vcpu->arch.exception.has_payload;
441 unsigned long payload = vcpu->arch.exception.payload;
442
443 if (nr == PF_VECTOR) {
444 if (vcpu->arch.exception.nested_apf) {
445 *exit_qual = vcpu->arch.apf.nested_apf_token;
446 return 1;
447 }
448 if (nested_vmx_is_page_fault_vmexit(vmcs12,
449 vcpu->arch.exception.error_code)) {
450 *exit_qual = has_payload ? payload : vcpu->arch.cr2;
451 return 1;
452 }
453 } else if (vmcs12->exception_bitmap & (1u << nr)) {
454 if (nr == DB_VECTOR) {
455 if (!has_payload) {
456 payload = vcpu->arch.dr6;
457 payload &= ~DR6_BT;
458 payload ^= DR6_ACTIVE_LOW;
459 }
460 *exit_qual = payload;
461 } else
462 *exit_qual = 0;
463 return 1;
464 }
465
466 return 0;
467}
468
469
470static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu,
471 struct x86_exception *fault)
472{
473 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
474
475 WARN_ON(!is_guest_mode(vcpu));
476
477 if (nested_vmx_is_page_fault_vmexit(vmcs12, fault->error_code) &&
478 !to_vmx(vcpu)->nested.nested_run_pending) {
479 vmcs12->vm_exit_intr_error_code = fault->error_code;
480 nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
481 PF_VECTOR | INTR_TYPE_HARD_EXCEPTION |
482 INTR_INFO_DELIVER_CODE_MASK | INTR_INFO_VALID_MASK,
483 fault->address);
484 } else {
485 kvm_inject_page_fault(vcpu, fault);
486 }
487}
488
489static int nested_vmx_check_io_bitmap_controls(struct kvm_vcpu *vcpu,
490 struct vmcs12 *vmcs12)
491{
492 if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS))
493 return 0;
494
495 if (CC(!page_address_valid(vcpu, vmcs12->io_bitmap_a)) ||
496 CC(!page_address_valid(vcpu, vmcs12->io_bitmap_b)))
497 return -EINVAL;
498
499 return 0;
500}
501
502static int nested_vmx_check_msr_bitmap_controls(struct kvm_vcpu *vcpu,
503 struct vmcs12 *vmcs12)
504{
505 if (!nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS))
506 return 0;
507
508 if (CC(!page_address_valid(vcpu, vmcs12->msr_bitmap)))
509 return -EINVAL;
510
511 return 0;
512}
513
514static int nested_vmx_check_tpr_shadow_controls(struct kvm_vcpu *vcpu,
515 struct vmcs12 *vmcs12)
516{
517 if (!nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW))
518 return 0;
519
520 if (CC(!page_address_valid(vcpu, vmcs12->virtual_apic_page_addr)))
521 return -EINVAL;
522
523 return 0;
524}
525
/*
 * Check if an MSR write is intercepted by the vmcs01 (L01) MSR bitmap.
 */
529static bool msr_write_intercepted_l01(struct kvm_vcpu *vcpu, u32 msr)
530{
531 unsigned long *msr_bitmap;
532 int f = sizeof(unsigned long);
533
534 if (!cpu_has_vmx_msr_bitmap())
535 return true;
536
537 msr_bitmap = to_vmx(vcpu)->vmcs01.msr_bitmap;
538
539 if (msr <= 0x1fff) {
540 return !!test_bit(msr, msr_bitmap + 0x800 / f);
541 } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
542 msr &= 0x1fff;
543 return !!test_bit(msr, msr_bitmap + 0xc00 / f);
544 }
545
546 return true;
547}
548
549
/*
 * Disable an MSR intercept in the vmcs02 bitmap only if L1's bitmap also
 * allows (does not intercept) the access; otherwise keep intercepting.
 */
553static void nested_vmx_disable_intercept_for_msr(unsigned long *msr_bitmap_l1,
554 unsigned long *msr_bitmap_nested,
555 u32 msr, int type)
556{
557 int f = sizeof(unsigned long);
558
	/*
	 * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
	 * have the write-low and read-high bitmap offsets the wrong way round.
	 * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
	 */
564 if (msr <= 0x1fff) {
565 if (type & MSR_TYPE_R &&
566 !test_bit(msr, msr_bitmap_l1 + 0x000 / f))
			/* read-low */
568 __clear_bit(msr, msr_bitmap_nested + 0x000 / f);
569
570 if (type & MSR_TYPE_W &&
571 !test_bit(msr, msr_bitmap_l1 + 0x800 / f))
			/* write-low */
573 __clear_bit(msr, msr_bitmap_nested + 0x800 / f);
574
575 } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
576 msr &= 0x1fff;
577 if (type & MSR_TYPE_R &&
578 !test_bit(msr, msr_bitmap_l1 + 0x400 / f))
			/* read-high */
580 __clear_bit(msr, msr_bitmap_nested + 0x400 / f);
581
582 if (type & MSR_TYPE_W &&
583 !test_bit(msr, msr_bitmap_l1 + 0xc00 / f))
			/* write-high */
585 __clear_bit(msr, msr_bitmap_nested + 0xc00 / f);
586
587 }
588}
589
590static inline void enable_x2apic_msr_intercepts(unsigned long *msr_bitmap)
591{
592 int msr;
593
594 for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) {
595 unsigned word = msr / BITS_PER_LONG;
596
597 msr_bitmap[word] = ~0;
598 msr_bitmap[word + (0x800 / sizeof(long))] = ~0;
599 }
600}
601
/*
 * Merge L0's (KVM) and L1's (nested VMX) MSR bitmaps; return false to
 * indicate that the hardware MSR bitmap is not used for L2.
 */
606static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
607 struct vmcs12 *vmcs12)
608{
609 int msr;
610 unsigned long *msr_bitmap_l1;
611 unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.vmcs02.msr_bitmap;
612 struct kvm_host_map *map = &to_vmx(vcpu)->nested.msr_bitmap_map;
613
	/* Nothing to do if the MSR bitmap is not in use. */
615 if (!cpu_has_vmx_msr_bitmap() ||
616 !nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS))
617 return false;
618
619 if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->msr_bitmap), map))
620 return false;
621
622 msr_bitmap_l1 = (unsigned long *)map->hva;
623
	/*
	 * To keep the control flow simple, pay eight 8-byte writes (sixteen
	 * 4-byte writes on 32-bit systems) up front to enable intercepts for
	 * the x2APIC MSR range and selectively disable them below.
	 */
629 enable_x2apic_msr_intercepts(msr_bitmap_l0);
630
631 if (nested_cpu_has_virt_x2apic_mode(vmcs12)) {
632 if (nested_cpu_has_apic_reg_virt(vmcs12)) {
			/*
			 * L0 need not intercept reads for MSRs between 0x800
			 * and 0x8ff, it just lets the processor take the value
			 * from the virtual-APIC page; take those 256 bits
			 * directly from the L1 bitmap.
			 */
639 for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) {
640 unsigned word = msr / BITS_PER_LONG;
641
642 msr_bitmap_l0[word] = msr_bitmap_l1[word];
643 }
644 }
645
646 nested_vmx_disable_intercept_for_msr(
647 msr_bitmap_l1, msr_bitmap_l0,
648 X2APIC_MSR(APIC_TASKPRI),
649 MSR_TYPE_R | MSR_TYPE_W);
650
651 if (nested_cpu_has_vid(vmcs12)) {
652 nested_vmx_disable_intercept_for_msr(
653 msr_bitmap_l1, msr_bitmap_l0,
654 X2APIC_MSR(APIC_EOI),
655 MSR_TYPE_W);
656 nested_vmx_disable_intercept_for_msr(
657 msr_bitmap_l1, msr_bitmap_l0,
658 X2APIC_MSR(APIC_SELF_IPI),
659 MSR_TYPE_W);
660 }
661 }
662
663
664#ifdef CONFIG_X86_64
665 nested_vmx_disable_intercept_for_msr(msr_bitmap_l1, msr_bitmap_l0,
666 MSR_FS_BASE, MSR_TYPE_RW);
667
668 nested_vmx_disable_intercept_for_msr(msr_bitmap_l1, msr_bitmap_l0,
669 MSR_GS_BASE, MSR_TYPE_RW);
670
671 nested_vmx_disable_intercept_for_msr(msr_bitmap_l1, msr_bitmap_l0,
672 MSR_KERNEL_GS_BASE, MSR_TYPE_RW);
673#endif
674
	/*
	 * Checking the L0->L1 bitmap is trying to verify two things:
	 *
	 * 1. L0 gave a permission to L1 to actually passthrough the MSR. This
	 *    ensures that we do not accidentally generate an L02 MSR bitmap
	 *    from the L12 MSR bitmap that is too permissive.
	 * 2. That L1 or L2s have actually used the MSR. This avoids
	 *    unnecessarily merging of the bitmap if the MSR is unused. This
	 *    works properly because we only update the L01 MSR bitmap lazily.
	 *    So even if L0 should pass L1 these MSRs, the L01 bitmap is only
	 *    updated to reflect this when L1 (or its L2s) actually write to
	 *    the MSR.
	 */
688 if (!msr_write_intercepted_l01(vcpu, MSR_IA32_SPEC_CTRL))
689 nested_vmx_disable_intercept_for_msr(
690 msr_bitmap_l1, msr_bitmap_l0,
691 MSR_IA32_SPEC_CTRL,
692 MSR_TYPE_R | MSR_TYPE_W);
693
694 if (!msr_write_intercepted_l01(vcpu, MSR_IA32_PRED_CMD))
695 nested_vmx_disable_intercept_for_msr(
696 msr_bitmap_l1, msr_bitmap_l0,
697 MSR_IA32_PRED_CMD,
698 MSR_TYPE_W);
699
700 kvm_vcpu_unmap(vcpu, &to_vmx(vcpu)->nested.msr_bitmap_map, false);
701
702 return true;
703}
704
705static void nested_cache_shadow_vmcs12(struct kvm_vcpu *vcpu,
706 struct vmcs12 *vmcs12)
707{
708 struct kvm_host_map map;
709 struct vmcs12 *shadow;
710
711 if (!nested_cpu_has_shadow_vmcs(vmcs12) ||
712 vmcs12->vmcs_link_pointer == -1ull)
713 return;
714
715 shadow = get_shadow_vmcs12(vcpu);
716
717 if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->vmcs_link_pointer), &map))
718 return;
719
720 memcpy(shadow, map.hva, VMCS12_SIZE);
721 kvm_vcpu_unmap(vcpu, &map, false);
722}
723
724static void nested_flush_cached_shadow_vmcs12(struct kvm_vcpu *vcpu,
725 struct vmcs12 *vmcs12)
726{
727 struct vcpu_vmx *vmx = to_vmx(vcpu);
728
729 if (!nested_cpu_has_shadow_vmcs(vmcs12) ||
730 vmcs12->vmcs_link_pointer == -1ull)
731 return;
732
733 kvm_write_guest(vmx->vcpu.kvm, vmcs12->vmcs_link_pointer,
734 get_shadow_vmcs12(vcpu), VMCS12_SIZE);
735}
736
/*
 * In nested virtualization, check if L1 has set
 * VM_EXIT_ACK_INTR_ON_EXIT.
 */
741static bool nested_exit_intr_ack_set(struct kvm_vcpu *vcpu)
742{
743 return get_vmcs12(vcpu)->vm_exit_controls &
744 VM_EXIT_ACK_INTR_ON_EXIT;
745}
746
747static int nested_vmx_check_apic_access_controls(struct kvm_vcpu *vcpu,
748 struct vmcs12 *vmcs12)
749{
750 if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES) &&
751 CC(!page_address_valid(vcpu, vmcs12->apic_access_addr)))
752 return -EINVAL;
753 else
754 return 0;
755}
756
757static int nested_vmx_check_apicv_controls(struct kvm_vcpu *vcpu,
758 struct vmcs12 *vmcs12)
759{
760 if (!nested_cpu_has_virt_x2apic_mode(vmcs12) &&
761 !nested_cpu_has_apic_reg_virt(vmcs12) &&
762 !nested_cpu_has_vid(vmcs12) &&
763 !nested_cpu_has_posted_intr(vmcs12))
764 return 0;
765
	/*
	 * If virtualize x2apic mode is enabled,
	 * virtualize apic access must be disabled.
	 */
770 if (CC(nested_cpu_has_virt_x2apic_mode(vmcs12) &&
771 nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)))
772 return -EINVAL;
773
	/*
	 * If virtual interrupt delivery is enabled,
	 * we must exit on external interrupts.
	 */
778 if (CC(nested_cpu_has_vid(vmcs12) && !nested_exit_on_intr(vcpu)))
779 return -EINVAL;
780
	/*
	 * If posted interrupts are enabled, virtual-interrupt delivery must
	 * also be enabled and L1 must acknowledge interrupts on VM-exit.
	 * Bits 15:8 of the notification vector must be zero, and the
	 * posted-interrupt descriptor address must be a 64-byte aligned,
	 * legal GPA.
	 */
788 if (nested_cpu_has_posted_intr(vmcs12) &&
789 (CC(!nested_cpu_has_vid(vmcs12)) ||
790 CC(!nested_exit_intr_ack_set(vcpu)) ||
791 CC((vmcs12->posted_intr_nv & 0xff00)) ||
792 CC(!kvm_vcpu_is_legal_aligned_gpa(vcpu, vmcs12->posted_intr_desc_addr, 64))))
793 return -EINVAL;
794
	/* tpr shadow is needed by all apicv features. */
796 if (CC(!nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)))
797 return -EINVAL;
798
799 return 0;
800}
801
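/*
 * Validate a VM-entry/VM-exit MSR load/store list: the address must be
 * 16-byte aligned and the entire list must fall within a legal guest
 * physical range.
 */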
802static int nested_vmx_check_msr_switch(struct kvm_vcpu *vcpu,
803 u32 count, u64 addr)
804{
805 if (count == 0)
806 return 0;
807
808 if (!kvm_vcpu_is_legal_aligned_gpa(vcpu, addr, 16) ||
809 !kvm_vcpu_is_legal_gpa(vcpu, (addr + count * sizeof(struct vmx_msr_entry) - 1)))
810 return -EINVAL;
811
812 return 0;
813}
814
815static int nested_vmx_check_exit_msr_switch_controls(struct kvm_vcpu *vcpu,
816 struct vmcs12 *vmcs12)
817{
818 if (CC(nested_vmx_check_msr_switch(vcpu,
819 vmcs12->vm_exit_msr_load_count,
820 vmcs12->vm_exit_msr_load_addr)) ||
821 CC(nested_vmx_check_msr_switch(vcpu,
822 vmcs12->vm_exit_msr_store_count,
823 vmcs12->vm_exit_msr_store_addr)))
824 return -EINVAL;
825
826 return 0;
827}
828
829static int nested_vmx_check_entry_msr_switch_controls(struct kvm_vcpu *vcpu,
830 struct vmcs12 *vmcs12)
831{
832 if (CC(nested_vmx_check_msr_switch(vcpu,
833 vmcs12->vm_entry_msr_load_count,
834 vmcs12->vm_entry_msr_load_addr)))
835 return -EINVAL;
836
837 return 0;
838}
839
840static int nested_vmx_check_pml_controls(struct kvm_vcpu *vcpu,
841 struct vmcs12 *vmcs12)
842{
843 if (!nested_cpu_has_pml(vmcs12))
844 return 0;
845
846 if (CC(!nested_cpu_has_ept(vmcs12)) ||
847 CC(!page_address_valid(vcpu, vmcs12->pml_address)))
848 return -EINVAL;
849
850 return 0;
851}
852
853static int nested_vmx_check_unrestricted_guest_controls(struct kvm_vcpu *vcpu,
854 struct vmcs12 *vmcs12)
855{
856 if (CC(nested_cpu_has2(vmcs12, SECONDARY_EXEC_UNRESTRICTED_GUEST) &&
857 !nested_cpu_has_ept(vmcs12)))
858 return -EINVAL;
859 return 0;
860}
861
862static int nested_vmx_check_mode_based_ept_exec_controls(struct kvm_vcpu *vcpu,
863 struct vmcs12 *vmcs12)
864{
865 if (CC(nested_cpu_has2(vmcs12, SECONDARY_EXEC_MODE_BASED_EPT_EXEC) &&
866 !nested_cpu_has_ept(vmcs12)))
867 return -EINVAL;
868 return 0;
869}
870
871static int nested_vmx_check_shadow_vmcs_controls(struct kvm_vcpu *vcpu,
872 struct vmcs12 *vmcs12)
873{
874 if (!nested_cpu_has_shadow_vmcs(vmcs12))
875 return 0;
876
877 if (CC(!page_address_valid(vcpu, vmcs12->vmread_bitmap)) ||
878 CC(!page_address_valid(vcpu, vmcs12->vmwrite_bitmap)))
879 return -EINVAL;
880
881 return 0;
882}
883
884static int nested_vmx_msr_check_common(struct kvm_vcpu *vcpu,
885 struct vmx_msr_entry *e)
886{
	/* x2APIC MSR accesses are not allowed */
888 if (CC(vcpu->arch.apic_base & X2APIC_ENABLE && e->index >> 8 == 0x8))
889 return -EINVAL;
890 if (CC(e->index == MSR_IA32_UCODE_WRITE) ||
891 CC(e->index == MSR_IA32_UCODE_REV))
892 return -EINVAL;
893 if (CC(e->reserved != 0))
894 return -EINVAL;
895 return 0;
896}
897
898static int nested_vmx_load_msr_check(struct kvm_vcpu *vcpu,
899 struct vmx_msr_entry *e)
900{
901 if (CC(e->index == MSR_FS_BASE) ||
902 CC(e->index == MSR_GS_BASE) ||
903 CC(e->index == MSR_IA32_SMM_MONITOR_CTL) ||
904 nested_vmx_msr_check_common(vcpu, e))
905 return -EINVAL;
906 return 0;
907}
908
909static int nested_vmx_store_msr_check(struct kvm_vcpu *vcpu,
910 struct vmx_msr_entry *e)
911{
912 if (CC(e->index == MSR_IA32_SMBASE) ||
913 nested_vmx_msr_check_common(vcpu, e))
914 return -EINVAL;
915 return 0;
916}
917
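/*
 * Maximum number of MSRs L1 may put on a single VM-entry/VM-exit MSR list,
 * derived from the MSR-list size advertised in the virtual IA32_VMX_MISC.
 */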
918static u32 nested_vmx_max_atomic_switch_msrs(struct kvm_vcpu *vcpu)
919{
920 struct vcpu_vmx *vmx = to_vmx(vcpu);
921 u64 vmx_misc = vmx_control_msr(vmx->nested.msrs.misc_low,
922 vmx->nested.msrs.misc_high);
923
924 return (vmx_misc_max_msr(vmx_misc) + 1) * VMX_MISC_MSR_LIST_MULTIPLIER;
925}
926
/*
 * Load guest's/host's msr at nested entry/exit.
 * return 0 for success, entry index for failure.
 *
 * One of the failure modes for MSR load/store is when a list exceeds the
 * virtual hardware's capacity. To maintain compatibility with hardware inasmuch
 * as possible, process all valid entries before failing rather than precluding
 * all legal usage of the list based on its maximum possible size.
 */
936static u32 nested_vmx_load_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
937{
938 u32 i;
939 struct vmx_msr_entry e;
940 u32 max_msr_list_size = nested_vmx_max_atomic_switch_msrs(vcpu);
941
942 for (i = 0; i < count; i++) {
943 if (unlikely(i >= max_msr_list_size))
944 goto fail;
945
946 if (kvm_vcpu_read_guest(vcpu, gpa + i * sizeof(e),
947 &e, sizeof(e))) {
948 pr_debug_ratelimited(
949 "%s cannot read MSR entry (%u, 0x%08llx)\n",
950 __func__, i, gpa + i * sizeof(e));
951 goto fail;
952 }
953 if (nested_vmx_load_msr_check(vcpu, &e)) {
954 pr_debug_ratelimited(
955 "%s check failed (%u, 0x%x, 0x%x)\n",
956 __func__, i, e.index, e.reserved);
957 goto fail;
958 }
959 if (kvm_set_msr(vcpu, e.index, e.value)) {
960 pr_debug_ratelimited(
961 "%s cannot write MSR (%u, 0x%x, 0x%llx)\n",
962 __func__, i, e.index, e.value);
963 goto fail;
964 }
965 }
966 return 0;
967fail:
	/* Note, max_msr_list_size is at most 4096, i.e. this can't wrap. */
969 return i + 1;
970}
971
972static bool nested_vmx_get_vmexit_msr_value(struct kvm_vcpu *vcpu,
973 u32 msr_index,
974 u64 *data)
975{
976 struct vcpu_vmx *vmx = to_vmx(vcpu);
977
	/*
	 * If the L0 hypervisor stored a more accurate value for the TSC that
	 * does not include the time taken for emulation of the L2->L1
	 * VM-exit in L0, use the more accurate value.
	 */
983 if (msr_index == MSR_IA32_TSC) {
984 int i = vmx_find_loadstore_msr_slot(&vmx->msr_autostore.guest,
985 MSR_IA32_TSC);
986
987 if (i >= 0) {
988 u64 val = vmx->msr_autostore.guest.val[i].value;
989
990 *data = kvm_read_l1_tsc(vcpu, val);
991 return true;
992 }
993 }
994
995 if (kvm_get_msr(vcpu, msr_index, data)) {
996 pr_debug_ratelimited("%s cannot read MSR (0x%x)\n", __func__,
997 msr_index);
998 return false;
999 }
1000 return true;
1001}
1002
1003static bool read_and_check_msr_entry(struct kvm_vcpu *vcpu, u64 gpa, int i,
1004 struct vmx_msr_entry *e)
1005{
1006 if (kvm_vcpu_read_guest(vcpu,
1007 gpa + i * sizeof(*e),
1008 e, 2 * sizeof(u32))) {
1009 pr_debug_ratelimited(
1010 "%s cannot read MSR entry (%u, 0x%08llx)\n",
1011 __func__, i, gpa + i * sizeof(*e));
1012 return false;
1013 }
1014 if (nested_vmx_store_msr_check(vcpu, e)) {
1015 pr_debug_ratelimited(
1016 "%s check failed (%u, 0x%x, 0x%x)\n",
1017 __func__, i, e->index, e->reserved);
1018 return false;
1019 }
1020 return true;
1021}
1022
1023static int nested_vmx_store_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
1024{
1025 u64 data;
1026 u32 i;
1027 struct vmx_msr_entry e;
1028 u32 max_msr_list_size = nested_vmx_max_atomic_switch_msrs(vcpu);
1029
1030 for (i = 0; i < count; i++) {
1031 if (unlikely(i >= max_msr_list_size))
1032 return -EINVAL;
1033
1034 if (!read_and_check_msr_entry(vcpu, gpa, i, &e))
1035 return -EINVAL;
1036
1037 if (!nested_vmx_get_vmexit_msr_value(vcpu, e.index, &data))
1038 return -EINVAL;
1039
1040 if (kvm_vcpu_write_guest(vcpu,
1041 gpa + i * sizeof(e) +
1042 offsetof(struct vmx_msr_entry, value),
1043 &data, sizeof(data))) {
1044 pr_debug_ratelimited(
1045 "%s cannot write MSR (%u, 0x%x, 0x%llx)\n",
1046 __func__, i, e.index, data);
1047 return -EINVAL;
1048 }
1049 }
1050 return 0;
1051}
1052
1053static bool nested_msr_store_list_has_msr(struct kvm_vcpu *vcpu, u32 msr_index)
1054{
1055 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
1056 u32 count = vmcs12->vm_exit_msr_store_count;
1057 u64 gpa = vmcs12->vm_exit_msr_store_addr;
1058 struct vmx_msr_entry e;
1059 u32 i;
1060
1061 for (i = 0; i < count; i++) {
1062 if (!read_and_check_msr_entry(vcpu, gpa, i, &e))
1063 return false;
1064
1065 if (e.index == msr_index)
1066 return true;
1067 }
1068 return false;
1069}
1070
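/*
 * Keep vmcs02's VM-exit MSR-store area (msr_autostore) in sync with vmcs12's
 * store list for @msr_index: add the MSR if L1 wants it stored and there is
 * room, or drop it when L1 no longer stores it.
 */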
1071static void prepare_vmx_msr_autostore_list(struct kvm_vcpu *vcpu,
1072 u32 msr_index)
1073{
1074 struct vcpu_vmx *vmx = to_vmx(vcpu);
1075 struct vmx_msrs *autostore = &vmx->msr_autostore.guest;
1076 bool in_vmcs12_store_list;
1077 int msr_autostore_slot;
1078 bool in_autostore_list;
1079 int last;
1080
1081 msr_autostore_slot = vmx_find_loadstore_msr_slot(autostore, msr_index);
1082 in_autostore_list = msr_autostore_slot >= 0;
1083 in_vmcs12_store_list = nested_msr_store_list_has_msr(vcpu, msr_index);
1084
1085 if (in_vmcs12_store_list && !in_autostore_list) {
1086 if (autostore->nr == MAX_NR_LOADSTORE_MSRS) {
			/*
			 * Emulated VMEntry does not fail here.  Instead a less
			 * accurate value will be returned by
			 * nested_vmx_get_vmexit_msr_value() using kvm_get_msr()
			 * instead of reading the value from the vmcs02 VMExit
			 * MSR-store area.
			 */
1094 pr_warn_ratelimited(
1095 "Not enough msr entries in msr_autostore. Can't add msr %x\n",
1096 msr_index);
1097 return;
1098 }
1099 last = autostore->nr++;
1100 autostore->val[last].index = msr_index;
1101 } else if (!in_vmcs12_store_list && in_autostore_list) {
1102 last = --autostore->nr;
1103 autostore->val[msr_autostore_slot] = autostore->val[last];
1104 }
1105}
1106
/*
 * Load guest's/host's cr3 at nested entry/exit.  @nested_ept is true if we are
 * emulating VM-Entry into a guest with EPT enabled.  On failure, the expected
 * Exit Qualification (for a VM-Entry consistency check VM-Exit) is assigned to
 * @entry_failure_code.
 */
1113static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3,
1114 bool nested_ept, bool reload_pdptrs,
1115 enum vm_entry_failure_code *entry_failure_code)
1116{
1117 if (CC(kvm_vcpu_is_illegal_gpa(vcpu, cr3))) {
1118 *entry_failure_code = ENTRY_FAIL_DEFAULT;
1119 return -EINVAL;
1120 }
1121
	/*
	 * If PAE paging and EPT are both on, CR3 is not used by the CPU and
	 * must not be dereferenced.
	 */
1126 if (reload_pdptrs && !nested_ept && is_pae_paging(vcpu) &&
1127 CC(!load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))) {
1128 *entry_failure_code = ENTRY_FAIL_PDPTE;
1129 return -EINVAL;
1130 }
1131
1132 if (!nested_ept)
1133 kvm_mmu_new_pgd(vcpu, cr3);
1134
1135 vcpu->arch.cr3 = cr3;
1136 kvm_register_mark_available(vcpu, VCPU_EXREG_CR3);
1137
	/* Re-initialize the MMU, e.g. to pick up CR4 MMU role changes. */
1139 kvm_init_mmu(vcpu);
1140
1141 return 0;
1142}
1143
/*
 * Returns whether KVM is able to configure the CPU to tag TLB entries
 * populated by L2 differently than TLB entries populated
 * by L1.
 *
 * If L0 uses EPT, L1 and L2 run with different EPTP because
 * guest_mode is part of kvm_mmu_page_role. Thus, TLB entries
 * are tagged with different EPTP.
 *
 * If L1 uses VPID and we allocated a vpid02, TLB entries are tagged
 * with different VPID (L1 entries are tagged with vmx->vpid
 * while L2 entries are tagged with vmx->nested.vpid02).
 */
1157static bool nested_has_guest_tlb_tag(struct kvm_vcpu *vcpu)
1158{
1159 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
1160
1161 return enable_ept ||
1162 (nested_cpu_has_vpid(vmcs12) && to_vmx(vcpu)->nested.vpid02);
1163}
1164
1165static void nested_vmx_transition_tlb_flush(struct kvm_vcpu *vcpu,
1166 struct vmcs12 *vmcs12,
1167 bool is_vmenter)
1168{
1169 struct vcpu_vmx *vmx = to_vmx(vcpu);
1170
	/*
	 * If vmcs12 doesn't use VPID, L1 expects linear and combined mappings
	 * for *all* contexts to be flushed on VM-Enter/VM-Exit, i.e. it's a
	 * full TLB flush from the guest's perspective.  This is required even
	 * if VPID is disabled in the host as KVM may need to synchronize the
	 * MMU in response to the guest TLB flush.
	 *
	 * Note, using TLB_FLUSH_GUEST is correct even if nested EPT is in use.
	 * EPT is a special snowflake, as guest-physical mappings aren't
	 * flushed on VPID invalidations, including VM-Enter or VM-Exit with
	 * VPID disabled.  As a result, KVM doesn't need to sync nested
	 * guest-physical mappings here, since L1 can't rely on VM-Enter to
	 * flush stale guest-physical mappings in the first place.
	 */
1185 if (!nested_cpu_has_vpid(vmcs12)) {
1186 kvm_make_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu);
1187 return;
1188 }
1189
	/* L2 should never have a VPID if VPID is disabled. */
1191 WARN_ON(!enable_vpid);
1192
	/*
	 * If VPID is enabled and used by vmcs12, but L2 does not have a unique
	 * TLB tag (ASID), i.e. EPT is disabled and KVM was unable to allocate
	 * a VPID for L2, flush the current context as the effective ASID is
	 * common to both L1 and L2.
	 *
	 * Defer the flush so that it runs after vmcs02.EPTP has been set by
	 * KVM_REQ_LOAD_MMU_PGD (if nested EPT is enabled) and to avoid
	 * redundant flushes further down the nested pipeline.
	 *
	 * If a TLB flush isn't required due to any of the above, and vpid12
	 * is changing, then the new "virtual" VPID (vpid12) will reuse the
	 * same "real" VPID (vpid02), so the vpid02 context needs to be
	 * flushed.  There is no direct mapping between vpid02 and vpid12;
	 * vpid02 is per-vCPU and reused for all nested vCPUs.  A flush on
	 * VM-Enter does not invalidate guest-physical mappings, so there is
	 * no need to sync the nested EPT MMU here.
	 */
1210 if (!nested_has_guest_tlb_tag(vcpu)) {
1211 kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
1212 } else if (is_vmenter &&
1213 vmcs12->virtual_processor_id != vmx->nested.last_vpid) {
1214 vmx->nested.last_vpid = vmcs12->virtual_processor_id;
1215 vpid_sync_context(nested_get_vpid02(vcpu));
1216 }
1217}
1218
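/* Return true if, within @mask, every bit set in @subset is also set in @superset. */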
1219static bool is_bitwise_subset(u64 superset, u64 subset, u64 mask)
1220{
1221 superset &= mask;
1222 subset &= mask;
1223
1224 return (superset | subset) == superset;
1225}
1226
1227static int vmx_restore_vmx_basic(struct vcpu_vmx *vmx, u64 data)
1228{
1229 const u64 feature_and_reserved =
		/* feature (except bit 48; see below) */
1231 BIT_ULL(49) | BIT_ULL(54) | BIT_ULL(55) |
		/* reserved */
1233 BIT_ULL(31) | GENMASK_ULL(47, 45) | GENMASK_ULL(63, 56);
1234 u64 vmx_basic = vmx->nested.msrs.basic;
1235
1236 if (!is_bitwise_subset(vmx_basic, data, feature_and_reserved))
1237 return -EINVAL;
1238
	/*
	 * KVM does not emulate a version of VMX that constrains physical
	 * addresses of VMX structures (e.g. VMCS) to 32-bits.
	 */
1243 if (data & BIT_ULL(48))
1244 return -EINVAL;
1245
1246 if (vmx_basic_vmcs_revision_id(vmx_basic) !=
1247 vmx_basic_vmcs_revision_id(data))
1248 return -EINVAL;
1249
1250 if (vmx_basic_vmcs_size(vmx_basic) > vmx_basic_vmcs_size(data))
1251 return -EINVAL;
1252
1253 vmx->nested.msrs.basic = data;
1254 return 0;
1255}
1256
1257static int
1258vmx_restore_control_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data)
1259{
1260 u64 supported;
1261 u32 *lowp, *highp;
1262
1263 switch (msr_index) {
1264 case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
1265 lowp = &vmx->nested.msrs.pinbased_ctls_low;
1266 highp = &vmx->nested.msrs.pinbased_ctls_high;
1267 break;
1268 case MSR_IA32_VMX_TRUE_PROCBASED_CTLS:
1269 lowp = &vmx->nested.msrs.procbased_ctls_low;
1270 highp = &vmx->nested.msrs.procbased_ctls_high;
1271 break;
1272 case MSR_IA32_VMX_TRUE_EXIT_CTLS:
1273 lowp = &vmx->nested.msrs.exit_ctls_low;
1274 highp = &vmx->nested.msrs.exit_ctls_high;
1275 break;
1276 case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
1277 lowp = &vmx->nested.msrs.entry_ctls_low;
1278 highp = &vmx->nested.msrs.entry_ctls_high;
1279 break;
1280 case MSR_IA32_VMX_PROCBASED_CTLS2:
1281 lowp = &vmx->nested.msrs.secondary_ctls_low;
1282 highp = &vmx->nested.msrs.secondary_ctls_high;
1283 break;
1284 default:
1285 BUG();
1286 }
1287
1288 supported = vmx_control_msr(*lowp, *highp);
1289
	/* Check must-be-1 bits are still 1. */
1291 if (!is_bitwise_subset(data, supported, GENMASK_ULL(31, 0)))
1292 return -EINVAL;
1293
	/* Check must-be-0 bits are still 0. */
1295 if (!is_bitwise_subset(supported, data, GENMASK_ULL(63, 32)))
1296 return -EINVAL;
1297
1298 *lowp = data;
1299 *highp = data >> 32;
1300 return 0;
1301}
1302
1303static int vmx_restore_vmx_misc(struct vcpu_vmx *vmx, u64 data)
1304{
1305 const u64 feature_and_reserved_bits =
		/* feature */
1307 BIT_ULL(5) | GENMASK_ULL(8, 6) | BIT_ULL(14) | BIT_ULL(15) |
1308 BIT_ULL(28) | BIT_ULL(29) | BIT_ULL(30) |
		/* reserved */
1310 GENMASK_ULL(13, 9) | BIT_ULL(31);
1311 u64 vmx_misc;
1312
1313 vmx_misc = vmx_control_msr(vmx->nested.msrs.misc_low,
1314 vmx->nested.msrs.misc_high);
1315
1316 if (!is_bitwise_subset(vmx_misc, data, feature_and_reserved_bits))
1317 return -EINVAL;
1318
1319 if ((vmx->nested.msrs.pinbased_ctls_high &
1320 PIN_BASED_VMX_PREEMPTION_TIMER) &&
1321 vmx_misc_preemption_timer_rate(data) !=
1322 vmx_misc_preemption_timer_rate(vmx_misc))
1323 return -EINVAL;
1324
1325 if (vmx_misc_cr3_count(data) > vmx_misc_cr3_count(vmx_misc))
1326 return -EINVAL;
1327
1328 if (vmx_misc_max_msr(data) > vmx_misc_max_msr(vmx_misc))
1329 return -EINVAL;
1330
1331 if (vmx_misc_mseg_revid(data) != vmx_misc_mseg_revid(vmx_misc))
1332 return -EINVAL;
1333
1334 vmx->nested.msrs.misc_low = data;
1335 vmx->nested.msrs.misc_high = data >> 32;
1336
1337 return 0;
1338}
1339
1340static int vmx_restore_vmx_ept_vpid_cap(struct vcpu_vmx *vmx, u64 data)
1341{
1342 u64 vmx_ept_vpid_cap;
1343
1344 vmx_ept_vpid_cap = vmx_control_msr(vmx->nested.msrs.ept_caps,
1345 vmx->nested.msrs.vpid_caps);
1346
	/* Every bit is either reserved or a feature bit. */
1348 if (!is_bitwise_subset(vmx_ept_vpid_cap, data, -1ULL))
1349 return -EINVAL;
1350
1351 vmx->nested.msrs.ept_caps = data;
1352 vmx->nested.msrs.vpid_caps = data >> 32;
1353 return 0;
1354}
1355
1356static int vmx_restore_fixed0_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data)
1357{
1358 u64 *msr;
1359
1360 switch (msr_index) {
1361 case MSR_IA32_VMX_CR0_FIXED0:
1362 msr = &vmx->nested.msrs.cr0_fixed0;
1363 break;
1364 case MSR_IA32_VMX_CR4_FIXED0:
1365 msr = &vmx->nested.msrs.cr4_fixed0;
1366 break;
1367 default:
1368 BUG();
1369 }
1370
	/*
	 * 1 bits (which indicate bits that are "must-be-1" during VMX
	 * operation) must remain 1 in the restored value.
	 */
1375 if (!is_bitwise_subset(data, *msr, -1ULL))
1376 return -EINVAL;
1377
1378 *msr = data;
1379 return 0;
1380}
1381
/*
 * Called when userspace is restoring VMX MSRs.
 *
 * Returns 0 on success, non-0 otherwise.
 */
1387int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
1388{
1389 struct vcpu_vmx *vmx = to_vmx(vcpu);
1390
	/*
	 * Don't allow changes to the VMX capability MSRs while the vCPU
	 * is in VMX operation.
	 */
1395 if (vmx->nested.vmxon)
1396 return -EBUSY;
1397
1398 switch (msr_index) {
1399 case MSR_IA32_VMX_BASIC:
1400 return vmx_restore_vmx_basic(vmx, data);
1401 case MSR_IA32_VMX_PINBASED_CTLS:
1402 case MSR_IA32_VMX_PROCBASED_CTLS:
1403 case MSR_IA32_VMX_EXIT_CTLS:
1404 case MSR_IA32_VMX_ENTRY_CTLS:
		/*
		 * The "non-true" VMX capability MSRs are generated from the
		 * true MSRs, so they cannot be restored directly.
		 *
		 * A VMM that wants the guest to see VMX_BASIC[55]=0 must
		 * instead restore the corresponding true MSRs with the
		 * values it wants the non-true MSRs to report.
		 */
1414 return -EINVAL;
1415 case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
1416 case MSR_IA32_VMX_TRUE_PROCBASED_CTLS:
1417 case MSR_IA32_VMX_TRUE_EXIT_CTLS:
1418 case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
1419 case MSR_IA32_VMX_PROCBASED_CTLS2:
1420 return vmx_restore_control_msr(vmx, msr_index, data);
1421 case MSR_IA32_VMX_MISC:
1422 return vmx_restore_vmx_misc(vmx, data);
1423 case MSR_IA32_VMX_CR0_FIXED0:
1424 case MSR_IA32_VMX_CR4_FIXED0:
1425 return vmx_restore_fixed0_msr(vmx, msr_index, data);
1426 case MSR_IA32_VMX_CR0_FIXED1:
1427 case MSR_IA32_VMX_CR4_FIXED1:
		/*
		 * These MSRs are generated based on the vCPU's CPUID, so we
		 * do not support restoring them directly.
		 */
1432 return -EINVAL;
1433 case MSR_IA32_VMX_EPT_VPID_CAP:
1434 return vmx_restore_vmx_ept_vpid_cap(vmx, data);
1435 case MSR_IA32_VMX_VMCS_ENUM:
1436 vmx->nested.msrs.vmcs_enum = data;
1437 return 0;
1438 case MSR_IA32_VMX_VMFUNC:
1439 if (data & ~vmx->nested.msrs.vmfunc_controls)
1440 return -EINVAL;
1441 vmx->nested.msrs.vmfunc_controls = data;
1442 return 0;
1443 default:
		/*
		 * The rest of the VMX capability MSRs do not support restore.
		 */
1447 return -EINVAL;
1448 }
1449}
1450
/* Returns 0 on success, non-0 otherwise. */
1452int vmx_get_vmx_msr(struct nested_vmx_msrs *msrs, u32 msr_index, u64 *pdata)
1453{
1454 switch (msr_index) {
1455 case MSR_IA32_VMX_BASIC:
1456 *pdata = msrs->basic;
1457 break;
1458 case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
1459 case MSR_IA32_VMX_PINBASED_CTLS:
1460 *pdata = vmx_control_msr(
1461 msrs->pinbased_ctls_low,
1462 msrs->pinbased_ctls_high);
1463 if (msr_index == MSR_IA32_VMX_PINBASED_CTLS)
1464 *pdata |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
1465 break;
1466 case MSR_IA32_VMX_TRUE_PROCBASED_CTLS:
1467 case MSR_IA32_VMX_PROCBASED_CTLS:
1468 *pdata = vmx_control_msr(
1469 msrs->procbased_ctls_low,
1470 msrs->procbased_ctls_high);
1471 if (msr_index == MSR_IA32_VMX_PROCBASED_CTLS)
1472 *pdata |= CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
1473 break;
1474 case MSR_IA32_VMX_TRUE_EXIT_CTLS:
1475 case MSR_IA32_VMX_EXIT_CTLS:
1476 *pdata = vmx_control_msr(
1477 msrs->exit_ctls_low,
1478 msrs->exit_ctls_high);
1479 if (msr_index == MSR_IA32_VMX_EXIT_CTLS)
1480 *pdata |= VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR;
1481 break;
1482 case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
1483 case MSR_IA32_VMX_ENTRY_CTLS:
1484 *pdata = vmx_control_msr(
1485 msrs->entry_ctls_low,
1486 msrs->entry_ctls_high);
1487 if (msr_index == MSR_IA32_VMX_ENTRY_CTLS)
1488 *pdata |= VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR;
1489 break;
1490 case MSR_IA32_VMX_MISC:
1491 *pdata = vmx_control_msr(
1492 msrs->misc_low,
1493 msrs->misc_high);
1494 break;
1495 case MSR_IA32_VMX_CR0_FIXED0:
1496 *pdata = msrs->cr0_fixed0;
1497 break;
1498 case MSR_IA32_VMX_CR0_FIXED1:
1499 *pdata = msrs->cr0_fixed1;
1500 break;
1501 case MSR_IA32_VMX_CR4_FIXED0:
1502 *pdata = msrs->cr4_fixed0;
1503 break;
1504 case MSR_IA32_VMX_CR4_FIXED1:
1505 *pdata = msrs->cr4_fixed1;
1506 break;
1507 case MSR_IA32_VMX_VMCS_ENUM:
1508 *pdata = msrs->vmcs_enum;
1509 break;
1510 case MSR_IA32_VMX_PROCBASED_CTLS2:
1511 *pdata = vmx_control_msr(
1512 msrs->secondary_ctls_low,
1513 msrs->secondary_ctls_high);
1514 break;
1515 case MSR_IA32_VMX_EPT_VPID_CAP:
1516 *pdata = msrs->ept_caps |
1517 ((u64)msrs->vpid_caps << 32);
1518 break;
1519 case MSR_IA32_VMX_VMFUNC:
1520 *pdata = msrs->vmfunc_controls;
1521 break;
1522 default:
1523 return 1;
1524 }
1525
1526 return 0;
1527}
1528
/*
 * Copy the writable VMCS shadow fields back to the VMCS12, in case they have
 * been modified by the L1 guest.  Note, "writable" in this context means
 * "writable by the guest", i.e. tagged SHADOW_FIELD_RW; the set of fields
 * tagged SHADOW_FIELD_RO may or may not align with the "read-only" VM-exit
 * information fields (which are actually writable if the vCPU is configured
 * to support "VMWRITE to any supported field in the VMCS").
 */
1537static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx)
1538{
1539 struct vmcs *shadow_vmcs = vmx->vmcs01.shadow_vmcs;
1540 struct vmcs12 *vmcs12 = get_vmcs12(&vmx->vcpu);
1541 struct shadow_vmcs_field field;
1542 unsigned long val;
1543 int i;
1544
1545 if (WARN_ON(!shadow_vmcs))
1546 return;
1547
1548 preempt_disable();
1549
1550 vmcs_load(shadow_vmcs);
1551
1552 for (i = 0; i < max_shadow_read_write_fields; i++) {
1553 field = shadow_read_write_fields[i];
1554 val = __vmcs_readl(field.encoding);
1555 vmcs12_write_any(vmcs12, field.encoding, field.offset, val);
1556 }
1557
1558 vmcs_clear(shadow_vmcs);
1559 vmcs_load(vmx->loaded_vmcs->vmcs);
1560
1561 preempt_enable();
1562}
1563
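/*
 * Copy all shadowed fields (both read/write and read-only) from the cached
 * vmcs12 into the shadow VMCS so that L1's VMREADs see up-to-date values.
 */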
1564static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx)
1565{
1566 const struct shadow_vmcs_field *fields[] = {
1567 shadow_read_write_fields,
1568 shadow_read_only_fields
1569 };
1570 const int max_fields[] = {
1571 max_shadow_read_write_fields,
1572 max_shadow_read_only_fields
1573 };
1574 struct vmcs *shadow_vmcs = vmx->vmcs01.shadow_vmcs;
1575 struct vmcs12 *vmcs12 = get_vmcs12(&vmx->vcpu);
1576 struct shadow_vmcs_field field;
1577 unsigned long val;
1578 int i, q;
1579
1580 if (WARN_ON(!shadow_vmcs))
1581 return;
1582
1583 vmcs_load(shadow_vmcs);
1584
1585 for (q = 0; q < ARRAY_SIZE(fields); q++) {
1586 for (i = 0; i < max_fields[q]; i++) {
1587 field = fields[q][i];
1588 val = vmcs12_read_any(vmcs12, field.encoding,
1589 field.offset);
1590 __vmcs_writel(field.encoding, val);
1591 }
1592 }
1593
1594 vmcs_clear(shadow_vmcs);
1595 vmcs_load(vmx->loaded_vmcs->vmcs);
1596}
1597
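/*
 * Copy fields from the enlightened VMCS into the cached vmcs12, honoring the
 * Hyper-V clean-fields mask: groups that the guest marked clean are assumed
 * unchanged and are skipped.
 */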
1598static void copy_enlightened_to_vmcs12(struct vcpu_vmx *vmx, u32 hv_clean_fields)
1599{
1600 struct vmcs12 *vmcs12 = vmx->nested.cached_vmcs12;
1601 struct hv_enlightened_vmcs *evmcs = vmx->nested.hv_evmcs;
1602
	/* HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE */
1604 vmcs12->tpr_threshold = evmcs->tpr_threshold;
1605 vmcs12->guest_rip = evmcs->guest_rip;
1606
1607 if (unlikely(!(hv_clean_fields &
1608 HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC))) {
1609 vmcs12->guest_rsp = evmcs->guest_rsp;
1610 vmcs12->guest_rflags = evmcs->guest_rflags;
1611 vmcs12->guest_interruptibility_info =
1612 evmcs->guest_interruptibility_info;
1613 }
1614
1615 if (unlikely(!(hv_clean_fields &
1616 HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_PROC))) {
1617 vmcs12->cpu_based_vm_exec_control =
1618 evmcs->cpu_based_vm_exec_control;
1619 }
1620
1621 if (unlikely(!(hv_clean_fields &
1622 HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EXCPN))) {
1623 vmcs12->exception_bitmap = evmcs->exception_bitmap;
1624 }
1625
1626 if (unlikely(!(hv_clean_fields &
1627 HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_ENTRY))) {
1628 vmcs12->vm_entry_controls = evmcs->vm_entry_controls;
1629 }
1630
1631 if (unlikely(!(hv_clean_fields &
1632 HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT))) {
1633 vmcs12->vm_entry_intr_info_field =
1634 evmcs->vm_entry_intr_info_field;
1635 vmcs12->vm_entry_exception_error_code =
1636 evmcs->vm_entry_exception_error_code;
1637 vmcs12->vm_entry_instruction_len =
1638 evmcs->vm_entry_instruction_len;
1639 }
1640
1641 if (unlikely(!(hv_clean_fields &
1642 HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1))) {
1643 vmcs12->host_ia32_pat = evmcs->host_ia32_pat;
1644 vmcs12->host_ia32_efer = evmcs->host_ia32_efer;
1645 vmcs12->host_cr0 = evmcs->host_cr0;
1646 vmcs12->host_cr3 = evmcs->host_cr3;
1647 vmcs12->host_cr4 = evmcs->host_cr4;
1648 vmcs12->host_ia32_sysenter_esp = evmcs->host_ia32_sysenter_esp;
1649 vmcs12->host_ia32_sysenter_eip = evmcs->host_ia32_sysenter_eip;
1650 vmcs12->host_rip = evmcs->host_rip;
1651 vmcs12->host_ia32_sysenter_cs = evmcs->host_ia32_sysenter_cs;
1652 vmcs12->host_es_selector = evmcs->host_es_selector;
1653 vmcs12->host_cs_selector = evmcs->host_cs_selector;
1654 vmcs12->host_ss_selector = evmcs->host_ss_selector;
1655 vmcs12->host_ds_selector = evmcs->host_ds_selector;
1656 vmcs12->host_fs_selector = evmcs->host_fs_selector;
1657 vmcs12->host_gs_selector = evmcs->host_gs_selector;
1658 vmcs12->host_tr_selector = evmcs->host_tr_selector;
1659 }
1660
1661 if (unlikely(!(hv_clean_fields &
1662 HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1))) {
1663 vmcs12->pin_based_vm_exec_control =
1664 evmcs->pin_based_vm_exec_control;
1665 vmcs12->vm_exit_controls = evmcs->vm_exit_controls;
1666 vmcs12->secondary_vm_exec_control =
1667 evmcs->secondary_vm_exec_control;
1668 }
1669
1670 if (unlikely(!(hv_clean_fields &
1671 HV_VMX_ENLIGHTENED_CLEAN_FIELD_IO_BITMAP))) {
1672 vmcs12->io_bitmap_a = evmcs->io_bitmap_a;
1673 vmcs12->io_bitmap_b = evmcs->io_bitmap_b;
1674 }
1675
1676 if (unlikely(!(hv_clean_fields &
1677 HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP))) {
1678 vmcs12->msr_bitmap = evmcs->msr_bitmap;
1679 }
1680
1681 if (unlikely(!(hv_clean_fields &
1682 HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2))) {
1683 vmcs12->guest_es_base = evmcs->guest_es_base;
1684 vmcs12->guest_cs_base = evmcs->guest_cs_base;
1685 vmcs12->guest_ss_base = evmcs->guest_ss_base;
1686 vmcs12->guest_ds_base = evmcs->guest_ds_base;
1687 vmcs12->guest_fs_base = evmcs->guest_fs_base;
1688 vmcs12->guest_gs_base = evmcs->guest_gs_base;
1689 vmcs12->guest_ldtr_base = evmcs->guest_ldtr_base;
1690 vmcs12->guest_tr_base = evmcs->guest_tr_base;
1691 vmcs12->guest_gdtr_base = evmcs->guest_gdtr_base;
1692 vmcs12->guest_idtr_base = evmcs->guest_idtr_base;
1693 vmcs12->guest_es_limit = evmcs->guest_es_limit;
1694 vmcs12->guest_cs_limit = evmcs->guest_cs_limit;
1695 vmcs12->guest_ss_limit = evmcs->guest_ss_limit;
1696 vmcs12->guest_ds_limit = evmcs->guest_ds_limit;
1697 vmcs12->guest_fs_limit = evmcs->guest_fs_limit;
1698 vmcs12->guest_gs_limit = evmcs->guest_gs_limit;
1699 vmcs12->guest_ldtr_limit = evmcs->guest_ldtr_limit;
1700 vmcs12->guest_tr_limit = evmcs->guest_tr_limit;
1701 vmcs12->guest_gdtr_limit = evmcs->guest_gdtr_limit;
1702 vmcs12->guest_idtr_limit = evmcs->guest_idtr_limit;
1703 vmcs12->guest_es_ar_bytes = evmcs->guest_es_ar_bytes;
1704 vmcs12->guest_cs_ar_bytes = evmcs->guest_cs_ar_bytes;
1705 vmcs12->guest_ss_ar_bytes = evmcs->guest_ss_ar_bytes;
1706 vmcs12->guest_ds_ar_bytes = evmcs->guest_ds_ar_bytes;
1707 vmcs12->guest_fs_ar_bytes = evmcs->guest_fs_ar_bytes;
1708 vmcs12->guest_gs_ar_bytes = evmcs->guest_gs_ar_bytes;
1709 vmcs12->guest_ldtr_ar_bytes = evmcs->guest_ldtr_ar_bytes;
1710 vmcs12->guest_tr_ar_bytes = evmcs->guest_tr_ar_bytes;
1711 vmcs12->guest_es_selector = evmcs->guest_es_selector;
1712 vmcs12->guest_cs_selector = evmcs->guest_cs_selector;
1713 vmcs12->guest_ss_selector = evmcs->guest_ss_selector;
1714 vmcs12->guest_ds_selector = evmcs->guest_ds_selector;
1715 vmcs12->guest_fs_selector = evmcs->guest_fs_selector;
1716 vmcs12->guest_gs_selector = evmcs->guest_gs_selector;
1717 vmcs12->guest_ldtr_selector = evmcs->guest_ldtr_selector;
1718 vmcs12->guest_tr_selector = evmcs->guest_tr_selector;
1719 }
1720
1721 if (unlikely(!(hv_clean_fields &
1722 HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2))) {
1723 vmcs12->tsc_offset = evmcs->tsc_offset;
1724 vmcs12->virtual_apic_page_addr = evmcs->virtual_apic_page_addr;
1725 vmcs12->xss_exit_bitmap = evmcs->xss_exit_bitmap;
1726 }
1727
1728 if (unlikely(!(hv_clean_fields &
1729 HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR))) {
1730 vmcs12->cr0_guest_host_mask = evmcs->cr0_guest_host_mask;
1731 vmcs12->cr4_guest_host_mask = evmcs->cr4_guest_host_mask;
1732 vmcs12->cr0_read_shadow = evmcs->cr0_read_shadow;
1733 vmcs12->cr4_read_shadow = evmcs->cr4_read_shadow;
1734 vmcs12->guest_cr0 = evmcs->guest_cr0;
1735 vmcs12->guest_cr3 = evmcs->guest_cr3;
1736 vmcs12->guest_cr4 = evmcs->guest_cr4;
1737 vmcs12->guest_dr7 = evmcs->guest_dr7;
1738 }
1739
1740 if (unlikely(!(hv_clean_fields &
1741 HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER))) {
1742 vmcs12->host_fs_base = evmcs->host_fs_base;
1743 vmcs12->host_gs_base = evmcs->host_gs_base;
1744 vmcs12->host_tr_base = evmcs->host_tr_base;
1745 vmcs12->host_gdtr_base = evmcs->host_gdtr_base;
1746 vmcs12->host_idtr_base = evmcs->host_idtr_base;
1747 vmcs12->host_rsp = evmcs->host_rsp;
1748 }
1749
1750 if (unlikely(!(hv_clean_fields &
1751 HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_XLAT))) {
1752 vmcs12->ept_pointer = evmcs->ept_pointer;
1753 vmcs12->virtual_processor_id = evmcs->virtual_processor_id;
1754 }
1755
1756 if (unlikely(!(hv_clean_fields &
1757 HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1))) {
1758 vmcs12->vmcs_link_pointer = evmcs->vmcs_link_pointer;
1759 vmcs12->guest_ia32_debugctl = evmcs->guest_ia32_debugctl;
1760 vmcs12->guest_ia32_pat = evmcs->guest_ia32_pat;
1761 vmcs12->guest_ia32_efer = evmcs->guest_ia32_efer;
1762 vmcs12->guest_pdptr0 = evmcs->guest_pdptr0;
1763 vmcs12->guest_pdptr1 = evmcs->guest_pdptr1;
1764 vmcs12->guest_pdptr2 = evmcs->guest_pdptr2;
1765 vmcs12->guest_pdptr3 = evmcs->guest_pdptr3;
1766 vmcs12->guest_pending_dbg_exceptions =
1767 evmcs->guest_pending_dbg_exceptions;
1768 vmcs12->guest_sysenter_esp = evmcs->guest_sysenter_esp;
1769 vmcs12->guest_sysenter_eip = evmcs->guest_sysenter_eip;
1770 vmcs12->guest_bndcfgs = evmcs->guest_bndcfgs;
1771 vmcs12->guest_activity_state = evmcs->guest_activity_state;
1772 vmcs12->guest_sysenter_cs = evmcs->guest_sysenter_cs;
1773 }
1774
	/*
	 * Fields that are intentionally not copied from the enlightened VMCS:
	 * the VM-entry/VM-exit MSR load/store lists, the read-only VM-exit
	 * information fields (those are filled in by KVM itself on nested
	 * VM-exit) and the I/O instruction exit info fields, which have no
	 * struct vmcs12 counterpart.
	 */
1811 return;
1812}
1813
1814static void copy_vmcs12_to_enlightened(struct vcpu_vmx *vmx)
1815{
1816 struct vmcs12 *vmcs12 = vmx->nested.cached_vmcs12;
1817 struct hv_enlightened_vmcs *evmcs = vmx->nested.hv_evmcs;
1818
	/*
	 * Only fields that KVM itself may have modified on behalf of L1 are
	 * copied back to the enlightened VMCS below.  Host state, control
	 * fields that are owned by L1, and vmcs12 fields that have no
	 * counterpart in struct hv_enlightened_vmcs are deliberately left
	 * untouched.
	 */
1883 evmcs->guest_es_selector = vmcs12->guest_es_selector;
1884 evmcs->guest_cs_selector = vmcs12->guest_cs_selector;
1885 evmcs->guest_ss_selector = vmcs12->guest_ss_selector;
1886 evmcs->guest_ds_selector = vmcs12->guest_ds_selector;
1887 evmcs->guest_fs_selector = vmcs12->guest_fs_selector;
1888 evmcs->guest_gs_selector = vmcs12->guest_gs_selector;
1889 evmcs->guest_ldtr_selector = vmcs12->guest_ldtr_selector;
1890 evmcs->guest_tr_selector = vmcs12->guest_tr_selector;
1891
1892 evmcs->guest_es_limit = vmcs12->guest_es_limit;
1893 evmcs->guest_cs_limit = vmcs12->guest_cs_limit;
1894 evmcs->guest_ss_limit = vmcs12->guest_ss_limit;
1895 evmcs->guest_ds_limit = vmcs12->guest_ds_limit;
1896 evmcs->guest_fs_limit = vmcs12->guest_fs_limit;
1897 evmcs->guest_gs_limit = vmcs12->guest_gs_limit;
1898 evmcs->guest_ldtr_limit = vmcs12->guest_ldtr_limit;
1899 evmcs->guest_tr_limit = vmcs12->guest_tr_limit;
1900 evmcs->guest_gdtr_limit = vmcs12->guest_gdtr_limit;
1901 evmcs->guest_idtr_limit = vmcs12->guest_idtr_limit;
1902
1903 evmcs->guest_es_ar_bytes = vmcs12->guest_es_ar_bytes;
1904 evmcs->guest_cs_ar_bytes = vmcs12->guest_cs_ar_bytes;
1905 evmcs->guest_ss_ar_bytes = vmcs12->guest_ss_ar_bytes;
1906 evmcs->guest_ds_ar_bytes = vmcs12->guest_ds_ar_bytes;
1907 evmcs->guest_fs_ar_bytes = vmcs12->guest_fs_ar_bytes;
1908 evmcs->guest_gs_ar_bytes = vmcs12->guest_gs_ar_bytes;
1909 evmcs->guest_ldtr_ar_bytes = vmcs12->guest_ldtr_ar_bytes;
1910 evmcs->guest_tr_ar_bytes = vmcs12->guest_tr_ar_bytes;
1911
1912 evmcs->guest_es_base = vmcs12->guest_es_base;
1913 evmcs->guest_cs_base = vmcs12->guest_cs_base;
1914 evmcs->guest_ss_base = vmcs12->guest_ss_base;
1915 evmcs->guest_ds_base = vmcs12->guest_ds_base;
1916 evmcs->guest_fs_base = vmcs12->guest_fs_base;
1917 evmcs->guest_gs_base = vmcs12->guest_gs_base;
1918 evmcs->guest_ldtr_base = vmcs12->guest_ldtr_base;
1919 evmcs->guest_tr_base = vmcs12->guest_tr_base;
1920 evmcs->guest_gdtr_base = vmcs12->guest_gdtr_base;
1921 evmcs->guest_idtr_base = vmcs12->guest_idtr_base;
1922
1923 evmcs->guest_ia32_pat = vmcs12->guest_ia32_pat;
1924 evmcs->guest_ia32_efer = vmcs12->guest_ia32_efer;
1925
1926 evmcs->guest_pdptr0 = vmcs12->guest_pdptr0;
1927 evmcs->guest_pdptr1 = vmcs12->guest_pdptr1;
1928 evmcs->guest_pdptr2 = vmcs12->guest_pdptr2;
1929 evmcs->guest_pdptr3 = vmcs12->guest_pdptr3;
1930
1931 evmcs->guest_pending_dbg_exceptions =
1932 vmcs12->guest_pending_dbg_exceptions;
1933 evmcs->guest_sysenter_esp = vmcs12->guest_sysenter_esp;
1934 evmcs->guest_sysenter_eip = vmcs12->guest_sysenter_eip;
1935
1936 evmcs->guest_activity_state = vmcs12->guest_activity_state;
1937 evmcs->guest_sysenter_cs = vmcs12->guest_sysenter_cs;
1938
1939 evmcs->guest_cr0 = vmcs12->guest_cr0;
1940 evmcs->guest_cr3 = vmcs12->guest_cr3;
1941 evmcs->guest_cr4 = vmcs12->guest_cr4;
1942 evmcs->guest_dr7 = vmcs12->guest_dr7;
1943
1944 evmcs->guest_physical_address = vmcs12->guest_physical_address;
1945
1946 evmcs->vm_instruction_error = vmcs12->vm_instruction_error;
1947 evmcs->vm_exit_reason = vmcs12->vm_exit_reason;
1948 evmcs->vm_exit_intr_info = vmcs12->vm_exit_intr_info;
1949 evmcs->vm_exit_intr_error_code = vmcs12->vm_exit_intr_error_code;
1950 evmcs->idt_vectoring_info_field = vmcs12->idt_vectoring_info_field;
1951 evmcs->idt_vectoring_error_code = vmcs12->idt_vectoring_error_code;
1952 evmcs->vm_exit_instruction_len = vmcs12->vm_exit_instruction_len;
1953 evmcs->vmx_instruction_info = vmcs12->vmx_instruction_info;
1954
1955 evmcs->exit_qualification = vmcs12->exit_qualification;
1956
1957 evmcs->guest_linear_address = vmcs12->guest_linear_address;
1958 evmcs->guest_rsp = vmcs12->guest_rsp;
1959 evmcs->guest_rflags = vmcs12->guest_rflags;
1960
1961 evmcs->guest_interruptibility_info =
1962 vmcs12->guest_interruptibility_info;
1963 evmcs->cpu_based_vm_exec_control = vmcs12->cpu_based_vm_exec_control;
1964 evmcs->vm_entry_controls = vmcs12->vm_entry_controls;
1965 evmcs->vm_entry_intr_info_field = vmcs12->vm_entry_intr_info_field;
1966 evmcs->vm_entry_exception_error_code =
1967 vmcs12->vm_entry_exception_error_code;
1968 evmcs->vm_entry_instruction_len = vmcs12->vm_entry_instruction_len;
1969
1970 evmcs->guest_rip = vmcs12->guest_rip;
1971
1972 evmcs->guest_bndcfgs = vmcs12->guest_bndcfgs;
1973
1974 return;
1975}
1976
/*
 * This is an equivalent of the nested hypervisor executing the vmptrld
 * instruction.
 */
1981static enum nested_evmptrld_status nested_vmx_handle_enlightened_vmptrld(
1982 struct kvm_vcpu *vcpu, bool from_launch)
1983{
1984 struct vcpu_vmx *vmx = to_vmx(vcpu);
1985 bool evmcs_gpa_changed = false;
1986 u64 evmcs_gpa;
1987
1988 if (likely(!vmx->nested.enlightened_vmcs_enabled))
1989 return EVMPTRLD_DISABLED;
1990
1991 if (!nested_enlightened_vmentry(vcpu, &evmcs_gpa)) {
1992 nested_release_evmcs(vcpu);
1993 return EVMPTRLD_DISABLED;
1994 }
1995
1996 if (unlikely(evmcs_gpa != vmx->nested.hv_evmcs_vmptr)) {
1997 vmx->nested.current_vmptr = -1ull;
1998
1999 nested_release_evmcs(vcpu);
2000
2001 if (kvm_vcpu_map(vcpu, gpa_to_gfn(evmcs_gpa),
2002 &vmx->nested.hv_evmcs_map))
2003 return EVMPTRLD_ERROR;
2004
2005 vmx->nested.hv_evmcs = vmx->nested.hv_evmcs_map.hva;
2006
		/*
		 * Currently, KVM only supports eVMCS version 1
		 * (== KVM_EVMCS_VERSION), so the guest is expected to put that
		 * value in the revision_id field.  For compatibility with
		 * guests that instead populate the field with the basic VMCS
		 * revision id, VMCS12_REVISION is tolerated as well; anything
		 * else fails the enlightened VMPTRLD.
		 */
2029 if ((vmx->nested.hv_evmcs->revision_id != KVM_EVMCS_VERSION) &&
2030 (vmx->nested.hv_evmcs->revision_id != VMCS12_REVISION)) {
2031 nested_release_evmcs(vcpu);
2032 return EVMPTRLD_VMFAIL;
2033 }
2034
2035 vmx->nested.hv_evmcs_vmptr = evmcs_gpa;
2036
2037 evmcs_gpa_changed = true;
2038
		/*
		 * Unlike a normal vmcs12, an enlightened vmcs12 is not fully
		 * reloaded from guest memory (read-only fields, fields not
		 * present in struct hv_enlightened_vmcs, ...). Make sure
		 * there are no leftovers from a previous L2.
		 */
2044 if (from_launch) {
2045 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
2046 memset(vmcs12, 0, sizeof(*vmcs12));
2047 vmcs12->hdr.revision_id = VMCS12_REVISION;
2048 }
2049
2050 }
2051
	/*
	 * Clean-fields data can't be used on VMLAUNCH or when switching
	 * between different L2 guests, as KVM keeps a single vmcs12 per L1.
	 */
2056 if (from_launch || evmcs_gpa_changed)
2057 vmx->nested.hv_evmcs->hv_clean_fields &=
2058 ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
2059
2060 return EVMPTRLD_SUCCEEDED;
2061}
2062
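/*
 * Flush the cached vmcs12 back to whichever "shadow" L1 is using: the
 * enlightened VMCS when one is mapped, otherwise the hardware shadow VMCS.
 */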
2063void nested_sync_vmcs12_to_shadow(struct kvm_vcpu *vcpu)
2064{
2065 struct vcpu_vmx *vmx = to_vmx(vcpu);
2066
2067 if (evmptr_is_valid(vmx->nested.hv_evmcs_vmptr))
2068 copy_vmcs12_to_enlightened(vmx);
2069 else
2070 copy_vmcs12_to_shadow(vmx);
2071
2072 vmx->nested.need_vmcs12_to_shadow_sync = false;
2073}
2074
2075static enum hrtimer_restart vmx_preemption_timer_fn(struct hrtimer *timer)
2076{
2077 struct vcpu_vmx *vmx =
2078 container_of(timer, struct vcpu_vmx, nested.preemption_timer);
2079
2080 vmx->nested.preemption_timer_expired = true;
2081 kvm_make_request(KVM_REQ_EVENT, &vmx->vcpu);
2082 kvm_vcpu_kick(&vmx->vcpu);
2083
2084 return HRTIMER_NORESTART;
2085}
2086
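/*
 * Compute the number of VMX-preemption timer ticks remaining until the
 * deadline, which is latched (in units of L1's scaled TSC) the first time it
 * is computed so that re-computation does not restart the timer from scratch.
 */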
2087static u64 vmx_calc_preemption_timer_value(struct kvm_vcpu *vcpu)
2088{
2089 struct vcpu_vmx *vmx = to_vmx(vcpu);
2090 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
2091
2092 u64 l1_scaled_tsc = kvm_read_l1_tsc(vcpu, rdtsc()) >>
2093 VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE;
2094
2095 if (!vmx->nested.has_preemption_timer_deadline) {
2096 vmx->nested.preemption_timer_deadline =
2097 vmcs12->vmx_preemption_timer_value + l1_scaled_tsc;
2098 vmx->nested.has_preemption_timer_deadline = true;
2099 }
2100 return vmx->nested.preemption_timer_deadline - l1_scaled_tsc;
2101}
2102
2103static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu,
2104 u64 preemption_timeout)
2105{
2106 struct vcpu_vmx *vmx = to_vmx(vcpu);
2107
	/*
	 * A timer value of zero is architecturally guaranteed to cause
	 * a VMExit prior to executing any instructions in the guest.
	 */
2112 if (preemption_timeout == 0) {
2113 vmx_preemption_timer_fn(&vmx->nested.preemption_timer);
2114 return;
2115 }
2116
2117 if (vcpu->arch.virtual_tsc_khz == 0)
2118 return;
2119
2120 preemption_timeout <<= VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE;
2121 preemption_timeout *= 1000000;
2122 do_div(preemption_timeout, vcpu->arch.virtual_tsc_khz);
2123 hrtimer_start(&vmx->nested.preemption_timer,
2124 ktime_add_ns(ktime_get(), preemption_timeout),
2125 HRTIMER_MODE_ABS_PINNED);
2126}
2127
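/*
 * Determine the EFER value to run L2 with: take vmcs12's value if VM-entry
 * loads IA32_EFER, otherwise derive it from L1's EFER and vmcs12's
 * "IA-32e mode guest" entry control.
 */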
2128static u64 nested_vmx_calc_efer(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
2129{
2130 if (vmx->nested.nested_run_pending &&
2131 (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER))
2132 return vmcs12->guest_ia32_efer;
2133 else if (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE)
2134 return vmx->vcpu.arch.efer | (EFER_LMA | EFER_LME);
2135 else
2136 return vmx->vcpu.arch.efer & ~(EFER_LMA | EFER_LME);
2137}
2138
2139static void prepare_vmcs02_constant_state(struct vcpu_vmx *vmx)
2140{
	/*
	 * If vmcs02 hasn't been initialized, set the constant vmcs02 state
	 * according to L0's settings (vmcs12 is irrelevant here).  Host
	 * fields that come from L0 and are not constant, e.g. HOST_CR3,
	 * will be set as needed prior to VMLAUNCH/VMRESUME.
	 */
2147 if (vmx->nested.vmcs02_initialized)
2148 return;
2149 vmx->nested.vmcs02_initialized = true;
2150
	/*
	 * We don't care what the EPTP value is, we just need to guarantee
	 * it's valid so we don't get a false positive when doing early
	 * consistency checks.
	 */
2156 if (enable_ept && nested_early_check)
2157 vmcs_write64(EPT_POINTER,
2158 construct_eptp(&vmx->vcpu, 0, PT64_ROOT_4LEVEL));
2159
	/* All VMFUNCs are currently emulated through L0 vmexits. */
2161 if (cpu_has_vmx_vmfunc())
2162 vmcs_write64(VM_FUNCTION_CONTROL, 0);
2163
2164 if (cpu_has_vmx_posted_intr())
2165 vmcs_write16(POSTED_INTR_NV, POSTED_INTR_NESTED_VECTOR);
2166
2167 if (cpu_has_vmx_msr_bitmap())
2168 vmcs_write64(MSR_BITMAP, __pa(vmx->nested.vmcs02.msr_bitmap));
2169
	/*
	 * PML is emulated for L2, i.e. never enabled in hardware for the
	 * vmcs02, so the PML address and guest PML index written here are
	 * constant placeholder values.
	 */
2175 if (enable_pml) {
2176 vmcs_write64(PML_ADDRESS, 0);
2177 vmcs_write16(GUEST_PML_INDEX, -1);
2178 }
2179
2180 if (cpu_has_vmx_encls_vmexit())
2181 vmcs_write64(ENCLS_EXITING_BITMAP, -1ull);
2182
	/*
	 * Set the MSR load/store lists to match L0's settings.  Only the
	 * addresses are constant (for vmcs02), the counts can change based
	 * on L2's behavior, e.g. switching to/from long mode.
	 */
2188 vmcs_write64(VM_EXIT_MSR_STORE_ADDR, __pa(vmx->msr_autostore.guest.val));
2189 vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host.val));
2190 vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest.val));
2191
2192 vmx_set_constant_host_state(vmx);
2193}
2194
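/*
 * The "rare" half of early vmcs02 setup.  These fields rarely change across
 * nested entries, so they are only (re)written when vmcs12 is dirty or when
 * an enlightened VMCS is in use (see the callers' dirty_vmcs12 /
 * hv_evmcs_vmptr checks), saving VMWRITEs on the hot path.
 */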
2195static void prepare_vmcs02_early_rare(struct vcpu_vmx *vmx,
2196 struct vmcs12 *vmcs12)
2197{
2198 prepare_vmcs02_constant_state(vmx);
2199
2200 vmcs_write64(VMCS_LINK_POINTER, -1ull);
2201
2202 if (enable_vpid) {
2203 if (nested_cpu_has_vpid(vmcs12) && vmx->nested.vpid02)
2204 vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->nested.vpid02);
2205 else
2206 vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
2207 }
2208}
2209
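/*
 * Merge the VM-execution, VM-entry and VM-exit controls that L1 requested in
 * vmcs12 with what L0 needs for itself (taken from vmcs01) and write the
 * result into vmcs02.  Only the control fields are handled here; this runs
 * before the optional early hardware consistency check, while the remaining
 * guest state is merged later in prepare_vmcs02().
 */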
2210static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct loaded_vmcs *vmcs01,
2211 struct vmcs12 *vmcs12)
2212{
2213 u32 exec_control;
2214 u64 guest_efer = nested_vmx_calc_efer(vmx, vmcs12);
2215
2216 if (vmx->nested.dirty_vmcs12 || evmptr_is_valid(vmx->nested.hv_evmcs_vmptr))
2217 prepare_vmcs02_early_rare(vmx, vmcs12);
2218
	/*
	 * PIN CONTROLS
	 */
2222 exec_control = __pin_controls_get(vmcs01);
2223 exec_control |= (vmcs12->pin_based_vm_exec_control &
2224 ~PIN_BASED_VMX_PREEMPTION_TIMER);
2225
2226
2227 vmx->nested.pi_pending = false;
2228 if (nested_cpu_has_posted_intr(vmcs12))
2229 vmx->nested.posted_intr_nv = vmcs12->posted_intr_nv;
2230 else
2231 exec_control &= ~PIN_BASED_POSTED_INTR;
2232 pin_controls_set(vmx, exec_control);
2233
	/*
	 * EXEC CONTROLS
	 */
2237 exec_control = __exec_controls_get(vmcs01);
2238 exec_control &= ~CPU_BASED_INTR_WINDOW_EXITING;
2239 exec_control &= ~CPU_BASED_NMI_WINDOW_EXITING;
2240 exec_control &= ~CPU_BASED_TPR_SHADOW;
2241 exec_control |= vmcs12->cpu_based_vm_exec_control;
2242
2243 vmx->nested.l1_tpr_threshold = -1;
2244 if (exec_control & CPU_BASED_TPR_SHADOW)
2245 vmcs_write32(TPR_THRESHOLD, vmcs12->tpr_threshold);
2246#ifdef CONFIG_X86_64
2247 else
2248 exec_control |= CPU_BASED_CR8_LOAD_EXITING |
2249 CPU_BASED_CR8_STORE_EXITING;
2250#endif
2251
2252
2253
2254
2255
2256 exec_control |= CPU_BASED_UNCOND_IO_EXITING;
2257 exec_control &= ~CPU_BASED_USE_IO_BITMAPS;
2258
2259
2260
2261
2262
2263
2264
2265 exec_control &= ~CPU_BASED_USE_MSR_BITMAPS;
2266 exec_control |= exec_controls_get(vmx) & CPU_BASED_USE_MSR_BITMAPS;
2267
2268 exec_controls_set(vmx, exec_control);
2269
	/*
	 * SECONDARY EXEC CONTROLS
	 */
2273 if (cpu_has_secondary_exec_ctrls()) {
2274 exec_control = __secondary_exec_controls_get(vmcs01);
2275
2276
2277 exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
2278 SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
2279 SECONDARY_EXEC_ENABLE_INVPCID |
2280 SECONDARY_EXEC_ENABLE_RDTSCP |
2281 SECONDARY_EXEC_XSAVES |
2282 SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE |
2283 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
2284 SECONDARY_EXEC_APIC_REGISTER_VIRT |
2285 SECONDARY_EXEC_ENABLE_VMFUNC |
2286 SECONDARY_EXEC_TSC_SCALING |
2287 SECONDARY_EXEC_DESC);
2288
2289 if (nested_cpu_has(vmcs12,
2290 CPU_BASED_ACTIVATE_SECONDARY_CONTROLS))
2291 exec_control |= vmcs12->secondary_vm_exec_control;
2292
2293
2294 exec_control &= ~SECONDARY_EXEC_ENABLE_PML;
2295
2296
2297 exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS;
2298
2299
2300
2301
2302
2303 if (!boot_cpu_has(X86_FEATURE_UMIP) && vmx_umip_emulated() &&
2304 (vmcs12->guest_cr4 & X86_CR4_UMIP))
2305 exec_control |= SECONDARY_EXEC_DESC;
2306
2307 if (exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY)
2308 vmcs_write16(GUEST_INTR_STATUS,
2309 vmcs12->guest_intr_status);
2310
2311 if (!nested_cpu_has2(vmcs12, SECONDARY_EXEC_UNRESTRICTED_GUEST))
2312 exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST;
2313
2314 if (exec_control & SECONDARY_EXEC_ENCLS_EXITING)
2315 vmx_write_encls_bitmap(&vmx->vcpu, vmcs12);
2316
2317 secondary_exec_controls_set(vmx, exec_control);
2318 }
2319
	/*
	 * ENTRY CONTROLS
	 *
	 * vmcs12's VM_{ENTRY,EXIT}_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE
	 * are emulated by vmx_set_efer() in prepare_vmcs02(), but speculate
	 * on the related bits (if supported by the CPU) in the hope that
	 * we can avoid VMWrites during vmx_set_efer().
	 */
2328 exec_control = __vm_entry_controls_get(vmcs01);
2329 exec_control |= vmcs12->vm_entry_controls;
2330 exec_control &= ~(VM_ENTRY_IA32E_MODE | VM_ENTRY_LOAD_IA32_EFER);
2331 if (cpu_has_load_ia32_efer()) {
2332 if (guest_efer & EFER_LMA)
2333 exec_control |= VM_ENTRY_IA32E_MODE;
2334 if (guest_efer != host_efer)
2335 exec_control |= VM_ENTRY_LOAD_IA32_EFER;
2336 }
2337 vm_entry_controls_set(vmx, exec_control);
2338
	/*
	 * EXIT CONTROLS
	 *
	 * L2->L1 exit controls are emulated - the hardware exit is to L0 so
	 * we should use its exit controls, except for VM_EXIT_LOAD_IA32_EFER
	 * bits which we consider mandatory enabled.
	 */
2346 exec_control = __vm_exit_controls_get(vmcs01);
2347 if (cpu_has_load_ia32_efer() && guest_efer != host_efer)
2348 exec_control |= VM_EXIT_LOAD_IA32_EFER;
2349 else
2350 exec_control &= ~VM_EXIT_LOAD_IA32_EFER;
2351 vm_exit_controls_set(vmx, exec_control);
2352
	/*
	 * Interrupt/Exception Fields
	 */
2356 if (vmx->nested.nested_run_pending) {
2357 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
2358 vmcs12->vm_entry_intr_info_field);
2359 vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE,
2360 vmcs12->vm_entry_exception_error_code);
2361 vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
2362 vmcs12->vm_entry_instruction_len);
2363 vmcs_write32(GUEST_INTERRUPTIBILITY_INFO,
2364 vmcs12->guest_interruptibility_info);
2365 vmx->loaded_vmcs->nmi_known_unmasked =
2366 !(vmcs12->guest_interruptibility_info & GUEST_INTR_STATE_NMI);
2367 } else {
2368 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0);
2369 }
2370}
2371
2372static void prepare_vmcs02_rare(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
2373{
2374 struct hv_enlightened_vmcs *hv_evmcs = vmx->nested.hv_evmcs;
2375
2376 if (!hv_evmcs || !(hv_evmcs->hv_clean_fields &
2377 HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2)) {
2378 vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector);
2379 vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector);
2380 vmcs_write16(GUEST_SS_SELECTOR, vmcs12->guest_ss_selector);
2381 vmcs_write16(GUEST_DS_SELECTOR, vmcs12->guest_ds_selector);
2382 vmcs_write16(GUEST_FS_SELECTOR, vmcs12->guest_fs_selector);
2383 vmcs_write16(GUEST_GS_SELECTOR, vmcs12->guest_gs_selector);
2384 vmcs_write16(GUEST_LDTR_SELECTOR, vmcs12->guest_ldtr_selector);
2385 vmcs_write16(GUEST_TR_SELECTOR, vmcs12->guest_tr_selector);
2386 vmcs_write32(GUEST_ES_LIMIT, vmcs12->guest_es_limit);
2387 vmcs_write32(GUEST_CS_LIMIT, vmcs12->guest_cs_limit);
2388 vmcs_write32(GUEST_SS_LIMIT, vmcs12->guest_ss_limit);
2389 vmcs_write32(GUEST_DS_LIMIT, vmcs12->guest_ds_limit);
2390 vmcs_write32(GUEST_FS_LIMIT, vmcs12->guest_fs_limit);
2391 vmcs_write32(GUEST_GS_LIMIT, vmcs12->guest_gs_limit);
2392 vmcs_write32(GUEST_LDTR_LIMIT, vmcs12->guest_ldtr_limit);
2393 vmcs_write32(GUEST_TR_LIMIT, vmcs12->guest_tr_limit);
2394 vmcs_write32(GUEST_GDTR_LIMIT, vmcs12->guest_gdtr_limit);
2395 vmcs_write32(GUEST_IDTR_LIMIT, vmcs12->guest_idtr_limit);
2396 vmcs_write32(GUEST_CS_AR_BYTES, vmcs12->guest_cs_ar_bytes);
2397 vmcs_write32(GUEST_SS_AR_BYTES, vmcs12->guest_ss_ar_bytes);
2398 vmcs_write32(GUEST_ES_AR_BYTES, vmcs12->guest_es_ar_bytes);
2399 vmcs_write32(GUEST_DS_AR_BYTES, vmcs12->guest_ds_ar_bytes);
2400 vmcs_write32(GUEST_FS_AR_BYTES, vmcs12->guest_fs_ar_bytes);
2401 vmcs_write32(GUEST_GS_AR_BYTES, vmcs12->guest_gs_ar_bytes);
2402 vmcs_write32(GUEST_LDTR_AR_BYTES, vmcs12->guest_ldtr_ar_bytes);
2403 vmcs_write32(GUEST_TR_AR_BYTES, vmcs12->guest_tr_ar_bytes);
2404 vmcs_writel(GUEST_ES_BASE, vmcs12->guest_es_base);
2405 vmcs_writel(GUEST_CS_BASE, vmcs12->guest_cs_base);
2406 vmcs_writel(GUEST_SS_BASE, vmcs12->guest_ss_base);
2407 vmcs_writel(GUEST_DS_BASE, vmcs12->guest_ds_base);
2408 vmcs_writel(GUEST_FS_BASE, vmcs12->guest_fs_base);
2409 vmcs_writel(GUEST_GS_BASE, vmcs12->guest_gs_base);
2410 vmcs_writel(GUEST_LDTR_BASE, vmcs12->guest_ldtr_base);
2411 vmcs_writel(GUEST_TR_BASE, vmcs12->guest_tr_base);
2412 vmcs_writel(GUEST_GDTR_BASE, vmcs12->guest_gdtr_base);
2413 vmcs_writel(GUEST_IDTR_BASE, vmcs12->guest_idtr_base);
2414
2415 vmx->segment_cache.bitmask = 0;
2416 }
2417
2418 if (!hv_evmcs || !(hv_evmcs->hv_clean_fields &
2419 HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1)) {
2420 vmcs_write32(GUEST_SYSENTER_CS, vmcs12->guest_sysenter_cs);
2421 vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS,
2422 vmcs12->guest_pending_dbg_exceptions);
2423 vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->guest_sysenter_esp);
2424 vmcs_writel(GUEST_SYSENTER_EIP, vmcs12->guest_sysenter_eip);
2425
2426
2427
2428
2429
2430 if (enable_ept) {
2431 vmcs_write64(GUEST_PDPTR0, vmcs12->guest_pdptr0);
2432 vmcs_write64(GUEST_PDPTR1, vmcs12->guest_pdptr1);
2433 vmcs_write64(GUEST_PDPTR2, vmcs12->guest_pdptr2);
2434 vmcs_write64(GUEST_PDPTR3, vmcs12->guest_pdptr3);
2435 }
2436
2437 if (kvm_mpx_supported() && vmx->nested.nested_run_pending &&
2438 (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS))
2439 vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs);
2440 }
2441
2442 if (nested_cpu_has_xsaves(vmcs12))
2443 vmcs_write64(XSS_EXIT_BITMAP, vmcs12->xss_exit_bitmap);
2444
	/*
	 * Whether page-faults are trapped is determined by a combination of
	 * 3 settings: PFEC_MASK, PFEC_MATCH and EXCEPTION_BITMAP.PF.  If L0
	 * doesn't care about page faults then we should set all of these to
	 * L1's desires. However, if L0 does care about (some) page faults,
	 * it is not easy (if at all possible?) to merge L0 and L1's desires,
	 * so we simply ask to exit on each and every L2 page fault. This is
	 * done by setting MASK=MATCH=0 and (see below) EB.PF=1.
	 * Note that below we don't need special code to set EB.PF beyond the
	 * "or"ing of the EB of vmcs01 and vmcs12, because when enable_ept,
	 * vmcs01's EB.PF is 0 so the "or" will take vmcs12's value, and when
	 * !enable_ept, EB.PF is 1, so the "or" will always be 1.
	 */
2458 if (vmx_need_pf_intercept(&vmx->vcpu)) {
2459
2460
2461
2462
2463 vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, 0);
2464 vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, 0);
2465 } else {
2466 vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, vmcs12->page_fault_error_code_mask);
2467 vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, vmcs12->page_fault_error_code_match);
2468 }
2469
2470 if (cpu_has_vmx_apicv()) {
2471 vmcs_write64(EOI_EXIT_BITMAP0, vmcs12->eoi_exit_bitmap0);
2472 vmcs_write64(EOI_EXIT_BITMAP1, vmcs12->eoi_exit_bitmap1);
2473 vmcs_write64(EOI_EXIT_BITMAP2, vmcs12->eoi_exit_bitmap2);
2474 vmcs_write64(EOI_EXIT_BITMAP3, vmcs12->eoi_exit_bitmap3);
2475 }
2476
2477
2478
2479
2480
2481 prepare_vmx_msr_autostore_list(&vmx->vcpu, MSR_IA32_TSC);
2482
2483 vmcs_write32(VM_EXIT_MSR_STORE_COUNT, vmx->msr_autostore.guest.nr);
2484 vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr);
2485 vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr);
2486
2487 set_cr4_guest_host_mask(vmx);
2488}
2489
/*
 * prepare_vmcs02 is called when the L1 guest hypervisor runs its nested
 * L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it
 * with L0's requirements for its guest (a.k.a. vmcs01), so we can run the L2
 * guest in a way that will both be appropriate to L1's requests, and our
 * needs. In addition to modifying the active vmcs (which is vmcs02), this
 * function also has additional necessary side-effects, like setting various
 * vcpu->arch fields.
 * Returns 0 on success, -EINVAL on failure, in which case the VM-entry
 * failure reason is written to *entry_failure_code.
 */
2501static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
2502 bool from_vmentry,
2503 enum vm_entry_failure_code *entry_failure_code)
2504{
2505 struct vcpu_vmx *vmx = to_vmx(vcpu);
2506 bool load_guest_pdptrs_vmcs12 = false;
2507
2508 if (vmx->nested.dirty_vmcs12 || evmptr_is_valid(vmx->nested.hv_evmcs_vmptr)) {
2509 prepare_vmcs02_rare(vmx, vmcs12);
2510 vmx->nested.dirty_vmcs12 = false;
2511
2512 load_guest_pdptrs_vmcs12 = !evmptr_is_valid(vmx->nested.hv_evmcs_vmptr) ||
2513 !(vmx->nested.hv_evmcs->hv_clean_fields &
2514 HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1);
2515 }
2516
2517 if (vmx->nested.nested_run_pending &&
2518 (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) {
2519 kvm_set_dr(vcpu, 7, vmcs12->guest_dr7);
2520 vmcs_write64(GUEST_IA32_DEBUGCTL, vmcs12->guest_ia32_debugctl);
2521 } else {
2522 kvm_set_dr(vcpu, 7, vcpu->arch.dr7);
2523 vmcs_write64(GUEST_IA32_DEBUGCTL, vmx->nested.vmcs01_debugctl);
2524 }
2525 if (kvm_mpx_supported() && (!vmx->nested.nested_run_pending ||
2526 !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS)))
2527 vmcs_write64(GUEST_BNDCFGS, vmx->nested.vmcs01_guest_bndcfgs);
2528 vmx_set_rflags(vcpu, vmcs12->guest_rflags);
2529
2530
2531
2532
2533
2534 vmx_update_exception_bitmap(vcpu);
2535 vcpu->arch.cr0_guest_owned_bits &= ~vmcs12->cr0_guest_host_mask;
2536 vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits);
2537
2538 if (vmx->nested.nested_run_pending &&
2539 (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT)) {
2540 vmcs_write64(GUEST_IA32_PAT, vmcs12->guest_ia32_pat);
2541 vcpu->arch.pat = vmcs12->guest_ia32_pat;
2542 } else if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
2543 vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat);
2544 }
2545
2546 vcpu->arch.tsc_offset = kvm_calc_nested_tsc_offset(
2547 vcpu->arch.l1_tsc_offset,
2548 vmx_get_l2_tsc_offset(vcpu),
2549 vmx_get_l2_tsc_multiplier(vcpu));
2550
2551 vcpu->arch.tsc_scaling_ratio = kvm_calc_nested_tsc_multiplier(
2552 vcpu->arch.l1_tsc_scaling_ratio,
2553 vmx_get_l2_tsc_multiplier(vcpu));
2554
2555 vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset);
2556 if (kvm_has_tsc_control)
2557 vmcs_write64(TSC_MULTIPLIER, vcpu->arch.tsc_scaling_ratio);
2558
2559 nested_vmx_transition_tlb_flush(vcpu, vmcs12, true);
2560
2561 if (nested_cpu_has_ept(vmcs12))
2562 nested_ept_init_mmu_context(vcpu);
2563
2564
2565
2566
2567
2568
2569
2570
2571
2572 vmx_set_cr0(vcpu, vmcs12->guest_cr0);
2573 vmcs_writel(CR0_READ_SHADOW, nested_read_cr0(vmcs12));
2574
2575 vmx_set_cr4(vcpu, vmcs12->guest_cr4);
2576 vmcs_writel(CR4_READ_SHADOW, nested_read_cr4(vmcs12));
2577
2578 vcpu->arch.efer = nested_vmx_calc_efer(vmx, vmcs12);
2579
2580 vmx_set_efer(vcpu, vcpu->arch.efer);
2581
	/*
	 * Guest state is invalid and unrestricted guest is disabled,
	 * which means L1 attempted VMEntry to L2 with invalid state.
	 * Fail the VMEntry.
	 *
	 * However, when force loading the guest state (SMM exit or
	 * loading nested state after migration), it's possible to
	 * have invalid guest state now, which will be later fixed by
	 * restoring L2 register state.
	 */
2592 if (CC(from_vmentry && !vmx_guest_state_valid(vcpu))) {
2593 *entry_failure_code = ENTRY_FAIL_DEFAULT;
2594 return -EINVAL;
2595 }
2596
2597
2598 if (nested_vmx_load_cr3(vcpu, vmcs12->guest_cr3, nested_cpu_has_ept(vmcs12),
2599 from_vmentry, entry_failure_code))
2600 return -EINVAL;
2601
2602
2603
2604
2605
2606
2607
2608
2609 if (enable_ept)
2610 vmcs_writel(GUEST_CR3, vmcs12->guest_cr3);
2611
2612
2613 if (load_guest_pdptrs_vmcs12 && nested_cpu_has_ept(vmcs12) &&
2614 is_pae_paging(vcpu)) {
2615 vmcs_write64(GUEST_PDPTR0, vmcs12->guest_pdptr0);
2616 vmcs_write64(GUEST_PDPTR1, vmcs12->guest_pdptr1);
2617 vmcs_write64(GUEST_PDPTR2, vmcs12->guest_pdptr2);
2618 vmcs_write64(GUEST_PDPTR3, vmcs12->guest_pdptr3);
2619 }
2620
2621 if (!enable_ept)
2622 vcpu->arch.walk_mmu->inject_page_fault = vmx_inject_page_fault_nested;
2623
2624 if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) &&
2625 WARN_ON_ONCE(kvm_set_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL,
2626 vmcs12->guest_ia32_perf_global_ctrl)))
2627 return -EINVAL;
2628
2629 kvm_rsp_write(vcpu, vmcs12->guest_rsp);
2630 kvm_rip_write(vcpu, vmcs12->guest_rip);
2631
2632
2633
2634
2635
2636
2637
2638 if (evmptr_is_valid(vmx->nested.hv_evmcs_vmptr))
2639 vmx->nested.hv_evmcs->hv_clean_fields |=
2640 HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
2641
2642 return 0;
2643}
2644
2645static int nested_vmx_check_nmi_controls(struct vmcs12 *vmcs12)
2646{
2647 if (CC(!nested_cpu_has_nmi_exiting(vmcs12) &&
2648 nested_cpu_has_virtual_nmis(vmcs12)))
2649 return -EINVAL;
2650
2651 if (CC(!nested_cpu_has_virtual_nmis(vmcs12) &&
2652 nested_cpu_has(vmcs12, CPU_BASED_NMI_WINDOW_EXITING)))
2653 return -EINVAL;
2654
2655 return 0;
2656}
2657
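/*
 * Validate an EPTP value supplied by L1 against the EPT capabilities exposed
 * to it: memory type (UC/WB), page-walk length (4 or 5 levels), reserved
 * bits (bits 11:7 and anything above the guest-physical address limit), and
 * the accessed/dirty enable bit.
 */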
2658static bool nested_vmx_check_eptp(struct kvm_vcpu *vcpu, u64 new_eptp)
2659{
2660 struct vcpu_vmx *vmx = to_vmx(vcpu);
2661
2662
2663 switch (new_eptp & VMX_EPTP_MT_MASK) {
2664 case VMX_EPTP_MT_UC:
2665 if (CC(!(vmx->nested.msrs.ept_caps & VMX_EPTP_UC_BIT)))
2666 return false;
2667 break;
2668 case VMX_EPTP_MT_WB:
2669 if (CC(!(vmx->nested.msrs.ept_caps & VMX_EPTP_WB_BIT)))
2670 return false;
2671 break;
2672 default:
2673 return false;
2674 }
2675
2676
2677 switch (new_eptp & VMX_EPTP_PWL_MASK) {
2678 case VMX_EPTP_PWL_5:
2679 if (CC(!(vmx->nested.msrs.ept_caps & VMX_EPT_PAGE_WALK_5_BIT)))
2680 return false;
2681 break;
2682 case VMX_EPTP_PWL_4:
2683 if (CC(!(vmx->nested.msrs.ept_caps & VMX_EPT_PAGE_WALK_4_BIT)))
2684 return false;
2685 break;
2686 default:
2687 return false;
2688 }
2689
2690
2691 if (CC(kvm_vcpu_is_illegal_gpa(vcpu, new_eptp) || ((new_eptp >> 7) & 0x1f)))
2692 return false;
2693
2694
2695 if (new_eptp & VMX_EPTP_AD_ENABLE_BIT) {
2696 if (CC(!(vmx->nested.msrs.ept_caps & VMX_EPT_AD_BIT)))
2697 return false;
2698 }
2699
2700 return true;
2701}
2702
/*
 * Checks related to VM-Execution Control Fields
 */
2706static int nested_check_vm_execution_controls(struct kvm_vcpu *vcpu,
2707 struct vmcs12 *vmcs12)
2708{
2709 struct vcpu_vmx *vmx = to_vmx(vcpu);
2710
2711 if (CC(!vmx_control_verify(vmcs12->pin_based_vm_exec_control,
2712 vmx->nested.msrs.pinbased_ctls_low,
2713 vmx->nested.msrs.pinbased_ctls_high)) ||
2714 CC(!vmx_control_verify(vmcs12->cpu_based_vm_exec_control,
2715 vmx->nested.msrs.procbased_ctls_low,
2716 vmx->nested.msrs.procbased_ctls_high)))
2717 return -EINVAL;
2718
2719 if (nested_cpu_has(vmcs12, CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) &&
2720 CC(!vmx_control_verify(vmcs12->secondary_vm_exec_control,
2721 vmx->nested.msrs.secondary_ctls_low,
2722 vmx->nested.msrs.secondary_ctls_high)))
2723 return -EINVAL;
2724
2725 if (CC(vmcs12->cr3_target_count > nested_cpu_vmx_misc_cr3_count(vcpu)) ||
2726 nested_vmx_check_io_bitmap_controls(vcpu, vmcs12) ||
2727 nested_vmx_check_msr_bitmap_controls(vcpu, vmcs12) ||
2728 nested_vmx_check_tpr_shadow_controls(vcpu, vmcs12) ||
2729 nested_vmx_check_apic_access_controls(vcpu, vmcs12) ||
2730 nested_vmx_check_apicv_controls(vcpu, vmcs12) ||
2731 nested_vmx_check_nmi_controls(vmcs12) ||
2732 nested_vmx_check_pml_controls(vcpu, vmcs12) ||
2733 nested_vmx_check_unrestricted_guest_controls(vcpu, vmcs12) ||
2734 nested_vmx_check_mode_based_ept_exec_controls(vcpu, vmcs12) ||
2735 nested_vmx_check_shadow_vmcs_controls(vcpu, vmcs12) ||
2736 CC(nested_cpu_has_vpid(vmcs12) && !vmcs12->virtual_processor_id))
2737 return -EINVAL;
2738
2739 if (!nested_cpu_has_preemption_timer(vmcs12) &&
2740 nested_cpu_has_save_preemption_timer(vmcs12))
2741 return -EINVAL;
2742
2743 if (nested_cpu_has_ept(vmcs12) &&
2744 CC(!nested_vmx_check_eptp(vcpu, vmcs12->ept_pointer)))
2745 return -EINVAL;
2746
2747 if (nested_cpu_has_vmfunc(vmcs12)) {
2748 if (CC(vmcs12->vm_function_control &
2749 ~vmx->nested.msrs.vmfunc_controls))
2750 return -EINVAL;
2751
2752 if (nested_cpu_has_eptp_switching(vmcs12)) {
2753 if (CC(!nested_cpu_has_ept(vmcs12)) ||
2754 CC(!page_address_valid(vcpu, vmcs12->eptp_list_address)))
2755 return -EINVAL;
2756 }
2757 }
2758
2759 return 0;
2760}
2761
/*
 * Checks related to VM-Exit Control Fields
 */
2765static int nested_check_vm_exit_controls(struct kvm_vcpu *vcpu,
2766 struct vmcs12 *vmcs12)
2767{
2768 struct vcpu_vmx *vmx = to_vmx(vcpu);
2769
2770 if (CC(!vmx_control_verify(vmcs12->vm_exit_controls,
2771 vmx->nested.msrs.exit_ctls_low,
2772 vmx->nested.msrs.exit_ctls_high)) ||
2773 CC(nested_vmx_check_exit_msr_switch_controls(vcpu, vmcs12)))
2774 return -EINVAL;
2775
2776 return 0;
2777}
2778
/*
 * Checks related to VM-Entry Control Fields
 */
2782static int nested_check_vm_entry_controls(struct kvm_vcpu *vcpu,
2783 struct vmcs12 *vmcs12)
2784{
2785 struct vcpu_vmx *vmx = to_vmx(vcpu);
2786
2787 if (CC(!vmx_control_verify(vmcs12->vm_entry_controls,
2788 vmx->nested.msrs.entry_ctls_low,
2789 vmx->nested.msrs.entry_ctls_high)))
2790 return -EINVAL;
2791
2792
2793
2794
2795
2796
2797
2798 if (vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK) {
2799 u32 intr_info = vmcs12->vm_entry_intr_info_field;
2800 u8 vector = intr_info & INTR_INFO_VECTOR_MASK;
2801 u32 intr_type = intr_info & INTR_INFO_INTR_TYPE_MASK;
2802 bool has_error_code = intr_info & INTR_INFO_DELIVER_CODE_MASK;
2803 bool should_have_error_code;
2804 bool urg = nested_cpu_has2(vmcs12,
2805 SECONDARY_EXEC_UNRESTRICTED_GUEST);
2806 bool prot_mode = !urg || vmcs12->guest_cr0 & X86_CR0_PE;
2807
2808
2809 if (CC(intr_type == INTR_TYPE_RESERVED) ||
2810 CC(intr_type == INTR_TYPE_OTHER_EVENT &&
2811 !nested_cpu_supports_monitor_trap_flag(vcpu)))
2812 return -EINVAL;
2813
2814
2815 if (CC(intr_type == INTR_TYPE_NMI_INTR && vector != NMI_VECTOR) ||
2816 CC(intr_type == INTR_TYPE_HARD_EXCEPTION && vector > 31) ||
2817 CC(intr_type == INTR_TYPE_OTHER_EVENT && vector != 0))
2818 return -EINVAL;
2819
2820
2821 should_have_error_code =
2822 intr_type == INTR_TYPE_HARD_EXCEPTION && prot_mode &&
2823 x86_exception_has_error_code(vector);
2824 if (CC(has_error_code != should_have_error_code))
2825 return -EINVAL;
2826
2827
2828 if (CC(has_error_code &&
2829 vmcs12->vm_entry_exception_error_code & GENMASK(31, 16)))
2830 return -EINVAL;
2831
2832
2833 if (CC(intr_info & INTR_INFO_RESVD_BITS_MASK))
2834 return -EINVAL;
2835
2836
2837 switch (intr_type) {
2838 case INTR_TYPE_SOFT_EXCEPTION:
2839 case INTR_TYPE_SOFT_INTR:
2840 case INTR_TYPE_PRIV_SW_EXCEPTION:
2841 if (CC(vmcs12->vm_entry_instruction_len > 15) ||
2842 CC(vmcs12->vm_entry_instruction_len == 0 &&
2843 CC(!nested_cpu_has_zero_length_injection(vcpu))))
2844 return -EINVAL;
2845 }
2846 }
2847
2848 if (nested_vmx_check_entry_msr_switch_controls(vcpu, vmcs12))
2849 return -EINVAL;
2850
2851 return 0;
2852}
2853
2854static int nested_vmx_check_controls(struct kvm_vcpu *vcpu,
2855 struct vmcs12 *vmcs12)
2856{
2857 if (nested_check_vm_execution_controls(vcpu, vmcs12) ||
2858 nested_check_vm_exit_controls(vcpu, vmcs12) ||
2859 nested_check_vm_entry_controls(vcpu, vmcs12))
2860 return -EINVAL;
2861
2862 if (to_vmx(vcpu)->nested.enlightened_vmcs_enabled)
2863 return nested_evmcs_check_controls(vmcs12);
2864
2865 return 0;
2866}
2867
2868static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu,
2869 struct vmcs12 *vmcs12)
2870{
2871 bool ia32e;
2872
2873 if (CC(!nested_host_cr0_valid(vcpu, vmcs12->host_cr0)) ||
2874 CC(!nested_host_cr4_valid(vcpu, vmcs12->host_cr4)) ||
2875 CC(kvm_vcpu_is_illegal_gpa(vcpu, vmcs12->host_cr3)))
2876 return -EINVAL;
2877
2878 if (CC(is_noncanonical_address(vmcs12->host_ia32_sysenter_esp, vcpu)) ||
2879 CC(is_noncanonical_address(vmcs12->host_ia32_sysenter_eip, vcpu)))
2880 return -EINVAL;
2881
2882 if ((vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PAT) &&
2883 CC(!kvm_pat_valid(vmcs12->host_ia32_pat)))
2884 return -EINVAL;
2885
2886 if ((vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) &&
2887 CC(!kvm_valid_perf_global_ctrl(vcpu_to_pmu(vcpu),
2888 vmcs12->host_ia32_perf_global_ctrl)))
2889 return -EINVAL;
2890
2891#ifdef CONFIG_X86_64
2892 ia32e = !!(vcpu->arch.efer & EFER_LMA);
2893#else
2894 ia32e = false;
2895#endif
2896
2897 if (ia32e) {
2898 if (CC(!(vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE)) ||
2899 CC(!(vmcs12->host_cr4 & X86_CR4_PAE)))
2900 return -EINVAL;
2901 } else {
2902 if (CC(vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE) ||
2903 CC(vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) ||
2904 CC(vmcs12->host_cr4 & X86_CR4_PCIDE) ||
2905 CC((vmcs12->host_rip) >> 32))
2906 return -EINVAL;
2907 }
2908
2909 if (CC(vmcs12->host_cs_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) ||
2910 CC(vmcs12->host_ss_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) ||
2911 CC(vmcs12->host_ds_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) ||
2912 CC(vmcs12->host_es_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) ||
2913 CC(vmcs12->host_fs_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) ||
2914 CC(vmcs12->host_gs_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) ||
2915 CC(vmcs12->host_tr_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) ||
2916 CC(vmcs12->host_cs_selector == 0) ||
2917 CC(vmcs12->host_tr_selector == 0) ||
2918 CC(vmcs12->host_ss_selector == 0 && !ia32e))
2919 return -EINVAL;
2920
2921 if (CC(is_noncanonical_address(vmcs12->host_fs_base, vcpu)) ||
2922 CC(is_noncanonical_address(vmcs12->host_gs_base, vcpu)) ||
2923 CC(is_noncanonical_address(vmcs12->host_gdtr_base, vcpu)) ||
2924 CC(is_noncanonical_address(vmcs12->host_idtr_base, vcpu)) ||
2925 CC(is_noncanonical_address(vmcs12->host_tr_base, vcpu)) ||
2926 CC(is_noncanonical_address(vmcs12->host_rip, vcpu)))
2927 return -EINVAL;
2928
2929
2930
2931
2932
2933
2934
2935 if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER) {
2936 if (CC(!kvm_valid_efer(vcpu, vmcs12->host_ia32_efer)) ||
2937 CC(ia32e != !!(vmcs12->host_ia32_efer & EFER_LMA)) ||
2938 CC(ia32e != !!(vmcs12->host_ia32_efer & EFER_LME)))
2939 return -EINVAL;
2940 }
2941
2942 return 0;
2943}
2944
2945static int nested_vmx_check_vmcs_link_ptr(struct kvm_vcpu *vcpu,
2946 struct vmcs12 *vmcs12)
2947{
2948 int r = 0;
2949 struct vmcs12 *shadow;
2950 struct kvm_host_map map;
2951
2952 if (vmcs12->vmcs_link_pointer == -1ull)
2953 return 0;
2954
2955 if (CC(!page_address_valid(vcpu, vmcs12->vmcs_link_pointer)))
2956 return -EINVAL;
2957
2958 if (CC(kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->vmcs_link_pointer), &map)))
2959 return -EINVAL;
2960
2961 shadow = map.hva;
2962
2963 if (CC(shadow->hdr.revision_id != VMCS12_REVISION) ||
2964 CC(shadow->hdr.shadow_vmcs != nested_cpu_has_shadow_vmcs(vmcs12)))
2965 r = -EINVAL;
2966
2967 kvm_vcpu_unmap(vcpu, &map, false);
2968 return r;
2969}
2970
/*
 * Checks related to Guest Non-register State
 */
2974static int nested_check_guest_non_reg_state(struct vmcs12 *vmcs12)
2975{
2976 if (CC(vmcs12->guest_activity_state != GUEST_ACTIVITY_ACTIVE &&
2977 vmcs12->guest_activity_state != GUEST_ACTIVITY_HLT &&
2978 vmcs12->guest_activity_state != GUEST_ACTIVITY_WAIT_SIPI))
2979 return -EINVAL;
2980
2981 return 0;
2982}
2983
2984static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu,
2985 struct vmcs12 *vmcs12,
2986 enum vm_entry_failure_code *entry_failure_code)
2987{
2988 bool ia32e;
2989
2990 *entry_failure_code = ENTRY_FAIL_DEFAULT;
2991
2992 if (CC(!nested_guest_cr0_valid(vcpu, vmcs12->guest_cr0)) ||
2993 CC(!nested_guest_cr4_valid(vcpu, vmcs12->guest_cr4)))
2994 return -EINVAL;
2995
2996 if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS) &&
2997 CC(!kvm_dr7_valid(vmcs12->guest_dr7)))
2998 return -EINVAL;
2999
3000 if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT) &&
3001 CC(!kvm_pat_valid(vmcs12->guest_ia32_pat)))
3002 return -EINVAL;
3003
3004 if (nested_vmx_check_vmcs_link_ptr(vcpu, vmcs12)) {
3005 *entry_failure_code = ENTRY_FAIL_VMCS_LINK_PTR;
3006 return -EINVAL;
3007 }
3008
3009 if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) &&
3010 CC(!kvm_valid_perf_global_ctrl(vcpu_to_pmu(vcpu),
3011 vmcs12->guest_ia32_perf_global_ctrl)))
3012 return -EINVAL;
3013
3014
3015
3016
3017
3018
3019
3020
3021
3022
3023 if (to_vmx(vcpu)->nested.nested_run_pending &&
3024 (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER)) {
3025 ia32e = (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) != 0;
3026 if (CC(!kvm_valid_efer(vcpu, vmcs12->guest_ia32_efer)) ||
3027 CC(ia32e != !!(vmcs12->guest_ia32_efer & EFER_LMA)) ||
3028 CC(((vmcs12->guest_cr0 & X86_CR0_PG) &&
3029 ia32e != !!(vmcs12->guest_ia32_efer & EFER_LME))))
3030 return -EINVAL;
3031 }
3032
3033 if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS) &&
3034 (CC(is_noncanonical_address(vmcs12->guest_bndcfgs & PAGE_MASK, vcpu)) ||
3035 CC((vmcs12->guest_bndcfgs & MSR_IA32_BNDCFGS_RSVD))))
3036 return -EINVAL;
3037
3038 if (nested_check_guest_non_reg_state(vmcs12))
3039 return -EINVAL;
3040
3041 return 0;
3042}
3043
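/*
 * Optional "early" hardware check of a nested VM-entry: when the
 * nested_early_check module parameter is set, perform a throwaway
 * VMLAUNCH/VMRESUME of vmcs02 so that hardware runs the consistency checks
 * KVM would otherwise have to emulate.  The MSR autoload/store counts are
 * temporarily zeroed and GUEST_RFLAGS is written with an architecturally
 * invalid value (bit 1 clear), so the entry is guaranteed to fail before any
 * guest code runs; a VMFail is then reported to L1 as an invalid control
 * field.
 */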
3044static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu)
3045{
3046 struct vcpu_vmx *vmx = to_vmx(vcpu);
3047 unsigned long cr3, cr4;
3048 bool vm_fail;
3049
3050 if (!nested_early_check)
3051 return 0;
3052
3053 if (vmx->msr_autoload.host.nr)
3054 vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0);
3055 if (vmx->msr_autoload.guest.nr)
3056 vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0);
3057
3058 preempt_disable();
3059
3060 vmx_prepare_switch_to_guest(vcpu);
3061
3062
3063
3064
3065
3066
3067
3068 vmcs_writel(GUEST_RFLAGS, 0);
3069
3070 cr3 = __get_current_cr3_fast();
3071 if (unlikely(cr3 != vmx->loaded_vmcs->host_state.cr3)) {
3072 vmcs_writel(HOST_CR3, cr3);
3073 vmx->loaded_vmcs->host_state.cr3 = cr3;
3074 }
3075
3076 cr4 = cr4_read_shadow();
3077 if (unlikely(cr4 != vmx->loaded_vmcs->host_state.cr4)) {
3078 vmcs_writel(HOST_CR4, cr4);
3079 vmx->loaded_vmcs->host_state.cr4 = cr4;
3080 }
3081
3082 vm_fail = __vmx_vcpu_run(vmx, (unsigned long *)&vcpu->arch.regs,
3083 vmx->loaded_vmcs->launched);
3084
3085 if (vmx->msr_autoload.host.nr)
3086 vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr);
3087 if (vmx->msr_autoload.guest.nr)
3088 vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr);
3089
3090 if (vm_fail) {
3091 u32 error = vmcs_read32(VM_INSTRUCTION_ERROR);
3092
3093 preempt_enable();
3094
3095 trace_kvm_nested_vmenter_failed(
3096 "early hardware check VM-instruction error: ", error);
3097 WARN_ON_ONCE(error != VMXERR_ENTRY_INVALID_CONTROL_FIELD);
3098 return 1;
3099 }
3100
3101
3102
3103
3104 if (hw_breakpoint_active())
3105 set_debugreg(__this_cpu_read(cpu_dr7), 7);
3106 local_irq_enable();
3107 preempt_enable();
3108
3109
3110
3111
3112
3113
3114
3115
3116 WARN_ON(!(vmcs_read32(VM_EXIT_REASON) &
3117 VMX_EXIT_REASONS_FAILED_VMENTRY));
3118
3119 return 0;
3120}
3121
3122static bool nested_get_evmcs_page(struct kvm_vcpu *vcpu)
3123{
3124 struct vcpu_vmx *vmx = to_vmx(vcpu);
3125
3126
3127
3128
3129
3130
3131 if (vmx->nested.enlightened_vmcs_enabled &&
3132 vmx->nested.hv_evmcs_vmptr == EVMPTR_MAP_PENDING) {
3133 enum nested_evmptrld_status evmptrld_status =
3134 nested_vmx_handle_enlightened_vmptrld(vcpu, false);
3135
3136 if (evmptrld_status == EVMPTRLD_VMFAIL ||
3137 evmptrld_status == EVMPTRLD_ERROR)
3138 return false;
3139
3140
3141
3142
3143
3144 vmx->nested.need_vmcs12_to_shadow_sync = true;
3145 }
3146
3147 return true;
3148}
3149
3150static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu)
3151{
3152 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
3153 struct vcpu_vmx *vmx = to_vmx(vcpu);
3154 struct kvm_host_map *map;
3155 struct page *page;
3156 u64 hpa;
3157
3158 if (!vcpu->arch.pdptrs_from_userspace &&
3159 !nested_cpu_has_ept(vmcs12) && is_pae_paging(vcpu)) {
3160
3161
3162
3163
3164
3165 if (CC(!load_pdptrs(vcpu, vcpu->arch.walk_mmu, vcpu->arch.cr3)))
3166 return false;
3167 }
3168
3169
3170 if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
3171
3172
3173
3174
3175
3176
3177 if (vmx->nested.apic_access_page) {
3178 kvm_release_page_clean(vmx->nested.apic_access_page);
3179 vmx->nested.apic_access_page = NULL;
3180 }
3181 page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->apic_access_addr);
3182 if (!is_error_page(page)) {
3183 vmx->nested.apic_access_page = page;
3184 hpa = page_to_phys(vmx->nested.apic_access_page);
3185 vmcs_write64(APIC_ACCESS_ADDR, hpa);
3186 } else {
3187 pr_debug_ratelimited("%s: no backing 'struct page' for APIC-access address in vmcs12\n",
3188 __func__);
3189 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
3190 vcpu->run->internal.suberror =
3191 KVM_INTERNAL_ERROR_EMULATION;
3192 vcpu->run->internal.ndata = 0;
3193 return false;
3194 }
3195 }
3196
3197 if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) {
3198 map = &vmx->nested.virtual_apic_map;
3199
3200 if (!kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->virtual_apic_page_addr), map)) {
3201 vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, pfn_to_hpa(map->pfn));
3202 } else if (nested_cpu_has(vmcs12, CPU_BASED_CR8_LOAD_EXITING) &&
3203 nested_cpu_has(vmcs12, CPU_BASED_CR8_STORE_EXITING) &&
3204 !nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
3205
3206
3207
3208
3209
3210
3211
3212
3213 exec_controls_clearbit(vmx, CPU_BASED_TPR_SHADOW);
3214 } else {
3215
3216
3217
3218
3219 vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, -1ull);
3220 }
3221 }
3222
3223 if (nested_cpu_has_posted_intr(vmcs12)) {
3224 map = &vmx->nested.pi_desc_map;
3225
3226 if (!kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->posted_intr_desc_addr), map)) {
3227 vmx->nested.pi_desc =
3228 (struct pi_desc *)(((void *)map->hva) +
3229 offset_in_page(vmcs12->posted_intr_desc_addr));
3230 vmcs_write64(POSTED_INTR_DESC_ADDR,
3231 pfn_to_hpa(map->pfn) + offset_in_page(vmcs12->posted_intr_desc_addr));
3232 } else {
3233
3234
3235
3236
3237
3238
3239 vmx->nested.pi_desc = NULL;
3240 pin_controls_clearbit(vmx, PIN_BASED_POSTED_INTR);
3241 }
3242 }
3243 if (nested_vmx_prepare_msr_bitmap(vcpu, vmcs12))
3244 exec_controls_setbit(vmx, CPU_BASED_USE_MSR_BITMAPS);
3245 else
3246 exec_controls_clearbit(vmx, CPU_BASED_USE_MSR_BITMAPS);
3247
3248 return true;
3249}
3250
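/*
 * KVM_REQ_GET_NESTED_STATE_PAGES handler: (re)map the guest pages that vmcs02
 * depends on (enlightened VMCS, APIC-access page, virtual-APIC page,
 * posted-interrupt descriptor, MSR bitmap).  This is deferred to the next
 * vCPU run after nested state is restored, since the pages may not be
 * mappable at KVM_SET_NESTED_STATE time.
 */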
3251static bool vmx_get_nested_state_pages(struct kvm_vcpu *vcpu)
3252{
3253 if (!nested_get_evmcs_page(vcpu)) {
3254 pr_debug_ratelimited("%s: enlightened vmptrld failed\n",
3255 __func__);
3256 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
3257 vcpu->run->internal.suberror =
3258 KVM_INTERNAL_ERROR_EMULATION;
3259 vcpu->run->internal.ndata = 0;
3260
3261 return false;
3262 }
3263
3264 if (is_guest_mode(vcpu) && !nested_get_vmcs12_pages(vcpu))
3265 return false;
3266
3267 return true;
3268}
3269
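/*
 * Emulate a PML (Page Modification Logging) entry on behalf of L2: if L1
 * enabled PML for L2, append the dirtied GPA to L1's PML buffer and
 * decrement the guest PML index, flagging a PML-full exit to L1 once the
 * index is no longer valid.  Hardware PML is never enabled for vmcs02, so
 * this is done entirely in software.
 */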
3270static int nested_vmx_write_pml_buffer(struct kvm_vcpu *vcpu, gpa_t gpa)
3271{
3272 struct vmcs12 *vmcs12;
3273 struct vcpu_vmx *vmx = to_vmx(vcpu);
3274 gpa_t dst;
3275
3276 if (WARN_ON_ONCE(!is_guest_mode(vcpu)))
3277 return 0;
3278
3279 if (WARN_ON_ONCE(vmx->nested.pml_full))
3280 return 1;
3281
3282
3283
3284
3285
3286 vmcs12 = get_vmcs12(vcpu);
3287 if (!nested_cpu_has_pml(vmcs12))
3288 return 0;
3289
3290 if (vmcs12->guest_pml_index >= PML_ENTITY_NUM) {
3291 vmx->nested.pml_full = true;
3292 return 1;
3293 }
3294
3295 gpa &= ~0xFFFull;
3296 dst = vmcs12->pml_address + sizeof(u64) * vmcs12->guest_pml_index;
3297
3298 if (kvm_write_guest_page(vcpu->kvm, gpa_to_gfn(dst), &gpa,
3299 offset_in_page(dst), sizeof(gpa)))
3300 return 0;
3301
3302 vmcs12->guest_pml_index--;
3303
3304 return 0;
3305}
3306
/*
 * Common permission check for emulated VMX instructions executed by L1:
 * inject #UD if the vCPU is not in VMX operation (no prior VMXON), inject
 * #GP(0) if CPL is not zero, and return 1 only when the instruction may
 * proceed.
 */
3314static int nested_vmx_check_permission(struct kvm_vcpu *vcpu)
3315{
3316 if (!to_vmx(vcpu)->nested.vmxon) {
3317 kvm_queue_exception(vcpu, UD_VECTOR);
3318 return 0;
3319 }
3320
3321 if (vmx_get_cpl(vcpu)) {
3322 kvm_inject_gp(vcpu, 0);
3323 return 0;
3324 }
3325
3326 return 1;
3327}
3328
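/*
 * With APICv active, an interrupt is deliverable when the priority class
 * (high nibble) of the Requesting Virtual Interrupt exceeds that of the
 * virtual PPR, mirroring the processor's own RVI/PPR comparison.
 */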
3329static u8 vmx_has_apicv_interrupt(struct kvm_vcpu *vcpu)
3330{
3331 u8 rvi = vmx_get_rvi();
3332 u8 vppr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_PROCPRI);
3333
3334 return ((rvi & 0xf0) > (vppr & 0xf0));
3335}
3336
3337static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
3338 struct vmcs12 *vmcs12);
3339
/*
 * If from_vmentry is false, this is being called from state restore (either RSM
 * or KVM_SET_NESTED_STATE).  Otherwise it's called from vmlaunch/vmresume.
 *
 * Returns:
 *	NVMX_VMENTRY_SUCCESS: Entered VMX non-root mode.
 *	NVMX_VMENTRY_VMFAIL:  Consistency check VMFail.
 *	NVMX_VMENTRY_VMEXIT:  Consistency check VMExit.
 *	NVMX_VMENTRY_KVM_INTERNAL_ERROR: KVM internal error.
 */
3350enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
3351 bool from_vmentry)
3352{
3353 struct vcpu_vmx *vmx = to_vmx(vcpu);
3354 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
3355 enum vm_entry_failure_code entry_failure_code;
3356 bool evaluate_pending_interrupts;
3357 union vmx_exit_reason exit_reason = {
3358 .basic = EXIT_REASON_INVALID_STATE,
3359 .failed_vmentry = 1,
3360 };
3361 u32 failed_index;
3362
3363 if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu))
3364 kvm_vcpu_flush_tlb_current(vcpu);
3365
3366 evaluate_pending_interrupts = exec_controls_get(vmx) &
3367 (CPU_BASED_INTR_WINDOW_EXITING | CPU_BASED_NMI_WINDOW_EXITING);
3368 if (likely(!evaluate_pending_interrupts) && kvm_vcpu_apicv_active(vcpu))
3369 evaluate_pending_interrupts |= vmx_has_apicv_interrupt(vcpu);
3370
3371 if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS))
3372 vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
3373 if (kvm_mpx_supported() &&
3374 !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS))
3375 vmx->nested.vmcs01_guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS);
3376
3377
3378
3379
3380
3381
3382
3383
3384
3385
3386
3387
3388
3389
3390
3391
3392
3393 if (!enable_ept && !nested_early_check)
3394 vmcs_writel(GUEST_CR3, vcpu->arch.cr3);
3395
3396 vmx_switch_vmcs(vcpu, &vmx->nested.vmcs02);
3397
3398 prepare_vmcs02_early(vmx, &vmx->vmcs01, vmcs12);
3399
3400 if (from_vmentry) {
3401 if (unlikely(!nested_get_vmcs12_pages(vcpu))) {
3402 vmx_switch_vmcs(vcpu, &vmx->vmcs01);
3403 return NVMX_VMENTRY_KVM_INTERNAL_ERROR;
3404 }
3405
3406 if (nested_vmx_check_vmentry_hw(vcpu)) {
3407 vmx_switch_vmcs(vcpu, &vmx->vmcs01);
3408 return NVMX_VMENTRY_VMFAIL;
3409 }
3410
3411 if (nested_vmx_check_guest_state(vcpu, vmcs12,
3412 &entry_failure_code)) {
3413 exit_reason.basic = EXIT_REASON_INVALID_STATE;
3414 vmcs12->exit_qualification = entry_failure_code;
3415 goto vmentry_fail_vmexit;
3416 }
3417 }
3418
3419 enter_guest_mode(vcpu);
3420
3421 if (prepare_vmcs02(vcpu, vmcs12, from_vmentry, &entry_failure_code)) {
3422 exit_reason.basic = EXIT_REASON_INVALID_STATE;
3423 vmcs12->exit_qualification = entry_failure_code;
3424 goto vmentry_fail_vmexit_guest_mode;
3425 }
3426
3427 if (from_vmentry) {
3428 failed_index = nested_vmx_load_msr(vcpu,
3429 vmcs12->vm_entry_msr_load_addr,
3430 vmcs12->vm_entry_msr_load_count);
3431 if (failed_index) {
3432 exit_reason.basic = EXIT_REASON_MSR_LOAD_FAIL;
3433 vmcs12->exit_qualification = failed_index;
3434 goto vmentry_fail_vmexit_guest_mode;
3435 }
3436 } else {
3437
3438
3439
3440
3441
3442
3443
3444 kvm_make_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
3445 }
3446
3447
3448
3449
3450
3451
3452
3453
3454
3455
3456
3457
3458
3459
3460
3461 if (unlikely(evaluate_pending_interrupts))
3462 kvm_make_request(KVM_REQ_EVENT, vcpu);
3463
3464
3465
3466
3467
3468
3469 vmx->nested.preemption_timer_expired = false;
3470 if (nested_cpu_has_preemption_timer(vmcs12)) {
3471 u64 timer_value = vmx_calc_preemption_timer_value(vcpu);
3472 vmx_start_preemption_timer(vcpu, timer_value);
3473 }
3474
3475
3476
3477
3478
3479
3480
3481 return NVMX_VMENTRY_SUCCESS;
3482
3483
3484
3485
3486
3487
3488vmentry_fail_vmexit_guest_mode:
3489 if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETTING)
3490 vcpu->arch.tsc_offset -= vmcs12->tsc_offset;
3491 leave_guest_mode(vcpu);
3492
3493vmentry_fail_vmexit:
3494 vmx_switch_vmcs(vcpu, &vmx->vmcs01);
3495
3496 if (!from_vmentry)
3497 return NVMX_VMENTRY_VMEXIT;
3498
3499 load_vmcs12_host_state(vcpu, vmcs12);
3500 vmcs12->vm_exit_reason = exit_reason.full;
3501 if (enable_shadow_vmcs || evmptr_is_valid(vmx->nested.hv_evmcs_vmptr))
3502 vmx->nested.need_vmcs12_to_shadow_sync = true;
3503 return NVMX_VMENTRY_VMEXIT;
3504}
3505
/*
 * nested_vmx_run() handles a nested entry, i.e., a VMLAUNCH or VMRESUME on L1
 * for running an L2 nested guest.
 */
3510static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
3511{
3512 struct vmcs12 *vmcs12;
3513 enum nvmx_vmentry_status status;
3514 struct vcpu_vmx *vmx = to_vmx(vcpu);
3515 u32 interrupt_shadow = vmx_get_interrupt_shadow(vcpu);
3516 enum nested_evmptrld_status evmptrld_status;
3517
3518 if (!nested_vmx_check_permission(vcpu))
3519 return 1;
3520
3521 evmptrld_status = nested_vmx_handle_enlightened_vmptrld(vcpu, launch);
3522 if (evmptrld_status == EVMPTRLD_ERROR) {
3523 kvm_queue_exception(vcpu, UD_VECTOR);
3524 return 1;
3525 } else if (CC(evmptrld_status == EVMPTRLD_VMFAIL)) {
3526 return nested_vmx_failInvalid(vcpu);
3527 }
3528
3529 if (CC(!evmptr_is_valid(vmx->nested.hv_evmcs_vmptr) &&
3530 vmx->nested.current_vmptr == -1ull))
3531 return nested_vmx_failInvalid(vcpu);
3532
3533 vmcs12 = get_vmcs12(vcpu);
3534
3535
3536
3537
3538
3539
3540
3541 if (CC(vmcs12->hdr.shadow_vmcs))
3542 return nested_vmx_failInvalid(vcpu);
3543
3544 if (evmptr_is_valid(vmx->nested.hv_evmcs_vmptr)) {
3545 copy_enlightened_to_vmcs12(vmx, vmx->nested.hv_evmcs->hv_clean_fields);
3546
3547 vmcs12->launch_state = !launch;
3548 } else if (enable_shadow_vmcs) {
3549 copy_shadow_to_vmcs12(vmx);
3550 }
3551
3552
3553
3554
3555
3556
3557
3558
3559
3560
3561
3562 if (CC(interrupt_shadow & KVM_X86_SHADOW_INT_MOV_SS))
3563 return nested_vmx_fail(vcpu, VMXERR_ENTRY_EVENTS_BLOCKED_BY_MOV_SS);
3564
3565 if (CC(vmcs12->launch_state == launch))
3566 return nested_vmx_fail(vcpu,
3567 launch ? VMXERR_VMLAUNCH_NONCLEAR_VMCS
3568 : VMXERR_VMRESUME_NONLAUNCHED_VMCS);
3569
3570 if (nested_vmx_check_controls(vcpu, vmcs12))
3571 return nested_vmx_fail(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
3572
3573 if (nested_vmx_check_host_state(vcpu, vmcs12))
3574 return nested_vmx_fail(vcpu, VMXERR_ENTRY_INVALID_HOST_STATE_FIELD);
3575
3576
3577
3578
3579
3580 vmx->nested.nested_run_pending = 1;
3581 vmx->nested.has_preemption_timer_deadline = false;
3582 status = nested_vmx_enter_non_root_mode(vcpu, true);
3583 if (unlikely(status != NVMX_VMENTRY_SUCCESS))
3584 goto vmentry_failed;
3585
3586
3587 if (nested_cpu_has_posted_intr(vmcs12) &&
3588 kvm_apic_has_interrupt(vcpu) == vmx->nested.posted_intr_nv) {
3589 vmx->nested.pi_pending = true;
3590 kvm_make_request(KVM_REQ_EVENT, vcpu);
3591 kvm_apic_clear_irr(vcpu, vmx->nested.posted_intr_nv);
3592 }
3593
3594
3595 vmx->vcpu.arch.l1tf_flush_l1d = true;
3596
3597
3598
3599
3600
3601
3602
3603
3604
3605
3606
3607 nested_cache_shadow_vmcs12(vcpu, vmcs12);
3608
3609 switch (vmcs12->guest_activity_state) {
3610 case GUEST_ACTIVITY_HLT:
3611
3612
3613
3614
3615
3616 if (!(vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK) &&
3617 !nested_cpu_has(vmcs12, CPU_BASED_NMI_WINDOW_EXITING) &&
3618 !(nested_cpu_has(vmcs12, CPU_BASED_INTR_WINDOW_EXITING) &&
3619 (vmcs12->guest_rflags & X86_EFLAGS_IF))) {
3620 vmx->nested.nested_run_pending = 0;
3621 return kvm_vcpu_halt(vcpu);
3622 }
3623 break;
3624 case GUEST_ACTIVITY_WAIT_SIPI:
3625 vmx->nested.nested_run_pending = 0;
3626 vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
3627 break;
3628 default:
3629 break;
3630 }
3631
3632 return 1;
3633
3634vmentry_failed:
3635 vmx->nested.nested_run_pending = 0;
3636 if (status == NVMX_VMENTRY_KVM_INTERNAL_ERROR)
3637 return 0;
3638 if (status == NVMX_VMENTRY_VMEXIT)
3639 return 1;
3640 WARN_ON_ONCE(status != NVMX_VMENTRY_VMFAIL);
3641 return nested_vmx_fail(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
3642}
3643
/*
 * On a nested exit from L2 to L1, vmcs12.guest_cr0 might not be up-to-date
 * because L2 may have changed some cr0 bits directly (CR0_GUEST_HOST_MASK).
 * This function returns the new value we should put in vmcs12.guest_cr0.
 * It's not enough to just return the vmcs02 GUEST_CR0. Rather,
 *  1. Bits that neither L0 nor L1 trapped were set directly by L2 and are now
 *     available in vmcs02 GUEST_CR0. (Note: it's enough to check that L0
 *     didn't trap the bit, because if L1 did, so would L0.)
 *  2. Bits that L1 asked to trap (and therefore L0 also did) could not have
 *     been modified by L2, and L1 knows it. So just leave vmcs12.guest_cr0
 *     unchanged.
 *  3. Bits that L1 didn't trap, but L0 did. L1 believes the guest could have
 *     changed these bits, and therefore they need to be updated, but L0
 *     didn't necessarily allow them to be changed in GUEST_CR0 - and rather
 *     put them in vmcs02 CR0_READ_SHADOW. So take these bits from there.
 */
3661static inline unsigned long
3662vmcs12_guest_cr0(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
3663{
3664 return
3665 (vmcs_readl(GUEST_CR0) & vcpu->arch.cr0_guest_owned_bits) |
3666 (vmcs12->guest_cr0 & vmcs12->cr0_guest_host_mask) |
3667 (vmcs_readl(CR0_READ_SHADOW) & ~(vmcs12->cr0_guest_host_mask |
3668 vcpu->arch.cr0_guest_owned_bits));
3669}
3670
3671static inline unsigned long
3672vmcs12_guest_cr4(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
3673{
3674 return
3675 (vmcs_readl(GUEST_CR4) & vcpu->arch.cr4_guest_owned_bits) |
3676 (vmcs12->guest_cr4 & vmcs12->cr4_guest_host_mask) |
3677 (vmcs_readl(CR4_READ_SHADOW) & ~(vmcs12->cr4_guest_host_mask |
3678 vcpu->arch.cr4_guest_owned_bits));
3679}
3680
3681static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu,
3682 struct vmcs12 *vmcs12)
3683{
3684 u32 idt_vectoring;
3685 unsigned int nr;
3686
3687 if (vcpu->arch.exception.injected) {
3688 nr = vcpu->arch.exception.nr;
3689 idt_vectoring = nr | VECTORING_INFO_VALID_MASK;
3690
3691 if (kvm_exception_is_soft(nr)) {
3692 vmcs12->vm_exit_instruction_len =
3693 vcpu->arch.event_exit_inst_len;
3694 idt_vectoring |= INTR_TYPE_SOFT_EXCEPTION;
3695 } else
3696 idt_vectoring |= INTR_TYPE_HARD_EXCEPTION;
3697
3698 if (vcpu->arch.exception.has_error_code) {
3699 idt_vectoring |= VECTORING_INFO_DELIVER_CODE_MASK;
3700 vmcs12->idt_vectoring_error_code =
3701 vcpu->arch.exception.error_code;
3702 }
3703
3704 vmcs12->idt_vectoring_info_field = idt_vectoring;
3705 } else if (vcpu->arch.nmi_injected) {
3706 vmcs12->idt_vectoring_info_field =
3707 INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR;
3708 } else if (vcpu->arch.interrupt.injected) {
3709 nr = vcpu->arch.interrupt.nr;
3710 idt_vectoring = nr | VECTORING_INFO_VALID_MASK;
3711
3712 if (vcpu->arch.interrupt.soft) {
3713 idt_vectoring |= INTR_TYPE_SOFT_INTR;
3714 vmcs12->vm_entry_instruction_len =
3715 vcpu->arch.event_exit_inst_len;
3716 } else
3717 idt_vectoring |= INTR_TYPE_EXT_INTR;
3718
3719 vmcs12->idt_vectoring_info_field = idt_vectoring;
3720 }
3721}
3722
3723
3724void nested_mark_vmcs12_pages_dirty(struct kvm_vcpu *vcpu)
3725{
3726 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
3727 gfn_t gfn;
3728
3729
3730
3731
3732
3733
3734 if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) {
3735 gfn = vmcs12->virtual_apic_page_addr >> PAGE_SHIFT;
3736 kvm_vcpu_mark_page_dirty(vcpu, gfn);
3737 }
3738
3739 if (nested_cpu_has_posted_intr(vmcs12)) {
3740 gfn = vmcs12->posted_intr_desc_addr >> PAGE_SHIFT;
3741 kvm_vcpu_mark_page_dirty(vcpu, gfn);
3742 }
3743}
3744
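/*
 * Emulate the posted-interrupt processing that hardware would have done on
 * behalf of L2: if a nested posted interrupt is pending and the descriptor's
 * outstanding-notification bit is set, merge the PIR bits into L2's virtual
 * APIC page and raise RVI (the low byte of GUEST_INTR_STATUS) accordingly.
 * A missing mapping of the descriptor or of the virtual APIC page is
 * reported back as an MMIO access that needs userspace.
 */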
3745static int vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu)
3746{
3747 struct vcpu_vmx *vmx = to_vmx(vcpu);
3748 int max_irr;
3749 void *vapic_page;
3750 u16 status;
3751
3752 if (!vmx->nested.pi_pending)
3753 return 0;
3754
3755 if (!vmx->nested.pi_desc)
3756 goto mmio_needed;
3757
3758 vmx->nested.pi_pending = false;
3759
3760 if (!pi_test_and_clear_on(vmx->nested.pi_desc))
3761 return 0;
3762
3763 max_irr = find_last_bit((unsigned long *)vmx->nested.pi_desc->pir, 256);
3764 if (max_irr != 256) {
3765 vapic_page = vmx->nested.virtual_apic_map.hva;
3766 if (!vapic_page)
3767 goto mmio_needed;
3768
3769 __kvm_apic_update_irr(vmx->nested.pi_desc->pir,
3770 vapic_page, &max_irr);
3771 status = vmcs_read16(GUEST_INTR_STATUS);
3772 if ((u8)max_irr > ((u8)status & 0xff)) {
3773 status &= ~0xff;
3774 status |= (u8)max_irr;
3775 vmcs_write16(GUEST_INTR_STATUS, status);
3776 }
3777 }
3778
3779 nested_mark_vmcs12_pages_dirty(vcpu);
3780 return 0;
3781
3782mmio_needed:
3783 kvm_handle_memory_failure(vcpu, X86EMUL_IO_NEEDED, NULL);
3784 return -ENXIO;
3785}
3786
3787static void nested_vmx_inject_exception_vmexit(struct kvm_vcpu *vcpu,
3788 unsigned long exit_qual)
3789{
3790 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
3791 unsigned int nr = vcpu->arch.exception.nr;
3792 u32 intr_info = nr | INTR_INFO_VALID_MASK;
3793
3794 if (vcpu->arch.exception.has_error_code) {
3795 vmcs12->vm_exit_intr_error_code = vcpu->arch.exception.error_code;
3796 intr_info |= INTR_INFO_DELIVER_CODE_MASK;
3797 }
3798
3799 if (kvm_exception_is_soft(nr))
3800 intr_info |= INTR_TYPE_SOFT_EXCEPTION;
3801 else
3802 intr_info |= INTR_TYPE_HARD_EXCEPTION;
3803
3804 if (!(vmcs12->idt_vectoring_info_field & VECTORING_INFO_VALID_MASK) &&
3805 vmx_get_nmi_mask(vcpu))
3806 intr_info |= INTR_INFO_UNBLOCK_NMI;
3807
3808 nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI, intr_info, exit_qual);
3809}
3810
3811
3812
3813
3814
3815
3816
3817static inline bool vmx_pending_dbg_trap(struct kvm_vcpu *vcpu)
3818{
3819 return vcpu->arch.exception.pending &&
3820 vcpu->arch.exception.nr == DB_VECTOR &&
3821 vcpu->arch.exception.payload;
3822}
3823
3824
3825
3826
3827
3828
3829
3830
3831static void nested_vmx_update_pending_dbg(struct kvm_vcpu *vcpu)
3832{
3833 if (vmx_pending_dbg_trap(vcpu))
3834 vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS,
3835 vcpu->arch.exception.payload);
3836}
3837
3838static bool nested_vmx_preemption_timer_pending(struct kvm_vcpu *vcpu)
3839{
3840 return nested_cpu_has_preemption_timer(get_vmcs12(vcpu)) &&
3841 to_vmx(vcpu)->nested.preemption_timer_expired;
3842}
3843
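/*
 * Decide whether a pending event should cause a VM-exit from L2 to L1 and,
 * if so, synthesize it.  Events are considered roughly in architectural
 * priority order: INIT and SIPI signals, exceptions that are not #DB traps,
 * monitor trap flag, remaining exceptions, the expired preemption timer,
 * SMIs, NMIs and finally external interrupts.  Returns -EBUSY when the event
 * must wait, e.g. because a nested VM-entry is still pending or an earlier
 * event needs to be (re)injected first.
 */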
3844static int vmx_check_nested_events(struct kvm_vcpu *vcpu)
3845{
3846 struct vcpu_vmx *vmx = to_vmx(vcpu);
3847 unsigned long exit_qual;
3848 bool block_nested_events =
3849 vmx->nested.nested_run_pending || kvm_event_needs_reinjection(vcpu);
3850 bool mtf_pending = vmx->nested.mtf_pending;
3851 struct kvm_lapic *apic = vcpu->arch.apic;
3852
3853
3854
3855
3856
3857 if (!block_nested_events)
3858 vmx->nested.mtf_pending = false;
3859
3860 if (lapic_in_kernel(vcpu) &&
3861 test_bit(KVM_APIC_INIT, &apic->pending_events)) {
3862 if (block_nested_events)
3863 return -EBUSY;
3864 nested_vmx_update_pending_dbg(vcpu);
3865 clear_bit(KVM_APIC_INIT, &apic->pending_events);
3866 if (vcpu->arch.mp_state != KVM_MP_STATE_INIT_RECEIVED)
3867 nested_vmx_vmexit(vcpu, EXIT_REASON_INIT_SIGNAL, 0, 0);
3868 return 0;
3869 }
3870
3871 if (lapic_in_kernel(vcpu) &&
3872 test_bit(KVM_APIC_SIPI, &apic->pending_events)) {
3873 if (block_nested_events)
3874 return -EBUSY;
3875
3876 clear_bit(KVM_APIC_SIPI, &apic->pending_events);
3877 if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED)
3878 nested_vmx_vmexit(vcpu, EXIT_REASON_SIPI_SIGNAL, 0,
3879 apic->sipi_vector & 0xFFUL);
3880 return 0;
3881 }
3882
3883
3884
3885
3886
3887
3888
3889
3890
3891
3892 if (vcpu->arch.exception.pending && !vmx_pending_dbg_trap(vcpu)) {
3893 if (vmx->nested.nested_run_pending)
3894 return -EBUSY;
3895 if (!nested_vmx_check_exception(vcpu, &exit_qual))
3896 goto no_vmexit;
3897 nested_vmx_inject_exception_vmexit(vcpu, exit_qual);
3898 return 0;
3899 }
3900
3901 if (mtf_pending) {
3902 if (block_nested_events)
3903 return -EBUSY;
3904 nested_vmx_update_pending_dbg(vcpu);
3905 nested_vmx_vmexit(vcpu, EXIT_REASON_MONITOR_TRAP_FLAG, 0, 0);
3906 return 0;
3907 }
3908
3909 if (vcpu->arch.exception.pending) {
3910 if (vmx->nested.nested_run_pending)
3911 return -EBUSY;
3912 if (!nested_vmx_check_exception(vcpu, &exit_qual))
3913 goto no_vmexit;
3914 nested_vmx_inject_exception_vmexit(vcpu, exit_qual);
3915 return 0;
3916 }
3917
3918 if (nested_vmx_preemption_timer_pending(vcpu)) {
3919 if (block_nested_events)
3920 return -EBUSY;
3921 nested_vmx_vmexit(vcpu, EXIT_REASON_PREEMPTION_TIMER, 0, 0);
3922 return 0;
3923 }
3924
3925 if (vcpu->arch.smi_pending && !is_smm(vcpu)) {
3926 if (block_nested_events)
3927 return -EBUSY;
3928 goto no_vmexit;
3929 }
3930
3931 if (vcpu->arch.nmi_pending && !vmx_nmi_blocked(vcpu)) {
3932 if (block_nested_events)
3933 return -EBUSY;
3934 if (!nested_exit_on_nmi(vcpu))
3935 goto no_vmexit;
3936
3937 nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
3938 NMI_VECTOR | INTR_TYPE_NMI_INTR |
3939 INTR_INFO_VALID_MASK, 0);
3940
3941
3942
3943
3944 vcpu->arch.nmi_pending = 0;
3945 vmx_set_nmi_mask(vcpu, true);
3946 return 0;
3947 }
3948
3949 if (kvm_cpu_has_interrupt(vcpu) && !vmx_interrupt_blocked(vcpu)) {
3950 if (block_nested_events)
3951 return -EBUSY;
3952 if (!nested_exit_on_intr(vcpu))
3953 goto no_vmexit;
3954 nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT, 0, 0);
3955 return 0;
3956 }
3957
3958no_vmexit:
3959 return vmx_complete_nested_posted_interrupt(vcpu);
3960}
3961
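/*
 * Convert the time remaining on the emulated preemption-timer hrtimer back
 * into VMX-preemption-timer ticks for vmcs12, i.e. the inverse of
 * vmx_start_preemption_timer(): value = ns * virtual_tsc_khz / 10^6, then
 * shifted right by VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE.
 */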
3962static u32 vmx_get_preemption_timer_value(struct kvm_vcpu *vcpu)
3963{
3964 ktime_t remaining =
3965 hrtimer_get_remaining(&to_vmx(vcpu)->nested.preemption_timer);
3966 u64 value;
3967
3968 if (ktime_to_ns(remaining) <= 0)
3969 return 0;
3970
3971 value = ktime_to_ns(remaining) * vcpu->arch.virtual_tsc_khz;
3972 do_div(value, 1000000);
3973 return value >> VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE;
3974}
3975
3976static bool is_vmcs12_ext_field(unsigned long field)
3977{
3978 switch (field) {
3979 case GUEST_ES_SELECTOR:
3980 case GUEST_CS_SELECTOR:
3981 case GUEST_SS_SELECTOR:
3982 case GUEST_DS_SELECTOR:
3983 case GUEST_FS_SELECTOR:
3984 case GUEST_GS_SELECTOR:
3985 case GUEST_LDTR_SELECTOR:
3986 case GUEST_TR_SELECTOR:
3987 case GUEST_ES_LIMIT:
3988 case GUEST_CS_LIMIT:
3989 case GUEST_SS_LIMIT:
3990 case GUEST_DS_LIMIT:
3991 case GUEST_FS_LIMIT:
3992 case GUEST_GS_LIMIT:
3993 case GUEST_LDTR_LIMIT:
3994 case GUEST_TR_LIMIT:
3995 case GUEST_GDTR_LIMIT:
3996 case GUEST_IDTR_LIMIT:
3997 case GUEST_ES_AR_BYTES:
3998 case GUEST_DS_AR_BYTES:
3999 case GUEST_FS_AR_BYTES:
4000 case GUEST_GS_AR_BYTES:
4001 case GUEST_LDTR_AR_BYTES:
4002 case GUEST_TR_AR_BYTES:
4003 case GUEST_ES_BASE:
4004 case GUEST_CS_BASE:
4005 case GUEST_SS_BASE:
4006 case GUEST_DS_BASE:
4007 case GUEST_FS_BASE:
4008 case GUEST_GS_BASE:
4009 case GUEST_LDTR_BASE:
4010 case GUEST_TR_BASE:
4011 case GUEST_GDTR_BASE:
4012 case GUEST_IDTR_BASE:
4013 case GUEST_PENDING_DBG_EXCEPTIONS:
4014 case GUEST_BNDCFGS:
4015 return true;
4016 default:
4017 break;
4018 }
4019
4020 return false;
4021}
4022
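/*
 * Sync the "rare" guest fields (segment registers, descriptor tables,
 * SYSENTER state, pending debug exceptions, BNDCFGS) from vmcs02 back into
 * vmcs12.  Reading these on every nested exit is wasteful, so the sync is
 * normally deferred (need_sync_vmcs02_to_vmcs12_rare) and only performed
 * when one of these fields is actually needed, which is presumably why
 * is_vmcs12_ext_field() above enumerates exactly this set.
 */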
4023static void sync_vmcs02_to_vmcs12_rare(struct kvm_vcpu *vcpu,
4024 struct vmcs12 *vmcs12)
4025{
4026 struct vcpu_vmx *vmx = to_vmx(vcpu);
4027
4028 vmcs12->guest_es_selector = vmcs_read16(GUEST_ES_SELECTOR);
4029 vmcs12->guest_cs_selector = vmcs_read16(GUEST_CS_SELECTOR);
4030 vmcs12->guest_ss_selector = vmcs_read16(GUEST_SS_SELECTOR);
4031 vmcs12->guest_ds_selector = vmcs_read16(GUEST_DS_SELECTOR);
4032 vmcs12->guest_fs_selector = vmcs_read16(GUEST_FS_SELECTOR);
4033 vmcs12->guest_gs_selector = vmcs_read16(GUEST_GS_SELECTOR);
4034 vmcs12->guest_ldtr_selector = vmcs_read16(GUEST_LDTR_SELECTOR);
4035 vmcs12->guest_tr_selector = vmcs_read16(GUEST_TR_SELECTOR);
4036 vmcs12->guest_es_limit = vmcs_read32(GUEST_ES_LIMIT);
4037 vmcs12->guest_cs_limit = vmcs_read32(GUEST_CS_LIMIT);
4038 vmcs12->guest_ss_limit = vmcs_read32(GUEST_SS_LIMIT);
4039 vmcs12->guest_ds_limit = vmcs_read32(GUEST_DS_LIMIT);
4040 vmcs12->guest_fs_limit = vmcs_read32(GUEST_FS_LIMIT);
4041 vmcs12->guest_gs_limit = vmcs_read32(GUEST_GS_LIMIT);
4042 vmcs12->guest_ldtr_limit = vmcs_read32(GUEST_LDTR_LIMIT);
4043 vmcs12->guest_tr_limit = vmcs_read32(GUEST_TR_LIMIT);
4044 vmcs12->guest_gdtr_limit = vmcs_read32(GUEST_GDTR_LIMIT);
4045 vmcs12->guest_idtr_limit = vmcs_read32(GUEST_IDTR_LIMIT);
4046 vmcs12->guest_es_ar_bytes = vmcs_read32(GUEST_ES_AR_BYTES);
4047 vmcs12->guest_ds_ar_bytes = vmcs_read32(GUEST_DS_AR_BYTES);
4048 vmcs12->guest_fs_ar_bytes = vmcs_read32(GUEST_FS_AR_BYTES);
4049 vmcs12->guest_gs_ar_bytes = vmcs_read32(GUEST_GS_AR_BYTES);
4050 vmcs12->guest_ldtr_ar_bytes = vmcs_read32(GUEST_LDTR_AR_BYTES);
4051 vmcs12->guest_tr_ar_bytes = vmcs_read32(GUEST_TR_AR_BYTES);
4052 vmcs12->guest_es_base = vmcs_readl(GUEST_ES_BASE);
4053 vmcs12->guest_cs_base = vmcs_readl(GUEST_CS_BASE);
4054 vmcs12->guest_ss_base = vmcs_readl(GUEST_SS_BASE);
4055 vmcs12->guest_ds_base = vmcs_readl(GUEST_DS_BASE);
4056 vmcs12->guest_fs_base = vmcs_readl(GUEST_FS_BASE);
4057 vmcs12->guest_gs_base = vmcs_readl(GUEST_GS_BASE);
4058 vmcs12->guest_ldtr_base = vmcs_readl(GUEST_LDTR_BASE);
4059 vmcs12->guest_tr_base = vmcs_readl(GUEST_TR_BASE);
4060 vmcs12->guest_gdtr_base = vmcs_readl(GUEST_GDTR_BASE);
4061 vmcs12->guest_idtr_base = vmcs_readl(GUEST_IDTR_BASE);
4062 vmcs12->guest_pending_dbg_exceptions =
4063 vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS);
4064 if (kvm_mpx_supported())
4065 vmcs12->guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS);
4066
4067 vmx->nested.need_sync_vmcs02_to_vmcs12_rare = false;
4068}
4069
4070static void copy_vmcs02_to_vmcs12_rare(struct kvm_vcpu *vcpu,
4071 struct vmcs12 *vmcs12)
4072{
4073 struct vcpu_vmx *vmx = to_vmx(vcpu);
4074 int cpu;
4075
4076 if (!vmx->nested.need_sync_vmcs02_to_vmcs12_rare)
4077 return;
4078
4079
4080 WARN_ON_ONCE(vmx->loaded_vmcs != &vmx->vmcs01);
4081
4082 cpu = get_cpu();
4083 vmx->loaded_vmcs = &vmx->nested.vmcs02;
4084 vmx_vcpu_load_vmcs(vcpu, cpu, &vmx->vmcs01);
4085
4086 sync_vmcs02_to_vmcs12_rare(vcpu, vmcs12);
4087
4088 vmx->loaded_vmcs = &vmx->vmcs01;
4089 vmx_vcpu_load_vmcs(vcpu, cpu, &vmx->nested.vmcs02);
4090 put_cpu();
4091}
4092
/*
 * Update the guest state fields of vmcs12 to reflect changes that
 * occurred while L2 was running. (The "IA-32e mode guest" bit of the
 * VM-entry controls is also updated, since this is really a guest
 * state bit.)
 */
4099static void sync_vmcs02_to_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
4100{
4101 struct vcpu_vmx *vmx = to_vmx(vcpu);
4102
4103 if (evmptr_is_valid(vmx->nested.hv_evmcs_vmptr))
4104 sync_vmcs02_to_vmcs12_rare(vcpu, vmcs12);
4105
4106 vmx->nested.need_sync_vmcs02_to_vmcs12_rare =
4107 !evmptr_is_valid(vmx->nested.hv_evmcs_vmptr);
4108
4109 vmcs12->guest_cr0 = vmcs12_guest_cr0(vcpu, vmcs12);
4110 vmcs12->guest_cr4 = vmcs12_guest_cr4(vcpu, vmcs12);
4111
4112 vmcs12->guest_rsp = kvm_rsp_read(vcpu);
4113 vmcs12->guest_rip = kvm_rip_read(vcpu);
4114 vmcs12->guest_rflags = vmcs_readl(GUEST_RFLAGS);
4115
4116 vmcs12->guest_cs_ar_bytes = vmcs_read32(GUEST_CS_AR_BYTES);
4117 vmcs12->guest_ss_ar_bytes = vmcs_read32(GUEST_SS_AR_BYTES);
4118
4119 vmcs12->guest_interruptibility_info =
4120 vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
4121
4122 if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED)
4123 vmcs12->guest_activity_state = GUEST_ACTIVITY_HLT;
4124 else if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED)
4125 vmcs12->guest_activity_state = GUEST_ACTIVITY_WAIT_SIPI;
4126 else
4127 vmcs12->guest_activity_state = GUEST_ACTIVITY_ACTIVE;
4128
4129 if (nested_cpu_has_preemption_timer(vmcs12) &&
4130 vmcs12->vm_exit_controls & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER &&
4131 !vmx->nested.nested_run_pending)
4132 vmcs12->vmx_preemption_timer_value =
4133 vmx_get_preemption_timer_value(vcpu);
4134
4135
4136
4137
4138
4139
4140
4141
4142
4143 if (enable_ept) {
4144 vmcs12->guest_cr3 = vmcs_readl(GUEST_CR3);
4145 if (nested_cpu_has_ept(vmcs12) && is_pae_paging(vcpu)) {
4146 vmcs12->guest_pdptr0 = vmcs_read64(GUEST_PDPTR0);
4147 vmcs12->guest_pdptr1 = vmcs_read64(GUEST_PDPTR1);
4148 vmcs12->guest_pdptr2 = vmcs_read64(GUEST_PDPTR2);
4149 vmcs12->guest_pdptr3 = vmcs_read64(GUEST_PDPTR3);
4150 }
4151 }
4152
4153 vmcs12->guest_linear_address = vmcs_readl(GUEST_LINEAR_ADDRESS);
4154
4155 if (nested_cpu_has_vid(vmcs12))
4156 vmcs12->guest_intr_status = vmcs_read16(GUEST_INTR_STATUS);
4157
4158 vmcs12->vm_entry_controls =
4159 (vmcs12->vm_entry_controls & ~VM_ENTRY_IA32E_MODE) |
4160 (vm_entry_controls_get(to_vmx(vcpu)) & VM_ENTRY_IA32E_MODE);
4161
4162 if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_DEBUG_CONTROLS)
4163 kvm_get_dr(vcpu, 7, (unsigned long *)&vmcs12->guest_dr7);
4164
4165 if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_EFER)
4166 vmcs12->guest_ia32_efer = vcpu->arch.efer;
4167}
4168
/*
 * prepare_vmcs12 is part of what we need to do when the nested L2 guest exits
 * and we want to prepare to run its L1 parent. L1 keeps a vmcs for L2 (vmcs12),
 * and this function updates it to reflect the changes to the guest state while
 * L2 was running (and perhaps made some exits which were handled directly by L0
 * without going back to L1), and to reflect the exit reason.
 * Note that we do not have to copy here all VMCS fields, just those that
 * could have changed by the L2 guest or the exit - i.e., the guest-state and
 * exit-information fields only. Other fields are modified by L1 with VMWRITE,
 * which already writes to vmcs12 directly.
 */
4180static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
4181 u32 vm_exit_reason, u32 exit_intr_info,
4182 unsigned long exit_qualification)
4183{
4184
4185 vmcs12->vm_exit_reason = vm_exit_reason;
4186 if (to_vmx(vcpu)->exit_reason.enclave_mode)
4187 vmcs12->vm_exit_reason |= VMX_EXIT_REASONS_SGX_ENCLAVE_MODE;
4188 vmcs12->exit_qualification = exit_qualification;
4189 vmcs12->vm_exit_intr_info = exit_intr_info;
4190
4191 vmcs12->idt_vectoring_info_field = 0;
4192 vmcs12->vm_exit_instruction_len = vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
4193 vmcs12->vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
4194
4195 if (!(vmcs12->vm_exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY)) {
4196 vmcs12->launch_state = 1;
4197
4198
4199
4200 vmcs12->vm_entry_intr_info_field &= ~INTR_INFO_VALID_MASK;
4201
4202
4203
4204
4205
4206 vmcs12_save_pending_event(vcpu, vmcs12);
4207
4208
4209
4210
4211
4212
4213
4214 if (nested_vmx_store_msr(vcpu,
4215 vmcs12->vm_exit_msr_store_addr,
4216 vmcs12->vm_exit_msr_store_count))
4217 nested_vmx_abort(vcpu,
4218 VMX_ABORT_SAVE_GUEST_MSR_FAIL);
4219 }
4220
4221	/*
4222	 * Drop what we picked up for L2 via vmx_complete_interrupts. It is
4223	 * preserved above and would only end up incorrectly in L1.
4224	 */
4225 vcpu->arch.nmi_injected = false;
4226 kvm_clear_exception_queue(vcpu);
4227 kvm_clear_interrupt_queue(vcpu);
4228}
4229
4230/*
4231 * A part of what we need to do when the nested L2 guest exits and we want
4232 * to run its L1 parent, is to reset L1's guest state to the host state
4233 * specified in vmcs12.
4234 * This function is to be called not only on normal nested exit, but also on
4235 * a nested entry failure, as explained in Intel's spec, 3B.23.7 ("VM-Entry
4236 * Failures During or After Loading Guest State").
4237 * This function should be called when the active VMCS is L1's (vmcs01).
4238 */
4239static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
4240 struct vmcs12 *vmcs12)
4241{
4242 enum vm_entry_failure_code ignored;
4243 struct kvm_segment seg;
4244
4245 if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER)
4246 vcpu->arch.efer = vmcs12->host_ia32_efer;
4247 else if (vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE)
4248 vcpu->arch.efer |= (EFER_LMA | EFER_LME);
4249 else
4250 vcpu->arch.efer &= ~(EFER_LMA | EFER_LME);
4251 vmx_set_efer(vcpu, vcpu->arch.efer);
4252
4253 kvm_rsp_write(vcpu, vmcs12->host_rsp);
4254 kvm_rip_write(vcpu, vmcs12->host_rip);
4255 vmx_set_rflags(vcpu, X86_EFLAGS_FIXED);
4256 vmx_set_interrupt_shadow(vcpu, 0);
4257
4258
4259
4260
4261
4262
4263
4264
4265 vcpu->arch.cr0_guest_owned_bits = KVM_POSSIBLE_CR0_GUEST_BITS;
4266 vmx_set_cr0(vcpu, vmcs12->host_cr0);
4267
4268
4269 vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK);
4270 vmx_set_cr4(vcpu, vmcs12->host_cr4);
4271
4272 nested_ept_uninit_mmu_context(vcpu);
4273
4274	/*
4275	 * Only PDPTE load can fail as the value of cr3 was checked on entry
4276	 * and couldn't have changed.
4277	 */
4278 if (nested_vmx_load_cr3(vcpu, vmcs12->host_cr3, false, true, &ignored))
4279 nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_PDPTE_FAIL);
4280
4281 nested_vmx_transition_tlb_flush(vcpu, vmcs12, false);
4282
4283 vmcs_write32(GUEST_SYSENTER_CS, vmcs12->host_ia32_sysenter_cs);
4284 vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->host_ia32_sysenter_esp);
4285 vmcs_writel(GUEST_SYSENTER_EIP, vmcs12->host_ia32_sysenter_eip);
4286 vmcs_writel(GUEST_IDTR_BASE, vmcs12->host_idtr_base);
4287 vmcs_writel(GUEST_GDTR_BASE, vmcs12->host_gdtr_base);
4288 vmcs_write32(GUEST_IDTR_LIMIT, 0xFFFF);
4289 vmcs_write32(GUEST_GDTR_LIMIT, 0xFFFF);
4290
4291
4292 if (vmcs12->vm_exit_controls & VM_EXIT_CLEAR_BNDCFGS)
4293 vmcs_write64(GUEST_BNDCFGS, 0);
4294
4295 if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PAT) {
4296 vmcs_write64(GUEST_IA32_PAT, vmcs12->host_ia32_pat);
4297 vcpu->arch.pat = vmcs12->host_ia32_pat;
4298 }
4299 if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL)
4300 WARN_ON_ONCE(kvm_set_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL,
4301 vmcs12->host_ia32_perf_global_ctrl));
4302
4303
4304
4305 seg = (struct kvm_segment) {
4306 .base = 0,
4307 .limit = 0xFFFFFFFF,
4308 .selector = vmcs12->host_cs_selector,
4309 .type = 11,
4310 .present = 1,
4311 .s = 1,
4312 .g = 1
4313 };
4314 if (vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE)
4315 seg.l = 1;
4316 else
4317 seg.db = 1;
4318 __vmx_set_segment(vcpu, &seg, VCPU_SREG_CS);
4319 seg = (struct kvm_segment) {
4320 .base = 0,
4321 .limit = 0xFFFFFFFF,
4322 .type = 3,
4323 .present = 1,
4324 .s = 1,
4325 .db = 1,
4326 .g = 1
4327 };
4328 seg.selector = vmcs12->host_ds_selector;
4329 __vmx_set_segment(vcpu, &seg, VCPU_SREG_DS);
4330 seg.selector = vmcs12->host_es_selector;
4331 __vmx_set_segment(vcpu, &seg, VCPU_SREG_ES);
4332 seg.selector = vmcs12->host_ss_selector;
4333 __vmx_set_segment(vcpu, &seg, VCPU_SREG_SS);
4334 seg.selector = vmcs12->host_fs_selector;
4335 seg.base = vmcs12->host_fs_base;
4336 __vmx_set_segment(vcpu, &seg, VCPU_SREG_FS);
4337 seg.selector = vmcs12->host_gs_selector;
4338 seg.base = vmcs12->host_gs_base;
4339 __vmx_set_segment(vcpu, &seg, VCPU_SREG_GS);
4340 seg = (struct kvm_segment) {
4341 .base = vmcs12->host_tr_base,
4342 .limit = 0x67,
4343 .selector = vmcs12->host_tr_selector,
4344 .type = 11,
4345 .present = 1
4346 };
4347 __vmx_set_segment(vcpu, &seg, VCPU_SREG_TR);
4348
4349 memset(&seg, 0, sizeof(seg));
4350 seg.unusable = 1;
4351 __vmx_set_segment(vcpu, &seg, VCPU_SREG_LDTR);
4352
4353 kvm_set_dr(vcpu, 7, 0x400);
4354 vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
4355
4356 if (nested_vmx_load_msr(vcpu, vmcs12->vm_exit_msr_load_addr,
4357 vmcs12->vm_exit_msr_load_count))
4358 nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_MSR_FAIL);
4359
4360 to_vmx(vcpu)->emulation_required = vmx_emulation_required(vcpu);
4361}
4362
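/*
 * Note: a sketch of the lookup order below - the guest EFER that vmcs01
 * would give L1 may live in the VMCS guest field, be loaded implicitly with
 * the host's EFER, sit in the MSR autoload list, or be tracked as a
 * user-return MSR; host_efer is the final fallback.
 */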
4363static inline u64 nested_vmx_get_vmcs01_guest_efer(struct vcpu_vmx *vmx)
4364{
4365 struct vmx_uret_msr *efer_msr;
4366 unsigned int i;
4367
4368 if (vm_entry_controls_get(vmx) & VM_ENTRY_LOAD_IA32_EFER)
4369 return vmcs_read64(GUEST_IA32_EFER);
4370
4371 if (cpu_has_load_ia32_efer())
4372 return host_efer;
4373
4374 for (i = 0; i < vmx->msr_autoload.guest.nr; ++i) {
4375 if (vmx->msr_autoload.guest.val[i].index == MSR_EFER)
4376 return vmx->msr_autoload.guest.val[i].value;
4377 }
4378
4379 efer_msr = vmx_find_uret_msr(vmx, MSR_EFER);
4380 if (efer_msr)
4381 return efer_msr->data;
4382
4383 return host_efer;
4384}
4385
4386static void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu)
4387{
4388 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
4389 struct vcpu_vmx *vmx = to_vmx(vcpu);
4390 struct vmx_msr_entry g, h;
4391 gpa_t gpa;
4392 u32 i, j;
4393
4394 vcpu->arch.pat = vmcs_read64(GUEST_IA32_PAT);
4395
4396 if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS) {
4397
4398
4399
4400
4401
4402
4403 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
4404 kvm_set_dr(vcpu, 7, DR7_FIXED_1);
4405 else
4406 WARN_ON(kvm_set_dr(vcpu, 7, vmcs_readl(GUEST_DR7)));
4407 }
4408
4409
4410
4411
4412
4413 vmx_set_efer(vcpu, nested_vmx_get_vmcs01_guest_efer(vmx));
4414
4415 vcpu->arch.cr0_guest_owned_bits = KVM_POSSIBLE_CR0_GUEST_BITS;
4416 vmx_set_cr0(vcpu, vmcs_readl(CR0_READ_SHADOW));
4417
4418 vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK);
4419 vmx_set_cr4(vcpu, vmcs_readl(CR4_READ_SHADOW));
4420
4421 nested_ept_uninit_mmu_context(vcpu);
4422 vcpu->arch.cr3 = vmcs_readl(GUEST_CR3);
4423 kvm_register_mark_available(vcpu, VCPU_EXREG_CR3);
4424
4425	/*
4426	 * Use ept_save_pdptrs(vcpu) to load the MMU's cached PDPTRs
4427	 * from vmcs01 (if necessary).  The PDPTRs are not loaded on
4428	 * VMFail, like everything else we just need to ensure our
4429	 * software model is up-to-date.
4430	 */
4431 if (enable_ept && is_pae_paging(vcpu))
4432 ept_save_pdptrs(vcpu);
4433
4434 kvm_mmu_reset_context(vcpu);
4435
4436	/*
4437	 * This nasty bit of open coding is a compromise between blindly
4438	 * loading L1's MSRs using the exit load lists (incorrect emulation
4439	 * of VMFail), leaving the nested VM's MSRs in the software model
4440	 * (incorrect behavior) and snapshotting the modified MSRs (too
4441	 * expensive since the lists are unbound by hardware).  For each
4442	 * MSR that was (prematurely) loaded from the nested VMEntry load
4443	 * list, reload it from the exit load list if it exists and differs
4444	 * from the guest value.  The intent is to stuff host state as
4445	 * silently as possible, not to fully process the exit load list.
4446	 */
4447 for (i = 0; i < vmcs12->vm_entry_msr_load_count; i++) {
4448 gpa = vmcs12->vm_entry_msr_load_addr + (i * sizeof(g));
4449 if (kvm_vcpu_read_guest(vcpu, gpa, &g, sizeof(g))) {
4450 pr_debug_ratelimited(
4451 "%s read MSR index failed (%u, 0x%08llx)\n",
4452 __func__, i, gpa);
4453 goto vmabort;
4454 }
4455
4456 for (j = 0; j < vmcs12->vm_exit_msr_load_count; j++) {
4457 gpa = vmcs12->vm_exit_msr_load_addr + (j * sizeof(h));
4458 if (kvm_vcpu_read_guest(vcpu, gpa, &h, sizeof(h))) {
4459 pr_debug_ratelimited(
4460 "%s read MSR failed (%u, 0x%08llx)\n",
4461 __func__, j, gpa);
4462 goto vmabort;
4463 }
4464 if (h.index != g.index)
4465 continue;
4466 if (h.value == g.value)
4467 break;
4468
4469 if (nested_vmx_load_msr_check(vcpu, &h)) {
4470 pr_debug_ratelimited(
4471 "%s check failed (%u, 0x%x, 0x%x)\n",
4472 __func__, j, h.index, h.reserved);
4473 goto vmabort;
4474 }
4475
4476 if (kvm_set_msr(vcpu, h.index, h.value)) {
4477 pr_debug_ratelimited(
4478 "%s WRMSR failed (%u, 0x%x, 0x%llx)\n",
4479 __func__, j, h.index, h.value);
4480 goto vmabort;
4481 }
4482 }
4483 }
4484
4485 return;
4486
4487vmabort:
4488 nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_MSR_FAIL);
4489}
4490
4491/*
4492 * Emulate an exit from nested guest (L2) to L1, i.e., prepare to run L1
4493 * and modify vmcs12 to make it see what it would expect to see there if
4494 * L2 was its real guest. Must only be called when in L2 (is_guest_mode()).
4495 */
4496void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
4497 u32 exit_intr_info, unsigned long exit_qualification)
4498{
4499 struct vcpu_vmx *vmx = to_vmx(vcpu);
4500 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
4501
4502
4503 WARN_ON_ONCE(vmx->nested.nested_run_pending);
4504
4505
4506 WARN_ON_ONCE(kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu));
4507
4508 if (kvm_check_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu)) {
4509		/*
4510		 * KVM_REQ_GET_NESTED_STATE_PAGES is also used to map
4511		 * Enlightened VMCS after migration and we still need to
4512		 * do that when something is forcing L2->L1 exit prior to
4513		 * the first L2 run.
4514		 */
4515 (void)nested_get_evmcs_page(vcpu);
4516 }
4517
4518
4519 if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu))
4520 kvm_vcpu_flush_tlb_current(vcpu);
4521
4522	/*
4523	 * VCPU_EXREG_PDPTR will be clobbered between now and the next
4524	 * VM-Enter.  Ensure that the vmcs02 PDPTR fields are up-to-date
4525	 * before switching back to L1.
4526	 */
4527 if (enable_ept && is_pae_paging(vcpu))
4528 vmx_ept_load_pdptrs(vcpu);
4529
4530 leave_guest_mode(vcpu);
4531
4532 if (nested_cpu_has_preemption_timer(vmcs12))
4533 hrtimer_cancel(&to_vmx(vcpu)->nested.preemption_timer);
4534
4535 if (nested_cpu_has(vmcs12, CPU_BASED_USE_TSC_OFFSETTING)) {
4536 vcpu->arch.tsc_offset = vcpu->arch.l1_tsc_offset;
4537 if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_TSC_SCALING))
4538 vcpu->arch.tsc_scaling_ratio = vcpu->arch.l1_tsc_scaling_ratio;
4539 }
4540
4541 if (likely(!vmx->fail)) {
4542 sync_vmcs02_to_vmcs12(vcpu, vmcs12);
4543
4544 if (vm_exit_reason != -1)
4545 prepare_vmcs12(vcpu, vmcs12, vm_exit_reason,
4546 exit_intr_info, exit_qualification);
4547
4548		/*
4549		 * Must happen outside of sync_vmcs02_to_vmcs12() as it will
4550		 * also be used to capture vmcs12 cache as part of
4551		 * capturing nVMX state for snapshot (migration).
4552		 *
4553		 * Otherwise, this flush will dirty guest memory at a
4554		 * point it is already assumed by user-space to be
4555		 * immutable.
4556		 */
4557 nested_flush_cached_shadow_vmcs12(vcpu, vmcs12);
4558 } else {
4559		/*
4560		 * The only expected VM-instruction error is "VM entry with
4561		 * invalid control field(s)."  Anything else indicates a
4562		 * problem with L0.  And we should never get here with a
4563		 * VMFail of any type if early consistency checks are enabled.
4564		 */
4565 WARN_ON_ONCE(vmcs_read32(VM_INSTRUCTION_ERROR) !=
4566 VMXERR_ENTRY_INVALID_CONTROL_FIELD);
4567 WARN_ON_ONCE(nested_early_check);
4568 }
4569
4570 vmx_switch_vmcs(vcpu, &vmx->vmcs01);
4571
4572
4573 vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr);
4574 vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr);
4575 vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset);
4576 if (kvm_has_tsc_control)
4577 vmcs_write64(TSC_MULTIPLIER, vcpu->arch.tsc_scaling_ratio);
4578
4579 if (vmx->nested.l1_tpr_threshold != -1)
4580 vmcs_write32(TPR_THRESHOLD, vmx->nested.l1_tpr_threshold);
4581
4582 if (vmx->nested.change_vmcs01_virtual_apic_mode) {
4583 vmx->nested.change_vmcs01_virtual_apic_mode = false;
4584 vmx_set_virtual_apic_mode(vcpu);
4585 }
4586
4587 if (vmx->nested.update_vmcs01_cpu_dirty_logging) {
4588 vmx->nested.update_vmcs01_cpu_dirty_logging = false;
4589 vmx_update_cpu_dirty_logging(vcpu);
4590 }
4591
4592
4593 if (vmx->nested.apic_access_page) {
4594 kvm_release_page_clean(vmx->nested.apic_access_page);
4595 vmx->nested.apic_access_page = NULL;
4596 }
4597 kvm_vcpu_unmap(vcpu, &vmx->nested.virtual_apic_map, true);
4598 kvm_vcpu_unmap(vcpu, &vmx->nested.pi_desc_map, true);
4599 vmx->nested.pi_desc = NULL;
4600
4601 if (vmx->nested.reload_vmcs01_apic_access_page) {
4602 vmx->nested.reload_vmcs01_apic_access_page = false;
4603 kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu);
4604 }
4605
4606 if ((vm_exit_reason != -1) &&
4607 (enable_shadow_vmcs || evmptr_is_valid(vmx->nested.hv_evmcs_vmptr)))
4608 vmx->nested.need_vmcs12_to_shadow_sync = true;
4609
4610	/* in case we halted in L2 */
4611 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
4612
4613 if (likely(!vmx->fail)) {
4614 if ((u16)vm_exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT &&
4615 nested_exit_intr_ack_set(vcpu)) {
4616 int irq = kvm_cpu_get_interrupt(vcpu);
4617 WARN_ON(irq < 0);
4618 vmcs12->vm_exit_intr_info = irq |
4619 INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR;
4620 }
4621
4622 if (vm_exit_reason != -1)
4623 trace_kvm_nested_vmexit_inject(vmcs12->vm_exit_reason,
4624 vmcs12->exit_qualification,
4625 vmcs12->idt_vectoring_info_field,
4626 vmcs12->vm_exit_intr_info,
4627 vmcs12->vm_exit_intr_error_code,
4628 KVM_ISA_VMX);
4629
4630 load_vmcs12_host_state(vcpu, vmcs12);
4631
4632 return;
4633 }
4634
4635	/*
4636	 * After an early L2 VM-entry failure, we're now back
4637	 * in L1 which thinks it just finished a VMLAUNCH or
4638	 * VMRESUME instruction, so we need to set the failure
4639	 * flag and the VM-instruction error field of the VMCS
4640	 * accordingly, and skip the emulated instruction.
4641	 */
4642 (void)nested_vmx_fail(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
4643
4644	/*
4645	 * Restore L1's host state to KVM's software model.  We're here
4646	 * because a consistency check was caught by hardware, which
4647	 * means some amount of guest state has been propagated to KVM's
4648	 * model and needs to be unwound to the host's state.
4649	 */
4650 nested_vmx_restore_host_state(vcpu);
4651
4652 vmx->fail = 0;
4653}
4654
4655static void nested_vmx_triple_fault(struct kvm_vcpu *vcpu)
4656{
4657 nested_vmx_vmexit(vcpu, EXIT_REASON_TRIPLE_FAULT, 0, 0);
4658}
4659
4660/*
4661 * Decode the memory-address operand of a vmx instruction, as recorded on an
4662 * exit caused by such an instruction (run by a guest hypervisor).
4663 * On success, returns 0.  When the operand is invalid, returns 1 and queues
4664 * #UD, #GP, or #SS for the guest.
4665 */
4666int get_vmx_mem_address(struct kvm_vcpu *vcpu, unsigned long exit_qualification,
4667 u32 vmx_instruction_info, bool wr, int len, gva_t *ret)
4668{
4669 gva_t off;
4670 bool exn;
4671 struct kvm_segment s;
4672
4673	/*
4674	 * According to Vol. 3B, "Information for VM Exits Due to Instruction
4675	 * Execution", on an exit, vmx_instruction_info holds most of the
4676	 * addressing components of the operand.  Only the displacement part
4677	 * is put in exit_qualification (see 3B, "Basic VM-Exit Information").
4678	 * For how an actual address is calculated from all these components,
4679	 * refer to Vol. 1, "Operand Addressing".
4680	 */
4681 int scaling = vmx_instruction_info & 3;
4682 int addr_size = (vmx_instruction_info >> 7) & 7;
4683 bool is_reg = vmx_instruction_info & (1u << 10);
4684 int seg_reg = (vmx_instruction_info >> 15) & 7;
4685 int index_reg = (vmx_instruction_info >> 18) & 0xf;
4686 bool index_is_valid = !(vmx_instruction_info & (1u << 22));
4687 int base_reg = (vmx_instruction_info >> 23) & 0xf;
4688 bool base_is_valid = !(vmx_instruction_info & (1u << 27));
4689
4690 if (is_reg) {
4691 kvm_queue_exception(vcpu, UD_VECTOR);
4692 return 1;
4693 }
4694
4695
4696
4697 off = exit_qualification;
4698 if (addr_size == 1)
4699 off = (gva_t)sign_extend64(off, 31);
4700 else if (addr_size == 0)
4701 off = (gva_t)sign_extend64(off, 15);
4702 if (base_is_valid)
4703 off += kvm_register_read(vcpu, base_reg);
4704 if (index_is_valid)
4705 off += kvm_register_read(vcpu, index_reg) << scaling;
4706 vmx_get_segment(vcpu, &s, seg_reg);
4707
4708	/*
4709	 * The effective address, i.e. @off, of a memory operand is truncated
4710	 * based on the address size of the instruction.  Note that this is
4711	 * the *effective address*, i.e. the address prior to accounting for
4712	 * the segment's base.
4713	 */
4714 if (addr_size == 1)
4715 off &= 0xffffffff;
4716 else if (addr_size == 0)
4717 off &= 0xffff;
4718
4719
4720 exn = false;
4721 if (is_long_mode(vcpu)) {
4722
4723
4724
4725
4726
4727 if (seg_reg == VCPU_SREG_FS || seg_reg == VCPU_SREG_GS)
4728 *ret = s.base + off;
4729 else
4730 *ret = off;
4731
4732
4733
4734
4735
4736 exn = is_noncanonical_address(*ret, vcpu);
4737 } else {
4738
4739
4740
4741
4742
4743 *ret = (s.base + off) & 0xffffffff;
4744
4745
4746
4747
4748
4749
4750
4751 if (wr)
4752
4753
4754
4755 exn = ((s.type & 0xa) == 0 || (s.type & 8));
4756 else
4757
4758
4759
4760 exn = ((s.type & 0xa) == 8);
4761 if (exn) {
4762 kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
4763 return 1;
4764 }
4765
4766
4767 exn = (s.unusable != 0);
4768
4769
4770
4771
4772
4773
4774
4775 if (!(s.base == 0 && s.limit == 0xffffffff &&
4776 ((s.type & 8) || !(s.type & 4))))
4777 exn = exn || ((u64)off + len - 1 > s.limit);
4778 }
4779 if (exn) {
4780 kvm_queue_exception_e(vcpu,
4781 seg_reg == VCPU_SREG_SS ?
4782 SS_VECTOR : GP_VECTOR,
4783 0);
4784 return 1;
4785 }
4786
4787 return 0;
4788}
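/*
 * Illustrative example (hypothetical values): a VMREAD with a memory
 * destination of [rax + 0x10], DS segment and 64-bit address size yields an
 * exit qualification of 0x10 (the displacement) and an instruction-info word
 * of 0x418100: scaling = 0 (bits 1:0), addr_size = 2 (bits 9:7 -> 0x100),
 * memory operand (bit 10 clear), seg_reg = 3/DS (bits 17:15 -> 0x18000),
 * index invalid (bit 22 -> 0x400000), base_reg = 0/RAX with base valid
 * (bit 27 clear).  get_vmx_mem_address() then computes off = RAX + 0x10 and,
 * because DS is ignored in 64-bit mode, only checks that the result is
 * canonical.
 */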
4789
4790void nested_vmx_pmu_entry_exit_ctls_update(struct kvm_vcpu *vcpu)
4791{
4792 struct vcpu_vmx *vmx;
4793
4794 if (!nested_vmx_allowed(vcpu))
4795 return;
4796
4797 vmx = to_vmx(vcpu);
4798 if (kvm_x86_ops.pmu_ops->is_valid_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL)) {
4799 vmx->nested.msrs.entry_ctls_high |=
4800 VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL;
4801 vmx->nested.msrs.exit_ctls_high |=
4802 VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL;
4803 } else {
4804 vmx->nested.msrs.entry_ctls_high &=
4805 ~VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL;
4806 vmx->nested.msrs.exit_ctls_high &=
4807 ~VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL;
4808 }
4809}
4810
4811static int nested_vmx_get_vmptr(struct kvm_vcpu *vcpu, gpa_t *vmpointer,
4812 int *ret)
4813{
4814 gva_t gva;
4815 struct x86_exception e;
4816 int r;
4817
4818 if (get_vmx_mem_address(vcpu, vmx_get_exit_qual(vcpu),
4819 vmcs_read32(VMX_INSTRUCTION_INFO), false,
4820 sizeof(*vmpointer), &gva)) {
4821 *ret = 1;
4822 return -EINVAL;
4823 }
4824
4825 r = kvm_read_guest_virt(vcpu, gva, vmpointer, sizeof(*vmpointer), &e);
4826 if (r != X86EMUL_CONTINUE) {
4827 *ret = kvm_handle_memory_failure(vcpu, r, &e);
4828 return -EINVAL;
4829 }
4830
4831 return 0;
4832}
4833
4834/*
4835 * Allocate a shadow VMCS and associate it with the currently loaded
4836 * VMCS, unless such a shadow VMCS already exists.  The newly allocated
4837 * VMCS is also VMCLEARed, so that it is ready for use.
4838 */
4839static struct vmcs *alloc_shadow_vmcs(struct kvm_vcpu *vcpu)
4840{
4841 struct vcpu_vmx *vmx = to_vmx(vcpu);
4842 struct loaded_vmcs *loaded_vmcs = vmx->loaded_vmcs;
4843
4844
4845
4846
4847
4848
4849
4850 WARN_ON(loaded_vmcs == &vmx->vmcs01 && loaded_vmcs->shadow_vmcs);
4851
4852 if (!loaded_vmcs->shadow_vmcs) {
4853 loaded_vmcs->shadow_vmcs = alloc_vmcs(true);
4854 if (loaded_vmcs->shadow_vmcs)
4855 vmcs_clear(loaded_vmcs->shadow_vmcs);
4856 }
4857 return loaded_vmcs->shadow_vmcs;
4858}
4859
4860static int enter_vmx_operation(struct kvm_vcpu *vcpu)
4861{
4862 struct vcpu_vmx *vmx = to_vmx(vcpu);
4863 int r;
4864
4865 r = alloc_loaded_vmcs(&vmx->nested.vmcs02);
4866 if (r < 0)
4867 goto out_vmcs02;
4868
4869 vmx->nested.cached_vmcs12 = kzalloc(VMCS12_SIZE, GFP_KERNEL_ACCOUNT);
4870 if (!vmx->nested.cached_vmcs12)
4871 goto out_cached_vmcs12;
4872
4873 vmx->nested.cached_shadow_vmcs12 = kzalloc(VMCS12_SIZE, GFP_KERNEL_ACCOUNT);
4874 if (!vmx->nested.cached_shadow_vmcs12)
4875 goto out_cached_shadow_vmcs12;
4876
4877 if (enable_shadow_vmcs && !alloc_shadow_vmcs(vcpu))
4878 goto out_shadow_vmcs;
4879
4880 hrtimer_init(&vmx->nested.preemption_timer, CLOCK_MONOTONIC,
4881 HRTIMER_MODE_ABS_PINNED);
4882 vmx->nested.preemption_timer.function = vmx_preemption_timer_fn;
4883
4884 vmx->nested.vpid02 = allocate_vpid();
4885
4886 vmx->nested.vmcs02_initialized = false;
4887 vmx->nested.vmxon = true;
4888
4889 if (vmx_pt_mode_is_host_guest()) {
4890 vmx->pt_desc.guest.ctl = 0;
4891 pt_update_intercept_for_msr(vcpu);
4892 }
4893
4894 return 0;
4895
4896out_shadow_vmcs:
4897 kfree(vmx->nested.cached_shadow_vmcs12);
4898
4899out_cached_shadow_vmcs12:
4900 kfree(vmx->nested.cached_vmcs12);
4901
4902out_cached_vmcs12:
4903 free_loaded_vmcs(&vmx->nested.vmcs02);
4904
4905out_vmcs02:
4906 return -ENOMEM;
4907}
4908
4909/* Emulate the VMXON instruction. */
4910static int handle_vmon(struct kvm_vcpu *vcpu)
4911{
4912 int ret;
4913 gpa_t vmptr;
4914 uint32_t revision;
4915 struct vcpu_vmx *vmx = to_vmx(vcpu);
4916 const u64 VMXON_NEEDED_FEATURES = FEAT_CTL_LOCKED
4917 | FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX;
4918
4919	/*
4920	 * The Intel VMX Instruction Reference lists a bunch of bits that are
4921	 * prerequisite to running VMXON, most notably CR4.VMXE must be set to
4922	 * 1; otherwise we should fail with #UD.  But most faulting conditions
4923	 * have already been checked by hardware, prior to the VM-exit for
4924	 * VMXON.  We do test guest CR4.VMXE because the processor CR4 always
4925	 * has that bit set to 1 in non-root mode when the CPU is running in
4926	 * VMX operation, so it cannot be relied on to reflect the guest's view.
4927	 */
4928 if (!kvm_read_cr4_bits(vcpu, X86_CR4_VMXE)) {
4929 kvm_queue_exception(vcpu, UD_VECTOR);
4930 return 1;
4931 }
4932
4933
4934 if (vmx_get_cpl(vcpu)) {
4935 kvm_inject_gp(vcpu, 0);
4936 return 1;
4937 }
4938
4939 if (vmx->nested.vmxon)
4940 return nested_vmx_fail(vcpu, VMXERR_VMXON_IN_VMX_ROOT_OPERATION);
4941
4942 if ((vmx->msr_ia32_feature_control & VMXON_NEEDED_FEATURES)
4943 != VMXON_NEEDED_FEATURES) {
4944 kvm_inject_gp(vcpu, 0);
4945 return 1;
4946 }
4947
4948 if (nested_vmx_get_vmptr(vcpu, &vmptr, &ret))
4949 return ret;
4950
4951
4952
4953
4954
4955
4956
4957
4958
4959 if (!page_address_valid(vcpu, vmptr))
4960 return nested_vmx_failInvalid(vcpu);
4961
4962 if (kvm_read_guest(vcpu->kvm, vmptr, &revision, sizeof(revision)) ||
4963 revision != VMCS12_REVISION)
4964 return nested_vmx_failInvalid(vcpu);
4965
4966 vmx->nested.vmxon_ptr = vmptr;
4967 ret = enter_vmx_operation(vcpu);
4968 if (ret)
4969 return ret;
4970
4971 return nested_vmx_succeed(vcpu);
4972}
4973
4974static inline void nested_release_vmcs12(struct kvm_vcpu *vcpu)
4975{
4976 struct vcpu_vmx *vmx = to_vmx(vcpu);
4977
4978 if (vmx->nested.current_vmptr == -1ull)
4979 return;
4980
4981 copy_vmcs02_to_vmcs12_rare(vcpu, get_vmcs12(vcpu));
4982
4983 if (enable_shadow_vmcs) {
4984
4985
4986 copy_shadow_to_vmcs12(vmx);
4987 vmx_disable_shadow_vmcs(vmx);
4988 }
4989 vmx->nested.posted_intr_nv = -1;
4990
4991
4992 kvm_vcpu_write_guest_page(vcpu,
4993 vmx->nested.current_vmptr >> PAGE_SHIFT,
4994 vmx->nested.cached_vmcs12, 0, VMCS12_SIZE);
4995
4996 kvm_mmu_free_roots(vcpu, &vcpu->arch.guest_mmu, KVM_MMU_ROOTS_ALL);
4997
4998 vmx->nested.current_vmptr = -1ull;
4999}
5000
5001/* Emulate the VMXOFF instruction */
5002static int handle_vmoff(struct kvm_vcpu *vcpu)
5003{
5004 if (!nested_vmx_check_permission(vcpu))
5005 return 1;
5006
5007 free_nested(vcpu);
5008
5009
5010 kvm_make_request(KVM_REQ_EVENT, vcpu);
5011
5012 return nested_vmx_succeed(vcpu);
5013}
5014
5015/* Emulate the VMCLEAR instruction */
5016static int handle_vmclear(struct kvm_vcpu *vcpu)
5017{
5018 struct vcpu_vmx *vmx = to_vmx(vcpu);
5019 u32 zero = 0;
5020 gpa_t vmptr;
5021 u64 evmcs_gpa;
5022 int r;
5023
5024 if (!nested_vmx_check_permission(vcpu))
5025 return 1;
5026
5027 if (nested_vmx_get_vmptr(vcpu, &vmptr, &r))
5028 return r;
5029
5030 if (!page_address_valid(vcpu, vmptr))
5031 return nested_vmx_fail(vcpu, VMXERR_VMCLEAR_INVALID_ADDRESS);
5032
5033 if (vmptr == vmx->nested.vmxon_ptr)
5034 return nested_vmx_fail(vcpu, VMXERR_VMCLEAR_VMXON_POINTER);
5035
5036
5037
5038
5039
5040
5041
5042
5043
5044
5045
5046 if (likely(!vmx->nested.enlightened_vmcs_enabled ||
5047 !nested_enlightened_vmentry(vcpu, &evmcs_gpa))) {
5048 if (vmptr == vmx->nested.current_vmptr)
5049 nested_release_vmcs12(vcpu);
5050
5051 kvm_vcpu_write_guest(vcpu,
5052 vmptr + offsetof(struct vmcs12,
5053 launch_state),
5054 &zero, sizeof(zero));
5055 } else if (vmx->nested.hv_evmcs && vmptr == vmx->nested.hv_evmcs_vmptr) {
5056 nested_release_evmcs(vcpu);
5057 }
5058
5059 return nested_vmx_succeed(vcpu);
5060}
5061
5062/* Emulate the VMLAUNCH instruction */
5063static int handle_vmlaunch(struct kvm_vcpu *vcpu)
5064{
5065 return nested_vmx_run(vcpu, true);
5066}
5067
5068/* Emulate the VMRESUME instruction */
5069static int handle_vmresume(struct kvm_vcpu *vcpu)
5070{
5071
5072 return nested_vmx_run(vcpu, false);
5073}
5074
5075static int handle_vmread(struct kvm_vcpu *vcpu)
5076{
5077 struct vmcs12 *vmcs12 = is_guest_mode(vcpu) ? get_shadow_vmcs12(vcpu)
5078 : get_vmcs12(vcpu);
5079 unsigned long exit_qualification = vmx_get_exit_qual(vcpu);
5080 u32 instr_info = vmcs_read32(VMX_INSTRUCTION_INFO);
5081 struct vcpu_vmx *vmx = to_vmx(vcpu);
5082 struct x86_exception e;
5083 unsigned long field;
5084 u64 value;
5085 gva_t gva = 0;
5086 short offset;
5087 int len, r;
5088
5089 if (!nested_vmx_check_permission(vcpu))
5090 return 1;
5091
5092	/*
5093	 * In VMX non-root operation, when the VMCS-link pointer is -1ull,
5094	 * any VMREAD sets the ALU flags for VMfailInvalid.
5095	 */
5096 if (vmx->nested.current_vmptr == -1ull ||
5097 (is_guest_mode(vcpu) &&
5098 get_vmcs12(vcpu)->vmcs_link_pointer == -1ull))
5099 return nested_vmx_failInvalid(vcpu);
5100
5101
5102 field = kvm_register_read(vcpu, (((instr_info) >> 28) & 0xf));
5103
5104 offset = vmcs_field_to_offset(field);
5105 if (offset < 0)
5106 return nested_vmx_fail(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
5107
5108 if (!is_guest_mode(vcpu) && is_vmcs12_ext_field(field))
5109 copy_vmcs02_to_vmcs12_rare(vcpu, vmcs12);
5110
5111
5112 value = vmcs12_read_any(vmcs12, field, offset);
5113
5114
5115
5116
5117
5118
5119 if (instr_info & BIT(10)) {
5120 kvm_register_write(vcpu, (((instr_info) >> 3) & 0xf), value);
5121 } else {
5122 len = is_64_bit_mode(vcpu) ? 8 : 4;
5123 if (get_vmx_mem_address(vcpu, exit_qualification,
5124 instr_info, true, len, &gva))
5125 return 1;
5126
5127 r = kvm_write_guest_virt_system(vcpu, gva, &value, len, &e);
5128 if (r != X86EMUL_CONTINUE)
5129 return kvm_handle_memory_failure(vcpu, r, &e);
5130 }
5131
5132 return nested_vmx_succeed(vcpu);
5133}
5134
5135static bool is_shadow_field_rw(unsigned long field)
5136{
5137 switch (field) {
5138#define SHADOW_FIELD_RW(x, y) case x:
5139#include "vmcs_shadow_fields.h"
5140 return true;
5141 default:
5142 break;
5143 }
5144 return false;
5145}
5146
5147static bool is_shadow_field_ro(unsigned long field)
5148{
5149 switch (field) {
5150#define SHADOW_FIELD_RO(x, y) case x:
5151#include "vmcs_shadow_fields.h"
5152 return true;
5153 default:
5154 break;
5155 }
5156 return false;
5157}
5158
5159static int handle_vmwrite(struct kvm_vcpu *vcpu)
5160{
5161 struct vmcs12 *vmcs12 = is_guest_mode(vcpu) ? get_shadow_vmcs12(vcpu)
5162 : get_vmcs12(vcpu);
5163 unsigned long exit_qualification = vmx_get_exit_qual(vcpu);
5164 u32 instr_info = vmcs_read32(VMX_INSTRUCTION_INFO);
5165 struct vcpu_vmx *vmx = to_vmx(vcpu);
5166 struct x86_exception e;
5167 unsigned long field;
5168 short offset;
5169 gva_t gva;
5170 int len, r;
5171
5172
5173
5174
5175
5176
5177
5178
5179 u64 value = 0;
5180
5181 if (!nested_vmx_check_permission(vcpu))
5182 return 1;
5183
5184	/*
5185	 * In VMX non-root operation, when the VMCS-link pointer is -1ull,
5186	 * any VMWRITE sets the ALU flags for VMfailInvalid.
5187	 */
5188 if (vmx->nested.current_vmptr == -1ull ||
5189 (is_guest_mode(vcpu) &&
5190 get_vmcs12(vcpu)->vmcs_link_pointer == -1ull))
5191 return nested_vmx_failInvalid(vcpu);
5192
5193 if (instr_info & BIT(10))
5194 value = kvm_register_read(vcpu, (((instr_info) >> 3) & 0xf));
5195 else {
5196 len = is_64_bit_mode(vcpu) ? 8 : 4;
5197 if (get_vmx_mem_address(vcpu, exit_qualification,
5198 instr_info, false, len, &gva))
5199 return 1;
5200 r = kvm_read_guest_virt(vcpu, gva, &value, len, &e);
5201 if (r != X86EMUL_CONTINUE)
5202 return kvm_handle_memory_failure(vcpu, r, &e);
5203 }
5204
5205 field = kvm_register_read(vcpu, (((instr_info) >> 28) & 0xf));
5206
5207 offset = vmcs_field_to_offset(field);
5208 if (offset < 0)
5209 return nested_vmx_fail(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
5210
5211
5212
5213
5214
5215 if (vmcs_field_readonly(field) &&
5216 !nested_cpu_has_vmwrite_any_field(vcpu))
5217 return nested_vmx_fail(vcpu, VMXERR_VMWRITE_READ_ONLY_VMCS_COMPONENT);
5218
5219
5220
5221
5222
5223 if (!is_guest_mode(vcpu) && !is_shadow_field_rw(field))
5224 copy_vmcs02_to_vmcs12_rare(vcpu, vmcs12);
5225
5226
5227
5228
5229
5230
5231
5232
5233
5234 if (field >= GUEST_ES_AR_BYTES && field <= GUEST_TR_AR_BYTES)
5235 value &= 0x1f0ff;
5236
5237 vmcs12_write_any(vmcs12, field, offset, value);
5238
5239
5240
5241
5242
5243
5244
5245 if (!is_guest_mode(vcpu) && !is_shadow_field_rw(field)) {
5246
5247
5248
5249
5250 if (enable_shadow_vmcs && is_shadow_field_ro(field)) {
5251 preempt_disable();
5252 vmcs_load(vmx->vmcs01.shadow_vmcs);
5253
5254 __vmcs_writel(field, value);
5255
5256 vmcs_clear(vmx->vmcs01.shadow_vmcs);
5257 vmcs_load(vmx->loaded_vmcs->vmcs);
5258 preempt_enable();
5259 }
5260 vmx->nested.dirty_vmcs12 = true;
5261 }
5262
5263 return nested_vmx_succeed(vcpu);
5264}
5265
5266static void set_current_vmptr(struct vcpu_vmx *vmx, gpa_t vmptr)
5267{
5268 vmx->nested.current_vmptr = vmptr;
5269 if (enable_shadow_vmcs) {
5270 secondary_exec_controls_setbit(vmx, SECONDARY_EXEC_SHADOW_VMCS);
5271 vmcs_write64(VMCS_LINK_POINTER,
5272 __pa(vmx->vmcs01.shadow_vmcs));
5273 vmx->nested.need_vmcs12_to_shadow_sync = true;
5274 }
5275 vmx->nested.dirty_vmcs12 = true;
5276}
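/*
 * Note: loading a new current VMCS pointer re-enables VMCS shadowing for
 * vmcs01 (when shadow VMCS is supported), forces a vmcs12 -> shadow VMCS
 * sync, and marks vmcs12 dirty so the next nested VM-Enter refreshes vmcs02
 * from the newly loaded vmcs12.
 */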
5277
5278/* Emulate the VMPTRLD instruction */
5279static int handle_vmptrld(struct kvm_vcpu *vcpu)
5280{
5281 struct vcpu_vmx *vmx = to_vmx(vcpu);
5282 gpa_t vmptr;
5283 int r;
5284
5285 if (!nested_vmx_check_permission(vcpu))
5286 return 1;
5287
5288 if (nested_vmx_get_vmptr(vcpu, &vmptr, &r))
5289 return r;
5290
5291 if (!page_address_valid(vcpu, vmptr))
5292 return nested_vmx_fail(vcpu, VMXERR_VMPTRLD_INVALID_ADDRESS);
5293
5294 if (vmptr == vmx->nested.vmxon_ptr)
5295 return nested_vmx_fail(vcpu, VMXERR_VMPTRLD_VMXON_POINTER);
5296
5297
5298 if (evmptr_is_valid(vmx->nested.hv_evmcs_vmptr))
5299 return 1;
5300
5301 if (vmx->nested.current_vmptr != vmptr) {
5302 struct kvm_host_map map;
5303 struct vmcs12 *new_vmcs12;
5304
5305 if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmptr), &map)) {
5306
5307
5308
5309
5310
5311
5312 return nested_vmx_fail(vcpu,
5313 VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID);
5314 }
5315
5316 new_vmcs12 = map.hva;
5317
5318 if (new_vmcs12->hdr.revision_id != VMCS12_REVISION ||
5319 (new_vmcs12->hdr.shadow_vmcs &&
5320 !nested_cpu_has_vmx_shadow_vmcs(vcpu))) {
5321 kvm_vcpu_unmap(vcpu, &map, false);
5322 return nested_vmx_fail(vcpu,
5323 VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID);
5324 }
5325
5326 nested_release_vmcs12(vcpu);
5327
5328
5329
5330
5331
5332 memcpy(vmx->nested.cached_vmcs12, new_vmcs12, VMCS12_SIZE);
5333 kvm_vcpu_unmap(vcpu, &map, false);
5334
5335 set_current_vmptr(vmx, vmptr);
5336 }
5337
5338 return nested_vmx_succeed(vcpu);
5339}
5340
5341/* Emulate the VMPTRST instruction */
5342static int handle_vmptrst(struct kvm_vcpu *vcpu)
5343{
5344 unsigned long exit_qual = vmx_get_exit_qual(vcpu);
5345 u32 instr_info = vmcs_read32(VMX_INSTRUCTION_INFO);
5346 gpa_t current_vmptr = to_vmx(vcpu)->nested.current_vmptr;
5347 struct x86_exception e;
5348 gva_t gva;
5349 int r;
5350
5351 if (!nested_vmx_check_permission(vcpu))
5352 return 1;
5353
5354 if (unlikely(evmptr_is_valid(to_vmx(vcpu)->nested.hv_evmcs_vmptr)))
5355 return 1;
5356
5357 if (get_vmx_mem_address(vcpu, exit_qual, instr_info,
5358 true, sizeof(gpa_t), &gva))
5359 return 1;
5360
5361	r = kvm_write_guest_virt_system(vcpu, gva, (void *)&current_vmptr,
5362 sizeof(gpa_t), &e);
5363 if (r != X86EMUL_CONTINUE)
5364 return kvm_handle_memory_failure(vcpu, r, &e);
5365
5366 return nested_vmx_succeed(vcpu);
5367}
5368
5369/* Emulate the INVEPT instruction */
5370static int handle_invept(struct kvm_vcpu *vcpu)
5371{
5372 struct vcpu_vmx *vmx = to_vmx(vcpu);
5373 u32 vmx_instruction_info, types;
5374 unsigned long type, roots_to_free;
5375 struct kvm_mmu *mmu;
5376 gva_t gva;
5377 struct x86_exception e;
5378 struct {
5379 u64 eptp, gpa;
5380 } operand;
5381 int i, r;
5382
5383 if (!(vmx->nested.msrs.secondary_ctls_high &
5384 SECONDARY_EXEC_ENABLE_EPT) ||
5385 !(vmx->nested.msrs.ept_caps & VMX_EPT_INVEPT_BIT)) {
5386 kvm_queue_exception(vcpu, UD_VECTOR);
5387 return 1;
5388 }
5389
5390 if (!nested_vmx_check_permission(vcpu))
5391 return 1;
5392
5393 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
5394 type = kvm_register_read(vcpu, (vmx_instruction_info >> 28) & 0xf);
5395
5396 types = (vmx->nested.msrs.ept_caps >> VMX_EPT_EXTENT_SHIFT) & 6;
5397
5398 if (type >= 32 || !(types & (1 << type)))
5399 return nested_vmx_fail(vcpu, VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
5400
5401
5402
5403
5404 if (get_vmx_mem_address(vcpu, vmx_get_exit_qual(vcpu),
5405 vmx_instruction_info, false, sizeof(operand), &gva))
5406 return 1;
5407 r = kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e);
5408 if (r != X86EMUL_CONTINUE)
5409 return kvm_handle_memory_failure(vcpu, r, &e);
5410
5411
5412
5413
5414
5415 mmu = &vcpu->arch.guest_mmu;
5416
5417 switch (type) {
5418 case VMX_EPT_EXTENT_CONTEXT:
5419 if (!nested_vmx_check_eptp(vcpu, operand.eptp))
5420 return nested_vmx_fail(vcpu,
5421 VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
5422
5423 roots_to_free = 0;
5424 if (nested_ept_root_matches(mmu->root_hpa, mmu->root_pgd,
5425 operand.eptp))
5426 roots_to_free |= KVM_MMU_ROOT_CURRENT;
5427
5428 for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) {
5429 if (nested_ept_root_matches(mmu->prev_roots[i].hpa,
5430 mmu->prev_roots[i].pgd,
5431 operand.eptp))
5432 roots_to_free |= KVM_MMU_ROOT_PREVIOUS(i);
5433 }
5434 break;
5435 case VMX_EPT_EXTENT_GLOBAL:
5436 roots_to_free = KVM_MMU_ROOTS_ALL;
5437 break;
5438 default:
5439 BUG();
5440 break;
5441 }
5442
5443 if (roots_to_free)
5444 kvm_mmu_free_roots(vcpu, mmu, roots_to_free);
5445
5446 return nested_vmx_succeed(vcpu);
5447}
5448
5449static int handle_invvpid(struct kvm_vcpu *vcpu)
5450{
5451 struct vcpu_vmx *vmx = to_vmx(vcpu);
5452 u32 vmx_instruction_info;
5453 unsigned long type, types;
5454 gva_t gva;
5455 struct x86_exception e;
5456 struct {
5457 u64 vpid;
5458 u64 gla;
5459 } operand;
5460 u16 vpid02;
5461 int r;
5462
5463 if (!(vmx->nested.msrs.secondary_ctls_high &
5464 SECONDARY_EXEC_ENABLE_VPID) ||
5465 !(vmx->nested.msrs.vpid_caps & VMX_VPID_INVVPID_BIT)) {
5466 kvm_queue_exception(vcpu, UD_VECTOR);
5467 return 1;
5468 }
5469
5470 if (!nested_vmx_check_permission(vcpu))
5471 return 1;
5472
5473 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
5474 type = kvm_register_read(vcpu, (vmx_instruction_info >> 28) & 0xf);
5475
5476 types = (vmx->nested.msrs.vpid_caps &
5477 VMX_VPID_EXTENT_SUPPORTED_MASK) >> 8;
5478
5479 if (type >= 32 || !(types & (1 << type)))
5480 return nested_vmx_fail(vcpu,
5481 VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
5482
5483
5484
5485
5486 if (get_vmx_mem_address(vcpu, vmx_get_exit_qual(vcpu),
5487 vmx_instruction_info, false, sizeof(operand), &gva))
5488 return 1;
5489 r = kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e);
5490 if (r != X86EMUL_CONTINUE)
5491 return kvm_handle_memory_failure(vcpu, r, &e);
5492
5493 if (operand.vpid >> 16)
5494 return nested_vmx_fail(vcpu,
5495 VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
5496
5497 vpid02 = nested_get_vpid02(vcpu);
5498 switch (type) {
5499 case VMX_VPID_EXTENT_INDIVIDUAL_ADDR:
5500 if (!operand.vpid ||
5501 is_noncanonical_address(operand.gla, vcpu))
5502 return nested_vmx_fail(vcpu,
5503 VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
5504 vpid_sync_vcpu_addr(vpid02, operand.gla);
5505 break;
5506 case VMX_VPID_EXTENT_SINGLE_CONTEXT:
5507 case VMX_VPID_EXTENT_SINGLE_NON_GLOBAL:
5508 if (!operand.vpid)
5509 return nested_vmx_fail(vcpu,
5510 VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
5511 vpid_sync_context(vpid02);
5512 break;
5513 case VMX_VPID_EXTENT_ALL_CONTEXT:
5514 vpid_sync_context(vpid02);
5515 break;
5516 default:
5517 WARN_ON_ONCE(1);
5518 return kvm_skip_emulated_instruction(vcpu);
5519 }
5520
5521
5522
5523
5524
5525
5526
5527
5528
5529
5530
5531 if (!enable_ept)
5532 kvm_mmu_free_guest_mode_roots(vcpu, &vcpu->arch.root_mmu);
5533
5534 return nested_vmx_succeed(vcpu);
5535}
5536
5537static int nested_vmx_eptp_switching(struct kvm_vcpu *vcpu,
5538 struct vmcs12 *vmcs12)
5539{
5540 u32 index = kvm_rcx_read(vcpu);
5541 u64 new_eptp;
5542
5543 if (WARN_ON_ONCE(!nested_cpu_has_ept(vmcs12)))
5544 return 1;
5545 if (index >= VMFUNC_EPTP_ENTRIES)
5546 return 1;
5547
5548 if (kvm_vcpu_read_guest_page(vcpu, vmcs12->eptp_list_address >> PAGE_SHIFT,
5549 &new_eptp, index * 8, 8))
5550 return 1;
5551
5552
5553
5554
5555
5556 if (vmcs12->ept_pointer != new_eptp) {
5557 if (!nested_vmx_check_eptp(vcpu, new_eptp))
5558 return 1;
5559
5560 vmcs12->ept_pointer = new_eptp;
5561 nested_ept_new_eptp(vcpu);
5562
5563 if (!nested_cpu_has_vpid(vmcs12))
5564 kvm_make_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu);
5565 }
5566
5567 return 0;
5568}
5569
5570static int handle_vmfunc(struct kvm_vcpu *vcpu)
5571{
5572 struct vcpu_vmx *vmx = to_vmx(vcpu);
5573 struct vmcs12 *vmcs12;
5574 u32 function = kvm_rax_read(vcpu);
5575
5576
5577
5578
5579
5580
5581 if (!is_guest_mode(vcpu)) {
5582 kvm_queue_exception(vcpu, UD_VECTOR);
5583 return 1;
5584 }
5585
5586 vmcs12 = get_vmcs12(vcpu);
5587
5588
5589
5590
5591
5592 if (WARN_ON_ONCE((function > 63) || !nested_cpu_has_vmfunc(vmcs12))) {
5593 kvm_queue_exception(vcpu, UD_VECTOR);
5594 return 1;
5595 }
5596
5597 if (!(vmcs12->vm_function_control & BIT_ULL(function)))
5598 goto fail;
5599
5600 switch (function) {
5601 case 0:
5602 if (nested_vmx_eptp_switching(vcpu, vmcs12))
5603 goto fail;
5604 break;
5605 default:
5606 goto fail;
5607 }
5608 return kvm_skip_emulated_instruction(vcpu);
5609
5610fail:
5611
5612
5613
5614
5615
5616 nested_vmx_vmexit(vcpu, vmx->exit_reason.full,
5617 vmx_get_intr_info(vcpu),
5618 vmx_get_exit_qual(vcpu));
5619 return 1;
5620}
5621
5622
5623
5624
5625
5626bool nested_vmx_check_io_bitmaps(struct kvm_vcpu *vcpu, unsigned int port,
5627 int size)
5628{
5629 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
5630 gpa_t bitmap, last_bitmap;
5631 u8 b;
5632
5633 last_bitmap = (gpa_t)-1;
5634 b = -1;
5635
5636 while (size > 0) {
5637 if (port < 0x8000)
5638 bitmap = vmcs12->io_bitmap_a;
5639 else if (port < 0x10000)
5640 bitmap = vmcs12->io_bitmap_b;
5641 else
5642 return true;
5643 bitmap += (port & 0x7fff) / 8;
5644
5645 if (last_bitmap != bitmap)
5646 if (kvm_vcpu_read_guest(vcpu, bitmap, &b, 1))
5647 return true;
5648 if (b & (1 << (port & 7)))
5649 return true;
5650
5651 port++;
5652 size--;
5653 last_bitmap = bitmap;
5654 }
5655
5656 return false;
5657}
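/*
 * Illustrative example (hypothetical access): an OUT to port 0x3f8 with
 * size 1 consults io_bitmap_a (port < 0x8000) at byte (0x3f8 & 0x7fff) / 8 =
 * 0x7f, bit 0x3f8 & 7 = 0; the access is considered intercepted if that bit
 * is set or the bitmap cannot be read.
 */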
5658
5659static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu,
5660 struct vmcs12 *vmcs12)
5661{
5662 unsigned long exit_qualification;
5663 unsigned short port;
5664 int size;
5665
5666 if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS))
5667 return nested_cpu_has(vmcs12, CPU_BASED_UNCOND_IO_EXITING);
5668
5669 exit_qualification = vmx_get_exit_qual(vcpu);
5670
5671 port = exit_qualification >> 16;
5672 size = (exit_qualification & 7) + 1;
5673
5674 return nested_vmx_check_io_bitmaps(vcpu, port, size);
5675}
5676
5677
5678
5679
5680
5681
5682
5683static bool nested_vmx_exit_handled_msr(struct kvm_vcpu *vcpu,
5684 struct vmcs12 *vmcs12,
5685 union vmx_exit_reason exit_reason)
5686{
5687 u32 msr_index = kvm_rcx_read(vcpu);
5688 gpa_t bitmap;
5689
5690 if (!nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS))
5691 return true;
5692
5693
5694
5695
5696
5697
5698 bitmap = vmcs12->msr_bitmap;
5699 if (exit_reason.basic == EXIT_REASON_MSR_WRITE)
5700 bitmap += 2048;
5701 if (msr_index >= 0xc0000000) {
5702 msr_index -= 0xc0000000;
5703 bitmap += 1024;
5704 }
5705
5706
5707 if (msr_index < 1024*8) {
5708 unsigned char b;
5709 if (kvm_vcpu_read_guest(vcpu, bitmap + msr_index/8, &b, 1))
5710 return true;
5711 return 1 & (b >> (msr_index & 7));
5712 } else
5713 return true;
5714}
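/*
 * Illustrative example (hypothetical access): the 4K MSR bitmap holds the
 * read-low, read-high, write-low and write-high regions at offsets 0, 1024,
 * 2048 and 3072 respectively, so a WRMSR to MSR 0xc0000080 (EFER) tests byte
 * 2048 + 1024 + (0x80 / 8) = 3088, bit 0, of vmcs12->msr_bitmap.
 */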
5715
5716
5717
5718
5719
5720
5721static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu,
5722 struct vmcs12 *vmcs12)
5723{
5724 unsigned long exit_qualification = vmx_get_exit_qual(vcpu);
5725 int cr = exit_qualification & 15;
5726 int reg;
5727 unsigned long val;
5728
5729 switch ((exit_qualification >> 4) & 3) {
5730 case 0:
5731 reg = (exit_qualification >> 8) & 15;
5732 val = kvm_register_read(vcpu, reg);
5733 switch (cr) {
5734 case 0:
5735 if (vmcs12->cr0_guest_host_mask &
5736 (val ^ vmcs12->cr0_read_shadow))
5737 return true;
5738 break;
5739 case 3:
5740 if (nested_cpu_has(vmcs12, CPU_BASED_CR3_LOAD_EXITING))
5741 return true;
5742 break;
5743 case 4:
5744 if (vmcs12->cr4_guest_host_mask &
5745 (vmcs12->cr4_read_shadow ^ val))
5746 return true;
5747 break;
5748 case 8:
5749 if (nested_cpu_has(vmcs12, CPU_BASED_CR8_LOAD_EXITING))
5750 return true;
5751 break;
5752 }
5753 break;
5754 case 2:
5755 if ((vmcs12->cr0_guest_host_mask & X86_CR0_TS) &&
5756 (vmcs12->cr0_read_shadow & X86_CR0_TS))
5757 return true;
5758 break;
5759 case 1:
5760 switch (cr) {
5761 case 3:
5762 if (vmcs12->cpu_based_vm_exec_control &
5763 CPU_BASED_CR3_STORE_EXITING)
5764 return true;
5765 break;
5766 case 8:
5767 if (vmcs12->cpu_based_vm_exec_control &
5768 CPU_BASED_CR8_STORE_EXITING)
5769 return true;
5770 break;
5771 }
5772 break;
5773 case 3:
5774
5775
5776
5777
5778 val = (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f;
5779 if (vmcs12->cr0_guest_host_mask & 0xe &
5780 (val ^ vmcs12->cr0_read_shadow))
5781 return true;
5782 if ((vmcs12->cr0_guest_host_mask & 0x1) &&
5783 !(vmcs12->cr0_read_shadow & 0x1) &&
5784 (val & 0x1))
5785 return true;
5786 break;
5787 }
5788 return false;
5789}
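/*
 * Illustrative example (hypothetical access): for "mov %rax, %cr0" the exit
 * qualification encodes cr = 0 (bits 3:0), access type 0 = MOV to CR
 * (bits 5:4) and reg = 0/RAX (bits 11:8); the exit is reflected to L1 only
 * if the new value differs from cr0_read_shadow in a bit that L1 owns via
 * cr0_guest_host_mask.
 */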
5790
5791static bool nested_vmx_exit_handled_encls(struct kvm_vcpu *vcpu,
5792 struct vmcs12 *vmcs12)
5793{
5794 u32 encls_leaf;
5795
5796 if (!guest_cpuid_has(vcpu, X86_FEATURE_SGX) ||
5797 !nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENCLS_EXITING))
5798 return false;
5799
5800 encls_leaf = kvm_rax_read(vcpu);
5801 if (encls_leaf > 62)
5802 encls_leaf = 63;
5803 return vmcs12->encls_exiting_bitmap & BIT_ULL(encls_leaf);
5804}
5805
5806static bool nested_vmx_exit_handled_vmcs_access(struct kvm_vcpu *vcpu,
5807 struct vmcs12 *vmcs12, gpa_t bitmap)
5808{
5809 u32 vmx_instruction_info;
5810 unsigned long field;
5811 u8 b;
5812
5813 if (!nested_cpu_has_shadow_vmcs(vmcs12))
5814 return true;
5815
5816
5817 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
5818 field = kvm_register_read(vcpu, (((vmx_instruction_info) >> 28) & 0xf));
5819
5820
5821 if (field >> 15)
5822 return true;
5823
5824 if (kvm_vcpu_read_guest(vcpu, bitmap + field/8, &b, 1))
5825 return true;
5826
5827 return 1 & (b >> (field & 7));
5828}
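/*
 * Illustrative example (hypothetical access): an L2 VMWRITE to GUEST_RIP
 * (encoding 0x681e) has bit 15 clear, so the shadow-VMCS vmwrite bitmap is
 * consulted at byte 0x681e / 8 = 0xd03, bit 0x681e & 7 = 6; the exit goes to
 * L1 only if that bit is set or the bitmap cannot be read.
 */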
5829
5830static bool nested_vmx_exit_handled_mtf(struct vmcs12 *vmcs12)
5831{
5832 u32 entry_intr_info = vmcs12->vm_entry_intr_info_field;
5833
5834 if (nested_cpu_has_mtf(vmcs12))
5835 return true;
5836
5837
5838
5839
5840
5841
5842
5843 return entry_intr_info == (INTR_INFO_VALID_MASK
5844 | INTR_TYPE_OTHER_EVENT);
5845}
5846
5847/*
5848 * Return true if L0 wants to handle an exit from L2 regardless of whether
5849 * or not L1 wants the exit.  Only call this when in is_guest_mode (L2).
5850 */
5851static bool nested_vmx_l0_wants_exit(struct kvm_vcpu *vcpu,
5852 union vmx_exit_reason exit_reason)
5853{
5854 u32 intr_info;
5855
5856 switch ((u16)exit_reason.basic) {
5857 case EXIT_REASON_EXCEPTION_NMI:
5858 intr_info = vmx_get_intr_info(vcpu);
5859 if (is_nmi(intr_info))
5860 return true;
5861 else if (is_page_fault(intr_info))
5862 return vcpu->arch.apf.host_apf_flags ||
5863 vmx_need_pf_intercept(vcpu);
5864 else if (is_debug(intr_info) &&
5865 vcpu->guest_debug &
5866 (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
5867 return true;
5868 else if (is_breakpoint(intr_info) &&
5869 vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
5870 return true;
5871 else if (is_alignment_check(intr_info) &&
5872 !vmx_guest_inject_ac(vcpu))
5873 return true;
5874 return false;
5875 case EXIT_REASON_EXTERNAL_INTERRUPT:
5876 return true;
5877 case EXIT_REASON_MCE_DURING_VMENTRY:
5878 return true;
5879 case EXIT_REASON_EPT_VIOLATION:
5880
5881
5882
5883
5884
5885
5886 return true;
5887 case EXIT_REASON_EPT_MISCONFIG:
5888
5889
5890
5891
5892
5893
5894 return true;
5895 case EXIT_REASON_PREEMPTION_TIMER:
5896 return true;
5897 case EXIT_REASON_PML_FULL:
5898
5899
5900
5901
5902 return true;
5903 case EXIT_REASON_VMFUNC:
5904
5905 return true;
5906 case EXIT_REASON_BUS_LOCK:
5907
5908
5909
5910
5911 return true;
5912 default:
5913 break;
5914 }
5915 return false;
5916}
5917
5918/*
5919 * Return true if L1 wants to intercept an exit from L2.  Only call this
5920 * when in is_guest_mode (L2).
5921 */
5922static bool nested_vmx_l1_wants_exit(struct kvm_vcpu *vcpu,
5923 union vmx_exit_reason exit_reason)
5924{
5925 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
5926 u32 intr_info;
5927
5928 switch ((u16)exit_reason.basic) {
5929 case EXIT_REASON_EXCEPTION_NMI:
5930 intr_info = vmx_get_intr_info(vcpu);
5931 if (is_nmi(intr_info))
5932 return true;
5933 else if (is_page_fault(intr_info))
5934 return true;
5935 return vmcs12->exception_bitmap &
5936 (1u << (intr_info & INTR_INFO_VECTOR_MASK));
5937 case EXIT_REASON_EXTERNAL_INTERRUPT:
5938 return nested_exit_on_intr(vcpu);
5939 case EXIT_REASON_TRIPLE_FAULT:
5940 return true;
5941 case EXIT_REASON_INTERRUPT_WINDOW:
5942 return nested_cpu_has(vmcs12, CPU_BASED_INTR_WINDOW_EXITING);
5943 case EXIT_REASON_NMI_WINDOW:
5944 return nested_cpu_has(vmcs12, CPU_BASED_NMI_WINDOW_EXITING);
5945 case EXIT_REASON_TASK_SWITCH:
5946 return true;
5947 case EXIT_REASON_CPUID:
5948 return true;
5949 case EXIT_REASON_HLT:
5950 return nested_cpu_has(vmcs12, CPU_BASED_HLT_EXITING);
5951 case EXIT_REASON_INVD:
5952 return true;
5953 case EXIT_REASON_INVLPG:
5954 return nested_cpu_has(vmcs12, CPU_BASED_INVLPG_EXITING);
5955 case EXIT_REASON_RDPMC:
5956 return nested_cpu_has(vmcs12, CPU_BASED_RDPMC_EXITING);
5957 case EXIT_REASON_RDRAND:
5958 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDRAND_EXITING);
5959 case EXIT_REASON_RDSEED:
5960 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDSEED_EXITING);
5961 case EXIT_REASON_RDTSC: case EXIT_REASON_RDTSCP:
5962 return nested_cpu_has(vmcs12, CPU_BASED_RDTSC_EXITING);
5963 case EXIT_REASON_VMREAD:
5964 return nested_vmx_exit_handled_vmcs_access(vcpu, vmcs12,
5965 vmcs12->vmread_bitmap);
5966 case EXIT_REASON_VMWRITE:
5967 return nested_vmx_exit_handled_vmcs_access(vcpu, vmcs12,
5968 vmcs12->vmwrite_bitmap);
5969 case EXIT_REASON_VMCALL: case EXIT_REASON_VMCLEAR:
5970 case EXIT_REASON_VMLAUNCH: case EXIT_REASON_VMPTRLD:
5971 case EXIT_REASON_VMPTRST: case EXIT_REASON_VMRESUME:
5972 case EXIT_REASON_VMOFF: case EXIT_REASON_VMON:
5973 case EXIT_REASON_INVEPT: case EXIT_REASON_INVVPID:
5974
5975
5976
5977
5978 return true;
5979 case EXIT_REASON_CR_ACCESS:
5980 return nested_vmx_exit_handled_cr(vcpu, vmcs12);
5981 case EXIT_REASON_DR_ACCESS:
5982 return nested_cpu_has(vmcs12, CPU_BASED_MOV_DR_EXITING);
5983 case EXIT_REASON_IO_INSTRUCTION:
5984 return nested_vmx_exit_handled_io(vcpu, vmcs12);
5985 case EXIT_REASON_GDTR_IDTR: case EXIT_REASON_LDTR_TR:
5986 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_DESC);
5987 case EXIT_REASON_MSR_READ:
5988 case EXIT_REASON_MSR_WRITE:
5989 return nested_vmx_exit_handled_msr(vcpu, vmcs12, exit_reason);
5990 case EXIT_REASON_INVALID_STATE:
5991 return true;
5992 case EXIT_REASON_MWAIT_INSTRUCTION:
5993 return nested_cpu_has(vmcs12, CPU_BASED_MWAIT_EXITING);
5994 case EXIT_REASON_MONITOR_TRAP_FLAG:
5995 return nested_vmx_exit_handled_mtf(vmcs12);
5996 case EXIT_REASON_MONITOR_INSTRUCTION:
5997 return nested_cpu_has(vmcs12, CPU_BASED_MONITOR_EXITING);
5998 case EXIT_REASON_PAUSE_INSTRUCTION:
5999 return nested_cpu_has(vmcs12, CPU_BASED_PAUSE_EXITING) ||
6000 nested_cpu_has2(vmcs12,
6001 SECONDARY_EXEC_PAUSE_LOOP_EXITING);
6002 case EXIT_REASON_MCE_DURING_VMENTRY:
6003 return true;
6004 case EXIT_REASON_TPR_BELOW_THRESHOLD:
6005 return nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW);
6006 case EXIT_REASON_APIC_ACCESS:
6007 case EXIT_REASON_APIC_WRITE:
6008 case EXIT_REASON_EOI_INDUCED:
6009
6010
6011
6012
6013
6014 return true;
6015 case EXIT_REASON_INVPCID:
6016 return
6017 nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_INVPCID) &&
6018 nested_cpu_has(vmcs12, CPU_BASED_INVLPG_EXITING);
6019 case EXIT_REASON_WBINVD:
6020 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_WBINVD_EXITING);
6021 case EXIT_REASON_XSETBV:
6022 return true;
6023 case EXIT_REASON_XSAVES: case EXIT_REASON_XRSTORS:
6024
6025
6026
6027
6028
6029
6030 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES);
6031 case EXIT_REASON_UMWAIT:
6032 case EXIT_REASON_TPAUSE:
6033 return nested_cpu_has2(vmcs12,
6034 SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE);
6035 case EXIT_REASON_ENCLS:
6036 return nested_vmx_exit_handled_encls(vcpu, vmcs12);
6037 default:
6038 return true;
6039 }
6040}
6041
6042/*
6043 * Conditionally reflect a VM-Exit into L1.  Returns %true if the VM-Exit
6044 * was reflected into L1.
6045 */
6046bool nested_vmx_reflect_vmexit(struct kvm_vcpu *vcpu)
6047{
6048 struct vcpu_vmx *vmx = to_vmx(vcpu);
6049 union vmx_exit_reason exit_reason = vmx->exit_reason;
6050 unsigned long exit_qual;
6051 u32 exit_intr_info;
6052
6053 WARN_ON_ONCE(vmx->nested.nested_run_pending);
6054
6055
6056
6057
6058
6059 if (unlikely(vmx->fail)) {
6060 trace_kvm_nested_vmenter_failed(
6061 "hardware VM-instruction error: ",
6062 vmcs_read32(VM_INSTRUCTION_ERROR));
6063 exit_intr_info = 0;
6064 exit_qual = 0;
6065 goto reflect_vmexit;
6066 }
6067
6068 trace_kvm_nested_vmexit(exit_reason.full, vcpu, KVM_ISA_VMX);
6069
6070
6071 if (nested_vmx_l0_wants_exit(vcpu, exit_reason))
6072 return false;
6073
6074
6075 if (!nested_vmx_l1_wants_exit(vcpu, exit_reason))
6076 return false;
6077
6078
6079
6080
6081
6082
6083
6084 exit_intr_info = vmx_get_intr_info(vcpu);
6085 if (is_exception_with_error_code(exit_intr_info)) {
6086 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
6087
6088 vmcs12->vm_exit_intr_error_code =
6089 vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
6090 }
6091 exit_qual = vmx_get_exit_qual(vcpu);
6092
6093reflect_vmexit:
6094 nested_vmx_vmexit(vcpu, exit_reason.full, exit_intr_info, exit_qual);
6095 return true;
6096}
6097
6098static int vmx_get_nested_state(struct kvm_vcpu *vcpu,
6099 struct kvm_nested_state __user *user_kvm_nested_state,
6100 u32 user_data_size)
6101{
6102 struct vcpu_vmx *vmx;
6103 struct vmcs12 *vmcs12;
6104 struct kvm_nested_state kvm_state = {
6105 .flags = 0,
6106 .format = KVM_STATE_NESTED_FORMAT_VMX,
6107 .size = sizeof(kvm_state),
6108 .hdr.vmx.flags = 0,
6109 .hdr.vmx.vmxon_pa = -1ull,
6110 .hdr.vmx.vmcs12_pa = -1ull,
6111 .hdr.vmx.preemption_timer_deadline = 0,
6112 };
6113 struct kvm_vmx_nested_state_data __user *user_vmx_nested_state =
6114 &user_kvm_nested_state->data.vmx[0];
6115
6116 if (!vcpu)
6117 return kvm_state.size + sizeof(*user_vmx_nested_state);
6118
6119 vmx = to_vmx(vcpu);
6120 vmcs12 = get_vmcs12(vcpu);
6121
6122 if (nested_vmx_allowed(vcpu) &&
6123 (vmx->nested.vmxon || vmx->nested.smm.vmxon)) {
6124 kvm_state.hdr.vmx.vmxon_pa = vmx->nested.vmxon_ptr;
6125 kvm_state.hdr.vmx.vmcs12_pa = vmx->nested.current_vmptr;
6126
6127 if (vmx_has_valid_vmcs12(vcpu)) {
6128 kvm_state.size += sizeof(user_vmx_nested_state->vmcs12);
6129
6130
6131 if (vmx->nested.hv_evmcs_vmptr != EVMPTR_INVALID)
6132 kvm_state.flags |= KVM_STATE_NESTED_EVMCS;
6133
6134 if (is_guest_mode(vcpu) &&
6135 nested_cpu_has_shadow_vmcs(vmcs12) &&
6136 vmcs12->vmcs_link_pointer != -1ull)
6137 kvm_state.size += sizeof(user_vmx_nested_state->shadow_vmcs12);
6138 }
6139
6140 if (vmx->nested.smm.vmxon)
6141 kvm_state.hdr.vmx.smm.flags |= KVM_STATE_NESTED_SMM_VMXON;
6142
6143 if (vmx->nested.smm.guest_mode)
6144 kvm_state.hdr.vmx.smm.flags |= KVM_STATE_NESTED_SMM_GUEST_MODE;
6145
6146 if (is_guest_mode(vcpu)) {
6147 kvm_state.flags |= KVM_STATE_NESTED_GUEST_MODE;
6148
6149 if (vmx->nested.nested_run_pending)
6150 kvm_state.flags |= KVM_STATE_NESTED_RUN_PENDING;
6151
6152 if (vmx->nested.mtf_pending)
6153 kvm_state.flags |= KVM_STATE_NESTED_MTF_PENDING;
6154
6155 if (nested_cpu_has_preemption_timer(vmcs12) &&
6156 vmx->nested.has_preemption_timer_deadline) {
6157 kvm_state.hdr.vmx.flags |=
6158 KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE;
6159 kvm_state.hdr.vmx.preemption_timer_deadline =
6160 vmx->nested.preemption_timer_deadline;
6161 }
6162 }
6163 }
6164
6165 if (user_data_size < kvm_state.size)
6166 goto out;
6167
6168 if (copy_to_user(user_kvm_nested_state, &kvm_state, sizeof(kvm_state)))
6169 return -EFAULT;
6170
6171 if (!vmx_has_valid_vmcs12(vcpu))
6172 goto out;
6173
6174
6175
6176
6177
6178
6179
6180
6181 if (is_guest_mode(vcpu)) {
6182 sync_vmcs02_to_vmcs12(vcpu, vmcs12);
6183 sync_vmcs02_to_vmcs12_rare(vcpu, vmcs12);
6184 } else {
6185 copy_vmcs02_to_vmcs12_rare(vcpu, get_vmcs12(vcpu));
6186 if (!vmx->nested.need_vmcs12_to_shadow_sync) {
6187 if (evmptr_is_valid(vmx->nested.hv_evmcs_vmptr))
6188
6189
6190
6191
6192
6193
6194
6195 copy_enlightened_to_vmcs12(vmx, 0);
6196 else if (enable_shadow_vmcs)
6197 copy_shadow_to_vmcs12(vmx);
6198 }
6199 }
6200
6201 BUILD_BUG_ON(sizeof(user_vmx_nested_state->vmcs12) < VMCS12_SIZE);
6202 BUILD_BUG_ON(sizeof(user_vmx_nested_state->shadow_vmcs12) < VMCS12_SIZE);
6203
6204
6205
6206
6207
6208 if (copy_to_user(user_vmx_nested_state->vmcs12, vmcs12, VMCS12_SIZE))
6209 return -EFAULT;
6210
6211 if (nested_cpu_has_shadow_vmcs(vmcs12) &&
6212 vmcs12->vmcs_link_pointer != -1ull) {
6213 if (copy_to_user(user_vmx_nested_state->shadow_vmcs12,
6214 get_shadow_vmcs12(vcpu), VMCS12_SIZE))
6215 return -EFAULT;
6216 }
6217out:
6218 return kvm_state.size;
6219}
6220
6221/*
6222 * Forcibly leave nested mode in order to be able to reset the VCPU later on.
6223 */
6224void vmx_leave_nested(struct kvm_vcpu *vcpu)
6225{
6226 if (is_guest_mode(vcpu)) {
6227 to_vmx(vcpu)->nested.nested_run_pending = 0;
6228 nested_vmx_vmexit(vcpu, -1, 0, 0);
6229 }
6230 free_nested(vcpu);
6231}
6232
6233static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
6234 struct kvm_nested_state __user *user_kvm_nested_state,
6235 struct kvm_nested_state *kvm_state)
6236{
6237 struct vcpu_vmx *vmx = to_vmx(vcpu);
6238 struct vmcs12 *vmcs12;
6239 enum vm_entry_failure_code ignored;
6240 struct kvm_vmx_nested_state_data __user *user_vmx_nested_state =
6241 &user_kvm_nested_state->data.vmx[0];
6242 int ret;
6243
6244 if (kvm_state->format != KVM_STATE_NESTED_FORMAT_VMX)
6245 return -EINVAL;
6246
6247 if (kvm_state->hdr.vmx.vmxon_pa == -1ull) {
6248 if (kvm_state->hdr.vmx.smm.flags)
6249 return -EINVAL;
6250
6251 if (kvm_state->hdr.vmx.vmcs12_pa != -1ull)
6252 return -EINVAL;
6253
6254
6255
6256
6257
6258
6259
6260
6261
6262
6263 if (kvm_state->flags & ~KVM_STATE_NESTED_EVMCS)
6264 return -EINVAL;
6265 } else {
6266 if (!nested_vmx_allowed(vcpu))
6267 return -EINVAL;
6268
6269 if (!page_address_valid(vcpu, kvm_state->hdr.vmx.vmxon_pa))
6270 return -EINVAL;
6271 }
6272
6273 if ((kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE) &&
6274 (kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE))
6275 return -EINVAL;
6276
6277 if (kvm_state->hdr.vmx.smm.flags &
6278 ~(KVM_STATE_NESTED_SMM_GUEST_MODE | KVM_STATE_NESTED_SMM_VMXON))
6279 return -EINVAL;
6280
6281 if (kvm_state->hdr.vmx.flags & ~KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE)
6282 return -EINVAL;
6283
6284
6285
6286
6287
6288
6289 if (is_smm(vcpu) ?
6290 (kvm_state->flags &
6291 (KVM_STATE_NESTED_GUEST_MODE | KVM_STATE_NESTED_RUN_PENDING))
6292 : kvm_state->hdr.vmx.smm.flags)
6293 return -EINVAL;
6294
6295 if ((kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE) &&
6296 !(kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_VMXON))
6297 return -EINVAL;
6298
6299 if ((kvm_state->flags & KVM_STATE_NESTED_EVMCS) &&
6300 (!nested_vmx_allowed(vcpu) || !vmx->nested.enlightened_vmcs_enabled))
6301 return -EINVAL;
6302
6303 vmx_leave_nested(vcpu);
6304
6305 if (kvm_state->hdr.vmx.vmxon_pa == -1ull)
6306 return 0;
6307
6308 vmx->nested.vmxon_ptr = kvm_state->hdr.vmx.vmxon_pa;
6309 ret = enter_vmx_operation(vcpu);
6310 if (ret)
6311 return ret;
6312
6313
6314 if (kvm_state->size < sizeof(*kvm_state) + sizeof(*vmcs12)) {
6315
6316 if ((kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE) ||
6317 (kvm_state->flags & KVM_STATE_NESTED_EVMCS) ||
6318 (kvm_state->hdr.vmx.vmcs12_pa != -1ull))
6319 return -EINVAL;
6320 else
6321 return 0;
6322 }
6323
6324 if (kvm_state->hdr.vmx.vmcs12_pa != -1ull) {
6325 if (kvm_state->hdr.vmx.vmcs12_pa == kvm_state->hdr.vmx.vmxon_pa ||
6326 !page_address_valid(vcpu, kvm_state->hdr.vmx.vmcs12_pa))
6327 return -EINVAL;
6328
6329 set_current_vmptr(vmx, kvm_state->hdr.vmx.vmcs12_pa);
6330 } else if (kvm_state->flags & KVM_STATE_NESTED_EVMCS) {
6331
6332
6333
6334
6335
6336
6337 vmx->nested.hv_evmcs_vmptr = EVMPTR_MAP_PENDING;
6338 kvm_make_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
6339 } else {
6340 return -EINVAL;
6341 }
6342
6343 if (kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_VMXON) {
6344 vmx->nested.smm.vmxon = true;
6345 vmx->nested.vmxon = false;
6346
6347 if (kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE)
6348 vmx->nested.smm.guest_mode = true;
6349 }
6350
6351 vmcs12 = get_vmcs12(vcpu);
6352 if (copy_from_user(vmcs12, user_vmx_nested_state->vmcs12, sizeof(*vmcs12)))
6353 return -EFAULT;
6354
6355 if (vmcs12->hdr.revision_id != VMCS12_REVISION)
6356 return -EINVAL;
6357
6358 if (!(kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE))
6359 return 0;
6360
6361 vmx->nested.nested_run_pending =
6362 !!(kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING);
6363
6364 vmx->nested.mtf_pending =
6365 !!(kvm_state->flags & KVM_STATE_NESTED_MTF_PENDING);
6366
6367 ret = -EINVAL;
6368 if (nested_cpu_has_shadow_vmcs(vmcs12) &&
6369 vmcs12->vmcs_link_pointer != -1ull) {
6370 struct vmcs12 *shadow_vmcs12 = get_shadow_vmcs12(vcpu);
6371
6372 if (kvm_state->size <
6373 sizeof(*kvm_state) +
6374 sizeof(user_vmx_nested_state->vmcs12) + sizeof(*shadow_vmcs12))
6375 goto error_guest_mode;
6376
6377 if (copy_from_user(shadow_vmcs12,
6378 user_vmx_nested_state->shadow_vmcs12,
6379 sizeof(*shadow_vmcs12))) {
6380 ret = -EFAULT;
6381 goto error_guest_mode;
6382 }
6383
6384 if (shadow_vmcs12->hdr.revision_id != VMCS12_REVISION ||
6385 !shadow_vmcs12->hdr.shadow_vmcs)
6386 goto error_guest_mode;
6387 }
6388
6389 vmx->nested.has_preemption_timer_deadline = false;
6390 if (kvm_state->hdr.vmx.flags & KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE) {
6391 vmx->nested.has_preemption_timer_deadline = true;
6392 vmx->nested.preemption_timer_deadline =
6393 kvm_state->hdr.vmx.preemption_timer_deadline;
6394 }
6395
6396 if (nested_vmx_check_controls(vcpu, vmcs12) ||
6397 nested_vmx_check_host_state(vcpu, vmcs12) ||
6398 nested_vmx_check_guest_state(vcpu, vmcs12, &ignored))
6399 goto error_guest_mode;
6400
6401 vmx->nested.dirty_vmcs12 = true;
6402 ret = nested_vmx_enter_non_root_mode(vcpu, false);
6403 if (ret)
6404 goto error_guest_mode;
6405
6406 return 0;
6407
6408error_guest_mode:
6409 vmx->nested.nested_run_pending = 0;
6410 return ret;
6411}
6412
6413void nested_vmx_set_vmcs_shadowing_bitmap(void)
6414{
6415 if (enable_shadow_vmcs) {
6416 vmcs_write64(VMREAD_BITMAP, __pa(vmx_vmread_bitmap));
6417 vmcs_write64(VMWRITE_BITMAP, __pa(vmx_vmwrite_bitmap));
6418 }
6419}
6420
6421/*
6422 * The vmcs12 offset table is indexed by the field encoding rotated left by 6
6423 * (ROL16(encoding, 6)); this macro undoes that rotation to recover the encoding.
6424 */
6425#define VMCS12_IDX_TO_ENC(idx) ((u16)(((u16)(idx) >> 6) | ((u16)(idx) << 10)))
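/*
 * Illustrative example: VM_EXIT_REASON has encoding 0x4402, which lands at
 * table index ROL16(0x4402, 6) = 0x91; VMCS12_IDX_TO_ENC(0x91) rotates back
 * to 0x4402.
 */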
6426
6427static u64 nested_vmx_calc_vmcs_enum_msr(void)
6428{
6429
6430
6431
6432
6433 unsigned int max_idx, idx;
6434 int i;
6435
6436
6437
6438
6439
6440
6441 max_idx = 0;
6442 for (i = 0; i < nr_vmcs12_fields; i++) {
6443
6444 if (!vmcs_field_to_offset_table[i])
6445 continue;
6446
6447 idx = vmcs_field_index(VMCS12_IDX_TO_ENC(i));
6448 if (idx > max_idx)
6449 max_idx = idx;
6450 }
6451
6452 return (u64)max_idx << VMCS_FIELD_INDEX_SHIFT;
6453}
6454
6455
6456
6457
6458
6459
6460
6461
6462
6463
6464
6465void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps)
6466{
6467
6468
6469
6470
6471
6472
6473
6474
6475
6476
6477
6478
6479
6480
6481
6482
6483 rdmsr(MSR_IA32_VMX_PINBASED_CTLS,
6484 msrs->pinbased_ctls_low,
6485 msrs->pinbased_ctls_high);
6486 msrs->pinbased_ctls_low |=
6487 PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
6488 msrs->pinbased_ctls_high &=
6489 PIN_BASED_EXT_INTR_MASK |
6490 PIN_BASED_NMI_EXITING |
6491 PIN_BASED_VIRTUAL_NMIS |
6492 (enable_apicv ? PIN_BASED_POSTED_INTR : 0);
6493 msrs->pinbased_ctls_high |=
6494 PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR |
6495 PIN_BASED_VMX_PREEMPTION_TIMER;
6496
6497
6498 rdmsr(MSR_IA32_VMX_EXIT_CTLS,
6499 msrs->exit_ctls_low,
6500 msrs->exit_ctls_high);
6501 msrs->exit_ctls_low =
6502 VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR;
6503
6504 msrs->exit_ctls_high &=
6505#ifdef CONFIG_X86_64
6506 VM_EXIT_HOST_ADDR_SPACE_SIZE |
6507#endif
6508 VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT |
6509 VM_EXIT_CLEAR_BNDCFGS | VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL;
6510 msrs->exit_ctls_high |=
6511 VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR |
6512 VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER |
6513 VM_EXIT_SAVE_VMX_PREEMPTION_TIMER | VM_EXIT_ACK_INTR_ON_EXIT;
6514
6515
6516 msrs->exit_ctls_low &= ~VM_EXIT_SAVE_DEBUG_CONTROLS;
6517
6518
	rdmsr(MSR_IA32_VMX_ENTRY_CTLS,
		msrs->entry_ctls_low,
		msrs->entry_ctls_high);
	msrs->entry_ctls_low =
		VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR;
	msrs->entry_ctls_high &=
#ifdef CONFIG_X86_64
		VM_ENTRY_IA32E_MODE |
#endif
		VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_LOAD_BNDCFGS |
		VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL;
	msrs->entry_ctls_high |=
		(VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR | VM_ENTRY_LOAD_IA32_EFER);

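	/* We support free control of debug control loading. */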
	msrs->entry_ctls_low &= ~VM_ENTRY_LOAD_DEBUG_CONTROLS;

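	/* cpu-based controls */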
	rdmsr(MSR_IA32_VMX_PROCBASED_CTLS,
		msrs->procbased_ctls_low,
		msrs->procbased_ctls_high);
	msrs->procbased_ctls_low =
		CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
	msrs->procbased_ctls_high &=
		CPU_BASED_INTR_WINDOW_EXITING |
		CPU_BASED_NMI_WINDOW_EXITING | CPU_BASED_USE_TSC_OFFSETTING |
		CPU_BASED_HLT_EXITING | CPU_BASED_INVLPG_EXITING |
		CPU_BASED_MWAIT_EXITING | CPU_BASED_CR3_LOAD_EXITING |
		CPU_BASED_CR3_STORE_EXITING |
#ifdef CONFIG_X86_64
		CPU_BASED_CR8_LOAD_EXITING | CPU_BASED_CR8_STORE_EXITING |
#endif
		CPU_BASED_MOV_DR_EXITING | CPU_BASED_UNCOND_IO_EXITING |
		CPU_BASED_USE_IO_BITMAPS | CPU_BASED_MONITOR_TRAP_FLAG |
		CPU_BASED_MONITOR_EXITING | CPU_BASED_RDPMC_EXITING |
		CPU_BASED_RDTSC_EXITING | CPU_BASED_PAUSE_EXITING |
		CPU_BASED_TPR_SHADOW | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;

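	/*
	 * We can allow some features even when not supported by the
	 * hardware. For example, L1 can specify an MSR bitmap - and we
	 * can use it to avoid exits to L1 - even when L0 runs L2
	 * without MSR bitmaps.
	 */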
	msrs->procbased_ctls_high |=
		CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR |
		CPU_BASED_USE_MSR_BITMAPS;

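	/* We support free control of CR3 access interception. */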
	msrs->procbased_ctls_low &=
		~(CPU_BASED_CR3_LOAD_EXITING | CPU_BASED_CR3_STORE_EXITING);

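	/*
	 * Secondary cpu-based controls.  Do not include those that
	 * depend on CPUID bits; they are added later, after the vCPU's
	 * CPUID has been configured.
	 */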
	if (msrs->procbased_ctls_high & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)
		rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2,
			msrs->secondary_ctls_low,
			msrs->secondary_ctls_high);

	msrs->secondary_ctls_low = 0;
	msrs->secondary_ctls_high &=
		SECONDARY_EXEC_DESC |
		SECONDARY_EXEC_ENABLE_RDTSCP |
		SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
		SECONDARY_EXEC_WBINVD_EXITING |
		SECONDARY_EXEC_APIC_REGISTER_VIRT |
		SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
		SECONDARY_EXEC_RDRAND_EXITING |
		SECONDARY_EXEC_ENABLE_INVPCID |
		SECONDARY_EXEC_RDSEED_EXITING |
		SECONDARY_EXEC_XSAVES |
		SECONDARY_EXEC_TSC_SCALING;

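	/*
	 * We can emulate "VMCS shadowing," even if the hardware
	 * doesn't support it.
	 */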
	msrs->secondary_ctls_high |=
		SECONDARY_EXEC_SHADOW_VMCS;

	if (enable_ept) {
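		/* nested EPT: emulate EPT also to L1 */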
		msrs->secondary_ctls_high |=
			SECONDARY_EXEC_ENABLE_EPT;
		msrs->ept_caps =
			VMX_EPT_PAGE_WALK_4_BIT |
			VMX_EPT_PAGE_WALK_5_BIT |
			VMX_EPTP_WB_BIT |
			VMX_EPT_INVEPT_BIT |
			VMX_EPT_EXECUTE_ONLY_BIT;

		msrs->ept_caps &= ept_caps;
		msrs->ept_caps |= VMX_EPT_EXTENT_GLOBAL_BIT |
			VMX_EPT_EXTENT_CONTEXT_BIT | VMX_EPT_2MB_PAGE_BIT |
			VMX_EPT_1GB_PAGE_BIT;
		if (enable_ept_ad_bits) {
			msrs->secondary_ctls_high |=
				SECONDARY_EXEC_ENABLE_PML;
			msrs->ept_caps |= VMX_EPT_AD_BIT;
		}
	}

	if (cpu_has_vmx_vmfunc()) {
		msrs->secondary_ctls_high |=
			SECONDARY_EXEC_ENABLE_VMFUNC;
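		/*
		 * Advertise EPTP switching unconditionally
		 * since we emulate it
		 */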
		if (enable_ept)
			msrs->vmfunc_controls =
				VMX_VMFUNC_EPTP_SWITCHING;
	}

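	/*
	 * Old versions of KVM use the single-context version without
	 * checking for support, so declare that it is supported even
	 * though it is treated as global context.  The alternative is
	 * not failing the single-context invvpid, and it is worse.
	 */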
	if (enable_vpid) {
		msrs->secondary_ctls_high |=
			SECONDARY_EXEC_ENABLE_VPID;
		msrs->vpid_caps = VMX_VPID_INVVPID_BIT |
			VMX_VPID_EXTENT_SUPPORTED_MASK;
	}

	if (enable_unrestricted_guest)
		msrs->secondary_ctls_high |=
			SECONDARY_EXEC_UNRESTRICTED_GUEST;

	if (flexpriority_enabled)
		msrs->secondary_ctls_high |=
			SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;

	if (enable_sgx)
		msrs->secondary_ctls_high |= SECONDARY_EXEC_ENCLS_EXITING;

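	/* miscellaneous data */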
	rdmsr(MSR_IA32_VMX_MISC,
		msrs->misc_low,
		msrs->misc_high);
	msrs->misc_low &= VMX_MISC_SAVE_EFER_LMA;
	msrs->misc_low |=
		MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS |
		VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE |
		VMX_MISC_ACTIVITY_HLT |
		VMX_MISC_ACTIVITY_WAIT_SIPI;
	msrs->misc_high = 0;

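	/*
	 * This MSR reports some information about VMX support. We
	 * should return information about the VMX we emulate for the
	 * guest, and the VMCS structure we give it - not about the
	 * VMX support of the underlying hardware.
	 */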
	msrs->basic =
		VMCS12_REVISION |
		VMX_BASIC_TRUE_CTLS |
		((u64)VMCS12_SIZE << VMX_BASIC_VMCS_SIZE_SHIFT) |
		(VMX_BASIC_MEM_TYPE_WB << VMX_BASIC_MEM_TYPE_SHIFT);

	if (cpu_has_vmx_basic_inout())
		msrs->basic |= VMX_BASIC_INOUT;

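	/*
	 * These MSRs specify bits which the guest must keep fixed on
	 * while L1 is in VMXON mode (in L1's root mode, or running an L2).
	 * We picked the standard core2 setting.
	 */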
#define VMXON_CR0_ALWAYSON	(X86_CR0_PE | X86_CR0_PG | X86_CR0_NE)
#define VMXON_CR4_ALWAYSON	X86_CR4_VMXE
	msrs->cr0_fixed0 = VMXON_CR0_ALWAYSON;
	msrs->cr4_fixed0 = VMXON_CR4_ALWAYSON;

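	/* These MSRs specify bits which the guest must keep fixed off. */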
	rdmsrl(MSR_IA32_VMX_CR0_FIXED1, msrs->cr0_fixed1);
	rdmsrl(MSR_IA32_VMX_CR4_FIXED1, msrs->cr4_fixed1);

	msrs->vmcs_enum = nested_vmx_calc_vmcs_enum_msr();
}

void nested_vmx_hardware_unsetup(void)
{
	int i;

	if (enable_shadow_vmcs) {
		for (i = 0; i < VMX_BITMAP_NR; i++)
			free_page((unsigned long)vmx_bitmap[i]);
	}
}

__init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *))
{
	int i;

	if (!cpu_has_vmx_shadow_vmcs())
		enable_shadow_vmcs = 0;
	if (enable_shadow_vmcs) {
		for (i = 0; i < VMX_BITMAP_NR; i++) {
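			/*
			 * The VMREAD/VMWRITE bitmaps are global and shared by
			 * all VMCSes; each VMCS is pointed at them via
			 * nested_vmx_set_vmcs_shadowing_bitmap().
			 */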
			vmx_bitmap[i] = (unsigned long *)
				__get_free_page(GFP_KERNEL);
			if (!vmx_bitmap[i]) {
				nested_vmx_hardware_unsetup();
				return -ENOMEM;
			}
		}

		init_vmcs_shadow_fields();
	}

	exit_handlers[EXIT_REASON_VMCLEAR] = handle_vmclear;
	exit_handlers[EXIT_REASON_VMLAUNCH] = handle_vmlaunch;
	exit_handlers[EXIT_REASON_VMPTRLD] = handle_vmptrld;
	exit_handlers[EXIT_REASON_VMPTRST] = handle_vmptrst;
	exit_handlers[EXIT_REASON_VMREAD] = handle_vmread;
	exit_handlers[EXIT_REASON_VMRESUME] = handle_vmresume;
	exit_handlers[EXIT_REASON_VMWRITE] = handle_vmwrite;
	exit_handlers[EXIT_REASON_VMOFF] = handle_vmoff;
	exit_handlers[EXIT_REASON_VMON] = handle_vmon;
	exit_handlers[EXIT_REASON_INVEPT] = handle_invept;
	exit_handlers[EXIT_REASON_INVVPID] = handle_invvpid;
	exit_handlers[EXIT_REASON_VMFUNC] = handle_vmfunc;

	return 0;
}

struct kvm_x86_nested_ops vmx_nested_ops = {
	.check_events = vmx_check_nested_events,
	.hv_timer_pending = nested_vmx_preemption_timer_pending,
	.triple_fault = nested_vmx_triple_fault,
	.get_state = vmx_get_nested_state,
	.set_state = vmx_set_nested_state,
	.get_nested_state_pages = vmx_get_nested_state_pages,
	.write_log_dirty = nested_vmx_write_pml_buffer,
	.enable_evmcs = nested_enable_evmcs,
	.get_evmcs_version = nested_get_evmcs_version,
};