// SPDX-License-Identifier: GPL-2.0
2
3#include <linux/objtool.h>
4#include <linux/percpu.h>
5
6#include <asm/debugreg.h>
7#include <asm/mmu_context.h>
8
9#include "cpuid.h"
10#include "hyperv.h"
11#include "mmu.h"
12#include "nested.h"
13#include "pmu.h"
#include "trace.h"
#include "vmx.h"
15#include "x86.h"
16
17static bool __read_mostly enable_shadow_vmcs = 1;
18module_param_named(enable_shadow_vmcs, enable_shadow_vmcs, bool, S_IRUGO);
19
20static bool __read_mostly nested_early_check = 0;
21module_param(nested_early_check, bool, S_IRUGO);
22
23#define CC(consistency_check) \
24({ \
25 bool failed = (consistency_check); \
26 if (failed) \
27 trace_kvm_nested_vmenter_failed(#consistency_check, 0); \
28 failed; \
29})
30
/*
 * Hyper-V requires all of these, so mark them as supported even though
 * they are just treated the same as all-context.
 */
35#define VMX_VPID_EXTENT_SUPPORTED_MASK \
36 (VMX_VPID_EXTENT_INDIVIDUAL_ADDR_BIT | \
37 VMX_VPID_EXTENT_SINGLE_CONTEXT_BIT | \
38 VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT | \
39 VMX_VPID_EXTENT_SINGLE_NON_GLOBAL_BIT)
40
41#define VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE 5
42
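/*
 * VMREAD/VMWRITE bitmaps used with VMCS shadowing: a set bit for a field
 * encoding causes L1's VMREAD/VMWRITE of that field to VM-exit to L0, a
 * cleared bit lets the CPU satisfy the access from the shadow VMCS.
 */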
43enum {
44 VMX_VMREAD_BITMAP,
45 VMX_VMWRITE_BITMAP,
46 VMX_BITMAP_NR
47};
48static unsigned long *vmx_bitmap[VMX_BITMAP_NR];
49
50#define vmx_vmread_bitmap (vmx_bitmap[VMX_VMREAD_BITMAP])
51#define vmx_vmwrite_bitmap (vmx_bitmap[VMX_VMWRITE_BITMAP])
52
53struct shadow_vmcs_field {
54 u16 encoding;
55 u16 offset;
56};
57static struct shadow_vmcs_field shadow_read_only_fields[] = {
58#define SHADOW_FIELD_RO(x, y) { x, offsetof(struct vmcs12, y) },
59#include "vmcs_shadow_fields.h"
60};
61static int max_shadow_read_only_fields =
62 ARRAY_SIZE(shadow_read_only_fields);
63
64static struct shadow_vmcs_field shadow_read_write_fields[] = {
65#define SHADOW_FIELD_RW(x, y) { x, offsetof(struct vmcs12, y) },
66#include "vmcs_shadow_fields.h"
67};
68static int max_shadow_read_write_fields =
69 ARRAY_SIZE(shadow_read_write_fields);
70
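/*
 * Mark the shadowed field encodings in the VMREAD/VMWRITE bitmaps and
 * compact the field tables: fields whose backing feature isn't supported
 * by hardware are dropped, as are the high-half encodings of 64-bit
 * fields on 64-bit hosts (those are accessed via the full-width field).
 */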
71static void init_vmcs_shadow_fields(void)
72{
73 int i, j;
74
75 memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE);
76 memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE);
77
78 for (i = j = 0; i < max_shadow_read_only_fields; i++) {
79 struct shadow_vmcs_field entry = shadow_read_only_fields[i];
80 u16 field = entry.encoding;
81
82 if (vmcs_field_width(field) == VMCS_FIELD_WIDTH_U64 &&
83 (i + 1 == max_shadow_read_only_fields ||
84 shadow_read_only_fields[i + 1].encoding != field + 1))
85 pr_err("Missing field from shadow_read_only_field %x\n",
86 field + 1);
87
88 clear_bit(field, vmx_vmread_bitmap);
89 if (field & 1)
90#ifdef CONFIG_X86_64
91 continue;
92#else
93 entry.offset += sizeof(u32);
94#endif
95 shadow_read_only_fields[j++] = entry;
96 }
97 max_shadow_read_only_fields = j;
98
99 for (i = j = 0; i < max_shadow_read_write_fields; i++) {
100 struct shadow_vmcs_field entry = shadow_read_write_fields[i];
101 u16 field = entry.encoding;
102
103 if (vmcs_field_width(field) == VMCS_FIELD_WIDTH_U64 &&
104 (i + 1 == max_shadow_read_write_fields ||
105 shadow_read_write_fields[i + 1].encoding != field + 1))
106 pr_err("Missing field from shadow_read_write_field %x\n",
107 field + 1);
108
109 WARN_ONCE(field >= GUEST_ES_AR_BYTES &&
110 field <= GUEST_TR_AR_BYTES,
111 "Update vmcs12_write_any() to drop reserved bits from AR_BYTES");
112
 /*
  * PML and the preemption timer can be emulated, but the
  * processor cannot vmwrite to fields that don't exist
  * in hardware.
  */
118 switch (field) {
119 case GUEST_PML_INDEX:
120 if (!cpu_has_vmx_pml())
121 continue;
122 break;
123 case VMX_PREEMPTION_TIMER_VALUE:
124 if (!cpu_has_vmx_preemption_timer())
125 continue;
126 break;
127 case GUEST_INTR_STATUS:
128 if (!cpu_has_vmx_apicv())
129 continue;
130 break;
131 default:
132 break;
133 }
134
135 clear_bit(field, vmx_vmwrite_bitmap);
136 clear_bit(field, vmx_vmread_bitmap);
137 if (field & 1)
138#ifdef CONFIG_X86_64
139 continue;
140#else
141 entry.offset += sizeof(u32);
142#endif
143 shadow_read_write_fields[j++] = entry;
144 }
145 max_shadow_read_write_fields = j;
146}
147
/*
 * The following 3 functions, nested_vmx_succeed()/failValid()/failInvalid(),
 * set the success or error code of an emulated VMX instruction (as specified
 * by Vol 2B, VMX Instruction Reference, "Conventions"), and skip the emulated
 * instruction.
 */
154static int nested_vmx_succeed(struct kvm_vcpu *vcpu)
155{
156 vmx_set_rflags(vcpu, vmx_get_rflags(vcpu)
157 & ~(X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
158 X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_OF));
159 return kvm_skip_emulated_instruction(vcpu);
160}
161
162static int nested_vmx_failInvalid(struct kvm_vcpu *vcpu)
163{
164 vmx_set_rflags(vcpu, (vmx_get_rflags(vcpu)
165 & ~(X86_EFLAGS_PF | X86_EFLAGS_AF | X86_EFLAGS_ZF |
166 X86_EFLAGS_SF | X86_EFLAGS_OF))
167 | X86_EFLAGS_CF);
168 return kvm_skip_emulated_instruction(vcpu);
169}
170
171static int nested_vmx_failValid(struct kvm_vcpu *vcpu,
172 u32 vm_instruction_error)
173{
174 vmx_set_rflags(vcpu, (vmx_get_rflags(vcpu)
175 & ~(X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
176 X86_EFLAGS_SF | X86_EFLAGS_OF))
177 | X86_EFLAGS_ZF);
178 get_vmcs12(vcpu)->vm_instruction_error = vm_instruction_error;
 /*
  * We don't need to force a shadow sync because
  * VM_INSTRUCTION_ERROR is not shadowed.
  */
183 return kvm_skip_emulated_instruction(vcpu);
184}
185
186static int nested_vmx_fail(struct kvm_vcpu *vcpu, u32 vm_instruction_error)
187{
188 struct vcpu_vmx *vmx = to_vmx(vcpu);
189
 /*
  * failValid writes the error number to the current VMCS, which
  * can't be done if there isn't a current VMCS.
  */
194 if (vmx->nested.current_vmptr == -1ull && !vmx->nested.hv_evmcs)
195 return nested_vmx_failInvalid(vcpu);
196
197 return nested_vmx_failValid(vcpu, vm_instruction_error);
198}
199
200static void nested_vmx_abort(struct kvm_vcpu *vcpu, u32 indicator)
201{
 /* TODO: not to reset guest simply here. */
203 kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
204 pr_debug_ratelimited("kvm: nested vmx abort, indicator %d\n", indicator);
205}
206
207static inline bool vmx_control_verify(u32 control, u32 low, u32 high)
208{
209 return fixed_bits_valid(control, low, high);
210}
211
212static inline u64 vmx_control_msr(u32 low, u32 high)
213{
214 return low | ((u64)high << 32);
215}
216
217static void vmx_disable_shadow_vmcs(struct vcpu_vmx *vmx)
218{
219 secondary_exec_controls_clearbit(vmx, SECONDARY_EXEC_SHADOW_VMCS);
220 vmcs_write64(VMCS_LINK_POINTER, -1ull);
221 vmx->nested.need_vmcs12_to_shadow_sync = false;
222}
223
224static inline void nested_release_evmcs(struct kvm_vcpu *vcpu)
225{
226 struct vcpu_vmx *vmx = to_vmx(vcpu);
227
228 if (!vmx->nested.hv_evmcs)
229 return;
230
231 kvm_vcpu_unmap(vcpu, &vmx->nested.hv_evmcs_map, true);
232 vmx->nested.hv_evmcs_vmptr = 0;
233 vmx->nested.hv_evmcs = NULL;
234}
235
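/*
 * When switching between vmcs01 and vmcs02, the host state KVM tracks per
 * loaded_vmcs (FS/GS selectors and bases, LDT and, on 64-bit, DS/ES
 * selectors) must be copied to the new loaded_vmcs if guest state is
 * currently loaded, so the lazily restored host state stays consistent.
 */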
236static void vmx_sync_vmcs_host_state(struct vcpu_vmx *vmx,
237 struct loaded_vmcs *prev)
238{
239 struct vmcs_host_state *dest, *src;
240
241 if (unlikely(!vmx->guest_state_loaded))
242 return;
243
244 src = &prev->host_state;
245 dest = &vmx->loaded_vmcs->host_state;
246
247 vmx_set_host_fs_gs(dest, src->fs_sel, src->gs_sel, src->fs_base, src->gs_base);
248 dest->ldt_sel = src->ldt_sel;
249#ifdef CONFIG_X86_64
250 dest->ds_sel = src->ds_sel;
251 dest->es_sel = src->es_sel;
252#endif
253}
254
255static void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs)
256{
257 struct vcpu_vmx *vmx = to_vmx(vcpu);
258 struct loaded_vmcs *prev;
259 int cpu;
260
261 if (WARN_ON_ONCE(vmx->loaded_vmcs == vmcs))
262 return;
263
264 cpu = get_cpu();
265 prev = vmx->loaded_vmcs;
266 vmx->loaded_vmcs = vmcs;
267 vmx_vcpu_load_vmcs(vcpu, cpu, prev);
268 vmx_sync_vmcs_host_state(vmx, prev);
269 put_cpu();
270
271 vmx_register_cache_reset(vcpu);
272}
273
/*
 * Free whatever needs to be freed from vmx->nested when L1 goes down, or
 * just stops using VMX.
 */
278static void free_nested(struct kvm_vcpu *vcpu)
279{
280 struct vcpu_vmx *vmx = to_vmx(vcpu);
281
282 if (WARN_ON_ONCE(vmx->loaded_vmcs != &vmx->vmcs01))
283 vmx_switch_vmcs(vcpu, &vmx->vmcs01);
284
285 if (!vmx->nested.vmxon && !vmx->nested.smm.vmxon)
286 return;
287
288 kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
289
290 vmx->nested.vmxon = false;
291 vmx->nested.smm.vmxon = false;
292 free_vpid(vmx->nested.vpid02);
293 vmx->nested.posted_intr_nv = -1;
294 vmx->nested.current_vmptr = -1ull;
295 if (enable_shadow_vmcs) {
296 vmx_disable_shadow_vmcs(vmx);
297 vmcs_clear(vmx->vmcs01.shadow_vmcs);
298 free_vmcs(vmx->vmcs01.shadow_vmcs);
299 vmx->vmcs01.shadow_vmcs = NULL;
300 }
301 kfree(vmx->nested.cached_vmcs12);
302 vmx->nested.cached_vmcs12 = NULL;
303 kfree(vmx->nested.cached_shadow_vmcs12);
304 vmx->nested.cached_shadow_vmcs12 = NULL;
 /* Unpin physical memory we referred to in the vmcs02 */
306 if (vmx->nested.apic_access_page) {
307 kvm_release_page_clean(vmx->nested.apic_access_page);
308 vmx->nested.apic_access_page = NULL;
309 }
310 kvm_vcpu_unmap(vcpu, &vmx->nested.virtual_apic_map, true);
311 kvm_vcpu_unmap(vcpu, &vmx->nested.pi_desc_map, true);
312 vmx->nested.pi_desc = NULL;
313
314 kvm_mmu_free_roots(vcpu, &vcpu->arch.guest_mmu, KVM_MMU_ROOTS_ALL);
315
316 nested_release_evmcs(vcpu);
317
318 free_loaded_vmcs(&vmx->nested.vmcs02);
319}
320
/*
 * Ensure that the current vmcs of the logical processor is the
 * vmcs01 of the vcpu before calling free_nested().
 */
325void nested_vmx_free_vcpu(struct kvm_vcpu *vcpu)
326{
327 vcpu_load(vcpu);
328 vmx_leave_nested(vcpu);
329 vcpu_put(vcpu);
330}
331
332static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu,
333 struct x86_exception *fault)
334{
335 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
336 struct vcpu_vmx *vmx = to_vmx(vcpu);
337 u32 vm_exit_reason;
338 unsigned long exit_qualification = vcpu->arch.exit_qualification;
339
340 if (vmx->nested.pml_full) {
341 vm_exit_reason = EXIT_REASON_PML_FULL;
342 vmx->nested.pml_full = false;
343 exit_qualification &= INTR_INFO_UNBLOCK_NMI;
344 } else if (fault->error_code & PFERR_RSVD_MASK)
345 vm_exit_reason = EXIT_REASON_EPT_MISCONFIG;
346 else
347 vm_exit_reason = EXIT_REASON_EPT_VIOLATION;
348
349 nested_vmx_vmexit(vcpu, vm_exit_reason, 0, exit_qualification);
350 vmcs12->guest_physical_address = fault->address;
351}
352
353static void nested_ept_init_mmu_context(struct kvm_vcpu *vcpu)
354{
355 WARN_ON(mmu_is_nested(vcpu));
356
357 vcpu->arch.mmu = &vcpu->arch.guest_mmu;
358 kvm_init_shadow_ept_mmu(vcpu,
359 to_vmx(vcpu)->nested.msrs.ept_caps &
360 VMX_EPT_EXECUTE_ONLY_BIT,
361 nested_ept_ad_enabled(vcpu),
362 nested_ept_get_eptp(vcpu));
363 vcpu->arch.mmu->get_guest_pgd = nested_ept_get_eptp;
364 vcpu->arch.mmu->inject_page_fault = nested_ept_inject_page_fault;
365 vcpu->arch.mmu->get_pdptr = kvm_pdptr_read;
366
367 vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu;
368}
369
370static void nested_ept_uninit_mmu_context(struct kvm_vcpu *vcpu)
371{
372 vcpu->arch.mmu = &vcpu->arch.root_mmu;
373 vcpu->arch.walk_mmu = &vcpu->arch.root_mmu;
374}
375
376static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12,
377 u16 error_code)
378{
379 bool inequality, bit;
380
381 bit = (vmcs12->exception_bitmap & (1u << PF_VECTOR)) != 0;
382 inequality =
383 (error_code & vmcs12->page_fault_error_code_mask) !=
384 vmcs12->page_fault_error_code_match;
385 return inequality ^ bit;
386}
387
/*
 * KVM wants to inject page-faults which it got to the guest. This function
 * checks whether in a nested guest, we need to inject them to L1 or L2.
 */
393static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned long *exit_qual)
394{
395 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
396 unsigned int nr = vcpu->arch.exception.nr;
397 bool has_payload = vcpu->arch.exception.has_payload;
398 unsigned long payload = vcpu->arch.exception.payload;
399
400 if (nr == PF_VECTOR) {
401 if (vcpu->arch.exception.nested_apf) {
402 *exit_qual = vcpu->arch.apf.nested_apf_token;
403 return 1;
404 }
405 if (nested_vmx_is_page_fault_vmexit(vmcs12,
406 vcpu->arch.exception.error_code)) {
407 *exit_qual = has_payload ? payload : vcpu->arch.cr2;
408 return 1;
409 }
410 } else if (vmcs12->exception_bitmap & (1u << nr)) {
411 if (nr == DB_VECTOR) {
412 if (!has_payload) {
413 payload = vcpu->arch.dr6;
414 payload &= ~(DR6_FIXED_1 | DR6_BT);
415 payload ^= DR6_RTM;
416 }
417 *exit_qual = payload;
418 } else
419 *exit_qual = 0;
420 return 1;
421 }
422
423 return 0;
424}
425
426
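/*
 * Handle a page fault that arose while running L2: reflect it to L1 as an
 * EXCEPTION_NMI VM-exit if vmcs12 is configured to intercept this #PF (and
 * no nested VM-entry is pending), otherwise inject it into L2.
 */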
427static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu,
428 struct x86_exception *fault)
429{
430 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
431
432 WARN_ON(!is_guest_mode(vcpu));
433
434 if (nested_vmx_is_page_fault_vmexit(vmcs12, fault->error_code) &&
435 !to_vmx(vcpu)->nested.nested_run_pending) {
436 vmcs12->vm_exit_intr_error_code = fault->error_code;
437 nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
438 PF_VECTOR | INTR_TYPE_HARD_EXCEPTION |
439 INTR_INFO_DELIVER_CODE_MASK | INTR_INFO_VALID_MASK,
440 fault->address);
441 } else {
442 kvm_inject_page_fault(vcpu, fault);
443 }
444}
445
446static int nested_vmx_check_io_bitmap_controls(struct kvm_vcpu *vcpu,
447 struct vmcs12 *vmcs12)
448{
449 if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS))
450 return 0;
451
452 if (CC(!page_address_valid(vcpu, vmcs12->io_bitmap_a)) ||
453 CC(!page_address_valid(vcpu, vmcs12->io_bitmap_b)))
454 return -EINVAL;
455
456 return 0;
457}
458
459static int nested_vmx_check_msr_bitmap_controls(struct kvm_vcpu *vcpu,
460 struct vmcs12 *vmcs12)
461{
462 if (!nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS))
463 return 0;
464
465 if (CC(!page_address_valid(vcpu, vmcs12->msr_bitmap)))
466 return -EINVAL;
467
468 return 0;
469}
470
471static int nested_vmx_check_tpr_shadow_controls(struct kvm_vcpu *vcpu,
472 struct vmcs12 *vmcs12)
473{
474 if (!nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW))
475 return 0;
476
477 if (CC(!page_address_valid(vcpu, vmcs12->virtual_apic_page_addr)))
478 return -EINVAL;
479
480 return 0;
481}
482
/*
 * Check if MSR is intercepted for L01 MSR bitmap.
 */
486static bool msr_write_intercepted_l01(struct kvm_vcpu *vcpu, u32 msr)
487{
488 unsigned long *msr_bitmap;
489 int f = sizeof(unsigned long);
490
491 if (!cpu_has_vmx_msr_bitmap())
492 return true;
493
494 msr_bitmap = to_vmx(vcpu)->vmcs01.msr_bitmap;
495
496 if (msr <= 0x1fff) {
497 return !!test_bit(msr, msr_bitmap + 0x800 / f);
498 } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
499 msr &= 0x1fff;
500 return !!test_bit(msr, msr_bitmap + 0xc00 / f);
501 }
502
503 return true;
504}
505
/*
 * If a msr is allowed by L0, we should check whether it is allowed by L1.
 * The corresponding bit will be cleared unless both of L0 and L1 allow it.
 */
510static void nested_vmx_disable_intercept_for_msr(unsigned long *msr_bitmap_l1,
511 unsigned long *msr_bitmap_nested,
512 u32 msr, int type)
513{
514 int f = sizeof(unsigned long);
515
 /*
  * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
  * have the write-low and read-high bitmap offsets the wrong way round.
  * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
  */
521 if (msr <= 0x1fff) {
522 if (type & MSR_TYPE_R &&
523 !test_bit(msr, msr_bitmap_l1 + 0x000 / f))
 /* read-low */
525 __clear_bit(msr, msr_bitmap_nested + 0x000 / f);
526
527 if (type & MSR_TYPE_W &&
528 !test_bit(msr, msr_bitmap_l1 + 0x800 / f))
 /* write-low */
530 __clear_bit(msr, msr_bitmap_nested + 0x800 / f);
531
532 } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
533 msr &= 0x1fff;
534 if (type & MSR_TYPE_R &&
535 !test_bit(msr, msr_bitmap_l1 + 0x400 / f))
 /* read-high */
537 __clear_bit(msr, msr_bitmap_nested + 0x400 / f);
538
539 if (type & MSR_TYPE_W &&
540 !test_bit(msr, msr_bitmap_l1 + 0xc00 / f))
 /* write-high */
542 __clear_bit(msr, msr_bitmap_nested + 0xc00 / f);
543
544 }
545}
546
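/*
 * Set the read and write intercept bits for the entire x2APIC MSR range
 * (0x800 - 0x8ff) in an MSR bitmap; selected MSRs are re-opened afterwards
 * based on vmcs12's APIC virtualization settings.
 */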
547static inline void enable_x2apic_msr_intercepts(unsigned long *msr_bitmap)
548{
549 int msr;
550
551 for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) {
552 unsigned word = msr / BITS_PER_LONG;
553
554 msr_bitmap[word] = ~0;
555 msr_bitmap[word + (0x800 / sizeof(long))] = ~0;
556 }
557}
558
/*
 * Merge L0's and L1's MSR bitmap, return false to indicate that
 * we do not use the hardware.
 */
563static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
564 struct vmcs12 *vmcs12)
565{
566 int msr;
567 unsigned long *msr_bitmap_l1;
568 unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.vmcs02.msr_bitmap;
569 struct kvm_host_map *map = &to_vmx(vcpu)->nested.msr_bitmap_map;
570
 /* Nothing to do if the MSR bitmap is not in use. */
572 if (!cpu_has_vmx_msr_bitmap() ||
573 !nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS))
574 return false;
575
576 if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->msr_bitmap), map))
577 return false;
578
579 msr_bitmap_l1 = (unsigned long *)map->hva;
580
 /*
  * To keep the control flow simple, pay eight 8-byte writes (sixteen
  * 4-byte writes on 32-bit systems) up front to enable intercepts for
  * the x2APIC MSR range and selectively disable them below.
  */
586 enable_x2apic_msr_intercepts(msr_bitmap_l0);
587
588 if (nested_cpu_has_virt_x2apic_mode(vmcs12)) {
589 if (nested_cpu_has_apic_reg_virt(vmcs12)) {
 /*
  * L0 need not intercept reads for MSRs between 0x800
  * and 0x8ff, it just lets the processor take the value
  * from the virtual-APIC page; take those 256 bits
  * directly from the L1 bitmap.
  */
596 for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) {
597 unsigned word = msr / BITS_PER_LONG;
598
599 msr_bitmap_l0[word] = msr_bitmap_l1[word];
600 }
601 }
602
603 nested_vmx_disable_intercept_for_msr(
604 msr_bitmap_l1, msr_bitmap_l0,
605 X2APIC_MSR(APIC_TASKPRI),
606 MSR_TYPE_R | MSR_TYPE_W);
607
608 if (nested_cpu_has_vid(vmcs12)) {
609 nested_vmx_disable_intercept_for_msr(
610 msr_bitmap_l1, msr_bitmap_l0,
611 X2APIC_MSR(APIC_EOI),
612 MSR_TYPE_W);
613 nested_vmx_disable_intercept_for_msr(
614 msr_bitmap_l1, msr_bitmap_l0,
615 X2APIC_MSR(APIC_SELF_IPI),
616 MSR_TYPE_W);
617 }
618 }
619
 /* KVM unconditionally exposes the FS/GS base MSRs to L1. */
621 nested_vmx_disable_intercept_for_msr(msr_bitmap_l1, msr_bitmap_l0,
622 MSR_FS_BASE, MSR_TYPE_RW);
623
624 nested_vmx_disable_intercept_for_msr(msr_bitmap_l1, msr_bitmap_l0,
625 MSR_GS_BASE, MSR_TYPE_RW);
626
627 nested_vmx_disable_intercept_for_msr(msr_bitmap_l1, msr_bitmap_l0,
628 MSR_KERNEL_GS_BASE, MSR_TYPE_RW);
629
 /*
  * Checking the L0->L1 bitmap is trying to verify two things:
  *
  * 1. L0 gave a permission to L1 to actually passthrough the MSR. This
  *    ensures that we do not accidentally generate an L02 MSR bitmap
  *    from the L12 MSR bitmap that is too permissive.
  * 2. That L1 or L2s have actually used the MSR. This avoids
  *    unnecessarily merging of the bitmap if the MSR is unused. This
  *    works properly because we only update the L01 MSR bitmap lazily.
  *    So even if L0 should pass L1 these MSRs, the L01 bitmap is only
  *    updated to reflect this when L1 (or its L2s) actually write to
  *    the MSR.
  */
643 if (!msr_write_intercepted_l01(vcpu, MSR_IA32_SPEC_CTRL))
644 nested_vmx_disable_intercept_for_msr(
645 msr_bitmap_l1, msr_bitmap_l0,
646 MSR_IA32_SPEC_CTRL,
647 MSR_TYPE_R | MSR_TYPE_W);
648
649 if (!msr_write_intercepted_l01(vcpu, MSR_IA32_PRED_CMD))
650 nested_vmx_disable_intercept_for_msr(
651 msr_bitmap_l1, msr_bitmap_l0,
652 MSR_IA32_PRED_CMD,
653 MSR_TYPE_W);
654
655 kvm_vcpu_unmap(vcpu, &to_vmx(vcpu)->nested.msr_bitmap_map, false);
656
657 return true;
658}
659
660static void nested_cache_shadow_vmcs12(struct kvm_vcpu *vcpu,
661 struct vmcs12 *vmcs12)
662{
663 struct kvm_host_map map;
664 struct vmcs12 *shadow;
665
666 if (!nested_cpu_has_shadow_vmcs(vmcs12) ||
667 vmcs12->vmcs_link_pointer == -1ull)
668 return;
669
670 shadow = get_shadow_vmcs12(vcpu);
671
672 if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->vmcs_link_pointer), &map))
673 return;
674
675 memcpy(shadow, map.hva, VMCS12_SIZE);
676 kvm_vcpu_unmap(vcpu, &map, false);
677}
678
679static void nested_flush_cached_shadow_vmcs12(struct kvm_vcpu *vcpu,
680 struct vmcs12 *vmcs12)
681{
682 struct vcpu_vmx *vmx = to_vmx(vcpu);
683
684 if (!nested_cpu_has_shadow_vmcs(vmcs12) ||
685 vmcs12->vmcs_link_pointer == -1ull)
686 return;
687
688 kvm_write_guest(vmx->vcpu.kvm, vmcs12->vmcs_link_pointer,
689 get_shadow_vmcs12(vcpu), VMCS12_SIZE);
690}
691
/*
 * In nested virtualization, check if L1 has set
 * VM_EXIT_ACK_INTR_ON_EXIT
 */
696static bool nested_exit_intr_ack_set(struct kvm_vcpu *vcpu)
697{
698 return get_vmcs12(vcpu)->vm_exit_controls &
699 VM_EXIT_ACK_INTR_ON_EXIT;
700}
701
702static int nested_vmx_check_apic_access_controls(struct kvm_vcpu *vcpu,
703 struct vmcs12 *vmcs12)
704{
705 if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES) &&
706 CC(!page_address_valid(vcpu, vmcs12->apic_access_addr)))
707 return -EINVAL;
708 else
709 return 0;
710}
711
712static int nested_vmx_check_apicv_controls(struct kvm_vcpu *vcpu,
713 struct vmcs12 *vmcs12)
714{
715 if (!nested_cpu_has_virt_x2apic_mode(vmcs12) &&
716 !nested_cpu_has_apic_reg_virt(vmcs12) &&
717 !nested_cpu_has_vid(vmcs12) &&
718 !nested_cpu_has_posted_intr(vmcs12))
719 return 0;
720
 /*
  * If virtualize x2apic mode is enabled,
  * virtualize apic access must be disabled.
  */
725 if (CC(nested_cpu_has_virt_x2apic_mode(vmcs12) &&
726 nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)))
727 return -EINVAL;
728
 /*
  * If virtual interrupt delivery is enabled,
  * we must exit on external interrupts.
  */
733 if (CC(nested_cpu_has_vid(vmcs12) && !nested_exit_on_intr(vcpu)))
734 return -EINVAL;
735
 /*
  * bits 15:8 should be zero in posted_intr_nv,
  * the descriptor address has been already checked
  * in nested_get_vmcs12_pages.
  *
  * bits 5:0 of posted_intr_desc_addr should be zero.
  */
743 if (nested_cpu_has_posted_intr(vmcs12) &&
744 (CC(!nested_cpu_has_vid(vmcs12)) ||
745 CC(!nested_exit_intr_ack_set(vcpu)) ||
746 CC((vmcs12->posted_intr_nv & 0xff00)) ||
747 CC((vmcs12->posted_intr_desc_addr & 0x3f)) ||
748 CC((vmcs12->posted_intr_desc_addr >> cpuid_maxphyaddr(vcpu)))))
749 return -EINVAL;
750
 /* tpr shadow is needed by all apicv features. */
752 if (CC(!nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)))
753 return -EINVAL;
754
755 return 0;
756}
757
758static int nested_vmx_check_msr_switch(struct kvm_vcpu *vcpu,
759 u32 count, u64 addr)
760{
761 int maxphyaddr;
762
763 if (count == 0)
764 return 0;
765 maxphyaddr = cpuid_maxphyaddr(vcpu);
766 if (!IS_ALIGNED(addr, 16) || addr >> maxphyaddr ||
767 (addr + count * sizeof(struct vmx_msr_entry) - 1) >> maxphyaddr)
768 return -EINVAL;
769
770 return 0;
771}
772
773static int nested_vmx_check_exit_msr_switch_controls(struct kvm_vcpu *vcpu,
774 struct vmcs12 *vmcs12)
775{
776 if (CC(nested_vmx_check_msr_switch(vcpu,
777 vmcs12->vm_exit_msr_load_count,
778 vmcs12->vm_exit_msr_load_addr)) ||
779 CC(nested_vmx_check_msr_switch(vcpu,
780 vmcs12->vm_exit_msr_store_count,
781 vmcs12->vm_exit_msr_store_addr)))
782 return -EINVAL;
783
784 return 0;
785}
786
787static int nested_vmx_check_entry_msr_switch_controls(struct kvm_vcpu *vcpu,
788 struct vmcs12 *vmcs12)
789{
790 if (CC(nested_vmx_check_msr_switch(vcpu,
791 vmcs12->vm_entry_msr_load_count,
792 vmcs12->vm_entry_msr_load_addr)))
793 return -EINVAL;
794
795 return 0;
796}
797
798static int nested_vmx_check_pml_controls(struct kvm_vcpu *vcpu,
799 struct vmcs12 *vmcs12)
800{
801 if (!nested_cpu_has_pml(vmcs12))
802 return 0;
803
804 if (CC(!nested_cpu_has_ept(vmcs12)) ||
805 CC(!page_address_valid(vcpu, vmcs12->pml_address)))
806 return -EINVAL;
807
808 return 0;
809}
810
811static int nested_vmx_check_unrestricted_guest_controls(struct kvm_vcpu *vcpu,
812 struct vmcs12 *vmcs12)
813{
814 if (CC(nested_cpu_has2(vmcs12, SECONDARY_EXEC_UNRESTRICTED_GUEST) &&
815 !nested_cpu_has_ept(vmcs12)))
816 return -EINVAL;
817 return 0;
818}
819
820static int nested_vmx_check_mode_based_ept_exec_controls(struct kvm_vcpu *vcpu,
821 struct vmcs12 *vmcs12)
822{
823 if (CC(nested_cpu_has2(vmcs12, SECONDARY_EXEC_MODE_BASED_EPT_EXEC) &&
824 !nested_cpu_has_ept(vmcs12)))
825 return -EINVAL;
826 return 0;
827}
828
829static int nested_vmx_check_shadow_vmcs_controls(struct kvm_vcpu *vcpu,
830 struct vmcs12 *vmcs12)
831{
832 if (!nested_cpu_has_shadow_vmcs(vmcs12))
833 return 0;
834
835 if (CC(!page_address_valid(vcpu, vmcs12->vmread_bitmap)) ||
836 CC(!page_address_valid(vcpu, vmcs12->vmwrite_bitmap)))
837 return -EINVAL;
838
839 return 0;
840}
841
842static int nested_vmx_msr_check_common(struct kvm_vcpu *vcpu,
843 struct vmx_msr_entry *e)
844{
 /* x2APIC MSR accesses are not allowed */
846 if (CC(vcpu->arch.apic_base & X2APIC_ENABLE && e->index >> 8 == 0x8))
847 return -EINVAL;
848 if (CC(e->index == MSR_IA32_UCODE_WRITE) ||
849 CC(e->index == MSR_IA32_UCODE_REV))
850 return -EINVAL;
851 if (CC(e->reserved != 0))
852 return -EINVAL;
853 return 0;
854}
855
856static int nested_vmx_load_msr_check(struct kvm_vcpu *vcpu,
857 struct vmx_msr_entry *e)
858{
859 if (CC(e->index == MSR_FS_BASE) ||
860 CC(e->index == MSR_GS_BASE) ||
861 CC(e->index == MSR_IA32_SMM_MONITOR_CTL) ||
862 nested_vmx_msr_check_common(vcpu, e))
863 return -EINVAL;
864 return 0;
865}
866
867static int nested_vmx_store_msr_check(struct kvm_vcpu *vcpu,
868 struct vmx_msr_entry *e)
869{
870 if (CC(e->index == MSR_IA32_SMBASE) ||
871 nested_vmx_msr_check_common(vcpu, e))
872 return -EINVAL;
873 return 0;
874}
875
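/*
 * Bits 27:25 of IA32_VMX_MISC encode the recommended maximum number of
 * entries in a VM-entry/VM-exit MSR list as 512 * (N + 1); this is the
 * limit KVM enforces when emulating atomic MSR switching.
 */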
876static u32 nested_vmx_max_atomic_switch_msrs(struct kvm_vcpu *vcpu)
877{
878 struct vcpu_vmx *vmx = to_vmx(vcpu);
879 u64 vmx_misc = vmx_control_msr(vmx->nested.msrs.misc_low,
880 vmx->nested.msrs.misc_high);
881
882 return (vmx_misc_max_msr(vmx_misc) + 1) * VMX_MISC_MSR_LIST_MULTIPLIER;
883}
884
/*
 * Load guest's/host's msr at nested entry/exit.
 * return 0 for success, entry index for failure.
 *
 * One of the failure modes for MSR load/store is when a list exceeds the
 * virtual hardware's capacity. To maintain compatibility with hardware inasmuch
 * as possible, process all valid entries before failing rather than precluding
 * the eventual consumption of the invalid entries.
 */
894static u32 nested_vmx_load_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
895{
896 u32 i;
897 struct vmx_msr_entry e;
898 u32 max_msr_list_size = nested_vmx_max_atomic_switch_msrs(vcpu);
899
900 for (i = 0; i < count; i++) {
901 if (unlikely(i >= max_msr_list_size))
902 goto fail;
903
904 if (kvm_vcpu_read_guest(vcpu, gpa + i * sizeof(e),
905 &e, sizeof(e))) {
906 pr_debug_ratelimited(
907 "%s cannot read MSR entry (%u, 0x%08llx)\n",
908 __func__, i, gpa + i * sizeof(e));
909 goto fail;
910 }
911 if (nested_vmx_load_msr_check(vcpu, &e)) {
912 pr_debug_ratelimited(
913 "%s check failed (%u, 0x%x, 0x%x)\n",
914 __func__, i, e.index, e.reserved);
915 goto fail;
916 }
917 if (kvm_set_msr(vcpu, e.index, e.value)) {
918 pr_debug_ratelimited(
919 "%s cannot write MSR (%u, 0x%x, 0x%llx)\n",
920 __func__, i, e.index, e.value);
921 goto fail;
922 }
923 }
924 return 0;
925fail:
 /* Note, max_msr_list_size is at most 4096, i.e. this can't wrap. */
927 return i + 1;
928}
929
930static bool nested_vmx_get_vmexit_msr_value(struct kvm_vcpu *vcpu,
931 u32 msr_index,
932 u64 *data)
933{
934 struct vcpu_vmx *vmx = to_vmx(vcpu);
935
 /*
  * If the L0 hypervisor stored a more accurate value for the TSC that
  * does not include the time taken for emulation of the L2->L1
  * VM-exit in L0, use the more accurate value.
  */
941 if (msr_index == MSR_IA32_TSC) {
942 int i = vmx_find_loadstore_msr_slot(&vmx->msr_autostore.guest,
943 MSR_IA32_TSC);
944
945 if (i >= 0) {
946 u64 val = vmx->msr_autostore.guest.val[i].value;
947
948 *data = kvm_read_l1_tsc(vcpu, val);
949 return true;
950 }
951 }
952
953 if (kvm_get_msr(vcpu, msr_index, data)) {
954 pr_debug_ratelimited("%s cannot read MSR (0x%x)\n", __func__,
955 msr_index);
956 return false;
957 }
958 return true;
959}
960
961static bool read_and_check_msr_entry(struct kvm_vcpu *vcpu, u64 gpa, int i,
962 struct vmx_msr_entry *e)
963{
964 if (kvm_vcpu_read_guest(vcpu,
965 gpa + i * sizeof(*e),
966 e, 2 * sizeof(u32))) {
967 pr_debug_ratelimited(
968 "%s cannot read MSR entry (%u, 0x%08llx)\n",
969 __func__, i, gpa + i * sizeof(*e));
970 return false;
971 }
972 if (nested_vmx_store_msr_check(vcpu, e)) {
973 pr_debug_ratelimited(
974 "%s check failed (%u, 0x%x, 0x%x)\n",
975 __func__, i, e->index, e->reserved);
976 return false;
977 }
978 return true;
979}
980
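/*
 * Emulate the VM-exit MSR-store area: read each entry of L1's list from
 * guest memory, validate it, and write the MSR's current value back into
 * the entry's value field.
 */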
981static int nested_vmx_store_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
982{
983 u64 data;
984 u32 i;
985 struct vmx_msr_entry e;
986 u32 max_msr_list_size = nested_vmx_max_atomic_switch_msrs(vcpu);
987
988 for (i = 0; i < count; i++) {
989 if (unlikely(i >= max_msr_list_size))
990 return -EINVAL;
991
992 if (!read_and_check_msr_entry(vcpu, gpa, i, &e))
993 return -EINVAL;
994
995 if (!nested_vmx_get_vmexit_msr_value(vcpu, e.index, &data))
996 return -EINVAL;
997
998 if (kvm_vcpu_write_guest(vcpu,
999 gpa + i * sizeof(e) +
1000 offsetof(struct vmx_msr_entry, value),
1001 &data, sizeof(data))) {
1002 pr_debug_ratelimited(
1003 "%s cannot write MSR (%u, 0x%x, 0x%llx)\n",
1004 __func__, i, e.index, data);
1005 return -EINVAL;
1006 }
1007 }
1008 return 0;
1009}
1010
1011static bool nested_msr_store_list_has_msr(struct kvm_vcpu *vcpu, u32 msr_index)
1012{
1013 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
1014 u32 count = vmcs12->vm_exit_msr_store_count;
1015 u64 gpa = vmcs12->vm_exit_msr_store_addr;
1016 struct vmx_msr_entry e;
1017 u32 i;
1018
1019 for (i = 0; i < count; i++) {
1020 if (!read_and_check_msr_entry(vcpu, gpa, i, &e))
1021 return false;
1022
1023 if (e.index == msr_index)
1024 return true;
1025 }
1026 return false;
1027}
1028
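/*
 * Keep vmx->msr_autostore.guest (the vmcs02 VM-exit MSR-store list) in
 * sync with L1's VM-exit MSR-store list for the given MSR: add the MSR if
 * L1 wants it stored and it isn't tracked yet, remove it otherwise.
 */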
1029static void prepare_vmx_msr_autostore_list(struct kvm_vcpu *vcpu,
1030 u32 msr_index)
1031{
1032 struct vcpu_vmx *vmx = to_vmx(vcpu);
1033 struct vmx_msrs *autostore = &vmx->msr_autostore.guest;
1034 bool in_vmcs12_store_list;
1035 int msr_autostore_slot;
1036 bool in_autostore_list;
1037 int last;
1038
1039 msr_autostore_slot = vmx_find_loadstore_msr_slot(autostore, msr_index);
1040 in_autostore_list = msr_autostore_slot >= 0;
1041 in_vmcs12_store_list = nested_msr_store_list_has_msr(vcpu, msr_index);
1042
1043 if (in_vmcs12_store_list && !in_autostore_list) {
1044 if (autostore->nr == MAX_NR_LOADSTORE_MSRS) {
 /*
  * Emulated VMEntry does not fail here.  Instead a less
  * accurate value will be returned by
  * nested_vmx_get_vmexit_msr_value() using kvm_get_msr()
  * instead of reading the value from the vmcs02 VMExit
  * MSR-store area.
  */
1052 pr_warn_ratelimited(
1053 "Not enough msr entries in msr_autostore. Can't add msr %x\n",
1054 msr_index);
1055 return;
1056 }
1057 last = autostore->nr++;
1058 autostore->val[last].index = msr_index;
1059 } else if (!in_vmcs12_store_list && in_autostore_list) {
1060 last = --autostore->nr;
1061 autostore->val[msr_autostore_slot] = autostore->val[last];
1062 }
1063}
1064
1065static bool nested_cr3_valid(struct kvm_vcpu *vcpu, unsigned long val)
1066{
1067 unsigned long invalid_mask;
1068
1069 invalid_mask = (~0ULL) << cpuid_maxphyaddr(vcpu);
1070 return (val & invalid_mask) == 0;
1071}
1072
/*
 * Returns true if the MMU needs to be sync'd on nested VM-Enter/VM-Exit.
 * tl;dr: the MMU needs a sync if L0 is using shadow paging and L1 didn't
 * enable VPID for L2 (implying it expects a TLB flush on VMX transitions).
 *
 * If EPT is enabled by L0 a sync is never needed: L0 isn't shadowing L1 or
 * L2 PTEs, and VM-Enter/VM-Exit aren't required to invalidate guest-physical
 * mappings, so L1 can't rely on the (virtual) CPU to flush stale mappings.
 *
 * If EPT is disabled by L0 and L1 enables VPID for L2, the situation is
 * similar: L1 can't rely on VM-Enter/VM-Exit to flush linear mappings for a
 * VPID-tagged L2.  But if L1 doesn't use VPID, it expects all linear
 * mappings to be invalidated on both VM-Enter and VM-Exit, which requires a
 * sync of the shadow MMU.
 *
 * Note, this is subtly different from nested_has_guest_tlb_tag(), which
 * additionally checks whether L0 actually assigned a VPID to L2; L1's
 * expectations don't depend on what L0 does under the hood.
 */
1110static bool nested_vmx_transition_mmu_sync(struct kvm_vcpu *vcpu)
1111{
1112 return !enable_ept && !nested_cpu_has_vpid(get_vmcs12(vcpu));
1113}
1114
/*
 * Load guest's/host's cr3 at nested entry/exit.  @nested_ept is true if we are
 * emulating VM-Entry into a guest with EPT enabled.  On failure, the expected
 * Exit Qualification (for a VM-Entry consistency check VM-Exit) is assigned to
 * @entry_failure_code.
 */
1121static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, bool nested_ept,
1122 enum vm_entry_failure_code *entry_failure_code)
1123{
1124 if (CC(!nested_cr3_valid(vcpu, cr3))) {
1125 *entry_failure_code = ENTRY_FAIL_DEFAULT;
1126 return -EINVAL;
1127 }
1128
 /*
  * If PAE paging and EPT are both on, CR3 is not used by the CPU and
  * must not be dereferenced.
  */
1133 if (!nested_ept && is_pae_paging(vcpu) &&
1134 (cr3 != kvm_read_cr3(vcpu) || pdptrs_changed(vcpu))) {
1135 if (CC(!load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))) {
1136 *entry_failure_code = ENTRY_FAIL_PDPTE;
1137 return -EINVAL;
1138 }
1139 }
1140
 /*
  * Unconditionally skip the TLB flush on fast CR3 switch, all TLB
  * flushes are handled by nested_vmx_transition_tlb_flush().  See
  * nested_vmx_transition_mmu_sync() for details on skipping the MMU sync.
  */
1146 if (!nested_ept)
1147 kvm_mmu_new_pgd(vcpu, cr3, true,
1148 !nested_vmx_transition_mmu_sync(vcpu));
1149
1150 vcpu->arch.cr3 = cr3;
1151 kvm_register_mark_available(vcpu, VCPU_EXREG_CR3);
1152
1153 kvm_init_mmu(vcpu, false);
1154
1155 return 0;
1156}
1157
/*
 * Returns if KVM is able to config CPU to tag TLB entries
 * populated by L2 differently than TLB entries populated
 * by L1.
 *
 * If L0 uses EPT, L1 and L2 run with different EPTPs because
 * guest_mode is part of kvm_mmu_page_role. Thus, TLB entries
 * are tagged with different EPTP.
 *
 * If L2 uses VPID and KVM allocated a vpid02 for it, TLB entries
 * populated by L2 are tagged with a different VPID than entries
 * populated by L1.
 */
1171static bool nested_has_guest_tlb_tag(struct kvm_vcpu *vcpu)
1172{
1173 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
1174
1175 return enable_ept ||
1176 (nested_cpu_has_vpid(vmcs12) && to_vmx(vcpu)->nested.vpid02);
1177}
1178
1179static void nested_vmx_transition_tlb_flush(struct kvm_vcpu *vcpu,
1180 struct vmcs12 *vmcs12,
1181 bool is_vmenter)
1182{
1183 struct vcpu_vmx *vmx = to_vmx(vcpu);
1184
 /*
  * If VPID is disabled, linear and combined mappings are flushed on
  * VM-Enter/VM-Exit, and guest-physical mappings are valid only for
  * their associated EPTP.
  */
1190 if (!enable_vpid)
1191 return;
1192
 /*
  * If vmcs12 doesn't use VPID, L1 expects linear and combined mappings
  * for *all* contexts to be flushed on VM-Enter/VM-Exit.
  *
  * If VPID is enabled and used by vmcs12, but L2 does not have a unique
  * TLB tag (ASID), i.e. EPT is disabled and KVM was unable to allocate
  * a VPID for L2, flush the current context as the effective ASID is
  * common to both L1 and L2.
  *
  * Defer the flush so that it runs after vmcs02.EPTP has been set by
  * KVM_REQ_LOAD_MMU_PGD (if nested EPT is enabled) and to avoid
  * redundant flushes further down the nested pipeline.
  *
  * If a TLB flush isn't required due to any of the above, and vpid12 is
  * changing then the new "virtual" VPID (vpid12) will reuse the same
  * "real" VPID (vpid02), and so needs to be flushed.  There's no direct
  * mapping between vpid02 and vpid12, vpid02 is per-vCPU and reused for
  * all nested vCPUs.  Remember, a flush on VM-Enter does not invalidate
  * guest-physical mappings, so there is no need to sync the nEPT MMU.
  */
1212 if (!nested_cpu_has_vpid(vmcs12)) {
1213 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
1214 } else if (!nested_has_guest_tlb_tag(vcpu)) {
1215 kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
1216 } else if (is_vmenter &&
1217 vmcs12->virtual_processor_id != vmx->nested.last_vpid) {
1218 vmx->nested.last_vpid = vmcs12->virtual_processor_id;
1219 vpid_sync_context(nested_get_vpid02(vcpu));
1220 }
1221}
1222
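/*
 * Returns true if, within @mask, every bit set in @subset is also set in
 * @superset.  Used below when validating userspace-restored VMX capability
 * MSR values against what KVM itself supports.
 */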
1223static bool is_bitwise_subset(u64 superset, u64 subset, u64 mask)
1224{
1225 superset &= mask;
1226 subset &= mask;
1227
1228 return (superset | subset) == superset;
1229}
1230
1231static int vmx_restore_vmx_basic(struct vcpu_vmx *vmx, u64 data)
1232{
1233 const u64 feature_and_reserved =
 /* feature (except bit 48; see below) */
1235 BIT_ULL(49) | BIT_ULL(54) | BIT_ULL(55) |
 /* reserved */
1237 BIT_ULL(31) | GENMASK_ULL(47, 45) | GENMASK_ULL(63, 56);
1238 u64 vmx_basic = vmx->nested.msrs.basic;
1239
1240 if (!is_bitwise_subset(vmx_basic, data, feature_and_reserved))
1241 return -EINVAL;
1242
 /*
  * KVM does not emulate a version of VMX that constrains physical
  * addresses of VMX structures (e.g. VMCS) to 32-bits.
  */
1247 if (data & BIT_ULL(48))
1248 return -EINVAL;
1249
1250 if (vmx_basic_vmcs_revision_id(vmx_basic) !=
1251 vmx_basic_vmcs_revision_id(data))
1252 return -EINVAL;
1253
1254 if (vmx_basic_vmcs_size(vmx_basic) > vmx_basic_vmcs_size(data))
1255 return -EINVAL;
1256
1257 vmx->nested.msrs.basic = data;
1258 return 0;
1259}
1260
1261static int
1262vmx_restore_control_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data)
1263{
1264 u64 supported;
1265 u32 *lowp, *highp;
1266
1267 switch (msr_index) {
1268 case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
1269 lowp = &vmx->nested.msrs.pinbased_ctls_low;
1270 highp = &vmx->nested.msrs.pinbased_ctls_high;
1271 break;
1272 case MSR_IA32_VMX_TRUE_PROCBASED_CTLS:
1273 lowp = &vmx->nested.msrs.procbased_ctls_low;
1274 highp = &vmx->nested.msrs.procbased_ctls_high;
1275 break;
1276 case MSR_IA32_VMX_TRUE_EXIT_CTLS:
1277 lowp = &vmx->nested.msrs.exit_ctls_low;
1278 highp = &vmx->nested.msrs.exit_ctls_high;
1279 break;
1280 case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
1281 lowp = &vmx->nested.msrs.entry_ctls_low;
1282 highp = &vmx->nested.msrs.entry_ctls_high;
1283 break;
1284 case MSR_IA32_VMX_PROCBASED_CTLS2:
1285 lowp = &vmx->nested.msrs.secondary_ctls_low;
1286 highp = &vmx->nested.msrs.secondary_ctls_high;
1287 break;
1288 default:
1289 BUG();
1290 }
1291
1292 supported = vmx_control_msr(*lowp, *highp);
1293
 /* Check must-be-1 bits are still 1. */
1295 if (!is_bitwise_subset(data, supported, GENMASK_ULL(31, 0)))
1296 return -EINVAL;
1297
 /* Check must-be-0 bits are still 0. */
1299 if (!is_bitwise_subset(supported, data, GENMASK_ULL(63, 32)))
1300 return -EINVAL;
1301
1302 *lowp = data;
1303 *highp = data >> 32;
1304 return 0;
1305}
1306
1307static int vmx_restore_vmx_misc(struct vcpu_vmx *vmx, u64 data)
1308{
1309 const u64 feature_and_reserved_bits =
 /* feature */
1311 BIT_ULL(5) | GENMASK_ULL(8, 6) | BIT_ULL(14) | BIT_ULL(15) |
1312 BIT_ULL(28) | BIT_ULL(29) | BIT_ULL(30) |
 /* reserved */
1314 GENMASK_ULL(13, 9) | BIT_ULL(31);
1315 u64 vmx_misc;
1316
1317 vmx_misc = vmx_control_msr(vmx->nested.msrs.misc_low,
1318 vmx->nested.msrs.misc_high);
1319
1320 if (!is_bitwise_subset(vmx_misc, data, feature_and_reserved_bits))
1321 return -EINVAL;
1322
1323 if ((vmx->nested.msrs.pinbased_ctls_high &
1324 PIN_BASED_VMX_PREEMPTION_TIMER) &&
1325 vmx_misc_preemption_timer_rate(data) !=
1326 vmx_misc_preemption_timer_rate(vmx_misc))
1327 return -EINVAL;
1328
1329 if (vmx_misc_cr3_count(data) > vmx_misc_cr3_count(vmx_misc))
1330 return -EINVAL;
1331
1332 if (vmx_misc_max_msr(data) > vmx_misc_max_msr(vmx_misc))
1333 return -EINVAL;
1334
1335 if (vmx_misc_mseg_revid(data) != vmx_misc_mseg_revid(vmx_misc))
1336 return -EINVAL;
1337
1338 vmx->nested.msrs.misc_low = data;
1339 vmx->nested.msrs.misc_high = data >> 32;
1340
1341 return 0;
1342}
1343
1344static int vmx_restore_vmx_ept_vpid_cap(struct vcpu_vmx *vmx, u64 data)
1345{
1346 u64 vmx_ept_vpid_cap;
1347
1348 vmx_ept_vpid_cap = vmx_control_msr(vmx->nested.msrs.ept_caps,
1349 vmx->nested.msrs.vpid_caps);
1350
 /* Every bit is either reserved or a feature bit. */
1352 if (!is_bitwise_subset(vmx_ept_vpid_cap, data, -1ULL))
1353 return -EINVAL;
1354
1355 vmx->nested.msrs.ept_caps = data;
1356 vmx->nested.msrs.vpid_caps = data >> 32;
1357 return 0;
1358}
1359
1360static int vmx_restore_fixed0_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data)
1361{
1362 u64 *msr;
1363
1364 switch (msr_index) {
1365 case MSR_IA32_VMX_CR0_FIXED0:
1366 msr = &vmx->nested.msrs.cr0_fixed0;
1367 break;
1368 case MSR_IA32_VMX_CR4_FIXED0:
1369 msr = &vmx->nested.msrs.cr4_fixed0;
1370 break;
1371 default:
1372 BUG();
1373 }
1374
 /*
  * 1 bits (which indicate bits which "must-be-1" during VMX operation)
  * must be 1 in the restored value.
  */
1379 if (!is_bitwise_subset(data, *msr, -1ULL))
1380 return -EINVAL;
1381
1382 *msr = data;
1383 return 0;
1384}
1385
/*
 * Called when userspace is restoring VMX MSRs.
 *
 * Returns 0 on success, non-0 otherwise.
 */
1391int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
1392{
1393 struct vcpu_vmx *vmx = to_vmx(vcpu);
1394
 /*
  * Don't allow changes to the VMX capability MSRs while the vCPU
  * is in VMX operation.
  */
1399 if (vmx->nested.vmxon)
1400 return -EBUSY;
1401
1402 switch (msr_index) {
1403 case MSR_IA32_VMX_BASIC:
1404 return vmx_restore_vmx_basic(vmx, data);
1405 case MSR_IA32_VMX_PINBASED_CTLS:
1406 case MSR_IA32_VMX_PROCBASED_CTLS:
1407 case MSR_IA32_VMX_EXIT_CTLS:
1408 case MSR_IA32_VMX_ENTRY_CTLS:
 /*
  * The "non-true" VMX capability MSRs are generated from the
  * "true" MSRs, so we do not support restoring them directly.
  */
1418 return -EINVAL;
1419 case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
1420 case MSR_IA32_VMX_TRUE_PROCBASED_CTLS:
1421 case MSR_IA32_VMX_TRUE_EXIT_CTLS:
1422 case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
1423 case MSR_IA32_VMX_PROCBASED_CTLS2:
1424 return vmx_restore_control_msr(vmx, msr_index, data);
1425 case MSR_IA32_VMX_MISC:
1426 return vmx_restore_vmx_misc(vmx, data);
1427 case MSR_IA32_VMX_CR0_FIXED0:
1428 case MSR_IA32_VMX_CR4_FIXED0:
1429 return vmx_restore_fixed0_msr(vmx, msr_index, data);
1430 case MSR_IA32_VMX_CR0_FIXED1:
1431 case MSR_IA32_VMX_CR4_FIXED1:
 /*
  * These MSRs are generated based on the vCPU's CPUID, so we
  * do not support restoring them directly.
  */
1436 return -EINVAL;
1437 case MSR_IA32_VMX_EPT_VPID_CAP:
1438 return vmx_restore_vmx_ept_vpid_cap(vmx, data);
1439 case MSR_IA32_VMX_VMCS_ENUM:
1440 vmx->nested.msrs.vmcs_enum = data;
1441 return 0;
1442 case MSR_IA32_VMX_VMFUNC:
1443 if (data & ~vmx->nested.msrs.vmfunc_controls)
1444 return -EINVAL;
1445 vmx->nested.msrs.vmfunc_controls = data;
1446 return 0;
1447 default:
 /*
  * The rest of the VMX capability MSRs do not support restore.
  */
1451 return -EINVAL;
1452 }
1453}
1454
/* Returns 0 on success, non-0 otherwise. */
1456int vmx_get_vmx_msr(struct nested_vmx_msrs *msrs, u32 msr_index, u64 *pdata)
1457{
1458 switch (msr_index) {
1459 case MSR_IA32_VMX_BASIC:
1460 *pdata = msrs->basic;
1461 break;
1462 case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
1463 case MSR_IA32_VMX_PINBASED_CTLS:
1464 *pdata = vmx_control_msr(
1465 msrs->pinbased_ctls_low,
1466 msrs->pinbased_ctls_high);
1467 if (msr_index == MSR_IA32_VMX_PINBASED_CTLS)
1468 *pdata |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
1469 break;
1470 case MSR_IA32_VMX_TRUE_PROCBASED_CTLS:
1471 case MSR_IA32_VMX_PROCBASED_CTLS:
1472 *pdata = vmx_control_msr(
1473 msrs->procbased_ctls_low,
1474 msrs->procbased_ctls_high);
1475 if (msr_index == MSR_IA32_VMX_PROCBASED_CTLS)
1476 *pdata |= CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
1477 break;
1478 case MSR_IA32_VMX_TRUE_EXIT_CTLS:
1479 case MSR_IA32_VMX_EXIT_CTLS:
1480 *pdata = vmx_control_msr(
1481 msrs->exit_ctls_low,
1482 msrs->exit_ctls_high);
1483 if (msr_index == MSR_IA32_VMX_EXIT_CTLS)
1484 *pdata |= VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR;
1485 break;
1486 case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
1487 case MSR_IA32_VMX_ENTRY_CTLS:
1488 *pdata = vmx_control_msr(
1489 msrs->entry_ctls_low,
1490 msrs->entry_ctls_high);
1491 if (msr_index == MSR_IA32_VMX_ENTRY_CTLS)
1492 *pdata |= VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR;
1493 break;
1494 case MSR_IA32_VMX_MISC:
1495 *pdata = vmx_control_msr(
1496 msrs->misc_low,
1497 msrs->misc_high);
1498 break;
1499 case MSR_IA32_VMX_CR0_FIXED0:
1500 *pdata = msrs->cr0_fixed0;
1501 break;
1502 case MSR_IA32_VMX_CR0_FIXED1:
1503 *pdata = msrs->cr0_fixed1;
1504 break;
1505 case MSR_IA32_VMX_CR4_FIXED0:
1506 *pdata = msrs->cr4_fixed0;
1507 break;
1508 case MSR_IA32_VMX_CR4_FIXED1:
1509 *pdata = msrs->cr4_fixed1;
1510 break;
1511 case MSR_IA32_VMX_VMCS_ENUM:
1512 *pdata = msrs->vmcs_enum;
1513 break;
1514 case MSR_IA32_VMX_PROCBASED_CTLS2:
1515 *pdata = vmx_control_msr(
1516 msrs->secondary_ctls_low,
1517 msrs->secondary_ctls_high);
1518 break;
1519 case MSR_IA32_VMX_EPT_VPID_CAP:
1520 *pdata = msrs->ept_caps |
1521 ((u64)msrs->vpid_caps << 32);
1522 break;
1523 case MSR_IA32_VMX_VMFUNC:
1524 *pdata = msrs->vmfunc_controls;
1525 break;
1526 default:
1527 return 1;
1528 }
1529
1530 return 0;
1531}
1532
/*
 * Copy the writable VMCS shadow fields back to the VMCS12, in case they have
 * been modified by the L1 guest.  Note, "writable" in this context means
 * "writable by the guest", i.e. tagged SHADOW_FIELD_RW; the set of
 * fields tagged SHADOW_FIELD_RO may or may not align with the "read-only"
 * VM-exit information fields (which are actually writable if the vCPU is
 * configured to support "VMWRITE to any supported field in the VMCS").
 */
1541static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx)
1542{
1543 struct vmcs *shadow_vmcs = vmx->vmcs01.shadow_vmcs;
1544 struct vmcs12 *vmcs12 = get_vmcs12(&vmx->vcpu);
1545 struct shadow_vmcs_field field;
1546 unsigned long val;
1547 int i;
1548
1549 if (WARN_ON(!shadow_vmcs))
1550 return;
1551
1552 preempt_disable();
1553
1554 vmcs_load(shadow_vmcs);
1555
1556 for (i = 0; i < max_shadow_read_write_fields; i++) {
1557 field = shadow_read_write_fields[i];
1558 val = __vmcs_readl(field.encoding);
1559 vmcs12_write_any(vmcs12, field.encoding, field.offset, val);
1560 }
1561
1562 vmcs_clear(shadow_vmcs);
1563 vmcs_load(vmx->loaded_vmcs->vmcs);
1564
1565 preempt_enable();
1566}
1567
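/*
 * Propagate the cached vmcs12 values into the shadow VMCS (both the
 * read/write and read-only shadowed fields) so that L1's VMREADs, and
 * VMWRITEs to the RW fields, can be satisfied by hardware without a
 * VM-exit.
 */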
1568static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx)
1569{
1570 const struct shadow_vmcs_field *fields[] = {
1571 shadow_read_write_fields,
1572 shadow_read_only_fields
1573 };
1574 const int max_fields[] = {
1575 max_shadow_read_write_fields,
1576 max_shadow_read_only_fields
1577 };
1578 struct vmcs *shadow_vmcs = vmx->vmcs01.shadow_vmcs;
1579 struct vmcs12 *vmcs12 = get_vmcs12(&vmx->vcpu);
1580 struct shadow_vmcs_field field;
1581 unsigned long val;
1582 int i, q;
1583
1584 if (WARN_ON(!shadow_vmcs))
1585 return;
1586
1587 vmcs_load(shadow_vmcs);
1588
1589 for (q = 0; q < ARRAY_SIZE(fields); q++) {
1590 for (i = 0; i < max_fields[q]; i++) {
1591 field = fields[q][i];
1592 val = vmcs12_read_any(vmcs12, field.encoding,
1593 field.offset);
1594 __vmcs_writel(field.encoding, val);
1595 }
1596 }
1597
1598 vmcs_clear(shadow_vmcs);
1599 vmcs_load(vmx->loaded_vmcs->vmcs);
1600}
1601
1602static int copy_enlightened_to_vmcs12(struct vcpu_vmx *vmx)
1603{
1604 struct vmcs12 *vmcs12 = vmx->nested.cached_vmcs12;
1605 struct hv_enlightened_vmcs *evmcs = vmx->nested.hv_evmcs;
1606
 /* HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE */
1608 vmcs12->tpr_threshold = evmcs->tpr_threshold;
1609 vmcs12->guest_rip = evmcs->guest_rip;
1610
1611 if (unlikely(!(evmcs->hv_clean_fields &
1612 HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC))) {
1613 vmcs12->guest_rsp = evmcs->guest_rsp;
1614 vmcs12->guest_rflags = evmcs->guest_rflags;
1615 vmcs12->guest_interruptibility_info =
1616 evmcs->guest_interruptibility_info;
1617 }
1618
1619 if (unlikely(!(evmcs->hv_clean_fields &
1620 HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_PROC))) {
1621 vmcs12->cpu_based_vm_exec_control =
1622 evmcs->cpu_based_vm_exec_control;
1623 }
1624
1625 if (unlikely(!(evmcs->hv_clean_fields &
1626 HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EXCPN))) {
1627 vmcs12->exception_bitmap = evmcs->exception_bitmap;
1628 }
1629
1630 if (unlikely(!(evmcs->hv_clean_fields &
1631 HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_ENTRY))) {
1632 vmcs12->vm_entry_controls = evmcs->vm_entry_controls;
1633 }
1634
1635 if (unlikely(!(evmcs->hv_clean_fields &
1636 HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT))) {
1637 vmcs12->vm_entry_intr_info_field =
1638 evmcs->vm_entry_intr_info_field;
1639 vmcs12->vm_entry_exception_error_code =
1640 evmcs->vm_entry_exception_error_code;
1641 vmcs12->vm_entry_instruction_len =
1642 evmcs->vm_entry_instruction_len;
1643 }
1644
1645 if (unlikely(!(evmcs->hv_clean_fields &
1646 HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1))) {
1647 vmcs12->host_ia32_pat = evmcs->host_ia32_pat;
1648 vmcs12->host_ia32_efer = evmcs->host_ia32_efer;
1649 vmcs12->host_cr0 = evmcs->host_cr0;
1650 vmcs12->host_cr3 = evmcs->host_cr3;
1651 vmcs12->host_cr4 = evmcs->host_cr4;
1652 vmcs12->host_ia32_sysenter_esp = evmcs->host_ia32_sysenter_esp;
1653 vmcs12->host_ia32_sysenter_eip = evmcs->host_ia32_sysenter_eip;
1654 vmcs12->host_rip = evmcs->host_rip;
1655 vmcs12->host_ia32_sysenter_cs = evmcs->host_ia32_sysenter_cs;
1656 vmcs12->host_es_selector = evmcs->host_es_selector;
1657 vmcs12->host_cs_selector = evmcs->host_cs_selector;
1658 vmcs12->host_ss_selector = evmcs->host_ss_selector;
1659 vmcs12->host_ds_selector = evmcs->host_ds_selector;
1660 vmcs12->host_fs_selector = evmcs->host_fs_selector;
1661 vmcs12->host_gs_selector = evmcs->host_gs_selector;
1662 vmcs12->host_tr_selector = evmcs->host_tr_selector;
1663 }
1664
1665 if (unlikely(!(evmcs->hv_clean_fields &
1666 HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1))) {
1667 vmcs12->pin_based_vm_exec_control =
1668 evmcs->pin_based_vm_exec_control;
1669 vmcs12->vm_exit_controls = evmcs->vm_exit_controls;
1670 vmcs12->secondary_vm_exec_control =
1671 evmcs->secondary_vm_exec_control;
1672 }
1673
1674 if (unlikely(!(evmcs->hv_clean_fields &
1675 HV_VMX_ENLIGHTENED_CLEAN_FIELD_IO_BITMAP))) {
1676 vmcs12->io_bitmap_a = evmcs->io_bitmap_a;
1677 vmcs12->io_bitmap_b = evmcs->io_bitmap_b;
1678 }
1679
1680 if (unlikely(!(evmcs->hv_clean_fields &
1681 HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP))) {
1682 vmcs12->msr_bitmap = evmcs->msr_bitmap;
1683 }
1684
1685 if (unlikely(!(evmcs->hv_clean_fields &
1686 HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2))) {
1687 vmcs12->guest_es_base = evmcs->guest_es_base;
1688 vmcs12->guest_cs_base = evmcs->guest_cs_base;
1689 vmcs12->guest_ss_base = evmcs->guest_ss_base;
1690 vmcs12->guest_ds_base = evmcs->guest_ds_base;
1691 vmcs12->guest_fs_base = evmcs->guest_fs_base;
1692 vmcs12->guest_gs_base = evmcs->guest_gs_base;
1693 vmcs12->guest_ldtr_base = evmcs->guest_ldtr_base;
1694 vmcs12->guest_tr_base = evmcs->guest_tr_base;
1695 vmcs12->guest_gdtr_base = evmcs->guest_gdtr_base;
1696 vmcs12->guest_idtr_base = evmcs->guest_idtr_base;
1697 vmcs12->guest_es_limit = evmcs->guest_es_limit;
1698 vmcs12->guest_cs_limit = evmcs->guest_cs_limit;
1699 vmcs12->guest_ss_limit = evmcs->guest_ss_limit;
1700 vmcs12->guest_ds_limit = evmcs->guest_ds_limit;
1701 vmcs12->guest_fs_limit = evmcs->guest_fs_limit;
1702 vmcs12->guest_gs_limit = evmcs->guest_gs_limit;
1703 vmcs12->guest_ldtr_limit = evmcs->guest_ldtr_limit;
1704 vmcs12->guest_tr_limit = evmcs->guest_tr_limit;
1705 vmcs12->guest_gdtr_limit = evmcs->guest_gdtr_limit;
1706 vmcs12->guest_idtr_limit = evmcs->guest_idtr_limit;
1707 vmcs12->guest_es_ar_bytes = evmcs->guest_es_ar_bytes;
1708 vmcs12->guest_cs_ar_bytes = evmcs->guest_cs_ar_bytes;
1709 vmcs12->guest_ss_ar_bytes = evmcs->guest_ss_ar_bytes;
1710 vmcs12->guest_ds_ar_bytes = evmcs->guest_ds_ar_bytes;
1711 vmcs12->guest_fs_ar_bytes = evmcs->guest_fs_ar_bytes;
1712 vmcs12->guest_gs_ar_bytes = evmcs->guest_gs_ar_bytes;
1713 vmcs12->guest_ldtr_ar_bytes = evmcs->guest_ldtr_ar_bytes;
1714 vmcs12->guest_tr_ar_bytes = evmcs->guest_tr_ar_bytes;
1715 vmcs12->guest_es_selector = evmcs->guest_es_selector;
1716 vmcs12->guest_cs_selector = evmcs->guest_cs_selector;
1717 vmcs12->guest_ss_selector = evmcs->guest_ss_selector;
1718 vmcs12->guest_ds_selector = evmcs->guest_ds_selector;
1719 vmcs12->guest_fs_selector = evmcs->guest_fs_selector;
1720 vmcs12->guest_gs_selector = evmcs->guest_gs_selector;
1721 vmcs12->guest_ldtr_selector = evmcs->guest_ldtr_selector;
1722 vmcs12->guest_tr_selector = evmcs->guest_tr_selector;
1723 }
1724
1725 if (unlikely(!(evmcs->hv_clean_fields &
1726 HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2))) {
1727 vmcs12->tsc_offset = evmcs->tsc_offset;
1728 vmcs12->virtual_apic_page_addr = evmcs->virtual_apic_page_addr;
1729 vmcs12->xss_exit_bitmap = evmcs->xss_exit_bitmap;
1730 }
1731
1732 if (unlikely(!(evmcs->hv_clean_fields &
1733 HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR))) {
1734 vmcs12->cr0_guest_host_mask = evmcs->cr0_guest_host_mask;
1735 vmcs12->cr4_guest_host_mask = evmcs->cr4_guest_host_mask;
1736 vmcs12->cr0_read_shadow = evmcs->cr0_read_shadow;
1737 vmcs12->cr4_read_shadow = evmcs->cr4_read_shadow;
1738 vmcs12->guest_cr0 = evmcs->guest_cr0;
1739 vmcs12->guest_cr3 = evmcs->guest_cr3;
1740 vmcs12->guest_cr4 = evmcs->guest_cr4;
1741 vmcs12->guest_dr7 = evmcs->guest_dr7;
1742 }
1743
1744 if (unlikely(!(evmcs->hv_clean_fields &
1745 HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER))) {
1746 vmcs12->host_fs_base = evmcs->host_fs_base;
1747 vmcs12->host_gs_base = evmcs->host_gs_base;
1748 vmcs12->host_tr_base = evmcs->host_tr_base;
1749 vmcs12->host_gdtr_base = evmcs->host_gdtr_base;
1750 vmcs12->host_idtr_base = evmcs->host_idtr_base;
1751 vmcs12->host_rsp = evmcs->host_rsp;
1752 }
1753
1754 if (unlikely(!(evmcs->hv_clean_fields &
1755 HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_XLAT))) {
1756 vmcs12->ept_pointer = evmcs->ept_pointer;
1757 vmcs12->virtual_processor_id = evmcs->virtual_processor_id;
1758 }
1759
1760 if (unlikely(!(evmcs->hv_clean_fields &
1761 HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1))) {
1762 vmcs12->vmcs_link_pointer = evmcs->vmcs_link_pointer;
1763 vmcs12->guest_ia32_debugctl = evmcs->guest_ia32_debugctl;
1764 vmcs12->guest_ia32_pat = evmcs->guest_ia32_pat;
1765 vmcs12->guest_ia32_efer = evmcs->guest_ia32_efer;
1766 vmcs12->guest_pdptr0 = evmcs->guest_pdptr0;
1767 vmcs12->guest_pdptr1 = evmcs->guest_pdptr1;
1768 vmcs12->guest_pdptr2 = evmcs->guest_pdptr2;
1769 vmcs12->guest_pdptr3 = evmcs->guest_pdptr3;
1770 vmcs12->guest_pending_dbg_exceptions =
1771 evmcs->guest_pending_dbg_exceptions;
1772 vmcs12->guest_sysenter_esp = evmcs->guest_sysenter_esp;
1773 vmcs12->guest_sysenter_eip = evmcs->guest_sysenter_eip;
1774 vmcs12->guest_bndcfgs = evmcs->guest_bndcfgs;
1775 vmcs12->guest_activity_state = evmcs->guest_activity_state;
1776 vmcs12->guest_sysenter_cs = evmcs->guest_sysenter_cs;
1777 }
1778
 /*
  * Intentionally not copied from the enlightened VMCS:
  *  - fields KVM doesn't consume from the eVMCS (MSR load/store
  *    addresses and counts, CR3-target count, page-fault error-code
  *    mask/match);
  *  - read-only VM-exit information fields, which only ever flow in
  *    the other direction (vmcs12 -> eVMCS);
  *  - fields that have no counterpart in struct vmcs12.
  */
1815 return 0;
1816}
1817
1818static int copy_vmcs12_to_enlightened(struct vcpu_vmx *vmx)
1819{
1820 struct vmcs12 *vmcs12 = vmx->nested.cached_vmcs12;
1821 struct hv_enlightened_vmcs *evmcs = vmx->nested.hv_evmcs;
1822
 /*
  * Intentionally not written back to the enlightened VMCS:
  *  - host state (selectors, bases, CR0/CR3/CR4, SYSENTER MSRs,
  *    RSP/RIP, PAT/EFER), which should not be changed by KVM;
  *  - control fields KVM never modifies on behalf of L1 (I/O and MSR
  *    bitmaps, execution controls, TSC offset, EPT pointer, VPID, ...);
  *  - fields that have no counterpart in struct vmcs12.
  */
1887 evmcs->guest_es_selector = vmcs12->guest_es_selector;
1888 evmcs->guest_cs_selector = vmcs12->guest_cs_selector;
1889 evmcs->guest_ss_selector = vmcs12->guest_ss_selector;
1890 evmcs->guest_ds_selector = vmcs12->guest_ds_selector;
1891 evmcs->guest_fs_selector = vmcs12->guest_fs_selector;
1892 evmcs->guest_gs_selector = vmcs12->guest_gs_selector;
1893 evmcs->guest_ldtr_selector = vmcs12->guest_ldtr_selector;
1894 evmcs->guest_tr_selector = vmcs12->guest_tr_selector;
1895
1896 evmcs->guest_es_limit = vmcs12->guest_es_limit;
1897 evmcs->guest_cs_limit = vmcs12->guest_cs_limit;
1898 evmcs->guest_ss_limit = vmcs12->guest_ss_limit;
1899 evmcs->guest_ds_limit = vmcs12->guest_ds_limit;
1900 evmcs->guest_fs_limit = vmcs12->guest_fs_limit;
1901 evmcs->guest_gs_limit = vmcs12->guest_gs_limit;
1902 evmcs->guest_ldtr_limit = vmcs12->guest_ldtr_limit;
1903 evmcs->guest_tr_limit = vmcs12->guest_tr_limit;
1904 evmcs->guest_gdtr_limit = vmcs12->guest_gdtr_limit;
1905 evmcs->guest_idtr_limit = vmcs12->guest_idtr_limit;
1906
1907 evmcs->guest_es_ar_bytes = vmcs12->guest_es_ar_bytes;
1908 evmcs->guest_cs_ar_bytes = vmcs12->guest_cs_ar_bytes;
1909 evmcs->guest_ss_ar_bytes = vmcs12->guest_ss_ar_bytes;
1910 evmcs->guest_ds_ar_bytes = vmcs12->guest_ds_ar_bytes;
1911 evmcs->guest_fs_ar_bytes = vmcs12->guest_fs_ar_bytes;
1912 evmcs->guest_gs_ar_bytes = vmcs12->guest_gs_ar_bytes;
1913 evmcs->guest_ldtr_ar_bytes = vmcs12->guest_ldtr_ar_bytes;
1914 evmcs->guest_tr_ar_bytes = vmcs12->guest_tr_ar_bytes;
1915
1916 evmcs->guest_es_base = vmcs12->guest_es_base;
1917 evmcs->guest_cs_base = vmcs12->guest_cs_base;
1918 evmcs->guest_ss_base = vmcs12->guest_ss_base;
1919 evmcs->guest_ds_base = vmcs12->guest_ds_base;
1920 evmcs->guest_fs_base = vmcs12->guest_fs_base;
1921 evmcs->guest_gs_base = vmcs12->guest_gs_base;
1922 evmcs->guest_ldtr_base = vmcs12->guest_ldtr_base;
1923 evmcs->guest_tr_base = vmcs12->guest_tr_base;
1924 evmcs->guest_gdtr_base = vmcs12->guest_gdtr_base;
1925 evmcs->guest_idtr_base = vmcs12->guest_idtr_base;
1926
1927 evmcs->guest_ia32_pat = vmcs12->guest_ia32_pat;
1928 evmcs->guest_ia32_efer = vmcs12->guest_ia32_efer;
1929
1930 evmcs->guest_pdptr0 = vmcs12->guest_pdptr0;
1931 evmcs->guest_pdptr1 = vmcs12->guest_pdptr1;
1932 evmcs->guest_pdptr2 = vmcs12->guest_pdptr2;
1933 evmcs->guest_pdptr3 = vmcs12->guest_pdptr3;
1934
1935 evmcs->guest_pending_dbg_exceptions =
1936 vmcs12->guest_pending_dbg_exceptions;
1937 evmcs->guest_sysenter_esp = vmcs12->guest_sysenter_esp;
1938 evmcs->guest_sysenter_eip = vmcs12->guest_sysenter_eip;
1939
1940 evmcs->guest_activity_state = vmcs12->guest_activity_state;
1941 evmcs->guest_sysenter_cs = vmcs12->guest_sysenter_cs;
1942
1943 evmcs->guest_cr0 = vmcs12->guest_cr0;
1944 evmcs->guest_cr3 = vmcs12->guest_cr3;
1945 evmcs->guest_cr4 = vmcs12->guest_cr4;
1946 evmcs->guest_dr7 = vmcs12->guest_dr7;
1947
1948 evmcs->guest_physical_address = vmcs12->guest_physical_address;
1949
1950 evmcs->vm_instruction_error = vmcs12->vm_instruction_error;
1951 evmcs->vm_exit_reason = vmcs12->vm_exit_reason;
1952 evmcs->vm_exit_intr_info = vmcs12->vm_exit_intr_info;
1953 evmcs->vm_exit_intr_error_code = vmcs12->vm_exit_intr_error_code;
1954 evmcs->idt_vectoring_info_field = vmcs12->idt_vectoring_info_field;
1955 evmcs->idt_vectoring_error_code = vmcs12->idt_vectoring_error_code;
1956 evmcs->vm_exit_instruction_len = vmcs12->vm_exit_instruction_len;
1957 evmcs->vmx_instruction_info = vmcs12->vmx_instruction_info;
1958
1959 evmcs->exit_qualification = vmcs12->exit_qualification;
1960
1961 evmcs->guest_linear_address = vmcs12->guest_linear_address;
1962 evmcs->guest_rsp = vmcs12->guest_rsp;
1963 evmcs->guest_rflags = vmcs12->guest_rflags;
1964
1965 evmcs->guest_interruptibility_info =
1966 vmcs12->guest_interruptibility_info;
1967 evmcs->cpu_based_vm_exec_control = vmcs12->cpu_based_vm_exec_control;
1968 evmcs->vm_entry_controls = vmcs12->vm_entry_controls;
1969 evmcs->vm_entry_intr_info_field = vmcs12->vm_entry_intr_info_field;
1970 evmcs->vm_entry_exception_error_code =
1971 vmcs12->vm_entry_exception_error_code;
1972 evmcs->vm_entry_instruction_len = vmcs12->vm_entry_instruction_len;
1973
1974 evmcs->guest_rip = vmcs12->guest_rip;
1975
1976 evmcs->guest_bndcfgs = vmcs12->guest_bndcfgs;
1977
1978 return 0;
1979}
1980
/*
 * This is an equivalent of the nested hypervisor executing the vmptrld
 * instruction.
 */
1985static enum nested_evmptrld_status nested_vmx_handle_enlightened_vmptrld(
1986 struct kvm_vcpu *vcpu, bool from_launch)
1987{
1988 struct vcpu_vmx *vmx = to_vmx(vcpu);
1989 bool evmcs_gpa_changed = false;
1990 u64 evmcs_gpa;
1991
1992 if (likely(!vmx->nested.enlightened_vmcs_enabled))
1993 return EVMPTRLD_DISABLED;
1994
1995 if (!nested_enlightened_vmentry(vcpu, &evmcs_gpa))
1996 return EVMPTRLD_DISABLED;
1997
1998 if (unlikely(!vmx->nested.hv_evmcs ||
1999 evmcs_gpa != vmx->nested.hv_evmcs_vmptr)) {
2000 if (!vmx->nested.hv_evmcs)
2001 vmx->nested.current_vmptr = -1ull;
2002
2003 nested_release_evmcs(vcpu);
2004
2005 if (kvm_vcpu_map(vcpu, gpa_to_gfn(evmcs_gpa),
2006 &vmx->nested.hv_evmcs_map))
2007 return EVMPTRLD_ERROR;
2008
2009 vmx->nested.hv_evmcs = vmx->nested.hv_evmcs_map.hva;
2010
 /*
  * Currently, KVM only supports eVMCS version 1
  * (== KVM_EVMCS_VERSION) and thus we expect guest to set this
  * value to first u32 field of eVMCS which should specify eVMCS
  * VersionNumber.
  *
  * However, it turns out that Microsoft Hyper-V fails to comply
  * to their own invented interface: When Hyper-V use eVMCS, it
  * just sets first u32 field of eVMCS to revision_id specified
  * in MSR_IA32_VMX_BASIC, instead of the eVMCS version number
  * which is one of the supported versions specified in
  * CPUID.0x4000000A.EAX[0:15].
  *
  * To overcome Hyper-V bug, we accept here either supported
  * eVMCS version or VMCS12 revision_id as valid values for first
  * u32 field of eVMCS.
  */
2033 if ((vmx->nested.hv_evmcs->revision_id != KVM_EVMCS_VERSION) &&
2034 (vmx->nested.hv_evmcs->revision_id != VMCS12_REVISION)) {
2035 nested_release_evmcs(vcpu);
2036 return EVMPTRLD_VMFAIL;
2037 }
2038
2039 vmx->nested.dirty_vmcs12 = true;
2040 vmx->nested.hv_evmcs_vmptr = evmcs_gpa;
2041
2042 evmcs_gpa_changed = true;
2043
 /*
  * Unlike normal vmcs12, enlightened vmcs12 is not fully
  * reloaded from guest's memory (read only fields, fields not
  * present in struct hv_enlightened_vmcs, ...). Make sure there
  * are no leftovers.
  */
2049 if (from_launch) {
2050 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
2051 memset(vmcs12, 0, sizeof(*vmcs12));
2052 vmcs12->hdr.revision_id = VMCS12_REVISION;
2053 }
2054
2055 }
2056
 /*
  * Clean fields data can't be used on VMLAUNCH and when we switch
  * between different L2 guests as KVM keeps a single VMCS12 per L1.
  */
2061 if (from_launch || evmcs_gpa_changed)
2062 vmx->nested.hv_evmcs->hv_clean_fields &=
2063 ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
2064
2065 return EVMPTRLD_SUCCEEDED;
2066}
2067
2068void nested_sync_vmcs12_to_shadow(struct kvm_vcpu *vcpu)
2069{
2070 struct vcpu_vmx *vmx = to_vmx(vcpu);
2071
2072 if (vmx->nested.hv_evmcs) {
2073 copy_vmcs12_to_enlightened(vmx);
 /* All fields are clean */
2075 vmx->nested.hv_evmcs->hv_clean_fields |=
2076 HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
2077 } else {
2078 copy_vmcs12_to_shadow(vmx);
2079 }
2080
2081 vmx->nested.need_vmcs12_to_shadow_sync = false;
2082}
2083
2084static enum hrtimer_restart vmx_preemption_timer_fn(struct hrtimer *timer)
2085{
2086 struct vcpu_vmx *vmx =
2087 container_of(timer, struct vcpu_vmx, nested.preemption_timer);
2088
2089 vmx->nested.preemption_timer_expired = true;
2090 kvm_make_request(KVM_REQ_EVENT, &vmx->vcpu);
2091 kvm_vcpu_kick(&vmx->vcpu);
2092
2093 return HRTIMER_NORESTART;
2094}
2095
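/*
 * The emulated VMX-preemption timer counts down in units of the L1 TSC
 * shifted right by VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE.  The deadline
 * is computed once and cached so that recalculating the timer value later
 * does not restart the countdown.
 */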
2096static u64 vmx_calc_preemption_timer_value(struct kvm_vcpu *vcpu)
2097{
2098 struct vcpu_vmx *vmx = to_vmx(vcpu);
2099 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
2100
2101 u64 l1_scaled_tsc = kvm_read_l1_tsc(vcpu, rdtsc()) >>
2102 VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE;
2103
2104 if (!vmx->nested.has_preemption_timer_deadline) {
2105 vmx->nested.preemption_timer_deadline =
2106 vmcs12->vmx_preemption_timer_value + l1_scaled_tsc;
2107 vmx->nested.has_preemption_timer_deadline = true;
2108 }
2109 return vmx->nested.preemption_timer_deadline - l1_scaled_tsc;
2110}
2111
2112static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu,
2113 u64 preemption_timeout)
2114{
2115 struct vcpu_vmx *vmx = to_vmx(vcpu);
2116
 /*
  * A timer value of zero is architecturally guaranteed to cause
  * a VMExit prior to executing any instructions in the guest.
  */
2121 if (preemption_timeout == 0) {
2122 vmx_preemption_timer_fn(&vmx->nested.preemption_timer);
2123 return;
2124 }
2125
2126 if (vcpu->arch.virtual_tsc_khz == 0)
2127 return;
2128
2129 preemption_timeout <<= VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE;
2130 preemption_timeout *= 1000000;
2131 do_div(preemption_timeout, vcpu->arch.virtual_tsc_khz);
2132 hrtimer_start(&vmx->nested.preemption_timer,
2133 ktime_add_ns(ktime_get(), preemption_timeout),
2134 HRTIMER_MODE_ABS_PINNED);
2135}
2136
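/*
 * Compute the EFER value that will be in effect for L2: taken from vmcs12
 * when VM_ENTRY_LOAD_IA32_EFER is being loaded for this entry, otherwise
 * derived from L1's EFER with LMA/LME forced to match the "IA-32e mode
 * guest" VM-entry control.
 */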
2137static u64 nested_vmx_calc_efer(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
2138{
2139 if (vmx->nested.nested_run_pending &&
2140 (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER))
2141 return vmcs12->guest_ia32_efer;
2142 else if (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE)
2143 return vmx->vcpu.arch.efer | (EFER_LMA | EFER_LME);
2144 else
2145 return vmx->vcpu.arch.efer & ~(EFER_LMA | EFER_LME);
2146}
2147
2148static void prepare_vmcs02_constant_state(struct vcpu_vmx *vmx)
2149{
 /*
  * If vmcs02 hasn't been initialized, set the constant vmcs02 state
  * according to L0's settings (vmcs12 is irrelevant here).  Host
  * fields that come from L0 and are not constant, e.g. HOST_CR3,
  * will be set as needed prior to VMLAUNCH/VMRESUME.
  */
2156 if (vmx->nested.vmcs02_initialized)
2157 return;
2158 vmx->nested.vmcs02_initialized = true;
2159
 /*
  * We don't care what the EPTP value is, we just need guarantees that
  * it is valid so the early consistency-check VM-Enter doesn't fail on
  * an invalid EPT pointer; the real EPTP is loaded later.
  */
2165 if (enable_ept && nested_early_check)
2166 vmcs_write64(EPT_POINTER,
2167 construct_eptp(&vmx->vcpu, 0, PT64_ROOT_4LEVEL));
2168
 /* All VMFUNCs are currently emulated through L0 vmexits. */
2170 if (cpu_has_vmx_vmfunc())
2171 vmcs_write64(VM_FUNCTION_CONTROL, 0);
2172
2173 if (cpu_has_vmx_posted_intr())
2174 vmcs_write16(POSTED_INTR_NV, POSTED_INTR_NESTED_VECTOR);
2175
2176 if (cpu_has_vmx_msr_bitmap())
2177 vmcs_write64(MSR_BITMAP, __pa(vmx->nested.vmcs02.msr_bitmap));
2178
 /*
  * The PML address never changes, so it is constant in vmcs02.
  * Conceptually we want to copy the PML index from vmcs01 here,
  * and then back to vmcs01 on nested vmexit.  But since we flush
  * the log and reset GUEST_PML_INDEX on each vmexit, the PML
  * index is also effectively constant in vmcs02.
  */
2186 if (enable_pml) {
2187 vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg));
2188 vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
2189 }
2190
2191 if (cpu_has_vmx_encls_vmexit())
2192 vmcs_write64(ENCLS_EXITING_BITMAP, -1ull);
2193
 /*
  * Set the MSR load/store lists to match L0's settings.  Only the
  * addresses are constant (for vmcs02), the counts can change based
  * on L2's behavior, e.g. switching to/from long mode.
  */
2199 vmcs_write64(VM_EXIT_MSR_STORE_ADDR, __pa(vmx->msr_autostore.guest.val));
2200 vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host.val));
2201 vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest.val));
2202
2203 vmx_set_constant_host_state(vmx);
2204}
2205
2206static void prepare_vmcs02_early_rare(struct vcpu_vmx *vmx,
2207 struct vmcs12 *vmcs12)
2208{
2209 prepare_vmcs02_constant_state(vmx);
2210
2211 vmcs_write64(VMCS_LINK_POINTER, -1ull);
2212
2213 if (enable_vpid) {
2214 if (nested_cpu_has_vpid(vmcs12) && vmx->nested.vpid02)
2215 vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->nested.vpid02);
2216 else
2217 vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
2218 }
2219}
2220
2221static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
2222{
2223 u32 exec_control, vmcs12_exec_ctrl;
2224 u64 guest_efer = nested_vmx_calc_efer(vmx, vmcs12);
2225
2226 if (vmx->nested.dirty_vmcs12 || vmx->nested.hv_evmcs)
2227 prepare_vmcs02_early_rare(vmx, vmcs12);
2228
2229
2230
2231
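	/*
	 * PIN CONTROLS
	 */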
2232 exec_control = vmx_pin_based_exec_ctrl(vmx);
2233 exec_control |= (vmcs12->pin_based_vm_exec_control &
2234 ~PIN_BASED_VMX_PREEMPTION_TIMER);
2235
2236
2237 if (nested_cpu_has_posted_intr(vmcs12)) {
2238 vmx->nested.posted_intr_nv = vmcs12->posted_intr_nv;
2239 vmx->nested.pi_pending = false;
2240 } else {
2241 exec_control &= ~PIN_BASED_POSTED_INTR;
2242 }
2243 pin_controls_set(vmx, exec_control);
2244
2245
2246
2247
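	/*
	 * EXEC CONTROLS
	 */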
2248 exec_control = vmx_exec_control(vmx);
2249 exec_control &= ~CPU_BASED_INTR_WINDOW_EXITING;
2250 exec_control &= ~CPU_BASED_NMI_WINDOW_EXITING;
2251 exec_control &= ~CPU_BASED_TPR_SHADOW;
2252 exec_control |= vmcs12->cpu_based_vm_exec_control;
2253
2254 vmx->nested.l1_tpr_threshold = -1;
2255 if (exec_control & CPU_BASED_TPR_SHADOW)
2256 vmcs_write32(TPR_THRESHOLD, vmcs12->tpr_threshold);
2257#ifdef CONFIG_X86_64
2258 else
2259 exec_control |= CPU_BASED_CR8_LOAD_EXITING |
2260 CPU_BASED_CR8_STORE_EXITING;
2261#endif
2262
2263
2264
2265
2266
2267 exec_control |= CPU_BASED_UNCOND_IO_EXITING;
2268 exec_control &= ~CPU_BASED_USE_IO_BITMAPS;
2269
2270
2271
2272
2273
2274
2275
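	/*
	 * The MSR-bitmap bit is recomputed in nested_get_vmcs12_pages(),
	 * because L1's MSR bitmap may not be mapped yet.  For now, carry
	 * over the current value to avoid a pointless VMWRITE.
	 */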
2276 exec_control &= ~CPU_BASED_USE_MSR_BITMAPS;
2277 exec_control |= exec_controls_get(vmx) & CPU_BASED_USE_MSR_BITMAPS;
2278
2279 exec_controls_set(vmx, exec_control);
2280
2281
2282
2283
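	/*
	 * SECONDARY EXEC CONTROLS
	 */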
2284 if (cpu_has_secondary_exec_ctrls()) {
2285 exec_control = vmx->secondary_exec_control;
2286
2287
2288 exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
2289 SECONDARY_EXEC_ENABLE_INVPCID |
2290 SECONDARY_EXEC_ENABLE_RDTSCP |
2291 SECONDARY_EXEC_XSAVES |
2292 SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE |
2293 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
2294 SECONDARY_EXEC_APIC_REGISTER_VIRT |
2295 SECONDARY_EXEC_ENABLE_VMFUNC);
2296 if (nested_cpu_has(vmcs12,
2297 CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)) {
2298 vmcs12_exec_ctrl = vmcs12->secondary_vm_exec_control &
2299 ~SECONDARY_EXEC_ENABLE_PML;
2300 exec_control |= vmcs12_exec_ctrl;
2301 }
2302
2303
2304 exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS;
2305
2306
2307
2308
2309
2310 if (!boot_cpu_has(X86_FEATURE_UMIP) && vmx_umip_emulated() &&
2311 (vmcs12->guest_cr4 & X86_CR4_UMIP))
2312 exec_control |= SECONDARY_EXEC_DESC;
2313
2314 if (exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY)
2315 vmcs_write16(GUEST_INTR_STATUS,
2316 vmcs12->guest_intr_status);
2317
2318 if (!nested_cpu_has2(vmcs12, SECONDARY_EXEC_UNRESTRICTED_GUEST))
2319 exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST;
2320
2321 secondary_exec_controls_set(vmx, exec_control);
2322 }
2323
2324
2325
2326
2327
2328
2329
2330
2331
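	/*
	 * ENTRY CONTROLS
	 *
	 * vmcs12's VM_{ENTRY,EXIT}_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are
	 * emulated by vmx_set_efer() in prepare_vmcs02(), but speculate on
	 * the related bits (if supported by the CPU) in the hope of avoiding
	 * VMWRITEs there.
	 */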
2332 exec_control = (vmcs12->vm_entry_controls | vmx_vmentry_ctrl()) &
2333 ~VM_ENTRY_IA32E_MODE & ~VM_ENTRY_LOAD_IA32_EFER;
2334 if (cpu_has_load_ia32_efer()) {
2335 if (guest_efer & EFER_LMA)
2336 exec_control |= VM_ENTRY_IA32E_MODE;
2337 if (guest_efer != host_efer)
2338 exec_control |= VM_ENTRY_LOAD_IA32_EFER;
2339 }
2340 vm_entry_controls_set(vmx, exec_control);
2341
2342
2343
2344
2345
2346
2347
2348
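	/*
	 * EXIT CONTROLS
	 *
	 * L2->L1 exit controls are emulated - the hardware exit is always to
	 * L0, so use L0's exit controls.  VM_EXIT_LOAD_IA32_EFER may be
	 * modified by vmx_set_efer() in prepare_vmcs02().
	 */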
2349 exec_control = vmx_vmexit_ctrl();
2350 if (cpu_has_load_ia32_efer() && guest_efer != host_efer)
2351 exec_control |= VM_EXIT_LOAD_IA32_EFER;
2352 vm_exit_controls_set(vmx, exec_control);
2353
2354
2355
2356
2357 if (vmx->nested.nested_run_pending) {
2358 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
2359 vmcs12->vm_entry_intr_info_field);
2360 vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE,
2361 vmcs12->vm_entry_exception_error_code);
2362 vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
2363 vmcs12->vm_entry_instruction_len);
2364 vmcs_write32(GUEST_INTERRUPTIBILITY_INFO,
2365 vmcs12->guest_interruptibility_info);
2366 vmx->loaded_vmcs->nmi_known_unmasked =
2367 !(vmcs12->guest_interruptibility_info & GUEST_INTR_STATE_NMI);
2368 } else {
2369 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0);
2370 }
2371}
2372
2373static void prepare_vmcs02_rare(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
2374{
2375 struct hv_enlightened_vmcs *hv_evmcs = vmx->nested.hv_evmcs;
2376
2377 if (!hv_evmcs || !(hv_evmcs->hv_clean_fields &
2378 HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2)) {
2379 vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector);
2380 vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector);
2381 vmcs_write16(GUEST_SS_SELECTOR, vmcs12->guest_ss_selector);
2382 vmcs_write16(GUEST_DS_SELECTOR, vmcs12->guest_ds_selector);
2383 vmcs_write16(GUEST_FS_SELECTOR, vmcs12->guest_fs_selector);
2384 vmcs_write16(GUEST_GS_SELECTOR, vmcs12->guest_gs_selector);
2385 vmcs_write16(GUEST_LDTR_SELECTOR, vmcs12->guest_ldtr_selector);
2386 vmcs_write16(GUEST_TR_SELECTOR, vmcs12->guest_tr_selector);
2387 vmcs_write32(GUEST_ES_LIMIT, vmcs12->guest_es_limit);
2388 vmcs_write32(GUEST_CS_LIMIT, vmcs12->guest_cs_limit);
2389 vmcs_write32(GUEST_SS_LIMIT, vmcs12->guest_ss_limit);
2390 vmcs_write32(GUEST_DS_LIMIT, vmcs12->guest_ds_limit);
2391 vmcs_write32(GUEST_FS_LIMIT, vmcs12->guest_fs_limit);
2392 vmcs_write32(GUEST_GS_LIMIT, vmcs12->guest_gs_limit);
2393 vmcs_write32(GUEST_LDTR_LIMIT, vmcs12->guest_ldtr_limit);
2394 vmcs_write32(GUEST_TR_LIMIT, vmcs12->guest_tr_limit);
2395 vmcs_write32(GUEST_GDTR_LIMIT, vmcs12->guest_gdtr_limit);
2396 vmcs_write32(GUEST_IDTR_LIMIT, vmcs12->guest_idtr_limit);
2397 vmcs_write32(GUEST_CS_AR_BYTES, vmcs12->guest_cs_ar_bytes);
2398 vmcs_write32(GUEST_SS_AR_BYTES, vmcs12->guest_ss_ar_bytes);
2399 vmcs_write32(GUEST_ES_AR_BYTES, vmcs12->guest_es_ar_bytes);
2400 vmcs_write32(GUEST_DS_AR_BYTES, vmcs12->guest_ds_ar_bytes);
2401 vmcs_write32(GUEST_FS_AR_BYTES, vmcs12->guest_fs_ar_bytes);
2402 vmcs_write32(GUEST_GS_AR_BYTES, vmcs12->guest_gs_ar_bytes);
2403 vmcs_write32(GUEST_LDTR_AR_BYTES, vmcs12->guest_ldtr_ar_bytes);
2404 vmcs_write32(GUEST_TR_AR_BYTES, vmcs12->guest_tr_ar_bytes);
2405 vmcs_writel(GUEST_ES_BASE, vmcs12->guest_es_base);
2406 vmcs_writel(GUEST_CS_BASE, vmcs12->guest_cs_base);
2407 vmcs_writel(GUEST_SS_BASE, vmcs12->guest_ss_base);
2408 vmcs_writel(GUEST_DS_BASE, vmcs12->guest_ds_base);
2409 vmcs_writel(GUEST_FS_BASE, vmcs12->guest_fs_base);
2410 vmcs_writel(GUEST_GS_BASE, vmcs12->guest_gs_base);
2411 vmcs_writel(GUEST_LDTR_BASE, vmcs12->guest_ldtr_base);
2412 vmcs_writel(GUEST_TR_BASE, vmcs12->guest_tr_base);
2413 vmcs_writel(GUEST_GDTR_BASE, vmcs12->guest_gdtr_base);
2414 vmcs_writel(GUEST_IDTR_BASE, vmcs12->guest_idtr_base);
2415
2416 vmx->segment_cache.bitmask = 0;
2417 }
2418
2419 if (!hv_evmcs || !(hv_evmcs->hv_clean_fields &
2420 HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1)) {
2421 vmcs_write32(GUEST_SYSENTER_CS, vmcs12->guest_sysenter_cs);
2422 vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS,
2423 vmcs12->guest_pending_dbg_exceptions);
2424 vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->guest_sysenter_esp);
2425 vmcs_writel(GUEST_SYSENTER_EIP, vmcs12->guest_sysenter_eip);
2426
2427
2428
2429
2430
2431 if (enable_ept) {
2432 vmcs_write64(GUEST_PDPTR0, vmcs12->guest_pdptr0);
2433 vmcs_write64(GUEST_PDPTR1, vmcs12->guest_pdptr1);
2434 vmcs_write64(GUEST_PDPTR2, vmcs12->guest_pdptr2);
2435 vmcs_write64(GUEST_PDPTR3, vmcs12->guest_pdptr3);
2436 }
2437
2438 if (kvm_mpx_supported() && vmx->nested.nested_run_pending &&
2439 (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS))
2440 vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs);
2441 }
2442
2443 if (nested_cpu_has_xsaves(vmcs12))
2444 vmcs_write64(XSS_EXIT_BITMAP, vmcs12->xss_exit_bitmap);
2445
2446
2447
2448
2449
2450
2451
2452
2453
2454
2455
2456
2457
2458
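	/*
	 * If L0 needs to intercept #PF (e.g. for shadow paging or to emulate
	 * a smaller guest MAXPHYADDR), merging L0's and L1's error-code
	 * filters isn't practical, so clear MASK and MATCH and rely on the
	 * merged exception bitmap to trap every L2 page fault.  Otherwise,
	 * honor L1's filtering.
	 */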
2459 if (vmx_need_pf_intercept(&vmx->vcpu)) {
2460
2461
2462
2463
2464 vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, 0);
2465 vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, 0);
2466 } else {
2467 vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, vmcs12->page_fault_error_code_mask);
2468 vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, vmcs12->page_fault_error_code_match);
2469 }
2470
2471 if (cpu_has_vmx_apicv()) {
2472 vmcs_write64(EOI_EXIT_BITMAP0, vmcs12->eoi_exit_bitmap0);
2473 vmcs_write64(EOI_EXIT_BITMAP1, vmcs12->eoi_exit_bitmap1);
2474 vmcs_write64(EOI_EXIT_BITMAP2, vmcs12->eoi_exit_bitmap2);
2475 vmcs_write64(EOI_EXIT_BITMAP3, vmcs12->eoi_exit_bitmap3);
2476 }
2477
2478
2479
2480
2481
2482 prepare_vmx_msr_autostore_list(&vmx->vcpu, MSR_IA32_TSC);
2483
2484 vmcs_write32(VM_EXIT_MSR_STORE_COUNT, vmx->msr_autostore.guest.nr);
2485 vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr);
2486 vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr);
2487
2488 set_cr4_guest_host_mask(vmx);
2489}
2490
2491
2492
2493
2494
2495
2496
2497
2498
2499
2500
2501
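/*
 * prepare_vmcs02() is called when the L1 hypervisor runs its nested L2 guest.
 * L1 keeps a VMCS for L2 (vmcs12), and this function merges it with the
 * requirements of L0 (vmcs01) into the active vmcs02.  It also has side
 * effects on various vcpu->arch fields.  Returns 0 on success; on failure it
 * returns -EINVAL and stores the reason in *entry_failure_code.
 */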
2502static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
2503 enum vm_entry_failure_code *entry_failure_code)
2504{
2505 struct vcpu_vmx *vmx = to_vmx(vcpu);
2506 struct hv_enlightened_vmcs *hv_evmcs = vmx->nested.hv_evmcs;
2507 bool load_guest_pdptrs_vmcs12 = false;
2508
2509 if (vmx->nested.dirty_vmcs12 || hv_evmcs) {
2510 prepare_vmcs02_rare(vmx, vmcs12);
2511 vmx->nested.dirty_vmcs12 = false;
2512
2513 load_guest_pdptrs_vmcs12 = !hv_evmcs ||
2514 !(hv_evmcs->hv_clean_fields &
2515 HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1);
2516 }
2517
2518 if (vmx->nested.nested_run_pending &&
2519 (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) {
2520 kvm_set_dr(vcpu, 7, vmcs12->guest_dr7);
2521 vmcs_write64(GUEST_IA32_DEBUGCTL, vmcs12->guest_ia32_debugctl);
2522 } else {
2523 kvm_set_dr(vcpu, 7, vcpu->arch.dr7);
2524 vmcs_write64(GUEST_IA32_DEBUGCTL, vmx->nested.vmcs01_debugctl);
2525 }
2526 if (kvm_mpx_supported() && (!vmx->nested.nested_run_pending ||
2527 !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS)))
2528 vmcs_write64(GUEST_BNDCFGS, vmx->nested.vmcs01_guest_bndcfgs);
2529 vmx_set_rflags(vcpu, vmcs12->guest_rflags);
2530
2531
2532
2533
2534
2535 update_exception_bitmap(vcpu);
2536 vcpu->arch.cr0_guest_owned_bits &= ~vmcs12->cr0_guest_host_mask;
2537 vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits);
2538
2539 if (vmx->nested.nested_run_pending &&
2540 (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT)) {
2541 vmcs_write64(GUEST_IA32_PAT, vmcs12->guest_ia32_pat);
2542 vcpu->arch.pat = vmcs12->guest_ia32_pat;
2543 } else if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
2544 vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat);
2545 }
2546
2547 vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset);
2548
2549 if (kvm_has_tsc_control)
2550 decache_tsc_multiplier(vmx);
2551
2552 nested_vmx_transition_tlb_flush(vcpu, vmcs12, true);
2553
2554 if (nested_cpu_has_ept(vmcs12))
2555 nested_ept_init_mmu_context(vcpu);
2556
2557
2558
2559
2560
2561
2562
2563
2564
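	/*
	 * GUEST_CR0 is written via vmx_set_cr0(), which may force certain
	 * bits on.  The CR0 read shadow is derived from vmcs12 so that L2
	 * reads the value L1 intended it to see, even for bits in KVM's
	 * cr0_guest_host_mask that L1 did not ask to intercept.
	 */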
2565 vmx_set_cr0(vcpu, vmcs12->guest_cr0);
2566 vmcs_writel(CR0_READ_SHADOW, nested_read_cr0(vmcs12));
2567
2568 vmx_set_cr4(vcpu, vmcs12->guest_cr4);
2569 vmcs_writel(CR4_READ_SHADOW, nested_read_cr4(vmcs12));
2570
2571 vcpu->arch.efer = nested_vmx_calc_efer(vmx, vmcs12);
2572
2573 vmx_set_efer(vcpu, vcpu->arch.efer);
2574
2575
2576
2577
2578
2579
2580 if (CC(!vmx_guest_state_valid(vcpu))) {
2581 *entry_failure_code = ENTRY_FAIL_DEFAULT;
2582 return -EINVAL;
2583 }
2584
2585
2586 if (nested_vmx_load_cr3(vcpu, vmcs12->guest_cr3, nested_cpu_has_ept(vmcs12),
2587 entry_failure_code))
2588 return -EINVAL;
2589
2590
2591
2592
2593
2594
2595
2596
2597 if (enable_ept)
2598 vmcs_writel(GUEST_CR3, vmcs12->guest_cr3);
2599
2600
2601 if (load_guest_pdptrs_vmcs12 && nested_cpu_has_ept(vmcs12) &&
2602 is_pae_paging(vcpu)) {
2603 vmcs_write64(GUEST_PDPTR0, vmcs12->guest_pdptr0);
2604 vmcs_write64(GUEST_PDPTR1, vmcs12->guest_pdptr1);
2605 vmcs_write64(GUEST_PDPTR2, vmcs12->guest_pdptr2);
2606 vmcs_write64(GUEST_PDPTR3, vmcs12->guest_pdptr3);
2607 }
2608
2609 if (!enable_ept)
2610 vcpu->arch.walk_mmu->inject_page_fault = vmx_inject_page_fault_nested;
2611
2612 if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) &&
2613 WARN_ON_ONCE(kvm_set_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL,
2614 vmcs12->guest_ia32_perf_global_ctrl)))
2615 return -EINVAL;
2616
2617 kvm_rsp_write(vcpu, vmcs12->guest_rsp);
2618 kvm_rip_write(vcpu, vmcs12->guest_rip);
2619 return 0;
2620}
2621
2622static int nested_vmx_check_nmi_controls(struct vmcs12 *vmcs12)
2623{
2624 if (CC(!nested_cpu_has_nmi_exiting(vmcs12) &&
2625 nested_cpu_has_virtual_nmis(vmcs12)))
2626 return -EINVAL;
2627
2628 if (CC(!nested_cpu_has_virtual_nmis(vmcs12) &&
2629 nested_cpu_has(vmcs12, CPU_BASED_NMI_WINDOW_EXITING)))
2630 return -EINVAL;
2631
2632 return 0;
2633}
2634
2635static bool nested_vmx_check_eptp(struct kvm_vcpu *vcpu, u64 new_eptp)
2636{
2637 struct vcpu_vmx *vmx = to_vmx(vcpu);
2638 int maxphyaddr = cpuid_maxphyaddr(vcpu);
2639
2640
2641 switch (new_eptp & VMX_EPTP_MT_MASK) {
2642 case VMX_EPTP_MT_UC:
2643 if (CC(!(vmx->nested.msrs.ept_caps & VMX_EPTP_UC_BIT)))
2644 return false;
2645 break;
2646 case VMX_EPTP_MT_WB:
2647 if (CC(!(vmx->nested.msrs.ept_caps & VMX_EPTP_WB_BIT)))
2648 return false;
2649 break;
2650 default:
2651 return false;
2652 }
2653
2654
2655 switch (new_eptp & VMX_EPTP_PWL_MASK) {
2656 case VMX_EPTP_PWL_5:
2657 if (CC(!(vmx->nested.msrs.ept_caps & VMX_EPT_PAGE_WALK_5_BIT)))
2658 return false;
2659 break;
2660 case VMX_EPTP_PWL_4:
2661 if (CC(!(vmx->nested.msrs.ept_caps & VMX_EPT_PAGE_WALK_4_BIT)))
2662 return false;
2663 break;
2664 default:
2665 return false;
2666 }
2667
2668
2669 if (CC(new_eptp >> maxphyaddr || ((new_eptp >> 7) & 0x1f)))
2670 return false;
2671
2672
2673 if (new_eptp & VMX_EPTP_AD_ENABLE_BIT) {
2674 if (CC(!(vmx->nested.msrs.ept_caps & VMX_EPT_AD_BIT)))
2675 return false;
2676 }
2677
2678 return true;
2679}
2680
2681
2682
2683
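/*
 * Checks related to VM-Execution Control Fields
 */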
2684static int nested_check_vm_execution_controls(struct kvm_vcpu *vcpu,
2685 struct vmcs12 *vmcs12)
2686{
2687 struct vcpu_vmx *vmx = to_vmx(vcpu);
2688
2689 if (CC(!vmx_control_verify(vmcs12->pin_based_vm_exec_control,
2690 vmx->nested.msrs.pinbased_ctls_low,
2691 vmx->nested.msrs.pinbased_ctls_high)) ||
2692 CC(!vmx_control_verify(vmcs12->cpu_based_vm_exec_control,
2693 vmx->nested.msrs.procbased_ctls_low,
2694 vmx->nested.msrs.procbased_ctls_high)))
2695 return -EINVAL;
2696
2697 if (nested_cpu_has(vmcs12, CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) &&
2698 CC(!vmx_control_verify(vmcs12->secondary_vm_exec_control,
2699 vmx->nested.msrs.secondary_ctls_low,
2700 vmx->nested.msrs.secondary_ctls_high)))
2701 return -EINVAL;
2702
2703 if (CC(vmcs12->cr3_target_count > nested_cpu_vmx_misc_cr3_count(vcpu)) ||
2704 nested_vmx_check_io_bitmap_controls(vcpu, vmcs12) ||
2705 nested_vmx_check_msr_bitmap_controls(vcpu, vmcs12) ||
2706 nested_vmx_check_tpr_shadow_controls(vcpu, vmcs12) ||
2707 nested_vmx_check_apic_access_controls(vcpu, vmcs12) ||
2708 nested_vmx_check_apicv_controls(vcpu, vmcs12) ||
2709 nested_vmx_check_nmi_controls(vmcs12) ||
2710 nested_vmx_check_pml_controls(vcpu, vmcs12) ||
2711 nested_vmx_check_unrestricted_guest_controls(vcpu, vmcs12) ||
2712 nested_vmx_check_mode_based_ept_exec_controls(vcpu, vmcs12) ||
2713 nested_vmx_check_shadow_vmcs_controls(vcpu, vmcs12) ||
2714 CC(nested_cpu_has_vpid(vmcs12) && !vmcs12->virtual_processor_id))
2715 return -EINVAL;
2716
2717 if (!nested_cpu_has_preemption_timer(vmcs12) &&
2718 nested_cpu_has_save_preemption_timer(vmcs12))
2719 return -EINVAL;
2720
2721 if (nested_cpu_has_ept(vmcs12) &&
2722 CC(!nested_vmx_check_eptp(vcpu, vmcs12->ept_pointer)))
2723 return -EINVAL;
2724
2725 if (nested_cpu_has_vmfunc(vmcs12)) {
2726 if (CC(vmcs12->vm_function_control &
2727 ~vmx->nested.msrs.vmfunc_controls))
2728 return -EINVAL;
2729
2730 if (nested_cpu_has_eptp_switching(vmcs12)) {
2731 if (CC(!nested_cpu_has_ept(vmcs12)) ||
2732 CC(!page_address_valid(vcpu, vmcs12->eptp_list_address)))
2733 return -EINVAL;
2734 }
2735 }
2736
2737 return 0;
2738}
2739
2740
2741
2742
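/*
 * Checks related to VM-Exit Control Fields
 */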
2743static int nested_check_vm_exit_controls(struct kvm_vcpu *vcpu,
2744 struct vmcs12 *vmcs12)
2745{
2746 struct vcpu_vmx *vmx = to_vmx(vcpu);
2747
2748 if (CC(!vmx_control_verify(vmcs12->vm_exit_controls,
2749 vmx->nested.msrs.exit_ctls_low,
2750 vmx->nested.msrs.exit_ctls_high)) ||
2751 CC(nested_vmx_check_exit_msr_switch_controls(vcpu, vmcs12)))
2752 return -EINVAL;
2753
2754 return 0;
2755}
2756
2757
2758
2759
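/*
 * Checks related to VM-Entry Control Fields
 */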
2760static int nested_check_vm_entry_controls(struct kvm_vcpu *vcpu,
2761 struct vmcs12 *vmcs12)
2762{
2763 struct vcpu_vmx *vmx = to_vmx(vcpu);
2764
2765 if (CC(!vmx_control_verify(vmcs12->vm_entry_controls,
2766 vmx->nested.msrs.entry_ctls_low,
2767 vmx->nested.msrs.entry_ctls_high)))
2768 return -EINVAL;
2769
2770
2771
2772
2773
2774
2775
2776 if (vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK) {
2777 u32 intr_info = vmcs12->vm_entry_intr_info_field;
2778 u8 vector = intr_info & INTR_INFO_VECTOR_MASK;
2779 u32 intr_type = intr_info & INTR_INFO_INTR_TYPE_MASK;
2780 bool has_error_code = intr_info & INTR_INFO_DELIVER_CODE_MASK;
2781 bool should_have_error_code;
2782 bool urg = nested_cpu_has2(vmcs12,
2783 SECONDARY_EXEC_UNRESTRICTED_GUEST);
2784 bool prot_mode = !urg || vmcs12->guest_cr0 & X86_CR0_PE;
2785
2786
2787 if (CC(intr_type == INTR_TYPE_RESERVED) ||
2788 CC(intr_type == INTR_TYPE_OTHER_EVENT &&
2789 !nested_cpu_supports_monitor_trap_flag(vcpu)))
2790 return -EINVAL;
2791
2792
2793 if (CC(intr_type == INTR_TYPE_NMI_INTR && vector != NMI_VECTOR) ||
2794 CC(intr_type == INTR_TYPE_HARD_EXCEPTION && vector > 31) ||
2795 CC(intr_type == INTR_TYPE_OTHER_EVENT && vector != 0))
2796 return -EINVAL;
2797
2798
2799 should_have_error_code =
2800 intr_type == INTR_TYPE_HARD_EXCEPTION && prot_mode &&
2801 x86_exception_has_error_code(vector);
2802 if (CC(has_error_code != should_have_error_code))
2803 return -EINVAL;
2804
2805
2806 if (CC(has_error_code &&
2807 vmcs12->vm_entry_exception_error_code & GENMASK(31, 16)))
2808 return -EINVAL;
2809
2810
2811 if (CC(intr_info & INTR_INFO_RESVD_BITS_MASK))
2812 return -EINVAL;
2813
2814
2815 switch (intr_type) {
2816 case INTR_TYPE_SOFT_EXCEPTION:
2817 case INTR_TYPE_SOFT_INTR:
2818 case INTR_TYPE_PRIV_SW_EXCEPTION:
2819 if (CC(vmcs12->vm_entry_instruction_len > 15) ||
2820 CC(vmcs12->vm_entry_instruction_len == 0 &&
2821 CC(!nested_cpu_has_zero_length_injection(vcpu))))
2822 return -EINVAL;
2823 }
2824 }
2825
2826 if (nested_vmx_check_entry_msr_switch_controls(vcpu, vmcs12))
2827 return -EINVAL;
2828
2829 return 0;
2830}
2831
2832static int nested_vmx_check_controls(struct kvm_vcpu *vcpu,
2833 struct vmcs12 *vmcs12)
2834{
2835 if (nested_check_vm_execution_controls(vcpu, vmcs12) ||
2836 nested_check_vm_exit_controls(vcpu, vmcs12) ||
2837 nested_check_vm_entry_controls(vcpu, vmcs12))
2838 return -EINVAL;
2839
2840 if (to_vmx(vcpu)->nested.enlightened_vmcs_enabled)
2841 return nested_evmcs_check_controls(vmcs12);
2842
2843 return 0;
2844}
2845
2846static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu,
2847 struct vmcs12 *vmcs12)
2848{
2849 bool ia32e;
2850
2851 if (CC(!nested_host_cr0_valid(vcpu, vmcs12->host_cr0)) ||
2852 CC(!nested_host_cr4_valid(vcpu, vmcs12->host_cr4)) ||
2853 CC(!nested_cr3_valid(vcpu, vmcs12->host_cr3)))
2854 return -EINVAL;
2855
2856 if (CC(is_noncanonical_address(vmcs12->host_ia32_sysenter_esp, vcpu)) ||
2857 CC(is_noncanonical_address(vmcs12->host_ia32_sysenter_eip, vcpu)))
2858 return -EINVAL;
2859
2860 if ((vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PAT) &&
2861 CC(!kvm_pat_valid(vmcs12->host_ia32_pat)))
2862 return -EINVAL;
2863
2864 if ((vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) &&
2865 CC(!kvm_valid_perf_global_ctrl(vcpu_to_pmu(vcpu),
2866 vmcs12->host_ia32_perf_global_ctrl)))
2867 return -EINVAL;
2868
2869#ifdef CONFIG_X86_64
2870 ia32e = !!(vcpu->arch.efer & EFER_LMA);
2871#else
2872 ia32e = false;
2873#endif
2874
2875 if (ia32e) {
2876 if (CC(!(vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE)) ||
2877 CC(!(vmcs12->host_cr4 & X86_CR4_PAE)))
2878 return -EINVAL;
2879 } else {
2880 if (CC(vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE) ||
2881 CC(vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) ||
2882 CC(vmcs12->host_cr4 & X86_CR4_PCIDE) ||
2883 CC((vmcs12->host_rip) >> 32))
2884 return -EINVAL;
2885 }
2886
2887 if (CC(vmcs12->host_cs_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) ||
2888 CC(vmcs12->host_ss_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) ||
2889 CC(vmcs12->host_ds_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) ||
2890 CC(vmcs12->host_es_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) ||
2891 CC(vmcs12->host_fs_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) ||
2892 CC(vmcs12->host_gs_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) ||
2893 CC(vmcs12->host_tr_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) ||
2894 CC(vmcs12->host_cs_selector == 0) ||
2895 CC(vmcs12->host_tr_selector == 0) ||
2896 CC(vmcs12->host_ss_selector == 0 && !ia32e))
2897 return -EINVAL;
2898
2899 if (CC(is_noncanonical_address(vmcs12->host_fs_base, vcpu)) ||
2900 CC(is_noncanonical_address(vmcs12->host_gs_base, vcpu)) ||
2901 CC(is_noncanonical_address(vmcs12->host_gdtr_base, vcpu)) ||
2902 CC(is_noncanonical_address(vmcs12->host_idtr_base, vcpu)) ||
2903 CC(is_noncanonical_address(vmcs12->host_tr_base, vcpu)) ||
2904 CC(is_noncanonical_address(vmcs12->host_rip, vcpu)))
2905 return -EINVAL;
2906
2907
2908
2909
2910
2911
2912
2913 if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER) {
2914 if (CC(!kvm_valid_efer(vcpu, vmcs12->host_ia32_efer)) ||
2915 CC(ia32e != !!(vmcs12->host_ia32_efer & EFER_LMA)) ||
2916 CC(ia32e != !!(vmcs12->host_ia32_efer & EFER_LME)))
2917 return -EINVAL;
2918 }
2919
2920 return 0;
2921}
2922
2923static int nested_vmx_check_vmcs_link_ptr(struct kvm_vcpu *vcpu,
2924 struct vmcs12 *vmcs12)
2925{
2926 int r = 0;
2927 struct vmcs12 *shadow;
2928 struct kvm_host_map map;
2929
2930 if (vmcs12->vmcs_link_pointer == -1ull)
2931 return 0;
2932
2933 if (CC(!page_address_valid(vcpu, vmcs12->vmcs_link_pointer)))
2934 return -EINVAL;
2935
2936 if (CC(kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->vmcs_link_pointer), &map)))
2937 return -EINVAL;
2938
2939 shadow = map.hva;
2940
2941 if (CC(shadow->hdr.revision_id != VMCS12_REVISION) ||
2942 CC(shadow->hdr.shadow_vmcs != nested_cpu_has_shadow_vmcs(vmcs12)))
2943 r = -EINVAL;
2944
2945 kvm_vcpu_unmap(vcpu, &map, false);
2946 return r;
2947}
2948
2949
2950
2951
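/*
 * Checks related to Guest Non-register State
 */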
2952static int nested_check_guest_non_reg_state(struct vmcs12 *vmcs12)
2953{
2954 if (CC(vmcs12->guest_activity_state != GUEST_ACTIVITY_ACTIVE &&
2955 vmcs12->guest_activity_state != GUEST_ACTIVITY_HLT))
2956 return -EINVAL;
2957
2958 return 0;
2959}
2960
2961static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu,
2962 struct vmcs12 *vmcs12,
2963 enum vm_entry_failure_code *entry_failure_code)
2964{
2965 bool ia32e;
2966
2967 *entry_failure_code = ENTRY_FAIL_DEFAULT;
2968
2969 if (CC(!nested_guest_cr0_valid(vcpu, vmcs12->guest_cr0)) ||
2970 CC(!nested_guest_cr4_valid(vcpu, vmcs12->guest_cr4)))
2971 return -EINVAL;
2972
2973 if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS) &&
2974 CC(!kvm_dr7_valid(vmcs12->guest_dr7)))
2975 return -EINVAL;
2976
2977 if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT) &&
2978 CC(!kvm_pat_valid(vmcs12->guest_ia32_pat)))
2979 return -EINVAL;
2980
2981 if (nested_vmx_check_vmcs_link_ptr(vcpu, vmcs12)) {
2982 *entry_failure_code = ENTRY_FAIL_VMCS_LINK_PTR;
2983 return -EINVAL;
2984 }
2985
2986 if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) &&
2987 CC(!kvm_valid_perf_global_ctrl(vcpu_to_pmu(vcpu),
2988 vmcs12->guest_ia32_perf_global_ctrl)))
2989 return -EINVAL;
2990
2991
2992
2993
2994
2995
2996
2997
2998
2999
3000 if (to_vmx(vcpu)->nested.nested_run_pending &&
3001 (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER)) {
3002 ia32e = (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) != 0;
3003 if (CC(!kvm_valid_efer(vcpu, vmcs12->guest_ia32_efer)) ||
3004 CC(ia32e != !!(vmcs12->guest_ia32_efer & EFER_LMA)) ||
3005 CC(((vmcs12->guest_cr0 & X86_CR0_PG) &&
3006 ia32e != !!(vmcs12->guest_ia32_efer & EFER_LME))))
3007 return -EINVAL;
3008 }
3009
3010 if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS) &&
3011 (CC(is_noncanonical_address(vmcs12->guest_bndcfgs & PAGE_MASK, vcpu)) ||
3012 CC((vmcs12->guest_bndcfgs & MSR_IA32_BNDCFGS_RSVD))))
3013 return -EINVAL;
3014
3015 if (nested_check_guest_non_reg_state(vmcs12))
3016 return -EINVAL;
3017
3018 return 0;
3019}
3020
3021static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu)
3022{
3023 struct vcpu_vmx *vmx = to_vmx(vcpu);
3024 unsigned long cr3, cr4;
3025 bool vm_fail;
3026
3027 if (!nested_early_check)
3028 return 0;
3029
3030 if (vmx->msr_autoload.host.nr)
3031 vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0);
3032 if (vmx->msr_autoload.guest.nr)
3033 vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0);
3034
3035 preempt_disable();
3036
3037 vmx_prepare_switch_to_guest(vcpu);
3038
3039
3040
3041
3042
3043
3044
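	/*
	 * Induce a consistency-check VM-exit by clearing bit 1 in
	 * GUEST_RFLAGS, which is reserved to '1' by hardware.  The real
	 * GUEST_RFLAGS is written by prepare_vmcs02() before the actual
	 * VM-Enter, so nothing needs to be preserved here.
	 */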
3045 vmcs_writel(GUEST_RFLAGS, 0);
3046
3047 cr3 = __get_current_cr3_fast();
3048 if (unlikely(cr3 != vmx->loaded_vmcs->host_state.cr3)) {
3049 vmcs_writel(HOST_CR3, cr3);
3050 vmx->loaded_vmcs->host_state.cr3 = cr3;
3051 }
3052
3053 cr4 = cr4_read_shadow();
3054 if (unlikely(cr4 != vmx->loaded_vmcs->host_state.cr4)) {
3055 vmcs_writel(HOST_CR4, cr4);
3056 vmx->loaded_vmcs->host_state.cr4 = cr4;
3057 }
3058
3059 asm(
3060 "sub $%c[wordsize], %%" _ASM_SP "\n\t"
3061 "cmp %%" _ASM_SP ", %c[host_state_rsp](%[loaded_vmcs]) \n\t"
3062 "je 1f \n\t"
3063 __ex("vmwrite %%" _ASM_SP ", %[HOST_RSP]") "\n\t"
3064 "mov %%" _ASM_SP ", %c[host_state_rsp](%[loaded_vmcs]) \n\t"
3065 "1: \n\t"
3066 "add $%c[wordsize], %%" _ASM_SP "\n\t"
3067
3068
3069 "cmpb $0, %c[launched](%[loaded_vmcs])\n\t"
3070
3071
3072
3073
3074
3075
3076
3077 "call vmx_vmenter\n\t"
3078
3079 CC_SET(be)
3080 : ASM_CALL_CONSTRAINT, CC_OUT(be) (vm_fail)
3081 : [HOST_RSP]"r"((unsigned long)HOST_RSP),
3082 [loaded_vmcs]"r"(vmx->loaded_vmcs),
3083 [launched]"i"(offsetof(struct loaded_vmcs, launched)),
3084 [host_state_rsp]"i"(offsetof(struct loaded_vmcs, host_state.rsp)),
3085 [wordsize]"i"(sizeof(ulong))
3086 : "memory"
3087 );
3088
3089 if (vmx->msr_autoload.host.nr)
3090 vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr);
3091 if (vmx->msr_autoload.guest.nr)
3092 vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr);
3093
3094 if (vm_fail) {
3095 u32 error = vmcs_read32(VM_INSTRUCTION_ERROR);
3096
3097 preempt_enable();
3098
3099 trace_kvm_nested_vmenter_failed(
3100 "early hardware check VM-instruction error: ", error);
3101 WARN_ON_ONCE(error != VMXERR_ENTRY_INVALID_CONTROL_FIELD);
3102 return 1;
3103 }
3104
3105
3106
3107
3108 if (hw_breakpoint_active())
3109 set_debugreg(__this_cpu_read(cpu_dr7), 7);
3110 local_irq_enable();
3111 preempt_enable();
3112
3113
3114
3115
3116
3117
3118
3119
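	/*
	 * The early VM-Enter is expected to fail; if it somehow succeeded,
	 * hardware entered the guest with state KVM never intended to run,
	 * which all but guarantees a serious bug or hardware issue.
	 */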
3120 WARN_ON(!(vmcs_read32(VM_EXIT_REASON) &
3121 VMX_EXIT_REASONS_FAILED_VMENTRY));
3122
3123 return 0;
3124}
3125
3126static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu)
3127{
3128 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
3129 struct vcpu_vmx *vmx = to_vmx(vcpu);
3130 struct kvm_host_map *map;
3131 struct page *page;
3132 u64 hpa;
3133
3134
3135
3136
3137
3138
3139 if (vmx->nested.enlightened_vmcs_enabled && !vmx->nested.hv_evmcs) {
3140 enum nested_evmptrld_status evmptrld_status =
3141 nested_vmx_handle_enlightened_vmptrld(vcpu, false);
3142
3143 if (evmptrld_status == EVMPTRLD_VMFAIL ||
3144 evmptrld_status == EVMPTRLD_ERROR) {
3145 pr_debug_ratelimited("%s: enlightened vmptrld failed\n",
3146 __func__);
3147 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
3148 vcpu->run->internal.suberror =
3149 KVM_INTERNAL_ERROR_EMULATION;
3150 vcpu->run->internal.ndata = 0;
3151 return false;
3152 }
3153 }
3154
3155 if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
3156
3157
3158
3159
3160
3161
3162 if (vmx->nested.apic_access_page) {
3163 kvm_release_page_clean(vmx->nested.apic_access_page);
3164 vmx->nested.apic_access_page = NULL;
3165 }
3166 page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->apic_access_addr);
3167 if (!is_error_page(page)) {
3168 vmx->nested.apic_access_page = page;
3169 hpa = page_to_phys(vmx->nested.apic_access_page);
3170 vmcs_write64(APIC_ACCESS_ADDR, hpa);
3171 } else {
3172 pr_debug_ratelimited("%s: no backing 'struct page' for APIC-access address in vmcs12\n",
3173 __func__);
3174 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
3175 vcpu->run->internal.suberror =
3176 KVM_INTERNAL_ERROR_EMULATION;
3177 vcpu->run->internal.ndata = 0;
3178 return false;
3179 }
3180 }
3181
3182 if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) {
3183 map = &vmx->nested.virtual_apic_map;
3184
3185 if (!kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->virtual_apic_page_addr), map)) {
3186 vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, pfn_to_hpa(map->pfn));
3187 } else if (nested_cpu_has(vmcs12, CPU_BASED_CR8_LOAD_EXITING) &&
3188 nested_cpu_has(vmcs12, CPU_BASED_CR8_STORE_EXITING) &&
3189 !nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
3190
3191
3192
3193
3194
3195
3196
3197
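			/*
			 * With CR8 load/store exiting and no APIC-access
			 * virtualization, the CPU will never consult the TPR
			 * shadow, so simply run L2 without it rather than
			 * failing the VM-Enter.
			 */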
3198 exec_controls_clearbit(vmx, CPU_BASED_TPR_SHADOW);
3199 } else {
3200
3201
3202
3203
3204 vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, -1ull);
3205 }
3206 }
3207
3208 if (nested_cpu_has_posted_intr(vmcs12)) {
3209 map = &vmx->nested.pi_desc_map;
3210
3211 if (!kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->posted_intr_desc_addr), map)) {
3212 vmx->nested.pi_desc =
3213 (struct pi_desc *)(((void *)map->hva) +
3214 offset_in_page(vmcs12->posted_intr_desc_addr));
3215 vmcs_write64(POSTED_INTR_DESC_ADDR,
3216 pfn_to_hpa(map->pfn) + offset_in_page(vmcs12->posted_intr_desc_addr));
3217 }
3218 }
3219 if (nested_vmx_prepare_msr_bitmap(vcpu, vmcs12))
3220 exec_controls_setbit(vmx, CPU_BASED_USE_MSR_BITMAPS);
3221 else
3222 exec_controls_clearbit(vmx, CPU_BASED_USE_MSR_BITMAPS);
3223 return true;
3224}
3225
3226static int nested_vmx_write_pml_buffer(struct kvm_vcpu *vcpu, gpa_t gpa)
3227{
3228 struct vmcs12 *vmcs12;
3229 struct vcpu_vmx *vmx = to_vmx(vcpu);
3230 gpa_t dst;
3231
3232 if (WARN_ON_ONCE(!is_guest_mode(vcpu)))
3233 return 0;
3234
3235 if (WARN_ON_ONCE(vmx->nested.pml_full))
3236 return 1;
3237
3238
3239
3240
3241
3242 vmcs12 = get_vmcs12(vcpu);
3243 if (!nested_cpu_has_pml(vmcs12))
3244 return 0;
3245
3246 if (vmcs12->guest_pml_index >= PML_ENTITY_NUM) {
3247 vmx->nested.pml_full = true;
3248 return 1;
3249 }
3250
3251 gpa &= ~0xFFFull;
3252 dst = vmcs12->pml_address + sizeof(u64) * vmcs12->guest_pml_index;
3253
3254 if (kvm_write_guest_page(vcpu->kvm, gpa_to_gfn(dst), &gpa,
3255 offset_in_page(dst), sizeof(gpa)))
3256 return 0;
3257
3258 vmcs12->guest_pml_index--;
3259
3260 return 0;
3261}
3262
3263
3264
3265
3266
3267
3268
3269
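/*
 * Common permission checks for emulated VMX instructions: the vCPU must have
 * executed VMXON and must be running at CPL 0, otherwise a #UD or #GP is
 * queued and the instruction is not emulated.
 */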
3270static int nested_vmx_check_permission(struct kvm_vcpu *vcpu)
3271{
3272 if (!to_vmx(vcpu)->nested.vmxon) {
3273 kvm_queue_exception(vcpu, UD_VECTOR);
3274 return 0;
3275 }
3276
3277 if (vmx_get_cpl(vcpu)) {
3278 kvm_inject_gp(vcpu, 0);
3279 return 0;
3280 }
3281
3282 return 1;
3283}
3284
3285static bool vmx_has_apicv_interrupt(struct kvm_vcpu *vcpu)
3286{
3287 u8 rvi = vmx_get_rvi();
3288 u8 vppr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_PROCPRI);
3289
3290 return ((rvi & 0xf0) > (vppr & 0xf0));
3291}
3292
3293static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
3294 struct vmcs12 *vmcs12);
3295
3296
3297
3298
3299
3300
3301
3302
3303
3304
3305
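/*
 * Enter VMX non-root mode on behalf of L1.  If from_vmentry is true this is
 * an emulated VMLAUNCH/VMRESUME; otherwise it is nested state restore.
 *
 * Returns:
 *	NVMX_VMENTRY_SUCCESS: Entered VMX non-root mode
 *	NVMX_VMENTRY_VMFAIL:  Consistency check VMFail
 *	NVMX_VMENTRY_VMEXIT:  Consistency check VMExit
 *	NVMX_VMENTRY_KVM_INTERNAL_ERROR: KVM internal error
 */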
3306enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
3307 bool from_vmentry)
3308{
3309 struct vcpu_vmx *vmx = to_vmx(vcpu);
3310 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
3311 enum vm_entry_failure_code entry_failure_code;
3312 bool evaluate_pending_interrupts;
3313 u32 exit_reason, failed_index;
3314
3315 if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu))
3316 kvm_vcpu_flush_tlb_current(vcpu);
3317
3318 evaluate_pending_interrupts = exec_controls_get(vmx) &
3319 (CPU_BASED_INTR_WINDOW_EXITING | CPU_BASED_NMI_WINDOW_EXITING);
3320 if (likely(!evaluate_pending_interrupts) && kvm_vcpu_apicv_active(vcpu))
3321 evaluate_pending_interrupts |= vmx_has_apicv_interrupt(vcpu);
3322
3323 if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS))
3324 vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
3325 if (kvm_mpx_supported() &&
3326 !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS))
3327 vmx->nested.vmcs01_guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS);
3328
3329
3330
3331
3332
3333
3334
3335
3336
3337
3338
3339
3340
3341
3342
3343
3344
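	/*
	 * Without EPT, vmcs01.GUEST_CR3 holds KVM's shadow CR3 rather than
	 * L1's CR3.  Stuff L1's CR3 into vmcs01.GUEST_CR3 so that a VM-Fail
	 * detected by hardware unwinds KVM's software model to the correct
	 * vcpu->arch.cr3; the field is rewritten with a shadow CR3 before L1
	 * runs again.
	 */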
3345 if (!enable_ept && !nested_early_check)
3346 vmcs_writel(GUEST_CR3, vcpu->arch.cr3);
3347
3348 vmx_switch_vmcs(vcpu, &vmx->nested.vmcs02);
3349
3350 prepare_vmcs02_early(vmx, vmcs12);
3351
3352 if (from_vmentry) {
3353 if (unlikely(!nested_get_vmcs12_pages(vcpu))) {
3354 vmx_switch_vmcs(vcpu, &vmx->vmcs01);
3355 return NVMX_VMENTRY_KVM_INTERNAL_ERROR;
3356 }
3357
3358 if (nested_vmx_check_vmentry_hw(vcpu)) {
3359 vmx_switch_vmcs(vcpu, &vmx->vmcs01);
3360 return NVMX_VMENTRY_VMFAIL;
3361 }
3362
3363 if (nested_vmx_check_guest_state(vcpu, vmcs12,
3364 &entry_failure_code)) {
3365 exit_reason = EXIT_REASON_INVALID_STATE;
3366 vmcs12->exit_qualification = entry_failure_code;
3367 goto vmentry_fail_vmexit;
3368 }
3369 }
3370
3371 enter_guest_mode(vcpu);
3372 if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETTING)
3373 vcpu->arch.tsc_offset += vmcs12->tsc_offset;
3374
3375 if (prepare_vmcs02(vcpu, vmcs12, &entry_failure_code)) {
3376 exit_reason = EXIT_REASON_INVALID_STATE;
3377 vmcs12->exit_qualification = entry_failure_code;
3378 goto vmentry_fail_vmexit_guest_mode;
3379 }
3380
3381 if (from_vmentry) {
3382 failed_index = nested_vmx_load_msr(vcpu,
3383 vmcs12->vm_entry_msr_load_addr,
3384 vmcs12->vm_entry_msr_load_count);
3385 if (failed_index) {
3386 exit_reason = EXIT_REASON_MSR_LOAD_FAIL;
3387 vmcs12->exit_qualification = failed_index;
3388 goto vmentry_fail_vmexit_guest_mode;
3389 }
3390 } else {
3391
3392
3393
3394
3395
3396
3397
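		/*
		 * The MMU isn't yet set up to translate L1 GPAs (this path is
		 * used for nested state restore), so mapping the vmcs12 pages
		 * must be deferred to the next VM-entry via
		 * KVM_REQ_GET_NESTED_STATE_PAGES.
		 */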
3398 kvm_make_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
3399 }
3400
3401
3402
3403
3404
3405
3406
3407
3408
3409
3410
3411
3412
3413
3414
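	/*
	 * If L1 had a pending IRQ or NMI that was not delivered before the
	 * emulated VMLAUNCH/VMRESUME (e.g. because interrupts were blocked),
	 * L0 must re-evaluate it now that vmcs02 is active: the event may
	 * need to cause an exit from L2 to L1 or be delivered directly to
	 * L2.  Force the evaluation with a KVM_REQ_EVENT.
	 */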
3415 if (unlikely(evaluate_pending_interrupts))
3416 kvm_make_request(KVM_REQ_EVENT, vcpu);
3417
3418
3419
3420
3421
3422
3423 vmx->nested.preemption_timer_expired = false;
3424 if (nested_cpu_has_preemption_timer(vmcs12)) {
3425 u64 timer_value = vmx_calc_preemption_timer_value(vcpu);
3426 vmx_start_preemption_timer(vcpu, timer_value);
3427 }
3428
3429
3430
3431
3432
3433
3434
3435 return NVMX_VMENTRY_SUCCESS;
3436
3437
3438
3439
3440
3441
3442vmentry_fail_vmexit_guest_mode:
3443 if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETTING)
3444 vcpu->arch.tsc_offset -= vmcs12->tsc_offset;
3445 leave_guest_mode(vcpu);
3446
3447vmentry_fail_vmexit:
3448 vmx_switch_vmcs(vcpu, &vmx->vmcs01);
3449
3450 if (!from_vmentry)
3451 return NVMX_VMENTRY_VMEXIT;
3452
3453 load_vmcs12_host_state(vcpu, vmcs12);
3454 vmcs12->vm_exit_reason = exit_reason | VMX_EXIT_REASONS_FAILED_VMENTRY;
3455 if (enable_shadow_vmcs || vmx->nested.hv_evmcs)
3456 vmx->nested.need_vmcs12_to_shadow_sync = true;
3457 return NVMX_VMENTRY_VMEXIT;
3458}
3459
3460
3461
3462
3463
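/*
 * nested_vmx_run() handles an emulated VMLAUNCH or VMRESUME, i.e. a request
 * by L1 to run its L2 guest.
 */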
3464static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
3465{
3466 struct vmcs12 *vmcs12;
3467 enum nvmx_vmentry_status status;
3468 struct vcpu_vmx *vmx = to_vmx(vcpu);
3469 u32 interrupt_shadow = vmx_get_interrupt_shadow(vcpu);
3470 enum nested_evmptrld_status evmptrld_status;
3471
3472 if (!nested_vmx_check_permission(vcpu))
3473 return 1;
3474
3475 evmptrld_status = nested_vmx_handle_enlightened_vmptrld(vcpu, launch);
3476 if (evmptrld_status == EVMPTRLD_ERROR) {
3477 kvm_queue_exception(vcpu, UD_VECTOR);
3478 return 1;
3479 } else if (CC(evmptrld_status == EVMPTRLD_VMFAIL)) {
3480 return nested_vmx_failInvalid(vcpu);
3481 }
3482
3483 if (CC(!vmx->nested.hv_evmcs && vmx->nested.current_vmptr == -1ull))
3484 return nested_vmx_failInvalid(vcpu);
3485
3486 vmcs12 = get_vmcs12(vcpu);
3487
3488
3489
3490
3491
3492
3493
3494 if (CC(vmcs12->hdr.shadow_vmcs))
3495 return nested_vmx_failInvalid(vcpu);
3496
3497 if (vmx->nested.hv_evmcs) {
3498 copy_enlightened_to_vmcs12(vmx);
3499
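		/* Enlightened VMCS doesn't have launch state */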
3500 vmcs12->launch_state = !launch;
3501 } else if (enable_shadow_vmcs) {
3502 copy_shadow_to_vmcs12(vmx);
3503 }
3504
3505
3506
3507
3508
3509
3510
3511
3512
3513
3514
3515 if (CC(interrupt_shadow & KVM_X86_SHADOW_INT_MOV_SS))
3516 return nested_vmx_fail(vcpu, VMXERR_ENTRY_EVENTS_BLOCKED_BY_MOV_SS);
3517
3518 if (CC(vmcs12->launch_state == launch))
3519 return nested_vmx_fail(vcpu,
3520 launch ? VMXERR_VMLAUNCH_NONCLEAR_VMCS
3521 : VMXERR_VMRESUME_NONLAUNCHED_VMCS);
3522
3523 if (nested_vmx_check_controls(vcpu, vmcs12))
3524 return nested_vmx_fail(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
3525
3526 if (nested_vmx_check_host_state(vcpu, vmcs12))
3527 return nested_vmx_fail(vcpu, VMXERR_ENTRY_INVALID_HOST_STATE_FIELD);
3528
3529
3530
3531
3532
3533 vmx->nested.nested_run_pending = 1;
3534 vmx->nested.has_preemption_timer_deadline = false;
3535 status = nested_vmx_enter_non_root_mode(vcpu, true);
3536 if (unlikely(status != NVMX_VMENTRY_SUCCESS))
3537 goto vmentry_failed;
3538
3539
3540 if (nested_cpu_has_posted_intr(vmcs12) &&
3541 kvm_apic_has_interrupt(vcpu) == vmx->nested.posted_intr_nv) {
3542 vmx->nested.pi_pending = true;
3543 kvm_make_request(KVM_REQ_EVENT, vcpu);
3544 kvm_apic_clear_irr(vcpu, vmx->nested.posted_intr_nv);
3545 }
3546
3547
3548 vmx->vcpu.arch.l1tf_flush_l1d = true;
3549
3550
3551
3552
3553
3554
3555
3556
3557
3558
3559
3560 nested_cache_shadow_vmcs12(vcpu, vmcs12);
3561
3562
3563
3564
3565
3566
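	/*
	 * If L2 is being entered in the HLT activity state and will not be
	 * woken by an injected event, an NMI window or an interrupt window,
	 * emulate the halt instead of running L2.
	 */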
3567 if ((vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT) &&
3568 !(vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK) &&
3569 !(vmcs12->cpu_based_vm_exec_control & CPU_BASED_NMI_WINDOW_EXITING) &&
3570 !((vmcs12->cpu_based_vm_exec_control & CPU_BASED_INTR_WINDOW_EXITING) &&
3571 (vmcs12->guest_rflags & X86_EFLAGS_IF))) {
3572 vmx->nested.nested_run_pending = 0;
3573 return kvm_vcpu_halt(vcpu);
3574 }
3575 return 1;
3576
3577vmentry_failed:
3578 vmx->nested.nested_run_pending = 0;
3579 if (status == NVMX_VMENTRY_KVM_INTERNAL_ERROR)
3580 return 0;
3581 if (status == NVMX_VMENTRY_VMEXIT)
3582 return 1;
3583 WARN_ON_ONCE(status != NVMX_VMENTRY_VMFAIL);
3584 return nested_vmx_fail(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
3585}
3586
3587
3588
3589
3590
3591
3592
3593
3594
3595
3596
3597
3598
3599
3600
3601
3602
3603
3604static inline unsigned long
3605vmcs12_guest_cr0(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
3606{
3607 return
3608 (vmcs_readl(GUEST_CR0) & vcpu->arch.cr0_guest_owned_bits) |
3609 (vmcs12->guest_cr0 & vmcs12->cr0_guest_host_mask) |
3610 (vmcs_readl(CR0_READ_SHADOW) & ~(vmcs12->cr0_guest_host_mask |
3611 vcpu->arch.cr0_guest_owned_bits));
3612}
3613
3614static inline unsigned long
3615vmcs12_guest_cr4(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
3616{
3617 return
3618 (vmcs_readl(GUEST_CR4) & vcpu->arch.cr4_guest_owned_bits) |
3619 (vmcs12->guest_cr4 & vmcs12->cr4_guest_host_mask) |
3620 (vmcs_readl(CR4_READ_SHADOW) & ~(vmcs12->cr4_guest_host_mask |
3621 vcpu->arch.cr4_guest_owned_bits));
3622}
3623
3624static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu,
3625 struct vmcs12 *vmcs12)
3626{
3627 u32 idt_vectoring;
3628 unsigned int nr;
3629
3630 if (vcpu->arch.exception.injected) {
3631 nr = vcpu->arch.exception.nr;
3632 idt_vectoring = nr | VECTORING_INFO_VALID_MASK;
3633
3634 if (kvm_exception_is_soft(nr)) {
3635 vmcs12->vm_exit_instruction_len =
3636 vcpu->arch.event_exit_inst_len;
3637 idt_vectoring |= INTR_TYPE_SOFT_EXCEPTION;
3638 } else
3639 idt_vectoring |= INTR_TYPE_HARD_EXCEPTION;
3640
3641 if (vcpu->arch.exception.has_error_code) {
3642 idt_vectoring |= VECTORING_INFO_DELIVER_CODE_MASK;
3643 vmcs12->idt_vectoring_error_code =
3644 vcpu->arch.exception.error_code;
3645 }
3646
3647 vmcs12->idt_vectoring_info_field = idt_vectoring;
3648 } else if (vcpu->arch.nmi_injected) {
3649 vmcs12->idt_vectoring_info_field =
3650 INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR;
3651 } else if (vcpu->arch.interrupt.injected) {
3652 nr = vcpu->arch.interrupt.nr;
3653 idt_vectoring = nr | VECTORING_INFO_VALID_MASK;
3654
3655 if (vcpu->arch.interrupt.soft) {
3656 idt_vectoring |= INTR_TYPE_SOFT_INTR;
3657 vmcs12->vm_entry_instruction_len =
3658 vcpu->arch.event_exit_inst_len;
3659 } else
3660 idt_vectoring |= INTR_TYPE_EXT_INTR;
3661
3662 vmcs12->idt_vectoring_info_field = idt_vectoring;
3663 }
3664}
3665
3666
3667void nested_mark_vmcs12_pages_dirty(struct kvm_vcpu *vcpu)
3668{
3669 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
3670 gfn_t gfn;
3671
3672
3673
3674
3675
3676
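	/*
	 * The APIC-access page does not need to be marked dirty: the CPU
	 * never writes to it as part of APIC virtualization.
	 */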
3677 if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) {
3678 gfn = vmcs12->virtual_apic_page_addr >> PAGE_SHIFT;
3679 kvm_vcpu_mark_page_dirty(vcpu, gfn);
3680 }
3681
3682 if (nested_cpu_has_posted_intr(vmcs12)) {
3683 gfn = vmcs12->posted_intr_desc_addr >> PAGE_SHIFT;
3684 kvm_vcpu_mark_page_dirty(vcpu, gfn);
3685 }
3686}
3687
3688static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu)
3689{
3690 struct vcpu_vmx *vmx = to_vmx(vcpu);
3691 int max_irr;
3692 void *vapic_page;
3693 u16 status;
3694
3695 if (!vmx->nested.pi_desc || !vmx->nested.pi_pending)
3696 return;
3697
3698 vmx->nested.pi_pending = false;
3699 if (!pi_test_and_clear_on(vmx->nested.pi_desc))
3700 return;
3701
3702 max_irr = find_last_bit((unsigned long *)vmx->nested.pi_desc->pir, 256);
3703 if (max_irr != 256) {
3704 vapic_page = vmx->nested.virtual_apic_map.hva;
3705 if (!vapic_page)
3706 return;
3707
3708 __kvm_apic_update_irr(vmx->nested.pi_desc->pir,
3709 vapic_page, &max_irr);
3710 status = vmcs_read16(GUEST_INTR_STATUS);
3711 if ((u8)max_irr > ((u8)status & 0xff)) {
3712 status &= ~0xff;
3713 status |= (u8)max_irr;
3714 vmcs_write16(GUEST_INTR_STATUS, status);
3715 }
3716 }
3717
3718 nested_mark_vmcs12_pages_dirty(vcpu);
3719}
3720
3721static void nested_vmx_inject_exception_vmexit(struct kvm_vcpu *vcpu,
3722 unsigned long exit_qual)
3723{
3724 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
3725 unsigned int nr = vcpu->arch.exception.nr;
3726 u32 intr_info = nr | INTR_INFO_VALID_MASK;
3727
3728 if (vcpu->arch.exception.has_error_code) {
3729 vmcs12->vm_exit_intr_error_code = vcpu->arch.exception.error_code;
3730 intr_info |= INTR_INFO_DELIVER_CODE_MASK;
3731 }
3732
3733 if (kvm_exception_is_soft(nr))
3734 intr_info |= INTR_TYPE_SOFT_EXCEPTION;
3735 else
3736 intr_info |= INTR_TYPE_HARD_EXCEPTION;
3737
3738 if (!(vmcs12->idt_vectoring_info_field & VECTORING_INFO_VALID_MASK) &&
3739 vmx_get_nmi_mask(vcpu))
3740 intr_info |= INTR_INFO_UNBLOCK_NMI;
3741
3742 nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI, intr_info, exit_qual);
3743}
3744
3745
3746
3747
3748
3749
3750
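/*
 * Returns true if a #DB trap is pending delivery.  In KVM, trap-like #DBs
 * carry an exception payload, so its presence distinguishes them from
 * fault-like #DBs.
 */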
3751static inline bool vmx_pending_dbg_trap(struct kvm_vcpu *vcpu)
3752{
3753 return vcpu->arch.exception.pending &&
3754 vcpu->arch.exception.nr == DB_VECTOR &&
3755 vcpu->arch.exception.payload;
3756}
3757
3758
3759
3760
3761
3762
3763
3764
3765static void nested_vmx_update_pending_dbg(struct kvm_vcpu *vcpu)
3766{
3767 if (vmx_pending_dbg_trap(vcpu))
3768 vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS,
3769 vcpu->arch.exception.payload);
3770}
3771
3772static bool nested_vmx_preemption_timer_pending(struct kvm_vcpu *vcpu)
3773{
3774 return nested_cpu_has_preemption_timer(get_vmcs12(vcpu)) &&
3775 to_vmx(vcpu)->nested.preemption_timer_expired;
3776}
3777
3778static int vmx_check_nested_events(struct kvm_vcpu *vcpu)
3779{
3780 struct vcpu_vmx *vmx = to_vmx(vcpu);
3781 unsigned long exit_qual;
3782 bool block_nested_events =
3783 vmx->nested.nested_run_pending || kvm_event_needs_reinjection(vcpu);
3784 bool mtf_pending = vmx->nested.mtf_pending;
3785 struct kvm_lapic *apic = vcpu->arch.apic;
3786
3787
3788
3789
3790
3791 if (!block_nested_events)
3792 vmx->nested.mtf_pending = false;
3793
3794 if (lapic_in_kernel(vcpu) &&
3795 test_bit(KVM_APIC_INIT, &apic->pending_events)) {
3796 if (block_nested_events)
3797 return -EBUSY;
3798 nested_vmx_update_pending_dbg(vcpu);
3799 clear_bit(KVM_APIC_INIT, &apic->pending_events);
3800 nested_vmx_vmexit(vcpu, EXIT_REASON_INIT_SIGNAL, 0, 0);
3801 return 0;
3802 }
3803
3804
3805
3806
3807 if (vcpu->arch.exception.pending && !vmx_pending_dbg_trap(vcpu)) {
3808 if (block_nested_events)
3809 return -EBUSY;
3810 if (!nested_vmx_check_exception(vcpu, &exit_qual))
3811 goto no_vmexit;
3812 nested_vmx_inject_exception_vmexit(vcpu, exit_qual);
3813 return 0;
3814 }
3815
3816 if (mtf_pending) {
3817 if (block_nested_events)
3818 return -EBUSY;
3819 nested_vmx_update_pending_dbg(vcpu);
3820 nested_vmx_vmexit(vcpu, EXIT_REASON_MONITOR_TRAP_FLAG, 0, 0);
3821 return 0;
3822 }
3823
3824 if (vcpu->arch.exception.pending) {
3825 if (block_nested_events)
3826 return -EBUSY;
3827 if (!nested_vmx_check_exception(vcpu, &exit_qual))
3828 goto no_vmexit;
3829 nested_vmx_inject_exception_vmexit(vcpu, exit_qual);
3830 return 0;
3831 }
3832
3833 if (nested_vmx_preemption_timer_pending(vcpu)) {
3834 if (block_nested_events)
3835 return -EBUSY;
3836 nested_vmx_vmexit(vcpu, EXIT_REASON_PREEMPTION_TIMER, 0, 0);
3837 return 0;
3838 }
3839
3840 if (vcpu->arch.smi_pending && !is_smm(vcpu)) {
3841 if (block_nested_events)
3842 return -EBUSY;
3843 goto no_vmexit;
3844 }
3845
3846 if (vcpu->arch.nmi_pending && !vmx_nmi_blocked(vcpu)) {
3847 if (block_nested_events)
3848 return -EBUSY;
3849 if (!nested_exit_on_nmi(vcpu))
3850 goto no_vmexit;
3851
3852 nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
3853 NMI_VECTOR | INTR_TYPE_NMI_INTR |
3854 INTR_INFO_VALID_MASK, 0);
3855
3856
3857
3858
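		/*
		 * The NMI-triggered VM-exit counts as delivery: consume the
		 * pending NMI and set NMI blocking.
		 */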
3859 vcpu->arch.nmi_pending = 0;
3860 vmx_set_nmi_mask(vcpu, true);
3861 return 0;
3862 }
3863
3864 if (kvm_cpu_has_interrupt(vcpu) && !vmx_interrupt_blocked(vcpu)) {
3865 if (block_nested_events)
3866 return -EBUSY;
3867 if (!nested_exit_on_intr(vcpu))
3868 goto no_vmexit;
3869 nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT, 0, 0);
3870 return 0;
3871 }
3872
3873no_vmexit:
3874 vmx_complete_nested_posted_interrupt(vcpu);
3875 return 0;
3876}
3877
3878static u32 vmx_get_preemption_timer_value(struct kvm_vcpu *vcpu)
3879{
3880 ktime_t remaining =
3881 hrtimer_get_remaining(&to_vmx(vcpu)->nested.preemption_timer);
3882 u64 value;
3883
3884 if (ktime_to_ns(remaining) <= 0)
3885 return 0;
3886
3887 value = ktime_to_ns(remaining) * vcpu->arch.virtual_tsc_khz;
3888 do_div(value, 1000000);
3889 return value >> VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE;
3890}
3891
3892static bool is_vmcs12_ext_field(unsigned long field)
3893{
3894 switch (field) {
3895 case GUEST_ES_SELECTOR:
3896 case GUEST_CS_SELECTOR:
3897 case GUEST_SS_SELECTOR:
3898 case GUEST_DS_SELECTOR:
3899 case GUEST_FS_SELECTOR:
3900 case GUEST_GS_SELECTOR:
3901 case GUEST_LDTR_SELECTOR:
3902 case GUEST_TR_SELECTOR:
3903 case GUEST_ES_LIMIT:
3904 case GUEST_CS_LIMIT:
3905 case GUEST_SS_LIMIT:
3906 case GUEST_DS_LIMIT:
3907 case GUEST_FS_LIMIT:
3908 case GUEST_GS_LIMIT:
3909 case GUEST_LDTR_LIMIT:
3910 case GUEST_TR_LIMIT:
3911 case GUEST_GDTR_LIMIT:
3912 case GUEST_IDTR_LIMIT:
3913 case GUEST_ES_AR_BYTES:
3914 case GUEST_DS_AR_BYTES:
3915 case GUEST_FS_AR_BYTES:
3916 case GUEST_GS_AR_BYTES:
3917 case GUEST_LDTR_AR_BYTES:
3918 case GUEST_TR_AR_BYTES:
3919 case GUEST_ES_BASE:
3920 case GUEST_CS_BASE:
3921 case GUEST_SS_BASE:
3922 case GUEST_DS_BASE:
3923 case GUEST_FS_BASE:
3924 case GUEST_GS_BASE:
3925 case GUEST_LDTR_BASE:
3926 case GUEST_TR_BASE:
3927 case GUEST_GDTR_BASE:
3928 case GUEST_IDTR_BASE:
3929 case GUEST_PENDING_DBG_EXCEPTIONS:
3930 case GUEST_BNDCFGS:
3931 return true;
3932 default:
3933 break;
3934 }
3935
3936 return false;
3937}
3938
3939static void sync_vmcs02_to_vmcs12_rare(struct kvm_vcpu *vcpu,
3940 struct vmcs12 *vmcs12)
3941{
3942 struct vcpu_vmx *vmx = to_vmx(vcpu);
3943
3944 vmcs12->guest_es_selector = vmcs_read16(GUEST_ES_SELECTOR);
3945 vmcs12->guest_cs_selector = vmcs_read16(GUEST_CS_SELECTOR);
3946 vmcs12->guest_ss_selector = vmcs_read16(GUEST_SS_SELECTOR);
3947 vmcs12->guest_ds_selector = vmcs_read16(GUEST_DS_SELECTOR);
3948 vmcs12->guest_fs_selector = vmcs_read16(GUEST_FS_SELECTOR);
3949 vmcs12->guest_gs_selector = vmcs_read16(GUEST_GS_SELECTOR);
3950 vmcs12->guest_ldtr_selector = vmcs_read16(GUEST_LDTR_SELECTOR);
3951 vmcs12->guest_tr_selector = vmcs_read16(GUEST_TR_SELECTOR);
3952 vmcs12->guest_es_limit = vmcs_read32(GUEST_ES_LIMIT);
3953 vmcs12->guest_cs_limit = vmcs_read32(GUEST_CS_LIMIT);
3954 vmcs12->guest_ss_limit = vmcs_read32(GUEST_SS_LIMIT);
3955 vmcs12->guest_ds_limit = vmcs_read32(GUEST_DS_LIMIT);
3956 vmcs12->guest_fs_limit = vmcs_read32(GUEST_FS_LIMIT);
3957 vmcs12->guest_gs_limit = vmcs_read32(GUEST_GS_LIMIT);
3958 vmcs12->guest_ldtr_limit = vmcs_read32(GUEST_LDTR_LIMIT);
3959 vmcs12->guest_tr_limit = vmcs_read32(GUEST_TR_LIMIT);
3960 vmcs12->guest_gdtr_limit = vmcs_read32(GUEST_GDTR_LIMIT);
3961 vmcs12->guest_idtr_limit = vmcs_read32(GUEST_IDTR_LIMIT);
3962 vmcs12->guest_es_ar_bytes = vmcs_read32(GUEST_ES_AR_BYTES);
3963 vmcs12->guest_ds_ar_bytes = vmcs_read32(GUEST_DS_AR_BYTES);
3964 vmcs12->guest_fs_ar_bytes = vmcs_read32(GUEST_FS_AR_BYTES);
3965 vmcs12->guest_gs_ar_bytes = vmcs_read32(GUEST_GS_AR_BYTES);
3966 vmcs12->guest_ldtr_ar_bytes = vmcs_read32(GUEST_LDTR_AR_BYTES);
3967 vmcs12->guest_tr_ar_bytes = vmcs_read32(GUEST_TR_AR_BYTES);
3968 vmcs12->guest_es_base = vmcs_readl(GUEST_ES_BASE);
3969 vmcs12->guest_cs_base = vmcs_readl(GUEST_CS_BASE);
3970 vmcs12->guest_ss_base = vmcs_readl(GUEST_SS_BASE);
3971 vmcs12->guest_ds_base = vmcs_readl(GUEST_DS_BASE);
3972 vmcs12->guest_fs_base = vmcs_readl(GUEST_FS_BASE);
3973 vmcs12->guest_gs_base = vmcs_readl(GUEST_GS_BASE);
3974 vmcs12->guest_ldtr_base = vmcs_readl(GUEST_LDTR_BASE);
3975 vmcs12->guest_tr_base = vmcs_readl(GUEST_TR_BASE);
3976 vmcs12->guest_gdtr_base = vmcs_readl(GUEST_GDTR_BASE);
3977 vmcs12->guest_idtr_base = vmcs_readl(GUEST_IDTR_BASE);
3978 vmcs12->guest_pending_dbg_exceptions =
3979 vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS);
3980 if (kvm_mpx_supported())
3981 vmcs12->guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS);
3982
3983 vmx->nested.need_sync_vmcs02_to_vmcs12_rare = false;
3984}
3985
3986static void copy_vmcs02_to_vmcs12_rare(struct kvm_vcpu *vcpu,
3987 struct vmcs12 *vmcs12)
3988{
3989 struct vcpu_vmx *vmx = to_vmx(vcpu);
3990 int cpu;
3991
3992 if (!vmx->nested.need_sync_vmcs02_to_vmcs12_rare)
3993 return;
3994
3995
3996 WARN_ON_ONCE(vmx->loaded_vmcs != &vmx->vmcs01);
3997
3998 cpu = get_cpu();
3999 vmx->loaded_vmcs = &vmx->nested.vmcs02;
4000 vmx_vcpu_load_vmcs(vcpu, cpu, &vmx->vmcs01);
4001
4002 sync_vmcs02_to_vmcs12_rare(vcpu, vmcs12);
4003
4004 vmx->loaded_vmcs = &vmx->vmcs01;
4005 vmx_vcpu_load_vmcs(vcpu, cpu, &vmx->nested.vmcs02);
4006 put_cpu();
4007}
4008
4009
4010
4011
4012
4013
4014
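/*
 * Update the guest-state fields of vmcs12 to reflect changes that occurred
 * while L2 was running.  The "IA-32e mode guest" bit of the VM-entry controls
 * is also updated, as it is really a guest-state bit.
 */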
4015static void sync_vmcs02_to_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
4016{
4017 struct vcpu_vmx *vmx = to_vmx(vcpu);
4018
4019 if (vmx->nested.hv_evmcs)
4020 sync_vmcs02_to_vmcs12_rare(vcpu, vmcs12);
4021
4022 vmx->nested.need_sync_vmcs02_to_vmcs12_rare = !vmx->nested.hv_evmcs;
4023
4024 vmcs12->guest_cr0 = vmcs12_guest_cr0(vcpu, vmcs12);
4025 vmcs12->guest_cr4 = vmcs12_guest_cr4(vcpu, vmcs12);
4026
4027 vmcs12->guest_rsp = kvm_rsp_read(vcpu);
4028 vmcs12->guest_rip = kvm_rip_read(vcpu);
4029 vmcs12->guest_rflags = vmcs_readl(GUEST_RFLAGS);
4030
4031 vmcs12->guest_cs_ar_bytes = vmcs_read32(GUEST_CS_AR_BYTES);
4032 vmcs12->guest_ss_ar_bytes = vmcs_read32(GUEST_SS_AR_BYTES);
4033
4034 vmcs12->guest_interruptibility_info =
4035 vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
4036
4037 if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED)
4038 vmcs12->guest_activity_state = GUEST_ACTIVITY_HLT;
4039 else
4040 vmcs12->guest_activity_state = GUEST_ACTIVITY_ACTIVE;
4041
4042 if (nested_cpu_has_preemption_timer(vmcs12) &&
4043 vmcs12->vm_exit_controls & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER &&
4044 !vmx->nested.nested_run_pending)
4045 vmcs12->vmx_preemption_timer_value =
4046 vmx_get_preemption_timer_value(vcpu);
4047
4048
4049
4050
4051
4052
4053
4054
4055
4056 if (enable_ept) {
4057 vmcs12->guest_cr3 = vmcs_readl(GUEST_CR3);
4058 if (nested_cpu_has_ept(vmcs12) && is_pae_paging(vcpu)) {
4059 vmcs12->guest_pdptr0 = vmcs_read64(GUEST_PDPTR0);
4060 vmcs12->guest_pdptr1 = vmcs_read64(GUEST_PDPTR1);
4061 vmcs12->guest_pdptr2 = vmcs_read64(GUEST_PDPTR2);
4062 vmcs12->guest_pdptr3 = vmcs_read64(GUEST_PDPTR3);
4063 }
4064 }
4065
4066 vmcs12->guest_linear_address = vmcs_readl(GUEST_LINEAR_ADDRESS);
4067
4068 if (nested_cpu_has_vid(vmcs12))
4069 vmcs12->guest_intr_status = vmcs_read16(GUEST_INTR_STATUS);
4070
4071 vmcs12->vm_entry_controls =
4072 (vmcs12->vm_entry_controls & ~VM_ENTRY_IA32E_MODE) |
4073 (vm_entry_controls_get(to_vmx(vcpu)) & VM_ENTRY_IA32E_MODE);
4074
4075 if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_DEBUG_CONTROLS)
4076 kvm_get_dr(vcpu, 7, (unsigned long *)&vmcs12->guest_dr7);
4077
4078 if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_EFER)
4079 vmcs12->guest_ia32_efer = vcpu->arch.efer;
4080}
4081
4082
4083
4084
4085
4086
4087
4088
4089
4090
4091
4092
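/*
 * prepare_vmcs12() updates L1's vmcs12 when its L2 guest exits and L1 is
 * about to run again: it records the exit reason and the guest-state and
 * exit-information fields that may have changed while L2 ran.  Fields that
 * only L1 modifies (via VMWRITE) are already up to date in vmcs12.
 */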
4093static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
4094 u32 vm_exit_reason, u32 exit_intr_info,
4095 unsigned long exit_qualification)
4096{
4097
4098 vmcs12->vm_exit_reason = vm_exit_reason;
4099 vmcs12->exit_qualification = exit_qualification;
4100 vmcs12->vm_exit_intr_info = exit_intr_info;
4101
4102 vmcs12->idt_vectoring_info_field = 0;
4103 vmcs12->vm_exit_instruction_len = vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
4104 vmcs12->vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
4105
4106 if (!(vmcs12->vm_exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY)) {
4107 vmcs12->launch_state = 1;
4108
4109
4110
4111 vmcs12->vm_entry_intr_info_field &= ~INTR_INFO_VALID_MASK;
4112
4113
4114
4115
4116
4117 vmcs12_save_pending_event(vcpu, vmcs12);
4118
4119
4120
4121
4122
4123
4124
4125 if (nested_vmx_store_msr(vcpu,
4126 vmcs12->vm_exit_msr_store_addr,
4127 vmcs12->vm_exit_msr_store_count))
4128 nested_vmx_abort(vcpu,
4129 VMX_ABORT_SAVE_GUEST_MSR_FAIL);
4130 }
4131
4132
4133
4134
4135
4136 vcpu->arch.nmi_injected = false;
4137 kvm_clear_exception_queue(vcpu);
4138 kvm_clear_interrupt_queue(vcpu);
4139}
4140
4141
4142
4143
4144
4145
4146
4147
4148
4149
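/*
 * Reset L1's guest state to the host state specified in vmcs12 when its L2
 * guest exits.  This is used both on a normal nested VM-exit and on a nested
 * entry failure after loading guest state, and must be called with vmcs01
 * active.
 */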
4150static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
4151 struct vmcs12 *vmcs12)
4152{
4153 enum vm_entry_failure_code ignored;
4154 struct kvm_segment seg;
4155
4156 if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER)
4157 vcpu->arch.efer = vmcs12->host_ia32_efer;
4158 else if (vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE)
4159 vcpu->arch.efer |= (EFER_LMA | EFER_LME);
4160 else
4161 vcpu->arch.efer &= ~(EFER_LMA | EFER_LME);
4162 vmx_set_efer(vcpu, vcpu->arch.efer);
4163
4164 kvm_rsp_write(vcpu, vmcs12->host_rsp);
4165 kvm_rip_write(vcpu, vmcs12->host_rip);
4166 vmx_set_rflags(vcpu, X86_EFLAGS_FIXED);
4167 vmx_set_interrupt_shadow(vcpu, 0);
4168
4169
4170
4171
4172
4173
4174
4175
4176 vcpu->arch.cr0_guest_owned_bits = KVM_POSSIBLE_CR0_GUEST_BITS;
4177 vmx_set_cr0(vcpu, vmcs12->host_cr0);
4178
4179
4180 vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK);
4181 vmx_set_cr4(vcpu, vmcs12->host_cr4);
4182
4183 nested_ept_uninit_mmu_context(vcpu);
4184
4185
4186
4187
4188
4189 if (nested_vmx_load_cr3(vcpu, vmcs12->host_cr3, false, &ignored))
4190 nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_PDPTE_FAIL);
4191
4192 if (!enable_ept)
4193 vcpu->arch.walk_mmu->inject_page_fault = kvm_inject_page_fault;
4194
4195 nested_vmx_transition_tlb_flush(vcpu, vmcs12, false);
4196
4197 vmcs_write32(GUEST_SYSENTER_CS, vmcs12->host_ia32_sysenter_cs);
4198 vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->host_ia32_sysenter_esp);
4199 vmcs_writel(GUEST_SYSENTER_EIP, vmcs12->host_ia32_sysenter_eip);
4200 vmcs_writel(GUEST_IDTR_BASE, vmcs12->host_idtr_base);
4201 vmcs_writel(GUEST_GDTR_BASE, vmcs12->host_gdtr_base);
4202 vmcs_write32(GUEST_IDTR_LIMIT, 0xFFFF);
4203 vmcs_write32(GUEST_GDTR_LIMIT, 0xFFFF);
4204
4205
4206 if (vmcs12->vm_exit_controls & VM_EXIT_CLEAR_BNDCFGS)
4207 vmcs_write64(GUEST_BNDCFGS, 0);
4208
4209 if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PAT) {
4210 vmcs_write64(GUEST_IA32_PAT, vmcs12->host_ia32_pat);
4211 vcpu->arch.pat = vmcs12->host_ia32_pat;
4212 }
4213 if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL)
4214 WARN_ON_ONCE(kvm_set_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL,
4215 vmcs12->host_ia32_perf_global_ctrl));
4216
4217
4218
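	/*
	 * Load L1's segment registers as described in the SDM's rules for
	 * loading host segment and descriptor-table registers on VM-exit.
	 */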
4219 seg = (struct kvm_segment) {
4220 .base = 0,
4221 .limit = 0xFFFFFFFF,
4222 .selector = vmcs12->host_cs_selector,
4223 .type = 11,
4224 .present = 1,
4225 .s = 1,
4226 .g = 1
4227 };
4228 if (vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE)
4229 seg.l = 1;
4230 else
4231 seg.db = 1;
4232 vmx_set_segment(vcpu, &seg, VCPU_SREG_CS);
4233 seg = (struct kvm_segment) {
4234 .base = 0,
4235 .limit = 0xFFFFFFFF,
4236 .type = 3,
4237 .present = 1,
4238 .s = 1,
4239 .db = 1,
4240 .g = 1
4241 };
4242 seg.selector = vmcs12->host_ds_selector;
4243 vmx_set_segment(vcpu, &seg, VCPU_SREG_DS);
4244 seg.selector = vmcs12->host_es_selector;
4245 vmx_set_segment(vcpu, &seg, VCPU_SREG_ES);
4246 seg.selector = vmcs12->host_ss_selector;
4247 vmx_set_segment(vcpu, &seg, VCPU_SREG_SS);
4248 seg.selector = vmcs12->host_fs_selector;
4249 seg.base = vmcs12->host_fs_base;
4250 vmx_set_segment(vcpu, &seg, VCPU_SREG_FS);
4251 seg.selector = vmcs12->host_gs_selector;
4252 seg.base = vmcs12->host_gs_base;
4253 vmx_set_segment(vcpu, &seg, VCPU_SREG_GS);
4254 seg = (struct kvm_segment) {
4255 .base = vmcs12->host_tr_base,
4256 .limit = 0x67,
4257 .selector = vmcs12->host_tr_selector,
4258 .type = 11,
4259 .present = 1
4260 };
4261 vmx_set_segment(vcpu, &seg, VCPU_SREG_TR);
4262
4263 kvm_set_dr(vcpu, 7, 0x400);
4264 vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
4265
4266 if (cpu_has_vmx_msr_bitmap())
4267 vmx_update_msr_bitmap(vcpu);
4268
4269 if (nested_vmx_load_msr(vcpu, vmcs12->vm_exit_msr_load_addr,
4270 vmcs12->vm_exit_msr_load_count))
4271 nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_MSR_FAIL);
4272}
4273
4274static inline u64 nested_vmx_get_vmcs01_guest_efer(struct vcpu_vmx *vmx)
4275{
4276 struct vmx_uret_msr *efer_msr;
4277 unsigned int i;
4278
4279 if (vm_entry_controls_get(vmx) & VM_ENTRY_LOAD_IA32_EFER)
4280 return vmcs_read64(GUEST_IA32_EFER);
4281
4282 if (cpu_has_load_ia32_efer())
4283 return host_efer;
4284
4285 for (i = 0; i < vmx->msr_autoload.guest.nr; ++i) {
4286 if (vmx->msr_autoload.guest.val[i].index == MSR_EFER)
4287 return vmx->msr_autoload.guest.val[i].value;
4288 }
4289
4290 efer_msr = vmx_find_uret_msr(vmx, MSR_EFER);
4291 if (efer_msr)
4292 return efer_msr->data;
4293
4294 return host_efer;
4295}
4296
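/*
 * Restore L1's state after a VM-entry to L2 that failed "late", i.e. after
 * hardware already consumed vmcs02 (vmx->fail).  Rather than loading host
 * state from vmcs12, reload L1's state from vmcs01 and undo the effect of
 * L1's VM-entry MSR-load list by applying the matching entries of its
 * VM-exit MSR-load list, aborting on any inconsistency.
 */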
4297static void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu)
4298{
4299 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
4300 struct vcpu_vmx *vmx = to_vmx(vcpu);
4301 struct vmx_msr_entry g, h;
4302 gpa_t gpa;
4303 u32 i, j;
4304
4305 vcpu->arch.pat = vmcs_read64(GUEST_IA32_PAT);
4306
4307 if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS) {
4308
4309
4310
4311
4312
4313
4314 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
4315 kvm_set_dr(vcpu, 7, DR7_FIXED_1);
4316 else
4317 WARN_ON(kvm_set_dr(vcpu, 7, vmcs_readl(GUEST_DR7)));
4318 }
4319
4320
4321
4322
4323
4324 vmx_set_efer(vcpu, nested_vmx_get_vmcs01_guest_efer(vmx));
4325
4326 vcpu->arch.cr0_guest_owned_bits = KVM_POSSIBLE_CR0_GUEST_BITS;
4327 vmx_set_cr0(vcpu, vmcs_readl(CR0_READ_SHADOW));
4328
4329 vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK);
4330 vmx_set_cr4(vcpu, vmcs_readl(CR4_READ_SHADOW));
4331
4332 nested_ept_uninit_mmu_context(vcpu);
4333 vcpu->arch.cr3 = vmcs_readl(GUEST_CR3);
4334 kvm_register_mark_available(vcpu, VCPU_EXREG_CR3);
4335
4336
4337
4338
4339
4340
4341
4342 if (enable_ept && is_pae_paging(vcpu))
4343 ept_save_pdptrs(vcpu);
4344
4345 kvm_mmu_reset_context(vcpu);
4346
4347 if (cpu_has_vmx_msr_bitmap())
4348 vmx_update_msr_bitmap(vcpu);
4349
4350
4351
4352
4353
4354
4355
4356
4357
4358
4359
4360
4361 for (i = 0; i < vmcs12->vm_entry_msr_load_count; i++) {
4362 gpa = vmcs12->vm_entry_msr_load_addr + (i * sizeof(g));
4363 if (kvm_vcpu_read_guest(vcpu, gpa, &g, sizeof(g))) {
4364 pr_debug_ratelimited(
4365 "%s read MSR index failed (%u, 0x%08llx)\n",
4366 __func__, i, gpa);
4367 goto vmabort;
4368 }
4369
4370 for (j = 0; j < vmcs12->vm_exit_msr_load_count; j++) {
4371 gpa = vmcs12->vm_exit_msr_load_addr + (j * sizeof(h));
4372 if (kvm_vcpu_read_guest(vcpu, gpa, &h, sizeof(h))) {
4373 pr_debug_ratelimited(
4374 "%s read MSR failed (%u, 0x%08llx)\n",
4375 __func__, j, gpa);
4376 goto vmabort;
4377 }
4378 if (h.index != g.index)
4379 continue;
4380 if (h.value == g.value)
4381 break;
4382
4383 if (nested_vmx_load_msr_check(vcpu, &h)) {
4384 pr_debug_ratelimited(
4385 "%s check failed (%u, 0x%x, 0x%x)\n",
4386 __func__, j, h.index, h.reserved);
4387 goto vmabort;
4388 }
4389
4390 if (kvm_set_msr(vcpu, h.index, h.value)) {
4391 pr_debug_ratelimited(
4392 "%s WRMSR failed (%u, 0x%x, 0x%llx)\n",
4393 __func__, j, h.index, h.value);
4394 goto vmabort;
4395 }
4396 }
4397 }
4398
4399 return;
4400
4401vmabort:
4402 nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_MSR_FAIL);
4403}
4404
4405
4406
4407
4408
4409
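/*
 * Emulate a VM-exit from L2 to L1.  A vm_exit_reason of -1 means KVM is
 * leaving guest mode without performing an architectural VM-exit (see
 * vmx_leave_nested()), in which case no exit information is recorded in
 * vmcs12.  On the vmx->fail path a hardware VM-entry failure is reported to
 * L1 as a VM-instruction error and L1 state is restored from vmcs01 instead.
 */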
4410void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
4411 u32 exit_intr_info, unsigned long exit_qualification)
4412{
4413 struct vcpu_vmx *vmx = to_vmx(vcpu);
4414 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
4415
4416
4417 WARN_ON_ONCE(vmx->nested.nested_run_pending);
4418
4419
4420 if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu))
4421 kvm_vcpu_flush_tlb_current(vcpu);
4422
4423
4424
4425
4426
4427
4428 if (enable_ept && is_pae_paging(vcpu))
4429 vmx_ept_load_pdptrs(vcpu);
4430
4431 leave_guest_mode(vcpu);
4432
4433 if (nested_cpu_has_preemption_timer(vmcs12))
4434 hrtimer_cancel(&to_vmx(vcpu)->nested.preemption_timer);
4435
4436 if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETTING)
4437 vcpu->arch.tsc_offset -= vmcs12->tsc_offset;
4438
4439 if (likely(!vmx->fail)) {
4440 sync_vmcs02_to_vmcs12(vcpu, vmcs12);
4441
4442 if (vm_exit_reason != -1)
4443 prepare_vmcs12(vcpu, vmcs12, vm_exit_reason,
4444 exit_intr_info, exit_qualification);
4445
4446
4447
4448
4449
4450
4451
4452
4453
4454
4455 nested_flush_cached_shadow_vmcs12(vcpu, vmcs12);
4456 } else {
4457
4458
4459
4460
4461
4462
4463 WARN_ON_ONCE(vmcs_read32(VM_INSTRUCTION_ERROR) !=
4464 VMXERR_ENTRY_INVALID_CONTROL_FIELD);
4465 WARN_ON_ONCE(nested_early_check);
4466 }
4467
4468 vmx_switch_vmcs(vcpu, &vmx->vmcs01);
4469
4470
4471 vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr);
4472 vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr);
4473 vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset);
4474 if (vmx->nested.l1_tpr_threshold != -1)
4475 vmcs_write32(TPR_THRESHOLD, vmx->nested.l1_tpr_threshold);
4476
4477 if (kvm_has_tsc_control)
4478 decache_tsc_multiplier(vmx);
4479
4480 if (vmx->nested.change_vmcs01_virtual_apic_mode) {
4481 vmx->nested.change_vmcs01_virtual_apic_mode = false;
4482 vmx_set_virtual_apic_mode(vcpu);
4483 }
4484
4485
4486 if (vmx->nested.apic_access_page) {
4487 kvm_release_page_clean(vmx->nested.apic_access_page);
4488 vmx->nested.apic_access_page = NULL;
4489 }
4490 kvm_vcpu_unmap(vcpu, &vmx->nested.virtual_apic_map, true);
4491 kvm_vcpu_unmap(vcpu, &vmx->nested.pi_desc_map, true);
4492 vmx->nested.pi_desc = NULL;
4493
4494 if (vmx->nested.reload_vmcs01_apic_access_page) {
4495 vmx->nested.reload_vmcs01_apic_access_page = false;
4496 kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu);
4497 }
4498
4499 if ((vm_exit_reason != -1) &&
4500 (enable_shadow_vmcs || vmx->nested.hv_evmcs))
4501 vmx->nested.need_vmcs12_to_shadow_sync = true;
4502
4503
4504 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
4505
4506 if (likely(!vmx->fail)) {
4507 if ((u16)vm_exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT &&
4508 nested_exit_intr_ack_set(vcpu)) {
4509 int irq = kvm_cpu_get_interrupt(vcpu);
4510 WARN_ON(irq < 0);
4511 vmcs12->vm_exit_intr_info = irq |
4512 INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR;
4513 }
4514
4515 if (vm_exit_reason != -1)
4516 trace_kvm_nested_vmexit_inject(vmcs12->vm_exit_reason,
4517 vmcs12->exit_qualification,
4518 vmcs12->idt_vectoring_info_field,
4519 vmcs12->vm_exit_intr_info,
4520 vmcs12->vm_exit_intr_error_code,
4521 KVM_ISA_VMX);
4522
4523 load_vmcs12_host_state(vcpu, vmcs12);
4524
4525 return;
4526 }
4527
4528
4529
4530
4531
4532
4533
4534
4535 (void)nested_vmx_fail(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
4536
4537
4538
4539
4540
4541
4542
4543 nested_vmx_restore_host_state(vcpu);
4544
4545 vmx->fail = 0;
4546}
4547
4548
4549
4550
4551
4552
4553
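/*
 * Decode the memory operand of a VMX instruction from the exit qualification
 * (displacement) and the VM-exit instruction-information field, apply the
 * segment base and the architectural limit/canonicality checks, and return
 * the resulting linear address in *ret.  Returns 1 after injecting #UD, #GP
 * or #SS when the operand is a register or a check fails.
 */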
4554int get_vmx_mem_address(struct kvm_vcpu *vcpu, unsigned long exit_qualification,
4555 u32 vmx_instruction_info, bool wr, int len, gva_t *ret)
4556{
4557 gva_t off;
4558 bool exn;
4559 struct kvm_segment s;
4560
4561
4562
4563
4564
4565
4566
4567
4568
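	/*
	 * VM-exit instruction information layout (per the SDM), as decoded
	 * below: bits 1:0 = scaling, bits 9:7 = address size, bit 10 =
	 * register operand, bits 17:15 = segment register, bits 21:18 =
	 * index register (bit 22 = index invalid), bits 26:23 = base
	 * register (bit 27 = base invalid), bits 31:28 = operand register
	 * for register-form instructions.
	 */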
4569 int scaling = vmx_instruction_info & 3;
4570 int addr_size = (vmx_instruction_info >> 7) & 7;
4571 bool is_reg = vmx_instruction_info & (1u << 10);
4572 int seg_reg = (vmx_instruction_info >> 15) & 7;
4573 int index_reg = (vmx_instruction_info >> 18) & 0xf;
4574 bool index_is_valid = !(vmx_instruction_info & (1u << 22));
4575 int base_reg = (vmx_instruction_info >> 23) & 0xf;
4576 bool base_is_valid = !(vmx_instruction_info & (1u << 27));
4577
4578 if (is_reg) {
4579 kvm_queue_exception(vcpu, UD_VECTOR);
4580 return 1;
4581 }
4582
4583
4584
4585 off = exit_qualification;
4586 if (addr_size == 1)
4587 off = (gva_t)sign_extend64(off, 31);
4588 else if (addr_size == 0)
4589 off = (gva_t)sign_extend64(off, 15);
4590 if (base_is_valid)
4591 off += kvm_register_read(vcpu, base_reg);
4592 if (index_is_valid)
4593 off += kvm_register_read(vcpu, index_reg) << scaling;
4594 vmx_get_segment(vcpu, &s, seg_reg);
4595
4596
4597
4598
4599
4600
4601
4602 if (addr_size == 1)
4603 off &= 0xffffffff;
4604 else if (addr_size == 0)
4605 off &= 0xffff;
4606
4607
4608 exn = false;
4609 if (is_long_mode(vcpu)) {
4610
4611
4612
4613
4614
4615 if (seg_reg == VCPU_SREG_FS || seg_reg == VCPU_SREG_GS)
4616 *ret = s.base + off;
4617 else
4618 *ret = off;
4619
4620
4621
4622
4623
4624 exn = is_noncanonical_address(*ret, vcpu);
4625 } else {
4626
4627
4628
4629
4630
4631 *ret = (s.base + off) & 0xffffffff;
4632
4633
4634
4635
4636
4637
4638
4639 if (wr)
4640
4641
4642
4643 exn = ((s.type & 0xa) == 0 || (s.type & 8));
4644 else
4645
4646
4647
4648 exn = ((s.type & 0xa) == 8);
4649 if (exn) {
4650 kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
4651 return 1;
4652 }
4653
4654
4655 exn = (s.unusable != 0);
4656
4657
4658
4659
4660
4661
4662
4663 if (!(s.base == 0 && s.limit == 0xffffffff &&
4664 ((s.type & 8) || !(s.type & 4))))
4665 exn = exn || ((u64)off + len - 1 > s.limit);
4666 }
4667 if (exn) {
4668 kvm_queue_exception_e(vcpu,
4669 seg_reg == VCPU_SREG_SS ?
4670 SS_VECTOR : GP_VECTOR,
4671 0);
4672 return 1;
4673 }
4674
4675 return 0;
4676}
4677
4678void nested_vmx_pmu_entry_exit_ctls_update(struct kvm_vcpu *vcpu)
4679{
4680 struct vcpu_vmx *vmx;
4681
4682 if (!nested_vmx_allowed(vcpu))
4683 return;
4684
4685 vmx = to_vmx(vcpu);
4686 if (kvm_x86_ops.pmu_ops->is_valid_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL)) {
4687 vmx->nested.msrs.entry_ctls_high |=
4688 VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL;
4689 vmx->nested.msrs.exit_ctls_high |=
4690 VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL;
4691 } else {
4692 vmx->nested.msrs.entry_ctls_high &=
4693 ~VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL;
4694 vmx->nested.msrs.exit_ctls_high &=
4695 ~VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL;
4696 }
4697}
4698
4699static int nested_vmx_get_vmptr(struct kvm_vcpu *vcpu, gpa_t *vmpointer,
4700 int *ret)
4701{
4702 gva_t gva;
4703 struct x86_exception e;
4704 int r;
4705
4706 if (get_vmx_mem_address(vcpu, vmx_get_exit_qual(vcpu),
4707 vmcs_read32(VMX_INSTRUCTION_INFO), false,
4708 sizeof(*vmpointer), &gva)) {
4709 *ret = 1;
4710 return -EINVAL;
4711 }
4712
4713 r = kvm_read_guest_virt(vcpu, gva, vmpointer, sizeof(*vmpointer), &e);
4714 if (r != X86EMUL_CONTINUE) {
4715 *ret = kvm_handle_memory_failure(vcpu, r, &e);
4716 return -EINVAL;
4717 }
4718
4719 return 0;
4720}
4721
4722
4723
4724
4725
4726
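/*
 * Allocate a shadow VMCS for the currently loaded VMCS if one does not
 * already exist.  vmcs01's shadow VMCS lives for the lifetime of the vCPU,
 * so finding one already attached to vmcs01 here is unexpected, hence the
 * WARN_ON().
 */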
4727static struct vmcs *alloc_shadow_vmcs(struct kvm_vcpu *vcpu)
4728{
4729 struct vcpu_vmx *vmx = to_vmx(vcpu);
4730 struct loaded_vmcs *loaded_vmcs = vmx->loaded_vmcs;
4731
4732
4733
4734
4735
4736
4737
4738 WARN_ON(loaded_vmcs == &vmx->vmcs01 && loaded_vmcs->shadow_vmcs);
4739
4740 if (!loaded_vmcs->shadow_vmcs) {
4741 loaded_vmcs->shadow_vmcs = alloc_vmcs(true);
4742 if (loaded_vmcs->shadow_vmcs)
4743 vmcs_clear(loaded_vmcs->shadow_vmcs);
4744 }
4745 return loaded_vmcs->shadow_vmcs;
4746}
4747
4748static int enter_vmx_operation(struct kvm_vcpu *vcpu)
4749{
4750 struct vcpu_vmx *vmx = to_vmx(vcpu);
4751 int r;
4752
4753 r = alloc_loaded_vmcs(&vmx->nested.vmcs02);
4754 if (r < 0)
4755 goto out_vmcs02;
4756
4757 vmx->nested.cached_vmcs12 = kzalloc(VMCS12_SIZE, GFP_KERNEL_ACCOUNT);
4758 if (!vmx->nested.cached_vmcs12)
4759 goto out_cached_vmcs12;
4760
4761 vmx->nested.cached_shadow_vmcs12 = kzalloc(VMCS12_SIZE, GFP_KERNEL_ACCOUNT);
4762 if (!vmx->nested.cached_shadow_vmcs12)
4763 goto out_cached_shadow_vmcs12;
4764
4765 if (enable_shadow_vmcs && !alloc_shadow_vmcs(vcpu))
4766 goto out_shadow_vmcs;
4767
4768 hrtimer_init(&vmx->nested.preemption_timer, CLOCK_MONOTONIC,
4769 HRTIMER_MODE_ABS_PINNED);
4770 vmx->nested.preemption_timer.function = vmx_preemption_timer_fn;
4771
4772 vmx->nested.vpid02 = allocate_vpid();
4773
4774 vmx->nested.vmcs02_initialized = false;
4775 vmx->nested.vmxon = true;
4776
4777 if (vmx_pt_mode_is_host_guest()) {
4778 vmx->pt_desc.guest.ctl = 0;
4779 pt_update_intercept_for_msr(vcpu);
4780 }
4781
4782 return 0;
4783
4784out_shadow_vmcs:
4785 kfree(vmx->nested.cached_shadow_vmcs12);
4786
4787out_cached_shadow_vmcs12:
4788 kfree(vmx->nested.cached_vmcs12);
4789
4790out_cached_vmcs12:
4791 free_loaded_vmcs(&vmx->nested.vmcs02);
4792
4793out_vmcs02:
4794 return -ENOMEM;
4795}
4796
4797
4798
4799
4800
4801
4802
4803
4804
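/*
 * Emulate VMXON.  The CR4.VMXE and CPL checks inject #UD/#GP directly; the
 * IA32_FEATURE_CONTROL bits must be locked with VMX enabled outside SMX;
 * and the VMXON pointer must be a valid, page-aligned guest physical address
 * referring to a region with the supported VMCS revision id.
 */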
4805static int handle_vmon(struct kvm_vcpu *vcpu)
4806{
4807 int ret;
4808 gpa_t vmptr;
4809 uint32_t revision;
4810 struct vcpu_vmx *vmx = to_vmx(vcpu);
4811 const u64 VMXON_NEEDED_FEATURES = FEAT_CTL_LOCKED
4812 | FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX;
4813
4814
4815
4816
4817
4818
4819
4820
4821
4822
4823 if (!kvm_read_cr4_bits(vcpu, X86_CR4_VMXE)) {
4824 kvm_queue_exception(vcpu, UD_VECTOR);
4825 return 1;
4826 }
4827
4828
4829 if (vmx_get_cpl(vcpu)) {
4830 kvm_inject_gp(vcpu, 0);
4831 return 1;
4832 }
4833
4834 if (vmx->nested.vmxon)
4835 return nested_vmx_fail(vcpu, VMXERR_VMXON_IN_VMX_ROOT_OPERATION);
4836
4837 if ((vmx->msr_ia32_feature_control & VMXON_NEEDED_FEATURES)
4838 != VMXON_NEEDED_FEATURES) {
4839 kvm_inject_gp(vcpu, 0);
4840 return 1;
4841 }
4842
4843 if (nested_vmx_get_vmptr(vcpu, &vmptr, &ret))
4844 return ret;
4845
4846
4847
4848
4849
4850
4851
4852
4853
4854 if (!page_address_valid(vcpu, vmptr))
4855 return nested_vmx_failInvalid(vcpu);
4856
4857 if (kvm_read_guest(vcpu->kvm, vmptr, &revision, sizeof(revision)) ||
4858 revision != VMCS12_REVISION)
4859 return nested_vmx_failInvalid(vcpu);
4860
4861 vmx->nested.vmxon_ptr = vmptr;
4862 ret = enter_vmx_operation(vcpu);
4863 if (ret)
4864 return ret;
4865
4866 return nested_vmx_succeed(vcpu);
4867}
4868
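/*
 * Detach the current vmcs12: pull any rarely-synced and shadowed fields back
 * into the cache, write the cached vmcs12 out to L1's VMCS page, free the
 * guest (nested) MMU roots derived from it, and forget the current-VMCS
 * pointer.
 */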
4869static inline void nested_release_vmcs12(struct kvm_vcpu *vcpu)
4870{
4871 struct vcpu_vmx *vmx = to_vmx(vcpu);
4872
4873 if (vmx->nested.current_vmptr == -1ull)
4874 return;
4875
4876 copy_vmcs02_to_vmcs12_rare(vcpu, get_vmcs12(vcpu));
4877
4878 if (enable_shadow_vmcs) {
4879
4880
4881 copy_shadow_to_vmcs12(vmx);
4882 vmx_disable_shadow_vmcs(vmx);
4883 }
4884 vmx->nested.posted_intr_nv = -1;
4885
4886
4887 kvm_vcpu_write_guest_page(vcpu,
4888 vmx->nested.current_vmptr >> PAGE_SHIFT,
4889 vmx->nested.cached_vmcs12, 0, VMCS12_SIZE);
4890
4891 kvm_mmu_free_roots(vcpu, &vcpu->arch.guest_mmu, KVM_MMU_ROOTS_ALL);
4892
4893 vmx->nested.current_vmptr = -1ull;
4894}
4895
4896
4897static int handle_vmoff(struct kvm_vcpu *vcpu)
4898{
4899 if (!nested_vmx_check_permission(vcpu))
4900 return 1;
4901
4902 free_nested(vcpu);
4903
4904
4905 kvm_make_request(KVM_REQ_EVENT, vcpu);
4906
4907 return nested_vmx_succeed(vcpu);
4908}
4909
4910
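/*
 * Emulate VMCLEAR.  The operand must be a valid page-aligned address and
 * must not be the VMXON pointer.  While an enlightened VMCS is active the
 * launch_state update is skipped; otherwise the current vmcs12 is released
 * if it matches the operand and launch_state is zeroed in guest memory.
 */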
4911static int handle_vmclear(struct kvm_vcpu *vcpu)
4912{
4913 struct vcpu_vmx *vmx = to_vmx(vcpu);
4914 u32 zero = 0;
4915 gpa_t vmptr;
4916 u64 evmcs_gpa;
4917 int r;
4918
4919 if (!nested_vmx_check_permission(vcpu))
4920 return 1;
4921
4922 if (nested_vmx_get_vmptr(vcpu, &vmptr, &r))
4923 return r;
4924
4925 if (!page_address_valid(vcpu, vmptr))
4926 return nested_vmx_fail(vcpu, VMXERR_VMCLEAR_INVALID_ADDRESS);
4927
4928 if (vmptr == vmx->nested.vmxon_ptr)
4929 return nested_vmx_fail(vcpu, VMXERR_VMCLEAR_VMXON_POINTER);
4930
4931
4932
4933
4934
4935
4936
4937
4938
4939
4940
4941 if (likely(!vmx->nested.enlightened_vmcs_enabled ||
4942 !nested_enlightened_vmentry(vcpu, &evmcs_gpa))) {
4943 if (vmptr == vmx->nested.current_vmptr)
4944 nested_release_vmcs12(vcpu);
4945
4946 kvm_vcpu_write_guest(vcpu,
4947 vmptr + offsetof(struct vmcs12,
4948 launch_state),
4949 &zero, sizeof(zero));
4950 }
4951
4952 return nested_vmx_succeed(vcpu);
4953}
4954
4955
4956static int handle_vmlaunch(struct kvm_vcpu *vcpu)
4957{
4958 return nested_vmx_run(vcpu, true);
4959}
4960
4961
4962static int handle_vmresume(struct kvm_vcpu *vcpu)
4963{
4964
4965 return nested_vmx_run(vcpu, false);
4966}
4967
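/*
 * Emulate VMREAD.  In guest mode (L2) the access targets the shadow vmcs12,
 * otherwise the cached vmcs12.  Unsupported encodings fail with
 * VMXERR_UNSUPPORTED_VMCS_COMPONENT; otherwise the value is written to the
 * destination register or guest memory operand.
 */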
4968static int handle_vmread(struct kvm_vcpu *vcpu)
4969{
4970 struct vmcs12 *vmcs12 = is_guest_mode(vcpu) ? get_shadow_vmcs12(vcpu)
4971 : get_vmcs12(vcpu);
4972 unsigned long exit_qualification = vmx_get_exit_qual(vcpu);
4973 u32 instr_info = vmcs_read32(VMX_INSTRUCTION_INFO);
4974 struct vcpu_vmx *vmx = to_vmx(vcpu);
4975 struct x86_exception e;
4976 unsigned long field;
4977 u64 value;
4978 gva_t gva = 0;
4979 short offset;
4980 int len, r;
4981
4982 if (!nested_vmx_check_permission(vcpu))
4983 return 1;
4984
4985
4986
4987
4988
4989 if (vmx->nested.current_vmptr == -1ull ||
4990 (is_guest_mode(vcpu) &&
4991 get_vmcs12(vcpu)->vmcs_link_pointer == -1ull))
4992 return nested_vmx_failInvalid(vcpu);
4993
4994
4995 field = kvm_register_readl(vcpu, (((instr_info) >> 28) & 0xf));
4996
4997 offset = vmcs_field_to_offset(field);
4998 if (offset < 0)
4999 return nested_vmx_fail(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
5000
5001 if (!is_guest_mode(vcpu) && is_vmcs12_ext_field(field))
5002 copy_vmcs02_to_vmcs12_rare(vcpu, vmcs12);
5003
5004
5005 value = vmcs12_read_any(vmcs12, field, offset);
5006
5007
5008
5009
5010
5011
5012 if (instr_info & BIT(10)) {
5013 kvm_register_writel(vcpu, (((instr_info) >> 3) & 0xf), value);
5014 } else {
5015 len = is_64_bit_mode(vcpu) ? 8 : 4;
5016 if (get_vmx_mem_address(vcpu, exit_qualification,
5017 instr_info, true, len, &gva))
5018 return 1;
5019
5020 r = kvm_write_guest_virt_system(vcpu, gva, &value, len, &e);
5021 if (r != X86EMUL_CONTINUE)
5022 return kvm_handle_memory_failure(vcpu, r, &e);
5023 }
5024
5025 return nested_vmx_succeed(vcpu);
5026}
5027
5028static bool is_shadow_field_rw(unsigned long field)
5029{
5030 switch (field) {
5031#define SHADOW_FIELD_RW(x, y) case x:
5032#include "vmcs_shadow_fields.h"
5033 return true;
5034 default:
5035 break;
5036 }
5037 return false;
5038}
5039
5040static bool is_shadow_field_ro(unsigned long field)
5041{
5042 switch (field) {
5043#define SHADOW_FIELD_RO(x, y) case x:
5044#include "vmcs_shadow_fields.h"
5045 return true;
5046 default:
5047 break;
5048 }
5049 return false;
5050}
5051
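/*
 * Emulate VMWRITE.  Read-only fields are rejected unless the "VMWRITE to any
 * field" MISC capability is exposed to L1.  Writes to fields not handled by
 * the shadow VMCS mark vmcs12 dirty so vmcs02 is refreshed on the next
 * nested VM-entry; writes to shadowed read-only fields are also propagated
 * to vmcs01's shadow VMCS.
 */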
5052static int handle_vmwrite(struct kvm_vcpu *vcpu)
5053{
5054 struct vmcs12 *vmcs12 = is_guest_mode(vcpu) ? get_shadow_vmcs12(vcpu)
5055 : get_vmcs12(vcpu);
5056 unsigned long exit_qualification = vmx_get_exit_qual(vcpu);
5057 u32 instr_info = vmcs_read32(VMX_INSTRUCTION_INFO);
5058 struct vcpu_vmx *vmx = to_vmx(vcpu);
5059 struct x86_exception e;
5060 unsigned long field;
5061 short offset;
5062 gva_t gva;
5063 int len, r;
5064
5065
5066
5067
5068
5069
5070
5071
5072 u64 value = 0;
5073
5074 if (!nested_vmx_check_permission(vcpu))
5075 return 1;
5076
5077
5078
5079
5080
5081 if (vmx->nested.current_vmptr == -1ull ||
5082 (is_guest_mode(vcpu) &&
5083 get_vmcs12(vcpu)->vmcs_link_pointer == -1ull))
5084 return nested_vmx_failInvalid(vcpu);
5085
5086 if (instr_info & BIT(10))
5087 value = kvm_register_readl(vcpu, (((instr_info) >> 3) & 0xf));
5088 else {
5089 len = is_64_bit_mode(vcpu) ? 8 : 4;
5090 if (get_vmx_mem_address(vcpu, exit_qualification,
5091 instr_info, false, len, &gva))
5092 return 1;
5093 r = kvm_read_guest_virt(vcpu, gva, &value, len, &e);
5094 if (r != X86EMUL_CONTINUE)
5095 return kvm_handle_memory_failure(vcpu, r, &e);
5096 }
5097
5098 field = kvm_register_readl(vcpu, (((instr_info) >> 28) & 0xf));
5099
5100 offset = vmcs_field_to_offset(field);
5101 if (offset < 0)
5102 return nested_vmx_fail(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
5103
5104
5105
5106
5107
5108 if (vmcs_field_readonly(field) &&
5109 !nested_cpu_has_vmwrite_any_field(vcpu))
5110 return nested_vmx_fail(vcpu, VMXERR_VMWRITE_READ_ONLY_VMCS_COMPONENT);
5111
5112
5113
5114
5115
5116 if (!is_guest_mode(vcpu) && !is_shadow_field_rw(field))
5117 copy_vmcs02_to_vmcs12_rare(vcpu, vmcs12);
5118
5119
5120
5121
5122
5123
5124
5125
5126
5127 if (field >= GUEST_ES_AR_BYTES && field <= GUEST_TR_AR_BYTES)
5128 value &= 0x1f0ff;
5129
5130 vmcs12_write_any(vmcs12, field, offset, value);
5131
5132
5133
5134
5135
5136
5137
5138 if (!is_guest_mode(vcpu) && !is_shadow_field_rw(field)) {
5139
5140
5141
5142
5143 if (enable_shadow_vmcs && is_shadow_field_ro(field)) {
5144 preempt_disable();
5145 vmcs_load(vmx->vmcs01.shadow_vmcs);
5146
5147 __vmcs_writel(field, value);
5148
5149 vmcs_clear(vmx->vmcs01.shadow_vmcs);
5150 vmcs_load(vmx->loaded_vmcs->vmcs);
5151 preempt_enable();
5152 }
5153 vmx->nested.dirty_vmcs12 = true;
5154 }
5155
5156 return nested_vmx_succeed(vcpu);
5157}
5158
5159static void set_current_vmptr(struct vcpu_vmx *vmx, gpa_t vmptr)
5160{
5161 vmx->nested.current_vmptr = vmptr;
5162 if (enable_shadow_vmcs) {
5163 secondary_exec_controls_setbit(vmx, SECONDARY_EXEC_SHADOW_VMCS);
5164 vmcs_write64(VMCS_LINK_POINTER,
5165 __pa(vmx->vmcs01.shadow_vmcs));
5166 vmx->nested.need_vmcs12_to_shadow_sync = true;
5167 }
5168 vmx->nested.dirty_vmcs12 = true;
5169}
5170
5171
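/*
 * Emulate VMPTRLD.  The pointer must be a valid page-aligned address other
 * than the VMXON pointer, and VMPTRLD is not serviced while an enlightened
 * VMCS is in use.  Loading a new pointer maps the guest page, verifies the
 * VMCS revision id (and shadow-VMCS support, if claimed), releases the
 * previous vmcs12 and caches the new one.
 */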
5172static int handle_vmptrld(struct kvm_vcpu *vcpu)
5173{
5174 struct vcpu_vmx *vmx = to_vmx(vcpu);
5175 gpa_t vmptr;
5176 int r;
5177
5178 if (!nested_vmx_check_permission(vcpu))
5179 return 1;
5180
5181 if (nested_vmx_get_vmptr(vcpu, &vmptr, &r))
5182 return r;
5183
5184 if (!page_address_valid(vcpu, vmptr))
5185 return nested_vmx_fail(vcpu, VMXERR_VMPTRLD_INVALID_ADDRESS);
5186
5187 if (vmptr == vmx->nested.vmxon_ptr)
5188 return nested_vmx_fail(vcpu, VMXERR_VMPTRLD_VMXON_POINTER);
5189
5190
5191 if (vmx->nested.hv_evmcs)
5192 return 1;
5193
5194 if (vmx->nested.current_vmptr != vmptr) {
5195 struct kvm_host_map map;
5196 struct vmcs12 *new_vmcs12;
5197
5198 if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmptr), &map)) {
5199
5200
5201
5202
5203
5204
5205 return nested_vmx_fail(vcpu,
5206 VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID);
5207 }
5208
5209 new_vmcs12 = map.hva;
5210
5211 if (new_vmcs12->hdr.revision_id != VMCS12_REVISION ||
5212 (new_vmcs12->hdr.shadow_vmcs &&
5213 !nested_cpu_has_vmx_shadow_vmcs(vcpu))) {
5214 kvm_vcpu_unmap(vcpu, &map, false);
5215 return nested_vmx_fail(vcpu,
5216 VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID);
5217 }
5218
5219 nested_release_vmcs12(vcpu);
5220
5221
5222
5223
5224
5225 memcpy(vmx->nested.cached_vmcs12, new_vmcs12, VMCS12_SIZE);
5226 kvm_vcpu_unmap(vcpu, &map, false);
5227
5228 set_current_vmptr(vmx, vmptr);
5229 }
5230
5231 return nested_vmx_succeed(vcpu);
5232}
5233
5234
5235static int handle_vmptrst(struct kvm_vcpu *vcpu)
5236{
5237 unsigned long exit_qual = vmx_get_exit_qual(vcpu);
5238 u32 instr_info = vmcs_read32(VMX_INSTRUCTION_INFO);
5239 gpa_t current_vmptr = to_vmx(vcpu)->nested.current_vmptr;
5240 struct x86_exception e;
5241 gva_t gva;
5242 int r;
5243
5244 if (!nested_vmx_check_permission(vcpu))
5245 return 1;
5246
5247 if (unlikely(to_vmx(vcpu)->nested.hv_evmcs))
5248 return 1;
5249
5250 if (get_vmx_mem_address(vcpu, exit_qual, instr_info,
5251 true, sizeof(gpa_t), &gva))
5252 return 1;
5253
5254 r = kvm_write_guest_virt_system(vcpu, gva, (void *)&current_vmptr,
5255 sizeof(gpa_t), &e);
5256 if (r != X86EMUL_CONTINUE)
5257 return kvm_handle_memory_failure(vcpu, r, &e);
5258
5259 return nested_vmx_succeed(vcpu);
5260}
5261
5262#define EPTP_PA_MASK GENMASK_ULL(51, 12)
5263
5264static bool nested_ept_root_matches(hpa_t root_hpa, u64 root_eptp, u64 eptp)
5265{
5266 return VALID_PAGE(root_hpa) &&
5267 ((root_eptp & EPTP_PA_MASK) == (eptp & EPTP_PA_MASK));
5268}
5269
5270
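/*
 * Emulate INVEPT on behalf of L1.  KVM shadows L1's EPT tables, so the
 * emulation frees the guest-MMU roots that were built for the given EPTP
 * (single-context) or all of them (global), forcing them to be rebuilt.
 */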
5271static int handle_invept(struct kvm_vcpu *vcpu)
5272{
5273 struct vcpu_vmx *vmx = to_vmx(vcpu);
5274 u32 vmx_instruction_info, types;
5275 unsigned long type, roots_to_free;
5276 struct kvm_mmu *mmu;
5277 gva_t gva;
5278 struct x86_exception e;
5279 struct {
5280 u64 eptp, gpa;
5281 } operand;
5282 int i, r;
5283
5284 if (!(vmx->nested.msrs.secondary_ctls_high &
5285 SECONDARY_EXEC_ENABLE_EPT) ||
5286 !(vmx->nested.msrs.ept_caps & VMX_EPT_INVEPT_BIT)) {
5287 kvm_queue_exception(vcpu, UD_VECTOR);
5288 return 1;
5289 }
5290
5291 if (!nested_vmx_check_permission(vcpu))
5292 return 1;
5293
5294 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
5295 type = kvm_register_readl(vcpu, (vmx_instruction_info >> 28) & 0xf);
5296
5297 types = (vmx->nested.msrs.ept_caps >> VMX_EPT_EXTENT_SHIFT) & 6;
5298
5299 if (type >= 32 || !(types & (1 << type)))
5300 return nested_vmx_fail(vcpu, VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
5301
5302
5303
5304
5305 if (get_vmx_mem_address(vcpu, vmx_get_exit_qual(vcpu),
5306 vmx_instruction_info, false, sizeof(operand), &gva))
5307 return 1;
5308 r = kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e);
5309 if (r != X86EMUL_CONTINUE)
5310 return kvm_handle_memory_failure(vcpu, r, &e);
5311
5312
5313
5314
5315
5316 mmu = &vcpu->arch.guest_mmu;
5317
5318 switch (type) {
5319 case VMX_EPT_EXTENT_CONTEXT:
5320 if (!nested_vmx_check_eptp(vcpu, operand.eptp))
5321 return nested_vmx_fail(vcpu,
5322 VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
5323
5324 roots_to_free = 0;
5325 if (nested_ept_root_matches(mmu->root_hpa, mmu->root_pgd,
5326 operand.eptp))
5327 roots_to_free |= KVM_MMU_ROOT_CURRENT;
5328
5329 for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) {
5330 if (nested_ept_root_matches(mmu->prev_roots[i].hpa,
5331 mmu->prev_roots[i].pgd,
5332 operand.eptp))
5333 roots_to_free |= KVM_MMU_ROOT_PREVIOUS(i);
5334 }
5335 break;
5336 case VMX_EPT_EXTENT_GLOBAL:
5337 roots_to_free = KVM_MMU_ROOTS_ALL;
5338 break;
5339 default:
5340 BUG();
5341 break;
5342 }
5343
5344 if (roots_to_free)
5345 kvm_mmu_free_roots(vcpu, mmu, roots_to_free);
5346
5347 return nested_vmx_succeed(vcpu);
5348}
5349
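/*
 * Emulate INVVPID on behalf of L1.  L2 runs with a KVM-allocated vpid02, so
 * the requested invalidation is performed against vpid02.  Without EPT,
 * combined mappings may also be cached in the root MMU, so its roots are
 * freed as well.
 */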
5350static int handle_invvpid(struct kvm_vcpu *vcpu)
5351{
5352 struct vcpu_vmx *vmx = to_vmx(vcpu);
5353 u32 vmx_instruction_info;
5354 unsigned long type, types;
5355 gva_t gva;
5356 struct x86_exception e;
5357 struct {
5358 u64 vpid;
5359 u64 gla;
5360 } operand;
5361 u16 vpid02;
5362 int r;
5363
5364 if (!(vmx->nested.msrs.secondary_ctls_high &
5365 SECONDARY_EXEC_ENABLE_VPID) ||
5366 !(vmx->nested.msrs.vpid_caps & VMX_VPID_INVVPID_BIT)) {
5367 kvm_queue_exception(vcpu, UD_VECTOR);
5368 return 1;
5369 }
5370
5371 if (!nested_vmx_check_permission(vcpu))
5372 return 1;
5373
5374 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
5375 type = kvm_register_readl(vcpu, (vmx_instruction_info >> 28) & 0xf);
5376
5377 types = (vmx->nested.msrs.vpid_caps &
5378 VMX_VPID_EXTENT_SUPPORTED_MASK) >> 8;
5379
5380 if (type >= 32 || !(types & (1 << type)))
5381 return nested_vmx_fail(vcpu,
5382 VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
5383
5384
5385
5386
5387 if (get_vmx_mem_address(vcpu, vmx_get_exit_qual(vcpu),
5388 vmx_instruction_info, false, sizeof(operand), &gva))
5389 return 1;
5390 r = kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e);
5391 if (r != X86EMUL_CONTINUE)
5392 return kvm_handle_memory_failure(vcpu, r, &e);
5393
5394 if (operand.vpid >> 16)
5395 return nested_vmx_fail(vcpu,
5396 VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
5397
5398 vpid02 = nested_get_vpid02(vcpu);
5399 switch (type) {
5400 case VMX_VPID_EXTENT_INDIVIDUAL_ADDR:
5401 if (!operand.vpid ||
5402 is_noncanonical_address(operand.gla, vcpu))
5403 return nested_vmx_fail(vcpu,
5404 VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
5405 vpid_sync_vcpu_addr(vpid02, operand.gla);
5406 break;
5407 case VMX_VPID_EXTENT_SINGLE_CONTEXT:
5408 case VMX_VPID_EXTENT_SINGLE_NON_GLOBAL:
5409 if (!operand.vpid)
5410 return nested_vmx_fail(vcpu,
5411 VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
5412 vpid_sync_context(vpid02);
5413 break;
5414 case VMX_VPID_EXTENT_ALL_CONTEXT:
5415 vpid_sync_context(vpid02);
5416 break;
5417 default:
5418 WARN_ON_ONCE(1);
5419 return kvm_skip_emulated_instruction(vcpu);
5420 }
5421
5422
5423
5424
5425
5426
5427
5428
5429
5430
5431
5432 if (!enable_ept)
5433 kvm_mmu_free_roots(vcpu, &vcpu->arch.root_mmu,
5434 KVM_MMU_ROOTS_ALL);
5435
5436 return nested_vmx_succeed(vcpu);
5437}
5438
5439static int nested_vmx_eptp_switching(struct kvm_vcpu *vcpu,
5440 struct vmcs12 *vmcs12)
5441{
5442 u32 index = kvm_rcx_read(vcpu);
5443 u64 new_eptp;
5444 bool accessed_dirty;
5445 struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
5446
5447 if (!nested_cpu_has_eptp_switching(vmcs12) ||
5448 !nested_cpu_has_ept(vmcs12))
5449 return 1;
5450
5451 if (index >= VMFUNC_EPTP_ENTRIES)
5452 return 1;
5453
5454
5455 if (kvm_vcpu_read_guest_page(vcpu, vmcs12->eptp_list_address >> PAGE_SHIFT,
5456 &new_eptp, index * 8, 8))
5457 return 1;
5458
5459 accessed_dirty = !!(new_eptp & VMX_EPTP_AD_ENABLE_BIT);
5460
5461
5462
5463
5464
5465 if (vmcs12->ept_pointer != new_eptp) {
5466 if (!nested_vmx_check_eptp(vcpu, new_eptp))
5467 return 1;
5468
5469 kvm_mmu_unload(vcpu);
5470 mmu->ept_ad = accessed_dirty;
5471 mmu->mmu_role.base.ad_disabled = !accessed_dirty;
5472 vmcs12->ept_pointer = new_eptp;
5473
5474
5475
5476
5477
5478 kvm_mmu_reload(vcpu);
5479 }
5480
5481 return 0;
5482}
5483
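/*
 * Emulate a VMFUNC exit.  A VMFUNC exit is only expected while running L2;
 * outside guest mode it injects #UD.  EPTP switching (leaf 0) is the only
 * function KVM emulates; any other leaf, or a failure, is reflected to L1
 * as a VMFUNC VM-exit.
 */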
5484static int handle_vmfunc(struct kvm_vcpu *vcpu)
5485{
5486 struct vcpu_vmx *vmx = to_vmx(vcpu);
5487 struct vmcs12 *vmcs12;
5488 u32 function = kvm_rax_read(vcpu);
5489
5490
5491
5492
5493
5494
5495 if (!is_guest_mode(vcpu)) {
5496 kvm_queue_exception(vcpu, UD_VECTOR);
5497 return 1;
5498 }
5499
5500 vmcs12 = get_vmcs12(vcpu);
5501 if (!(vmcs12->vm_function_control & BIT_ULL(function)))
5502 goto fail;
5503
5504 switch (function) {
5505 case 0:
5506 if (nested_vmx_eptp_switching(vcpu, vmcs12))
5507 goto fail;
5508 break;
5509 default:
5510 goto fail;
5511 }
5512 return kvm_skip_emulated_instruction(vcpu);
5513
5514fail:
5515 nested_vmx_vmexit(vcpu, vmx->exit_reason,
5516 vmx_get_intr_info(vcpu),
5517 vmx_get_exit_qual(vcpu));
5518 return 1;
5519}
5520
5521
5522
5523
5524
5525bool nested_vmx_check_io_bitmaps(struct kvm_vcpu *vcpu, unsigned int port,
5526 int size)
5527{
5528 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
5529 gpa_t bitmap, last_bitmap;
5530 u8 b;
5531
5532 last_bitmap = (gpa_t)-1;
5533 b = -1;
5534
5535 while (size > 0) {
5536 if (port < 0x8000)
5537 bitmap = vmcs12->io_bitmap_a;
5538 else if (port < 0x10000)
5539 bitmap = vmcs12->io_bitmap_b;
5540 else
5541 return true;
5542 bitmap += (port & 0x7fff) / 8;
5543
5544 if (last_bitmap != bitmap)
5545 if (kvm_vcpu_read_guest(vcpu, bitmap, &b, 1))
5546 return true;
5547 if (b & (1 << (port & 7)))
5548 return true;
5549
5550 port++;
5551 size--;
5552 last_bitmap = bitmap;
5553 }
5554
5555 return false;
5556}
5557
5558static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu,
5559 struct vmcs12 *vmcs12)
5560{
5561 unsigned long exit_qualification;
5562 unsigned short port;
5563 int size;
5564
5565 if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS))
5566 return nested_cpu_has(vmcs12, CPU_BASED_UNCOND_IO_EXITING);
5567
5568 exit_qualification = vmx_get_exit_qual(vcpu);
5569
5570 port = exit_qualification >> 16;
5571 size = (exit_qualification & 7) + 1;
5572
5573 return nested_vmx_check_io_bitmaps(vcpu, port, size);
5574}
5575
5576
5577
5578
5579
5580
5581
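/*
 * Return true if L1 wants to intercept this RDMSR/WRMSR.  Without MSR
 * bitmaps every MSR access exits to L1; otherwise the relevant bit is looked
 * up in L1's bitmap, whose layout is: low MSRs (0x00000000-0x00001fff) and
 * high MSRs (0xc0000000-0xc0001fff), each with separate 1024-byte read and
 * write halves (writes start at offset 2048).
 */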
5582static bool nested_vmx_exit_handled_msr(struct kvm_vcpu *vcpu,
5583 struct vmcs12 *vmcs12, u32 exit_reason)
5584{
5585 u32 msr_index = kvm_rcx_read(vcpu);
5586 gpa_t bitmap;
5587
5588 if (!nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS))
5589 return true;
5590
5591
5592
5593
5594
5595
5596 bitmap = vmcs12->msr_bitmap;
5597 if (exit_reason == EXIT_REASON_MSR_WRITE)
5598 bitmap += 2048;
5599 if (msr_index >= 0xc0000000) {
5600 msr_index -= 0xc0000000;
5601 bitmap += 1024;
5602 }
5603
5604
5605 if (msr_index < 1024*8) {
5606 unsigned char b;
5607 if (kvm_vcpu_read_guest(vcpu, bitmap + msr_index/8, &b, 1))
5608 return true;
5609 return 1 & (b >> (msr_index & 7));
5610 } else
5611 return true;
5612}
5613
5614
5615
5616
5617
5618
5619static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu,
5620 struct vmcs12 *vmcs12)
5621{
5622 unsigned long exit_qualification = vmx_get_exit_qual(vcpu);
5623 int cr = exit_qualification & 15;
5624 int reg;
5625 unsigned long val;
5626
5627 switch ((exit_qualification >> 4) & 3) {
5628 case 0:
5629 reg = (exit_qualification >> 8) & 15;
5630 val = kvm_register_readl(vcpu, reg);
5631 switch (cr) {
5632 case 0:
5633 if (vmcs12->cr0_guest_host_mask &
5634 (val ^ vmcs12->cr0_read_shadow))
5635 return true;
5636 break;
5637 case 3:
5638 if (nested_cpu_has(vmcs12, CPU_BASED_CR3_LOAD_EXITING))
5639 return true;
5640 break;
5641 case 4:
5642 if (vmcs12->cr4_guest_host_mask &
5643 (vmcs12->cr4_read_shadow ^ val))
5644 return true;
5645 break;
5646 case 8:
5647 if (nested_cpu_has(vmcs12, CPU_BASED_CR8_LOAD_EXITING))
5648 return true;
5649 break;
5650 }
5651 break;
5652 case 2:
5653 if ((vmcs12->cr0_guest_host_mask & X86_CR0_TS) &&
5654 (vmcs12->cr0_read_shadow & X86_CR0_TS))
5655 return true;
5656 break;
5657 case 1:
5658 switch (cr) {
5659 case 3:
5660 if (vmcs12->cpu_based_vm_exec_control &
5661 CPU_BASED_CR3_STORE_EXITING)
5662 return true;
5663 break;
5664 case 8:
5665 if (vmcs12->cpu_based_vm_exec_control &
5666 CPU_BASED_CR8_STORE_EXITING)
5667 return true;
5668 break;
5669 }
5670 break;
5671 case 3:
5672
5673
5674
5675
5676 val = (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f;
5677 if (vmcs12->cr0_guest_host_mask & 0xe &
5678 (val ^ vmcs12->cr0_read_shadow))
5679 return true;
5680 if ((vmcs12->cr0_guest_host_mask & 0x1) &&
5681 !(vmcs12->cr0_read_shadow & 0x1) &&
5682 (val & 0x1))
5683 return true;
5684 break;
5685 }
5686 return false;
5687}
5688
5689static bool nested_vmx_exit_handled_vmcs_access(struct kvm_vcpu *vcpu,
5690 struct vmcs12 *vmcs12, gpa_t bitmap)
5691{
5692 u32 vmx_instruction_info;
5693 unsigned long field;
5694 u8 b;
5695
5696 if (!nested_cpu_has_shadow_vmcs(vmcs12))
5697 return true;
5698
5699
5700 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
5701 field = kvm_register_read(vcpu, (((vmx_instruction_info) >> 28) & 0xf));
5702
5703
5704 if (field >> 15)
5705 return true;
5706
5707 if (kvm_vcpu_read_guest(vcpu, bitmap + field/8, &b, 1))
5708 return true;
5709
5710 return 1 & (b >> (field & 7));
5711}
5712
5713static bool nested_vmx_exit_handled_mtf(struct vmcs12 *vmcs12)
5714{
5715 u32 entry_intr_info = vmcs12->vm_entry_intr_info_field;
5716
5717 if (nested_cpu_has_mtf(vmcs12))
5718 return true;
5719
5720
5721
5722
5723
5724
5725
5726 return entry_intr_info == (INTR_INFO_VALID_MASK
5727 | INTR_TYPE_OTHER_EVENT);
5728}
5729
5730
5731
5732
5733
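/*
 * Return true if L0 (KVM) must handle this VM-exit itself, regardless of
 * whether L1 asked to intercept it: NMIs, external interrupts, machine
 * checks, page faults and debug/breakpoint exceptions that L0 owns, EPT
 * violations/misconfigs (these are against L0's EPT tables), the emulated
 * preemption timer, PML, VMFUNC and ENCLS exits.
 */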
5734static bool nested_vmx_l0_wants_exit(struct kvm_vcpu *vcpu, u32 exit_reason)
5735{
5736 u32 intr_info;
5737
5738 switch ((u16)exit_reason) {
5739 case EXIT_REASON_EXCEPTION_NMI:
5740 intr_info = vmx_get_intr_info(vcpu);
5741 if (is_nmi(intr_info))
5742 return true;
5743 else if (is_page_fault(intr_info))
5744 return vcpu->arch.apf.host_apf_flags || !enable_ept;
5745 else if (is_debug(intr_info) &&
5746 vcpu->guest_debug &
5747 (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
5748 return true;
5749 else if (is_breakpoint(intr_info) &&
5750 vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
5751 return true;
5752 return false;
5753 case EXIT_REASON_EXTERNAL_INTERRUPT:
5754 return true;
5755 case EXIT_REASON_MCE_DURING_VMENTRY:
5756 return true;
5757 case EXIT_REASON_EPT_VIOLATION:
5758
5759
5760
5761
5762
5763
5764 return true;
5765 case EXIT_REASON_EPT_MISCONFIG:
5766
5767
5768
5769
5770
5771
5772 return true;
5773 case EXIT_REASON_PREEMPTION_TIMER:
5774 return true;
5775 case EXIT_REASON_PML_FULL:
5776
5777 return true;
5778 case EXIT_REASON_VMFUNC:
5779
5780 return true;
5781 case EXIT_REASON_ENCLS:
5782
5783 return true;
5784 default:
5785 break;
5786 }
5787 return false;
5788}
5789
5790
5791
5792
5793
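/*
 * Return true if L1 wants to intercept this VM-exit, based on the controls
 * L1 programmed into vmcs12: the exception bitmap, the primary and secondary
 * execution controls, and the I/O, MSR and VMREAD/VMWRITE bitmaps.
 */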
5794static bool nested_vmx_l1_wants_exit(struct kvm_vcpu *vcpu, u32 exit_reason)
5795{
5796 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
5797 u32 intr_info;
5798
5799 switch ((u16)exit_reason) {
5800 case EXIT_REASON_EXCEPTION_NMI:
5801 intr_info = vmx_get_intr_info(vcpu);
5802 if (is_nmi(intr_info))
5803 return true;
5804 else if (is_page_fault(intr_info))
5805 return true;
5806 return vmcs12->exception_bitmap &
5807 (1u << (intr_info & INTR_INFO_VECTOR_MASK));
5808 case EXIT_REASON_EXTERNAL_INTERRUPT:
5809 return nested_exit_on_intr(vcpu);
5810 case EXIT_REASON_TRIPLE_FAULT:
5811 return true;
5812 case EXIT_REASON_INTERRUPT_WINDOW:
5813 return nested_cpu_has(vmcs12, CPU_BASED_INTR_WINDOW_EXITING);
5814 case EXIT_REASON_NMI_WINDOW:
5815 return nested_cpu_has(vmcs12, CPU_BASED_NMI_WINDOW_EXITING);
5816 case EXIT_REASON_TASK_SWITCH:
5817 return true;
5818 case EXIT_REASON_CPUID:
5819 return true;
5820 case EXIT_REASON_HLT:
5821 return nested_cpu_has(vmcs12, CPU_BASED_HLT_EXITING);
5822 case EXIT_REASON_INVD:
5823 return true;
5824 case EXIT_REASON_INVLPG:
5825 return nested_cpu_has(vmcs12, CPU_BASED_INVLPG_EXITING);
5826 case EXIT_REASON_RDPMC:
5827 return nested_cpu_has(vmcs12, CPU_BASED_RDPMC_EXITING);
5828 case EXIT_REASON_RDRAND:
5829 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDRAND_EXITING);
5830 case EXIT_REASON_RDSEED:
5831 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDSEED_EXITING);
5832 case EXIT_REASON_RDTSC: case EXIT_REASON_RDTSCP:
5833 return nested_cpu_has(vmcs12, CPU_BASED_RDTSC_EXITING);
5834 case EXIT_REASON_VMREAD:
5835 return nested_vmx_exit_handled_vmcs_access(vcpu, vmcs12,
5836 vmcs12->vmread_bitmap);
5837 case EXIT_REASON_VMWRITE:
5838 return nested_vmx_exit_handled_vmcs_access(vcpu, vmcs12,
5839 vmcs12->vmwrite_bitmap);
5840 case EXIT_REASON_VMCALL: case EXIT_REASON_VMCLEAR:
5841 case EXIT_REASON_VMLAUNCH: case EXIT_REASON_VMPTRLD:
5842 case EXIT_REASON_VMPTRST: case EXIT_REASON_VMRESUME:
5843 case EXIT_REASON_VMOFF: case EXIT_REASON_VMON:
5844 case EXIT_REASON_INVEPT: case EXIT_REASON_INVVPID:
5845
5846
5847
5848
5849 return true;
5850 case EXIT_REASON_CR_ACCESS:
5851 return nested_vmx_exit_handled_cr(vcpu, vmcs12);
5852 case EXIT_REASON_DR_ACCESS:
5853 return nested_cpu_has(vmcs12, CPU_BASED_MOV_DR_EXITING);
5854 case EXIT_REASON_IO_INSTRUCTION:
5855 return nested_vmx_exit_handled_io(vcpu, vmcs12);
5856 case EXIT_REASON_GDTR_IDTR: case EXIT_REASON_LDTR_TR:
5857 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_DESC);
5858 case EXIT_REASON_MSR_READ:
5859 case EXIT_REASON_MSR_WRITE:
5860 return nested_vmx_exit_handled_msr(vcpu, vmcs12, exit_reason);
5861 case EXIT_REASON_INVALID_STATE:
5862 return true;
5863 case EXIT_REASON_MWAIT_INSTRUCTION:
5864 return nested_cpu_has(vmcs12, CPU_BASED_MWAIT_EXITING);
5865 case EXIT_REASON_MONITOR_TRAP_FLAG:
5866 return nested_vmx_exit_handled_mtf(vmcs12);
5867 case EXIT_REASON_MONITOR_INSTRUCTION:
5868 return nested_cpu_has(vmcs12, CPU_BASED_MONITOR_EXITING);
5869 case EXIT_REASON_PAUSE_INSTRUCTION:
5870 return nested_cpu_has(vmcs12, CPU_BASED_PAUSE_EXITING) ||
5871 nested_cpu_has2(vmcs12,
5872 SECONDARY_EXEC_PAUSE_LOOP_EXITING);
5873 case EXIT_REASON_MCE_DURING_VMENTRY:
5874 return true;
5875 case EXIT_REASON_TPR_BELOW_THRESHOLD:
5876 return nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW);
5877 case EXIT_REASON_APIC_ACCESS:
5878 case EXIT_REASON_APIC_WRITE:
5879 case EXIT_REASON_EOI_INDUCED:
5880
5881
5882
5883
5884
5885 return true;
5886 case EXIT_REASON_INVPCID:
5887 return
5888 nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_INVPCID) &&
5889 nested_cpu_has(vmcs12, CPU_BASED_INVLPG_EXITING);
5890 case EXIT_REASON_WBINVD:
5891 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_WBINVD_EXITING);
5892 case EXIT_REASON_XSETBV:
5893 return true;
5894 case EXIT_REASON_XSAVES: case EXIT_REASON_XRSTORS:
5895
5896
5897
5898
5899
5900
5901 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES);
5902 case EXIT_REASON_UMWAIT:
5903 case EXIT_REASON_TPAUSE:
5904 return nested_cpu_has2(vmcs12,
5905 SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE);
5906 default:
5907 return true;
5908 }
5909}
5910
5911
5912
5913
5914
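/*
 * Decide whether a VM-exit that occurred while running L2 should be
 * reflected to L1.  Exits that L0 wants, or that L1 did not ask for, return
 * false and are handled by KVM; otherwise the exit (including any exception
 * error code and the exit qualification) is injected into L1 via
 * nested_vmx_vmexit() and true is returned.
 */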
5915bool nested_vmx_reflect_vmexit(struct kvm_vcpu *vcpu)
5916{
5917 struct vcpu_vmx *vmx = to_vmx(vcpu);
5918 u32 exit_reason = vmx->exit_reason;
5919 unsigned long exit_qual;
5920 u32 exit_intr_info;
5921
5922 WARN_ON_ONCE(vmx->nested.nested_run_pending);
5923
5924
5925
5926
5927
5928 if (unlikely(vmx->fail)) {
5929 trace_kvm_nested_vmenter_failed(
5930 "hardware VM-instruction error: ",
5931 vmcs_read32(VM_INSTRUCTION_ERROR));
5932 exit_intr_info = 0;
5933 exit_qual = 0;
5934 goto reflect_vmexit;
5935 }
5936
5937 trace_kvm_nested_vmexit(exit_reason, vcpu, KVM_ISA_VMX);
5938
5939
5940 if (nested_vmx_l0_wants_exit(vcpu, exit_reason))
5941 return false;
5942
5943
5944 if (!nested_vmx_l1_wants_exit(vcpu, exit_reason))
5945 return false;
5946
5947
5948
5949
5950
5951
5952
5953 exit_intr_info = vmx_get_intr_info(vcpu);
5954 if (is_exception_with_error_code(exit_intr_info)) {
5955 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
5956
5957 vmcs12->vm_exit_intr_error_code =
5958 vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
5959 }
5960 exit_qual = vmx_get_exit_qual(vcpu);
5961
5962reflect_vmexit:
5963 nested_vmx_vmexit(vcpu, exit_reason, exit_intr_info, exit_qual);
5964 return true;
5965}
5966
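/*
 * KVM_GET_NESTED_STATE: report the size and contents of the vCPU's nested
 * state to userspace - header flags, the VMXON and current-VMCS pointers,
 * the (freshly synced) vmcs12 and, when L2 uses a shadow VMCS, the cached
 * shadow vmcs12 as well.
 */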
5967static int vmx_get_nested_state(struct kvm_vcpu *vcpu,
5968 struct kvm_nested_state __user *user_kvm_nested_state,
5969 u32 user_data_size)
5970{
5971 struct vcpu_vmx *vmx;
5972 struct vmcs12 *vmcs12;
5973 struct kvm_nested_state kvm_state = {
5974 .flags = 0,
5975 .format = KVM_STATE_NESTED_FORMAT_VMX,
5976 .size = sizeof(kvm_state),
5977 .hdr.vmx.flags = 0,
5978 .hdr.vmx.vmxon_pa = -1ull,
5979 .hdr.vmx.vmcs12_pa = -1ull,
5980 .hdr.vmx.preemption_timer_deadline = 0,
5981 };
5982 struct kvm_vmx_nested_state_data __user *user_vmx_nested_state =
5983 &user_kvm_nested_state->data.vmx[0];
5984
5985 if (!vcpu)
5986 return kvm_state.size + sizeof(*user_vmx_nested_state);
5987
5988 vmx = to_vmx(vcpu);
5989 vmcs12 = get_vmcs12(vcpu);
5990
5991 if (nested_vmx_allowed(vcpu) &&
5992 (vmx->nested.vmxon || vmx->nested.smm.vmxon)) {
5993 kvm_state.hdr.vmx.vmxon_pa = vmx->nested.vmxon_ptr;
5994 kvm_state.hdr.vmx.vmcs12_pa = vmx->nested.current_vmptr;
5995
5996 if (vmx_has_valid_vmcs12(vcpu)) {
5997 kvm_state.size += sizeof(user_vmx_nested_state->vmcs12);
5998
5999 if (vmx->nested.hv_evmcs)
6000 kvm_state.flags |= KVM_STATE_NESTED_EVMCS;
6001
6002 if (is_guest_mode(vcpu) &&
6003 nested_cpu_has_shadow_vmcs(vmcs12) &&
6004 vmcs12->vmcs_link_pointer != -1ull)
6005 kvm_state.size += sizeof(user_vmx_nested_state->shadow_vmcs12);
6006 }
6007
6008 if (vmx->nested.smm.vmxon)
6009 kvm_state.hdr.vmx.smm.flags |= KVM_STATE_NESTED_SMM_VMXON;
6010
6011 if (vmx->nested.smm.guest_mode)
6012 kvm_state.hdr.vmx.smm.flags |= KVM_STATE_NESTED_SMM_GUEST_MODE;
6013
6014 if (is_guest_mode(vcpu)) {
6015 kvm_state.flags |= KVM_STATE_NESTED_GUEST_MODE;
6016
6017 if (vmx->nested.nested_run_pending)
6018 kvm_state.flags |= KVM_STATE_NESTED_RUN_PENDING;
6019
6020 if (vmx->nested.mtf_pending)
6021 kvm_state.flags |= KVM_STATE_NESTED_MTF_PENDING;
6022
6023 if (nested_cpu_has_preemption_timer(vmcs12) &&
6024 vmx->nested.has_preemption_timer_deadline) {
6025 kvm_state.hdr.vmx.flags |=
6026 KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE;
6027 kvm_state.hdr.vmx.preemption_timer_deadline =
6028 vmx->nested.preemption_timer_deadline;
6029 }
6030 }
6031 }
6032
6033 if (user_data_size < kvm_state.size)
6034 goto out;
6035
6036 if (copy_to_user(user_kvm_nested_state, &kvm_state, sizeof(kvm_state)))
6037 return -EFAULT;
6038
6039 if (!vmx_has_valid_vmcs12(vcpu))
6040 goto out;
6041
6042
6043
6044
6045
6046
6047
6048
6049 if (is_guest_mode(vcpu)) {
6050 sync_vmcs02_to_vmcs12(vcpu, vmcs12);
6051 sync_vmcs02_to_vmcs12_rare(vcpu, vmcs12);
6052 } else if (!vmx->nested.need_vmcs12_to_shadow_sync) {
6053 if (vmx->nested.hv_evmcs)
6054 copy_enlightened_to_vmcs12(vmx);
6055 else if (enable_shadow_vmcs)
6056 copy_shadow_to_vmcs12(vmx);
6057 }
6058
6059 BUILD_BUG_ON(sizeof(user_vmx_nested_state->vmcs12) < VMCS12_SIZE);
6060 BUILD_BUG_ON(sizeof(user_vmx_nested_state->shadow_vmcs12) < VMCS12_SIZE);
6061
6062
6063
6064
6065
6066 if (copy_to_user(user_vmx_nested_state->vmcs12, vmcs12, VMCS12_SIZE))
6067 return -EFAULT;
6068
6069 if (nested_cpu_has_shadow_vmcs(vmcs12) &&
6070 vmcs12->vmcs_link_pointer != -1ull) {
6071 if (copy_to_user(user_vmx_nested_state->shadow_vmcs12,
6072 get_shadow_vmcs12(vcpu), VMCS12_SIZE))
6073 return -EFAULT;
6074 }
6075out:
6076 return kvm_state.size;
6077}
6078
6079
6080
6081
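/*
 * Forcibly leave nested operation, e.g. when userspace replaces the nested
 * state (see vmx_set_nested_state()): emulate a VM-exit with no exit
 * information (vm_exit_reason == -1) if the vCPU is in guest mode, then
 * free all nested state.
 */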
6082void vmx_leave_nested(struct kvm_vcpu *vcpu)
6083{
6084 if (is_guest_mode(vcpu)) {
6085 to_vmx(vcpu)->nested.nested_run_pending = 0;
6086 nested_vmx_vmexit(vcpu, -1, 0, 0);
6087 }
6088 free_nested(vcpu);
6089}
6090
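/*
 * KVM_SET_NESTED_STATE: validate the userspace-provided header, re-enter
 * VMX operation, restore the cached vmcs12 (and shadow vmcs12 if present),
 * and re-enter VMX non-root mode if the vCPU was saved while in guest mode.
 */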
6091static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
6092 struct kvm_nested_state __user *user_kvm_nested_state,
6093 struct kvm_nested_state *kvm_state)
6094{
6095 struct vcpu_vmx *vmx = to_vmx(vcpu);
6096 struct vmcs12 *vmcs12;
6097 enum vm_entry_failure_code ignored;
6098 struct kvm_vmx_nested_state_data __user *user_vmx_nested_state =
6099 &user_kvm_nested_state->data.vmx[0];
6100 int ret;
6101
6102 if (kvm_state->format != KVM_STATE_NESTED_FORMAT_VMX)
6103 return -EINVAL;
6104
6105 if (kvm_state->hdr.vmx.vmxon_pa == -1ull) {
6106 if (kvm_state->hdr.vmx.smm.flags)
6107 return -EINVAL;
6108
6109 if (kvm_state->hdr.vmx.vmcs12_pa != -1ull)
6110 return -EINVAL;
6111
6112
6113
6114
6115
6116
6117
6118
6119
6120
6121 if (kvm_state->flags & ~KVM_STATE_NESTED_EVMCS)
6122 return -EINVAL;
6123 } else {
6124 if (!nested_vmx_allowed(vcpu))
6125 return -EINVAL;
6126
6127 if (!page_address_valid(vcpu, kvm_state->hdr.vmx.vmxon_pa))
6128 return -EINVAL;
6129 }
6130
6131 if ((kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE) &&
6132 (kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE))
6133 return -EINVAL;
6134
6135 if (kvm_state->hdr.vmx.smm.flags &
6136 ~(KVM_STATE_NESTED_SMM_GUEST_MODE | KVM_STATE_NESTED_SMM_VMXON))
6137 return -EINVAL;
6138
6139 if (kvm_state->hdr.vmx.flags & ~KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE)
6140 return -EINVAL;
6141
6142
6143
6144
6145
6146
6147 if (is_smm(vcpu) ?
6148 (kvm_state->flags &
6149 (KVM_STATE_NESTED_GUEST_MODE | KVM_STATE_NESTED_RUN_PENDING))
6150 : kvm_state->hdr.vmx.smm.flags)
6151 return -EINVAL;
6152
6153 if ((kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE) &&
6154 !(kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_VMXON))
6155 return -EINVAL;
6156
6157 if ((kvm_state->flags & KVM_STATE_NESTED_EVMCS) &&
6158 (!nested_vmx_allowed(vcpu) || !vmx->nested.enlightened_vmcs_enabled))
6159 return -EINVAL;
6160
6161 vmx_leave_nested(vcpu);
6162
6163 if (kvm_state->hdr.vmx.vmxon_pa == -1ull)
6164 return 0;
6165
6166 vmx->nested.vmxon_ptr = kvm_state->hdr.vmx.vmxon_pa;
6167 ret = enter_vmx_operation(vcpu);
6168 if (ret)
6169 return ret;
6170
6171
6172 if (kvm_state->size < sizeof(*kvm_state) + sizeof(*vmcs12)) {
6173
6174 if ((kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE) ||
6175 (kvm_state->flags & KVM_STATE_NESTED_EVMCS) ||
6176 (kvm_state->hdr.vmx.vmcs12_pa != -1ull))
6177 return -EINVAL;
6178 else
6179 return 0;
6180 }
6181
6182 if (kvm_state->hdr.vmx.vmcs12_pa != -1ull) {
6183 if (kvm_state->hdr.vmx.vmcs12_pa == kvm_state->hdr.vmx.vmxon_pa ||
6184 !page_address_valid(vcpu, kvm_state->hdr.vmx.vmcs12_pa))
6185 return -EINVAL;
6186
6187 set_current_vmptr(vmx, kvm_state->hdr.vmx.vmcs12_pa);
6188 } else if (kvm_state->flags & KVM_STATE_NESTED_EVMCS) {
6189
6190
6191
6192
6193
6194
6195 kvm_make_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
6196 } else {
6197 return -EINVAL;
6198 }
6199
6200 if (kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_VMXON) {
6201 vmx->nested.smm.vmxon = true;
6202 vmx->nested.vmxon = false;
6203
6204 if (kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE)
6205 vmx->nested.smm.guest_mode = true;
6206 }
6207
6208 vmcs12 = get_vmcs12(vcpu);
6209 if (copy_from_user(vmcs12, user_vmx_nested_state->vmcs12, sizeof(*vmcs12)))
6210 return -EFAULT;
6211
6212 if (vmcs12->hdr.revision_id != VMCS12_REVISION)
6213 return -EINVAL;
6214
6215 if (!(kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE))
6216 return 0;
6217
6218 vmx->nested.nested_run_pending =
6219 !!(kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING);
6220
6221 vmx->nested.mtf_pending =
6222 !!(kvm_state->flags & KVM_STATE_NESTED_MTF_PENDING);
6223
6224 ret = -EINVAL;
6225 if (nested_cpu_has_shadow_vmcs(vmcs12) &&
6226 vmcs12->vmcs_link_pointer != -1ull) {
6227 struct vmcs12 *shadow_vmcs12 = get_shadow_vmcs12(vcpu);
6228
6229 if (kvm_state->size <
6230 sizeof(*kvm_state) +
6231 sizeof(user_vmx_nested_state->vmcs12) + sizeof(*shadow_vmcs12))
6232 goto error_guest_mode;
6233
6234 if (copy_from_user(shadow_vmcs12,
6235 user_vmx_nested_state->shadow_vmcs12,
6236 sizeof(*shadow_vmcs12))) {
6237 ret = -EFAULT;
6238 goto error_guest_mode;
6239 }
6240
6241 if (shadow_vmcs12->hdr.revision_id != VMCS12_REVISION ||
6242 !shadow_vmcs12->hdr.shadow_vmcs)
6243 goto error_guest_mode;
6244 }
6245
6246 vmx->nested.has_preemption_timer_deadline = false;
6247 if (kvm_state->hdr.vmx.flags & KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE) {
6248 vmx->nested.has_preemption_timer_deadline = true;
6249 vmx->nested.preemption_timer_deadline =
6250 kvm_state->hdr.vmx.preemption_timer_deadline;
6251 }
6252
6253 if (nested_vmx_check_controls(vcpu, vmcs12) ||
6254 nested_vmx_check_host_state(vcpu, vmcs12) ||
6255 nested_vmx_check_guest_state(vcpu, vmcs12, &ignored))
6256 goto error_guest_mode;
6257
6258 vmx->nested.dirty_vmcs12 = true;
6259 ret = nested_vmx_enter_non_root_mode(vcpu, false);
6260 if (ret)
6261 goto error_guest_mode;
6262
6263 return 0;
6264
6265error_guest_mode:
6266 vmx->nested.nested_run_pending = 0;
6267 return ret;
6268}
6269
6270void nested_vmx_set_vmcs_shadowing_bitmap(void)
6271{
6272 if (enable_shadow_vmcs) {
6273 vmcs_write64(VMREAD_BITMAP, __pa(vmx_vmread_bitmap));
6274 vmcs_write64(VMWRITE_BITMAP, __pa(vmx_vmwrite_bitmap));
6275 }
6276}
6277
6278
6279
6280
6281
6282
6283
6284
6285
6286
6287
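/*
 * Compute the VMX capability MSR values exposed to L1: start from the host's
 * MSRs, mask each control class down to the features KVM can emulate for a
 * nested guest, and force-set the bits KVM always provides (e.g. the
 * emulated VMX preemption timer and the "true" controls in IA32_VMX_BASIC).
 */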
6288void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps)
6289{
6290
6291
6292
6293
6294
6295
6296
6297
6298
6299
6300
6301
6302
6303
6304
6305
6306 rdmsr(MSR_IA32_VMX_PINBASED_CTLS,
6307 msrs->pinbased_ctls_low,
6308 msrs->pinbased_ctls_high);
6309 msrs->pinbased_ctls_low |=
6310 PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
6311 msrs->pinbased_ctls_high &=
6312 PIN_BASED_EXT_INTR_MASK |
6313 PIN_BASED_NMI_EXITING |
6314 PIN_BASED_VIRTUAL_NMIS |
6315 (enable_apicv ? PIN_BASED_POSTED_INTR : 0);
6316 msrs->pinbased_ctls_high |=
6317 PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR |
6318 PIN_BASED_VMX_PREEMPTION_TIMER;
6319
6320
6321 rdmsr(MSR_IA32_VMX_EXIT_CTLS,
6322 msrs->exit_ctls_low,
6323 msrs->exit_ctls_high);
6324 msrs->exit_ctls_low =
6325 VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR;
6326
6327 msrs->exit_ctls_high &=
6328#ifdef CONFIG_X86_64
6329 VM_EXIT_HOST_ADDR_SPACE_SIZE |
6330#endif
6331 VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT |
6332 VM_EXIT_CLEAR_BNDCFGS | VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL;
6333 msrs->exit_ctls_high |=
6334 VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR |
6335 VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER |
6336 VM_EXIT_SAVE_VMX_PREEMPTION_TIMER | VM_EXIT_ACK_INTR_ON_EXIT;
6337
6338
6339 msrs->exit_ctls_low &= ~VM_EXIT_SAVE_DEBUG_CONTROLS;
6340
6341
6342 rdmsr(MSR_IA32_VMX_ENTRY_CTLS,
6343 msrs->entry_ctls_low,
6344 msrs->entry_ctls_high);
6345 msrs->entry_ctls_low =
6346 VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR;
6347 msrs->entry_ctls_high &=
6348#ifdef CONFIG_X86_64
6349 VM_ENTRY_IA32E_MODE |
6350#endif
6351 VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_LOAD_BNDCFGS |
6352 VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL;
6353 msrs->entry_ctls_high |=
6354 (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR | VM_ENTRY_LOAD_IA32_EFER);
6355
6356
6357 msrs->entry_ctls_low &= ~VM_ENTRY_LOAD_DEBUG_CONTROLS;
6358
6359
6360 rdmsr(MSR_IA32_VMX_PROCBASED_CTLS,
6361 msrs->procbased_ctls_low,
6362 msrs->procbased_ctls_high);
6363 msrs->procbased_ctls_low =
6364 CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
6365 msrs->procbased_ctls_high &=
6366 CPU_BASED_INTR_WINDOW_EXITING |
6367 CPU_BASED_NMI_WINDOW_EXITING | CPU_BASED_USE_TSC_OFFSETTING |
6368 CPU_BASED_HLT_EXITING | CPU_BASED_INVLPG_EXITING |
6369 CPU_BASED_MWAIT_EXITING | CPU_BASED_CR3_LOAD_EXITING |
6370 CPU_BASED_CR3_STORE_EXITING |
6371#ifdef CONFIG_X86_64
6372 CPU_BASED_CR8_LOAD_EXITING | CPU_BASED_CR8_STORE_EXITING |
6373#endif
6374 CPU_BASED_MOV_DR_EXITING | CPU_BASED_UNCOND_IO_EXITING |
6375 CPU_BASED_USE_IO_BITMAPS | CPU_BASED_MONITOR_TRAP_FLAG |
6376 CPU_BASED_MONITOR_EXITING | CPU_BASED_RDPMC_EXITING |
6377 CPU_BASED_RDTSC_EXITING | CPU_BASED_PAUSE_EXITING |
6378 CPU_BASED_TPR_SHADOW | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
6379
6380
6381
6382
6383
6384
6385 msrs->procbased_ctls_high |=
6386 CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR |
6387 CPU_BASED_USE_MSR_BITMAPS;
6388
6389
6390 msrs->procbased_ctls_low &=
6391 ~(CPU_BASED_CR3_LOAD_EXITING | CPU_BASED_CR3_STORE_EXITING);
6392
6393
6394
6395
6396
6397
6398 if (msrs->procbased_ctls_high & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)
6399 rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2,
6400 msrs->secondary_ctls_low,
6401 msrs->secondary_ctls_high);
6402
6403 msrs->secondary_ctls_low = 0;
6404 msrs->secondary_ctls_high &=
6405 SECONDARY_EXEC_DESC |
6406 SECONDARY_EXEC_ENABLE_RDTSCP |
6407 SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
6408 SECONDARY_EXEC_WBINVD_EXITING |
6409 SECONDARY_EXEC_APIC_REGISTER_VIRT |
6410 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
6411 SECONDARY_EXEC_RDRAND_EXITING |
6412 SECONDARY_EXEC_ENABLE_INVPCID |
6413 SECONDARY_EXEC_RDSEED_EXITING |
6414 SECONDARY_EXEC_XSAVES;
6415
6416
6417
6418
6419
6420 msrs->secondary_ctls_high |=
6421 SECONDARY_EXEC_SHADOW_VMCS;
6422
6423 if (enable_ept) {
6424
6425 msrs->secondary_ctls_high |=
6426 SECONDARY_EXEC_ENABLE_EPT;
6427 msrs->ept_caps =
6428 VMX_EPT_PAGE_WALK_4_BIT |
6429 VMX_EPT_PAGE_WALK_5_BIT |
6430 VMX_EPTP_WB_BIT |
6431 VMX_EPT_INVEPT_BIT |
6432 VMX_EPT_EXECUTE_ONLY_BIT;
6433
6434 msrs->ept_caps &= ept_caps;
6435 msrs->ept_caps |= VMX_EPT_EXTENT_GLOBAL_BIT |
6436 VMX_EPT_EXTENT_CONTEXT_BIT | VMX_EPT_2MB_PAGE_BIT |
6437 VMX_EPT_1GB_PAGE_BIT;
6438 if (enable_ept_ad_bits) {
6439 msrs->secondary_ctls_high |=
6440 SECONDARY_EXEC_ENABLE_PML;
6441 msrs->ept_caps |= VMX_EPT_AD_BIT;
6442 }
6443 }
6444
6445 if (cpu_has_vmx_vmfunc()) {
6446 msrs->secondary_ctls_high |=
6447 SECONDARY_EXEC_ENABLE_VMFUNC;
6448
6449
6450
6451
6452 if (enable_ept)
6453 msrs->vmfunc_controls =
6454 VMX_VMFUNC_EPTP_SWITCHING;
6455 }
6456
6457
6458
6459
6460
6461
6462
6463 if (enable_vpid) {
6464 msrs->secondary_ctls_high |=
6465 SECONDARY_EXEC_ENABLE_VPID;
6466 msrs->vpid_caps = VMX_VPID_INVVPID_BIT |
6467 VMX_VPID_EXTENT_SUPPORTED_MASK;
6468 }
6469
6470 if (enable_unrestricted_guest)
6471 msrs->secondary_ctls_high |=
6472 SECONDARY_EXEC_UNRESTRICTED_GUEST;
6473
6474 if (flexpriority_enabled)
6475 msrs->secondary_ctls_high |=
6476 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
6477
6478
6479 rdmsr(MSR_IA32_VMX_MISC,
6480 msrs->misc_low,
6481 msrs->misc_high);
6482 msrs->misc_low &= VMX_MISC_SAVE_EFER_LMA;
6483 msrs->misc_low |=
6484 MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS |
6485 VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE |
6486 VMX_MISC_ACTIVITY_HLT;
6487 msrs->misc_high = 0;
6488
6489
6490
6491
6492
6493
6494
6495 msrs->basic =
6496 VMCS12_REVISION |
6497 VMX_BASIC_TRUE_CTLS |
6498 ((u64)VMCS12_SIZE << VMX_BASIC_VMCS_SIZE_SHIFT) |
6499 (VMX_BASIC_MEM_TYPE_WB << VMX_BASIC_MEM_TYPE_SHIFT);
6500
6501 if (cpu_has_vmx_basic_inout())
6502 msrs->basic |= VMX_BASIC_INOUT;
6503
6504
6505
6506
6507
6508
6509#define VMXON_CR0_ALWAYSON (X86_CR0_PE | X86_CR0_PG | X86_CR0_NE)
6510#define VMXON_CR4_ALWAYSON X86_CR4_VMXE
6511 msrs->cr0_fixed0 = VMXON_CR0_ALWAYSON;
6512 msrs->cr4_fixed0 = VMXON_CR4_ALWAYSON;
6513
6514
6515 rdmsrl(MSR_IA32_VMX_CR0_FIXED1, msrs->cr0_fixed1);
6516 rdmsrl(MSR_IA32_VMX_CR4_FIXED1, msrs->cr4_fixed1);
6517
6518
6519 msrs->vmcs_enum = VMCS12_MAX_FIELD_INDEX << 1;
6520}
6521
6522void nested_vmx_hardware_unsetup(void)
6523{
6524 int i;
6525
6526 if (enable_shadow_vmcs) {
6527 for (i = 0; i < VMX_BITMAP_NR; i++)
6528 free_page((unsigned long)vmx_bitmap[i]);
6529 }
6530}
6531
6532__init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *))
6533{
6534 int i;
6535
6536 if (!cpu_has_vmx_shadow_vmcs())
6537 enable_shadow_vmcs = 0;
6538 if (enable_shadow_vmcs) {
6539 for (i = 0; i < VMX_BITMAP_NR; i++) {
6540
6541
6542
6543
6544 vmx_bitmap[i] = (unsigned long *)
6545 __get_free_page(GFP_KERNEL);
6546 if (!vmx_bitmap[i]) {
6547 nested_vmx_hardware_unsetup();
6548 return -ENOMEM;
6549 }
6550 }
6551
6552 init_vmcs_shadow_fields();
6553 }
6554
6555 exit_handlers[EXIT_REASON_VMCLEAR] = handle_vmclear;
6556 exit_handlers[EXIT_REASON_VMLAUNCH] = handle_vmlaunch;
6557 exit_handlers[EXIT_REASON_VMPTRLD] = handle_vmptrld;
6558 exit_handlers[EXIT_REASON_VMPTRST] = handle_vmptrst;
6559 exit_handlers[EXIT_REASON_VMREAD] = handle_vmread;
6560 exit_handlers[EXIT_REASON_VMRESUME] = handle_vmresume;
6561 exit_handlers[EXIT_REASON_VMWRITE] = handle_vmwrite;
6562 exit_handlers[EXIT_REASON_VMOFF] = handle_vmoff;
6563 exit_handlers[EXIT_REASON_VMON] = handle_vmon;
6564 exit_handlers[EXIT_REASON_INVEPT] = handle_invept;
6565 exit_handlers[EXIT_REASON_INVVPID] = handle_invvpid;
6566 exit_handlers[EXIT_REASON_VMFUNC] = handle_vmfunc;
6567
6568 return 0;
6569}
6570
6571struct kvm_x86_nested_ops vmx_nested_ops = {
6572 .check_events = vmx_check_nested_events,
6573 .hv_timer_pending = nested_vmx_preemption_timer_pending,
6574 .get_state = vmx_get_nested_state,
6575 .set_state = vmx_set_nested_state,
6576 .get_nested_state_pages = nested_get_vmcs12_pages,
6577 .write_log_dirty = nested_vmx_write_pml_buffer,
6578 .enable_evmcs = nested_enable_evmcs,
6579 .get_evmcs_version = nested_get_evmcs_version,
6580};
6581