// SPDX-License-Identifier: GPL-2.0

#include <linux/frame.h>
#include <linux/percpu.h>

#include <asm/debugreg.h>
#include <asm/mmu_context.h>

#include "cpuid.h"
#include "hyperv.h"
#include "mmu.h"
#include "nested.h"
#include "trace.h"
#include "x86.h"

static bool __read_mostly enable_shadow_vmcs = 1;
module_param_named(enable_shadow_vmcs, enable_shadow_vmcs, bool, S_IRUGO);

static bool __read_mostly nested_early_check = 0;
module_param(nested_early_check, bool, S_IRUGO);

/*
 * Hyper-V requires all of these, so mark them as supported even though
 * they are just treated the same as all-context.
 */
#define VMX_VPID_EXTENT_SUPPORTED_MASK		\
	(VMX_VPID_EXTENT_INDIVIDUAL_ADDR_BIT |	\
	VMX_VPID_EXTENT_SINGLE_CONTEXT_BIT |	\
	VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT |	\
	VMX_VPID_EXTENT_SINGLE_NON_GLOBAL_BIT)

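/*
 * The rate advertised in IA32_VMX_MISC[4:0]: the emulated VMX-preemption
 * timer ticks once every 2^5 = 32 TSC cycles.  vmx_start_preemption_timer()
 * shifts the vmcs12 timer value by this amount when converting to ns.
 */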
#define VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE 5

enum {
	VMX_VMREAD_BITMAP,
	VMX_VMWRITE_BITMAP,
	VMX_BITMAP_NR
};
static unsigned long *vmx_bitmap[VMX_BITMAP_NR];

#define vmx_vmread_bitmap	(vmx_bitmap[VMX_VMREAD_BITMAP])
#define vmx_vmwrite_bitmap	(vmx_bitmap[VMX_VMWRITE_BITMAP])

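/*
 * A shadow VMCS field table entry pairs a VMCS field encoding with the byte
 * offset of the corresponding member in struct vmcs12; the tables below are
 * generated from vmcs_shadow_fields.h.
 */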
struct shadow_vmcs_field {
	u16	encoding;
	u16	offset;
};
static struct shadow_vmcs_field shadow_read_only_fields[] = {
#define SHADOW_FIELD_RO(x, y) { x, offsetof(struct vmcs12, y) },
#include "vmcs_shadow_fields.h"
};
static int max_shadow_read_only_fields =
	ARRAY_SIZE(shadow_read_only_fields);

static struct shadow_vmcs_field shadow_read_write_fields[] = {
#define SHADOW_FIELD_RW(x, y) { x, offsetof(struct vmcs12, y) },
#include "vmcs_shadow_fields.h"
};
static int max_shadow_read_write_fields =
	ARRAY_SIZE(shadow_read_write_fields);

static void init_vmcs_shadow_fields(void)
{
	int i, j;

	memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE);
	memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE);

	for (i = j = 0; i < max_shadow_read_only_fields; i++) {
		struct shadow_vmcs_field entry = shadow_read_only_fields[i];
		u16 field = entry.encoding;

		if (vmcs_field_width(field) == VMCS_FIELD_WIDTH_U64 &&
		    (i + 1 == max_shadow_read_only_fields ||
		     shadow_read_only_fields[i + 1].encoding != field + 1))
			pr_err("Missing field from shadow_read_only_field %x\n",
			       field + 1);

		clear_bit(field, vmx_vmread_bitmap);
		if (field & 1)
#ifdef CONFIG_X86_64
			continue;
#else
			entry.offset += sizeof(u32);
#endif
		shadow_read_only_fields[j++] = entry;
	}
	max_shadow_read_only_fields = j;

	for (i = j = 0; i < max_shadow_read_write_fields; i++) {
		struct shadow_vmcs_field entry = shadow_read_write_fields[i];
		u16 field = entry.encoding;

		if (vmcs_field_width(field) == VMCS_FIELD_WIDTH_U64 &&
		    (i + 1 == max_shadow_read_write_fields ||
		     shadow_read_write_fields[i + 1].encoding != field + 1))
			pr_err("Missing field from shadow_read_write_field %x\n",
			       field + 1);

		WARN_ONCE(field >= GUEST_ES_AR_BYTES &&
			  field <= GUEST_TR_AR_BYTES,
			  "Update vmcs12_write_any() to drop reserved bits from AR_BYTES");

		/*
		 * PML and the preemption timer can be emulated, but the
		 * processor cannot vmwrite to fields that don't exist
		 * on bare metal.
		 */
		switch (field) {
		case GUEST_PML_INDEX:
			if (!cpu_has_vmx_pml())
				continue;
			break;
		case VMX_PREEMPTION_TIMER_VALUE:
			if (!cpu_has_vmx_preemption_timer())
				continue;
			break;
		case GUEST_INTR_STATUS:
			if (!cpu_has_vmx_apicv())
				continue;
			break;
		default:
			break;
		}

		clear_bit(field, vmx_vmwrite_bitmap);
		clear_bit(field, vmx_vmread_bitmap);
		if (field & 1)
#ifdef CONFIG_X86_64
			continue;
#else
			entry.offset += sizeof(u32);
#endif
		shadow_read_write_fields[j++] = entry;
	}
	max_shadow_read_write_fields = j;
}

/*
 * The following 3 functions, nested_vmx_succeed()/failValid()/failInvalid(),
 * set the success or failure of the emulated VMX instruction (as specified
 * in the SDM's chapter "VM INSTRUCTION ERROR NUMBERS"), and skip the
 * emulated instruction.
 */
static int nested_vmx_succeed(struct kvm_vcpu *vcpu)
{
	vmx_set_rflags(vcpu, vmx_get_rflags(vcpu)
			& ~(X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
			    X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_OF));
	return kvm_skip_emulated_instruction(vcpu);
}

static int nested_vmx_failInvalid(struct kvm_vcpu *vcpu)
{
	vmx_set_rflags(vcpu, (vmx_get_rflags(vcpu)
			& ~(X86_EFLAGS_PF | X86_EFLAGS_AF | X86_EFLAGS_ZF |
			    X86_EFLAGS_SF | X86_EFLAGS_OF))
			| X86_EFLAGS_CF);
	return kvm_skip_emulated_instruction(vcpu);
}

static int nested_vmx_failValid(struct kvm_vcpu *vcpu,
				u32 vm_instruction_error)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);

	/*
	 * failValid writes the error number to the current VMCS, which
	 * can't be done if there isn't a current VMCS.
	 */
	if (vmx->nested.current_vmptr == -1ull && !vmx->nested.hv_evmcs)
		return nested_vmx_failInvalid(vcpu);

	vmx_set_rflags(vcpu, (vmx_get_rflags(vcpu)
			& ~(X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
			    X86_EFLAGS_SF | X86_EFLAGS_OF))
			| X86_EFLAGS_ZF);
	get_vmcs12(vcpu)->vm_instruction_error = vm_instruction_error;

	/*
	 * We don't need to force a shadow sync because
	 * VM_INSTRUCTION_ERROR is not shadowed
	 */
	return kvm_skip_emulated_instruction(vcpu);
}

static void nested_vmx_abort(struct kvm_vcpu *vcpu, u32 indicator)
{
	/* TODO: not to reset guest simply here. */
	kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
	pr_debug_ratelimited("kvm: nested vmx abort, indicator %d\n", indicator);
}

static void vmx_disable_shadow_vmcs(struct vcpu_vmx *vmx)
{
	secondary_exec_controls_clearbit(vmx, SECONDARY_EXEC_SHADOW_VMCS);
	vmcs_write64(VMCS_LINK_POINTER, -1ull);
	vmx->nested.need_vmcs12_to_shadow_sync = false;
}

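/*
 * Unmap the cached Hyper-V enlightened VMCS and forget its guest-physical
 * address, dirtying the backing page on the way out.
 */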
static inline void nested_release_evmcs(struct kvm_vcpu *vcpu)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);

	if (!vmx->nested.hv_evmcs)
		return;

	kvm_vcpu_unmap(vcpu, &vmx->nested.hv_evmcs_map, true);
	vmx->nested.hv_evmcs_vmptr = -1ull;
	vmx->nested.hv_evmcs = NULL;
}

/*
 * Free whatever needs to be freed from vmx->nested when L1 goes down, or
 * just stops using VMX.
 */
static void free_nested(struct kvm_vcpu *vcpu)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);

	if (!vmx->nested.vmxon && !vmx->nested.smm.vmxon)
		return;

	kvm_clear_request(KVM_REQ_GET_VMCS12_PAGES, vcpu);

	vmx->nested.vmxon = false;
	vmx->nested.smm.vmxon = false;
	free_vpid(vmx->nested.vpid02);
	vmx->nested.posted_intr_nv = -1;
	vmx->nested.current_vmptr = -1ull;
	if (enable_shadow_vmcs) {
		vmx_disable_shadow_vmcs(vmx);
		vmcs_clear(vmx->vmcs01.shadow_vmcs);
		free_vmcs(vmx->vmcs01.shadow_vmcs);
		vmx->vmcs01.shadow_vmcs = NULL;
	}
	kfree(vmx->nested.cached_vmcs12);
	vmx->nested.cached_vmcs12 = NULL;
	kfree(vmx->nested.cached_shadow_vmcs12);
	vmx->nested.cached_shadow_vmcs12 = NULL;
	/* Unpin physical memory we referred to in the vmcs02 */
	if (vmx->nested.apic_access_page) {
		kvm_release_page_dirty(vmx->nested.apic_access_page);
		vmx->nested.apic_access_page = NULL;
	}
	kvm_vcpu_unmap(vcpu, &vmx->nested.virtual_apic_map, true);
	kvm_vcpu_unmap(vcpu, &vmx->nested.pi_desc_map, true);
	vmx->nested.pi_desc = NULL;

	kvm_mmu_free_roots(vcpu, &vcpu->arch.guest_mmu, KVM_MMU_ROOTS_ALL);

	nested_release_evmcs(vcpu);

	free_loaded_vmcs(&vmx->nested.vmcs02);
}

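/*
 * Propagate the host state that vmx_prepare_switch_to_guest() already loaded
 * from the previous loaded_vmcs's bookkeeping to the new one, so the segment
 * selectors and bases are not needlessly reloaded on the next switch to guest.
 */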
static void vmx_sync_vmcs_host_state(struct vcpu_vmx *vmx,
				     struct loaded_vmcs *prev)
{
	struct vmcs_host_state *dest, *src;

	if (unlikely(!vmx->guest_state_loaded))
		return;

	src = &prev->host_state;
	dest = &vmx->loaded_vmcs->host_state;

	vmx_set_host_fs_gs(dest, src->fs_sel, src->gs_sel, src->fs_base, src->gs_base);
	dest->ldt_sel = src->ldt_sel;
#ifdef CONFIG_X86_64
	dest->ds_sel = src->ds_sel;
	dest->es_sel = src->es_sel;
#endif
}

static void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	struct loaded_vmcs *prev;
	int cpu;

	if (vmx->loaded_vmcs == vmcs)
		return;

	cpu = get_cpu();
	prev = vmx->loaded_vmcs;
	vmx->loaded_vmcs = vmcs;
	vmx_vcpu_load_vmcs(vcpu, cpu);
	vmx_sync_vmcs_host_state(vmx, prev);
	put_cpu();

	vmx_segment_cache_clear(vmx);
}

/*
 * Ensure that the current vmcs of the logical processor is the
 * vmcs01 of the vcpu before calling free_nested().
 */
void nested_vmx_free_vcpu(struct kvm_vcpu *vcpu)
{
	vcpu_load(vcpu);
	vmx_leave_nested(vcpu);
	vmx_switch_vmcs(vcpu, &to_vmx(vcpu)->vmcs01);
	free_nested(vcpu);
	vcpu_put(vcpu);
}

static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu,
					 struct x86_exception *fault)
{
	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	u32 exit_reason;
	unsigned long exit_qualification = vcpu->arch.exit_qualification;

	if (vmx->nested.pml_full) {
		exit_reason = EXIT_REASON_PML_FULL;
		vmx->nested.pml_full = false;
		exit_qualification &= INTR_INFO_UNBLOCK_NMI;
	} else if (fault->error_code & PFERR_RSVD_MASK)
		exit_reason = EXIT_REASON_EPT_MISCONFIG;
	else
		exit_reason = EXIT_REASON_EPT_VIOLATION;

	nested_vmx_vmexit(vcpu, exit_reason, 0, exit_qualification);
	vmcs12->guest_physical_address = fault->address;
}

static void nested_ept_init_mmu_context(struct kvm_vcpu *vcpu)
{
	WARN_ON(mmu_is_nested(vcpu));

	vcpu->arch.mmu = &vcpu->arch.guest_mmu;
	kvm_init_shadow_ept_mmu(vcpu,
			to_vmx(vcpu)->nested.msrs.ept_caps &
			VMX_EPT_EXECUTE_ONLY_BIT,
			nested_ept_ad_enabled(vcpu),
			nested_ept_get_cr3(vcpu));
	vcpu->arch.mmu->set_cr3 = vmx_set_cr3;
	vcpu->arch.mmu->get_cr3 = nested_ept_get_cr3;
	vcpu->arch.mmu->inject_page_fault = nested_ept_inject_page_fault;
	vcpu->arch.mmu->get_pdptr = kvm_pdptr_read;

	vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu;
}

static void nested_ept_uninit_mmu_context(struct kvm_vcpu *vcpu)
{
	vcpu->arch.mmu = &vcpu->arch.root_mmu;
	vcpu->arch.walk_mmu = &vcpu->arch.root_mmu;
}

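/*
 * Per the SDM, a #PF in L2 causes a VM-exit to L1 iff the equality
 * "(error_code & PFEC_MASK) == PFEC_MATCH" matches the PF bit of L1's
 * exception bitmap; the XOR below implements exactly that equivalence.
 */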
static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12,
					    u16 error_code)
{
	bool inequality, bit;

	bit = (vmcs12->exception_bitmap & (1u << PF_VECTOR)) != 0;
	inequality =
		(error_code & vmcs12->page_fault_error_code_mask) !=
		 vmcs12->page_fault_error_code_match;
	return inequality ^ bit;
}

/*
 * KVM wants to inject page-faults which it got to the guest. This function
 * checks whether in a nested guest, we need to inject them to L1 or L2.
 */
static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned long *exit_qual)
{
	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
	unsigned int nr = vcpu->arch.exception.nr;
	bool has_payload = vcpu->arch.exception.has_payload;
	unsigned long payload = vcpu->arch.exception.payload;

	if (nr == PF_VECTOR) {
		if (vcpu->arch.exception.nested_apf) {
			*exit_qual = vcpu->arch.apf.nested_apf_token;
			return 1;
		}
		if (nested_vmx_is_page_fault_vmexit(vmcs12,
						    vcpu->arch.exception.error_code)) {
			*exit_qual = has_payload ? payload : vcpu->arch.cr2;
			return 1;
		}
	} else if (vmcs12->exception_bitmap & (1u << nr)) {
		if (nr == DB_VECTOR) {
			if (!has_payload) {
				payload = vcpu->arch.dr6;
				payload &= ~(DR6_FIXED_1 | DR6_BT);
				payload ^= DR6_RTM;
			}
			*exit_qual = payload;
		} else
			*exit_qual = 0;
		return 1;
	}

	return 0;
}

static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu,
					 struct x86_exception *fault)
{
	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);

	WARN_ON(!is_guest_mode(vcpu));

	if (nested_vmx_is_page_fault_vmexit(vmcs12, fault->error_code) &&
	    !to_vmx(vcpu)->nested.nested_run_pending) {
		vmcs12->vm_exit_intr_error_code = fault->error_code;
		nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
				  PF_VECTOR | INTR_TYPE_HARD_EXCEPTION |
				  INTR_INFO_DELIVER_CODE_MASK | INTR_INFO_VALID_MASK,
				  fault->address);
	} else {
		kvm_inject_page_fault(vcpu, fault);
	}
}

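/*
 * A guest-physical address supplied in a vmcs12 control field is valid iff
 * it is page aligned and does not exceed the guest's physical-address width.
 */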
static bool page_address_valid(struct kvm_vcpu *vcpu, gpa_t gpa)
{
	return PAGE_ALIGNED(gpa) && !(gpa >> cpuid_maxphyaddr(vcpu));
}

static int nested_vmx_check_io_bitmap_controls(struct kvm_vcpu *vcpu,
					       struct vmcs12 *vmcs12)
{
	if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS))
		return 0;

	if (!page_address_valid(vcpu, vmcs12->io_bitmap_a) ||
	    !page_address_valid(vcpu, vmcs12->io_bitmap_b))
		return -EINVAL;

	return 0;
}

static int nested_vmx_check_msr_bitmap_controls(struct kvm_vcpu *vcpu,
						struct vmcs12 *vmcs12)
{
	if (!nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS))
		return 0;

	if (!page_address_valid(vcpu, vmcs12->msr_bitmap))
		return -EINVAL;

	return 0;
}

static int nested_vmx_check_tpr_shadow_controls(struct kvm_vcpu *vcpu,
						struct vmcs12 *vmcs12)
{
	if (!nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW))
		return 0;

	if (!page_address_valid(vcpu, vmcs12->virtual_apic_page_addr))
		return -EINVAL;

	return 0;
}

/*
 * Check if MSR is intercepted for L01 MSR bitmap.
 */
static bool msr_write_intercepted_l01(struct kvm_vcpu *vcpu, u32 msr)
{
	unsigned long *msr_bitmap;
	int f = sizeof(unsigned long);

	if (!cpu_has_vmx_msr_bitmap())
		return true;

	msr_bitmap = to_vmx(vcpu)->vmcs01.msr_bitmap;

	if (msr <= 0x1fff) {
		return !!test_bit(msr, msr_bitmap + 0x800 / f);
	} else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
		msr &= 0x1fff;
		return !!test_bit(msr, msr_bitmap + 0xc00 / f);
	}

	return true;
}

/*
 * If a msr is allowed by L0, we should check whether it is allowed by L1.
 * The corresponding bit will be cleared unless both of L0 and L1 allow it.
 */
static void nested_vmx_disable_intercept_for_msr(unsigned long *msr_bitmap_l1,
						 unsigned long *msr_bitmap_nested,
						 u32 msr, int type)
{
	int f = sizeof(unsigned long);

	/*
	 * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
	 * have the write-low and read-high bitmap offsets the wrong way
	 * round. We can control MSRs 0x00000000-0x00001fff and
	 * 0xc0000000-0xc0001fff.
	 */
	if (msr <= 0x1fff) {
		if (type & MSR_TYPE_R &&
		   !test_bit(msr, msr_bitmap_l1 + 0x000 / f))
			/* read-low */
			__clear_bit(msr, msr_bitmap_nested + 0x000 / f);

		if (type & MSR_TYPE_W &&
		   !test_bit(msr, msr_bitmap_l1 + 0x800 / f))
			/* write-low */
			__clear_bit(msr, msr_bitmap_nested + 0x800 / f);

	} else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
		msr &= 0x1fff;
		if (type & MSR_TYPE_R &&
		   !test_bit(msr, msr_bitmap_l1 + 0x400 / f))
			/* read-high */
			__clear_bit(msr, msr_bitmap_nested + 0x400 / f);

		if (type & MSR_TYPE_W &&
		   !test_bit(msr, msr_bitmap_l1 + 0xc00 / f))
			/* write-high */
			__clear_bit(msr, msr_bitmap_nested + 0xc00 / f);

	}
}

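/*
 * Set both the read and the write intercept bits for the entire x2APIC MSR
 * range (0x800 - 0x8ff) in the given bitmap.
 */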
static inline void enable_x2apic_msr_intercepts(unsigned long *msr_bitmap)
{
	int msr;

	for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) {
		unsigned word = msr / BITS_PER_LONG;

		msr_bitmap[word] = ~0;
		msr_bitmap[word + (0x800 / sizeof(long))] = ~0;
	}
}

/*
 * Merge L0's and L1's MSR bitmap, return false to indicate that
 * we do not use the hardware.
 */
static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
						 struct vmcs12 *vmcs12)
{
	int msr;
	unsigned long *msr_bitmap_l1;
	unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.vmcs02.msr_bitmap;
	struct kvm_host_map *map = &to_vmx(vcpu)->nested.msr_bitmap_map;

	/* Nothing to do if the MSR bitmap is not in use.  */
	if (!cpu_has_vmx_msr_bitmap() ||
	    !nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS))
		return false;

	if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->msr_bitmap), map))
		return false;

	msr_bitmap_l1 = (unsigned long *)map->hva;

	/*
	 * To keep the control flow simple, pay eight 8-byte writes (sixteen
	 * 4-byte writes on 32-bit systems) up front to enable intercepts for
	 * the x2APIC MSR range and selectively disable them below.
	 */
	enable_x2apic_msr_intercepts(msr_bitmap_l0);

	if (nested_cpu_has_virt_x2apic_mode(vmcs12)) {
		if (nested_cpu_has_apic_reg_virt(vmcs12)) {
			/*
			 * L0 need not intercept reads for MSRs between 0x800
			 * and 0x8ff, it just lets the processor take the value
			 * from the virtual-APIC page; take those 256 bits
			 * directly from the L1 bitmap.
			 */
			for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) {
				unsigned word = msr / BITS_PER_LONG;

				msr_bitmap_l0[word] = msr_bitmap_l1[word];
			}
		}

		nested_vmx_disable_intercept_for_msr(
			msr_bitmap_l1, msr_bitmap_l0,
			X2APIC_MSR(APIC_TASKPRI),
			MSR_TYPE_R | MSR_TYPE_W);

		if (nested_cpu_has_vid(vmcs12)) {
			nested_vmx_disable_intercept_for_msr(
				msr_bitmap_l1, msr_bitmap_l0,
				X2APIC_MSR(APIC_EOI),
				MSR_TYPE_W);
			nested_vmx_disable_intercept_for_msr(
				msr_bitmap_l1, msr_bitmap_l0,
				X2APIC_MSR(APIC_SELF_IPI),
				MSR_TYPE_W);
		}
	}

	/* KVM unconditionally exposes the FS/GS base MSRs to L1. */
	nested_vmx_disable_intercept_for_msr(msr_bitmap_l1, msr_bitmap_l0,
					     MSR_FS_BASE, MSR_TYPE_RW);

	nested_vmx_disable_intercept_for_msr(msr_bitmap_l1, msr_bitmap_l0,
					     MSR_GS_BASE, MSR_TYPE_RW);

	nested_vmx_disable_intercept_for_msr(msr_bitmap_l1, msr_bitmap_l0,
					     MSR_KERNEL_GS_BASE, MSR_TYPE_RW);

	/*
	 * Checking the L0->L1 bitmap is trying to verify two things:
	 *
	 * 1. L0 gave a permission to L1 to actually passthrough the MSR. This
	 *    ensures that we do not accidentally generate an L02 MSR bitmap
	 *    from the L12 MSR bitmap that is too permissive.
	 * 2. That L1 or L2s have actually used the MSR. This avoids
	 *    unnecessarily merging of the bitmap if the MSR is unused. This
	 *    works properly because we only update the L01 MSR bitmap lazily.
	 *    So even if L0 should pass L1 these MSRs, the L01 bitmap is only
	 *    updated to reflect this when L1 (or its L2s) actually write to
	 *    the MSR.
	 */
	if (!msr_write_intercepted_l01(vcpu, MSR_IA32_SPEC_CTRL))
		nested_vmx_disable_intercept_for_msr(
					msr_bitmap_l1, msr_bitmap_l0,
					MSR_IA32_SPEC_CTRL,
					MSR_TYPE_R | MSR_TYPE_W);

	if (!msr_write_intercepted_l01(vcpu, MSR_IA32_PRED_CMD))
		nested_vmx_disable_intercept_for_msr(
					msr_bitmap_l1, msr_bitmap_l0,
					MSR_IA32_PRED_CMD,
					MSR_TYPE_W);

	kvm_vcpu_unmap(vcpu, &to_vmx(vcpu)->nested.msr_bitmap_map, false);

	return true;
}

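/*
 * Copy the shadow vmcs12 referenced by vmcs12's VMCS link pointer from
 * guest memory into the per-vCPU cache.
 */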
static void nested_cache_shadow_vmcs12(struct kvm_vcpu *vcpu,
				       struct vmcs12 *vmcs12)
{
	struct kvm_host_map map;
	struct vmcs12 *shadow;

	if (!nested_cpu_has_shadow_vmcs(vmcs12) ||
	    vmcs12->vmcs_link_pointer == -1ull)
		return;

	shadow = get_shadow_vmcs12(vcpu);

	if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->vmcs_link_pointer), &map))
		return;

	memcpy(shadow, map.hva, VMCS12_SIZE);
	kvm_vcpu_unmap(vcpu, &map, false);
}

static void nested_flush_cached_shadow_vmcs12(struct kvm_vcpu *vcpu,
					      struct vmcs12 *vmcs12)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);

	if (!nested_cpu_has_shadow_vmcs(vmcs12) ||
	    vmcs12->vmcs_link_pointer == -1ull)
		return;

	kvm_write_guest(vmx->vcpu.kvm, vmcs12->vmcs_link_pointer,
			get_shadow_vmcs12(vcpu), VMCS12_SIZE);
}

/*
 * In nested virtualization, check if L1 has set
 * VM_EXIT_ACK_INTR_ON_EXIT
 */
static bool nested_exit_intr_ack_set(struct kvm_vcpu *vcpu)
{
	return get_vmcs12(vcpu)->vm_exit_controls &
		VM_EXIT_ACK_INTR_ON_EXIT;
}

static bool nested_exit_on_nmi(struct kvm_vcpu *vcpu)
{
	return nested_cpu_has_nmi_exiting(get_vmcs12(vcpu));
}

static int nested_vmx_check_apic_access_controls(struct kvm_vcpu *vcpu,
						 struct vmcs12 *vmcs12)
{
	if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES) &&
	    !page_address_valid(vcpu, vmcs12->apic_access_addr))
		return -EINVAL;
	else
		return 0;
}

static int nested_vmx_check_apicv_controls(struct kvm_vcpu *vcpu,
					   struct vmcs12 *vmcs12)
{
	if (!nested_cpu_has_virt_x2apic_mode(vmcs12) &&
	    !nested_cpu_has_apic_reg_virt(vmcs12) &&
	    !nested_cpu_has_vid(vmcs12) &&
	    !nested_cpu_has_posted_intr(vmcs12))
		return 0;

	/*
	 * If virtualize x2apic mode is enabled,
	 * virtualize apic access must be disabled.
	 */
	if (nested_cpu_has_virt_x2apic_mode(vmcs12) &&
	    nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
		return -EINVAL;

	/*
	 * If virtual interrupt delivery is enabled,
	 * we must exit on external interrupts.
	 */
	if (nested_cpu_has_vid(vmcs12) &&
	    !nested_exit_on_intr(vcpu))
		return -EINVAL;

	/*
	 * If posted interrupts are enabled, virtual interrupt delivery and
	 * the "acknowledge interrupt on exit" VM-exit control must also be
	 * enabled, bits 15:8 of the notification vector must be zero, and
	 * the descriptor address must be 64-byte aligned and within the
	 * guest's physical-address width.
	 */
	if (nested_cpu_has_posted_intr(vmcs12) &&
	   (!nested_cpu_has_vid(vmcs12) ||
	    !nested_exit_intr_ack_set(vcpu) ||
	    (vmcs12->posted_intr_nv & 0xff00) ||
	    (vmcs12->posted_intr_desc_addr & 0x3f) ||
	    (vmcs12->posted_intr_desc_addr >> cpuid_maxphyaddr(vcpu))))
		return -EINVAL;

	/* tpr shadow is needed by all apicv features. */
	if (!nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW))
		return -EINVAL;

	return 0;
}

static int nested_vmx_check_msr_switch(struct kvm_vcpu *vcpu,
				       u32 count, u64 addr)
{
	int maxphyaddr;

	if (count == 0)
		return 0;
	maxphyaddr = cpuid_maxphyaddr(vcpu);
	if (!IS_ALIGNED(addr, 16) || addr >> maxphyaddr ||
	    (addr + count * sizeof(struct vmx_msr_entry) - 1) >> maxphyaddr)
		return -EINVAL;

	return 0;
}

static int nested_vmx_check_exit_msr_switch_controls(struct kvm_vcpu *vcpu,
						     struct vmcs12 *vmcs12)
{
	if (nested_vmx_check_msr_switch(vcpu, vmcs12->vm_exit_msr_load_count,
					vmcs12->vm_exit_msr_load_addr) ||
	    nested_vmx_check_msr_switch(vcpu, vmcs12->vm_exit_msr_store_count,
					vmcs12->vm_exit_msr_store_addr))
		return -EINVAL;

	return 0;
}

static int nested_vmx_check_entry_msr_switch_controls(struct kvm_vcpu *vcpu,
						      struct vmcs12 *vmcs12)
{
	if (nested_vmx_check_msr_switch(vcpu, vmcs12->vm_entry_msr_load_count,
					vmcs12->vm_entry_msr_load_addr))
		return -EINVAL;

	return 0;
}

static int nested_vmx_check_pml_controls(struct kvm_vcpu *vcpu,
					 struct vmcs12 *vmcs12)
{
	if (!nested_cpu_has_pml(vmcs12))
		return 0;

	if (!nested_cpu_has_ept(vmcs12) ||
	    !page_address_valid(vcpu, vmcs12->pml_address))
		return -EINVAL;

	return 0;
}

static int nested_vmx_check_unrestricted_guest_controls(struct kvm_vcpu *vcpu,
							struct vmcs12 *vmcs12)
{
	if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_UNRESTRICTED_GUEST) &&
	    !nested_cpu_has_ept(vmcs12))
		return -EINVAL;
	return 0;
}

static int nested_vmx_check_mode_based_ept_exec_controls(struct kvm_vcpu *vcpu,
							 struct vmcs12 *vmcs12)
{
	if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_MODE_BASED_EPT_EXEC) &&
	    !nested_cpu_has_ept(vmcs12))
		return -EINVAL;
	return 0;
}

static int nested_vmx_check_shadow_vmcs_controls(struct kvm_vcpu *vcpu,
						 struct vmcs12 *vmcs12)
{
	if (!nested_cpu_has_shadow_vmcs(vmcs12))
		return 0;

	if (!page_address_valid(vcpu, vmcs12->vmread_bitmap) ||
	    !page_address_valid(vcpu, vmcs12->vmwrite_bitmap))
		return -EINVAL;

	return 0;
}

static int nested_vmx_msr_check_common(struct kvm_vcpu *vcpu,
				       struct vmx_msr_entry *e)
{
	/* x2APIC MSR accesses are not allowed */
	if (vcpu->arch.apic_base & X2APIC_ENABLE && e->index >> 8 == 0x8)
		return -EINVAL;
	if (e->index == MSR_IA32_UCODE_WRITE || /* SDM Table 35-2 */
	    e->index == MSR_IA32_UCODE_REV)
		return -EINVAL;
	if (e->reserved != 0)
		return -EINVAL;
	return 0;
}

static int nested_vmx_load_msr_check(struct kvm_vcpu *vcpu,
				     struct vmx_msr_entry *e)
{
	if (e->index == MSR_FS_BASE ||
	    e->index == MSR_GS_BASE ||
	    e->index == MSR_IA32_SMM_MONITOR_CTL || /* SMM is not supported */
	    nested_vmx_msr_check_common(vcpu, e))
		return -EINVAL;
	return 0;
}

static int nested_vmx_store_msr_check(struct kvm_vcpu *vcpu,
				      struct vmx_msr_entry *e)
{
	if (e->index == MSR_IA32_SMBASE || /* SMM is not supported */
	    nested_vmx_msr_check_common(vcpu, e))
		return -EINVAL;
	return 0;
}

/*
 * Load guest's/host's msr at nested entry/exit.
 * return 0 for success, entry index for failure.
 */
static u32 nested_vmx_load_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
{
	u32 i;
	struct vmx_msr_entry e;
	struct msr_data msr;

	msr.host_initiated = false;
	for (i = 0; i < count; i++) {
		if (kvm_vcpu_read_guest(vcpu, gpa + i * sizeof(e),
					&e, sizeof(e))) {
			pr_debug_ratelimited(
				"%s cannot read MSR entry (%u, 0x%08llx)\n",
				__func__, i, gpa + i * sizeof(e));
			goto fail;
		}
		if (nested_vmx_load_msr_check(vcpu, &e)) {
			pr_debug_ratelimited(
				"%s check failed (%u, 0x%x, 0x%x)\n",
				__func__, i, e.index, e.reserved);
			goto fail;
		}
		msr.index = e.index;
		msr.data = e.value;
		if (kvm_set_msr(vcpu, &msr)) {
			pr_debug_ratelimited(
				"%s cannot write MSR (%u, 0x%x, 0x%llx)\n",
				__func__, i, e.index, e.value);
			goto fail;
		}
	}
	return 0;
fail:
	return i + 1;
}

static int nested_vmx_store_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
{
	u32 i;
	struct vmx_msr_entry e;

	for (i = 0; i < count; i++) {
		struct msr_data msr_info;
		if (kvm_vcpu_read_guest(vcpu,
					gpa + i * sizeof(e),
					&e, 2 * sizeof(u32))) {
			pr_debug_ratelimited(
				"%s cannot read MSR entry (%u, 0x%08llx)\n",
				__func__, i, gpa + i * sizeof(e));
			return -EINVAL;
		}
		if (nested_vmx_store_msr_check(vcpu, &e)) {
			pr_debug_ratelimited(
				"%s check failed (%u, 0x%x, 0x%x)\n",
				__func__, i, e.index, e.reserved);
			return -EINVAL;
		}
		msr_info.host_initiated = false;
		msr_info.index = e.index;
		if (kvm_get_msr(vcpu, &msr_info)) {
			pr_debug_ratelimited(
				"%s cannot read MSR (%u, 0x%x)\n",
				__func__, i, e.index);
			return -EINVAL;
		}
		if (kvm_vcpu_write_guest(vcpu,
					 gpa + i * sizeof(e) +
					     offsetof(struct vmx_msr_entry, value),
					 &msr_info.data, sizeof(msr_info.data))) {
			pr_debug_ratelimited(
				"%s cannot write MSR (%u, 0x%x, 0x%llx)\n",
				__func__, i, e.index, msr_info.data);
			return -EINVAL;
		}
	}
	return 0;
}

static bool nested_cr3_valid(struct kvm_vcpu *vcpu, unsigned long val)
{
	unsigned long invalid_mask;

	invalid_mask = (~0ULL) << cpuid_maxphyaddr(vcpu);
	return (val & invalid_mask) == 0;
}

/*
 * Load guest's/host's cr3 at nested entry/exit. nested_ept is true if we are
 * emulating VM entry into a guest with EPT enabled.
 * Returns 0 on success, -EINVAL on failure, in which case the expected exit
 * qualification code is assigned to entry_failure_code.
 */
static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, bool nested_ept,
			       u32 *entry_failure_code)
{
	if (cr3 != kvm_read_cr3(vcpu) || (!nested_ept && pdptrs_changed(vcpu))) {
		if (!nested_cr3_valid(vcpu, cr3)) {
			*entry_failure_code = ENTRY_FAIL_DEFAULT;
			return -EINVAL;
		}

		/*
		 * If PAE paging and EPT are both on, CR3 is not used by the
		 * CPU and must not be dereferenced.
		 */
		if (is_pae_paging(vcpu) && !nested_ept) {
			if (!load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3)) {
				*entry_failure_code = ENTRY_FAIL_PDPTE;
				return -EINVAL;
			}
		}
	}

	if (!nested_ept)
		kvm_mmu_new_cr3(vcpu, cr3, false);

	vcpu->arch.cr3 = cr3;
	__set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);

	kvm_init_mmu(vcpu, false);

	return 0;
}

/*
 * Returns if KVM is able to config CPU to tag TLB entries
 * populated by L2 differently than TLB entries populated
 * by L1.
 *
 * If L1 uses EPT, then TLB entries are tagged with different EPTP.
 *
 * If L1 uses VPID and we allocated a vpid02, TLB entries are tagged
 * with different VPID (L1 entries are tagged with vmx->vpid
 * while L2 entries are tagged with vmx->nested.vpid02).
 */
static bool nested_has_guest_tlb_tag(struct kvm_vcpu *vcpu)
{
	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);

	return nested_cpu_has_ept(vmcs12) ||
	       (nested_cpu_has_vpid(vmcs12) && to_vmx(vcpu)->nested.vpid02);
}

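/*
 * Return the VPID used to tag L2's TLB entries: the dedicated vpid02 if one
 * was allocated, otherwise fall back to L1's vpid.
 */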
static u16 nested_get_vpid02(struct kvm_vcpu *vcpu)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);

	return vmx->nested.vpid02 ? vmx->nested.vpid02 : vmx->vpid;
}

static inline bool vmx_control_verify(u32 control, u32 low, u32 high)
{
	return fixed_bits_valid(control, low, high);
}

static inline u64 vmx_control_msr(u32 low, u32 high)
{
	return low | ((u64)high << 32);
}

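/*
 * Returns true if every bit set in 'subset' (restricted to 'mask') is also
 * set in 'superset'.
 */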
static bool is_bitwise_subset(u64 superset, u64 subset, u64 mask)
{
	superset &= mask;
	subset &= mask;

	return (superset | subset) == superset;
}

static int vmx_restore_vmx_basic(struct vcpu_vmx *vmx, u64 data)
{
	const u64 feature_and_reserved =
		/* feature (except bit 48; see below) */
		BIT_ULL(49) | BIT_ULL(54) | BIT_ULL(55) |
		/* reserved */
		BIT_ULL(31) | GENMASK_ULL(47, 45) | GENMASK_ULL(63, 56);
	u64 vmx_basic = vmx->nested.msrs.basic;

	if (!is_bitwise_subset(vmx_basic, data, feature_and_reserved))
		return -EINVAL;

	/*
	 * KVM does not emulate a version of VMX that constrains physical
	 * addresses of VMX structures (e.g. VMCS) to 32-bits.
	 */
	if (data & BIT_ULL(48))
		return -EINVAL;

	if (vmx_basic_vmcs_revision_id(vmx_basic) !=
	    vmx_basic_vmcs_revision_id(data))
		return -EINVAL;

	if (vmx_basic_vmcs_size(vmx_basic) > vmx_basic_vmcs_size(data))
		return -EINVAL;

	vmx->nested.msrs.basic = data;
	return 0;
}

static int
vmx_restore_control_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data)
{
	u64 supported;
	u32 *lowp, *highp;

	switch (msr_index) {
	case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
		lowp = &vmx->nested.msrs.pinbased_ctls_low;
		highp = &vmx->nested.msrs.pinbased_ctls_high;
		break;
	case MSR_IA32_VMX_TRUE_PROCBASED_CTLS:
		lowp = &vmx->nested.msrs.procbased_ctls_low;
		highp = &vmx->nested.msrs.procbased_ctls_high;
		break;
	case MSR_IA32_VMX_TRUE_EXIT_CTLS:
		lowp = &vmx->nested.msrs.exit_ctls_low;
		highp = &vmx->nested.msrs.exit_ctls_high;
		break;
	case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
		lowp = &vmx->nested.msrs.entry_ctls_low;
		highp = &vmx->nested.msrs.entry_ctls_high;
		break;
	case MSR_IA32_VMX_PROCBASED_CTLS2:
		lowp = &vmx->nested.msrs.secondary_ctls_low;
		highp = &vmx->nested.msrs.secondary_ctls_high;
		break;
	default:
		BUG();
	}

	supported = vmx_control_msr(*lowp, *highp);

	/* Check must-be-1 bits are still 1. */
	if (!is_bitwise_subset(data, supported, GENMASK_ULL(31, 0)))
		return -EINVAL;

	/* Check must-be-0 bits are still 0. */
	if (!is_bitwise_subset(supported, data, GENMASK_ULL(63, 32)))
		return -EINVAL;

	*lowp = data;
	*highp = data >> 32;
	return 0;
}

static int vmx_restore_vmx_misc(struct vcpu_vmx *vmx, u64 data)
{
	const u64 feature_and_reserved_bits =
		/* feature */
		BIT_ULL(5) | GENMASK_ULL(8, 6) | BIT_ULL(14) | BIT_ULL(15) |
		BIT_ULL(28) | BIT_ULL(29) | BIT_ULL(30) |
		/* reserved */
		GENMASK_ULL(13, 9) | BIT_ULL(31);
	u64 vmx_misc;

	vmx_misc = vmx_control_msr(vmx->nested.msrs.misc_low,
				   vmx->nested.msrs.misc_high);

	if (!is_bitwise_subset(vmx_misc, data, feature_and_reserved_bits))
		return -EINVAL;

	if ((vmx->nested.msrs.pinbased_ctls_high &
	     PIN_BASED_VMX_PREEMPTION_TIMER) &&
	    vmx_misc_preemption_timer_rate(data) !=
	    vmx_misc_preemption_timer_rate(vmx_misc))
		return -EINVAL;

	if (vmx_misc_cr3_count(data) > vmx_misc_cr3_count(vmx_misc))
		return -EINVAL;

	if (vmx_misc_max_msr(data) > vmx_misc_max_msr(vmx_misc))
		return -EINVAL;

	if (vmx_misc_mseg_revid(data) != vmx_misc_mseg_revid(vmx_misc))
		return -EINVAL;

	vmx->nested.msrs.misc_low = data;
	vmx->nested.msrs.misc_high = data >> 32;

	return 0;
}

static int vmx_restore_vmx_ept_vpid_cap(struct vcpu_vmx *vmx, u64 data)
{
	u64 vmx_ept_vpid_cap;

	vmx_ept_vpid_cap = vmx_control_msr(vmx->nested.msrs.ept_caps,
					   vmx->nested.msrs.vpid_caps);

	/* Every bit is either reserved or a feature bit. */
	if (!is_bitwise_subset(vmx_ept_vpid_cap, data, -1ULL))
		return -EINVAL;

	vmx->nested.msrs.ept_caps = data;
	vmx->nested.msrs.vpid_caps = data >> 32;
	return 0;
}

static int vmx_restore_fixed0_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data)
{
	u64 *msr;

	switch (msr_index) {
	case MSR_IA32_VMX_CR0_FIXED0:
		msr = &vmx->nested.msrs.cr0_fixed0;
		break;
	case MSR_IA32_VMX_CR4_FIXED0:
		msr = &vmx->nested.msrs.cr4_fixed0;
		break;
	default:
		BUG();
	}

	/*
	 * 1 bits (which indicates bits which "must-be-1" during VMX operation)
	 * must be 1 in the restored value.
	 */
	if (!is_bitwise_subset(data, *msr, -1ULL))
		return -EINVAL;

	*msr = data;
	return 0;
}

/*
 * Called when userspace is restoring VMX MSRs.
 *
 * Returns 0 on success, non-0 otherwise.
 */
int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);

	/*
	 * Don't allow changes to the VMX capability MSRs while the vCPU
	 * is in VMX operation.
	 */
	if (vmx->nested.vmxon)
		return -EBUSY;

	switch (msr_index) {
	case MSR_IA32_VMX_BASIC:
		return vmx_restore_vmx_basic(vmx, data);
	case MSR_IA32_VMX_PINBASED_CTLS:
	case MSR_IA32_VMX_PROCBASED_CTLS:
	case MSR_IA32_VMX_EXIT_CTLS:
	case MSR_IA32_VMX_ENTRY_CTLS:
		/*
		 * The "non-true" VMX capability MSRs are generated from the
		 * "true" MSRs, so we do not support restoring them directly.
		 *
		 * If userspace wants to emulate VMX_BASIC[55]=0, userspace
		 * should restore the "non-true" MSRs of the capability to
		 * their default values.
		 */
		return -EINVAL;
	case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
	case MSR_IA32_VMX_TRUE_PROCBASED_CTLS:
	case MSR_IA32_VMX_TRUE_EXIT_CTLS:
	case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
	case MSR_IA32_VMX_PROCBASED_CTLS2:
		return vmx_restore_control_msr(vmx, msr_index, data);
	case MSR_IA32_VMX_MISC:
		return vmx_restore_vmx_misc(vmx, data);
	case MSR_IA32_VMX_CR0_FIXED0:
	case MSR_IA32_VMX_CR4_FIXED0:
		return vmx_restore_fixed0_msr(vmx, msr_index, data);
	case MSR_IA32_VMX_CR0_FIXED1:
	case MSR_IA32_VMX_CR4_FIXED1:
		/*
		 * These MSRs are generated based on the vCPU's CPUID, so we
		 * do not support restoring them directly.
		 */
		return -EINVAL;
	case MSR_IA32_VMX_EPT_VPID_CAP:
		return vmx_restore_vmx_ept_vpid_cap(vmx, data);
	case MSR_IA32_VMX_VMCS_ENUM:
		vmx->nested.msrs.vmcs_enum = data;
		return 0;
	case MSR_IA32_VMX_VMFUNC:
		if (data & ~vmx->nested.msrs.vmfunc_controls)
			return -EINVAL;
		vmx->nested.msrs.vmfunc_controls = data;
		return 0;
	default:
		/*
		 * The rest of the VMX capability MSRs do not support restore.
		 */
		return -EINVAL;
	}
}

/* Returns 0 on success, non-0 otherwise. */
int vmx_get_vmx_msr(struct nested_vmx_msrs *msrs, u32 msr_index, u64 *pdata)
{
	switch (msr_index) {
	case MSR_IA32_VMX_BASIC:
		*pdata = msrs->basic;
		break;
	case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
	case MSR_IA32_VMX_PINBASED_CTLS:
		*pdata = vmx_control_msr(
			msrs->pinbased_ctls_low,
			msrs->pinbased_ctls_high);
		if (msr_index == MSR_IA32_VMX_PINBASED_CTLS)
			*pdata |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
		break;
	case MSR_IA32_VMX_TRUE_PROCBASED_CTLS:
	case MSR_IA32_VMX_PROCBASED_CTLS:
		*pdata = vmx_control_msr(
			msrs->procbased_ctls_low,
			msrs->procbased_ctls_high);
		if (msr_index == MSR_IA32_VMX_PROCBASED_CTLS)
			*pdata |= CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
		break;
	case MSR_IA32_VMX_TRUE_EXIT_CTLS:
	case MSR_IA32_VMX_EXIT_CTLS:
		*pdata = vmx_control_msr(
			msrs->exit_ctls_low,
			msrs->exit_ctls_high);
		if (msr_index == MSR_IA32_VMX_EXIT_CTLS)
			*pdata |= VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR;
		break;
	case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
	case MSR_IA32_VMX_ENTRY_CTLS:
		*pdata = vmx_control_msr(
			msrs->entry_ctls_low,
			msrs->entry_ctls_high);
		if (msr_index == MSR_IA32_VMX_ENTRY_CTLS)
			*pdata |= VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR;
		break;
	case MSR_IA32_VMX_MISC:
		*pdata = vmx_control_msr(
			msrs->misc_low,
			msrs->misc_high);
		break;
	case MSR_IA32_VMX_CR0_FIXED0:
		*pdata = msrs->cr0_fixed0;
		break;
	case MSR_IA32_VMX_CR0_FIXED1:
		*pdata = msrs->cr0_fixed1;
		break;
	case MSR_IA32_VMX_CR4_FIXED0:
		*pdata = msrs->cr4_fixed0;
		break;
	case MSR_IA32_VMX_CR4_FIXED1:
		*pdata = msrs->cr4_fixed1;
		break;
	case MSR_IA32_VMX_VMCS_ENUM:
		*pdata = msrs->vmcs_enum;
		break;
	case MSR_IA32_VMX_PROCBASED_CTLS2:
		*pdata = vmx_control_msr(
			msrs->secondary_ctls_low,
			msrs->secondary_ctls_high);
		break;
	case MSR_IA32_VMX_EPT_VPID_CAP:
		*pdata = msrs->ept_caps |
			((u64)msrs->vpid_caps << 32);
		break;
	case MSR_IA32_VMX_VMFUNC:
		*pdata = msrs->vmfunc_controls;
		break;
	default:
		return 1;
	}

	return 0;
}

/*
 * Copy the writable VMCS shadow fields back to the VMCS12, in case they have
 * been modified by the L1 guest.  Note, "writable" in this context means
 * "writable by the guest", i.e. tagged SHADOW_FIELD_RW; the set of
 * fields tagged SHADOW_FIELD_RO may or may not align with the "read-only"
 * VM-exit information fields (which are actually writable if the vCPU is
 * configured to support "VMWRITE to any supported field in the VMCS").
 */
static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx)
{
	struct vmcs *shadow_vmcs = vmx->vmcs01.shadow_vmcs;
	struct vmcs12 *vmcs12 = get_vmcs12(&vmx->vcpu);
	struct shadow_vmcs_field field;
	unsigned long val;
	int i;

	if (WARN_ON(!shadow_vmcs))
		return;

	preempt_disable();

	vmcs_load(shadow_vmcs);

	for (i = 0; i < max_shadow_read_write_fields; i++) {
		field = shadow_read_write_fields[i];
		val = __vmcs_readl(field.encoding);
		vmcs12_write_any(vmcs12, field.encoding, field.offset, val);
	}

	vmcs_clear(shadow_vmcs);
	vmcs_load(vmx->loaded_vmcs->vmcs);

	preempt_enable();
}

static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx)
{
	const struct shadow_vmcs_field *fields[] = {
		shadow_read_write_fields,
		shadow_read_only_fields
	};
	const int max_fields[] = {
		max_shadow_read_write_fields,
		max_shadow_read_only_fields
	};
	struct vmcs *shadow_vmcs = vmx->vmcs01.shadow_vmcs;
	struct vmcs12 *vmcs12 = get_vmcs12(&vmx->vcpu);
	struct shadow_vmcs_field field;
	unsigned long val;
	int i, q;

	if (WARN_ON(!shadow_vmcs))
		return;

	vmcs_load(shadow_vmcs);

	for (q = 0; q < ARRAY_SIZE(fields); q++) {
		for (i = 0; i < max_fields[q]; i++) {
			field = fields[q][i];
			val = vmcs12_read_any(vmcs12, field.encoding,
					      field.offset);
			__vmcs_writel(field.encoding, val);
		}
	}

	vmcs_clear(shadow_vmcs);
	vmcs_load(vmx->loaded_vmcs->vmcs);
}

static int copy_enlightened_to_vmcs12(struct vcpu_vmx *vmx)
{
	struct vmcs12 *vmcs12 = vmx->nested.cached_vmcs12;
	struct hv_enlightened_vmcs *evmcs = vmx->nested.hv_evmcs;

	/* HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE */
	vmcs12->tpr_threshold = evmcs->tpr_threshold;
	vmcs12->guest_rip = evmcs->guest_rip;

	if (unlikely(!(evmcs->hv_clean_fields &
		       HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC))) {
		vmcs12->guest_rsp = evmcs->guest_rsp;
		vmcs12->guest_rflags = evmcs->guest_rflags;
		vmcs12->guest_interruptibility_info =
			evmcs->guest_interruptibility_info;
	}

	if (unlikely(!(evmcs->hv_clean_fields &
		       HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_PROC))) {
		vmcs12->cpu_based_vm_exec_control =
			evmcs->cpu_based_vm_exec_control;
	}

	if (unlikely(!(evmcs->hv_clean_fields &
		       HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EXCPN))) {
		vmcs12->exception_bitmap = evmcs->exception_bitmap;
	}

	if (unlikely(!(evmcs->hv_clean_fields &
		       HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_ENTRY))) {
		vmcs12->vm_entry_controls = evmcs->vm_entry_controls;
	}

	if (unlikely(!(evmcs->hv_clean_fields &
		       HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT))) {
		vmcs12->vm_entry_intr_info_field =
			evmcs->vm_entry_intr_info_field;
		vmcs12->vm_entry_exception_error_code =
			evmcs->vm_entry_exception_error_code;
		vmcs12->vm_entry_instruction_len =
			evmcs->vm_entry_instruction_len;
	}

	if (unlikely(!(evmcs->hv_clean_fields &
		       HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1))) {
		vmcs12->host_ia32_pat = evmcs->host_ia32_pat;
		vmcs12->host_ia32_efer = evmcs->host_ia32_efer;
		vmcs12->host_cr0 = evmcs->host_cr0;
		vmcs12->host_cr3 = evmcs->host_cr3;
		vmcs12->host_cr4 = evmcs->host_cr4;
		vmcs12->host_ia32_sysenter_esp = evmcs->host_ia32_sysenter_esp;
		vmcs12->host_ia32_sysenter_eip = evmcs->host_ia32_sysenter_eip;
		vmcs12->host_rip = evmcs->host_rip;
		vmcs12->host_ia32_sysenter_cs = evmcs->host_ia32_sysenter_cs;
		vmcs12->host_es_selector = evmcs->host_es_selector;
		vmcs12->host_cs_selector = evmcs->host_cs_selector;
		vmcs12->host_ss_selector = evmcs->host_ss_selector;
		vmcs12->host_ds_selector = evmcs->host_ds_selector;
		vmcs12->host_fs_selector = evmcs->host_fs_selector;
		vmcs12->host_gs_selector = evmcs->host_gs_selector;
		vmcs12->host_tr_selector = evmcs->host_tr_selector;
	}

	if (unlikely(!(evmcs->hv_clean_fields &
		       HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1))) {
		vmcs12->pin_based_vm_exec_control =
			evmcs->pin_based_vm_exec_control;
		vmcs12->vm_exit_controls = evmcs->vm_exit_controls;
		vmcs12->secondary_vm_exec_control =
			evmcs->secondary_vm_exec_control;
	}

	if (unlikely(!(evmcs->hv_clean_fields &
		       HV_VMX_ENLIGHTENED_CLEAN_FIELD_IO_BITMAP))) {
		vmcs12->io_bitmap_a = evmcs->io_bitmap_a;
		vmcs12->io_bitmap_b = evmcs->io_bitmap_b;
	}

	if (unlikely(!(evmcs->hv_clean_fields &
		       HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP))) {
		vmcs12->msr_bitmap = evmcs->msr_bitmap;
	}

	if (unlikely(!(evmcs->hv_clean_fields &
		       HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2))) {
		vmcs12->guest_es_base = evmcs->guest_es_base;
		vmcs12->guest_cs_base = evmcs->guest_cs_base;
		vmcs12->guest_ss_base = evmcs->guest_ss_base;
		vmcs12->guest_ds_base = evmcs->guest_ds_base;
		vmcs12->guest_fs_base = evmcs->guest_fs_base;
		vmcs12->guest_gs_base = evmcs->guest_gs_base;
		vmcs12->guest_ldtr_base = evmcs->guest_ldtr_base;
		vmcs12->guest_tr_base = evmcs->guest_tr_base;
		vmcs12->guest_gdtr_base = evmcs->guest_gdtr_base;
		vmcs12->guest_idtr_base = evmcs->guest_idtr_base;
		vmcs12->guest_es_limit = evmcs->guest_es_limit;
		vmcs12->guest_cs_limit = evmcs->guest_cs_limit;
		vmcs12->guest_ss_limit = evmcs->guest_ss_limit;
		vmcs12->guest_ds_limit = evmcs->guest_ds_limit;
		vmcs12->guest_fs_limit = evmcs->guest_fs_limit;
		vmcs12->guest_gs_limit = evmcs->guest_gs_limit;
		vmcs12->guest_ldtr_limit = evmcs->guest_ldtr_limit;
		vmcs12->guest_tr_limit = evmcs->guest_tr_limit;
		vmcs12->guest_gdtr_limit = evmcs->guest_gdtr_limit;
		vmcs12->guest_idtr_limit = evmcs->guest_idtr_limit;
		vmcs12->guest_es_ar_bytes = evmcs->guest_es_ar_bytes;
		vmcs12->guest_cs_ar_bytes = evmcs->guest_cs_ar_bytes;
		vmcs12->guest_ss_ar_bytes = evmcs->guest_ss_ar_bytes;
		vmcs12->guest_ds_ar_bytes = evmcs->guest_ds_ar_bytes;
		vmcs12->guest_fs_ar_bytes = evmcs->guest_fs_ar_bytes;
		vmcs12->guest_gs_ar_bytes = evmcs->guest_gs_ar_bytes;
		vmcs12->guest_ldtr_ar_bytes = evmcs->guest_ldtr_ar_bytes;
		vmcs12->guest_tr_ar_bytes = evmcs->guest_tr_ar_bytes;
		vmcs12->guest_es_selector = evmcs->guest_es_selector;
		vmcs12->guest_cs_selector = evmcs->guest_cs_selector;
		vmcs12->guest_ss_selector = evmcs->guest_ss_selector;
		vmcs12->guest_ds_selector = evmcs->guest_ds_selector;
		vmcs12->guest_fs_selector = evmcs->guest_fs_selector;
		vmcs12->guest_gs_selector = evmcs->guest_gs_selector;
		vmcs12->guest_ldtr_selector = evmcs->guest_ldtr_selector;
		vmcs12->guest_tr_selector = evmcs->guest_tr_selector;
	}

	if (unlikely(!(evmcs->hv_clean_fields &
		       HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2))) {
		vmcs12->tsc_offset = evmcs->tsc_offset;
		vmcs12->virtual_apic_page_addr = evmcs->virtual_apic_page_addr;
		vmcs12->xss_exit_bitmap = evmcs->xss_exit_bitmap;
	}

	if (unlikely(!(evmcs->hv_clean_fields &
		       HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR))) {
		vmcs12->cr0_guest_host_mask = evmcs->cr0_guest_host_mask;
		vmcs12->cr4_guest_host_mask = evmcs->cr4_guest_host_mask;
		vmcs12->cr0_read_shadow = evmcs->cr0_read_shadow;
		vmcs12->cr4_read_shadow = evmcs->cr4_read_shadow;
		vmcs12->guest_cr0 = evmcs->guest_cr0;
		vmcs12->guest_cr3 = evmcs->guest_cr3;
		vmcs12->guest_cr4 = evmcs->guest_cr4;
		vmcs12->guest_dr7 = evmcs->guest_dr7;
	}

	if (unlikely(!(evmcs->hv_clean_fields &
		       HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER))) {
		vmcs12->host_fs_base = evmcs->host_fs_base;
		vmcs12->host_gs_base = evmcs->host_gs_base;
		vmcs12->host_tr_base = evmcs->host_tr_base;
		vmcs12->host_gdtr_base = evmcs->host_gdtr_base;
		vmcs12->host_idtr_base = evmcs->host_idtr_base;
		vmcs12->host_rsp = evmcs->host_rsp;
	}

	if (unlikely(!(evmcs->hv_clean_fields &
		       HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_XLAT))) {
		vmcs12->ept_pointer = evmcs->ept_pointer;
		vmcs12->virtual_processor_id = evmcs->virtual_processor_id;
	}

	if (unlikely(!(evmcs->hv_clean_fields &
		       HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1))) {
		vmcs12->vmcs_link_pointer = evmcs->vmcs_link_pointer;
		vmcs12->guest_ia32_debugctl = evmcs->guest_ia32_debugctl;
		vmcs12->guest_ia32_pat = evmcs->guest_ia32_pat;
		vmcs12->guest_ia32_efer = evmcs->guest_ia32_efer;
		vmcs12->guest_pdptr0 = evmcs->guest_pdptr0;
		vmcs12->guest_pdptr1 = evmcs->guest_pdptr1;
		vmcs12->guest_pdptr2 = evmcs->guest_pdptr2;
		vmcs12->guest_pdptr3 = evmcs->guest_pdptr3;
		vmcs12->guest_pending_dbg_exceptions =
			evmcs->guest_pending_dbg_exceptions;
		vmcs12->guest_sysenter_esp = evmcs->guest_sysenter_esp;
		vmcs12->guest_sysenter_eip = evmcs->guest_sysenter_eip;
		vmcs12->guest_bndcfgs = evmcs->guest_bndcfgs;
		vmcs12->guest_activity_state = evmcs->guest_activity_state;
		vmcs12->guest_sysenter_cs = evmcs->guest_sysenter_cs;
	}

	/*
	 * Not used?
	 * vmcs12->vm_exit_msr_store_addr = evmcs->vm_exit_msr_store_addr;
	 * vmcs12->vm_exit_msr_load_addr = evmcs->vm_exit_msr_load_addr;
	 * vmcs12->vm_entry_msr_load_addr = evmcs->vm_entry_msr_load_addr;
	 * vmcs12->cr3_target_value0 = evmcs->cr3_target_value0;
	 * vmcs12->cr3_target_value1 = evmcs->cr3_target_value1;
	 * vmcs12->cr3_target_value2 = evmcs->cr3_target_value2;
	 * vmcs12->cr3_target_value3 = evmcs->cr3_target_value3;
	 * vmcs12->page_fault_error_code_mask =
	 *		evmcs->page_fault_error_code_mask;
	 * vmcs12->page_fault_error_code_match =
	 *		evmcs->page_fault_error_code_match;
	 * vmcs12->cr3_target_count = evmcs->cr3_target_count;
	 * vmcs12->vm_exit_msr_store_count = evmcs->vm_exit_msr_store_count;
	 * vmcs12->vm_exit_msr_load_count = evmcs->vm_exit_msr_load_count;
	 * vmcs12->vm_entry_msr_load_count = evmcs->vm_entry_msr_load_count;
	 */

	/*
	 * Read only fields:
	 * vmcs12->guest_physical_address = evmcs->guest_physical_address;
	 * vmcs12->vm_instruction_error = evmcs->vm_instruction_error;
	 * vmcs12->vm_exit_reason = evmcs->vm_exit_reason;
	 * vmcs12->vm_exit_intr_info = evmcs->vm_exit_intr_info;
	 * vmcs12->vm_exit_intr_error_code = evmcs->vm_exit_intr_error_code;
	 * vmcs12->idt_vectoring_info_field = evmcs->idt_vectoring_info_field;
	 * vmcs12->idt_vectoring_error_code = evmcs->idt_vectoring_error_code;
	 * vmcs12->vm_exit_instruction_len = evmcs->vm_exit_instruction_len;
	 * vmcs12->vmx_instruction_info = evmcs->vmx_instruction_info;
	 * vmcs12->exit_qualification = evmcs->exit_qualification;
	 * vmcs12->guest_linear_address = evmcs->guest_linear_address;
	 *
	 * Not present in struct vmcs12:
	 * vmcs12->exit_io_instruction_ecx = evmcs->exit_io_instruction_ecx;
	 * vmcs12->exit_io_instruction_esi = evmcs->exit_io_instruction_esi;
	 * vmcs12->exit_io_instruction_edi = evmcs->exit_io_instruction_edi;
	 * vmcs12->exit_io_instruction_eip = evmcs->exit_io_instruction_eip;
	 */

	return 0;
}

static int copy_vmcs12_to_enlightened(struct vcpu_vmx *vmx)
{
	struct vmcs12 *vmcs12 = vmx->nested.cached_vmcs12;
	struct hv_enlightened_vmcs *evmcs = vmx->nested.hv_evmcs;

	/*
	 * Should not be changed by KVM:
	 *
	 * evmcs->host_es_selector = vmcs12->host_es_selector;
	 * evmcs->host_cs_selector = vmcs12->host_cs_selector;
	 * evmcs->host_ss_selector = vmcs12->host_ss_selector;
	 * evmcs->host_ds_selector = vmcs12->host_ds_selector;
	 * evmcs->host_fs_selector = vmcs12->host_fs_selector;
	 * evmcs->host_gs_selector = vmcs12->host_gs_selector;
	 * evmcs->host_tr_selector = vmcs12->host_tr_selector;
	 * evmcs->host_ia32_pat = vmcs12->host_ia32_pat;
	 * evmcs->host_ia32_efer = vmcs12->host_ia32_efer;
	 * evmcs->host_cr0 = vmcs12->host_cr0;
	 * evmcs->host_cr3 = vmcs12->host_cr3;
	 * evmcs->host_cr4 = vmcs12->host_cr4;
	 * evmcs->host_ia32_sysenter_esp = vmcs12->host_ia32_sysenter_esp;
	 * evmcs->host_ia32_sysenter_eip = vmcs12->host_ia32_sysenter_eip;
	 * evmcs->host_rip = vmcs12->host_rip;
	 * evmcs->host_ia32_sysenter_cs = vmcs12->host_ia32_sysenter_cs;
	 * evmcs->host_fs_base = vmcs12->host_fs_base;
	 * evmcs->host_gs_base = vmcs12->host_gs_base;
	 * evmcs->host_tr_base = vmcs12->host_tr_base;
	 * evmcs->host_gdtr_base = vmcs12->host_gdtr_base;
	 * evmcs->host_idtr_base = vmcs12->host_idtr_base;
	 * evmcs->host_rsp = vmcs12->host_rsp;
	 *
	 * Sync of other fields is currently skipped as they are clean in
	 * vmcs12 (the corresponding hv_clean_fields bit stays set):
	 * evmcs->io_bitmap_a = vmcs12->io_bitmap_a;
	 * evmcs->io_bitmap_b = vmcs12->io_bitmap_b;
	 * evmcs->msr_bitmap = vmcs12->msr_bitmap;
	 * evmcs->ept_pointer = vmcs12->ept_pointer;
	 * evmcs->xss_exit_bitmap = vmcs12->xss_exit_bitmap;
	 * evmcs->vm_exit_msr_store_addr = vmcs12->vm_exit_msr_store_addr;
	 * evmcs->vm_exit_msr_load_addr = vmcs12->vm_exit_msr_load_addr;
	 * evmcs->vm_entry_msr_load_addr = vmcs12->vm_entry_msr_load_addr;
	 * evmcs->tpr_threshold = vmcs12->tpr_threshold;
	 * evmcs->virtual_processor_id = vmcs12->virtual_processor_id;
	 * evmcs->exception_bitmap = vmcs12->exception_bitmap;
	 * evmcs->vmcs_link_pointer = vmcs12->vmcs_link_pointer;
	 * evmcs->pin_based_vm_exec_control = vmcs12->pin_based_vm_exec_control;
	 * evmcs->vm_exit_controls = vmcs12->vm_exit_controls;
	 * evmcs->secondary_vm_exec_control = vmcs12->secondary_vm_exec_control;
	 * evmcs->page_fault_error_code_mask =
	 *		vmcs12->page_fault_error_code_mask;
	 * evmcs->page_fault_error_code_match =
	 *		vmcs12->page_fault_error_code_match;
	 * evmcs->cr3_target_count = vmcs12->cr3_target_count;
	 * evmcs->virtual_apic_page_addr = vmcs12->virtual_apic_page_addr;
	 * evmcs->tsc_offset = vmcs12->tsc_offset;
	 * evmcs->guest_ia32_debugctl = vmcs12->guest_ia32_debugctl;
	 * evmcs->cr0_guest_host_mask = vmcs12->cr0_guest_host_mask;
	 * evmcs->cr4_guest_host_mask = vmcs12->cr4_guest_host_mask;
	 * evmcs->cr0_read_shadow = vmcs12->cr0_read_shadow;
	 * evmcs->cr4_read_shadow = vmcs12->cr4_read_shadow;
	 * evmcs->vm_exit_msr_store_count = vmcs12->vm_exit_msr_store_count;
	 * evmcs->vm_exit_msr_load_count = vmcs12->vm_exit_msr_load_count;
	 * evmcs->vm_entry_msr_load_count = vmcs12->vm_entry_msr_load_count;
	 *
	 * Not present in struct vmcs12:
	 * evmcs->exit_io_instruction_ecx = vmcs12->exit_io_instruction_ecx;
	 * evmcs->exit_io_instruction_esi = vmcs12->exit_io_instruction_esi;
	 * evmcs->exit_io_instruction_edi = vmcs12->exit_io_instruction_edi;
	 * evmcs->exit_io_instruction_eip = vmcs12->exit_io_instruction_eip;
	 */
	evmcs->guest_es_selector = vmcs12->guest_es_selector;
	evmcs->guest_cs_selector = vmcs12->guest_cs_selector;
	evmcs->guest_ss_selector = vmcs12->guest_ss_selector;
	evmcs->guest_ds_selector = vmcs12->guest_ds_selector;
	evmcs->guest_fs_selector = vmcs12->guest_fs_selector;
	evmcs->guest_gs_selector = vmcs12->guest_gs_selector;
	evmcs->guest_ldtr_selector = vmcs12->guest_ldtr_selector;
	evmcs->guest_tr_selector = vmcs12->guest_tr_selector;

	evmcs->guest_es_limit = vmcs12->guest_es_limit;
	evmcs->guest_cs_limit = vmcs12->guest_cs_limit;
	evmcs->guest_ss_limit = vmcs12->guest_ss_limit;
	evmcs->guest_ds_limit = vmcs12->guest_ds_limit;
	evmcs->guest_fs_limit = vmcs12->guest_fs_limit;
	evmcs->guest_gs_limit = vmcs12->guest_gs_limit;
	evmcs->guest_ldtr_limit = vmcs12->guest_ldtr_limit;
	evmcs->guest_tr_limit = vmcs12->guest_tr_limit;
	evmcs->guest_gdtr_limit = vmcs12->guest_gdtr_limit;
	evmcs->guest_idtr_limit = vmcs12->guest_idtr_limit;

	evmcs->guest_es_ar_bytes = vmcs12->guest_es_ar_bytes;
	evmcs->guest_cs_ar_bytes = vmcs12->guest_cs_ar_bytes;
	evmcs->guest_ss_ar_bytes = vmcs12->guest_ss_ar_bytes;
	evmcs->guest_ds_ar_bytes = vmcs12->guest_ds_ar_bytes;
	evmcs->guest_fs_ar_bytes = vmcs12->guest_fs_ar_bytes;
	evmcs->guest_gs_ar_bytes = vmcs12->guest_gs_ar_bytes;
	evmcs->guest_ldtr_ar_bytes = vmcs12->guest_ldtr_ar_bytes;
	evmcs->guest_tr_ar_bytes = vmcs12->guest_tr_ar_bytes;

	evmcs->guest_es_base = vmcs12->guest_es_base;
	evmcs->guest_cs_base = vmcs12->guest_cs_base;
	evmcs->guest_ss_base = vmcs12->guest_ss_base;
	evmcs->guest_ds_base = vmcs12->guest_ds_base;
	evmcs->guest_fs_base = vmcs12->guest_fs_base;
	evmcs->guest_gs_base = vmcs12->guest_gs_base;
	evmcs->guest_ldtr_base = vmcs12->guest_ldtr_base;
	evmcs->guest_tr_base = vmcs12->guest_tr_base;
	evmcs->guest_gdtr_base = vmcs12->guest_gdtr_base;
	evmcs->guest_idtr_base = vmcs12->guest_idtr_base;

	evmcs->guest_ia32_pat = vmcs12->guest_ia32_pat;
	evmcs->guest_ia32_efer = vmcs12->guest_ia32_efer;

	evmcs->guest_pdptr0 = vmcs12->guest_pdptr0;
	evmcs->guest_pdptr1 = vmcs12->guest_pdptr1;
	evmcs->guest_pdptr2 = vmcs12->guest_pdptr2;
	evmcs->guest_pdptr3 = vmcs12->guest_pdptr3;

	evmcs->guest_pending_dbg_exceptions =
		vmcs12->guest_pending_dbg_exceptions;
	evmcs->guest_sysenter_esp = vmcs12->guest_sysenter_esp;
	evmcs->guest_sysenter_eip = vmcs12->guest_sysenter_eip;

	evmcs->guest_activity_state = vmcs12->guest_activity_state;
	evmcs->guest_sysenter_cs = vmcs12->guest_sysenter_cs;

	evmcs->guest_cr0 = vmcs12->guest_cr0;
	evmcs->guest_cr3 = vmcs12->guest_cr3;
	evmcs->guest_cr4 = vmcs12->guest_cr4;
	evmcs->guest_dr7 = vmcs12->guest_dr7;

	evmcs->guest_physical_address = vmcs12->guest_physical_address;

	evmcs->vm_instruction_error = vmcs12->vm_instruction_error;
	evmcs->vm_exit_reason = vmcs12->vm_exit_reason;
	evmcs->vm_exit_intr_info = vmcs12->vm_exit_intr_info;
	evmcs->vm_exit_intr_error_code = vmcs12->vm_exit_intr_error_code;
	evmcs->idt_vectoring_info_field = vmcs12->idt_vectoring_info_field;
	evmcs->idt_vectoring_error_code = vmcs12->idt_vectoring_error_code;
	evmcs->vm_exit_instruction_len = vmcs12->vm_exit_instruction_len;
	evmcs->vmx_instruction_info = vmcs12->vmx_instruction_info;

	evmcs->exit_qualification = vmcs12->exit_qualification;

	evmcs->guest_linear_address = vmcs12->guest_linear_address;
	evmcs->guest_rsp = vmcs12->guest_rsp;
	evmcs->guest_rflags = vmcs12->guest_rflags;

	evmcs->guest_interruptibility_info =
		vmcs12->guest_interruptibility_info;
	evmcs->cpu_based_vm_exec_control = vmcs12->cpu_based_vm_exec_control;
	evmcs->vm_entry_controls = vmcs12->vm_entry_controls;
	evmcs->vm_entry_intr_info_field = vmcs12->vm_entry_intr_info_field;
	evmcs->vm_entry_exception_error_code =
		vmcs12->vm_entry_exception_error_code;
	evmcs->vm_entry_instruction_len = vmcs12->vm_entry_instruction_len;

	evmcs->guest_rip = vmcs12->guest_rip;

	evmcs->guest_bndcfgs = vmcs12->guest_bndcfgs;

	return 0;
}

/*
 * This is an equivalent of the nested hypervisor executing the vmptrld
 * instruction.
 */
static int nested_vmx_handle_enlightened_vmptrld(struct kvm_vcpu *vcpu,
						 bool from_launch)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	bool evmcs_gpa_changed = false;
	u64 evmcs_gpa;

	if (likely(!vmx->nested.enlightened_vmcs_enabled))
		return 1;

	if (!nested_enlightened_vmentry(vcpu, &evmcs_gpa))
		return 1;

	if (unlikely(evmcs_gpa != vmx->nested.hv_evmcs_vmptr)) {
		if (!vmx->nested.hv_evmcs)
			vmx->nested.current_vmptr = -1ull;

		nested_release_evmcs(vcpu);

		if (kvm_vcpu_map(vcpu, gpa_to_gfn(evmcs_gpa),
				 &vmx->nested.hv_evmcs_map))
			return 0;

		vmx->nested.hv_evmcs = vmx->nested.hv_evmcs_map.hva;

		/*
		 * Currently, KVM only supports eVMCS version 1
		 * (== KVM_EVMCS_VERSION) and thus we expect guest to set this
		 * value to first u32 field of eVMCS which should specify eVMCS
		 * VersionNumber.
		 *
		 * Guest should be aware of supported eVMCS versions by host by
		 * examining CPUID.0x4000000A.EAX[0:15]. Host userspace VMM is
		 * expected to set this CPUID leaf according to the value
		 * supported by the underlying hardware.
		 *
		 * However, it turns out that Microsoft Hyper-V fails to comply
		 * to their own invented interface: When Hyper-V use eVMCS, it
		 * just sets first u32 field of eVMCS to revision_id specified
		 * in MSR_IA32_VMX_BASIC. Instead of used eVMCS version number
		 * which is one of the supported versions specified in
		 * CPUID.0x4000000A.EAX[0:15].
		 *
		 * To overcome Hyper-V bug, we accept here either a supported
		 * eVMCS version or VMCS12 revision_id as valid values for first
		 * u32 field of eVMCS.
		 */
		if ((vmx->nested.hv_evmcs->revision_id != KVM_EVMCS_VERSION) &&
		    (vmx->nested.hv_evmcs->revision_id != VMCS12_REVISION)) {
			nested_release_evmcs(vcpu);
			return 0;
		}

		vmx->nested.dirty_vmcs12 = true;
		vmx->nested.hv_evmcs_vmptr = evmcs_gpa;

		evmcs_gpa_changed = true;

		/*
		 * Unlike normal vmcs12, enlightened vmcs12 is not fully
		 * reloaded from guest's memory (read only fields, fields not
		 * present in struct hv_enlightened_vmcs, ...). Make sure there
		 * are no leftovers.
		 */
		if (from_launch) {
			struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
			memset(vmcs12, 0, sizeof(*vmcs12));
			vmcs12->hdr.revision_id = VMCS12_REVISION;
		}

	}

	/*
	 * Clean fields data can't be used on VMLAUNCH and when we switch
	 * between different L2 guests as KVM keeps a single VMCS12 per L1.
	 */
	if (from_launch || evmcs_gpa_changed)
		vmx->nested.hv_evmcs->hv_clean_fields &=
			~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;

	return 1;
}

void nested_sync_vmcs12_to_shadow(struct kvm_vcpu *vcpu)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);

	/*
	 * hv_evmcs may end up being not mapped after migration (when
	 * L2 was running), map it here to make sure vmcs12 changes are
	 * properly reflected.
	 */
	if (vmx->nested.enlightened_vmcs_enabled && !vmx->nested.hv_evmcs)
		nested_vmx_handle_enlightened_vmptrld(vcpu, false);

	if (vmx->nested.hv_evmcs) {
		copy_vmcs12_to_enlightened(vmx);
		/* All fields are clean */
		vmx->nested.hv_evmcs->hv_clean_fields |=
			HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
	} else {
		copy_vmcs12_to_shadow(vmx);
	}

	vmx->nested.need_vmcs12_to_shadow_sync = false;
}

static enum hrtimer_restart vmx_preemption_timer_fn(struct hrtimer *timer)
{
	struct vcpu_vmx *vmx =
		container_of(timer, struct vcpu_vmx, nested.preemption_timer);

	vmx->nested.preemption_timer_expired = true;
	kvm_make_request(KVM_REQ_EVENT, &vmx->vcpu);
	kvm_vcpu_kick(&vmx->vcpu);

	return HRTIMER_NORESTART;
}

static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu)
{
	u64 preemption_timeout = get_vmcs12(vcpu)->vmx_preemption_timer_value;
	struct vcpu_vmx *vmx = to_vmx(vcpu);

	/*
	 * A timer value of zero is architecturally guaranteed to cause
	 * a VMExit prior to executing any instructions in the guest.
	 */
	if (preemption_timeout == 0) {
		vmx_preemption_timer_fn(&vmx->nested.preemption_timer);
		return;
	}

	if (vcpu->arch.virtual_tsc_khz == 0)
		return;

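	/*
	 * Convert the timer value to nanoseconds: each timer unit is
	 * 2^VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE TSC cycles, so scale by
	 * the rate and divide by the guest's TSC frequency (in kHz).
	 */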
	preemption_timeout <<= VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE;
	preemption_timeout *= 1000000;
	do_div(preemption_timeout, vcpu->arch.virtual_tsc_khz);
	hrtimer_start(&vmx->nested.preemption_timer,
		      ns_to_ktime(preemption_timeout), HRTIMER_MODE_REL);
}

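/*
 * Compute the EFER L2 runs with: take it from vmcs12 when VM-entry loads
 * EFER, otherwise derive LMA/LME from the vmcs12 "IA-32e mode guest" entry
 * control on top of L1's EFER.
 */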
static u64 nested_vmx_calc_efer(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
{
	if (vmx->nested.nested_run_pending &&
	    (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER))
		return vmcs12->guest_ia32_efer;
	else if (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE)
		return vmx->vcpu.arch.efer | (EFER_LMA | EFER_LME);
	else
		return vmx->vcpu.arch.efer & ~(EFER_LMA | EFER_LME);
}

static void prepare_vmcs02_constant_state(struct vcpu_vmx *vmx)
{
	/*
	 * If vmcs02 hasn't been initialized, set the constant vmcs02 state
	 * according to L0's settings (vmcs12 is irrelevant here).  Host
	 * fields that come from L0 and are not constant, e.g. HOST_CR3,
	 * will be set as needed prior to VMLAUNCH/VMRESUME.
	 */
	if (vmx->nested.vmcs02_initialized)
		return;
	vmx->nested.vmcs02_initialized = true;

	/*
	 * We don't care what the EPTP value is we just need to guarantee
	 * it's valid so we don't get a false positive when doing early
	 * consistency checks.
	 */
	if (enable_ept && nested_early_check)
		vmcs_write64(EPT_POINTER, construct_eptp(&vmx->vcpu, 0));

	/* All VMFUNCs are currently emulated through L0 vmexits.  */
	if (cpu_has_vmx_vmfunc())
		vmcs_write64(VM_FUNCTION_CONTROL, 0);

	if (cpu_has_vmx_posted_intr())
		vmcs_write16(POSTED_INTR_NV, POSTED_INTR_NESTED_VECTOR);

	if (cpu_has_vmx_msr_bitmap())
		vmcs_write64(MSR_BITMAP, __pa(vmx->nested.vmcs02.msr_bitmap));

	/*
	 * The PML address never changes, so it is constant in vmcs02.
	 * Conceptually we want to copy the PML index from vmcs01 here,
	 * and then back to vmcs01 on nested vmexit.  But since we flush
	 * the log and reset GUEST_PML_INDEX on each vmexit, the PML
	 * index is also effectively constant in vmcs02.
	 */
	if (enable_pml) {
		vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg));
		vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
	}

	if (cpu_has_vmx_encls_vmexit())
		vmcs_write64(ENCLS_EXITING_BITMAP, -1ull);

	/*
	 * Set the MSR load/store lists to match L0's settings.  Only the
	 * addresses are constant (for vmcs02), the counts can change based
	 * on L2's behavior, e.g. switching to/from long mode.
	 */
	vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0);
	vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host.val));
	vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest.val));

	vmx_set_constant_host_state(vmx);
}

static void prepare_vmcs02_early_rare(struct vcpu_vmx *vmx,
				      struct vmcs12 *vmcs12)
{
	prepare_vmcs02_constant_state(vmx);

	vmcs_write64(VMCS_LINK_POINTER, -1ull);

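	/*
	 * Tag L2's TLB entries with the dedicated vpid02 when L1 uses VPID;
	 * otherwise fall back to L1's tag (vmx->vpid), in which case KVM
	 * must flush the TLB on every nested transition.
	 */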
	if (enable_vpid) {
		if (nested_cpu_has_vpid(vmcs12) && vmx->nested.vpid02)
			vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->nested.vpid02);
		else
			vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
	}
}

static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
{
	u32 exec_control, vmcs12_exec_ctrl;
	u64 guest_efer = nested_vmx_calc_efer(vmx, vmcs12);

	if (vmx->nested.dirty_vmcs12 || vmx->nested.hv_evmcs)
		prepare_vmcs02_early_rare(vmx, vmcs12);

	/*
	 * PIN CONTROLS
	 */
	exec_control = vmx_pin_based_exec_ctrl(vmx);
	exec_control |= (vmcs12->pin_based_vm_exec_control &
			 ~PIN_BASED_VMX_PREEMPTION_TIMER);

	/* Posted interrupts setting is only taken from vmcs12.  */
	if (nested_cpu_has_posted_intr(vmcs12)) {
		vmx->nested.posted_intr_nv = vmcs12->posted_intr_nv;
		vmx->nested.pi_pending = false;
	} else {
		exec_control &= ~PIN_BASED_POSTED_INTR;
	}
	pin_controls_set(vmx, exec_control);

	/*
	 * EXEC CONTROLS
	 */
	exec_control = vmx_exec_control(vmx); /* L0's desires */
	exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING;
	exec_control &= ~CPU_BASED_VIRTUAL_NMI_PENDING;
	exec_control &= ~CPU_BASED_TPR_SHADOW;
	exec_control |= vmcs12->cpu_based_vm_exec_control;

	if (exec_control & CPU_BASED_TPR_SHADOW)
		vmcs_write32(TPR_THRESHOLD, vmcs12->tpr_threshold);
#ifdef CONFIG_X86_64
	else
		exec_control |= CPU_BASED_CR8_LOAD_EXITING |
				CPU_BASED_CR8_STORE_EXITING;
#endif

	/*
	 * A vmexit (to either L1 hypervisor or L0 userspace) is always needed
	 * for I/O port accesses.
	 */
	exec_control |= CPU_BASED_UNCOND_IO_EXITING;
	exec_control &= ~CPU_BASED_USE_IO_BITMAPS;

	/*
	 * This bit will be computed in nested_get_vmcs12_pages, because
	 * we do not have access to L1's MSR bitmap yet.  For now, keep
	 * the same bit as before, hoping to avoid multiple VMWRITEs that
	 * only set/clear enable_msr_bitmap to avoid VMWRITE overhead.
	 */
	exec_control &= ~CPU_BASED_USE_MSR_BITMAPS;
	exec_control |= exec_controls_get(vmx) & CPU_BASED_USE_MSR_BITMAPS;

	exec_controls_set(vmx, exec_control);

	/*
	 * SECONDARY EXEC CONTROLS
	 */
	if (cpu_has_secondary_exec_ctrls()) {
		exec_control = vmx->secondary_exec_control;

		/* Take the following fields only from vmcs12 */
		exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
				  SECONDARY_EXEC_ENABLE_INVPCID |
				  SECONDARY_EXEC_RDTSCP |
				  SECONDARY_EXEC_XSAVES |
				  SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
				  SECONDARY_EXEC_APIC_REGISTER_VIRT |
				  SECONDARY_EXEC_ENABLE_VMFUNC);
		if (nested_cpu_has(vmcs12,
				   CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)) {
			vmcs12_exec_ctrl = vmcs12->secondary_vm_exec_control &
				~SECONDARY_EXEC_ENABLE_PML;
			exec_control |= vmcs12_exec_ctrl;
		}

		/* VMCS shadowing for L2 is emulated for now */
		exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS;

		/*
		 * Preset *DT exiting when emulating UMIP, so that vmx_set_cr4()
		 * will not have to rewrite the controls just for this bit.
		 */
		if (!boot_cpu_has(X86_FEATURE_UMIP) && vmx_umip_emulated() &&
		    (vmcs12->guest_cr4 & X86_CR4_UMIP))
			exec_control |= SECONDARY_EXEC_DESC;

		if (exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY)
			vmcs_write16(GUEST_INTR_STATUS,
				vmcs12->guest_intr_status);

		secondary_exec_controls_set(vmx, exec_control);
	}

	/*
	 * ENTRY CONTROLS
	 *
	 * vmcs12's VM_{ENTRY,EXIT}_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE
	 * are emulated by vmx_set_efer() in prepare_vmcs02(), but speculate
	 * on the related bits (if supported by the CPU) in the hope that
	 * we can avoid VMWrites during vmx_set_efer().
	 */
	exec_control = (vmcs12->vm_entry_controls | vmx_vmentry_ctrl()) &
			~VM_ENTRY_IA32E_MODE & ~VM_ENTRY_LOAD_IA32_EFER;
	if (cpu_has_load_ia32_efer()) {
		if (guest_efer & EFER_LMA)
			exec_control |= VM_ENTRY_IA32E_MODE;
		if (guest_efer != host_efer)
			exec_control |= VM_ENTRY_LOAD_IA32_EFER;
	}
	vm_entry_controls_set(vmx, exec_control);

	/*
	 * EXIT CONTROLS
	 *
	 * L2->L1 exit controls are emulated - the hardware exit is to L0 so
	 * we should use its exit controls. Note that VM_EXIT_LOAD_IA32_EFER
	 * bits may be modified by vmx_set_efer() in prepare_vmcs02().
	 */
	exec_control = vmx_vmexit_ctrl();
	if (cpu_has_load_ia32_efer() && guest_efer != host_efer)
		exec_control |= VM_EXIT_LOAD_IA32_EFER;
	vm_exit_controls_set(vmx, exec_control);

	/*
	 * Interrupt/Exception Fields
	 */
	if (vmx->nested.nested_run_pending) {
		vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
			     vmcs12->vm_entry_intr_info_field);
		vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE,
			     vmcs12->vm_entry_exception_error_code);
		vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
			     vmcs12->vm_entry_instruction_len);
		vmcs_write32(GUEST_INTERRUPTIBILITY_INFO,
			     vmcs12->guest_interruptibility_info);
		vmx->loaded_vmcs->nmi_known_unmasked =
			!(vmcs12->guest_interruptibility_info & GUEST_INTR_STATE_NMI);
	} else {
		vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0);
	}
}

static void prepare_vmcs02_rare(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
{
	struct hv_enlightened_vmcs *hv_evmcs = vmx->nested.hv_evmcs;

	if (!hv_evmcs || !(hv_evmcs->hv_clean_fields &
			   HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2)) {
		vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector);
		vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector);
		vmcs_write16(GUEST_SS_SELECTOR, vmcs12->guest_ss_selector);
		vmcs_write16(GUEST_DS_SELECTOR, vmcs12->guest_ds_selector);
		vmcs_write16(GUEST_FS_SELECTOR, vmcs12->guest_fs_selector);
		vmcs_write16(GUEST_GS_SELECTOR, vmcs12->guest_gs_selector);
		vmcs_write16(GUEST_LDTR_SELECTOR, vmcs12->guest_ldtr_selector);
		vmcs_write16(GUEST_TR_SELECTOR, vmcs12->guest_tr_selector);
		vmcs_write32(GUEST_ES_LIMIT, vmcs12->guest_es_limit);
		vmcs_write32(GUEST_CS_LIMIT, vmcs12->guest_cs_limit);
		vmcs_write32(GUEST_SS_LIMIT, vmcs12->guest_ss_limit);
		vmcs_write32(GUEST_DS_LIMIT, vmcs12->guest_ds_limit);
		vmcs_write32(GUEST_FS_LIMIT, vmcs12->guest_fs_limit);
		vmcs_write32(GUEST_GS_LIMIT, vmcs12->guest_gs_limit);
		vmcs_write32(GUEST_LDTR_LIMIT, vmcs12->guest_ldtr_limit);
		vmcs_write32(GUEST_TR_LIMIT, vmcs12->guest_tr_limit);
		vmcs_write32(GUEST_GDTR_LIMIT, vmcs12->guest_gdtr_limit);
		vmcs_write32(GUEST_IDTR_LIMIT, vmcs12->guest_idtr_limit);
		vmcs_write32(GUEST_CS_AR_BYTES, vmcs12->guest_cs_ar_bytes);
		vmcs_write32(GUEST_SS_AR_BYTES, vmcs12->guest_ss_ar_bytes);
		vmcs_write32(GUEST_ES_AR_BYTES, vmcs12->guest_es_ar_bytes);
		vmcs_write32(GUEST_DS_AR_BYTES, vmcs12->guest_ds_ar_bytes);
		vmcs_write32(GUEST_FS_AR_BYTES, vmcs12->guest_fs_ar_bytes);
		vmcs_write32(GUEST_GS_AR_BYTES, vmcs12->guest_gs_ar_bytes);
		vmcs_write32(GUEST_LDTR_AR_BYTES, vmcs12->guest_ldtr_ar_bytes);
		vmcs_write32(GUEST_TR_AR_BYTES, vmcs12->guest_tr_ar_bytes);
		vmcs_writel(GUEST_ES_BASE, vmcs12->guest_es_base);
		vmcs_writel(GUEST_CS_BASE, vmcs12->guest_cs_base);
		vmcs_writel(GUEST_SS_BASE, vmcs12->guest_ss_base);
		vmcs_writel(GUEST_DS_BASE, vmcs12->guest_ds_base);
		vmcs_writel(GUEST_FS_BASE, vmcs12->guest_fs_base);
		vmcs_writel(GUEST_GS_BASE, vmcs12->guest_gs_base);
		vmcs_writel(GUEST_LDTR_BASE, vmcs12->guest_ldtr_base);
		vmcs_writel(GUEST_TR_BASE, vmcs12->guest_tr_base);
		vmcs_writel(GUEST_GDTR_BASE, vmcs12->guest_gdtr_base);
		vmcs_writel(GUEST_IDTR_BASE, vmcs12->guest_idtr_base);
	}

	if (!hv_evmcs || !(hv_evmcs->hv_clean_fields &
			   HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1)) {
		vmcs_write32(GUEST_SYSENTER_CS, vmcs12->guest_sysenter_cs);
		vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS,
			    vmcs12->guest_pending_dbg_exceptions);
		vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->guest_sysenter_esp);
		vmcs_writel(GUEST_SYSENTER_EIP, vmcs12->guest_sysenter_eip);
2216
2217
2218
2219
2220
2221 if (enable_ept) {
2222 vmcs_write64(GUEST_PDPTR0, vmcs12->guest_pdptr0);
2223 vmcs_write64(GUEST_PDPTR1, vmcs12->guest_pdptr1);
2224 vmcs_write64(GUEST_PDPTR2, vmcs12->guest_pdptr2);
2225 vmcs_write64(GUEST_PDPTR3, vmcs12->guest_pdptr3);
2226 }
2227
2228 if (kvm_mpx_supported() && vmx->nested.nested_run_pending &&
2229 (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS))
2230 vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs);
2231 }
2232
2233 if (nested_cpu_has_xsaves(vmcs12))
2234 vmcs_write64(XSS_EXIT_BITMAP, vmcs12->xss_exit_bitmap);
2235
2236
2237
2238
2239
2240
2241
2242
2243
2244
2245
2246
2247
2248
2249
2250 vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK,
2251 enable_ept ? vmcs12->page_fault_error_code_mask : 0);
2252 vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH,
2253 enable_ept ? vmcs12->page_fault_error_code_match : 0);
2254
2255 if (cpu_has_vmx_apicv()) {
2256 vmcs_write64(EOI_EXIT_BITMAP0, vmcs12->eoi_exit_bitmap0);
2257 vmcs_write64(EOI_EXIT_BITMAP1, vmcs12->eoi_exit_bitmap1);
2258 vmcs_write64(EOI_EXIT_BITMAP2, vmcs12->eoi_exit_bitmap2);
2259 vmcs_write64(EOI_EXIT_BITMAP3, vmcs12->eoi_exit_bitmap3);
2260 }
2261
2262 vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr);
2263 vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr);
2264
2265 set_cr4_guest_host_mask(vmx);
2266}

/*
 * prepare_vmcs02 is called when the L1 guest hypervisor runs its nested
 * L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it
 * with L0's requirements for its guest (a.k.a. vmcs01), so we can run the L2
 * guest in a way that will both be appropriate to L1's requests, and our
 * needs. In addition to modifying the active vmcs (which is vmcs02), this
 * function also has additional necessary side-effects, like setting various
 * vcpu->arch fields.
 * Returns 0 on success, -EINVAL on failure. An invalid-state exit
 * qualification code is assigned to *entry_failure_code on failure.
 */
static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
			  u32 *entry_failure_code)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	struct hv_enlightened_vmcs *hv_evmcs = vmx->nested.hv_evmcs;
	bool load_guest_pdptrs_vmcs12 = false;

	if (vmx->nested.dirty_vmcs12 || hv_evmcs) {
		prepare_vmcs02_rare(vmx, vmcs12);
		vmx->nested.dirty_vmcs12 = false;

		load_guest_pdptrs_vmcs12 = !hv_evmcs ||
			!(hv_evmcs->hv_clean_fields &
			  HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1);
	}

	if (vmx->nested.nested_run_pending &&
	    (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) {
		kvm_set_dr(vcpu, 7, vmcs12->guest_dr7);
		vmcs_write64(GUEST_IA32_DEBUGCTL, vmcs12->guest_ia32_debugctl);
	} else {
		kvm_set_dr(vcpu, 7, vcpu->arch.dr7);
		vmcs_write64(GUEST_IA32_DEBUGCTL, vmx->nested.vmcs01_debugctl);
	}
	if (kvm_mpx_supported() && (!vmx->nested.nested_run_pending ||
	    !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS)))
		vmcs_write64(GUEST_BNDCFGS, vmx->nested.vmcs01_guest_bndcfgs);
	vmx_set_rflags(vcpu, vmcs12->guest_rflags);

	/* EXCEPTION_BITMAP and CR0_GUEST_HOST_MASK should basically output
	 * bitwise-or of what L1 wants to trap for L2, and what we want to
	 * trap. Note that CR0.TS also needs updating - we do this later.
	 */
	update_exception_bitmap(vcpu);
	vcpu->arch.cr0_guest_owned_bits &= ~vmcs12->cr0_guest_host_mask;
	vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits);

	if (vmx->nested.nested_run_pending &&
	    (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT)) {
		vmcs_write64(GUEST_IA32_PAT, vmcs12->guest_ia32_pat);
		vcpu->arch.pat = vmcs12->guest_ia32_pat;
	} else if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
		vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat);
	}

	vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset);

	if (kvm_has_tsc_control)
		decache_tsc_multiplier(vmx);

	if (enable_vpid) {
		/*
		 * There is no direct mapping between vpid02 and vpid12, the
		 * vpid02 is per-vCPU for L0 and reused while the value of
		 * vpid12 is changed w/ one invvpid during nested vmentry.
		 * The vpid12 is allocated by L1 for L2, so it will not
		 * influence global bitmap(for vpid01 and vpid02 allocation)
		 * even if spawn a lot of nested vCPUs.
		 */
		if (nested_cpu_has_vpid(vmcs12) && nested_has_guest_tlb_tag(vcpu)) {
			if (vmcs12->virtual_processor_id != vmx->nested.last_vpid) {
				vmx->nested.last_vpid = vmcs12->virtual_processor_id;
				__vmx_flush_tlb(vcpu, nested_get_vpid02(vcpu), false);
			}
		} else {
			/*
			 * If L1 uses EPT, then L0 needs to execute INVEPT on
			 * EPTP02 instead of EPTP01. Therefore, delay the TLB
			 * flush until vmcs02->eptp is fully updated by
			 * KVM_REQ_LOAD_CR3. Note that this assumes
			 * KVM_REQ_TLB_FLUSH is evaluated after KVM_REQ_LOAD_CR3
			 * in vcpu_enter_guest().
			 */
			kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
		}
	}

	if (nested_cpu_has_ept(vmcs12))
		nested_ept_init_mmu_context(vcpu);
	else if (nested_cpu_has2(vmcs12,
				 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
		vmx_flush_tlb(vcpu, true);

	/*
	 * This sets GUEST_CR0 to vmcs12->guest_cr0, possibly modifying those
	 * bits which we consider mandatory enabled.
	 * The CR0_READ_SHADOW is what L2 should have expected to read given
	 * the specifications by L1.
	 */
	vmx_set_cr0(vcpu, vmcs12->guest_cr0);
	vmcs_writel(CR0_READ_SHADOW, nested_read_cr0(vmcs12));

	vmx_set_cr4(vcpu, vmcs12->guest_cr4);
	vmcs_writel(CR4_READ_SHADOW, nested_read_cr4(vmcs12));

	vcpu->arch.efer = nested_vmx_calc_efer(vmx, vmcs12);
	/* Note: may modify VM_ENTRY/EXIT_CONTROLS and GUEST/HOST_IA32_EFER */
	vmx_set_efer(vcpu, vcpu->arch.efer);

	/*
	 * Guest state is invalid and unrestricted guest is disabled,
	 * which means L1 attempted VMEntry to L2 with invalid state.
	 * Fail the VMEntry.
	 */
	if (vmx->emulation_required) {
		*entry_failure_code = ENTRY_FAIL_DEFAULT;
		return -EINVAL;
	}

	/* Shadow page tables on either EPT or shadow page tables. */
	if (nested_vmx_load_cr3(vcpu, vmcs12->guest_cr3, nested_cpu_has_ept(vmcs12),
				entry_failure_code))
		return -EINVAL;

	/* Late preparation of GUEST_PDPTRs now that EFER and CRs are set. */
	if (load_guest_pdptrs_vmcs12 && nested_cpu_has_ept(vmcs12) &&
	    is_pae_paging(vcpu)) {
		vmcs_write64(GUEST_PDPTR0, vmcs12->guest_pdptr0);
		vmcs_write64(GUEST_PDPTR1, vmcs12->guest_pdptr1);
		vmcs_write64(GUEST_PDPTR2, vmcs12->guest_pdptr2);
		vmcs_write64(GUEST_PDPTR3, vmcs12->guest_pdptr3);
	}

	if (!enable_ept)
		vcpu->arch.walk_mmu->inject_page_fault = vmx_inject_page_fault_nested;

	kvm_rsp_write(vcpu, vmcs12->guest_rsp);
	kvm_rip_write(vcpu, vmcs12->guest_rip);
	return 0;
}

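/*
 * Virtual NMIs require NMI exiting, and NMI-window exiting requires
 * virtual NMIs; reject vmcs12 combinations that violate either rule.
 */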
static int nested_vmx_check_nmi_controls(struct vmcs12 *vmcs12)
{
	if (!nested_cpu_has_nmi_exiting(vmcs12) &&
	    nested_cpu_has_virtual_nmis(vmcs12))
		return -EINVAL;

	if (!nested_cpu_has_virtual_nmis(vmcs12) &&
	    nested_cpu_has(vmcs12, CPU_BASED_VIRTUAL_NMI_PENDING))
		return -EINVAL;

	return 0;
}

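/*
 * Validate an EPTP that L1 wants to use: memory type, page-walk length,
 * reserved bits and, when set, the accessed/dirty enable bit.
 */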
static bool valid_ept_address(struct kvm_vcpu *vcpu, u64 address)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	int maxphyaddr = cpuid_maxphyaddr(vcpu);

	/* Check for memory type validity */
	switch (address & VMX_EPTP_MT_MASK) {
	case VMX_EPTP_MT_UC:
		if (!(vmx->nested.msrs.ept_caps & VMX_EPTP_UC_BIT))
			return false;
		break;
	case VMX_EPTP_MT_WB:
		if (!(vmx->nested.msrs.ept_caps & VMX_EPTP_WB_BIT))
			return false;
		break;
	default:
		return false;
	}

	/* only 4 levels page-walk length are valid */
	if ((address & VMX_EPTP_PWL_MASK) != VMX_EPTP_PWL_4)
		return false;

	/* Reserved bits should not be set */
	if (address >> maxphyaddr || ((address >> 7) & 0x1f))
		return false;

	/* AD, if set, should be supported */
	if (address & VMX_EPTP_AD_ENABLE_BIT) {
		if (!(vmx->nested.msrs.ept_caps & VMX_EPT_AD_BIT))
			return false;
	}

	return true;
}

/*
 * Checks related to VM-Execution Control Fields
 */
static int nested_check_vm_execution_controls(struct kvm_vcpu *vcpu,
					      struct vmcs12 *vmcs12)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);

	if (!vmx_control_verify(vmcs12->pin_based_vm_exec_control,
				vmx->nested.msrs.pinbased_ctls_low,
				vmx->nested.msrs.pinbased_ctls_high) ||
	    !vmx_control_verify(vmcs12->cpu_based_vm_exec_control,
				vmx->nested.msrs.procbased_ctls_low,
				vmx->nested.msrs.procbased_ctls_high))
		return -EINVAL;

	if (nested_cpu_has(vmcs12, CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) &&
	    !vmx_control_verify(vmcs12->secondary_vm_exec_control,
				vmx->nested.msrs.secondary_ctls_low,
				vmx->nested.msrs.secondary_ctls_high))
		return -EINVAL;

	if (vmcs12->cr3_target_count > nested_cpu_vmx_misc_cr3_count(vcpu) ||
	    nested_vmx_check_io_bitmap_controls(vcpu, vmcs12) ||
	    nested_vmx_check_msr_bitmap_controls(vcpu, vmcs12) ||
	    nested_vmx_check_tpr_shadow_controls(vcpu, vmcs12) ||
	    nested_vmx_check_apic_access_controls(vcpu, vmcs12) ||
	    nested_vmx_check_apicv_controls(vcpu, vmcs12) ||
	    nested_vmx_check_nmi_controls(vmcs12) ||
	    nested_vmx_check_pml_controls(vcpu, vmcs12) ||
	    nested_vmx_check_unrestricted_guest_controls(vcpu, vmcs12) ||
	    nested_vmx_check_mode_based_ept_exec_controls(vcpu, vmcs12) ||
	    nested_vmx_check_shadow_vmcs_controls(vcpu, vmcs12) ||
	    (nested_cpu_has_vpid(vmcs12) && !vmcs12->virtual_processor_id))
		return -EINVAL;

	if (!nested_cpu_has_preemption_timer(vmcs12) &&
	    nested_cpu_has_save_preemption_timer(vmcs12))
		return -EINVAL;

	if (nested_cpu_has_ept(vmcs12) &&
	    !valid_ept_address(vcpu, vmcs12->ept_pointer))
		return -EINVAL;

	if (nested_cpu_has_vmfunc(vmcs12)) {
		if (vmcs12->vm_function_control &
		    ~vmx->nested.msrs.vmfunc_controls)
			return -EINVAL;

		if (nested_cpu_has_eptp_switching(vmcs12)) {
			if (!nested_cpu_has_ept(vmcs12) ||
			    !page_address_valid(vcpu, vmcs12->eptp_list_address))
				return -EINVAL;
		}
	}

	return 0;
}

/*
 * Checks related to VM-Exit Control Fields
 */
static int nested_check_vm_exit_controls(struct kvm_vcpu *vcpu,
					 struct vmcs12 *vmcs12)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);

	if (!vmx_control_verify(vmcs12->vm_exit_controls,
				vmx->nested.msrs.exit_ctls_low,
				vmx->nested.msrs.exit_ctls_high) ||
	    nested_vmx_check_exit_msr_switch_controls(vcpu, vmcs12))
		return -EINVAL;

	return 0;
}

/*
 * Checks related to VM-Entry Control Fields
 */
static int nested_check_vm_entry_controls(struct kvm_vcpu *vcpu,
					  struct vmcs12 *vmcs12)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);

	if (!vmx_control_verify(vmcs12->vm_entry_controls,
				vmx->nested.msrs.entry_ctls_low,
				vmx->nested.msrs.entry_ctls_high))
		return -EINVAL;

	/*
	 * From the Intel SDM, volume 3:
	 * Fields relevant to VM-entry event injection must be set properly.
	 * These fields are the VM-entry interruption-information field, the
	 * VM-entry exception error code, and the VM-entry instruction length.
	 */
	if (vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK) {
		u32 intr_info = vmcs12->vm_entry_intr_info_field;
		u8 vector = intr_info & INTR_INFO_VECTOR_MASK;
		u32 intr_type = intr_info & INTR_INFO_INTR_TYPE_MASK;
		bool has_error_code = intr_info & INTR_INFO_DELIVER_CODE_MASK;
		bool should_have_error_code;
		bool urg = nested_cpu_has2(vmcs12,
					   SECONDARY_EXEC_UNRESTRICTED_GUEST);
		bool prot_mode = !urg || vmcs12->guest_cr0 & X86_CR0_PE;

		/* VM-entry interruption-info field: interruption type */
		if (intr_type == INTR_TYPE_RESERVED ||
		    (intr_type == INTR_TYPE_OTHER_EVENT &&
		     !nested_cpu_supports_monitor_trap_flag(vcpu)))
			return -EINVAL;

		/* VM-entry interruption-info field: vector */
		if ((intr_type == INTR_TYPE_NMI_INTR && vector != NMI_VECTOR) ||
		    (intr_type == INTR_TYPE_HARD_EXCEPTION && vector > 31) ||
		    (intr_type == INTR_TYPE_OTHER_EVENT && vector != 0))
			return -EINVAL;

		/* VM-entry interruption-info field: deliver error code */
		should_have_error_code =
			intr_type == INTR_TYPE_HARD_EXCEPTION && prot_mode &&
			x86_exception_has_error_code(vector);
		if (has_error_code != should_have_error_code)
			return -EINVAL;

		/* VM-entry exception error code */
		if (has_error_code &&
		    vmcs12->vm_entry_exception_error_code & GENMASK(31, 15))
			return -EINVAL;

		/* VM-entry interruption-info field: reserved bits */
		if (intr_info & INTR_INFO_RESVD_BITS_MASK)
			return -EINVAL;

		/* VM-entry instruction length */
		switch (intr_type) {
		case INTR_TYPE_SOFT_EXCEPTION:
		case INTR_TYPE_SOFT_INTR:
		case INTR_TYPE_PRIV_SW_EXCEPTION:
			if ((vmcs12->vm_entry_instruction_len > 15) ||
			    (vmcs12->vm_entry_instruction_len == 0 &&
			     !nested_cpu_has_zero_length_injection(vcpu)))
				return -EINVAL;
		}
	}

	if (nested_vmx_check_entry_msr_switch_controls(vcpu, vmcs12))
		return -EINVAL;

	return 0;
}

static int nested_vmx_check_controls(struct kvm_vcpu *vcpu,
				     struct vmcs12 *vmcs12)
{
	if (nested_check_vm_execution_controls(vcpu, vmcs12) ||
	    nested_check_vm_exit_controls(vcpu, vmcs12) ||
	    nested_check_vm_entry_controls(vcpu, vmcs12))
		return -EINVAL;

	return 0;
}

static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu,
				       struct vmcs12 *vmcs12)
{
	bool ia32e;

	if (!nested_host_cr0_valid(vcpu, vmcs12->host_cr0) ||
	    !nested_host_cr4_valid(vcpu, vmcs12->host_cr4) ||
	    !nested_cr3_valid(vcpu, vmcs12->host_cr3))
		return -EINVAL;

	if (is_noncanonical_address(vmcs12->host_ia32_sysenter_esp, vcpu) ||
	    is_noncanonical_address(vmcs12->host_ia32_sysenter_eip, vcpu))
		return -EINVAL;

	if ((vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PAT) &&
	    !kvm_pat_valid(vmcs12->host_ia32_pat))
		return -EINVAL;

	ia32e = (vmcs12->vm_exit_controls &
		 VM_EXIT_HOST_ADDR_SPACE_SIZE) != 0;

	if (vmcs12->host_cs_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK) ||
	    vmcs12->host_ss_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK) ||
	    vmcs12->host_ds_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK) ||
	    vmcs12->host_es_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK) ||
	    vmcs12->host_fs_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK) ||
	    vmcs12->host_gs_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK) ||
	    vmcs12->host_tr_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK) ||
	    vmcs12->host_cs_selector == 0 ||
	    vmcs12->host_tr_selector == 0 ||
	    (vmcs12->host_ss_selector == 0 && !ia32e))
		return -EINVAL;

#ifdef CONFIG_X86_64
	if (is_noncanonical_address(vmcs12->host_fs_base, vcpu) ||
	    is_noncanonical_address(vmcs12->host_gs_base, vcpu) ||
	    is_noncanonical_address(vmcs12->host_gdtr_base, vcpu) ||
	    is_noncanonical_address(vmcs12->host_idtr_base, vcpu) ||
	    is_noncanonical_address(vmcs12->host_tr_base, vcpu))
		return -EINVAL;
#endif

	/*
	 * If the load IA32_EFER VM-exit control is 1, bits reserved in the
	 * IA32_EFER MSR must be 0 in the field for that register. In addition,
	 * the values of the LMA and LME bits in the field must each be that of
	 * the host address-space size VM-exit control.
	 */
	if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER) {
		if (!kvm_valid_efer(vcpu, vmcs12->host_ia32_efer) ||
		    ia32e != !!(vmcs12->host_ia32_efer & EFER_LMA) ||
		    ia32e != !!(vmcs12->host_ia32_efer & EFER_LME))
			return -EINVAL;
	}

	return 0;
}

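/*
 * If vmcs12 has a valid VMCS link pointer, map the referenced shadow VMCS
 * and verify that its revision ID and shadow-VMCS indicator are consistent
 * with the vmcs12 configuration.
 */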
static int nested_vmx_check_vmcs_link_ptr(struct kvm_vcpu *vcpu,
					  struct vmcs12 *vmcs12)
{
	int r = 0;
	struct vmcs12 *shadow;
	struct kvm_host_map map;

	if (vmcs12->vmcs_link_pointer == -1ull)
		return 0;

	if (!page_address_valid(vcpu, vmcs12->vmcs_link_pointer))
		return -EINVAL;

	if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->vmcs_link_pointer), &map))
		return -EINVAL;

	shadow = map.hva;

	if (shadow->hdr.revision_id != VMCS12_REVISION ||
	    shadow->hdr.shadow_vmcs != nested_cpu_has_shadow_vmcs(vmcs12))
		r = -EINVAL;

	kvm_vcpu_unmap(vcpu, &map, false);
	return r;
}

/*
 * Checks related to Guest Non-register State
 */
static int nested_check_guest_non_reg_state(struct vmcs12 *vmcs12)
{
	if (vmcs12->guest_activity_state != GUEST_ACTIVITY_ACTIVE &&
	    vmcs12->guest_activity_state != GUEST_ACTIVITY_HLT)
		return -EINVAL;

	return 0;
}

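/*
 * Checks on the vmcs12 guest-state area, including registers, MSR fields
 * and non-register state.  On failure, *exit_qual holds the entry-failure
 * exit qualification to report to L1.
 */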
static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu,
					struct vmcs12 *vmcs12,
					u32 *exit_qual)
{
	bool ia32e;

	*exit_qual = ENTRY_FAIL_DEFAULT;

	if (!nested_guest_cr0_valid(vcpu, vmcs12->guest_cr0) ||
	    !nested_guest_cr4_valid(vcpu, vmcs12->guest_cr4))
		return -EINVAL;

	if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT) &&
	    !kvm_pat_valid(vmcs12->guest_ia32_pat))
		return -EINVAL;

	if (nested_vmx_check_vmcs_link_ptr(vcpu, vmcs12)) {
		*exit_qual = ENTRY_FAIL_VMCS_LINK_PTR;
		return -EINVAL;
	}

	/*
	 * If the load IA32_EFER VM-entry control is 1, the following checks
	 * are performed on the field for the IA32_EFER MSR:
	 * - Bits reserved in the IA32_EFER MSR must be 0.
	 * - Bit 10 (corresponding to IA32_EFER.LMA) must equal the value of
	 *   the IA-32e mode guest VM-entry control. It must also be identical
	 *   to bit 8 (LME) if bit 31 in the CR0 field (corresponding to
	 *   CR0.PG) is 1.
	 */
	if (to_vmx(vcpu)->nested.nested_run_pending &&
	    (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER)) {
		ia32e = (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) != 0;
		if (!kvm_valid_efer(vcpu, vmcs12->guest_ia32_efer) ||
		    ia32e != !!(vmcs12->guest_ia32_efer & EFER_LMA) ||
		    ((vmcs12->guest_cr0 & X86_CR0_PG) &&
		     ia32e != !!(vmcs12->guest_ia32_efer & EFER_LME)))
			return -EINVAL;
	}

	if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS) &&
	    (is_noncanonical_address(vmcs12->guest_bndcfgs & PAGE_MASK, vcpu) ||
	     (vmcs12->guest_bndcfgs & MSR_IA32_BNDCFGS_RSVD)))
		return -EINVAL;

	if (nested_check_guest_non_reg_state(vmcs12))
		return -EINVAL;

	return 0;
}

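/*
 * Run an "early" consistency check of vmcs02 by executing a throwaway
 * VMLAUNCH/VMRESUME: hardware validates the control and host-state areas
 * far more cheaply than emulating every architectural check in software.
 * Only used when the nested_early_check module parameter is set.
 */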
static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	unsigned long cr3, cr4;
	bool vm_fail;

	if (!nested_early_check)
		return 0;

	if (vmx->msr_autoload.host.nr)
		vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0);
	if (vmx->msr_autoload.guest.nr)
		vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0);

	preempt_disable();

	vmx_prepare_switch_to_guest(vcpu);

	/*
	 * Induce a consistency check VMExit by clearing bit 1 in GUEST_RFLAGS,
	 * which is reserved to '1' by hardware.  GUEST_RFLAGS is guaranteed to
	 * be written (by prepare_vmcs02()) before the "real" VMEnter, i.e.
	 * there's no need to preserve other bits or save/restore the field.
	 */
	vmcs_writel(GUEST_RFLAGS, 0);

	cr3 = __get_current_cr3_fast();
	if (unlikely(cr3 != vmx->loaded_vmcs->host_state.cr3)) {
		vmcs_writel(HOST_CR3, cr3);
		vmx->loaded_vmcs->host_state.cr3 = cr3;
	}

	cr4 = cr4_read_shadow();
	if (unlikely(cr4 != vmx->loaded_vmcs->host_state.cr4)) {
		vmcs_writel(HOST_CR4, cr4);
		vmx->loaded_vmcs->host_state.cr4 = cr4;
	}

	asm(
		"sub $%c[wordsize], %%" _ASM_SP "\n\t" /* temporarily adjust RSP for CALL */
		"cmp %%" _ASM_SP ", %c[host_state_rsp](%[loaded_vmcs]) \n\t"
		"je 1f \n\t"
		__ex("vmwrite %%" _ASM_SP ", %[HOST_RSP]") "\n\t"
		"mov %%" _ASM_SP ", %c[host_state_rsp](%[loaded_vmcs]) \n\t"
		"1: \n\t"
		"add $%c[wordsize], %%" _ASM_SP "\n\t" /* un-adjust RSP */

		/* Check if vmlaunch or vmresume is needed */
		"cmpb $0, %c[launched](%[loaded_vmcs])\n\t"

		/*
		 * VMLAUNCH and VMRESUME clear RFLAGS.{CF,ZF} on VM-Exit, set
		 * RFLAGS.CF on VM-Fail Invalid and RFLAGS.ZF on VM-Fail Valid,
		 * so any VM-Fail can be detected via "below or equal", which
		 * is what CC_SET(be) captures.
		 */
		"call vmx_vmenter\n\t"

		CC_SET(be)
	      : ASM_CALL_CONSTRAINT, CC_OUT(be) (vm_fail)
	      :	[HOST_RSP]"r"((unsigned long)HOST_RSP),
		[loaded_vmcs]"r"(vmx->loaded_vmcs),
		[launched]"i"(offsetof(struct loaded_vmcs, launched)),
		[host_state_rsp]"i"(offsetof(struct loaded_vmcs, host_state.rsp)),
		[wordsize]"i"(sizeof(ulong))
	      : "memory"
	);

	if (vmx->msr_autoload.host.nr)
		vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr);
	if (vmx->msr_autoload.guest.nr)
		vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr);

	if (vm_fail) {
		preempt_enable();
		WARN_ON_ONCE(vmcs_read32(VM_INSTRUCTION_ERROR) !=
			     VMXERR_ENTRY_INVALID_CONTROL_FIELD);
		return 1;
	}

	/*
	 * VMExit clears RFLAGS.IF and DR7, always.
	 */
	local_irq_enable();
	if (hw_breakpoint_active())
		set_debugreg(__this_cpu_read(cpu_dr7), 7);
	preempt_enable();

	/*
	 * The early check VMEntry is never expected to actually reach the
	 * guest: GUEST_RFLAGS was deliberately made invalid above, which
	 * guarantees a failed VM-Entry consistency check.  WARN if hardware
	 * reports any other exit reason.
	 */
	WARN_ON(!(vmcs_read32(VM_EXIT_REASON) &
		VMX_EXIT_REASONS_FAILED_VMENTRY));

	return 0;
}

static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
						 struct vmcs12 *vmcs12);

static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu)
{
	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	struct kvm_host_map *map;
	struct page *page;
	u64 hpa;

	if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
		/*
		 * Translate L1 physical address to host physical
		 * address for vmcs02. Keep the page pinned, so this
		 * physical address remains valid. We keep a reference
		 * to it so we can release it later.
		 */
		if (vmx->nested.apic_access_page) {
			kvm_release_page_dirty(vmx->nested.apic_access_page);
			vmx->nested.apic_access_page = NULL;
		}
		page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->apic_access_addr);
		/*
		 * If translation failed, no matter: This feature asks
		 * to exit when accessing the given address, and if it
		 * can never be accessed, this feature won't do
		 * anything anyway.
		 */
		if (!is_error_page(page)) {
			vmx->nested.apic_access_page = page;
			hpa = page_to_phys(vmx->nested.apic_access_page);
			vmcs_write64(APIC_ACCESS_ADDR, hpa);
		} else {
			secondary_exec_controls_clearbit(vmx,
				SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES);
		}
	}

	if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) {
		map = &vmx->nested.virtual_apic_map;

		if (!kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->virtual_apic_page_addr), map)) {
			vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, pfn_to_hpa(map->pfn));
		} else if (nested_cpu_has(vmcs12, CPU_BASED_CR8_LOAD_EXITING) &&
			   nested_cpu_has(vmcs12, CPU_BASED_CR8_STORE_EXITING) &&
			   !nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
			/*
			 * The processor will never use the TPR shadow, simply
			 * clear the bit from the execution control.  Such a
			 * configuration is useless, but it happens in tests.
			 * For any other configuration, failing the vm entry is
			 * _not_ what the processor does but it's basically the
			 * only possibility we have.
			 */
			exec_controls_clearbit(vmx, CPU_BASED_TPR_SHADOW);
		} else {
			/*
			 * Write an illegal value to VIRTUAL_APIC_PAGE_ADDR to
			 * force VM-Entry to fail.
			 */
			vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, -1ull);
		}
	}

	if (nested_cpu_has_posted_intr(vmcs12)) {
		map = &vmx->nested.pi_desc_map;

		if (!kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->posted_intr_desc_addr), map)) {
			vmx->nested.pi_desc =
				(struct pi_desc *)(((void *)map->hva) +
				offset_in_page(vmcs12->posted_intr_desc_addr));
			vmcs_write64(POSTED_INTR_DESC_ADDR,
				     pfn_to_hpa(map->pfn) + offset_in_page(vmcs12->posted_intr_desc_addr));
		}
	}
	if (nested_vmx_prepare_msr_bitmap(vcpu, vmcs12))
		exec_controls_setbit(vmx, CPU_BASED_USE_MSR_BITMAPS);
	else
		exec_controls_clearbit(vmx, CPU_BASED_USE_MSR_BITMAPS);
}

/*
 * Common VMX instruction prerequisites per the SDM: the vCPU must be in
 * VMX operation ("VMXON" executed, #UD otherwise) and CPL must be 0
 * (#GP(0) otherwise).  Failing either check means the instruction was
 * fully handled and the caller should simply resume the guest.
 */
static int nested_vmx_check_permission(struct kvm_vcpu *vcpu)
{
	if (!to_vmx(vcpu)->nested.vmxon) {
		kvm_queue_exception(vcpu, UD_VECTOR);
		return 0;
	}

	if (vmx_get_cpl(vcpu)) {
		kvm_inject_gp(vcpu, 0);
		return 0;
	}

	return 1;
}
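/*
 * Returns true if the highest requested virtual interrupt (RVI)
 * outprioritizes the virtual PPR, i.e. if an APICv interrupt is
 * pending delivery.
 */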
static u8 vmx_has_apicv_interrupt(struct kvm_vcpu *vcpu)
{
	u8 rvi = vmx_get_rvi();
	u8 vppr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_PROCPRI);

	return ((rvi & 0xf0) > (vppr & 0xf0));
}

static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
				   struct vmcs12 *vmcs12);

/*
 * If from_vmentry is false, this is being called from state restore (either RSM
 * or KVM_SET_NESTED_STATE).  Otherwise it's called from vmlaunch/vmresume.
 *
 * Returns:
 *	0 - success, i.e. proceed with actual VMEnter
 *	1 - consistency check VMExit
 *	-1 - consistency check VMFail
 */
int nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
	bool evaluate_pending_interrupts;
	u32 exit_reason = EXIT_REASON_INVALID_STATE;
	u32 exit_qual;

	evaluate_pending_interrupts = exec_controls_get(vmx) &
		(CPU_BASED_VIRTUAL_INTR_PENDING | CPU_BASED_VIRTUAL_NMI_PENDING);
	if (likely(!evaluate_pending_interrupts) && kvm_vcpu_apicv_active(vcpu))
		evaluate_pending_interrupts |= vmx_has_apicv_interrupt(vcpu);

	if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS))
		vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
	if (kvm_mpx_supported() &&
	    !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS))
		vmx->nested.vmcs01_guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS);

	/*
	 * Overwrite vmcs01.GUEST_CR3 with L1's CR3 if EPT is disabled *and*
	 * nested early checks are disabled.  In the event of a "late" VM-Fail,
	 * i.e. a VM-Fail detected by hardware but not KVM, KVM must unwind its
	 * software model to the pre-VMEntry host state.  When EPT is disabled,
	 * GUEST_CR3 holds KVM's shadow CR3, not L1's "real" CR3, which causes
	 * nested_vmx_restore_host_state() to corrupt vcpu->arch.cr3.  Stuffing
	 * vmcs01.GUEST_CR3 results in the unwind naturally setting arch.cr3 to
	 * the correct value.  Smashing vmcs01.GUEST_CR3 is safe because nested
	 * VM-Exits, and thus the unwind, reset KVM's MMU, i.e. vmcs01.GUEST_CR3
	 * is guaranteed to be overwritten with a shadow CR3 prior to re-entering
	 * L1.  Don't stuff vmcs01.GUEST_CR3 when using nested early checks as
	 * KVM modifies vcpu->arch.cr3 if and only if the early hardware checks
	 * pass, and early VM-Fails do not reset KVM's MMU.
	 */
	if (!enable_ept && !nested_early_check)
		vmcs_writel(GUEST_CR3, vcpu->arch.cr3);

	vmx_switch_vmcs(vcpu, &vmx->nested.vmcs02);

	prepare_vmcs02_early(vmx, vmcs12);

	if (from_vmentry) {
		nested_get_vmcs12_pages(vcpu);

		if (nested_vmx_check_vmentry_hw(vcpu)) {
			vmx_switch_vmcs(vcpu, &vmx->vmcs01);
			return -1;
		}

		if (nested_vmx_check_guest_state(vcpu, vmcs12, &exit_qual))
			goto vmentry_fail_vmexit;
	}

	enter_guest_mode(vcpu);
	if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING)
		vcpu->arch.tsc_offset += vmcs12->tsc_offset;

	if (prepare_vmcs02(vcpu, vmcs12, &exit_qual))
		goto vmentry_fail_vmexit_guest_mode;

	if (from_vmentry) {
		exit_reason = EXIT_REASON_MSR_LOAD_FAIL;
		exit_qual = nested_vmx_load_msr(vcpu,
						vmcs12->vm_entry_msr_load_addr,
						vmcs12->vm_entry_msr_load_count);
		if (exit_qual)
			goto vmentry_fail_vmexit_guest_mode;
	} else {
		/*
		 * The MMU is not initialized to point at the right entities yet and
		 * "get pages" would need to read data from the guest (i.e. we will
		 * need to perform gpa to hpa translation). Request a call
		 * to nested_get_vmcs12_pages before the next VM-entry; the
		 * request is processed once the vCPU's MMU is up to date.
		 */
		kvm_make_request(KVM_REQ_GET_VMCS12_PAGES, vcpu);
	}

	/*
	 * If L1 had a pending IRQ/NMI until it executed
	 * VMLAUNCH/VMRESUME which wasn't delivered because it was
	 * disallowed (e.g. interrupts disabled), L0 needs to
	 * evaluate if this pending event should cause an exit from L2
	 * to L1 or be delivered directly to L2 (e.g. in case L1 doesn't
	 * intercept EXTERNAL_INTERRUPT).
	 *
	 * Usually this would be handled by the processor noticing an
	 * IRQ/NMI window request, or checking RVI during evaluation of
	 * pending virtual interrupts.  However, this setting was done
	 * on VMCS01 and now VMCS02 is active instead. Thus, we force L0
	 * to perform pending event evaluation by requesting a KVM_REQ_EVENT.
	 */
	if (unlikely(evaluate_pending_interrupts))
		kvm_make_request(KVM_REQ_EVENT, vcpu);

	/*
	 * Do not start the preemption timer hrtimer until after we know
	 * we are successful, so that only nested_vmx_vmexit needs to cancel
	 * the timer.
	 */
	vmx->nested.preemption_timer_expired = false;
	if (nested_cpu_has_preemption_timer(vmcs12))
		vmx_start_preemption_timer(vcpu);

	/*
	 * Note no nested_vmx_succeed or nested_vmx_fail here. At this point
	 * we are no longer running L1, and VMLAUNCH/VMRESUME has not yet
	 * returned as far as L1 is concerned. It will only return (and set
	 * the success flag) when L2 exits (see nested_vmx_vmexit()).
	 */
	return 0;

	/*
	 * A failed consistency check that leads to a VMExit during L1's
	 * VMEnter to L2 is a variation of a normal VMexit, as explained in
	 * 26.7 "VM-entry failures during or after loading guest state".
	 */
vmentry_fail_vmexit_guest_mode:
	if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING)
		vcpu->arch.tsc_offset -= vmcs12->tsc_offset;
	leave_guest_mode(vcpu);

vmentry_fail_vmexit:
	vmx_switch_vmcs(vcpu, &vmx->vmcs01);

	if (!from_vmentry)
		return 1;

	load_vmcs12_host_state(vcpu, vmcs12);
	vmcs12->vm_exit_reason = exit_reason | VMX_EXIT_REASONS_FAILED_VMENTRY;
	vmcs12->exit_qualification = exit_qual;
	if (enable_shadow_vmcs || vmx->nested.hv_evmcs)
		vmx->nested.need_vmcs12_to_shadow_sync = true;
	return 1;
}

/*
 * nested_vmx_run() handles a nested entry, i.e., a VMLAUNCH or VMRESUME on L1
 * for running an L2 nested guest.
 */
static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
{
	struct vmcs12 *vmcs12;
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	u32 interrupt_shadow = vmx_get_interrupt_shadow(vcpu);
	int ret;

	if (!nested_vmx_check_permission(vcpu))
		return 1;

	if (!nested_vmx_handle_enlightened_vmptrld(vcpu, launch))
		return 1;

	if (!vmx->nested.hv_evmcs && vmx->nested.current_vmptr == -1ull)
		return nested_vmx_failInvalid(vcpu);

	vmcs12 = get_vmcs12(vcpu);

	/*
	 * Can't VMLAUNCH or VMRESUME a shadow VMCS. Despite the fact
	 * that there *is* a valid VMCS pointer, RFLAGS.CF is set
	 * rather than RFLAGS.ZF, and no error number is stored to the
	 * VM-instruction error field.
	 */
	if (vmcs12->hdr.shadow_vmcs)
		return nested_vmx_failInvalid(vcpu);

	if (vmx->nested.hv_evmcs) {
		copy_enlightened_to_vmcs12(vmx);
		/* Enlightened VMCS doesn't have launch state */
		vmcs12->launch_state = !launch;
	} else if (enable_shadow_vmcs) {
		copy_shadow_to_vmcs12(vmx);
	}

	/*
	 * The nested entry process starts with enforcing various prerequisites
	 * on vmcs12 as required by the Intel SDM, and act appropriately when
	 * they fail: As the SDM explains, some conditions should cause the
	 * instruction to fail, while others will cause the instruction to seem
	 * to succeed, but return an EXIT_REASON_INVALID_STATE.  The checks
	 * below cover the former, VMFail, category; failed guest-state checks
	 * are handled later via a failed VM-entry.
	 */
	if (interrupt_shadow & KVM_X86_SHADOW_INT_MOV_SS)
		return nested_vmx_failValid(vcpu,
			VMXERR_ENTRY_EVENTS_BLOCKED_BY_MOV_SS);

	if (vmcs12->launch_state == launch)
		return nested_vmx_failValid(vcpu,
			launch ? VMXERR_VMLAUNCH_NONCLEAR_VMCS
			       : VMXERR_VMRESUME_NONLAUNCHED_VMCS);

	if (nested_vmx_check_controls(vcpu, vmcs12))
		return nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);

	if (nested_vmx_check_host_state(vcpu, vmcs12))
		return nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_HOST_STATE_FIELD);

	/*
	 * We're finally done with prerequisite checking, and can start with
	 * the nested entry.
	 */
	vmx->nested.nested_run_pending = 1;
	ret = nested_vmx_enter_non_root_mode(vcpu, true);
	vmx->nested.nested_run_pending = !ret;
	if (ret > 0)
		return 1;
	else if (ret)
		return nested_vmx_failValid(vcpu,
			VMXERR_ENTRY_INVALID_CONTROL_FIELD);

	/* Hide L1D cache contents from the nested guest.  */
	vmx->vcpu.arch.l1tf_flush_l1d = true;

	/*
	 * Must happen outside of nested_vmx_enter_non_root_mode() as it will
	 * also be used as part of restoring nVMX state for
	 * snapshot restore (migration).
	 *
	 * In this flow, it is assumed that the vmcs12 cache was
	 * transferred as part of captured nVMX state and should
	 * therefore not be read from guest memory (which may not
	 * exist anymore in such a case, e.g. after live migration).
	 */
	nested_cache_shadow_vmcs12(vcpu, vmcs12);

	/*
	 * If we're entering a halted L2 vcpu and the L2 vcpu won't be
	 * awakened by event injection or by an NMI-window VM-exit or
	 * by an interrupt-window VM-exit, halt the vcpu.
	 */
	if ((vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT) &&
	    !(vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK) &&
	    !(vmcs12->cpu_based_vm_exec_control & CPU_BASED_VIRTUAL_NMI_PENDING) &&
	    !((vmcs12->cpu_based_vm_exec_control & CPU_BASED_VIRTUAL_INTR_PENDING) &&
	      (vmcs12->guest_rflags & X86_EFLAGS_IF))) {
		vmx->nested.nested_run_pending = 0;
		return kvm_vcpu_halt(vcpu);
	}
	return 1;
}

/*
 * On a nested exit from L2 to L1, vmcs12.guest_cr0 might not be up-to-date
 * because L2 may have changed some cr0 bits directly (CR0_GUEST_HOST_MASK).
 * This function returns the new value we should put in vmcs12.guest_cr0.
 * It's not enough to just return the vmcs02 GUEST_CR0. Rather,
 * 1. Bits that neither L0 nor L1 trapped, were set directly by L2 and are now
 *    available in vmcs02 GUEST_CR0. (Note: It's enough to check that L0
 *    didn't trap the bit, because if L1 did, so would L0).
 * 2. Bits that L1 asked to trap (and therefore L0 also did) could not have
 *    been modified by L2, and L1 knows it. So just leave the old value of
 *    these bits in vmcs12.guest_cr0. Note that the bit from vmcs01 GUEST_CR0
 *    isn't relevant, because if L0 traps this bit it can set it to anything.
 * 3. Bits that L1 didn't trap, but L0 did. L1 believes they can be modified
 *    by L2, so their up-to-date value lives in the CR0 read shadow, which
 *    L0 keeps consistent with what L1 expects to read.
 */
static inline unsigned long
vmcs12_guest_cr0(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
{
	return
	/*1*/	(vmcs_readl(GUEST_CR0) & vcpu->arch.cr0_guest_owned_bits) |
	/*2*/	(vmcs12->guest_cr0 & vmcs12->cr0_guest_host_mask) |
	/*3*/	(vmcs_readl(CR0_READ_SHADOW) & ~(vmcs12->cr0_guest_host_mask |
			vcpu->arch.cr0_guest_owned_bits));
}

static inline unsigned long
vmcs12_guest_cr4(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
{
	return
	/*1*/	(vmcs_readl(GUEST_CR4) & vcpu->arch.cr4_guest_owned_bits) |
	/*2*/	(vmcs12->guest_cr4 & vmcs12->cr4_guest_host_mask) |
	/*3*/	(vmcs_readl(CR4_READ_SHADOW) & ~(vmcs12->cr4_guest_host_mask |
			vcpu->arch.cr4_guest_owned_bits));
}

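/*
 * Transfer an event that was queued for (re)injection into L2 but never
 * delivered into vmcs12's IDT-vectoring info field, so that L1 can
 * re-inject it after the nested VM-exit.
 */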
static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu,
				      struct vmcs12 *vmcs12)
{
	u32 idt_vectoring;
	unsigned int nr;

	if (vcpu->arch.exception.injected) {
		nr = vcpu->arch.exception.nr;
		idt_vectoring = nr | VECTORING_INFO_VALID_MASK;

		if (kvm_exception_is_soft(nr)) {
			vmcs12->vm_exit_instruction_len =
				vcpu->arch.event_exit_inst_len;
			idt_vectoring |= INTR_TYPE_SOFT_EXCEPTION;
		} else
			idt_vectoring |= INTR_TYPE_HARD_EXCEPTION;

		if (vcpu->arch.exception.has_error_code) {
			idt_vectoring |= VECTORING_INFO_DELIVER_CODE_MASK;
			vmcs12->idt_vectoring_error_code =
				vcpu->arch.exception.error_code;
		}

		vmcs12->idt_vectoring_info_field = idt_vectoring;
	} else if (vcpu->arch.nmi_injected) {
		vmcs12->idt_vectoring_info_field =
			INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR;
	} else if (vcpu->arch.interrupt.injected) {
		nr = vcpu->arch.interrupt.nr;
		idt_vectoring = nr | VECTORING_INFO_VALID_MASK;

		if (vcpu->arch.interrupt.soft) {
			idt_vectoring |= INTR_TYPE_SOFT_INTR;
			vmcs12->vm_entry_instruction_len =
				vcpu->arch.event_exit_inst_len;
		} else
			idt_vectoring |= INTR_TYPE_EXT_INTR;

		vmcs12->idt_vectoring_info_field = idt_vectoring;
	}
}

static void nested_mark_vmcs12_pages_dirty(struct kvm_vcpu *vcpu)
{
	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
	gfn_t gfn;

	/*
	 * Don't need to mark the APIC access page dirty; it is never
	 * written to by the CPU during APIC virtualization.
	 */
	if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) {
		gfn = vmcs12->virtual_apic_page_addr >> PAGE_SHIFT;
		kvm_vcpu_mark_page_dirty(vcpu, gfn);
	}

	if (nested_cpu_has_posted_intr(vmcs12)) {
		gfn = vmcs12->posted_intr_desc_addr >> PAGE_SHIFT;
		kvm_vcpu_mark_page_dirty(vcpu, gfn);
	}
}

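/*
 * Process outstanding posted interrupts for the nested guest: merge the
 * posted-interrupt descriptor's PIR into the virtual-APIC page and raise
 * the guest interrupt status (RVI) if a higher vector is now pending.
 */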
static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	int max_irr;
	void *vapic_page;
	u16 status;

	if (!vmx->nested.pi_desc || !vmx->nested.pi_pending)
		return;

	vmx->nested.pi_pending = false;
	if (!pi_test_and_clear_on(vmx->nested.pi_desc))
		return;

	max_irr = find_last_bit((unsigned long *)vmx->nested.pi_desc->pir, 256);
	if (max_irr != 256) {
		vapic_page = vmx->nested.virtual_apic_map.hva;
		if (!vapic_page)
			return;

		__kvm_apic_update_irr(vmx->nested.pi_desc->pir,
			vapic_page, &max_irr);
		status = vmcs_read16(GUEST_INTR_STATUS);
		if ((u8)max_irr > ((u8)status & 0xff)) {
			status &= ~0xff;
			status |= (u8)max_irr;
			vmcs_write16(GUEST_INTR_STATUS, status);
		}
	}

	nested_mark_vmcs12_pages_dirty(vcpu);
}

static void nested_vmx_inject_exception_vmexit(struct kvm_vcpu *vcpu,
					       unsigned long exit_qual)
{
	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
	unsigned int nr = vcpu->arch.exception.nr;
	u32 intr_info = nr | INTR_INFO_VALID_MASK;

	if (vcpu->arch.exception.has_error_code) {
		vmcs12->vm_exit_intr_error_code = vcpu->arch.exception.error_code;
		intr_info |= INTR_INFO_DELIVER_CODE_MASK;
	}

	if (kvm_exception_is_soft(nr))
		intr_info |= INTR_TYPE_SOFT_EXCEPTION;
	else
		intr_info |= INTR_TYPE_HARD_EXCEPTION;

	if (!(vmcs12->idt_vectoring_info_field & VECTORING_INFO_VALID_MASK) &&
	    vmx_get_nmi_mask(vcpu))
		intr_info |= INTR_INFO_UNBLOCK_NMI;

	nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI, intr_info, exit_qual);
}

static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	unsigned long exit_qual;
	bool block_nested_events =
	    vmx->nested.nested_run_pending || kvm_event_needs_reinjection(vcpu);

	if (vcpu->arch.exception.pending &&
	    nested_vmx_check_exception(vcpu, &exit_qual)) {
		if (block_nested_events)
			return -EBUSY;
		nested_vmx_inject_exception_vmexit(vcpu, exit_qual);
		return 0;
	}

	if (nested_cpu_has_preemption_timer(get_vmcs12(vcpu)) &&
	    vmx->nested.preemption_timer_expired) {
		if (block_nested_events)
			return -EBUSY;
		nested_vmx_vmexit(vcpu, EXIT_REASON_PREEMPTION_TIMER, 0, 0);
		return 0;
	}

	if (vcpu->arch.nmi_pending && nested_exit_on_nmi(vcpu)) {
		if (block_nested_events)
			return -EBUSY;
		nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
				  NMI_VECTOR | INTR_TYPE_NMI_INTR |
				  INTR_INFO_VALID_MASK, 0);
		/*
		 * The NMI-triggered VM exit counts as injection:
		 * clear this one and block further NMIs.
		 */
		vcpu->arch.nmi_pending = 0;
		vmx_set_nmi_mask(vcpu, true);
		return 0;
	}

	if ((kvm_cpu_has_interrupt(vcpu) || external_intr) &&
	    nested_exit_on_intr(vcpu)) {
		if (block_nested_events)
			return -EBUSY;
		nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT, 0, 0);
		return 0;
	}

	vmx_complete_nested_posted_interrupt(vcpu);
	return 0;
}

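/*
 * Compute the current VMX-preemption timer value as L1 would read it:
 * the remaining hrtimer time converted to guest TSC ticks and scaled
 * down by the emulated timer rate.
 */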
static u32 vmx_get_preemption_timer_value(struct kvm_vcpu *vcpu)
{
	ktime_t remaining =
		hrtimer_get_remaining(&to_vmx(vcpu)->nested.preemption_timer);
	u64 value;

	if (ktime_to_ns(remaining) <= 0)
		return 0;

	value = ktime_to_ns(remaining) * vcpu->arch.virtual_tsc_khz;
	do_div(value, 1000000);
	return value >> VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE;
}

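/*
 * Returns true for the vmcs12 fields that are only synced back from
 * vmcs02 on demand, by sync_vmcs02_to_vmcs12_rare().
 */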
static bool is_vmcs12_ext_field(unsigned long field)
{
	switch (field) {
	case GUEST_ES_SELECTOR:
	case GUEST_CS_SELECTOR:
	case GUEST_SS_SELECTOR:
	case GUEST_DS_SELECTOR:
	case GUEST_FS_SELECTOR:
	case GUEST_GS_SELECTOR:
	case GUEST_LDTR_SELECTOR:
	case GUEST_TR_SELECTOR:
	case GUEST_ES_LIMIT:
	case GUEST_CS_LIMIT:
	case GUEST_SS_LIMIT:
	case GUEST_DS_LIMIT:
	case GUEST_FS_LIMIT:
	case GUEST_GS_LIMIT:
	case GUEST_LDTR_LIMIT:
	case GUEST_TR_LIMIT:
	case GUEST_GDTR_LIMIT:
	case GUEST_IDTR_LIMIT:
	case GUEST_ES_AR_BYTES:
	case GUEST_DS_AR_BYTES:
	case GUEST_FS_AR_BYTES:
	case GUEST_GS_AR_BYTES:
	case GUEST_LDTR_AR_BYTES:
	case GUEST_TR_AR_BYTES:
	case GUEST_ES_BASE:
	case GUEST_CS_BASE:
	case GUEST_SS_BASE:
	case GUEST_DS_BASE:
	case GUEST_FS_BASE:
	case GUEST_GS_BASE:
	case GUEST_LDTR_BASE:
	case GUEST_TR_BASE:
	case GUEST_GDTR_BASE:
	case GUEST_IDTR_BASE:
	case GUEST_PENDING_DBG_EXCEPTIONS:
	case GUEST_BNDCFGS:
		return true;
	default:
		break;
	}

	return false;
}

static void sync_vmcs02_to_vmcs12_rare(struct kvm_vcpu *vcpu,
				       struct vmcs12 *vmcs12)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);

	vmcs12->guest_es_selector = vmcs_read16(GUEST_ES_SELECTOR);
	vmcs12->guest_cs_selector = vmcs_read16(GUEST_CS_SELECTOR);
	vmcs12->guest_ss_selector = vmcs_read16(GUEST_SS_SELECTOR);
	vmcs12->guest_ds_selector = vmcs_read16(GUEST_DS_SELECTOR);
	vmcs12->guest_fs_selector = vmcs_read16(GUEST_FS_SELECTOR);
	vmcs12->guest_gs_selector = vmcs_read16(GUEST_GS_SELECTOR);
	vmcs12->guest_ldtr_selector = vmcs_read16(GUEST_LDTR_SELECTOR);
	vmcs12->guest_tr_selector = vmcs_read16(GUEST_TR_SELECTOR);
	vmcs12->guest_es_limit = vmcs_read32(GUEST_ES_LIMIT);
	vmcs12->guest_cs_limit = vmcs_read32(GUEST_CS_LIMIT);
	vmcs12->guest_ss_limit = vmcs_read32(GUEST_SS_LIMIT);
	vmcs12->guest_ds_limit = vmcs_read32(GUEST_DS_LIMIT);
	vmcs12->guest_fs_limit = vmcs_read32(GUEST_FS_LIMIT);
	vmcs12->guest_gs_limit = vmcs_read32(GUEST_GS_LIMIT);
	vmcs12->guest_ldtr_limit = vmcs_read32(GUEST_LDTR_LIMIT);
	vmcs12->guest_tr_limit = vmcs_read32(GUEST_TR_LIMIT);
	vmcs12->guest_gdtr_limit = vmcs_read32(GUEST_GDTR_LIMIT);
	vmcs12->guest_idtr_limit = vmcs_read32(GUEST_IDTR_LIMIT);
	vmcs12->guest_es_ar_bytes = vmcs_read32(GUEST_ES_AR_BYTES);
	vmcs12->guest_ds_ar_bytes = vmcs_read32(GUEST_DS_AR_BYTES);
	vmcs12->guest_fs_ar_bytes = vmcs_read32(GUEST_FS_AR_BYTES);
	vmcs12->guest_gs_ar_bytes = vmcs_read32(GUEST_GS_AR_BYTES);
	vmcs12->guest_ldtr_ar_bytes = vmcs_read32(GUEST_LDTR_AR_BYTES);
	vmcs12->guest_tr_ar_bytes = vmcs_read32(GUEST_TR_AR_BYTES);
	vmcs12->guest_es_base = vmcs_readl(GUEST_ES_BASE);
	vmcs12->guest_cs_base = vmcs_readl(GUEST_CS_BASE);
	vmcs12->guest_ss_base = vmcs_readl(GUEST_SS_BASE);
	vmcs12->guest_ds_base = vmcs_readl(GUEST_DS_BASE);
	vmcs12->guest_fs_base = vmcs_readl(GUEST_FS_BASE);
	vmcs12->guest_gs_base = vmcs_readl(GUEST_GS_BASE);
	vmcs12->guest_ldtr_base = vmcs_readl(GUEST_LDTR_BASE);
	vmcs12->guest_tr_base = vmcs_readl(GUEST_TR_BASE);
	vmcs12->guest_gdtr_base = vmcs_readl(GUEST_GDTR_BASE);
	vmcs12->guest_idtr_base = vmcs_readl(GUEST_IDTR_BASE);
	vmcs12->guest_pending_dbg_exceptions =
		vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS);
	if (kvm_mpx_supported())
		vmcs12->guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS);

	vmx->nested.need_sync_vmcs02_to_vmcs12_rare = false;
}

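/*
 * Sync the rare guest-state fields from vmcs02 back to vmcs12.  The fields
 * live in vmcs02, so temporarily load vmcs02, read them out, and switch
 * back to vmcs01.
 */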
static void copy_vmcs02_to_vmcs12_rare(struct kvm_vcpu *vcpu,
				       struct vmcs12 *vmcs12)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	int cpu;

	if (!vmx->nested.need_sync_vmcs02_to_vmcs12_rare)
		return;

	/* This is expected to run with vmcs01 loaded, see the callers. */
	WARN_ON_ONCE(vmx->loaded_vmcs != &vmx->vmcs01);

	cpu = get_cpu();
	vmx->loaded_vmcs = &vmx->nested.vmcs02;
	vmx_vcpu_load(&vmx->vcpu, cpu);

	sync_vmcs02_to_vmcs12_rare(vcpu, vmcs12);

	vmx->loaded_vmcs = &vmx->vmcs01;
	vmx_vcpu_load(&vmx->vcpu, cpu);
	put_cpu();
}

/*
 * Update the guest state fields of vmcs12 to reflect changes that
 * occurred while L2 was running. (The "IA-32e mode guest" bit of the
 * VM-entry controls is also updated, since this is really a guest
 * state bit.)
 */
static void sync_vmcs02_to_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);

	if (vmx->nested.hv_evmcs)
		sync_vmcs02_to_vmcs12_rare(vcpu, vmcs12);

	vmx->nested.need_sync_vmcs02_to_vmcs12_rare = !vmx->nested.hv_evmcs;

	vmcs12->guest_cr0 = vmcs12_guest_cr0(vcpu, vmcs12);
	vmcs12->guest_cr4 = vmcs12_guest_cr4(vcpu, vmcs12);

	vmcs12->guest_rsp = kvm_rsp_read(vcpu);
	vmcs12->guest_rip = kvm_rip_read(vcpu);
	vmcs12->guest_rflags = vmcs_readl(GUEST_RFLAGS);

	vmcs12->guest_cs_ar_bytes = vmcs_read32(GUEST_CS_AR_BYTES);
	vmcs12->guest_ss_ar_bytes = vmcs_read32(GUEST_SS_AR_BYTES);

	vmcs12->guest_sysenter_cs = vmcs_read32(GUEST_SYSENTER_CS);
	vmcs12->guest_sysenter_esp = vmcs_readl(GUEST_SYSENTER_ESP);
	vmcs12->guest_sysenter_eip = vmcs_readl(GUEST_SYSENTER_EIP);

	vmcs12->guest_interruptibility_info =
		vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);

	if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED)
		vmcs12->guest_activity_state = GUEST_ACTIVITY_HLT;
	else
		vmcs12->guest_activity_state = GUEST_ACTIVITY_ACTIVE;

	if (nested_cpu_has_preemption_timer(vmcs12) &&
	    vmcs12->vm_exit_controls & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)
		vmcs12->vmx_preemption_timer_value =
			vmx_get_preemption_timer_value(vcpu);

	/*
	 * In some cases (usually, nested EPT), L2 is allowed to change its
	 * own CR3 without exiting. If it has changed it, we must keep it.
	 * Of course, if L0 is using shadow page tables, GUEST_CR3 was defined
	 * by L0, not L1 or L2, so we mustn't unconditionally copy it to vmcs12.
	 *
	 * Additionally, restore L2's PDPTR to vmcs12.
	 */
	if (enable_ept) {
		vmcs12->guest_cr3 = vmcs_readl(GUEST_CR3);
		if (nested_cpu_has_ept(vmcs12) && is_pae_paging(vcpu)) {
			vmcs12->guest_pdptr0 = vmcs_read64(GUEST_PDPTR0);
			vmcs12->guest_pdptr1 = vmcs_read64(GUEST_PDPTR1);
			vmcs12->guest_pdptr2 = vmcs_read64(GUEST_PDPTR2);
			vmcs12->guest_pdptr3 = vmcs_read64(GUEST_PDPTR3);
		}
	}

	vmcs12->guest_linear_address = vmcs_readl(GUEST_LINEAR_ADDRESS);

	if (nested_cpu_has_vid(vmcs12))
		vmcs12->guest_intr_status = vmcs_read16(GUEST_INTR_STATUS);

	vmcs12->vm_entry_controls =
		(vmcs12->vm_entry_controls & ~VM_ENTRY_IA32E_MODE) |
		(vm_entry_controls_get(to_vmx(vcpu)) & VM_ENTRY_IA32E_MODE);

	if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_DEBUG_CONTROLS)
		kvm_get_dr(vcpu, 7, (unsigned long *)&vmcs12->guest_dr7);

	if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_EFER)
		vmcs12->guest_ia32_efer = vcpu->arch.efer;
}

/*
 * prepare_vmcs12 fills vmcs12 with the exit information L1 expects to find
 * after a nested VM-exit: the exit reason, exit qualification and
 * interruption info, plus any event that was queued for injection into L2
 * but never delivered, which must be transferred to the IDT-vectoring info
 * field.  The guest-state fields themselves are synced separately by
 * sync_vmcs02_to_vmcs12().  Must only be called when in L2
 * (is_guest_mode()).
 */
static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
			   u32 exit_reason, u32 exit_intr_info,
			   unsigned long exit_qualification)
{
	/* update exit information fields: */
	vmcs12->vm_exit_reason = exit_reason;
	vmcs12->exit_qualification = exit_qualification;
	vmcs12->vm_exit_intr_info = exit_intr_info;

	vmcs12->idt_vectoring_info_field = 0;
	vmcs12->vm_exit_instruction_len = vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
	vmcs12->vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);

	if (!(vmcs12->vm_exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY)) {
		vmcs12->launch_state = 1;

		/* vm_entry_intr_info_field is cleared on exit. Emulate this
		 * instead of reading the real value. */
		vmcs12->vm_entry_intr_info_field &= ~INTR_INFO_VALID_MASK;

		/*
		 * Transfer the event that L0 or L1 may have wanted to inject
		 * into L2 to IDT_VECTORING_INFO_FIELD.
		 */
		vmcs12_save_pending_event(vcpu, vmcs12);

		/*
		 * According to spec, there's no need to store the guest's
		 * MSRs if the exit is due to a VM-entry failure that occurs
		 * during or after loading the guest state. Since this exit
		 * does not fall in that category, we need to save the MSRs.
		 */
		if (nested_vmx_store_msr(vcpu,
					 vmcs12->vm_exit_msr_store_addr,
					 vmcs12->vm_exit_msr_store_count))
			nested_vmx_abort(vcpu,
					 VMX_ABORT_SAVE_GUEST_MSR_FAIL);
	}

	/*
	 * Drop what we picked up for L2 via vmx_complete_interrupts. It is
	 * preserved above and would only end up incorrectly in L1.
	 */
	vcpu->arch.nmi_injected = false;
	kvm_clear_exception_queue(vcpu);
	kvm_clear_interrupt_queue(vcpu);
}

/*
 * A part of what we need to do when the nested L2 guest exits and we want to
 * run its L1 parent, is to reset L1's guest state to the host state specified
 * in vmcs12.
 * This function is to be called not only on normal nested exit, but also on
 * a nested entry failure, as explained in Intel's spec, 26.7 ("VM-entry
 * failures during or after loading guest state").
 * This function should be called when the active VMCS is L1's (vmcs01).
 */
static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
				   struct vmcs12 *vmcs12)
{
	struct kvm_segment seg;
	u32 entry_failure_code;

	if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER)
		vcpu->arch.efer = vmcs12->host_ia32_efer;
	else if (vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE)
		vcpu->arch.efer |= (EFER_LMA | EFER_LME);
	else
		vcpu->arch.efer &= ~(EFER_LMA | EFER_LME);
	vmx_set_efer(vcpu, vcpu->arch.efer);

	kvm_rsp_write(vcpu, vmcs12->host_rsp);
	kvm_rip_write(vcpu, vmcs12->host_rip);
	vmx_set_rflags(vcpu, X86_EFLAGS_FIXED);
	vmx_set_interrupt_shadow(vcpu, 0);

	/*
	 * Note that calling vmx_set_cr0 is important, even if cr0 hasn't
	 * actually changed, because vmx_set_cr0 refers to efer set above.
	 *
	 * CR0_GUEST_HOST_MASK is already set in the original vmcs01
	 * (KVM doesn't change it);
	 */
	vcpu->arch.cr0_guest_owned_bits = X86_CR0_TS;
	vmx_set_cr0(vcpu, vmcs12->host_cr0);

	/* Same as above - no reason to call set_cr4_guest_host_mask().  */
	vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK);
	vmx_set_cr4(vcpu, vmcs12->host_cr4);

	nested_ept_uninit_mmu_context(vcpu);

	/*
	 * Only PDPTE load can fail as the value of cr3 was checked on entry and
	 * couldn't have changed.
	 */
	if (nested_vmx_load_cr3(vcpu, vmcs12->host_cr3, false, &entry_failure_code))
		nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_PDPTE_FAIL);

	if (!enable_ept)
		vcpu->arch.walk_mmu->inject_page_fault = kvm_inject_page_fault;

	/*
	 * If vmcs01 doesn't use VPID, CPU flushes TLB on every
	 * VMEntry/VMExit. Thus, no need to flush TLB.
	 *
	 * If vmcs12 doesn't use VPID, L1 expects TLB to be
	 * flushed on every VMEntry/VMExit.
	 *
	 * Otherwise, we can preserve TLB entries as long as we are
	 * able to tag L1 TLB entries differently than L2 TLB entries.
	 *
	 * If vmcs12 uses EPT, we need to execute this flush on EPTP01
	 * and therefore we request the TLB flush to happen only after VMCS EPTP
	 * has been set by KVM_REQ_LOAD_CR3.
	 */
	if (enable_vpid &&
	    (!nested_cpu_has_vpid(vmcs12) || !nested_has_guest_tlb_tag(vcpu))) {
		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	}

	vmcs_write32(GUEST_SYSENTER_CS, vmcs12->host_ia32_sysenter_cs);
	vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->host_ia32_sysenter_esp);
	vmcs_writel(GUEST_SYSENTER_EIP, vmcs12->host_ia32_sysenter_eip);
	vmcs_writel(GUEST_IDTR_BASE, vmcs12->host_idtr_base);
	vmcs_writel(GUEST_GDTR_BASE, vmcs12->host_gdtr_base);
	vmcs_write32(GUEST_IDTR_LIMIT, 0xFFFF);
	vmcs_write32(GUEST_GDTR_LIMIT, 0xFFFF);

	/* If not VM_EXIT_CLEAR_BNDCFGS, the L2 value propagates to L1.  */
	if (vmcs12->vm_exit_controls & VM_EXIT_CLEAR_BNDCFGS)
		vmcs_write64(GUEST_BNDCFGS, 0);

	if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PAT) {
		vmcs_write64(GUEST_IA32_PAT, vmcs12->host_ia32_pat);
		vcpu->arch.pat = vmcs12->host_ia32_pat;
	}
	if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL)
		vmcs_write64(GUEST_IA32_PERF_GLOBAL_CTRL,
			vmcs12->host_ia32_perf_global_ctrl);

	/* Set L1 segment info according to Intel SDM
	   27.5.2 Loading Host Segment and Descriptor-Table Registers */
	seg = (struct kvm_segment) {
		.base = 0,
		.limit = 0xFFFFFFFF,
		.selector = vmcs12->host_cs_selector,
		.type = 11,
		.present = 1,
		.s = 1,
		.g = 1
	};
	if (vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE)
		seg.l = 1;
	else
		seg.db = 1;
	vmx_set_segment(vcpu, &seg, VCPU_SREG_CS);
	seg = (struct kvm_segment) {
		.base = 0,
		.limit = 0xFFFFFFFF,
		.type = 3,
		.present = 1,
		.s = 1,
		.db = 1,
		.g = 1
	};
	seg.selector = vmcs12->host_ds_selector;
	vmx_set_segment(vcpu, &seg, VCPU_SREG_DS);
	seg.selector = vmcs12->host_es_selector;
	vmx_set_segment(vcpu, &seg, VCPU_SREG_ES);
	seg.selector = vmcs12->host_ss_selector;
	vmx_set_segment(vcpu, &seg, VCPU_SREG_SS);
	seg.selector = vmcs12->host_fs_selector;
	seg.base = vmcs12->host_fs_base;
	vmx_set_segment(vcpu, &seg, VCPU_SREG_FS);
	seg.selector = vmcs12->host_gs_selector;
	seg.base = vmcs12->host_gs_base;
	vmx_set_segment(vcpu, &seg, VCPU_SREG_GS);
	seg = (struct kvm_segment) {
		.base = vmcs12->host_tr_base,
		.limit = 0x67,
		.selector = vmcs12->host_tr_selector,
		.type = 11,
		.present = 1
	};
	vmx_set_segment(vcpu, &seg, VCPU_SREG_TR);

	kvm_set_dr(vcpu, 7, 0x400);
	vmcs_write64(GUEST_IA32_DEBUGCTL, 0);

	if (cpu_has_vmx_msr_bitmap())
		vmx_update_msr_bitmap(vcpu);

	if (nested_vmx_load_msr(vcpu, vmcs12->vm_exit_msr_load_addr,
				vmcs12->vm_exit_msr_load_count))
		nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_MSR_FAIL);
}

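/*
 * Figure out what vmcs01's GUEST_IA32_EFER would have been at the time of
 * the nested VMEntry: from the VMCS field if EFER is loaded on entry, else
 * from the MSR autoload list, a shared MSR slot, or the host value.
 */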
static inline u64 nested_vmx_get_vmcs01_guest_efer(struct vcpu_vmx *vmx)
{
	struct shared_msr_entry *efer_msr;
	unsigned int i;

	if (vm_entry_controls_get(vmx) & VM_ENTRY_LOAD_IA32_EFER)
		return vmcs_read64(GUEST_IA32_EFER);

	if (cpu_has_load_ia32_efer())
		return host_efer;

	for (i = 0; i < vmx->msr_autoload.guest.nr; ++i) {
		if (vmx->msr_autoload.guest.val[i].index == MSR_EFER)
			return vmx->msr_autoload.guest.val[i].value;
	}

	efer_msr = find_msr_entry(vmx, MSR_EFER);
	if (efer_msr)
		return efer_msr->data;

	return host_efer;
}

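/*
 * Undo the effects of a partially completed nested VMEntry after a late
 * hardware VMFail: restore L1's register, control and MSR state from
 * vmcs01 rather than performing an architectural VM-exit.
 */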
static void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu)
{
	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	struct vmx_msr_entry g, h;
	struct msr_data msr;
	gpa_t gpa;
	u32 i, j;

	vcpu->arch.pat = vmcs_read64(GUEST_IA32_PAT);

	if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS) {
		/*
		 * L1's host DR7 is lost if KVM_GUESTDBG_USE_HW_BP is set
		 * as vmcs01.GUEST_DR7 contains a userspace defined value
		 * and vcpu->arch.dr7 is not squirreled away before the
		 * nested VMENTER (not worth adding a variable in nested_vmx).
		 */
		if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
			kvm_set_dr(vcpu, 7, DR7_FIXED_1);
		else
			WARN_ON(kvm_set_dr(vcpu, 7, vmcs_readl(GUEST_DR7)));
	}

	/*
	 * Note that calling vmx_set_{efer,cr0,cr4} is important as they
	 * handle a variety of side effects to KVM's software model.
	 */
	vmx_set_efer(vcpu, nested_vmx_get_vmcs01_guest_efer(vmx));

	vcpu->arch.cr0_guest_owned_bits = X86_CR0_TS;
	vmx_set_cr0(vcpu, vmcs_readl(CR0_READ_SHADOW));

	vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK);
	vmx_set_cr4(vcpu, vmcs_readl(CR4_READ_SHADOW));

	nested_ept_uninit_mmu_context(vcpu);
	vcpu->arch.cr3 = vmcs_readl(GUEST_CR3);
	__set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);

	/*
	 * Reload the MMU's cached PDPTRs from vmcs01; GUEST_CR3 and the
	 * PDPTRs still reflect L1's state since the failed VMEnter never
	 * reached L2.
	 */
	if (enable_ept)
		ept_save_pdptrs(vcpu);

	kvm_mmu_reset_context(vcpu);

	if (cpu_has_vmx_msr_bitmap())
		vmx_update_msr_bitmap(vcpu);

	/*
	 * This nasty bit of open coding is a compromise between blindly
	 * loading L1's MSRs using the exit load lists (incorrect emulation
	 * of VMFail), leaving the nested VM's MSRs in the software model
	 * (incorrect behavior) and snapshotting the modified MSRs (too
	 * expensive since the lists are unbound by hardware).  For each
	 * MSR that was (prematurely) loaded from the nested VMEntry load
	 * list, reload it from the exit load list if it exists and differs
	 * from the guest value.  The intent is to stuff host state as
	 * silently as possible, not to fully process the exit load list.
	 */
	msr.host_initiated = false;
	for (i = 0; i < vmcs12->vm_entry_msr_load_count; i++) {
		gpa = vmcs12->vm_entry_msr_load_addr + (i * sizeof(g));
		if (kvm_vcpu_read_guest(vcpu, gpa, &g, sizeof(g))) {
			pr_debug_ratelimited(
				"%s read MSR index failed (%u, 0x%08llx)\n",
				__func__, i, gpa);
			goto vmabort;
		}

		for (j = 0; j < vmcs12->vm_exit_msr_load_count; j++) {
			gpa = vmcs12->vm_exit_msr_load_addr + (j * sizeof(h));
			if (kvm_vcpu_read_guest(vcpu, gpa, &h, sizeof(h))) {
				pr_debug_ratelimited(
					"%s read MSR failed (%u, 0x%08llx)\n",
					__func__, j, gpa);
				goto vmabort;
			}
			if (h.index != g.index)
				continue;
			if (h.value == g.value)
				break;

			if (nested_vmx_load_msr_check(vcpu, &h)) {
				pr_debug_ratelimited(
					"%s check failed (%u, 0x%x, 0x%x)\n",
					__func__, j, h.index, h.reserved);
				goto vmabort;
			}

			msr.index = h.index;
			msr.data = h.value;
			if (kvm_set_msr(vcpu, &msr)) {
				pr_debug_ratelimited(
					"%s WRMSR failed (%u, 0x%x, 0x%llx)\n",
					__func__, j, h.index, h.value);
				goto vmabort;
			}
		}
	}

	return;

vmabort:
	nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_MSR_FAIL);
}

/*
 * Emulate an exit from nested guest (L2) to L1, i.e., prepare to run L1
 * and modify vmcs12 to make it see what it would expect to see there if
 * L2 was its real guest. Must only be called when in L2 (is_guest_mode())
 */
void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
		       u32 exit_intr_info, unsigned long exit_qualification)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);

	/* trying to cancel vmlaunch/vmresume is a bug */
	WARN_ON_ONCE(vmx->nested.nested_run_pending);

	leave_guest_mode(vcpu);

	if (nested_cpu_has_preemption_timer(vmcs12))
		hrtimer_cancel(&to_vmx(vcpu)->nested.preemption_timer);

	if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING)
		vcpu->arch.tsc_offset -= vmcs12->tsc_offset;

	if (likely(!vmx->fail)) {
		sync_vmcs02_to_vmcs12(vcpu, vmcs12);

		if (exit_reason != -1)
			prepare_vmcs12(vcpu, vmcs12, exit_reason, exit_intr_info,
				       exit_qualification);

		/*
		 * Must happen outside of sync_vmcs02_to_vmcs12() as it will
		 * also be used to capture vmcs12 cache as part of
		 * capturing nVMX state for snapshot (migration).
		 *
		 * Otherwise, this flush will dirty guest memory at a
		 * point it is already assumed by user-space to be
		 * immutable.
		 */
		nested_flush_cached_shadow_vmcs12(vcpu, vmcs12);
	} else {
		/*
		 * The only expected VM-instruction error is "VM entry with
		 * invalid control field(s)." Anything else indicates a
		 * problem with L0.  And we should never get here with a
		 * VMFail of any type if early consistency checks are enabled.
		 */
		WARN_ON_ONCE(vmcs_read32(VM_INSTRUCTION_ERROR) !=
			     VMXERR_ENTRY_INVALID_CONTROL_FIELD);
		WARN_ON_ONCE(nested_early_check);
	}

	vmx_switch_vmcs(vcpu, &vmx->vmcs01);

	/* Update any VMCS fields that might have changed while L2 ran */
	vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr);
	vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr);
	vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset);

	if (kvm_has_tsc_control)
		decache_tsc_multiplier(vmx);

	if (vmx->nested.change_vmcs01_virtual_apic_mode) {
		vmx->nested.change_vmcs01_virtual_apic_mode = false;
		vmx_set_virtual_apic_mode(vcpu);
	} else if (!nested_cpu_has_ept(vmcs12) &&
		   nested_cpu_has2(vmcs12,
				   SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
		vmx_flush_tlb(vcpu, true);
	}

	/* Unpin physical memory we referred to in vmcs02 */
	if (vmx->nested.apic_access_page) {
		kvm_release_page_dirty(vmx->nested.apic_access_page);
		vmx->nested.apic_access_page = NULL;
	}
	kvm_vcpu_unmap(vcpu, &vmx->nested.virtual_apic_map, true);
	kvm_vcpu_unmap(vcpu, &vmx->nested.pi_desc_map, true);
	vmx->nested.pi_desc = NULL;

	/*
	 * An mmu_notifier may have forced a reload of the APIC access
	 * page's hpa for the L2 vmcs. Reload it for L1 before entering L1.
	 */
	kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu);

	if ((exit_reason != -1) && (enable_shadow_vmcs || vmx->nested.hv_evmcs))
		vmx->nested.need_vmcs12_to_shadow_sync = true;

	/* in case we halted in L2 */
	vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;

	if (likely(!vmx->fail)) {
		/*
		 * TODO: SDM says that with acknowledge interrupt on
		 * exit, bit 31 of the VM-exit interrupt information
		 * (valid interrupt) is always set to 1 on
		 * EXIT_REASON_EXTERNAL_INTERRUPT, so we shouldn't
		 * need kvm_cpu_has_interrupt().  See the commit
		 * message for details.
		 */
		if (nested_exit_intr_ack_set(vcpu) &&
		    exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT &&
		    kvm_cpu_has_interrupt(vcpu)) {
			int irq = kvm_cpu_get_interrupt(vcpu);
			WARN_ON(irq < 0);
			vmcs12->vm_exit_intr_info = irq |
				INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR;
		}

		if (exit_reason != -1)
			trace_kvm_nested_vmexit_inject(vmcs12->vm_exit_reason,
						       vmcs12->exit_qualification,
						       vmcs12->idt_vectoring_info_field,
						       vmcs12->vm_exit_intr_info,
						       vmcs12->vm_exit_intr_error_code,
						       KVM_ISA_VMX);

		load_vmcs12_host_state(vcpu, vmcs12);

		return;
	}

	/*
	 * After an early L2 VM-entry failure, we're now back
	 * in L1 which thinks it just finished a VMLAUNCH or
	 * VMRESUME instruction, so we need to set the failure
	 * flag and the VM-instruction error field of the VMCS
	 * accordingly, and skip the emulated instruction.
	 */
	(void)nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);

	/*
	 * Restore L1's host state to KVM's software model.  We're here
	 * because a consistency check was caught by hardware, which
	 * means some amount of guest state has been propagated to KVM's
	 * model and needs to be unwound to the host's state.
	 */
	nested_vmx_restore_host_state(vcpu);

	vmx->fail = 0;
}

/*
 * Decode the memory-address operand of a vmx instruction, as recorded on an
 * exit caused by such an instruction (run by a guest hypervisor).
 * On success, returns 0. When the operand is invalid, returns 1 and throws
 * an exception.
 */
int get_vmx_mem_address(struct kvm_vcpu *vcpu, unsigned long exit_qualification,
			u32 vmx_instruction_info, bool wr, int len, gva_t *ret)
{
	gva_t off;
	bool exn;
	struct kvm_segment s;

	/*
	 * According to Vol. 3B, "Information for VM Exits Due to Instruction
	 * Execution", on an exit, vmx_instruction_info holds most of the
	 * addressing components of the operand. Only the displacement part
	 * is put in exit_qualification (see 3B, "Basic VM-Exit Information").
	 * For how an actual address is calculated from all these components,
	 * refer to Vol. 1, "Operand Addressing".
	 */
	int  scaling = vmx_instruction_info & 3;
	int  addr_size = (vmx_instruction_info >> 7) & 7;
	bool is_reg = vmx_instruction_info & (1u << 10);
	int  seg_reg = (vmx_instruction_info >> 15) & 7;
	int  index_reg = (vmx_instruction_info >> 18) & 0xf;
	bool index_is_valid = !(vmx_instruction_info & (1u << 22));
	int  base_reg       = (vmx_instruction_info >> 23) & 0xf;
	bool base_is_valid  = !(vmx_instruction_info & (1u << 27));

	if (is_reg) {
		kvm_queue_exception(vcpu, UD_VECTOR);
		return 1;
	}

	/* Addr = segment_base + offset */
	/* offset = base + [index * scale] + displacement */
	off = exit_qualification; /* holds the displacement */
	if (addr_size == 1)
		off = (gva_t)sign_extend64(off, 31);
	else if (addr_size == 0)
		off = (gva_t)sign_extend64(off, 15);
	if (base_is_valid)
		off += kvm_register_read(vcpu, base_reg);
	if (index_is_valid)
		off += kvm_register_read(vcpu, index_reg)<<scaling;
	vmx_get_segment(vcpu, &s, seg_reg);

	/*
	 * The effective address, i.e. @off, of a memory operand is truncated
	 * based on the address size of the instruction.  Note that this is
	 * the *effective address*, i.e. the address prior to accounting for
	 * the segment's base.
	 */
	if (addr_size == 1) /* 32 bit */
		off &= 0xffffffff;
	else if (addr_size == 0) /* 16 bit */
		off &= 0xffff;

	/* Checks for #GP/#SS exceptions. */
	exn = false;
	if (is_long_mode(vcpu)) {
		/*
		 * The virtual/linear address is never truncated in 64-bit
		 * mode, e.g. a 32-bit address size can yield a 64-bit virtual
		 * address when using FS/GS with a non-zero base.
		 */
		if (seg_reg == VCPU_SREG_FS || seg_reg == VCPU_SREG_GS)
			*ret = s.base + off;
		else
			*ret = off;

		/* Long mode: #GP(0)/#SS(0) if the memory address is in a
		 * non-canonical form. This is the only check on the memory
		 * destination for long mode!
		 */
		exn = is_noncanonical_address(*ret, vcpu);
	} else {
		/*
		 * When not in long mode, the virtual/linear address is
		 * unconditionally truncated to 32 bits regardless of the
		 * address size.
		 */
		*ret = (s.base + off) & 0xffffffff;

		/* Protected mode: apply checks for segment validity in the
		 * following order:
		 * - segment type check (#GP(0) may be thrown)
		 * - usability check (#GP(0)/#SS(0))
		 * - limit check (#GP(0)/#SS(0))
		 */
		if (wr)
			/* #GP(0) if the destination operand is located in a
			 * read-only data segment or any code segment.
			 */
			exn = ((s.type & 0xa) == 0 || (s.type & 8));
		else
			/* #GP(0) if the source operand is located in an
			 * execute-only code segment
			 */
			exn = ((s.type & 0xa) == 8);
		if (exn) {
			kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
			return 1;
		}
		/* #GP(0)/#SS(0) if the segment is unusable. */
		exn = (s.unusable != 0);

		/*
		 * Protected mode: #GP(0)/#SS(0) if the memory operand is
		 * outside the segment limit.  All CPUs that support VMX ignore
		 * limit checks for flat segments, i.e. segments with base==0,
		 * limit==0xffffffff and of type expand-up data or code.
		 */
		if (!(s.base == 0 && s.limit == 0xffffffff &&
		     ((s.type & 8) || !(s.type & 4))))
			exn = exn || ((u64)off + len - 1 > s.limit);
	}
	if (exn) {
		kvm_queue_exception_e(vcpu,
				      seg_reg == VCPU_SREG_SS ?
						SS_VECTOR : GP_VECTOR,
				      0);
		return 1;
	}

	return 0;
}

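/*
 * Decode the memory operand of the current VMX instruction and read the
 * 64-bit VMCS pointer it references, injecting #PF on a failed read.
 */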
static int nested_vmx_get_vmptr(struct kvm_vcpu *vcpu, gpa_t *vmpointer)
{
	gva_t gva;
	struct x86_exception e;

	if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
				vmcs_read32(VMX_INSTRUCTION_INFO), false,
				sizeof(*vmpointer), &gva))
		return 1;

	if (kvm_read_guest_virt(vcpu, gva, vmpointer, sizeof(*vmpointer), &e)) {
		kvm_inject_page_fault(vcpu, &e);
		return 1;
	}

	return 0;
}

/*
 * Allocate a shadow VMCS and associate it with the currently loaded
 * VMCS, unless such a shadow VMCS already exists. The newly allocated
 * VMCS is cleared so it is ready for use.
 */
static struct vmcs *alloc_shadow_vmcs(struct kvm_vcpu *vcpu)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	struct loaded_vmcs *loaded_vmcs = vmx->loaded_vmcs;

	/*
	 * We should allocate a shadow vmcs for vmcs01 only when L1
	 * executes VMXON and free it when L1 executes VMXOFF.
	 * As it is invalid to execute VMXON twice, we shouldn't reach
	 * here when vmcs01 already have an allocated shadow vmcs.
	 */
	WARN_ON(loaded_vmcs == &vmx->vmcs01 && loaded_vmcs->shadow_vmcs);

	if (!loaded_vmcs->shadow_vmcs) {
		loaded_vmcs->shadow_vmcs = alloc_vmcs(true);
		if (loaded_vmcs->shadow_vmcs)
			vmcs_clear(loaded_vmcs->shadow_vmcs);
	}
	return loaded_vmcs->shadow_vmcs;
}
4314
4315static int enter_vmx_operation(struct kvm_vcpu *vcpu)
4316{
4317 struct vcpu_vmx *vmx = to_vmx(vcpu);
4318 int r;
4319
4320 r = alloc_loaded_vmcs(&vmx->nested.vmcs02);
4321 if (r < 0)
4322 goto out_vmcs02;
4323
4324 vmx->nested.cached_vmcs12 = kzalloc(VMCS12_SIZE, GFP_KERNEL_ACCOUNT);
4325 if (!vmx->nested.cached_vmcs12)
4326 goto out_cached_vmcs12;
4327
4328 vmx->nested.cached_shadow_vmcs12 = kzalloc(VMCS12_SIZE, GFP_KERNEL_ACCOUNT);
4329 if (!vmx->nested.cached_shadow_vmcs12)
4330 goto out_cached_shadow_vmcs12;
4331
4332 if (enable_shadow_vmcs && !alloc_shadow_vmcs(vcpu))
4333 goto out_shadow_vmcs;
4334
4335 hrtimer_init(&vmx->nested.preemption_timer, CLOCK_MONOTONIC,
4336 HRTIMER_MODE_REL_PINNED);
4337 vmx->nested.preemption_timer.function = vmx_preemption_timer_fn;
4338
4339 vmx->nested.vpid02 = allocate_vpid();
4340
4341 vmx->nested.vmcs02_initialized = false;
4342 vmx->nested.vmxon = true;
4343
4344 if (pt_mode == PT_MODE_HOST_GUEST) {
4345 vmx->pt_desc.guest.ctl = 0;
4346 pt_update_intercept_for_msr(vmx);
4347 }
4348
4349 return 0;
4350
4351out_shadow_vmcs:
4352 kfree(vmx->nested.cached_shadow_vmcs12);
4353
4354out_cached_shadow_vmcs12:
4355 kfree(vmx->nested.cached_vmcs12);
4356
4357out_cached_vmcs12:
4358 free_loaded_vmcs(&vmx->nested.vmcs02);
4359
4360out_vmcs02:
4361 return -ENOMEM;
4362}
4363
/*
 * Emulate the VMXON instruction.
 * Currently, we just remember that VMX is active, and do not save or even
 * inspect the argument to VMXON (the so-called "VMXON pointer") because we
 * do not currently need to store anything in that guest-allocated memory
 * region.
 */
4372static int handle_vmon(struct kvm_vcpu *vcpu)
4373{
4374 int ret;
4375 gpa_t vmptr;
4376 uint32_t revision;
4377 struct vcpu_vmx *vmx = to_vmx(vcpu);
4378 const u64 VMXON_NEEDED_FEATURES = FEATURE_CONTROL_LOCKED
4379 | FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
4380
	/*
	 * The Intel VMX Instruction Reference lists a bunch of bits that are
	 * prerequisite to running VMXON, most notably cr4.VMXE must be set to
	 * 1 (see vmx_set_cr4() for when we allow the guest to set this).
	 * Otherwise, we should fail with #UD.  But most faulting conditions
	 * have already been checked by hardware, prior to the VM-exit for
	 * VMXON.  We do test guest cr4.VMXE because processor CR4 always has
	 * that bit set to 1 in non-root mode when running in VMX operation.
	 */
4390 if (!kvm_read_cr4_bits(vcpu, X86_CR4_VMXE)) {
4391 kvm_queue_exception(vcpu, UD_VECTOR);
4392 return 1;
4393 }
4394
	/* CPL=0 must be checked manually. */
4396 if (vmx_get_cpl(vcpu)) {
4397 kvm_inject_gp(vcpu, 0);
4398 return 1;
4399 }
4400
4401 if (vmx->nested.vmxon)
4402 return nested_vmx_failValid(vcpu,
4403 VMXERR_VMXON_IN_VMX_ROOT_OPERATION);
4404
4405 if ((vmx->msr_ia32_feature_control & VMXON_NEEDED_FEATURES)
4406 != VMXON_NEEDED_FEATURES) {
4407 kvm_inject_gp(vcpu, 0);
4408 return 1;
4409 }
4410
4411 if (nested_vmx_get_vmptr(vcpu, &vmptr))
4412 return 1;
4413
	/*
	 * SDM vol 3, 24.11.5:
	 * The first 4 bytes of the VMXON region contain the supported
	 * VMCS revision identifier.
	 *
	 * Note: IA32_VMX_BASIC[48] will never be 1 for the nested case,
	 * which replaces physical address width with 32.
	 */
4422 if (!page_address_valid(vcpu, vmptr))
4423 return nested_vmx_failInvalid(vcpu);
4424
4425 if (kvm_read_guest(vcpu->kvm, vmptr, &revision, sizeof(revision)) ||
4426 revision != VMCS12_REVISION)
4427 return nested_vmx_failInvalid(vcpu);
4428
4429 vmx->nested.vmxon_ptr = vmptr;
4430 ret = enter_vmx_operation(vcpu);
4431 if (ret)
4432 return ret;
4433
4434 return nested_vmx_succeed(vcpu);
4435}
4436
4437static inline void nested_release_vmcs12(struct kvm_vcpu *vcpu)
4438{
4439 struct vcpu_vmx *vmx = to_vmx(vcpu);
4440
4441 if (vmx->nested.current_vmptr == -1ull)
4442 return;
4443
4444 copy_vmcs02_to_vmcs12_rare(vcpu, get_vmcs12(vcpu));
4445
4446 if (enable_shadow_vmcs) {
		/* copy to memory all shadowed fields in case they were modified */
4449 copy_shadow_to_vmcs12(vmx);
4450 vmx_disable_shadow_vmcs(vmx);
4451 }
4452 vmx->nested.posted_intr_nv = -1;
4453
	/* Flush the cached VMCS12 back to guest memory. */
4455 kvm_vcpu_write_guest_page(vcpu,
4456 vmx->nested.current_vmptr >> PAGE_SHIFT,
4457 vmx->nested.cached_vmcs12, 0, VMCS12_SIZE);
4458
4459 kvm_mmu_free_roots(vcpu, &vcpu->arch.guest_mmu, KVM_MMU_ROOTS_ALL);
4460
4461 vmx->nested.current_vmptr = -1ull;
4462}
4463
/* Emulate the VMXOFF instruction */
4465static int handle_vmoff(struct kvm_vcpu *vcpu)
4466{
4467 if (!nested_vmx_check_permission(vcpu))
4468 return 1;
4469 free_nested(vcpu);
4470 return nested_vmx_succeed(vcpu);
4471}
4472
/* Emulate the VMCLEAR instruction */
4474static int handle_vmclear(struct kvm_vcpu *vcpu)
4475{
4476 struct vcpu_vmx *vmx = to_vmx(vcpu);
4477 u32 zero = 0;
4478 gpa_t vmptr;
4479 u64 evmcs_gpa;
4480
4481 if (!nested_vmx_check_permission(vcpu))
4482 return 1;
4483
4484 if (nested_vmx_get_vmptr(vcpu, &vmptr))
4485 return 1;
4486
4487 if (!page_address_valid(vcpu, vmptr))
4488 return nested_vmx_failValid(vcpu,
4489 VMXERR_VMCLEAR_INVALID_ADDRESS);
4490
4491 if (vmptr == vmx->nested.vmxon_ptr)
4492 return nested_vmx_failValid(vcpu,
4493 VMXERR_VMCLEAR_VMXON_POINTER);
4494
	/*
	 * VMCLEAR of a "real" vmcs12 clears its launch state (after syncing
	 * any cached state back to guest memory).  When the vCPU is using an
	 * enlightened VMCS, the memory pointed to by vmptr belongs to the
	 * eVMCS instead, so there is no launch state to clear in that case.
	 */
4505 if (likely(!vmx->nested.enlightened_vmcs_enabled ||
4506 !nested_enlightened_vmentry(vcpu, &evmcs_gpa))) {
4507 if (vmptr == vmx->nested.current_vmptr)
4508 nested_release_vmcs12(vcpu);
4509
4510 kvm_vcpu_write_guest(vcpu,
4511 vmptr + offsetof(struct vmcs12,
4512 launch_state),
4513 &zero, sizeof(zero));
4514 }
4515
4516 return nested_vmx_succeed(vcpu);
4517}
4518
4519static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch);
4520
/* Emulate the VMLAUNCH instruction */
4522static int handle_vmlaunch(struct kvm_vcpu *vcpu)
4523{
4524 return nested_vmx_run(vcpu, true);
4525}
4526
/* Emulate the VMRESUME instruction */
4528static int handle_vmresume(struct kvm_vcpu *vcpu)
4529{
4530
4531 return nested_vmx_run(vcpu, false);
4532}
4533
4534static int handle_vmread(struct kvm_vcpu *vcpu)
4535{
4536 unsigned long field;
4537 u64 field_value;
4538 unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
4539 u32 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
4540 int len;
4541 gva_t gva = 0;
4542 struct vmcs12 *vmcs12;
4543 struct x86_exception e;
4544 short offset;
4545
4546 if (!nested_vmx_check_permission(vcpu))
4547 return 1;
4548
4549 if (to_vmx(vcpu)->nested.current_vmptr == -1ull)
4550 return nested_vmx_failInvalid(vcpu);
4551
4552 if (!is_guest_mode(vcpu))
4553 vmcs12 = get_vmcs12(vcpu);
4554 else {
		/*
		 * When vmcs12's vmcs_link_pointer is -1ull, any VMREAD of a
		 * shadowed field sets the ALU flags for VMfailInvalid.
		 */
4559 if (get_vmcs12(vcpu)->vmcs_link_pointer == -1ull)
4560 return nested_vmx_failInvalid(vcpu);
4561 vmcs12 = get_shadow_vmcs12(vcpu);
4562 }
4563
	/* Decode instruction info and find the field to read */
4565 field = kvm_register_readl(vcpu, (((vmx_instruction_info) >> 28) & 0xf));
4566
4567 offset = vmcs_field_to_offset(field);
4568 if (offset < 0)
4569 return nested_vmx_failValid(vcpu,
4570 VMXERR_UNSUPPORTED_VMCS_COMPONENT);
4571
4572 if (!is_guest_mode(vcpu) && is_vmcs12_ext_field(field))
4573 copy_vmcs02_to_vmcs12_rare(vcpu, vmcs12);
4574
4575
4576 field_value = vmcs12_read_any(vmcs12, field, offset);
4577
	/*
	 * Now copy part of this value to register or memory, as requested.
	 * Note that the number of bits actually copied is 32 or 64 depending
	 * on the guest's mode (32 or 64 bit), not on the given field's length.
	 */
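	/*
	 * Example (illustrative): a register-form VMREAD of field 0x681e
	 * (GUEST_RIP) has bit 10 of the instruction info set, so the value
	 * cached in vmcs12->guest_rip is copied directly into the destination
	 * register encoded in bits 6:3 of the instruction info.
	 */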
4583 if (vmx_instruction_info & (1u << 10)) {
4584 kvm_register_writel(vcpu, (((vmx_instruction_info) >> 3) & 0xf),
4585 field_value);
4586 } else {
4587 len = is_64_bit_mode(vcpu) ? 8 : 4;
4588 if (get_vmx_mem_address(vcpu, exit_qualification,
4589 vmx_instruction_info, true, len, &gva))
4590 return 1;
		/* _system ok, nested_vmx_check_permission has verified cpl=0 */
4592 if (kvm_write_guest_virt_system(vcpu, gva, &field_value, len, &e))
4593 kvm_inject_page_fault(vcpu, &e);
4594 }
4595
4596 return nested_vmx_succeed(vcpu);
4597}
4598
4599static bool is_shadow_field_rw(unsigned long field)
4600{
4601 switch (field) {
4602#define SHADOW_FIELD_RW(x, y) case x:
4603#include "vmcs_shadow_fields.h"
4604 return true;
4605 default:
4606 break;
4607 }
4608 return false;
4609}
4610
4611static bool is_shadow_field_ro(unsigned long field)
4612{
4613 switch (field) {
4614#define SHADOW_FIELD_RO(x, y) case x:
4615#include "vmcs_shadow_fields.h"
4616 return true;
4617 default:
4618 break;
4619 }
4620 return false;
4621}
4622
4623static int handle_vmwrite(struct kvm_vcpu *vcpu)
4624{
4625 unsigned long field;
4626 int len;
4627 gva_t gva;
4628 struct vcpu_vmx *vmx = to_vmx(vcpu);
4629 unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
4630 u32 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
	/*
	 * The value to write might be 32 or 64 bits, depending on L1's long
	 * mode, and eventually we need to write that into a field of several
	 * possible lengths.  The code below first zero-extends the value to 64
	 * bit (field_value), and then copies only the appropriate number of
	 * bits into the vmcs12 field.
	 */
4638 u64 field_value = 0;
4639 struct x86_exception e;
4640 struct vmcs12 *vmcs12;
4641 short offset;
4642
4643 if (!nested_vmx_check_permission(vcpu))
4644 return 1;
4645
4646 if (vmx->nested.current_vmptr == -1ull)
4647 return nested_vmx_failInvalid(vcpu);
4648
4649 if (vmx_instruction_info & (1u << 10))
4650 field_value = kvm_register_readl(vcpu,
4651 (((vmx_instruction_info) >> 3) & 0xf));
4652 else {
4653 len = is_64_bit_mode(vcpu) ? 8 : 4;
4654 if (get_vmx_mem_address(vcpu, exit_qualification,
4655 vmx_instruction_info, false, len, &gva))
4656 return 1;
4657 if (kvm_read_guest_virt(vcpu, gva, &field_value, len, &e)) {
4658 kvm_inject_page_fault(vcpu, &e);
4659 return 1;
4660 }
4661 }
4662
	/* Decode instruction info and find the field to write */
4664 field = kvm_register_readl(vcpu, (((vmx_instruction_info) >> 28) & 0xf));
4665
	/*
	 * If the vCPU supports "VMWRITE to any supported field in the
	 * VMCS," then the "read-only" fields are actually read/write.
	 */
4669 if (vmcs_field_readonly(field) &&
4670 !nested_cpu_has_vmwrite_any_field(vcpu))
4671 return nested_vmx_failValid(vcpu,
4672 VMXERR_VMWRITE_READ_ONLY_VMCS_COMPONENT);
4673
4674 if (!is_guest_mode(vcpu)) {
4675 vmcs12 = get_vmcs12(vcpu);
4676
		/*
		 * Ensure vmcs12 is up-to-date before any VMWRITE that dirties
		 * vmcs12, else we may crush a field or consume a stale value.
		 */
4681 if (!is_shadow_field_rw(field))
4682 copy_vmcs02_to_vmcs12_rare(vcpu, vmcs12);
4683 } else {
		/*
		 * When vmcs12's vmcs_link_pointer is -1ull, any VMWRITE of a
		 * shadowed field sets the ALU flags for VMfailInvalid.
		 */
4688 if (get_vmcs12(vcpu)->vmcs_link_pointer == -1ull)
4689 return nested_vmx_failInvalid(vcpu);
4690 vmcs12 = get_shadow_vmcs12(vcpu);
4691 }
4692
4693 offset = vmcs_field_to_offset(field);
4694 if (offset < 0)
4695 return nested_vmx_failValid(vcpu,
4696 VMXERR_UNSUPPORTED_VMCS_COMPONENT);
4697
	/*
	 * Some Intel CPUs intentionally drop the reserved bits of the AR byte
	 * fields on VMWRITE.  Emulate this behavior to ensure consistent KVM
	 * behavior regardless of the underlying hardware, e.g. if an AR_BYTE
	 * field is intercepted for VMWRITE but not VMREAD (in L1), then VMREAD
	 * from L1 will return a different value than VMREAD from L2 (L1 sees
	 * the stripped down value, L2 sees the full value as stored by KVM).
	 */
4706 if (field >= GUEST_ES_AR_BYTES && field <= GUEST_TR_AR_BYTES)
4707 field_value &= 0x1f0ff;
4708
4709 vmcs12_write_any(vmcs12, field, offset, field_value);
4710
	/*
	 * Do not track vmcs12 dirty-state if in guest-mode, as we actually
	 * dirty shadow vmcs12 instead of vmcs12.  Fields that can be updated
	 * by L1 without a vmexit are always updated in the vmcs02, i.e. they
	 * don't "dirty" vmcs12 in that case.
	 */
4717 if (!is_guest_mode(vcpu) && !is_shadow_field_rw(field)) {
		/*
		 * L1 can read these fields without exiting, ensure the
		 * shadow VMCS is up-to-date.
		 */
4722 if (enable_shadow_vmcs && is_shadow_field_ro(field)) {
4723 preempt_disable();
4724 vmcs_load(vmx->vmcs01.shadow_vmcs);
4725
4726 __vmcs_writel(field, field_value);
4727
4728 vmcs_clear(vmx->vmcs01.shadow_vmcs);
4729 vmcs_load(vmx->loaded_vmcs->vmcs);
4730 preempt_enable();
4731 }
4732 vmx->nested.dirty_vmcs12 = true;
4733 }
4734
4735 return nested_vmx_succeed(vcpu);
4736}
4737
4738static void set_current_vmptr(struct vcpu_vmx *vmx, gpa_t vmptr)
4739{
4740 vmx->nested.current_vmptr = vmptr;
4741 if (enable_shadow_vmcs) {
4742 secondary_exec_controls_setbit(vmx, SECONDARY_EXEC_SHADOW_VMCS);
4743 vmcs_write64(VMCS_LINK_POINTER,
4744 __pa(vmx->vmcs01.shadow_vmcs));
4745 vmx->nested.need_vmcs12_to_shadow_sync = true;
4746 }
4747 vmx->nested.dirty_vmcs12 = true;
4748}
4749
/* Emulate the VMPTRLD instruction */
4751static int handle_vmptrld(struct kvm_vcpu *vcpu)
4752{
4753 struct vcpu_vmx *vmx = to_vmx(vcpu);
4754 gpa_t vmptr;
4755
4756 if (!nested_vmx_check_permission(vcpu))
4757 return 1;
4758
4759 if (nested_vmx_get_vmptr(vcpu, &vmptr))
4760 return 1;
4761
4762 if (!page_address_valid(vcpu, vmptr))
4763 return nested_vmx_failValid(vcpu,
4764 VMXERR_VMPTRLD_INVALID_ADDRESS);
4765
4766 if (vmptr == vmx->nested.vmxon_ptr)
4767 return nested_vmx_failValid(vcpu,
4768 VMXERR_VMPTRLD_VMXON_POINTER);
4769
	/* Forbid normal VMPTRLD if the Enlightened version is in use. */
4771 if (vmx->nested.hv_evmcs)
4772 return 1;
4773
4774 if (vmx->nested.current_vmptr != vmptr) {
4775 struct kvm_host_map map;
4776 struct vmcs12 *new_vmcs12;
4777
4778 if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmptr), &map)) {
			/*
			 * Reads from an unbacked page return all 1s,
			 * which means that the 32 bits located at the
			 * given physical address won't match the required
			 * VMCS12_REVISION identifier.
			 */
4785 return nested_vmx_failValid(vcpu,
4786 VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID);
4787 }
4788
4789 new_vmcs12 = map.hva;
4790
4791 if (new_vmcs12->hdr.revision_id != VMCS12_REVISION ||
4792 (new_vmcs12->hdr.shadow_vmcs &&
4793 !nested_cpu_has_vmx_shadow_vmcs(vcpu))) {
4794 kvm_vcpu_unmap(vcpu, &map, false);
4795 return nested_vmx_failValid(vcpu,
4796 VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID);
4797 }
4798
4799 nested_release_vmcs12(vcpu);
4800
		/*
		 * Load VMCS12 from guest memory since it is not already
		 * cached.
		 */
4805 memcpy(vmx->nested.cached_vmcs12, new_vmcs12, VMCS12_SIZE);
4806 kvm_vcpu_unmap(vcpu, &map, false);
4807
4808 set_current_vmptr(vmx, vmptr);
4809 }
4810
4811 return nested_vmx_succeed(vcpu);
4812}
4813
/* Emulate the VMPTRST instruction */
4815static int handle_vmptrst(struct kvm_vcpu *vcpu)
4816{
4817 unsigned long exit_qual = vmcs_readl(EXIT_QUALIFICATION);
4818 u32 instr_info = vmcs_read32(VMX_INSTRUCTION_INFO);
4819 gpa_t current_vmptr = to_vmx(vcpu)->nested.current_vmptr;
4820 struct x86_exception e;
4821 gva_t gva;
4822
4823 if (!nested_vmx_check_permission(vcpu))
4824 return 1;
4825
4826 if (unlikely(to_vmx(vcpu)->nested.hv_evmcs))
4827 return 1;
4828
4829 if (get_vmx_mem_address(vcpu, exit_qual, instr_info,
4830 true, sizeof(gpa_t), &gva))
4831 return 1;
	/* *_system ok, nested_vmx_check_permission has verified cpl=0 */
	if (kvm_write_guest_virt_system(vcpu, gva, (void *)&current_vmptr,
4834 sizeof(gpa_t), &e)) {
4835 kvm_inject_page_fault(vcpu, &e);
4836 return 1;
4837 }
4838 return nested_vmx_succeed(vcpu);
4839}
4840
/* Emulate the INVEPT instruction */
4842static int handle_invept(struct kvm_vcpu *vcpu)
4843{
4844 struct vcpu_vmx *vmx = to_vmx(vcpu);
4845 u32 vmx_instruction_info, types;
4846 unsigned long type;
4847 gva_t gva;
4848 struct x86_exception e;
4849 struct {
4850 u64 eptp, gpa;
4851 } operand;
4852
4853 if (!(vmx->nested.msrs.secondary_ctls_high &
4854 SECONDARY_EXEC_ENABLE_EPT) ||
4855 !(vmx->nested.msrs.ept_caps & VMX_EPT_INVEPT_BIT)) {
4856 kvm_queue_exception(vcpu, UD_VECTOR);
4857 return 1;
4858 }
4859
4860 if (!nested_vmx_check_permission(vcpu))
4861 return 1;
4862
4863 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
4864 type = kvm_register_readl(vcpu, (vmx_instruction_info >> 28) & 0xf);
4865
4866 types = (vmx->nested.msrs.ept_caps >> VMX_EPT_EXTENT_SHIFT) & 6;
4867
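	/*
	 * Illustration: the shift-and-mask above keeps exactly the capability
	 * bits corresponding to INVEPT types 1 (single-context) and 2
	 * (global).  If ept_caps advertises both extents, types == 0x6, so a
	 * single-context INVEPT passes the "types & (1 << 1)" test below.
	 */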
4868 if (type >= 32 || !(types & (1 << type)))
4869 return nested_vmx_failValid(vcpu,
4870 VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
4871
	/* According to the Intel VMX instruction reference, the memory
	 * operand is read even if it isn't needed (e.g., for type==global).
	 */
4875 if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
4876 vmx_instruction_info, false, sizeof(operand), &gva))
4877 return 1;
4878 if (kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e)) {
4879 kvm_inject_page_fault(vcpu, &e);
4880 return 1;
4881 }
4882
4883 switch (type) {
4884 case VMX_EPT_EXTENT_GLOBAL:
4885 case VMX_EPT_EXTENT_CONTEXT:
		/*
		 * TODO: Sync the necessary shadow EPT roots here, rather than
		 * at the next emulated VM-entry.
		 */
4890 break;
4891 default:
4892 BUG_ON(1);
4893 break;
4894 }
4895
4896 return nested_vmx_succeed(vcpu);
4897}
4898
4899static int handle_invvpid(struct kvm_vcpu *vcpu)
4900{
4901 struct vcpu_vmx *vmx = to_vmx(vcpu);
4902 u32 vmx_instruction_info;
4903 unsigned long type, types;
4904 gva_t gva;
4905 struct x86_exception e;
4906 struct {
4907 u64 vpid;
4908 u64 gla;
4909 } operand;
4910 u16 vpid02;
4911
4912 if (!(vmx->nested.msrs.secondary_ctls_high &
4913 SECONDARY_EXEC_ENABLE_VPID) ||
4914 !(vmx->nested.msrs.vpid_caps & VMX_VPID_INVVPID_BIT)) {
4915 kvm_queue_exception(vcpu, UD_VECTOR);
4916 return 1;
4917 }
4918
4919 if (!nested_vmx_check_permission(vcpu))
4920 return 1;
4921
4922 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
4923 type = kvm_register_readl(vcpu, (vmx_instruction_info >> 28) & 0xf);
4924
4925 types = (vmx->nested.msrs.vpid_caps &
4926 VMX_VPID_EXTENT_SUPPORTED_MASK) >> 8;
4927
4928 if (type >= 32 || !(types & (1 << type)))
4929 return nested_vmx_failValid(vcpu,
4930 VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
4931
	/* According to the Intel VMX instruction reference, the memory
	 * operand is read even if it isn't needed (e.g., for type==global).
	 */
4935 if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
4936 vmx_instruction_info, false, sizeof(operand), &gva))
4937 return 1;
4938 if (kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e)) {
4939 kvm_inject_page_fault(vcpu, &e);
4940 return 1;
4941 }
4942 if (operand.vpid >> 16)
4943 return nested_vmx_failValid(vcpu,
4944 VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
4945
4946 vpid02 = nested_get_vpid02(vcpu);
4947 switch (type) {
4948 case VMX_VPID_EXTENT_INDIVIDUAL_ADDR:
4949 if (!operand.vpid ||
4950 is_noncanonical_address(operand.gla, vcpu))
4951 return nested_vmx_failValid(vcpu,
4952 VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
4953 if (cpu_has_vmx_invvpid_individual_addr()) {
4954 __invvpid(VMX_VPID_EXTENT_INDIVIDUAL_ADDR,
4955 vpid02, operand.gla);
4956 } else
4957 __vmx_flush_tlb(vcpu, vpid02, false);
4958 break;
4959 case VMX_VPID_EXTENT_SINGLE_CONTEXT:
4960 case VMX_VPID_EXTENT_SINGLE_NON_GLOBAL:
4961 if (!operand.vpid)
4962 return nested_vmx_failValid(vcpu,
4963 VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
4964 __vmx_flush_tlb(vcpu, vpid02, false);
4965 break;
4966 case VMX_VPID_EXTENT_ALL_CONTEXT:
4967 __vmx_flush_tlb(vcpu, vpid02, false);
4968 break;
4969 default:
4970 WARN_ON_ONCE(1);
4971 return kvm_skip_emulated_instruction(vcpu);
4972 }
4973
4974 return nested_vmx_succeed(vcpu);
4975}
4976
4977static int nested_vmx_eptp_switching(struct kvm_vcpu *vcpu,
4978 struct vmcs12 *vmcs12)
4979{
4980 u32 index = kvm_rcx_read(vcpu);
4981 u64 address;
4982 bool accessed_dirty;
4983 struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
4984
4985 if (!nested_cpu_has_eptp_switching(vmcs12) ||
4986 !nested_cpu_has_ept(vmcs12))
4987 return 1;
4988
4989 if (index >= VMFUNC_EPTP_ENTRIES)
4990 return 1;
4991
4992
4993 if (kvm_vcpu_read_guest_page(vcpu, vmcs12->eptp_list_address >> PAGE_SHIFT,
4994 &address, index * 8, 8))
4995 return 1;
4996
4997 accessed_dirty = !!(address & VMX_EPTP_AD_ENABLE_BIT);
4998
	/*
	 * If the (L2) guest does a vmfunc to the currently
	 * active ept pointer, we don't have to do anything else.
	 */
5003 if (vmcs12->ept_pointer != address) {
5004 if (!valid_ept_address(vcpu, address))
5005 return 1;
5006
5007 kvm_mmu_unload(vcpu);
5008 mmu->ept_ad = accessed_dirty;
5009 mmu->mmu_role.base.ad_disabled = !accessed_dirty;
5010 vmcs12->ept_pointer = address;
5011
		/*
		 * TODO: Check what's the correct approach in case
		 * mmu reload fails.  Currently, we just let the next
		 * reload potentially fail.
		 */
5016 kvm_mmu_reload(vcpu);
5017 }
5018
5019 return 0;
5020}
5021
5022static int handle_vmfunc(struct kvm_vcpu *vcpu)
5023{
5024 struct vcpu_vmx *vmx = to_vmx(vcpu);
5025 struct vmcs12 *vmcs12;
5026 u32 function = kvm_rax_read(vcpu);
5027
	/*
	 * VMFUNC is only supported for nested guests, but we always enable the
	 * secondary control for simplicity; for non-nested mode, fake that we
	 * didn't need it by injecting #UD.
	 */
5033 if (!is_guest_mode(vcpu)) {
5034 kvm_queue_exception(vcpu, UD_VECTOR);
5035 return 1;
5036 }
5037
5038 vmcs12 = get_vmcs12(vcpu);
5039 if ((vmcs12->vm_function_control & (1 << function)) == 0)
5040 goto fail;
5041
5042 switch (function) {
5043 case 0:
5044 if (nested_vmx_eptp_switching(vcpu, vmcs12))
5045 goto fail;
5046 break;
5047 default:
5048 goto fail;
5049 }
5050 return kvm_skip_emulated_instruction(vcpu);
5051
5052fail:
5053 nested_vmx_vmexit(vcpu, vmx->exit_reason,
5054 vmcs_read32(VM_EXIT_INTR_INFO),
5055 vmcs_readl(EXIT_QUALIFICATION));
5056 return 1;
5057}
5058
/*
 * Return true if an I/O instruction executed by L2 should cause a VM-exit to
 * L1, based on L1's I/O bitmaps or its unconditional I/O exiting control.
 */
5060static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu,
5061 struct vmcs12 *vmcs12)
5062{
5063 unsigned long exit_qualification;
5064 gpa_t bitmap, last_bitmap;
5065 unsigned int port;
5066 int size;
5067 u8 b;
5068
5069 if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS))
5070 return nested_cpu_has(vmcs12, CPU_BASED_UNCOND_IO_EXITING);
5071
5072 exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
5073
5074 port = exit_qualification >> 16;
5075 size = (exit_qualification & 7) + 1;
5076
5077 last_bitmap = (gpa_t)-1;
5078 b = -1;
5079
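	/*
	 * Example (illustrative): an outb to port 0x3f8 checks bit
	 * 0x3f8 % 8 = 0 of byte 0x3f8 / 8 = 127 in io_bitmap_a; a two-byte
	 * access also checks bit 1 of the same byte, and any set bit forces
	 * the exit to L1.
	 */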
5080 while (size > 0) {
5081 if (port < 0x8000)
5082 bitmap = vmcs12->io_bitmap_a;
5083 else if (port < 0x10000)
5084 bitmap = vmcs12->io_bitmap_b;
5085 else
5086 return true;
5087 bitmap += (port & 0x7fff) / 8;
5088
5089 if (last_bitmap != bitmap)
5090 if (kvm_vcpu_read_guest(vcpu, bitmap, &b, 1))
5091 return true;
5092 if (b & (1 << (port & 7)))
5093 return true;
5094
5095 port++;
5096 size--;
5097 last_bitmap = bitmap;
5098 }
5099
5100 return false;
5101}
5102
/*
 * Return true if we should exit from L2 to L1 to handle an MSR access,
 * rather than handle it ourselves in L0, i.e. check whether L1 expressed
 * interest in the current event (read or write of a specific MSR) via its
 * MSR bitmap.  This may be the case even when L0 doesn't use MSR bitmaps.
 */
5109static bool nested_vmx_exit_handled_msr(struct kvm_vcpu *vcpu,
5110 struct vmcs12 *vmcs12, u32 exit_reason)
5111{
5112 u32 msr_index = kvm_rcx_read(vcpu);
5113 gpa_t bitmap;
5114
5115 if (!nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS))
5116 return true;
5117
	/*
	 * The MSR_BITMAP page is divided into four 1024-byte regions: reads
	 * of MSRs 0x00000000-0x00001fff, reads of MSRs 0xc0000000-0xc0001fff,
	 * then the corresponding two write regions starting at offset 2048.
	 * Each region holds one bit per MSR.
	 */
5123 bitmap = vmcs12->msr_bitmap;
5124 if (exit_reason == EXIT_REASON_MSR_WRITE)
5125 bitmap += 2048;
5126 if (msr_index >= 0xc0000000) {
5127 msr_index -= 0xc0000000;
5128 bitmap += 1024;
5129 }
5130
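	/*
	 * Worked example (illustrative): a write to MSR_STAR (0xc0000081)
	 * lands at byte 2048 + 1024 + 0x81 / 8 = 3088 of the bitmap page,
	 * bit 0x81 % 8 = 1; if L1 set that bit, the WRMSR exits to L1.
	 */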
	/* Then read the msr_index'th bit from this bitmap: */
5132 if (msr_index < 1024*8) {
5133 unsigned char b;
5134 if (kvm_vcpu_read_guest(vcpu, bitmap + msr_index/8, &b, 1))
5135 return true;
5136 return 1 & (b >> (msr_index & 7));
5137 } else
5138 return true;
5139}
5140
/*
 * Return true if we should exit from L2 to L1 to handle a CR access exit,
 * rather than handle it ourselves in L0, i.e. check if L1 wanted to
 * intercept the current event via the cr0/cr4 guest/host masks, the
 * CR3/CR8 exiting controls, or its CR3 target list.
 */
5146static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu,
5147 struct vmcs12 *vmcs12)
5148{
5149 unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
5150 int cr = exit_qualification & 15;
5151 int reg;
5152 unsigned long val;
5153
5154 switch ((exit_qualification >> 4) & 3) {
5155 case 0:
5156 reg = (exit_qualification >> 8) & 15;
5157 val = kvm_register_readl(vcpu, reg);
5158 switch (cr) {
5159 case 0:
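			/*
			 * Illustration: if L1 owns CR0.TS (cr0_guest_host_mask
			 * has X86_CR0_TS set) with a read shadow of TS=0, a
			 * MOV to CR0 by L2 that sets TS makes
			 * "mask & (val ^ shadow)" non-zero, so the access is
			 * reflected to L1.
			 */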
5160 if (vmcs12->cr0_guest_host_mask &
5161 (val ^ vmcs12->cr0_read_shadow))
5162 return true;
5163 break;
5164 case 3:
5165 if ((vmcs12->cr3_target_count >= 1 &&
5166 vmcs12->cr3_target_value0 == val) ||
5167 (vmcs12->cr3_target_count >= 2 &&
5168 vmcs12->cr3_target_value1 == val) ||
5169 (vmcs12->cr3_target_count >= 3 &&
5170 vmcs12->cr3_target_value2 == val) ||
5171 (vmcs12->cr3_target_count >= 4 &&
5172 vmcs12->cr3_target_value3 == val))
5173 return false;
5174 if (nested_cpu_has(vmcs12, CPU_BASED_CR3_LOAD_EXITING))
5175 return true;
5176 break;
5177 case 4:
5178 if (vmcs12->cr4_guest_host_mask &
5179 (vmcs12->cr4_read_shadow ^ val))
5180 return true;
5181 break;
5182 case 8:
5183 if (nested_cpu_has(vmcs12, CPU_BASED_CR8_LOAD_EXITING))
5184 return true;
5185 break;
5186 }
5187 break;
5188 case 2:
5189 if ((vmcs12->cr0_guest_host_mask & X86_CR0_TS) &&
5190 (vmcs12->cr0_read_shadow & X86_CR0_TS))
5191 return true;
5192 break;
5193 case 1:
5194 switch (cr) {
5195 case 3:
5196 if (vmcs12->cpu_based_vm_exec_control &
5197 CPU_BASED_CR3_STORE_EXITING)
5198 return true;
5199 break;
5200 case 8:
5201 if (vmcs12->cpu_based_vm_exec_control &
5202 CPU_BASED_CR8_STORE_EXITING)
5203 return true;
5204 break;
5205 }
5206 break;
5207 case 3:
		/*
		 * lmsw can change bits 1..3 of cr0, and only set bit 0 of
		 * cr0.  Other attempted changes are ignored, with no exit.
		 */
5212 val = (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f;
5213 if (vmcs12->cr0_guest_host_mask & 0xe &
5214 (val ^ vmcs12->cr0_read_shadow))
5215 return true;
5216 if ((vmcs12->cr0_guest_host_mask & 0x1) &&
5217 !(vmcs12->cr0_read_shadow & 0x1) &&
5218 (val & 0x1))
5219 return true;
5220 break;
5221 }
5222 return false;
5223}
5224
5225static bool nested_vmx_exit_handled_vmcs_access(struct kvm_vcpu *vcpu,
5226 struct vmcs12 *vmcs12, gpa_t bitmap)
5227{
5228 u32 vmx_instruction_info;
5229 unsigned long field;
5230 u8 b;
5231
5232 if (!nested_cpu_has_shadow_vmcs(vmcs12))
5233 return true;
5234
	/* Decode instruction info and find the field to access */
5236 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
5237 field = kvm_register_read(vcpu, (((vmx_instruction_info) >> 28) & 0xf));
5238
	/* Out-of-range fields always cause a VM exit from L2 to L1 */
5240 if (field >> 15)
5241 return true;
5242
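	/*
	 * Illustration: for field 0x681e (GUEST_RIP) the relevant bit is
	 * bit 0x681e % 8 = 6 of byte 0x681e / 8 = 0xd03 in the bitmap; if L1
	 * left it clear, L0 emulates the access on L1's behalf.
	 */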
5243 if (kvm_vcpu_read_guest(vcpu, bitmap + field/8, &b, 1))
5244 return true;
5245
5246 return 1 & (b >> (field & 7));
5247}
5248
/*
 * Return true if we should exit from L2 to L1 to handle an exit, or false if
 * we should handle it ourselves in L0 (and then continue L2).  Only call this
 * when in is_guest_mode (L2).
 */
5254bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason)
5255{
5256 u32 intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
5257 struct vcpu_vmx *vmx = to_vmx(vcpu);
5258 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
5259
5260 if (vmx->nested.nested_run_pending)
5261 return false;
5262
5263 if (unlikely(vmx->fail)) {
5264 pr_info_ratelimited("%s failed vm entry %x\n", __func__,
5265 vmcs_read32(VM_INSTRUCTION_ERROR));
5266 return true;
5267 }
5268
	/*
	 * The host physical addresses of some pages of guest memory are
	 * loaded into the vmcs02 (e.g. vmcs12's Virtual APIC Page).  The CPU
	 * may write to these pages via their host physical address while L2
	 * is running, bypassing any address-translation-based dirty tracking
	 * (e.g. EPT write protection).
	 *
	 * Mark them dirty on every exit from L2 to prevent them from getting
	 * out of sync with dirty tracking.
	 */
5280 nested_mark_vmcs12_pages_dirty(vcpu);
5281
5282 trace_kvm_nested_vmexit(kvm_rip_read(vcpu), exit_reason,
5283 vmcs_readl(EXIT_QUALIFICATION),
5284 vmx->idt_vectoring_info,
5285 intr_info,
5286 vmcs_read32(VM_EXIT_INTR_ERROR_CODE),
5287 KVM_ISA_VMX);
5288
5289 switch (exit_reason) {
5290 case EXIT_REASON_EXCEPTION_NMI:
5291 if (is_nmi(intr_info))
5292 return false;
5293 else if (is_page_fault(intr_info))
5294 return !vmx->vcpu.arch.apf.host_apf_reason && enable_ept;
5295 else if (is_debug(intr_info) &&
5296 vcpu->guest_debug &
5297 (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
5298 return false;
5299 else if (is_breakpoint(intr_info) &&
5300 vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
5301 return false;
5302 return vmcs12->exception_bitmap &
5303 (1u << (intr_info & INTR_INFO_VECTOR_MASK));
5304 case EXIT_REASON_EXTERNAL_INTERRUPT:
5305 return false;
5306 case EXIT_REASON_TRIPLE_FAULT:
5307 return true;
5308 case EXIT_REASON_PENDING_INTERRUPT:
5309 return nested_cpu_has(vmcs12, CPU_BASED_VIRTUAL_INTR_PENDING);
5310 case EXIT_REASON_NMI_WINDOW:
5311 return nested_cpu_has(vmcs12, CPU_BASED_VIRTUAL_NMI_PENDING);
5312 case EXIT_REASON_TASK_SWITCH:
5313 return true;
5314 case EXIT_REASON_CPUID:
5315 return true;
5316 case EXIT_REASON_HLT:
5317 return nested_cpu_has(vmcs12, CPU_BASED_HLT_EXITING);
5318 case EXIT_REASON_INVD:
5319 return true;
5320 case EXIT_REASON_INVLPG:
5321 return nested_cpu_has(vmcs12, CPU_BASED_INVLPG_EXITING);
5322 case EXIT_REASON_RDPMC:
5323 return nested_cpu_has(vmcs12, CPU_BASED_RDPMC_EXITING);
5324 case EXIT_REASON_RDRAND:
5325 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDRAND_EXITING);
5326 case EXIT_REASON_RDSEED:
5327 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDSEED_EXITING);
5328 case EXIT_REASON_RDTSC: case EXIT_REASON_RDTSCP:
5329 return nested_cpu_has(vmcs12, CPU_BASED_RDTSC_EXITING);
5330 case EXIT_REASON_VMREAD:
5331 return nested_vmx_exit_handled_vmcs_access(vcpu, vmcs12,
5332 vmcs12->vmread_bitmap);
5333 case EXIT_REASON_VMWRITE:
5334 return nested_vmx_exit_handled_vmcs_access(vcpu, vmcs12,
5335 vmcs12->vmwrite_bitmap);
5336 case EXIT_REASON_VMCALL: case EXIT_REASON_VMCLEAR:
5337 case EXIT_REASON_VMLAUNCH: case EXIT_REASON_VMPTRLD:
5338 case EXIT_REASON_VMPTRST: case EXIT_REASON_VMRESUME:
5339 case EXIT_REASON_VMOFF: case EXIT_REASON_VMON:
5340 case EXIT_REASON_INVEPT: case EXIT_REASON_INVVPID:
		/*
		 * VMX instructions trap unconditionally.  This allows L1 to
		 * emulate them for its L2 guest, i.e. allows 3-level nesting!
		 */
5345 return true;
5346 case EXIT_REASON_CR_ACCESS:
5347 return nested_vmx_exit_handled_cr(vcpu, vmcs12);
5348 case EXIT_REASON_DR_ACCESS:
5349 return nested_cpu_has(vmcs12, CPU_BASED_MOV_DR_EXITING);
5350 case EXIT_REASON_IO_INSTRUCTION:
5351 return nested_vmx_exit_handled_io(vcpu, vmcs12);
5352 case EXIT_REASON_GDTR_IDTR: case EXIT_REASON_LDTR_TR:
5353 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_DESC);
5354 case EXIT_REASON_MSR_READ:
5355 case EXIT_REASON_MSR_WRITE:
5356 return nested_vmx_exit_handled_msr(vcpu, vmcs12, exit_reason);
5357 case EXIT_REASON_INVALID_STATE:
5358 return true;
5359 case EXIT_REASON_MWAIT_INSTRUCTION:
5360 return nested_cpu_has(vmcs12, CPU_BASED_MWAIT_EXITING);
5361 case EXIT_REASON_MONITOR_TRAP_FLAG:
5362 return nested_cpu_has(vmcs12, CPU_BASED_MONITOR_TRAP_FLAG);
5363 case EXIT_REASON_MONITOR_INSTRUCTION:
5364 return nested_cpu_has(vmcs12, CPU_BASED_MONITOR_EXITING);
5365 case EXIT_REASON_PAUSE_INSTRUCTION:
5366 return nested_cpu_has(vmcs12, CPU_BASED_PAUSE_EXITING) ||
5367 nested_cpu_has2(vmcs12,
5368 SECONDARY_EXEC_PAUSE_LOOP_EXITING);
5369 case EXIT_REASON_MCE_DURING_VMENTRY:
5370 return false;
5371 case EXIT_REASON_TPR_BELOW_THRESHOLD:
5372 return nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW);
5373 case EXIT_REASON_APIC_ACCESS:
5374 case EXIT_REASON_APIC_WRITE:
5375 case EXIT_REASON_EOI_INDUCED:
		/*
		 * The controls for "virtualize APIC accesses," "APIC-register
		 * virtualization," and "virtual-interrupt delivery" only come
		 * from vmcs12.
		 */
5381 return true;
5382 case EXIT_REASON_EPT_VIOLATION:
		/*
		 * L0 always deals with the EPT violation.  If nested EPT is
		 * used, and the nested mmu code discovers that the address is
		 * missing in the guest EPT table (EPT12), the EPT violation
		 * will be injected with nested_ept_inject_page_fault().
		 */
5389 return false;
5390 case EXIT_REASON_EPT_MISCONFIG:
		/*
		 * L2 never uses directly L1's EPT, but rather L0's own EPT
		 * table (shadow on EPT) or a merged EPT table that L0 built
		 * (EPT on EPT).  So any problems with the structure of the
		 * table are L0's fault.
		 */
5397 return false;
5398 case EXIT_REASON_INVPCID:
5399 return
5400 nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_INVPCID) &&
5401 nested_cpu_has(vmcs12, CPU_BASED_INVLPG_EXITING);
5402 case EXIT_REASON_WBINVD:
5403 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_WBINVD_EXITING);
5404 case EXIT_REASON_XSETBV:
5405 return true;
5406 case EXIT_REASON_XSAVES: case EXIT_REASON_XRSTORS:
		/*
		 * This should never happen, since it is not possible to set
		 * XSS to a non-zero value in either L1 or L2.  If it were,
		 * XSS would have to be checked against the XSS exit bitmap
		 * in vmcs12.
		 */
5413 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES);
5414 case EXIT_REASON_PREEMPTION_TIMER:
5415 return false;
5416 case EXIT_REASON_PML_FULL:
		/* We emulate PML support to L1. */
5418 return false;
5419 case EXIT_REASON_VMFUNC:
		/* VM functions are emulated through L2->L0 vmexits. */
5421 return false;
5422 case EXIT_REASON_ENCLS:
		/* SGX is never exposed to L1. */
5424 return false;
5425 default:
5426 return true;
5427 }
5428}
5429
5430
5431static int vmx_get_nested_state(struct kvm_vcpu *vcpu,
5432 struct kvm_nested_state __user *user_kvm_nested_state,
5433 u32 user_data_size)
5434{
5435 struct vcpu_vmx *vmx;
5436 struct vmcs12 *vmcs12;
5437 struct kvm_nested_state kvm_state = {
5438 .flags = 0,
5439 .format = KVM_STATE_NESTED_FORMAT_VMX,
5440 .size = sizeof(kvm_state),
5441 .hdr.vmx.vmxon_pa = -1ull,
5442 .hdr.vmx.vmcs12_pa = -1ull,
5443 };
5444 struct kvm_vmx_nested_state_data __user *user_vmx_nested_state =
5445 &user_kvm_nested_state->data.vmx[0];
5446
5447 if (!vcpu)
5448 return kvm_state.size + sizeof(*user_vmx_nested_state);
5449
5450 vmx = to_vmx(vcpu);
5451 vmcs12 = get_vmcs12(vcpu);
5452
5453 if (nested_vmx_allowed(vcpu) &&
5454 (vmx->nested.vmxon || vmx->nested.smm.vmxon)) {
5455 kvm_state.hdr.vmx.vmxon_pa = vmx->nested.vmxon_ptr;
5456 kvm_state.hdr.vmx.vmcs12_pa = vmx->nested.current_vmptr;
5457
5458 if (vmx_has_valid_vmcs12(vcpu)) {
5459 kvm_state.size += sizeof(user_vmx_nested_state->vmcs12);
5460
5461 if (vmx->nested.hv_evmcs)
5462 kvm_state.flags |= KVM_STATE_NESTED_EVMCS;
5463
5464 if (is_guest_mode(vcpu) &&
5465 nested_cpu_has_shadow_vmcs(vmcs12) &&
5466 vmcs12->vmcs_link_pointer != -1ull)
5467 kvm_state.size += sizeof(user_vmx_nested_state->shadow_vmcs12);
5468 }
5469
5470 if (vmx->nested.smm.vmxon)
5471 kvm_state.hdr.vmx.smm.flags |= KVM_STATE_NESTED_SMM_VMXON;
5472
5473 if (vmx->nested.smm.guest_mode)
5474 kvm_state.hdr.vmx.smm.flags |= KVM_STATE_NESTED_SMM_GUEST_MODE;
5475
5476 if (is_guest_mode(vcpu)) {
5477 kvm_state.flags |= KVM_STATE_NESTED_GUEST_MODE;
5478
5479 if (vmx->nested.nested_run_pending)
5480 kvm_state.flags |= KVM_STATE_NESTED_RUN_PENDING;
5481 }
5482 }
5483
5484 if (user_data_size < kvm_state.size)
5485 goto out;
5486
5487 if (copy_to_user(user_kvm_nested_state, &kvm_state, sizeof(kvm_state)))
5488 return -EFAULT;
5489
5490 if (!vmx_has_valid_vmcs12(vcpu))
5491 goto out;
5492
	/*
	 * When running L2, the authoritative vmcs12 state is in the
	 * vmcs02.  When running L1, the authoritative vmcs12 state is
	 * in the shadow or enlightened vmcs linked to vmcs01, unless
	 * need_vmcs12_to_shadow_sync is set, in which case the
	 * authoritative vmcs12 state is in the vmcs12 already.
	 */
5500 if (is_guest_mode(vcpu)) {
5501 sync_vmcs02_to_vmcs12(vcpu, vmcs12);
5502 sync_vmcs02_to_vmcs12_rare(vcpu, vmcs12);
5503 } else if (!vmx->nested.need_vmcs12_to_shadow_sync) {
5504 if (vmx->nested.hv_evmcs)
5505 copy_enlightened_to_vmcs12(vmx);
5506 else if (enable_shadow_vmcs)
5507 copy_shadow_to_vmcs12(vmx);
5508 }
5509
5510 BUILD_BUG_ON(sizeof(user_vmx_nested_state->vmcs12) < VMCS12_SIZE);
5511 BUILD_BUG_ON(sizeof(user_vmx_nested_state->shadow_vmcs12) < VMCS12_SIZE);
5512
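	/*
	 * Resulting layout of the userspace buffer (sketch, derived from the
	 * size computation above): the kvm_nested_state header, followed by
	 * VMCS12_SIZE bytes of vmcs12 at data.vmx[0].vmcs12, optionally
	 * followed by VMCS12_SIZE bytes of shadow vmcs12 when L2 is active
	 * with a valid vmcs_link_pointer.
	 */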
	/*
	 * Copy over the full allocated size of vmcs12 rather than just the
	 * size of the struct.
	 */
5517 if (copy_to_user(user_vmx_nested_state->vmcs12, vmcs12, VMCS12_SIZE))
5518 return -EFAULT;
5519
5520 if (nested_cpu_has_shadow_vmcs(vmcs12) &&
5521 vmcs12->vmcs_link_pointer != -1ull) {
5522 if (copy_to_user(user_vmx_nested_state->shadow_vmcs12,
5523 get_shadow_vmcs12(vcpu), VMCS12_SIZE))
5524 return -EFAULT;
5525 }
5526
5527out:
5528 return kvm_state.size;
5529}
5530
/*
 * Forcibly leave nested mode in order to be able to reset the VCPU later on.
 */
5534void vmx_leave_nested(struct kvm_vcpu *vcpu)
5535{
5536 if (is_guest_mode(vcpu)) {
5537 to_vmx(vcpu)->nested.nested_run_pending = 0;
5538 nested_vmx_vmexit(vcpu, -1, 0, 0);
5539 }
5540 free_nested(vcpu);
5541}
5542
5543static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
5544 struct kvm_nested_state __user *user_kvm_nested_state,
5545 struct kvm_nested_state *kvm_state)
5546{
5547 struct vcpu_vmx *vmx = to_vmx(vcpu);
5548 struct vmcs12 *vmcs12;
5549 u32 exit_qual;
5550 struct kvm_vmx_nested_state_data __user *user_vmx_nested_state =
5551 &user_kvm_nested_state->data.vmx[0];
5552 int ret;
5553
5554 if (kvm_state->format != KVM_STATE_NESTED_FORMAT_VMX)
5555 return -EINVAL;
5556
5557 if (kvm_state->hdr.vmx.vmxon_pa == -1ull) {
5558 if (kvm_state->hdr.vmx.smm.flags)
5559 return -EINVAL;
5560
5561 if (kvm_state->hdr.vmx.vmcs12_pa != -1ull)
5562 return -EINVAL;
5563
		/*
		 * With no VMXON region, the only piece of nested state that
		 * may be carried over is the KVM_STATE_NESTED_EVMCS flag,
		 * which records that the vCPU had enlightened VMCS enabled;
		 * all other flags must be clear.
		 */
5573 if (kvm_state->flags & ~KVM_STATE_NESTED_EVMCS)
5574 return -EINVAL;
5575 } else {
5576 if (!nested_vmx_allowed(vcpu))
5577 return -EINVAL;
5578
5579 if (!page_address_valid(vcpu, kvm_state->hdr.vmx.vmxon_pa))
5580 return -EINVAL;
5581 }
5582
5583 if ((kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE) &&
5584 (kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE))
5585 return -EINVAL;
5586
5587 if (kvm_state->hdr.vmx.smm.flags &
5588 ~(KVM_STATE_NESTED_SMM_GUEST_MODE | KVM_STATE_NESTED_SMM_VMXON))
5589 return -EINVAL;
5590
	/*
	 * SMM temporarily disables VMX, so we cannot be in guest mode,
	 * nor can VMLAUNCH/VMRESUME be pending.  Outside SMM, SMM flags
	 * must be zero.
	 */
5596 if (is_smm(vcpu) ?
5597 (kvm_state->flags &
5598 (KVM_STATE_NESTED_GUEST_MODE | KVM_STATE_NESTED_RUN_PENDING))
5599 : kvm_state->hdr.vmx.smm.flags)
5600 return -EINVAL;
5601
5602 if ((kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE) &&
5603 !(kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_VMXON))
5604 return -EINVAL;
5605
5606 if ((kvm_state->flags & KVM_STATE_NESTED_EVMCS) &&
5607 (!nested_vmx_allowed(vcpu) || !vmx->nested.enlightened_vmcs_enabled))
5608 return -EINVAL;
5609
5610 vmx_leave_nested(vcpu);
5611
5612 if (kvm_state->hdr.vmx.vmxon_pa == -1ull)
5613 return 0;
5614
5615 vmx->nested.vmxon_ptr = kvm_state->hdr.vmx.vmxon_pa;
5616 ret = enter_vmx_operation(vcpu);
5617 if (ret)
5618 return ret;
5619
	/* Empty 'VMXON' state is permitted */
5621 if (kvm_state->size < sizeof(*kvm_state) + sizeof(*vmcs12))
5622 return 0;
5623
5624 if (kvm_state->hdr.vmx.vmcs12_pa != -1ull) {
5625 if (kvm_state->hdr.vmx.vmcs12_pa == kvm_state->hdr.vmx.vmxon_pa ||
5626 !page_address_valid(vcpu, kvm_state->hdr.vmx.vmcs12_pa))
5627 return -EINVAL;
5628
5629 set_current_vmptr(vmx, kvm_state->hdr.vmx.vmcs12_pa);
5630 } else if (kvm_state->flags & KVM_STATE_NESTED_EVMCS) {
		/*
		 * Sync eVMCS upon entry as we may not have
		 * HV_X64_MSR_VP_ASSIST_PAGE set up yet.
		 */
5635 vmx->nested.need_vmcs12_to_shadow_sync = true;
5636 } else {
5637 return -EINVAL;
5638 }
5639
5640 if (kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_VMXON) {
5641 vmx->nested.smm.vmxon = true;
5642 vmx->nested.vmxon = false;
5643
5644 if (kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE)
5645 vmx->nested.smm.guest_mode = true;
5646 }
5647
5648 vmcs12 = get_vmcs12(vcpu);
5649 if (copy_from_user(vmcs12, user_vmx_nested_state->vmcs12, sizeof(*vmcs12)))
5650 return -EFAULT;
5651
5652 if (vmcs12->hdr.revision_id != VMCS12_REVISION)
5653 return -EINVAL;
5654
5655 if (!(kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE))
5656 return 0;
5657
5658 vmx->nested.nested_run_pending =
5659 !!(kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING);
5660
5661 ret = -EINVAL;
5662 if (nested_cpu_has_shadow_vmcs(vmcs12) &&
5663 vmcs12->vmcs_link_pointer != -1ull) {
5664 struct vmcs12 *shadow_vmcs12 = get_shadow_vmcs12(vcpu);
5665
5666 if (kvm_state->size <
5667 sizeof(*kvm_state) +
5668 sizeof(user_vmx_nested_state->vmcs12) + sizeof(*shadow_vmcs12))
5669 goto error_guest_mode;
5670
5671 if (copy_from_user(shadow_vmcs12,
5672 user_vmx_nested_state->shadow_vmcs12,
5673 sizeof(*shadow_vmcs12))) {
5674 ret = -EFAULT;
5675 goto error_guest_mode;
5676 }
5677
5678 if (shadow_vmcs12->hdr.revision_id != VMCS12_REVISION ||
5679 !shadow_vmcs12->hdr.shadow_vmcs)
5680 goto error_guest_mode;
5681 }
5682
5683 if (nested_vmx_check_controls(vcpu, vmcs12) ||
5684 nested_vmx_check_host_state(vcpu, vmcs12) ||
5685 nested_vmx_check_guest_state(vcpu, vmcs12, &exit_qual))
5686 goto error_guest_mode;
5687
5688 vmx->nested.dirty_vmcs12 = true;
5689 ret = nested_vmx_enter_non_root_mode(vcpu, false);
5690 if (ret)
5691 goto error_guest_mode;
5692
5693 return 0;
5694
5695error_guest_mode:
5696 vmx->nested.nested_run_pending = 0;
5697 return ret;
5698}
5699
5700void nested_vmx_vcpu_setup(void)
5701{
5702 if (enable_shadow_vmcs) {
5703 vmcs_write64(VMREAD_BITMAP, __pa(vmx_vmread_bitmap));
5704 vmcs_write64(VMWRITE_BITMAP, __pa(vmx_vmwrite_bitmap));
5705 }
5706}
5707
/*
 * nested_vmx_setup_ctls_msrs() sets up variables containing the values to be
 * returned for the various VMX controls MSRs when nested VMX is enabled.
 * The same values should also be used to verify that vmcs12 control fields are
 * valid during nested entry from L1 to L2.
 * Each of these control msrs has a low and high 32-bit half: a low bit is on
 * if the corresponding bit in the (32-bit) control field *must* be on, and a
 * high bit is on if the corresponding bit in the control field *may* be on.
 */
5718void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps,
5719 bool apicv)
5720{
	/*
	 * Note that as a general rule, the high half of each MSR (the bits
	 * which may be 1) is initialized from the intersection of the
	 * underlying hardware's MSR (i.e. features the CPU can actually
	 * support) and the set of features we choose to expose.  Some
	 * features are emulated and may be forced on below even when the
	 * hardware lacks them.
	 */
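	/*
	 * Sketch of how a low/high pair is consumed when vmcs12 controls are
	 * validated (cf. vmx_control_verify()): a 32-bit value "val" offered
	 * by L1 is legal iff
	 *
	 *	(val & low) == low  &&  (val & ~high) == 0
	 *
	 * i.e. every must-be-1 bit is set and no disallowed bit is set.
	 */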
	/* pin-based controls */
5737 rdmsr(MSR_IA32_VMX_PINBASED_CTLS,
5738 msrs->pinbased_ctls_low,
5739 msrs->pinbased_ctls_high);
5740 msrs->pinbased_ctls_low |=
5741 PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
5742 msrs->pinbased_ctls_high &=
5743 PIN_BASED_EXT_INTR_MASK |
5744 PIN_BASED_NMI_EXITING |
5745 PIN_BASED_VIRTUAL_NMIS |
5746 (apicv ? PIN_BASED_POSTED_INTR : 0);
5747 msrs->pinbased_ctls_high |=
5748 PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR |
5749 PIN_BASED_VMX_PREEMPTION_TIMER;
5750
	/* exit controls */
5752 rdmsr(MSR_IA32_VMX_EXIT_CTLS,
5753 msrs->exit_ctls_low,
5754 msrs->exit_ctls_high);
5755 msrs->exit_ctls_low =
5756 VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR;
5757
5758 msrs->exit_ctls_high &=
5759#ifdef CONFIG_X86_64
5760 VM_EXIT_HOST_ADDR_SPACE_SIZE |
5761#endif
5762 VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT;
5763 msrs->exit_ctls_high |=
5764 VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR |
5765 VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER |
5766 VM_EXIT_SAVE_VMX_PREEMPTION_TIMER | VM_EXIT_ACK_INTR_ON_EXIT;
5767
	/* We support free control of debug control saving. */
5769 msrs->exit_ctls_low &= ~VM_EXIT_SAVE_DEBUG_CONTROLS;
5770
	/* entry controls */
5772 rdmsr(MSR_IA32_VMX_ENTRY_CTLS,
5773 msrs->entry_ctls_low,
5774 msrs->entry_ctls_high);
5775 msrs->entry_ctls_low =
5776 VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR;
5777 msrs->entry_ctls_high &=
5778#ifdef CONFIG_X86_64
5779 VM_ENTRY_IA32E_MODE |
5780#endif
5781 VM_ENTRY_LOAD_IA32_PAT;
5782 msrs->entry_ctls_high |=
5783 (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR | VM_ENTRY_LOAD_IA32_EFER);
5784
	/* We support free control of debug control loading. */
5786 msrs->entry_ctls_low &= ~VM_ENTRY_LOAD_DEBUG_CONTROLS;
5787
	/* cpu-based controls */
5789 rdmsr(MSR_IA32_VMX_PROCBASED_CTLS,
5790 msrs->procbased_ctls_low,
5791 msrs->procbased_ctls_high);
5792 msrs->procbased_ctls_low =
5793 CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
5794 msrs->procbased_ctls_high &=
5795 CPU_BASED_VIRTUAL_INTR_PENDING |
5796 CPU_BASED_VIRTUAL_NMI_PENDING | CPU_BASED_USE_TSC_OFFSETING |
5797 CPU_BASED_HLT_EXITING | CPU_BASED_INVLPG_EXITING |
5798 CPU_BASED_MWAIT_EXITING | CPU_BASED_CR3_LOAD_EXITING |
5799 CPU_BASED_CR3_STORE_EXITING |
5800#ifdef CONFIG_X86_64
5801 CPU_BASED_CR8_LOAD_EXITING | CPU_BASED_CR8_STORE_EXITING |
5802#endif
5803 CPU_BASED_MOV_DR_EXITING | CPU_BASED_UNCOND_IO_EXITING |
5804 CPU_BASED_USE_IO_BITMAPS | CPU_BASED_MONITOR_TRAP_FLAG |
5805 CPU_BASED_MONITOR_EXITING | CPU_BASED_RDPMC_EXITING |
5806 CPU_BASED_RDTSC_EXITING | CPU_BASED_PAUSE_EXITING |
5807 CPU_BASED_TPR_SHADOW | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
5808
	/*
	 * We can allow some features even when not supported by the
	 * hardware.  For example, L1 can specify an MSR bitmap - and we
	 * can use it to avoid exits to L1 - even when L0 runs L2
	 * without MSR bitmaps.
	 */
5814 msrs->procbased_ctls_high |=
5815 CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR |
5816 CPU_BASED_USE_MSR_BITMAPS;
5817
	/* We support free control of CR3 access interception. */
5819 msrs->procbased_ctls_low &=
5820 ~(CPU_BASED_CR3_LOAD_EXITING | CPU_BASED_CR3_STORE_EXITING);
5821
	/*
	 * secondary cpu-based controls.  Do not include those that
	 * depend on CPUID bits, they are added later by vmx_cpuid_update.
	 */
5826 if (msrs->procbased_ctls_high & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)
5827 rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2,
5828 msrs->secondary_ctls_low,
5829 msrs->secondary_ctls_high);
5830
5831 msrs->secondary_ctls_low = 0;
5832 msrs->secondary_ctls_high &=
5833 SECONDARY_EXEC_DESC |
5834 SECONDARY_EXEC_RDTSCP |
5835 SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
5836 SECONDARY_EXEC_WBINVD_EXITING |
5837 SECONDARY_EXEC_APIC_REGISTER_VIRT |
5838 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
5839 SECONDARY_EXEC_RDRAND_EXITING |
5840 SECONDARY_EXEC_ENABLE_INVPCID |
5841 SECONDARY_EXEC_RDSEED_EXITING |
5842 SECONDARY_EXEC_XSAVES;
5843
	/*
	 * We can emulate "VMCS shadowing," even if the hardware
	 * doesn't support it.
	 */
5848 msrs->secondary_ctls_high |=
5849 SECONDARY_EXEC_SHADOW_VMCS;
5850
5851 if (enable_ept) {
		/* nested EPT: emulate EPT also to L1 */
5853 msrs->secondary_ctls_high |=
5854 SECONDARY_EXEC_ENABLE_EPT;
5855 msrs->ept_caps = VMX_EPT_PAGE_WALK_4_BIT |
5856 VMX_EPTP_WB_BIT | VMX_EPT_INVEPT_BIT;
5857 if (cpu_has_vmx_ept_execute_only())
5858 msrs->ept_caps |=
5859 VMX_EPT_EXECUTE_ONLY_BIT;
5860 msrs->ept_caps &= ept_caps;
5861 msrs->ept_caps |= VMX_EPT_EXTENT_GLOBAL_BIT |
5862 VMX_EPT_EXTENT_CONTEXT_BIT | VMX_EPT_2MB_PAGE_BIT |
5863 VMX_EPT_1GB_PAGE_BIT;
5864 if (enable_ept_ad_bits) {
5865 msrs->secondary_ctls_high |=
5866 SECONDARY_EXEC_ENABLE_PML;
5867 msrs->ept_caps |= VMX_EPT_AD_BIT;
5868 }
5869 }
5870
5871 if (cpu_has_vmx_vmfunc()) {
5872 msrs->secondary_ctls_high |=
5873 SECONDARY_EXEC_ENABLE_VMFUNC;
		/*
		 * Advertise EPTP switching unconditionally
		 * since we emulate it.
		 */
5878 if (enable_ept)
5879 msrs->vmfunc_controls =
5880 VMX_VMFUNC_EPTP_SWITCHING;
5881 }
5882
	/*
	 * Old versions of KVM use the single-context version without
	 * checking for support, so declare that it is supported even
	 * though it is treated as global context.  The alternative is
	 * not failing the single-context invvpid, and it is worse.
	 */
5889 if (enable_vpid) {
5890 msrs->secondary_ctls_high |=
5891 SECONDARY_EXEC_ENABLE_VPID;
5892 msrs->vpid_caps = VMX_VPID_INVVPID_BIT |
5893 VMX_VPID_EXTENT_SUPPORTED_MASK;
5894 }
5895
5896 if (enable_unrestricted_guest)
5897 msrs->secondary_ctls_high |=
5898 SECONDARY_EXEC_UNRESTRICTED_GUEST;
5899
5900 if (flexpriority_enabled)
5901 msrs->secondary_ctls_high |=
5902 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
5903
	/* miscellaneous data */
5905 rdmsr(MSR_IA32_VMX_MISC,
5906 msrs->misc_low,
5907 msrs->misc_high);
5908 msrs->misc_low &= VMX_MISC_SAVE_EFER_LMA;
5909 msrs->misc_low |=
5910 MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS |
5911 VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE |
5912 VMX_MISC_ACTIVITY_HLT;
5913 msrs->misc_high = 0;
5914
	/*
	 * This MSR reports some information about VMX support.  We
	 * should return information about the VMX we emulate for the
	 * guest, and the VMCS structure we give it - not about the
	 * VMX support of the underlying hardware.
	 */
5921 msrs->basic =
5922 VMCS12_REVISION |
5923 VMX_BASIC_TRUE_CTLS |
5924 ((u64)VMCS12_SIZE << VMX_BASIC_VMCS_SIZE_SHIFT) |
5925 (VMX_BASIC_MEM_TYPE_WB << VMX_BASIC_MEM_TYPE_SHIFT);
5926
5927 if (cpu_has_vmx_basic_inout())
5928 msrs->basic |= VMX_BASIC_INOUT;
5929
	/*
	 * These MSRs specify bits which the guest must keep fixed on
	 * while L1 is in VMXON operation (they are really CR0_FIXED0
	 * and CR4_FIXED0).  We picked the standard core2 setting.
	 */
5935#define VMXON_CR0_ALWAYSON (X86_CR0_PE | X86_CR0_PG | X86_CR0_NE)
5936#define VMXON_CR4_ALWAYSON X86_CR4_VMXE
5937 msrs->cr0_fixed0 = VMXON_CR0_ALWAYSON;
5938 msrs->cr4_fixed0 = VMXON_CR4_ALWAYSON;
5939
	/* These MSRs specify bits which the guest must keep fixed off. */
5941 rdmsrl(MSR_IA32_VMX_CR0_FIXED1, msrs->cr0_fixed1);
5942 rdmsrl(MSR_IA32_VMX_CR4_FIXED1, msrs->cr4_fixed1);
5943
	/* highest index: VMX_PREEMPTION_TIMER_VALUE */
5945 msrs->vmcs_enum = VMCS12_MAX_FIELD_INDEX << 1;
5946}
5947
5948void nested_vmx_hardware_unsetup(void)
5949{
5950 int i;
5951
5952 if (enable_shadow_vmcs) {
5953 for (i = 0; i < VMX_BITMAP_NR; i++)
5954 free_page((unsigned long)vmx_bitmap[i]);
5955 }
5956}
5957
5958__init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *))
5959{
5960 int i;
5961
5962 if (!cpu_has_vmx_shadow_vmcs())
5963 enable_shadow_vmcs = 0;
5964 if (enable_shadow_vmcs) {
5965 for (i = 0; i < VMX_BITMAP_NR; i++) {
			/*
			 * The vmx_bitmap is not tied to a VM and so should
			 * not be charged to a memcg.
			 */
5970 vmx_bitmap[i] = (unsigned long *)
5971 __get_free_page(GFP_KERNEL);
5972 if (!vmx_bitmap[i]) {
5973 nested_vmx_hardware_unsetup();
5974 return -ENOMEM;
5975 }
5976 }
5977
5978 init_vmcs_shadow_fields();
5979 }
5980
	exit_handlers[EXIT_REASON_VMCLEAR] = handle_vmclear;
	exit_handlers[EXIT_REASON_VMLAUNCH] = handle_vmlaunch;
	exit_handlers[EXIT_REASON_VMPTRLD] = handle_vmptrld;
	exit_handlers[EXIT_REASON_VMPTRST] = handle_vmptrst;
	exit_handlers[EXIT_REASON_VMREAD] = handle_vmread;
	exit_handlers[EXIT_REASON_VMRESUME] = handle_vmresume;
	exit_handlers[EXIT_REASON_VMWRITE] = handle_vmwrite;
	exit_handlers[EXIT_REASON_VMOFF] = handle_vmoff;
	exit_handlers[EXIT_REASON_VMON] = handle_vmon;
	exit_handlers[EXIT_REASON_INVEPT] = handle_invept;
	exit_handlers[EXIT_REASON_INVVPID] = handle_invvpid;
	exit_handlers[EXIT_REASON_VMFUNC] = handle_vmfunc;
5993
5994 kvm_x86_ops->check_nested_events = vmx_check_nested_events;
5995 kvm_x86_ops->get_nested_state = vmx_get_nested_state;
5996 kvm_x86_ops->set_nested_state = vmx_set_nested_state;
	kvm_x86_ops->get_vmcs12_pages = nested_get_vmcs12_pages;
5998 kvm_x86_ops->nested_enable_evmcs = nested_enable_evmcs;
5999 kvm_x86_ops->nested_get_evmcs_version = nested_get_evmcs_version;
6000
6001 return 0;
6002}
6003