// SPDX-License-Identifier: GPL-2.0-only
/*
 * Kernel-based Virtual Machine driver for Linux
 *
 * AMD SVM support
 *
 * Copyright (C) 2006 Qumranet, Inc.
 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
 *
 * Authors:
 *   Yaniv Kamay  <yaniv@qumranet.com>
 *   Avi Kivity   <avi@qumranet.com>
 */

#define pr_fmt(fmt) "SVM: " fmt

#include <linux/kvm_host.h>

#include "irq.h"
#include "mmu.h"
#include "kvm_cache_regs.h"
#include "x86.h"
#include "cpuid.h"
#include "pmu.h"

#include <linux/module.h>
#include <linux/mod_devicetable.h>
#include <linux/kernel.h>
#include <linux/vmalloc.h>
#include <linux/highmem.h>
#include <linux/sched.h>
#include <linux/trace_events.h>
#include <linux/slab.h>
#include <linux/amd-iommu.h>
#include <linux/hashtable.h>
#include <linux/frame.h>
#include <linux/psp-sev.h>
#include <linux/file.h>
#include <linux/pagemap.h>
#include <linux/swap.h>
#include <linux/rwsem.h>

#include <asm/apic.h>
#include <asm/perf_event.h>
#include <asm/tlbflush.h>
#include <asm/desc.h>
#include <asm/debugreg.h>
#include <asm/kvm_para.h>
#include <asm/irq_remapping.h>
#include <asm/spec-ctrl.h>

#include <asm/virtext.h>
#include "trace.h"

#define __ex(x) __kvm_handle_fault_on_reboot(x)

MODULE_AUTHOR("Qumranet");
MODULE_LICENSE("GPL");

static const struct x86_cpu_id svm_cpu_id[] = {
	X86_FEATURE_MATCH(X86_FEATURE_SVM),
	{}
};
MODULE_DEVICE_TABLE(x86cpu, svm_cpu_id);

#define IOPM_ALLOC_ORDER 2
#define MSRPM_ALLOC_ORDER 1

#define SEG_TYPE_LDT 2
#define SEG_TYPE_BUSY_TSS16 3

#define SVM_FEATURE_LBRV		(1 << 1)
#define SVM_FEATURE_SVML		(1 << 2)
#define SVM_FEATURE_TSC_RATE		(1 << 4)
#define SVM_FEATURE_VMCB_CLEAN		(1 << 5)
#define SVM_FEATURE_FLUSH_ASID		(1 << 6)
#define SVM_FEATURE_DECODE_ASSIST	(1 << 7)
#define SVM_FEATURE_PAUSE_FILTER	(1 << 10)

#define SVM_AVIC_DOORBELL	0xc001011b

#define NESTED_EXIT_HOST	0	/* Exit handled on host level */
#define NESTED_EXIT_DONE	1	/* Exit caused nested vmexit  */
#define NESTED_EXIT_CONTINUE	2	/* Further checks needed      */

#define DEBUGCTL_RESERVED_BITS (~(0x3fULL))

#define TSC_RATIO_RSVD	0xffffff0000000000ULL
#define TSC_RATIO_MIN	0x0000000000000001ULL
#define TSC_RATIO_MAX	0x000000ffffffffffULL

#define AVIC_HPA_MASK	~((0xFFFULL << 52) | 0xFFF)

/*
 * 0xff is broadcast, so the max index allowed for physical APIC ID
 * table is 0xfe.  APIC IDs above 0xff are reserved.
 */
#define AVIC_MAX_PHYSICAL_ID_COUNT	255

#define AVIC_UNACCEL_ACCESS_WRITE_MASK		1
#define AVIC_UNACCEL_ACCESS_OFFSET_MASK		0xFF0
#define AVIC_UNACCEL_ACCESS_VECTOR_MASK		0xFFFFFFFF

/* AVIC GATAG is encoded using VM and VCPU IDs */
#define AVIC_VCPU_ID_BITS	8
#define AVIC_VCPU_ID_MASK	((1 << AVIC_VCPU_ID_BITS) - 1)

#define AVIC_VM_ID_BITS		24
#define AVIC_VM_ID_NR		(1 << AVIC_VM_ID_BITS)
#define AVIC_VM_ID_MASK		((1 << AVIC_VM_ID_BITS) - 1)

#define AVIC_GATAG(x, y)	(((x & AVIC_VM_ID_MASK) << AVIC_VCPU_ID_BITS) | \
				 (y & AVIC_VCPU_ID_MASK))
#define AVIC_GATAG_TO_VMID(x)	((x >> AVIC_VCPU_ID_BITS) & AVIC_VM_ID_MASK)
#define AVIC_GATAG_TO_VCPUID(x)	(x & AVIC_VCPU_ID_MASK)

static bool erratum_383_found __read_mostly;

static const u32 host_save_user_msrs[] = {
#ifdef CONFIG_X86_64
	MSR_STAR, MSR_LSTAR, MSR_CSTAR, MSR_SYSCALL_MASK, MSR_KERNEL_GS_BASE,
	MSR_FS_BASE,
#endif
	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
	MSR_TSC_AUX,
};

#define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs)

struct kvm_sev_info {
	bool active;		/* SEV enabled guest */
	unsigned int asid;	/* ASID used for this guest */
	unsigned int handle;	/* SEV firmware handle */
	int fd;			/* SEV device fd */
	unsigned long pages_locked; /* Number of pages locked */
	struct list_head regions_list;  /* List of registered regions */
};

struct kvm_svm {
	struct kvm kvm;

	/* Struct members for AVIC */
	u32 avic_vm_id;
	struct page *avic_logical_id_table_page;
	struct page *avic_physical_id_table_page;
	struct hlist_node hnode;

	struct kvm_sev_info sev_info;
};

struct kvm_vcpu;

struct nested_state {
	struct vmcb *hsave;
	u64 hsave_msr;
	u64 vm_cr_msr;
	u64 vmcb;

	/* These are the merged vectors */
	u32 *msrpm;

	/* gpa pointers to the real vectors */
	u64 vmcb_msrpm;
	u64 vmcb_iopm;

	/* A VMEXIT is required but not yet emulated */
	bool exit_required;

	/* cache for intercepts of the guest */
	u32 intercept_cr;
	u32 intercept_dr;
	u32 intercept_exceptions;
	u64 intercept;

	/* Nested Paging related state */
	u64 nested_cr3;
};

#define MSRPM_OFFSETS	16
static u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly;

/*
 * Set osvw_len to a higher value when updating Revision Guides
 * for newer families.
 */
static uint64_t osvw_len = 4, osvw_status;

struct vcpu_svm {
	struct kvm_vcpu vcpu;
	struct vmcb *vmcb;
	unsigned long vmcb_pa;
	struct svm_cpu_data *svm_data;
	uint64_t asid_generation;
	uint64_t sysenter_esp;
	uint64_t sysenter_eip;
	uint64_t tsc_aux;

	u64 msr_decfg;

	u64 next_rip;

	u64 host_user_msrs[NR_HOST_SAVE_USER_MSRS];
	struct {
		u16 fs;
		u16 gs;
		u16 ldt;
		u64 gs_base;
	} host;

	u64 spec_ctrl;
	/*
	 * Contains guest-controlled bits of VIRT_SPEC_CTRL, which will be
	 * translated into the appropriate L2_CFG bits on the host to
	 * perform speculative control.
	 */
	u64 virt_spec_ctrl;

	u32 *msrpm;

	ulong nmi_iret_rip;

	struct nested_state nested;

	bool nmi_singlestep;
	u64 nmi_singlestep_guest_rflags;

	unsigned int3_injected;
	unsigned long int3_rip;

	/* cached guest cpuid flags for faster access */
	bool nrips_enabled	: 1;

	u32 ldr_reg;
	u32 dfr_reg;
	struct page *avic_backing_page;
	u64 *avic_physical_id_cache;
	bool avic_is_running;

	/*
	 * Per-vcpu list of struct amd_svm_iommu_ir:
	 * This is used mainly to store interrupt remapping information used
	 * when updating the vcpu affinity.  This avoids the need to scan for
	 * IRTE and try to match ga_tag in the IOMMU driver.
	 */
	struct list_head ir_list;
	spinlock_t ir_list_lock;

	/* which host CPU was used for running this vcpu */
	unsigned int last_cpu;
};

/*
 * This is a wrapper of struct amd_iommu_ir_data.
 */
struct amd_svm_iommu_ir {
	struct list_head node;	/* Used by SVM for per-vcpu ir_list */
	void *data;		/* Storing pointer to struct amd_ir_data */
};

#define AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK	(0xFF)
#define AVIC_LOGICAL_ID_ENTRY_VALID_BIT			31
#define AVIC_LOGICAL_ID_ENTRY_VALID_MASK		(1 << 31)

#define AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK	(0xFFULL)
#define AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK	(0xFFFFFFFFFFULL << 12)
#define AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK		(1ULL << 62)
#define AVIC_PHYSICAL_ID_ENTRY_VALID_MASK		(1ULL << 63)

static DEFINE_PER_CPU(u64, current_tsc_ratio);
#define TSC_RATIO_DEFAULT	0x0100000000ULL

#define MSR_INVALID		0xffffffffU

static const struct svm_direct_access_msrs {
	u32 index;	/* Index of the MSR */
	bool always;	/* True if intercept is always on */
} direct_access_msrs[] = {
	{ .index = MSR_STAR,				.always = true  },
	{ .index = MSR_IA32_SYSENTER_CS,		.always = true  },
#ifdef CONFIG_X86_64
	{ .index = MSR_GS_BASE,				.always = true  },
	{ .index = MSR_FS_BASE,				.always = true  },
	{ .index = MSR_KERNEL_GS_BASE,			.always = true  },
	{ .index = MSR_LSTAR,				.always = true  },
	{ .index = MSR_CSTAR,				.always = true  },
	{ .index = MSR_SYSCALL_MASK,			.always = true  },
#endif
	{ .index = MSR_IA32_SPEC_CTRL,			.always = false },
	{ .index = MSR_IA32_PRED_CMD,			.always = false },
	{ .index = MSR_IA32_LASTBRANCHFROMIP,		.always = false },
	{ .index = MSR_IA32_LASTBRANCHTOIP,		.always = false },
	{ .index = MSR_IA32_LASTINTFROMIP,		.always = false },
	{ .index = MSR_IA32_LASTINTTOIP,		.always = false },
	{ .index = MSR_INVALID,				.always = false },
};

/* enable NPT for AMD64 and X86 with PAE */
#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
static bool npt_enabled = true;
#else
static bool npt_enabled;
#endif

/*
 * Pause-Loop Exiting (PLE) tuning:
 *
 * On processors that support PAUSE filtering (CPUID Fn8000_000A_EDX),
 * the VMCB provides a 16-bit pause filter count.  Each PAUSE executed
 * by the guest decrements an internal counter and a #VMEXIT is taken
 * once it expires; the optional pause filter threshold restarts the
 * count when successive PAUSEs are far enough apart.  The module
 * parameters below mirror the generic KVM PLE knobs and control the
 * initial window and how it grows and shrinks.
 */
static unsigned short pause_filter_thresh = KVM_DEFAULT_PLE_GAP;
module_param(pause_filter_thresh, ushort, 0444);

static unsigned short pause_filter_count = KVM_SVM_DEFAULT_PLE_WINDOW;
module_param(pause_filter_count, ushort, 0444);

/* Default doubles per-vcpu window every exit. */
static unsigned short pause_filter_count_grow = KVM_DEFAULT_PLE_WINDOW_GROW;
module_param(pause_filter_count_grow, ushort, 0444);

/* Default resets per-vcpu window every exit to pause_filter_count. */
static unsigned short pause_filter_count_shrink = KVM_DEFAULT_PLE_WINDOW_SHRINK;
module_param(pause_filter_count_shrink, ushort, 0444);

/* Default is to compute the maximum so we can never overflow. */
static unsigned short pause_filter_count_max = KVM_SVM_DEFAULT_PLE_WINDOW_MAX;
module_param(pause_filter_count_max, ushort, 0444);

/* allow nested paging (virtualized MMU) for all guests */
static int npt = true;
module_param(npt, int, S_IRUGO);

/* allow nested virtualization in KVM/SVM */
static int nested = true;
module_param(nested, int, S_IRUGO);

/* enable / disable AVIC */
static int avic;
#ifdef CONFIG_X86_LOCAL_APIC
module_param(avic, int, S_IRUGO);
#endif

/* enable/disable Next RIP Save */
static int nrips = true;
module_param(nrips, int, 0444);

/* enable/disable Virtual VMLOAD VMSAVE */
static int vls = true;
module_param(vls, int, 0444);

/* enable/disable Virtual GIF */
static int vgif = true;
module_param(vgif, int, 0444);

/* enable/disable SEV support */
static int sev = IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT);
module_param(sev, int, 0444);

static bool __read_mostly dump_invalid_vmcb = 0;
module_param(dump_invalid_vmcb, bool, 0644);

static u8 rsm_ins_bytes[] = "\x0f\xaa";

static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
static void svm_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa);
static void svm_complete_interrupts(struct vcpu_svm *svm);

static int nested_svm_exit_handled(struct vcpu_svm *svm);
static int nested_svm_intercept(struct vcpu_svm *svm);
static int nested_svm_vmexit(struct vcpu_svm *svm);
static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
				      bool has_error_code, u32 error_code);

enum {
	VMCB_INTERCEPTS, /* Intercept vectors, TSC offset,
			    pause filter count */
	VMCB_PERM_MAP,   /* IOPM Base and MSRPM Base */
	VMCB_ASID,	 /* ASID */
	VMCB_INTR,	 /* int_ctl, int_vector */
	VMCB_NPT,        /* npt_en, nCR3, gPAT */
	VMCB_CR,	 /* CR0, CR3, CR4, EFER */
	VMCB_DR,         /* DR6, DR7 */
	VMCB_DT,         /* GDT, IDT */
	VMCB_SEG,        /* CS, DS, SS, ES, CPL */
	VMCB_CR2,        /* CR2 only */
	VMCB_LBR,        /* DBGCTL, BR_FROM, BR_TO, LAST_EX_FROM, LAST_EX_TO */
	VMCB_AVIC,       /* AVIC APIC_BAR, AVIC APIC_BACKING_PAGE,
			  * AVIC PHYSICAL_TABLE pointer,
			  * AVIC LOGICAL_TABLE pointer
			  */
	VMCB_DIRTY_MAX,
};

/* TPR and CR2 are always written before VMRUN */
#define VMCB_ALWAYS_DIRTY_MASK	((1U << VMCB_INTR) | (1U << VMCB_CR2))

#define VMCB_AVIC_APIC_BAR_MASK		0xFFFFFFFFFF000ULL

static int sev_flush_asids(void);
static DECLARE_RWSEM(sev_deactivate_lock);
static DEFINE_MUTEX(sev_bitmap_lock);
static unsigned int max_sev_asid;
static unsigned int min_sev_asid;
static unsigned long *sev_asid_bitmap;
static unsigned long *sev_reclaim_asid_bitmap;
#define __sme_page_pa(x) __sme_set(page_to_pfn(x) << PAGE_SHIFT)

struct enc_region {
	struct list_head list;
	unsigned long npages;
	struct page **pages;
	unsigned long uaddr;
	unsigned long size;
};

static inline struct kvm_svm *to_kvm_svm(struct kvm *kvm)
{
	return container_of(kvm, struct kvm_svm, kvm);
}

static inline bool svm_sev_enabled(void)
{
	return IS_ENABLED(CONFIG_KVM_AMD_SEV) ? max_sev_asid : 0;
}

static inline bool sev_guest(struct kvm *kvm)
{
#ifdef CONFIG_KVM_AMD_SEV
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;

	return sev->active;
#else
	return false;
#endif
}

static inline int sev_get_asid(struct kvm *kvm)
{
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;

	return sev->asid;
}

static inline void mark_all_dirty(struct vmcb *vmcb)
{
	vmcb->control.clean = 0;
}

static inline void mark_all_clean(struct vmcb *vmcb)
{
	vmcb->control.clean = ((1 << VMCB_DIRTY_MAX) - 1)
			       & ~VMCB_ALWAYS_DIRTY_MASK;
}

static inline void mark_dirty(struct vmcb *vmcb, int bit)
{
	vmcb->control.clean &= ~(1 << bit);
}

static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
{
	return container_of(vcpu, struct vcpu_svm, vcpu);
}

static inline void avic_update_vapic_bar(struct vcpu_svm *svm, u64 data)
{
	svm->vmcb->control.avic_vapic_bar = data & VMCB_AVIC_APIC_BAR_MASK;
	mark_dirty(svm->vmcb, VMCB_AVIC);
}

static inline bool avic_vcpu_is_running(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	u64 *entry = svm->avic_physical_id_cache;

	if (!entry)
		return false;

	return (READ_ONCE(*entry) & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK);
}

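/*
 * Recompute the effective intercept masks in the active VMCB.  While the
 * vcpu is in guest (nested) mode, the hardware intercepts are the union
 * of what the host (L1) VMCB requests and what the nested guest state
 * requests, so both sets have to be merged whenever either side changes.
 */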
static void recalc_intercepts(struct vcpu_svm *svm)
{
	struct vmcb_control_area *c, *h;
	struct nested_state *g;

	mark_dirty(svm->vmcb, VMCB_INTERCEPTS);

	if (!is_guest_mode(&svm->vcpu))
		return;

	c = &svm->vmcb->control;
	h = &svm->nested.hsave->control;
	g = &svm->nested;

	c->intercept_cr = h->intercept_cr | g->intercept_cr;
	c->intercept_dr = h->intercept_dr | g->intercept_dr;
	c->intercept_exceptions = h->intercept_exceptions | g->intercept_exceptions;
	c->intercept = h->intercept | g->intercept;
}

static inline struct vmcb *get_host_vmcb(struct vcpu_svm *svm)
{
	if (is_guest_mode(&svm->vcpu))
		return svm->nested.hsave;
	else
		return svm->vmcb;
}

static inline void set_cr_intercept(struct vcpu_svm *svm, int bit)
{
	struct vmcb *vmcb = get_host_vmcb(svm);

	vmcb->control.intercept_cr |= (1U << bit);

	recalc_intercepts(svm);
}

static inline void clr_cr_intercept(struct vcpu_svm *svm, int bit)
{
	struct vmcb *vmcb = get_host_vmcb(svm);

	vmcb->control.intercept_cr &= ~(1U << bit);

	recalc_intercepts(svm);
}

static inline bool is_cr_intercept(struct vcpu_svm *svm, int bit)
{
	struct vmcb *vmcb = get_host_vmcb(svm);

	return vmcb->control.intercept_cr & (1U << bit);
}

static inline void set_dr_intercepts(struct vcpu_svm *svm)
{
	struct vmcb *vmcb = get_host_vmcb(svm);

	vmcb->control.intercept_dr = (1 << INTERCEPT_DR0_READ)
		| (1 << INTERCEPT_DR1_READ)
		| (1 << INTERCEPT_DR2_READ)
		| (1 << INTERCEPT_DR3_READ)
		| (1 << INTERCEPT_DR4_READ)
		| (1 << INTERCEPT_DR5_READ)
		| (1 << INTERCEPT_DR6_READ)
		| (1 << INTERCEPT_DR7_READ)
		| (1 << INTERCEPT_DR0_WRITE)
		| (1 << INTERCEPT_DR1_WRITE)
		| (1 << INTERCEPT_DR2_WRITE)
		| (1 << INTERCEPT_DR3_WRITE)
		| (1 << INTERCEPT_DR4_WRITE)
		| (1 << INTERCEPT_DR5_WRITE)
		| (1 << INTERCEPT_DR6_WRITE)
		| (1 << INTERCEPT_DR7_WRITE);

	recalc_intercepts(svm);
}

static inline void clr_dr_intercepts(struct vcpu_svm *svm)
{
	struct vmcb *vmcb = get_host_vmcb(svm);

	vmcb->control.intercept_dr = 0;

	recalc_intercepts(svm);
}

static inline void set_exception_intercept(struct vcpu_svm *svm, int bit)
{
	struct vmcb *vmcb = get_host_vmcb(svm);

	vmcb->control.intercept_exceptions |= (1U << bit);

	recalc_intercepts(svm);
}

static inline void clr_exception_intercept(struct vcpu_svm *svm, int bit)
{
	struct vmcb *vmcb = get_host_vmcb(svm);

	vmcb->control.intercept_exceptions &= ~(1U << bit);

	recalc_intercepts(svm);
}

static inline void set_intercept(struct vcpu_svm *svm, int bit)
{
	struct vmcb *vmcb = get_host_vmcb(svm);

	vmcb->control.intercept |= (1ULL << bit);

	recalc_intercepts(svm);
}

static inline void clr_intercept(struct vcpu_svm *svm, int bit)
{
	struct vmcb *vmcb = get_host_vmcb(svm);

	vmcb->control.intercept &= ~(1ULL << bit);

	recalc_intercepts(svm);
}

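/*
 * The Global Interrupt Flag (GIF) gates delivery of interrupts and other
 * events to the whole vcpu.  When hardware Virtual GIF is enabled, the
 * flag lives in the VMCB int_ctl field and is toggled by the CPU on
 * STGI/CLGI; otherwise KVM tracks it in software via HF_GIF_MASK.
 */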
static inline bool vgif_enabled(struct vcpu_svm *svm)
{
	return !!(svm->vmcb->control.int_ctl & V_GIF_ENABLE_MASK);
}

static inline void enable_gif(struct vcpu_svm *svm)
{
	if (vgif_enabled(svm))
		svm->vmcb->control.int_ctl |= V_GIF_MASK;
	else
		svm->vcpu.arch.hflags |= HF_GIF_MASK;
}

static inline void disable_gif(struct vcpu_svm *svm)
{
	if (vgif_enabled(svm))
		svm->vmcb->control.int_ctl &= ~V_GIF_MASK;
	else
		svm->vcpu.arch.hflags &= ~HF_GIF_MASK;
}

static inline bool gif_set(struct vcpu_svm *svm)
{
	if (vgif_enabled(svm))
		return !!(svm->vmcb->control.int_ctl & V_GIF_MASK);
	else
		return !!(svm->vcpu.arch.hflags & HF_GIF_MASK);
}

static unsigned long iopm_base;

struct kvm_ldttss_desc {
	u16 limit0;
	u16 base0;
	unsigned base1:8, type:5, dpl:2, p:1;
	unsigned limit1:4, zero0:3, g:1, base2:8;
	u32 base3;
	u32 zero1;
} __attribute__((packed));

struct svm_cpu_data {
	int cpu;

	u64 asid_generation;
	u32 max_asid;
	u32 next_asid;
	u32 min_asid;
	struct kvm_ldttss_desc *tss_desc;

	struct page *save_area;
	struct vmcb *current_vmcb;

	/* index = sev_asid, value = vmcb pointer */
	struct vmcb **sev_vmcbs;
};

static DEFINE_PER_CPU(struct svm_cpu_data *, svm_data);

static const u32 msrpm_ranges[] = {0, 0xc0000000, 0xc0010000};

#define NUM_MSR_MAPS ARRAY_SIZE(msrpm_ranges)
#define MSRS_RANGE_SIZE 2048
#define MSRS_IN_RANGE (MSRS_RANGE_SIZE * 8 / 2)

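/*
 * The MSR permission map covers three MSR ranges (0x0, 0xc0000000 and
 * 0xc0010000).  Every MSR uses two bits, one for read and one for write
 * intercepts, so each 2048-byte range holds the bits for 8192 MSRs.
 * svm_msrpm_offset() returns the u32 offset into the bitmap for a given
 * MSR, or MSR_INVALID if it lies outside the ranges covered by the map.
 */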
static u32 svm_msrpm_offset(u32 msr)
{
	u32 offset;
	int i;

	for (i = 0; i < NUM_MSR_MAPS; i++) {
		if (msr < msrpm_ranges[i] ||
		    msr >= msrpm_ranges[i] + MSRS_IN_RANGE)
			continue;

		offset  = (msr - msrpm_ranges[i]) / 4; /* 4 msrs per u8    */
		offset += (i * MSRS_RANGE_SIZE);       /* add range offset */

		/* Now we have the u8 offset - but need the u32 offset */
		return offset / 4;
	}

	/* MSR not in any range */
	return MSR_INVALID;
}
712
713#define MAX_INST_SIZE 15
714
715static inline void clgi(void)
716{
717 asm volatile (__ex("clgi"));
718}
719
720static inline void stgi(void)
721{
722 asm volatile (__ex("stgi"));
723}
724
725static inline void invlpga(unsigned long addr, u32 asid)
726{
727 asm volatile (__ex("invlpga %1, %0") : : "c"(asid), "a"(addr));
728}
729
730static int get_npt_level(struct kvm_vcpu *vcpu)
731{
732#ifdef CONFIG_X86_64
733 return PT64_ROOT_4LEVEL;
734#else
735 return PT32E_ROOT_LEVEL;
736#endif
737}
738
739static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
740{
741 vcpu->arch.efer = efer;
742
743 if (!npt_enabled) {
744
745 efer |= EFER_NX;
746
747 if (!(efer & EFER_LMA))
748 efer &= ~EFER_LME;
749 }
750
751 to_svm(vcpu)->vmcb->save.efer = efer | EFER_SVME;
752 mark_dirty(to_svm(vcpu)->vmcb, VMCB_CR);
753}
754
755static int is_external_interrupt(u32 info)
756{
757 info &= SVM_EVTINJ_TYPE_MASK | SVM_EVTINJ_VALID;
758 return info == (SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR);
759}
760
761static u32 svm_get_interrupt_shadow(struct kvm_vcpu *vcpu)
762{
763 struct vcpu_svm *svm = to_svm(vcpu);
764 u32 ret = 0;
765
766 if (svm->vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK)
767 ret = KVM_X86_SHADOW_INT_STI | KVM_X86_SHADOW_INT_MOV_SS;
768 return ret;
769}
770
771static void svm_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
772{
773 struct vcpu_svm *svm = to_svm(vcpu);
774
775 if (mask == 0)
776 svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK;
777 else
778 svm->vmcb->control.int_state |= SVM_INTERRUPT_SHADOW_MASK;
779
780}
781
782static int skip_emulated_instruction(struct kvm_vcpu *vcpu)
783{
784 struct vcpu_svm *svm = to_svm(vcpu);
785
786 if (nrips && svm->vmcb->control.next_rip != 0) {
787 WARN_ON_ONCE(!static_cpu_has(X86_FEATURE_NRIPS));
788 svm->next_rip = svm->vmcb->control.next_rip;
789 }
790
791 if (!svm->next_rip) {
792 if (!kvm_emulate_instruction(vcpu, EMULTYPE_SKIP))
793 return 0;
794 } else {
795 if (svm->next_rip - kvm_rip_read(vcpu) > MAX_INST_SIZE)
796 pr_err("%s: ip 0x%lx next 0x%llx\n",
797 __func__, kvm_rip_read(vcpu), svm->next_rip);
798 kvm_rip_write(vcpu, svm->next_rip);
799 }
800 svm_set_interrupt_shadow(vcpu, 0);
801
802 return 1;
803}
804
805static void svm_queue_exception(struct kvm_vcpu *vcpu)
806{
807 struct vcpu_svm *svm = to_svm(vcpu);
808 unsigned nr = vcpu->arch.exception.nr;
809 bool has_error_code = vcpu->arch.exception.has_error_code;
810 bool reinject = vcpu->arch.exception.injected;
811 u32 error_code = vcpu->arch.exception.error_code;
812
813
814
815
816
817 if (!reinject &&
818 nested_svm_check_exception(svm, nr, has_error_code, error_code))
819 return;
820
821 kvm_deliver_exception_payload(&svm->vcpu);
822
823 if (nr == BP_VECTOR && !nrips) {
824 unsigned long rip, old_rip = kvm_rip_read(&svm->vcpu);
825
826
827
828
829
830
831
832
833 (void)skip_emulated_instruction(&svm->vcpu);
834 rip = kvm_rip_read(&svm->vcpu);
835 svm->int3_rip = rip + svm->vmcb->save.cs.base;
836 svm->int3_injected = rip - old_rip;
837 }
838
839 svm->vmcb->control.event_inj = nr
840 | SVM_EVTINJ_VALID
841 | (has_error_code ? SVM_EVTINJ_VALID_ERR : 0)
842 | SVM_EVTINJ_TYPE_EXEPT;
843 svm->vmcb->control.event_inj_err = error_code;
844}
845
846static void svm_init_erratum_383(void)
847{
848 u32 low, high;
849 int err;
850 u64 val;
851
852 if (!static_cpu_has_bug(X86_BUG_AMD_TLB_MMATCH))
853 return;
854
855
856 val = native_read_msr_safe(MSR_AMD64_DC_CFG, &err);
857 if (err)
858 return;
859
860 val |= (1ULL << 47);
861
862 low = lower_32_bits(val);
863 high = upper_32_bits(val);
864
865 native_write_msr_safe(MSR_AMD64_DC_CFG, low, high);
866
867 erratum_383_found = true;
868}
869
870static void svm_init_osvw(struct kvm_vcpu *vcpu)
871{
872
873
874
875
876 vcpu->arch.osvw.length = (osvw_len >= 3) ? (osvw_len) : 3;
877 vcpu->arch.osvw.status = osvw_status & ~(6ULL);
878
879
880
881
882
883
884
885
886
887 if (osvw_len == 0 && boot_cpu_data.x86 == 0x10)
888 vcpu->arch.osvw.status |= 1;
889}
890
891static int has_svm(void)
892{
893 const char *msg;
894
895 if (!cpu_has_svm(&msg)) {
896 printk(KERN_INFO "has_svm: %s\n", msg);
897 return 0;
898 }
899
900 return 1;
901}
902
903static void svm_hardware_disable(void)
904{
905
906 if (static_cpu_has(X86_FEATURE_TSCRATEMSR))
907 wrmsrl(MSR_AMD64_TSC_RATIO, TSC_RATIO_DEFAULT);
908
909 cpu_svm_disable();
910
911 amd_pmu_disable_virt();
912}
913
914static int svm_hardware_enable(void)
915{
916
917 struct svm_cpu_data *sd;
918 uint64_t efer;
919 struct desc_struct *gdt;
920 int me = raw_smp_processor_id();
921
922 rdmsrl(MSR_EFER, efer);
923 if (efer & EFER_SVME)
924 return -EBUSY;
925
926 if (!has_svm()) {
927 pr_err("%s: err EOPNOTSUPP on %d\n", __func__, me);
928 return -EINVAL;
929 }
930 sd = per_cpu(svm_data, me);
931 if (!sd) {
932 pr_err("%s: svm_data is NULL on %d\n", __func__, me);
933 return -EINVAL;
934 }
935
936 sd->asid_generation = 1;
937 sd->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1;
938 sd->next_asid = sd->max_asid + 1;
939 sd->min_asid = max_sev_asid + 1;
940
941 gdt = get_current_gdt_rw();
942 sd->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS);
943
944 wrmsrl(MSR_EFER, efer | EFER_SVME);
945
946 wrmsrl(MSR_VM_HSAVE_PA, page_to_pfn(sd->save_area) << PAGE_SHIFT);
947
948 if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) {
949 wrmsrl(MSR_AMD64_TSC_RATIO, TSC_RATIO_DEFAULT);
950 __this_cpu_write(current_tsc_ratio, TSC_RATIO_DEFAULT);
951 }
952
953
954
955
956
957
958
959
960
961
962
963 if (cpu_has(&boot_cpu_data, X86_FEATURE_OSVW)) {
964 uint64_t len, status = 0;
965 int err;
966
967 len = native_read_msr_safe(MSR_AMD64_OSVW_ID_LENGTH, &err);
968 if (!err)
969 status = native_read_msr_safe(MSR_AMD64_OSVW_STATUS,
970 &err);
971
972 if (err)
973 osvw_status = osvw_len = 0;
974 else {
975 if (len < osvw_len)
976 osvw_len = len;
977 osvw_status |= status;
978 osvw_status &= (1ULL << osvw_len) - 1;
979 }
980 } else
981 osvw_status = osvw_len = 0;
982
983 svm_init_erratum_383();
984
985 amd_pmu_enable_virt();
986
987 return 0;
988}
989
990static void svm_cpu_uninit(int cpu)
991{
992 struct svm_cpu_data *sd = per_cpu(svm_data, raw_smp_processor_id());
993
994 if (!sd)
995 return;
996
997 per_cpu(svm_data, raw_smp_processor_id()) = NULL;
998 kfree(sd->sev_vmcbs);
999 __free_page(sd->save_area);
1000 kfree(sd);
1001}
1002
1003static int svm_cpu_init(int cpu)
1004{
1005 struct svm_cpu_data *sd;
1006 int r;
1007
1008 sd = kzalloc(sizeof(struct svm_cpu_data), GFP_KERNEL);
1009 if (!sd)
1010 return -ENOMEM;
1011 sd->cpu = cpu;
1012 r = -ENOMEM;
1013 sd->save_area = alloc_page(GFP_KERNEL);
1014 if (!sd->save_area)
1015 goto err_1;
1016
1017 if (svm_sev_enabled()) {
1018 r = -ENOMEM;
1019 sd->sev_vmcbs = kmalloc_array(max_sev_asid + 1,
1020 sizeof(void *),
1021 GFP_KERNEL);
1022 if (!sd->sev_vmcbs)
1023 goto err_1;
1024 }
1025
1026 per_cpu(svm_data, cpu) = sd;
1027
1028 return 0;
1029
1030err_1:
1031 kfree(sd);
1032 return r;
1033
1034}
1035
1036static bool valid_msr_intercept(u32 index)
1037{
1038 int i;
1039
1040 for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++)
1041 if (direct_access_msrs[i].index == index)
1042 return true;
1043
1044 return false;
1045}
1046
1047static bool msr_write_intercepted(struct kvm_vcpu *vcpu, unsigned msr)
1048{
1049 u8 bit_write;
1050 unsigned long tmp;
1051 u32 offset;
1052 u32 *msrpm;
1053
1054 msrpm = is_guest_mode(vcpu) ? to_svm(vcpu)->nested.msrpm:
1055 to_svm(vcpu)->msrpm;
1056
1057 offset = svm_msrpm_offset(msr);
1058 bit_write = 2 * (msr & 0x0f) + 1;
1059 tmp = msrpm[offset];
1060
1061 BUG_ON(offset == MSR_INVALID);
1062
1063 return !!test_bit(bit_write, &tmp);
1064}
1065
1066static void set_msr_interception(u32 *msrpm, unsigned msr,
1067 int read, int write)
1068{
1069 u8 bit_read, bit_write;
1070 unsigned long tmp;
1071 u32 offset;
1072
1073
1074
1075
1076
1077 WARN_ON(!valid_msr_intercept(msr));
1078
1079 offset = svm_msrpm_offset(msr);
1080 bit_read = 2 * (msr & 0x0f);
1081 bit_write = 2 * (msr & 0x0f) + 1;
1082 tmp = msrpm[offset];
1083
1084 BUG_ON(offset == MSR_INVALID);
1085
1086 read ? clear_bit(bit_read, &tmp) : set_bit(bit_read, &tmp);
1087 write ? clear_bit(bit_write, &tmp) : set_bit(bit_write, &tmp);
1088
1089 msrpm[offset] = tmp;
1090}
1091
1092static void svm_vcpu_init_msrpm(u32 *msrpm)
1093{
1094 int i;
1095
1096 memset(msrpm, 0xff, PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER));
1097
1098 for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) {
1099 if (!direct_access_msrs[i].always)
1100 continue;
1101
1102 set_msr_interception(msrpm, direct_access_msrs[i].index, 1, 1);
1103 }
1104}
1105
1106static void add_msr_offset(u32 offset)
1107{
1108 int i;
1109
1110 for (i = 0; i < MSRPM_OFFSETS; ++i) {
1111
1112
1113 if (msrpm_offsets[i] == offset)
1114 return;
1115
1116
1117 if (msrpm_offsets[i] != MSR_INVALID)
1118 continue;
1119
1120
1121 msrpm_offsets[i] = offset;
1122
1123 return;
1124 }
1125
1126
1127
1128
1129
1130 BUG();
1131}
1132
1133static void init_msrpm_offsets(void)
1134{
1135 int i;
1136
1137 memset(msrpm_offsets, 0xff, sizeof(msrpm_offsets));
1138
1139 for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) {
1140 u32 offset;
1141
1142 offset = svm_msrpm_offset(direct_access_msrs[i].index);
1143 BUG_ON(offset == MSR_INVALID);
1144
1145 add_msr_offset(offset);
1146 }
1147}
1148
1149static void svm_enable_lbrv(struct vcpu_svm *svm)
1150{
1151 u32 *msrpm = svm->msrpm;
1152
1153 svm->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK;
1154 set_msr_interception(msrpm, MSR_IA32_LASTBRANCHFROMIP, 1, 1);
1155 set_msr_interception(msrpm, MSR_IA32_LASTBRANCHTOIP, 1, 1);
1156 set_msr_interception(msrpm, MSR_IA32_LASTINTFROMIP, 1, 1);
1157 set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 1, 1);
1158}
1159
1160static void svm_disable_lbrv(struct vcpu_svm *svm)
1161{
1162 u32 *msrpm = svm->msrpm;
1163
1164 svm->vmcb->control.virt_ext &= ~LBR_CTL_ENABLE_MASK;
1165 set_msr_interception(msrpm, MSR_IA32_LASTBRANCHFROMIP, 0, 0);
1166 set_msr_interception(msrpm, MSR_IA32_LASTBRANCHTOIP, 0, 0);
1167 set_msr_interception(msrpm, MSR_IA32_LASTINTFROMIP, 0, 0);
1168 set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 0, 0);
1169}
1170
1171static void disable_nmi_singlestep(struct vcpu_svm *svm)
1172{
1173 svm->nmi_singlestep = false;
1174
1175 if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP)) {
1176
1177 if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_TF))
1178 svm->vmcb->save.rflags &= ~X86_EFLAGS_TF;
1179 if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_RF))
1180 svm->vmcb->save.rflags &= ~X86_EFLAGS_RF;
1181 }
1182}
1183
1184
1185
1186
1187
1188
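/*
 * Hash table mapping an AVIC VM ID to its struct kvm_svm, used when an
 * AMD IOMMU GA log notification needs to find the VM (and then the vcpu)
 * that a guest virtual APIC interrupt targets.
 */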
1189#define SVM_VM_DATA_HASH_BITS 8
1190static DEFINE_HASHTABLE(svm_vm_data_hash, SVM_VM_DATA_HASH_BITS);
1191static u32 next_vm_id = 0;
1192static bool next_vm_id_wrapped = 0;
1193static DEFINE_SPINLOCK(svm_vm_data_hash_lock);
1194
1195
1196
1197
1198
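/*
 * Callback invoked by the AMD IOMMU driver when a GA log entry is
 * written.  The GA tag encodes the AVIC VM ID and vcpu ID; look the
 * target vcpu up in the per-VM hash table and wake it so it can handle
 * the pending interrupt.
 */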
1199static int avic_ga_log_notifier(u32 ga_tag)
1200{
1201 unsigned long flags;
1202 struct kvm_svm *kvm_svm;
1203 struct kvm_vcpu *vcpu = NULL;
1204 u32 vm_id = AVIC_GATAG_TO_VMID(ga_tag);
1205 u32 vcpu_id = AVIC_GATAG_TO_VCPUID(ga_tag);
1206
1207 pr_debug("SVM: %s: vm_id=%#x, vcpu_id=%#x\n", __func__, vm_id, vcpu_id);
1208
1209 spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
1210 hash_for_each_possible(svm_vm_data_hash, kvm_svm, hnode, vm_id) {
1211 if (kvm_svm->avic_vm_id != vm_id)
1212 continue;
1213 vcpu = kvm_get_vcpu_by_id(&kvm_svm->kvm, vcpu_id);
1214 break;
1215 }
1216 spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags);
1217
1218
1219
1220
1221
1222
1223 if (vcpu)
1224 kvm_vcpu_wake_up(vcpu);
1225
1226 return 0;
1227}
1228
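/*
 * Query CPUID Fn8000_001F for the range of ASIDs the platform reserves
 * for SEV guests, allocate the ASID tracking bitmaps and confirm that
 * the PSP firmware answers a PLATFORM_STATUS command.  Returns nonzero
 * if SEV cannot be used.
 */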
1229static __init int sev_hardware_setup(void)
1230{
1231 struct sev_user_data_status *status;
1232 int rc;
1233
1234
1235 max_sev_asid = cpuid_ecx(0x8000001F);
1236
1237 if (!max_sev_asid)
1238 return 1;
1239
1240
1241 min_sev_asid = cpuid_edx(0x8000001F);
1242
1243
1244 sev_asid_bitmap = bitmap_zalloc(max_sev_asid, GFP_KERNEL);
1245 if (!sev_asid_bitmap)
1246 return 1;
1247
1248 sev_reclaim_asid_bitmap = bitmap_zalloc(max_sev_asid, GFP_KERNEL);
1249 if (!sev_reclaim_asid_bitmap)
1250 return 1;
1251
1252 status = kmalloc(sizeof(*status), GFP_KERNEL);
1253 if (!status)
1254 return 1;
1255
1256
1257
1258
1259
1260
1261
1262
1263 rc = sev_platform_status(status, NULL);
1264 if (rc)
1265 goto err;
1266
1267 pr_info("SEV supported\n");
1268
1269err:
1270 kfree(status);
1271 return rc;
1272}
1273
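/*
 * Adjust the PAUSE filter count in response to PAUSE exits: growing the
 * window makes a spinning vcpu exit less often, while shrink_ple_window()
 * below brings it back down.  Changes are written to the VMCB and traced
 * through kvm_ple_window_update.
 */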
1274static void grow_ple_window(struct kvm_vcpu *vcpu)
1275{
1276 struct vcpu_svm *svm = to_svm(vcpu);
1277 struct vmcb_control_area *control = &svm->vmcb->control;
1278 int old = control->pause_filter_count;
1279
1280 control->pause_filter_count = __grow_ple_window(old,
1281 pause_filter_count,
1282 pause_filter_count_grow,
1283 pause_filter_count_max);
1284
1285 if (control->pause_filter_count != old) {
1286 mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
1287 trace_kvm_ple_window_update(vcpu->vcpu_id,
1288 control->pause_filter_count, old);
1289 }
1290}
1291
1292static void shrink_ple_window(struct kvm_vcpu *vcpu)
1293{
1294 struct vcpu_svm *svm = to_svm(vcpu);
1295 struct vmcb_control_area *control = &svm->vmcb->control;
1296 int old = control->pause_filter_count;
1297
1298 control->pause_filter_count =
1299 __shrink_ple_window(old,
1300 pause_filter_count,
1301 pause_filter_count_shrink,
1302 pause_filter_count);
1303 if (control->pause_filter_count != old) {
1304 mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
1305 trace_kvm_ple_window_update(vcpu->vcpu_id,
1306 control->pause_filter_count, old);
1307 }
1308}
1309
1310static __init int svm_hardware_setup(void)
1311{
1312 int cpu;
1313 struct page *iopm_pages;
1314 void *iopm_va;
1315 int r;
1316
1317 iopm_pages = alloc_pages(GFP_KERNEL, IOPM_ALLOC_ORDER);
1318
1319 if (!iopm_pages)
1320 return -ENOMEM;
1321
1322 iopm_va = page_address(iopm_pages);
1323 memset(iopm_va, 0xff, PAGE_SIZE * (1 << IOPM_ALLOC_ORDER));
1324 iopm_base = page_to_pfn(iopm_pages) << PAGE_SHIFT;
1325
1326 init_msrpm_offsets();
1327
1328 if (boot_cpu_has(X86_FEATURE_NX))
1329 kvm_enable_efer_bits(EFER_NX);
1330
1331 if (boot_cpu_has(X86_FEATURE_FXSR_OPT))
1332 kvm_enable_efer_bits(EFER_FFXSR);
1333
1334 if (boot_cpu_has(X86_FEATURE_TSCRATEMSR)) {
1335 kvm_has_tsc_control = true;
1336 kvm_max_tsc_scaling_ratio = TSC_RATIO_MAX;
1337 kvm_tsc_scaling_ratio_frac_bits = 32;
1338 }
1339
1340
1341 if (!boot_cpu_has(X86_FEATURE_PAUSEFILTER)) {
1342 pause_filter_count = 0;
1343 pause_filter_thresh = 0;
1344 } else if (!boot_cpu_has(X86_FEATURE_PFTHRESHOLD)) {
1345 pause_filter_thresh = 0;
1346 }
1347
1348 if (nested) {
1349 printk(KERN_INFO "kvm: Nested Virtualization enabled\n");
1350 kvm_enable_efer_bits(EFER_SVME | EFER_LMSLE);
1351 }
1352
1353 if (sev) {
1354 if (boot_cpu_has(X86_FEATURE_SEV) &&
1355 IS_ENABLED(CONFIG_KVM_AMD_SEV)) {
1356 r = sev_hardware_setup();
1357 if (r)
1358 sev = false;
1359 } else {
1360 sev = false;
1361 }
1362 }
1363
1364 for_each_possible_cpu(cpu) {
1365 r = svm_cpu_init(cpu);
1366 if (r)
1367 goto err;
1368 }
1369
1370 if (!boot_cpu_has(X86_FEATURE_NPT))
1371 npt_enabled = false;
1372
1373 if (npt_enabled && !npt) {
1374 printk(KERN_INFO "kvm: Nested Paging disabled\n");
1375 npt_enabled = false;
1376 }
1377
1378 if (npt_enabled) {
1379 printk(KERN_INFO "kvm: Nested Paging enabled\n");
1380 kvm_enable_tdp();
1381 } else
1382 kvm_disable_tdp();
1383
1384 if (nrips) {
1385 if (!boot_cpu_has(X86_FEATURE_NRIPS))
1386 nrips = false;
1387 }
1388
1389 if (avic) {
1390 if (!npt_enabled ||
1391 !boot_cpu_has(X86_FEATURE_AVIC) ||
1392 !IS_ENABLED(CONFIG_X86_LOCAL_APIC)) {
1393 avic = false;
1394 } else {
1395 pr_info("AVIC enabled\n");
1396
1397 amd_iommu_register_ga_log_notifier(&avic_ga_log_notifier);
1398 }
1399 }
1400
1401 if (vls) {
1402 if (!npt_enabled ||
1403 !boot_cpu_has(X86_FEATURE_V_VMSAVE_VMLOAD) ||
1404 !IS_ENABLED(CONFIG_X86_64)) {
1405 vls = false;
1406 } else {
1407 pr_info("Virtual VMLOAD VMSAVE supported\n");
1408 }
1409 }
1410
1411 if (vgif) {
1412 if (!boot_cpu_has(X86_FEATURE_VGIF))
1413 vgif = false;
1414 else
1415 pr_info("Virtual GIF supported\n");
1416 }
1417
1418 return 0;
1419
1420err:
1421 __free_pages(iopm_pages, IOPM_ALLOC_ORDER);
1422 iopm_base = 0;
1423 return r;
1424}
1425
1426static __exit void svm_hardware_unsetup(void)
1427{
1428 int cpu;
1429
1430 if (svm_sev_enabled()) {
1431 bitmap_free(sev_asid_bitmap);
1432 bitmap_free(sev_reclaim_asid_bitmap);
1433
1434 sev_flush_asids();
1435 }
1436
1437 for_each_possible_cpu(cpu)
1438 svm_cpu_uninit(cpu);
1439
1440 __free_pages(pfn_to_page(iopm_base >> PAGE_SHIFT), IOPM_ALLOC_ORDER);
1441 iopm_base = 0;
1442}
1443
1444static void init_seg(struct vmcb_seg *seg)
1445{
1446 seg->selector = 0;
1447 seg->attrib = SVM_SELECTOR_P_MASK | SVM_SELECTOR_S_MASK |
1448 SVM_SELECTOR_WRITE_MASK;
1449 seg->limit = 0xffff;
1450 seg->base = 0;
1451}
1452
1453static void init_sys_seg(struct vmcb_seg *seg, uint32_t type)
1454{
1455 seg->selector = 0;
1456 seg->attrib = SVM_SELECTOR_P_MASK | type;
1457 seg->limit = 0xffff;
1458 seg->base = 0;
1459}
1460
1461static u64 svm_read_l1_tsc_offset(struct kvm_vcpu *vcpu)
1462{
1463 struct vcpu_svm *svm = to_svm(vcpu);
1464
1465 if (is_guest_mode(vcpu))
1466 return svm->nested.hsave->control.tsc_offset;
1467
1468 return vcpu->arch.tsc_offset;
1469}
1470
1471static u64 svm_write_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
1472{
1473 struct vcpu_svm *svm = to_svm(vcpu);
1474 u64 g_tsc_offset = 0;
1475
1476 if (is_guest_mode(vcpu)) {
1477
1478 g_tsc_offset = svm->vmcb->control.tsc_offset -
1479 svm->nested.hsave->control.tsc_offset;
1480 svm->nested.hsave->control.tsc_offset = offset;
1481 }
1482
1483 trace_kvm_write_tsc_offset(vcpu->vcpu_id,
1484 svm->vmcb->control.tsc_offset - g_tsc_offset,
1485 offset);
1486
1487 svm->vmcb->control.tsc_offset = offset + g_tsc_offset;
1488
1489 mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
1490 return svm->vmcb->control.tsc_offset;
1491}
1492
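/*
 * Point the VMCB at the per-vcpu APIC backing page and the per-VM
 * logical/physical APIC ID tables, then turn on the AVIC enable bit in
 * int_ctl.
 */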
1493static void avic_init_vmcb(struct vcpu_svm *svm)
1494{
1495 struct vmcb *vmcb = svm->vmcb;
1496 struct kvm_svm *kvm_svm = to_kvm_svm(svm->vcpu.kvm);
1497 phys_addr_t bpa = __sme_set(page_to_phys(svm->avic_backing_page));
1498 phys_addr_t lpa = __sme_set(page_to_phys(kvm_svm->avic_logical_id_table_page));
1499 phys_addr_t ppa = __sme_set(page_to_phys(kvm_svm->avic_physical_id_table_page));
1500
1501 vmcb->control.avic_backing_page = bpa & AVIC_HPA_MASK;
1502 vmcb->control.avic_logical_id = lpa & AVIC_HPA_MASK;
1503 vmcb->control.avic_physical_id = ppa & AVIC_HPA_MASK;
1504 vmcb->control.avic_physical_id |= AVIC_MAX_PHYSICAL_ID_COUNT;
1505 vmcb->control.int_ctl |= AVIC_ENABLE_MASK;
1506}
1507
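/*
 * Set up the initial VMCB state for a vcpu: the CR/DR/exception and
 * instruction intercepts, the I/O and MSR permission map addresses, and
 * reset-time segment/register state.  Optional features (NPT, AVIC,
 * virtual VMLOAD/VMSAVE, vGIF, SEV) are enabled here when available.
 */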
1508static void init_vmcb(struct vcpu_svm *svm)
1509{
1510 struct vmcb_control_area *control = &svm->vmcb->control;
1511 struct vmcb_save_area *save = &svm->vmcb->save;
1512
1513 svm->vcpu.arch.hflags = 0;
1514
1515 set_cr_intercept(svm, INTERCEPT_CR0_READ);
1516 set_cr_intercept(svm, INTERCEPT_CR3_READ);
1517 set_cr_intercept(svm, INTERCEPT_CR4_READ);
1518 set_cr_intercept(svm, INTERCEPT_CR0_WRITE);
1519 set_cr_intercept(svm, INTERCEPT_CR3_WRITE);
1520 set_cr_intercept(svm, INTERCEPT_CR4_WRITE);
1521 if (!kvm_vcpu_apicv_active(&svm->vcpu))
1522 set_cr_intercept(svm, INTERCEPT_CR8_WRITE);
1523
1524 set_dr_intercepts(svm);
1525
1526 set_exception_intercept(svm, PF_VECTOR);
1527 set_exception_intercept(svm, UD_VECTOR);
1528 set_exception_intercept(svm, MC_VECTOR);
1529 set_exception_intercept(svm, AC_VECTOR);
1530 set_exception_intercept(svm, DB_VECTOR);
1531
1532
1533
1534
1535
1536
1537 if (enable_vmware_backdoor)
1538 set_exception_intercept(svm, GP_VECTOR);
1539
1540 set_intercept(svm, INTERCEPT_INTR);
1541 set_intercept(svm, INTERCEPT_NMI);
1542 set_intercept(svm, INTERCEPT_SMI);
1543 set_intercept(svm, INTERCEPT_SELECTIVE_CR0);
1544 set_intercept(svm, INTERCEPT_RDPMC);
1545 set_intercept(svm, INTERCEPT_CPUID);
1546 set_intercept(svm, INTERCEPT_INVD);
1547 set_intercept(svm, INTERCEPT_INVLPG);
1548 set_intercept(svm, INTERCEPT_INVLPGA);
1549 set_intercept(svm, INTERCEPT_IOIO_PROT);
1550 set_intercept(svm, INTERCEPT_MSR_PROT);
1551 set_intercept(svm, INTERCEPT_TASK_SWITCH);
1552 set_intercept(svm, INTERCEPT_SHUTDOWN);
1553 set_intercept(svm, INTERCEPT_VMRUN);
1554 set_intercept(svm, INTERCEPT_VMMCALL);
1555 set_intercept(svm, INTERCEPT_VMLOAD);
1556 set_intercept(svm, INTERCEPT_VMSAVE);
1557 set_intercept(svm, INTERCEPT_STGI);
1558 set_intercept(svm, INTERCEPT_CLGI);
1559 set_intercept(svm, INTERCEPT_SKINIT);
1560 set_intercept(svm, INTERCEPT_WBINVD);
1561 set_intercept(svm, INTERCEPT_XSETBV);
1562 set_intercept(svm, INTERCEPT_RDPRU);
1563 set_intercept(svm, INTERCEPT_RSM);
1564
1565 if (!kvm_mwait_in_guest(svm->vcpu.kvm)) {
1566 set_intercept(svm, INTERCEPT_MONITOR);
1567 set_intercept(svm, INTERCEPT_MWAIT);
1568 }
1569
1570 if (!kvm_hlt_in_guest(svm->vcpu.kvm))
1571 set_intercept(svm, INTERCEPT_HLT);
1572
1573 control->iopm_base_pa = __sme_set(iopm_base);
1574 control->msrpm_base_pa = __sme_set(__pa(svm->msrpm));
1575 control->int_ctl = V_INTR_MASKING_MASK;
1576
1577 init_seg(&save->es);
1578 init_seg(&save->ss);
1579 init_seg(&save->ds);
1580 init_seg(&save->fs);
1581 init_seg(&save->gs);
1582
1583 save->cs.selector = 0xf000;
1584 save->cs.base = 0xffff0000;
1585
1586 save->cs.attrib = SVM_SELECTOR_READ_MASK | SVM_SELECTOR_P_MASK |
1587 SVM_SELECTOR_S_MASK | SVM_SELECTOR_CODE_MASK;
1588 save->cs.limit = 0xffff;
1589
1590 save->gdtr.limit = 0xffff;
1591 save->idtr.limit = 0xffff;
1592
1593 init_sys_seg(&save->ldtr, SEG_TYPE_LDT);
1594 init_sys_seg(&save->tr, SEG_TYPE_BUSY_TSS16);
1595
1596 svm_set_efer(&svm->vcpu, 0);
1597 save->dr6 = 0xffff0ff0;
1598 kvm_set_rflags(&svm->vcpu, 2);
1599 save->rip = 0x0000fff0;
1600 svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip;
1601
1602
1603
1604
1605
1606 svm_set_cr0(&svm->vcpu, X86_CR0_NW | X86_CR0_CD | X86_CR0_ET);
1607 kvm_mmu_reset_context(&svm->vcpu);
1608
1609 save->cr4 = X86_CR4_PAE;
1610
1611
1612 if (npt_enabled) {
1613
1614 control->nested_ctl |= SVM_NESTED_CTL_NP_ENABLE;
1615 clr_intercept(svm, INTERCEPT_INVLPG);
1616 clr_exception_intercept(svm, PF_VECTOR);
1617 clr_cr_intercept(svm, INTERCEPT_CR3_READ);
1618 clr_cr_intercept(svm, INTERCEPT_CR3_WRITE);
1619 save->g_pat = svm->vcpu.arch.pat;
1620 save->cr3 = 0;
1621 save->cr4 = 0;
1622 }
1623 svm->asid_generation = 0;
1624
1625 svm->nested.vmcb = 0;
1626 svm->vcpu.arch.hflags = 0;
1627
1628 if (pause_filter_count) {
1629 control->pause_filter_count = pause_filter_count;
1630 if (pause_filter_thresh)
1631 control->pause_filter_thresh = pause_filter_thresh;
1632 set_intercept(svm, INTERCEPT_PAUSE);
1633 } else {
1634 clr_intercept(svm, INTERCEPT_PAUSE);
1635 }
1636
1637 if (kvm_vcpu_apicv_active(&svm->vcpu))
1638 avic_init_vmcb(svm);
1639
1640
1641
1642
1643
1644 if (vls) {
1645 clr_intercept(svm, INTERCEPT_VMLOAD);
1646 clr_intercept(svm, INTERCEPT_VMSAVE);
1647 svm->vmcb->control.virt_ext |= VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK;
1648 }
1649
1650 if (vgif) {
1651 clr_intercept(svm, INTERCEPT_STGI);
1652 clr_intercept(svm, INTERCEPT_CLGI);
1653 svm->vmcb->control.int_ctl |= V_GIF_ENABLE_MASK;
1654 }
1655
1656 if (sev_guest(svm->vcpu.kvm)) {
1657 svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ENABLE;
1658 clr_exception_intercept(svm, UD_VECTOR);
1659 }
1660
1661 mark_all_dirty(svm->vmcb);
1662
1663 enable_gif(svm);
1664
1665}
1666
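/*
 * Return a pointer to the AVIC physical APIC ID table entry for @index,
 * or NULL if the index is beyond AVIC_MAX_PHYSICAL_ID_COUNT.
 */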
1667static u64 *avic_get_physical_id_entry(struct kvm_vcpu *vcpu,
1668 unsigned int index)
1669{
1670 u64 *avic_physical_id_table;
1671 struct kvm_svm *kvm_svm = to_kvm_svm(vcpu->kvm);
1672
1673 if (index >= AVIC_MAX_PHYSICAL_ID_COUNT)
1674 return NULL;
1675
1676 avic_physical_id_table = page_address(kvm_svm->avic_physical_id_table_page);
1677
1678 return &avic_physical_id_table[index];
1679}
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689static int avic_init_access_page(struct kvm_vcpu *vcpu)
1690{
1691 struct kvm *kvm = vcpu->kvm;
1692 int ret = 0;
1693
1694 mutex_lock(&kvm->slots_lock);
1695 if (kvm->arch.apic_access_page_done)
1696 goto out;
1697
1698 ret = __x86_set_memory_region(kvm,
1699 APIC_ACCESS_PAGE_PRIVATE_MEMSLOT,
1700 APIC_DEFAULT_PHYS_BASE,
1701 PAGE_SIZE);
1702 if (ret)
1703 goto out;
1704
1705 kvm->arch.apic_access_page_done = true;
1706out:
1707 mutex_unlock(&kvm->slots_lock);
1708 return ret;
1709}
1710
1711static int avic_init_backing_page(struct kvm_vcpu *vcpu)
1712{
1713 int ret;
1714 u64 *entry, new_entry;
1715 int id = vcpu->vcpu_id;
1716 struct vcpu_svm *svm = to_svm(vcpu);
1717
1718 ret = avic_init_access_page(vcpu);
1719 if (ret)
1720 return ret;
1721
1722 if (id >= AVIC_MAX_PHYSICAL_ID_COUNT)
1723 return -EINVAL;
1724
1725 if (!svm->vcpu.arch.apic->regs)
1726 return -EINVAL;
1727
1728 svm->avic_backing_page = virt_to_page(svm->vcpu.arch.apic->regs);
1729
1730
1731 entry = avic_get_physical_id_entry(vcpu, id);
1732 if (!entry)
1733 return -EINVAL;
1734
1735 new_entry = __sme_set((page_to_phys(svm->avic_backing_page) &
1736 AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK) |
1737 AVIC_PHYSICAL_ID_ENTRY_VALID_MASK);
1738 WRITE_ONCE(*entry, new_entry);
1739
1740 svm->avic_physical_id_cache = entry;
1741
1742 return 0;
1743}
1744
1745static void sev_asid_free(int asid)
1746{
1747 struct svm_cpu_data *sd;
1748 int cpu, pos;
1749
1750 mutex_lock(&sev_bitmap_lock);
1751
1752 pos = asid - 1;
1753 __set_bit(pos, sev_reclaim_asid_bitmap);
1754
1755 for_each_possible_cpu(cpu) {
1756 sd = per_cpu(svm_data, cpu);
1757 sd->sev_vmcbs[pos] = NULL;
1758 }
1759
1760 mutex_unlock(&sev_bitmap_lock);
1761}
1762
1763static void sev_unbind_asid(struct kvm *kvm, unsigned int handle)
1764{
1765 struct sev_data_decommission *decommission;
1766 struct sev_data_deactivate *data;
1767
1768 if (!handle)
1769 return;
1770
1771 data = kzalloc(sizeof(*data), GFP_KERNEL);
1772 if (!data)
1773 return;
1774
1775
1776 data->handle = handle;
1777
1778
1779 down_read(&sev_deactivate_lock);
1780 sev_guest_deactivate(data, NULL);
1781 up_read(&sev_deactivate_lock);
1782
1783 kfree(data);
1784
1785 decommission = kzalloc(sizeof(*decommission), GFP_KERNEL);
1786 if (!decommission)
1787 return;
1788
1789
1790 decommission->handle = handle;
1791 sev_guest_decommission(decommission, NULL);
1792
1793 kfree(decommission);
1794}
1795
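/*
 * Pin a range of guest userspace memory for SEV.  The page count is
 * checked against RLIMIT_MEMLOCK, the page-pointer array is allocated
 * (falling back to vmalloc for large ranges) and the pages are pinned
 * with get_user_pages_fast().  Returns the pinned pages with their count
 * in @n, or NULL on failure.
 */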
1796static struct page **sev_pin_memory(struct kvm *kvm, unsigned long uaddr,
1797 unsigned long ulen, unsigned long *n,
1798 int write)
1799{
1800 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
1801 unsigned long npages, npinned, size;
1802 unsigned long locked, lock_limit;
1803 struct page **pages;
1804 unsigned long first, last;
1805
1806 if (ulen == 0 || uaddr + ulen < uaddr)
1807 return NULL;
1808
1809
1810 first = (uaddr & PAGE_MASK) >> PAGE_SHIFT;
1811 last = ((uaddr + ulen - 1) & PAGE_MASK) >> PAGE_SHIFT;
1812 npages = (last - first + 1);
1813
1814 locked = sev->pages_locked + npages;
1815 lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
1816 if (locked > lock_limit && !capable(CAP_IPC_LOCK)) {
1817 pr_err("SEV: %lu locked pages exceed the lock limit of %lu.\n", locked, lock_limit);
1818 return NULL;
1819 }
1820
1821
1822 size = npages * sizeof(struct page *);
1823 if (size > PAGE_SIZE)
1824 pages = __vmalloc(size, GFP_KERNEL_ACCOUNT | __GFP_ZERO,
1825 PAGE_KERNEL);
1826 else
1827 pages = kmalloc(size, GFP_KERNEL_ACCOUNT);
1828
1829 if (!pages)
1830 return NULL;
1831
1832
1833 npinned = get_user_pages_fast(uaddr, npages, FOLL_WRITE, pages);
1834 if (npinned != npages) {
1835 pr_err("SEV: Failure locking %lu pages.\n", npages);
1836 goto err;
1837 }
1838
1839 *n = npages;
1840 sev->pages_locked = locked;
1841
1842 return pages;
1843
1844err:
1845 if (npinned > 0)
1846 release_pages(pages, npinned);
1847
1848 kvfree(pages);
1849 return NULL;
1850}
1851
1852static void sev_unpin_memory(struct kvm *kvm, struct page **pages,
1853 unsigned long npages)
1854{
1855 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
1856
1857 release_pages(pages, npages);
1858 kvfree(pages);
1859 sev->pages_locked -= npages;
1860}
1861
1862static void sev_clflush_pages(struct page *pages[], unsigned long npages)
1863{
1864 uint8_t *page_virtual;
1865 unsigned long i;
1866
1867 if (npages == 0 || pages == NULL)
1868 return;
1869
1870 for (i = 0; i < npages; i++) {
1871 page_virtual = kmap_atomic(pages[i]);
1872 clflush_cache_range(page_virtual, PAGE_SIZE);
1873 kunmap_atomic(page_virtual);
1874 }
1875}
1876
1877static void __unregister_enc_region_locked(struct kvm *kvm,
1878 struct enc_region *region)
1879{
1880
1881
1882
1883
1884
1885
1886 sev_clflush_pages(region->pages, region->npages);
1887
1888 sev_unpin_memory(kvm, region->pages, region->npages);
	list_del(&region->list);
1890 kfree(region);
1891}
1892
1893static struct kvm *svm_vm_alloc(void)
1894{
1895 struct kvm_svm *kvm_svm = __vmalloc(sizeof(struct kvm_svm),
1896 GFP_KERNEL_ACCOUNT | __GFP_ZERO,
1897 PAGE_KERNEL);
1898 return &kvm_svm->kvm;
1899}
1900
1901static void svm_vm_free(struct kvm *kvm)
1902{
1903 vfree(to_kvm_svm(kvm));
1904}
1905
1906static void sev_vm_destroy(struct kvm *kvm)
1907{
1908 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
1909 struct list_head *head = &sev->regions_list;
1910 struct list_head *pos, *q;
1911
1912 if (!sev_guest(kvm))
1913 return;
1914
1915 mutex_lock(&kvm->lock);
1916
1917
1918
1919
1920
1921 if (!list_empty(head)) {
1922 list_for_each_safe(pos, q, head) {
1923 __unregister_enc_region_locked(kvm,
1924 list_entry(pos, struct enc_region, list));
1925 }
1926 }
1927
1928 mutex_unlock(&kvm->lock);
1929
1930 sev_unbind_asid(kvm, sev->handle);
1931 sev_asid_free(sev->asid);
1932}
1933
1934static void avic_vm_destroy(struct kvm *kvm)
1935{
1936 unsigned long flags;
1937 struct kvm_svm *kvm_svm = to_kvm_svm(kvm);
1938
1939 if (!avic)
1940 return;
1941
1942 if (kvm_svm->avic_logical_id_table_page)
1943 __free_page(kvm_svm->avic_logical_id_table_page);
1944 if (kvm_svm->avic_physical_id_table_page)
1945 __free_page(kvm_svm->avic_physical_id_table_page);
1946
1947 spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
1948 hash_del(&kvm_svm->hnode);
1949 spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags);
1950}
1951
1952static void svm_vm_destroy(struct kvm *kvm)
1953{
1954 avic_vm_destroy(kvm);
1955 sev_vm_destroy(kvm);
1956}
1957
1958static int avic_vm_init(struct kvm *kvm)
1959{
1960 unsigned long flags;
1961 int err = -ENOMEM;
1962 struct kvm_svm *kvm_svm = to_kvm_svm(kvm);
1963 struct kvm_svm *k2;
1964 struct page *p_page;
1965 struct page *l_page;
1966 u32 vm_id;
1967
1968 if (!avic)
1969 return 0;
1970
1971
1972 p_page = alloc_page(GFP_KERNEL_ACCOUNT);
1973 if (!p_page)
1974 goto free_avic;
1975
1976 kvm_svm->avic_physical_id_table_page = p_page;
1977 clear_page(page_address(p_page));
1978
1979
1980 l_page = alloc_page(GFP_KERNEL_ACCOUNT);
1981 if (!l_page)
1982 goto free_avic;
1983
1984 kvm_svm->avic_logical_id_table_page = l_page;
1985 clear_page(page_address(l_page));
1986
1987 spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
1988 again:
1989 vm_id = next_vm_id = (next_vm_id + 1) & AVIC_VM_ID_MASK;
1990 if (vm_id == 0) {
1991 next_vm_id_wrapped = 1;
1992 goto again;
1993 }
1994
1995 if (next_vm_id_wrapped) {
1996 hash_for_each_possible(svm_vm_data_hash, k2, hnode, vm_id) {
1997 if (k2->avic_vm_id == vm_id)
1998 goto again;
1999 }
2000 }
2001 kvm_svm->avic_vm_id = vm_id;
2002 hash_add(svm_vm_data_hash, &kvm_svm->hnode, kvm_svm->avic_vm_id);
2003 spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags);
2004
2005 return 0;
2006
2007free_avic:
2008 avic_vm_destroy(kvm);
2009 return err;
2010}
2011
2012static inline int
2013avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r)
2014{
2015 int ret = 0;
2016 unsigned long flags;
2017 struct amd_svm_iommu_ir *ir;
2018 struct vcpu_svm *svm = to_svm(vcpu);
2019
2020 if (!kvm_arch_has_assigned_device(vcpu->kvm))
2021 return 0;
2022
2023
2024
2025
2026
2027 spin_lock_irqsave(&svm->ir_list_lock, flags);
2028
2029 if (list_empty(&svm->ir_list))
2030 goto out;
2031
2032 list_for_each_entry(ir, &svm->ir_list, node) {
2033 ret = amd_iommu_update_ga(cpu, r, ir->data);
2034 if (ret)
2035 break;
2036 }
2037out:
2038 spin_unlock_irqrestore(&svm->ir_list_lock, flags);
2039 return ret;
2040}
2041
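/*
 * On vcpu load, publish the new host CPU in this vcpu's AVIC physical
 * APIC ID table entry and, if the vcpu is running, set the IS_RUNNING
 * bit so that doorbells and IOMMU-posted interrupts are delivered to
 * the right processor.
 */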
2042static void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2043{
2044 u64 entry;
2045
2046 int h_physical_id = kvm_cpu_get_apicid(cpu);
2047 struct vcpu_svm *svm = to_svm(vcpu);
2048
2049 if (!kvm_vcpu_apicv_active(vcpu))
2050 return;
2051
2052
2053
2054
2055
2056 if (WARN_ON(h_physical_id > AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK))
2057 return;
2058
2059 entry = READ_ONCE(*(svm->avic_physical_id_cache));
2060 WARN_ON(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK);
2061
2062 entry &= ~AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK;
2063 entry |= (h_physical_id & AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK);
2064
2065 entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
2066 if (svm->avic_is_running)
2067 entry |= AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
2068
2069 WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
2070 avic_update_iommu_vcpu_affinity(vcpu, h_physical_id,
2071 svm->avic_is_running);
2072}
2073
2074static void avic_vcpu_put(struct kvm_vcpu *vcpu)
2075{
2076 u64 entry;
2077 struct vcpu_svm *svm = to_svm(vcpu);
2078
2079 if (!kvm_vcpu_apicv_active(vcpu))
2080 return;
2081
2082 entry = READ_ONCE(*(svm->avic_physical_id_cache));
2083 if (entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK)
2084 avic_update_iommu_vcpu_affinity(vcpu, -1, 0);
2085
2086 entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
2087 WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
2088}
2089
2090
2091
2092
2093static void avic_set_running(struct kvm_vcpu *vcpu, bool is_run)
2094{
2095 struct vcpu_svm *svm = to_svm(vcpu);
2096
2097 svm->avic_is_running = is_run;
2098 if (is_run)
2099 avic_vcpu_load(vcpu, vcpu->cpu);
2100 else
2101 avic_vcpu_put(vcpu);
2102}
2103
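/*
 * Reset vcpu state on INIT/RESET: clear the speculation control MSR
 * shadows, reprogram the VMCB via init_vmcb() and report the CPUID
 * family/model/stepping in RDX as hardware does after reset.
 */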
2104static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
2105{
2106 struct vcpu_svm *svm = to_svm(vcpu);
2107 u32 dummy;
2108 u32 eax = 1;
2109
2110 vcpu->arch.microcode_version = 0x01000065;
2111 svm->spec_ctrl = 0;
2112 svm->virt_spec_ctrl = 0;
2113
2114 if (!init_event) {
2115 svm->vcpu.arch.apic_base = APIC_DEFAULT_PHYS_BASE |
2116 MSR_IA32_APICBASE_ENABLE;
2117 if (kvm_vcpu_is_reset_bsp(&svm->vcpu))
2118 svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP;
2119 }
2120 init_vmcb(svm);
2121
2122 kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy, true);
2123 kvm_rdx_write(vcpu, eax);
2124
2125 if (kvm_vcpu_apicv_active(vcpu) && !init_event)
2126 avic_update_vapic_bar(svm, APIC_DEFAULT_PHYS_BASE);
2127}
2128
2129static int avic_init_vcpu(struct vcpu_svm *svm)
2130{
2131 int ret;
2132
2133 if (!kvm_vcpu_apicv_active(&svm->vcpu))
2134 return 0;
2135
2136 ret = avic_init_backing_page(&svm->vcpu);
2137 if (ret)
2138 return ret;
2139
2140 INIT_LIST_HEAD(&svm->ir_list);
2141 spin_lock_init(&svm->ir_list_lock);
2142 svm->dfr_reg = APIC_DFR_FLAT;
2143
2144 return ret;
2145}
2146
2147static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
2148{
2149 struct vcpu_svm *svm;
2150 struct page *page;
2151 struct page *msrpm_pages;
2152 struct page *hsave_page;
2153 struct page *nested_msrpm_pages;
2154 int err;
2155
2156 BUILD_BUG_ON_MSG(offsetof(struct vcpu_svm, vcpu) != 0,
2157 "struct kvm_vcpu must be at offset 0 for arch usercopy region");
2158
2159 svm = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL_ACCOUNT);
2160 if (!svm) {
2161 err = -ENOMEM;
2162 goto out;
2163 }
2164
2165 svm->vcpu.arch.user_fpu = kmem_cache_zalloc(x86_fpu_cache,
2166 GFP_KERNEL_ACCOUNT);
2167 if (!svm->vcpu.arch.user_fpu) {
2168 printk(KERN_ERR "kvm: failed to allocate kvm userspace's fpu\n");
2169 err = -ENOMEM;
2170 goto free_partial_svm;
2171 }
2172
2173 svm->vcpu.arch.guest_fpu = kmem_cache_zalloc(x86_fpu_cache,
2174 GFP_KERNEL_ACCOUNT);
2175 if (!svm->vcpu.arch.guest_fpu) {
2176 printk(KERN_ERR "kvm: failed to allocate vcpu's fpu\n");
2177 err = -ENOMEM;
2178 goto free_user_fpu;
2179 }
2180
2181 err = kvm_vcpu_init(&svm->vcpu, kvm, id);
2182 if (err)
2183 goto free_svm;
2184
2185 err = -ENOMEM;
2186 page = alloc_page(GFP_KERNEL_ACCOUNT);
2187 if (!page)
2188 goto uninit;
2189
2190 msrpm_pages = alloc_pages(GFP_KERNEL_ACCOUNT, MSRPM_ALLOC_ORDER);
2191 if (!msrpm_pages)
2192 goto free_page1;
2193
2194 nested_msrpm_pages = alloc_pages(GFP_KERNEL_ACCOUNT, MSRPM_ALLOC_ORDER);
2195 if (!nested_msrpm_pages)
2196 goto free_page2;
2197
2198 hsave_page = alloc_page(GFP_KERNEL_ACCOUNT);
2199 if (!hsave_page)
2200 goto free_page3;
2201
2202 err = avic_init_vcpu(svm);
2203 if (err)
2204 goto free_page4;
2205
2206
2207
2208
2209 svm->avic_is_running = true;
2210
2211 svm->nested.hsave = page_address(hsave_page);
2212
2213 svm->msrpm = page_address(msrpm_pages);
2214 svm_vcpu_init_msrpm(svm->msrpm);
2215
2216 svm->nested.msrpm = page_address(nested_msrpm_pages);
2217 svm_vcpu_init_msrpm(svm->nested.msrpm);
2218
2219 svm->vmcb = page_address(page);
2220 clear_page(svm->vmcb);
2221 svm->vmcb_pa = __sme_set(page_to_pfn(page) << PAGE_SHIFT);
2222 svm->asid_generation = 0;
2223 init_vmcb(svm);
2224
2225 svm_init_osvw(&svm->vcpu);
2226
2227 return &svm->vcpu;
2228
2229free_page4:
2230 __free_page(hsave_page);
2231free_page3:
2232 __free_pages(nested_msrpm_pages, MSRPM_ALLOC_ORDER);
2233free_page2:
2234 __free_pages(msrpm_pages, MSRPM_ALLOC_ORDER);
2235free_page1:
2236 __free_page(page);
2237uninit:
2238 kvm_vcpu_uninit(&svm->vcpu);
2239free_svm:
2240 kmem_cache_free(x86_fpu_cache, svm->vcpu.arch.guest_fpu);
2241free_user_fpu:
2242 kmem_cache_free(x86_fpu_cache, svm->vcpu.arch.user_fpu);
2243free_partial_svm:
2244 kmem_cache_free(kvm_vcpu_cache, svm);
2245out:
2246 return ERR_PTR(err);
2247}
2248
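/*
 * Clear any per-CPU current_vmcb pointer that still refers to this VMCB
 * so that a stale pointer is not used for the IBPB optimisation after
 * the vcpu has been freed.
 */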
2249static void svm_clear_current_vmcb(struct vmcb *vmcb)
2250{
2251 int i;
2252
2253 for_each_online_cpu(i)
2254 cmpxchg(&per_cpu(svm_data, i)->current_vmcb, vmcb, NULL);
2255}
2256
2257static void svm_free_vcpu(struct kvm_vcpu *vcpu)
2258{
2259 struct vcpu_svm *svm = to_svm(vcpu);
2260
2261
2262
2263
2264
2265
2266 svm_clear_current_vmcb(svm->vmcb);
2267
2268 __free_page(pfn_to_page(__sme_clr(svm->vmcb_pa) >> PAGE_SHIFT));
2269 __free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER);
2270 __free_page(virt_to_page(svm->nested.hsave));
2271 __free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER);
2272 kvm_vcpu_uninit(vcpu);
2273 kmem_cache_free(x86_fpu_cache, svm->vcpu.arch.user_fpu);
2274 kmem_cache_free(x86_fpu_cache, svm->vcpu.arch.guest_fpu);
2275 kmem_cache_free(kvm_vcpu_cache, svm);
2276}
2277
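/*
 * Per-CPU state that must be (re)loaded when a vcpu migrates or is
 * scheduled in: host segment registers and MSRs are saved, the TSC
 * ratio MSR is reprogrammed if scaling is in use, and an IBPB is issued
 * when this CPU last ran a different VMCB.
 */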
2278static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2279{
2280 struct vcpu_svm *svm = to_svm(vcpu);
2281 struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
2282 int i;
2283
2284 if (unlikely(cpu != vcpu->cpu)) {
2285 svm->asid_generation = 0;
2286 mark_all_dirty(svm->vmcb);
2287 }
2288
2289#ifdef CONFIG_X86_64
2290 rdmsrl(MSR_GS_BASE, to_svm(vcpu)->host.gs_base);
2291#endif
2292 savesegment(fs, svm->host.fs);
2293 savesegment(gs, svm->host.gs);
2294 svm->host.ldt = kvm_read_ldt();
2295
2296 for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
2297 rdmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);
2298
2299 if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) {
2300 u64 tsc_ratio = vcpu->arch.tsc_scaling_ratio;
2301 if (tsc_ratio != __this_cpu_read(current_tsc_ratio)) {
2302 __this_cpu_write(current_tsc_ratio, tsc_ratio);
2303 wrmsrl(MSR_AMD64_TSC_RATIO, tsc_ratio);
2304 }
2305 }
2306
2307 if (static_cpu_has(X86_FEATURE_RDTSCP))
2308 wrmsrl(MSR_TSC_AUX, svm->tsc_aux);
2309
2310 if (sd->current_vmcb != svm->vmcb) {
2311 sd->current_vmcb = svm->vmcb;
2312 indirect_branch_prediction_barrier();
2313 }
2314 avic_vcpu_load(vcpu, cpu);
2315}
2316
2317static void svm_vcpu_put(struct kvm_vcpu *vcpu)
2318{
2319 struct vcpu_svm *svm = to_svm(vcpu);
2320 int i;
2321
2322 avic_vcpu_put(vcpu);
2323
2324 ++vcpu->stat.host_state_reload;
2325 kvm_load_ldt(svm->host.ldt);
2326#ifdef CONFIG_X86_64
2327 loadsegment(fs, svm->host.fs);
2328 wrmsrl(MSR_KERNEL_GS_BASE, current->thread.gsbase);
2329 load_gs_index(svm->host.gs);
2330#else
2331#ifdef CONFIG_X86_32_LAZY_GS
2332 loadsegment(gs, svm->host.gs);
2333#endif
2334#endif
2335 for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
2336 wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);
2337}
2338
2339static void svm_vcpu_blocking(struct kvm_vcpu *vcpu)
2340{
2341 avic_set_running(vcpu, false);
2342}
2343
2344static void svm_vcpu_unblocking(struct kvm_vcpu *vcpu)
2345{
2346 avic_set_running(vcpu, true);
2347}
2348
2349static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu)
2350{
2351 struct vcpu_svm *svm = to_svm(vcpu);
2352 unsigned long rflags = svm->vmcb->save.rflags;
2353
2354 if (svm->nmi_singlestep) {
 /* Hide our flags if they were not set by the guest */
2356 if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_TF))
2357 rflags &= ~X86_EFLAGS_TF;
2358 if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_RF))
2359 rflags &= ~X86_EFLAGS_RF;
2360 }
2361 return rflags;
2362}
2363
2364static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
2365{
2366 if (to_svm(vcpu)->nmi_singlestep)
2367 rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
2368
 /*
  * Any change of EFLAGS.VM is accompanied by a reload of SS
  * (caused by either a task switch or an inter-privilege IRET),
  * so the CPL does not need to be updated here.
  */
2374 to_svm(vcpu)->vmcb->save.rflags = rflags;
2375}
2376
2377static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
2378{
2379 switch (reg) {
2380 case VCPU_EXREG_PDPTR:
2381 BUG_ON(!npt_enabled);
2382 load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu));
2383 break;
2384 default:
2385 WARN_ON_ONCE(1);
2386 }
2387}
2388
2389static void svm_set_vintr(struct vcpu_svm *svm)
2390{
2391 set_intercept(svm, INTERCEPT_VINTR);
2392}
2393
2394static void svm_clear_vintr(struct vcpu_svm *svm)
2395{
2396 clr_intercept(svm, INTERCEPT_VINTR);
2397}
2398
2399static struct vmcb_seg *svm_seg(struct kvm_vcpu *vcpu, int seg)
2400{
2401 struct vmcb_save_area *save = &to_svm(vcpu)->vmcb->save;
2402
2403 switch (seg) {
2404 case VCPU_SREG_CS: return &save->cs;
2405 case VCPU_SREG_DS: return &save->ds;
2406 case VCPU_SREG_ES: return &save->es;
2407 case VCPU_SREG_FS: return &save->fs;
2408 case VCPU_SREG_GS: return &save->gs;
2409 case VCPU_SREG_SS: return &save->ss;
2410 case VCPU_SREG_TR: return &save->tr;
2411 case VCPU_SREG_LDTR: return &save->ldtr;
2412 }
2413 BUG();
2414 return NULL;
2415}
2416
2417static u64 svm_get_segment_base(struct kvm_vcpu *vcpu, int seg)
2418{
2419 struct vmcb_seg *s = svm_seg(vcpu, seg);
2420
2421 return s->base;
2422}
2423
2424static void svm_get_segment(struct kvm_vcpu *vcpu,
2425 struct kvm_segment *var, int seg)
2426{
2427 struct vmcb_seg *s = svm_seg(vcpu, seg);
2428
2429 var->base = s->base;
2430 var->limit = s->limit;
2431 var->selector = s->selector;
2432 var->type = s->attrib & SVM_SELECTOR_TYPE_MASK;
2433 var->s = (s->attrib >> SVM_SELECTOR_S_SHIFT) & 1;
2434 var->dpl = (s->attrib >> SVM_SELECTOR_DPL_SHIFT) & 3;
2435 var->present = (s->attrib >> SVM_SELECTOR_P_SHIFT) & 1;
2436 var->avl = (s->attrib >> SVM_SELECTOR_AVL_SHIFT) & 1;
2437 var->l = (s->attrib >> SVM_SELECTOR_L_SHIFT) & 1;
2438 var->db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1;
 /*
  * The SVM spec states that the G bit is not observed by the CPU,
  * and some VMware virtual CPUs drop it for all segments. Synthesize
  * a legal G bit from the limit so that nested VMX entry checks and
  * cross-vendor migration keep working.
  */
2448 var->g = s->limit > 0xfffff;
2449
 /*
  * The AMD VMCB has no explicit "unusable" field; emulate it from
  * the present bit for cross-vendor migration purposes.
  */
2454 var->unusable = !var->present;
2455
2456 switch (seg) {
2457 case VCPU_SREG_TR:
 /*
  * Work around hardware that does not expose the busy flag in the
  * TR selector type.
  */
2462 var->type |= 0x2;
2463 break;
2464 case VCPU_SREG_DS:
2465 case VCPU_SREG_ES:
2466 case VCPU_SREG_FS:
2467 case VCPU_SREG_GS:
 /*
  * The accessed bit must always be set in the segment descriptor
  * cache, even though it can be cleared in the descriptor itself.
  * Intel VMX checks this bit, so set it here to keep cross-vendor
  * migration working.
  */
2475 if (!var->unusable)
2476 var->type |= 0x1;
2477 break;
2478 case VCPU_SREG_SS:
 /*
  * On AMD CPUs the DB bit in the segment descriptor can be left
  * set even though the whole segment has become unusable. Clear
  * it here to pass the corresponding Intel VMX entry check when
  * cross-vendor migrating.
  */
2485 if (var->unusable)
2486 var->db = 0;
2487
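 /* This is symmetric to svm_set_segment() */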
2488 var->dpl = to_svm(vcpu)->vmcb->save.cpl;
2489 break;
2490 }
2491}
2492
2493static int svm_get_cpl(struct kvm_vcpu *vcpu)
2494{
2495 struct vmcb_save_area *save = &to_svm(vcpu)->vmcb->save;
2496
2497 return save->cpl;
2498}
2499
2500static void svm_get_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
2501{
2502 struct vcpu_svm *svm = to_svm(vcpu);
2503
2504 dt->size = svm->vmcb->save.idtr.limit;
2505 dt->address = svm->vmcb->save.idtr.base;
2506}
2507
2508static void svm_set_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
2509{
2510 struct vcpu_svm *svm = to_svm(vcpu);
2511
2512 svm->vmcb->save.idtr.limit = dt->size;
2513 svm->vmcb->save.idtr.base = dt->address ;
2514 mark_dirty(svm->vmcb, VMCB_DT);
2515}
2516
2517static void svm_get_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
2518{
2519 struct vcpu_svm *svm = to_svm(vcpu);
2520
2521 dt->size = svm->vmcb->save.gdtr.limit;
2522 dt->address = svm->vmcb->save.gdtr.base;
2523}
2524
2525static void svm_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
2526{
2527 struct vcpu_svm *svm = to_svm(vcpu);
2528
2529 svm->vmcb->save.gdtr.limit = dt->size;
2530 svm->vmcb->save.gdtr.base = dt->address ;
2531 mark_dirty(svm->vmcb, VMCB_DT);
2532}
2533
2534static void svm_decache_cr0_guest_bits(struct kvm_vcpu *vcpu)
2535{
2536}
2537
2538static void svm_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
2539{
2540}
2541
2542static void update_cr0_intercept(struct vcpu_svm *svm)
2543{
2544 ulong gcr0 = svm->vcpu.arch.cr0;
2545 u64 *hcr0 = &svm->vmcb->save.cr0;
2546
2547 *hcr0 = (*hcr0 & ~SVM_CR0_SELECTIVE_MASK)
2548 | (gcr0 & SVM_CR0_SELECTIVE_MASK);
2549
2550 mark_dirty(svm->vmcb, VMCB_CR);
2551
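 /*
  * CR0 reads and writes only need to be intercepted while the guest
  * and host views of CR0 differ.
  */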
2552 if (gcr0 == *hcr0) {
2553 clr_cr_intercept(svm, INTERCEPT_CR0_READ);
2554 clr_cr_intercept(svm, INTERCEPT_CR0_WRITE);
2555 } else {
2556 set_cr_intercept(svm, INTERCEPT_CR0_READ);
2557 set_cr_intercept(svm, INTERCEPT_CR0_WRITE);
2558 }
2559}
2560
2561static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
2562{
2563 struct vcpu_svm *svm = to_svm(vcpu);
2564
2565#ifdef CONFIG_X86_64
2566 if (vcpu->arch.efer & EFER_LME) {
2567 if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
2568 vcpu->arch.efer |= EFER_LMA;
2569 svm->vmcb->save.efer |= EFER_LMA | EFER_LME;
2570 }
2571
2572 if (is_paging(vcpu) && !(cr0 & X86_CR0_PG)) {
2573 vcpu->arch.efer &= ~EFER_LMA;
2574 svm->vmcb->save.efer &= ~(EFER_LMA | EFER_LME);
2575 }
2576 }
2577#endif
2578 vcpu->arch.cr0 = cr0;
2579
2580 if (!npt_enabled)
2581 cr0 |= X86_CR0_PG | X86_CR0_WP;
2582
 /*
  * Re-enable caching here, because the QEMU BIOS does not do it
  * itself; leaving CD/NW set results in a noticeable delay at
  * reboot.
  */
2588 if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
2589 cr0 &= ~(X86_CR0_CD | X86_CR0_NW);
2590 svm->vmcb->save.cr0 = cr0;
2591 mark_dirty(svm->vmcb, VMCB_CR);
2592 update_cr0_intercept(svm);
2593}
2594
2595static int svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
2596{
2597 unsigned long host_cr4_mce = cr4_read_shadow() & X86_CR4_MCE;
2598 unsigned long old_cr4 = to_svm(vcpu)->vmcb->save.cr4;
2599
2600 if (cr4 & X86_CR4_VMXE)
2601 return 1;
2602
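 /* Toggling CR4.PGE requires a TLB flush when NPT is in use */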
2603 if (npt_enabled && ((old_cr4 ^ cr4) & X86_CR4_PGE))
2604 svm_flush_tlb(vcpu, true);
2605
2606 vcpu->arch.cr4 = cr4;
2607 if (!npt_enabled)
2608 cr4 |= X86_CR4_PAE;
2609 cr4 |= host_cr4_mce;
2610 to_svm(vcpu)->vmcb->save.cr4 = cr4;
2611 mark_dirty(to_svm(vcpu)->vmcb, VMCB_CR);
2612 return 0;
2613}
2614
2615static void svm_set_segment(struct kvm_vcpu *vcpu,
2616 struct kvm_segment *var, int seg)
2617{
2618 struct vcpu_svm *svm = to_svm(vcpu);
2619 struct vmcb_seg *s = svm_seg(vcpu, seg);
2620
2621 s->base = var->base;
2622 s->limit = var->limit;
2623 s->selector = var->selector;
2624 s->attrib = (var->type & SVM_SELECTOR_TYPE_MASK);
2625 s->attrib |= (var->s & 1) << SVM_SELECTOR_S_SHIFT;
2626 s->attrib |= (var->dpl & 3) << SVM_SELECTOR_DPL_SHIFT;
2627 s->attrib |= ((var->present & 1) && !var->unusable) << SVM_SELECTOR_P_SHIFT;
2628 s->attrib |= (var->avl & 1) << SVM_SELECTOR_AVL_SHIFT;
2629 s->attrib |= (var->l & 1) << SVM_SELECTOR_L_SHIFT;
2630 s->attrib |= (var->db & 1) << SVM_SELECTOR_DB_SHIFT;
2631 s->attrib |= (var->g & 1) << SVM_SELECTOR_G_SHIFT;
2632
 /*
  * This is always accurate, except if SYSRET returned to a segment
  * with SS.DPL != 3. Intel does not have this quirk and always
  * forces SS.DPL to 3 on SYSRET, so that case is ignored here;
  * fixing it would mean passing the CPL to userspace and back.
  */
2639 if (seg == VCPU_SREG_SS)
 /* This is symmetric to svm_get_segment() */
2641 svm->vmcb->save.cpl = (var->dpl & 3);
2642
2643 mark_dirty(svm->vmcb, VMCB_SEG);
2644}
2645
2646static void update_bp_intercept(struct kvm_vcpu *vcpu)
2647{
2648 struct vcpu_svm *svm = to_svm(vcpu);
2649
2650 clr_exception_intercept(svm, BP_VECTOR);
2651
2652 if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) {
2653 if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
2654 set_exception_intercept(svm, BP_VECTOR);
2655 } else
2656 vcpu->guest_debug = 0;
2657}
2658
2659static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd)
2660{
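 /* Recycle the ASID space once exhausted; this forces a full TLB flush on this CPU */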
2661 if (sd->next_asid > sd->max_asid) {
2662 ++sd->asid_generation;
2663 sd->next_asid = sd->min_asid;
2664 svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ALL_ASID;
2665 }
2666
2667 svm->asid_generation = sd->asid_generation;
2668 svm->vmcb->control.asid = sd->next_asid++;
2669
2670 mark_dirty(svm->vmcb, VMCB_ASID);
2671}
2672
2673static u64 svm_get_dr6(struct kvm_vcpu *vcpu)
2674{
2675 return to_svm(vcpu)->vmcb->save.dr6;
2676}
2677
2678static void svm_set_dr6(struct kvm_vcpu *vcpu, unsigned long value)
2679{
2680 struct vcpu_svm *svm = to_svm(vcpu);
2681
2682 svm->vmcb->save.dr6 = value;
2683 mark_dirty(svm->vmcb, VMCB_DR);
2684}
2685
2686static void svm_sync_dirty_debug_regs(struct kvm_vcpu *vcpu)
2687{
2688 struct vcpu_svm *svm = to_svm(vcpu);
2689
2690 get_debugreg(vcpu->arch.db[0], 0);
2691 get_debugreg(vcpu->arch.db[1], 1);
2692 get_debugreg(vcpu->arch.db[2], 2);
2693 get_debugreg(vcpu->arch.db[3], 3);
2694 vcpu->arch.dr6 = svm_get_dr6(vcpu);
2695 vcpu->arch.dr7 = svm->vmcb->save.dr7;
2696
2697 vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_WONT_EXIT;
2698 set_dr_intercepts(svm);
2699}
2700
2701static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value)
2702{
2703 struct vcpu_svm *svm = to_svm(vcpu);
2704
2705 svm->vmcb->save.dr7 = value;
2706 mark_dirty(svm->vmcb, VMCB_DR);
2707}
2708
2709static int pf_interception(struct vcpu_svm *svm)
2710{
2711 u64 fault_address = __sme_clr(svm->vmcb->control.exit_info_2);
2712 u64 error_code = svm->vmcb->control.exit_info_1;
2713
2714 return kvm_handle_page_fault(&svm->vcpu, error_code, fault_address,
2715 static_cpu_has(X86_FEATURE_DECODEASSISTS) ?
2716 svm->vmcb->control.insn_bytes : NULL,
2717 svm->vmcb->control.insn_len);
2718}
2719
2720static int npf_interception(struct vcpu_svm *svm)
2721{
2722 u64 fault_address = __sme_clr(svm->vmcb->control.exit_info_2);
2723 u64 error_code = svm->vmcb->control.exit_info_1;
2724
2725 trace_kvm_page_fault(fault_address, error_code);
2726 return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code,
2727 static_cpu_has(X86_FEATURE_DECODEASSISTS) ?
2728 svm->vmcb->control.insn_bytes : NULL,
2729 svm->vmcb->control.insn_len);
2730}
2731
2732static int db_interception(struct vcpu_svm *svm)
2733{
2734 struct kvm_run *kvm_run = svm->vcpu.run;
2735 struct kvm_vcpu *vcpu = &svm->vcpu;
2736
2737 if (!(svm->vcpu.guest_debug &
2738 (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) &&
2739 !svm->nmi_singlestep) {
2740 kvm_queue_exception(&svm->vcpu, DB_VECTOR);
2741 return 1;
2742 }
2743
2744 if (svm->nmi_singlestep) {
2745 disable_nmi_singlestep(svm);
 /* Make sure we check for pending NMIs upon entry */
2747 kvm_make_request(KVM_REQ_EVENT, vcpu);
2748 }
2749
2750 if (svm->vcpu.guest_debug &
2751 (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) {
2752 kvm_run->exit_reason = KVM_EXIT_DEBUG;
2753 kvm_run->debug.arch.pc =
2754 svm->vmcb->save.cs.base + svm->vmcb->save.rip;
2755 kvm_run->debug.arch.exception = DB_VECTOR;
2756 return 0;
2757 }
2758
2759 return 1;
2760}
2761
2762static int bp_interception(struct vcpu_svm *svm)
2763{
2764 struct kvm_run *kvm_run = svm->vcpu.run;
2765
2766 kvm_run->exit_reason = KVM_EXIT_DEBUG;
2767 kvm_run->debug.arch.pc = svm->vmcb->save.cs.base + svm->vmcb->save.rip;
2768 kvm_run->debug.arch.exception = BP_VECTOR;
2769 return 0;
2770}
2771
2772static int ud_interception(struct vcpu_svm *svm)
2773{
2774 return handle_ud(&svm->vcpu);
2775}
2776
2777static int ac_interception(struct vcpu_svm *svm)
2778{
2779 kvm_queue_exception_e(&svm->vcpu, AC_VECTOR, 0);
2780 return 1;
2781}
2782
2783static int gp_interception(struct vcpu_svm *svm)
2784{
2785 struct kvm_vcpu *vcpu = &svm->vcpu;
2786 u32 error_code = svm->vmcb->control.exit_info_1;
2787
2788 WARN_ON_ONCE(!enable_vmware_backdoor);
2789
 /*
  * VMware backdoor emulation on #GP interception only handles
  * IN{S}, OUT{S} and RDPMC, none of which generate a non-zero
  * error code.
  */
2794 if (error_code) {
2795 kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
2796 return 1;
2797 }
2798 return kvm_emulate_instruction(vcpu, EMULTYPE_VMWARE_GP);
2799}
2800
2801static bool is_erratum_383(void)
2802{
2803 int err, i;
2804 u64 value;
2805
2806 if (!erratum_383_found)
2807 return false;
2808
2809 value = native_read_msr_safe(MSR_IA32_MC0_STATUS, &err);
2810 if (err)
2811 return false;
2812
2813
2814 value &= ~(1ULL << 62);
2815
2816 if (value != 0xb600000000010015ULL)
2817 return false;
2818
 /* Clear MCi_STATUS registers */
2820 for (i = 0; i < 6; ++i)
2821 native_write_msr_safe(MSR_IA32_MCx_STATUS(i), 0, 0);
2822
2823 value = native_read_msr_safe(MSR_IA32_MCG_STATUS, &err);
2824 if (!err) {
2825 u32 low, high;
2826
2827 value &= ~(1ULL << 2);
2828 low = lower_32_bits(value);
2829 high = upper_32_bits(value);
2830
2831 native_write_msr_safe(MSR_IA32_MCG_STATUS, low, high);
2832 }
2833
2834
2835 __flush_tlb_all();
2836
2837 return true;
2838}
2839
2840static void svm_handle_mce(struct vcpu_svm *svm)
2841{
2842 if (is_erratum_383()) {
 /*
  * Erratum 383 was triggered; guest state is corrupt, so kill
  * the guest with a triple fault.
  */
2847 pr_err("KVM: Guest triggered AMD Erratum 383\n");
2848
2849 kvm_make_request(KVM_REQ_TRIPLE_FAULT, &svm->vcpu);
2850
2851 return;
2852 }
2853
 /*
  * On an #MC intercept the MCE handler is not called automatically
  * in the host, so forward the machine check by hand.
  */
2858 asm volatile (
2859 "int $0x12\n");
2860
2861
2862 return;
2863}
2864
2865static int mc_interception(struct vcpu_svm *svm)
2866{
2867 return 1;
2868}
2869
2870static int shutdown_interception(struct vcpu_svm *svm)
2871{
2872 struct kvm_run *kvm_run = svm->vcpu.run;
2873
 /*
  * The VMCB is undefined after a SHUTDOWN intercept,
  * so reinitialize it.
  */
2878 clear_page(svm->vmcb);
2879 init_vmcb(svm);
2880
2881 kvm_run->exit_reason = KVM_EXIT_SHUTDOWN;
2882 return 0;
2883}
2884
2885static int io_interception(struct vcpu_svm *svm)
2886{
2887 struct kvm_vcpu *vcpu = &svm->vcpu;
2888 u32 io_info = svm->vmcb->control.exit_info_1;
2889 int size, in, string;
2890 unsigned port;
2891
2892 ++svm->vcpu.stat.io_exits;
2893 string = (io_info & SVM_IOIO_STR_MASK) != 0;
2894 in = (io_info & SVM_IOIO_TYPE_MASK) != 0;
2895 if (string)
2896 return kvm_emulate_instruction(vcpu, 0);
2897
2898 port = io_info >> 16;
2899 size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT;
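 /* exit_info_2 holds the RIP of the instruction following the IN/OUT */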
2900 svm->next_rip = svm->vmcb->control.exit_info_2;
2901
2902 return kvm_fast_pio(&svm->vcpu, size, port, in);
2903}
2904
2905static int nmi_interception(struct vcpu_svm *svm)
2906{
2907 return 1;
2908}
2909
2910static int intr_interception(struct vcpu_svm *svm)
2911{
2912 ++svm->vcpu.stat.irq_exits;
2913 return 1;
2914}
2915
2916static int nop_on_interception(struct vcpu_svm *svm)
2917{
2918 return 1;
2919}
2920
2921static int halt_interception(struct vcpu_svm *svm)
2922{
2923 return kvm_emulate_halt(&svm->vcpu);
2924}
2925
2926static int vmmcall_interception(struct vcpu_svm *svm)
2927{
2928 return kvm_emulate_hypercall(&svm->vcpu);
2929}
2930
2931static unsigned long nested_svm_get_tdp_cr3(struct kvm_vcpu *vcpu)
2932{
2933 struct vcpu_svm *svm = to_svm(vcpu);
2934
2935 return svm->nested.nested_cr3;
2936}
2937
2938static u64 nested_svm_get_tdp_pdptr(struct kvm_vcpu *vcpu, int index)
2939{
2940 struct vcpu_svm *svm = to_svm(vcpu);
2941 u64 cr3 = svm->nested.nested_cr3;
2942 u64 pdpte;
2943 int ret;
2944
2945 ret = kvm_vcpu_read_guest_page(vcpu, gpa_to_gfn(__sme_clr(cr3)), &pdpte,
2946 offset_in_page(cr3) + index * 8, 8);
2947 if (ret)
2948 return 0;
2949 return pdpte;
2950}
2951
2952static void nested_svm_set_tdp_cr3(struct kvm_vcpu *vcpu,
2953 unsigned long root)
2954{
2955 struct vcpu_svm *svm = to_svm(vcpu);
2956
2957 svm->vmcb->control.nested_cr3 = __sme_set(root);
2958 mark_dirty(svm->vmcb, VMCB_NPT);
2959}
2960
2961static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu,
2962 struct x86_exception *fault)
2963{
2964 struct vcpu_svm *svm = to_svm(vcpu);
2965
2966 if (svm->vmcb->control.exit_code != SVM_EXIT_NPF) {
 /*
  * TODO: track the cause of the nested page fault, and
  * correctly fill in the high bits of exit_info_1.
  */
2971 svm->vmcb->control.exit_code = SVM_EXIT_NPF;
2972 svm->vmcb->control.exit_code_hi = 0;
2973 svm->vmcb->control.exit_info_1 = (1ULL << 32);
2974 svm->vmcb->control.exit_info_2 = fault->address;
2975 }
2976
2977 svm->vmcb->control.exit_info_1 &= ~0xffffffffULL;
2978 svm->vmcb->control.exit_info_1 |= fault->error_code;
2979
 /*
  * The present bit is always zero for page structure faults on
  * real hardware.
  */
2984 if (svm->vmcb->control.exit_info_1 & (2ULL << 32))
2985 svm->vmcb->control.exit_info_1 &= ~1;
2986
2987 nested_svm_vmexit(svm);
2988}
2989
2990static void nested_svm_init_mmu_context(struct kvm_vcpu *vcpu)
2991{
2992 WARN_ON(mmu_is_nested(vcpu));
2993
2994 vcpu->arch.mmu = &vcpu->arch.guest_mmu;
2995 kvm_init_shadow_mmu(vcpu);
2996 vcpu->arch.mmu->set_cr3 = nested_svm_set_tdp_cr3;
2997 vcpu->arch.mmu->get_cr3 = nested_svm_get_tdp_cr3;
2998 vcpu->arch.mmu->get_pdptr = nested_svm_get_tdp_pdptr;
2999 vcpu->arch.mmu->inject_page_fault = nested_svm_inject_npf_exit;
3000 vcpu->arch.mmu->shadow_root_level = get_npt_level(vcpu);
3001 reset_shadow_zero_bits_mask(vcpu, vcpu->arch.mmu);
3002 vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu;
3003}
3004
3005static void nested_svm_uninit_mmu_context(struct kvm_vcpu *vcpu)
3006{
3007 vcpu->arch.mmu = &vcpu->arch.root_mmu;
3008 vcpu->arch.walk_mmu = &vcpu->arch.root_mmu;
3009}
3010
3011static int nested_svm_check_permissions(struct vcpu_svm *svm)
3012{
3013 if (!(svm->vcpu.arch.efer & EFER_SVME) ||
3014 !is_paging(&svm->vcpu)) {
3015 kvm_queue_exception(&svm->vcpu, UD_VECTOR);
3016 return 1;
3017 }
3018
3019 if (svm->vmcb->save.cpl) {
3020 kvm_inject_gp(&svm->vcpu, 0);
3021 return 1;
3022 }
3023
3024 return 0;
3025}
3026
3027static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
3028 bool has_error_code, u32 error_code)
3029{
3030 int vmexit;
3031
3032 if (!is_guest_mode(&svm->vcpu))
3033 return 0;
3034
3035 vmexit = nested_svm_intercept(svm);
3036 if (vmexit != NESTED_EXIT_DONE)
3037 return 0;
3038
3039 svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + nr;
3040 svm->vmcb->control.exit_code_hi = 0;
3041 svm->vmcb->control.exit_info_1 = error_code;
3042
 /*
  * EXITINFO2 is undefined for all exception intercepts other
  * than #PF.
  */
3047 if (svm->vcpu.arch.exception.nested_apf)
3048 svm->vmcb->control.exit_info_2 = svm->vcpu.arch.apf.nested_apf_token;
3049 else if (svm->vcpu.arch.exception.has_payload)
3050 svm->vmcb->control.exit_info_2 = svm->vcpu.arch.exception.payload;
3051 else
3052 svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2;
3053
3054 svm->nested.exit_required = true;
3055 return vmexit;
3056}
3057
3058
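/* Returns true when an interrupt can be delivered; false when a #VMEXIT to L1 must be emulated first */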
3059static inline bool nested_svm_intr(struct vcpu_svm *svm)
3060{
3061 if (!is_guest_mode(&svm->vcpu))
3062 return true;
3063
3064 if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK))
3065 return true;
3066
3067 if (!(svm->vcpu.arch.hflags & HF_HIF_MASK))
3068 return false;
3069
 /*
  * If a #vmexit was already requested (for example by an
  * intercepted exception), do not overwrite it with an
  * "external interrupt" exit.
  */
3075 if (svm->nested.exit_required)
3076 return false;
3077
3078 svm->vmcb->control.exit_code = SVM_EXIT_INTR;
3079 svm->vmcb->control.exit_info_1 = 0;
3080 svm->vmcb->control.exit_info_2 = 0;
3081
3082 if (svm->nested.intercept & 1ULL) {
 /*
  * The #vmexit cannot be emulated here directly because this
  * code path runs with irqs and preemption disabled; emulating
  * it might sleep. Only signal the request for the #vmexit here.
  */
3089 svm->nested.exit_required = true;
3090 trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip);
3091 return false;
3092 }
3093
3094 return true;
3095}
3096
3097
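/* Returns true when an NMI can be delivered; false when L1 intercepts NMIs and a #VMEXIT is queued instead */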
3098static inline bool nested_svm_nmi(struct vcpu_svm *svm)
3099{
3100 if (!is_guest_mode(&svm->vcpu))
3101 return true;
3102
3103 if (!(svm->nested.intercept & (1ULL << INTERCEPT_NMI)))
3104 return true;
3105
3106 svm->vmcb->control.exit_code = SVM_EXIT_NMI;
3107 svm->nested.exit_required = true;
3108
3109 return false;
3110}
3111
3112static int nested_svm_intercept_ioio(struct vcpu_svm *svm)
3113{
3114 unsigned port, size, iopm_len;
3115 u16 val, mask;
3116 u8 start_bit;
3117 u64 gpa;
3118
3119 if (!(svm->nested.intercept & (1ULL << INTERCEPT_IOIO_PROT)))
3120 return NESTED_EXIT_HOST;
3121
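 /* Consult L1's I/O permission bitmap to decide whether this exit must be reflected to L1 */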
3122 port = svm->vmcb->control.exit_info_1 >> 16;
3123 size = (svm->vmcb->control.exit_info_1 & SVM_IOIO_SIZE_MASK) >>
3124 SVM_IOIO_SIZE_SHIFT;
3125 gpa = svm->nested.vmcb_iopm + (port / 8);
3126 start_bit = port % 8;
3127 iopm_len = (start_bit + size > 8) ? 2 : 1;
3128 mask = (0xf >> (4 - size)) << start_bit;
3129 val = 0;
3130
3131 if (kvm_vcpu_read_guest(&svm->vcpu, gpa, &val, iopm_len))
3132 return NESTED_EXIT_DONE;
3133
3134 return (val & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST;
3135}
3136
3137static int nested_svm_exit_handled_msr(struct vcpu_svm *svm)
3138{
3139 u32 offset, msr, value;
3140 int write, mask;
3141
3142 if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT)))
3143 return NESTED_EXIT_HOST;
3144
3145 msr = svm->vcpu.arch.regs[VCPU_REGS_RCX];
3146 offset = svm_msrpm_offset(msr);
3147 write = svm->vmcb->control.exit_info_1 & 1;
3148 mask = 1 << ((2 * (msr & 0xf)) + write);
3149
3150 if (offset == MSR_INVALID)
3151 return NESTED_EXIT_DONE;
3152
 /* Offset is in 32 bit units but needed in 8 bit units */
3154 offset *= 4;
3155
3156 if (kvm_vcpu_read_guest(&svm->vcpu, svm->nested.vmcb_msrpm + offset, &value, 4))
3157 return NESTED_EXIT_DONE;
3158
3159 return (value & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST;
3160}
3161
3162
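/* Decide whether an intercepted #DB belongs to KVM's NMI single-stepping or should be reflected to L1 */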
3163static int nested_svm_intercept_db(struct vcpu_svm *svm)
3164{
3165 unsigned long dr6;
3166
 /* If we are not single-stepping, this #DB is not ours */
3168 if (!svm->nmi_singlestep)
3169 return NESTED_EXIT_DONE;
3170
 /* If it is not a single-step exception, it is not ours */
3172 if (kvm_get_dr(&svm->vcpu, 6, &dr6))
3173 return NESTED_EXIT_DONE;
3174 if (!(dr6 & DR6_BS))
3175 return NESTED_EXIT_DONE;
3176
 /* If the guest itself is single-stepping, the #DB event is for it */
3178 if (svm->nmi_singlestep_guest_rflags & X86_EFLAGS_TF) {
3179 disable_nmi_singlestep(svm);
3180 return NESTED_EXIT_DONE;
3181 }
3182
 /* It is ours; the nested hypervisor must not see this one */
3184 return NESTED_EXIT_HOST;
3185}
3186
3187static int nested_svm_exit_special(struct vcpu_svm *svm)
3188{
3189 u32 exit_code = svm->vmcb->control.exit_code;
3190
3191 switch (exit_code) {
3192 case SVM_EXIT_INTR:
3193 case SVM_EXIT_NMI:
3194 case SVM_EXIT_EXCP_BASE + MC_VECTOR:
3195 return NESTED_EXIT_HOST;
3196 case SVM_EXIT_NPF:
3197
3198 if (npt_enabled)
3199 return NESTED_EXIT_HOST;
3200 break;
3201 case SVM_EXIT_EXCP_BASE + PF_VECTOR:
3202
3203 if (!npt_enabled && svm->vcpu.arch.apf.host_apf_reason == 0)
3204 return NESTED_EXIT_HOST;
3205 break;
3206 default:
3207 break;
3208 }
3209
3210 return NESTED_EXIT_CONTINUE;
3211}
3212
/*
 * Decide whether the current #vmexit is intercepted by the L1 hypervisor
 * (NESTED_EXIT_DONE) or should be handled by KVM itself (NESTED_EXIT_HOST).
 */
3216static int nested_svm_intercept(struct vcpu_svm *svm)
3217{
3218 u32 exit_code = svm->vmcb->control.exit_code;
3219 int vmexit = NESTED_EXIT_HOST;
3220
3221 switch (exit_code) {
3222 case SVM_EXIT_MSR:
3223 vmexit = nested_svm_exit_handled_msr(svm);
3224 break;
3225 case SVM_EXIT_IOIO:
3226 vmexit = nested_svm_intercept_ioio(svm);
3227 break;
3228 case SVM_EXIT_READ_CR0 ... SVM_EXIT_WRITE_CR8: {
3229 u32 bit = 1U << (exit_code - SVM_EXIT_READ_CR0);
3230 if (svm->nested.intercept_cr & bit)
3231 vmexit = NESTED_EXIT_DONE;
3232 break;
3233 }
3234 case SVM_EXIT_READ_DR0 ... SVM_EXIT_WRITE_DR7: {
3235 u32 bit = 1U << (exit_code - SVM_EXIT_READ_DR0);
3236 if (svm->nested.intercept_dr & bit)
3237 vmexit = NESTED_EXIT_DONE;
3238 break;
3239 }
3240 case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: {
3241 u32 excp_bits = 1 << (exit_code - SVM_EXIT_EXCP_BASE);
3242 if (svm->nested.intercept_exceptions & excp_bits) {
3243 if (exit_code == SVM_EXIT_EXCP_BASE + DB_VECTOR)
3244 vmexit = nested_svm_intercept_db(svm);
3245 else
3246 vmexit = NESTED_EXIT_DONE;
3247 }
 /* An async page fault always causes a #vmexit to L1 */
3249 else if ((exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR) &&
3250 svm->vcpu.arch.exception.nested_apf != 0)
3251 vmexit = NESTED_EXIT_DONE;
3252 break;
3253 }
3254 case SVM_EXIT_ERR: {
3255 vmexit = NESTED_EXIT_DONE;
3256 break;
3257 }
3258 default: {
3259 u64 exit_bits = 1ULL << (exit_code - SVM_EXIT_INTR);
3260 if (svm->nested.intercept & exit_bits)
3261 vmexit = NESTED_EXIT_DONE;
3262 }
3263 }
3264
3265 return vmexit;
3266}
3267
3268static int nested_svm_exit_handled(struct vcpu_svm *svm)
3269{
3270 int vmexit;
3271
3272 vmexit = nested_svm_intercept(svm);
3273
3274 if (vmexit == NESTED_EXIT_DONE)
3275 nested_svm_vmexit(svm);
3276
3277 return vmexit;
3278}
3279
3280static inline void copy_vmcb_control_area(struct vmcb *dst_vmcb, struct vmcb *from_vmcb)
3281{
3282 struct vmcb_control_area *dst = &dst_vmcb->control;
3283 struct vmcb_control_area *from = &from_vmcb->control;
3284
3285 dst->intercept_cr = from->intercept_cr;
3286 dst->intercept_dr = from->intercept_dr;
3287 dst->intercept_exceptions = from->intercept_exceptions;
3288 dst->intercept = from->intercept;
3289 dst->iopm_base_pa = from->iopm_base_pa;
3290 dst->msrpm_base_pa = from->msrpm_base_pa;
3291 dst->tsc_offset = from->tsc_offset;
3292 dst->asid = from->asid;
3293 dst->tlb_ctl = from->tlb_ctl;
3294 dst->int_ctl = from->int_ctl;
3295 dst->int_vector = from->int_vector;
3296 dst->int_state = from->int_state;
3297 dst->exit_code = from->exit_code;
3298 dst->exit_code_hi = from->exit_code_hi;
3299 dst->exit_info_1 = from->exit_info_1;
3300 dst->exit_info_2 = from->exit_info_2;
3301 dst->exit_int_info = from->exit_int_info;
3302 dst->exit_int_info_err = from->exit_int_info_err;
3303 dst->nested_ctl = from->nested_ctl;
3304 dst->event_inj = from->event_inj;
3305 dst->event_inj_err = from->event_inj_err;
3306 dst->nested_cr3 = from->nested_cr3;
3307 dst->virt_ext = from->virt_ext;
3308 dst->pause_filter_count = from->pause_filter_count;
3309 dst->pause_filter_thresh = from->pause_filter_thresh;
3310}
3311
3312static int nested_svm_vmexit(struct vcpu_svm *svm)
3313{
3314 int rc;
3315 struct vmcb *nested_vmcb;
3316 struct vmcb *hsave = svm->nested.hsave;
3317 struct vmcb *vmcb = svm->vmcb;
3318 struct kvm_host_map map;
3319
3320 trace_kvm_nested_vmexit_inject(vmcb->control.exit_code,
3321 vmcb->control.exit_info_1,
3322 vmcb->control.exit_info_2,
3323 vmcb->control.exit_int_info,
3324 vmcb->control.exit_int_info_err,
3325 KVM_ISA_SVM);
3326
3327 rc = kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(svm->nested.vmcb), &map);
3328 if (rc) {
3329 if (rc == -EINVAL)
3330 kvm_inject_gp(&svm->vcpu, 0);
3331 return 1;
3332 }
3333
3334 nested_vmcb = map.hva;
3335
 /* Exit guest mode */
3337 leave_guest_mode(&svm->vcpu);
3338 svm->nested.vmcb = 0;
3339
 /* GIF is cleared on #VMEXIT */
3341 disable_gif(svm);
3342
3343 nested_vmcb->save.es = vmcb->save.es;
3344 nested_vmcb->save.cs = vmcb->save.cs;
3345 nested_vmcb->save.ss = vmcb->save.ss;
3346 nested_vmcb->save.ds = vmcb->save.ds;
3347 nested_vmcb->save.gdtr = vmcb->save.gdtr;
3348 nested_vmcb->save.idtr = vmcb->save.idtr;
3349 nested_vmcb->save.efer = svm->vcpu.arch.efer;
3350 nested_vmcb->save.cr0 = kvm_read_cr0(&svm->vcpu);
3351 nested_vmcb->save.cr3 = kvm_read_cr3(&svm->vcpu);
3352 nested_vmcb->save.cr2 = vmcb->save.cr2;
3353 nested_vmcb->save.cr4 = svm->vcpu.arch.cr4;
3354 nested_vmcb->save.rflags = kvm_get_rflags(&svm->vcpu);
3355 nested_vmcb->save.rip = vmcb->save.rip;
3356 nested_vmcb->save.rsp = vmcb->save.rsp;
3357 nested_vmcb->save.rax = vmcb->save.rax;
3358 nested_vmcb->save.dr7 = vmcb->save.dr7;
3359 nested_vmcb->save.dr6 = vmcb->save.dr6;
3360 nested_vmcb->save.cpl = vmcb->save.cpl;
3361
3362 nested_vmcb->control.int_ctl = vmcb->control.int_ctl;
3363 nested_vmcb->control.int_vector = vmcb->control.int_vector;
3364 nested_vmcb->control.int_state = vmcb->control.int_state;
3365 nested_vmcb->control.exit_code = vmcb->control.exit_code;
3366 nested_vmcb->control.exit_code_hi = vmcb->control.exit_code_hi;
3367 nested_vmcb->control.exit_info_1 = vmcb->control.exit_info_1;
3368 nested_vmcb->control.exit_info_2 = vmcb->control.exit_info_2;
3369 nested_vmcb->control.exit_int_info = vmcb->control.exit_int_info;
3370 nested_vmcb->control.exit_int_info_err = vmcb->control.exit_int_info_err;
3371
3372 if (svm->nrips_enabled)
3373 nested_vmcb->control.next_rip = vmcb->control.next_rip;
3374
 /*
  * If we emulate a VMRUN/#VMEXIT in the same host #vmexit cycle we
  * must not lose injected events, so check event_inj here and copy
  * it to exit_int_info if it is valid. exit_int_info and event_inj
  * cannot both be valid, because this case only happens on a VMRUN
  * instruction intercept which has no valid exit_int_info set.
  */
3383 if (vmcb->control.event_inj & SVM_EVTINJ_VALID) {
3384 struct vmcb_control_area *nc = &nested_vmcb->control;
3385
3386 nc->exit_int_info = vmcb->control.event_inj;
3387 nc->exit_int_info_err = vmcb->control.event_inj_err;
3388 }
3389
3390 nested_vmcb->control.tlb_ctl = 0;
3391 nested_vmcb->control.event_inj = 0;
3392 nested_vmcb->control.event_inj_err = 0;
3393
3394 nested_vmcb->control.pause_filter_count =
3395 svm->vmcb->control.pause_filter_count;
3396 nested_vmcb->control.pause_filter_thresh =
3397 svm->vmcb->control.pause_filter_thresh;
3398
 /* KVM always sets V_INTR_MASKING; restore L1's original value from hflags */
3400 if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK))
3401 nested_vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK;
3402
 /* Restore the original control entries */
3404 copy_vmcb_control_area(vmcb, hsave);
3405
3406 svm->vcpu.arch.tsc_offset = svm->vmcb->control.tsc_offset;
3407 kvm_clear_exception_queue(&svm->vcpu);
3408 kvm_clear_interrupt_queue(&svm->vcpu);
3409
3410 svm->nested.nested_cr3 = 0;
3411
 /* Restore selected save entries */
3413 svm->vmcb->save.es = hsave->save.es;
3414 svm->vmcb->save.cs = hsave->save.cs;
3415 svm->vmcb->save.ss = hsave->save.ss;
3416 svm->vmcb->save.ds = hsave->save.ds;
3417 svm->vmcb->save.gdtr = hsave->save.gdtr;
3418 svm->vmcb->save.idtr = hsave->save.idtr;
3419 kvm_set_rflags(&svm->vcpu, hsave->save.rflags);
3420 svm_set_efer(&svm->vcpu, hsave->save.efer);
3421 svm_set_cr0(&svm->vcpu, hsave->save.cr0 | X86_CR0_PE);
3422 svm_set_cr4(&svm->vcpu, hsave->save.cr4);
3423 if (npt_enabled) {
3424 svm->vmcb->save.cr3 = hsave->save.cr3;
3425 svm->vcpu.arch.cr3 = hsave->save.cr3;
3426 } else {
3427 (void)kvm_set_cr3(&svm->vcpu, hsave->save.cr3);
3428 }
3429 kvm_rax_write(&svm->vcpu, hsave->save.rax);
3430 kvm_rsp_write(&svm->vcpu, hsave->save.rsp);
3431 kvm_rip_write(&svm->vcpu, hsave->save.rip);
3432 svm->vmcb->save.dr7 = 0;
3433 svm->vmcb->save.cpl = 0;
3434 svm->vmcb->control.exit_int_info = 0;
3435
3436 mark_all_dirty(svm->vmcb);
3437
3438 kvm_vcpu_unmap(&svm->vcpu, &map, true);
3439
3440 nested_svm_uninit_mmu_context(&svm->vcpu);
3441 kvm_mmu_reset_context(&svm->vcpu);
3442 kvm_mmu_load(&svm->vcpu);
3443
 /*
  * Drop what we picked up for L2 via svm_complete_interrupts() so
  * that it does not end up in L1.
  */
3448 svm->vcpu.arch.nmi_injected = false;
3449 kvm_clear_exception_queue(&svm->vcpu);
3450 kvm_clear_interrupt_queue(&svm->vcpu);
3451
3452 return 0;
3453}
3454
3455static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm)
3456{
 /*
  * This function merges the MSR permission bitmaps of KVM and the
  * nested vmcb. It is optimized in that it only merges the parts
  * where the KVM MSR permission bitmap may contain zero bits.
  */
3462 int i;
3463
3464 if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT)))
3465 return true;
3466
3467 for (i = 0; i < MSRPM_OFFSETS; i++) {
3468 u32 value, p;
3469 u64 offset;
3470
3471 if (msrpm_offsets[i] == 0xffffffff)
3472 break;
3473
3474 p = msrpm_offsets[i];
3475 offset = svm->nested.vmcb_msrpm + (p * 4);
3476
3477 if (kvm_vcpu_read_guest(&svm->vcpu, offset, &value, 4))
3478 return false;
3479
3480 svm->nested.msrpm[p] = svm->msrpm[p] | value;
3481 }
3482
3483 svm->vmcb->control.msrpm_base_pa = __sme_set(__pa(svm->nested.msrpm));
3484
3485 return true;
3486}
3487
3488static bool nested_vmcb_checks(struct vmcb *vmcb)
3489{
3490 if ((vmcb->control.intercept & (1ULL << INTERCEPT_VMRUN)) == 0)
3491 return false;
3492
3493 if (vmcb->control.asid == 0)
3494 return false;
3495
3496 if ((vmcb->control.nested_ctl & SVM_NESTED_CTL_NP_ENABLE) &&
3497 !npt_enabled)
3498 return false;
3499
3500 return true;
3501}
3502
3503static void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
3504 struct vmcb *nested_vmcb, struct kvm_host_map *map)
3505{
3506 if (kvm_get_rflags(&svm->vcpu) & X86_EFLAGS_IF)
3507 svm->vcpu.arch.hflags |= HF_HIF_MASK;
3508 else
3509 svm->vcpu.arch.hflags &= ~HF_HIF_MASK;
3510
3511 if (nested_vmcb->control.nested_ctl & SVM_NESTED_CTL_NP_ENABLE) {
3512 svm->nested.nested_cr3 = nested_vmcb->control.nested_cr3;
3513 nested_svm_init_mmu_context(&svm->vcpu);
3514 }
3515
 /* Load the nested guest state */
3517 svm->vmcb->save.es = nested_vmcb->save.es;
3518 svm->vmcb->save.cs = nested_vmcb->save.cs;
3519 svm->vmcb->save.ss = nested_vmcb->save.ss;
3520 svm->vmcb->save.ds = nested_vmcb->save.ds;
3521 svm->vmcb->save.gdtr = nested_vmcb->save.gdtr;
3522 svm->vmcb->save.idtr = nested_vmcb->save.idtr;
3523 kvm_set_rflags(&svm->vcpu, nested_vmcb->save.rflags);
3524 svm_set_efer(&svm->vcpu, nested_vmcb->save.efer);
3525 svm_set_cr0(&svm->vcpu, nested_vmcb->save.cr0);
3526 svm_set_cr4(&svm->vcpu, nested_vmcb->save.cr4);
3527 if (npt_enabled) {
3528 svm->vmcb->save.cr3 = nested_vmcb->save.cr3;
3529 svm->vcpu.arch.cr3 = nested_vmcb->save.cr3;
3530 } else
3531 (void)kvm_set_cr3(&svm->vcpu, nested_vmcb->save.cr3);
3532
3533
3534 kvm_mmu_reset_context(&svm->vcpu);
3535
3536 svm->vmcb->save.cr2 = svm->vcpu.arch.cr2 = nested_vmcb->save.cr2;
3537 kvm_rax_write(&svm->vcpu, nested_vmcb->save.rax);
3538 kvm_rsp_write(&svm->vcpu, nested_vmcb->save.rsp);
3539 kvm_rip_write(&svm->vcpu, nested_vmcb->save.rip);
3540
3541
3542 svm->vmcb->save.rax = nested_vmcb->save.rax;
3543 svm->vmcb->save.rsp = nested_vmcb->save.rsp;
3544 svm->vmcb->save.rip = nested_vmcb->save.rip;
3545 svm->vmcb->save.dr7 = nested_vmcb->save.dr7;
3546 svm->vmcb->save.dr6 = nested_vmcb->save.dr6;
3547 svm->vmcb->save.cpl = nested_vmcb->save.cpl;
3548
3549 svm->nested.vmcb_msrpm = nested_vmcb->control.msrpm_base_pa & ~0x0fffULL;
3550 svm->nested.vmcb_iopm = nested_vmcb->control.iopm_base_pa & ~0x0fffULL;
3551
 /* Cache the nested intercepts */
3553 svm->nested.intercept_cr = nested_vmcb->control.intercept_cr;
3554 svm->nested.intercept_dr = nested_vmcb->control.intercept_dr;
3555 svm->nested.intercept_exceptions = nested_vmcb->control.intercept_exceptions;
3556 svm->nested.intercept = nested_vmcb->control.intercept;
3557
3558 svm_flush_tlb(&svm->vcpu, true);
3559 svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | V_INTR_MASKING_MASK;
3560 if (nested_vmcb->control.int_ctl & V_INTR_MASKING_MASK)
3561 svm->vcpu.arch.hflags |= HF_VINTR_MASK;
3562 else
3563 svm->vcpu.arch.hflags &= ~HF_VINTR_MASK;
3564
3565 if (svm->vcpu.arch.hflags & HF_VINTR_MASK) {
 /* We only want the cr8 intercept bits of L1 */
3567 clr_cr_intercept(svm, INTERCEPT_CR8_READ);
3568 clr_cr_intercept(svm, INTERCEPT_CR8_WRITE);
3569 }
3570
 /* We don't want to see VMMCALLs from a nested guest */
3572 clr_intercept(svm, INTERCEPT_VMMCALL);
3573
3574 svm->vcpu.arch.tsc_offset += nested_vmcb->control.tsc_offset;
3575 svm->vmcb->control.tsc_offset = svm->vcpu.arch.tsc_offset;
3576
3577 svm->vmcb->control.virt_ext = nested_vmcb->control.virt_ext;
3578 svm->vmcb->control.int_vector = nested_vmcb->control.int_vector;
3579 svm->vmcb->control.int_state = nested_vmcb->control.int_state;
3580 svm->vmcb->control.event_inj = nested_vmcb->control.event_inj;
3581 svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err;
3582
3583 svm->vmcb->control.pause_filter_count =
3584 nested_vmcb->control.pause_filter_count;
3585 svm->vmcb->control.pause_filter_thresh =
3586 nested_vmcb->control.pause_filter_thresh;
3587
3588 kvm_vcpu_unmap(&svm->vcpu, map, true);
3589
 /* Enter guest mode */
3591 enter_guest_mode(&svm->vcpu);
3592
 /*
  * Merge guest and host intercepts - must be called with the vcpu
  * in guest mode to take effect.
  */
3597 recalc_intercepts(svm);
3598
3599 svm->nested.vmcb = vmcb_gpa;
3600
3601 enable_gif(svm);
3602
3603 mark_all_dirty(svm->vmcb);
3604}
3605
3606static int nested_svm_vmrun(struct vcpu_svm *svm)
3607{
3608 int ret;
3609 struct vmcb *nested_vmcb;
3610 struct vmcb *hsave = svm->nested.hsave;
3611 struct vmcb *vmcb = svm->vmcb;
3612 struct kvm_host_map map;
3613 u64 vmcb_gpa;
3614
3615 vmcb_gpa = svm->vmcb->save.rax;
3616
3617 ret = kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(vmcb_gpa), &map);
3618 if (ret == -EINVAL) {
3619 kvm_inject_gp(&svm->vcpu, 0);
3620 return 1;
3621 } else if (ret) {
3622 return kvm_skip_emulated_instruction(&svm->vcpu);
3623 }
3624
3625 ret = kvm_skip_emulated_instruction(&svm->vcpu);
3626
3627 nested_vmcb = map.hva;
3628
3629 if (!nested_vmcb_checks(nested_vmcb)) {
3630 nested_vmcb->control.exit_code = SVM_EXIT_ERR;
3631 nested_vmcb->control.exit_code_hi = 0;
3632 nested_vmcb->control.exit_info_1 = 0;
3633 nested_vmcb->control.exit_info_2 = 0;
3634
3635 kvm_vcpu_unmap(&svm->vcpu, &map, true);
3636
3637 return ret;
3638 }
3639
3640 trace_kvm_nested_vmrun(svm->vmcb->save.rip, vmcb_gpa,
3641 nested_vmcb->save.rip,
3642 nested_vmcb->control.int_ctl,
3643 nested_vmcb->control.event_inj,
3644 nested_vmcb->control.nested_ctl);
3645
3646 trace_kvm_nested_intercepts(nested_vmcb->control.intercept_cr & 0xffff,
3647 nested_vmcb->control.intercept_cr >> 16,
3648 nested_vmcb->control.intercept_exceptions,
3649 nested_vmcb->control.intercept);
3650
3651
3652 kvm_clear_exception_queue(&svm->vcpu);
3653 kvm_clear_interrupt_queue(&svm->vcpu);
3654
 /*
  * Save the current (L1) state into the host save area so that it
  * can be restored wholesale when a #VMEXIT occurs.
  */
3659 hsave->save.es = vmcb->save.es;
3660 hsave->save.cs = vmcb->save.cs;
3661 hsave->save.ss = vmcb->save.ss;
3662 hsave->save.ds = vmcb->save.ds;
3663 hsave->save.gdtr = vmcb->save.gdtr;
3664 hsave->save.idtr = vmcb->save.idtr;
3665 hsave->save.efer = svm->vcpu.arch.efer;
3666 hsave->save.cr0 = kvm_read_cr0(&svm->vcpu);
3667 hsave->save.cr4 = svm->vcpu.arch.cr4;
3668 hsave->save.rflags = kvm_get_rflags(&svm->vcpu);
3669 hsave->save.rip = kvm_rip_read(&svm->vcpu);
3670 hsave->save.rsp = vmcb->save.rsp;
3671 hsave->save.rax = vmcb->save.rax;
3672 if (npt_enabled)
3673 hsave->save.cr3 = vmcb->save.cr3;
3674 else
3675 hsave->save.cr3 = kvm_read_cr3(&svm->vcpu);
3676
3677 copy_vmcb_control_area(hsave, vmcb);
3678
3679 enter_svm_guest_mode(svm, vmcb_gpa, nested_vmcb, &map);
3680
3681 if (!nested_svm_vmrun_msrpm(svm)) {
3682 svm->vmcb->control.exit_code = SVM_EXIT_ERR;
3683 svm->vmcb->control.exit_code_hi = 0;
3684 svm->vmcb->control.exit_info_1 = 0;
3685 svm->vmcb->control.exit_info_2 = 0;
3686
3687 nested_svm_vmexit(svm);
3688 }
3689
3690 return ret;
3691}
3692
3693static void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb)
3694{
3695 to_vmcb->save.fs = from_vmcb->save.fs;
3696 to_vmcb->save.gs = from_vmcb->save.gs;
3697 to_vmcb->save.tr = from_vmcb->save.tr;
3698 to_vmcb->save.ldtr = from_vmcb->save.ldtr;
3699 to_vmcb->save.kernel_gs_base = from_vmcb->save.kernel_gs_base;
3700 to_vmcb->save.star = from_vmcb->save.star;
3701 to_vmcb->save.lstar = from_vmcb->save.lstar;
3702 to_vmcb->save.cstar = from_vmcb->save.cstar;
3703 to_vmcb->save.sfmask = from_vmcb->save.sfmask;
3704 to_vmcb->save.sysenter_cs = from_vmcb->save.sysenter_cs;
3705 to_vmcb->save.sysenter_esp = from_vmcb->save.sysenter_esp;
3706 to_vmcb->save.sysenter_eip = from_vmcb->save.sysenter_eip;
3707}
3708
3709static int vmload_interception(struct vcpu_svm *svm)
3710{
3711 struct vmcb *nested_vmcb;
3712 struct kvm_host_map map;
3713 int ret;
3714
3715 if (nested_svm_check_permissions(svm))
3716 return 1;
3717
3718 ret = kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(svm->vmcb->save.rax), &map);
3719 if (ret) {
3720 if (ret == -EINVAL)
3721 kvm_inject_gp(&svm->vcpu, 0);
3722 return 1;
3723 }
3724
3725 nested_vmcb = map.hva;
3726
3727 ret = kvm_skip_emulated_instruction(&svm->vcpu);
3728
3729 nested_svm_vmloadsave(nested_vmcb, svm->vmcb);
3730 kvm_vcpu_unmap(&svm->vcpu, &map, true);
3731
3732 return ret;
3733}
3734
3735static int vmsave_interception(struct vcpu_svm *svm)
3736{
3737 struct vmcb *nested_vmcb;
3738 struct kvm_host_map map;
3739 int ret;
3740
3741 if (nested_svm_check_permissions(svm))
3742 return 1;
3743
3744 ret = kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(svm->vmcb->save.rax), &map);
3745 if (ret) {
3746 if (ret == -EINVAL)
3747 kvm_inject_gp(&svm->vcpu, 0);
3748 return 1;
3749 }
3750
3751 nested_vmcb = map.hva;
3752
3753 ret = kvm_skip_emulated_instruction(&svm->vcpu);
3754
3755 nested_svm_vmloadsave(svm->vmcb, nested_vmcb);
3756 kvm_vcpu_unmap(&svm->vcpu, &map, true);
3757
3758 return ret;
3759}
3760
3761static int vmrun_interception(struct vcpu_svm *svm)
3762{
3763 if (nested_svm_check_permissions(svm))
3764 return 1;
3765
3766 return nested_svm_vmrun(svm);
3767}
3768
3769static int stgi_interception(struct vcpu_svm *svm)
3770{
3771 int ret;
3772
3773 if (nested_svm_check_permissions(svm))
3774 return 1;
3775
 /*
  * If VGIF is enabled, the STGI intercept is only added to detect
  * the opening of the SMI/NMI window; remove it now.
  */
3780 if (vgif_enabled(svm))
3781 clr_intercept(svm, INTERCEPT_STGI);
3782
3783 ret = kvm_skip_emulated_instruction(&svm->vcpu);
3784 kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
3785
3786 enable_gif(svm);
3787
3788 return ret;
3789}
3790
3791static int clgi_interception(struct vcpu_svm *svm)
3792{
3793 int ret;
3794
3795 if (nested_svm_check_permissions(svm))
3796 return 1;
3797
3798 ret = kvm_skip_emulated_instruction(&svm->vcpu);
3799
3800 disable_gif(svm);
3801
3802
3803 if (!kvm_vcpu_apicv_active(&svm->vcpu)) {
3804 svm_clear_vintr(svm);
3805 svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
3806 mark_dirty(svm->vmcb, VMCB_INTR);
3807 }
3808
3809 return ret;
3810}
3811
3812static int invlpga_interception(struct vcpu_svm *svm)
3813{
3814 struct kvm_vcpu *vcpu = &svm->vcpu;
3815
3816 trace_kvm_invlpga(svm->vmcb->save.rip, kvm_rcx_read(&svm->vcpu),
3817 kvm_rax_read(&svm->vcpu));
3818
 /* Treat INVLPGA the same as INVLPG (could be optimized) */
3820 kvm_mmu_invlpg(vcpu, kvm_rax_read(&svm->vcpu));
3821
3822 return kvm_skip_emulated_instruction(&svm->vcpu);
3823}
3824
3825static int skinit_interception(struct vcpu_svm *svm)
3826{
3827 trace_kvm_skinit(svm->vmcb->save.rip, kvm_rax_read(&svm->vcpu));
3828
3829 kvm_queue_exception(&svm->vcpu, UD_VECTOR);
3830 return 1;
3831}
3832
3833static int wbinvd_interception(struct vcpu_svm *svm)
3834{
3835 return kvm_emulate_wbinvd(&svm->vcpu);
3836}
3837
3838static int xsetbv_interception(struct vcpu_svm *svm)
3839{
3840 u64 new_bv = kvm_read_edx_eax(&svm->vcpu);
3841 u32 index = kvm_rcx_read(&svm->vcpu);
3842
3843 if (kvm_set_xcr(&svm->vcpu, index, new_bv) == 0) {
3844 return kvm_skip_emulated_instruction(&svm->vcpu);
3845 }
3846
3847 return 1;
3848}
3849
3850static int rdpru_interception(struct vcpu_svm *svm)
3851{
3852 kvm_queue_exception(&svm->vcpu, UD_VECTOR);
3853 return 1;
3854}
3855
3856static int task_switch_interception(struct vcpu_svm *svm)
3857{
3858 u16 tss_selector;
3859 int reason;
3860 int int_type = svm->vmcb->control.exit_int_info &
3861 SVM_EXITINTINFO_TYPE_MASK;
3862 int int_vec = svm->vmcb->control.exit_int_info & SVM_EVTINJ_VEC_MASK;
3863 uint32_t type =
3864 svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_TYPE_MASK;
3865 uint32_t idt_v =
3866 svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_VALID;
3867 bool has_error_code = false;
3868 u32 error_code = 0;
3869
3870 tss_selector = (u16)svm->vmcb->control.exit_info_1;
3871
3872 if (svm->vmcb->control.exit_info_2 &
3873 (1ULL << SVM_EXITINFOSHIFT_TS_REASON_IRET))
3874 reason = TASK_SWITCH_IRET;
3875 else if (svm->vmcb->control.exit_info_2 &
3876 (1ULL << SVM_EXITINFOSHIFT_TS_REASON_JMP))
3877 reason = TASK_SWITCH_JMP;
3878 else if (idt_v)
3879 reason = TASK_SWITCH_GATE;
3880 else
3881 reason = TASK_SWITCH_CALL;
3882
3883 if (reason == TASK_SWITCH_GATE) {
3884 switch (type) {
3885 case SVM_EXITINTINFO_TYPE_NMI:
3886 svm->vcpu.arch.nmi_injected = false;
3887 break;
3888 case SVM_EXITINTINFO_TYPE_EXEPT:
3889 if (svm->vmcb->control.exit_info_2 &
3890 (1ULL << SVM_EXITINFOSHIFT_TS_HAS_ERROR_CODE)) {
3891 has_error_code = true;
3892 error_code =
3893 (u32)svm->vmcb->control.exit_info_2;
3894 }
3895 kvm_clear_exception_queue(&svm->vcpu);
3896 break;
3897 case SVM_EXITINTINFO_TYPE_INTR:
3898 kvm_clear_interrupt_queue(&svm->vcpu);
3899 break;
3900 default:
3901 break;
3902 }
3903 }
3904
3905 if (reason != TASK_SWITCH_GATE ||
3906 int_type == SVM_EXITINTINFO_TYPE_SOFT ||
3907 (int_type == SVM_EXITINTINFO_TYPE_EXEPT &&
3908 (int_vec == OF_VECTOR || int_vec == BP_VECTOR))) {
3909 if (!skip_emulated_instruction(&svm->vcpu))
3910 return 0;
3911 }
3912
3913 if (int_type != SVM_EXITINTINFO_TYPE_SOFT)
3914 int_vec = -1;
3915
3916 return kvm_task_switch(&svm->vcpu, tss_selector, int_vec, reason,
3917 has_error_code, error_code);
3918}
3919
3920static int cpuid_interception(struct vcpu_svm *svm)
3921{
3922 return kvm_emulate_cpuid(&svm->vcpu);
3923}
3924
3925static int iret_interception(struct vcpu_svm *svm)
3926{
3927 ++svm->vcpu.stat.nmi_window_exits;
3928 clr_intercept(svm, INTERCEPT_IRET);
3929 svm->vcpu.arch.hflags |= HF_IRET_MASK;
3930 svm->nmi_iret_rip = kvm_rip_read(&svm->vcpu);
3931 kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
3932 return 1;
3933}
3934
3935static int invlpg_interception(struct vcpu_svm *svm)
3936{
3937 if (!static_cpu_has(X86_FEATURE_DECODEASSISTS))
3938 return kvm_emulate_instruction(&svm->vcpu, 0);
3939
3940 kvm_mmu_invlpg(&svm->vcpu, svm->vmcb->control.exit_info_1);
3941 return kvm_skip_emulated_instruction(&svm->vcpu);
3942}
3943
3944static int emulate_on_interception(struct vcpu_svm *svm)
3945{
3946 return kvm_emulate_instruction(&svm->vcpu, 0);
3947}
3948
3949static int rsm_interception(struct vcpu_svm *svm)
3950{
3951 return kvm_emulate_instruction_from_buffer(&svm->vcpu, rsm_ins_bytes, 2);
3952}
3953
3954static int rdpmc_interception(struct vcpu_svm *svm)
3955{
3956 int err;
3957
3958 if (!nrips)
3959 return emulate_on_interception(svm);
3960
3961 err = kvm_rdpmc(&svm->vcpu);
3962 return kvm_complete_insn_gp(&svm->vcpu, err);
3963}
3964
3965static bool check_selective_cr0_intercepted(struct vcpu_svm *svm,
3966 unsigned long val)
3967{
3968 unsigned long cr0 = svm->vcpu.arch.cr0;
3969 bool ret = false;
3970 u64 intercept;
3971
3972 intercept = svm->nested.intercept;
3973
3974 if (!is_guest_mode(&svm->vcpu) ||
3975 (!(intercept & (1ULL << INTERCEPT_SELECTIVE_CR0))))
3976 return false;
3977
3978 cr0 &= ~SVM_CR0_SELECTIVE_MASK;
3979 val &= ~SVM_CR0_SELECTIVE_MASK;
3980
3981 if (cr0 ^ val) {
3982 svm->vmcb->control.exit_code = SVM_EXIT_CR0_SEL_WRITE;
3983 ret = (nested_svm_exit_handled(svm) == NESTED_EXIT_DONE);
3984 }
3985
3986 return ret;
3987}
3988
3989#define CR_VALID (1ULL << 63)
3990
3991static int cr_interception(struct vcpu_svm *svm)
3992{
3993 int reg, cr;
3994 unsigned long val;
3995 int err;
3996
3997 if (!static_cpu_has(X86_FEATURE_DECODEASSISTS))
3998 return emulate_on_interception(svm);
3999
4000 if (unlikely((svm->vmcb->control.exit_info_1 & CR_VALID) == 0))
4001 return emulate_on_interception(svm);
4002
4003 reg = svm->vmcb->control.exit_info_1 & SVM_EXITINFO_REG_MASK;
4004 if (svm->vmcb->control.exit_code == SVM_EXIT_CR0_SEL_WRITE)
4005 cr = SVM_EXIT_WRITE_CR0 - SVM_EXIT_READ_CR0;
4006 else
4007 cr = svm->vmcb->control.exit_code - SVM_EXIT_READ_CR0;
4008
4009 err = 0;
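 /* Exit codes 16 and above encode CR writes (mov to CRn); below 16 are reads */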
4010 if (cr >= 16) {
4011 cr -= 16;
4012 val = kvm_register_read(&svm->vcpu, reg);
4013 switch (cr) {
4014 case 0:
4015 if (!check_selective_cr0_intercepted(svm, val))
4016 err = kvm_set_cr0(&svm->vcpu, val);
4017 else
4018 return 1;
4019
4020 break;
4021 case 3:
4022 err = kvm_set_cr3(&svm->vcpu, val);
4023 break;
4024 case 4:
4025 err = kvm_set_cr4(&svm->vcpu, val);
4026 break;
4027 case 8:
4028 err = kvm_set_cr8(&svm->vcpu, val);
4029 break;
4030 default:
4031 WARN(1, "unhandled write to CR%d", cr);
4032 kvm_queue_exception(&svm->vcpu, UD_VECTOR);
4033 return 1;
4034 }
4035 } else {
4036 switch (cr) {
4037 case 0:
4038 val = kvm_read_cr0(&svm->vcpu);
4039 break;
4040 case 2:
4041 val = svm->vcpu.arch.cr2;
4042 break;
4043 case 3:
4044 val = kvm_read_cr3(&svm->vcpu);
4045 break;
4046 case 4:
4047 val = kvm_read_cr4(&svm->vcpu);
4048 break;
4049 case 8:
4050 val = kvm_get_cr8(&svm->vcpu);
4051 break;
4052 default:
4053 WARN(1, "unhandled read from CR%d", cr);
4054 kvm_queue_exception(&svm->vcpu, UD_VECTOR);
4055 return 1;
4056 }
4057 kvm_register_write(&svm->vcpu, reg, val);
4058 }
4059 return kvm_complete_insn_gp(&svm->vcpu, err);
4060}
4061
4062static int dr_interception(struct vcpu_svm *svm)
4063{
4064 int reg, dr;
4065 unsigned long val;
4066
4067 if (svm->vcpu.guest_debug == 0) {
 /*
  * No more DR vmexits; force a reload of the debug registers and
  * reenter on this instruction. The next vmexit will retrieve the
  * full state of the debug registers.
  */
4073 clr_dr_intercepts(svm);
4074 svm->vcpu.arch.switch_db_regs |= KVM_DEBUGREG_WONT_EXIT;
4075 return 1;
4076 }
4077
4078 if (!boot_cpu_has(X86_FEATURE_DECODEASSISTS))
4079 return emulate_on_interception(svm);
4080
4081 reg = svm->vmcb->control.exit_info_1 & SVM_EXITINFO_REG_MASK;
4082 dr = svm->vmcb->control.exit_code - SVM_EXIT_READ_DR0;
4083
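 /* Exit codes 16 and above encode DR writes (mov to DRn); below 16 are reads */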
4084 if (dr >= 16) {
4085 if (!kvm_require_dr(&svm->vcpu, dr - 16))
4086 return 1;
4087 val = kvm_register_read(&svm->vcpu, reg);
4088 kvm_set_dr(&svm->vcpu, dr - 16, val);
4089 } else {
4090 if (!kvm_require_dr(&svm->vcpu, dr))
4091 return 1;
4092 kvm_get_dr(&svm->vcpu, dr, &val);
4093 kvm_register_write(&svm->vcpu, reg, val);
4094 }
4095
4096 return kvm_skip_emulated_instruction(&svm->vcpu);
4097}
4098
4099static int cr8_write_interception(struct vcpu_svm *svm)
4100{
4101 struct kvm_run *kvm_run = svm->vcpu.run;
4102 int r;
4103
4104 u8 cr8_prev = kvm_get_cr8(&svm->vcpu);
4105
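 /* Without an in-kernel LAPIC, userspace is told when the guest lowers the TPR */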
4106 r = cr_interception(svm);
4107 if (lapic_in_kernel(&svm->vcpu))
4108 return r;
4109 if (cr8_prev <= kvm_get_cr8(&svm->vcpu))
4110 return r;
4111 kvm_run->exit_reason = KVM_EXIT_SET_TPR;
4112 return 0;
4113}
4114
4115static int svm_get_msr_feature(struct kvm_msr_entry *msr)
4116{
4117 msr->data = 0;
4118
4119 switch (msr->index) {
4120 case MSR_F10H_DECFG:
4121 if (boot_cpu_has(X86_FEATURE_LFENCE_RDTSC))
4122 msr->data |= MSR_F10H_DECFG_LFENCE_SERIALIZE;
4123 break;
4124 default:
4125 return 1;
4126 }
4127
4128 return 0;
4129}
4130
4131static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
4132{
4133 struct vcpu_svm *svm = to_svm(vcpu);
4134
4135 switch (msr_info->index) {
4136 case MSR_STAR:
4137 msr_info->data = svm->vmcb->save.star;
4138 break;
4139#ifdef CONFIG_X86_64
4140 case MSR_LSTAR:
4141 msr_info->data = svm->vmcb->save.lstar;
4142 break;
4143 case MSR_CSTAR:
4144 msr_info->data = svm->vmcb->save.cstar;
4145 break;
4146 case MSR_KERNEL_GS_BASE:
4147 msr_info->data = svm->vmcb->save.kernel_gs_base;
4148 break;
4149 case MSR_SYSCALL_MASK:
4150 msr_info->data = svm->vmcb->save.sfmask;
4151 break;
4152#endif
4153 case MSR_IA32_SYSENTER_CS:
4154 msr_info->data = svm->vmcb->save.sysenter_cs;
4155 break;
4156 case MSR_IA32_SYSENTER_EIP:
4157 msr_info->data = svm->sysenter_eip;
4158 break;
4159 case MSR_IA32_SYSENTER_ESP:
4160 msr_info->data = svm->sysenter_esp;
4161 break;
4162 case MSR_TSC_AUX:
4163 if (!boot_cpu_has(X86_FEATURE_RDTSCP))
4164 return 1;
4165 msr_info->data = svm->tsc_aux;
4166 break;
4167
 /*
  * Nobody will change the following 5 values in the VMCB so we can
  * safely return them on rdmsr. They will always be 0 until LBRV is
  * implemented.
  */
4172 case MSR_IA32_DEBUGCTLMSR:
4173 msr_info->data = svm->vmcb->save.dbgctl;
4174 break;
4175 case MSR_IA32_LASTBRANCHFROMIP:
4176 msr_info->data = svm->vmcb->save.br_from;
4177 break;
4178 case MSR_IA32_LASTBRANCHTOIP:
4179 msr_info->data = svm->vmcb->save.br_to;
4180 break;
4181 case MSR_IA32_LASTINTFROMIP:
4182 msr_info->data = svm->vmcb->save.last_excp_from;
4183 break;
4184 case MSR_IA32_LASTINTTOIP:
4185 msr_info->data = svm->vmcb->save.last_excp_to;
4186 break;
4187 case MSR_VM_HSAVE_PA:
4188 msr_info->data = svm->nested.hsave_msr;
4189 break;
4190 case MSR_VM_CR:
4191 msr_info->data = svm->nested.vm_cr_msr;
4192 break;
4193 case MSR_IA32_SPEC_CTRL:
4194 if (!msr_info->host_initiated &&
4195 !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS) &&
4196 !guest_cpuid_has(vcpu, X86_FEATURE_AMD_SSBD))
4197 return 1;
4198
4199 msr_info->data = svm->spec_ctrl;
4200 break;
4201 case MSR_AMD64_VIRT_SPEC_CTRL:
4202 if (!msr_info->host_initiated &&
4203 !guest_cpuid_has(vcpu, X86_FEATURE_VIRT_SSBD))
4204 return 1;
4205
4206 msr_info->data = svm->virt_spec_ctrl;
4207 break;
4208 case MSR_F15H_IC_CFG: {
4209
4210 int family, model;
4211
4212 family = guest_cpuid_family(vcpu);
4213 model = guest_cpuid_model(vcpu);
4214
4215 if (family < 0 || model < 0)
4216 return kvm_get_msr_common(vcpu, msr_info);
4217
4218 msr_info->data = 0;
4219
4220 if (family == 0x15 &&
4221 (model >= 0x2 && model < 0x20))
4222 msr_info->data = 0x1E;
4223 }
4224 break;
4225 case MSR_F10H_DECFG:
4226 msr_info->data = svm->msr_decfg;
4227 break;
4228 default:
4229 return kvm_get_msr_common(vcpu, msr_info);
4230 }
4231 return 0;
4232}
4233
4234static int rdmsr_interception(struct vcpu_svm *svm)
4235{
4236 return kvm_emulate_rdmsr(&svm->vcpu);
4237}
4238
4239static int svm_set_vm_cr(struct kvm_vcpu *vcpu, u64 data)
4240{
4241 struct vcpu_svm *svm = to_svm(vcpu);
4242 int svm_dis, chg_mask;
4243
4244 if (data & ~SVM_VM_CR_VALID_MASK)
4245 return 1;
4246
4247 chg_mask = SVM_VM_CR_VALID_MASK;
4248
4249 if (svm->nested.vm_cr_msr & SVM_VM_CR_SVM_DIS_MASK)
4250 chg_mask &= ~(SVM_VM_CR_SVM_LOCK_MASK | SVM_VM_CR_SVM_DIS_MASK);
4251
4252 svm->nested.vm_cr_msr &= ~chg_mask;
4253 svm->nested.vm_cr_msr |= (data & chg_mask);
4254
4255 svm_dis = svm->nested.vm_cr_msr & SVM_VM_CR_SVM_DIS_MASK;
4256
4257
4258 if (svm_dis && (vcpu->arch.efer & EFER_SVME))
4259 return 1;
4260
4261 return 0;
4262}
4263
4264static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
4265{
4266 struct vcpu_svm *svm = to_svm(vcpu);
4267
4268 u32 ecx = msr->index;
4269 u64 data = msr->data;
4270 switch (ecx) {
4271 case MSR_IA32_CR_PAT:
4272 if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data))
4273 return 1;
4274 vcpu->arch.pat = data;
4275 svm->vmcb->save.g_pat = data;
4276 mark_dirty(svm->vmcb, VMCB_NPT);
4277 break;
4278 case MSR_IA32_SPEC_CTRL:
4279 if (!msr->host_initiated &&
4280 !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS) &&
4281 !guest_cpuid_has(vcpu, X86_FEATURE_AMD_SSBD))
4282 return 1;
4283
 /* The STIBP bit doesn't fault even if it's not advertised */
4285 if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD))
4286 return 1;
4287
4288 svm->spec_ctrl = data;
4289
4290 if (!data)
4291 break;
4292
 /*
  * For non-nested: when SPEC_CTRL is written to a non-zero value
  * for the first time, pass the MSR through to the guest.
  *
  * For nested: the handling of the MSR bitmap for L2 guests is
  * done in nested_svm_vmrun_msrpm. The L1 MSR bit is updated as
  * well since the MSR will be touched anyway now.
  */
4304 set_msr_interception(svm->msrpm, MSR_IA32_SPEC_CTRL, 1, 1);
4305 break;
4306 case MSR_IA32_PRED_CMD:
4307 if (!msr->host_initiated &&
4308 !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBPB))
4309 return 1;
4310
4311 if (data & ~PRED_CMD_IBPB)
4312 return 1;
4313
4314 if (!data)
4315 break;
4316
4317 wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB);
4318 if (is_guest_mode(vcpu))
4319 break;
4320 set_msr_interception(svm->msrpm, MSR_IA32_PRED_CMD, 0, 1);
4321 break;
4322 case MSR_AMD64_VIRT_SPEC_CTRL:
4323 if (!msr->host_initiated &&
4324 !guest_cpuid_has(vcpu, X86_FEATURE_VIRT_SSBD))
4325 return 1;
4326
4327 if (data & ~SPEC_CTRL_SSBD)
4328 return 1;
4329
4330 svm->virt_spec_ctrl = data;
4331 break;
4332 case MSR_STAR:
4333 svm->vmcb->save.star = data;
4334 break;
4335#ifdef CONFIG_X86_64
4336 case MSR_LSTAR:
4337 svm->vmcb->save.lstar = data;
4338 break;
4339 case MSR_CSTAR:
4340 svm->vmcb->save.cstar = data;
4341 break;
4342 case MSR_KERNEL_GS_BASE:
4343 svm->vmcb->save.kernel_gs_base = data;
4344 break;
4345 case MSR_SYSCALL_MASK:
4346 svm->vmcb->save.sfmask = data;
4347 break;
4348#endif
4349 case MSR_IA32_SYSENTER_CS:
4350 svm->vmcb->save.sysenter_cs = data;
4351 break;
4352 case MSR_IA32_SYSENTER_EIP:
4353 svm->sysenter_eip = data;
4354 svm->vmcb->save.sysenter_eip = data;
4355 break;
4356 case MSR_IA32_SYSENTER_ESP:
4357 svm->sysenter_esp = data;
4358 svm->vmcb->save.sysenter_esp = data;
4359 break;
4360 case MSR_TSC_AUX:
4361 if (!boot_cpu_has(X86_FEATURE_RDTSCP))
4362 return 1;
4363
 /*
  * TSC_AUX writes are rare, so the MSR is updated here instead of
  * being added to direct_access_msrs; doing that would require a
  * rdmsr in svm_vcpu_put().
  */
4369 svm->tsc_aux = data;
4370 wrmsrl(MSR_TSC_AUX, svm->tsc_aux);
4371 break;
4372 case MSR_IA32_DEBUGCTLMSR:
4373 if (!boot_cpu_has(X86_FEATURE_LBRV)) {
4374 vcpu_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTL 0x%llx, nop\n",
4375 __func__, data);
4376 break;
4377 }
4378 if (data & DEBUGCTL_RESERVED_BITS)
4379 return 1;
4380
4381 svm->vmcb->save.dbgctl = data;
4382 mark_dirty(svm->vmcb, VMCB_LBR);
4383 if (data & (1ULL<<0))
4384 svm_enable_lbrv(svm);
4385 else
4386 svm_disable_lbrv(svm);
4387 break;
4388 case MSR_VM_HSAVE_PA:
4389 svm->nested.hsave_msr = data;
4390 break;
4391 case MSR_VM_CR:
4392 return svm_set_vm_cr(vcpu, data);
4393 case MSR_VM_IGNNE:
4394 vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data);
4395 break;
4396 case MSR_F10H_DECFG: {
4397 struct kvm_msr_entry msr_entry;
4398
4399 msr_entry.index = msr->index;
4400 if (svm_get_msr_feature(&msr_entry))
4401 return 1;
4402
4403
4404 if (data & ~msr_entry.data)
4405 return 1;
4406
4407
4408 if (!msr->host_initiated && (data ^ msr_entry.data))
4409 return 1;
4410
4411 svm->msr_decfg = data;
4412 break;
4413 }
4414 case MSR_IA32_APICBASE:
4415 if (kvm_vcpu_apicv_active(vcpu))
4416 avic_update_vapic_bar(to_svm(vcpu), data);
 /* Fall through */
4418 default:
4419 return kvm_set_msr_common(vcpu, msr);
4420 }
4421 return 0;
4422}
4423
4424static int wrmsr_interception(struct vcpu_svm *svm)
4425{
4426 return kvm_emulate_wrmsr(&svm->vcpu);
4427}
4428
4429static int msr_interception(struct vcpu_svm *svm)
4430{
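 /* exit_info_1 is 1 for WRMSR and 0 for RDMSR */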
4431 if (svm->vmcb->control.exit_info_1)
4432 return wrmsr_interception(svm);
4433 else
4434 return rdmsr_interception(svm);
4435}
4436
4437static int interrupt_window_interception(struct vcpu_svm *svm)
4438{
4439 kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
4440 svm_clear_vintr(svm);
4441 svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
4442 mark_dirty(svm->vmcb, VMCB_INTR);
4443 ++svm->vcpu.stat.irq_window_exits;
4444 return 1;
4445}
4446
4447static int pause_interception(struct vcpu_svm *svm)
4448{
4449 struct kvm_vcpu *vcpu = &svm->vcpu;
4450 bool in_kernel = (svm_get_cpl(vcpu) == 0);
4451
4452 if (pause_filter_thresh)
4453 grow_ple_window(vcpu);
4454
4455 kvm_vcpu_on_spin(vcpu, in_kernel);
4456 return 1;
4457}
4458
4459static int nop_interception(struct vcpu_svm *svm)
4460{
4461 return kvm_skip_emulated_instruction(&(svm->vcpu));
4462}
4463
4464static int monitor_interception(struct vcpu_svm *svm)
4465{
4466 printk_once(KERN_WARNING "kvm: MONITOR instruction emulated as NOP!\n");
4467 return nop_interception(svm);
4468}
4469
4470static int mwait_interception(struct vcpu_svm *svm)
4471{
4472 printk_once(KERN_WARNING "kvm: MWAIT instruction emulated as NOP!\n");
4473 return nop_interception(svm);
4474}
4475
4476enum avic_ipi_failure_cause {
4477 AVIC_IPI_FAILURE_INVALID_INT_TYPE,
4478 AVIC_IPI_FAILURE_TARGET_NOT_RUNNING,
4479 AVIC_IPI_FAILURE_INVALID_TARGET,
4480 AVIC_IPI_FAILURE_INVALID_BACKING_PAGE,
4481};
4482
4483static int avic_incomplete_ipi_interception(struct vcpu_svm *svm)
4484{
4485 u32 icrh = svm->vmcb->control.exit_info_1 >> 32;
4486 u32 icrl = svm->vmcb->control.exit_info_1;
4487 u32 id = svm->vmcb->control.exit_info_2 >> 32;
4488 u32 index = svm->vmcb->control.exit_info_2 & 0xFF;
4489 struct kvm_lapic *apic = svm->vcpu.arch.apic;
4490
4491 trace_kvm_avic_incomplete_ipi(svm->vcpu.vcpu_id, icrh, icrl, id, index);
4492
4493 switch (id) {
4494 case AVIC_IPI_FAILURE_INVALID_INT_TYPE:
 /*
  * AVIC hardware handles the generation of IPIs when the specified
  * Message Type is Fixed (also known as fixed delivery mode) and
  * the Trigger Mode is edge-triggered. The hardware also supports
  * self and broadcast delivery modes specified via the Destination
  * Shorthand (DSH) field of the ICRL. Logical and physical APIC ID
  * formats are supported. All other IPI types cause a #VMEXIT,
  * which needs to be emulated here.
  */
4506 kvm_lapic_reg_write(apic, APIC_ICR2, icrh);
4507 kvm_lapic_reg_write(apic, APIC_ICR, icrl);
4508 break;
4509 case AVIC_IPI_FAILURE_TARGET_NOT_RUNNING: {
4510 int i;
4511 struct kvm_vcpu *vcpu;
4512 struct kvm *kvm = svm->vcpu.kvm;
4513 struct kvm_lapic *apic = svm->vcpu.arch.apic;
4514
4515		/*
4516		 * The AVIC hardware has already set the IRR bits of the
4517		 * valid target vCPUs; all that is left is to kick any
4518		 * destination vCPU that is not currently running.
4519		 */
4520 kvm_for_each_vcpu(i, vcpu, kvm) {
4521 bool m = kvm_apic_match_dest(vcpu, apic,
4522 icrl & KVM_APIC_SHORT_MASK,
4523 GET_APIC_DEST_FIELD(icrh),
4524 icrl & KVM_APIC_DEST_MASK);
4525
4526 if (m && !avic_vcpu_is_running(vcpu))
4527 kvm_vcpu_wake_up(vcpu);
4528 }
4529 break;
4530 }
4531 case AVIC_IPI_FAILURE_INVALID_TARGET:
4532 WARN_ONCE(1, "Invalid IPI target: index=%u, vcpu=%d, icr=%#0x:%#0x\n",
4533 index, svm->vcpu.vcpu_id, icrh, icrl);
4534 break;
4535 case AVIC_IPI_FAILURE_INVALID_BACKING_PAGE:
4536 WARN_ONCE(1, "Invalid backing page\n");
4537 break;
4538 default:
4539 pr_err("Unknown IPI interception\n");
4540 }
4541
4542 return 1;
4543}
4544
4545static u32 *avic_get_logical_id_entry(struct kvm_vcpu *vcpu, u32 ldr, bool flat)
4546{
4547 struct kvm_svm *kvm_svm = to_kvm_svm(vcpu->kvm);
4548 int index;
4549 u32 *logical_apic_id_table;
4550 int dlid = GET_APIC_LOGICAL_ID(ldr);
4551
4552 if (!dlid)
4553 return NULL;
4554
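	/*
	 * Flat mode: the logical ID is a bitmap of up to 8 CPUs and the
	 * table index is the bit position.  Cluster mode: the high nibble
	 * selects the cluster and the low nibble is a 4-bit bitmap within
	 * it, giving index = cluster * 4 + bit.
	 */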
4555 if (flat) {
4556 index = ffs(dlid) - 1;
4557 if (index > 7)
4558 return NULL;
4559 } else {
4560 int cluster = (dlid & 0xf0) >> 4;
4561 int apic = ffs(dlid & 0x0f) - 1;
4562
4563 if ((apic < 0) || (apic > 7) ||
4564 (cluster >= 0xf))
4565 return NULL;
4566 index = (cluster << 2) + apic;
4567 }
4568
4569 logical_apic_id_table = (u32 *) page_address(kvm_svm->avic_logical_id_table_page);
4570
4571 return &logical_apic_id_table[index];
4572}
4573
4574static int avic_ldr_write(struct kvm_vcpu *vcpu, u8 g_physical_id, u32 ldr)
4575{
4576 bool flat;
4577 u32 *entry, new_entry;
4578
4579 flat = kvm_lapic_get_reg(vcpu->arch.apic, APIC_DFR) == APIC_DFR_FLAT;
4580 entry = avic_get_logical_id_entry(vcpu, ldr, flat);
4581 if (!entry)
4582 return -EINVAL;
4583
4584 new_entry = READ_ONCE(*entry);
4585 new_entry &= ~AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK;
4586 new_entry |= (g_physical_id & AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK);
4587 new_entry |= AVIC_LOGICAL_ID_ENTRY_VALID_MASK;
4588 WRITE_ONCE(*entry, new_entry);
4589
4590 return 0;
4591}
4592
4593static void avic_invalidate_logical_id_entry(struct kvm_vcpu *vcpu)
4594{
4595 struct vcpu_svm *svm = to_svm(vcpu);
4596 bool flat = svm->dfr_reg == APIC_DFR_FLAT;
4597 u32 *entry = avic_get_logical_id_entry(vcpu, svm->ldr_reg, flat);
4598
4599 if (entry)
4600 clear_bit(AVIC_LOGICAL_ID_ENTRY_VALID_BIT, (unsigned long *)entry);
4601}
4602
4603static int avic_handle_ldr_update(struct kvm_vcpu *vcpu)
4604{
4605 int ret = 0;
4606 struct vcpu_svm *svm = to_svm(vcpu);
4607 u32 ldr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_LDR);
4608 u32 id = kvm_xapic_id(vcpu->arch.apic);
4609
4610 if (ldr == svm->ldr_reg)
4611 return 0;
4612
4613 avic_invalidate_logical_id_entry(vcpu);
4614
4615 if (ldr)
4616 ret = avic_ldr_write(vcpu, id, ldr);
4617
4618 if (!ret)
4619 svm->ldr_reg = ldr;
4620
4621 return ret;
4622}
4623
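/*
 * When the guest writes a new xAPIC ID, move this vCPU's entry in the AVIC
 * physical APIC ID table from the slot indexed by vcpu_id to the slot for
 * the new ID and update the cached pointer.
 */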
4624static int avic_handle_apic_id_update(struct kvm_vcpu *vcpu)
4625{
4626 u64 *old, *new;
4627 struct vcpu_svm *svm = to_svm(vcpu);
4628 u32 id = kvm_xapic_id(vcpu->arch.apic);
4629
4630 if (vcpu->vcpu_id == id)
4631 return 0;
4632
4633 old = avic_get_physical_id_entry(vcpu, vcpu->vcpu_id);
4634 new = avic_get_physical_id_entry(vcpu, id);
4635 if (!new || !old)
4636 return 1;
4637
4638	/* Move the physical APIC ID table entry to the new slot */
4639 *new = *old;
4640 *old = 0ULL;
4641 to_svm(vcpu)->avic_physical_id_cache = new;
4642
4643	/*
4644	 * If the LDR has already been set up, refresh the logical APIC ID
4645	 * table entry, which references the guest physical APIC ID.
4646	 */
4647 if (svm->ldr_reg)
4648 avic_handle_ldr_update(vcpu);
4649
4650 return 0;
4651}
4652
4653static void avic_handle_dfr_update(struct kvm_vcpu *vcpu)
4654{
4655 struct vcpu_svm *svm = to_svm(vcpu);
4656 u32 dfr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_DFR);
4657
4658 if (svm->dfr_reg == dfr)
4659 return;
4660
4661 avic_invalidate_logical_id_entry(vcpu);
4662 svm->dfr_reg = dfr;
4663}
4664
4665static int avic_unaccel_trap_write(struct vcpu_svm *svm)
4666{
4667 struct kvm_lapic *apic = svm->vcpu.arch.apic;
4668 u32 offset = svm->vmcb->control.exit_info_1 &
4669 AVIC_UNACCEL_ACCESS_OFFSET_MASK;
4670
4671 switch (offset) {
4672 case APIC_ID:
4673 if (avic_handle_apic_id_update(&svm->vcpu))
4674 return 0;
4675 break;
4676 case APIC_LDR:
4677 if (avic_handle_ldr_update(&svm->vcpu))
4678 return 0;
4679 break;
4680 case APIC_DFR:
4681 avic_handle_dfr_update(&svm->vcpu);
4682 break;
4683 default:
4684 break;
4685 }
4686
4687 kvm_lapic_reg_write(apic, offset, kvm_lapic_get_reg(apic, offset));
4688
4689 return 1;
4690}
4691
4692static bool is_avic_unaccelerated_access_trap(u32 offset)
4693{
4694 bool ret = false;
4695
4696 switch (offset) {
4697 case APIC_ID:
4698 case APIC_EOI:
4699 case APIC_RRR:
4700 case APIC_LDR:
4701 case APIC_DFR:
4702 case APIC_SPIV:
4703 case APIC_ESR:
4704 case APIC_ICR:
4705 case APIC_LVTT:
4706 case APIC_LVTTHMR:
4707 case APIC_LVTPC:
4708 case APIC_LVT0:
4709 case APIC_LVT1:
4710 case APIC_LVTERR:
4711 case APIC_TMICT:
4712 case APIC_TDCR:
4713 ret = true;
4714 break;
4715 default:
4716 break;
4717 }
4718 return ret;
4719}
4720
4721static int avic_unaccelerated_access_interception(struct vcpu_svm *svm)
4722{
4723 int ret = 0;
4724 u32 offset = svm->vmcb->control.exit_info_1 &
4725 AVIC_UNACCEL_ACCESS_OFFSET_MASK;
4726 u32 vector = svm->vmcb->control.exit_info_2 &
4727 AVIC_UNACCEL_ACCESS_VECTOR_MASK;
4728 bool write = (svm->vmcb->control.exit_info_1 >> 32) &
4729 AVIC_UNACCEL_ACCESS_WRITE_MASK;
4730 bool trap = is_avic_unaccelerated_access_trap(offset);
4731
4732 trace_kvm_avic_unaccelerated_access(svm->vcpu.vcpu_id, offset,
4733 trap, write, vector);
4734 if (trap) {
4735		/* Handle a trap-style exit: the write has already been performed */
4736 WARN_ONCE(!write, "svm: Handling trap read.\n");
4737 ret = avic_unaccel_trap_write(svm);
4738 } else {
4739		/* Handle a fault-style exit by emulating the access */
4740 ret = kvm_emulate_instruction(&svm->vcpu, 0);
4741 }
4742
4743 return ret;
4744}
4745
4746static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
4747 [SVM_EXIT_READ_CR0] = cr_interception,
4748 [SVM_EXIT_READ_CR3] = cr_interception,
4749 [SVM_EXIT_READ_CR4] = cr_interception,
4750 [SVM_EXIT_READ_CR8] = cr_interception,
4751 [SVM_EXIT_CR0_SEL_WRITE] = cr_interception,
4752 [SVM_EXIT_WRITE_CR0] = cr_interception,
4753 [SVM_EXIT_WRITE_CR3] = cr_interception,
4754 [SVM_EXIT_WRITE_CR4] = cr_interception,
4755 [SVM_EXIT_WRITE_CR8] = cr8_write_interception,
4756 [SVM_EXIT_READ_DR0] = dr_interception,
4757 [SVM_EXIT_READ_DR1] = dr_interception,
4758 [SVM_EXIT_READ_DR2] = dr_interception,
4759 [SVM_EXIT_READ_DR3] = dr_interception,
4760 [SVM_EXIT_READ_DR4] = dr_interception,
4761 [SVM_EXIT_READ_DR5] = dr_interception,
4762 [SVM_EXIT_READ_DR6] = dr_interception,
4763 [SVM_EXIT_READ_DR7] = dr_interception,
4764 [SVM_EXIT_WRITE_DR0] = dr_interception,
4765 [SVM_EXIT_WRITE_DR1] = dr_interception,
4766 [SVM_EXIT_WRITE_DR2] = dr_interception,
4767 [SVM_EXIT_WRITE_DR3] = dr_interception,
4768 [SVM_EXIT_WRITE_DR4] = dr_interception,
4769 [SVM_EXIT_WRITE_DR5] = dr_interception,
4770 [SVM_EXIT_WRITE_DR6] = dr_interception,
4771 [SVM_EXIT_WRITE_DR7] = dr_interception,
4772 [SVM_EXIT_EXCP_BASE + DB_VECTOR] = db_interception,
4773 [SVM_EXIT_EXCP_BASE + BP_VECTOR] = bp_interception,
4774 [SVM_EXIT_EXCP_BASE + UD_VECTOR] = ud_interception,
4775 [SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception,
4776 [SVM_EXIT_EXCP_BASE + MC_VECTOR] = mc_interception,
4777 [SVM_EXIT_EXCP_BASE + AC_VECTOR] = ac_interception,
4778 [SVM_EXIT_EXCP_BASE + GP_VECTOR] = gp_interception,
4779 [SVM_EXIT_INTR] = intr_interception,
4780 [SVM_EXIT_NMI] = nmi_interception,
4781 [SVM_EXIT_SMI] = nop_on_interception,
4782 [SVM_EXIT_INIT] = nop_on_interception,
4783 [SVM_EXIT_VINTR] = interrupt_window_interception,
4784 [SVM_EXIT_RDPMC] = rdpmc_interception,
4785 [SVM_EXIT_CPUID] = cpuid_interception,
4786 [SVM_EXIT_IRET] = iret_interception,
4787 [SVM_EXIT_INVD] = emulate_on_interception,
4788 [SVM_EXIT_PAUSE] = pause_interception,
4789 [SVM_EXIT_HLT] = halt_interception,
4790 [SVM_EXIT_INVLPG] = invlpg_interception,
4791 [SVM_EXIT_INVLPGA] = invlpga_interception,
4792 [SVM_EXIT_IOIO] = io_interception,
4793 [SVM_EXIT_MSR] = msr_interception,
4794 [SVM_EXIT_TASK_SWITCH] = task_switch_interception,
4795 [SVM_EXIT_SHUTDOWN] = shutdown_interception,
4796 [SVM_EXIT_VMRUN] = vmrun_interception,
4797 [SVM_EXIT_VMMCALL] = vmmcall_interception,
4798 [SVM_EXIT_VMLOAD] = vmload_interception,
4799 [SVM_EXIT_VMSAVE] = vmsave_interception,
4800 [SVM_EXIT_STGI] = stgi_interception,
4801 [SVM_EXIT_CLGI] = clgi_interception,
4802 [SVM_EXIT_SKINIT] = skinit_interception,
4803 [SVM_EXIT_WBINVD] = wbinvd_interception,
4804 [SVM_EXIT_MONITOR] = monitor_interception,
4805 [SVM_EXIT_MWAIT] = mwait_interception,
4806 [SVM_EXIT_XSETBV] = xsetbv_interception,
4807 [SVM_EXIT_RDPRU] = rdpru_interception,
4808 [SVM_EXIT_NPF] = npf_interception,
4809 [SVM_EXIT_RSM] = rsm_interception,
4810 [SVM_EXIT_AVIC_INCOMPLETE_IPI] = avic_incomplete_ipi_interception,
4811 [SVM_EXIT_AVIC_UNACCELERATED_ACCESS] = avic_unaccelerated_access_interception,
4812};
4813
4814static void dump_vmcb(struct kvm_vcpu *vcpu)
4815{
4816 struct vcpu_svm *svm = to_svm(vcpu);
4817 struct vmcb_control_area *control = &svm->vmcb->control;
4818 struct vmcb_save_area *save = &svm->vmcb->save;
4819
4820 if (!dump_invalid_vmcb) {
4821 pr_warn_ratelimited("set kvm_amd.dump_invalid_vmcb=1 to dump internal KVM state.\n");
4822 return;
4823 }
4824
4825 pr_err("VMCB Control Area:\n");
4826 pr_err("%-20s%04x\n", "cr_read:", control->intercept_cr & 0xffff);
4827 pr_err("%-20s%04x\n", "cr_write:", control->intercept_cr >> 16);
4828 pr_err("%-20s%04x\n", "dr_read:", control->intercept_dr & 0xffff);
4829 pr_err("%-20s%04x\n", "dr_write:", control->intercept_dr >> 16);
4830 pr_err("%-20s%08x\n", "exceptions:", control->intercept_exceptions);
4831 pr_err("%-20s%016llx\n", "intercepts:", control->intercept);
4832 pr_err("%-20s%d\n", "pause filter count:", control->pause_filter_count);
4833 pr_err("%-20s%d\n", "pause filter threshold:",
4834 control->pause_filter_thresh);
4835 pr_err("%-20s%016llx\n", "iopm_base_pa:", control->iopm_base_pa);
4836 pr_err("%-20s%016llx\n", "msrpm_base_pa:", control->msrpm_base_pa);
4837 pr_err("%-20s%016llx\n", "tsc_offset:", control->tsc_offset);
4838 pr_err("%-20s%d\n", "asid:", control->asid);
4839 pr_err("%-20s%d\n", "tlb_ctl:", control->tlb_ctl);
4840 pr_err("%-20s%08x\n", "int_ctl:", control->int_ctl);
4841 pr_err("%-20s%08x\n", "int_vector:", control->int_vector);
4842 pr_err("%-20s%08x\n", "int_state:", control->int_state);
4843 pr_err("%-20s%08x\n", "exit_code:", control->exit_code);
4844 pr_err("%-20s%016llx\n", "exit_info1:", control->exit_info_1);
4845 pr_err("%-20s%016llx\n", "exit_info2:", control->exit_info_2);
4846 pr_err("%-20s%08x\n", "exit_int_info:", control->exit_int_info);
4847 pr_err("%-20s%08x\n", "exit_int_info_err:", control->exit_int_info_err);
4848 pr_err("%-20s%lld\n", "nested_ctl:", control->nested_ctl);
4849 pr_err("%-20s%016llx\n", "nested_cr3:", control->nested_cr3);
4850 pr_err("%-20s%016llx\n", "avic_vapic_bar:", control->avic_vapic_bar);
4851 pr_err("%-20s%08x\n", "event_inj:", control->event_inj);
4852 pr_err("%-20s%08x\n", "event_inj_err:", control->event_inj_err);
4853 pr_err("%-20s%lld\n", "virt_ext:", control->virt_ext);
4854 pr_err("%-20s%016llx\n", "next_rip:", control->next_rip);
4855 pr_err("%-20s%016llx\n", "avic_backing_page:", control->avic_backing_page);
4856 pr_err("%-20s%016llx\n", "avic_logical_id:", control->avic_logical_id);
4857 pr_err("%-20s%016llx\n", "avic_physical_id:", control->avic_physical_id);
4858 pr_err("VMCB State Save Area:\n");
4859 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
4860 "es:",
4861 save->es.selector, save->es.attrib,
4862 save->es.limit, save->es.base);
4863 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
4864 "cs:",
4865 save->cs.selector, save->cs.attrib,
4866 save->cs.limit, save->cs.base);
4867 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
4868 "ss:",
4869 save->ss.selector, save->ss.attrib,
4870 save->ss.limit, save->ss.base);
4871 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
4872 "ds:",
4873 save->ds.selector, save->ds.attrib,
4874 save->ds.limit, save->ds.base);
4875 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
4876 "fs:",
4877 save->fs.selector, save->fs.attrib,
4878 save->fs.limit, save->fs.base);
4879 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
4880 "gs:",
4881 save->gs.selector, save->gs.attrib,
4882 save->gs.limit, save->gs.base);
4883 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
4884 "gdtr:",
4885 save->gdtr.selector, save->gdtr.attrib,
4886 save->gdtr.limit, save->gdtr.base);
4887 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
4888 "ldtr:",
4889 save->ldtr.selector, save->ldtr.attrib,
4890 save->ldtr.limit, save->ldtr.base);
4891 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
4892 "idtr:",
4893 save->idtr.selector, save->idtr.attrib,
4894 save->idtr.limit, save->idtr.base);
4895 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
4896 "tr:",
4897 save->tr.selector, save->tr.attrib,
4898 save->tr.limit, save->tr.base);
4899 pr_err("cpl: %d efer: %016llx\n",
4900 save->cpl, save->efer);
4901 pr_err("%-15s %016llx %-13s %016llx\n",
4902 "cr0:", save->cr0, "cr2:", save->cr2);
4903 pr_err("%-15s %016llx %-13s %016llx\n",
4904 "cr3:", save->cr3, "cr4:", save->cr4);
4905 pr_err("%-15s %016llx %-13s %016llx\n",
4906 "dr6:", save->dr6, "dr7:", save->dr7);
4907 pr_err("%-15s %016llx %-13s %016llx\n",
4908 "rip:", save->rip, "rflags:", save->rflags);
4909 pr_err("%-15s %016llx %-13s %016llx\n",
4910 "rsp:", save->rsp, "rax:", save->rax);
4911 pr_err("%-15s %016llx %-13s %016llx\n",
4912 "star:", save->star, "lstar:", save->lstar);
4913 pr_err("%-15s %016llx %-13s %016llx\n",
4914 "cstar:", save->cstar, "sfmask:", save->sfmask);
4915 pr_err("%-15s %016llx %-13s %016llx\n",
4916 "kernel_gs_base:", save->kernel_gs_base,
4917 "sysenter_cs:", save->sysenter_cs);
4918 pr_err("%-15s %016llx %-13s %016llx\n",
4919 "sysenter_esp:", save->sysenter_esp,
4920 "sysenter_eip:", save->sysenter_eip);
4921 pr_err("%-15s %016llx %-13s %016llx\n",
4922 "gpat:", save->g_pat, "dbgctl:", save->dbgctl);
4923 pr_err("%-15s %016llx %-13s %016llx\n",
4924 "br_from:", save->br_from, "br_to:", save->br_to);
4925 pr_err("%-15s %016llx %-13s %016llx\n",
4926 "excp_from:", save->last_excp_from,
4927 "excp_to:", save->last_excp_to);
4928}
4929
4930static void svm_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
4931{
4932 struct vmcb_control_area *control = &to_svm(vcpu)->vmcb->control;
4933
4934 *info1 = control->exit_info_1;
4935 *info2 = control->exit_info_2;
4936}
4937
4938static int handle_exit(struct kvm_vcpu *vcpu)
4939{
4940 struct vcpu_svm *svm = to_svm(vcpu);
4941 struct kvm_run *kvm_run = vcpu->run;
4942 u32 exit_code = svm->vmcb->control.exit_code;
4943
4944 trace_kvm_exit(exit_code, vcpu, KVM_ISA_SVM);
4945
4946 if (!is_cr_intercept(svm, INTERCEPT_CR0_WRITE))
4947 vcpu->arch.cr0 = svm->vmcb->save.cr0;
4948 if (npt_enabled)
4949 vcpu->arch.cr3 = svm->vmcb->save.cr3;
4950
4951 if (unlikely(svm->nested.exit_required)) {
4952 nested_svm_vmexit(svm);
4953 svm->nested.exit_required = false;
4954
4955 return 1;
4956 }
4957
4958 if (is_guest_mode(vcpu)) {
4959 int vmexit;
4960
4961 trace_kvm_nested_vmexit(svm->vmcb->save.rip, exit_code,
4962 svm->vmcb->control.exit_info_1,
4963 svm->vmcb->control.exit_info_2,
4964 svm->vmcb->control.exit_int_info,
4965 svm->vmcb->control.exit_int_info_err,
4966 KVM_ISA_SVM);
4967
4968 vmexit = nested_svm_exit_special(svm);
4969
4970 if (vmexit == NESTED_EXIT_CONTINUE)
4971 vmexit = nested_svm_exit_handled(svm);
4972
4973 if (vmexit == NESTED_EXIT_DONE)
4974 return 1;
4975 }
4976
4977 svm_complete_interrupts(svm);
4978
4979 if (svm->vmcb->control.exit_code == SVM_EXIT_ERR) {
4980 kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
4981 kvm_run->fail_entry.hardware_entry_failure_reason
4982 = svm->vmcb->control.exit_code;
4983 dump_vmcb(vcpu);
4984 return 0;
4985 }
4986
4987 if (is_external_interrupt(svm->vmcb->control.exit_int_info) &&
4988 exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR &&
4989 exit_code != SVM_EXIT_NPF && exit_code != SVM_EXIT_TASK_SWITCH &&
4990 exit_code != SVM_EXIT_INTR && exit_code != SVM_EXIT_NMI)
4991 printk(KERN_ERR "%s: unexpected exit_int_info 0x%x "
4992 "exit_code 0x%x\n",
4993 __func__, svm->vmcb->control.exit_int_info,
4994 exit_code);
4995
4996 if (exit_code >= ARRAY_SIZE(svm_exit_handlers)
4997 || !svm_exit_handlers[exit_code]) {
4998 vcpu_unimpl(vcpu, "svm: unexpected exit reason 0x%x\n", exit_code);
4999 dump_vmcb(vcpu);
5000 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
5001 vcpu->run->internal.suberror =
5002 KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON;
5003 vcpu->run->internal.ndata = 1;
5004 vcpu->run->internal.data[0] = exit_code;
5005 return 0;
5006 }
5007
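	/*
	 * With retpolines enabled, the indirect call through
	 * svm_exit_handlers[] is relatively expensive, so dispatch the most
	 * common exit reasons directly.
	 */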
5008#ifdef CONFIG_RETPOLINE
5009 if (exit_code == SVM_EXIT_MSR)
5010 return msr_interception(svm);
5011 else if (exit_code == SVM_EXIT_VINTR)
5012 return interrupt_window_interception(svm);
5013 else if (exit_code == SVM_EXIT_INTR)
5014 return intr_interception(svm);
5015 else if (exit_code == SVM_EXIT_HLT)
5016 return halt_interception(svm);
5017 else if (exit_code == SVM_EXIT_NPF)
5018 return npf_interception(svm);
5019#endif
5020 return svm_exit_handlers[exit_code](svm);
5021}
5022
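/*
 * Reload the host TR: reset the TSS descriptor type to "available" first,
 * since LTR faults on a descriptor that is already marked busy.
 */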
5023static void reload_tss(struct kvm_vcpu *vcpu)
5024{
5025 int cpu = raw_smp_processor_id();
5026
5027 struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
5028 sd->tss_desc->type = 9;
5029 load_TR_desc();
5030}
5031
5032static void pre_sev_run(struct vcpu_svm *svm, int cpu)
5033{
5034 struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
5035 int asid = sev_get_asid(svm->vcpu.kvm);
5036
5037	/* Assign the ASID allocated for this SEV guest */
5038 svm->vmcb->control.asid = asid;
5039
5040	/*
5041	 * Flush the guest TLB when a different VMCB for the same ASID is
5042	 * about to run on this host CPU, or when this VMCB last ran on a
5043	 * different host CPU; otherwise the existing TLB entries for the
5044	 * ASID may be stale.
5045	 */
5046 if (sd->sev_vmcbs[asid] == svm->vmcb &&
5047 svm->last_cpu == cpu)
5048 return;
5049
5050 svm->last_cpu = cpu;
5051 sd->sev_vmcbs[asid] = svm->vmcb;
5052 svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ASID;
5053 mark_dirty(svm->vmcb, VMCB_ASID);
5054}
5055
5056static void pre_svm_run(struct vcpu_svm *svm)
5057{
5058 int cpu = raw_smp_processor_id();
5059
5060 struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
5061
5062 if (sev_guest(svm->vcpu.kvm))
5063 return pre_sev_run(svm, cpu);
5064
5065	/* Allocate a fresh ASID if this CPU's ASID generation has changed */
5066 if (svm->asid_generation != sd->asid_generation)
5067 new_asid(svm, sd);
5068}
5069
5070static void svm_inject_nmi(struct kvm_vcpu *vcpu)
5071{
5072 struct vcpu_svm *svm = to_svm(vcpu);
5073
5074 svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI;
5075 vcpu->arch.hflags |= HF_NMI_MASK;
5076 set_intercept(svm, INTERCEPT_IRET);
5077 ++vcpu->stat.nmi_injections;
5078}
5079
5080static inline void svm_inject_irq(struct vcpu_svm *svm, int irq)
5081{
5082 struct vmcb_control_area *control;
5083
5084
5085 control = &svm->vmcb->control;
5086 control->int_vector = irq;
5087 control->int_ctl &= ~V_INTR_PRIO_MASK;
5088 control->int_ctl |= V_IRQ_MASK |
5089		((0xf) << V_INTR_PRIO_SHIFT);
5090 mark_dirty(svm->vmcb, VMCB_INTR);
5091}
5092
5093static void svm_set_irq(struct kvm_vcpu *vcpu)
5094{
5095 struct vcpu_svm *svm = to_svm(vcpu);
5096
5097 BUG_ON(!(gif_set(svm)));
5098
5099 trace_kvm_inj_virq(vcpu->arch.interrupt.nr);
5100 ++vcpu->stat.irq_injections;
5101
5102 svm->vmcb->control.event_inj = vcpu->arch.interrupt.nr |
5103 SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR;
5104}
5105
5106static inline bool svm_nested_virtualize_tpr(struct kvm_vcpu *vcpu)
5107{
5108 return is_guest_mode(vcpu) && (vcpu->arch.hflags & HF_VINTR_MASK);
5109}
5110
5111static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
5112{
5113 struct vcpu_svm *svm = to_svm(vcpu);
5114
5115 if (svm_nested_virtualize_tpr(vcpu))
5116 return;
5117
5118 clr_cr_intercept(svm, INTERCEPT_CR8_WRITE);
5119
5120 if (irr == -1)
5121 return;
5122
5123 if (tpr >= irr)
5124 set_cr_intercept(svm, INTERCEPT_CR8_WRITE);
5125}
5126
5127static void svm_set_virtual_apic_mode(struct kvm_vcpu *vcpu)
5128{
5129 return;
5130}
5131
5132static bool svm_get_enable_apicv(struct kvm *kvm)
5133{
5134 return avic && irqchip_split(kvm);
5135}
5136
5137static void svm_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr)
5138{
5139}
5140
5141static void svm_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr)
5142{
5143}
5144
5145
5146static void svm_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
5147{
5148 struct vcpu_svm *svm = to_svm(vcpu);
5149 struct vmcb *vmcb = svm->vmcb;
5150
5151 if (kvm_vcpu_apicv_active(vcpu))
5152 vmcb->control.int_ctl |= AVIC_ENABLE_MASK;
5153 else
5154 vmcb->control.int_ctl &= ~AVIC_ENABLE_MASK;
5155 mark_dirty(vmcb, VMCB_AVIC);
5156}
5157
5158static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
5159{
5160 return;
5161}
5162
5163static void svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec)
5164{
5165 kvm_lapic_set_irr(vec, vcpu->arch.apic);
5166 smp_mb__after_atomic();
5167
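	/*
	 * The IRR update above must be visible before the running check.  If
	 * the target vCPU is running on another CPU, write that CPU's APIC ID
	 * to the AVIC doorbell MSR so it picks up the new interrupt; otherwise
	 * just wake the vCPU.
	 */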
5168 if (avic_vcpu_is_running(vcpu)) {
5169 int cpuid = vcpu->cpu;
5170
5171 if (cpuid != get_cpu())
5172 wrmsrl(SVM_AVIC_DOORBELL, kvm_cpu_get_apicid(cpuid));
5173 put_cpu();
5174 } else
5175 kvm_vcpu_wake_up(vcpu);
5176}
5177
5178static bool svm_dy_apicv_has_pending_interrupt(struct kvm_vcpu *vcpu)
5179{
5180 return false;
5181}
5182
5183static void svm_ir_list_del(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
5184{
5185 unsigned long flags;
5186 struct amd_svm_iommu_ir *cur;
5187
5188 spin_lock_irqsave(&svm->ir_list_lock, flags);
5189 list_for_each_entry(cur, &svm->ir_list, node) {
5190 if (cur->data != pi->ir_data)
5191 continue;
5192 list_del(&cur->node);
5193 kfree(cur);
5194 break;
5195 }
5196 spin_unlock_irqrestore(&svm->ir_list_lock, flags);
5197}
5198
5199static int svm_ir_list_add(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
5200{
5201 int ret = 0;
5202 unsigned long flags;
5203 struct amd_svm_iommu_ir *ir;
5204
5205	/*
5206	 * The IRTE for this interrupt may already have been set up for a
5207	 * previous vCPU (identified by prev_ga_tag).  If so, remove the
5208	 * stale entry from that vCPU's ir_list before adding a new one.
5209	 */
5210 if (pi->ir_data && (pi->prev_ga_tag != 0)) {
5211 struct kvm *kvm = svm->vcpu.kvm;
5212 u32 vcpu_id = AVIC_GATAG_TO_VCPUID(pi->prev_ga_tag);
5213 struct kvm_vcpu *prev_vcpu = kvm_get_vcpu_by_id(kvm, vcpu_id);
5214 struct vcpu_svm *prev_svm;
5215
5216 if (!prev_vcpu) {
5217 ret = -EINVAL;
5218 goto out;
5219 }
5220
5221 prev_svm = to_svm(prev_vcpu);
5222 svm_ir_list_del(prev_svm, pi);
5223 }
5224
5225	/*
5226	 * Allocate a new amd_svm_iommu_ir entry and add it to this vCPU's
5227	 * ir_list.
5228	 */
5229 ir = kzalloc(sizeof(struct amd_svm_iommu_ir), GFP_KERNEL_ACCOUNT);
5230 if (!ir) {
5231 ret = -ENOMEM;
5232 goto out;
5233 }
5234 ir->data = pi->ir_data;
5235
5236 spin_lock_irqsave(&svm->ir_list_lock, flags);
5237 list_add(&ir->node, &svm->ir_list);
5238 spin_unlock_irqrestore(&svm->ir_list_lock, flags);
5239out:
5240 return ret;
5241}
5242
5243/*
5244 * Note:
5245 * The hardware cannot post multicast or broadcast interrupts directly
5246 * to a vCPU, so those still go through legacy interrupt remapping.
5247 * Lowest-priority interrupts are only handled here when they target a
5248 * single vCPU.
5249 */
5254static int
5255get_pi_vcpu_info(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e,
5256 struct vcpu_data *vcpu_info, struct vcpu_svm **svm)
5257{
5258 struct kvm_lapic_irq irq;
5259 struct kvm_vcpu *vcpu = NULL;
5260
5261 kvm_set_msi_irq(kvm, e, &irq);
5262
5263 if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu) ||
5264 !kvm_irq_is_postable(&irq)) {
5265 pr_debug("SVM: %s: use legacy intr remap mode for irq %u\n",
5266 __func__, irq.vector);
5267 return -1;
5268 }
5269
5270 pr_debug("SVM: %s: use GA mode for irq %u\n", __func__,
5271 irq.vector);
5272 *svm = to_svm(vcpu);
5273 vcpu_info->pi_desc_addr = __sme_set(page_to_phys((*svm)->avic_backing_page));
5274 vcpu_info->vector = irq.vector;
5275
5276 return 0;
5277}
5278
5279/*
5280 * svm_update_pi_irte - set IRTE for Posted-Interrupts
5281 *
5282 * @kvm: kvm
5283 * @host_irq: host irq of the interrupt
5284 * @guest_irq: gsi of the interrupt
5285 * @set: set or unset the posted interrupt
5286 * returns 0 on success, < 0 on failure
5287 */
5288static int svm_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
5289 uint32_t guest_irq, bool set)
5290{
5291 struct kvm_kernel_irq_routing_entry *e;
5292 struct kvm_irq_routing_table *irq_rt;
5293 int idx, ret = -EINVAL;
5294
5295 if (!kvm_arch_has_assigned_device(kvm) ||
5296 !irq_remapping_cap(IRQ_POSTING_CAP))
5297 return 0;
5298
5299 pr_debug("SVM: %s: host_irq=%#x, guest_irq=%#x, set=%#x\n",
5300 __func__, host_irq, guest_irq, set);
5301
5302 idx = srcu_read_lock(&kvm->irq_srcu);
5303 irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
5304 WARN_ON(guest_irq >= irq_rt->nr_rt_entries);
5305
5306 hlist_for_each_entry(e, &irq_rt->map[guest_irq], link) {
5307 struct vcpu_data vcpu_info;
5308 struct vcpu_svm *svm = NULL;
5309
5310 if (e->type != KVM_IRQ_ROUTING_MSI)
5311 continue;
5312
5313		/*
5314		 * Set up posted interrupts only when the interrupt can be
5315		 * delivered to a single vCPU, posting is being enabled
5316		 * (set == true) and APIC virtualization is active on that
5317		 * vCPU; otherwise fall back to legacy interrupt remapping.
5318		 */
5320 if (!get_pi_vcpu_info(kvm, e, &vcpu_info, &svm) && set &&
5321 kvm_vcpu_apicv_active(&svm->vcpu)) {
5322 struct amd_iommu_pi_data pi;
5323
5324
5325 pi.base = __sme_set(page_to_phys(svm->avic_backing_page) &
5326 AVIC_HPA_MASK);
5327 pi.ga_tag = AVIC_GATAG(to_kvm_svm(kvm)->avic_vm_id,
5328 svm->vcpu.vcpu_id);
5329 pi.is_guest_mode = true;
5330 pi.vcpu_data = &vcpu_info;
5331 ret = irq_set_vcpu_affinity(host_irq, &pi);
5332
5333			/*
5334			 * vCPU affinity was successfully set up in IOMMU guest
5335			 * mode; record the posted-interrupt data in the per-vCPU
5336			 * ir_list so it can be updated when the vCPU's scheduling
5337			 * state changes.
5338			 */
5340 if (!ret && pi.is_guest_mode)
5341 svm_ir_list_add(svm, &pi);
5342 } else {
5343
5344 struct amd_iommu_pi_data pi;
5345
5346
5347
5348
5349
5350
5351 pi.is_guest_mode = false;
5352 ret = irq_set_vcpu_affinity(host_irq, &pi);
5353
5354			/*
5355			 * If the interrupt was previously posted to a vCPU, the
5356			 * IOMMU reports its ga_tag in pi.prev_ga_tag; remove the
5357			 * entry from that vCPU's ir_list.
5358			 */
5360 if (!ret && pi.prev_ga_tag) {
5361 int id = AVIC_GATAG_TO_VCPUID(pi.prev_ga_tag);
5362 struct kvm_vcpu *vcpu;
5363
5364 vcpu = kvm_get_vcpu_by_id(kvm, id);
5365 if (vcpu)
5366 svm_ir_list_del(to_svm(vcpu), &pi);
5367 }
5368 }
5369
5370 if (!ret && svm) {
5371 trace_kvm_pi_irte_update(host_irq, svm->vcpu.vcpu_id,
5372 e->gsi, vcpu_info.vector,
5373 vcpu_info.pi_desc_addr, set);
5374 }
5375
5376 if (ret < 0) {
5377 pr_err("%s: failed to update PI IRTE\n", __func__);
5378 goto out;
5379 }
5380 }
5381
5382 ret = 0;
5383out:
5384 srcu_read_unlock(&kvm->irq_srcu, idx);
5385 return ret;
5386}
5387
5388static int svm_nmi_allowed(struct kvm_vcpu *vcpu)
5389{
5390 struct vcpu_svm *svm = to_svm(vcpu);
5391 struct vmcb *vmcb = svm->vmcb;
5392 int ret;
5393 ret = !(vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) &&
5394 !(svm->vcpu.arch.hflags & HF_NMI_MASK);
5395 ret = ret && gif_set(svm) && nested_svm_nmi(svm);
5396
5397 return ret;
5398}
5399
5400static bool svm_get_nmi_mask(struct kvm_vcpu *vcpu)
5401{
5402 struct vcpu_svm *svm = to_svm(vcpu);
5403
5404 return !!(svm->vcpu.arch.hflags & HF_NMI_MASK);
5405}
5406
5407static void svm_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
5408{
5409 struct vcpu_svm *svm = to_svm(vcpu);
5410
5411 if (masked) {
5412 svm->vcpu.arch.hflags |= HF_NMI_MASK;
5413 set_intercept(svm, INTERCEPT_IRET);
5414 } else {
5415 svm->vcpu.arch.hflags &= ~HF_NMI_MASK;
5416 clr_intercept(svm, INTERCEPT_IRET);
5417 }
5418}
5419
5420static int svm_interrupt_allowed(struct kvm_vcpu *vcpu)
5421{
5422 struct vcpu_svm *svm = to_svm(vcpu);
5423 struct vmcb *vmcb = svm->vmcb;
5424 int ret;
5425
5426 if (!gif_set(svm) ||
5427 (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK))
5428 return 0;
5429
5430 ret = !!(kvm_get_rflags(vcpu) & X86_EFLAGS_IF);
5431
5432 if (is_guest_mode(vcpu))
5433 return ret && !(svm->vcpu.arch.hflags & HF_VINTR_MASK);
5434
5435 return ret;
5436}
5437
5438static void enable_irq_window(struct kvm_vcpu *vcpu)
5439{
5440 struct vcpu_svm *svm = to_svm(vcpu);
5441
5442 if (kvm_vcpu_apicv_active(vcpu))
5443 return;
5444
5445	/*
5446	 * Open an interrupt window: enable the VINTR intercept and inject a
5447	 * dummy virtual interrupt so that a #VMEXIT is taken once the guest
5448	 * can accept interrupts.  With VGIF this is done even while GIF is
5449	 * still clear.
5450	 */
5453 if ((vgif_enabled(svm) || gif_set(svm)) && nested_svm_intr(svm)) {
5454 svm_set_vintr(svm);
5455 svm_inject_irq(svm, 0x0);
5456 }
5457}
5458
5459static void enable_nmi_window(struct kvm_vcpu *vcpu)
5460{
5461 struct vcpu_svm *svm = to_svm(vcpu);
5462
5463 if ((svm->vcpu.arch.hflags & (HF_NMI_MASK | HF_IRET_MASK))
5464 == HF_NMI_MASK)
5465 return;
5466
5467 if (!gif_set(svm)) {
5468 if (vgif_enabled(svm))
5469 set_intercept(svm, INTERCEPT_STGI);
5470 return;
5471 }
5472
5473 if (svm->nested.exit_required)
5474 return;
5475
5476	/*
5477	 * Something (e.g. a pending IRET) blocks NMI injection; single-step
5478	 * the guest so we regain control once the NMI window opens.
5479	 */
5480 svm->nmi_singlestep_guest_rflags = svm_get_rflags(vcpu);
5481 svm->nmi_singlestep = true;
5482 svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
5483}
5484
5485static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr)
5486{
5487 return 0;
5488}
5489
5490static int svm_set_identity_map_addr(struct kvm *kvm, u64 ident_addr)
5491{
5492 return 0;
5493}
5494
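/*
 * Flush the guest's TLB entries: either ask the hardware to flush this
 * ASID on the next VMRUN, or, without FLUSHBYASID, force allocation of a
 * fresh ASID which makes the stale entries unreachable.
 */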
5495static void svm_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa)
5496{
5497 struct vcpu_svm *svm = to_svm(vcpu);
5498
5499 if (static_cpu_has(X86_FEATURE_FLUSHBYASID))
5500 svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ASID;
5501 else
5502 svm->asid_generation--;
5503}
5504
5505static void svm_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t gva)
5506{
5507 struct vcpu_svm *svm = to_svm(vcpu);
5508
5509 invlpga(gva, svm->vmcb->control.asid);
5510}
5511
5512static void svm_prepare_guest_switch(struct kvm_vcpu *vcpu)
5513{
5514}
5515
5516static inline void sync_cr8_to_lapic(struct kvm_vcpu *vcpu)
5517{
5518 struct vcpu_svm *svm = to_svm(vcpu);
5519
5520 if (svm_nested_virtualize_tpr(vcpu))
5521 return;
5522
5523 if (!is_cr_intercept(svm, INTERCEPT_CR8_WRITE)) {
5524 int cr8 = svm->vmcb->control.int_ctl & V_TPR_MASK;
5525 kvm_set_cr8(vcpu, cr8);
5526 }
5527}
5528
5529static inline void sync_lapic_to_cr8(struct kvm_vcpu *vcpu)
5530{
5531 struct vcpu_svm *svm = to_svm(vcpu);
5532 u64 cr8;
5533
5534 if (svm_nested_virtualize_tpr(vcpu) ||
5535 kvm_vcpu_apicv_active(vcpu))
5536 return;
5537
5538 cr8 = kvm_get_cr8(vcpu);
5539 svm->vmcb->control.int_ctl &= ~V_TPR_MASK;
5540 svm->vmcb->control.int_ctl |= cr8 & V_TPR_MASK;
5541}
5542
5543static void svm_complete_interrupts(struct vcpu_svm *svm)
5544{
5545 u8 vector;
5546 int type;
5547 u32 exitintinfo = svm->vmcb->control.exit_int_info;
5548 unsigned int3_injected = svm->int3_injected;
5549
5550 svm->int3_injected = 0;
5551
5552	/*
5553	 * If RIP has moved since HF_IRET_MASK was set, the guest completed
5554	 * the IRET and NMIs can be delivered again.
5555	 */
5556 if ((svm->vcpu.arch.hflags & HF_IRET_MASK)
5557 && kvm_rip_read(&svm->vcpu) != svm->nmi_iret_rip) {
5558 svm->vcpu.arch.hflags &= ~(HF_NMI_MASK | HF_IRET_MASK);
5559 kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
5560 }
5561
5562 svm->vcpu.arch.nmi_injected = false;
5563 kvm_clear_exception_queue(&svm->vcpu);
5564 kvm_clear_interrupt_queue(&svm->vcpu);
5565
5566 if (!(exitintinfo & SVM_EXITINTINFO_VALID))
5567 return;
5568
5569 kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
5570
5571 vector = exitintinfo & SVM_EXITINTINFO_VEC_MASK;
5572 type = exitintinfo & SVM_EXITINTINFO_TYPE_MASK;
5573
5574 switch (type) {
5575 case SVM_EXITINTINFO_TYPE_NMI:
5576 svm->vcpu.arch.nmi_injected = true;
5577 break;
5578 case SVM_EXITINTINFO_TYPE_EXEPT:
5579		/*
5580		 * Soft exceptions are not re-injected; re-execute the
5581		 * instruction instead, rewinding RIP first if the exit
5582		 * interrupted an INT3 that we injected.
5583		 */
5584 if (kvm_exception_is_soft(vector)) {
5585 if (vector == BP_VECTOR && int3_injected &&
5586 kvm_is_linear_rip(&svm->vcpu, svm->int3_rip))
5587 kvm_rip_write(&svm->vcpu,
5588 kvm_rip_read(&svm->vcpu) -
5589 int3_injected);
5590 break;
5591 }
5592 if (exitintinfo & SVM_EXITINTINFO_VALID_ERR) {
5593 u32 err = svm->vmcb->control.exit_int_info_err;
5594 kvm_requeue_exception_e(&svm->vcpu, vector, err);
5595
5596 } else
5597 kvm_requeue_exception(&svm->vcpu, vector);
5598 break;
5599 case SVM_EXITINTINFO_TYPE_INTR:
5600 kvm_queue_interrupt(&svm->vcpu, vector, false);
5601 break;
5602 default:
5603 break;
5604 }
5605}
5606
5607static void svm_cancel_injection(struct kvm_vcpu *vcpu)
5608{
5609 struct vcpu_svm *svm = to_svm(vcpu);
5610 struct vmcb_control_area *control = &svm->vmcb->control;
5611
5612 control->exit_int_info = control->event_inj;
5613 control->exit_int_info_err = control->event_inj_err;
5614 control->event_inj = 0;
5615 svm_complete_interrupts(svm);
5616}
5617
5618static void svm_vcpu_run(struct kvm_vcpu *vcpu)
5619{
5620 struct vcpu_svm *svm = to_svm(vcpu);
5621
5622 svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX];
5623 svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP];
5624 svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP];
5625
5626	/*
5627	 * A pending nested #VMEXIT still has to be emulated before this
5628	 * vCPU can execute guest code again.
5629	 */
5630 if (unlikely(svm->nested.exit_required))
5631 return;
5632
5633	/*
5634	 * Disable NMI single-step if an event is being injected: the RFLAGS
5635	 * value modified for single-stepping would otherwise be pushed onto
5636	 * the guest stack by the event delivery and could not easily be
5637	 * undone afterwards.
5638	 */
5639 if (svm->nmi_singlestep && svm->vmcb->control.event_inj) {
5640
5641
5642
5643
5644
5645 disable_nmi_singlestep(svm);
5646 smp_send_reschedule(vcpu->cpu);
5647 }
5648
5649 pre_svm_run(svm);
5650
5651 sync_lapic_to_cr8(vcpu);
5652
5653 svm->vmcb->save.cr2 = vcpu->arch.cr2;
5654
5655 clgi();
5656 kvm_load_guest_xsave_state(vcpu);
5657
5658 if (lapic_in_kernel(vcpu) &&
5659 vcpu->arch.apic->lapic_timer.timer_advance_ns)
5660 kvm_wait_lapic_expire(vcpu);
5661
5662	/*
5663	 * If this vCPU has touched SPEC_CTRL, switch to the guest's value
5664	 * now.  VMRUN is serialising on affected CPUs, so the conditional
5665	 * branch over the WRMSR inside this helper cannot be speculated
5666	 * around.
5667	 */
5668 x86_spec_ctrl_set_guest(svm->spec_ctrl, svm->virt_spec_ctrl);
5669
5670 local_irq_enable();
5671
5672 asm volatile (
5673 "push %%" _ASM_BP "; \n\t"
5674 "mov %c[rbx](%[svm]), %%" _ASM_BX " \n\t"
5675 "mov %c[rcx](%[svm]), %%" _ASM_CX " \n\t"
5676 "mov %c[rdx](%[svm]), %%" _ASM_DX " \n\t"
5677 "mov %c[rsi](%[svm]), %%" _ASM_SI " \n\t"
5678 "mov %c[rdi](%[svm]), %%" _ASM_DI " \n\t"
5679 "mov %c[rbp](%[svm]), %%" _ASM_BP " \n\t"
5680#ifdef CONFIG_X86_64
5681 "mov %c[r8](%[svm]), %%r8 \n\t"
5682 "mov %c[r9](%[svm]), %%r9 \n\t"
5683 "mov %c[r10](%[svm]), %%r10 \n\t"
5684 "mov %c[r11](%[svm]), %%r11 \n\t"
5685 "mov %c[r12](%[svm]), %%r12 \n\t"
5686 "mov %c[r13](%[svm]), %%r13 \n\t"
5687 "mov %c[r14](%[svm]), %%r14 \n\t"
5688 "mov %c[r15](%[svm]), %%r15 \n\t"
5689#endif
5690
5691		/* Enter guest mode */
5692 "push %%" _ASM_AX " \n\t"
5693 "mov %c[vmcb](%[svm]), %%" _ASM_AX " \n\t"
5694 __ex("vmload %%" _ASM_AX) "\n\t"
5695 __ex("vmrun %%" _ASM_AX) "\n\t"
5696 __ex("vmsave %%" _ASM_AX) "\n\t"
5697 "pop %%" _ASM_AX " \n\t"
5698
5699		/* Save guest registers, load host registers */
5700 "mov %%" _ASM_BX ", %c[rbx](%[svm]) \n\t"
5701 "mov %%" _ASM_CX ", %c[rcx](%[svm]) \n\t"
5702 "mov %%" _ASM_DX ", %c[rdx](%[svm]) \n\t"
5703 "mov %%" _ASM_SI ", %c[rsi](%[svm]) \n\t"
5704 "mov %%" _ASM_DI ", %c[rdi](%[svm]) \n\t"
5705 "mov %%" _ASM_BP ", %c[rbp](%[svm]) \n\t"
5706#ifdef CONFIG_X86_64
5707 "mov %%r8, %c[r8](%[svm]) \n\t"
5708 "mov %%r9, %c[r9](%[svm]) \n\t"
5709 "mov %%r10, %c[r10](%[svm]) \n\t"
5710 "mov %%r11, %c[r11](%[svm]) \n\t"
5711 "mov %%r12, %c[r12](%[svm]) \n\t"
5712 "mov %%r13, %c[r13](%[svm]) \n\t"
5713 "mov %%r14, %c[r14](%[svm]) \n\t"
5714 "mov %%r15, %c[r15](%[svm]) \n\t"
5715
5716		/*
5717		 * Clear registers to prevent speculative use of guest values.
5718		 */
5719 "xor %%r8d, %%r8d \n\t"
5720 "xor %%r9d, %%r9d \n\t"
5721 "xor %%r10d, %%r10d \n\t"
5722 "xor %%r11d, %%r11d \n\t"
5723 "xor %%r12d, %%r12d \n\t"
5724 "xor %%r13d, %%r13d \n\t"
5725 "xor %%r14d, %%r14d \n\t"
5726 "xor %%r15d, %%r15d \n\t"
5727#endif
5728 "xor %%ebx, %%ebx \n\t"
5729 "xor %%ecx, %%ecx \n\t"
5730 "xor %%edx, %%edx \n\t"
5731 "xor %%esi, %%esi \n\t"
5732 "xor %%edi, %%edi \n\t"
5733 "pop %%" _ASM_BP
5734 :
5735 : [svm]"a"(svm),
5736 [vmcb]"i"(offsetof(struct vcpu_svm, vmcb_pa)),
5737 [rbx]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RBX])),
5738 [rcx]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RCX])),
5739 [rdx]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RDX])),
5740 [rsi]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RSI])),
5741 [rdi]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RDI])),
5742 [rbp]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RBP]))
5743#ifdef CONFIG_X86_64
5744 , [r8]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R8])),
5745 [r9]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R9])),
5746 [r10]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R10])),
5747 [r11]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R11])),
5748 [r12]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R12])),
5749 [r13]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R13])),
5750 [r14]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R14])),
5751 [r15]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R15]))
5752#endif
5753 : "cc", "memory"
5754#ifdef CONFIG_X86_64
5755 , "rbx", "rcx", "rdx", "rsi", "rdi"
5756 , "r8", "r9", "r10", "r11" , "r12", "r13", "r14", "r15"
5757#else
5758 , "ebx", "ecx", "edx", "esi", "edi"
5759#endif
5760 );
5761
5762	/* Eliminate branch target predictions from guest mode */
5763 vmexit_fill_RSB();
5764
5765#ifdef CONFIG_X86_64
5766 wrmsrl(MSR_GS_BASE, svm->host.gs_base);
5767#else
5768 loadsegment(fs, svm->host.fs);
5769#ifndef CONFIG_X86_32_LAZY_GS
5770 loadsegment(gs, svm->host.gs);
5771#endif
5772#endif
5773
5774	/*
5775	 * The kernel does not use IBRS itself, so SPEC_CTRL is not switched
5776	 * atomically around VMRUN.  If the guest could have written
5777	 * SPEC_CTRL (i.e. the WRMSR is not intercepted), read the value it
5778	 * left behind here, before any host code runs that might use the
5779	 * MSR, so it can be restored on the next VMRUN.
5780	 */
5789 if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)))
5790 svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
5791
5792 reload_tss(vcpu);
5793
5794 local_irq_disable();
5795
5796 x86_spec_ctrl_restore_host(svm->spec_ctrl, svm->virt_spec_ctrl);
5797
5798 vcpu->arch.cr2 = svm->vmcb->save.cr2;
5799 vcpu->arch.regs[VCPU_REGS_RAX] = svm->vmcb->save.rax;
5800 vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp;
5801 vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip;
5802
5803 if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI))
5804 kvm_before_interrupt(&svm->vcpu);
5805
5806 kvm_load_host_xsave_state(vcpu);
5807 stgi();
5808
5809
5810
5811 if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI))
5812 kvm_after_interrupt(&svm->vcpu);
5813
5814 sync_cr8_to_lapic(vcpu);
5815
5816 svm->next_rip = 0;
5817
5818 svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING;
5819
5820
5821 if (svm->vmcb->control.exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR)
5822 svm->vcpu.arch.apf.host_apf_reason = kvm_read_and_reset_pf_reason();
5823
5824 if (npt_enabled) {
5825 vcpu->arch.regs_avail &= ~(1 << VCPU_EXREG_PDPTR);
5826 vcpu->arch.regs_dirty &= ~(1 << VCPU_EXREG_PDPTR);
5827 }
5828
5829	/*
5830	 * Handle intercepted #MC exceptions here, before the vCPU gets a
5831	 * chance to move to a different physical CPU.
5832	 */
5833 if (unlikely(svm->vmcb->control.exit_code ==
5834 SVM_EXIT_EXCP_BASE + MC_VECTOR))
5835 svm_handle_mce(svm);
5836
5837 mark_all_clean(svm->vmcb);
5838}
5839STACK_FRAME_NON_STANDARD(svm_vcpu_run);
5840
5841static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root)
5842{
5843 struct vcpu_svm *svm = to_svm(vcpu);
5844
5845 svm->vmcb->save.cr3 = __sme_set(root);
5846 mark_dirty(svm->vmcb, VMCB_CR);
5847}
5848
5849static void set_tdp_cr3(struct kvm_vcpu *vcpu, unsigned long root)
5850{
5851 struct vcpu_svm *svm = to_svm(vcpu);
5852
5853 svm->vmcb->control.nested_cr3 = __sme_set(root);
5854 mark_dirty(svm->vmcb, VMCB_NPT);
5855
5856
5857 svm->vmcb->save.cr3 = kvm_read_cr3(vcpu);
5858 mark_dirty(svm->vmcb, VMCB_CR);
5859}
5860
5861static int is_disabled(void)
5862{
5863 u64 vm_cr;
5864
5865 rdmsrl(MSR_VM_CR, vm_cr);
5866 if (vm_cr & (1 << SVM_VM_CR_SVM_DISABLE))
5867 return 1;
5868
5869 return 0;
5870}
5871
5872static void
5873svm_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
5874{
5875	/*
5876	 * Patch in the VMMCALL instruction (0f 01 d9):
5877	 */
5878 hypercall[0] = 0x0f;
5879 hypercall[1] = 0x01;
5880 hypercall[2] = 0xd9;
5881}
5882
5883static int __init svm_check_processor_compat(void)
5884{
5885 return 0;
5886}
5887
5888static bool svm_cpu_has_accelerated_tpr(void)
5889{
5890 return false;
5891}
5892
5893static bool svm_has_emulated_msr(int index)
5894{
5895 switch (index) {
5896 case MSR_IA32_MCG_EXT_CTL:
5897 case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
5898 return false;
5899 default:
5900 break;
5901 }
5902
5903 return true;
5904}
5905
5906static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
5907{
5908 return 0;
5909}
5910
5911static void svm_cpuid_update(struct kvm_vcpu *vcpu)
5912{
5913 struct vcpu_svm *svm = to_svm(vcpu);
5914
5915 vcpu->arch.xsaves_enabled = guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) &&
5916 boot_cpu_has(X86_FEATURE_XSAVES);
5917
5918
5919 svm->nrips_enabled = !!guest_cpuid_has(&svm->vcpu, X86_FEATURE_NRIPS);
5920
5921 if (!kvm_vcpu_apicv_active(vcpu))
5922 return;
5923
5924 guest_cpuid_clear(vcpu, X86_FEATURE_X2APIC);
5925}
5926
5927#define F(x) bit(X86_FEATURE_##x)
5928
5929static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
5930{
5931 switch (func) {
5932 case 0x1:
5933 if (avic)
5934 entry->ecx &= ~bit(X86_FEATURE_X2APIC);
5935 break;
5936 case 0x80000001:
5937 if (nested)
5938 entry->ecx |= (1 << 2);
5939 break;
5940 case 0x80000008:
5941 if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD) ||
5942 boot_cpu_has(X86_FEATURE_AMD_SSBD))
5943 entry->ebx |= F(VIRT_SSBD);
5944 break;
5945 case 0x8000000A:
5946 entry->eax = 1;
5947 entry->ebx = 8;
5948
5949 entry->ecx = 0;
5950 entry->edx = 0;
5951
5952
5953
5954 if (boot_cpu_has(X86_FEATURE_NRIPS))
5955 entry->edx |= F(NRIPS);
5956
5957
5958 if (npt_enabled)
5959 entry->edx |= F(NPT);
5960
5961 }
5962}
5963
5964static int svm_get_lpage_level(void)
5965{
5966 return PT_PDPE_LEVEL;
5967}
5968
5969static bool svm_rdtscp_supported(void)
5970{
5971 return boot_cpu_has(X86_FEATURE_RDTSCP);
5972}
5973
5974static bool svm_invpcid_supported(void)
5975{
5976 return false;
5977}
5978
5979static bool svm_mpx_supported(void)
5980{
5981 return false;
5982}
5983
5984static bool svm_xsaves_supported(void)
5985{
5986 return boot_cpu_has(X86_FEATURE_XSAVES);
5987}
5988
5989static bool svm_umip_emulated(void)
5990{
5991 return false;
5992}
5993
5994static bool svm_pt_supported(void)
5995{
5996 return false;
5997}
5998
5999static bool svm_has_wbinvd_exit(void)
6000{
6001 return true;
6002}
6003
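/*
 * x86_intercept_map[] translates emulator intercept codes into SVM exit
 * codes, together with the emulation stage at which the nested (L1)
 * intercept check should be performed.
 */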
6004#define PRE_EX(exit) { .exit_code = (exit), \
6005 .stage = X86_ICPT_PRE_EXCEPT, }
6006#define POST_EX(exit) { .exit_code = (exit), \
6007 .stage = X86_ICPT_POST_EXCEPT, }
6008#define POST_MEM(exit) { .exit_code = (exit), \
6009 .stage = X86_ICPT_POST_MEMACCESS, }
6010
6011static const struct __x86_intercept {
6012 u32 exit_code;
6013 enum x86_intercept_stage stage;
6014} x86_intercept_map[] = {
6015 [x86_intercept_cr_read] = POST_EX(SVM_EXIT_READ_CR0),
6016 [x86_intercept_cr_write] = POST_EX(SVM_EXIT_WRITE_CR0),
6017 [x86_intercept_clts] = POST_EX(SVM_EXIT_WRITE_CR0),
6018 [x86_intercept_lmsw] = POST_EX(SVM_EXIT_WRITE_CR0),
6019 [x86_intercept_smsw] = POST_EX(SVM_EXIT_READ_CR0),
6020 [x86_intercept_dr_read] = POST_EX(SVM_EXIT_READ_DR0),
6021 [x86_intercept_dr_write] = POST_EX(SVM_EXIT_WRITE_DR0),
6022 [x86_intercept_sldt] = POST_EX(SVM_EXIT_LDTR_READ),
6023 [x86_intercept_str] = POST_EX(SVM_EXIT_TR_READ),
6024 [x86_intercept_lldt] = POST_EX(SVM_EXIT_LDTR_WRITE),
6025 [x86_intercept_ltr] = POST_EX(SVM_EXIT_TR_WRITE),
6026 [x86_intercept_sgdt] = POST_EX(SVM_EXIT_GDTR_READ),
6027 [x86_intercept_sidt] = POST_EX(SVM_EXIT_IDTR_READ),
6028 [x86_intercept_lgdt] = POST_EX(SVM_EXIT_GDTR_WRITE),
6029 [x86_intercept_lidt] = POST_EX(SVM_EXIT_IDTR_WRITE),
6030 [x86_intercept_vmrun] = POST_EX(SVM_EXIT_VMRUN),
6031 [x86_intercept_vmmcall] = POST_EX(SVM_EXIT_VMMCALL),
6032 [x86_intercept_vmload] = POST_EX(SVM_EXIT_VMLOAD),
6033 [x86_intercept_vmsave] = POST_EX(SVM_EXIT_VMSAVE),
6034 [x86_intercept_stgi] = POST_EX(SVM_EXIT_STGI),
6035 [x86_intercept_clgi] = POST_EX(SVM_EXIT_CLGI),
6036 [x86_intercept_skinit] = POST_EX(SVM_EXIT_SKINIT),
6037 [x86_intercept_invlpga] = POST_EX(SVM_EXIT_INVLPGA),
6038 [x86_intercept_rdtscp] = POST_EX(SVM_EXIT_RDTSCP),
6039 [x86_intercept_monitor] = POST_MEM(SVM_EXIT_MONITOR),
6040 [x86_intercept_mwait] = POST_EX(SVM_EXIT_MWAIT),
6041 [x86_intercept_invlpg] = POST_EX(SVM_EXIT_INVLPG),
6042 [x86_intercept_invd] = POST_EX(SVM_EXIT_INVD),
6043 [x86_intercept_wbinvd] = POST_EX(SVM_EXIT_WBINVD),
6044 [x86_intercept_wrmsr] = POST_EX(SVM_EXIT_MSR),
6045 [x86_intercept_rdtsc] = POST_EX(SVM_EXIT_RDTSC),
6046 [x86_intercept_rdmsr] = POST_EX(SVM_EXIT_MSR),
6047 [x86_intercept_rdpmc] = POST_EX(SVM_EXIT_RDPMC),
6048 [x86_intercept_cpuid] = PRE_EX(SVM_EXIT_CPUID),
6049 [x86_intercept_rsm] = PRE_EX(SVM_EXIT_RSM),
6050 [x86_intercept_pause] = PRE_EX(SVM_EXIT_PAUSE),
6051 [x86_intercept_pushf] = PRE_EX(SVM_EXIT_PUSHF),
6052 [x86_intercept_popf] = PRE_EX(SVM_EXIT_POPF),
6053 [x86_intercept_intn] = PRE_EX(SVM_EXIT_SWINT),
6054 [x86_intercept_iret] = PRE_EX(SVM_EXIT_IRET),
6055 [x86_intercept_icebp] = PRE_EX(SVM_EXIT_ICEBP),
6056 [x86_intercept_hlt] = POST_EX(SVM_EXIT_HLT),
6057 [x86_intercept_in] = POST_EX(SVM_EXIT_IOIO),
6058 [x86_intercept_ins] = POST_EX(SVM_EXIT_IOIO),
6059 [x86_intercept_out] = POST_EX(SVM_EXIT_IOIO),
6060 [x86_intercept_outs] = POST_EX(SVM_EXIT_IOIO),
6061 [x86_intercept_xsetbv] = PRE_EX(SVM_EXIT_XSETBV),
6062};
6063
6064#undef PRE_EX
6065#undef POST_EX
6066#undef POST_MEM
6067
6068static int svm_check_intercept(struct kvm_vcpu *vcpu,
6069 struct x86_instruction_info *info,
6070 enum x86_intercept_stage stage)
6071{
6072 struct vcpu_svm *svm = to_svm(vcpu);
6073 int vmexit, ret = X86EMUL_CONTINUE;
6074 struct __x86_intercept icpt_info;
6075 struct vmcb *vmcb = svm->vmcb;
6076
6077 if (info->intercept >= ARRAY_SIZE(x86_intercept_map))
6078 goto out;
6079
6080 icpt_info = x86_intercept_map[info->intercept];
6081
6082 if (stage != icpt_info.stage)
6083 goto out;
6084
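	/*
	 * Refine the generic exit code and exit information using the
	 * decoded instruction before running the nested intercept check.
	 */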
6085 switch (icpt_info.exit_code) {
6086 case SVM_EXIT_READ_CR0:
6087 if (info->intercept == x86_intercept_cr_read)
6088 icpt_info.exit_code += info->modrm_reg;
6089 break;
6090 case SVM_EXIT_WRITE_CR0: {
6091 unsigned long cr0, val;
6092 u64 intercept;
6093
6094 if (info->intercept == x86_intercept_cr_write)
6095 icpt_info.exit_code += info->modrm_reg;
6096
6097 if (icpt_info.exit_code != SVM_EXIT_WRITE_CR0 ||
6098 info->intercept == x86_intercept_clts)
6099 break;
6100
6101 intercept = svm->nested.intercept;
6102
6103 if (!(intercept & (1ULL << INTERCEPT_SELECTIVE_CR0)))
6104 break;
6105
6106 cr0 = vcpu->arch.cr0 & ~SVM_CR0_SELECTIVE_MASK;
6107 val = info->src_val & ~SVM_CR0_SELECTIVE_MASK;
6108
6109 if (info->intercept == x86_intercept_lmsw) {
6110 cr0 &= 0xfUL;
6111 val &= 0xfUL;
6112
6113 if (cr0 & X86_CR0_PE)
6114 val |= X86_CR0_PE;
6115 }
6116
6117 if (cr0 ^ val)
6118 icpt_info.exit_code = SVM_EXIT_CR0_SEL_WRITE;
6119
6120 break;
6121 }
6122 case SVM_EXIT_READ_DR0:
6123 case SVM_EXIT_WRITE_DR0:
6124 icpt_info.exit_code += info->modrm_reg;
6125 break;
6126 case SVM_EXIT_MSR:
6127 if (info->intercept == x86_intercept_wrmsr)
6128 vmcb->control.exit_info_1 = 1;
6129 else
6130 vmcb->control.exit_info_1 = 0;
6131 break;
6132 case SVM_EXIT_PAUSE:
6133		/*
6134		 * PAUSE is encoded as REP NOP; only treat it as an
6135		 * intercepted PAUSE when the REPE prefix is present.
6136		 */
6137 if (info->rep_prefix != REPE_PREFIX)
6138 goto out;
6139 break;
6140 case SVM_EXIT_IOIO: {
6141 u64 exit_info;
6142 u32 bytes;
6143
6144 if (info->intercept == x86_intercept_in ||
6145 info->intercept == x86_intercept_ins) {
6146 exit_info = ((info->src_val & 0xffff) << 16) |
6147 SVM_IOIO_TYPE_MASK;
6148 bytes = info->dst_bytes;
6149 } else {
6150 exit_info = (info->dst_val & 0xffff) << 16;
6151 bytes = info->src_bytes;
6152 }
6153
6154 if (info->intercept == x86_intercept_outs ||
6155 info->intercept == x86_intercept_ins)
6156 exit_info |= SVM_IOIO_STR_MASK;
6157
6158 if (info->rep_prefix)
6159 exit_info |= SVM_IOIO_REP_MASK;
6160
6161 bytes = min(bytes, 4u);
6162
6163 exit_info |= bytes << SVM_IOIO_SIZE_SHIFT;
6164
6165 exit_info |= (u32)info->ad_bytes << (SVM_IOIO_ASIZE_SHIFT - 1);
6166
6167 vmcb->control.exit_info_1 = exit_info;
6168 vmcb->control.exit_info_2 = info->next_rip;
6169
6170 break;
6171 }
6172 default:
6173 break;
6174 }
6175
6176
6177 if (static_cpu_has(X86_FEATURE_NRIPS))
6178 vmcb->control.next_rip = info->next_rip;
6179 vmcb->control.exit_code = icpt_info.exit_code;
6180 vmexit = nested_svm_exit_handled(svm);
6181
6182 ret = (vmexit == NESTED_EXIT_DONE) ? X86EMUL_INTERCEPTED
6183 : X86EMUL_CONTINUE;
6184
6185out:
6186 return ret;
6187}
6188
6189static void svm_handle_exit_irqoff(struct kvm_vcpu *vcpu)
6190{
6191
6192}
6193
6194static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu)
6195{
6196 if (pause_filter_thresh)
6197 shrink_ple_window(vcpu);
6198}
6199
6200static inline void avic_post_state_restore(struct kvm_vcpu *vcpu)
6201{
6202 if (avic_handle_apic_id_update(vcpu) != 0)
6203 return;
6204 avic_handle_dfr_update(vcpu);
6205 avic_handle_ldr_update(vcpu);
6206}
6207
6208static void svm_setup_mce(struct kvm_vcpu *vcpu)
6209{
6210	/* MCG_CAP bits [63:9] are reserved; keep only the architectural bits */
6211 vcpu->arch.mcg_cap &= 0x1ff;
6212}
6213
6214static int svm_smi_allowed(struct kvm_vcpu *vcpu)
6215{
6216 struct vcpu_svm *svm = to_svm(vcpu);
6217
6218
6219 if (!gif_set(svm))
6220 return 0;
6221
6222 if (is_guest_mode(&svm->vcpu) &&
6223 svm->nested.intercept & (1ULL << INTERCEPT_SMI)) {
6224
6225 svm->vmcb->control.exit_code = SVM_EXIT_SMI;
6226 svm->nested.exit_required = true;
6227 return 0;
6228 }
6229
6230 return 1;
6231}
6232
6233static int svm_pre_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
6234{
6235 struct vcpu_svm *svm = to_svm(vcpu);
6236 int ret;
6237
6238 if (is_guest_mode(vcpu)) {
6239
6240 put_smstate(u64, smstate, 0x7ed8, 1);
6241
6242 put_smstate(u64, smstate, 0x7ee0, svm->nested.vmcb);
6243
6244 svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX];
6245 svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP];
6246 svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP];
6247
6248 ret = nested_svm_vmexit(svm);
6249 if (ret)
6250 return ret;
6251 }
6252 return 0;
6253}
6254
6255static int svm_pre_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
6256{
6257 struct vcpu_svm *svm = to_svm(vcpu);
6258 struct vmcb *nested_vmcb;
6259 struct kvm_host_map map;
6260 u64 guest;
6261 u64 vmcb;
6262
6263 guest = GET_SMSTATE(u64, smstate, 0x7ed8);
6264 vmcb = GET_SMSTATE(u64, smstate, 0x7ee0);
6265
6266 if (guest) {
6267 if (kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(vmcb), &map) == -EINVAL)
6268 return 1;
6269 nested_vmcb = map.hva;
6270 enter_svm_guest_mode(svm, vmcb, nested_vmcb, &map);
6271 }
6272 return 0;
6273}
6274
6275static int enable_smi_window(struct kvm_vcpu *vcpu)
6276{
6277 struct vcpu_svm *svm = to_svm(vcpu);
6278
6279 if (!gif_set(svm)) {
6280 if (vgif_enabled(svm))
6281 set_intercept(svm, INTERCEPT_STGI);
6282
6283 return 1;
6284 }
6285 return 0;
6286}
6287
6288static int sev_flush_asids(void)
6289{
6290 int ret, error;
6291
6292	/*
6293	 * DEACTIVATE would clear the WBINVD indicator and make DF_FLUSH
6294	 * fail, so hold the deactivate lock for write around the flush.
6295	 */
6296 down_write(&sev_deactivate_lock);
6297
6298 wbinvd_on_all_cpus();
6299 ret = sev_guest_df_flush(&error);
6300
6301 up_write(&sev_deactivate_lock);
6302
6303 if (ret)
6304 pr_err("SEV: DF_FLUSH failed, ret=%d, error=%#x\n", ret, error);
6305
6306 return ret;
6307}
6308
6309
6310static bool __sev_recycle_asids(void)
6311{
6312 int pos;
6313
6314
6315 pos = find_next_bit(sev_reclaim_asid_bitmap,
6316 max_sev_asid, min_sev_asid - 1);
6317 if (pos >= max_sev_asid)
6318 return false;
6319
6320 if (sev_flush_asids())
6321 return false;
6322
6323 bitmap_xor(sev_asid_bitmap, sev_asid_bitmap, sev_reclaim_asid_bitmap,
6324 max_sev_asid);
6325 bitmap_zero(sev_reclaim_asid_bitmap, max_sev_asid);
6326
6327 return true;
6328}
6329
6330static int sev_asid_new(void)
6331{
6332 bool retry = true;
6333 int pos;
6334
6335 mutex_lock(&sev_bitmap_lock);
6336
6337	/*
6338	 * SEV guests must use ASIDs in the range min_sev_asid..max_sev_asid.
6339	 */
6340again:
6341 pos = find_next_zero_bit(sev_asid_bitmap, max_sev_asid, min_sev_asid - 1);
6342 if (pos >= max_sev_asid) {
6343 if (retry && __sev_recycle_asids()) {
6344 retry = false;
6345 goto again;
6346 }
6347 mutex_unlock(&sev_bitmap_lock);
6348 return -EBUSY;
6349 }
6350
6351 __set_bit(pos, sev_asid_bitmap);
6352
6353 mutex_unlock(&sev_bitmap_lock);
6354
6355 return pos + 1;
6356}
6357
6358static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
6359{
6360 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
6361 int asid, ret;
6362
6363 ret = -EBUSY;
6364 if (unlikely(sev->active))
6365 return ret;
6366
6367 asid = sev_asid_new();
6368 if (asid < 0)
6369 return ret;
6370
6371 ret = sev_platform_init(&argp->error);
6372 if (ret)
6373 goto e_free;
6374
6375 sev->active = true;
6376 sev->asid = asid;
6377 INIT_LIST_HEAD(&sev->regions_list);
6378
6379 return 0;
6380
6381e_free:
6382 sev_asid_free(asid);
6383 return ret;
6384}
6385
6386static int sev_bind_asid(struct kvm *kvm, unsigned int handle, int *error)
6387{
6388 struct sev_data_activate *data;
6389 int asid = sev_get_asid(kvm);
6390 int ret;
6391
6392 data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
6393 if (!data)
6394 return -ENOMEM;
6395
6396
6397 data->handle = handle;
6398 data->asid = asid;
6399 ret = sev_guest_activate(data, error);
6400 kfree(data);
6401
6402 return ret;
6403}
6404
6405static int __sev_issue_cmd(int fd, int id, void *data, int *error)
6406{
6407 struct fd f;
6408 int ret;
6409
6410 f = fdget(fd);
6411 if (!f.file)
6412 return -EBADF;
6413
6414 ret = sev_issue_cmd_external_user(f.file, id, data, error);
6415
6416 fdput(f);
6417 return ret;
6418}
6419
6420static int sev_issue_cmd(struct kvm *kvm, int id, void *data, int *error)
6421{
6422 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
6423
6424 return __sev_issue_cmd(sev->fd, id, data, error);
6425}
6426
6427static int sev_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
6428{
6429 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
6430 struct sev_data_launch_start *start;
6431 struct kvm_sev_launch_start params;
6432 void *dh_blob, *session_blob;
6433 int *error = &argp->error;
6434 int ret;
6435
6436 if (!sev_guest(kvm))
6437 return -ENOTTY;
6438
6439	if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
6440 return -EFAULT;
6441
6442 start = kzalloc(sizeof(*start), GFP_KERNEL_ACCOUNT);
6443 if (!start)
6444 return -ENOMEM;
6445
6446 dh_blob = NULL;
6447 if (params.dh_uaddr) {
6448 dh_blob = psp_copy_user_blob(params.dh_uaddr, params.dh_len);
6449 if (IS_ERR(dh_blob)) {
6450 ret = PTR_ERR(dh_blob);
6451 goto e_free;
6452 }
6453
6454 start->dh_cert_address = __sme_set(__pa(dh_blob));
6455 start->dh_cert_len = params.dh_len;
6456 }
6457
6458 session_blob = NULL;
6459 if (params.session_uaddr) {
6460 session_blob = psp_copy_user_blob(params.session_uaddr, params.session_len);
6461 if (IS_ERR(session_blob)) {
6462 ret = PTR_ERR(session_blob);
6463 goto e_free_dh;
6464 }
6465
6466 start->session_address = __sme_set(__pa(session_blob));
6467 start->session_len = params.session_len;
6468 }
6469
6470 start->handle = params.handle;
6471 start->policy = params.policy;
6472
6473
6474 ret = __sev_issue_cmd(argp->sev_fd, SEV_CMD_LAUNCH_START, start, error);
6475 if (ret)
6476 goto e_free_session;
6477
6478
6479 ret = sev_bind_asid(kvm, start->handle, error);
6480 if (ret)
6481 goto e_free_session;
6482
6483
6484 params.handle = start->handle;
6485	if (copy_to_user((void __user *)(uintptr_t)argp->data, &params, sizeof(params))) {
6486 sev_unbind_asid(kvm, start->handle);
6487 ret = -EFAULT;
6488 goto e_free_session;
6489 }
6490
6491 sev->handle = start->handle;
6492 sev->fd = argp->sev_fd;
6493
6494e_free_session:
6495 kfree(session_blob);
6496e_free_dh:
6497 kfree(dh_blob);
6498e_free:
6499 kfree(start);
6500 return ret;
6501}
6502
6503static unsigned long get_num_contig_pages(unsigned long idx,
6504 struct page **inpages, unsigned long npages)
6505{
6506 unsigned long paddr, next_paddr;
6507 unsigned long i = idx + 1, pages = 1;
6508
6509
6510 paddr = __sme_page_pa(inpages[idx]);
6511 while (i < npages) {
6512 next_paddr = __sme_page_pa(inpages[i++]);
6513 if ((paddr + PAGE_SIZE) == next_paddr) {
6514 pages++;
6515 paddr = next_paddr;
6516 continue;
6517 }
6518 break;
6519 }
6520
6521 return pages;
6522}
6523
6524static int sev_launch_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
6525{
6526 unsigned long vaddr, vaddr_end, next_vaddr, npages, pages, size, i;
6527 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
6528 struct kvm_sev_launch_update_data params;
6529 struct sev_data_launch_update_data *data;
6530 struct page **inpages;
6531 int ret;
6532
6533 if (!sev_guest(kvm))
6534 return -ENOTTY;
6535
6536	if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
6537 return -EFAULT;
6538
6539 data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
6540 if (!data)
6541 return -ENOMEM;
6542
6543 vaddr = params.uaddr;
6544 size = params.len;
6545 vaddr_end = vaddr + size;
6546
6547
6548 inpages = sev_pin_memory(kvm, vaddr, size, &npages, 1);
6549 if (!inpages) {
6550 ret = -ENOMEM;
6551 goto e_free;
6552 }
6553
6554	/*
6555	 * LAUNCH_UPDATE encrypts the memory region in place (it rewrites it
6556	 * with C=1), while the cache may still hold the unencrypted (C=0)
6557	 * data, so flush the cache lines for these pages first.
6558	 */
6560 sev_clflush_pages(inpages, npages);
6561
6562 for (i = 0; vaddr < vaddr_end; vaddr = next_vaddr, i += pages) {
6563 int offset, len;
6564
6565
6566
6567
6568
6569 offset = vaddr & (PAGE_SIZE - 1);
6570
6571
6572 pages = get_num_contig_pages(i, inpages, npages);
6573
6574 len = min_t(size_t, ((pages * PAGE_SIZE) - offset), size);
6575
6576 data->handle = sev->handle;
6577 data->len = len;
6578 data->address = __sme_page_pa(inpages[i]) + offset;
6579 ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_DATA, data, &argp->error);
6580 if (ret)
6581 goto e_unpin;
6582
6583 size -= len;
6584 next_vaddr = vaddr + len;
6585 }
6586
6587e_unpin:
6588
6589 for (i = 0; i < npages; i++) {
6590 set_page_dirty_lock(inpages[i]);
6591 mark_page_accessed(inpages[i]);
6592 }
6593
6594 sev_unpin_memory(kvm, inpages, npages);
6595e_free:
6596 kfree(data);
6597 return ret;
6598}
6599
6600static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp)
6601{
6602 void __user *measure = (void __user *)(uintptr_t)argp->data;
6603 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
6604 struct sev_data_launch_measure *data;
6605 struct kvm_sev_launch_measure params;
6606 void __user *p = NULL;
6607 void *blob = NULL;
6608 int ret;
6609
6610 if (!sev_guest(kvm))
6611 return -ENOTTY;
6612
6613	if (copy_from_user(&params, measure, sizeof(params)))
6614 return -EFAULT;
6615
6616 data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
6617 if (!data)
6618 return -ENOMEM;
6619
6620
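	/* A zero length means userspace only queries the required blob size. */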
6621 if (!params.len)
6622 goto cmd;
6623
6624 p = (void __user *)(uintptr_t)params.uaddr;
6625 if (p) {
6626 if (params.len > SEV_FW_BLOB_MAX_SIZE) {
6627 ret = -EINVAL;
6628 goto e_free;
6629 }
6630
6631 ret = -ENOMEM;
6632 blob = kmalloc(params.len, GFP_KERNEL);
6633 if (!blob)
6634 goto e_free;
6635
6636 data->address = __psp_pa(blob);
6637 data->len = params.len;
6638 }
6639
6640cmd:
6641 data->handle = sev->handle;
6642 ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_MEASURE, data, &argp->error);
6643
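	/*
	 * If userspace only queried the length, skip the error check and the
	 * blob copy and just report the size the firmware put in data->len.
	 */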
6647 if (!params.len)
6648 goto done;
6649
6650 if (ret)
6651 goto e_free_blob;
6652
6653 if (blob) {
6654 if (copy_to_user(p, blob, params.len))
6655 ret = -EFAULT;
6656 }
6657
6658done:
6659 params.len = data->len;
6660 if (copy_to_user(measure, &params, sizeof(params)))
6661 ret = -EFAULT;
6662e_free_blob:
6663 kfree(blob);
6664e_free:
6665 kfree(data);
6666 return ret;
6667}
6668
6669static int sev_launch_finish(struct kvm *kvm, struct kvm_sev_cmd *argp)
6670{
6671 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
6672 struct sev_data_launch_finish *data;
6673 int ret;
6674
6675 if (!sev_guest(kvm))
6676 return -ENOTTY;
6677
6678 data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
6679 if (!data)
6680 return -ENOMEM;
6681
6682 data->handle = sev->handle;
6683 ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_FINISH, data, &argp->error);
6684
6685 kfree(data);
6686 return ret;
6687}
6688
6689static int sev_guest_status(struct kvm *kvm, struct kvm_sev_cmd *argp)
6690{
6691 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
6692 struct kvm_sev_guest_status params;
6693 struct sev_data_guest_status *data;
6694 int ret;
6695
6696 if (!sev_guest(kvm))
6697 return -ENOTTY;
6698
6699 data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
6700 if (!data)
6701 return -ENOMEM;
6702
6703 data->handle = sev->handle;
6704 ret = sev_issue_cmd(kvm, SEV_CMD_GUEST_STATUS, data, &argp->error);
6705 if (ret)
6706 goto e_free;
6707
6708 params.policy = data->policy;
6709 params.state = data->state;
6710 params.handle = data->handle;
6711
6712 if (copy_to_user((void __user *)(uintptr_t)argp->data, &params, sizeof(params)))
6713 ret = -EFAULT;
6714e_free:
6715 kfree(data);
6716 return ret;
6717}
6718
6719static int __sev_issue_dbg_cmd(struct kvm *kvm, unsigned long src,
6720 unsigned long dst, int size,
6721 int *error, bool enc)
6722{
6723 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
6724 struct sev_data_dbg *data;
6725 int ret;
6726
6727 data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
6728 if (!data)
6729 return -ENOMEM;
6730
6731 data->handle = sev->handle;
6732 data->dst_addr = dst;
6733 data->src_addr = src;
6734 data->len = size;
6735
6736 ret = sev_issue_cmd(kvm,
6737 enc ? SEV_CMD_DBG_ENCRYPT : SEV_CMD_DBG_DECRYPT,
6738 data, error);
6739 kfree(data);
6740 return ret;
6741}
6742
6743static int __sev_dbg_decrypt(struct kvm *kvm, unsigned long src_paddr,
6744 unsigned long dst_paddr, int sz, int *err)
6745{
6746 int offset;
6747
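	/*
	 * The firmware works on 16-byte aligned blocks, so widen the request
	 * accordingly; decrypting a few extra bytes is harmless as long as the
	 * caller's destination buffer is large enough.
	 */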
6752 src_paddr = round_down(src_paddr, 16);
6753 offset = src_paddr & 15;
6754 sz = round_up(sz + offset, 16);
6755
6756 return __sev_issue_dbg_cmd(kvm, src_paddr, dst_paddr, sz, err, false);
6757}
6758
6759static int __sev_dbg_decrypt_user(struct kvm *kvm, unsigned long paddr,
6760 unsigned long __user dst_uaddr,
6761 unsigned long dst_paddr,
6762 int size, int *err)
6763{
6764 struct page *tpage = NULL;
6765 int ret, offset;
6766
6767
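	/*
	 * If source, destination or length is not 16-byte aligned, decrypt
	 * into an intermediate page first.
	 */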
6768 if (!IS_ALIGNED(dst_paddr, 16) ||
6769 !IS_ALIGNED(paddr, 16) ||
6770 !IS_ALIGNED(size, 16)) {
6771 tpage = (void *)alloc_page(GFP_KERNEL);
6772 if (!tpage)
6773 return -ENOMEM;
6774
6775 dst_paddr = __sme_page_pa(tpage);
6776 }
6777
6778 ret = __sev_dbg_decrypt(kvm, paddr, dst_paddr, size, err);
6779 if (ret)
6780 goto e_free;
6781
6782 if (tpage) {
6783 offset = paddr & 15;
6784 if (copy_to_user((void __user *)(uintptr_t)dst_uaddr,
6785 page_address(tpage) + offset, size))
6786 ret = -EFAULT;
6787 }
6788
6789e_free:
6790 if (tpage)
6791 __free_page(tpage);
6792
6793 return ret;
6794}
6795
6796static int __sev_dbg_encrypt_user(struct kvm *kvm, unsigned long paddr,
6797 unsigned long __user vaddr,
6798 unsigned long dst_paddr,
6799 unsigned long __user dst_vaddr,
6800 int size, int *error)
6801{
6802 struct page *src_tpage = NULL;
6803 struct page *dst_tpage = NULL;
6804 int ret, len = size;
6805
6806
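	/*
	 * If the source address is not 16-byte aligned, bounce the data
	 * through an intermediate page.
	 */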
6807 if (!IS_ALIGNED(vaddr, 16)) {
6808 src_tpage = alloc_page(GFP_KERNEL);
6809 if (!src_tpage)
6810 return -ENOMEM;
6811
6812 if (copy_from_user(page_address(src_tpage),
6813 (void __user *)(uintptr_t)vaddr, size)) {
6814 __free_page(src_tpage);
6815 return -EFAULT;
6816 }
6817
6818 paddr = __sme_page_pa(src_tpage);
6819 }
6820
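	/*
	 * If the destination address or the length is not 16-byte aligned, do
	 * a read-modify-write: decrypt the existing destination into a scratch
	 * page, overlay the new plaintext at the right offset, then encrypt
	 * the whole aligned region back.
	 */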
6827 if (!IS_ALIGNED(dst_vaddr, 16) || !IS_ALIGNED(size, 16)) {
6828 int dst_offset;
6829
6830 dst_tpage = alloc_page(GFP_KERNEL);
6831 if (!dst_tpage) {
6832 ret = -ENOMEM;
6833 goto e_free;
6834 }
6835
6836 ret = __sev_dbg_decrypt(kvm, dst_paddr,
6837 __sme_page_pa(dst_tpage), size, error);
6838 if (ret)
6839 goto e_free;
6840
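	/*
	 * Place the plaintext at the destination's offset within the
	 * decrypted scratch page.
	 */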
6845 dst_offset = dst_paddr & 15;
6846
6847 if (src_tpage)
6848 memcpy(page_address(dst_tpage) + dst_offset,
6849 page_address(src_tpage), size);
6850 else {
6851 if (copy_from_user(page_address(dst_tpage) + dst_offset,
6852 (void __user *)(uintptr_t)vaddr, size)) {
6853 ret = -EFAULT;
6854 goto e_free;
6855 }
6856 }
6857
6858 paddr = __sme_page_pa(dst_tpage);
6859 dst_paddr = round_down(dst_paddr, 16);
6860 len = round_up(size, 16);
6861 }
6862
6863 ret = __sev_issue_dbg_cmd(kvm, paddr, dst_paddr, len, error, true);
6864
6865e_free:
6866 if (src_tpage)
6867 __free_page(src_tpage);
6868 if (dst_tpage)
6869 __free_page(dst_tpage);
6870 return ret;
6871}
6872
6873static int sev_dbg_crypt(struct kvm *kvm, struct kvm_sev_cmd *argp, bool dec)
6874{
6875 unsigned long vaddr, vaddr_end, next_vaddr;
6876 unsigned long dst_vaddr;
6877 struct page **src_p, **dst_p;
6878 struct kvm_sev_dbg debug;
6879 unsigned long n;
6880 unsigned int size;
6881 int ret;
6882
6883 if (!sev_guest(kvm))
6884 return -ENOTTY;
6885
6886 if (copy_from_user(&debug, (void __user *)(uintptr_t)argp->data, sizeof(debug)))
6887 return -EFAULT;
6888
6889 if (!debug.len || debug.src_uaddr + debug.len < debug.src_uaddr)
6890 return -EINVAL;
6891 if (!debug.dst_uaddr)
6892 return -EINVAL;
6893
6894 vaddr = debug.src_uaddr;
6895 size = debug.len;
6896 vaddr_end = vaddr + size;
6897 dst_vaddr = debug.dst_uaddr;
6898
6899 for (; vaddr < vaddr_end; vaddr = next_vaddr) {
6900 int len, s_off, d_off;
6901
6902
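	/* Pin the userspace source and destination pages for this chunk. */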
6903 src_p = sev_pin_memory(kvm, vaddr & PAGE_MASK, PAGE_SIZE, &n, 0);
6904 if (!src_p)
6905 return -EFAULT;
6906
6907 dst_p = sev_pin_memory(kvm, dst_vaddr & PAGE_MASK, PAGE_SIZE, &n, 1);
6908 if (!dst_p) {
6909 sev_unpin_memory(kvm, src_p, n);
6910 return -EFAULT;
6911 }
6912
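	/*
	 * Flush the caches before the firmware accesses these pages so that
	 * no cache lines with the wrong C-bit are written back over the data
	 * afterwards.
	 */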
6919 sev_clflush_pages(src_p, 1);
6920 sev_clflush_pages(dst_p, 1);
6921
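	/*
	 * The buffers may not be page aligned, so operate on at most the
	 * remainder of the current source page per iteration.
	 */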
6926 s_off = vaddr & ~PAGE_MASK;
6927 d_off = dst_vaddr & ~PAGE_MASK;
6928 len = min_t(size_t, (PAGE_SIZE - s_off), size);
6929
6930 if (dec)
6931 ret = __sev_dbg_decrypt_user(kvm,
6932 __sme_page_pa(src_p[0]) + s_off,
6933 dst_vaddr,
6934 __sme_page_pa(dst_p[0]) + d_off,
6935 len, &argp->error);
6936 else
6937 ret = __sev_dbg_encrypt_user(kvm,
6938 __sme_page_pa(src_p[0]) + s_off,
6939 vaddr,
6940 __sme_page_pa(dst_p[0]) + d_off,
6941 dst_vaddr,
6942 len, &argp->error);
6943
6944 sev_unpin_memory(kvm, src_p, n);
6945 sev_unpin_memory(kvm, dst_p, n);
6946
6947 if (ret)
6948 goto err;
6949
6950 next_vaddr = vaddr + len;
6951 dst_vaddr = dst_vaddr + len;
6952 size -= len;
6953 }
6954err:
6955 return ret;
6956}
6957
6958static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp)
6959{
6960 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
6961 struct sev_data_launch_secret *data;
6962 struct kvm_sev_launch_secret params;
6963 struct page **pages;
6964 void *blob, *hdr;
6965 unsigned long n;
6966 int ret, offset;
6967
6968 if (!sev_guest(kvm))
6969 return -ENOTTY;
6970
6971 if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
6972 return -EFAULT;
6973
6974 pages = sev_pin_memory(kvm, params.guest_uaddr, params.guest_len, &n, 1);
6975 if (!pages)
6976 return -ENOMEM;
6977
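	/*
	 * The firmware needs the guest destination to be physically
	 * contiguous; verify that before issuing the command.
	 */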
6982 if (get_num_contig_pages(0, pages, n) != n) {
6983 ret = -EINVAL;
6984 goto e_unpin_memory;
6985 }
6986
6987 ret = -ENOMEM;
6988 data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
6989 if (!data)
6990 goto e_unpin_memory;
6991
6992 offset = params.guest_uaddr & (PAGE_SIZE - 1);
6993 data->guest_address = __sme_page_pa(pages[0]) + offset;
6994 data->guest_len = params.guest_len;
6995
6996 blob = psp_copy_user_blob(params.trans_uaddr, params.trans_len);
6997 if (IS_ERR(blob)) {
6998 ret = PTR_ERR(blob);
6999 goto e_free;
7000 }
7001
7002 data->trans_address = __psp_pa(blob);
7003 data->trans_len = params.trans_len;
7004
7005 hdr = psp_copy_user_blob(params.hdr_uaddr, params.hdr_len);
7006 if (IS_ERR(hdr)) {
7007 ret = PTR_ERR(hdr);
7008 goto e_free_blob;
7009 }
7010 data->hdr_address = __psp_pa(hdr);
7011 data->hdr_len = params.hdr_len;
7012
7013 data->handle = sev->handle;
7014 ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_SECRET, data, &argp->error);
7015
7016 kfree(hdr);
7017
7018e_free_blob:
7019 kfree(blob);
7020e_free:
7021 kfree(data);
7022e_unpin_memory:
7023 sev_unpin_memory(kvm, pages, n);
7024 return ret;
7025}
7026
7027static int svm_mem_enc_op(struct kvm *kvm, void __user *argp)
7028{
7029 struct kvm_sev_cmd sev_cmd;
7030 int r;
7031
7032 if (!svm_sev_enabled())
7033 return -ENOTTY;
7034
7035 if (copy_from_user(&sev_cmd, argp, sizeof(struct kvm_sev_cmd)))
7036 return -EFAULT;
7037
7038 mutex_lock(&kvm->lock);
7039
7040 switch (sev_cmd.id) {
7041 case KVM_SEV_INIT:
7042 r = sev_guest_init(kvm, &sev_cmd);
7043 break;
7044 case KVM_SEV_LAUNCH_START:
7045 r = sev_launch_start(kvm, &sev_cmd);
7046 break;
7047 case KVM_SEV_LAUNCH_UPDATE_DATA:
7048 r = sev_launch_update_data(kvm, &sev_cmd);
7049 break;
7050 case KVM_SEV_LAUNCH_MEASURE:
7051 r = sev_launch_measure(kvm, &sev_cmd);
7052 break;
7053 case KVM_SEV_LAUNCH_FINISH:
7054 r = sev_launch_finish(kvm, &sev_cmd);
7055 break;
7056 case KVM_SEV_GUEST_STATUS:
7057 r = sev_guest_status(kvm, &sev_cmd);
7058 break;
7059 case KVM_SEV_DBG_DECRYPT:
7060 r = sev_dbg_crypt(kvm, &sev_cmd, true);
7061 break;
7062 case KVM_SEV_DBG_ENCRYPT:
7063 r = sev_dbg_crypt(kvm, &sev_cmd, false);
7064 break;
7065 case KVM_SEV_LAUNCH_SECRET:
7066 r = sev_launch_secret(kvm, &sev_cmd);
7067 break;
7068 default:
7069 r = -EINVAL;
7070 goto out;
7071 }
7072
7073 if (copy_to_user(argp, &sev_cmd, sizeof(struct kvm_sev_cmd)))
7074 r = -EFAULT;
7075
7076out:
7077 mutex_unlock(&kvm->lock);
7078 return r;
7079}
7080
7081static int svm_register_enc_region(struct kvm *kvm,
7082 struct kvm_enc_region *range)
7083{
7084 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
7085 struct enc_region *region;
7086 int ret = 0;
7087
7088 if (!sev_guest(kvm))
7089 return -ENOTTY;
7090
7091 if (range->addr > ULONG_MAX || range->size > ULONG_MAX)
7092 return -EINVAL;
7093
7094 region = kzalloc(sizeof(*region), GFP_KERNEL_ACCOUNT);
7095 if (!region)
7096 return -ENOMEM;
7097
7098 region->pages = sev_pin_memory(kvm, range->addr, range->size, &region->npages, 1);
7099 if (!region->pages) {
7100 ret = -ENOMEM;
7101 goto e_free;
7102 }
7103
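	/*
	 * The guest may change the memory encryption attribute (C-bit) for
	 * this range, so flush the caches to ensure its data reaches memory
	 * with the correct C-bit.
	 */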
7110 sev_clflush_pages(region->pages, region->npages);
7111
7112 region->uaddr = range->addr;
7113 region->size = range->size;
7114
7115 mutex_lock(&kvm->lock);
7116 list_add_tail(&region->list, &sev->regions_list);
7117 mutex_unlock(&kvm->lock);
7118
7119 return ret;
7120
7121e_free:
7122 kfree(region);
7123 return ret;
7124}
7125
7126static struct enc_region *
7127find_enc_region(struct kvm *kvm, struct kvm_enc_region *range)
7128{
7129 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
7130 struct list_head *head = &sev->regions_list;
7131 struct enc_region *i;
7132
7133 list_for_each_entry(i, head, list) {
7134 if (i->uaddr == range->addr &&
7135 i->size == range->size)
7136 return i;
7137 }
7138
7139 return NULL;
7140}
7141
7142
7143static int svm_unregister_enc_region(struct kvm *kvm,
7144 struct kvm_enc_region *range)
7145{
7146 struct enc_region *region;
7147 int ret;
7148
7149 mutex_lock(&kvm->lock);
7150
7151 if (!sev_guest(kvm)) {
7152 ret = -ENOTTY;
7153 goto failed;
7154 }
7155
7156 region = find_enc_region(kvm, range);
7157 if (!region) {
7158 ret = -EINVAL;
7159 goto failed;
7160 }
7161
7162 __unregister_enc_region_locked(kvm, region);
7163
7164 mutex_unlock(&kvm->lock);
7165 return 0;
7166
7167failed:
7168 mutex_unlock(&kvm->lock);
7169 return ret;
7170}
7171
7172static bool svm_need_emulation_on_page_fault(struct kvm_vcpu *vcpu)
7173{
7174 unsigned long cr4 = kvm_read_cr4(vcpu);
7175 bool smep = cr4 & X86_CR4_SMEP;
7176 bool smap = cr4 & X86_CR4_SMAP;
7177 bool is_user = svm_get_cpl(vcpu) == 3;
7178
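	/*
	 * Work around AMD Erratum 1096: with SMAP enabled, the CPU's decode
	 * assist can fail to fetch the bytes of the instruction that caused a
	 * #NPF, so KVM ends up here with no instruction bytes to emulate.
	 * The erratum can only trigger when SMAP is active and either SMEP is
	 * disabled or the access came from user mode (CPL 3), which is what
	 * the check below detects.
	 *
	 * For a non-SEV guest, return true so that KVM retries the emulation
	 * by fetching the instruction from guest memory itself.  For an SEV
	 * guest that memory is encrypted and unreadable by KVM, so log the
	 * erratum hit and request a shutdown (triple fault) instead.
	 */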
7219 if (smap && (!smep || is_user)) {
7220 if (!sev_guest(vcpu->kvm))
7221 return true;
7222
7223 pr_err_ratelimited("KVM: SEV Guest triggered AMD Erratum 1096\n");
7224 kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
7225 }
7226
7227 return false;
7228}
7229
7230static bool svm_apic_init_signal_blocked(struct kvm_vcpu *vcpu)
7231{
7232 struct vcpu_svm *svm = to_svm(vcpu);
7233
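	/*
	 * Report INIT as blocked while GIF is clear or while INIT is
	 * intercepted by the current VMCB (e.g. when running a nested guest
	 * whose VMCB intercepts INIT).
	 */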
7241 return !gif_set(svm) ||
7242 (svm->vmcb->control.intercept & (1ULL << INTERCEPT_INIT));
7243}
7244
7245static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
7246 .cpu_has_kvm_support = has_svm,
7247 .disabled_by_bios = is_disabled,
7248 .hardware_setup = svm_hardware_setup,
7249 .hardware_unsetup = svm_hardware_unsetup,
7250 .check_processor_compatibility = svm_check_processor_compat,
7251 .hardware_enable = svm_hardware_enable,
7252 .hardware_disable = svm_hardware_disable,
7253 .cpu_has_accelerated_tpr = svm_cpu_has_accelerated_tpr,
7254 .has_emulated_msr = svm_has_emulated_msr,
7255
7256 .vcpu_create = svm_create_vcpu,
7257 .vcpu_free = svm_free_vcpu,
7258 .vcpu_reset = svm_vcpu_reset,
7259
7260 .vm_alloc = svm_vm_alloc,
7261 .vm_free = svm_vm_free,
7262 .vm_init = avic_vm_init,
7263 .vm_destroy = svm_vm_destroy,
7264
7265 .prepare_guest_switch = svm_prepare_guest_switch,
7266 .vcpu_load = svm_vcpu_load,
7267 .vcpu_put = svm_vcpu_put,
7268 .vcpu_blocking = svm_vcpu_blocking,
7269 .vcpu_unblocking = svm_vcpu_unblocking,
7270
7271 .update_bp_intercept = update_bp_intercept,
7272 .get_msr_feature = svm_get_msr_feature,
7273 .get_msr = svm_get_msr,
7274 .set_msr = svm_set_msr,
7275 .get_segment_base = svm_get_segment_base,
7276 .get_segment = svm_get_segment,
7277 .set_segment = svm_set_segment,
7278 .get_cpl = svm_get_cpl,
7279 .get_cs_db_l_bits = kvm_get_cs_db_l_bits,
7280 .decache_cr0_guest_bits = svm_decache_cr0_guest_bits,
7281 .decache_cr4_guest_bits = svm_decache_cr4_guest_bits,
7282 .set_cr0 = svm_set_cr0,
7283 .set_cr3 = svm_set_cr3,
7284 .set_cr4 = svm_set_cr4,
7285 .set_efer = svm_set_efer,
7286 .get_idt = svm_get_idt,
7287 .set_idt = svm_set_idt,
7288 .get_gdt = svm_get_gdt,
7289 .set_gdt = svm_set_gdt,
7290 .get_dr6 = svm_get_dr6,
7291 .set_dr6 = svm_set_dr6,
7292 .set_dr7 = svm_set_dr7,
7293 .sync_dirty_debug_regs = svm_sync_dirty_debug_regs,
7294 .cache_reg = svm_cache_reg,
7295 .get_rflags = svm_get_rflags,
7296 .set_rflags = svm_set_rflags,
7297
7298 .tlb_flush = svm_flush_tlb,
7299 .tlb_flush_gva = svm_flush_tlb_gva,
7300
7301 .run = svm_vcpu_run,
7302 .handle_exit = handle_exit,
7303 .skip_emulated_instruction = skip_emulated_instruction,
7304 .set_interrupt_shadow = svm_set_interrupt_shadow,
7305 .get_interrupt_shadow = svm_get_interrupt_shadow,
7306 .patch_hypercall = svm_patch_hypercall,
7307 .set_irq = svm_set_irq,
7308 .set_nmi = svm_inject_nmi,
7309 .queue_exception = svm_queue_exception,
7310 .cancel_injection = svm_cancel_injection,
7311 .interrupt_allowed = svm_interrupt_allowed,
7312 .nmi_allowed = svm_nmi_allowed,
7313 .get_nmi_mask = svm_get_nmi_mask,
7314 .set_nmi_mask = svm_set_nmi_mask,
7315 .enable_nmi_window = enable_nmi_window,
7316 .enable_irq_window = enable_irq_window,
7317 .update_cr8_intercept = update_cr8_intercept,
7318 .set_virtual_apic_mode = svm_set_virtual_apic_mode,
7319 .get_enable_apicv = svm_get_enable_apicv,
7320 .refresh_apicv_exec_ctrl = svm_refresh_apicv_exec_ctrl,
7321 .load_eoi_exitmap = svm_load_eoi_exitmap,
7322 .hwapic_irr_update = svm_hwapic_irr_update,
7323 .hwapic_isr_update = svm_hwapic_isr_update,
7324 .sync_pir_to_irr = kvm_lapic_find_highest_irr,
7325 .apicv_post_state_restore = avic_post_state_restore,
7326
7327 .set_tss_addr = svm_set_tss_addr,
7328 .set_identity_map_addr = svm_set_identity_map_addr,
7329 .get_tdp_level = get_npt_level,
7330 .get_mt_mask = svm_get_mt_mask,
7331
7332 .get_exit_info = svm_get_exit_info,
7333
7334 .get_lpage_level = svm_get_lpage_level,
7335
7336 .cpuid_update = svm_cpuid_update,
7337
7338 .rdtscp_supported = svm_rdtscp_supported,
7339 .invpcid_supported = svm_invpcid_supported,
7340 .mpx_supported = svm_mpx_supported,
7341 .xsaves_supported = svm_xsaves_supported,
7342 .umip_emulated = svm_umip_emulated,
7343 .pt_supported = svm_pt_supported,
7344
7345 .set_supported_cpuid = svm_set_supported_cpuid,
7346
7347 .has_wbinvd_exit = svm_has_wbinvd_exit,
7348
7349 .read_l1_tsc_offset = svm_read_l1_tsc_offset,
7350 .write_l1_tsc_offset = svm_write_l1_tsc_offset,
7351
7352 .set_tdp_cr3 = set_tdp_cr3,
7353
7354 .check_intercept = svm_check_intercept,
7355 .handle_exit_irqoff = svm_handle_exit_irqoff,
7356
7357 .request_immediate_exit = __kvm_request_immediate_exit,
7358
7359 .sched_in = svm_sched_in,
7360
7361 .pmu_ops = &amd_pmu_ops,
7362 .deliver_posted_interrupt = svm_deliver_avic_intr,
7363 .dy_apicv_has_pending_interrupt = svm_dy_apicv_has_pending_interrupt,
7364 .update_pi_irte = svm_update_pi_irte,
7365 .setup_mce = svm_setup_mce,
7366
7367 .smi_allowed = svm_smi_allowed,
7368 .pre_enter_smm = svm_pre_enter_smm,
7369 .pre_leave_smm = svm_pre_leave_smm,
7370 .enable_smi_window = enable_smi_window,
7371
7372 .mem_enc_op = svm_mem_enc_op,
7373 .mem_enc_reg_region = svm_register_enc_region,
7374 .mem_enc_unreg_region = svm_unregister_enc_region,
7375
7376 .nested_enable_evmcs = NULL,
7377 .nested_get_evmcs_version = NULL,
7378
7379 .need_emulation_on_page_fault = svm_need_emulation_on_page_fault,
7380
7381 .apic_init_signal_blocked = svm_apic_init_signal_blocked,
7382};
7383
7384static int __init svm_init(void)
7385{
7386 return kvm_init(&svm_x86_ops, sizeof(struct vcpu_svm),
7387 __alignof__(struct vcpu_svm), THIS_MODULE);
7388}
7389
7390static void __exit svm_exit(void)
7391{
7392 kvm_exit();
7393}
7394
7395module_init(svm_init)
7396module_exit(svm_exit)
7397