#define pr_fmt(fmt) "SVM: " fmt

#include <linux/kvm_host.h>

#include "irq.h"
#include "mmu.h"
#include "kvm_cache_regs.h"
#include "x86.h"
#include "cpuid.h"
#include "pmu.h"

#include <linux/module.h>
#include <linux/mod_devicetable.h>
#include <linux/kernel.h>
#include <linux/vmalloc.h>
#include <linux/highmem.h>
#include <linux/sched.h>
#include <linux/trace_events.h>
#include <linux/slab.h>
#include <linux/amd-iommu.h>
#include <linux/hashtable.h>
#include <linux/frame.h>
#include <linux/psp-sev.h>
#include <linux/file.h>
#include <linux/pagemap.h>
#include <linux/swap.h>

#include <asm/apic.h>
#include <asm/perf_event.h>
#include <asm/tlbflush.h>
#include <asm/desc.h>
#include <asm/debugreg.h>
#include <asm/kvm_para.h>
#include <asm/irq_remapping.h>
#include <asm/spec-ctrl.h>

#include <asm/virtext.h>
#include "trace.h"

#define __ex(x) __kvm_handle_fault_on_reboot(x)

MODULE_AUTHOR("Qumranet");
MODULE_LICENSE("GPL");

static const struct x86_cpu_id svm_cpu_id[] = {
	X86_FEATURE_MATCH(X86_FEATURE_SVM),
	{}
};
MODULE_DEVICE_TABLE(x86cpu, svm_cpu_id);

#define IOPM_ALLOC_ORDER 2
#define MSRPM_ALLOC_ORDER 1

#define SEG_TYPE_LDT 2
#define SEG_TYPE_BUSY_TSS16 3

#define SVM_FEATURE_NPT            (1 << 0)
#define SVM_FEATURE_LBRV           (1 << 1)
#define SVM_FEATURE_SVML           (1 << 2)
#define SVM_FEATURE_NRIP           (1 << 3)
#define SVM_FEATURE_TSC_RATE       (1 << 4)
#define SVM_FEATURE_VMCB_CLEAN     (1 << 5)
#define SVM_FEATURE_FLUSH_ASID     (1 << 6)
#define SVM_FEATURE_DECODE_ASSIST  (1 << 7)
#define SVM_FEATURE_PAUSE_FILTER   (1 << 10)

#define SVM_AVIC_DOORBELL	0xc001011b

#define NESTED_EXIT_HOST	0
#define NESTED_EXIT_DONE	1
#define NESTED_EXIT_CONTINUE	2

#define DEBUGCTL_RESERVED_BITS (~(0x3fULL))

#define TSC_RATIO_RSVD	0xffffff0000000000ULL
#define TSC_RATIO_MIN	0x0000000000000001ULL
#define TSC_RATIO_MAX	0x000000ffffffffffULL

#define AVIC_HPA_MASK	~((0xFFFULL << 52) | 0xFFF)

#define AVIC_MAX_PHYSICAL_ID_COUNT	255

#define AVIC_UNACCEL_ACCESS_WRITE_MASK		1
#define AVIC_UNACCEL_ACCESS_OFFSET_MASK		0xFF0
#define AVIC_UNACCEL_ACCESS_VECTOR_MASK		0xFFFFFFFF

#define AVIC_VCPU_ID_BITS	8
#define AVIC_VCPU_ID_MASK	((1 << AVIC_VCPU_ID_BITS) - 1)

#define AVIC_VM_ID_BITS		24
#define AVIC_VM_ID_NR		(1 << AVIC_VM_ID_BITS)
#define AVIC_VM_ID_MASK		((1 << AVIC_VM_ID_BITS) - 1)

#define AVIC_GATAG(x, y)	(((x & AVIC_VM_ID_MASK) << AVIC_VCPU_ID_BITS) | \
				 (y & AVIC_VCPU_ID_MASK))
#define AVIC_GATAG_TO_VMID(x)	((x >> AVIC_VCPU_ID_BITS) & AVIC_VM_ID_MASK)
#define AVIC_GATAG_TO_VCPUID(x)	(x & AVIC_VCPU_ID_MASK)

static bool erratum_383_found __read_mostly;

static const u32 host_save_user_msrs[] = {
#ifdef CONFIG_X86_64
	MSR_STAR, MSR_LSTAR, MSR_CSTAR, MSR_SYSCALL_MASK, MSR_KERNEL_GS_BASE,
	MSR_FS_BASE,
#endif
	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
	MSR_TSC_AUX,
};

#define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs)

struct kvm_sev_info {
	bool active;
	unsigned int asid;
	unsigned int handle;
	int fd;
	unsigned long pages_locked;
	struct list_head regions_list;
};

struct kvm_svm {
	struct kvm kvm;

	u32 avic_vm_id;
	u32 ldr_mode;
	struct page *avic_logical_id_table_page;
	struct page *avic_physical_id_table_page;
	struct hlist_node hnode;

	struct kvm_sev_info sev_info;
};

struct kvm_vcpu;

struct nested_state {
	struct vmcb *hsave;
	u64 hsave_msr;
	u64 vm_cr_msr;
	u64 vmcb;

	u32 *msrpm;

	u64 vmcb_msrpm;
	u64 vmcb_iopm;

	bool exit_required;

	u32 intercept_cr;
	u32 intercept_dr;
	u32 intercept_exceptions;
	u64 intercept;

	u64 nested_cr3;
};

#define MSRPM_OFFSETS 16
static u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly;

static uint64_t osvw_len = 4, osvw_status;

struct vcpu_svm {
	struct kvm_vcpu vcpu;
	struct vmcb *vmcb;
	unsigned long vmcb_pa;
	struct svm_cpu_data *svm_data;
	uint64_t asid_generation;
	uint64_t sysenter_esp;
	uint64_t sysenter_eip;
	uint64_t tsc_aux;

	u64 msr_decfg;

	u64 next_rip;

	u64 host_user_msrs[NR_HOST_SAVE_USER_MSRS];
	struct {
		u16 fs;
		u16 gs;
		u16 ldt;
		u64 gs_base;
	} host;

	u64 spec_ctrl;

	u64 virt_spec_ctrl;

	u32 *msrpm;

	ulong nmi_iret_rip;

	struct nested_state nested;

	bool nmi_singlestep;
	u64 nmi_singlestep_guest_rflags;

	unsigned int3_injected;
	unsigned long int3_rip;

	bool nrips_enabled : 1;

	u32 ldr_reg;
	struct page *avic_backing_page;
	u64 *avic_physical_id_cache;
	bool avic_is_running;

	struct list_head ir_list;
	spinlock_t ir_list_lock;

	unsigned int last_cpu;
};

struct amd_svm_iommu_ir {
	struct list_head node;
	void *data;
};

#define AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK	(0xFF)
#define AVIC_LOGICAL_ID_ENTRY_VALID_MASK		(1 << 31)

#define AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK	(0xFFULL)
#define AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK	(0xFFFFFFFFFFULL << 12)
#define AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK		(1ULL << 62)
#define AVIC_PHYSICAL_ID_ENTRY_VALID_MASK		(1ULL << 63)

static DEFINE_PER_CPU(u64, current_tsc_ratio);
#define TSC_RATIO_DEFAULT	0x0100000000ULL

#define MSR_INVALID	0xffffffffU

static const struct svm_direct_access_msrs {
	u32 index;
	bool always;
} direct_access_msrs[] = {
	{ .index = MSR_STAR,			.always = true  },
	{ .index = MSR_IA32_SYSENTER_CS,	.always = true  },
#ifdef CONFIG_X86_64
	{ .index = MSR_GS_BASE,			.always = true  },
	{ .index = MSR_FS_BASE,			.always = true  },
	{ .index = MSR_KERNEL_GS_BASE,		.always = true  },
	{ .index = MSR_LSTAR,			.always = true  },
	{ .index = MSR_CSTAR,			.always = true  },
	{ .index = MSR_SYSCALL_MASK,		.always = true  },
#endif
	{ .index = MSR_IA32_SPEC_CTRL,		.always = false },
	{ .index = MSR_IA32_PRED_CMD,		.always = false },
	{ .index = MSR_IA32_LASTBRANCHFROMIP,	.always = false },
	{ .index = MSR_IA32_LASTBRANCHTOIP,	.always = false },
	{ .index = MSR_IA32_LASTINTFROMIP,	.always = false },
	{ .index = MSR_IA32_LASTINTTOIP,	.always = false },
	{ .index = MSR_INVALID,			.always = false },
};

#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
static bool npt_enabled = true;
#else
static bool npt_enabled;
#endif

static unsigned short pause_filter_thresh = KVM_DEFAULT_PLE_GAP;
module_param(pause_filter_thresh, ushort, 0444);

static unsigned short pause_filter_count = KVM_SVM_DEFAULT_PLE_WINDOW;
module_param(pause_filter_count, ushort, 0444);

static unsigned short pause_filter_count_grow = KVM_DEFAULT_PLE_WINDOW_GROW;
module_param(pause_filter_count_grow, ushort, 0444);

static unsigned short pause_filter_count_shrink = KVM_DEFAULT_PLE_WINDOW_SHRINK;
module_param(pause_filter_count_shrink, ushort, 0444);

static unsigned short pause_filter_count_max = KVM_SVM_DEFAULT_PLE_WINDOW_MAX;
module_param(pause_filter_count_max, ushort, 0444);

static int npt = true;
module_param(npt, int, S_IRUGO);

static int nested = true;
module_param(nested, int, S_IRUGO);

static int avic;
#ifdef CONFIG_X86_LOCAL_APIC
module_param(avic, int, S_IRUGO);
#endif

static int vls = true;
module_param(vls, int, 0444);

static int vgif = true;
module_param(vgif, int, 0444);

static int sev = IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT);
module_param(sev, int, 0444);

static u8 rsm_ins_bytes[] = "\x0f\xaa";

static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
static void svm_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa);
static void svm_complete_interrupts(struct vcpu_svm *svm);

static int nested_svm_exit_handled(struct vcpu_svm *svm);
static int nested_svm_intercept(struct vcpu_svm *svm);
static int nested_svm_vmexit(struct vcpu_svm *svm);
static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
				      bool has_error_code, u32 error_code);

enum {
	VMCB_INTERCEPTS,

	VMCB_PERM_MAP,
	VMCB_ASID,
	VMCB_INTR,
	VMCB_NPT,
	VMCB_CR,
	VMCB_DR,
	VMCB_DT,
	VMCB_SEG,
	VMCB_CR2,
	VMCB_LBR,
	VMCB_AVIC,

	VMCB_DIRTY_MAX,
};

#define VMCB_ALWAYS_DIRTY_MASK	((1U << VMCB_INTR) | (1U << VMCB_CR2))

#define VMCB_AVIC_APIC_BAR_MASK		0xFFFFFFFFFF000ULL

static unsigned int max_sev_asid;
static unsigned int min_sev_asid;
static unsigned long *sev_asid_bitmap;
#define __sme_page_pa(x) __sme_set(page_to_pfn(x) << PAGE_SHIFT)

struct enc_region {
	struct list_head list;
	unsigned long npages;
	struct page **pages;
	unsigned long uaddr;
	unsigned long size;
};
430
431
432static inline struct kvm_svm *to_kvm_svm(struct kvm *kvm)
433{
434 return container_of(kvm, struct kvm_svm, kvm);
435}
436
437static inline bool svm_sev_enabled(void)
438{
439 return IS_ENABLED(CONFIG_KVM_AMD_SEV) ? max_sev_asid : 0;
440}
441
442static inline bool sev_guest(struct kvm *kvm)
443{
444#ifdef CONFIG_KVM_AMD_SEV
445 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
446
447 return sev->active;
448#else
449 return false;
450#endif
451}
452
453static inline int sev_get_asid(struct kvm *kvm)
454{
455 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
456
457 return sev->asid;
458}
459
460static inline void mark_all_dirty(struct vmcb *vmcb)
461{
462 vmcb->control.clean = 0;
463}
464
465static inline void mark_all_clean(struct vmcb *vmcb)
466{
467 vmcb->control.clean = ((1 << VMCB_DIRTY_MAX) - 1)
468 & ~VMCB_ALWAYS_DIRTY_MASK;
469}
470
471static inline void mark_dirty(struct vmcb *vmcb, int bit)
472{
473 vmcb->control.clean &= ~(1 << bit);
474}
475
476static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
477{
478 return container_of(vcpu, struct vcpu_svm, vcpu);
479}
480
481static inline void avic_update_vapic_bar(struct vcpu_svm *svm, u64 data)
482{
483 svm->vmcb->control.avic_vapic_bar = data & VMCB_AVIC_APIC_BAR_MASK;
484 mark_dirty(svm->vmcb, VMCB_AVIC);
485}
486
487static inline bool avic_vcpu_is_running(struct kvm_vcpu *vcpu)
488{
489 struct vcpu_svm *svm = to_svm(vcpu);
490 u64 *entry = svm->avic_physical_id_cache;
491
492 if (!entry)
493 return false;
494
495 return (READ_ONCE(*entry) & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK);
496}
497
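/*
 * Recompute the effective intercept bitmaps in the active VMCB.  Outside
 * guest mode the host VMCB is used as-is; in guest mode the host (hsave)
 * intercepts are OR'ed with the intercepts requested by the L1 hypervisor.
 */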
498static void recalc_intercepts(struct vcpu_svm *svm)
499{
500 struct vmcb_control_area *c, *h;
501 struct nested_state *g;
502
503 mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
504
505 if (!is_guest_mode(&svm->vcpu))
506 return;
507
508 c = &svm->vmcb->control;
509 h = &svm->nested.hsave->control;
510 g = &svm->nested;
511
512 c->intercept_cr = h->intercept_cr | g->intercept_cr;
513 c->intercept_dr = h->intercept_dr | g->intercept_dr;
514 c->intercept_exceptions = h->intercept_exceptions | g->intercept_exceptions;
515 c->intercept = h->intercept | g->intercept;
516}
517
518static inline struct vmcb *get_host_vmcb(struct vcpu_svm *svm)
519{
520 if (is_guest_mode(&svm->vcpu))
521 return svm->nested.hsave;
522 else
523 return svm->vmcb;
524}
525
526static inline void set_cr_intercept(struct vcpu_svm *svm, int bit)
527{
528 struct vmcb *vmcb = get_host_vmcb(svm);
529
530 vmcb->control.intercept_cr |= (1U << bit);
531
532 recalc_intercepts(svm);
533}
534
535static inline void clr_cr_intercept(struct vcpu_svm *svm, int bit)
536{
537 struct vmcb *vmcb = get_host_vmcb(svm);
538
539 vmcb->control.intercept_cr &= ~(1U << bit);
540
541 recalc_intercepts(svm);
542}
543
544static inline bool is_cr_intercept(struct vcpu_svm *svm, int bit)
545{
546 struct vmcb *vmcb = get_host_vmcb(svm);
547
548 return vmcb->control.intercept_cr & (1U << bit);
549}
550
551static inline void set_dr_intercepts(struct vcpu_svm *svm)
552{
553 struct vmcb *vmcb = get_host_vmcb(svm);
554
555 vmcb->control.intercept_dr = (1 << INTERCEPT_DR0_READ)
556 | (1 << INTERCEPT_DR1_READ)
557 | (1 << INTERCEPT_DR2_READ)
558 | (1 << INTERCEPT_DR3_READ)
559 | (1 << INTERCEPT_DR4_READ)
560 | (1 << INTERCEPT_DR5_READ)
561 | (1 << INTERCEPT_DR6_READ)
562 | (1 << INTERCEPT_DR7_READ)
563 | (1 << INTERCEPT_DR0_WRITE)
564 | (1 << INTERCEPT_DR1_WRITE)
565 | (1 << INTERCEPT_DR2_WRITE)
566 | (1 << INTERCEPT_DR3_WRITE)
567 | (1 << INTERCEPT_DR4_WRITE)
568 | (1 << INTERCEPT_DR5_WRITE)
569 | (1 << INTERCEPT_DR6_WRITE)
570 | (1 << INTERCEPT_DR7_WRITE);
571
572 recalc_intercepts(svm);
573}
574
575static inline void clr_dr_intercepts(struct vcpu_svm *svm)
576{
577 struct vmcb *vmcb = get_host_vmcb(svm);
578
579 vmcb->control.intercept_dr = 0;
580
581 recalc_intercepts(svm);
582}
583
584static inline void set_exception_intercept(struct vcpu_svm *svm, int bit)
585{
586 struct vmcb *vmcb = get_host_vmcb(svm);
587
588 vmcb->control.intercept_exceptions |= (1U << bit);
589
590 recalc_intercepts(svm);
591}
592
593static inline void clr_exception_intercept(struct vcpu_svm *svm, int bit)
594{
595 struct vmcb *vmcb = get_host_vmcb(svm);
596
597 vmcb->control.intercept_exceptions &= ~(1U << bit);
598
599 recalc_intercepts(svm);
600}
601
602static inline void set_intercept(struct vcpu_svm *svm, int bit)
603{
604 struct vmcb *vmcb = get_host_vmcb(svm);
605
606 vmcb->control.intercept |= (1ULL << bit);
607
608 recalc_intercepts(svm);
609}
610
611static inline void clr_intercept(struct vcpu_svm *svm, int bit)
612{
613 struct vmcb *vmcb = get_host_vmcb(svm);
614
615 vmcb->control.intercept &= ~(1ULL << bit);
616
617 recalc_intercepts(svm);
618}
619
620static inline bool vgif_enabled(struct vcpu_svm *svm)
621{
622 return !!(svm->vmcb->control.int_ctl & V_GIF_ENABLE_MASK);
623}
624
625static inline void enable_gif(struct vcpu_svm *svm)
626{
627 if (vgif_enabled(svm))
628 svm->vmcb->control.int_ctl |= V_GIF_MASK;
629 else
630 svm->vcpu.arch.hflags |= HF_GIF_MASK;
631}
632
633static inline void disable_gif(struct vcpu_svm *svm)
634{
635 if (vgif_enabled(svm))
636 svm->vmcb->control.int_ctl &= ~V_GIF_MASK;
637 else
638 svm->vcpu.arch.hflags &= ~HF_GIF_MASK;
639}
640
641static inline bool gif_set(struct vcpu_svm *svm)
642{
643 if (vgif_enabled(svm))
644 return !!(svm->vmcb->control.int_ctl & V_GIF_MASK);
645 else
646 return !!(svm->vcpu.arch.hflags & HF_GIF_MASK);
647}
648
649static unsigned long iopm_base;
650
651struct kvm_ldttss_desc {
652 u16 limit0;
653 u16 base0;
654 unsigned base1:8, type:5, dpl:2, p:1;
655 unsigned limit1:4, zero0:3, g:1, base2:8;
656 u32 base3;
657 u32 zero1;
658} __attribute__((packed));
659
660struct svm_cpu_data {
661 int cpu;
662
663 u64 asid_generation;
664 u32 max_asid;
665 u32 next_asid;
666 u32 min_asid;
667 struct kvm_ldttss_desc *tss_desc;
668
669 struct page *save_area;
670 struct vmcb *current_vmcb;
671
672
673 struct vmcb **sev_vmcbs;
674};
675
676static DEFINE_PER_CPU(struct svm_cpu_data *, svm_data);
677
678struct svm_init_data {
679 int cpu;
680 int r;
681};
682
683static const u32 msrpm_ranges[] = {0, 0xc0000000, 0xc0010000};
684
685#define NUM_MSR_MAPS ARRAY_SIZE(msrpm_ranges)
686#define MSRS_RANGE_SIZE 2048
687#define MSRS_IN_RANGE (MSRS_RANGE_SIZE * 8 / 2)
688
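/*
 * Return the offset, in 32-bit words, into the MSR permission map for @msr,
 * or MSR_INVALID if the MSR is not covered by the map.  The map consists of
 * three 2K ranges, with two intercept bits (read and write) per MSR.
 */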
689static u32 svm_msrpm_offset(u32 msr)
690{
691 u32 offset;
692 int i;
693
694 for (i = 0; i < NUM_MSR_MAPS; i++) {
695 if (msr < msrpm_ranges[i] ||
696 msr >= msrpm_ranges[i] + MSRS_IN_RANGE)
697 continue;
698
699 offset = (msr - msrpm_ranges[i]) / 4;
700 offset += (i * MSRS_RANGE_SIZE);
701
702
703 return offset / 4;
704 }
705
706
707 return MSR_INVALID;
708}
709
710#define MAX_INST_SIZE 15
711
712static inline void clgi(void)
713{
714 asm volatile (__ex(SVM_CLGI));
715}
716
717static inline void stgi(void)
718{
719 asm volatile (__ex(SVM_STGI));
720}
721
722static inline void invlpga(unsigned long addr, u32 asid)
723{
724 asm volatile (__ex(SVM_INVLPGA) : : "a"(addr), "c"(asid));
725}
726
727static int get_npt_level(struct kvm_vcpu *vcpu)
728{
729#ifdef CONFIG_X86_64
730 return PT64_ROOT_4LEVEL;
731#else
732 return PT32E_ROOT_LEVEL;
733#endif
734}
735
736static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
737{
738 vcpu->arch.efer = efer;
739 if (!npt_enabled && !(efer & EFER_LMA))
740 efer &= ~EFER_LME;
741
742 to_svm(vcpu)->vmcb->save.efer = efer | EFER_SVME;
743 mark_dirty(to_svm(vcpu)->vmcb, VMCB_CR);
744}
745
746static int is_external_interrupt(u32 info)
747{
748 info &= SVM_EVTINJ_TYPE_MASK | SVM_EVTINJ_VALID;
749 return info == (SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR);
750}
751
752static u32 svm_get_interrupt_shadow(struct kvm_vcpu *vcpu)
753{
754 struct vcpu_svm *svm = to_svm(vcpu);
755 u32 ret = 0;
756
757 if (svm->vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK)
758 ret = KVM_X86_SHADOW_INT_STI | KVM_X86_SHADOW_INT_MOV_SS;
759 return ret;
760}
761
762static void svm_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
763{
764 struct vcpu_svm *svm = to_svm(vcpu);
765
766 if (mask == 0)
767 svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK;
768 else
769 svm->vmcb->control.int_state |= SVM_INTERRUPT_SHADOW_MASK;
770
771}
772
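/*
 * Advance RIP past the current instruction, preferring the next_rip value
 * provided by hardware decode assists (NRIPS) and falling back to
 * instruction emulation when it is not available.
 */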
773static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
774{
775 struct vcpu_svm *svm = to_svm(vcpu);
776
777 if (svm->vmcb->control.next_rip != 0) {
778 WARN_ON_ONCE(!static_cpu_has(X86_FEATURE_NRIPS));
779 svm->next_rip = svm->vmcb->control.next_rip;
780 }
781
782 if (!svm->next_rip) {
783 if (kvm_emulate_instruction(vcpu, EMULTYPE_SKIP) !=
784 EMULATE_DONE)
785 printk(KERN_DEBUG "%s: NOP\n", __func__);
786 return;
787 }
788 if (svm->next_rip - kvm_rip_read(vcpu) > MAX_INST_SIZE)
789 printk(KERN_ERR "%s: ip 0x%lx next 0x%llx\n",
790 __func__, kvm_rip_read(vcpu), svm->next_rip);
791
792 kvm_rip_write(vcpu, svm->next_rip);
793 svm_set_interrupt_shadow(vcpu, 0);
794}
795
796static void svm_queue_exception(struct kvm_vcpu *vcpu)
797{
798 struct vcpu_svm *svm = to_svm(vcpu);
799 unsigned nr = vcpu->arch.exception.nr;
800 bool has_error_code = vcpu->arch.exception.has_error_code;
801 bool reinject = vcpu->arch.exception.injected;
802 u32 error_code = vcpu->arch.exception.error_code;
803
804
805
806
807
808 if (!reinject &&
809 nested_svm_check_exception(svm, nr, has_error_code, error_code))
810 return;
811
812 if (nr == BP_VECTOR && !static_cpu_has(X86_FEATURE_NRIPS)) {
813 unsigned long rip, old_rip = kvm_rip_read(&svm->vcpu);
814
815
816
817
818
819
820
821
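		/*
		 * Without next_rip support the CPU cannot report the RIP
		 * following the INT3, so advance RIP manually here and
		 * record the old/new values so the injection can be rolled
		 * back if it does not complete.
		 */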
822 skip_emulated_instruction(&svm->vcpu);
823 rip = kvm_rip_read(&svm->vcpu);
824 svm->int3_rip = rip + svm->vmcb->save.cs.base;
825 svm->int3_injected = rip - old_rip;
826 }
827
828 svm->vmcb->control.event_inj = nr
829 | SVM_EVTINJ_VALID
830 | (has_error_code ? SVM_EVTINJ_VALID_ERR : 0)
831 | SVM_EVTINJ_TYPE_EXEPT;
832 svm->vmcb->control.event_inj_err = error_code;
833}
834
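/*
 * Erratum 383 workaround: on parts with the AMD_TLB_MMATCH bug, set bit 47
 * in MSR_AMD64_DC_CFG (using the _safe MSR accessors in case the MSR is
 * unavailable) and record that the workaround is active.
 */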
835static void svm_init_erratum_383(void)
836{
837 u32 low, high;
838 int err;
839 u64 val;
840
841 if (!static_cpu_has_bug(X86_BUG_AMD_TLB_MMATCH))
842 return;
843
844
845 val = native_read_msr_safe(MSR_AMD64_DC_CFG, &err);
846 if (err)
847 return;
848
849 val |= (1ULL << 47);
850
851 low = lower_32_bits(val);
852 high = upper_32_bits(val);
853
854 native_write_msr_safe(MSR_AMD64_DC_CFG, low, high);
855
856 erratum_383_found = true;
857}
858
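/*
 * Set up the OS Visible Workaround state a guest will observe: expose at
 * least three OSVW entries, pass through the host status with bits 1 and 2
 * masked, and on OSVW-less family 0x10 parts set status bit 0 so the
 * erratum covered by entry 0 is reported as present.
 */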
859static void svm_init_osvw(struct kvm_vcpu *vcpu)
860{
861
862
863
864
865 vcpu->arch.osvw.length = (osvw_len >= 3) ? (osvw_len) : 3;
866 vcpu->arch.osvw.status = osvw_status & ~(6ULL);
867
868
869
870
871
872
873
874
875
876 if (osvw_len == 0 && boot_cpu_data.x86 == 0x10)
877 vcpu->arch.osvw.status |= 1;
878}
879
880static int has_svm(void)
881{
882 const char *msg;
883
884 if (!cpu_has_svm(&msg)) {
885 printk(KERN_INFO "has_svm: %s\n", msg);
886 return 0;
887 }
888
889 return 1;
890}
891
892static void svm_hardware_disable(void)
893{
894
895 if (static_cpu_has(X86_FEATURE_TSCRATEMSR))
896 wrmsrl(MSR_AMD64_TSC_RATIO, TSC_RATIO_DEFAULT);
897
898 cpu_svm_disable();
899
900 amd_pmu_disable_virt();
901}
902
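/*
 * Per-CPU enable path: verify SVM support, set EFER.SVME, point
 * MSR_VM_HSAVE_PA at this CPU's host save area, reset the TSC ratio to 1:1
 * where supported, and snapshot the OSVW length/status MSRs.
 */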
903static int svm_hardware_enable(void)
904{
905
906 struct svm_cpu_data *sd;
907 uint64_t efer;
908 struct desc_struct *gdt;
909 int me = raw_smp_processor_id();
910
911 rdmsrl(MSR_EFER, efer);
912 if (efer & EFER_SVME)
913 return -EBUSY;
914
915 if (!has_svm()) {
916 pr_err("%s: err EOPNOTSUPP on %d\n", __func__, me);
917 return -EINVAL;
918 }
919 sd = per_cpu(svm_data, me);
920 if (!sd) {
921 pr_err("%s: svm_data is NULL on %d\n", __func__, me);
922 return -EINVAL;
923 }
924
925 sd->asid_generation = 1;
926 sd->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1;
927 sd->next_asid = sd->max_asid + 1;
928 sd->min_asid = max_sev_asid + 1;
929
930 gdt = get_current_gdt_rw();
931 sd->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS);
932
933 wrmsrl(MSR_EFER, efer | EFER_SVME);
934
935 wrmsrl(MSR_VM_HSAVE_PA, page_to_pfn(sd->save_area) << PAGE_SHIFT);
936
937 if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) {
938 wrmsrl(MSR_AMD64_TSC_RATIO, TSC_RATIO_DEFAULT);
939 __this_cpu_write(current_tsc_ratio, TSC_RATIO_DEFAULT);
940 }
941
942
943
944
945
946
947
948
949
950
951
952 if (cpu_has(&boot_cpu_data, X86_FEATURE_OSVW)) {
953 uint64_t len, status = 0;
954 int err;
955
956 len = native_read_msr_safe(MSR_AMD64_OSVW_ID_LENGTH, &err);
957 if (!err)
958 status = native_read_msr_safe(MSR_AMD64_OSVW_STATUS,
959 &err);
960
961 if (err)
962 osvw_status = osvw_len = 0;
963 else {
964 if (len < osvw_len)
965 osvw_len = len;
966 osvw_status |= status;
967 osvw_status &= (1ULL << osvw_len) - 1;
968 }
969 } else
970 osvw_status = osvw_len = 0;
971
972 svm_init_erratum_383();
973
974 amd_pmu_enable_virt();
975
976 return 0;
977}
978
979static void svm_cpu_uninit(int cpu)
980{
981 struct svm_cpu_data *sd = per_cpu(svm_data, raw_smp_processor_id());
982
983 if (!sd)
984 return;
985
986 per_cpu(svm_data, raw_smp_processor_id()) = NULL;
987 kfree(sd->sev_vmcbs);
988 __free_page(sd->save_area);
989 kfree(sd);
990}
991
992static int svm_cpu_init(int cpu)
993{
994 struct svm_cpu_data *sd;
995 int r;
996
997 sd = kzalloc(sizeof(struct svm_cpu_data), GFP_KERNEL);
998 if (!sd)
999 return -ENOMEM;
1000 sd->cpu = cpu;
1001 r = -ENOMEM;
1002 sd->save_area = alloc_page(GFP_KERNEL);
1003 if (!sd->save_area)
1004 goto err_1;
1005
1006 if (svm_sev_enabled()) {
1007 r = -ENOMEM;
1008 sd->sev_vmcbs = kmalloc_array(max_sev_asid + 1,
1009 sizeof(void *),
1010 GFP_KERNEL);
1011 if (!sd->sev_vmcbs)
1012 goto err_1;
1013 }
1014
1015 per_cpu(svm_data, cpu) = sd;
1016
1017 return 0;
1018
1019err_1:
1020 kfree(sd);
1021 return r;
1022
1023}
1024
1025static bool valid_msr_intercept(u32 index)
1026{
1027 int i;
1028
1029 for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++)
1030 if (direct_access_msrs[i].index == index)
1031 return true;
1032
1033 return false;
1034}
1035
1036static bool msr_write_intercepted(struct kvm_vcpu *vcpu, unsigned msr)
1037{
1038 u8 bit_write;
1039 unsigned long tmp;
1040 u32 offset;
1041 u32 *msrpm;
1042
1043 msrpm = is_guest_mode(vcpu) ? to_svm(vcpu)->nested.msrpm:
1044 to_svm(vcpu)->msrpm;
1045
1046 offset = svm_msrpm_offset(msr);
1047 bit_write = 2 * (msr & 0x0f) + 1;
1048 tmp = msrpm[offset];
1049
1050 BUG_ON(offset == MSR_INVALID);
1051
1052 return !!test_bit(bit_write, &tmp);
1053}
1054
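/*
 * Each MSR is tracked by a read bit and a write bit in the permission map;
 * a set bit means the access is intercepted, so passing read/write = 1
 * clears the corresponding bit and gives the guest direct access.
 */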
1055static void set_msr_interception(u32 *msrpm, unsigned msr,
1056 int read, int write)
1057{
1058 u8 bit_read, bit_write;
1059 unsigned long tmp;
1060 u32 offset;
1061
1062
1063
1064
1065
1066 WARN_ON(!valid_msr_intercept(msr));
1067
1068 offset = svm_msrpm_offset(msr);
1069 bit_read = 2 * (msr & 0x0f);
1070 bit_write = 2 * (msr & 0x0f) + 1;
1071 tmp = msrpm[offset];
1072
1073 BUG_ON(offset == MSR_INVALID);
1074
1075 read ? clear_bit(bit_read, &tmp) : set_bit(bit_read, &tmp);
1076 write ? clear_bit(bit_write, &tmp) : set_bit(bit_write, &tmp);
1077
1078 msrpm[offset] = tmp;
1079}
1080
1081static void svm_vcpu_init_msrpm(u32 *msrpm)
1082{
1083 int i;
1084
1085 memset(msrpm, 0xff, PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER));
1086
1087 for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) {
1088 if (!direct_access_msrs[i].always)
1089 continue;
1090
1091 set_msr_interception(msrpm, direct_access_msrs[i].index, 1, 1);
1092 }
1093}
1094
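/*
 * Remember every MSR permission map offset that is actually used, so that
 * later consumers (such as the nested MSR permission map merge) only have
 * to walk the offsets in use rather than the whole bitmap.
 */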
1095static void add_msr_offset(u32 offset)
1096{
1097 int i;
1098
1099 for (i = 0; i < MSRPM_OFFSETS; ++i) {
1100
1101
1102 if (msrpm_offsets[i] == offset)
1103 return;
1104
1105
1106 if (msrpm_offsets[i] != MSR_INVALID)
1107 continue;
1108
1109
1110 msrpm_offsets[i] = offset;
1111
1112 return;
1113 }
1114
1115
1116
1117
1118
1119 BUG();
1120}
1121
1122static void init_msrpm_offsets(void)
1123{
1124 int i;
1125
1126 memset(msrpm_offsets, 0xff, sizeof(msrpm_offsets));
1127
1128 for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) {
1129 u32 offset;
1130
1131 offset = svm_msrpm_offset(direct_access_msrs[i].index);
1132 BUG_ON(offset == MSR_INVALID);
1133
1134 add_msr_offset(offset);
1135 }
1136}
1137
1138static void svm_enable_lbrv(struct vcpu_svm *svm)
1139{
1140 u32 *msrpm = svm->msrpm;
1141
1142 svm->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK;
1143 set_msr_interception(msrpm, MSR_IA32_LASTBRANCHFROMIP, 1, 1);
1144 set_msr_interception(msrpm, MSR_IA32_LASTBRANCHTOIP, 1, 1);
1145 set_msr_interception(msrpm, MSR_IA32_LASTINTFROMIP, 1, 1);
1146 set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 1, 1);
1147}
1148
1149static void svm_disable_lbrv(struct vcpu_svm *svm)
1150{
1151 u32 *msrpm = svm->msrpm;
1152
1153 svm->vmcb->control.virt_ext &= ~LBR_CTL_ENABLE_MASK;
1154 set_msr_interception(msrpm, MSR_IA32_LASTBRANCHFROMIP, 0, 0);
1155 set_msr_interception(msrpm, MSR_IA32_LASTBRANCHTOIP, 0, 0);
1156 set_msr_interception(msrpm, MSR_IA32_LASTINTFROMIP, 0, 0);
1157 set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 0, 0);
1158}
1159
1160static void disable_nmi_singlestep(struct vcpu_svm *svm)
1161{
1162 svm->nmi_singlestep = false;
1163
1164 if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP)) {
1165
1166 if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_TF))
1167 svm->vmcb->save.rflags &= ~X86_EFLAGS_TF;
1168 if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_RF))
1169 svm->vmcb->save.rflags &= ~X86_EFLAGS_RF;
1170 }
1171}
1172
1173
1174
1175
1176
1177
1178#define SVM_VM_DATA_HASH_BITS 8
1179static DEFINE_HASHTABLE(svm_vm_data_hash, SVM_VM_DATA_HASH_BITS);
1180static u32 next_vm_id = 0;
1181static bool next_vm_id_wrapped = 0;
1182static DEFINE_SPINLOCK(svm_vm_data_hash_lock);
1183
1184
1185
1186
1187
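/*
 * Callback from the IOMMU driver when a GA log entry is written: decode the
 * VM and vCPU id from the tag, look up the target vCPU and wake it up so it
 * can process the pending interrupt.
 */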
1188static int avic_ga_log_notifier(u32 ga_tag)
1189{
1190 unsigned long flags;
1191 struct kvm_svm *kvm_svm;
1192 struct kvm_vcpu *vcpu = NULL;
1193 u32 vm_id = AVIC_GATAG_TO_VMID(ga_tag);
1194 u32 vcpu_id = AVIC_GATAG_TO_VCPUID(ga_tag);
1195
1196 pr_debug("SVM: %s: vm_id=%#x, vcpu_id=%#x\n", __func__, vm_id, vcpu_id);
1197
1198 spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
1199 hash_for_each_possible(svm_vm_data_hash, kvm_svm, hnode, vm_id) {
1200 if (kvm_svm->avic_vm_id != vm_id)
1201 continue;
1202 vcpu = kvm_get_vcpu_by_id(&kvm_svm->kvm, vcpu_id);
1203 break;
1204 }
1205 spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags);
1206
1207
1208
1209
1210
1211
1212 if (vcpu)
1213 kvm_vcpu_wake_up(vcpu);
1214
1215 return 0;
1216}
1217
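/*
 * Query SEV capabilities: CPUID 0x8000001F reports the maximum (ECX) and
 * minimum (EDX) ASID usable for SEV guests.  Allocate the ASID bitmap and
 * confirm the PSP firmware is reachable via sev_platform_status().
 */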
1218static __init int sev_hardware_setup(void)
1219{
1220 struct sev_user_data_status *status;
1221 int rc;
1222
1223
1224 max_sev_asid = cpuid_ecx(0x8000001F);
1225
1226 if (!max_sev_asid)
1227 return 1;
1228
1229
1230 min_sev_asid = cpuid_edx(0x8000001F);
1231
1232
1233 sev_asid_bitmap = bitmap_zalloc(max_sev_asid, GFP_KERNEL);
1234 if (!sev_asid_bitmap)
1235 return 1;
1236
1237 status = kmalloc(sizeof(*status), GFP_KERNEL);
1238 if (!status)
1239 return 1;
1240
1241
1242
1243
1244
1245
1246
1247
1248 rc = sev_platform_status(status, NULL);
1249 if (rc)
1250 goto err;
1251
1252 pr_info("SEV supported\n");
1253
1254err:
1255 kfree(status);
1256 return rc;
1257}
1258
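/*
 * grow_ple_window()/shrink_ple_window() scale the VMCB pause filter count
 * up or down within the module-parameter bounds; a change is made visible
 * to hardware by dirtying the VMCB intercepts state.
 */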
1259static void grow_ple_window(struct kvm_vcpu *vcpu)
1260{
1261 struct vcpu_svm *svm = to_svm(vcpu);
1262 struct vmcb_control_area *control = &svm->vmcb->control;
1263 int old = control->pause_filter_count;
1264
1265 control->pause_filter_count = __grow_ple_window(old,
1266 pause_filter_count,
1267 pause_filter_count_grow,
1268 pause_filter_count_max);
1269
1270 if (control->pause_filter_count != old)
1271 mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
1272
1273 trace_kvm_ple_window_grow(vcpu->vcpu_id,
1274 control->pause_filter_count, old);
1275}
1276
1277static void shrink_ple_window(struct kvm_vcpu *vcpu)
1278{
1279 struct vcpu_svm *svm = to_svm(vcpu);
1280 struct vmcb_control_area *control = &svm->vmcb->control;
1281 int old = control->pause_filter_count;
1282
1283 control->pause_filter_count =
1284 __shrink_ple_window(old,
1285 pause_filter_count,
1286 pause_filter_count_shrink,
1287 pause_filter_count);
1288 if (control->pause_filter_count != old)
1289 mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
1290
1291 trace_kvm_ple_window_shrink(vcpu->vcpu_id,
1292 control->pause_filter_count, old);
1293}
1294
1295static __init int svm_hardware_setup(void)
1296{
1297 int cpu;
1298 struct page *iopm_pages;
1299 void *iopm_va;
1300 int r;
1301
1302 iopm_pages = alloc_pages(GFP_KERNEL, IOPM_ALLOC_ORDER);
1303
1304 if (!iopm_pages)
1305 return -ENOMEM;
1306
1307 iopm_va = page_address(iopm_pages);
1308 memset(iopm_va, 0xff, PAGE_SIZE * (1 << IOPM_ALLOC_ORDER));
1309 iopm_base = page_to_pfn(iopm_pages) << PAGE_SHIFT;
1310
1311 init_msrpm_offsets();
1312
1313 if (boot_cpu_has(X86_FEATURE_NX))
1314 kvm_enable_efer_bits(EFER_NX);
1315
1316 if (boot_cpu_has(X86_FEATURE_FXSR_OPT))
1317 kvm_enable_efer_bits(EFER_FFXSR);
1318
1319 if (boot_cpu_has(X86_FEATURE_TSCRATEMSR)) {
1320 kvm_has_tsc_control = true;
1321 kvm_max_tsc_scaling_ratio = TSC_RATIO_MAX;
1322 kvm_tsc_scaling_ratio_frac_bits = 32;
1323 }
1324
1325
1326 if (!boot_cpu_has(X86_FEATURE_PAUSEFILTER)) {
1327 pause_filter_count = 0;
1328 pause_filter_thresh = 0;
1329 } else if (!boot_cpu_has(X86_FEATURE_PFTHRESHOLD)) {
1330 pause_filter_thresh = 0;
1331 }
1332
1333 if (nested) {
1334 printk(KERN_INFO "kvm: Nested Virtualization enabled\n");
1335 kvm_enable_efer_bits(EFER_SVME | EFER_LMSLE);
1336 }
1337
1338 if (sev) {
1339 if (boot_cpu_has(X86_FEATURE_SEV) &&
1340 IS_ENABLED(CONFIG_KVM_AMD_SEV)) {
1341 r = sev_hardware_setup();
1342 if (r)
1343 sev = false;
1344 } else {
1345 sev = false;
1346 }
1347 }
1348
1349 for_each_possible_cpu(cpu) {
1350 r = svm_cpu_init(cpu);
1351 if (r)
1352 goto err;
1353 }
1354
1355 if (!boot_cpu_has(X86_FEATURE_NPT))
1356 npt_enabled = false;
1357
1358 if (npt_enabled && !npt) {
1359 printk(KERN_INFO "kvm: Nested Paging disabled\n");
1360 npt_enabled = false;
1361 }
1362
1363 if (npt_enabled) {
1364 printk(KERN_INFO "kvm: Nested Paging enabled\n");
1365 kvm_enable_tdp();
1366 } else
1367 kvm_disable_tdp();
1368
1369 if (avic) {
1370 if (!npt_enabled ||
1371 !boot_cpu_has(X86_FEATURE_AVIC) ||
1372 !IS_ENABLED(CONFIG_X86_LOCAL_APIC)) {
1373 avic = false;
1374 } else {
1375 pr_info("AVIC enabled\n");
1376
1377 amd_iommu_register_ga_log_notifier(&avic_ga_log_notifier);
1378 }
1379 }
1380
1381 if (vls) {
1382 if (!npt_enabled ||
1383 !boot_cpu_has(X86_FEATURE_V_VMSAVE_VMLOAD) ||
1384 !IS_ENABLED(CONFIG_X86_64)) {
1385 vls = false;
1386 } else {
1387 pr_info("Virtual VMLOAD VMSAVE supported\n");
1388 }
1389 }
1390
1391 if (vgif) {
1392 if (!boot_cpu_has(X86_FEATURE_VGIF))
1393 vgif = false;
1394 else
1395 pr_info("Virtual GIF supported\n");
1396 }
1397
1398 return 0;
1399
1400err:
1401 __free_pages(iopm_pages, IOPM_ALLOC_ORDER);
1402 iopm_base = 0;
1403 return r;
1404}
1405
1406static __exit void svm_hardware_unsetup(void)
1407{
1408 int cpu;
1409
1410 if (svm_sev_enabled())
1411 bitmap_free(sev_asid_bitmap);
1412
1413 for_each_possible_cpu(cpu)
1414 svm_cpu_uninit(cpu);
1415
1416 __free_pages(pfn_to_page(iopm_base >> PAGE_SHIFT), IOPM_ALLOC_ORDER);
1417 iopm_base = 0;
1418}
1419
1420static void init_seg(struct vmcb_seg *seg)
1421{
1422 seg->selector = 0;
1423 seg->attrib = SVM_SELECTOR_P_MASK | SVM_SELECTOR_S_MASK |
1424 SVM_SELECTOR_WRITE_MASK;
1425 seg->limit = 0xffff;
1426 seg->base = 0;
1427}
1428
1429static void init_sys_seg(struct vmcb_seg *seg, uint32_t type)
1430{
1431 seg->selector = 0;
1432 seg->attrib = SVM_SELECTOR_P_MASK | type;
1433 seg->limit = 0xffff;
1434 seg->base = 0;
1435}
1436
1437static u64 svm_read_l1_tsc_offset(struct kvm_vcpu *vcpu)
1438{
1439 struct vcpu_svm *svm = to_svm(vcpu);
1440
1441 if (is_guest_mode(vcpu))
1442 return svm->nested.hsave->control.tsc_offset;
1443
1444 return vcpu->arch.tsc_offset;
1445}
1446
1447static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
1448{
1449 struct vcpu_svm *svm = to_svm(vcpu);
1450 u64 g_tsc_offset = 0;
1451
1452 if (is_guest_mode(vcpu)) {
1453
1454 g_tsc_offset = svm->vmcb->control.tsc_offset -
1455 svm->nested.hsave->control.tsc_offset;
1456 svm->nested.hsave->control.tsc_offset = offset;
1457 } else
1458 trace_kvm_write_tsc_offset(vcpu->vcpu_id,
1459 svm->vmcb->control.tsc_offset,
1460 offset);
1461
1462 svm->vmcb->control.tsc_offset = offset + g_tsc_offset;
1463
1464 mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
1465}
1466
1467static void avic_init_vmcb(struct vcpu_svm *svm)
1468{
1469 struct vmcb *vmcb = svm->vmcb;
1470 struct kvm_svm *kvm_svm = to_kvm_svm(svm->vcpu.kvm);
1471 phys_addr_t bpa = __sme_set(page_to_phys(svm->avic_backing_page));
1472 phys_addr_t lpa = __sme_set(page_to_phys(kvm_svm->avic_logical_id_table_page));
1473 phys_addr_t ppa = __sme_set(page_to_phys(kvm_svm->avic_physical_id_table_page));
1474
1475 vmcb->control.avic_backing_page = bpa & AVIC_HPA_MASK;
1476 vmcb->control.avic_logical_id = lpa & AVIC_HPA_MASK;
1477 vmcb->control.avic_physical_id = ppa & AVIC_HPA_MASK;
1478 vmcb->control.avic_physical_id |= AVIC_MAX_PHYSICAL_ID_COUNT;
1479 vmcb->control.int_ctl |= AVIC_ENABLE_MASK;
1480}
1481
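/*
 * Build the initial VMCB for a vCPU: intercepts, IOPM/MSRPM addresses,
 * reset-time segment and control register state, and optional features
 * (nested paging, pause filtering, AVIC, VMLOAD/VMSAVE virtualization,
 * virtual GIF, SEV).
 */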
1482static void init_vmcb(struct vcpu_svm *svm)
1483{
1484 struct vmcb_control_area *control = &svm->vmcb->control;
1485 struct vmcb_save_area *save = &svm->vmcb->save;
1486
1487 svm->vcpu.arch.hflags = 0;
1488
1489 set_cr_intercept(svm, INTERCEPT_CR0_READ);
1490 set_cr_intercept(svm, INTERCEPT_CR3_READ);
1491 set_cr_intercept(svm, INTERCEPT_CR4_READ);
1492 set_cr_intercept(svm, INTERCEPT_CR0_WRITE);
1493 set_cr_intercept(svm, INTERCEPT_CR3_WRITE);
1494 set_cr_intercept(svm, INTERCEPT_CR4_WRITE);
1495 if (!kvm_vcpu_apicv_active(&svm->vcpu))
1496 set_cr_intercept(svm, INTERCEPT_CR8_WRITE);
1497
1498 set_dr_intercepts(svm);
1499
1500 set_exception_intercept(svm, PF_VECTOR);
1501 set_exception_intercept(svm, UD_VECTOR);
1502 set_exception_intercept(svm, MC_VECTOR);
1503 set_exception_intercept(svm, AC_VECTOR);
1504 set_exception_intercept(svm, DB_VECTOR);
1505
1506
1507
1508
1509
1510
1511 if (enable_vmware_backdoor)
1512 set_exception_intercept(svm, GP_VECTOR);
1513
1514 set_intercept(svm, INTERCEPT_INTR);
1515 set_intercept(svm, INTERCEPT_NMI);
1516 set_intercept(svm, INTERCEPT_SMI);
1517 set_intercept(svm, INTERCEPT_SELECTIVE_CR0);
1518 set_intercept(svm, INTERCEPT_RDPMC);
1519 set_intercept(svm, INTERCEPT_CPUID);
1520 set_intercept(svm, INTERCEPT_INVD);
1521 set_intercept(svm, INTERCEPT_INVLPG);
1522 set_intercept(svm, INTERCEPT_INVLPGA);
1523 set_intercept(svm, INTERCEPT_IOIO_PROT);
1524 set_intercept(svm, INTERCEPT_MSR_PROT);
1525 set_intercept(svm, INTERCEPT_TASK_SWITCH);
1526 set_intercept(svm, INTERCEPT_SHUTDOWN);
1527 set_intercept(svm, INTERCEPT_VMRUN);
1528 set_intercept(svm, INTERCEPT_VMMCALL);
1529 set_intercept(svm, INTERCEPT_VMLOAD);
1530 set_intercept(svm, INTERCEPT_VMSAVE);
1531 set_intercept(svm, INTERCEPT_STGI);
1532 set_intercept(svm, INTERCEPT_CLGI);
1533 set_intercept(svm, INTERCEPT_SKINIT);
1534 set_intercept(svm, INTERCEPT_WBINVD);
1535 set_intercept(svm, INTERCEPT_XSETBV);
1536 set_intercept(svm, INTERCEPT_RSM);
1537
1538 if (!kvm_mwait_in_guest(svm->vcpu.kvm)) {
1539 set_intercept(svm, INTERCEPT_MONITOR);
1540 set_intercept(svm, INTERCEPT_MWAIT);
1541 }
1542
1543 if (!kvm_hlt_in_guest(svm->vcpu.kvm))
1544 set_intercept(svm, INTERCEPT_HLT);
1545
1546 control->iopm_base_pa = __sme_set(iopm_base);
1547 control->msrpm_base_pa = __sme_set(__pa(svm->msrpm));
1548 control->int_ctl = V_INTR_MASKING_MASK;
1549
1550 init_seg(&save->es);
1551 init_seg(&save->ss);
1552 init_seg(&save->ds);
1553 init_seg(&save->fs);
1554 init_seg(&save->gs);
1555
1556 save->cs.selector = 0xf000;
1557 save->cs.base = 0xffff0000;
1558
1559 save->cs.attrib = SVM_SELECTOR_READ_MASK | SVM_SELECTOR_P_MASK |
1560 SVM_SELECTOR_S_MASK | SVM_SELECTOR_CODE_MASK;
1561 save->cs.limit = 0xffff;
1562
1563 save->gdtr.limit = 0xffff;
1564 save->idtr.limit = 0xffff;
1565
1566 init_sys_seg(&save->ldtr, SEG_TYPE_LDT);
1567 init_sys_seg(&save->tr, SEG_TYPE_BUSY_TSS16);
1568
1569 svm_set_efer(&svm->vcpu, 0);
1570 save->dr6 = 0xffff0ff0;
1571 kvm_set_rflags(&svm->vcpu, 2);
1572 save->rip = 0x0000fff0;
1573 svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip;
1574
1575
1576
1577
1578
1579 svm_set_cr0(&svm->vcpu, X86_CR0_NW | X86_CR0_CD | X86_CR0_ET);
1580 kvm_mmu_reset_context(&svm->vcpu);
1581
1582 save->cr4 = X86_CR4_PAE;
1583
1584
1585 if (npt_enabled) {
1586
1587 control->nested_ctl |= SVM_NESTED_CTL_NP_ENABLE;
1588 clr_intercept(svm, INTERCEPT_INVLPG);
1589 clr_exception_intercept(svm, PF_VECTOR);
1590 clr_cr_intercept(svm, INTERCEPT_CR3_READ);
1591 clr_cr_intercept(svm, INTERCEPT_CR3_WRITE);
1592 save->g_pat = svm->vcpu.arch.pat;
1593 save->cr3 = 0;
1594 save->cr4 = 0;
1595 }
1596 svm->asid_generation = 0;
1597
1598 svm->nested.vmcb = 0;
1599 svm->vcpu.arch.hflags = 0;
1600
1601 if (pause_filter_count) {
1602 control->pause_filter_count = pause_filter_count;
1603 if (pause_filter_thresh)
1604 control->pause_filter_thresh = pause_filter_thresh;
1605 set_intercept(svm, INTERCEPT_PAUSE);
1606 } else {
1607 clr_intercept(svm, INTERCEPT_PAUSE);
1608 }
1609
1610 if (kvm_vcpu_apicv_active(&svm->vcpu))
1611 avic_init_vmcb(svm);
1612
1613
1614
1615
1616
1617 if (vls) {
1618 clr_intercept(svm, INTERCEPT_VMLOAD);
1619 clr_intercept(svm, INTERCEPT_VMSAVE);
1620 svm->vmcb->control.virt_ext |= VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK;
1621 }
1622
1623 if (vgif) {
1624 clr_intercept(svm, INTERCEPT_STGI);
1625 clr_intercept(svm, INTERCEPT_CLGI);
1626 svm->vmcb->control.int_ctl |= V_GIF_ENABLE_MASK;
1627 }
1628
1629 if (sev_guest(svm->vcpu.kvm)) {
1630 svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ENABLE;
1631 clr_exception_intercept(svm, UD_VECTOR);
1632 }
1633
1634 mark_all_dirty(svm->vmcb);
1635
1636 enable_gif(svm);
1637
1638}
1639
1640static u64 *avic_get_physical_id_entry(struct kvm_vcpu *vcpu,
1641 unsigned int index)
1642{
1643 u64 *avic_physical_id_table;
1644 struct kvm_svm *kvm_svm = to_kvm_svm(vcpu->kvm);
1645
1646 if (index >= AVIC_MAX_PHYSICAL_ID_COUNT)
1647 return NULL;
1648
1649 avic_physical_id_table = page_address(kvm_svm->avic_physical_id_table_page);
1650
1651 return &avic_physical_id_table[index];
1652}
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662static int avic_init_access_page(struct kvm_vcpu *vcpu)
1663{
1664 struct kvm *kvm = vcpu->kvm;
1665 int ret;
1666
1667 if (kvm->arch.apic_access_page_done)
1668 return 0;
1669
1670 ret = x86_set_memory_region(kvm,
1671 APIC_ACCESS_PAGE_PRIVATE_MEMSLOT,
1672 APIC_DEFAULT_PHYS_BASE,
1673 PAGE_SIZE);
1674 if (ret)
1675 return ret;
1676
1677 kvm->arch.apic_access_page_done = true;
1678 return 0;
1679}
1680
1681static int avic_init_backing_page(struct kvm_vcpu *vcpu)
1682{
1683 int ret;
1684 u64 *entry, new_entry;
1685 int id = vcpu->vcpu_id;
1686 struct vcpu_svm *svm = to_svm(vcpu);
1687
1688 ret = avic_init_access_page(vcpu);
1689 if (ret)
1690 return ret;
1691
1692 if (id >= AVIC_MAX_PHYSICAL_ID_COUNT)
1693 return -EINVAL;
1694
1695 if (!svm->vcpu.arch.apic->regs)
1696 return -EINVAL;
1697
1698 svm->avic_backing_page = virt_to_page(svm->vcpu.arch.apic->regs);
1699
1700
1701 entry = avic_get_physical_id_entry(vcpu, id);
1702 if (!entry)
1703 return -EINVAL;
1704
1705 new_entry = READ_ONCE(*entry);
1706 new_entry = __sme_set((page_to_phys(svm->avic_backing_page) &
1707 AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK) |
1708 AVIC_PHYSICAL_ID_ENTRY_VALID_MASK);
1709 WRITE_ONCE(*entry, new_entry);
1710
1711 svm->avic_physical_id_cache = entry;
1712
1713 return 0;
1714}
1715
1716static void __sev_asid_free(int asid)
1717{
1718 struct svm_cpu_data *sd;
1719 int cpu, pos;
1720
1721 pos = asid - 1;
1722 clear_bit(pos, sev_asid_bitmap);
1723
1724 for_each_possible_cpu(cpu) {
1725 sd = per_cpu(svm_data, cpu);
1726 sd->sev_vmcbs[pos] = NULL;
1727 }
1728}
1729
1730static void sev_asid_free(struct kvm *kvm)
1731{
1732 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
1733
1734 __sev_asid_free(sev->asid);
1735}
1736
1737static void sev_unbind_asid(struct kvm *kvm, unsigned int handle)
1738{
1739 struct sev_data_decommission *decommission;
1740 struct sev_data_deactivate *data;
1741
1742 if (!handle)
1743 return;
1744
1745 data = kzalloc(sizeof(*data), GFP_KERNEL);
1746 if (!data)
1747 return;
1748
1749
1750 data->handle = handle;
1751 sev_guest_deactivate(data, NULL);
1752
1753 wbinvd_on_all_cpus();
1754 sev_guest_df_flush(NULL);
1755 kfree(data);
1756
1757 decommission = kzalloc(sizeof(*decommission), GFP_KERNEL);
1758 if (!decommission)
1759 return;
1760
1761
1762 decommission->handle = handle;
1763 sev_guest_decommission(decommission, NULL);
1764
1765 kfree(decommission);
1766}
1767
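/*
 * Pin a range of guest userspace memory for SEV operations.  The pinned
 * pages are counted against RLIMIT_MEMLOCK and the page array is returned
 * to the caller (NULL on failure).
 */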
1768static struct page **sev_pin_memory(struct kvm *kvm, unsigned long uaddr,
1769 unsigned long ulen, unsigned long *n,
1770 int write)
1771{
1772 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
1773 unsigned long npages, npinned, size;
1774 unsigned long locked, lock_limit;
1775 struct page **pages;
1776 unsigned long first, last;
1777
1778 if (ulen == 0 || uaddr + ulen < uaddr)
1779 return NULL;
1780
1781
1782 first = (uaddr & PAGE_MASK) >> PAGE_SHIFT;
1783 last = ((uaddr + ulen - 1) & PAGE_MASK) >> PAGE_SHIFT;
1784 npages = (last - first + 1);
1785
1786 locked = sev->pages_locked + npages;
1787 lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
1788 if (locked > lock_limit && !capable(CAP_IPC_LOCK)) {
1789 pr_err("SEV: %lu locked pages exceed the lock limit of %lu.\n", locked, lock_limit);
1790 return NULL;
1791 }
1792
1793
1794 size = npages * sizeof(struct page *);
1795 if (size > PAGE_SIZE)
1796 pages = vmalloc(size);
1797 else
1798 pages = kmalloc(size, GFP_KERNEL);
1799
1800 if (!pages)
1801 return NULL;
1802
1803
1804 npinned = get_user_pages_fast(uaddr, npages, write ? FOLL_WRITE : 0, pages);
1805 if (npinned != npages) {
1806 pr_err("SEV: Failure locking %lu pages.\n", npages);
1807 goto err;
1808 }
1809
1810 *n = npages;
1811 sev->pages_locked = locked;
1812
1813 return pages;
1814
1815err:
1816 if (npinned > 0)
1817 release_pages(pages, npinned);
1818
1819 kvfree(pages);
1820 return NULL;
1821}
1822
1823static void sev_unpin_memory(struct kvm *kvm, struct page **pages,
1824 unsigned long npages)
1825{
1826 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
1827
1828 release_pages(pages, npages);
1829 kvfree(pages);
1830 sev->pages_locked -= npages;
1831}
1832
1833static void sev_clflush_pages(struct page *pages[], unsigned long npages)
1834{
1835 uint8_t *page_virtual;
1836 unsigned long i;
1837
1838 if (npages == 0 || pages == NULL)
1839 return;
1840
1841 for (i = 0; i < npages; i++) {
1842 page_virtual = kmap_atomic(pages[i]);
1843 clflush_cache_range(page_virtual, PAGE_SIZE);
1844 kunmap_atomic(page_virtual);
1845 }
1846}
1847
1848static void __unregister_enc_region_locked(struct kvm *kvm,
1849 struct enc_region *region)
1850{
1851
1852
1853
1854
1855
1856
1857 sev_clflush_pages(region->pages, region->npages);
1858
1859 sev_unpin_memory(kvm, region->pages, region->npages);
1860 list_del(®ion->list);
1861 kfree(region);
1862}
1863
1864static struct kvm *svm_vm_alloc(void)
1865{
1866 struct kvm_svm *kvm_svm = vzalloc(sizeof(struct kvm_svm));
1867 return &kvm_svm->kvm;
1868}
1869
1870static void svm_vm_free(struct kvm *kvm)
1871{
1872 vfree(to_kvm_svm(kvm));
1873}
1874
1875static void sev_vm_destroy(struct kvm *kvm)
1876{
1877 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
1878 struct list_head *head = &sev->regions_list;
1879 struct list_head *pos, *q;
1880
1881 if (!sev_guest(kvm))
1882 return;
1883
1884 mutex_lock(&kvm->lock);
1885
1886
1887
1888
1889
1890 if (!list_empty(head)) {
1891 list_for_each_safe(pos, q, head) {
1892 __unregister_enc_region_locked(kvm,
1893 list_entry(pos, struct enc_region, list));
1894 }
1895 }
1896
1897 mutex_unlock(&kvm->lock);
1898
1899 sev_unbind_asid(kvm, sev->handle);
1900 sev_asid_free(kvm);
1901}
1902
1903static void avic_vm_destroy(struct kvm *kvm)
1904{
1905 unsigned long flags;
1906 struct kvm_svm *kvm_svm = to_kvm_svm(kvm);
1907
1908 if (!avic)
1909 return;
1910
1911 if (kvm_svm->avic_logical_id_table_page)
1912 __free_page(kvm_svm->avic_logical_id_table_page);
1913 if (kvm_svm->avic_physical_id_table_page)
1914 __free_page(kvm_svm->avic_physical_id_table_page);
1915
1916 spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
1917 hash_del(&kvm_svm->hnode);
1918 spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags);
1919}
1920
1921static void svm_vm_destroy(struct kvm *kvm)
1922{
1923 avic_vm_destroy(kvm);
1924 sev_vm_destroy(kvm);
1925}
1926
1927static int avic_vm_init(struct kvm *kvm)
1928{
1929 unsigned long flags;
1930 int err = -ENOMEM;
1931 struct kvm_svm *kvm_svm = to_kvm_svm(kvm);
1932 struct kvm_svm *k2;
1933 struct page *p_page;
1934 struct page *l_page;
1935 u32 vm_id;
1936
1937 if (!avic)
1938 return 0;
1939
1940
1941 p_page = alloc_page(GFP_KERNEL);
1942 if (!p_page)
1943 goto free_avic;
1944
1945 kvm_svm->avic_physical_id_table_page = p_page;
1946 clear_page(page_address(p_page));
1947
1948
1949 l_page = alloc_page(GFP_KERNEL);
1950 if (!l_page)
1951 goto free_avic;
1952
1953 kvm_svm->avic_logical_id_table_page = l_page;
1954 clear_page(page_address(l_page));
1955
1956 spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
1957 again:
1958 vm_id = next_vm_id = (next_vm_id + 1) & AVIC_VM_ID_MASK;
1959 if (vm_id == 0) {
1960 next_vm_id_wrapped = 1;
1961 goto again;
1962 }
1963
1964 if (next_vm_id_wrapped) {
1965 hash_for_each_possible(svm_vm_data_hash, k2, hnode, vm_id) {
1966 if (k2->avic_vm_id == vm_id)
1967 goto again;
1968 }
1969 }
1970 kvm_svm->avic_vm_id = vm_id;
1971 hash_add(svm_vm_data_hash, &kvm_svm->hnode, kvm_svm->avic_vm_id);
1972 spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags);
1973
1974 return 0;
1975
1976free_avic:
1977 avic_vm_destroy(kvm);
1978 return err;
1979}
1980
1981static inline int
1982avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r)
1983{
1984 int ret = 0;
1985 unsigned long flags;
1986 struct amd_svm_iommu_ir *ir;
1987 struct vcpu_svm *svm = to_svm(vcpu);
1988
1989 if (!kvm_arch_has_assigned_device(vcpu->kvm))
1990 return 0;
1991
1992
1993
1994
1995
1996 spin_lock_irqsave(&svm->ir_list_lock, flags);
1997
1998 if (list_empty(&svm->ir_list))
1999 goto out;
2000
2001 list_for_each_entry(ir, &svm->ir_list, node) {
2002 ret = amd_iommu_update_ga(cpu, r, ir->data);
2003 if (ret)
2004 break;
2005 }
2006out:
2007 spin_unlock_irqrestore(&svm->ir_list_lock, flags);
2008 return ret;
2009}
2010
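/*
 * On vCPU load, publish the new host APIC ID in this vCPU's physical APIC
 * ID table entry and set IS_RUNNING when appropriate, then let the IOMMU
 * retarget any posted interrupts to the new CPU.
 */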
2011static void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2012{
2013 u64 entry;
2014
2015 int h_physical_id = kvm_cpu_get_apicid(cpu);
2016 struct vcpu_svm *svm = to_svm(vcpu);
2017
2018 if (!kvm_vcpu_apicv_active(vcpu))
2019 return;
2020
2021 if (WARN_ON(h_physical_id >= AVIC_MAX_PHYSICAL_ID_COUNT))
2022 return;
2023
2024 entry = READ_ONCE(*(svm->avic_physical_id_cache));
2025 WARN_ON(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK);
2026
2027 entry &= ~AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK;
2028 entry |= (h_physical_id & AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK);
2029
2030 entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
2031 if (svm->avic_is_running)
2032 entry |= AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
2033
2034 WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
2035 avic_update_iommu_vcpu_affinity(vcpu, h_physical_id,
2036 svm->avic_is_running);
2037}
2038
2039static void avic_vcpu_put(struct kvm_vcpu *vcpu)
2040{
2041 u64 entry;
2042 struct vcpu_svm *svm = to_svm(vcpu);
2043
2044 if (!kvm_vcpu_apicv_active(vcpu))
2045 return;
2046
2047 entry = READ_ONCE(*(svm->avic_physical_id_cache));
2048 if (entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK)
2049 avic_update_iommu_vcpu_affinity(vcpu, -1, 0);
2050
2051 entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
2052 WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
2053}
2054
2055
2056
2057
2058static void avic_set_running(struct kvm_vcpu *vcpu, bool is_run)
2059{
2060 struct vcpu_svm *svm = to_svm(vcpu);
2061
2062 svm->avic_is_running = is_run;
2063 if (is_run)
2064 avic_vcpu_load(vcpu, vcpu->cpu);
2065 else
2066 avic_vcpu_put(vcpu);
2067}
2068
2069static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
2070{
2071 struct vcpu_svm *svm = to_svm(vcpu);
2072 u32 dummy;
2073 u32 eax = 1;
2074
2075 vcpu->arch.microcode_version = 0x01000065;
2076 svm->spec_ctrl = 0;
2077 svm->virt_spec_ctrl = 0;
2078
2079 if (!init_event) {
2080 svm->vcpu.arch.apic_base = APIC_DEFAULT_PHYS_BASE |
2081 MSR_IA32_APICBASE_ENABLE;
2082 if (kvm_vcpu_is_reset_bsp(&svm->vcpu))
2083 svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP;
2084 }
2085 init_vmcb(svm);
2086
2087 kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy, true);
2088 kvm_register_write(vcpu, VCPU_REGS_RDX, eax);
2089
2090 if (kvm_vcpu_apicv_active(vcpu) && !init_event)
2091 avic_update_vapic_bar(svm, APIC_DEFAULT_PHYS_BASE);
2092}
2093
2094static int avic_init_vcpu(struct vcpu_svm *svm)
2095{
2096 int ret;
2097
2098 if (!kvm_vcpu_apicv_active(&svm->vcpu))
2099 return 0;
2100
2101 ret = avic_init_backing_page(&svm->vcpu);
2102 if (ret)
2103 return ret;
2104
2105 INIT_LIST_HEAD(&svm->ir_list);
2106 spin_lock_init(&svm->ir_list_lock);
2107
2108 return ret;
2109}
2110
2111static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
2112{
2113 struct vcpu_svm *svm;
2114 struct page *page;
2115 struct page *msrpm_pages;
2116 struct page *hsave_page;
2117 struct page *nested_msrpm_pages;
2118 int err;
2119
2120 svm = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
2121 if (!svm) {
2122 err = -ENOMEM;
2123 goto out;
2124 }
2125
2126 err = kvm_vcpu_init(&svm->vcpu, kvm, id);
2127 if (err)
2128 goto free_svm;
2129
2130 err = -ENOMEM;
2131 page = alloc_page(GFP_KERNEL);
2132 if (!page)
2133 goto uninit;
2134
2135 msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER);
2136 if (!msrpm_pages)
2137 goto free_page1;
2138
2139 nested_msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER);
2140 if (!nested_msrpm_pages)
2141 goto free_page2;
2142
2143 hsave_page = alloc_page(GFP_KERNEL);
2144 if (!hsave_page)
2145 goto free_page3;
2146
2147 err = avic_init_vcpu(svm);
2148 if (err)
2149 goto free_page4;
2150
2151
2152
2153
2154 svm->avic_is_running = true;
2155
2156 svm->nested.hsave = page_address(hsave_page);
2157
2158 svm->msrpm = page_address(msrpm_pages);
2159 svm_vcpu_init_msrpm(svm->msrpm);
2160
2161 svm->nested.msrpm = page_address(nested_msrpm_pages);
2162 svm_vcpu_init_msrpm(svm->nested.msrpm);
2163
2164 svm->vmcb = page_address(page);
2165 clear_page(svm->vmcb);
2166 svm->vmcb_pa = __sme_set(page_to_pfn(page) << PAGE_SHIFT);
2167 svm->asid_generation = 0;
2168 init_vmcb(svm);
2169
2170 svm_init_osvw(&svm->vcpu);
2171
2172 return &svm->vcpu;
2173
2174free_page4:
2175 __free_page(hsave_page);
2176free_page3:
2177 __free_pages(nested_msrpm_pages, MSRPM_ALLOC_ORDER);
2178free_page2:
2179 __free_pages(msrpm_pages, MSRPM_ALLOC_ORDER);
2180free_page1:
2181 __free_page(page);
2182uninit:
2183 kvm_vcpu_uninit(&svm->vcpu);
2184free_svm:
2185 kmem_cache_free(kvm_vcpu_cache, svm);
2186out:
2187 return ERR_PTR(err);
2188}
2189
2190static void svm_free_vcpu(struct kvm_vcpu *vcpu)
2191{
2192 struct vcpu_svm *svm = to_svm(vcpu);
2193
2194 __free_page(pfn_to_page(__sme_clr(svm->vmcb_pa) >> PAGE_SHIFT));
2195 __free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER);
2196 __free_page(virt_to_page(svm->nested.hsave));
2197 __free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER);
2198 kvm_vcpu_uninit(vcpu);
2199 kmem_cache_free(kvm_vcpu_cache, svm);
2200
2201
2202
2203
2204 indirect_branch_prediction_barrier();
2205}
2206
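/*
 * Host state that the hardware does not switch for us (segment selectors,
 * GS base, a handful of MSRs) is saved here and restored in svm_vcpu_put();
 * the per-CPU TSC ratio MSR is also reprogrammed if it differs.
 */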
2207static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2208{
2209 struct vcpu_svm *svm = to_svm(vcpu);
2210 struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
2211 int i;
2212
2213 if (unlikely(cpu != vcpu->cpu)) {
2214 svm->asid_generation = 0;
2215 mark_all_dirty(svm->vmcb);
2216 }
2217
2218#ifdef CONFIG_X86_64
2219 rdmsrl(MSR_GS_BASE, to_svm(vcpu)->host.gs_base);
2220#endif
2221 savesegment(fs, svm->host.fs);
2222 savesegment(gs, svm->host.gs);
2223 svm->host.ldt = kvm_read_ldt();
2224
2225 for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
2226 rdmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);
2227
2228 if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) {
2229 u64 tsc_ratio = vcpu->arch.tsc_scaling_ratio;
2230 if (tsc_ratio != __this_cpu_read(current_tsc_ratio)) {
2231 __this_cpu_write(current_tsc_ratio, tsc_ratio);
2232 wrmsrl(MSR_AMD64_TSC_RATIO, tsc_ratio);
2233 }
2234 }
2235
2236 if (static_cpu_has(X86_FEATURE_RDTSCP))
2237 wrmsrl(MSR_TSC_AUX, svm->tsc_aux);
2238
2239 if (sd->current_vmcb != svm->vmcb) {
2240 sd->current_vmcb = svm->vmcb;
2241 indirect_branch_prediction_barrier();
2242 }
2243 avic_vcpu_load(vcpu, cpu);
2244}
2245
2246static void svm_vcpu_put(struct kvm_vcpu *vcpu)
2247{
2248 struct vcpu_svm *svm = to_svm(vcpu);
2249 int i;
2250
2251 avic_vcpu_put(vcpu);
2252
2253 ++vcpu->stat.host_state_reload;
2254 kvm_load_ldt(svm->host.ldt);
2255#ifdef CONFIG_X86_64
2256 loadsegment(fs, svm->host.fs);
2257 wrmsrl(MSR_KERNEL_GS_BASE, current->thread.gsbase);
2258 load_gs_index(svm->host.gs);
2259#else
2260#ifdef CONFIG_X86_32_LAZY_GS
2261 loadsegment(gs, svm->host.gs);
2262#endif
2263#endif
2264 for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
2265 wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);
2266}
2267
2268static void svm_vcpu_blocking(struct kvm_vcpu *vcpu)
2269{
2270 avic_set_running(vcpu, false);
2271}
2272
2273static void svm_vcpu_unblocking(struct kvm_vcpu *vcpu)
2274{
2275 avic_set_running(vcpu, true);
2276}
2277
2278static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu)
2279{
2280 struct vcpu_svm *svm = to_svm(vcpu);
2281 unsigned long rflags = svm->vmcb->save.rflags;
2282
2283 if (svm->nmi_singlestep) {
2284
2285 if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_TF))
2286 rflags &= ~X86_EFLAGS_TF;
2287 if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_RF))
2288 rflags &= ~X86_EFLAGS_RF;
2289 }
2290 return rflags;
2291}
2292
2293static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
2294{
2295 if (to_svm(vcpu)->nmi_singlestep)
2296 rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
2297
2298
2299
2300
2301
2302
2303 to_svm(vcpu)->vmcb->save.rflags = rflags;
2304}
2305
2306static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
2307{
2308 switch (reg) {
2309 case VCPU_EXREG_PDPTR:
2310 BUG_ON(!npt_enabled);
2311 load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu));
2312 break;
2313 default:
2314 BUG();
2315 }
2316}
2317
2318static void svm_set_vintr(struct vcpu_svm *svm)
2319{
2320 set_intercept(svm, INTERCEPT_VINTR);
2321}
2322
2323static void svm_clear_vintr(struct vcpu_svm *svm)
2324{
2325 clr_intercept(svm, INTERCEPT_VINTR);
2326}
2327
2328static struct vmcb_seg *svm_seg(struct kvm_vcpu *vcpu, int seg)
2329{
2330 struct vmcb_save_area *save = &to_svm(vcpu)->vmcb->save;
2331
2332 switch (seg) {
2333 case VCPU_SREG_CS: return &save->cs;
2334 case VCPU_SREG_DS: return &save->ds;
2335 case VCPU_SREG_ES: return &save->es;
2336 case VCPU_SREG_FS: return &save->fs;
2337 case VCPU_SREG_GS: return &save->gs;
2338 case VCPU_SREG_SS: return &save->ss;
2339 case VCPU_SREG_TR: return &save->tr;
2340 case VCPU_SREG_LDTR: return &save->ldtr;
2341 }
2342 BUG();
2343 return NULL;
2344}
2345
2346static u64 svm_get_segment_base(struct kvm_vcpu *vcpu, int seg)
2347{
2348 struct vmcb_seg *s = svm_seg(vcpu, seg);
2349
2350 return s->base;
2351}
2352
2353static void svm_get_segment(struct kvm_vcpu *vcpu,
2354 struct kvm_segment *var, int seg)
2355{
2356 struct vmcb_seg *s = svm_seg(vcpu, seg);
2357
2358 var->base = s->base;
2359 var->limit = s->limit;
2360 var->selector = s->selector;
2361 var->type = s->attrib & SVM_SELECTOR_TYPE_MASK;
2362 var->s = (s->attrib >> SVM_SELECTOR_S_SHIFT) & 1;
2363 var->dpl = (s->attrib >> SVM_SELECTOR_DPL_SHIFT) & 3;
2364 var->present = (s->attrib >> SVM_SELECTOR_P_SHIFT) & 1;
2365 var->avl = (s->attrib >> SVM_SELECTOR_AVL_SHIFT) & 1;
2366 var->l = (s->attrib >> SVM_SELECTOR_L_SHIFT) & 1;
2367 var->db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1;
2368
2369 	/*
2370 	 * AMD CPUs circa 2014 track the G bit for all segments except CS.
2371 	 * However, the SVM spec states that the G bit is not observed by the
2372 	 * CPU, and some VMware virtual CPUs drop the G bit for all segments.
2373 	 * So let's synthesize a legal G bit for all segments, this helps
2374 	 * running KVM nested. It also helps cross-vendor migration, because
2375 	 * Intel's vmentry has a check on the 'G' bit.
2376 	 */
2377 var->g = s->limit > 0xfffff;
2378
2379 	/*
2380 	 * AMD's VMCB does not have an explicit unusable field, so emulate it
2381 	 * for cross vendor migration purposes by "not present"
2382 	 */
2383 var->unusable = !var->present;
2384
2385 switch (seg) {
2386 case VCPU_SREG_TR:
2387 		/*
2388 		 * Work around a bug where the busy flag in the tr selector
2389 		 * isn't exposed
2390 		 */
2391 var->type |= 0x2;
2392 break;
2393 case VCPU_SREG_DS:
2394 case VCPU_SREG_ES:
2395 case VCPU_SREG_FS:
2396 case VCPU_SREG_GS:
2397 		/*
2398 		 * The accessed bit must always be set in the segment
2399 		 * descriptor cache, although it can be cleared in the
2400 		 * descriptor, the cached bit always remains at 1. Since
2401 		 * Intel has a check on this, set it here to support
2402 		 * cross-vendor migration.
2403 		 */
2404 if (!var->unusable)
2405 var->type |= 0x1;
2406 break;
2407 case VCPU_SREG_SS:
2408 		/*
2409 		 * On AMD CPUs sometimes the DB bit in the segment
2410 		 * descriptor is left as 1, although the whole segment has
2411 		 * been made unusable. Clear it here to pass an Intel VMX
2412 		 * entry check when cross vendor migrating.
2413 		 */
2414 if (var->unusable)
2415 var->db = 0;
2416 		/* This is symmetric with svm_set_segment() */
2417 var->dpl = to_svm(vcpu)->vmcb->save.cpl;
2418 break;
2419 }
2420}
2421
2422static int svm_get_cpl(struct kvm_vcpu *vcpu)
2423{
2424 struct vmcb_save_area *save = &to_svm(vcpu)->vmcb->save;
2425
2426 return save->cpl;
2427}
2428
2429static void svm_get_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
2430{
2431 struct vcpu_svm *svm = to_svm(vcpu);
2432
2433 dt->size = svm->vmcb->save.idtr.limit;
2434 dt->address = svm->vmcb->save.idtr.base;
2435}
2436
2437static void svm_set_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
2438{
2439 struct vcpu_svm *svm = to_svm(vcpu);
2440
2441 svm->vmcb->save.idtr.limit = dt->size;
2442 	svm->vmcb->save.idtr.base = dt->address;
2443 mark_dirty(svm->vmcb, VMCB_DT);
2444}
2445
2446static void svm_get_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
2447{
2448 struct vcpu_svm *svm = to_svm(vcpu);
2449
2450 dt->size = svm->vmcb->save.gdtr.limit;
2451 dt->address = svm->vmcb->save.gdtr.base;
2452}
2453
2454static void svm_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
2455{
2456 struct vcpu_svm *svm = to_svm(vcpu);
2457
2458 svm->vmcb->save.gdtr.limit = dt->size;
2459 	svm->vmcb->save.gdtr.base = dt->address;
2460 mark_dirty(svm->vmcb, VMCB_DT);
2461}
2462
2463static void svm_decache_cr0_guest_bits(struct kvm_vcpu *vcpu)
2464{
2465}
2466
2467static void svm_decache_cr3(struct kvm_vcpu *vcpu)
2468{
2469}
2470
2471static void svm_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
2472{
2473}
2474
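/*
 * Propagate the guest's selectively intercepted CR0 bits (TS/MP) into the
 * hardware CR0, and only keep the CR0 read/write intercepts armed while
 * the guest and hardware views of CR0 actually differ.
 */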
2475static void update_cr0_intercept(struct vcpu_svm *svm)
2476{
2477 ulong gcr0 = svm->vcpu.arch.cr0;
2478 u64 *hcr0 = &svm->vmcb->save.cr0;
2479
2480 *hcr0 = (*hcr0 & ~SVM_CR0_SELECTIVE_MASK)
2481 | (gcr0 & SVM_CR0_SELECTIVE_MASK);
2482
2483 mark_dirty(svm->vmcb, VMCB_CR);
2484
2485 if (gcr0 == *hcr0) {
2486 clr_cr_intercept(svm, INTERCEPT_CR0_READ);
2487 clr_cr_intercept(svm, INTERCEPT_CR0_WRITE);
2488 } else {
2489 set_cr_intercept(svm, INTERCEPT_CR0_READ);
2490 set_cr_intercept(svm, INTERCEPT_CR0_WRITE);
2491 }
2492}
2493
2494static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
2495{
2496 struct vcpu_svm *svm = to_svm(vcpu);
2497
2498#ifdef CONFIG_X86_64
2499 if (vcpu->arch.efer & EFER_LME) {
2500 if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
2501 vcpu->arch.efer |= EFER_LMA;
2502 svm->vmcb->save.efer |= EFER_LMA | EFER_LME;
2503 }
2504
2505 if (is_paging(vcpu) && !(cr0 & X86_CR0_PG)) {
2506 vcpu->arch.efer &= ~EFER_LMA;
2507 svm->vmcb->save.efer &= ~(EFER_LMA | EFER_LME);
2508 }
2509 }
2510#endif
2511 vcpu->arch.cr0 = cr0;
2512
2513 if (!npt_enabled)
2514 cr0 |= X86_CR0_PG | X86_CR0_WP;
2515
2516 	/*
2517 	 * re-enable caching here because the QEMU bios
2518 	 * does not do it - this results in some delay at
2519 	 * reboot
2520 	 */
2521 if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
2522 cr0 &= ~(X86_CR0_CD | X86_CR0_NW);
2523 svm->vmcb->save.cr0 = cr0;
2524 mark_dirty(svm->vmcb, VMCB_CR);
2525 update_cr0_intercept(svm);
2526}
2527
2528static int svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
2529{
2530 unsigned long host_cr4_mce = cr4_read_shadow() & X86_CR4_MCE;
2531 unsigned long old_cr4 = to_svm(vcpu)->vmcb->save.cr4;
2532
2533 if (cr4 & X86_CR4_VMXE)
2534 return 1;
2535
2536 if (npt_enabled && ((old_cr4 ^ cr4) & X86_CR4_PGE))
2537 svm_flush_tlb(vcpu, true);
2538
2539 vcpu->arch.cr4 = cr4;
2540 if (!npt_enabled)
2541 cr4 |= X86_CR4_PAE;
2542 cr4 |= host_cr4_mce;
2543 to_svm(vcpu)->vmcb->save.cr4 = cr4;
2544 mark_dirty(to_svm(vcpu)->vmcb, VMCB_CR);
2545 return 0;
2546}
2547
2548static void svm_set_segment(struct kvm_vcpu *vcpu,
2549 struct kvm_segment *var, int seg)
2550{
2551 struct vcpu_svm *svm = to_svm(vcpu);
2552 struct vmcb_seg *s = svm_seg(vcpu, seg);
2553
2554 s->base = var->base;
2555 s->limit = var->limit;
2556 s->selector = var->selector;
2557 s->attrib = (var->type & SVM_SELECTOR_TYPE_MASK);
2558 s->attrib |= (var->s & 1) << SVM_SELECTOR_S_SHIFT;
2559 s->attrib |= (var->dpl & 3) << SVM_SELECTOR_DPL_SHIFT;
2560 s->attrib |= ((var->present & 1) && !var->unusable) << SVM_SELECTOR_P_SHIFT;
2561 s->attrib |= (var->avl & 1) << SVM_SELECTOR_AVL_SHIFT;
2562 s->attrib |= (var->l & 1) << SVM_SELECTOR_L_SHIFT;
2563 s->attrib |= (var->db & 1) << SVM_SELECTOR_DB_SHIFT;
2564 s->attrib |= (var->g & 1) << SVM_SELECTOR_G_SHIFT;
2565
2566 	/*
2567 	 * This is always accurate, except if SYSRET returned to a segment
2568 	 * with SS.DPL != 3.  Intel does not have this quirk, and always
2569 	 * forces SS.DPL to 3 on sysret, so we ignore that case; fixing it
2570 	 * would entail passing the CPL to userspace and back.
2571 	 */
2572 if (seg == VCPU_SREG_SS)
2573 		/* This is symmetric with svm_get_segment() */
2574 svm->vmcb->save.cpl = (var->dpl & 3);
2575
2576 mark_dirty(svm->vmcb, VMCB_SEG);
2577}
2578
2579static void update_bp_intercept(struct kvm_vcpu *vcpu)
2580{
2581 struct vcpu_svm *svm = to_svm(vcpu);
2582
2583 clr_exception_intercept(svm, BP_VECTOR);
2584
2585 if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) {
2586 if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
2587 set_exception_intercept(svm, BP_VECTOR);
2588 } else
2589 vcpu->guest_debug = 0;
2590}
2591
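/*
 * Hand the vCPU a fresh ASID from this CPU's pool; when the pool is
 * exhausted, start a new generation, wrap back to min_asid and request a
 * full TLB flush for all ASIDs on the next VMRUN.
 */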
2592static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd)
2593{
2594 if (sd->next_asid > sd->max_asid) {
2595 ++sd->asid_generation;
2596 sd->next_asid = sd->min_asid;
2597 svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ALL_ASID;
2598 }
2599
2600 svm->asid_generation = sd->asid_generation;
2601 svm->vmcb->control.asid = sd->next_asid++;
2602
2603 mark_dirty(svm->vmcb, VMCB_ASID);
2604}
2605
2606static u64 svm_get_dr6(struct kvm_vcpu *vcpu)
2607{
2608 return to_svm(vcpu)->vmcb->save.dr6;
2609}
2610
2611static void svm_set_dr6(struct kvm_vcpu *vcpu, unsigned long value)
2612{
2613 struct vcpu_svm *svm = to_svm(vcpu);
2614
2615 svm->vmcb->save.dr6 = value;
2616 mark_dirty(svm->vmcb, VMCB_DR);
2617}
2618
2619static void svm_sync_dirty_debug_regs(struct kvm_vcpu *vcpu)
2620{
2621 struct vcpu_svm *svm = to_svm(vcpu);
2622
2623 get_debugreg(vcpu->arch.db[0], 0);
2624 get_debugreg(vcpu->arch.db[1], 1);
2625 get_debugreg(vcpu->arch.db[2], 2);
2626 get_debugreg(vcpu->arch.db[3], 3);
2627 vcpu->arch.dr6 = svm_get_dr6(vcpu);
2628 vcpu->arch.dr7 = svm->vmcb->save.dr7;
2629
2630 vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_WONT_EXIT;
2631 set_dr_intercepts(svm);
2632}
2633
2634static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value)
2635{
2636 struct vcpu_svm *svm = to_svm(vcpu);
2637
2638 svm->vmcb->save.dr7 = value;
2639 mark_dirty(svm->vmcb, VMCB_DR);
2640}
2641
2642static int pf_interception(struct vcpu_svm *svm)
2643{
2644 u64 fault_address = __sme_clr(svm->vmcb->control.exit_info_2);
2645 u64 error_code = svm->vmcb->control.exit_info_1;
2646
2647 return kvm_handle_page_fault(&svm->vcpu, error_code, fault_address,
2648 static_cpu_has(X86_FEATURE_DECODEASSISTS) ?
2649 svm->vmcb->control.insn_bytes : NULL,
2650 svm->vmcb->control.insn_len);
2651}
2652
2653static int npf_interception(struct vcpu_svm *svm)
2654{
2655 u64 fault_address = __sme_clr(svm->vmcb->control.exit_info_2);
2656 u64 error_code = svm->vmcb->control.exit_info_1;
2657
2658 trace_kvm_page_fault(fault_address, error_code);
2659 return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code,
2660 static_cpu_has(X86_FEATURE_DECODEASSISTS) ?
2661 svm->vmcb->control.insn_bytes : NULL,
2662 svm->vmcb->control.insn_len);
2663}
2664
2665static int db_interception(struct vcpu_svm *svm)
2666{
2667 struct kvm_run *kvm_run = svm->vcpu.run;
2668
2669 if (!(svm->vcpu.guest_debug &
2670 (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) &&
2671 !svm->nmi_singlestep) {
2672 kvm_queue_exception(&svm->vcpu, DB_VECTOR);
2673 return 1;
2674 }
2675
2676 if (svm->nmi_singlestep) {
2677 disable_nmi_singlestep(svm);
2678 }
2679
2680 if (svm->vcpu.guest_debug &
2681 (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) {
2682 kvm_run->exit_reason = KVM_EXIT_DEBUG;
2683 kvm_run->debug.arch.pc =
2684 svm->vmcb->save.cs.base + svm->vmcb->save.rip;
2685 kvm_run->debug.arch.exception = DB_VECTOR;
2686 return 0;
2687 }
2688
2689 return 1;
2690}
2691
2692static int bp_interception(struct vcpu_svm *svm)
2693{
2694 struct kvm_run *kvm_run = svm->vcpu.run;
2695
2696 kvm_run->exit_reason = KVM_EXIT_DEBUG;
2697 kvm_run->debug.arch.pc = svm->vmcb->save.cs.base + svm->vmcb->save.rip;
2698 kvm_run->debug.arch.exception = BP_VECTOR;
2699 return 0;
2700}
2701
2702static int ud_interception(struct vcpu_svm *svm)
2703{
2704 return handle_ud(&svm->vcpu);
2705}
2706
2707static int ac_interception(struct vcpu_svm *svm)
2708{
2709 kvm_queue_exception_e(&svm->vcpu, AC_VECTOR, 0);
2710 return 1;
2711}
2712
2713static int gp_interception(struct vcpu_svm *svm)
2714{
2715 struct kvm_vcpu *vcpu = &svm->vcpu;
2716 u32 error_code = svm->vmcb->control.exit_info_1;
2717 int er;
2718
2719 WARN_ON_ONCE(!enable_vmware_backdoor);
2720
2721 er = kvm_emulate_instruction(vcpu,
2722 EMULTYPE_VMWARE | EMULTYPE_NO_UD_ON_FAIL);
2723 if (er == EMULATE_USER_EXIT)
2724 return 0;
2725 else if (er != EMULATE_DONE)
2726 kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
2727 return 1;
2728}
2729
2730static bool is_erratum_383(void)
2731{
2732 int err, i;
2733 u64 value;
2734
2735 if (!erratum_383_found)
2736 return false;
2737
2738 value = native_read_msr_safe(MSR_IA32_MC0_STATUS, &err);
2739 if (err)
2740 return false;
2741
2742 	/* Bit 62 may or may not be set for this mce */
2743 value &= ~(1ULL << 62);
2744
2745 if (value != 0xb600000000010015ULL)
2746 return false;
2747
2748 	/* Clear MCi_STATUS registers */
2749 for (i = 0; i < 6; ++i)
2750 native_write_msr_safe(MSR_IA32_MCx_STATUS(i), 0, 0);
2751
2752 value = native_read_msr_safe(MSR_IA32_MCG_STATUS, &err);
2753 if (!err) {
2754 u32 low, high;
2755
2756 value &= ~(1ULL << 2);
2757 low = lower_32_bits(value);
2758 high = upper_32_bits(value);
2759
2760 native_write_msr_safe(MSR_IA32_MCG_STATUS, low, high);
2761 }
2762
2763 	/* Flush tlb to evict multi-match entries */
2764 __flush_tlb_all();
2765
2766 return true;
2767}
2768
2769static void svm_handle_mce(struct vcpu_svm *svm)
2770{
2771 if (is_erratum_383()) {
2772 		/*
2773 		 * Erratum 383 triggered. Guest state is corrupt so kill the
2774 		 * guest.
2775 		 */
2776 pr_err("KVM: Guest triggered AMD Erratum 383\n");
2777
2778 kvm_make_request(KVM_REQ_TRIPLE_FAULT, &svm->vcpu);
2779
2780 return;
2781 }
2782
2783 	/*
2784 	 * On an #MC intercept the MCE handler is not called automatically in
2785 	 * the host. So do it by hand here.
2786 	 */
2787 asm volatile (
2788 "int $0x12\n");
2789 	/* not sure if we ever come back to this point */
2790
2791 return;
2792}
2793
2794static int mc_interception(struct vcpu_svm *svm)
2795{
2796 return 1;
2797}
2798
2799static int shutdown_interception(struct vcpu_svm *svm)
2800{
2801 struct kvm_run *kvm_run = svm->vcpu.run;
2802
2803 	/*
2804 	 * VMCB is undefined after a SHUTDOWN intercept
2805 	 * so reinitialize it.
2806 	 */
2807 clear_page(svm->vmcb);
2808 init_vmcb(svm);
2809
2810 kvm_run->exit_reason = KVM_EXIT_SHUTDOWN;
2811 return 0;
2812}
2813
2814static int io_interception(struct vcpu_svm *svm)
2815{
2816 struct kvm_vcpu *vcpu = &svm->vcpu;
2817 u32 io_info = svm->vmcb->control.exit_info_1;
2818 int size, in, string;
2819 unsigned port;
2820
2821 ++svm->vcpu.stat.io_exits;
2822 string = (io_info & SVM_IOIO_STR_MASK) != 0;
2823 in = (io_info & SVM_IOIO_TYPE_MASK) != 0;
2824 if (string)
2825 return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE;
2826
2827 port = io_info >> 16;
2828 size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT;
2829 svm->next_rip = svm->vmcb->control.exit_info_2;
2830
2831 return kvm_fast_pio(&svm->vcpu, size, port, in);
2832}
2833
2834static int nmi_interception(struct vcpu_svm *svm)
2835{
2836 return 1;
2837}
2838
2839static int intr_interception(struct vcpu_svm *svm)
2840{
2841 ++svm->vcpu.stat.irq_exits;
2842 return 1;
2843}
2844
2845static int nop_on_interception(struct vcpu_svm *svm)
2846{
2847 return 1;
2848}
2849
2850static int halt_interception(struct vcpu_svm *svm)
2851{
2852 svm->next_rip = kvm_rip_read(&svm->vcpu) + 1;
2853 return kvm_emulate_halt(&svm->vcpu);
2854}
2855
2856static int vmmcall_interception(struct vcpu_svm *svm)
2857{
2858 svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
2859 return kvm_emulate_hypercall(&svm->vcpu);
2860}
2861
2862static unsigned long nested_svm_get_tdp_cr3(struct kvm_vcpu *vcpu)
2863{
2864 struct vcpu_svm *svm = to_svm(vcpu);
2865
2866 return svm->nested.nested_cr3;
2867}
2868
2869static u64 nested_svm_get_tdp_pdptr(struct kvm_vcpu *vcpu, int index)
2870{
2871 struct vcpu_svm *svm = to_svm(vcpu);
2872 u64 cr3 = svm->nested.nested_cr3;
2873 u64 pdpte;
2874 int ret;
2875
2876 ret = kvm_vcpu_read_guest_page(vcpu, gpa_to_gfn(__sme_clr(cr3)), &pdpte,
2877 offset_in_page(cr3) + index * 8, 8);
2878 if (ret)
2879 return 0;
2880 return pdpte;
2881}
2882
2883static void nested_svm_set_tdp_cr3(struct kvm_vcpu *vcpu,
2884 unsigned long root)
2885{
2886 struct vcpu_svm *svm = to_svm(vcpu);
2887
2888 svm->vmcb->control.nested_cr3 = __sme_set(root);
2889 mark_dirty(svm->vmcb, VMCB_NPT);
2890}
2891
2892static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu,
2893 struct x86_exception *fault)
2894{
2895 struct vcpu_svm *svm = to_svm(vcpu);
2896
2897 if (svm->vmcb->control.exit_code != SVM_EXIT_NPF) {
2898 		/*
2899 		 * TODO: track the cause of the nested page fault, and
2900 		 * correctly fill in the high bits of exit_info_1.
2901 		 */
2902 svm->vmcb->control.exit_code = SVM_EXIT_NPF;
2903 svm->vmcb->control.exit_code_hi = 0;
2904 svm->vmcb->control.exit_info_1 = (1ULL << 32);
2905 svm->vmcb->control.exit_info_2 = fault->address;
2906 }
2907
2908 svm->vmcb->control.exit_info_1 &= ~0xffffffffULL;
2909 svm->vmcb->control.exit_info_1 |= fault->error_code;
2910
2911 	/*
2912 	 * The present bit is always zero for page structure faults on real
2913 	 * hardware.
2914 	 */
2915 if (svm->vmcb->control.exit_info_1 & (2ULL << 32))
2916 svm->vmcb->control.exit_info_1 &= ~1;
2917
2918 nested_svm_vmexit(svm);
2919}
2920
2921static void nested_svm_init_mmu_context(struct kvm_vcpu *vcpu)
2922{
2923 WARN_ON(mmu_is_nested(vcpu));
2924 kvm_init_shadow_mmu(vcpu);
2925 vcpu->arch.mmu.set_cr3 = nested_svm_set_tdp_cr3;
2926 vcpu->arch.mmu.get_cr3 = nested_svm_get_tdp_cr3;
2927 vcpu->arch.mmu.get_pdptr = nested_svm_get_tdp_pdptr;
2928 vcpu->arch.mmu.inject_page_fault = nested_svm_inject_npf_exit;
2929 vcpu->arch.mmu.shadow_root_level = get_npt_level(vcpu);
2930 reset_shadow_zero_bits_mask(vcpu, &vcpu->arch.mmu);
2931 vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu;
2932}
2933
2934static void nested_svm_uninit_mmu_context(struct kvm_vcpu *vcpu)
2935{
2936 vcpu->arch.walk_mmu = &vcpu->arch.mmu;
2937}
2938
2939static int nested_svm_check_permissions(struct vcpu_svm *svm)
2940{
2941 if (!(svm->vcpu.arch.efer & EFER_SVME) ||
2942 !is_paging(&svm->vcpu)) {
2943 kvm_queue_exception(&svm->vcpu, UD_VECTOR);
2944 return 1;
2945 }
2946
2947 if (svm->vmcb->save.cpl) {
2948 kvm_inject_gp(&svm->vcpu, 0);
2949 return 1;
2950 }
2951
2952 return 0;
2953}
2954
2955static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
2956 bool has_error_code, u32 error_code)
2957{
2958 int vmexit;
2959
2960 if (!is_guest_mode(&svm->vcpu))
2961 return 0;
2962
2963 vmexit = nested_svm_intercept(svm);
2964 if (vmexit != NESTED_EXIT_DONE)
2965 return 0;
2966
2967 svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + nr;
2968 svm->vmcb->control.exit_code_hi = 0;
2969 svm->vmcb->control.exit_info_1 = error_code;
2970
2971 	/*
2972 	 * EXITINFO2 is undefined for all exception intercepts other
2973 	 * than #PF.  For #PF it carries the faulting address: the
2974 	 * async-PF token if this is a nested async page fault,
2975 	 * otherwise the guest's CR2.
2976 	 */
2980 if (svm->vcpu.arch.exception.nested_apf)
2981 svm->vmcb->control.exit_info_2 = svm->vcpu.arch.apf.nested_apf_token;
2982 else
2983 svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2;
2984
2985 svm->nested.exit_required = true;
2986 return vmexit;
2987}
2988
2989 /* This function returns true if it is safe to enable the irq window */
2990static inline bool nested_svm_intr(struct vcpu_svm *svm)
2991{
2992 if (!is_guest_mode(&svm->vcpu))
2993 return true;
2994
2995 if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK))
2996 return true;
2997
2998 if (!(svm->vcpu.arch.hflags & HF_HIF_MASK))
2999 return false;
3000
3001 	/*
3002 	 * if vmexit was already requested (by intercepted exception
3003 	 * for instance) do not overwrite it with "external interrupt"
3004 	 * vmexit.
3005 	 */
3006 if (svm->nested.exit_required)
3007 return false;
3008
3009 svm->vmcb->control.exit_code = SVM_EXIT_INTR;
3010 svm->vmcb->control.exit_info_1 = 0;
3011 svm->vmcb->control.exit_info_2 = 0;
3012
3013 if (svm->nested.intercept & 1ULL) {
3014 		/*
3015 		 * The #vmexit can't be emulated here directly because this
3016 		 * code path runs with irqs and preemption disabled. A
3017 		 * #vmexit emulation might sleep. Only signal request for
3018 		 * the #vmexit here.
3019 		 */
3020 svm->nested.exit_required = true;
3021 trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip);
3022 return false;
3023 }
3024
3025 return true;
3026}
3027
3028 /* This function returns true if it is safe to enable the nmi window */
3029static inline bool nested_svm_nmi(struct vcpu_svm *svm)
3030{
3031 if (!is_guest_mode(&svm->vcpu))
3032 return true;
3033
3034 if (!(svm->nested.intercept & (1ULL << INTERCEPT_NMI)))
3035 return true;
3036
3037 svm->vmcb->control.exit_code = SVM_EXIT_NMI;
3038 svm->nested.exit_required = true;
3039
3040 return false;
3041}
3042
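/*
 * Map the guest page backing a nested structure (VMCB, MSR or I/O
 * permission bitmap) so the host can access it; injects #GP into the
 * guest if the GPA does not resolve to a valid page.
 */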
3043static void *nested_svm_map(struct vcpu_svm *svm, u64 gpa, struct page **_page)
3044{
3045 struct page *page;
3046
3047 might_sleep();
3048
3049 page = kvm_vcpu_gfn_to_page(&svm->vcpu, gpa >> PAGE_SHIFT);
3050 if (is_error_page(page))
3051 goto error;
3052
3053 *_page = page;
3054
3055 return kmap(page);
3056
3057error:
3058 kvm_inject_gp(&svm->vcpu, 0);
3059
3060 return NULL;
3061}
3062
3063static void nested_svm_unmap(struct page *page)
3064{
3065 kunmap(page);
3066 kvm_release_page_dirty(page);
3067}
3068
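/*
 * Consult L1's I/O permission bitmap to decide whether an IOIO intercept
 * taken while running L2 must be reflected to L1 (NESTED_EXIT_DONE) or
 * handled by the host (NESTED_EXIT_HOST).
 */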
3069static int nested_svm_intercept_ioio(struct vcpu_svm *svm)
3070{
3071 unsigned port, size, iopm_len;
3072 u16 val, mask;
3073 u8 start_bit;
3074 u64 gpa;
3075
3076 if (!(svm->nested.intercept & (1ULL << INTERCEPT_IOIO_PROT)))
3077 return NESTED_EXIT_HOST;
3078
3079 port = svm->vmcb->control.exit_info_1 >> 16;
3080 size = (svm->vmcb->control.exit_info_1 & SVM_IOIO_SIZE_MASK) >>
3081 SVM_IOIO_SIZE_SHIFT;
3082 gpa = svm->nested.vmcb_iopm + (port / 8);
3083 start_bit = port % 8;
3084 iopm_len = (start_bit + size > 8) ? 2 : 1;
3085 mask = (0xf >> (4 - size)) << start_bit;
3086 val = 0;
3087
3088 if (kvm_vcpu_read_guest(&svm->vcpu, gpa, &val, iopm_len))
3089 return NESTED_EXIT_DONE;
3090
3091 return (val & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST;
3092}
3093
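/*
 * Consult L1's MSR permission bitmap for the MSR in RCX to decide whether
 * an MSR intercept taken while running L2 belongs to L1 or to the host.
 */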
3094static int nested_svm_exit_handled_msr(struct vcpu_svm *svm)
3095{
3096 u32 offset, msr, value;
3097 int write, mask;
3098
3099 if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT)))
3100 return NESTED_EXIT_HOST;
3101
3102 msr = svm->vcpu.arch.regs[VCPU_REGS_RCX];
3103 offset = svm_msrpm_offset(msr);
3104 write = svm->vmcb->control.exit_info_1 & 1;
3105 mask = 1 << ((2 * (msr & 0xf)) + write);
3106
3107 if (offset == MSR_INVALID)
3108 return NESTED_EXIT_DONE;
3109
3110 	/* Offset is in 32 bit units but need in 8 bit units */
3111 offset *= 4;
3112
3113 if (kvm_vcpu_read_guest(&svm->vcpu, svm->nested.vmcb_msrpm + offset, &value, 4))
3114 return NESTED_EXIT_DONE;
3115
3116 return (value & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST;
3117}
3118
3119 /* DB exceptions for our internal use must not cause vmexit */
3120static int nested_svm_intercept_db(struct vcpu_svm *svm)
3121{
3122 unsigned long dr6;
3123
3124 	/* if we're not singlestepping, it's not ours */
3125 if (!svm->nmi_singlestep)
3126 return NESTED_EXIT_DONE;
3127
3128 	/* if it's not a singlestep exception, it's not ours */
3129 if (kvm_get_dr(&svm->vcpu, 6, &dr6))
3130 return NESTED_EXIT_DONE;
3131 if (!(dr6 & DR6_BS))
3132 return NESTED_EXIT_DONE;
3133
3134 	/* if the guest is singlestepping, its own db is ours */
3135 if (svm->nmi_singlestep_guest_rflags & X86_EFLAGS_TF) {
3136 disable_nmi_singlestep(svm);
3137 return NESTED_EXIT_DONE;
3138 }
3139
3140 	/* it's ours, the nested hypervisor must not see this one */
3141 return NESTED_EXIT_HOST;
3142}
3143
3144static int nested_svm_exit_special(struct vcpu_svm *svm)
3145{
3146 u32 exit_code = svm->vmcb->control.exit_code;
3147
3148 switch (exit_code) {
3149 case SVM_EXIT_INTR:
3150 case SVM_EXIT_NMI:
3151 case SVM_EXIT_EXCP_BASE + MC_VECTOR:
3152 return NESTED_EXIT_HOST;
3153 case SVM_EXIT_NPF:
3154 		/* For now we are always handling NPFs when using them */
3155 if (npt_enabled)
3156 return NESTED_EXIT_HOST;
3157 break;
3158 case SVM_EXIT_EXCP_BASE + PF_VECTOR:
3159 		/* With shadow paging the host handles #PF itself, unless it is an async page fault */
3160 if (!npt_enabled && svm->vcpu.arch.apf.host_apf_reason == 0)
3161 return NESTED_EXIT_HOST;
3162 break;
3163 default:
3164 break;
3165 }
3166
3167 return NESTED_EXIT_CONTINUE;
3168}
3169
3170 
3171 /* If this function returns true, this #vmexit was already handled */
3173static int nested_svm_intercept(struct vcpu_svm *svm)
3174{
3175 u32 exit_code = svm->vmcb->control.exit_code;
3176 int vmexit = NESTED_EXIT_HOST;
3177
3178 switch (exit_code) {
3179 case SVM_EXIT_MSR:
3180 vmexit = nested_svm_exit_handled_msr(svm);
3181 break;
3182 case SVM_EXIT_IOIO:
3183 vmexit = nested_svm_intercept_ioio(svm);
3184 break;
3185 case SVM_EXIT_READ_CR0 ... SVM_EXIT_WRITE_CR8: {
3186 u32 bit = 1U << (exit_code - SVM_EXIT_READ_CR0);
3187 if (svm->nested.intercept_cr & bit)
3188 vmexit = NESTED_EXIT_DONE;
3189 break;
3190 }
3191 case SVM_EXIT_READ_DR0 ... SVM_EXIT_WRITE_DR7: {
3192 u32 bit = 1U << (exit_code - SVM_EXIT_READ_DR0);
3193 if (svm->nested.intercept_dr & bit)
3194 vmexit = NESTED_EXIT_DONE;
3195 break;
3196 }
3197 case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: {
3198 u32 excp_bits = 1 << (exit_code - SVM_EXIT_EXCP_BASE);
3199 if (svm->nested.intercept_exceptions & excp_bits) {
3200 if (exit_code == SVM_EXIT_EXCP_BASE + DB_VECTOR)
3201 vmexit = nested_svm_intercept_db(svm);
3202 else
3203 vmexit = NESTED_EXIT_DONE;
3204 }
3205 		/* async page fault always causes a vmexit */
3206 else if ((exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR) &&
3207 svm->vcpu.arch.exception.nested_apf != 0)
3208 vmexit = NESTED_EXIT_DONE;
3209 break;
3210 }
3211 case SVM_EXIT_ERR: {
3212 vmexit = NESTED_EXIT_DONE;
3213 break;
3214 }
3215 default: {
3216 u64 exit_bits = 1ULL << (exit_code - SVM_EXIT_INTR);
3217 if (svm->nested.intercept & exit_bits)
3218 vmexit = NESTED_EXIT_DONE;
3219 }
3220 }
3221
3222 return vmexit;
3223}
3224
3225static int nested_svm_exit_handled(struct vcpu_svm *svm)
3226{
3227 int vmexit;
3228
3229 vmexit = nested_svm_intercept(svm);
3230
3231 if (vmexit == NESTED_EXIT_DONE)
3232 nested_svm_vmexit(svm);
3233
3234 return vmexit;
3235}
3236
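/*
 * Copy the VMCB control area field by field; used to stash the host's
 * controls in hsave before VMRUN and to restore them on a nested #VMEXIT.
 */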
3237static inline void copy_vmcb_control_area(struct vmcb *dst_vmcb, struct vmcb *from_vmcb)
3238{
3239 struct vmcb_control_area *dst = &dst_vmcb->control;
3240 struct vmcb_control_area *from = &from_vmcb->control;
3241
3242 dst->intercept_cr = from->intercept_cr;
3243 dst->intercept_dr = from->intercept_dr;
3244 dst->intercept_exceptions = from->intercept_exceptions;
3245 dst->intercept = from->intercept;
3246 dst->iopm_base_pa = from->iopm_base_pa;
3247 dst->msrpm_base_pa = from->msrpm_base_pa;
3248 dst->tsc_offset = from->tsc_offset;
3249 dst->asid = from->asid;
3250 dst->tlb_ctl = from->tlb_ctl;
3251 dst->int_ctl = from->int_ctl;
3252 dst->int_vector = from->int_vector;
3253 dst->int_state = from->int_state;
3254 dst->exit_code = from->exit_code;
3255 dst->exit_code_hi = from->exit_code_hi;
3256 dst->exit_info_1 = from->exit_info_1;
3257 dst->exit_info_2 = from->exit_info_2;
3258 dst->exit_int_info = from->exit_int_info;
3259 dst->exit_int_info_err = from->exit_int_info_err;
3260 dst->nested_ctl = from->nested_ctl;
3261 dst->event_inj = from->event_inj;
3262 dst->event_inj_err = from->event_inj_err;
3263 dst->nested_cr3 = from->nested_cr3;
3264 dst->virt_ext = from->virt_ext;
3265}
3266
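/*
 * Emulate #VMEXIT from L2 to L1: write the current guest state and exit
 * information back into L1's VMCB, then restore the L1 state that was
 * stashed in the host save area (hsave) on VMRUN.
 */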
3267static int nested_svm_vmexit(struct vcpu_svm *svm)
3268{
3269 struct vmcb *nested_vmcb;
3270 struct vmcb *hsave = svm->nested.hsave;
3271 struct vmcb *vmcb = svm->vmcb;
3272 struct page *page;
3273
3274 trace_kvm_nested_vmexit_inject(vmcb->control.exit_code,
3275 vmcb->control.exit_info_1,
3276 vmcb->control.exit_info_2,
3277 vmcb->control.exit_int_info,
3278 vmcb->control.exit_int_info_err,
3279 KVM_ISA_SVM);
3280
3281 nested_vmcb = nested_svm_map(svm, svm->nested.vmcb, &page);
3282 if (!nested_vmcb)
3283 return 1;
3284
3285 	/* Exit Guest-Mode */
3286 leave_guest_mode(&svm->vcpu);
3287 svm->nested.vmcb = 0;
3288
3289 	/* Give the current vmcb to the guest */
3290 disable_gif(svm);
3291
3292 nested_vmcb->save.es = vmcb->save.es;
3293 nested_vmcb->save.cs = vmcb->save.cs;
3294 nested_vmcb->save.ss = vmcb->save.ss;
3295 nested_vmcb->save.ds = vmcb->save.ds;
3296 nested_vmcb->save.gdtr = vmcb->save.gdtr;
3297 nested_vmcb->save.idtr = vmcb->save.idtr;
3298 nested_vmcb->save.efer = svm->vcpu.arch.efer;
3299 nested_vmcb->save.cr0 = kvm_read_cr0(&svm->vcpu);
3300 nested_vmcb->save.cr3 = kvm_read_cr3(&svm->vcpu);
3301 nested_vmcb->save.cr2 = vmcb->save.cr2;
3302 nested_vmcb->save.cr4 = svm->vcpu.arch.cr4;
3303 nested_vmcb->save.rflags = kvm_get_rflags(&svm->vcpu);
3304 nested_vmcb->save.rip = vmcb->save.rip;
3305 nested_vmcb->save.rsp = vmcb->save.rsp;
3306 nested_vmcb->save.rax = vmcb->save.rax;
3307 nested_vmcb->save.dr7 = vmcb->save.dr7;
3308 nested_vmcb->save.dr6 = vmcb->save.dr6;
3309 nested_vmcb->save.cpl = vmcb->save.cpl;
3310
3311 nested_vmcb->control.int_ctl = vmcb->control.int_ctl;
3312 nested_vmcb->control.int_vector = vmcb->control.int_vector;
3313 nested_vmcb->control.int_state = vmcb->control.int_state;
3314 nested_vmcb->control.exit_code = vmcb->control.exit_code;
3315 nested_vmcb->control.exit_code_hi = vmcb->control.exit_code_hi;
3316 nested_vmcb->control.exit_info_1 = vmcb->control.exit_info_1;
3317 nested_vmcb->control.exit_info_2 = vmcb->control.exit_info_2;
3318 nested_vmcb->control.exit_int_info = vmcb->control.exit_int_info;
3319 nested_vmcb->control.exit_int_info_err = vmcb->control.exit_int_info_err;
3320
3321 if (svm->nrips_enabled)
3322 nested_vmcb->control.next_rip = vmcb->control.next_rip;
3323
3324 	/*
3325 	 * If we emulate a VMRUN/#VMEXIT in the same host #vmexit cycle we have
3326 	 * to make sure that we do not lose injected events. So check event_inj
3327 	 * here and copy it to exit_int_info if it is valid.
3328 	 * Exit_int_info and event_inj can't be both valid because the case
3329 	 * below only happens on a VMRUN instruction intercept which has
3330 	 * no valid exit_int_info set.
3331 	 */
3332 if (vmcb->control.event_inj & SVM_EVTINJ_VALID) {
3333 struct vmcb_control_area *nc = &nested_vmcb->control;
3334
3335 nc->exit_int_info = vmcb->control.event_inj;
3336 nc->exit_int_info_err = vmcb->control.event_inj_err;
3337 }
3338
3339 nested_vmcb->control.tlb_ctl = 0;
3340 nested_vmcb->control.event_inj = 0;
3341 nested_vmcb->control.event_inj_err = 0;
3342
3343 	/* We always set V_INTR_MASKING and remember the old value in hflags */
3344 if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK))
3345 nested_vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK;
3346
3347 	/* Restore the original control entries */
3348 copy_vmcb_control_area(vmcb, hsave);
3349
3350 svm->vcpu.arch.tsc_offset = svm->vmcb->control.tsc_offset;
3351 kvm_clear_exception_queue(&svm->vcpu);
3352 kvm_clear_interrupt_queue(&svm->vcpu);
3353
3354 svm->nested.nested_cr3 = 0;
3355
3356 	/* Restore selected save entries */
3357 svm->vmcb->save.es = hsave->save.es;
3358 svm->vmcb->save.cs = hsave->save.cs;
3359 svm->vmcb->save.ss = hsave->save.ss;
3360 svm->vmcb->save.ds = hsave->save.ds;
3361 svm->vmcb->save.gdtr = hsave->save.gdtr;
3362 svm->vmcb->save.idtr = hsave->save.idtr;
3363 kvm_set_rflags(&svm->vcpu, hsave->save.rflags);
3364 svm_set_efer(&svm->vcpu, hsave->save.efer);
3365 svm_set_cr0(&svm->vcpu, hsave->save.cr0 | X86_CR0_PE);
3366 svm_set_cr4(&svm->vcpu, hsave->save.cr4);
3367 if (npt_enabled) {
3368 svm->vmcb->save.cr3 = hsave->save.cr3;
3369 svm->vcpu.arch.cr3 = hsave->save.cr3;
3370 } else {
3371 (void)kvm_set_cr3(&svm->vcpu, hsave->save.cr3);
3372 }
3373 kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, hsave->save.rax);
3374 kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, hsave->save.rsp);
3375 kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, hsave->save.rip);
3376 svm->vmcb->save.dr7 = 0;
3377 svm->vmcb->save.cpl = 0;
3378 svm->vmcb->control.exit_int_info = 0;
3379
3380 mark_all_dirty(svm->vmcb);
3381
3382 nested_svm_unmap(page);
3383
3384 nested_svm_uninit_mmu_context(&svm->vcpu);
3385 kvm_mmu_reset_context(&svm->vcpu);
3386 kvm_mmu_load(&svm->vcpu);
3387
3388 return 0;
3389}
3390
3391static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm)
3392{
3393 	/*
3394 	 * This function merges the msr permission bitmaps of kvm and the
3395 	 * nested vmcb. It is optimized in that it only merges the parts where
3396 	 * the kvm msr permission bitmap may contain zero bits
3397 	 */
3398 int i;
3399
3400 if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT)))
3401 return true;
3402
3403 for (i = 0; i < MSRPM_OFFSETS; i++) {
3404 u32 value, p;
3405 u64 offset;
3406
3407 if (msrpm_offsets[i] == 0xffffffff)
3408 break;
3409
3410 p = msrpm_offsets[i];
3411 offset = svm->nested.vmcb_msrpm + (p * 4);
3412
3413 if (kvm_vcpu_read_guest(&svm->vcpu, offset, &value, 4))
3414 return false;
3415
3416 svm->nested.msrpm[p] = svm->msrpm[p] | value;
3417 }
3418
3419 svm->vmcb->control.msrpm_base_pa = __sme_set(__pa(svm->nested.msrpm));
3420
3421 return true;
3422}
3423
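/*
 * Minimal consistency checks on the VMCB handed to VMRUN; a failure here
 * is reported back to L1 as SVM_EXIT_ERR by the caller.
 */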
3424static bool nested_vmcb_checks(struct vmcb *vmcb)
3425{
3426 if ((vmcb->control.intercept & (1ULL << INTERCEPT_VMRUN)) == 0)
3427 return false;
3428
3429 if (vmcb->control.asid == 0)
3430 return false;
3431
3432 if ((vmcb->control.nested_ctl & SVM_NESTED_CTL_NP_ENABLE) &&
3433 !npt_enabled)
3434 return false;
3435
3436 return true;
3437}
3438
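/*
 * Switch the vCPU into guest (L2) mode: load the nested VMCB's save state
 * and selected control fields into the hardware VMCB, set up the nested
 * MMU when nested paging is enabled, and cache L1's intercepts so they can
 * be merged with the host's via recalc_intercepts().
 */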
3439static void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
3440 struct vmcb *nested_vmcb, struct page *page)
3441{
3442 if (kvm_get_rflags(&svm->vcpu) & X86_EFLAGS_IF)
3443 svm->vcpu.arch.hflags |= HF_HIF_MASK;
3444 else
3445 svm->vcpu.arch.hflags &= ~HF_HIF_MASK;
3446
3447 if (nested_vmcb->control.nested_ctl & SVM_NESTED_CTL_NP_ENABLE) {
3448 kvm_mmu_unload(&svm->vcpu);
3449 svm->nested.nested_cr3 = nested_vmcb->control.nested_cr3;
3450 nested_svm_init_mmu_context(&svm->vcpu);
3451 }
3452
3453 	/* Load the nested guest state */
3454 svm->vmcb->save.es = nested_vmcb->save.es;
3455 svm->vmcb->save.cs = nested_vmcb->save.cs;
3456 svm->vmcb->save.ss = nested_vmcb->save.ss;
3457 svm->vmcb->save.ds = nested_vmcb->save.ds;
3458 svm->vmcb->save.gdtr = nested_vmcb->save.gdtr;
3459 svm->vmcb->save.idtr = nested_vmcb->save.idtr;
3460 kvm_set_rflags(&svm->vcpu, nested_vmcb->save.rflags);
3461 svm_set_efer(&svm->vcpu, nested_vmcb->save.efer);
3462 svm_set_cr0(&svm->vcpu, nested_vmcb->save.cr0);
3463 svm_set_cr4(&svm->vcpu, nested_vmcb->save.cr4);
3464 if (npt_enabled) {
3465 svm->vmcb->save.cr3 = nested_vmcb->save.cr3;
3466 svm->vcpu.arch.cr3 = nested_vmcb->save.cr3;
3467 } else
3468 (void)kvm_set_cr3(&svm->vcpu, nested_vmcb->save.cr3);
3469
3470 	/* Guest paging mode is active - reset mmu */
3471 kvm_mmu_reset_context(&svm->vcpu);
3472
3473 svm->vmcb->save.cr2 = svm->vcpu.arch.cr2 = nested_vmcb->save.cr2;
3474 kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, nested_vmcb->save.rax);
3475 kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, nested_vmcb->save.rsp);
3476 kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, nested_vmcb->save.rip);
3477
3478 	/* In case we don't even reach vcpu_run, the fields are not updated */
3479 svm->vmcb->save.rax = nested_vmcb->save.rax;
3480 svm->vmcb->save.rsp = nested_vmcb->save.rsp;
3481 svm->vmcb->save.rip = nested_vmcb->save.rip;
3482 svm->vmcb->save.dr7 = nested_vmcb->save.dr7;
3483 svm->vmcb->save.dr6 = nested_vmcb->save.dr6;
3484 svm->vmcb->save.cpl = nested_vmcb->save.cpl;
3485
3486 svm->nested.vmcb_msrpm = nested_vmcb->control.msrpm_base_pa & ~0x0fffULL;
3487 svm->nested.vmcb_iopm = nested_vmcb->control.iopm_base_pa & ~0x0fffULL;
3488
3489 	/* cache intercepts */
3490 svm->nested.intercept_cr = nested_vmcb->control.intercept_cr;
3491 svm->nested.intercept_dr = nested_vmcb->control.intercept_dr;
3492 svm->nested.intercept_exceptions = nested_vmcb->control.intercept_exceptions;
3493 svm->nested.intercept = nested_vmcb->control.intercept;
3494
3495 svm_flush_tlb(&svm->vcpu, true);
3496 svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | V_INTR_MASKING_MASK;
3497 if (nested_vmcb->control.int_ctl & V_INTR_MASKING_MASK)
3498 svm->vcpu.arch.hflags |= HF_VINTR_MASK;
3499 else
3500 svm->vcpu.arch.hflags &= ~HF_VINTR_MASK;
3501
3502 if (svm->vcpu.arch.hflags & HF_VINTR_MASK) {
3503 		/* We only want the cr8 intercept bits of L1 */
3504 clr_cr_intercept(svm, INTERCEPT_CR8_READ);
3505 clr_cr_intercept(svm, INTERCEPT_CR8_WRITE);
3506 }
3507
3508 	/* We don't want to see VMMCALLs from a nested guest */
3509 clr_intercept(svm, INTERCEPT_VMMCALL);
3510
3511 svm->vcpu.arch.tsc_offset += nested_vmcb->control.tsc_offset;
3512 svm->vmcb->control.tsc_offset = svm->vcpu.arch.tsc_offset;
3513
3514 svm->vmcb->control.virt_ext = nested_vmcb->control.virt_ext;
3515 svm->vmcb->control.int_vector = nested_vmcb->control.int_vector;
3516 svm->vmcb->control.int_state = nested_vmcb->control.int_state;
3517 svm->vmcb->control.event_inj = nested_vmcb->control.event_inj;
3518 svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err;
3519
3520 nested_svm_unmap(page);
3521
3522 	/* Enter Guest-Mode */
3523 enter_guest_mode(&svm->vcpu);
3524
3525 	/*
3526 	 * Merge guest and host intercepts - must be called with vcpu in
3527 	 * guest-mode to take effect here
3528 	 */
3529 recalc_intercepts(svm);
3530
3531 svm->nested.vmcb = vmcb_gpa;
3532
3533 enable_gif(svm);
3534
3535 mark_all_dirty(svm->vmcb);
3536}
3537
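/*
 * Emulate VMRUN: map and validate the VMCB referenced by RAX, save the
 * current (L1) state into hsave, and enter guest mode with the nested
 * VMCB.  Returns false if the nested VMCB could not be used.
 */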
3538static bool nested_svm_vmrun(struct vcpu_svm *svm)
3539{
3540 struct vmcb *nested_vmcb;
3541 struct vmcb *hsave = svm->nested.hsave;
3542 struct vmcb *vmcb = svm->vmcb;
3543 struct page *page;
3544 u64 vmcb_gpa;
3545
3546 vmcb_gpa = svm->vmcb->save.rax;
3547
3548 nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page);
3549 if (!nested_vmcb)
3550 return false;
3551
3552 if (!nested_vmcb_checks(nested_vmcb)) {
3553 nested_vmcb->control.exit_code = SVM_EXIT_ERR;
3554 nested_vmcb->control.exit_code_hi = 0;
3555 nested_vmcb->control.exit_info_1 = 0;
3556 nested_vmcb->control.exit_info_2 = 0;
3557
3558 nested_svm_unmap(page);
3559
3560 return false;
3561 }
3562
3563 trace_kvm_nested_vmrun(svm->vmcb->save.rip, vmcb_gpa,
3564 nested_vmcb->save.rip,
3565 nested_vmcb->control.int_ctl,
3566 nested_vmcb->control.event_inj,
3567 nested_vmcb->control.nested_ctl);
3568
3569 trace_kvm_nested_intercepts(nested_vmcb->control.intercept_cr & 0xffff,
3570 nested_vmcb->control.intercept_cr >> 16,
3571 nested_vmcb->control.intercept_exceptions,
3572 nested_vmcb->control.intercept);
3573
3574 	/* Clear internal status */
3575 kvm_clear_exception_queue(&svm->vcpu);
3576 kvm_clear_interrupt_queue(&svm->vcpu);
3577
3578 	/*
3579 	 * Save the old vmcb, so we don't need to pick what we save, but can
3580 	 * restore everything when a VMEXIT occurs
3581 	 */
3582 hsave->save.es = vmcb->save.es;
3583 hsave->save.cs = vmcb->save.cs;
3584 hsave->save.ss = vmcb->save.ss;
3585 hsave->save.ds = vmcb->save.ds;
3586 hsave->save.gdtr = vmcb->save.gdtr;
3587 hsave->save.idtr = vmcb->save.idtr;
3588 hsave->save.efer = svm->vcpu.arch.efer;
3589 hsave->save.cr0 = kvm_read_cr0(&svm->vcpu);
3590 hsave->save.cr4 = svm->vcpu.arch.cr4;
3591 hsave->save.rflags = kvm_get_rflags(&svm->vcpu);
3592 hsave->save.rip = kvm_rip_read(&svm->vcpu);
3593 hsave->save.rsp = vmcb->save.rsp;
3594 hsave->save.rax = vmcb->save.rax;
3595 if (npt_enabled)
3596 hsave->save.cr3 = vmcb->save.cr3;
3597 else
3598 hsave->save.cr3 = kvm_read_cr3(&svm->vcpu);
3599
3600 copy_vmcb_control_area(hsave, vmcb);
3601
3602 enter_svm_guest_mode(svm, vmcb_gpa, nested_vmcb, page);
3603
3604 return true;
3605}
3606
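/*
 * Copy the register set that VMLOAD/VMSAVE operate on (FS, GS, TR, LDTR,
 * KernelGsBase, STAR/LSTAR/CSTAR/SFMASK and the SYSENTER MSRs) between
 * two VMCBs.
 */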
3607static void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb)
3608{
3609 to_vmcb->save.fs = from_vmcb->save.fs;
3610 to_vmcb->save.gs = from_vmcb->save.gs;
3611 to_vmcb->save.tr = from_vmcb->save.tr;
3612 to_vmcb->save.ldtr = from_vmcb->save.ldtr;
3613 to_vmcb->save.kernel_gs_base = from_vmcb->save.kernel_gs_base;
3614 to_vmcb->save.star = from_vmcb->save.star;
3615 to_vmcb->save.lstar = from_vmcb->save.lstar;
3616 to_vmcb->save.cstar = from_vmcb->save.cstar;
3617 to_vmcb->save.sfmask = from_vmcb->save.sfmask;
3618 to_vmcb->save.sysenter_cs = from_vmcb->save.sysenter_cs;
3619 to_vmcb->save.sysenter_esp = from_vmcb->save.sysenter_esp;
3620 to_vmcb->save.sysenter_eip = from_vmcb->save.sysenter_eip;
3621}
3622
3623static int vmload_interception(struct vcpu_svm *svm)
3624{
3625 struct vmcb *nested_vmcb;
3626 struct page *page;
3627 int ret;
3628
3629 if (nested_svm_check_permissions(svm))
3630 return 1;
3631
3632 nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page);
3633 if (!nested_vmcb)
3634 return 1;
3635
3636 svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
3637 ret = kvm_skip_emulated_instruction(&svm->vcpu);
3638
3639 nested_svm_vmloadsave(nested_vmcb, svm->vmcb);
3640 nested_svm_unmap(page);
3641
3642 return ret;
3643}
3644
3645static int vmsave_interception(struct vcpu_svm *svm)
3646{
3647 struct vmcb *nested_vmcb;
3648 struct page *page;
3649 int ret;
3650
3651 if (nested_svm_check_permissions(svm))
3652 return 1;
3653
3654 nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page);
3655 if (!nested_vmcb)
3656 return 1;
3657
3658 svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
3659 ret = kvm_skip_emulated_instruction(&svm->vcpu);
3660
3661 nested_svm_vmloadsave(svm->vmcb, nested_vmcb);
3662 nested_svm_unmap(page);
3663
3664 return ret;
3665}
3666
3667static int vmrun_interception(struct vcpu_svm *svm)
3668{
3669 if (nested_svm_check_permissions(svm))
3670 return 1;
3671
3672 	/* Save rip after vmrun instruction */
3673 kvm_rip_write(&svm->vcpu, kvm_rip_read(&svm->vcpu) + 3);
3674
3675 if (!nested_svm_vmrun(svm))
3676 return 1;
3677
3678 if (!nested_svm_vmrun_msrpm(svm))
3679 goto failed;
3680
3681 return 1;
3682
3683failed:
3684
3685 svm->vmcb->control.exit_code = SVM_EXIT_ERR;
3686 svm->vmcb->control.exit_code_hi = 0;
3687 svm->vmcb->control.exit_info_1 = 0;
3688 svm->vmcb->control.exit_info_2 = 0;
3689
3690 nested_svm_vmexit(svm);
3691
3692 return 1;
3693}
3694
3695static int stgi_interception(struct vcpu_svm *svm)
3696{
3697 int ret;
3698
3699 if (nested_svm_check_permissions(svm))
3700 return 1;
3701
3702 	/*
3703 	 * If VGIF is enabled, the STGI intercept is only added to
3704 	 * detect the opening of the SMI/NMI window; remove it now.
3705 	 */
3706 if (vgif_enabled(svm))
3707 clr_intercept(svm, INTERCEPT_STGI);
3708
3709 svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
3710 ret = kvm_skip_emulated_instruction(&svm->vcpu);
3711 kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
3712
3713 enable_gif(svm);
3714
3715 return ret;
3716}
3717
3718static int clgi_interception(struct vcpu_svm *svm)
3719{
3720 int ret;
3721
3722 if (nested_svm_check_permissions(svm))
3723 return 1;
3724
3725 svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
3726 ret = kvm_skip_emulated_instruction(&svm->vcpu);
3727
3728 disable_gif(svm);
3729
3730 	/* After a CLGI no interrupts should come */
3731 if (!kvm_vcpu_apicv_active(&svm->vcpu)) {
3732 svm_clear_vintr(svm);
3733 svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
3734 mark_dirty(svm->vmcb, VMCB_INTR);
3735 }
3736
3737 return ret;
3738}
3739
3740static int invlpga_interception(struct vcpu_svm *svm)
3741{
3742 struct kvm_vcpu *vcpu = &svm->vcpu;
3743
3744 trace_kvm_invlpga(svm->vmcb->save.rip, kvm_register_read(&svm->vcpu, VCPU_REGS_RCX),
3745 kvm_register_read(&svm->vcpu, VCPU_REGS_RAX));
3746
3747 	/* Let's treat INVLPGA the same as INVLPG (can be optimized!) */
3748 kvm_mmu_invlpg(vcpu, kvm_register_read(&svm->vcpu, VCPU_REGS_RAX));
3749
3750 svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
3751 return kvm_skip_emulated_instruction(&svm->vcpu);
3752}
3753
3754static int skinit_interception(struct vcpu_svm *svm)
3755{
3756 trace_kvm_skinit(svm->vmcb->save.rip, kvm_register_read(&svm->vcpu, VCPU_REGS_RAX));
3757
3758 kvm_queue_exception(&svm->vcpu, UD_VECTOR);
3759 return 1;
3760}
3761
3762static int wbinvd_interception(struct vcpu_svm *svm)
3763{
3764 return kvm_emulate_wbinvd(&svm->vcpu);
3765}
3766
3767static int xsetbv_interception(struct vcpu_svm *svm)
3768{
3769 u64 new_bv = kvm_read_edx_eax(&svm->vcpu);
3770 u32 index = kvm_register_read(&svm->vcpu, VCPU_REGS_RCX);
3771
3772 if (kvm_set_xcr(&svm->vcpu, index, new_bv) == 0) {
3773 svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
3774 return kvm_skip_emulated_instruction(&svm->vcpu);
3775 }
3776
3777 return 1;
3778}
3779
3780static int task_switch_interception(struct vcpu_svm *svm)
3781{
3782 u16 tss_selector;
3783 int reason;
3784 int int_type = svm->vmcb->control.exit_int_info &
3785 SVM_EXITINTINFO_TYPE_MASK;
3786 int int_vec = svm->vmcb->control.exit_int_info & SVM_EVTINJ_VEC_MASK;
3787 uint32_t type =
3788 svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_TYPE_MASK;
3789 uint32_t idt_v =
3790 svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_VALID;
3791 bool has_error_code = false;
3792 u32 error_code = 0;
3793
3794 tss_selector = (u16)svm->vmcb->control.exit_info_1;
3795
3796 if (svm->vmcb->control.exit_info_2 &
3797 (1ULL << SVM_EXITINFOSHIFT_TS_REASON_IRET))
3798 reason = TASK_SWITCH_IRET;
3799 else if (svm->vmcb->control.exit_info_2 &
3800 (1ULL << SVM_EXITINFOSHIFT_TS_REASON_JMP))
3801 reason = TASK_SWITCH_JMP;
3802 else if (idt_v)
3803 reason = TASK_SWITCH_GATE;
3804 else
3805 reason = TASK_SWITCH_CALL;
3806
3807 if (reason == TASK_SWITCH_GATE) {
3808 switch (type) {
3809 case SVM_EXITINTINFO_TYPE_NMI:
3810 svm->vcpu.arch.nmi_injected = false;
3811 break;
3812 case SVM_EXITINTINFO_TYPE_EXEPT:
3813 if (svm->vmcb->control.exit_info_2 &
3814 (1ULL << SVM_EXITINFOSHIFT_TS_HAS_ERROR_CODE)) {
3815 has_error_code = true;
3816 error_code =
3817 (u32)svm->vmcb->control.exit_info_2;
3818 }
3819 kvm_clear_exception_queue(&svm->vcpu);
3820 break;
3821 case SVM_EXITINTINFO_TYPE_INTR:
3822 kvm_clear_interrupt_queue(&svm->vcpu);
3823 break;
3824 default:
3825 break;
3826 }
3827 }
3828
3829 if (reason != TASK_SWITCH_GATE ||
3830 int_type == SVM_EXITINTINFO_TYPE_SOFT ||
3831 (int_type == SVM_EXITINTINFO_TYPE_EXEPT &&
3832 (int_vec == OF_VECTOR || int_vec == BP_VECTOR)))
3833 skip_emulated_instruction(&svm->vcpu);
3834
3835 if (int_type != SVM_EXITINTINFO_TYPE_SOFT)
3836 int_vec = -1;
3837
3838 if (kvm_task_switch(&svm->vcpu, tss_selector, int_vec, reason,
3839 has_error_code, error_code) == EMULATE_FAIL) {
3840 svm->vcpu.run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
3841 svm->vcpu.run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
3842 svm->vcpu.run->internal.ndata = 0;
3843 return 0;
3844 }
3845 return 1;
3846}
3847
3848static int cpuid_interception(struct vcpu_svm *svm)
3849{
3850 svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
3851 return kvm_emulate_cpuid(&svm->vcpu);
3852}
3853
3854static int iret_interception(struct vcpu_svm *svm)
3855{
3856 ++svm->vcpu.stat.nmi_window_exits;
3857 clr_intercept(svm, INTERCEPT_IRET);
3858 svm->vcpu.arch.hflags |= HF_IRET_MASK;
3859 svm->nmi_iret_rip = kvm_rip_read(&svm->vcpu);
3860 kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
3861 return 1;
3862}
3863
3864static int invlpg_interception(struct vcpu_svm *svm)
3865{
3866 if (!static_cpu_has(X86_FEATURE_DECODEASSISTS))
3867 return kvm_emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE;
3868
3869 kvm_mmu_invlpg(&svm->vcpu, svm->vmcb->control.exit_info_1);
3870 return kvm_skip_emulated_instruction(&svm->vcpu);
3871}
3872
3873static int emulate_on_interception(struct vcpu_svm *svm)
3874{
3875 return kvm_emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE;
3876}
3877
3878static int rsm_interception(struct vcpu_svm *svm)
3879{
3880 return kvm_emulate_instruction_from_buffer(&svm->vcpu,
3881 rsm_ins_bytes, 2) == EMULATE_DONE;
3882}
3883
3884static int rdpmc_interception(struct vcpu_svm *svm)
3885{
3886 int err;
3887
3888 if (!static_cpu_has(X86_FEATURE_NRIPS))
3889 return emulate_on_interception(svm);
3890
3891 err = kvm_rdpmc(&svm->vcpu);
3892 return kvm_complete_insn_gp(&svm->vcpu, err);
3893}
3894
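/*
 * For a CR0 write coming from L2, check whether L1 asked for selective
 * CR0 interception and whether any bits outside TS/MP actually change;
 * if so, reflect the access to L1 as SVM_EXIT_CR0_SEL_WRITE.
 */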
3895static bool check_selective_cr0_intercepted(struct vcpu_svm *svm,
3896 unsigned long val)
3897{
3898 unsigned long cr0 = svm->vcpu.arch.cr0;
3899 bool ret = false;
3900 u64 intercept;
3901
3902 intercept = svm->nested.intercept;
3903
3904 if (!is_guest_mode(&svm->vcpu) ||
3905 (!(intercept & (1ULL << INTERCEPT_SELECTIVE_CR0))))
3906 return false;
3907
3908 cr0 &= ~SVM_CR0_SELECTIVE_MASK;
3909 val &= ~SVM_CR0_SELECTIVE_MASK;
3910
3911 if (cr0 ^ val) {
3912 svm->vmcb->control.exit_code = SVM_EXIT_CR0_SEL_WRITE;
3913 ret = (nested_svm_exit_handled(svm) == NESTED_EXIT_DONE);
3914 }
3915
3916 return ret;
3917}
3918
3919#define CR_VALID (1ULL << 63)
3920
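/*
 * Handle CR access intercepts using decode assists: exit_info_1 names the
 * GPR involved and the exit code encodes which CR was read or written.
 * Without decode assists the instruction is emulated instead.
 */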
3921static int cr_interception(struct vcpu_svm *svm)
3922{
3923 int reg, cr;
3924 unsigned long val;
3925 int err;
3926
3927 if (!static_cpu_has(X86_FEATURE_DECODEASSISTS))
3928 return emulate_on_interception(svm);
3929
3930 if (unlikely((svm->vmcb->control.exit_info_1 & CR_VALID) == 0))
3931 return emulate_on_interception(svm);
3932
3933 reg = svm->vmcb->control.exit_info_1 & SVM_EXITINFO_REG_MASK;
3934 if (svm->vmcb->control.exit_code == SVM_EXIT_CR0_SEL_WRITE)
3935 cr = SVM_EXIT_WRITE_CR0 - SVM_EXIT_READ_CR0;
3936 else
3937 cr = svm->vmcb->control.exit_code - SVM_EXIT_READ_CR0;
3938
3939 err = 0;
3940 if (cr >= 16) {
3941 cr -= 16;
3942 val = kvm_register_read(&svm->vcpu, reg);
3943 switch (cr) {
3944 case 0:
3945 if (!check_selective_cr0_intercepted(svm, val))
3946 err = kvm_set_cr0(&svm->vcpu, val);
3947 else
3948 return 1;
3949
3950 break;
3951 case 3:
3952 err = kvm_set_cr3(&svm->vcpu, val);
3953 break;
3954 case 4:
3955 err = kvm_set_cr4(&svm->vcpu, val);
3956 break;
3957 case 8:
3958 err = kvm_set_cr8(&svm->vcpu, val);
3959 break;
3960 default:
3961 WARN(1, "unhandled write to CR%d", cr);
3962 kvm_queue_exception(&svm->vcpu, UD_VECTOR);
3963 return 1;
3964 }
3965 } else {
3966 switch (cr) {
3967 case 0:
3968 val = kvm_read_cr0(&svm->vcpu);
3969 break;
3970 case 2:
3971 val = svm->vcpu.arch.cr2;
3972 break;
3973 case 3:
3974 val = kvm_read_cr3(&svm->vcpu);
3975 break;
3976 case 4:
3977 val = kvm_read_cr4(&svm->vcpu);
3978 break;
3979 case 8:
3980 val = kvm_get_cr8(&svm->vcpu);
3981 break;
3982 default:
3983 WARN(1, "unhandled read from CR%d", cr);
3984 kvm_queue_exception(&svm->vcpu, UD_VECTOR);
3985 return 1;
3986 }
3987 kvm_register_write(&svm->vcpu, reg, val);
3988 }
3989 return kvm_complete_insn_gp(&svm->vcpu, err);
3990}
3991
3992static int dr_interception(struct vcpu_svm *svm)
3993{
3994 int reg, dr;
3995 unsigned long val;
3996
3997 if (svm->vcpu.guest_debug == 0) {
3998 		/*
3999 		 * No more DR vmexits; force a reload of the debug registers
4000 		 * and reenter on this instruction.  The next vmexit will
4001 		 * retrieve the full state of the debug registers.
4002 		 */
4003 clr_dr_intercepts(svm);
4004 svm->vcpu.arch.switch_db_regs |= KVM_DEBUGREG_WONT_EXIT;
4005 return 1;
4006 }
4007
4008 if (!boot_cpu_has(X86_FEATURE_DECODEASSISTS))
4009 return emulate_on_interception(svm);
4010
4011 reg = svm->vmcb->control.exit_info_1 & SVM_EXITINFO_REG_MASK;
4012 dr = svm->vmcb->control.exit_code - SVM_EXIT_READ_DR0;
4013
4014 if (dr >= 16) {
4015 if (!kvm_require_dr(&svm->vcpu, dr - 16))
4016 return 1;
4017 val = kvm_register_read(&svm->vcpu, reg);
4018 kvm_set_dr(&svm->vcpu, dr - 16, val);
4019 } else {
4020 if (!kvm_require_dr(&svm->vcpu, dr))
4021 return 1;
4022 kvm_get_dr(&svm->vcpu, dr, &val);
4023 kvm_register_write(&svm->vcpu, reg, val);
4024 }
4025
4026 return kvm_skip_emulated_instruction(&svm->vcpu);
4027}
4028
4029static int cr8_write_interception(struct vcpu_svm *svm)
4030{
4031 struct kvm_run *kvm_run = svm->vcpu.run;
4032 int r;
4033
4034 u8 cr8_prev = kvm_get_cr8(&svm->vcpu);
4035
4036 r = cr_interception(svm);
4037 if (lapic_in_kernel(&svm->vcpu))
4038 return r;
4039 if (cr8_prev <= kvm_get_cr8(&svm->vcpu))
4040 return r;
4041 kvm_run->exit_reason = KVM_EXIT_SET_TPR;
4042 return 0;
4043}
4044
4045static int svm_get_msr_feature(struct kvm_msr_entry *msr)
4046{
4047 msr->data = 0;
4048
4049 switch (msr->index) {
4050 case MSR_F10H_DECFG:
4051 if (boot_cpu_has(X86_FEATURE_LFENCE_RDTSC))
4052 msr->data |= MSR_F10H_DECFG_LFENCE_SERIALIZE;
4053 break;
4054 default:
4055 return 1;
4056 }
4057
4058 return 0;
4059}
4060
4061static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
4062{
4063 struct vcpu_svm *svm = to_svm(vcpu);
4064
4065 switch (msr_info->index) {
4066 case MSR_STAR:
4067 msr_info->data = svm->vmcb->save.star;
4068 break;
4069#ifdef CONFIG_X86_64
4070 case MSR_LSTAR:
4071 msr_info->data = svm->vmcb->save.lstar;
4072 break;
4073 case MSR_CSTAR:
4074 msr_info->data = svm->vmcb->save.cstar;
4075 break;
4076 case MSR_KERNEL_GS_BASE:
4077 msr_info->data = svm->vmcb->save.kernel_gs_base;
4078 break;
4079 case MSR_SYSCALL_MASK:
4080 msr_info->data = svm->vmcb->save.sfmask;
4081 break;
4082#endif
4083 case MSR_IA32_SYSENTER_CS:
4084 msr_info->data = svm->vmcb->save.sysenter_cs;
4085 break;
4086 case MSR_IA32_SYSENTER_EIP:
4087 msr_info->data = svm->sysenter_eip;
4088 break;
4089 case MSR_IA32_SYSENTER_ESP:
4090 msr_info->data = svm->sysenter_esp;
4091 break;
4092 case MSR_TSC_AUX:
4093 if (!boot_cpu_has(X86_FEATURE_RDTSCP))
4094 return 1;
4095 msr_info->data = svm->tsc_aux;
4096 break;
4097 	/*
4098 	 * Nobody will change the following 5 values in the VMCB so we can
4099 	 * safely return them on rdmsr. They will always be 0 until LBRV is
4100 	 * implemented.
4101 	 */
4102 case MSR_IA32_DEBUGCTLMSR:
4103 msr_info->data = svm->vmcb->save.dbgctl;
4104 break;
4105 case MSR_IA32_LASTBRANCHFROMIP:
4106 msr_info->data = svm->vmcb->save.br_from;
4107 break;
4108 case MSR_IA32_LASTBRANCHTOIP:
4109 msr_info->data = svm->vmcb->save.br_to;
4110 break;
4111 case MSR_IA32_LASTINTFROMIP:
4112 msr_info->data = svm->vmcb->save.last_excp_from;
4113 break;
4114 case MSR_IA32_LASTINTTOIP:
4115 msr_info->data = svm->vmcb->save.last_excp_to;
4116 break;
4117 case MSR_VM_HSAVE_PA:
4118 msr_info->data = svm->nested.hsave_msr;
4119 break;
4120 case MSR_VM_CR:
4121 msr_info->data = svm->nested.vm_cr_msr;
4122 break;
4123 case MSR_IA32_SPEC_CTRL:
4124 if (!msr_info->host_initiated &&
4125 !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS) &&
4126 !guest_cpuid_has(vcpu, X86_FEATURE_AMD_SSBD))
4127 return 1;
4128
4129 msr_info->data = svm->spec_ctrl;
4130 break;
4131 case MSR_AMD64_VIRT_SPEC_CTRL:
4132 if (!msr_info->host_initiated &&
4133 !guest_cpuid_has(vcpu, X86_FEATURE_VIRT_SSBD))
4134 return 1;
4135
4136 msr_info->data = svm->virt_spec_ctrl;
4137 break;
4138 case MSR_F15H_IC_CFG: {
4139
4140 int family, model;
4141
4142 family = guest_cpuid_family(vcpu);
4143 model = guest_cpuid_model(vcpu);
4144
4145 if (family < 0 || model < 0)
4146 return kvm_get_msr_common(vcpu, msr_info);
4147
4148 msr_info->data = 0;
4149
4150 if (family == 0x15 &&
4151 (model >= 0x2 && model < 0x20))
4152 msr_info->data = 0x1E;
4153 }
4154 break;
4155 case MSR_F10H_DECFG:
4156 msr_info->data = svm->msr_decfg;
4157 break;
4158 default:
4159 return kvm_get_msr_common(vcpu, msr_info);
4160 }
4161 return 0;
4162}
4163
4164static int rdmsr_interception(struct vcpu_svm *svm)
4165{
4166 u32 ecx = kvm_register_read(&svm->vcpu, VCPU_REGS_RCX);
4167 struct msr_data msr_info;
4168
4169 msr_info.index = ecx;
4170 msr_info.host_initiated = false;
4171 if (svm_get_msr(&svm->vcpu, &msr_info)) {
4172 trace_kvm_msr_read_ex(ecx);
4173 kvm_inject_gp(&svm->vcpu, 0);
4174 return 1;
4175 } else {
4176 trace_kvm_msr_read(ecx, msr_info.data);
4177
4178 kvm_register_write(&svm->vcpu, VCPU_REGS_RAX,
4179 msr_info.data & 0xffffffff);
4180 kvm_register_write(&svm->vcpu, VCPU_REGS_RDX,
4181 msr_info.data >> 32);
4182 svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
4183 return kvm_skip_emulated_instruction(&svm->vcpu);
4184 }
4185}
4186
4187static int svm_set_vm_cr(struct kvm_vcpu *vcpu, u64 data)
4188{
4189 struct vcpu_svm *svm = to_svm(vcpu);
4190 int svm_dis, chg_mask;
4191
4192 if (data & ~SVM_VM_CR_VALID_MASK)
4193 return 1;
4194
4195 chg_mask = SVM_VM_CR_VALID_MASK;
4196
4197 if (svm->nested.vm_cr_msr & SVM_VM_CR_SVM_DIS_MASK)
4198 chg_mask &= ~(SVM_VM_CR_SVM_LOCK_MASK | SVM_VM_CR_SVM_DIS_MASK);
4199
4200 svm->nested.vm_cr_msr &= ~chg_mask;
4201 svm->nested.vm_cr_msr |= (data & chg_mask);
4202
4203 svm_dis = svm->nested.vm_cr_msr & SVM_VM_CR_SVM_DIS_MASK;
4204
4205 	/* check for svm_disable while efer.svme is set */
4206 if (svm_dis && (vcpu->arch.efer & EFER_SVME))
4207 return 1;
4208
4209 return 0;
4210}
4211
4212static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
4213{
4214 struct vcpu_svm *svm = to_svm(vcpu);
4215
4216 u32 ecx = msr->index;
4217 u64 data = msr->data;
4218 switch (ecx) {
4219 case MSR_IA32_CR_PAT:
4220 if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data))
4221 return 1;
4222 vcpu->arch.pat = data;
4223 svm->vmcb->save.g_pat = data;
4224 mark_dirty(svm->vmcb, VMCB_NPT);
4225 break;
4226 case MSR_IA32_SPEC_CTRL:
4227 if (!msr->host_initiated &&
4228 !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS) &&
4229 !guest_cpuid_has(vcpu, X86_FEATURE_AMD_SSBD))
4230 return 1;
4231
4232 		/* The STIBP bit doesn't fault even if it's not advertised */
4233 if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD))
4234 return 1;
4235
4236 svm->spec_ctrl = data;
4237
4238 if (!data)
4239 break;
4240
4241 		/*
4242 		 * For non-nested:
4243 		 * When it's written (to non-zero) for the first time, pass
4244 		 * it through.
4245 		 *
4246 		 * For nested:
4247 		 * The handling of the MSR bitmap for L2 guests is done in
4248 		 * nested_svm_vmrun_msrpm.
4249 		 * We update the L1 MSR bit as well since it will end up
4250 		 * touching the MSR anyway now.
4251 		 */
4252 set_msr_interception(svm->msrpm, MSR_IA32_SPEC_CTRL, 1, 1);
4253 break;
4254 case MSR_IA32_PRED_CMD:
4255 if (!msr->host_initiated &&
4256 !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBPB))
4257 return 1;
4258
4259 if (data & ~PRED_CMD_IBPB)
4260 return 1;
4261
4262 if (!data)
4263 break;
4264
4265 wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB);
4266 if (is_guest_mode(vcpu))
4267 break;
4268 set_msr_interception(svm->msrpm, MSR_IA32_PRED_CMD, 0, 1);
4269 break;
4270 case MSR_AMD64_VIRT_SPEC_CTRL:
4271 if (!msr->host_initiated &&
4272 !guest_cpuid_has(vcpu, X86_FEATURE_VIRT_SSBD))
4273 return 1;
4274
4275 if (data & ~SPEC_CTRL_SSBD)
4276 return 1;
4277
4278 svm->virt_spec_ctrl = data;
4279 break;
4280 case MSR_STAR:
4281 svm->vmcb->save.star = data;
4282 break;
4283#ifdef CONFIG_X86_64
4284 case MSR_LSTAR:
4285 svm->vmcb->save.lstar = data;
4286 break;
4287 case MSR_CSTAR:
4288 svm->vmcb->save.cstar = data;
4289 break;
4290 case MSR_KERNEL_GS_BASE:
4291 svm->vmcb->save.kernel_gs_base = data;
4292 break;
4293 case MSR_SYSCALL_MASK:
4294 svm->vmcb->save.sfmask = data;
4295 break;
4296#endif
4297 case MSR_IA32_SYSENTER_CS:
4298 svm->vmcb->save.sysenter_cs = data;
4299 break;
4300 case MSR_IA32_SYSENTER_EIP:
4301 svm->sysenter_eip = data;
4302 svm->vmcb->save.sysenter_eip = data;
4303 break;
4304 case MSR_IA32_SYSENTER_ESP:
4305 svm->sysenter_esp = data;
4306 svm->vmcb->save.sysenter_esp = data;
4307 break;
4308 case MSR_TSC_AUX:
4309 if (!boot_cpu_has(X86_FEATURE_RDTSCP))
4310 return 1;
4311
4312 		/*
4313 		 * This is rare, so we update the MSR here instead of using
4314 		 * direct_access_msrs.  Doing that would require a rdmsr in
4315 		 * svm_vcpu_put.
4316 		 */
4317 svm->tsc_aux = data;
4318 wrmsrl(MSR_TSC_AUX, svm->tsc_aux);
4319 break;
4320 case MSR_IA32_DEBUGCTLMSR:
4321 if (!boot_cpu_has(X86_FEATURE_LBRV)) {
4322 vcpu_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTL 0x%llx, nop\n",
4323 __func__, data);
4324 break;
4325 }
4326 if (data & DEBUGCTL_RESERVED_BITS)
4327 return 1;
4328
4329 svm->vmcb->save.dbgctl = data;
4330 mark_dirty(svm->vmcb, VMCB_LBR);
4331 if (data & (1ULL<<0))
4332 svm_enable_lbrv(svm);
4333 else
4334 svm_disable_lbrv(svm);
4335 break;
4336 case MSR_VM_HSAVE_PA:
4337 svm->nested.hsave_msr = data;
4338 break;
4339 case MSR_VM_CR:
4340 return svm_set_vm_cr(vcpu, data);
4341 case MSR_VM_IGNNE:
4342 vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data);
4343 break;
4344 case MSR_F10H_DECFG: {
4345 struct kvm_msr_entry msr_entry;
4346
4347 msr_entry.index = msr->index;
4348 if (svm_get_msr_feature(&msr_entry))
4349 return 1;
4350
4351 		/* Check the supported bits */
4352 if (data & ~msr_entry.data)
4353 return 1;
4354
4355 		/* Don't allow the guest to change a bit, #GP */
4356 if (!msr->host_initiated && (data ^ msr_entry.data))
4357 return 1;
4358
4359 svm->msr_decfg = data;
4360 break;
4361 }
4362 case MSR_IA32_APICBASE:
4363 if (kvm_vcpu_apicv_active(vcpu))
4364 avic_update_vapic_bar(to_svm(vcpu), data);
4365 		/* Fall through */
4366 default:
4367 return kvm_set_msr_common(vcpu, msr);
4368 }
4369 return 0;
4370}
4371
4372static int wrmsr_interception(struct vcpu_svm *svm)
4373{
4374 struct msr_data msr;
4375 u32 ecx = kvm_register_read(&svm->vcpu, VCPU_REGS_RCX);
4376 u64 data = kvm_read_edx_eax(&svm->vcpu);
4377
4378 msr.data = data;
4379 msr.index = ecx;
4380 msr.host_initiated = false;
4381
4382 svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
4383 if (kvm_set_msr(&svm->vcpu, &msr)) {
4384 trace_kvm_msr_write_ex(ecx, data);
4385 kvm_inject_gp(&svm->vcpu, 0);
4386 return 1;
4387 } else {
4388 trace_kvm_msr_write(ecx, data);
4389 return kvm_skip_emulated_instruction(&svm->vcpu);
4390 }
4391}
4392
4393static int msr_interception(struct vcpu_svm *svm)
4394{
4395 if (svm->vmcb->control.exit_info_1)
4396 return wrmsr_interception(svm);
4397 else
4398 return rdmsr_interception(svm);
4399}
4400
4401static int interrupt_window_interception(struct vcpu_svm *svm)
4402{
4403 kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
4404 svm_clear_vintr(svm);
4405 svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
4406 mark_dirty(svm->vmcb, VMCB_INTR);
4407 ++svm->vcpu.stat.irq_window_exits;
4408 return 1;
4409}
4410
4411static int pause_interception(struct vcpu_svm *svm)
4412{
4413 struct kvm_vcpu *vcpu = &svm->vcpu;
4414 bool in_kernel = (svm_get_cpl(vcpu) == 0);
4415
4416 if (pause_filter_thresh)
4417 grow_ple_window(vcpu);
4418
4419 kvm_vcpu_on_spin(vcpu, in_kernel);
4420 return 1;
4421}
4422
4423static int nop_interception(struct vcpu_svm *svm)
4424{
4425 return kvm_skip_emulated_instruction(&(svm->vcpu));
4426}
4427
4428static int monitor_interception(struct vcpu_svm *svm)
4429{
4430 printk_once(KERN_WARNING "kvm: MONITOR instruction emulated as NOP!\n");
4431 return nop_interception(svm);
4432}
4433
4434static int mwait_interception(struct vcpu_svm *svm)
4435{
4436 printk_once(KERN_WARNING "kvm: MWAIT instruction emulated as NOP!\n");
4437 return nop_interception(svm);
4438}
4439
4440enum avic_ipi_failure_cause {
4441 AVIC_IPI_FAILURE_INVALID_INT_TYPE,
4442 AVIC_IPI_FAILURE_TARGET_NOT_RUNNING,
4443 AVIC_IPI_FAILURE_INVALID_TARGET,
4444 AVIC_IPI_FAILURE_INVALID_BACKING_PAGE,
4445};
4446
4447static int avic_incomplete_ipi_interception(struct vcpu_svm *svm)
4448{
4449 u32 icrh = svm->vmcb->control.exit_info_1 >> 32;
4450 u32 icrl = svm->vmcb->control.exit_info_1;
4451 u32 id = svm->vmcb->control.exit_info_2 >> 32;
4452 u32 index = svm->vmcb->control.exit_info_2 & 0xFF;
4453 struct kvm_lapic *apic = svm->vcpu.arch.apic;
4454
4455 trace_kvm_avic_incomplete_ipi(svm->vcpu.vcpu_id, icrh, icrl, id, index);
4456
4457 switch (id) {
4458 case AVIC_IPI_FAILURE_INVALID_INT_TYPE:
4459 		/*
4460 		 * AVIC hardware handles the generation of
4461 		 * IPIs when the specified Message Type is Fixed
4462 		 * (also known as fixed delivery mode) and
4463 		 * the Trigger Mode is edge-triggered. The hardware
4464 		 * also supports self and broadcast delivery modes
4465 		 * specified via the Destination Shorthand(DSH)
4466 		 * field of the ICRL. Logical and physical APIC ID
4467 		 * formats are supported. All other IPI types cause
4468 		 * a #VMEXIT, which needs to be emulated.
4469 		 */
4470 kvm_lapic_reg_write(apic, APIC_ICR2, icrh);
4471 kvm_lapic_reg_write(apic, APIC_ICR, icrl);
4472 break;
4473 case AVIC_IPI_FAILURE_TARGET_NOT_RUNNING: {
4474 int i;
4475 struct kvm_vcpu *vcpu;
4476 struct kvm *kvm = svm->vcpu.kvm;
4477 struct kvm_lapic *apic = svm->vcpu.arch.apic;
4478
4479 		/*
4480 		 * At this point, we expect that the AVIC HW has already
4481 		 * set the appropriate IRR bits on the valid target
4482 		 * vcpus. So, we just need to kick the appropriate vcpu.
4483 		 */
4484 kvm_for_each_vcpu(i, vcpu, kvm) {
4485 bool m = kvm_apic_match_dest(vcpu, apic,
4486 icrl & KVM_APIC_SHORT_MASK,
4487 GET_APIC_DEST_FIELD(icrh),
4488 icrl & KVM_APIC_DEST_MASK);
4489
4490 if (m && !avic_vcpu_is_running(vcpu))
4491 kvm_vcpu_wake_up(vcpu);
4492 }
4493 break;
4494 }
4495 case AVIC_IPI_FAILURE_INVALID_TARGET:
4496 break;
4497 case AVIC_IPI_FAILURE_INVALID_BACKING_PAGE:
4498 WARN_ONCE(1, "Invalid backing page\n");
4499 break;
4500 default:
4501 pr_err("Unknown IPI interception\n");
4502 }
4503
4504 return 1;
4505}
4506
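/*
 * Look up the AVIC logical APIC ID table entry for a given LDR value,
 * handling both flat and cluster destination formats.
 */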
4507static u32 *avic_get_logical_id_entry(struct kvm_vcpu *vcpu, u32 ldr, bool flat)
4508{
4509 struct kvm_svm *kvm_svm = to_kvm_svm(vcpu->kvm);
4510 int index;
4511 u32 *logical_apic_id_table;
4512 int dlid = GET_APIC_LOGICAL_ID(ldr);
4513
4514 if (!dlid)
4515 return NULL;
4516
4517 if (flat) {
4518 index = ffs(dlid) - 1;
4519 if (index > 7)
4520 return NULL;
4521 } else {
4522 int cluster = (dlid & 0xf0) >> 4;
4523 int apic = ffs(dlid & 0x0f) - 1;
4524
4525 if ((apic < 0) || (apic > 7) ||
4526 (cluster >= 0xf))
4527 return NULL;
4528 index = (cluster << 2) + apic;
4529 }
4530
4531 logical_apic_id_table = (u32 *) page_address(kvm_svm->avic_logical_id_table_page);
4532
4533 return &logical_apic_id_table[index];
4534}
4535
4536static int avic_ldr_write(struct kvm_vcpu *vcpu, u8 g_physical_id, u32 ldr,
4537 bool valid)
4538{
4539 bool flat;
4540 u32 *entry, new_entry;
4541
4542 flat = kvm_lapic_get_reg(vcpu->arch.apic, APIC_DFR) == APIC_DFR_FLAT;
4543 entry = avic_get_logical_id_entry(vcpu, ldr, flat);
4544 if (!entry)
4545 return -EINVAL;
4546
4547 new_entry = READ_ONCE(*entry);
4548 new_entry &= ~AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK;
4549 new_entry |= (g_physical_id & AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK);
4550 if (valid)
4551 new_entry |= AVIC_LOGICAL_ID_ENTRY_VALID_MASK;
4552 else
4553 new_entry &= ~AVIC_LOGICAL_ID_ENTRY_VALID_MASK;
4554 WRITE_ONCE(*entry, new_entry);
4555
4556 return 0;
4557}
4558
4559static int avic_handle_ldr_update(struct kvm_vcpu *vcpu)
4560{
4561 int ret;
4562 struct vcpu_svm *svm = to_svm(vcpu);
4563 u32 ldr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_LDR);
4564
4565 if (!ldr)
4566 return 1;
4567
4568 ret = avic_ldr_write(vcpu, vcpu->vcpu_id, ldr, true);
4569 if (ret && svm->ldr_reg) {
4570 avic_ldr_write(vcpu, 0, svm->ldr_reg, false);
4571 svm->ldr_reg = 0;
4572 } else {
4573 svm->ldr_reg = ldr;
4574 }
4575 return ret;
4576}
4577
4578static int avic_handle_apic_id_update(struct kvm_vcpu *vcpu)
4579{
4580 u64 *old, *new;
4581 struct vcpu_svm *svm = to_svm(vcpu);
4582 u32 apic_id_reg = kvm_lapic_get_reg(vcpu->arch.apic, APIC_ID);
4583 u32 id = (apic_id_reg >> 24) & 0xff;
4584
4585 if (vcpu->vcpu_id == id)
4586 return 0;
4587
4588 old = avic_get_physical_id_entry(vcpu, vcpu->vcpu_id);
4589 new = avic_get_physical_id_entry(vcpu, id);
4590 if (!new || !old)
4591 return 1;
4592
4593
4594 *new = *old;
4595 *old = 0ULL;
4596 to_svm(vcpu)->avic_physical_id_cache = new;
4597
	/*
	 * Keep the AVIC logical APIC ID table in sync if the LDR has
	 * already been programmed.
	 */
4602 if (svm->ldr_reg)
4603 avic_handle_ldr_update(vcpu);
4604
4605 return 0;
4606}
4607
4608static int avic_handle_dfr_update(struct kvm_vcpu *vcpu)
4609{
4610 struct vcpu_svm *svm = to_svm(vcpu);
4611 struct kvm_svm *kvm_svm = to_kvm_svm(vcpu->kvm);
4612 u32 dfr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_DFR);
4613 u32 mod = (dfr >> 28) & 0xf;
4614
	/*
	 * All local APICs are assumed to use the same destination format.
	 * If the mode changes, the AVIC logical APIC ID table is stale and
	 * must be rebuilt.
	 */
4620 if (kvm_svm->ldr_mode == mod)
4621 return 0;
4622
4623 clear_page(page_address(kvm_svm->avic_logical_id_table_page));
4624 kvm_svm->ldr_mode = mod;
4625
4626 if (svm->ldr_reg)
4627 avic_handle_ldr_update(vcpu);
4628 return 0;
4629}
4630
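/*
 * Trap-style unaccelerated-access exits are reported after the guest's write
 * has already reached the APIC backing page; mirror the new value into the
 * emulated LAPIC and refresh the AVIC tables for APIC_ID/APIC_LDR/APIC_DFR.
 */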
4631static int avic_unaccel_trap_write(struct vcpu_svm *svm)
4632{
4633 struct kvm_lapic *apic = svm->vcpu.arch.apic;
4634 u32 offset = svm->vmcb->control.exit_info_1 &
4635 AVIC_UNACCEL_ACCESS_OFFSET_MASK;
4636
4637 switch (offset) {
4638 case APIC_ID:
4639 if (avic_handle_apic_id_update(&svm->vcpu))
4640 return 0;
4641 break;
4642 case APIC_LDR:
4643 if (avic_handle_ldr_update(&svm->vcpu))
4644 return 0;
4645 break;
4646 case APIC_DFR:
4647 avic_handle_dfr_update(&svm->vcpu);
4648 break;
4649 default:
4650 break;
4651 }
4652
4653 kvm_lapic_reg_write(apic, offset, kvm_lapic_get_reg(apic, offset));
4654
4655 return 1;
4656}
4657
4658static bool is_avic_unaccelerated_access_trap(u32 offset)
4659{
4660 bool ret = false;
4661
4662 switch (offset) {
4663 case APIC_ID:
4664 case APIC_EOI:
4665 case APIC_RRR:
4666 case APIC_LDR:
4667 case APIC_DFR:
4668 case APIC_SPIV:
4669 case APIC_ESR:
4670 case APIC_ICR:
4671 case APIC_LVTT:
4672 case APIC_LVTTHMR:
4673 case APIC_LVTPC:
4674 case APIC_LVT0:
4675 case APIC_LVT1:
4676 case APIC_LVTERR:
4677 case APIC_TMICT:
4678 case APIC_TDCR:
4679 ret = true;
4680 break;
4681 default:
4682 break;
4683 }
4684 return ret;
4685}
4686
4687static int avic_unaccelerated_access_interception(struct vcpu_svm *svm)
4688{
4689 int ret = 0;
4690 u32 offset = svm->vmcb->control.exit_info_1 &
4691 AVIC_UNACCEL_ACCESS_OFFSET_MASK;
4692 u32 vector = svm->vmcb->control.exit_info_2 &
4693 AVIC_UNACCEL_ACCESS_VECTOR_MASK;
4694 bool write = (svm->vmcb->control.exit_info_1 >> 32) &
4695 AVIC_UNACCEL_ACCESS_WRITE_MASK;
4696 bool trap = is_avic_unaccelerated_access_trap(offset);
4697
4698 trace_kvm_avic_unaccelerated_access(svm->vcpu.vcpu_id, offset,
4699 trap, write, vector);
4700 if (trap) {
4701
4702 WARN_ONCE(!write, "svm: Handling trap read.\n");
4703 ret = avic_unaccel_trap_write(svm);
4704 } else {
4705
4706 ret = (kvm_emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE);
4707 }
4708
4709 return ret;
4710}
4711
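/* Exit handler dispatch table, indexed by SVM exit code. */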
4712static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
4713 [SVM_EXIT_READ_CR0] = cr_interception,
4714 [SVM_EXIT_READ_CR3] = cr_interception,
4715 [SVM_EXIT_READ_CR4] = cr_interception,
4716 [SVM_EXIT_READ_CR8] = cr_interception,
4717 [SVM_EXIT_CR0_SEL_WRITE] = cr_interception,
4718 [SVM_EXIT_WRITE_CR0] = cr_interception,
4719 [SVM_EXIT_WRITE_CR3] = cr_interception,
4720 [SVM_EXIT_WRITE_CR4] = cr_interception,
4721 [SVM_EXIT_WRITE_CR8] = cr8_write_interception,
4722 [SVM_EXIT_READ_DR0] = dr_interception,
4723 [SVM_EXIT_READ_DR1] = dr_interception,
4724 [SVM_EXIT_READ_DR2] = dr_interception,
4725 [SVM_EXIT_READ_DR3] = dr_interception,
4726 [SVM_EXIT_READ_DR4] = dr_interception,
4727 [SVM_EXIT_READ_DR5] = dr_interception,
4728 [SVM_EXIT_READ_DR6] = dr_interception,
4729 [SVM_EXIT_READ_DR7] = dr_interception,
4730 [SVM_EXIT_WRITE_DR0] = dr_interception,
4731 [SVM_EXIT_WRITE_DR1] = dr_interception,
4732 [SVM_EXIT_WRITE_DR2] = dr_interception,
4733 [SVM_EXIT_WRITE_DR3] = dr_interception,
4734 [SVM_EXIT_WRITE_DR4] = dr_interception,
4735 [SVM_EXIT_WRITE_DR5] = dr_interception,
4736 [SVM_EXIT_WRITE_DR6] = dr_interception,
4737 [SVM_EXIT_WRITE_DR7] = dr_interception,
4738 [SVM_EXIT_EXCP_BASE + DB_VECTOR] = db_interception,
4739 [SVM_EXIT_EXCP_BASE + BP_VECTOR] = bp_interception,
4740 [SVM_EXIT_EXCP_BASE + UD_VECTOR] = ud_interception,
4741 [SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception,
4742 [SVM_EXIT_EXCP_BASE + MC_VECTOR] = mc_interception,
4743 [SVM_EXIT_EXCP_BASE + AC_VECTOR] = ac_interception,
4744 [SVM_EXIT_EXCP_BASE + GP_VECTOR] = gp_interception,
4745 [SVM_EXIT_INTR] = intr_interception,
4746 [SVM_EXIT_NMI] = nmi_interception,
4747 [SVM_EXIT_SMI] = nop_on_interception,
4748 [SVM_EXIT_INIT] = nop_on_interception,
4749 [SVM_EXIT_VINTR] = interrupt_window_interception,
4750 [SVM_EXIT_RDPMC] = rdpmc_interception,
4751 [SVM_EXIT_CPUID] = cpuid_interception,
4752 [SVM_EXIT_IRET] = iret_interception,
4753 [SVM_EXIT_INVD] = emulate_on_interception,
4754 [SVM_EXIT_PAUSE] = pause_interception,
4755 [SVM_EXIT_HLT] = halt_interception,
4756 [SVM_EXIT_INVLPG] = invlpg_interception,
4757 [SVM_EXIT_INVLPGA] = invlpga_interception,
4758 [SVM_EXIT_IOIO] = io_interception,
4759 [SVM_EXIT_MSR] = msr_interception,
4760 [SVM_EXIT_TASK_SWITCH] = task_switch_interception,
4761 [SVM_EXIT_SHUTDOWN] = shutdown_interception,
4762 [SVM_EXIT_VMRUN] = vmrun_interception,
4763 [SVM_EXIT_VMMCALL] = vmmcall_interception,
4764 [SVM_EXIT_VMLOAD] = vmload_interception,
4765 [SVM_EXIT_VMSAVE] = vmsave_interception,
4766 [SVM_EXIT_STGI] = stgi_interception,
4767 [SVM_EXIT_CLGI] = clgi_interception,
4768 [SVM_EXIT_SKINIT] = skinit_interception,
4769 [SVM_EXIT_WBINVD] = wbinvd_interception,
4770 [SVM_EXIT_MONITOR] = monitor_interception,
4771 [SVM_EXIT_MWAIT] = mwait_interception,
4772 [SVM_EXIT_XSETBV] = xsetbv_interception,
4773 [SVM_EXIT_NPF] = npf_interception,
4774 [SVM_EXIT_RSM] = rsm_interception,
4775 [SVM_EXIT_AVIC_INCOMPLETE_IPI] = avic_incomplete_ipi_interception,
4776 [SVM_EXIT_AVIC_UNACCELERATED_ACCESS] = avic_unaccelerated_access_interception,
4777};
4778
4779static void dump_vmcb(struct kvm_vcpu *vcpu)
4780{
4781 struct vcpu_svm *svm = to_svm(vcpu);
4782 struct vmcb_control_area *control = &svm->vmcb->control;
4783 struct vmcb_save_area *save = &svm->vmcb->save;
4784
4785 pr_err("VMCB Control Area:\n");
4786 pr_err("%-20s%04x\n", "cr_read:", control->intercept_cr & 0xffff);
4787 pr_err("%-20s%04x\n", "cr_write:", control->intercept_cr >> 16);
4788 pr_err("%-20s%04x\n", "dr_read:", control->intercept_dr & 0xffff);
4789 pr_err("%-20s%04x\n", "dr_write:", control->intercept_dr >> 16);
4790 pr_err("%-20s%08x\n", "exceptions:", control->intercept_exceptions);
4791 pr_err("%-20s%016llx\n", "intercepts:", control->intercept);
4792 pr_err("%-20s%d\n", "pause filter count:", control->pause_filter_count);
4793 pr_err("%-20s%d\n", "pause filter threshold:",
4794 control->pause_filter_thresh);
4795 pr_err("%-20s%016llx\n", "iopm_base_pa:", control->iopm_base_pa);
4796 pr_err("%-20s%016llx\n", "msrpm_base_pa:", control->msrpm_base_pa);
4797 pr_err("%-20s%016llx\n", "tsc_offset:", control->tsc_offset);
4798 pr_err("%-20s%d\n", "asid:", control->asid);
4799 pr_err("%-20s%d\n", "tlb_ctl:", control->tlb_ctl);
4800 pr_err("%-20s%08x\n", "int_ctl:", control->int_ctl);
4801 pr_err("%-20s%08x\n", "int_vector:", control->int_vector);
4802 pr_err("%-20s%08x\n", "int_state:", control->int_state);
4803 pr_err("%-20s%08x\n", "exit_code:", control->exit_code);
4804 pr_err("%-20s%016llx\n", "exit_info1:", control->exit_info_1);
4805 pr_err("%-20s%016llx\n", "exit_info2:", control->exit_info_2);
4806 pr_err("%-20s%08x\n", "exit_int_info:", control->exit_int_info);
4807 pr_err("%-20s%08x\n", "exit_int_info_err:", control->exit_int_info_err);
4808 pr_err("%-20s%lld\n", "nested_ctl:", control->nested_ctl);
4809 pr_err("%-20s%016llx\n", "nested_cr3:", control->nested_cr3);
4810 pr_err("%-20s%016llx\n", "avic_vapic_bar:", control->avic_vapic_bar);
4811 pr_err("%-20s%08x\n", "event_inj:", control->event_inj);
4812 pr_err("%-20s%08x\n", "event_inj_err:", control->event_inj_err);
4813 pr_err("%-20s%lld\n", "virt_ext:", control->virt_ext);
4814 pr_err("%-20s%016llx\n", "next_rip:", control->next_rip);
4815 pr_err("%-20s%016llx\n", "avic_backing_page:", control->avic_backing_page);
4816 pr_err("%-20s%016llx\n", "avic_logical_id:", control->avic_logical_id);
4817 pr_err("%-20s%016llx\n", "avic_physical_id:", control->avic_physical_id);
4818 pr_err("VMCB State Save Area:\n");
4819 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
4820 "es:",
4821 save->es.selector, save->es.attrib,
4822 save->es.limit, save->es.base);
4823 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
4824 "cs:",
4825 save->cs.selector, save->cs.attrib,
4826 save->cs.limit, save->cs.base);
4827 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
4828 "ss:",
4829 save->ss.selector, save->ss.attrib,
4830 save->ss.limit, save->ss.base);
4831 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
4832 "ds:",
4833 save->ds.selector, save->ds.attrib,
4834 save->ds.limit, save->ds.base);
4835 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
4836 "fs:",
4837 save->fs.selector, save->fs.attrib,
4838 save->fs.limit, save->fs.base);
4839 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
4840 "gs:",
4841 save->gs.selector, save->gs.attrib,
4842 save->gs.limit, save->gs.base);
4843 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
4844 "gdtr:",
4845 save->gdtr.selector, save->gdtr.attrib,
4846 save->gdtr.limit, save->gdtr.base);
4847 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
4848 "ldtr:",
4849 save->ldtr.selector, save->ldtr.attrib,
4850 save->ldtr.limit, save->ldtr.base);
4851 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
4852 "idtr:",
4853 save->idtr.selector, save->idtr.attrib,
4854 save->idtr.limit, save->idtr.base);
4855 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
4856 "tr:",
4857 save->tr.selector, save->tr.attrib,
4858 save->tr.limit, save->tr.base);
4859 pr_err("cpl: %d efer: %016llx\n",
4860 save->cpl, save->efer);
4861 pr_err("%-15s %016llx %-13s %016llx\n",
4862 "cr0:", save->cr0, "cr2:", save->cr2);
4863 pr_err("%-15s %016llx %-13s %016llx\n",
4864 "cr3:", save->cr3, "cr4:", save->cr4);
4865 pr_err("%-15s %016llx %-13s %016llx\n",
4866 "dr6:", save->dr6, "dr7:", save->dr7);
4867 pr_err("%-15s %016llx %-13s %016llx\n",
4868 "rip:", save->rip, "rflags:", save->rflags);
4869 pr_err("%-15s %016llx %-13s %016llx\n",
4870 "rsp:", save->rsp, "rax:", save->rax);
4871 pr_err("%-15s %016llx %-13s %016llx\n",
4872 "star:", save->star, "lstar:", save->lstar);
4873 pr_err("%-15s %016llx %-13s %016llx\n",
4874 "cstar:", save->cstar, "sfmask:", save->sfmask);
4875 pr_err("%-15s %016llx %-13s %016llx\n",
4876 "kernel_gs_base:", save->kernel_gs_base,
4877 "sysenter_cs:", save->sysenter_cs);
4878 pr_err("%-15s %016llx %-13s %016llx\n",
4879 "sysenter_esp:", save->sysenter_esp,
4880 "sysenter_eip:", save->sysenter_eip);
4881 pr_err("%-15s %016llx %-13s %016llx\n",
4882 "gpat:", save->g_pat, "dbgctl:", save->dbgctl);
4883 pr_err("%-15s %016llx %-13s %016llx\n",
4884 "br_from:", save->br_from, "br_to:", save->br_to);
4885 pr_err("%-15s %016llx %-13s %016llx\n",
4886 "excp_from:", save->last_excp_from,
4887 "excp_to:", save->last_excp_to);
4888}
4889
4890static void svm_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
4891{
4892 struct vmcb_control_area *control = &to_svm(vcpu)->vmcb->control;
4893
4894 *info1 = control->exit_info_1;
4895 *info2 = control->exit_info_2;
4896}
4897
4898static int handle_exit(struct kvm_vcpu *vcpu)
4899{
4900 struct vcpu_svm *svm = to_svm(vcpu);
4901 struct kvm_run *kvm_run = vcpu->run;
4902 u32 exit_code = svm->vmcb->control.exit_code;
4903
4904 trace_kvm_exit(exit_code, vcpu, KVM_ISA_SVM);
4905
4906 if (!is_cr_intercept(svm, INTERCEPT_CR0_WRITE))
4907 vcpu->arch.cr0 = svm->vmcb->save.cr0;
4908 if (npt_enabled)
4909 vcpu->arch.cr3 = svm->vmcb->save.cr3;
4910
4911 if (unlikely(svm->nested.exit_required)) {
4912 nested_svm_vmexit(svm);
4913 svm->nested.exit_required = false;
4914
4915 return 1;
4916 }
4917
4918 if (is_guest_mode(vcpu)) {
4919 int vmexit;
4920
4921 trace_kvm_nested_vmexit(svm->vmcb->save.rip, exit_code,
4922 svm->vmcb->control.exit_info_1,
4923 svm->vmcb->control.exit_info_2,
4924 svm->vmcb->control.exit_int_info,
4925 svm->vmcb->control.exit_int_info_err,
4926 KVM_ISA_SVM);
4927
4928 vmexit = nested_svm_exit_special(svm);
4929
4930 if (vmexit == NESTED_EXIT_CONTINUE)
4931 vmexit = nested_svm_exit_handled(svm);
4932
4933 if (vmexit == NESTED_EXIT_DONE)
4934 return 1;
4935 }
4936
4937 svm_complete_interrupts(svm);
4938
4939 if (svm->vmcb->control.exit_code == SVM_EXIT_ERR) {
4940 kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
4941 kvm_run->fail_entry.hardware_entry_failure_reason
4942 = svm->vmcb->control.exit_code;
4943 pr_err("KVM: FAILED VMRUN WITH VMCB:\n");
4944 dump_vmcb(vcpu);
4945 return 0;
4946 }
4947
4948 if (is_external_interrupt(svm->vmcb->control.exit_int_info) &&
4949 exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR &&
4950 exit_code != SVM_EXIT_NPF && exit_code != SVM_EXIT_TASK_SWITCH &&
4951 exit_code != SVM_EXIT_INTR && exit_code != SVM_EXIT_NMI)
4952 printk(KERN_ERR "%s: unexpected exit_int_info 0x%x "
4953 "exit_code 0x%x\n",
4954 __func__, svm->vmcb->control.exit_int_info,
4955 exit_code);
4956
4957 if (exit_code >= ARRAY_SIZE(svm_exit_handlers)
4958 || !svm_exit_handlers[exit_code]) {
4959 WARN_ONCE(1, "svm: unexpected exit reason 0x%x\n", exit_code);
4960 kvm_queue_exception(vcpu, UD_VECTOR);
4961 return 1;
4962 }
4963
4964 return svm_exit_handlers[exit_code](svm);
4965}
4966
4967static void reload_tss(struct kvm_vcpu *vcpu)
4968{
4969 int cpu = raw_smp_processor_id();
4970
4971 struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
4972 sd->tss_desc->type = 9;
4973 load_TR_desc();
4974}
4975
4976static void pre_sev_run(struct vcpu_svm *svm, int cpu)
4977{
4978 struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
4979 int asid = sev_get_asid(svm->vcpu.kvm);
4980
	/* Assign the ASID that was allocated for this SEV guest. */
	svm->vmcb->control.asid = asid;

	/*
	 * Flush the guest TLB if a different VMCB is about to run with this
	 * ASID on this CPU, or if this VMCB last ran on a different CPU.
	 */
4990 if (sd->sev_vmcbs[asid] == svm->vmcb &&
4991 svm->last_cpu == cpu)
4992 return;
4993
4994 svm->last_cpu = cpu;
4995 sd->sev_vmcbs[asid] = svm->vmcb;
4996 svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ASID;
4997 mark_dirty(svm->vmcb, VMCB_ASID);
4998}
4999
5000static void pre_svm_run(struct vcpu_svm *svm)
5001{
5002 int cpu = raw_smp_processor_id();
5003
5004 struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
5005
5006 if (sev_guest(svm->vcpu.kvm))
5007 return pre_sev_run(svm, cpu);
5008
5009
5010 if (svm->asid_generation != sd->asid_generation)
5011 new_asid(svm, sd);
5012}
5013
5014static void svm_inject_nmi(struct kvm_vcpu *vcpu)
5015{
5016 struct vcpu_svm *svm = to_svm(vcpu);
5017
5018 svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI;
5019 vcpu->arch.hflags |= HF_NMI_MASK;
5020 set_intercept(svm, INTERCEPT_IRET);
5021 ++vcpu->stat.nmi_injections;
5022}
5023
5024static inline void svm_inject_irq(struct vcpu_svm *svm, int irq)
5025{
5026 struct vmcb_control_area *control;
5027
5028
5029 control = &svm->vmcb->control;
5030 control->int_vector = irq;
5031 control->int_ctl &= ~V_INTR_PRIO_MASK;
5032 control->int_ctl |= V_IRQ_MASK |
		((0xf) << V_INTR_PRIO_SHIFT);
5034 mark_dirty(svm->vmcb, VMCB_INTR);
5035}
5036
5037static void svm_set_irq(struct kvm_vcpu *vcpu)
5038{
5039 struct vcpu_svm *svm = to_svm(vcpu);
5040
5041 BUG_ON(!(gif_set(svm)));
5042
5043 trace_kvm_inj_virq(vcpu->arch.interrupt.nr);
5044 ++vcpu->stat.irq_injections;
5045
5046 svm->vmcb->control.event_inj = vcpu->arch.interrupt.nr |
5047 SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR;
5048}
5049
5050static inline bool svm_nested_virtualize_tpr(struct kvm_vcpu *vcpu)
5051{
5052 return is_guest_mode(vcpu) && (vcpu->arch.hflags & HF_VINTR_MASK);
5053}
5054
5055static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
5056{
5057 struct vcpu_svm *svm = to_svm(vcpu);
5058
5059 if (svm_nested_virtualize_tpr(vcpu) ||
5060 kvm_vcpu_apicv_active(vcpu))
5061 return;
5062
5063 clr_cr_intercept(svm, INTERCEPT_CR8_WRITE);
5064
5065 if (irr == -1)
5066 return;
5067
5068 if (tpr >= irr)
5069 set_cr_intercept(svm, INTERCEPT_CR8_WRITE);
5070}
5071
5072static void svm_set_virtual_apic_mode(struct kvm_vcpu *vcpu)
5073{
5074 return;
5075}
5076
5077static bool svm_get_enable_apicv(struct kvm_vcpu *vcpu)
5078{
5079 return avic && irqchip_split(vcpu->kvm);
5080}
5081
5082static void svm_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr)
5083{
5084}
5085
5086static void svm_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr)
5087{
5088}
5089
5090
5091static void svm_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
5092{
5093 struct vcpu_svm *svm = to_svm(vcpu);
5094 struct vmcb *vmcb = svm->vmcb;
5095
5096 if (!kvm_vcpu_apicv_active(&svm->vcpu))
5097 return;
5098
5099 vmcb->control.int_ctl &= ~AVIC_ENABLE_MASK;
5100 mark_dirty(vmcb, VMCB_INTR);
5101}
5102
5103static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
5104{
5105 return;
5106}
5107
5108static void svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec)
5109{
5110 kvm_lapic_set_irr(vec, vcpu->arch.apic);
5111 smp_mb__after_atomic();
5112
5113 if (avic_vcpu_is_running(vcpu))
5114 wrmsrl(SVM_AVIC_DOORBELL,
5115 kvm_cpu_get_apicid(vcpu->cpu));
5116 else
5117 kvm_vcpu_wake_up(vcpu);
5118}
5119
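/*
 * svm->ir_list tracks the IOMMU interrupt-remapping entries currently posted
 * to this vCPU; entries are added in svm_ir_list_add() and removed here when
 * an IRTE is retargeted or reverts to legacy remapping.
 */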
5120static void svm_ir_list_del(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
5121{
5122 unsigned long flags;
5123 struct amd_svm_iommu_ir *cur;
5124
5125 spin_lock_irqsave(&svm->ir_list_lock, flags);
5126 list_for_each_entry(cur, &svm->ir_list, node) {
5127 if (cur->data != pi->ir_data)
5128 continue;
5129 list_del(&cur->node);
5130 kfree(cur);
5131 break;
5132 }
5133 spin_unlock_irqrestore(&svm->ir_list_lock, flags);
5134}
5135
5136static int svm_ir_list_add(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
5137{
5138 int ret = 0;
5139 unsigned long flags;
5140 struct amd_svm_iommu_ir *ir;
5141
	/*
	 * If this IRTE was previously posted to another vCPU (recorded in
	 * prev_ga_tag), unhook it from that vCPU's ir_list before adding it
	 * here.
	 */
5147 if (pi->ir_data && (pi->prev_ga_tag != 0)) {
5148 struct kvm *kvm = svm->vcpu.kvm;
5149 u32 vcpu_id = AVIC_GATAG_TO_VCPUID(pi->prev_ga_tag);
5150 struct kvm_vcpu *prev_vcpu = kvm_get_vcpu_by_id(kvm, vcpu_id);
5151 struct vcpu_svm *prev_svm;
5152
5153 if (!prev_vcpu) {
5154 ret = -EINVAL;
5155 goto out;
5156 }
5157
5158 prev_svm = to_svm(prev_vcpu);
5159 svm_ir_list_del(prev_svm, pi);
5160 }
5161
	/*
	 * Allocate a new tracking entry for this IRTE and put it on this
	 * vCPU's ir_list.
	 */
5166 ir = kzalloc(sizeof(struct amd_svm_iommu_ir), GFP_KERNEL);
5167 if (!ir) {
5168 ret = -ENOMEM;
5169 goto out;
5170 }
5171 ir->data = pi->ir_data;
5172
5173 spin_lock_irqsave(&svm->ir_list_lock, flags);
5174 list_add(&ir->node, &svm->ir_list);
5175 spin_unlock_irqrestore(&svm->ir_list_lock, flags);
5176out:
5177 return ret;
5178}
5179
/*
 * The hardware cannot post multicast or broadcast interrupts to a vCPU, so
 * any interrupt that does not resolve to exactly one destination vCPU falls
 * back to legacy interrupt remapping.
 */
5191static int
5192get_pi_vcpu_info(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e,
5193 struct vcpu_data *vcpu_info, struct vcpu_svm **svm)
5194{
5195 struct kvm_lapic_irq irq;
5196 struct kvm_vcpu *vcpu = NULL;
5197
5198 kvm_set_msi_irq(kvm, e, &irq);
5199
5200 if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu)) {
5201 pr_debug("SVM: %s: use legacy intr remap mode for irq %u\n",
5202 __func__, irq.vector);
5203 return -1;
5204 }
5205
5206 pr_debug("SVM: %s: use GA mode for irq %u\n", __func__,
5207 irq.vector);
5208 *svm = to_svm(vcpu);
5209 vcpu_info->pi_desc_addr = __sme_set(page_to_phys((*svm)->avic_backing_page));
5210 vcpu_info->vector = irq.vector;
5211
5212 return 0;
5213}
5214
/*
 * svm_update_pi_irte - set IRTE for Posted-Interrupts
 *
 * @kvm: kvm
 * @host_irq: host irq of the interrupt
 * @guest_irq: gsi of the interrupt
 * @set: set or unset PI
 * returns 0 on success, < 0 on failure
 */
5224static int svm_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
5225 uint32_t guest_irq, bool set)
5226{
5227 struct kvm_kernel_irq_routing_entry *e;
5228 struct kvm_irq_routing_table *irq_rt;
5229 int idx, ret = -EINVAL;
5230
5231 if (!kvm_arch_has_assigned_device(kvm) ||
5232 !irq_remapping_cap(IRQ_POSTING_CAP))
5233 return 0;
5234
5235 pr_debug("SVM: %s: host_irq=%#x, guest_irq=%#x, set=%#x\n",
5236 __func__, host_irq, guest_irq, set);
5237
5238 idx = srcu_read_lock(&kvm->irq_srcu);
5239 irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
5240 WARN_ON(guest_irq >= irq_rt->nr_rt_entries);
5241
5242 hlist_for_each_entry(e, &irq_rt->map[guest_irq], link) {
5243 struct vcpu_data vcpu_info;
5244 struct vcpu_svm *svm = NULL;
5245
5246 if (e->type != KVM_IRQ_ROUTING_MSI)
5247 continue;
5248
		/*
		 * Posted delivery is only set up when the interrupt targets
		 * a single vCPU with APICv active and @set is true;
		 * otherwise fall back to legacy interrupt remapping below.
		 */
5255 if (!get_pi_vcpu_info(kvm, e, &vcpu_info, &svm) && set &&
5256 kvm_vcpu_apicv_active(&svm->vcpu)) {
5257 struct amd_iommu_pi_data pi;
5258
5259
5260 pi.base = __sme_set(page_to_phys(svm->avic_backing_page) &
5261 AVIC_HPA_MASK);
5262 pi.ga_tag = AVIC_GATAG(to_kvm_svm(kvm)->avic_vm_id,
5263 svm->vcpu.vcpu_id);
5264 pi.is_guest_mode = true;
5265 pi.vcpu_data = &vcpu_info;
5266 ret = irq_set_vcpu_affinity(host_irq, &pi);
5267
			/*
			 * Track the posted-interrupt entry on this vCPU's
			 * ir_list so it can be updated or removed later.
			 */
5275 if (!ret && pi.is_guest_mode)
5276 svm_ir_list_add(svm, &pi);
5277 } else {
5278
5279 struct amd_iommu_pi_data pi;
5280
5281
5282
5283
5284
5285
5286 pi.is_guest_mode = false;
5287 ret = irq_set_vcpu_affinity(host_irq, &pi);
5288
			/*
			 * If the entry was previously in guest (posted) mode,
			 * prev_ga_tag names the vCPU that owned it; drop it
			 * from that vCPU's ir_list.
			 */
5295 if (!ret && pi.prev_ga_tag) {
5296 int id = AVIC_GATAG_TO_VCPUID(pi.prev_ga_tag);
5297 struct kvm_vcpu *vcpu;
5298
5299 vcpu = kvm_get_vcpu_by_id(kvm, id);
5300 if (vcpu)
5301 svm_ir_list_del(to_svm(vcpu), &pi);
5302 }
5303 }
5304
5305 if (!ret && svm) {
5306 trace_kvm_pi_irte_update(host_irq, svm->vcpu.vcpu_id,
5307 e->gsi, vcpu_info.vector,
5308 vcpu_info.pi_desc_addr, set);
5309 }
5310
5311 if (ret < 0) {
5312 pr_err("%s: failed to update PI IRTE\n", __func__);
5313 goto out;
5314 }
5315 }
5316
5317 ret = 0;
5318out:
5319 srcu_read_unlock(&kvm->irq_srcu, idx);
5320 return ret;
5321}
5322
5323static int svm_nmi_allowed(struct kvm_vcpu *vcpu)
5324{
5325 struct vcpu_svm *svm = to_svm(vcpu);
5326 struct vmcb *vmcb = svm->vmcb;
5327 int ret;
5328 ret = !(vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) &&
5329 !(svm->vcpu.arch.hflags & HF_NMI_MASK);
5330 ret = ret && gif_set(svm) && nested_svm_nmi(svm);
5331
5332 return ret;
5333}
5334
5335static bool svm_get_nmi_mask(struct kvm_vcpu *vcpu)
5336{
5337 struct vcpu_svm *svm = to_svm(vcpu);
5338
5339 return !!(svm->vcpu.arch.hflags & HF_NMI_MASK);
5340}
5341
5342static void svm_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
5343{
5344 struct vcpu_svm *svm = to_svm(vcpu);
5345
5346 if (masked) {
5347 svm->vcpu.arch.hflags |= HF_NMI_MASK;
5348 set_intercept(svm, INTERCEPT_IRET);
5349 } else {
5350 svm->vcpu.arch.hflags &= ~HF_NMI_MASK;
5351 clr_intercept(svm, INTERCEPT_IRET);
5352 }
5353}
5354
5355static int svm_interrupt_allowed(struct kvm_vcpu *vcpu)
5356{
5357 struct vcpu_svm *svm = to_svm(vcpu);
5358 struct vmcb *vmcb = svm->vmcb;
5359 int ret;
5360
5361 if (!gif_set(svm) ||
5362 (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK))
5363 return 0;
5364
5365 ret = !!(kvm_get_rflags(vcpu) & X86_EFLAGS_IF);
5366
5367 if (is_guest_mode(vcpu))
5368 return ret && !(svm->vcpu.arch.hflags & HF_VINTR_MASK);
5369
5370 return ret;
5371}
5372
5373static void enable_irq_window(struct kvm_vcpu *vcpu)
5374{
5375 struct vcpu_svm *svm = to_svm(vcpu);
5376
5377 if (kvm_vcpu_apicv_active(vcpu))
5378 return;
5379
	/*
	 * When GIF=0 the CPU does not tell us when GIF becomes 1 again;
	 * that arrives as a separate STGI/VMRUN intercept, at which point
	 * this function runs again and the VINTR can be requested then.
	 * With vGIF enabled the STGI intercept does not occur, so open the
	 * interrupt window now and let the hardware set GIF itself.
	 */
5388 if ((vgif_enabled(svm) || gif_set(svm)) && nested_svm_intr(svm)) {
5389 svm_set_vintr(svm);
5390 svm_inject_irq(svm, 0x0);
5391 }
5392}
5393
5394static void enable_nmi_window(struct kvm_vcpu *vcpu)
5395{
5396 struct vcpu_svm *svm = to_svm(vcpu);
5397
5398 if ((svm->vcpu.arch.hflags & (HF_NMI_MASK | HF_IRET_MASK))
5399 == HF_NMI_MASK)
5400 return;
5401
5402 if (!gif_set(svm)) {
5403 if (vgif_enabled(svm))
5404 set_intercept(svm, INTERCEPT_STGI);
5405 return;
5406 }
5407
5408 if (svm->nested.exit_required)
5409 return;
5410
	/*
	 * Something is blocking NMI injection (a pending IRET, exception
	 * injection or an interrupt shadow); single-step the guest past it.
	 */
5415 svm->nmi_singlestep_guest_rflags = svm_get_rflags(vcpu);
5416 svm->nmi_singlestep = true;
5417 svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
5418}
5419
5420static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr)
5421{
5422 return 0;
5423}
5424
5425static int svm_set_identity_map_addr(struct kvm *kvm, u64 ident_addr)
5426{
5427 return 0;
5428}
5429
5430static void svm_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa)
5431{
5432 struct vcpu_svm *svm = to_svm(vcpu);
5433
5434 if (static_cpu_has(X86_FEATURE_FLUSHBYASID))
5435 svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ASID;
5436 else
5437 svm->asid_generation--;
5438}
5439
5440static void svm_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t gva)
5441{
5442 struct vcpu_svm *svm = to_svm(vcpu);
5443
5444 invlpga(gva, svm->vmcb->control.asid);
5445}
5446
5447static void svm_prepare_guest_switch(struct kvm_vcpu *vcpu)
5448{
5449}
5450
5451static inline void sync_cr8_to_lapic(struct kvm_vcpu *vcpu)
5452{
5453 struct vcpu_svm *svm = to_svm(vcpu);
5454
5455 if (svm_nested_virtualize_tpr(vcpu))
5456 return;
5457
5458 if (!is_cr_intercept(svm, INTERCEPT_CR8_WRITE)) {
5459 int cr8 = svm->vmcb->control.int_ctl & V_TPR_MASK;
5460 kvm_set_cr8(vcpu, cr8);
5461 }
5462}
5463
5464static inline void sync_lapic_to_cr8(struct kvm_vcpu *vcpu)
5465{
5466 struct vcpu_svm *svm = to_svm(vcpu);
5467 u64 cr8;
5468
5469 if (svm_nested_virtualize_tpr(vcpu) ||
5470 kvm_vcpu_apicv_active(vcpu))
5471 return;
5472
5473 cr8 = kvm_get_cr8(vcpu);
5474 svm->vmcb->control.int_ctl &= ~V_TPR_MASK;
5475 svm->vmcb->control.int_ctl |= cr8 & V_TPR_MASK;
5476}
5477
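/*
 * Re-queue whatever event (NMI, exception or interrupt) was being delivered
 * when the #VMEXIT occurred, as recorded in EXITINTINFO, so that it is
 * injected again on the next guest entry.
 */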
5478static void svm_complete_interrupts(struct vcpu_svm *svm)
5479{
5480 u8 vector;
5481 int type;
5482 u32 exitintinfo = svm->vmcb->control.exit_int_info;
5483 unsigned int3_injected = svm->int3_injected;
5484
5485 svm->int3_injected = 0;
5486
	/*
	 * If RIP has moved since HF_IRET_MASK was set, the guest has
	 * executed the IRET and NMIs can be unmasked again.
	 */
5491 if ((svm->vcpu.arch.hflags & HF_IRET_MASK)
5492 && kvm_rip_read(&svm->vcpu) != svm->nmi_iret_rip) {
5493 svm->vcpu.arch.hflags &= ~(HF_NMI_MASK | HF_IRET_MASK);
5494 kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
5495 }
5496
5497 svm->vcpu.arch.nmi_injected = false;
5498 kvm_clear_exception_queue(&svm->vcpu);
5499 kvm_clear_interrupt_queue(&svm->vcpu);
5500
5501 if (!(exitintinfo & SVM_EXITINTINFO_VALID))
5502 return;
5503
5504 kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
5505
5506 vector = exitintinfo & SVM_EXITINTINFO_VEC_MASK;
5507 type = exitintinfo & SVM_EXITINTINFO_TYPE_MASK;
5508
5509 switch (type) {
5510 case SVM_EXITINTINFO_TYPE_NMI:
5511 svm->vcpu.arch.nmi_injected = true;
5512 break;
5513 case SVM_EXITINTINFO_TYPE_EXEPT:
		/*
		 * Do not re-inject software exceptions; re-execute the
		 * instruction instead, rewinding RIP first if an INT3 was
		 * emulated via event injection.
		 */
5519 if (kvm_exception_is_soft(vector)) {
5520 if (vector == BP_VECTOR && int3_injected &&
5521 kvm_is_linear_rip(&svm->vcpu, svm->int3_rip))
5522 kvm_rip_write(&svm->vcpu,
5523 kvm_rip_read(&svm->vcpu) -
5524 int3_injected);
5525 break;
5526 }
5527 if (exitintinfo & SVM_EXITINTINFO_VALID_ERR) {
5528 u32 err = svm->vmcb->control.exit_int_info_err;
5529 kvm_requeue_exception_e(&svm->vcpu, vector, err);
5530
5531 } else
5532 kvm_requeue_exception(&svm->vcpu, vector);
5533 break;
5534 case SVM_EXITINTINFO_TYPE_INTR:
5535 kvm_queue_interrupt(&svm->vcpu, vector, false);
5536 break;
5537 default:
5538 break;
5539 }
5540}
5541
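/*
 * Abort a pending event injection: funnel event_inj through exit_int_info so
 * that svm_complete_interrupts() re-queues the event instead of losing it.
 */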
5542static void svm_cancel_injection(struct kvm_vcpu *vcpu)
5543{
5544 struct vcpu_svm *svm = to_svm(vcpu);
5545 struct vmcb_control_area *control = &svm->vmcb->control;
5546
5547 control->exit_int_info = control->event_inj;
5548 control->exit_int_info_err = control->event_inj_err;
5549 control->event_inj = 0;
5550 svm_complete_interrupts(svm);
5551}
5552
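/*
 * Main guest-entry path: copy RAX/RSP/RIP into the VMCB, execute
 * VMLOAD/VMRUN/VMSAVE via the inline assembly below, then restore host state
 * and record the exit information for handle_exit().
 */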
5553static void svm_vcpu_run(struct kvm_vcpu *vcpu)
5554{
5555 struct vcpu_svm *svm = to_svm(vcpu);
5556
5557 svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX];
5558 svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP];
5559 svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP];
5560
	/*
	 * A nested #VMEXIT still needs to be emulated for this vCPU; do not
	 * enter the guest until handle_exit() has processed it.
	 */
5565 if (unlikely(svm->nested.exit_required))
5566 return;
5567
	/*
	 * Disable NMI single-stepping if an event is being injected: we do
	 * not want the modified RFLAGS pushed onto the guest stack, where
	 * they could not easily be undone later.
	 */
5574 if (svm->nmi_singlestep && svm->vmcb->control.event_inj) {
		/*
		 * Event injection happens before external interrupts cause a
		 * #VMEXIT and interrupts are disabled here, so a reschedule
		 * IPI is enough to force an immediate exit.
		 */
5580 disable_nmi_singlestep(svm);
5581 smp_send_reschedule(vcpu->cpu);
5582 }
5583
5584 pre_svm_run(svm);
5585
5586 sync_lapic_to_cr8(vcpu);
5587
5588 svm->vmcb->save.cr2 = vcpu->arch.cr2;
5589
5590 clgi();
5591
	/*
	 * Load the guest's SPEC_CTRL / virtual speculation-control state
	 * before entry.  VMRUN is serialising on the affected CPUs, so
	 * there is no speculation concern around the conditional MSR write
	 * inside this helper.
	 */
5598 x86_spec_ctrl_set_guest(svm->spec_ctrl, svm->virt_spec_ctrl);
5599
5600 local_irq_enable();
5601
5602 asm volatile (
5603 "push %%" _ASM_BP "; \n\t"
5604 "mov %c[rbx](%[svm]), %%" _ASM_BX " \n\t"
5605 "mov %c[rcx](%[svm]), %%" _ASM_CX " \n\t"
5606 "mov %c[rdx](%[svm]), %%" _ASM_DX " \n\t"
5607 "mov %c[rsi](%[svm]), %%" _ASM_SI " \n\t"
5608 "mov %c[rdi](%[svm]), %%" _ASM_DI " \n\t"
5609 "mov %c[rbp](%[svm]), %%" _ASM_BP " \n\t"
5610#ifdef CONFIG_X86_64
5611 "mov %c[r8](%[svm]), %%r8 \n\t"
5612 "mov %c[r9](%[svm]), %%r9 \n\t"
5613 "mov %c[r10](%[svm]), %%r10 \n\t"
5614 "mov %c[r11](%[svm]), %%r11 \n\t"
5615 "mov %c[r12](%[svm]), %%r12 \n\t"
5616 "mov %c[r13](%[svm]), %%r13 \n\t"
5617 "mov %c[r14](%[svm]), %%r14 \n\t"
5618 "mov %c[r15](%[svm]), %%r15 \n\t"
5619#endif

		/* Enter guest mode. */
5622 "push %%" _ASM_AX " \n\t"
5623 "mov %c[vmcb](%[svm]), %%" _ASM_AX " \n\t"
5624 __ex(SVM_VMLOAD) "\n\t"
5625 __ex(SVM_VMRUN) "\n\t"
5626 __ex(SVM_VMSAVE) "\n\t"
5627 "pop %%" _ASM_AX " \n\t"

		/* Save guest registers, load host registers. */
5630 "mov %%" _ASM_BX ", %c[rbx](%[svm]) \n\t"
5631 "mov %%" _ASM_CX ", %c[rcx](%[svm]) \n\t"
5632 "mov %%" _ASM_DX ", %c[rdx](%[svm]) \n\t"
5633 "mov %%" _ASM_SI ", %c[rsi](%[svm]) \n\t"
5634 "mov %%" _ASM_DI ", %c[rdi](%[svm]) \n\t"
5635 "mov %%" _ASM_BP ", %c[rbp](%[svm]) \n\t"
5636#ifdef CONFIG_X86_64
5637 "mov %%r8, %c[r8](%[svm]) \n\t"
5638 "mov %%r9, %c[r9](%[svm]) \n\t"
5639 "mov %%r10, %c[r10](%[svm]) \n\t"
5640 "mov %%r11, %c[r11](%[svm]) \n\t"
5641 "mov %%r12, %c[r12](%[svm]) \n\t"
5642 "mov %%r13, %c[r13](%[svm]) \n\t"
5643 "mov %%r14, %c[r14](%[svm]) \n\t"
5644 "mov %%r15, %c[r15](%[svm]) \n\t"
5645#endif
		/*
		 * Clear host registers marked as clobbered, to prevent
		 * speculative use of stale guest values.
		 */
5650 "xor %%" _ASM_BX ", %%" _ASM_BX " \n\t"
5651 "xor %%" _ASM_CX ", %%" _ASM_CX " \n\t"
5652 "xor %%" _ASM_DX ", %%" _ASM_DX " \n\t"
5653 "xor %%" _ASM_SI ", %%" _ASM_SI " \n\t"
5654 "xor %%" _ASM_DI ", %%" _ASM_DI " \n\t"
5655#ifdef CONFIG_X86_64
5656 "xor %%r8, %%r8 \n\t"
5657 "xor %%r9, %%r9 \n\t"
5658 "xor %%r10, %%r10 \n\t"
5659 "xor %%r11, %%r11 \n\t"
5660 "xor %%r12, %%r12 \n\t"
5661 "xor %%r13, %%r13 \n\t"
5662 "xor %%r14, %%r14 \n\t"
5663 "xor %%r15, %%r15 \n\t"
5664#endif
5665 "pop %%" _ASM_BP
5666 :
5667 : [svm]"a"(svm),
5668 [vmcb]"i"(offsetof(struct vcpu_svm, vmcb_pa)),
5669 [rbx]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RBX])),
5670 [rcx]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RCX])),
5671 [rdx]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RDX])),
5672 [rsi]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RSI])),
5673 [rdi]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RDI])),
5674 [rbp]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RBP]))
5675#ifdef CONFIG_X86_64
5676 , [r8]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R8])),
5677 [r9]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R9])),
5678 [r10]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R10])),
5679 [r11]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R11])),
5680 [r12]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R12])),
5681 [r13]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R13])),
5682 [r14]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R14])),
5683 [r15]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R15]))
5684#endif
5685 : "cc", "memory"
5686#ifdef CONFIG_X86_64
5687 , "rbx", "rcx", "rdx", "rsi", "rdi"
5688 , "r8", "r9", "r10", "r11" , "r12", "r13", "r14", "r15"
5689#else
5690 , "ebx", "ecx", "edx", "esi", "edi"
5691#endif
5692 );
5693
5694
5695 vmexit_fill_RSB();
5696
5697#ifdef CONFIG_X86_64
5698 wrmsrl(MSR_GS_BASE, svm->host.gs_base);
5699#else
5700 loadsegment(fs, svm->host.fs);
5701#ifndef CONFIG_X86_32_LAZY_GS
5702 loadsegment(gs, svm->host.gs);
5703#endif
5704#endif
5705
	/*
	 * If the guest is allowed to write SPEC_CTRL directly (the MSR is
	 * not intercepted), it may have changed the value behind KVM's
	 * back.  Read it back here, before any host code that might use
	 * SPEC_CTRL runs, so that x86_spec_ctrl_restore_host() below can
	 * restore the host value while the guest value is preserved for the
	 * next entry.  Reading only in this case avoids adding the MSR to
	 * an unconditional save/restore list.
	 */
5721 if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)))
5722 svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
5723
5724 reload_tss(vcpu);
5725
5726 local_irq_disable();
5727
5728 x86_spec_ctrl_restore_host(svm->spec_ctrl, svm->virt_spec_ctrl);
5729
5730 vcpu->arch.cr2 = svm->vmcb->save.cr2;
5731 vcpu->arch.regs[VCPU_REGS_RAX] = svm->vmcb->save.rax;
5732 vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp;
5733 vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip;
5734
5735 if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI))
5736 kvm_before_interrupt(&svm->vcpu);
5737
5738 stgi();
5739
5740
5741
5742 if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI))
5743 kvm_after_interrupt(&svm->vcpu);
5744
5745 sync_cr8_to_lapic(vcpu);
5746
5747 svm->next_rip = 0;
5748
5749 svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING;
5750
5751
5752 if (svm->vmcb->control.exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR)
5753 svm->vcpu.arch.apf.host_apf_reason = kvm_read_and_reset_pf_reason();
5754
5755 if (npt_enabled) {
5756 vcpu->arch.regs_avail &= ~(1 << VCPU_EXREG_PDPTR);
5757 vcpu->arch.regs_dirty &= ~(1 << VCPU_EXREG_PDPTR);
5758 }
5759
	/*
	 * Handle machine-check intercepts here, before the vCPU can be
	 * migrated to a different physical CPU.
	 */
5764 if (unlikely(svm->vmcb->control.exit_code ==
5765 SVM_EXIT_EXCP_BASE + MC_VECTOR))
5766 svm_handle_mce(svm);
5767
5768 mark_all_clean(svm->vmcb);
5769}
5770STACK_FRAME_NON_STANDARD(svm_vcpu_run);
5771
5772static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root)
5773{
5774 struct vcpu_svm *svm = to_svm(vcpu);
5775
5776 svm->vmcb->save.cr3 = __sme_set(root);
5777 mark_dirty(svm->vmcb, VMCB_CR);
5778}
5779
5780static void set_tdp_cr3(struct kvm_vcpu *vcpu, unsigned long root)
5781{
5782 struct vcpu_svm *svm = to_svm(vcpu);
5783
5784 svm->vmcb->control.nested_cr3 = __sme_set(root);
5785 mark_dirty(svm->vmcb, VMCB_NPT);
5786
5787
5788 svm->vmcb->save.cr3 = kvm_read_cr3(vcpu);
5789 mark_dirty(svm->vmcb, VMCB_CR);
5790}
5791
5792static int is_disabled(void)
5793{
5794 u64 vm_cr;
5795
5796 rdmsrl(MSR_VM_CR, vm_cr);
5797 if (vm_cr & (1 << SVM_VM_CR_SVM_DISABLE))
5798 return 1;
5799
5800 return 0;
5801}
5802
5803static void
5804svm_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
5805{
	/*
	 * Patch in the VMMCALL instruction (opcode 0f 01 d9):
	 */
5809 hypercall[0] = 0x0f;
5810 hypercall[1] = 0x01;
5811 hypercall[2] = 0xd9;
5812}
5813
5814static void svm_check_processor_compat(void *rtn)
5815{
5816 *(int *)rtn = 0;
5817}
5818
5819static bool svm_cpu_has_accelerated_tpr(void)
5820{
5821 return false;
5822}
5823
5824static bool svm_has_emulated_msr(int index)
5825{
5826 return true;
5827}
5828
5829static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
5830{
5831 return 0;
5832}
5833
5834static void svm_cpuid_update(struct kvm_vcpu *vcpu)
5835{
5836 struct vcpu_svm *svm = to_svm(vcpu);
5837
5838
5839 svm->nrips_enabled = !!guest_cpuid_has(&svm->vcpu, X86_FEATURE_NRIPS);
5840
5841 if (!kvm_vcpu_apicv_active(vcpu))
5842 return;
5843
5844 guest_cpuid_clear(vcpu, X86_FEATURE_X2APIC);
5845}
5846
5847static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
5848{
5849 switch (func) {
5850 case 0x1:
5851 if (avic)
5852 entry->ecx &= ~bit(X86_FEATURE_X2APIC);
5853 break;
5854 case 0x80000001:
5855 if (nested)
5856 entry->ecx |= (1 << 2);
5857 break;
	case 0x8000000A:
		entry->eax = 1;	/* SVM revision */
		entry->ebx = 8;	/* Number of ASIDs advertised to the nested guest */

		entry->ecx = 0;	/* Reserved */
		entry->edx = 0;	/* No additional SVM features by default */

		/* Support next_rip if the host supports it. */
		if (boot_cpu_has(X86_FEATURE_NRIPS))
			entry->edx |= SVM_FEATURE_NRIP;

		/* Support NPT for the guest if enabled. */
		if (npt_enabled)
			entry->edx |= SVM_FEATURE_NPT;

		break;
	case 0x8000001F:
		/* Report SEV feature information only if the host supports SEV. */
5877 if (boot_cpu_has(X86_FEATURE_SEV))
5878 cpuid(0x8000001f, &entry->eax, &entry->ebx,
5879 &entry->ecx, &entry->edx);
5880
5881 }
5882}
5883
5884static int svm_get_lpage_level(void)
5885{
5886 return PT_PDPE_LEVEL;
5887}
5888
5889static bool svm_rdtscp_supported(void)
5890{
5891 return boot_cpu_has(X86_FEATURE_RDTSCP);
5892}
5893
5894static bool svm_invpcid_supported(void)
5895{
5896 return false;
5897}
5898
5899static bool svm_mpx_supported(void)
5900{
5901 return false;
5902}
5903
5904static bool svm_xsaves_supported(void)
5905{
5906 return false;
5907}
5908
5909static bool svm_umip_emulated(void)
5910{
5911 return false;
5912}
5913
5914static bool svm_has_wbinvd_exit(void)
5915{
5916 return true;
5917}
5918
5919#define PRE_EX(exit) { .exit_code = (exit), \
5920 .stage = X86_ICPT_PRE_EXCEPT, }
5921#define POST_EX(exit) { .exit_code = (exit), \
5922 .stage = X86_ICPT_POST_EXCEPT, }
5923#define POST_MEM(exit) { .exit_code = (exit), \
5924 .stage = X86_ICPT_POST_MEMACCESS, }
5925
5926static const struct __x86_intercept {
5927 u32 exit_code;
5928 enum x86_intercept_stage stage;
5929} x86_intercept_map[] = {
5930 [x86_intercept_cr_read] = POST_EX(SVM_EXIT_READ_CR0),
5931 [x86_intercept_cr_write] = POST_EX(SVM_EXIT_WRITE_CR0),
5932 [x86_intercept_clts] = POST_EX(SVM_EXIT_WRITE_CR0),
5933 [x86_intercept_lmsw] = POST_EX(SVM_EXIT_WRITE_CR0),
5934 [x86_intercept_smsw] = POST_EX(SVM_EXIT_READ_CR0),
5935 [x86_intercept_dr_read] = POST_EX(SVM_EXIT_READ_DR0),
5936 [x86_intercept_dr_write] = POST_EX(SVM_EXIT_WRITE_DR0),
5937 [x86_intercept_sldt] = POST_EX(SVM_EXIT_LDTR_READ),
5938 [x86_intercept_str] = POST_EX(SVM_EXIT_TR_READ),
5939 [x86_intercept_lldt] = POST_EX(SVM_EXIT_LDTR_WRITE),
5940 [x86_intercept_ltr] = POST_EX(SVM_EXIT_TR_WRITE),
5941 [x86_intercept_sgdt] = POST_EX(SVM_EXIT_GDTR_READ),
5942 [x86_intercept_sidt] = POST_EX(SVM_EXIT_IDTR_READ),
5943 [x86_intercept_lgdt] = POST_EX(SVM_EXIT_GDTR_WRITE),
5944 [x86_intercept_lidt] = POST_EX(SVM_EXIT_IDTR_WRITE),
5945 [x86_intercept_vmrun] = POST_EX(SVM_EXIT_VMRUN),
5946 [x86_intercept_vmmcall] = POST_EX(SVM_EXIT_VMMCALL),
5947 [x86_intercept_vmload] = POST_EX(SVM_EXIT_VMLOAD),
5948 [x86_intercept_vmsave] = POST_EX(SVM_EXIT_VMSAVE),
5949 [x86_intercept_stgi] = POST_EX(SVM_EXIT_STGI),
5950 [x86_intercept_clgi] = POST_EX(SVM_EXIT_CLGI),
5951 [x86_intercept_skinit] = POST_EX(SVM_EXIT_SKINIT),
5952 [x86_intercept_invlpga] = POST_EX(SVM_EXIT_INVLPGA),
5953 [x86_intercept_rdtscp] = POST_EX(SVM_EXIT_RDTSCP),
5954 [x86_intercept_monitor] = POST_MEM(SVM_EXIT_MONITOR),
5955 [x86_intercept_mwait] = POST_EX(SVM_EXIT_MWAIT),
5956 [x86_intercept_invlpg] = POST_EX(SVM_EXIT_INVLPG),
5957 [x86_intercept_invd] = POST_EX(SVM_EXIT_INVD),
5958 [x86_intercept_wbinvd] = POST_EX(SVM_EXIT_WBINVD),
5959 [x86_intercept_wrmsr] = POST_EX(SVM_EXIT_MSR),
5960 [x86_intercept_rdtsc] = POST_EX(SVM_EXIT_RDTSC),
5961 [x86_intercept_rdmsr] = POST_EX(SVM_EXIT_MSR),
5962 [x86_intercept_rdpmc] = POST_EX(SVM_EXIT_RDPMC),
5963 [x86_intercept_cpuid] = PRE_EX(SVM_EXIT_CPUID),
5964 [x86_intercept_rsm] = PRE_EX(SVM_EXIT_RSM),
5965 [x86_intercept_pause] = PRE_EX(SVM_EXIT_PAUSE),
5966 [x86_intercept_pushf] = PRE_EX(SVM_EXIT_PUSHF),
5967 [x86_intercept_popf] = PRE_EX(SVM_EXIT_POPF),
5968 [x86_intercept_intn] = PRE_EX(SVM_EXIT_SWINT),
5969 [x86_intercept_iret] = PRE_EX(SVM_EXIT_IRET),
5970 [x86_intercept_icebp] = PRE_EX(SVM_EXIT_ICEBP),
5971 [x86_intercept_hlt] = POST_EX(SVM_EXIT_HLT),
5972 [x86_intercept_in] = POST_EX(SVM_EXIT_IOIO),
5973 [x86_intercept_ins] = POST_EX(SVM_EXIT_IOIO),
5974 [x86_intercept_out] = POST_EX(SVM_EXIT_IOIO),
5975 [x86_intercept_outs] = POST_EX(SVM_EXIT_IOIO),
5976};
5977
5978#undef PRE_EX
5979#undef POST_EX
5980#undef POST_MEM
5981
5982static int svm_check_intercept(struct kvm_vcpu *vcpu,
5983 struct x86_instruction_info *info,
5984 enum x86_intercept_stage stage)
5985{
5986 struct vcpu_svm *svm = to_svm(vcpu);
5987 int vmexit, ret = X86EMUL_CONTINUE;
5988 struct __x86_intercept icpt_info;
5989 struct vmcb *vmcb = svm->vmcb;
5990
5991 if (info->intercept >= ARRAY_SIZE(x86_intercept_map))
5992 goto out;
5993
5994 icpt_info = x86_intercept_map[info->intercept];
5995
5996 if (stage != icpt_info.stage)
5997 goto out;
5998
5999 switch (icpt_info.exit_code) {
6000 case SVM_EXIT_READ_CR0:
6001 if (info->intercept == x86_intercept_cr_read)
6002 icpt_info.exit_code += info->modrm_reg;
6003 break;
6004 case SVM_EXIT_WRITE_CR0: {
6005 unsigned long cr0, val;
6006 u64 intercept;
6007
6008 if (info->intercept == x86_intercept_cr_write)
6009 icpt_info.exit_code += info->modrm_reg;
6010
6011 if (icpt_info.exit_code != SVM_EXIT_WRITE_CR0 ||
6012 info->intercept == x86_intercept_clts)
6013 break;
6014
6015 intercept = svm->nested.intercept;
6016
6017 if (!(intercept & (1ULL << INTERCEPT_SELECTIVE_CR0)))
6018 break;
6019
6020 cr0 = vcpu->arch.cr0 & ~SVM_CR0_SELECTIVE_MASK;
6021 val = info->src_val & ~SVM_CR0_SELECTIVE_MASK;
6022
6023 if (info->intercept == x86_intercept_lmsw) {
6024 cr0 &= 0xfUL;
6025 val &= 0xfUL;
6026
6027 if (cr0 & X86_CR0_PE)
6028 val |= X86_CR0_PE;
6029 }
6030
6031 if (cr0 ^ val)
6032 icpt_info.exit_code = SVM_EXIT_CR0_SEL_WRITE;
6033
6034 break;
6035 }
6036 case SVM_EXIT_READ_DR0:
6037 case SVM_EXIT_WRITE_DR0:
6038 icpt_info.exit_code += info->modrm_reg;
6039 break;
6040 case SVM_EXIT_MSR:
6041 if (info->intercept == x86_intercept_wrmsr)
6042 vmcb->control.exit_info_1 = 1;
6043 else
6044 vmcb->control.exit_info_1 = 0;
6045 break;
6046 case SVM_EXIT_PAUSE:
		/*
		 * PAUSE is encoded as "rep nop"; only treat this as a PAUSE
		 * intercept when the REP prefix is actually present.
		 */
6051 if (info->rep_prefix != REPE_PREFIX)
6052 goto out;
6053 break;
6054 case SVM_EXIT_IOIO: {
6055 u64 exit_info;
6056 u32 bytes;
6057
6058 if (info->intercept == x86_intercept_in ||
6059 info->intercept == x86_intercept_ins) {
6060 exit_info = ((info->src_val & 0xffff) << 16) |
6061 SVM_IOIO_TYPE_MASK;
6062 bytes = info->dst_bytes;
6063 } else {
6064 exit_info = (info->dst_val & 0xffff) << 16;
6065 bytes = info->src_bytes;
6066 }
6067
6068 if (info->intercept == x86_intercept_outs ||
6069 info->intercept == x86_intercept_ins)
6070 exit_info |= SVM_IOIO_STR_MASK;
6071
6072 if (info->rep_prefix)
6073 exit_info |= SVM_IOIO_REP_MASK;
6074
6075 bytes = min(bytes, 4u);
6076
6077 exit_info |= bytes << SVM_IOIO_SIZE_SHIFT;
6078
6079 exit_info |= (u32)info->ad_bytes << (SVM_IOIO_ASIZE_SHIFT - 1);
6080
6081 vmcb->control.exit_info_1 = exit_info;
6082 vmcb->control.exit_info_2 = info->next_rip;
6083
6084 break;
6085 }
6086 default:
6087 break;
6088 }
6089
6090
6091 if (static_cpu_has(X86_FEATURE_NRIPS))
6092 vmcb->control.next_rip = info->next_rip;
6093 vmcb->control.exit_code = icpt_info.exit_code;
6094 vmexit = nested_svm_exit_handled(svm);
6095
6096 ret = (vmexit == NESTED_EXIT_DONE) ? X86EMUL_INTERCEPTED
6097 : X86EMUL_CONTINUE;
6098
6099out:
6100 return ret;
6101}
6102
6103static void svm_handle_external_intr(struct kvm_vcpu *vcpu)
6104{
6105 local_irq_enable();
6106
	/*
	 * Execute at least one instruction with interrupts enabled so the
	 * pending interrupt is not delayed by the interrupt shadow.
	 */
6110 asm("nop");
6111 local_irq_disable();
6112}
6113
6114static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu)
6115{
6116 if (pause_filter_thresh)
6117 shrink_ple_window(vcpu);
6118}
6119
6120static inline void avic_post_state_restore(struct kvm_vcpu *vcpu)
6121{
6122 if (avic_handle_apic_id_update(vcpu) != 0)
6123 return;
6124 if (avic_handle_dfr_update(vcpu) != 0)
6125 return;
6126 avic_handle_ldr_update(vcpu);
6127}
6128
6129static void svm_setup_mce(struct kvm_vcpu *vcpu)
6130{
6131
6132 vcpu->arch.mcg_cap &= 0x1ff;
6133}
6134
6135static int svm_smi_allowed(struct kvm_vcpu *vcpu)
6136{
6137 struct vcpu_svm *svm = to_svm(vcpu);
6138
6139
6140 if (!gif_set(svm))
6141 return 0;
6142
6143 if (is_guest_mode(&svm->vcpu) &&
6144 svm->nested.intercept & (1ULL << INTERCEPT_SMI)) {
6145
6146 svm->vmcb->control.exit_code = SVM_EXIT_SMI;
6147 svm->nested.exit_required = true;
6148 return 0;
6149 }
6150
6151 return 1;
6152}
6153
6154static int svm_pre_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
6155{
6156 struct vcpu_svm *svm = to_svm(vcpu);
6157 int ret;
6158
6159 if (is_guest_mode(vcpu)) {
		/* SMM state-save offset 0xfed8: "SVM guest" flag. */
		put_smstate(u64, smstate, 0x7ed8, 1);
		/* SMM state-save offset 0xfee0: nested guest VMCB physical address. */
		put_smstate(u64, smstate, 0x7ee0, svm->nested.vmcb);

6165 svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX];
6166 svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP];
6167 svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP];
6168
6169 ret = nested_svm_vmexit(svm);
6170 if (ret)
6171 return ret;
6172 }
6173 return 0;
6174}
6175
6176static int svm_pre_leave_smm(struct kvm_vcpu *vcpu, u64 smbase)
6177{
6178 struct vcpu_svm *svm = to_svm(vcpu);
6179 struct vmcb *nested_vmcb;
6180 struct page *page;
6181 struct {
6182 u64 guest;
6183 u64 vmcb;
6184 } svm_state_save;
6185 int ret;
6186
6187 ret = kvm_vcpu_read_guest(vcpu, smbase + 0xfed8, &svm_state_save,
6188 sizeof(svm_state_save));
6189 if (ret)
6190 return ret;
6191
6192 if (svm_state_save.guest) {
6193 vcpu->arch.hflags &= ~HF_SMM_MASK;
6194 nested_vmcb = nested_svm_map(svm, svm_state_save.vmcb, &page);
6195 if (nested_vmcb)
6196 enter_svm_guest_mode(svm, svm_state_save.vmcb, nested_vmcb, page);
6197 else
6198 ret = 1;
6199 vcpu->arch.hflags |= HF_SMM_MASK;
6200 }
6201 return ret;
6202}
6203
6204static int enable_smi_window(struct kvm_vcpu *vcpu)
6205{
6206 struct vcpu_svm *svm = to_svm(vcpu);
6207
6208 if (!gif_set(svm)) {
6209 if (vgif_enabled(svm))
6210 set_intercept(svm, INTERCEPT_STGI);
6211
6212 return 1;
6213 }
6214 return 0;
6215}
6216
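/* Allocate an unused ASID from the range reserved for SEV guests. */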
6217static int sev_asid_new(void)
6218{
6219 int pos;
6220
	/*
	 * SEV-enabled guests must use ASIDs in the range
	 * [min_sev_asid, max_sev_asid].
	 */
6224 pos = find_next_zero_bit(sev_asid_bitmap, max_sev_asid, min_sev_asid - 1);
6225 if (pos >= max_sev_asid)
6226 return -EBUSY;
6227
6228 set_bit(pos, sev_asid_bitmap);
6229 return pos + 1;
6230}
6231
6232static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
6233{
6234 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
6235 int asid, ret;
6236
6237 ret = -EBUSY;
6238 asid = sev_asid_new();
6239 if (asid < 0)
6240 return ret;
6241
6242 ret = sev_platform_init(&argp->error);
6243 if (ret)
6244 goto e_free;
6245
6246 sev->active = true;
6247 sev->asid = asid;
6248 INIT_LIST_HEAD(&sev->regions_list);
6249
6250 return 0;
6251
6252e_free:
6253 __sev_asid_free(asid);
6254 return ret;
6255}
6256
6257static int sev_bind_asid(struct kvm *kvm, unsigned int handle, int *error)
6258{
6259 struct sev_data_activate *data;
6260 int asid = sev_get_asid(kvm);
6261 int ret;
6262
6263 wbinvd_on_all_cpus();
6264
6265 ret = sev_guest_df_flush(error);
6266 if (ret)
6267 return ret;
6268
6269 data = kzalloc(sizeof(*data), GFP_KERNEL);
6270 if (!data)
6271 return -ENOMEM;
6272
6273
6274 data->handle = handle;
6275 data->asid = asid;
6276 ret = sev_guest_activate(data, error);
6277 kfree(data);
6278
6279 return ret;
6280}
6281
6282static int __sev_issue_cmd(int fd, int id, void *data, int *error)
6283{
6284 struct fd f;
6285 int ret;
6286
6287 f = fdget(fd);
6288 if (!f.file)
6289 return -EBADF;
6290
6291 ret = sev_issue_cmd_external_user(f.file, id, data, error);
6292
6293 fdput(f);
6294 return ret;
6295}
6296
6297static int sev_issue_cmd(struct kvm *kvm, int id, void *data, int *error)
6298{
6299 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
6300
6301 return __sev_issue_cmd(sev->fd, id, data, error);
6302}
6303
6304static int sev_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
6305{
6306 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
6307 struct sev_data_launch_start *start;
6308 struct kvm_sev_launch_start params;
6309 void *dh_blob, *session_blob;
6310 int *error = &argp->error;
6311 int ret;
6312
6313 if (!sev_guest(kvm))
6314 return -ENOTTY;
6315
	if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
6317 return -EFAULT;
6318
6319 start = kzalloc(sizeof(*start), GFP_KERNEL);
6320 if (!start)
6321 return -ENOMEM;
6322
6323 dh_blob = NULL;
6324 if (params.dh_uaddr) {
6325 dh_blob = psp_copy_user_blob(params.dh_uaddr, params.dh_len);
6326 if (IS_ERR(dh_blob)) {
6327 ret = PTR_ERR(dh_blob);
6328 goto e_free;
6329 }
6330
6331 start->dh_cert_address = __sme_set(__pa(dh_blob));
6332 start->dh_cert_len = params.dh_len;
6333 }
6334
6335 session_blob = NULL;
6336 if (params.session_uaddr) {
6337 session_blob = psp_copy_user_blob(params.session_uaddr, params.session_len);
6338 if (IS_ERR(session_blob)) {
6339 ret = PTR_ERR(session_blob);
6340 goto e_free_dh;
6341 }
6342
6343 start->session_address = __sme_set(__pa(session_blob));
6344 start->session_len = params.session_len;
6345 }
6346
6347 start->handle = params.handle;
6348 start->policy = params.policy;
6349
6350
6351 ret = __sev_issue_cmd(argp->sev_fd, SEV_CMD_LAUNCH_START, start, error);
6352 if (ret)
6353 goto e_free_session;
6354
6355
6356 ret = sev_bind_asid(kvm, start->handle, error);
6357 if (ret)
6358 goto e_free_session;
6359
6360
6361 params.handle = start->handle;
	if (copy_to_user((void __user *)(uintptr_t)argp->data, &params, sizeof(params))) {
6363 sev_unbind_asid(kvm, start->handle);
6364 ret = -EFAULT;
6365 goto e_free_session;
6366 }
6367
6368 sev->handle = start->handle;
6369 sev->fd = argp->sev_fd;
6370
6371e_free_session:
6372 kfree(session_blob);
6373e_free_dh:
6374 kfree(dh_blob);
6375e_free:
6376 kfree(start);
6377 return ret;
6378}
6379
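/*
 * Count how many of the pinned pages, starting at @idx, are physically
 * contiguous so they can be passed to the firmware in a single command.
 */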
6380static int get_num_contig_pages(int idx, struct page **inpages,
6381 unsigned long npages)
6382{
6383 unsigned long paddr, next_paddr;
6384 int i = idx + 1, pages = 1;
6385
6386
6387 paddr = __sme_page_pa(inpages[idx]);
6388 while (i < npages) {
6389 next_paddr = __sme_page_pa(inpages[i++]);
6390 if ((paddr + PAGE_SIZE) == next_paddr) {
6391 pages++;
6392 paddr = next_paddr;
6393 continue;
6394 }
6395 break;
6396 }
6397
6398 return pages;
6399}
6400
6401static int sev_launch_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
6402{
6403 unsigned long vaddr, vaddr_end, next_vaddr, npages, size;
6404 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
6405 struct kvm_sev_launch_update_data params;
6406 struct sev_data_launch_update_data *data;
6407 struct page **inpages;
6408 int i, ret, pages;
6409
6410 if (!sev_guest(kvm))
6411 return -ENOTTY;
6412
	if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
6414 return -EFAULT;
6415
6416 data = kzalloc(sizeof(*data), GFP_KERNEL);
6417 if (!data)
6418 return -ENOMEM;
6419
6420 vaddr = params.uaddr;
6421 size = params.len;
6422 vaddr_end = vaddr + size;
6423
6424
6425 inpages = sev_pin_memory(kvm, vaddr, size, &npages, 1);
6426 if (!inpages) {
6427 ret = -ENOMEM;
6428 goto e_free;
6429 }
6430
	/*
	 * LAUNCH_UPDATE encrypts the memory in place (it rewrites the same
	 * region with C=1), so flush any cached copies with C=0 first.
	 */
6437 sev_clflush_pages(inpages, npages);
6438
6439 for (i = 0; vaddr < vaddr_end; vaddr = next_vaddr, i += pages) {
6440 int offset, len;
6441
		/*
		 * The userspace buffer need not be page aligned; remember the
		 * offset into the first pinned page.
		 */
		offset = vaddr & (PAGE_SIZE - 1);

		/* Batch as many physically contiguous pages as possible. */
6449 pages = get_num_contig_pages(i, inpages, npages);
6450
6451 len = min_t(size_t, ((pages * PAGE_SIZE) - offset), size);
6452
6453 data->handle = sev->handle;
6454 data->len = len;
6455 data->address = __sme_page_pa(inpages[i]) + offset;
6456 ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_DATA, data, &argp->error);
6457 if (ret)
6458 goto e_unpin;
6459
6460 size -= len;
6461 next_vaddr = vaddr + len;
6462 }
6463
6464e_unpin:
6465
6466 for (i = 0; i < npages; i++) {
6467 set_page_dirty_lock(inpages[i]);
6468 mark_page_accessed(inpages[i]);
6469 }
6470
6471 sev_unpin_memory(kvm, inpages, npages);
6472e_free:
6473 kfree(data);
6474 return ret;
6475}
6476
6477static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp)
6478{
6479 void __user *measure = (void __user *)(uintptr_t)argp->data;
6480 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
6481 struct sev_data_launch_measure *data;
6482 struct kvm_sev_launch_measure params;
6483 void __user *p = NULL;
6484 void *blob = NULL;
6485 int ret;
6486
6487 if (!sev_guest(kvm))
6488 return -ENOTTY;
6489
	if (copy_from_user(&params, measure, sizeof(params)))
6491 return -EFAULT;
6492
6493 data = kzalloc(sizeof(*data), GFP_KERNEL);
6494 if (!data)
6495 return -ENOMEM;
6496
6497
6498 if (!params.len)
6499 goto cmd;
6500
6501 p = (void __user *)(uintptr_t)params.uaddr;
6502 if (p) {
6503 if (params.len > SEV_FW_BLOB_MAX_SIZE) {
6504 ret = -EINVAL;
6505 goto e_free;
6506 }
6507
6508 ret = -ENOMEM;
6509 blob = kmalloc(params.len, GFP_KERNEL);
6510 if (!blob)
6511 goto e_free;
6512
6513 data->address = __psp_pa(blob);
6514 data->len = params.len;
6515 }
6516
6517cmd:
6518 data->handle = sev->handle;
6519 ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_MEASURE, data, &argp->error);
6520
6521
6522
6523
6524 if (!params.len)
6525 goto done;
6526
6527 if (ret)
6528 goto e_free_blob;
6529
6530 if (blob) {
6531 if (copy_to_user(p, blob, params.len))
6532 ret = -EFAULT;
6533 }
6534
6535done:
6536 params.len = data->len;
	if (copy_to_user(measure, &params, sizeof(params)))
6538 ret = -EFAULT;
6539e_free_blob:
6540 kfree(blob);
6541e_free:
6542 kfree(data);
6543 return ret;
6544}
6545
6546static int sev_launch_finish(struct kvm *kvm, struct kvm_sev_cmd *argp)
6547{
6548 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
6549 struct sev_data_launch_finish *data;
6550 int ret;
6551
6552 if (!sev_guest(kvm))
6553 return -ENOTTY;
6554
6555 data = kzalloc(sizeof(*data), GFP_KERNEL);
6556 if (!data)
6557 return -ENOMEM;
6558
6559 data->handle = sev->handle;
6560 ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_FINISH, data, &argp->error);
6561
6562 kfree(data);
6563 return ret;
6564}
6565
6566static int sev_guest_status(struct kvm *kvm, struct kvm_sev_cmd *argp)
6567{
6568 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
6569 struct kvm_sev_guest_status params;
6570 struct sev_data_guest_status *data;
6571 int ret;
6572
6573 if (!sev_guest(kvm))
6574 return -ENOTTY;
6575
6576 data = kzalloc(sizeof(*data), GFP_KERNEL);
6577 if (!data)
6578 return -ENOMEM;
6579
6580 data->handle = sev->handle;
6581 ret = sev_issue_cmd(kvm, SEV_CMD_GUEST_STATUS, data, &argp->error);
6582 if (ret)
6583 goto e_free;
6584
6585 params.policy = data->policy;
6586 params.state = data->state;
6587 params.handle = data->handle;
6588
	if (copy_to_user((void __user *)(uintptr_t)argp->data, &params, sizeof(params)))
6590 ret = -EFAULT;
6591e_free:
6592 kfree(data);
6593 return ret;
6594}
6595
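/*
 * Issue a SEV DBG_ENCRYPT or DBG_DECRYPT firmware command for the given
 * system-physical source and destination range.
 */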
6596static int __sev_issue_dbg_cmd(struct kvm *kvm, unsigned long src,
6597 unsigned long dst, int size,
6598 int *error, bool enc)
6599{
6600 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
6601 struct sev_data_dbg *data;
6602 int ret;
6603
6604 data = kzalloc(sizeof(*data), GFP_KERNEL);
6605 if (!data)
6606 return -ENOMEM;
6607
6608 data->handle = sev->handle;
6609 data->dst_addr = dst;
6610 data->src_addr = src;
6611 data->len = size;
6612
6613 ret = sev_issue_cmd(kvm,
6614 enc ? SEV_CMD_DBG_ENCRYPT : SEV_CMD_DBG_DECRYPT,
6615 data, error);
6616 kfree(data);
6617 return ret;
6618}
6619
6620static int __sev_dbg_decrypt(struct kvm *kvm, unsigned long src_paddr,
6621 unsigned long dst_paddr, int sz, int *err)
6622{
6623 int offset;
6624
	/*
	 * The firmware works on 16-byte aligned, 16-byte multiple regions.
	 * Reading slightly more than requested is safe; the caller supplies
	 * a destination large enough to hold the rounded-up size.
	 */
6629 src_paddr = round_down(src_paddr, 16);
6630 offset = src_paddr & 15;
6631 sz = round_up(sz + offset, 16);
6632
6633 return __sev_issue_dbg_cmd(kvm, src_paddr, dst_paddr, sz, err, false);
6634}
6635
static int __sev_dbg_decrypt_user(struct kvm *kvm, unsigned long paddr,
				  unsigned long __user dst_uaddr,
				  unsigned long dst_paddr,
				  int size, int *err)
{
	struct page *tpage = NULL;
	int ret, offset;

	/* if the inputs are not 16-byte aligned then use an intermediate buffer */
	if (!IS_ALIGNED(dst_paddr, 16) ||
	    !IS_ALIGNED(paddr, 16) ||
	    !IS_ALIGNED(size, 16)) {
		tpage = (void *)alloc_page(GFP_KERNEL);
		if (!tpage)
			return -ENOMEM;

		dst_paddr = __sme_page_pa(tpage);
	}

	ret = __sev_dbg_decrypt(kvm, paddr, dst_paddr, size, err);
	if (ret)
		goto e_free;

	if (tpage) {
		offset = paddr & 15;
		if (copy_to_user((void __user *)(uintptr_t)dst_uaddr,
				 page_address(tpage) + offset, size))
			ret = -EFAULT;
	}

e_free:
	if (tpage)
		__free_page(tpage);

	return ret;
}

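/*
 * Encrypt a userspace buffer into guest memory for debugging.  Unaligned
 * source or destination ranges are handled with temporary pages and a
 * read-modify-write of the destination block.
 */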
static int __sev_dbg_encrypt_user(struct kvm *kvm, unsigned long paddr,
				  unsigned long __user vaddr,
				  unsigned long dst_paddr,
				  unsigned long __user dst_vaddr,
				  int size, int *error)
{
	struct page *src_tpage = NULL;
	struct page *dst_tpage = NULL;
	int ret, len = size;

	/* If the source buffer is not aligned then use an intermediate buffer */
	if (!IS_ALIGNED(vaddr, 16)) {
		src_tpage = alloc_page(GFP_KERNEL);
		if (!src_tpage)
			return -ENOMEM;

		if (copy_from_user(page_address(src_tpage),
				   (void __user *)(uintptr_t)vaddr, size)) {
			__free_page(src_tpage);
			return -EFAULT;
		}

		paddr = __sme_page_pa(src_tpage);
	}

	/*
	 * If the destination buffer or the length is not aligned then do a
	 * read-modify-write:
	 *   - decrypt the destination into an intermediate buffer
	 *   - copy the source data into that buffer at the right offset
	 *   - use the intermediate buffer as the source for DBG_ENCRYPT
	 */
	if (!IS_ALIGNED(dst_vaddr, 16) || !IS_ALIGNED(size, 16)) {
		int dst_offset;

		dst_tpage = alloc_page(GFP_KERNEL);
		if (!dst_tpage) {
			ret = -ENOMEM;
			goto e_free;
		}

		ret = __sev_dbg_decrypt(kvm, dst_paddr,
					__sme_page_pa(dst_tpage), size, error);
		if (ret)
			goto e_free;

		/*
		 * Use memcpy() if the source was already copied into a kernel
		 * page above, otherwise copy_from_user() directly.
		 */
		dst_offset = dst_paddr & 15;

		if (src_tpage)
			memcpy(page_address(dst_tpage) + dst_offset,
			       page_address(src_tpage), size);
		else {
			if (copy_from_user(page_address(dst_tpage) + dst_offset,
					   (void __user *)(uintptr_t)vaddr, size)) {
				ret = -EFAULT;
				goto e_free;
			}
		}

		paddr = __sme_page_pa(dst_tpage);
		dst_paddr = round_down(dst_paddr, 16);
		len = round_up(size, 16);
	}

	ret = __sev_issue_dbg_cmd(kvm, paddr, dst_paddr, len, error, true);

e_free:
	if (src_tpage)
		__free_page(src_tpage);
	if (dst_tpage)
		__free_page(dst_tpage);
	return ret;
}

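/*
 * KVM_SEV_DBG_DECRYPT / KVM_SEV_DBG_ENCRYPT: walk the requested range one
 * page at a time, pinning the source and destination pages and issuing the
 * appropriate debug command for each chunk.
 */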
static int sev_dbg_crypt(struct kvm *kvm, struct kvm_sev_cmd *argp, bool dec)
{
	unsigned long vaddr, vaddr_end, next_vaddr;
	unsigned long dst_vaddr;
	struct page **src_p, **dst_p;
	struct kvm_sev_dbg debug;
	unsigned long n;
	int ret, size;

	if (!sev_guest(kvm))
		return -ENOTTY;

	if (copy_from_user(&debug, (void __user *)(uintptr_t)argp->data, sizeof(debug)))
		return -EFAULT;

	vaddr = debug.src_uaddr;
	size = debug.len;
	vaddr_end = vaddr + size;
	dst_vaddr = debug.dst_uaddr;

	for (; vaddr < vaddr_end; vaddr = next_vaddr) {
		int len, s_off, d_off;

		/* lock the userspace source and destination page */
		src_p = sev_pin_memory(kvm, vaddr & PAGE_MASK, PAGE_SIZE, &n, 0);
		if (!src_p)
			return -EFAULT;

		dst_p = sev_pin_memory(kvm, dst_vaddr & PAGE_MASK, PAGE_SIZE, &n, 1);
		if (!dst_p) {
			sev_unpin_memory(kvm, src_p, n);
			return -EFAULT;
		}

		/*
		 * The DBG_{DE,EN}CRYPT commands will rewrite this memory
		 * region with the C-bit set.  The cache may still hold the
		 * data with C=0, i.e. unencrypted, so invalidate it first.
		 */
		sev_clflush_pages(src_p, 1);
		sev_clflush_pages(dst_p, 1);

		/*
		 * Since the user buffers may not be page aligned, calculate
		 * the offset within each page.
		 */
		s_off = vaddr & ~PAGE_MASK;
		d_off = dst_vaddr & ~PAGE_MASK;
		len = min_t(size_t, (PAGE_SIZE - s_off), size);

		if (dec)
			ret = __sev_dbg_decrypt_user(kvm,
						     __sme_page_pa(src_p[0]) + s_off,
						     dst_vaddr,
						     __sme_page_pa(dst_p[0]) + d_off,
						     len, &argp->error);
		else
			ret = __sev_dbg_encrypt_user(kvm,
						     __sme_page_pa(src_p[0]) + s_off,
						     vaddr,
						     __sme_page_pa(dst_p[0]) + d_off,
						     dst_vaddr,
						     len, &argp->error);

		sev_unpin_memory(kvm, src_p, 1);
		sev_unpin_memory(kvm, dst_p, 1);

		if (ret)
			goto err;

		next_vaddr = vaddr + len;
		dst_vaddr = dst_vaddr + len;
		size -= len;
	}
err:
	return ret;
}

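/*
 * KVM_SEV_LAUNCH_SECRET: inject a userspace-provided secret blob (with its
 * packet header) into contiguous guest memory via LAUNCH_UPDATE_SECRET.
 */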
static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
	struct sev_data_launch_secret *data;
	struct kvm_sev_launch_secret params;
	struct page **pages;
	void *blob, *hdr;
	unsigned long n;
	int ret, offset;

	if (!sev_guest(kvm))
		return -ENOTTY;

	if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
		return -EFAULT;

	pages = sev_pin_memory(kvm, params.guest_uaddr, params.guest_len, &n, 1);
	if (!pages)
		return -ENOMEM;

	/*
	 * The secret must be copied into a contiguous memory region; verify
	 * that the pinned userspace pages are contiguous before issuing the
	 * command.
	 */
	if (get_num_contig_pages(0, pages, n) != n) {
		ret = -EINVAL;
		goto e_unpin_memory;
	}

	ret = -ENOMEM;
	data = kzalloc(sizeof(*data), GFP_KERNEL);
	if (!data)
		goto e_unpin_memory;

	offset = params.guest_uaddr & (PAGE_SIZE - 1);
	data->guest_address = __sme_page_pa(pages[0]) + offset;
	data->guest_len = params.guest_len;

	blob = psp_copy_user_blob(params.trans_uaddr, params.trans_len);
	if (IS_ERR(blob)) {
		ret = PTR_ERR(blob);
		goto e_free;
	}

	data->trans_address = __psp_pa(blob);
	data->trans_len = params.trans_len;

	hdr = psp_copy_user_blob(params.hdr_uaddr, params.hdr_len);
	if (IS_ERR(hdr)) {
		ret = PTR_ERR(hdr);
		goto e_free_blob;
	}
	data->hdr_address = __psp_pa(hdr);
	data->hdr_len = params.hdr_len;

	data->handle = sev->handle;
	ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_SECRET, data, &argp->error);

	kfree(hdr);

e_free_blob:
	kfree(blob);
e_free:
	kfree(data);
e_unpin_memory:
	sev_unpin_memory(kvm, pages, n);
	return ret;
}

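/*
 * Dispatcher for the KVM_MEMORY_ENCRYPT_OP ioctl: copy the command from
 * userspace, route it to the matching KVM_SEV_* handler under kvm->lock and
 * copy the (possibly updated) command structure back.
 */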
static int svm_mem_enc_op(struct kvm *kvm, void __user *argp)
{
	struct kvm_sev_cmd sev_cmd;
	int r;

	if (!svm_sev_enabled())
		return -ENOTTY;

	if (copy_from_user(&sev_cmd, argp, sizeof(struct kvm_sev_cmd)))
		return -EFAULT;

	mutex_lock(&kvm->lock);

	switch (sev_cmd.id) {
	case KVM_SEV_INIT:
		r = sev_guest_init(kvm, &sev_cmd);
		break;
	case KVM_SEV_LAUNCH_START:
		r = sev_launch_start(kvm, &sev_cmd);
		break;
	case KVM_SEV_LAUNCH_UPDATE_DATA:
		r = sev_launch_update_data(kvm, &sev_cmd);
		break;
	case KVM_SEV_LAUNCH_MEASURE:
		r = sev_launch_measure(kvm, &sev_cmd);
		break;
	case KVM_SEV_LAUNCH_FINISH:
		r = sev_launch_finish(kvm, &sev_cmd);
		break;
	case KVM_SEV_GUEST_STATUS:
		r = sev_guest_status(kvm, &sev_cmd);
		break;
	case KVM_SEV_DBG_DECRYPT:
		r = sev_dbg_crypt(kvm, &sev_cmd, true);
		break;
	case KVM_SEV_DBG_ENCRYPT:
		r = sev_dbg_crypt(kvm, &sev_cmd, false);
		break;
	case KVM_SEV_LAUNCH_SECRET:
		r = sev_launch_secret(kvm, &sev_cmd);
		break;
	default:
		r = -EINVAL;
		goto out;
	}

	if (copy_to_user(argp, &sev_cmd, sizeof(struct kvm_sev_cmd)))
		r = -EFAULT;

out:
	mutex_unlock(&kvm->lock);
	return r;
}

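/*
 * KVM_MEMORY_ENCRYPT_REG_REGION: pin the userspace range backing encrypted
 * guest memory and track it on the per-VM regions_list so that it can be
 * unpinned later.
 */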
static int svm_register_enc_region(struct kvm *kvm,
				   struct kvm_enc_region *range)
{
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
	struct enc_region *region;
	int ret = 0;

	if (!sev_guest(kvm))
		return -ENOTTY;

	if (range->addr > ULONG_MAX || range->size > ULONG_MAX)
		return -EINVAL;

	region = kzalloc(sizeof(*region), GFP_KERNEL);
	if (!region)
		return -ENOMEM;

	region->pages = sev_pin_memory(kvm, range->addr, range->size, &region->npages, 1);
	if (!region->pages) {
		ret = -ENOMEM;
		goto e_free;
	}

	/*
	 * The guest may change the memory encryption attribute from C=0 -> C=1
	 * or vice versa for this memory range.  Flush the pages so that no
	 * stale cache lines with the old attribute remain.
	 */
	sev_clflush_pages(region->pages, region->npages);

	region->uaddr = range->addr;
	region->size = range->size;

	mutex_lock(&kvm->lock);
	list_add_tail(&region->list, &sev->regions_list);
	mutex_unlock(&kvm->lock);

	return ret;

e_free:
	kfree(region);
	return ret;
}

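/* Look up a previously registered region by exact address and size. */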
static struct enc_region *
find_enc_region(struct kvm *kvm, struct kvm_enc_region *range)
{
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
	struct list_head *head = &sev->regions_list;
	struct enc_region *i;

	list_for_each_entry(i, head, list) {
		if (i->uaddr == range->addr &&
		    i->size == range->size)
			return i;
	}

	return NULL;
}

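/*
 * KVM_MEMORY_ENCRYPT_UNREG_REGION: find the matching registered region and
 * unpin and free it under kvm->lock.
 */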
static int svm_unregister_enc_region(struct kvm *kvm,
				     struct kvm_enc_region *range)
{
	struct enc_region *region;
	int ret;

	mutex_lock(&kvm->lock);

	if (!sev_guest(kvm)) {
		ret = -ENOTTY;
		goto failed;
	}

	region = find_enc_region(kvm, range);
	if (!region) {
		ret = -EINVAL;
		goto failed;
	}

	__unregister_enc_region_locked(kvm, region);

	mutex_unlock(&kvm->lock);
	return 0;

failed:
	mutex_unlock(&kvm->lock);
	return ret;
}

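/*
 * kvm_x86_ops callbacks that plug the SVM backend into the generic KVM x86
 * code; kvm_init() registers this table below.
 */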
static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
	.cpu_has_kvm_support = has_svm,
	.disabled_by_bios = is_disabled,
	.hardware_setup = svm_hardware_setup,
	.hardware_unsetup = svm_hardware_unsetup,
	.check_processor_compatibility = svm_check_processor_compat,
	.hardware_enable = svm_hardware_enable,
	.hardware_disable = svm_hardware_disable,
	.cpu_has_accelerated_tpr = svm_cpu_has_accelerated_tpr,
	.has_emulated_msr = svm_has_emulated_msr,

	.vcpu_create = svm_create_vcpu,
	.vcpu_free = svm_free_vcpu,
	.vcpu_reset = svm_vcpu_reset,

	.vm_alloc = svm_vm_alloc,
	.vm_free = svm_vm_free,
	.vm_init = avic_vm_init,
	.vm_destroy = svm_vm_destroy,

	.prepare_guest_switch = svm_prepare_guest_switch,
	.vcpu_load = svm_vcpu_load,
	.vcpu_put = svm_vcpu_put,
	.vcpu_blocking = svm_vcpu_blocking,
	.vcpu_unblocking = svm_vcpu_unblocking,

	.update_bp_intercept = update_bp_intercept,
	.get_msr_feature = svm_get_msr_feature,
	.get_msr = svm_get_msr,
	.set_msr = svm_set_msr,
	.get_segment_base = svm_get_segment_base,
	.get_segment = svm_get_segment,
	.set_segment = svm_set_segment,
	.get_cpl = svm_get_cpl,
	.get_cs_db_l_bits = kvm_get_cs_db_l_bits,
	.decache_cr0_guest_bits = svm_decache_cr0_guest_bits,
	.decache_cr3 = svm_decache_cr3,
	.decache_cr4_guest_bits = svm_decache_cr4_guest_bits,
	.set_cr0 = svm_set_cr0,
	.set_cr3 = svm_set_cr3,
	.set_cr4 = svm_set_cr4,
	.set_efer = svm_set_efer,
	.get_idt = svm_get_idt,
	.set_idt = svm_set_idt,
	.get_gdt = svm_get_gdt,
	.set_gdt = svm_set_gdt,
	.get_dr6 = svm_get_dr6,
	.set_dr6 = svm_set_dr6,
	.set_dr7 = svm_set_dr7,
	.sync_dirty_debug_regs = svm_sync_dirty_debug_regs,
	.cache_reg = svm_cache_reg,
	.get_rflags = svm_get_rflags,
	.set_rflags = svm_set_rflags,

	.tlb_flush = svm_flush_tlb,
	.tlb_flush_gva = svm_flush_tlb_gva,

	.run = svm_vcpu_run,
	.handle_exit = handle_exit,
	.skip_emulated_instruction = skip_emulated_instruction,
	.set_interrupt_shadow = svm_set_interrupt_shadow,
	.get_interrupt_shadow = svm_get_interrupt_shadow,
	.patch_hypercall = svm_patch_hypercall,
	.set_irq = svm_set_irq,
	.set_nmi = svm_inject_nmi,
	.queue_exception = svm_queue_exception,
	.cancel_injection = svm_cancel_injection,
	.interrupt_allowed = svm_interrupt_allowed,
	.nmi_allowed = svm_nmi_allowed,
	.get_nmi_mask = svm_get_nmi_mask,
	.set_nmi_mask = svm_set_nmi_mask,
	.enable_nmi_window = enable_nmi_window,
	.enable_irq_window = enable_irq_window,
	.update_cr8_intercept = update_cr8_intercept,
	.set_virtual_apic_mode = svm_set_virtual_apic_mode,
	.get_enable_apicv = svm_get_enable_apicv,
	.refresh_apicv_exec_ctrl = svm_refresh_apicv_exec_ctrl,
	.load_eoi_exitmap = svm_load_eoi_exitmap,
	.hwapic_irr_update = svm_hwapic_irr_update,
	.hwapic_isr_update = svm_hwapic_isr_update,
	.sync_pir_to_irr = kvm_lapic_find_highest_irr,
	.apicv_post_state_restore = avic_post_state_restore,

	.set_tss_addr = svm_set_tss_addr,
	.set_identity_map_addr = svm_set_identity_map_addr,
	.get_tdp_level = get_npt_level,
	.get_mt_mask = svm_get_mt_mask,

	.get_exit_info = svm_get_exit_info,

	.get_lpage_level = svm_get_lpage_level,

	.cpuid_update = svm_cpuid_update,

	.rdtscp_supported = svm_rdtscp_supported,
	.invpcid_supported = svm_invpcid_supported,
	.mpx_supported = svm_mpx_supported,
	.xsaves_supported = svm_xsaves_supported,
	.umip_emulated = svm_umip_emulated,

	.set_supported_cpuid = svm_set_supported_cpuid,

	.has_wbinvd_exit = svm_has_wbinvd_exit,

	.read_l1_tsc_offset = svm_read_l1_tsc_offset,
	.write_tsc_offset = svm_write_tsc_offset,

	.set_tdp_cr3 = set_tdp_cr3,

	.check_intercept = svm_check_intercept,
	.handle_external_intr = svm_handle_external_intr,

	.request_immediate_exit = __kvm_request_immediate_exit,

	.sched_in = svm_sched_in,

	.pmu_ops = &amd_pmu_ops,
	.deliver_posted_interrupt = svm_deliver_avic_intr,
	.update_pi_irte = svm_update_pi_irte,
	.setup_mce = svm_setup_mce,

	.smi_allowed = svm_smi_allowed,
	.pre_enter_smm = svm_pre_enter_smm,
	.pre_leave_smm = svm_pre_leave_smm,
	.enable_smi_window = enable_smi_window,

	.mem_enc_op = svm_mem_enc_op,
	.mem_enc_reg_region = svm_register_enc_region,
	.mem_enc_unreg_region = svm_unregister_enc_region,
};

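/* Module entry/exit: register and unregister the SVM backend with KVM. */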
static int __init svm_init(void)
{
	return kvm_init(&svm_x86_ops, sizeof(struct vcpu_svm),
			__alignof__(struct vcpu_svm), THIS_MODULE);
}

static void __exit svm_exit(void)
{
	kvm_exit();
}

module_init(svm_init)
module_exit(svm_exit)