1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18#define pr_fmt(fmt) "SVM: " fmt
19
20#include <linux/kvm_host.h>
21
22#include "irq.h"
23#include "mmu.h"
24#include "kvm_cache_regs.h"
25#include "x86.h"
26#include "cpuid.h"
27#include "pmu.h"
28
29#include <linux/module.h>
30#include <linux/mod_devicetable.h>
31#include <linux/kernel.h>
32#include <linux/vmalloc.h>
33#include <linux/highmem.h>
34#include <linux/sched.h>
35#include <linux/trace_events.h>
36#include <linux/slab.h>
37#include <linux/amd-iommu.h>
38#include <linux/hashtable.h>
39#include <linux/frame.h>
40#include <linux/psp-sev.h>
41#include <linux/file.h>
42#include <linux/pagemap.h>
43#include <linux/swap.h>
44
45#include <asm/apic.h>
46#include <asm/perf_event.h>
47#include <asm/tlbflush.h>
48#include <asm/desc.h>
49#include <asm/debugreg.h>
50#include <asm/kvm_para.h>
51#include <asm/irq_remapping.h>
52#include <asm/spec-ctrl.h>
53
54#include <asm/virtext.h>
55#include "trace.h"
56
57#define __ex(x) __kvm_handle_fault_on_reboot(x)
58
59MODULE_AUTHOR("Qumranet");
60MODULE_LICENSE("GPL");
61
62static const struct x86_cpu_id svm_cpu_id[] = {
63 X86_FEATURE_MATCH(X86_FEATURE_SVM),
64 {}
65};
66MODULE_DEVICE_TABLE(x86cpu, svm_cpu_id);
67
68#define IOPM_ALLOC_ORDER 2
69#define MSRPM_ALLOC_ORDER 1
70
71#define SEG_TYPE_LDT 2
72#define SEG_TYPE_BUSY_TSS16 3
73
74#define SVM_FEATURE_NPT (1 << 0)
75#define SVM_FEATURE_LBRV (1 << 1)
76#define SVM_FEATURE_SVML (1 << 2)
77#define SVM_FEATURE_NRIP (1 << 3)
78#define SVM_FEATURE_TSC_RATE (1 << 4)
79#define SVM_FEATURE_VMCB_CLEAN (1 << 5)
80#define SVM_FEATURE_FLUSH_ASID (1 << 6)
81#define SVM_FEATURE_DECODE_ASSIST (1 << 7)
82#define SVM_FEATURE_PAUSE_FILTER (1 << 10)
83
84#define SVM_AVIC_DOORBELL 0xc001011b
85
86#define NESTED_EXIT_HOST 0
87#define NESTED_EXIT_DONE 1
88#define NESTED_EXIT_CONTINUE 2
89
90#define DEBUGCTL_RESERVED_BITS (~(0x3fULL))
91
92#define TSC_RATIO_RSVD 0xffffff0000000000ULL
93#define TSC_RATIO_MIN 0x0000000000000001ULL
94#define TSC_RATIO_MAX 0x000000ffffffffffULL
95
96#define AVIC_HPA_MASK ~((0xFFFULL << 52) | 0xFFF)
97
98
99
100
101
102#define AVIC_MAX_PHYSICAL_ID_COUNT 255
103
104#define AVIC_UNACCEL_ACCESS_WRITE_MASK 1
105#define AVIC_UNACCEL_ACCESS_OFFSET_MASK 0xFF0
106#define AVIC_UNACCEL_ACCESS_VECTOR_MASK 0xFFFFFFFF
107
108
109#define AVIC_VCPU_ID_BITS 8
110#define AVIC_VCPU_ID_MASK ((1 << AVIC_VCPU_ID_BITS) - 1)
111
112#define AVIC_VM_ID_BITS 24
113#define AVIC_VM_ID_NR (1 << AVIC_VM_ID_BITS)
114#define AVIC_VM_ID_MASK ((1 << AVIC_VM_ID_BITS) - 1)
115
116#define AVIC_GATAG(x, y) (((x & AVIC_VM_ID_MASK) << AVIC_VCPU_ID_BITS) | \
117 (y & AVIC_VCPU_ID_MASK))
118#define AVIC_GATAG_TO_VMID(x) ((x >> AVIC_VCPU_ID_BITS) & AVIC_VM_ID_MASK)
119#define AVIC_GATAG_TO_VCPUID(x) (x & AVIC_VCPU_ID_MASK)
120
121static bool erratum_383_found __read_mostly;
122
123static const u32 host_save_user_msrs[] = {
124#ifdef CONFIG_X86_64
125 MSR_STAR, MSR_LSTAR, MSR_CSTAR, MSR_SYSCALL_MASK, MSR_KERNEL_GS_BASE,
126 MSR_FS_BASE,
127#endif
128 MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
129 MSR_TSC_AUX,
130};
131
132#define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs)
133
134struct kvm_sev_info {
135 bool active;
136 unsigned int asid;
137 unsigned int handle;
138 int fd;
139 unsigned long pages_locked;
140 struct list_head regions_list;
141};
142
143struct kvm_svm {
144 struct kvm kvm;
145
146
147 u32 avic_vm_id;
148 u32 ldr_mode;
149 struct page *avic_logical_id_table_page;
150 struct page *avic_physical_id_table_page;
151 struct hlist_node hnode;
152
153 struct kvm_sev_info sev_info;
154};
155
156struct kvm_vcpu;
157
158struct nested_state {
159 struct vmcb *hsave;
160 u64 hsave_msr;
161 u64 vm_cr_msr;
162 u64 vmcb;
163
164
165 u32 *msrpm;
166
167
168 u64 vmcb_msrpm;
169 u64 vmcb_iopm;
170
171
172 bool exit_required;
173
174
175 u32 intercept_cr;
176 u32 intercept_dr;
177 u32 intercept_exceptions;
178 u64 intercept;
179
180
181 u64 nested_cr3;
182};
183
184#define MSRPM_OFFSETS 16
185static u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly;
186
187
188
189
190
191static uint64_t osvw_len = 4, osvw_status;
192
193struct vcpu_svm {
194 struct kvm_vcpu vcpu;
195 struct vmcb *vmcb;
196 unsigned long vmcb_pa;
197 struct svm_cpu_data *svm_data;
198 uint64_t asid_generation;
199 uint64_t sysenter_esp;
200 uint64_t sysenter_eip;
201 uint64_t tsc_aux;
202
203 u64 msr_decfg;
204
205 u64 next_rip;
206
207 u64 host_user_msrs[NR_HOST_SAVE_USER_MSRS];
208 struct {
209 u16 fs;
210 u16 gs;
211 u16 ldt;
212 u64 gs_base;
213 } host;
214
215 u64 spec_ctrl;
216
217
218
219
220
221 u64 virt_spec_ctrl;
222
223 u32 *msrpm;
224
225 ulong nmi_iret_rip;
226
227 struct nested_state nested;
228
229 bool nmi_singlestep;
230 u64 nmi_singlestep_guest_rflags;
231
232 unsigned int3_injected;
233 unsigned long int3_rip;
234
235
236 bool nrips_enabled : 1;
237
238 u32 ldr_reg;
239 struct page *avic_backing_page;
240 u64 *avic_physical_id_cache;
241 bool avic_is_running;
242
243
244
245
246
247
248
249 struct list_head ir_list;
250 spinlock_t ir_list_lock;
251
252
253 unsigned int last_cpu;
254};
255
256
257
258
259struct amd_svm_iommu_ir {
260 struct list_head node;
261 void *data;
262};
263
264#define AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK (0xFF)
265#define AVIC_LOGICAL_ID_ENTRY_VALID_MASK (1 << 31)
266
267#define AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK (0xFFULL)
268#define AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK (0xFFFFFFFFFFULL << 12)
269#define AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK (1ULL << 62)
270#define AVIC_PHYSICAL_ID_ENTRY_VALID_MASK (1ULL << 63)
271
272static DEFINE_PER_CPU(u64, current_tsc_ratio);
273#define TSC_RATIO_DEFAULT 0x0100000000ULL
274
275#define MSR_INVALID 0xffffffffU
276
277static const struct svm_direct_access_msrs {
278 u32 index;
279 bool always;
280} direct_access_msrs[] = {
281 { .index = MSR_STAR, .always = true },
282 { .index = MSR_IA32_SYSENTER_CS, .always = true },
283#ifdef CONFIG_X86_64
284 { .index = MSR_GS_BASE, .always = true },
285 { .index = MSR_FS_BASE, .always = true },
286 { .index = MSR_KERNEL_GS_BASE, .always = true },
287 { .index = MSR_LSTAR, .always = true },
288 { .index = MSR_CSTAR, .always = true },
289 { .index = MSR_SYSCALL_MASK, .always = true },
290#endif
291 { .index = MSR_IA32_SPEC_CTRL, .always = false },
292 { .index = MSR_IA32_PRED_CMD, .always = false },
293 { .index = MSR_IA32_LASTBRANCHFROMIP, .always = false },
294 { .index = MSR_IA32_LASTBRANCHTOIP, .always = false },
295 { .index = MSR_IA32_LASTINTFROMIP, .always = false },
296 { .index = MSR_IA32_LASTINTTOIP, .always = false },
297 { .index = MSR_INVALID, .always = false },
298};
299
300
301#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
302static bool npt_enabled = true;
303#else
304static bool npt_enabled;
305#endif
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337static unsigned short pause_filter_thresh = KVM_DEFAULT_PLE_GAP;
338module_param(pause_filter_thresh, ushort, 0444);
339
340static unsigned short pause_filter_count = KVM_SVM_DEFAULT_PLE_WINDOW;
341module_param(pause_filter_count, ushort, 0444);
342
343
344static unsigned short pause_filter_count_grow = KVM_DEFAULT_PLE_WINDOW_GROW;
345module_param(pause_filter_count_grow, ushort, 0444);
346
347
348static unsigned short pause_filter_count_shrink = KVM_DEFAULT_PLE_WINDOW_SHRINK;
349module_param(pause_filter_count_shrink, ushort, 0444);
350
351
352static unsigned short pause_filter_count_max = KVM_SVM_DEFAULT_PLE_WINDOW_MAX;
353module_param(pause_filter_count_max, ushort, 0444);
354
355
356static int npt = true;
357module_param(npt, int, S_IRUGO);
358
359
360static int nested = true;
361module_param(nested, int, S_IRUGO);
362
363
364static int avic;
365#ifdef CONFIG_X86_LOCAL_APIC
366module_param(avic, int, S_IRUGO);
367#endif
368
369
370static int vls = true;
371module_param(vls, int, 0444);
372
373
374static int vgif = true;
375module_param(vgif, int, 0444);
376
377
378static int sev = IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT);
379module_param(sev, int, 0444);
380
381static u8 rsm_ins_bytes[] = "\x0f\xaa";
382
383static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
384static void svm_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa);
385static void svm_complete_interrupts(struct vcpu_svm *svm);
386
387static int nested_svm_exit_handled(struct vcpu_svm *svm);
388static int nested_svm_intercept(struct vcpu_svm *svm);
389static int nested_svm_vmexit(struct vcpu_svm *svm);
390static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
391 bool has_error_code, u32 error_code);
392
393enum {
394 VMCB_INTERCEPTS,
395
396 VMCB_PERM_MAP,
397 VMCB_ASID,
398 VMCB_INTR,
399 VMCB_NPT,
400 VMCB_CR,
401 VMCB_DR,
402 VMCB_DT,
403 VMCB_SEG,
404 VMCB_CR2,
405 VMCB_LBR,
406 VMCB_AVIC,
407
408
409
410 VMCB_DIRTY_MAX,
411};
412
413
414#define VMCB_ALWAYS_DIRTY_MASK ((1U << VMCB_INTR) | (1U << VMCB_CR2))
415
416#define VMCB_AVIC_APIC_BAR_MASK 0xFFFFFFFFFF000ULL
417
418static unsigned int max_sev_asid;
419static unsigned int min_sev_asid;
420static unsigned long *sev_asid_bitmap;
421#define __sme_page_pa(x) __sme_set(page_to_pfn(x) << PAGE_SHIFT)
422
423struct enc_region {
424 struct list_head list;
425 unsigned long npages;
426 struct page **pages;
427 unsigned long uaddr;
428 unsigned long size;
429};
430
431
432static inline struct kvm_svm *to_kvm_svm(struct kvm *kvm)
433{
434 return container_of(kvm, struct kvm_svm, kvm);
435}
436
437static inline bool svm_sev_enabled(void)
438{
439 return max_sev_asid;
440}
441
442static inline bool sev_guest(struct kvm *kvm)
443{
444 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
445
446 return sev->active;
447}
448
449static inline int sev_get_asid(struct kvm *kvm)
450{
451 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
452
453 return sev->asid;
454}
455
456static inline void mark_all_dirty(struct vmcb *vmcb)
457{
458 vmcb->control.clean = 0;
459}
460
461static inline void mark_all_clean(struct vmcb *vmcb)
462{
463 vmcb->control.clean = ((1 << VMCB_DIRTY_MAX) - 1)
464 & ~VMCB_ALWAYS_DIRTY_MASK;
465}
466
467static inline void mark_dirty(struct vmcb *vmcb, int bit)
468{
469 vmcb->control.clean &= ~(1 << bit);
470}
471
472static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
473{
474 return container_of(vcpu, struct vcpu_svm, vcpu);
475}
476
477static inline void avic_update_vapic_bar(struct vcpu_svm *svm, u64 data)
478{
479 svm->vmcb->control.avic_vapic_bar = data & VMCB_AVIC_APIC_BAR_MASK;
480 mark_dirty(svm->vmcb, VMCB_AVIC);
481}
482
483static inline bool avic_vcpu_is_running(struct kvm_vcpu *vcpu)
484{
485 struct vcpu_svm *svm = to_svm(vcpu);
486 u64 *entry = svm->avic_physical_id_cache;
487
488 if (!entry)
489 return false;
490
491 return (READ_ONCE(*entry) & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK);
492}
493
494static void recalc_intercepts(struct vcpu_svm *svm)
495{
496 struct vmcb_control_area *c, *h;
497 struct nested_state *g;
498
499 mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
500
501 if (!is_guest_mode(&svm->vcpu))
502 return;
503
504 c = &svm->vmcb->control;
505 h = &svm->nested.hsave->control;
506 g = &svm->nested;
507
508 c->intercept_cr = h->intercept_cr | g->intercept_cr;
509 c->intercept_dr = h->intercept_dr | g->intercept_dr;
510 c->intercept_exceptions = h->intercept_exceptions | g->intercept_exceptions;
511 c->intercept = h->intercept | g->intercept;
512}
513
514static inline struct vmcb *get_host_vmcb(struct vcpu_svm *svm)
515{
516 if (is_guest_mode(&svm->vcpu))
517 return svm->nested.hsave;
518 else
519 return svm->vmcb;
520}
521
522static inline void set_cr_intercept(struct vcpu_svm *svm, int bit)
523{
524 struct vmcb *vmcb = get_host_vmcb(svm);
525
526 vmcb->control.intercept_cr |= (1U << bit);
527
528 recalc_intercepts(svm);
529}
530
531static inline void clr_cr_intercept(struct vcpu_svm *svm, int bit)
532{
533 struct vmcb *vmcb = get_host_vmcb(svm);
534
535 vmcb->control.intercept_cr &= ~(1U << bit);
536
537 recalc_intercepts(svm);
538}
539
540static inline bool is_cr_intercept(struct vcpu_svm *svm, int bit)
541{
542 struct vmcb *vmcb = get_host_vmcb(svm);
543
544 return vmcb->control.intercept_cr & (1U << bit);
545}
546
547static inline void set_dr_intercepts(struct vcpu_svm *svm)
548{
549 struct vmcb *vmcb = get_host_vmcb(svm);
550
551 vmcb->control.intercept_dr = (1 << INTERCEPT_DR0_READ)
552 | (1 << INTERCEPT_DR1_READ)
553 | (1 << INTERCEPT_DR2_READ)
554 | (1 << INTERCEPT_DR3_READ)
555 | (1 << INTERCEPT_DR4_READ)
556 | (1 << INTERCEPT_DR5_READ)
557 | (1 << INTERCEPT_DR6_READ)
558 | (1 << INTERCEPT_DR7_READ)
559 | (1 << INTERCEPT_DR0_WRITE)
560 | (1 << INTERCEPT_DR1_WRITE)
561 | (1 << INTERCEPT_DR2_WRITE)
562 | (1 << INTERCEPT_DR3_WRITE)
563 | (1 << INTERCEPT_DR4_WRITE)
564 | (1 << INTERCEPT_DR5_WRITE)
565 | (1 << INTERCEPT_DR6_WRITE)
566 | (1 << INTERCEPT_DR7_WRITE);
567
568 recalc_intercepts(svm);
569}
570
571static inline void clr_dr_intercepts(struct vcpu_svm *svm)
572{
573 struct vmcb *vmcb = get_host_vmcb(svm);
574
575 vmcb->control.intercept_dr = 0;
576
577 recalc_intercepts(svm);
578}
579
580static inline void set_exception_intercept(struct vcpu_svm *svm, int bit)
581{
582 struct vmcb *vmcb = get_host_vmcb(svm);
583
584 vmcb->control.intercept_exceptions |= (1U << bit);
585
586 recalc_intercepts(svm);
587}
588
589static inline void clr_exception_intercept(struct vcpu_svm *svm, int bit)
590{
591 struct vmcb *vmcb = get_host_vmcb(svm);
592
593 vmcb->control.intercept_exceptions &= ~(1U << bit);
594
595 recalc_intercepts(svm);
596}
597
598static inline void set_intercept(struct vcpu_svm *svm, int bit)
599{
600 struct vmcb *vmcb = get_host_vmcb(svm);
601
602 vmcb->control.intercept |= (1ULL << bit);
603
604 recalc_intercepts(svm);
605}
606
607static inline void clr_intercept(struct vcpu_svm *svm, int bit)
608{
609 struct vmcb *vmcb = get_host_vmcb(svm);
610
611 vmcb->control.intercept &= ~(1ULL << bit);
612
613 recalc_intercepts(svm);
614}
615
616static inline bool vgif_enabled(struct vcpu_svm *svm)
617{
618 return !!(svm->vmcb->control.int_ctl & V_GIF_ENABLE_MASK);
619}
620
621static inline void enable_gif(struct vcpu_svm *svm)
622{
623 if (vgif_enabled(svm))
624 svm->vmcb->control.int_ctl |= V_GIF_MASK;
625 else
626 svm->vcpu.arch.hflags |= HF_GIF_MASK;
627}
628
629static inline void disable_gif(struct vcpu_svm *svm)
630{
631 if (vgif_enabled(svm))
632 svm->vmcb->control.int_ctl &= ~V_GIF_MASK;
633 else
634 svm->vcpu.arch.hflags &= ~HF_GIF_MASK;
635}
636
637static inline bool gif_set(struct vcpu_svm *svm)
638{
639 if (vgif_enabled(svm))
640 return !!(svm->vmcb->control.int_ctl & V_GIF_MASK);
641 else
642 return !!(svm->vcpu.arch.hflags & HF_GIF_MASK);
643}
644
645static unsigned long iopm_base;
646
647struct kvm_ldttss_desc {
648 u16 limit0;
649 u16 base0;
650 unsigned base1:8, type:5, dpl:2, p:1;
651 unsigned limit1:4, zero0:3, g:1, base2:8;
652 u32 base3;
653 u32 zero1;
654} __attribute__((packed));
655
656struct svm_cpu_data {
657 int cpu;
658
659 u64 asid_generation;
660 u32 max_asid;
661 u32 next_asid;
662 u32 min_asid;
663 struct kvm_ldttss_desc *tss_desc;
664
665 struct page *save_area;
666 struct vmcb *current_vmcb;
667
668
669 struct vmcb **sev_vmcbs;
670};
671
672static DEFINE_PER_CPU(struct svm_cpu_data *, svm_data);
673
674struct svm_init_data {
675 int cpu;
676 int r;
677};
678
679static const u32 msrpm_ranges[] = {0, 0xc0000000, 0xc0010000};
680
681#define NUM_MSR_MAPS ARRAY_SIZE(msrpm_ranges)
682#define MSRS_RANGE_SIZE 2048
683#define MSRS_IN_RANGE (MSRS_RANGE_SIZE * 8 / 2)
684
685static u32 svm_msrpm_offset(u32 msr)
686{
687 u32 offset;
688 int i;
689
690 for (i = 0; i < NUM_MSR_MAPS; i++) {
691 if (msr < msrpm_ranges[i] ||
692 msr >= msrpm_ranges[i] + MSRS_IN_RANGE)
693 continue;
694
695 offset = (msr - msrpm_ranges[i]) / 4;
696 offset += (i * MSRS_RANGE_SIZE);
697
698
699 return offset / 4;
700 }
701
702
703 return MSR_INVALID;
704}
705
706#define MAX_INST_SIZE 15
707
708static inline void clgi(void)
709{
710 asm volatile (__ex(SVM_CLGI));
711}
712
713static inline void stgi(void)
714{
715 asm volatile (__ex(SVM_STGI));
716}
717
718static inline void invlpga(unsigned long addr, u32 asid)
719{
720 asm volatile (__ex(SVM_INVLPGA) : : "a"(addr), "c"(asid));
721}
722
723static int get_npt_level(struct kvm_vcpu *vcpu)
724{
725#ifdef CONFIG_X86_64
726 return PT64_ROOT_4LEVEL;
727#else
728 return PT32E_ROOT_LEVEL;
729#endif
730}
731
732static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
733{
734 vcpu->arch.efer = efer;
735 if (!npt_enabled && !(efer & EFER_LMA))
736 efer &= ~EFER_LME;
737
738 to_svm(vcpu)->vmcb->save.efer = efer | EFER_SVME;
739 mark_dirty(to_svm(vcpu)->vmcb, VMCB_CR);
740}
741
742static int is_external_interrupt(u32 info)
743{
744 info &= SVM_EVTINJ_TYPE_MASK | SVM_EVTINJ_VALID;
745 return info == (SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR);
746}
747
748static u32 svm_get_interrupt_shadow(struct kvm_vcpu *vcpu)
749{
750 struct vcpu_svm *svm = to_svm(vcpu);
751 u32 ret = 0;
752
753 if (svm->vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK)
754 ret = KVM_X86_SHADOW_INT_STI | KVM_X86_SHADOW_INT_MOV_SS;
755 return ret;
756}
757
758static void svm_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
759{
760 struct vcpu_svm *svm = to_svm(vcpu);
761
762 if (mask == 0)
763 svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK;
764 else
765 svm->vmcb->control.int_state |= SVM_INTERRUPT_SHADOW_MASK;
766
767}
768
769static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
770{
771 struct vcpu_svm *svm = to_svm(vcpu);
772
773 if (svm->vmcb->control.next_rip != 0) {
774 WARN_ON_ONCE(!static_cpu_has(X86_FEATURE_NRIPS));
775 svm->next_rip = svm->vmcb->control.next_rip;
776 }
777
778 if (!svm->next_rip) {
779 if (emulate_instruction(vcpu, EMULTYPE_SKIP) !=
780 EMULATE_DONE)
781 printk(KERN_DEBUG "%s: NOP\n", __func__);
782 return;
783 }
784 if (svm->next_rip - kvm_rip_read(vcpu) > MAX_INST_SIZE)
785 printk(KERN_ERR "%s: ip 0x%lx next 0x%llx\n",
786 __func__, kvm_rip_read(vcpu), svm->next_rip);
787
788 kvm_rip_write(vcpu, svm->next_rip);
789 svm_set_interrupt_shadow(vcpu, 0);
790}
791
792static void svm_queue_exception(struct kvm_vcpu *vcpu)
793{
794 struct vcpu_svm *svm = to_svm(vcpu);
795 unsigned nr = vcpu->arch.exception.nr;
796 bool has_error_code = vcpu->arch.exception.has_error_code;
797 bool reinject = vcpu->arch.exception.injected;
798 u32 error_code = vcpu->arch.exception.error_code;
799
800
801
802
803
804 if (!reinject &&
805 nested_svm_check_exception(svm, nr, has_error_code, error_code))
806 return;
807
808 if (nr == BP_VECTOR && !static_cpu_has(X86_FEATURE_NRIPS)) {
809 unsigned long rip, old_rip = kvm_rip_read(&svm->vcpu);
810
811
812
813
814
815
816
817
818 skip_emulated_instruction(&svm->vcpu);
819 rip = kvm_rip_read(&svm->vcpu);
820 svm->int3_rip = rip + svm->vmcb->save.cs.base;
821 svm->int3_injected = rip - old_rip;
822 }
823
824 svm->vmcb->control.event_inj = nr
825 | SVM_EVTINJ_VALID
826 | (has_error_code ? SVM_EVTINJ_VALID_ERR : 0)
827 | SVM_EVTINJ_TYPE_EXEPT;
828 svm->vmcb->control.event_inj_err = error_code;
829}
830
831static void svm_init_erratum_383(void)
832{
833 u32 low, high;
834 int err;
835 u64 val;
836
837 if (!static_cpu_has_bug(X86_BUG_AMD_TLB_MMATCH))
838 return;
839
840
841 val = native_read_msr_safe(MSR_AMD64_DC_CFG, &err);
842 if (err)
843 return;
844
845 val |= (1ULL << 47);
846
847 low = lower_32_bits(val);
848 high = upper_32_bits(val);
849
850 native_write_msr_safe(MSR_AMD64_DC_CFG, low, high);
851
852 erratum_383_found = true;
853}
854
855static void svm_init_osvw(struct kvm_vcpu *vcpu)
856{
857
858
859
860
861 vcpu->arch.osvw.length = (osvw_len >= 3) ? (osvw_len) : 3;
862 vcpu->arch.osvw.status = osvw_status & ~(6ULL);
863
864
865
866
867
868
869
870
871
872 if (osvw_len == 0 && boot_cpu_data.x86 == 0x10)
873 vcpu->arch.osvw.status |= 1;
874}
875
876static int has_svm(void)
877{
878 const char *msg;
879
880 if (!cpu_has_svm(&msg)) {
881 printk(KERN_INFO "has_svm: %s\n", msg);
882 return 0;
883 }
884
885 return 1;
886}
887
888static void svm_hardware_disable(void)
889{
890
891 if (static_cpu_has(X86_FEATURE_TSCRATEMSR))
892 wrmsrl(MSR_AMD64_TSC_RATIO, TSC_RATIO_DEFAULT);
893
894 cpu_svm_disable();
895
896 amd_pmu_disable_virt();
897}
898
899static int svm_hardware_enable(void)
900{
901
902 struct svm_cpu_data *sd;
903 uint64_t efer;
904 struct desc_struct *gdt;
905 int me = raw_smp_processor_id();
906
907 rdmsrl(MSR_EFER, efer);
908 if (efer & EFER_SVME)
909 return -EBUSY;
910
911 if (!has_svm()) {
912 pr_err("%s: err EOPNOTSUPP on %d\n", __func__, me);
913 return -EINVAL;
914 }
915 sd = per_cpu(svm_data, me);
916 if (!sd) {
917 pr_err("%s: svm_data is NULL on %d\n", __func__, me);
918 return -EINVAL;
919 }
920
921 sd->asid_generation = 1;
922 sd->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1;
923 sd->next_asid = sd->max_asid + 1;
924 sd->min_asid = max_sev_asid + 1;
925
926 gdt = get_current_gdt_rw();
927 sd->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS);
928
929 wrmsrl(MSR_EFER, efer | EFER_SVME);
930
931 wrmsrl(MSR_VM_HSAVE_PA, page_to_pfn(sd->save_area) << PAGE_SHIFT);
932
933 if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) {
934 wrmsrl(MSR_AMD64_TSC_RATIO, TSC_RATIO_DEFAULT);
935 __this_cpu_write(current_tsc_ratio, TSC_RATIO_DEFAULT);
936 }
937
938
939
940
941
942
943
944
945
946
947
948 if (cpu_has(&boot_cpu_data, X86_FEATURE_OSVW)) {
949 uint64_t len, status = 0;
950 int err;
951
952 len = native_read_msr_safe(MSR_AMD64_OSVW_ID_LENGTH, &err);
953 if (!err)
954 status = native_read_msr_safe(MSR_AMD64_OSVW_STATUS,
955 &err);
956
957 if (err)
958 osvw_status = osvw_len = 0;
959 else {
960 if (len < osvw_len)
961 osvw_len = len;
962 osvw_status |= status;
963 osvw_status &= (1ULL << osvw_len) - 1;
964 }
965 } else
966 osvw_status = osvw_len = 0;
967
968 svm_init_erratum_383();
969
970 amd_pmu_enable_virt();
971
972 return 0;
973}
974
975static void svm_cpu_uninit(int cpu)
976{
977 struct svm_cpu_data *sd = per_cpu(svm_data, raw_smp_processor_id());
978
979 if (!sd)
980 return;
981
982 per_cpu(svm_data, raw_smp_processor_id()) = NULL;
983 kfree(sd->sev_vmcbs);
984 __free_page(sd->save_area);
985 kfree(sd);
986}
987
988static int svm_cpu_init(int cpu)
989{
990 struct svm_cpu_data *sd;
991 int r;
992
993 sd = kzalloc(sizeof(struct svm_cpu_data), GFP_KERNEL);
994 if (!sd)
995 return -ENOMEM;
996 sd->cpu = cpu;
997 r = -ENOMEM;
998 sd->save_area = alloc_page(GFP_KERNEL);
999 if (!sd->save_area)
1000 goto err_1;
1001
1002 if (svm_sev_enabled()) {
1003 r = -ENOMEM;
1004 sd->sev_vmcbs = kmalloc((max_sev_asid + 1) * sizeof(void *), GFP_KERNEL);
1005 if (!sd->sev_vmcbs)
1006 goto err_1;
1007 }
1008
1009 per_cpu(svm_data, cpu) = sd;
1010
1011 return 0;
1012
1013err_1:
1014 kfree(sd);
1015 return r;
1016
1017}
1018
1019static bool valid_msr_intercept(u32 index)
1020{
1021 int i;
1022
1023 for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++)
1024 if (direct_access_msrs[i].index == index)
1025 return true;
1026
1027 return false;
1028}
1029
1030static bool msr_write_intercepted(struct kvm_vcpu *vcpu, unsigned msr)
1031{
1032 u8 bit_write;
1033 unsigned long tmp;
1034 u32 offset;
1035 u32 *msrpm;
1036
1037 msrpm = is_guest_mode(vcpu) ? to_svm(vcpu)->nested.msrpm:
1038 to_svm(vcpu)->msrpm;
1039
1040 offset = svm_msrpm_offset(msr);
1041 bit_write = 2 * (msr & 0x0f) + 1;
1042 tmp = msrpm[offset];
1043
1044 BUG_ON(offset == MSR_INVALID);
1045
1046 return !!test_bit(bit_write, &tmp);
1047}
1048
1049static void set_msr_interception(u32 *msrpm, unsigned msr,
1050 int read, int write)
1051{
1052 u8 bit_read, bit_write;
1053 unsigned long tmp;
1054 u32 offset;
1055
1056
1057
1058
1059
1060 WARN_ON(!valid_msr_intercept(msr));
1061
1062 offset = svm_msrpm_offset(msr);
1063 bit_read = 2 * (msr & 0x0f);
1064 bit_write = 2 * (msr & 0x0f) + 1;
1065 tmp = msrpm[offset];
1066
1067 BUG_ON(offset == MSR_INVALID);
1068
1069 read ? clear_bit(bit_read, &tmp) : set_bit(bit_read, &tmp);
1070 write ? clear_bit(bit_write, &tmp) : set_bit(bit_write, &tmp);
1071
1072 msrpm[offset] = tmp;
1073}
1074
1075static void svm_vcpu_init_msrpm(u32 *msrpm)
1076{
1077 int i;
1078
1079 memset(msrpm, 0xff, PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER));
1080
1081 for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) {
1082 if (!direct_access_msrs[i].always)
1083 continue;
1084
1085 set_msr_interception(msrpm, direct_access_msrs[i].index, 1, 1);
1086 }
1087}
1088
1089static void add_msr_offset(u32 offset)
1090{
1091 int i;
1092
1093 for (i = 0; i < MSRPM_OFFSETS; ++i) {
1094
1095
1096 if (msrpm_offsets[i] == offset)
1097 return;
1098
1099
1100 if (msrpm_offsets[i] != MSR_INVALID)
1101 continue;
1102
1103
1104 msrpm_offsets[i] = offset;
1105
1106 return;
1107 }
1108
1109
1110
1111
1112
1113 BUG();
1114}
1115
1116static void init_msrpm_offsets(void)
1117{
1118 int i;
1119
1120 memset(msrpm_offsets, 0xff, sizeof(msrpm_offsets));
1121
1122 for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) {
1123 u32 offset;
1124
1125 offset = svm_msrpm_offset(direct_access_msrs[i].index);
1126 BUG_ON(offset == MSR_INVALID);
1127
1128 add_msr_offset(offset);
1129 }
1130}
1131
1132static void svm_enable_lbrv(struct vcpu_svm *svm)
1133{
1134 u32 *msrpm = svm->msrpm;
1135
1136 svm->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK;
1137 set_msr_interception(msrpm, MSR_IA32_LASTBRANCHFROMIP, 1, 1);
1138 set_msr_interception(msrpm, MSR_IA32_LASTBRANCHTOIP, 1, 1);
1139 set_msr_interception(msrpm, MSR_IA32_LASTINTFROMIP, 1, 1);
1140 set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 1, 1);
1141}
1142
1143static void svm_disable_lbrv(struct vcpu_svm *svm)
1144{
1145 u32 *msrpm = svm->msrpm;
1146
1147 svm->vmcb->control.virt_ext &= ~LBR_CTL_ENABLE_MASK;
1148 set_msr_interception(msrpm, MSR_IA32_LASTBRANCHFROMIP, 0, 0);
1149 set_msr_interception(msrpm, MSR_IA32_LASTBRANCHTOIP, 0, 0);
1150 set_msr_interception(msrpm, MSR_IA32_LASTINTFROMIP, 0, 0);
1151 set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 0, 0);
1152}
1153
1154static void disable_nmi_singlestep(struct vcpu_svm *svm)
1155{
1156 svm->nmi_singlestep = false;
1157
1158 if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP)) {
1159
1160 if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_TF))
1161 svm->vmcb->save.rflags &= ~X86_EFLAGS_TF;
1162 if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_RF))
1163 svm->vmcb->save.rflags &= ~X86_EFLAGS_RF;
1164 }
1165}
1166
1167
1168
1169
1170
1171
1172#define SVM_VM_DATA_HASH_BITS 8
1173static DEFINE_HASHTABLE(svm_vm_data_hash, SVM_VM_DATA_HASH_BITS);
1174static u32 next_vm_id = 0;
1175static bool next_vm_id_wrapped = 0;
1176static DEFINE_SPINLOCK(svm_vm_data_hash_lock);
1177
1178
1179
1180
1181
1182static int avic_ga_log_notifier(u32 ga_tag)
1183{
1184 unsigned long flags;
1185 struct kvm_svm *kvm_svm;
1186 struct kvm_vcpu *vcpu = NULL;
1187 u32 vm_id = AVIC_GATAG_TO_VMID(ga_tag);
1188 u32 vcpu_id = AVIC_GATAG_TO_VCPUID(ga_tag);
1189
1190 pr_debug("SVM: %s: vm_id=%#x, vcpu_id=%#x\n", __func__, vm_id, vcpu_id);
1191
1192 spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
1193 hash_for_each_possible(svm_vm_data_hash, kvm_svm, hnode, vm_id) {
1194 if (kvm_svm->avic_vm_id != vm_id)
1195 continue;
1196 vcpu = kvm_get_vcpu_by_id(&kvm_svm->kvm, vcpu_id);
1197 break;
1198 }
1199 spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags);
1200
1201
1202
1203
1204
1205
1206 if (vcpu)
1207 kvm_vcpu_wake_up(vcpu);
1208
1209 return 0;
1210}
1211
1212static __init int sev_hardware_setup(void)
1213{
1214 struct sev_user_data_status *status;
1215 int rc;
1216
1217
1218 max_sev_asid = cpuid_ecx(0x8000001F);
1219
1220 if (!max_sev_asid)
1221 return 1;
1222
1223
1224 min_sev_asid = cpuid_edx(0x8000001F);
1225
1226
1227 sev_asid_bitmap = kcalloc(BITS_TO_LONGS(max_sev_asid),
1228 sizeof(unsigned long), GFP_KERNEL);
1229 if (!sev_asid_bitmap)
1230 return 1;
1231
1232 status = kmalloc(sizeof(*status), GFP_KERNEL);
1233 if (!status)
1234 return 1;
1235
1236
1237
1238
1239
1240
1241
1242
1243 rc = sev_platform_status(status, NULL);
1244 if (rc)
1245 goto err;
1246
1247 pr_info("SEV supported\n");
1248
1249err:
1250 kfree(status);
1251 return rc;
1252}
1253
1254static void grow_ple_window(struct kvm_vcpu *vcpu)
1255{
1256 struct vcpu_svm *svm = to_svm(vcpu);
1257 struct vmcb_control_area *control = &svm->vmcb->control;
1258 int old = control->pause_filter_count;
1259
1260 control->pause_filter_count = __grow_ple_window(old,
1261 pause_filter_count,
1262 pause_filter_count_grow,
1263 pause_filter_count_max);
1264
1265 if (control->pause_filter_count != old)
1266 mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
1267
1268 trace_kvm_ple_window_grow(vcpu->vcpu_id,
1269 control->pause_filter_count, old);
1270}
1271
1272static void shrink_ple_window(struct kvm_vcpu *vcpu)
1273{
1274 struct vcpu_svm *svm = to_svm(vcpu);
1275 struct vmcb_control_area *control = &svm->vmcb->control;
1276 int old = control->pause_filter_count;
1277
1278 control->pause_filter_count =
1279 __shrink_ple_window(old,
1280 pause_filter_count,
1281 pause_filter_count_shrink,
1282 pause_filter_count);
1283 if (control->pause_filter_count != old)
1284 mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
1285
1286 trace_kvm_ple_window_shrink(vcpu->vcpu_id,
1287 control->pause_filter_count, old);
1288}
1289
1290static __init int svm_hardware_setup(void)
1291{
1292 int cpu;
1293 struct page *iopm_pages;
1294 void *iopm_va;
1295 int r;
1296
1297 iopm_pages = alloc_pages(GFP_KERNEL, IOPM_ALLOC_ORDER);
1298
1299 if (!iopm_pages)
1300 return -ENOMEM;
1301
1302 iopm_va = page_address(iopm_pages);
1303 memset(iopm_va, 0xff, PAGE_SIZE * (1 << IOPM_ALLOC_ORDER));
1304 iopm_base = page_to_pfn(iopm_pages) << PAGE_SHIFT;
1305
1306 init_msrpm_offsets();
1307
1308 if (boot_cpu_has(X86_FEATURE_NX))
1309 kvm_enable_efer_bits(EFER_NX);
1310
1311 if (boot_cpu_has(X86_FEATURE_FXSR_OPT))
1312 kvm_enable_efer_bits(EFER_FFXSR);
1313
1314 if (boot_cpu_has(X86_FEATURE_TSCRATEMSR)) {
1315 kvm_has_tsc_control = true;
1316 kvm_max_tsc_scaling_ratio = TSC_RATIO_MAX;
1317 kvm_tsc_scaling_ratio_frac_bits = 32;
1318 }
1319
1320
1321 if (!boot_cpu_has(X86_FEATURE_PAUSEFILTER)) {
1322 pause_filter_count = 0;
1323 pause_filter_thresh = 0;
1324 } else if (!boot_cpu_has(X86_FEATURE_PFTHRESHOLD)) {
1325 pause_filter_thresh = 0;
1326 }
1327
1328 if (nested) {
1329 printk(KERN_INFO "kvm: Nested Virtualization enabled\n");
1330 kvm_enable_efer_bits(EFER_SVME | EFER_LMSLE);
1331 }
1332
1333 if (sev) {
1334 if (boot_cpu_has(X86_FEATURE_SEV) &&
1335 IS_ENABLED(CONFIG_KVM_AMD_SEV)) {
1336 r = sev_hardware_setup();
1337 if (r)
1338 sev = false;
1339 } else {
1340 sev = false;
1341 }
1342 }
1343
1344 for_each_possible_cpu(cpu) {
1345 r = svm_cpu_init(cpu);
1346 if (r)
1347 goto err;
1348 }
1349
1350 if (!boot_cpu_has(X86_FEATURE_NPT))
1351 npt_enabled = false;
1352
1353 if (npt_enabled && !npt) {
1354 printk(KERN_INFO "kvm: Nested Paging disabled\n");
1355 npt_enabled = false;
1356 }
1357
1358 if (npt_enabled) {
1359 printk(KERN_INFO "kvm: Nested Paging enabled\n");
1360 kvm_enable_tdp();
1361 } else
1362 kvm_disable_tdp();
1363
1364 if (avic) {
1365 if (!npt_enabled ||
1366 !boot_cpu_has(X86_FEATURE_AVIC) ||
1367 !IS_ENABLED(CONFIG_X86_LOCAL_APIC)) {
1368 avic = false;
1369 } else {
1370 pr_info("AVIC enabled\n");
1371
1372 amd_iommu_register_ga_log_notifier(&avic_ga_log_notifier);
1373 }
1374 }
1375
1376 if (vls) {
1377 if (!npt_enabled ||
1378 !boot_cpu_has(X86_FEATURE_V_VMSAVE_VMLOAD) ||
1379 !IS_ENABLED(CONFIG_X86_64)) {
1380 vls = false;
1381 } else {
1382 pr_info("Virtual VMLOAD VMSAVE supported\n");
1383 }
1384 }
1385
1386 if (vgif) {
1387 if (!boot_cpu_has(X86_FEATURE_VGIF))
1388 vgif = false;
1389 else
1390 pr_info("Virtual GIF supported\n");
1391 }
1392
1393 return 0;
1394
1395err:
1396 __free_pages(iopm_pages, IOPM_ALLOC_ORDER);
1397 iopm_base = 0;
1398 return r;
1399}
1400
1401static __exit void svm_hardware_unsetup(void)
1402{
1403 int cpu;
1404
1405 if (svm_sev_enabled())
1406 kfree(sev_asid_bitmap);
1407
1408 for_each_possible_cpu(cpu)
1409 svm_cpu_uninit(cpu);
1410
1411 __free_pages(pfn_to_page(iopm_base >> PAGE_SHIFT), IOPM_ALLOC_ORDER);
1412 iopm_base = 0;
1413}
1414
1415static void init_seg(struct vmcb_seg *seg)
1416{
1417 seg->selector = 0;
1418 seg->attrib = SVM_SELECTOR_P_MASK | SVM_SELECTOR_S_MASK |
1419 SVM_SELECTOR_WRITE_MASK;
1420 seg->limit = 0xffff;
1421 seg->base = 0;
1422}
1423
1424static void init_sys_seg(struct vmcb_seg *seg, uint32_t type)
1425{
1426 seg->selector = 0;
1427 seg->attrib = SVM_SELECTOR_P_MASK | type;
1428 seg->limit = 0xffff;
1429 seg->base = 0;
1430}
1431
1432static u64 svm_read_l1_tsc_offset(struct kvm_vcpu *vcpu)
1433{
1434 struct vcpu_svm *svm = to_svm(vcpu);
1435
1436 if (is_guest_mode(vcpu))
1437 return svm->nested.hsave->control.tsc_offset;
1438
1439 return vcpu->arch.tsc_offset;
1440}
1441
1442static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
1443{
1444 struct vcpu_svm *svm = to_svm(vcpu);
1445 u64 g_tsc_offset = 0;
1446
1447 if (is_guest_mode(vcpu)) {
1448
1449 g_tsc_offset = svm->vmcb->control.tsc_offset -
1450 svm->nested.hsave->control.tsc_offset;
1451 svm->nested.hsave->control.tsc_offset = offset;
1452 } else
1453 trace_kvm_write_tsc_offset(vcpu->vcpu_id,
1454 svm->vmcb->control.tsc_offset,
1455 offset);
1456
1457 svm->vmcb->control.tsc_offset = offset + g_tsc_offset;
1458
1459 mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
1460}
1461
1462static void avic_init_vmcb(struct vcpu_svm *svm)
1463{
1464 struct vmcb *vmcb = svm->vmcb;
1465 struct kvm_svm *kvm_svm = to_kvm_svm(svm->vcpu.kvm);
1466 phys_addr_t bpa = __sme_set(page_to_phys(svm->avic_backing_page));
1467 phys_addr_t lpa = __sme_set(page_to_phys(kvm_svm->avic_logical_id_table_page));
1468 phys_addr_t ppa = __sme_set(page_to_phys(kvm_svm->avic_physical_id_table_page));
1469
1470 vmcb->control.avic_backing_page = bpa & AVIC_HPA_MASK;
1471 vmcb->control.avic_logical_id = lpa & AVIC_HPA_MASK;
1472 vmcb->control.avic_physical_id = ppa & AVIC_HPA_MASK;
1473 vmcb->control.avic_physical_id |= AVIC_MAX_PHYSICAL_ID_COUNT;
1474 vmcb->control.int_ctl |= AVIC_ENABLE_MASK;
1475}
1476
1477static void init_vmcb(struct vcpu_svm *svm)
1478{
1479 struct vmcb_control_area *control = &svm->vmcb->control;
1480 struct vmcb_save_area *save = &svm->vmcb->save;
1481
1482 svm->vcpu.arch.hflags = 0;
1483
1484 set_cr_intercept(svm, INTERCEPT_CR0_READ);
1485 set_cr_intercept(svm, INTERCEPT_CR3_READ);
1486 set_cr_intercept(svm, INTERCEPT_CR4_READ);
1487 set_cr_intercept(svm, INTERCEPT_CR0_WRITE);
1488 set_cr_intercept(svm, INTERCEPT_CR3_WRITE);
1489 set_cr_intercept(svm, INTERCEPT_CR4_WRITE);
1490 if (!kvm_vcpu_apicv_active(&svm->vcpu))
1491 set_cr_intercept(svm, INTERCEPT_CR8_WRITE);
1492
1493 set_dr_intercepts(svm);
1494
1495 set_exception_intercept(svm, PF_VECTOR);
1496 set_exception_intercept(svm, UD_VECTOR);
1497 set_exception_intercept(svm, MC_VECTOR);
1498 set_exception_intercept(svm, AC_VECTOR);
1499 set_exception_intercept(svm, DB_VECTOR);
1500
1501
1502
1503
1504
1505
1506 if (enable_vmware_backdoor)
1507 set_exception_intercept(svm, GP_VECTOR);
1508
1509 set_intercept(svm, INTERCEPT_INTR);
1510 set_intercept(svm, INTERCEPT_NMI);
1511 set_intercept(svm, INTERCEPT_SMI);
1512 set_intercept(svm, INTERCEPT_SELECTIVE_CR0);
1513 set_intercept(svm, INTERCEPT_RDPMC);
1514 set_intercept(svm, INTERCEPT_CPUID);
1515 set_intercept(svm, INTERCEPT_INVD);
1516 set_intercept(svm, INTERCEPT_INVLPG);
1517 set_intercept(svm, INTERCEPT_INVLPGA);
1518 set_intercept(svm, INTERCEPT_IOIO_PROT);
1519 set_intercept(svm, INTERCEPT_MSR_PROT);
1520 set_intercept(svm, INTERCEPT_TASK_SWITCH);
1521 set_intercept(svm, INTERCEPT_SHUTDOWN);
1522 set_intercept(svm, INTERCEPT_VMRUN);
1523 set_intercept(svm, INTERCEPT_VMMCALL);
1524 set_intercept(svm, INTERCEPT_VMLOAD);
1525 set_intercept(svm, INTERCEPT_VMSAVE);
1526 set_intercept(svm, INTERCEPT_STGI);
1527 set_intercept(svm, INTERCEPT_CLGI);
1528 set_intercept(svm, INTERCEPT_SKINIT);
1529 set_intercept(svm, INTERCEPT_WBINVD);
1530 set_intercept(svm, INTERCEPT_XSETBV);
1531 set_intercept(svm, INTERCEPT_RSM);
1532
1533 if (!kvm_mwait_in_guest(svm->vcpu.kvm)) {
1534 set_intercept(svm, INTERCEPT_MONITOR);
1535 set_intercept(svm, INTERCEPT_MWAIT);
1536 }
1537
1538 if (!kvm_hlt_in_guest(svm->vcpu.kvm))
1539 set_intercept(svm, INTERCEPT_HLT);
1540
1541 control->iopm_base_pa = __sme_set(iopm_base);
1542 control->msrpm_base_pa = __sme_set(__pa(svm->msrpm));
1543 control->int_ctl = V_INTR_MASKING_MASK;
1544
1545 init_seg(&save->es);
1546 init_seg(&save->ss);
1547 init_seg(&save->ds);
1548 init_seg(&save->fs);
1549 init_seg(&save->gs);
1550
1551 save->cs.selector = 0xf000;
1552 save->cs.base = 0xffff0000;
1553
1554 save->cs.attrib = SVM_SELECTOR_READ_MASK | SVM_SELECTOR_P_MASK |
1555 SVM_SELECTOR_S_MASK | SVM_SELECTOR_CODE_MASK;
1556 save->cs.limit = 0xffff;
1557
1558 save->gdtr.limit = 0xffff;
1559 save->idtr.limit = 0xffff;
1560
1561 init_sys_seg(&save->ldtr, SEG_TYPE_LDT);
1562 init_sys_seg(&save->tr, SEG_TYPE_BUSY_TSS16);
1563
1564 svm_set_efer(&svm->vcpu, 0);
1565 save->dr6 = 0xffff0ff0;
1566 kvm_set_rflags(&svm->vcpu, 2);
1567 save->rip = 0x0000fff0;
1568 svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip;
1569
1570
1571
1572
1573
1574 svm_set_cr0(&svm->vcpu, X86_CR0_NW | X86_CR0_CD | X86_CR0_ET);
1575 kvm_mmu_reset_context(&svm->vcpu);
1576
1577 save->cr4 = X86_CR4_PAE;
1578
1579
1580 if (npt_enabled) {
1581
1582 control->nested_ctl |= SVM_NESTED_CTL_NP_ENABLE;
1583 clr_intercept(svm, INTERCEPT_INVLPG);
1584 clr_exception_intercept(svm, PF_VECTOR);
1585 clr_cr_intercept(svm, INTERCEPT_CR3_READ);
1586 clr_cr_intercept(svm, INTERCEPT_CR3_WRITE);
1587 save->g_pat = svm->vcpu.arch.pat;
1588 save->cr3 = 0;
1589 save->cr4 = 0;
1590 }
1591 svm->asid_generation = 0;
1592
1593 svm->nested.vmcb = 0;
1594 svm->vcpu.arch.hflags = 0;
1595
1596 if (pause_filter_count) {
1597 control->pause_filter_count = pause_filter_count;
1598 if (pause_filter_thresh)
1599 control->pause_filter_thresh = pause_filter_thresh;
1600 set_intercept(svm, INTERCEPT_PAUSE);
1601 } else {
1602 clr_intercept(svm, INTERCEPT_PAUSE);
1603 }
1604
1605 if (kvm_vcpu_apicv_active(&svm->vcpu))
1606 avic_init_vmcb(svm);
1607
1608
1609
1610
1611
1612 if (vls) {
1613 clr_intercept(svm, INTERCEPT_VMLOAD);
1614 clr_intercept(svm, INTERCEPT_VMSAVE);
1615 svm->vmcb->control.virt_ext |= VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK;
1616 }
1617
1618 if (vgif) {
1619 clr_intercept(svm, INTERCEPT_STGI);
1620 clr_intercept(svm, INTERCEPT_CLGI);
1621 svm->vmcb->control.int_ctl |= V_GIF_ENABLE_MASK;
1622 }
1623
1624 if (sev_guest(svm->vcpu.kvm)) {
1625 svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ENABLE;
1626 clr_exception_intercept(svm, UD_VECTOR);
1627 }
1628
1629 mark_all_dirty(svm->vmcb);
1630
1631 enable_gif(svm);
1632
1633}
1634
1635static u64 *avic_get_physical_id_entry(struct kvm_vcpu *vcpu,
1636 unsigned int index)
1637{
1638 u64 *avic_physical_id_table;
1639 struct kvm_svm *kvm_svm = to_kvm_svm(vcpu->kvm);
1640
1641 if (index >= AVIC_MAX_PHYSICAL_ID_COUNT)
1642 return NULL;
1643
1644 avic_physical_id_table = page_address(kvm_svm->avic_physical_id_table_page);
1645
1646 return &avic_physical_id_table[index];
1647}
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657static int avic_init_access_page(struct kvm_vcpu *vcpu)
1658{
1659 struct kvm *kvm = vcpu->kvm;
1660 int ret;
1661
1662 if (kvm->arch.apic_access_page_done)
1663 return 0;
1664
1665 ret = x86_set_memory_region(kvm,
1666 APIC_ACCESS_PAGE_PRIVATE_MEMSLOT,
1667 APIC_DEFAULT_PHYS_BASE,
1668 PAGE_SIZE);
1669 if (ret)
1670 return ret;
1671
1672 kvm->arch.apic_access_page_done = true;
1673 return 0;
1674}
1675
1676static int avic_init_backing_page(struct kvm_vcpu *vcpu)
1677{
1678 int ret;
1679 u64 *entry, new_entry;
1680 int id = vcpu->vcpu_id;
1681 struct vcpu_svm *svm = to_svm(vcpu);
1682
1683 ret = avic_init_access_page(vcpu);
1684 if (ret)
1685 return ret;
1686
1687 if (id >= AVIC_MAX_PHYSICAL_ID_COUNT)
1688 return -EINVAL;
1689
1690 if (!svm->vcpu.arch.apic->regs)
1691 return -EINVAL;
1692
1693 svm->avic_backing_page = virt_to_page(svm->vcpu.arch.apic->regs);
1694
1695
1696 entry = avic_get_physical_id_entry(vcpu, id);
1697 if (!entry)
1698 return -EINVAL;
1699
1700 new_entry = READ_ONCE(*entry);
1701 new_entry = __sme_set((page_to_phys(svm->avic_backing_page) &
1702 AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK) |
1703 AVIC_PHYSICAL_ID_ENTRY_VALID_MASK);
1704 WRITE_ONCE(*entry, new_entry);
1705
1706 svm->avic_physical_id_cache = entry;
1707
1708 return 0;
1709}
1710
1711static void __sev_asid_free(int asid)
1712{
1713 struct svm_cpu_data *sd;
1714 int cpu, pos;
1715
1716 pos = asid - 1;
1717 clear_bit(pos, sev_asid_bitmap);
1718
1719 for_each_possible_cpu(cpu) {
1720 sd = per_cpu(svm_data, cpu);
1721 sd->sev_vmcbs[pos] = NULL;
1722 }
1723}
1724
1725static void sev_asid_free(struct kvm *kvm)
1726{
1727 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
1728
1729 __sev_asid_free(sev->asid);
1730}
1731
1732static void sev_unbind_asid(struct kvm *kvm, unsigned int handle)
1733{
1734 struct sev_data_decommission *decommission;
1735 struct sev_data_deactivate *data;
1736
1737 if (!handle)
1738 return;
1739
1740 data = kzalloc(sizeof(*data), GFP_KERNEL);
1741 if (!data)
1742 return;
1743
1744
1745 data->handle = handle;
1746 sev_guest_deactivate(data, NULL);
1747
1748 wbinvd_on_all_cpus();
1749 sev_guest_df_flush(NULL);
1750 kfree(data);
1751
1752 decommission = kzalloc(sizeof(*decommission), GFP_KERNEL);
1753 if (!decommission)
1754 return;
1755
1756
1757 decommission->handle = handle;
1758 sev_guest_decommission(decommission, NULL);
1759
1760 kfree(decommission);
1761}
1762
1763static struct page **sev_pin_memory(struct kvm *kvm, unsigned long uaddr,
1764 unsigned long ulen, unsigned long *n,
1765 int write)
1766{
1767 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
1768 unsigned long npages, npinned, size;
1769 unsigned long locked, lock_limit;
1770 struct page **pages;
1771 int first, last;
1772
1773
1774 first = (uaddr & PAGE_MASK) >> PAGE_SHIFT;
1775 last = ((uaddr + ulen - 1) & PAGE_MASK) >> PAGE_SHIFT;
1776 npages = (last - first + 1);
1777
1778 locked = sev->pages_locked + npages;
1779 lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
1780 if (locked > lock_limit && !capable(CAP_IPC_LOCK)) {
1781 pr_err("SEV: %lu locked pages exceed the lock limit of %lu.\n", locked, lock_limit);
1782 return NULL;
1783 }
1784
1785
1786 size = npages * sizeof(struct page *);
1787 if (size > PAGE_SIZE)
1788 pages = vmalloc(size);
1789 else
1790 pages = kmalloc(size, GFP_KERNEL);
1791
1792 if (!pages)
1793 return NULL;
1794
1795
1796 npinned = get_user_pages_fast(uaddr, npages, write ? FOLL_WRITE : 0, pages);
1797 if (npinned != npages) {
1798 pr_err("SEV: Failure locking %lu pages.\n", npages);
1799 goto err;
1800 }
1801
1802 *n = npages;
1803 sev->pages_locked = locked;
1804
1805 return pages;
1806
1807err:
1808 if (npinned > 0)
1809 release_pages(pages, npinned);
1810
1811 kvfree(pages);
1812 return NULL;
1813}
1814
1815static void sev_unpin_memory(struct kvm *kvm, struct page **pages,
1816 unsigned long npages)
1817{
1818 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
1819
1820 release_pages(pages, npages);
1821 kvfree(pages);
1822 sev->pages_locked -= npages;
1823}
1824
1825static void sev_clflush_pages(struct page *pages[], unsigned long npages)
1826{
1827 uint8_t *page_virtual;
1828 unsigned long i;
1829
1830 if (npages == 0 || pages == NULL)
1831 return;
1832
1833 for (i = 0; i < npages; i++) {
1834 page_virtual = kmap_atomic(pages[i]);
1835 clflush_cache_range(page_virtual, PAGE_SIZE);
1836 kunmap_atomic(page_virtual);
1837 }
1838}
1839
1840static void __unregister_enc_region_locked(struct kvm *kvm,
1841 struct enc_region *region)
1842{
1843
1844
1845
1846
1847
1848
1849 sev_clflush_pages(region->pages, region->npages);
1850
1851 sev_unpin_memory(kvm, region->pages, region->npages);
1852 list_del(®ion->list);
1853 kfree(region);
1854}
1855
1856static struct kvm *svm_vm_alloc(void)
1857{
1858 struct kvm_svm *kvm_svm = kzalloc(sizeof(struct kvm_svm), GFP_KERNEL);
1859 return &kvm_svm->kvm;
1860}
1861
1862static void svm_vm_free(struct kvm *kvm)
1863{
1864 kfree(to_kvm_svm(kvm));
1865}
1866
1867static void sev_vm_destroy(struct kvm *kvm)
1868{
1869 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
1870 struct list_head *head = &sev->regions_list;
1871 struct list_head *pos, *q;
1872
1873 if (!sev_guest(kvm))
1874 return;
1875
1876 mutex_lock(&kvm->lock);
1877
1878
1879
1880
1881
1882 if (!list_empty(head)) {
1883 list_for_each_safe(pos, q, head) {
1884 __unregister_enc_region_locked(kvm,
1885 list_entry(pos, struct enc_region, list));
1886 }
1887 }
1888
1889 mutex_unlock(&kvm->lock);
1890
1891 sev_unbind_asid(kvm, sev->handle);
1892 sev_asid_free(kvm);
1893}
1894
1895static void avic_vm_destroy(struct kvm *kvm)
1896{
1897 unsigned long flags;
1898 struct kvm_svm *kvm_svm = to_kvm_svm(kvm);
1899
1900 if (!avic)
1901 return;
1902
1903 if (kvm_svm->avic_logical_id_table_page)
1904 __free_page(kvm_svm->avic_logical_id_table_page);
1905 if (kvm_svm->avic_physical_id_table_page)
1906 __free_page(kvm_svm->avic_physical_id_table_page);
1907
1908 spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
1909 hash_del(&kvm_svm->hnode);
1910 spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags);
1911}
1912
1913static void svm_vm_destroy(struct kvm *kvm)
1914{
1915 avic_vm_destroy(kvm);
1916 sev_vm_destroy(kvm);
1917}
1918
1919static int avic_vm_init(struct kvm *kvm)
1920{
1921 unsigned long flags;
1922 int err = -ENOMEM;
1923 struct kvm_svm *kvm_svm = to_kvm_svm(kvm);
1924 struct kvm_svm *k2;
1925 struct page *p_page;
1926 struct page *l_page;
1927 u32 vm_id;
1928
1929 if (!avic)
1930 return 0;
1931
1932
1933 p_page = alloc_page(GFP_KERNEL);
1934 if (!p_page)
1935 goto free_avic;
1936
1937 kvm_svm->avic_physical_id_table_page = p_page;
1938 clear_page(page_address(p_page));
1939
1940
1941 l_page = alloc_page(GFP_KERNEL);
1942 if (!l_page)
1943 goto free_avic;
1944
1945 kvm_svm->avic_logical_id_table_page = l_page;
1946 clear_page(page_address(l_page));
1947
1948 spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
1949 again:
1950 vm_id = next_vm_id = (next_vm_id + 1) & AVIC_VM_ID_MASK;
1951 if (vm_id == 0) {
1952 next_vm_id_wrapped = 1;
1953 goto again;
1954 }
1955
1956 if (next_vm_id_wrapped) {
1957 hash_for_each_possible(svm_vm_data_hash, k2, hnode, vm_id) {
1958 if (k2->avic_vm_id == vm_id)
1959 goto again;
1960 }
1961 }
1962 kvm_svm->avic_vm_id = vm_id;
1963 hash_add(svm_vm_data_hash, &kvm_svm->hnode, kvm_svm->avic_vm_id);
1964 spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags);
1965
1966 return 0;
1967
1968free_avic:
1969 avic_vm_destroy(kvm);
1970 return err;
1971}
1972
1973static inline int
1974avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r)
1975{
1976 int ret = 0;
1977 unsigned long flags;
1978 struct amd_svm_iommu_ir *ir;
1979 struct vcpu_svm *svm = to_svm(vcpu);
1980
1981 if (!kvm_arch_has_assigned_device(vcpu->kvm))
1982 return 0;
1983
1984
1985
1986
1987
1988 spin_lock_irqsave(&svm->ir_list_lock, flags);
1989
1990 if (list_empty(&svm->ir_list))
1991 goto out;
1992
1993 list_for_each_entry(ir, &svm->ir_list, node) {
1994 ret = amd_iommu_update_ga(cpu, r, ir->data);
1995 if (ret)
1996 break;
1997 }
1998out:
1999 spin_unlock_irqrestore(&svm->ir_list_lock, flags);
2000 return ret;
2001}
2002
2003static void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2004{
2005 u64 entry;
2006
2007 int h_physical_id = kvm_cpu_get_apicid(cpu);
2008 struct vcpu_svm *svm = to_svm(vcpu);
2009
2010 if (!kvm_vcpu_apicv_active(vcpu))
2011 return;
2012
2013 if (WARN_ON(h_physical_id >= AVIC_MAX_PHYSICAL_ID_COUNT))
2014 return;
2015
2016 entry = READ_ONCE(*(svm->avic_physical_id_cache));
2017 WARN_ON(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK);
2018
2019 entry &= ~AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK;
2020 entry |= (h_physical_id & AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK);
2021
2022 entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
2023 if (svm->avic_is_running)
2024 entry |= AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
2025
2026 WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
2027 avic_update_iommu_vcpu_affinity(vcpu, h_physical_id,
2028 svm->avic_is_running);
2029}
2030
2031static void avic_vcpu_put(struct kvm_vcpu *vcpu)
2032{
2033 u64 entry;
2034 struct vcpu_svm *svm = to_svm(vcpu);
2035
2036 if (!kvm_vcpu_apicv_active(vcpu))
2037 return;
2038
2039 entry = READ_ONCE(*(svm->avic_physical_id_cache));
2040 if (entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK)
2041 avic_update_iommu_vcpu_affinity(vcpu, -1, 0);
2042
2043 entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
2044 WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
2045}
2046
2047
2048
2049
2050static void avic_set_running(struct kvm_vcpu *vcpu, bool is_run)
2051{
2052 struct vcpu_svm *svm = to_svm(vcpu);
2053
2054 svm->avic_is_running = is_run;
2055 if (is_run)
2056 avic_vcpu_load(vcpu, vcpu->cpu);
2057 else
2058 avic_vcpu_put(vcpu);
2059}
2060
2061static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
2062{
2063 struct vcpu_svm *svm = to_svm(vcpu);
2064 u32 dummy;
2065 u32 eax = 1;
2066
2067 vcpu->arch.microcode_version = 0x01000065;
2068 svm->spec_ctrl = 0;
2069 svm->virt_spec_ctrl = 0;
2070
2071 if (!init_event) {
2072 svm->vcpu.arch.apic_base = APIC_DEFAULT_PHYS_BASE |
2073 MSR_IA32_APICBASE_ENABLE;
2074 if (kvm_vcpu_is_reset_bsp(&svm->vcpu))
2075 svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP;
2076 }
2077 init_vmcb(svm);
2078
2079 kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy, true);
2080 kvm_register_write(vcpu, VCPU_REGS_RDX, eax);
2081
2082 if (kvm_vcpu_apicv_active(vcpu) && !init_event)
2083 avic_update_vapic_bar(svm, APIC_DEFAULT_PHYS_BASE);
2084}
2085
2086static int avic_init_vcpu(struct vcpu_svm *svm)
2087{
2088 int ret;
2089
2090 if (!kvm_vcpu_apicv_active(&svm->vcpu))
2091 return 0;
2092
2093 ret = avic_init_backing_page(&svm->vcpu);
2094 if (ret)
2095 return ret;
2096
2097 INIT_LIST_HEAD(&svm->ir_list);
2098 spin_lock_init(&svm->ir_list_lock);
2099
2100 return ret;
2101}
2102
2103static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
2104{
2105 struct vcpu_svm *svm;
2106 struct page *page;
2107 struct page *msrpm_pages;
2108 struct page *hsave_page;
2109 struct page *nested_msrpm_pages;
2110 int err;
2111
2112 svm = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
2113 if (!svm) {
2114 err = -ENOMEM;
2115 goto out;
2116 }
2117
2118 err = kvm_vcpu_init(&svm->vcpu, kvm, id);
2119 if (err)
2120 goto free_svm;
2121
2122 err = -ENOMEM;
2123 page = alloc_page(GFP_KERNEL);
2124 if (!page)
2125 goto uninit;
2126
2127 msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER);
2128 if (!msrpm_pages)
2129 goto free_page1;
2130
2131 nested_msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER);
2132 if (!nested_msrpm_pages)
2133 goto free_page2;
2134
2135 hsave_page = alloc_page(GFP_KERNEL);
2136 if (!hsave_page)
2137 goto free_page3;
2138
2139 err = avic_init_vcpu(svm);
2140 if (err)
2141 goto free_page4;
2142
2143
2144
2145
2146 svm->avic_is_running = true;
2147
2148 svm->nested.hsave = page_address(hsave_page);
2149
2150 svm->msrpm = page_address(msrpm_pages);
2151 svm_vcpu_init_msrpm(svm->msrpm);
2152
2153 svm->nested.msrpm = page_address(nested_msrpm_pages);
2154 svm_vcpu_init_msrpm(svm->nested.msrpm);
2155
2156 svm->vmcb = page_address(page);
2157 clear_page(svm->vmcb);
2158 svm->vmcb_pa = __sme_set(page_to_pfn(page) << PAGE_SHIFT);
2159 svm->asid_generation = 0;
2160 init_vmcb(svm);
2161
2162 svm_init_osvw(&svm->vcpu);
2163
2164 return &svm->vcpu;
2165
2166free_page4:
2167 __free_page(hsave_page);
2168free_page3:
2169 __free_pages(nested_msrpm_pages, MSRPM_ALLOC_ORDER);
2170free_page2:
2171 __free_pages(msrpm_pages, MSRPM_ALLOC_ORDER);
2172free_page1:
2173 __free_page(page);
2174uninit:
2175 kvm_vcpu_uninit(&svm->vcpu);
2176free_svm:
2177 kmem_cache_free(kvm_vcpu_cache, svm);
2178out:
2179 return ERR_PTR(err);
2180}
2181
2182static void svm_free_vcpu(struct kvm_vcpu *vcpu)
2183{
2184 struct vcpu_svm *svm = to_svm(vcpu);
2185
2186 __free_page(pfn_to_page(__sme_clr(svm->vmcb_pa) >> PAGE_SHIFT));
2187 __free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER);
2188 __free_page(virt_to_page(svm->nested.hsave));
2189 __free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER);
2190 kvm_vcpu_uninit(vcpu);
2191 kmem_cache_free(kvm_vcpu_cache, svm);
2192
2193
2194
2195
2196 indirect_branch_prediction_barrier();
2197}
2198
2199static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2200{
2201 struct vcpu_svm *svm = to_svm(vcpu);
2202 struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
2203 int i;
2204
2205 if (unlikely(cpu != vcpu->cpu)) {
2206 svm->asid_generation = 0;
2207 mark_all_dirty(svm->vmcb);
2208 }
2209
2210#ifdef CONFIG_X86_64
2211 rdmsrl(MSR_GS_BASE, to_svm(vcpu)->host.gs_base);
2212#endif
2213 savesegment(fs, svm->host.fs);
2214 savesegment(gs, svm->host.gs);
2215 svm->host.ldt = kvm_read_ldt();
2216
2217 for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
2218 rdmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);
2219
2220 if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) {
2221 u64 tsc_ratio = vcpu->arch.tsc_scaling_ratio;
2222 if (tsc_ratio != __this_cpu_read(current_tsc_ratio)) {
2223 __this_cpu_write(current_tsc_ratio, tsc_ratio);
2224 wrmsrl(MSR_AMD64_TSC_RATIO, tsc_ratio);
2225 }
2226 }
2227
2228 if (static_cpu_has(X86_FEATURE_RDTSCP))
2229 wrmsrl(MSR_TSC_AUX, svm->tsc_aux);
2230
2231 if (sd->current_vmcb != svm->vmcb) {
2232 sd->current_vmcb = svm->vmcb;
2233 indirect_branch_prediction_barrier();
2234 }
2235 avic_vcpu_load(vcpu, cpu);
2236}
2237
2238static void svm_vcpu_put(struct kvm_vcpu *vcpu)
2239{
2240 struct vcpu_svm *svm = to_svm(vcpu);
2241 int i;
2242
2243 avic_vcpu_put(vcpu);
2244
2245 ++vcpu->stat.host_state_reload;
2246 kvm_load_ldt(svm->host.ldt);
2247#ifdef CONFIG_X86_64
2248 loadsegment(fs, svm->host.fs);
2249 wrmsrl(MSR_KERNEL_GS_BASE, current->thread.gsbase);
2250 load_gs_index(svm->host.gs);
2251#else
2252#ifdef CONFIG_X86_32_LAZY_GS
2253 loadsegment(gs, svm->host.gs);
2254#endif
2255#endif
2256 for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
2257 wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);
2258}
2259
2260static void svm_vcpu_blocking(struct kvm_vcpu *vcpu)
2261{
2262 avic_set_running(vcpu, false);
2263}
2264
2265static void svm_vcpu_unblocking(struct kvm_vcpu *vcpu)
2266{
2267 avic_set_running(vcpu, true);
2268}
2269
2270static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu)
2271{
2272 struct vcpu_svm *svm = to_svm(vcpu);
2273 unsigned long rflags = svm->vmcb->save.rflags;
2274
2275 if (svm->nmi_singlestep) {
2276
2277 if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_TF))
2278 rflags &= ~X86_EFLAGS_TF;
2279 if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_RF))
2280 rflags &= ~X86_EFLAGS_RF;
2281 }
2282 return rflags;
2283}
2284
2285static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
2286{
2287 if (to_svm(vcpu)->nmi_singlestep)
2288 rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
2289
2290
2291
2292
2293
2294
2295 to_svm(vcpu)->vmcb->save.rflags = rflags;
2296}
2297
2298static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
2299{
2300 switch (reg) {
2301 case VCPU_EXREG_PDPTR:
2302 BUG_ON(!npt_enabled);
2303 load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu));
2304 break;
2305 default:
2306 BUG();
2307 }
2308}
2309
2310static void svm_set_vintr(struct vcpu_svm *svm)
2311{
2312 set_intercept(svm, INTERCEPT_VINTR);
2313}
2314
2315static void svm_clear_vintr(struct vcpu_svm *svm)
2316{
2317 clr_intercept(svm, INTERCEPT_VINTR);
2318}
2319
2320static struct vmcb_seg *svm_seg(struct kvm_vcpu *vcpu, int seg)
2321{
2322 struct vmcb_save_area *save = &to_svm(vcpu)->vmcb->save;
2323
2324 switch (seg) {
2325 case VCPU_SREG_CS: return &save->cs;
2326 case VCPU_SREG_DS: return &save->ds;
2327 case VCPU_SREG_ES: return &save->es;
2328 case VCPU_SREG_FS: return &save->fs;
2329 case VCPU_SREG_GS: return &save->gs;
2330 case VCPU_SREG_SS: return &save->ss;
2331 case VCPU_SREG_TR: return &save->tr;
2332 case VCPU_SREG_LDTR: return &save->ldtr;
2333 }
2334 BUG();
2335 return NULL;
2336}
2337
2338static u64 svm_get_segment_base(struct kvm_vcpu *vcpu, int seg)
2339{
2340 struct vmcb_seg *s = svm_seg(vcpu, seg);
2341
2342 return s->base;
2343}
2344
2345static void svm_get_segment(struct kvm_vcpu *vcpu,
2346 struct kvm_segment *var, int seg)
2347{
2348 struct vmcb_seg *s = svm_seg(vcpu, seg);
2349
2350 var->base = s->base;
2351 var->limit = s->limit;
2352 var->selector = s->selector;
2353 var->type = s->attrib & SVM_SELECTOR_TYPE_MASK;
2354 var->s = (s->attrib >> SVM_SELECTOR_S_SHIFT) & 1;
2355 var->dpl = (s->attrib >> SVM_SELECTOR_DPL_SHIFT) & 3;
2356 var->present = (s->attrib >> SVM_SELECTOR_P_SHIFT) & 1;
2357 var->avl = (s->attrib >> SVM_SELECTOR_AVL_SHIFT) & 1;
2358 var->l = (s->attrib >> SVM_SELECTOR_L_SHIFT) & 1;
2359 var->db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1;
2360
2361
2362
2363
2364
2365
2366
2367
2368
2369 var->g = s->limit > 0xfffff;
2370
2371
2372
2373
2374
2375 var->unusable = !var->present;
2376
2377 switch (seg) {
2378 case VCPU_SREG_TR:
2379
2380
2381
2382
2383 var->type |= 0x2;
2384 break;
2385 case VCPU_SREG_DS:
2386 case VCPU_SREG_ES:
2387 case VCPU_SREG_FS:
2388 case VCPU_SREG_GS:
2389
2390
2391
2392
2393
2394
2395
2396 if (!var->unusable)
2397 var->type |= 0x1;
2398 break;
2399 case VCPU_SREG_SS:
2400
2401
2402
2403
2404
2405
2406 if (var->unusable)
2407 var->db = 0;
2408
2409 var->dpl = to_svm(vcpu)->vmcb->save.cpl;
2410 break;
2411 }
2412}
2413
2414static int svm_get_cpl(struct kvm_vcpu *vcpu)
2415{
2416 struct vmcb_save_area *save = &to_svm(vcpu)->vmcb->save;
2417
2418 return save->cpl;
2419}
2420
2421static void svm_get_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
2422{
2423 struct vcpu_svm *svm = to_svm(vcpu);
2424
2425 dt->size = svm->vmcb->save.idtr.limit;
2426 dt->address = svm->vmcb->save.idtr.base;
2427}
2428
2429static void svm_set_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
2430{
2431 struct vcpu_svm *svm = to_svm(vcpu);
2432
2433 svm->vmcb->save.idtr.limit = dt->size;
2434 svm->vmcb->save.idtr.base = dt->address ;
2435 mark_dirty(svm->vmcb, VMCB_DT);
2436}
2437
2438static void svm_get_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
2439{
2440 struct vcpu_svm *svm = to_svm(vcpu);
2441
2442 dt->size = svm->vmcb->save.gdtr.limit;
2443 dt->address = svm->vmcb->save.gdtr.base;
2444}
2445
2446static void svm_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
2447{
2448 struct vcpu_svm *svm = to_svm(vcpu);
2449
2450 svm->vmcb->save.gdtr.limit = dt->size;
2451 svm->vmcb->save.gdtr.base = dt->address ;
2452 mark_dirty(svm->vmcb, VMCB_DT);
2453}
2454
2455static void svm_decache_cr0_guest_bits(struct kvm_vcpu *vcpu)
2456{
2457}
2458
2459static void svm_decache_cr3(struct kvm_vcpu *vcpu)
2460{
2461}
2462
2463static void svm_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
2464{
2465}
2466
2467static void update_cr0_intercept(struct vcpu_svm *svm)
2468{
2469 ulong gcr0 = svm->vcpu.arch.cr0;
2470 u64 *hcr0 = &svm->vmcb->save.cr0;
2471
2472 *hcr0 = (*hcr0 & ~SVM_CR0_SELECTIVE_MASK)
2473 | (gcr0 & SVM_CR0_SELECTIVE_MASK);
2474
2475 mark_dirty(svm->vmcb, VMCB_CR);
2476
2477 if (gcr0 == *hcr0) {
2478 clr_cr_intercept(svm, INTERCEPT_CR0_READ);
2479 clr_cr_intercept(svm, INTERCEPT_CR0_WRITE);
2480 } else {
2481 set_cr_intercept(svm, INTERCEPT_CR0_READ);
2482 set_cr_intercept(svm, INTERCEPT_CR0_WRITE);
2483 }
2484}
2485
2486static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
2487{
2488 struct vcpu_svm *svm = to_svm(vcpu);
2489
2490#ifdef CONFIG_X86_64
2491 if (vcpu->arch.efer & EFER_LME) {
2492 if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
2493 vcpu->arch.efer |= EFER_LMA;
2494 svm->vmcb->save.efer |= EFER_LMA | EFER_LME;
2495 }
2496
2497 if (is_paging(vcpu) && !(cr0 & X86_CR0_PG)) {
2498 vcpu->arch.efer &= ~EFER_LMA;
2499 svm->vmcb->save.efer &= ~(EFER_LMA | EFER_LME);
2500 }
2501 }
2502#endif
2503 vcpu->arch.cr0 = cr0;
2504
2505 if (!npt_enabled)
2506 cr0 |= X86_CR0_PG | X86_CR0_WP;
2507
2508
2509
2510
2511
2512
2513 if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
2514 cr0 &= ~(X86_CR0_CD | X86_CR0_NW);
2515 svm->vmcb->save.cr0 = cr0;
2516 mark_dirty(svm->vmcb, VMCB_CR);
2517 update_cr0_intercept(svm);
2518}
2519
2520static int svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
2521{
2522 unsigned long host_cr4_mce = cr4_read_shadow() & X86_CR4_MCE;
2523 unsigned long old_cr4 = to_svm(vcpu)->vmcb->save.cr4;
2524
2525 if (cr4 & X86_CR4_VMXE)
2526 return 1;
2527
2528 if (npt_enabled && ((old_cr4 ^ cr4) & X86_CR4_PGE))
2529 svm_flush_tlb(vcpu, true);
2530
2531 vcpu->arch.cr4 = cr4;
2532 if (!npt_enabled)
2533 cr4 |= X86_CR4_PAE;
2534 cr4 |= host_cr4_mce;
2535 to_svm(vcpu)->vmcb->save.cr4 = cr4;
2536 mark_dirty(to_svm(vcpu)->vmcb, VMCB_CR);
2537 return 0;
2538}
2539
2540static void svm_set_segment(struct kvm_vcpu *vcpu,
2541 struct kvm_segment *var, int seg)
2542{
2543 struct vcpu_svm *svm = to_svm(vcpu);
2544 struct vmcb_seg *s = svm_seg(vcpu, seg);
2545
2546 s->base = var->base;
2547 s->limit = var->limit;
2548 s->selector = var->selector;
2549 s->attrib = (var->type & SVM_SELECTOR_TYPE_MASK);
2550 s->attrib |= (var->s & 1) << SVM_SELECTOR_S_SHIFT;
2551 s->attrib |= (var->dpl & 3) << SVM_SELECTOR_DPL_SHIFT;
2552 s->attrib |= ((var->present & 1) && !var->unusable) << SVM_SELECTOR_P_SHIFT;
2553 s->attrib |= (var->avl & 1) << SVM_SELECTOR_AVL_SHIFT;
2554 s->attrib |= (var->l & 1) << SVM_SELECTOR_L_SHIFT;
2555 s->attrib |= (var->db & 1) << SVM_SELECTOR_DB_SHIFT;
2556 s->attrib |= (var->g & 1) << SVM_SELECTOR_G_SHIFT;
2557
2558
2559
2560
2561
2562
2563
2564 if (seg == VCPU_SREG_SS)
2565
2566 svm->vmcb->save.cpl = (var->dpl & 3);
2567
2568 mark_dirty(svm->vmcb, VMCB_SEG);
2569}
2570
2571static void update_bp_intercept(struct kvm_vcpu *vcpu)
2572{
2573 struct vcpu_svm *svm = to_svm(vcpu);
2574
2575 clr_exception_intercept(svm, BP_VECTOR);
2576
2577 if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) {
2578 if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
2579 set_exception_intercept(svm, BP_VECTOR);
2580 } else
2581 vcpu->guest_debug = 0;
2582}
2583
2584static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd)
2585{
2586 if (sd->next_asid > sd->max_asid) {
2587 ++sd->asid_generation;
2588 sd->next_asid = sd->min_asid;
2589 svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ALL_ASID;
2590 }
2591
2592 svm->asid_generation = sd->asid_generation;
2593 svm->vmcb->control.asid = sd->next_asid++;
2594
2595 mark_dirty(svm->vmcb, VMCB_ASID);
2596}
2597
2598static u64 svm_get_dr6(struct kvm_vcpu *vcpu)
2599{
2600 return to_svm(vcpu)->vmcb->save.dr6;
2601}
2602
2603static void svm_set_dr6(struct kvm_vcpu *vcpu, unsigned long value)
2604{
2605 struct vcpu_svm *svm = to_svm(vcpu);
2606
2607 svm->vmcb->save.dr6 = value;
2608 mark_dirty(svm->vmcb, VMCB_DR);
2609}
2610
2611static void svm_sync_dirty_debug_regs(struct kvm_vcpu *vcpu)
2612{
2613 struct vcpu_svm *svm = to_svm(vcpu);
2614
2615 get_debugreg(vcpu->arch.db[0], 0);
2616 get_debugreg(vcpu->arch.db[1], 1);
2617 get_debugreg(vcpu->arch.db[2], 2);
2618 get_debugreg(vcpu->arch.db[3], 3);
2619 vcpu->arch.dr6 = svm_get_dr6(vcpu);
2620 vcpu->arch.dr7 = svm->vmcb->save.dr7;
2621
2622 vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_WONT_EXIT;
2623 set_dr_intercepts(svm);
2624}
2625
2626static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value)
2627{
2628 struct vcpu_svm *svm = to_svm(vcpu);
2629
2630 svm->vmcb->save.dr7 = value;
2631 mark_dirty(svm->vmcb, VMCB_DR);
2632}
2633
2634static int pf_interception(struct vcpu_svm *svm)
2635{
2636 u64 fault_address = __sme_clr(svm->vmcb->control.exit_info_2);
2637 u64 error_code = svm->vmcb->control.exit_info_1;
2638
2639 return kvm_handle_page_fault(&svm->vcpu, error_code, fault_address,
2640 static_cpu_has(X86_FEATURE_DECODEASSISTS) ?
2641 svm->vmcb->control.insn_bytes : NULL,
2642 svm->vmcb->control.insn_len);
2643}
2644
2645static int npf_interception(struct vcpu_svm *svm)
2646{
2647 u64 fault_address = __sme_clr(svm->vmcb->control.exit_info_2);
2648 u64 error_code = svm->vmcb->control.exit_info_1;
2649
2650 trace_kvm_page_fault(fault_address, error_code);
2651 return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code,
2652 static_cpu_has(X86_FEATURE_DECODEASSISTS) ?
2653 svm->vmcb->control.insn_bytes : NULL,
2654 svm->vmcb->control.insn_len);
2655}
2656
2657static int db_interception(struct vcpu_svm *svm)
2658{
2659 struct kvm_run *kvm_run = svm->vcpu.run;
2660
2661 if (!(svm->vcpu.guest_debug &
2662 (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) &&
2663 !svm->nmi_singlestep) {
2664 kvm_queue_exception(&svm->vcpu, DB_VECTOR);
2665 return 1;
2666 }
2667
2668 if (svm->nmi_singlestep) {
2669 disable_nmi_singlestep(svm);
2670 }
2671
2672 if (svm->vcpu.guest_debug &
2673 (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) {
2674 kvm_run->exit_reason = KVM_EXIT_DEBUG;
2675 kvm_run->debug.arch.pc =
2676 svm->vmcb->save.cs.base + svm->vmcb->save.rip;
2677 kvm_run->debug.arch.exception = DB_VECTOR;
2678 return 0;
2679 }
2680
2681 return 1;
2682}
2683
2684static int bp_interception(struct vcpu_svm *svm)
2685{
2686 struct kvm_run *kvm_run = svm->vcpu.run;
2687
2688 kvm_run->exit_reason = KVM_EXIT_DEBUG;
2689 kvm_run->debug.arch.pc = svm->vmcb->save.cs.base + svm->vmcb->save.rip;
2690 kvm_run->debug.arch.exception = BP_VECTOR;
2691 return 0;
2692}
2693
2694static int ud_interception(struct vcpu_svm *svm)
2695{
2696 return handle_ud(&svm->vcpu);
2697}
2698
2699static int ac_interception(struct vcpu_svm *svm)
2700{
2701 kvm_queue_exception_e(&svm->vcpu, AC_VECTOR, 0);
2702 return 1;
2703}
2704
2705static int gp_interception(struct vcpu_svm *svm)
2706{
2707 struct kvm_vcpu *vcpu = &svm->vcpu;
2708 u32 error_code = svm->vmcb->control.exit_info_1;
2709 int er;
2710
2711 WARN_ON_ONCE(!enable_vmware_backdoor);
2712
2713 er = emulate_instruction(vcpu,
2714 EMULTYPE_VMWARE | EMULTYPE_NO_UD_ON_FAIL);
2715 if (er == EMULATE_USER_EXIT)
2716 return 0;
2717 else if (er != EMULATE_DONE)
2718 kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
2719 return 1;
2720}
2721
2722static bool is_erratum_383(void)
2723{
2724 int err, i;
2725 u64 value;
2726
2727 if (!erratum_383_found)
2728 return false;
2729
2730 value = native_read_msr_safe(MSR_IA32_MC0_STATUS, &err);
2731 if (err)
2732 return false;
2733
2734
2735 value &= ~(1ULL << 62);
2736
2737 if (value != 0xb600000000010015ULL)
2738 return false;
2739
2740
2741 for (i = 0; i < 6; ++i)
2742 native_write_msr_safe(MSR_IA32_MCx_STATUS(i), 0, 0);
2743
2744 value = native_read_msr_safe(MSR_IA32_MCG_STATUS, &err);
2745 if (!err) {
2746 u32 low, high;
2747
2748 value &= ~(1ULL << 2);
2749 low = lower_32_bits(value);
2750 high = upper_32_bits(value);
2751
2752 native_write_msr_safe(MSR_IA32_MCG_STATUS, low, high);
2753 }
2754
2755
2756 __flush_tlb_all();
2757
2758 return true;
2759}
2760
2761static void svm_handle_mce(struct vcpu_svm *svm)
2762{
2763 if (is_erratum_383()) {
2764
2765
2766
2767
2768 pr_err("KVM: Guest triggered AMD Erratum 383\n");
2769
2770 kvm_make_request(KVM_REQ_TRIPLE_FAULT, &svm->vcpu);
2771
2772 return;
2773 }
2774
2775
2776
2777
2778
2779 asm volatile (
2780 "int $0x12\n");
2781
2782
2783 return;
2784}
2785
2786static int mc_interception(struct vcpu_svm *svm)
2787{
2788 return 1;
2789}
2790
2791static int shutdown_interception(struct vcpu_svm *svm)
2792{
2793 struct kvm_run *kvm_run = svm->vcpu.run;
2794
2795
2796
2797
2798
2799 clear_page(svm->vmcb);
2800 init_vmcb(svm);
2801
2802 kvm_run->exit_reason = KVM_EXIT_SHUTDOWN;
2803 return 0;
2804}
2805
2806static int io_interception(struct vcpu_svm *svm)
2807{
2808 struct kvm_vcpu *vcpu = &svm->vcpu;
2809 u32 io_info = svm->vmcb->control.exit_info_1;
2810 int size, in, string;
2811 unsigned port;
2812
2813 ++svm->vcpu.stat.io_exits;
2814 string = (io_info & SVM_IOIO_STR_MASK) != 0;
2815 in = (io_info & SVM_IOIO_TYPE_MASK) != 0;
2816 if (string)
2817 return emulate_instruction(vcpu, 0) == EMULATE_DONE;
2818
2819 port = io_info >> 16;
2820 size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT;
2821 svm->next_rip = svm->vmcb->control.exit_info_2;
2822
2823 return kvm_fast_pio(&svm->vcpu, size, port, in);
2824}
2825
2826static int nmi_interception(struct vcpu_svm *svm)
2827{
2828 return 1;
2829}
2830
2831static int intr_interception(struct vcpu_svm *svm)
2832{
2833 ++svm->vcpu.stat.irq_exits;
2834 return 1;
2835}
2836
2837static int nop_on_interception(struct vcpu_svm *svm)
2838{
2839 return 1;
2840}
2841
2842static int halt_interception(struct vcpu_svm *svm)
2843{
2844 svm->next_rip = kvm_rip_read(&svm->vcpu) + 1;
2845 return kvm_emulate_halt(&svm->vcpu);
2846}
2847
2848static int vmmcall_interception(struct vcpu_svm *svm)
2849{
2850 svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
2851 return kvm_emulate_hypercall(&svm->vcpu);
2852}
2853
2854static unsigned long nested_svm_get_tdp_cr3(struct kvm_vcpu *vcpu)
2855{
2856 struct vcpu_svm *svm = to_svm(vcpu);
2857
2858 return svm->nested.nested_cr3;
2859}
2860
2861static u64 nested_svm_get_tdp_pdptr(struct kvm_vcpu *vcpu, int index)
2862{
2863 struct vcpu_svm *svm = to_svm(vcpu);
2864 u64 cr3 = svm->nested.nested_cr3;
2865 u64 pdpte;
2866 int ret;
2867
2868 ret = kvm_vcpu_read_guest_page(vcpu, gpa_to_gfn(__sme_clr(cr3)), &pdpte,
2869 offset_in_page(cr3) + index * 8, 8);
2870 if (ret)
2871 return 0;
2872 return pdpte;
2873}
2874
2875static void nested_svm_set_tdp_cr3(struct kvm_vcpu *vcpu,
2876 unsigned long root)
2877{
2878 struct vcpu_svm *svm = to_svm(vcpu);
2879
2880 svm->vmcb->control.nested_cr3 = __sme_set(root);
2881 mark_dirty(svm->vmcb, VMCB_NPT);
2882 svm_flush_tlb(vcpu, true);
2883}
2884
2885static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu,
2886 struct x86_exception *fault)
2887{
2888 struct vcpu_svm *svm = to_svm(vcpu);
2889
2890 if (svm->vmcb->control.exit_code != SVM_EXIT_NPF) {
2891
2892
2893
2894
2895 svm->vmcb->control.exit_code = SVM_EXIT_NPF;
2896 svm->vmcb->control.exit_code_hi = 0;
2897 svm->vmcb->control.exit_info_1 = (1ULL << 32);
2898 svm->vmcb->control.exit_info_2 = fault->address;
2899 }
2900
2901 svm->vmcb->control.exit_info_1 &= ~0xffffffffULL;
2902 svm->vmcb->control.exit_info_1 |= fault->error_code;
2903
2904
2905
2906
2907
2908 if (svm->vmcb->control.exit_info_1 & (2ULL << 32))
2909 svm->vmcb->control.exit_info_1 &= ~1;
2910
2911 nested_svm_vmexit(svm);
2912}
2913
2914static void nested_svm_init_mmu_context(struct kvm_vcpu *vcpu)
2915{
2916 WARN_ON(mmu_is_nested(vcpu));
2917 kvm_init_shadow_mmu(vcpu);
2918 vcpu->arch.mmu.set_cr3 = nested_svm_set_tdp_cr3;
2919 vcpu->arch.mmu.get_cr3 = nested_svm_get_tdp_cr3;
2920 vcpu->arch.mmu.get_pdptr = nested_svm_get_tdp_pdptr;
2921 vcpu->arch.mmu.inject_page_fault = nested_svm_inject_npf_exit;
2922 vcpu->arch.mmu.shadow_root_level = get_npt_level(vcpu);
2923 reset_shadow_zero_bits_mask(vcpu, &vcpu->arch.mmu);
2924 vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu;
2925}
2926
2927static void nested_svm_uninit_mmu_context(struct kvm_vcpu *vcpu)
2928{
2929 vcpu->arch.walk_mmu = &vcpu->arch.mmu;
2930}
2931
2932static int nested_svm_check_permissions(struct vcpu_svm *svm)
2933{
2934 if (!(svm->vcpu.arch.efer & EFER_SVME) ||
2935 !is_paging(&svm->vcpu)) {
2936 kvm_queue_exception(&svm->vcpu, UD_VECTOR);
2937 return 1;
2938 }
2939
2940 if (svm->vmcb->save.cpl) {
2941 kvm_inject_gp(&svm->vcpu, 0);
2942 return 1;
2943 }
2944
2945 return 0;
2946}
2947
2948static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
2949 bool has_error_code, u32 error_code)
2950{
2951 int vmexit;
2952
2953 if (!is_guest_mode(&svm->vcpu))
2954 return 0;
2955
2956 vmexit = nested_svm_intercept(svm);
2957 if (vmexit != NESTED_EXIT_DONE)
2958 return 0;
2959
2960 svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + nr;
2961 svm->vmcb->control.exit_code_hi = 0;
2962 svm->vmcb->control.exit_info_1 = error_code;
2963
2964
2965
2966
2967
2968
2969
2970
2971
2972
2973 if (svm->vcpu.arch.exception.nested_apf)
2974 svm->vmcb->control.exit_info_2 = svm->vcpu.arch.apf.nested_apf_token;
2975 else
2976 svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2;
2977
2978 svm->nested.exit_required = true;
2979 return vmexit;
2980}
2981
2982
2983static inline bool nested_svm_intr(struct vcpu_svm *svm)
2984{
2985 if (!is_guest_mode(&svm->vcpu))
2986 return true;
2987
2988 if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK))
2989 return true;
2990
2991 if (!(svm->vcpu.arch.hflags & HF_HIF_MASK))
2992 return false;
2993
2994
2995
2996
2997
2998
2999 if (svm->nested.exit_required)
3000 return false;
3001
3002 svm->vmcb->control.exit_code = SVM_EXIT_INTR;
3003 svm->vmcb->control.exit_info_1 = 0;
3004 svm->vmcb->control.exit_info_2 = 0;
3005
3006 if (svm->nested.intercept & 1ULL) {
3007
3008
3009
3010
3011
3012
3013 svm->nested.exit_required = true;
3014 trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip);
3015 return false;
3016 }
3017
3018 return true;
3019}
3020
3021
3022static inline bool nested_svm_nmi(struct vcpu_svm *svm)
3023{
3024 if (!is_guest_mode(&svm->vcpu))
3025 return true;
3026
3027 if (!(svm->nested.intercept & (1ULL << INTERCEPT_NMI)))
3028 return true;
3029
3030 svm->vmcb->control.exit_code = SVM_EXIT_NMI;
3031 svm->nested.exit_required = true;
3032
3033 return false;
3034}
3035
3036static void *nested_svm_map(struct vcpu_svm *svm, u64 gpa, struct page **_page)
3037{
3038 struct page *page;
3039
3040 might_sleep();
3041
3042 page = kvm_vcpu_gfn_to_page(&svm->vcpu, gpa >> PAGE_SHIFT);
3043 if (is_error_page(page))
3044 goto error;
3045
3046 *_page = page;
3047
3048 return kmap(page);
3049
3050error:
3051 kvm_inject_gp(&svm->vcpu, 0);
3052
3053 return NULL;
3054}
3055
3056static void nested_svm_unmap(struct page *page)
3057{
3058 kunmap(page);
3059 kvm_release_page_dirty(page);
3060}
3061
3062static int nested_svm_intercept_ioio(struct vcpu_svm *svm)
3063{
3064 unsigned port, size, iopm_len;
3065 u16 val, mask;
3066 u8 start_bit;
3067 u64 gpa;
3068
3069 if (!(svm->nested.intercept & (1ULL << INTERCEPT_IOIO_PROT)))
3070 return NESTED_EXIT_HOST;
3071
3072 port = svm->vmcb->control.exit_info_1 >> 16;
3073 size = (svm->vmcb->control.exit_info_1 & SVM_IOIO_SIZE_MASK) >>
3074 SVM_IOIO_SIZE_SHIFT;
3075 gpa = svm->nested.vmcb_iopm + (port / 8);
3076 start_bit = port % 8;
3077 iopm_len = (start_bit + size > 8) ? 2 : 1;
3078 mask = (0xf >> (4 - size)) << start_bit;
3079 val = 0;
3080
3081 if (kvm_vcpu_read_guest(&svm->vcpu, gpa, &val, iopm_len))
3082 return NESTED_EXIT_DONE;
3083
3084 return (val & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST;
3085}
3086
3087static int nested_svm_exit_handled_msr(struct vcpu_svm *svm)
3088{
3089 u32 offset, msr, value;
3090 int write, mask;
3091
3092 if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT)))
3093 return NESTED_EXIT_HOST;
3094
3095 msr = svm->vcpu.arch.regs[VCPU_REGS_RCX];
3096 offset = svm_msrpm_offset(msr);
3097 write = svm->vmcb->control.exit_info_1 & 1;
3098 mask = 1 << ((2 * (msr & 0xf)) + write);
3099
3100 if (offset == MSR_INVALID)
3101 return NESTED_EXIT_DONE;
3102
3103
3104 offset *= 4;
3105
3106 if (kvm_vcpu_read_guest(&svm->vcpu, svm->nested.vmcb_msrpm + offset, &value, 4))
3107 return NESTED_EXIT_DONE;
3108
3109 return (value & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST;
3110}
3111
3112
3113static int nested_svm_intercept_db(struct vcpu_svm *svm)
3114{
3115 unsigned long dr6;
3116
3117
3118 if (!svm->nmi_singlestep)
3119 return NESTED_EXIT_DONE;
3120
3121
3122 if (kvm_get_dr(&svm->vcpu, 6, &dr6))
3123 return NESTED_EXIT_DONE;
3124 if (!(dr6 & DR6_BS))
3125 return NESTED_EXIT_DONE;
3126
3127
3128 if (svm->nmi_singlestep_guest_rflags & X86_EFLAGS_TF) {
3129 disable_nmi_singlestep(svm);
3130 return NESTED_EXIT_DONE;
3131 }
3132
3133
3134 return NESTED_EXIT_HOST;
3135}
3136
3137static int nested_svm_exit_special(struct vcpu_svm *svm)
3138{
3139 u32 exit_code = svm->vmcb->control.exit_code;
3140
3141 switch (exit_code) {
3142 case SVM_EXIT_INTR:
3143 case SVM_EXIT_NMI:
3144 case SVM_EXIT_EXCP_BASE + MC_VECTOR:
3145 return NESTED_EXIT_HOST;
3146 case SVM_EXIT_NPF:
3147
3148 if (npt_enabled)
3149 return NESTED_EXIT_HOST;
3150 break;
3151 case SVM_EXIT_EXCP_BASE + PF_VECTOR:
3152
3153 if (!npt_enabled && svm->vcpu.arch.apf.host_apf_reason == 0)
3154 return NESTED_EXIT_HOST;
3155 break;
3156 default:
3157 break;
3158 }
3159
3160 return NESTED_EXIT_CONTINUE;
3161}
3162
3163
3164
3165
3166static int nested_svm_intercept(struct vcpu_svm *svm)
3167{
3168 u32 exit_code = svm->vmcb->control.exit_code;
3169 int vmexit = NESTED_EXIT_HOST;
3170
3171 switch (exit_code) {
3172 case SVM_EXIT_MSR:
3173 vmexit = nested_svm_exit_handled_msr(svm);
3174 break;
3175 case SVM_EXIT_IOIO:
3176 vmexit = nested_svm_intercept_ioio(svm);
3177 break;
3178 case SVM_EXIT_READ_CR0 ... SVM_EXIT_WRITE_CR8: {
3179 u32 bit = 1U << (exit_code - SVM_EXIT_READ_CR0);
3180 if (svm->nested.intercept_cr & bit)
3181 vmexit = NESTED_EXIT_DONE;
3182 break;
3183 }
3184 case SVM_EXIT_READ_DR0 ... SVM_EXIT_WRITE_DR7: {
3185 u32 bit = 1U << (exit_code - SVM_EXIT_READ_DR0);
3186 if (svm->nested.intercept_dr & bit)
3187 vmexit = NESTED_EXIT_DONE;
3188 break;
3189 }
3190 case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: {
3191 u32 excp_bits = 1 << (exit_code - SVM_EXIT_EXCP_BASE);
3192 if (svm->nested.intercept_exceptions & excp_bits) {
3193 if (exit_code == SVM_EXIT_EXCP_BASE + DB_VECTOR)
3194 vmexit = nested_svm_intercept_db(svm);
3195 else
3196 vmexit = NESTED_EXIT_DONE;
3197 }
3198
3199 else if ((exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR) &&
3200 svm->vcpu.arch.exception.nested_apf != 0)
3201 vmexit = NESTED_EXIT_DONE;
3202 break;
3203 }
3204 case SVM_EXIT_ERR: {
3205 vmexit = NESTED_EXIT_DONE;
3206 break;
3207 }
3208 default: {
3209 u64 exit_bits = 1ULL << (exit_code - SVM_EXIT_INTR);
3210 if (svm->nested.intercept & exit_bits)
3211 vmexit = NESTED_EXIT_DONE;
3212 }
3213 }
3214
3215 return vmexit;
3216}
3217
3218static int nested_svm_exit_handled(struct vcpu_svm *svm)
3219{
3220 int vmexit;
3221
3222 vmexit = nested_svm_intercept(svm);
3223
3224 if (vmexit == NESTED_EXIT_DONE)
3225 nested_svm_vmexit(svm);
3226
3227 return vmexit;
3228}
3229
3230static inline void copy_vmcb_control_area(struct vmcb *dst_vmcb, struct vmcb *from_vmcb)
3231{
3232 struct vmcb_control_area *dst = &dst_vmcb->control;
3233 struct vmcb_control_area *from = &from_vmcb->control;
3234
3235 dst->intercept_cr = from->intercept_cr;
3236 dst->intercept_dr = from->intercept_dr;
3237 dst->intercept_exceptions = from->intercept_exceptions;
3238 dst->intercept = from->intercept;
3239 dst->iopm_base_pa = from->iopm_base_pa;
3240 dst->msrpm_base_pa = from->msrpm_base_pa;
3241 dst->tsc_offset = from->tsc_offset;
3242 dst->asid = from->asid;
3243 dst->tlb_ctl = from->tlb_ctl;
3244 dst->int_ctl = from->int_ctl;
3245 dst->int_vector = from->int_vector;
3246 dst->int_state = from->int_state;
3247 dst->exit_code = from->exit_code;
3248 dst->exit_code_hi = from->exit_code_hi;
3249 dst->exit_info_1 = from->exit_info_1;
3250 dst->exit_info_2 = from->exit_info_2;
3251 dst->exit_int_info = from->exit_int_info;
3252 dst->exit_int_info_err = from->exit_int_info_err;
3253 dst->nested_ctl = from->nested_ctl;
3254 dst->event_inj = from->event_inj;
3255 dst->event_inj_err = from->event_inj_err;
3256 dst->nested_cr3 = from->nested_cr3;
3257 dst->virt_ext = from->virt_ext;
3258}
3259
3260static int nested_svm_vmexit(struct vcpu_svm *svm)
3261{
3262 struct vmcb *nested_vmcb;
3263 struct vmcb *hsave = svm->nested.hsave;
3264 struct vmcb *vmcb = svm->vmcb;
3265 struct page *page;
3266
3267 trace_kvm_nested_vmexit_inject(vmcb->control.exit_code,
3268 vmcb->control.exit_info_1,
3269 vmcb->control.exit_info_2,
3270 vmcb->control.exit_int_info,
3271 vmcb->control.exit_int_info_err,
3272 KVM_ISA_SVM);
3273
3274 nested_vmcb = nested_svm_map(svm, svm->nested.vmcb, &page);
3275 if (!nested_vmcb)
3276 return 1;
3277
3278
3279 leave_guest_mode(&svm->vcpu);
3280 svm->nested.vmcb = 0;
3281
3282
3283 disable_gif(svm);
3284
3285 nested_vmcb->save.es = vmcb->save.es;
3286 nested_vmcb->save.cs = vmcb->save.cs;
3287 nested_vmcb->save.ss = vmcb->save.ss;
3288 nested_vmcb->save.ds = vmcb->save.ds;
3289 nested_vmcb->save.gdtr = vmcb->save.gdtr;
3290 nested_vmcb->save.idtr = vmcb->save.idtr;
3291 nested_vmcb->save.efer = svm->vcpu.arch.efer;
3292 nested_vmcb->save.cr0 = kvm_read_cr0(&svm->vcpu);
3293 nested_vmcb->save.cr3 = kvm_read_cr3(&svm->vcpu);
3294 nested_vmcb->save.cr2 = vmcb->save.cr2;
3295 nested_vmcb->save.cr4 = svm->vcpu.arch.cr4;
3296 nested_vmcb->save.rflags = kvm_get_rflags(&svm->vcpu);
3297 nested_vmcb->save.rip = vmcb->save.rip;
3298 nested_vmcb->save.rsp = vmcb->save.rsp;
3299 nested_vmcb->save.rax = vmcb->save.rax;
3300 nested_vmcb->save.dr7 = vmcb->save.dr7;
3301 nested_vmcb->save.dr6 = vmcb->save.dr6;
3302 nested_vmcb->save.cpl = vmcb->save.cpl;
3303
3304 nested_vmcb->control.int_ctl = vmcb->control.int_ctl;
3305 nested_vmcb->control.int_vector = vmcb->control.int_vector;
3306 nested_vmcb->control.int_state = vmcb->control.int_state;
3307 nested_vmcb->control.exit_code = vmcb->control.exit_code;
3308 nested_vmcb->control.exit_code_hi = vmcb->control.exit_code_hi;
3309 nested_vmcb->control.exit_info_1 = vmcb->control.exit_info_1;
3310 nested_vmcb->control.exit_info_2 = vmcb->control.exit_info_2;
3311 nested_vmcb->control.exit_int_info = vmcb->control.exit_int_info;
3312 nested_vmcb->control.exit_int_info_err = vmcb->control.exit_int_info_err;
3313
3314 if (svm->nrips_enabled)
3315 nested_vmcb->control.next_rip = vmcb->control.next_rip;
3316
3317
3318
3319
3320
3321
3322
3323
3324
3325 if (vmcb->control.event_inj & SVM_EVTINJ_VALID) {
3326 struct vmcb_control_area *nc = &nested_vmcb->control;
3327
3328 nc->exit_int_info = vmcb->control.event_inj;
3329 nc->exit_int_info_err = vmcb->control.event_inj_err;
3330 }
3331
3332 nested_vmcb->control.tlb_ctl = 0;
3333 nested_vmcb->control.event_inj = 0;
3334 nested_vmcb->control.event_inj_err = 0;
3335
3336
3337 if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK))
3338 nested_vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK;
3339
3340
3341 copy_vmcb_control_area(vmcb, hsave);
3342
3343 svm->vcpu.arch.tsc_offset = svm->vmcb->control.tsc_offset;
3344 kvm_clear_exception_queue(&svm->vcpu);
3345 kvm_clear_interrupt_queue(&svm->vcpu);
3346
3347 svm->nested.nested_cr3 = 0;
3348
3349
3350 svm->vmcb->save.es = hsave->save.es;
3351 svm->vmcb->save.cs = hsave->save.cs;
3352 svm->vmcb->save.ss = hsave->save.ss;
3353 svm->vmcb->save.ds = hsave->save.ds;
3354 svm->vmcb->save.gdtr = hsave->save.gdtr;
3355 svm->vmcb->save.idtr = hsave->save.idtr;
3356 kvm_set_rflags(&svm->vcpu, hsave->save.rflags);
3357 svm_set_efer(&svm->vcpu, hsave->save.efer);
3358 svm_set_cr0(&svm->vcpu, hsave->save.cr0 | X86_CR0_PE);
3359 svm_set_cr4(&svm->vcpu, hsave->save.cr4);
3360 if (npt_enabled) {
3361 svm->vmcb->save.cr3 = hsave->save.cr3;
3362 svm->vcpu.arch.cr3 = hsave->save.cr3;
3363 } else {
3364 (void)kvm_set_cr3(&svm->vcpu, hsave->save.cr3);
3365 }
3366 kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, hsave->save.rax);
3367 kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, hsave->save.rsp);
3368 kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, hsave->save.rip);
3369 svm->vmcb->save.dr7 = 0;
3370 svm->vmcb->save.cpl = 0;
3371 svm->vmcb->control.exit_int_info = 0;
3372
3373 mark_all_dirty(svm->vmcb);
3374
3375 nested_svm_unmap(page);
3376
3377 nested_svm_uninit_mmu_context(&svm->vcpu);
3378 kvm_mmu_reset_context(&svm->vcpu);
3379 kvm_mmu_load(&svm->vcpu);
3380
3381 return 0;
3382}
3383
3384static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm)
3385{
3386
3387
3388
3389
3390
3391 int i;
3392
3393 if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT)))
3394 return true;
3395
3396 for (i = 0; i < MSRPM_OFFSETS; i++) {
3397 u32 value, p;
3398 u64 offset;
3399
3400 if (msrpm_offsets[i] == 0xffffffff)
3401 break;
3402
3403 p = msrpm_offsets[i];
3404 offset = svm->nested.vmcb_msrpm + (p * 4);
3405
3406 if (kvm_vcpu_read_guest(&svm->vcpu, offset, &value, 4))
3407 return false;
3408
3409 svm->nested.msrpm[p] = svm->msrpm[p] | value;
3410 }
3411
3412 svm->vmcb->control.msrpm_base_pa = __sme_set(__pa(svm->nested.msrpm));
3413
3414 return true;
3415}
3416
3417static bool nested_vmcb_checks(struct vmcb *vmcb)
3418{
3419 if ((vmcb->control.intercept & (1ULL << INTERCEPT_VMRUN)) == 0)
3420 return false;
3421
3422 if (vmcb->control.asid == 0)
3423 return false;
3424
3425 if ((vmcb->control.nested_ctl & SVM_NESTED_CTL_NP_ENABLE) &&
3426 !npt_enabled)
3427 return false;
3428
3429 return true;
3430}
3431
3432static void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
3433 struct vmcb *nested_vmcb, struct page *page)
3434{
3435 if (kvm_get_rflags(&svm->vcpu) & X86_EFLAGS_IF)
3436 svm->vcpu.arch.hflags |= HF_HIF_MASK;
3437 else
3438 svm->vcpu.arch.hflags &= ~HF_HIF_MASK;
3439
3440 if (nested_vmcb->control.nested_ctl & SVM_NESTED_CTL_NP_ENABLE) {
3441 kvm_mmu_unload(&svm->vcpu);
3442 svm->nested.nested_cr3 = nested_vmcb->control.nested_cr3;
3443 nested_svm_init_mmu_context(&svm->vcpu);
3444 }
3445
3446
3447 svm->vmcb->save.es = nested_vmcb->save.es;
3448 svm->vmcb->save.cs = nested_vmcb->save.cs;
3449 svm->vmcb->save.ss = nested_vmcb->save.ss;
3450 svm->vmcb->save.ds = nested_vmcb->save.ds;
3451 svm->vmcb->save.gdtr = nested_vmcb->save.gdtr;
3452 svm->vmcb->save.idtr = nested_vmcb->save.idtr;
3453 kvm_set_rflags(&svm->vcpu, nested_vmcb->save.rflags);
3454 svm_set_efer(&svm->vcpu, nested_vmcb->save.efer);
3455 svm_set_cr0(&svm->vcpu, nested_vmcb->save.cr0);
3456 svm_set_cr4(&svm->vcpu, nested_vmcb->save.cr4);
3457 if (npt_enabled) {
3458 svm->vmcb->save.cr3 = nested_vmcb->save.cr3;
3459 svm->vcpu.arch.cr3 = nested_vmcb->save.cr3;
3460 } else
3461 (void)kvm_set_cr3(&svm->vcpu, nested_vmcb->save.cr3);
3462
3463
3464 kvm_mmu_reset_context(&svm->vcpu);
3465
3466 svm->vmcb->save.cr2 = svm->vcpu.arch.cr2 = nested_vmcb->save.cr2;
3467 kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, nested_vmcb->save.rax);
3468 kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, nested_vmcb->save.rsp);
3469 kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, nested_vmcb->save.rip);
3470
3471
3472 svm->vmcb->save.rax = nested_vmcb->save.rax;
3473 svm->vmcb->save.rsp = nested_vmcb->save.rsp;
3474 svm->vmcb->save.rip = nested_vmcb->save.rip;
3475 svm->vmcb->save.dr7 = nested_vmcb->save.dr7;
3476 svm->vmcb->save.dr6 = nested_vmcb->save.dr6;
3477 svm->vmcb->save.cpl = nested_vmcb->save.cpl;
3478
3479 svm->nested.vmcb_msrpm = nested_vmcb->control.msrpm_base_pa & ~0x0fffULL;
3480 svm->nested.vmcb_iopm = nested_vmcb->control.iopm_base_pa & ~0x0fffULL;
3481
3482
3483 svm->nested.intercept_cr = nested_vmcb->control.intercept_cr;
3484 svm->nested.intercept_dr = nested_vmcb->control.intercept_dr;
3485 svm->nested.intercept_exceptions = nested_vmcb->control.intercept_exceptions;
3486 svm->nested.intercept = nested_vmcb->control.intercept;
3487
3488 svm_flush_tlb(&svm->vcpu, true);
3489 svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | V_INTR_MASKING_MASK;
3490 if (nested_vmcb->control.int_ctl & V_INTR_MASKING_MASK)
3491 svm->vcpu.arch.hflags |= HF_VINTR_MASK;
3492 else
3493 svm->vcpu.arch.hflags &= ~HF_VINTR_MASK;
3494
3495 if (svm->vcpu.arch.hflags & HF_VINTR_MASK) {
3496
3497 clr_cr_intercept(svm, INTERCEPT_CR8_READ);
3498 clr_cr_intercept(svm, INTERCEPT_CR8_WRITE);
3499 }
3500
3501
3502 clr_intercept(svm, INTERCEPT_VMMCALL);
3503
3504 svm->vcpu.arch.tsc_offset += nested_vmcb->control.tsc_offset;
3505 svm->vmcb->control.tsc_offset = svm->vcpu.arch.tsc_offset;
3506
3507 svm->vmcb->control.virt_ext = nested_vmcb->control.virt_ext;
3508 svm->vmcb->control.int_vector = nested_vmcb->control.int_vector;
3509 svm->vmcb->control.int_state = nested_vmcb->control.int_state;
3510 svm->vmcb->control.event_inj = nested_vmcb->control.event_inj;
3511 svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err;
3512
3513 nested_svm_unmap(page);
3514
3515
3516 enter_guest_mode(&svm->vcpu);
3517
3518
3519
3520
3521
3522 recalc_intercepts(svm);
3523
3524 svm->nested.vmcb = vmcb_gpa;
3525
3526 enable_gif(svm);
3527
3528 mark_all_dirty(svm->vmcb);
3529}
3530
3531static bool nested_svm_vmrun(struct vcpu_svm *svm)
3532{
3533 struct vmcb *nested_vmcb;
3534 struct vmcb *hsave = svm->nested.hsave;
3535 struct vmcb *vmcb = svm->vmcb;
3536 struct page *page;
3537 u64 vmcb_gpa;
3538
3539 vmcb_gpa = svm->vmcb->save.rax;
3540
3541 nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page);
3542 if (!nested_vmcb)
3543 return false;
3544
3545 if (!nested_vmcb_checks(nested_vmcb)) {
3546 nested_vmcb->control.exit_code = SVM_EXIT_ERR;
3547 nested_vmcb->control.exit_code_hi = 0;
3548 nested_vmcb->control.exit_info_1 = 0;
3549 nested_vmcb->control.exit_info_2 = 0;
3550
3551 nested_svm_unmap(page);
3552
3553 return false;
3554 }
3555
3556 trace_kvm_nested_vmrun(svm->vmcb->save.rip, vmcb_gpa,
3557 nested_vmcb->save.rip,
3558 nested_vmcb->control.int_ctl,
3559 nested_vmcb->control.event_inj,
3560 nested_vmcb->control.nested_ctl);
3561
3562 trace_kvm_nested_intercepts(nested_vmcb->control.intercept_cr & 0xffff,
3563 nested_vmcb->control.intercept_cr >> 16,
3564 nested_vmcb->control.intercept_exceptions,
3565 nested_vmcb->control.intercept);
3566
3567
3568 kvm_clear_exception_queue(&svm->vcpu);
3569 kvm_clear_interrupt_queue(&svm->vcpu);
3570
3571
3572
3573
3574
3575 hsave->save.es = vmcb->save.es;
3576 hsave->save.cs = vmcb->save.cs;
3577 hsave->save.ss = vmcb->save.ss;
3578 hsave->save.ds = vmcb->save.ds;
3579 hsave->save.gdtr = vmcb->save.gdtr;
3580 hsave->save.idtr = vmcb->save.idtr;
3581 hsave->save.efer = svm->vcpu.arch.efer;
3582 hsave->save.cr0 = kvm_read_cr0(&svm->vcpu);
3583 hsave->save.cr4 = svm->vcpu.arch.cr4;
3584 hsave->save.rflags = kvm_get_rflags(&svm->vcpu);
3585 hsave->save.rip = kvm_rip_read(&svm->vcpu);
3586 hsave->save.rsp = vmcb->save.rsp;
3587 hsave->save.rax = vmcb->save.rax;
3588 if (npt_enabled)
3589 hsave->save.cr3 = vmcb->save.cr3;
3590 else
3591 hsave->save.cr3 = kvm_read_cr3(&svm->vcpu);
3592
3593 copy_vmcb_control_area(hsave, vmcb);
3594
3595 enter_svm_guest_mode(svm, vmcb_gpa, nested_vmcb, page);
3596
3597 return true;
3598}
3599
3600static void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb)
3601{
3602 to_vmcb->save.fs = from_vmcb->save.fs;
3603 to_vmcb->save.gs = from_vmcb->save.gs;
3604 to_vmcb->save.tr = from_vmcb->save.tr;
3605 to_vmcb->save.ldtr = from_vmcb->save.ldtr;
3606 to_vmcb->save.kernel_gs_base = from_vmcb->save.kernel_gs_base;
3607 to_vmcb->save.star = from_vmcb->save.star;
3608 to_vmcb->save.lstar = from_vmcb->save.lstar;
3609 to_vmcb->save.cstar = from_vmcb->save.cstar;
3610 to_vmcb->save.sfmask = from_vmcb->save.sfmask;
3611 to_vmcb->save.sysenter_cs = from_vmcb->save.sysenter_cs;
3612 to_vmcb->save.sysenter_esp = from_vmcb->save.sysenter_esp;
3613 to_vmcb->save.sysenter_eip = from_vmcb->save.sysenter_eip;
3614}
3615
3616static int vmload_interception(struct vcpu_svm *svm)
3617{
3618 struct vmcb *nested_vmcb;
3619 struct page *page;
3620 int ret;
3621
3622 if (nested_svm_check_permissions(svm))
3623 return 1;
3624
3625 nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page);
3626 if (!nested_vmcb)
3627 return 1;
3628
3629 svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
3630 ret = kvm_skip_emulated_instruction(&svm->vcpu);
3631
3632 nested_svm_vmloadsave(nested_vmcb, svm->vmcb);
3633 nested_svm_unmap(page);
3634
3635 return ret;
3636}
3637
3638static int vmsave_interception(struct vcpu_svm *svm)
3639{
3640 struct vmcb *nested_vmcb;
3641 struct page *page;
3642 int ret;
3643
3644 if (nested_svm_check_permissions(svm))
3645 return 1;
3646
3647 nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page);
3648 if (!nested_vmcb)
3649 return 1;
3650
3651 svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
3652 ret = kvm_skip_emulated_instruction(&svm->vcpu);
3653
3654 nested_svm_vmloadsave(svm->vmcb, nested_vmcb);
3655 nested_svm_unmap(page);
3656
3657 return ret;
3658}
3659
3660static int vmrun_interception(struct vcpu_svm *svm)
3661{
3662 if (nested_svm_check_permissions(svm))
3663 return 1;
3664
3665
3666 kvm_rip_write(&svm->vcpu, kvm_rip_read(&svm->vcpu) + 3);
3667
3668 if (!nested_svm_vmrun(svm))
3669 return 1;
3670
3671 if (!nested_svm_vmrun_msrpm(svm))
3672 goto failed;
3673
3674 return 1;
3675
3676failed:
3677
3678 svm->vmcb->control.exit_code = SVM_EXIT_ERR;
3679 svm->vmcb->control.exit_code_hi = 0;
3680 svm->vmcb->control.exit_info_1 = 0;
3681 svm->vmcb->control.exit_info_2 = 0;
3682
3683 nested_svm_vmexit(svm);
3684
3685 return 1;
3686}
3687
3688static int stgi_interception(struct vcpu_svm *svm)
3689{
3690 int ret;
3691
3692 if (nested_svm_check_permissions(svm))
3693 return 1;
3694
3695
3696
3697
3698
3699 if (vgif_enabled(svm))
3700 clr_intercept(svm, INTERCEPT_STGI);
3701
3702 svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
3703 ret = kvm_skip_emulated_instruction(&svm->vcpu);
3704 kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
3705
3706 enable_gif(svm);
3707
3708 return ret;
3709}
3710
3711static int clgi_interception(struct vcpu_svm *svm)
3712{
3713 int ret;
3714
3715 if (nested_svm_check_permissions(svm))
3716 return 1;
3717
3718 svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
3719 ret = kvm_skip_emulated_instruction(&svm->vcpu);
3720
3721 disable_gif(svm);
3722
3723
3724 if (!kvm_vcpu_apicv_active(&svm->vcpu)) {
3725 svm_clear_vintr(svm);
3726 svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
3727 mark_dirty(svm->vmcb, VMCB_INTR);
3728 }
3729
3730 return ret;
3731}
3732
3733static int invlpga_interception(struct vcpu_svm *svm)
3734{
3735 struct kvm_vcpu *vcpu = &svm->vcpu;
3736
3737 trace_kvm_invlpga(svm->vmcb->save.rip, kvm_register_read(&svm->vcpu, VCPU_REGS_RCX),
3738 kvm_register_read(&svm->vcpu, VCPU_REGS_RAX));
3739
3740
3741 kvm_mmu_invlpg(vcpu, kvm_register_read(&svm->vcpu, VCPU_REGS_RAX));
3742
3743 svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
3744 return kvm_skip_emulated_instruction(&svm->vcpu);
3745}
3746
3747static int skinit_interception(struct vcpu_svm *svm)
3748{
3749 trace_kvm_skinit(svm->vmcb->save.rip, kvm_register_read(&svm->vcpu, VCPU_REGS_RAX));
3750
3751 kvm_queue_exception(&svm->vcpu, UD_VECTOR);
3752 return 1;
3753}
3754
3755static int wbinvd_interception(struct vcpu_svm *svm)
3756{
3757 return kvm_emulate_wbinvd(&svm->vcpu);
3758}
3759
3760static int xsetbv_interception(struct vcpu_svm *svm)
3761{
3762 u64 new_bv = kvm_read_edx_eax(&svm->vcpu);
3763 u32 index = kvm_register_read(&svm->vcpu, VCPU_REGS_RCX);
3764
3765 if (kvm_set_xcr(&svm->vcpu, index, new_bv) == 0) {
3766 svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
3767 return kvm_skip_emulated_instruction(&svm->vcpu);
3768 }
3769
3770 return 1;
3771}
3772
3773static int task_switch_interception(struct vcpu_svm *svm)
3774{
3775 u16 tss_selector;
3776 int reason;
3777 int int_type = svm->vmcb->control.exit_int_info &
3778 SVM_EXITINTINFO_TYPE_MASK;
3779 int int_vec = svm->vmcb->control.exit_int_info & SVM_EVTINJ_VEC_MASK;
3780 uint32_t type =
3781 svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_TYPE_MASK;
3782 uint32_t idt_v =
3783 svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_VALID;
3784 bool has_error_code = false;
3785 u32 error_code = 0;
3786
3787 tss_selector = (u16)svm->vmcb->control.exit_info_1;
3788
3789 if (svm->vmcb->control.exit_info_2 &
3790 (1ULL << SVM_EXITINFOSHIFT_TS_REASON_IRET))
3791 reason = TASK_SWITCH_IRET;
3792 else if (svm->vmcb->control.exit_info_2 &
3793 (1ULL << SVM_EXITINFOSHIFT_TS_REASON_JMP))
3794 reason = TASK_SWITCH_JMP;
3795 else if (idt_v)
3796 reason = TASK_SWITCH_GATE;
3797 else
3798 reason = TASK_SWITCH_CALL;
3799
3800 if (reason == TASK_SWITCH_GATE) {
3801 switch (type) {
3802 case SVM_EXITINTINFO_TYPE_NMI:
3803 svm->vcpu.arch.nmi_injected = false;
3804 break;
3805 case SVM_EXITINTINFO_TYPE_EXEPT:
3806 if (svm->vmcb->control.exit_info_2 &
3807 (1ULL << SVM_EXITINFOSHIFT_TS_HAS_ERROR_CODE)) {
3808 has_error_code = true;
3809 error_code =
3810 (u32)svm->vmcb->control.exit_info_2;
3811 }
3812 kvm_clear_exception_queue(&svm->vcpu);
3813 break;
3814 case SVM_EXITINTINFO_TYPE_INTR:
3815 kvm_clear_interrupt_queue(&svm->vcpu);
3816 break;
3817 default:
3818 break;
3819 }
3820 }
3821
3822 if (reason != TASK_SWITCH_GATE ||
3823 int_type == SVM_EXITINTINFO_TYPE_SOFT ||
3824 (int_type == SVM_EXITINTINFO_TYPE_EXEPT &&
3825 (int_vec == OF_VECTOR || int_vec == BP_VECTOR)))
3826 skip_emulated_instruction(&svm->vcpu);
3827
3828 if (int_type != SVM_EXITINTINFO_TYPE_SOFT)
3829 int_vec = -1;
3830
3831 if (kvm_task_switch(&svm->vcpu, tss_selector, int_vec, reason,
3832 has_error_code, error_code) == EMULATE_FAIL) {
3833 svm->vcpu.run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
3834 svm->vcpu.run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
3835 svm->vcpu.run->internal.ndata = 0;
3836 return 0;
3837 }
3838 return 1;
3839}
3840
3841static int cpuid_interception(struct vcpu_svm *svm)
3842{
3843 svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
3844 return kvm_emulate_cpuid(&svm->vcpu);
3845}
3846
3847static int iret_interception(struct vcpu_svm *svm)
3848{
3849 ++svm->vcpu.stat.nmi_window_exits;
3850 clr_intercept(svm, INTERCEPT_IRET);
3851 svm->vcpu.arch.hflags |= HF_IRET_MASK;
3852 svm->nmi_iret_rip = kvm_rip_read(&svm->vcpu);
3853 kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
3854 return 1;
3855}
3856
3857static int invlpg_interception(struct vcpu_svm *svm)
3858{
3859 if (!static_cpu_has(X86_FEATURE_DECODEASSISTS))
3860 return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE;
3861
3862 kvm_mmu_invlpg(&svm->vcpu, svm->vmcb->control.exit_info_1);
3863 return kvm_skip_emulated_instruction(&svm->vcpu);
3864}
3865
3866static int emulate_on_interception(struct vcpu_svm *svm)
3867{
3868 return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE;
3869}
3870
3871static int rsm_interception(struct vcpu_svm *svm)
3872{
3873 return x86_emulate_instruction(&svm->vcpu, 0, 0,
3874 rsm_ins_bytes, 2) == EMULATE_DONE;
3875}
3876
3877static int rdpmc_interception(struct vcpu_svm *svm)
3878{
3879 int err;
3880
3881 if (!static_cpu_has(X86_FEATURE_NRIPS))
3882 return emulate_on_interception(svm);
3883
3884 err = kvm_rdpmc(&svm->vcpu);
3885 return kvm_complete_insn_gp(&svm->vcpu, err);
3886}
3887
3888static bool check_selective_cr0_intercepted(struct vcpu_svm *svm,
3889 unsigned long val)
3890{
3891 unsigned long cr0 = svm->vcpu.arch.cr0;
3892 bool ret = false;
3893 u64 intercept;
3894
3895 intercept = svm->nested.intercept;
3896
3897 if (!is_guest_mode(&svm->vcpu) ||
3898 (!(intercept & (1ULL << INTERCEPT_SELECTIVE_CR0))))
3899 return false;
3900
3901 cr0 &= ~SVM_CR0_SELECTIVE_MASK;
3902 val &= ~SVM_CR0_SELECTIVE_MASK;
3903
3904 if (cr0 ^ val) {
3905 svm->vmcb->control.exit_code = SVM_EXIT_CR0_SEL_WRITE;
3906 ret = (nested_svm_exit_handled(svm) == NESTED_EXIT_DONE);
3907 }
3908
3909 return ret;
3910}
3911
3912#define CR_VALID (1ULL << 63)
3913
3914static int cr_interception(struct vcpu_svm *svm)
3915{
3916 int reg, cr;
3917 unsigned long val;
3918 int err;
3919
3920 if (!static_cpu_has(X86_FEATURE_DECODEASSISTS))
3921 return emulate_on_interception(svm);
3922
3923 if (unlikely((svm->vmcb->control.exit_info_1 & CR_VALID) == 0))
3924 return emulate_on_interception(svm);
3925
3926 reg = svm->vmcb->control.exit_info_1 & SVM_EXITINFO_REG_MASK;
3927 if (svm->vmcb->control.exit_code == SVM_EXIT_CR0_SEL_WRITE)
3928 cr = SVM_EXIT_WRITE_CR0 - SVM_EXIT_READ_CR0;
3929 else
3930 cr = svm->vmcb->control.exit_code - SVM_EXIT_READ_CR0;
3931
3932 err = 0;
3933 if (cr >= 16) {
3934 cr -= 16;
3935 val = kvm_register_read(&svm->vcpu, reg);
3936 switch (cr) {
3937 case 0:
3938 if (!check_selective_cr0_intercepted(svm, val))
3939 err = kvm_set_cr0(&svm->vcpu, val);
3940 else
3941 return 1;
3942
3943 break;
3944 case 3:
3945 err = kvm_set_cr3(&svm->vcpu, val);
3946 break;
3947 case 4:
3948 err = kvm_set_cr4(&svm->vcpu, val);
3949 break;
3950 case 8:
3951 err = kvm_set_cr8(&svm->vcpu, val);
3952 break;
3953 default:
3954 WARN(1, "unhandled write to CR%d", cr);
3955 kvm_queue_exception(&svm->vcpu, UD_VECTOR);
3956 return 1;
3957 }
3958 } else {
3959 switch (cr) {
3960 case 0:
3961 val = kvm_read_cr0(&svm->vcpu);
3962 break;
3963 case 2:
3964 val = svm->vcpu.arch.cr2;
3965 break;
3966 case 3:
3967 val = kvm_read_cr3(&svm->vcpu);
3968 break;
3969 case 4:
3970 val = kvm_read_cr4(&svm->vcpu);
3971 break;
3972 case 8:
3973 val = kvm_get_cr8(&svm->vcpu);
3974 break;
3975 default:
3976 WARN(1, "unhandled read from CR%d", cr);
3977 kvm_queue_exception(&svm->vcpu, UD_VECTOR);
3978 return 1;
3979 }
3980 kvm_register_write(&svm->vcpu, reg, val);
3981 }
3982 return kvm_complete_insn_gp(&svm->vcpu, err);
3983}
3984
3985static int dr_interception(struct vcpu_svm *svm)
3986{
3987 int reg, dr;
3988 unsigned long val;
3989
3990 if (svm->vcpu.guest_debug == 0) {
3991
3992
3993
3994
3995
3996 clr_dr_intercepts(svm);
3997 svm->vcpu.arch.switch_db_regs |= KVM_DEBUGREG_WONT_EXIT;
3998 return 1;
3999 }
4000
4001 if (!boot_cpu_has(X86_FEATURE_DECODEASSISTS))
4002 return emulate_on_interception(svm);
4003
4004 reg = svm->vmcb->control.exit_info_1 & SVM_EXITINFO_REG_MASK;
4005 dr = svm->vmcb->control.exit_code - SVM_EXIT_READ_DR0;
4006
4007 if (dr >= 16) {
4008 if (!kvm_require_dr(&svm->vcpu, dr - 16))
4009 return 1;
4010 val = kvm_register_read(&svm->vcpu, reg);
4011 kvm_set_dr(&svm->vcpu, dr - 16, val);
4012 } else {
4013 if (!kvm_require_dr(&svm->vcpu, dr))
4014 return 1;
4015 kvm_get_dr(&svm->vcpu, dr, &val);
4016 kvm_register_write(&svm->vcpu, reg, val);
4017 }
4018
4019 return kvm_skip_emulated_instruction(&svm->vcpu);
4020}
4021
4022static int cr8_write_interception(struct vcpu_svm *svm)
4023{
4024 struct kvm_run *kvm_run = svm->vcpu.run;
4025 int r;
4026
4027 u8 cr8_prev = kvm_get_cr8(&svm->vcpu);
4028
4029 r = cr_interception(svm);
4030 if (lapic_in_kernel(&svm->vcpu))
4031 return r;
4032 if (cr8_prev <= kvm_get_cr8(&svm->vcpu))
4033 return r;
4034 kvm_run->exit_reason = KVM_EXIT_SET_TPR;
4035 return 0;
4036}
4037
4038static int svm_get_msr_feature(struct kvm_msr_entry *msr)
4039{
4040 msr->data = 0;
4041
4042 switch (msr->index) {
4043 case MSR_F10H_DECFG:
4044 if (boot_cpu_has(X86_FEATURE_LFENCE_RDTSC))
4045 msr->data |= MSR_F10H_DECFG_LFENCE_SERIALIZE;
4046 break;
4047 default:
4048 return 1;
4049 }
4050
4051 return 0;
4052}
4053
4054static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
4055{
4056 struct vcpu_svm *svm = to_svm(vcpu);
4057
4058 switch (msr_info->index) {
4059 case MSR_STAR:
4060 msr_info->data = svm->vmcb->save.star;
4061 break;
4062#ifdef CONFIG_X86_64
4063 case MSR_LSTAR:
4064 msr_info->data = svm->vmcb->save.lstar;
4065 break;
4066 case MSR_CSTAR:
4067 msr_info->data = svm->vmcb->save.cstar;
4068 break;
4069 case MSR_KERNEL_GS_BASE:
4070 msr_info->data = svm->vmcb->save.kernel_gs_base;
4071 break;
4072 case MSR_SYSCALL_MASK:
4073 msr_info->data = svm->vmcb->save.sfmask;
4074 break;
4075#endif
4076 case MSR_IA32_SYSENTER_CS:
4077 msr_info->data = svm->vmcb->save.sysenter_cs;
4078 break;
4079 case MSR_IA32_SYSENTER_EIP:
4080 msr_info->data = svm->sysenter_eip;
4081 break;
4082 case MSR_IA32_SYSENTER_ESP:
4083 msr_info->data = svm->sysenter_esp;
4084 break;
4085 case MSR_TSC_AUX:
4086 if (!boot_cpu_has(X86_FEATURE_RDTSCP))
4087 return 1;
4088 msr_info->data = svm->tsc_aux;
4089 break;
4090
4091
4092
4093
4094
4095 case MSR_IA32_DEBUGCTLMSR:
4096 msr_info->data = svm->vmcb->save.dbgctl;
4097 break;
4098 case MSR_IA32_LASTBRANCHFROMIP:
4099 msr_info->data = svm->vmcb->save.br_from;
4100 break;
4101 case MSR_IA32_LASTBRANCHTOIP:
4102 msr_info->data = svm->vmcb->save.br_to;
4103 break;
4104 case MSR_IA32_LASTINTFROMIP:
4105 msr_info->data = svm->vmcb->save.last_excp_from;
4106 break;
4107 case MSR_IA32_LASTINTTOIP:
4108 msr_info->data = svm->vmcb->save.last_excp_to;
4109 break;
4110 case MSR_VM_HSAVE_PA:
4111 msr_info->data = svm->nested.hsave_msr;
4112 break;
4113 case MSR_VM_CR:
4114 msr_info->data = svm->nested.vm_cr_msr;
4115 break;
4116 case MSR_IA32_SPEC_CTRL:
4117 if (!msr_info->host_initiated &&
4118 !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS))
4119 return 1;
4120
4121 msr_info->data = svm->spec_ctrl;
4122 break;
4123 case MSR_AMD64_VIRT_SPEC_CTRL:
4124 if (!msr_info->host_initiated &&
4125 !guest_cpuid_has(vcpu, X86_FEATURE_VIRT_SSBD))
4126 return 1;
4127
4128 msr_info->data = svm->virt_spec_ctrl;
4129 break;
4130 case MSR_F15H_IC_CFG: {
4131
4132 int family, model;
4133
4134 family = guest_cpuid_family(vcpu);
4135 model = guest_cpuid_model(vcpu);
4136
4137 if (family < 0 || model < 0)
4138 return kvm_get_msr_common(vcpu, msr_info);
4139
4140 msr_info->data = 0;
4141
4142 if (family == 0x15 &&
4143 (model >= 0x2 && model < 0x20))
4144 msr_info->data = 0x1E;
4145 }
4146 break;
4147 case MSR_F10H_DECFG:
4148 msr_info->data = svm->msr_decfg;
4149 break;
4150 default:
4151 return kvm_get_msr_common(vcpu, msr_info);
4152 }
4153 return 0;
4154}
4155
4156static int rdmsr_interception(struct vcpu_svm *svm)
4157{
4158 u32 ecx = kvm_register_read(&svm->vcpu, VCPU_REGS_RCX);
4159 struct msr_data msr_info;
4160
4161 msr_info.index = ecx;
4162 msr_info.host_initiated = false;
4163 if (svm_get_msr(&svm->vcpu, &msr_info)) {
4164 trace_kvm_msr_read_ex(ecx);
4165 kvm_inject_gp(&svm->vcpu, 0);
4166 return 1;
4167 } else {
4168 trace_kvm_msr_read(ecx, msr_info.data);
4169
4170 kvm_register_write(&svm->vcpu, VCPU_REGS_RAX,
4171 msr_info.data & 0xffffffff);
4172 kvm_register_write(&svm->vcpu, VCPU_REGS_RDX,
4173 msr_info.data >> 32);
4174 svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
4175 return kvm_skip_emulated_instruction(&svm->vcpu);
4176 }
4177}
4178
4179static int svm_set_vm_cr(struct kvm_vcpu *vcpu, u64 data)
4180{
4181 struct vcpu_svm *svm = to_svm(vcpu);
4182 int svm_dis, chg_mask;
4183
4184 if (data & ~SVM_VM_CR_VALID_MASK)
4185 return 1;
4186
4187 chg_mask = SVM_VM_CR_VALID_MASK;
4188
4189 if (svm->nested.vm_cr_msr & SVM_VM_CR_SVM_DIS_MASK)
4190 chg_mask &= ~(SVM_VM_CR_SVM_LOCK_MASK | SVM_VM_CR_SVM_DIS_MASK);
4191
4192 svm->nested.vm_cr_msr &= ~chg_mask;
4193 svm->nested.vm_cr_msr |= (data & chg_mask);
4194
4195 svm_dis = svm->nested.vm_cr_msr & SVM_VM_CR_SVM_DIS_MASK;
4196
4197
4198 if (svm_dis && (vcpu->arch.efer & EFER_SVME))
4199 return 1;
4200
4201 return 0;
4202}
4203
4204static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
4205{
4206 struct vcpu_svm *svm = to_svm(vcpu);
4207
4208 u32 ecx = msr->index;
4209 u64 data = msr->data;
4210 switch (ecx) {
4211 case MSR_IA32_CR_PAT:
4212 if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data))
4213 return 1;
4214 vcpu->arch.pat = data;
4215 svm->vmcb->save.g_pat = data;
4216 mark_dirty(svm->vmcb, VMCB_NPT);
4217 break;
4218 case MSR_IA32_SPEC_CTRL:
4219 if (!msr->host_initiated &&
4220 !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS))
4221 return 1;
4222
4223
4224 if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP))
4225 return 1;
4226
4227 svm->spec_ctrl = data;
4228
4229 if (!data)
4230 break;
4231
4232
4233
4234
4235
4236
4237
4238
4239
4240
4241
4242
4243 set_msr_interception(svm->msrpm, MSR_IA32_SPEC_CTRL, 1, 1);
4244 break;
4245 case MSR_IA32_PRED_CMD:
4246 if (!msr->host_initiated &&
4247 !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBPB))
4248 return 1;
4249
4250 if (data & ~PRED_CMD_IBPB)
4251 return 1;
4252
4253 if (!data)
4254 break;
4255
4256 wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB);
4257 if (is_guest_mode(vcpu))
4258 break;
4259 set_msr_interception(svm->msrpm, MSR_IA32_PRED_CMD, 0, 1);
4260 break;
4261 case MSR_AMD64_VIRT_SPEC_CTRL:
4262 if (!msr->host_initiated &&
4263 !guest_cpuid_has(vcpu, X86_FEATURE_VIRT_SSBD))
4264 return 1;
4265
4266 if (data & ~SPEC_CTRL_SSBD)
4267 return 1;
4268
4269 svm->virt_spec_ctrl = data;
4270 break;
4271 case MSR_STAR:
4272 svm->vmcb->save.star = data;
4273 break;
4274#ifdef CONFIG_X86_64
4275 case MSR_LSTAR:
4276 svm->vmcb->save.lstar = data;
4277 break;
4278 case MSR_CSTAR:
4279 svm->vmcb->save.cstar = data;
4280 break;
4281 case MSR_KERNEL_GS_BASE:
4282 svm->vmcb->save.kernel_gs_base = data;
4283 break;
4284 case MSR_SYSCALL_MASK:
4285 svm->vmcb->save.sfmask = data;
4286 break;
4287#endif
4288 case MSR_IA32_SYSENTER_CS:
4289 svm->vmcb->save.sysenter_cs = data;
4290 break;
4291 case MSR_IA32_SYSENTER_EIP:
4292 svm->sysenter_eip = data;
4293 svm->vmcb->save.sysenter_eip = data;
4294 break;
4295 case MSR_IA32_SYSENTER_ESP:
4296 svm->sysenter_esp = data;
4297 svm->vmcb->save.sysenter_esp = data;
4298 break;
4299 case MSR_TSC_AUX:
4300 if (!boot_cpu_has(X86_FEATURE_RDTSCP))
4301 return 1;
4302
4303
4304
4305
4306
4307
4308 svm->tsc_aux = data;
4309 wrmsrl(MSR_TSC_AUX, svm->tsc_aux);
4310 break;
4311 case MSR_IA32_DEBUGCTLMSR:
4312 if (!boot_cpu_has(X86_FEATURE_LBRV)) {
4313 vcpu_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTL 0x%llx, nop\n",
4314 __func__, data);
4315 break;
4316 }
4317 if (data & DEBUGCTL_RESERVED_BITS)
4318 return 1;
4319
4320 svm->vmcb->save.dbgctl = data;
4321 mark_dirty(svm->vmcb, VMCB_LBR);
4322 if (data & (1ULL<<0))
4323 svm_enable_lbrv(svm);
4324 else
4325 svm_disable_lbrv(svm);
4326 break;
4327 case MSR_VM_HSAVE_PA:
4328 svm->nested.hsave_msr = data;
4329 break;
4330 case MSR_VM_CR:
4331 return svm_set_vm_cr(vcpu, data);
4332 case MSR_VM_IGNNE:
4333 vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data);
4334 break;
4335 case MSR_F10H_DECFG: {
4336 struct kvm_msr_entry msr_entry;
4337
4338 msr_entry.index = msr->index;
4339 if (svm_get_msr_feature(&msr_entry))
4340 return 1;
4341
4342
4343 if (data & ~msr_entry.data)
4344 return 1;
4345
4346
4347 if (!msr->host_initiated && (data ^ msr_entry.data))
4348 return 1;
4349
4350 svm->msr_decfg = data;
4351 break;
4352 }
4353 case MSR_IA32_APICBASE:
4354 if (kvm_vcpu_apicv_active(vcpu))
4355 avic_update_vapic_bar(to_svm(vcpu), data);
4356
4357 default:
4358 return kvm_set_msr_common(vcpu, msr);
4359 }
4360 return 0;
4361}
4362
4363static int wrmsr_interception(struct vcpu_svm *svm)
4364{
4365 struct msr_data msr;
4366 u32 ecx = kvm_register_read(&svm->vcpu, VCPU_REGS_RCX);
4367 u64 data = kvm_read_edx_eax(&svm->vcpu);
4368
4369 msr.data = data;
4370 msr.index = ecx;
4371 msr.host_initiated = false;
4372
4373 svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
4374 if (kvm_set_msr(&svm->vcpu, &msr)) {
4375 trace_kvm_msr_write_ex(ecx, data);
4376 kvm_inject_gp(&svm->vcpu, 0);
4377 return 1;
4378 } else {
4379 trace_kvm_msr_write(ecx, data);
4380 return kvm_skip_emulated_instruction(&svm->vcpu);
4381 }
4382}
4383
4384static int msr_interception(struct vcpu_svm *svm)
4385{
4386 if (svm->vmcb->control.exit_info_1)
4387 return wrmsr_interception(svm);
4388 else
4389 return rdmsr_interception(svm);
4390}
4391
4392static int interrupt_window_interception(struct vcpu_svm *svm)
4393{
4394 kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
4395 svm_clear_vintr(svm);
4396 svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
4397 mark_dirty(svm->vmcb, VMCB_INTR);
4398 ++svm->vcpu.stat.irq_window_exits;
4399 return 1;
4400}
4401
4402static int pause_interception(struct vcpu_svm *svm)
4403{
4404 struct kvm_vcpu *vcpu = &svm->vcpu;
4405 bool in_kernel = (svm_get_cpl(vcpu) == 0);
4406
4407 if (pause_filter_thresh)
4408 grow_ple_window(vcpu);
4409
4410 kvm_vcpu_on_spin(vcpu, in_kernel);
4411 return 1;
4412}
4413
4414static int nop_interception(struct vcpu_svm *svm)
4415{
4416 return kvm_skip_emulated_instruction(&(svm->vcpu));
4417}
4418
4419static int monitor_interception(struct vcpu_svm *svm)
4420{
4421 printk_once(KERN_WARNING "kvm: MONITOR instruction emulated as NOP!\n");
4422 return nop_interception(svm);
4423}
4424
4425static int mwait_interception(struct vcpu_svm *svm)
4426{
4427 printk_once(KERN_WARNING "kvm: MWAIT instruction emulated as NOP!\n");
4428 return nop_interception(svm);
4429}
4430
4431enum avic_ipi_failure_cause {
4432 AVIC_IPI_FAILURE_INVALID_INT_TYPE,
4433 AVIC_IPI_FAILURE_TARGET_NOT_RUNNING,
4434 AVIC_IPI_FAILURE_INVALID_TARGET,
4435 AVIC_IPI_FAILURE_INVALID_BACKING_PAGE,
4436};
4437
4438static int avic_incomplete_ipi_interception(struct vcpu_svm *svm)
4439{
4440 u32 icrh = svm->vmcb->control.exit_info_1 >> 32;
4441 u32 icrl = svm->vmcb->control.exit_info_1;
4442 u32 id = svm->vmcb->control.exit_info_2 >> 32;
4443 u32 index = svm->vmcb->control.exit_info_2 & 0xFF;
4444 struct kvm_lapic *apic = svm->vcpu.arch.apic;
4445
4446 trace_kvm_avic_incomplete_ipi(svm->vcpu.vcpu_id, icrh, icrl, id, index);
4447
4448 switch (id) {
4449 case AVIC_IPI_FAILURE_INVALID_INT_TYPE:
4450
4451
4452
4453
4454
4455
4456
4457
4458
4459
4460
4461 kvm_lapic_reg_write(apic, APIC_ICR2, icrh);
4462 kvm_lapic_reg_write(apic, APIC_ICR, icrl);
4463 break;
4464 case AVIC_IPI_FAILURE_TARGET_NOT_RUNNING: {
4465 int i;
4466 struct kvm_vcpu *vcpu;
4467 struct kvm *kvm = svm->vcpu.kvm;
4468 struct kvm_lapic *apic = svm->vcpu.arch.apic;
4469
4470
4471
4472
4473
4474
4475 kvm_for_each_vcpu(i, vcpu, kvm) {
4476 bool m = kvm_apic_match_dest(vcpu, apic,
4477 icrl & KVM_APIC_SHORT_MASK,
4478 GET_APIC_DEST_FIELD(icrh),
4479 icrl & KVM_APIC_DEST_MASK);
4480
4481 if (m && !avic_vcpu_is_running(vcpu))
4482 kvm_vcpu_wake_up(vcpu);
4483 }
4484 break;
4485 }
4486 case AVIC_IPI_FAILURE_INVALID_TARGET:
4487 break;
4488 case AVIC_IPI_FAILURE_INVALID_BACKING_PAGE:
4489 WARN_ONCE(1, "Invalid backing page\n");
4490 break;
4491 default:
4492 pr_err("Unknown IPI interception\n");
4493 }
4494
4495 return 1;
4496}
4497
4498static u32 *avic_get_logical_id_entry(struct kvm_vcpu *vcpu, u32 ldr, bool flat)
4499{
4500 struct kvm_svm *kvm_svm = to_kvm_svm(vcpu->kvm);
4501 int index;
4502 u32 *logical_apic_id_table;
4503 int dlid = GET_APIC_LOGICAL_ID(ldr);
4504
4505 if (!dlid)
4506 return NULL;
4507
4508 if (flat) {
4509 index = ffs(dlid) - 1;
4510 if (index > 7)
4511 return NULL;
4512 } else {
4513 int cluster = (dlid & 0xf0) >> 4;
4514 int apic = ffs(dlid & 0x0f) - 1;
4515
4516 if ((apic < 0) || (apic > 7) ||
4517 (cluster >= 0xf))
4518 return NULL;
4519 index = (cluster << 2) + apic;
4520 }
4521
4522 logical_apic_id_table = (u32 *) page_address(kvm_svm->avic_logical_id_table_page);
4523
4524 return &logical_apic_id_table[index];
4525}
4526
4527static int avic_ldr_write(struct kvm_vcpu *vcpu, u8 g_physical_id, u32 ldr,
4528 bool valid)
4529{
4530 bool flat;
4531 u32 *entry, new_entry;
4532
4533 flat = kvm_lapic_get_reg(vcpu->arch.apic, APIC_DFR) == APIC_DFR_FLAT;
4534 entry = avic_get_logical_id_entry(vcpu, ldr, flat);
4535 if (!entry)
4536 return -EINVAL;
4537
4538 new_entry = READ_ONCE(*entry);
4539 new_entry &= ~AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK;
4540 new_entry |= (g_physical_id & AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK);
4541 if (valid)
4542 new_entry |= AVIC_LOGICAL_ID_ENTRY_VALID_MASK;
4543 else
4544 new_entry &= ~AVIC_LOGICAL_ID_ENTRY_VALID_MASK;
4545 WRITE_ONCE(*entry, new_entry);
4546
4547 return 0;
4548}
4549
4550static int avic_handle_ldr_update(struct kvm_vcpu *vcpu)
4551{
4552 int ret;
4553 struct vcpu_svm *svm = to_svm(vcpu);
4554 u32 ldr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_LDR);
4555
4556 if (!ldr)
4557 return 1;
4558
4559 ret = avic_ldr_write(vcpu, vcpu->vcpu_id, ldr, true);
4560 if (ret && svm->ldr_reg) {
4561 avic_ldr_write(vcpu, 0, svm->ldr_reg, false);
4562 svm->ldr_reg = 0;
4563 } else {
4564 svm->ldr_reg = ldr;
4565 }
4566 return ret;
4567}
4568
4569static int avic_handle_apic_id_update(struct kvm_vcpu *vcpu)
4570{
4571 u64 *old, *new;
4572 struct vcpu_svm *svm = to_svm(vcpu);
4573 u32 apic_id_reg = kvm_lapic_get_reg(vcpu->arch.apic, APIC_ID);
4574 u32 id = (apic_id_reg >> 24) & 0xff;
4575
4576 if (vcpu->vcpu_id == id)
4577 return 0;
4578
4579 old = avic_get_physical_id_entry(vcpu, vcpu->vcpu_id);
4580 new = avic_get_physical_id_entry(vcpu, id);
4581 if (!new || !old)
4582 return 1;
4583
4584
4585 *new = *old;
4586 *old = 0ULL;
4587 to_svm(vcpu)->avic_physical_id_cache = new;
4588
4589
4590
4591
4592
4593 if (svm->ldr_reg)
4594 avic_handle_ldr_update(vcpu);
4595
4596 return 0;
4597}
4598
4599static int avic_handle_dfr_update(struct kvm_vcpu *vcpu)
4600{
4601 struct vcpu_svm *svm = to_svm(vcpu);
4602 struct kvm_svm *kvm_svm = to_kvm_svm(vcpu->kvm);
4603 u32 dfr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_DFR);
4604 u32 mod = (dfr >> 28) & 0xf;
4605
4606
4607
4608
4609
4610
4611 if (kvm_svm->ldr_mode == mod)
4612 return 0;
4613
4614 clear_page(page_address(kvm_svm->avic_logical_id_table_page));
4615 kvm_svm->ldr_mode = mod;
4616
4617 if (svm->ldr_reg)
4618 avic_handle_ldr_update(vcpu);
4619 return 0;
4620}
4621
4622static int avic_unaccel_trap_write(struct vcpu_svm *svm)
4623{
4624 struct kvm_lapic *apic = svm->vcpu.arch.apic;
4625 u32 offset = svm->vmcb->control.exit_info_1 &
4626 AVIC_UNACCEL_ACCESS_OFFSET_MASK;
4627
4628 switch (offset) {
4629 case APIC_ID:
4630 if (avic_handle_apic_id_update(&svm->vcpu))
4631 return 0;
4632 break;
4633 case APIC_LDR:
4634 if (avic_handle_ldr_update(&svm->vcpu))
4635 return 0;
4636 break;
4637 case APIC_DFR:
4638 avic_handle_dfr_update(&svm->vcpu);
4639 break;
4640 default:
4641 break;
4642 }
4643
4644 kvm_lapic_reg_write(apic, offset, kvm_lapic_get_reg(apic, offset));
4645
4646 return 1;
4647}
4648
4649static bool is_avic_unaccelerated_access_trap(u32 offset)
4650{
4651 bool ret = false;
4652
4653 switch (offset) {
4654 case APIC_ID:
4655 case APIC_EOI:
4656 case APIC_RRR:
4657 case APIC_LDR:
4658 case APIC_DFR:
4659 case APIC_SPIV:
4660 case APIC_ESR:
4661 case APIC_ICR:
4662 case APIC_LVTT:
4663 case APIC_LVTTHMR:
4664 case APIC_LVTPC:
4665 case APIC_LVT0:
4666 case APIC_LVT1:
4667 case APIC_LVTERR:
4668 case APIC_TMICT:
4669 case APIC_TDCR:
4670 ret = true;
4671 break;
4672 default:
4673 break;
4674 }
4675 return ret;
4676}
4677
4678static int avic_unaccelerated_access_interception(struct vcpu_svm *svm)
4679{
4680 int ret = 0;
4681 u32 offset = svm->vmcb->control.exit_info_1 &
4682 AVIC_UNACCEL_ACCESS_OFFSET_MASK;
4683 u32 vector = svm->vmcb->control.exit_info_2 &
4684 AVIC_UNACCEL_ACCESS_VECTOR_MASK;
4685 bool write = (svm->vmcb->control.exit_info_1 >> 32) &
4686 AVIC_UNACCEL_ACCESS_WRITE_MASK;
4687 bool trap = is_avic_unaccelerated_access_trap(offset);
4688
4689 trace_kvm_avic_unaccelerated_access(svm->vcpu.vcpu_id, offset,
4690 trap, write, vector);
4691 if (trap) {
4692
4693 WARN_ONCE(!write, "svm: Handling trap read.\n");
4694 ret = avic_unaccel_trap_write(svm);
4695 } else {
4696
4697 ret = (emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE);
4698 }
4699
4700 return ret;
4701}
4702
4703static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
4704 [SVM_EXIT_READ_CR0] = cr_interception,
4705 [SVM_EXIT_READ_CR3] = cr_interception,
4706 [SVM_EXIT_READ_CR4] = cr_interception,
4707 [SVM_EXIT_READ_CR8] = cr_interception,
4708 [SVM_EXIT_CR0_SEL_WRITE] = cr_interception,
4709 [SVM_EXIT_WRITE_CR0] = cr_interception,
4710 [SVM_EXIT_WRITE_CR3] = cr_interception,
4711 [SVM_EXIT_WRITE_CR4] = cr_interception,
4712 [SVM_EXIT_WRITE_CR8] = cr8_write_interception,
4713 [SVM_EXIT_READ_DR0] = dr_interception,
4714 [SVM_EXIT_READ_DR1] = dr_interception,
4715 [SVM_EXIT_READ_DR2] = dr_interception,
4716 [SVM_EXIT_READ_DR3] = dr_interception,
4717 [SVM_EXIT_READ_DR4] = dr_interception,
4718 [SVM_EXIT_READ_DR5] = dr_interception,
4719 [SVM_EXIT_READ_DR6] = dr_interception,
4720 [SVM_EXIT_READ_DR7] = dr_interception,
4721 [SVM_EXIT_WRITE_DR0] = dr_interception,
4722 [SVM_EXIT_WRITE_DR1] = dr_interception,
4723 [SVM_EXIT_WRITE_DR2] = dr_interception,
4724 [SVM_EXIT_WRITE_DR3] = dr_interception,
4725 [SVM_EXIT_WRITE_DR4] = dr_interception,
4726 [SVM_EXIT_WRITE_DR5] = dr_interception,
4727 [SVM_EXIT_WRITE_DR6] = dr_interception,
4728 [SVM_EXIT_WRITE_DR7] = dr_interception,
4729 [SVM_EXIT_EXCP_BASE + DB_VECTOR] = db_interception,
4730 [SVM_EXIT_EXCP_BASE + BP_VECTOR] = bp_interception,
4731 [SVM_EXIT_EXCP_BASE + UD_VECTOR] = ud_interception,
4732 [SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception,
4733 [SVM_EXIT_EXCP_BASE + MC_VECTOR] = mc_interception,
4734 [SVM_EXIT_EXCP_BASE + AC_VECTOR] = ac_interception,
4735 [SVM_EXIT_EXCP_BASE + GP_VECTOR] = gp_interception,
4736 [SVM_EXIT_INTR] = intr_interception,
4737 [SVM_EXIT_NMI] = nmi_interception,
4738 [SVM_EXIT_SMI] = nop_on_interception,
4739 [SVM_EXIT_INIT] = nop_on_interception,
4740 [SVM_EXIT_VINTR] = interrupt_window_interception,
4741 [SVM_EXIT_RDPMC] = rdpmc_interception,
4742 [SVM_EXIT_CPUID] = cpuid_interception,
4743 [SVM_EXIT_IRET] = iret_interception,
4744 [SVM_EXIT_INVD] = emulate_on_interception,
4745 [SVM_EXIT_PAUSE] = pause_interception,
4746 [SVM_EXIT_HLT] = halt_interception,
4747 [SVM_EXIT_INVLPG] = invlpg_interception,
4748 [SVM_EXIT_INVLPGA] = invlpga_interception,
4749 [SVM_EXIT_IOIO] = io_interception,
4750 [SVM_EXIT_MSR] = msr_interception,
4751 [SVM_EXIT_TASK_SWITCH] = task_switch_interception,
4752 [SVM_EXIT_SHUTDOWN] = shutdown_interception,
4753 [SVM_EXIT_VMRUN] = vmrun_interception,
4754 [SVM_EXIT_VMMCALL] = vmmcall_interception,
4755 [SVM_EXIT_VMLOAD] = vmload_interception,
4756 [SVM_EXIT_VMSAVE] = vmsave_interception,
4757 [SVM_EXIT_STGI] = stgi_interception,
4758 [SVM_EXIT_CLGI] = clgi_interception,
4759 [SVM_EXIT_SKINIT] = skinit_interception,
4760 [SVM_EXIT_WBINVD] = wbinvd_interception,
4761 [SVM_EXIT_MONITOR] = monitor_interception,
4762 [SVM_EXIT_MWAIT] = mwait_interception,
4763 [SVM_EXIT_XSETBV] = xsetbv_interception,
4764 [SVM_EXIT_NPF] = npf_interception,
4765 [SVM_EXIT_RSM] = rsm_interception,
4766 [SVM_EXIT_AVIC_INCOMPLETE_IPI] = avic_incomplete_ipi_interception,
4767 [SVM_EXIT_AVIC_UNACCELERATED_ACCESS] = avic_unaccelerated_access_interception,
4768};
4769
4770static void dump_vmcb(struct kvm_vcpu *vcpu)
4771{
4772 struct vcpu_svm *svm = to_svm(vcpu);
4773 struct vmcb_control_area *control = &svm->vmcb->control;
4774 struct vmcb_save_area *save = &svm->vmcb->save;
4775
4776 pr_err("VMCB Control Area:\n");
4777 pr_err("%-20s%04x\n", "cr_read:", control->intercept_cr & 0xffff);
4778 pr_err("%-20s%04x\n", "cr_write:", control->intercept_cr >> 16);
4779 pr_err("%-20s%04x\n", "dr_read:", control->intercept_dr & 0xffff);
4780 pr_err("%-20s%04x\n", "dr_write:", control->intercept_dr >> 16);
4781 pr_err("%-20s%08x\n", "exceptions:", control->intercept_exceptions);
4782 pr_err("%-20s%016llx\n", "intercepts:", control->intercept);
4783 pr_err("%-20s%d\n", "pause filter count:", control->pause_filter_count);
4784 pr_err("%-20s%d\n", "pause filter threshold:",
4785 control->pause_filter_thresh);
4786 pr_err("%-20s%016llx\n", "iopm_base_pa:", control->iopm_base_pa);
4787 pr_err("%-20s%016llx\n", "msrpm_base_pa:", control->msrpm_base_pa);
4788 pr_err("%-20s%016llx\n", "tsc_offset:", control->tsc_offset);
4789 pr_err("%-20s%d\n", "asid:", control->asid);
4790 pr_err("%-20s%d\n", "tlb_ctl:", control->tlb_ctl);
4791 pr_err("%-20s%08x\n", "int_ctl:", control->int_ctl);
4792 pr_err("%-20s%08x\n", "int_vector:", control->int_vector);
4793 pr_err("%-20s%08x\n", "int_state:", control->int_state);
4794 pr_err("%-20s%08x\n", "exit_code:", control->exit_code);
4795 pr_err("%-20s%016llx\n", "exit_info1:", control->exit_info_1);
4796 pr_err("%-20s%016llx\n", "exit_info2:", control->exit_info_2);
4797 pr_err("%-20s%08x\n", "exit_int_info:", control->exit_int_info);
4798 pr_err("%-20s%08x\n", "exit_int_info_err:", control->exit_int_info_err);
4799 pr_err("%-20s%lld\n", "nested_ctl:", control->nested_ctl);
4800 pr_err("%-20s%016llx\n", "nested_cr3:", control->nested_cr3);
4801 pr_err("%-20s%016llx\n", "avic_vapic_bar:", control->avic_vapic_bar);
4802 pr_err("%-20s%08x\n", "event_inj:", control->event_inj);
4803 pr_err("%-20s%08x\n", "event_inj_err:", control->event_inj_err);
4804 pr_err("%-20s%lld\n", "virt_ext:", control->virt_ext);
4805 pr_err("%-20s%016llx\n", "next_rip:", control->next_rip);
4806 pr_err("%-20s%016llx\n", "avic_backing_page:", control->avic_backing_page);
4807 pr_err("%-20s%016llx\n", "avic_logical_id:", control->avic_logical_id);
4808 pr_err("%-20s%016llx\n", "avic_physical_id:", control->avic_physical_id);
4809 pr_err("VMCB State Save Area:\n");
4810 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
4811 "es:",
4812 save->es.selector, save->es.attrib,
4813 save->es.limit, save->es.base);
4814 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
4815 "cs:",
4816 save->cs.selector, save->cs.attrib,
4817 save->cs.limit, save->cs.base);
4818 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
4819 "ss:",
4820 save->ss.selector, save->ss.attrib,
4821 save->ss.limit, save->ss.base);
4822 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
4823 "ds:",
4824 save->ds.selector, save->ds.attrib,
4825 save->ds.limit, save->ds.base);
4826 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
4827 "fs:",
4828 save->fs.selector, save->fs.attrib,
4829 save->fs.limit, save->fs.base);
4830 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
4831 "gs:",
4832 save->gs.selector, save->gs.attrib,
4833 save->gs.limit, save->gs.base);
4834 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
4835 "gdtr:",
4836 save->gdtr.selector, save->gdtr.attrib,
4837 save->gdtr.limit, save->gdtr.base);
4838 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
4839 "ldtr:",
4840 save->ldtr.selector, save->ldtr.attrib,
4841 save->ldtr.limit, save->ldtr.base);
4842 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
4843 "idtr:",
4844 save->idtr.selector, save->idtr.attrib,
4845 save->idtr.limit, save->idtr.base);
4846 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
4847 "tr:",
4848 save->tr.selector, save->tr.attrib,
4849 save->tr.limit, save->tr.base);
4850 pr_err("cpl: %d efer: %016llx\n",
4851 save->cpl, save->efer);
4852 pr_err("%-15s %016llx %-13s %016llx\n",
4853 "cr0:", save->cr0, "cr2:", save->cr2);
4854 pr_err("%-15s %016llx %-13s %016llx\n",
4855 "cr3:", save->cr3, "cr4:", save->cr4);
4856 pr_err("%-15s %016llx %-13s %016llx\n",
4857 "dr6:", save->dr6, "dr7:", save->dr7);
4858 pr_err("%-15s %016llx %-13s %016llx\n",
4859 "rip:", save->rip, "rflags:", save->rflags);
4860 pr_err("%-15s %016llx %-13s %016llx\n",
4861 "rsp:", save->rsp, "rax:", save->rax);
4862 pr_err("%-15s %016llx %-13s %016llx\n",
4863 "star:", save->star, "lstar:", save->lstar);
4864 pr_err("%-15s %016llx %-13s %016llx\n",
4865 "cstar:", save->cstar, "sfmask:", save->sfmask);
4866 pr_err("%-15s %016llx %-13s %016llx\n",
4867 "kernel_gs_base:", save->kernel_gs_base,
4868 "sysenter_cs:", save->sysenter_cs);
4869 pr_err("%-15s %016llx %-13s %016llx\n",
4870 "sysenter_esp:", save->sysenter_esp,
4871 "sysenter_eip:", save->sysenter_eip);
4872 pr_err("%-15s %016llx %-13s %016llx\n",
4873 "gpat:", save->g_pat, "dbgctl:", save->dbgctl);
4874 pr_err("%-15s %016llx %-13s %016llx\n",
4875 "br_from:", save->br_from, "br_to:", save->br_to);
4876 pr_err("%-15s %016llx %-13s %016llx\n",
4877 "excp_from:", save->last_excp_from,
4878 "excp_to:", save->last_excp_to);
4879}
4880
4881static void svm_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
4882{
4883 struct vmcb_control_area *control = &to_svm(vcpu)->vmcb->control;
4884
4885 *info1 = control->exit_info_1;
4886 *info2 = control->exit_info_2;
4887}
4888
4889static int handle_exit(struct kvm_vcpu *vcpu)
4890{
4891 struct vcpu_svm *svm = to_svm(vcpu);
4892 struct kvm_run *kvm_run = vcpu->run;
4893 u32 exit_code = svm->vmcb->control.exit_code;
4894
4895 trace_kvm_exit(exit_code, vcpu, KVM_ISA_SVM);
4896
4897 if (!is_cr_intercept(svm, INTERCEPT_CR0_WRITE))
4898 vcpu->arch.cr0 = svm->vmcb->save.cr0;
4899 if (npt_enabled)
4900 vcpu->arch.cr3 = svm->vmcb->save.cr3;
4901
4902 if (unlikely(svm->nested.exit_required)) {
4903 nested_svm_vmexit(svm);
4904 svm->nested.exit_required = false;
4905
4906 return 1;
4907 }
4908
4909 if (is_guest_mode(vcpu)) {
4910 int vmexit;
4911
4912 trace_kvm_nested_vmexit(svm->vmcb->save.rip, exit_code,
4913 svm->vmcb->control.exit_info_1,
4914 svm->vmcb->control.exit_info_2,
4915 svm->vmcb->control.exit_int_info,
4916 svm->vmcb->control.exit_int_info_err,
4917 KVM_ISA_SVM);
4918
4919 vmexit = nested_svm_exit_special(svm);
4920
4921 if (vmexit == NESTED_EXIT_CONTINUE)
4922 vmexit = nested_svm_exit_handled(svm);
4923
4924 if (vmexit == NESTED_EXIT_DONE)
4925 return 1;
4926 }
4927
4928 svm_complete_interrupts(svm);
4929
4930 if (svm->vmcb->control.exit_code == SVM_EXIT_ERR) {
4931 kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
4932 kvm_run->fail_entry.hardware_entry_failure_reason
4933 = svm->vmcb->control.exit_code;
4934 pr_err("KVM: FAILED VMRUN WITH VMCB:\n");
4935 dump_vmcb(vcpu);
4936 return 0;
4937 }
4938
4939 if (is_external_interrupt(svm->vmcb->control.exit_int_info) &&
4940 exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR &&
4941 exit_code != SVM_EXIT_NPF && exit_code != SVM_EXIT_TASK_SWITCH &&
4942 exit_code != SVM_EXIT_INTR && exit_code != SVM_EXIT_NMI)
4943 printk(KERN_ERR "%s: unexpected exit_int_info 0x%x "
4944 "exit_code 0x%x\n",
4945 __func__, svm->vmcb->control.exit_int_info,
4946 exit_code);
4947
4948 if (exit_code >= ARRAY_SIZE(svm_exit_handlers)
4949 || !svm_exit_handlers[exit_code]) {
4950 WARN_ONCE(1, "svm: unexpected exit reason 0x%x\n", exit_code);
4951 kvm_queue_exception(vcpu, UD_VECTOR);
4952 return 1;
4953 }
4954
4955 return svm_exit_handlers[exit_code](svm);
4956}
4957
4958static void reload_tss(struct kvm_vcpu *vcpu)
4959{
4960 int cpu = raw_smp_processor_id();
4961
4962 struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
4963 sd->tss_desc->type = 9;
4964 load_TR_desc();
4965}
4966
4967static void pre_sev_run(struct vcpu_svm *svm, int cpu)
4968{
4969 struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
4970 int asid = sev_get_asid(svm->vcpu.kvm);
4971
4972
4973 svm->vmcb->control.asid = asid;
4974
4975
4976
4977
4978
4979
4980
4981 if (sd->sev_vmcbs[asid] == svm->vmcb &&
4982 svm->last_cpu == cpu)
4983 return;
4984
4985 svm->last_cpu = cpu;
4986 sd->sev_vmcbs[asid] = svm->vmcb;
4987 svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ASID;
4988 mark_dirty(svm->vmcb, VMCB_ASID);
4989}
4990
4991static void pre_svm_run(struct vcpu_svm *svm)
4992{
4993 int cpu = raw_smp_processor_id();
4994
4995 struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
4996
4997 if (sev_guest(svm->vcpu.kvm))
4998 return pre_sev_run(svm, cpu);
4999
5000
5001 if (svm->asid_generation != sd->asid_generation)
5002 new_asid(svm, sd);
5003}
5004
5005static void svm_inject_nmi(struct kvm_vcpu *vcpu)
5006{
5007 struct vcpu_svm *svm = to_svm(vcpu);
5008
5009 svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI;
5010 vcpu->arch.hflags |= HF_NMI_MASK;
5011 set_intercept(svm, INTERCEPT_IRET);
5012 ++vcpu->stat.nmi_injections;
5013}
5014
5015static inline void svm_inject_irq(struct vcpu_svm *svm, int irq)
5016{
5017 struct vmcb_control_area *control;
5018
5019
5020 control = &svm->vmcb->control;
5021 control->int_vector = irq;
5022 control->int_ctl &= ~V_INTR_PRIO_MASK;
5023 control->int_ctl |= V_IRQ_MASK |
5024 (( 0xf) << V_INTR_PRIO_SHIFT);
5025 mark_dirty(svm->vmcb, VMCB_INTR);
5026}
5027
5028static void svm_set_irq(struct kvm_vcpu *vcpu)
5029{
5030 struct vcpu_svm *svm = to_svm(vcpu);
5031
5032 BUG_ON(!(gif_set(svm)));
5033
5034 trace_kvm_inj_virq(vcpu->arch.interrupt.nr);
5035 ++vcpu->stat.irq_injections;
5036
5037 svm->vmcb->control.event_inj = vcpu->arch.interrupt.nr |
5038 SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR;
5039}
5040
5041static inline bool svm_nested_virtualize_tpr(struct kvm_vcpu *vcpu)
5042{
5043 return is_guest_mode(vcpu) && (vcpu->arch.hflags & HF_VINTR_MASK);
5044}
5045
5046static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
5047{
5048 struct vcpu_svm *svm = to_svm(vcpu);
5049
5050 if (svm_nested_virtualize_tpr(vcpu) ||
5051 kvm_vcpu_apicv_active(vcpu))
5052 return;
5053
5054 clr_cr_intercept(svm, INTERCEPT_CR8_WRITE);
5055
5056 if (irr == -1)
5057 return;
5058
5059 if (tpr >= irr)
5060 set_cr_intercept(svm, INTERCEPT_CR8_WRITE);
5061}
5062
5063static void svm_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
5064{
5065 return;
5066}
5067
5068static bool svm_get_enable_apicv(struct kvm_vcpu *vcpu)
5069{
5070 return avic && irqchip_split(vcpu->kvm);
5071}
5072
5073static void svm_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr)
5074{
5075}
5076
5077static void svm_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr)
5078{
5079}
5080
5081
5082static void svm_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
5083{
5084 struct vcpu_svm *svm = to_svm(vcpu);
5085 struct vmcb *vmcb = svm->vmcb;
5086
5087 if (!kvm_vcpu_apicv_active(&svm->vcpu))
5088 return;
5089
5090 vmcb->control.int_ctl &= ~AVIC_ENABLE_MASK;
5091 mark_dirty(vmcb, VMCB_INTR);
5092}
5093
5094static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
5095{
5096 return;
5097}
5098
5099static void svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec)
5100{
5101 kvm_lapic_set_irr(vec, vcpu->arch.apic);
5102 smp_mb__after_atomic();
5103
5104 if (avic_vcpu_is_running(vcpu))
5105 wrmsrl(SVM_AVIC_DOORBELL,
5106 kvm_cpu_get_apicid(vcpu->cpu));
5107 else
5108 kvm_vcpu_wake_up(vcpu);
5109}
5110
5111static void svm_ir_list_del(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
5112{
5113 unsigned long flags;
5114 struct amd_svm_iommu_ir *cur;
5115
5116 spin_lock_irqsave(&svm->ir_list_lock, flags);
5117 list_for_each_entry(cur, &svm->ir_list, node) {
5118 if (cur->data != pi->ir_data)
5119 continue;
5120 list_del(&cur->node);
5121 kfree(cur);
5122 break;
5123 }
5124 spin_unlock_irqrestore(&svm->ir_list_lock, flags);
5125}
5126
5127static int svm_ir_list_add(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
5128{
5129 int ret = 0;
5130 unsigned long flags;
5131 struct amd_svm_iommu_ir *ir;
5132
5133
5134
5135
5136
5137
5138 if (pi->ir_data && (pi->prev_ga_tag != 0)) {
5139 struct kvm *kvm = svm->vcpu.kvm;
5140 u32 vcpu_id = AVIC_GATAG_TO_VCPUID(pi->prev_ga_tag);
5141 struct kvm_vcpu *prev_vcpu = kvm_get_vcpu_by_id(kvm, vcpu_id);
5142 struct vcpu_svm *prev_svm;
5143
5144 if (!prev_vcpu) {
5145 ret = -EINVAL;
5146 goto out;
5147 }
5148
5149 prev_svm = to_svm(prev_vcpu);
5150 svm_ir_list_del(prev_svm, pi);
5151 }
5152
5153
5154
5155
5156
5157 ir = kzalloc(sizeof(struct amd_svm_iommu_ir), GFP_KERNEL);
5158 if (!ir) {
5159 ret = -ENOMEM;
5160 goto out;
5161 }
5162 ir->data = pi->ir_data;
5163
5164 spin_lock_irqsave(&svm->ir_list_lock, flags);
5165 list_add(&ir->node, &svm->ir_list);
5166 spin_unlock_irqrestore(&svm->ir_list_lock, flags);
5167out:
5168 return ret;
5169}
5170
5171
5172
5173
5174
5175
5176
5177
5178
5179
5180
5181
5182static int
5183get_pi_vcpu_info(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e,
5184 struct vcpu_data *vcpu_info, struct vcpu_svm **svm)
5185{
5186 struct kvm_lapic_irq irq;
5187 struct kvm_vcpu *vcpu = NULL;
5188
5189 kvm_set_msi_irq(kvm, e, &irq);
5190
5191 if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu)) {
5192 pr_debug("SVM: %s: use legacy intr remap mode for irq %u\n",
5193 __func__, irq.vector);
5194 return -1;
5195 }
5196
5197 pr_debug("SVM: %s: use GA mode for irq %u\n", __func__,
5198 irq.vector);
5199 *svm = to_svm(vcpu);
5200 vcpu_info->pi_desc_addr = __sme_set(page_to_phys((*svm)->avic_backing_page));
5201 vcpu_info->vector = irq.vector;
5202
5203 return 0;
5204}
5205
5206
5207
5208
5209
5210
5211
5212
5213
5214
5215static int svm_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
5216 uint32_t guest_irq, bool set)
5217{
5218 struct kvm_kernel_irq_routing_entry *e;
5219 struct kvm_irq_routing_table *irq_rt;
5220 int idx, ret = -EINVAL;
5221
5222 if (!kvm_arch_has_assigned_device(kvm) ||
5223 !irq_remapping_cap(IRQ_POSTING_CAP))
5224 return 0;
5225
5226 pr_debug("SVM: %s: host_irq=%#x, guest_irq=%#x, set=%#x\n",
5227 __func__, host_irq, guest_irq, set);
5228
5229 idx = srcu_read_lock(&kvm->irq_srcu);
5230 irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
5231 WARN_ON(guest_irq >= irq_rt->nr_rt_entries);
5232
5233 hlist_for_each_entry(e, &irq_rt->map[guest_irq], link) {
5234 struct vcpu_data vcpu_info;
5235 struct vcpu_svm *svm = NULL;
5236
5237 if (e->type != KVM_IRQ_ROUTING_MSI)
5238 continue;
5239
5240
5241
5242
5243
5244
5245
5246 if (!get_pi_vcpu_info(kvm, e, &vcpu_info, &svm) && set &&
5247 kvm_vcpu_apicv_active(&svm->vcpu)) {
5248 struct amd_iommu_pi_data pi;
5249
5250
5251 pi.base = __sme_set(page_to_phys(svm->avic_backing_page) &
5252 AVIC_HPA_MASK);
5253 pi.ga_tag = AVIC_GATAG(to_kvm_svm(kvm)->avic_vm_id,
5254 svm->vcpu.vcpu_id);
5255 pi.is_guest_mode = true;
5256 pi.vcpu_data = &vcpu_info;
5257 ret = irq_set_vcpu_affinity(host_irq, &pi);
5258
5259
5260
5261
5262
5263
5264
5265
5266 if (!ret && pi.is_guest_mode)
5267 svm_ir_list_add(svm, &pi);
5268 } else {
5269
5270 struct amd_iommu_pi_data pi;
5271
5272
5273
5274
5275
5276
5277 pi.is_guest_mode = false;
5278 ret = irq_set_vcpu_affinity(host_irq, &pi);
5279
5280
5281
5282
5283
5284
5285
5286 if (!ret && pi.prev_ga_tag) {
5287 int id = AVIC_GATAG_TO_VCPUID(pi.prev_ga_tag);
5288 struct kvm_vcpu *vcpu;
5289
5290 vcpu = kvm_get_vcpu_by_id(kvm, id);
5291 if (vcpu)
5292 svm_ir_list_del(to_svm(vcpu), &pi);
5293 }
5294 }
5295
5296 if (!ret && svm) {
5297 trace_kvm_pi_irte_update(host_irq, svm->vcpu.vcpu_id,
5298 e->gsi, vcpu_info.vector,
5299 vcpu_info.pi_desc_addr, set);
5300 }
5301
5302 if (ret < 0) {
5303 pr_err("%s: failed to update PI IRTE\n", __func__);
5304 goto out;
5305 }
5306 }
5307
5308 ret = 0;
5309out:
5310 srcu_read_unlock(&kvm->irq_srcu, idx);
5311 return ret;
5312}
5313
5314static int svm_nmi_allowed(struct kvm_vcpu *vcpu)
5315{
5316 struct vcpu_svm *svm = to_svm(vcpu);
5317 struct vmcb *vmcb = svm->vmcb;
5318 int ret;
5319 ret = !(vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) &&
5320 !(svm->vcpu.arch.hflags & HF_NMI_MASK);
5321 ret = ret && gif_set(svm) && nested_svm_nmi(svm);
5322
5323 return ret;
5324}
5325
5326static bool svm_get_nmi_mask(struct kvm_vcpu *vcpu)
5327{
5328 struct vcpu_svm *svm = to_svm(vcpu);
5329
5330 return !!(svm->vcpu.arch.hflags & HF_NMI_MASK);
5331}
5332
5333static void svm_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
5334{
5335 struct vcpu_svm *svm = to_svm(vcpu);
5336
5337 if (masked) {
5338 svm->vcpu.arch.hflags |= HF_NMI_MASK;
5339 set_intercept(svm, INTERCEPT_IRET);
5340 } else {
5341 svm->vcpu.arch.hflags &= ~HF_NMI_MASK;
5342 clr_intercept(svm, INTERCEPT_IRET);
5343 }
5344}
5345
5346static int svm_interrupt_allowed(struct kvm_vcpu *vcpu)
5347{
5348 struct vcpu_svm *svm = to_svm(vcpu);
5349 struct vmcb *vmcb = svm->vmcb;
5350 int ret;
5351
5352 if (!gif_set(svm) ||
5353 (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK))
5354 return 0;
5355
5356 ret = !!(kvm_get_rflags(vcpu) & X86_EFLAGS_IF);
5357
5358 if (is_guest_mode(vcpu))
5359 return ret && !(svm->vcpu.arch.hflags & HF_VINTR_MASK);
5360
5361 return ret;
5362}
5363
5364static void enable_irq_window(struct kvm_vcpu *vcpu)
5365{
5366 struct vcpu_svm *svm = to_svm(vcpu);
5367
5368 if (kvm_vcpu_apicv_active(vcpu))
5369 return;
5370
5371
5372
5373
5374
5375
5376
5377
5378
5379 if ((vgif_enabled(svm) || gif_set(svm)) && nested_svm_intr(svm)) {
5380 svm_set_vintr(svm);
5381 svm_inject_irq(svm, 0x0);
5382 }
5383}
5384
5385static void enable_nmi_window(struct kvm_vcpu *vcpu)
5386{
5387 struct vcpu_svm *svm = to_svm(vcpu);
5388
5389 if ((svm->vcpu.arch.hflags & (HF_NMI_MASK | HF_IRET_MASK))
5390 == HF_NMI_MASK)
5391 return;
5392
5393 if (!gif_set(svm)) {
5394 if (vgif_enabled(svm))
5395 set_intercept(svm, INTERCEPT_STGI);
5396 return;
5397 }
5398
5399 if (svm->nested.exit_required)
5400 return;
5401
5402
5403
5404
5405
5406 svm->nmi_singlestep_guest_rflags = svm_get_rflags(vcpu);
5407 svm->nmi_singlestep = true;
5408 svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
5409}
5410
5411static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr)
5412{
5413 return 0;
5414}
5415
5416static int svm_set_identity_map_addr(struct kvm *kvm, u64 ident_addr)
5417{
5418 return 0;
5419}
5420
5421static void svm_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa)
5422{
5423 struct vcpu_svm *svm = to_svm(vcpu);
5424
5425 if (static_cpu_has(X86_FEATURE_FLUSHBYASID))
5426 svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ASID;
5427 else
5428 svm->asid_generation--;
5429}
5430
5431static void svm_prepare_guest_switch(struct kvm_vcpu *vcpu)
5432{
5433}
5434
5435static inline void sync_cr8_to_lapic(struct kvm_vcpu *vcpu)
5436{
5437 struct vcpu_svm *svm = to_svm(vcpu);
5438
5439 if (svm_nested_virtualize_tpr(vcpu))
5440 return;
5441
5442 if (!is_cr_intercept(svm, INTERCEPT_CR8_WRITE)) {
5443 int cr8 = svm->vmcb->control.int_ctl & V_TPR_MASK;
5444 kvm_set_cr8(vcpu, cr8);
5445 }
5446}
5447
5448static inline void sync_lapic_to_cr8(struct kvm_vcpu *vcpu)
5449{
5450 struct vcpu_svm *svm = to_svm(vcpu);
5451 u64 cr8;
5452
5453 if (svm_nested_virtualize_tpr(vcpu) ||
5454 kvm_vcpu_apicv_active(vcpu))
5455 return;
5456
5457 cr8 = kvm_get_cr8(vcpu);
5458 svm->vmcb->control.int_ctl &= ~V_TPR_MASK;
5459 svm->vmcb->control.int_ctl |= cr8 & V_TPR_MASK;
5460}
5461
5462static void svm_complete_interrupts(struct vcpu_svm *svm)
5463{
5464 u8 vector;
5465 int type;
5466 u32 exitintinfo = svm->vmcb->control.exit_int_info;
5467 unsigned int3_injected = svm->int3_injected;
5468
5469 svm->int3_injected = 0;
5470
5471
5472
5473
5474
5475 if ((svm->vcpu.arch.hflags & HF_IRET_MASK)
5476 && kvm_rip_read(&svm->vcpu) != svm->nmi_iret_rip) {
5477 svm->vcpu.arch.hflags &= ~(HF_NMI_MASK | HF_IRET_MASK);
5478 kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
5479 }
5480
5481 svm->vcpu.arch.nmi_injected = false;
5482 kvm_clear_exception_queue(&svm->vcpu);
5483 kvm_clear_interrupt_queue(&svm->vcpu);
5484
5485 if (!(exitintinfo & SVM_EXITINTINFO_VALID))
5486 return;
5487
5488 kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
5489
5490 vector = exitintinfo & SVM_EXITINTINFO_VEC_MASK;
5491 type = exitintinfo & SVM_EXITINTINFO_TYPE_MASK;
5492
5493 switch (type) {
5494 case SVM_EXITINTINFO_TYPE_NMI:
5495 svm->vcpu.arch.nmi_injected = true;
5496 break;
5497 case SVM_EXITINTINFO_TYPE_EXEPT:
5498
5499
5500
5501
5502
5503 if (kvm_exception_is_soft(vector)) {
5504 if (vector == BP_VECTOR && int3_injected &&
5505 kvm_is_linear_rip(&svm->vcpu, svm->int3_rip))
5506 kvm_rip_write(&svm->vcpu,
5507 kvm_rip_read(&svm->vcpu) -
5508 int3_injected);
5509 break;
5510 }
5511 if (exitintinfo & SVM_EXITINTINFO_VALID_ERR) {
5512 u32 err = svm->vmcb->control.exit_int_info_err;
5513 kvm_requeue_exception_e(&svm->vcpu, vector, err);
5514
5515 } else
5516 kvm_requeue_exception(&svm->vcpu, vector);
5517 break;
5518 case SVM_EXITINTINFO_TYPE_INTR:
5519 kvm_queue_interrupt(&svm->vcpu, vector, false);
5520 break;
5521 default:
5522 break;
5523 }
5524}
5525
5526static void svm_cancel_injection(struct kvm_vcpu *vcpu)
5527{
5528 struct vcpu_svm *svm = to_svm(vcpu);
5529 struct vmcb_control_area *control = &svm->vmcb->control;
5530
5531 control->exit_int_info = control->event_inj;
5532 control->exit_int_info_err = control->event_inj_err;
5533 control->event_inj = 0;
5534 svm_complete_interrupts(svm);
5535}
5536
5537static void svm_vcpu_run(struct kvm_vcpu *vcpu)
5538{
5539 struct vcpu_svm *svm = to_svm(vcpu);
5540
5541 svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX];
5542 svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP];
5543 svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP];
5544
5545
5546
5547
5548
5549 if (unlikely(svm->nested.exit_required))
5550 return;
5551
5552
5553
5554
5555
5556
5557
5558 if (svm->nmi_singlestep && svm->vmcb->control.event_inj) {
5559
5560
5561
5562
5563
5564 disable_nmi_singlestep(svm);
5565 smp_send_reschedule(vcpu->cpu);
5566 }
5567
5568 pre_svm_run(svm);
5569
5570 sync_lapic_to_cr8(vcpu);
5571
5572 svm->vmcb->save.cr2 = vcpu->arch.cr2;
5573
5574 clgi();
5575
5576 local_irq_enable();
5577
5578
5579
5580
5581
5582
5583
5584 x86_spec_ctrl_set_guest(svm->spec_ctrl, svm->virt_spec_ctrl);
5585
5586 asm volatile (
5587 "push %%" _ASM_BP "; \n\t"
5588 "mov %c[rbx](%[svm]), %%" _ASM_BX " \n\t"
5589 "mov %c[rcx](%[svm]), %%" _ASM_CX " \n\t"
5590 "mov %c[rdx](%[svm]), %%" _ASM_DX " \n\t"
5591 "mov %c[rsi](%[svm]), %%" _ASM_SI " \n\t"
5592 "mov %c[rdi](%[svm]), %%" _ASM_DI " \n\t"
5593 "mov %c[rbp](%[svm]), %%" _ASM_BP " \n\t"
5594#ifdef CONFIG_X86_64
5595 "mov %c[r8](%[svm]), %%r8 \n\t"
5596 "mov %c[r9](%[svm]), %%r9 \n\t"
5597 "mov %c[r10](%[svm]), %%r10 \n\t"
5598 "mov %c[r11](%[svm]), %%r11 \n\t"
5599 "mov %c[r12](%[svm]), %%r12 \n\t"
5600 "mov %c[r13](%[svm]), %%r13 \n\t"
5601 "mov %c[r14](%[svm]), %%r14 \n\t"
5602 "mov %c[r15](%[svm]), %%r15 \n\t"
5603#endif
5604
5605
5606 "push %%" _ASM_AX " \n\t"
5607 "mov %c[vmcb](%[svm]), %%" _ASM_AX " \n\t"
5608 __ex(SVM_VMLOAD) "\n\t"
5609 __ex(SVM_VMRUN) "\n\t"
5610 __ex(SVM_VMSAVE) "\n\t"
5611 "pop %%" _ASM_AX " \n\t"
5612
5613
5614 "mov %%" _ASM_BX ", %c[rbx](%[svm]) \n\t"
5615 "mov %%" _ASM_CX ", %c[rcx](%[svm]) \n\t"
5616 "mov %%" _ASM_DX ", %c[rdx](%[svm]) \n\t"
5617 "mov %%" _ASM_SI ", %c[rsi](%[svm]) \n\t"
5618 "mov %%" _ASM_DI ", %c[rdi](%[svm]) \n\t"
5619 "mov %%" _ASM_BP ", %c[rbp](%[svm]) \n\t"
5620#ifdef CONFIG_X86_64
5621 "mov %%r8, %c[r8](%[svm]) \n\t"
5622 "mov %%r9, %c[r9](%[svm]) \n\t"
5623 "mov %%r10, %c[r10](%[svm]) \n\t"
5624 "mov %%r11, %c[r11](%[svm]) \n\t"
5625 "mov %%r12, %c[r12](%[svm]) \n\t"
5626 "mov %%r13, %c[r13](%[svm]) \n\t"
5627 "mov %%r14, %c[r14](%[svm]) \n\t"
5628 "mov %%r15, %c[r15](%[svm]) \n\t"
5629#endif
5630
5631
5632
5633
5634 "xor %%" _ASM_BX ", %%" _ASM_BX " \n\t"
5635 "xor %%" _ASM_CX ", %%" _ASM_CX " \n\t"
5636 "xor %%" _ASM_DX ", %%" _ASM_DX " \n\t"
5637 "xor %%" _ASM_SI ", %%" _ASM_SI " \n\t"
5638 "xor %%" _ASM_DI ", %%" _ASM_DI " \n\t"
5639#ifdef CONFIG_X86_64
5640 "xor %%r8, %%r8 \n\t"
5641 "xor %%r9, %%r9 \n\t"
5642 "xor %%r10, %%r10 \n\t"
5643 "xor %%r11, %%r11 \n\t"
5644 "xor %%r12, %%r12 \n\t"
5645 "xor %%r13, %%r13 \n\t"
5646 "xor %%r14, %%r14 \n\t"
5647 "xor %%r15, %%r15 \n\t"
5648#endif
5649 "pop %%" _ASM_BP
5650 :
5651 : [svm]"a"(svm),
5652 [vmcb]"i"(offsetof(struct vcpu_svm, vmcb_pa)),
5653 [rbx]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RBX])),
5654 [rcx]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RCX])),
5655 [rdx]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RDX])),
5656 [rsi]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RSI])),
5657 [rdi]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RDI])),
5658 [rbp]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RBP]))
5659#ifdef CONFIG_X86_64
5660 , [r8]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R8])),
5661 [r9]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R9])),
5662 [r10]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R10])),
5663 [r11]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R11])),
5664 [r12]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R12])),
5665 [r13]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R13])),
5666 [r14]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R14])),
5667 [r15]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R15]))
5668#endif
5669 : "cc", "memory"
5670#ifdef CONFIG_X86_64
5671 , "rbx", "rcx", "rdx", "rsi", "rdi"
5672 , "r8", "r9", "r10", "r11" , "r12", "r13", "r14", "r15"
5673#else
5674 , "ebx", "ecx", "edx", "esi", "edi"
5675#endif
5676 );
5677
5678
5679 vmexit_fill_RSB();
5680
5681#ifdef CONFIG_X86_64
5682 wrmsrl(MSR_GS_BASE, svm->host.gs_base);
5683#else
5684 loadsegment(fs, svm->host.fs);
5685#ifndef CONFIG_X86_32_LAZY_GS
5686 loadsegment(gs, svm->host.gs);
5687#endif
5688#endif
5689
5690
5691
5692
5693
5694
5695
5696
5697
5698
5699
5700
5701
5702
5703
5704
5705 if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)))
5706 svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
5707
5708 x86_spec_ctrl_restore_host(svm->spec_ctrl, svm->virt_spec_ctrl);
5709
5710 reload_tss(vcpu);
5711
5712 local_irq_disable();
5713
5714 vcpu->arch.cr2 = svm->vmcb->save.cr2;
5715 vcpu->arch.regs[VCPU_REGS_RAX] = svm->vmcb->save.rax;
5716 vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp;
5717 vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip;
5718
5719 if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI))
5720 kvm_before_interrupt(&svm->vcpu);
5721
5722 stgi();
5723
5724
5725
5726 if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI))
5727 kvm_after_interrupt(&svm->vcpu);
5728
5729 sync_cr8_to_lapic(vcpu);
5730
5731 svm->next_rip = 0;
5732
5733 svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING;
5734
5735
5736 if (svm->vmcb->control.exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR)
5737 svm->vcpu.arch.apf.host_apf_reason = kvm_read_and_reset_pf_reason();
5738
5739 if (npt_enabled) {
5740 vcpu->arch.regs_avail &= ~(1 << VCPU_EXREG_PDPTR);
5741 vcpu->arch.regs_dirty &= ~(1 << VCPU_EXREG_PDPTR);
5742 }
5743
5744
5745
5746
5747
5748 if (unlikely(svm->vmcb->control.exit_code ==
5749 SVM_EXIT_EXCP_BASE + MC_VECTOR))
5750 svm_handle_mce(svm);
5751
5752 mark_all_clean(svm->vmcb);
5753}
5754STACK_FRAME_NON_STANDARD(svm_vcpu_run);
5755
5756static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root)
5757{
5758 struct vcpu_svm *svm = to_svm(vcpu);
5759
5760 svm->vmcb->save.cr3 = __sme_set(root);
5761 mark_dirty(svm->vmcb, VMCB_CR);
5762 svm_flush_tlb(vcpu, true);
5763}
5764
5765static void set_tdp_cr3(struct kvm_vcpu *vcpu, unsigned long root)
5766{
5767 struct vcpu_svm *svm = to_svm(vcpu);
5768
5769 svm->vmcb->control.nested_cr3 = __sme_set(root);
5770 mark_dirty(svm->vmcb, VMCB_NPT);
5771
5772
5773 svm->vmcb->save.cr3 = kvm_read_cr3(vcpu);
5774 mark_dirty(svm->vmcb, VMCB_CR);
5775
5776 svm_flush_tlb(vcpu, true);
5777}
5778
5779static int is_disabled(void)
5780{
5781 u64 vm_cr;
5782
5783 rdmsrl(MSR_VM_CR, vm_cr);
5784 if (vm_cr & (1 << SVM_VM_CR_SVM_DISABLE))
5785 return 1;
5786
5787 return 0;
5788}
5789
5790static void
5791svm_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
5792{
5793
5794
5795
5796 hypercall[0] = 0x0f;
5797 hypercall[1] = 0x01;
5798 hypercall[2] = 0xd9;
5799}
5800
5801static void svm_check_processor_compat(void *rtn)
5802{
5803 *(int *)rtn = 0;
5804}
5805
5806static bool svm_cpu_has_accelerated_tpr(void)
5807{
5808 return false;
5809}
5810
5811static bool svm_has_emulated_msr(int index)
5812{
5813 return true;
5814}
5815
5816static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
5817{
5818 return 0;
5819}
5820
5821static void svm_cpuid_update(struct kvm_vcpu *vcpu)
5822{
5823 struct vcpu_svm *svm = to_svm(vcpu);
5824
5825
5826 svm->nrips_enabled = !!guest_cpuid_has(&svm->vcpu, X86_FEATURE_NRIPS);
5827
5828 if (!kvm_vcpu_apicv_active(vcpu))
5829 return;
5830
5831 guest_cpuid_clear(vcpu, X86_FEATURE_X2APIC);
5832}
5833
5834static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
5835{
5836 switch (func) {
5837 case 0x1:
5838 if (avic)
5839 entry->ecx &= ~bit(X86_FEATURE_X2APIC);
5840 break;
5841 case 0x80000001:
5842 if (nested)
5843 entry->ecx |= (1 << 2);
5844 break;
5845 case 0x8000000A:
5846 entry->eax = 1;
5847 entry->ebx = 8;
5848
5849 entry->ecx = 0;
5850 entry->edx = 0;
5851
5852
5853
5854 if (boot_cpu_has(X86_FEATURE_NRIPS))
5855 entry->edx |= SVM_FEATURE_NRIP;
5856
5857
5858 if (npt_enabled)
5859 entry->edx |= SVM_FEATURE_NPT;
5860
5861 break;
5862 case 0x8000001F:
5863
5864 if (boot_cpu_has(X86_FEATURE_SEV))
5865 cpuid(0x8000001f, &entry->eax, &entry->ebx,
5866 &entry->ecx, &entry->edx);
5867
5868 }
5869}
5870
5871static int svm_get_lpage_level(void)
5872{
5873 return PT_PDPE_LEVEL;
5874}
5875
5876static bool svm_rdtscp_supported(void)
5877{
5878 return boot_cpu_has(X86_FEATURE_RDTSCP);
5879}
5880
5881static bool svm_invpcid_supported(void)
5882{
5883 return false;
5884}
5885
5886static bool svm_mpx_supported(void)
5887{
5888 return false;
5889}
5890
5891static bool svm_xsaves_supported(void)
5892{
5893 return false;
5894}
5895
5896static bool svm_umip_emulated(void)
5897{
5898 return false;
5899}
5900
5901static bool svm_has_wbinvd_exit(void)
5902{
5903 return true;
5904}
5905
5906#define PRE_EX(exit) { .exit_code = (exit), \
5907 .stage = X86_ICPT_PRE_EXCEPT, }
5908#define POST_EX(exit) { .exit_code = (exit), \
5909 .stage = X86_ICPT_POST_EXCEPT, }
5910#define POST_MEM(exit) { .exit_code = (exit), \
5911 .stage = X86_ICPT_POST_MEMACCESS, }
5912
5913static const struct __x86_intercept {
5914 u32 exit_code;
5915 enum x86_intercept_stage stage;
5916} x86_intercept_map[] = {
5917 [x86_intercept_cr_read] = POST_EX(SVM_EXIT_READ_CR0),
5918 [x86_intercept_cr_write] = POST_EX(SVM_EXIT_WRITE_CR0),
5919 [x86_intercept_clts] = POST_EX(SVM_EXIT_WRITE_CR0),
5920 [x86_intercept_lmsw] = POST_EX(SVM_EXIT_WRITE_CR0),
5921 [x86_intercept_smsw] = POST_EX(SVM_EXIT_READ_CR0),
5922 [x86_intercept_dr_read] = POST_EX(SVM_EXIT_READ_DR0),
5923 [x86_intercept_dr_write] = POST_EX(SVM_EXIT_WRITE_DR0),
5924 [x86_intercept_sldt] = POST_EX(SVM_EXIT_LDTR_READ),
5925 [x86_intercept_str] = POST_EX(SVM_EXIT_TR_READ),
5926 [x86_intercept_lldt] = POST_EX(SVM_EXIT_LDTR_WRITE),
5927 [x86_intercept_ltr] = POST_EX(SVM_EXIT_TR_WRITE),
5928 [x86_intercept_sgdt] = POST_EX(SVM_EXIT_GDTR_READ),
5929 [x86_intercept_sidt] = POST_EX(SVM_EXIT_IDTR_READ),
5930 [x86_intercept_lgdt] = POST_EX(SVM_EXIT_GDTR_WRITE),
5931 [x86_intercept_lidt] = POST_EX(SVM_EXIT_IDTR_WRITE),
5932 [x86_intercept_vmrun] = POST_EX(SVM_EXIT_VMRUN),
5933 [x86_intercept_vmmcall] = POST_EX(SVM_EXIT_VMMCALL),
5934 [x86_intercept_vmload] = POST_EX(SVM_EXIT_VMLOAD),
5935 [x86_intercept_vmsave] = POST_EX(SVM_EXIT_VMSAVE),
5936 [x86_intercept_stgi] = POST_EX(SVM_EXIT_STGI),
5937 [x86_intercept_clgi] = POST_EX(SVM_EXIT_CLGI),
5938 [x86_intercept_skinit] = POST_EX(SVM_EXIT_SKINIT),
5939 [x86_intercept_invlpga] = POST_EX(SVM_EXIT_INVLPGA),
5940 [x86_intercept_rdtscp] = POST_EX(SVM_EXIT_RDTSCP),
5941 [x86_intercept_monitor] = POST_MEM(SVM_EXIT_MONITOR),
5942 [x86_intercept_mwait] = POST_EX(SVM_EXIT_MWAIT),
5943 [x86_intercept_invlpg] = POST_EX(SVM_EXIT_INVLPG),
5944 [x86_intercept_invd] = POST_EX(SVM_EXIT_INVD),
5945 [x86_intercept_wbinvd] = POST_EX(SVM_EXIT_WBINVD),
5946 [x86_intercept_wrmsr] = POST_EX(SVM_EXIT_MSR),
5947 [x86_intercept_rdtsc] = POST_EX(SVM_EXIT_RDTSC),
5948 [x86_intercept_rdmsr] = POST_EX(SVM_EXIT_MSR),
5949 [x86_intercept_rdpmc] = POST_EX(SVM_EXIT_RDPMC),
5950 [x86_intercept_cpuid] = PRE_EX(SVM_EXIT_CPUID),
5951 [x86_intercept_rsm] = PRE_EX(SVM_EXIT_RSM),
5952 [x86_intercept_pause] = PRE_EX(SVM_EXIT_PAUSE),
5953 [x86_intercept_pushf] = PRE_EX(SVM_EXIT_PUSHF),
5954 [x86_intercept_popf] = PRE_EX(SVM_EXIT_POPF),
5955 [x86_intercept_intn] = PRE_EX(SVM_EXIT_SWINT),
5956 [x86_intercept_iret] = PRE_EX(SVM_EXIT_IRET),
5957 [x86_intercept_icebp] = PRE_EX(SVM_EXIT_ICEBP),
5958 [x86_intercept_hlt] = POST_EX(SVM_EXIT_HLT),
5959 [x86_intercept_in] = POST_EX(SVM_EXIT_IOIO),
5960 [x86_intercept_ins] = POST_EX(SVM_EXIT_IOIO),
5961 [x86_intercept_out] = POST_EX(SVM_EXIT_IOIO),
5962 [x86_intercept_outs] = POST_EX(SVM_EXIT_IOIO),
5963};
5964
5965#undef PRE_EX
5966#undef POST_EX
5967#undef POST_MEM
5968
5969static int svm_check_intercept(struct kvm_vcpu *vcpu,
5970 struct x86_instruction_info *info,
5971 enum x86_intercept_stage stage)
5972{
5973 struct vcpu_svm *svm = to_svm(vcpu);
5974 int vmexit, ret = X86EMUL_CONTINUE;
5975 struct __x86_intercept icpt_info;
5976 struct vmcb *vmcb = svm->vmcb;
5977
5978 if (info->intercept >= ARRAY_SIZE(x86_intercept_map))
5979 goto out;
5980
5981 icpt_info = x86_intercept_map[info->intercept];
5982
5983 if (stage != icpt_info.stage)
5984 goto out;
5985
5986 switch (icpt_info.exit_code) {
5987 case SVM_EXIT_READ_CR0:
5988 if (info->intercept == x86_intercept_cr_read)
5989 icpt_info.exit_code += info->modrm_reg;
5990 break;
5991 case SVM_EXIT_WRITE_CR0: {
5992 unsigned long cr0, val;
5993 u64 intercept;
5994
5995 if (info->intercept == x86_intercept_cr_write)
5996 icpt_info.exit_code += info->modrm_reg;
5997
5998 if (icpt_info.exit_code != SVM_EXIT_WRITE_CR0 ||
5999 info->intercept == x86_intercept_clts)
6000 break;
6001
6002 intercept = svm->nested.intercept;
6003
6004 if (!(intercept & (1ULL << INTERCEPT_SELECTIVE_CR0)))
6005 break;
6006
6007 cr0 = vcpu->arch.cr0 & ~SVM_CR0_SELECTIVE_MASK;
6008 val = info->src_val & ~SVM_CR0_SELECTIVE_MASK;
6009
6010 if (info->intercept == x86_intercept_lmsw) {
6011 cr0 &= 0xfUL;
6012 val &= 0xfUL;
6013
6014 if (cr0 & X86_CR0_PE)
6015 val |= X86_CR0_PE;
6016 }
6017
6018 if (cr0 ^ val)
6019 icpt_info.exit_code = SVM_EXIT_CR0_SEL_WRITE;
6020
6021 break;
6022 }
6023 case SVM_EXIT_READ_DR0:
6024 case SVM_EXIT_WRITE_DR0:
6025 icpt_info.exit_code += info->modrm_reg;
6026 break;
6027 case SVM_EXIT_MSR:
6028 if (info->intercept == x86_intercept_wrmsr)
6029 vmcb->control.exit_info_1 = 1;
6030 else
6031 vmcb->control.exit_info_1 = 0;
6032 break;
6033 case SVM_EXIT_PAUSE:
6034
6035
6036
6037
6038 if (info->rep_prefix != REPE_PREFIX)
6039 goto out;
6040 break;
6041 case SVM_EXIT_IOIO: {
6042 u64 exit_info;
6043 u32 bytes;
6044
6045 if (info->intercept == x86_intercept_in ||
6046 info->intercept == x86_intercept_ins) {
6047 exit_info = ((info->src_val & 0xffff) << 16) |
6048 SVM_IOIO_TYPE_MASK;
6049 bytes = info->dst_bytes;
6050 } else {
6051 exit_info = (info->dst_val & 0xffff) << 16;
6052 bytes = info->src_bytes;
6053 }
6054
6055 if (info->intercept == x86_intercept_outs ||
6056 info->intercept == x86_intercept_ins)
6057 exit_info |= SVM_IOIO_STR_MASK;
6058
6059 if (info->rep_prefix)
6060 exit_info |= SVM_IOIO_REP_MASK;
6061
6062 bytes = min(bytes, 4u);
6063
6064 exit_info |= bytes << SVM_IOIO_SIZE_SHIFT;
6065
6066 exit_info |= (u32)info->ad_bytes << (SVM_IOIO_ASIZE_SHIFT - 1);
6067
6068 vmcb->control.exit_info_1 = exit_info;
6069 vmcb->control.exit_info_2 = info->next_rip;
6070
6071 break;
6072 }
6073 default:
6074 break;
6075 }
6076
6077
6078 if (static_cpu_has(X86_FEATURE_NRIPS))
6079 vmcb->control.next_rip = info->next_rip;
6080 vmcb->control.exit_code = icpt_info.exit_code;
6081 vmexit = nested_svm_exit_handled(svm);
6082
6083 ret = (vmexit == NESTED_EXIT_DONE) ? X86EMUL_INTERCEPTED
6084 : X86EMUL_CONTINUE;
6085
6086out:
6087 return ret;
6088}
6089
6090static void svm_handle_external_intr(struct kvm_vcpu *vcpu)
6091{
6092 local_irq_enable();
6093
6094
6095
6096
6097 asm("nop");
6098 local_irq_disable();
6099}
6100
6101static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu)
6102{
6103 if (pause_filter_thresh)
6104 shrink_ple_window(vcpu);
6105}
6106
6107static inline void avic_post_state_restore(struct kvm_vcpu *vcpu)
6108{
6109 if (avic_handle_apic_id_update(vcpu) != 0)
6110 return;
6111 if (avic_handle_dfr_update(vcpu) != 0)
6112 return;
6113 avic_handle_ldr_update(vcpu);
6114}
6115
6116static void svm_setup_mce(struct kvm_vcpu *vcpu)
6117{
6118
6119 vcpu->arch.mcg_cap &= 0x1ff;
6120}
6121
6122static int svm_smi_allowed(struct kvm_vcpu *vcpu)
6123{
6124 struct vcpu_svm *svm = to_svm(vcpu);
6125
6126
6127 if (!gif_set(svm))
6128 return 0;
6129
6130 if (is_guest_mode(&svm->vcpu) &&
6131 svm->nested.intercept & (1ULL << INTERCEPT_SMI)) {
6132
6133 svm->vmcb->control.exit_code = SVM_EXIT_SMI;
6134 svm->nested.exit_required = true;
6135 return 0;
6136 }
6137
6138 return 1;
6139}
6140
6141static int svm_pre_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
6142{
6143 struct vcpu_svm *svm = to_svm(vcpu);
6144 int ret;
6145
6146 if (is_guest_mode(vcpu)) {
6147
6148 put_smstate(u64, smstate, 0x7ed8, 1);
6149
6150 put_smstate(u64, smstate, 0x7ee0, svm->nested.vmcb);
6151
6152 svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX];
6153 svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP];
6154 svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP];
6155
6156 ret = nested_svm_vmexit(svm);
6157 if (ret)
6158 return ret;
6159 }
6160 return 0;
6161}
6162
6163static int svm_pre_leave_smm(struct kvm_vcpu *vcpu, u64 smbase)
6164{
6165 struct vcpu_svm *svm = to_svm(vcpu);
6166 struct vmcb *nested_vmcb;
6167 struct page *page;
6168 struct {
6169 u64 guest;
6170 u64 vmcb;
6171 } svm_state_save;
6172 int ret;
6173
6174 ret = kvm_vcpu_read_guest(vcpu, smbase + 0xfed8, &svm_state_save,
6175 sizeof(svm_state_save));
6176 if (ret)
6177 return ret;
6178
6179 if (svm_state_save.guest) {
6180 vcpu->arch.hflags &= ~HF_SMM_MASK;
6181 nested_vmcb = nested_svm_map(svm, svm_state_save.vmcb, &page);
6182 if (nested_vmcb)
6183 enter_svm_guest_mode(svm, svm_state_save.vmcb, nested_vmcb, page);
6184 else
6185 ret = 1;
6186 vcpu->arch.hflags |= HF_SMM_MASK;
6187 }
6188 return ret;
6189}
6190
6191static int enable_smi_window(struct kvm_vcpu *vcpu)
6192{
6193 struct vcpu_svm *svm = to_svm(vcpu);
6194
6195 if (!gif_set(svm)) {
6196 if (vgif_enabled(svm))
6197 set_intercept(svm, INTERCEPT_STGI);
6198
6199 return 1;
6200 }
6201 return 0;
6202}
6203
6204static int sev_asid_new(void)
6205{
6206 int pos;
6207
6208
6209
6210
6211 pos = find_next_zero_bit(sev_asid_bitmap, max_sev_asid, min_sev_asid - 1);
6212 if (pos >= max_sev_asid)
6213 return -EBUSY;
6214
6215 set_bit(pos, sev_asid_bitmap);
6216 return pos + 1;
6217}
6218
6219static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
6220{
6221 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
6222 int asid, ret;
6223
6224 ret = -EBUSY;
6225 asid = sev_asid_new();
6226 if (asid < 0)
6227 return ret;
6228
6229 ret = sev_platform_init(&argp->error);
6230 if (ret)
6231 goto e_free;
6232
6233 sev->active = true;
6234 sev->asid = asid;
6235 INIT_LIST_HEAD(&sev->regions_list);
6236
6237 return 0;
6238
6239e_free:
6240 __sev_asid_free(asid);
6241 return ret;
6242}
6243
6244static int sev_bind_asid(struct kvm *kvm, unsigned int handle, int *error)
6245{
6246 struct sev_data_activate *data;
6247 int asid = sev_get_asid(kvm);
6248 int ret;
6249
6250 wbinvd_on_all_cpus();
6251
6252 ret = sev_guest_df_flush(error);
6253 if (ret)
6254 return ret;
6255
6256 data = kzalloc(sizeof(*data), GFP_KERNEL);
6257 if (!data)
6258 return -ENOMEM;
6259
6260
6261 data->handle = handle;
6262 data->asid = asid;
6263 ret = sev_guest_activate(data, error);
6264 kfree(data);
6265
6266 return ret;
6267}
6268
6269static int __sev_issue_cmd(int fd, int id, void *data, int *error)
6270{
6271 struct fd f;
6272 int ret;
6273
6274 f = fdget(fd);
6275 if (!f.file)
6276 return -EBADF;
6277
6278 ret = sev_issue_cmd_external_user(f.file, id, data, error);
6279
6280 fdput(f);
6281 return ret;
6282}
6283
6284static int sev_issue_cmd(struct kvm *kvm, int id, void *data, int *error)
6285{
6286 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
6287
6288 return __sev_issue_cmd(sev->fd, id, data, error);
6289}
6290
6291static int sev_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
6292{
6293 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
6294 struct sev_data_launch_start *start;
6295 struct kvm_sev_launch_start params;
6296 void *dh_blob, *session_blob;
6297 int *error = &argp->error;
6298 int ret;
6299
6300 if (!sev_guest(kvm))
6301 return -ENOTTY;
6302
6303 if (copy_from_user(¶ms, (void __user *)(uintptr_t)argp->data, sizeof(params)))
6304 return -EFAULT;
6305
6306 start = kzalloc(sizeof(*start), GFP_KERNEL);
6307 if (!start)
6308 return -ENOMEM;
6309
6310 dh_blob = NULL;
6311 if (params.dh_uaddr) {
6312 dh_blob = psp_copy_user_blob(params.dh_uaddr, params.dh_len);
6313 if (IS_ERR(dh_blob)) {
6314 ret = PTR_ERR(dh_blob);
6315 goto e_free;
6316 }
6317
6318 start->dh_cert_address = __sme_set(__pa(dh_blob));
6319 start->dh_cert_len = params.dh_len;
6320 }
6321
6322 session_blob = NULL;
6323 if (params.session_uaddr) {
6324 session_blob = psp_copy_user_blob(params.session_uaddr, params.session_len);
6325 if (IS_ERR(session_blob)) {
6326 ret = PTR_ERR(session_blob);
6327 goto e_free_dh;
6328 }
6329
6330 start->session_address = __sme_set(__pa(session_blob));
6331 start->session_len = params.session_len;
6332 }
6333
6334 start->handle = params.handle;
6335 start->policy = params.policy;
6336
6337
6338 ret = __sev_issue_cmd(argp->sev_fd, SEV_CMD_LAUNCH_START, start, error);
6339 if (ret)
6340 goto e_free_session;
6341
6342
6343 ret = sev_bind_asid(kvm, start->handle, error);
6344 if (ret)
6345 goto e_free_session;
6346
6347
6348 params.handle = start->handle;
6349 if (copy_to_user((void __user *)(uintptr_t)argp->data, ¶ms, sizeof(params))) {
6350 sev_unbind_asid(kvm, start->handle);
6351 ret = -EFAULT;
6352 goto e_free_session;
6353 }
6354
6355 sev->handle = start->handle;
6356 sev->fd = argp->sev_fd;
6357
6358e_free_session:
6359 kfree(session_blob);
6360e_free_dh:
6361 kfree(dh_blob);
6362e_free:
6363 kfree(start);
6364 return ret;
6365}
6366
6367static int get_num_contig_pages(int idx, struct page **inpages,
6368 unsigned long npages)
6369{
6370 unsigned long paddr, next_paddr;
6371 int i = idx + 1, pages = 1;
6372
6373
6374 paddr = __sme_page_pa(inpages[idx]);
6375 while (i < npages) {
6376 next_paddr = __sme_page_pa(inpages[i++]);
6377 if ((paddr + PAGE_SIZE) == next_paddr) {
6378 pages++;
6379 paddr = next_paddr;
6380 continue;
6381 }
6382 break;
6383 }
6384
6385 return pages;
6386}
6387
6388static int sev_launch_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
6389{
6390 unsigned long vaddr, vaddr_end, next_vaddr, npages, size;
6391 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
6392 struct kvm_sev_launch_update_data params;
6393 struct sev_data_launch_update_data *data;
6394 struct page **inpages;
6395 int i, ret, pages;
6396
6397 if (!sev_guest(kvm))
6398 return -ENOTTY;
6399
6400 if (copy_from_user(¶ms, (void __user *)(uintptr_t)argp->data, sizeof(params)))
6401 return -EFAULT;
6402
6403 data = kzalloc(sizeof(*data), GFP_KERNEL);
6404 if (!data)
6405 return -ENOMEM;
6406
6407 vaddr = params.uaddr;
6408 size = params.len;
6409 vaddr_end = vaddr + size;
6410
6411
6412 inpages = sev_pin_memory(kvm, vaddr, size, &npages, 1);
6413 if (!inpages) {
6414 ret = -ENOMEM;
6415 goto e_free;
6416 }
6417
6418
6419
6420
6421
6422
6423
6424 sev_clflush_pages(inpages, npages);
6425
6426 for (i = 0; vaddr < vaddr_end; vaddr = next_vaddr, i += pages) {
6427 int offset, len;
6428
6429
6430
6431
6432
6433 offset = vaddr & (PAGE_SIZE - 1);
6434
6435
6436 pages = get_num_contig_pages(i, inpages, npages);
6437
6438 len = min_t(size_t, ((pages * PAGE_SIZE) - offset), size);
6439
6440 data->handle = sev->handle;
6441 data->len = len;
6442 data->address = __sme_page_pa(inpages[i]) + offset;
6443 ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_DATA, data, &argp->error);
6444 if (ret)
6445 goto e_unpin;
6446
6447 size -= len;
6448 next_vaddr = vaddr + len;
6449 }
6450
6451e_unpin:
6452
6453 for (i = 0; i < npages; i++) {
6454 set_page_dirty_lock(inpages[i]);
6455 mark_page_accessed(inpages[i]);
6456 }
6457
6458 sev_unpin_memory(kvm, inpages, npages);
6459e_free:
6460 kfree(data);
6461 return ret;
6462}
6463
6464static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp)
6465{
6466 void __user *measure = (void __user *)(uintptr_t)argp->data;
6467 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
6468 struct sev_data_launch_measure *data;
6469 struct kvm_sev_launch_measure params;
6470 void __user *p = NULL;
6471 void *blob = NULL;
6472 int ret;
6473
6474 if (!sev_guest(kvm))
6475 return -ENOTTY;
6476
6477 if (copy_from_user(¶ms, measure, sizeof(params)))
6478 return -EFAULT;
6479
6480 data = kzalloc(sizeof(*data), GFP_KERNEL);
6481 if (!data)
6482 return -ENOMEM;
6483
6484
6485 if (!params.len)
6486 goto cmd;
6487
6488 p = (void __user *)(uintptr_t)params.uaddr;
6489 if (p) {
6490 if (params.len > SEV_FW_BLOB_MAX_SIZE) {
6491 ret = -EINVAL;
6492 goto e_free;
6493 }
6494
6495 ret = -ENOMEM;
6496 blob = kmalloc(params.len, GFP_KERNEL);
6497 if (!blob)
6498 goto e_free;
6499
6500 data->address = __psp_pa(blob);
6501 data->len = params.len;
6502 }
6503
6504cmd:
6505 data->handle = sev->handle;
6506 ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_MEASURE, data, &argp->error);
6507
6508
6509
6510
6511 if (!params.len)
6512 goto done;
6513
6514 if (ret)
6515 goto e_free_blob;
6516
6517 if (blob) {
6518 if (copy_to_user(p, blob, params.len))
6519 ret = -EFAULT;
6520 }
6521
6522done:
6523 params.len = data->len;
6524 if (copy_to_user(measure, ¶ms, sizeof(params)))
6525 ret = -EFAULT;
6526e_free_blob:
6527 kfree(blob);
6528e_free:
6529 kfree(data);
6530 return ret;
6531}
6532
6533static int sev_launch_finish(struct kvm *kvm, struct kvm_sev_cmd *argp)
6534{
6535 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
6536 struct sev_data_launch_finish *data;
6537 int ret;
6538
6539 if (!sev_guest(kvm))
6540 return -ENOTTY;
6541
6542 data = kzalloc(sizeof(*data), GFP_KERNEL);
6543 if (!data)
6544 return -ENOMEM;
6545
6546 data->handle = sev->handle;
6547 ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_FINISH, data, &argp->error);
6548
6549 kfree(data);
6550 return ret;
6551}
6552
6553static int sev_guest_status(struct kvm *kvm, struct kvm_sev_cmd *argp)
6554{
6555 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
6556 struct kvm_sev_guest_status params;
6557 struct sev_data_guest_status *data;
6558 int ret;
6559
6560 if (!sev_guest(kvm))
6561 return -ENOTTY;
6562
6563 data = kzalloc(sizeof(*data), GFP_KERNEL);
6564 if (!data)
6565 return -ENOMEM;
6566
6567 data->handle = sev->handle;
6568 ret = sev_issue_cmd(kvm, SEV_CMD_GUEST_STATUS, data, &argp->error);
6569 if (ret)
6570 goto e_free;
6571
6572 params.policy = data->policy;
6573 params.state = data->state;
6574 params.handle = data->handle;
6575
6576 if (copy_to_user((void __user *)(uintptr_t)argp->data, ¶ms, sizeof(params)))
6577 ret = -EFAULT;
6578e_free:
6579 kfree(data);
6580 return ret;
6581}
6582
6583static int __sev_issue_dbg_cmd(struct kvm *kvm, unsigned long src,
6584 unsigned long dst, int size,
6585 int *error, bool enc)
6586{
6587 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
6588 struct sev_data_dbg *data;
6589 int ret;
6590
6591 data = kzalloc(sizeof(*data), GFP_KERNEL);
6592 if (!data)
6593 return -ENOMEM;
6594
6595 data->handle = sev->handle;
6596 data->dst_addr = dst;
6597 data->src_addr = src;
6598 data->len = size;
6599
6600 ret = sev_issue_cmd(kvm,
6601 enc ? SEV_CMD_DBG_ENCRYPT : SEV_CMD_DBG_DECRYPT,
6602 data, error);
6603 kfree(data);
6604 return ret;
6605}
6606
6607static int __sev_dbg_decrypt(struct kvm *kvm, unsigned long src_paddr,
6608 unsigned long dst_paddr, int sz, int *err)
6609{
6610 int offset;
6611
6612
6613
6614
6615
6616 src_paddr = round_down(src_paddr, 16);
6617 offset = src_paddr & 15;
6618 sz = round_up(sz + offset, 16);
6619
6620 return __sev_issue_dbg_cmd(kvm, src_paddr, dst_paddr, sz, err, false);
6621}
6622
6623static int __sev_dbg_decrypt_user(struct kvm *kvm, unsigned long paddr,
6624 unsigned long __user dst_uaddr,
6625 unsigned long dst_paddr,
6626 int size, int *err)
6627{
6628 struct page *tpage = NULL;
6629 int ret, offset;
6630
6631
6632 if (!IS_ALIGNED(dst_paddr, 16) ||
6633 !IS_ALIGNED(paddr, 16) ||
6634 !IS_ALIGNED(size, 16)) {
6635 tpage = (void *)alloc_page(GFP_KERNEL);
6636 if (!tpage)
6637 return -ENOMEM;
6638
6639 dst_paddr = __sme_page_pa(tpage);
6640 }
6641
6642 ret = __sev_dbg_decrypt(kvm, paddr, dst_paddr, size, err);
6643 if (ret)
6644 goto e_free;
6645
6646 if (tpage) {
6647 offset = paddr & 15;
6648 if (copy_to_user((void __user *)(uintptr_t)dst_uaddr,
6649 page_address(tpage) + offset, size))
6650 ret = -EFAULT;
6651 }
6652
6653e_free:
6654 if (tpage)
6655 __free_page(tpage);
6656
6657 return ret;
6658}
6659
6660static int __sev_dbg_encrypt_user(struct kvm *kvm, unsigned long paddr,
6661 unsigned long __user vaddr,
6662 unsigned long dst_paddr,
6663 unsigned long __user dst_vaddr,
6664 int size, int *error)
6665{
6666 struct page *src_tpage = NULL;
6667 struct page *dst_tpage = NULL;
6668 int ret, len = size;
6669
6670
6671 if (!IS_ALIGNED(vaddr, 16)) {
6672 src_tpage = alloc_page(GFP_KERNEL);
6673 if (!src_tpage)
6674 return -ENOMEM;
6675
6676 if (copy_from_user(page_address(src_tpage),
6677 (void __user *)(uintptr_t)vaddr, size)) {
6678 __free_page(src_tpage);
6679 return -EFAULT;
6680 }
6681
6682 paddr = __sme_page_pa(src_tpage);
6683 }
6684
6685
6686
6687
6688
6689
6690
6691 if (!IS_ALIGNED(dst_vaddr, 16) || !IS_ALIGNED(size, 16)) {
6692 int dst_offset;
6693
6694 dst_tpage = alloc_page(GFP_KERNEL);
6695 if (!dst_tpage) {
6696 ret = -ENOMEM;
6697 goto e_free;
6698 }
6699
6700 ret = __sev_dbg_decrypt(kvm, dst_paddr,
6701 __sme_page_pa(dst_tpage), size, error);
6702 if (ret)
6703 goto e_free;
6704
6705
6706
6707
6708
6709 dst_offset = dst_paddr & 15;
6710
6711 if (src_tpage)
6712 memcpy(page_address(dst_tpage) + dst_offset,
6713 page_address(src_tpage), size);
6714 else {
6715 if (copy_from_user(page_address(dst_tpage) + dst_offset,
6716 (void __user *)(uintptr_t)vaddr, size)) {
6717 ret = -EFAULT;
6718 goto e_free;
6719 }
6720 }
6721
6722 paddr = __sme_page_pa(dst_tpage);
6723 dst_paddr = round_down(dst_paddr, 16);
6724 len = round_up(size, 16);
6725 }
6726
6727 ret = __sev_issue_dbg_cmd(kvm, paddr, dst_paddr, len, error, true);
6728
6729e_free:
6730 if (src_tpage)
6731 __free_page(src_tpage);
6732 if (dst_tpage)
6733 __free_page(dst_tpage);
6734 return ret;
6735}
6736
6737static int sev_dbg_crypt(struct kvm *kvm, struct kvm_sev_cmd *argp, bool dec)
6738{
6739 unsigned long vaddr, vaddr_end, next_vaddr;
6740 unsigned long dst_vaddr, dst_vaddr_end;
6741 struct page **src_p, **dst_p;
6742 struct kvm_sev_dbg debug;
6743 unsigned long n;
6744 int ret, size;
6745
6746 if (!sev_guest(kvm))
6747 return -ENOTTY;
6748
6749 if (copy_from_user(&debug, (void __user *)(uintptr_t)argp->data, sizeof(debug)))
6750 return -EFAULT;
6751
6752 vaddr = debug.src_uaddr;
6753 size = debug.len;
6754 vaddr_end = vaddr + size;
6755 dst_vaddr = debug.dst_uaddr;
6756 dst_vaddr_end = dst_vaddr + size;
6757
6758 for (; vaddr < vaddr_end; vaddr = next_vaddr) {
6759 int len, s_off, d_off;
6760
6761
6762 src_p = sev_pin_memory(kvm, vaddr & PAGE_MASK, PAGE_SIZE, &n, 0);
6763 if (!src_p)
6764 return -EFAULT;
6765
6766 dst_p = sev_pin_memory(kvm, dst_vaddr & PAGE_MASK, PAGE_SIZE, &n, 1);
6767 if (!dst_p) {
6768 sev_unpin_memory(kvm, src_p, n);
6769 return -EFAULT;
6770 }
6771
6772
6773
6774
6775
6776
6777
6778 sev_clflush_pages(src_p, 1);
6779 sev_clflush_pages(dst_p, 1);
6780
6781
6782
6783
6784
6785 s_off = vaddr & ~PAGE_MASK;
6786 d_off = dst_vaddr & ~PAGE_MASK;
6787 len = min_t(size_t, (PAGE_SIZE - s_off), size);
6788
6789 if (dec)
6790 ret = __sev_dbg_decrypt_user(kvm,
6791 __sme_page_pa(src_p[0]) + s_off,
6792 dst_vaddr,
6793 __sme_page_pa(dst_p[0]) + d_off,
6794 len, &argp->error);
6795 else
6796 ret = __sev_dbg_encrypt_user(kvm,
6797 __sme_page_pa(src_p[0]) + s_off,
6798 vaddr,
6799 __sme_page_pa(dst_p[0]) + d_off,
6800 dst_vaddr,
6801 len, &argp->error);
6802
6803 sev_unpin_memory(kvm, src_p, 1);
6804 sev_unpin_memory(kvm, dst_p, 1);
6805
6806 if (ret)
6807 goto err;
6808
6809 next_vaddr = vaddr + len;
6810 dst_vaddr = dst_vaddr + len;
6811 size -= len;
6812 }
6813err:
6814 return ret;
6815}
6816
6817static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp)
6818{
6819 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
6820 struct sev_data_launch_secret *data;
6821 struct kvm_sev_launch_secret params;
6822 struct page **pages;
6823 void *blob, *hdr;
6824 unsigned long n;
6825 int ret, offset;
6826
6827 if (!sev_guest(kvm))
6828 return -ENOTTY;
6829
6830 if (copy_from_user(¶ms, (void __user *)(uintptr_t)argp->data, sizeof(params)))
6831 return -EFAULT;
6832
6833 pages = sev_pin_memory(kvm, params.guest_uaddr, params.guest_len, &n, 1);
6834 if (!pages)
6835 return -ENOMEM;
6836
6837
6838
6839
6840
6841 if (get_num_contig_pages(0, pages, n) != n) {
6842 ret = -EINVAL;
6843 goto e_unpin_memory;
6844 }
6845
6846 ret = -ENOMEM;
6847 data = kzalloc(sizeof(*data), GFP_KERNEL);
6848 if (!data)
6849 goto e_unpin_memory;
6850
6851 offset = params.guest_uaddr & (PAGE_SIZE - 1);
6852 data->guest_address = __sme_page_pa(pages[0]) + offset;
6853 data->guest_len = params.guest_len;
6854
6855 blob = psp_copy_user_blob(params.trans_uaddr, params.trans_len);
6856 if (IS_ERR(blob)) {
6857 ret = PTR_ERR(blob);
6858 goto e_free;
6859 }
6860
6861 data->trans_address = __psp_pa(blob);
6862 data->trans_len = params.trans_len;
6863
6864 hdr = psp_copy_user_blob(params.hdr_uaddr, params.hdr_len);
6865 if (IS_ERR(hdr)) {
6866 ret = PTR_ERR(hdr);
6867 goto e_free_blob;
6868 }
6869 data->hdr_address = __psp_pa(hdr);
6870 data->hdr_len = params.hdr_len;
6871
6872 data->handle = sev->handle;
6873 ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_SECRET, data, &argp->error);
6874
6875 kfree(hdr);
6876
6877e_free_blob:
6878 kfree(blob);
6879e_free:
6880 kfree(data);
6881e_unpin_memory:
6882 sev_unpin_memory(kvm, pages, n);
6883 return ret;
6884}
6885
6886static int svm_mem_enc_op(struct kvm *kvm, void __user *argp)
6887{
6888 struct kvm_sev_cmd sev_cmd;
6889 int r;
6890
6891 if (!svm_sev_enabled())
6892 return -ENOTTY;
6893
6894 if (copy_from_user(&sev_cmd, argp, sizeof(struct kvm_sev_cmd)))
6895 return -EFAULT;
6896
6897 mutex_lock(&kvm->lock);
6898
6899 switch (sev_cmd.id) {
6900 case KVM_SEV_INIT:
6901 r = sev_guest_init(kvm, &sev_cmd);
6902 break;
6903 case KVM_SEV_LAUNCH_START:
6904 r = sev_launch_start(kvm, &sev_cmd);
6905 break;
6906 case KVM_SEV_LAUNCH_UPDATE_DATA:
6907 r = sev_launch_update_data(kvm, &sev_cmd);
6908 break;
6909 case KVM_SEV_LAUNCH_MEASURE:
6910 r = sev_launch_measure(kvm, &sev_cmd);
6911 break;
6912 case KVM_SEV_LAUNCH_FINISH:
6913 r = sev_launch_finish(kvm, &sev_cmd);
6914 break;
6915 case KVM_SEV_GUEST_STATUS:
6916 r = sev_guest_status(kvm, &sev_cmd);
6917 break;
6918 case KVM_SEV_DBG_DECRYPT:
6919 r = sev_dbg_crypt(kvm, &sev_cmd, true);
6920 break;
6921 case KVM_SEV_DBG_ENCRYPT:
6922 r = sev_dbg_crypt(kvm, &sev_cmd, false);
6923 break;
6924 case KVM_SEV_LAUNCH_SECRET:
6925 r = sev_launch_secret(kvm, &sev_cmd);
6926 break;
6927 default:
6928 r = -EINVAL;
6929 goto out;
6930 }
6931
6932 if (copy_to_user(argp, &sev_cmd, sizeof(struct kvm_sev_cmd)))
6933 r = -EFAULT;
6934
6935out:
6936 mutex_unlock(&kvm->lock);
6937 return r;
6938}
6939
6940static int svm_register_enc_region(struct kvm *kvm,
6941 struct kvm_enc_region *range)
6942{
6943 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
6944 struct enc_region *region;
6945 int ret = 0;
6946
6947 if (!sev_guest(kvm))
6948 return -ENOTTY;
6949
6950 region = kzalloc(sizeof(*region), GFP_KERNEL);
6951 if (!region)
6952 return -ENOMEM;
6953
6954 region->pages = sev_pin_memory(kvm, range->addr, range->size, ®ion->npages, 1);
6955 if (!region->pages) {
6956 ret = -ENOMEM;
6957 goto e_free;
6958 }
6959
6960
6961
6962
6963
6964
6965
6966 sev_clflush_pages(region->pages, region->npages);
6967
6968 region->uaddr = range->addr;
6969 region->size = range->size;
6970
6971 mutex_lock(&kvm->lock);
6972 list_add_tail(®ion->list, &sev->regions_list);
6973 mutex_unlock(&kvm->lock);
6974
6975 return ret;
6976
6977e_free:
6978 kfree(region);
6979 return ret;
6980}
6981
6982static struct enc_region *
6983find_enc_region(struct kvm *kvm, struct kvm_enc_region *range)
6984{
6985 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
6986 struct list_head *head = &sev->regions_list;
6987 struct enc_region *i;
6988
6989 list_for_each_entry(i, head, list) {
6990 if (i->uaddr == range->addr &&
6991 i->size == range->size)
6992 return i;
6993 }
6994
6995 return NULL;
6996}
6997
6998
6999static int svm_unregister_enc_region(struct kvm *kvm,
7000 struct kvm_enc_region *range)
7001{
7002 struct enc_region *region;
7003 int ret;
7004
7005 mutex_lock(&kvm->lock);
7006
7007 if (!sev_guest(kvm)) {
7008 ret = -ENOTTY;
7009 goto failed;
7010 }
7011
7012 region = find_enc_region(kvm, range);
7013 if (!region) {
7014 ret = -EINVAL;
7015 goto failed;
7016 }
7017
7018 __unregister_enc_region_locked(kvm, region);
7019
7020 mutex_unlock(&kvm->lock);
7021 return 0;
7022
7023failed:
7024 mutex_unlock(&kvm->lock);
7025 return ret;
7026}
7027
7028static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
7029 .cpu_has_kvm_support = has_svm,
7030 .disabled_by_bios = is_disabled,
7031 .hardware_setup = svm_hardware_setup,
7032 .hardware_unsetup = svm_hardware_unsetup,
7033 .check_processor_compatibility = svm_check_processor_compat,
7034 .hardware_enable = svm_hardware_enable,
7035 .hardware_disable = svm_hardware_disable,
7036 .cpu_has_accelerated_tpr = svm_cpu_has_accelerated_tpr,
7037 .has_emulated_msr = svm_has_emulated_msr,
7038
7039 .vcpu_create = svm_create_vcpu,
7040 .vcpu_free = svm_free_vcpu,
7041 .vcpu_reset = svm_vcpu_reset,
7042
7043 .vm_alloc = svm_vm_alloc,
7044 .vm_free = svm_vm_free,
7045 .vm_init = avic_vm_init,
7046 .vm_destroy = svm_vm_destroy,
7047
7048 .prepare_guest_switch = svm_prepare_guest_switch,
7049 .vcpu_load = svm_vcpu_load,
7050 .vcpu_put = svm_vcpu_put,
7051 .vcpu_blocking = svm_vcpu_blocking,
7052 .vcpu_unblocking = svm_vcpu_unblocking,
7053
7054 .update_bp_intercept = update_bp_intercept,
7055 .get_msr_feature = svm_get_msr_feature,
7056 .get_msr = svm_get_msr,
7057 .set_msr = svm_set_msr,
7058 .get_segment_base = svm_get_segment_base,
7059 .get_segment = svm_get_segment,
7060 .set_segment = svm_set_segment,
7061 .get_cpl = svm_get_cpl,
7062 .get_cs_db_l_bits = kvm_get_cs_db_l_bits,
7063 .decache_cr0_guest_bits = svm_decache_cr0_guest_bits,
7064 .decache_cr3 = svm_decache_cr3,
7065 .decache_cr4_guest_bits = svm_decache_cr4_guest_bits,
7066 .set_cr0 = svm_set_cr0,
7067 .set_cr3 = svm_set_cr3,
7068 .set_cr4 = svm_set_cr4,
7069 .set_efer = svm_set_efer,
7070 .get_idt = svm_get_idt,
7071 .set_idt = svm_set_idt,
7072 .get_gdt = svm_get_gdt,
7073 .set_gdt = svm_set_gdt,
7074 .get_dr6 = svm_get_dr6,
7075 .set_dr6 = svm_set_dr6,
7076 .set_dr7 = svm_set_dr7,
7077 .sync_dirty_debug_regs = svm_sync_dirty_debug_regs,
7078 .cache_reg = svm_cache_reg,
7079 .get_rflags = svm_get_rflags,
7080 .set_rflags = svm_set_rflags,
7081
7082 .tlb_flush = svm_flush_tlb,
7083
7084 .run = svm_vcpu_run,
7085 .handle_exit = handle_exit,
7086 .skip_emulated_instruction = skip_emulated_instruction,
7087 .set_interrupt_shadow = svm_set_interrupt_shadow,
7088 .get_interrupt_shadow = svm_get_interrupt_shadow,
7089 .patch_hypercall = svm_patch_hypercall,
7090 .set_irq = svm_set_irq,
7091 .set_nmi = svm_inject_nmi,
7092 .queue_exception = svm_queue_exception,
7093 .cancel_injection = svm_cancel_injection,
7094 .interrupt_allowed = svm_interrupt_allowed,
7095 .nmi_allowed = svm_nmi_allowed,
7096 .get_nmi_mask = svm_get_nmi_mask,
7097 .set_nmi_mask = svm_set_nmi_mask,
7098 .enable_nmi_window = enable_nmi_window,
7099 .enable_irq_window = enable_irq_window,
7100 .update_cr8_intercept = update_cr8_intercept,
7101 .set_virtual_x2apic_mode = svm_set_virtual_x2apic_mode,
7102 .get_enable_apicv = svm_get_enable_apicv,
7103 .refresh_apicv_exec_ctrl = svm_refresh_apicv_exec_ctrl,
7104 .load_eoi_exitmap = svm_load_eoi_exitmap,
7105 .hwapic_irr_update = svm_hwapic_irr_update,
7106 .hwapic_isr_update = svm_hwapic_isr_update,
7107 .sync_pir_to_irr = kvm_lapic_find_highest_irr,
7108 .apicv_post_state_restore = avic_post_state_restore,
7109
7110 .set_tss_addr = svm_set_tss_addr,
7111 .set_identity_map_addr = svm_set_identity_map_addr,
7112 .get_tdp_level = get_npt_level,
7113 .get_mt_mask = svm_get_mt_mask,
7114
7115 .get_exit_info = svm_get_exit_info,
7116
7117 .get_lpage_level = svm_get_lpage_level,
7118
7119 .cpuid_update = svm_cpuid_update,
7120
7121 .rdtscp_supported = svm_rdtscp_supported,
7122 .invpcid_supported = svm_invpcid_supported,
7123 .mpx_supported = svm_mpx_supported,
7124 .xsaves_supported = svm_xsaves_supported,
7125 .umip_emulated = svm_umip_emulated,
7126
7127 .set_supported_cpuid = svm_set_supported_cpuid,
7128
7129 .has_wbinvd_exit = svm_has_wbinvd_exit,
7130
7131 .read_l1_tsc_offset = svm_read_l1_tsc_offset,
7132 .write_tsc_offset = svm_write_tsc_offset,
7133
7134 .set_tdp_cr3 = set_tdp_cr3,
7135
7136 .check_intercept = svm_check_intercept,
7137 .handle_external_intr = svm_handle_external_intr,
7138
7139 .sched_in = svm_sched_in,
7140
7141 .pmu_ops = &amd_pmu_ops,
7142 .deliver_posted_interrupt = svm_deliver_avic_intr,
7143 .update_pi_irte = svm_update_pi_irte,
7144 .setup_mce = svm_setup_mce,
7145
7146 .smi_allowed = svm_smi_allowed,
7147 .pre_enter_smm = svm_pre_enter_smm,
7148 .pre_leave_smm = svm_pre_leave_smm,
7149 .enable_smi_window = enable_smi_window,
7150
7151 .mem_enc_op = svm_mem_enc_op,
7152 .mem_enc_reg_region = svm_register_enc_region,
7153 .mem_enc_unreg_region = svm_unregister_enc_region,
7154};
7155
7156static int __init svm_init(void)
7157{
7158 return kvm_init(&svm_x86_ops, sizeof(struct vcpu_svm),
7159 __alignof__(struct vcpu_svm), THIS_MODULE);
7160}
7161
7162static void __exit svm_exit(void)
7163{
7164 kvm_exit();
7165}
7166
7167module_init(svm_init)
7168module_exit(svm_exit)
7169