1
2
3
4
5
6
7
8
9
10
11
12
13
14
15#include <linux/kvm_host.h>
16#include <linux/module.h>
17#include <linux/vmalloc.h>
18#include <linux/uaccess.h>
19#include <asm/user.h>
20#include <asm/xsave.h>
21#include "cpuid.h"
22#include "lapic.h"
23#include "mmu.h"
24#include "trace.h"
25
26void kvm_update_cpuid(struct kvm_vcpu *vcpu)
27{
28 struct kvm_cpuid_entry2 *best;
29 struct kvm_lapic *apic = vcpu->arch.apic;
30
31 best = kvm_find_cpuid_entry(vcpu, 1, 0);
32 if (!best)
33 return;
34
35
36 if (cpu_has_xsave && best->function == 0x1) {
37 best->ecx &= ~(bit(X86_FEATURE_OSXSAVE));
38 if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE))
39 best->ecx |= bit(X86_FEATURE_OSXSAVE);
40 }
41
42 if (apic) {
43 if (best->ecx & bit(X86_FEATURE_TSC_DEADLINE_TIMER))
44 apic->lapic_timer.timer_mode_mask = 3 << 17;
45 else
46 apic->lapic_timer.timer_mode_mask = 1 << 17;
47 }
48
49 kvm_pmu_cpuid_update(vcpu);
50}
51
52static int is_efer_nx(void)
53{
54 unsigned long long efer = 0;
55
56 rdmsrl_safe(MSR_EFER, &efer);
57 return efer & EFER_NX;
58}
59
60static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu)
61{
62 int i;
63 struct kvm_cpuid_entry2 *e, *entry;
64
65 entry = NULL;
66 for (i = 0; i < vcpu->arch.cpuid_nent; ++i) {
67 e = &vcpu->arch.cpuid_entries[i];
68 if (e->function == 0x80000001) {
69 entry = e;
70 break;
71 }
72 }
73 if (entry && (entry->edx & (1 << 20)) && !is_efer_nx()) {
74 entry->edx &= ~(1 << 20);
75 printk(KERN_INFO "kvm: guest NX capability removed\n");
76 }
77}
78
79
80int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
81 struct kvm_cpuid *cpuid,
82 struct kvm_cpuid_entry __user *entries)
83{
84 int r, i;
85 struct kvm_cpuid_entry *cpuid_entries;
86
87 r = -E2BIG;
88 if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
89 goto out;
90 r = -ENOMEM;
91 cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry) * cpuid->nent);
92 if (!cpuid_entries)
93 goto out;
94 r = -EFAULT;
95 if (copy_from_user(cpuid_entries, entries,
96 cpuid->nent * sizeof(struct kvm_cpuid_entry)))
97 goto out_free;
98 for (i = 0; i < cpuid->nent; i++) {
99 vcpu->arch.cpuid_entries[i].function = cpuid_entries[i].function;
100 vcpu->arch.cpuid_entries[i].eax = cpuid_entries[i].eax;
101 vcpu->arch.cpuid_entries[i].ebx = cpuid_entries[i].ebx;
102 vcpu->arch.cpuid_entries[i].ecx = cpuid_entries[i].ecx;
103 vcpu->arch.cpuid_entries[i].edx = cpuid_entries[i].edx;
104 vcpu->arch.cpuid_entries[i].index = 0;
105 vcpu->arch.cpuid_entries[i].flags = 0;
106 vcpu->arch.cpuid_entries[i].padding[0] = 0;
107 vcpu->arch.cpuid_entries[i].padding[1] = 0;
108 vcpu->arch.cpuid_entries[i].padding[2] = 0;
109 }
110 vcpu->arch.cpuid_nent = cpuid->nent;
111 cpuid_fix_nx_cap(vcpu);
112 r = 0;
113 kvm_apic_set_version(vcpu);
114 kvm_x86_ops->cpuid_update(vcpu);
115 kvm_update_cpuid(vcpu);
116
117out_free:
118 vfree(cpuid_entries);
119out:
120 return r;
121}
122
123int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu,
124 struct kvm_cpuid2 *cpuid,
125 struct kvm_cpuid_entry2 __user *entries)
126{
127 int r;
128
129 r = -E2BIG;
130 if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
131 goto out;
132 r = -EFAULT;
133 if (copy_from_user(&vcpu->arch.cpuid_entries, entries,
134 cpuid->nent * sizeof(struct kvm_cpuid_entry2)))
135 goto out;
136 vcpu->arch.cpuid_nent = cpuid->nent;
137 kvm_apic_set_version(vcpu);
138 kvm_x86_ops->cpuid_update(vcpu);
139 kvm_update_cpuid(vcpu);
140 return 0;
141
142out:
143 return r;
144}
145
146int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu,
147 struct kvm_cpuid2 *cpuid,
148 struct kvm_cpuid_entry2 __user *entries)
149{
150 int r;
151
152 r = -E2BIG;
153 if (cpuid->nent < vcpu->arch.cpuid_nent)
154 goto out;
155 r = -EFAULT;
156 if (copy_to_user(entries, &vcpu->arch.cpuid_entries,
157 vcpu->arch.cpuid_nent * sizeof(struct kvm_cpuid_entry2)))
158 goto out;
159 return 0;
160
161out:
162 cpuid->nent = vcpu->arch.cpuid_nent;
163 return r;
164}
165
166static void cpuid_mask(u32 *word, int wordnum)
167{
168 *word &= boot_cpu_data.x86_capability[wordnum];
169}
170
171static void do_cpuid_1_ent(struct kvm_cpuid_entry2 *entry, u32 function,
172 u32 index)
173{
174 entry->function = function;
175 entry->index = index;
176 cpuid_count(entry->function, entry->index,
177 &entry->eax, &entry->ebx, &entry->ecx, &entry->edx);
178 entry->flags = 0;
179}
180
181static bool supported_xcr0_bit(unsigned bit)
182{
183 u64 mask = ((u64)1 << bit);
184
185 return mask & (XSTATE_FP | XSTATE_SSE | XSTATE_YMM) & host_xcr0;
186}
187
188#define F(x) bit(X86_FEATURE_##x)
189
190static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
191 u32 index, int *nent, int maxnent)
192{
193 int r;
194 unsigned f_nx = is_efer_nx() ? F(NX) : 0;
195#ifdef CONFIG_X86_64
196 unsigned f_gbpages = (kvm_x86_ops->get_lpage_level() == PT_PDPE_LEVEL)
197 ? F(GBPAGES) : 0;
198 unsigned f_lm = F(LM);
199#else
200 unsigned f_gbpages = 0;
201 unsigned f_lm = 0;
202#endif
203 unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0;
204 unsigned f_invpcid = kvm_x86_ops->invpcid_supported() ? F(INVPCID) : 0;
205
206
207 const u32 kvm_supported_word0_x86_features =
208 F(FPU) | F(VME) | F(DE) | F(PSE) |
209 F(TSC) | F(MSR) | F(PAE) | F(MCE) |
210 F(CX8) | F(APIC) | 0 | F(SEP) |
211 F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |
212 F(PAT) | F(PSE36) | 0 | F(CLFLSH) |
213 0 | F(MMX) |
214 F(FXSR) | F(XMM) | F(XMM2) | F(SELFSNOOP) |
215 0 ;
216
217 const u32 kvm_supported_word1_x86_features =
218 F(FPU) | F(VME) | F(DE) | F(PSE) |
219 F(TSC) | F(MSR) | F(PAE) | F(MCE) |
220 F(CX8) | F(APIC) | 0 | F(SYSCALL) |
221 F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |
222 F(PAT) | F(PSE36) | 0 |
223 f_nx | 0 | F(MMXEXT) | F(MMX) |
224 F(FXSR) | F(FXSR_OPT) | f_gbpages | f_rdtscp |
225 0 | f_lm | F(3DNOWEXT) | F(3DNOW);
226
227 const u32 kvm_supported_word4_x86_features =
228 F(XMM3) | F(PCLMULQDQ) | 0 |
229 0 |
230 0 | F(SSSE3) | 0 | 0 |
231 F(FMA) | F(CX16) | 0 |
232 F(PCID) | 0 | F(XMM4_1) |
233 F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) |
234 0 | F(AES) | F(XSAVE) | 0 | F(AVX) |
235 F(F16C) | F(RDRAND);
236
237 const u32 kvm_supported_word6_x86_features =
238 F(LAHF_LM) | F(CMP_LEGACY) | 0 | 0 |
239 F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) |
240 F(3DNOWPREFETCH) | F(OSVW) | 0 | F(XOP) |
241 0 | F(FMA4) | F(TBM);
242
243
244 const u32 kvm_supported_word5_x86_features =
245 F(XSTORE) | F(XSTORE_EN) | F(XCRYPT) | F(XCRYPT_EN) |
246 F(ACE2) | F(ACE2_EN) | F(PHE) | F(PHE_EN) |
247 F(PMM) | F(PMM_EN);
248
249
250 const u32 kvm_supported_word9_x86_features =
251 F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) |
252 F(BMI2) | F(ERMS) | f_invpcid | F(RTM);
253
254
255 get_cpu();
256
257 r = -E2BIG;
258
259 if (*nent >= maxnent)
260 goto out;
261
262 do_cpuid_1_ent(entry, function, index);
263 ++*nent;
264
265 switch (function) {
266 case 0:
267 entry->eax = min(entry->eax, (u32)0xd);
268 break;
269 case 1:
270 entry->edx &= kvm_supported_word0_x86_features;
271 cpuid_mask(&entry->edx, 0);
272 entry->ecx &= kvm_supported_word4_x86_features;
273 cpuid_mask(&entry->ecx, 4);
274
275
276 entry->ecx |= F(X2APIC);
277 break;
278
279
280
281
282 case 2: {
283 int t, times = entry->eax & 0xff;
284
285 entry->flags |= KVM_CPUID_FLAG_STATEFUL_FUNC;
286 entry->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT;
287 for (t = 1; t < times; ++t) {
288 if (*nent >= maxnent)
289 goto out;
290
291 do_cpuid_1_ent(&entry[t], function, 0);
292 entry[t].flags |= KVM_CPUID_FLAG_STATEFUL_FUNC;
293 ++*nent;
294 }
295 break;
296 }
297
298 case 4: {
299 int i, cache_type;
300
301 entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
302
303 for (i = 1; ; ++i) {
304 if (*nent >= maxnent)
305 goto out;
306
307 cache_type = entry[i - 1].eax & 0x1f;
308 if (!cache_type)
309 break;
310 do_cpuid_1_ent(&entry[i], function, i);
311 entry[i].flags |=
312 KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
313 ++*nent;
314 }
315 break;
316 }
317 case 7: {
318 entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
319
320 if (index == 0) {
321 entry->ebx &= kvm_supported_word9_x86_features;
322 cpuid_mask(&entry->ebx, 9);
323
324 entry->ebx |= F(TSC_ADJUST);
325 } else
326 entry->ebx = 0;
327 entry->eax = 0;
328 entry->ecx = 0;
329 entry->edx = 0;
330 break;
331 }
332 case 9:
333 break;
334 case 0xa: {
335 struct x86_pmu_capability cap;
336 union cpuid10_eax eax;
337 union cpuid10_edx edx;
338
339 perf_get_x86_pmu_capability(&cap);
340
341
342
343
344
345 if (!cap.version)
346 memset(&cap, 0, sizeof(cap));
347
348 eax.split.version_id = min(cap.version, 2);
349 eax.split.num_counters = cap.num_counters_gp;
350 eax.split.bit_width = cap.bit_width_gp;
351 eax.split.mask_length = cap.events_mask_len;
352
353 edx.split.num_counters_fixed = cap.num_counters_fixed;
354 edx.split.bit_width_fixed = cap.bit_width_fixed;
355 edx.split.reserved = 0;
356
357 entry->eax = eax.full;
358 entry->ebx = cap.events_mask;
359 entry->ecx = 0;
360 entry->edx = edx.full;
361 break;
362 }
363
364 case 0xb: {
365 int i, level_type;
366
367 entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
368
369 for (i = 1; ; ++i) {
370 if (*nent >= maxnent)
371 goto out;
372
373 level_type = entry[i - 1].ecx & 0xff00;
374 if (!level_type)
375 break;
376 do_cpuid_1_ent(&entry[i], function, i);
377 entry[i].flags |=
378 KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
379 ++*nent;
380 }
381 break;
382 }
383 case 0xd: {
384 int idx, i;
385
386 entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
387 for (idx = 1, i = 1; idx < 64; ++idx) {
388 if (*nent >= maxnent)
389 goto out;
390
391 do_cpuid_1_ent(&entry[i], function, idx);
392 if (entry[i].eax == 0 || !supported_xcr0_bit(idx))
393 continue;
394 entry[i].flags |=
395 KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
396 ++*nent;
397 ++i;
398 }
399 break;
400 }
401 case KVM_CPUID_SIGNATURE: {
402 static const char signature[12] = "KVMKVMKVM\0\0";
403 const u32 *sigptr = (const u32 *)signature;
404 entry->eax = KVM_CPUID_FEATURES;
405 entry->ebx = sigptr[0];
406 entry->ecx = sigptr[1];
407 entry->edx = sigptr[2];
408 break;
409 }
410 case KVM_CPUID_FEATURES:
411 entry->eax = (1 << KVM_FEATURE_CLOCKSOURCE) |
412 (1 << KVM_FEATURE_NOP_IO_DELAY) |
413 (1 << KVM_FEATURE_CLOCKSOURCE2) |
414 (1 << KVM_FEATURE_ASYNC_PF) |
415 (1 << KVM_FEATURE_PV_EOI) |
416 (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT);
417
418 if (sched_info_on())
419 entry->eax |= (1 << KVM_FEATURE_STEAL_TIME);
420
421 entry->ebx = 0;
422 entry->ecx = 0;
423 entry->edx = 0;
424 break;
425 case 0x80000000:
426 entry->eax = min(entry->eax, 0x8000001a);
427 break;
428 case 0x80000001:
429 entry->edx &= kvm_supported_word1_x86_features;
430 cpuid_mask(&entry->edx, 1);
431 entry->ecx &= kvm_supported_word6_x86_features;
432 cpuid_mask(&entry->ecx, 6);
433 break;
434 case 0x80000008: {
435 unsigned g_phys_as = (entry->eax >> 16) & 0xff;
436 unsigned virt_as = max((entry->eax >> 8) & 0xff, 48U);
437 unsigned phys_as = entry->eax & 0xff;
438
439 if (!g_phys_as)
440 g_phys_as = phys_as;
441 entry->eax = g_phys_as | (virt_as << 8);
442 entry->ebx = entry->edx = 0;
443 break;
444 }
445 case 0x80000019:
446 entry->ecx = entry->edx = 0;
447 break;
448 case 0x8000001a:
449 break;
450 case 0x8000001d:
451 break;
452
453 case 0xC0000000:
454
455 entry->eax = min(entry->eax, 0xC0000004);
456 break;
457 case 0xC0000001:
458 entry->edx &= kvm_supported_word5_x86_features;
459 cpuid_mask(&entry->edx, 5);
460 break;
461 case 3:
462 case 5:
463 case 6:
464 case 0x80000007:
465 case 0xC0000002:
466 case 0xC0000003:
467 case 0xC0000004:
468 default:
469 entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
470 break;
471 }
472
473 kvm_x86_ops->set_supported_cpuid(function, entry);
474
475 r = 0;
476
477out:
478 put_cpu();
479
480 return r;
481}
482
483#undef F
484
485struct kvm_cpuid_param {
486 u32 func;
487 u32 idx;
488 bool has_leaf_count;
489 bool (*qualifier)(const struct kvm_cpuid_param *param);
490};
491
492static bool is_centaur_cpu(const struct kvm_cpuid_param *param)
493{
494 return boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR;
495}
496
497int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
498 struct kvm_cpuid_entry2 __user *entries)
499{
500 struct kvm_cpuid_entry2 *cpuid_entries;
501 int limit, nent = 0, r = -E2BIG, i;
502 u32 func;
503 static const struct kvm_cpuid_param param[] = {
504 { .func = 0, .has_leaf_count = true },
505 { .func = 0x80000000, .has_leaf_count = true },
506 { .func = 0xC0000000, .qualifier = is_centaur_cpu, .has_leaf_count = true },
507 { .func = KVM_CPUID_SIGNATURE },
508 { .func = KVM_CPUID_FEATURES },
509 };
510
511 if (cpuid->nent < 1)
512 goto out;
513 if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
514 cpuid->nent = KVM_MAX_CPUID_ENTRIES;
515 r = -ENOMEM;
516 cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry2) * cpuid->nent);
517 if (!cpuid_entries)
518 goto out;
519
520 r = 0;
521 for (i = 0; i < ARRAY_SIZE(param); i++) {
522 const struct kvm_cpuid_param *ent = ¶m[i];
523
524 if (ent->qualifier && !ent->qualifier(ent))
525 continue;
526
527 r = do_cpuid_ent(&cpuid_entries[nent], ent->func, ent->idx,
528 &nent, cpuid->nent);
529
530 if (r)
531 goto out_free;
532
533 if (!ent->has_leaf_count)
534 continue;
535
536 limit = cpuid_entries[nent - 1].eax;
537 for (func = ent->func + 1; func <= limit && nent < cpuid->nent && r == 0; ++func)
538 r = do_cpuid_ent(&cpuid_entries[nent], func, ent->idx,
539 &nent, cpuid->nent);
540
541 if (r)
542 goto out_free;
543 }
544
545 r = -EFAULT;
546 if (copy_to_user(entries, cpuid_entries,
547 nent * sizeof(struct kvm_cpuid_entry2)))
548 goto out_free;
549 cpuid->nent = nent;
550 r = 0;
551
552out_free:
553 vfree(cpuid_entries);
554out:
555 return r;
556}
557
558static int move_to_next_stateful_cpuid_entry(struct kvm_vcpu *vcpu, int i)
559{
560 struct kvm_cpuid_entry2 *e = &vcpu->arch.cpuid_entries[i];
561 int j, nent = vcpu->arch.cpuid_nent;
562
563 e->flags &= ~KVM_CPUID_FLAG_STATE_READ_NEXT;
564
565 for (j = i + 1; ; j = (j + 1) % nent) {
566 struct kvm_cpuid_entry2 *ej = &vcpu->arch.cpuid_entries[j];
567 if (ej->function == e->function) {
568 ej->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT;
569 return j;
570 }
571 }
572 return 0;
573}
574
575
576
577static int is_matching_cpuid_entry(struct kvm_cpuid_entry2 *e,
578 u32 function, u32 index)
579{
580 if (e->function != function)
581 return 0;
582 if ((e->flags & KVM_CPUID_FLAG_SIGNIFCANT_INDEX) && e->index != index)
583 return 0;
584 if ((e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC) &&
585 !(e->flags & KVM_CPUID_FLAG_STATE_READ_NEXT))
586 return 0;
587 return 1;
588}
589
590struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
591 u32 function, u32 index)
592{
593 int i;
594 struct kvm_cpuid_entry2 *best = NULL;
595
596 for (i = 0; i < vcpu->arch.cpuid_nent; ++i) {
597 struct kvm_cpuid_entry2 *e;
598
599 e = &vcpu->arch.cpuid_entries[i];
600 if (is_matching_cpuid_entry(e, function, index)) {
601 if (e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC)
602 move_to_next_stateful_cpuid_entry(vcpu, i);
603 best = e;
604 break;
605 }
606 }
607 return best;
608}
609EXPORT_SYMBOL_GPL(kvm_find_cpuid_entry);
610
611int cpuid_maxphyaddr(struct kvm_vcpu *vcpu)
612{
613 struct kvm_cpuid_entry2 *best;
614
615 best = kvm_find_cpuid_entry(vcpu, 0x80000000, 0);
616 if (!best || best->eax < 0x80000008)
617 goto not_found;
618 best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0);
619 if (best)
620 return best->eax & 0xff;
621not_found:
622 return 36;
623}
624
625
626
627
628
629
630static struct kvm_cpuid_entry2* check_cpuid_limit(struct kvm_vcpu *vcpu,
631 u32 function, u32 index)
632{
633 struct kvm_cpuid_entry2 *maxlevel;
634
635 maxlevel = kvm_find_cpuid_entry(vcpu, function & 0x80000000, 0);
636 if (!maxlevel || maxlevel->eax >= function)
637 return NULL;
638 if (function & 0x80000000) {
639 maxlevel = kvm_find_cpuid_entry(vcpu, 0, 0);
640 if (!maxlevel)
641 return NULL;
642 }
643 return kvm_find_cpuid_entry(vcpu, maxlevel->eax, index);
644}
645
646void kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx)
647{
648 u32 function = *eax, index = *ecx;
649 struct kvm_cpuid_entry2 *best;
650
651 best = kvm_find_cpuid_entry(vcpu, function, index);
652
653 if (!best)
654 best = check_cpuid_limit(vcpu, function, index);
655
656 if (best) {
657 *eax = best->eax;
658 *ebx = best->ebx;
659 *ecx = best->ecx;
660 *edx = best->edx;
661 } else
662 *eax = *ebx = *ecx = *edx = 0;
663}
664EXPORT_SYMBOL_GPL(kvm_cpuid);
665
666void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
667{
668 u32 function, eax, ebx, ecx, edx;
669
670 function = eax = kvm_register_read(vcpu, VCPU_REGS_RAX);
671 ecx = kvm_register_read(vcpu, VCPU_REGS_RCX);
672 kvm_cpuid(vcpu, &eax, &ebx, &ecx, &edx);
673 kvm_register_write(vcpu, VCPU_REGS_RAX, eax);
674 kvm_register_write(vcpu, VCPU_REGS_RBX, ebx);
675 kvm_register_write(vcpu, VCPU_REGS_RCX, ecx);
676 kvm_register_write(vcpu, VCPU_REGS_RDX, edx);
677 kvm_x86_ops->skip_emulated_instruction(vcpu);
678 trace_kvm_cpuid(function, eax, ebx, ecx, edx);
679}
680EXPORT_SYMBOL_GPL(kvm_emulate_cpuid);
681