linux/arch/x86/kvm/cpuid.c
<<
>>
Prefs
   1/*
   2 * Kernel-based Virtual Machine driver for Linux
   3 * cpuid support routines
   4 *
   5 * derived from arch/x86/kvm/x86.c
   6 *
   7 * Copyright 2011 Red Hat, Inc. and/or its affiliates.
   8 * Copyright IBM Corporation, 2008
   9 *
  10 * This work is licensed under the terms of the GNU GPL, version 2.  See
  11 * the COPYING file in the top-level directory.
  12 *
  13 */
  14
  15#include <linux/kvm_host.h>
  16#include <linux/export.h>
  17#include <linux/vmalloc.h>
  18#include <linux/uaccess.h>
  19#include <asm/fpu/internal.h> /* For use_eager_fpu.  Ugh! */
  20#include <asm/user.h>
  21#include <asm/fpu/xstate.h>
  22#include "cpuid.h"
  23#include "lapic.h"
  24#include "mmu.h"
  25#include "trace.h"
  26#include "pmu.h"
  27
  28static u32 xstate_required_size(u64 xstate_bv, bool compacted)
  29{
  30        int feature_bit = 0;
  31        u32 ret = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET;
  32
  33        xstate_bv &= XFEATURE_MASK_EXTEND;
  34        while (xstate_bv) {
  35                if (xstate_bv & 0x1) {
  36                        u32 eax, ebx, ecx, edx, offset;
  37                        cpuid_count(0xD, feature_bit, &eax, &ebx, &ecx, &edx);
  38                        offset = compacted ? ret : ebx;
  39                        ret = max(ret, offset + eax);
  40                }
  41
  42                xstate_bv >>= 1;
  43                feature_bit++;
  44        }
  45
  46        return ret;
  47}
  48
  49bool kvm_mpx_supported(void)
  50{
  51        return ((host_xcr0 & (XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR))
  52                 && kvm_x86_ops->mpx_supported());
  53}
  54EXPORT_SYMBOL_GPL(kvm_mpx_supported);
  55
  56u64 kvm_supported_xcr0(void)
  57{
  58        u64 xcr0 = KVM_SUPPORTED_XCR0 & host_xcr0;
  59
  60        if (!kvm_mpx_supported())
  61                xcr0 &= ~(XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR);
  62
  63        return xcr0;
  64}
  65
  66#define F(x) bit(X86_FEATURE_##x)
  67
  68int kvm_update_cpuid(struct kvm_vcpu *vcpu)
  69{
  70        struct kvm_cpuid_entry2 *best;
  71        struct kvm_lapic *apic = vcpu->arch.apic;
  72
  73        best = kvm_find_cpuid_entry(vcpu, 1, 0);
  74        if (!best)
  75                return 0;
  76
  77        /* Update OSXSAVE bit */
  78        if (boot_cpu_has(X86_FEATURE_XSAVE) && best->function == 0x1) {
  79                best->ecx &= ~F(OSXSAVE);
  80                if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE))
  81                        best->ecx |= F(OSXSAVE);
  82        }
  83
  84        if (apic) {
  85                if (best->ecx & F(TSC_DEADLINE_TIMER))
  86                        apic->lapic_timer.timer_mode_mask = 3 << 17;
  87                else
  88                        apic->lapic_timer.timer_mode_mask = 1 << 17;
  89        }
  90
  91        best = kvm_find_cpuid_entry(vcpu, 7, 0);
  92        if (best) {
  93                /* Update OSPKE bit */
  94                if (boot_cpu_has(X86_FEATURE_PKU) && best->function == 0x7) {
  95                        best->ecx &= ~F(OSPKE);
  96                        if (kvm_read_cr4_bits(vcpu, X86_CR4_PKE))
  97                                best->ecx |= F(OSPKE);
  98                }
  99        }
 100
 101        best = kvm_find_cpuid_entry(vcpu, 0xD, 0);
 102        if (!best) {
 103                vcpu->arch.guest_supported_xcr0 = 0;
 104                vcpu->arch.guest_xstate_size = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET;
 105        } else {
 106                vcpu->arch.guest_supported_xcr0 =
 107                        (best->eax | ((u64)best->edx << 32)) &
 108                        kvm_supported_xcr0();
 109                vcpu->arch.guest_xstate_size = best->ebx =
 110                        xstate_required_size(vcpu->arch.xcr0, false);
 111        }
 112
 113        best = kvm_find_cpuid_entry(vcpu, 0xD, 1);
 114        if (best && (best->eax & (F(XSAVES) | F(XSAVEC))))
 115                best->ebx = xstate_required_size(vcpu->arch.xcr0, true);
 116
 117        if (use_eager_fpu())
 118                kvm_x86_ops->fpu_activate(vcpu);
 119
 120        /*
 121         * The existing code assumes virtual address is 48-bit in the canonical
 122         * address checks; exit if it is ever changed.
 123         */
 124        best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0);
 125        if (best && ((best->eax & 0xff00) >> 8) != 48 &&
 126                ((best->eax & 0xff00) >> 8) != 0)
 127                return -EINVAL;
 128
 129        /* Update physical-address width */
 130        vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
 131
 132        kvm_pmu_refresh(vcpu);
 133        return 0;
 134}
 135
 136static int is_efer_nx(void)
 137{
 138        unsigned long long efer = 0;
 139
 140        rdmsrl_safe(MSR_EFER, &efer);
 141        return efer & EFER_NX;
 142}
 143
 144static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu)
 145{
 146        int i;
 147        struct kvm_cpuid_entry2 *e, *entry;
 148
 149        entry = NULL;
 150        for (i = 0; i < vcpu->arch.cpuid_nent; ++i) {
 151                e = &vcpu->arch.cpuid_entries[i];
 152                if (e->function == 0x80000001) {
 153                        entry = e;
 154                        break;
 155                }
 156        }
 157        if (entry && (entry->edx & F(NX)) && !is_efer_nx()) {
 158                entry->edx &= ~F(NX);
 159                printk(KERN_INFO "kvm: guest NX capability removed\n");
 160        }
 161}
 162
 163int cpuid_query_maxphyaddr(struct kvm_vcpu *vcpu)
 164{
 165        struct kvm_cpuid_entry2 *best;
 166
 167        best = kvm_find_cpuid_entry(vcpu, 0x80000000, 0);
 168        if (!best || best->eax < 0x80000008)
 169                goto not_found;
 170        best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0);
 171        if (best)
 172                return best->eax & 0xff;
 173not_found:
 174        return 36;
 175}
 176EXPORT_SYMBOL_GPL(cpuid_query_maxphyaddr);
 177
 178/* when an old userspace process fills a new kernel module */
 179int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
 180                             struct kvm_cpuid *cpuid,
 181                             struct kvm_cpuid_entry __user *entries)
 182{
 183        int r, i;
 184        struct kvm_cpuid_entry *cpuid_entries = NULL;
 185
 186        r = -E2BIG;
 187        if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
 188                goto out;
 189        r = -ENOMEM;
 190        if (cpuid->nent) {
 191                cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry) *
 192                                        cpuid->nent);
 193                if (!cpuid_entries)
 194                        goto out;
 195                r = -EFAULT;
 196                if (copy_from_user(cpuid_entries, entries,
 197                                   cpuid->nent * sizeof(struct kvm_cpuid_entry)))
 198                        goto out;
 199        }
 200        for (i = 0; i < cpuid->nent; i++) {
 201                vcpu->arch.cpuid_entries[i].function = cpuid_entries[i].function;
 202                vcpu->arch.cpuid_entries[i].eax = cpuid_entries[i].eax;
 203                vcpu->arch.cpuid_entries[i].ebx = cpuid_entries[i].ebx;
 204                vcpu->arch.cpuid_entries[i].ecx = cpuid_entries[i].ecx;
 205                vcpu->arch.cpuid_entries[i].edx = cpuid_entries[i].edx;
 206                vcpu->arch.cpuid_entries[i].index = 0;
 207                vcpu->arch.cpuid_entries[i].flags = 0;
 208                vcpu->arch.cpuid_entries[i].padding[0] = 0;
 209                vcpu->arch.cpuid_entries[i].padding[1] = 0;
 210                vcpu->arch.cpuid_entries[i].padding[2] = 0;
 211        }
 212        vcpu->arch.cpuid_nent = cpuid->nent;
 213        cpuid_fix_nx_cap(vcpu);
 214        kvm_apic_set_version(vcpu);
 215        kvm_x86_ops->cpuid_update(vcpu);
 216        r = kvm_update_cpuid(vcpu);
 217
 218out:
 219        vfree(cpuid_entries);
 220        return r;
 221}
 222
 223int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu,
 224                              struct kvm_cpuid2 *cpuid,
 225                              struct kvm_cpuid_entry2 __user *entries)
 226{
 227        int r;
 228
 229        r = -E2BIG;
 230        if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
 231                goto out;
 232        r = -EFAULT;
 233        if (copy_from_user(&vcpu->arch.cpuid_entries, entries,
 234                           cpuid->nent * sizeof(struct kvm_cpuid_entry2)))
 235                goto out;
 236        vcpu->arch.cpuid_nent = cpuid->nent;
 237        kvm_apic_set_version(vcpu);
 238        kvm_x86_ops->cpuid_update(vcpu);
 239        r = kvm_update_cpuid(vcpu);
 240out:
 241        return r;
 242}
 243
 244int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu,
 245                              struct kvm_cpuid2 *cpuid,
 246                              struct kvm_cpuid_entry2 __user *entries)
 247{
 248        int r;
 249
 250        r = -E2BIG;
 251        if (cpuid->nent < vcpu->arch.cpuid_nent)
 252                goto out;
 253        r = -EFAULT;
 254        if (copy_to_user(entries, &vcpu->arch.cpuid_entries,
 255                         vcpu->arch.cpuid_nent * sizeof(struct kvm_cpuid_entry2)))
 256                goto out;
 257        return 0;
 258
 259out:
 260        cpuid->nent = vcpu->arch.cpuid_nent;
 261        return r;
 262}
 263
 264static void cpuid_mask(u32 *word, int wordnum)
 265{
 266        *word &= boot_cpu_data.x86_capability[wordnum];
 267}
 268
 269static void do_cpuid_1_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 270                           u32 index)
 271{
 272        entry->function = function;
 273        entry->index = index;
 274        cpuid_count(entry->function, entry->index,
 275                    &entry->eax, &entry->ebx, &entry->ecx, &entry->edx);
 276        entry->flags = 0;
 277}
 278
 279static int __do_cpuid_ent_emulated(struct kvm_cpuid_entry2 *entry,
 280                                   u32 func, u32 index, int *nent, int maxnent)
 281{
 282        switch (func) {
 283        case 0:
 284                entry->eax = 1;         /* only one leaf currently */
 285                ++*nent;
 286                break;
 287        case 1:
 288                entry->ecx = F(MOVBE);
 289                ++*nent;
 290                break;
 291        default:
 292                break;
 293        }
 294
 295        entry->function = func;
 296        entry->index = index;
 297
 298        return 0;
 299}
 300
/*
 * Fill @entry (and, for leaves that expand to several entries, the slots
 * following it) with the CPUID data reported for leaf @function, sub-leaf
 * @index.
 *
 * The host values are read with do_cpuid_1_ent() and then filtered per leaf
 * in the switch below.  On the success path the first entry is finally handed
 * to kvm_x86_ops->set_supported_cpuid().
 *
 * *@nent is incremented for every entry that is kept.  Whenever the function
 * checks for room and finds *@nent >= @maxnent it bails out with -E2BIG
 * (skipping the set_supported_cpuid() call); otherwise it returns 0.
 */
static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
                                 u32 index, int *nent, int maxnent)
{
        int r;
        /* feature bits that depend on runtime or backend state */
        unsigned f_nx = is_efer_nx() ? F(NX) : 0;
#ifdef CONFIG_X86_64
        unsigned f_gbpages = (kvm_x86_ops->get_lpage_level() == PT_PDPE_LEVEL)
                                ? F(GBPAGES) : 0;
        unsigned f_lm = F(LM);
#else
        unsigned f_gbpages = 0;
        unsigned f_lm = 0;
#endif
        unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0;
        unsigned f_invpcid = kvm_x86_ops->invpcid_supported() ? F(INVPCID) : 0;
        unsigned f_mpx = kvm_mpx_supported() ? F(MPX) : 0;
        unsigned f_xsaves = kvm_x86_ops->xsaves_supported() ? F(XSAVES) : 0;

        /* cpuid 1.edx */
        const u32 kvm_cpuid_1_edx_x86_features =
                F(FPU) | F(VME) | F(DE) | F(PSE) |
                F(TSC) | F(MSR) | F(PAE) | F(MCE) |
                F(CX8) | F(APIC) | 0 /* Reserved */ | F(SEP) |
                F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |
                F(PAT) | F(PSE36) | 0 /* PSN */ | F(CLFLUSH) |
                0 /* Reserved, DS, ACPI */ | F(MMX) |
                F(FXSR) | F(XMM) | F(XMM2) | F(SELFSNOOP) |
                0 /* HTT, TM, Reserved, PBE */;
        /* cpuid 0x80000001.edx */
        const u32 kvm_cpuid_8000_0001_edx_x86_features =
                F(FPU) | F(VME) | F(DE) | F(PSE) |
                F(TSC) | F(MSR) | F(PAE) | F(MCE) |
                F(CX8) | F(APIC) | 0 /* Reserved */ | F(SYSCALL) |
                F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |
                F(PAT) | F(PSE36) | 0 /* Reserved */ |
                f_nx | 0 /* Reserved */ | F(MMXEXT) | F(MMX) |
                F(FXSR) | F(FXSR_OPT) | f_gbpages | f_rdtscp |
                0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW);
        /* cpuid 1.ecx */
        const u32 kvm_cpuid_1_ecx_x86_features =
                /* NOTE: MONITOR (and MWAIT) are emulated as NOP,
                 * but *not* advertised to guests via CPUID ! */
                F(XMM3) | F(PCLMULQDQ) | 0 /* DTES64, MONITOR */ |
                0 /* DS-CPL, VMX, SMX, EST */ |
                0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ |
                F(FMA) | F(CX16) | 0 /* xTPR Update, PDCM */ |
                F(PCID) | 0 /* Reserved, DCA */ | F(XMM4_1) |
                F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) |
                0 /* Reserved*/ | F(AES) | F(XSAVE) | 0 /* OSXSAVE */ | F(AVX) |
                F(F16C) | F(RDRAND);
        /* cpuid 0x80000001.ecx */
        const u32 kvm_cpuid_8000_0001_ecx_x86_features =
                F(LAHF_LM) | F(CMP_LEGACY) | 0 /*SVM*/ | 0 /* ExtApicSpace */ |
                F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) |
                F(3DNOWPREFETCH) | F(OSVW) | 0 /* IBS */ | F(XOP) |
                0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM);

        /* cpuid 0xC0000001.edx */
        const u32 kvm_cpuid_C000_0001_edx_x86_features =
                F(XSTORE) | F(XSTORE_EN) | F(XCRYPT) | F(XCRYPT_EN) |
                F(ACE2) | F(ACE2_EN) | F(PHE) | F(PHE_EN) |
                F(PMM) | F(PMM_EN);

        /* cpuid 7.0.ebx */
        const u32 kvm_cpuid_7_0_ebx_x86_features =
                F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) |
                F(BMI2) | F(ERMS) | f_invpcid | F(RTM) | f_mpx | F(RDSEED) |
                F(ADX) | F(SMAP) | F(AVX512F) | F(AVX512PF) | F(AVX512ER) |
                F(AVX512CD) | F(CLFLUSHOPT) | F(CLWB) | F(AVX512DQ) |
                F(AVX512BW) | F(AVX512VL);

        /* cpuid 0xD.1.eax */
        const u32 kvm_cpuid_D_1_eax_x86_features =
                F(XSAVEOPT) | F(XSAVEC) | F(XGETBV1) | f_xsaves;

        /* cpuid 7.0.ecx*/
        const u32 kvm_cpuid_7_0_ecx_x86_features = F(PKU) | 0 /*OSPKE*/;

        /* all calls to cpuid_count() should be made on the same cpu */
        get_cpu();

        r = -E2BIG;

        /* no room even for the first entry */
        if (*nent >= maxnent)
                goto out;

        /* start from the raw host values; the switch below filters them */
        do_cpuid_1_ent(entry, function, index);
        ++*nent;

        switch (function) {
        case 0:
                /* cap the reported maximum standard leaf at 0xd */
                entry->eax = min(entry->eax, (u32)0xd);
                break;
        case 1:
                /* keep only bits that are both whitelisted and host-supported */
                entry->edx &= kvm_cpuid_1_edx_x86_features;
                cpuid_mask(&entry->edx, CPUID_1_EDX);
                entry->ecx &= kvm_cpuid_1_ecx_x86_features;
                cpuid_mask(&entry->ecx, CPUID_1_ECX);
                /* we support x2apic emulation even if host does not support
                 * it since we emulate x2apic in software */
                entry->ecx |= F(X2APIC);
                break;
        /* function 2 entries are STATEFUL. That is, repeated cpuid commands
         * may return different values. This forces us to get_cpu() before
         * issuing the first command, and also to emulate this annoying behavior
         * in kvm_emulate_cpuid() using KVM_CPUID_FLAG_STATE_READ_NEXT */
        case 2: {
                /* the low byte of EAX gives the number of entries to collect */
                int t, times = entry->eax & 0xff;

                entry->flags |= KVM_CPUID_FLAG_STATEFUL_FUNC;
                /* only the first entry of the group starts out as "read next" */
                entry->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT;
                for (t = 1; t < times; ++t) {
                        if (*nent >= maxnent)
                                goto out;

                        do_cpuid_1_ent(&entry[t], function, 0);
                        entry[t].flags |= KVM_CPUID_FLAG_STATEFUL_FUNC;
                        ++*nent;
                }
                break;
        }
        /* function 4 has additional index. */
        case 4: {
                int i, cache_type;

                entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
                /* read more entries until cache_type is zero */
                for (i = 1; ; ++i) {
                        /* room is checked before the terminator is examined */
                        if (*nent >= maxnent)
                                goto out;

                        cache_type = entry[i - 1].eax & 0x1f;
                        if (!cache_type)
                                break;
                        do_cpuid_1_ent(&entry[i], function, i);
                        entry[i].flags |=
                               KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
                        ++*nent;
                }
                break;
        }
        case 6: /* Thermal management */
                entry->eax = 0x4; /* allow ARAT */
                entry->ebx = 0;
                entry->ecx = 0;
                entry->edx = 0;
                break;
        case 7: {
                entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
                /* Mask ebx against host capability word 9 */
                if (index == 0) {
                        entry->ebx &= kvm_cpuid_7_0_ebx_x86_features;
                        cpuid_mask(&entry->ebx, CPUID_7_0_EBX);
                        // TSC_ADJUST is emulated
                        entry->ebx |= F(TSC_ADJUST);
                        entry->ecx &= kvm_cpuid_7_0_ecx_x86_features;
                        cpuid_mask(&entry->ecx, CPUID_7_ECX);
                        /* PKU is not yet implemented for shadow paging. */
                        if (!tdp_enabled)
                                entry->ecx &= ~F(PKU);
                } else {
                        /* only sub-leaf 0 reports any feature bits */
                        entry->ebx = 0;
                        entry->ecx = 0;
                }
                entry->eax = 0;
                entry->edx = 0;
                break;
        }
        case 9:
                /* left exactly as read by do_cpuid_1_ent() */
                break;
        case 0xa: { /* Architectural Performance Monitoring */
                struct x86_pmu_capability cap;
                union cpuid10_eax eax;
                union cpuid10_edx edx;

                perf_get_x86_pmu_capability(&cap);

                /*
                 * Only support guest architectural pmu on a host
                 * with architectural pmu.
                 */
                if (!cap.version)
                        memset(&cap, 0, sizeof(cap));

                /* the advertised PMU version is capped at 2 */
                eax.split.version_id = min(cap.version, 2);
                eax.split.num_counters = cap.num_counters_gp;
                eax.split.bit_width = cap.bit_width_gp;
                eax.split.mask_length = cap.events_mask_len;

                edx.split.num_counters_fixed = cap.num_counters_fixed;
                edx.split.bit_width_fixed = cap.bit_width_fixed;
                edx.split.reserved = 0;

                entry->eax = eax.full;
                entry->ebx = cap.events_mask;
                entry->ecx = 0;
                entry->edx = edx.full;
                break;
        }
        /* function 0xb has additional index. */
        case 0xb: {
                int i, level_type;

                entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
                /* read more entries until level_type is zero */
                for (i = 1; ; ++i) {
                        /* room is checked before the terminator is examined */
                        if (*nent >= maxnent)
                                goto out;

                        level_type = entry[i - 1].ecx & 0xff00;
                        if (!level_type)
                                break;
                        do_cpuid_1_ent(&entry[i], function, i);
                        entry[i].flags |=
                               KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
                        ++*nent;
                }
                break;
        }
        case 0xd: {
                /* idx is the sub-leaf being probed, i the next output slot */
                int idx, i;
                u64 supported = kvm_supported_xcr0();

                /* sub-leaf 0: supported XCR0 bits in EDX:EAX, sizes in EBX/ECX */
                entry->eax &= supported;
                entry->ebx = xstate_required_size(supported, false);
                entry->ecx = entry->ebx;
                entry->edx &= supported >> 32;
                entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
                if (!supported)
                        break;

                for (idx = 1, i = 1; idx < 64; ++idx) {
                        u64 mask = ((u64)1 << idx);
                        if (*nent >= maxnent)
                                goto out;

                        /*
                         * Read into the candidate slot first; if the sub-leaf
                         * is rejected below, i is not advanced and the slot is
                         * reused by the next iteration.
                         */
                        do_cpuid_1_ent(&entry[i], function, idx);
                        if (idx == 1) {
                                entry[i].eax &= kvm_cpuid_D_1_eax_x86_features;
                                cpuid_mask(&entry[i].eax, CPUID_D_1_EAX);
                                entry[i].ebx = 0;
                                if (entry[i].eax & (F(XSAVES)|F(XSAVEC)))
                                        entry[i].ebx =
                                                xstate_required_size(supported,
                                                                     true);
                        } else {
                                /* skip empty sub-leaves and unsupported components */
                                if (entry[i].eax == 0 || !(supported & mask))
                                        continue;
                                /* ECX bit 0 is not expected here; warn and skip */
                                if (WARN_ON_ONCE(entry[i].ecx & 1))
                                        continue;
                        }
                        entry[i].ecx = 0;
                        entry[i].edx = 0;
                        entry[i].flags |=
                               KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
                        ++*nent;
                        ++i;
                }
                break;
        }
        case KVM_CPUID_SIGNATURE: {
                /* 12-byte signature returned in EBX, ECX, EDX */
                static const char signature[12] = "KVMKVMKVM\0\0";
                const u32 *sigptr = (const u32 *)signature;
                entry->eax = KVM_CPUID_FEATURES;
                entry->ebx = sigptr[0];
                entry->ecx = sigptr[1];
                entry->edx = sigptr[2];
                break;
        }
        case KVM_CPUID_FEATURES:
                /* paravirtual feature bits advertised unconditionally */
                entry->eax = (1 << KVM_FEATURE_CLOCKSOURCE) |
                             (1 << KVM_FEATURE_NOP_IO_DELAY) |
                             (1 << KVM_FEATURE_CLOCKSOURCE2) |
                             (1 << KVM_FEATURE_ASYNC_PF) |
                             (1 << KVM_FEATURE_PV_EOI) |
                             (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT) |
                             (1 << KVM_FEATURE_PV_UNHALT);

                /* steal time is advertised only when sched_info_on() is true */
                if (sched_info_on())
                        entry->eax |= (1 << KVM_FEATURE_STEAL_TIME);

                entry->ebx = 0;
                entry->ecx = 0;
                entry->edx = 0;
                break;
        case 0x80000000:
                /* cap the reported maximum extended leaf at 0x8000001a */
                entry->eax = min(entry->eax, 0x8000001a);
                break;
        case 0x80000001:
                entry->edx &= kvm_cpuid_8000_0001_edx_x86_features;
                cpuid_mask(&entry->edx, CPUID_8000_0001_EDX);
                entry->ecx &= kvm_cpuid_8000_0001_ecx_x86_features;
                cpuid_mask(&entry->ecx, CPUID_8000_0001_ECX);
                break;
        case 0x80000007: /* Advanced power management */
                /* invariant TSC is CPUID.80000007H:EDX[8] */
                entry->edx &= (1 << 8);
                /* mask against host */
                entry->edx &= boot_cpu_data.x86_power;
                entry->eax = entry->ebx = entry->ecx = 0;
                break;
        case 0x80000008: {
                /* EAX: [7:0] physical bits, [15:8] virtual bits, [23:16] guest physical bits */
                unsigned g_phys_as = (entry->eax >> 16) & 0xff;
                /* at least 48 virtual address bits are always reported */
                unsigned virt_as = max((entry->eax >> 8) & 0xff, 48U);
                unsigned phys_as = entry->eax & 0xff;

                /* fall back to the host physical width when no guest width is given */
                if (!g_phys_as)
                        g_phys_as = phys_as;
                entry->eax = g_phys_as | (virt_as << 8);
                entry->ebx = entry->edx = 0;
                break;
        }
        case 0x80000019:
                entry->ecx = entry->edx = 0;
                break;
        case 0x8000001a:
                break;
        case 0x8000001d:
                break;
        /*Add support for Centaur's CPUID instruction*/
        case 0xC0000000:
                /*Just support up to 0xC0000004 now*/
                entry->eax = min(entry->eax, 0xC0000004);
                break;
        case 0xC0000001:
                entry->edx &= kvm_cpuid_C000_0001_edx_x86_features;
                cpuid_mask(&entry->edx, CPUID_C000_0001_EDX);
                break;
        case 3: /* Processor serial number */
        case 5: /* MONITOR/MWAIT */
        case 0xC0000002:
        case 0xC0000003:
        case 0xC0000004:
        default:
                /* everything not handled above is reported as all zeroes */
                entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
                break;
        }

        /* let the vendor backend adjust the (first) entry */
        kvm_x86_ops->set_supported_cpuid(function, entry);

        r = 0;

out:
        put_cpu();

        return r;
}
 648
 649static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 func,
 650                        u32 idx, int *nent, int maxnent, unsigned int type)
 651{
 652        if (type == KVM_GET_EMULATED_CPUID)
 653                return __do_cpuid_ent_emulated(entry, func, idx, nent, maxnent);
 654
 655        return __do_cpuid_ent(entry, func, idx, nent, maxnent);
 656}
 657
 658#undef F
 659
/*
 * One starting point for the CPUID enumeration performed by
 * kvm_dev_ioctl_get_cpuid().
 */
struct kvm_cpuid_param {
        /* first CPUID function to query */
        u32 func;
        /* index passed along with every query made for this parameter */
        u32 idx;
        /*
         * when true, EAX of the most recently stored entry is taken as the
         * upper bound for also enumerating func + 1, func + 2, ...
         */
        bool has_leaf_count;
        /* optional predicate; if set and it returns false the parameter is skipped */
        bool (*qualifier)(const struct kvm_cpuid_param *param);
};
 666
 667static bool is_centaur_cpu(const struct kvm_cpuid_param *param)
 668{
 669        return boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR;
 670}
 671
 672static bool sanity_check_entries(struct kvm_cpuid_entry2 __user *entries,
 673                                 __u32 num_entries, unsigned int ioctl_type)
 674{
 675        int i;
 676        __u32 pad[3];
 677
 678        if (ioctl_type != KVM_GET_EMULATED_CPUID)
 679                return false;
 680
 681        /*
 682         * We want to make sure that ->padding is being passed clean from
 683         * userspace in case we want to use it for something in the future.
 684         *
 685         * Sadly, this wasn't enforced for KVM_GET_SUPPORTED_CPUID and so we
 686         * have to give ourselves satisfied only with the emulated side. /me
 687         * sheds a tear.
 688         */
 689        for (i = 0; i < num_entries; i++) {
 690                if (copy_from_user(pad, entries[i].padding, sizeof(pad)))
 691                        return true;
 692
 693                if (pad[0] || pad[1] || pad[2])
 694                        return true;
 695        }
 696        return false;
 697}
 698
 699int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid,
 700                            struct kvm_cpuid_entry2 __user *entries,
 701                            unsigned int type)
 702{
 703        struct kvm_cpuid_entry2 *cpuid_entries;
 704        int limit, nent = 0, r = -E2BIG, i;
 705        u32 func;
 706        static const struct kvm_cpuid_param param[] = {
 707                { .func = 0, .has_leaf_count = true },
 708                { .func = 0x80000000, .has_leaf_count = true },
 709                { .func = 0xC0000000, .qualifier = is_centaur_cpu, .has_leaf_count = true },
 710                { .func = KVM_CPUID_SIGNATURE },
 711                { .func = KVM_CPUID_FEATURES },
 712        };
 713
 714        if (cpuid->nent < 1)
 715                goto out;
 716        if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
 717                cpuid->nent = KVM_MAX_CPUID_ENTRIES;
 718
 719        if (sanity_check_entries(entries, cpuid->nent, type))
 720                return -EINVAL;
 721
 722        r = -ENOMEM;
 723        cpuid_entries = vzalloc(sizeof(struct kvm_cpuid_entry2) * cpuid->nent);
 724        if (!cpuid_entries)
 725                goto out;
 726
 727        r = 0;
 728        for (i = 0; i < ARRAY_SIZE(param); i++) {
 729                const struct kvm_cpuid_param *ent = &param[i];
 730
 731                if (ent->qualifier && !ent->qualifier(ent))
 732                        continue;
 733
 734                r = do_cpuid_ent(&cpuid_entries[nent], ent->func, ent->idx,
 735                                &nent, cpuid->nent, type);
 736
 737                if (r)
 738                        goto out_free;
 739
 740                if (!ent->has_leaf_count)
 741                        continue;
 742
 743                limit = cpuid_entries[nent - 1].eax;
 744                for (func = ent->func + 1; func <= limit && nent < cpuid->nent && r == 0; ++func)
 745                        r = do_cpuid_ent(&cpuid_entries[nent], func, ent->idx,
 746                                     &nent, cpuid->nent, type);
 747
 748                if (r)
 749                        goto out_free;
 750        }
 751
 752        r = -EFAULT;
 753        if (copy_to_user(entries, cpuid_entries,
 754                         nent * sizeof(struct kvm_cpuid_entry2)))
 755                goto out_free;
 756        cpuid->nent = nent;
 757        r = 0;
 758
 759out_free:
 760        vfree(cpuid_entries);
 761out:
 762        return r;
 763}
 764
 765static int move_to_next_stateful_cpuid_entry(struct kvm_vcpu *vcpu, int i)
 766{
 767        struct kvm_cpuid_entry2 *e = &vcpu->arch.cpuid_entries[i];
 768        int j, nent = vcpu->arch.cpuid_nent;
 769
 770        e->flags &= ~KVM_CPUID_FLAG_STATE_READ_NEXT;
 771        /* when no next entry is found, the current entry[i] is reselected */
 772        for (j = i + 1; ; j = (j + 1) % nent) {
 773                struct kvm_cpuid_entry2 *ej = &vcpu->arch.cpuid_entries[j];
 774                if (ej->function == e->function) {
 775                        ej->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT;
 776                        return j;
 777                }
 778        }
 779        return 0; /* silence gcc, even though control never reaches here */
 780}
 781
 782/* find an entry with matching function, matching index (if needed), and that
 783 * should be read next (if it's stateful) */
 784static int is_matching_cpuid_entry(struct kvm_cpuid_entry2 *e,
 785        u32 function, u32 index)
 786{
 787        if (e->function != function)
 788                return 0;
 789        if ((e->flags & KVM_CPUID_FLAG_SIGNIFCANT_INDEX) && e->index != index)
 790                return 0;
 791        if ((e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC) &&
 792            !(e->flags & KVM_CPUID_FLAG_STATE_READ_NEXT))
 793                return 0;
 794        return 1;
 795}
 796
 797struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
 798                                              u32 function, u32 index)
 799{
 800        int i;
 801        struct kvm_cpuid_entry2 *best = NULL;
 802
 803        for (i = 0; i < vcpu->arch.cpuid_nent; ++i) {
 804                struct kvm_cpuid_entry2 *e;
 805
 806                e = &vcpu->arch.cpuid_entries[i];
 807                if (is_matching_cpuid_entry(e, function, index)) {
 808                        if (e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC)
 809                                move_to_next_stateful_cpuid_entry(vcpu, i);
 810                        best = e;
 811                        break;
 812                }
 813        }
 814        return best;
 815}
 816EXPORT_SYMBOL_GPL(kvm_find_cpuid_entry);
 817
 818/*
 819 * If no match is found, check whether we exceed the vCPU's limit
 820 * and return the content of the highest valid _standard_ leaf instead.
 821 * This is to satisfy the CPUID specification.
 822 */
 823static struct kvm_cpuid_entry2* check_cpuid_limit(struct kvm_vcpu *vcpu,
 824                                                  u32 function, u32 index)
 825{
 826        struct kvm_cpuid_entry2 *maxlevel;
 827
 828        maxlevel = kvm_find_cpuid_entry(vcpu, function & 0x80000000, 0);
 829        if (!maxlevel || maxlevel->eax >= function)
 830                return NULL;
 831        if (function & 0x80000000) {
 832                maxlevel = kvm_find_cpuid_entry(vcpu, 0, 0);
 833                if (!maxlevel)
 834                        return NULL;
 835        }
 836        return kvm_find_cpuid_entry(vcpu, maxlevel->eax, index);
 837}
 838
 839void kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx)
 840{
 841        u32 function = *eax, index = *ecx;
 842        struct kvm_cpuid_entry2 *best;
 843
 844        best = kvm_find_cpuid_entry(vcpu, function, index);
 845
 846        if (!best)
 847                best = check_cpuid_limit(vcpu, function, index);
 848
 849        /*
 850         * Perfmon not yet supported for L2 guest.
 851         */
 852        if (is_guest_mode(vcpu) && function == 0xa)
 853                best = NULL;
 854
 855        if (best) {
 856                *eax = best->eax;
 857                *ebx = best->ebx;
 858                *ecx = best->ecx;
 859                *edx = best->edx;
 860        } else
 861                *eax = *ebx = *ecx = *edx = 0;
 862        trace_kvm_cpuid(function, *eax, *ebx, *ecx, *edx);
 863}
 864EXPORT_SYMBOL_GPL(kvm_cpuid);
 865
 866void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
 867{
 868        u32 function, eax, ebx, ecx, edx;
 869
 870        function = eax = kvm_register_read(vcpu, VCPU_REGS_RAX);
 871        ecx = kvm_register_read(vcpu, VCPU_REGS_RCX);
 872        kvm_cpuid(vcpu, &eax, &ebx, &ecx, &edx);
 873        kvm_register_write(vcpu, VCPU_REGS_RAX, eax);
 874        kvm_register_write(vcpu, VCPU_REGS_RBX, ebx);
 875        kvm_register_write(vcpu, VCPU_REGS_RCX, ecx);
 876        kvm_register_write(vcpu, VCPU_REGS_RDX, edx);
 877        kvm_x86_ops->skip_emulated_instruction(vcpu);
 878}
 879EXPORT_SYMBOL_GPL(kvm_emulate_cpuid);
 880