linux/arch/x86/kernel/cpu/vmware.c
/*
 * VMware Detection code.
 *
 * Copyright (C) 2008, VMware, Inc.
 * Author : Alok N Kataria <akataria@vmware.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 * NON INFRINGEMENT.  See the GNU General Public License for more
 * details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 */

#include <linux/dmi.h>
#include <linux/init.h>
#include <linux/export.h>
#include <linux/clocksource.h>
#include <linux/cpu.h>
#include <linux/reboot.h>
#include <linux/static_call.h>
#include <asm/div64.h>
#include <asm/x86_init.h>
#include <asm/hypervisor.h>
#include <asm/timer.h>
#include <asm/apic.h>
#include <asm/vmware.h>
#include <asm/svm.h>

#undef pr_fmt
#define pr_fmt(fmt)     "vmware: " fmt

#define CPUID_VMWARE_INFO_LEAF               0x40000000
#define CPUID_VMWARE_FEATURES_LEAF           0x40000010
#define CPUID_VMWARE_FEATURES_ECX_VMMCALL    BIT(0)
#define CPUID_VMWARE_FEATURES_ECX_VMCALL     BIT(1)

#define VMWARE_HYPERVISOR_MAGIC 0x564D5868

#define VMWARE_CMD_GETVERSION    10
#define VMWARE_CMD_GETHZ         45
#define VMWARE_CMD_GETVCPU_INFO  68
#define VMWARE_CMD_LEGACY_X2APIC  3
#define VMWARE_CMD_VCPU_RESERVED 31
#define VMWARE_CMD_STEALCLOCK    91

#define STEALCLOCK_NOT_AVAILABLE (-1)
#define STEALCLOCK_DISABLED        0
#define STEALCLOCK_ENABLED         1

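/*
 * Low-bandwidth hypercall interface, as encoded by the macros below: the
 * guest loads the "VMXh" magic (0x564D5868) into EAX, the command number
 * into ECX and UINT_MAX into EBX, then traps to the hypervisor; results
 * come back in EAX/EBX/ECX/EDX.  Three trap mechanisms are used: the
 * legacy "backdoor" IN from VMWARE_HYPERVISOR_PORT (defined in
 * <asm/vmware.h>), VMCALL on Intel VT-x and VMMCALL on AMD SVM.
 */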
#define VMWARE_PORT(cmd, eax, ebx, ecx, edx)                            \
        __asm__("inl (%%dx), %%eax" :                                   \
                "=a"(eax), "=c"(ecx), "=d"(edx), "=b"(ebx) :            \
                "a"(VMWARE_HYPERVISOR_MAGIC),                           \
                "c"(VMWARE_CMD_##cmd),                                  \
                "d"(VMWARE_HYPERVISOR_PORT), "b"(UINT_MAX) :            \
                "memory")

#define VMWARE_VMCALL(cmd, eax, ebx, ecx, edx)                          \
        __asm__("vmcall" :                                              \
                "=a"(eax), "=c"(ecx), "=d"(edx), "=b"(ebx) :            \
                "a"(VMWARE_HYPERVISOR_MAGIC),                           \
                "c"(VMWARE_CMD_##cmd),                                  \
                "d"(0), "b"(UINT_MAX) :                                 \
                "memory")

#define VMWARE_VMMCALL(cmd, eax, ebx, ecx, edx)                         \
        __asm__("vmmcall" :                                             \
                "=a"(eax), "=c"(ecx), "=d"(edx), "=b"(ebx) :            \
                "a"(VMWARE_HYPERVISOR_MAGIC),                           \
                "c"(VMWARE_CMD_##cmd),                                  \
                "d"(0), "b"(UINT_MAX) :                                 \
                "memory")

#define VMWARE_CMD(cmd, eax, ebx, ecx, edx) do {                \
        switch (vmware_hypercall_mode) {                        \
        case CPUID_VMWARE_FEATURES_ECX_VMCALL:                  \
                VMWARE_VMCALL(cmd, eax, ebx, ecx, edx);         \
                break;                                          \
        case CPUID_VMWARE_FEATURES_ECX_VMMCALL:                 \
                VMWARE_VMMCALL(cmd, eax, ebx, ecx, edx);        \
                break;                                          \
        default:                                                \
                VMWARE_PORT(cmd, eax, ebx, ecx, edx);           \
                break;                                          \
        }                                                       \
        } while (0)

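/*
 * Layout of the steal-time page shared with the hypervisor: the host
 * advances @clock (counted in units of the virtual TSC) while the vCPU
 * is involuntarily preempted; vmware_steal_clock() below converts it to
 * nanoseconds.  The reserved tail pads the structure to 64 bytes.
 */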
struct vmware_steal_time {
        union {
                uint64_t clock; /* stolen time counter in units of vtsc */
                struct {
                        /* only for little-endian */
                        uint32_t clock_low;
                        uint32_t clock_high;
                };
        };
        uint64_t reserved[7];
};

static unsigned long vmware_tsc_khz __ro_after_init;
static u8 vmware_hypercall_mode     __ro_after_init;

static inline int __vmware_platform(void)
{
        uint32_t eax, ebx, ecx, edx;

        VMWARE_CMD(GETVERSION, eax, ebx, ecx, edx);
        return eax != (uint32_t)-1 && ebx == VMWARE_HYPERVISOR_MAGIC;
}

static unsigned long vmware_get_tsc_khz(void)
{
        return vmware_tsc_khz;
}

#ifdef CONFIG_PARAVIRT
static struct cyc2ns_data vmware_cyc2ns __ro_after_init;
static bool vmw_sched_clock __initdata = true;
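/*
 * The steal-time page is written by the host, so on SEV guests it has to
 * sit in memory that is mapped decrypted; hence DEFINE_PER_CPU_DECRYPTED
 * rather than a plain per-cpu variable.
 */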
static DEFINE_PER_CPU_DECRYPTED(struct vmware_steal_time, vmw_steal_time) __aligned(64);
static bool has_steal_clock;
static bool steal_acc __initdata = true; /* steal time accounting */

static __init int setup_vmw_sched_clock(char *s)
{
        vmw_sched_clock = false;
        return 0;
}
early_param("no-vmw-sched-clock", setup_vmw_sched_clock);

static __init int parse_no_stealacc(char *arg)
{
        steal_acc = false;
        return 0;
}
early_param("no-steal-acc", parse_no_stealacc);

static unsigned long long notrace vmware_sched_clock(void)
{
        unsigned long long ns;

        ns = mul_u64_u32_shr(rdtsc(), vmware_cyc2ns.cyc2ns_mul,
                             vmware_cyc2ns.cyc2ns_shift);
        ns -= vmware_cyc2ns.cyc2ns_offset;
        return ns;
}

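/*
 * Compute the cycles-to-ns conversion used by vmware_sched_clock() above
 * and by vmware_steal_clock().  The offset is the boot-time TSC value run
 * through the same mult/shift, so sched_clock effectively starts at zero
 * at the point this runs.
 */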
static void __init vmware_cyc2ns_setup(void)
{
        struct cyc2ns_data *d = &vmware_cyc2ns;
        unsigned long long tsc_now = rdtsc();

        clocks_calc_mult_shift(&d->cyc2ns_mul, &d->cyc2ns_shift,
                               vmware_tsc_khz, NSEC_PER_MSEC, 0);
        d->cyc2ns_offset = mul_u64_u32_shr(tsc_now, d->cyc2ns_mul,
                                           d->cyc2ns_shift);

        pr_info("using clock offset of %llu ns\n", d->cyc2ns_offset);
}

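/*
 * STEALCLOCK takes its argument in ESI:EDI (the high and low 32 bits of
 * the steal-time page's physical address) and reports one of the
 * STEALCLOCK_* status codes back in EAX.  Passing the (0, 1) sentinel,
 * as __stealclock_disable() does below, turns the clock off.
 */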
static int vmware_cmd_stealclock(uint32_t arg1, uint32_t arg2)
{
        uint32_t result, info;

        asm volatile (VMWARE_HYPERCALL :
                "=a"(result),
                "=c"(info) :
                "a"(VMWARE_HYPERVISOR_MAGIC),
                "b"(0),
                "c"(VMWARE_CMD_STEALCLOCK),
                "d"(0),
                "S"(arg1),
                "D"(arg2) :
                "memory");
        return result;
}

static bool stealclock_enable(phys_addr_t pa)
{
        return vmware_cmd_stealclock(upper_32_bits(pa),
                                     lower_32_bits(pa)) == STEALCLOCK_ENABLED;
}

static int __stealclock_disable(void)
{
        return vmware_cmd_stealclock(0, 1);
}

static void stealclock_disable(void)
{
        __stealclock_disable();
}

static bool vmware_is_stealclock_available(void)
{
        return __stealclock_disable() != STEALCLOCK_NOT_AVAILABLE;
}

/**
 * vmware_steal_clock() - read the per-cpu steal clock
 * @cpu:            the cpu number whose steal clock we want to read
 *
 * The function reads the steal clock if we are on a 64-bit system, otherwise
 * reads it in parts, checking that the high part didn't change in the
 * meantime.
 *
 * Return:
 *      The steal clock reading in ns.
 */
static uint64_t vmware_steal_clock(int cpu)
{
        struct vmware_steal_time *steal = &per_cpu(vmw_steal_time, cpu);
        uint64_t clock;

        if (IS_ENABLED(CONFIG_64BIT))
                clock = READ_ONCE(steal->clock);
        else {
                uint32_t initial_high, low, high;

                do {
                        initial_high = READ_ONCE(steal->clock_high);
                        /* Do not reorder initial_high and high readings */
                        virt_rmb();
                        low = READ_ONCE(steal->clock_low);
                        /* Keep low reading in between */
                        virt_rmb();
                        high = READ_ONCE(steal->clock_high);
                } while (initial_high != high);

                clock = ((uint64_t)high << 32) | low;
        }

        return mul_u64_u32_shr(clock, vmware_cyc2ns.cyc2ns_mul,
                               vmware_cyc2ns.cyc2ns_shift);
}

static void vmware_register_steal_time(void)
{
        int cpu = smp_processor_id();
        struct vmware_steal_time *st = &per_cpu(vmw_steal_time, cpu);

        if (!has_steal_clock)
                return;

        if (!stealclock_enable(slow_virt_to_phys(st))) {
                has_steal_clock = false;
                return;
        }

        pr_info("vmware-stealtime: cpu %d, pa %llx\n",
                cpu, (unsigned long long) slow_virt_to_phys(st));
}

static void vmware_disable_steal_time(void)
{
        if (!has_steal_clock)
                return;

        stealclock_disable();
}

static void vmware_guest_cpu_init(void)
{
        if (has_steal_clock)
                vmware_register_steal_time();
}

static void vmware_pv_guest_cpu_reboot(void *unused)
{
        vmware_disable_steal_time();
}

static int vmware_pv_reboot_notify(struct notifier_block *nb,
                                   unsigned long code, void *unused)
{
        if (code == SYS_RESTART)
                on_each_cpu(vmware_pv_guest_cpu_reboot, NULL, 1);
        return NOTIFY_DONE;
}

static struct notifier_block vmware_pv_reboot_nb = {
        .notifier_call = vmware_pv_reboot_notify,
};

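/*
 * Each vCPU has to register its own steal-time page, so hook the boot
 * CPU's SMP preparation as well as CPU hotplug: a CPU coming online
 * registers its page, and a CPU going down disables its steal clock
 * again.
 */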
#ifdef CONFIG_SMP
static void __init vmware_smp_prepare_boot_cpu(void)
{
        vmware_guest_cpu_init();
        native_smp_prepare_boot_cpu();
}

static int vmware_cpu_online(unsigned int cpu)
{
        local_irq_disable();
        vmware_guest_cpu_init();
        local_irq_enable();
        return 0;
}

static int vmware_cpu_down_prepare(unsigned int cpu)
{
        local_irq_disable();
        vmware_disable_steal_time();
        local_irq_enable();
        return 0;
}
#endif

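/*
 * paravirt_steal_enabled lets the scheduler account the time reported by
 * pv_steal_clock; paravirt_steal_rq_enabled additionally excludes stolen
 * time from the runqueue clock.  The platform is detected from
 * setup_arch(), before jump labels are initialized, so the keys are
 * flipped from an initcall instead (KVM guests use the same pattern).
 */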
static __init int activate_jump_labels(void)
{
        if (has_steal_clock) {
                static_key_slow_inc(&paravirt_steal_enabled);
                if (steal_acc)
                        static_key_slow_inc(&paravirt_steal_rq_enabled);
        }

        return 0;
}
arch_initcall(activate_jump_labels);

static void __init vmware_paravirt_ops_setup(void)
{
        pv_info.name = "VMware hypervisor";
        pv_ops.cpu.io_delay = paravirt_nop;

        if (vmware_tsc_khz == 0)
                return;

        vmware_cyc2ns_setup();

        if (vmw_sched_clock)
                paravirt_set_sched_clock(vmware_sched_clock);

        if (vmware_is_stealclock_available()) {
                has_steal_clock = true;
                static_call_update(pv_steal_clock, vmware_steal_clock);

                /* We use the reboot notifier only to disable the steal clock */
                register_reboot_notifier(&vmware_pv_reboot_nb);

#ifdef CONFIG_SMP
                smp_ops.smp_prepare_boot_cpu =
                        vmware_smp_prepare_boot_cpu;
                if (cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
                                              "x86/vmware:online",
                                              vmware_cpu_online,
                                              vmware_cpu_down_prepare) < 0)
                        pr_err("vmware_guest: Failed to install cpu hotplug callbacks\n");
#else
                vmware_guest_cpu_init();
#endif
        }
}
#else
#define vmware_paravirt_ops_setup() do {} while (0)
#endif

/*
 * The VMware hypervisor takes care of exporting a reliable TSC to the guest.
 * Still, due to timing differences when running on virtual CPUs, the TSC can
 * be marked as unstable in some cases. For example, the TSC sync check at
 * bootup can fail due to a marginal offset between vCPUs' TSCs (though the
 * TSCs do not drift from each other).  Also, the ACPI PM timer clocksource
 * is not suitable as a watchdog when running on a hypervisor because the
 * kernel may miss a wrap of the counter if the vCPU is descheduled for a
 * long time. To skip these checks at runtime we set these capability bits,
 * so that the kernel can just trust the hypervisor to provide a reliable
 * virtual TSC that is suitable for timekeeping.
 */
static void __init vmware_set_capabilities(void)
{
        setup_force_cpu_cap(X86_FEATURE_CONSTANT_TSC);
        setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE);
        if (vmware_tsc_khz)
                setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
        if (vmware_hypercall_mode == CPUID_VMWARE_FEATURES_ECX_VMCALL)
                setup_force_cpu_cap(X86_FEATURE_VMCALL);
        else if (vmware_hypercall_mode == CPUID_VMWARE_FEATURES_ECX_VMMCALL)
                setup_force_cpu_cap(X86_FEATURE_VMW_VMMCALL);
}

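/*
 * GETHZ reports the virtual TSC frequency in Hz in EBX:EAX, with
 * EBX == UINT_MAX when the frequency is not available, and the (virtual)
 * host bus clock in ECX.
 */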
static void __init vmware_platform_setup(void)
{
        uint32_t eax, ebx, ecx, edx;
        uint64_t lpj, tsc_khz;

        VMWARE_CMD(GETHZ, eax, ebx, ecx, edx);

        if (ebx != UINT_MAX) {
                lpj = tsc_khz = eax | (((uint64_t)ebx) << 32);
                do_div(tsc_khz, 1000);
                WARN_ON(tsc_khz >> 32);
                pr_info("TSC freq read from hypervisor : %lu.%03lu MHz\n",
                        (unsigned long) tsc_khz / 1000,
                        (unsigned long) tsc_khz % 1000);

                if (!preset_lpj) {
                        do_div(lpj, HZ);
                        preset_lpj = lpj;
                }

                vmware_tsc_khz = tsc_khz;
                x86_platform.calibrate_tsc = vmware_get_tsc_khz;
                x86_platform.calibrate_cpu = vmware_get_tsc_khz;

#ifdef CONFIG_X86_LOCAL_APIC
                /* Skip lapic calibration since we know the bus frequency. */
                lapic_timer_period = ecx / HZ;
                pr_info("Host bus clock speed read from hypervisor : %u Hz\n",
                        ecx);
#endif
        } else {
                pr_warn("Failed to get TSC freq from the hypervisor\n");
        }

        vmware_paravirt_ops_setup();

#ifdef CONFIG_X86_IO_APIC
        no_timer_check = 1;
#endif

        vmware_set_capabilities();
}

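/*
 * The FEATURES CPUID leaf advertises in ECX which hypercall instruction
 * the virtual hardware supports.  A zero return leaves
 * vmware_hypercall_mode at 0, i.e. VMWARE_CMD() keeps using the
 * port-based backdoor.
 */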
static u8 __init vmware_select_hypercall(void)
{
        unsigned int eax, ebx, ecx, edx;

        cpuid(CPUID_VMWARE_FEATURES_LEAF, &eax, &ebx, &ecx, &edx);
        return (ecx & (CPUID_VMWARE_FEATURES_ECX_VMMCALL |
                       CPUID_VMWARE_FEATURES_ECX_VMCALL));
}

/*
 * When checking the DMI string information, just checking the product
 * serial key should be enough, as it always carries a VMware-specific
 * string when running under the VMware hypervisor.
 * If !boot_cpu_has(X86_FEATURE_HYPERVISOR), vmware_hypercall_mode
 * intentionally defaults to 0 (the port-based backdoor).
 */
static uint32_t __init vmware_platform(void)
{
        if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) {
                unsigned int eax;
                unsigned int hyper_vendor_id[3];

                cpuid(CPUID_VMWARE_INFO_LEAF, &eax, &hyper_vendor_id[0],
                      &hyper_vendor_id[1], &hyper_vendor_id[2]);
                if (!memcmp(hyper_vendor_id, "VMwareVMware", 12)) {
                        if (eax >= CPUID_VMWARE_FEATURES_LEAF)
                                vmware_hypercall_mode =
                                        vmware_select_hypercall();

                        pr_info("hypercall mode: 0x%02x\n",
                                (unsigned int) vmware_hypercall_mode);

                        return CPUID_VMWARE_INFO_LEAF;
                }
        } else if (dmi_available && dmi_name_in_serial("VMware") &&
                   __vmware_platform())
                return 1;

        return 0;
}

/*
 * Checks whether the hypervisor supports x2apic without VT-d interrupt
 * remapping: bit VMWARE_CMD_LEGACY_X2APIC of the GETVCPU_INFO result must
 * be set and the VMWARE_CMD_VCPU_RESERVED bit must be clear.
 */
static bool __init vmware_legacy_x2apic_available(void)
{
        uint32_t eax, ebx, ecx, edx;

        VMWARE_CMD(GETVCPU_INFO, eax, ebx, ecx, edx);
        return (eax & (1 << VMWARE_CMD_VCPU_RESERVED)) == 0 &&
               (eax & (1 << VMWARE_CMD_LEGACY_X2APIC)) != 0;
}

#ifdef CONFIG_AMD_MEM_ENCRYPT
static void vmware_sev_es_hcall_prepare(struct ghcb *ghcb,
                                        struct pt_regs *regs)
{
        /* Copy VMware-specific hypercall parameters to the GHCB */
        ghcb_set_rip(ghcb, regs->ip);
        ghcb_set_rbx(ghcb, regs->bx);
        ghcb_set_rcx(ghcb, regs->cx);
        ghcb_set_rdx(ghcb, regs->dx);
        ghcb_set_rsi(ghcb, regs->si);
        ghcb_set_rdi(ghcb, regs->di);
        ghcb_set_rbp(ghcb, regs->bp);
}

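/*
 * Under SEV-ES the hypervisor cannot access guest registers directly, so
 * hypercall arguments are marshalled through the shared GHCB:
 * vmware_sev_es_hcall_prepare() above copies them in, and this function
 * validates the hypervisor's response and copies the results back,
 * failing the hypercall if any expected register is missing.
 */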
static bool vmware_sev_es_hcall_finish(struct ghcb *ghcb, struct pt_regs *regs)
{
        if (!(ghcb_rbx_is_valid(ghcb) &&
              ghcb_rcx_is_valid(ghcb) &&
              ghcb_rdx_is_valid(ghcb) &&
              ghcb_rsi_is_valid(ghcb) &&
              ghcb_rdi_is_valid(ghcb) &&
              ghcb_rbp_is_valid(ghcb)))
                return false;

        regs->bx = ghcb_get_rbx(ghcb);
        regs->cx = ghcb_get_rcx(ghcb);
        regs->dx = ghcb_get_rdx(ghcb);
        regs->si = ghcb_get_rsi(ghcb);
        regs->di = ghcb_get_rdi(ghcb);
        regs->bp = ghcb_get_rbp(ghcb);

        return true;
}
#endif

const __initconst struct hypervisor_x86 x86_hyper_vmware = {
        .name                           = "VMware",
        .detect                         = vmware_platform,
        .type                           = X86_HYPER_VMWARE,
        .init.init_platform             = vmware_platform_setup,
        .init.x2apic_available          = vmware_legacy_x2apic_available,
#ifdef CONFIG_AMD_MEM_ENCRYPT
        .runtime.sev_es_hcall_prepare   = vmware_sev_es_hcall_prepare,
        .runtime.sev_es_hcall_finish    = vmware_sev_es_hcall_finish,
#endif
};