linux/arch/x86/kernel/cpu/vmware.c
/*
 * VMware Detection code.
 *
 * Copyright (C) 2008, VMware, Inc.
 * Author : Alok N Kataria <akataria@vmware.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 * NON INFRINGEMENT.  See the GNU General Public License for more
 * details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 */

#include <linux/dmi.h>
#include <linux/init.h>
#include <linux/export.h>
#include <linux/clocksource.h>
#include <linux/cpu.h>
#include <linux/reboot.h>
#include <asm/div64.h>
#include <asm/x86_init.h>
#include <asm/hypervisor.h>
#include <asm/timer.h>
#include <asm/apic.h>
#include <asm/vmware.h>

#undef pr_fmt
#define pr_fmt(fmt)     "vmware: " fmt

#define CPUID_VMWARE_INFO_LEAF               0x40000000
#define CPUID_VMWARE_FEATURES_LEAF           0x40000010
#define CPUID_VMWARE_FEATURES_ECX_VMMCALL    BIT(0)
#define CPUID_VMWARE_FEATURES_ECX_VMCALL     BIT(1)

#define VMWARE_HYPERVISOR_MAGIC 0x564D5868

#define VMWARE_CMD_GETVERSION    10
#define VMWARE_CMD_GETHZ         45
#define VMWARE_CMD_GETVCPU_INFO  68
#define VMWARE_CMD_LEGACY_X2APIC  3
#define VMWARE_CMD_VCPU_RESERVED 31
#define VMWARE_CMD_STEALCLOCK    91

#define STEALCLOCK_NOT_AVAILABLE (-1)
#define STEALCLOCK_DISABLED        0
#define STEALCLOCK_ENABLED         1

#define VMWARE_PORT(cmd, eax, ebx, ecx, edx)                            \
        __asm__("inl (%%dx), %%eax" :                                   \
                "=a"(eax), "=c"(ecx), "=d"(edx), "=b"(ebx) :            \
                "a"(VMWARE_HYPERVISOR_MAGIC),                           \
                "c"(VMWARE_CMD_##cmd),                                  \
                "d"(VMWARE_HYPERVISOR_PORT), "b"(UINT_MAX) :            \
                "memory")

#define VMWARE_VMCALL(cmd, eax, ebx, ecx, edx)                          \
        __asm__("vmcall" :                                              \
                "=a"(eax), "=c"(ecx), "=d"(edx), "=b"(ebx) :            \
                "a"(VMWARE_HYPERVISOR_MAGIC),                           \
                "c"(VMWARE_CMD_##cmd),                                  \
                "d"(0), "b"(UINT_MAX) :                                 \
                "memory")

#define VMWARE_VMMCALL(cmd, eax, ebx, ecx, edx)                         \
        __asm__("vmmcall" :                                             \
                "=a"(eax), "=c"(ecx), "=d"(edx), "=b"(ebx) :            \
                "a"(VMWARE_HYPERVISOR_MAGIC),                           \
                "c"(VMWARE_CMD_##cmd),                                  \
                "d"(0), "b"(UINT_MAX) :                                 \
                "memory")

#define VMWARE_CMD(cmd, eax, ebx, ecx, edx) do {                \
        switch (vmware_hypercall_mode) {                        \
        case CPUID_VMWARE_FEATURES_ECX_VMCALL:                  \
                VMWARE_VMCALL(cmd, eax, ebx, ecx, edx);         \
                break;                                          \
        case CPUID_VMWARE_FEATURES_ECX_VMMCALL:                 \
                VMWARE_VMMCALL(cmd, eax, ebx, ecx, edx);        \
                break;                                          \
        default:                                                \
                VMWARE_PORT(cmd, eax, ebx, ecx, edx);           \
                break;                                          \
        }                                                       \
        } while (0)

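/*
 * Example usage (a sketch, mirroring __vmware_platform() below): every
 * command loads the 0x564D5868 ("VMXh") magic into EAX and the command
 * number into ECX, then traps to the hypervisor via the legacy I/O port,
 * VMCALL (Intel) or VMMCALL (AMD), depending on the mode selected at
 * detection time:
 *
 *	uint32_t eax, ebx, ecx, edx;
 *
 *	VMWARE_CMD(GETVERSION, eax, ebx, ecx, edx);
 *	if (eax != (uint32_t)-1 && ebx == VMWARE_HYPERVISOR_MAGIC)
 *		... we are talking to a VMware hypervisor.
 */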
struct vmware_steal_time {
        union {
                uint64_t clock; /* stolen time counter in units of vtsc */
                struct {
                        /* only for little-endian */
                        uint32_t clock_low;
                        uint32_t clock_high;
                };
        };
        uint64_t reserved[7];
};
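/*
 * Note: the buffer above is exactly 64 bytes (one 8-byte counter plus
 * 56 reserved bytes), i.e. a single cache line on x86, which is why the
 * per-cpu instance below is __aligned(64).
 */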

static unsigned long vmware_tsc_khz __ro_after_init;
static u8 vmware_hypercall_mode     __ro_after_init;

static inline int __vmware_platform(void)
{
        uint32_t eax, ebx, ecx, edx;
        VMWARE_CMD(GETVERSION, eax, ebx, ecx, edx);
        return eax != (uint32_t)-1 && ebx == VMWARE_HYPERVISOR_MAGIC;
}

static unsigned long vmware_get_tsc_khz(void)
{
        return vmware_tsc_khz;
}

#ifdef CONFIG_PARAVIRT
static struct cyc2ns_data vmware_cyc2ns __ro_after_init;
static bool vmw_sched_clock __initdata = true;
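/*
 * The steal-time buffer is written by the hypervisor, so it is placed in
 * the per-cpu "decrypted" section: when SEV memory encryption is active
 * that keeps the page mapped unencrypted and thus host-visible.
 */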
static DEFINE_PER_CPU_DECRYPTED(struct vmware_steal_time, vmw_steal_time) __aligned(64);
static bool has_steal_clock;
static bool steal_acc __initdata = true; /* steal time accounting */

static __init int setup_vmw_sched_clock(char *s)
{
        vmw_sched_clock = false;
        return 0;
}
early_param("no-vmw-sched-clock", setup_vmw_sched_clock);

static __init int parse_no_stealacc(char *arg)
{
        steal_acc = false;
        return 0;
}
early_param("no-steal-acc", parse_no_stealacc);

static unsigned long long notrace vmware_sched_clock(void)
{
        unsigned long long ns;

        ns = mul_u64_u32_shr(rdtsc(), vmware_cyc2ns.cyc2ns_mul,
                             vmware_cyc2ns.cyc2ns_shift);
        ns -= vmware_cyc2ns.cyc2ns_offset;
        return ns;
}

static void __init vmware_cyc2ns_setup(void)
{
        struct cyc2ns_data *d = &vmware_cyc2ns;
        unsigned long long tsc_now = rdtsc();

        clocks_calc_mult_shift(&d->cyc2ns_mul, &d->cyc2ns_shift,
                               vmware_tsc_khz, NSEC_PER_MSEC, 0);
        d->cyc2ns_offset = mul_u64_u32_shr(tsc_now, d->cyc2ns_mul,
                                           d->cyc2ns_shift);

        pr_info("using clock offset of %llu ns\n", d->cyc2ns_offset);
}
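/*
 * A worked example of the conversion set up above, with made-up numbers:
 * clocks_calc_mult_shift() picks (mul, shift) so that
 *
 *	ns = (cycles * mul) >> shift  ~=  cycles * NSEC_PER_MSEC / tsc_khz
 *
 * For a 2 GHz vTSC (vmware_tsc_khz == 2000000) the target ratio is
 * 0.5 ns per cycle, e.g. mul == 2^31 with shift == 32.  Subtracting the
 * offset recorded here makes vmware_sched_clock() start near zero.
 */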

static int vmware_cmd_stealclock(uint32_t arg1, uint32_t arg2)
{
        uint32_t result, info;

        asm volatile (VMWARE_HYPERCALL :
                "=a"(result),
                "=c"(info) :
                "a"(VMWARE_HYPERVISOR_MAGIC),
                "b"(0),
                "c"(VMWARE_CMD_STEALCLOCK),
                "d"(0),
                "S"(arg1),
                "D"(arg2) :
                "memory");
        return result;
}

static bool stealclock_enable(phys_addr_t pa)
{
        return vmware_cmd_stealclock(upper_32_bits(pa),
                                     lower_32_bits(pa)) == STEALCLOCK_ENABLED;
}

static int __stealclock_disable(void)
{
        return vmware_cmd_stealclock(0, 1);
}

static void stealclock_disable(void)
{
        __stealclock_disable();
}

static bool vmware_is_stealclock_available(void)
{
        return __stealclock_disable() != STEALCLOCK_NOT_AVAILABLE;
}
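/*
 * To summarize the STEALCLOCK protocol used by the helpers above: the
 * hypercall takes the high and low halves of a physical address in
 * ESI/EDI and returns one of the STEALCLOCK_* values in EAX.  Enabling
 * passes the physical address of the per-cpu vmware_steal_time buffer;
 * disabling passes the (0, 1) pseudo-address.  Since even a disable
 * request reports STEALCLOCK_NOT_AVAILABLE when the feature is missing,
 * vmware_is_stealclock_available() doubles as the feature probe.
 */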

/**
 * vmware_steal_clock() - read the per-cpu steal clock
 * @cpu:            the cpu number whose steal clock we want to read
 *
 * The function reads the steal clock if we are on a 64-bit system, otherwise
 * reads it in parts, checking that the high part didn't change in the
 * meantime.
 *
 * Return:
 *      The steal clock reading in ns.
 */
static uint64_t vmware_steal_clock(int cpu)
{
        struct vmware_steal_time *steal = &per_cpu(vmw_steal_time, cpu);
        uint64_t clock;

        if (IS_ENABLED(CONFIG_64BIT))
                clock = READ_ONCE(steal->clock);
        else {
                uint32_t initial_high, low, high;

                do {
                        initial_high = READ_ONCE(steal->clock_high);
                        /* Do not reorder initial_high and high readings */
                        virt_rmb();
                        low = READ_ONCE(steal->clock_low);
                        /* Keep the low reading in between */
                        virt_rmb();
                        high = READ_ONCE(steal->clock_high);
                } while (initial_high != high);

                clock = ((uint64_t)high << 32) | low;
        }

        return mul_u64_u32_shr(clock, vmware_cyc2ns.cyc2ns_mul,
                             vmware_cyc2ns.cyc2ns_shift);
}

static void vmware_register_steal_time(void)
{
        int cpu = smp_processor_id();
        struct vmware_steal_time *st = &per_cpu(vmw_steal_time, cpu);

        if (!has_steal_clock)
                return;

        if (!stealclock_enable(slow_virt_to_phys(st))) {
                has_steal_clock = false;
                return;
        }

        pr_info("vmware-stealtime: cpu %d, pa %llx\n",
                cpu, (unsigned long long) slow_virt_to_phys(st));
}

static void vmware_disable_steal_time(void)
{
        if (!has_steal_clock)
                return;

        stealclock_disable();
}

static void vmware_guest_cpu_init(void)
{
        if (has_steal_clock)
                vmware_register_steal_time();
}

static void vmware_pv_guest_cpu_reboot(void *unused)
{
        vmware_disable_steal_time();
}

static int vmware_pv_reboot_notify(struct notifier_block *nb,
                                unsigned long code, void *unused)
{
        if (code == SYS_RESTART)
                on_each_cpu(vmware_pv_guest_cpu_reboot, NULL, 1);
        return NOTIFY_DONE;
}

static struct notifier_block vmware_pv_reboot_nb = {
        .notifier_call = vmware_pv_reboot_notify,
};

#ifdef CONFIG_SMP
static void __init vmware_smp_prepare_boot_cpu(void)
{
        vmware_guest_cpu_init();
        native_smp_prepare_boot_cpu();
}

static int vmware_cpu_online(unsigned int cpu)
{
        local_irq_disable();
        vmware_guest_cpu_init();
        local_irq_enable();
        return 0;
}

static int vmware_cpu_down_prepare(unsigned int cpu)
{
        local_irq_disable();
        vmware_disable_steal_time();
        local_irq_enable();
        return 0;
}
#endif

static __init int activate_jump_labels(void)
{
        if (has_steal_clock) {
                static_key_slow_inc(&paravirt_steal_enabled);
                if (steal_acc)
                        static_key_slow_inc(&paravirt_steal_rq_enabled);
        }

        return 0;
}
arch_initcall(activate_jump_labels);

static void __init vmware_paravirt_ops_setup(void)
{
        pv_info.name = "VMware hypervisor";
        pv_ops.cpu.io_delay = paravirt_nop;

        if (vmware_tsc_khz == 0)
                return;

        vmware_cyc2ns_setup();

        if (vmw_sched_clock)
                pv_ops.time.sched_clock = vmware_sched_clock;

        if (vmware_is_stealclock_available()) {
                has_steal_clock = true;
                pv_ops.time.steal_clock = vmware_steal_clock;

                /* We use the reboot notifier only to disable the steal clock */
                register_reboot_notifier(&vmware_pv_reboot_nb);

#ifdef CONFIG_SMP
                smp_ops.smp_prepare_boot_cpu =
                        vmware_smp_prepare_boot_cpu;
                if (cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
                                              "x86/vmware:online",
                                              vmware_cpu_online,
                                              vmware_cpu_down_prepare) < 0)
                        pr_err("vmware_guest: Failed to install cpu hotplug callbacks\n");
#else
                vmware_guest_cpu_init();
#endif
        }
}
#else
#define vmware_paravirt_ops_setup() do {} while (0)
#endif

/*
 * The VMware hypervisor takes care of exporting a reliable TSC to the guest.
 * Still, due to timing differences when running on virtual cpus, the TSC can
 * be marked as unstable in some cases. For example, the TSC sync check at
 * bootup can fail due to a marginal offset between vcpus' TSCs (though the
 * TSCs do not drift from each other).  Also, the ACPI PM timer clocksource
 * is not suitable as a watchdog when running on a hypervisor because the
 * kernel may miss a wrap of the counter if the vcpu is descheduled for a
 * long time. To skip these checks at runtime we set these capability bits,
 * so that the kernel can just trust the hypervisor to provide a reliable
 * virtual TSC that is suitable for timekeeping.
 */
static void __init vmware_set_capabilities(void)
{
        setup_force_cpu_cap(X86_FEATURE_CONSTANT_TSC);
        setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE);
        if (vmware_hypercall_mode == CPUID_VMWARE_FEATURES_ECX_VMCALL)
                setup_force_cpu_cap(X86_FEATURE_VMCALL);
        else if (vmware_hypercall_mode == CPUID_VMWARE_FEATURES_ECX_VMMCALL)
                setup_force_cpu_cap(X86_FEATURE_VMW_VMMCALL);
}

static void __init vmware_platform_setup(void)
{
        uint32_t eax, ebx, ecx, edx;
        uint64_t lpj, tsc_khz;

        VMWARE_CMD(GETHZ, eax, ebx, ecx, edx);

        if (ebx != UINT_MAX) {
                lpj = tsc_khz = eax | (((uint64_t)ebx) << 32);
                do_div(tsc_khz, 1000);
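                /*
                 * Worked example with made-up numbers: a 2496 MHz vTSC
                 * is reported as EBX:EAX == 2496000000 Hz, giving
                 * tsc_khz == 2496000 here and, below, an lpj preset of
                 * 2496000000 / HZ.
                 */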
                WARN_ON(tsc_khz >> 32);
                pr_info("TSC freq read from hypervisor : %lu.%03lu MHz\n",
                        (unsigned long) tsc_khz / 1000,
                        (unsigned long) tsc_khz % 1000);

                if (!preset_lpj) {
                        do_div(lpj, HZ);
                        preset_lpj = lpj;
                }

                vmware_tsc_khz = tsc_khz;
                x86_platform.calibrate_tsc = vmware_get_tsc_khz;
                x86_platform.calibrate_cpu = vmware_get_tsc_khz;

#ifdef CONFIG_X86_LOCAL_APIC
                /* Skip lapic calibration since we know the bus frequency. */
                lapic_timer_period = ecx / HZ;
                pr_info("Host bus clock speed read from hypervisor : %u Hz\n",
                        ecx);
#endif
        } else {
                pr_warn("Failed to get TSC freq from the hypervisor\n");
        }

        vmware_paravirt_ops_setup();

#ifdef CONFIG_X86_IO_APIC
        no_timer_check = 1;
#endif

        vmware_set_capabilities();
}

static u8 __init vmware_select_hypercall(void)
{
        int eax, ebx, ecx, edx;

        cpuid(CPUID_VMWARE_FEATURES_LEAF, &eax, &ebx, &ecx, &edx);
        return (ecx & (CPUID_VMWARE_FEATURES_ECX_VMMCALL |
                       CPUID_VMWARE_FEATURES_ECX_VMCALL));
}
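/*
 * vmware_hypercall_mode therefore ends up as 0x00 (legacy I/O port),
 * 0x01 (VMMCALL, AMD) or 0x02 (VMCALL, Intel), matching the mode byte
 * printed during detection and the dispatch in VMWARE_CMD().
 */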

/*
 * When checking the DMI string information, just checking the product
 * serial key should be enough, as it always contains a VMware-specific
 * string when running under the VMware hypervisor.
 * If !boot_cpu_has(X86_FEATURE_HYPERVISOR), vmware_hypercall_mode
 * intentionally defaults to 0.
 */
static uint32_t __init vmware_platform(void)
{
        if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) {
                unsigned int eax;
                unsigned int hyper_vendor_id[3];

                cpuid(CPUID_VMWARE_INFO_LEAF, &eax, &hyper_vendor_id[0],
                      &hyper_vendor_id[1], &hyper_vendor_id[2]);
                if (!memcmp(hyper_vendor_id, "VMwareVMware", 12)) {
                        if (eax >= CPUID_VMWARE_FEATURES_LEAF)
                                vmware_hypercall_mode =
                                        vmware_select_hypercall();

                        pr_info("hypercall mode: 0x%02x\n",
                                (unsigned int) vmware_hypercall_mode);

                        return CPUID_VMWARE_INFO_LEAF;
                }
        } else if (dmi_available && dmi_name_in_serial("VMware") &&
                   __vmware_platform())
                return 1;

        return 0;
}
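/*
 * The non-zero value returned above acts as a priority for the generic
 * hypervisor detection code in hypervisor.c, so the CPUID-based path
 * (returning the 0x40000000 leaf) outranks the DMI fallback's 1.
 */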

/*
 * Checks if the hypervisor supports x2apic without VT-d interrupt remapping.
 * Despite their VMWARE_CMD_ prefix, LEGACY_X2APIC and VCPU_RESERVED are bit
 * positions in the EAX value returned by the GETVCPU_INFO command.
 */
static bool __init vmware_legacy_x2apic_available(void)
{
        uint32_t eax, ebx, ecx, edx;
        VMWARE_CMD(GETVCPU_INFO, eax, ebx, ecx, edx);
        return (eax & (1 << VMWARE_CMD_VCPU_RESERVED)) == 0 &&
               (eax & (1 << VMWARE_CMD_LEGACY_X2APIC)) != 0;
}

const __initconst struct hypervisor_x86 x86_hyper_vmware = {
        .name                   = "VMware",
        .detect                 = vmware_platform,
        .type                   = X86_HYPER_VMWARE,
        .init.init_platform     = vmware_platform_setup,
        .init.x2apic_available  = vmware_legacy_x2apic_available,
};