qemu/hw/i386/kvm/clock.c
<<
>>
Prefs
/*
 * QEMU KVM support, paravirtual clock device
 *
 * Copyright (C) 2011 Siemens AG
 *
 * Authors:
 *  Jan Kiszka        <jan.kiszka@siemens.com>
 *
 * This work is licensed under the terms of the GNU GPL version 2.
 * See the COPYING file in the top-level directory.
 *
 * Contributions after 2012-01-13 are licensed under the terms of the
 * GNU GPL, version 2 or (at your option) any later version.
 */
  15
  16#include "qemu/osdep.h"
  17#include "cpu.h"
  18#include "qemu/host-utils.h"
  19#include "qemu/module.h"
  20#include "sysemu/kvm.h"
  21#include "sysemu/runstate.h"
  22#include "sysemu/hw_accel.h"
  23#include "kvm/kvm_i386.h"
  24#include "migration/vmstate.h"
  25#include "hw/sysbus.h"
  26#include "hw/kvm/clock.h"
  27#include "hw/qdev-properties.h"
  28#include "qapi/error.h"
  29
  30#include <linux/kvm.h>
  31#include "standard-headers/asm-x86/kvm_para.h"
  32#include "qom/object.h"
  33
/* Type name under which the paravirtual clock device is registered. */
#define TYPE_KVM_CLOCK "kvmclock"
/*
 * Ties the KVMClockState instance struct to the KVM_CLOCK() cast that
 * kvmclock_realize() uses (presumably generated by this macro).
 */
OBJECT_DECLARE_SIMPLE_TYPE(KVMClockState, KVM_CLOCK)
  36
/*
 * Per-device state of the kvmclock device.
 */
struct KVMClockState {
    /*< private >*/
    SysBusDevice busdev;
    /*< public >*/

    /*
     * Clock value last obtained via KVM_GET_CLOCK (or recomputed from guest
     * memory on resume); migrated and handed back through KVM_SET_CLOCK.
     */
    uint64_t clock;
    /*
     * Set once 'clock' has been read after the VM stopped; cleared again
     * when the VM resumes.
     */
    bool clock_valid;

    /* whether the 'clock' value was obtained in the 'paused' state */
    bool runstate_paused;

    /* whether machine type supports reliable KVM_GET_CLOCK */
    bool mach_use_reliable_get_clock;

    /*
     * whether the 'clock' value was obtained in a host with
     * reliable KVM_GET_CLOCK
     */
    bool clock_is_reliable;
};
  55
  56struct pvclock_vcpu_time_info {
  57    uint32_t   version;
  58    uint32_t   pad0;
  59    uint64_t   tsc_timestamp;
  60    uint64_t   system_time;
  61    uint32_t   tsc_to_system_mul;
  62    int8_t     tsc_shift;
  63    uint8_t    flags;
  64    uint8_t    pad[2];
  65} __attribute__((__packed__)); /* 32 bytes */
  66
  67static uint64_t kvmclock_current_nsec(KVMClockState *s)
  68{
  69    CPUState *cpu = first_cpu;
  70    CPUX86State *env = cpu->env_ptr;
  71    hwaddr kvmclock_struct_pa;
  72    uint64_t migration_tsc = env->tsc;
  73    struct pvclock_vcpu_time_info time;
  74    uint64_t delta;
  75    uint64_t nsec_lo;
  76    uint64_t nsec_hi;
  77    uint64_t nsec;
  78
  79    cpu_synchronize_state(cpu);
  80
  81    if (!(env->system_time_msr & 1ULL)) {
  82        /* KVM clock not active */
  83        return 0;
  84    }
  85
  86    kvmclock_struct_pa = env->system_time_msr & ~1ULL;
  87    cpu_physical_memory_read(kvmclock_struct_pa, &time, sizeof(time));
  88
  89    assert(time.tsc_timestamp <= migration_tsc);
  90    delta = migration_tsc - time.tsc_timestamp;
  91    if (time.tsc_shift < 0) {
  92        delta >>= -time.tsc_shift;
  93    } else {
  94        delta <<= time.tsc_shift;
  95    }
  96
  97    mulu64(&nsec_lo, &nsec_hi, delta, time.tsc_to_system_mul);
  98    nsec = (nsec_lo >> 32) | (nsec_hi << 32);
  99    return nsec + time.system_time;
 100}
 101
 102static void kvm_update_clock(KVMClockState *s)
 103{
 104    struct kvm_clock_data data;
 105    int ret;
 106
 107    ret = kvm_vm_ioctl(kvm_state, KVM_GET_CLOCK, &data);
 108    if (ret < 0) {
 109        fprintf(stderr, "KVM_GET_CLOCK failed: %s\n", strerror(ret));
 110                abort();
 111    }
 112    s->clock = data.clock;
 113
 114    /* If kvm_has_adjust_clock_stable() is false, KVM_GET_CLOCK returns
 115     * essentially CLOCK_MONOTONIC plus a guest-specific adjustment.  This
 116     * can drift from the TSC-based value that is computed by the guest,
 117     * so we need to go through kvmclock_current_nsec().  If
 118     * kvm_has_adjust_clock_stable() is true, and the flags contain
 119     * KVM_CLOCK_TSC_STABLE, then KVM_GET_CLOCK returns a TSC-based value
 120     * and kvmclock_current_nsec() is not necessary.
 121     *
 122     * Here, however, we need not check KVM_CLOCK_TSC_STABLE.  This is because:
 123     *
 124     * - if the host has disabled the kvmclock master clock, the guest already
 125     *   has protection against time going backwards.  This "safety net" is only
 126     *   absent when kvmclock is stable;
 127     *
 128     * - therefore, we can replace a check like
 129     *
 130     *       if last KVM_GET_CLOCK was not reliable then
 131     *               read from memory
 132     *
 133     *   with
 134     *
 135     *       if last KVM_GET_CLOCK was not reliable && masterclock is enabled
 136     *               read from memory
 137     *
 138     * However:
 139     *
 140     * - if kvm_has_adjust_clock_stable() returns false, the left side is
 141     *   always true (KVM_GET_CLOCK is never reliable), and the right side is
 142     *   unknown (because we don't have data.flags).  We must assume it's true
 143     *   and read from memory.
 144     *
 145     * - if kvm_has_adjust_clock_stable() returns true, the result of the &&
 146     *   is always false (masterclock is enabled iff KVM_GET_CLOCK is reliable)
 147     *
 148     * So we can just use this instead:
 149     *
 150     *       if !kvm_has_adjust_clock_stable() then
 151     *               read from memory
 152     */
 153    s->clock_is_reliable = kvm_has_adjust_clock_stable();
 154}
 155
 156static void do_kvmclock_ctrl(CPUState *cpu, run_on_cpu_data data)
 157{
 158    int ret = kvm_vcpu_ioctl(cpu, KVM_KVMCLOCK_CTRL, 0);
 159
 160    if (ret && ret != -EINVAL) {
 161        fprintf(stderr, "%s: %s\n", __func__, strerror(-ret));
 162    }
 163}
 164
 165static void kvmclock_vm_state_change(void *opaque, bool running,
 166                                     RunState state)
 167{
 168    KVMClockState *s = opaque;
 169    CPUState *cpu;
 170    int cap_clock_ctrl = kvm_check_extension(kvm_state, KVM_CAP_KVMCLOCK_CTRL);
 171    int ret;
 172
 173    if (running) {
 174        struct kvm_clock_data data = {};
 175
 176        /*
 177         * If the host where s->clock was read did not support reliable
 178         * KVM_GET_CLOCK, read kvmclock value from memory.
 179         */
 180        if (!s->clock_is_reliable) {
 181            uint64_t pvclock_via_mem = kvmclock_current_nsec(s);
 182            /* We can't rely on the saved clock value, just discard it */
 183            if (pvclock_via_mem) {
 184                s->clock = pvclock_via_mem;
 185            }
 186        }
 187
 188        s->clock_valid = false;
 189
 190        data.clock = s->clock;
 191        ret = kvm_vm_ioctl(kvm_state, KVM_SET_CLOCK, &data);
 192        if (ret < 0) {
 193            fprintf(stderr, "KVM_SET_CLOCK failed: %s\n", strerror(ret));
 194            abort();
 195        }
 196
 197        if (!cap_clock_ctrl) {
 198            return;
 199        }
 200        CPU_FOREACH(cpu) {
 201            run_on_cpu(cpu, do_kvmclock_ctrl, RUN_ON_CPU_NULL);
 202        }
 203    } else {
 204
 205        if (s->clock_valid) {
 206            return;
 207        }
 208
 209        s->runstate_paused = runstate_check(RUN_STATE_PAUSED);
 210
 211        kvm_synchronize_all_tsc();
 212
 213        kvm_update_clock(s);
 214        /*
 215         * If the VM is stopped, declare the clock state valid to
 216         * avoid re-reading it on next vmsave (which would return
 217         * a different value). Will be reset when the VM is continued.
 218         */
 219        s->clock_valid = true;
 220    }
 221}
 222
 223static void kvmclock_realize(DeviceState *dev, Error **errp)
 224{
 225    KVMClockState *s = KVM_CLOCK(dev);
 226
 227    if (!kvm_enabled()) {
 228        error_setg(errp, "kvmclock device requires KVM");
 229        return;
 230    }
 231
 232    kvm_update_clock(s);
 233
 234    qemu_add_vm_change_state_handler(kvmclock_vm_state_change, s);
 235}
 236
 237static bool kvmclock_clock_is_reliable_needed(void *opaque)
 238{
 239    KVMClockState *s = opaque;
 240
 241    return s->mach_use_reliable_get_clock;
 242}
 243
/*
 * Migration subsection carrying the clock_is_reliable flag.  Its .needed
 * hook is kvmclock_clock_is_reliable_needed().
 */
static const VMStateDescription kvmclock_reliable_get_clock = {
    .name = "kvmclock/clock_is_reliable",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = kvmclock_clock_is_reliable_needed,
    .fields = (VMStateField[]) {
        VMSTATE_BOOL(clock_is_reliable, KVMClockState),
        VMSTATE_END_OF_LIST()
    }
};
 254
 255/*
 256 * When migrating, assume the source has an unreliable
 257 * KVM_GET_CLOCK unless told otherwise.
 258 */
 259static int kvmclock_pre_load(void *opaque)
 260{
 261    KVMClockState *s = opaque;
 262
 263    s->clock_is_reliable = false;
 264
 265    return 0;
 266}
 267
 268/*
 269 * When migrating a running guest, read the clock just
 270 * before migration, so that the guest clock counts
 271 * during the events between:
 272 *
 273 *  * vm_stop()
 274 *  *
 275 *  * pre_save()
 276 *
 277 *  This reduces kvmclock difference on migration from 5s
 278 *  to 0.1s (when max_downtime == 5s), because sending the
 279 *  final pages of memory (which happens between vm_stop()
 280 *  and pre_save()) takes max_downtime.
 281 */
 282static int kvmclock_pre_save(void *opaque)
 283{
 284    KVMClockState *s = opaque;
 285
 286    if (!s->runstate_paused) {
 287        kvm_update_clock(s);
 288    }
 289
 290    return 0;
 291}
 292
/*
 * Migration description of the kvmclock device: the 64-bit 'clock' value,
 * plus the clock_is_reliable subsection.  kvmclock_pre_load() and
 * kvmclock_pre_save() are installed as the load/save hooks.
 */
static const VMStateDescription kvmclock_vmsd = {
    .name = "kvmclock",
    .version_id = 1,
    .minimum_version_id = 1,
    .pre_load = kvmclock_pre_load,
    .pre_save = kvmclock_pre_save,
    .fields = (VMStateField[]) {
        VMSTATE_UINT64(clock, KVMClockState),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription * []) {
        &kvmclock_reliable_get_clock,
        NULL
    }
};
 308
/*
 * Device properties: "x-mach-use-reliable-get-clock" backs the
 * mach_use_reliable_get_clock field and defaults to true.
 */
static Property kvmclock_properties[] = {
    DEFINE_PROP_BOOL("x-mach-use-reliable-get-clock", KVMClockState,
                      mach_use_reliable_get_clock, true),
    DEFINE_PROP_END_OF_LIST(),
};
 314
 315static void kvmclock_class_init(ObjectClass *klass, void *data)
 316{
 317    DeviceClass *dc = DEVICE_CLASS(klass);
 318
 319    dc->realize = kvmclock_realize;
 320    dc->vmsd = &kvmclock_vmsd;
 321    device_class_set_props(dc, kvmclock_properties);
 322}
 323
/*
 * Type description of the kvmclock device: a sysbus device whose instances
 * are KVMClockState objects, initialized by kvmclock_class_init().
 */
static const TypeInfo kvmclock_info = {
    .name          = TYPE_KVM_CLOCK,
    .parent        = TYPE_SYS_BUS_DEVICE,
    .instance_size = sizeof(KVMClockState),
    .class_init    = kvmclock_class_init,
};
 330
 331/* Note: Must be called after VCPU initialization. */
 332void kvmclock_create(bool create_always)
 333{
 334    X86CPU *cpu = X86_CPU(first_cpu);
 335
 336    if (!kvm_enabled() || !kvm_has_adjust_clock())
 337        return;
 338
 339    if (create_always ||
 340        cpu->env.features[FEAT_KVM] & ((1ULL << KVM_FEATURE_CLOCKSOURCE) |
 341                                       (1ULL << KVM_FEATURE_CLOCKSOURCE2))) {
 342        sysbus_create_simple(TYPE_KVM_CLOCK, -1, NULL);
 343    }
 344}
 345
/* Register the kvmclock device type described by kvmclock_info. */
static void kvmclock_register_types(void)
{
    type_register_static(&kvmclock_info);
}

/* Hook the registration function into type initialization. */
type_init(kvmclock_register_types)
 352