qemu/hw/i386/kvm/clock.c
<<
>>
Prefs
   1/*
   2 * QEMU KVM support, paravirtual clock device
   3 *
   4 * Copyright (C) 2011 Siemens AG
   5 *
   6 * Authors:
   7 *  Jan Kiszka        <jan.kiszka@siemens.com>
   8 *
   9 * This work is licensed under the terms of the GNU GPL version 2.
  10 * See the COPYING file in the top-level directory.
  11 *
  12 * Contributions after 2012-01-13 are licensed under the terms of the
  13 * GNU GPL, version 2 or (at your option) any later version.
  14 */
  15
  16#include "qemu/osdep.h"
  17#include "cpu.h"
  18#include "qemu/host-utils.h"
  19#include "qemu/module.h"
  20#include "sysemu/sysemu.h"
  21#include "sysemu/kvm.h"
  22#include "sysemu/hw_accel.h"
  23#include "kvm_i386.h"
  24#include "hw/sysbus.h"
  25#include "hw/kvm/clock.h"
  26#include "qapi/error.h"
  27
  28#include <linux/kvm.h>
  29#include "standard-headers/asm-x86/kvm_para.h"
  30
/* QOM type name under which the paravirtual clock device is registered. */
#define TYPE_KVM_CLOCK "kvmclock"
/* Convert a QOM object pointer to KVMClockState * through OBJECT_CHECK(). */
#define KVM_CLOCK(obj) OBJECT_CHECK(KVMClockState, (obj), TYPE_KVM_CLOCK)
  33
/* Per-instance state of the kvmclock device. */
typedef struct KVMClockState {
    /*< private >*/
    SysBusDevice busdev;
    /*< public >*/

    /*
     * Clock value last obtained from KVM_GET_CLOCK, or recomputed from the
     * guest's pvclock structure on resume.  This is the field that is
     * migrated and later handed to KVM_SET_CLOCK.
     */
    uint64_t clock;
    /*
     * Set once 'clock' has been sampled while the VM is stopped, so that it
     * is not re-read by a later stop notification; cleared when the VM
     * resumes.
     */
    bool clock_valid;

    /* whether machine type supports reliable KVM_GET_CLOCK */
    bool mach_use_reliable_get_clock;

    /* whether the 'clock' value was obtained in a host with
     * reliable KVM_GET_CLOCK */
    bool clock_is_reliable;
} KVMClockState;
  49
/*
 * Layout of the time structure that this file reads from guest physical
 * memory at the address stored in the system-time MSR (with bit 0 masked
 * off).  Only tsc_timestamp, system_time, tsc_to_system_mul and tsc_shift
 * are consumed in this file.
 */
struct pvclock_vcpu_time_info {
    uint32_t   version;
    uint32_t   pad0;
    uint64_t   tsc_timestamp;     /* TSC value the structure refers to */
    uint64_t   system_time;       /* base added to the scaled TSC delta */
    uint32_t   tsc_to_system_mul; /* 32.32 fixed-point multiplier */
    int8_t     tsc_shift;         /* pre-scale shift; negative = right shift */
    uint8_t    flags;
    uint8_t    pad[2];
} __attribute__((__packed__)); /* 32 bytes */
  60
  61static uint64_t kvmclock_current_nsec(KVMClockState *s)
  62{
  63    CPUState *cpu = first_cpu;
  64    CPUX86State *env = cpu->env_ptr;
  65    hwaddr kvmclock_struct_pa;
  66    uint64_t migration_tsc = env->tsc;
  67    struct pvclock_vcpu_time_info time;
  68    uint64_t delta;
  69    uint64_t nsec_lo;
  70    uint64_t nsec_hi;
  71    uint64_t nsec;
  72
  73    cpu_synchronize_state(cpu);
  74
  75    if (!(env->system_time_msr & 1ULL)) {
  76        /* KVM clock not active */
  77        return 0;
  78    }
  79
  80    kvmclock_struct_pa = env->system_time_msr & ~1ULL;
  81    cpu_physical_memory_read(kvmclock_struct_pa, &time, sizeof(time));
  82
  83    assert(time.tsc_timestamp <= migration_tsc);
  84    delta = migration_tsc - time.tsc_timestamp;
  85    if (time.tsc_shift < 0) {
  86        delta >>= -time.tsc_shift;
  87    } else {
  88        delta <<= time.tsc_shift;
  89    }
  90
  91    mulu64(&nsec_lo, &nsec_hi, delta, time.tsc_to_system_mul);
  92    nsec = (nsec_lo >> 32) | (nsec_hi << 32);
  93    return nsec + time.system_time;
  94}
  95
  96static void kvm_update_clock(KVMClockState *s)
  97{
  98    struct kvm_clock_data data;
  99    int ret;
 100
 101    ret = kvm_vm_ioctl(kvm_state, KVM_GET_CLOCK, &data);
 102    if (ret < 0) {
 103        fprintf(stderr, "KVM_GET_CLOCK failed: %s\n", strerror(ret));
 104                abort();
 105    }
 106    s->clock = data.clock;
 107
 108    /* If kvm_has_adjust_clock_stable() is false, KVM_GET_CLOCK returns
 109     * essentially CLOCK_MONOTONIC plus a guest-specific adjustment.  This
 110     * can drift from the TSC-based value that is computed by the guest,
 111     * so we need to go through kvmclock_current_nsec().  If
 112     * kvm_has_adjust_clock_stable() is true, and the flags contain
 113     * KVM_CLOCK_TSC_STABLE, then KVM_GET_CLOCK returns a TSC-based value
 114     * and kvmclock_current_nsec() is not necessary.
 115     *
 116     * Here, however, we need not check KVM_CLOCK_TSC_STABLE.  This is because:
 117     *
 118     * - if the host has disabled the kvmclock master clock, the guest already
 119     *   has protection against time going backwards.  This "safety net" is only
 120     *   absent when kvmclock is stable;
 121     *
 122     * - therefore, we can replace a check like
 123     *
 124     *       if last KVM_GET_CLOCK was not reliable then
 125     *               read from memory
 126     *
 127     *   with
 128     *
 129     *       if last KVM_GET_CLOCK was not reliable && masterclock is enabled
 130     *               read from memory
 131     *
 132     * However:
 133     *
 134     * - if kvm_has_adjust_clock_stable() returns false, the left side is
 135     *   always true (KVM_GET_CLOCK is never reliable), and the right side is
 136     *   unknown (because we don't have data.flags).  We must assume it's true
 137     *   and read from memory.
 138     *
 139     * - if kvm_has_adjust_clock_stable() returns true, the result of the &&
 140     *   is always false (masterclock is enabled iff KVM_GET_CLOCK is reliable)
 141     *
 142     * So we can just use this instead:
 143     *
 144     *       if !kvm_has_adjust_clock_stable() then
 145     *               read from memory
 146     */
 147    s->clock_is_reliable = kvm_has_adjust_clock_stable();
 148}
 149
 150static void do_kvmclock_ctrl(CPUState *cpu, run_on_cpu_data data)
 151{
 152    int ret = kvm_vcpu_ioctl(cpu, KVM_KVMCLOCK_CTRL, 0);
 153
 154    if (ret && ret != -EINVAL) {
 155        fprintf(stderr, "%s: %s\n", __func__, strerror(-ret));
 156    }
 157}
 158
 159static void kvmclock_vm_state_change(void *opaque, int running,
 160                                     RunState state)
 161{
 162    KVMClockState *s = opaque;
 163    CPUState *cpu;
 164    int cap_clock_ctrl = kvm_check_extension(kvm_state, KVM_CAP_KVMCLOCK_CTRL);
 165    int ret;
 166
 167    if (running) {
 168        struct kvm_clock_data data = {};
 169
 170        /*
 171         * If the host where s->clock was read did not support reliable
 172         * KVM_GET_CLOCK, read kvmclock value from memory.
 173         */
 174        if (!s->clock_is_reliable) {
 175            uint64_t pvclock_via_mem = kvmclock_current_nsec(s);
 176            /* We can't rely on the saved clock value, just discard it */
 177            if (pvclock_via_mem) {
 178                s->clock = pvclock_via_mem;
 179            }
 180        }
 181
 182        s->clock_valid = false;
 183
 184        data.clock = s->clock;
 185        ret = kvm_vm_ioctl(kvm_state, KVM_SET_CLOCK, &data);
 186        if (ret < 0) {
 187            fprintf(stderr, "KVM_SET_CLOCK failed: %s\n", strerror(ret));
 188            abort();
 189        }
 190
 191        if (!cap_clock_ctrl) {
 192            return;
 193        }
 194        CPU_FOREACH(cpu) {
 195            run_on_cpu(cpu, do_kvmclock_ctrl, RUN_ON_CPU_NULL);
 196        }
 197    } else {
 198
 199        if (s->clock_valid) {
 200            return;
 201        }
 202
 203        kvm_synchronize_all_tsc();
 204
 205        kvm_update_clock(s);
 206        /*
 207         * If the VM is stopped, declare the clock state valid to
 208         * avoid re-reading it on next vmsave (which would return
 209         * a different value). Will be reset when the VM is continued.
 210         */
 211        s->clock_valid = true;
 212    }
 213}
 214
 215static void kvmclock_realize(DeviceState *dev, Error **errp)
 216{
 217    KVMClockState *s = KVM_CLOCK(dev);
 218
 219    if (!kvm_enabled()) {
 220        error_setg(errp, "kvmclock device requires KVM");
 221        return;
 222    }
 223
 224    kvm_update_clock(s);
 225
 226    qemu_add_vm_change_state_handler(kvmclock_vm_state_change, s);
 227}
 228
 229static bool kvmclock_clock_is_reliable_needed(void *opaque)
 230{
 231    KVMClockState *s = opaque;
 232
 233    return s->mach_use_reliable_get_clock;
 234}
 235
/*
 * Optional migration subsection carrying clock_is_reliable.  It is emitted
 * only when kvmclock_clock_is_reliable_needed() returns true.
 */
static const VMStateDescription kvmclock_reliable_get_clock = {
    .name = "kvmclock/clock_is_reliable",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = kvmclock_clock_is_reliable_needed,
    .fields = (VMStateField[]) {
        VMSTATE_BOOL(clock_is_reliable, KVMClockState),
        VMSTATE_END_OF_LIST()
    }
};
 246
 247/*
 248 * When migrating, assume the source has an unreliable
 249 * KVM_GET_CLOCK unless told otherwise.
 250 */
 251static int kvmclock_pre_load(void *opaque)
 252{
 253    KVMClockState *s = opaque;
 254
 255    s->clock_is_reliable = false;
 256
 257    return 0;
 258}
 259
 260/*
 261 * When migrating, read the clock just before migration,
 262 * so that the guest clock counts during the events
 263 * between:
 264 *
 265 *  * vm_stop()
 266 *  *
 267 *  * pre_save()
 268 *
 269 *  This reduces kvmclock difference on migration from 5s
 270 *  to 0.1s (when max_downtime == 5s), because sending the
 271 *  final pages of memory (which happens between vm_stop()
 272 *  and pre_save()) takes max_downtime.
 273 */
 274static int kvmclock_pre_save(void *opaque)
 275{
 276    KVMClockState *s = opaque;
 277
 278    kvm_update_clock(s);
 279
 280    return 0;
 281}
 282
/*
 * Top-level migration description of the device: the 64-bit 'clock' value
 * plus the optional clock_is_reliable subsection.  pre_load resets the
 * reliability flag and pre_save refreshes 'clock' before it is written.
 */
static const VMStateDescription kvmclock_vmsd = {
    .name = "kvmclock",
    .version_id = 1,
    .minimum_version_id = 1,
    .pre_load = kvmclock_pre_load,
    .pre_save = kvmclock_pre_save,
    .fields = (VMStateField[]) {
        VMSTATE_UINT64(clock, KVMClockState),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription * []) {
        &kvmclock_reliable_get_clock,
        NULL
    }
};
 298
/*
 * Device properties.  "x-mach-use-reliable-get-clock" backs
 * mach_use_reliable_get_clock and defaults to true.
 */
static Property kvmclock_properties[] = {
    DEFINE_PROP_BOOL("x-mach-use-reliable-get-clock", KVMClockState,
                      mach_use_reliable_get_clock, true),
    DEFINE_PROP_END_OF_LIST(),
};
 304
 305static void kvmclock_class_init(ObjectClass *klass, void *data)
 306{
 307    DeviceClass *dc = DEVICE_CLASS(klass);
 308
 309    dc->realize = kvmclock_realize;
 310    dc->vmsd = &kvmclock_vmsd;
 311    dc->props = kvmclock_properties;
 312}
 313
/* QOM type description: a sysbus device whose instances are KVMClockState. */
static const TypeInfo kvmclock_info = {
    .name          = TYPE_KVM_CLOCK,
    .parent        = TYPE_SYS_BUS_DEVICE,
    .instance_size = sizeof(KVMClockState),
    .class_init    = kvmclock_class_init,
};
 320
 321/* Note: Must be called after VCPU initialization. */
 322void kvmclock_create(void)
 323{
 324    X86CPU *cpu = X86_CPU(first_cpu);
 325
 326    if (kvm_enabled() &&
 327        cpu->env.features[FEAT_KVM] & ((1ULL << KVM_FEATURE_CLOCKSOURCE) |
 328                                       (1ULL << KVM_FEATURE_CLOCKSOURCE2))) {
 329        sysbus_create_simple(TYPE_KVM_CLOCK, -1, NULL);
 330    }
 331}
 332
/* Register the kvmclock QOM type described by kvmclock_info. */
static void kvmclock_register_types(void)
{
    type_register_static(&kvmclock_info);
}

/* Hand the registration function to QEMU's type_init() mechanism. */
type_init(kvmclock_register_types)
 339