qemu/softmmu/icount.c
<<
>>
Prefs
   1/*
   2 * QEMU System Emulator
   3 *
   4 * Copyright (c) 2003-2008 Fabrice Bellard
   5 *
   6 * Permission is hereby granted, free of charge, to any person obtaining a copy
   7 * of this software and associated documentation files (the "Software"), to deal
   8 * in the Software without restriction, including without limitation the rights
   9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  10 * copies of the Software, and to permit persons to whom the Software is
  11 * furnished to do so, subject to the following conditions:
  12 *
  13 * The above copyright notice and this permission notice shall be included in
  14 * all copies or substantial portions of the Software.
  15 *
  16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  22 * THE SOFTWARE.
  23 */
  24
  25#include "qemu/osdep.h"
  26#include "qemu/cutils.h"
  27#include "migration/vmstate.h"
  28#include "qapi/error.h"
  29#include "qemu/error-report.h"
  30#include "exec/exec-all.h"
  31#include "sysemu/cpus.h"
  32#include "sysemu/qtest.h"
  33#include "qemu/main-loop.h"
  34#include "qemu/option.h"
  35#include "qemu/seqlock.h"
  36#include "sysemu/replay.h"
  37#include "sysemu/runstate.h"
  38#include "hw/core/cpu.h"
  39#include "sysemu/cpu-timers.h"
  40#include "sysemu/cpu-throttle.h"
  41#include "timers-state.h"
  42
  43/*
  44 * ICOUNT: Instruction Counter
  45 *
  46 * this module is split off from cpu-timers because the icount part
  47 * is TCG-specific, and does not need to be built for other accels.
  48 */
  49static bool icount_sleep = true;
  50/* Arbitrarily pick 1MIPS as the minimum allowable speed.  */
  51#define MAX_ICOUNT_SHIFT 10
  52
  53/*
  54 * 0 = Do not count executed instructions.
  55 * 1 = Fixed conversion of insn to ns via "shift" option
  56 * 2 = Runtime adaptive algorithm to compute shift
  57 */
  58int use_icount;
  59
  60static void icount_enable_precise(void)
  61{
  62    use_icount = 1;
  63}
  64
  65static void icount_enable_adaptive(void)
  66{
  67    use_icount = 2;
  68}
  69
  70/*
  71 * The current number of executed instructions is based on what we
  72 * originally budgeted minus the current state of the decrementing
  73 * icount counters in extra/u16.low.
  74 */
  75static int64_t icount_get_executed(CPUState *cpu)
  76{
  77    return (cpu->icount_budget -
  78            (cpu_neg(cpu)->icount_decr.u16.low + cpu->icount_extra));
  79}
  80
  81/*
  82 * Update the global shared timer_state.qemu_icount to take into
  83 * account executed instructions. This is done by the TCG vCPU
  84 * thread so the main-loop can see time has moved forward.
  85 */
  86static void icount_update_locked(CPUState *cpu)
  87{
  88    int64_t executed = icount_get_executed(cpu);
  89    cpu->icount_budget -= executed;
  90
  91    qatomic_set_i64(&timers_state.qemu_icount,
  92                    timers_state.qemu_icount + executed);
  93}
  94
  95/*
  96 * Update the global shared timer_state.qemu_icount to take into
  97 * account executed instructions. This is done by the TCG vCPU
  98 * thread so the main-loop can see time has moved forward.
  99 */
 100void icount_update(CPUState *cpu)
 101{
 102    seqlock_write_lock(&timers_state.vm_clock_seqlock,
 103                       &timers_state.vm_clock_lock);
 104    icount_update_locked(cpu);
 105    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
 106                         &timers_state.vm_clock_lock);
 107}
 108
 109static int64_t icount_get_raw_locked(void)
 110{
 111    CPUState *cpu = current_cpu;
 112
 113    if (cpu && cpu->running) {
 114        if (!cpu->can_do_io) {
 115            error_report("Bad icount read");
 116            exit(1);
 117        }
 118        /* Take into account what has run */
 119        icount_update_locked(cpu);
 120    }
 121    /* The read is protected by the seqlock, but needs atomic64 to avoid UB */
 122    return qatomic_read_i64(&timers_state.qemu_icount);
 123}
 124
 125static int64_t icount_get_locked(void)
 126{
 127    int64_t icount = icount_get_raw_locked();
 128    return qatomic_read_i64(&timers_state.qemu_icount_bias) +
 129        icount_to_ns(icount);
 130}
 131
 132int64_t icount_get_raw(void)
 133{
 134    int64_t icount;
 135    unsigned start;
 136
 137    do {
 138        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
 139        icount = icount_get_raw_locked();
 140    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
 141
 142    return icount;
 143}
 144
 145/* Return the virtual CPU time, based on the instruction counter.  */
 146int64_t icount_get(void)
 147{
 148    int64_t icount;
 149    unsigned start;
 150
 151    do {
 152        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
 153        icount = icount_get_locked();
 154    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
 155
 156    return icount;
 157}
 158
 159int64_t icount_to_ns(int64_t icount)
 160{
 161    return icount << qatomic_read(&timers_state.icount_time_shift);
 162}
 163
 164/*
 165 * Correlation between real and virtual time is always going to be
 166 * fairly approximate, so ignore small variation.
 167 * When the guest is idle real and virtual time will be aligned in
 168 * the IO wait loop.
 169 */
 170#define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)
 171
 172static void icount_adjust(void)
 173{
 174    int64_t cur_time;
 175    int64_t cur_icount;
 176    int64_t delta;
 177
 178    /* If the VM is not running, then do nothing.  */
 179    if (!runstate_is_running()) {
 180        return;
 181    }
 182
 183    seqlock_write_lock(&timers_state.vm_clock_seqlock,
 184                       &timers_state.vm_clock_lock);
 185    cur_time = REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT,
 186                                   cpu_get_clock_locked());
 187    cur_icount = icount_get_locked();
 188
 189    delta = cur_icount - cur_time;
 190    /* FIXME: This is a very crude algorithm, somewhat prone to oscillation.  */
 191    if (delta > 0
 192        && timers_state.last_delta + ICOUNT_WOBBLE < delta * 2
 193        && timers_state.icount_time_shift > 0) {
 194        /* The guest is getting too far ahead.  Slow time down.  */
 195        qatomic_set(&timers_state.icount_time_shift,
 196                    timers_state.icount_time_shift - 1);
 197    }
 198    if (delta < 0
 199        && timers_state.last_delta - ICOUNT_WOBBLE > delta * 2
 200        && timers_state.icount_time_shift < MAX_ICOUNT_SHIFT) {
 201        /* The guest is getting too far behind.  Speed time up.  */
 202        qatomic_set(&timers_state.icount_time_shift,
 203                    timers_state.icount_time_shift + 1);
 204    }
 205    timers_state.last_delta = delta;
 206    qatomic_set_i64(&timers_state.qemu_icount_bias,
 207                    cur_icount - (timers_state.qemu_icount
 208                                  << timers_state.icount_time_shift));
 209    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
 210                         &timers_state.vm_clock_lock);
 211}
 212
 213static void icount_adjust_rt(void *opaque)
 214{
 215    timer_mod(timers_state.icount_rt_timer,
 216              qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
 217    icount_adjust();
 218}
 219
 220static void icount_adjust_vm(void *opaque)
 221{
 222    timer_mod(timers_state.icount_vm_timer,
 223                   qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
 224                   NANOSECONDS_PER_SECOND / 10);
 225    icount_adjust();
 226}
 227
 228int64_t icount_round(int64_t count)
 229{
 230    int shift = qatomic_read(&timers_state.icount_time_shift);
 231    return (count + (1 << shift) - 1) >> shift;
 232}
 233
 234static void icount_warp_rt(void)
 235{
 236    unsigned seq;
 237    int64_t warp_start;
 238
 239    /*
 240     * The icount_warp_timer is rescheduled soon after vm_clock_warp_start
 241     * changes from -1 to another value, so the race here is okay.
 242     */
 243    do {
 244        seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
 245        warp_start = timers_state.vm_clock_warp_start;
 246    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));
 247
 248    if (warp_start == -1) {
 249        return;
 250    }
 251
 252    seqlock_write_lock(&timers_state.vm_clock_seqlock,
 253                       &timers_state.vm_clock_lock);
 254    if (runstate_is_running()) {
 255        int64_t clock = REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT,
 256                                            cpu_get_clock_locked());
 257        int64_t warp_delta;
 258
 259        warp_delta = clock - timers_state.vm_clock_warp_start;
 260        if (icount_enabled() == 2) {
 261            /*
 262             * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
 263             * far ahead of real time.
 264             */
 265            int64_t cur_icount = icount_get_locked();
 266            int64_t delta = clock - cur_icount;
 267            warp_delta = MIN(warp_delta, delta);
 268        }
 269        qatomic_set_i64(&timers_state.qemu_icount_bias,
 270                        timers_state.qemu_icount_bias + warp_delta);
 271    }
 272    timers_state.vm_clock_warp_start = -1;
 273    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
 274                       &timers_state.vm_clock_lock);
 275
 276    if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
 277        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
 278    }
 279}
 280
 281static void icount_timer_cb(void *opaque)
 282{
 283    /*
 284     * No need for a checkpoint because the timer already synchronizes
 285     * with CHECKPOINT_CLOCK_VIRTUAL_RT.
 286     */
 287    icount_warp_rt();
 288}
 289
 290void icount_start_warp_timer(void)
 291{
 292    int64_t clock;
 293    int64_t deadline;
 294
 295    assert(icount_enabled());
 296
 297    /*
 298     * Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
 299     * do not fire, so computing the deadline does not make sense.
 300     */
 301    if (!runstate_is_running()) {
 302        return;
 303    }
 304
 305    if (replay_mode != REPLAY_MODE_PLAY) {
 306        if (!all_cpu_threads_idle()) {
 307            return;
 308        }
 309
 310        if (qtest_enabled()) {
 311            /* When testing, qtest commands advance icount.  */
 312            return;
 313        }
 314
 315        replay_checkpoint(CHECKPOINT_CLOCK_WARP_START);
 316    } else {
 317        /* warp clock deterministically in record/replay mode */
 318        if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
 319            /*
 320             * vCPU is sleeping and warp can't be started.
 321             * It is probably a race condition: notification sent
 322             * to vCPU was processed in advance and vCPU went to sleep.
 323             * Therefore we have to wake it up for doing someting.
 324             */
 325            if (replay_has_event()) {
 326                qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
 327            }
 328            return;
 329        }
 330    }
 331
 332    /* We want to use the earliest deadline from ALL vm_clocks */
 333    clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
 334    deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
 335                                          ~QEMU_TIMER_ATTR_EXTERNAL);
 336    if (deadline < 0) {
 337        static bool notified;
 338        if (!icount_sleep && !notified) {
 339            warn_report("icount sleep disabled and no active timers");
 340            notified = true;
 341        }
 342        return;
 343    }
 344
 345    if (deadline > 0) {
 346        /*
 347         * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
 348         * sleep.  Otherwise, the CPU might be waiting for a future timer
 349         * interrupt to wake it up, but the interrupt never comes because
 350         * the vCPU isn't running any insns and thus doesn't advance the
 351         * QEMU_CLOCK_VIRTUAL.
 352         */
 353        if (!icount_sleep) {
 354            /*
 355             * We never let VCPUs sleep in no sleep icount mode.
 356             * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
 357             * to the next QEMU_CLOCK_VIRTUAL event and notify it.
 358             * It is useful when we want a deterministic execution time,
 359             * isolated from host latencies.
 360             */
 361            seqlock_write_lock(&timers_state.vm_clock_seqlock,
 362                               &timers_state.vm_clock_lock);
 363            qatomic_set_i64(&timers_state.qemu_icount_bias,
 364                            timers_state.qemu_icount_bias + deadline);
 365            seqlock_write_unlock(&timers_state.vm_clock_seqlock,
 366                                 &timers_state.vm_clock_lock);
 367            qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
 368        } else {
 369            /*
 370             * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
 371             * "real" time, (related to the time left until the next event) has
 372             * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
 373             * This avoids that the warps are visible externally; for example,
 374             * you will not be sending network packets continuously instead of
 375             * every 100ms.
 376             */
 377            seqlock_write_lock(&timers_state.vm_clock_seqlock,
 378                               &timers_state.vm_clock_lock);
 379            if (timers_state.vm_clock_warp_start == -1
 380                || timers_state.vm_clock_warp_start > clock) {
 381                timers_state.vm_clock_warp_start = clock;
 382            }
 383            seqlock_write_unlock(&timers_state.vm_clock_seqlock,
 384                                 &timers_state.vm_clock_lock);
 385            timer_mod_anticipate(timers_state.icount_warp_timer,
 386                                 clock + deadline);
 387        }
 388    } else if (deadline == 0) {
 389        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
 390    }
 391}
 392
 393void icount_account_warp_timer(void)
 394{
 395    if (!icount_sleep) {
 396        return;
 397    }
 398
 399    /*
 400     * Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
 401     * do not fire, so computing the deadline does not make sense.
 402     */
 403    if (!runstate_is_running()) {
 404        return;
 405    }
 406
 407    replay_async_events();
 408
 409    /* warp clock deterministically in record/replay mode */
 410    if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
 411        return;
 412    }
 413
 414    timer_del(timers_state.icount_warp_timer);
 415    icount_warp_rt();
 416}
 417
 418void icount_configure(QemuOpts *opts, Error **errp)
 419{
 420    const char *option = qemu_opt_get(opts, "shift");
 421    bool sleep = qemu_opt_get_bool(opts, "sleep", true);
 422    bool align = qemu_opt_get_bool(opts, "align", false);
 423    long time_shift = -1;
 424
 425    if (!option) {
 426        if (qemu_opt_get(opts, "align") != NULL) {
 427            error_setg(errp, "Please specify shift option when using align");
 428        }
 429        return;
 430    }
 431
 432    if (align && !sleep) {
 433        error_setg(errp, "align=on and sleep=off are incompatible");
 434        return;
 435    }
 436
 437    if (strcmp(option, "auto") != 0) {
 438        if (qemu_strtol(option, NULL, 0, &time_shift) < 0
 439            || time_shift < 0 || time_shift > MAX_ICOUNT_SHIFT) {
 440            error_setg(errp, "icount: Invalid shift value");
 441            return;
 442        }
 443    } else if (icount_align_option) {
 444        error_setg(errp, "shift=auto and align=on are incompatible");
 445        return;
 446    } else if (!icount_sleep) {
 447        error_setg(errp, "shift=auto and sleep=off are incompatible");
 448        return;
 449    }
 450
 451    icount_sleep = sleep;
 452    if (icount_sleep) {
 453        timers_state.icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
 454                                         icount_timer_cb, NULL);
 455    }
 456
 457    icount_align_option = align;
 458
 459    if (time_shift >= 0) {
 460        timers_state.icount_time_shift = time_shift;
 461        icount_enable_precise();
 462        return;
 463    }
 464
 465    icount_enable_adaptive();
 466
 467    /*
 468     * 125MIPS seems a reasonable initial guess at the guest speed.
 469     * It will be corrected fairly quickly anyway.
 470     */
 471    timers_state.icount_time_shift = 3;
 472
 473    /*
 474     * Have both realtime and virtual time triggers for speed adjustment.
 475     * The realtime trigger catches emulated time passing too slowly,
 476     * the virtual time trigger catches emulated time passing too fast.
 477     * Realtime triggers occur even when idle, so use them less frequently
 478     * than VM triggers.
 479     */
 480    timers_state.vm_clock_warp_start = -1;
 481    timers_state.icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
 482                                   icount_adjust_rt, NULL);
 483    timer_mod(timers_state.icount_rt_timer,
 484                   qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
 485    timers_state.icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
 486                                        icount_adjust_vm, NULL);
 487    timer_mod(timers_state.icount_vm_timer,
 488                   qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
 489                   NANOSECONDS_PER_SECOND / 10);
 490}
 491
 492void icount_notify_exit(void)
 493{
 494    if (icount_enabled() && current_cpu) {
 495        qemu_cpu_kick(current_cpu);
 496        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
 497    }
 498}
 499