qemu/softmmu/icount.c
<<
>>
Prefs
   1/*
   2 * QEMU System Emulator
   3 *
   4 * Copyright (c) 2003-2008 Fabrice Bellard
   5 *
   6 * Permission is hereby granted, free of charge, to any person obtaining a copy
   7 * of this software and associated documentation files (the "Software"), to deal
   8 * in the Software without restriction, including without limitation the rights
   9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  10 * copies of the Software, and to permit persons to whom the Software is
  11 * furnished to do so, subject to the following conditions:
  12 *
  13 * The above copyright notice and this permission notice shall be included in
  14 * all copies or substantial portions of the Software.
  15 *
  16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  22 * THE SOFTWARE.
  23 */
  24
  25#include "qemu/osdep.h"
  26#include "qemu-common.h"
  27#include "qemu/cutils.h"
  28#include "migration/vmstate.h"
  29#include "qapi/error.h"
  30#include "qemu/error-report.h"
  31#include "exec/exec-all.h"
  32#include "sysemu/cpus.h"
  33#include "sysemu/qtest.h"
  34#include "qemu/main-loop.h"
  35#include "qemu/option.h"
  36#include "qemu/seqlock.h"
  37#include "sysemu/replay.h"
  38#include "sysemu/runstate.h"
  39#include "hw/core/cpu.h"
  40#include "sysemu/cpu-timers.h"
  41#include "sysemu/cpu-throttle.h"
  42#include "timers-state.h"
  43
  44/*
  45 * ICOUNT: Instruction Counter
  46 *
  47 * this module is split off from cpu-timers because the icount part
  48 * is TCG-specific, and does not need to be built for other accels.
  49 */
/*
 * Whether the "sleep" icount option is on (the default).  It is assigned
 * from the option in icount_configure().  When it is false,
 * icount_start_warp_timer() adds the next deadline directly to the icount
 * bias instead of arming the warp timer, and icount_account_warp_timer()
 * does nothing.
 */
static bool icount_sleep = true;
/*
 * Largest value accepted for timers_state.icount_time_shift.
 * Arbitrarily pick 1MIPS as the minimum allowable speed.
 */
#define MAX_ICOUNT_SHIFT 10
  53
  54/*
  55 * 0 = Do not count executed instructions.
  56 * 1 = Fixed conversion of insn to ns via "shift" option
  57 * 2 = Runtime adaptive algorithm to compute shift
  58 */
  59int use_icount;
  60
  61static void icount_enable_precise(void)
  62{
  63    use_icount = 1;
  64}
  65
  66static void icount_enable_adaptive(void)
  67{
  68    use_icount = 2;
  69}
  70
  71/*
  72 * The current number of executed instructions is based on what we
  73 * originally budgeted minus the current state of the decrementing
  74 * icount counters in extra/u16.low.
  75 */
  76static int64_t icount_get_executed(CPUState *cpu)
  77{
  78    return (cpu->icount_budget -
  79            (cpu_neg(cpu)->icount_decr.u16.low + cpu->icount_extra));
  80}
  81
  82/*
  83 * Update the global shared timer_state.qemu_icount to take into
  84 * account executed instructions. This is done by the TCG vCPU
  85 * thread so the main-loop can see time has moved forward.
  86 */
  87static void icount_update_locked(CPUState *cpu)
  88{
  89    int64_t executed = icount_get_executed(cpu);
  90    cpu->icount_budget -= executed;
  91
  92    qatomic_set_i64(&timers_state.qemu_icount,
  93                    timers_state.qemu_icount + executed);
  94}
  95
/*
 * Account the instructions executed by @cpu in the global
 * timers_state.qemu_icount.
 *
 * This is icount_update_locked() wrapped in the write side of
 * timers_state.vm_clock_seqlock (together with vm_clock_lock), so
 * callers must not already hold that lock.
 */
void icount_update(CPUState *cpu)
{
    seqlock_write_lock(&timers_state.vm_clock_seqlock,
                       &timers_state.vm_clock_lock);
    icount_update_locked(cpu);
    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                         &timers_state.vm_clock_lock);
}
 109
 110static int64_t icount_get_raw_locked(void)
 111{
 112    CPUState *cpu = current_cpu;
 113
 114    if (cpu && cpu->running) {
 115        if (!cpu->can_do_io) {
 116            error_report("Bad icount read");
 117            exit(1);
 118        }
 119        /* Take into account what has run */
 120        icount_update_locked(cpu);
 121    }
 122    /* The read is protected by the seqlock, but needs atomic64 to avoid UB */
 123    return qatomic_read_i64(&timers_state.qemu_icount);
 124}
 125
 126static int64_t icount_get_locked(void)
 127{
 128    int64_t icount = icount_get_raw_locked();
 129    return qatomic_read_i64(&timers_state.qemu_icount_bias) +
 130        icount_to_ns(icount);
 131}
 132
 133int64_t icount_get_raw(void)
 134{
 135    int64_t icount;
 136    unsigned start;
 137
 138    do {
 139        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
 140        icount = icount_get_raw_locked();
 141    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
 142
 143    return icount;
 144}
 145
 146/* Return the virtual CPU time, based on the instruction counter.  */
 147int64_t icount_get(void)
 148{
 149    int64_t icount;
 150    unsigned start;
 151
 152    do {
 153        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
 154        icount = icount_get_locked();
 155    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
 156
 157    return icount;
 158}
 159
 160int64_t icount_to_ns(int64_t icount)
 161{
 162    return icount << qatomic_read(&timers_state.icount_time_shift);
 163}
 164
/*
 * Correlation between real and virtual time is always going to be
 * fairly approximate, so ignore small variation.
 * When the guest is idle real and virtual time will be aligned in
 * the IO wait loop.
 *
 * ICOUNT_WOBBLE is the tolerance (a tenth of NANOSECONDS_PER_SECOND)
 * used by icount_adjust() when comparing the current drift against
 * the previously recorded one.
 */
#define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)
 172
 173static void icount_adjust(void)
 174{
 175    int64_t cur_time;
 176    int64_t cur_icount;
 177    int64_t delta;
 178
 179    /* If the VM is not running, then do nothing.  */
 180    if (!runstate_is_running()) {
 181        return;
 182    }
 183
 184    seqlock_write_lock(&timers_state.vm_clock_seqlock,
 185                       &timers_state.vm_clock_lock);
 186    cur_time = REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT,
 187                                   cpu_get_clock_locked());
 188    cur_icount = icount_get_locked();
 189
 190    delta = cur_icount - cur_time;
 191    /* FIXME: This is a very crude algorithm, somewhat prone to oscillation.  */
 192    if (delta > 0
 193        && timers_state.last_delta + ICOUNT_WOBBLE < delta * 2
 194        && timers_state.icount_time_shift > 0) {
 195        /* The guest is getting too far ahead.  Slow time down.  */
 196        qatomic_set(&timers_state.icount_time_shift,
 197                    timers_state.icount_time_shift - 1);
 198    }
 199    if (delta < 0
 200        && timers_state.last_delta - ICOUNT_WOBBLE > delta * 2
 201        && timers_state.icount_time_shift < MAX_ICOUNT_SHIFT) {
 202        /* The guest is getting too far behind.  Speed time up.  */
 203        qatomic_set(&timers_state.icount_time_shift,
 204                    timers_state.icount_time_shift + 1);
 205    }
 206    timers_state.last_delta = delta;
 207    qatomic_set_i64(&timers_state.qemu_icount_bias,
 208                    cur_icount - (timers_state.qemu_icount
 209                                  << timers_state.icount_time_shift));
 210    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
 211                         &timers_state.vm_clock_lock);
 212}
 213
 214static void icount_adjust_rt(void *opaque)
 215{
 216    timer_mod(timers_state.icount_rt_timer,
 217              qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
 218    icount_adjust();
 219}
 220
 221static void icount_adjust_vm(void *opaque)
 222{
 223    timer_mod(timers_state.icount_vm_timer,
 224                   qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
 225                   NANOSECONDS_PER_SECOND / 10);
 226    icount_adjust();
 227}
 228
 229int64_t icount_round(int64_t count)
 230{
 231    int shift = qatomic_read(&timers_state.icount_time_shift);
 232    return (count + (1 << shift) - 1) >> shift;
 233}
 234
 235static void icount_warp_rt(void)
 236{
 237    unsigned seq;
 238    int64_t warp_start;
 239
 240    /*
 241     * The icount_warp_timer is rescheduled soon after vm_clock_warp_start
 242     * changes from -1 to another value, so the race here is okay.
 243     */
 244    do {
 245        seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
 246        warp_start = timers_state.vm_clock_warp_start;
 247    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));
 248
 249    if (warp_start == -1) {
 250        return;
 251    }
 252
 253    seqlock_write_lock(&timers_state.vm_clock_seqlock,
 254                       &timers_state.vm_clock_lock);
 255    if (runstate_is_running()) {
 256        int64_t clock = REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT,
 257                                            cpu_get_clock_locked());
 258        int64_t warp_delta;
 259
 260        warp_delta = clock - timers_state.vm_clock_warp_start;
 261        if (icount_enabled() == 2) {
 262            /*
 263             * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
 264             * far ahead of real time.
 265             */
 266            int64_t cur_icount = icount_get_locked();
 267            int64_t delta = clock - cur_icount;
 268            warp_delta = MIN(warp_delta, delta);
 269        }
 270        qatomic_set_i64(&timers_state.qemu_icount_bias,
 271                        timers_state.qemu_icount_bias + warp_delta);
 272    }
 273    timers_state.vm_clock_warp_start = -1;
 274    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
 275                       &timers_state.vm_clock_lock);
 276
 277    if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
 278        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
 279    }
 280}
 281
/*
 * Callback of timers_state.icount_warp_timer (installed by
 * icount_configure() when the sleep option is on): accounts the
 * pending warp via icount_warp_rt().  @opaque is unused.
 */
static void icount_timer_cb(void *opaque)
{
    /*
     * No need for a checkpoint because the timer already synchronizes
     * with CHECKPOINT_CLOCK_VIRTUAL_RT.
     */
    icount_warp_rt();
}
 290
 291void icount_start_warp_timer(void)
 292{
 293    int64_t clock;
 294    int64_t deadline;
 295
 296    assert(icount_enabled());
 297
 298    /*
 299     * Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
 300     * do not fire, so computing the deadline does not make sense.
 301     */
 302    if (!runstate_is_running()) {
 303        return;
 304    }
 305
 306    if (replay_mode != REPLAY_MODE_PLAY) {
 307        if (!all_cpu_threads_idle()) {
 308            return;
 309        }
 310
 311        if (qtest_enabled()) {
 312            /* When testing, qtest commands advance icount.  */
 313            return;
 314        }
 315
 316        replay_checkpoint(CHECKPOINT_CLOCK_WARP_START);
 317    } else {
 318        /* warp clock deterministically in record/replay mode */
 319        if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
 320            /*
 321             * vCPU is sleeping and warp can't be started.
 322             * It is probably a race condition: notification sent
 323             * to vCPU was processed in advance and vCPU went to sleep.
 324             * Therefore we have to wake it up for doing someting.
 325             */
 326            if (replay_has_checkpoint()) {
 327                qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
 328            }
 329            return;
 330        }
 331    }
 332
 333    /* We want to use the earliest deadline from ALL vm_clocks */
 334    clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
 335    deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
 336                                          ~QEMU_TIMER_ATTR_EXTERNAL);
 337    if (deadline < 0) {
 338        static bool notified;
 339        if (!icount_sleep && !notified) {
 340            warn_report("icount sleep disabled and no active timers");
 341            notified = true;
 342        }
 343        return;
 344    }
 345
 346    if (deadline > 0) {
 347        /*
 348         * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
 349         * sleep.  Otherwise, the CPU might be waiting for a future timer
 350         * interrupt to wake it up, but the interrupt never comes because
 351         * the vCPU isn't running any insns and thus doesn't advance the
 352         * QEMU_CLOCK_VIRTUAL.
 353         */
 354        if (!icount_sleep) {
 355            /*
 356             * We never let VCPUs sleep in no sleep icount mode.
 357             * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
 358             * to the next QEMU_CLOCK_VIRTUAL event and notify it.
 359             * It is useful when we want a deterministic execution time,
 360             * isolated from host latencies.
 361             */
 362            seqlock_write_lock(&timers_state.vm_clock_seqlock,
 363                               &timers_state.vm_clock_lock);
 364            qatomic_set_i64(&timers_state.qemu_icount_bias,
 365                            timers_state.qemu_icount_bias + deadline);
 366            seqlock_write_unlock(&timers_state.vm_clock_seqlock,
 367                                 &timers_state.vm_clock_lock);
 368            qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
 369        } else {
 370            /*
 371             * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
 372             * "real" time, (related to the time left until the next event) has
 373             * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
 374             * This avoids that the warps are visible externally; for example,
 375             * you will not be sending network packets continuously instead of
 376             * every 100ms.
 377             */
 378            seqlock_write_lock(&timers_state.vm_clock_seqlock,
 379                               &timers_state.vm_clock_lock);
 380            if (timers_state.vm_clock_warp_start == -1
 381                || timers_state.vm_clock_warp_start > clock) {
 382                timers_state.vm_clock_warp_start = clock;
 383            }
 384            seqlock_write_unlock(&timers_state.vm_clock_seqlock,
 385                                 &timers_state.vm_clock_lock);
 386            timer_mod_anticipate(timers_state.icount_warp_timer,
 387                                 clock + deadline);
 388        }
 389    } else if (deadline == 0) {
 390        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
 391    }
 392}
 393
 394void icount_account_warp_timer(void)
 395{
 396    if (!icount_sleep) {
 397        return;
 398    }
 399
 400    /*
 401     * Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
 402     * do not fire, so computing the deadline does not make sense.
 403     */
 404    if (!runstate_is_running()) {
 405        return;
 406    }
 407
 408    /* warp clock deterministically in record/replay mode */
 409    if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
 410        return;
 411    }
 412
 413    timer_del(timers_state.icount_warp_timer);
 414    icount_warp_rt();
 415}
 416
 417void icount_configure(QemuOpts *opts, Error **errp)
 418{
 419    const char *option = qemu_opt_get(opts, "shift");
 420    bool sleep = qemu_opt_get_bool(opts, "sleep", true);
 421    bool align = qemu_opt_get_bool(opts, "align", false);
 422    long time_shift = -1;
 423
 424    if (!option) {
 425        if (qemu_opt_get(opts, "align") != NULL) {
 426            error_setg(errp, "Please specify shift option when using align");
 427        }
 428        return;
 429    }
 430
 431    if (align && !sleep) {
 432        error_setg(errp, "align=on and sleep=off are incompatible");
 433        return;
 434    }
 435
 436    if (strcmp(option, "auto") != 0) {
 437        if (qemu_strtol(option, NULL, 0, &time_shift) < 0
 438            || time_shift < 0 || time_shift > MAX_ICOUNT_SHIFT) {
 439            error_setg(errp, "icount: Invalid shift value");
 440            return;
 441        }
 442    } else if (icount_align_option) {
 443        error_setg(errp, "shift=auto and align=on are incompatible");
 444        return;
 445    } else if (!icount_sleep) {
 446        error_setg(errp, "shift=auto and sleep=off are incompatible");
 447        return;
 448    }
 449
 450    icount_sleep = sleep;
 451    if (icount_sleep) {
 452        timers_state.icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
 453                                         icount_timer_cb, NULL);
 454    }
 455
 456    icount_align_option = align;
 457
 458    if (time_shift >= 0) {
 459        timers_state.icount_time_shift = time_shift;
 460        icount_enable_precise();
 461        return;
 462    }
 463
 464    icount_enable_adaptive();
 465
 466    /*
 467     * 125MIPS seems a reasonable initial guess at the guest speed.
 468     * It will be corrected fairly quickly anyway.
 469     */
 470    timers_state.icount_time_shift = 3;
 471
 472    /*
 473     * Have both realtime and virtual time triggers for speed adjustment.
 474     * The realtime trigger catches emulated time passing too slowly,
 475     * the virtual time trigger catches emulated time passing too fast.
 476     * Realtime triggers occur even when idle, so use them less frequently
 477     * than VM triggers.
 478     */
 479    timers_state.vm_clock_warp_start = -1;
 480    timers_state.icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
 481                                   icount_adjust_rt, NULL);
 482    timer_mod(timers_state.icount_rt_timer,
 483                   qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
 484    timers_state.icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
 485                                        icount_adjust_vm, NULL);
 486    timer_mod(timers_state.icount_vm_timer,
 487                   qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
 488                   NANOSECONDS_PER_SECOND / 10);
 489}
 490