qemu/cpus.c
/*
 * QEMU System Emulator
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/* Needed early for CONFIG_BSD etc. */
#include "qemu/osdep.h"
#include "qemu-common.h"
#include "qemu/config-file.h"
#include "cpu.h"
#include "monitor/monitor.h"
#include "qapi/qmp/qerror.h"
#include "qemu/error-report.h"
#include "sysemu/sysemu.h"
#include "sysemu/block-backend.h"
#include "exec/gdbstub.h"
#include "sysemu/dma.h"
#include "sysemu/hw_accel.h"
#include "sysemu/kvm.h"
#include "sysemu/hax.h"
#include "qmp-commands.h"
#include "exec/exec-all.h"

#include "qemu/thread.h"
#include "sysemu/cpus.h"
#include "sysemu/qtest.h"
#include "qemu/main-loop.h"
#include "qemu/bitmap.h"
#include "qemu/seqlock.h"
#include "tcg.h"
#include "qapi-event.h"
#include "hw/nmi.h"
#include "sysemu/replay.h"
#include "hw/boards.h"

#ifdef CONFIG_LINUX

#include <sys/prctl.h>

#ifndef PR_MCE_KILL
#define PR_MCE_KILL 33
#endif

#ifndef PR_MCE_KILL_SET
#define PR_MCE_KILL_SET 1
#endif

#ifndef PR_MCE_KILL_EARLY
#define PR_MCE_KILL_EARLY 1
#endif

#endif /* CONFIG_LINUX */

int64_t max_delay;
int64_t max_advance;

/* vcpu throttling controls */
static QEMUTimer *throttle_timer;
static unsigned int throttle_percentage;

#define CPU_THROTTLE_PCT_MIN 1
#define CPU_THROTTLE_PCT_MAX 99
#define CPU_THROTTLE_TIMESLICE_NS 10000000

bool cpu_is_stopped(CPUState *cpu)
{
    return cpu->stopped || !runstate_is_running();
}

static bool cpu_thread_is_idle(CPUState *cpu)
{
    if (cpu->stop || cpu->queued_work_first) {
        return false;
    }
    if (cpu_is_stopped(cpu)) {
        return true;
    }
    if (!cpu->halted || cpu_has_work(cpu) ||
        kvm_halt_in_kernel()) {
        return false;
    }
    return true;
}

bool all_cpu_threads_idle(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (!cpu_thread_is_idle(cpu)) {
            return false;
        }
    }
    return true;
}

/***********************************************************/
/* guest cycle counter */

/* Protected by TimersState seqlock */

static bool icount_sleep = true;
static int64_t vm_clock_warp_start = -1;
/* Conversion factor from emulated instructions to virtual clock ticks.  */
static int icount_time_shift;
/* Arbitrarily pick 1MIPS as the minimum allowable speed.  */
#define MAX_ICOUNT_SHIFT 10

static QEMUTimer *icount_rt_timer;
static QEMUTimer *icount_vm_timer;
static QEMUTimer *icount_warp_timer;

typedef struct TimersState {
    /* Protected by BQL.  */
    int64_t cpu_ticks_prev;
    int64_t cpu_ticks_offset;

    /* cpu_clock_offset can be read out of BQL, so protect it with
     * this lock.
     */
    QemuSeqLock vm_clock_seqlock;
    int64_t cpu_clock_offset;
    int32_t cpu_ticks_enabled;
    int64_t dummy;

    /* Compensate for varying guest execution speed.  */
    int64_t qemu_icount_bias;
    /* Only written by TCG thread */
    int64_t qemu_icount;
} TimersState;

static TimersState timers_state;
bool mttcg_enabled;

/*
 * We default to false if we know other options have been enabled
 * which are currently incompatible with MTTCG. Otherwise when each
 * guest (target) has been updated to support:
 *   - atomic instructions
 *   - memory ordering primitives (barriers)
 * they can set the appropriate CONFIG flags in ${target}-softmmu.mak
 *
 * Once a guest architecture has been converted to the new primitives
 * there are two remaining limitations to check.
 *
 * - The guest can't be oversized (e.g. 64 bit guest on 32 bit host)
 * - The host must have a stronger memory order than the guest
 *
 * It may be possible in future to support strong guests on weak hosts
 * but that will require tagging all load/stores in a guest with their
 * implicit memory order requirements which would likely slow things
 * down a lot.
 */

static bool check_tcg_memory_orders_compatible(void)
{
#if defined(TCG_GUEST_DEFAULT_MO) && defined(TCG_TARGET_DEFAULT_MO)
    return (TCG_GUEST_DEFAULT_MO & ~TCG_TARGET_DEFAULT_MO) == 0;
#else
    return false;
#endif
}
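
/* Illustrative note: for a strongly-ordered guest such as x86,
 * TCG_GUEST_DEFAULT_MO covers (almost) TCG_MO_ALL, while a weakly-ordered
 * host backend may declare TCG_TARGET_DEFAULT_MO as 0.  In that case the
 * guest bits survive the & ~ mask, the check fails, and MTTCG stays off
 * by default.  (The exact constants are per-target; see the respective
 * cpu.h / tcg-target.h headers.)
 */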

static bool default_mttcg_enabled(void)
{
    if (use_icount || TCG_OVERSIZED_GUEST) {
        return false;
    } else {
#ifdef TARGET_SUPPORTS_MTTCG
        return check_tcg_memory_orders_compatible();
#else
        return false;
#endif
    }
}

void qemu_tcg_configure(QemuOpts *opts, Error **errp)
{
    const char *t = qemu_opt_get(opts, "thread");
    if (t) {
        if (strcmp(t, "multi") == 0) {
            if (TCG_OVERSIZED_GUEST) {
                error_setg(errp, "No MTTCG when guest word size > host's");
            } else if (use_icount) {
                error_setg(errp, "No MTTCG when icount is enabled");
            } else {
#ifndef TARGET_SUPPORTS_MTTCG
                error_report("Guest not yet converted to MTTCG - "
                             "you may get unexpected results");
#endif
                if (!check_tcg_memory_orders_compatible()) {
                    error_report("Guest expects a stronger memory ordering "
                                 "than the host provides");
                    error_printf("This may cause strange/hard to debug errors\n");
                }
                mttcg_enabled = true;
            }
        } else if (strcmp(t, "single") == 0) {
            mttcg_enabled = false;
        } else {
            error_setg(errp, "Invalid 'thread' setting %s", t);
        }
    } else {
        mttcg_enabled = default_mttcg_enabled();
    }
}

/* The current number of executed instructions is based on what we
 * originally budgeted minus the current state of the decrementing
 * icount counters in extra/u16.low.
 */
static int64_t cpu_get_icount_executed(CPUState *cpu)
{
    return cpu->icount_budget - (cpu->icount_decr.u16.low + cpu->icount_extra);
}
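
/* Worked example: with icount_budget = 10000 and 100 + 400 still pending
 * in u16.low and icount_extra, 10000 - (100 + 400) = 9500 instructions
 * have been executed so far in this execution slice.
 */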

/*
 * Update the global shared timers_state.qemu_icount to take into
 * account executed instructions. This is done by the TCG vCPU
 * thread so the main-loop can see time has moved forward.
 */
void cpu_update_icount(CPUState *cpu)
{
    int64_t executed = cpu_get_icount_executed(cpu);
    cpu->icount_budget -= executed;

#ifdef CONFIG_ATOMIC64
    atomic_set__nocheck(&timers_state.qemu_icount,
                        atomic_read__nocheck(&timers_state.qemu_icount) +
                        executed);
#else /* FIXME: we need 64bit atomics to do this safely */
    timers_state.qemu_icount += executed;
#endif
}

int64_t cpu_get_icount_raw(void)
{
    CPUState *cpu = current_cpu;

    if (cpu && cpu->running) {
        if (!cpu->can_do_io) {
            qemu_log("Bad icount read\n");
        }
        /* Take into account what has run */
        cpu_update_icount(cpu);
    }
#ifdef CONFIG_ATOMIC64
    return atomic_read__nocheck(&timers_state.qemu_icount);
#else /* FIXME: we need 64bit atomics to do this safely */
    return timers_state.qemu_icount;
#endif
}

/* Return the virtual CPU time, based on the instruction counter.  */
static int64_t cpu_get_icount_locked(void)
{
    int64_t icount = cpu_get_icount_raw();
    return timers_state.qemu_icount_bias + cpu_icount_to_ns(icount);
}

int64_t cpu_get_icount(void)
{
    int64_t icount;
    unsigned start;

    do {
        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        icount = cpu_get_icount_locked();
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));

    return icount;
}

int64_t cpu_icount_to_ns(int64_t icount)
{
    return icount << icount_time_shift;
}
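
/* Example: with icount_time_shift = 3 each instruction accounts for
 * 1 << 3 = 8 ns of virtual time, i.e. a nominal guest speed of
 * 1e9 / 8 = 125 MIPS (the initial guess used by configure_icount()).
 */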

/* return the time elapsed in VM between vm_start and vm_stop.  Unless
 * icount is active, cpu_get_ticks() uses units of the host CPU cycle
 * counter.
 *
 * Caller must hold the BQL
 */
int64_t cpu_get_ticks(void)
{
    int64_t ticks;

    if (use_icount) {
        return cpu_get_icount();
    }

    ticks = timers_state.cpu_ticks_offset;
    if (timers_state.cpu_ticks_enabled) {
        ticks += cpu_get_host_ticks();
    }

    if (timers_state.cpu_ticks_prev > ticks) {
        /* Note: non-increasing ticks may happen if the host uses
           software suspend */
        timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
        ticks = timers_state.cpu_ticks_prev;
    }

    timers_state.cpu_ticks_prev = ticks;
    return ticks;
}

static int64_t cpu_get_clock_locked(void)
{
    int64_t time;

    time = timers_state.cpu_clock_offset;
    if (timers_state.cpu_ticks_enabled) {
        time += get_clock();
    }

    return time;
}

/* Return the monotonic time elapsed in VM, i.e.,
 * the time between vm_start and vm_stop
 */
int64_t cpu_get_clock(void)
{
    int64_t ti;
    unsigned start;

    do {
        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        ti = cpu_get_clock_locked();
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));

    return ti;
}

/* enable cpu_get_ticks()
 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
 */
void cpu_enable_ticks(void)
{
    /* Here, the thing actually protected by the seqlock is cpu_clock_offset. */
    seqlock_write_begin(&timers_state.vm_clock_seqlock);
    if (!timers_state.cpu_ticks_enabled) {
        timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
        timers_state.cpu_clock_offset -= get_clock();
        timers_state.cpu_ticks_enabled = 1;
    }
    seqlock_write_end(&timers_state.vm_clock_seqlock);
}

/* disable cpu_get_ticks() : the clock is stopped. You must not call
 * cpu_get_ticks() after that.
 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
 */
void cpu_disable_ticks(void)
{
    /* Here, the thing actually protected by the seqlock is cpu_clock_offset. */
    seqlock_write_begin(&timers_state.vm_clock_seqlock);
    if (timers_state.cpu_ticks_enabled) {
        timers_state.cpu_ticks_offset += cpu_get_host_ticks();
        timers_state.cpu_clock_offset = cpu_get_clock_locked();
        timers_state.cpu_ticks_enabled = 0;
    }
    seqlock_write_end(&timers_state.vm_clock_seqlock);
}

/* Correlation between real and virtual time is always going to be
   fairly approximate, so ignore small variation.
   When the guest is idle real and virtual time will be aligned in
   the IO wait loop.  */
#define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)

static void icount_adjust(void)
{
    int64_t cur_time;
    int64_t cur_icount;
    int64_t delta;

    /* Protected by TimersState mutex.  */
    static int64_t last_delta;

    /* If the VM is not running, then do nothing.  */
    if (!runstate_is_running()) {
        return;
    }

    seqlock_write_begin(&timers_state.vm_clock_seqlock);
    cur_time = cpu_get_clock_locked();
    cur_icount = cpu_get_icount_locked();

    delta = cur_icount - cur_time;
    /* FIXME: This is a very crude algorithm, somewhat prone to oscillation.  */
    if (delta > 0
        && last_delta + ICOUNT_WOBBLE < delta * 2
        && icount_time_shift > 0) {
        /* The guest is getting too far ahead.  Slow time down.  */
        icount_time_shift--;
    }
    if (delta < 0
        && last_delta - ICOUNT_WOBBLE > delta * 2
        && icount_time_shift < MAX_ICOUNT_SHIFT) {
        /* The guest is getting too far behind.  Speed time up.  */
        icount_time_shift++;
    }
    last_delta = delta;
    timers_state.qemu_icount_bias = cur_icount
                              - (timers_state.qemu_icount << icount_time_shift);
    seqlock_write_end(&timers_state.vm_clock_seqlock);
}
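
/* Example of the feedback direction: if the virtual clock (cur_icount) has
 * drifted ahead of real time, delta > 0 and icount_time_shift is lowered,
 * so each instruction accounts for fewer nanoseconds and real time can
 * catch up; lagging behind raises the shift instead.
 */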

static void icount_adjust_rt(void *opaque)
{
    timer_mod(icount_rt_timer,
              qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
    icount_adjust();
}

static void icount_adjust_vm(void *opaque)
{
    timer_mod(icount_vm_timer,
                   qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
                   NANOSECONDS_PER_SECOND / 10);
    icount_adjust();
}

static int64_t qemu_icount_round(int64_t count)
{
    return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
}
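
/* Example: with icount_time_shift = 3 (8 ns per instruction) a 20 ns
 * deadline rounds up to (20 + 7) >> 3 = 3 instructions, so the budget
 * always covers the full deadline.
 */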

static bool icount_idle_timewarps = true;

void qemu_icount_enable_idle_timewarps(bool enable)
{
    icount_idle_timewarps = enable;
}

static void icount_warp_rt(void)
{
    unsigned seq;
    int64_t warp_start;

    /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
     * changes from -1 to another value, so the race here is okay.
     */
    do {
        seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        warp_start = vm_clock_warp_start;
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));

    if (warp_start == -1) {
        return;
    }

    seqlock_write_begin(&timers_state.vm_clock_seqlock);
    if (runstate_is_running()) {
        int64_t clock = REPLAY_CLOCK(REPLAY_CLOCK_VIRTUAL_RT,
                                     cpu_get_clock_locked());
        int64_t warp_delta;

        warp_delta = clock - vm_clock_warp_start;
        if (use_icount == 2) {
            /*
             * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
             * far ahead of real time.
             */
            int64_t cur_icount = cpu_get_icount_locked();
            int64_t delta = clock - cur_icount;
            warp_delta = MIN(warp_delta, delta);
        }
        timers_state.qemu_icount_bias += warp_delta;
    }
    vm_clock_warp_start = -1;
    seqlock_write_end(&timers_state.vm_clock_seqlock);

    if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}
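
/* Example of the adaptive clamp above: if 30 ms of real time elapsed since
 * vm_clock_warp_start but the virtual clock lags real time by only 10 ms,
 * warp_delta is clamped to 10 ms so QEMU_CLOCK_VIRTUAL never jumps ahead
 * of real time.
 */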

static void icount_timer_cb(void *opaque)
{
    /* No need for a checkpoint because the timer already synchronizes
     * with CHECKPOINT_CLOCK_VIRTUAL_RT.
     */
    icount_warp_rt();
}

void tcg_clock_warp(int64_t dest)
{
    int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);

    if (clock < dest) {
        qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
        clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    }
    qemu_notify_event();
}

bool tcg_idle_clock_warp(int64_t dest)
{
    if (!all_cpu_threads_idle()) {
        return false;
    }

    tcg_clock_warp(dest);
    return true;
}

void qtest_clock_warp(int64_t dest)
{
    int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    AioContext *aio_context;
    assert(qtest_enabled());
    aio_context = qemu_get_aio_context();
    while (clock < dest) {
        int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
        int64_t warp = qemu_soonest_timeout(dest - clock, deadline);

        seqlock_write_begin(&timers_state.vm_clock_seqlock);
        timers_state.qemu_icount_bias += warp;
        seqlock_write_end(&timers_state.vm_clock_seqlock);

        qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
        timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
        clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    }
    qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
}

void qemu_start_warp_timer(void)
{
    int64_t clock;
    int64_t deadline;

    if (!use_icount) {
        return;
    }

    /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
     * do not fire, so computing the deadline does not make sense.
     */
    if (!runstate_is_running()) {
        return;
    }

    /* warp clock deterministically in record/replay mode */
    if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
        return;
    }

    if (!all_cpu_threads_idle()) {
        return;
    }

    if (qtest_enabled()) {
        /* When testing, qtest commands advance icount.  */
        return;
    }

    /* We want to use the earliest deadline from ALL vm_clocks */
    clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
    deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
    if (deadline < 0) {
        static bool notified;
        if (!icount_sleep && !notified) {
            warn_report("icount sleep disabled and no active timers");
            notified = true;
        }
        return;
    }

    if (deadline > 0) {
        /*
         * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
         * sleep.  Otherwise, the CPU might be waiting for a future timer
         * interrupt to wake it up, but the interrupt never comes because
         * the vCPU isn't running any insns and thus doesn't advance the
         * QEMU_CLOCK_VIRTUAL.
         */
        if (!icount_sleep) {
            /*
             * We never let VCPUs sleep in no-sleep icount mode.
             * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
             * to the next QEMU_CLOCK_VIRTUAL event and notify it.
             * It is useful when we want a deterministic execution time,
             * isolated from host latencies.
             */
            seqlock_write_begin(&timers_state.vm_clock_seqlock);
            timers_state.qemu_icount_bias += deadline;
            seqlock_write_end(&timers_state.vm_clock_seqlock);
            qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
        } else {
            /*
             * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
             * "real" time (related to the time left until the next event) has
             * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
             * This keeps the warps from being visible externally; for example,
             * you will not be sending network packets continuously instead of
             * every 100ms.
             */
            seqlock_write_begin(&timers_state.vm_clock_seqlock);
            if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
                vm_clock_warp_start = clock;
            }
            seqlock_write_end(&timers_state.vm_clock_seqlock);
            timer_mod_anticipate(icount_warp_timer, clock + deadline);
        }
    } else if (deadline == 0) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}

static void qemu_account_warp_timer(void)
{
    if (!use_icount || !icount_sleep) {
        return;
    }

    /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
     * do not fire, so computing the deadline does not make sense.
     */
    if (!runstate_is_running()) {
        return;
    }

    /* warp clock deterministically in record/replay mode */
    if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
        return;
    }

    timer_del(icount_warp_timer);
    icount_warp_rt();
}

static bool icount_state_needed(void *opaque)
{
    return use_icount;
}

/*
 * This is a subsection for icount migration.
 */
static const VMStateDescription icount_vmstate_timers = {
    .name = "timer/icount",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = icount_state_needed,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(qemu_icount_bias, TimersState),
        VMSTATE_INT64(qemu_icount, TimersState),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription vmstate_timers = {
    .name = "timer",
    .version_id = 2,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(cpu_ticks_offset, TimersState),
        VMSTATE_INT64(dummy, TimersState),
        VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription*[]) {
        &icount_vmstate_timers,
        NULL
    }
};

static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque)
{
    double pct;
    double throttle_ratio;
    long sleeptime_ns;

    if (!cpu_throttle_get_percentage()) {
        return;
    }

    pct = (double)cpu_throttle_get_percentage()/100;
    throttle_ratio = pct / (1 - pct);
    sleeptime_ns = (long)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS);

    qemu_mutex_unlock_iothread();
    g_usleep(sleeptime_ns / 1000); /* Convert ns to us for usleep call */
    qemu_mutex_lock_iothread();
    atomic_set(&cpu->throttle_thread_scheduled, 0);
}
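
/* Worked example: at 50% throttle, pct = 0.5 and throttle_ratio = 1.0,
 * so the vCPU sleeps one full 10 ms timeslice (CPU_THROTTLE_TIMESLICE_NS)
 * for every 10 ms it runs.
 */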

static void cpu_throttle_timer_tick(void *opaque)
{
    CPUState *cpu;
    double pct;

    /* Stop the timer if needed */
    if (!cpu_throttle_get_percentage()) {
        return;
    }
    CPU_FOREACH(cpu) {
        if (!atomic_xchg(&cpu->throttle_thread_scheduled, 1)) {
            async_run_on_cpu(cpu, cpu_throttle_thread,
                             RUN_ON_CPU_NULL);
        }
    }

    pct = (double)cpu_throttle_get_percentage()/100;
    timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
                                   CPU_THROTTLE_TIMESLICE_NS / (1-pct));
}
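
/* Worked example: at 80% throttle the timer re-arms every
 * 10 ms / (1 - 0.8) = 50 ms, and each tick schedules a
 * (0.8 / 0.2) * 10 ms = 40 ms sleep, leaving the vCPU roughly 10 ms of
 * run time per 50 ms period.
 */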

void cpu_throttle_set(int new_throttle_pct)
{
    /* Ensure throttle percentage is within valid range */
    new_throttle_pct = MIN(new_throttle_pct, CPU_THROTTLE_PCT_MAX);
    new_throttle_pct = MAX(new_throttle_pct, CPU_THROTTLE_PCT_MIN);

    atomic_set(&throttle_percentage, new_throttle_pct);

    timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
                                       CPU_THROTTLE_TIMESLICE_NS);
}

void cpu_throttle_stop(void)
{
    atomic_set(&throttle_percentage, 0);
}

bool cpu_throttle_active(void)
{
    return (cpu_throttle_get_percentage() != 0);
}

int cpu_throttle_get_percentage(void)
{
    return atomic_read(&throttle_percentage);
}

void cpu_ticks_init(void)
{
    seqlock_init(&timers_state.vm_clock_seqlock);
    vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
    throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
                                           cpu_throttle_timer_tick, NULL);
}

void configure_icount(QemuOpts *opts, Error **errp)
{
    const char *option;
    char *rem_str = NULL;

    option = qemu_opt_get(opts, "shift");
    if (!option) {
        if (qemu_opt_get(opts, "align") != NULL) {
            error_setg(errp, "Please specify shift option when using align");
        }
        return;
    }

    icount_sleep = qemu_opt_get_bool(opts, "sleep", true);
    if (icount_sleep) {
        icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
                                         icount_timer_cb, NULL);
    }

    icount_align_option = qemu_opt_get_bool(opts, "align", false);

    if (icount_align_option && !icount_sleep) {
        error_setg(errp, "align=on and sleep=off are incompatible");
    }
    if (strcmp(option, "auto") != 0) {
        errno = 0;
        icount_time_shift = strtol(option, &rem_str, 0);
        if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
            error_setg(errp, "icount: Invalid shift value");
        }
        use_icount = 1;
        return;
    } else if (icount_align_option) {
        error_setg(errp, "shift=auto and align=on are incompatible");
    } else if (!icount_sleep) {
        error_setg(errp, "shift=auto and sleep=off are incompatible");
    }

    use_icount = 2;

    /* 125MIPS seems a reasonable initial guess at the guest speed.
       It will be corrected fairly quickly anyway.  */
    icount_time_shift = 3;

    /* Have both realtime and virtual time triggers for speed adjustment.
       The realtime trigger catches emulated time passing too slowly,
       the virtual time trigger catches emulated time passing too fast.
       Realtime triggers occur even when idle, so use them less frequently
       than VM triggers.  */
    icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
                                   icount_adjust_rt, NULL);
    timer_mod(icount_rt_timer,
                   qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
    icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                        icount_adjust_vm, NULL);
    timer_mod(icount_vm_timer,
                   qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
                   NANOSECONDS_PER_SECOND / 10);
}

/***********************************************************/
/* TCG vCPU kick timer
 *
 * The kick timer is responsible for moving single threaded vCPU
 * emulation on to the next vCPU. If more than one vCPU is running a
 * timer event will force a cpu->exit so the next vCPU can get
 * scheduled.
 *
 * The timer is removed if all vCPUs are idle and restarted once any
 * vCPU becomes active again.
 */

static QEMUTimer *tcg_kick_vcpu_timer;
static CPUState *tcg_current_rr_cpu;

#define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10)

static inline int64_t qemu_tcg_next_kick(void)
{
    return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + TCG_KICK_PERIOD;
}

/* Kick the currently round-robin scheduled vCPU */
static void qemu_cpu_kick_rr_cpu(void)
{
    CPUState *cpu;
    do {
        cpu = atomic_mb_read(&tcg_current_rr_cpu);
        if (cpu) {
            cpu_exit(cpu);
        }
    } while (cpu != atomic_mb_read(&tcg_current_rr_cpu));
}

static void do_nothing(CPUState *cpu, run_on_cpu_data unused)
{
}

void qemu_timer_notify_cb(void *opaque, QEMUClockType type)
{
    if (!use_icount || type != QEMU_CLOCK_VIRTUAL) {
        qemu_notify_event();
        return;
    }

    if (!qemu_in_vcpu_thread() && first_cpu) {
        /* qemu_cpu_kick is not enough to kick a halted CPU out of
         * qemu_tcg_wait_io_event.  async_run_on_cpu, instead,
         * causes cpu_thread_is_idle to return false.  This way,
         * handle_icount_deadline can run.
         */
        async_run_on_cpu(first_cpu, do_nothing, RUN_ON_CPU_NULL);
    }
}

static void kick_tcg_thread(void *opaque)
{
    timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
    qemu_cpu_kick_rr_cpu();
}

static void start_tcg_kick_timer(void)
{
    if (!mttcg_enabled && !tcg_kick_vcpu_timer && CPU_NEXT(first_cpu)) {
        tcg_kick_vcpu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                           kick_tcg_thread, NULL);
        timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
    }
}

static void stop_tcg_kick_timer(void)
{
    if (tcg_kick_vcpu_timer) {
        timer_del(tcg_kick_vcpu_timer);
        tcg_kick_vcpu_timer = NULL;
    }
}

/***********************************************************/
void hw_error(const char *fmt, ...)
{
    va_list ap;
    CPUState *cpu;

    va_start(ap, fmt);
    fprintf(stderr, "qemu: hardware error: ");
    vfprintf(stderr, fmt, ap);
    fprintf(stderr, "\n");
    CPU_FOREACH(cpu) {
        fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
        cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
    }
    va_end(ap);
    abort();
}

void cpu_synchronize_all_states(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_state(cpu);
    }
}

void cpu_synchronize_all_post_reset(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_post_reset(cpu);
    }
}

void cpu_synchronize_all_post_init(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_post_init(cpu);
    }
}

void cpu_synchronize_all_pre_loadvm(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_pre_loadvm(cpu);
    }
}

static int do_vm_stop(RunState state)
{
    int ret = 0;

    if (runstate_is_running()) {
        cpu_disable_ticks();
        pause_all_vcpus();
        runstate_set(state);
        vm_state_notify(0, state);
        qapi_event_send_stop(&error_abort);
    }

    bdrv_drain_all();
    replay_disable_events();
    ret = bdrv_flush_all();

    return ret;
}

static bool cpu_can_run(CPUState *cpu)
{
    if (cpu->stop) {
        return false;
    }
    if (cpu_is_stopped(cpu)) {
        return false;
    }
    return true;
}

static void cpu_handle_guest_debug(CPUState *cpu)
{
    gdb_set_stop_cpu(cpu);
    qemu_system_debug_request();
    cpu->stopped = true;
}

#ifdef CONFIG_LINUX
static void sigbus_reraise(void)
{
    sigset_t set;
    struct sigaction action;

    memset(&action, 0, sizeof(action));
    action.sa_handler = SIG_DFL;
    if (!sigaction(SIGBUS, &action, NULL)) {
        raise(SIGBUS);
        sigemptyset(&set);
        sigaddset(&set, SIGBUS);
        pthread_sigmask(SIG_UNBLOCK, &set, NULL);
    }
    perror("Failed to re-raise SIGBUS!\n");
    abort();
}

static void sigbus_handler(int n, siginfo_t *siginfo, void *ctx)
{
    if (siginfo->si_code != BUS_MCEERR_AO && siginfo->si_code != BUS_MCEERR_AR) {
        sigbus_reraise();
    }

    if (current_cpu) {
        /* Called asynchronously in VCPU thread.  */
        if (kvm_on_sigbus_vcpu(current_cpu, siginfo->si_code, siginfo->si_addr)) {
            sigbus_reraise();
        }
    } else {
        /* Called synchronously (via signalfd) in main thread.  */
        if (kvm_on_sigbus(siginfo->si_code, siginfo->si_addr)) {
            sigbus_reraise();
        }
    }
}

static void qemu_init_sigbus(void)
{
    struct sigaction action;

    memset(&action, 0, sizeof(action));
    action.sa_flags = SA_SIGINFO;
    action.sa_sigaction = sigbus_handler;
    sigaction(SIGBUS, &action, NULL);

    prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
}
#else /* !CONFIG_LINUX */
static void qemu_init_sigbus(void)
{
}
#endif /* !CONFIG_LINUX */

static QemuMutex qemu_global_mutex;

static QemuThread io_thread;

/* cpu creation */
static QemuCond qemu_cpu_cond;
/* system init */
static QemuCond qemu_pause_cond;

void qemu_init_cpu_loop(void)
{
    qemu_init_sigbus();
    qemu_cond_init(&qemu_cpu_cond);
    qemu_cond_init(&qemu_pause_cond);
    qemu_mutex_init(&qemu_global_mutex);

    qemu_thread_get_self(&io_thread);
}

void run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data)
{
    do_run_on_cpu(cpu, func, data, &qemu_global_mutex);
}

static void qemu_kvm_destroy_vcpu(CPUState *cpu)
{
    if (kvm_destroy_vcpu(cpu) < 0) {
        error_report("kvm_destroy_vcpu failed");
        exit(EXIT_FAILURE);
    }
}

static void qemu_tcg_destroy_vcpu(CPUState *cpu)
{
}

static void qemu_wait_io_event_common(CPUState *cpu)
{
    atomic_mb_set(&cpu->thread_kicked, false);
    if (cpu->stop) {
        cpu->stop = false;
        cpu->stopped = true;
        qemu_cond_broadcast(&qemu_pause_cond);
    }
    process_queued_cpu_work(cpu);
}

static bool qemu_tcg_should_sleep(CPUState *cpu)
{
    if (mttcg_enabled) {
        return cpu_thread_is_idle(cpu);
    } else {
        return all_cpu_threads_idle();
    }
}

static void qemu_tcg_wait_io_event(CPUState *cpu)
{
    while (qemu_tcg_should_sleep(cpu)) {
        stop_tcg_kick_timer();
        qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
    }

    start_tcg_kick_timer();

    qemu_wait_io_event_common(cpu);
}

static void qemu_kvm_wait_io_event(CPUState *cpu)
{
    while (cpu_thread_is_idle(cpu)) {
        qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
    }

    qemu_wait_io_event_common(cpu);
}

static void *qemu_kvm_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;
    int r;

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();
    cpu->can_do_io = 1;
    current_cpu = cpu;

    r = kvm_init_vcpu(cpu);
    if (r < 0) {
        fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
        exit(1);
    }

    kvm_init_cpu_signals(cpu);

    /* signal CPU creation */
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);

    do {
        if (cpu_can_run(cpu)) {
            r = kvm_cpu_exec(cpu);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
            }
        }
        qemu_kvm_wait_io_event(cpu);
    } while (!cpu->unplug || cpu_can_run(cpu));

    qemu_kvm_destroy_vcpu(cpu);
    cpu->created = false;
    qemu_cond_signal(&qemu_cpu_cond);
    qemu_mutex_unlock_iothread();
    return NULL;
}

static void *qemu_dummy_cpu_thread_fn(void *arg)
{
#ifdef _WIN32
    fprintf(stderr, "qtest is not supported under Windows\n");
    exit(1);
#else
    CPUState *cpu = arg;
    sigset_t waitset;
    int r;

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();
    cpu->can_do_io = 1;
    current_cpu = cpu;

    sigemptyset(&waitset);
    sigaddset(&waitset, SIG_IPI);

    /* signal CPU creation */
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);

    while (1) {
        qemu_mutex_unlock_iothread();
        do {
            int sig;
            r = sigwait(&waitset, &sig);
        } while (r == -1 && (errno == EAGAIN || errno == EINTR));
        if (r == -1) {
            perror("sigwait");
            exit(1);
        }
        qemu_mutex_lock_iothread();
        qemu_wait_io_event_common(cpu);
    }

    return NULL;
#endif
}

static int64_t tcg_get_icount_limit(void)
{
    int64_t deadline;

    if (replay_mode != REPLAY_MODE_PLAY) {
        deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);

        /* Maintain prior (possibly buggy) behaviour where if no deadline
         * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
         * INT32_MAX nanoseconds ahead, we still use INT32_MAX
         * nanoseconds.
         */
        if ((deadline < 0) || (deadline > INT32_MAX)) {
            deadline = INT32_MAX;
        }

        return qemu_icount_round(deadline);
    } else {
        return replay_get_instructions();
    }
}

static void handle_icount_deadline(void)
{
    assert(qemu_in_vcpu_thread());
    if (use_icount) {
        int64_t deadline =
            qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);

        if (deadline == 0) {
            /* Wake up other AioContexts.  */
            qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
            qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
        }
    }
}

static void prepare_icount_for_run(CPUState *cpu)
{
    if (use_icount) {
        int insns_left;

        /* These should always be cleared by process_icount_data after
         * each vCPU execution. However u16.high can be raised
         * asynchronously by cpu_exit/cpu_interrupt/tcg_handle_interrupt
         */
        g_assert(cpu->icount_decr.u16.low == 0);
        g_assert(cpu->icount_extra == 0);

        cpu->icount_budget = tcg_get_icount_limit();
        insns_left = MIN(0xffff, cpu->icount_budget);
        cpu->icount_decr.u16.low = insns_left;
        cpu->icount_extra = cpu->icount_budget - insns_left;
    }
}
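
/* Example: for icount_budget = 100000, u16.low is loaded with
 * MIN(0xffff, 100000) = 65535 and icount_extra holds the remaining 34465;
 * the generated code only decrements the 16-bit counter, so the budget is
 * consumed in chunks of at most 64k instructions.
 */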

static void process_icount_data(CPUState *cpu)
{
    if (use_icount) {
        /* Account for executed instructions */
        cpu_update_icount(cpu);

        /* Reset the counters */
        cpu->icount_decr.u16.low = 0;
        cpu->icount_extra = 0;
        cpu->icount_budget = 0;

        replay_account_executed_instructions();
    }
}


static int tcg_cpu_exec(CPUState *cpu)
{
    int ret;
#ifdef CONFIG_PROFILER
    int64_t ti;
#endif

#ifdef CONFIG_PROFILER
    ti = profile_getclock();
#endif
    qemu_mutex_unlock_iothread();
    cpu_exec_start(cpu);
    ret = cpu_exec(cpu);
    cpu_exec_end(cpu);
    qemu_mutex_lock_iothread();
#ifdef CONFIG_PROFILER
    tcg_time += profile_getclock() - ti;
#endif
    return ret;
}

/* Destroy any remaining vCPUs which have been unplugged and have
 * finished running
 */
static void deal_with_unplugged_cpus(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (cpu->unplug && !cpu_can_run(cpu)) {
            qemu_tcg_destroy_vcpu(cpu);
            cpu->created = false;
            qemu_cond_signal(&qemu_cpu_cond);
            break;
        }
    }
}

/* Single-threaded TCG
 *
 * In the single-threaded case each vCPU is simulated in turn. If
 * there is more than a single vCPU we create a simple timer to kick
 * the vCPU and ensure we don't get stuck in a tight loop in one vCPU.
 * This is done explicitly rather than relying on side-effects
 * elsewhere.
 */

static void *qemu_tcg_rr_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;

    rcu_register_thread();
    tcg_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);

    CPU_FOREACH(cpu) {
        cpu->thread_id = qemu_get_thread_id();
        cpu->created = true;
        cpu->can_do_io = 1;
    }
    qemu_cond_signal(&qemu_cpu_cond);

    /* wait for initial kick-off after machine start */
    while (first_cpu->stopped) {
        qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);

        /* process any pending work */
        CPU_FOREACH(cpu) {
            current_cpu = cpu;
            qemu_wait_io_event_common(cpu);
        }
    }

    start_tcg_kick_timer();

    cpu = first_cpu;

    /* process any pending work */
    cpu->exit_request = 1;

    while (1) {
        /* Account partial waits to QEMU_CLOCK_VIRTUAL.  */
        qemu_account_warp_timer();

        /* Run the timers here.  This is much more efficient than
         * waking up the I/O thread and waiting for completion.
         */
        handle_icount_deadline();

        if (!cpu) {
            cpu = first_cpu;
        }

        while (cpu && !cpu->queued_work_first && !cpu->exit_request) {

            atomic_mb_set(&tcg_current_rr_cpu, cpu);
            current_cpu = cpu;

            qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
                              (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);

            if (cpu_can_run(cpu)) {
                int r;

                prepare_icount_for_run(cpu);

                r = tcg_cpu_exec(cpu);

                process_icount_data(cpu);

                if (r == EXCP_DEBUG) {
                    cpu_handle_guest_debug(cpu);
                    break;
                } else if (r == EXCP_ATOMIC) {
                    qemu_mutex_unlock_iothread();
                    cpu_exec_step_atomic(cpu);
                    qemu_mutex_lock_iothread();
                    break;
                }
            } else if (cpu->stop) {
                if (cpu->unplug) {
                    cpu = CPU_NEXT(cpu);
                }
                break;
            }

            cpu = CPU_NEXT(cpu);
        } /* while (cpu && !cpu->exit_request).. */

        /* Does not need atomic_mb_set because a spurious wakeup is okay.  */
        atomic_set(&tcg_current_rr_cpu, NULL);

        if (cpu && cpu->exit_request) {
            atomic_mb_set(&cpu->exit_request, 0);
        }

        qemu_tcg_wait_io_event(cpu ? cpu : QTAILQ_FIRST(&cpus));
        deal_with_unplugged_cpus();
    }

    return NULL;
}

static void *qemu_hax_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;
    int r;

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);

    cpu->thread_id = qemu_get_thread_id();
    cpu->created = true;
    cpu->halted = 0;
    current_cpu = cpu;

    hax_init_vcpu(cpu);
    qemu_cond_signal(&qemu_cpu_cond);

    while (1) {
        if (cpu_can_run(cpu)) {
            r = hax_smp_cpu_exec(cpu);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
            }
        }

        while (cpu_thread_is_idle(cpu)) {
            qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
        }
#ifdef _WIN32
        SleepEx(0, TRUE);
#endif
        qemu_wait_io_event_common(cpu);
    }
    return NULL;
}

#ifdef _WIN32
static void CALLBACK dummy_apc_func(ULONG_PTR unused)
{
}
#endif

/* Multi-threaded TCG
 *
 * In the multi-threaded case each vCPU has its own thread. The TLS
 * variable current_cpu can be used deep in the code to find the
 * current CPUState for a given thread.
 */

static void *qemu_tcg_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;

    g_assert(!use_icount);

    rcu_register_thread();
    tcg_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);

    cpu->thread_id = qemu_get_thread_id();
    cpu->created = true;
    cpu->can_do_io = 1;
    current_cpu = cpu;
    qemu_cond_signal(&qemu_cpu_cond);

    /* process any pending work */
    cpu->exit_request = 1;

    while (1) {
        if (cpu_can_run(cpu)) {
            int r;
            r = tcg_cpu_exec(cpu);
            switch (r) {
            case EXCP_DEBUG:
                cpu_handle_guest_debug(cpu);
                break;
            case EXCP_HALTED:
                /* during start-up the vCPU is reset and the thread is
                 * kicked several times. If we don't ensure we go back
                 * to sleep in the halted state we won't cleanly
                 * start-up when the vCPU is enabled.
                 *
                 * cpu->halted should ensure we sleep in wait_io_event
                 */
                if (!cpu->halted) {
                    qemu_log_mask(LOG_PM, "CPU%d: EXCP_HALTED while halted=0\n",
                             cpu->cpu_index);
                }
                break;
            case EXCP_ATOMIC:
                qemu_mutex_unlock_iothread();
                cpu_exec_step_atomic(cpu);
                qemu_mutex_lock_iothread();
                /* fall through */
            default:
                /* Ignore everything else? */
                break;
            }
        } else if (cpu->unplug) {
            qemu_tcg_destroy_vcpu(cpu);
            cpu->created = false;
            qemu_cond_signal(&qemu_cpu_cond);
            qemu_mutex_unlock_iothread();
            return NULL;
        }

        atomic_mb_set(&cpu->exit_request, 0);
        qemu_tcg_wait_io_event(cpu);
    }

    return NULL;
}
1541
1542static void qemu_cpu_kick_thread(CPUState *cpu)
1543{
1544#ifndef _WIN32
1545    int err;
1546
1547    if (cpu->thread_kicked) {
1548        return;
1549    }
1550    cpu->thread_kicked = true;
1551    err = pthread_kill(cpu->thread->thread, SIG_IPI);
1552    if (err) {
1553        fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
1554        exit(1);
1555    }
1556#else /* _WIN32 */
1557    if (!qemu_cpu_is_self(cpu)) {
1558        if (!QueueUserAPC(dummy_apc_func, cpu->hThread, 0)) {
1559            fprintf(stderr, "%s: QueueUserAPC failed with error %lu\n",
1560                    __func__, GetLastError());
1561            exit(1);
1562        }
1563    }
1564#endif
1565}
1566
1567void qemu_cpu_kick(CPUState *cpu)
1568{
1569    qemu_cond_broadcast(cpu->halt_cond);
1570    if (tcg_enabled()) {
1571        cpu_exit(cpu);
1572        /* NOP unless doing single-thread RR */
1573        qemu_cpu_kick_rr_cpu();
1574    } else {
1575        if (hax_enabled()) {
1576            /*
1577             * FIXME: race condition with the exit_request check in
1578             * hax_vcpu_hax_exec
1579             */
1580            cpu->exit_request = 1;
1581        }
1582        qemu_cpu_kick_thread(cpu);
1583    }
1584}
1585
1586void qemu_cpu_kick_self(void)
1587{
1588    assert(current_cpu);
1589    qemu_cpu_kick_thread(current_cpu);
1590}
1591
1592bool qemu_cpu_is_self(CPUState *cpu)
1593{
1594    return qemu_thread_is_self(cpu->thread);
1595}
1596
1597bool qemu_in_vcpu_thread(void)
1598{
1599    return current_cpu && qemu_cpu_is_self(current_cpu);
1600}
1601
1602static __thread bool iothread_locked = false;
1603
1604bool qemu_mutex_iothread_locked(void)
1605{
1606    return iothread_locked;
1607}
1608
1609void qemu_mutex_lock_iothread(void)
1610{
1611    g_assert(!qemu_mutex_iothread_locked());
1612    qemu_mutex_lock(&qemu_global_mutex);
1613    iothread_locked = true;
1614}
1615
1616void qemu_mutex_unlock_iothread(void)
1617{
1618    g_assert(qemu_mutex_iothread_locked());
1619    iothread_locked = false;
1620    qemu_mutex_unlock(&qemu_global_mutex);
1621}
1622
1623static bool all_vcpus_paused(void)
1624{
1625    CPUState *cpu;
1626
1627    CPU_FOREACH(cpu) {
1628        if (!cpu->stopped) {
1629            return false;
1630        }
1631    }
1632
1633    return true;
1634}
1635
1636void pause_all_vcpus(void)
1637{
1638    CPUState *cpu;
1639
1640    qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
1641    CPU_FOREACH(cpu) {
1642        cpu->stop = true;
1643        qemu_cpu_kick(cpu);
1644    }
1645
1646    if (qemu_in_vcpu_thread()) {
1647        cpu_stop_current();
1648    }
1649
1650    while (!all_vcpus_paused()) {
1651        qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
1652        CPU_FOREACH(cpu) {
1653            qemu_cpu_kick(cpu);
1654        }
1655    }
1656}
1657
1658void cpu_resume(CPUState *cpu)
1659{
1660    cpu->stop = false;
1661    cpu->stopped = false;
1662    qemu_cpu_kick(cpu);
1663}
1664
1665void resume_all_vcpus(void)
1666{
1667    CPUState *cpu;
1668
1669    qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
1670    CPU_FOREACH(cpu) {
1671        cpu_resume(cpu);
1672    }
1673}
1674
1675void cpu_remove(CPUState *cpu)
1676{
1677    cpu->stop = true;
1678    cpu->unplug = true;
1679    qemu_cpu_kick(cpu);
1680}
1681
1682void cpu_remove_sync(CPUState *cpu)
1683{
1684    cpu_remove(cpu);
1685    while (cpu->created) {
1686        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1687    }
1688}
1689
1690/* For temporary buffers for forming a name */
1691#define VCPU_THREAD_NAME_SIZE 16
1692
1693static void qemu_tcg_init_vcpu(CPUState *cpu)
1694{
1695    char thread_name[VCPU_THREAD_NAME_SIZE];
1696    static QemuCond *single_tcg_halt_cond;
1697    static QemuThread *single_tcg_cpu_thread;
1698    static int tcg_region_inited;
1699
1700    /*
1701     * Initialize TCG regions--once. Now is a good time, because:
1702     * (1) TCG's init context, prologue and target globals have been set up.
1703     * (2) qemu_tcg_mttcg_enabled() works now (TCG init code runs before the
1704     *     -accel flag is processed, so the check doesn't work then).
1705     */
1706    if (!tcg_region_inited) {
1707        tcg_region_inited = 1;
1708        tcg_region_init();
1709    }
1710
1711    if (qemu_tcg_mttcg_enabled() || !single_tcg_cpu_thread) {
1712        cpu->thread = g_malloc0(sizeof(QemuThread));
1713        cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1714        qemu_cond_init(cpu->halt_cond);
1715
1716        if (qemu_tcg_mttcg_enabled()) {
1717            /* create a thread per vCPU with TCG (MTTCG) */
1718            parallel_cpus = true;
1719            snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
1720                 cpu->cpu_index);
1721
1722            qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
1723                               cpu, QEMU_THREAD_JOINABLE);
1724
1725        } else {
1726            /* share a single thread for all cpus with TCG */
1727            snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "ALL CPUs/TCG");
1728            qemu_thread_create(cpu->thread, thread_name,
1729                               qemu_tcg_rr_cpu_thread_fn,
1730                               cpu, QEMU_THREAD_JOINABLE);
1731
1732            single_tcg_halt_cond = cpu->halt_cond;
1733            single_tcg_cpu_thread = cpu->thread;
1734        }
1735#ifdef _WIN32
1736        cpu->hThread = qemu_thread_get_handle(cpu->thread);
1737#endif
1738        while (!cpu->created) {
1739            qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1740        }
1741    } else {
1742        /* For non-MTTCG cases we share the thread */
1743        cpu->thread = single_tcg_cpu_thread;
1744        cpu->halt_cond = single_tcg_halt_cond;
1745    }
1746}
1747
1748static void qemu_hax_start_vcpu(CPUState *cpu)
1749{
1750    char thread_name[VCPU_THREAD_NAME_SIZE];
1751
1752    cpu->thread = g_malloc0(sizeof(QemuThread));
1753    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1754    qemu_cond_init(cpu->halt_cond);
1755
1756    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HAX",
1757             cpu->cpu_index);
1758    qemu_thread_create(cpu->thread, thread_name, qemu_hax_cpu_thread_fn,
1759                       cpu, QEMU_THREAD_JOINABLE);
1760#ifdef _WIN32
1761    cpu->hThread = qemu_thread_get_handle(cpu->thread);
1762#endif
1763    while (!cpu->created) {
1764        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1765    }
1766}
1767
1768static void qemu_kvm_start_vcpu(CPUState *cpu)
1769{
1770    char thread_name[VCPU_THREAD_NAME_SIZE];
1771
1772    cpu->thread = g_malloc0(sizeof(QemuThread));
1773    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1774    qemu_cond_init(cpu->halt_cond);
1775    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
1776             cpu->cpu_index);
1777    qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
1778                       cpu, QEMU_THREAD_JOINABLE);
1779    while (!cpu->created) {
1780        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1781    }
1782}
1783
1784static void qemu_dummy_start_vcpu(CPUState *cpu)
1785{
1786    char thread_name[VCPU_THREAD_NAME_SIZE];
1787
1788    cpu->thread = g_malloc0(sizeof(QemuThread));
1789    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1790    qemu_cond_init(cpu->halt_cond);
1791    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
1792             cpu->cpu_index);
1793    qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
1794                       QEMU_THREAD_JOINABLE);
1795    while (!cpu->created) {
1796        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1797    }
1798}
1799
1800void qemu_init_vcpu(CPUState *cpu)
1801{
1802    cpu->nr_cores = smp_cores;
1803    cpu->nr_threads = smp_threads;
1804    cpu->stopped = true;
1805
1806    if (!cpu->as) {
1807        /* If the target cpu hasn't set up any address spaces itself,
1808         * give it the default one.
1809         */
1810        AddressSpace *as = g_new0(AddressSpace, 1);
1811
1812        address_space_init(as, cpu->memory, "cpu-memory");
1813        cpu->num_ases = 1;
1814        cpu_address_space_init(cpu, as, 0);
1815    }
1816
1817    if (kvm_enabled()) {
1818        qemu_kvm_start_vcpu(cpu);
1819    } else if (hax_enabled()) {
1820        qemu_hax_start_vcpu(cpu);
1821    } else if (tcg_enabled()) {
1822        qemu_tcg_init_vcpu(cpu);
1823    } else {
1824        qemu_dummy_start_vcpu(cpu);
1825    }
1826}
1827
1828void cpu_stop_current(void)
1829{
1830    if (current_cpu) {
1831        current_cpu->stop = false;
1832        current_cpu->stopped = true;
1833        cpu_exit(current_cpu);
1834        qemu_cond_broadcast(&qemu_pause_cond);
1835    }
1836}
1837
1838void vm_stop_from_timer(RunState state)
1839{
1840    qemu_system_vmstop_request_prepare();
1841    qemu_system_vmstop_request(state);
1842    /*
1843     * FIXME: should not return to device code in case
1844     * vm_stop() has been requested.
1845     */
1846    cpu_stop_current();
1847}
1848
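    /*
     * Stop the VM, transitioning to the given run state.  From a vCPU
     * thread we cannot safely wait for all other vCPUs to stop, so the
     * request is queued for the main loop and only the calling vCPU is
     * stopped here; otherwise do_vm_stop() performs the full transition
     * synchronously.
     *
     * Typical (hypothetical) caller, e.g. a monitor "stop" command:
     *     vm_stop(RUN_STATE_PAUSED);
     */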
1849int vm_stop(RunState state)
1850{
1851    if (qemu_in_vcpu_thread()) {
1852        qemu_system_vmstop_request_prepare();
1853        qemu_system_vmstop_request(state);
1854        /*
1855         * FIXME: should not return to device code in case
1856         * vm_stop() has been requested.
1857         */
1858        cpu_stop_current();
1859        return 0;
1860    }
1861
1862    return do_vm_stop(state);
1863}
1864
1865/**
1866 * Prepare for (re)starting the VM.
1867 * Returns -1 if the vCPUs are not to be restarted (e.g. if they are already
1868 * running or in case of an error condition), 0 otherwise.
1869 */
1870int vm_prepare_start(void)
1871{
1872    RunState requested;
1873    int res = 0;
1874
1875    qemu_vmstop_requested(&requested);
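        /* requested == RUN_STATE__MAX means no vmstop request was pending */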
1876    if (runstate_is_running() && requested == RUN_STATE__MAX) {
1877        return -1;
1878    }
1879
1880    /* Ensure that a STOP/RESUME pair of events is emitted if a
1881     * vmstop request was pending.  The BLOCK_IO_ERROR event, for
1882     * example, is documented as always being followed by the
1883     * STOP event.
1884     */
1885    if (runstate_is_running()) {
1886        qapi_event_send_stop(&error_abort);
1887        res = -1;
1888    } else {
1889        replay_enable_events();
1890        cpu_enable_ticks();
1891        runstate_set(RUN_STATE_RUNNING);
1892        vm_state_notify(1, RUN_STATE_RUNNING);
1893    }
1894
1895    /* We are sending this event now, but the CPUs will only be resumed shortly afterwards */
1896    qapi_event_send_resume(&error_abort);
1897    return res;
1898}
1899
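    /*
     * (Re)start the VM: vm_prepare_start() emits the RESUME event and
     * performs the run-state transition; the vCPUs are then resumed
     * unless they were already running.
     */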
1900void vm_start(void)
1901{
1902    if (!vm_prepare_start()) {
1903        resume_all_vcpus();
1904    }
1905}
1906
1907/* Does a state transition even if the VM is already stopped;
1908   the current state is forgotten forever */
1909int vm_stop_force_state(RunState state)
1910{
1911    if (runstate_is_running()) {
1912        return vm_stop(state);
1913    } else {
1914        runstate_set(state);
1915
1916        bdrv_drain_all();
1917        /* Make sure to return an error if the flush in a previous vm_stop()
1918         * failed. */
1919        return bdrv_flush_all();
1920    }
1921}
1922
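    /*
     * Print the list of available CPU models (e.g. for '-cpu help'),
     * when the target defines cpu_list.
     */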
1923void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
1924{
1925    /* XXX: implement xxx_cpu_list for targets that still lack it */
1926#if defined(cpu_list)
1927    cpu_list(f, cpu_fprintf);
1928#endif
1929}
1930
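    /*
     * QMP 'query-cpus': build a list with one CpuInfo entry per vCPU.
     * The per-target #ifdef blocks below pull the architecture-specific
     * program counter out of each CPU's env after synchronizing state
     * from the accelerator.
     */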
1931CpuInfoList *qmp_query_cpus(Error **errp)
1932{
1933    MachineState *ms = MACHINE(qdev_get_machine());
1934    MachineClass *mc = MACHINE_GET_CLASS(ms);
1935    CpuInfoList *head = NULL, *cur_item = NULL;
1936    CPUState *cpu;
1937
1938    CPU_FOREACH(cpu) {
1939        CpuInfoList *info;
1940#if defined(TARGET_I386)
1941        X86CPU *x86_cpu = X86_CPU(cpu);
1942        CPUX86State *env = &x86_cpu->env;
1943#elif defined(TARGET_PPC)
1944        PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
1945        CPUPPCState *env = &ppc_cpu->env;
1946#elif defined(TARGET_SPARC)
1947        SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
1948        CPUSPARCState *env = &sparc_cpu->env;
1949#elif defined(TARGET_MIPS)
1950        MIPSCPU *mips_cpu = MIPS_CPU(cpu);
1951        CPUMIPSState *env = &mips_cpu->env;
1952#elif defined(TARGET_TRICORE)
1953        TriCoreCPU *tricore_cpu = TRICORE_CPU(cpu);
1954        CPUTriCoreState *env = &tricore_cpu->env;
1955#endif
1956
1957        cpu_synchronize_state(cpu);
1958
1959        info = g_malloc0(sizeof(*info));
1960        info->value = g_malloc0(sizeof(*info->value));
1961        info->value->CPU = cpu->cpu_index;
1962        info->value->current = (cpu == first_cpu);
1963        info->value->halted = cpu->halted;
1964        info->value->qom_path = object_get_canonical_path(OBJECT(cpu));
1965        info->value->thread_id = cpu->thread_id;
1966#if defined(TARGET_I386)
1967        info->value->arch = CPU_INFO_ARCH_X86;
1968        info->value->u.x86.pc = env->eip + env->segs[R_CS].base;
1969#elif defined(TARGET_PPC)
1970        info->value->arch = CPU_INFO_ARCH_PPC;
1971        info->value->u.ppc.nip = env->nip;
1972#elif defined(TARGET_SPARC)
1973        info->value->arch = CPU_INFO_ARCH_SPARC;
1974        info->value->u.q_sparc.pc = env->pc;
1975        info->value->u.q_sparc.npc = env->npc;
1976#elif defined(TARGET_MIPS)
1977        info->value->arch = CPU_INFO_ARCH_MIPS;
1978        info->value->u.q_mips.PC = env->active_tc.PC;
1979#elif defined(TARGET_TRICORE)
1980        info->value->arch = CPU_INFO_ARCH_TRICORE;
1981        info->value->u.tricore.PC = env->PC;
1982#else
1983        info->value->arch = CPU_INFO_ARCH_OTHER;
1984#endif
1985        info->value->has_props = !!mc->cpu_index_to_instance_props;
1986        if (info->value->has_props) {
1987            CpuInstanceProperties *props;
1988            props = g_malloc0(sizeof(*props));
1989            *props = mc->cpu_index_to_instance_props(ms, cpu->cpu_index);
1990            info->value->props = props;
1991        }
1992
1993        /* XXX: waiting for the qapi to support GSList */
1994        if (!cur_item) {
1995            head = cur_item = info;
1996        } else {
1997            cur_item->next = info;
1998            cur_item = info;
1999        }
2000    }
2001
2002    return head;
2003}
2004
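    /*
     * QMP 'memsave': write 'size' bytes of guest *virtual* memory,
     * starting at 'addr' and translated through the given vCPU's MMU,
     * to 'filename' on the host.
     *
     * Hypothetical QMP usage:
     *     { "execute": "memsave",
     *       "arguments": { "val": 4096, "size": 256,
     *                      "filename": "/tmp/mem.bin" } }
     */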
2005void qmp_memsave(int64_t addr, int64_t size, const char *filename,
2006                 bool has_cpu, int64_t cpu_index, Error **errp)
2007{
2008    FILE *f;
2009    uint32_t l;
2010    CPUState *cpu;
2011    uint8_t buf[1024];
2012    int64_t orig_addr = addr, orig_size = size;
2013
2014    if (!has_cpu) {
2015        cpu_index = 0;
2016    }
2017
2018    cpu = qemu_get_cpu(cpu_index);
2019    if (cpu == NULL) {
2020        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
2021                   "a CPU number");
2022        return;
2023    }
2024
2025    f = fopen(filename, "wb");
2026    if (!f) {
2027        error_setg_file_open(errp, errno, filename);
2028        return;
2029    }
2030
2031    while (size != 0) {
2032        l = sizeof(buf);
2033        if (l > size) {
2034            l = size;
            }
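            /* final argument 0 selects a read (is_write == 0) */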
2035        if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
2036            error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64
2037                             " specified", orig_addr, orig_size);
2038            goto exit;
2039        }
2040        if (fwrite(buf, 1, l, f) != l) {
2041            error_setg(errp, QERR_IO_ERROR);
2042            goto exit;
2043        }
2044        addr += l;
2045        size -= l;
2046    }
2047
2048exit:
2049    fclose(f);
2050}
2051
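    /*
     * QMP 'pmemsave': like memsave, but 'addr' is a guest *physical*
     * address, so no vCPU is needed for translation and the read cannot
     * fail the way cpu_memory_rw_debug() can.
     */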
2052void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
2053                  Error **errp)
2054{
2055    FILE *f;
2056    uint32_t l;
2057    uint8_t buf[1024];
2058
2059    f = fopen(filename, "wb");
2060    if (!f) {
2061        error_setg_file_open(errp, errno, filename);
2062        return;
2063    }
2064
2065    while (size != 0) {
2066        l = sizeof(buf);
2067        if (l > size) {
2068            l = size;
            }
2069        cpu_physical_memory_read(addr, buf, l);
2070        if (fwrite(buf, 1, l, f) != l) {
2071            error_setg(errp, QERR_IO_ERROR);
2072            goto exit;
2073        }
2074        addr += l;
2075        size -= l;
2076    }
2077
2078exit:
2079    fclose(f);
2080}
2081
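    /*
     * QMP 'inject-nmi': deliver an NMI (or the architecture's nearest
     * equivalent) through the NMI interface, using the monitor's
     * currently selected CPU.
     */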
2082void qmp_inject_nmi(Error **errp)
2083{
2084    nmi_monitor_handle(monitor_get_cpu_index(), errp);
2085}
2086
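    /*
     * Print icount drift statistics (host clock vs. guest icount) for
     * the monitor; a no-op unless icount is enabled.  max_delay and
     * max_advance are only maintained when icount alignment is in use.
     */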
2087void dump_drift_info(FILE *f, fprintf_function cpu_fprintf)
2088{
2089    if (!use_icount) {
2090        return;
2091    }
2092
2093    cpu_fprintf(f, "Host - Guest clock  %"PRIi64" ms\n",
2094                (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
2095    if (icount_align_option) {
2096        cpu_fprintf(f, "Max guest delay     %"PRIi64" ms\n", -max_delay/SCALE_MS);
2097        cpu_fprintf(f, "Max guest advance   %"PRIi64" ms\n", max_advance/SCALE_MS);
2098    } else {
2099        cpu_fprintf(f, "Max guest delay     NA\n");
2100        cpu_fprintf(f, "Max guest advance   NA\n");
2101    }
2102}
2103