qemu/cpus.c
   1/*
   2 * QEMU System Emulator
   3 *
   4 * Copyright (c) 2003-2008 Fabrice Bellard
   5 *
   6 * Permission is hereby granted, free of charge, to any person obtaining a copy
   7 * of this software and associated documentation files (the "Software"), to deal
   8 * in the Software without restriction, including without limitation the rights
   9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  10 * copies of the Software, and to permit persons to whom the Software is
  11 * furnished to do so, subject to the following conditions:
  12 *
  13 * The above copyright notice and this permission notice shall be included in
  14 * all copies or substantial portions of the Software.
  15 *
  16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  22 * THE SOFTWARE.
  23 */
  24
  25/* Needed early for CONFIG_BSD etc. */
  26#include "qemu/osdep.h"
  27#include "qemu-common.h"
  28#include "qemu/config-file.h"
  29#include "cpu.h"
  30#include "monitor/monitor.h"
  31#include "qapi/qmp/qerror.h"
  32#include "qemu/error-report.h"
  33#include "sysemu/sysemu.h"
  34#include "sysemu/block-backend.h"
  35#include "exec/gdbstub.h"
  36#include "sysemu/dma.h"
  37#include "sysemu/hw_accel.h"
  38#include "sysemu/kvm.h"
  39#include "sysemu/hax.h"
  40#include "qmp-commands.h"
  41#include "exec/exec-all.h"
  42
  43#include "qemu/thread.h"
  44#include "sysemu/cpus.h"
  45#include "sysemu/qtest.h"
  46#include "qemu/main-loop.h"
  47#include "qemu/bitmap.h"
  48#include "qemu/seqlock.h"
  49#include "tcg.h"
  50#include "qapi-event.h"
  51#include "hw/nmi.h"
  52#include "sysemu/replay.h"
  53#include "hw/boards.h"
  54
  55#ifdef CONFIG_LINUX
  56
  57#include <sys/prctl.h>
  58
  59#ifndef PR_MCE_KILL
  60#define PR_MCE_KILL 33
  61#endif
  62
  63#ifndef PR_MCE_KILL_SET
  64#define PR_MCE_KILL_SET 1
  65#endif
  66
  67#ifndef PR_MCE_KILL_EARLY
  68#define PR_MCE_KILL_EARLY 1
  69#endif
  70
  71#endif /* CONFIG_LINUX */
  72
  73int64_t max_delay;
  74int64_t max_advance;
  75
  76/* vcpu throttling controls */
  77static QEMUTimer *throttle_timer;
  78static unsigned int throttle_percentage;
  79
  80#define CPU_THROTTLE_PCT_MIN 1
  81#define CPU_THROTTLE_PCT_MAX 99
  82#define CPU_THROTTLE_TIMESLICE_NS 10000000
  83
  84bool cpu_is_stopped(CPUState *cpu)
  85{
  86    return cpu->stopped || !runstate_is_running();
  87}
  88
  89static bool cpu_thread_is_idle(CPUState *cpu)
  90{
  91    if (cpu->stop || cpu->queued_work_first) {
  92        return false;
  93    }
  94    if (cpu_is_stopped(cpu)) {
  95        return true;
  96    }
  97    if (!cpu->halted || cpu_has_work(cpu) ||
  98        kvm_halt_in_kernel()) {
  99        return false;
 100    }
 101    return true;
 102}
 103
 104bool all_cpu_threads_idle(void)
 105{
 106    CPUState *cpu;
 107
 108    CPU_FOREACH(cpu) {
 109        if (!cpu_thread_is_idle(cpu)) {
 110            return false;
 111        }
 112    }
 113    return true;
 114}
 115
 116/***********************************************************/
 117/* guest cycle counter */
 118
 119/* Protected by TimersState seqlock */
 120
 121static bool icount_sleep = true;
 122static int64_t vm_clock_warp_start = -1;
 123/* Conversion factor from emulated instructions to virtual clock ticks.  */
 124static int icount_time_shift;
 125/* Arbitrarily pick 1MIPS as the minimum allowable speed.  */
 126#define MAX_ICOUNT_SHIFT 10
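/* Illustration: cpu_icount_to_ns() charges (1 << icount_time_shift) ns per
 * instruction, so a shift of 3 means 8 ns/insn (~125 MIPS) and the maximum
 * shift of 10 means 1024 ns/insn (~1 MIPS).
 */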
 127
 128static QEMUTimer *icount_rt_timer;
 129static QEMUTimer *icount_vm_timer;
 130static QEMUTimer *icount_warp_timer;
 131
 132typedef struct TimersState {
 133    /* Protected by BQL.  */
 134    int64_t cpu_ticks_prev;
 135    int64_t cpu_ticks_offset;
 136
 137    /* cpu_clock_offset can be read outside the BQL, so protect it
 138     * with this seqlock.
 139     */
 140    QemuSeqLock vm_clock_seqlock;
 141    int64_t cpu_clock_offset;
 142    int32_t cpu_ticks_enabled;
 143    int64_t dummy;
 144
 145    /* Compensate for varying guest execution speed.  */
 146    int64_t qemu_icount_bias;
 147    /* Only written by TCG thread */
 148    int64_t qemu_icount;
 149} TimersState;
 150
 151static TimersState timers_state;
 152bool mttcg_enabled;
 153
 154/*
 155 * We default to false if we know other options have been enabled
 156 * which are currently incompatible with MTTCG. Otherwise, once a
 157 * guest (target) has been updated to support:
 158 *   - atomic instructions
 159 *   - memory ordering primitives (barriers)
 160 * it can set the appropriate CONFIG flags in ${target}-softmmu.mak.
 161 *
 162 * Once a guest architecture has been converted to the new primitives
 163 * there are two remaining limitations to check.
 164 *
 165 * - The guest can't be oversized (e.g. 64 bit guest on 32 bit host)
 166 * - The host must have a stronger memory order than the guest
 167 *
 168 * It may be possible in future to support strong guests on weak hosts
 169 * but that will require tagging all loads/stores in a guest with their
 170 * implicit memory order requirements which would likely slow things
 171 * down a lot.
 172 */
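/* For example, a weakly ordered guest such as ARM can satisfy this check on an
 * x86 host, whose TSO model is at least as strong, whereas an x86 guest on an
 * ARM host cannot, and so defaults to single-threaded TCG.
 */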
 173
 174static bool check_tcg_memory_orders_compatible(void)
 175{
 176#if defined(TCG_GUEST_DEFAULT_MO) && defined(TCG_TARGET_DEFAULT_MO)
 177    return (TCG_GUEST_DEFAULT_MO & ~TCG_TARGET_DEFAULT_MO) == 0;
 178#else
 179    return false;
 180#endif
 181}
 182
 183static bool default_mttcg_enabled(void)
 184{
 185    if (use_icount || TCG_OVERSIZED_GUEST) {
 186        return false;
 187    } else {
 188#ifdef TARGET_SUPPORTS_MTTCG
 189        return check_tcg_memory_orders_compatible();
 190#else
 191        return false;
 192#endif
 193    }
 194}
 195
 196void qemu_tcg_configure(QemuOpts *opts, Error **errp)
 197{
 198    const char *t = qemu_opt_get(opts, "thread");
 199    if (t) {
 200        if (strcmp(t, "multi") == 0) {
 201            if (TCG_OVERSIZED_GUEST) {
 202                error_setg(errp, "No MTTCG when guest word size > host's");
 203            } else if (use_icount) {
 204                error_setg(errp, "No MTTCG when icount is enabled");
 205            } else {
 206#ifndef TARGET_SUPPORTS_MTTCG
 207                error_report("Guest not yet converted to MTTCG - "
 208                             "you may get unexpected results");
 209#endif
 210                if (!check_tcg_memory_orders_compatible()) {
 211                    error_report("Guest expects a stronger memory ordering "
 212                                 "than the host provides");
 213                    error_printf("This may cause strange/hard to debug errors\n");
 214                }
 215                mttcg_enabled = true;
 216            }
 217        } else if (strcmp(t, "single") == 0) {
 218            mttcg_enabled = false;
 219        } else {
 220            error_setg(errp, "Invalid 'thread' setting %s", t);
 221        }
 222    } else {
 223        mttcg_enabled = default_mttcg_enabled();
 224    }
 225}
 226
 227/* The current number of executed instructions is based on what we
 228 * originally budgeted minus the current state of the decrementing
 229 * icount counters in extra/u16.low.
 230 */
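/* E.g. with a budget of 10000 instructions, u16.low at 2000 and icount_extra
 * at 3000, 5000 instructions have been executed so far.
 */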
 231static int64_t cpu_get_icount_executed(CPUState *cpu)
 232{
 233    return cpu->icount_budget - (cpu->icount_decr.u16.low + cpu->icount_extra);
 234}
 235
 236/*
 237 * Update the global shared timer_state.qemu_icount to take into
 238 * account executed instructions. This is done by the TCG vCPU
 239 * thread so the main-loop can see time has moved forward.
 240 */
 241void cpu_update_icount(CPUState *cpu)
 242{
 243    int64_t executed = cpu_get_icount_executed(cpu);
 244    cpu->icount_budget -= executed;
 245
 246#ifdef CONFIG_ATOMIC64
 247    atomic_set__nocheck(&timers_state.qemu_icount,
 248                        atomic_read__nocheck(&timers_state.qemu_icount) +
 249                        executed);
 250#else /* FIXME: we need 64bit atomics to do this safely */
 251    timers_state.qemu_icount += executed;
 252#endif
 253}
 254
 255int64_t cpu_get_icount_raw(void)
 256{
 257    CPUState *cpu = current_cpu;
 258
 259    if (cpu && cpu->running) {
 260        if (!cpu->can_do_io) {
 261            fprintf(stderr, "Bad icount read\n");
 262            exit(1);
 263        }
 264        /* Take into account what has run */
 265        cpu_update_icount(cpu);
 266    }
 267#ifdef CONFIG_ATOMIC64
 268    return atomic_read__nocheck(&timers_state.qemu_icount);
 269#else /* FIXME: we need 64bit atomics to do this safely */
 270    return timers_state.qemu_icount;
 271#endif
 272}
 273
 274/* Return the virtual CPU time, based on the instruction counter.  */
 275static int64_t cpu_get_icount_locked(void)
 276{
 277    int64_t icount = cpu_get_icount_raw();
 278    return timers_state.qemu_icount_bias + cpu_icount_to_ns(icount);
 279}
 280
 281int64_t cpu_get_icount(void)
 282{
 283    int64_t icount;
 284    unsigned start;
 285
 286    do {
 287        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
 288        icount = cpu_get_icount_locked();
 289    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
 290
 291    return icount;
 292}
 293
 294int64_t cpu_icount_to_ns(int64_t icount)
 295{
 296    return icount << icount_time_shift;
 297}
 298
 299/* Return the time elapsed in the VM between vm_start and vm_stop.  Unless
 300 * icount is active, cpu_get_ticks() uses units of the host CPU cycle
 301 * counter.
 302 *
 303 * Caller must hold the BQL
 304 */
 305int64_t cpu_get_ticks(void)
 306{
 307    int64_t ticks;
 308
 309    if (use_icount) {
 310        return cpu_get_icount();
 311    }
 312
 313    ticks = timers_state.cpu_ticks_offset;
 314    if (timers_state.cpu_ticks_enabled) {
 315        ticks += cpu_get_host_ticks();
 316    }
 317
 318    if (timers_state.cpu_ticks_prev > ticks) {
 319        /* Note: non-increasing ticks may happen if the host uses
 320           software suspend */
 321        timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
 322        ticks = timers_state.cpu_ticks_prev;
 323    }
 324
 325    timers_state.cpu_ticks_prev = ticks;
 326    return ticks;
 327}
 328
 329static int64_t cpu_get_clock_locked(void)
 330{
 331    int64_t time;
 332
 333    time = timers_state.cpu_clock_offset;
 334    if (timers_state.cpu_ticks_enabled) {
 335        time += get_clock();
 336    }
 337
 338    return time;
 339}
 340
 341/* Return the monotonic time elapsed in the VM, i.e.,
 342 * the time between vm_start and vm_stop
 343 */
 344int64_t cpu_get_clock(void)
 345{
 346    int64_t ti;
 347    unsigned start;
 348
 349    do {
 350        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
 351        ti = cpu_get_clock_locked();
 352    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
 353
 354    return ti;
 355}
 356
 357/* enable cpu_get_ticks()
 358 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
 359 */
 360void cpu_enable_ticks(void)
 361{
 362    /* Here, the thing really protected by the seqlock is cpu_clock_offset. */
 363    seqlock_write_begin(&timers_state.vm_clock_seqlock);
 364    if (!timers_state.cpu_ticks_enabled) {
 365        timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
 366        timers_state.cpu_clock_offset -= get_clock();
 367        timers_state.cpu_ticks_enabled = 1;
 368    }
 369    seqlock_write_end(&timers_state.vm_clock_seqlock);
 370}
 371
 372/* disable cpu_get_ticks(): the clock is stopped. You must not call
 373 * cpu_get_ticks() after that.
 374 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
 375 */
 376void cpu_disable_ticks(void)
 377{
 378    /* Here, the thing really protected by the seqlock is cpu_clock_offset. */
 379    seqlock_write_begin(&timers_state.vm_clock_seqlock);
 380    if (timers_state.cpu_ticks_enabled) {
 381        timers_state.cpu_ticks_offset += cpu_get_host_ticks();
 382        timers_state.cpu_clock_offset = cpu_get_clock_locked();
 383        timers_state.cpu_ticks_enabled = 0;
 384    }
 385    seqlock_write_end(&timers_state.vm_clock_seqlock);
 386}
 387
 388/* Correlation between real and virtual time is always going to be
 389   fairly approximate, so ignore small variation.
 390   When the guest is idle real and virtual time will be aligned in
 391   the IO wait loop.  */
 392#define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)
 393
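/* Periodically re-tune icount_time_shift so that icount-derived virtual time
 * tracks the real time elapsed in the VM: if the guest runs ahead, the shift
 * is lowered (fewer ns charged per instruction); if it lags, the shift is
 * raised, up to MAX_ICOUNT_SHIFT.
 */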
 394static void icount_adjust(void)
 395{
 396    int64_t cur_time;
 397    int64_t cur_icount;
 398    int64_t delta;
 399
 400    /* Protected by TimersState mutex.  */
 401    static int64_t last_delta;
 402
 403    /* If the VM is not running, then do nothing.  */
 404    if (!runstate_is_running()) {
 405        return;
 406    }
 407
 408    seqlock_write_begin(&timers_state.vm_clock_seqlock);
 409    cur_time = cpu_get_clock_locked();
 410    cur_icount = cpu_get_icount_locked();
 411
 412    delta = cur_icount - cur_time;
 413    /* FIXME: This is a very crude algorithm, somewhat prone to oscillation.  */
 414    if (delta > 0
 415        && last_delta + ICOUNT_WOBBLE < delta * 2
 416        && icount_time_shift > 0) {
 417        /* The guest is getting too far ahead.  Slow time down.  */
 418        icount_time_shift--;
 419    }
 420    if (delta < 0
 421        && last_delta - ICOUNT_WOBBLE > delta * 2
 422        && icount_time_shift < MAX_ICOUNT_SHIFT) {
 423        /* The guest is getting too far behind.  Speed time up.  */
 424        icount_time_shift++;
 425    }
 426    last_delta = delta;
 427    timers_state.qemu_icount_bias = cur_icount
 428                              - (timers_state.qemu_icount << icount_time_shift);
 429    seqlock_write_end(&timers_state.vm_clock_seqlock);
 430}
 431
 432static void icount_adjust_rt(void *opaque)
 433{
 434    timer_mod(icount_rt_timer,
 435              qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
 436    icount_adjust();
 437}
 438
 439static void icount_adjust_vm(void *opaque)
 440{
 441    timer_mod(icount_vm_timer,
 442                   qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
 443                   NANOSECONDS_PER_SECOND / 10);
 444    icount_adjust();
 445}
 446
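/* Convert a nanosecond count into instructions at the current shift, rounding up. */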
 447static int64_t qemu_icount_round(int64_t count)
 448{
 449    return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
 450}
 451
 452static bool icount_idle_timewarps = true;
 453void qemu_icount_enable_idle_timewarps(bool enable)
 454{
 455    icount_idle_timewarps = enable;
 456}
 457
 458static void icount_warp_rt(void)
 459{
 460    unsigned seq;
 461    int64_t warp_start;
 462
 463    /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
 464     * changes from -1 to another value, so the race here is okay.
 465     */
 466    do {
 467        seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
 468        warp_start = vm_clock_warp_start;
 469    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));
 470
 471    if (warp_start == -1) {
 472        return;
 473    }
 474
 475    seqlock_write_begin(&timers_state.vm_clock_seqlock);
 476    if (runstate_is_running()) {
 477        int64_t clock = REPLAY_CLOCK(REPLAY_CLOCK_VIRTUAL_RT,
 478                                     cpu_get_clock_locked());
 479        int64_t warp_delta;
 480
 481        warp_delta = clock - vm_clock_warp_start;
 482        if (use_icount == 2) {
 483            /*
 484             * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
 485             * far ahead of real time.
 486             */
 487            int64_t cur_icount = cpu_get_icount_locked();
 488            int64_t delta = clock - cur_icount;
 489            warp_delta = MIN(warp_delta, delta);
 490        }
 491        timers_state.qemu_icount_bias += warp_delta;
 492    }
 493    vm_clock_warp_start = -1;
 494    seqlock_write_end(&timers_state.vm_clock_seqlock);
 495
 496    if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
 497        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
 498    }
 499}
 500
 501static void icount_timer_cb(void *opaque)
 502{
 503    /* No need for a checkpoint because the timer already synchronizes
 504     * with CHECKPOINT_CLOCK_VIRTUAL_RT.
 505     */
 506    icount_warp_rt();
 507}
 508
 509void tcg_clock_warp(int64_t dest)
 510{
 511    int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
 512
 513    if (clock < dest) {
 514        qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
 515        clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
 516    }
 517    qemu_notify_event();
 518}
 519
 520bool tcg_idle_clock_warp(int64_t dest)
 521{
 522    if (!all_cpu_threads_idle()) {
 523        return false;
 524    }
 525
 526    tcg_clock_warp(dest);
 527    return true;
 528}
 529
 530void qtest_clock_warp(int64_t dest)
 531{
 532    int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
 533    AioContext *aio_context;
 534    assert(qtest_enabled());
 535    aio_context = qemu_get_aio_context();
 536    while (clock < dest) {
 537        int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
 538        int64_t warp = qemu_soonest_timeout(dest - clock, deadline);
 539
 540        seqlock_write_begin(&timers_state.vm_clock_seqlock);
 541        timers_state.qemu_icount_bias += warp;
 542        seqlock_write_end(&timers_state.vm_clock_seqlock);
 543
 544        qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
 545        timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
 546        clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
 547    }
 548    qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
 549}
 550
 551void qemu_start_warp_timer(void)
 552{
 553    int64_t clock;
 554    int64_t deadline;
 555
 556    if (!use_icount) {
 557        return;
 558    }
 559
 560    /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
 561     * do not fire, so computing the deadline does not make sense.
 562     */
 563    if (!runstate_is_running()) {
 564        return;
 565    }
 566
 567    /* warp clock deterministically in record/replay mode */
 568    if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
 569        return;
 570    }
 571
 572    if (!all_cpu_threads_idle()) {
 573        return;
 574    }
 575
 576    if (qtest_enabled()) {
 577        /* When testing, qtest commands advance icount.  */
 578        return;
 579    }
 580
 581    /* We want to use the earliest deadline from ALL vm_clocks */
 582    clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
 583    deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
 584    if (deadline < 0) {
 585        static bool notified;
 586        if (!icount_sleep && !notified) {
 587            warn_report("icount sleep disabled and no active timers");
 588            notified = true;
 589        }
 590        return;
 591    }
 592
 593    if (deadline > 0) {
 594        /*
 595         * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
 596         * sleep.  Otherwise, the CPU might be waiting for a future timer
 597         * interrupt to wake it up, but the interrupt never comes because
 598         * the vCPU isn't running any insns and thus doesn't advance the
 599         * QEMU_CLOCK_VIRTUAL.
 600         */
 601        if (!icount_sleep) {
 602            /*
 603             * We never let VCPUs sleep in no-sleep icount mode.
 604             * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
 605             * to the next QEMU_CLOCK_VIRTUAL event and notify it.
 606             * It is useful when we want a deterministic execution time,
 607             * isolated from host latencies.
 608             */
 609            seqlock_write_begin(&timers_state.vm_clock_seqlock);
 610            timers_state.qemu_icount_bias += deadline;
 611            seqlock_write_end(&timers_state.vm_clock_seqlock);
 612            qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
 613        } else {
 614            /*
 615             * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
 616             * "real" time (related to the time left until the next event) has
 617             * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
 618             * This keeps the warps from being visible externally; for example,
 619             * you will not be sending network packets continuously instead of
 620             * every 100ms.
 621             */
 622            seqlock_write_begin(&timers_state.vm_clock_seqlock);
 623            if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
 624                vm_clock_warp_start = clock;
 625            }
 626            seqlock_write_end(&timers_state.vm_clock_seqlock);
 627            timer_mod_anticipate(icount_warp_timer, clock + deadline);
 628        }
 629    } else if (deadline == 0) {
 630        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
 631    }
 632}
 633
 634static void qemu_account_warp_timer(void)
 635{
 636    if (!use_icount || !icount_sleep) {
 637        return;
 638    }
 639
 640    /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
 641     * do not fire, so computing the deadline does not make sense.
 642     */
 643    if (!runstate_is_running()) {
 644        return;
 645    }
 646
 647    /* warp clock deterministically in record/replay mode */
 648    if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
 649        return;
 650    }
 651
 652    timer_del(icount_warp_timer);
 653    icount_warp_rt();
 654}
 655
 656static bool icount_state_needed(void *opaque)
 657{
 658    return use_icount;
 659}
 660
 661/*
 662 * This is a subsection for icount migration.
 663 */
 664static const VMStateDescription icount_vmstate_timers = {
 665    .name = "timer/icount",
 666    .version_id = 1,
 667    .minimum_version_id = 1,
 668    .needed = icount_state_needed,
 669    .fields = (VMStateField[]) {
 670        VMSTATE_INT64(qemu_icount_bias, TimersState),
 671        VMSTATE_INT64(qemu_icount, TimersState),
 672        VMSTATE_END_OF_LIST()
 673    }
 674};
 675
 676static const VMStateDescription vmstate_timers = {
 677    .name = "timer",
 678    .version_id = 2,
 679    .minimum_version_id = 1,
 680    .fields = (VMStateField[]) {
 681        VMSTATE_INT64(cpu_ticks_offset, TimersState),
 682        VMSTATE_INT64(dummy, TimersState),
 683        VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
 684        VMSTATE_END_OF_LIST()
 685    },
 686    .subsections = (const VMStateDescription*[]) {
 687        &icount_vmstate_timers,
 688        NULL
 689    }
 690};
 691
 692static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque)
 693{
 694    double pct;
 695    double throttle_ratio;
 696    long sleeptime_ns;
 697
 698    if (!cpu_throttle_get_percentage()) {
 699        return;
 700    }
 701
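    /* E.g. at 75% throttle: pct = 0.75, so throttle_ratio = 3 and the vCPU
     * sleeps 30ms for every 10ms timeslice it is allowed to run.
     */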
 702    pct = (double)cpu_throttle_get_percentage()/100;
 703    throttle_ratio = pct / (1 - pct);
 704    sleeptime_ns = (long)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS);
 705
 706    qemu_mutex_unlock_iothread();
 707    g_usleep(sleeptime_ns / 1000); /* Convert ns to us for usleep call */
 708    qemu_mutex_lock_iothread();
 709    atomic_set(&cpu->throttle_thread_scheduled, 0);
 710}
 711
 712static void cpu_throttle_timer_tick(void *opaque)
 713{
 714    CPUState *cpu;
 715    double pct;
 716
 717    /* Stop the timer if needed */
 718    if (!cpu_throttle_get_percentage()) {
 719        return;
 720    }
 721    CPU_FOREACH(cpu) {
 722        if (!atomic_xchg(&cpu->throttle_thread_scheduled, 1)) {
 723            async_run_on_cpu(cpu, cpu_throttle_thread,
 724                             RUN_ON_CPU_NULL);
 725        }
 726    }
 727
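    /* Re-arm so that the running portion of each period stays at one
     * timeslice: e.g. at 75% throttle the timer fires every 40ms
     * (10ms of run time plus the 30ms of sleep injected above).
     */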
 728    pct = (double)cpu_throttle_get_percentage()/100;
 729    timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
 730                                   CPU_THROTTLE_TIMESLICE_NS / (1-pct));
 731}
 732
 733void cpu_throttle_set(int new_throttle_pct)
 734{
 735    /* Ensure throttle percentage is within valid range */
 736    new_throttle_pct = MIN(new_throttle_pct, CPU_THROTTLE_PCT_MAX);
 737    new_throttle_pct = MAX(new_throttle_pct, CPU_THROTTLE_PCT_MIN);
 738
 739    atomic_set(&throttle_percentage, new_throttle_pct);
 740
 741    timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
 742                                       CPU_THROTTLE_TIMESLICE_NS);
 743}
 744
 745void cpu_throttle_stop(void)
 746{
 747    atomic_set(&throttle_percentage, 0);
 748}
 749
 750bool cpu_throttle_active(void)
 751{
 752    return (cpu_throttle_get_percentage() != 0);
 753}
 754
 755int cpu_throttle_get_percentage(void)
 756{
 757    return atomic_read(&throttle_percentage);
 758}
 759
 760void cpu_ticks_init(void)
 761{
 762    seqlock_init(&timers_state.vm_clock_seqlock);
 763    vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
 764    throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
 765                                           cpu_throttle_timer_tick, NULL);
 766}
 767
 768void configure_icount(QemuOpts *opts, Error **errp)
 769{
 770    const char *option;
 771    char *rem_str = NULL;
 772
 773    option = qemu_opt_get(opts, "shift");
 774    if (!option) {
 775        if (qemu_opt_get(opts, "align") != NULL) {
 776            error_setg(errp, "Please specify shift option when using align");
 777        }
 778        return;
 779    }
 780
 781    icount_sleep = qemu_opt_get_bool(opts, "sleep", true);
 782    if (icount_sleep) {
 783        icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
 784                                         icount_timer_cb, NULL);
 785    }
 786
 787    icount_align_option = qemu_opt_get_bool(opts, "align", false);
 788
 789    if (icount_align_option && !icount_sleep) {
 790        error_setg(errp, "align=on and sleep=off are incompatible");
 791    }
 792    if (strcmp(option, "auto") != 0) {
 793        errno = 0;
 794        icount_time_shift = strtol(option, &rem_str, 0);
 795        if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
 796            error_setg(errp, "icount: Invalid shift value");
 797        }
 798        use_icount = 1;
 799        return;
 800    } else if (icount_align_option) {
 801        error_setg(errp, "shift=auto and align=on are incompatible");
 802    } else if (!icount_sleep) {
 803        error_setg(errp, "shift=auto and sleep=off are incompatible");
 804    }
 805
 806    use_icount = 2;
 807
 808    /* 125MIPS seems a reasonable initial guess at the guest speed.
 809       It will be corrected fairly quickly anyway.  */
 810    icount_time_shift = 3;
 811
 812    /* Have both realtime and virtual time triggers for speed adjustment.
 813       The realtime trigger catches emulated time passing too slowly,
 814       the virtual time trigger catches emulated time passing too fast.
 815       Realtime triggers occur even when idle, so use them less frequently
 816       than VM triggers.  */
 817    icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
 818                                   icount_adjust_rt, NULL);
 819    timer_mod(icount_rt_timer,
 820                   qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
 821    icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
 822                                        icount_adjust_vm, NULL);
 823    timer_mod(icount_vm_timer,
 824                   qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
 825                   NANOSECONDS_PER_SECOND / 10);
 826}
 827
 828/***********************************************************/
 829/* TCG vCPU kick timer
 830 *
 831 * The kick timer is responsible for moving single-threaded vCPU
 832 * emulation on to the next vCPU. If more than one vCPU is running, a
 833 * timer event will force a cpu->exit so the next vCPU can get
 834 * scheduled.
 835 *
 836 * The timer is removed while all vCPUs are idle and restarted again
 837 * once there is work to do.
 838 */
 839
 840static QEMUTimer *tcg_kick_vcpu_timer;
 841static CPUState *tcg_current_rr_cpu;
 842
 843#define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10)
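/* i.e. kick the currently scheduled vCPU every 100ms of virtual time */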
 844
 845static inline int64_t qemu_tcg_next_kick(void)
 846{
 847    return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + TCG_KICK_PERIOD;
 848}
 849
 850/* Kick the currently round-robin scheduled vCPU */
 851static void qemu_cpu_kick_rr_cpu(void)
 852{
 853    CPUState *cpu;
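    /* Re-check after cpu_exit(): the round-robin loop may have switched to a
     * different vCPU in the meantime, and that one needs the kick instead.
     */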
 854    do {
 855        cpu = atomic_mb_read(&tcg_current_rr_cpu);
 856        if (cpu) {
 857            cpu_exit(cpu);
 858        }
 859    } while (cpu != atomic_mb_read(&tcg_current_rr_cpu));
 860}
 861
 862static void do_nothing(CPUState *cpu, run_on_cpu_data unused)
 863{
 864}
 865
 866void qemu_timer_notify_cb(void *opaque, QEMUClockType type)
 867{
 868    if (!use_icount || type != QEMU_CLOCK_VIRTUAL) {
 869        qemu_notify_event();
 870        return;
 871    }
 872
 873    if (!qemu_in_vcpu_thread() && first_cpu) {
 874        /* qemu_cpu_kick is not enough to kick a halted CPU out of
 875         * qemu_tcg_wait_io_event.  async_run_on_cpu, instead,
 876         * causes cpu_thread_is_idle to return false.  This way,
 877         * handle_icount_deadline can run.
 878         */
 879        async_run_on_cpu(first_cpu, do_nothing, RUN_ON_CPU_NULL);
 880    }
 881}
 882
 883static void kick_tcg_thread(void *opaque)
 884{
 885    timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
 886    qemu_cpu_kick_rr_cpu();
 887}
 888
 889static void start_tcg_kick_timer(void)
 890{
 891    if (!mttcg_enabled && !tcg_kick_vcpu_timer && CPU_NEXT(first_cpu)) {
 892        tcg_kick_vcpu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
 893                                           kick_tcg_thread, NULL);
 894        timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
 895    }
 896}
 897
 898static void stop_tcg_kick_timer(void)
 899{
 900    if (tcg_kick_vcpu_timer) {
 901        timer_del(tcg_kick_vcpu_timer);
 902        tcg_kick_vcpu_timer = NULL;
 903    }
 904}
 905
 906/***********************************************************/
 907void hw_error(const char *fmt, ...)
 908{
 909    va_list ap;
 910    CPUState *cpu;
 911
 912    va_start(ap, fmt);
 913    fprintf(stderr, "qemu: hardware error: ");
 914    vfprintf(stderr, fmt, ap);
 915    fprintf(stderr, "\n");
 916    CPU_FOREACH(cpu) {
 917        fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
 918        cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
 919    }
 920    va_end(ap);
 921    abort();
 922}
 923
 924void cpu_synchronize_all_states(void)
 925{
 926    CPUState *cpu;
 927
 928    CPU_FOREACH(cpu) {
 929        cpu_synchronize_state(cpu);
 930    }
 931}
 932
 933void cpu_synchronize_all_post_reset(void)
 934{
 935    CPUState *cpu;
 936
 937    CPU_FOREACH(cpu) {
 938        cpu_synchronize_post_reset(cpu);
 939    }
 940}
 941
 942void cpu_synchronize_all_post_init(void)
 943{
 944    CPUState *cpu;
 945
 946    CPU_FOREACH(cpu) {
 947        cpu_synchronize_post_init(cpu);
 948    }
 949}
 950
 951void cpu_synchronize_all_pre_loadvm(void)
 952{
 953    CPUState *cpu;
 954
 955    CPU_FOREACH(cpu) {
 956        cpu_synchronize_pre_loadvm(cpu);
 957    }
 958}
 959
 960static int do_vm_stop(RunState state)
 961{
 962    int ret = 0;
 963
 964    if (runstate_is_running()) {
 965        cpu_disable_ticks();
 966        pause_all_vcpus();
 967        runstate_set(state);
 968        vm_state_notify(0, state);
 969        qapi_event_send_stop(&error_abort);
 970    }
 971
 972    bdrv_drain_all();
 973    replay_disable_events();
 974    ret = bdrv_flush_all();
 975
 976    return ret;
 977}
 978
 979static bool cpu_can_run(CPUState *cpu)
 980{
 981    if (cpu->stop) {
 982        return false;
 983    }
 984    if (cpu_is_stopped(cpu)) {
 985        return false;
 986    }
 987    return true;
 988}
 989
 990static void cpu_handle_guest_debug(CPUState *cpu)
 991{
 992    gdb_set_stop_cpu(cpu);
 993    qemu_system_debug_request();
 994    cpu->stopped = true;
 995}
 996
 997#ifdef CONFIG_LINUX
 998static void sigbus_reraise(void)
 999{
1000    sigset_t set;
1001    struct sigaction action;
1002
1003    memset(&action, 0, sizeof(action));
1004    action.sa_handler = SIG_DFL;
1005    if (!sigaction(SIGBUS, &action, NULL)) {
1006        raise(SIGBUS);
1007        sigemptyset(&set);
1008        sigaddset(&set, SIGBUS);
1009        pthread_sigmask(SIG_UNBLOCK, &set, NULL);
1010    }
1011    perror("Failed to re-raise SIGBUS!\n");
1012    abort();
1013}
1014
1015static void sigbus_handler(int n, siginfo_t *siginfo, void *ctx)
1016{
1017    if (siginfo->si_code != BUS_MCEERR_AO && siginfo->si_code != BUS_MCEERR_AR) {
1018        sigbus_reraise();
1019    }
1020
1021    if (current_cpu) {
1022        /* Called asynchronously in VCPU thread.  */
1023        if (kvm_on_sigbus_vcpu(current_cpu, siginfo->si_code, siginfo->si_addr)) {
1024            sigbus_reraise();
1025        }
1026    } else {
1027        /* Called synchronously (via signalfd) in main thread.  */
1028        if (kvm_on_sigbus(siginfo->si_code, siginfo->si_addr)) {
1029            sigbus_reraise();
1030        }
1031    }
1032}
1033
1034static void qemu_init_sigbus(void)
1035{
1036    struct sigaction action;
1037
1038    memset(&action, 0, sizeof(action));
1039    action.sa_flags = SA_SIGINFO;
1040    action.sa_sigaction = sigbus_handler;
1041    sigaction(SIGBUS, &action, NULL);
1042
1043    prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
1044}
1045#else /* !CONFIG_LINUX */
1046static void qemu_init_sigbus(void)
1047{
1048}
1049#endif /* !CONFIG_LINUX */
1050
1051static QemuMutex qemu_global_mutex;
1052
1053static QemuThread io_thread;
1054
1055/* cpu creation */
1056static QemuCond qemu_cpu_cond;
1057/* system init */
1058static QemuCond qemu_pause_cond;
1059
1060void qemu_init_cpu_loop(void)
1061{
1062    qemu_init_sigbus();
1063    qemu_cond_init(&qemu_cpu_cond);
1064    qemu_cond_init(&qemu_pause_cond);
1065    qemu_mutex_init(&qemu_global_mutex);
1066
1067    qemu_thread_get_self(&io_thread);
1068}
1069
1070void run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data)
1071{
1072    do_run_on_cpu(cpu, func, data, &qemu_global_mutex);
1073}
1074
1075static void qemu_kvm_destroy_vcpu(CPUState *cpu)
1076{
1077    if (kvm_destroy_vcpu(cpu) < 0) {
1078        error_report("kvm_destroy_vcpu failed");
1079        exit(EXIT_FAILURE);
1080    }
1081}
1082
1083static void qemu_tcg_destroy_vcpu(CPUState *cpu)
1084{
1085}
1086
1087static void qemu_wait_io_event_common(CPUState *cpu)
1088{
1089    atomic_mb_set(&cpu->thread_kicked, false);
1090    if (cpu->stop) {
1091        cpu->stop = false;
1092        cpu->stopped = true;
1093        qemu_cond_broadcast(&qemu_pause_cond);
1094    }
1095    process_queued_cpu_work(cpu);
1096}
1097
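/* In MTTCG mode each vCPU thread sleeps on its own idleness; in single-threaded
 * (round-robin) mode the shared TCG thread only sleeps once all vCPUs are idle.
 */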
1098static bool qemu_tcg_should_sleep(CPUState *cpu)
1099{
1100    if (mttcg_enabled) {
1101        return cpu_thread_is_idle(cpu);
1102    } else {
1103        return all_cpu_threads_idle();
1104    }
1105}
1106
1107static void qemu_tcg_wait_io_event(CPUState *cpu)
1108{
1109    while (qemu_tcg_should_sleep(cpu)) {
1110        stop_tcg_kick_timer();
1111        qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
1112    }
1113
1114    start_tcg_kick_timer();
1115
1116    qemu_wait_io_event_common(cpu);
1117}
1118
1119static void qemu_kvm_wait_io_event(CPUState *cpu)
1120{
1121    while (cpu_thread_is_idle(cpu)) {
1122        qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
1123    }
1124
1125    qemu_wait_io_event_common(cpu);
1126}
1127
1128static void *qemu_kvm_cpu_thread_fn(void *arg)
1129{
1130    CPUState *cpu = arg;
1131    int r;
1132
1133    rcu_register_thread();
1134
1135    qemu_mutex_lock_iothread();
1136    qemu_thread_get_self(cpu->thread);
1137    cpu->thread_id = qemu_get_thread_id();
1138    cpu->can_do_io = 1;
1139    current_cpu = cpu;
1140
1141    r = kvm_init_vcpu(cpu);
1142    if (r < 0) {
1143        fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
1144        exit(1);
1145    }
1146
1147    kvm_init_cpu_signals(cpu);
1148
1149    /* signal CPU creation */
1150    cpu->created = true;
1151    qemu_cond_signal(&qemu_cpu_cond);
1152
1153    do {
1154        if (cpu_can_run(cpu)) {
1155            r = kvm_cpu_exec(cpu);
1156            if (r == EXCP_DEBUG) {
1157                cpu_handle_guest_debug(cpu);
1158            }
1159        }
1160        qemu_kvm_wait_io_event(cpu);
1161    } while (!cpu->unplug || cpu_can_run(cpu));
1162
1163    qemu_kvm_destroy_vcpu(cpu);
1164    cpu->created = false;
1165    qemu_cond_signal(&qemu_cpu_cond);
1166    qemu_mutex_unlock_iothread();
1167    return NULL;
1168}
1169
1170static void *qemu_dummy_cpu_thread_fn(void *arg)
1171{
1172#ifdef _WIN32
1173    fprintf(stderr, "qtest is not supported under Windows\n");
1174    exit(1);
1175#else
1176    CPUState *cpu = arg;
1177    sigset_t waitset;
1178    int r;
1179
1180    rcu_register_thread();
1181
1182    qemu_mutex_lock_iothread();
1183    qemu_thread_get_self(cpu->thread);
1184    cpu->thread_id = qemu_get_thread_id();
1185    cpu->can_do_io = 1;
1186    current_cpu = cpu;
1187
1188    sigemptyset(&waitset);
1189    sigaddset(&waitset, SIG_IPI);
1190
1191    /* signal CPU creation */
1192    cpu->created = true;
1193    qemu_cond_signal(&qemu_cpu_cond);
1194
1195    while (1) {
1196        qemu_mutex_unlock_iothread();
1197        do {
1198            int sig;
1199            r = sigwait(&waitset, &sig);
1200        } while (r == -1 && (errno == EAGAIN || errno == EINTR));
1201        if (r == -1) {
1202            perror("sigwait");
1203            exit(1);
1204        }
1205        qemu_mutex_lock_iothread();
1206        qemu_wait_io_event_common(cpu);
1207    }
1208
1209    return NULL;
1210#endif
1211}
1212
1213static int64_t tcg_get_icount_limit(void)
1214{
1215    int64_t deadline;
1216
1217    if (replay_mode != REPLAY_MODE_PLAY) {
1218        deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
1219
1220        /* Maintain prior (possibly buggy) behaviour where if no deadline
1221         * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
1222         * INT32_MAX nanoseconds ahead, we still use INT32_MAX
1223         * nanoseconds.
1224         */
1225        if ((deadline < 0) || (deadline > INT32_MAX)) {
1226            deadline = INT32_MAX;
1227        }
1228
1229        return qemu_icount_round(deadline);
1230    } else {
1231        return replay_get_instructions();
1232    }
1233}
1234
1235static void handle_icount_deadline(void)
1236{
1237    assert(qemu_in_vcpu_thread());
1238    if (use_icount) {
1239        int64_t deadline =
1240            qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
1241
1242        if (deadline == 0) {
1243            /* Wake up other AioContexts.  */
1244            qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
1245            qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
1246        }
1247    }
1248}
1249
1250static void prepare_icount_for_run(CPUState *cpu)
1251{
1252    if (use_icount) {
1253        int insns_left;
1254
1255        /* These should always be cleared by process_icount_data after
1256         * each vCPU execution. However, u16.high can be raised
1257         * asynchronously by cpu_exit/cpu_interrupt/tcg_handle_interrupt.
1258         */
1259        g_assert(cpu->icount_decr.u16.low == 0);
1260        g_assert(cpu->icount_extra == 0);
1261
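        /* Split the budget between the 16-bit decrementer and icount_extra,
         * e.g. a budget of 100000 gives u16.low = 65535 and icount_extra = 34465.
         */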
1262        cpu->icount_budget = tcg_get_icount_limit();
1263        insns_left = MIN(0xffff, cpu->icount_budget);
1264        cpu->icount_decr.u16.low = insns_left;
1265        cpu->icount_extra = cpu->icount_budget - insns_left;
1266    }
1267}
1268
1269static void process_icount_data(CPUState *cpu)
1270{
1271    if (use_icount) {
1272        /* Account for executed instructions */
1273        cpu_update_icount(cpu);
1274
1275        /* Reset the counters */
1276        cpu->icount_decr.u16.low = 0;
1277        cpu->icount_extra = 0;
1278        cpu->icount_budget = 0;
1279
1280        replay_account_executed_instructions();
1281    }
1282}
1283
1284
1285static int tcg_cpu_exec(CPUState *cpu)
1286{
1287    int ret;
1288#ifdef CONFIG_PROFILER
1289    int64_t ti;
1290#endif
1291
1292#ifdef CONFIG_PROFILER
1293    ti = profile_getclock();
1294#endif
1295    qemu_mutex_unlock_iothread();
1296    cpu_exec_start(cpu);
1297    ret = cpu_exec(cpu);
1298    cpu_exec_end(cpu);
1299    qemu_mutex_lock_iothread();
1300#ifdef CONFIG_PROFILER
1301    tcg_time += profile_getclock() - ti;
1302#endif
1303    return ret;
1304}
1305
1306/* Destroy any remaining vCPUs which have been unplugged and have
1307 * finished running
1308 */
1309static void deal_with_unplugged_cpus(void)
1310{
1311    CPUState *cpu;
1312
1313    CPU_FOREACH(cpu) {
1314        if (cpu->unplug && !cpu_can_run(cpu)) {
1315            qemu_tcg_destroy_vcpu(cpu);
1316            cpu->created = false;
1317            qemu_cond_signal(&qemu_cpu_cond);
1318            break;
1319        }
1320    }
1321}
1322
1323/* Single-threaded TCG
1324 *
1325 * In the single-threaded case each vCPU is simulated in turn. If
1326 * there is more than a single vCPU we create a simple timer to kick
1327 * the vCPU and ensure we don't get stuck in a tight loop in one vCPU.
1328 * This is done explicitly rather than relying on side-effects
1329 * elsewhere.
1330 */
1331
1332static void *qemu_tcg_rr_cpu_thread_fn(void *arg)
1333{
1334    CPUState *cpu = arg;
1335
1336    rcu_register_thread();
1337    tcg_register_thread();
1338
1339    qemu_mutex_lock_iothread();
1340    qemu_thread_get_self(cpu->thread);
1341
1342    CPU_FOREACH(cpu) {
1343        cpu->thread_id = qemu_get_thread_id();
1344        cpu->created = true;
1345        cpu->can_do_io = 1;
1346    }
1347    qemu_cond_signal(&qemu_cpu_cond);
1348
1349    /* wait for initial kick-off after machine start */
1350    while (first_cpu->stopped) {
1351        qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);
1352
1353        /* process any pending work */
1354        CPU_FOREACH(cpu) {
1355            current_cpu = cpu;
1356            qemu_wait_io_event_common(cpu);
1357        }
1358    }
1359
1360    start_tcg_kick_timer();
1361
1362    cpu = first_cpu;
1363
1364    /* process any pending work */
1365    cpu->exit_request = 1;
1366
1367    while (1) {
1368        /* Account partial waits to QEMU_CLOCK_VIRTUAL.  */
1369        qemu_account_warp_timer();
1370
1371        /* Run the timers here.  This is much more efficient than
1372         * waking up the I/O thread and waiting for completion.
1373         */
1374        handle_icount_deadline();
1375
1376        if (!cpu) {
1377            cpu = first_cpu;
1378        }
1379
1380        while (cpu && !cpu->queued_work_first && !cpu->exit_request) {
1381
1382            atomic_mb_set(&tcg_current_rr_cpu, cpu);
1383            current_cpu = cpu;
1384
1385            qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
1386                              (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
1387
1388            if (cpu_can_run(cpu)) {
1389                int r;
1390
1391                prepare_icount_for_run(cpu);
1392
1393                r = tcg_cpu_exec(cpu);
1394
1395                process_icount_data(cpu);
1396
1397                if (r == EXCP_DEBUG) {
1398                    cpu_handle_guest_debug(cpu);
1399                    break;
1400                } else if (r == EXCP_ATOMIC) {
1401                    qemu_mutex_unlock_iothread();
1402                    cpu_exec_step_atomic(cpu);
1403                    qemu_mutex_lock_iothread();
1404                    break;
1405                }
1406            } else if (cpu->stop) {
1407                if (cpu->unplug) {
1408                    cpu = CPU_NEXT(cpu);
1409                }
1410                break;
1411            }
1412
1413            cpu = CPU_NEXT(cpu);
1414        } /* while (cpu && !cpu->exit_request).. */
1415
1416        /* Does not need atomic_mb_set because a spurious wakeup is okay.  */
1417        atomic_set(&tcg_current_rr_cpu, NULL);
1418
1419        if (cpu && cpu->exit_request) {
1420            atomic_mb_set(&cpu->exit_request, 0);
1421        }
1422
1423        qemu_tcg_wait_io_event(cpu ? cpu : QTAILQ_FIRST(&cpus));
1424        deal_with_unplugged_cpus();
1425    }
1426
1427    return NULL;
1428}
1429
1430static void *qemu_hax_cpu_thread_fn(void *arg)
1431{
1432    CPUState *cpu = arg;
1433    int r;
1434
1435    qemu_mutex_lock_iothread();
1436    qemu_thread_get_self(cpu->thread);
1437
1438    cpu->thread_id = qemu_get_thread_id();
1439    cpu->created = true;
1440    cpu->halted = 0;
1441    current_cpu = cpu;
1442
1443    hax_init_vcpu(cpu);
1444    qemu_cond_signal(&qemu_cpu_cond);
1445
1446    while (1) {
1447        if (cpu_can_run(cpu)) {
1448            r = hax_smp_cpu_exec(cpu);
1449            if (r == EXCP_DEBUG) {
1450                cpu_handle_guest_debug(cpu);
1451            }
1452        }
1453
1454        while (cpu_thread_is_idle(cpu)) {
1455            qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
1456        }
1457#ifdef _WIN32
1458        SleepEx(0, TRUE);
1459#endif
1460        qemu_wait_io_event_common(cpu);
1461    }
1462    return NULL;
1463}
1464
1465#ifdef _WIN32
1466static void CALLBACK dummy_apc_func(ULONG_PTR unused)
1467{
1468}
1469#endif
1470
1471/* Multi-threaded TCG
1472 *
1473 * In the multi-threaded case each vCPU has its own thread. The TLS
1474 * variable current_cpu can be used deep in the code to find the
1475 * current CPUState for a given thread.
1476 */
1477
1478static void *qemu_tcg_cpu_thread_fn(void *arg)
1479{
1480    CPUState *cpu = arg;
1481
1482    g_assert(!use_icount);
1483
1484    rcu_register_thread();
1485    tcg_register_thread();
1486
1487    qemu_mutex_lock_iothread();
1488    qemu_thread_get_self(cpu->thread);
1489
1490    cpu->thread_id = qemu_get_thread_id();
1491    cpu->created = true;
1492    cpu->can_do_io = 1;
1493    current_cpu = cpu;
1494    qemu_cond_signal(&qemu_cpu_cond);
1495
1496    /* process any pending work */
1497    cpu->exit_request = 1;
1498
1499    while (1) {
1500        if (cpu_can_run(cpu)) {
1501            int r;
1502            r = tcg_cpu_exec(cpu);
1503            switch (r) {
1504            case EXCP_DEBUG:
1505                cpu_handle_guest_debug(cpu);
1506                break;
1507            case EXCP_HALTED:
1508                /* during start-up the vCPU is reset and the thread is
1509                 * kicked several times. If we don't ensure we go back
1510                 * to sleep in the halted state we won't cleanly
1511                 * start up when the vCPU is enabled.
1512                 *
1513                 * cpu->halted should ensure we sleep in wait_io_event
1514                 */
1515                g_assert(cpu->halted);
1516                break;
1517            case EXCP_ATOMIC:
1518                qemu_mutex_unlock_iothread();
1519                cpu_exec_step_atomic(cpu);
1520                qemu_mutex_lock_iothread();
1521            default:
1522                /* Ignore everything else? */
1523                break;
1524            }
1525        } else if (cpu->unplug) {
1526            qemu_tcg_destroy_vcpu(cpu);
1527            cpu->created = false;
1528            qemu_cond_signal(&qemu_cpu_cond);
1529            qemu_mutex_unlock_iothread();
1530            return NULL;
1531        }
1532
1533        atomic_mb_set(&cpu->exit_request, 0);
1534        qemu_tcg_wait_io_event(cpu);
1535    }
1536
1537    return NULL;
1538}
1539
1540static void qemu_cpu_kick_thread(CPUState *cpu)
1541{
1542#ifndef _WIN32
1543    int err;
1544
1545    if (cpu->thread_kicked) {
1546        return;
1547    }
1548    cpu->thread_kicked = true;
1549    err = pthread_kill(cpu->thread->thread, SIG_IPI);
1550    if (err) {
1551        fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
1552        exit(1);
1553    }
1554#else /* _WIN32 */
1555    if (!qemu_cpu_is_self(cpu)) {
1556        if (!QueueUserAPC(dummy_apc_func, cpu->hThread, 0)) {
1557            fprintf(stderr, "%s: QueueUserAPC failed with error %lu\n",
1558                    __func__, GetLastError());
1559            exit(1);
1560        }
1561    }
1562#endif
1563}
1564
1565void qemu_cpu_kick(CPUState *cpu)
1566{
1567    qemu_cond_broadcast(cpu->halt_cond);
1568    if (tcg_enabled()) {
1569        cpu_exit(cpu);
1570        /* NOP unless doing single-thread RR */
1571        qemu_cpu_kick_rr_cpu();
1572    } else {
1573        if (hax_enabled()) {
1574            /*
1575             * FIXME: race condition with the exit_request check in
1576             * hax_vcpu_hax_exec
1577             */
1578            cpu->exit_request = 1;
1579        }
1580        qemu_cpu_kick_thread(cpu);
1581    }
1582}
1583
1584void qemu_cpu_kick_self(void)
1585{
1586    assert(current_cpu);
1587    qemu_cpu_kick_thread(current_cpu);
1588}
1589
1590bool qemu_cpu_is_self(CPUState *cpu)
1591{
1592    return qemu_thread_is_self(cpu->thread);
1593}
1594
1595bool qemu_in_vcpu_thread(void)
1596{
1597    return current_cpu && qemu_cpu_is_self(current_cpu);
1598}
1599
1600static __thread bool iothread_locked = false;
1601
1602bool qemu_mutex_iothread_locked(void)
1603{
1604    return iothread_locked;
1605}
1606
1607void qemu_mutex_lock_iothread(void)
1608{
1609    g_assert(!qemu_mutex_iothread_locked());
1610    qemu_mutex_lock(&qemu_global_mutex);
1611    iothread_locked = true;
1612}
1613
1614void qemu_mutex_unlock_iothread(void)
1615{
1616    g_assert(qemu_mutex_iothread_locked());
1617    iothread_locked = false;
1618    qemu_mutex_unlock(&qemu_global_mutex);
1619}
1620
1621static bool all_vcpus_paused(void)
1622{
1623    CPUState *cpu;
1624
1625    CPU_FOREACH(cpu) {
1626        if (!cpu->stopped) {
1627            return false;
1628        }
1629    }
1630
1631    return true;
1632}
1633
1634void pause_all_vcpus(void)
1635{
1636    CPUState *cpu;
1637
1638    qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
1639    CPU_FOREACH(cpu) {
1640        cpu->stop = true;
1641        qemu_cpu_kick(cpu);
1642    }
1643
1644    if (qemu_in_vcpu_thread()) {
1645        cpu_stop_current();
1646    }
1647
1648    while (!all_vcpus_paused()) {
1649        qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
1650        CPU_FOREACH(cpu) {
1651            qemu_cpu_kick(cpu);
1652        }
1653    }
1654}
1655
1656void cpu_resume(CPUState *cpu)
1657{
1658    cpu->stop = false;
1659    cpu->stopped = false;
1660    qemu_cpu_kick(cpu);
1661}
1662
1663void resume_all_vcpus(void)
1664{
1665    CPUState *cpu;
1666
1667    qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
1668    CPU_FOREACH(cpu) {
1669        cpu_resume(cpu);
1670    }
1671}
1672
1673void cpu_remove(CPUState *cpu)
1674{
1675    cpu->stop = true;
1676    cpu->unplug = true;
1677    qemu_cpu_kick(cpu);
1678}
1679
1680void cpu_remove_sync(CPUState *cpu)
1681{
1682    cpu_remove(cpu);
1683    while (cpu->created) {
1684        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1685    }
1686}
1687
1688/* Size of the temporary buffers used for forming vCPU thread names */
1689#define VCPU_THREAD_NAME_SIZE 16
1690
1691static void qemu_tcg_init_vcpu(CPUState *cpu)
1692{
1693    char thread_name[VCPU_THREAD_NAME_SIZE];
1694    static QemuCond *single_tcg_halt_cond;
1695    static QemuThread *single_tcg_cpu_thread;
1696    static int tcg_region_inited;
1697
1698    /*
1699     * Initialize TCG regions--once. Now is a good time, because:
1700     * (1) TCG's init context, prologue and target globals have been set up.
1701     * (2) qemu_tcg_mttcg_enabled() works now (TCG init code runs before the
1702     *     -accel flag is processed, so the check doesn't work then).
1703     */
1704    if (!tcg_region_inited) {
1705        tcg_region_inited = 1;
1706        tcg_region_init();
1707    }
1708
1709    if (qemu_tcg_mttcg_enabled() || !single_tcg_cpu_thread) {
1710        cpu->thread = g_malloc0(sizeof(QemuThread));
1711        cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1712        qemu_cond_init(cpu->halt_cond);
1713
1714        if (qemu_tcg_mttcg_enabled()) {
1715            /* create a thread per vCPU with TCG (MTTCG) */
1716            parallel_cpus = true;
1717            snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
1718                 cpu->cpu_index);
1719
1720            qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
1721                               cpu, QEMU_THREAD_JOINABLE);
1722
1723        } else {
1724            /* share a single thread for all cpus with TCG */
1725            snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "ALL CPUs/TCG");
1726            qemu_thread_create(cpu->thread, thread_name,
1727                               qemu_tcg_rr_cpu_thread_fn,
1728                               cpu, QEMU_THREAD_JOINABLE);
1729
1730            single_tcg_halt_cond = cpu->halt_cond;
1731            single_tcg_cpu_thread = cpu->thread;
1732        }
1733#ifdef _WIN32
1734        cpu->hThread = qemu_thread_get_handle(cpu->thread);
1735#endif
1736        while (!cpu->created) {
1737            qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1738        }
1739    } else {
1740        /* For non-MTTCG cases we share the thread */
1741        cpu->thread = single_tcg_cpu_thread;
1742        cpu->halt_cond = single_tcg_halt_cond;
1743    }
1744}
1745
1746static void qemu_hax_start_vcpu(CPUState *cpu)
1747{
1748    char thread_name[VCPU_THREAD_NAME_SIZE];
1749
1750    cpu->thread = g_malloc0(sizeof(QemuThread));
1751    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1752    qemu_cond_init(cpu->halt_cond);
1753
1754    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HAX",
1755             cpu->cpu_index);
1756    qemu_thread_create(cpu->thread, thread_name, qemu_hax_cpu_thread_fn,
1757                       cpu, QEMU_THREAD_JOINABLE);
1758#ifdef _WIN32
1759    cpu->hThread = qemu_thread_get_handle(cpu->thread);
1760#endif
1761    while (!cpu->created) {
1762        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1763    }
1764}
1765
1766static void qemu_kvm_start_vcpu(CPUState *cpu)
1767{
1768    char thread_name[VCPU_THREAD_NAME_SIZE];
1769
1770    cpu->thread = g_malloc0(sizeof(QemuThread));
1771    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1772    qemu_cond_init(cpu->halt_cond);
1773    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
1774             cpu->cpu_index);
1775    qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
1776                       cpu, QEMU_THREAD_JOINABLE);
1777    while (!cpu->created) {
1778        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1779    }
1780}
1781
1782static void qemu_dummy_start_vcpu(CPUState *cpu)
1783{
1784    char thread_name[VCPU_THREAD_NAME_SIZE];
1785
1786    cpu->thread = g_malloc0(sizeof(QemuThread));
1787    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1788    qemu_cond_init(cpu->halt_cond);
1789    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
1790             cpu->cpu_index);
1791    qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
1792                       QEMU_THREAD_JOINABLE);
1793    while (!cpu->created) {
1794        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1795    }
1796}
1797
1798void qemu_init_vcpu(CPUState *cpu)
1799{
1800    cpu->nr_cores = smp_cores;
1801    cpu->nr_threads = smp_threads;
1802    cpu->stopped = true;
1803
1804    if (!cpu->as) {
1805        /* If the target cpu hasn't set up any address spaces itself,
1806         * give it the default one.
1807         */
1808        AddressSpace *as = g_new0(AddressSpace, 1);
1809
1810        address_space_init(as, cpu->memory, "cpu-memory");
1811        cpu->num_ases = 1;
1812        cpu_address_space_init(cpu, as, 0);
1813    }
1814
1815    if (kvm_enabled()) {
1816        qemu_kvm_start_vcpu(cpu);
1817    } else if (hax_enabled()) {
1818        qemu_hax_start_vcpu(cpu);
1819    } else if (tcg_enabled()) {
1820        qemu_tcg_init_vcpu(cpu);
1821    } else {
1822        qemu_dummy_start_vcpu(cpu);
1823    }
1824}
1825
1826void cpu_stop_current(void)
1827{
1828    if (current_cpu) {
1829        current_cpu->stop = false;
1830        current_cpu->stopped = true;
1831        cpu_exit(current_cpu);
1832        qemu_cond_broadcast(&qemu_pause_cond);
1833    }
1834}
1835
1836void vm_stop_from_timer(RunState state)
1837{
1838    qemu_system_vmstop_request_prepare();
1839    qemu_system_vmstop_request(state);
1840    /*
1841     * FIXME: should not return to device code in case
1842     * vm_stop() has been requested.
1843     */
1844    cpu_stop_current();
1845}
1846
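    /* Stop the VM.  From a vCPU thread the stop is only requested and the
     * main loop carries it out; from any other thread the VM is stopped
     * synchronously via do_vm_stop().
     */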
1847int vm_stop(RunState state)
1848{
1849    if (qemu_in_vcpu_thread()) {
1850        qemu_system_vmstop_request_prepare();
1851        qemu_system_vmstop_request(state);
1852        /*
1853         * FIXME: should not return to device code once
1854         * vm_stop() has been requested.
1855         */
1856        cpu_stop_current();
1857        return 0;
1858    }
1859
1860    return do_vm_stop(state);
1861}
1862
1863/**
1864 * Prepare for (re)starting the VM.
1865 * Returns -1 if the vCPUs are not to be restarted (e.g. if they are already
1866 * running or in case of an error condition), 0 otherwise.
1867 */
1868int vm_prepare_start(void)
1869{
1870    RunState requested;
1871    int res = 0;
1872
1873    qemu_vmstop_requested(&requested);
1874    if (runstate_is_running() && requested == RUN_STATE__MAX) {
1875        return -1;
1876    }
1877
1878    /* Ensure that a STOP/RESUME pair of events is emitted if a
1879     * vmstop request was pending.  The BLOCK_IO_ERROR event, for
1880     * example, is documented to always be followed by the
1881     * STOP event.
1882     */
1883    if (runstate_is_running()) {
1884        qapi_event_send_stop(&error_abort);
1885        res = -1;
1886    } else {
1887        replay_enable_events();
1888        cpu_enable_ticks();
1889        runstate_set(RUN_STATE_RUNNING);
1890        vm_state_notify(1, RUN_STATE_RUNNING);
1891    }
1892
1893    /* We are sending this now, but the CPUs will be resumed shortly afterwards */
1894    qapi_event_send_resume(&error_abort);
1895    return res;
1896}
1897
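    /* Start (or restart) the VM: run the preparation step above and, if it
     * reports success, resume all vCPUs.
     */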
1898void vm_start(void)
1899{
1900    if (!vm_prepare_start()) {
1901        resume_all_vcpus();
1902    }
1903}
1904
1905/* Perform the state transition even if the VM is already stopped;
1906   the current state is forgotten forever. */
1907int vm_stop_force_state(RunState state)
1908{
1909    if (runstate_is_running()) {
1910        return vm_stop(state);
1911    } else {
1912        runstate_set(state);
1913
1914        bdrv_drain_all();
1915        /* Make sure to return an error if the flush in a previous vm_stop()
1916         * failed. */
1917        return bdrv_flush_all();
1918    }
1919}
1920
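    /* Print the list of CPU models supported by the current target, when
     * the target defines cpu_list().
     */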
1921void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
1922{
1923    /* XXX: implement xxx_cpu_list for targets that are still missing it */
1924#if defined(cpu_list)
1925    cpu_list(f, cpu_fprintf);
1926#endif
1927}
1928
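    /* QMP 'query-cpus': build a CpuInfoList entry for every vCPU, with a
     * target-specific program counter field where the architecture is
     * known, plus instance properties when the machine can map a CPU index
     * to them.
     */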
1929CpuInfoList *qmp_query_cpus(Error **errp)
1930{
1931    MachineState *ms = MACHINE(qdev_get_machine());
1932    MachineClass *mc = MACHINE_GET_CLASS(ms);
1933    CpuInfoList *head = NULL, *cur_item = NULL;
1934    CPUState *cpu;
1935
1936    CPU_FOREACH(cpu) {
1937        CpuInfoList *info;
1938#if defined(TARGET_I386)
1939        X86CPU *x86_cpu = X86_CPU(cpu);
1940        CPUX86State *env = &x86_cpu->env;
1941#elif defined(TARGET_PPC)
1942        PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
1943        CPUPPCState *env = &ppc_cpu->env;
1944#elif defined(TARGET_SPARC)
1945        SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
1946        CPUSPARCState *env = &sparc_cpu->env;
1947#elif defined(TARGET_MIPS)
1948        MIPSCPU *mips_cpu = MIPS_CPU(cpu);
1949        CPUMIPSState *env = &mips_cpu->env;
1950#elif defined(TARGET_TRICORE)
1951        TriCoreCPU *tricore_cpu = TRICORE_CPU(cpu);
1952        CPUTriCoreState *env = &tricore_cpu->env;
1953#endif
1954
1955        cpu_synchronize_state(cpu);
1956
1957        info = g_malloc0(sizeof(*info));
1958        info->value = g_malloc0(sizeof(*info->value));
1959        info->value->CPU = cpu->cpu_index;
1960        info->value->current = (cpu == first_cpu);
1961        info->value->halted = cpu->halted;
1962        info->value->qom_path = object_get_canonical_path(OBJECT(cpu));
1963        info->value->thread_id = cpu->thread_id;
1964#if defined(TARGET_I386)
1965        info->value->arch = CPU_INFO_ARCH_X86;
1966        info->value->u.x86.pc = env->eip + env->segs[R_CS].base;
1967#elif defined(TARGET_PPC)
1968        info->value->arch = CPU_INFO_ARCH_PPC;
1969        info->value->u.ppc.nip = env->nip;
1970#elif defined(TARGET_SPARC)
1971        info->value->arch = CPU_INFO_ARCH_SPARC;
1972        info->value->u.q_sparc.pc = env->pc;
1973        info->value->u.q_sparc.npc = env->npc;
1974#elif defined(TARGET_MIPS)
1975        info->value->arch = CPU_INFO_ARCH_MIPS;
1976        info->value->u.q_mips.PC = env->active_tc.PC;
1977#elif defined(TARGET_TRICORE)
1978        info->value->arch = CPU_INFO_ARCH_TRICORE;
1979        info->value->u.tricore.PC = env->PC;
1980#else
1981        info->value->arch = CPU_INFO_ARCH_OTHER;
1982#endif
1983        info->value->has_props = !!mc->cpu_index_to_instance_props;
1984        if (info->value->has_props) {
1985            CpuInstanceProperties *props;
1986            props = g_malloc0(sizeof(*props));
1987            *props = mc->cpu_index_to_instance_props(ms, cpu->cpu_index);
1988            info->value->props = props;
1989        }
1990
1991        /* XXX: waiting for the qapi to support GSList */
1992        if (!cur_item) {
1993            head = cur_item = info;
1994        } else {
1995            cur_item->next = info;
1996            cur_item = info;
1997        }
1998    }
1999
2000    return head;
2001}
2002
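    /* QMP 'memsave': write @size bytes of guest virtual memory, starting at
     * @addr as seen by the selected vCPU (default CPU 0), to @filename.
     * An invocation might look like this (the values are only
     * illustrative):
     *   { "execute": "memsave",
     *     "arguments": { "val": 4096, "size": 65536,
     *                    "filename": "/tmp/virtual-mem-dump" } }
     */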
2003void qmp_memsave(int64_t addr, int64_t size, const char *filename,
2004                 bool has_cpu, int64_t cpu_index, Error **errp)
2005{
2006    FILE *f;
2007    uint32_t l;
2008    CPUState *cpu;
2009    uint8_t buf[1024];
2010    int64_t orig_addr = addr, orig_size = size;
2011
2012    if (!has_cpu) {
2013        cpu_index = 0;
2014    }
2015
2016    cpu = qemu_get_cpu(cpu_index);
2017    if (cpu == NULL) {
2018        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
2019                   "a CPU number");
2020        return;
2021    }
2022
2023    f = fopen(filename, "wb");
2024    if (!f) {
2025        error_setg_file_open(errp, errno, filename);
2026        return;
2027    }
2028
2029    while (size != 0) {
2030        l = sizeof(buf);
2031        if (l > size) {
2032            l = size;
            }
2033        if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
2034            error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64
2035                             " specified", orig_addr, orig_size);
2036            goto exit;
2037        }
2038        if (fwrite(buf, 1, l, f) != l) {
2039            error_setg(errp, QERR_IO_ERROR);
2040            goto exit;
2041        }
2042        addr += l;
2043        size -= l;
2044    }
2045
2046exit:
2047    fclose(f);
2048}
2049
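    /* QMP 'pmemsave': like memsave above, but dumps guest physical memory
     * and therefore takes no CPU index.
     */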
2050void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
2051                  Error **errp)
2052{
2053    FILE *f;
2054    uint32_t l;
2055    uint8_t buf[1024];
2056
2057    f = fopen(filename, "wb");
2058    if (!f) {
2059        error_setg_file_open(errp, errno, filename);
2060        return;
2061    }
2062
2063    while (size != 0) {
2064        l = sizeof(buf);
2065        if (l > size) {
2066            l = size;
            }
2067        cpu_physical_memory_read(addr, buf, l);
2068        if (fwrite(buf, 1, l, f) != l) {
2069            error_setg(errp, QERR_IO_ERROR);
2070            goto exit;
2071        }
2072        addr += l;
2073        size -= l;
2074    }
2075
2076exit:
2077    fclose(f);
2078}
2079
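    /* QMP 'inject-nmi': deliver an NMI (or the target's closest equivalent)
     * to the guest, directed at the monitor's currently selected CPU.
     */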
2080void qmp_inject_nmi(Error **errp)
2081{
2082    nmi_monitor_handle(monitor_get_cpu_index(), errp);
2083}
2084
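    /* Report how far the guest's icount clock has drifted from the host
     * clock, plus the maximum observed delay/advance when icount alignment
     * is enabled; does nothing unless icount mode is active.
     */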
2085void dump_drift_info(FILE *f, fprintf_function cpu_fprintf)
2086{
2087    if (!use_icount) {
2088        return;
2089    }
2090
2091    cpu_fprintf(f, "Host - Guest clock  %"PRIi64" ms\n",
2092                (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
2093    if (icount_align_option) {
2094        cpu_fprintf(f, "Max guest delay     %"PRIi64" ms\n", -max_delay/SCALE_MS);
2095        cpu_fprintf(f, "Max guest advance   %"PRIi64" ms\n", max_advance/SCALE_MS);
2096    } else {
2097        cpu_fprintf(f, "Max guest delay     NA\n");
2098        cpu_fprintf(f, "Max guest advance   NA\n");
2099    }
2100}
2101